# Record of one URL seen on IRC: the channel (or target) it was posted to,
# the nick that posted it, the time it was seen, and the URL string itself.
# The leading "Url" argument additionally registers the class as Struct::Url.
Url = Struct.new("Url", :channel, :nick, :time, :url)

# Captures the text between <title> and </title>. Case-insensitive and
# multi-line, so a title may span several lines. Attributes on the opening
# tag (e.g. <title lang="en">) are tolerated; other tags whose name merely
# starts with "title" are not matched.
TITLE_RE = /<\s*title(?:\s[^>]*)?>(.+?)<\s*\/title\s*>/im
21 # extra codes, for future use...
35 'otimes' => '⊗',
44 'Epsilon' => 'Ε',
49 'Upsilon' => 'Υ',
51 'there4' => '∴',
56 'rsaquo' => '›',
80 'lfloor' => '⌊',
101 'sbquo' => '‚',
114 'infin' => '∞',
119 'thinsp' => ' ',
121 'bdquo' => '„',
128 'mdash' => '—',
130 'permil' => '‰',
135 'forall' => '∀',
137 'rceil' => '⌉',
140 'lambda' => 'λ',
144 'dagger' => '†',
147 'image' => 'ℑ',
148 'alefsym' => 'ℵ',
154 'frasl' => '⁄',
156 'lowast' => '∗',
167 'oline' => '‾',
174 'empty' => '∅',
181 'weierp' => '℘',
186 'omicron' => 'ο',
187 'upsilon' => 'υ',
189 'Lambda' => 'Λ',
196 'scaron' => 'š',
197 'lsquo' => '‘',
205 'hellip' => '…',
209 'rfloor' => '⌋',
211 'crarr' => '↵',
213 'notin' => '∉',
214 'exist' => '∃',
217 'Dagger' => '‡',
218 'oplus' => '⊕',
224 'lsaquo' => '‹',
226 'Omicron' => 'Ο',
241 'sigmaf' => 'ς',
243 'minus' => '−',
246 'epsilon' => 'ε',
257 'spades' => '♠',
258 'rsquo' => '’',
262 'thetasym' => 'ϑ',
266 'ldquo' => '“',
267 'hearts' => '♥',
# Plugin class for URL tracking; it subclasses Plugin and registers two
# configuration options as part of the class body.
273 class UrlPlugin < Plugin
# Integer option 'url.max_urls': defaults to 100 and the validation proc
# only accepts values greater than zero.
274 BotConfig.register BotConfigIntegerValue.new('url.max_urls',
275 :default => 100, :validate => Proc.new{|v| v > 0},
276 :desc => "Maximum number of urls to store. New urls replace oldest ones.")
# Boolean option 'url.display_link_info'.
# NOTE(review): the line carrying this option's :default is not visible in
# this excerpt -- confirm the default before relying on it.
277 BotConfig.register BotConfigBooleanValue.new('url.display_link_info',
279 :desc => "Get the title of any links pasted to the channel and display it (also tells if the link is broken or the site is down)")
# Registers a new empty Array as the registry default -- presumably so that
# lookups for a target with no stored urls yield an empty list; confirm
# against the registry API. (The enclosing method's def line is not visible.)
283 @registry.set_default(Array.new)
# Returns the usage text for the plugin's commands.
#
# plugin:: name the help was requested for (not used; the text is fixed)
# topic::  optional sub-topic (not used; the same text is always returned)
def help(plugin, topic="")
  "urls [<max>=4] => list <max> last urls mentioned in current channel, urls search [<max>=4] <regexp> => search for matching urls. In a private message, you must specify the channel to query, eg. urls <channel> [max], urls search <channel> [max] <regexp>"
end
# Converts HTML entities in +htmldata+ into plain characters for IRC output.
#
# CGI.unescapeHTML handles the entities it knows first. Any well-formed
# entity that is still left is looked up in UNESCAPE_TABLE and replaced by
# "*" when the table has no entry for it.
#
# htmldata:: String holding the raw title text
# Returns a new String; the argument is not modified.
def unescape_title(htmldata)
  # first pass -- let CGI decode what it can
  htmldata = CGI.unescapeHTML(htmldata)

  # second pass -- translate the remaining entities. Only well-formed
  # references (&name; or &#123;) are considered, so ordinary text that
  # happens to contain "&" followed later by ";" is left untouched.
  htmldata.gsub(/&(#?\w+);/) do
    symbol = Regexp.last_match(1)

    # remove the 0-padding from numeric entities ("#0038" -> "#38")
    symbol = "##{Regexp.last_match(1).to_i}" if symbol =~ /\A#(\d+)\z/

    # output the symbol's irc-translated character, or a * if it's unknown
    UNESCAPE_TABLE[symbol] || '*'
  end
end
# Extracts the page title from a chunk of HTML and formats it for display.
#
# pagedata:: String with (the beginning of) an HTML document
# Returns "[Link Info] title: <title>", or nil when TITLE_RE finds no title.
# Line breaks inside the title are collapsed to single spaces, entities are
# decoded through unescape_title, and the result keeps at most the first
# 256 characters.
def get_title_from_html(pagedata)
  found = TITLE_RE.match(pagedata)
  return unless found

  title = found[1].strip.gsub(/\s*\n+\s*/, " ")
  title = unescape_title(title)
  title = title[0..255] if title.length > 255
  "[Link Info] title: #{title}"
end
# Reads at most +amount+ characters of a response body, chunk by chunk, and
# stops as soon as the limit is reached so the rest is never pulled in.
#
# response:: object whose read_body yields successive String chunks
# amount::   maximum number of characters to return
# Returns the collected data as a single String ("" if nothing was yielded).
def read_data_from_response(response, amount)
  amount_read = 0
  chunks = []

  response.read_body do |chunk| # read body now
    amount_read += chunk.length

    if amount_read > amount
      # drop the tail of this chunk that goes past the limit
      amount_of_overflow = amount_read - amount
      chunk = chunk[0...-amount_of_overflow]
    end

    chunks << chunk

    break if amount_read >= amount
  end

  chunks.join
end
# Fetches +uri_str+ and returns a one-line "[Link Info] ..." description.
#
# uri_str:: the URL to look up
# depth::   number of redirects that may still be followed (default 10)
#
# Returns
# * the page title line for text/* responses (nil if the page has no title),
# * a "type: ..., size: ..." line for any other successful response
#   (the size part is left out when the server sends no Content-Length),
# * an error line for 4xx/5xx responses and for socket errors,
# * nil for non-http(s) URLs and for any other kind of response.
#
# Raises RuntimeError when +depth+ reaches 0.
def get_title_for_url(uri_str, depth=10)
  raise "Error: Maximum redirects hit." if depth == 0

  debug "+ Getting #{uri_str}"
  url = URI.parse(uri_str)
  # only plain http and https links can be fetched
  return unless url.scheme.to_s =~ /\Ahttps?\z/i

  debug "+ connecting to #{url.host}:#{url.port}"
  http = @bot.httputil.get_proxy(url)
  http.start do |conn|
    url.path = '/' if url.path == ''

    # request_uri (path plus query string) rather than the bare path, so
    # links such as /watch?v=... fetch the page that was actually pasted
    conn.request_get(url.request_uri, "User-Agent" => "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)") do |response|
      case response
      when Net::HTTPRedirection
        # call self recursively if this is a redirect
        redirect_to = response['location'] || './'
        debug "+ redirect location: #{redirect_to.inspect}"
        url = URI.join url.to_s, redirect_to
        debug "+ whee, redirecting to #{url.to_s}!"
        return get_title_for_url(url.to_s, depth-1)
      when Net::HTTPSuccess
        if response['content-type'] =~ /^text\//
          # the content is 'text/*', so read the start of the page and
          # retrieve the title from it
          debug "+ getting #{url.request_uri}"
          data = read_data_from_response(response, 50000)
          return get_title_from_html(data)
        else
          # content doesn't have a title, just display info. The header is
          # absent for e.g. chunked responses, so only format it if present.
          size = response['content-length']
          size = size.gsub(/(\d)(?=\d{3}+(?:\.|$))(\d{3}\..*)?/,'\1,\2') if size
          return "[Link Info] type: #{response['content-type']}#{size ? ", size: #{size} bytes" : ""}"
        end
      when Net::HTTPClientError, Net::HTTPServerError
        return "[Link Info] Error getting link (#{response.code} - #{response.message})"
      else
        return nil
      end # end of "case response"
    end # end of request block
  end # end of http start block

  nil
rescue SocketError => e
  "[Link Info] Error connecting to site (#{e.message})"
end
# Body of the handler that scans an incoming message +m+ for a URL,
# optionally replies with link info, and records the URL in the list kept
# for the message's target.
# NOTE(review): the method's def line is not visible in this excerpt;
# presumably this is the plugin's message-listener hook -- confirm.

# Anything that is not a PrivMessage is ignored.
400 return unless m.kind_of?(PrivMessage)
402 # TODO support multiple urls in one line
# Cheap presence check first (http, https, ftp, ftps); the second pattern
# then captures the URL up to the next whitespace or the end of the line.
403 if m.message =~ /(f|ht)tps?:\/\//
404 if m.message =~ /((f|ht)tps?:\/\/.*?)(?:\s+|$)/
# NOTE(review): the assignment of urlstr is not visible here -- presumably
# it takes the first capture of the match above; confirm.
406 list = @registry[m.target]
# Link info is fetched only when the 'url.display_link_info' option is set;
# the lookup runs before the URL is stored.
408 if @bot.config['url.display_link_info']
409 debug "Getting title for #{urlstr}..."
410 title = get_title_for_url urlstr
415 debug "Title not found!"
419 # check to see if this url is already listed
420 return if list.find {|u| u.url == urlstr }
# New entry: target, sender nick, current time and the URL string.
422 url = Url.new(m.target, m.sourcenick, Time.new, urlstr)
423 debug "#{list.length} urls so far"
# NOTE(review): the comparison is a strict ">", and the statement that
# trims the list is not visible here. If it removes a single entry, the
# list would settle at max_urls + 1 items -- confirm whether ">=" is meant.
424 if list.length > @bot.config['url.max_urls']
427 debug "storing url #{url.url}"
429 debug "#{list.length} urls now"
# Write the list back to the registry under the message target.
430 @registry[m.target] = list
# Body of the command that lists stored urls for a channel.
# NOTE(review): the def line is not visible in this excerpt; presumably this
# is the method the 'urls ...' routes dispatch to -- confirm.

# An explicit :channel parameter wins; otherwise the message target is used.
436 channel = params[:channel] ? params[:channel] : m.target
437 max = params[:limit].to_i
440 list = @registry[channel]
442 m.reply "no urls seen yet for channel #{channel}"
# Replies with the first +max+ entries, one line per url.
# NOTE(review): with max == 0 the range 0..-1 would select the whole list;
# any clamping of max is not visible here -- confirm it exists.
444 list[0..(max-1)].each do |url|
445 m.reply "[#{url.time.strftime('%Y/%m/%d %H:%M:%S')}] <#{url.nick}> #{url.url}"
# Replies with the stored urls of a channel whose url or poster nick matches
# a regular expression (case-insensitive).
#
# m::      the message being answered; replies are sent through m.reply
# params:: route parameters: :string (the pattern), :limit (maximum number
#          of results, kept within 1..10) and optionally :channel (defaults
#          to the message target)
#
# An invalid pattern is reported back to the user instead of raising.
def search(m, params)
  channel = params[:channel] ? params[:channel] : m.target
  max = params[:limit].to_i
  string = params[:string]
  max = 10 if max > 10
  max = 1 if max < 1

  # NOTE(review): the pattern comes straight from IRC input; a pathological
  # expression can still be slow to evaluate.
  begin
    regex = Regexp.new(string, Regexp::IGNORECASE)
  rescue RegexpError => e
    m.reply "invalid regular expression: #{e.message}"
    return
  end

  matches = @registry[channel].select { |url|
    regex.match(url.url) || regex.match(url.nick)
  }

  if matches.empty?
    m.reply "no matches for channel #{channel}"
  else
    matches[0..(max-1)].each do |url|
      m.reply "[#{url.time.strftime('%Y/%m/%d %H:%M:%S')}] <#{url.nick}> #{url.url}"
    end
  end
end
# Instantiates the plugin and registers its command routes. Every route
# defaults :limit to 4 and requires it to consist of digits only.
# NOTE(review): each plugin.map call below ends with a trailing comma; its
# final argument line is not visible in this excerpt -- confirm what it is
# before editing these routes.
469 plugin = UrlPlugin.new
# Search with an explicit channel; dispatched to the 'search' action.
470 plugin.map 'urls search :channel :limit :string', :action => 'search',
471 :defaults => {:limit => 4},
472 :requirements => {:limit => /^\d+$/},
# Search without a channel parameter; dispatched to the 'search' action.
474 plugin.map 'urls search :limit :string', :action => 'search',
475 :defaults => {:limit => 4},
476 :requirements => {:limit => /^\d+$/},
# Listing with an explicit channel; no :action is given on the visible lines.
478 plugin.map 'urls :channel :limit', :defaults => {:limit => 4},
479 :requirements => {:limit => /^\d+$/},
# Listing without a channel parameter; no :action is given on the visible lines.
481 plugin.map 'urls :limit', :defaults => {:limit => 4},
482 :requirements => {:limit => /^\d+$/},