"""irclog -- an IRC log beautifier for the Trac wiki""" # Copyright (c) 2005, Simon E. Ward # Copyright (c) 2005, Marius Gedminas # Copyright (c) 2000, Jeffrey W. Waugh # Trac Wiki-Processor: # Simon Ward # Python port: # Marius Gedminas # Original Author: # Jeff Waugh # Contributors: # Rick Welykochy # Alexander Else # # Released under the terms of the GNU GPL # http://www.gnu.org/copyleft/gpl.html import re from StringIO import StringIO IRCLOG2HTML_VERSION = "2.3" IRCLOG2HTML_RELEASE = "2005-03-28" URL_REGEXP = re.compile(r'((http|https|ftp|gopher|news)://[^ \'")>]*)') def createlinks(text): """Replace possible URLs with links.""" return URL_REGEXP.sub(r'\1', text) def escape(s): """Replace ampersands, pointies, control characters. >>> escape('Hello & ') 'Hello & <world>' >>> escape('Hello & ') 'Hello & <world>' Control characters (ASCII 0 to 31) are stripped away >>> escape(''.join([chr(x) for x in range(32)])) '' """ s = s.replace('&', '&').replace('<', '<').replace('>', '>') return ''.join([c for c in s if ord(c) > 0x1F]) def shorttime(time): """Strip date and seconds from time. >>> shorttime('12:45:17') '12:45' >>> shorttime('12:45') '12:45' >>> shorttime('2005-02-04T12:45') '12:45' """ if 'T' in time: time = time.split('T')[-1] elif ' ' in time: time = time.split(' ')[-1] if time.count(':') > 1: time = ':'.join(time.split(':')[:2]) return time class Enum(object): """Enumerated value.""" def __init__(self, value): self.value = value def __repr__(self): return self.value class LogParser(object): """Parse an IRC log file. When iterated, yields the following events: time, COMMENT, (nick, text) time, ACTION, text time, JOIN, text time, PART, text, time, NICKCHANGE, (text, oldnick, newnick) time, SERVER, text """ COMMENT = Enum('COMMENT') ACTION = Enum('ACTION') JOIN = Enum('JOIN') PART = Enum('PART') NICKCHANGE = Enum('NICKCHANGE') SERVER = Enum('SERVER') OTHER = Enum('OTHER') TIME_REGEXP = re.compile( r'^\[?(' # Optional [ r'(?:\d{4}-\d{2}-\d{2}T|\d{2}-\w{3}-\d{4} |\w{3} \d{2} )?' # Optional date r'\d\d:\d\d(:\d\d)?' # Mandatory HH:MM, optional :SS r')\]? +') # Optional ], mandatory space NICK_REGEXP = re.compile(r'^(?:<(.*?)>|([a-zA-Z\[\\\]^_`{|}][a-zA-Z0-9\[\\\]^_`{|}-]+))\s') JOIN_REGEXP = re.compile(r'^(?:\*\*\*|-->)\s.*joined') PART_REGEXP = re.compile(r'^(?:\*\*\*|<--)\s.*(quit|left)') SERVMSG_REGEXP = re.compile(r'^(?:\*\*\*|---)\s') NICK_CHANGE_REGEXP = re.compile( r'^(?:\*\*\*|---)\s+(.*?) (?:are|is) now known as (.*)') def __init__(self, infile): self.infile = infile def __iter__(self): for line in self.infile: line = line.rstrip('\r\n') if not line: continue m = self.TIME_REGEXP.match(line) if m: time = m.group(1) line = line[len(m.group(0)):] else: time = None m = self.NICK_REGEXP.match(line) if m: nick = m.group(1) or m.group(2) text = line[len(m.group(0)):] yield time, self.COMMENT, (nick, text) elif line.startswith('* ') or line.startswith('*\t'): yield time, self.ACTION, line elif self.JOIN_REGEXP.match(line): yield time, self.JOIN, line elif self.PART_REGEXP.match(line): yield time, self.PART, line else: m = self.NICK_CHANGE_REGEXP.match(line) if m: oldnick = m.group(1) newnick = m.group(2) yield time, self.NICKCHANGE, (line, oldnick, newnick) elif self.SERVMSG_REGEXP.match(line): yield time, self.SERVER, line else: yield time, self.OTHER, line class NickClassifier(object): """Assign style classes to nicknames.""" def __init__(self, maxclasses=30, default_classes=None): self.nickcount = 0 self.maxclasses = maxclasses self.nick_classes = {} if default_classes: self.nick_classes.update(default_classes) def __getitem__(self, nick): cls = self.nick_classes.get(nick) if not cls: self.nickcount += 1 fieldlen = len(str(self.maxclasses)) cls = ('nc%%0%dd' % fieldlen) % (self.nickcount % self.maxclasses) self.nick_classes[nick] = cls return cls def change(self, oldnick, newnick): if oldnick in self.nick_classes: self.nick_classes[newnick] = self.nick_classes.pop(oldnick) class AbstractStyle(object): """A style defines the way output is formatted. This is not a real class, rather it is an description of how style classes should be written. """ name = "stylename" description = "Single-line description" def __init__(self, outfile, classes=None): """Create a text formatter for writing to outfile. `classes` may have the following attributes: part join server nickchange action """ self.outfile = outfile self.classes = classes or {} def servermsg(self, time, what, line): """Output a generic server message. `time` is a string. `line` is not escaped. `what` is one of LogParser event constants (e.g. LogParser.JOIN). """ def nicktext(self, time, nick, text, htmlclass): """Output a comment uttered by someone. `time` is a string. `nick` and `text` are not escaped. `htmlclass` is a string. """ class XHTMLStyle(AbstractStyle): """Text style, produces XHTML that can be styled with CSS""" name = 'xhtml' description = __doc__ CLASSMAP = { LogParser.ACTION: 'action', LogParser.JOIN: 'join', LogParser.PART: 'part', LogParser.NICKCHANGE: 'nickchange', LogParser.SERVER: 'servermsg', LogParser.OTHER: 'other', } prefix = '
' suffix = """

Generated by irclog wiki-processor (Simon Ward), based on irclog2html.py %(VERSION)s by Marius Gedminas - find it at mg.pov.lt!

""" % { 'VERSION': IRCLOG2HTML_VERSION } def link(self, url, title): # Intentionally not escaping title so that &entities; work if url: print >> self.outfile, ('%s' % (escape(urllib.quote(url)), title or escape(url))), elif title: print >> self.outfile, ('%s' % title), def servermsg(self, time, what, text): """Output a generic server message. `time` is a string. `line` is not escaped. `what` is one of LogParser event constants (e.g. LogParser.JOIN). """ text = escape(text) text = createlinks(text) if time: displaytime = shorttime(time) print >> self.outfile, ('

' '%s ' '%s

' % (time, self.CLASSMAP[what], time, displaytime, text)) else: print >> self.outfile, ('

%s

' % (self.CLASSMAP[what], text)) def nicktext(self, time, nick, text, htmlclass): """Output a comment uttered by someone. `time` is a string. `nick` and `text` are not escaped. `htmlclass` is a string. """ nick = escape(nick) text = escape(text) text = createlinks(text) text = text.replace(' ', '  ') if time: displaytime = shorttime(time) print >> self.outfile, ('

' '%s ' '<%s>' ' %s

' % (time, htmlclass, time, displaytime, nick, text)) else: print >> self.outfile, ('

' '<%s>' ' %s

' % (htmlclass, nick, text)) class XHTMLTableStyle(XHTMLStyle): """Table style, produces XHTML that can be styled with CSS""" name = 'xhtmltable' description = __doc__ prefix = '' suffix = '
' def servermsg(self, time, what, text): text = escape(text) text = createlinks(text) if time: displaytime = shorttime(time) print >> self.outfile, ('' '%s' '%s' '' % (time, self.CLASSMAP[what], text, time, displaytime)) else: print >> self.outfile, ('' '%s' '' % (self.CLASSMAP[what], text)) def nicktext(self, time, nick, text, htmlclass): nick = escape(nick) text = escape(text) text = createlinks(text) text = text.replace(' ', '  ') if time: displaytime = shorttime(time) print >> self.outfile, ('' '%s' '%s' '' '%s' '' % (htmlclass, time, nick, text, time, displaytime)) else: print >> self.outfile, ('' '%s' '%s' '' % (htmlclass, nick, text)) def execute(hdf, text, env): textbuf = StringIO(text) htmlbuf = StringIO() parser = LogParser(textbuf) formatter = XHTMLTableStyle(htmlbuf) nick_classes = NickClassifier(maxclasses=20) htmlbuf.write(formatter.prefix) for time, what, info in parser: if what == LogParser.COMMENT: nick, text = info nickclass = nick_classes[nick] formatter.nicktext(time, nick, text, nickclass) else: if what == LogParser.NICKCHANGE: text, oldnick, newnick = info nick_classes.change(oldnick, newnick) else: text = info formatter.servermsg(time, what, text) # Footer htmlbuf.write(formatter.suffix) return htmlbuf.getvalue()