「TracのWiki記法処理を知りたかったら、You、ソースを読めばいいよ。」とマスターこんぴろに教えていただいたので、読んでみようと思う。Python勉強してないですが、正規表現とかは同じであると期待してみる。わかんなくなったら勉強しよう。
- (3/1追記) Python チュートリアル http://www.python.jp/doc/release/tut/ で学んだことを追記します。
長いのでスルーしてください。
OSXの手元の環境だと
/Library/Python/2.5/site-packages/Trac-0.11.2.1-py2.5.egg/trac/wiki/parser.py
にあった。
著作権表記
# -*- coding: utf-8 -*- # # Copyright (C) 2005-2008 Edgewall Software # Copyright (C) 2003-2006 Jonas Borgström <jonas@edgewall.com> # Copyright (C) 2004-2006 Christopher Lenz <cmlenz@gmx.de> # Copyright (C) 2005-2007 Christian Boos <cboos@neuf.fr> # All rights reserved. # # This software is licensed as described in the file COPYING, which # you should have received as part of this distribution. The terms # are also available at http://trac.edgewall.org/wiki/TracLicense. # # This software consists of voluntary contributions made by many # individuals. For the exact contribution history, see the revision # history and logs, available at http://trac.edgewall.org/log/. # # Author: Jonas Borgström <jonas@edgewall.com> # Christopher Lenz <cmlenz@gmx.de> # Christian Boos <cboos@neuf.fr>
import
import re from trac.core import * from trac.notification import EMAIL_LOOKALIKE_PATTERN
クラス定義
class WikiParser(Component):
    """Wiki subsystem dedicated to parsing Wiki text."""
- Wiki text を Parseするところですよ、と。
定数
# Named building blocks for the Wiki regexps.  Names ending in _TOKEN are
# regexp fragments (escaped where needed); STARTBLOCK / ENDBLOCK are the
# literal, unescaped block delimiters.

# -- font style markers
ITALIC_TOKEN = "''"
BOLD_TOKEN = "'''"
BOLDITALIC_TOKEN = BOLD_TOKEN + ITALIC_TOKEN  # five single quotes
UNDERLINE_TOKEN = "__"
STRIKE_TOKEN = "~~"
SUBSCRIPT_TOKEN = ",,"
SUPERSCRIPT_TOKEN = r"\^"

# -- inline code and code block delimiters
INLINE_TOKEN = "`"
STARTBLOCK = "{{{"
STARTBLOCK_TOKEN = r"\{\{\{"
ENDBLOCK = "}}}"
ENDBLOCK_TOKEN = r"\}\}\}"

# -- link pieces
LINK_SCHEME = r"[\w.+-]+"  # as per RFC 2396
INTERTRAC_SCHEME = r"[a-zA-Z.+-]*?"  # no digits (support for shorthand links)
QUOTED_STRING = r"'[^']+'|\"[^\"]+\""

SHREF_TARGET_FIRST = r"[\w/?!#@](?<!_)"  # we don't want "_"
SHREF_TARGET_MIDDLE = r"(?:\|(?=[^|\s])|[^|<>\s])"
SHREF_TARGET_LAST = r"[\w/=](?<!_)"  # we don't want "_"

LHREF_RELATIVE_TARGET = r"[/#][^\s\]]*|\.\.?(?:[/#][^\s\]]*)?"

# -- anchor names
XML_NAME = r"[\w:](?<!\d)[\w:.-]*?"  # See http://www.w3.org/TR/REC-xml/#id
- 記法を定数化してあります。あ、これは変更可能性をあげますね。
- BNF記法みたいにかけるのか。
変換ルール定義 - 前処理
# Regexps the engine tries first, ahead of any provider-supplied syntax.
# Every rule is a named group; the optional leading "!" is part of the match.
_pre_rules = [
    # Font styles: one "(?P<name>!?TOKEN)" rule per marker, in this order.
    r"(?P<%s>!?%s)" % (style_name, style_token)
    for style_name, style_token in (
        ("bolditalic", BOLDITALIC_TOKEN),
        ("bold", BOLD_TOKEN),
        ("italic", ITALIC_TOKEN),
        ("underline", UNDERLINE_TOKEN),
        ("strike", STRIKE_TOKEN),
        ("subscript", SUBSCRIPT_TOKEN),
        ("superscript", SUPERSCRIPT_TOKEN),
    )
] + [
    # Inline code, delimited by the block tokens or by the inline token;
    # the nested group captures the text between the delimiters.
    (r"(?P<inlinecode>!?%s(?P<inline>.*?)%s)"
     % (STARTBLOCK_TOKEN, ENDBLOCK_TOKEN)),
    (r"(?P<inlinecode2>!?%s(?P<inline2>.*?)%s)"
     % (INLINE_TOKEN, INLINE_TOKEN)),
]
変換ルール定義2 - 後処理
# Rules provided by IWikiSyntaxProviders will be inserted here _post_rules = [ # e-mails r"(?P<email>!?%s)" % EMAIL_LOOKALIKE_PATTERN, # > ... r"(?P<citation>^(?P<cdepth>>(?: *>)*))", # &, < and > to &, < and > r"(?P<htmlescape>[&<>])", # wiki:TracLinks r"(?P<shref>!?((?P<sns>%s):(?P<stgt>%s|%s(?:%s*%s)?)))" \ % (LINK_SCHEME, QUOTED_STRING, SHREF_TARGET_FIRST, SHREF_TARGET_MIDDLE, SHREF_TARGET_LAST), # [wiki:TracLinks with optional label] or [/relative label] (r"(?P<lhref>!?\[(?:" r"(?P<rel>%s)|" % LHREF_RELATIVE_TARGET + # ./... or /... r"(?P<lns>%s):(?P<ltgt>%s|[^\]\s]*))" % \ (LINK_SCHEME, QUOTED_STRING) + # wiki:TracLinks or wiki:"trac links" r"(?:\s+(?P<label>%s|[^\]]+))?\])" % QUOTED_STRING), # optional label # [[macro]] call (r"(?P<macro>!?\[\[(?P<macroname>[\w/+-]+)" r"(\]\]|\((?P<macroargs>.*?)\)\]\]))"), # == heading == #hanchor r"(?P<heading>^\s*(?P<hdepth>=+)\s.*\s(?P=hdepth)\s*" r"(?P<hanchor>#%s)?(?:\s|$))" % XML_NAME, # * list r"(?P<list>^(?P<ldepth>\s+)(?:[-*]|\d+\.|[a-zA-Z]\.|[ivxIVX]{1,5}\.) )", # definition:: r"(?P<definition>^\s+((?:%s[^%s]*%s|%s(?:%s{,2}[^%s])*?%s|[^%s%s:]|:[^:])+::)(?:\s+|$))" % (INLINE_TOKEN, INLINE_TOKEN, INLINE_TOKEN, STARTBLOCK_TOKEN, ENDBLOCK[0], ENDBLOCK[0], ENDBLOCK_TOKEN, INLINE_TOKEN, STARTBLOCK[0]), # (leading space) r"(?P<indent>^(?P<idepth>\s+)(?=\S))", # || table || r"(?P<last_table_cell>\|\|\s*$)", r"(?P<table_cell>\|\|)"]
- うわー、ルール多いなぁ。
- でもコメントで区切られた種類としては11個（正規表現の数だと、テーブル用が2つあるので12個）しかないので、シンプルか。シンプルっすね。
- Wikiテキストから、ここの部分に対応する表示規則を作ってあげれば、行レベルの構造はカバーできそう。
- さっきの定数を使ってルールを組み立てているみたい。
変換ルール定義3 - コードブロック
# The patterns are written as raw strings so that "\!" and "\w" reach the
# regexp engine verbatim; in a plain literal they are invalid escape
# sequences, which newer Python versions warn about.  The pattern text
# itself is unchanged.

# Matches a "#!name" processor marker; group 1 is the processor name.
# NOTE: inside the second character class "+-/" is a range ("+" through
# "/"), so "," and "." are accepted in addition to "+", "-" and "/".
_processor_re = re.compile(r'#\!([\w+-][\w+-/]*)')

# Matches one key=value processor parameter; the value is either a bare
# word or a quoted string (the quotes are kept in group 2).
_processor_param_re = re.compile(r'''(\w+)=(".*?"|'.*?'|\w+)''')

# Matches runs of characters other than word characters, ":", "." and "-".
_anchor_re = re.compile(r'[^\w:.-]+', re.UNICODE)
__init__ メソッド
def __init__(self):
    """Initialise every lazily computed attribute to None.

    _get_link_resolvers() fills _link_resolvers on first use; the other
    three attributes are filled together by _prepare_rules().
    """
    self._compiled_rules = self._link_resolvers = None
    self._helper_patterns = self._external_handlers = None
インタフェース
# Read-only views on the rule data.  Each getter first calls
# _prepare_rules() and then hands back the matching private attribute.

def _get_rules(self):
    """Return the compiled regexp combining all syntax rules."""
    self._prepare_rules()
    compiled = self._compiled_rules
    return compiled

rules = property(fget=_get_rules)


def _get_helper_patterns(self):
    """Return the list of helper group names collected from the rules."""
    self._prepare_rules()
    names = self._helper_patterns
    return names

helper_patterns = property(fget=_get_helper_patterns)


def _get_external_handlers(self):
    """Return the mapping of provider rule group names to their handlers."""
    self._prepare_rules()
    handlers = self._external_handlers
    return handlers

external_handlers = property(fget=_get_external_handlers)
- 内部メソッド（_prepare_rules）を呼び出したうえで、内部変数を外から参照できるようにするプロパティ（インタフェース）定義っぽい
_prepare_rules メソッド
def _prepare_rules(self):
    """Build the combined rule regexp and its lookup tables once.

    Nothing is rebuilt when `_compiled_rules` is already set.  Otherwise
    the rule list is assembled as `_pre_rules`, then one generated rule
    per (regexp, handler) pair returned by the syntax providers, then
    `_post_rules`.  The result is stored in three attributes:

    - `_external_handlers`: dict from generated group name ("i0", "i1",
      ...) to the provider's handler
    - `_helper_patterns`: for every rule, the names of all named groups
      except the first one
    - `_compiled_rules`: all rules joined into one "|" alternation and
      compiled with re.UNICODE
    """
    from trac.wiki.api import WikiSystem

    if self._compiled_rules:
        return

    patterns = list(self._pre_rules)
    external = {}
    counter = 0
    for provider in WikiSystem(self.env).syntax_providers:
        for regexp, handler in provider.get_wiki_syntax():
            # Wrap each provider regexp in a uniquely numbered group so
            # that a match can be traced back to its handler.
            group = 'i%d' % counter
            external[group] = handler
            patterns.append('(?P<%s>%s)' % (group, regexp))
            counter += 1
    patterns.extend(self._post_rules)

    group_name_re = re.compile(r'\?P<([a-z\d_]+)>')
    nested_names = []
    for pattern in patterns:
        # The first named group is the rule itself; the remaining ones
        # are its helper groups.
        nested_names.extend(group_name_re.findall(pattern)[1:])

    # Compile before publishing anything, so a bad pattern leaves all
    # three attributes untouched.
    combined = re.compile('(?:' + '|'.join(patterns) + ')', re.UNICODE)
    self._external_handlers = external
    self._helper_patterns = nested_names
    self._compiled_rules = combined
- ルール定義を組み立てて返す部分。出力は3つの変数。
- self._external_handlers = handlers
- self._helper_patterns = helpers
- self._compiled_rules = rules
- 処理を追っていくと、
_get_link_resolvers メソッド
def _get_link_resolvers(self):
    """Return the namespace -> handler table for link resolvers.

    The table is built on first use from the (namespace, handler) pairs
    returned by each syntax provider's get_link_resolvers() and kept in
    `_link_resolvers`; a later pair for the same namespace replaces an
    earlier one.  An empty table is not treated as cached and is rebuilt
    on the next call.
    """
    if self._link_resolvers:
        return self._link_resolvers

    from trac.wiki.api import WikiSystem

    table = {}
    for provider in WikiSystem(self.env).syntax_providers:
        for namespace, handler in provider.get_link_resolvers():
            table[namespace] = handler
    self._link_resolvers = table
    return self._link_resolvers

link_resolvers = property(fget=_get_link_resolvers)
- リンクをHTMLに書き換えるルール集を作っている