eggs/mercurial-1.7.3-py2.6-linux-x86_64.egg/mercurial/minirst.py
changeset 69 c6bca38c1cbf
equal deleted inserted replaced
68:5ff1fc726848 69:c6bca38c1cbf
       
     1 # minirst.py - minimal reStructuredText parser
       
     2 #
       
     3 # Copyright 2009, 2010 Matt Mackall <mpm@selenic.com> and others
       
     4 #
       
     5 # This software may be used and distributed according to the terms of the
       
     6 # GNU General Public License version 2 or any later version.
       
     7 
       
     8 """simplified reStructuredText parser.
       
     9 
       
    10 This parser knows just enough about reStructuredText to parse the
       
    11 Mercurial docstrings.
       
    12 
       
    13 It cheats in a major way: nested blocks are not really nested. They
       
    14 are just indented blocks that look like they are nested. This relies
       
    15 on the user to keep the right indentation for the blocks.
       
    16 
       
    17 It only supports a small subset of reStructuredText:
       
    18 
       
    19 - sections
       
    20 
       
    21 - paragraphs
       
    22 
       
    23 - literal blocks
       
    24 
       
    25 - definition lists
       
    26 
       
    27 - specific admonitions
       
    28 
       
    29 - bullet lists (items must start with '-')
       
    30 
       
    31 - enumerated lists (no autonumbering)
       
    32 
       
    33 - field lists (colons cannot be escaped)
       
    34 
       
    35 - option lists (supports only long options without arguments)
       
    36 
       
     37 - inline literals (no other inline markup is recognized)
       
    38 """
       
    39 
       
    40 import re, sys
       
    41 import util, encoding
       
    42 from i18n import _
       
    43 
       
    44 
       
    45 def replace(text, substs):
       
    46     utext = text.decode(encoding.encoding)
       
    47     for f, t in substs:
       
    48         utext = utext.replace(f, t)
       
    49     return utext.encode(encoding.encoding)
       
    50 
       
    51 
       
    52 _blockre = re.compile(r"\n(?:\s*\n)+")
       
    53 
       
    54 def findblocks(text):
       
    55     """Find continuous blocks of lines in text.
       
    56 
       
    57     Returns a list of dictionaries representing the blocks. Each block
       
    58     has an 'indent' field and a 'lines' field.
       
    59     """
       
    60     blocks = []
       
    61     for b in _blockre.split(text.strip()):
       
    62         lines = b.splitlines()
       
    63         indent = min((len(l) - len(l.lstrip())) for l in lines)
       
    64         lines = [l[indent:] for l in lines]
       
    65         blocks.append(dict(indent=indent, lines=lines))
       
    66     return blocks
       
    67 
       
    68 
       
def findliteralblocks(blocks):
    """Finds literal blocks and adds a 'type' field to the blocks.

    Literal blocks are given the type 'literal', all other blocks are
    given the type 'paragraph'.

    The list is modified in place (a paragraph consisting only of '::'
    is removed, the trailing '::' marker is rewritten, and the indent
    of literal blocks is adjusted) and is also returned.
    """
    i = 0
    while i < len(blocks):
        # Searching for a block that looks like this:
        #
        # +------------------------------+
        # | paragraph                    |
        # | (ends with "::")             |
        # +------------------------------+
        #    +---------------------------+
        #    | indented literal block    |
        #    +---------------------------+
        blocks[i]['type'] = 'paragraph'
        if blocks[i]['lines'][-1].endswith('::') and i + 1 < len(blocks):
            indent = blocks[i]['indent']
            # How much deeper the following block is indented than the
            # introducing paragraph.
            adjustment = blocks[i + 1]['indent'] - indent

            if blocks[i]['lines'] == ['::']:
                # Expanded form: remove block
                del blocks[i]
                i -= 1
                # NOTE(review): from here on blocks[i] is the block
                # *before* the removed '::' paragraph, or blocks[-1]
                # (the last block) when the '::' paragraph was the
                # first one. The bullet check below is applied to that
                # block -- confirm this is intended.
            elif blocks[i]['lines'][-1].endswith(' ::'):
                # Partially minimized form: remove space and both
                # colons.
                blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-3]
            else:
                # Fully minimized form: remove just one colon.
                blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-1]

            # List items are formatted with a hanging indent. We must
            # correct for this here while we still have the original
            # information on the indentation of the subsequent literal
            # blocks available.
            m = _bulletre.match(blocks[i]['lines'][0])
            if m:
                indent += m.end()
                adjustment -= m.end()

            # Mark the following indented blocks.
            while i + 1 < len(blocks) and blocks[i + 1]['indent'] > indent:
                blocks[i + 1]['type'] = 'literal'
                blocks[i + 1]['indent'] -= adjustment
                i += 1
        i += 1
    return blocks
       
   119 
       
   120 _bulletre = re.compile(r'(-|[0-9A-Za-z]+\.|\(?[0-9A-Za-z]+\)|\|) ')
       
   121 _optionre = re.compile(r'^(--[a-z-]+)((?:[ =][a-zA-Z][\w-]*)?  +)(.*)$')
       
   122 _fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):[ ]+(.*)')
       
   123 _definitionre = re.compile(r'[^ ]')
       
   124 
       
   125 def splitparagraphs(blocks):
       
   126     """Split paragraphs into lists."""
       
   127     # Tuples with (list type, item regexp, single line items?). Order
       
   128     # matters: definition lists has the least specific regexp and must
       
   129     # come last.
       
   130     listtypes = [('bullet', _bulletre, True),
       
   131                  ('option', _optionre, True),
       
   132                  ('field', _fieldre, True),
       
   133                  ('definition', _definitionre, False)]
       
   134 
       
   135     def match(lines, i, itemre, singleline):
       
   136         """Does itemre match an item at line i?
       
   137 
       
   138         A list item can be followed by an idented line or another list
       
   139         item (but only if singleline is True).
       
   140         """
       
   141         line1 = lines[i]
       
   142         line2 = i + 1 < len(lines) and lines[i + 1] or ''
       
   143         if not itemre.match(line1):
       
   144             return False
       
   145         if singleline:
       
   146             return line2 == '' or line2[0] == ' ' or itemre.match(line2)
       
   147         else:
       
   148             return line2.startswith(' ')
       
   149 
       
   150     i = 0
       
   151     while i < len(blocks):
       
   152         if blocks[i]['type'] == 'paragraph':
       
   153             lines = blocks[i]['lines']
       
   154             for type, itemre, singleline in listtypes:
       
   155                 if match(lines, 0, itemre, singleline):
       
   156                     items = []
       
   157                     for j, line in enumerate(lines):
       
   158                         if match(lines, j, itemre, singleline):
       
   159                             items.append(dict(type=type, lines=[],
       
   160                                               indent=blocks[i]['indent']))
       
   161                         items[-1]['lines'].append(line)
       
   162                     blocks[i:i + 1] = items
       
   163                     break
       
   164         i += 1
       
   165     return blocks
       
   166 
       
   167 
       
# Width, in columns, of the key column that formatblock() uses when it
# lays out the items of a field list.
_fieldwidth = 12
       
   169 
       
   170 def updatefieldlists(blocks):
       
   171     """Find key and maximum key width for field lists."""
       
   172     i = 0
       
   173     while i < len(blocks):
       
   174         if blocks[i]['type'] != 'field':
       
   175             i += 1
       
   176             continue
       
   177 
       
   178         keywidth = 0
       
   179         j = i
       
   180         while j < len(blocks) and blocks[j]['type'] == 'field':
       
   181             m = _fieldre.match(blocks[j]['lines'][0])
       
   182             key, rest = m.groups()
       
   183             blocks[j]['lines'][0] = rest
       
   184             blocks[j]['key'] = key
       
   185             keywidth = max(keywidth, len(key))
       
   186             j += 1
       
   187 
       
   188         for block in blocks[i:j]:
       
   189             block['keywidth'] = keywidth
       
   190         i = j + 1
       
   191 
       
   192     return blocks
       
   193 
       
   194 
       
   195 def prunecontainers(blocks, keep):
       
   196     """Prune unwanted containers.
       
   197 
       
   198     The blocks must have a 'type' field, i.e., they should have been
       
   199     run through findliteralblocks first.
       
   200     """
       
   201     pruned = []
       
   202     i = 0
       
   203     while i + 1 < len(blocks):
       
   204         # Searching for a block that looks like this:
       
   205         #
       
   206         # +-------+---------------------------+
       
   207         # | ".. container ::" type            |
       
   208         # +---+                               |
       
   209         #     | blocks                        |
       
   210         #     +-------------------------------+
       
   211         if (blocks[i]['type'] == 'paragraph' and
       
   212             blocks[i]['lines'][0].startswith('.. container::')):
       
   213             indent = blocks[i]['indent']
       
   214             adjustment = blocks[i + 1]['indent'] - indent
       
   215             containertype = blocks[i]['lines'][0][15:]
       
   216             prune = containertype not in keep
       
   217             if prune:
       
   218                 pruned.append(containertype)
       
   219 
       
   220             # Always delete "..container:: type" block
       
   221             del blocks[i]
       
   222             j = i
       
   223             while j < len(blocks) and blocks[j]['indent'] > indent:
       
   224                 if prune:
       
   225                     del blocks[j]
       
   226                     i -= 1 # adjust outer index
       
   227                 else:
       
   228                     blocks[j]['indent'] -= adjustment
       
   229                     j += 1
       
   230         i += 1
       
   231     return blocks, pruned
       
   232 
       
   233 
       
   234 _sectionre = re.compile(r"""^([-=`:.'"~^_*+#])\1+$""")
       
   235 
       
   236 def findsections(blocks):
       
   237     """Finds sections.
       
   238 
       
   239     The blocks must have a 'type' field, i.e., they should have been
       
   240     run through findliteralblocks first.
       
   241     """
       
   242     for block in blocks:
       
   243         # Searching for a block that looks like this:
       
   244         #
       
   245         # +------------------------------+
       
   246         # | Section title                |
       
   247         # | -------------                |
       
   248         # +------------------------------+
       
   249         if (block['type'] == 'paragraph' and
       
   250             len(block['lines']) == 2 and
       
   251             encoding.colwidth(block['lines'][0]) == len(block['lines'][1]) and
       
   252             _sectionre.match(block['lines'][1])):
       
   253             block['underline'] = block['lines'][1][0]
       
   254             block['type'] = 'section'
       
   255             del block['lines'][1]
       
   256     return blocks
       
   257 
       
   258 
       
   259 def inlineliterals(blocks):
       
   260     substs = [('``', '"')]
       
   261     for b in blocks:
       
   262         if b['type'] in ('paragraph', 'section'):
       
   263             b['lines'] = [replace(l, substs) for l in b['lines']]
       
   264     return blocks
       
   265 
       
   266 
       
   267 def hgrole(blocks):
       
   268     substs = [(':hg:`', '"hg '), ('`', '"')]
       
   269     for b in blocks:
       
   270         if b['type'] in ('paragraph', 'section'):
       
   271             # Turn :hg:`command` into "hg command". This also works
       
   272             # when there is a line break in the command and relies on
       
   273             # the fact that we have no stray back-quotes in the input
       
   274             # (run the blocks through inlineliterals first).
       
   275             b['lines'] = [replace(l, substs) for l in b['lines']]
       
   276     return blocks
       
   277 
       
   278 
       
   279 def addmargins(blocks):
       
   280     """Adds empty blocks for vertical spacing.
       
   281 
       
   282     This groups bullets, options, and definitions together with no vertical
       
   283     space between them, and adds an empty block between all other blocks.
       
   284     """
       
   285     i = 1
       
   286     while i < len(blocks):
       
   287         if (blocks[i]['type'] == blocks[i - 1]['type'] and
       
   288             blocks[i]['type'] in ('bullet', 'option', 'field')):
       
   289             i += 1
       
   290         else:
       
   291             blocks.insert(i, dict(lines=[''], indent=0, type='margin'))
       
   292             i += 2
       
   293     return blocks
       
   294 
       
   295 def prunecomments(blocks):
       
   296     """Remove comments."""
       
   297     i = 0
       
   298     while i < len(blocks):
       
   299         b = blocks[i]
       
   300         if b['type'] == 'paragraph' and b['lines'][0].startswith('.. '):
       
   301             del blocks[i]
       
   302         else:
       
   303             i += 1
       
   304     return blocks
       
   305 
       
   306 _admonitionre = re.compile(r"\.\. (admonition|attention|caution|danger|"
       
   307                            r"error|hint|important|note|tip|warning)::",
       
   308                            flags=re.IGNORECASE)
       
   309 
       
   310 def findadmonitions(blocks):
       
   311     """
       
   312     Makes the type of the block an admonition block if
       
   313     the first line is an admonition directive
       
   314     """
       
   315     i = 0
       
   316     while i < len(blocks):
       
   317         m = _admonitionre.match(blocks[i]['lines'][0])
       
   318         if m:
       
   319             blocks[i]['type'] = 'admonition'
       
   320             admonitiontitle = blocks[i]['lines'][0][3:m.end() - 2].lower()
       
   321 
       
   322             firstline = blocks[i]['lines'][0][m.end() + 1:]
       
   323             if firstline:
       
   324                 blocks[i]['lines'].insert(1, '   ' + firstline)
       
   325 
       
   326             blocks[i]['admonitiontitle'] = admonitiontitle
       
   327             del blocks[i]['lines'][0]
       
   328         i = i + 1
       
   329     return blocks
       
   330 
       
# Translated heading printed by formatblock() above the body of each
# admonition, keyed by the lower-cased directive name.
# NOTE(review): _admonitionre also accepts the generic "admonition"
# directive, for which there is no entry here; formatblock() indexes
# this dictionary directly -- confirm how that case should be handled.
_admonitiontitles = {'attention': _('Attention:'),
                     'caution': _('Caution:'),
                     'danger': _('!Danger!')  ,
                     'error': _('Error:'),
                     'hint': _('Hint:'),
                     'important': _('Important:'),
                     'note': _('Note:'),
                     'tip': _('Tip:'),
                     'warning': _('Warning!')}
       
   340 
       
   341 def formatblock(block, width):
       
   342     """Format a block according to width."""
       
   343     if width <= 0:
       
   344         width = 78
       
   345     indent = ' ' * block['indent']
       
   346     if block['type'] == 'admonition':
       
   347         admonition = _admonitiontitles[block['admonitiontitle']]
       
   348         hang = len(block['lines'][-1]) - len(block['lines'][-1].lstrip())
       
   349 
       
   350         defindent = indent + hang * ' '
       
   351         text = ' '.join(map(str.strip, block['lines']))
       
   352         return '%s\n%s' % (indent + admonition, util.wrap(text, width=width,
       
   353                                            initindent=defindent,
       
   354                                            hangindent=defindent))
       
   355     if block['type'] == 'margin':
       
   356         return ''
       
   357     if block['type'] == 'literal':
       
   358         indent += '  '
       
   359         return indent + ('\n' + indent).join(block['lines'])
       
   360     if block['type'] == 'section':
       
   361         underline = encoding.colwidth(block['lines'][0]) * block['underline']
       
   362         return "%s%s\n%s%s" % (indent, block['lines'][0],indent, underline)
       
   363     if block['type'] == 'definition':
       
   364         term = indent + block['lines'][0]
       
   365         hang = len(block['lines'][-1]) - len(block['lines'][-1].lstrip())
       
   366         defindent = indent + hang * ' '
       
   367         text = ' '.join(map(str.strip, block['lines'][1:]))
       
   368         return '%s\n%s' % (term, util.wrap(text, width=width,
       
   369                                            initindent=defindent,
       
   370                                            hangindent=defindent))
       
   371     subindent = indent
       
   372     if block['type'] == 'bullet':
       
   373         if block['lines'][0].startswith('| '):
       
   374             # Remove bullet for line blocks and add no extra
       
   375             # indention.
       
   376             block['lines'][0] = block['lines'][0][2:]
       
   377         else:
       
   378             m = _bulletre.match(block['lines'][0])
       
   379             subindent = indent + m.end() * ' '
       
   380     elif block['type'] == 'field':
       
   381         keywidth = block['keywidth']
       
   382         key = block['key']
       
   383 
       
   384         subindent = indent + _fieldwidth * ' '
       
   385         if len(key) + 2 > _fieldwidth:
       
   386             # key too large, use full line width
       
   387             key = key.ljust(width)
       
   388         elif keywidth + 2 < _fieldwidth:
       
   389             # all keys are small, add only two spaces
       
   390             key = key.ljust(keywidth + 2)
       
   391             subindent = indent + (keywidth + 2) * ' '
       
   392         else:
       
   393             # mixed sizes, use fieldwidth for this one
       
   394             key = key.ljust(_fieldwidth)
       
   395         block['lines'][0] = key + block['lines'][0]
       
   396     elif block['type'] == 'option':
       
   397         m = _optionre.match(block['lines'][0])
       
   398         option, arg, rest = m.groups()
       
   399         subindent = indent + (len(option) + len(arg)) * ' '
       
   400 
       
   401     text = ' '.join(map(str.strip, block['lines']))
       
   402     return util.wrap(text, width=width,
       
   403                      initindent=indent,
       
   404                      hangindent=subindent)
       
   405 
       
   406 
       
   407 def format(text, width, indent=0, keep=None):
       
   408     """Parse and format the text according to width."""
       
   409     blocks = findblocks(text)
       
   410     for b in blocks:
       
   411         b['indent'] += indent
       
   412     blocks = findliteralblocks(blocks)
       
   413     blocks, pruned = prunecontainers(blocks, keep or [])
       
   414     blocks = findsections(blocks)
       
   415     blocks = inlineliterals(blocks)
       
   416     blocks = hgrole(blocks)
       
   417     blocks = splitparagraphs(blocks)
       
   418     blocks = updatefieldlists(blocks)
       
   419     blocks = prunecomments(blocks)
       
   420     blocks = addmargins(blocks)
       
   421     blocks = findadmonitions(blocks)
       
   422     text = '\n'.join(formatblock(b, width) for b in blocks)
       
   423     if keep is None:
       
   424         return text
       
   425     else:
       
   426         return text, pruned
       
   427 
       
   428 
       
   429 if __name__ == "__main__":
       
   430     from pprint import pprint
       
   431 
       
   432     def debug(func, *args):
       
   433         blocks = func(*args)
       
   434         print "*** after %s:" % func.__name__
       
   435         pprint(blocks)
       
   436         print
       
   437         return blocks
       
   438 
       
   439     text = open(sys.argv[1]).read()
       
   440     blocks = debug(findblocks, text)
       
   441     blocks = debug(findliteralblocks, blocks)
       
   442     blocks, pruned = debug(prunecontainers, blocks, sys.argv[2:])
       
   443     blocks = debug(inlineliterals, blocks)
       
   444     blocks = debug(splitparagraphs, blocks)
       
   445     blocks = debug(updatefieldlists, blocks)
       
   446     blocks = debug(findsections, blocks)
       
   447     blocks = debug(prunecomments, blocks)
       
   448     blocks = debug(addmargins, blocks)
       
   449     blocks = debug(findadmonitions, blocks)
       
   450     print '\n'.join(formatblock(b, 30) for b in blocks)