--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/munge.py Fri Nov 14 06:36:42 2008 +0000
@@ -0,0 +1,533 @@
+#!/usr/bin/python2.5
+#
+# Copyright 2008 the Melange authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# __doc__ string is slightly unconventional because it is used as usage text
+"""%prog [OPTIONS] [FIND_REGEX] [REPLACE_FORMAT]
+
+Script to list, search, and modify files using Python regex patterns.
+
+OPTIONS: optional command-line flags; see %prog --help
+
+FIND_REGEX: an optional valid Python regular expression pattern;
+ if supplied, only files containing at least one match will be processed;
+ matching file paths will be printed; if supplied, REPLACE_FORMAT will be
+ used to convert the match groups into formatted output.
+
+REPLACE_FORMAT: an optional valid Python format string;
+ FIND_REGEX must be supplied first if REPLACE_FORMAT is supplied;
+ positional arguments will be replaced with ordered groups from
+ FIND_REGEX matches, and named arguments will be replaced with named
+ groups from FIND_REGEX matches."""
+
+__authors__ = [
+ '"Todd Larsen" <tlarsen@google.com>',
+]
+
+
+import dircache
+import errno
+import os
+import optparse
+import re
+import sre_constants
+import sys
+
+
+class Error(Exception):
+ """Base class of all exceptions in this module.
+ """
+ pass
+
+
+def compileRegex(pattern):
+ """Compiles a Python regex pattern into a regex object.
+
+ Args:
+ pattern: valid Python regex pattern string, or an already-compiled
+ regex object (in which case this function is is a no-op)
+
+ Returns:
+ regex object compiled from pattern
+
+ Raises:
+ Error if pattern could not be compiled.
+ """
+ try:
+ return re.compile(pattern)
+ except sre_constants.error, error:
+ msg = 're.compile: %s\n%s' % (error.args[0], pattern)
+ raise Error(errno.EINVAL, msg)
+
+
+def findAll(text_to_search, pattern):
+ """Returns all matches of a regex in a string.
+
+ Args:
+ text_to_search: string in which to find matches
+ pattern: Python regex pattern (or already-compiled regex object)
+ indicating which matches to retrieve
+
+ Returns:
+ a (possibly empty) list of the matches found, as strings
+ """
+ matches = []
+
+ def _captureMatchText(match):
+ match_text = match.group()
+ matches.append(match_text)
+ return match_text
+
+ compileRegex(pattern).sub(_captureMatchText, text_to_search)
+
+ return matches
+
+
+def getFileContents(file_path):
+ """Reads the contents of a file as a single string, then closes the file.
+
+ Args:
+ file_path: path to the file to read its contents into a string
+
+ Returns:
+ a single string containing the entire contents of the file
+ """
+ file_to_read = open(file_path)
+ file_contents = file_to_read.read()
+ file_to_read.close()
+ return file_contents
+
+
+def findAllInFile(file_path, pattern, *ignored_args, **ignored_kwargs):
+ """Action to return a list of all pattern matches in a file.
+
+ Args:
+ file_path: path of file to manipulate
+ pattern: see findAll()
+ *ignored_args: other positional arguments which are ignored
+ command-line arguments not used by this action callable
+ **ignored_kwargs: other keyword arguments which are ignored
+ command-line options not used by this action callable
+
+ Returns:
+ two-tuple of boolean indicating if any match was found and a
+ (possibly empty) list of the matches found, as strings (to be used
+ as printable output of the action)
+ """
+ matches = findAll(getFileContents(file_path), pattern)
+
+ if matches:
+ found = True
+ else:
+ found = False
+
+ return found, matches
+
+
+def replaceAll(original, pattern, format):
+ """Substitutes formatted text for all matches in a string.
+
+ Args:
+ original: original string in which to find and replace matches
+ pattern: Python regex pattern (or already-compiled regex object)
+ indicating which matches to replace
+ format: Python format string specifying how to format the
+ replacement text; how this format string is interpreted depends
+ on the contents of the pattern; if the pattern contains:
+ named groups: format is expected to contain named format specifiers
+ unnamed groups: format is expected to contain exactly the same
+ number of unnamed format specifiers as the number of groups in
+ pattern
+ no groups: format is expected to contain a single format specifier
+ (in which case the entire match is supplied to it), or no format
+ specifier at all (in which case the "format" string simply
+ replaces the match with no substitutions from the match itself)
+
+ Returns:
+ two-tuple of the text with all matches replaced as specified by
+ pattern and format, and a list of the original matches, each followed
+ by its replacement
+ """
+ matches_and_replacements = []
+
+ def _replaceWithFormat(match):
+ formatted_match = None
+
+ if match.groupdict():
+ try:
+ formatted_match = format % match.groupdict()
+ except TypeError:
+ pass
+
+ if (not formatted_match) and match.groups():
+ try:
+ formatted_match = format % match.groups()
+ except TypeError:
+ pass
+
+ if (not formatted_match):
+ try:
+ formatted_match = format % match.group()
+ except TypeError:
+ formatted_match = format
+
+ matches_and_replacements.append(match.group())
+ matches_and_replacements.append(formatted_match)
+ return formatted_match
+
+ replaced = compileRegex(pattern).sub(_replaceWithFormat, original)
+
+ return replaced, matches_and_replacements
+
+
+def writeAltFileIfExt(path, ext, contents):
+ """Writes a file if path and additional extension are supplied.
+
+ If path or ext are not supplied, no file is written.
+
+ Args:
+ path: path of file to be written, to which ext will be appended
+ ext: additional file extension that will be appended to path
+ contents: contents of file to be written, as a string
+ """
+ if (not path) or (not ext):
+ return
+
+ if ext.startswith('.'):
+ ext = ext[1:]
+
+ alt_path = '%s.%s' % (path, ext)
+ alt_file = open(alt_path, 'w')
+ alt_file.write(contents)
+ alt_file.close()
+
+
+def replaceAllInFile(file_path, pattern, format,
+ new_ext=None, backup_ext=None,
+ overwrite_files=False,
+ *ignored_args, **ignored_kwargs):
+ """Substitutes formatted text for all matches in a file.
+
+ Args:
+ file_path: path of file to manipulate
+ pattern, format: see replaceAll()
+ *ignored_args: other positional arguments which are ignored
+ command-line arguments not used by this action callable
+ **ignored_kwargs: other keyword arguments which are ignored
+ command-line options not used by this action callable
+
+ Returns:
+ two-tuple of boolean indicating if any match was found and a
+ list of printable output text lines containing pairs of original
+ pattern matches each followed by the formatted replacement
+ """
+ original = getFileContents(file_path)
+
+ replaced, matches_and_replacements = replaceAll(
+ original, pattern, format)
+
+ if matches_and_replacements:
+ found = True
+ writeAltFileIfExt(file_path, new_ext, replaced)
+ writeAltFileIfExt(file_path, backup_ext, original)
+
+ if overwrite_files:
+ if replaced != original:
+ replaced_file = open(file_path, 'w')
+ replaced_file.write(replaced)
+ replaced_file.close()
+ else:
+ found = False
+
+ return found, matches_and_replacements
+
+
+def listFile(*ignored_args, **ignored_kwargs):
+ """No-op action callable that ignores arguments and returns (True, []).
+ """
+ return True, [] # match only based on file names, which was done by caller
+
+
+def applyActionToFiles(action, action_args,
+ start_path='', abs_path=False, files_pattern='',
+ recurse_dirs=False, dirs_pattern='',
+ follow_symlinks=False, quiet_output=False,
+ hide_paths=False, **action_options):
+ """Applies a callable action to files, based on options and arguments.
+
+ Args:
+ action: callable that expects a file path argument, positional arguments
+ (action_args), and keyword options from the command-line options dict;
+ and returns a "matched" boolean and a list of output strings
+ action_args: list of positional arguments, if any; passed to action
+ callable unchanged
+ start_path: required path of initial directory to visit
+ abs_path: optional boolean indicating to use absolute paths
+ files_pattern: required Python regex (object or pattern) which selects
+ which files to pass to the action callable
+ recurse_dirs: boolean indicating if subdirectories should be traversed
+ dirs_pattern: Python regex (object or pattern) which selects which
+ subdirectories to traverse if recurse_dirs is True
+ follow_symlinks: boolean indicating if symlinks should be traversed
+ quiet_output: optional boolean indicating if output should be suppressed
+ hide_paths: optional boolean indicating to omit file paths from output
+ **action_options: remaining keyword arguments that are passed unchanged
+ to the action callable
+
+ Returns:
+ two-tuple containing an exit code and a (possibly empty) list of
+ output strings
+
+ Raises:
+ Error exception if problems occur (file I/O, invalid regex, etc.).
+ """
+ exit_code = errno.ENOENT
+ output = []
+
+ start_path = os.path.expandvars(os.path.expanduser(start_path))
+
+ if abs_path:
+ start_path = os.path.abspath(start_path)
+
+ paths = [start_path]
+
+ files_regex = compileRegex(files_pattern)
+
+ if recurse_dirs:
+ dirs_regex = compileRegex(dirs_pattern)
+
+ while paths:
+ sub_paths = []
+
+ for path in paths:
+ # expand iterator into an actual list and sort it
+ try:
+ items = dircache.listdir(path)[:]
+ except (IOError, OSError), error:
+ raise Error(error.args[0], '%s: %s' % (
+ error.__class__.__name__, error.args[1]))
+
+ items.sort()
+
+ for item in items:
+ item_path = os.path.join(path, item)
+
+ if os.path.islink(item_path):
+ if not follow_symlinks:
+ continue # do not follow symlinks (ignore them)
+
+ if os.path.isdir(item_path):
+ if recurse_dirs:
+ if dirs_regex.match(item):
+ sub_paths.append(item_path)
+ continue
+
+ if files_regex.match(item):
+ try:
+ matched, found_output = action(item_path, *action_args,
+ **action_options)
+ except (IOError, OSError), error:
+ raise Error(error.args[0], '%s: %s' % (
+ error.__class__.__name__, error.args[1]))
+
+ if matched:
+ exit_code = 0 # at least one matched file has now been found
+
+ if (not quiet_output) and (not hide_paths):
+ output.append(item_path)
+
+ if not quiet_output:
+ output.extend(found_output)
+
+ paths = sub_paths
+
+ return exit_code, output
+
+
+class _ErrorOptionParser(optparse.OptionParser):
+ """Customized optparse.OptionParser that does not call sys.exit().
+ """
+
+ def error(self, msg):
+ """Raises an Error exception, instead of calling sys.exit().
+ """
+ raise Error(errno.EINVAL, msg)
+
+
+def _buildParser():
+ """Returns a custom OptionParser for parsing command-line arguments.
+ """
+ parser = _ErrorOptionParser(__doc__)
+
+ filter_group = optparse.OptionGroup(parser,
+ 'File Options',
+ 'Options used to select which files to process.')
+
+ filter_group.add_option(
+ '-f', '--files', dest='files_pattern', default='^.*$',
+ metavar='FILES_REGEX',
+ help=('Python regex pattern (*not* a glob!) defining files to process'
+ ' in each directory [default: %default]'))
+
+ filter_group.add_option(
+ '-F', '--follow', dest='follow_symlinks', default=False,
+ action='store_true',
+ help=('follow file and subdirectory symlinks (possibly *DANGEROUS*)'
+ ' [default: %default]'))
+
+ parser.add_option_group(filter_group)
+
+ dir_group = optparse.OptionGroup(parser,
+ 'Directory Options',
+ 'Options used to indicate which directories to traverse.')
+
+ dir_group.add_option(
+ '-s', '--start', dest='start_path', default=os.curdir, metavar='PATH',
+ help='directory in which to start processing files [default: %default]')
+
+ dir_group.add_option(
+ '-R', '--recursive', dest='recurse_dirs', default=False,
+ action='store_true',
+ help='recurse into subdirectories [default: %default]')
+
+ dir_group.add_option(
+ '-d', '--dirs', dest='dirs_pattern', default='^.*$',
+ metavar='SUBDIRS_REGEX',
+ help=('Python regex pattern (*not* a glob!) defining subdirectories to'
+ ' recurse into (if --recursive) [default: %default]'))
+
+ parser.add_option_group(dir_group)
+
+ output_group = optparse.OptionGroup(parser,
+ 'Output Options',
+ 'Options used to control program output.')
+
+ output_group.add_option(
+ '-a', '--abspath', dest='abs_path', default=False, action='store_true',
+ help=('output absolute paths instead of relative paths'
+ ' [default: %default]'))
+
+ output_group.add_option(
+ '-p', '--nopaths', dest='hide_paths', default=False, action='store_true',
+ help=('suppress printing of file path names for successfully matched'
+ ' files to stdout [default: %default]'))
+
+ output_group.add_option(
+ '-q', '--quiet', dest='quiet_output', default=False, action='store_true',
+ help=('suppress *all* printed output to stdout (but still perform'
+ ' replacements if specified) [default: %default]'))
+
+ parser.add_option_group(output_group)
+
+ replace_group = optparse.OptionGroup(parser,
+ 'Replace Options',
+ 'Options applied when matches in files are replaced with substitutions.'
+ ' (Only possible if REPLACE_FORMAT is supplied.)')
+
+ replace_group.add_option(
+ '-o', '--overwrite', dest='overwrite_files', default=False,
+ action='store_true',
+ help=('overwrite original files with formatted text substituted for'
+ ' matches [default: %default]'))
+
+ replace_group.add_option(
+ '-b', '--backup', dest='backup_ext', default='', metavar='EXTENSION',
+ help=('if supplied, and file would be overwritten, backup original'
+ ' file with the supplied extension [default is no backups of'
+ ' overwritten files are kept]'))
+
+ replace_group.add_option(
+ '-n', '--new', dest='new_ext', default='', metavar='EXTENSION',
+ help=('if supplied, and file has matches and and is altered by'
+ ' substitutions, create a new file with the supplied extension'
+ ' [default is no new file is created]'))
+
+ parser.add_option_group(replace_group)
+
+ return parser
+
+
+def _parseArgs(cmd_line_args):
+ """Builds a command-line option parser and parses command-line arguments.
+
+ Args:
+ cmd_line_args: command-line arguments, excluding the argv[0] program name
+
+ Returns:
+ four-tuple of action callable, supplied command-line options (including
+ those defined by defaults in the command-line parser) as a dict,
+ remaining positional command-line arguments, and the parser itself
+
+ Raises:
+ Error if problems occurred during commmand-line argument parsing.
+ """
+ parser = _buildParser()
+ options, args = parser.parse_args(args=cmd_line_args)
+
+ if not args:
+ # no FIND_REGEX or REPLACE_PATTERN supplied, so just match based
+ # on file name and subdirectory name patterns
+ action = listFile
+ elif len(args) == 1:
+ # FIND_REGEX supplied, but not REPLACE_PATTERN, so just match based
+ # on file name and subdirectory name patterns, and then on file
+ # contents
+ action = findAllInFile
+ elif len(args) == 2:
+ # FIND_REGEX and REPLACE_PATTERN both supplied, so match based
+ # on file name and subdirectory name patterns, and then do a find and
+ # replace on file contents
+ action = replaceAllInFile
+ else:
+ raise Error(errno.EINVAL,'too many (%d) arguments supplied:\n%s' % (
+ len(args), ' '.join(args)))
+
+ return action, vars(options), args, parser
+
+
+def _main(argv):
+ """Wrapper that catches exceptions, prints output, and returns exit status.
+
+ Normal program output is printed to stdout. Error output (including
+ exception text) is printed to stderr.
+
+ Args:
+ argv: script arguments, usually sys.argv; argv[0] is expected to be the
+ program name
+
+ Returns:
+ exit code suitable for sys.exit()
+ """
+ options = {} # empty options, used if _parseArgs() fails
+
+ try:
+ action, options, args, parser = _parseArgs(argv[1:])
+ exit_code, output = applyActionToFiles(action, args, **options)
+
+ if output: print '\n'.join(output)
+
+ except Error, error:
+ if not options.get('quiet_output'):
+ print >>sys.stderr, '\nERROR: (%s: %s) %s\n' % (
+ error.args[0], os.strerror(error.args[0]), error.args[1])
+ print >>sys.stderr, parser.get_usage()
+
+ exit_code = error.args[0]
+
+ return exit_code
+
+
+if __name__ == '__main__':
+ sys.exit(_main(sys.argv))