|
1 #!/usr/bin/python2.5 |
|
2 # |
|
3 # Copyright 2008 the Melange authors. |
|
4 # |
|
5 # Licensed under the Apache License, Version 2.0 (the "License"); |
|
6 # you may not use this file except in compliance with the License. |
|
7 # You may obtain a copy of the License at |
|
8 # |
|
9 # http://www.apache.org/licenses/LICENSE-2.0 |
|
10 # |
|
11 # Unless required by applicable law or agreed to in writing, software |
|
12 # distributed under the License is distributed on an "AS IS" BASIS, |
|
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
14 # See the License for the specific language governing permissions and |
|
15 # limitations under the License. |
|
16 |
|
17 # __doc__ string is slightly unconventional because it is used as usage text |
|
18 """%prog [OPTIONS] [FIND_REGEX] [REPLACE_FORMAT] |
|
19 |
|
20 Script to list, search, and modify files using Python regex patterns. |
|
21 |
|
22 OPTIONS: optional command-line flags; see %prog --help |
|
23 |
|
24 FIND_REGEX: an optional valid Python regular expression pattern; |
|
25 if supplied, only files containing at least one match will be processed; |
|
26 matching file paths will be printed; if supplied, REPLACE_FORMAT will be |
|
27 used to convert the match groups into formatted output. |
|
28 |
|
29 REPLACE_FORMAT: an optional valid Python format string; |
|
30 FIND_REGEX must be supplied first if REPLACE_FORMAT is supplied; |
|
31 positional arguments will be replaced with ordered groups from |
|
32 FIND_REGEX matches, and named arguments will be replaced with named |
|
33 groups from FIND_REGEX matches.""" |
|
34 |
|
35 __authors__ = [ |
|
36 '"Todd Larsen" <tlarsen@google.com>', |
|
37 ] |
|
38 |
|
39 |
|
40 import dircache |
|
41 import errno |
|
42 import os |
|
43 import optparse |
|
44 import re |
|
45 import sre_constants |
|
46 import sys |
|
47 |
|
48 |
|
class Error(Exception):
    """Root exception type for every failure raised by this module.

    Code in this file constructs it as Error(errno_code, message), so
    args[0] is a numeric errno value and args[1] is the message text.
    """
|
53 |
|
54 |
|
def compileRegex(pattern):
    """Compiles a Python regex pattern into a regex object.

    Args:
      pattern: valid Python regex pattern string, or an already-compiled
        regex object (in which case this function is a no-op)

    Returns:
      regex object compiled from pattern

    Raises:
      Error (with errno.EINVAL) if pattern could not be compiled.
    """
    try:
        return re.compile(pattern)
    except re.error:
        # re.error is the public name of the regex compile exception.  The
        # exception is fetched through sys.exc_info() instead of
        # "except ..., error" so this source parses on Python 2.5 and 3 alike.
        error = sys.exc_info()[1]
        msg = 're.compile: %s\n%s' % (error.args[0], pattern)
        raise Error(errno.EINVAL, msg)
|
73 |
|
74 |
|
def findAll(text_to_search, pattern):
    """Collects the text of every regex match found in a string.

    Args:
      text_to_search: string that is scanned for matches
      pattern: Python regex pattern (or already-compiled regex object)
        selecting the matches to collect

    Returns:
      list (empty when nothing matched) of the matched substrings, in the
      order they occur in text_to_search
    """
    found = []

    def _record(match_obj):
        # Handing the matched text straight back turns sub() into a pure
        # scan; the substituted string it builds is simply discarded.
        found.append(match_obj.group())
        return found[-1]

    regex = compileRegex(pattern)
    regex.sub(_record, text_to_search)

    return found
|
96 |
|
97 |
|
def getFileContents(file_path):
    """Reads the contents of a file as a single string, then closes the file.

    Args:
      file_path: path to the file to read its contents into a string

    Returns:
      a single string containing the entire contents of the file

    Raises:
      IOError (from open() or read()) if the file cannot be opened or read.
    """
    file_to_read = open(file_path)

    # try/finally (not "with") keeps this valid on Python 2.5 while still
    # guaranteeing the handle is released when read() raises.
    try:
        return file_to_read.read()
    finally:
        file_to_read.close()
|
111 |
|
112 |
|
def findAllInFile(file_path, pattern, *ignored_args, **ignored_kwargs):
    """Action callable that lists every pattern match in one file.

    Args:
      file_path: path of the file whose contents are searched
      pattern: see findAll()
      *ignored_args: extra positional command-line arguments; accepted so
        all action callables share a signature, but not used here
      **ignored_kwargs: extra command-line options; accepted but not used
        here

    Returns:
      two-tuple of a boolean that is True when at least one match was
      found, and the (possibly empty) list of matched strings, which
      serves as the printable output of the action
    """
    contents = getFileContents(file_path)
    matches = findAll(contents, pattern)
    return bool(matches), matches
|
137 |
|
138 |
|
def replaceAll(original, pattern, format):
    """Substitutes formatted text for all matches in a string.

    Args:
      original: original string in which to find and replace matches
      pattern: Python regex pattern (or already-compiled regex object)
        indicating which matches to replace
      format: Python format string specifying how to format the
        replacement text; how this format string is interpreted depends
        on the contents of the pattern; if the pattern contains:
          named groups: format is expected to contain named format specifiers
          unnamed groups: format is expected to contain exactly the same
            number of unnamed format specifiers as the number of groups in
            pattern
          no groups: format is expected to contain a single format specifier
            (in which case the entire match is supplied to it), or no format
            specifier at all (in which case the "format" string simply
            replaces the match with no substitutions from the match itself)

    Returns:
      two-tuple of the text with all matches replaced as specified by
      pattern and format, and a list of the original matches, each followed
      by its replacement

    Raises:
      Only TypeError from the % operator is treated as "this way of
      formatting does not apply"; any other exception it raises (for
      example KeyError for a named specifier the pattern does not define)
      propagates to the caller.
    """
    matches_and_replacements = []

    def _replaceWithFormat(match):
        # None means "no formatting attempt has succeeded yet".  An empty
        # string is a valid replacement (e.g. an empty captured group), so
        # the fallbacks below test for None rather than for falsiness.
        formatted_match = None

        named_groups = match.groupdict()

        if named_groups:
            try:
                formatted_match = format % named_groups
            except TypeError:
                pass

        if formatted_match is None and match.groups():
            try:
                formatted_match = format % match.groups()
            except TypeError:
                pass

        if formatted_match is None:
            try:
                formatted_match = format % match.group()
            except TypeError:
                # format has no usable specifier: use it as literal text
                formatted_match = format

        matches_and_replacements.append(match.group())
        matches_and_replacements.append(formatted_match)
        return formatted_match

    replaced = compileRegex(pattern).sub(_replaceWithFormat, original)

    return replaced, matches_and_replacements
|
193 |
|
194 |
|
def writeAltFileIfExt(path, ext, contents):
    """Writes a file if path and additional extension are supplied.

    If path or ext are not supplied (empty or None), no file is written.

    Args:
      path: path of file to be written, to which ext will be appended
      ext: additional file extension that will be appended to path; one
        leading '.' is dropped, so 'bak' and '.bak' name the same file
      contents: contents of file to be written, as a string

    Raises:
      IOError (from open() or write()) if the file cannot be written.
    """
    if (not path) or (not ext):
        return

    if ext.startswith('.'):
        ext = ext[1:]

    alt_path = '%s.%s' % (path, ext)
    alt_file = open(alt_path, 'w')

    # try/finally (not "with") keeps this valid on Python 2.5 while still
    # guaranteeing the handle is released when write() raises.
    try:
        alt_file.write(contents)
    finally:
        alt_file.close()
|
215 |
|
216 |
|
def replaceAllInFile(file_path, pattern, format,
                     new_ext=None, backup_ext=None,
                     overwrite_files=False,
                     *ignored_args, **ignored_kwargs):
    """Substitutes formatted text for all matches in a file.

    Args:
      file_path: path of file to manipulate
      pattern, format: see replaceAll()
      new_ext: if non-empty and any match was found, the replaced text is
        also written to file_path with this extension appended
      backup_ext: if non-empty and any match was found, the original text
        is also written to file_path with this extension appended
      overwrite_files: if True, file_path itself is rewritten with the
        replaced text, but only when that text differs from the original
      *ignored_args: other positional arguments which are ignored
        command-line arguments not used by this action callable
      **ignored_kwargs: other keyword arguments which are ignored
        command-line options not used by this action callable

    Returns:
      two-tuple of boolean indicating if any match was found and a
      list of printable output text lines containing pairs of original
      pattern matches each followed by the formatted replacement
    """
    original = getFileContents(file_path)

    replaced, matches_and_replacements = replaceAll(
        original, pattern, format)

    found = bool(matches_and_replacements)

    if found:
        writeAltFileIfExt(file_path, new_ext, replaced)
        writeAltFileIfExt(file_path, backup_ext, original)

        if overwrite_files and replaced != original:
            replaced_file = open(file_path, 'w')

            # try/finally so the handle is released even if write() raises
            try:
                replaced_file.write(replaced)
            finally:
                replaced_file.close()

    return found, matches_and_replacements
|
255 |
|
256 |
|
def listFile(*ignored_args, **ignored_kwargs):
    """Action callable that accepts anything and always reports a match.

    Selection by file name has already been done by the caller, so there
    is nothing to inspect here and no extra output to contribute.

    Returns:
      the two-tuple (True, [])
    """
    no_extra_output = []
    return True, no_extra_output
|
261 |
|
262 |
|
def applyActionToFiles(action, action_args,
                       start_path='', abs_path=False, files_pattern='',
                       recurse_dirs=False, dirs_pattern='',
                       follow_symlinks=False, quiet_output=False,
                       hide_paths=False, **action_options):
    """Applies a callable action to files, based on options and arguments.

    Directories are visited one depth level at a time, starting with
    start_path; the entries of each directory are processed in sorted order.

    Args:
      action: callable that expects a file path argument, positional arguments
        (action_args), and keyword options from the command-line options dict;
        and returns a "matched" boolean and a list of output strings
      action_args: list of positional arguments, if any; passed to action
        callable unchanged
      start_path: required path of initial directory to visit; '~' and
        environment variables in it are expanded
      abs_path: optional boolean indicating to use absolute paths
      files_pattern: required Python regex (object or pattern) which selects
        which files to pass to the action callable; matched against the
        file name only, not the full path
      recurse_dirs: boolean indicating if subdirectories should be traversed
      dirs_pattern: Python regex (object or pattern) which selects which
        subdirectories to traverse if recurse_dirs is True; matched against
        the subdirectory name only
      follow_symlinks: boolean indicating if symlinks should be traversed
      quiet_output: optional boolean indicating if output should be suppressed
      hide_paths: optional boolean indicating to omit file paths from output
      **action_options: remaining keyword arguments that are passed unchanged
        to the action callable

    Returns:
      two-tuple containing an exit code (0 if the action matched at least
      one file, errno.ENOENT otherwise) and a (possibly empty) list of
      output strings

    Raises:
      Error exception if problems occur (file I/O, invalid regex, etc.).
    """
    exit_code = errno.ENOENT  # stays ENOENT unless some file matches
    output = []

    start_path = os.path.expandvars(os.path.expanduser(start_path))

    if abs_path:
        start_path = os.path.abspath(start_path)

    paths = [start_path]

    files_regex = compileRegex(files_pattern)

    if recurse_dirs:
        # only needed (and only referenced below) when recursing
        dirs_regex = compileRegex(dirs_pattern)

    while paths:
        sub_paths = []  # directories to visit on the next pass

        for path in paths:
            # os.listdir() replaces the deprecated dircache module; sorting
            # keeps the processing order independent of the file system.
            try:
                items = sorted(os.listdir(path))
            except (IOError, OSError):
                error = sys.exc_info()[1]
                raise Error(error.args[0], '%s: %s' % (
                    error.__class__.__name__, error.args[1]))

            for item in items:
                item_path = os.path.join(path, item)

                if os.path.islink(item_path):
                    if not follow_symlinks:
                        continue  # do not follow symlinks (ignore them)

                if os.path.isdir(item_path):
                    if recurse_dirs:
                        if dirs_regex.match(item):
                            sub_paths.append(item_path)
                    continue  # directories are never passed to the action

                if files_regex.match(item):
                    try:
                        matched, found_output = action(
                            item_path, *action_args, **action_options)
                    except (IOError, OSError):
                        error = sys.exc_info()[1]
                        raise Error(error.args[0], '%s: %s' % (
                            error.__class__.__name__, error.args[1]))

                    if matched:
                        # at least one matched file has now been found
                        exit_code = 0

                        if (not quiet_output) and (not hide_paths):
                            output.append(item_path)

                    if not quiet_output:
                        output.extend(found_output)

        paths = sub_paths

    return exit_code, output
|
357 |
|
358 |
|
359 class _ErrorOptionParser(optparse.OptionParser): |
|
360 """Customized optparse.OptionParser that does not call sys.exit(). |
|
361 """ |
|
362 |
|
363 def error(self, msg): |
|
364 """Raises an Error exception, instead of calling sys.exit(). |
|
365 """ |
|
366 raise Error(errno.EINVAL, msg) |
|
367 |
|
368 |
|
def _buildParser():
    """Returns a custom OptionParser for parsing command-line arguments.

    The module docstring is used as the usage text.  The dest name of each
    option doubles as a keyword argument name: the parsed options are
    passed as **kwargs to applyActionToFiles() and on to the action
    callables, so the dest names must stay in sync with those signatures.

    Returns:
      an _ErrorOptionParser with the file, directory, output and replace
      option groups added
    """
    parser = _ErrorOptionParser(__doc__)

    filter_group = optparse.OptionGroup(
        parser,
        'File Options',
        'Options used to select which files to process.')

    filter_group.add_option(
        '-f', '--files', dest='files_pattern', default='^.*$',
        metavar='FILES_REGEX',
        help=('Python regex pattern (*not* a glob!) defining files to process'
              ' in each directory [default: %default]'))

    filter_group.add_option(
        '-F', '--follow', dest='follow_symlinks', default=False,
        action='store_true',
        help=('follow file and subdirectory symlinks (possibly *DANGEROUS*)'
              ' [default: %default]'))

    parser.add_option_group(filter_group)

    dir_group = optparse.OptionGroup(
        parser,
        'Directory Options',
        'Options used to indicate which directories to traverse.')

    dir_group.add_option(
        '-s', '--start', dest='start_path', default=os.curdir, metavar='PATH',
        help='directory in which to start processing files [default: %default]')

    dir_group.add_option(
        '-R', '--recursive', dest='recurse_dirs', default=False,
        action='store_true',
        help='recurse into subdirectories [default: %default]')

    dir_group.add_option(
        '-d', '--dirs', dest='dirs_pattern', default='^.*$',
        metavar='SUBDIRS_REGEX',
        help=('Python regex pattern (*not* a glob!) defining subdirectories to'
              ' recurse into (if --recursive) [default: %default]'))

    parser.add_option_group(dir_group)

    output_group = optparse.OptionGroup(
        parser,
        'Output Options',
        'Options used to control program output.')

    output_group.add_option(
        '-a', '--abspath', dest='abs_path', default=False, action='store_true',
        help=('output absolute paths instead of relative paths'
              ' [default: %default]'))

    output_group.add_option(
        '-p', '--nopaths', dest='hide_paths', default=False,
        action='store_true',
        help=('suppress printing of file path names for successfully matched'
              ' files to stdout [default: %default]'))

    output_group.add_option(
        '-q', '--quiet', dest='quiet_output', default=False,
        action='store_true',
        help=('suppress *all* printed output to stdout (but still perform'
              ' replacements if specified) [default: %default]'))

    parser.add_option_group(output_group)

    replace_group = optparse.OptionGroup(
        parser,
        'Replace Options',
        'Options applied when matches in files are replaced with substitutions.'
        ' (Only possible if REPLACE_FORMAT is supplied.)')

    replace_group.add_option(
        '-o', '--overwrite', dest='overwrite_files', default=False,
        action='store_true',
        help=('overwrite original files with formatted text substituted for'
              ' matches [default: %default]'))

    replace_group.add_option(
        '-b', '--backup', dest='backup_ext', default='', metavar='EXTENSION',
        help=('if supplied, and file would be overwritten, backup original'
              ' file with the supplied extension [default is no backups of'
              ' overwritten files are kept]'))

    replace_group.add_option(
        '-n', '--new', dest='new_ext', default='', metavar='EXTENSION',
        help=('if supplied, and file has matches and is altered by'
              ' substitutions, create a new file with the supplied extension'
              ' [default is no new file is created]'))

    parser.add_option_group(replace_group)

    return parser
|
460 |
|
461 |
|
def _parseArgs(cmd_line_args):
    """Parses command-line arguments and picks the action they call for.

    Args:
      cmd_line_args: command-line arguments, excluding the argv[0] program
        name

    Returns:
      four-tuple of the action callable, the command-line options (parser
      defaults included) as a dict, the remaining positional command-line
      arguments, and the parser itself

    Raises:
      Error if problems occurred during command-line argument parsing.
    """
    parser = _buildParser()
    options, args = parser.parse_args(args=cmd_line_args)

    # The number of positional arguments selects the action:
    #   0: neither FIND_REGEX nor REPLACE_FORMAT, so only file and
    #      subdirectory names are matched
    #   1: FIND_REGEX only, so file contents are searched as well
    #   2: FIND_REGEX and REPLACE_FORMAT, so matches in the file contents
    #      are also replaced
    actions_by_arg_count = (listFile, findAllInFile, replaceAllInFile)
    arg_count = len(args)

    if arg_count >= len(actions_by_arg_count):
        raise Error(errno.EINVAL, 'too many (%d) arguments supplied:\n%s' % (
            arg_count, ' '.join(args)))

    return actions_by_arg_count[arg_count], vars(options), args, parser
|
498 |
|
499 |
|
def _main(argv):
    """Wrapper that catches exceptions, prints output, and returns exit status.

    Normal program output is printed to stdout.  Error output (including
    exception text and the usage message) is printed to stderr, unless the
    quiet_output option was parsed and set.

    Args:
      argv: script arguments, usually sys.argv; argv[0] is expected to be the
        program name

    Returns:
      exit code suitable for sys.exit()
    """
    options = {}  # empty options, used if _parseArgs() fails
    parser = None  # stays None if _parseArgs() raises before returning one

    try:
        action, options, args, parser = _parseArgs(argv[1:])
        exit_code, output = applyActionToFiles(action, args, **options)

        if output:
            sys.stdout.write('\n'.join(output) + '\n')

    except Error:
        # fetched via sys.exc_info() so this parses on Python 2.5 and 3 alike
        error = sys.exc_info()[1]

        if not options.get('quiet_output'):
            sys.stderr.write('\nERROR: (%s: %s) %s\n' % (
                error.args[0], os.strerror(error.args[0]), error.args[1])
                + '\n')

            if parser is None:
                # Argument parsing itself failed, so no parser was handed
                # back; build one just to be able to show the usage text.
                parser = _buildParser()

            sys.stderr.write(parser.get_usage() + '\n')

        exit_code = error.args[0]

    return exit_code
|
530 |
|
531 |
|
# Run only when executed as a script (not when imported); the value
# returned by _main() becomes the process exit status.
if __name__ == '__main__':
    sys.exit(_main(sys.argv))