1 #!/usr/bin/env python
   2 # encoding: utf-8
   3 
   4 """
   5 Copyright (c) 2007, Muharem Hrnjadovic
   6 
   7 All rights reserved.
   8 
   9 Redistribution and use in source and binary forms, with or without 
  10 modification, are permitted provided that the following conditions
  11 are met:
  12 
  13     * Redistributions of source code must retain the above copyright notice,
  14       this list of conditions and the following disclaimer.
  15     * Redistributions in binary form must reproduce the above copyright
  16       notice, this list of conditions and the following disclaimer in the
  17       documentation and/or other materials provided with the distribution.
  18     * Neither the name of Muharem Hrnjadovic nor the names of other
  19       contributors may be used to endorse or promote products derived from
  20       this software without specific prior written permission.
  21 
  22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  23 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  24 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  25 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  26 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  27 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  28 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  29 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  30 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  31 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  32 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  33 
  34 ---------------------------------------------------------------------------
  35 
  36 Module providing functions commonly used in shell scripting:
  37 
  38   - ffind()    : finds files in a directory tree
  39   - ffindgrep(): finds files in a directory tree and matches their
  40                  content to regular expressions
  41   - freplace() : in-place search/replace of files in a directory tree
  42                  with regular expressions
  43   - printr()   : prints the results of the ffind()/ffindgrep() functions
  44 
  45 Please see the documentation strings of the particular functions for
  46 detailed information.
  47 """
  48 
  49 # Copyright: (c) 2007 Muharem Hrnjadovic
  50 # created: 15/04/2007 09:31:25
  51 
# Module version string. "$Id:$" looks like an unexpanded version-control
# keyword placeholder (presumably Subversion/CVS -- TODO confirm), so it does
# not carry a real version number as written.
__version__ = "$Id:$"
# $HeadURL $
  54 
  55 import os, sys, types, re, fnmatch, itertools
  56 
  57 class ScriptError(Exception): pass
  58 
  59 def ffind(path, shellglobs=None, namefs=None, relative=True):
  60     """
  61     Finds files in the directory tree starting at 'path' (filtered by
  62     Unix shell-style wildcards ('shellglobs') and/or the functions in
  63     the 'namefs' sequence).
  64 
  65     The parameters are as follows:
  66 
  67     - path: starting path of the directory tree to be searched
  68     - shellglobs: an optional sequence of Unix shell-style wildcards
  69       that are to be applied to the file *names* found
  70     - namefs: an optional sequence of functions to be applied to the
  71       file *paths* found
  72     - relative: a boolean flag that determines whether absolute or
  73       relative paths should be returned
  74 
  75     Please not that the shell wildcards work in a cumulative fashion
  76     i.e. each of them is applied to the full set of file *names* found.
  77 
  78     Conversely, all the functions in 'namefs'
  79         * only get to see the output of their respective predecessor
  80           function in the sequence (with the obvious exception of the
  81           first function)
  82         * are applied to the full file *path* (whereas the shell-style
  83           wildcards are only applied to the file *names*)
  84 
  85     Returns a sequence of paths for files found.
  86     """
  87     if not os.access(path, os.R_OK):
  88         raise ScriptError("cannot access path: '%s'" % path)
  89 
  90     fileList = [] # result list
  91     try:
  92         for dir, subdirs, files in os.walk(path):
  93             if shellglobs:
  94                 matched = []
  95                 for pattern in shellglobs:
  96                     filterf = lambda s: fnmatch.fnmatchcase(s, pattern)
  97                     matched.extend(filter(filterf, files))
  98                 fileList.extend(['%s%s%s' % (dir, os.sep, f) for f in matched])
  99             else:
 100                 fileList.extend(['%s%s%s' % (dir, os.sep, f) for f in files])
 101         if not relative: fileList = map(os.path.abspath, fileList)
 102         if namefs:
 103             for ff in namefs: fileList = filter(ff, fileList)
 104     except Exception, e: raise ScriptError(str(e))
 105     return(fileList)
 106 
 107 def ffindgrep(path, regexl, shellglobs=None, namefs=None,
 108               relative=True, linenums=False):
 109     """
 110     Finds files in the directory tree starting at 'path' (filtered by
 111     Unix shell-style wildcards ('shellglobs') and/or the functions in
 112     the 'namefs' sequence) and searches inside these.
 113 
 114     The parameters are as follows:
 115 
 116     - path: starting path of the directory tree to be searched
 117     - shellglobs: an optional sequence of Unix shell-style wildcards
 118       that are to be applied to the file *names* found
 119     - namefs: an optional sequence of functions to be applied to the
 120       file *paths* found
 121     - relative: a boolean flag that determines whether absolute or
 122       relative paths should be returned
 123     - linenums: turns on line numbers for found files (like grep -n)
 124 
 125     Additionaly, the file content will be filtered by the regular
 126     expressions in the 'regexl' sequence. Each entry in the latter
 127     is a
 128     
 129       - either a string (with the regex definition)
 130       - or a tuple with arguments accepted by re.compile() (the
 131         re.M and re.S flags will have no effect though)
 132 
 133     For all the files that pass the file name/content tests the function
 134     returns a dictionary where the
 135 
 136       - key is the file name and the
 137       - value is a string with lines filtered by 'regexl'
 138     """
 139     fileList = ffind(path, shellglobs=shellglobs,
 140                      namefs=namefs, relative=relative)
 141     if not fileList: return dict()
 142 
 143     result = dict()
 144 
 145     try:
 146         # first compile the regular expressions
 147         ffuncs = []
 148         for redata in regexl:
 149             if type(redata) == types.StringType:
 150                 ffuncs.append(re.compile(redata).search)
 151             elif type(redata) == types.TupleType:
 152                 ffuncs.append(re.compile(*redata).search)
 153         # now grep in the files found
 154         for file in fileList:
 155             # read file content
 156             fhandle = open(file, 'r')
 157             fcontent = fhandle.read()
 158             fhandle.close()
 159             # split file content in lines
 160             if linenums: lines = zip(itertools.count(1), fcontent.splitlines())
 161             else: lines = fcontent.splitlines()
 162             for ff in ffuncs:
 163                 if linenums: lines = filter(lambda t: ff(t[1]), lines)
 164                 else: lines = filter(ff, lines)
 165                 # there's no point in applying the remaining regular
 166                 # expressions if we don't have any matching lines any more
 167                 if not lines: break
 168             else:
 169                 # the loop terminated normally; add this file to the
 170                 # result set if there are any lines that matched
 171                 if lines:
 172                     if linenums:
 173                         result[file] = '\n'.join(["%d:%s" % t for t in lines])
 174                     else:
 175                         result[file] = '\n'.join(map(str, lines))
 176     except Exception, e: raise ScriptError(str(e))
 177     return(result)
 178 
 179 def freplace(path, regexl, shellglobs=None, namefs=None, bext='.bak'):
 180     """
 181     Finds files in the directory tree starting at 'path' (filtered by
 182     Unix shell-style wildcards ('shellglobs') and/or the functions in
 183     the 'namefs' sequence) and performs an in-place search/replace
 184     operation on these.
 185 
 186     The parameters are as follows:
 187 
 188     - path: starting path of the directory tree to be searched
 189     - shellglobs: an optional sequence of Unix shell-style wildcards
 190       that are to be applied to the file *names* found
 191     - namefs: an optional sequence of functions to be applied to the
 192       file *paths* found
 193     - relative: a boolean flag that determines whether absolute or
 194       relative paths should be returned
 195 
 196     Additionally, an in-place search/replace operation is performed
 197     on the content of all the files (whose names passed the tests)
 198     using the regular expressions in 'regexl'.
 199 
 200     Please note: 'regexl' is a sequence of 3-tuples, each having the
 201     following elements:
 202 
 203       - search string (Python regex syntax)
 204       - replace string (Python regex syntax)
 205       - regex flags or 'None' (re.compile syntax)
 206 
 207     Copies of the modified files are saved in backup files using the
 208     extension specified in 'bext'.
 209 
 210     The function returns the total number of files modified.
 211     """
 212     fileList = ffind(path, shellglobs=shellglobs, namefs=namefs)
 213 
 214     # return if no files found
 215     if not fileList: return 0
 216 
 217     filesChanged = 0
 218 
 219     try:
 220         cffl = []
 221         for searchs, replaces, reflags in regexl:
 222             # prepare the required regex objects, check whether we need
 223             # to pass any regex compilation flags
 224             if reflags is not None: regex = re.compile(searchs, reflags)
 225             else: regex = re.compile(searchs)
 226             cffl.append((regex.subn, replaces))
 227         for file in fileList:
 228             # read file content
 229             fhandle = open(file, 'r')
 230             text = fhandle.read()
 231             fhandle.close()
 232             substitutions = 0
 233             # unpack the subn() function and the replace string
 234             for subnfunc, replaces in cffl:
 235                 text, numOfChanges = subnfunc(replaces, text)
 236                 substitutions += numOfChanges
 237             if substitutions:
 238                 # first move away the original file
 239                 bakFileName = '%s%s' % (file, bext)
 240                 if os.path.exists(bakFileName): os.unlink(bakFileName)
 241                 os.rename(file, bakFileName)
 242                 # now write the new file content
 243                 fhandle = open(file, 'w')
 244                 fhandle.write(text)
 245                 fhandle.close()
 246                 filesChanged += 1
 247     except Exception, e: raise ScriptError(str(e))
 248 
 249     # return the number of files that had some of their content changed
 250     return(filesChanged)
 251 
 252 def printr(results):
 253     """
 254     prints the results of ffind()/ffindgrep() in a manner similar to
 255     the UNIX find utility
 256     """
 257     if type(results) == types.DictType:
 258         for f in sorted(results.keys()):
 259             sys.stdout.write("%s\n%s\n" % (results[f],f))
 260     else:
 261         for f in sorted(results):
 262             sys.stdout.write("%s\n" % f)
 263 
# Running this file directly does nothing; the guard body is a bare 'pass'.
if __name__ == '__main__':
    pass