#!/usr/bin/env python
# encoding: utf-8

"""
Copyright (c) 2007, Muharem Hrnjadovic

All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

  * Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.
  * Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
  * Neither the name of Muharem Hrnjadovic nor the names of other
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

---------------------------------------------------------------------------

Module providing functions commonly used in shell scripting:

  - ffind()    : finds files in a directory tree
  - ffindgrep(): finds files in a directory tree and matches their
                 content to regular expressions
  - freplace() : in-place search/replace of files in a directory tree
                 with regular expressions
  - printr()   : prints the results of the ffind()/ffindgrep() functions

Please see the documentation strings of the particular functions for
detailed information.

The module runs unchanged on Python 2.6+ and Python 3.
"""

# Copyright: (c) 2007 Muharem Hrnjadovic
# created: 15/04/2007 09:31:25

__version__ = "$Id:$"
# $HeadURL $

import os
import sys
import types
import re
import fnmatch
import itertools

# Base type(s) of "a string holding a regex": str and unicode on Python 2,
# plain str on Python 3.
try:
    _STRING_TYPES = (basestring,)
except NameError:
    _STRING_TYPES = (str,)


class ScriptError(Exception):
    """Raised by the functions of this module for any failure."""
    pass


def _open_text(fname, mode):
    """
    Open 'fname' in text mode without newline translation.

    On Python 3 plain text mode would turn '\\r\\n' into '\\n' when
    reading, which would silently change line endings when freplace()
    writes the file back. Passing newline='' disables that translation.
    """
    if sys.version_info[0] >= 3:
        return open(fname, mode, newline='')
    return open(fname, mode)


def ffind(path, shellglobs=None, namefs=None, relative=True):
    """
    Finds files in the directory tree starting at 'path' (filtered by
    Unix shell-style wildcards ('shellglobs') and/or the functions in
    the 'namefs' sequence).

    The parameters are as follows:

      - path: starting path of the directory tree to be searched
      - shellglobs: an optional sequence of Unix shell-style wildcards
        that are to be applied to the file *names* found
      - namefs: an optional sequence of functions to be applied to the
        file *paths* found
      - relative: a boolean flag; when false the paths found are
        converted with os.path.abspath(), otherwise they are returned
        as built from 'path'

    Please note that the shell wildcards work in a cumulative fashion
    i.e. each of them is applied to the full set of file *names* found.
    A file is kept when its name matches at least one wildcard (the
    match is case-sensitive) and is returned only once even if several
    wildcards match it.

    Conversely, all the functions in 'namefs'
      * only get to see the output of their respective predecessor
        function in the sequence (with the obvious exception of the
        first function)
      * are applied to the full file *path* (whereas the shell-style
        wildcards are only applied to the file *names*)

    Returns a list of paths for files found.

    Raises ScriptError if 'path' is not readable or if anything fails
    while walking the tree or applying the filters.
    """
    if not os.access(path, os.R_OK):
        raise ScriptError("cannot access path: '%s'" % path)

    try:
        # materialize the patterns once so that one-shot iterables keep
        # working for every directory visited
        patterns = tuple(shellglobs) if shellglobs else ()

        fileList = []   # result list
        for dirpath, _subdirs, files in os.walk(path):
            if patterns:
                # keep a name if *any* pattern matches; testing per file
                # (rather than per pattern) avoids duplicate entries
                files = [f for f in files
                         if any(fnmatch.fnmatchcase(f, p) for p in patterns)]
            # os.path.join() avoids doubled separators when 'path'
            # already ends in one
            fileList.extend([os.path.join(dirpath, f) for f in files])

        if not relative:
            fileList = [os.path.abspath(f) for f in fileList]

        # each name filter only sees what its predecessor let through
        for ff in namefs or ():
            fileList = [f for f in fileList if ff(f)]
    except Exception as e:
        raise ScriptError(str(e))
    return fileList


def ffindgrep(path, regexl, shellglobs=None, namefs=None,
              relative=True, linenums=False):
    """
    Finds files in the directory tree starting at 'path' (filtered by
    Unix shell-style wildcards ('shellglobs') and/or the functions in
    the 'namefs' sequence) and searches inside these.

    The parameters are as follows:

      - path: starting path of the directory tree to be searched
      - regexl: a sequence of regular expressions used to filter the
        lines of the files found (see below)
      - shellglobs: an optional sequence of Unix shell-style wildcards
        that are to be applied to the file *names* found
      - namefs: an optional sequence of functions to be applied to the
        file *paths* found
      - relative: a boolean flag that is passed on to ffind()
      - linenums: turns on line numbers for found files (like grep -n)

    Additionally, the file content will be filtered by the regular
    expressions in the 'regexl' sequence. Each entry in the latter is

      - either a string (with the regex definition)
      - or a tuple with arguments accepted by re.compile() (the
        re.M and re.S flags will have no effect though, because the
        expressions are applied to one line at a time)

    Entries of any other type are ignored. A line is kept only if it
    matches *all* the expressions.

    For all the files that pass the file name/content tests the function
    returns a dictionary where the

      - key is the file path and the
      - value is a string with the lines filtered by 'regexl', joined
        by newlines (each prefixed with "<line number>:" if 'linenums'
        is set)

    Raises ScriptError if a regex cannot be compiled or a file cannot
    be read.
    """
    fileList = ffind(path, shellglobs=shellglobs,
                     namefs=namefs, relative=relative)
    if not fileList:
        return dict()

    result = dict()

    try:
        # first compile the regular expressions
        searchers = []
        for redata in regexl:
            if isinstance(redata, _STRING_TYPES):
                searchers.append(re.compile(redata).search)
            elif isinstance(redata, tuple):
                searchers.append(re.compile(*redata).search)

        # now grep in the files found
        for fname in fileList:
            with _open_text(fname, 'r') as fhandle:
                fcontent = fhandle.read()

            # always carry the line number along; it is simply dropped
            # at the end when 'linenums' is off
            numbered = list(enumerate(fcontent.splitlines(), 1))
            for search in searchers:
                numbered = [t for t in numbered if search(t[1])]
                # there's no point in applying the remaining regular
                # expressions if we don't have any matching lines any more
                if not numbered:
                    break

            if numbered:
                if linenums:
                    result[fname] = '\n'.join(["%d:%s" % t for t in numbered])
                else:
                    result[fname] = '\n'.join([t[1] for t in numbered])
    except Exception as e:
        raise ScriptError(str(e))
    return result


def freplace(path, regexl, shellglobs=None, namefs=None, bext='.bak'):
    """
    Finds files in the directory tree starting at 'path' (filtered by
    Unix shell-style wildcards ('shellglobs') and/or the functions in
    the 'namefs' sequence) and performs an in-place search/replace
    operation on these.

    The parameters are as follows:

      - path: starting path of the directory tree to be searched
      - regexl: a sequence of search/replace definitions (see below)
      - shellglobs: an optional sequence of Unix shell-style wildcards
        that are to be applied to the file *names* found
      - namefs: an optional sequence of functions to be applied to the
        file *paths* found
      - bext: the extension appended to a file's name to form the name
        of its backup copy

    Additionally, an in-place search/replace operation is performed
    on the content of all the files (whose names passed the tests)
    using the regular expressions in 'regexl'.

    Please note: 'regexl' is a sequence of 3-tuples, each having the
    following elements:

      - search string (Python regex syntax)
      - replace string (Python regex syntax)
      - regex flags or 'None' (re.compile syntax)

    The substitutions are applied in sequence, each one working on the
    output of its predecessor.

    Copies of the modified files are saved in backup files using the
    extension specified in 'bext'; an existing backup file of the same
    name is replaced. Files without any substitution are left alone.

    The function returns the total number of files modified.

    Raises ScriptError if a regex cannot be compiled, if a file cannot
    be read, backed up or written, or if 'bext' is empty (the backup
    would then overwrite the very file being modified).
    """
    fileList = ffind(path, shellglobs=shellglobs, namefs=namefs)

    # return if no files found
    if not fileList:
        return 0

    filesChanged = 0

    try:
        # prepare the required regex objects, check whether we need
        # to pass any regex compilation flags
        cffl = []
        for searchs, replaces, reflags in regexl:
            if reflags is not None:
                regex = re.compile(searchs, reflags)
            else:
                regex = re.compile(searchs)
            cffl.append((regex.subn, replaces))

        for fname in fileList:
            with _open_text(fname, 'r') as fhandle:
                text = fhandle.read()

            substitutions = 0
            # unpack the subn() function and the replace string
            for subnfunc, replaces in cffl:
                text, numOfChanges = subnfunc(replaces, text)
                substitutions += numOfChanges

            if substitutions:
                bakFileName = '%s%s' % (fname, bext)
                # guard against deleting the original: with an empty
                # extension the "stale backup" removed below would be
                # the file itself
                if bakFileName == fname:
                    raise ScriptError("backup extension must not be empty")
                # first move away the original file
                if os.path.exists(bakFileName):
                    os.unlink(bakFileName)
                os.rename(fname, bakFileName)
                # now write the new file content
                with _open_text(fname, 'w') as fhandle:
                    fhandle.write(text)
                filesChanged += 1
    except ScriptError:
        raise
    except Exception as e:
        raise ScriptError(str(e))

    # return the number of files that had some of their content changed
    return filesChanged


def printr(results):
    """
    Prints the results of ffind()/ffindgrep() to standard output.

    For a dictionary (as returned by ffindgrep()) the entries are
    processed in sorted key order and, for each of them, the value
    (the matching lines) is written first, followed by the key (the
    file path) on a line of its own.

    For any other iterable (as returned by ffind()) the sorted items
    are written one per line, in a manner similar to the UNIX find
    utility.
    """
    if isinstance(results, dict):
        for f in sorted(results):
            sys.stdout.write("%s\n%s\n" % (results[f], f))
    else:
        for f in sorted(results):
            sys.stdout.write("%s\n" % f)


if __name__ == '__main__':
    pass