#!/usr/bin/env python
# encoding: utf-8

__version__ = "$Id:$"
# $HeadURL $

import os
import re
import shutil
import sys
import tempfile

# Matches the "://user:passwd@" prefix of a URL that embeds credentials.
# The user and password parts may not contain whitespace or '/', so the
# match cannot run past the URL's authority component into the path or
# into unrelated text later on the line (e.g. a host:port followed by an
# e-mail address). '@' is allowed in the user part because e-mail style
# user names are common in practice. The look-ahead requires that a host
# actually follows the '@'.
_CREDENTIALS_RE = re.compile(r'://[^\s:/]+:[^\s/@]+@(?=\S)')


def scrub_urls(text_seq):
    """A generator that deletes URL passwords from a string sequence.

    This generator removes user/password data from URLs if embedded
    in the latter as follows: scheme://user:passwd@netloc/path.

    Credentials containing unencoded whitespace or '/' are not
    recognised; such characters must be percent-encoded in a URL.

    :param text_seq: An iterable of strings (that may contain URLs).
    :return: A generator yielding each input string with the
        authentication credentials stripped from its URLs.
    """
    for line in text_seq:
        yield _CREDENTIALS_RE.sub('://', line)


def scrub_file(tmp_dir, log_path):
    """Scrub URL credentials from a log file in place.

    The original file is moved into `tmp_dir` under a unique name and a
    scrubbed copy is written back to `log_path`. The moved file still
    holds the unscrubbed content; disposing of it is up to the caller.

    :param tmp_dir: Directory that receives the original file. It is
        created if it does not exist yet.
    :param log_path: Path of the log file to scrub.
    :return: The path of the moved (unscrubbed) original file.
    """
    # Create the directory and tolerate it already existing; this avoids
    # the race inherent in checking for existence first.
    try:
        os.mkdir(tmp_dir)
    except OSError:
        if not os.path.isdir(tmp_dir):
            raise

    # Reserve a unique name for the original file. mkstemp() hands back
    # an open descriptor that we do not need; close it right away so it
    # is not leaked (and so the move below may replace the file).
    fd, tmp_path = tempfile.mkstemp(dir=tmp_dir)
    os.close(fd)

    # Move the original file out of the way.
    shutil.move(log_path, tmp_path)

    # Stream the unsanitized content through the scrubber into a fresh
    # file at the original location. Both handles are closed on exit,
    # even if reading or writing fails.
    with open(tmp_path) as original_file:
        with open(log_path, 'w') as clean_file:
            clean_file.writelines(scrub_urls(original_file))

    return tmp_path


if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.exit("usage: %s LOG_FILE" % sys.argv[0])
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(scrub_file("tmp", sys.argv[1]))