1 #!/usr/bin/env python
  2 # encoding: utf-8
  3 
  4 __version__ = "$Id:$"
  5 # $HeadURL $
  6 
  7 import os, re, shutil, sys, tempfile
  8 
  9 def scrub_urls(text_seq):
 10     """A generator that deletes URL passwords from a string sequence.
 11 
 12     This generator removes user/password data from URLs if embedded
 13     in the latter as follows: scheme://user:passwd@netloc/path. 
 14 
 15     :param text_seq: A sequence of strings (that may contain URLs).
 16     :return: A (scrubbed) line stripped of authentication credentials.
 17     """
 18     # This regular expression will be used to remove authentication
 19     # credentials from URLs.
 20     password_re = re.compile('://([^:]+:[^@]+@)(\S+)')
 21 
 22     for line in text_seq:
 23         scrubbed_line = password_re.sub(r'://\2', line)
 24         yield scrubbed_line
 25 
 26 def scrub_file(tmp_dir, log_path):
 27     if not os.access(tmp_dir, os.F_OK):
 28         os.mkdir(tmp_dir)
 29 
 30     # Move the original file out of the way.
 31     fd, tmp_path = tempfile.mkstemp(dir=tmp_dir)
 32     shutil.move(log_path, tmp_path)
 33 
 34     # Open the unsanitized log file for reading.
 35     original_file = open(tmp_path)
 36 
 37     # Open the file that will hold the resulting, scrubbed log
 38     # content for writing.
 39     clean_file = None
 40 
 41     try:
 42         clean_file = open(log_path, 'w')
 43 
 44         # Scrub the log file line by line.
 45         clean_content_iter = scrub_urls(original_file)
 46         for line in clean_content_iter:
 47             clean_file.write(line)
 48     finally:
 49         # We're done with scrubbing, close the file handles.
 50         original_file.close()
 51         if clean_file is not None:
 52             clean_file.close()
 53 
 54     return tmp_path
 55 
 56 if __name__ == '__main__':
 57     print scrub_file("tmp", sys.argv[1])