hash - Finding the MD5 of files recursively in a directory in Python -


I want to find the md5sum of files whose names start with "10" (exe, doc, pdf, etc.), so I check the first two digits of the file name rather than the file extension. So far I have a script that traverses the directory and prints out such files, but I couldn't get the checksum printed for each of them:

# Files are hashed in fixed-size chunks so large files are never read whole.
_CHUNK_SIZE = 65536

# Only file names of exactly this many characters are reported.
_REQUIRED_NAME_LENGTH = 19


def _md5_hexdigest(filepath):
    """Return the hexadecimal MD5 digest of the file at *filepath*."""
    hasher = hashlib.md5()
    with open(filepath, 'rb') as afile:
        for chunk in iter(lambda: afile.read(_CHUNK_SIZE), b''):
            hasher.update(chunk)
    return hasher.hexdigest()


def print_files(file_directory, file_extensions=('10',)):
    """Print the path and MD5 checksum of matching files, recursively.

    A file matches when its name starts with one of the strings in
    ``file_extensions`` and is exactly 19 characters long. Despite the
    parameter name, the strings are matched against the *start* of the
    file name, not its extension.

    Args:
        file_directory: Directory to scan; sub-directories are scanned too.
        file_extensions: Iterable of file-name prefixes to look for.

    Every reported file increments ``print_files.counter``.
    """
    file_directory = os.path.abspath(file_directory)
    # str.startswith accepts a tuple, so each file is reported at most once
    # even when several prefixes match.
    prefixes = tuple(file_extensions)

    for filename in os.listdir(file_directory):
        filepath = os.path.join(file_directory, filename)

        if os.path.isfile(filepath):
            if (not filename.startswith(prefixes)
                    or len(filename) != _REQUIRED_NAME_LENGTH):
                # Not a required file, ignore it.
                continue

            # Hash via the full path: the bare file name only resolves when
            # the file happens to live in the current working directory.
            checksum = _md5_hexdigest(filepath)

            print_files.counter += 1
            print('{0}'.format(filepath))
            print(checksum)
            print('\n')
        elif os.path.isdir(filepath):
            # Got a directory; descend with the same prefixes.
            print_files(filepath, file_extensions)


# Number of files reported so far. Initialised here so the function also
# works when this module is imported rather than run as a script.
print_files.counter = 0


if __name__ == '__main__':
    # Use the directory given on the command line, else the current one.
    if len(sys.argv) == 2:
        if os.path.isdir(sys.argv[1]):
            file_directory = sys.argv[1]
        else:
            print('error: "{0}" not directory.'.format(sys.argv[1]))
            sys.exit(1)
    else:
        file_directory = os.getcwd()

    print('\n -- looking required files in "{0}" --   \n'.format(file_directory))

    # Reset the number of processed files, then start processing.
    print_files.counter = 0
    print_files(file_directory)

'

I'd recommend not solving this recursively, and instead making use of os.walk() to traverse the directory structure. The following code is the body of the print_files function.

# Body of print_files(file_directory): collect every file whose name starts
# with '10' anywhere below file_directory, then print its MD5 checksum.
file_directory = os.path.abspath(file_directory)
paths_to_hash = []

# os.walk yields (directory, sub-directory names, file names) for every
# directory in the tree, so `filenames` already lists the files in `root`
# and no explicit recursion or per-directory indexing is needed.
for root, _dirs, filenames in os.walk(file_directory, topdown=False):
    for filename in filenames:
        if filename.startswith('10'):
            paths_to_hash.append(os.path.join(root, filename))

for path in paths_to_hash:
    hasher = hashlib.md5()
    # Read in chunks and close the handle promptly; large files are never
    # loaded into memory in one piece.
    with open(path, 'rb') as handle:
        for chunk in iter(lambda: handle.read(65536), b''):
            hasher.update(chunk)
    # hexdigest() gives the printable checksum; digest() is raw bytes.
    print('hash: {0} path: {1}'.format(hasher.hexdigest(), path))

Comments

Popular posts from this blog

css - SVG using textPath a symbol not rendering in Firefox -

Java 8 + Maven Javadoc plugin: Error fetching URL -

datatable - Matlab struct computations -