hash - Finding md5 of files recursively in directory in python -
i want find md5sum of files starting "10" ( exe, doc, pdf etc) hence not checking file extension start 2 digits. far i've script traverse through directory , print out such files couldn't checksum printed each of them:
def print_files(file_directory, file_extensions=['10']): ''' print files in file_directory extensions in file_extensions, recursively. ''' # absolute path of file_directory parameter file_directory = os.path.abspath(file_directory) # list of files in file_directory file_directory_files = os.listdir(file_directory) # traverse through files filename in file_directory_files: filepath = os.path.join(file_directory, filename) # check if it's normal file or directory if os.path.isfile(filepath): # check if file has extension of typical video files file_extension in file_extensions: # not reqd file, ignore #if not filepath.endswith(file_extension): if not filename.startswith(file_extension) or len(filename) != 19: continue # have got '10' file! print_files.counter += 1 ## trying read , print md5 using hashlib/ doesnt work### hasher = hashlib.md5() open(filename, 'rb') afile: buf = afile.read(65536) while len(buf) > 0: hasher.update(buf) buf = afile.read(65536) # print it's name print('{0}'.format(filepath)) print hasher('{0}.format(filepath)').hexdigest() print '\n' elif os.path.isdir(filepath): # got directory, enter further processing print_files(filepath) if __name__ == '__main__': # directory argument supplied if len(sys.argv) == 2: if os.path.isdir(sys.argv[1]): file_directory = sys.argv[1] else: print('error: "{0}" not directory.'.format(sys.argv[1])) exit(1) else: # set file directory cwd file_directory = os.getcwd() print('\n -- looking required files in "{0}" -- \n'.format(file_directory)) # set number of processed files equal 0 print_files.counter = 0 # start processing print_files(file_directory) # done. exit now.
'
i'd recommend not solve recursively, instead make use of os.walk()
traverse directory structure. following code body of print_files
function.
file_directory = os.path.abspath(file_directory) paths_to_hash = [] root, dirs, filenames in os.walk(file_directory, topdown=false): i, dir in enumerate(dirs): filename in filenames[i]: if filenames[:2] == '10': paths_to_hash += [os.path.abspath('{0}/{1}/{2}'.format(root, dir, filename)] path in paths_to_hash: hash = hashlib.md5(open(path, 'rb').read()).digest()) print 'hash: {0} path: {1}'.format(hash, path)
Comments
Post a Comment