Coverage for encodermap/trajinfo/hash_files.py: 25%

22 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2023-02-07 11:05 +0000

1#!/usr/bin/python3 

2# -*- coding: utf-8 -*- 

3# encodermap/trajinfo/hash_files.py 

4################################################################################ 

5# Encodermap: A python library for dimensionality reduction. 

6# 

7# Copyright 2019-2022 University of Konstanz and the Authors 

8# 

9# Authors: 

10# Kevin Sawade 

11# 

12# Encodermap is free software: you can redistribute it and/or modify 

13# it under the terms of the GNU Lesser General Public License as 

14# published by the Free Software Foundation, either version 2.1 

15# of the License, or (at your option) any later version. 

16# This package is distributed in the hope that it will be useful to other 

17# researchers. IT DOES NOT COME WITH ANY WARRANTY WHATSOEVER; without even the 

18# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 

19# See the GNU Lesser General Public License for more details. 

20# 

21# See <http://www.gnu.org/licenses/>. 

22################################################################################ 

23 

24import argparse 

25import glob 

26import hashlib 

27import os 

28import pprint 

29import sys 

30 

31 

def hash_files(files):
    """Return the md5 and sha1 hex digests of one or more files.

    Args:
        files (Union[str, bytes, os.PathLike, Iterable]): A single path or an
            iterable of paths. A lone ``str``, ``bytes`` or path-like object
            (e.g. ``pathlib.Path``) is treated as one file.

    Returns:
        dict: A nested dict keyed by ``os.path.basename`` of every path. Each
            value is a dict with the keys ``"md5"`` and ``"sha1"`` holding
            the hex digests of the file's content. Paths that share a
            basename overwrite each other; the last one wins.

    """
    # The chunk size is arbitrary; 64 KiB keeps memory use flat for big files.
    buf_size = 65536

    # Wrap a lone path so the loop below always iterates over paths. bytes and
    # os.PathLike must be caught here too: iterating bytes yields integers
    # (which open() would interpret as file descriptors) and a pathlib.Path is
    # not iterable at all.
    if isinstance(files, (str, bytes, os.PathLike)):
        files = [files]

    out = {}
    for file in files:
        md5 = hashlib.md5()
        sha1 = hashlib.sha1()

        with open(file, "rb") as f:
            # iter() with a sentinel stops at the first empty read (EOF).
            for data in iter(lambda: f.read(buf_size), b""):
                md5.update(data)
                sha1.update(data)

        out[os.path.basename(file)] = {
            "md5": md5.hexdigest(),
            "sha1": sha1.hexdigest(),
        }

    return out

63 

64 

if __name__ == "__main__":
    # Command-line entry point: hash every file named on the command line and
    # pretty-print the resulting dict.
    cli = argparse.ArgumentParser(
        description="Get sha1 and md5 hashes of single file or list of files."
    )
    cli.add_argument(
        "files",
        nargs="*",
        help="Files to get hashes from. Can be used with wildcard.",
    )

    # Called without any argument: show the usage text on stderr and exit
    # with a non-zero status.
    called_bare = len(sys.argv) == 1
    if called_bare:
        cli.print_help(sys.stderr)
        sys.exit(1)

    namespace = cli.parse_args()
    pprint.pprint(hash_files(namespace.files))

77 pprint.pprint(out)