Coverage for encodermap/trajinfo/hash_files.py: 23%

23 statements  

« prev     ^ index     » next       coverage.py v7.4.1, created at 2024-02-23 11:43 +0100

1#!/usr/bin/python3 

2# -*- coding: utf-8 -*- 

3# encodermap/trajinfo/hash_files.py 

4################################################################################ 

5# Encodermap: A python library for dimensionality reduction. 

6# 

7# Copyright 2019-2024 University of Konstanz and the Authors 

8# 

9# Authors: 

10# Kevin Sawade 

11# 

12# Encodermap is free software: you can redistribute it and/or modify 

13# it under the terms of the GNU Lesser General Public License as 

14# published by the Free Software Foundation, either version 2.1 

15# of the License, or (at your option) any later version. 

16# This package is distributed in the hope that it will be useful to other 

17# researchers. IT DOES NOT COME WITH ANY WARRANTY WHATSOEVER; without even the 

18# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 

19# See the GNU Lesser General Public License for more details. 

20# 

21# See <http://www.gnu.org/licenses/>. 

22################################################################################ 

23 

24# Standard Library Imports 

25import argparse 

26import glob 

27import hashlib 

28import os 

29import pprint 

30import sys 

31 

32 

def hash_files(files):
    """Returns a dict of file hashes.

    Every file is opened in binary mode and read in fixed-size chunks that
    are fed to an md5 and a sha1 hasher at the same time.

    Args:
        files (Union[str, bytes, os.PathLike, Iterable]): A single path or an
            iterable of paths. A single path may be given as ``str``,
            ``bytes`` or any ``os.PathLike`` object (e.g. ``pathlib.Path``).

    Returns:
        dict: A nested dict, indexed by the basename of each file. Every
            value is a dict with the keys ``"md5"`` and ``"sha1"`` holding
            the hexadecimal digests.

    Note:
        The result is keyed by ``os.path.basename``, so two inputs that share
        a basename (even in different directories) end up in the same entry
        and the later one overwrites the earlier one.

    """
    # Read in 64 KiB chunks so a file never has to be loaded as a whole.
    BUF_SIZE = 65536

    # A single path must be wrapped before the loop below: a str would be
    # iterated character by character, bytes would yield integers and a
    # PathLike object is not iterable at all.
    if isinstance(files, (str, bytes, os.PathLike)):
        files = [files]

    out = {}
    for file in files:
        md5 = hashlib.md5()
        sha1 = hashlib.sha1()

        with open(file, "rb") as f:
            # read() returns b"" at end of file, which is the sentinel that
            # stops the iteration.
            for data in iter(lambda: f.read(BUF_SIZE), b""):
                md5.update(data)
                sha1.update(data)
        out[os.path.basename(file)] = {"md5": md5.hexdigest(), "sha1": sha1.hexdigest()}

    return out

64 

65 

if __name__ == "__main__":
    # Command line entry point: print the md5 and sha1 hashes of the files
    # given as positional arguments.
    parser = argparse.ArgumentParser(
        description="Get sha1 and md5 hashes of single file or list of files."
    )
    parser.add_argument(
        "files", nargs="*", help="Files to get hashes from. Can be used with wildcard."
    )
    # Called without any argument: show the usage on stderr and exit non-zero.
    if len(sys.argv) == 1:
        parser.print_help(sys.stderr)
        sys.exit(1)
    args = parser.parse_args()

    # Not every shell expands wildcards before the script is started (and a
    # quoted pattern is never expanded), so patterns are expanded here.
    files = []
    for arg in args.files:
        # An existing path is always taken literally, even if its name
        # contains characters that glob would treat as a pattern.
        if os.path.exists(arg):
            files.append(arg)
            continue
        matches = sorted(glob.glob(arg))
        # Without a match the argument is passed on unchanged, so that
        # hash_files reports the missing file.
        files.extend(matches if matches else [arg])

    out = hash_files(files)
    pprint.pprint(out)

78 pprint.pprint(out)