Coverage for encodermap/trajinfo/hash_files.py: 23%
23 statements
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-23 11:43 +0100
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-23 11:43 +0100
1#!/usr/bin/python3
2# -*- coding: utf-8 -*-
3# encodermap/trajinfo/hash_files.py
4################################################################################
5# Encodermap: A python library for dimensionality reduction.
6#
7# Copyright 2019-2024 University of Konstanz and the Authors
8#
9# Authors:
10# Kevin Sawade
11#
12# Encodermap is free software: you can redistribute it and/or modify
13# it under the terms of the GNU Lesser General Public License as
14# published by the Free Software Foundation, either version 2.1
15# of the License, or (at your option) any later version.
16# This package is distributed in the hope that it will be useful to other
17# researchers. IT DOES NOT COME WITH ANY WARRANTY WHATSOEVER; without even the
18# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
19# See the GNU Lesser General Public License for more details.
20#
21# See <http://www.gnu.org/licenses/>.
22################################################################################
24# Standard Library Imports
25import argparse
26import glob
27import hashlib
28import os
29import pprint
30import sys
def hash_files(files):
    """Returns a dict of file hashes.

    Every file is read once in binary mode, in fixed-size chunks, and each
    chunk is fed to an md5 and a sha1 hasher, so the whole file is never
    held in memory at once.

    Note:
        The result is keyed by the basename of each file. Two inputs that
        share a basename (e.g. ``a/data.xtc`` and ``b/data.xtc``) end up
        under the same key, and the later one overwrites the earlier one.

    Args:
        files (Union[str, os.PathLike, Iterable[Union[str, os.PathLike]]]):
            A file or an iterable of files. A single ``str`` or path-like
            object (such as ``pathlib.Path``) is treated as one file.

    Returns:
        dict: A nested dict, indexed by file basenames. Each value is a dict
            with the keys ``"md5"`` and ``"sha1"`` holding the hexadecimal
            digests of the file's content.

    Raises:
        OSError: If one of the files can not be opened or read.

    """
    # BUF_SIZE is arbitrary; it only sets how much is read per chunk.
    BUF_SIZE = 65536  # 64 KiB

    # A lone path has to be wrapped before iterating: a str would be walked
    # character by character, and a path-like object is not iterable at all.
    if isinstance(files, (str, os.PathLike)):
        files = [files]

    out = {}
    for file in files:
        md5 = hashlib.md5()
        sha1 = hashlib.sha1()

        with open(file, "rb") as f:
            # Two-argument iter() keeps calling f.read() until it returns
            # the sentinel b"", which a binary file does at end-of-file.
            for chunk in iter(lambda: f.read(BUF_SIZE), b""):
                md5.update(chunk)
                sha1.update(chunk)

        out[os.path.basename(file)] = {
            "md5": md5.hexdigest(),
            "sha1": sha1.hexdigest(),
        }

    return out
if __name__ == "__main__":
    # Command-line entry point: hash every file named on the command line
    # and pretty-print the resulting nested dict.
    cli = argparse.ArgumentParser(
        description="Get sha1 and md5 hashes of single file or list of files."
    )
    cli.add_argument(
        "files",
        nargs="*",
        help="Files to get hashes from. Can be used with wildcard.",
    )

    # Called without any argument: print the usage to stderr and exit with a
    # non-zero status instead of hashing nothing.
    if len(sys.argv) == 1:
        cli.print_help(sys.stderr)
        sys.exit(1)

    # The parsed file arguments are passed to hash_files() unchanged.
    cli_args = cli.parse_args()
    pprint.pprint(hash_files(cli_args.files))