Skip to content

Commit

Permalink
fixes issue #512
Browse files Browse the repository at this point in the history
  • Loading branch information
proccaserra committed Nov 12, 2023
1 parent 34f9291 commit 2f489f5
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 2 deletions.
36 changes: 35 additions & 1 deletion isatools/model/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import networkx as nx
import hashlib
import os

from isatools.model.datafile import DataFile
from isatools.model.datafile import DataFile, Comment
from isatools.model.process import Process
from isatools.model.source import Source
from isatools.model.sample import Sample
Expand Down Expand Up @@ -212,3 +214,35 @@ def _deep_copy(isa_object):
if isinstance(isa_object, ProcessSequenceNode):
new_obj.assign_identifier()
return new_obj


def update_hash(path, file, hash_func):
    """Feed a file's bytes into ``hash_func`` and return the hex digest.

    :param path: directory containing the file.
    :param file: file name, joined onto ``path`` with ``os.path.join``.
    :param hash_func: a hashlib-style object exposing ``update`` and
        ``hexdigest``; it is updated in place.
    :return: the hexadecimal digest string of ``hash_func`` after the whole
        file has been read.
    """
    target = os.path.join(path, file)
    with open(target, "rb") as stream:
        # Read in fixed 4 KiB chunks so the file is never loaded whole.
        while True:
            chunk = stream.read(4096)
            if chunk == b"":
                break
            hash_func.update(chunk)
    return hash_func.hexdigest()


def compute_checksum(path, isa_file_object: DataFile, checksum_type):
    """Compute a file checksum and record it as comments on the data file.

    Two comments are appended to ``isa_file_object.comments``, in this order:
    one named ``"checksum type"`` holding ``checksum_type`` and one named
    ``"checksum"`` holding the hexadecimal digest.

    :param path: directory containing the file named by
        ``isa_file_object.filename``.
    :param isa_file_object: the data file object to annotate; it is modified
        in place.
    :param checksum_type: one of ``"md5"``, ``"sha1"`` or ``"sha256"``.
    :return: the same ``isa_file_object`` that was passed in.
    :raises ValueError: if ``checksum_type`` is not a supported algorithm.
    """
    # Every accepted algorithm needs a constructor here; otherwise a value
    # that passes validation would be recorded with a ``None`` checksum.
    constructors = {
        "md5": hashlib.md5,
        "sha1": hashlib.sha1,
        "sha256": hashlib.sha256,
    }
    if checksum_type not in constructors:
        raise ValueError("Invalid checksum type")

    # A fresh hasher per call, kept local so no state leaks between calls.
    hasher = constructors[checksum_type]()
    file_checksum = update_hash(path, isa_file_object.filename, hasher)

    isa_file_object.comments.append(Comment(name="checksum type", value=checksum_type))
    isa_file_object.comments.append(Comment(name="checksum", value=file_checksum))

    return isa_file_object
14 changes: 13 additions & 1 deletion tests/model/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
find, plink,
batch_create_materials,
batch_create_assays,
_deep_copy
_deep_copy,
compute_checksum
)


Expand Down Expand Up @@ -103,3 +104,14 @@ def test_batch_create_assays(self):
self.assertFalse(first_batch == third_batch)
self.assertFalse(first_batch == second_batch)

def test_checksum_md5(self):
    """compute_checksum with "md5" appends the type, then the digest."""
    import os  # local import: this file's import header is not fully visible here

    # Anchor the fixture directory to this test file rather than the current
    # working directory, so the test does not depend on where it is run from.
    data_dir = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "..", "data", "tab", "BII-S-3"
    )
    isa_data_file = DataFile(filename="EVHINN101.sff", label="RawDataFile")
    updated_isa_data_file = compute_checksum(data_dir, isa_data_file, "md5")
    self.assertEqual(updated_isa_data_file.comments[0].value, "md5")
    # The expected value is the MD5 digest of zero bytes of input.
    self.assertEqual(updated_isa_data_file.comments[1].value, "d41d8cd98f00b204e9800998ecf8427e")

def test_checksum_sha2(self):
    """compute_checksum with "sha256" appends the type, then the digest."""
    import os  # local import: this file's import header is not fully visible here

    # Anchor the fixture directory to this test file rather than the current
    # working directory, so the test does not depend on where it is run from.
    data_dir = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "..", "data", "tab", "BII-S-3"
    )
    isa_data_file = DataFile(filename="EVHINN101.sff", label="RawDataFile")
    updated_isa_data_file = compute_checksum(data_dir, isa_data_file, "sha256")
    self.assertEqual(updated_isa_data_file.comments[0].value, "sha256")
    # The expected value is the SHA-256 digest of zero bytes of input.
    self.assertEqual(updated_isa_data_file.comments[1].value, "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855")

0 comments on commit 2f489f5

Please sign in to comment.