Skip to content

Commit

Permalink
Cp download parallel (#477)
Browse files Browse the repository at this point in the history
* add more info on which chunk is downloading, and make a chunk folder for each file (#469)

* Download iso chunk folder (#470)

* add more info on which chunk is downloading, and make a chunk folder for each file

* fix bug

* comments

* add .cache/sparsezoo/neuralmagic/

* Multiple download bug (#476)

* src/sparsezoo/utils/download.py

* revert readme
  • Loading branch information
horheynm committed Mar 8, 2024
1 parent 3e9e322 commit 0b58962
Showing 1 changed file with 7 additions and 1 deletion.
8 changes: 7 additions & 1 deletion src/sparsezoo/utils/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from pathlib import Path
from queue import Queue
from typing import Any, Callable, Dict, Optional
from uuid import uuid4

import requests
from tqdm import tqdm
Expand Down Expand Up @@ -103,15 +104,20 @@ def get_chunk_download_path(self, path: str) -> str:
stub = path.split(os.path.sep)[-3]
path = "_".join(path.split(os.path.sep)[-2:])
file_name_as_folder = path.replace(".", "_")
file_id = str(uuid4())[:4]

# Note: parallel downloads may cause multiple processes to download
# the same file, so each call uses its own short random file_id folder.
# Save the chunks in a different folder than the root model folder:
# ~/.cache/sparsezoo/neuralmagic/chunks/stub/file_id/tokenizer_json/{chunk1, ...} # noqa
return os.path.join(
str(Path.home()),
".cache",
"sparsezoo",
"neuralmagic",
"chunks",
stub,
file_id,
file_name_as_folder,
)

Expand Down Expand Up @@ -410,7 +416,7 @@ def combine_chunks_and_delete(self, download_path: str, progress_bar: tqdm) -> N
combined_file.write(data)
progress_bar.update(len(data))

shutil.rmtree(self.chunk_download_path)
shutil.rmtree(os.path.dirname(self.chunk_download_path))

def get_chunk_file_path(self, file_range: str) -> str:
"""
Expand Down

0 comments on commit 0b58962

Please sign in to comment.