Skip to content

Commit

Permalink
[data] Add checks to avoid downloading the same file twice.
Browse files: browse the repository at this point in the history
  • Loading branch information
akashin committed Dec 30, 2017
1 parent 3844c66 commit 7c4bf1e
Showing 1 changed file with 12 additions and 7 deletions.
19 changes: 12 additions & 7 deletions download_utils.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -26,16 +26,19 @@ def download_file(url, file_path):
print("Removed incomplete download")


def download_from_github(version, fn, target_dir):
def download_from_github(version, fn, target_dir, force=False):
url = REPOSITORY_PATH + "/releases/download/{0}/{1}".format(version, fn)
file_path = os.path.join(target_dir, fn)
if os.path.exists(file_path) and not force:
print("File {} is already downloaded.".format(file_path))
return
download_file(url, file_path)


def sequential_downloader(version, fns, target_dir):
def sequential_downloader(version, fns, target_dir, force=False):
os.makedirs(target_dir, exist_ok=True)
for fn in fns:
download_from_github(version, fn, target_dir)
download_from_github(version, fn, target_dir, force=force)


def link_all_files_from_dir(src_dir, dst_dir):
Expand All @@ -54,7 +57,7 @@ def link_resources():
link_all_files_from_dir("../readonly/dataset/", ".")


def download_week1_resources():
def download_week1_resources(force=False):
sequential_downloader(
"week1",
[
Expand All @@ -63,17 +66,19 @@ def download_week1_resources():
"test.tsv",
"text_prepare_tests.tsv",
],
"data"
"data",
force=force
)


def download_week2_resources():
def download_week2_resources(force=False):
sequential_downloader(
"week2",
[
"train.txt",
"validation.txt",
"test.txt",
],
"data"
"data",
force=force
)

0 comments on commit 7c4bf1e

Please sign in to comment.