Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 10 additions & 9 deletions monai/apps/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,17 +220,16 @@ def download_url(
HTTPError: See urllib.request.urlretrieve.
ContentTooShortError: See urllib.request.urlretrieve.
IOError: See urllib.request.urlretrieve.
RuntimeError: When the hash validation of the ``url`` downloaded file fails.

ValueError: When the hash validation of the ``url`` downloaded file fails.
"""
if not filepath:
filepath = Path(".", _basename(url)).resolve()
logger.info(f"Default downloading to '{filepath}'")
filepath = Path(filepath)
if filepath.exists():
if not check_hash(filepath, hash_val, hash_type):
raise RuntimeError(
f"{hash_type} check of existing file failed: filepath={filepath}, expected {hash_type}={hash_val}."
raise ValueError(
f"{hash_type} hash check of existing file failed: filepath={filepath}, expected {hash_type}={hash_val}."
)
Comment thread
coderabbitai[bot] marked this conversation as resolved.
logger.info(f"File exists: {filepath}, skipped downloading.")
return
Expand Down Expand Up @@ -260,18 +259,20 @@ def download_url(
raise RuntimeError(
f"Download of file from {url} to {filepath} failed due to network issue or denied permission."
)
if not check_hash(tmp_name, hash_val, hash_type):
raise ValueError(
f"{hash_type} hash check of downloaded file failed: URL={url}, "
f"filepath={filepath}, expected {hash_type}={hash_val}, "
f"The file may be corrupted or tampered with. "
"Please retry the download or verify the source."
)
file_dir = filepath.parent
if file_dir:
os.makedirs(file_dir, exist_ok=True)
shutil.move(f"{tmp_name}", f"{filepath}") # copy the downloaded to a user-specified cache.
except (PermissionError, NotADirectoryError): # project-monai/monai issue #3613 #3757 for windows
pass
logger.info(f"Downloaded: {filepath}")
if not check_hash(filepath, hash_val, hash_type):
raise RuntimeError(
f"{hash_type} check of downloaded file failed: URL={url}, "
f"filepath={filepath}, expected {hash_type}={hash_val}."
)


def _extract_zip(filepath, output_dir):
Expand Down
6 changes: 3 additions & 3 deletions tests/apps/test_download_and_extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,10 @@ def test_actions(self):
with self.assertLogs(logger="monai.apps", level="ERROR"):
try:
download_url(url, filepath, wrong_md5)
except (ContentTooShortError, HTTPError, RuntimeError) as e:
if isinstance(e, RuntimeError):
except (ContentTooShortError, HTTPError, ValueError, RuntimeError) as e:
if isinstance(e, ValueError):
# FIXME: skip MD5 check as current downloading method may fail
self.assertTrue(str(e).startswith("md5 check"))
self.assertIn("hash check", str(e))
return # skipping this test due the network connection errors
Comment on lines +45 to 49
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
except (ContentTooShortError, HTTPError, ValueError, RuntimeError) as e:
if isinstance(e, ValueError):
# FIXME: skip MD5 check as current downloading method may fail
self.assertTrue(str(e).startswith("md5 check"))
self.assertIn("hash check", str(e))
return # skipping this test due the network connection errors
except ValueError as e:
# FIXME: skip MD5 check as current downloading method may fail
self.assertIn("hash check", str(e))
return
except (ContentTooShortError, HTTPError, RuntimeError) as e:
return # skipping this test due the network connection errors

I think a separate clause makes sense despite what the original was.


try:
Expand Down
35 changes: 34 additions & 1 deletion tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@
"unexpected EOF", # incomplete download
"network issue",
"gdown dependency", # gdown not installed
"md5 check",
"hash check", # check hash value of downloaded file
"limit", # HTTP Error 503: Egress is over the account limit
"authenticate",
"timed out", # urlopen error [Errno 110] Connection timed out
Expand Down Expand Up @@ -182,6 +182,39 @@ def skip_if_downloading_fails():
raise unittest.SkipTest(f"Error while downloading: {rt_e}") from rt_e # incomplete download

raise rt_e
except ValueError as v_e:
if "hash check" in str(v_e):
raise unittest.SkipTest(f"Hash value error while downloading: {v_e}") from v_e
Comment thread
coderabbitai[bot] marked this conversation as resolved.

raise v_e


SAMPLE_TIFF = "https://huggingface.co/datasets/MONAI/testing_data/resolve/main/CMU-1.tiff"
SAMPLE_TIFF_HASH = "73a7e89bc15576587c3d68e55d9bf92f09690280166240b48ff4b48230b13bcd"
SAMPLE_TIFF_HASH_TYPE = "sha256"


class TestDownloadUrl(unittest.TestCase):
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have a conflict now from your previous PR being merged. These two classes possibly need to be merged as the two tests are different.

"""Exercise ``download_url`` success and hash-mismatch paths."""

def test_download_url(self):
"""
Download a sample TIFF and validate hash handling.
"""
with tempfile.TemporaryDirectory() as tempdir:
model_path = os.path.join(tempdir, "model.tiff")

with skip_if_downloading_fails():
download_url(
url=SAMPLE_TIFF, filepath=model_path, hash_val=SAMPLE_TIFF_HASH, hash_type=SAMPLE_TIFF_HASH_TYPE
)

# Verify file exists before testing hash mismatch
if not os.path.exists(model_path):
self.skipTest("File was not downloaded successfully")

with self.assertRaises(ValueError):
download_url(url=SAMPLE_TIFF, filepath=model_path, hash_val="0" * 64, hash_type=SAMPLE_TIFF_HASH_TYPE)
Comment thread
coderabbitai[bot] marked this conversation as resolved.


SAMPLE_TIFF = "https://huggingface.co/datasets/MONAI/testing_data/resolve/main/CMU-1.tiff"
Expand Down
Loading