diff --git a/monai/apps/utils.py b/monai/apps/utils.py index 856bc64c9e..264425e9b9 100644 --- a/monai/apps/utils.py +++ b/monai/apps/utils.py @@ -220,8 +220,7 @@ def download_url( HTTPError: See urllib.request.urlretrieve. ContentTooShortError: See urllib.request.urlretrieve. IOError: See urllib.request.urlretrieve. - RuntimeError: When the hash validation of the ``url`` downloaded file fails. - + ValueError: When the hash validation of the ``url`` downloaded file fails. """ if not filepath: filepath = Path(".", _basename(url)).resolve() @@ -229,8 +228,8 @@ def download_url( filepath = Path(filepath) if filepath.exists(): if not check_hash(filepath, hash_val, hash_type): - raise RuntimeError( - f"{hash_type} check of existing file failed: filepath={filepath}, expected {hash_type}={hash_val}." + raise ValueError( + f"{hash_type} hash check of existing file failed: filepath={filepath}, expected {hash_type}={hash_val}." ) logger.info(f"File exists: {filepath}, skipped downloading.") return @@ -260,6 +259,13 @@ def download_url( raise RuntimeError( f"Download of file from {url} to {filepath} failed due to network issue or denied permission." ) + if not check_hash(tmp_name, hash_val, hash_type): + raise ValueError( + f"{hash_type} hash check of downloaded file failed: URL={url}, " + f"filepath={filepath}, expected {hash_type}={hash_val}, " + f"The file may be corrupted or tampered with. " + "Please retry the download or verify the source." + ) file_dir = filepath.parent if file_dir: os.makedirs(file_dir, exist_ok=True) @@ -267,11 +273,6 @@ def download_url( except (PermissionError, NotADirectoryError): # project-monai/monai issue #3613 #3757 for windows pass logger.info(f"Downloaded: {filepath}") - if not check_hash(filepath, hash_val, hash_type): - raise RuntimeError( - f"{hash_type} check of downloaded file failed: URL={url}, " - f"filepath={filepath}, expected {hash_type}={hash_val}." - ) def _extract_zip(filepath, output_dir): diff --git a/tests/apps/test_download_and_extract.py b/tests/apps/test_download_and_extract.py index 6d16a72735..9dcd00659f 100644 --- a/tests/apps/test_download_and_extract.py +++ b/tests/apps/test_download_and_extract.py @@ -42,10 +42,10 @@ def test_actions(self): with self.assertLogs(logger="monai.apps", level="ERROR"): try: download_url(url, filepath, wrong_md5) - except (ContentTooShortError, HTTPError, RuntimeError) as e: - if isinstance(e, RuntimeError): + except (ContentTooShortError, HTTPError, ValueError, RuntimeError) as e: + if isinstance(e, ValueError): # FIXME: skip MD5 check as current downloading method may fail - self.assertTrue(str(e).startswith("md5 check")) + self.assertIn("hash check", str(e)) return # skipping this test due the network connection errors try: diff --git a/tests/test_utils.py b/tests/test_utils.py index 03fa7abce3..71af2a5525 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -79,7 +79,7 @@ "unexpected EOF", # incomplete download "network issue", "gdown dependency", # gdown not installed - "md5 check", + "hash check", # check hash value of downloaded file "limit", # HTTP Error 503: Egress is over the account limit "authenticate", "timed out", # urlopen error [Errno 110] Connection timed out @@ -182,6 +182,39 @@ def skip_if_downloading_fails(): raise unittest.SkipTest(f"Error while downloading: {rt_e}") from rt_e # incomplete download raise rt_e + except ValueError as v_e: + if "hash check" in str(v_e): + raise unittest.SkipTest(f"Hash value error while downloading: {v_e}") from v_e + + raise v_e + + +SAMPLE_TIFF = "https://huggingface.co/datasets/MONAI/testing_data/resolve/main/CMU-1.tiff" +SAMPLE_TIFF_HASH = "73a7e89bc15576587c3d68e55d9bf92f09690280166240b48ff4b48230b13bcd" +SAMPLE_TIFF_HASH_TYPE = "sha256" + + +class TestDownloadUrl(unittest.TestCase): + """Exercise ``download_url`` success and hash-mismatch paths.""" + + def test_download_url(self): + """ + Download a sample TIFF and validate hash handling. + """ + with tempfile.TemporaryDirectory() as tempdir: + model_path = os.path.join(tempdir, "model.tiff") + + with skip_if_downloading_fails(): + download_url( + url=SAMPLE_TIFF, filepath=model_path, hash_val=SAMPLE_TIFF_HASH, hash_type=SAMPLE_TIFF_HASH_TYPE + ) + + # Verify file exists before testing hash mismatch + if not os.path.exists(model_path): + self.skipTest("File was not downloaded successfully") + + with self.assertRaises(ValueError): + download_url(url=SAMPLE_TIFF, filepath=model_path, hash_val="0" * 64, hash_type=SAMPLE_TIFF_HASH_TYPE) SAMPLE_TIFF = "https://huggingface.co/datasets/MONAI/testing_data/resolve/main/CMU-1.tiff"