-
Notifications
You must be signed in to change notification settings - Fork 0
AP-687 Only download files if local file is older #13
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -3,19 +3,22 @@ | |||||||||||||||||||||
| """ | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| import json | ||||||||||||||||||||||
| import logging | ||||||||||||||||||||||
| import os | ||||||||||||||||||||||
| import re | ||||||||||||||||||||||
| from io import StringIO | ||||||||||||||||||||||
| from pathlib import Path | ||||||||||||||||||||||
| from typing import Any, Iterator | ||||||||||||||||||||||
| import xml.etree.ElementTree as E | ||||||||||||||||||||||
| from datetime import datetime, timezone | ||||||||||||||||||||||
|
Comment on lines
9
to
+13
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not only for alphabetisation, but because…
Suggested change
|
||||||||||||||||||||||
|
|
||||||||||||||||||||||
| from pymarc import Record | ||||||||||||||||||||||
| from pymarc.marcxml import parse_xml_to_array | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| from .api import tind_get, tind_download | ||||||||||||||||||||||
| from .errors import RecordNotFoundError, TINDError | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| logger = logging.getLogger(__name__) | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| NS = "http://www.loc.gov/MARC21/slim" | ||||||||||||||||||||||
| E.register_namespace("", NS) | ||||||||||||||||||||||
|
|
@@ -69,12 +72,15 @@ def fetch_metadata(self, record: str) -> Record: | |||||||||||||||||||||
|
|
||||||||||||||||||||||
| return records[0] | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| def fetch_file(self, file_url: str, output_dir: str = "") -> str: | ||||||||||||||||||||||
| """Download a file from TIND and save it locally. | ||||||||||||||||||||||
| def fetch_file(self, file_url: str, output_dir: str = "", modified: str = "") -> str: | ||||||||||||||||||||||
| """Download a file from TIND and save it locally. If the file already exists in the output | ||||||||||||||||||||||
| directory and has a local modified timestamp that is newer than supplied ``modified`` | ||||||||||||||||||||||
| timestamp, the file will not be re-downloaded. | ||||||||||||||||||||||
|
Comment on lines
+76
to
+78
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||||||
|
|
||||||||||||||||||||||
| :param str file_url: The TIND file download URL. | ||||||||||||||||||||||
| :param str output_dir: Directory in which to save the file. | ||||||||||||||||||||||
| Falls back to ``default_storage_dir`` when empty. | ||||||||||||||||||||||
| :param str modified: Optional modified timestamp from the file metadata returned by TIND | ||||||||||||||||||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||||||
| :raises AuthorizationError: When the TIND API key is invalid or the file is restricted. | ||||||||||||||||||||||
| :raises ValueError: When ``file_url`` is not a valid TIND file download URL. | ||||||||||||||||||||||
| :raises RecordNotFoundError: When the file is invalid or not found. | ||||||||||||||||||||||
|
|
@@ -84,9 +90,20 @@ def fetch_file(self, file_url: str, output_dir: str = "") -> str: | |||||||||||||||||||||
| raise ValueError("URL is not a valid TIND file download URL.") | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| output_target = output_dir or self.default_storage_dir | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| expected_filename = file_url.rstrip("/").split("/")[-2] | ||||||||||||||||||||||
| expected_path = Path(output_target) / expected_filename | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| if modified and expected_path.exists(): | ||||||||||||||||||||||
| meta_mtime = datetime.fromisoformat(modified).replace(tzinfo=timezone.utc) | ||||||||||||||||||||||
| local_mtime = datetime.fromtimestamp(expected_path.stat().st_mtime, tz=timezone.utc) | ||||||||||||||||||||||
| if local_mtime >= meta_mtime: | ||||||||||||||||||||||
| logger.debug("Cached file at (%s) is newer; skipping download.", expected_path) | ||||||||||||||||||||||
| return str(expected_path) | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| (status, saved_to) = tind_download(file_url, output_dir=output_target, api_key=self.api_key) | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| if status != 200: | ||||||||||||||||||||||
| if status != 200 or not saved_to: | ||||||||||||||||||||||
| raise RecordNotFoundError("Referenced file could not be downloaded.") | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| return saved_to | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Wouldn't this be a single entry in the changelog?