diff --git a/internetarchive/files.py b/internetarchive/files.py
index c8029469..3ddcfa98 100644
--- a/internetarchive/files.py
+++ b/internetarchive/files.py
@@ -28,6 +28,7 @@ import socket
 import sys
 from contextlib import nullcontext, suppress
+from datetime import datetime, timezone
 from urllib.parse import quote
 
 from requests.exceptions import (
@@ -298,11 +299,21 @@ def download(self, file_path=None, verbose=None, ignore_existing=None,
             else:
                 raise exc
 
-        # Set mtime with mtime from files.xml.
+        # Get timestamp from the Last-Modified header. Fall back to the mtime
+        # from files.xml when the header is missing or cannot be parsed, so a
+        # download that already succeeded is not failed over timestamp metadata.
+        last_updated_pattern = "%a, %d %b %Y %H:%M:%S %Z"
+        last_modified = self.mtime
+        with suppress(KeyError, ValueError):
+            time_str = response.headers["Last-Modified"]
+            dt = datetime.strptime(time_str, last_updated_pattern)
+            last_modified = int(dt.replace(tzinfo=timezone.utc).timestamp())
+
+        # Set mtime from Last-Modified, else the mtime from files.xml.
         if not no_change_timestamp:
             # If we want to set the timestamp to that of the original archive...
             with suppress(OSError):  # Probably file-like object, e.g. sys.stdout.
-                os.utime(file_path.encode('utf-8'), (0, self.mtime))
+                os.utime(file_path.encode('utf-8'), (0, last_modified))
 
         msg = f'downloaded {self.identifier}/{self.name} to {file_path}'
         log.info(msg)
diff --git a/internetarchive/item.py b/internetarchive/item.py
index 18a2d072..d49c6b5f 100644
--- a/internetarchive/item.py
+++ b/internetarchive/item.py
@@ -1106,10 +1106,10 @@ def _build_request():
                 return response
             except HTTPError as exc:
                 try:
-                    msg = get_s3_xml_text(exc.response.content)
+                    msg = get_s3_xml_text(exc.response.content)  # type: ignore
                 except ExpatError:  # probably HTTP 500 error and response is invalid XML
                     msg = ('IA S3 returned invalid XML '
-                           f'(HTTP status code {exc.response.status_code}). '
+                           f'(HTTP status code {exc.response.status_code}). '  # type: ignore
                            'This is a server side error which is either temporary, '
                            'or requires the intervention of IA admins.')
 