Merge branch 'master' into add_archive_file
benbou8231 authored Jun 13, 2024
2 parents cb86941 + c97ec24 commit e003e4e
Showing 4 changed files with 56 additions and 49 deletions.
7 changes: 7 additions & 0 deletions HISTORY.rst
@@ -3,6 +3,13 @@
Release History
---------------

4.1.0 (2024-05-07)
++++++++++++++++++

**Bugfixes**

- Use mtime from files.xml if no Last-Modified header is available (e.g. VTT files).

4.0.1 (2024-04-15)
++++++++++++++++++

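The 4.1.0 bugfix entry above corresponds to the files.py change further down, where the fallback for last_mod_mtime switches from 0 to self.mtime. A minimal sketch of that fallback, assuming a last_mod_header string (possibly None) and a files_xml_mtime value taken from the item's files.xml; the names are illustrative, not the library's API:

from email.utils import parsedate_to_datetime

def resolve_mtime(last_mod_header, files_xml_mtime):
    # Prefer the HTTP Last-Modified header when the server sends one.
    if last_mod_header:
        return parsedate_to_datetime(last_mod_header).timestamp()
    # Otherwise fall back to the mtime recorded in the item's files.xml
    # (e.g. VTT files, which may come back without a Last-Modified header).
    return files_xml_mtime

# No Last-Modified header: the files.xml mtime is used instead of 0.
print(resolve_mtime(None, 1715040000))
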
3 changes: 1 addition & 2 deletions Makefile
@@ -20,8 +20,7 @@ publish:
git tag -a v$(VERSION) -m 'version $(VERSION)'
git push --tags origin master
python -m build
twine upload --repository pypi dist/internetarchive-$(VERSION)-py3-none-any.whl
twine upload --repository pypi dist/internetarchive-$(VERSION).tar.gz
twine upload --repository pypi ./dist/*

docs-init:
pip install -r docs/requirements.txt
2 changes: 1 addition & 1 deletion internetarchive/__version__.py
@@ -1 +1 @@
__version__ = '4.0.1'
__version__ = '4.1.0'
93 changes: 47 additions & 46 deletions internetarchive/files.py
@@ -247,6 +247,45 @@ def download( # noqa: max-complexity=38

parent_dir = os.path.dirname(file_path)

# Check if we should skip...
if not return_responses and os.path.exists(file_path.encode('utf-8')):
if checksum_archive:
checksum_archive_filename = '_checksum_archive.txt'
if not os.path.exists(checksum_archive_filename):
with open(checksum_archive_filename, 'w', encoding='utf-8') as f:
pass
with open(checksum_archive_filename, encoding='utf-8') as f:
checksum_archive_data = f.read().splitlines()
if file_path in checksum_archive_data:
msg = (
f'skipping {file_path}, '
f'file already exists based on checksum_archive.'
)
log.info(msg)
if verbose:
print(f' {msg}', file=sys.stderr)
return
if ignore_existing:
msg = f'skipping {file_path}, file already exists.'
log.info(msg)
if verbose:
print(f' {msg}', file=sys.stderr)
return
elif checksum or checksum_archive:
with open(file_path, 'rb') as fp:
md5_sum = utils.get_md5(fp)

if md5_sum == self.md5:
msg = f'skipping {file_path}, file already exists based on checksum.'
log.info(msg)
if verbose:
print(f' {msg}', file=sys.stderr)
if checksum_archive:
# add file to checksum_archive to skip it next time
with open(checksum_archive_filename, 'a', encoding='utf-8') as f:
f.write(f'{file_path}\n')
return

# Retry loop
while True:
try:
@@ -275,59 +314,21 @@ def download( # noqa: max-complexity=38
dt = parsedate_to_datetime(last_mod_header)
last_mod_mtime = dt.timestamp()
else:
last_mod_mtime = 0
last_mod_mtime = self.mtime

response.raise_for_status()

# Check if we should skip...
if not return_responses and os.path.exists(file_path.encode('utf-8')):
if checksum_archive:
checksum_archive_filename = '_checksum_archive.txt'
if not os.path.exists(checksum_archive_filename):
with open(checksum_archive_filename, 'w', encoding='utf-8') as f:
pass
with open(checksum_archive_filename, encoding='utf-8') as f:
checksum_archive_data = f.read().splitlines()
if file_path in checksum_archive_data:
msg = (
f'skipping {file_path}, '
f'file already exists based on checksum_archive.'
)
log.info(msg)
if verbose:
print(f' {msg}', file=sys.stderr)
return
if ignore_existing:
msg = f'skipping {file_path}, file already exists.'
log.info(msg)
if verbose:
print(f' {msg}', file=sys.stderr)
return
elif checksum or checksum_archive:
with open(file_path, 'rb') as fp:
md5_sum = utils.get_md5(fp)

if md5_sum == self.md5:
msg = f'skipping {file_path}, file already exists based on checksum.'
# Check if we should skip based on last modified time...
if not fileobj and not return_responses and os.path.exists(file_path.encode('utf-8')):
st = os.stat(file_path.encode('utf-8'))
if st.st_mtime == last_mod_mtime:
if self.name == f'{self.identifier}_files.xml' or (st.st_size == self.size):
msg = (f'skipping {file_path}, file already exists based on '
'length and date.')
log.info(msg)
if verbose:
print(f' {msg}', file=sys.stderr)
if checksum_archive:
# add file to checksum_archive to skip it next time
with open(checksum_archive_filename, 'a', encoding='utf-8') as f:
f.write(f'{file_path}\n')
return
elif not fileobj:
st = os.stat(file_path.encode('utf-8'))
if st.st_mtime == last_mod_mtime:
if self.name == f'{self.identifier}_files.xml' \
or (st.st_size == self.size):
msg = (f'skipping {file_path}, file already exists based on '
'length and date.')
log.info(msg)
if verbose:
print(f' {msg}', file=sys.stderr)
return

elif return_responses:
return response

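For context, a hedged usage sketch of the skip behavior this diff implements. It assumes checksum_archive is exposed as a keyword argument of download(), as the add_archive_file branch name and the code above suggest; the 'nasa' identifier is only an example, and this is not presented as the definitive API:

from internetarchive import get_item

item = get_item('nasa')

# First run: files are downloaded; with checksum_archive=True, files whose
# local MD5 matches the expected checksum are recorded in _checksum_archive.txt.
item.download(verbose=True, checksum_archive=True)

# Second run: files listed in _checksum_archive.txt are skipped before the
# request loop, without re-hashing; other unchanged files are still skipped
# by the length-and-date check shown in the second hunk.
item.download(verbose=True, checksum_archive=True)
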