Skip to content

Commit

Permalink
Merge pull request #650 from ChlodAlejandro/master
Browse files Browse the repository at this point in the history
Skip early when downloading existing file
  • Loading branch information
jjjake authored May 28, 2024
2 parents 4efd45f + d31e5c3 commit c97ec24
Showing 1 changed file with 26 additions and 25 deletions.
51 changes: 26 additions & 25 deletions internetarchive/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,25 @@ def download(# noqa: max-complexity=38

parent_dir = os.path.dirname(file_path)

# Check if we should skip...
if not return_responses and os.path.exists(file_path.encode('utf-8')):
if ignore_existing:
msg = f'skipping {file_path}, file already exists.'
log.info(msg)
if verbose:
print(f' {msg}', file=sys.stderr)
return
elif checksum:
with open(file_path, 'rb') as fp:
md5_sum = utils.get_md5(fp)

if md5_sum == self.md5:
msg = f'skipping {file_path}, file already exists based on checksum.'
log.info(msg)
if verbose:
print(f' {msg}', file=sys.stderr)
return

# Retry loop
while True:
try:
Expand Down Expand Up @@ -256,35 +275,17 @@ def download(# noqa: max-complexity=38

response.raise_for_status()

# Check if we should skip...
if not return_responses and os.path.exists(file_path.encode('utf-8')):
if ignore_existing:
msg = f'skipping {file_path}, file already exists.'
log.info(msg)
if verbose:
print(f' {msg}', file=sys.stderr)
return
elif checksum:
with open(file_path, 'rb') as fp:
md5_sum = utils.get_md5(fp)

if md5_sum == self.md5:
msg = f'skipping {file_path}, file already exists based on checksum.'
# Check if we should skip based on last modified time...
if not fileobj and not return_responses and os.path.exists(file_path.encode('utf-8')):
st = os.stat(file_path.encode('utf-8'))
if st.st_mtime == last_mod_mtime:
if self.name == f'{self.identifier}_files.xml' or (st.st_size == self.size):
msg = (f'skipping {file_path}, file already exists based on '
'length and date.')
log.info(msg)
if verbose:
print(f' {msg}', file=sys.stderr)
return
elif not fileobj:
st = os.stat(file_path.encode('utf-8'))
if st.st_mtime == last_mod_mtime:
if self.name == f'{self.identifier}_files.xml' \
or (st.st_size == self.size):
msg = (f'skipping {file_path}, file already exists based on '
'length and date.')
log.info(msg)
if verbose:
print(f' {msg}', file=sys.stderr)
return

elif return_responses:
return response
Expand Down

0 comments on commit c97ec24

Please sign in to comment.