diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000..be006de9 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,13 @@ +# Keep GitHub Actions up to date with GitHub's Dependabot... +# https://docs.github.com/en/code-security/dependabot/working-with-dependabot/keeping-your-actions-up-to-date-with-dependabot +# https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file#package-ecosystem +version: 2 +updates: + - package-ecosystem: github-actions + directory: / + groups: + github-actions: + patterns: + - "*" # Group all Actions updates into a single larger pull request + schedule: + interval: weekly diff --git a/.github/workflows/lint_python.yml b/.github/workflows/lint_python.yml index 3b8173e0..10877bce 100644 --- a/.github/workflows/lint_python.yml +++ b/.github/workflows/lint_python.yml @@ -4,8 +4,8 @@ jobs: lint_python: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 with: cache: pip python-version: 3.x diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 7c40d73c..7669221d 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -9,8 +9,8 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 with: python-version: 3.x cache: pip diff --git a/.github/workflows/test_install.yml b/.github/workflows/test_install.yml index 866d4782..eb365cef 100644 --- a/.github/workflows/test_install.yml +++ b/.github/workflows/test_install.yml @@ -11,6 +11,6 @@ jobs: matrix: setuptools-version: ["45.2.0", "58.1.0", "62.4.0"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - run: pip install setuptools=="${{ matrix.setuptools-version }}" - run: pip 
install . diff --git a/.github/workflows/tox.yml b/.github/workflows/tox.yml index 8cf26cad..4dfd9072 100644 --- a/.github/workflows/tox.yml +++ b/.github/workflows/tox.yml @@ -7,10 +7,10 @@ jobs: fail-fast: false max-parallel: 1 # Avoid timeout errors matrix: - python: ['3.7', '3.8', '3.9', '3.10', '3.11', 'pypy-3.7', 'pypy-3.8', 'pypy-3.9'] + python: ['3.8', '3.9', '3.10', '3.11', '3.12', 'pypy-3.9', 'pypy-3.10'] steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python }} cache: pip diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b1f255f4..cfe8f628 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,7 +21,7 @@ repos: - id: trailing-whitespace - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.261 + rev: v0.0.269 hooks: - id: ruff @@ -42,7 +42,7 @@ repos: - id: codespell # See setup.cfg for args - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.1.1 + rev: v1.3.0 hooks: - id: mypy additional_dependencies: diff --git a/HISTORY.rst b/HISTORY.rst index 4b92cb97..84a7eed0 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -3,13 +3,31 @@ Release History --------------- -3.6.0 (?) -+++++++++ +3.7.0 (2024-03-19) +++++++++++++++++++ + +**Features and Improvements** + +- Added support for JSON Patch test operations, via the ``expect`` parameter. +- Added support for moving values via --append-list + (Now, rather than ignoring any requests where the value is already present, + --append-list will move the value to the end of the list). +- Switched to importlib-metadata to drop deprecated pkg_resources. + +**Bugfixes** + +- Fixed automatic size hint on uploads. +- Fixed bug where auth wasn't being sent for searches with user_aggs params. 
+ +3.6.0 (2023-12-27) +++++++++++++++++++ **Features and Improvements** - Added ``set_scanner`` and ``--no-scanner`` options to upload to stop ia's default behavior of setting the scanner field in meta.xml on initial upload. +- ``0`` is now returned instead of an exception when search fails to retrieve the total number + of hits for a query. 3.5.0 (2023-05-09) ++++++++++++++++++ diff --git a/README.rst b/README.rst index 55919367..b6bbdcdb 100644 --- a/README.rst +++ b/README.rst @@ -30,7 +30,7 @@ You can install this module via pip: .. code:: bash - $ pip install internetarchive + $ python3 -m pip install internetarchive Binaries of the command-line tool are also available: diff --git a/docs/source/installation.rst b/docs/source/installation.rst index cadbe425..320cff93 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -5,37 +5,46 @@ Installation System-Wide Installation ------------------------- +------------------------- Installing the ``internetarchive`` library globally on your system can be done with `pip `_. -This is the recommended method for installing ``internetarchive`` (`see below `_ for details on installing pip):: +This is the recommended method for installing ``internetarchive`` (`see below `_ for details on installing pip). +If you are on Mac OS X, refer to the `Mac OS X section `_ below before proceeding. +Once you're ready to install, run the following command:: - $ sudo pip install internetarchive + $ sudo python3 -m pip install internetarchive -or, with `easy_install `_:: +Updating Your $PATH +~~~~~~~~~~~~~~~~~~~ - $ sudo easy_install internetarchive +Once you have successfully installed ``internetarchive``, you may need to update your ``$PATH`` (e.g. if running ``ia`` in your terminal returns an error). 
+If you receive a command not found error, run the following command to update your ``$PATH``:: -Either of these commands will install the ``internetarchive`` Python library and ``ia`` command-line tool on your system. + $ echo "$(python3 -m site --user-base)/bin" | sudo tee -a /etc/paths -**Note**: Some versions of Mac OS X come with Python libraries that are required by ``internetarchive`` (e.g. the Python package ``six``). -This can cause installation issues. If your installation is failing with a message that looks something like:: +Updating ia +~~~~~~~~~~~ - OSError: [Errno 1] Operation not permitted: '/var/folders/bk/3wx7qs8d0x79tqbmcdmsk1040000gp/T/pip-TGyjVo-uninstall/System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python/six-1.4.1-py2.7.egg-info' +To update, you can run the following command:: -You can use the ``--ignore-installed`` parameter in ``pip`` to ignore the libraries that are already installed, and continue with the rest of the installation:: + $ sudo python3 -m pip install --upgrade internetarchive - $ sudo pip install --ignore-installed internetarchive +Mac OS X +~~~~~~~~ -More details on this issue can be found here: https://github.com/pypa/pip/issues/3165 +While newer versions of Mac OS X ship with Python 3 installed, it is recommended to install an updated version of Python 3. +You can do so with `Homebrew `_:: + + $ brew install python3 Installing Pip ~~~~~~~~~~~~~~ -Pip can be `installed with the get-pip.py script `_:: +If you are running Python 3.4+, you should already have ``pip`` installed. +If it is not already installed, it can be `installed with the get-pip.py script `_:: $ curl -LOs https://bootstrap.pypa.io/get-pip.py - $ python get-pip.py + $ python3 get-pip.py virtualenv @@ -45,11 +54,7 @@ If you don't want to, or can't, install the package system-wide you can use ``vi First, make sure ``virtualenv`` is installed on your system. 
If it's not, you can do so with pip:: - $ sudo pip install virtualenv - -With ``easy_install``:: - - $ sudo easy_install virtualenv + $ sudo python3 -m pip install virtualenv Or your systems package manager, ``apt-get`` for example:: @@ -92,7 +97,7 @@ If you are on an older operating system that only has Python 2 installed, it's h You can install and use version v2.3.0 with pip:: - $ pip install internetarchive==2.3.0 + $ sudo python2 -m pip install internetarchive==2.3.0 You can also download a binary of v2.3.0:: @@ -100,15 +105,6 @@ You can also download a binary of v2.3.0:: $ chmod +x ia-py2 -Snap ----- - -You can install the latest ``ia`` `snap `_, and help testing the most recent changes of the master branch in `all the supported Linux distros `_ with:: - - $ sudo snap install ia --edge - -Every time a new version of ``ia`` is pushed to the store, you will get it updated automatically. - Get the Code ------------ diff --git a/internetarchive/__version__.py b/internetarchive/__version__.py index 0736c156..8c3336cc 100644 --- a/internetarchive/__version__.py +++ b/internetarchive/__version__.py @@ -1 +1 @@ -__version__ = '3.6.0.dev2' +__version__ = '3.7.0' diff --git a/internetarchive/cli/ia.py b/internetarchive/cli/ia.py index 8e044c36..e00e5b6b 100755 --- a/internetarchive/cli/ia.py +++ b/internetarchive/cli/ia.py @@ -64,7 +64,11 @@ import sys from docopt import docopt, printable_usage -from pkg_resources import DistributionNotFound, iter_entry_points + +if sys.version_info < (3, 10): + from importlib_metadata import entry_points # type: ignore[import] +else: + from importlib.metadata import entry_points from schema import Or, Schema, SchemaError # type: ignore[import] from internetarchive import __version__ @@ -97,11 +101,11 @@ def load_ia_module(cmd: str): return __import__(_module, fromlist=['internetarchive.cli']) else: _module = f'ia_{cmd}' - for ep in iter_entry_points('internetarchive.cli.plugins'): + for ep in 
entry_points(group='internetarchive.cli.plugins'): if ep.name == _module: return ep.load() raise ImportError - except (ImportError, DistributionNotFound): + except (ImportError): print(f"error: '{cmd}' is not an ia command! See 'ia help'", file=sys.stderr) matches = '\t'.join(difflib.get_close_matches(cmd, cmd_aliases.values())) diff --git a/internetarchive/cli/ia_metadata.py b/internetarchive/cli/ia_metadata.py index c8a8d9aa..b4833695 100644 --- a/internetarchive/cli/ia_metadata.py +++ b/internetarchive/cli/ia_metadata.py @@ -22,17 +22,20 @@ ia metadata ... [--exists | --formats] [--header=...] ia metadata ... --modify=... [--target=] [--priority=] [--header=...] - [--timeout=] + [--timeout=] [--expect=...] ia metadata ... --remove=... [--priority=] [--header=...] [--timeout=] + [--expect=...] ia metadata ... [--append=... | --append-list=...] [--priority=] [--target=] [--header=...] [--timeout=] + [--expect=...] ia metadata ... --insert=... [--priority=] [--target=] [--header=...] - [--timeout=] + [--timeout=] [--expect=...] ia metadata --spreadsheet= [--priority=] [--modify=...] [--header=...] [--timeout=] + [--expect=...] ia metadata --help options: @@ -42,8 +45,10 @@ -t, --target= The metadata target to modify. -a, --append=... Append a string to a metadata element. -A, --append-list=... Append a field to a metadata element. - -i, --insert=... Insert a value into a multi-value field given + -i, --insert=... Insert a value into a multi-value field given an index (e.g. `--insert=collection[0]:foo`). + -E, --expect=... Test an expectation server-side before applying + patch to item metadata. -s, --spreadsheet= Modify metadata in bulk using a spreadsheet as input. 
-e, --exists Check if an item exists @@ -79,13 +84,14 @@ def modify_metadata(item: item.Item, metadata: Mapping, args: Mapping) -> Response: append = bool(args['--append']) + expect = get_args_dict(args['--expect']) append_list = bool(args['--append-list']) insert = bool(args['--insert']) try: r = item.modify_metadata(metadata, target=args['--target'], append=append, - priority=args['--priority'], append_list=append_list, - headers=args['--header'], insert=insert, - timeout=args['--timeout']) + expect=expect, priority=args['--priority'], + append_list=append_list, headers=args['--header'], + insert=insert, timeout=args['--timeout']) assert isinstance(r, Response) # mypy: modify_metadata() -> Request | Response except ItemLocateError as exc: print(f'{item.identifier} - error: {exc}', file=sys.stderr) @@ -178,6 +184,7 @@ def main(argv: dict, session: session.ArchiveSession) -> None: str: bool, '': list, '--modify': list, + '--expect': list, '--header': Or(None, And(Use(get_args_header_dict), dict), error='--header must be formatted as --header="key:value"'), '--append': list, diff --git a/internetarchive/cli/ia_reviews.py b/internetarchive/cli/ia_reviews.py index d06de76d..759ee536 100644 --- a/internetarchive/cli/ia_reviews.py +++ b/internetarchive/cli/ia_reviews.py @@ -68,7 +68,7 @@ def main(argv, session: ArchiveSession) -> None: print(r.text) sys.exit(0) except HTTPError as exc: - if exc.response.status_code == 404: + if exc.response.status_code == 404: # type: ignore sys.exit(0) else: raise exc diff --git a/internetarchive/iarequest.py b/internetarchive/iarequest.py index b1ee3f94..475785c9 100644 --- a/internetarchive/iarequest.py +++ b/internetarchive/iarequest.py @@ -173,6 +173,7 @@ def __init__(self, access_key=None, secret_key=None, append=None, + expect=None, append_list=None, insert=None, **kwargs): @@ -188,6 +189,7 @@ def __init__(self, self.target = target self.priority = priority self.append = append + self.expect = expect self.append_list = 
append_list self.insert = insert @@ -210,6 +212,7 @@ def prepare(self): source_metadata=self.source_metadata, target=self.target, append=self.append, + expect=self.expect, append_list=self.append_list, insert=self.insert, ) @@ -220,13 +223,14 @@ class MetadataPreparedRequest(requests.models.PreparedRequest): def prepare(self, method=None, url=None, headers=None, files=None, data=None, params=None, auth=None, cookies=None, hooks=None, metadata={}, # noqa: B006 source_metadata=None, target=None, priority=None, append=None, - append_list=None, insert=None): + expect=None, append_list=None, insert=None): self.prepare_method(method) self.prepare_url(url, params) + self.identifier = self.url.split("?")[0].split("/")[-1] self.prepare_headers(headers) self.prepare_cookies(cookies) self.prepare_body(metadata, source_metadata, target, priority, append, - append_list, insert) + append_list, insert, expect) self.prepare_auth(auth, url) # Note that prepare_auth must be last to enable authentication schemes # such as OAuth to work on a fully prepared request. 
@@ -235,7 +239,7 @@ def prepare(self, method=None, url=None, headers=None, files=None, data=None, self.prepare_hooks(hooks) def prepare_body(self, metadata, source_metadata, target, priority, append, - append_list, insert): + append_list, insert, expect): priority = priority or -5 if not source_metadata: @@ -260,21 +264,25 @@ def prepare_body(self, metadata, source_metadata, target, priority, append, patch = prepare_patch(metadata[key], source_metadata['metadata'], append, + expect, append_list, insert) except KeyError: - raise ItemLocateError + raise ItemLocateError(f"{self.identifier} cannot be located " + "because it is dark or does not exist.") elif key.startswith('files'): patch = prepare_files_patch(metadata[key], source_metadata['files'], append, key, append_list, - insert) + insert, + expect) else: key = key.split('/')[0] patch = prepare_target_patch(metadata, source_metadata, append, - target, append_list, key, insert) + target, append_list, key, insert, + expect) changes.append({'target': key, 'patch': patch}) self.data = { '-changes': json.dumps(changes), @@ -287,16 +295,18 @@ def prepare_body(self, metadata, source_metadata, target, priority, append, target = 'metadata' try: patch = prepare_patch(metadata, source_metadata['metadata'], append, - append_list, insert) + expect, append_list, insert) except KeyError: - raise ItemLocateError + raise ItemLocateError(f"{self.identifier} cannot be located " + "because it is dark or does not exist.") elif 'files' in target: patch = prepare_files_patch(metadata, source_metadata['files'], append, - target, append_list, insert) + target, append_list, insert, expect) else: metadata = {target: metadata} patch = prepare_target_patch(metadata, source_metadata, append, - target, append_list, target, insert) + target, append_list, target, insert, + expect) self.data = { '-patch': json.dumps(patch), '-target': target, @@ -306,7 +316,8 @@ def prepare_body(self, metadata, source_metadata, target, priority, append, 
super().prepare_body(self.data, None) -def prepare_patch(metadata, source_metadata, append, append_list=None, insert=None): +def prepare_patch(metadata, source_metadata, append, + expect=None, append_list=None, insert=None): destination_metadata = source_metadata.copy() if isinstance(metadata, list): prepared_metadata = metadata @@ -330,11 +341,28 @@ def prepare_patch(metadata, source_metadata, append, append_list=None, insert=No # Delete metadata items where value is REMOVE_TAG. destination_metadata = delete_items_from_dict(destination_metadata, 'REMOVE_TAG') patch = make_patch(source_metadata, destination_metadata).patch - return patch + + # Add test operations to patch. + patch_tests = [] + for expect_key in expect: + idx = None + if '[' in expect_key: + idx = int(expect_key.split('[')[1].strip(']')) + key = expect_key.split('[')[0] + path = f'/{key}/{idx}' + p_test = {'op': 'test', 'path': path, 'value': expect[expect_key]} + else: + path = f'/{expect_key}' + p_test = {'op': 'test', 'path': path, 'value': expect[expect_key]} + + patch_tests.append(p_test) + final_patch = patch_tests + patch + + return final_patch def prepare_target_patch(metadata, source_metadata, append, target, append_list, key, - insert): + insert, expect): def dictify(lst, key=None, value=None): if not lst: @@ -351,18 +379,18 @@ def dictify(lst, key=None, value=None): source_metadata = source_metadata.get(_k, {}) else: source_metadata[_k] = source_metadata.get(_k, {}).get(_k, {}) - patch = prepare_patch(metadata, source_metadata, append, append_list, insert) + patch = prepare_patch(metadata, source_metadata, append, expect, append_list, insert) return patch def prepare_files_patch(metadata, source_metadata, append, target, append_list, - insert): + insert, expect): filename = '/'.join(target.split('/')[1:]) for f in source_metadata: if f.get('name') == filename: source_metadata = f break - patch = prepare_patch(metadata, source_metadata, append, append_list, insert) + patch = 
prepare_patch(metadata, source_metadata, append, expect, append_list, insert) return patch @@ -442,7 +470,7 @@ def rm_index(key): continue else: if v in source_metadata[key]: - continue + source_metadata[key] = [x for x in source_metadata[key] if x != v] if not isinstance(source_metadata[key], list): prepared_metadata[key] = [source_metadata[key]] else: diff --git a/internetarchive/item.py b/internetarchive/item.py index 18a2d072..7202976b 100644 --- a/internetarchive/item.py +++ b/internetarchive/item.py @@ -57,7 +57,7 @@ iter_directory, json, norm_filepath, - recursive_file_count, + recursive_file_count_and_size, validate_s3_identifier, ) @@ -770,6 +770,7 @@ def modify_metadata(self, metadata: Mapping, target: str | None = None, append: bool = False, + expect: Mapping | None = None, append_list: bool = False, insert: bool = False, priority: int = 0, @@ -794,6 +795,9 @@ def modify_metadata(self, :param append: Append value to an existing multi-value metadata field. + :param expect: Provide a dict of expectations to be tested + server-side before applying patch to item metadata. + :param append_list: Append values to an existing multi-value metadata field. No duplicate values will be added. 
@@ -811,6 +815,7 @@ def modify_metadata(self, secret_key = secret_key or self.session.secret_key debug = bool(debug) headers = headers or {} + expect = expect or {} request_kwargs = request_kwargs or {} if timeout: request_kwargs["timeout"] = float(timeout) # type: ignore @@ -835,6 +840,7 @@ def modify_metadata(self, access_key=access_key, secret_key=secret_key, append=append, + expect=expect, append_list=append_list, insert=insert) # Must use Session.prepare_request to make sure session settings @@ -1106,9 +1112,9 @@ def _build_request(): return response except HTTPError as exc: try: - msg = get_s3_xml_text(exc.response.content) + msg = get_s3_xml_text(exc.response.content) # type: ignore except ExpatError: # probably HTTP 500 error and response is invalid XML - msg = ('IA S3 returned invalid XML ' + msg = ('IA S3 returned invalid XML ' # type: ignore f'(HTTP status code {exc.response.status_code}). ' 'This is a server side error which is either temporary, ' 'or requires the intervention of IA admins.') @@ -1194,11 +1200,13 @@ def upload(self, files, responses = [] file_index = 0 - if queue_derive and total_files is None: - if checksum: - total_files = recursive_file_count(files, item=self, checksum=True) - else: - total_files = recursive_file_count(files, item=self, checksum=False) + headers = headers or {} + if (queue_derive or not headers.get('x-archive-size-hint')) and total_files == 0: + total_files, total_size = recursive_file_count_and_size(files, + item=self, + checksum=checksum) + if not headers.get('x-archive-size-hint'): + headers['x-archive-size-hint'] = str(total_size) file_metadata = None for f in files: if isinstance(f, dict): diff --git a/internetarchive/search.py b/internetarchive/search.py index 34791467..975a261d 100644 --- a/internetarchive/search.py +++ b/internetarchive/search.py @@ -130,7 +130,7 @@ def _advanced_search(self): auth=self.auth, **self.request_kwargs) j = r.json() - num_found = int(j['response']['numFound']) + num_found = 
int(j.get('response', {}).get('numFound', 0)) if not self._num_found: self._num_found = num_found if j.get('error'): @@ -153,7 +153,7 @@ def _scrape(self): if j.get('error'): yield j if not num_found: - num_found = int(j['total']) + num_found = int(j.get('total') or '0') if not self._num_found: self._num_found = num_found self._handle_scrape_error(j) @@ -214,7 +214,10 @@ def _user_aggs(self): self.params['page'] = '1' self.params['rows'] = '1' self.params['output'] = 'json' - r = self.session.get(self.search_url, params=self.params, **self.request_kwargs) + r = self.session.get(self.search_url, + params=self.params, + auth=self.auth, + **self.request_kwargs) j = r.json() if j.get('error'): yield j diff --git a/internetarchive/utils.py b/internetarchive/utils.py index 38b09546..e9d17206 100644 --- a/internetarchive/utils.py +++ b/internetarchive/utils.py @@ -216,15 +216,19 @@ def _get_tag_text(tag_name, xml_obj): def get_file_size(file_obj) -> int | None: - try: - file_obj.seek(0, os.SEEK_END) - size = file_obj.tell() - # Avoid OverflowError. - if size > sys.maxsize: + if is_filelike_obj(file_obj): + try: + file_obj.seek(0, os.SEEK_END) + size = file_obj.tell() + # Avoid OverflowError. + if size > sys.maxsize: + size = None + file_obj.seek(0, os.SEEK_SET) + except OSError: size = None - file_obj.seek(0, os.SEEK_SET) - except OSError: - size = None + else: + st = os.stat(file_obj) + size = st.st_size return size @@ -237,11 +241,14 @@ def iter_directory(directory: str): yield (filepath, key) -def recursive_file_count(files, item=None, checksum=False): - """Given a filepath or list of filepaths, return the total number of files.""" +def recursive_file_count_and_size(files, item=None, checksum=False): + """Given a filepath or list of filepaths, return the total number and size of files. + If `checksum` is `True`, skip over files whose MD5 hash matches any file in the `item`. 
+ """ if not isinstance(files, (list, set)): files = [files] total_files = 0 + total_size = 0 if checksum is True: md5s = [f.get('md5') for f in item.files] else: @@ -264,24 +271,27 @@ def recursive_file_count(files, item=None, checksum=False): except (AttributeError, TypeError): is_dir = False if is_dir: - for x, _ in iter_directory(f): - if checksum is True: - with open(x, 'rb') as fh: - lmd5 = get_md5(fh) - if lmd5 in md5s: - continue - total_files += 1 + it = iter_directory(f) else: + it = [(f, None)] + for x, _ in it: if checksum is True: try: - with open(f, 'rb') as fh: + with open(x, 'rb') as fh: lmd5 = get_md5(fh) except TypeError: # Support file-like objects. - lmd5 = get_md5(f) + lmd5 = get_md5(x) if lmd5 in md5s: continue + total_size += get_file_size(x) total_files += 1 + return total_files, total_size + + +def recursive_file_count(*args, **kwargs): + """Like `recursive_file_count_and_size`, but returns only the file count.""" + total_files, _ = recursive_file_count_and_size(*args, **kwargs) return total_files @@ -294,6 +304,16 @@ def is_dir(obj) -> bool: return False +def is_filelike_obj(obj) -> bool: + """Distinguish file-like from path-like objects""" + try: + os.fspath(obj) + except TypeError: + return True + else: + return False + + def reraise_modify( caught_exc: Exception, append_msg: str, diff --git a/pyproject.toml b/pyproject.toml index ef31cd55..2ed3e1d8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -80,11 +80,7 @@ max-statements = 124 [tool.ruff.per-file-ignores] "__init__.py" = ["E402"] -"tests/*" = [ - "PT017", - "S101", -] -"tests/conftest.py" = ["B018", "F811"] +"tests/*" = ["PT017", "S101"] "tests/cli/test_ia_list.py" = ["E741"] "tests/test_api.py" = ["E712"] "tests/test_config.py" = ["PT011"] diff --git a/setup.cfg b/setup.cfg index 1b6af950..28c63a4d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -31,6 +31,7 @@ install_requires = schema>=0.4.0 tqdm>=4.0.0 urllib3>=1.26.0 + importlib-metadata>=3.6.0 ;python_version <= "3.10" 
python_requires = >=3.7 include_package_data = True zip_safe = False @@ -59,7 +60,7 @@ docs = test = pytest==7.1.2 responses==0.20.0 - ruff==0.0.261 + ruff==0.0.269 types = tqdm-stubs>=0.2.0 types-colorama @@ -80,7 +81,6 @@ ignore-words-list = alers [mypy] exclude = ^\.git/|^__pycache__/|^docs/source/conf.py$|^old/|^build/|^dist/|\.tox python_version = 3.9 -install_types = True pretty = True scripts_are_modules = True show_error_codes = True diff --git a/tests/cli/test_ia_upload.py b/tests/cli/test_ia_upload.py index 3255d981..3d876085 100644 --- a/tests/cli/test_ia_upload.py +++ b/tests/cli/test_ia_upload.py @@ -124,6 +124,27 @@ def test_ia_upload_size_hint(capsys, tmpdir_ch, nasa_mocker): assert 'Accept-Encoding:gzip, deflate' in err +def test_ia_upload_automatic_size_hint_files(capsys, tmpdir_ch, nasa_mocker): + with open('foo', 'w') as fh: + fh.write('foo') + with open('bar', 'w') as fh: + fh.write('bar') + + ia_call(['ia', 'upload', '--debug', 'nasa', 'foo', 'bar']) + out, err = capsys.readouterr() + assert 'x-archive-size-hint:6' in err + +def test_ia_upload_automatic_size_hint_dir(capsys, tmpdir_ch, nasa_mocker): + with open('foo', 'w') as fh: + fh.write('foo') + with open('bar', 'w') as fh: + fh.write('bar') + + ia_call(['ia', 'upload', '--debug', 'nasa', '.']) + out, err = capsys.readouterr() + assert 'x-archive-size-hint:6' in err + + def test_ia_upload_unicode(tmpdir_ch, caplog): with open('தமிழ் - baz ∆.txt', 'w') as fh: fh.write('unicode foo') diff --git a/tests/conftest.py b/tests/conftest.py index 0b1392a7..1f0ef92c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -12,17 +12,6 @@ from internetarchive.cli import ia from internetarchive.utils import json -try: - FileNotFoundError -except NameError: - FileNotFoundError = IOError - -try: - WindowsError # type: ignore[used-before-def] -except NameError: - class WindowsError(Exception): - pass - PROTOCOL = 'https:' BASE_URL = 'https://archive.org/' METADATA_URL = f'{BASE_URL}metadata/' @@ -74,7 
+63,7 @@ def load_test_data_file(filename): def call_cmd(cmd, expected_exit_code=0): - proc = Popen(cmd, shell=True, stdout=PIPE, stderr=PIPE) + proc = Popen(cmd, shell=True, stdout=PIPE, stderr=PIPE) # noqa: S602 stdout, stderr = proc.communicate() stdout = stdout.decode('utf-8').strip() stderr = stderr.decode('utf-8').strip() @@ -134,5 +123,5 @@ def nasa_metadata(): # TODO: Why is this function defined twice in this file? See issue #505 @pytest.fixture() # type: ignore -def nasa_item(nasa_mocker): +def nasa_item(nasa_mocker): # noqa: F811 return get_item('nasa') diff --git a/tests/requirements.txt b/tests/requirements.txt index 7b3df803..0cf7ee05 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -1,3 +1,3 @@ pytest==7.2.2 responses==0.23.1 -ruff==0.0.261 +ruff==0.0.269 diff --git a/tests/test_item.py b/tests/test_item.py index 9e6c6c50..ee4f2b80 100644 --- a/tests/test_item.py +++ b/tests/test_item.py @@ -582,6 +582,40 @@ def test_upload_checksum(tmpdir, nasa_item): assert r.status_code is None +def test_upload_automatic_size_hint(tmpdir, nasa_item): + with IaRequestsMock(assert_all_requests_are_fired=False) as rsps: + _expected_headers = deepcopy(EXPECTED_S3_HEADERS) + del _expected_headers['x-archive-size-hint'] + _expected_headers['x-archive-size-hint'] = '15' + rsps.add(responses.PUT, S3_URL_RE, + adding_headers=_expected_headers) + + files = [] + with open(os.path.join(tmpdir, 'file'), 'w') as fh: + fh.write('a') + files.append(os.path.join(tmpdir, 'file')) + + os.mkdir(os.path.join(tmpdir, 'dir')) + with open(os.path.join(tmpdir, 'dir', 'file0'), 'w') as fh: + fh.write('bb') + with open(os.path.join(tmpdir, 'dir', 'file1'), 'w') as fh: + fh.write('cccc') + files.append(os.path.join(tmpdir, 'dir')) + + with open(os.path.join(tmpdir, 'obj'), 'wb') as fh: + fh.write(b'dddddddd') + fh.seek(0, os.SEEK_SET) + files.append(fh) + + _responses = nasa_item.upload(files, + access_key='a', + secret_key='b') + for r in _responses: + headers = 
{k.lower(): str(v) for k, v in r.headers.items()} + del headers['content-type'] + assert headers == _expected_headers + + def test_modify_metadata(nasa_item, nasa_metadata): with IaRequestsMock(assert_all_requests_are_fired=False) as rsps: rsps.add(responses.POST, f'{PROTOCOL}//archive.org/metadata/nasa') diff --git a/tox.ini b/tox.ini index 062086a0..82a5cd95 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py37,py38,py39,py310,py311,pypy37,pypy38,pypy39 +envlist = py38,py39,py310,py311,py312,pypy39,pypy310 [testenv] deps = -r tests/requirements.txt @@ -7,9 +7,6 @@ deps = -r tests/requirements.txt commands = ruff . pytest {posargs} -[testenv:py37] -basepython=python3.7 - [testenv:py38] basepython=python3.8 @@ -21,3 +18,6 @@ basepython=python3.10 [testenv:py311] basepython=python3.11 + +[testenv:py312] +basepython=python3.12