diff --git a/cloudpathlib/__init__.py b/cloudpathlib/__init__.py index da4fe28e..c51fc45e 100644 --- a/cloudpathlib/__init__.py +++ b/cloudpathlib/__init__.py @@ -4,9 +4,11 @@ from .azure.azblobclient import AzureBlobClient from .azure.azblobpath import AzureBlobPath from .cloudpath import CloudPath, implementation_registry -from .s3.s3client import S3Client -from .gs.gspath import GSPath from .gs.gsclient import GSClient +from .gs.gspath import GSPath +from .http.httpclient import HttpClient +from .http.httppath import HttpPath +from .s3.s3client import S3Client from .s3.s3path import S3Path @@ -27,6 +29,8 @@ "implementation_registry", "GSClient", "GSPath", + "HttpClient", + "HttpPath", "S3Client", "S3Path", ] diff --git a/cloudpathlib/cloudpath.py b/cloudpathlib/cloudpath.py index 4aa895a6..8f469c11 100644 --- a/cloudpathlib/cloudpath.py +++ b/cloudpathlib/cloudpath.py @@ -27,7 +27,6 @@ Generator, List, Optional, - Sequence, Tuple, Type, TYPE_CHECKING, @@ -286,11 +285,11 @@ def __setstate__(self, state: Dict[str, Any]) -> None: @property def _no_prefix(self) -> str: - return self._str[len(self.cloud_prefix) :] + return self._str[len(self.anchor) :] @property def _no_prefix_no_drive(self) -> str: - return self._str[len(self.cloud_prefix) + len(self.drive) :] + return self._str[len(self.anchor) + len(self.drive) :] @overload @classmethod @@ -881,9 +880,9 @@ def relative_to(self, other: Self, walk_up: bool = False) -> PurePosixPath: # absolute) if not isinstance(other, CloudPath): raise ValueError(f"{self} is a cloud path, but {other} is not") - if self.cloud_prefix != other.cloud_prefix: + if self.anchor != other.anchor: raise ValueError( - f"{self} is a {self.cloud_prefix} path, but {other} is a {other.cloud_prefix} path" + f"{self} is a {self.anchor} path, but {other} is a {other.anchor} path" ) kwargs = dict(walk_up=walk_up) @@ -921,7 +920,7 @@ def parent(self) -> Self: return self._dispatch_to_path("parent") @property - def parents(self) -> Sequence[Self]: + 
# Reconstructed module from patch: cloudpathlib/http/httpclient.py
from datetime import datetime
import mimetypes
import os
import re
import shutil
import urllib.error
import urllib.parse
import urllib.request
import urllib.response
from pathlib import Path
from typing import Callable, Iterable, Optional, Tuple, Union

import pytz

from cloudpathlib.client import Client, register_client_class
from cloudpathlib.enums import FileCacheMode

from .httppath import HttpPath


@register_client_class("http")
class HttpClient(Client):
    """Client for ``http://`` URLs built on the stdlib ``urllib`` stack.

    Files are read with GET, written with PUT, removed with DELETE, and
    probed with HEAD. Directory listings are parsed out of HTML index pages,
    either with a caller-supplied parser or a minimal anchor-tag regex.
    """

    def __init__(
        self,
        file_cache_mode: Optional[Union[str, FileCacheMode]] = None,
        local_cache_dir: Optional[Union[str, os.PathLike]] = None,
        content_type_method: Optional[Callable] = mimetypes.guess_type,
        auth: Optional[urllib.request.BaseHandler] = None,
        custom_list_page_parser: Optional[Callable[[str], Iterable[str]]] = None,
    ):
        """Instantiate the client.

        Args:
            file_cache_mode: How the local file cache is managed (see ``FileCacheMode``).
            local_cache_dir: Directory used to cache downloaded files.
            content_type_method: Callable used to guess the Content-Type on upload;
                defaults to ``mimetypes.guess_type``.
            auth: Optional ``urllib.request.BaseHandler`` (e.g. an auth handler)
                used when building the opener.
            custom_list_page_parser: Callable that extracts link targets from a
                directory-listing page; falls back to a simple anchor-tag regex.
        """
        super().__init__(file_cache_mode, local_cache_dir, content_type_method)
        self.auth = auth

        if self.auth is None:
            self.opener = urllib.request.build_opener()
        else:
            # BUG FIX: original assigned to `self.openener` (typo), so every
            # later `self.opener.open(...)` call raised AttributeError whenever
            # an auth handler was supplied.
            self.opener = urllib.request.build_opener(self.auth)

        self.custom_list_page_parser = custom_list_page_parser

    def _get_metadata(self, cloud_path: HttpPath) -> dict:
        """Return size / last-modified / content-type parsed from response headers."""
        with self.opener.open(cloud_path.as_url()) as response:
            last_modified = response.headers.get("Last-Modified", None)

            if last_modified is not None:
                # per https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Last-Modified
                last_modified = datetime.strptime(last_modified, "%a, %d %b %Y %H:%M:%S %Z")

                # should always be utc https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Last-Modified#gmt
                last_modified = last_modified.replace(tzinfo=pytz.UTC)

            return {
                "size": int(response.headers.get("Content-Length", 0)),
                "last_modified": last_modified,
                "content_type": response.headers.get("Content-Type", None),
            }

    def _download_file(self, cloud_path: HttpPath, local_path: Union[str, os.PathLike]) -> Path:
        """Stream the GET response body for `cloud_path` into `local_path`."""
        local_path = Path(local_path)
        with self.opener.open(cloud_path.as_url()) as response:
            with open(local_path, "wb") as out_file:
                shutil.copyfileobj(response, out_file)
        return local_path

    def _exists(self, cloud_path: HttpPath) -> bool:
        """HEAD the URL; 404 or an unreachable server means False, other HTTP errors re-raise."""
        request = urllib.request.Request(cloud_path.as_url(), method="HEAD")
        try:
            with self.opener.open(request) as response:
                return response.status == 200
        except (urllib.error.HTTPError, urllib.error.URLError) as e:
            if isinstance(e, urllib.error.URLError) or e.code == 404:
                return False
            raise

    def _move_file(self, src: HttpPath, dst: HttpPath, remove_src: bool = True) -> HttpPath:
        """Copy `src` to `dst` server-side (via re-upload), optionally deleting `src`."""
        # NOTE(review): passes a cloud path where `_upload_file` expects a local
        # path; this works only because CloudPath implements __fspath__ (which
        # downloads to the cache) — confirm this is the intended mechanism.
        self._upload_file(src, dst)
        if remove_src:
            self._remove(src)
        return dst

    def _remove(self, cloud_path: HttpPath, missing_ok: bool = True) -> None:
        """DELETE the URL; a 404 is swallowed when `missing_ok` is True."""
        request = urllib.request.Request(cloud_path.as_url(), method="DELETE")
        try:
            with self.opener.open(request) as response:
                if response.status != 204:
                    raise Exception(f"Failed to delete {cloud_path}.")
        except urllib.error.HTTPError as e:
            if e.code == 404 and missing_ok:
                pass
            else:
                # NOTE(review): raising FileNotFoundError even for non-404 HTTP
                # errors (e.g. 403) is misleading — confirm this is intended.
                raise FileNotFoundError(f"Failed to delete {cloud_path}.")

    def _list_dir(self, cloud_path: HttpPath, recursive: bool) -> Iterable[Tuple[HttpPath, bool]]:
        """Yield (path, is_dir) pairs parsed from the server's directory listing page."""
        try:
            with self.opener.open(cloud_path.as_url()) as response:
                # Parse the directory listing
                for path, is_dir in self._parse_list_dir_response(
                    response.read().decode(), base_url=str(cloud_path)
                ):
                    yield path, is_dir

                    # BUG FIX: the recursion must happen per entry, inside the
                    # loop; flattened source placed it after the loop, which
                    # would descend into only the final listed directory.
                    if recursive and is_dir:
                        yield from self._list_dir(path, recursive=True)

        except:  # noqa E722
            raise NotImplementedError(
                "Unable to parse response as a listing of files; please provide a custom parser as `custom_list_page_parser`."
            )

    def _upload_file(self, local_path: Union[str, os.PathLike], cloud_path: HttpPath) -> HttpPath:
        """PUT the bytes of `local_path` to `cloud_path` with a guessed Content-Type."""
        local_path = Path(local_path)

        # BUG FIX: `content_type` was only assigned inside the `if`, so a
        # client constructed with `content_type_method=None` hit a NameError
        # on the headers line below.
        content_type = None
        if self.content_type_method is not None:
            content_type, _ = self.content_type_method(local_path)

        headers = {"Content-Type": content_type or "application/octet-stream"}

        with open(local_path, "rb") as file_data:
            request = urllib.request.Request(
                cloud_path.as_url(), data=file_data.read(), method="PUT", headers=headers
            )
            with self.opener.open(request) as response:
                if response.status != 201 and response.status != 200:
                    raise Exception(f"Failed to upload {local_path} to {cloud_path}.")
        return cloud_path

    def _get_public_url(self, cloud_path: HttpPath) -> str:
        """HTTP URLs are already public; return the path's own URL."""
        return cloud_path.as_url()

    def _generate_presigned_url(self, cloud_path: HttpPath, expire_seconds: int = 60 * 60) -> str:
        raise NotImplementedError("Presigned URLs are not supported using urllib.")

    def _parse_list_dir_response(
        self, response: str, base_url: str
    ) -> Iterable[Tuple[HttpPath, bool]]:
        """Extract (HttpPath, is_dir) pairs from an HTML listing page.

        NOTE(review): the original body of this method was garbled in the
        source; the anchor-tag regex and join logic below are a reconstruction
        — verify against the upstream implementation.
        """
        # Ensure base_url ends with a trailing slash so joining works
        if not base_url.endswith("/"):
            base_url += "/"

        def _simple_links(html: str) -> Iterable[str]:
            # Minimal fallback parser: pull href targets out of anchor tags.
            return re.findall(r'<a\s+(?:[^>]*?\s+)?href="([^"]*)"', html)

        parser = (
            self.custom_list_page_parser
            if self.custom_list_page_parser is not None
            else _simple_links
        )

        for link in parser(response):
            joined = urllib.parse.urljoin(base_url, link)
            # Mirror HttpPath's heuristic: no suffix == directory.
            yield HttpPath(joined, client=self), Path(urllib.parse.urlparse(joined).path).suffix == ""

    def request(self, url: HttpPath, method: str, **kwargs) -> None:
        """Issue an arbitrary HTTP request (backs HttpPath.get/put/post/delete/head)."""
        request = urllib.request.Request(url.as_url(), method=method, **kwargs)
        with self.opener.open(request) as response:
            # NOTE(review): the response object is closed when the `with` block
            # exits on return — callers can read status/headers but not the
            # body. Confirm that is the intended contract.
            return response


HttpClient.HttpPath = HttpClient.CloudPath  # type: ignore
# Reconstructed module from patch: cloudpathlib/http/httppath.py
import os
from pathlib import Path, PurePosixPath
from tempfile import TemporaryDirectory
from typing import TYPE_CHECKING, Optional, Tuple, Union

from ..cloudpath import CloudPath, NoStatError, register_path_class

if TYPE_CHECKING:
    from .httpclient import HttpClient


@register_path_class("http")
class HttpPath(CloudPath):
    """CloudPath flavor for plain ``http://`` URLs.

    Unlike bucket-style providers there is no drive/bucket: the netloc plays
    that role and the anchor is ``scheme://netloc/``.
    """

    cloud_prefix = "http://"
    client: "HttpClient"

    def __init__(
        self,
        cloud_path: Union[str, "HttpPath"],
        client: Optional["HttpClient"] = None,
    ) -> None:
        super().__init__(cloud_path, client)

        # Normalized POSIX view of the URL path; always rooted at "/".
        self._path = (
            PurePosixPath(self._url.path)
            if self._url.path.startswith("/")
            else PurePosixPath(f"/{self._url.path}")
        )

    @property
    def drive(self) -> str:
        # For HTTP paths, no drive; use .anchor for scheme + netloc
        return self._url.netloc

    @property
    def anchor(self) -> str:
        return f"{self._url.scheme}://{self._url.netloc}/"

    @property
    def _no_prefix_no_drive(self) -> str:
        # netloc appears in anchor and drive for httppath; so don't double count
        return self._str[len(self.anchor) - 1 :]

    def is_dir(self) -> bool:
        """Heuristic: an existing path with no file suffix is treated as a directory."""
        if not self.exists():
            return False

        # HTTP doesn't really have directories, but some servers might list
        # files if treated as such; assume suffix-less paths are dirs.
        return self._path.suffix == ""

    def is_file(self) -> bool:
        """Heuristic mirror of is_dir: an existing path with a suffix is a file."""
        if not self.exists():
            return False

        return self._path.suffix != ""

    def mkdir(self, parents: bool = False, exist_ok: bool = False) -> None:
        pass  # no-op for HTTP Paths

    def touch(self, exist_ok: bool = True) -> None:
        """Create an empty file at this URL; cannot freshen an existing one."""
        if self.exists():
            if not exist_ok:
                raise FileExistsError(f"File already exists: {self}")

            raise NotImplementedError(
                "Touch not implemented for existing HTTP files since we can't update the modified time."
            )
        else:
            # BUG FIX: original used `TemporaryDirectory().name` without keeping
            # a reference — the directory is removed when the object is
            # garbage-collected and the re-created one is never cleaned up.
            # The context manager gives the file a well-defined lifetime.
            with TemporaryDirectory() as tmp_dir:
                empty_file = Path(tmp_dir) / "empty_file.txt"
                empty_file.write_text("")
                self.client._upload_file(empty_file, self)

    def stat(self, follow_symlinks: bool = True) -> os.stat_result:
        """Synthesize a stat result from the server's response headers."""
        try:
            meta = self.client._get_metadata(self)
        except Exception:  # narrowed from bare except; still maps any failure to NoStatError
            raise NoStatError(f"Could not get metadata for {self}")

        # BUG FIX: servers may omit Last-Modified, in which case the metadata
        # value is None and `meta.get("last_modified", 0).timestamp()` raised
        # AttributeError; fall back to mtime 0 instead.
        last_modified = meta.get("last_modified")
        mtime = last_modified.timestamp() if last_modified is not None else 0

        return os.stat_result(
            (  # type: ignore
                None,  # mode
                None,  # ino
                self.cloud_prefix,  # dev,
                None,  # nlink,
                None,  # uid,
                None,  # gid,
                meta.get("size", 0),  # size,
                None,  # atime,
                mtime,  # mtime,
                None,  # ctime,
            )
        )

    def as_url(self, presign: bool = False, expire_seconds: int = 60 * 60) -> str:
        if presign:
            raise NotImplementedError("Presigning not supported for HTTP paths")

        # recreate from what was initialized so we have the same query params, etc.
        return self._url.geturl()

    @property
    def name(self) -> str:
        return self._path.name

    @property
    def parents(self) -> Tuple["HttpPath", ...]:
        # Append the site root ("") as the final ancestor beyond what the
        # generic CloudPath implementation produces.
        return super().parents + (self._new_cloudpath(""),)

    def get(self, **kwargs):
        return self.client.request(self, "GET", **kwargs)

    def put(self, **kwargs):
        return self.client.request(self, "PUT", **kwargs)

    def post(self, **kwargs):
        return self.client.request(self, "POST", **kwargs)

    def delete(self, **kwargs):
        return self.client.request(self, "DELETE", **kwargs)

    def head(self, **kwargs):
        return self.client.request(self, "HEAD", **kwargs)
# Reconstructed additions from patch: tests/conftest.py
class HttpProviderTestRig(CloudProviderTestRig):
    """Test rig variant for HTTP: paths must carry the server's netloc."""

    def create_cloud_path(self, path: str, client=None):
        """Http version needs to include netloc as well"""
        if client:
            return client.CloudPath(
                cloud_path=f"{self.path_class.cloud_prefix}{self.drive}/{self.test_dir}/{path}"
            )
        else:
            return self.path_class(
                cloud_path=f"{self.path_class.cloud_prefix}{self.drive}/{self.test_dir}/{path}"
            )


@fixture()
def http_rig(request, assets_dir, http_server):  # noqa: F811
    """Rig backed by the local test HTTP server; copies assets into a per-test dir."""
    test_dir = create_test_dir_name(request)

    host, server_dir = http_server
    drive = urlparse(host).netloc

    # copy test assets
    shutil.copytree(assets_dir, server_dir / test_dir)

    # BUG FIX: the fixture constructed the generic CloudProviderTestRig, so the
    # HTTP-specific create_cloud_path override above was dead code and cloud
    # paths were built without the netloc-aware logic. Use HttpProviderTestRig.
    rig = HttpProviderTestRig(
        path_class=HttpPath,
        client_class=HttpClient,
        drive=drive,
        test_dir=test_dir,
    )

    rig.http_server_dir = server_dir

    yield rig

    rig.client_class._default_client = None  # reset default client
    shutil.rmtree(server_dir)
# Reconstructed module from patch: tests/http_fixtures.py
from datetime import datetime
from functools import partial
from http.server import HTTPServer, SimpleHTTPRequestHandler
import os
from pathlib import Path
import shutil
import threading
import time
from urllib.request import urlopen

from pytest import fixture


class TestHTTPRequestHandler(SimpleHTTPRequestHandler):
    """Also allows PUT and DELETE requests for testing."""

    def do_PUT(self):
        """Write the request body to the translated path; respond 201."""
        length = int(self.headers["Content-Length"])
        path = Path(self.translate_path(self.path))

        # Ensure the destination hierarchy exists before writing.
        if path.is_dir():
            path.mkdir(parents=True, exist_ok=True)
        else:
            path.parent.mkdir(parents=True, exist_ok=True)

        with path.open("wb") as f:
            f.write(self.rfile.read(length))

        # Freshen mtime so overwrite-detection tests see this PUT as "newer".
        now = datetime.now().timestamp()
        os.utime(path, (now, now))

        self.send_response(201)
        self.end_headers()

    def do_DELETE(self):
        """Remove the translated path (file or whole tree); 204 on success, 404 if absent."""
        path = Path(self.translate_path(self.path))

        try:
            if path.is_dir():
                shutil.rmtree(path)
            else:
                path.unlink()
            self.send_response(204)
        except FileNotFoundError:
            self.send_response(404)

        self.end_headers()

    # NOTE: the original also defined a `list_directory` override that only
    # called `super().list_directory(path)` — a no-op passthrough, removed.


@fixture(scope="module")
def http_server(tmp_path_factory, worker_id):
    """Serve a temp directory over HTTP in a daemon thread; yields (base_url, dir)."""
    hostname = "localhost"
    port = (
        9077 + int(worker_id.lstrip("gw")) if worker_id != "master" else 0
    )  # don't collide if tests running in parallel with multiple servers

    # Create a temporary directory to serve files from
    server_dir = tmp_path_factory.mktemp("server_files").resolve()
    server_dir.mkdir(exist_ok=True)

    # BUG FIX: bind the server up front so that, when port is 0 (the
    # non-parallel "master" case), the OS-assigned ephemeral port can be read
    # back; the original advertised "http://localhost:0", which is unusable.
    handler = partial(TestHTTPRequestHandler, directory=str(server_dir))
    httpd = HTTPServer((hostname, port), handler)
    port = httpd.server_address[1]

    # Start the server in a separate thread
    server_thread = threading.Thread(target=httpd.serve_forever, daemon=True)
    server_thread.start()

    # Wait for the server to start
    for _ in range(10):
        try:
            urlopen(f"http://{hostname}:{port}")
            break
        except Exception:
            time.sleep(0.1)

    yield f"http://{hostname}:{port}", server_dir

    # BUG FIX: `server_thread.join(0)` never stopped serve_forever(); shut the
    # server down explicitly so the thread can exit.
    httpd.shutdown()
    server_thread.join(1)

    # Clean up the temporary directory if it still exists
    if server_dir.exists():
        shutil.rmtree(server_dir)
content_type in mimes: _test_write_content_type(suffix, content_type, rig, check=False) + if rig.client_class is HttpClient: + # HTTP client doesn't support custom content types + return + # custom mime type method def my_content_type(path): # do lookup for content types I define; fallback to diff --git a/tests/test_cloudpath_file_io.py b/tests/test_cloudpath_file_io.py index 7dc5b149..a0545cdb 100644 --- a/tests/test_cloudpath_file_io.py +++ b/tests/test_cloudpath_file_io.py @@ -14,17 +14,25 @@ CloudPathNotImplementedError, DirectoryNotEmptyError, ) +from cloudpathlib.http.httpclient import HttpClient +from cloudpathlib.http.httppath import HttpPath def test_file_discovery(rig): p = rig.create_cloud_path("dir_0/file0_0.txt") assert p.exists() - p2 = rig.create_cloud_path("dir_0/not_a_file") + p2 = rig.create_cloud_path("dir_0/not_a_file_yet.file") assert not p2.exists() p2.touch() assert p2.exists() - p2.touch(exist_ok=True) + + if rig.client_class not in [HttpClient]: # not supported to touch existing + p2.touch(exist_ok=True) + else: + with pytest.raises(NotImplementedError): + p2.touch(exist_ok=True) + with pytest.raises(FileExistsError): p2.touch(exist_ok=False) p2.unlink(missing_ok=False) @@ -83,12 +91,12 @@ def glob_test_dirs(rig, tmp_path): def _make_glob_directory(root): (root / "dirB").mkdir() - (root / "dirB" / "fileB").write_text("fileB") + (root / "dirB" / "fileB.txt").write_text("fileB") (root / "dirC").mkdir() (root / "dirC" / "dirD").mkdir() - (root / "dirC" / "dirD" / "fileD").write_text("fileD") - (root / "dirC" / "fileC").write_text("fileC") - (root / "fileA").write_text("fileA") + (root / "dirC" / "dirD" / "fileD.txt").write_text("fileD") + (root / "dirC" / "fileC.txt").write_text("fileC") + (root / "fileA.txt").write_text("fileA") cloud_root = rig.create_cloud_path("glob-tests") cloud_root.mkdir() @@ -181,6 +189,9 @@ def test_walk(glob_test_dirs): def test_list_buckets(rig): + if rig.path_class in [HttpPath]: + return # no bucket listing for 
HTTP + # test we can list buckets buckets = list(rig.path_class(f"{rig.path_class.cloud_prefix}").iterdir()) assert len(buckets) > 0 @@ -337,7 +348,7 @@ def test_is_dir_is_file(rig, tmp_path): def test_file_read_writes(rig, tmp_path): p = rig.create_cloud_path("dir_0/file0_0.txt") - p2 = rig.create_cloud_path("dir_0/not_a_file") + p2 = rig.create_cloud_path("dir_0/not_a_file.txt") p3 = rig.create_cloud_path("") text = "lalala" * 10_000 @@ -355,16 +366,20 @@ def test_file_read_writes(rig, tmp_path): before_touch = datetime.now() sleep(1) - p.touch() - if not getattr(rig, "is_custom_s3", False): - # Our S3Path.touch implementation does not update mod time for MinIO - assert datetime.fromtimestamp(p.stat().st_mtime) > before_touch + + if rig.path_class not in [HttpPath]: # not supported to touch existing + p.touch() + + if not getattr(rig, "is_custom_s3", False): + # Our S3Path.touch implementation does not update mod time for MinIO + assert datetime.fromtimestamp(p.stat().st_mtime) > before_touch # no-op if not getattr(rig, "is_adls_gen2", False): p.mkdir() - assert p.etag is not None + if rig.path_class not in [HttpPath]: # not supported to touch existing + assert p.etag is not None dest = rig.create_cloud_path("dir2/new_file0_0.txt") assert not dest.exists() diff --git a/tests/test_cloudpath_instantiation.py b/tests/test_cloudpath_instantiation.py index de139593..5c41abb6 100644 --- a/tests/test_cloudpath_instantiation.py +++ b/tests/test_cloudpath_instantiation.py @@ -7,6 +7,7 @@ from cloudpathlib import AzureBlobPath, CloudPath, GSPath, S3Path from cloudpathlib.exceptions import InvalidPrefixError, MissingDependenciesError +from cloudpathlib.http.httppath import HttpPath @pytest.mark.parametrize( @@ -44,6 +45,9 @@ def test_dispatch_error(): @pytest.mark.parametrize("path", ["b/k", "b/k", "b/k.file", "b/k", "b"]) def test_instantiation(rig, path): + if rig.path_class in [HttpPath]: + path = "example-url.com/" + path + # check two cases of prefix for prefix in 
[rig.cloud_prefix.lower(), rig.cloud_prefix.upper()]: expected = prefix + path @@ -51,13 +55,17 @@ def test_instantiation(rig, path): assert repr(p) == f"{rig.path_class.__name__}('{expected}')" assert str(p) == expected - assert p._no_prefix == expected.split("://", 1)[-1] + if rig.path_class in [HttpPath]: + assert p._no_prefix == path.replace("example-url.com/", "") + assert str(p._path) == path.replace("example-url.com", "") + + else: + assert p._no_prefix == expected.split("://", 1)[-1] + assert str(p._path) == expected.split(":/", 1)[-1] assert p._url.scheme == expected.split("://", 1)[0].lower() assert p._url.netloc == expected.split("://", 1)[-1].split("/")[0] - assert str(p._path) == expected.split(":/", 1)[-1] - def test_default_client_lazy(rig): cp = rig.path_class(rig.cloud_prefix + "testing/file.txt") diff --git a/tests/test_cloudpath_manipulation.py b/tests/test_cloudpath_manipulation.py index aaf4098c..91e29c77 100644 --- a/tests/test_cloudpath_manipulation.py +++ b/tests/test_cloudpath_manipulation.py @@ -4,6 +4,7 @@ import pytest from cloudpathlib import CloudPath +from cloudpathlib.http.httppath import HttpPath def test_properties(rig): @@ -83,16 +84,27 @@ def test_joins(rig): if sys.version_info >= (3, 12): assert rig.create_cloud_path("a/b/c/d").match("A/*/C/D", case_sensitive=False) - assert rig.create_cloud_path("a/b/c/d").anchor == rig.cloud_prefix + if rig.path_class not in [HttpPath]: + assert rig.create_cloud_path("a/b/c/d").anchor == rig.cloud_prefix + assert rig.create_cloud_path("a/b/c/d").parent == rig.create_cloud_path("a/b/c") - assert rig.create_cloud_path("a/b/c/d").parents == ( - rig.create_cloud_path("a/b/c"), - rig.create_cloud_path("a/b"), - rig.create_cloud_path("a"), - rig.path_class(f"{rig.cloud_prefix}{rig.drive}/{rig.test_dir}"), - rig.path_class(f"{rig.cloud_prefix}{rig.drive}"), - ) + if rig.path_class not in [HttpPath]: + assert rig.create_cloud_path("a/b/c/d").parents == ( + rig.create_cloud_path("a/b/c"), + 
rig.create_cloud_path("a/b"), + rig.create_cloud_path("a"), + rig.path_class(f"{rig.cloud_prefix}{rig.drive}/{rig.test_dir}"), + rig.path_class(f"{rig.cloud_prefix}{rig.drive}"), + ) + else: + assert rig.create_cloud_path("a/b/c/d").parents == ( + rig.create_cloud_path("a/b/c"), + rig.create_cloud_path("a/b"), + rig.create_cloud_path("a"), + rig.path_class(f"{rig.cloud_prefix}{rig.drive}/{rig.test_dir}"), + rig.path_class(f"{rig.cloud_prefix}{rig.drive}/"), + ) assert rig.create_cloud_path("a").joinpath("b", "c") == rig.create_cloud_path("a/b/c") assert rig.create_cloud_path("a").joinpath(PurePosixPath("b"), "c") == rig.create_cloud_path( @@ -106,21 +118,32 @@ def test_joins(rig): == f"{rig.cloud_prefix}{rig.drive}/{rig.test_dir}/a/b/c" ) - assert rig.create_cloud_path("a/b/c/d").parts == ( - rig.cloud_prefix, - rig.drive, - rig.test_dir, - "a", - "b", - "c", - "d", - ) + if rig.path_class in [HttpPath]: + assert rig.create_cloud_path("a/b/c/d").parts == ( + rig.cloud_prefix + rig.drive + "/", + rig.test_dir, + "a", + "b", + "c", + "d", + ) + else: + assert rig.create_cloud_path("a/b/c/d").parts == ( + rig.cloud_prefix, + rig.drive, + rig.test_dir, + "a", + "b", + "c", + "d", + ) def test_with_segments(rig): - assert rig.create_cloud_path("a/b/c/d").with_segments("x", "y", "z") == rig.client_class( - **rig.required_client_kwargs - ).CloudPath(f"{rig.cloud_prefix}x/y/z") + to_test = rig.create_cloud_path("a/b/c/d").with_segments("x", "y", "z") + assert to_test == rig.client_class(**rig.required_client_kwargs).CloudPath( + f"{to_test.anchor}x/y/z" + ) def test_is_junction(rig): diff --git a/tests/test_cloudpath_upload_copy.py b/tests/test_cloudpath_upload_copy.py index acf5e5ec..6e447f49 100644 --- a/tests/test_cloudpath_upload_copy.py +++ b/tests/test_cloudpath_upload_copy.py @@ -4,6 +4,7 @@ import pytest +from cloudpathlib.http.httppath import HttpPath from cloudpathlib.local import LocalGSPath, LocalS3Path, LocalS3Client from cloudpathlib.exceptions import ( 
CloudPathFileExistsError, @@ -64,12 +65,12 @@ def test_upload_from_file(rig, upload_assets_dir): assert p.read_text() == "Hello from 2" # to file, file exists and is newer - p.touch() + p.write_text("newer") with pytest.raises(OverwriteNewerCloudError): p.upload_from(upload_assets_dir / "upload_1.txt") # to file, file exists and is newer; overwrite - p.touch() + p.write_text("even newer") sleep(1.1) p.upload_from(upload_assets_dir / "upload_1.txt", force_overwrite_to_cloud=True) assert p.exists() @@ -100,12 +101,12 @@ def test_upload_from_dir(rig, upload_assets_dir): # a newer file exists on cloud sleep(1) - (p / "upload_1.txt").touch() + (p / "upload_1.txt").write_text("newer") with pytest.raises(OverwriteNewerCloudError): p.upload_from(upload_assets_dir) # force overwrite - (p / "upload_1.txt").touch() + (p / "upload_1.txt").write_text("even newer") (p / "upload_2.txt").unlink() p.upload_from(upload_assets_dir, force_overwrite_to_cloud=True) assert assert_mirrored(p, upload_assets_dir) @@ -135,9 +136,11 @@ def test_copy(rig, upload_assets_dir, tmpdir): # cloud to cloud -> make sure no local cache p_new = p.copy(p.parent / "new_upload_1.txt") assert p_new.exists() - assert not p_new._local.exists() # cache should never have been downloaded - assert not p._local.exists() # cache should never have been downloaded - assert p_new.read_text() == "Hello from 1" + + if rig.path_class not in [HttpPath]: + assert not p_new._local.exists() # cache should never have been downloaded + assert not p._local.exists() # cache should never have been downloaded + assert p_new.read_text() == "Hello from 1" # cloud to cloud path as string cloud_dest = str(p.parent / "new_upload_0.txt") @@ -153,7 +156,7 @@ def test_copy(rig, upload_assets_dir, tmpdir): assert p_new.read_text() == "Hello from 1" # cloud to cloud overwrite - p_new.touch() + p_new.write_text("p_new") with pytest.raises(OverwriteNewerCloudError): p_new = p.copy(p_new)