Source code for upathlib._blob

from __future__ import annotations

import pathlib
from collections.abc import Iterator

from typing_extensions import Self

from ._local import LocalPathType, LocalUpath
from ._upath import Upath


def _resolve_local_path(p: LocalPathType) -> LocalUpath:
    """
    Coerce a local path, given in any accepted form, into a ``LocalUpath``.

    A ``str`` is first wrapped in :class:`pathlib.Path`. A :class:`pathlib.Path`
    is resolved, made absolute, and converted to a ``LocalUpath``.
    A ``LocalUpath`` is returned unchanged.

    Raises ``TypeError`` if ``p`` is none of the above. (An explicit ``raise``
    is used rather than ``assert`` so the check is not dropped under ``python -O``.)
    """
    if isinstance(p, str):
        p = pathlib.Path(p)
    if isinstance(p, pathlib.Path):
        # NOTE(review): `.absolute()` after `.resolve()` looks redundant;
        # kept as-is in case it guards against a platform quirk.
        return LocalUpath(str(p.resolve().absolute()))
    if not isinstance(p, LocalUpath):
        raise TypeError(
            f"expected str, pathlib.Path, or LocalUpath; got {type(p)}"
        )
    return p


class BlobUpath(Upath):
    """
    Base class for paths that live in a *cloud* storage, a.k.a. a "blob store".

    The counterpart for *local* disk storage is :class:`~upathlib.LocalUpath`.
    """

    @property
    def blob_name(self) -> str:
        """
        The "name" of the blob: the "path" with any leading ``'/'`` removed.

        In cloud blob stores this is exactly the blob's name. The name often
        contains ``'/'``; the character has no special role in the name itself
        but users *interpret* it as a directory separator.
        """
        name = self._path.lstrip("/")
        return name

    def is_dir(self) -> bool:
        """
        Tell whether this path acts as a "directory".

        A typical blob store has no real notion of a directory, so the concept
        of a local file system is emulated: if a blob named ::

            /ab/cd/ef/g.txt

        exists, then the directory "/ab/cd/ef" is said to exist. A blob name
        should never carry a trailing ``/``, like ::

            /ab/cd/ef/

        (whether blob stores even allow such names is not known here).

        As a consequence, ``is_dir`` is the same as "there is something in the
        dir"; an "empty directory" does not exist in a blob store.
        """
        # Only the first child (if any) is pulled from the iterator.
        for _ in self.iterdir():
            return True
        return False

    def iterdir(self) -> Iterator[Self]:
        """
        Yield the immediate children of the current dir, each one once.

        This naive implementation walks everything produced by ``riterdir``
        and is inefficient; subclasses are expected to refine it.

        Raises ``ValueError`` if a blob's path has a ``'/'`` right after this
        dir's prefix (i.e. an empty name component).
        """
        # `self._path` may be just '/'; make sure the prefix ends with one '/'.
        prefix = self._path if self._path.endswith("/") else self._path + "/"
        skip = len(prefix)
        seen = set()
        for blob in self.riterdir():
            rest = blob._path[skip:]
            if rest.startswith("/"):
                raise ValueError(f"malformed blob name: '{blob._path}'")
            # Keep only the first component below this dir.
            child = rest.partition("/")[0]
            if child in seen:
                continue
            yield self / child
            seen.add(child)

    def download_dir(
        self,
        target: LocalPathType,
        **kwargs,
    ) -> int:
        """
        Copy this (blob) dir into the local dir ``target``.

        ``target`` is converted to a ``LocalUpath``; ``kwargs`` are forwarded
        to its ``copy_dir``, whose return value is returned.
        """
        return _resolve_local_path(target).copy_dir(self, **kwargs)

    def download_file(self, target: LocalPathType, **kwargs) -> None:
        """
        Copy this blob into the local file ``target``.

        ``target`` is converted to a ``LocalUpath``; ``kwargs`` are forwarded
        to its ``copy_file``.
        """
        _resolve_local_path(target).copy_file(self, **kwargs)

    def upload_dir(
        self,
        source: LocalPathType,
        **kwargs,
    ) -> int:
        """
        Copy the local dir ``source`` into this (blob) dir.

        ``source`` is converted to a ``LocalUpath``; ``kwargs`` are forwarded
        to ``self.copy_dir``, whose return value is returned.
        """
        local_dir = _resolve_local_path(source)
        return self.copy_dir(local_dir, **kwargs)

    def upload_file(self, source: LocalPathType, **kwargs) -> None:
        """
        Copy the local file ``source`` into this blob.

        ``source`` is converted to a ``LocalUpath``; ``kwargs`` are forwarded
        to ``self.copy_file``.
        """
        local_file = _resolve_local_path(source)
        self.copy_file(local_file, **kwargs)

    # If a subclass has efficient implementations for downloading and uploading,
    # it should not override `download_dir`, `download_file`, `upload_dir`, or
    # `upload_file`. Instead, it should call those implementations from
    # `_dir_to_dir` and `_copy_file`.