Idea #20852
Updated by Brett Smith about 1 year ago
A lot of code in the "Python SDK cookbook":https://doc.arvados.org/v2.6/sdk/python/cookbook.html that coordinates low-level API calls should just be methods that are defined in the SDK itself. Why force every user to copy-paste these code snippets when we can just give them the method directly?
For recipes that I think are strong candidates for inclusion, here are some rough sketches of what the method signatures could look like, along with implementation notes.
<pre><code class="python">
class Collection:
    """Rough sketch of methods proposed for the SDK's Collection class.

    Signatures only: every body is a `...` placeholder, and the comments
    under each signature are implementation notes for that method.
    """

    def download(self, src: PathLike, dst: PathLike | None=None) -> None: ...
        # If src is a PurePath, generate a manifest path with .as_posix().
        # If dst is None, default to the src basename.

    def upload(self, src: PathLike, dst: PathLike | None=None) -> None: ...
        # Same argument handling as download, except now we use dst.as_posix().

    def iter_files(self, depth: int | None=None) -> Iterator[ArvadosFile]: ...
        # depth limits recursion, None means no limit.

    def iter_streams(self, depth: int | None=None) -> Iterator[Collection]: ...

    def iter_contents(self, depth: int | None=None) -> Iterator[ArvadosFile | Collection]: ...

    def copy(
        self,
        source: PathLike | IO,
        target_path: PathLike | None=None,
        source_collection: Collection | None=None,
        overwrite: bool=False,
    ) -> None: ...
        # Proposed expansion of the existing copy method signature.
        # Explicitly passing an empty target_path should not be required, that can
        # be the default if not specified.
        # If source is an IO, just copy that object directly. target_path defaults
        # to `source.name`. If source_collection is not None, that's a ValueError.
        # If source is an ArvadosFile, can do manifest manipulation directly.
        # If source is PathLike, follows the current implementation.
        # With these changes, the current "copy file" recipe could be simplified to:
        #     with src_coll.open('ExamplePath') as src_file:
        #         dst_coll.copy(src_file)

    def sharing_link(self, expires_at: datetime.datetime | datetime.timedelta) -> str: ...
        # expires_at is used for the created token. If it's a timedelta,
        # that's relative to datetime.datetime.now().
# I'm not sure what module these go in.
def container_started(
    client: ArvadosAPIClient,
    container: str | Container | ContainerRequest,
) -> bool:
    """Sketch: report whether the container's status is past 'Queued' and 'Locked'."""
    # Implementation notes:
    # * A str `container` is a UUID for a container or a container request;
    #   fetch that record.
    # * Given a container request, return False when its container_uuid is
    #   None; otherwise fetch that container.
    # * Finally, `return status not in {'Queued', 'Locked'}`.
    ...
def container_finished(
    client: ArvadosAPIClient,
    container: str | Container | ContainerRequest,
) -> bool:
    """Sketch: report whether the container's status is 'Cancelled' or 'Complete'."""
    # Implementation notes:
    # * Resolve `container` exactly as container_started does: a str is a
    #   UUID to fetch, and a container request is followed through its
    #   container_uuid.
    # * Then `return status in {'Cancelled', 'Complete'}`.
    ...
def container_succeeded(
    client: ArvadosAPIClient,
    container: str | Container | ContainerRequest,
    success: typing.Container[int]=frozenset([0]),
) -> bool:
    """Sketch: report whether the container completed with an accepted exit code."""
    # Implementation notes:
    # * Resolve `container` exactly as container_started does: a str is a
    #   UUID to fetch, and a container request is followed through its
    #   container_uuid.
    # * Then `return status == 'Complete' and exit_code in success`.
    ...
def child_requests(
    client: ArvadosAPIClient,
    container: str | Container | ContainerRequest,
    filters: list[list[str]] | None=None,
    select: list[str] | None=None,
) -> Iterator[ContainerRequest]:
    """Sketch: iterate over the container requests that are children of `container`.

    `filters` and `select` default to None rather than a shared mutable
    list; an implementation should treat None the same as an empty list.
    """
    # Implementation notes:
    # * Resolve `container` the way the container_* helpers do: a str is a
    #   UUID to fetch, and a container request is followed through its
    #   container_uuid. (What to do with a request whose container_uuid is
    #   None is not decided in this sketch.)
    # * Return the obvious `keyset_list_all` call.
    ...
def child_containers(
    client: ArvadosAPIClient,
    container: str | Container | ContainerRequest,
    request_filters: list[list[str]] | None=None,
    container_filters: list[list[str]] | None=None,
    select: list[str] | None=None,
) -> Iterator[Container]:
    """Sketch: iterate over the containers that are children of `container`.

    The filter and select parameters default to None rather than a shared
    mutable list; an implementation should treat None the same as an
    empty list.
    """
    # Implementation notes:
    # * Resolve `container` the way the container_* helpers do: a str is a
    #   UUID to fetch, and a container request is followed through its
    #   container_uuid.
    # * Call `child_requests` to get container_uuids, then return the
    #   obvious `keyset_list_all` call with those UUIDs.
    # * Note that `child_requests` can be called with a very narrow `select`
    #   for optimization, and can add the filter `container_uuid is not null`.
    ...
</code></pre>