Idea #20852
Updated by Brett Smith over 1 year ago
A lot of code in the "Python SDK cookbook":https://doc.arvados.org/v2.6/sdk/python/cookbook.html that coordinates low-level API calls should just be methods that are defined in the SDK itself. Why force every user to copy-paste these code snippets when we can just give them the method directly? For recipes that I think are strong candidates for inclusion, here are some rough sketches of what the method signatures could look like, along with implementation notes. <pre><code class="python"> class Collection: def download(self, src: PathLike, dst: PathLike | None=None) -> None: ... # If src is a PurePath, generate a manifest path with .as_posix(). # If dst is None, default to the src basename. def upload(self, src: PathLike, dst: PathLike | None=None) -> None: ... # Same argument handling as download, except now we use dst.as_posix(). def iter_files(self, depth: int | None=None) -> Iterator[ArvadosFile]: ... # depth limits recursion, None means no limit. def iter_streams(self, depth: int | None=None) -> Iterator[Collection]: ... def iter_contents(self, depth: int | None=None) -> Iterator[ArvadosFile | Collection]: ... def copy( self, source: PathLike | IO, target_path: PathLike | None=None, source_collection: Collection | None=None, overwrite: bool=False, ) -> None: ... # Proposed expansion of the existing copy method signature. # Explicitly passing an empty target_path should not be required, that can # be the default if not specified. # If source is an IO, just copy that object directly. target_path defaults # to `source.name`. If source_collection is not None, that's a ValueError. # If source is an ArvadosFile, can do manifest manipulation directly. # If source is PathLike, follows the current implementation. # With these changes, the current "copy file" recipe could be simplified to: with src_coll.open('ExamplePath') as src_file: dst_coll.copy(src_file) def sharing_link(self) -> str: ... # I'm not sure what module these go in. 
def container_started( client: ArvadosAPIClient, container: str | Container | ContainerRequest, ) -> bool: ... # If container is a str, it's a UUID for one of the other types, fetch that. # If it's a container request, return False if container_uuid is None, else fetch that. # `return status not in {'Queued', 'Locked'}` def container_finished( client: ArvadosAPIClient, container: str | Container | ContainerRequest, ) -> bool: ... # Same dance as above, `return status in {'Cancelled', 'Complete'}` def container_succeeded( client: ArvadosAPIClient, container: str | Container | ContainerRequest, success: typing.Container[int]=frozenset([0]), ) -> bool: ... # Same dance as above, `return status == 'Complete' and exit_code in success` def child_requests( client: ArvadosAPIClient, container: str | Container | ContainerRequest, filters: list[list[str]]=[], select: list[str]=[], ) -> Iterator[ContainerRequest]: ... # Same dance as above, return the obvious `keyset_list_all` call. def child_containers( client: ArvadosAPIClient, container: str | Container | ContainerRequest, request_filters: list[list[str]]=[], container_filters: list[list[str]]=[], select: list[str]=[], ) -> Iterator[Container]: ... # Same dance as above, call `child_requests` to get container_uuids, then # return the obvious `keyset_list_all` call with those UUIDs. # Note that `child_requests` can be called with a very narrow `select` for # optimization, and can add the filter `container_uuid is not null`. </code></pre>