Project

General

Profile

Idea #21017 » container.py

Draft implementation - Brett Smith, 11/28/2023 03:38 PM

 
# Copyright (C) The Arvados Authors. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

import itertools

from typing import (
Container,
Iterator,
List,
Optional,
Union,
)

import arvados.util

# One Arvados API filter clause, e.g. `['container_uuid', '!=', None]`.
# Operands can be scalars or nested lists (such as the UUID list given to
# an `in` filter), hence the recursive forward reference.
Filter = List[
    Union[
        None,
        bool,
        float,
        str,
        'Filter',
    ]
]

def lookup(
        client: 'arvados.api_resources.ArvadosAPIClient',
        container: Union[str, 'arvados.api_resources.Container', 'arvados.api_resources.ContainerRequest'],
) -> Optional['arvados.api_resources.Container']:
    """Resolve a UUID or Arvados object to a container object

    `container` may be a UUID string, a container object, or a container
    request object.

    A UUID string is checked against `arvados.util.uuid_pattern` and then
    fetched through `client`: `dz642` UUIDs via the containers API, `xvhdp`
    UUIDs via the container requests API.

    Once there is an object in hand, a container (`dz642` UUID) is returned
    as-is. A container request (`xvhdp` UUID) is followed through its
    `container_uuid` field: if that field is None, this function returns
    None; otherwise the named container is fetched through `client` and
    returned.

    Raises `ValueError` when the input cannot be interpreted: a malformed
    UUID, a UUID of some other object type, an object without a usable
    `uuid`, or a request whose `container_uuid` is missing or malformed.
    """
    uuid_pattern = arvados.util.uuid_pattern

    # Step 1: turn a bare UUID string into the API object it names.
    if isinstance(container, str):
        requested_uuid = container
        if uuid_pattern.fullmatch(requested_uuid) is None:
            raise ValueError(f"{requested_uuid!r} is not a valid Arvados UUID")
        _prefix, kind, _suffix = requested_uuid.split('-')
        if kind not in ('dz642', 'xvhdp'):
            raise ValueError(f"{requested_uuid!r} is not a container or request UUID")
        if kind == 'dz642':
            resource = client.containers()
        else:
            resource = client.container_requests()
        container = resource.get(uuid=requested_uuid).execute()

    # Step 2: classify the object by its own UUID. Anything that cannot be
    # subscripted, lacks 'uuid', or has a non-string UUID is rejected.
    try:
        object_uuid = container['uuid']
        uuid_ok = uuid_pattern.fullmatch(object_uuid) is not None
    except (KeyError, TypeError):
        uuid_ok = False
    if not uuid_ok:
        raise ValueError("object does not have a valid Arvados UUID")
    _prefix, kind, _suffix = object_uuid.split('-')
    if kind == 'dz642':
        return container
    if kind != 'xvhdp':
        raise ValueError("object does not have a container or request UUID")

    # Step 3: the object is a container request; follow it to its container.
    try:
        target_uuid = container['container_uuid']
    except KeyError:
        raise ValueError("container request object missing container_uuid field") from None
    if target_uuid is None:
        return None
    try:
        target_ok = arvados.util.container_uuid_pattern.fullmatch(target_uuid) is not None
    except TypeError:
        target_ok = False
    if not target_ok:
        raise ValueError(f"container request object has invalid container_uuid {target_uuid!r}")
    return client.containers().get(uuid=target_uuid).execute()

def container_started(
        client: 'arvados.api_resources.ArvadosAPIClient',
        container: Union[str, 'arvados.api_resources.Container', 'arvados.api_resources.ContainerRequest'],
) -> bool:
    """Return whether an Arvados container has started running

    `container` is resolved with `lookup`, so it can be a UUID, a container
    object, or a container request object.

    Returns False when `lookup` returns None (a request with no container)
    or when the container's `state` is `Queued` or `Locked`. Returns True
    for any other state.

    Raises `ValueError` if `lookup` rejects `container`.
    """
    container_obj = lookup(client, container)
    if container_obj is None:
        return False
    # Container records carry their lifecycle in `state`; they have no
    # `status` field, so reading that raised KeyError.
    return container_obj['state'] not in {'Queued', 'Locked'}

def container_finished(
        client: 'arvados.api_resources.ArvadosAPIClient',
        container: Union[str, 'arvados.api_resources.Container', 'arvados.api_resources.ContainerRequest'],
) -> bool:
    """Return whether an Arvados container has reached a final state

    `container` is resolved with `lookup`, so it can be a UUID, a container
    object, or a container request object.

    Returns True only when the container's `state` is `Cancelled` or
    `Complete`. Returns False for any other state, and when `lookup` returns
    None (a request with no container).

    Raises `ValueError` if `lookup` rejects `container`.
    """
    container_obj = lookup(client, container)
    if container_obj is None:
        return False
    # Container records carry their lifecycle in `state`; they have no
    # `status` field, so reading that raised KeyError.
    return container_obj['state'] in {'Cancelled', 'Complete'}

def container_succeeded(
        client: 'arvados.api_resources.ArvadosAPIClient',
        container: Union[str, 'arvados.api_resources.Container', 'arvados.api_resources.ContainerRequest'],
        success: Container[int]=frozenset([0]),
) -> bool:
    """Return whether an Arvados container completed with a successful exit code

    `container` is resolved with `lookup`, so it can be a UUID, a container
    object, or a container request object.

    `success` is the collection of exit codes to treat as success. It
    defaults to just `0`.

    Returns True only when the container's `state` is `Complete` and its
    `exit_code` is in `success`. Returns False otherwise, including when
    `lookup` returns None (a request with no container).

    Raises `ValueError` if `lookup` rejects `container`.
    """
    container_obj = lookup(client, container)
    if container_obj is None:
        return False
    # Container records carry their lifecycle in `state`; they have no
    # `status` field, so reading that raised KeyError.
    if container_obj['state'] != 'Complete':
        return False
    return container_obj['exit_code'] in success

def child_requests(
        client: 'arvados.api_resources.ArvadosAPIClient',
        container: Union[str, 'arvados.api_resources.Container', 'arvados.api_resources.ContainerRequest'],
        filters: List[Filter]=[],
        select: Optional[List[str]]=None,
        depth: Optional[int]=None,
) -> Iterator['arvados.api_resources.ContainerRequest']:
    """Iterate the requests submitted by a container and its descendants

    `container` is resolved with `lookup`, so it can be a UUID, a container
    object, or a container request object. If it resolves to None (a request
    with no container), nothing is yielded.

    The search goes level by level: first the requests whose
    `requesting_container_uuid` is the starting container, then the requests
    made by those requests' containers, and so on until a level yields no
    new container UUIDs.

    `filters` and `select` correspond to the Arvados API `list` call.
    `filters` adds search criteria and is applied at every level. `select`
    chooses which fields each yielded request has; `container_uuid` is
    always added because the search needs it to descend.

    `depth` caps the number of levels searched. `depth=1` yields only the
    immediate child requests. `None` means no limit.
    """
    # The walk reads 'container_uuid' from every result, so make sure the
    # API returns it even when the caller did not ask for it.
    if select is not None and 'container_uuid' not in select:
        select = ['container_uuid', *select]
    # One loop iteration per level: endless when depth is None, otherwise
    # exactly `depth` iterations (none at all for depth <= 0).
    levels = itertools.repeat(None) if depth is None else range(depth - 1, -1, -1)
    root = lookup(client, container)
    if root is None:
        return
    parent_uuids = {root['uuid']}
    for _level in levels:
        level_filters = [
            ['requesting_container_uuid', 'in', list(parent_uuids)],
        ]
        next_parents = set()
        matches = arvados.util.keyset_list_all(
            client.container_requests().list,
            filters=filters + level_filters,
            select=select,
        )
        for request in matches:
            yield request
            next_parents.add(request['container_uuid'])
        # Requests that have not been assigned a container report None.
        next_parents.discard(None)
        if not next_parents:
            break
        parent_uuids = next_parents
def child_containers(
        client: 'arvados.api_resources.ArvadosAPIClient',
        container: Union[str, 'arvados.api_resources.Container', 'arvados.api_resources.ContainerRequest'],
        request_filters: List[Filter]=[],
        container_filters: List[Filter]=[],
        select: Optional[List[str]]=None,
        depth: Optional[int]=None,
) -> Iterator['arvados.api_resources.Container']:
    """Iterate child containers of an Arvados container or request

    Given an Arvados container, request, or UUID, iterate the child containers
    of the corresponding container.

    The child requests are collected first with `child_requests`. Their
    containers are then fetched in batches of up to 100 UUIDs per `list`
    query.

    `request_filters`, `container_filters`, and `select` correspond to the
    Arvados API `list` call. `request_filters` and `container_filters` set
    additional search criteria for requests to search and containers to
    iterate, respectively. `select` chooses which fields are available in
    each container object; `uuid` is always added to it.

    `depth` limits how many times the search recurses. For example `depth=1`
    will only iterate the immediate child containers of `container`, and not any
    of their children.

    Containers excluded by `container_filters`, or otherwise not returned by
    the API, are skipped.
    """
    batch_size = 100
    if select is not None and 'uuid' not in select:
        select = ['uuid', *select]
    pending_uuids = {
        req['container_uuid']
        for req in child_requests(
            client, container,
            filters=request_filters + [['container_uuid', '!=', None]],
            select=['container_uuid'],
            depth=depth,
        )
    }
    pending_uuids.discard(None)
    while pending_uuids:
        batch = list(itertools.islice(pending_uuids, batch_size))
        # Retire the whole batch whether or not the API returns each UUID.
        # Removing UUIDs only as containers came back left any container
        # excluded by `container_filters` (or not returned) pending forever,
        # so the loop re-queried the same batch without end.
        pending_uuids.difference_update(batch)
        for child in arvados.util.keyset_list_all(
                client.containers().list,
                filters=container_filters + [['uuid', 'in', batch]],
                select=select,
        ):
            yield child

if __name__ == '__main__':
    # Command-line demo: write a CSV report to stdout with one row per child
    # container of each container or request UUID given as an argument.
    import argparse
    import csv
    import functools
    import re
    import sys

    import arvados

    def _split_fields(arg):
        """Split a --fields argument into a list of non-empty field names

        Names are separated by any run of non-word characters. Empty pieces
        (from a leading or trailing separator) are dropped so they are never
        sent to the API as field names.
        """
        names = [name for name in re.split(r'\W+', arg, flags=re.ASCII) if name]
        if not names:
            raise argparse.ArgumentTypeError("no field names given")
        return names

    client = arvados.api('v1')
    parser = argparse.ArgumentParser(
        description="Write a CSV report of all child containers of an Arvados container",
        epilog="This tool demonstrates functions that walk an Arvados container hierarchy.",
    )
    parser.add_argument(
        '--fields', '-f',
        type=_split_fields,
        default=[
            'uuid',
            'state',
            'started_at',
            'finished_at',
            'exit_code',
            'output',
            'log',
        ],
        help="container fields to include in the report, "
        "separated by commas or whitespace",
    )
    parser.add_argument(
        'containers',
        nargs=argparse.REMAINDER,
        metavar='UUID',
        # Each UUID is resolved while parsing, so a ValueError from lookup()
        # is reported by argparse as a usage error.
        type=functools.partial(lookup, client),
        help="UUID(s) of container or request object(s) to report",
    )
    args = parser.parse_args()
    out_csv = csv.DictWriter(
        sys.stdout,
        extrasaction='ignore',
        fieldnames=args.fields,
    )
    out_csv.writeheader()
    for container in args.containers:
        if container is None:
            # lookup() returns None for a request that has no container yet.
            # There are no children to report, and passing None on would
            # fail inside child_containers().
            print(
                "warning: skipping a container request that has no container yet",
                file=sys.stderr,
            )
            continue
        out_csv.writerows(child_containers(client, container, select=args.fields))
    (1-1/1)