Project

General

Profile

Idea #21017 » container.py

Draft implementation - Brett Smith, 11/28/2023 03:38 PM

 
1
# Copyright (C) The Arvados Authors. All rights reserved.
2
#
3
# SPDX-License-Identifier: Apache-2.0
4

    
5
import itertools
6

    
7
from typing import (
8
    Container,
9
    Iterator,
10
    List,
11
    Optional,
12
    Union,
13
)
14

    
15
import arvados.util
16

    
17
# Type of one filter clause passed through to an Arvados API `list` call,
# e.g. ['requesting_container_uuid', 'in', [...]]. The alias is recursive
# (via the 'Filter' forward reference) so an operand may itself be a list.
Filter = List[Union[None, bool, float, str, 'Filter']]
18

    
19
def lookup(
        client: 'arvados.api_resources.ArvadosAPIClient',
        container: Union[str, 'arvados.api_resources.Container', 'arvados.api_resources.ContainerRequest'],
) -> Optional['arvados.api_resources.Container']:
    """Resolve a UUID, container request, or container to a container object

    `container` may be given as an Arvados UUID string, a container request
    object, or a container object.

    * A UUID string is validated and the matching container or container
      request is fetched with `client`.
    * A container object is returned as-is.
    * A container request with a `container_uuid` of `None` (it has not
      run) yields `None`; otherwise the container it names is fetched with
      `client` and returned.

    Raises `ValueError` when `container` is not usable input: a malformed
    UUID, a UUID of some other object type, an object with a missing or
    invalid `uuid`, or a request with a missing or invalid `container_uuid`.
    """
    uuid_re = arvados.util.uuid_pattern

    # Step 1: turn a UUID string into the API object it names.
    if isinstance(container, str):
        if uuid_re.fullmatch(container) is None:
            raise ValueError(f"{container!r} is not a valid Arvados UUID")
        _cluster, kind, _suffix = container.split('-')
        if kind == 'dz642':
            endpoint = client.containers()
        elif kind == 'xvhdp':
            endpoint = client.container_requests()
        else:
            raise ValueError(f"{container!r} is not a container or request UUID")
        container = endpoint.get(uuid=container).execute()

    # Step 2: work out what kind of object we are holding from its own UUID.
    # A missing key or a non-mapping/non-string value counts as invalid.
    try:
        has_valid_uuid = uuid_re.fullmatch(container['uuid']) is not None
    except (KeyError, TypeError):
        has_valid_uuid = False
    if not has_valid_uuid:
        raise ValueError("object does not have a valid Arvados UUID")
    _cluster, kind, _suffix = container['uuid'].split('-')

    if kind == 'dz642':
        # Already a container: nothing more to resolve.
        return container
    if kind != 'xvhdp':
        raise ValueError("object does not have a container or request UUID")

    # Step 3: follow a container request to the container it points at.
    try:
        target_uuid = container['container_uuid']
    except KeyError:
        raise ValueError("container request object missing container_uuid field") from None
    if target_uuid is None:
        return None
    try:
        target_is_valid = (
            arvados.util.container_uuid_pattern.fullmatch(target_uuid) is not None
        )
    except TypeError:
        target_is_valid = False
    if not target_is_valid:
        raise ValueError(f"container request object has invalid container_uuid {target_uuid!r}")
    return client.containers().get(uuid=target_uuid).execute()
77

    
78
def container_started(
        client: 'arvados.api_resources.ArvadosAPIClient',
        container: Union[str, 'arvados.api_resources.Container', 'arvados.api_resources.ContainerRequest'],
) -> bool:
    """Return whether a container has moved past the queue

    `container` accepts anything `lookup` does: a UUID, a container
    request object, or a container object.

    Returns `False` when `lookup` finds no container (a request that has
    not run), or when the container's `state` is `Queued` or `Locked`.
    Returns `True` for any other state.

    Raises `ValueError` (from `lookup`) if `container` is not valid input.
    """
    container_obj = lookup(client, container)
    if container_obj is None:
        return False
    # Arvados container records report their lifecycle in the `state`
    # field; they have no `status` field, so reading that raised KeyError.
    return container_obj['state'] not in {'Queued', 'Locked'}
87

    
88
def container_finished(
        client: 'arvados.api_resources.ArvadosAPIClient',
        container: Union[str, 'arvados.api_resources.Container', 'arvados.api_resources.ContainerRequest'],
) -> bool:
    """Return whether a container has reached a final state

    `container` accepts anything `lookup` does: a UUID, a container
    request object, or a container object.

    Returns `True` only when the container's `state` is `Cancelled` or
    `Complete`. Returns `False` otherwise, including when `lookup` finds
    no container (a request that has not run).

    Raises `ValueError` (from `lookup`) if `container` is not valid input.
    """
    container_obj = lookup(client, container)
    if container_obj is None:
        return False
    # Arvados container records report their lifecycle in the `state`
    # field; they have no `status` field, so reading that raised KeyError.
    return container_obj['state'] in {'Cancelled', 'Complete'}
97

    
98
def container_succeeded(
        client: 'arvados.api_resources.ArvadosAPIClient',
        container: Union[str, 'arvados.api_resources.Container', 'arvados.api_resources.ContainerRequest'],
        success: Container[int]=frozenset([0]),
) -> bool:
    """Return whether a container completed with a successful exit code

    `container` accepts anything `lookup` does: a UUID, a container
    request object, or a container object.

    `success` is the collection of exit codes to treat as success. It
    defaults to just `0`.

    Returns `True` only when a container exists, its `state` is
    `Complete`, and its `exit_code` is in `success`.

    Raises `ValueError` (from `lookup`) if `container` is not valid input.
    """
    container_obj = lookup(client, container)
    if container_obj is None:
        return False
    # Arvados container records report their lifecycle in the `state`
    # field; they have no `status` field, so reading that raised KeyError.
    return (
        container_obj['state'] == 'Complete'
        and container_obj['exit_code'] in success
    )
109

    
110
def child_requests(
        client: 'arvados.api_resources.ArvadosAPIClient',
        container: Union[str, 'arvados.api_resources.Container', 'arvados.api_resources.ContainerRequest'],
        filters: List[Filter]=[],
        select: Optional[List[str]]=None,
        depth: Optional[int]=None,
) -> Iterator['arvados.api_resources.ContainerRequest']:
    """Iterate the container requests made beneath a container

    `container` is resolved with `lookup`, so it may be an Arvados
    container, container request, or UUID. If it resolves to no container,
    nothing is iterated.

    The search runs level by level: first the requests whose
    `requesting_container_uuid` is the starting container, then the
    requests made by those requests' containers, and so on until a level
    produces no further containers.

    `filters` and `select` are passed to the Arvados API `list` call.
    `filters` adds search criteria on top of the parent-container filter.
    `select` chooses the fields present in each yielded request;
    `container_uuid` is always added because the search needs it.

    `depth` caps the number of levels searched. For example, `depth=1`
    yields only the immediate child requests of `container`. `None` means
    no limit.
    """
    if not (select is None or 'container_uuid' in select):
        select = ['container_uuid', *select]
    # One item per level to search; an endless iterator when unlimited.
    levels = itertools.repeat(None) if depth is None else range(depth - 1, -1, -1)

    parent = lookup(client, container)
    if parent is None:
        return

    # Containers whose child requests are fetched on the current level.
    frontier = {parent['uuid']}
    for _ in levels:
        parent_filter = ['requesting_container_uuid', 'in', list(frontier)]
        next_frontier = set()
        children = arvados.util.keyset_list_all(
            client.container_requests().list,
            filters=filters + [parent_filter],
            select=select,
        )
        for request in children:
            yield request
            next_frontier.add(request['container_uuid'])
        # Requests that have not been assigned a container have no children.
        next_frontier.discard(None)
        if not next_frontier:
            return
        frontier = next_frontier
155
        
156
def child_containers(
        client: 'arvados.api_resources.ArvadosAPIClient',
        container: Union[str, 'arvados.api_resources.Container', 'arvados.api_resources.ContainerRequest'],
        request_filters: List[Filter]=[],
        container_filters: List[Filter]=[],
        select: Optional[List[str]]=None,
        depth: Optional[int]=None,
) -> Iterator['arvados.api_resources.Container']:
    """Iterate child containers of an Arvados container or request

    Given an Arvados container, request, or UUID, iterate the child
    containers of the corresponding container.

    `request_filters`, `container_filters`, and `select` correspond to the
    Arvados API `list` call. `request_filters` and `container_filters` set
    additional search criteria for requests to search and containers to
    iterate, respectively. `select` chooses which fields are available in
    each container object; `uuid` is always included.

    `depth` limits how many times the search recurses. For example `depth=1`
    will only iterate the immediate child containers of `container`, and not
    any of their children.

    Child container UUIDs that the `list` call does not return (for example
    because `container_filters` excludes them) are skipped.
    """
    if select is not None and 'uuid' not in select:
        select = ['uuid', *select]
    # Collect the distinct containers assigned to every descendant request.
    pending_uuids = {req['container_uuid'] for req in child_requests(
            client, container,
            filters=request_filters + [['container_uuid', '!=', None]],
            select=['container_uuid'],
            depth=depth,
    )}
    pending_uuids.discard(None)
    # Fetch the containers in batches to bound the size of each `in` filter.
    batch_size = 100
    while pending_uuids:
        batch = list(itertools.islice(pending_uuids, batch_size))
        # Retire the whole batch up front. Removing only the UUIDs that come
        # back would leave any filtered-out UUID in the set forever and
        # repeat the same query without end.
        pending_uuids.difference_update(batch)
        yield from arvados.util.keyset_list_all(
                client.containers().list,
                filters=container_filters + [['uuid', 'in', batch]],
                select=select,
        )
197

    
198
# Demonstration entry point: write a CSV report, on stdout, of every child
# container of the container(s)/request(s) named on the command line.
if __name__ == '__main__':
    import argparse
    import csv
    import functools
    import re
    import sys

    import arvados

    # The client is built before parsing because the positional arguments
    # are resolved to container objects by `lookup` during parsing.
    client = arvados.api('v1')
    parser = argparse.ArgumentParser(
        description="Write a CSV report of all child containers of an Arvados container",
        epilog="This tool demonstrates functions that walk an Arvados container hierarchy.",
    )
    parser.add_argument(
        '--fields', '-f',
        # Split on any run of non-word characters (commas, whitespace, ...).
        type=re.compile(r'\W+', re.ASCII).split,
        default=[
            'uuid',
            'state',
            'started_at',
            'finished_at',
            'exit_code',
            'output',
            'log',
        ],
        help="""container fields to include in the report,
        separated by commas or whitespace""",
    )
    parser.add_argument(
        'containers',
        nargs=argparse.REMAINDER,
        metavar='UUID',
        # A ValueError raised by lookup is reported by argparse as a usage
        # error for the offending argument.
        type=functools.partial(lookup, client),
        help="UUID(s) of container or request object(s) to report",
    )
    args = parser.parse_args()
    out_csv = csv.DictWriter(
        sys.stdout,
        # child_containers may add `uuid` to the selected fields; drop any
        # column the user did not ask for.
        extrasaction='ignore',
        fieldnames=args.fields,
    )
    out_csv.writeheader()
    for container in args.containers:
        if container is None:
            # lookup returns None for a container request that has not run.
            # Passing None on to child_containers would raise ValueError,
            # so there is nothing to report for this argument.
            print(
                "skipping a container request that has no container yet",
                file=sys.stderr,
            )
            continue
        out_csv.writerows(child_containers(client, container, select=args.fields))
    (1-1/1)