Actions
Bug #22617
open
arvados-dispatch-cloud got stuck in a tight loop of retrying crunch-run when crunch-run failed immediately
Status:
New
Priority:
Normal
Assigned To:
-
Category:
Crunch
Target version:
-
Story points:
-
Description
Brett to add details, but this happened on tordo-xvhdp-kfkn39r3glw8n2y, and you can see it in tordo's arvados-dispatch-cloud (a-d-c) logs for 2025-02-26.
Updated by Tom Clegg 25 days ago
{"ClusterID":"tordo","ContainerUUID":"tordo-dz642-45mf7na7mgozajk","InstanceTypes":"g4dnxlarge","PID":304509,"Priority":562948212891007462,"State":"Queued","level":"info","msg":"adding container to queue","time":"2025-02-26T02:49:01.451755748Z"} {"ClusterID":"tordo","ContainerUUID":"tordo-dz642-45mf7na7mgozajk","InstanceType":"g4dnxlarge","PID":304509,"level":"info","msg":"creating new instance","time":"2025-02-26T02:49:01.502047832Z"} {"Address":"10.253.254.242","ClusterID":"tordo","ContainerUUID":"tordo-dz642-45mf7na7mgozajk","Instance":"i-0afbd98f698296a88","InstanceType":"g4dnxlarge","PID":304509,"level":"info","msg":"crunch-run process started","time":"2025-02-26T02:49:06.488994241Z"} {"Address":"10.253.254.242","ClusterID":"tordo","ContainerUUID":"tordo-dz642-45mf7na7mgozajk","Instance":"i-0afbd98f698296a88","InstanceType":"g4dnxlarge","PID":304509,"level":"info","msg":"crunch-run process ended","time":"2025-02-26T02:50:17.610689582Z"} {"ClusterID":"tordo","ContainerUUID":"tordo-dz642-45mf7na7mgozajk","PID":304509,"Priority":562948212891007462,"State":"Locked","level":"info","msg":"requeueing locked container because crunch-run exited","time":"2025-02-26T02:50:21.875754840Z"} {"Address":"10.253.254.242","ClusterID":"tordo","ContainerUUID":"tordo-dz642-45mf7na7mgozajk","Instance":"i-0afbd98f698296a88","InstanceType":"g4dnxlarge","PID":304509,"level":"info","msg":"crunch-run process started","time":"2025-02-26T02:50:32.015323461Z"} {"Address":"10.253.254.242","ClusterID":"tordo","ContainerUUID":"tordo-dz642-45mf7na7mgozajk","Instance":"i-0afbd98f698296a88","InstanceType":"g4dnxlarge","PID":304509,"level":"info","msg":"crunch-run process ended","time":"2025-02-26T02:51:57.243769580Z"} {"ClusterID":"tordo","ContainerUUID":"tordo-dz642-45mf7na7mgozajk","PID":304509,"Priority":562948212891007462,"State":"Locked","level":"info","msg":"requeueing locked container because crunch-run exited","time":"2025-02-26T02:52:02.405996488Z"} 
{"Address":"10.253.254.242","ClusterID":"tordo","ContainerUUID":"tordo-dz642-45mf7na7mgozajk","Instance":"i-0afbd98f698296a88","InstanceType":"g4dnxlarge","PID":304509,"level":"info","msg":"crunch-run process started","time":"2025-02-26T02:52:07.387843463Z"} {"Address":"10.253.254.242","ClusterID":"tordo","ContainerUUID":"tordo-dz642-45mf7na7mgozajk","Instance":"i-0afbd98f698296a88","InstanceType":"g4dnxlarge","PID":304509,"level":"info","msg":"crunch-run process ended","time":"2025-02-26T02:53:17.477243009Z"} {"ClusterID":"tordo","ContainerUUID":"tordo-dz642-45mf7na7mgozajk","PID":304509,"Priority":562948212891007462,"State":"Locked","level":"info","msg":"requeueing locked container because crunch-run exited","time":"2025-02-26T02:53:22.812441405Z"} {"Address":"10.253.254.242","ClusterID":"tordo","ContainerUUID":"tordo-dz642-45mf7na7mgozajk","Instance":"i-0afbd98f698296a88","InstanceType":"g4dnxlarge","PID":304509,"level":"info","msg":"crunch-run process started","time":"2025-02-26T02:53:32.949572697Z"} {"Address":"10.253.254.242","ClusterID":"tordo","ContainerUUID":"tordo-dz642-45mf7na7mgozajk","Instance":"i-0afbd98f698296a88","InstanceType":"g4dnxlarge","PID":304509,"level":"info","msg":"crunch-run process ended","time":"2025-02-26T02:54:37.260987523Z"} {"ClusterID":"tordo","ContainerUUID":"tordo-dz642-45mf7na7mgozajk","PID":304509,"Priority":562948212891007462,"State":"Locked","level":"info","msg":"requeueing locked container because crunch-run exited","time":"2025-02-26T02:54:43.238247124Z"} {"Address":"10.253.254.242","ClusterID":"tordo","ContainerUUID":"tordo-dz642-45mf7na7mgozajk","Instance":"i-0afbd98f698296a88","InstanceType":"g4dnxlarge","PID":304509,"level":"info","msg":"crunch-run process started","time":"2025-02-26T02:54:53.375432657Z"} 
{"Address":"10.253.254.242","ClusterID":"tordo","ContainerUUID":"tordo-dz642-45mf7na7mgozajk","Instance":"i-0afbd98f698296a88","InstanceType":"g4dnxlarge","PID":304509,"level":"info","msg":"crunch-run process ended","time":"2025-02-26T02:55:57.131335143Z"} {"ClusterID":"tordo","ContainerUUID":"tordo-dz642-45mf7na7mgozajk","PID":304509,"Priority":562948212891007462,"State":"Locked","level":"info","msg":"requeueing locked container because crunch-run exited","time":"2025-02-26T02:56:03.683812785Z"} {"Address":"10.253.254.242","ClusterID":"tordo","ContainerUUID":"tordo-dz642-45mf7na7mgozajk","Instance":"i-0afbd98f698296a88","InstanceType":"g4dnxlarge","PID":304509,"level":"info","msg":"crunch-run process started","time":"2025-02-26T02:56:13.824259732Z"} {"Address":"10.253.254.242","ClusterID":"tordo","ContainerUUID":"tordo-dz642-45mf7na7mgozajk","Instance":"i-0afbd98f698296a88","InstanceType":"g4dnxlarge","PID":304509,"level":"info","msg":"crunch-run process ended","time":"2025-02-26T02:57:17.239692138Z"} {"ClusterID":"tordo","ContainerUUID":"tordo-dz642-45mf7na7mgozajk","PID":304509,"Priority":562948212891007462,"State":"Locked","level":"info","msg":"requeueing locked container because crunch-run exited","time":"2025-02-26T02:57:24.124246532Z"} {"Address":"10.253.254.242","ClusterID":"tordo","ContainerUUID":"tordo-dz642-45mf7na7mgozajk","Instance":"i-0afbd98f698296a88","InstanceType":"g4dnxlarge","PID":304509,"level":"info","msg":"crunch-run process started","time":"2025-02-26T02:57:34.258460911Z"} {"Address":"10.253.254.242","ClusterID":"tordo","ContainerUUID":"tordo-dz642-45mf7na7mgozajk","Instance":"i-0afbd98f698296a88","InstanceType":"g4dnxlarge","PID":304509,"level":"info","msg":"crunch-run process ended","time":"2025-02-26T02:58:37.144604036Z"} {"ClusterID":"tordo","ContainerUUID":"tordo-dz642-45mf7na7mgozajk","PID":304509,"Priority":562948212891007462,"State":"Locked","level":"info","msg":"requeueing locked container because crunch-run exited","time":"2025-02-26T02:58:44.560316352Z"} 
{"Address":"10.253.254.242","ClusterID":"tordo","ContainerUUID":"tordo-dz642-45mf7na7mgozajk","Instance":"i-0afbd98f698296a88","InstanceType":"g4dnxlarge","PID":304509,"level":"info","msg":"crunch-run process started","time":"2025-02-26T02:58:54.701662518Z"} {"Address":"10.253.254.242","ClusterID":"tordo","ContainerUUID":"tordo-dz642-45mf7na7mgozajk","Instance":"i-0afbd98f698296a88","InstanceType":"g4dnxlarge","PID":304509,"level":"info","msg":"crunch-run process ended","time":"2025-02-26T03:00:07.452990139Z"} {"ClusterID":"tordo","ContainerUUID":"tordo-dz642-45mf7na7mgozajk","PID":304509,"Priority":562948212891007462,"State":"Locked","level":"info","msg":"requeueing locked container because crunch-run exited","time":"2025-02-26T03:00:15.063772714Z"} {"Address":"10.253.254.242","ClusterID":"tordo","ContainerUUID":"tordo-dz642-45mf7na7mgozajk","Instance":"i-0afbd98f698296a88","InstanceType":"g4dnxlarge","PID":304509,"level":"info","msg":"crunch-run process started","time":"2025-02-26T03:00:25.200943323Z"} {"Address":"10.253.254.242","ClusterID":"tordo","ContainerUUID":"tordo-dz642-45mf7na7mgozajk","Instance":"i-0afbd98f698296a88","InstanceType":"g4dnxlarge","PID":304509,"level":"info","msg":"crunch-run process ended","time":"2025-02-26T03:01:37.197851921Z"} {"ClusterID":"tordo","ContainerUUID":"tordo-dz642-45mf7na7mgozajk","PID":304509,"Priority":562948212891007462,"State":"Locked","level":"info","msg":"requeueing locked container because crunch-run exited","time":"2025-02-26T03:01:45.545731863Z"} {"Address":"10.253.254.242","ClusterID":"tordo","ContainerUUID":"tordo-dz642-45mf7na7mgozajk","Instance":"i-0afbd98f698296a88","InstanceType":"g4dnxlarge","PID":304509,"level":"info","msg":"crunch-run process started","time":"2025-02-26T03:01:55.696853472Z"} 
{"Address":"10.253.254.242","ClusterID":"tordo","ContainerUUID":"tordo-dz642-45mf7na7mgozajk","Instance":"i-0afbd98f698296a88","InstanceType":"g4dnxlarge","PID":304509,"Reason":"state=Cancelled","level":"info","msg":"killing crunch-run process","time":"2025-02-26T03:03:06.014959745Z"} {"Address":"10.253.254.242","ClusterID":"tordo","ContainerUUID":"tordo-dz642-45mf7na7mgozajk","Instance":"i-0afbd98f698296a88","InstanceType":"g4dnxlarge","PID":304509,"Signal":15,"level":"info","msg":"sending signal","time":"2025-02-26T03:03:06.015084807Z"} {"Address":"10.253.254.242","ClusterID":"tordo","ContainerUUID":"tordo-dz642-45mf7na7mgozajk","Instance":"i-0afbd98f698296a88","InstanceType":"g4dnxlarge","PID":304509,"level":"info","msg":"crunch-run process ended","time":"2025-02-26T03:03:06.035275902Z"} {"ClusterID":"tordo","ContainerUUID":"tordo-dz642-45mf7na7mgozajk","PID":304509,"State":"Cancelled","level":"info","msg":"dropping container from queue","time":"2025-02-26T03:03:16.047103405Z"}
Actions