Bug #15694
Updated by Peter Amstutz about 5 years ago
The workflow status polling thread received a "502 Bad Gateway" response. The workflow runner then stopped monitoring and submitting jobs, but it did not fail either; it simply hung:
<pre>
2019-10-07T09:13:21.337649988Z arvados.cwl-runner INFO: Getting current container: <HttpError 502 when requesting https://qr1hi.arvadosapi.com/arvados/v1/containers/current?alt=json returned "Bad Gateway">
2019-10-07T09:13:21.337649988Z arvados.cwl-runner ERROR: Error checking states on API server: %s
2019-10-07T09:13:21.337649988Z Traceback (most recent call last):
2019-10-07T09:13:21.337649988Z File "/usr/share/python2.7/dist/python-arvados-cwl-runner/lib/python2.7/site-packages/arvados_cwl/executor.py", line 369, in poll_states
2019-10-07T09:13:21.337649988Z proc_states = table.list(filters=[["uuid", "in", page]]).execute(num_retries=self.num_retries)
2019-10-07T09:13:21.337649988Z File "/usr/share/python2.7/dist/python-arvados-cwl-runner/lib/python2.7/site-packages/googleapiclient/_helpers.py", line 130, in positional_wrapper
2019-10-07T09:13:21.337649988Z return wrapped(*args, **kwargs)
2019-10-07T09:13:21.337649988Z File "/usr/share/python2.7/dist/python-arvados-cwl-runner/lib/python2.7/site-packages/googleapiclient/http.py", line 835, in execute
2019-10-07T09:13:21.337649988Z method=str(self.method), body=self.body, headers=self.headers)
2019-10-07T09:13:21.337649988Z File "/usr/share/python2.7/dist/python-arvados-cwl-runner/lib/python2.7/site-packages/googleapiclient/http.py", line 157, in _retry_request
2019-10-07T09:13:21.337649988Z resp.status if resp else exception)
2019-10-07T09:13:21.337649988Z File "/usr/lib/python2.7/logging/__init__.py", line 1179, in warning
2019-10-07T09:13:21.337649988Z self._log(WARNING, msg, args, **kwargs)
2019-10-07T09:13:21.337649988Z File "/usr/lib/python2.7/logging/__init__.py", line 1286, in _log
2019-10-07T09:13:21.337649988Z self.handle(record)
2019-10-07T09:13:21.337649988Z File "/usr/lib/python2.7/logging/__init__.py", line 1296, in handle
2019-10-07T09:13:21.337649988Z self.callHandlers(record)
2019-10-07T09:13:21.337649988Z File "/usr/lib/python2.7/logging/__init__.py", line 1336, in callHandlers
2019-10-07T09:13:21.337649988Z hdlr.handle(record)
2019-10-07T09:13:21.337649988Z File "/usr/lib/python2.7/logging/__init__.py", line 759, in handle
2019-10-07T09:13:21.337649988Z self.emit(record)
2019-10-07T09:13:21.337649988Z File "/usr/share/python2.7/dist/python-arvados-cwl-runner/lib/python2.7/site-packages/arvados_cwl/executor.py", line 86, in emit
2019-10-07T09:13:21.337649988Z "%s: %s" % (record.name, record.getMessage())
2019-10-07T09:13:21.337649988Z File "/usr/share/python2.7/dist/python-arvados-cwl-runner/lib/python2.7/site-packages/arvados_cwl/executor.py", line 259, in runtime_status_update
2019-10-07T09:13:21.337649988Z current = arvados_cwl.util.get_current_container(self.api, self.num_retries, logger)
2019-10-07T09:13:21.337649988Z File "/usr/share/python2.7/dist/python-arvados-cwl-runner/lib/python2.7/site-packages/arvados_cwl/util.py", line 34, in get_current_container
2019-10-07T09:13:21.337649988Z raise e
2019-10-07T09:13:21.337649988Z ApiError: <HttpError 502 when requesting https://qr1hi.arvadosapi.com/arvados/v1/containers/current?alt=json returned "Bad Gateway">
2019-10-07T09:13:29.060662282Z arvados.cwl-runner INFO: Getting current container: <HttpError 502 when requesting https://qr1hi.arvadosapi.com/arvados/v1/containers/current?alt=json returned "Bad Gateway">
2019-10-07T09:13:29.060662282Z arvados.cwl-runner ERROR: Fatal error in state polling thread.
2019-10-07T09:13:29.060662282Z Traceback (most recent call last):
2019-10-07T09:13:29.060662282Z File "/usr/share/python2.7/dist/python-arvados-cwl-runner/lib/python2.7/site-packages/arvados_cwl/executor.py", line 371, in poll_states
2019-10-07T09:13:29.060662282Z logger.exception("Error checking states on API server: %s")
2019-10-07T09:13:29.060662282Z File "/usr/lib/python2.7/logging/__init__.py", line 1200, in exception
2019-10-07T09:13:29.060662282Z self.error(msg, *args, **kwargs)
2019-10-07T09:13:29.060662282Z File "/usr/lib/python2.7/logging/__init__.py", line 1193, in error
2019-10-07T09:13:29.060662282Z self._log(ERROR, msg, args, **kwargs)
2019-10-07T09:13:29.060662282Z File "/usr/lib/python2.7/logging/__init__.py", line 1286, in _log
2019-10-07T09:13:29.060662282Z self.handle(record)
2019-10-07T09:13:29.060662282Z File "/usr/lib/python2.7/logging/__init__.py", line 1296, in handle
2019-10-07T09:13:29.060662282Z self.callHandlers(record)
2019-10-07T09:13:29.060662282Z File "/usr/lib/python2.7/logging/__init__.py", line 1336, in callHandlers
2019-10-07T09:13:29.060662282Z hdlr.handle(record)
2019-10-07T09:13:29.060662282Z File "/usr/lib/python2.7/logging/__init__.py", line 759, in handle
2019-10-07T09:13:29.060662282Z self.emit(record)
2019-10-07T09:13:29.060662282Z File "/usr/share/python2.7/dist/python-arvados-cwl-runner/lib/python2.7/site-packages/arvados_cwl/executor.py", line 86, in emit
2019-10-07T09:13:29.060662282Z "%s: %s" % (record.name, record.getMessage())
2019-10-07T09:13:29.060662282Z File "/usr/share/python2.7/dist/python-arvados-cwl-runner/lib/python2.7/site-packages/arvados_cwl/executor.py", line 259, in runtime_status_update
2019-10-07T09:13:29.060662282Z current = arvados_cwl.util.get_current_container(self.api, self.num_retries, logger)
2019-10-07T09:13:29.060662282Z File "/usr/share/python2.7/dist/python-arvados-cwl-runner/lib/python2.7/site-packages/arvados_cwl/util.py", line 34, in get_current_container
2019-10-07T09:13:29.060662282Z raise e
2019-10-07T09:13:29.060662282Z ApiError: <HttpError 502 when requesting https://qr1hi.arvadosapi.com/arvados/v1/containers/current?alt=json returned "Bad Gateway">
2019-10-07T09:13:43.365414753Z arvados.cwl-runner INFO: Getting current container: <HttpError 502 when requesting https://qr1hi.arvadosapi.com/arvados/v1/containers/current?alt=json returned "Bad Gateway">
2019-10-07T09:13:43.365414753Z Exception in thread Thread-22:
2019-10-07T09:13:43.365414753Z Traceback (most recent call last):
2019-10-07T09:13:43.365414753Z File "/usr/lib/python2.7/threading.py", line 801, in __bootstrap_inner
2019-10-07T09:13:43.365414753Z self.run()
2019-10-07T09:13:43.365414753Z File "/usr/lib/python2.7/threading.py", line 754, in run
2019-10-07T09:13:43.365414753Z self.__target(*self.__args, **self.__kwargs)
2019-10-07T09:13:43.365414753Z File "/usr/share/python2.7/dist/python-arvados-cwl-runner/lib/python2.7/site-packages/arvados_cwl/executor.py", line 386, in poll_states
2019-10-07T09:13:43.365414753Z logger.exception("Fatal error in state polling thread.")
2019-10-07T09:13:43.365414753Z File "/usr/lib/python2.7/logging/__init__.py", line 1200, in exception
2019-10-07T09:13:43.365414753Z self.error(msg, *args, **kwargs)
2019-10-07T09:13:43.365414753Z File "/usr/lib/python2.7/logging/__init__.py", line 1193, in error
2019-10-07T09:13:43.365414753Z self._log(ERROR, msg, args, **kwargs)
2019-10-07T09:13:43.365414753Z File "/usr/lib/python2.7/logging/__init__.py", line 1286, in _log
2019-10-07T09:13:43.365414753Z self.handle(record)
2019-10-07T09:13:43.365414753Z File "/usr/lib/python2.7/logging/__init__.py", line 1296, in handle
2019-10-07T09:13:43.365414753Z self.callHandlers(record)
2019-10-07T09:13:43.365414753Z File "/usr/lib/python2.7/logging/__init__.py", line 1336, in callHandlers
2019-10-07T09:13:43.365414753Z hdlr.handle(record)
2019-10-07T09:13:43.365414753Z File "/usr/lib/python2.7/logging/__init__.py", line 759, in handle
2019-10-07T09:13:43.365414753Z self.emit(record)
2019-10-07T09:13:43.365414753Z File "/usr/share/python2.7/dist/python-arvados-cwl-runner/lib/python2.7/site-packages/arvados_cwl/executor.py", line 86, in emit
2019-10-07T09:13:43.365414753Z "%s: %s" % (record.name, record.getMessage())
2019-10-07T09:13:43.365414753Z File "/usr/share/python2.7/dist/python-arvados-cwl-runner/lib/python2.7/site-packages/arvados_cwl/executor.py", line 259, in runtime_status_update
2019-10-07T09:13:43.365414753Z current = arvados_cwl.util.get_current_container(self.api, self.num_retries, logger)
2019-10-07T09:13:43.365414753Z File "/usr/share/python2.7/dist/python-arvados-cwl-runner/lib/python2.7/site-packages/arvados_cwl/util.py", line 34, in get_current_container
2019-10-07T09:13:43.365414753Z raise e
2019-10-07T09:13:43.365414753Z ApiError: <HttpError 502 when requesting https://qr1hi.arvadosapi.com/arvados/v1/containers/current?alt=json returned "Bad Gateway">
2019-10-07T09:13:43.365414753Z
</pre>