diff --git a/compute_worker/compute_worker.py b/compute_worker/compute_worker.py index c02abcb0f..52f2a1e5f 100644 --- a/compute_worker/compute_worker.py +++ b/compute_worker/compute_worker.py @@ -505,6 +505,8 @@ def __init__(self, run_args): max_retries=Retry( total=3, backoff_factor=1, + status_forcelist=[502, 503, 504], + allowed_methods=["PATCH", "GET", "PUT"], ) ) self.requests_session.mount("http://", adapter) @@ -618,7 +620,7 @@ def _update_submission(self, data): url = f"{self.submissions_api_url}/submissions/{self.submission_id}/" data["secret"] = self.secret - logger.info(f"Updating submission @ {url} with data = {data}") + logger.info(f"Updating submission @ {url}") resp = self.requests_session.patch(url, data=data, timeout=150) if resp.status_code == 200: @@ -639,8 +641,10 @@ def _update_status(self, status, extra_information=None): try: self._update_submission(data) except Exception as e: - # Always catch exception and never raise error + # Re-raise only for terminal statuses so Celery marks the task as failed. logger.exception(f"Failed to update submission status to {status}: {e}") + if status in ("Finished", "Failed"): + raise def _get_container_image(self, image_name): logger.info("Running pull for image: {}".format(image_name))