summaryrefslogtreecommitdiff
path: root/.gitlab-ci
diff options
context:
space:
mode:
authorGuilherme Gallo <guilherme.gallo@collabora.com>2023-04-18 00:18:12 -0300
committerMarge Bot <emma+marge@anholt.net>2023-04-19 14:36:37 +0000
commit11a97b644cbc2f8906f6493b65135374858331a4 (patch)
tree79207c673bb63bed9f2d9b2a8b0730951cd1546a /.gitlab-ci
parent710b568dcdab41963f953599acc2497578a3b82e (diff)
downloadmesa-11a97b644cbc2f8906f6493b65135374858331a4.tar.gz
ci/lava: Refactor LAVAJobSubmitter and add tests
Some refactoring was needed to make LAVAJobSubmitter class testable via pytest. Signed-off-by: Guilherme Gallo <guilherme.gallo@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22500>
Diffstat (limited to '.gitlab-ci')
-rw-r--r--.gitlab-ci/lava/exceptions.py3
-rwxr-xr-x.gitlab-ci/lava/lava_job_submitter.py127
-rw-r--r--.gitlab-ci/lava/utils/lava_job.py21
-rw-r--r--.gitlab-ci/tests/test_lava_job_submitter.py87
4 files changed, 192 insertions, 46 deletions
diff --git a/.gitlab-ci/lava/exceptions.py b/.gitlab-ci/lava/exceptions.py
index 3c9a63eb3c6..f877b024510 100644
--- a/.gitlab-ci/lava/exceptions.py
+++ b/.gitlab-ci/lava/exceptions.py
@@ -12,9 +12,10 @@ class MesaCITimeoutError(MesaCIException):
class MesaCIRetryError(MesaCIException):
- def __init__(self, *args, retry_count: int) -> None:
+ def __init__(self, *args, retry_count: int, last_job: None) -> None:
super().__init__(*args)
self.retry_count = retry_count
+ self.last_job = last_job
class MesaCIParseException(MesaCIException):
diff --git a/.gitlab-ci/lava/lava_job_submitter.py b/.gitlab-ci/lava/lava_job_submitter.py
index 39d07deac40..064219de782 100755
--- a/.gitlab-ci/lava/lava_job_submitter.py
+++ b/.gitlab-ci/lava/lava_job_submitter.py
@@ -18,7 +18,7 @@ from collections import defaultdict
from dataclasses import dataclass, fields
from datetime import datetime, timedelta
from io import StringIO
-from os import getenv
+from os import environ, getenv, path
from typing import Any, Optional
import fire
@@ -282,6 +282,7 @@ def print_job_final_status(job):
def execute_job_with_retries(
proxy, job_definition, retry_count, jobs_log
) -> Optional[LAVAJob]:
+ last_failed_job = None
for attempt_no in range(1, retry_count + 2):
# Need to get the logger value from its object to enable autosave
# features, if AutoSaveDict is enabled from StructuredLogging module
@@ -290,41 +291,52 @@ def execute_job_with_retries(
job = LAVAJob(proxy, job_definition, job_log)
STRUCTURAL_LOG["dut_attempt_counter"] = attempt_no
try:
+ job_log["submitter_start_time"] = datetime.now().isoformat()
submit_job(job)
wait_for_job_get_started(job)
log_follower: LogFollower = bootstrap_log_follower()
follow_job_execution(job, log_follower)
return job
+
except (MesaCIException, KeyboardInterrupt) as exception:
job.handle_exception(exception)
+
+ finally:
+ print_job_final_status(job)
+ # If LAVA takes too long to post process the job, the submitter
+ # gives up and proceeds.
+ job_log["submitter_end_time"] = datetime.now().isoformat()
+ last_failed_job = job
print_log(
f"{CONSOLE_LOG['BOLD']}"
f"Finished executing LAVA job in the attempt #{attempt_no}"
f"{CONSOLE_LOG['RESET']}"
)
- finally:
- job_log["finished_time"] = datetime.now().isoformat()
- print_job_final_status(job)
+
+ return last_failed_job
def retriable_follow_job(proxy, job_definition) -> LAVAJob:
number_of_retries = NUMBER_OF_RETRIES_TIMEOUT_DETECTION
- if finished_job := execute_job_with_retries(
+ last_attempted_job = execute_job_with_retries(
proxy, job_definition, number_of_retries, STRUCTURAL_LOG["dut_jobs"]
- ):
- return finished_job
-
- # Job failed in all attempts
- raise MesaCIRetryError(
- f"{CONSOLE_LOG['BOLD']}"
- f"{CONSOLE_LOG['FG_RED']}"
- "Job failed after it exceeded the number of "
- f"{number_of_retries} retries."
- f"{CONSOLE_LOG['RESET']}",
- retry_count=number_of_retries,
)
+ if last_attempted_job.exception is not None:
+ # Infra failed in all attempts
+ raise MesaCIRetryError(
+ f"{CONSOLE_LOG['BOLD']}"
+ f"{CONSOLE_LOG['FG_RED']}"
+ "Job failed after it exceeded the number of "
+ f"{number_of_retries} retries."
+ f"{CONSOLE_LOG['RESET']}",
+ retry_count=number_of_retries,
+ last_job=last_attempted_job,
+ )
+
+ return last_attempted_job
+
@dataclass
class PathResolver:
@@ -371,20 +383,9 @@ class LAVAJobSubmitter(PathResolver):
return
self.__structured_log_context = StructuredLoggerWrapper(self).logger_context()
+ self.proxy = setup_lava_proxy()
- def dump(self, job_definition):
- if self.dump_yaml:
- with GitlabSection(
- "yaml_dump",
- "LAVA job definition (YAML)",
- type=LogSectionType.LAVA_BOOT,
- start_collapsed=True,
- ):
- print(hide_sensitive_data(job_definition))
-
- def submit(self):
- proxy = setup_lava_proxy()
-
+ def __prepare_submission(self) -> str:
# Overwrite the timeout for the testcases with the value offered by the
# user. The testcase running time should be at least 4 times greater than
# the other sections (boot and setup), so we can safely ignore them.
@@ -398,27 +399,81 @@ class LAVAJobSubmitter(PathResolver):
lava_yaml.dump(generate_lava_yaml_payload(self), job_definition_stream)
job_definition = job_definition_stream.getvalue()
- self.dump(job_definition)
+ if self.dump_yaml:
+ self.dump_job_definition(job_definition)
- job = LAVAJob(proxy, job_definition)
- if errors := job.validate():
+ validation_job = LAVAJob(self.proxy, job_definition)
+ if errors := validation_job.validate():
fatal_err(f"Error in LAVA job definition: {errors}")
print_log("LAVA job definition validated successfully")
+ return job_definition
+
+ @classmethod
+ def is_under_ci(cls):
+ ci_envvar: str = getenv("CI", "false")
+ return ci_envvar.lower() == "true"
+
+ def dump_job_definition(self, job_definition) -> None:
+ with GitlabSection(
+ "yaml_dump",
+ "LAVA job definition (YAML)",
+ type=LogSectionType.LAVA_BOOT,
+ start_collapsed=True,
+ ):
+ print(hide_sensitive_data(job_definition))
+
+ def submit(self) -> None:
+ """
+ Prepares and submits the LAVA job.
+ If `validate_only` is True, it validates the job without submitting it.
+ If the job finishes with a non-pass status or encounters an exception,
+ the program exits with a non-zero return code.
+ """
+ job_definition: str = self.__prepare_submission()
+
if self.validate_only:
return
with self.__structured_log_context:
+ last_attempt_job = None
try:
- finished_job = retriable_follow_job(proxy, job_definition)
- exit_code = 0 if finished_job.status == "pass" else 1
- STRUCTURAL_LOG["job_combined_status"] = job.status
- sys.exit(exit_code)
+ last_attempt_job = retriable_follow_job(self.proxy, job_definition)
+
+ except MesaCIRetryError as retry_exception:
+ last_attempt_job = retry_exception.last_job
except Exception as exception:
STRUCTURAL_LOG["job_combined_fail_reason"] = str(exception)
raise exception
+ finally:
+ self.finish_script(last_attempt_job)
+
+ def print_log_artifact_url(self):
+ base_url = "https://$CI_PROJECT_ROOT_NAMESPACE.pages.freedesktop.org/"
+ artifacts_path = "-/$CI_PROJECT_NAME/-/jobs/$CI_JOB_ID/artifacts/"
+ relative_log_path = self.structured_log_file.relative_to(pathlib.Path.cwd())
+ full_path = f"{base_url}{artifacts_path}{relative_log_path}"
+ artifact_url = path.expandvars(full_path)
+
+ print_log(f"Structural Logging data available at: {artifact_url}")
+
+ def finish_script(self, last_attempt_job):
+ if self.is_under_ci() and self.structured_log_file:
+ self.print_log_artifact_url()
+
+ if not last_attempt_job:
+ # No job was run, something bad happened
+ STRUCTURAL_LOG["job_combined_status"] = "script_crash"
+ current_exception = str(sys.exc_info()[0])
+ STRUCTURAL_LOG["job_combined_fail_reason"] = current_exception
+ raise SystemExit(1)
+
+ STRUCTURAL_LOG["job_combined_status"] = last_attempt_job.status
+
+ if last_attempt_job.status != "pass":
+ raise SystemExit(1)
class StructuredLoggerWrapper:
def __init__(self, submitter: LAVAJobSubmitter) -> None:
diff --git a/.gitlab-ci/lava/utils/lava_job.py b/.gitlab-ci/lava/utils/lava_job.py
index 04d5300e8f2..b69f8b9fbb7 100644
--- a/.gitlab-ci/lava/utils/lava_job.py
+++ b/.gitlab-ci/lava/utils/lava_job.py
@@ -34,6 +34,7 @@ class LAVAJob:
self._is_finished = False
self.log: dict[str, Any] = log
self.status = "not_submitted"
+ self.__exception: Optional[str] = None
def heartbeat(self) -> None:
self.last_log_time: datetime = datetime.now()
@@ -61,6 +62,15 @@ class LAVAJob:
def is_finished(self) -> bool:
return self._is_finished
+ @property
+ def exception(self) -> str:
+ return self.__exception
+
+ @exception.setter
+ def exception(self, exception: Exception) -> None:
+ self.__exception = repr(exception)
+ self.log["dut_job_fail_reason"] = self.__exception
+
def validate(self) -> Optional[dict]:
"""Returns a dict with errors, if the validation fails.
@@ -158,6 +168,10 @@ class LAVAJob:
def handle_exception(self, exception: Exception):
print_log(exception)
+ self.cancel()
+ self.exception = exception
+
+ # Give more accurate status depending on exception
if isinstance(exception, MesaCIKnownIssueException):
self.status = "canceled"
elif isinstance(exception, MesaCITimeoutError):
@@ -165,11 +179,8 @@ class LAVAJob:
elif isinstance(exception, MesaCIException):
self.status = "failed"
elif isinstance(exception, KeyboardInterrupt):
- self.status = "canceled_by_user"
+ self.status = "interrupted"
print_log("LAVA job submitter was interrupted. Cancelling the job.")
- raise exception
+ raise
else:
self.status = "job_submitter_error"
-
- self.cancel()
- self.log["dut_job_fail_reason"] = str(exception)
diff --git a/.gitlab-ci/tests/test_lava_job_submitter.py b/.gitlab-ci/tests/test_lava_job_submitter.py
index 7ef2255550c..f8208a3a781 100644
--- a/.gitlab-ci/tests/test_lava_job_submitter.py
+++ b/.gitlab-ci/tests/test_lava_job_submitter.py
@@ -5,11 +5,13 @@
#
# SPDX-License-Identifier: MIT
+import os
import xmlrpc.client
from contextlib import nullcontext as does_not_raise
from datetime import datetime
from itertools import chain, repeat
from pathlib import Path
+from unittest.mock import MagicMock, patch
import pytest
from lava.exceptions import MesaCIException, MesaCIRetryError
@@ -50,6 +52,32 @@ def mock_proxy_waiting_time(mock_proxy):
return update_mock_proxy
+@pytest.fixture(params=[{"CI": "true"}, {"CI": "false"}], ids=["Under CI", "Local run"])
+def ci_environment(request):
+ with patch.dict(os.environ, request.param):
+ yield
+
+
+@pytest.fixture
+def lava_job_submitter(
+ ci_environment,
+ tmp_path,
+ mock_proxy,
+):
+ os.chdir(tmp_path)
+ tmp_file = Path(tmp_path) / "log.json"
+
+ with patch("lava.lava_job_submitter.setup_lava_proxy") as mock_setup_lava_proxy:
+ mock_setup_lava_proxy.return_value = mock_proxy()
+ yield LAVAJobSubmitter(
+ boot_method="test_boot",
+ ci_project_dir="test_dir",
+ device_type="test_device",
+ job_timeout_min=1,
+ structured_log_file=tmp_file,
+ )
+
+
@pytest.mark.parametrize("exception", [RuntimeError, SystemError, KeyError])
def test_submit_and_follow_respects_exceptions(mock_sleep, mock_proxy, exception):
with pytest.raises(MesaCIException):
@@ -301,7 +329,7 @@ def test_parse_job_result_from_log(message, expectation, mock_proxy):
@pytest.mark.slow(
reason="Slow and sketchy test. Needs a LAVA log raw file at /tmp/log.yaml"
)
-def test_full_yaml_log(mock_proxy, frozen_time, tmp_path):
+def test_full_yaml_log(mock_proxy, frozen_time, lava_job_submitter):
import random
from lavacli.utils import flow_yaml as lava_yaml
@@ -352,10 +380,61 @@ def test_full_yaml_log(mock_proxy, frozen_time, tmp_path):
def reset_logs(*args):
proxy.scheduler.jobs.logs.side_effect = load_lines()
- tmp_file = Path(tmp_path) / "log.json"
- LAVAJobSubmitter(structured_log_file=tmp_file)
proxy.scheduler.jobs.submit = reset_logs
with pytest.raises(MesaCIRetryError):
time_travel_to_test_time()
+ lava_job_submitter.submit()
retriable_follow_job(proxy, "")
- print(tmp_file.read_text())
+ print(lava_job_submitter.structured_log_file.read_text())
+
+
+@pytest.mark.parametrize(
+ "validate_only,finished_job_status,expected_combined_status,expected_exit_code",
+ [
+ (True, "pass", None, None),
+ (False, "pass", "pass", 0),
+ (False, "fail", "fail", 1),
+ ],
+ ids=[
+ "validate_only_no_job_submission",
+ "successful_job_submission",
+ "failed_job_submission",
+ ],
+)
+def test_job_combined_status(
+ lava_job_submitter,
+ validate_only,
+ finished_job_status,
+ expected_combined_status,
+ expected_exit_code,
+):
+ lava_job_submitter.validate_only = validate_only
+
+ with patch(
+ "lava.lava_job_submitter.retriable_follow_job"
+ ) as mock_retriable_follow_job, patch(
+ "lava.lava_job_submitter.LAVAJobSubmitter._LAVAJobSubmitter__prepare_submission"
+ ) as mock_prepare_submission, patch(
+ "sys.exit"
+ ):
+ from lava.lava_job_submitter import STRUCTURAL_LOG
+
+ mock_retriable_follow_job.return_value = MagicMock(status=finished_job_status)
+
+ mock_job_definition = MagicMock(spec=str)
+ mock_prepare_submission.return_value = mock_job_definition
+ original_status: str = STRUCTURAL_LOG.get("job_combined_status")
+
+ if validate_only:
+ lava_job_submitter.submit()
+ mock_retriable_follow_job.assert_not_called()
+ assert STRUCTURAL_LOG.get("job_combined_status") == original_status
+ return
+
+ try:
+ lava_job_submitter.submit()
+
+ except SystemExit as e:
+ assert e.code == expected_exit_code
+
+ assert STRUCTURAL_LOG["job_combined_status"] == expected_combined_status