diff --git a/backend/.gitignore b/backend/.gitignore index 2bf52705f..85d64fd19 100644 --- a/backend/.gitignore +++ b/backend/.gitignore @@ -180,3 +180,6 @@ runtime/secrets/postgres_password_secret # This is where the sqlite cache will be stored by default if not running on Docker. volume_data/*.sqlite3 volume_data/*.yaml + +# Temporary submissions directory (for development/testing purposes) +kernelCI_app/management/commands/tmp_submissions/* \ No newline at end of file diff --git a/backend/kernelCI_app/helpers/trees.py b/backend/kernelCI_app/helpers/trees.py index e2057c408..c719ef646 100644 --- a/backend/kernelCI_app/helpers/trees.py +++ b/backend/kernelCI_app/helpers/trees.py @@ -57,6 +57,7 @@ def sanitize_tree( """Sanitizes a checkout that was returned by a 'treelisting-like' query Returns a Checkout object""" + build_status = StatusCount( PASS=checkout["pass_builds"], FAIL=checkout["fail_builds"], @@ -87,13 +88,23 @@ def sanitize_tree( "skip": checkout["skip_boots"], } - if isinstance(checkout.get("git_commit_tags"), str): + # Has to check if it's a string because sqlite doesn't support ArrayFields. + # So if the query came from sqlite, it will be a string. + git_commit_tags = checkout.get("git_commit_tags") + if isinstance(git_commit_tags, str): try: checkout["git_commit_tags"] = json.loads(checkout["git_commit_tags"]) if not isinstance(checkout["git_commit_tags"], list): checkout["git_commit_tags"] = [] except json.JSONDecodeError: checkout["git_commit_tags"] = [] + elif git_commit_tags and isinstance(git_commit_tags, list): + first_tag = git_commit_tags[0] + if isinstance(first_tag, str): + # The git_commit_tags comes as list[str] on a normal query, but `Checkout` + # expects list[list[str]]. This is a workaround, the queries should *always* + # return a simples list[str]. + checkout["git_commit_tags"] = [git_commit_tags] return Checkout( **checkout, diff --git a/backend/kernelCI_app/management/commands/helpers/denormal.py b/backend/kernelCI_app/management/commands/helpers/denormal.py new file mode 100644 index 000000000..23e38c01a --- /dev/null +++ b/backend/kernelCI_app/management/commands/helpers/denormal.py @@ -0,0 +1,127 @@ +from datetime import datetime +from django.db import connections +from kernelCI_app.models import Checkouts, TreeListing + + +def handle_checkout_denormalization(*, buffer: list[Checkouts]) -> None: + """Deals with the operations related to the extra tables for denormalization. + + In the case of checkouts, it will update TreeListing table, and consume from PendingCheckouts. + """ + + if not buffer: + return + + tuple_params = [ + (c.origin, c.tree_name, c.git_repository_branch, c.git_repository_url) + for c in buffer + ] + flattened_list = [] + for tuple in tuple_params: + flattened_list += list(tuple) + + # check if the tree already exists on TreeListing // check which trees exist + query = f""" + SELECT + checkout_id, + start_time + FROM + tree_listing t + JOIN + (VALUES {','.join(["(%s, %s, %s, %s)"] * len(tuple_params))}) + AS v(origin, tree_name, git_repository_branch, git_repository_url) + ON ( + t.origin = v.origin + AND t.tree_name = v.tree_name + AND t.git_repository_branch = v.git_repository_branch + AND t.git_repository_url = v.git_repository_url + ) + """ + + with connections["default"].cursor() as cursor: + cursor.execute(query, flattened_list) + results = cursor.fetchall() + + existing_checkouts_map = {r[0]: r[1] for r in results} + + checkouts_for_update: list[Checkouts] = [] + + # results now have the list of checkout_id that *are* in the TreeListing + for checkout in buffer: + # if the checkout is in treeListing, check the start_time + if checkout.id in existing_checkouts_map: + # if newer than existing, update + checkout_start_time = datetime.fromisoformat(checkout.start_time) + if checkout_start_time >= existing_checkouts_map[checkout.id]: + checkouts_for_update.append(checkout) + # if older than existing, ignore (no action) + # if it's not on treeListing, add it + else: + checkouts_for_update.append(checkout) + + if checkouts_for_update: + tree_listing_objects = [ + TreeListing( + field_timestamp=checkout.field_timestamp, + checkout_id=checkout.id, + origin=checkout.origin, + tree_name=checkout.tree_name, + git_repository_url=checkout.git_repository_url, + git_repository_branch=checkout.git_repository_branch, + git_commit_hash=checkout.git_commit_hash, + git_commit_name=checkout.git_commit_name, + git_commit_tags=checkout.git_commit_tags, + start_time=checkout.start_time, + origin_builds_finish_time=checkout.origin_builds_finish_time, + origin_tests_finish_time=checkout.origin_tests_finish_time, + # Countings are defaulted to 0 when not provided + ) + for checkout in checkouts_for_update + ] + + TreeListing.objects.bulk_create( + tree_listing_objects, + update_conflicts=True, + unique_fields=[ + "origin", + "tree_name", + "git_repository_branch", + "git_repository_url", + ], + update_fields=[ + "field_timestamp", + "checkout_id", + "origin", + "tree_name", + "git_repository_url", + "git_repository_branch", + "git_commit_hash", + "git_commit_name", + "git_commit_tags", + "start_time", + "origin_builds_finish_time", + "origin_tests_finish_time", + "pass_builds", + "fail_builds", + "done_builds", + "miss_builds", + "skip_builds", + "error_builds", + "null_builds", + "pass_boots", + "fail_boots", + "done_boots", + "miss_boots", + "skip_boots", + "error_boots", + "null_boots", + "pass_tests", + "fail_tests", + "done_tests", + "miss_tests", + "skip_tests", + "error_tests", + "null_tests", + ], + ) + print(f"Updated {len(checkouts_for_update)} trees in TreeListing", flush=True) diff --git a/backend/kernelCI_app/management/commands/helpers/kcidbng_ingester.py b/backend/kernelCI_app/management/commands/helpers/kcidbng_ingester.py index 02c310eb4..17607a654 100644 --- a/backend/kernelCI_app/management/commands/helpers/kcidbng_ingester.py +++ b/backend/kernelCI_app/management/commands/helpers/kcidbng_ingester.py @@ -14,11 +14,17 @@ import yaml import kcidb_io from django.db import transaction +from kernelCI_app.management.commands.helpers.denormal import ( + handle_checkout_denormalization, +) from kernelCI_app.models import Issues, Checkouts, Builds, Tests, Incidents from kernelCI_app.management.commands.helpers.process_submissions import ( + ProcessedSubmission, + TableNames, build_instances_from_submission, ) +from kernelCI_app.typeModels.modelTypes import MODEL_MAP, TableModels VERBOSE = 0 LOGEXCERPT_THRESHOLD = 256 # 256 bytes threshold for logexcerpt @@ -293,6 +299,29 @@ def prepare_file_data(filename, trees_name, spool_dir): } +def consume_buffer(buffer: list[TableModels], item_type: TableNames) -> None: + """ + Consume a buffer of items and insert them into the database. + This function is called by the db_worker thread. + """ + if not buffer: + return + + if item_type == "checkouts": + handle_checkout_denormalization(buffer=buffer) + + model = MODEL_MAP[item_type] + + t0 = time.time() + model.objects.bulk_create( + buffer, + batch_size=INGEST_BATCH_SIZE, + ignore_conflicts=True, + ) + _out("bulk_create %s: n=%d in %.3fs" % (item_type, len(buffer), time.time() - t0)) + + +# TODO: lower the complexity of this function def db_worker(stop_event: threading.Event): # noqa: C901 """ Worker thread that processes the database queue. @@ -303,11 +332,11 @@ def db_worker(stop_event: threading.Event): # noqa: C901 """ # Local buffers for batching - issues_buf = [] - checkouts_buf = [] - builds_buf = [] - tests_buf = [] - incidents_buf = [] + issues_buf: list[Issues] = [] + checkouts_buf: list[Checkouts] = [] + builds_buf: list[Builds] = [] + tests_buf: list[Tests] = [] + incidents_buf: list[Incidents] = [] last_flush_ts = time.time() @@ -331,55 +360,11 @@ def flush_buffers(): try: # Single transaction for all tables in the flush with transaction.atomic(): - if issues_buf: - t0 = time.time() - Issues.objects.bulk_create( - issues_buf, batch_size=INGEST_BATCH_SIZE, ignore_conflicts=True - ) - _out( - "bulk_create issues: n=%d in %.3fs" - % (len(issues_buf), time.time() - t0) - ) - if checkouts_buf: - t0 = time.time() - Checkouts.objects.bulk_create( - checkouts_buf, - batch_size=INGEST_BATCH_SIZE, - ignore_conflicts=True, - ) - _out( - "bulk_create checkouts: n=%d in %.3fs" - % (len(checkouts_buf), time.time() - t0) - ) - if builds_buf: - t0 = time.time() - Builds.objects.bulk_create( - builds_buf, batch_size=INGEST_BATCH_SIZE, ignore_conflicts=True - ) - _out( - "bulk_create builds: n=%d in %.3fs" - % (len(builds_buf), time.time() - t0) - ) - if tests_buf: - t0 = time.time() - Tests.objects.bulk_create( - tests_buf, batch_size=INGEST_BATCH_SIZE, ignore_conflicts=True - ) - _out( - "bulk_create tests: n=%d in %.3fs" - % (len(tests_buf), time.time() - t0) - ) - if incidents_buf: - t0 = time.time() - Incidents.objects.bulk_create( - incidents_buf, - batch_size=INGEST_BATCH_SIZE, - ignore_conflicts=True, - ) - _out( - "bulk_create incidents: n=%d in %.3fs" - % (len(incidents_buf), time.time() - t0) - ) + consume_buffer(issues_buf, "issues") + consume_buffer(checkouts_buf, "checkouts") + consume_buffer(builds_buf, "builds") + consume_buffer(tests_buf, "tests") + consume_buffer(incidents_buf, "incidents") except Exception as e: logger.error("Error during bulk_create flush: %s", e) finally: @@ -415,7 +400,7 @@ def flush_buffers(): try: data, metadata = item if data is not None: - inst = build_instances_from_submission(data) + inst: ProcessedSubmission = build_instances_from_submission(data) issues_buf.extend(inst["issues"]) checkouts_buf.extend(inst["checkouts"]) builds_buf.extend(inst["builds"]) diff --git a/backend/kernelCI_app/management/commands/helpers/process_submissions.py b/backend/kernelCI_app/management/commands/helpers/process_submissions.py index 0be3bee70..73d3822cf 100644 --- a/backend/kernelCI_app/management/commands/helpers/process_submissions.py +++ b/backend/kernelCI_app/management/commands/helpers/process_submissions.py @@ -1,14 +1,23 @@ import logging from django.utils import timezone -from typing import Any, Literal +from typing import Any, TypedDict from django.db import IntegrityError from pydantic import ValidationError from kernelCI_app.models import Builds, Checkouts, Incidents, Issues, Tests +from kernelCI_app.typeModels.modelTypes import TableNames -TableNames = Literal["issues", "checkouts", "builds", "tests", "incidents"] +class ProcessedSubmission(TypedDict): + """Stores the list of items in a single submission. + Lists can't be None but can be empty.""" + + issues: list[Issues] + checkouts: list[Checkouts] + builds: list[Builds] + tests: list[Tests] + incidents: list[Incidents] logger = logging.getLogger(__name__) @@ -128,12 +137,12 @@ def make_incident_instance(incident) -> Incidents: return obj -def build_instances_from_submission(data: dict[str, Any]) -> dict[TableNames, list]: +def build_instances_from_submission(data: dict[str, Any]) -> ProcessedSubmission: """ Convert raw submission dicts into unsaved Django model instances, grouped by type. Per-item errors are logged and the item is skipped, matching the previous behavior. """ - out: dict[TableNames, list] = { + out: ProcessedSubmission = { "issues": [], "checkouts": [], "builds": [], diff --git a/backend/kernelCI_app/migrations/0004_treelisting.py b/backend/kernelCI_app/migrations/0004_treelisting.py new file mode 100644 index 000000000..30b8b1ed2 --- /dev/null +++ b/backend/kernelCI_app/migrations/0004_treelisting.py @@ -0,0 +1,90 @@ +# Generated by Django 5.1.12 on 2025-10-10 19:06 + +import django.contrib.postgres.fields +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("kernelCI_app", "0003_add_build_series_field"), + ] + + operations = [ + migrations.CreateModel( + name="TreeListing", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("field_timestamp", models.DateTimeField(db_column="_timestamp")), + ("checkout_id", models.TextField()), + ("origin", models.TextField()), + ("tree_name", models.TextField(blank=True, null=True)), + ("git_repository_url", models.TextField(blank=True, null=True)), + ("git_repository_branch", models.TextField(blank=True, null=True)), + ("git_commit_hash", models.TextField(blank=True, null=True)), + ("git_commit_name", models.TextField(blank=True, null=True)), + ( + "git_commit_tags", + django.contrib.postgres.fields.ArrayField( + base_field=models.TextField(), size=None + ), + ), + ("start_time", models.DateTimeField(blank=True, null=True)), + ( + "origin_builds_finish_time", + models.DateTimeField(blank=True, null=True), + ), + ( + "origin_tests_finish_time", + models.DateTimeField(blank=True, null=True), + ), + ("pass_builds", models.IntegerField(default=0)), + ("fail_builds", models.IntegerField(default=0)), + ("done_builds", models.IntegerField(default=0)), + ("miss_builds", models.IntegerField(default=0)), + ("skip_builds", models.IntegerField(default=0)), + ("error_builds", models.IntegerField(default=0)), + ("null_builds", models.IntegerField(default=0)), + ("pass_boots", models.IntegerField(default=0)), + ("fail_boots", models.IntegerField(default=0)), + ("done_boots", models.IntegerField(default=0)), + ("miss_boots", models.IntegerField(default=0)), + ("skip_boots", models.IntegerField(default=0)), + ("error_boots", models.IntegerField(default=0)), + ("null_boots", models.IntegerField(default=0)), + ("pass_tests", models.IntegerField(default=0)), + ("fail_tests", models.IntegerField(default=0)), + ("done_tests", models.IntegerField(default=0)), + ("miss_tests", models.IntegerField(default=0)), + ("skip_tests", models.IntegerField(default=0)), + ("error_tests", models.IntegerField(default=0)), + ("null_tests", models.IntegerField(default=0)), + ], + options={ + "db_table": "tree_listing", + "indexes": [ + models.Index(fields=["start_time"], name="tree_listing_start_time"), + models.Index(fields=["origin"], name="tree_listing_origin"), + ], + "constraints": [ + models.UniqueConstraint( + fields=( + "origin", + "tree_name", + "git_repository_url", + "git_repository_branch", + ), + name="unique_tree", + ) + ], + }, + ), + ] diff --git a/backend/kernelCI_app/models.py b/backend/kernelCI_app/models.py index 1b4e18f12..5d28118af 100644 --- a/backend/kernelCI_app/models.py +++ b/backend/kernelCI_app/models.py @@ -164,3 +164,60 @@ class Incidents(models.Model): class Meta: db_table = "incidents" + + +class TreeListing(models.Model): + field_timestamp = models.DateTimeField(db_column="_timestamp") # Created at + checkout_id = models.TextField() + origin = models.TextField() + tree_name = models.TextField(blank=True, null=True) + git_repository_url = models.TextField(blank=True, null=True) + git_repository_branch = models.TextField(blank=True, null=True) + git_commit_hash = models.TextField(blank=True, null=True) + git_commit_name = models.TextField(blank=True, null=True) + git_commit_tags = ArrayField(models.TextField()) # Cannot be null, but can be empty + start_time = models.DateTimeField(blank=True, null=True) + origin_builds_finish_time = models.DateTimeField(blank=True, null=True) + origin_tests_finish_time = models.DateTimeField(blank=True, null=True) + + pass_builds = models.IntegerField(default=0) + fail_builds = models.IntegerField(default=0) + done_builds = models.IntegerField(default=0) + miss_builds = models.IntegerField(default=0) + skip_builds = models.IntegerField(default=0) + error_builds = models.IntegerField(default=0) + null_builds = models.IntegerField(default=0) + + pass_boots = models.IntegerField(default=0) + fail_boots = models.IntegerField(default=0) + done_boots = models.IntegerField(default=0) + miss_boots = models.IntegerField(default=0) + skip_boots = models.IntegerField(default=0) + error_boots = models.IntegerField(default=0) + null_boots = models.IntegerField(default=0) + + pass_tests = models.IntegerField(default=0) + fail_tests = models.IntegerField(default=0) + done_tests = models.IntegerField(default=0) + miss_tests = models.IntegerField(default=0) + skip_tests = models.IntegerField(default=0) + error_tests = models.IntegerField(default=0) + null_tests = models.IntegerField(default=0) + + class Meta: + db_table = "tree_listing" + constraints = [ + models.UniqueConstraint( + fields=[ + "origin", + "tree_name", + "git_repository_url", + "git_repository_branch", + ], + name="unique_tree", + ) + ] + indexes = [ + models.Index(fields=["start_time"], name="tree_listing_start_time"), + models.Index(fields=["origin"], name="tree_listing_origin"), + ] diff --git a/backend/kernelCI_app/typeModels/modelTypes.py b/backend/kernelCI_app/typeModels/modelTypes.py new file mode 100644 index 000000000..0369e2ddf --- /dev/null +++ b/backend/kernelCI_app/typeModels/modelTypes.py @@ -0,0 +1,13 @@ +from typing import Literal +from kernelCI_app.models import Builds, Checkouts, Incidents, Issues, Tests + +type TableNames = Literal["issues", "checkouts", "builds", "tests", "incidents"] +type TableModels = Issues | Checkouts | Builds | Tests | Incidents + +MODEL_MAP: dict[TableNames, TableModels] = { + "issues": Issues, + "checkouts": Checkouts, + "builds": Builds, + "tests": Tests, + "incidents": Incidents, +} diff --git a/backend/kernelCI_app/typeModels/treeListing.py b/backend/kernelCI_app/typeModels/treeListing.py index 96c01513d..defec76da 100644 --- a/backend/kernelCI_app/typeModels/treeListing.py +++ b/backend/kernelCI_app/typeModels/treeListing.py @@ -1,4 +1,3 @@ -from typing import List from kernelCI_app.typeModels.common import StatusCount from kernelCI_app.typeModels.databases import ( Checkout__GitCommitHash, @@ -71,8 +70,8 @@ class CheckoutFast(CommonCheckouts): class TreeListingResponse(RootModel): - root: List[Checkout] + root: list[Checkout] class TreeListingFastResponse(RootModel): - root: List[CheckoutFast] + root: list[CheckoutFast] diff --git a/backend/kernelCI_app/urls.py b/backend/kernelCI_app/urls.py index 372460f84..d218b0ab7 100644 --- a/backend/kernelCI_app/urls.py +++ b/backend/kernelCI_app/urls.py @@ -25,6 +25,7 @@ def view_cache(view): path("test/", view_cache(views.TestDetails), name="testDetails"), path("tree/", view_cache(views.TreeView), name="tree"), path("tree-fast/", view_cache(views.TreeViewFast), name="tree-fast"), + path("tree/listing", view_cache(views.TreeListingView), name="treeListing"), path( "tree//full", views.TreeDetails.as_view(), diff --git a/backend/kernelCI_app/views/treeListingView.py b/backend/kernelCI_app/views/treeListingView.py new file mode 100644 index 000000000..1c80171db --- /dev/null +++ b/backend/kernelCI_app/views/treeListingView.py @@ -0,0 +1,127 @@ +from django.http import HttpRequest +from drf_spectacular.utils import extend_schema +from kernelCI_app.constants.localization import ClientStrings +from kernelCI_app.helpers.database import dict_fetchall +from kernelCI_app.helpers.errorHandling import create_api_error_response +from rest_framework.views import APIView +from rest_framework.response import Response +from kernelCI_app.helpers.trees import sanitize_tree +from kernelCI_app.typeModels.commonListing import ListingQueryParameters +from http import HTTPStatus +from kernelCI_app.typeModels.treeListing import TreeListingResponse +from pydantic import ValidationError + +from django.db import connections + + +# TODO: move to queries folder +# TODO: add unit tests +# TODO: stop using relative intervals! +def get_new_tree_listing_data(origin: str, interval_in_days: int) -> list[dict]: + """ + Fetches data from the tree_listing table for direct use in the frontend. + The status counts are NOT grouped in the return. + """ + + query = """ + SELECT + id, + _timestamp, + checkout_id, + origin, + tree_name, + git_repository_url, + git_repository_branch, + git_commit_hash, + git_commit_name, + git_commit_tags, + start_time, + origin_builds_finish_time, + origin_tests_finish_time, + + pass_builds, + fail_builds, + done_builds, + miss_builds, + skip_builds, + error_builds, + null_builds, + + pass_boots, + fail_boots, + done_boots, + miss_boots, + skip_boots, + error_boots, + null_boots, + + pass_tests, + fail_tests, + done_tests, + miss_tests, + skip_tests, + error_tests, + null_tests + FROM + tree_listing + WHERE + origin = %s + AND start_time >= NOW() - INTERVAL '%s days' + """ + + params = [origin, interval_in_days] + + with connections["default"].cursor() as cursor: + cursor.execute(query, params) + result = dict_fetchall(cursor) + + return result + + +class TreeListingView(APIView): + @extend_schema( + responses=TreeListingResponse, + parameters=[ListingQueryParameters], + methods=["GET"], + ) + def get(self, request: HttpRequest) -> Response: + """ + Returns the checkout data for trees in a specific origin, in the last X days. + The data includes the number of builds, boots and tests, grouped by status, for each checkout. + + Query params (`ListingQueryParameters`): + - origin: str + - interval_in_days: int + + Status returns: + - 200: A list of checkouts with their respective data. + - 400: Bad request, invalid parameters. + - 500: Internal server error, something went wrong on the server. Usually validation. + """ + + try: + request_params = ListingQueryParameters( + origin=request.GET.get("origin"), + interval_in_days=request.GET.get("interval_in_days"), + ) + except ValidationError as e: + return Response(data=e.json(), status=HTTPStatus.BAD_REQUEST) + + rows = get_new_tree_listing_data( + origin=request_params.origin, + interval_in_days=request_params.interval_in_days, + ) + + if not rows: + return create_api_error_response( + error_message=ClientStrings.NO_TREES_FOUND, status_code=HTTPStatus.OK + ) + + try: + valid_response = TreeListingResponse( + root=[sanitize_tree(row) for row in rows] + ) + except ValidationError as e: + return Response(data=e.json(), status=HTTPStatus.INTERNAL_SERVER_ERROR) + + return Response(valid_response.model_dump(by_alias=True)) diff --git a/backend/schema.yml b/backend/schema.yml index 9c415f066..7895e9aa9 100644 --- a/backend/schema.yml +++ b/backend/schema.yml @@ -1597,6 +1597,50 @@ paths: schema: $ref: '#/components/schemas/CommonDetailsTestsResponse' description: '' + /api/tree/listing: + get: + operationId: tree_listing_retrieve + description: |- + Returns the checkout data for trees in a specific origin, in the last X days. + The data includes the number of builds, boots and tests, grouped by status, for each checkout. + + Query params (`ListingQueryParameters`): + - origin: str + - interval_in_days: int + + Status returns: + - 200: A list of checkouts with their respective data. + - 400: Bad request, invalid parameters. + - 500: Internal server error, something went wrong on the server. Usually validation. + parameters: + - in: query + name: interval_in_days + schema: + default: 7 + exclusiveMinimum: 0 + title: Interval In Days + type: integer + description: Interval in days for the listing + - in: query + name: origin + schema: + default: maestro + title: Origin + type: string + description: Origin filter + tags: + - tree + security: + - cookieAuth: [] + - basicAuth: [] + - {} + responses: + '200': + content: + application/json: + schema: + $ref: '#/components/schemas/TreeListingResponse' + description: '' components: schemas: BuildArchitectures: @@ -3288,7 +3332,7 @@ components: build_id: $ref: '#/components/schemas/Build__Id' status: - $ref: '#/components/schemas/Test__Status' + $ref: '#/components/schemas/StatusValues' path: $ref: '#/components/schemas/Test__Path' log_excerpt: