From 679b8f9126665890406c852a20f90f0bd3ddcb47 Mon Sep 17 00:00:00 2001
From: vinicius-r-silva
Date: Wed, 20 Aug 2025 23:40:42 -0300
Subject: [PATCH 1/7] trace archive tool v1

---
 .gitignore                                    |   5 +-
 traces/stf_trace_archive/src/data/config.py   |  44 ++++
 traces/stf_trace_archive/src/data/consts.py   |   5 +
 traces/stf_trace_archive/src/data/metadata.py |  37 +++
 .../stf_trace_archive/src/data/output_path.py |   7 +
 .../src/data/source_type_map.py               |   6 +
 .../stf_trace_archive/src/data/trace_data.py  |  11 +
 .../src/data/trace_table_shema.py             |  22 ++
 .../src/data/workload_table_shema.py          |  12 +
 .../database_explorer/database_explorer.py    | 100 ++++++++
 .../src/database_explorer/sources/base.py     |  51 +++++
 .../sources/local_storage.py                  | 216 ++++++++++++++++++
 traces/stf_trace_archive/src/handlers/base.py |   9 +
 traces/stf_trace_archive/src/handlers/get.py  |  42 ++++
 traces/stf_trace_archive/src/handlers/list.py |  23 ++
 .../stf_trace_archive/src/handlers/search.py  |   8 +
 .../stf_trace_archive/src/handlers/setup.py   | 102 +++++++++
 .../stf_trace_archive/src/handlers/upload.py  | 168 ++++++++++++++
 .../src/tests/upload_tests.py                 | 146 ++++++++++++
 .../src/tests/utils/trace_generator.py        | 121 ++++++++++
 traces/stf_trace_archive/src/trace_share.py   |  53 +++++
 .../stf_trace_archive/src/utils/cli_parser.py |  84 +++++++
 .../src/utils/dict_to_obj.py                  |  10 +
 .../src/utils/fields_validator.py             |  39 ++++
 .../src/utils/file_dialog.py                  |  13 ++
 .../src/utils/metadata_parser.py              |  34 +++
 traces/stf_trace_archive/src/utils/sha256.py  |   8 +
 traces/stf_trace_archive/src/utils/ui.py      |  34 +++
 28 files changed, 1409 insertions(+), 1 deletion(-)
 create mode 100644 traces/stf_trace_archive/src/data/config.py
 create mode 100644 traces/stf_trace_archive/src/data/consts.py
 create mode 100644 traces/stf_trace_archive/src/data/metadata.py
 create mode 100644 traces/stf_trace_archive/src/data/output_path.py
 create mode 100644 traces/stf_trace_archive/src/data/source_type_map.py
 create mode 100644 traces/stf_trace_archive/src/data/trace_data.py
 create mode 100644 traces/stf_trace_archive/src/data/trace_table_shema.py
 create mode 100644 traces/stf_trace_archive/src/data/workload_table_shema.py
 create mode 100644 traces/stf_trace_archive/src/database_explorer/database_explorer.py
 create mode 100644 traces/stf_trace_archive/src/database_explorer/sources/base.py
 create mode 100644 traces/stf_trace_archive/src/database_explorer/sources/local_storage.py
 create mode 100644 traces/stf_trace_archive/src/handlers/base.py
 create mode 100644 traces/stf_trace_archive/src/handlers/get.py
 create mode 100644 traces/stf_trace_archive/src/handlers/list.py
 create mode 100644 traces/stf_trace_archive/src/handlers/search.py
 create mode 100644 traces/stf_trace_archive/src/handlers/setup.py
 create mode 100644 traces/stf_trace_archive/src/handlers/upload.py
 create mode 100644 traces/stf_trace_archive/src/tests/upload_tests.py
 create mode 100644 traces/stf_trace_archive/src/tests/utils/trace_generator.py
 create mode 100644 traces/stf_trace_archive/src/trace_share.py
 create mode 100644 traces/stf_trace_archive/src/utils/cli_parser.py
 create mode 100644 traces/stf_trace_archive/src/utils/dict_to_obj.py
 create mode 100644 traces/stf_trace_archive/src/utils/fields_validator.py
 create mode 100644 traces/stf_trace_archive/src/utils/file_dialog.py
 create mode 100644 traces/stf_trace_archive/src/utils/metadata_parser.py
 create mode 100644 traces/stf_trace_archive/src/utils/sha256.py
 create mode 100644 traces/stf_trace_archive/src/utils/ui.py

diff --git a/.gitignore b/.gitignore
index 1c021563..578e573d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -38,4 +38,7 @@
 [Ff]ast[Dd]ebug
 
 # Backup files
-*~
\ No newline at end of file
+*~
+
+# Python cache files
+__pycache__/
\ No newline at end of file
diff --git a/traces/stf_trace_archive/src/data/config.py b/traces/stf_trace_archive/src/data/config.py
new file mode 100644
index 00000000..1d0b4411
--- /dev/null
+++ b/traces/stf_trace_archive/src/data/config.py
@@ -0,0 +1,44 @@
+from dataclasses import dataclass
+from typing import Dict, Optional, Type, Union, List
+
+@dataclass
+class LocalStorageConfig:
+    path: str
+
+CONFIG_TYPE_MAP: Dict[str, Type] = {
+    "local-storage": LocalStorageConfig,
+}
+
+@dataclass
+class StorageConfig:
+    type: str
+    name: str
+    config: Union[LocalStorageConfig]
+
+    @staticmethod
+    def from_dict(data: dict):
+        specific_config_type = data['type']
+        if specific_config_type not in CONFIG_TYPE_MAP:
+            raise ValueError(f"Unknown storage type: {specific_config_type}")
+
+        specific_config_class = CONFIG_TYPE_MAP.get(specific_config_type)
+        specific_config = specific_config_class(**data['config'])
+        return StorageConfig(type=data['type'], name=data['name'], config=specific_config)
+
+@dataclass
+class Config:
+    storages: List[StorageConfig]
+    default_storage: Optional[str]
+
+    @staticmethod
+    def from_dict(data: dict):
+        if not data:
+            return None
+
+        storages = []
+        if 'storages' in data:
+            storages = [StorageConfig.from_dict(s) for s in data['storages']]
+
+        return Config(storages=storages, default_storage=data.get('default_storage'))
\ No newline at end of file
diff --git a/traces/stf_trace_archive/src/data/consts.py b/traces/stf_trace_archive/src/data/consts.py
new file mode 100644
index 00000000..dc1b6ebc
--- /dev/null
+++ b/traces/stf_trace_archive/src/data/consts.py
@@ -0,0 +1,5 @@
+from dataclasses import dataclass
+
+@dataclass(frozen=True)
+class Const():
+    PAD_LENGTH = 4
\ No newline at end of file
diff --git a/traces/stf_trace_archive/src/data/metadata.py b/traces/stf_trace_archive/src/data/metadata.py
new file mode 100644
index 00000000..dc88967d
--- /dev/null
+++ b/traces/stf_trace_archive/src/data/metadata.py
@@ -0,0 +1,36 @@
+from dataclasses import dataclass
+from typing import Optional, Dict
+
+@dataclass
+class Author:
+    name: Optional[str]
+    company: Optional[str]
+    email: str
+
+@dataclass
+class Workload:
+    filename: str
+    SHA256: str
+    execution_command: str
+    elf_sections: Dict[str, str]
+
+@dataclass
+class TraceInterval:
+    instruction_pc: int
+    pc_count: int
+    interval_lenght: int
+    start_instruction_index: int
+    end_instruction_index: int
+
+@dataclass
+class Stf:
+    timestamp: str
+    stf_trace_info: Dict[str, str]
+    trace_interval: Optional[TraceInterval]
+
+@dataclass
+class Metadata:
+    description: Optional[str]
+    author: Author
+    workload: Workload
+    stf: Stf
\ No newline at end of file
diff --git a/traces/stf_trace_archive/src/data/output_path.py b/traces/stf_trace_archive/src/data/output_path.py
new file mode 100644
index 00000000..2ea784c1
--- /dev/null
+++ b/traces/stf_trace_archive/src/data/output_path.py
@@ -0,0 +1,7 @@
+from dataclasses import dataclass
+from typing import Optional
+
+@dataclass
+class OutputPaths():
+    folder_path: str
+    filename: Optional[str]
\ No newline at end of file
diff --git a/traces/stf_trace_archive/src/data/source_type_map.py b/traces/stf_trace_archive/src/data/source_type_map.py
new file mode 100644
index 00000000..1bab714a
--- /dev/null
+++ b/traces/stf_trace_archive/src/data/source_type_map.py
@@ -0,0 +1,6 @@
+from typing import Type, Dict
+from database_explorer.sources.local_storage import LocalStorageSource
+
+SOURCE_TYPE_MAP: Dict[str, Type] = {
+    "local-storage": LocalStorageSource,
+}
\ No newline at end of file
diff --git a/traces/stf_trace_archive/src/data/trace_data.py b/traces/stf_trace_archive/src/data/trace_data.py
new file mode 100644
index 00000000..a3e73f7b
--- /dev/null
+++ b/traces/stf_trace_archive/src/data/trace_data.py
@@ -0,0 +1,11 @@
+from dataclasses import dataclass
+from typing import Optional
+
+@dataclass
+class TraceData():
+    path: Optional[str]
+    id: Optional[str]
+    attempt: Optional[int]
+    part: Optional[int]
+    metadata_path: Optional[str]
+    metadata: Optional[dict]
\ No newline at end of file
diff --git a/traces/stf_trace_archive/src/data/trace_table_shema.py b/traces/stf_trace_archive/src/data/trace_table_shema.py
new file mode 100644
index 00000000..7e918c57
--- /dev/null
+++ b/traces/stf_trace_archive/src/data/trace_table_shema.py
@@ -0,0 +1,22 @@
+from dataclasses import dataclass
+
+@dataclass(frozen=True)
+class TracesTableSchema:
+    TRACE_ID: str = "trace_id"
+    TRACE_ATTEMPT: str = "trace_attempt"
+    TRACE_PART: str = "trace_part"
+    WORKLOAD_ID: str = "workload_id"
+    WORKLOAD_SHA: str = "workload_sha"
+    WORKLOAD_NAME: str = "workload_name"
+    FULLY_TRACED: str = "fully_traced"
+
+    @staticmethod
+    def get_columns() -> list[str]:
+        return [
+            TracesTableSchema.TRACE_ID,
+            TracesTableSchema.TRACE_ATTEMPT,
+            TracesTableSchema.TRACE_PART,
+            TracesTableSchema.WORKLOAD_ID,
+            TracesTableSchema.WORKLOAD_SHA,
+            TracesTableSchema.WORKLOAD_NAME,
+            TracesTableSchema.FULLY_TRACED
+        ]
diff --git a/traces/stf_trace_archive/src/data/workload_table_shema.py b/traces/stf_trace_archive/src/data/workload_table_shema.py
new file mode 100644
index 00000000..bce2be34
--- /dev/null
+++ b/traces/stf_trace_archive/src/data/workload_table_shema.py
@@ -0,0 +1,12 @@
+from dataclasses import dataclass
+
+@dataclass(frozen=True)
+class WorkloadsTableSchema:
+    WORKLOAD_ID: str = "workload_id"
+    WORKLOAD_NAME: str = "workload_name"
+
+    @staticmethod
+    def get_columns() -> list[str]:
+        return [
+            WorkloadsTableSchema.WORKLOAD_ID,
+            WorkloadsTableSchema.WORKLOAD_NAME,
+        ]
diff --git a/traces/stf_trace_archive/src/database_explorer/database_explorer.py b/traces/stf_trace_archive/src/database_explorer/database_explorer.py
new file mode 100644
index 00000000..e0110b3f
--- /dev/null
+++ b/traces/stf_trace_archive/src/database_explorer/database_explorer.py
@@ -0,0 +1,100 @@
+
+from typing import Optional
+import pandas as pd
+from data.trace_table_shema import TracesTableSchema
+from data.workload_table_shema import WorkloadsTableSchema
+from database_explorer.sources.base import SourceHandler
+from data.metadata import Metadata
+from data.trace_data import TraceData
+
+class DatabaseExplorer:
+    def __init__(self, storage: SourceHandler):
+        self.source = storage
+
+    def get_workload_id(self, workload_sha256: str) -> Optional[int]:
+        workload_traces = self.source.traces_table[self.source.traces_table[TracesTableSchema.WORKLOAD_SHA] == workload_sha256]
+        if len(workload_traces) == 0:
+            return None
+
+        return workload_traces.iloc[0][TracesTableSchema.WORKLOAD_ID]
+
+    def upload_workload(self, workload_path: str) -> int:
+        workload_id = self._get_next_workload_id()
+        self.source.insert_workload(workload_path, workload_id)
+        return workload_id
+
+    def _get_next_workload_id(self) -> int:
+        workload_ids = self.source.workloads_table[WorkloadsTableSchema.WORKLOAD_ID].unique()
+        max_workload_id = max(workload_ids, default=-1)
+        return max_workload_id + 1
+
+    def get_workload_name(self, workload_id: int) -> Optional[str]:
+        workload_row = self.source.workloads_table[self.source.workloads_table[WorkloadsTableSchema.WORKLOAD_ID] == workload_id]
+        if len(workload_row) > 0:
+            return workload_row.iloc[0][WorkloadsTableSchema.WORKLOAD_NAME]
+
+        return None
+
+    def get_next_trace_attempt(self, workload_id: int) -> int:
+        trace_attempts = self.get_trace_attempts(workload_id)
+        if not trace_attempts:
+            return 0
+
+        max_attempt = max(trace_attempts)
+        return max_attempt + 1
+
+    def get_trace_attempts(self, workload_id: int) -> list[int]:
+        workload_traces = self.source.traces_table[self.source.traces_table[TracesTableSchema.WORKLOAD_ID] == workload_id]
+        return workload_traces[TracesTableSchema.TRACE_ATTEMPT].unique().tolist()
+
+    def get_trace_parts(self, workload_id: int, trace_attempt: int) -> list[int]:
+        workload_traces = self.source.traces_table[
+            (self.source.traces_table[TracesTableSchema.WORKLOAD_ID] == workload_id) &
+            (self.source.traces_table[TracesTableSchema.TRACE_ATTEMPT] == trace_attempt)
+        ]
+
+        return workload_traces[TracesTableSchema.TRACE_PART].unique().tolist()
+
+    def check_trace_exists(self, trace_id: str) -> bool:
+        return trace_id in self.source.traces_table[TracesTableSchema.TRACE_ID].values
+
+    def upload_traces(self, traces: list[TraceData]) -> None:
+        self.source.update_traces_table()
+        for trace in traces:
+            print(f"Uploading trace: {trace.path} with id: {trace.id}")
+            self._upload_trace(trace.path, trace.metadata)
+
+    def _upload_trace(self, trace_path: str, metadata: Metadata) -> None:
+        trace_id = metadata.get('trace_id')
+        if not trace_id:
+            raise ValueError("Trace ID is required in metadata to upload a trace.")
+
+        if self.check_trace_exists(trace_id):
+            raise ValueError(f"Trace with ID {trace_id} already exists in the database.")
+
+        self.source.insert_trace(trace_path, metadata)
+
+    def is_fully_traced(self, workload_id: int, trace_attempt: int) -> Optional[bool]:
+        trace_row = self.source.traces_table[
+            (self.source.traces_table[TracesTableSchema.WORKLOAD_ID] == workload_id) &
+            (self.source.traces_table[TracesTableSchema.TRACE_ATTEMPT] == trace_attempt)
+        ]
+        if len(trace_row) == 0:
+            return None
+
+        return trace_row.iloc[0][TracesTableSchema.FULLY_TRACED]
+
+    def get_trace_ids(self) -> list[str]:
+        return self.source.traces_table[TracesTableSchema.TRACE_ID].to_list()
+
+    def get_metadata(self, trace_id: str) -> Metadata:
+        return self.source.get_metadata(trace_id)
+
+    def save_metadata(self, trace_id: str, path: str) -> None:
+        return self.source.save_metadata(trace_id, path)
+
+    def save_trace(self, trace_id: str, path: str) -> None:
+        return self.source.save_trace(trace_id, path)
+
+    def save_workload(self, workload_id: int, path: str) -> None:
+        return self.source.save_workload(workload_id, path)
\ No newline at end of file
diff --git a/traces/stf_trace_archive/src/database_explorer/sources/base.py b/traces/stf_trace_archive/src/database_explorer/sources/base.py
new file mode 100644
index 00000000..c8b700c8
--- /dev/null
+++ b/traces/stf_trace_archive/src/database_explorer/sources/base.py
@@ -0,0 +1,51 @@
+import pandas as pd
+
+from abc import ABC, abstractmethod
+from data.output_path import OutputPaths
+from data.metadata import Metadata
+
+class SourceHandler(ABC):
+    @property
+    def traces_table(self):
+        raise NotImplementedError("This method should be overridden by subclasses.")
+
+    @property
+    def workloads_table(self):
+        raise NotImplementedError("This method should be overridden by subclasses.")
+
+    @abstractmethod
+    def update_traces_table(self) -> pd.DataFrame:
+        raise NotImplementedError("This method should be overridden by subclasses.")
+
+    @abstractmethod
+    def update_workloads_table(self) -> pd.DataFrame:
+        raise NotImplementedError("This method should be overridden by subclasses.")
+
+    @abstractmethod
+    def insert_trace(self, trace_path: str, metadata: Metadata) -> None:
+        raise NotImplementedError("This method should be overridden by subclasses.")
+
+    @abstractmethod
+    def insert_workload(self, workload_path: str, workload_id: int) -> None:
+        raise NotImplementedError("This method should be overridden by subclasses.")
+
+    @abstractmethod
+    def get_metadata(self, trace_id: str) -> Metadata:
+        raise NotImplementedError("This method should be overridden by subclasses.")
+
+    @abstractmethod
+    def save_metadata(self, trace_id: str, path: OutputPaths) -> None:
+        raise NotImplementedError("This method should be overridden by subclasses.")
+
+    @abstractmethod
+    def save_trace(self, trace_id: str, path: OutputPaths) -> None:
+        raise NotImplementedError("This method should be overridden by subclasses.")
+
+    @abstractmethod
+    def save_workload(self, workload_id: int, path: OutputPaths) -> None:
+        raise NotImplementedError("This method should be overridden by subclasses.")
+
+    @staticmethod
+    def setup():
+        raise NotImplementedError("This method should be overridden by subclasses.")
\ No newline at end of file
diff --git a/traces/stf_trace_archive/src/database_explorer/sources/local_storage.py b/traces/stf_trace_archive/src/database_explorer/sources/local_storage.py
new file mode 100644
index 00000000..a68e1339
--- /dev/null
+++ b/traces/stf_trace_archive/src/database_explorer/sources/local_storage.py
@@ -0,0 +1,216 @@
+import os
+import readline
+import shutil
+from typing import Optional, Union
+import pandas as pd
+import yaml
+
+from data.workload_table_shema import WorkloadsTableSchema
+from data.config import LocalStorageConfig
+from data.consts import Const
+from data.metadata import Metadata
+from data.output_path import OutputPaths
+from .base import SourceHandler
+from data.trace_table_shema import TracesTableSchema
+from utils.metadata_parser import MetadataParser
+
+class LocalStorageSource(SourceHandler):
+    def __init__(self, config: LocalStorageConfig):
+        if not config.path:
+            raise ValueError("Storage path cannot be empty.")
+
+        self.storage_path = config.path
+        self.trace_suffix = "zstf"
+        self.metadata_suffix = "zstf.metadata.yaml"
+        self.metadata_cache = {}
+        self._traces_table = None
+        self._workloads_table = None
+
+    @property
+    def traces_table(self):
+        if self._traces_table is None:
+            return self.update_traces_table()
+
+        return self._traces_table
+
+    @property
+    def workloads_table(self):
+        if self._workloads_table is None:
+            return self.update_workloads_table()
+
+        return self._workloads_table
+
+    def update_traces_table(self) -> pd.DataFrame:
+        df = pd.DataFrame(columns=TracesTableSchema.get_columns())
+        if not os.path.exists(self.storage_path):
+            os.mkdir(self.storage_path)
+
+        self.update_workloads_table()
+        workload_ids = self.workloads_table[WorkloadsTableSchema.WORKLOAD_ID].to_list()
+        for workload_id in workload_ids:
+            workload_folder = self._get_workload_folder(workload_id)
+            workload_path = os.path.join(self.storage_path, workload_folder)
+            trace_attempts = os.listdir(workload_path)
+
+            for trace_attempt in trace_attempts:
+                trace_attempt_path = os.path.join(workload_path, trace_attempt)
+                if not os.path.isdir(trace_attempt_path):
+                    continue
+
+                trace_files = os.listdir(trace_attempt_path)
+                metadata_files = [filename for filename in trace_files if filename.endswith(self.metadata_suffix)]
+                trace_ids = [metadata_file[0:-len(self.metadata_suffix) - 1] for metadata_file in metadata_files]
+                trace_attempt_id = int(trace_attempt.split('_', 1)[1])
+
+                if len(trace_ids) == 0:
+                    continue
+
+                sample_metadata = self.get_metadata(trace_ids[0])
+                sample_trace_interval = sample_metadata.get('stf', {}).get('trace_interval', None)
+                fully_traced = sample_trace_interval is None
+                workload_sha = sample_metadata.get('workload', {}).get('SHA256', None)
+
+                # trace_id format: <workload_id>.<attempt>.<zero-padded part>_<workload_name>
+                for trace_id in trace_ids:
+                    trace_part = trace_id.split('.')[2].split('_')[0]
+                    workload_name = '_'.join(trace_id.split('_')[1:])
+                    df = pd.concat([df, pd.DataFrame([{
+                        TracesTableSchema.TRACE_ID: trace_id,
+                        TracesTableSchema.TRACE_ATTEMPT: trace_attempt_id,
+                        TracesTableSchema.TRACE_PART: int(trace_part),
+                        TracesTableSchema.WORKLOAD_ID: workload_id,
+                        TracesTableSchema.WORKLOAD_SHA: workload_sha,
+                        TracesTableSchema.WORKLOAD_NAME: workload_name,
+                        TracesTableSchema.FULLY_TRACED: fully_traced
+                    }])])
+
+        self._traces_table = df
+        return df
+
+    def update_workloads_table(self) -> pd.DataFrame:
+        df = pd.DataFrame(columns=WorkloadsTableSchema.get_columns())
+        if not os.path.exists(self.storage_path):
+            os.mkdir(self.storage_path)
+
+        workload_folders = os.listdir(self.storage_path)
+        for workload_folder in workload_folders:
+            workload_id, workload_name = workload_folder.split('_', 1)
+            df = pd.concat([df, pd.DataFrame([{
+                WorkloadsTableSchema.WORKLOAD_ID: int(workload_id),
+                WorkloadsTableSchema.WORKLOAD_NAME: workload_name,
+            }])])
+
+        self._workloads_table = df
+        return df
+
+    def insert_workload(self, workload_path: str, workload_id: int) -> None:
+        workload_name = os.path.basename(workload_path)
+        workload_folder = self._get_workload_folder(workload_id, workload_name)
+        storage_path = os.path.join(self.storage_path, workload_folder)
+        os.makedirs(storage_path, exist_ok=False)
+        shutil.copy(workload_path, storage_path)
+
+        self._workloads_table = pd.concat([self.workloads_table, pd.DataFrame([{
+            WorkloadsTableSchema.WORKLOAD_ID: int(workload_id),
+            WorkloadsTableSchema.WORKLOAD_NAME: workload_name,
+        }])])
+
+    def insert_trace(self, trace_path: str, metadata: Metadata) -> None:
+        trace_id = metadata.get('trace_id')
+        if not trace_id:
+            raise ValueError("Trace ID is required in metadata to insert a trace.")
+
+        workload_id, trace_attempt = trace_id.split('.')[:2]
+        workload_folder = self._get_workload_folder(workload_id)
+        attempt_folder = self._get_attempt_folder(trace_attempt)
+
+        storage_path = os.path.join(self.storage_path, workload_folder, attempt_folder)
+        trace_storage_path = os.path.join(storage_path, f"{trace_id}.{self.trace_suffix}")
+        metadata_storage_path = os.path.join(storage_path, f"{trace_id}.{self.metadata_suffix}")
+
+        os.makedirs(storage_path, exist_ok=True)
+        shutil.copy(trace_path, trace_storage_path)
+        with open(metadata_storage_path, 'w') as metadata_file:
+            yaml.dump(metadata, metadata_file)
+
+    def get_metadata(self, trace_id: str) -> Metadata:
+        if trace_id in self.metadata_cache:
+            return self.metadata_cache[trace_id]
+
+        workload_id, trace_attempt = trace_id.split('.')[:2]
+        workload_folder = self._get_workload_folder(workload_id)
+        attempt_folder = self._get_attempt_folder(trace_attempt)
+        metadata_path = os.path.join(self.storage_path, workload_folder, attempt_folder, f"{trace_id}.{self.metadata_suffix}")
+        if not os.path.exists(metadata_path):
{trace_id}") + + metadata = MetadataParser.parse_metadata_from_path(metadata_path) + self.metadata_cache[trace_id] = metadata + return metadata + + def save_metadata(self, trace_id: str, path: str) -> None: + metadata_filename = f"{trace_id}.{self.metadata_suffix}" + dst_filename = path.filename if path.filename else metadata_filename + dst_path = os.path.join(path.folder_path, dst_filename) + + metadata = self.get_metadata(trace_id) + with open(dst_path, "w") as metadata_file: + yaml.dump(metadata, metadata_file) + print(f"Metadata {trace_id} saved on {os.path.abspath(dst_path)}") + + def save_trace(self, trace_id: str, path: str) -> None: + workload_id, trace_attempt = trace_id.split('.')[:2] + workload_folder = self._get_workload_folder(workload_id) + attempt_folder = self._get_attempt_folder(trace_attempt) + trace_filename = f"{trace_id}.{self.trace_suffix}" + trace_path = os.path.join(self.storage_path, workload_folder, attempt_folder, trace_filename) + + dst_filename = path.filename if path.filename else trace_filename + dst_path = os.path.join(path.folder_path, dst_filename) + shutil.copy(trace_path, dst_path) + print(f"Trace {trace_id} saved on {os.path.abspath(dst_path)}") + + def save_workload(self, workload_id: int, path: str) -> None: + workload_folder = os.path.join(self.storage_path, self._get_workload_folder(workload_id)) + if not os.path.exists(workload_folder): + raise FileNotFoundError(f"Workload not found: {workload_id}") + + workload_path_list = os.listdir(workload_folder) + workload_filename = None + workload_file_path = None + for workload_file in workload_path_list: + full_path = os.path.join(workload_folder, workload_file) + if not os.path.isfile(full_path): + continue + + if workload_file_path is not None: + raise NotImplementedError("Multiple workload files found.") + + workload_filename = workload_file + workload_file_path = full_path + + dst_filename = path.filename if path.filename else workload_filename + dst_path = os.path.join(path.folder_path, dst_filename) + shutil.copy(workload_file_path, dst_path) + print(f"Workload {workload_id} saved on {os.path.abspath(dst_path)}") + + def _get_workload_folder(self, workload_id: Union[str, int], workload_name: Optional[str] = None) -> str: + if isinstance(workload_id, str): + workload_id = int(workload_id) + + if not workload_name: + workload_name = self.workloads_table[self.workloads_table[WorkloadsTableSchema.WORKLOAD_ID] == workload_id][WorkloadsTableSchema.WORKLOAD_NAME].item() + + workload_folder = f"{str(workload_id).zfill(Const.PAD_LENGHT)}_{workload_name}" + return workload_folder + + def _get_attempt_folder(self, attempt_id: Union[str, int]) -> str: + return f"attempt_{str(attempt_id).zfill(Const.PAD_LENGHT)}" + + @staticmethod + def setup() -> LocalStorageConfig: + readline.set_completer_delims(' \t\n=') + readline.parse_and_bind("tab: complete") + path = input("Enter the storage folder path: ").lower() + readline.parse_and_bind("tab: self-insert") + path = os.path.abspath(path) + + return LocalStorageConfig(path=path) \ No newline at end of file diff --git a/traces/stf_trace_archive/src/handlers/base.py b/traces/stf_trace_archive/src/handlers/base.py new file mode 100644 index 00000000..9b27783f --- /dev/null +++ b/traces/stf_trace_archive/src/handlers/base.py @@ -0,0 +1,9 @@ +from abc import ABC, abstractmethod +import argparse + +from database_explorer.database_explorer import DatabaseExplorer + +class CommandHandler(ABC): + @abstractmethod + def run(self, args: argparse.Namespace, database_explorer: 
+    def run(self, args: argparse.Namespace, database_explorer: DatabaseExplorer) -> None:
+        raise NotImplementedError("This method should be overridden by subclasses.")
\ No newline at end of file
diff --git a/traces/stf_trace_archive/src/handlers/get.py b/traces/stf_trace_archive/src/handlers/get.py
new file mode 100644
index 00000000..14319ef3
--- /dev/null
+++ b/traces/stf_trace_archive/src/handlers/get.py
@@ -0,0 +1,42 @@
+import argparse
+import os
+from database_explorer.database_explorer import DatabaseExplorer
+from data.output_path import OutputPaths
+from .base import CommandHandler
+
+class GetHandler(CommandHandler):
+    def run(self, args: argparse.Namespace, database_explorer: DatabaseExplorer) -> None:
+        self.explorer = database_explorer
+        output_path: OutputPaths = self._get_output_path(args.output)
+        if args.trace is not None:
+            self._save_trace(args.trace, output_path)
+
+        elif args.workload is not None:
+            self.explorer.save_workload(args.workload, output_path)
+
+        elif args.metadata is not None:
+            self.explorer.save_metadata(args.metadata, output_path)
+
+        else:
+            raise ValueError("Invalid arguments: expected one of --trace, --workload, or --metadata")
+
+    def _get_output_path(self, output_arg: str) -> OutputPaths:
+        if output_arg is None:
+            return OutputPaths(folder_path="./", filename=None)
+
+        folder: str = os.path.dirname(output_arg)
+        filename: str = os.path.basename(output_arg)
+        return OutputPaths(folder_path=folder, filename=filename)
+
+    def _save_trace(self, trace_id: str, output_path: OutputPaths) -> None:
+        self.explorer.save_trace(trace_id, output_path)
+        if output_path.filename:
+            output_path.filename += ".metadata.yaml"
+
+        self.explorer.save_metadata(trace_id, output_path)
\ No newline at end of file
diff --git a/traces/stf_trace_archive/src/handlers/list.py b/traces/stf_trace_archive/src/handlers/list.py
new file mode 100644
index 00000000..fa4415d7
--- /dev/null
+++ b/traces/stf_trace_archive/src/handlers/list.py
@@ -0,0 +1,23 @@
+import argparse
+from .base import CommandHandler
+from database_explorer.database_explorer import DatabaseExplorer
+from utils.ui import print_metadata_details
+
+class ListHandler(CommandHandler):
+    def run(self, args: argparse.Namespace, database_explorer: DatabaseExplorer) -> None:
+        self.explorer = database_explorer
+        self._list_traces()
+
+    def _list_traces(self) -> None:
+        trace_ids = self.explorer.get_trace_ids()
+        if not trace_ids:
+            print("No traces found.")
+            return
+
+        for trace_id in sorted(trace_ids):
+            metadata = self.explorer.get_metadata(trace_id)
+            print_metadata_details(metadata)
+            print("")
diff --git a/traces/stf_trace_archive/src/handlers/search.py b/traces/stf_trace_archive/src/handlers/search.py
new file mode 100644
index 00000000..88adfff1
--- /dev/null
+++ b/traces/stf_trace_archive/src/handlers/search.py
@@ -0,0 +1,8 @@
+import argparse
+from database_explorer.database_explorer import DatabaseExplorer
+from .base import CommandHandler
+
+# TODO
+class SearchHandler(CommandHandler):
+    def run(self, args: argparse.Namespace, database_explorer: DatabaseExplorer):
+        raise NotImplementedError("SearchHandler is not implemented yet.")
\ No newline at end of file
diff --git a/traces/stf_trace_archive/src/handlers/setup.py b/traces/stf_trace_archive/src/handlers/setup.py
new file mode 100644
index 00000000..ad058b86
--- /dev/null
+++ b/traces/stf_trace_archive/src/handlers/setup.py
@@ -0,0 +1,102 @@
+import argparse
+import os
+import pathlib
+import yaml
+from dataclasses import asdict
+from data.config import Config, StorageConfig
+from data.source_type_map import SOURCE_TYPE_MAP
+from .base import CommandHandler
+
+class SetupHandler(CommandHandler):
+    def __init__(self):
+        config_folder = pathlib.Path(__file__).parent.parent.resolve()
+        config_filename = "config.yaml"
+        self._config_path = os.path.join(config_folder, config_filename)
+        self._config: Config = None
+        self._read_config_file()
+        self._complete_config_file()
+
+    def run(self, args: argparse.Namespace, _) -> None:
+        if args.add_storage:
+            self._add_storage_source()
+            self._save_config()
+
+        elif args.set_default_storage:
+            self._set_default_storage(args.set_default_storage)
+            self._save_config()
+
+    def get_config(self) -> Config:
+        return self._config
+
+    def _read_config_file(self) -> None:
+        if not os.path.exists(self._config_path):
+            return
+
+        with open(self._config_path, 'r') as config_file:
+            config_dict = yaml.safe_load(config_file)
+            self._config = Config.from_dict(config_dict)
+
+    def _complete_config_file(self) -> None:
+        if not self._config or not self._config.storages:
+            print("Config is empty or invalid. Creating new config file")
+            self._add_storage_source()
+            self._save_config()
+
+        if not self._config.default_storage:
+            print("Default storage not set.")
+            self._set_default_storage()
+            self._save_config()
+
+    def _add_storage_source(self) -> None:
+        source_types = list(SOURCE_TYPE_MAP.keys())
+        print("Creating a new storage source.")
+        print(f"Registered source type options: {', '.join(source_types)}")
+        source_type = input("Select your source type: ").lower()
+
+        if source_type not in source_types:
+            raise ValueError(f"Unknown source type: {source_type}")
+
+        used_source_names = self._get_source_names()
+        source_name = input("Enter your source name: ").lower()
+
+        if source_name in used_source_names:
+            raise ValueError(f"Source name {source_name} already in use")
+
+        source_class = SOURCE_TYPE_MAP.get(source_type)
+        source_specific_config = source_class.setup()
+        source_config = StorageConfig(type=source_type, name=source_name, config=source_specific_config)
+
+        if not self._config:
+            self._config = Config(storages=[source_config], default_storage=source_name)
+        elif not self._config.storages:
+            self._config.storages = [source_config]
+        else:
+            self._config.storages.append(source_config)
+
+        if not self._config.default_storage:
+            self._config.default_storage = source_name
+
+    def _get_source_names(self) -> list[str]:
+        if not self._config:
+            return []
+
+        return [storage.name for storage in self._config.storages]
+
+    def _set_default_storage(self, source_name: str = None) -> None:
+        used_source_names = self._get_source_names()
+
+        if not source_name:
+            print("Enter the default source name: ", end="")
+            source_name = input().lower()
+
+        if source_name not in used_source_names:
+            raise ValueError(f"Source name {source_name} not found among the configured storage sources")
+
+        self._config.default_storage = source_name
+
+    def _save_config(self) -> None:
+        with open(self._config_path, 'w') as config_file:
+            yaml.safe_dump(asdict(self._config), config_file)
\ No newline at end of file
diff --git a/traces/stf_trace_archive/src/handlers/upload.py b/traces/stf_trace_archive/src/handlers/upload.py
new file mode 100644
index 00000000..f682b11e
--- /dev/null
+++ b/traces/stf_trace_archive/src/handlers/upload.py
@@ -0,0 +1,168 @@
+import argparse
+import os
+from database_explorer.database_explorer import DatabaseExplorer
+from data.consts import Const
+from .base import CommandHandler
+from utils.metadata_parser import MetadataParser
+from utils.file_dialog import FileDialog
+from utils.sha256 import compute_sha256
+from utils.ui import print_metadata_interval
+from data.trace_data import TraceData
+
+class UploadHandler(CommandHandler):
+    _metadata_file_suffix = ".metadata.yaml"
+
+    def run(self, args: argparse.Namespace, database_explorer: DatabaseExplorer):
+        self.explorer = database_explorer
+        traces = self._get_arg_traces(args)
+        self._validate_traces(traces)
+        workload_id = self._get_workload_id(traces, args)
+        self._setup_trace_attempt(traces, workload_id)
+        self._setup_trace_parts(traces, workload_id)
+        self._setup_trace_ids(traces, workload_id)
+        self.explorer.upload_traces(traces)
+
+    def _get_arg_traces(self, args) -> list[TraceData]:
+        trace_paths = args.trace
+
+        if trace_paths and isinstance(trace_paths, str):
+            trace_paths = [trace_paths]
+
+        if args.it:
+            if not args.trace:
+                trace_paths = FileDialog.select_traces()
+                args.trace = trace_paths
+
+        traces = []
+        for trace_path in trace_paths:
+            metadata_path = trace_path + self._metadata_file_suffix
+            if not os.path.exists(metadata_path):
+                raise FileNotFoundError(f"Metadata file not found: {metadata_path}")
+
+            metadata = MetadataParser.parse_metadata_from_path(metadata_path)
+            traces.append(TraceData(path=trace_path, id=None, attempt=None, part=None,
+                                    metadata_path=metadata_path, metadata=metadata))
+
+        if not traces:
+            raise ValueError("No traces provided.")
+
+        return traces
+
+    def _validate_traces(self, traces):
+        workload_sha256 = traces[0].metadata['workload']['SHA256']
+        for trace in traces:
+            if trace.metadata['workload']['SHA256'] != workload_sha256:
+                raise ValueError("Traces from different workloads provided. Please provide traces from the same workload.")
+
+        if len(traces) > 1:
+            for trace in traces:
+                if MetadataParser.is_fully_traced(trace.metadata):
+                    raise ValueError("Multiple fully traced traces provided. Please provide only one fully traced trace or multiple partial traces.")
+
+    def _setup_trace_attempt(self, traces, workload_id):
+        trace_attempt = self._get_trace_attempt(traces, workload_id)
+        for trace in traces:
+            trace.attempt = trace_attempt
+
+    def _get_trace_attempt(self, traces, workload_id):
+        fully_traced = len(traces) == 1 and MetadataParser.is_fully_traced(traces[0].metadata)
+        if fully_traced:
+            return self.explorer.get_next_trace_attempt(workload_id)
+
+        used_trace_attempts = self.explorer.get_trace_attempts(workload_id)
+        if len(used_trace_attempts) == 0:
+            return 0
+
+        print("Do you wish to upload to a new trace attempt? (yes/no): ", end="")
+        answer = input().lower()
+        if answer not in ('yes', 'y', 'no', 'n'):
+            raise ValueError("Invalid response. Please answer 'yes' or 'no'.")
+
+        if answer in ('yes', 'y'):
+            return self.explorer.get_next_trace_attempt(workload_id)
+
+        print(f"Existing trace attempts: {used_trace_attempts}")
+        print("Please provide the trace attempt number to upload to: ", end="")
+        trace_attempt = input().strip()
+        if not trace_attempt.isdigit() or int(trace_attempt) not in used_trace_attempts:
+            raise ValueError(f"Trace attempt value must be a number between {min(used_trace_attempts)} and {max(used_trace_attempts)}")
+
+        trace_attempt = int(trace_attempt)
+        if self.explorer.is_fully_traced(workload_id, trace_attempt):
+            raise ValueError(f"Trace attempt {trace_attempt} for workload ID {workload_id} is fully traced. Cannot upload more traces to a fully traced attempt.")
+
+        return trace_attempt
+
+    def _setup_trace_parts(self, traces, workload_id):
+        if len(traces) == 1 and MetadataParser.is_fully_traced(traces[0].metadata):
+            for trace in traces:
+                trace.part = 0
+            return traces
+
+        print("Partial traces detected. Please specify the part number for this trace segment")
+        traces = sorted(traces, key=lambda trace: trace.metadata['stf']['trace_interval']['start_instruction_index'])
+
+        trace_attempt = traces[0].attempt
+        used_parts = self.explorer.get_trace_parts(workload_id, trace_attempt)
+        print(f"Already used part numbers: {used_parts}")
+        last_part_number = max(used_parts) if used_parts else -1
+        for trace in traces:
+            print(f"\nTrace file: {trace.path}")
+            print_metadata_interval(trace.metadata)
+
+            default_option = last_part_number + 1
+            print(f"Part number [{default_option}]: ", end="")
+            part_number = input()
+            if part_number == "":
+                part_number = default_option
+            else:
+                part_number = int(part_number)
+
+            if part_number in used_parts:
+                raise ValueError(f"Part number {part_number} already used. Please use unique part numbers for each trace segment.")
+
+            used_parts.append(part_number)
+            trace.part = part_number
+            last_part_number = part_number
+
+        return traces
+
+    def _get_workload_id(self, traces, args):
+        workload_sha256 = traces[0].metadata['workload']['SHA256']
+
+        workload_id = self.explorer.get_workload_id(workload_sha256)
+        if workload_id is not None:
+            print("Workload already exists in trace archive, skipping workload upload.")
+            return workload_id
+
+        if not args.it and not args.workload:
+            raise ValueError("Workload not found on trace archive. Provide either the --it or --workload option to specify the workload binary.")
+
+        workload_path = args.workload
+        if args.it and not args.workload:
+            workload_path = FileDialog.select_workload()
+
+        if not workload_path or not os.path.exists(workload_path):
+            raise FileNotFoundError(f"Workload file not found: {workload_path}")
+
+        workload_file_sha256 = compute_sha256(workload_path)
+        if workload_file_sha256 != workload_sha256:
+            raise ValueError("Workload file SHA256 does not match the one in metadata.")
+
+        workload_id = self.explorer.upload_workload(workload_path)
+        return workload_id
+
+    def _setup_trace_ids(self, traces, workload_id):
+        workload_name = self.explorer.get_workload_name(workload_id)
+        if not workload_name:
+            raise ValueError(f"Workload with ID {workload_id} not found in the database.")
+
+        # trace_id format: <workload_id>.<attempt>.<zero-padded part>_<workload_name>
+        for trace in traces:
+            trace_part = str(trace.part).zfill(Const.PAD_LENGTH)
+            trace_id = f"{workload_id}.{trace.attempt}.{trace_part}_{workload_name}"
+            trace.id = trace_id
+            trace.metadata['trace_id'] = trace_id
+            trace.metadata['workload']['filename'] = workload_name
+
+        return traces
\ No newline at end of file
diff --git a/traces/stf_trace_archive/src/tests/upload_tests.py b/traces/stf_trace_archive/src/tests/upload_tests.py
new file mode 100644
index 00000000..4806b175
--- /dev/null
+++ b/traces/stf_trace_archive/src/tests/upload_tests.py
@@ -0,0 +1,146 @@
+from contextlib import ExitStack
+import io
+import os
+import re
+import unittest
+
+from unittest.mock import patch
+from tests.utils.trace_generator import TraceDataGenerator
+from data.consts import Const
+from trace_share import main
+import sys
+
+class TraceTestInput():
+    def __init__(self, workload_path, trace_paths):
+        self.workload_path = workload_path
+        self.trace_paths = trace_paths
+
+class TestUpload(unittest.TestCase):
+    def setUp(self):
+        self.generator = TraceDataGenerator()
+        self.workload1_path = self.generator.generate_workload(0)
+        self.workload1_name = self.workload1_path.split('/')[-1]
+        self.storage_type = "local-storage"
+        self.storage_path = "./tests/storage_test"
+
+        self.workload1_full_trace_path = self.generator.generate_trace(workload_path=self.workload1_path, trace_attempt=0, trace_part=None)
+        self.workload1_trace_part_1_path = self.generator.generate_trace(workload_path=self.workload1_path, trace_attempt=1, trace_part=0)
+        self.workload1_trace_part_2_path = self.generator.generate_trace(workload_path=self.workload1_path, trace_attempt=1, trace_part=1)
+        self.default_args = [
+            "trace_share",
+            "--source-type",
+            self.storage_type,
+            "--source-path",
+            self.storage_path,
+            "upload",
+        ]
+
+    def tearDown(self):
+        self.generator.delete_test_traces()
+        self.generator.delete_test_storage(self.storage_type, self.storage_path)
+
+    def launch_test(self, workload, traces, inputs=None):
+        args = [
+            *self.default_args,
+        ]
+
+        if workload:
+            args.append("--workload")
+            args.append(workload)
+
+        if traces:
+            if isinstance(traces, str):
+                traces = [traces]
+            for trace in traces:
+                args.append("--trace")
+                args.append(trace)
+
+        captured_output = io.StringIO()
+        captured_stderr = io.StringIO()
+        try:
+            with ExitStack() as stack:
+                stack.enter_context(patch.object(sys, 'argv', args))
+                stack.enter_context(patch("sys.stdout", new=captured_output))
+                stack.enter_context(patch("sys.stderr", new=captured_stderr))
+                if inputs is not None:
+                    stack.enter_context(patch('builtins.input', side_effect=inputs))
+
+                main()
+        except Exception as e:
+            return captured_output.getvalue(), captured_stderr.getvalue(), e
+
+        return captured_output.getvalue(), None, None
+
+    def trace_exists_assert(self, trace_id, workload_name):
+        workload_id, attempt_id = trace_id.split(".")[0:2]
+        workload_folder = f"{workload_id.zfill(Const.PAD_LENGTH)}_{workload_name}"
+        attempt_folder = f"attempt_{attempt_id.zfill(Const.PAD_LENGTH)}"
+        trace_path = os.path.join(self.storage_path, workload_folder, attempt_folder, f"{trace_id}.zstf")
+        metadata_path = f"{trace_path}.metadata.yaml"
+
+        self.assertTrue(os.path.exists(trace_path))
+        self.assertTrue(os.path.exists(metadata_path))
+
+    def get_trace_ids_from_output(self, output):
+        if not output:
+            return []
+
+        pattern = r"(?<=\s)\d+\.\d+\.\d+_\S+"
+        return re.findall(pattern, output)
+
+    def test_upload_full_trace(self):
+        print("\ntest_upload_full_trace")
+
+        stdout, stderr, error = self.launch_test(self.workload1_path, self.workload1_full_trace_path)
+        trace_ids = self.get_trace_ids_from_output(stdout)
+        expected_trace_id = f"0.0.0000_{self.workload1_name}"
+
+        self.assertIsNone(error)
+        self.assertIsNone(stderr)
+        self.assertEqual(len(trace_ids), 1)
+        self.assertEqual(trace_ids[0], expected_trace_id)
+        self.trace_exists_assert(expected_trace_id, self.workload1_name)
+
+    def test_upload_partial_traces(self):
+        print("\n\ntest_upload_partial_trace")
+        inputs = ["0", "1"]
+        stdout, stderr, error = self.launch_test(self.workload1_path, [self.workload1_trace_part_1_path, self.workload1_trace_part_2_path], inputs)
+        trace_ids = self.get_trace_ids_from_output(stdout)
+        expected_trace_ids = [f"0.0.0000_{self.workload1_name}", f"0.0.0001_{self.workload1_name}"]
+
+        self.assertIsNone(error)
+        self.assertIsNone(stderr)
+        self.assertEqual(len(trace_ids), len(expected_trace_ids))
+        for i in range(len(trace_ids)):
+            self.assertEqual(trace_ids[i], expected_trace_ids[i], self.workload1_name)
+            self.trace_exists_assert(expected_trace_ids[i], self.workload1_name)
+
+    def test_upload_two_attempts(self):
+        print("\n\ntest_upload_two_attempts")
+        stdout1, stderr1, error1 = self.launch_test(self.workload1_path, self.workload1_full_trace_path)
+
+        inputs = ["y", "0", "1"]
+        stdout2, stderr2, error2 = self.launch_test(self.workload1_path, [self.workload1_trace_part_1_path, self.workload1_trace_part_2_path], inputs)
+
+        trace_ids = self.get_trace_ids_from_output(stdout1)
+        trace_ids.extend(self.get_trace_ids_from_output(stdout2))
+        expected_trace_ids = [f"0.0.0000_{self.workload1_name}", f"0.1.0000_{self.workload1_name}", f"0.1.0001_{self.workload1_name}"]
+
+        self.assertIsNone(error1)
+        self.assertIsNone(error2)
+        self.assertIsNone(stderr1)
+        self.assertIsNone(stderr2)
+        self.assertEqual(len(trace_ids), len(expected_trace_ids))
+        for i in range(len(trace_ids)):
+            self.assertEqual(trace_ids[i], expected_trace_ids[i], self.workload1_name)
+            self.trace_exists_assert(expected_trace_ids[i], self.workload1_name)
diff --git a/traces/stf_trace_archive/src/tests/utils/trace_generator.py b/traces/stf_trace_archive/src/tests/utils/trace_generator.py
new file mode 100644
index 00000000..f8871a2e
--- /dev/null
+++ b/traces/stf_trace_archive/src/tests/utils/trace_generator.py
@@ -0,0 +1,121 @@
+from datetime import datetime
+import hashlib
+import os
+import yaml
+
+class TraceDataGenerator():
+    def __init__(self):
+        self.test_storage_path = "./tests/traces_test"
+        os.makedirs(self.test_storage_path, exist_ok=True)
+
+    def generate_workload(self, workload_id=None):
+        if workload_id is None:
+            workload_files = os.listdir(self.test_storage_path)
+            workload_ids = [f.split('_')[1] for f in workload_files if f.startswith('workload_')]
+            workload_id = len(workload_ids) + 1
+
+        workload_file_content = f"Workload ID: {workload_id}\n"
+        workload_path = f"{self.test_storage_path}/workload_{workload_id}"
+
+        with open(f"{workload_path}", 'w') as workload_file:
+            workload_file.write(workload_file_content)
+
+        return workload_path
+
+    def generate_trace(self, workload_path, trace_attempt, trace_part=None):
+        trace_attempt_path = f"{self.test_storage_path}/trace_attempt_{trace_attempt}"
+        os.makedirs(trace_attempt_path, exist_ok=True)
+
+        if trace_part is None:
+            trace_path = f"{trace_attempt_path}/0.zstf"
+            metadata_path = f"{trace_attempt_path}/0.zstf.metadata.yaml"
+        else:
+            trace_path = f"{trace_attempt_path}/{trace_part}.zstf"
+            metadata_path = f"{trace_attempt_path}/{trace_part}.zstf.metadata.yaml"
+
+        trace_file_content = f"Trace attempt: {trace_attempt}, trace part: {trace_part}\n"
+        with open(trace_path, 'w') as trace_file:
+            trace_file.write(trace_file_content)
+
+        trace_metadata_content = self.generate_metadata(workload_path, trace_part)
+        with open(metadata_path, 'w') as metadata_file:
+            yaml.dump(trace_metadata_content, metadata_file)
+
+        return trace_path
+
+    def generate_metadata(self, workload_path, trace_part):
+        workload_sha256 = self.get_workload_sha256(workload_path)
+
+        interval = None
+        if trace_part is not None:
+            interval = {
+                'instruction_pc': 100 * trace_part,
+                'pc_count': trace_part,
+                'interval_lenght': trace_part * 100,
+                'start_instruction_index': 100 * trace_part,
+                'end_instruction_index': 100 * (trace_part + 1)
+            }
+
+        metadata = {
+            'description': None,
+            'author': {
+                'name': 'Jane Doe',
+                'company': 'RISCV',
+                'email': 'jane.doe@riscv.org'
+            },
+            'workload': {
+                'filename': f"{workload_path}",
+                'SHA256': workload_sha256,
+                'execution_command': f"./{workload_path}",
+                'elf_sections': {
+                    'comment': "Test",
+                    'riscv.attributes': "Test",
+                    'GCC.command.line': "Test"
+                },
+            },
+            'stf': {
+                'timestamp': datetime.now().isoformat(),
+                'stf_trace_info': {
+                    'VERSION': "Test",
+                    'GENERATOR': "Test",
+                    'GEN_VERSION': "Test",
+                    'GEN_COMMENT': "Test",
+                    'STF_FEATURES': []
+                },
+                'trace_interval': interval
+            }
+        }
+
+        return metadata
+
+    def get_workload_sha256(self, workload_path):
+        hash_sha256 = hashlib.sha256()
+        with open(workload_path, "rb") as f:
+            for chunk in iter(lambda: f.read(4096), b""):
+                hash_sha256.update(chunk)
+        return hash_sha256.hexdigest()
+
+    def delete_test_traces(self):
+        self._delete_folder_and_files(self.test_storage_path)
+
+    def delete_test_storage(self, storage_type, path):
+        if storage_type == "local-storage":
+            self._delete_folder_and_files(path)
+
+    def _delete_folder_and_files(self, path):
+        if not os.path.exists(path):
+            return
+
+        for root, dirs, files in os.walk(path, topdown=False):
+            for name in files:
+                os.remove(os.path.join(root, name))
+            for name in dirs:
+                os.rmdir(os.path.join(root, name))
+
+        os.rmdir(path)
diff --git a/traces/stf_trace_archive/src/trace_share.py b/traces/stf_trace_archive/src/trace_share.py
new file mode 100644
index 00000000..503609ca
--- /dev/null
+++ b/traces/stf_trace_archive/src/trace_share.py
@@ -0,0 +1,53 @@
+from data.config import Config
+from data.source_type_map import SOURCE_TYPE_MAP
+from database_explorer.database_explorer import DatabaseExplorer
+from utils.cli_parser import parseArgs
+from handlers.upload import UploadHandler
+from handlers.list import ListHandler
+from handlers.search import SearchHandler
+from handlers.get import GetHandler
+from handlers.setup import SetupHandler
+
+def main():
+    args = parseArgs()
+    setup_handler = SetupHandler()
+    command_map = {
+        'upload': UploadHandler(),
+        'search': SearchHandler(),
+        'list': ListHandler(),
+        'get': GetHandler(),
+        'setup': setup_handler,
+    }
+
+    config: Config = setup_handler.get_config()
+    explorer = get_storage(args.storage_name, config)
+
+    handler = command_map.get(args.command)
+    if handler:
+        handler.run(args, explorer)
+    else:
+        print(f"Unknown command: {args.command}")
+
+def get_storage(selected_storage: str, config: Config) -> DatabaseExplorer:
+    if not selected_storage:
+        selected_storage = config.default_storage
+
+    storage_config = None
+    for storage in config.storages:
+        if storage.name == selected_storage:
+            storage_config = storage
+            break
+
+    if not storage_config:
+        raise ValueError(f"Storage not found: {selected_storage}")
+
+    storage_class = SOURCE_TYPE_MAP.get(storage_config.type)
+    if not storage_class:
+        raise ValueError(f"Unknown storage type: {storage_config.type}")
+
+    storage = storage_class(storage_config.config)
+    explorer = DatabaseExplorer(storage)
+    return explorer
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/traces/stf_trace_archive/src/utils/cli_parser.py b/traces/stf_trace_archive/src/utils/cli_parser.py
new file mode 100644
index 00000000..b1b4a893
--- /dev/null
+++ b/traces/stf_trace_archive/src/utils/cli_parser.py
@@ -0,0 +1,84 @@
+import argparse
+import sys
+
+def parseArgs() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(
+        prog='trace_share',
+        usage='python trace_share.py COMMAND [OPTIONS]',
+        description='CLI tool for Olympia traces exploration',
+        epilog="For more help on how to use trace_share, head to https://github.com/riscv-software-src/riscv-perf-model/tree/master/traces/stf_trace_archive/README.md",
+        formatter_class=argparse.RawTextHelpFormatter,
+        add_help=False
+    )
+
+    parser.add_argument('-h', '--help', action='help', help='Show this help message and exit.')
+    parser.add_argument('--storage-name', help='Select a pre-configured storage config.')
+
+    subparsers = parser.add_subparsers(title='Commands', dest='command')
+
+    upload_parser = subparsers.add_parser(
+        'upload',
+        help='Upload workload and trace.',
+        description='Upload a workload, trace, and metadata to the database. At least one of the options must be used.',
+        formatter_class=argparse.RawTextHelpFormatter
+    )
+    upload_parser.add_argument('--workload', help='(optional) Path to the workload file.')
+    upload_parser.add_argument('--trace', action='append', help='Path to one or more trace files. If omitted, defaults to .zstf')
+    upload_parser.add_argument('--it', action='store_true', help='Interactive file selection mode.')
+
+    search_parser = subparsers.add_parser(
+        'search',
+        help='Search traces by specified expression.',
+        description='Search for traces and metadata using a regular expression.',
+        formatter_class=argparse.RawTextHelpFormatter
+    )
+    search_parser.add_argument('regex', nargs='?', help='Regex expression to search with.')
+    search_parser.add_argument('--names-only', action='store_true', help='Search only by trace name (ignore metadata).')
+
+    list_parser = subparsers.add_parser(
+        'list',
+        help='List items by category.',
+        description='List database traces or related entities.',
+        formatter_class=argparse.RawTextHelpFormatter
+    )
+    group = list_parser.add_mutually_exclusive_group()
+    group.add_argument('--traces', action='store_true', help='Lists available traces (default)')
+    group.add_argument('--companies', action='store_true', help='Lists associated companies')
+
+    get_parser = subparsers.add_parser(
+        'get',
+        help='Download a specified trace or workload file.',
+        description='Download a specified trace or workload file.',
+        formatter_class=argparse.RawTextHelpFormatter
+    )
+
+    group = get_parser.add_mutually_exclusive_group(required=True)
+    group.add_argument('--trace', help='Id of the trace to download.')
+    group.add_argument('--workload', help='Id of the workload to download.')
+    group.add_argument('--metadata', help='Id of the metadata to download.')
+    get_parser.add_argument('-o', '--output', help='Output folder or file path')
+
+    setup_parser = subparsers.add_parser(
+        'setup',
+        help='Create or edit current tool configurations',
+        description='Create or edit current tool configurations',
+        formatter_class=argparse.RawTextHelpFormatter
+    )
+    setup_parser.add_argument('--add-storage', action='store_true', help='Create a new storage source')
+    setup_parser.add_argument('--set-default-storage', help='Select the default storage')
+
+    if len(sys.argv) == 1:
+        parser.print_help()
+        print("\nRun 'trace_share COMMAND --help' for more information on a command.")
+        print("\nFor more help on how to use trace_share, head to https://github.com/riscv-software-src/riscv-perf-model/tree/master/traces/stf_trace_archive/README.md")
+        sys.exit(0)
+
+    args = parser.parse_args()
+
+    if args.command == 'upload':
+        if not (args.workload or args.trace or args.it):
+            upload_parser.print_help()
+            print("\nError: At least one of --workload, --trace, or --it must be provided.")
+            sys.exit(1)
+
+    return args
\ No newline at end of file
diff --git a/traces/stf_trace_archive/src/utils/dict_to_obj.py b/traces/stf_trace_archive/src/utils/dict_to_obj.py
new file mode 100644
index 00000000..8ca39476
--- /dev/null
+++ b/traces/stf_trace_archive/src/utils/dict_to_obj.py
@@ -0,0 +1,10 @@
+def dict_to_obj(d: dict):
+    if isinstance(d, dict):
+        obj = type("DynamicObj", (), {})()
+        for k, v in d.items():
+            setattr(obj, k, dict_to_obj(v))
+        return obj
+    elif isinstance(d, list):
+        return [dict_to_obj(x) for x in d]
+    else:
+        return d
\ No newline at end of file
diff --git a/traces/stf_trace_archive/src/utils/fields_validator.py b/traces/stf_trace_archive/src/utils/fields_validator.py
new file mode 100644
index 00000000..ad6fb09e
--- /dev/null
+++ b/traces/stf_trace_archive/src/utils/fields_validator.py
@@ -0,0 +1,39 @@
+from typing import Any, Union
+
+class FieldsValidator:
+    @staticmethod
+    def validate(obj: Any, required_fields: Union[list, dict] = None, dependent_fields: Union[list, dict] = None) -> None:
+        if required_fields:
+            FieldsValidator.validate_required_fields(obj, required_fields)
+
+        if dependent_fields:
+            FieldsValidator.validate_dependent_fields(obj, dependent_fields)
+
+    @staticmethod
+    def validate_required_fields(obj: Any, required_fields: Union[list, dict]) -> None:
+        if isinstance(required_fields, list):
+            for field in required_fields:
+                if field not in obj:
+                    raise KeyError(f"Missing required field: {field}")
+            return
+
+        for field, sub_fields in required_fields.items():
+            if field not in obj or obj.get(field) is None:
+                raise KeyError(f"Missing required field: {field}")
+            FieldsValidator.validate_required_fields(obj.get(field), sub_fields)
+
+    @staticmethod
+    def validate_dependent_fields(obj: Any, dependent_fields: Union[list, dict]) -> None:
+        if isinstance(dependent_fields, list):
+            fields_count = 0
+            for field in dependent_fields:
+                if field in obj:
+                    fields_count += 1
+            if fields_count < len(dependent_fields):
+                raise ValueError(f"Object is incomplete: fields {dependent_fields} are interdependent and must either all be present or all be omitted.")
+            return
+
+        for field, sub_fields in dependent_fields.items():
+            if field in obj and obj.get(field) is not None:
+                FieldsValidator.validate_dependent_fields(obj.get(field), sub_fields)
\ No newline at end of file
diff --git a/traces/stf_trace_archive/src/utils/file_dialog.py b/traces/stf_trace_archive/src/utils/file_dialog.py
new file mode 100644
index 00000000..19329da5
--- /dev/null
+++ b/traces/stf_trace_archive/src/utils/file_dialog.py
@@ -0,0 +1,13 @@
+from tkinter import filedialog
+
+class FileDialog():
+
+    @staticmethod
+    def select_workload() -> str:
+        file_path = filedialog.askopenfilename(title="Select Workload")
+        return file_path
+
+    @staticmethod
+    def select_traces() -> list[str]:
+        file_paths = filedialog.askopenfilenames(title="Select Traces", filetypes=[("ZSTF", ".zstf")])
+        return file_paths
\ No newline at end of file
diff --git a/traces/stf_trace_archive/src/utils/metadata_parser.py b/traces/stf_trace_archive/src/utils/metadata_parser.py
new file mode 100644
index 00000000..bfc70deb
--- /dev/null
+++ b/traces/stf_trace_archive/src/utils/metadata_parser.py
@@ -0,0 +1,34 @@
+import yaml
+
+from data.metadata import Metadata
+from utils.fields_validator import FieldsValidator
+
+class MetadataParser:
+    @staticmethod
+    def parse_metadata(metadata_file) -> Metadata:
+        data = yaml.safe_load(metadata_file)
+        MetadataParser.validate_metadata(data)
+        return data
+
+    @staticmethod
+    def parse_metadata_from_path(metadata_path: str) -> Metadata:
+        with open(metadata_path, 'r') as metadata_file:
+            return MetadataParser.parse_metadata(metadata_file)
+
+    @staticmethod
+    def validate_metadata(metadata: Metadata) -> None:
+        if not metadata:
+            raise ValueError("Metadata is empty or invalid.")
+
+        required_keys = {'author': ["name", "company", "email"],
+                         'workload': ['filename', 'SHA256', 'execution_command', 'elf_sections'],
+                         'stf': ['timestamp', 'stf_trace_info']}
+        dependent_keys = {
+            'stf': {'trace_interval': ['instruction_pc', 'pc_count', 'interval_lenght', 'start_instruction_index', 'end_instruction_index']}
+        }
+
+        FieldsValidator.validate(metadata, required_keys, dependent_keys)
+
+    @staticmethod
+    def is_fully_traced(metadata: Metadata) -> bool:
+        return not metadata.get('stf', {}).get('trace_interval', None)
\ No newline at end of file
diff --git a/traces/stf_trace_archive/src/utils/sha256.py b/traces/stf_trace_archive/src/utils/sha256.py
new file mode 100644
index 00000000..200e6556
--- /dev/null
+++ b/traces/stf_trace_archive/src/utils/sha256.py
@@ -0,0 +1,8 @@
+import hashlib
+
+def compute_sha256(file_path: str) -> str:
+    hash_sha256 = hashlib.sha256()
+    with open(file_path, "rb") as f:
+        for chunk in iter(lambda: f.read(4096), b""):
+            hash_sha256.update(chunk)
+    return hash_sha256.hexdigest()
\ No newline at end of file
diff --git a/traces/stf_trace_archive/src/utils/ui.py b/traces/stf_trace_archive/src/utils/ui.py
new file mode 100644
index 00000000..55986aac
--- /dev/null
+++ b/traces/stf_trace_archive/src/utils/ui.py
@@ -0,0 +1,34 @@
+from data.metadata import Metadata
+
+def print_metadata_details(metadata: Metadata) -> None:
+    print(f"id: {metadata['trace_id']}")
+    if "description" in metadata and metadata['description']:
+        print(metadata['description'])
+
+    print(f"Workload: {metadata['workload']['filename']}")
+    print(f"Trace Timestamp: {metadata['stf']['timestamp']}")
+    print_metadata_interval(metadata)
+
+    print("\n---------------------------------")
+
+def print_metadata_interval(metadata: Metadata) -> None:
+    if "trace_interval" not in metadata['stf'] or metadata['stf']["trace_interval"] is None:
+        print("Fully traced")
+        return
+
+    trace_interval = metadata['stf']['trace_interval']
+    print("Trace Interval:")
+    if trace_interval['instruction_pc'] is not None:
+        print(f"  Instruction PC: {trace_interval['instruction_pc']}")
+
+    if trace_interval['pc_count'] is not None:
+        print(f"  PC Count: {trace_interval['pc_count']}")
+
+    if trace_interval['interval_lenght'] is not None:
+        print(f"  Interval Length: {trace_interval['interval_lenght']}")
+
+    if trace_interval['start_instruction_index'] is not None:
+        print(f"  Start Instruction Index: {trace_interval['start_instruction_index']}")
+
+    if trace_interval['end_instruction_index'] is not None:
+        print(f"  End Instruction Index: {trace_interval['end_instruction_index']}")
\ No newline at end of file

From 36c967dcdd3d6b4836d240ed75e0b5775fbdf80b Mon Sep 17 00:00:00 2001
From: vinicius-r-silva
Date: Thu, 21 Aug 2025 00:58:10 -0300
Subject: [PATCH 2/7] renamed source to storage

---
 .../database_explorer.py                      | 44 ++++++++--------
 .../src/data/source_type_map.py               |  6 ---
 .../src/data/storage_type_map.py              |  6 +++
 .../sources => data/storages}/base.py         |  2 +-
 .../storages}/local_storage.py                |  4 +-
 traces/stf_trace_archive/src/handlers/base.py |  2 +-
 traces/stf_trace_archive/src/handlers/get.py  |  2 +-
traces/stf_trace_archive/src/handlers/list.py | 2 +- .../stf_trace_archive/src/handlers/search.py | 8 --- .../stf_trace_archive/src/handlers/setup.py | 52 +++++++++---------- .../stf_trace_archive/src/handlers/upload.py | 2 +- traces/stf_trace_archive/src/requirements.txt | 2 + traces/stf_trace_archive/src/trace_share.py | 12 ++--- .../stf_trace_archive/src/utils/cli_parser.py | 9 ---- 14 files changed, 68 insertions(+), 85 deletions(-) rename traces/stf_trace_archive/src/{database_explorer => data}/database_explorer.py (63%) delete mode 100644 traces/stf_trace_archive/src/data/source_type_map.py create mode 100644 traces/stf_trace_archive/src/data/storage_type_map.py rename traces/stf_trace_archive/src/{database_explorer/sources => data/storages}/base.py (98%) rename traces/stf_trace_archive/src/{database_explorer/sources => data/storages}/local_storage.py (99%) delete mode 100644 traces/stf_trace_archive/src/handlers/search.py create mode 100644 traces/stf_trace_archive/src/requirements.txt diff --git a/traces/stf_trace_archive/src/database_explorer/database_explorer.py b/traces/stf_trace_archive/src/data/database_explorer.py similarity index 63% rename from traces/stf_trace_archive/src/database_explorer/database_explorer.py rename to traces/stf_trace_archive/src/data/database_explorer.py index e0110b3f..feb5f12c 100644 --- a/traces/stf_trace_archive/src/database_explorer/database_explorer.py +++ b/traces/stf_trace_archive/src/data/database_explorer.py @@ -3,16 +3,16 @@ import pandas as pd from data.trace_table_shema import TracesTableSchema from data.workload_table_shema import WorkloadsTableSchema -from database_explorer.sources.base import SourceHandler +from data.storages.base import StorageHandler from data.metadata import Metadata from data.trace_data import TraceData class DatabaseExplorer: - def __init__(self, storage: SourceHandler): - self.source = storage + def __init__(self, storage: StorageHandler): + self.storage = storage def get_workload_id(self, workload_sha256: str) -> Optional[int]: - workload_traces = self.source.traces_table[self.source.traces_table[TracesTableSchema.WORKLOAD_SHA] == workload_sha256] + workload_traces = self.storage.traces_table[self.storage.traces_table[TracesTableSchema.WORKLOAD_SHA] == workload_sha256] if len(workload_traces) == 0: return None @@ -20,16 +20,16 @@ def get_workload_id(self, workload_sha256: str) -> Optional[int]: def upload_workload(self, workload_path: str) -> int: workload_id = self._get_next_workload_id() - self.source.insert_workload(workload_path, workload_id) + self.storage.insert_workload(workload_path, workload_id) return workload_id def _get_next_workload_id(self) -> int: - workload_ids = self.source.traces_table[TracesTableSchema.WORKLOAD_ID].unique() + workload_ids = self.storage.traces_table[TracesTableSchema.WORKLOAD_ID].unique() max_workload_id = max(workload_ids, default=-1) return max_workload_id + 1 def get_workload_name(self, workload_id: int) -> Optional[str]: - workload_row = self.source.workloads_table[self.source.workloads_table[WorkloadsTableSchema.WORKLOAD_ID] == workload_id] + workload_row = self.storage.workloads_table[self.storage.workloads_table[WorkloadsTableSchema.WORKLOAD_ID] == workload_id] if len(workload_row) > 0: return workload_row.iloc[0][WorkloadsTableSchema.WORKLOAD_NAME] @@ -44,22 +44,22 @@ def get_next_trace_attempt(self, workload_id: int) -> int: return max_attempt + 1 def get_trace_attempts(self, workload_id: int) -> list[int]: - workload_traces = 
self.source.traces_table[self.source.traces_table[TracesTableSchema.WORKLOAD_ID] == workload_id] + workload_traces = self.storage.traces_table[self.storage.traces_table[TracesTableSchema.WORKLOAD_ID] == workload_id] return workload_traces[TracesTableSchema.TRACE_ATTEMPT].unique().tolist() def get_trace_parts(self, workload_id: int, trace_attempt: int) -> list[int]: - workload_traces = self.source.traces_table[ - (self.source.traces_table[TracesTableSchema.WORKLOAD_ID] == workload_id) & - (self.source.traces_table[TracesTableSchema.TRACE_ATTEMPT] == trace_attempt) + workload_traces = self.storage.traces_table[ + (self.storage.traces_table[TracesTableSchema.WORKLOAD_ID] == workload_id) & + (self.storage.traces_table[TracesTableSchema.TRACE_ATTEMPT] == trace_attempt) ] return workload_traces[TracesTableSchema.TRACE_PART].unique().tolist() def check_trace_exists(self, trace_id: str) -> bool: - return trace_id in self.source.traces_table[TracesTableSchema.TRACE_ID].values + return trace_id in self.storage.traces_table[TracesTableSchema.TRACE_ID].values def upload_traces(self, traces: list[TraceData]) -> None: - self.source.update_traces_table() + self.storage.update_traces_table() for trace in traces: print(f"Uploading trace: {trace.path} with id: {trace.id}") self._upload_trace(trace.path, trace.metadata) @@ -72,12 +72,12 @@ def _upload_trace(self, trace_path: str, metadata: Metadata) -> None: if self.check_trace_exists(trace_id): raise ValueError(f"Trace with ID {trace_id} already exists in the database.") - self.source.insert_trace(trace_path, metadata) + self.storage.insert_trace(trace_path, metadata) def is_fully_traced(self, workload_id: int, trace_attempt: int) -> Optional[bool]: - trace_row = self.source.traces_table[ - (self.source.traces_table[TracesTableSchema.WORKLOAD_ID] == workload_id) & - (self.source.traces_table[TracesTableSchema.TRACE_ATTEMPT] == trace_attempt) + trace_row = self.storage.traces_table[ + (self.storage.traces_table[TracesTableSchema.WORKLOAD_ID] == workload_id) & + (self.storage.traces_table[TracesTableSchema.TRACE_ATTEMPT] == trace_attempt) ] if len(trace_row) == 0: return None @@ -85,16 +85,16 @@ def is_fully_traced(self, workload_id: int, trace_attempt: int) -> Optional[bool return trace_row.iloc[0][TracesTableSchema.FULLY_TRACED] def get_trace_ids(self) -> list[str]: - return self.source.traces_table[TracesTableSchema.TRACE_ID].to_list() + return self.storage.traces_table[TracesTableSchema.TRACE_ID].to_list() def get_metadata(self, trace_id: str) -> Metadata: - return self.source.get_metadata(trace_id) + return self.storage.get_metadata(trace_id) def save_metadata(self, trace_id: str, path: str) -> None: - return self.source.save_metadata(trace_id, path) + return self.storage.save_metadata(trace_id, path) def save_trace(self, trace_id: str, path: str) -> None: - return self.source.save_trace(trace_id, path) + return self.storage.save_trace(trace_id, path) def save_workload(self, workload_id: int, path: str) -> None: - return self.source.save_workload(workload_id, path) \ No newline at end of file + return self.storage.save_workload(workload_id, path) \ No newline at end of file diff --git a/traces/stf_trace_archive/src/data/source_type_map.py b/traces/stf_trace_archive/src/data/source_type_map.py deleted file mode 100644 index 1bab714a..00000000 --- a/traces/stf_trace_archive/src/data/source_type_map.py +++ /dev/null @@ -1,6 +0,0 @@ -from typing import Type, Dict -from database_explorer.sources.local_storage import LocalStorageSource - -SOURCE_TYPE_MAP: 
Dict[str, Type] = { - "local-storage": LocalStorageSource, -} \ No newline at end of file diff --git a/traces/stf_trace_archive/src/data/storage_type_map.py b/traces/stf_trace_archive/src/data/storage_type_map.py new file mode 100644 index 00000000..eaf6a71f --- /dev/null +++ b/traces/stf_trace_archive/src/data/storage_type_map.py @@ -0,0 +1,6 @@ +from typing import Type, Dict +from data.storages.local_storage import LocalStorage + +STORAGE_TYPE_MAP: Dict[str, Type] = { + "local-storage": LocalStorage, +} \ No newline at end of file diff --git a/traces/stf_trace_archive/src/database_explorer/sources/base.py b/traces/stf_trace_archive/src/data/storages/base.py similarity index 98% rename from traces/stf_trace_archive/src/database_explorer/sources/base.py rename to traces/stf_trace_archive/src/data/storages/base.py index c8b700c8..7847984e 100644 --- a/traces/stf_trace_archive/src/database_explorer/sources/base.py +++ b/traces/stf_trace_archive/src/data/storages/base.py @@ -4,7 +4,7 @@ from data.output_path import OutputPaths from data.metadata import Metadata -class SourceHandler(ABC): +class StorageHandler(ABC): @property def traces_table(self): raise NotImplementedError("This method should be overridden by subclasses.") diff --git a/traces/stf_trace_archive/src/database_explorer/sources/local_storage.py b/traces/stf_trace_archive/src/data/storages/local_storage.py similarity index 99% rename from traces/stf_trace_archive/src/database_explorer/sources/local_storage.py rename to traces/stf_trace_archive/src/data/storages/local_storage.py index a68e1339..e596eed4 100644 --- a/traces/stf_trace_archive/src/database_explorer/sources/local_storage.py +++ b/traces/stf_trace_archive/src/data/storages/local_storage.py @@ -9,11 +9,11 @@ from data.config import LocalStorageConfig from data.consts import Const from data.metadata import Metadata -from .base import SourceHandler +from .base import StorageHandler from data.trace_table_shema import TracesTableSchema from utils.metadata_parser import MetadataParser -class LocalStorageSource(SourceHandler): +class LocalStorage(StorageHandler): def __init__(self, config: LocalStorageConfig): if not config.path: raise ValueError("Storage path cannot be empty.") diff --git a/traces/stf_trace_archive/src/handlers/base.py b/traces/stf_trace_archive/src/handlers/base.py index 9b27783f..38ced6bc 100644 --- a/traces/stf_trace_archive/src/handlers/base.py +++ b/traces/stf_trace_archive/src/handlers/base.py @@ -1,7 +1,7 @@ from abc import ABC, abstractmethod import argparse -from database_explorer.database_explorer import DatabaseExplorer +from data.database_explorer import DatabaseExplorer class CommandHandler(ABC): @abstractmethod diff --git a/traces/stf_trace_archive/src/handlers/get.py b/traces/stf_trace_archive/src/handlers/get.py index 14319ef3..a8d4daa7 100644 --- a/traces/stf_trace_archive/src/handlers/get.py +++ b/traces/stf_trace_archive/src/handlers/get.py @@ -1,6 +1,6 @@ import argparse import os -from database_explorer.database_explorer import DatabaseExplorer +from data.database_explorer import DatabaseExplorer from data.output_path import OutputPaths from .base import CommandHandler diff --git a/traces/stf_trace_archive/src/handlers/list.py b/traces/stf_trace_archive/src/handlers/list.py index fa4415d7..ecb70eee 100644 --- a/traces/stf_trace_archive/src/handlers/list.py +++ b/traces/stf_trace_archive/src/handlers/list.py @@ -1,6 +1,6 @@ import argparse from .base import CommandHandler -from database_explorer.database_explorer import DatabaseExplorer 
+from data.database_explorer import DatabaseExplorer from utils.ui import print_medatata_details class ListHandler(CommandHandler): diff --git a/traces/stf_trace_archive/src/handlers/search.py b/traces/stf_trace_archive/src/handlers/search.py deleted file mode 100644 index 88adfff1..00000000 --- a/traces/stf_trace_archive/src/handlers/search.py +++ /dev/null @@ -1,8 +0,0 @@ -import argparse -from database_explorer.database_explorer import DatabaseExplorer -from .base import CommandHandler - -# TODO -class SearchHandler(CommandHandler): - def run(self, args: argparse.Namespace, database_explorer: DatabaseExplorer): - raise NotImplementedError("SearchHandler is not implemented yet.") \ No newline at end of file diff --git a/traces/stf_trace_archive/src/handlers/setup.py b/traces/stf_trace_archive/src/handlers/setup.py index ad058b86..9c30d59e 100644 --- a/traces/stf_trace_archive/src/handlers/setup.py +++ b/traces/stf_trace_archive/src/handlers/setup.py @@ -4,7 +4,7 @@ import yaml from dataclasses import asdict from data.config import Config, StorageConfig -from data.source_type_map import SOURCE_TYPE_MAP +from data.storage_type_map import STORAGE_TYPE_MAP from .base import CommandHandler class SetupHandler(CommandHandler): @@ -51,51 +51,51 @@ def _complete_config_file(self) -> None: def _add_storage_source(self) -> None: - source_types = list(SOURCE_TYPE_MAP.keys()) + storage_types = list(STORAGE_TYPE_MAP.keys()) print("Creating a new storage source.") - print(f"Registred source type options: {', '.join(source_types)}") - source_type = input("Select your source type: ").lower() + print(f"Registered storage type options: {', '.join(storage_types)}") + storage_type = input("Select your storage type: ").lower() - if source_type not in source_types: - raise ValueError(f"Unknown source type: {source_type}") + if storage_type not in storage_types: + raise ValueError(f"Unknown storage type: {storage_type}") - used_source_names = self._get_source_names() - source_name = input("Enter your source name: ").lower() + used_storage_names = self._get_storage_names() + storage_name = input("Enter your storage name: ").lower() - if source_name in used_source_names: - raise ValueError(f"Source name {source_name} already in use") + if storage_name in used_storage_names: + raise ValueError(f"Storage name {storage_name} already in use") - source_class = SOURCE_TYPE_MAP.get(source_type) - source_specific_config = source_class.setup() - source_config = StorageConfig(type = source_type, name=source_name, config=source_specific_config) + storage_class = STORAGE_TYPE_MAP.get(storage_type) + storage_specific_config = storage_class.setup() + storage_config = StorageConfig(type = storage_type, name=storage_name, config=storage_specific_config) if not self._config: - self._config = Config(storages=[source_config], default_storage=source_name) + self._config = Config(storages=[storage_config], default_storage=storage_name) elif not self._config.storages: - self._config.storages = [source_config] + self._config.storages = [storage_config] else: - self._config.storages.append(source_config) + self._config.storages.append(storage_config) if not self._config.default_storage: - self._config.default_storage = source_name + self._config.default_storage = storage_name - def _get_source_names(self) -> list[str]: + def _get_storage_names(self) -> list[str]: if not self._config: return [] return [storage.name for storage in self._config.storages] - def _set_default_storage(self, source_name: str = None) -> None: - used_source_names
= self._get_source_names() + def _set_default_storage(self, storage_name: str = None) -> None: + used_storage_names = self._get_storage_names() - if not source_name: - print(f"Enter the default source name: ", end="") - source_name = input().lower() + if not storage_name: + print(f"Enter the default storage source name: ", end="") + storage_name = input().lower() - if source_name not in used_source_names: - raise ValueError(f"Source name {source_name} not found on configure storage source names") + if storage_name not in used_storage_names: + raise ValueError(f"Storage source name {storage_name} not found among the configured storage names") - self._config.default_storage = source_name + self._config.default_storage = storage_name def _save_config(self) -> None: with open(self._config_path, 'w') as config_file: diff --git a/traces/stf_trace_archive/src/handlers/upload.py b/traces/stf_trace_archive/src/handlers/upload.py index f682b11e..2c27cd31 100644 --- a/traces/stf_trace_archive/src/handlers/upload.py +++ b/traces/stf_trace_archive/src/handlers/upload.py @@ -1,7 +1,7 @@ import argparse import os from tkinter.filedialog import FileDialog -from database_explorer.database_explorer import DatabaseExplorer +from data.database_explorer import DatabaseExplorer from data.consts import Const from .base import CommandHandler from utils.metadata_parser import MetadataParser diff --git a/traces/stf_trace_archive/src/requirements.txt b/traces/stf_trace_archive/src/requirements.txt new file mode 100644 index 00000000..265d2a0c --- /dev/null +++ b/traces/stf_trace_archive/src/requirements.txt @@ -0,0 +1,2 @@ +pandas==2.3.1 +PyYAML diff --git a/traces/stf_trace_archive/src/trace_share.py b/traces/stf_trace_archive/src/trace_share.py index 503609ca..584bd1ab 100644 --- a/traces/stf_trace_archive/src/trace_share.py +++ b/traces/stf_trace_archive/src/trace_share.py @@ -1,10 +1,9 @@ -from data.config import Config, StorageConfig -from data.source_type_map import SOURCE_TYPE_MAP -from database_explorer.database_explorer import DatabaseExplorer +from data.config import Config +from data.storage_type_map import STORAGE_TYPE_MAP +from data.database_explorer import DatabaseExplorer from utils.cli_parser import parseArgs from handlers.upload import UploadHandler from handlers.list import ListHandler -from handlers.search import SearchHandler from handlers.get import GetHandler from handlers.setup import SetupHandler @@ -13,7 +12,6 @@ def main(): setupHandler = SetupHandler() command_map = { 'upload': UploadHandler(), - 'search': SearchHandler(), 'list': ListHandler(), 'get': GetHandler(), 'setup': setupHandler, @@ -41,9 +39,9 @@ def get_storage(selected_storage: str, config: Config) -> DatabaseExplorer: if not storage_config: raise ValueError(f"Storage not found: {selected_storage}") - storage_class = SOURCE_TYPE_MAP.get(storage_config.type) + storage_class = STORAGE_TYPE_MAP.get(storage_config.type) if not storage_class: - raise ValueError(f"Unknown source storage class: {storage_class}") + raise ValueError(f"Unknown storage type: {storage_config.type}") storage = storage_class(storage_config.config) explorer = DatabaseExplorer(storage) diff --git a/traces/stf_trace_archive/src/utils/cli_parser.py b/traces/stf_trace_archive/src/utils/cli_parser.py index b1b4a893..d4f173f6 100644 --- a/traces/stf_trace_archive/src/utils/cli_parser.py +++ b/traces/stf_trace_archive/src/utils/cli_parser.py @@ -26,15 +26,6 @@ def parseArgs() -> argparse.Namespace: upload_parser.add_argument('--trace', action='append', help='Path to one or more trace
files. If omitted, defaults to .zstf') upload_parser.add_argument('--it', action='store_true', help='Iteractive file selection mode.') - search_parser = subparsers.add_parser( - 'search', - help='Search traces by specified expression.', - description='Search for traces and metadata using a regular expression.', - formatter_class=argparse.RawTextHelpFormatter - ) - search_parser.add_argument('regex', nargs='?', help='Regex expression to search with.') - search_parser.add_argument('--names-only', action='store_true', help='Search only by trace name (ignore metadata).') - list_parser = subparsers.add_parser( 'list', help='List items by category.', From cbb530f7a6558ca19412bc1d3d9e03e6df09395d Mon Sep 17 00:00:00 2001 From: vinicius-r-silva Date: Thu, 21 Aug 2025 02:04:27 -0300 Subject: [PATCH 3/7] remove debug lines and improved readme --- traces/stf_trace_archive/README.md | 251 +++++++++++------- .../src/data/database_explorer.py | 1 + .../stf_trace_archive/src/data/trace_data.py | 12 +- .../stf_trace_archive/src/handlers/setup.py | 2 - .../stf_trace_archive/src/handlers/upload.py | 2 +- .../src/tests/utils/trace_generator.py | 6 - .../stf_trace_archive/src/utils/cli_parser.py | 7 +- 7 files changed, 171 insertions(+), 110 deletions(-) diff --git a/traces/stf_trace_archive/README.md b/traces/stf_trace_archive/README.md index 53f23c63..b69a4f1f 100644 --- a/traces/stf_trace_archive/README.md +++ b/traces/stf_trace_archive/README.md @@ -1,113 +1,182 @@ # Trace Archive Tool -A Python command-line interface (CLI) tool to manage shared trace files,such as uploding, searching and downloading traces. +## Table of Contents + +1. [Quickstart](#quickstart) +2. [Introduction](#introduction) +3. [Dependencies](#dependencies) +4. [Project Structure](#project-structure) +5. [Usage](#usage) + 1. [Initial Setup](#initial-setup) + 2. [Upload Command](#upload-command) + 3. [List Command](#list-command) + 4. [Get Command](#get-command) +6. [Examples](#examples) + 1. [Uploading a Trace](#uploading-a-trace) + 2. [Downloading a Trace](#downloading-a-trace) + 3. [Downloading a Workload](#downloading-a-workload) + 4. [Creating and using second storage source](#creating-and-using-second-storage-source) +7. [Trace ID](#trace-id) + 1. [Example Trace IDs](#example-trace-ids) +8. [Storage Folder Structure](#storage-folder-structure) + +## Quickstart -## Usage +```bash +# Install dependencies +pip install -r requirements.txt -Run the script using: +# Configure initial storage (local) +python trace_archive.py setup -```bash -python trace_archive.py [options] +# Upload a workload and trace +python trace_archive.py upload --workload ../../stf_metadata/example/dhrystone --trace ../../stf_metadata/example/dhrystone.zstf ``` -To view all available commands and options use `--help` or `-h`: +## Introduction -```bash -$ python trace_archive.py --help -Usage: python trace_archive.py COMMAND [OPTIONS] +A Python CLI tool for uploading, organizing, and sharing trace and workload files. +Currently supports local storage, with planned extensions for cloud sources (e.g., Google Drive). + +## Dependencies -CLI tool for Olympia traces exploration +To use the trace archive tool, ensure you have the following installed: -Commands: - connect Connect to the system or database. - upload Upload workload and trace. - search Search traces by specified expression. - list List items by category. - get Download a specified trace file. 
+- **Python 3** (recommended: Python 3.8 or newer) +- **Required Python packages**: Install dependencies with: + ```bash + pip install -r requirements.txt + ``` + The main requirements are: + - `pandas` + - `PyYAML` -Run 'trace_archive COMMAND --help' for more information on a command. -For more help on how to use trace_archive, head to GITHUB_README_LINK +## Project Structure +```text +src/ +├── data/ # Core data models and classes +│ └── storage/ # Storage backend implementations (local, cloud, etc.) +├── handlers/ # Command handlers (upload, get, list, setup) +├── utils/ # Utility functions and helpers +└── trace_archive.py # Main CLI entry point ``` --- -## Available Commands +## Usage + +The tool can be used with the following commands: + +* **[Upload](#upload-command).** Upload workload and/or trace. +* **[List](#list-command).** List items by category. +* **[Get](#get-command).** Download a specified trace file. +* **[Setup](#setup-command).** Create or edit current tool configurations. -### `upload` -Uploads a trace file along with its associated workload and metadata. +### Initial Setup + +To set up the trace archive tool, run the `setup` command to configure the initial storage type and its path. For example, to set up a local storage source named `local` with the storage folder `/home/user/trace_archive`, run: ```bash -$ python trace_archive.py upload --help -Usage: python trace_archive.py upload [OPTIONS] +$ python trace_archive.py setup +Creating a new storage source. +Registered storage type options: local-storage +Select your storage type: local-storage +Enter your storage name: local +Enter the storage folder path: /home/user/trace_archive +``` + +Every storage source has a type and a name. The type identifies the storage backend, like `local-storage` or `google-drive`, while the name identifies that storage configuration in the tool's commands. -Upload a workload, trace and metadata to the database +With the initial setup done, you can add new storage sources or change the default storage source by running the `setup` command again with the `--add-storage` and `--set-default-storage` options, respectively. -Options: - --workload Path to the workload file. - --trace Path to the trace file. - --it Iteractive files selection mode. +```bash +$ python trace_archive.py setup --add-storage +$ python trace_archive.py setup --set-default-storage ``` -> Requires a metadata file located at `.metadata.yaml`. +All configurations are stored in the `config.yaml` file, which is created in the current working directory when the `setup` command is run for the first time. -> For every upload, a unqiue [trace id](#trace-id) will be generated +Check out the [Creating and using second storage source](#creating-and-using-second-storage-source) section for more details on creating and using a second storage source. --- +### Upload Command -### `search` +The `upload` command uploads a trace together with its workload. It expects the trace file and, if it is not yet present in the storage, the workload file. The tool also expects a metadata file: a YAML file named `<trace-file>.metadata.yaml`, where `<trace-file>` is the name of the trace file. Multiple traces can be uploaded at once, as long as they belong to the same trace attempt. A sketch of the expected metadata layout is shown below.
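+
+The following is a minimal sketch of such a metadata file for the `dhrystone.zstf` trace used in the examples: the field names follow the validation rules in `utils/metadata_parser.py`, while every value shown is an illustrative placeholder. The `trace_id` field is omitted because the tool generates and fills it in during upload.
+
+```yaml
+# dhrystone.zstf.metadata.yaml -- all values are placeholders
+description: dhrystone compiled with -O3, fully traced
+author:
+  name: Jane Doe
+  company: RISCV
+  email: jane.doe@riscv.org
+workload:
+  filename: dhrystone
+  SHA256: "<sha256 of the workload file>"
+  execution_command: ./dhrystone
+  elf_sections:
+    comment: "<contents of the .comment section>"
+    riscv.attributes: "<contents of the .riscv.attributes section>"
+    GCC.command.line: "<contents of the .GCC.command.line section>"
+stf:
+  timestamp: "2025-08-20T23:40:42"
+  stf_trace_info:
+    VERSION: "<STF version>"
+    GENERATOR: "<trace generator name>"
+  trace_interval: null
+```
+
+A `trace_interval` of `null` marks a fully traced workload; for a partial trace it must instead carry `instruction_pc`, `pc_count`, `interval_lenght` (spelled as in the tool's schema), `start_instruction_index`, and `end_instruction_index`.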
-Search can be used to search for the given regex term in the list of available traces and metadata matches +The `upload` command options are: -```bash -$ python trace_archive.py search --help -Usage: python trace_archive.py search [OPTIONS] [REGEX] +* `--workload`: Path to the workload file. +* `--trace`: Path to the trace file. +* `--it`: Interactive file selection mode. If this option is used, the tool prompts the user to select the workload and trace files. -Search for traces and metadata using a regular expression. +For every upload, a unique [`trace-id`](#trace-id) will be generated and filled into the metadata file. -Arguments: - REGEX Regex expression to search with. +### List Command -Options: - --names-only Search only by trace id (ignore metadata). +The `list` command is used to list the available traces or workloads in the archive. --- +### Get Command -### `list` +The `get` command is used to download a specified trace, workload, or metadata file from the archive. The command options are: -```bash -$ python trace_archive.py list --help -Usage: python trace_archive.py list [OPTIONS] +* `--trace`: Id of the trace to download. +* `--workload`: Id of the workload to download. +* `--metadata`: Id of the metadata (same as the trace id) to download. +* `-o, --output`: Output file path. If not specified, the file will be downloaded to the current working directory. -List database traces or related entities. +## Examples -Options: - --traces Lists available traces (default) - --workloads Lists available workloads +Assuming the trace archive tool is set up with a local storage source named `local`, you can use the following commands: + +### Uploading a Trace + +To upload a trace file named `dhrystone.zstf` and its workload `dhrystone`, located in the `stf_metadata/example` folder, you can run: + +```bash +$ python trace_archive.py --storage-name local upload --workload ../../stf_metadata/example/dhrystone --trace ../../stf_metadata/example/dhrystone.zstf + +Uploading workload: ../../stf_metadata/example/dhrystone with id: 0 +Uploading trace: ../../stf_metadata/example/dhrystone.zstf with id: 0.0.0000_dhrystone ``` --- +### Downloading a Trace + +To download the trace `0.0.0000_dhrystone` and its metadata, you can run: + +```bash +$ python trace_archive.py get --trace 0.0.0000_dhrystone -### `get` +Trace 0.0.0000_dhrystone saved on ./0.0.0000_dhrystone.zstf +Metadata 0.0.0000_dhrystone saved on ./0.0.0000_dhrystone.zstf.metadata.yaml ``` + +### Downloading a Workload -Downloads a specified trace file. +To download the workload `0` (dhrystone), you can run: ```bash -$ python trace_archive.py get --help -Usage: python trace_archive.py get [OPTIONS] TRACE +$ python trace_share.py get --workload 0 -Download a specified trace file. +Workload 0 saved on ./dhrystone ``` -Arguments: - TRACE Name of the trace to download. +### Creating and using second storage source -Options: - --revision Revision number. If not specified, the latest revision is used. - --company Filter by associated company. - --author Filter by author. - -o, --output Output file path. +To create a second storage source, you can run the `setup` command with the `--add-storage` option: + +```bash +$ python trace_archive.py setup --add-storage +Creating a new storage source.
+Registered storage type options: local-storage +Select your storage type: local-storage +Enter your storage name: private-storage +Enter the storage folder path: ./private +``` + +This will create a new storage source named `private-storage` with the path `./private`. You can then use this storage source in the `upload` command by specifying the `--storage-name` option: + +```bash +$ python trace_archive.py --storage-name private-storage upload --workload ../../stf_metadata/example/dhrystone --trace ../../stf_metadata/example/dhrystone.zstf ``` ## Trace ID @@ -135,13 +204,13 @@ Where: | Upload # | Description | Trace ID | | -------- | --------------------------------------------------------- | ----------------------- | -| 1st | `dhrystone` compiled with `-O3`, fully traced | `000.000.000_dhrystone` | -| 2nd | `dhrystone` `-O3`, traced from instruction 0 to 1,000,000 | `000.001.000_dhrystone` | -| 3rd | `dhrystone` `-O3`, traced from 1,000,000 to 2,000,000 | `000.001.001_dhrystone` | -| 4th | `dhrystone` `-O3`, traced from 2,000,000 to 3,000,000 | `000.001.002_dhrystone` | -| 5th | Same trace as 1st (re-uploaded) | `000.002.000_dhrystone` | -| 6th | `dhrystone` compiled with `-O2`, fully traced | `001.000.000_dhrystone` | -| 7th | `embench` compiled with `-O3`, fully traced | `002.000.000_embench` | +| 1st | `dhrystone` compiled with `-O3`, fully traced | `0.0.0000_dhrystone` | +| 2nd | `dhrystone` `-O3`, traced from instruction 0 to 1,000,000 | `0.1.0000_dhrystone` | +| 3rd | `dhrystone` `-O3`, traced from 1,000,000 to 2,000,000 | `0.1.0001_dhrystone` | +| 4th | `dhrystone` `-O3`, traced from 2,000,000 to 3,000,000 | `0.1.0002_dhrystone` | +| 5th | Same trace as 1st (re-uploaded) | `0.2.0000_dhrystone` | +| 6th | `dhrystone` compiled with `-O2`, fully traced | `1.0.0000_dhrystone` | +| 7th | `embench` compiled with `-O3`, fully traced | `2.0.0000_embench` | --- @@ -152,33 +221,33 @@ For the trace archive structure, each workload is stored in its own folder, iden The tree graph below illustrates a setup of the [Trace Id Example](#example-trace-ids): ```text -000/ +0000_dhrystone/ ├── dhrystone ├── dhrystone.objdump ├── dhrystone.stdout -├── 000/ -│ ├── 000.000.000_dhrystone.zstf -│ └── 000.000.000_dhrystone.zstf.metadata.yaml -├── 001/ -│ ├── 000.001.000_dhrystone.zstf -│ ├── 000.001.000_dhrystone.zstf.metadata.yaml -│ ├── 000.001.001_dhrystone.zstf -│ ├── 000.001.001_dhrystone.zstf.metadata.yaml -│ ├── 000.001.002_dhrystone.zstf -│ └── 000.001.002_dhrystone.zstf.metadata.yaml -001/ +├── attempt_0000/ +│ ├── 0.0.0000_dhrystone.zstf +│ └── 0.0.0000_dhrystone.zstf.metadata.yaml +├── attempt_0001/ +│ ├── 0.1.0000_dhrystone.zstf +│ ├── 0.1.0000_dhrystone.zstf.metadata.yaml +│ ├── 0.1.0001_dhrystone.zstf +│ ├── 0.1.0001_dhrystone.zstf.metadata.yaml +│ ├── 0.1.0002_dhrystone.zstf +│ └── 0.1.0002_dhrystone.zstf.metadata.yaml +0001_dhrystone/ ├── dhrystone ├── dhrystone.objdump ├── dhrystone.stdout -└── 000/ - ├── 001.000.000_dhrystone.zstf - └── 001.000.000_dhrystone.zstf.metadata.yaml +└── attempt_0000/ + ├── 1.0.0000_dhrystone.zstf + └── 1.0.0000_dhrystone.zstf.metadata.yaml -002/ +0002_embench/ ├── embench.zip ├── embench.objdump ├── embench.stdout -└── 000/ - ├── 002.000.000_embench.zstf - └── 002.000.000_embench.zstf.metadata.yaml +└── attempt_0000/ + ├── 2.0.0000_embench.zstf + └── 2.0.0000_embench.zstf.metadata.yaml ``` diff --git a/traces/stf_trace_archive/src/data/database_explorer.py b/traces/stf_trace_archive/src/data/database_explorer.py index feb5f12c..3b76c3de 100644 ---
a/traces/stf_trace_archive/src/data/database_explorer.py +++ b/traces/stf_trace_archive/src/data/database_explorer.py @@ -20,6 +20,7 @@ def get_workload_id(self, workload_sha256: str) -> Optional[int]: def upload_workload(self, workload_path: str) -> int: workload_id = self._get_next_workload_id() + print(f"Uploading workload: {workload_path} with id: {workload_id}") self.storage.insert_workload(workload_path, workload_id) return workload_id diff --git a/traces/stf_trace_archive/src/data/trace_data.py b/traces/stf_trace_archive/src/data/trace_data.py index a3e73f7b..848b3924 100644 --- a/traces/stf_trace_archive/src/data/trace_data.py +++ b/traces/stf_trace_archive/src/data/trace_data.py @@ -3,9 +3,9 @@ @dataclass class TraceData(): - path: Optional[str] - id: Optional[str] - attempt: Optional[str] - part: Optional[str] - metadata_path: Optional[str] - metadata: Optional[str] \ No newline at end of file + path: Optional[str] = None + id: Optional[str] = None + attempt: Optional[str] = None + part: Optional[str] = None + metadata_path: Optional[str] = None + metadata: Optional[str] = None \ No newline at end of file diff --git a/traces/stf_trace_archive/src/handlers/setup.py b/traces/stf_trace_archive/src/handlers/setup.py index 9c30d59e..f0805e37 100644 --- a/traces/stf_trace_archive/src/handlers/setup.py +++ b/traces/stf_trace_archive/src/handlers/setup.py @@ -35,8 +35,6 @@ def _read_config_file(self) -> None: with open(self._config_path, 'r') as config_file: config_dict = yaml.safe_load(config_file) self._config = Config.from_dict(config_dict) - print("config") - print(self._config) def _complete_config_file(self) -> None: if not self._config or not self._config.storages: diff --git a/traces/stf_trace_archive/src/handlers/upload.py b/traces/stf_trace_archive/src/handlers/upload.py index 2c27cd31..490a0ba2 100644 --- a/traces/stf_trace_archive/src/handlers/upload.py +++ b/traces/stf_trace_archive/src/handlers/upload.py @@ -41,7 +41,7 @@ def _get_arg_traces(self, args) -> list[TraceData]: raise FileNotFoundError(f"Metadata file not found: {metadata_path}") metadata = MetadataParser.parse_metadata_from_path(metadata_path) - traces.append(TraceData(trace_path=trace_path, metadata_path=metadata_path, metadata=metadata)) + traces.append(TraceData(path=trace_path, metadata_path=metadata_path, metadata=metadata)) if len(traces) <= 0: raise ValueError("No traces provided.") diff --git a/traces/stf_trace_archive/src/tests/utils/trace_generator.py b/traces/stf_trace_archive/src/tests/utils/trace_generator.py index f8871a2e..b3ed8f77 100644 --- a/traces/stf_trace_archive/src/tests/utils/trace_generator.py +++ b/traces/stf_trace_archive/src/tests/utils/trace_generator.py @@ -99,23 +99,17 @@ def delete_test_traces(self): self._delete_folder_and_files(self.test_storage_path) def delete_test_storage(self, type, path): - # print(f"Deleting {path}") if type == "local-storage": self._delete_folder_and_files(path) def _delete_folder_and_files(self, path): - # print(f"\n\nDeleting {path}") - # print(f"exist: {os.path.exists(path)}") if not os.path.exists(path): return for root, dirs, files in os.walk(path, topdown=False): for name in files: - # print(f"Deleting file {os.path.join(root, name)}") os.remove(os.path.join(root, name)) for name in dirs: - # print(f"Deleting folder {os.path.join(root, name)}") os.rmdir(os.path.join(root, name)) - # print(f"Deleting folder {path}\n\n") os.rmdir(path) diff --git a/traces/stf_trace_archive/src/utils/cli_parser.py b/traces/stf_trace_archive/src/utils/cli_parser.py 
index d4f173f6..fa8bf13a 100644 --- a/traces/stf_trace_archive/src/utils/cli_parser.py +++ b/traces/stf_trace_archive/src/utils/cli_parser.py @@ -34,7 +34,6 @@ def parseArgs() -> argparse.Namespace: ) group = list_parser.add_mutually_exclusive_group() group.add_argument('--traces', action='store_true', help='Lists available traces (default)') - group.add_argument('--companies', action='store_true', help='Lists associated companies') get_parser = subparsers.add_parser( 'get', @@ -44,9 +43,9 @@ def parseArgs() -> argparse.Namespace: ) group = get_parser.add_mutually_exclusive_group(required=True) - group.add_argument('--trace', help='Id of the trace to download.') - group.add_argument('--workload', help='Id of the workload to download.') - group.add_argument('--metadata', help='Id of the metadata to download.') + group.add_argument('--trace', help='Id of the trace to download.', metavar='TRACE_ID') + group.add_argument('--metadata', help='Id of the metadata (same as trace) to download.', metavar='TRACE_ID') + group.add_argument('--workload', help='Id of the workload to download.', metavar='WORKLOAD_ID') get_parser.add_argument('-o', '--output', help='Output folder or file path') setup_parser = subparsers.add_parser( From b97823077f3f2d307258752d223ad40b30b9bf94 Mon Sep 17 00:00:00 2001 From: vinicius-r-silva Date: Thu, 21 Aug 2025 02:05:39 -0300 Subject: [PATCH 4/7] removed incomplete tests --- .../src/tests/upload_tests.py | 146 ------------------ .../src/tests/utils/trace_generator.py | 115 -------------- 2 files changed, 261 deletions(-) delete mode 100644 traces/stf_trace_archive/src/tests/upload_tests.py delete mode 100644 traces/stf_trace_archive/src/tests/utils/trace_generator.py diff --git a/traces/stf_trace_archive/src/tests/upload_tests.py b/traces/stf_trace_archive/src/tests/upload_tests.py deleted file mode 100644 index 4806b175..00000000 --- a/traces/stf_trace_archive/src/tests/upload_tests.py +++ /dev/null @@ -1,146 +0,0 @@ -from contextlib import ExitStack -import io -import os -import re -import unittest - -from unittest.mock import patch -from tests.utils.trace_generator import TraceDataGenerator -from data.consts import Const -from trace_share import main -import sys - -class TraceTestInput(): - def __init__(self, workload_path, trace_paths): - self.workload_path = workload_path - self.trace_paths = trace_paths - -class TestUpload(unittest.TestCase): - def setUp(self): - self.generator = TraceDataGenerator() - self.workload1_path = self.generator.generate_worload(0) - self.workload1_name = self.workload1_path.split('/')[-1] - self.storage_type = "local-storage" - self.storage_path = "./tests/storage_test" - - self.workload1_full_trace_path = self.generator.generate_trace(workload_id=self.workload1_path, trace_attempt=0, trace_part=None) - self.workload1_trace_part_1_path = self.generator.generate_trace(workload_id=self.workload1_path, trace_attempt=1, trace_part=0) - self.workload1_trace_part_2_path = self.generator.generate_trace(workload_id=self.workload1_path, trace_attempt=1, trace_part=1) - self.default_args = [ - "trace_share", - "--source-type", - self.storage_type, - "--source-path", - self.storage_path, - "upload", - ] - - def tearDown(self): - self.generator.delete_test_traces() - self.generator.delete_test_storage(self.storage_type, self.storage_path) - pass - - def launch_test(self, workload, traces, inputs = None): - args = [ - *self.default_args, - ] - - if workload: - args.append("--workload") - args.append(workload) - - if traces: - if isinstance(traces, 
str): - traces = [traces] - for trace in traces: - args.append("--trace") - args.append(trace) - - captured_output = io.StringIO() - captured_stderr = io.StringIO() - try: - with ExitStack() as stack: - stack.enter_context(patch.object(sys, 'argv', args)) - stack.enter_context(patch("sys.stdout", new=captured_output)) - stack.enter_context(patch("sys.stderr", new=captured_stderr)) - if inputs is not None: - stack.enter_context(patch('builtins.input', side_effect = inputs)) - - main() - except Exception as e: - # print("stdout") - # print(captured_output.getvalue()) - # print("stderr") - # print(captured_stderr.getvalue()) - # print("e") - # print(e) - # traceback.print_exc() - return captured_output.getvalue(), captured_stderr.getvalue(), e - - # print("stdout") - # print(captured_output.getvalue()) - return captured_output.getvalue(), None, None - - def trace_exists_assert(self, trace_id, workload_name): - workload_id, attempt_id = trace_id.split(".")[0:2] - workload_folder = f"{workload_id.zfill(Const.PAD_LENGHT)}_{workload_name}" - attempt_folder = f"attempt_{attempt_id.zfill(Const.PAD_LENGHT)}" - trace_path = os.path.join(self.storage_path, workload_folder, attempt_folder, f"{trace_id}.zstf") - metadata_path = f"{trace_path}.metadata.yaml" - - self.assertTrue(os.path.exists(trace_path)) - self.assertTrue(os.path.exists(metadata_path)) - - def get_trace_ids_from_output(self, output): - if not output: - return [] - - pattern = r"(?<=\s)\d+\.\d+\.\d+_\S+" - return re.findall(pattern, output) - - def test_upload_full_trace(self): - print(f"\ntest_upload_full_trace") - - stdout, stderr, error = self.launch_test(self.workload1_path, self.workload1_full_trace_path) - trace_ids = self.get_trace_ids_from_output(stdout) - expected_trace_id = f"0.0.0000_{self.workload1_name}" - - self.assertIsNone(error) - self.assertIsNone(stderr) - self.assertEqual(len(trace_ids), 1) - self.assertEqual(trace_ids[0], expected_trace_id) - self.trace_exists_assert(expected_trace_id, self.workload1_name) - - def test_upload_partial_traces(self): - print(f"\n\ntest_upload_partial_trace") - inputs = ["0", "1"] - stdout, stderr, error = self.launch_test(self.workload1_path, [self.workload1_trace_part_1_path, self.workload1_trace_part_2_path], inputs) - trace_ids = self.get_trace_ids_from_output(stdout) - expected_trace_ids = [f"0.0.0000_{self.workload1_name}", f"0.0.0001_{self.workload1_name}"] - - self.assertIsNone(error) - self.assertIsNone(stderr) - self.assertEqual(len(trace_ids), len(expected_trace_ids)) - for i in range(0, len(trace_ids)): - self.assertEqual(trace_ids[i], expected_trace_ids[i], self.workload1_name) - self.trace_exists_assert(expected_trace_ids[i], self.workload1_name) - - def test_upload_two_attempts(self): - print(f"\n\ntest_upload_two_attempts") - stdout1, stderr1, error1 = self.launch_test(self.workload1_path, self.workload1_full_trace_path) - - inputs = ["y", "0", "1"] - stdout2, stderr2, error2 = self.launch_test(self.workload1_path, [self.workload1_trace_part_1_path, self.workload1_trace_part_2_path], inputs) - - trace_ids = self.get_trace_ids_from_output(stdout1) - trace_ids.extend(self.get_trace_ids_from_output(stdout2)) - expected_trace_ids = [f"0.0.0000_{self.workload1_name}", f"0.1.0000_{self.workload1_name}", f"0.1.0001_{self.workload1_name}"] - - self.assertIsNone(error1) - self.assertIsNone(error2) - self.assertIsNone(stderr1) - self.assertIsNone(stderr2) - self.assertEqual(len(trace_ids), len(expected_trace_ids)) - for i in range(0, len(trace_ids)): - 
self.assertEqual(trace_ids[i], expected_trace_ids[i], self.workload1_name) - self.trace_exists_assert(expected_trace_ids[i], self.workload1_name) diff --git a/traces/stf_trace_archive/src/tests/utils/trace_generator.py b/traces/stf_trace_archive/src/tests/utils/trace_generator.py deleted file mode 100644 index b3ed8f77..00000000 --- a/traces/stf_trace_archive/src/tests/utils/trace_generator.py +++ /dev/null @@ -1,115 +0,0 @@ -from datetime import datetime -import hashlib -import os -import yaml - -class TraceDataGenerator(): - def __init__(self): - self.test_storage_path = "./tests/traces_test" - os.makedirs(self.test_storage_path, exist_ok=True) - - def generate_worload(self, workload_id = None): - if workload_id is None: - workload_files = os.listdir(self.test_storage_path) - workload_ids = [f.split('_')[1] for f in workload_files if f.startswith('workload_')] - workload_id = int(len(workload_ids) + 1) - - workload_file_content = f"Workload ID: {workload_id}\n" - workload_path = f"{self.test_storage_path}/workload_{workload_id}" - - with open(f"{workload_path}", 'w') as workload_file: - workload_file.write(workload_file_content) - - return workload_path - - def generate_trace(self, workload_id, trace_attempt, trace_part = None): - trace_attempt_path = f"{self.test_storage_path}/trace_attempt_{trace_attempt}" - os.makedirs(trace_attempt_path, exist_ok=True) - - if trace_part is None: - trace_path = f"{trace_attempt_path}/0.zstf" - metadata_path = f"{trace_attempt_path}/0.zstf.metadata.yaml" - else: - trace_path = f"{trace_attempt_path}/{trace_part}.zstf" - metadata_path = f"{trace_attempt_path}/{trace_part}.zstf.metadata.yaml" - - trace_file_content = f"Trace attempt: {trace_attempt}, trace part: {trace_part}\n" - with open(trace_path, 'w') as trace_file: - trace_file.write(trace_file_content) - - trace_metadata_content = self.generate_metadata(workload_id, trace_part) - with open(metadata_path, 'w') as metadata_file: - yaml.dump(trace_metadata_content, metadata_file) - - return trace_path - - def generate_metadata(self, workload_id, trace_part): - workload_sha256 = self.get_workload_sha256(workload_id) - - interval = None - if trace_part is not None: - interval = { - 'instruction_pc': 100 * trace_part, - 'pc_count': trace_part, - 'interval_lenght': trace_part * 100, - 'start_instruction_index': 100 * trace_part, - 'end_instruction_index': 100 * (trace_part + 1) - } - - metadata = { - 'description': None, - 'author': { - 'name': 'Jane Doe', - 'company': 'RISCV', - 'email': 'jane.doe@riscv.org' - }, - 'workload': { - 'filename': f"{workload_id}", - 'SHA256': workload_sha256, - 'execution_command': f"./{workload_id}", - 'elf_sections': { - 'comment': "Test", - 'riscv.attributes': "Test", - 'GCC.command.line': "Test" - }, - }, - 'stf': { - 'timestamp': datetime.now().isoformat(), - 'stf_trace_info': { - 'VERSION': "Test", - 'GENERATOR': "Test", - 'GEN_VERSION': "Test", - 'GEN_COMMENT': "Test", - 'STF_FEATURES': [] - }, - 'trace_interval': interval - } - } - - return metadata - - def get_workload_sha256(self, workload_path): - hash_sha256 = hashlib.sha256() - with open(workload_path, "rb") as f: - for chunk in iter(lambda: f.read(4096), b""): - hash_sha256.update(chunk) - return hash_sha256.hexdigest() - - def delete_test_traces(self): - self._delete_folder_and_files(self.test_storage_path) - - def delete_test_storage(self, type, path): - if type == "local-storage": - self._delete_folder_and_files(path) - - def _delete_folder_and_files(self, path): - if not os.path.exists(path): - return - - 
for root, dirs, files in os.walk(path, topdown=False): - for name in files: - os.remove(os.path.join(root, name)) - for name in dirs: - os.rmdir(os.path.join(root, name)) - - os.rmdir(path) From f2d591ddef982538342ec1612258d18cb1c8c1cc Mon Sep 17 00:00:00 2001 From: vinicius-r-silva Date: Thu, 21 Aug 2025 02:07:44 -0300 Subject: [PATCH 5/7] renamed to trace archive --- traces/stf_trace_archive/src/{trace_share.py => trace_archive.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename traces/stf_trace_archive/src/{trace_share.py => trace_archive.py} (100%) diff --git a/traces/stf_trace_archive/src/trace_share.py b/traces/stf_trace_archive/src/trace_archive.py similarity index 100% rename from traces/stf_trace_archive/src/trace_share.py rename to traces/stf_trace_archive/src/trace_archive.py From 746cb94214f700120649ca3747ed02eea85d4c90 Mon Sep 17 00:00:00 2001 From: vinicius-r-silva Date: Thu, 21 Aug 2025 02:10:22 -0300 Subject: [PATCH 6/7] renamed trace share to trace archive in readme --- traces/stf_trace_archive/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/traces/stf_trace_archive/README.md b/traces/stf_trace_archive/README.md index b69a4f1f..e892ea49 100644 --- a/traces/stf_trace_archive/README.md +++ b/traces/stf_trace_archive/README.md @@ -155,7 +155,7 @@ Metadata 0.0.0000_dhrystone saved on ./0.0.0000_dhrystone.zstf.metadata.yaml To download the workload `0` (dhrystone), you can run: ```bash -$ python trace_share.py get --workload 0 +$ python trace_archive.py get --workload 0 Workload 0 saved on ./dhrystone ``` From 2122f41ec2ab5d8b0cd9776374785b14be20c682 Mon Sep 17 00:00:00 2001 From: vinicius-r-silva Date: Sat, 23 Aug 2025 23:07:23 -0300 Subject: [PATCH 7/7] pep8 formatting --- traces/stf_trace_archive/src/data/config.py | 10 +-- traces/stf_trace_archive/src/data/consts.py | 3 +- .../src/data/database_explorer.py | 62 ++++++++-------- traces/stf_trace_archive/src/data/metadata.py | 7 +- .../stf_trace_archive/src/data/output_path.py | 3 +- .../src/data/storage_type_map.py | 2 +- .../src/data/storages/base.py | 8 +-- .../src/data/storages/local_storage.py | 71 +++++++++++-------- .../stf_trace_archive/src/data/trace_data.py | 3 +- .../src/data/trace_table_shema.py | 1 + .../src/data/workload_table_shema.py | 1 + traces/stf_trace_archive/src/handlers/base.py | 3 +- traces/stf_trace_archive/src/handlers/get.py | 17 ++--- traces/stf_trace_archive/src/handlers/list.py | 10 ++- .../stf_trace_archive/src/handlers/setup.py | 17 ++--- .../stf_trace_archive/src/handlers/upload.py | 63 ++++++++-------- traces/stf_trace_archive/src/trace_archive.py | 9 ++- .../stf_trace_archive/src/utils/cli_parser.py | 5 +- .../src/utils/dict_to_obj.py | 2 +- .../src/utils/fields_validator.py | 13 ++-- .../src/utils/file_dialog.py | 8 ++- .../src/utils/metadata_parser.py | 23 ++++-- traces/stf_trace_archive/src/utils/sha256.py | 3 +- traces/stf_trace_archive/src/utils/ui.py | 14 ++-- 24 files changed, 204 insertions(+), 154 deletions(-) diff --git a/traces/stf_trace_archive/src/data/config.py b/traces/stf_trace_archive/src/data/config.py index 1d0b4411..e0f2ff23 100644 --- a/traces/stf_trace_archive/src/data/config.py +++ b/traces/stf_trace_archive/src/data/config.py @@ -1,14 +1,17 @@ from dataclasses import dataclass from typing import Dict, Optional, Type, Union, List + @dataclass class LocalStorageConfig: path: str + CONFIG_TYPE_MAP: Dict[str, Type] = { "local-storage": LocalStorageConfig, } + @dataclass class StorageConfig: type: str name: str config: Union[LocalStorageConfig] @staticmethod def
from_dict(data: dict): specific_config_type = data['type'] if specific_config_type not in CONFIG_TYPE_MAP: raise ValueError(f"Unknown storage type: {specific_config_type}") - + specific_config_class = CONFIG_TYPE_MAP.get(specific_config_type) specific_config = specific_config_class(**data['config']) return StorageConfig(type=data['type'], name=data['name'], config=specific_config) + @dataclass class Config: storages: List[StorageConfig] @@ -34,11 +38,9 @@ class Config: def from_dict(data: dict): if not data: return None - + storages = [] if 'storages' in data: storages = [StorageConfig.from_dict(s) for s in data['storages']] return Config(storages=storages, default_storage=data.get('default_storage')) - - \ No newline at end of file diff --git a/traces/stf_trace_archive/src/data/consts.py b/traces/stf_trace_archive/src/data/consts.py index dc1b6ebc..cd441215 100644 --- a/traces/stf_trace_archive/src/data/consts.py +++ b/traces/stf_trace_archive/src/data/consts.py @@ -1,5 +1,6 @@ from dataclasses import dataclass + @dataclass(frozen=True) class Const(): - PAD_LENGHT = 4 \ No newline at end of file + PAD_LENGHT = 4 diff --git a/traces/stf_trace_archive/src/data/database_explorer.py b/traces/stf_trace_archive/src/data/database_explorer.py index 3b76c3de..4a5b8691 100644 --- a/traces/stf_trace_archive/src/data/database_explorer.py +++ b/traces/stf_trace_archive/src/data/database_explorer.py @@ -1,101 +1,107 @@ from typing import Optional -import pandas as pd from data.trace_table_shema import TracesTableSchema from data.workload_table_shema import WorkloadsTableSchema from data.storages.base import StorageHandler from data.metadata import Metadata from data.trace_data import TraceData + class DatabaseExplorer: - def __init__(self, storage: StorageHandler): + def __init__(self, storage: StorageHandler): self.storage = storage - + def get_workload_id(self, workload_sha256: str) -> Optional[int]: - workload_traces = self.storage.traces_table[self.storage.traces_table[TracesTableSchema.WORKLOAD_SHA] == workload_sha256] + workload_traces = self.storage.traces_table[self.storage.traces_table[ + TracesTableSchema.WORKLOAD_SHA] == workload_sha256] if len(workload_traces) == 0: return None - + return workload_traces.iloc[0][TracesTableSchema.WORKLOAD_ID] def upload_workload(self, workload_path: str) -> int: workload_id = self._get_next_workload_id() - print(f"Uploading workload: {workload_path} with id: {workload_id}") + print(f"Uploading workload: {workload_path} with id: {workload_id}") self.storage.insert_workload(workload_path, workload_id) return workload_id def _get_next_workload_id(self) -> int: - workload_ids = self.storage.traces_table[TracesTableSchema.WORKLOAD_ID].unique() + workload_ids = self.storage.traces_table[TracesTableSchema.WORKLOAD_ID].unique( + ) max_workload_id = max(workload_ids, default=-1) return max_workload_id + 1 - + def get_workload_name(self, workload_id: int) -> Optional[str]: - workload_row = self.storage.workloads_table[self.storage.workloads_table[WorkloadsTableSchema.WORKLOAD_ID] == workload_id] + workload_row = self.storage.workloads_table[ + self.storage.workloads_table[WorkloadsTableSchema.WORKLOAD_ID] == workload_id] if len(workload_row) > 0: return workload_row.iloc[0][WorkloadsTableSchema.WORKLOAD_NAME] return None - + def get_next_trace_attempt(self, workload_id: int) -> int: trace_attemps = self.get_trace_attempts(workload_id) if not trace_attemps: return 0 - + max_attempt = max(trace_attemps) return max_attempt + 1 - + def get_trace_attempts(self, 
workload_id: int) -> list[int]: - workload_traces = self.storage.traces_table[self.storage.traces_table[TracesTableSchema.WORKLOAD_ID] == workload_id] + workload_traces = self.storage.traces_table[ + self.storage.traces_table[TracesTableSchema.WORKLOAD_ID] == workload_id] return workload_traces[TracesTableSchema.TRACE_ATTEMPT].unique().tolist() - + def get_trace_parts(self, workload_id: int, trace_attempt: int) -> list[int]: workload_traces = self.storage.traces_table[ (self.storage.traces_table[TracesTableSchema.WORKLOAD_ID] == workload_id) & - (self.storage.traces_table[TracesTableSchema.TRACE_ATTEMPT] == trace_attempt) + (self.storage.traces_table[TracesTableSchema.TRACE_ATTEMPT] + == trace_attempt) ] - + return workload_traces[TracesTableSchema.TRACE_PART].unique().tolist() - + def check_trace_exists(self, trace_id: str) -> bool: return trace_id in self.storage.traces_table[TracesTableSchema.TRACE_ID].values def upload_traces(self, traces: list[TraceData]) -> None: self.storage.update_traces_table() - for trace in traces: - print(f"Uploading trace: {trace.path} with id: {trace.id}") + for trace in traces: + print(f"Uploading trace: {trace.path} with id: {trace.id}") self._upload_trace(trace.path, trace.metadata) def _upload_trace(self, trace_path: str, metadata: Metadata) -> None: trace_id = metadata.get('trace_id') if not trace_id: raise ValueError("Trace ID is required in metadata to upload a trace.") - + if self.check_trace_exists(trace_id): raise ValueError(f"Trace with ID {trace_id} already exists in the database.") - + self.storage.insert_trace(trace_path, metadata) def is_fully_traced(self, workload_id: int, trace_attempt: int) -> Optional[bool]: trace_row = self.storage.traces_table[ (self.storage.traces_table[TracesTableSchema.WORKLOAD_ID] == workload_id) & - (self.storage.traces_table[TracesTableSchema.TRACE_ATTEMPT] == trace_attempt) + (self.storage.traces_table[TracesTableSchema.TRACE_ATTEMPT] + == trace_attempt) ] if len(trace_row) == 0: return None - + return trace_row.iloc[0][TracesTableSchema.FULLY_TRACED] def get_trace_ids(self) -> list[str]: return self.storage.traces_table[TracesTableSchema.TRACE_ID].to_list() - + def get_metadata(self, trace_id: str) -> Metadata: return self.storage.get_metadata(trace_id) - + def save_metadata(self, trace_id: str, path: str) -> None: return self.storage.save_metadata(trace_id, path) - + def save_trace(self, trace_id: str, path: str) -> None: return self.storage.save_trace(trace_id, path) - + def save_workload(self, workload_id: int, path: str) -> None: - return self.storage.save_workload(workload_id, path) \ No newline at end of file + return self.storage.save_workload(workload_id, path) diff --git a/traces/stf_trace_archive/src/data/metadata.py b/traces/stf_trace_archive/src/data/metadata.py index dc88967d..27acfa1f 100644 --- a/traces/stf_trace_archive/src/data/metadata.py +++ b/traces/stf_trace_archive/src/data/metadata.py @@ -1,12 +1,14 @@ from dataclasses import dataclass from typing import Optional, Dict + @dataclass class Author: name: Optional[str] company: Optional[str] email: str + @dataclass class Workload: filename: str @@ -14,6 +16,7 @@ class Workload: execution_command: str elf_sections: Dict[str, str] + @dataclass class TraceInterval: instruction_pc: int @@ -23,15 +26,17 @@ class TraceInterval: start_instruction_index: int end_instruction_index: int + @dataclass class Stf: timestamp: str stf_trace_info: Dict[str, str] trace_interval: Optional[TraceInterval] + @dataclass class Metadata: description: Optional[str] 
diff --git a/traces/stf_trace_archive/src/data/output_path.py b/traces/stf_trace_archive/src/data/output_path.py
index 2ea784c1..3148f0ba 100644
--- a/traces/stf_trace_archive/src/data/output_path.py
+++ b/traces/stf_trace_archive/src/data/output_path.py
@@ -1,7 +1,8 @@
 from dataclasses import dataclass
 from typing import Optional
 
+
 @dataclass
 class OutputPaths():
     folder_path: str
-    filename: Optional[str]
\ No newline at end of file
+    filename: Optional[str]
diff --git a/traces/stf_trace_archive/src/data/storage_type_map.py b/traces/stf_trace_archive/src/data/storage_type_map.py
index eaf6a71f..787cd173 100644
--- a/traces/stf_trace_archive/src/data/storage_type_map.py
+++ b/traces/stf_trace_archive/src/data/storage_type_map.py
@@ -3,4 +3,4 @@
 
 STORAGE_TYPE_MAP: Dict[str, Type] = {
     "local-storage": LocalStorage,
-}
\ No newline at end of file
+}
diff --git a/traces/stf_trace_archive/src/data/storages/base.py b/traces/stf_trace_archive/src/data/storages/base.py
index 7847984e..587e7110 100644
--- a/traces/stf_trace_archive/src/data/storages/base.py
+++ b/traces/stf_trace_archive/src/data/storages/base.py
@@ -4,19 +4,20 @@
 from data.output_path import OutputPaths
 from data.metadata import Metadata
 
+
 class StorageHandler(ABC):
     @property
     def traces_table(self):
         raise NotImplementedError("This method should be overridden by subclasses.")
-    
+
     @property
     def workloads_table(self):
         raise NotImplementedError("This method should be overridden by subclasses.")
-    
+
     @abstractmethod
     def update_traces_table(self) -> pd.DataFrame:
         raise NotImplementedError("This method should be overridden by subclasses.")
-    
+
     @abstractmethod
     def update_workloads_table(self) -> pd.DataFrame:
         raise NotImplementedError("This method should be overridden by subclasses.")
@@ -48,4 +49,3 @@ def save_workload(self, workload_id: int, path: OutputPaths) -> None:
     @staticmethod
     def setup():
         raise NotImplementedError("This method should be overridden by subclasses.")
-    
\ No newline at end of file
diff --git a/traces/stf_trace_archive/src/data/storages/local_storage.py b/traces/stf_trace_archive/src/data/storages/local_storage.py
index e596eed4..7e81c781 100644
--- a/traces/stf_trace_archive/src/data/storages/local_storage.py
+++ b/traces/stf_trace_archive/src/data/storages/local_storage.py
@@ -13,6 +13,7 @@
 from data.trace_table_shema import TracesTableSchema
 from utils.metadata_parser import MetadataParser
 
+
 class LocalStorage(StorageHandler):
     def __init__(self, config: LocalStorageConfig):
         if not config.path:
@@ -29,23 +30,24 @@ def __init__(self, config: LocalStorageConfig):
     def traces_table(self):
         if self._traces_table is None:
             return self.update_traces_table()
-    
+
         return self._traces_table
 
     @property
     def workloads_table(self):
         if self._workloads_table is None:
             return self.update_workloads_table()
-    
+
         return self._workloads_table
-    
+
     def update_traces_table(self) -> pd.DataFrame:
         df = pd.DataFrame(columns=TracesTableSchema.get_columns())
         if not os.path.exists(self.storage_path):
             os.mkdir(self.storage_path)
-    
+
         self.update_workloads_table()
-        workload_ids = self.workloads_table[WorkloadsTableSchema.WORKLOAD_ID].to_list()
+        workload_ids = self.workloads_table[WorkloadsTableSchema.WORKLOAD_ID].to_list(
+        )
         for workload_id in workload_ids:
             workload_folder = self._get_workload_folder(workload_id)
             workload_path = os.path.join(self.storage_path, workload_folder)
@@ -57,17 +59,21 @@ def update_traces_table(self) -> pd.DataFrame:
                     continue
 
                 trace_files = os.listdir(trace_attempt_path)
-                metadata_files = [filename for filename in trace_files if filename.endswith(self.metadata_suffix)]
-                trace_ids = [metadata_file[0:-len(self.metadata_suffix) - 1] for metadata_file in metadata_files]
+                metadata_files = [
+                    filename for filename in trace_files if filename.endswith(self.metadata_suffix)]
+                trace_ids = [
+                    metadata_file[0:-len(self.metadata_suffix) - 1] for metadata_file in metadata_files]
                 trace_attemps_id = int(trace_attempt.split('_', 1)[1])
 
                 if len(trace_ids) == 0:
                     continue
 
                 sample_metadata = self.get_metadata(trace_ids[0])
-                sample_trace_interval = sample_metadata.get('stf', {}).get('trace_interval', None)
+                sample_trace_interval = sample_metadata.get(
+                    'stf', {}).get('trace_interval', None)
                 fully_trace = True if sample_trace_interval is None else False
-                workload_sha = sample_metadata.get('workload', {}).get('SHA256', None)
+                workload_sha = sample_metadata.get(
+                    'workload', {}).get('SHA256', None)
 
                 for trace_id in trace_ids:
                     trace_part = trace_id.split('.')[1]
@@ -81,15 +87,15 @@ def update_traces_table(self) -> pd.DataFrame:
                         TracesTableSchema.WORKLOAD_NAME: workload_name,
                         TracesTableSchema.FULLY_TRACED: fully_trace
                     }])])
-    
+
         self._traces_table = df
         return df
-    
+
     def update_workloads_table(self) -> pd.DataFrame:
         df = pd.DataFrame(columns=WorkloadsTableSchema.get_columns())
         if not os.path.exists(self.storage_path):
             os.mkdir(self.storage_path)
-    
+
         workload_folders = os.listdir(self.storage_path)
         for workload_folder in workload_folders:
             workload_id, workload_name = workload_folder.split('_', 1)
@@ -106,7 +112,7 @@ def insert_workload(self, workload_path: str, workload_id: int) -> None:
         workload_folder = self._get_workload_folder(workload_id, workload_name)
         storage_path = os.path.join(self.storage_path, workload_folder)
         os.makedirs(storage_path, exist_ok=False)
-        shutil.copy(workload_path, storage_path) 
+        shutil.copy(workload_path, storage_path)
 
         self._workloads_table = pd.concat([self.workloads_table, pd.DataFrame([{
             WorkloadsTableSchema.WORKLOAD_ID: int(workload_id),
@@ -122,9 +128,12 @@ def insert_trace(self, trace_path: str, metadata: Metadata) -> None:
 
         workload_folder = self._get_workload_folder(workload_id)
         attempt_folder = self._get_attempt_folder(trace_attempt)
-        storage_path = os.path.join(self.storage_path, workload_folder, attempt_folder)
-        trace_storage_path = os.path.join(storage_path, f"{trace_id}.{self.trace_suffix}")
-        metadata_storage_path = os.path.join(storage_path, f"{trace_id}.{self.metadata_suffix}")
+        storage_path = os.path.join(
+            self.storage_path, workload_folder, attempt_folder)
+        trace_storage_path = os.path.join(
+            storage_path, f"{trace_id}.{self.trace_suffix}")
+        metadata_storage_path = os.path.join(
+            storage_path, f"{trace_id}.{self.metadata_suffix}")
 
         os.makedirs(storage_path, exist_ok=True)
         shutil.copy(trace_path, trace_storage_path)
@@ -134,14 +143,15 @@ def insert_trace(self, trace_path: str, metadata: Metadata) -> None:
     def get_metadata(self, trace_id: str) -> Metadata:
         if trace_id in self.metadata_cache:
             return self.metadata_cache[trace_id]
-    
+
         workload_id, trace_attempt = trace_id.split('.')[:2]
         workload_folder = self._get_workload_folder(workload_id)
         attempt_folder = self._get_attempt_folder(trace_attempt)
-        metadata_path = os.path.join(self.storage_path, workload_folder, attempt_folder, f"{trace_id}.{self.metadata_suffix}")
+        metadata_path = os.path.join(
+            self.storage_path, workload_folder, attempt_folder, f"{trace_id}.{self.metadata_suffix}")
 
         if not os.path.exists(metadata_path):
             raise FileNotFoundError(f"Metadata file not found: {trace_id}")
-    
+
         metadata = MetadataParser.parse_metadata_from_path(metadata_path)
         self.metadata_cache[trace_id] = metadata
         return metadata
@@ -161,7 +171,8 @@ def save_trace(self, trace_id: str, path: str) -> None:
         workload_folder = self._get_workload_folder(workload_id)
         attempt_folder = self._get_attempt_folder(trace_attempt)
         trace_filename = f"{trace_id}.{self.trace_suffix}"
-        trace_path = os.path.join(self.storage_path, workload_folder, attempt_folder, trace_filename)
+        trace_path = os.path.join(
+            self.storage_path, workload_folder, attempt_folder, trace_filename)
 
         dst_filename = path.filename if path.filename else trace_filename
         dst_path = os.path.join(path.folder_path, dst_filename)
@@ -169,10 +180,11 @@ def save_trace(self, trace_id: str, path: str) -> None:
         print(f"Trace {trace_id} saved on {os.path.abspath(dst_path)}")
 
     def save_workload(self, workload_id: int, path: str) -> None:
-        workload_folder = os.path.join(self.storage_path, self._get_workload_folder(workload_id))
+        workload_folder = os.path.join(
+            self.storage_path, self._get_workload_folder(workload_id))
         if not os.path.exists(workload_folder):
             raise FileNotFoundError(f"Workload not found: {workload_id}")
-    
+
         workload_path_list = os.listdir(workload_folder)
         workload_filename = None
         workload_file_path = None
@@ -183,28 +195,29 @@ def save_workload(self, workload_id: int, path: str) -> None:
 
             if workload_file_path is not None:
                 raise NotImplementedError("Multiple workload files found.")
-    
+
             workload_filename = workload_file
             workload_file_path = full_path
-    
+
         dst_filename = path.filename if path.filename else workload_filename
         dst_path = os.path.join(path.folder_path, dst_filename)
         shutil.copy(workload_file_path, dst_path)
 
         print(f"Workload {workload_id} saved on {os.path.abspath(dst_path)}")
-    
+
     def _get_workload_folder(self, workload_id: Union[str, int], workload_name: Optional[str] = None) -> str:
         if isinstance(workload_id, str):
             workload_id = int(workload_id)
 
         if not workload_name:
-            workload_name = self.workloads_table[self.workloads_table[WorkloadsTableSchema.WORKLOAD_ID] == workload_id][WorkloadsTableSchema.WORKLOAD_NAME].item()
+            workload_name = self.workloads_table[self.workloads_table[WorkloadsTableSchema.WORKLOAD_ID]
+                                                 == workload_id][WorkloadsTableSchema.WORKLOAD_NAME].item()
 
         workload_folder = f"{str(workload_id).zfill(Const.PAD_LENGHT)}_{workload_name}"
         return workload_folder
-    
+
     def _get_attempt_folder(self, attempt_id: Union[str, int]) -> str:
         return f"attempt_{str(attempt_id).zfill(Const.PAD_LENGHT)}"
-    
+
     @staticmethod
     def setup() -> LocalStorageConfig:
         readline.set_completer_delims(' \t\n=')
@@ -213,4 +226,4 @@ def setup() -> LocalStorageConfig:
         readline.parse_and_bind("tab: self-insert")
 
         path = os.path.abspath(path)
-        return LocalStorageConfig(path=path)
\ No newline at end of file
+        return LocalStorageConfig(path=path)
diff --git a/traces/stf_trace_archive/src/data/trace_data.py b/traces/stf_trace_archive/src/data/trace_data.py
index 848b3924..c6478997 100644
--- a/traces/stf_trace_archive/src/data/trace_data.py
+++ b/traces/stf_trace_archive/src/data/trace_data.py
@@ -1,6 +1,7 @@
 from dataclasses import dataclass
 from typing import Optional
 
+
 @dataclass
 class TraceData():
     path: Optional[str] = None
@@ -8,4 +9,4 @@ class TraceData():
     attempt: Optional[str] = None
     part: Optional[str] = None
     metadata_path: Optional[str] = None
-    metadata: Optional[str] = None
\ No newline at end of file
+    metadata: Optional[str] = None
diff --git a/traces/stf_trace_archive/src/data/trace_table_shema.py b/traces/stf_trace_archive/src/data/trace_table_shema.py
index 7e918c57..5c0a7c2f 100644
--- a/traces/stf_trace_archive/src/data/trace_table_shema.py
+++ b/traces/stf_trace_archive/src/data/trace_table_shema.py
@@ -1,5 +1,6 @@
 from dataclasses import dataclass
 
+
 @dataclass(frozen=True)
 class TracesTableSchema:
     TRACE_ID: str = "trace_id"
diff --git a/traces/stf_trace_archive/src/data/workload_table_shema.py b/traces/stf_trace_archive/src/data/workload_table_shema.py
index bce2be34..b784ffa6 100644
--- a/traces/stf_trace_archive/src/data/workload_table_shema.py
+++ b/traces/stf_trace_archive/src/data/workload_table_shema.py
@@ -1,5 +1,6 @@
 from dataclasses import dataclass
 
+
 @dataclass(frozen=True)
 class WorkloadsTableSchema:
     WORKLOAD_ID: str = "workload_id"
diff --git a/traces/stf_trace_archive/src/handlers/base.py b/traces/stf_trace_archive/src/handlers/base.py
index 38ced6bc..9cbb973b 100644
--- a/traces/stf_trace_archive/src/handlers/base.py
+++ b/traces/stf_trace_archive/src/handlers/base.py
@@ -3,7 +3,8 @@
 
 from data.database_explorer import DatabaseExplorer
 
+
 class CommandHandler(ABC):
     @abstractmethod
     def run(self, args: argparse.Namespace, database_explorer: DatabaseExplorer) -> None:
-        raise NotImplementedError("This method should be overridden by subclasses.")
\ No newline at end of file
+        raise NotImplementedError("This method should be overridden by subclasses.")
diff --git a/traces/stf_trace_archive/src/handlers/get.py b/traces/stf_trace_archive/src/handlers/get.py
index a8d4daa7..1ac0725e 100644
--- a/traces/stf_trace_archive/src/handlers/get.py
+++ b/traces/stf_trace_archive/src/handlers/get.py
@@ -4,7 +4,8 @@
 from data.output_path import OutputPaths
 from .base import CommandHandler
 
-class GetHandler(CommandHandler):  
+
+class GetHandler(CommandHandler):
     def run(self, args: argparse.Namespace, database_explorer: DatabaseExplorer) -> None:
         self.explorer = database_explorer
         output_path: OutputPaths = self._get_output_path(args.output)
@@ -19,24 +20,18 @@ def run(self, args: argparse.Namespace, database_explorer: DatabaseExplorer) ->
         else:
             raise ValueError("Invalid arguments: expected one of --trace, --workload, or --metadata")
-    
+
     def _get_output_path(self, output_arg: str) -> OutputPaths:
         if output_arg is None:
             return OutputPaths(folder_path="./", filename=None)
-    
+
         folder: str = os.path.dirname(output_arg)
         filename: str = os.path.basename(output_arg)
         return OutputPaths(folder_path=folder, filename=filename)
-    
+
     def _save_trace(self, trace_id: str, output_path: str) -> None:
         self.explorer.save_trace(trace_id, output_path)
 
         if output_path.filename:
             output_path.filename += ".metadata.yaml"
-            
-        self.explorer.save_metadata(trace_id, output_path)
-        
-        
-        
-    
\ No newline at end of file
+        self.explorer.save_metadata(trace_id, output_path)
diff --git a/traces/stf_trace_archive/src/handlers/list.py b/traces/stf_trace_archive/src/handlers/list.py
index ecb70eee..850853c7 100644
--- a/traces/stf_trace_archive/src/handlers/list.py
+++ b/traces/stf_trace_archive/src/handlers/list.py
@@ -3,8 +3,13 @@
 from data.database_explorer import DatabaseExplorer
 from utils.ui import print_medatata_details
 
+
 class ListHandler(CommandHandler):
-    def run(self, args: argparse.Namespace, database_explorer: DatabaseExplorer) -> None:
+    def run(
+        self,
+        args: argparse.Namespace,
+        database_explorer: DatabaseExplorer
+    ) -> None:
         self.explorer = database_explorer
         match vars(args):
             case _:
@@ -15,9 +20,8 @@ def _list_traces(self) -> None:
         if not trace_ids:
             print("No traces found.")
             return
-    
+
         for trace_id in sorted(trace_ids):
             metadata = self.explorer.get_metadata(trace_id)
             print_medatata_details(metadata)
             print("")
-
diff --git a/traces/stf_trace_archive/src/handlers/setup.py b/traces/stf_trace_archive/src/handlers/setup.py
index f0805e37..23afb77e 100644
--- a/traces/stf_trace_archive/src/handlers/setup.py
+++ b/traces/stf_trace_archive/src/handlers/setup.py
@@ -7,6 +7,7 @@
 from data.storage_type_map import STORAGE_TYPE_MAP
 from .base import CommandHandler
 
+
 class SetupHandler(CommandHandler):
     def __init__(self):
         config_folder = pathlib.Path(__file__).parent.parent.resolve()
@@ -47,7 +48,6 @@ def _complete_config_file(self) -> None:
             self._set_default_storage()
             self._save_config()
 
-
     def _add_storage_source(self) -> None:
         storage_types = list(STORAGE_TYPE_MAP.keys())
         print("Creating a new storage source.")
@@ -65,10 +65,12 @@ def _add_storage_source(self) -> None:
 
         storage_class = STORAGE_TYPE_MAP.get(storage_type)
         storage_specific_config = storage_class.setup()
-        storage_config = StorageConfig(type = storage_type, name=storage_name, config=storage_specific_config)
+        storage_config = StorageConfig(
+            type=storage_type, name=storage_name, config=storage_specific_config)
 
         if not self._config:
-            self._config = Config(storages=[storage_config], default_storage=storage_name)
+            self._config = Config(
+                storages=[storage_config], default_storage=storage_name)
         elif not self._config.storages:
             self._config.storages = [storage_config]
         else:
@@ -80,15 +82,14 @@ def _add_storage_source(self) -> None:
     def _get_storage_names(self) -> list[str]:
         if not self._config:
             return []
-    
+
         return [storage.name for storage in self._config.storages]
-    
+
     def _set_default_storage(self, storage_name: str = None) -> None:
         used_storage_names = self._get_storage_names()
 
         if not storage_name:
-            print(f"Enter the default storage source name: ", end="")
-            storage_name = input().lower()
+            storage_name = input("Enter the default storage source name: ").lower()
 
         if storage_name not in used_storage_names:
             raise ValueError(f"Storage source name {storage_name} not found on configured names")
@@ -97,4 +98,4 @@ def _set_default_storage(self, storage_name: str = None) -> None:
 
     def _save_config(self) -> None:
         with open(self._config_path, 'w') as config_file:
-            yaml.safe_dump(asdict(self._config), config_file)
\ No newline at end of file
+            yaml.safe_dump(asdict(self._config), config_file)
diff --git a/traces/stf_trace_archive/src/handlers/upload.py b/traces/stf_trace_archive/src/handlers/upload.py
index 490a0ba2..cc039554 100644
--- a/traces/stf_trace_archive/src/handlers/upload.py
+++ b/traces/stf_trace_archive/src/handlers/upload.py
@@ -5,11 +5,11 @@
 from data.consts import Const
 from .base import CommandHandler
 from utils.metadata_parser import MetadataParser
-from utils.file_dialog import FileDialog
 from utils.sha256 import compute_sha256
 from utils.ui import print_metadata_interval
 from data.trace_data import TraceData
 
+
 class UploadHandler(CommandHandler):
     _metadata_file_suffix = ".metadata.yaml"
 
@@ -22,33 +22,33 @@ def run(self, args: argparse.Namespace, database_explorer: DatabaseExplorer):
         self._setup_trace_parts(traces, workload_id)
         self._setup_trace_ids(traces, workload_id)
         self.explorer.upload_traces(traces)
-    
+
     def _get_arg_traces(self, args) -> list[TraceData]:
         trace_paths = args.trace
-        if(trace_paths and isinstance(trace_paths, str)):
+        if (trace_paths and isinstance(trace_paths, str)):
             trace_paths = [trace_paths]
-        
-        if args.it:
-            if not args.trace:
-                trace_paths = FileDialog.select_traces()
-                args.trace = trace_paths
+
+        if args.it and not trace_paths:
+            trace_paths = FileDialog.select_traces()
+            args.trace = trace_paths
 
         traces = []
         for trace_path in trace_paths:
             metadata_path = trace_path + self._metadata_file_suffix
             if not os.path.exists(metadata_path):
                 raise FileNotFoundError(f"Metadata file not found: {metadata_path}")
-    
+
             metadata = MetadataParser.parse_metadata_from_path(metadata_path)
-            traces.append(TraceData(path=trace_path, metadata_path=metadata_path, metadata=metadata))
+            traces.append(TraceData(path=trace_path,
+                          metadata_path=metadata_path, metadata=metadata))
 
         if len(traces) <= 0:
             raise ValueError("No traces provided.")
-    
+
         return traces
-    
-    def _validate_traces(self, traces): 
+
+    def _validate_traces(self, traces):
         workload_sha256 = traces[0].metadata['workload']['SHA256']
         for trace in traces:
             if trace.metadata['workload']['SHA256'] != workload_sha256:
@@ -57,7 +57,7 @@ def _validate_traces(self, traces):
         if len(traces) > 1:
             for trace in traces:
                 if MetadataParser.is_fully_traced(trace.metadata):
-                    raise ValueError("Multiple fully traced traces provided. Please provide only one fully traced trace or multiple partial traces.") 
+                    raise ValueError("Multiple fully traced traces provided. Please provide only one fully traced trace or multiple partial traces.")
 
     def _setup_trace_attempt(self, traces, workload_id):
         trace_attempt = self._get_trace_attempt(traces, workload_id)
@@ -65,10 +65,11 @@ def _setup_trace_attempt(self, traces, workload_id):
             trace.attempt = trace_attempt
 
     def _get_trace_attempt(self, traces, workload_id):
-        fully_traced = len(traces) == 1 and MetadataParser.is_fully_traced(traces[0].metadata)
+        fully_traced = len(traces) == 1 and MetadataParser.is_fully_traced(
+            traces[0].metadata)
         if fully_traced:
             return self.explorer.get_next_trace_attempt(workload_id)
-    
+
         used_trace_attempts = self.explorer.get_trace_attempts(workload_id)
         if len(used_trace_attempts) == 0:
             return 0
@@ -77,16 +78,16 @@ def _get_trace_attempt(self, traces, workload_id):
         answer = input().lower()
         if answer != 'yes' and answer != 'y' and answer != 'no' and answer != 'n':
             raise ValueError("Invalid response. Please answer 'yes' or 'no'.")
-    
+
         if answer == 'yes' or answer == 'y':
             return self.explorer.get_next_trace_attempt(workload_id)
-    
+
         print(f"Existing trace attempts: {used_trace_attempts}")
         print("Please provide the trace attempt number to upload to: ", end="")
         trace_attempt = input().strip()
         if not trace_attempt.isdigit() or int(trace_attempt) not in used_trace_attempts:
             raise ValueError(f"Trace attempt value must be a number between {min(used_trace_attempts)} and {max(used_trace_attempts)}")
-    
+
         trace_attempt = int(trace_attempt)
         if self.explorer.is_fully_traced(workload_id, trace_attempt):
             raise ValueError(f"Trace attempt {trace_attempt} for workload ID {workload_id} is fully traced. Cannot upload more traces to a fully traced attempt.")
@@ -98,9 +99,9 @@ def _setup_trace_parts(self, traces, workload_id):
             for trace in traces:
                 trace.part = 0
             return traces
-    
+
         print("Partial traces detected. Please specify the part number for this trace segment")
-        traces = sorted(traces, key = lambda trace: trace.metadata['stf']['trace_interval']['start_instruction_index'])
+        traces = sorted(traces, key=lambda trace: trace.metadata['stf']['trace_interval']['start_instruction_index'])
 
         trace_attempt = traces[0].attempt
         used_parts = self.explorer.get_trace_parts(workload_id, trace_attempt)
@@ -120,7 +121,7 @@ def _setup_trace_parts(self, traces, workload_id):
 
             if part_number in used_parts:
                 raise ValueError(f"Part number {part_number} already used. Please use unique part numbers for each trace segment.")
-    
+
             used_parts.append(part_number)
             trace.part = part_number
             last_part_number = part_number
@@ -134,35 +135,35 @@ def _get_workload_id(self, traces, args):
         if workload_id is not None:
             print("Workload already exists in trace archive, skipping workload upload.")
             return workload_id
-    
+
         if not args.it and not args.workload:
             raise ValueError("Workload not found on trace archive. Provide ither --it or --workload options to specify the workload binary.")
-    
+
         workload_path = args.workload
         if args.it and not args.workload:
             workload_path = FileDialog.select_workload()
 
         if not workload_path or not os.path.exists(workload_path):
             raise FileNotFoundError(f"Workload file not found: {workload_path}")
-    
+
         workload_file_sha256 = compute_sha256(workload_path)
         if workload_file_sha256 != workload_sha256:
             raise ValueError("Workload file SHA256 does not match the one in metadata.")
-    
+
         workload_id = self.explorer.upload_workload(workload_path)
-        return workload_id 
+        return workload_id
 
     def _setup_trace_ids(self, traces, workload_id):
         for trace in traces:
             workload_name = self.explorer.get_workload_name(workload_id)
-    
+
             if not workload_name:
                 raise ValueError(f"Workload with ID {workload_id} not found in the database.")
-    
+
             trace_part = str(trace.part).zfill(Const.PAD_LENGHT)
             trace_id = f"{workload_id}.{trace.attempt}.{trace_part}_{workload_name}"
 
             trace.id = trace_id
             trace.metadata['trace_id'] = trace_id
             trace.metadata['workload']['filename'] = workload_name
-    
-        return traces
\ No newline at end of file
+
+        return traces
diff --git a/traces/stf_trace_archive/src/trace_archive.py b/traces/stf_trace_archive/src/trace_archive.py
index 584bd1ab..81e4a47b 100644
--- a/traces/stf_trace_archive/src/trace_archive.py
+++ b/traces/stf_trace_archive/src/trace_archive.py
@@ -7,6 +7,7 @@
 from handlers.get import GetHandler
 from handlers.setup import SetupHandler
 
+
 def main():
     args = parseArgs()
     setupHandler = SetupHandler()
@@ -26,6 +27,7 @@ def main():
     else:
         print(f"Unknown command: {args.command}")
 
+
 def get_storage(selected_storage: str, config: Config) -> DatabaseExplorer:
     if not selected_storage:
         selected_storage = config.default_storage
@@ -38,14 +40,15 @@ def get_storage(selected_storage: str, config: Config) -> DatabaseExplorer:
 
     if not storage_config:
         raise ValueError(f"Storage not found: {selected_storage}")
-    
+
     storage_class = STORAGE_TYPE_MAP.get(storage_config.type)
     if not storage_class:
         raise ValueError(f"Unknown storage class: {storage_class}")
-    
+
     storage = storage_class(storage_config.config)
     explorer = DatabaseExplorer(storage)
     return explorer
 
+
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
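One thing worth flagging in the upload.py hunks above: the import of FileDialog from utils.file_dialog is removed, yet the rewritten _get_arg_traces and the untouched _get_workload_id still call FileDialog.select_traces() and FileDialog.select_workload(). Unless the import is restored in a hunk outside this excerpt or later in the series, the interactive --it path would raise NameError; keeping the import is the minimal fix:

    from utils.file_dialog import FileDialog  # still referenced by _get_arg_traces and _get_workload_id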
diff --git a/traces/stf_trace_archive/src/utils/cli_parser.py b/traces/stf_trace_archive/src/utils/cli_parser.py
index fa8bf13a..bddffe8e 100644
--- a/traces/stf_trace_archive/src/utils/cli_parser.py
+++ b/traces/stf_trace_archive/src/utils/cli_parser.py
@@ -1,6 +1,7 @@
 import argparse
 import sys
 
+
 def parseArgs() -> argparse.Namespace:
     parser = argparse.ArgumentParser(
         prog='trace_share',
@@ -41,7 +42,7 @@ def parseArgs() -> argparse.Namespace:
         description='Download a specified trace or workload file.',
         formatter_class=argparse.RawTextHelpFormatter
     )
-    
+
     group = get_parser.add_mutually_exclusive_group(required=True)
     group.add_argument('--trace', help='Id of the trace to download.', metavar='TRACE_ID')
     group.add_argument('--metadata', help='Id of the metadata (same as trace) to download.', metavar='TRACE_ID')
@@ -71,4 +72,4 @@ def parseArgs() -> argparse.Namespace:
         print("\nError: At least one of --workload, --trace, or --it must be provided.")
         exit(1)
 
-    return args
\ No newline at end of file
+    return args
diff --git a/traces/stf_trace_archive/src/utils/dict_to_obj.py b/traces/stf_trace_archive/src/utils/dict_to_obj.py
index 8ca39476..68eb65d8 100644
--- a/traces/stf_trace_archive/src/utils/dict_to_obj.py
+++ b/traces/stf_trace_archive/src/utils/dict_to_obj.py
@@ -7,4 +7,4 @@ def dict_to_obj(d: dict):
     elif isinstance(d, list):
         return [dict_to_obj(x) for x in d]
     else:
-        return d
\ No newline at end of file
+        return d
diff --git a/traces/stf_trace_archive/src/utils/fields_validator.py b/traces/stf_trace_archive/src/utils/fields_validator.py
index ad6fb09e..4fc64857 100644
--- a/traces/stf_trace_archive/src/utils/fields_validator.py
+++ b/traces/stf_trace_archive/src/utils/fields_validator.py
@@ -7,7 +7,6 @@ def validate(object: any, required_fields: list[str] = None, dependent_fields: l
         if dependent_fields:
             FieldsValidator.validate_dependent_fields(object, dependent_fields)
 
-
     @staticmethod
     def validate_required_fields(object: any, required_fields: list[str]) -> None:
         if isinstance(required_fields, list):
@@ -15,12 +14,12 @@ def validate_required_fields(object: any, required_fields: list[str]) -> None:
             if field not in object:
                 raise KeyError(f"Missing required field: {field}")
             return
-    
+
         for field, sub_fields in required_fields.items():
             if field not in object or object.get(field) is None:
                 raise KeyError(f"Missing required field: {field}")
-            FieldsValidator.validate_required_fields(object.get(field), sub_fields)
-    
+            FieldsValidator.validate_required_fields(
+                object.get(field), sub_fields)
 
     @staticmethod
     def validate_dependent_fields(object: any, dependent_fields: list[str]) -> None:
@@ -32,8 +31,8 @@ def validate_dependent_fields(object: any, dependent_fields: list[str]) -> None
             if fields_count < len(dependent_fields):
                 raise ValueError(f"Object is incomplete: fields {dependent_fields} are interdependent and must either all be present or all be omitted.")
             return
-    
+
         for field, sub_fields in dependent_fields.items():
             if field in object and object.get(field) is not None:
-                FieldsValidator.validate_dependent_fields(object.get(field), sub_fields)
-    
\ No newline at end of file
+                FieldsValidator.validate_dependent_fields(
+                    object.get(field), sub_fields)
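To make the recursive contract of FieldsValidator concrete, here is a small usage sketch with hypothetical data, mirroring how metadata_parser.py below drives it. Required fields may be given as a flat list or as a dict of sub-schemas; dependent fields must appear all together or not at all:

    # Hypothetical illustration of FieldsValidator semantics.
    doc = {'author': {'name': 'Jane', 'email': 'jane@example.com'},
           'stf': {'timestamp': 't0'}}

    # Dict form recurses: 'author' must exist and contain 'name' and 'email'.
    FieldsValidator.validate_required_fields(doc, {'author': ['name', 'email']})

    # Dependent fields: passes while 'trace_interval' is absent from doc['stf']...
    FieldsValidator.validate_dependent_fields(
        doc, {'stf': {'trace_interval': ['pc_count', 'interval_lenght']}})

    # ...but supplying only one of the interdependent keys raises ValueError.
    doc['stf']['trace_interval'] = {'pc_count': 10}
    FieldsValidator.validate_dependent_fields(
        doc, {'stf': {'trace_interval': ['pc_count', 'interval_lenght']}})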
diff --git a/traces/stf_trace_archive/src/utils/file_dialog.py b/traces/stf_trace_archive/src/utils/file_dialog.py
index 19329da5..0e9593a3 100644
--- a/traces/stf_trace_archive/src/utils/file_dialog.py
+++ b/traces/stf_trace_archive/src/utils/file_dialog.py
@@ -1,13 +1,15 @@
 from tkinter import filedialog
 
+
 class FileDialog():
     @staticmethod
     def select_workload() -> str:
         file_path = filedialog.askopenfilename(title="Select Workload")
-        return file_path 
+        return file_path
 
     @staticmethod
     def select_traces() -> list[str]:
-        file_paths = filedialog.askopenfilenames(title="Select Traces", filetypes=[("ZSTF", ".zstf")])
-        return file_paths
\ No newline at end of file
+        file_paths = filedialog.askopenfilenames(
+            title="Select Traces", filetypes=[("ZSTF", ".zstf")])
+        return file_paths
diff --git a/traces/stf_trace_archive/src/utils/metadata_parser.py b/traces/stf_trace_archive/src/utils/metadata_parser.py
index bfc70deb..efac8121 100644
--- a/traces/stf_trace_archive/src/utils/metadata_parser.py
+++ b/traces/stf_trace_archive/src/utils/metadata_parser.py
@@ -3,32 +3,41 @@
 from data.metadata import Metadata
 from utils.fields_validator import FieldsValidator
 
+
 class MetadataParser:
     @staticmethod
     def parse_metadata(metadata_file) -> Metadata:
         data = yaml.safe_load(metadata_file)
         MetadataParser.validate_metadata(data)
         return data
-    
+
     @staticmethod
     def parse_metadata_from_path(metadata_path: str) -> Metadata:
         with open(metadata_path, 'r') as metadata_file:
             return MetadataParser.parse_metadata(metadata_file)
-    
+
     @staticmethod
     def validate_metadata(metadata: Metadata) -> None:
         if not metadata:
             raise ValueError("Metadata is empty or invalid.")
-        
-        required_keys = {'author': ["name", "company", "email"], 
+
+        required_keys = {'author': ["name", "company", "email"],
                          'workload': ['filename', 'SHA256', 'execution_command', 'elf_sections'],
                          'stf': ['timestamp', 'stf_trace_info']}
 
         dependent_keys = {
-            'stf': {'trace_interval': ['instruction_pc', 'pc_count', 'interval_lenght', 'start_instruction_index', 'end_instruction_index']}
+            'stf': {
+                'trace_interval': [
+                    'instruction_pc',
+                    'pc_count',
+                    'interval_lenght',
+                    'start_instruction_index',
+                    'end_instruction_index'
+                ]
+            }
         }
-        
+
         FieldsValidator.validate(metadata, required_keys, dependent_keys)
 
     @staticmethod
     def is_fully_traced(metadata: Metadata) -> bool:
-        return not metadata.get('stf', {}).get('trace_interval', None)
\ No newline at end of file
+        return not metadata.get('stf', {}).get('trace_interval', None)
diff --git a/traces/stf_trace_archive/src/utils/sha256.py b/traces/stf_trace_archive/src/utils/sha256.py
index 200e6556..342c6028 100644
--- a/traces/stf_trace_archive/src/utils/sha256.py
+++ b/traces/stf_trace_archive/src/utils/sha256.py
@@ -1,8 +1,9 @@
 import hashlib
 
+
 def compute_sha256(file_path: str) -> str:
     hash_sha256 = hashlib.sha256()
     with open(file_path, "rb") as f:
         for chunk in iter(lambda: f.read(4096), b""):
             hash_sha256.update(chunk)
-    return hash_sha256.hexdigest()
\ No newline at end of file
+    return hash_sha256.hexdigest()
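The digest produced here is what UploadHandler._get_workload_id compares against the workload's SHA256 metadata field; a minimal round-trip check looks like this (paths are hypothetical):

    # Hypothetical paths; mirrors the check performed during upload.
    digest = compute_sha256("dhrystone.elf")
    metadata = MetadataParser.parse_metadata_from_path("trace.zstf.metadata.yaml")
    if digest != metadata['workload']['SHA256']:
        raise ValueError("Workload file SHA256 does not match the one in metadata.")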
diff --git a/traces/stf_trace_archive/src/utils/ui.py b/traces/stf_trace_archive/src/utils/ui.py
index 55986aac..de37d063 100644
--- a/traces/stf_trace_archive/src/utils/ui.py
+++ b/traces/stf_trace_archive/src/utils/ui.py
@@ -1,5 +1,6 @@
 from data.metadata import Metadata
 
+
 def print_medatata_details(metadata: Metadata) -> None:
     print(f"id: {metadata['trace_id']}")
     if "description" in metadata and metadata['description']:
@@ -11,24 +12,25 @@ def print_medatata_details(metadata: Metadata) -> None:
 
     print("\n---------------------------------")
 
+
 def print_metadata_interval(metadata: Metadata) -> None:
     if "trace_interval" not in metadata['stf'] or metadata['stf']["trace_interval"] is None:
         print("Fully trace")
         return
-    
+
     trace_interval = metadata['stf']['trace_interval']
-    print(f"Trace Interval:")
+    print("Trace Interval:")
     if trace_interval['instruction_pc'] is not None:
         print(f"    Instruction PC: {trace_interval['instruction_pc']}")
 
     if trace_interval['pc_count'] is not None:
         print(f"    PC Count: {trace_interval['pc_count']}")
-    
+
     if trace_interval['interval_lenght'] is not None:
         print(f"    Interval Length: {trace_interval['interval_lenght']}")
-    
+
     if trace_interval['start_instruction_index'] is not None:
         print(f"    Start Instruction Index: {trace_interval['start_instruction_index']}")
-    
+
     if trace_interval['end_instruction_index'] is not None:
-        print(f"    End Instruction Index: {trace_interval['end_instruction_index']}")
\ No newline at end of file
+        print(f"    End Instruction Index: {trace_interval['end_instruction_index']}")