From d303a8edab941cddb9c5b387fc89fc3dbad25453 Mon Sep 17 00:00:00 2001 From: vinicius-r-silva Date: Thu, 4 Sep 2025 22:22:52 -0300 Subject: [PATCH] trace generator tool --- traces/docker_stf_trace_gen/Dockerfile | 56 +++--- traces/docker_stf_trace_gen/README.md | 7 +- traces/docker_stf_trace_gen/build_workload.py | 38 ++-- .../docker_stf_trace_gen/converters/base.py | 8 + .../converters/host_to_docker_path.py | 11 ++ traces/docker_stf_trace_gen/data/consts.py | 10 ++ traces/docker_stf_trace_gen/data/metadata.py | 46 +++++ .../factories/metadata_factory.py | 107 +++++++++++ traces/docker_stf_trace_gen/full_flow.py | 93 +++++----- traces/docker_stf_trace_gen/generate_trace.md | 166 ++++++++++++++++++ traces/docker_stf_trace_gen/generate_trace.py | 115 ++++++++++++ traces/docker_stf_trace_gen/run_simpoint.py | 30 ++-- traces/docker_stf_trace_gen/run_workload.py | 26 +-- traces/docker_stf_trace_gen/utils/config.py | 6 +- .../utils/docker_orchestrator.py | 67 +++++++ .../utils/trace_generator_arg_parser.py | 72 ++++++++ traces/docker_stf_trace_gen/utils/util.py | 125 +++++++------ traces/stf_metadata/README.md | 7 +- 18 files changed, 810 insertions(+), 180 deletions(-) create mode 100644 traces/docker_stf_trace_gen/converters/base.py create mode 100644 traces/docker_stf_trace_gen/converters/host_to_docker_path.py create mode 100644 traces/docker_stf_trace_gen/data/consts.py create mode 100644 traces/docker_stf_trace_gen/data/metadata.py create mode 100644 traces/docker_stf_trace_gen/factories/metadata_factory.py create mode 100644 traces/docker_stf_trace_gen/generate_trace.md create mode 100755 traces/docker_stf_trace_gen/generate_trace.py create mode 100644 traces/docker_stf_trace_gen/utils/docker_orchestrator.py create mode 100644 traces/docker_stf_trace_gen/utils/trace_generator_arg_parser.py diff --git a/traces/docker_stf_trace_gen/Dockerfile b/traces/docker_stf_trace_gen/Dockerfile index 22ec0418..478dfb45 100644 --- a/traces/docker_stf_trace_gen/Dockerfile +++ b/traces/docker_stf_trace_gen/Dockerfile @@ -1,16 +1,17 @@ FROM ubuntu:24.04 # Set environment variables early -ENV RISCV=/riscv \ - QEMU_PLUGINS=/qemu/build/contrib/plugins \ - PATH=$RISCV/bin:/opt/riscv/riscv32-elf/bin:/opt/riscv/riscv64-elf/bin:/opt/riscv/riscv32-glibc/bin:/SimPoint/bin:/qemu/build:$PATH \ - DEBIAN_FRONTEND=noninteractive \ - WORKDIR=/workspace \ - WORKLOADS=/workloads \ - OUTPUT=/output +ENV RISCV=/riscv +ENV QEMU_DIR=/qemu +ENV QEMU_PLUGINS=/qemu/build/contrib/plugins +ENV PATH=$RISCV/bin:/opt/riscv/riscv32-elf/bin:/opt/riscv/riscv64-elf/bin:/opt/riscv/riscv32-glibc/bin:/SimPoint/bin:/qemu/build:$PATH +ENV DEBIAN_FRONTEND=noninteractive +ENV WORKDIR=/workspace +ENV WORKLOADS=/workloads +ENV OUTPUT=/output # Install dependencies and clean up in one layer -RUN apt-get update && apt-get install -y \ +RUN apt update && apt install -y \ autoconf \ automake \ autotools-dev \ @@ -53,8 +54,7 @@ RUN apt-get update && apt-get install -y \ wget \ zlib1g-dev \ zstd \ - && rm -rf /var/lib/apt/lists/* \ - && apt-get clean + python3-yaml # Configure git for building RUN git config --global url."https://github.com/".insteadOf "git@github.com:" && \ @@ -62,13 +62,14 @@ RUN git config --global url."https://github.com/".insteadOf "git@github.com:" && git config --global user.name "Docker Builder" # Create directory structure -RUN mkdir -p /workloads /output /workspace $RISCV +RUN mkdir -p /output # Clone repositories in RISCV directory WORKDIR $RISCV -RUN git clone https://github.com/condorcomputing/condor.riscv-isa-sim.git 
--recurse-submodules && \ - git clone https://github.com/sparcians/stf_tools && \ - git clone https://github.com/riscv-software-src/riscv-pk.git +RUN git clone https://github.com/condorcomputing/condor.riscv-isa-sim.git --recurse-submodules +RUN git clone https://github.com/sparcians/stf_tools +RUN git clone https://github.com/riscv-software-src/riscv-pk.git +RUN git clone https://gitlab.com/ribeiro.v.silva/trace-gen.git # Clone QEMU and SimPoint WORKDIR / @@ -87,15 +88,14 @@ RUN chmod +x $RISCV/get-tool.sh && \ echo "Toolchain version:" && \ riscv64-unknown-linux-gnu-gcc --version 2>/dev/null || echo "Toolchain setup pending" -RUN mkdir -p /qemu/build # Build QEMU with plugins support WORKDIR /qemu/build RUN ../configure \ - --target-list=riscv32-linux-user,riscv64-linux-user,riscv32-softmmu,riscv64-softmmu \ - --enable-plugins \ - --disable-docs \ - --disable-gtk \ - --disable-sdl + --target-list=riscv32-linux-user,riscv64-linux-user,riscv32-softmmu,riscv64-softmmu \ + --enable-plugins \ + --disable-docs \ + --disable-gtk \ + --disable-sdl RUN make -j$(nproc) RUN make install @@ -116,6 +116,7 @@ RUN ../configure --prefix=$RISCV/condor.riscv-isa-sim/install RUN make -j$(nproc) RUN make regress RUN make install +ENV STF_DIR=$RISCV/condor.riscv-isa-sim/stf_lib # Create mount points for runtime mounting # Environment and flow scripts will be mounted at runtime @@ -127,9 +128,18 @@ RUN mkdir -p /workloads/environment /flow /outputs # - Host outputs -> /outputs RUN cp $RISCV/condor.riscv-isa-sim/install/bin/spike /usr/bin/ -WORKDIR /workspace +WORKDIR $RISCV/trace-gen +RUN make +RUN make install -CMD ["/bin/bash"] +WORKDIR $RISCV/riscv-pk/build +RUN mkdir $RISCV/pk +RUN ../configure --prefix=$RISCV/pk --host=riscv64-unknown-elf +RUN make -j$(nproc) +RUN make install +ENV PATH=$RISCV/pk:$PATH + +WORKDIR /workspace -# need to mount Volumes and show it in the documenation when runnignthis +CMD ["/bin/bash"] diff --git a/traces/docker_stf_trace_gen/README.md b/traces/docker_stf_trace_gen/README.md index d86912a1..6e5605b0 100644 --- a/traces/docker_stf_trace_gen/README.md +++ b/traces/docker_stf_trace_gen/README.md @@ -298,12 +298,9 @@ QEMU advantage: 2.70x faster More in [doc/emulator-comparison](doc/emulator-comparison) -### Trace Generation -**Important**: QEMU and Spike use different trace formats: -- **Spike**: Detailed STF (System Trace Format) traces for comprehensive analysis -- **QEMU**: Simple assembly traces using `-d in_asm` output +### STF Trace Generation -QEMU cannot generate STF traces, making it useful for running large files and basic traces, while Spike provides detailed tracing capabilities. +Read the [generate trace](generate_trace.md) file for details. 
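+
+For example, to produce a macro-mode STF trace with Spike (mirroring the quickstart in
+[generate_trace.md](generate_trace.md); `workload.elf` is a placeholder binary):
+
+```bash
+python generate_trace.py --emulator spike macro workload.elf
+```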
## Documentation diff --git a/traces/docker_stf_trace_gen/build_workload.py b/traces/docker_stf_trace_gen/build_workload.py index ef161591..ad8c3596 100755 --- a/traces/docker_stf_trace_gen/build_workload.py +++ b/traces/docker_stf_trace_gen/build_workload.py @@ -3,7 +3,7 @@ import argparse from pathlib import Path from typing import List -from utils.util import log, LogLevel, run_cmd, clean_dir, file_exists, write_file_lines +from utils.util import Util, LogLevel from utils.config import BoardConfig DEFAULT_WORKLOADS = { @@ -21,7 +21,7 @@ def __init__(self, board: str, arch: str, platform: str, bbv: bool, trace: bool) self.bbv = bbv self.trace = trace self.config = BoardConfig(board) - self.bin_dir = clean_dir(Path(f"/workloads/bin/{board}")) + self.bin_dir = Util.clean_dir(Path(f"/workloads/bin/{board}")) self.env_dir = Path(f"/workloads/environment/{board}") self.executables = [] @@ -41,14 +41,14 @@ def _get_flags(self, config: dict, workload_path: Path, workload_type: str, benc def build_environment(self, workload: str): """Compile environment runtime files.""" if self.config.should_skip_environment(self.platform, workload): - log(LogLevel.INFO, f"Skipping environment build for {self.platform}") + Util.log(LogLevel.INFO, f"Skipping environment build for {self.platform}") return cc, cflags, _, _ = self._get_flags({}, workload, workload) for src in self.config.get_environment_files(workload): src_file = self.env_dir / src if src_file.exists(): obj = self.env_dir / f"{Path(src).stem}.o" - run_cmd([cc, "-c", *cflags, "-o", str(obj), str(src_file)]) + Util.run_cmd([cc, "-c", *cflags, "-o", str(obj), str(src_file)]) def build_common_files(self, workload_path: Path, workload_type: str) -> List[str]: """Compile common files for riscv-tests.""" @@ -61,29 +61,29 @@ def build_common_files(self, workload_path: Path, workload_type: str) -> List[st if c_file.name in skip: continue obj = self.bin_dir / f"{c_file.stem}.o" - if run_cmd([cc, "-c", *cflags, "-o", str(obj), str(c_file)]): + if Util.run_cmd([cc, "-c", *cflags, "-o", str(obj), str(c_file)]): obj_files.append(str(obj)) return obj_files def build_benchmark(self, bench: str, workload_path: Path, workload_type: str, common_objs: List[str]): """Compile and link a single benchmark.""" - log(LogLevel.INFO, f"Building {bench}") + Util.log(LogLevel.INFO, f"Building {bench}") bench_dir = workload_path / ("src" if workload_type == "embench-iot" else "benchmarks") / bench if not bench_dir.exists(): - log(LogLevel.ERROR, f"Benchmark directory not found: {bench_dir}") + Util.log(LogLevel.ERROR, f"Benchmark directory not found: {bench_dir}") # Find source files source_exts = ['.c'] if workload_type == "embench-iot" else ['.c', '.S'] sources = [f for ext in source_exts for f in bench_dir.glob(f"*{ext}")] if not sources: - log(LogLevel.ERROR, f"No sources found for {bench}") + Util.log(LogLevel.ERROR, f"No sources found for {bench}") # Compile sources cc, cflags, ldflags, config = self._get_flags({}, workload_path, workload_type, bench) obj_files = [] for src in sources: obj = self.bin_dir / f"{src.stem}.o" - if run_cmd([cc, "-c", *cflags, "-o", str(obj), str(src)]): + if Util.run_cmd([cc, "-c", *cflags, "-o", str(obj), str(src)]): obj_files.append(str(obj)) # Compile additional sources for embench-iot @@ -92,7 +92,7 @@ def build_benchmark(self, bench: str, workload_path: Path, workload_type: str, c src_path = Path(src) if src_path.exists(): obj = self.bin_dir / f"{src_path.stem}_support.o" - if run_cmd([cc, "-c", *cflags, "-o", str(obj), str(src_path)]): + 
if Util.run_cmd([cc, "-c", *cflags, "-o", str(obj), str(src_path)]): obj_files.append(str(obj)) # Link executable @@ -104,7 +104,7 @@ def build_benchmark(self, bench: str, workload_path: Path, workload_type: str, c link_cmd.extend([f"-T{self.env_dir / config.get('linker_script', 'link.ld')}", *[str(self.env_dir / f"{Path(f).stem}.o") for f in self.config.get_environment_files(workload_type)]]) link_cmd.extend(config.get('libs', [])) - if run_cmd(link_cmd): + if Util.run_cmd(link_cmd): self.executables.append(str(exe)) def list_benchmarks(self, workload_path: Path, workload_type: str) -> List[str]: @@ -118,10 +118,10 @@ def build_workload(self, workload: str, benchmark: str = None, custom_path: str """Build specified workload or benchmark.""" workload_path = Path(custom_path or DEFAULT_WORKLOADS.get(workload, DEFAULT_WORKLOADS["riscv-tests"])) workload_type = workload if workload in DEFAULT_WORKLOADS else "custom" - if not file_exists(workload_path): - log(LogLevel.ERROR, f"Workload path not found: {workload_path}") + if not Util.file_exists(workload_path): + Util.log(LogLevel.ERROR, f"Workload path not found: {workload_path}") - log(LogLevel.INFO, f"Building {workload} for {self.arch}/{self.platform}/{self.board}") + Util.log(LogLevel.INFO, f"Building {workload} for {self.arch}/{self.platform}/{self.board}") self.build_environment(workload_type) common_objs = self.build_common_files(workload_path, workload_type) benchmarks = [benchmark] if benchmark else (["dhrystone"] if workload == "dhrystone" else self.list_benchmarks(workload_path, workload_type)) @@ -129,7 +129,7 @@ def build_workload(self, workload: str, benchmark: str = None, custom_path: str for bench in benchmarks: self.build_benchmark(bench, workload_path, workload_type, common_objs) - log(LogLevel.INFO, f"Built {len(self.executables)} executables in {self.bin_dir}") + Util.log(LogLevel.INFO, f"Built {len(self.executables)} executables in {self.bin_dir}") def main(): """Main entry point for building workloads.""" @@ -147,16 +147,16 @@ def main(): if args.list: for name, path in DEFAULT_WORKLOADS.items(): - if file_exists(path): - log(LogLevel.INFO, f"{name}: {path}") + if Util.file_exists(path): + Util.log(LogLevel.INFO, f"{name}: {path}") builder = WorkloadBuilder(args.board, args.arch, args.platform, args.bbv, args.trace) benchmarks = builder.list_benchmarks(Path(path), name) if benchmarks: - log(LogLevel.INFO, f" Benchmarks: {', '.join(benchmarks[:10])}{'...' if len(benchmarks) > 10 else ''}") + Util.log(LogLevel.INFO, f" Benchmarks: {', '.join(benchmarks[:10])}{'...' if len(benchmarks) > 10 else ''}") return if not args.workload: - log(LogLevel.ERROR, "Workload required. Use --list to see available workloads") + Util.log(LogLevel.ERROR, "Workload required. 
Use --list to see available workloads")
 
     builder = WorkloadBuilder(args.board, args.arch, args.platform, args.bbv, args.trace)
     builder.build_workload(args.workload, args.benchmark, args.custom_path)
diff --git a/traces/docker_stf_trace_gen/converters/base.py b/traces/docker_stf_trace_gen/converters/base.py
new file mode 100644
index 00000000..464360b6
--- /dev/null
+++ b/traces/docker_stf_trace_gen/converters/base.py
@@ -0,0 +1,8 @@
+from abc import ABC
+from typing import Any
+
+
+class BaseConverter(ABC):
+    @staticmethod
+    def convert(input: Any) -> Any:
+        raise NotImplementedError("This method should be overridden by subclasses.")
diff --git a/traces/docker_stf_trace_gen/converters/host_to_docker_path.py b/traces/docker_stf_trace_gen/converters/host_to_docker_path.py
new file mode 100644
index 00000000..85051f70
--- /dev/null
+++ b/traces/docker_stf_trace_gen/converters/host_to_docker_path.py
@@ -0,0 +1,11 @@
+import os
+from converters.base import BaseConverter
+from data.consts import Const
+
+
+class HostToDockerPathConverter(BaseConverter):
+    @staticmethod
+    def convert(path: str) -> str:
+        parts = os.path.abspath(path).strip(os.sep).split(os.sep)
+        parts.insert(0, Const.DOCKER_TEMP_FOLDER)
+        return os.path.join(*parts)
diff --git a/traces/docker_stf_trace_gen/data/consts.py b/traces/docker_stf_trace_gen/data/consts.py
new file mode 100644
index 00000000..a31bf753
--- /dev/null
+++ b/traces/docker_stf_trace_gen/data/consts.py
@@ -0,0 +1,10 @@
+from dataclasses import dataclass
+
+
+@dataclass(frozen=True)
+class Const:
+    DOCKER_IMAGE_NAME = "riscv-perf-model:latest"
+    DOCKER_TEMP_FOLDER = "/host"
+    LIBSTFMEM = "/usr/lib/libstfmem.so"
+    STF_TOOLS = "/riscv/stf_tools/release/tools"
+    SPIKE_PK = "/riscv/riscv-pk/build/pk"
diff --git a/traces/docker_stf_trace_gen/data/metadata.py b/traces/docker_stf_trace_gen/data/metadata.py
new file mode 100644
index 00000000..78fd158e
--- /dev/null
+++ b/traces/docker_stf_trace_gen/data/metadata.py
@@ -0,0 +1,46 @@
+from dataclasses import dataclass
+from typing import Literal, Optional, Dict, Union
+
+
+@dataclass
+class Author:
+    email: str
+    name: Optional[str] = None
+    company: Optional[str] = None
+
+
+@dataclass
+class Workload:
+    filename: str
+    SHA256: str
+    execution_command: Optional[str]
+    elf_sections: Dict[str, str]
+
+
+@dataclass
+class IpModeInterval:
+    ip: int
+    ip_count: int
+    interval_length: int
+
+
+@dataclass
+class InstructionCountModeInterval:
+    start_instruction: int
+    interval_length: int
+
+
+@dataclass
+class Stf:
+    timestamp: str
+    stf_trace_info: Dict[str, str]
+    interval_mode: Literal["ip", "instructionCount", "macro", "fullyTrace"]
+    interval: Optional[Union[IpModeInterval, InstructionCountModeInterval]] = None
+
+
+@dataclass
+class Metadata:
+    author: Author
+    workload: Workload
+    stf: Stf
+    description: Optional[str] = None
diff --git a/traces/docker_stf_trace_gen/factories/metadata_factory.py b/traces/docker_stf_trace_gen/factories/metadata_factory.py
new file mode 100644
index 00000000..d6fabb77
--- /dev/null
+++ b/traces/docker_stf_trace_gen/factories/metadata_factory.py
@@ -0,0 +1,107 @@
+import datetime
+import os
+import re
+from typing import Dict, Literal, Optional
+from elftools.elf.elffile import ELFFile
+from utils.util import Util
+from utils.docker_orchestrator import DockerOrchestrator
+from data.metadata import Author, InstructionCountModeInterval, IpModeInterval, Metadata, Stf, Workload
+
+
+class MetadataFactory:
+    def __init__(self, docker: DockerOrchestrator):
+        self.docker = docker
+
+    def create(
+        self,
+        workload_path: str,
+        trace_path: str,
+        trace_interval_mode: Literal["ip", "instructionCount", "macro", "fullyTrace"],
+        start_instruction: Optional[int] = None,
+        num_instructions: Optional[int] = None,
+        start_pc: Optional[int] = None,
+        pc_threshold: Optional[int] = None,
+        execution_command: Optional[str] = None,
+        description: Optional[str] = None,
+    ) -> Metadata:
+        author = Author(name="John Doe", company="Example Corp", email="john.doe@example.com")
+        workload = self._create_workload(workload_path, execution_command)
+        stf = self._create_stf(
+            trace_path,
+            trace_interval_mode,
+            start_instruction,
+            num_instructions,
+            start_pc,
+            pc_threshold)
+
+        return Metadata(description=description, author=author, workload=workload, stf=stf)
+
+    def _create_workload(self, workload_path: str, execution_command: Optional[str] = None) -> Workload:
+        sha256 = Util.compute_sha256(workload_path)
+        elf_sections = self._get_workload_sections(workload_path)
+        filename = os.path.basename(workload_path)
+        return Workload(filename, sha256, execution_command, elf_sections)
+
+    def _create_stf(
+        self,
+        trace_path: str,
+        interval_mode: Literal["ip", "instructionCount", "macro", "fullyTrace"],
+        start_instruction: Optional[int] = None,
+        num_instructions: Optional[int] = None,
+        start_pc: Optional[int] = None,
+        pc_threshold: Optional[int] = None,
+    ) -> Stf:
+        interval = None
+        if interval_mode == "ip":
+            interval = IpModeInterval(ip=start_pc, ip_count=pc_threshold, interval_length=num_instructions)
+        elif interval_mode == "instructionCount":
+            interval = InstructionCountModeInterval(start_instruction, num_instructions)
+
+        utc_datetime = datetime.datetime.now(datetime.timezone.utc)
+        utc_timestamp = utc_datetime.isoformat()
+
+        stf_trace_info = self._get_stf_info(trace_path)
+        return Stf(utc_timestamp, stf_trace_info, interval_mode, interval)
+
+    def _get_workload_sections(self, workload_path: str) -> Dict[str, str]:
+        sections = ['.comment', '.riscv.attributes', '.GCC.command.line']
+        result = {}
+
+        if not os.path.exists(workload_path):
+            raise FileNotFoundError(f"Workload file not found: {workload_path}")
+
+        with open(workload_path, 'rb') as binary_file:
+            elf = ELFFile(binary_file)
+            for section in sections:
+                section_data = elf.get_section_by_name(section)
+                if section_data:
+                    strings = re.findall(rb'[\x20-\x7E]{2,}', section_data.data())
+                    decoded = ' '.join(s.decode('utf-8') for s in strings)
+                    decoded = decoded.replace('\'', '')
+                    result[section.lstrip('.')] = decoded
+
+        return result
+
+    def _get_stf_info(self, trace_path: str) -> Dict[str, str]:
+        trace_info = self.docker.run_stf_tool("stf_trace_info", trace_path).decode('utf-8')
+
+        metadata = {}
+        values_section = []
+        trace_info_lines = trace_info.strip().split('\n')
+
+        in_values = False
+        for line in trace_info_lines:
+            line = line.strip()
+            if not line:
+                in_values = True
+                continue
+
+            if not in_values:
+                if ' ' in line:
+                    key, value = line.split(None, 1)
+                    metadata[key] = value.strip()
+            else:
+                values_section.append(line)
+
+        metadata['STF_FEATURES'] = values_section
+        return metadata
diff --git a/traces/docker_stf_trace_gen/full_flow.py b/traces/docker_stf_trace_gen/full_flow.py
index ca4686ac..5505fda3 100755
--- a/traces/docker_stf_trace_gen/full_flow.py
+++ b/traces/docker_stf_trace_gen/full_flow.py
@@ -6,8 +6,7 @@
 import time
 from pathlib import Path
 from typing import List, Tuple, Dict
-
-from utils.util import log, run_cmd, LogLevel, file_exists, ensure_dir, 
read_file_lines +from utils.util import Util, LogLevel from utils.config import BoardConfig @@ -20,7 +19,7 @@ def discover_workloads() -> Dict[str, str]: "riscv-tests": str(base_dir / "riscv-tests"), "dhrystone": str(base_dir / "riscv-tests") } - return {k: v for k, v in workloads.items() if file_exists(v)} + return {k: v for k, v in workloads.items() if Util.file_exists(v)} def get_benchmarks(workload: str, board: str = 'spike') -> List[str]: """Get benchmarks for a workload.""" @@ -46,7 +45,7 @@ def get_board_config(board: str) -> Dict: 'features': ['bbv', 'trace'] if board == 'spike' else ['bbv', 'trace'] } except Exception as e: - log(LogLevel.WARN, f"Could not load board config: {e}") + Util.log(LogLevel.WARN, f"Could not load board config: {e}") return {'cc': 'unknown', 'supported_archs': ['rv32', 'rv64'], 'supported_platforms': ['baremetal'], 'features': []} class DockerOrchestrator: @@ -54,43 +53,43 @@ class DockerOrchestrator: def __init__(self, container_name: str, image_name: str, host_output_dir: str): self.container_name = container_name self.image_name = image_name - self.host_output_dir = ensure_dir(Path(host_output_dir).resolve()) - self.host_bin_dir = ensure_dir(self.host_output_dir / "workloads_bin") - self.host_meta_dir = ensure_dir(self.host_output_dir / "workloads_meta") + self.host_output_dir = Util.ensure_dir(Path(host_output_dir).resolve()) + self.host_bin_dir = Util.ensure_dir(self.host_output_dir / "workloads_bin") + self.host_meta_dir = Util.ensure_dir(self.host_output_dir / "workloads_meta") self.container_output_dir = "/outputs" self.container_code_dir = "/flow" def check_docker(self) -> bool: """Check if Docker is available.""" - success, out, _ = run_cmd(["docker", "--version"], show=False) + success, out, _ = Util.run_cmd(["docker", "--version"], show=False) if success: - log(LogLevel.INFO, f"Docker available: {out.strip()}") + Util.log(LogLevel.INFO, f"Docker available: {out.strip()}") return True - log(LogLevel.ERROR, "Docker not found") + Util.log(LogLevel.ERROR, "Docker not found") return False def check_image(self) -> bool: """Check if Docker image exists.""" - success, out, _ = run_cmd(["docker", "images", "-q", self.image_name], show=False) + success, out, _ = Util.run_cmd(["docker", "images", "-q", self.image_name], show=False) if out.strip(): - log(LogLevel.INFO, f"Image found: {self.image_name}") + Util.log(LogLevel.INFO, f"Image found: {self.image_name}") return True - log(LogLevel.WARN, f"Image not found: {self.image_name}") + Util.log(LogLevel.WARN, f"Image not found: {self.image_name}") return False def build_image(self) -> bool: """Build Docker image.""" - log(LogLevel.INFO, "Building Docker image...") - if not file_exists("Dockerfile"): - log(LogLevel.ERROR, "Dockerfile not found") + Util.log(LogLevel.INFO, "Building Docker image...") + if not Util.file_exists("Dockerfile"): + Util.log(LogLevel.ERROR, "Dockerfile not found") return False cmd = ["docker", "build", "-t", self.image_name, "."] - success, _, _ = run_cmd(cmd) + success, _, _ = Util.run_cmd(cmd) if success: - log(LogLevel.INFO, "Image built successfully") + Util.log(LogLevel.INFO, "Image built successfully") return True - log(LogLevel.ERROR, "Failed to build image") + Util.log(LogLevel.ERROR, "Failed to build image") return False def run_command(self, command: List[str], interactive: bool = False) -> Tuple[bool, str, str]: @@ -113,7 +112,7 @@ def run_command(self, command: List[str], interactive: bool = False) -> Tuple[bo docker_cmd = ["docker", "run", "--rm"] + mounts + (["-it"] 
if interactive else []) + \ [self.image_name, "bash", "-c", f"cd {self.container_code_dir} && {' '.join(command)}"] - return run_cmd(docker_cmd, interactive=interactive) + return Util.run_cmd(docker_cmd, interactive=interactive) class WorkflowManager: """Manages RISC-V analysis workflow.""" @@ -129,11 +128,11 @@ def get_input(self, prompt: str, choices: List[str] = None, default: str = None, """Get validated user input.""" while True: if choices: - log(LogLevel.INFO, f"\n{prompt}") + Util.log(LogLevel.INFO, f"\n{prompt}") for i, c in enumerate(choices, 1): - log(LogLevel.INFO, f" {i}. {c}{' (default)' if c == default else ''}") + Util.log(LogLevel.INFO, f" {i}. {c}{' (default)' if c == default else ''}") if multi: - log(LogLevel.INFO, " Enter comma-separated numbers") + Util.log(LogLevel.INFO, " Enter comma-separated numbers") try: resp = input(f"Select [1-{len(choices)}]: ").strip() if not resp and default: @@ -145,26 +144,26 @@ def get_input(self, prompt: str, choices: List[str] = None, default: str = None, if 0 <= idx < len(choices): return choices[idx] except (ValueError, IndexError): - log(LogLevel.ERROR, "Invalid selection") + Util.log(LogLevel.ERROR, "Invalid selection") else: resp = input(f"{prompt}: ").strip() return resp or default def configure_interactive(self): """Configure workflow interactively.""" - log(LogLevel.HEADER, "RISC-V Workload Analysis Configuration") + Util.log(LogLevel.HEADER, "RISC-V Workload Analysis Configuration") # Workload selection workloads = discover_workloads() self.config['workload_suite'] = self.get_input( "Select workload suite", list(workloads.keys()), "embench-iot") self.config['architecture'] = "rv32" if self.config['workload_suite'] == "embench-iot" else "rv64" - log(LogLevel.INFO, f"Selected: {self.config['workload_suite']} ({workloads[self.config['workload_suite']]})") + Util.log(LogLevel.INFO, f"Selected: {self.config['workload_suite']} ({workloads[self.config['workload_suite']]})") # Benchmark selection benchmarks = get_benchmarks(self.config['workload_suite']) if benchmarks: - log(LogLevel.INFO, f"Found {len(benchmarks)} benchmarks: {', '.join(benchmarks[:10])}{'...' if len(benchmarks) > 10 else ''}") + Util.log(LogLevel.INFO, f"Found {len(benchmarks)} benchmarks: {', '.join(benchmarks[:10])}{'...' if len(benchmarks) > 10 else ''}") if self.get_input("Use all benchmarks? [Y/n]", ["y", "n"], "y") == "y": self.config['benchmarks'] = ['all'] else: @@ -175,7 +174,7 @@ def configure_interactive(self): boards = ['spike', 'qemu'] self.config['emulator'] = self.get_input("Select emulator", boards, "spike") board_config = get_board_config(self.config['emulator']) - log(LogLevel.INFO, f"Emulator: {self.config['emulator']} (Features: {', '.join(board_config['features'])})") + Util.log(LogLevel.INFO, f"Emulator: {self.config['emulator']} (Features: {', '.join(board_config['features'])})") # Architecture and platform archs = board_config['supported_archs'] @@ -193,9 +192,9 @@ def configure_interactive(self): self.config['enable_simpoint'] = self.get_input("Enable SimPoint? [y/n]", ["y", "n"], "y") == "y" # Confirm - log(LogLevel.HEADER, "Configuration Summary") + Util.log(LogLevel.HEADER, "Configuration Summary") for k, v in self.config.items(): - log(LogLevel.INFO, f" {k.replace('_', ' ').title():20}: {v}") + Util.log(LogLevel.INFO, f" {k.replace('_', ' ').title():20}: {v}") return self.get_input("Proceed? 
[y/n]", ["y", "n"], "y") == "y" def _generate_cmd(self, script: str, workload_specific: bool = False) -> List[str]: @@ -221,32 +220,32 @@ def _generate_cmd(self, script: str, workload_specific: bool = False) -> List[st def run_step(self, step: str, script: str, workload_specific: bool = False) -> bool: """Run a workflow step.""" - log(LogLevel.INFO, f"Executing {step}") + Util.log(LogLevel.INFO, f"Executing {step}") cmd = self._generate_cmd(script, workload_specific) success, stdout, stderr = self.orchestrator.run_command(cmd) if success: - log(LogLevel.INFO, f"{step} completed") + Util.log(LogLevel.INFO, f"{step} completed") if stdout: - log(LogLevel.DEBUG, stdout[-1000:]) + Util.log(LogLevel.DEBUG, stdout[-1000:]) else: - log(LogLevel.WARN, f"{step} failed") + Util.log(LogLevel.WARN, f"{step} failed") if stderr: - log(LogLevel.DEBUG, stderr[-1000:]) + Util.log(LogLevel.DEBUG, stderr[-1000:]) return success def collect_results(self): """Collect and summarize results.""" - log(LogLevel.HEADER, "Collecting Results") + Util.log(LogLevel.HEADER, "Collecting Results") output_files = [ p for p in self.orchestrator.host_output_dir.rglob("*") if p.is_file() and any(s in str(p) for s in ["results.txt", "bbv/", "traces/", "simpoint_analysis/"]) ] if output_files: - log(LogLevel.INFO, "Results found:") + Util.log(LogLevel.INFO, "Results found:") for f in output_files: - log(LogLevel.INFO, f" • {f.relative_to(self.orchestrator.host_output_dir)}") + Util.log(LogLevel.INFO, f" • {f.relative_to(self.orchestrator.host_output_dir)}") else: - log(LogLevel.WARN, "No results found") + Util.log(LogLevel.WARN, "No results found") summary = { "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"), @@ -257,7 +256,7 @@ def collect_results(self): summary_file = self.orchestrator.host_output_dir / "analysis_summary.json" with summary_file.open('w') as f: json.dump(summary, f, indent=2) - log(LogLevel.INFO, f"Summary saved: {summary_file}") + Util.log(LogLevel.INFO, f"Summary saved: {summary_file}") def main(): """Main entry point for RISC-V analysis.""" @@ -278,7 +277,7 @@ def main(): parser.add_argument("--skip-run", action="store_true") args = parser.parse_args() - log(LogLevel.HEADER, f"RISC-V Analysis (Output: {Path(args.output_dir).resolve()})") + Util.log(LogLevel.HEADER, f"RISC-V Analysis (Output: {Path(args.output_dir).resolve()})") orchestrator = DockerOrchestrator(args.container_name, args.image_name, args.output_dir) if not orchestrator.check_docker(): @@ -291,7 +290,7 @@ def main(): if args.workload: workloads = discover_workloads() if args.workload not in workloads: - log(LogLevel.ERROR, f"Unknown workload: {args.workload}. Available: {', '.join(workloads.keys())}") + Util.log(LogLevel.ERROR, f"Unknown workload: {args.workload}. 
Available: {', '.join(workloads.keys())}") workflow.config.update({ 'workload_suite': args.workload, 'benchmarks': [args.benchmark] if args.benchmark else ['all'], @@ -305,7 +304,7 @@ def main(): }) else: if not workflow.configure_interactive(): - log(LogLevel.WARN, "Cancelled by user") + Util.log(LogLevel.WARN, "Cancelled by user") sys.exit(0) success = True @@ -317,15 +316,15 @@ def main(): workflow.run_step("SimPoint Analysis", "run_simpoint.py") workflow.collect_results() - log(LogLevel.HEADER, "Analysis " + ("Completed" if success else "Failed")) - log(LogLevel.INFO, f"Results in: {orchestrator.host_output_dir}") + Util.log(LogLevel.HEADER, "Analysis " + ("Completed" if success else "Failed")) + Util.log(LogLevel.INFO, f"Results in: {orchestrator.host_output_dir}") if __name__ == "__main__": try: main() except KeyboardInterrupt: - log(LogLevel.WARN, "Interrupted by user") + Util.log(LogLevel.WARN, "Interrupted by user") sys.exit(1) except Exception as e: - log(LogLevel.ERROR, f"Unexpected error: {e}") + Util.log(LogLevel.ERROR, f"Unexpected error: {e}") sys.exit(1) \ No newline at end of file diff --git a/traces/docker_stf_trace_gen/generate_trace.md b/traces/docker_stf_trace_gen/generate_trace.md new file mode 100644 index 00000000..8e31fa75 --- /dev/null +++ b/traces/docker_stf_trace_gen/generate_trace.md @@ -0,0 +1,166 @@ +# Trace Generation Tool + +This tool generates execution traces for RISC-V workloads using either **Spike** or **QEMU** emulators. + +There are **three different modes** for generating traces: +- **Macro** → uses `START_TRACE` and `STOP_TRACE` macros embedded in the workload +- **Instruction Count (`insn_count`)** → traces a fixed number of instructions after skipping some +- **Program Counter (`pc_count`)** → traces after a specific program counter (PC) is reached + +--- + +## Table of Contents + +1. [Quickstart](#quickstart) +2. [Usage](#usage) +3. [Global Options](#global-options) +4. [Modes](#modes) + - [Macro](#macro) + - [Instruction Count](#insn_count) + - [Program Counter](#pc_count) +5. [Mode Restrictions](#mode-restrictions) +6. [Summary Table](#summary-table) +7. [Help and More Info](#help-and-more-info) + +--- + +## Quickstart + +1. **Macro mode with Spike** + Trace using `START_TRACE` / `STOP_TRACE` markers inside the workload: + ```bash + python generate_trace.py --emulator spike macro workload.elf + ``` + +2. **Instruction Count mode** + Skip 1000 instructions, then trace 5000 instructions: + + ```bash + python generate_trace.py --emulator qemu insn_count \ + --num-instructions 5000 --start-instruction 1000 workload.elf + ``` + +3. **Program Counter mode (QEMU only)** + Start tracing after PC `0x80000000` is hit 5 times, trace 2000 instructions: + + ```bash + python generate_trace.py --emulator qemu pc_count \ + --num-instructions 2000 --start-pc 0x80000000 --pc-threshold 5 workload.elf + ``` + +--- + +## Usage + +```bash +python generate_trace.py [OPTIONS] MODE WORKLOAD_PATH +``` + +Example with help: + +```bash +python generate_trace.py macro --help +``` + +--- + +## Global Options + +These options apply to all modes: + +* **`--emulator {spike,qemu}`** *(required)* + Select which emulator to use. + +* **`--isa ISA`** *(optional)* + Instruction set architecture (e.g., `rv64imafdc`). + +* **`--dump`** *(flag)* + Create a trace file dump. + +* **`--pk`** *(flag)* + Run Spike with **pk (proxy kernel)**. + +* **`--image-name IMAGE_NAME`** *(default: `Const.DOCKER_IMAGE_NAME`)* + Use a custom Docker image instead of the default. 
+
+* **`-o, --output OUTPUT`** *(optional)*
+  Output folder or file path.
+
+* **`workload`** *(positional, required)*
+  Path to workload binary.
+
+---
+
+## Modes
+
+### `macro`
+
+Trace mode using `START_TRACE` and `STOP_TRACE` macros in the workload binary.
+
+* **Only works with Spike**
+* No additional arguments required beyond the workload path.
+
+**Example:**
+
+```bash
+python generate_trace.py --emulator spike macro workload.elf
+```
+
+---
+
+### `insn_count`
+
+Trace a fixed number of instructions after skipping a given number.
+
+**Arguments:**
+
+* **`--num-instructions`** *(required, int)* → number of instructions to trace.
+* **`--start-instruction`** *(optional, int, default: 0)* → instructions to skip before tracing starts.
+
+**Example:**
+
+```bash
+python generate_trace.py --emulator qemu insn_count \
+    --num-instructions 5000 --start-instruction 1000 workload.elf
+```
+
+---
+
+### `pc_count`
+
+Trace a fixed number of instructions after reaching a given PC value a certain number of times.
+
+* **Only works with QEMU**
+
+**Arguments:**
+
+* **`--num-instructions`** *(required, int)* → number of instructions to trace.
+* **`--start-pc`** *(required, int)* → starting program counter (hex or decimal).
+* **`--pc-threshold`** *(optional, int, default: 1)* → number of times the PC must be hit before tracing begins.
+
+**Example:**
+
+```bash
+python generate_trace.py --emulator qemu pc_count \
+    --num-instructions 2000 --start-pc 0x80000000 --pc-threshold 5 workload.elf
+```
+
+---
+
+## Mode Restrictions
+
+* `macro` mode **cannot** be used with `qemu`.
+* `pc_count` mode **cannot** be used with `spike`.
+* Each mode has its own required arguments.
+
+---
+
+## Summary Table
+
+| Mode         | Emulator   | Required Arguments                   |
+| ------------ | ---------- | ------------------------------------ |
+| `macro`      | spike      | workload                             |
+| `insn_count` | spike/qemu | `--num-instructions`                 |
+| `pc_count`   | qemu       | `--num-instructions`, `--start-pc`   |
+
+---
\ No newline at end of file
diff --git a/traces/docker_stf_trace_gen/generate_trace.py b/traces/docker_stf_trace_gen/generate_trace.py
new file mode 100755
index 00000000..d341c34c
--- /dev/null
+++ b/traces/docker_stf_trace_gen/generate_trace.py
@@ -0,0 +1,115 @@
+import argparse
+from dataclasses import asdict
+import os
+import yaml
+from factories.metadata_factory import MetadataFactory
+from data.metadata import Metadata
+from utils.trace_generator_arg_parser import parse_args
+from utils.docker_orchestrator import DockerOrchestrator
+from data.consts import Const
+from converters.host_to_docker_path import HostToDockerPathConverter
+
+
+class TraceGenerator:
+    def __init__(self, docker: DockerOrchestrator):
+        self.docker = docker
+        self.metadata_factory = MetadataFactory(docker)
+
+    def run(self, args: argparse.Namespace) -> None:
+        args.output = self._get_output_path(args)
+        docker_paths = self._convert_paths(args)
+
+        self._run_trace(args, docker_paths)
+        metadata = self._generate_metadata(args)
+        self._save_metadata(args.output, metadata)
+
+        if args.dump:
+            dump_path = f"{args.output}.dump"
+            self.docker.run_stf_tool("stf_dump", args.output, dump_path)
+
+    def _get_output_path(self, args: argparse.Namespace) -> str:
+        if not args.output:
+            return f"{args.workload}.zstf"
+
+        is_dir = not os.path.splitext(args.output)[1]
+        if not is_dir and not args.output.endswith("zstf"):
+            raise ValueError("Invalid output file extension. Expected .zstf or directory.")
+
+        if not is_dir:
+            return args.output
+
+        workload_filename = os.path.basename(args.workload)
+        return os.path.join(args.output, workload_filename)
+
+    def _convert_paths(
+        self,
+        args: argparse.Namespace,
+        path_arguments: list[str] = ['workload', 'output']
+    ) -> dict[str, str]:
+        docker_paths = {}
+        for path_argument in path_arguments:
+            arg_value = getattr(args, path_argument)
+            if arg_value:
+                docker_paths[arg_value] = HostToDockerPathConverter.convert(arg_value)
+
+        return docker_paths
+
+    def _run_trace(self, args: argparse.Namespace, docker_paths: dict[str, str]):
+        if args.emulator == "spike":
+            bash_cmd = self._get_spike_command(args, docker_paths)
+        elif args.emulator == "qemu":
+            bash_cmd = self._get_qemu_command(args, docker_paths)
+        else:
+            raise ValueError(f"Invalid emulator ({args.emulator}) provided")
+
+        self.docker.run_command(bash_cmd, docker_paths)
+
+    def _get_spike_command(self, args: argparse.Namespace, docker_paths: dict[str, str]) -> str:
+        isa = f"--isa={args.isa}" if args.isa else ""
+        pk = Const.SPIKE_PK if args.pk else ""
+
+        if args.mode == "insn_count":
+            return f"spike {isa} --stf_trace {docker_paths[args.output]} --stf_trace_memory_records --stf_insn_num_tracing --stf_insn_start {args.start_instruction} --stf_insn_count {args.num_instructions} {pk} {docker_paths[args.workload]}"
+        elif args.mode == "macro":
+            return f"spike {isa} --stf_trace {docker_paths[args.output]} --stf_trace_memory_records --stf_macro_tracing {pk} {docker_paths[args.workload]}"
+
+        raise NotImplementedError(f"mode {args.mode} invalid for spike")
+
+    def _get_qemu_command(self, args: argparse.Namespace, docker_paths: dict[str, str]) -> str:
+        if args.mode == "insn_count":
+            # --start-instruction counts skipped instructions; the plugin expects the
+            # index of the first instruction to trace, hence the +1
+            start_instruction = args.start_instruction + 1
+            return f"qemu-riscv64 -plugin {Const.LIBSTFMEM},mode=dyn_insn_count,start_dyn_insn={start_instruction},num_instructions={args.num_instructions},outfile={docker_paths[args.output]} -d plugin -- {docker_paths[args.workload]}"
+        elif args.mode == "pc_count":
+            return f"qemu-riscv64 -plugin {Const.LIBSTFMEM},mode=ip,start_ip={args.start_pc},ip_hit_threshold={args.pc_threshold},num_instructions={args.num_instructions},outfile={docker_paths[args.output]} -d plugin -- {docker_paths[args.workload]}"
+
+        raise NotImplementedError(f"mode {args.mode} invalid for qemu")
+
+    def _generate_metadata(self, args: argparse.Namespace) -> Metadata:
+        return self.metadata_factory.create(
+            workload_path=args.workload,
+            trace_path=args.output,
+            trace_interval_mode=args.mode,
+            start_instruction=getattr(args, "start_instruction", None),
+            num_instructions=getattr(args, "num_instructions", None),
+            start_pc=getattr(args, "start_pc", None),
+            pc_threshold=getattr(args, "pc_threshold", None),
+            execution_command=None,
+            description=None,
+        )
+
+    def _save_metadata(self, trace_path: str, metadata: Metadata):
+        metadata_path = f"{trace_path}.metadata.yaml"
+        with open(metadata_path, 'w') as file:
+            yaml.dump(asdict(metadata), file)
+
+
+def main():
+    args = parse_args()
+    docker = DockerOrchestrator(args)
+    TraceGenerator(docker).run(args)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/traces/docker_stf_trace_gen/run_simpoint.py b/traces/docker_stf_trace_gen/run_simpoint.py
index 098f31e1..32e2377d 100755
--- a/traces/docker_stf_trace_gen/run_simpoint.py
+++ b/traces/docker_stf_trace_gen/run_simpoint.py
@@ -5,13 +5,13 @@
 import time
 from pathlib import Path
 from typing import Dict, List, Tuple
-from 
utils.util import log, LogLevel, run_cmd, ensure_dir, validate_tool, read_file_lines, file_exists +from utils.util import Util, LogLevel def find_bbv_files(emulator: str, binaries: List[Path]) -> Dict[str, Path]: """Find BBV files for binaries.""" output_dir = Path(f"/outputs/{emulator}_output/bbv") if not output_dir.exists(): - log(LogLevel.ERROR, f"BBV directory not found: {output_dir}") + Util.log(LogLevel.ERROR, f"BBV directory not found: {output_dir}") bbv_files = {} for binary in binaries: @@ -22,23 +22,23 @@ def find_bbv_files(emulator: str, binaries: List[Path]) -> Dict[str, Path]: if bbv_file.exists() and bbv_file.stat().st_size > 0: bbv_files[name] = bbv_file if not bbv_files: - log(LogLevel.ERROR, "No valid BBV files found") - log(LogLevel.INFO, f"Found {len(bbv_files)} BBV files") + Util.log(LogLevel.ERROR, "No valid BBV files found") + Util.log(LogLevel.INFO, f"Found {len(bbv_files)} BBV files") return bbv_files def run_simpoint_analysis(bbv_file: Path, benchmark: str, max_k: int, output_dir: Path) -> Tuple[bool, Path, Path]: """Run SimPoint analysis on a BBV file.""" - ensure_dir(output_dir) + Util.ensure_dir(output_dir) simpoints = output_dir / f"{benchmark}.simpoints" weights = output_dir / f"{benchmark}.weights" cmd = ["simpoint", "-loadFVFile", str(bbv_file), "-maxK", str(max_k), "-saveSimpoints", str(simpoints), "-saveSimpointWeights", str(weights)] - success, _, _ = run_cmd(cmd, timeout=300) + success, _, _ = Util.run_cmd(cmd, timeout=300) return success and simpoints.exists() and weights.exists(), simpoints, weights def parse_simpoint_results(simpoints_file: Path, weights_file: Path) -> List[Tuple[int, float]]: """Parse SimPoint intervals and weights.""" - simpoints = [int(line.split()[0]) for line in read_file_lines(simpoints_file) if line.split()[0].isdigit()] if simpoints_file.exists() else [] - weights = [float(line.split()[1]) for line in read_file_lines(weights_file) if len(line.split()) > 1] if weights_file.exists() else [] + simpoints = [int(line.split()[0]) for line in Util.read_file_lines(simpoints_file) if line.split()[0].isdigit()] if simpoints_file.exists() else [] + weights = [float(line.split()[1]) for line in Util.read_file_lines(weights_file) if len(line.split()) > 1] if weights_file.exists() else [] return list(zip(simpoints, weights)) if len(simpoints) == len(weights) else [] def generate_summary(results: Dict[str, Dict], output_file: Path): @@ -63,7 +63,7 @@ def generate_summary(results: Dict[str, Dict], output_file: Path): with output_file.open('w') as f: f.write('\n'.join(lines)) - log(LogLevel.INFO, '\n'.join(lines)) + Util.log(LogLevel.INFO, '\n'.join(lines)) def main(): """Main entry point.""" @@ -74,20 +74,20 @@ def main(): parser.add_argument("--output-dir", default="/outputs/simpoint_analysis") args = parser.parse_args() - validate_tool("simpoint") + Util.validate_tool("simpoint") - output_dir = ensure_dir(Path(args.output_dir)) - log(LogLevel.INFO, f"Starting SimPoint analysis for {args.emulator}") + output_dir = Util.ensure_dir(Path(args.output_dir)) + Util.log(LogLevel.INFO, f"Starting SimPoint analysis for {args.emulator}") binary_list = Path(f"/workloads/binary_list_{args.emulator}.txt") - binaries = [Path(line) for line in read_file_lines(binary_list) if file_exists(line)] + binaries = [Path(line) for line in Util.read_file_lines(binary_list) if Util.file_exists(line)] if args.workload_type: binaries = [b for b in binaries if args.workload_type in b.name] bbv_files = find_bbv_files(args.emulator, binaries) results = {} for bench, 
bbv_file in bbv_files.items(): - log(LogLevel.INFO, f"Analyzing {bench}") + Util.log(LogLevel.INFO, f"Analyzing {bench}") success, simpoints, weights = run_simpoint_analysis(bbv_file, bench, args.max_k, output_dir) result = {'success': success, 'bbv_file': str(bbv_file), 'simpoints_file': str(simpoints), 'weights_file': str(weights)} if success: @@ -110,7 +110,7 @@ def main(): pass - log(LogLevel.INFO, f"Analysis completed. Summary: {summary_file}") + Util.log(LogLevel.INFO, f"Analysis completed. Summary: {summary_file}") if __name__ == "__main__": main() \ No newline at end of file diff --git a/traces/docker_stf_trace_gen/run_workload.py b/traces/docker_stf_trace_gen/run_workload.py index 3765535b..09f5631f 100755 --- a/traces/docker_stf_trace_gen/run_workload.py +++ b/traces/docker_stf_trace_gen/run_workload.py @@ -3,18 +3,18 @@ import argparse from pathlib import Path from typing import Dict -from utils.util import log, LogLevel, run_cmd, get_time, validate_tool, clean_dir, ensure_dir +from utils.util import Util, LogLevel def validate_environment(emulator: str, platform: str, arch: str): """Validate emulator tools.""" if emulator == "spike": - validate_tool("spike") + Util.validate_tool("spike") elif emulator == "qemu": - validate_tool(f"qemu-{'system-' if platform == 'baremetal' else ''}riscv{32 if arch == 'rv32' else 64}") + Util.validate_tool(f"qemu-{'system-' if platform == 'baremetal' else ''}riscv{32 if arch == 'rv32' else 64}") def setup_output_dirs(emulator: str) -> Dict[str, Path]: """Setup output directories.""" - base = clean_dir(Path(f"/outputs/{emulator}_output")) + base = Util.clean_dir(Path(f"/outputs/{emulator}_output")) return { 'base': base, 'logs': base / "logs", 'bbv': base / "bbv", 'traces': base / "traces", 'results': base / "results.txt" @@ -23,7 +23,7 @@ def setup_output_dirs(emulator: str) -> Dict[str, Path]: def run_emulator(binary: Path, dirs: Dict[str, Path], emulator: str, bbv: bool, trace: bool, platform: str, arch: str, interval_size: int, enable_stf_tools: bool) -> float: """Run workload on emulator.""" name = binary.stem - log(LogLevel.INFO, f"Running {name} on {emulator.upper()} ({platform}/{arch})") + Util.log(LogLevel.INFO, f"Running {name} on {emulator.upper()} ({platform}/{arch})") if emulator == "qemu" and trace: print("QEMU Cannot generate STF traces, Please use Spike") trace = False @@ -47,16 +47,16 @@ def run_emulator(binary: Path, dirs: Dict[str, Path], emulator: str, bbv: bool, cfg = configs[emulator] cmd = cfg["cmd"] + (cfg["bbv"]() if bbv else []) + (cfg["trace"]() if trace else []) + cfg.get("bin", []) # build the logs file - start = get_time() - run_cmd(cmd) - end = get_time() + start = Util.get_time() + Util.run_cmd(cmd) + end = Util.get_time() if emulator == "spike" and trace and enable_stf_tools: trace_file = dirs['traces'] / f"{name}.zstf" if trace_file.exists(): stf_dump = Path("/riscv/stf_tools/release/tools/stf_dump/stf_dump") if stf_dump.exists(): - run_cmd([str(stf_dump), str(trace_file)]) + Util.run_cmd([str(stf_dump), str(trace_file)]) return end - start @@ -65,7 +65,7 @@ def run_workloads(emulator: str, platform: str, arch: str, bbv: bool, trace: boo validate_environment(emulator, platform, arch) dirs = setup_output_dirs(emulator) for d in [dirs['logs'], dirs['bbv'], dirs['traces']]: - ensure_dir(d) + Util.ensure_dir(d) # Fetch binaries in /workloads/bin// binary_dir = Path(f"/workloads/bin/{emulator}") @@ -76,9 +76,9 @@ def run_workloads(emulator: str, platform: str, arch: str, bbv: bool, trace: boo if workload: binaries = [b 
for b in binaries if workload in b.name]
     if not binaries:
-        log(LogLevel.ERROR, f"No workloads matching: {workload}")
+        Util.log(LogLevel.ERROR, f"No workloads matching: {workload}")
 
-    log(LogLevel.INFO, f"Running {len(binaries)} workloads")
+    Util.log(LogLevel.INFO, f"Running {len(binaries)} workloads")
     results = []
     total_time = 0
     with dirs['results'].open('w') as f:
@@ -92,7 +92,7 @@
             except RuntimeError:
                 continue
 
-    log(LogLevel.INFO, f"Summary: {len(results)} workloads, {total_time:.2f}s, results in {dirs['results']}")
+    Util.log(LogLevel.INFO, f"Summary: {len(results)} workloads, {total_time:.2f}s, results in {dirs['results']}")
 
 def main():
     """Main entry point."""
diff --git a/traces/docker_stf_trace_gen/utils/config.py b/traces/docker_stf_trace_gen/utils/config.py
index 2696edc1..0d13a589 100644
--- a/traces/docker_stf_trace_gen/utils/config.py
+++ b/traces/docker_stf_trace_gen/utils/config.py
@@ -3,7 +3,7 @@
 import yaml
 import shlex
 from pathlib import Path
-from utils.util import log
+from utils.util import Util, LogLevel
 
 class BoardConfig:
     """Board configuration parser with support for tagged sections"""
@@ -17,7 +17,7 @@ def __init__(self, board_name):
     def load_config(self):
         """Load configuration from board-specific file"""
         if not self.config_file.exists():
-            log("ERROR", f"Board config not found: {self.config_file}")
+            Util.log(LogLevel.WARN, f"Board config not found: {self.config_file}")
             return
 
         with open(self.config_file, 'r') as f:
@@ -43,7 +43,7 @@ def _parse_list(self, value):
             return shlex.split(value)
         except ValueError as e:
             # Fallback to simple split if shlex fails (e.g., unmatched quotes)
-            log("WARNING", f"Failed to parse config value '{value}' with shlex: {e}")
+            Util.log(LogLevel.WARN, f"Failed to parse config value '{value}' with shlex: {e}")
             return value.split()
         return []
diff --git a/traces/docker_stf_trace_gen/utils/docker_orchestrator.py b/traces/docker_stf_trace_gen/utils/docker_orchestrator.py
new file mode 100644
index 00000000..f5499420
--- /dev/null
+++ b/traces/docker_stf_trace_gen/utils/docker_orchestrator.py
@@ -0,0 +1,67 @@
+
+import argparse
+import os
+import docker
+from typing import Optional
+from data.consts import Const
+
+
+class DockerOrchestrator:
+    def __init__(self, args: argparse.Namespace) -> None:
+        self.docker_image_name = args.image_name if args.image_name else Const.DOCKER_IMAGE_NAME
+        self.docker_client = docker.from_env()
+
+    def run_command(self, command: str, binds: Optional[dict[str, str]] = None) -> bytes:
+        # Mount the parent folder of each bind so the container can see the host files
+        volumes = {}
+        for host_path, docker_path in (binds or {}).items():
+            host_folder = os.path.dirname(host_path)
+            docker_folder = os.path.dirname(docker_path)
+            volumes[host_folder] = {"bind": docker_folder, "mode": "rw"}
+
+        print(command)
+        container = None
+        stdout_logs = b""
+        try:
+            container = self.docker_client.containers.run(
+                image=self.docker_image_name,
+                command=["bash", "-c", command],
+                volumes=volumes,
+                detach=True,
+                stdout=True,
+                stderr=True
+            )
+
+            exit_code = container.wait()["StatusCode"]
+            stdout_logs = container.logs(stdout=True, stderr=False)
+            stderr_logs = container.logs(stdout=False, stderr=True)
+
+            if exit_code != 0:
+                print("Exit code:", exit_code)
+                print("STDOUT:\n", stdout_logs.decode())
+                print("STDERR:\n", stderr_logs.decode())
+
+        finally:
+            try:
+                if container is not None:
+                    container.remove(force=True)
+            except Exception as e:
+                print("Cleanup failed:", e)
+        return stdout_logs
+
+    def run_stf_tool(self, tool: str, host_input: str, host_output: Optional[str] = None):
+        docker_input = self.convert_host_path_to_docker_path(host_input)
+        binds = {
+            host_input: docker_input,
+        }
+
+        tool_path = os.path.join(Const.STF_TOOLS, tool, tool)
+        cmd = f"{tool_path} {docker_input}"
+        result = self.run_command(cmd, binds)
+        if host_output is not None:
+            with open(host_output, "wb") as f:
+                f.write(result)
+
+        return result
+
+    def convert_host_path_to_docker_path(self, path: str) -> str:
+        parts = os.path.abspath(path).strip(os.sep).split(os.sep)
+        parts.insert(0, Const.DOCKER_TEMP_FOLDER)
+        return os.path.join(*parts)
diff --git a/traces/docker_stf_trace_gen/utils/trace_generator_arg_parser.py b/traces/docker_stf_trace_gen/utils/trace_generator_arg_parser.py
new file mode 100644
index 00000000..2394b66c
--- /dev/null
+++ b/traces/docker_stf_trace_gen/utils/trace_generator_arg_parser.py
@@ -0,0 +1,72 @@
+import argparse
+import sys
+from data.consts import Const
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description="Generate traces for a workload",
+        usage='python generate_trace.py [OPTIONS] MODE WORKLOAD_PATH'
+    )
+    parser.add_argument("--emulator", required=True, choices=["spike", "qemu"])
+    parser.add_argument("--isa", required=False, help="Instruction set architecture")
+    parser.add_argument("--dump", action='store_true', required=False, default=False, help="Create trace file dump")
+    parser.add_argument("--pk", action='store_true', required=False, default=False, help="Use Spike pk (proxy kernel)")
+    parser.add_argument(
+        "--image-name",
+        required=False,
+        default=Const.DOCKER_IMAGE_NAME,
+        help=f"Custom docker image name. default: {Const.DOCKER_IMAGE_NAME}")
+    parser.add_argument('-o', '--output', required=False, help='Output folder or file path')
+
+    subparsers = parser.add_subparsers(title='Mode', dest='mode')
+    subparsers.add_parser(
+        'macro',
+        help='Trace mode using START_TRACE and STOP_TRACE macros on the workload binary',
+        description='Trace mode using START_TRACE and STOP_TRACE macros on the workload binary',
+        formatter_class=argparse.RawTextHelpFormatter
+    )
+
+    inst_count_mode_parser = subparsers.add_parser(
+        'insn_count',
+        help='Traces a fixed number of instructions, after a given start instruction index',
+        description='Traces a fixed number of instructions, after a given start instruction index',
+        formatter_class=argparse.RawTextHelpFormatter
+    )
+    inst_count_mode_parser.add_argument(
+        "--num-instructions",
+        required=True,
+        type=int,
+        help="Number of instructions to trace")
+    inst_count_mode_parser.add_argument(
+        "--start-instruction",
+        required=False,
+        type=int,
+        default=0,
+        help="Number of instructions to skip before tracing (insn_count mode)")
+
+    pc_mode_parser = subparsers.add_parser(
+        'pc_count',
+        help='Traces a fixed number of instructions, after a given PC value and PC hits count',
+        description='Traces a fixed number of instructions, after a given PC value and PC hits count',
+        formatter_class=argparse.RawTextHelpFormatter
+    )
+    pc_mode_parser.add_argument("--num-instructions", required=True, type=int, help="Number of instructions to trace")
+    pc_mode_parser.add_argument(
+        "--start-pc",
+        required=True,
+        type=lambda value: int(value, 0),
+        help="Starting program counter, hex or decimal (pc_count mode)")
+    pc_mode_parser.add_argument(
+        "--pc-threshold",
+        required=False,
+        type=int,
+        default=1,
+        help="PC hit threshold (pc_count mode)")
+
+    parser.add_argument("workload", help="Path to workload file")
+
+    if len(sys.argv) == 1:
+        parser.print_help()
+        print("\nRun 'generate_trace.py MODE --help' for more information on a mode.")
+        print("\nFor more help on how to use 
generate_trace.py, head to GITHUB_README_LINK")
+        sys.exit(0)
+
+    args = parser.parse_args()
+    return args
diff --git a/traces/docker_stf_trace_gen/utils/util.py b/traces/docker_stf_trace_gen/utils/util.py
index 3d0cf66f..50e804f0 100644
--- a/traces/docker_stf_trace_gen/utils/util.py
+++ b/traces/docker_stf_trace_gen/utils/util.py
@@ -1,3 +1,4 @@
+import hashlib
 import sys
 import subprocess
 import time
@@ -7,6 +8,7 @@
 import logging
 from enum import Enum
 
+
 class LogLevel(Enum):
     INFO = "\033[32m"
     ERROR = "\033[31m"
@@ -14,64 +16,87 @@
     DEBUG = "\033[34m"
     HEADER = "\033[95m\033[1m"
 
+
 logging.basicConfig(level=logging.INFO, format="[%(asctime)s] %(levelname)s: %(message)s", datefmt="%H:%M:%S")
 
-def log(level: LogLevel, msg: str, file=sys.stdout):
-    """Log with ANSI color and timestamp; raises on ERROR."""
-    color = level.value
-    print(f"{color}{msg}\033[0m", file=file if level != LogLevel.ERROR else sys.stderr)
-    if level == LogLevel.ERROR:
-        exit(1)
-        raise RuntimeError(msg)  # do we want to raise here?
-
+class Util:
+    @staticmethod
+    def compute_sha256(file_path: str) -> str:
+        hash_sha256 = hashlib.sha256()
+        with open(file_path, "rb") as f:
+            for chunk in iter(lambda: f.read(4096), b""):
+                hash_sha256.update(chunk)
+        return hash_sha256.hexdigest()
+
+    @staticmethod
+    def log(level: LogLevel, msg: str, file=sys.stdout):
+        """Log with ANSI color; exits the process on ERROR."""
+        color = level.value
+        print(f"{color}{msg}\033[0m", file=file if level != LogLevel.ERROR else sys.stderr)
+        if level == LogLevel.ERROR:
+            sys.exit(1)
 
-def run_cmd(cmd: List[str], cwd: Optional[Path] = None, timeout: int = 300, show: bool = True) -> Tuple[bool, str, str]:
-    """Run command, return (success, stdout, stderr)."""
-    if show:
-        log(LogLevel.DEBUG, f"Running: {' '.join(map(str, cmd))}")
-    try:
-        result = subprocess.run(cmd, cwd=cwd, capture_output=True, text=True, timeout=timeout, check=False)
-        if not result.returncode == 0:
-            log(LogLevel.ERROR, f"Command failed: {result.stderr}")
-        return result.returncode == 0, result.stdout, result.stderr
-    except subprocess.TimeoutExpired:
-        log(LogLevel.ERROR, f"Timeout after {timeout}s")
-    except Exception as e:
-        log(LogLevel.ERROR, f"Exception: {e}")
+    @staticmethod
+    def run_cmd(
+        cmd: List[str],
+        cwd: Optional[Path] = None,
+        timeout: int = 300,
+        show: bool = True
+    ) -> Tuple[bool, str, str]:
+        """Run command, return (success, stdout, stderr)."""
+        if show:
+            Util.log(LogLevel.DEBUG, f"Running: {' '.join(map(str, cmd))}")
+        try:
+            result = subprocess.run(cmd, cwd=cwd, capture_output=True, text=True, timeout=timeout, check=False)
+            if result.returncode != 0:
+                Util.log(LogLevel.ERROR, f"Command failed: {result.stderr}")
+            return result.returncode == 0, result.stdout, result.stderr
+        except subprocess.TimeoutExpired:
+            Util.log(LogLevel.ERROR, f"Timeout after {timeout}s")
+        except Exception as e:
+            Util.log(LogLevel.ERROR, f"Exception: {e}")
 
-def get_time() -> float:
-    """Return current time in seconds."""
-    return time.time()
+    @staticmethod
+    def get_time() -> float:
+        """Return current time in seconds."""
+        return time.time()
 
-def ensure_dir(path: Path) -> Path:
-    """Create directory if it doesn't exist."""
-    path.mkdir(parents=True, exist_ok=True)
-    return path
+    @staticmethod
+    def ensure_dir(path: Path) -> Path:
+        """Create directory if it doesn't exist."""
+        path.mkdir(parents=True, exist_ok=True)
+        return path
 
-def clean_dir(path: Path) -> Path:
-    """Clean and recreate directory."""
-    if path.exists():
-        shutil.rmtree(path)
-    return ensure_dir(path)
+    @staticmethod
+    def clean_dir(path: Path) -> Path:
+        """Clean and recreate directory."""
+        if path.exists():
+            shutil.rmtree(path)
+        return Util.ensure_dir(path)
 
-def validate_tool(tool: str):
-    """Check if tool is in PATH."""
-    if not shutil.which(tool):
-        log(LogLevel.ERROR, f"Tool not found: {tool}")
-        return False
-    return True
+    @staticmethod
+    def validate_tool(tool: str):
+        """Check if tool is in PATH."""
+        if not shutil.which(tool):
+            Util.log(LogLevel.ERROR, f"Tool not found: {tool}")
+            return False
+        return True
 
-def file_exists(path: Path | str) -> bool:
-    """Check if file exists."""
-    return Path(path).exists()
+    @staticmethod
+    def file_exists(path: Path | str) -> bool:
+        """Check if file exists."""
+        return Path(path).exists()
 
-def read_file_lines(path: Path) -> List[str]:
-    """Read non-empty lines from file."""
-    if not file_exists(path):
-        log(LogLevel.ERROR, f"File not found: {path}")
-    return [line.strip() for line in path.read_text().splitlines() if line.strip()]
+    @staticmethod
+    def read_file_lines(path: Path) -> List[str]:
+        """Read non-empty lines from file."""
+        if not Util.file_exists(path):
+            Util.log(LogLevel.ERROR, f"File not found: {path}")
+        return [line.strip() for line in path.read_text().splitlines() if line.strip()]
 
-def write_file_lines(path: Path, lines: List[str]):
-    """Write lines to file."""
-    path.write_text("\n".join(lines) + "\n")
\ No newline at end of file
+    @staticmethod
+    def write_file_lines(path: Path, lines: List[str]):
+        """Write lines to file."""
+        path.write_text("\n".join(lines) + "\n")
diff --git a/traces/stf_metadata/README.md b/traces/stf_metadata/README.md
index ab1c89ba..c85c0d46 100644
--- a/traces/stf_metadata/README.md
+++ b/traces/stf_metadata/README.md
@@ -1,7 +1,5 @@
 # STF (Simulation Trace Format) Metadata Specification
 
-> This specification is still under development.
-
 ## Table of Contents
 
 1. [Introduction](#introduction)
@@ -47,6 +45,7 @@
       GEN_VERSION: "Trace generator version"
       GEN_COMMENT: "Trace generator comment (e.g., git commit SHA for spike-stf)"
       STF_FEATURES: "list of STF features in the STF file"
+      interval_mode: "Tracing strategy: one of fullyTrace, macro, instructionCount, or ip (program counter)"
     trace_interval:
       instruction_pc: "Program counter (PC) value at the start of the trace"
      pc_count: "Program counter execution count value present at the start of the trace"
@@ -91,10 +90,8 @@
       - STF_CONTAIN_PHYSICAL_ADDRESS
       - STF_CONTAIN_RV64
       - STF_CONTAIN_EVENT64
+      interval_mode: instructionCount
     trace_interval:
-      instruction_pc: 0
-      pc_count: 0
-      interval_length: 100
       start_instruction_index: 0
       end_instruction_index: 100
 ```
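+
+For example, running the generator in `traces/docker_stf_trace_gen` writes this metadata file
+next to the trace (a sketch assuming the tool's default output naming):
+
+```bash
+# the trace for workload.elf defaults to workload.elf.zstf;
+# its metadata is written as <trace>.metadata.yaml
+python generate_trace.py --emulator spike macro workload.elf
+cat workload.elf.zstf.metadata.yaml
+```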