diff --git a/.gitignore b/.gitignore index 578e573d..145c1bd0 100644 --- a/.gitignore +++ b/.gitignore @@ -41,4 +41,8 @@ *~ # Python cache file -__pycache__/ \ No newline at end of file +__pycache__/ + + +# output generated by the end to end flow +outputs diff --git a/traces/docker_stf_trace_gen/Dockerfile b/traces/docker_stf_trace_gen/Dockerfile index 478dfb45..52952e8c 100644 --- a/traces/docker_stf_trace_gen/Dockerfile +++ b/traces/docker_stf_trace_gen/Dockerfile @@ -1,17 +1,17 @@ FROM ubuntu:24.04 -# Set environment variables early +# Set environment variables early (separate lines) ENV RISCV=/riscv -ENV QEMU_DIR=/qemu ENV QEMU_PLUGINS=/qemu/build/contrib/plugins -ENV PATH=$RISCV/bin:/opt/riscv/riscv32-elf/bin:/opt/riscv/riscv64-elf/bin:/opt/riscv/riscv32-glibc/bin:/SimPoint/bin:/qemu/build:$PATH ENV DEBIAN_FRONTEND=noninteractive ENV WORKDIR=/workspace ENV WORKLOADS=/workloads -ENV OUTPUT=/output +ENV OUTPUT=/outputs +ENV PATH=$RISCV/bin:/opt/riscv/riscv32-elf/bin:/opt/riscv/riscv64-elf/bin:/opt/riscv/riscv32-glibc/bin:/SimPoint/bin:/qemu/build:$PATH +ENV STF_DIR=$RISCV/condor.riscv-isa-sim/stf_lib # Install dependencies and clean up in one layer -RUN apt update && apt install -y \ +RUN apt-get update && apt-get install -y \ autoconf \ automake \ autotools-dev \ @@ -54,7 +54,10 @@ RUN apt update && apt install -y \ wget \ zlib1g-dev \ zstd \ - python3-yaml + python3-yaml \ + python3-pyelftools \ + && rm -rf /var/lib/apt/lists/* \ + && apt-get clean # Configure git for building RUN git config --global url."https://github.com/".insteadOf "git@github.com:" && \ @@ -62,7 +65,7 @@ RUN git config --global url."https://github.com/".insteadOf "git@github.com:" && git config --global user.name "Docker Builder" # Create directory structure -RUN mkdir -p /output +RUN mkdir -p /workloads /outputs /workspace $RISCV # Clone repositories in RISCV directory WORKDIR $RISCV @@ -83,11 +86,11 @@ RUN git clone https://github.com/riscv-software-src/riscv-tests.git && \ # Copy and execute toolchain setup script COPY utils/get-tool.sh $RISCV/get-tool.sh -RUN chmod +x $RISCV/get-tool.sh && \ - $RISCV/get-tool.sh && \ - echo "Toolchain version:" && \ - riscv64-unknown-linux-gnu-gcc --version 2>/dev/null || echo "Toolchain setup pending" +RUN chmod +x $RISCV/get-tool.sh +RUN $RISCV/get-tool.sh || true +RUN echo "Toolchain version:" && (riscv64-unknown-linux-gnu-gcc --version 2>/dev/null || echo "Toolchain setup pending") +RUN mkdir -p /qemu/build # Build QEMU with plugins support WORKDIR /qemu/build RUN ../configure \ @@ -107,16 +110,15 @@ RUN make -j$(nproc) WORKDIR $RISCV/stf_tools RUN git submodule update --init --recursive WORKDIR $RISCV/stf_tools/release -RUN cmake .. -DCMAKE_BUILD_TYPE=Release && make -j$(nproc) +RUN cmake .. -DCMAKE_BUILD_TYPE=Release +RUN make -j$(nproc) # Build condor.riscv-isa-sim (Spike) -# make the builddir ? 
WORKDIR $RISCV/condor.riscv-isa-sim/build RUN ../configure --prefix=$RISCV/condor.riscv-isa-sim/install RUN make -j$(nproc) RUN make regress RUN make install -ENV STF_DIR=$RISCV/condor.riscv-isa-sim/stf_lib # Create mount points for runtime mounting # Environment and flow scripts will be mounted at runtime @@ -128,18 +130,17 @@ RUN mkdir -p /workloads/environment /flow /outputs # - Host outputs -> /outputs RUN cp $RISCV/condor.riscv-isa-sim/install/bin/spike /usr/bin/ - WORKDIR $RISCV/trace-gen -RUN make +RUN make RUN make install +# Build riscv-pk (to provide pk for Spike if requested) +WORKDIR $RISCV/riscv-pk +RUN mkdir -p build WORKDIR $RISCV/riscv-pk/build -RUN mkdir $RISCV/pk -RUN ../configure --prefix=$RISCV/pk --host=riscv64-unknown-elf +RUN ../configure --host=riscv64-unknown-elf RUN make -j$(nproc) -RUN make install -ENV PATH=$RISCV/pk:$PATH - -WORKDIR /workspace CMD ["/bin/bash"] + +# Volumes are mounted at runtime diff --git a/traces/docker_stf_trace_gen/README.md b/traces/docker_stf_trace_gen/README.md index 6e5605b0..a054e929 100644 --- a/traces/docker_stf_trace_gen/README.md +++ b/traces/docker_stf_trace_gen/README.md @@ -1,6 +1,11 @@ # RISC-V Workload Analysis System -A unified system for building and running RISC-V benchmarks with Spike and QEMU emulators, providing seamless switching between emulators and comprehensive trace generation for performance modeling. +A unified system for building and running RISC-V benchmarks with Spike and QEMU emulators, providing seamless switching between emulators and comprehensive trace generation for performance modeling. + +Reproducibility is the goal. +Along with this, a trace archive records information such as: + - binary hash, STF trace, SimPointed traces, ... + ## Table of Contents @@ -27,209 +32,154 @@ The toolchain uses [riscv-gnu-toolchain](https://github.com/riscv-collab/riscv-g ```mermaid -flowchart TD - %% Main workflow - A[Workload Sources
embench-iot, riscv-tests] --> B[Build System
build_workload.py] - B --> C[Emulator Execution
run_workload.py] - - %% Emulator paths - QEMU mainly for BBV - C --> C1[QEMU
BBV Generation] - C --> C2[Spike
STF Trace Generation] - - %% BBV from QEMU - C1 --> D[.bbv files
Basic Block Vectors] - - %% Analysis options - D --> E[SimPoint Analysis
run_simpoint.py] - D --> S1[Workload Slicing
Work in Progress] - - %% SimPoint outputs guide Spike STF generation - E --> E1[.simpoints + .weights] - S1 --> S2[Slice Parameters] - - %% Generate final STF traces using Spike based on analysis - E1 --> ST[Spike STF Generation
SimPoint-guided] - S2 --> ST2[Spike STF Generation
Slice-guided] - C2 --> D2[Full STF Traces
.zstf files] - - %% Final reduced traces - ST --> F[Reduced STF Traces
SimPoint-based] - ST2 --> F2[Reduced STF Traces
Slice-based] - - %% Trace Archive Integration - F --> TA[Trace Archive
trace_archive.py] - F2 --> TA - D2 --> TA - TA --> TA1[Upload/Download
with Metadata] - TA --> TA2[Community Archive
Shared Storage] - - %% Performance Model - Direct and Archive paths - F --> G[RISC-V Performance Model
Olympia - STF Only] - F2 --> G - D2 --> G - TA2 --> G - - %% Configuration inputs - B1[board.cfg
Compiler flags] --> B - B2[Environment Files
crt0.S, main.c, stub.c] --> B - B3[Workload Sources
benchmark functions] --> B - - %% Docker environment - subgraph Docker ["Docker Container Environment"] - B - C - D - E - S1 - ST - ST2 - end - - %% Output persistence - subgraph Outputs ["./outputs/ (Host Mounted)"] - D - D2 - E1 - S2 - F - F2 - end - - %% Trace Archive System - subgraph Archive ["Trace Archive System"] - TA - TA1 - TA2 - end - - %% Styling - classDef primary fill:#e1f5fe,stroke:#01579b,stroke-width:2px - classDef config fill:#f3e5f5,stroke:#4a148c,stroke-width:2px - classDef emulator fill:#e8f5e8,stroke:#1b5e20,stroke-width:2px - classDef output fill:#fff3e0,stroke:#e65100,stroke-width:2px - classDef wip fill:#fff9c4,stroke:#f57f17,stroke-width:2px,stroke-dasharray: 5 5 - classDef archive fill:#f1f8e9,stroke:#33691e,stroke-width:2px - classDef performance fill:#fce4ec,stroke:#880e4f,stroke-width:2px - - class A,B,C,D,E,ST,ST2 primary - class B1,B2,B3 config - class C1,C2 emulator - class D,D2,E1,S2,F,F2 output - class S1 wip - class TA,TA1,TA2 archive - class G performance -``` -Dependencies: -- `board.cfg` files define compiler flags and source files -- Environment files provide runtime support (crt0.S, main.c, stub.c) -- Workload-specific sources with benchmark functions -- Docker containers provide consistent build environment +flowchart TB +classDef node fill +%% Start +A[Start]:::node --> B[Build]:::node -## Current Development Roadmap: +%% Build split +B --> C[Compile from source
]:::node +B --> D[Link existing OBJ/ELF
--input-obj + --entrypoint]:::node -**Work to be done to add flow for:** -- Workload reduction through checkpoint-based slicing (e.g., functional warming, live-cache checkpoints, time-based sampling) -- Workload reduction through SimPoint analysis results -- Add visualizations for clustering results if required to understand the projection and clusters -- Combine with the flow to generate trace metadata and upload to common trace archive -- Combine with the flow to interact with trace archive and run traces on Olympia +%% Artifact +C --> E[OBJ/ELF artifact
board.yaml: flags, includes, env]:::node +D --> E -**Additional enhancements:** -- Add support for real world workloads (CoreMark, SPEC etc.) -## Primary Developments +%% Run +E --> F[run_workload.py
→ .bbv / .zstf]:::node -**Config-driven approach** - No hardcoded compiler flags, everything configured through `board.cfg` files, easier to configure build time flags. -**Persistent outputs** - Results saved to `./outputs/` and persist after container exit -**Runtime mounting** - Scripts mounted at runtime, to allow access from host device -**Modular design** - Easy addition of new workloads and boards -**Logging to allow Debugging** - Logs the commands executed in the flow to reproduce and understand errors. +%% Outputs +F --> G[.bbv]:::node +F --> H[full .zstf]:::node + +%% Analyze +G --> I[run_simpoint.py
→ .simpoints & .weights]:::node +I --> J[generate_trace.py
sliced STF]:::node + +%% Model +J --> K[run_olympia.py
sliced STF or full STF]:::node +H --> K +%% End +K --> L[End]:::node + +``` +Dependencies: +- `board.yaml` files define compiler flags and source files +- Environment files provide runtime support (crt0.S, main.c, stub.c) +- Workload-specific sources with benchmark functions +- Docker containers provide consistent build environment ## Quick Start ### 1. Build Docker Image ```bash -docker build -t riscv-perf-model:latest . +git clone https://github.com/Jatin-exe/riscv-perf-model.git +cd traces/docker_stf_trace_gen +docker build -t riscv-perf-model:olympia . ``` -### 2. Interactive Workflow +### 2. End to End flow ```bash -./full_flow.py +./full_flow.py --workload embench-iot --emulator spike --arch rv32 --platform baremetal --bbv --trace --simpoint --interval-size 10000 --slice --benchmark matmult-int ``` - -This provides an interactive interface allowing seamless workflow management. +Interactive mode asks all settings up-front (workload vs wrapper-link, emulator, arch, platform, BBV/trace/simpoint/sliced vs full-trace, Olympia), then runs the selected pipeline end-to-end. ### 3. Interactive Container Access ```bash -# Option 1: Helper script +# Option 1: Helper script, mounts the output to the host for easy access. ./run_interactive.sh - -# Option 2: Manual command -docker run --rm -it \ - -v "$(pwd)/outputs:/outputs" \ - -v "$(pwd):/flow" \ - -v "$(pwd)/environment:/workloads/environment" \ - -v "$(pwd)/../workloads:/workloads" \ - -w /flow \ - riscv-perf-model:latest bash ``` -### 4. Results Structure + + +### Mounts and Directories +- `/flow`: your repo (scripts live here) +- `/default/environment`: minimal baremetal board environment (crt0.S, main.c, stub.c, util.c, link.ld) +- `/default`: default workload suites in the image (e.g., `/default/embench-iot`, `/default/riscv-tests`) +- `/workloads`: optional host-provided workload mount (if present) +- `/outputs`: all outputs (build artifacts, BBV, SimPoint, slices, Olympia reports) + - Built binaries: `/outputs//bin///` + - Run outputs: `/outputs////{bbv,traces,logs}` + - SimPoint: `/outputs/simpoint_analysis/.{simpoints,weights}` + - Sliced STF: `/outputs/simpointed////*.zstf` + - Olympia: `/outputs/olympia_reports////*` + +### 4. 
Results & Logs Structure ``` ./outputs/ -├── spike_output/ # Spike execution results -│ ├── results.txt # Timing and code size summary -│ ├── logs/ # Execution logs per workload -│ │ ├── md5sum.log -│ │ └── slre.log -│ ├── bbv/ # Basic Block Vector files -│ │ ├── md5sum.bbv_cpu0 # BBV data for SimPoint -│ │ └── slre.bbv_cpu0 -│ └── traces/ # STF trace files (detailed) -│ ├── md5sum.zstf # Compressed STF traces -│ └── slre.zstf -├── qemu_output/ # QEMU execution results -│ ├── results.txt -│ ├── logs/ -│ ├── bbv/ # BBV files from plugin -│ │ ├── md5sum_bbv.0.bb -│ │ └── slre_bbv.0.bb -│ └── traces/ # Assembly traces (basic) -│ ├── md5sum_trace.log # Simple assembly output -│ └── slre_trace.log -└── simpoint_output/ # SimPoint analysis results - ├── md5sum.simpoints # Representative intervals - ├── md5sum.weights # Interval weights - └── analysis_summary.json # Complete workflow summary +├── spike/ # Spike execution results +│ └── embench-iot/ +│ └── md5sum/ +│ ├── bbv/md5sum.bbv +│ ├── traces/md5sum.zstf +│ └── logs/ # emulator stdout/stderr are captured here +├── qemu/ # QEMU execution results +│ └── riscv-tests/ +│ └── dhrystone/ +│ ├── bbv/dhrystone.bbv.0.bb +│ └── logs/ +├── olympia_reports/ # Olympia reports from STF traces +│ ├── md5sum.txt +│ └── slre.txt +└── spike/ # Built artifacts (per-emulator bin under each) + └── bin/embench-iot/md5sum/ + ├── obj/*.o + └── md5sum # Executable ``` -## Configuration System -### Board Configuration Files -Located in `environment/{board}/board.cfg`, using space-separated values: -```ini -[DEFAULT] -board = spike -cc = riscv32-unknown-elf-gcc -defines = SPIKE=1 CPU_MHZ=1 - -[rv32.baremetal] -base_cflags = -march=rv32imafdc -mabi=ilp32d -mcmodel=medany -base_ldflags = -march=rv32imafdc -mabi=ilp32d -nostartfiles -libs = -lc -lm - -[embench-iot] -workload_cflags = -Dtrue=1 -Dfalse=0 -environment_files = crt0.S main.c stub.c util.c +## Primary Developments -[riscv-tests] -workload_cflags = -std=gnu99 -Wno-implicit-int -environment_files = crt0.S main.c stub.c util.c +**Config-driven approach** - No hardcoded compiler flags, everything configured through `board.yaml` files, easier to configure build time flags. +**Persistent outputs** - Results saved to `./outputs/` and persist after container exit +**Runtime mounting** - Scripts mounted at runtime, to allow access from host device +**Modular design** - Easy addition of new workloads and boards +**Logging to allow Debugging** - Logs the commands executed in the flow to reproduce and understand errors. + + + +## Workloads: Default vs Custom + +- Default suites are baked into the image under `/default`: `embench-iot`, `riscv-tests`, `dhrystone`. +- Custom suites on the host can be placed under `./workloads` and will mount to `/workloads` in the container. +- Build always prefers `/workloads/` when present, else falls back to `/default/`. 
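+
+To exercise this resolution order with a custom or patched suite, a minimal sketch (mount paths and image tag follow the examples in this README; adjust them to your checkout layout):
+
+```bash
+# Mount host workloads over /workloads so they take precedence over /default
+docker run --rm -it \
+  -v "$(pwd)/outputs:/outputs" \
+  -v "$(pwd):/flow" \
+  -v "$(pwd)/workloads:/workloads" \
+  -w /flow \
+  riscv-perf-model:olympia bash
+
+# Inside the container, a suite under /workloads (e.g. a patched embench-iot)
+# is picked up before the copy baked into /default
+python3 flow/build_workload.py --workload embench-iot --emulator spike --arch rv32 --platform baremetal
+```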
+ +## Configuration System + +### Board Configuration Files +Located in `environment/{board}/board.yaml`, using hierarchical YAML: + +```yaml +board: spike +defaults: + cc: riscv32-unknown-elf-gcc + defines: ["CPU_MHZ=1", "WARMUP_HEAT=1"] +architectures: + rv32: + platforms: + baremetal: + cc: riscv32-unknown-elf-gcc + arch: rv32imafdc + abi: ilp32d + base_cflags: ["-march=rv32imafdc", "-mabi=ilp32d", "-mcmodel=medany", "-mno-relax", "-mstrict-align"] + base_ldflags: ["-march=rv32imafdc", "-mabi=ilp32d", "-nostartfiles", "-Wl,--no-warn-rwx-segments"] +workloads: + embench-iot: + workload_cflags: ["-Dtrue=1", "-Dfalse=0"] + platforms: + baremetal: + environment_files: ["crt0.S", "main.c", "stub.c", "util.c"] +features: + bbv: + bbv_cflags: ["-DBBV"] + trace: + trace_cflags: ["-DTRACE"] ``` -Compiler flags can be changed by editing `/workloads/environment/{qemu,spike}/board.cfg` files. This allows defining specific compiler flags for any workload, source files, and header files, enabling cleaner addition of new workloads without hardcoding compiler options. +Compiler flags and ISA are derived from these YAML configs at build/run time. ### Adding New Workloads @@ -242,6 +192,27 @@ environment_files = crt0.S main.c stub.c util.c Goal here is to get easier additions of newer workloads by adding their compilation settings. +## Using Precompiled Binaries or Objects + +Option A: Run an existing ELF directly + +```bash +python3 flow/run_workload.py --emulator spike --arch rv32 --platform baremetal \ + --binary /path/to/your.elf --bbv --interval-size 10000 +``` + +Option B: Link objects with environment wrapper and a custom entrypoint + +```bash +python3 flow/build_workload.py --input-obj your.o another.o --entrypoint your_main \ + --arch rv32 --platform baremetal --emulator spike +# Then run the produced binary under /outputs//bin/custom// +python3 flow/run_workload.py --emulator spike --arch rv32 --platform baremetal \ + --binary /outputs/spike/bin/custom// --bbv --interval-size 10000 +``` + +The runner discovers binaries under `/outputs//bin` and generates outputs under `/outputs////`. + ## Command Examples ### Interactive Mode @@ -251,23 +222,31 @@ python3 full_flow.py ### Direct Script Usage -It is required to build your workload with `--bbv` and `--trace` if generating BBV and traces on spike (See [doc/bbv-trace](doc/bbv-trace)) - +Build with `--bbv` and/or `--trace` to enable instrumentation. Spike produces STF `.zstf` traces. QEMU can produce STF via plugin. ```bash -# Build Embench Workload with instrumentation for BBV generation and Trace -python3 build_workload.py --workload embench-iot --board spike --arch rv32 --bbv --trace +# Build Embench workload for Spike w/ BBV + STF trace +python3 flow/build_workload.py --workload embench-iot --emulator spike --arch rv32 --bbv --trace + +# Run on Spike, generate BBV + STF +python3 flow/run_workload.py --emulator spike --arch rv32 --workload embench-iot --bbv --trace --clean -# Run the compiled Embench workload on spike with BBV and Trace generation -python3 run_workload.py --emulator spike --arch rv32 --bbv --trace +# Run on QEMU, generate BBV and STF via plugin (configurable num instructions) +python3 flow/run_workload.py --emulator qemu --arch rv64 --workload riscv-tests --bbv --trace \ + --trace-num-instructions 2000000 --trace-start-instruction 0 ``` +Important: BBV Interval Size +- The BBV interval size (`--interval-size`) sets SimPoint windowing for BBV collection on Spike and QEMU. 
+- If the workload is very small and the interval size is very large, the BBV file can end up empty (no full windows observed). Reduce interval size. +- For testing SimPoint end-to-end, use a small interval like `--interval-size 10000` to ensure BBV and SimPoint outputs are generated quickly. + ## Environment Structure Essential files providing the baremetal runtime: ``` environment/{board}/ -├── board.cfg # Configuration (space-separated flags, sources) +├── board.yaml # Configuration (YAML, flags, sources) ├── main.c # Unified main with board support ├── crt0.S # Startup assembly providing _start ├── link.ld # Linker script for memory layout @@ -277,6 +256,30 @@ environment/{board}/ The `_start` symbol in crt0.S sets up the baremetal environment, then calls `env_main` which invokes benchmark functions. These functions are designed to be overwritten by the benchmarks being linked. The stub.c provides stub implementations so newlib compiles seamlessly for baremetal workloads. +## CLI Arguments + +- flow/build_workload.py + - `--workload`: suite (embench-iot | riscv-tests | dhrystone) + - `--emulator`: spike | qemu + - `--arch`: rv32 | rv64 + - `--platform`: baremetal | linux + - `--benchmark`: build only a specific benchmark (optional) + - `--bbv`, `--trace`: add instrumentation + - DEBUG=1 prints parsed config (cc/cflags/ldflags) + +- flow/run_workload.py + - `--emulator`: spike | qemu + - `--arch`, `--platform` + - `--workload`: suite + - `--benchmark`: specific benchmark (optional; if omitted, runs all) + - `--bbv`, `--trace`, `--interval-size` + - `--trace-num-instructions`, `--trace-start-instruction` (QEMU STF plugin) + +- flow/run_simpoint.py + - `--emulator`: spike | qemu + - `--workload`: suite + - `--max-k`: SimPoint K + ## Performance Comparison Recent benchmarking results show QEMU performance advantages: @@ -298,9 +301,37 @@ QEMU advantage: 2.70x faster More in [doc/emulator-comparison](doc/emulator-comparison) -### STF Trace Generation +### Trace Generation +- Spike: Detailed STF (`.zstf`) via `--stf_macro_tracing`. +- QEMU: STF via plugin (`/usr/lib/libstfmem.so`) using dyn_insn_count mode; configurable with `--trace-num-instructions` and `--trace-start-instruction`. + +Olympia (riscv-perf-model) is built into the image and exposed as `olympia`. + +Generate STF (sliced or full) inside the container: + +Inside the container, prefer the container-native generator (uses `/outputs` state): +``` +# SimPoint-sliced STF for Spike; auto-reads interval_size from run_meta.json +python3 flow/generate_trace.py --emulator spike --workload embench-iot --benchmark aha-mont64 --sliced --verify --dump --clean + +# One-shot slice of N instructions (Spike or QEMU) starting at 0 +python3 flow/generate_trace.py --emulator spike --workload embench-iot --benchmark aha-mont64 --interval-size 2000000 --clean + +# Run Olympia on sliced traces +python3 flow/run_olympia.py --dir /outputs/simpointed/spike/embench-iot/aha-mont64 --interval 10000 --clean +``` + +## Current Development Roadmap: + +**Work to be done to add flow for:** +- Add visualizations for clustering results if required to understand the projection and clusters +- Combine with the flow to generate trace metadata and upload to common trace archive +- Combine with the flow to interact with trace archive and run traces on Olympia + +**Additional enhancements:** +- Add support for real world workloads (CoreMark, SPEC etc.) + -Read the [generate trace](generate_trace.md) file for details. 
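+
+Generated `.zstf` traces can be sanity-checked with the stf_tools bundled in the image. A sketch, assuming the tool layout referenced in `data/consts.py` (tools under `/riscv/stf_tools/release/tools/<tool>/<tool>`) and the run-output paths shown earlier:
+
+```bash
+# Count the instructions in a full trace produced by run_workload.py
+/riscv/stf_tools/release/tools/stf_count/stf_count \
+  /outputs/spike/embench-iot/md5sum/traces/md5sum.zstf
+
+# Dump the first records of the same trace in human-readable form
+/riscv/stf_tools/release/tools/stf_dump/stf_dump \
+  /outputs/spike/embench-iot/md5sum/traces/md5sum.zstf | head -n 40
+```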
## Documentation @@ -308,4 +339,4 @@ Read the [generate trace](generate_trace.md) file for details. - [doc/adding-workloads.md](doc/adding-workloads.md) - Guide for integrating new benchmark suites - [doc/bbv-trace.md](doc/bbv-trace.md) - BBV generation and trace integration details -Code runs inside Docker containers with results stored in the `./outputs/` folder, mounted to `/outputs` in the container. This provides reproducibility and convenience without requiring the Docker container environment for result access. \ No newline at end of file +Code runs inside Docker containers with results stored in the `./outputs/` folder, mounted to `/outputs` in the container. This provides reproducibility and convenience without requiring the Docker container environment for result access. diff --git a/traces/docker_stf_trace_gen/build_workload.py b/traces/docker_stf_trace_gen/build_workload.py deleted file mode 100755 index ad8c3596..00000000 --- a/traces/docker_stf_trace_gen/build_workload.py +++ /dev/null @@ -1,164 +0,0 @@ -#!/usr/bin/env python3 -"""Builds RISC-V workloads using configuration-driven approach.""" -import argparse -from pathlib import Path -from typing import List -from utils.util import Util, LogLevel -from utils.config import BoardConfig - -DEFAULT_WORKLOADS = { - "embench-iot": "/workloads/embench-iot", - "riscv-tests": "/workloads/riscv-tests", - "dhrystone": "/workloads/riscv-tests" -} - -class WorkloadBuilder: - """Manages building of RISC-V workloads.""" - def __init__(self, board: str, arch: str, platform: str, bbv: bool, trace: bool): - self.board = board - self.arch = arch - self.platform = platform - self.bbv = bbv - self.trace = trace - self.config = BoardConfig(board) - self.bin_dir = Util.clean_dir(Path(f"/workloads/bin/{board}")) - self.env_dir = Path(f"/workloads/environment/{board}") - self.executables = [] - - def _get_flags(self, config: dict, workload_path: Path, workload_type: str, benchmark: str = None) -> tuple: - """Get compiler and linker flags.""" - build_config = self.config.get_build_config(self.arch, self.platform, workload_type, self.bbv, self.trace, benchmark) - cc = build_config.get('cc') - cflags = build_config.get('base_cflags', []) + build_config.get('cflags', []) - cflags.extend(f"-D{define}" for define in build_config.get('defines', [])) - cflags.extend(f"-I{inc}" for inc in self.config.get_workload_includes(workload_path, workload_type)) - cflags.append(f"-I{workload_path}/env") - if self.platform == "baremetal": - cflags.append(f"-I{self.env_dir}") - ldflags = build_config.get('base_ldflags', []) - return cc, cflags, ldflags, build_config - - def build_environment(self, workload: str): - """Compile environment runtime files.""" - if self.config.should_skip_environment(self.platform, workload): - Util.log(LogLevel.INFO, f"Skipping environment build for {self.platform}") - return - cc, cflags, _, _ = self._get_flags({}, workload, workload) - for src in self.config.get_environment_files(workload): - src_file = self.env_dir / src - if src_file.exists(): - obj = self.env_dir / f"{Path(src).stem}.o" - Util.run_cmd([cc, "-c", *cflags, "-o", str(obj), str(src_file)]) - - def build_common_files(self, workload_path: Path, workload_type: str) -> List[str]: - """Compile common files for riscv-tests.""" - if workload_type != "riscv-tests" or not (common_dir := workload_path / "benchmarks" / "common").exists(): - return [] - cc, cflags, _, _ = self._get_flags({}, workload_path, workload_type) - skip = self.config.get_skip_common_files(self.platform, 
workload_type) - obj_files = [] - for c_file in common_dir.glob("*.c"): - if c_file.name in skip: - continue - obj = self.bin_dir / f"{c_file.stem}.o" - if Util.run_cmd([cc, "-c", *cflags, "-o", str(obj), str(c_file)]): - obj_files.append(str(obj)) - return obj_files - - def build_benchmark(self, bench: str, workload_path: Path, workload_type: str, common_objs: List[str]): - """Compile and link a single benchmark.""" - Util.log(LogLevel.INFO, f"Building {bench}") - bench_dir = workload_path / ("src" if workload_type == "embench-iot" else "benchmarks") / bench - if not bench_dir.exists(): - Util.log(LogLevel.ERROR, f"Benchmark directory not found: {bench_dir}") - - # Find source files - source_exts = ['.c'] if workload_type == "embench-iot" else ['.c', '.S'] - sources = [f for ext in source_exts for f in bench_dir.glob(f"*{ext}")] - if not sources: - Util.log(LogLevel.ERROR, f"No sources found for {bench}") - - # Compile sources - cc, cflags, ldflags, config = self._get_flags({}, workload_path, workload_type, bench) - obj_files = [] - for src in sources: - obj = self.bin_dir / f"{src.stem}.o" - if Util.run_cmd([cc, "-c", *cflags, "-o", str(obj), str(src)]): - obj_files.append(str(obj)) - - # Compile additional sources for embench-iot - if workload_type == "embench-iot": - for src in config.get('workload_sources', []): - src_path = Path(src) - if src_path.exists(): - obj = self.bin_dir / f"{src_path.stem}_support.o" - if Util.run_cmd([cc, "-c", *cflags, "-o", str(obj), str(src_path)]): - obj_files.append(str(obj)) - - # Link executable - if common_objs: - obj_files.extend(common_objs) - exe = self.bin_dir / bench - link_cmd = [cc, *ldflags, "-o", str(exe), *obj_files] - if self.platform == "baremetal": - link_cmd.extend([f"-T{self.env_dir / config.get('linker_script', 'link.ld')}", - *[str(self.env_dir / f"{Path(f).stem}.o") for f in self.config.get_environment_files(workload_type)]]) - link_cmd.extend(config.get('libs', [])) - if Util.run_cmd(link_cmd): - self.executables.append(str(exe)) - - def list_benchmarks(self, workload_path: Path, workload_type: str) -> List[str]: - """List available benchmarks for a workload.""" - dir_path = workload_path / ("src" if workload_type == "embench-iot" else "benchmarks") - if not dir_path.exists(): - return [] - return [d.name for d in dir_path.iterdir() if d.is_dir() and (workload_type != "riscv-tests" or d.name != "common")] - - def build_workload(self, workload: str, benchmark: str = None, custom_path: str = None): - """Build specified workload or benchmark.""" - workload_path = Path(custom_path or DEFAULT_WORKLOADS.get(workload, DEFAULT_WORKLOADS["riscv-tests"])) - workload_type = workload if workload in DEFAULT_WORKLOADS else "custom" - if not Util.file_exists(workload_path): - Util.log(LogLevel.ERROR, f"Workload path not found: {workload_path}") - - Util.log(LogLevel.INFO, f"Building {workload} for {self.arch}/{self.platform}/{self.board}") - self.build_environment(workload_type) - common_objs = self.build_common_files(workload_path, workload_type) - benchmarks = [benchmark] if benchmark else (["dhrystone"] if workload == "dhrystone" else self.list_benchmarks(workload_path, workload_type)) - - for bench in benchmarks: - self.build_benchmark(bench, workload_path, workload_type, common_objs) - - Util.log(LogLevel.INFO, f"Built {len(self.executables)} executables in {self.bin_dir}") - -def main(): - """Main entry point for building workloads.""" - parser = argparse.ArgumentParser(description="Build RISC-V workloads") - 
parser.add_argument("--workload", help="Workload name") - parser.add_argument("--arch", default="rv32", choices=["rv32", "rv64"]) - parser.add_argument("--platform", default="baremetal", choices=["baremetal", "linux"]) - parser.add_argument("--board", default="spike", choices=["spike", "qemu"]) - parser.add_argument("--benchmark", help="Specific benchmark") - parser.add_argument("--custom-path", help="Custom workload path") - parser.add_argument("--bbv", action="store_true", help="Enable BBV support") - parser.add_argument("--trace", action="store_true", help="Enable tracing") - parser.add_argument("--list", action="store_true", help="List available workloads") - args = parser.parse_args() - - if args.list: - for name, path in DEFAULT_WORKLOADS.items(): - if Util.file_exists(path): - Util.log(LogLevel.INFO, f"{name}: {path}") - builder = WorkloadBuilder(args.board, args.arch, args.platform, args.bbv, args.trace) - benchmarks = builder.list_benchmarks(Path(path), name) - if benchmarks: - Util.log(LogLevel.INFO, f" Benchmarks: {', '.join(benchmarks[:10])}{'...' if len(benchmarks) > 10 else ''}") - return - - if not args.workload: - Util.log(LogLevel.ERROR, "Workload required. Use --list to see available workloads") - builder = WorkloadBuilder(args.board, args.arch, args.platform, args.bbv, args.trace) - builder.build_workload(args.workload, args.benchmark, args.custom_path) - -if __name__ == "__main__": - main() diff --git a/traces/docker_stf_trace_gen/converters/base.py b/traces/docker_stf_trace_gen/converters/base.py deleted file mode 100644 index 464360b6..00000000 --- a/traces/docker_stf_trace_gen/converters/base.py +++ /dev/null @@ -1,8 +0,0 @@ -from abc import ABC -from typing import Any - - -class BaseConverter(ABC): - @staticmethod - def convert(self, input: Any) -> Any: - raise NotImplementedError("This method should be overridden by subclasses.") diff --git a/traces/docker_stf_trace_gen/converters/host_to_docker_path.py b/traces/docker_stf_trace_gen/converters/host_to_docker_path.py deleted file mode 100644 index 85051f70..00000000 --- a/traces/docker_stf_trace_gen/converters/host_to_docker_path.py +++ /dev/null @@ -1,11 +0,0 @@ -import os -from converters.base import BaseConverter -from data.consts import Const - - -class HostToDockerPathConverter(BaseConverter): - @staticmethod - def convert(path: str) -> str: - parts = os.path.abspath(path).strip(os.sep).split(os.sep) - parts.insert(0, Const.DOCKER_TEMP_FOLDER) - return os.path.join(*parts) diff --git a/traces/docker_stf_trace_gen/data/consts.py b/traces/docker_stf_trace_gen/data/consts.py index a31bf753..3ef3c2b8 100644 --- a/traces/docker_stf_trace_gen/data/consts.py +++ b/traces/docker_stf_trace_gen/data/consts.py @@ -2,9 +2,15 @@ @dataclass(frozen=True) -class Const(): - DOCKER_IMAGE_NAME = "riscv-perf-model:latest" - DOCKER_TEMP_FOLDER = "/host" +class Const: + DOCKER_IMAGE_NAME = "riscv-perf-model:olympia" + + CONTAINER_FLOW_ROOT = "/flow" + CONTAINER_OUTPUT_ROOT = "/outputs" + CONTAINER_ENV_ROOT = "/default/environment" + CONTAINER_WORKLOAD_ROOT = "/workloads" + LIBSTFMEM = "/usr/lib/libstfmem.so" + QEMU_PLUGIN_FALLBACK = "/qemu/build/contrib/plugins/libstfmem.so" STF_TOOLS = "/riscv/stf_tools/release/tools" - SPKIE_PK = "/riscv/riscv-pk/build/pk" + SPIKE_PK = "/riscv/riscv-pk/build/pk" diff --git a/traces/docker_stf_trace_gen/doc/adding-workloads.md b/traces/docker_stf_trace_gen/doc/adding-workloads.md index ec234035..1e7cf36e 100644 --- a/traces/docker_stf_trace_gen/doc/adding-workloads.md +++ 
b/traces/docker_stf_trace_gen/doc/adding-workloads.md @@ -22,7 +22,9 @@ DEFAULT_WORKLOADS = { } # 2. Add build logic for your workload type -# no the build logic is done in the yaml fil need to udpat ehat +# 2. Configure build in YAML (preferred) +# Build logic is driven by the board YAML under environment//board.yaml. +# Add workload-specific flags, sources, and environment files there. elif workload_type == "my-benchmark": # Your build logic here benchmarks = get_my_benchmark_list() @@ -345,23 +347,23 @@ riscv64-linux-gnu-gcc -static -O2 \ ```bash # Build specific benchmark -./build_workload.py --workload my-benchmark --benchmark test1 --board spike +python3 flow/build_workload.py --workload my-benchmark --benchmark test1 --emulator spike # Build all benchmarks -./build_workload.py --workload my-benchmark --board qemu --arch rv64 +python3 flow/build_workload.py --workload my-benchmark --emulator qemu --arch rv64 # Build with BBV support -./build_workload.py --workload my-benchmark --bbv --board spike +python3 flow/build_workload.py --workload my-benchmark --bbv --emulator spike ``` ### Run Test ```bash # Run built workloads -./run_workload.py --emulator spike --workload test1 +python3 flow/run_workload.py --emulator spike --workload test1 # Run with analysis features -./run_workload.py --emulator qemu --bbv --trace +python3 flow/run_workload.py --emulator qemu --bbv --trace ``` ### Verify Output @@ -373,10 +375,10 @@ Check that your workload produces expected results: ls -la /workloads/bin/spike/test1 # Check execution logs -cat /output/spike_output/logs/test1.log +cat /outputs/spike_output/logs/test1.log # Check BBV generation (if enabled) -ls -la /output/spike_output/bbv/test1.bbv +ls -la /outputs/spike_output/bbv/test1.bbv ``` ## Advanced Features @@ -448,7 +450,7 @@ def build_my_benchmark(bench_name, workload_path, cc, base_cflags, platform, boa compiler = cc.replace("gcc", "g++") if source_file.suffix == ".cpp" else cc compile_cmd = [compiler, "-c"] + cflags + ["-o", obj_file, str(source_file)] run_cmd(compile_cmd) - + # Link all objects link_cmd = [cc] + base_cflags.split() + ["-o", exe_path] + obj_files + env_objs + ["-lc", "-lm"] run_cmd(link_cmd) @@ -469,4 +471,4 @@ def build_my_benchmark(bench_name, workload_path, cc, base_cflags, platform, boa With this framework, adding new workloads should be straightforward while maintaining compatibility with the existing analysis infrastructure. -Along with the .cfg files for each workload. \ No newline at end of file +Along with the .cfg files for each workload. 
diff --git a/traces/docker_stf_trace_gen/doc/bbv-trace.md b/traces/docker_stf_trace_gen/doc/bbv-trace.md index 99f1f305..402608fe 100644 --- a/traces/docker_stf_trace_gen/doc/bbv-trace.md +++ b/traces/docker_stf_trace_gen/doc/bbv-trace.md @@ -22,20 +22,20 @@ This guide explains how to use Basic Block Vector (BBV) generation and instructi ```bash # Build with BBV and trace support -./build_workload.py --workload embench-iot --benchmark md5sum --board spike --bbv --trace +python3 flow/build_workload.py --workload embench-iot --benchmark md5sum --emulator spike --bbv --trace # Run with BBV and tracing enabled -./run_workload.py --emulator spike --bbv --trace --workload md5sum +python3 flow/run_workload.py --emulator spike --bbv --trace --workload md5sum ``` ### Check Output ```bash # BBV files -ls /output/spike_output/bbv/md5sum.bbv +ls /outputs/spike_output/bbv/md5sum.bbv # Trace files -ls /output/spike_output/traces/md5sum.zstf +ls /outputs/spike_output/traces/md5sum.zstf # Use stf_tools based stf_dump to convert STF trace files to human readable dump ``` @@ -151,7 +151,7 @@ Spike uses Control and Status Register (CSR) accesses to mark BBV regions: **Build Requirements:** ```bash # Must use --bbv flag during build to enable CSR markers -./build_workload.py --workload embench-iot --benchmark aha-mont64 --board spike --bbv +python3 flow/build_workload.py --workload embench-iot --benchmark aha-mont64 --emulator spike --bbv ``` **Runtime Behavior:** @@ -169,7 +169,7 @@ QEMU uses a plugin architecture for BBV generation: **Build Requirements:** ```bash # --bbv flag adds -DBBV but QEMU doesn't need source markers -./build_workload.py --workload embench-iot --benchmark aha-mont64 --board qemu --bbv +python3 flow/build_workload.py --workload embench-iot --benchmark aha-mont64 --emulator qemu --bbv ``` **Runtime Behavior:** @@ -189,10 +189,10 @@ BBV generation uses intervals to sample execution: **Choosing Intervals:** ```bash # Fine-grained analysis (more data) -./run_workload.py --emulator qemu --bbv --interval-size 1000000 # 1M instructions +python3 flow/run_workload.py --emulator qemu --bbv --interval-size 1000000 # 1M instructions # Coarse-grained analysis (less data) -./run_workload.py --emulator spike --bbv --interval-size 1000000000 # 1B instructions +python3 flow/run_workload.py --emulator spike --bbv --interval-size 1000000000 # 1B instructions ``` ## Instruction Tracing @@ -204,7 +204,7 @@ Spike generates System Trace Format (STF) traces: **Build Requirements:** ```bash # Use --trace flag to enable trace markers -./build_workload.py --workload embench-iot --benchmark md5sum --board spike --trace +python3 flow/build_workload.py --workload embench-iot --benchmark md5sum --emulator spike --trace ``` **Runtime Command:** @@ -222,7 +222,7 @@ QEMU generates human-readable assembly traces: **Build Requirements:** ```bash # --trace flag adds -DTRACE for potential source-level control -./build_workload.py --workload embench-iot --benchmark md5sum --board qemu --trace +python3 flow/build_workload.py --workload embench-iot --benchmark md5sum --emulator qemu --trace ``` **Runtime Command:** @@ -303,7 +303,7 @@ result = benchmark(); BBV and trace files are organized by emulator: ``` -/output/ +/outputs/ ├── spike_output/ │ ├── bbv/ # Spike BBV files │ │ ├── md5sum.bbv @@ -422,10 +422,10 @@ BBV files are designed for SimPoint analysis: ```bash # Generate BBV files -./run_workload.py --emulator spike --bbv --workload embench-iot +python3 flow/run_workload.py --emulator spike --bbv --workload embench-iot 
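+
+# If the benchmark is short, rerun with a smaller interval so the BBV file
+# still contains full windows (10000 is an illustrative value; see the
+# interval-size notes in the README)
+python3 flow/run_workload.py --emulator spike --bbv --interval-size 10000 --workload embench-iot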
# Run SimPoint analysis (using generated BBV files) -./run_simpoint.py --workload embench-iot --emulator spike --max-k 30 +python3 flow/run_simpoint.py --workload embench-iot --emulator spike --max-k 30 # Results: .simpoints and .weights files for representative intervals ``` diff --git a/traces/docker_stf_trace_gen/doc/emulator-comparison.md b/traces/docker_stf_trace_gen/doc/emulator-comparison.md index 040a1123..83ce5844 100644 --- a/traces/docker_stf_trace_gen/doc/emulator-comparison.md +++ b/traces/docker_stf_trace_gen/doc/emulator-comparison.md @@ -38,7 +38,7 @@ Overall speedup: 0.37x (QEMU 2.70x faster) - **Spike**: Generates detailed STF (System Trace Format) traces with instruction-level detail - **QEMU**: Generates simple assembly traces using `-d in_asm` output -QEMU cannot generate STF traces - only basic assembly instruction logs. +QEMU can generate STF traces via a plugin (libstfmem), while Spike generates STF natively. ### Performance Recommendations @@ -199,4 +199,4 @@ embench-iot:md5sum bbv 0.093 0.047 0.50x ✓ 3. **Documentation**: Record emulator versions and configurations 4. **Reproducibility**: Save complete build and run configurations -The choice between Spike and QEMU depends on your analysis goals, time constraints, and accuracy requirements. The provided benchmarking framework helps quantify these trade-offs for your specific workloads. \ No newline at end of file +The choice between Spike and QEMU depends on your analysis goals, time constraints, and accuracy requirements. The provided benchmarking framework helps quantify these trade-offs for your specific workloads. diff --git a/traces/docker_stf_trace_gen/doc/generate_trace.md b/traces/docker_stf_trace_gen/doc/generate_trace.md new file mode 100644 index 00000000..e97fb030 --- /dev/null +++ b/traces/docker_stf_trace_gen/doc/generate_trace.md @@ -0,0 +1,134 @@ +# Trace Generation Tool + +This utility emits STF traces for RISC-V workloads using Spike or QEMU. It supports two workflows: + +1. **`single`** – produce one trace window from a workload binary (macro markers, instruction-count window or PC-count window) +2. **`sliced`** – replay SimPoint-selected windows and emit a manifest of per-interval traces + +--- + +## Quickstart + +1. **Spike macro markers** + ```bash + python3 flow/generate_trace.py single --emulator spike --mode macro build/aha-mont64.elf + ``` + +2. **QEMU instruction-count window** + ```bash + python3 flow/generate_trace.py single --emulator qemu --arch rv64 --mode insn_count \ + --num-instructions 5000 --start-instruction 1000 build/aha-mont64.elf + ``` + +3. **SimPoint slicing** + ```bash + python3 flow/generate_trace.py sliced --emulator spike \ + --workload embench-iot --benchmark aha-mont64 --verify + ``` + +--- + +## Usage + +```bash +python3 flow/generate_trace.py [OPTIONS] +``` + +To inspect per-command options: + +```bash +python3 flow/generate_trace.py single --help +python3 flow/generate_trace.py sliced --help +``` + +--- + +## `single` command + +Generates a single STF trace from a workload binary. 
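+
+For the QEMU-only `pc_count` mode, a sketch using the arguments described below; the start PC and threshold are placeholders to replace with values taken from your binary:
+
+```bash
+python3 flow/generate_trace.py single --emulator qemu --arch rv64 --mode pc_count \
+    --start-pc 0x80000000 --pc-threshold 1 --num-instructions 5000 build/aha-mont64.elf
+```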
+ +### Required arguments + +- `binary` – path to the ELF to execute +- `--emulator {spike,qemu}` – execution engine +- `--mode {macro,insn_count,pc_count}` – trace extraction strategy + +### Optional arguments + +- `--arch {rv32,rv64}` – QEMU target width (default `rv64`) +- `--isa ISA` – override the ISA passed to Spike (defaults to build metadata) +- `--num-instructions N` – number of instructions to trace (for `insn_count`/`pc_count`) +- `--start-instruction N` – instructions to skip before tracing (`insn_count`) +- `--start-pc PC` – program counter that triggers tracing (`pc_count`, accepts hex) +- `--pc-threshold N` – number of hits on `start-pc` before tracing (`pc_count`) +- `--pk` – launch Spike with the proxy kernel +- `--dump` – emit `stf_dump` output next to the trace +- `-o, --output PATH` – custom output file or directory (defaults to `.zstf`) + +### Mode notes + +- `macro` is Spike-only and relies on `START_TRACE` / `STOP_TRACE` macros compiled into the workload +- `insn_count` is available on Spike and QEMU +- `pc_count` is QEMU-only and uses the STF plugin in IP mode + +--- + +## `sliced` command + +Replays SimPoint intervals and produces a directory of per-interval STF traces. + +### Required arguments + +- `--workload NAME` – workload suite used during build/run +- `--benchmark NAME` – benchmark within the workload +- `--emulator spike` – slicing currently relies on Spike’s instruction-count tracing + +### Optional arguments + +- `--interval-size N` – override the interval size recorded in `run_meta.json` +- `--simpoints PATH` / `--weights PATH` – override the default SimPoint outputs under `/outputs/simpoint_analysis` +- `--verify` – run `stf_count` on each slice and record the measured instruction count +- `--dump` – emit `stf_dump` output for each slice +- `--clean` – remove the existing slice directory before regenerating + +### Outputs + +For each benchmark the tool writes: + +- One `.zstf` trace per SimPoint interval under `/outputs/simpointed////` +- Matching `.metadata.json` files with trace provenance +- An aggregate `slices.json` manifest detailing weights, interval indices, verification status, and trace paths + +--- + +## Workflow dependencies + +Before running `sliced`, ensure the following steps have completed for the target benchmark: + +1. `build_workload.py` (compiled with `--bbv` if SimPoint will be used) +2. `run_workload.py --bbv [--trace]` (produces BBV vectors and run metadata) +3. `run_simpoint.py --emulator spike --workload [--benchmark ]` + +--- + +## Mode restrictions + +| Mode | Emulator | Notes | +|-----------------|----------|---------------------------------------------| +| `single/macro` | spike | Requires instrumentation macros | +| `single/insn_count` | spike/qemu | Instruction window tracing | +| `single/pc_count` | qemu | Uses STF plugin IP mode | +| `sliced` | spike | Requires SimPoint artefacts and run metadata | + +Use `--dump` and `--verify` to generate additional artefacts for debugging or validation. + +--- + +## Further help + +Invoke the sub-command help flags for the most accurate, up-to-date argument list. 
+ +```bash +python3 flow/generate_trace.py single --help +python3 flow/generate_trace.py sliced --help +``` diff --git a/traces/docker_stf_trace_gen/environment/qemu/board.yaml b/traces/docker_stf_trace_gen/environment/qemu/board.yaml index 6d2e9de5..fe45cf69 100644 --- a/traces/docker_stf_trace_gen/environment/qemu/board.yaml +++ b/traces/docker_stf_trace_gen/environment/qemu/board.yaml @@ -1,104 +1,97 @@ -# Board Configuration for QEMU RISC-V Emulator -# YAML format with hierarchical structure +schema: 2 -# Board identification and defaults -board: qemu -defaults: - cc: riscv32-unknown-elf-gcc - cpu_mhz: 1 - warmup_heat: 1 - defines: [ "CPU_MHZ=1", "WARMUP_HEAT=1"] +variables: + workloads_roots: + - "/workloads" + - "/default" + env_root: "/default/environment/{board}" + outputs_root: "/outputs/{emulator}/bin" + include_auto: [] + alt_roots: + - { from: "/workloads/", to: "/default/" } -# Architecture and platform specific configurations -architectures: +# QEMU shares the same build configuration as Spike for now. +# Differences at runtime are handled in run_workload.py + +toolchains: rv32: - platforms: - baremetal: - cc: riscv32-unknown-elf-gcc - arch: rv32imafdc - abi: ilp32d - base_cflags: ["-march=rv32imafdc", "-mabi=ilp32d", "-mcmodel=medany", "-mno-relax", "-mstrict-align"] - base_ldflags: ["-march=rv32imafdc", "-mabi=ilp32d", "-nostartfiles", "-Wl,--no-warn-rwx-segments"] - linker_script: link.ld - libs: ["-lc", "-lm"] - includes: [] - - linux: - cc: riscv32-linux-gnu-gcc - arch: rv32imafdc - abi: ilp32d - base_cflags: ["-march=rv32imafdc", "-mabi=ilp32d", "-static", "-O2"] - base_ldflags: ["-march=rv32imafdc", "-mabi=ilp32d", "-static"] - libs: ["-lm"] - includes: [] + baremetal: + cc: riscv32-unknown-elf-gcc + base_cflags: ["-march=rv32imafdc", "-mabi=ilp32d", "-mcmodel=medany", "-mno-relax", "-mstrict-align"] + base_ldflags: ["-march=rv32imafdc", "-mabi=ilp32d", "-nostartfiles", "-Wl,--no-warn-rwx-segments"] + libs: ["-lc", "-lm"] + linker_script: "link.ld" + linux: + cc: riscv32-unknown-linux-gnu-gcc + base_cflags: ["-march=rv32imafdc", "-mabi=ilp32d", "-static", "-O2"] + base_ldflags: ["-march=rv32imafdc", "-mabi=ilp32d", "-static"] + libs: ["-lm"] + linker_script: null rv64: - platforms: - baremetal: - cc: riscv64-unknown-elf-gcc - arch: rv64imafdc - abi: lp64d - base_cflags: ["-march=rv64imafdc", "-mabi=lp64d", "-mcmodel=medany", "-mno-relax", "-mstrict-align"] - base_ldflags: ["-march=rv64imafdc", "-mabi=lp64d", "-nostartfiles", "-Wl,--no-warn-rwx-segments"] - linker_script: link.ld - libs: ["-lc", "-lm"] - includes: [] - - linux: - cc: riscv64-linux-gnu-gcc - arch: rv64imafdc - abi: lp64d - base_cflags: ["-march=rv64imafdc", "-mabi=lp64d", "-static", "-O2"] - base_ldflags: ["-march=rv64imafdc", "-mabi=lp64d", "-static"] - libs: ["-lm"] - includes: [] + baremetal: + cc: riscv64-unknown-elf-gcc + base_cflags: ["-march=rv64imafdc", "-mabi=lp64d", "-mcmodel=medany", "-mno-relax", "-mstrict-align"] + base_ldflags: ["-march=rv64imafdc", "-mabi=lp64d", "-nostartfiles", "-Wl,--no-warn-rwx-segments"] + libs: ["-lc", "-lm"] + linker_script: "link.ld" + linux: + cc: riscv64-unknown-linux-gnu-gcc + base_cflags: ["-march=rv64imafdc", "-mabi=lp64d", "-static", "-O2"] + base_ldflags: ["-march=rv64imafdc", "-mabi=lp64d", "-static"] + libs: ["-lm"] + linker_script: null + +features: + bbv: + cflags: ["-DBBV"] + ldflags: [] + trace: + cflags: ["-DTRACE"] + ldflags: [] -# Workload specific configurations workloads: embench-iot: - workload_cflags: ["-Dtrue=1", "-Dfalse=0", 
"-I/workloads/embench-iot/support/"] - workload_ldflags: [] - workload_defines: [] - workload_includes: [] - workload_sources: ["/workloads/embench-iot/support/beebsc.c"] - environment_files: ["crt0.S", "main.c", "stub.c", "util.c"] - + layout: + mode: per_benchmark + per_benchmark: + bench_root: "{workload_root}/src" + source_patterns: ["*.c"] + exclude_dirs: [] + support_per_benchmark_patterns: + - "{workload_root}/support/beebsc.c" + includes: + - "{workload_root}/support" + - "{workload_root}/env" + defines: ["CPU_MHZ=1", "WARMUP_HEAT=1", "true=1", "false=0"] + env: + files: ["crt0.S", "main.c", "stub.c", "util.c"] + skip: false platforms: linux: - workload_cflags: ["-std=gnu99"] - skip_environment: true + env: { skip: true } + cflags: ["-std=gnu99"] + libs: ["-lm"] riscv-tests: - workload_cflags: ["-std=gnu99", "-Wno-implicit-int", "-Wno-implicit-function-declaration"] - workload_ldflags: [] - workload_defines: [] - workload_includes: ["benchmarks/common"] - environment_files: ["crt0.S", "main.c", "stub.c", "util.c"] - + layout: + mode: per_benchmark + per_benchmark: + bench_root: "{workload_root}/benchmarks" + source_patterns: ["*.c", "*.S"] + exclude_dirs: ["common"] + common_patterns: + - "{workload_root}/benchmarks/common/*.c" + common_skip: [] + includes: + - "{workload_root}/benchmarks/common" + cflags: ["-std=gnu99", "-Wno-implicit-int", "-Wno-implicit-function-declaration"] + env: + files: ["crt0.S", "main.c", "stub.c", "util.c"] + skip: false platforms: linux: - workload_cflags: ["-std=gnu99"] - skip_common_files: ["syscalls.c"] - skip_environment: true - - dhrystone: - # Inherits from riscv-tests configuration - parent: riscv-tests - workload_cflags: ["-std=gnu99", "-Wno-implicit-int", "-Wno-implicit-function-declaration"] - -# Features and special configurations -features: - bbv: - bbv_cflags: ["-DBBV"] - bbv_ldflags: [] - - trace: - trace_cflags: ["-DTRACE"] - trace_ldflags: [] + env: { skip: true } + layout: + common_skip: ["syscalls.c"] - vector: - # Vector extension configuration for benchmarks starting with "vec-" - vector_rv32_arch: rv32gcv - vector_rv64_arch: rv64gcv - regular_rv32_arch: rv32gc - regular_rv64_arch: rv64gc \ No newline at end of file diff --git a/traces/docker_stf_trace_gen/environment/qemu/main.c b/traces/docker_stf_trace_gen/environment/qemu/main.c index b6032011..d134ec7d 100644 --- a/traces/docker_stf_trace_gen/environment/qemu/main.c +++ b/traces/docker_stf_trace_gen/environment/qemu/main.c @@ -81,13 +81,10 @@ env_main(int argc __attribute__((unused)), char *argv[] __attribute__((unused))) initialise_benchmark(); warm_caches(WARMUP_HEAT); - // Qemu does not support defining ROI. 
- - + // QEMU does not support ROI markers in this setup result = benchmark(); - correct = verify_benchmark(result); return (!correct); -} \ No newline at end of file +} diff --git a/traces/docker_stf_trace_gen/environment/qemu/stub.c b/traces/docker_stf_trace_gen/environment/qemu/stub.c index 7328722a..0671bb19 100644 --- a/traces/docker_stf_trace_gen/environment/qemu/stub.c +++ b/traces/docker_stf_trace_gen/environment/qemu/stub.c @@ -174,4 +174,3 @@ char *_sbrk (int delta) return ptr; } - diff --git a/traces/docker_stf_trace_gen/environment/spike/board.yaml b/traces/docker_stf_trace_gen/environment/spike/board.yaml index 6e57be1b..95f4d7f6 100644 --- a/traces/docker_stf_trace_gen/environment/spike/board.yaml +++ b/traces/docker_stf_trace_gen/environment/spike/board.yaml @@ -1,115 +1,118 @@ -# Board Configuration for Spike RISC-V Simulator -# YAML format with hierarchical structure +schema: 2 -# Board identification and defaults -board: spike -defaults: - cc: riscv32-unknown-elf-gcc - cpu_mhz: 1 - warmup_heat: 1 - defines: ["CPU_MHZ=1", "WARMUP_HEAT=1"] +# ---------- Variables ---------- +variables: + workloads_roots: + - "/workloads" + - "/default" + env_root: "/default/environment/{board}" + outputs_root: "/outputs/{emulator}/bin" + include_auto: [] # add any global include dirs here if you like + alt_roots: + # If a path starting with /workloads/... doesn't exist, try /default/... + - { from: "/workloads/", to: "/default/" } -# Architecture and platform specific configurations -architectures: +# ---------- Toolchains ---------- +toolchains: rv32: - platforms: - baremetal: - cc: riscv32-unknown-elf-gcc - arch: rv32imafdc - abi: ilp32d - base_cflags: ["-march=rv32imafdc", "-mabi=ilp32d", "-mcmodel=medany", "-mno-relax", "-mstrict-align"] - base_ldflags: ["-march=rv32imafdc", "-mabi=ilp32d", "-nostartfiles", "-Wl,--no-warn-rwx-segments"] - linker_script: link.ld - libs: ["-lc", "-lm"] - includes: [] - - linux: - cc: riscv32-unknown-linux-gnu-gcc - arch: rv32imafdc - abi: ilp32d - base_cflags: ["-march=rv32imafdc", "-mabi=ilp32d", "-static", "-O2"] - base_ldflags: ["-march=rv32imafdc", "-mabi=ilp32d", "-static"] - libs: ["-lm"] - includes: [] + baremetal: + cc: riscv32-unknown-elf-gcc + base_cflags: ["-march=rv32imafdc", "-mabi=ilp32d", "-mcmodel=medany", "-mno-relax", "-mstrict-align"] + base_ldflags: ["-march=rv32imafdc", "-mabi=ilp32d", "-nostartfiles", "-Wl,--no-warn-rwx-segments"] + libs: ["-lc", "-lm"] + linker_script: "link.ld" + + linux: + cc: riscv32-unknown-linux-gnu-gcc + base_cflags: ["-march=rv32imafdc", "-mabi=ilp32d", "-static", "-O2"] + base_ldflags: ["-march=rv32imafdc", "-mabi=ilp32d", "-static"] + libs: ["-lm"] + linker_script: null rv64: - platforms: - baremetal: - cc: riscv64-unknown-elf-gcc - arch: rv64imafdc - abi: lp64d - base_cflags: ["-march=rv64imafdc", "-mabi=lp64d", "-mcmodel=medany", "-mno-relax", "-mstrict-align"] - base_ldflags: ["-march=rv64imafdc", "-mabi=lp64d", "-nostartfiles", "-Wl,--no-warn-rwx-segments"] - linker_script: link.ld - libs: ["-lc", "-lm"] - includes: [] - - linux: - cc: riscv64-linux-gnu-gcc - arch: rv64imafdc - abi: lp64d - base_cflags: ["-march=rv64imafdc", "-mabi=lp64d", "-static", "-O2"] - base_ldflags: ["-march=rv64imafdc", "-mabi=lp64d", "-static"] - libs: ["-lm"] - includes: [] + baremetal: + cc: riscv64-unknown-elf-gcc + base_cflags: ["-march=rv64imafdc", "-mabi=lp64d", "-mcmodel=medany", "-mno-relax", "-mstrict-align"] + base_ldflags: ["-march=rv64imafdc", "-mabi=lp64d", "-nostartfiles", "-Wl,--no-warn-rwx-segments"] + libs: 
["-lc", "-lm"] + linker_script: "link.ld" + + linux: + cc: riscv64-linux-gnu-gcc + base_cflags: ["-march=rv64imafdc", "-mabi=lp64d", "-static", "-O2"] + base_ldflags: ["-march=rv64imafdc", "-mabi=lp64d", "-static"] + libs: ["-lm"] + linker_script: null -# Workload specific configurations +# ---------- Features ---------- +features: + bbv: + cflags: ["-DBBV"] + ldflags: [] + trace: + cflags: ["-DTRACE"] + ldflags: [] + +# ---------- Workloads ---------- workloads: + embench-iot: - # Embench IoT specific flags, -Dtrue=1 and -Dfalse=0 to fix wikisort - workload_cflags: ["-Dtrue=1", "-Dfalse=0", "-I/workloads/embench-iot/support/"] - workload_ldflags: [] - workload_defines: [] - workload_includes: [] + # Pure YAML: structure, what to compile, where from. + layout: + mode: per_benchmark + per_benchmark: + bench_root: "{workload_root}/src" + source_patterns: ["*.c"] + exclude_dirs: [] + support_per_benchmark_patterns: + - "{workload_root}/support/beebsc.c" + includes: + - "{workload_root}/support" + - "{workload_root}/env" + defines: ["CPU_MHZ=1", "WARMUP_HEAT=1", "true=1", "false=0"] + cflags: [] + ldflags: [] + libs: [] + env: + files: ["crt0.S", "main.c", "stub.c", "util.c"] + skip: false platforms: - baremetal: - workload_sources: ["/workloads/embench-iot/support/beebsc.c"] - environment_files: ["crt0.S", "main.c", "stub.c", "util.c"] - linux: - workload_cflags: ["-std=gnu99"] - environment_files: ["main.c"] - - #skip_environment: true - #workload_sources: ["/.c"] + # Linux: usually no env / different flags. Adjust as you need. + env: { skip: true, files: [] } + cflags: ["-std=gnu99"] + ldflags: [] + libs: ["-lm"] + includes: [] riscv-tests: - workload_cflags: ["-std=gnu99", "-Wno-implicit-int", "-Wno-implicit-function-declaration"] - workload_ldflags: [] - workload_defines: [] - workload_includes: ["benchmarks/common"] - environment_files: ["crt0.S", "main.c", "stub.c", "util.c"] - + layout: + mode: per_benchmark + per_benchmark: + bench_root: "{workload_root}/benchmarks" + source_patterns: ["*.c", "*.S"] + exclude_dirs: ["common"] + common_patterns: + - "{workload_root}/benchmarks/common/*.c" + common_skip: [] # e.g., ["syscalls.c"] on Linux if you need + includes: + - "{workload_root}/benchmarks/common" + # add ISA sim env if needed: + # - "/riscv/condor.riscv-isa-sim/riscv-tests/env" + # - "/riscv/condor.riscv-isa-sim/riscv" + defines: [] + cflags: ["-std=gnu99", "-Wno-implicit-int", "-Wno-implicit-function-declaration"] + ldflags: [] + libs: [] + env: + files: ["crt0.S", "main.c", "stub.c", "util.c"] + skip: false + platforms: linux: - workload_cflags: ["-std=gnu99"] - skip_common_files: ["syscalls.c"] - skip_environment: true - - dhrystone: - # Dhrystone specific flags (inherits from riscv-tests) - parent: riscv-tests - # Its preferred in Dhrystone to not patch the source code and fix errors using compiler flags - workload_cflags: ["-std=gnu99", "-Wno-implicit-int", "-Wno-implicit-function-declaration"] - - - #coremark: to be done - # this allows for future extension to other workloads - -# Features and special configurations -features: - bbv: - bbv_cflags: ["-DBBV"] - bbv_ldflags: [] - - trace: - trace_cflags: ["-DTRACE"] - trace_ldflags: [] - - vector: - # Vector extension configuration for benchmarks starting with "vec-" - vector_rv32_arch: rv32gcv - vector_rv64_arch: rv64gcv - regular_rv32_arch: rv32gc - regular_rv64_arch: rv64gc \ No newline at end of file + env: { skip: true } + cflags: ["-std=gnu99"] + # Often riscv-tests 'syscalls.c' conflicts under Linux; skip via common_skip: 
+ layout: + common_skip: ["syscalls.c"] diff --git a/traces/docker_stf_trace_gen/environment/spike/main.c b/traces/docker_stf_trace_gen/environment/spike/main.c index f5d2db6f..b7e71e62 100644 --- a/traces/docker_stf_trace_gen/environment/spike/main.c +++ b/traces/docker_stf_trace_gen/environment/spike/main.c @@ -37,7 +37,7 @@ int __attribute__((weak)) main(void) { return -1; } -int __attribute__((weak)) benchmark(void) { // Coremark main() +int __attribute__((weak)) benchmark(void) { return main(); } @@ -90,4 +90,4 @@ env_main(int argc __attribute__((unused)), char *argv[] __attribute__((unused))) correct = verify_benchmark(result); return (!correct); -} \ No newline at end of file +} diff --git a/traces/docker_stf_trace_gen/environment/spike/stub.c b/traces/docker_stf_trace_gen/environment/spike/stub.c index 4de8c17c..9e3657f0 100644 --- a/traces/docker_stf_trace_gen/environment/spike/stub.c +++ b/traces/docker_stf_trace_gen/environment/spike/stub.c @@ -174,4 +174,3 @@ char *_sbrk (int delta) return ptr; } - diff --git a/traces/docker_stf_trace_gen/factories/metadata_factory.py b/traces/docker_stf_trace_gen/factories/metadata_factory.py index d6fabb77..b107a5d5 100644 --- a/traces/docker_stf_trace_gen/factories/metadata_factory.py +++ b/traces/docker_stf_trace_gen/factories/metadata_factory.py @@ -1,16 +1,17 @@ import datetime import os import re +from pathlib import Path from typing import Dict, Literal, Optional from elftools.elf.elffile import ELFFile -from utils.util import Util -from utils.docker_orchestrator import DockerOrchestrator +from utils.util import Util, CommandError +from data.consts import Const from data.metadata import Author, InstructionCountModeInterval, IpModeInterval, Metadata, Stf, Workload -class MetadataFactory(): - def __init__(self, docker: DockerOrchestrator): - self.docker = docker +class MetadataFactory: + def __init__(self): + pass def create( self, @@ -83,7 +84,13 @@ def _get_workload_sections(self, workload_path: str) -> dict[str, str]: return result def _get_stf_info(self, trace_path: str) -> Dict[str, str]: - trace_info = self.docker.run_stf_tool("stf_trace_info", trace_path).decode('utf-8') + tool = Path(Const.STF_TOOLS) / "stf_trace_info" / "stf_trace_info" + try: + result = Util.run_cmd([str(tool), trace_path]) + except CommandError as err: + Util.error(f"Failed to run stf_trace_info: {err}") + return {} + trace_info = result.stdout metadata = {} values_section = [] diff --git a/traces/docker_stf_trace_gen/flow/__init__.py b/traces/docker_stf_trace_gen/flow/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/traces/docker_stf_trace_gen/flow/build_workload.py b/traces/docker_stf_trace_gen/flow/build_workload.py new file mode 100644 index 00000000..821d50de --- /dev/null +++ b/traces/docker_stf_trace_gen/flow/build_workload.py @@ -0,0 +1,380 @@ +#!/usr/bin/env python3 +"""Build RISC-V workloads using board configuration (schema v2).""" +from __future__ import annotations + +import argparse +import glob +from dataclasses import dataclass +from pathlib import Path +from typing import Iterable, List, Optional +import sys + +ROOT = Path(__file__).resolve().parents[1] +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) + +from flow.utils.config import BuildConfig, FeatureSet, FinalConfig +from flow.utils.paths import BenchmarkPaths, binaries_root +from flow.utils.util import CommandError, Util + + +# ----------------------------------------------------------------------------- +# Helper utilities +# 
----------------------------------------------------------------------------- + + +def _unique(paths: Iterable[Path]) -> List[Path]: + seen = set() + out: List[Path] = [] + for path in paths: + key = path.resolve() + if key not in seen: + out.append(path) + seen.add(key) + return out + + +def _collect_patterns(patterns: Iterable[str]) -> List[Path]: + matches: List[Path] = [] + for pattern in patterns: + for match in glob.glob(pattern, recursive=True): + candidate = Path(match) + if candidate.is_file(): + matches.append(candidate) + return _unique(sorted(matches)) + + +def _extract_isa(cflags: Iterable[str], default: Optional[str]) -> Optional[str]: + for flag in cflags: + if flag.startswith("-march="): + return flag.split("=", 1)[1] + return default + + +def _emit_build_metadata(paths: BenchmarkPaths, final: FinalConfig, *, bench: str, args: argparse.Namespace, sources: List[str]) -> None: + metadata = { + "timestamp": Util.now_iso(), + "emulator": args.emulator, + "arch": args.arch, + "platform": args.platform, + "workload": args.workload, + "benchmark": bench, + "features": {"bbv": args.bbv, "trace": args.trace}, + "entrypoint": args.entrypoint or "benchmark", + "toolchain": {"cc": final.tools.cc}, + "flags": { + "cflags": final.flags.cflags, + "includes": final.flags.includes, + "ldflags": final.flags.ldflags, + "libs": final.flags.libs, + "linker_script": str(final.flags.linker_script) if final.flags.linker_script else None, + }, + "env": { + "skip": final.env.skip, + "dir": str(final.env.dir), + "files": final.env.files, + }, + "isa": _extract_isa(final.flags.cflags, None), + "sources": sources, + "binary": str(paths.binary_path), + } + Util.write_json(paths.build_meta_path, metadata) + + +@dataclass +class CompilationArtifacts: + env_objs: List[Path] + common_objs: List[Path] + support_once_objs: List[Path] + + +class WorkloadBuilder: + def __init__(self, args: argparse.Namespace) -> None: + self.args = args + self.config = BuildConfig.load(board=args.emulator) + self.features = FeatureSet(bbv=args.bbv, trace=args.trace) + if not args.list and not args.workload and not args.input_obj: + raise SystemExit("--workload is required unless --input-obj is provided") + + # ------------------------------------------------------------------ + # Public entrypoints + # ------------------------------------------------------------------ + def list_workloads(self) -> None: + Util.info("Configured workloads:") + for workload in self.config.list_workloads(): + try: + root = self.config.resolve_workload_root(workload, None) + final = self._final_config(workload, root) + except SystemExit: + Util.warn(f" {workload}: failed to resolve") + continue + + if final.layout.mode == "per_benchmark" and final.layout.per_benchmark: + benches = self._discover_benchmarks(final) + sample = ", ".join(benches[:6]) + (" …" if len(benches) > 6 else "") + Util.info(f" {workload} ({final.layout.mode}): {root} | benches: {sample}") + else: + Util.info(f" {workload} ({final.layout.mode}): {root}") + + def build(self) -> None: + if not self.args.workload: + raise SystemExit("--workload is required to select configuration data") + workload_root = self.config.resolve_workload_root(self.args.workload, self.args.custom_path) + final = self._final_config(self.args.workload, workload_root) + + # Always ensure env dir exists even if skipped, so downstream tooling has predictable paths. 
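As a usage sketch only (the workload and benchmark names are illustrative, and the container layout with `board.yaml` and `/workloads` is assumed to be in place), the builder can also be driven programmatically with the same fields `parse_args()` produces; this mirrors what the CLI entry point in `main()` does:

```python
import argparse
# Assumes the repo root is importable, as arranged at the top of flow/build_workload.py.
from flow.build_workload import WorkloadBuilder

args = argparse.Namespace(
    workload="embench-iot", benchmark="aha-mont64",   # illustrative selection
    arch="rv32", platform="baremetal", emulator="spike",
    bbv=True, trace=False,
    custom_path=None, entrypoint=None, input_obj=None, list=False,
)
WorkloadBuilder(args).build()  # compile env/common/support objects, then build and link the benchmark
```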
+ env_dir = binaries_root(self.args.emulator) / "env" + env_dir.mkdir(parents=True, exist_ok=True) + + if self.args.input_obj: + self._link_from_objects(final) + return + + if final.layout.mode == "per_benchmark" and final.layout.per_benchmark: + bench_names = self._target_benchmarks(final) + artifacts = self._prepare_shared_artifacts(final) + for bench in bench_names: + self._build_benchmark(final, artifacts, bench) + else: + self._build_single(final) + + # ------------------------------------------------------------------ + # Build helpers + # ------------------------------------------------------------------ + def _final_config(self, workload: str, workload_root: Path) -> FinalConfig: + return self.config.finalize( + workload=workload, + arch=self.args.arch, + platform=self.args.platform, + emulator=self.args.emulator, + workload_root=workload_root, + features=self.features, + ) + + def _prepare_shared_artifacts(self, final: FinalConfig) -> CompilationArtifacts: + env_objs = self._compile_environment(final) + common_objs = self._compile_common_sources(final) + support_once_objs = self._compile_support_once_sources(final) + return CompilationArtifacts(env_objs, common_objs, support_once_objs) + + def _compile_environment(self, final: FinalConfig) -> List[Path]: + if final.env.skip: + return [] + objects: List[Path] = [] + env_out = binaries_root(self.args.emulator) / "env" + env_out.mkdir(parents=True, exist_ok=True) + for file_name in final.env.files: + src = final.env.dir / file_name + if not src.exists(): + raise SystemExit(f"Environment file missing: {src}") + obj = env_out / (Path(file_name).stem + ".o") + self._compile(src, obj, final) + objects.append(obj) + return objects + + def _compile_common_sources(self, final: FinalConfig) -> List[Path]: + patterns = final.layout.common_patterns + if not patterns: + return [] + sources = _collect_patterns(patterns) + skip = {Path(item).stem for item in final.layout.common_skip} + sources = [s for s in sources if s.stem not in skip] + dest = binaries_root(self.args.emulator) / self.args.workload / "common" / "obj" + return self._compile_many(sources, final, dest) + + def _compile_support_once_sources(self, final: FinalConfig) -> List[Path]: + patterns = final.layout.support_once_patterns + if not patterns: + return [] + sources = _collect_patterns(patterns) + dest = binaries_root(self.args.emulator) / self.args.workload / "support_once" / "obj" + return self._compile_many(sources, final, dest) + + def _target_benchmarks(self, final: FinalConfig) -> List[str]: + if self.args.benchmark: + return [self.args.benchmark] + return self._discover_benchmarks(final) + + def _discover_benchmarks(self, final: FinalConfig) -> List[str]: + per = final.layout.per_benchmark + if not per: + return [] + root = Path(per.bench_root) + if not root.exists(): + raise SystemExit(f"Bench root missing: {root}") + names = [entry.name for entry in sorted(root.iterdir()) if entry.is_dir()] + exclude = set(per.exclude_dirs) + return [name for name in names if name not in exclude] + + def _collect_benchmark_sources(self, final: FinalConfig, bench: str) -> List[Path]: + per = final.layout.per_benchmark + if not per: + return [] + root = Path(per.bench_root) / bench + patterns = [str(root / pattern) for pattern in per.source_patterns] + bench_sources = _collect_patterns(patterns) + + # Support files scoped per benchmark + support_patterns = [pattern.replace("{bench}", bench) for pattern in final.layout.support_per_benchmark_patterns] + bench_sources += 
_collect_patterns(support_patterns) + return _unique(bench_sources) + + def _compile_many(self, sources: Iterable[Path], final: FinalConfig, dest_dir: Path) -> List[Path]: + dest_dir.mkdir(parents=True, exist_ok=True) + objects: List[Path] = [] + for src in sources: + obj = dest_dir / (src.stem + ".o") + self._compile(src, obj, final) + objects.append(obj) + return objects + + def _compile(self, source: Path, output: Path, final: FinalConfig) -> None: + output.parent.mkdir(parents=True, exist_ok=True) + cmd = [final.tools.cc, "-c", *final.flags.cflags_with_includes, "-o", str(output), str(source)] + try: + Util.run_cmd(cmd) + except CommandError as err: + raise SystemExit(str(err)) + + def _link(self, final: FinalConfig, output: Path, objects: List[Path]) -> None: + cmd = [final.tools.cc] + if final.flags.linker_script: + cmd.extend([f"-T{final.flags.linker_script}"]) + cmd.extend(final.flags.ldflags) + cmd.extend(["-o", str(output)]) + cmd.extend(str(obj) for obj in objects) + cmd.extend(final.flags.libs) + try: + Util.run_cmd(cmd) + except CommandError as err: + raise SystemExit(str(err)) + + def _maybe_wrapper(self, final: FinalConfig) -> Optional[Path]: + entry = self.args.entrypoint + if not entry or entry == "benchmark": + return None + wrapper_dir = binaries_root(self.args.emulator) / "wrapper" + wrapper_dir.mkdir(parents=True, exist_ok=True) + src = wrapper_dir / "wrapper_entry.c" + src.write_text( + f"extern int {entry}(void);\n" + "int benchmark(void) {\n" + f" return {entry}();\n" + "}\n" + ) + obj = wrapper_dir / "wrapper_entry.o" + self._compile(src, obj, final) + return obj + + def _build_benchmark(self, final: FinalConfig, artifacts: CompilationArtifacts, bench: str) -> None: + paths = BenchmarkPaths(self.args.emulator, self.args.workload, bench) + paths.resolve() + + bench_sources = self._collect_benchmark_sources(final, bench) + if not bench_sources: + raise SystemExit(f"No sources discovered for benchmark '{bench}'") + + objects = self._compile_many(bench_sources, final, paths.object_dir) + wrapper_obj = self._maybe_wrapper(final) + link_inputs = artifacts.env_objs + artifacts.common_objs + artifacts.support_once_objs + objects + if wrapper_obj: + link_inputs.append(wrapper_obj) + + self._link(final, paths.binary_path, link_inputs) + Util.info(f"Built {paths.binary_path}") + _emit_build_metadata( + paths, + final, + bench=bench, + args=self.args, + sources=[str(src) for src in bench_sources], + ) + + def _build_single(self, final: FinalConfig) -> None: + patterns = final.layout.single_sources + if not patterns: + raise SystemExit("Single workload requested but no sources defined") + sources = _collect_patterns(patterns) + if not sources: + raise SystemExit("Could not resolve any sources for single workload") + + bench_name = self.args.benchmark or self.args.workload + paths = BenchmarkPaths(self.args.emulator, self.args.workload, bench_name) + paths.resolve() + + env_objs = self._compile_environment(final) + support_once_objs = self._compile_support_once_sources(final) + objects = self._compile_many(sources, final, paths.object_dir) + wrapper_obj = self._maybe_wrapper(final) + + link_inputs = env_objs + support_once_objs + objects + if wrapper_obj: + link_inputs.append(wrapper_obj) + + self._link(final, paths.binary_path, link_inputs) + Util.info(f"Built {paths.binary_path}") + _emit_build_metadata( + paths, + final, + bench=bench_name, + args=self.args, + sources=[str(src) for src in sources], + ) + + def _link_from_objects(self, final: FinalConfig) -> None: + if not 
self.args.benchmark: + raise SystemExit("--benchmark must be provided when using --input-obj") + paths = BenchmarkPaths(self.args.emulator, self.args.workload, self.args.benchmark) + paths.resolve() + + env_objs = self._compile_environment(final) + wrapper_obj = self._maybe_wrapper(final) + + link_inputs = env_objs + [Path(obj) for obj in self.args.input_obj] + if wrapper_obj: + link_inputs.append(wrapper_obj) + + self._link(final, paths.binary_path, link_inputs) + Util.info(f"Linked {paths.binary_path} from provided objects") + _emit_build_metadata( + paths, + final, + bench=self.args.benchmark, + args=self.args, + sources=[str(Path(obj).resolve()) for obj in self.args.input_obj], + ) + + +# ----------------------------------------------------------------------------- +# CLI +# ----------------------------------------------------------------------------- + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Build RISC-V workloads") + parser.add_argument("--workload", required=False, help="Workload name as defined in board.yaml") + parser.add_argument("--benchmark", help="Specific benchmark to build (per_benchmark mode)") + parser.add_argument("--arch", default="rv32", choices=["rv32", "rv64"]) + parser.add_argument("--platform", default="baremetal", choices=["baremetal", "linux"]) + parser.add_argument("--emulator", default="spike", choices=["spike", "qemu"]) + parser.add_argument("--bbv", action="store_true", help="Enable BBV feature macros during compilation") + parser.add_argument("--trace", action="store_true", help="Enable trace feature macros during compilation") + parser.add_argument("--custom-path", help="Override workload source root") + parser.add_argument("--entrypoint", help="Create benchmark() wrapper that calls the provided function") + parser.add_argument("--input-obj", nargs="+", help="Link provided object/archive files instead of building sources") + parser.add_argument("--list", action="store_true", help="List workloads from board.yaml and exit") + return parser.parse_args() + + +def main() -> None: + args = parse_args() + builder = WorkloadBuilder(args) + if args.list: + builder.list_workloads() + return + if not args.workload and not args.input_obj: + raise SystemExit("--workload is required unless --input-obj is provided") + builder.build() + + +if __name__ == "__main__": + main() diff --git a/traces/docker_stf_trace_gen/flow/generate_trace.py b/traces/docker_stf_trace_gen/flow/generate_trace.py new file mode 100644 index 00000000..ec2f0bc3 --- /dev/null +++ b/traces/docker_stf_trace_gen/flow/generate_trace.py @@ -0,0 +1,331 @@ +#!/usr/bin/env python3 +"""Generate STF traces either as single windows or SimPoint-sliced sets.""" +from __future__ import annotations + +import argparse +import json +import re +import shutil +from dataclasses import asdict +from pathlib import Path +from typing import Dict, List, Optional +import sys + +ROOT = Path(__file__).resolve().parents[1] +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) + +from data.consts import Const +from factories.metadata_factory import MetadataFactory +from flow.utils.paths import BenchmarkPaths, simpoint_analysis_root +from flow.utils.util import CommandError, Util + + +def _resolve_output(binary: Path, output: Optional[str]) -> Path: + if output is None: + return binary.with_suffix(".zstf") + candidate = Path(output) + if candidate.is_dir(): + return candidate / (binary.stem + ".zstf") + if candidate.suffix in {".zstf", ".stf"}: + return candidate + raise 
SystemExit("--output must point to a directory or .zstf/.stf file") + + +def _run_single(args: argparse.Namespace) -> None: + binary = Path(args.binary).resolve() + if not binary.exists(): + raise SystemExit(f"Binary not found: {binary}") + + output_path = _resolve_output(binary, args.output).resolve() + output_path.parent.mkdir(parents=True, exist_ok=True) + + if args.emulator == "spike": + cmd = ["spike"] + if args.isa: + cmd.append(f"--isa={args.isa}") + cmd.extend(["--stf_trace_memory_records", f"--stf_trace={output_path}"]) + + if args.mode == "macro": + cmd.append("--stf_macro_tracing") + elif args.mode == "insn_count": + if args.num_instructions is None: + raise SystemExit("--num-instructions is required for insn_count mode") + cmd.extend([ + "--stf_insn_num_tracing", + "--stf_insn_start", + str(args.start_instruction or 0), + "--stf_insn_count", + str(args.num_instructions), + ]) + else: + raise SystemExit("Spike does not support pc_count mode") + + if args.pk: + cmd.append(Const.SPIKE_PK) + cmd.append(str(binary)) + + else: # QEMU + bits = 32 if args.arch == "rv32" else 64 + cmd = [f"qemu-riscv{bits}", str(binary)] + plugin = Const.LIBSTFMEM + if not Path(plugin).exists(): + raise SystemExit(f"STF plugin not found: {plugin}") + + if args.mode == "insn_count": + if args.num_instructions is None: + raise SystemExit("--num-instructions is required for insn_count mode") + start_dyn = (args.start_instruction or 0) + 1 + plugin_cfg = ( + f"{plugin},mode=dyn_insn_count,start_dyn_insn={start_dyn}," + f"num_instructions={args.num_instructions},outfile={output_path}" + ) + elif args.mode == "pc_count": + if args.num_instructions is None or args.start_pc is None: + raise SystemExit("pc_count requires --num-instructions and --start-pc") + plugin_cfg = ( + f"{plugin},mode=ip,start_ip={args.start_pc},ip_hit_threshold={args.pc_threshold}," + f"num_instructions={args.num_instructions},outfile={output_path}" + ) + else: + raise SystemExit("Macro tracing is not available via QEMU") + cmd.extend(["-plugin", plugin_cfg, "-d", "plugin"]) + + Util.info("Generating trace: " + " ".join(cmd)) + try: + Util.run_cmd(cmd) + except CommandError as err: + raise SystemExit(str(err)) + + metadata = MetadataFactory().create( + workload_path=str(binary), + trace_path=str(output_path), + trace_interval_mode={ + "macro": "macro", + "insn_count": "instructionCount", + "pc_count": "ip", + }[args.mode], + start_instruction=args.start_instruction, + num_instructions=args.num_instructions, + start_pc=args.start_pc, + pc_threshold=args.pc_threshold, + ) + metadata_path = output_path.with_suffix(".metadata.json") + metadata_path.write_text(json.dumps(asdict(metadata), indent=2) + "\n") + + if args.dump: + dump_tool = Path(Const.STF_TOOLS) / "stf_dump" / "stf_dump" + if dump_tool.exists(): + dump_result = Util.run_cmd([str(dump_tool), str(output_path)]) + output_path.with_suffix(".dump").write_text(dump_result.stdout) + else: + Util.warn("stf_dump tool not available; skipping dump") + + Util.info(f"Trace generated at {output_path}") + + +def _parse_simpoints(simpoints: Path, weights: Path) -> List[Dict[str, float]]: + if not simpoints.exists() or not weights.exists(): + raise SystemExit("SimPoint files missing") + + intervals: Dict[int, int] = {} + for line in simpoints.read_text().splitlines(): + if not line.strip(): + continue + idx, cluster = line.split() + intervals[int(cluster)] = int(idx) + + weights_map: Dict[int, float] = {} + for line in weights.read_text().splitlines(): + if not line.strip(): + continue + weight, 
cluster = line.split() + weights_map[int(cluster)] = float(weight) + + entries: List[Dict[str, float]] = [] + for cluster, interval in intervals.items(): + if cluster not in weights_map: + continue + entries.append({"cluster": cluster, "interval": interval, "weight": weights_map[cluster]}) + entries.sort(key=lambda item: item["interval"]) + return entries + + +def _verify_trace(trace_path: Path, expected: int) -> Dict[str, object]: + count_tool = Path(Const.STF_TOOLS) / "stf_count" / "stf_count" + if not count_tool.exists(): + Util.warn("stf_count not available; skipping verification") + return {"verified": None, "counted": None} + try: + result = Util.run_cmd([str(count_tool), str(trace_path)]) + except CommandError as err: + Util.warn(f"stf_count failed: {err}") + return {"verified": None, "counted": None} + numbers = [int(x) for x in re.findall(r"\d+", result.stdout)] + counted = numbers[-1] if numbers else None + verified = counted is not None and abs(counted - expected) <= 1 + return {"verified": verified, "counted": counted} + + +def _dump_trace(trace_path: Path) -> None: + dump_tool = Path(Const.STF_TOOLS) / "stf_dump" / "stf_dump" + if not dump_tool.exists(): + Util.warn("stf_dump not available; skipping dump") + return + try: + dump_result = Util.run_cmd([str(dump_tool), str(trace_path)]) + except CommandError as err: + Util.warn(f"stf_dump failed: {err}") + return + trace_path.with_suffix(".dump").write_text(dump_result.stdout) + + +def _run_sliced(args: argparse.Namespace) -> None: + paths = BenchmarkPaths(args.emulator, args.workload, args.benchmark) + run_meta_path = paths.run_meta_path + if not run_meta_path.exists(): + raise SystemExit(f"run_meta.json not found. Run run_workload.py first: {run_meta_path}") + + run_meta = Util.read_json(run_meta_path) + interval_size = args.interval_size or run_meta.get("interval_size") + if not interval_size: + raise SystemExit("Interval size missing; pass --interval-size") + isa = run_meta.get("isa") + if not isa: + raise SystemExit("ISA missing from run metadata; rebuild/run with updated tooling") + binary = Path(run_meta.get("binary", "")) + if not binary.exists(): + raise SystemExit(f"Binary not found: {binary}") + platform = run_meta.get("platform", "baremetal") + + simpoints = Path(args.simpoints) if args.simpoints else simpoint_analysis_root() / f"{args.benchmark}.simpoints" + weights = Path(args.weights) if args.weights else simpoint_analysis_root() / f"{args.benchmark}.weights" + entries = _parse_simpoints(simpoints, weights) + if not entries: + raise SystemExit("No SimPoint entries detected") + + output_dir = paths.simpoint_dir + if args.clean and output_dir.exists(): + shutil.rmtree(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + + Util.info(f"Generating {len(entries)} sliced traces for {args.workload}/{args.benchmark}") + + manifest_entries: List[Dict[str, object]] = [] + metadata_factory = MetadataFactory() + + for entry in entries: + interval_idx = entry["interval"] + weight = entry["weight"] + start = interval_idx * interval_size + trace_path = output_dir / f"{args.benchmark}.sp_{interval_idx}.zstf" + + cmd = [ + "spike", + f"--isa={isa}", + "--stf_trace_memory_records", + "--stf_insn_num_tracing", + "--stf_insn_start", + str(start), + "--stf_insn_count", + str(interval_size), + f"--stf_trace={trace_path}", + ] + if platform == "linux": + cmd.append(Const.SPIKE_PK) + cmd.append(str(binary)) + + Util.info(f"Slice interval {interval_idx} (weight={weight:.6f})") + try: + Util.run_cmd(cmd) + except CommandError as 
err: + raise SystemExit(str(err)) + + verification = {"verified": None, "counted": None} + if args.verify: + verification = _verify_trace(trace_path, interval_size) + + if args.dump: + _dump_trace(trace_path) + + metadata = metadata_factory.create( + workload_path=str(binary), + trace_path=str(trace_path), + trace_interval_mode="instructionCount", + start_instruction=start, + num_instructions=interval_size, + ) + metadata_path = trace_path.with_suffix(".metadata.json") + metadata_path.write_text(json.dumps(asdict(metadata), indent=2) + "\n") + + manifest_entries.append( + { + "interval_index": interval_idx, + "weight": weight, + "start_instruction": start, + "num_instructions": interval_size, + "trace": str(trace_path), + "metadata": str(metadata_path), + "verified": verification.get("verified"), + "counted_instructions": verification.get("counted"), + } + ) + + manifest = { + "workload": args.workload, + "benchmark": args.benchmark, + "emulator": args.emulator, + "interval_size": interval_size, + "generated_at": Util.now_iso(), + "simpoints": str(simpoints), + "weights": str(weights), + "slices": manifest_entries, + "total_weight": sum(item["weight"] for item in entries), + } + manifest_path = output_dir / "slices.json" + Util.write_json(manifest_path, manifest) + Util.info(f"SimPoint slices stored under {output_dir}") + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Trace generation toolkit") + sub = parser.add_subparsers(dest="command", required=True) + + single = sub.add_parser("single", help="Generate a single STF trace window") + single.add_argument("binary", help="Path to workload binary") + single.add_argument("--emulator", required=True, choices=["spike", "qemu"]) + single.add_argument("--mode", choices=["macro", "insn_count", "pc_count"], default="macro") + single.add_argument("--arch", choices=["rv32", "rv64"], default="rv64", help="Used for QEMU target selection") + single.add_argument("--isa", help="ISA string for Spike") + single.add_argument("--pk", action="store_true", help="Launch Spike with proxy kernel") + single.add_argument("--num-instructions", type=int, help="Number of instructions to trace (instruction-count modes)") + single.add_argument("--start-instruction", type=int, default=0) + single.add_argument("--start-pc", type=lambda x: int(x, 0)) + single.add_argument("--pc-threshold", type=int, default=1) + single.add_argument("-o", "--output", help="Output file or directory") + single.add_argument("--dump", action="store_true", help="Emit stf_dump alongside trace") + + sliced = sub.add_parser("sliced", help="Generate SimPoint-sliced traces") + sliced.add_argument("--emulator", required=True, choices=["spike"], help="Spike is required for slicing") + sliced.add_argument("--workload", required=True) + sliced.add_argument("--benchmark", required=True) + sliced.add_argument("--interval-size", type=int, help="Override interval size (defaults to run metadata)") + sliced.add_argument("--simpoints", help="Path to .simpoints file") + sliced.add_argument("--weights", help="Path to .weights file") + sliced.add_argument("--verify", action="store_true", help="Validate traces via stf_count") + sliced.add_argument("--dump", action="store_true", help="Emit stf_dump per slice") + sliced.add_argument("--clean", action="store_true", help="Remove existing slices before generation") + + return parser.parse_args() + + +def main() -> None: + args = parse_args() + if args.command == "single": + _run_single(args) + else: + _run_sliced(args) + + +if 
__name__ == "__main__": + main() diff --git a/traces/docker_stf_trace_gen/flow/run_simpoint.py b/traces/docker_stf_trace_gen/flow/run_simpoint.py new file mode 100644 index 00000000..27038320 --- /dev/null +++ b/traces/docker_stf_trace_gen/flow/run_simpoint.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python3 +"""Run SimPoint analysis on BBV traces produced by run_workload.py.""" +from __future__ import annotations + +import argparse +import json +from dataclasses import dataclass +from pathlib import Path +from typing import Dict, List, Optional +import sys + +ROOT = Path(__file__).resolve().parents[1] +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) + +from flow.utils.paths import BenchmarkPaths, outputs_root, simpoint_analysis_root +from flow.utils.util import CommandError, Util + + +@dataclass +class BBVTarget: + workload: str + benchmark: str + bbv_file: Path + run_meta: Path + + @property + def run_meta_data(self) -> Dict: + return Util.read_json(self.run_meta) + + +def _discover_targets(args: argparse.Namespace) -> List[BBVTarget]: + run_root = outputs_root() / args.emulator + if not run_root.exists(): + raise SystemExit(f"No run outputs present: {run_root}") + + targets: List[BBVTarget] = [] + for workload_dir in sorted(run_root.iterdir()): + if workload_dir.name in {"bin", "simpointed"}: + continue + if args.workload and workload_dir.name != args.workload: + continue + for bench_dir in sorted(workload_dir.iterdir()): + if not bench_dir.is_dir(): + continue + if args.benchmark and bench_dir.name != args.benchmark: + continue + run_meta = bench_dir / "run_meta.json" + if not run_meta.exists(): + continue + data = Util.read_json(run_meta) + bbv_path = data.get("bbv_file") + if not bbv_path: + continue + bbv_file = Path(bbv_path) + if not bbv_file.exists() or bbv_file.stat().st_size == 0: + continue + targets.append(BBVTarget(workload_dir.name, bench_dir.name, bbv_file, run_meta)) + if not targets: + raise SystemExit("No BBV files discovered. Did you run with --bbv?") + return targets + + +def _run_simpoint(bbv_file: Path, benchmark: str, max_k: int, output_dir: Path) -> Dict: + output_dir.mkdir(parents=True, exist_ok=True) + simpoints = output_dir / f"{benchmark}.simpoints" + weights = output_dir / f"{benchmark}.weights" + + cmd = [ + "simpoint", + "-loadFVFile", + str(bbv_file), + "-maxK", + str(max_k), + "-saveSimpoints", + str(simpoints), + "-saveSimpointWeights", + str(weights), + ] + try: + result = Util.run_cmd(cmd) + except CommandError as err: + raise SystemExit(str(err)) + + if not simpoints.exists() or not weights.exists(): + raise SystemExit( + "SimPoint completed without producing output files. " + "Check that 'simpoint' is installed and the BBV file is valid." 
+ ) + + intervals = _join_simpoint_outputs(simpoints, weights) + coverage = sum(item["weight"] for item in intervals) + return { + "simpoints_file": str(simpoints), + "weights_file": str(weights), + "intervals": intervals, + "coverage": coverage, + } + + +def _join_simpoint_outputs(simpoints_path: Path, weights_path: Path) -> List[Dict[str, float]]: + intervals_by_cluster: Dict[int, int] = {} + for line in simpoints_path.read_text().splitlines(): + if not line.strip(): + continue + parts = line.split() + if len(parts) != 2: + continue + interval, cluster = parts + intervals_by_cluster[int(cluster)] = int(interval) + + weights_by_cluster: Dict[int, float] = {} + for line in weights_path.read_text().splitlines(): + if not line.strip(): + continue + parts = line.split() + if len(parts) != 2: + continue + weight, cluster = parts + weights_by_cluster[int(cluster)] = float(weight) + + entries: List[Dict[str, float]] = [] + for cluster, interval in intervals_by_cluster.items(): + weight = weights_by_cluster.get(cluster) + if weight is None: + continue + entries.append({"cluster": cluster, "interval": interval, "weight": weight}) + entries.sort(key=lambda item: item["interval"]) + return entries + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Run SimPoint analysis on BBV vectors") + parser.add_argument("--emulator", required=True, choices=["spike", "qemu"]) + parser.add_argument("--workload", help="Filter workload") + parser.add_argument("--benchmark", help="Filter benchmark") + parser.add_argument("--max-k", type=int, default=30) + parser.add_argument("--output-dir", default=None, help="Override output directory (defaults to /outputs/simpoint_analysis)") + return parser.parse_args() + + +def main() -> None: + args = parse_args() + + if not Util.validate_tool("simpoint"): + raise SystemExit(1) + + targets = _discover_targets(args) + output_dir = Path(args.output_dir) if args.output_dir else simpoint_analysis_root() + summary: Dict[str, Dict] = {} + + for target in targets: + Util.info(f"Running SimPoint for {target.workload}/{target.benchmark}") + result = _run_simpoint(target.bbv_file, target.benchmark, args.max_k, output_dir) + result.update( + { + "workload": target.workload, + "benchmark": target.benchmark, + "bbv_file": str(target.bbv_file), + } + ) + summary[f"{target.workload}:{target.benchmark}"] = result + + summary_path = output_dir / "simpoint_summary.json" + Util.write_json(summary_path, summary) + Util.info(f"Wrote SimPoint summary to {summary_path}") + + +if __name__ == "__main__": + main() diff --git a/traces/docker_stf_trace_gen/flow/run_workload.py b/traces/docker_stf_trace_gen/flow/run_workload.py new file mode 100644 index 00000000..38992774 --- /dev/null +++ b/traces/docker_stf_trace_gen/flow/run_workload.py @@ -0,0 +1,385 @@ +#!/usr/bin/env python3 +"""Run RISC-V workloads on Spike or QEMU and capture BBV/trace artefacts.""" +from __future__ import annotations + +import argparse +import json +import shlex +import shutil +import time +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Dict, List, Optional +import sys + +ROOT = Path(__file__).resolve().parents[1] +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) + +from data.consts import Const +from flow.utils.paths import BenchmarkPaths, binaries_root +from flow.utils.util import CommandError, CommandResult, Util + + +DEFAULT_ISA = {"rv32": "rv32imafdc", "rv64": "rv64imafdc"} + + +@dataclass +class RunTarget: + workload: str + 
benchmark: str + binary: Path + build_meta: Dict[str, Any] + + @property + def isa(self) -> Optional[str]: + return self.build_meta.get("isa") if self.build_meta else None + + @property + def features(self) -> Dict[str, bool]: + if not self.build_meta: + return {} + return self.build_meta.get("features", {}) + + +# ----------------------------------------------------------------------------- +# Discovery helpers +# ----------------------------------------------------------------------------- + +def _load_build_meta(binary_dir: Path) -> Dict[str, Any]: + meta_path = binary_dir / "build_meta.json" + if meta_path.exists(): + try: + return Util.read_json(meta_path) + except json.JSONDecodeError as err: + Util.warn(f"Failed to parse build metadata {meta_path}: {err}") + return {} + + +def _discover_from_outputs(args: argparse.Namespace) -> List[RunTarget]: + root = binaries_root(args.emulator) + if not root.exists(): + raise SystemExit(f"No binaries found. Expected directory {root}") + + targets: List[RunTarget] = [] + for workload_dir in sorted(root.iterdir()): + if not workload_dir.is_dir() or workload_dir.name in {"env", "wrapper"}: + continue + if args.workload and workload_dir.name != args.workload: + continue + for bench_dir in sorted(workload_dir.iterdir()): + if not bench_dir.is_dir(): + continue + bench_name = bench_dir.name + if args.benchmark and bench_name != args.benchmark: + continue + binary = bench_dir / bench_name + if not binary.exists(): + continue + meta = _load_build_meta(bench_dir) + targets.append(RunTarget(workload_dir.name, bench_name, binary, meta)) + if not targets: + raise SystemExit("No matching binaries found. Did you run build_workload.py?") + return targets + + +def _target_from_binary(args: argparse.Namespace) -> RunTarget: + binary = Path(args.binary).resolve() + if not binary.exists(): + raise SystemExit(f"Binary not found: {binary}") + if not args.workload or not args.benchmark: + raise SystemExit("--binary requires both --workload and --benchmark to locate outputs") + meta = {} + bench_dir = binary.parent + meta_path = bench_dir / "build_meta.json" + if meta_path.exists(): + meta = _load_build_meta(bench_dir) + return RunTarget(args.workload, args.benchmark, binary, meta) + + +# ----------------------------------------------------------------------------- +# Command assembly +# ----------------------------------------------------------------------------- + +def _spike_command( + target: RunTarget, + paths: BenchmarkPaths, + *, + isa: str, + bbv: bool, + trace: bool, + interval_size: int, + platform: str, +) -> Dict[str, Any]: + cmd: List[str] = ["spike", f"--isa={isa}"] + bbv_file: Optional[Path] = None + trace_file: Optional[Path] = None + + if bbv: + paths.bbv_dir.mkdir(parents=True, exist_ok=True) + bbv_file = paths.bbv_dir / f"{target.benchmark}.bbv" + cmd.extend([ + "--en_bbv", + f"--bb_file={bbv_file}", + f"--simpoint_size={interval_size}", + ]) + + if trace: + paths.trace_dir.mkdir(parents=True, exist_ok=True) + trace_file = paths.trace_dir / f"{target.benchmark}.full.zstf" + cmd.extend([ + "--stf_trace_memory_records", + "--stf_macro_tracing", + f"--stf_trace={trace_file}", + ]) + + binary_cmd: List[str] + if platform == "linux": + binary_cmd = [Const.SPIKE_PK, str(target.binary)] + else: + binary_cmd = [str(target.binary)] + + return { + "argv": cmd + binary_cmd, + "bbv_file": bbv_file, + "trace_file": trace_file, + } + + +def _qemu_command( + target: RunTarget, + paths: BenchmarkPaths, + *, + arch: str, + platform: str, + bbv: bool, + trace: bool, 
+ interval_size: int, +) -> Dict[str, Any]: + bits = 32 if arch == "rv32" else 64 + cmd: List[str] + bbv_file: Optional[Path] = None + trace_file: Optional[Path] = None + + if platform == "baremetal": + cmd = [f"qemu-system-riscv{bits}", "-nographic", "-machine", "virt", "-bios", "none", "-kernel", str(target.binary)] + else: + cmd = [f"qemu-riscv{bits}", str(target.binary)] + + if bbv: + plugin = "/usr/lib/libbbv.so" + if not Path(plugin).exists(): + Util.warn("BBV requested for QEMU but libbbv.so not found; skipping") + else: + paths.bbv_dir.mkdir(parents=True, exist_ok=True) + bbv_file = paths.bbv_dir / f"{target.benchmark}.bbv" + cmd.extend([ + "-plugin", + f"{plugin},interval={interval_size},outfile={bbv_file}", + ]) + + if trace: + plugin = Const.LIBSTFMEM + if not Path(plugin).exists(): + Util.warn("Trace requested for QEMU but libstfmem.so not found; skipping") + else: + paths.trace_dir.mkdir(parents=True, exist_ok=True) + trace_file = paths.trace_dir / f"{target.benchmark}.full.zstf" + cmd.extend([ + "-plugin", + f"{plugin},mode=dyn_insn_count,start_dyn_insn=0,num_instructions=18446744073709551615,outfile={trace_file}", + "-d", + "plugin", + ]) + return { + "argv": cmd, + "bbv_file": bbv_file, + "trace_file": trace_file, + } + + +# ----------------------------------------------------------------------------- +# Execution +# ----------------------------------------------------------------------------- + +def _write_logs(paths: BenchmarkPaths, target: RunTarget, result: CommandResult) -> None: + paths.logs_dir.mkdir(parents=True, exist_ok=True) + log_path = paths.logs_dir / f"{target.benchmark}.log" + log_path.write_text(result.stdout + ("\n" + result.stderr if result.stderr else "")) + + +def _clean_outputs(paths: BenchmarkPaths) -> None: + for directory in (paths.bbv_dir, paths.trace_dir, paths.logs_dir): + if directory.exists(): + shutil.rmtree(directory) + + +def _write_run_metadata( + paths: BenchmarkPaths, + target: RunTarget, + *, + args: argparse.Namespace, + isa: str, + bbv: bool, + trace: bool, + bbv_file: Optional[Path], + trace_file: Optional[Path], + command: List[str], + elapsed: float, +) -> None: + metadata = { + "timestamp": Util.now_iso(), + "emulator": args.emulator, + "arch": args.arch, + "platform": args.platform, + "workload": target.workload, + "benchmark": target.benchmark, + "binary": str(target.binary), + "isa": isa, + "interval_size": args.interval_size, + "bbv_enabled": bbv, + "trace_enabled": trace, + "bbv_file": str(bbv_file) if bbv_file else None, + "trace_file": str(trace_file) if trace_file else None, + "command": command, + "elapsed_seconds": elapsed, + "build_meta": target.build_meta, + } + Util.write_json(paths.run_meta_path, metadata) + + +# ----------------------------------------------------------------------------- +# CLI handling +# ----------------------------------------------------------------------------- + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Run workloads on Spike or QEMU") + parser.add_argument("--emulator", required=True, choices=["spike", "qemu"]) + parser.add_argument("--platform", default="baremetal", choices=["baremetal", "linux"]) + parser.add_argument("--arch", default="rv32", choices=["rv32", "rv64"]) + parser.add_argument("--workload", help="Filter to a specific workload") + parser.add_argument("--benchmark", help="Filter to a single benchmark") + parser.add_argument("--binary", help="Run a specific binary instead of discovered outputs") + 
parser.add_argument("--interval-size", type=int, default=10_000_000) + parser.add_argument("--stf-dump", action="store_true", help="Run stf_dump on generated traces") + parser.add_argument("--clean", action="store_true", help="Clean bbv/trace/log directories before running") + parser.add_argument("--list", action="store_true", help="List discovered binaries and exit") + parser.set_defaults(bbv=None, trace=None) + parser.add_argument("--bbv", dest="bbv", action="store_true", help="Enable BBV generation during run") + parser.add_argument("--no-bbv", dest="bbv", action="store_false", help="Disable BBV regardless of build metadata") + parser.add_argument("--trace", dest="trace", action="store_true", help="Generate full STF traces during run") + parser.add_argument("--no-trace", dest="trace", action="store_false", help="Disable trace generation") + parser.add_argument("--isa", help="Override ISA passed to Spike (defaults to build metadata or arch mapping)") + return parser.parse_args() + + +def _list_targets(targets: List[RunTarget]) -> None: + Util.info("Discovered binaries:") + for target in targets: + Util.info(f" {target.workload}/{target.benchmark}: {target.binary}") + + +def _effective_flag(value: Optional[bool], default: bool) -> bool: + return default if value is None else value + + +def run_targets(args: argparse.Namespace, targets: List[RunTarget]) -> None: + Util.info(f"Running {len(targets)} benchmark(s) on {args.emulator}") + + for target in targets: + paths = BenchmarkPaths(args.emulator, target.workload, target.benchmark) + paths.resolve() + if args.clean: + _clean_outputs(paths) + + meta_features = target.features + bbv = _effective_flag(args.bbv, meta_features.get("bbv", False)) + trace = _effective_flag(args.trace, meta_features.get("trace", False)) + + if args.emulator == "spike": + isa = args.isa or target.isa or DEFAULT_ISA.get(args.arch, args.arch) + command_info = _spike_command(target, paths, isa=isa, bbv=bbv, trace=trace, interval_size=args.interval_size, platform=args.platform) + else: + isa = args.isa or target.isa or DEFAULT_ISA.get(args.arch, args.arch) + command_info = _qemu_command(target, paths, arch=args.arch, platform=args.platform, bbv=bbv, trace=trace, interval_size=args.interval_size) + + argv = command_info["argv"] + bbv_file = command_info.get("bbv_file") + trace_file = command_info.get("trace_file") + + Util.info(f"Running {target.workload}/{target.benchmark}") + Util.info("Command: " + " ".join(shlex.quote(arg) for arg in argv)) + + start = time.time() + try: + result = Util.run_cmd(argv) + except CommandError as err: + raise SystemExit(str(err)) + elapsed = time.time() - start + + # Spike appends suffixes like _cpu0 to BBV files; capture the file that actually exists. 
+ if bbv and bbv_file and not bbv_file.exists(): + candidates = sorted(paths.bbv_dir.glob(f"{target.benchmark}.bbv*")) + if candidates: + bbv_file = candidates[0] + if trace and trace_file and not trace_file.exists(): + candidates = sorted(paths.trace_dir.glob(f"{target.benchmark}*.zstf")) + if candidates: + trace_file = candidates[0] + + _write_logs(paths, target, result) + _write_run_metadata( + paths, + target, + args=args, + isa=isa, + bbv=bbv, + trace=trace, + bbv_file=bbv_file, + trace_file=trace_file, + command=list(argv), + elapsed=elapsed, + ) + + Util.info(f"Completed {target.benchmark} in {elapsed:.2f}s") + + if trace and args.stf_dump and trace_file and trace_file.exists(): + dump_tool = Path(Const.STF_TOOLS) / "stf_dump" / "stf_dump" + if dump_tool.exists(): + Util.info(f"Dumping trace with {dump_tool}") + try: + Util.run_cmd([str(dump_tool), str(trace_file)], capture_output=False) + except CommandError as err: + Util.warn(f"stf_dump failed: {err}") + + +# ----------------------------------------------------------------------------- +# Main entry point +# ----------------------------------------------------------------------------- + +def main() -> None: + args = parse_args() + + if args.emulator == "spike": + tool_to_check = "spike" + else: + bits = 32 if args.arch == "rv32" else 64 + tool_to_check = f"qemu-{'system-' if args.platform == 'baremetal' else ''}riscv{bits}" + + if not Util.validate_tool(tool_to_check): + raise SystemExit(1) + + if args.binary: + targets = [_target_from_binary(args)] + else: + targets = _discover_from_outputs(args) + + if args.list: + _list_targets(targets) + return + + run_targets(args, targets) + + +if __name__ == "__main__": + main() diff --git a/traces/docker_stf_trace_gen/flow/utils/config.py b/traces/docker_stf_trace_gen/flow/utils/config.py new file mode 100644 index 00000000..8f63daf0 --- /dev/null +++ b/traces/docker_stf_trace_gen/flow/utils/config.py @@ -0,0 +1,416 @@ +""" +config_v2.py — strict YAML parser and config finalizer for the builder. + +Schema overview (see board.yaml for a concrete example): + +schema: 2 +variables: + workloads_roots: ["/workloads", "/default"] + env_root: "/default/environment/{board}" + outputs_root: "/outputs/{emulator}/bin" + include_auto: [] # optional extra includes to add everywhere + alt_roots: # optional fallbacks if a path doesn't exist + - {from: "/workloads/", to: "/default/"} + +toolchains: + rv32: + baremetal: + cc: "riscv32-unknown-elf-gcc" + base_cflags: ["-march=rv32imafdc", ...] + base_ldflags: ["-march=rv32imafdc", ...] + libs: ["-lc", "-lm"] + linker_script: "link.ld" + linux: + cc: "riscv32-unknown-linux-gnu-gcc" + base_cflags: [...] + base_ldflags: [...] + libs: ["-lm"] + +features: + bbv: + cflags: ["-DBBV"] + ldflags: [] + trace: + cflags: ["-DTRACE"] + ldflags: [] + # (Optional) arch_overrides for vector vs regular can also be modeled here. + +workloads: + : + # Layout + flags which can be overridden under platforms. + layout: + mode: "per_benchmark" | "single" + # per_benchmark only: + per_benchmark: + bench_root: "{workload_root}/benchmarks" # or .../src + source_patterns: ["*.c", "*.S"] + exclude_dirs: ["common"] # optional + common_patterns: ["{workload_root}/benchmarks/common/*.c"] # optional + common_skip: ["syscalls.c"] # optional + support_once_patterns: ["{workload_root}/support/*.c"] # optional + support_per_benchmark_patterns: ["{workload_root}/support/*.c"] # optional + # single only: + single_sources: ["{workload_root}/foo.c", ...] 
# if mode: single + + includes: ["{workload_root}/env"] # as many as you need + defines: [] + cflags: [] + ldflags: [] + libs: [] + + env: + files: ["crt0.S", "main.c", "stub.c", "util.c"] + skip: false + + platforms: + baremetal: { ... overrides ... } + linux: { ... overrides ... } +""" + +from __future__ import annotations +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Dict, List, Optional +import yaml +import copy + +from data.consts import Const + +# ---------------------------- Data Classes ---------------------------- + +@dataclass +class Toolchain: + cc: str + base_cflags: List[str] + base_ldflags: List[str] + libs: List[str] + linker_script: Optional[str] + + +@dataclass +class Flags: + cflags: List[str] + includes: List[str] + ldflags: List[str] + libs: List[str] + linker_script: Optional[Path] + + @property + def cflags_with_includes(self) -> List[str]: + incs = [f"-I{p}" for p in self.includes] + # dedupe preserving order + seen = set() + out: List[str] = [] + for f in (self.cflags + incs): + if f not in seen: + out.append(f) + seen.add(f) + return out + + +@dataclass +class EnvConfig: + dir: Path + files: List[str] + skip: bool + + +@dataclass +class PerBenchmark: + bench_root: str + source_patterns: List[str] + exclude_dirs: List[str] + + +@dataclass +class LayoutConfig: + mode: str # "per_benchmark" or "single" + per_benchmark: Optional[PerBenchmark] + common_patterns: List[str] + common_skip: List[str] + support_once_patterns: List[str] + support_per_benchmark_patterns: List[str] + single_sources: List[str] + + +@dataclass +class Paths: + workloads_roots: List[Path] + outputs_root: Path + + +@dataclass +class Tools: + cc: str + + +@dataclass +class FeatureSet: + bbv: bool = False + trace: bool = False + + +@dataclass +class FinalConfig: + tools: Tools + flags: Flags + env: EnvConfig + layout: LayoutConfig + paths: Paths + + +# ---------------------------- Helpers ---------------------------- + +def _deep_merge(a: dict, b: dict) -> dict: + """Deep-merge b into a without modifying inputs.""" + out = copy.deepcopy(a) + for k, v in b.items(): + if isinstance(v, dict) and isinstance(out.get(k), dict): + out[k] = _deep_merge(out[k], v) + else: + out[k] = copy.deepcopy(v) + return out + + +def _dedupe(seq: List[str]) -> List[str]: + seen = set() + out: List[str] = [] + for x in seq: + if x not in seen: + out.append(x) + seen.add(x) + return out + + +def _fmt(value: Any, ctx: Dict[str, str]) -> Any: + if isinstance(value, str): + try: + return value.format(**ctx) + except KeyError: + return value + if isinstance(value, list): + return [_fmt(v, ctx) for v in value] + if isinstance(value, dict): + return {k: _fmt(v, ctx) for k, v in value.items()} + return value + + +def _ensure_list(v: Any) -> List: + if v is None: + return [] + return v if isinstance(v, list) else [v] + + +def _rewrite_if_missing(p: Path, alt_rules: List[dict]) -> Path: + """If p doesn't exist, try replacing configured prefixes.""" + if p.exists(): + return p + s = str(p) + for rule in alt_rules or []: + frm = rule.get("from") + to = rule.get("to") + if frm and to and s.startswith(frm): + candidate = Path(to + s[len(frm):]) + if candidate.exists(): + return candidate + return p # keep original (may be created later or be optional) + + +# ---------------------------- Main Loader ---------------------------- + +class BuildConfig: + def __init__(self, data: Dict[str, Any], board: str): + self.raw = data + self.board = board + + if int(self.raw.get("schema", 0)) != 2: + raise 
SystemExit("board.yaml must set schema: 2") + + self.variables = self.raw.get("variables", {}) + self.toolchains = self.raw.get("toolchains", {}) + self.features = self.raw.get("features", {}) + self.workloads = self.raw.get("workloads", {}) + + @staticmethod + def load(board: str) -> "BuildConfig": + candidate_paths = [ + Path(Const.CONTAINER_ENV_ROOT) / board / "board.yaml", + Path(__file__).resolve().parents[2] / "environment" / board / "board.yaml", + ] + cfg_path = next((p for p in candidate_paths if p.exists()), None) + if cfg_path is None: + raise SystemExit(f"Board config not found for {board}. Searched: {', '.join(str(p) for p in candidate_paths)}") + with open(cfg_path, "r") as f: + data = yaml.safe_load(f) or {} + return BuildConfig(data, board=board) + + def list_workloads(self) -> List[str]: + return sorted(self.workloads.keys()) + + # ---------- Resolution ---------- + + def resolve_workload_root(self, workload: str, custom: Optional[str]) -> Path: + ctx = self._ctx_defaults() + roots = [Path(_fmt(p, ctx)) for p in self.variables.get("workloads_roots", ["/workloads", "/default"])] + if custom: + p = Path(custom) + if p.exists(): + return p + raise SystemExit(f"Custom workload path not found: {p}") + for r in roots: + candidate = r / workload + if candidate.exists(): + return candidate + raise SystemExit(f"Workload '{workload}' not found in: {', '.join(map(str, roots))}") + + def _ctx_defaults(self, **extra) -> Dict[str, str]: + ctx = dict( + board=self.board, + emulator=self.board, # alias + ) + ctx.update(extra) + return ctx + + def _toolchain(self, arch: str, platform: str) -> Toolchain: + t = self.toolchains.get(arch, {}).get(platform, {}) + if not t: + raise SystemExit(f"Missing toolchain for {arch}.{platform}") + return Toolchain( + cc=t.get("cc", ""), + base_cflags=_ensure_list(t.get("base_cflags")), + base_ldflags=_ensure_list(t.get("base_ldflags")), + libs=_ensure_list(t.get("libs")), + linker_script=t.get("linker_script"), + ) + + def _apply_features(self, cflags: List[str], ldflags: List[str], feats: FeatureSet) -> Tuple[List[str], List[str]]: + def add(kind: str): + cfg = self.features.get(kind, {}) + return _ensure_list(cfg.get("cflags")), _ensure_list(cfg.get("ldflags")) + cc, ll = [], [] + if feats.bbv: + c, l = add("bbv") + cc += c; ll += l + if feats.trace: + c, l = add("trace") + cc += c; ll += l + return _dedupe(cflags + cc), _dedupe(ldflags + ll) + + def _final_includes(self, base: List[str], ctx: Dict[str, str]) -> List[str]: + # includes from variables.include_auto + workload/includes + auto = _ensure_list(self.variables.get("include_auto", [])) + incs = _fmt(auto + base, ctx) + # rewrite missing prefixes if requested + alt_rules = _ensure_list(self.variables.get("alt_roots", [])) + fixed: List[str] = [] + for inc in incs: + p = Path(inc) + p2 = _rewrite_if_missing(p, alt_rules) + fixed.append(str(p2)) + return _dedupe(fixed) + + def get_layout(self, workload: str, emulator: str, arch: str, platform: str, workload_root: Path) -> LayoutConfig: + # Merge workload + platform override, then format + base = self.workloads.get(workload, {}) + plat = (base.get("platforms", {}) or {}).get(platform, {}) + merged = _deep_merge(base, plat) + + ctx = self._ctx_defaults(workload_root=str(workload_root), arch=arch, platform=platform, emulator=emulator) + merged = _fmt(merged, ctx) + + layout = merged.get("layout", {}) or {} + mode = layout.get("mode", "per_benchmark") + + per_bench_config = None + if mode == "per_benchmark": + pb = layout.get("per_benchmark", {}) or {} 
+ per_bench_config = PerBenchmark( + bench_root=pb.get("bench_root", f"{workload_root}/benchmarks"), + source_patterns=_ensure_list(pb.get("source_patterns", ["*.c", "*.S"])), + exclude_dirs=_ensure_list(pb.get("exclude_dirs", [])), + ) + + common_patterns = _ensure_list(layout.get("common_patterns", [])) + common_skip = _ensure_list(layout.get("common_skip", [])) + support_once = _ensure_list(layout.get("support_once_patterns", [])) + support_per_bench = _ensure_list(layout.get("support_per_benchmark_patterns", [])) + single_sources = _ensure_list(layout.get("single_sources", [])) + + return LayoutConfig( + mode=mode, + per_benchmark=per_bench_config, + common_patterns=common_patterns, + common_skip=common_skip, + support_once_patterns=support_once, + support_per_benchmark_patterns=support_per_bench, + single_sources=single_sources, + ) + + def finalize( + self, + workload: str, + arch: str, + platform: str, + emulator: str, + workload_root: Path, + features: FeatureSet, + ) -> FinalConfig: + # toolchain + tc = self._toolchain(arch, platform) + + # workload->platform deep merge + format + base = self.workloads.get(workload, {}) + plat = (base.get("platforms", {}) or {}).get(platform, {}) + merged = _deep_merge(base, plat) + + ctx = self._ctx_defaults(workload_root=str(workload_root), arch=arch, platform=platform, emulator=emulator) + merged = _fmt(merged, ctx) + + # env + env_cfg = merged.get("env", {}) or {} + env_skip = bool(env_cfg.get("skip", False)) + env_dir = Path(_fmt(self.variables.get("env_root", "/default/environment/{board}"), ctx)) + env_files = _ensure_list(env_cfg.get("files", [])) + + # flags (base toolchain + workload additions) + defines = [f"-D{d}" for d in _ensure_list(merged.get("defines", []))] + cflags = _dedupe(tc.base_cflags + defines + _ensure_list(merged.get("cflags", []))) + ldflags = _dedupe(tc.base_ldflags + _ensure_list(merged.get("ldflags", []))) + libs = _dedupe(tc.libs + _ensure_list(merged.get("libs", []))) + # features + cflags, ldflags = self._apply_features(cflags, ldflags, features) + + # includes + includes = self._final_includes(_ensure_list(merged.get("includes", [])), ctx) + + # linker script + lds = merged.get("linker_script", tc.linker_script) + linker_script = Path(env_dir / lds) if (lds and platform == "baremetal") else None + + # layout + layout = self.get_layout(workload, emulator=emulator, arch=arch, platform=platform, workload_root=workload_root) + + # outputs root + out_root = Path(_fmt(self.variables.get("outputs_root", "/outputs/{emulator}/bin"), ctx)) + + # alt_roots apply to linker_script path too (if missing) + alt_rules = _ensure_list(self.variables.get("alt_roots", [])) + if linker_script: + linker_script = _rewrite_if_missing(linker_script, alt_rules) + + return FinalConfig( + tools=Tools(cc=tc.cc), + flags=Flags( + cflags=cflags, + includes=includes, + ldflags=ldflags, + libs=libs, + linker_script=linker_script, + ), + env=EnvConfig(dir=env_dir, files=env_files, skip=env_skip), + layout=layout, + paths=Paths( + workloads_roots=[Path(p) for p in _fmt(self.variables.get("workloads_roots", ["/workloads", "/default"]), ctx)], + outputs_root=out_root, + ), + ) diff --git a/traces/docker_stf_trace_gen/flow/utils/docker_orchestrator.py b/traces/docker_stf_trace_gen/flow/utils/docker_orchestrator.py new file mode 100644 index 00000000..61fa3cd8 --- /dev/null +++ b/traces/docker_stf_trace_gen/flow/utils/docker_orchestrator.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python3 +"""Launch helper for running commands inside the project Docker 
image.""" +from __future__ import annotations + +import shlex +import subprocess +from pathlib import Path +from typing import Iterable, Sequence + +from data.consts import Const +from flow.utils.util import CommandError, CommandResult, Util + + +class DockerOrchestrator: + """Small wrapper around ``docker run`` with the mounts we expect.""" + + def __init__(self, image: str | None = None) -> None: + self.image = image or Const.DOCKER_IMAGE_NAME + self.project_root = Path(__file__).resolve().parents[2] + self.mounts = self._default_mounts() + + def _default_mounts(self) -> list[tuple[Path, str]]: + mounts: list[tuple[Path, str]] = [] + mounts.append((self.project_root, Const.CONTAINER_FLOW_ROOT)) + outputs = self.project_root / "outputs" + outputs.mkdir(parents=True, exist_ok=True) + mounts.append((outputs, Const.CONTAINER_OUTPUT_ROOT)) + env_dir = self.project_root / "environment" + if env_dir.exists(): + mounts.append((env_dir, Const.CONTAINER_ENV_ROOT)) + workloads_dir = self.project_root.parent / "workloads" + if workloads_dir.exists(): + mounts.append((workloads_dir, Const.CONTAINER_WORKLOAD_ROOT)) + return mounts + + def _docker_prefix(self, workdir: str, interactive: bool) -> list[str]: + cmd = ["docker", "run", "--rm"] + if interactive: + cmd.append("-it") + for host, container in self.mounts: + cmd.extend(["-v", f"{host.resolve()}:{container}"]) + cmd.extend(["-w", workdir, self.image]) + return cmd + + def run( + self, + command: Sequence[str], + *, + workdir: str = Const.CONTAINER_FLOW_ROOT, + interactive: bool = False, + ) -> subprocess.CompletedProcess: + docker_cmd = self._docker_prefix(workdir, interactive) + ["bash", "-lc", shlex.join(command)] + Util.info("Docker exec: " + " ".join(docker_cmd)) + result = subprocess.run(docker_cmd) + if result.returncode != 0: + raise CommandError(CommandResult(tuple(docker_cmd), result.returncode, "", "")) + return result diff --git a/traces/docker_stf_trace_gen/utils/get-tool.sh b/traces/docker_stf_trace_gen/flow/utils/get-tool.sh similarity index 100% rename from traces/docker_stf_trace_gen/utils/get-tool.sh rename to traces/docker_stf_trace_gen/flow/utils/get-tool.sh diff --git a/traces/docker_stf_trace_gen/flow/utils/paths.py b/traces/docker_stf_trace_gen/flow/utils/paths.py new file mode 100644 index 00000000..ee17c015 --- /dev/null +++ b/traces/docker_stf_trace_gen/flow/utils/paths.py @@ -0,0 +1,88 @@ +"""Centralised view of output directories used across the flow.""" +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path + +from data.consts import Const + + +def outputs_root() -> Path: + return Path(Const.CONTAINER_OUTPUT_ROOT) + + +def binaries_root(emulator: str) -> Path: + return outputs_root() / emulator / "bin" + + +def simpoint_analysis_root() -> Path: + return outputs_root() / "simpoint_analysis" + + +def simpointed_root(emulator: str) -> Path: + return outputs_root() / "simpointed" / emulator + + +@dataclass(frozen=True) +class BenchmarkPaths: + emulator: str + workload: str + benchmark: str + + @property + def binary_dir(self) -> Path: + return binaries_root(self.emulator) / self.workload / self.benchmark + + @property + def binary_path(self) -> Path: + return self.binary_dir / self.benchmark + + @property + def object_dir(self) -> Path: + return self.binary_dir / "obj" + + @property + def build_meta_path(self) -> Path: + return self.binary_dir / "build_meta.json" + + @property + def env_dir(self) -> Path: + return binaries_root(self.emulator) / "env" + + @property + def 
run_root(self) -> Path: + return outputs_root() / self.emulator / self.workload / self.benchmark + + @property + def bbv_dir(self) -> Path: + return self.run_root / "bbv" + + @property + def trace_dir(self) -> Path: + return self.run_root / "traces" + + @property + def logs_dir(self) -> Path: + return self.run_root / "logs" + + @property + def run_meta_path(self) -> Path: + return self.run_root / "run_meta.json" + + @property + def simpoint_dir(self) -> Path: + return simpointed_root(self.emulator) / self.workload / self.benchmark + + def resolve(self) -> None: + """Ensure parent directories exist for consumers that expect them.""" + for path in (self.binary_dir, self.object_dir, self.env_dir, self.run_root): + path.mkdir(parents=True, exist_ok=True) + + +__all__ = [ + "BenchmarkPaths", + "binaries_root", + "outputs_root", + "simpoint_analysis_root", + "simpointed_root", +] diff --git a/traces/docker_stf_trace_gen/flow/utils/util.py b/traces/docker_stf_trace_gen/flow/utils/util.py new file mode 100644 index 00000000..69f869af --- /dev/null +++ b/traces/docker_stf_trace_gen/flow/utils/util.py @@ -0,0 +1,216 @@ +"""Common helpers shared across the flow CLIs.""" +from __future__ import annotations + +import hashlib +import json +import logging +import os +import subprocess +import sys +import time +from dataclasses import dataclass +from pathlib import Path +from typing import Iterable, List, Mapping, Optional, Sequence, Tuple, Union + +# Configure a predictable logger once. Individual scripts can further tweak level. +logging.basicConfig( + level=logging.INFO, + format="[%(asctime)s] %(levelname)s: %(message)s", + datefmt="%H:%M:%S", +) + + +@dataclass +class CommandResult: + """Structured command result for downstream processing.""" + + argv: Tuple[str, ...] 
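+    # returncode/stdout/stderr mirror subprocess.CompletedProcess; stdout and
+    # stderr are stored as empty strings when output is not captured (see Util.run_cmd).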
+ returncode: int + stdout: str + stderr: str + + @property + def ok(self) -> bool: + return self.returncode == 0 + + +class CommandError(RuntimeError): + """Raised when a subprocess exits with non-zero status and check=True.""" + + def __init__(self, result: CommandResult): + self.result = result + super().__init__( + f"Command failed ({result.returncode}): {' '.join(result.argv)}\n" + f"stdout:\n{result.stdout}\nstderr:\n{result.stderr}" + ) + + +class Util: + """Grab-bag of helpers for logging, filesystem and subprocess handling.""" + + LOGGER = logging.getLogger("flow") + + # ------------------------------------------------------------------ + # Logging helpers + # ------------------------------------------------------------------ + @staticmethod + def log(level: Union[int, str], message: str) -> None: + """Log with python logging while supporting legacy string levels.""" + if isinstance(level, str): + level = getattr(logging, level.upper(), logging.INFO) + Util.LOGGER.log(level, message) + + @staticmethod + def info(message: str) -> None: + Util.log(logging.INFO, message) + + @staticmethod + def warn(message: str) -> None: + Util.log(logging.WARNING, message) + + @staticmethod + def error(message: str) -> None: + Util.log(logging.ERROR, message) + + # ------------------------------------------------------------------ + # Subprocess helpers + # ------------------------------------------------------------------ + @staticmethod + def run_cmd( + cmd: Sequence[Union[str, Path]], + *, + cwd: Optional[Union[str, Path]] = None, + env: Optional[Mapping[str, str]] = None, + timeout: Optional[int] = None, + check: bool = True, + capture_output: bool = True, + text: bool = True, + log: bool = True, + ) -> CommandResult: + """Execute *cmd* and return a :class:`CommandResult`. + + ``check=False`` keeps failures non-fatal so that callers can handle them. + ``capture_output=False`` streams stdout/stderr directly. 
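+
+        Minimal usage sketch (the command shown is illustrative only):
+
+            result = Util.run_cmd(["make", "-n"], cwd="/workspace", check=False)
+            if result.ok:
+                Util.info(result.stdout)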
+ """ + argv = tuple(str(part) for part in cmd) + if log: + Util.LOGGER.debug("Running command: %s", " ".join(argv)) + completed = subprocess.run( + argv, + cwd=None if cwd is None else str(cwd), + env=None if env is None else dict(env), + timeout=timeout, + capture_output=capture_output, + text=text, + check=False, + ) + result = CommandResult( + argv=argv, + returncode=completed.returncode, + stdout=completed.stdout or "", + stderr=completed.stderr or "", + ) + if check and not result.ok: + raise CommandError(result) + return result + + # ------------------------------------------------------------------ + # Filesystem helpers + # ------------------------------------------------------------------ + @staticmethod + def ensure_dir(path: Union[str, Path]) -> Path: + p = Path(path) + p.mkdir(parents=True, exist_ok=True) + return p + + @staticmethod + def clean_dir(path: Union[str, Path]) -> Path: + p = Path(path) + if p.exists(): + for child in p.iterdir(): + if child.is_dir(): + Util.clean_dir(child) + else: + child.unlink() + p.rmdir() + p.mkdir(parents=True, exist_ok=True) + return p + + @staticmethod + def file_exists(path: Union[str, Path]) -> bool: + return Path(path).exists() + + @staticmethod + def glob_paths( + base: Union[str, Path], + patterns: Optional[Iterable[str]] = None, + *, + include_files: bool = True, + include_dirs: bool = False, + ) -> List[Path]: + base_path = Path(base) + if not base_path.exists(): + return [] + pats = tuple(patterns or ["*"]) + results: list[Path] = [] + for pattern in pats: + for entry in base_path.glob(pattern): + if entry.is_file() and include_files: + results.append(entry) + elif entry.is_dir() and include_dirs: + results.append(entry) + return sorted(set(results)) + + @staticmethod + def read_file_lines(path: Union[str, Path]) -> List[str]: + p = Path(path) + if not p.exists(): + raise FileNotFoundError(p) + return [line.rstrip("\n") for line in p.read_text().splitlines()] + + @staticmethod + def write_file_lines(path: Union[str, Path], lines: Iterable[str]) -> None: + Path(path).write_text("\n".join(lines) + "\n") + + @staticmethod + def read_json(path: Union[str, Path]) -> dict: + p = Path(path) + if not p.exists(): + raise FileNotFoundError(p) + return json.loads(p.read_text()) + + @staticmethod + def write_json(path: Union[str, Path], data: Mapping[str, object]) -> None: + Path(path).write_text(json.dumps(data, indent=2, sort_keys=True) + "\n") + + # ------------------------------------------------------------------ + # Misc helpers + # ------------------------------------------------------------------ + @staticmethod + def compute_sha256(file_path: Union[str, Path]) -> str: + sha256 = hashlib.sha256() + with open(file_path, "rb") as handle: + for chunk in iter(lambda: handle.read(1024 * 1024), b""): + sha256.update(chunk) + return sha256.hexdigest() + + @staticmethod + def validate_tool(tool: str) -> bool: + if shutil.which(tool): + return True + Util.error(f"Required tool not found in PATH: {tool}") + return False + + @staticmethod + def now_iso() -> str: + return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) + + +# Avoid circular import with shutil until after class definition +import shutil # noqa: E402 (import at end to keep Util definition self-contained) + +__all__ = [ + "CommandResult", + "CommandError", + "Util", +] diff --git a/traces/docker_stf_trace_gen/full_flow.py b/traces/docker_stf_trace_gen/full_flow.py index 5505fda3..f06220ef 100755 --- a/traces/docker_stf_trace_gen/full_flow.py +++ 
b/traces/docker_stf_trace_gen/full_flow.py @@ -1,330 +1,144 @@ #!/usr/bin/env python3 -"""Orchestrates RISC-V workload analysis with Docker.""" -import argparse -import sys -import json -import time -from pathlib import Path -from typing import List, Tuple, Dict -from utils.util import Util, LogLevel -from utils.config import BoardConfig - - -# do we need these functions in util.py? they seem generic enough -def discover_workloads() -> Dict[str, str]: - """Discover available workloads.""" - base_dir = Path.cwd().parent / "workloads" if (Path.cwd().parent / "workloads").exists() else Path("/workloads") - workloads = { - "embench-iot": str(base_dir / "embench-iot"), - "riscv-tests": str(base_dir / "riscv-tests"), - "dhrystone": str(base_dir / "riscv-tests") - } - return {k: v for k, v in workloads.items() if Util.file_exists(v)} +"""Host-side orchestrator that runs the full workload→SimPoint→slicing flow.""" +from __future__ import annotations -def get_benchmarks(workload: str, board: str = 'spike') -> List[str]: - """Get benchmarks for a workload.""" - workloads = discover_workloads() - workload_path = Path(workloads.get(workload, workloads.get("riscv-tests", ""))) - if workload == "embench-iot": - src_dir = workload_path / "src" - return [d.name for d in src_dir.iterdir() if d.is_dir()] if src_dir.exists() else [] - elif workload in ["riscv-tests", "dhrystone"]: - bench_dir = workload_path / "benchmarks" - return [d.name for d in bench_dir.iterdir() if d.is_dir() and d.name != "common"] if bench_dir.exists() else [] - return [] - -def get_board_config(board: str) -> Dict: - """Get board configuration.""" - try: - config = BoardConfig(board) - sample = config.get_build_config('rv32', 'baremetal') - return { - 'cc': sample.get('cc', 'unknown'), - 'supported_archs': ['rv32', 'rv64'], - 'supported_platforms': ['baremetal', 'linux'], - 'features': ['bbv', 'trace'] if board == 'spike' else ['bbv', 'trace'] - } - except Exception as e: - Util.log(LogLevel.WARN, f"Could not load board config: {e}") - return {'cc': 'unknown', 'supported_archs': ['rv32', 'rv64'], 'supported_platforms': ['baremetal'], 'features': []} +import argparse -class DockerOrchestrator: - """Manages Docker container operations.""" - def __init__(self, container_name: str, image_name: str, host_output_dir: str): - self.container_name = container_name - self.image_name = image_name - self.host_output_dir = Util.ensure_dir(Path(host_output_dir).resolve()) - self.host_bin_dir = Util.ensure_dir(self.host_output_dir / "workloads_bin") - self.host_meta_dir = Util.ensure_dir(self.host_output_dir / "workloads_meta") - self.container_output_dir = "/outputs" - self.container_code_dir = "/flow" - - def check_docker(self) -> bool: - """Check if Docker is available.""" - success, out, _ = Util.run_cmd(["docker", "--version"], show=False) - if success: - Util.log(LogLevel.INFO, f"Docker available: {out.strip()}") - return True - Util.log(LogLevel.ERROR, "Docker not found") - return False +from data.consts import Const +from flow.utils.docker_orchestrator import DockerOrchestrator +from flow.utils.util import CommandError, Util - - def check_image(self) -> bool: - """Check if Docker image exists.""" - success, out, _ = Util.run_cmd(["docker", "images", "-q", self.image_name], show=False) - if out.strip(): - Util.log(LogLevel.INFO, f"Image found: {self.image_name}") - return True - Util.log(LogLevel.WARN, f"Image not found: {self.image_name}") - return False - def build_image(self) -> bool: - """Build Docker image.""" - Util.log(LogLevel.INFO, 
"Building Docker image...") - if not Util.file_exists("Dockerfile"): - Util.log(LogLevel.ERROR, "Dockerfile not found") - return False - cmd = ["docker", "build", "-t", self.image_name, "."] - success, _, _ = Util.run_cmd(cmd) - if success: - Util.log(LogLevel.INFO, "Image built successfully") - return True - Util.log(LogLevel.ERROR, "Failed to build image") - return False +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="End-to-end RISC-V trace pipeline") + parser.add_argument("--workload", required=True, help="Workload name from board.yaml") + parser.add_argument("--benchmark", help="Specific benchmark (required for slicing)") + parser.add_argument("--emulator", choices=["spike", "qemu"], default="spike") + parser.add_argument("--arch", choices=["rv32", "rv64"], default="rv32") + parser.add_argument("--platform", choices=["baremetal", "linux"], default="baremetal") - def run_command(self, command: List[str], interactive: bool = False) -> Tuple[bool, str, str]: - """Run command in Docker container.""" - mounts = [ - f"-v {self.host_output_dir}:{self.container_output_dir}", - f"-v {Path.cwd()}:{self.container_code_dir}", - f"-v {Path.cwd() / 'environment'}:/workloads/environment", - ] - if (workloads_dir := Path.cwd().parent / "workloads").exists(): - mounts.append(f"-v {workloads_dir}:/workloads") - mounts.extend([ - f"-v {self.host_bin_dir}:/workloads/bin", - f"-v {self.host_meta_dir}:/workloads/meta", - ]) - for board in ['spike', 'qemu']: - binary_list = self.host_meta_dir / f"binary_list_{board}.txt" - binary_list.touch(exist_ok=True) - mounts.append(f"-v {binary_list}:/workloads/binary_list_{board}.txt") - - docker_cmd = ["docker", "run", "--rm"] + mounts + (["-it"] if interactive else []) + \ - [self.image_name, "bash", "-c", f"cd {self.container_code_dir} && {' '.join(command)}"] - return Util.run_cmd(docker_cmd, interactive=interactive) + parser.add_argument("--bbv", action="store_true", help="Compile/run with BBV support") + parser.add_argument("--trace", action="store_true", help="Compile/run with trace macros") + parser.add_argument("--interval-size", type=int, default=10_000_000, help="BBV interval size for run phase") -class WorkflowManager: - """Manages RISC-V analysis workflow.""" - def __init__(self, orchestrator: DockerOrchestrator): - self.orchestrator = orchestrator - self.config = { - 'workload_suite': None, 'benchmarks': [], 'emulator': None, - 'architecture': None, 'platform': 'baremetal', - 'enable_bbv': False, 'enable_trace': False, 'enable_simpoint': False - } + parser.add_argument("--skip-build", action="store_true") + parser.add_argument("--skip-run", action="store_true") - def get_input(self, prompt: str, choices: List[str] = None, default: str = None, multi: bool = False) -> str: - """Get validated user input.""" - while True: - if choices: - Util.log(LogLevel.INFO, f"\n{prompt}") - for i, c in enumerate(choices, 1): - Util.log(LogLevel.INFO, f" {i}. 
{c}{' (default)' if c == default else ''}") - if multi: - Util.log(LogLevel.INFO, " Enter comma-separated numbers") - try: - resp = input(f"Select [1-{len(choices)}]: ").strip() - if not resp and default: - return default - if multi and ',' in resp: - indices = [int(x.strip()) - 1 for x in resp.split(',')] - return ','.join([choices[i] for i in indices if 0 <= i < len(choices)]) - idx = int(resp) - 1 - if 0 <= idx < len(choices): - return choices[idx] - except (ValueError, IndexError): - Util.log(LogLevel.ERROR, "Invalid selection") - else: - resp = input(f"{prompt}: ").strip() - return resp or default + parser.add_argument("--simpoint", action="store_true", help="Execute run_simpoint.py after workload run") + parser.add_argument("--simpoint-max-k", type=int, default=30) - def configure_interactive(self): - """Configure workflow interactively.""" - Util.log(LogLevel.HEADER, "RISC-V Workload Analysis Configuration") - - # Workload selection - workloads = discover_workloads() - self.config['workload_suite'] = self.get_input( - "Select workload suite", list(workloads.keys()), "embench-iot") - self.config['architecture'] = "rv32" if self.config['workload_suite'] == "embench-iot" else "rv64" - Util.log(LogLevel.INFO, f"Selected: {self.config['workload_suite']} ({workloads[self.config['workload_suite']]})") + parser.add_argument("--slice", action="store_true", help="Generate SimPoint-sliced traces") + parser.add_argument("--slice-verify", action="store_true", help="Verify sliced traces via stf_count") + parser.add_argument("--slice-dump", action="store_true", help="Emit stf_dump for each slice") + parser.add_argument("--slice-clean", action="store_true", help="Clean slice directory before regeneration") + parser.add_argument("--slice-interval-size", type=int, help="Override interval size during slicing") - # Benchmark selection - benchmarks = get_benchmarks(self.config['workload_suite']) - if benchmarks: - Util.log(LogLevel.INFO, f"Found {len(benchmarks)} benchmarks: {', '.join(benchmarks[:10])}{'...' if len(benchmarks) > 10 else ''}") - if self.get_input("Use all benchmarks? [Y/n]", ["y", "n"], "y") == "y": - self.config['benchmarks'] = ['all'] - else: - selected = self.get_input("Enter benchmarks (comma-separated)", benchmarks, multi=True) - self.config['benchmarks'] = [b.strip() for b in selected.split(',') if b.strip() in benchmarks] or ['all'] - - # Emulator selection - boards = ['spike', 'qemu'] - self.config['emulator'] = self.get_input("Select emulator", boards, "spike") - board_config = get_board_config(self.config['emulator']) - Util.log(LogLevel.INFO, f"Emulator: {self.config['emulator']} (Features: {', '.join(board_config['features'])})") + parser.add_argument("--image-name", default=Const.DOCKER_IMAGE_NAME) + parser.add_argument("--list-benchmarks", action="store_true", help="List available workloads/benchmarks inside container") + return parser.parse_args() - # Architecture and platform - archs = board_config['supported_archs'] - plats = board_config['supported_platforms'] - self.config['architecture'] = self.get_input(f"Select architecture (current: {self.config['architecture']})", archs, self.config['architecture']) - self.config['platform'] = self.get_input("Select platform", plats, "baremetal") if len(plats) > 1 else plats[0] - # Analysis features - features = board_config['features'] - if 'bbv' in features: - self.config['enable_bbv'] = self.get_input("Enable BBV? 
[y/n]", ["y", "n"], "y") == "y" - if self.config['enable_bbv'] and 'trace' in features: - self.config['enable_trace'] = self.get_input("Enable tracing? [y/n]", ["y", "n"], "n") == "y" - if self.config['enable_bbv']: - self.config['enable_simpoint'] = self.get_input("Enable SimPoint? [y/n]", ["y", "n"], "y") == "y" +def run_command(orchestrator: DockerOrchestrator, argv: list[str]) -> None: + Util.info("→ " + " ".join(argv)) + try: + orchestrator.run(argv) + except CommandError as err: + raise SystemExit(str(err)) - # Confirm - Util.log(LogLevel.HEADER, "Configuration Summary") - for k, v in self.config.items(): - Util.log(LogLevel.INFO, f" {k.replace('_', ' ').title():20}: {v}") - return self.get_input("Proceed? [y/n]", ["y", "n"], "y") == "y" - def _generate_cmd(self, script: str, workload_specific: bool = False) -> List[str]: - """Generate command for build/run/simpoint.""" - cmd = ["python3", script] - if script != "run_simpoint.py": - cmd.extend(["--arch", self.config['architecture'], "--platform", self.config['platform']]) - if self.config['enable_bbv']: - cmd.append("--bbv") - if self.config['enable_trace']: - cmd.append("--trace") - if script == "build_workload.py": - cmd.extend(["--workload", self.config['workload_suite'], "--board", self.config['emulator']]) - if self.config['benchmarks'] != ['all']: - cmd.extend(["--benchmark", self.config['benchmarks'][0]]) - elif script == "run_workload.py": - cmd.extend(["--emulator", self.config['emulator']]) - if workload_specific and self.config['benchmarks'] != ['all']: - cmd.extend(["--workload", self.config['benchmarks'][0]]) - elif script == "run_simpoint.py": - cmd.extend(["--emulator", self.config['emulator'], "--workload-type", self.config['workload_suite'], "--verbose"]) - return cmd +def main() -> None: + args = parse_args() + orchestrator = DockerOrchestrator(args.image_name) - def run_step(self, step: str, script: str, workload_specific: bool = False) -> bool: - """Run a workflow step.""" - Util.log(LogLevel.INFO, f"Executing {step}") - cmd = self._generate_cmd(script, workload_specific) - success, stdout, stderr = self.orchestrator.run_command(cmd) - if success: - Util.log(LogLevel.INFO, f"{step} completed") - if stdout: - Util.log(LogLevel.DEBUG, stdout[-1000:]) - else: - Util.log(LogLevel.WARN, f"{step} failed") - if stderr: - Util.log(LogLevel.DEBUG, stderr[-1000:]) - return success + if args.list_benchmarks: + run_command(orchestrator, ["python3", "flow/build_workload.py", "--list", "--emulator", args.emulator]) + return - def collect_results(self): - """Collect and summarize results.""" - Util.log(LogLevel.HEADER, "Collecting Results") - output_files = [ - p for p in self.orchestrator.host_output_dir.rglob("*") - if p.is_file() and any(s in str(p) for s in ["results.txt", "bbv/", "traces/", "simpoint_analysis/"]) + if not args.skip_build: + build_cmd = [ + "python3", + "flow/build_workload.py", + "--workload", + args.workload, + "--arch", + args.arch, + "--platform", + args.platform, + "--emulator", + args.emulator, ] - if output_files: - Util.log(LogLevel.INFO, "Results found:") - for f in output_files: - Util.log(LogLevel.INFO, f" • {f.relative_to(self.orchestrator.host_output_dir)}") - else: - Util.log(LogLevel.WARN, "No results found") - - summary = { - "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"), - "config": self.config, - "output_dir": str(self.orchestrator.host_output_dir), - "output_files": [str(f) for f in output_files] - } - summary_file = self.orchestrator.host_output_dir / "analysis_summary.json" - with 
summary_file.open('w') as f: - json.dump(summary, f, indent=2) - Util.log(LogLevel.INFO, f"Summary saved: {summary_file}") - -def main(): - """Main entry point for RISC-V analysis.""" - parser = argparse.ArgumentParser(description="RISC-V Workload Analysis Orchestrator") - parser.add_argument("--container-name", default="riscv-analysis") - parser.add_argument("--image-name", default="riscv-perf-model:latest") - parser.add_argument("--output-dir", default="./outputs") - parser.add_argument("--workload", help="Workload suite") - parser.add_argument("--benchmark", help="Specific benchmark") - parser.add_argument("--emulator", choices=["spike", "qemu"]) - parser.add_argument("--arch", choices=["rv32", "rv64"]) - parser.add_argument("--platform", choices=["baremetal", "linux"], default="baremetal") - parser.add_argument("--bbv", action="store_true") - parser.add_argument("--trace", action="store_true") - parser.add_argument("--simpoint", action="store_true") - parser.add_argument("--build-image", action="store_true") - parser.add_argument("--skip-build", action="store_true") - parser.add_argument("--skip-run", action="store_true") - args = parser.parse_args() - - Util.log(LogLevel.HEADER, f"RISC-V Analysis (Output: {Path(args.output_dir).resolve()})") - orchestrator = DockerOrchestrator(args.container_name, args.image_name, args.output_dir) - - if not orchestrator.check_docker(): - sys.exit(1) - if args.build_image or not orchestrator.check_image(): - if not orchestrator.build_image(): - sys.exit(1) - - workflow = WorkflowManager(orchestrator) - if args.workload: - workloads = discover_workloads() - if args.workload not in workloads: - Util.log(LogLevel.ERROR, f"Unknown workload: {args.workload}. Available: {', '.join(workloads.keys())}") - workflow.config.update({ - 'workload_suite': args.workload, - 'benchmarks': [args.benchmark] if args.benchmark else ['all'], - 'emulator': args.emulator or 'spike', - 'architecture': args.arch or ('rv32' if args.workload == 'embench-iot' else 'rv64'), - # need a better way to set/fetch default platform/arch based on workload/emulator (yaml) - 'platform': args.platform, - 'enable_bbv': args.bbv, - 'enable_trace': args.trace, - 'enable_simpoint': args.simpoint - }) - else: - if not workflow.configure_interactive(): - Util.log(LogLevel.WARN, "Cancelled by user") - sys.exit(0) + if args.benchmark: + build_cmd.extend(["--benchmark", args.benchmark]) + if args.bbv: + build_cmd.append("--bbv") + if args.trace: + build_cmd.append("--trace") + run_command(orchestrator, build_cmd) + + if not args.skip_run: + run_cmd = [ + "python3", + "flow/run_workload.py", + "--emulator", + args.emulator, + "--platform", + args.platform, + "--arch", + args.arch, + "--interval-size", + str(args.interval_size), + "--workload", + args.workload, + ] + if args.benchmark: + run_cmd.extend(["--benchmark", args.benchmark]) + if args.bbv: + run_cmd.append("--bbv") + if args.trace: + run_cmd.append("--trace") + run_command(orchestrator, run_cmd) + + if args.simpoint: + sim_cmd = [ + "python3", + "flow/run_simpoint.py", + "--emulator", + args.emulator, + "--workload", + args.workload, + "--max-k", + str(args.simpoint_max_k), + ] + if args.benchmark: + sim_cmd.extend(["--benchmark", args.benchmark]) + run_command(orchestrator, sim_cmd) + + if args.slice: + if not args.benchmark: + raise SystemExit("--slice requires --benchmark") + slice_cmd = [ + "python3", + "flow/generate_trace.py", + "sliced", + "--emulator", + args.emulator, + "--workload", + args.workload, + "--benchmark", + 
args.benchmark, + ] + if args.slice_interval_size: + slice_cmd.extend(["--interval-size", str(args.slice_interval_size)]) + if args.slice_verify: + slice_cmd.append("--verify") + if args.slice_dump: + slice_cmd.append("--dump") + if args.slice_clean: + slice_cmd.append("--clean") + run_command(orchestrator, slice_cmd) - success = True - if not args.skip_build: - success = workflow.run_step("Build", "build_workload.py") - if success and not args.skip_run: - success = workflow.run_step("Execution", "run_workload.py", workload_specific=True) - if success and workflow.config['enable_simpoint']: - workflow.run_step("SimPoint Analysis", "run_simpoint.py") - workflow.collect_results() + Util.info("Workflow completed") - Util.log(LogLevel.HEADER, "Analysis " + ("Completed" if success else "Failed")) - Util.log(LogLevel.INFO, f"Results in: {orchestrator.host_output_dir}") if __name__ == "__main__": - try: - main() - except KeyboardInterrupt: - Util.log(LogLevel.WARN, "Interrupted by user") - sys.exit(1) - except Exception as e: - Util.log(LogLevel.ERROR, f"Unexpected error: {e}") - sys.exit(1) \ No newline at end of file + main() diff --git a/traces/docker_stf_trace_gen/generate_trace.md b/traces/docker_stf_trace_gen/generate_trace.md deleted file mode 100644 index 8e31fa75..00000000 --- a/traces/docker_stf_trace_gen/generate_trace.md +++ /dev/null @@ -1,166 +0,0 @@ -# Trace Generation Tool - -This tool generates execution traces for RISC-V workloads using either **Spike** or **QEMU** emulators. - -There are **three different modes** for generating traces: -- **Macro** → uses `START_TRACE` and `STOP_TRACE` macros embedded in the workload -- **Instruction Count (`insn_count`)** → traces a fixed number of instructions after skipping some -- **Program Counter (`pc_count`)** → traces after a specific program counter (PC) is reached - ---- - -## Table of Contents - -1. [Quickstart](#quickstart) -2. [Usage](#usage) -3. [Global Options](#global-options) -4. [Modes](#modes) - - [Macro](#macro) - - [Instruction Count](#insn_count) - - [Program Counter](#pc_count) -5. [Mode Restrictions](#mode-restrictions) -6. [Summary Table](#summary-table) -7. [Help and More Info](#help-and-more-info) - ---- - -## Quickstart - -1. **Macro mode with Spike** - Trace using `START_TRACE` / `STOP_TRACE` markers inside the workload: - ```bash - python generate_trace.py --emulator spike macro workload.elf - ``` - -2. **Instruction Count mode** - Skip 1000 instructions, then trace 5000 instructions: - - ```bash - python generate_trace.py --emulator qemu insn_count \ - --num-instructions 5000 --start-instruction 1000 workload.elf - ``` - -3. **Program Counter mode (QEMU only)** - Start tracing after PC `0x80000000` is hit 5 times, trace 2000 instructions: - - ```bash - python generate_trace.py --emulator qemu pc_count \ - --num-instructions 2000 --start-pc 0x80000000 --pc-threshold 5 workload.elf - ``` - ---- - -## Usage - -```bash -python generate_trace.py [OPTIONS] MODE WORKLOAD_PATH -``` - -Example with help: - -```bash -python generate_trace.py macro --help -``` - ---- - -## Global Options - -These options apply to all modes: - -* **`--emulator {spike,qemu}`** *(required)* - Select which emulator to use. - -* **`--isa ISA`** *(optional)* - Instruction set architecture (e.g., `rv64imafdc`). - -* **`--dump`** *(flag)* - Create a trace file dump. - -* **`--pk`** *(flag)* - Run Spike with **pk (proxy kernel)**. 
- -* **`--image-name IMAGE_NAME`** *(default: `Const.DOCKER_IMAGE_NAME`)* - Use a custom Docker image instead of the default. - -* **`-o, --output OUTPUT`** *(optional)* - Output folder or file path. - -* **`workload`** *(positional, required)* - Path to workload binary. - ---- - -## Modes - -### `macro` - -Trace mode using `START_TRACE` and `STOP_TRACE` macros in the workload binary. - -* **Only works with Spike** -* No additional arguments required beyond the workload path. - -**Example:** - -```bash -python generate_trace.py --emulator spike macro workload.elf -``` - ---- - -### `insn_count` - -Trace a fixed number of instructions after skipping a given number. - -**Arguments:** - -* **`--num-instructions`** *(required, int)* → number of instructions to trace. -* **`--start-instruction`** *(required, int, default=0)* → instructions to skip before tracing starts. - -**Example:** - -```bash -python generate_trace.py --emulator qemu insn_count \ - --num-instructions 5000 --start-instruction 1000 workload.elf -``` - ---- - -### `pc_count` - -Trace a fixed number of instructions after reaching a given PC value a certain number of times. - -* **Only works with QEMU** - -**Arguments:** - -* **`--num-instructions`** *(required, int)* → number of instructions to trace. -* **`--start-pc`** *(required, int)* → starting program counter (hex or decimal). -* **`--pc-threshold`** *(required, int, default=1)* → number of times the PC must be hit before tracing begins. - -**Example:** - -```bash -python generate_trace.py --emulator qemu pc_count \ - --num-instructions 2000 --start-pc 0x80000000 --pc-threshold 5 workload.elf -``` - ---- - -## Mode Restrictions - -* `macro` mode **cannot** be used with `qemu`. -* `pc_count` mode **cannot** be used with `spike`. -* Each mode has its own required arguments. 
- ---- - -## Summary Table - -| Mode | Emulator | Required Arguments | -| ------------ | ---------- | ---------------------------------------------------- | -| `macro` | spike | workload | -| `insn_count` | spike/qemu | `--num-instructions`, `--start-instruction` | -| `pc_count` | qemu | `--num-instructions`, `--start-pc`, `--pc-threshold` | - ---- \ No newline at end of file diff --git a/traces/docker_stf_trace_gen/generate_trace.py b/traces/docker_stf_trace_gen/generate_trace.py deleted file mode 100755 index d341c34c..00000000 --- a/traces/docker_stf_trace_gen/generate_trace.py +++ /dev/null @@ -1,115 +0,0 @@ -import argparse -from dataclasses import asdict -import os -import yaml -from factories.metadata_factory import MetadataFactory -from data.metadata import Metadata -from utils.trace_generator_arg_parser import parse_args -from utils.docker_orchestrator import DockerOrchestrator -from data.consts import Const -from converters.host_to_docker_path import HostToDockerPathConverter - - -class TraceGenerator(): - def __init__(self, docker: DockerOrchestrator): - self.docker = docker - self.metadataFactory = MetadataFactory(docker) - - def run(self, args: argparse.Namespace) -> None: - args.output = self._get_ouput_path(args) - docker_paths = self._convert_paths(args) - - self._run_trace(args, docker_paths) - metadata = self._generate_metadata(args) - self._save_metadata(args.output, metadata) - - if args.dump: - dump_path = f"{args.output}.dump" - self.docker.run_stf_tool("stf_dump", args.output, dump_path) - - def _get_ouput_path(self, args: argparse.Namespace) -> str: - if not args.output: - return f"{args.workload}.zstf" - - isDir = not (os.path.splitext(args.ouput)[1]) - if not isDir and not args.output.endswith("zstf"): - raise ValueError("Invalid output file extension. 
Expected .zstf or directory.") - - if not isDir: - return args.output - - workload_filename = os.path.basename(args.workload) - return os.path.join(args.output, workload_filename) - - def _convert_paths( - self, - args: argparse.Namespace, - path_arguments: list[str] = ['workload', 'output'] - ) -> dict[str, str]: - docker_paths = {} - for path_argument in path_arguments: - arg_value = getattr(args, path_argument) - if arg_value: - docker_paths[arg_value] = HostToDockerPathConverter.convert(arg_value) - - return docker_paths - - def _run_trace(self, args: argparse.Namespace, docker_paths: dict[str, str]): - bash_cmd = "" - if args.emulator == "spike": - bash_cmd = self._get_spike_command(args, docker_paths) - elif args.emulator == "qemu": - bash_cmd = self._get_qemu_command(args, docker_paths) - else: - raise ValueError(f"Invalid emulator ({args.emulator}) provided") - - self.docker.run_command(bash_cmd, docker_paths) - - def _get_spike_command(self, args: argparse.Namespace, docker_paths: dict[str, str]) -> str: - isa = f"--isa={args.isa}" if args.isa else "" - pk = f"{Const.SPKIE_PK}" if args.pk else "" - - if args.mode == "insn_count": - return f"spike {isa} --stf_trace {docker_paths[args.output]} --stf_trace_memory_records --stf_insn_num_tracing --stf_insn_start {str(args.start_instruction)} --stf_insn_count {str(args.num_instructions)} {pk} {docker_paths[args.workload]}" - elif args.mode == "macro": - return f"spike {isa} --stf_trace {docker_paths[args.output]} --stf_trace_memory_records --stf_macro_tracing {pk} {docker_paths[args.workload]}" - - raise NotImplementedError(f"mode {args.mode} invalid for spike") - - def _get_qemu_command(self, args: argparse.Namespace, docker_paths: dict[str, str]) -> str: - args.start_instruction += 1 - if args.mode == "insn_count": - return f"qemu-riscv64 -plugin {Const.LIBSTFMEM},mode=dyn_insn_count,start_dyn_insn={args.start_instruction},num_instructions={args.num_instructions},outfile={docker_paths[args.output]} -d plugin -- {docker_paths[args.workload]}" - elif args.mode == "pc_count": - return f"qemu-riscv64 -plugin {Const.LIBSTFMEM},mode=ip,start_ip={args.start_pc},ip_hit_threshold={args.pc_threshold},num_instructions={args.num_instructions},outfile={docker_paths[args.output]} -d plugin -- {docker_paths[args.workload]}" - - raise NotImplementedError(f"mode {args.mode} invalid for qemu") - - def _generate_metadata(self, args: argparse.Namespace) -> Metadata: - workload_path = args.workload - return self.metadataFactory.create( - workload_path=workload_path, - trace_path=args.output, - trace_interval_mode=args.mode, - start_instruction=getattr(args, "start_instruction", None), - num_instructions=getattr(args, "num_instructions", None), - start_pc=getattr(args, "start_pc", None), - pc_threshold=getattr(args, "pc_threshold", None), - execution_command=None, - description=None, - ) - - def _save_metadata(self, trace_path: str, metadata: Metadata): - metadata_path = f"{trace_path}.metadata.yaml" - with open(metadata_path, 'w') as file: - yaml.dump(asdict(metadata), file) - - -def main(): - args = parse_args() - docker = DockerOrchestrator(args) - TraceGenerator(docker).run(args) - - -if __name__ == "__main__": - main() diff --git a/traces/docker_stf_trace_gen/run_simpoint.py b/traces/docker_stf_trace_gen/run_simpoint.py deleted file mode 100755 index 32e2377d..00000000 --- a/traces/docker_stf_trace_gen/run_simpoint.py +++ /dev/null @@ -1,116 +0,0 @@ -#!/usr/bin/env python3 -"""Performs SimPoint analysis on BBV files.""" -import argparse -import json 
-import time -from pathlib import Path -from typing import Dict, List, Tuple -from utils.util import Util, LogLevel - -def find_bbv_files(emulator: str, binaries: List[Path]) -> Dict[str, Path]: - """Find BBV files for binaries.""" - output_dir = Path(f"/outputs/{emulator}_output/bbv") - if not output_dir.exists(): - Util.log(LogLevel.ERROR, f"BBV directory not found: {output_dir}") - - bbv_files = {} - for binary in binaries: - name = binary.stem - bbv_file = output_dir / (f"{name}.bbv_cpu0" if emulator == "spike" else f"{name}.bbv.0.bb") - if not bbv_file.exists() and emulator == "qemu": - bbv_file = output_dir / f"{name}_bbv.0.bb" - if bbv_file.exists() and bbv_file.stat().st_size > 0: - bbv_files[name] = bbv_file - if not bbv_files: - Util.log(LogLevel.ERROR, "No valid BBV files found") - Util.log(LogLevel.INFO, f"Found {len(bbv_files)} BBV files") - return bbv_files - -def run_simpoint_analysis(bbv_file: Path, benchmark: str, max_k: int, output_dir: Path) -> Tuple[bool, Path, Path]: - """Run SimPoint analysis on a BBV file.""" - Util.ensure_dir(output_dir) - simpoints = output_dir / f"{benchmark}.simpoints" - weights = output_dir / f"{benchmark}.weights" - cmd = ["simpoint", "-loadFVFile", str(bbv_file), "-maxK", str(max_k), "-saveSimpoints", str(simpoints), "-saveSimpointWeights", str(weights)] - success, _, _ = Util.run_cmd(cmd, timeout=300) - return success and simpoints.exists() and weights.exists(), simpoints, weights - -def parse_simpoint_results(simpoints_file: Path, weights_file: Path) -> List[Tuple[int, float]]: - """Parse SimPoint intervals and weights.""" - simpoints = [int(line.split()[0]) for line in Util.read_file_lines(simpoints_file) if line.split()[0].isdigit()] if simpoints_file.exists() else [] - weights = [float(line.split()[1]) for line in Util.read_file_lines(weights_file) if len(line.split()) > 1] if weights_file.exists() else [] - return list(zip(simpoints, weights)) if len(simpoints) == len(weights) else [] - -def generate_summary(results: Dict[str, Dict], output_file: Path): - """Generate SimPoint analysis report.""" - successful = sum(1 for r in results.values() if r['success']) - total_simpoints = sum(r.get('simpoints_count', 0) for r in results.values() if r['success']) - lines = [ - f"SimPoint Analysis Summary - {time.strftime('%Y-%m-%d %H:%M:%S')}", - f"Total Benchmarks: {len(results)}", - f"Successful: {successful}/{len(results)}", - f"Average SimPoints: {total_simpoints/max(successful,1):.1f}", - "\nResults:" - ] - for bench, res in results.items(): - lines.append(f"{bench}: {'SUCCESS' if res['success'] else 'FAILED'}") - if res['success']: - lines.append(f" SimPoints: {res['simpoints_count']}") - lines.append(f" Coverage: {res.get('coverage', 'N/A')}") - if res.get('intervals'): - top = sorted(res['intervals'], key=lambda x: x[1], reverse=True)[:3] - lines.append(f" Top intervals: {', '.join(f'{i}({w:.3f})' for i, w in top)}") - - with output_file.open('w') as f: - f.write('\n'.join(lines)) - Util.log(LogLevel.INFO, '\n'.join(lines)) - -def main(): - """Main entry point.""" - parser = argparse.ArgumentParser(description="Run SimPoint analysis on BBV files") - parser.add_argument("--emulator", required=True, choices=["spike", "qemu"]) - parser.add_argument("--workload-type", help="Filter by workload type") - parser.add_argument("--max-k", type=int, default=30, help="Max clusters for SimPoint") - parser.add_argument("--output-dir", default="/outputs/simpoint_analysis") - args = parser.parse_args() - - Util.validate_tool("simpoint") - - output_dir = 
Util.ensure_dir(Path(args.output_dir)) - Util.log(LogLevel.INFO, f"Starting SimPoint analysis for {args.emulator}") - - binary_list = Path(f"/workloads/binary_list_{args.emulator}.txt") - binaries = [Path(line) for line in Util.read_file_lines(binary_list) if Util.file_exists(line)] - if args.workload_type: - binaries = [b for b in binaries if args.workload_type in b.name] - - bbv_files = find_bbv_files(args.emulator, binaries) - results = {} - for bench, bbv_file in bbv_files.items(): - Util.log(LogLevel.INFO, f"Analyzing {bench}") - success, simpoints, weights = run_simpoint_analysis(bbv_file, bench, args.max_k, output_dir) - result = {'success': success, 'bbv_file': str(bbv_file), 'simpoints_file': str(simpoints), 'weights_file': str(weights)} - if success: - intervals = parse_simpoint_results(simpoints, weights) - result.update({'intervals': intervals, 'simpoints_count': len(intervals), 'coverage': f"{sum(w for _, w in intervals):.3f}"}) - results[bench] = result - - summary_file = output_dir / "simpoint_summary.txt" - generate_summary(results, summary_file) - with (output_dir / "simpoint_results.json").open('w') as f: - json.dump(results, f, indent=2) - - - if True: #reduce: - #parse the weights and simpoitn files to get the top 3 intervals and their weights - # and then slice the traces accodinlgy for that interval from the trace - # make sure to tweak around with diff intervl_sizes (btw ) give optionf or that - #then save the trace corresponding to the top 3 intervals in seperate files - # in the format required by the perf model and traace-archive.py tool - pass - - - Util.log(LogLevel.INFO, f"Analysis completed. Summary: {summary_file}") - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/traces/docker_stf_trace_gen/run_workload.py b/traces/docker_stf_trace_gen/run_workload.py deleted file mode 100755 index 09f5631f..00000000 --- a/traces/docker_stf_trace_gen/run_workload.py +++ /dev/null @@ -1,114 +0,0 @@ -#!/usr/bin/env python3 -"""Runs RISC-V workloads on Spike or QEMU.""" -import argparse -from pathlib import Path -from typing import Dict -from utils.util import Util, LogLevel - -def validate_environment(emulator: str, platform: str, arch: str): - """Validate emulator tools.""" - if emulator == "spike": - Util.validate_tool("spike") - elif emulator == "qemu": - Util.validate_tool(f"qemu-{'system-' if platform == 'baremetal' else ''}riscv{32 if arch == 'rv32' else 64}") - -def setup_output_dirs(emulator: str) -> Dict[str, Path]: - """Setup output directories.""" - base = Util.clean_dir(Path(f"/outputs/{emulator}_output")) - return { - 'base': base, 'logs': base / "logs", 'bbv': base / "bbv", - 'traces': base / "traces", 'results': base / "results.txt" - } - -def run_emulator(binary: Path, dirs: Dict[str, Path], emulator: str, bbv: bool, trace: bool, platform: str, arch: str, interval_size: int, enable_stf_tools: bool) -> float: - """Run workload on emulator.""" - name = binary.stem - Util.log(LogLevel.INFO, f"Running {name} on {emulator.upper()} ({platform}/{arch})") - if emulator == "qemu" and trace: - print("QEMU Cannot generate STF traces, Please use Spike") - trace = False - - configs = { - "spike": { - "cmd": ["spike", f"--isa=rv{arch[2:]}imafdc"], - "bbv": lambda: ["--en_bbv", f"--bb_file={dirs['bbv'] / f'{name}.bbv'}", f"--simpoint_size={interval_size}"], - "trace": lambda: ["--stf_macro_tracing", "--stf_trace_memory_records", f"--stf_trace={dirs['traces'] / f'{name}.zstf'}"], - "bin": [str(binary)] - }, - "qemu": { - "cmd": 
(["qemu-system-riscv32" if arch == "rv32" else "qemu-system-riscv64", "-nographic", "-machine", "virt", "-bios", "none", "-kernel", str(binary)] - if platform == "baremetal" else - [f"qemu-riscv{32 if arch == 'rv32' else 64}", str(binary)]), - "bbv": lambda: ["-plugin", f"/qemu/build/contrib/plugins/libbbv.so,interval={interval_size},outfile={dirs['bbv'] / f'{name}.bbv'}"], - #disable trace for qemu - } - } - - cfg = configs[emulator] - cmd = cfg["cmd"] + (cfg["bbv"]() if bbv else []) + (cfg["trace"]() if trace else []) + cfg.get("bin", []) - # build the logs file - start = Util.get_time() - Util.run_cmd(cmd) - end = Util.get_time() - - if emulator == "spike" and trace and enable_stf_tools: - trace_file = dirs['traces'] / f"{name}.zstf" - if trace_file.exists(): - stf_dump = Path("/riscv/stf_tools/release/tools/stf_dump/stf_dump") - if stf_dump.exists(): - Util.run_cmd([str(stf_dump), str(trace_file)]) - - return end - start - -def run_workloads(emulator: str, platform: str, arch: str, bbv: bool, trace: bool, workload: str = None, interval_size: int = 10**7, enable_stf_tools: bool = False): - """Run all workloads from binary list.""" - validate_environment(emulator, platform, arch) - dirs = setup_output_dirs(emulator) - for d in [dirs['logs'], dirs['bbv'], dirs['traces']]: - Util.ensure_dir(d) - - # Fetch binaries in /workloads/bin// - binary_dir = Path(f"/workloads/bin/{emulator}") - # Get all files in the emulator's directory, excluding .o files - binaries = [f for f in binary_dir.glob("*") if f.is_file() and f.suffix != ".o"] - - # This is for a specific benchmark/workload ? - if workload: - binaries = [b for b in binaries if workload in b.name] - if not binaries: - Util.log(LogLevel.ERROR, f"No workloads matching: {workload}") - - Util.log(LogLevel.INFO, f"Running {len(binaries)} workloads") - results = [] - total_time = 0 - with dirs['results'].open('w') as f: - f.write("Workload RunTime(s) CodeSize(bytes)\n") - for binary in binaries: - try: - run_time = run_emulator(binary, dirs, emulator, bbv, trace, platform, arch, interval_size, enable_stf_tools) - results.append((binary.stem, run_time, binary.stat().st_size)) - f.write(f"{binary.stem} {run_time:.3f} {binary.stat().st_size}\n") - total_time += run_time - except RuntimeError: - continue - - Util.log(LogLevel.INFO, f"Summary: {len(results)} workloads, {total_time:.2f}s, results in {dirs['results']}") - -def main(): - """Main entry point.""" - parser = argparse.ArgumentParser(description="Run RISC-V workloads") - parser.add_argument("--emulator", required=True, choices=["spike", "qemu"], help="Emulator to use") - parser.add_argument("--platform", default="baremetal", choices=["baremetal", "linux"], help="Platform type") - parser.add_argument("--arch", default="rv32", choices=["rv32", "rv64"], help="Architecture") - parser.add_argument("--bbv", action="store_true", help="Enable BBV generation") - parser.add_argument("--trace", action="store_true", help="Generate STF Traces (Spike only)") - parser.add_argument("--workload", help="Filter specific workload") - parser.add_argument("--interval-size", type=int, default=10**7, help="BBV Interval size") - parser.add_argument("--enable-stf-tools", action="store_true") - args = parser.parse_args() - - run_workloads(args.emulator, args.platform, args.arch, args.bbv, args.trace, - args.workload, args.interval_size, args.enable_stf_tools) - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/traces/docker_stf_trace_gen/scripts/run_interactive.sh 
b/traces/docker_stf_trace_gen/scripts/run_interactive.sh index 3b469eca..9abcf161 100755 --- a/traces/docker_stf_trace_gen/scripts/run_interactive.sh +++ b/traces/docker_stf_trace_gen/scripts/run_interactive.sh @@ -13,7 +13,7 @@ mkdir -p "${OUTPUT_DIR}" # Print mounting information echo "🐳 Starting interactive Docker container with mounts:" echo " 📁 Flow scripts: ${CURRENT_DIR} -> /flow" -echo " 📁 Environment: ${CURRENT_DIR}/environment -> /workloads/environment" +echo " 📁 Environment: ${CURRENT_DIR}/environment -> /default/environment" echo " 📁 Outputs: ${OUTPUT_DIR} -> /outputs" # Check if workloads directory exists @@ -30,10 +30,10 @@ echo "" docker run --rm -it \ -v "${OUTPUT_DIR}:/outputs" \ -v "${CURRENT_DIR}:/flow" \ - -v "${CURRENT_DIR}/environment:/workloads/environment" \ + -v "${CURRENT_DIR}/environment:/default/environment" \ ${WORKLOADS_MOUNT} \ -w /flow \ - riscv-perf-model:latest \ + riscv-perf-model:olympia \ bash -echo "🏁 Container exited. Outputs preserved in: ${OUTPUT_DIR}" \ No newline at end of file +echo "🏁 Container exited. Outputs preserved in: ${OUTPUT_DIR}" diff --git a/traces/docker_stf_trace_gen/scripts/spike_vs_qemu.py b/traces/docker_stf_trace_gen/scripts/spike_vs_qemu.py index b153b40f..0cb4d298 100755 --- a/traces/docker_stf_trace_gen/scripts/spike_vs_qemu.py +++ b/traces/docker_stf_trace_gen/scripts/spike_vs_qemu.py @@ -92,8 +92,8 @@ def __init__(self, output_dir: str = "benchmark_results", verbose: bool = False) self.logger = logging.getLogger(__name__) # Build script paths - self.build_script = Path("build_workload.py") - self.run_script = Path("run_workload.py") + self.build_script = Path("flow/build_workload.py") + self.run_script = Path("flow/run_workload.py") # Validate scripts exist if not self.build_script.exists(): @@ -191,7 +191,7 @@ def build_workload(self, workload_suite: str, benchmark: str, arch: str = "rv64" "--benchmark", benchmark, # Specify individual benchmark "--arch", arch, "--platform", platform, - "--board", board + "--emulator", board ] if bbv: @@ -239,17 +239,20 @@ def check_outputs_generated(self, workload: str, board: str, bbv: bool, trace: b if bbv: # Check for BBV files (different formats for spike vs qemu) + output_base = Path(f"/outputs/{board}_output/bbv") if board == "spike": - bbv_file = Path(f"/output/{board}_output/bbv/{workload}.bbv_cpu0") - else: # qemu - bbv_file = Path(f"/output/{board}_output/bbv/{workload}.bb") + # Spike may produce .bbv or .bbv_cpu0 + candidates = [output_base / f"{workload}.bbv", output_base / f"{workload}.bbv_cpu0"] + else: # qemu: outfile prefix .bbv -> files like .bbv.0.bb + candidates = [output_base / f"{workload}.bbv.0.bb", output_base / f"{workload}_bbv.0.bb"] + bbv_file = next((p for p in candidates if p.exists()), output_base / f"{workload}.bbv") bbv_generated = bbv_file.exists() and bbv_file.stat().st_size > 0 self.logger.debug(f"BBV file {bbv_file}: {'found' if bbv_generated else 'not found'}") if trace: - # Check for trace files - trace_file = Path(f"{workload}.stf") # Adjust extension as needed + # Check for STF trace files generated into /outputs/_output/traces + trace_file = Path(f"/outputs/{board}_output/traces/{workload}.zstf") trace_generated = trace_file.exists() and trace_file.stat().st_size > 0 self.logger.debug(f"Trace file {trace_file}: {'found' if trace_generated else 'not found'}") diff --git a/traces/docker_stf_trace_gen/utils/config.py b/traces/docker_stf_trace_gen/utils/config.py deleted file mode 100644 index 0d13a589..00000000 --- 
a/traces/docker_stf_trace_gen/utils/config.py +++ /dev/null @@ -1,257 +0,0 @@ -#!/usr/bin/env python3 - -import yaml -import shlex -from pathlib import Path -from utils.util import Util - -class BoardConfig: - """Board configuration parser with support for tagged sections""" - - def __init__(self, board_name): - self.board_name = board_name - self.config_file = Path(f"environment/{board_name}/board.yaml") - self.config = {} - self.load_config() - - def load_config(self): - """Load configuration from board-specific file""" - if not self.config_file.exists(): - Util.log("ERROR", f"Board config not found: {self.config_file}") - return - - with open(self.config_file, 'r') as f: - self.config = yaml.safe_load(f) or {} - - def _parse_list(self, value): - """Parse list values from config (YAML native lists or space-separated strings)""" - if not value: - return [] - - # If already a list, return as-is - if isinstance(value, list): - return value - - # If string, parse as space-separated (backward compatibility) - if isinstance(value, str): - value = value.strip() - if not value: - return [] - - try: - # Use shlex.split() to handle quoted arguments properly - return shlex.split(value) - except ValueError as e: - # Fallback to simple split if shlex fails (e.g., unmatched quotes) - Util.log("WARNING", f"Failed to parse config value '{value}' with shlex: {e}") - return value.split() - - return [] - - def _get_nested_value(self, path, default=None): - """Get value from nested YAML structure using dot notation""" - current = self.config - try: - for key in path.split('.'): - current = current[key] - - # Parse lists for known list-type keys - if isinstance(path.split('.')[-1], str): - key = path.split('.')[-1] - if (key.endswith('_cflags') or key.endswith('_ldflags') or key.endswith('_includes') or - key.endswith('_defines') or key.endswith('_sources') or - key in ['libs', 'includes', 'defines', 'base_cflags', 'base_ldflags', - 'environment_files', 'skip_common_files', 'workload_sources']): - return self._parse_list(current) - - return current - except (KeyError, TypeError): - return default - - def _get_legacy_section_key(self, section, key, default=None): - """Legacy method for backward compatibility - maps to new structure""" - # Handle legacy section names - if section == 'DEFAULT': - return self._get_nested_value(f'defaults.{key}', default) - - # Handle architecture.platform sections (e.g., rv32.baremetal) - if '.' in section and len(section.split('.')) == 2: - arch, platform = section.split('.') - return self._get_nested_value(f'architectures.{arch}.platforms.{platform}.{key}', default) - - # Handle workload sections - if section in ['embench-iot', 'riscv-tests', 'dhrystone']: - return self._get_nested_value(f'workloads.{section}.{key}', default) - - # Handle platform.workload sections (e.g., linux.embench-iot) - if '.' 
in section and section.split('.')[0] in ['linux', 'baremetal']: - platform, workload = section.split('.') - return self._get_nested_value(f'workloads.{workload}.platforms.{platform}.{key}', default) - - # Handle special sections - if section in ['bbv', 'trace', 'vector']: - return self._get_nested_value(f'features.{section}.{key}', default) - - return default - - def get_build_config(self, arch, platform, workload=None, bbv=False, trace=False, - benchmark_name=None): - """Get complete build configuration for given parameters""" - config = {} - - # Start with defaults - defaults = self._get_nested_value('defaults', {}) - for key, value in defaults.items(): - if (key.endswith('_cflags') or key.endswith('_ldflags') or key.endswith('_includes') or - key.endswith('_defines') or key.endswith('_sources') or - key in ['libs', 'includes', 'defines', 'base_cflags', 'base_ldflags', - 'environment_files', 'skip_common_files', 'workload_sources']): - config[key] = self._parse_list(value) - else: - config[key] = value - - # Apply architecture + platform specific settings - arch_platform_config = self._get_nested_value(f'architectures.{arch}.platforms.{platform}', {}) - for key, value in arch_platform_config.items(): - if (key.endswith('_cflags') or key.endswith('_ldflags') or key.endswith('_includes') or - key.endswith('_defines') or key.endswith('_sources') or - key in ['libs', 'includes', 'defines', 'base_cflags', 'base_ldflags', - 'environment_files', 'skip_common_files', 'workload_sources']): - config[key] = self._parse_list(value) - else: - config[key] = value - - # Apply workload-specific settings - if workload: - workload_config = self._get_nested_value(f'workloads.{workload}', {}) - for key, value in workload_config.items(): - if key.startswith('workload_'): - # Special handling for workload_sources - preserve full key name - if key == 'workload_sources': - config[key] = self._parse_list(value) - else: - # Merge workload-specific flags - base_key = key.replace('workload_', '') - parsed_value = self._parse_list(value) if ( - key.endswith('_cflags') or key.endswith('_ldflags') or key.endswith('_includes') or - key.endswith('_defines') or key.endswith('_sources') - ) else value - - if base_key in config: - if isinstance(config[base_key], list) and isinstance(parsed_value, list): - config[base_key].extend(parsed_value) - else: - config[base_key] = parsed_value - else: - config[base_key] = parsed_value - else: - if (key.endswith('_cflags') or key.endswith('_ldflags') or key.endswith('_includes') or - key.endswith('_defines') or key.endswith('_sources') or - key in ['libs', 'includes', 'defines', 'base_cflags', 'base_ldflags', - 'environment_files', 'skip_common_files', 'workload_sources']): - config[key] = self._parse_list(value) - else: - config[key] = value - - # Apply platform-specific workload overrides - platform_workload_config = self._get_nested_value(f'workloads.{workload}.platforms.{platform}', {}) - for key, value in platform_workload_config.items(): - if (key.endswith('_cflags') or key.endswith('_ldflags') or key.endswith('_includes') or - key.endswith('_defines') or key.endswith('_sources') or - key in ['libs', 'includes', 'defines', 'base_cflags', 'base_ldflags', - 'environment_files', 'skip_common_files', 'workload_sources']): - config[key] = self._parse_list(value) - else: - config[key] = value - - # Apply BBV settings if enabled - if bbv: - bbv_cflags = self._get_nested_value('features.bbv.bbv_cflags', []) - bbv_ldflags = self._get_nested_value('features.bbv.bbv_ldflags', []) - - 
config.setdefault('base_cflags', []).extend(self._parse_list(bbv_cflags)) - config.setdefault('base_ldflags', []).extend(self._parse_list(bbv_ldflags)) - - # Apply trace settings if enabled - if trace: - trace_cflags = self._get_nested_value('features.trace.trace_cflags', []) - trace_ldflags = self._get_nested_value('features.trace.trace_ldflags', []) - - config.setdefault('base_cflags', []).extend(self._parse_list(trace_cflags)) - config.setdefault('base_ldflags', []).extend(self._parse_list(trace_ldflags)) - - # Handle vector benchmarks (override architecture) - only for riscv-tests workload - if workload == "riscv-tests": - vector_config = self._get_nested_value('features.vector', {}) - if vector_config: - if benchmark_name and benchmark_name.startswith('vec-'): - # Vector benchmarks get vector architecture - arch_key = f"vector_{arch}_arch" - new_arch = vector_config.get(arch_key) - if new_arch: - # Update march flags - if 'base_cflags' in config: - for i, flag in enumerate(config['base_cflags']): - if flag.startswith('-march='): - config['base_cflags'][i] = f"-march={new_arch}" - if 'base_ldflags' in config: - for i, flag in enumerate(config['base_ldflags']): - if flag.startswith('-march='): - config['base_ldflags'][i] = f"-march={new_arch}" - else: - # Non-vector riscv-tests benchmarks get regular arch - arch_key = f"regular_{arch}_arch" - new_arch = vector_config.get(arch_key) - if new_arch: - # Update march flags - if 'base_cflags' in config: - for i, flag in enumerate(config['base_cflags']): - if flag.startswith('-march='): - config['base_cflags'][i] = f"-march={new_arch}" - if 'base_ldflags' in config: - for i, flag in enumerate(config['base_ldflags']): - if flag.startswith('-march='): - config['base_ldflags'][i] = f"-march={new_arch}" - # Note: embench-iot and other workloads preserve their base architecture from [rv32.baremetal] - - return config - - def get_compiler_info(self, arch, platform): - """Get compiler and base flags for architecture and platform""" - config = self.get_build_config(arch, platform) - - cc = config.get('cc', f"riscv{arch[2:]}-unknown-elf-gcc") - base_cflags = ' '.join(config.get('base_cflags', [])) - base_ldflags = ' '.join(config.get('base_ldflags', [])) - - return cc, base_cflags, base_ldflags - - def get_environment_files(self, workload=None): - """Get list of environment files to compile""" - config = self.get_build_config("rv32", "baremetal", workload) - return config.get('environment_files', ['crt0.S', 'main.c', 'stub.c']) - - def should_skip_environment(self, platform, workload=None): - """Check if environment compilation should be skipped""" - config = self.get_build_config("rv32", platform, workload) - return config.get('skip_environment', False) - - def get_workload_includes(self, workload_path, workload=None): - """Get workload-specific include paths""" - config = self.get_build_config("rv32", "baremetal", workload) - includes = config.get('includes', []) - - # Convert relative paths to absolute based on workload_path - absolute_includes = [] - for include in includes: - if not Path(include).is_absolute(): - absolute_includes.append(str(Path(workload_path) / include)) - else: - absolute_includes.append(include) - - return absolute_includes - - def get_skip_common_files(self, platform, workload=None): - """Get list of common files to skip for this platform/workload""" - config = self.get_build_config("rv32", platform, workload) - return config.get('skip_common_files', []) \ No newline at end of file diff --git 
a/traces/docker_stf_trace_gen/utils/docker_orchestrator.py b/traces/docker_stf_trace_gen/utils/docker_orchestrator.py deleted file mode 100644 index f5499420..00000000 --- a/traces/docker_stf_trace_gen/utils/docker_orchestrator.py +++ /dev/null @@ -1,67 +0,0 @@ - -import argparse -import os -import docker -from typing import Optional -from data.consts import Const - - -class DockerOrchestrator(): - def __init__(self, args: argparse.Namespace) -> None: - self.docker_image_name = args.image_name if args.image_name else Const.DOCKER_IMAGE_NAME - self.docker_client = docker.from_env() - - def run_command(self, command: str, binds: Optional[dict[str, str]]) -> str: - volumes = {} - for host_path, docker_path in binds.items(): - host_folder = os.path.dirname(host_path) - docker_folder = os.path.dirname(docker_path) - volumes[host_folder] = {"bind": docker_folder, "mode": "rw"} - - print(command) - container = None - try: - container = self.docker_client.containers.run( - image=self.docker_image_name, - command=["bash", "-c", command], - volumes=volumes, - detach=True, - stdout=True, - stderr=True - ) - - exit_code = container.wait()["StatusCode"] - stdout_logs = container.logs(stdout=True, stderr=False) - stderr_logs = container.logs(stdout=False, stderr=True) - - if exit_code != 0: - print("Exit code:", exit_code) - print("STDOUT:\n", stdout_logs.decode()) - print("STDERR:\n", stderr_logs.decode()) - - finally: - try: - container.remove(force=True) - except Exception as e: - print("Cleanup failed:", e) - return stdout_logs - - def run_stf_tool(self, tool: str, host_input: str, host_output: Optional[str] = None): - docker_input = self.convert_host_path_to_docker_path(host_input) - binds = { - host_input: docker_input, - } - - tool_path = os.path.join(Const.STF_TOOLS, tool, tool) - cmd = f"{tool_path} {docker_input}" - result = self.run_command(cmd, binds) - if host_output is not None: - with open(host_output, "wb") as f: - f.write(result) - - return result - - def convert_host_path_to_docker_path(self, path: str) -> str: - parts = os.path.abspath(path).strip(os.sep).split(os.sep) - parts.insert(0, Const.DOCKER_TEMP_FOLDER) - return os.path.join(*parts) diff --git a/traces/docker_stf_trace_gen/utils/trace_generator_arg_parser.py b/traces/docker_stf_trace_gen/utils/trace_generator_arg_parser.py deleted file mode 100644 index 2394b66c..00000000 --- a/traces/docker_stf_trace_gen/utils/trace_generator_arg_parser.py +++ /dev/null @@ -1,72 +0,0 @@ -import argparse -import sys -from data.consts import Const - - -def parse_args(): - parser = argparse.ArgumentParser( - description="Generate traces for a workload", - usage='python generate_trace.py [OPTIONS] WORKLOAD_PATH' - ) - parser.add_argument("--emulator", required=True, choices=["spike", "qemu"]) - parser.add_argument("--isa", required=False, help="Instruction set architecture") - parser.add_argument("--dump", action='store_true', required=False, default=False, help="Create trace file dump") - parser.add_argument("--pk", action='store_true', required=False, default=False, help="Use Spike pk (proxy kernel)") - parser.add_argument( - "--image-name", - required=False, - default=Const.DOCKER_IMAGE_NAME, - help=f"Custom docker image name. 
default: {Const.DOCKER_IMAGE_NAME}") - parser.add_argument('-o', '--output', required=False, help='Output folder or file path') - - subparsers = parser.add_subparsers(title='Mode', dest='mode') - subparsers.add_parser( - 'macro', - help='Trace mode using START_TRACE and STOP_TRACE macros on the workload binary', - description='Trace mode using START_TRACE and STOP_TRACE macros on the workload binary', - formatter_class=argparse.RawTextHelpFormatter - ) - - inst_count_mode_parser = subparsers.add_parser( - 'insn_count', - help='Traces a fixed number of instructions, after a given start instruction index', - description='Traces a fixed number of instructions, after a given start instruction index', - formatter_class=argparse.RawTextHelpFormatter - ) - inst_count_mode_parser.add_argument( - "--num-instructions", - required=True, - type=int, - help="Number of instructions to trace") - inst_count_mode_parser.add_argument( - "--start-instruction", - required=True, - type=int, - default=0, - help="Number of instructions to skip before tracing (insn_count mode)") - - pc_mode_parser = subparsers.add_parser( - 'pc_count', - help='Traces a fixed number of instructions, after a given PC value and PC hits count', - description='Traces a fixed number of instructions, after a given PC value and PC hits count', - formatter_class=argparse.RawTextHelpFormatter - ) - pc_mode_parser.add_argument("--num-instructions", required=True, type=int, help="Number of instructions to trace") - pc_mode_parser.add_argument("--start-pc", required=True, type=int, help="Starting program counter (pc_count mode)") - pc_mode_parser.add_argument( - "--pc-threshold", - required=True, - type=int, - default=1, - help="PC hit threshold (pc_count mode)") - - parser.add_argument("workload", help="Path to workload file") - - if len(sys.argv) == 1: - parser.print_help() - print("\nRun 'trace_share COMMAND --help' for more information on a command.") - print("\nFor more help on how to use trace_share, head to GITHUB_README_LINK") - sys.exit(0) - - args = parser.parse_args() - return args diff --git a/traces/docker_stf_trace_gen/utils/util.py b/traces/docker_stf_trace_gen/utils/util.py deleted file mode 100644 index 50e804f0..00000000 --- a/traces/docker_stf_trace_gen/utils/util.py +++ /dev/null @@ -1,102 +0,0 @@ -import hashlib -import sys -import subprocess -import time -from pathlib import Path -from typing import List, Tuple, Optional -import shutil -import logging -from enum import Enum - - -class LogLevel(Enum): - INFO = "\033[32m" - ERROR = "\033[31m" - WARN = "\033[33m" - DEBUG = "\033[34m" - HEADER = "\033[95m\033[1m" - - -logging.basicConfig(level=logging.INFO, format="[%(asctime)s] %(levelname)s: %(message)s", datefmt="%H:%M:%S") - - -class Util(): - @staticmethod - def compute_sha256(file_path: str) -> str: - hash_sha256 = hashlib.sha256() - with open(file_path, "rb") as f: - for chunk in iter(lambda: f.read(4096), b""): - hash_sha256.update(chunk) - return hash_sha256.hexdigest() - - @staticmethod - def log(level: LogLevel, msg: str, file=sys.stdout): - """Log with ANSI color and timestamp; raises on ERROR.""" - color = level.value - print(f"{color}{msg}\033[0m", file=file if level != LogLevel.ERROR else sys.stderr) - if level == LogLevel.ERROR: - exit(1) - raise RuntimeError(msg) # do we want to raise here? 
- - @staticmethod - def run_cmd( - cmd: List[str], - cwd: Optional[Path] = None, - timeout: int = 300, - show: bool = True - ) -> Tuple[bool, str, str]: - """Run command, return (success, stdout, stderr).""" - if show: - Util.log(LogLevel.DEBUG, f"Running: {' '.join(map(str, cmd))}") - try: - result = subprocess.run(cmd, cwd=cwd, capture_output=True, text=True, timeout=timeout, check=False) - if not result.returncode == 0: - Util.log(LogLevel.ERROR, f"Command failed: {result.stderr}") - return result.returncode == 0, result.stdout, result.stderr - except subprocess.TimeoutExpired: - Util.log(LogLevel.ERROR, f"Timeout after {timeout}s") - except Exception as e: - Util.log(LogLevel.ERROR, f"Exception: {e}") - - @staticmethod - def get_time() -> float: - """Return current time in seconds.""" - return time.time() - - @staticmethod - def ensure_dir(path: Path) -> Path: - """Create directory if it doesn't exist.""" - path.mkdir(parents=True, exist_ok=True) - return path - - @staticmethod - def clean_dir(path: Path) -> Path: - """Clean and recreate directory.""" - if path.exists(): - shutil.rmtree(path) - return Util.ensure_dir(path) - - @staticmethod - def validate_tool(tool: str): - """Check if tool is in PATH.""" - if not shutil.which(tool): - Util.log(LogLevel.ERROR, f"Tool not found: {tool}") - return False - return True - - @staticmethod - def file_exists(path: Path | str) -> bool: - """Check if file exists.""" - return Path(path).exists() - - @staticmethod - def read_file_lines(path: Path) -> List[str]: - """Read non-empty lines from file.""" - if not Util.file_exists(path): - Util.log(LogLevel.ERROR, f"File not found: {path}") - return [line.strip() for line in path.read_text().splitlines() if line.strip()] - - @staticmethod - def write_file_lines(path: Path, lines: List[str]): - """Write lines to file.""" - path.write_text("\n".join(lines) + "\n") diff --git a/traces/qemu-bbvs-flow/documentation/embench b/traces/qemu-bbvs-flow/documentation/embench deleted file mode 100644 index 8bede7bd..00000000 --- a/traces/qemu-bbvs-flow/documentation/embench +++ /dev/null @@ -1,177 +0,0 @@ -# QEMU-BBVS-Flow with Embench Integration - -## Project Overview - -This project provides a unified, containerized workflow for running various workloads on QEMU with SimPoint analysis, specifically designed for computer architecture research and performance analysis. The system generates Basic Block Vectors (BBVs) from program execution traces and performs SimPoint clustering analysis to identify representative execution phases for efficient architectural simulation. - -## What This Project Does - -### Core Functionality -The system executes benchmarks in QEMU emulation, captures execution traces as Basic Block Vectors, and uses SimPoint analysis to identify representative simulation points. This is essential for computer architecture research where full program simulation is computationally expensive, but representative phases can provide accurate performance insights with significantly reduced simulation time. 
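To make the pipeline described above concrete, here is a minimal two-command sketch of what the flow automates. The QEMU plugin path, interval, and output naming mirror run_embench_simple.sh later in this patch; the SimPoint flag names follow the stock SimPoint 3.2 command line and may differ from whatever wrapper run_workload.sh actually invokes.

```bash
# 1) Execute the workload under QEMU with the BBV plugin; the plugin emits
#    basic block vectors (e.g. /output/crc32_bbv.0.bb) at 100-instruction intervals.
qemu-riscv32 \
  -plugin "$QEMU_PLUGINS/libbbv.so,interval=100,outfile=/output/crc32_bbv" \
  /workspace/embench-iot/bd/src/crc32/crc32

# 2) Cluster the vectors with SimPoint to obtain representative intervals and
#    their weights (flag names as in the upstream SimPoint 3.2 tool).
simpoint -loadFVFile /output/crc32_bbv.0.bb \
         -maxK 30 \
         -saveSimpoints /output/crc32.simpoints \
         -saveSimpointWeights /output/crc32.weights
```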
- -### Key Capabilities -- Executes workloads on RISC-V architecture using QEMU emulation with BBV plugin support -- Generates BBV traces capturing program execution behavior at configurable intervals -- Performs SimPoint clustering to identify program phases and representative intervals -- Supports multiple benchmark suites including Embench IoT, Dhrystone, and custom workloads -- Provides both integrated workflow and standalone execution options for different use cases -- Enables custom workload integration with generic handling for any benchmark with build system - -## Why This Project Exists - -### Research Problem -Computer architecture simulators require extensive time to simulate complete program execution, often taking hours or days for complex benchmarks. SimPoint methodology solves this by identifying representative execution intervals that capture the program's overall behavior, reducing simulation time from hours to minutes while maintaining statistical accuracy for performance analysis. - -### Solution Approach -This project automates the entire workflow from benchmark compilation to SimPoint analysis, providing researchers with a ready-to-use environment that eliminates the complexity of setting up QEMU cross-compilation, RISC-V toolchains, BBV plugin configuration, and SimPoint analysis tools. The containerized approach ensures reproducibility across different development environments. - -## Technical Architecture - -### Component Stack -The system integrates multiple components in a layered architecture: - -**Base Infrastructure Layer:** -- Ubuntu 22.04 LTS containerized environment providing stable Linux foundation -- Pre-built RISC-V cross-compilation toolchain from ribeirovsilva/riscv-toolchain image -- QEMU emulator compiled with RISC-V support and BBV plugin capability for trace generation -- SimPoint clustering analysis tool compiled from source for program phase detection - -**Benchmark Integration Layer:** -- Embench IoT benchmark suite with 20 embedded workloads pre-compiled for immediate execution -- Dhrystone integer arithmetic benchmark with enhanced iteration count for better analysis -- Generic workload support system accommodating custom benchmarks with Makefile or build.sh - -**Analysis Pipeline Layer:** -- Automated workload setup and compilation management for different benchmark types -- BBV trace generation during QEMU execution with configurable interval parameters -- SimPoint clustering analysis with adjustable cluster count and algorithm parameters -- Result organization and output management with structured file naming conventions - -## How It Works - -### Execution Workflow -The system follows a structured five-stage workflow that begins with workload identification and setup, proceeds through compilation and binary preparation, executes workloads in QEMU with BBV trace collection, performs SimPoint clustering analysis, and concludes with organized result output generation. - -### Workload Processing Strategy -Different workload types are handled through specialized setup procedures optimized for each benchmark category. Embench workloads utilize pre-compiled binaries stored in the container for immediate execution, Dhrystone undergoes source compilation with enhanced iteration counts for extended analysis periods, and custom workloads are processed through generic build systems supporting both Makefile and shell script approaches. 
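As an illustration of the three workload categories just described, the calls below follow the argument order used by run_workload.sh in integration_demo.sh further down in this patch (`name source_path cflags arch interval clusters`); `mybench` is a hypothetical custom workload directory containing a Makefile or build.sh.

```bash
# Pre-built Embench suite: no source path or extra flags needed.
./run_workload.sh embench "" "" riscv32 100 30

# Dhrystone: compiled from source inside the container with its defaults.
./run_workload.sh dhrystone

# Generic custom workload (hypothetical "mybench" with its own Makefile/build.sh).
./run_workload.sh mybench "$(pwd)/mybench" "-O2 -static" riscv64 100 30
```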
- -### BBV Generation Process -During workload execution in QEMU, the BBV plugin captures basic block execution frequencies at user-specified intervals, typically every 100 instructions. These traces form comprehensive input datasets for SimPoint analysis and represent the program's dynamic execution behavior over time, including phase transitions and steady-state regions. - -### SimPoint Analysis Methodology -The generated BBV traces undergo k-means clustering analysis to identify distinct execution phases based on basic block execution patterns. The analysis produces simulation points representing each cluster centroid and statistical weights indicating cluster importance and coverage, enabling representative simulation strategies that maintain accuracy while reducing computational requirements. - -## Reproduction Guide - -### Prerequisites and System Requirements - -**Hardware Requirements:** -- Minimum 8GB RAM with 16GB recommended for complex benchmarks -- At least 15GB available disk space for Docker images and build artifacts -- Multi-core CPU recommended for parallel compilation processes -- Stable internet connectivity for downloading source repositories and Docker images - -**Operating System Support:** -- Linux distributions with Docker support including Ubuntu, CentOS, Fedora -- macOS with Docker Desktop installation and sufficient memory allocation -- Windows 10/11 with WSL2 integration and Docker Desktop configuration - -**Software Dependencies:** -- Docker Engine 20.10 or later with container runtime support -- Git version control system for repository cloning and management -- Text editor or IDE for configuration file modification if needed -- Terminal or command-line interface for script execution - -### Installation and Setup Process - -**Step 1: Environment Preparation** -Clone the project repository to your local system and ensure all shell scripts have executable permissions. Verify Docker installation and daemon status, checking available resources and network connectivity for downloading dependencies. - -**Step 2: Docker Image Construction** -Execute the build script which constructs the unified Docker image containing all necessary components including QEMU, SimPoint, RISC-V toolchain, and pre-compiled benchmark suites. This process typically requires 30-45 minutes depending on system performance and network speed. - -**Step 3: Workload Execution** -Use the provided scripts to execute desired benchmarks with configurable parameters including architecture selection, compiler flags, BBV generation intervals, and SimPoint clustering parameters. The system supports both individual workload analysis and batch processing modes. - -### Basic Usage Commands - -**Complete Workflow Execution:** -```bash -./build_and_run.sh [workload_type] [source_path] [compiler_flags] [architecture] [interval] [clusters] -``` - -**Individual Component Execution:** -```bash -./build_docker.sh -./run_workload.sh [workload_type] [parameters] -``` - -**Workload-Specific Examples:** -```bash -./build_and_run.sh dhrystone -./build_and_run.sh embench -./build_and_run.sh custom /path/to/source "-O2" riscv64 100 30 -``` - -### Expected Results and Verification - -**Output File Structure:** -The analysis generates organized results in the simpoint_output directory including BBV trace files with .bb extensions containing binary execution data, SimPoint files with .simpoints extensions listing representative intervals, and weight files with .weights extensions indicating cluster importance values. 
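A quick sanity check over those artefacts might look like the sketch below. The per-workload file names and the `weight cluster-id` layout of the .weights file are assumptions based on how this flow names its outputs and on the usual SimPoint output format.

```bash
cd simpoint_output

# The three artefact types produced for a single workload (names assumed).
ls -lh crc32_bbv.0.bb crc32.simpoints crc32.weights

# Cluster weights are normalised, so the first column should sum to ~1.0.
awk '{ sum += $1 } END { printf "weight sum = %.4f\n", sum }' crc32.weights
```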
- -**Verification Steps:** -Successful execution produces multiple file types for each analyzed workload, with BBV files typically ranging from megabytes to gigabytes depending on program complexity and analysis interval, SimPoint files containing text-format cluster assignments, and weight files with normalized importance values summing to unity. - -**Performance Benchmarks:** -Single workload analysis typically completes within 30 seconds to 5 minutes depending on program complexity, while full benchmark suite processing may require 15-30 minutes. Build times vary from 30-60 minutes for initial Docker image construction with subsequent builds leveraging cached layers. - -## Development Environment Setup - -### VS Code Integration - -**Extension Requirements:** -Install Remote-WSL extension for Windows WSL2 integration, Docker extension for container management and debugging, Remote-Containers extension for development container support, and ShellCheck extension for shell script validation and syntax highlighting. - -**Container Development:** -The project includes devcontainer configuration enabling direct development within the containerized environment, providing consistent toolchain access, integrated debugging capabilities, and simplified dependency management across different host systems. - -**Task Configuration:** -Pre-configured VS Code tasks enable one-click Docker image building, workload execution with parameter selection, result analysis and visualization, and integrated terminal access for manual command execution and debugging. - -### Advanced Configuration Options - -**Parameter Customization:** -Modify workload_config.json for benchmark-specific settings including default compiler flags, architecture selections, analysis intervals, and clustering parameters. Custom configurations enable optimization for specific research requirements and performance constraints. - -**Workload Extension:** -Add new benchmark support by extending setup_workload.sh with additional case statements, implementing source acquisition and compilation procedures, updating configuration files with new workload definitions, and testing integration with existing analysis pipeline. - -**Performance Optimization:** -Adjust BBV generation intervals for performance versus accuracy trade-offs, modify SimPoint clustering parameters for different phase detection granularity, configure Docker resource allocation for optimal build and execution performance, and implement parallel processing for batch analysis workflows. - -## Troubleshooting and Common Issues - -### Build and Setup Problems - -**Docker-Related Issues:** -Verify Docker daemon status and available resources, check internet connectivity for source downloads, ensure sufficient disk space for image construction, and validate user permissions for Docker container operations. - -**Compilation Failures:** -Review build logs for specific error messages, verify RISC-V toolchain installation within container, check source code compatibility with cross-compilation requirements, and ensure proper dependency resolution for complex build systems. - -### Execution and Analysis Problems - -**QEMU Execution Issues:** -Validate binary compatibility with selected RISC-V architecture, verify QEMU plugin availability and configuration, check memory allocation for complex workload execution, and ensure proper file permissions for trace output generation. 
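For the QEMU issues listed above, a few quick checks inside the container usually narrow the problem down; the paths below are the defaults used elsewhere in this flow and may differ in a customised image.

```bash
# BBV plugin present and QEMU on PATH?
ls -l "$QEMU_PLUGINS/libbbv.so" || echo "BBV plugin missing - rebuild QEMU with plugin support"
qemu-riscv32 --version

# Binary built for the architecture you are emulating?
file /workspace/embench-iot/bd/src/crc32/crc32   # expect "ELF 32-bit ... RISC-V"

# Trace output directory writable?
mkdir -p /output && touch /output/.write_test && rm /output/.write_test && echo "/output is writable"
```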
- -**SimPoint Analysis Failures:** -Confirm BBV file generation and format correctness, verify SimPoint tool installation and path configuration, check parameter validity for clustering algorithms, and ensure sufficient computational resources for large trace analysis. - -### Performance and Resource Issues - -**Memory and Storage Constraints:** -Monitor Docker container resource usage during build and execution phases, implement cleanup procedures for temporary files and build artifacts, optimize analysis parameters for available system resources, and consider distributed analysis for large-scale benchmark suites. - -**Network and Connectivity Problems:** -Verify stable internet connectivity for source repository access, configure proxy settings if required for corporate environments, implement retry mechanisms for transient network failures, and consider offline mode for environments with restricted connectivity. - -This comprehensive guide enables researchers and developers to understand, reproduce, and extend the QEMU-BBVS-Flow system for their specific computer architecture research requirements while maintaining consistency and reproducibility across different environments and use cases. \ No newline at end of file diff --git a/traces/qemu-bbvs-flow/integration_demo.sh b/traces/qemu-bbvs-flow/integration_demo.sh deleted file mode 100644 index c1bcddb3..00000000 --- a/traces/qemu-bbvs-flow/integration_demo.sh +++ /dev/null @@ -1,158 +0,0 @@ -#!/bin/bash -# integration_demo.sh - Demonstration of Embench integration with qemu-bbvs-flow - -set -e - -echo "==============================================" -echo "QEMU-BBVS-Flow + Embench Integration Demo" -echo "==============================================" - -# Check if Docker is available -if ! command -v docker &> /dev/null; then - echo "Error: Docker is required but not installed" - exit 1 -fi - -# Configuration -IMAGE_NAME="qemu-simpoint-unified" -OUTPUT_DIR="$(pwd)/simpoint_output" - -echo "" -echo "[STEP 1] Building integrated Docker image..." -echo "This includes RISC-V toolchain + QEMU + SimPoint + pre-built Embench" -./build_docker.sh - -echo "" -echo "[STEP 2] Testing pre-built Embench availability..." -docker run --rm "$IMAGE_NAME" /run_embench_simple.sh --list - -echo "" -echo "[STEP 3] Running single Embench workload with BBV generation..." -mkdir -p "$OUTPUT_DIR" -docker run --rm -v "$OUTPUT_DIR:/output" "$IMAGE_NAME" /run_embench_simple.sh crc32 --bbv - -# Verify BBV file was created -if [ -f "$OUTPUT_DIR/crc32_bbv.0.bb" ]; then - echo "✓ BBV file generated successfully: crc32_bbv.0.bb" - echo " File size: $(ls -lh $OUTPUT_DIR/crc32_bbv.0.bb | awk '{print $5}')" -else - echo "✗ BBV file not generated" -fi - -echo "" -echo "[STEP 4] Running complete SimPoint analysis on Embench workloads..." -./run_workload.sh embench "" "" riscv32 100 30 - -echo "" -echo "[STEP 5] Testing Dhrystone (existing functionality)..." -./run_workload.sh dhrystone - -echo "" -echo "[STEP 6] Demonstrating generic workload handling..." 
-echo "This shows the improved generic workload support (review suggestion implemented)" - -# Create a simple test workload -mkdir -p test_workload -cat > test_workload/Makefile << 'EOF' -CC ?= gcc -CFLAGS ?= -static - -hello: hello.c - $(CC) $(CFLAGS) -o hello hello.c - -clean: - rm -f hello -EOF - -cat > test_workload/hello.c << 'EOF' -#include -int main() { - for(int i = 0; i < 1000000; i++) { - // Simple computation for BBV generation - volatile int x = i * 2 + 1; - } - printf("Hello from generic workload!\n"); - return 0; -} -EOF - -echo "Running generic workload 'testwork' (minimum 3 chars as per review)..." -./run_workload.sh testwork "$(pwd)/test_workload" "-static" riscv64 100 30 - -# Clean up test workload -rm -rf test_workload - -echo "" -echo "==============================================" -echo "Integration Demo Results Summary" -echo "==============================================" - -if [ -d "$OUTPUT_DIR" ]; then - echo "Generated files in $OUTPUT_DIR:" - ls -la "$OUTPUT_DIR" - - # Count different file types - bbv_files=$(find "$OUTPUT_DIR" -name "*.bb" 2>/dev/null | wc -l) - simpoints_files=$(find "$OUTPUT_DIR" -name "*.simpoints" 2>/dev/null | wc -l) - weights_files=$(find "$OUTPUT_DIR" -name "*.weights" 2>/dev/null | wc -l) - - echo "" - echo "File Type Summary:" - echo "- BBV trace files (.bb): $bbv_files" - echo "- SimPoints files (.simpoints): $simpoints_files" - echo "- Weight files (.weights): $weights_files" - - echo "" - echo "Key Features Demonstrated:" - echo "✓ Pre-built RISC-V toolchain integration" - echo "✓ Pre-compiled Embench workloads" - echo "✓ Standalone Embench execution with BBV generation" - echo "✓ Workload-specific analysis (review suggestion implemented)" - echo "✓ Generic workload handling (review suggestion implemented)" - echo "✓ Backward compatibility with existing Dhrystone workflow" - echo "✓ Minimal changes to existing codebase" - - if [ "$bbv_files" -gt 0 ] && [ "$simpoints_files" -gt 0 ] && [ "$weights_files" -gt 0 ]; then - echo "" - echo "Integration successful! All components working correctly." - else - echo "" - echo "Some files missing - check logs for issues." - fi -else - echo "No output directory found - check for errors in execution." -fi - -echo "" -echo "==============================================" -echo "Next Steps:" -echo "1. Embench is already compiled in the build" -echo "Available Embench workloads: -================================ - 1. aha-mont64 - 2. crc32 - 3. cubic - 4. edn - 5. huffbench - 6. matmult-int - 7. md5sum - 8. minver - 9. nbody -10. nettle-aes -11. nettle-sha256 -12. nsichneu -13. picojpeg -14. primecount -15. qrduino -16. sglib-combined -17. slre -18. st -19. statemate -20. tarfind -21. ud -22. wikisort -================================ -Total: 22 workloads" -echo "2. Use standalone runner: docker run --rm -v \$(pwd)/simpoint_output:/output qemu-simpoint-unified /run_embench_simple.sh --all" -echo "3. 
Add your own workloads using the generic handler" -echo "==============================================" \ No newline at end of file diff --git a/traces/qemu-bbvs-flow/run_embench_simple.sh b/traces/qemu-bbvs-flow/run_embench_simple.sh deleted file mode 100644 index 4953be4f..00000000 --- a/traces/qemu-bbvs-flow/run_embench_simple.sh +++ /dev/null @@ -1,183 +0,0 @@ -#!/bin/bash -# run_embench_simple.sh - Simple script to run Embench workloads -set -e - -EMBENCH_DIR="/workspace/embench-iot" - -usage() { - echo "Usage: $0 [workload_name] [options]" - echo "Options:" - echo " -l, --list List all available workloads" - echo " -a, --all Run all workloads" - echo " -t, --time Run with timing information" - echo " -b, --bbv Generate BBV traces" - echo " -h, --help Show this help message" - echo "" - echo "Examples:" - echo " $0 crc32 # Run crc32 workload" - echo " $0 crc32 --time # Run crc32 with timing" - echo " $0 crc32 --bbv # Run crc32 with BBV generation" - echo " $0 --all # Run all workloads" - echo " $0 --list # List available workloads" -} - -list_workloads() { - echo "Available Embench workloads:" - echo "================================" - local count=0 - for workload_dir in "${EMBENCH_DIR}/bd/src"/*; do - if [ -d "$workload_dir" ]; then - workload=$(basename "$workload_dir") - executable="${workload_dir}/${workload}" - if [ -f "$executable" ]; then - count=$((count + 1)) - printf "%2d. %s\n" $count "$workload" - fi - fi - done - echo "================================" - echo "Total: $count workloads" -} - -run_workload() { - local workload=$1 - local with_timing=$2 - local with_bbv=$3 - local workload_path="${EMBENCH_DIR}/bd/src/${workload}" - local executable="${workload_path}/${workload}" - - if [ ! -f "$executable" ]; then - echo "Error: Workload '$workload' not found at $executable" - echo "Available workloads:" - list_workloads - return 1 - fi - - echo "Running workload: $workload" - echo "Executable: $executable" - echo "Architecture: RISC-V 32-bit" - echo "" - - if [ "$with_bbv" = "true" ]; then - echo "Running with BBV generation..." - echo "----------------------------------------" - if [ "$with_timing" = "true" ]; then - time qemu-riscv32 -plugin $QEMU_PLUGINS/libbbv.so,interval=100,outfile=/output/${workload}_bbv "$executable" - else - qemu-riscv32 -plugin $QEMU_PLUGINS/libbbv.so,interval=100,outfile=/output/${workload}_bbv "$executable" - fi - echo "BBV trace saved to /output/${workload}_bbv.0.bb" - echo "----------------------------------------" - elif [ "$with_timing" = "true" ]; then - echo "Running with timing information..." - echo "----------------------------------------" - time qemu-riscv32 "$executable" - echo "----------------------------------------" - else - echo "Execution output:" - echo "----------------------------------------" - qemu-riscv32 "$executable" - echo "----------------------------------------" - fi - - echo "Successfully ran $workload" - echo "" -} - -run_all_workloads() { - local with_timing=$1 - local with_bbv=$2 - - echo "Running all Embench workloads..." - echo "" - - local success_count=0 - local total_count=0 - local failed_workloads=() - - for workload_dir in "${EMBENCH_DIR}/bd/src"/*; do - if [ -d "$workload_dir" ]; then - workload=$(basename "$workload_dir") - executable="${workload_dir}/${workload}" - if [ -f "$executable" ]; then - total_count=$((total_count + 1)) - echo "[$total_count] Running $workload..." 
- - if run_workload "$workload" "$with_timing" "$with_bbv"; then - success_count=$((success_count + 1)) - else - failed_workloads+=("$workload") - fi - fi - fi - done - - echo "Execution Summary:" - echo "================================" - echo "Successful: $success_count/$total_count workloads" - echo "Failed: $((total_count - success_count))/$total_count workloads" - - if [ ${#failed_workloads[@]} -gt 0 ]; then - echo "Failed workloads: ${failed_workloads[*]}" - fi - echo "================================" -} - -# Parse arguments -WITH_TIMING=false -WITH_BBV=false -ACTION="" -WORKLOAD="" - -while [[ $# -gt 0 ]]; do - case $1 in - -l|--list) - ACTION="list" - shift - ;; - -a|--all) - ACTION="all" - shift - ;; - -t|--time) - WITH_TIMING=true - shift - ;; - -b|--bbv) - WITH_BBV=true - shift - ;; - -h|--help) - usage - exit 0 - ;; - -*) - echo "Unknown option: $1" - usage - exit 1 - ;; - *) - WORKLOAD="$1" - shift - ;; - esac -done - -# Execute based on action -case "$ACTION" in - list) - list_workloads - ;; - all) - run_all_workloads "$WITH_TIMING" "$WITH_BBV" - ;; - *) - if [ -n "$WORKLOAD" ]; then - run_workload "$WORKLOAD" "$WITH_TIMING" "$WITH_BBV" - else - echo "Please specify a workload or use --list to see available options" - echo "" - usage - fi - ;; -esac \ No newline at end of file
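For reference, typical standalone invocations of this script from the host, matching the "Next Steps" printed by integration_demo.sh; the image name and the /output mount point are this flow's defaults and may differ locally.

```bash
# List the pre-built workloads baked into the image.
docker run --rm qemu-simpoint-unified /run_embench_simple.sh --list

# Run one workload with BBV generation and timing, collecting traces on the host.
docker run --rm -v "$(pwd)/simpoint_output:/output" \
    qemu-simpoint-unified /run_embench_simple.sh crc32 --bbv --time

# Run the whole suite with BBV generation.
docker run --rm -v "$(pwd)/simpoint_output:/output" \
    qemu-simpoint-unified /run_embench_simple.sh --all --bbv
```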