ardoco · DanielDango · Nov 28, 2025 · Nov 28, 2025 · Nov 28, 2025 · Dec 1, 2025
@@ -33,3 +33,33 @@ Runs the pipeline in transitive mode and evaluates it. This is useful for multi-
 java -jar ./ratlr.jar transitive -c ./configs/d2m.json ./configs/m2c.json -e ./configs/eval.json
 ```
 
+## Prompt Optimization
+
+Optimizes prompts used in trace link classification to improve performance.
+This command runs the prompt optimization pipeline and optionally evaluates the optimized prompts against evaluation configurations.
+
+The optimization process:
+1. Runs baseline evaluation (if evaluation configs are provided)
+2. Executes the prompt optimizer with the specified optimization configuration
+3. Re-runs evaluation with the optimized prompt to measure improvement (if evaluation configs are provided)
+
+As only the optimized prompt is transferred from the optimization results to the evaluation, other configuration parameters (e.g., model, dataset) do not have to match between optimization and evaluation configurations.
+
+### Examples
+
+```bash
+# Run optimization with a single config
+java -jar ./ratlr.jar optimize -c ./example-configs/optimizer-config.json
+
+# Run optimization and evaluate the results
+java -jar ./ratlr.jar optimize -c ./example-configs/optimizer-config.json -e ./example-configs/simple-config.json
+
+# Run optimization with directories
+java -jar ./ratlr.jar optimize -c ./configs/optimization -e ./configs/evaluation
+```
+
+### Options
+
+- `-c, --configs`: **(Required)** One or more optimization configuration file paths. If a path points to a directory, all files within that directory will be processed.
+- `-e, --eval`: **(Optional)** One or more evaluation configuration file paths. Each evaluation configuration will be used with each optimization config to measure performance before and after optimization.
+
@@ -0,0 +1,84 @@
+
+{
+  "cache_dir": "./cache/WARC",
+
+  "gold_standard_configuration": {
+    "path": "./datasets/req2req/WARC/answer.csv",
+    "hasHeader": "true"
+  },
+
+  "source_artifact_provider" : {
+    "name" : "text",
+    "args" : {
+      "artifact_type" : "requirement",
+      "path" : "./datasets/req2req/WARC/high"
+    }
+  },
+  "target_artifact_provider" : {
+    "name" : "text",
+    "args" : {
+      "artifact_type" : "requirement",
+      "path" : "./datasets/req2req/WARC/low"
+    }
+  },
+  "source_preprocessor" : {
+    "name" : "artifact",
+    "args" : {}
+  },
+  "target_preprocessor" : {
+    "name" : "artifact",
+    "args" : {}
+  },
+  "embedding_creator" : {
+    "name" : "openai",
+    "args" : {
+      "model": "text-embedding-3-large"
+    }
+  },
+  "source_store" : {
+    "name" : "custom",
+    "args" : {}
+  },
+  "target_store" : {
+    "name" : "cosine_similarity",
+    "args" : {
+      "max_results" : "4"
+    }
+  },
+  "metric" : {
+    "name" : "pointwise",
+    "args" : {}
+  },
+  "evaluator" : {
+    "name" : "ucb",
+    "args" : {
+      "samples_per_eval" : "16",
+      "eval_rounds" : "4",
+      "eval_prompts_per_round" : "1"
+    }
+  },
+  "prompt_optimizer": {
+    "name" : "gradient_openai",
+    "args" : {
+      "prompt": "Question: Here are two parts of software development artifacts.\n\n            {source_type}: '''{source_content}'''\n\n            {target_type}: '''{target_content}'''\n            Are they related?\n\n            Answer with 'yes' or 'no'.",
+      "model": "gpt-4o-mini-2024-07-18",
+      "maximum_iterations": 3,
+      "minibatch_size" : "20"
+    }
+  },
+  "classifier" : {
+    "name" : "simple_openai",
+    "args" : {
+      "model": "gpt-4o-mini-2024-07-18",
+      "temperature": 0.0
+    }
+  },
+  "result_aggregator" : {
+    "name" : "any_connection",
+    "args" : {}
+  },
+  "tracelinkid_postprocessor" : {
+    "name" : "identity",
+    "args" : {}
+  }
+}
@@ -0,0 +1,77 @@
+
+{
+  "cache_dir": "./cache/WARC",
+
+  "gold_standard_configuration": {
+    "path": "./datasets/req2req/WARC/answer.csv",
+    "hasHeader": "true"
+  },
+
+  "source_artifact_provider" : {
+    "name" : "text",
+    "args" : {
+      "artifact_type" : "requirement",
+      "path" : "./datasets/req2req/WARC/high"
+    }
+  },
+  "target_artifact_provider" : {
+    "name" : "text",
+    "args" : {
+      "artifact_type" : "requirement",
+      "path" : "./datasets/req2req/WARC/low"
+    }
+  },
+  "source_preprocessor" : {
+    "name" : "artifact",
+    "args" : {}
+  },
+  "target_preprocessor" : {
+    "name" : "artifact",
+    "args" : {}
+  },
+  "embedding_creator" : {
+    "name" : "openai",
+    "args" : {
+      "model": "text-embedding-3-large"
+    }
+  },
+  "source_store" : {
+    "name" : "custom",
+    "args" : {}
+  },
+  "target_store" : {
+    "name" : "cosine_similarity",
+    "args" : {
+      "max_results" : "4"
+    }
+  },
+  "metric" : {
+    "name" : "mock",
+    "args" : {}
+  },
+  "evaluator" : {
+    "name" : "mock",
+    "args" : {}
+  },
+  "prompt_optimizer": {
+    "name" : "simple_openai",
+    "args" : {
+      "prompt": "Question: Here are two parts of software development artifacts.\n\n            {source_type}: '''{source_content}'''\n\n            {target_type}: '''{target_content}'''\n            Are they related?\n\n            Answer with 'yes' or 'no'.",
+      "model": "gpt-4o-mini-2024-07-18"
+    }
+  },
+  "classifier" : {
+    "name" : "simple_openai",
+    "args" : {
+      "model": "gpt-4o-mini-2024-07-18"
+    }
+  },
+  "result_aggregator" : {
+    "name" : "any_connection",
+    "args" : {}
+  },
+  "tracelinkid_postprocessor" : {
+    "name" : "identity",
+    "args" : {}
+  }
+}
@@ -119,6 +119,7 @@
     <dependency>
       <groupId>org.junit.jupiter</groupId>
       <artifactId>junit-jupiter-params</artifactId>
+      <version>${junit.version}</version>
       <scope>test</scope>
     </dependency>
     <dependency>

@@ -4,6 +4,7 @@
 import java.nio.file.Path;
 
 import edu.kit.kastel.sdq.lissa.cli.command.EvaluateCommand;
+import edu.kit.kastel.sdq.lissa.cli.command.OptimizeCommand;
 import edu.kit.kastel.sdq.lissa.cli.command.TransitiveTraceCommand;
 
 import picocli.CommandLine;
@@ -15,12 +16,13 @@
  * <ul>
  *     <li>{@link EvaluateCommand} - Evaluates trace link analysis configurations</li>
  *     <li>{@link TransitiveTraceCommand} - Performs transitive trace link analysis</li>
+ *     <li>{@link OptimizeCommand} - Optimizes a single prompt for better trace link analysis classification results</li>
  * </ul>
  *
  * The CLI supports various command-line options and provides help information
  * through the standard help options (--help, -h).
  */
-@CommandLine.Command(subcommands = {EvaluateCommand.class, TransitiveTraceCommand.class})
+@CommandLine.Command(subcommands = {EvaluateCommand.class, TransitiveTraceCommand.class, OptimizeCommand.class})
 public final class MainCLI {
 
     /**

@@ -0,0 +1,109 @@
+/* Licensed under MIT 2025. */
+package edu.kit.kastel.sdq.lissa.cli.command;
+
+import static edu.kit.kastel.sdq.lissa.cli.command.EvaluateCommand.loadConfigs;
+
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.List;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import edu.kit.kastel.sdq.lissa.ratlr.Evaluation;
+import edu.kit.kastel.sdq.lissa.ratlr.Optimization;
+
+import picocli.CommandLine;
+
+/**
+ * Command implementation for optimizing prompts used in trace link analysis configurations.
+ * This command processes one or more optimization configuration files to run the prompt
+ * optimization pipeline, and optionally evaluates the optimized prompts using specified
+ * evaluation configuration files.
+ *
+ * <p>Failures to read a configuration are logged as warnings and do not abort the remaining
+ * configurations.
+ */
+@CommandLine.Command(
+        name = "optimize",
+        mixinStandardHelpOptions = true,
+        description = "Optimizes a prompt for usage in the pipeline")
+public class OptimizeCommand implements Runnable {
+
+    private static final Logger LOGGER = LoggerFactory.getLogger(OptimizeCommand.class);
+
+    /**
+     * Array of optimization configuration file paths to be processed.
+     * If a path points to a directory, all files within that directory will be processed.
+     * This option is required to run the optimization command.
+     */
+    @CommandLine.Option(
+            names = {"-c", "--configs"},
+            arity = "1..*",
+            description =
+                    "Specifies one or more config paths to be invoked by the pipeline iteratively. If the path points "
+                            + "to a directory, all files inside are chosen to get invoked.")
+    private Path[] optimizationConfigs;
+
+    /**
+     * Array of evaluation configuration file paths to be processed.
+     * If a path points to a directory, all files within that directory will be processed.
+     * This option is optional; if not provided, no evaluation will be performed after optimization.
+     */
+    @CommandLine.Option(
+            names = {"-e", "--eval"},
+            arity = "0..*",
+            description = "Specifies optional evaluation config paths to be invoked by the pipeline iteratively. "
+                    + "Each evaluation configuration will be used with each optimization config. "
+                    + "If the path points to a directory, all files inside are chosen to get invoked.")
+    private Path[] evaluationConfigs;
+
+    /**
+     * Runs the optimization and evaluation pipelines based on the provided configuration files.
+     * It first loads the optimization and evaluation configurations, then executes the evaluation
+     * pipeline for each evaluation configuration. This is the unoptimized baseline evaluation. <br>
+     * After that, it runs the optimization pipeline for
+     * each optimization configuration, and subsequently evaluates the optimized prompt using each
+     * evaluation configuration once more with the optimized prompt instead of the original one.
+     * <br>
+     * If an optimization configuration cannot be processed, a warning is logged and the
+     * follow-up evaluations for that configuration are skipped, because there is no optimized
+     * prompt to measure.
+     */
+    @Override
+    public void run() {
+        List<Path> configsToOptimize = loadConfigs(optimizationConfigs);
+        // The evaluation option is optional and stays null when omitted. Resolve it to an empty
+        // list here so "no evaluation" does not depend on how loadConfigs treats a null array.
+        List<Path> configsToEvaluate = evaluationConfigs == null ? List.of() : loadConfigs(evaluationConfigs);
+        LOGGER.info(
+                "Found {} optimization config files and {} evaluation config files to invoke",
+                configsToOptimize.size(),
+                configsToEvaluate.size());
+
+        // Baseline pass: an empty prompt is handed to the evaluation.
+        // NOTE(review): presumably an empty prompt makes Evaluation keep the prompt of its own config - confirm.
+        for (Path evaluationConfig : configsToEvaluate) {
+            runEvaluation(evaluationConfig, "");
+        }
+
+        for (Path optimizationConfig : configsToOptimize) {
+            LOGGER.info("Invoking the optimization pipeline with '{}'", optimizationConfig);
+            String optimizedPrompt;
+            try {
+                var optimization = new Optimization(optimizationConfig);
+                optimizedPrompt = optimization.run();
+            } catch (IOException e) {
+                // The trailing throwable argument makes SLF4J log the stack trace as well.
+                LOGGER.warn(
+                        "Optimization configuration '{}' threw an exception: {} \n Maybe the file does not exist?",
+                        optimizationConfig,
+                        e.getMessage(),
+                        e);
+                // Without an optimized prompt the evaluations would only repeat the baseline run.
+                continue;
+            }
+            for (Path evaluationConfig : configsToEvaluate) {
+                runEvaluation(evaluationConfig, optimizedPrompt);
+            }
+        }
+    }
+
+    /**
+     * Runs the evaluation pipeline for a single evaluation configuration.
+     * An {@link IOException} raised while creating or running the evaluation is logged as a
+     * warning and not propagated, so the remaining configurations are still processed.
+     *
+     * @param evaluationConfig path of the evaluation configuration to run
+     * @param optimizedPrompt  prompt handed to the evaluation; the baseline run passes an empty string
+     */
+    private static void runEvaluation(Path evaluationConfig, String optimizedPrompt) {
+        LOGGER.info("Invoking the evaluation pipeline with '{}'", evaluationConfig);
+        try {
+            var evaluation = new Evaluation(evaluationConfig, optimizedPrompt);
+            evaluation.run();
+        } catch (IOException e) {
+            // This method serves both the baseline and the post-optimization runs, so the
+            // message must not claim "baseline".
+            LOGGER.warn(
+                    "Evaluation configuration '{}' threw an exception: {} \n Maybe the file does not exist?",
+                    evaluationConfig,
+                    e.getMessage(),
+                    e);
+        }
+    }
+}