Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
711d142
feat: add naive prompt optimizer with iterative optimization framework
DanielDango Nov 28, 2025
7887f8d
fix: correct typos and improve documentation in various files as comm…
DanielDango Nov 28, 2025
4a7744a
add feedback optimizer implementation
DanielDango Nov 28, 2025
75dddd1
wip: add more logging
DanielDango Dec 1, 2025
565a80e
feat: enhance cache parameters to include classifier type in Reasonin…
DanielDango Dec 3, 2025
883ede1
feat: Enhance logging for cache operations and misclassification checks
DanielDango Dec 11, 2025
cb89983
Rework caching. Now it shall be ensured that Cacheparameters are used…
dfuchss Dec 11, 2025
fb1de00
Update ArchitectureTest.java
dfuchss Dec 11, 2025
3e7facf
feat: Remove caching from GlobalMetric and PointwiseMetric for prompt…
DanielDango Dec 23, 2025
e31c6c1
Merge remote-tracking branch 'upstream/feature/simplify-caching' into…
DanielDango Dec 23, 2025
c5fff3f
chore: update formatting
DanielDango Dec 23, 2025
abd5a4f
chore: update cache parameter usage
DanielDango Dec 23, 2025
317d3cc
feat: add additional logging
DanielDango Dec 23, 2025
63df5fb
fix: revert reduced target store deduplication
DanielDango Dec 23, 2025
a8d1d47
fix: re-enable other e2e optimizer tests
DanielDango Dec 23, 2025
aa5772f
feat: Add gradient optimizer based on ProTeGi by "Automatic Prompt Op…
DanielDango Dec 23, 2025
3739508
chore: apply spotless and use SecureRandom instead of Random
DanielDango Dec 23, 2025
629ac80
revert: use Random instead of SecureRandom
DanielDango Dec 23, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions docs/cli.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,33 @@ Runs the pipeline in transitive mode and evaluates it. This is useful for multi-
java -jar ./ratlr.jar transitive -c ./configs/d2m.json ./configs/m2c.json -e ./configs/eval.json
```

## Prompt Optimization

Optimizes prompts used in trace link classification to improve performance.
This command runs the prompt optimization pipeline and optionally evaluates the optimized prompts against evaluation configurations.

The optimization process:
1. Runs baseline evaluation (if evaluation configs are provided)
2. Executes the prompt optimizer with the specified optimization configuration
3. Re-runs evaluation with the optimized prompt to measure improvement

As only the optimized prompt is transferred from the optimization results to the evaluation, other configuration parameters (e.g., model, dataset) do not have to match between optimization and evaluation configurations.

### Examples

```bash
# Run optimization with a single config
java -jar ./ratlr.jar optimize -c ./example-configs/optimizer-config.json

# Run optimization and evaluate the results
java -jar ./ratlr.jar optimize -c ./example-configs/optimizer-config.json -e ./example-configs/simple-config.json

# Run optimization with directories
java -jar ./ratlr.jar optimize -c ./configs/optimization -e ./configs/evaluation
```

### Options

- `-c, --configs`: **(Required)** One or more optimization configuration file paths. If a path points to a directory, all files within that directory will be processed.
- `-e, --eval`: **(Optional)** One or more evaluation configuration file paths. Each evaluation configuration will be used with each optimization config to measure performance before and after optimization.

84 changes: 84 additions & 0 deletions example-configs/gradient-optimizer-config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@

{
"cache_dir": "./cache/WARC",

"gold_standard_configuration": {
"path": "./datasets/req2req/WARC/answer.csv",
"hasHeader": "true"
},

"source_artifact_provider" : {
"name" : "text",
"args" : {
"artifact_type" : "requirement",
"path" : "./datasets/req2req/WARC/high"
}
},
"target_artifact_provider" : {
"name" : "text",
"args" : {
"artifact_type" : "requirement",
"path" : "./datasets/req2req/WARC/low"
}
},
"source_preprocessor" : {
"name" : "artifact",
"args" : {}
},
"target_preprocessor" : {
"name" : "artifact",
"args" : {}
},
"embedding_creator" : {
"name" : "openai",
"args" : {
"model": "text-embedding-3-large"
}
},
"source_store" : {
"name" : "custom",
"args" : {}
},
"target_store" : {
"name" : "cosine_similarity",
"args" : {
"max_results" : "4"
}
},
"metric" : {
"name" : "pointwise",
"args" : {}
},
"evaluator" : {
"name" : "ucb",
"args" : {
"samples_per_eval" : "16",
"eval_rounds" : "4",
"eval_prompts_per_round" : "1"
}
},
"prompt_optimizer": {
"name" : "gradient_openai",
"args" : {
"prompt": "Question: Here are two parts of software development artifacts.\n\n {source_type}: '''{source_content}'''\n\n {target_type}: '''{target_content}'''\n Are they related?\n\n Answer with 'yes' or 'no'.",
"model": "gpt-4o-mini-2024-07-18",
"maximum_iterations": 3,
"minibatch_size" : "20"
}
},
"classifier" : {
"name" : "simple_openai",
"args" : {
"model": "gpt-4o-mini-2024-07-18",
"temperature": 0.0
}
},
"result_aggregator" : {
"name" : "any_connection",
"args" : {}
},
"tracelinkid_postprocessor" : {
"name" : "identity",
"args" : {}
}
}
77 changes: 77 additions & 0 deletions example-configs/optimizer-config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@

{
"cache_dir": "./cache/WARC",

"gold_standard_configuration": {
"path": "./datasets/req2req/WARC/answer.csv",
"hasHeader": "true"
},

"source_artifact_provider" : {
"name" : "text",
"args" : {
"artifact_type" : "requirement",
"path" : "./datasets/req2req/WARC/high"
}
},
"target_artifact_provider" : {
"name" : "text",
"args" : {
"artifact_type" : "requirement",
"path" : "./datasets/req2req/WARC/low"
}
},
"source_preprocessor" : {
"name" : "artifact",
"args" : {}
},
"target_preprocessor" : {
"name" : "artifact",
"args" : {}
},
"embedding_creator" : {
"name" : "openai",
"args" : {
"model": "text-embedding-3-large"
}
},
"source_store" : {
"name" : "custom",
"args" : {}
},
"target_store" : {
"name" : "cosine_similarity",
"args" : {
"max_results" : "4"
}
},
"metric" : {
"name" : "mock",
"args" : {}
},
"evaluator" : {
"name" : "mock",
"args" : {}
},
"prompt_optimizer": {
"name" : "simple_openai",
"args" : {
"prompt": "Question: Here are two parts of software development artifacts.\n\n {source_type}: '''{source_content}'''\n\n {target_type}: '''{target_content}'''\n Are they related?\n\n Answer with 'yes' or 'no'.",
"model": "gpt-4o-mini-2024-07-18"
}
},
"classifier" : {
"name" : "simple_openai",
"args" : {
"model": "gpt-4o-mini-2024-07-18"
}
},
"result_aggregator" : {
"name" : "any_connection",
"args" : {}
},
"tracelinkid_postprocessor" : {
"name" : "identity",
"args" : {}
}
}
1 change: 1 addition & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-params</artifactId>
<version>${junit.version}</version>
<scope>test</scope>
</dependency>
<dependency>
Expand Down
4 changes: 3 additions & 1 deletion src/main/java/edu/kit/kastel/sdq/lissa/cli/MainCLI.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import java.nio.file.Path;

import edu.kit.kastel.sdq.lissa.cli.command.EvaluateCommand;
import edu.kit.kastel.sdq.lissa.cli.command.OptimizeCommand;
import edu.kit.kastel.sdq.lissa.cli.command.TransitiveTraceCommand;

import picocli.CommandLine;
Expand All @@ -15,12 +16,13 @@
* <ul>
* <li>{@link EvaluateCommand} - Evaluates trace link analysis configurations</li>
* <li>{@link TransitiveTraceCommand} - Performs transitive trace link analysis</li>
* <li>{@link OptimizeCommand} - Optimizes a single prompt for better trace link analysis classification results</li>
* </ul>
*
* The CLI supports various command-line options and provides help information
* through the standard help options (--help, -h).
*/
@CommandLine.Command(subcommands = {EvaluateCommand.class, TransitiveTraceCommand.class})
@CommandLine.Command(subcommands = {EvaluateCommand.class, TransitiveTraceCommand.class, OptimizeCommand.class})
public final class MainCLI {

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
/* Licensed under MIT 2025. */
package edu.kit.kastel.sdq.lissa.cli.command;

import static edu.kit.kastel.sdq.lissa.cli.command.EvaluateCommand.loadConfigs;

import java.io.IOException;
import java.nio.file.Path;
import java.util.List;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import edu.kit.kastel.sdq.lissa.ratlr.Evaluation;
import edu.kit.kastel.sdq.lissa.ratlr.Optimization;

import picocli.CommandLine;

/**
 * Command implementation for optimizing prompts used in trace link analysis configurations.
 * This command processes one or more optimization configuration files to run the prompt
 * optimization pipeline, and optionally evaluates the optimized prompts using specified
 * evaluation configuration files.
 *
 * <p>Failures to load or run a single configuration are logged as warnings and do not abort
 * the remaining configurations.
 */
@CommandLine.Command(
        name = "optimize",
        mixinStandardHelpOptions = true,
        description = "Optimizes a prompt for usage in the pipeline")
public class OptimizeCommand implements Runnable {

    private static final Logger LOGGER = LoggerFactory.getLogger(OptimizeCommand.class);

    /**
     * Array of optimization configuration file paths to be processed.
     * If a path points to a directory, all files within that directory will be processed.
     * This option is documented as required to run the optimization command.
     * NOTE(review): the annotation does not set {@code required = true}, so the field may still be
     * {@code null} when {@code -c} is omitted; how {@code loadConfigs} treats that is not visible here.
     */
    @CommandLine.Option(
            names = {"-c", "--configs"},
            arity = "1..*",
            description =
                    "Specifies one or more config paths to be invoked by the pipeline iteratively. If the path points "
                            + "to a directory, all files inside are chosen to get invoked.")
    private Path[] optimizationConfigs;

    /**
     * Array of evaluation configuration file paths to be processed.
     * If a path points to a directory, all files within that directory will be processed.
     * This option is optional; if not provided, no evaluation will be performed after optimization.
     */
    @CommandLine.Option(
            names = {"-e", "--eval"},
            arity = "0..*",
            description = "Specifies optional evaluation config paths to be invoked by the pipeline iteratively. "
                    + "Each evaluation configuration will be used with each optimization config. "
                    + "If the path points to a directory, all files inside are chosen to get invoked.")
    private Path[] evaluationConfigs;

    /**
     * Runs the optimization and evaluation pipelines based on the provided configuration files.
     * It first loads the optimization and evaluation configurations, then executes the evaluation
     * pipeline for each evaluation configuration with an empty prompt override. This is the
     * unoptimized baseline evaluation. <br>
     * After that, it runs the optimization pipeline for each optimization configuration, and
     * subsequently evaluates each evaluation configuration once more with the optimized prompt
     * instead of the original one. <br>
     * If an optimization fails with an {@link IOException}, the failure is logged and the
     * follow-up evaluations for that optimization configuration are skipped, because there is no
     * optimized prompt to evaluate.
     */
    @Override
    public void run() {
        List<Path> configsToOptimize = loadConfigs(optimizationConfigs);
        // The field stays null when -e is omitted. Guard here so that "no -e" reliably means
        // "no evaluation", independent of how loadConfigs treats a null array.
        List<Path> configsToEvaluate = evaluationConfigs == null ? List.of() : loadConfigs(evaluationConfigs);
        LOGGER.info(
                "Found {} optimization config files and {} evaluation config files to invoke",
                configsToOptimize.size(),
                configsToEvaluate.size());

        // Baseline: evaluate every evaluation config without a prompt override.
        for (Path evaluationConfig : configsToEvaluate) {
            runEvaluation(evaluationConfig, "");
        }

        for (Path optimizationConfig : configsToOptimize) {
            LOGGER.info("Invoking the optimization pipeline with '{}'", optimizationConfig);
            String optimizedPrompt;
            try {
                var optimization = new Optimization(optimizationConfig);
                optimizedPrompt = optimization.run();
            } catch (IOException e) {
                // The trailing throwable argument makes SLF4J log the stack trace as well.
                LOGGER.warn(
                        "Optimization configuration '{}' threw an exception: {} \n Maybe the file does not exist?",
                        optimizationConfig,
                        e.getMessage(),
                        e);
                // Without an optimized prompt, evaluating again would only repeat the baseline
                // run (which also passes an empty prompt) and misreport it as an optimized result.
                continue;
            }
            for (Path evaluationConfig : configsToEvaluate) {
                runEvaluation(evaluationConfig, optimizedPrompt);
            }
        }
    }

    /**
     * Runs the evaluation pipeline for a single evaluation configuration.
     * An {@link IOException} raised while creating or running the evaluation is logged as a
     * warning and not propagated, so the remaining configurations can still be processed.
     *
     * @param evaluationConfig path of the evaluation configuration to run
     * @param optimizedPrompt  prompt handed to {@link Evaluation}; the baseline run passes an empty string
     */
    private static void runEvaluation(Path evaluationConfig, String optimizedPrompt) {
        LOGGER.info("Invoking the evaluation pipeline with '{}'", evaluationConfig);
        try {
            var evaluation = new Evaluation(evaluationConfig, optimizedPrompt);
            evaluation.run();
        } catch (IOException e) {
            // This method serves both the baseline and the post-optimization runs, so the
            // message must not claim "baseline".
            LOGGER.warn(
                    "Evaluation configuration '{}' threw an exception: {} \n Maybe the file does not exist?",
                    evaluationConfig,
                    e.getMessage(),
                    e);
        }
    }
}
Loading