UnitTestBot · sergeyrid · Jul 7, 2023 · Jul 10, 2023 · Jul 11, 2023 · Jul 11, 2023
diff --git a/settings.gradle.kts b/settings.gradle.kts
@@ -4,3 +4,4 @@ include("usvm-core")
 include("usvm-jvm")
 include("usvm-util")
 include("usvm-sample-language")
+include("usvm-ml-path-selection")
diff --git a/usvm-ml-path-selection/README.md b/usvm-ml-path-selection/README.md
@@ -0,0 +1,44 @@
+## Machine Learning Path Selector
+
+### Entry point
+
+To run tests with this path selector use `jarRunner.kt`. You can pass a path to a configuration json as the first argument. Gathered statistics will be put in a folder according to your configuration.
+
+### Config
+
+A config object is declared inside `MLConfig.kt`. A detailed description of all the options is listed below:
+
+- `gameEnvPath` - a path to a folder that contains trained models (`rnn_cell.onnx`, `gnn_model.onnx`, `actor_model.onnx`) and a blacklist of tests to be skipped (`blacklist.txt`); some logs are also saved to this folder
+- `dataPath` - a path to a folder to save all statistics into
+- `defaultAlgorithm` - an algorithm to use if a trained model is not found, must be one of: `BFS`, `ForkDepthRandom`
+- `postprocessing` - how actor model's outputs should be processed, must be one of: `Argmax` (choose an id of the maximum value), `Softmax` (sample from a distribution derived from the outputs via the softmax), `None` (sample from the outputs — only when they form a distribution)
+- `mode` - a mode for `jarRunner.kt`, must be one of: `Calculation` (to calculate statistics used to train models), `Aggregation` (to aggregate statistics for different tests into one file), `Both` (to both calculate statistics and aggregate them), `Test` (to test this path selector with different time limits and compare it to other path selectors)
+- `logFeatures` - whether to save statistics used to train models
+- `shuffleTests` - whether to shuffle tests before running (affects the tests being run if the `dataConsumption` option is less than 100)
+- `discounts` - time discounts used when testing path selectors
+- `inputShape` - an input shape of an actor model
+- `maxAttentionLength` - a maximum attention length of a PPO actor model
+- `useGnn` - whether to use a GNN model
+- `dataConsumption` - a percentage of tests to run
+- `hardTimeLimit` - a time limit for one test
+- `solverTimeLimit` - a time limit for one solver call
+- `maxConcurrency` - a maximum number of threads running different tests concurrently
+- `graphUpdate` - when to update block graph data, must be one of: `Once` (at the beginning of a test), `TestGeneration` (every time a new test is generated)
+- `logGraphFeatures` - whether to save graph statistics used to train a GNN model to a dataset file
+- `gnnFeaturesCount` - a number of features that a GNN model returns
+- `useRnn` - whether to use an RNN model
+- `rnnStateShape` - a shape of an RNN state
+- `rnnFeaturesCount` - a number of features that an RNN model returns
+- `inputJars` - jars and their packages to run tests on
+
+### How to modify the metric
+
+To modify the metric you may change values of the `reward` property of the `ActionData` objects. They are written inside the property `path` of the `FeaturesLoggingPathSelector`. Currently, the metric is calculated in the `remove` method of the `FeaturesLoggingPathSelector`.
+
+### Training environment
+
+The training environment and its description are inside `environment.zip`.
+
+### "Modified" files
+
+Source files whose names start with "Modified" are modified copies of files from other modules. They were modified to support this path selector.
diff --git a/usvm-ml-path-selection/build.gradle.kts b/usvm-ml-path-selection/build.gradle.kts
@@ -0,0 +1,22 @@
+object MLVersions { // versions of the dependencies that only this module uses (referenced in `dependencies` below)
+    const val serialization = "1.5.1" // org.jetbrains.kotlinx:kotlinx-serialization-json
+    const val onnxruntime = "1.15.1" // com.microsoft.onnxruntime:onnxruntime
+    const val dotlin = "1.0.2" // io.github.rchowell:dotlin
+}
+
+plugins {
+    id("usvm.kotlin-conventions")
+    kotlin("plugin.serialization") version "1.8.21" // NOTE(review): hard-coded; presumably has to match the project's Kotlin version - confirm
+}
+
+dependencies {
+    implementation(project(":usvm-jvm")) // sibling modules of this build
+    implementation(project(":usvm-core"))
+
+    implementation("org.jacodb:jacodb-analysis:${Versions.jcdb}") // `Versions` is not declared in this script; presumably shared build logic - confirm
+    implementation("ch.qos.logback:logback-classic:${Versions.logback}")
+
+    implementation("org.jetbrains.kotlinx:kotlinx-serialization-json:${MLVersions.serialization}") // versions from MLVersions above
+    implementation("io.github.rchowell:dotlin:${MLVersions.dotlin}")
+    implementation("com.microsoft.onnxruntime:onnxruntime:${MLVersions.onnxruntime}")
+}
diff --git a/usvm-ml-path-selection/environment.zip b/usvm-ml-path-selection/environment.zip
diff --git a/usvm-ml-path-selection/src/main/kotlin/org/usvm/CoverageCounter.kt b/usvm-ml-path-selection/src/main/kotlin/org/usvm/CoverageCounter.kt
@@ -0,0 +1,85 @@
+package org.usvm
+
+import kotlinx.serialization.Serializable
+import kotlinx.serialization.json.Json
+import kotlinx.serialization.json.JsonObject
+import kotlinx.serialization.json.encodeToJsonElement
+import kotlinx.serialization.json.jsonObject
+import java.util.concurrent.ConcurrentHashMap
+
+/**
+ * Collects time-discounted coverage for a set of tests and exports it as JSON.
+ *
+ * For every discount factor in [MLConfig.discounts] each test keeps a running coverage sum
+ * and a current multiplier. [updateDiscounts] multiplies every multiplier by its factor,
+ * [updateResults] adds the new coverage scaled by the current multipliers.
+ *
+ * The per-test maps are [ConcurrentHashMap]s, but the update methods read a value and then
+ * write it back in two separate steps, so concurrent updates of the same test are not atomic.
+ */
+class CoverageCounter(
+    private val mlConfig: MLConfig
+) {
+    private val testCoverages = ConcurrentHashMap<String, List<Float>>()
+    private val testStatementsCounts = ConcurrentHashMap<String, Float>()
+    private val testDiscounts = ConcurrentHashMap<String, List<Float>>()
+    private val testFinished = ConcurrentHashMap<String, Boolean>()
+
+    /**
+     * Registers [testName] (or re-initializes it if it is already known): coverage sums start
+     * at `0`, multipliers at `1`, and the test is marked as not finished.
+     *
+     * @param statementsCount value reported as the test's `statementsCount` in [getStatistics]
+     */
+    fun addTest(testName: String, statementsCount: Float) {
+        testCoverages[testName] = List(mlConfig.discounts.size) { 0.0f }
+        testStatementsCounts[testName] = statementsCount
+        testDiscounts[testName] = List(mlConfig.discounts.size) { 1.0f }
+        testFinished[testName] = false
+    }
+
+    /**
+     * Multiplies each current multiplier of [testName] by the matching factor from
+     * [MLConfig.discounts].
+     *
+     * @throws NoSuchElementException if [testName] has not been registered with [addTest]
+     */
+    fun updateDiscounts(testName: String) {
+        testDiscounts[testName] = testDiscounts.getValue(testName)
+            .mapIndexed { id, currentDiscount -> mlConfig.discounts[id] * currentDiscount }
+    }
+
+    /**
+     * Adds [newCoverage], scaled by each current multiplier of [testName], to the matching
+     * coverage sum.
+     *
+     * @throws NoSuchElementException if [testName] has not been registered with [addTest]
+     */
+    fun updateResults(testName: String, newCoverage: Float) {
+        val currentDiscounts = testDiscounts.getValue(testName)
+        testCoverages[testName] = testCoverages.getValue(testName)
+            .mapIndexed { id, currentCoverage -> currentCoverage + currentDiscounts[id] * newCoverage }
+    }
+
+    /** Marks [testName] as finished. */
+    fun finishTest(testName: String) {
+        testFinished[testName] = true
+    }
+
+    /** Forgets every registered test. */
+    fun reset() {
+        testCoverages.clear()
+        testStatementsCounts.clear()
+        testDiscounts.clear()
+        testFinished.clear()
+    }
+
+    /**
+     * Sums the coverage lists of all tests element-wise.
+     *
+     * Folds from a zero vector instead of using `reduce`, which throws on an empty collection
+     * and would make [getStatistics] fail before the first [addTest] or right after [reset].
+     */
+    private fun getTotalCoverages(): List<Float> {
+        val zeroTotals = List(mlConfig.discounts.size) { 0.0f }
+        return testCoverages.values.fold(zeroTotals) { acc, coverages ->
+            acc.zip(coverages).map { (total, value) -> total + value }
+        }
+    }
+
+    /** JSON shape of a single test; `discounts` maps a discount factor (as a string) to its coverage sum. */
+    @Serializable
+    private data class TestStatistics(
+        private val discounts: Map<String, Float>,
+        private val statementsCount: Float,
+        private val finished: Boolean,
+    )
+
+    /** JSON shape of the whole report: per-test entries plus totals over all tests. */
+    @Serializable
+    private data class Statistics(
+        private val tests: Map<String, TestStatistics>,
+        private val totalDiscounts: Map<String, Float>,
+        private val totalStatementsCount: Float,
+        private val finishedTestsCount: Float,
+    )
+
+    /**
+     * Builds a JSON report with the statistics of every registered test and the totals over
+     * all of them. With no registered tests the report has no test entries and all totals are zero.
+     */
+    fun getStatistics(): JsonObject {
+        val discountStrings = mlConfig.discounts.map { it.toString() }
+        val testStatistics = testCoverages.mapValues { (test, coverages) ->
+            TestStatistics(
+                discountStrings.zip(coverages).toMap(),
+                testStatementsCounts.getValue(test),
+                testFinished.getValue(test),
+            )
+        }
+        val statistics = Statistics(
+            testStatistics,
+            discountStrings.zip(getTotalCoverages()).toMap(),
+            testStatementsCounts.values.sum(),
+            testFinished.values.count { it }.toFloat(),
+        )
+        return Json.encodeToJsonElement(statistics).jsonObject
+    }
+}
diff --git a/usvm-ml-path-selection/src/main/kotlin/org/usvm/MLConfig.kt b/usvm-ml-path-selection/src/main/kotlin/org/usvm/MLConfig.kt
@@ -0,0 +1,51 @@
+package org.usvm
+
+enum class Postprocessing { // how the actor model's outputs are processed
+    Argmax, // choose the id of the maximum value
+    Softmax, // sample from a distribution derived from the outputs via the softmax
+    None, // sample from the outputs as they are; only valid when they already form a distribution
+}
+
+enum class Mode { // mode for `jarRunner.kt`
+    Calculation, // calculate statistics used to train models
+    Aggregation, // aggregate statistics for different tests into one file
+    Both, // both calculate statistics and aggregate them
+    Test, // test this path selector with different time limits and compare it to other path selectors
+}
+
+enum class Algorithm { // algorithm to use if a trained model is not found (see MLConfig.defaultAlgorithm)
+    BFS,
+    ForkDepthRandom,
+}
+
+enum class GraphUpdate { // when to update block graph data
+    Once, // at the beginning of a test
+    TestGeneration, // every time a new test is generated
+}
+
+/**
+ * Configuration of the machine learning path selector and of `jarRunner.kt`.
+ *
+ * @property gameEnvPath folder with trained models (`rnn_cell.onnx`, `gnn_model.onnx`,
+ * `actor_model.onnx`) and a blacklist of tests to be skipped (`blacklist.txt`); some logs are saved there too
+ * @property dataPath folder to save all statistics into
+ * @property defaultAlgorithm algorithm to use if a trained model is not found
+ * @property postprocessing how the actor model's outputs should be processed
+ * @property mode mode for `jarRunner.kt`
+ * @property logFeatures whether to save statistics used to train models
+ * @property shuffleTests whether to shuffle tests before running (affects which tests are run
+ * if [dataConsumption] is less than 100)
+ * @property discounts time discounts used when testing path selectors
+ * @property inputShape input shape of an actor model
+ * @property maxAttentionLength maximum attention length of a PPO actor model
+ * @property useGnn whether to use a GNN model
+ * @property dataConsumption percentage of tests to run
+ * @property hardTimeLimit time limit for one test, in milliseconds
+ * @property solverTimeLimit time limit for one solver call, in milliseconds
+ * @property maxConcurrency maximum number of threads running different tests concurrently
+ * @property graphUpdate when to update block graph data
+ * @property logGraphFeatures whether to save graph statistics used to train a GNN model to a dataset file
+ * @property gnnFeaturesCount number of features that a GNN model returns
+ * @property useRnn whether to use an RNN model
+ * @property rnnStateShape shape of an RNN state
+ * @property rnnFeaturesCount number of features that an RNN model returns
+ * @property inputJars jars to run tests on: path to a jar file -> list of package names
+ */
+data class MLConfig(
+    val gameEnvPath: String = "../Game_env",
+    val dataPath: String = "../Data",
+    val defaultAlgorithm: Algorithm = Algorithm.BFS,
+    val postprocessing: Postprocessing = Postprocessing.Argmax,
+    val mode: Mode = Mode.Both,
+    val logFeatures: Boolean = true,
+    val shuffleTests: Boolean = true,
+    val discounts: List<Float> = listOf(1.0f, 0.998f, 0.99f),
+    val inputShape: List<Long> = listOf(1, -1, 77),
+    val maxAttentionLength: Int = -1,
+    val useGnn: Boolean = true,
+    val dataConsumption: Float = 100.0f,
+    val hardTimeLimit: Int = 30000,
+    val solverTimeLimit: Int = 10000,
+    val maxConcurrency: Int = 64,
+    val graphUpdate: GraphUpdate = GraphUpdate.Once,
+    val logGraphFeatures: Boolean = false,
+    val gnnFeaturesCount: Int = 8,
+    val useRnn: Boolean = true,
+    val rnnStateShape: List<Long> = listOf(4, 1, 512),
+    val rnnFeaturesCount: Int = 33,
+    val inputJars: Map<String, List<String>> = mapOf(
+        "../Game_env/jars/usvm-jvm-new.jar" to listOf("org.usvm.samples", "com.thealgorithms"),
+    ),
+)
diff --git a/usvm-ml-path-selection/src/main/kotlin/org/usvm/ModifiedUMachineOptions.kt b/usvm-ml-path-selection/src/main/kotlin/org/usvm/ModifiedUMachineOptions.kt
@@ -0,0 +1,19 @@
+package org.usvm
+
+enum class ModifiedPathSelectionStrategy { // strategies selectable via ModifiedUMachineOptions.pathSelectionStrategies
+    /**
+     * Collects features according to states selected by any other path selector.
+     */
+    FEATURES_LOGGING,
+    /**
+     * Collects features and feeds them to the ML model to select states.
+     * Extends the [FEATURES_LOGGING] path selector.
+     */
+    MACHINE_LEARNING,
+}
+
+data class ModifiedUMachineOptions( // UMachineOptions plus the path selection strategies declared in this file
+    val basicOptions: UMachineOptions = UMachineOptions(), // wrapped options; defaults to UMachineOptions()
+    val pathSelectionStrategies: List<ModifiedPathSelectionStrategy> = // defaults to a single MACHINE_LEARNING entry
+        listOf(ModifiedPathSelectionStrategy.MACHINE_LEARNING)
+)