From ff4b126607b381df0ae0c67ad67896c80220420e Mon Sep 17 00:00:00 2001 From: Nikita Lebedev Date: Fri, 31 Oct 2025 11:37:28 +0200 Subject: [PATCH 1/6] poc Signed-off-by: Nikita Lebedev --- .../hedera/statevalidation/PocCommand.java | 150 ++++++++++++++++++ .../statevalidation/StateOperatorCommand.java | 3 +- .../hedera/statevalidation/poc/DataStats.java | 55 +++++++ .../hedera/statevalidation/poc/ItemData.java | 22 +++ .../statevalidation/poc/ProcessorTask.java | 138 ++++++++++++++++ 5 files changed, 367 insertions(+), 1 deletion(-) create mode 100644 hedera-state-validator/src/main/java/com/hedera/statevalidation/PocCommand.java create mode 100644 hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/DataStats.java create mode 100644 hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/ItemData.java create mode 100644 hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/ProcessorTask.java diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/PocCommand.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/PocCommand.java new file mode 100644 index 000000000000..56875c7261f3 --- /dev/null +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/PocCommand.java @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: Apache-2.0 +package com.hedera.statevalidation; + +import com.hedera.pbj.runtime.io.buffer.BufferedData; +import com.hedera.pbj.runtime.io.buffer.Bytes; +import com.hedera.statevalidation.poc.DataStats; +import com.hedera.statevalidation.poc.ItemData; +import com.hedera.statevalidation.poc.ItemData.Type; +import com.hedera.statevalidation.poc.ProcessorTask; +import com.hedera.statevalidation.util.StateUtils; +import com.swirlds.merkledb.MerkleDbDataSource; +import com.swirlds.merkledb.files.DataFileCollection; +import com.swirlds.merkledb.files.DataFileIterator; +import com.swirlds.merkledb.files.DataFileReader; +import com.swirlds.platform.state.snapshot.DeserializedSignedState; +import com.swirlds.state.MerkleNodeState; +import com.swirlds.virtualmap.VirtualMap; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.TimeUnit; +import picocli.CommandLine.Command; +import picocli.CommandLine.Option; +import picocli.CommandLine.ParentCommand; + +@Command(name = "poc") +public class PocCommand implements Runnable { + + @ParentCommand + private StateOperatorCommand parent; + + @Option( + names = {"-io", "--io-threads"}, + description = "Number of IO threads for reading from disk.") + private int ioThreads = 2; + + @Option( + names = {"-p", "--process-threads"}, + description = "Number of CPU threads for processing chunks.") + private int processThreads = 2; + + @Option( + names = {"-b", "--queue-capacity"}, + description = "Queue capacity for backpressure control.") + private int queueCapacity = 1000; + + private PocCommand() {} + + @Override + public void run() { + try { + BlockingQueue dataQueue = new LinkedBlockingQueue<>(queueCapacity); + + ExecutorService ioPool = Executors.newFixedThreadPool(ioThreads); + ExecutorService processPool = Executors.newFixedThreadPool(processThreads); + + long startTime = System.currentTimeMillis(); + + // Initialize state and get data file collections + parent.initializeStateDir(); + DeserializedSignedState deserializedSignedState = 
StateUtils.getDeserializedSignedState(); + MerkleNodeState state = + deserializedSignedState.reservedSignedState().get().getState(); + VirtualMap virtualMap = (VirtualMap) state.getRoot(); + MerkleDbDataSource vds = (MerkleDbDataSource) virtualMap.getDataSource(); + + DataFileCollection pathToKeyValueDfc = vds.getPathToKeyValue().getFileCollection(); + DataFileCollection pathToHashDfc = vds.getHashStoreDisk().getFileCollection(); + DataFileCollection keyToPathDfc = vds.getKeyToPath().getFileCollection(); + + // Count total readers upfront + int totalReaders = pathToKeyValueDfc.getAllCompletedFiles().size() + + pathToHashDfc.getAllCompletedFiles().size() + + keyToPathDfc.getAllCompletedFiles().size(); + + CountDownLatch readerLatch = new CountDownLatch(totalReaders); + CountDownLatch processorsLatch = new CountDownLatch(processThreads); + + DataStats dataStats = new DataStats(); + + // Start processor threads + for (int i = 0; i < processThreads; i++) { + processPool.submit(new ProcessorTask(dataQueue, vds, dataStats, processorsLatch)); + } + + // Submit reader tasks + submitReaderTasksFor(pathToKeyValueDfc, dataQueue, Type.P2KV, ioPool, readerLatch); + submitReaderTasksFor(pathToHashDfc, dataQueue, Type.P2H, ioPool, readerLatch); + submitReaderTasksFor(keyToPathDfc, dataQueue, Type.K2P, ioPool, readerLatch); + + // Wait for all readers to finish + readerLatch.await(); + ioPool.shutdown(); + if (!ioPool.awaitTermination(1, TimeUnit.MINUTES)) { + throw new RuntimeException("IO pool did not terminate within timeout"); + } + + // Send one poison pill per processor + for (int i = 0; i < processThreads; i++) { + dataQueue.put(ItemData.poisonPill()); + } + + // Wait for processors to finish + processorsLatch.await(); + processPool.shutdown(); + if (!processPool.awaitTermination(1, TimeUnit.MINUTES)) { + throw new RuntimeException("Process pool did not terminate within timeout"); + } + + System.out.println(dataStats); + long elapsedTime = System.currentTimeMillis() - startTime; + System.out.println("Total processing time: " + elapsedTime + " ms"); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + private static void submitReaderTasksFor( + DataFileCollection dfc, + BlockingQueue dataQueue, + ItemData.Type dataType, + ExecutorService ioPool, + CountDownLatch readerLatch) { + for (DataFileReader reader : dfc.getAllCompletedFiles()) { + ioPool.submit(() -> { + try { + try (DataFileIterator dataIterator = reader.createIterator()) { + while (dataIterator.next()) { + BufferedData originalData = dataIterator.getDataItemData(); + Bytes dataCopy = originalData.getBytes(0, originalData.remaining()); + + ItemData itemData = + new ItemData(dataType, dataCopy, dataIterator.getDataItemDataLocation()); + dataQueue.put(itemData); + } + } + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new RuntimeException("Reader interrupted", e); + } catch (Exception e) { + throw new RuntimeException("Reader failed", e); + } finally { + readerLatch.countDown(); + } + }); + } + } +} diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/StateOperatorCommand.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/StateOperatorCommand.java index 84db55c72330..a0263f77bc48 100644 --- a/hedera-state-validator/src/main/java/com/hedera/statevalidation/StateOperatorCommand.java +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/StateOperatorCommand.java @@ -18,7 +18,8 @@ ExportCommand.class, SortedExportCommand.class, 
CompactionCommand.class, - ApplyBlocksCommand.class + ApplyBlocksCommand.class, + PocCommand.class }, description = "CLI tool with validation and introspection modes.") public class StateOperatorCommand implements Runnable { diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/DataStats.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/DataStats.java new file mode 100644 index 000000000000..becd257d9267 --- /dev/null +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/DataStats.java @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: Apache-2.0 +package com.hedera.statevalidation.poc; + +import java.util.concurrent.atomic.AtomicLong; + +public final class DataStats { + private final AtomicLong totalSpaceSize = new AtomicLong(); + private final AtomicLong totalItemCount = new AtomicLong(); + private final AtomicLong obsoleteSpaceSize = new AtomicLong(); + private final AtomicLong obsoleteItemCount = new AtomicLong(); + + public void addTotalSpaceBytes(long bytes) { + totalSpaceSize.addAndGet(bytes); + } + + public void incrementTotalItemCount() { + totalItemCount.incrementAndGet(); + } + + public void addObsoleteSpaceBytes(long bytes) { + obsoleteSpaceSize.addAndGet(bytes); + } + + public void incrementObsoleteItemCount() { + obsoleteItemCount.incrementAndGet(); + } + + public long getTotalSpaceSize() { + return totalSpaceSize.get(); + } + + public long getTotalItemCount() { + return totalItemCount.get(); + } + + public long getObsoleteSpaceSize() { + return obsoleteSpaceSize.get(); + } + + public long getObsoleteItemCount() { + return obsoleteItemCount.get(); + } + + @Override + public String toString() { + return String.format( + """ + DataStats: + Total space: %,d bytes + Total items: %,d + Obsolete space: %,d bytes + Obsolete items: %,d""", + getTotalSpaceSize(), getTotalItemCount(), getObsoleteSpaceSize(), getObsoleteItemCount()); + } +} diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/ItemData.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/ItemData.java new file mode 100644 index 000000000000..8f088556a71f --- /dev/null +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/ItemData.java @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: Apache-2.0 +package com.hedera.statevalidation.poc; + +import com.hedera.pbj.runtime.io.buffer.Bytes; + +public record ItemData(Type type, Bytes bytes, long location) { + + public enum Type { + P2KV, + P2H, + K2P, + TERMINATOR + } + + public static ItemData poisonPill() { + return new ItemData(Type.TERMINATOR, Bytes.EMPTY, -1L); + } + + public boolean isPoisonPill() { + return type == Type.TERMINATOR; + } +} diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/ProcessorTask.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/ProcessorTask.java new file mode 100644 index 000000000000..a00b11635c12 --- /dev/null +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/ProcessorTask.java @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: Apache-2.0 +package com.hedera.statevalidation.poc; + +import com.hedera.statevalidation.util.reflect.BucketIterator; +import com.swirlds.merkledb.MerkleDbDataSource; +import com.swirlds.merkledb.collections.LongList; +import com.swirlds.merkledb.files.hashmap.ParsedBucket; +import com.swirlds.virtualmap.datasource.VirtualHashRecord; +import com.swirlds.virtualmap.datasource.VirtualLeafBytes; +import 
java.util.concurrent.BlockingQueue; +import java.util.concurrent.CountDownLatch; + +public class ProcessorTask implements Runnable { + + private final BlockingQueue dataQueue; + + private final LongList pathToDiskLocationLeafNodes; + private final LongList pathToDiskLocationInternalNodes; + private final LongList bucketIndexToBucketLocation; + + private final DataStats dataStats; + + private final CountDownLatch processorsLatch; + + public ProcessorTask( + BlockingQueue dataQueue, + MerkleDbDataSource vds, + DataStats dataStats, + CountDownLatch processorsLatch) { + this.dataQueue = dataQueue; + + this.pathToDiskLocationLeafNodes = vds.getPathToDiskLocationLeafNodes(); + this.pathToDiskLocationInternalNodes = vds.getPathToDiskLocationInternalNodes(); + this.bucketIndexToBucketLocation = (LongList) vds.getKeyToPath().getBucketIndexToBucketLocation(); + + this.dataStats = dataStats; + + this.processorsLatch = processorsLatch; + } + + @Override + public void run() { + try { + while (true) { + ItemData chunk = dataQueue.take(); + + if (chunk.isPoisonPill()) { + break; + } + + processChunk(chunk); + } + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } finally { + processorsLatch.countDown(); + } + } + + private void processChunk(ItemData data) { + switch (data.type()) { + case P2KV -> processVirtualLeafBytes(data); + case P2H -> processVirtualHashRecord(data); + case K2P -> processBucket(data); + } + } + + private void processVirtualLeafBytes(ItemData data) { + try { + dataStats.addTotalSpaceBytes(data.bytes().length()); + dataStats.incrementTotalItemCount(); + + VirtualLeafBytes virtualLeafBytes = + VirtualLeafBytes.parseFrom(data.bytes().toReadableSequentialData()); + long path = virtualLeafBytes.path(); + + if (data.location() == pathToDiskLocationLeafNodes.get(path)) { + // live object, do something... + System.out.println("P2KV: path=" + path + ", key=" + virtualLeafBytes.keyBytes() + ", value=" + + virtualLeafBytes.valueBytes()); + } else { + // add to wasted items/space + dataStats.addObsoleteSpaceBytes(data.bytes().length()); + dataStats.incrementObsoleteItemCount(); + } + } catch (Exception e) { + e.printStackTrace(); + } + } + + private void processVirtualHashRecord(ItemData data) { + try { + dataStats.addTotalSpaceBytes(data.bytes().length()); + dataStats.incrementTotalItemCount(); + + VirtualHashRecord virtualHashRecord = + VirtualHashRecord.parseFrom(data.bytes().toReadableSequentialData()); + final long path = virtualHashRecord.path(); + + if (data.location() == pathToDiskLocationInternalNodes.get(path)) { + // live object, do something... + System.out.println("P2H: path=" + path + ", hash=" + virtualHashRecord.hash()); + + } else { + // add to wasted items/space + dataStats.addObsoleteSpaceBytes(data.bytes().length()); + dataStats.incrementObsoleteItemCount(); + } + } catch (Exception e) { + e.printStackTrace(); + } + } + + private void processBucket(ItemData data) { + try { + dataStats.addTotalSpaceBytes(data.bytes().length()); + dataStats.incrementTotalItemCount(); + + final ParsedBucket bucket = new ParsedBucket(); + bucket.readFrom(data.bytes().toReadableSequentialData()); + + if (data.location() == bucketIndexToBucketLocation.get(bucket.getBucketIndex())) { + // live object, do something... 
+ var bucketIterator = new BucketIterator(bucket); + while (bucketIterator.hasNext()) { + final ParsedBucket.BucketEntry entry = bucketIterator.next(); + System.out.println("K2P: key=" + entry.getKeyBytes() + ", path=" + entry.getValue()); + } + } else { + // add to wasted items/space + dataStats.addObsoleteSpaceBytes(data.bytes().length()); + dataStats.incrementObsoleteItemCount(); + } + } catch (Exception e) { + e.printStackTrace(); + } + } +} From 3cb224d100d0f11852b03eb2ff0f6e12189035f9 Mon Sep 17 00:00:00 2001 From: Nikita Lebedev Date: Tue, 11 Nov 2025 20:00:54 +0200 Subject: [PATCH 2/6] chunked parallel reading Signed-off-by: Nikita Lebedev --- .../hedera/statevalidation/PocCommand.java | 173 +++++++++--- .../poc/ChunkedFileIterator.java | 251 ++++++++++++++++++ .../hedera/statevalidation/poc/DataStats.java | 40 ++- .../statevalidation/poc/ProcessorTask.java | 15 +- .../merkledb/config/MerkleDbConfig.java | 2 +- .../merkledb/files/DataFileCommon.java | 4 +- 6 files changed, 434 insertions(+), 51 deletions(-) create mode 100644 hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/ChunkedFileIterator.java diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/PocCommand.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/PocCommand.java index 56875c7261f3..02f1191726b0 100644 --- a/hedera-state-validator/src/main/java/com/hedera/statevalidation/PocCommand.java +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/PocCommand.java @@ -3,6 +3,7 @@ import com.hedera.pbj.runtime.io.buffer.BufferedData; import com.hedera.pbj.runtime.io.buffer.Bytes; +import com.hedera.statevalidation.poc.ChunkedFileIterator; import com.hedera.statevalidation.poc.DataStats; import com.hedera.statevalidation.poc.ItemData; import com.hedera.statevalidation.poc.ItemData.Type; @@ -10,17 +11,20 @@ import com.hedera.statevalidation.util.StateUtils; import com.swirlds.merkledb.MerkleDbDataSource; import com.swirlds.merkledb.files.DataFileCollection; -import com.swirlds.merkledb.files.DataFileIterator; import com.swirlds.merkledb.files.DataFileReader; import com.swirlds.platform.state.snapshot.DeserializedSignedState; import com.swirlds.state.MerkleNodeState; import com.swirlds.virtualmap.VirtualMap; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; import java.util.concurrent.BlockingQueue; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; import picocli.CommandLine.Command; import picocli.CommandLine.Option; import picocli.CommandLine.ParentCommand; @@ -57,6 +61,7 @@ public void run() { ExecutorService processPool = Executors.newFixedThreadPool(processThreads); long startTime = System.currentTimeMillis(); + AtomicLong totalBoundarySearchMillis = new AtomicLong(0L); // Initialize state and get data file collections parent.initializeStateDir(); @@ -70,12 +75,44 @@ public void run() { DataFileCollection pathToHashDfc = vds.getHashStoreDisk().getFileCollection(); DataFileCollection keyToPathDfc = vds.getKeyToPath().getFileCollection(); - // Count total readers upfront - int totalReaders = pathToKeyValueDfc.getAllCompletedFiles().size() + int totalFiles = pathToKeyValueDfc.getAllCompletedFiles().size() + pathToHashDfc.getAllCompletedFiles().size() + keyToPathDfc.getAllCompletedFiles().size(); - 
CountDownLatch readerLatch = new CountDownLatch(totalReaders); + System.out.println("P2KV file count: " + pathToKeyValueDfc.getAllCompletedFiles().size()); + System.out.println("P2H file count: " + pathToHashDfc.getAllCompletedFiles().size()); + System.out.println("K2P file count: " + keyToPathDfc.getAllCompletedFiles().size()); + System.out.println("Total files: " + totalFiles); + + long globalTotalSize = pathToKeyValueDfc.getAllCompletedFiles().stream() + .mapToLong(DataFileReader::getSize) + .sum() + + pathToHashDfc.getAllCompletedFiles().stream() + .mapToLong(DataFileReader::getSize) + .sum() + + keyToPathDfc.getAllCompletedFiles().stream() + .mapToLong(DataFileReader::getSize) + .sum(); + + System.out.println("Global total data size: " + globalTotalSize / (1024 * 1024) + " MB"); + + // Plan all tasks (calculate chunks for each file) + List tasks = new ArrayList<>(); + tasks.addAll(planTasksFor(pathToKeyValueDfc, Type.P2KV, ioThreads, globalTotalSize)); + tasks.addAll(planTasksFor(pathToHashDfc, Type.P2H, ioThreads, globalTotalSize)); + tasks.addAll(planTasksFor(keyToPathDfc, Type.K2P, ioThreads, globalTotalSize)); + + // Sort tasks: largest chunks first (better thread utilization) + tasks.sort((a, b) -> Long.compare( + b.endByte - b.startByte, + a.endByte - a.startByte + )); + + int totalTasks = tasks.size(); + + System.out.println("Total tasks: " + totalTasks); + + CountDownLatch readerLatch = new CountDownLatch(totalTasks); CountDownLatch processorsLatch = new CountDownLatch(processThreads); DataStats dataStats = new DataStats(); @@ -85,10 +122,24 @@ public void run() { processPool.submit(new ProcessorTask(dataQueue, vds, dataStats, processorsLatch)); } - // Submit reader tasks - submitReaderTasksFor(pathToKeyValueDfc, dataQueue, Type.P2KV, ioPool, readerLatch); - submitReaderTasksFor(pathToHashDfc, dataQueue, Type.P2H, ioPool, readerLatch); - submitReaderTasksFor(keyToPathDfc, dataQueue, Type.K2P, ioPool, readerLatch); + // Submit with chunking + // Submit all planned tasks + for (FileReadTask task : tasks) { + ioPool.submit(() -> { + try { + readFileChunk(task.reader, dataQueue, task.type, task.startByte, task.endByte, + totalBoundarySearchMillis); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new RuntimeException("Reader interrupted", e); + } catch (Exception e) { + throw new RuntimeException("Reader failed for chunk " + + task.startByte + "-" + task.endByte, e); + } finally { + readerLatch.countDown(); + } + }); + } // Wait for all readers to finish readerLatch.await(); @@ -110,41 +161,95 @@ public void run() { } System.out.println(dataStats); - long elapsedTime = System.currentTimeMillis() - startTime; - System.out.println("Total processing time: " + elapsedTime + " ms"); + System.out.println("Total boundary search time: " + totalBoundarySearchMillis.get() + " ms"); + System.out.println("Total processing time: " + (System.currentTimeMillis() - startTime) + " ms"); } catch (Exception e) { throw new RuntimeException(e); } } - private static void submitReaderTasksFor( + // Helper: Plan tasks for one collection + private List planTasksFor( DataFileCollection dfc, - BlockingQueue dataQueue, ItemData.Type dataType, - ExecutorService ioPool, - CountDownLatch readerLatch) { + int ioThreads, + long globalTotalSize) { + + List tasks = new ArrayList<>(); + for (DataFileReader reader : dfc.getAllCompletedFiles()) { - ioPool.submit(() -> { - try { - try (DataFileIterator dataIterator = reader.createIterator()) { - while (dataIterator.next()) { - 
BufferedData originalData = dataIterator.getDataItemData(); - Bytes dataCopy = originalData.getBytes(0, originalData.remaining()); - - ItemData itemData = - new ItemData(dataType, dataCopy, dataIterator.getDataItemDataLocation()); - dataQueue.put(itemData); - } - } - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new RuntimeException("Reader interrupted", e); - } catch (Exception e) { - throw new RuntimeException("Reader failed", e); - } finally { - readerLatch.countDown(); + long fileSize = reader.getSize(); + if (fileSize == 0) { + continue; + } + + // Calculate optimal chunks using GLOBAL total + int chunks = calculateOptimalChunks(reader, ioThreads, globalTotalSize); + long chunkSize = (fileSize + chunks - 1) / chunks; + + System.out.println( + "File: " + reader.getPath().getFileName() + " size: " + fileSize / (1024 * 1024) + " chunks: " + + chunks + " chunkSize: " + chunkSize / (1024 * 1024)); + + // Create tasks for each chunk + for (int i = 0; i < chunks; i++) { + long startByte = i * chunkSize; + long endByte = Math.min(startByte + chunkSize, fileSize); + + if (startByte >= fileSize) { + continue; } - }); + + tasks.add(new FileReadTask(reader, dataType, startByte, endByte)); + } + } + + return tasks; + } + + private int calculateOptimalChunks( + DataFileReader reader, + int ioThreads, + long globalTotalDataSize) { + + long fileSize = reader.getSize(); + long targetChunkSize = globalTotalDataSize / (ioThreads * 4); + + if (fileSize < targetChunkSize) { + return 1; + } + + return (int) Math.ceil((double) fileSize / targetChunkSize); + } + + private static void readFileChunk( + DataFileReader reader, + BlockingQueue dataQueue, + Type dataType, + long startByte, + long endByte, AtomicLong totalBoundarySearchMillis) + throws IOException, InterruptedException { + + try (ChunkedFileIterator iterator = + new ChunkedFileIterator(reader.getPath(), + reader.getMetadata(), dataType, startByte, endByte, totalBoundarySearchMillis)) { + + while (iterator.next()) { + BufferedData originalData = iterator.getDataItemData(); + Bytes dataCopy = originalData.getBytes(0, originalData.remaining()); + + ItemData itemData = new ItemData(dataType, dataCopy, iterator.getDataItemDataLocation()); + dataQueue.put(itemData); + } } } + + // Helper record to hold task information + private record FileReadTask( + DataFileReader reader, + ItemData.Type type, + long startByte, + long endByte + ) { + } } diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/ChunkedFileIterator.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/ChunkedFileIterator.java new file mode 100644 index 000000000000..7b9e926b91aa --- /dev/null +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/ChunkedFileIterator.java @@ -0,0 +1,251 @@ +// SPDX-License-Identifier: Apache-2.0 +package com.hedera.statevalidation.poc; + +import static com.hedera.pbj.runtime.ProtoParserTools.TAG_FIELD_OFFSET; +import static com.swirlds.merkledb.files.DataFileCommon.FIELD_DATAFILE_ITEMS; +import static com.swirlds.merkledb.files.DataFileCommon.FIELD_DATAFILE_METADATA; + +import com.hedera.pbj.runtime.ProtoConstants; +import com.hedera.pbj.runtime.io.ReadableSequentialData; +import com.hedera.pbj.runtime.io.buffer.BufferedData; +import com.hedera.pbj.runtime.io.stream.ReadableStreamingData; +import com.hedera.statevalidation.poc.ItemData.Type; +import com.swirlds.merkledb.files.DataFileCommon; +import com.swirlds.merkledb.files.DataFileMetadata; +import 
com.swirlds.merkledb.files.hashmap.Bucket; +import com.swirlds.merkledb.files.hashmap.ParsedBucket; +import com.swirlds.virtualmap.datasource.VirtualHashRecord; +import com.swirlds.virtualmap.datasource.VirtualLeafBytes; +import java.io.BufferedInputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.Channels; +import java.nio.channels.FileChannel; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.concurrent.atomic.AtomicLong; + +public class ChunkedFileIterator implements AutoCloseable { + private final static int BUFFER_SIZE = 128 * 1024; + + private final FileChannel channel; + private final DataFileMetadata metadata; + + private long startByte; + private final long endByte; + + private final ItemData.Type dataType; + + private BufferedInputStream bufferedInputStream; + private ReadableSequentialData in; + private BufferedData dataItemBuffer; + private long currentDataItemFilePosition; + private boolean closed = false; + + public ChunkedFileIterator( + Path path, DataFileMetadata metadata, Type dataType, long startByte, long endByte, + AtomicLong totalBoundarySearchMillis) + throws IOException { + this.channel = FileChannel.open(path, StandardOpenOption.READ); + this.metadata = metadata; + + this.startByte = startByte; + this.endByte = endByte; + + this.dataType = dataType; + + if (startByte > 0) { + // Find boundary, then position channel and open streams + long startTime = System.currentTimeMillis(); + this.startByte += findBoundaryOffset(); + long boundaryOffsetSearchTime = System.currentTimeMillis() - startTime; +// System.out.println("Found boundary offset in:" + boundaryOffsetSearchTime + " ms"); + totalBoundarySearchMillis.addAndGet(boundaryOffsetSearchTime); + channel.position(this.startByte); + openStreams(); + } else { + // At file start + channel.position(startByte); + openStreams(); + } + } + + private void openStreams() { + var channelStream = Channels.newInputStream(channel); + this.bufferedInputStream = new BufferedInputStream(channelStream, BUFFER_SIZE); + this.in = new ReadableStreamingData(bufferedInputStream); + } + + private long findBoundaryOffset() throws IOException { + // Use buffer to minimize disk I/O and channel repositioning + // It should account for boundary + full data item to validate its proto schema + ByteBuffer scanBuffer = ByteBuffer.allocate(BUFFER_SIZE); + + // Read large chunk at current position + scanBuffer.clear(); + channel.position(startByte); + int bytesRead = channel.read(scanBuffer); + if (bytesRead <= 0) { + throw new IOException("No valid data item boundary found in chunk"); + } + + scanBuffer.flip(); + BufferedData bufferData = BufferedData.wrap(scanBuffer); + + // Scan through buffer looking for valid boundary + while (bufferData.hasRemaining()) { + long positionInBuffer = bufferData.position(); + + try { + int tag = bufferData.readVarInt(false); + int fieldNum = tag >> TAG_FIELD_OFFSET; + + if ((fieldNum == FIELD_DATAFILE_ITEMS.number()) + && ((tag & ProtoConstants.TAG_WIRE_TYPE_MASK) == ProtoConstants.WIRE_TYPE_DELIMITED.ordinal())) { + int dataItemSize = bufferData.readVarInt(false); + long dataStartPosition = bufferData.position(); + + if (dataItemSize > 0 && (dataStartPosition + dataItemSize <= bufferData.limit())) { + bufferData.limit(dataStartPosition + dataItemSize); + long savedPos = bufferData.position(); + + if (isValidDataItem(bufferData)) { +// System.out.println( +// "Found valid item at " + positionInBuffer + " data size: " + dataItemSize); + 
return positionInBuffer; + } + + bufferData.position(savedPos); + bufferData.limit(bytesRead); + } + } + + // Not found, advance by 1 byte + bufferData.position(positionInBuffer + 1); + + } catch (Exception e) { + // Parsing failed, advance by 1 byte + bufferData.position(positionInBuffer + 1); + } + } + + throw new IOException("No valid data item boundary found in chunk"); + } + + private boolean isValidDataItem(BufferedData buffer) { + try { + if (!buffer.hasRemaining()) { + return false; + } + + return switch (dataType) { + case P2H -> validateVirtualHashRecord(buffer); + case P2KV -> validateVirtualLeafBytes(buffer); + case K2P -> validateBucket(buffer); + default -> throw new IllegalStateException("Unexpected data type: " + dataType); + }; + + } catch (Exception e) { + // Any parsing exception means invalid data + return false; + } + } + + private boolean validateVirtualHashRecord(BufferedData buffer) { + try { + VirtualHashRecord.parseFrom(buffer); + return true; + } catch (Exception e) { + return false; + } + } + + private boolean validateVirtualLeafBytes(BufferedData buffer) { + try { + VirtualLeafBytes.parseFrom(buffer); + return true; + } catch (Exception e) { + return false; + } + } + + private boolean validateBucket(BufferedData buffer) { + try { + final Bucket bucket = new ParsedBucket(); + bucket.readFrom(buffer); + return true; + } catch (Exception e) { + return false; + } + } + + public boolean next() throws IOException { + if (closed) { + throw new IllegalStateException("Cannot read from a closed iterator"); + } + + while (in.hasRemaining()) { + currentDataItemFilePosition = startByte + in.position(); + + if (currentDataItemFilePosition >= endByte) { + return false; + } + + final int tag = in.readVarInt(false); + final int fieldNum = tag >> TAG_FIELD_OFFSET; + + if (fieldNum == FIELD_DATAFILE_ITEMS.number()) { + final int dataItemSize = in.readVarInt(false); + dataItemBuffer = fillBuffer(dataItemSize); + return true; + } else if (fieldNum == FIELD_DATAFILE_METADATA.number()) { + final int metadataSize = in.readVarInt(false); + in.skip(metadataSize); + } else { + throw new IllegalArgumentException("Unknown data file field: " + fieldNum); + } + } + + return false; + } + + public BufferedData getDataItemData() { + return dataItemBuffer; + } + + public long getDataItemDataLocation() { + return DataFileCommon.dataLocation(metadata.getIndex(), currentDataItemFilePosition); + } + + @Override + public void close() throws IOException { + if (!closed) { + closed = true; + dataItemBuffer = null; + if (bufferedInputStream != null) { + bufferedInputStream.close(); + } + channel.close(); + } + } + + private BufferedData fillBuffer(int bytesToRead) throws IOException { + if (bytesToRead <= 0) { + throw new IOException("Malformed data, requested bytes: " + bytesToRead); + } + + if (dataItemBuffer == null || dataItemBuffer.capacity() < bytesToRead) { + dataItemBuffer = BufferedData.allocate(bytesToRead); + } + + dataItemBuffer.position(0); + dataItemBuffer.limit(bytesToRead); + final long bytesRead = in.readBytes(dataItemBuffer); + if (bytesRead != bytesToRead) { + throw new IOException("Couldn't read " + bytesToRead + " bytes, only read " + bytesRead); + } + + dataItemBuffer.position(0); + return dataItemBuffer; + } +} diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/DataStats.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/DataStats.java index becd257d9267..c61e404994f4 100644 --- 
a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/DataStats.java +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/DataStats.java @@ -8,6 +8,9 @@ public final class DataStats { private final AtomicLong totalItemCount = new AtomicLong(); private final AtomicLong obsoleteSpaceSize = new AtomicLong(); private final AtomicLong obsoleteItemCount = new AtomicLong(); + private final AtomicLong p2kvFailedToProcessCount = new AtomicLong(); + private final AtomicLong p2hFailedToProcessCount = new AtomicLong(); + private final AtomicLong k2pFailedToProcessCount = new AtomicLong(); public void addTotalSpaceBytes(long bytes) { totalSpaceSize.addAndGet(bytes); @@ -25,6 +28,18 @@ public void incrementObsoleteItemCount() { obsoleteItemCount.incrementAndGet(); } + public void incrementP2kvFailedToProcessCount() { + p2kvFailedToProcessCount.incrementAndGet(); + } + + public void incrementP2hFailedToProcessCount() { + p2hFailedToProcessCount.incrementAndGet(); + } + + public void incrementK2pFailedToProcessCount() { + k2pFailedToProcessCount.incrementAndGet(); + } + public long getTotalSpaceSize() { return totalSpaceSize.get(); } @@ -41,6 +56,18 @@ public long getObsoleteItemCount() { return obsoleteItemCount.get(); } + public long getP2kvFailedToProcessCount() { + return p2kvFailedToProcessCount.get(); + } + + public long getP2hFailedToProcessCount() { + return p2hFailedToProcessCount.get(); + } + + public long getK2pFailedToProcessCount() { + return k2pFailedToProcessCount.get(); + } + @Override public String toString() { return String.format( @@ -49,7 +76,16 @@ public String toString() { Total space: %,d bytes Total items: %,d Obsolete space: %,d bytes - Obsolete items: %,d""", - getTotalSpaceSize(), getTotalItemCount(), getObsoleteSpaceSize(), getObsoleteItemCount()); + Obsolete items: %,d + P2KV items failed to process: %,d + P2H items failed to process: %,d + K2P items failed to process: %,d""", + getTotalSpaceSize(), + getTotalItemCount(), + getObsoleteSpaceSize(), + getObsoleteItemCount(), + getP2kvFailedToProcessCount(), + getP2hFailedToProcessCount(), + getK2pFailedToProcessCount()); } } diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/ProcessorTask.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/ProcessorTask.java index a00b11635c12..95b3a5335335 100644 --- a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/ProcessorTask.java +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/ProcessorTask.java @@ -1,7 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 package com.hedera.statevalidation.poc; -import com.hedera.statevalidation.util.reflect.BucketIterator; import com.swirlds.merkledb.MerkleDbDataSource; import com.swirlds.merkledb.collections.LongList; import com.swirlds.merkledb.files.hashmap.ParsedBucket; @@ -76,15 +75,13 @@ private void processVirtualLeafBytes(ItemData data) { if (data.location() == pathToDiskLocationLeafNodes.get(path)) { // live object, do something... 
- System.out.println("P2KV: path=" + path + ", key=" + virtualLeafBytes.keyBytes() + ", value=" - + virtualLeafBytes.valueBytes()); } else { // add to wasted items/space dataStats.addObsoleteSpaceBytes(data.bytes().length()); dataStats.incrementObsoleteItemCount(); } } catch (Exception e) { - e.printStackTrace(); + dataStats.incrementP2kvFailedToProcessCount(); } } @@ -99,7 +96,6 @@ private void processVirtualHashRecord(ItemData data) { if (data.location() == pathToDiskLocationInternalNodes.get(path)) { // live object, do something... - System.out.println("P2H: path=" + path + ", hash=" + virtualHashRecord.hash()); } else { // add to wasted items/space @@ -107,7 +103,7 @@ private void processVirtualHashRecord(ItemData data) { dataStats.incrementObsoleteItemCount(); } } catch (Exception e) { - e.printStackTrace(); + dataStats.incrementP2hFailedToProcessCount(); } } @@ -121,18 +117,13 @@ private void processBucket(ItemData data) { if (data.location() == bucketIndexToBucketLocation.get(bucket.getBucketIndex())) { // live object, do something... - var bucketIterator = new BucketIterator(bucket); - while (bucketIterator.hasNext()) { - final ParsedBucket.BucketEntry entry = bucketIterator.next(); - System.out.println("K2P: key=" + entry.getKeyBytes() + ", path=" + entry.getValue()); - } } else { // add to wasted items/space dataStats.addObsoleteSpaceBytes(data.bytes().length()); dataStats.incrementObsoleteItemCount(); } } catch (Exception e) { - e.printStackTrace(); + dataStats.incrementK2pFailedToProcessCount(); } } } diff --git a/platform-sdk/swirlds-merkledb/src/main/java/com/swirlds/merkledb/config/MerkleDbConfig.java b/platform-sdk/swirlds-merkledb/src/main/java/com/swirlds/merkledb/config/MerkleDbConfig.java index ae9ca7ff2ae4..a71638e361e8 100644 --- a/platform-sdk/swirlds-merkledb/src/main/java/com/swirlds/merkledb/config/MerkleDbConfig.java +++ b/platform-sdk/swirlds-merkledb/src/main/java/com/swirlds/merkledb/config/MerkleDbConfig.java @@ -73,7 +73,7 @@ public record MerkleDbConfig( @Positive @ConfigProperty(defaultValue = "1000000000") long initialCapacity, @Positive @ConfigProperty(defaultValue = "4000000000") long maxNumOfKeys, - @Min(0) @ConfigProperty(defaultValue = "8388608") long hashesRamToDiskThreshold, + @Min(0) @ConfigProperty(defaultValue = "0") long hashesRamToDiskThreshold, @Positive @ConfigProperty(defaultValue = "1000000") int hashStoreRamBufferSize, @ConfigProperty(defaultValue = "true") boolean hashStoreRamOffHeapBuffers, @Positive @ConfigProperty(defaultValue = "" + MEBIBYTES_TO_BYTES) int longListChunkSize, diff --git a/platform-sdk/swirlds-merkledb/src/main/java/com/swirlds/merkledb/files/DataFileCommon.java b/platform-sdk/swirlds-merkledb/src/main/java/com/swirlds/merkledb/files/DataFileCommon.java index 41bab119c137..1e015c7a4986 100644 --- a/platform-sdk/swirlds-merkledb/src/main/java/com/swirlds/merkledb/files/DataFileCommon.java +++ b/platform-sdk/swirlds-merkledb/src/main/java/com/swirlds/merkledb/files/DataFileCommon.java @@ -77,9 +77,9 @@ public final class DataFileCommon { public static final int PAGE_SIZE = 4096; // Data file protobuf fields - static final FieldDefinition FIELD_DATAFILE_METADATA = + public static final FieldDefinition FIELD_DATAFILE_METADATA = new FieldDefinition("metadata", FieldType.MESSAGE, false, false, false, 1); - static final FieldDefinition FIELD_DATAFILE_ITEMS = + public static final FieldDefinition FIELD_DATAFILE_ITEMS = new FieldDefinition("items", FieldType.MESSAGE, true, true, false, 11); private DataFileCommon() { From 
a5be4d1178e1827763f7cdad11a704759247e8b2 Mon Sep 17 00:00:00 2001 From: Nikita Lebedev Date: Wed, 26 Nov 2025 13:23:18 +0200 Subject: [PATCH 3/6] add validators which can run in parallel Signed-off-by: Nikita Lebedev --- .../hedera/statevalidation/PocCommand.java | 255 --------- .../statevalidation/StateOperatorCommand.java | 2 +- .../statevalidation/Validate2Command.java | 488 ++++++++++++++++++ .../hedera/statevalidation/poc/DataStats.java | 91 ---- .../statevalidation/poc/ProcessorTask.java | 129 ----- .../listener/LoggingValidationListener.java | 32 ++ .../poc/listener/ValidationListener.java | 13 + .../statevalidation/poc/model/DataStats.java | 142 +++++ .../poc/{ => model}/ItemData.java | 2 +- .../{ => pipeline}/ChunkedFileIterator.java | 52 +- .../poc/pipeline/ProcessorTask.java | 230 +++++++++ .../poc/util/ValidationAssertions.java | 137 +++++ .../poc/util/ValidationException.java | 21 + .../validator/AccountAndSupplyValidator.java | 94 ++++ .../poc/validator/EntityIdCountValidator.java | 128 +++++ .../EntityIdUniquenessValidator.java | 176 +++++++ .../HashRecordIntegrityValidator.java | 39 ++ .../HdhmBucketIntegrityValidator.java | 234 +++++++++ .../LeafBytesIntegrityValidator.java | 75 +++ .../TokenRelationsIntegrityValidator.java | 114 ++++ .../validator/api/HashRecordValidator.java | 13 + .../validator/api/HdhmBucketValidator.java | 9 + .../poc/validator/api/LeafBytesValidator.java | 9 + .../poc/validator/api/Validator.java | 21 + .../hedera/statevalidation/util/LogUtils.java | 21 + .../src/main/resources/log4j2.xml | 9 +- 26 files changed, 2034 insertions(+), 502 deletions(-) delete mode 100644 hedera-state-validator/src/main/java/com/hedera/statevalidation/PocCommand.java create mode 100644 hedera-state-validator/src/main/java/com/hedera/statevalidation/Validate2Command.java delete mode 100644 hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/DataStats.java delete mode 100644 hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/ProcessorTask.java create mode 100644 hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/listener/LoggingValidationListener.java create mode 100644 hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/listener/ValidationListener.java create mode 100644 hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/model/DataStats.java rename hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/{ => model}/ItemData.java (90%) rename hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/{ => pipeline}/ChunkedFileIterator.java (81%) create mode 100644 hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/pipeline/ProcessorTask.java create mode 100644 hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/util/ValidationAssertions.java create mode 100644 hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/util/ValidationException.java create mode 100644 hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/AccountAndSupplyValidator.java create mode 100644 hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/EntityIdCountValidator.java create mode 100644 hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/EntityIdUniquenessValidator.java create mode 100644 hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/HashRecordIntegrityValidator.java create mode 100644 
hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/HdhmBucketIntegrityValidator.java create mode 100644 hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/LeafBytesIntegrityValidator.java create mode 100644 hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/TokenRelationsIntegrityValidator.java create mode 100644 hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/api/HashRecordValidator.java create mode 100644 hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/api/HdhmBucketValidator.java create mode 100644 hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/api/LeafBytesValidator.java create mode 100644 hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/api/Validator.java diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/PocCommand.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/PocCommand.java deleted file mode 100644 index 02f1191726b0..000000000000 --- a/hedera-state-validator/src/main/java/com/hedera/statevalidation/PocCommand.java +++ /dev/null @@ -1,255 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -package com.hedera.statevalidation; - -import com.hedera.pbj.runtime.io.buffer.BufferedData; -import com.hedera.pbj.runtime.io.buffer.Bytes; -import com.hedera.statevalidation.poc.ChunkedFileIterator; -import com.hedera.statevalidation.poc.DataStats; -import com.hedera.statevalidation.poc.ItemData; -import com.hedera.statevalidation.poc.ItemData.Type; -import com.hedera.statevalidation.poc.ProcessorTask; -import com.hedera.statevalidation.util.StateUtils; -import com.swirlds.merkledb.MerkleDbDataSource; -import com.swirlds.merkledb.files.DataFileCollection; -import com.swirlds.merkledb.files.DataFileReader; -import com.swirlds.platform.state.snapshot.DeserializedSignedState; -import com.swirlds.state.MerkleNodeState; -import com.swirlds.virtualmap.VirtualMap; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.BlockingQueue; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicLong; -import picocli.CommandLine.Command; -import picocli.CommandLine.Option; -import picocli.CommandLine.ParentCommand; - -@Command(name = "poc") -public class PocCommand implements Runnable { - - @ParentCommand - private StateOperatorCommand parent; - - @Option( - names = {"-io", "--io-threads"}, - description = "Number of IO threads for reading from disk.") - private int ioThreads = 2; - - @Option( - names = {"-p", "--process-threads"}, - description = "Number of CPU threads for processing chunks.") - private int processThreads = 2; - - @Option( - names = {"-b", "--queue-capacity"}, - description = "Queue capacity for backpressure control.") - private int queueCapacity = 1000; - - private PocCommand() {} - - @Override - public void run() { - try { - BlockingQueue dataQueue = new LinkedBlockingQueue<>(queueCapacity); - - ExecutorService ioPool = Executors.newFixedThreadPool(ioThreads); - ExecutorService processPool = Executors.newFixedThreadPool(processThreads); - - long startTime = System.currentTimeMillis(); - AtomicLong totalBoundarySearchMillis = new AtomicLong(0L); - - // Initialize state and get data file 
collections - parent.initializeStateDir(); - DeserializedSignedState deserializedSignedState = StateUtils.getDeserializedSignedState(); - MerkleNodeState state = - deserializedSignedState.reservedSignedState().get().getState(); - VirtualMap virtualMap = (VirtualMap) state.getRoot(); - MerkleDbDataSource vds = (MerkleDbDataSource) virtualMap.getDataSource(); - - DataFileCollection pathToKeyValueDfc = vds.getPathToKeyValue().getFileCollection(); - DataFileCollection pathToHashDfc = vds.getHashStoreDisk().getFileCollection(); - DataFileCollection keyToPathDfc = vds.getKeyToPath().getFileCollection(); - - int totalFiles = pathToKeyValueDfc.getAllCompletedFiles().size() - + pathToHashDfc.getAllCompletedFiles().size() - + keyToPathDfc.getAllCompletedFiles().size(); - - System.out.println("P2KV file count: " + pathToKeyValueDfc.getAllCompletedFiles().size()); - System.out.println("P2H file count: " + pathToHashDfc.getAllCompletedFiles().size()); - System.out.println("K2P file count: " + keyToPathDfc.getAllCompletedFiles().size()); - System.out.println("Total files: " + totalFiles); - - long globalTotalSize = pathToKeyValueDfc.getAllCompletedFiles().stream() - .mapToLong(DataFileReader::getSize) - .sum() - + pathToHashDfc.getAllCompletedFiles().stream() - .mapToLong(DataFileReader::getSize) - .sum() - + keyToPathDfc.getAllCompletedFiles().stream() - .mapToLong(DataFileReader::getSize) - .sum(); - - System.out.println("Global total data size: " + globalTotalSize / (1024 * 1024) + " MB"); - - // Plan all tasks (calculate chunks for each file) - List tasks = new ArrayList<>(); - tasks.addAll(planTasksFor(pathToKeyValueDfc, Type.P2KV, ioThreads, globalTotalSize)); - tasks.addAll(planTasksFor(pathToHashDfc, Type.P2H, ioThreads, globalTotalSize)); - tasks.addAll(planTasksFor(keyToPathDfc, Type.K2P, ioThreads, globalTotalSize)); - - // Sort tasks: largest chunks first (better thread utilization) - tasks.sort((a, b) -> Long.compare( - b.endByte - b.startByte, - a.endByte - a.startByte - )); - - int totalTasks = tasks.size(); - - System.out.println("Total tasks: " + totalTasks); - - CountDownLatch readerLatch = new CountDownLatch(totalTasks); - CountDownLatch processorsLatch = new CountDownLatch(processThreads); - - DataStats dataStats = new DataStats(); - - // Start processor threads - for (int i = 0; i < processThreads; i++) { - processPool.submit(new ProcessorTask(dataQueue, vds, dataStats, processorsLatch)); - } - - // Submit with chunking - // Submit all planned tasks - for (FileReadTask task : tasks) { - ioPool.submit(() -> { - try { - readFileChunk(task.reader, dataQueue, task.type, task.startByte, task.endByte, - totalBoundarySearchMillis); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new RuntimeException("Reader interrupted", e); - } catch (Exception e) { - throw new RuntimeException("Reader failed for chunk " - + task.startByte + "-" + task.endByte, e); - } finally { - readerLatch.countDown(); - } - }); - } - - // Wait for all readers to finish - readerLatch.await(); - ioPool.shutdown(); - if (!ioPool.awaitTermination(1, TimeUnit.MINUTES)) { - throw new RuntimeException("IO pool did not terminate within timeout"); - } - - // Send one poison pill per processor - for (int i = 0; i < processThreads; i++) { - dataQueue.put(ItemData.poisonPill()); - } - - // Wait for processors to finish - processorsLatch.await(); - processPool.shutdown(); - if (!processPool.awaitTermination(1, TimeUnit.MINUTES)) { - throw new RuntimeException("Process pool did not terminate 
within timeout"); - } - - System.out.println(dataStats); - System.out.println("Total boundary search time: " + totalBoundarySearchMillis.get() + " ms"); - System.out.println("Total processing time: " + (System.currentTimeMillis() - startTime) + " ms"); - } catch (Exception e) { - throw new RuntimeException(e); - } - } - - // Helper: Plan tasks for one collection - private List planTasksFor( - DataFileCollection dfc, - ItemData.Type dataType, - int ioThreads, - long globalTotalSize) { - - List tasks = new ArrayList<>(); - - for (DataFileReader reader : dfc.getAllCompletedFiles()) { - long fileSize = reader.getSize(); - if (fileSize == 0) { - continue; - } - - // Calculate optimal chunks using GLOBAL total - int chunks = calculateOptimalChunks(reader, ioThreads, globalTotalSize); - long chunkSize = (fileSize + chunks - 1) / chunks; - - System.out.println( - "File: " + reader.getPath().getFileName() + " size: " + fileSize / (1024 * 1024) + " chunks: " - + chunks + " chunkSize: " + chunkSize / (1024 * 1024)); - - // Create tasks for each chunk - for (int i = 0; i < chunks; i++) { - long startByte = i * chunkSize; - long endByte = Math.min(startByte + chunkSize, fileSize); - - if (startByte >= fileSize) { - continue; - } - - tasks.add(new FileReadTask(reader, dataType, startByte, endByte)); - } - } - - return tasks; - } - - private int calculateOptimalChunks( - DataFileReader reader, - int ioThreads, - long globalTotalDataSize) { - - long fileSize = reader.getSize(); - long targetChunkSize = globalTotalDataSize / (ioThreads * 4); - - if (fileSize < targetChunkSize) { - return 1; - } - - return (int) Math.ceil((double) fileSize / targetChunkSize); - } - - private static void readFileChunk( - DataFileReader reader, - BlockingQueue dataQueue, - Type dataType, - long startByte, - long endByte, AtomicLong totalBoundarySearchMillis) - throws IOException, InterruptedException { - - try (ChunkedFileIterator iterator = - new ChunkedFileIterator(reader.getPath(), - reader.getMetadata(), dataType, startByte, endByte, totalBoundarySearchMillis)) { - - while (iterator.next()) { - BufferedData originalData = iterator.getDataItemData(); - Bytes dataCopy = originalData.getBytes(0, originalData.remaining()); - - ItemData itemData = new ItemData(dataType, dataCopy, iterator.getDataItemDataLocation()); - dataQueue.put(itemData); - } - } - } - - // Helper record to hold task information - private record FileReadTask( - DataFileReader reader, - ItemData.Type type, - long startByte, - long endByte - ) { - } -} diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/StateOperatorCommand.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/StateOperatorCommand.java index a0263f77bc48..7a19d7c69d13 100644 --- a/hedera-state-validator/src/main/java/com/hedera/statevalidation/StateOperatorCommand.java +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/StateOperatorCommand.java @@ -19,7 +19,7 @@ SortedExportCommand.class, CompactionCommand.class, ApplyBlocksCommand.class, - PocCommand.class + Validate2Command.class }, description = "CLI tool with validation and introspection modes.") public class StateOperatorCommand implements Runnable { diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/Validate2Command.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/Validate2Command.java new file mode 100644 index 000000000000..2c2f3c878dfe --- /dev/null +++ 
b/hedera-state-validator/src/main/java/com/hedera/statevalidation/Validate2Command.java @@ -0,0 +1,488 @@ +// SPDX-License-Identifier: Apache-2.0 +package com.hedera.statevalidation; + +import static com.hedera.statevalidation.poc.validator.AccountAndSupplyValidator.ACCOUNT_TAG; +import static com.hedera.statevalidation.poc.validator.EntityIdCountValidator.ENTITY_ID_COUNT_TAG; +import static com.hedera.statevalidation.poc.validator.EntityIdUniquenessValidator.ENTITY_ID_UNIQUENESS_TAG; +import static com.hedera.statevalidation.poc.validator.HashRecordIntegrityValidator.INTERNAL_TAG; +import static com.hedera.statevalidation.poc.validator.HdhmBucketIntegrityValidator.HDHM_TAG; +import static com.hedera.statevalidation.poc.validator.LeafBytesIntegrityValidator.LEAF_TAG; +import static com.hedera.statevalidation.poc.validator.TokenRelationsIntegrityValidator.TOKEN_RELATIONS_TAG; +import static com.swirlds.base.units.UnitConstants.BYTES_TO_MEBIBYTES; +import static com.swirlds.base.units.UnitConstants.MEBIBYTES_TO_BYTES; + +import com.hedera.pbj.runtime.io.buffer.BufferedData; +import com.hedera.pbj.runtime.io.buffer.Bytes; +import com.hedera.statevalidation.poc.listener.LoggingValidationListener; +import com.hedera.statevalidation.poc.listener.ValidationListener; +import com.hedera.statevalidation.poc.model.DataStats; +import com.hedera.statevalidation.poc.model.ItemData; +import com.hedera.statevalidation.poc.model.ItemData.Type; +import com.hedera.statevalidation.poc.pipeline.ChunkedFileIterator; +import com.hedera.statevalidation.poc.pipeline.ProcessorTask; +import com.hedera.statevalidation.poc.util.ValidationException; +import com.hedera.statevalidation.poc.validator.AccountAndSupplyValidator; +import com.hedera.statevalidation.poc.validator.EntityIdCountValidator; +import com.hedera.statevalidation.poc.validator.EntityIdUniquenessValidator; +import com.hedera.statevalidation.poc.validator.HashRecordIntegrityValidator; +import com.hedera.statevalidation.poc.validator.HdhmBucketIntegrityValidator; +import com.hedera.statevalidation.poc.validator.LeafBytesIntegrityValidator; +import com.hedera.statevalidation.poc.validator.TokenRelationsIntegrityValidator; +import com.hedera.statevalidation.poc.validator.api.Validator; +import com.hedera.statevalidation.util.StateUtils; +import com.swirlds.merkledb.MerkleDbDataSource; +import com.swirlds.merkledb.files.DataFileCollection; +import com.swirlds.merkledb.files.DataFileReader; +import com.swirlds.platform.state.snapshot.DeserializedSignedState; +import com.swirlds.state.MerkleNodeState; +import com.swirlds.virtualmap.VirtualMap; +import edu.umd.cs.findbugs.annotations.NonNull; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.CopyOnWriteArraySet; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import picocli.CommandLine; +import picocli.CommandLine.Command; +import picocli.CommandLine.Option; +import picocli.CommandLine.ParentCommand; + +@SuppressWarnings("FieldMayBeFinal") +@Command( + name = "validate2", + mixinStandardHelpOptions = true, + description = "Validate 
command v2. Validates the state by running some of the validators in parallel.") +public class Validate2Command implements Runnable { + + private static final Logger log = LogManager.getLogger(Validate2Command.class); + + @ParentCommand + private StateOperatorCommand parent; + + @Option( + names = {"-io", "--io-threads"}, + description = "Number of IO threads for reading from disk.") + private int ioThreads = 4; + + @Option( + names = {"-p", "--process-threads"}, + description = "Number of CPU threads for processing chunks.") + private int processThreads = 6; + + @Option( + names = {"-q", "--queue-capacity"}, + description = "Queue capacity for backpressure control.") + private int queueCapacity = 1000; + + @Option( + names = {"-b", "--batch-size"}, + description = "Batch size for processing items.") + private int batchSize = 10; + + @CommandLine.Parameters( + arity = "1..*", + description = "Tag to run: [" + + INTERNAL_TAG + + ", " + + LEAF_TAG + + ", " + + HDHM_TAG + + ", " + + ACCOUNT_TAG + + ", " + + TOKEN_RELATIONS_TAG + + ", " + + ENTITY_ID_COUNT_TAG + + ", " + + ENTITY_ID_UNIQUENESS_TAG + + "]") + private String[] tags = { + INTERNAL_TAG, + LEAF_TAG, + HDHM_TAG, + ACCOUNT_TAG, + TOKEN_RELATIONS_TAG, + ENTITY_ID_COUNT_TAG, + ENTITY_ID_UNIQUENESS_TAG + }; + + private Validate2Command() {} + + @Override + public void run() { + try { + try (ExecutorService ioPool = Executors.newFixedThreadPool(ioThreads)) { + try (ExecutorService processPool = Executors.newFixedThreadPool(processThreads)) { + final BlockingQueue> dataQueue = new LinkedBlockingQueue<>(queueCapacity); + + final long startTime = System.currentTimeMillis(); + final AtomicLong totalBoundarySearchMillis = new AtomicLong(0L); + + // Initialize state and get data file collections + parent.initializeStateDir(); + final DeserializedSignedState deserializedSignedState = StateUtils.getDeserializedSignedState(); + final MerkleNodeState state = + deserializedSignedState.reservedSignedState().get().getState(); + final VirtualMap virtualMap = (VirtualMap) state.getRoot(); + final MerkleDbDataSource vds = (MerkleDbDataSource) virtualMap.getDataSource(); + + final DataFileCollection pathToKeyValueDfc = + vds.getPathToKeyValue().getFileCollection(); + final DataFileCollection pathToHashDfc = + vds.getHashStoreDisk().getFileCollection(); + final DataFileCollection keyToPathDfc = vds.getKeyToPath().getFileCollection(); + + // Initialize validators and listeners + final List validationListeners = List.of(new LoggingValidationListener()); + final Map> validators = + createAndInitValidators(state, tags, validationListeners); + + int totalFiles = 0; + long globalTotalSize = 0L; + final List fileReadTasks = new ArrayList<>(); + + if (validators.containsKey(Type.P2KV)) { + totalFiles += pathToKeyValueDfc.getAllCompletedFiles().size(); + globalTotalSize += pathToKeyValueDfc.getAllCompletedFiles().stream() + .mapToLong(DataFileReader::getSize) + .sum(); + log.debug( + "P2KV file count: {}", + pathToKeyValueDfc.getAllCompletedFiles().size()); + } + if (validators.containsKey(Type.P2H)) { + totalFiles += pathToHashDfc.getAllCompletedFiles().size(); + globalTotalSize += pathToHashDfc.getAllCompletedFiles().stream() + .mapToLong(DataFileReader::getSize) + .sum(); + log.debug( + "P2H file count: {}", + pathToHashDfc.getAllCompletedFiles().size()); + } + if (validators.containsKey(Type.K2P)) { + totalFiles += keyToPathDfc.getAllCompletedFiles().size(); + globalTotalSize += keyToPathDfc.getAllCompletedFiles().stream() + .mapToLong(DataFileReader::getSize) 
+ .sum(); + log.debug( + "K2P file count: {}", + keyToPathDfc.getAllCompletedFiles().size()); + } + + // Plan all file read tasks (calculate chunks for each file) + if (validators.containsKey(Type.P2KV)) { + fileReadTasks.addAll(planTasksFor(pathToKeyValueDfc, Type.P2KV, ioThreads, globalTotalSize)); + } + if (validators.containsKey(Type.P2H)) { + fileReadTasks.addAll(planTasksFor(pathToHashDfc, Type.P2H, ioThreads, globalTotalSize)); + } + if (validators.containsKey(Type.K2P)) { + fileReadTasks.addAll(planTasksFor(keyToPathDfc, Type.K2P, ioThreads, globalTotalSize)); + } + + log.debug("File count: {}", totalFiles); + log.debug("Total data size: {} MB", globalTotalSize * BYTES_TO_MEBIBYTES); + + // Sort tasks: largest chunks first (better thread utilization) + fileReadTasks.sort((a, b) -> Long.compare(b.endByte - b.startByte, a.endByte - a.startByte)); + + final int totalFileReadTasks = fileReadTasks.size(); + + log.debug("Total file read tasks: {}", totalFileReadTasks); + + final CountDownLatch readerLatch = new CountDownLatch(totalFileReadTasks); + final CountDownLatch processorsLatch = new CountDownLatch(processThreads); + + final DataStats dataStats = new DataStats(); + + // Start processor threads + for (int i = 0; i < processThreads; i++) { + processPool.submit(new ProcessorTask( + validators, validationListeners, dataQueue, vds, dataStats, processorsLatch)); + } + + // Submit all planned file read tasks to read file in chunks + for (final FileReadTask task : fileReadTasks) { + ioPool.submit(() -> { + try { + readFileChunk( + task.reader, + dataQueue, + task.type, + task.startByte, + task.endByte, + totalBoundarySearchMillis); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + e.printStackTrace(); // TODO: double check this exception + throw new RuntimeException("Reader interrupted", e); + } catch (Exception e) { + e.printStackTrace(); // TODO: double check this exception + throw new RuntimeException( + "Reader failed for chunk " + task.startByte + "-" + task.endByte, e); + } finally { + readerLatch.countDown(); + } + }); + } + + // Wait for all readers to finish + readerLatch.await(); + ioPool.shutdown(); + if (!ioPool.awaitTermination(1, TimeUnit.MINUTES)) { + throw new RuntimeException("IO pool did not terminate within timeout"); + } + + // Send one poison pill per processor + for (int i = 0; i < processThreads; i++) { + dataQueue.put(List.of(ItemData.poisonPill())); + } + + // Wait for processors to finish + processorsLatch.await(); + processPool.shutdown(); + if (!processPool.awaitTermination(1, TimeUnit.MINUTES)) { + throw new RuntimeException("Process pool did not terminate within timeout"); + } + + validators + .values() + .forEach(validatorSet -> validatorSet.forEach(validator -> { + try { + validator.validate(); + validationListeners.forEach( + listener -> listener.onValidationCompleted(validator.getTag())); + } catch (ValidationException e) { + log.error("Validation failed: {}", e.getMessage()); + } + })); + + if (validators.containsKey(Type.P2KV)) { + log.info( + "P2KV (Path -> Key/Value) Data Stats: \n {}", + dataStats.getP2kv().toStringContent()); + } + if (validators.containsKey(Type.P2H)) { + log.info( + "P2H (Path -> Hash) Data Stats: \n {}", + dataStats.getP2h().toStringContent()); + } + if (validators.containsKey(Type.K2P)) { + log.info( + "K2P (Key -> Path) Data Stats: \n {}", + dataStats.getK2p().toStringContent()); + } + + log.info(dataStats); + + // common validation for error reads + if (dataStats.hasErrorReads()) { + throw new 
RuntimeException("Error reads found. Full info: \n " + dataStats); + } + + log.debug("Total boundary search time: {} ms", totalBoundarySearchMillis.get()); + log.debug("Total processing time: {} ms", System.currentTimeMillis() - startTime); + } + } + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + private Map> createAndInitValidators( + @NonNull final MerkleNodeState state, + @NonNull final String[] tags, + @NonNull final List validationListeners) { + final Set tagSet = Set.of(tags); + + final Map> validatorsMap = new HashMap<>(); + + // 1. Populate map with validators that match supplied tags + final Set hashRecordValidators = new CopyOnWriteArraySet<>(); + final Validator hashRecordValidator = new HashRecordIntegrityValidator(); + if (tagSet.contains(hashRecordValidator.getTag())) { + hashRecordValidators.add(hashRecordValidator); + } + if (!hashRecordValidators.isEmpty()) { + validatorsMap.put(Type.P2H, hashRecordValidators); + } + // hdhm + final Set hdhmBucketValidators = new CopyOnWriteArraySet<>(); + final Validator hdhmBucketValidator = new HdhmBucketIntegrityValidator(); + if (tagSet.contains(hdhmBucketValidator.getTag())) { + hdhmBucketValidators.add(hdhmBucketValidator); + } + if (!hdhmBucketValidators.isEmpty()) { + validatorsMap.put(Type.K2P, hdhmBucketValidators); + } + // leaf, etc. + final Set leafBytesValidators = new CopyOnWriteArraySet<>(); + final Validator leafBytesValidator = new LeafBytesIntegrityValidator(); + if (tagSet.contains(leafBytesValidator.getTag())) { + leafBytesValidators.add(leafBytesValidator); + } + final Validator accountValidator = new AccountAndSupplyValidator(); + if (tagSet.contains(accountValidator.getTag())) { + leafBytesValidators.add(accountValidator); + } + if (!leafBytesValidators.isEmpty()) { + validatorsMap.put(Type.P2KV, leafBytesValidators); + } + final Validator tokenRelationsValidator = new TokenRelationsIntegrityValidator(); + if (tagSet.contains(tokenRelationsValidator.getTag())) { + leafBytesValidators.add(tokenRelationsValidator); + } + if (!leafBytesValidators.isEmpty()) { + validatorsMap.put(Type.P2KV, leafBytesValidators); + } + final Validator entityIdCountValidator = new EntityIdCountValidator(); + if (tagSet.contains(entityIdCountValidator.getTag())) { + leafBytesValidators.add(entityIdCountValidator); + } + if (!leafBytesValidators.isEmpty()) { + validatorsMap.put(Type.P2KV, leafBytesValidators); + } + final Validator entityIdUniquenessValidator = new EntityIdUniquenessValidator(); + if (tagSet.contains(entityIdUniquenessValidator.getTag())) { + leafBytesValidators.add(entityIdUniquenessValidator); + } + if (!leafBytesValidators.isEmpty()) { + validatorsMap.put(Type.P2KV, leafBytesValidators); + } + + // 2. Initialize validators and remove if initialization fails + // Use an iterator on the map values to allow safe removal of empty sets + final java.util.Iterator> mapIterator = + validatorsMap.values().iterator(); + while (mapIterator.hasNext()) { + final Set validatorSet = mapIterator.next(); + final java.util.Iterator validatorIterator = validatorSet.iterator(); + + while (validatorIterator.hasNext()) { + final Validator validator = validatorIterator.next(); + validationListeners.forEach(listener -> listener.onValidationStarted(validator.getTag())); + try { + validator.initialize(state); + } catch (ValidationException e) { + validationListeners.forEach(listener -> listener.onValidationFailed(e)); + // 3. 
Remove validator entry if initialization failed + validatorIterator.remove(); + } + } + + // Clean up: remove the entry from the map if no validators remain for this type + if (validatorSet.isEmpty()) { + mapIterator.remove(); + } + } + + // 4. Return the fully initialized and cleaned map + return validatorsMap; + } + + // Helper: Plan tasks for one collection + private List planTasksFor( + @NonNull final DataFileCollection dfc, + @NonNull final ItemData.Type dataType, + final int ioThreads, + final long globalTotalSize) { + + final List tasks = new ArrayList<>(); + + final long collectionTotalSize = dfc.getAllCompletedFiles().stream() + .mapToLong(DataFileReader::getSize) + .sum(); + + for (final DataFileReader reader : dfc.getAllCompletedFiles()) { + final long fileSize = reader.getSize(); + if (fileSize == 0) { + continue; + } + + final int chunks = calculateOptimalChunks(reader, ioThreads, collectionTotalSize); + final long chunkSize = (fileSize + chunks - 1) / chunks; + + log.debug( + "File: {} size: {} MB, chunks: {} chunkSize: {} MB", + reader.getPath().getFileName(), + fileSize * BYTES_TO_MEBIBYTES, + chunks, + chunkSize * BYTES_TO_MEBIBYTES); + + // Create tasks for each chunk + for (int i = 0; i < chunks; i++) { + final long startByte = i * chunkSize; + final long endByte = Math.min(startByte + chunkSize, fileSize); + + if (startByte >= fileSize) { + continue; + } + + tasks.add(new FileReadTask(reader, dataType, startByte, endByte)); + } + } + + return tasks; + } + + private int calculateOptimalChunks( + @NonNull final DataFileReader reader, final int ioThreads, final long globalTotalDataSize) { + + final long fileSize = reader.getSize(); + + // literals here can be extracted to params + final long targetChunkSize = Math.max(globalTotalDataSize / (ioThreads * 2), 128 * MEBIBYTES_TO_BYTES); + + if (fileSize < targetChunkSize) { + return 1; + } + + return (int) Math.ceil((double) fileSize / targetChunkSize); + } + + private void readFileChunk( + @NonNull final DataFileReader reader, + @NonNull final BlockingQueue> dataQueue, + @NonNull final Type dataType, + final long startByte, + final long endByte, + @NonNull final AtomicLong totalBoundarySearchMillis) + throws IOException, InterruptedException { + + try (ChunkedFileIterator iterator = new ChunkedFileIterator( + reader.getPath(), reader.getMetadata(), dataType, startByte, endByte, totalBoundarySearchMillis)) { + + List batch = new ArrayList<>(batchSize); + while (iterator.next()) { + final BufferedData originalData = iterator.getDataItemData(); + final Bytes dataCopy = originalData.getBytes(0, originalData.remaining()); + + final ItemData itemData = new ItemData(dataType, dataCopy, iterator.getDataItemDataLocation()); + batch.add(itemData); + + if (batch.size() >= batchSize) { + dataQueue.put(batch); + batch = new ArrayList<>(batchSize); + } + } + + if (!batch.isEmpty()) { + dataQueue.put(batch); + } + } + } + + // Helper record to hold task information + private record FileReadTask(DataFileReader reader, ItemData.Type type, long startByte, long endByte) {} +} diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/DataStats.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/DataStats.java deleted file mode 100644 index c61e404994f4..000000000000 --- a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/DataStats.java +++ /dev/null @@ -1,91 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -package com.hedera.statevalidation.poc; - -import 
java.util.concurrent.atomic.AtomicLong; - -public final class DataStats { - private final AtomicLong totalSpaceSize = new AtomicLong(); - private final AtomicLong totalItemCount = new AtomicLong(); - private final AtomicLong obsoleteSpaceSize = new AtomicLong(); - private final AtomicLong obsoleteItemCount = new AtomicLong(); - private final AtomicLong p2kvFailedToProcessCount = new AtomicLong(); - private final AtomicLong p2hFailedToProcessCount = new AtomicLong(); - private final AtomicLong k2pFailedToProcessCount = new AtomicLong(); - - public void addTotalSpaceBytes(long bytes) { - totalSpaceSize.addAndGet(bytes); - } - - public void incrementTotalItemCount() { - totalItemCount.incrementAndGet(); - } - - public void addObsoleteSpaceBytes(long bytes) { - obsoleteSpaceSize.addAndGet(bytes); - } - - public void incrementObsoleteItemCount() { - obsoleteItemCount.incrementAndGet(); - } - - public void incrementP2kvFailedToProcessCount() { - p2kvFailedToProcessCount.incrementAndGet(); - } - - public void incrementP2hFailedToProcessCount() { - p2hFailedToProcessCount.incrementAndGet(); - } - - public void incrementK2pFailedToProcessCount() { - k2pFailedToProcessCount.incrementAndGet(); - } - - public long getTotalSpaceSize() { - return totalSpaceSize.get(); - } - - public long getTotalItemCount() { - return totalItemCount.get(); - } - - public long getObsoleteSpaceSize() { - return obsoleteSpaceSize.get(); - } - - public long getObsoleteItemCount() { - return obsoleteItemCount.get(); - } - - public long getP2kvFailedToProcessCount() { - return p2kvFailedToProcessCount.get(); - } - - public long getP2hFailedToProcessCount() { - return p2hFailedToProcessCount.get(); - } - - public long getK2pFailedToProcessCount() { - return k2pFailedToProcessCount.get(); - } - - @Override - public String toString() { - return String.format( - """ - DataStats: - Total space: %,d bytes - Total items: %,d - Obsolete space: %,d bytes - Obsolete items: %,d - P2KV items failed to process: %,d - P2H items failed to process: %,d - K2P items failed to process: %,d""", - getTotalSpaceSize(), - getTotalItemCount(), - getObsoleteSpaceSize(), - getObsoleteItemCount(), - getP2kvFailedToProcessCount(), - getP2hFailedToProcessCount(), - getK2pFailedToProcessCount()); - } -} diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/ProcessorTask.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/ProcessorTask.java deleted file mode 100644 index 95b3a5335335..000000000000 --- a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/ProcessorTask.java +++ /dev/null @@ -1,129 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -package com.hedera.statevalidation.poc; - -import com.swirlds.merkledb.MerkleDbDataSource; -import com.swirlds.merkledb.collections.LongList; -import com.swirlds.merkledb.files.hashmap.ParsedBucket; -import com.swirlds.virtualmap.datasource.VirtualHashRecord; -import com.swirlds.virtualmap.datasource.VirtualLeafBytes; -import java.util.concurrent.BlockingQueue; -import java.util.concurrent.CountDownLatch; - -public class ProcessorTask implements Runnable { - - private final BlockingQueue dataQueue; - - private final LongList pathToDiskLocationLeafNodes; - private final LongList pathToDiskLocationInternalNodes; - private final LongList bucketIndexToBucketLocation; - - private final DataStats dataStats; - - private final CountDownLatch processorsLatch; - - public ProcessorTask( - BlockingQueue dataQueue, - MerkleDbDataSource vds, - DataStats dataStats, 
- CountDownLatch processorsLatch) { - this.dataQueue = dataQueue; - - this.pathToDiskLocationLeafNodes = vds.getPathToDiskLocationLeafNodes(); - this.pathToDiskLocationInternalNodes = vds.getPathToDiskLocationInternalNodes(); - this.bucketIndexToBucketLocation = (LongList) vds.getKeyToPath().getBucketIndexToBucketLocation(); - - this.dataStats = dataStats; - - this.processorsLatch = processorsLatch; - } - - @Override - public void run() { - try { - while (true) { - ItemData chunk = dataQueue.take(); - - if (chunk.isPoisonPill()) { - break; - } - - processChunk(chunk); - } - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - } finally { - processorsLatch.countDown(); - } - } - - private void processChunk(ItemData data) { - switch (data.type()) { - case P2KV -> processVirtualLeafBytes(data); - case P2H -> processVirtualHashRecord(data); - case K2P -> processBucket(data); - } - } - - private void processVirtualLeafBytes(ItemData data) { - try { - dataStats.addTotalSpaceBytes(data.bytes().length()); - dataStats.incrementTotalItemCount(); - - VirtualLeafBytes virtualLeafBytes = - VirtualLeafBytes.parseFrom(data.bytes().toReadableSequentialData()); - long path = virtualLeafBytes.path(); - - if (data.location() == pathToDiskLocationLeafNodes.get(path)) { - // live object, do something... - } else { - // add to wasted items/space - dataStats.addObsoleteSpaceBytes(data.bytes().length()); - dataStats.incrementObsoleteItemCount(); - } - } catch (Exception e) { - dataStats.incrementP2kvFailedToProcessCount(); - } - } - - private void processVirtualHashRecord(ItemData data) { - try { - dataStats.addTotalSpaceBytes(data.bytes().length()); - dataStats.incrementTotalItemCount(); - - VirtualHashRecord virtualHashRecord = - VirtualHashRecord.parseFrom(data.bytes().toReadableSequentialData()); - final long path = virtualHashRecord.path(); - - if (data.location() == pathToDiskLocationInternalNodes.get(path)) { - // live object, do something... - - } else { - // add to wasted items/space - dataStats.addObsoleteSpaceBytes(data.bytes().length()); - dataStats.incrementObsoleteItemCount(); - } - } catch (Exception e) { - dataStats.incrementP2hFailedToProcessCount(); - } - } - - private void processBucket(ItemData data) { - try { - dataStats.addTotalSpaceBytes(data.bytes().length()); - dataStats.incrementTotalItemCount(); - - final ParsedBucket bucket = new ParsedBucket(); - bucket.readFrom(data.bytes().toReadableSequentialData()); - - if (data.location() == bucketIndexToBucketLocation.get(bucket.getBucketIndex())) { - // live object, do something... 
- } else { - // add to wasted items/space - dataStats.addObsoleteSpaceBytes(data.bytes().length()); - dataStats.incrementObsoleteItemCount(); - } - } catch (Exception e) { - dataStats.incrementK2pFailedToProcessCount(); - } - } -} diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/listener/LoggingValidationListener.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/listener/LoggingValidationListener.java new file mode 100644 index 000000000000..94b908257461 --- /dev/null +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/listener/LoggingValidationListener.java @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: Apache-2.0 +package com.hedera.statevalidation.poc.listener; + +import com.hedera.statevalidation.poc.util.ValidationException; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +// update logging format +public class LoggingValidationListener implements ValidationListener { + + private static final Logger log = LogManager.getLogger(LoggingValidationListener.class); + + @Override + public void onValidationStarted(String tag) { + log.debug(framedString(tag + " started")); + } + + @Override + public void onValidationCompleted(String tag) { + log.debug(framedString(tag + " finished")); + } + + @Override + public void onValidationFailed(ValidationException error) { + log.debug(framedString(error.getValidatorTag() + " failed")); + } + + private String framedString(String stringToFrame) { + String frame = " ".repeat(stringToFrame.length() + 6); + return String.format("\n%s\n %s \n%s", frame, stringToFrame, frame); + } +} diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/listener/ValidationListener.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/listener/ValidationListener.java new file mode 100644 index 000000000000..63da2c0aa6ce --- /dev/null +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/listener/ValidationListener.java @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: Apache-2.0 +package com.hedera.statevalidation.poc.listener; + +import com.hedera.statevalidation.poc.util.ValidationException; + +public interface ValidationListener { + + default void onValidationStarted(String tag) {} + + default void onValidationCompleted(String tag) {} + + default void onValidationFailed(ValidationException error) {} +} diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/model/DataStats.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/model/DataStats.java new file mode 100644 index 000000000000..07a092ce370f --- /dev/null +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/model/DataStats.java @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: Apache-2.0 +package com.hedera.statevalidation.poc.model; + +import java.util.concurrent.atomic.AtomicLong; + +/** + * Thread-safe container for collecting validation statistics across different data types. 
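+ * <p>Statistics are grouped per store type: the {@link StatGroup} instances for P2KV, P2H and K2P
+ * each track total and obsolete item/space counters as well as parse-error and invalid-location
+ * counters, which are what {@code hasErrorReads()} reports on.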
+ */ +public final class DataStats { + + private final StatGroup p2kv = new StatGroup(); + private final StatGroup p2h = new StatGroup(); + private final StatGroup k2p = new StatGroup(); + + public StatGroup getP2kv() { + return p2kv; + } + + public StatGroup getP2h() { + return p2h; + } + + public StatGroup getK2p() { + return k2p; + } + + // --- Aggregations --- + + public long getTotalSpaceSize() { + return p2h.getSpaceSize() + p2kv.getSpaceSize() + k2p.getSpaceSize(); + } + + public long getTotalItemCount() { + return p2h.getItemCount() + p2kv.getItemCount() + k2p.getItemCount(); + } + + public long getObsoleteSpaceSize() { + return p2h.getObsoleteSpaceSize() + p2kv.getObsoleteSpaceSize() + k2p.getObsoleteSpaceSize(); + } + + public long getObsoleteItemCount() { + return p2h.getObsoleteItemCount() + p2kv.getObsoleteItemCount() + k2p.getObsoleteItemCount(); + } + + public boolean hasErrorReads() { + return p2kv.hasErrors() || p2h.hasErrors() || k2p.hasErrors(); + } + + @Override + public String toString() { + return String.format( + """ + Total Data Stats: + Total items: %,d + Total space: %,d bytes + Obsolete items: %,d + Obsolete space: %,d bytes""", + getTotalItemCount(), getTotalSpaceSize(), getObsoleteItemCount(), getObsoleteSpaceSize()); + } + + /** + * Grouping of statistics for a single data type. + */ + public static final class StatGroup { + private final AtomicLong spaceSize = new AtomicLong(); + private final AtomicLong itemCount = new AtomicLong(); + private final AtomicLong obsoleteSpaceSize = new AtomicLong(); + private final AtomicLong obsoleteItemCount = new AtomicLong(); + private final AtomicLong parseErrorCount = new AtomicLong(); + private final AtomicLong invalidLocationCount = new AtomicLong(); + + public void addSpaceSize(long bytes) { + spaceSize.addAndGet(bytes); + } + + public void incrementItemCount() { + itemCount.incrementAndGet(); + } + + public void addObsoleteSpaceSize(long bytes) { + obsoleteSpaceSize.addAndGet(bytes); + } + + public void incrementObsoleteItemCount() { + obsoleteItemCount.incrementAndGet(); + } + + public void incrementParseErrorCount() { + parseErrorCount.incrementAndGet(); + } + + public void incrementInvalidLocationCount() { + invalidLocationCount.incrementAndGet(); + } + + public long getSpaceSize() { + return spaceSize.get(); + } + + public long getItemCount() { + return itemCount.get(); + } + + public long getObsoleteSpaceSize() { + return obsoleteSpaceSize.get(); + } + + public long getObsoleteItemCount() { + return obsoleteItemCount.get(); + } + + public long getParseErrorCount() { + return parseErrorCount.get(); + } + + public long getInvalidLocationCount() { + return invalidLocationCount.get(); + } + + public boolean hasErrors() { + return parseErrorCount.get() > 0 || invalidLocationCount.get() > 0; + } + + // Helper for the parent toString + public String toStringContent() { + return String.format( + """ + Total items: %,d + Total space: %,d bytes + Obsolete items: %,d + Obsolete space: %,d bytes + Parse errors: %,d + Invalid locations: %,d""", + getItemCount(), + getSpaceSize(), + getObsoleteItemCount(), + getObsoleteSpaceSize(), + getParseErrorCount(), + getInvalidLocationCount()); + } + } +} diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/ItemData.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/model/ItemData.java similarity index 90% rename from hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/ItemData.java rename to 
hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/model/ItemData.java index 8f088556a71f..2425caf30210 100644 --- a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/ItemData.java +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/model/ItemData.java @@ -1,5 +1,5 @@ // SPDX-License-Identifier: Apache-2.0 -package com.hedera.statevalidation.poc; +package com.hedera.statevalidation.poc.model; import com.hedera.pbj.runtime.io.buffer.Bytes; diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/ChunkedFileIterator.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/pipeline/ChunkedFileIterator.java similarity index 81% rename from hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/ChunkedFileIterator.java rename to hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/pipeline/ChunkedFileIterator.java index 7b9e926b91aa..0805d70ab061 100644 --- a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/ChunkedFileIterator.java +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/pipeline/ChunkedFileIterator.java @@ -1,5 +1,5 @@ // SPDX-License-Identifier: Apache-2.0 -package com.hedera.statevalidation.poc; +package com.hedera.statevalidation.poc.pipeline; import static com.hedera.pbj.runtime.ProtoParserTools.TAG_FIELD_OFFSET; import static com.swirlds.merkledb.files.DataFileCommon.FIELD_DATAFILE_ITEMS; @@ -9,13 +9,15 @@ import com.hedera.pbj.runtime.io.ReadableSequentialData; import com.hedera.pbj.runtime.io.buffer.BufferedData; import com.hedera.pbj.runtime.io.stream.ReadableStreamingData; -import com.hedera.statevalidation.poc.ItemData.Type; +import com.hedera.statevalidation.poc.model.ItemData; +import com.hedera.statevalidation.poc.model.ItemData.Type; import com.swirlds.merkledb.files.DataFileCommon; import com.swirlds.merkledb.files.DataFileMetadata; import com.swirlds.merkledb.files.hashmap.Bucket; import com.swirlds.merkledb.files.hashmap.ParsedBucket; import com.swirlds.virtualmap.datasource.VirtualHashRecord; import com.swirlds.virtualmap.datasource.VirtualLeafBytes; +import edu.umd.cs.findbugs.annotations.NonNull; import java.io.BufferedInputStream; import java.io.IOException; import java.nio.ByteBuffer; @@ -26,7 +28,7 @@ import java.util.concurrent.atomic.AtomicLong; public class ChunkedFileIterator implements AutoCloseable { - private final static int BUFFER_SIZE = 128 * 1024; + private static final int BUFFER_SIZE = 128 * 1024; private final FileChannel channel; private final DataFileMetadata metadata; @@ -43,8 +45,12 @@ public class ChunkedFileIterator implements AutoCloseable { private boolean closed = false; public ChunkedFileIterator( - Path path, DataFileMetadata metadata, Type dataType, long startByte, long endByte, - AtomicLong totalBoundarySearchMillis) + @NonNull final Path path, + @NonNull final DataFileMetadata metadata, + @NonNull final Type dataType, + long startByte, + long endByte, + @NonNull final AtomicLong totalBoundarySearchMillis) throws IOException { this.channel = FileChannel.open(path, StandardOpenOption.READ); this.metadata = metadata; @@ -56,10 +62,10 @@ public ChunkedFileIterator( if (startByte > 0) { // Find boundary, then position channel and open streams - long startTime = System.currentTimeMillis(); + final long startTime = System.currentTimeMillis(); this.startByte += findBoundaryOffset(); long boundaryOffsetSearchTime = System.currentTimeMillis() - startTime; -// 
System.out.println("Found boundary offset in:" + boundaryOffsetSearchTime + " ms"); + // System.out.println("Found boundary offset in:" + boundaryOffsetSearchTime + " ms"); totalBoundarySearchMillis.addAndGet(boundaryOffsetSearchTime); channel.position(this.startByte); openStreams(); @@ -71,7 +77,7 @@ public ChunkedFileIterator( } private void openStreams() { - var channelStream = Channels.newInputStream(channel); + final var channelStream = Channels.newInputStream(channel); this.bufferedInputStream = new BufferedInputStream(channelStream, BUFFER_SIZE); this.in = new ReadableStreamingData(bufferedInputStream); } @@ -79,7 +85,7 @@ private void openStreams() { private long findBoundaryOffset() throws IOException { // Use buffer to minimize disk I/O and channel repositioning // It should account for boundary + full data item to validate its proto schema - ByteBuffer scanBuffer = ByteBuffer.allocate(BUFFER_SIZE); + final ByteBuffer scanBuffer = ByteBuffer.allocate(BUFFER_SIZE); // Read large chunk at current position scanBuffer.clear(); @@ -90,28 +96,27 @@ private long findBoundaryOffset() throws IOException { } scanBuffer.flip(); - BufferedData bufferData = BufferedData.wrap(scanBuffer); + final BufferedData bufferData = BufferedData.wrap(scanBuffer); // Scan through buffer looking for valid boundary while (bufferData.hasRemaining()) { - long positionInBuffer = bufferData.position(); + final long positionInBuffer = bufferData.position(); try { - int tag = bufferData.readVarInt(false); - int fieldNum = tag >> TAG_FIELD_OFFSET; + final int tag = bufferData.readVarInt(false); + final int fieldNum = tag >> TAG_FIELD_OFFSET; if ((fieldNum == FIELD_DATAFILE_ITEMS.number()) - && ((tag & ProtoConstants.TAG_WIRE_TYPE_MASK) == ProtoConstants.WIRE_TYPE_DELIMITED.ordinal())) { - int dataItemSize = bufferData.readVarInt(false); - long dataStartPosition = bufferData.position(); + && ((tag & ProtoConstants.TAG_WIRE_TYPE_MASK) + == ProtoConstants.WIRE_TYPE_DELIMITED.ordinal())) { + final int dataItemSize = bufferData.readVarInt(false); + final long dataStartPosition = bufferData.position(); if (dataItemSize > 0 && (dataStartPosition + dataItemSize <= bufferData.limit())) { bufferData.limit(dataStartPosition + dataItemSize); - long savedPos = bufferData.position(); + final long savedPos = bufferData.position(); if (isValidDataItem(bufferData)) { -// System.out.println( -// "Found valid item at " + positionInBuffer + " data size: " + dataItemSize); return positionInBuffer; } @@ -122,7 +127,6 @@ private long findBoundaryOffset() throws IOException { // Not found, advance by 1 byte bufferData.position(positionInBuffer + 1); - } catch (Exception e) { // Parsing failed, advance by 1 byte bufferData.position(positionInBuffer + 1); @@ -132,7 +136,7 @@ private long findBoundaryOffset() throws IOException { throw new IOException("No valid data item boundary found in chunk"); } - private boolean isValidDataItem(BufferedData buffer) { + private boolean isValidDataItem(@NonNull final BufferedData buffer) { try { if (!buffer.hasRemaining()) { return false; @@ -151,7 +155,7 @@ private boolean isValidDataItem(BufferedData buffer) { } } - private boolean validateVirtualHashRecord(BufferedData buffer) { + private boolean validateVirtualHashRecord(@NonNull final BufferedData buffer) { try { VirtualHashRecord.parseFrom(buffer); return true; @@ -160,7 +164,7 @@ private boolean validateVirtualHashRecord(BufferedData buffer) { } } - private boolean validateVirtualLeafBytes(BufferedData buffer) { + private boolean 
validateVirtualLeafBytes(@NonNull final BufferedData buffer) { try { VirtualLeafBytes.parseFrom(buffer); return true; @@ -169,7 +173,7 @@ private boolean validateVirtualLeafBytes(BufferedData buffer) { } } - private boolean validateBucket(BufferedData buffer) { + private boolean validateBucket(@NonNull final BufferedData buffer) { try { final Bucket bucket = new ParsedBucket(); bucket.readFrom(buffer); diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/pipeline/ProcessorTask.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/pipeline/ProcessorTask.java new file mode 100644 index 000000000000..4aeae14eaa42 --- /dev/null +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/pipeline/ProcessorTask.java @@ -0,0 +1,230 @@ +// SPDX-License-Identifier: Apache-2.0 +package com.hedera.statevalidation.poc.pipeline; + +import com.hedera.statevalidation.poc.listener.ValidationListener; +import com.hedera.statevalidation.poc.model.DataStats; +import com.hedera.statevalidation.poc.model.ItemData; +import com.hedera.statevalidation.poc.model.ItemData.Type; +import com.hedera.statevalidation.poc.util.ValidationException; +import com.hedera.statevalidation.poc.validator.api.HashRecordValidator; +import com.hedera.statevalidation.poc.validator.api.HdhmBucketValidator; +import com.hedera.statevalidation.poc.validator.api.LeafBytesValidator; +import com.hedera.statevalidation.poc.validator.api.Validator; +import com.hedera.statevalidation.util.LogUtils; +import com.swirlds.merkledb.MerkleDbDataSource; +import com.swirlds.merkledb.collections.LongList; +import com.swirlds.merkledb.files.hashmap.ParsedBucket; +import com.swirlds.virtualmap.datasource.VirtualHashRecord; +import com.swirlds.virtualmap.datasource.VirtualLeafBytes; +import edu.umd.cs.findbugs.annotations.NonNull; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.CountDownLatch; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +public class ProcessorTask implements Runnable { + + private static final Logger log = LogManager.getLogger(ProcessorTask.class); + + private final List validationListeners; + + private final Set p2kvValidators; + private final Set p2hValidators; + private final Set k2pValidators; + + private final MerkleDbDataSource vds; + + private final BlockingQueue> dataQueue; + + private final LongList pathToDiskLocationLeafNodes; + private final LongList pathToDiskLocationInternalNodes; + private final LongList bucketIndexToBucketLocation; + + private final DataStats dataStats; + + private final CountDownLatch processorsLatch; + + public ProcessorTask( + @NonNull final Map> validators, + @NonNull final List validationListeners, + @NonNull final BlockingQueue> dataQueue, + @NonNull final MerkleDbDataSource vds, + @NonNull final DataStats dataStats, + @NonNull final CountDownLatch processorsLatch) { + this.validationListeners = validationListeners; + + this.p2kvValidators = validators.get(Type.P2KV); + this.p2hValidators = validators.get(Type.P2H); + this.k2pValidators = validators.get(Type.K2P); + + this.dataQueue = dataQueue; + + this.vds = vds; + + this.pathToDiskLocationLeafNodes = vds.getPathToDiskLocationLeafNodes(); + this.pathToDiskLocationInternalNodes = vds.getPathToDiskLocationInternalNodes(); + this.bucketIndexToBucketLocation = (LongList) vds.getKeyToPath().getBucketIndexToBucketLocation(); + + this.dataStats = dataStats; 
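+        // dataStats and the validator sets are shared by every processor thread: the StatGroup
+        // counters are AtomicLongs and the sets are CopyOnWriteArraySets (see
+        // Validate2Command#createAndInitValidators), so the per-item updates and removeIf calls in
+        // the process* methods need no additional locking.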
+ + this.processorsLatch = processorsLatch; + } + + @Override + public void run() { + try { + while (true) { + final List batch = dataQueue.take(); + boolean stop = false; + + for (final ItemData chunk : batch) { + if (chunk.isPoisonPill()) { + stop = true; + break; + } + + processChunk(chunk); + } + + if (stop) { + break; + } + } + } catch (InterruptedException e) { + e.printStackTrace(); + Thread.currentThread().interrupt(); + } finally { + processorsLatch.countDown(); + } + } + + private void processChunk(@NonNull final ItemData data) { + switch (data.type()) { + case P2KV -> processVirtualLeafBytes(data); + case P2H -> processVirtualHashRecord(data); + case K2P -> processBucket(data); + } + } + + private void processVirtualLeafBytes(@NonNull final ItemData data) { + try { + dataStats.getP2kv().addSpaceSize(data.bytes().length()); + dataStats.getP2kv().incrementItemCount(); + + final VirtualLeafBytes virtualLeafBytes = + VirtualLeafBytes.parseFrom(data.bytes().toReadableSequentialData()); + long path = virtualLeafBytes.path(); + + if (data.location() == pathToDiskLocationLeafNodes.get(path)) { + // live object, perform ops on it... + try { + // Explicitly cast here. This is safe, explicit, and has negligible performance cost. + p2kvValidators.forEach(validator -> + ((LeafBytesValidator) validator).processLeafBytes(data.location(), virtualLeafBytes)); + } catch (ValidationException e) { + // remove validator from the set, so it won't be used again + p2kvValidators.removeIf(validator -> validator.getTag().equals(e.getValidatorTag())); + // notify listeners about the error, so they can log, etc. + validationListeners.forEach(listener -> listener.onValidationFailed(e)); + } + } else if (data.location() == -1) { + dataStats.getP2kv().incrementInvalidLocationCount(); + LogUtils.printFileDataLocationErrorPoc( + log, + "data.location() was -1 for P2KV entry", + vds.getPathToKeyValue().getFileCollection(), + data); + } else { + // add to wasted items/space + dataStats.getP2kv().addObsoleteSpaceSize(data.bytes().length()); + dataStats.getP2kv().incrementObsoleteItemCount(); + } + } catch (Exception e) { + dataStats.getP2kv().incrementParseErrorCount(); + LogUtils.printFileDataLocationErrorPoc( + log, e.getMessage(), vds.getPathToKeyValue().getFileCollection(), data); + } + } + + private void processVirtualHashRecord(@NonNull final ItemData data) { + try { + dataStats.getP2h().addSpaceSize(data.bytes().length()); + dataStats.getP2h().incrementItemCount(); + + final VirtualHashRecord virtualHashRecord = + VirtualHashRecord.parseFrom(data.bytes().toReadableSequentialData()); + final long path = virtualHashRecord.path(); + + if (data.location() == pathToDiskLocationInternalNodes.get(path)) { + // live object, perform ops on it... + try { + // Explicitly cast here. This is safe, explicit, and has negligible performance cost. + p2hValidators.forEach( + validator -> ((HashRecordValidator) validator).processHashRecord(virtualHashRecord)); + } catch (ValidationException e) { + // remove validator from the set, so it won't be used again + p2hValidators.removeIf(validator -> validator.getTag().equals(e.getValidatorTag())); + // notify listeners about the error, so they can log, etc. 
+ validationListeners.forEach(listener -> listener.onValidationFailed(e)); + } + } else if (data.location() == -1) { + dataStats.getP2h().incrementInvalidLocationCount(); + LogUtils.printFileDataLocationErrorPoc( + log, + "data.location() was -1 for P2H entry", + vds.getHashStoreDisk().getFileCollection(), + data); + } else { + // add to wasted items/space + dataStats.getP2h().addObsoleteSpaceSize(data.bytes().length()); + dataStats.getP2h().incrementObsoleteItemCount(); + } + } catch (Exception e) { + dataStats.getP2h().incrementParseErrorCount(); + LogUtils.printFileDataLocationErrorPoc( + log, e.getMessage(), vds.getHashStoreDisk().getFileCollection(), data); + } + } + + private void processBucket(@NonNull final ItemData data) { + try { + dataStats.getK2p().addSpaceSize(data.bytes().length()); + dataStats.getK2p().incrementItemCount(); + + final ParsedBucket bucket = new ParsedBucket(); + bucket.readFrom(data.bytes().toReadableSequentialData()); + + if (data.location() == bucketIndexToBucketLocation.get(bucket.getBucketIndex())) { + // live object, perform ops on it... + try { + // Explicitly cast here. This is safe, explicit, and has negligible performance cost. + k2pValidators.forEach( + validator -> ((HdhmBucketValidator) validator).processBucket(data.location(), bucket)); + } catch (ValidationException e) { + // remove validator from the set, so it won't be used again + k2pValidators.removeIf(validator -> validator.getTag().equals(e.getValidatorTag())); + // notify listeners about the error, so they can log, etc. + validationListeners.forEach(listener -> listener.onValidationFailed(e)); + } + } else if (data.location() == -1) { + dataStats.getK2p().incrementInvalidLocationCount(); + LogUtils.printFileDataLocationErrorPoc( + log, + "data.location() was -1 for K2P entry", + vds.getKeyToPath().getFileCollection(), + data); + } else { + // add to wasted items/space + dataStats.getK2p().addObsoleteSpaceSize(data.bytes().length()); + dataStats.getK2p().incrementObsoleteItemCount(); + } + } catch (Exception e) { + dataStats.getK2p().incrementParseErrorCount(); + LogUtils.printFileDataLocationErrorPoc( + log, e.getMessage(), vds.getKeyToPath().getFileCollection(), data); + } + } +} diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/util/ValidationAssertions.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/util/ValidationAssertions.java new file mode 100644 index 000000000000..5659bf1def7c --- /dev/null +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/util/ValidationAssertions.java @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: Apache-2.0 +package com.hedera.statevalidation.poc.util; + +import edu.umd.cs.findbugs.annotations.NonNull; +import edu.umd.cs.findbugs.annotations.Nullable; + +/** + * Utility class providing assertion-like methods for state validation. + * Unlike JUnit assertions, these are designed for production validation scenarios + * and provide detailed error context suitable for operational debugging. + */ +public final class ValidationAssertions { + + private ValidationAssertions() { + // Utility class - no instantiation + } + + /** + * Validates that an object is not null. 
+     *
+     * @param obj the object to check
+     * @param validatorTag the tag of the validator performing the check
+     * @param <T> the type of the object
+     * @return the non-null object
+     * @throws ValidationException if the object is null
+     */
+    public static <T> T requireNonNull(@Nullable T obj, @NonNull String validatorTag) {
+        if (obj == null) {
+            throw new ValidationException(validatorTag, "Expected non-null value but was null");
+        }
+        return obj;
+    }
+
+    /**
+     * Validates that a condition is true.
+     *
+     * @param condition the condition to check
+     * @param validatorTag the tag of the validator performing the check
+     * @throws ValidationException if the condition is false
+     */
+    public static void requireTrue(boolean condition, @NonNull String validatorTag) {
+        if (!condition) {
+            throw new ValidationException(validatorTag, "Expected condition to be true but was false");
+        }
+    }
+
+    /**
+     * Validates that a condition is true with a custom message.
+     *
+     * @param condition the condition to check
+     * @param validatorTag the tag of the validator performing the check
+     * @param message custom error message
+     * @throws ValidationException if the condition is false
+     */
+    public static void requireTrue(boolean condition, @NonNull String validatorTag, @NonNull String message) {
+        if (!condition) {
+            throw new ValidationException(validatorTag, message);
+        }
+    }
+
+    /**
+     * Validates that two values are equal.
+     *
+     * @param expected the expected value
+     * @param actual the actual value
+     * @param validatorTag the tag of the validator performing the check
+     * @throws ValidationException if the values are not equal
+     */
+    public static void requireEqual(@Nullable Object expected, @Nullable Object actual, @NonNull String validatorTag) {
+        if (!java.util.Objects.equals(expected, actual)) {
+            throw new ValidationException(validatorTag, String.format("Expected <%s> but was <%s>", expected, actual));
+        }
+    }
+
+    /**
+     * Validates that two long values are equal.
+     * Specialized version for better performance and error messages with numeric values.
+     *
+     * @param expected the expected value
+     * @param actual the actual value
+     * @param validatorTag the tag of the validator performing the check
+     * @throws ValidationException if the values are not equal
+     */
+    public static void requireEqual(long expected, long actual, @NonNull String validatorTag) {
+        if (expected != actual) {
+            throw new ValidationException(validatorTag, String.format("Expected <%d> but was <%d>", expected, actual));
+        }
+    }
+
+    /**
+     * Validates that two long values are equal, with a custom message.
+     * Specialized version for better performance and error messages with numeric values.
+     *
+     * @param expected the expected value
+     * @param actual the actual value
+     * @param validatorTag the tag of the validator performing the check
+     * @param message custom error message appended to the failure description
+     * @throws ValidationException if the values are not equal
+     */
+    public static void requireEqual(long expected, long actual, @NonNull String validatorTag, @NonNull String message) {
+        if (expected != actual) {
+            throw new ValidationException(
+                    validatorTag, String.format("Expected <%d> but was <%d>. %s", expected, actual, message));
+        }
+    }
+
+    /**
+     * Validates that two values are not equal.
+ * + * @param expected the expected value + * @param actual the actual value + * @param validatorTag the tag of the validator performing the check + * @throws ValidationException if the values are equal + */ + public static void requireNotEqual( + @Nullable Object expected, @Nullable Object actual, @NonNull String validatorTag) { + if (java.util.Objects.equals(expected, actual)) { + throw new ValidationException( + validatorTag, String.format("Expected not equal <%s> but was <%s>", expected, actual)); + } + } + + /** + * Validates that two long values are not equal. + * Specialized version for better performance and error messages with numeric values. + * + * @param expected the expected value + * @param actual the actual value + * @param validatorTag the tag of the validator performing the check + * @throws ValidationException if the values are equal + */ + public static void requireNotEqual(long expected, long actual, @NonNull String validatorTag) { + if (expected == actual) { + throw new ValidationException( + validatorTag, String.format("Expected not equal <%d> but was <%d>", expected, actual)); + } + } +} diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/util/ValidationException.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/util/ValidationException.java new file mode 100644 index 000000000000..b0de881e8526 --- /dev/null +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/util/ValidationException.java @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: Apache-2.0 +package com.hedera.statevalidation.poc.util; + +public class ValidationException extends RuntimeException { + + private final String validatorTag; + + public ValidationException(String validatorTag, String message) { + super(String.format("[%s] Validation failed: %s", validatorTag, message)); + this.validatorTag = validatorTag; + } + + public ValidationException(String validatorTag, String message, Throwable cause) { + super(String.format("[%s] Validation failed at: %s", validatorTag, message), cause); + this.validatorTag = validatorTag; + } + + public String getValidatorTag() { + return validatorTag; + } +} diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/AccountAndSupplyValidator.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/AccountAndSupplyValidator.java new file mode 100644 index 000000000000..1919e4345c87 --- /dev/null +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/AccountAndSupplyValidator.java @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: Apache-2.0 +package com.hedera.statevalidation.poc.validator; + +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import com.hedera.hapi.node.base.AccountID; +import com.hedera.hapi.node.state.token.Account; +import com.hedera.node.app.service.entityid.EntityIdService; +import com.hedera.node.app.service.entityid.ReadableEntityIdStore; +import com.hedera.node.app.service.entityid.impl.ReadableEntityIdStoreImpl; +import com.hedera.node.app.service.token.impl.TokenServiceImpl; +import com.hedera.node.app.service.token.impl.schemas.V0490TokenSchema; +import com.hedera.pbj.runtime.ParseException; +import com.hedera.pbj.runtime.io.buffer.Bytes; +import com.hedera.statevalidation.poc.util.ValidationAssertions; +import com.hedera.statevalidation.poc.validator.api.LeafBytesValidator; +import com.swirlds.state.MerkleNodeState; 
+import com.swirlds.state.merkle.StateKeyUtils; +import com.swirlds.state.merkle.StateValue; +import com.swirlds.state.spi.ReadableKVState; +import com.swirlds.virtualmap.VirtualMap; +import com.swirlds.virtualmap.datasource.VirtualLeafBytes; +import edu.umd.cs.findbugs.annotations.NonNull; +import java.util.concurrent.atomic.AtomicLong; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +public class AccountAndSupplyValidator implements LeafBytesValidator { + + private static final Logger log = LogManager.getLogger(AccountAndSupplyValidator.class); + + public static final String ACCOUNT_TAG = "account"; + + // 1_000_000_000 tiny bar = 1 h + // https://help.hedera.com/hc/en-us/articles/360000674317-What-are-the-official-HBAR-cryptocurrency-denominations- + // https://help.hedera.com/hc/en-us/articles/360000665518-What-is-the-total-supply-of-HBAR- + private final long TOTAL_tHBAR_SUPPLY = 5_000_000_000_000_000_000L; + + private final AtomicLong accountsCreated = new AtomicLong(0L); + private final AtomicLong totalBalance = new AtomicLong(0L); + + private long numAccounts; + + @Override + public String getTag() { + return ACCOUNT_TAG; + } + + @Override + public void initialize(@NonNull MerkleNodeState state) { + final VirtualMap virtualMap = (VirtualMap) state.getRoot(); + assertNotNull(virtualMap); + + final ReadableEntityIdStore entityCounters = + new ReadableEntityIdStoreImpl(state.getReadableStates(EntityIdService.NAME)); + final ReadableKVState accounts = + state.getReadableStates(TokenServiceImpl.NAME).get(V0490TokenSchema.ACCOUNTS_STATE_ID); + + assertNotNull(accounts); + assertNotNull(entityCounters); + + this.numAccounts = entityCounters.numAccounts(); + log.debug("Number of accounts: {}", numAccounts); + } + + @Override + public void processLeafBytes(long dataLocation, @NonNull final VirtualLeafBytes leafBytes) { + final Bytes keyBytes = leafBytes.keyBytes(); + final Bytes valueBytes = leafBytes.valueBytes(); + final int readKeyStateId = StateKeyUtils.extractStateIdFromStateKeyOneOf(keyBytes); + final int readValueStateId = StateValue.extractStateIdFromStateValueOneOf(valueBytes); + if ((readKeyStateId == V0490TokenSchema.ACCOUNTS_STATE_ID) + && (readValueStateId == V0490TokenSchema.ACCOUNTS_STATE_ID)) { + try { + final com.hedera.hapi.platform.state.StateValue stateValue = + com.hedera.hapi.platform.state.StateValue.PROTOBUF.parse(valueBytes); + final Account account = stateValue.value().as(); + final long tinybarBalance = account.tinybarBalance(); + assertTrue(tinybarBalance >= 0); + totalBalance.addAndGet(tinybarBalance); + accountsCreated.incrementAndGet(); + } catch (final ParseException e) { + throw new RuntimeException("Failed to parse a key", e); + } + } + } + + @Override + public void validate() { + ValidationAssertions.requireEqual(TOTAL_tHBAR_SUPPLY, totalBalance.get(), ACCOUNT_TAG); + ValidationAssertions.requireEqual(accountsCreated.get(), numAccounts, ACCOUNT_TAG); + } +} diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/EntityIdCountValidator.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/EntityIdCountValidator.java new file mode 100644 index 000000000000..220a358de2bd --- /dev/null +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/EntityIdCountValidator.java @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: Apache-2.0 +package com.hedera.statevalidation.poc.validator; + +import static 
com.hedera.node.app.service.entityid.impl.schemas.V0590EntityIdSchema.ENTITY_COUNTS_STATE_ID; + +import com.hedera.hapi.node.state.entity.EntityCounts; +import com.hedera.hapi.platform.state.StateKey; +import com.hedera.node.app.service.entityid.EntityIdService; +import com.hedera.pbj.runtime.ParseException; +import com.hedera.statevalidation.poc.util.ValidationAssertions; +import com.hedera.statevalidation.poc.validator.api.LeafBytesValidator; +import com.swirlds.state.MerkleNodeState; +import com.swirlds.state.spi.ReadableSingletonState; +import com.swirlds.virtualmap.datasource.VirtualLeafBytes; +import edu.umd.cs.findbugs.annotations.NonNull; +import java.util.concurrent.atomic.AtomicLong; + +public class EntityIdCountValidator implements LeafBytesValidator { + + public static final String ENTITY_ID_COUNT_TAG = "entityIdCount"; + + private EntityCounts entityCounts; + + private final AtomicLong accountCount = new AtomicLong(0); + private final AtomicLong aliasesCount = new AtomicLong(0); + private final AtomicLong tokenCount = new AtomicLong(0); + private final AtomicLong tokenRelCount = new AtomicLong(0); + private final AtomicLong nftsCount = new AtomicLong(0); + private final AtomicLong airdropsCount = new AtomicLong(0); + private final AtomicLong stakingInfoCount = new AtomicLong(0); + private final AtomicLong topicCount = new AtomicLong(0); + private final AtomicLong fileCount = new AtomicLong(0); + private final AtomicLong nodesCount = new AtomicLong(0); + private final AtomicLong scheduleCount = new AtomicLong(0); + private final AtomicLong contractStorageCount = new AtomicLong(0); + private final AtomicLong contractBytecodeCount = new AtomicLong(0); + private final AtomicLong hookCount = new AtomicLong(0); + private final AtomicLong labmbdaStorageCount = new AtomicLong(0); + + @Override + public String getTag() { + return ENTITY_ID_COUNT_TAG; + } + + @Override + public void initialize(@NonNull MerkleNodeState state) { + final ReadableSingletonState entityIdSingleton = + state.getReadableStates(EntityIdService.NAME).getSingleton(ENTITY_COUNTS_STATE_ID); + this.entityCounts = entityIdSingleton.get(); + } + + @Override + public void processLeafBytes(long dataLocation, @NonNull VirtualLeafBytes leafBytes) { + try { + StateKey key = StateKey.PROTOBUF.parse(leafBytes.keyBytes()); + switch (key.key().kind()) { + case TOKENSERVICE_I_ACCOUNTS -> accountCount.incrementAndGet(); + case TOKENSERVICE_I_ALIASES -> aliasesCount.incrementAndGet(); + case TOKENSERVICE_I_TOKENS -> tokenCount.incrementAndGet(); + case TOKENSERVICE_I_TOKEN_RELS -> tokenRelCount.incrementAndGet(); + case TOKENSERVICE_I_NFTS -> nftsCount.incrementAndGet(); + case TOKENSERVICE_I_PENDING_AIRDROPS -> airdropsCount.incrementAndGet(); + case TOKENSERVICE_I_STAKING_INFOS -> stakingInfoCount.incrementAndGet(); + case CONSENSUSSERVICE_I_TOPICS -> topicCount.incrementAndGet(); + case FILESERVICE_I_FILES -> fileCount.incrementAndGet(); + case ADDRESSBOOKSERVICE_I_NODES -> nodesCount.incrementAndGet(); + case SCHEDULESERVICE_I_SCHEDULES_BY_ID -> scheduleCount.incrementAndGet(); + case CONTRACTSERVICE_I_STORAGE -> contractStorageCount.incrementAndGet(); + case CONTRACTSERVICE_I_BYTECODE -> contractBytecodeCount.incrementAndGet(); + case CONTRACTSERVICE_I_EVM_HOOK_STATES -> hookCount.incrementAndGet(); + case CONTRACTSERVICE_I_LAMBDA_STORAGE -> labmbdaStorageCount.incrementAndGet(); + } + } catch (ParseException e) { + throw new RuntimeException(e); + } + } + + @Override + public void validate() { + 
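+        // Compare the per-kind counters accumulated from leaf keys in processLeafBytes against the
+        // EntityCounts singleton captured in initialize().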
ValidationAssertions.requireNonNull(entityCounts, ENTITY_ID_COUNT_TAG); + ValidationAssertions.requireEqual( + entityCounts.numAccounts(), accountCount.get(), ENTITY_ID_COUNT_TAG, "Account count is unexpected"); + ValidationAssertions.requireEqual( + entityCounts.numAliases(), aliasesCount.get(), ENTITY_ID_COUNT_TAG, "Alias count is unexpected"); + ValidationAssertions.requireEqual( + entityCounts.numTokens(), tokenCount.get(), ENTITY_ID_COUNT_TAG, "Token count is unexpected"); + ValidationAssertions.requireEqual( + entityCounts.numTokenRelations(), + tokenRelCount.get(), + ENTITY_ID_COUNT_TAG, + "Token relations count is unexpected"); + ValidationAssertions.requireEqual( + entityCounts.numNfts(), nftsCount.get(), ENTITY_ID_COUNT_TAG, "NFTs count is unexpected"); + ValidationAssertions.requireEqual( + entityCounts.numAirdrops(), airdropsCount.get(), ENTITY_ID_COUNT_TAG, "Airdrops count is unexpected"); + ValidationAssertions.requireEqual( + entityCounts.numStakingInfos(), + stakingInfoCount.get(), + ENTITY_ID_COUNT_TAG, + "Staking infos count is unexpected"); + ValidationAssertions.requireEqual( + entityCounts.numTopics(), topicCount.get(), ENTITY_ID_COUNT_TAG, "Topic count is unexpected"); + ValidationAssertions.requireEqual( + entityCounts.numFiles(), fileCount.get(), ENTITY_ID_COUNT_TAG, "File count is unexpected"); + ValidationAssertions.requireEqual( + entityCounts.numNodes(), nodesCount.get(), ENTITY_ID_COUNT_TAG, "Nodes count is unexpected"); + // To be investigated - https://github.com/hiero-ledger/hiero-consensus-node/issues/20993 + // ValidationAssertions.requireEqual(entityCounts.numSchedules(), scheduleCount.get(), ENTITY_ID_COUNT_TAG, + // "Schedule count is unexpected"); + // ValidationAssertions.requireEqual( + // entityCounts.numContractStorageSlots(), + // contractStorageCount.get(), + // ENTITY_ID_COUNT_TAG, + // "Contract storage count is unexpected"); + ValidationAssertions.requireEqual( + entityCounts.numContractBytecodes(), + contractBytecodeCount.get(), + ENTITY_ID_COUNT_TAG, + "Contract count is unexpected"); + ValidationAssertions.requireEqual( + entityCounts.numHooks(), hookCount.get(), ENTITY_ID_COUNT_TAG, "Hook count is unexpected"); + ValidationAssertions.requireEqual( + entityCounts.numLambdaStorageSlots(), + labmbdaStorageCount.get(), + ENTITY_ID_COUNT_TAG, + "Lambda slot count is unexpected"); + } +} diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/EntityIdUniquenessValidator.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/EntityIdUniquenessValidator.java new file mode 100644 index 000000000000..acc8c6a9af47 --- /dev/null +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/EntityIdUniquenessValidator.java @@ -0,0 +1,176 @@ +// SPDX-License-Identifier: Apache-2.0 +package com.hedera.statevalidation.poc.validator; + +import static com.hedera.node.app.service.consensus.impl.schemas.V0490ConsensusSchema.TOPICS_STATE_ID; +import static com.hedera.node.app.service.contract.impl.schemas.V0490ContractSchema.BYTECODE_STATE_ID; +import static com.hedera.node.app.service.file.impl.schemas.V0490FileSchema.FILES_STATE_ID; +import static com.hedera.node.app.service.schedule.impl.schemas.V0490ScheduleSchema.SCHEDULES_BY_ID_STATE_ID; +import static com.hedera.node.app.service.token.impl.schemas.V0490TokenSchema.ACCOUNTS_STATE_ID; +import static com.hedera.node.app.service.token.impl.schemas.V0490TokenSchema.TOKENS_STATE_ID; + +import 
com.hedera.hapi.node.base.AccountID; +import com.hedera.hapi.node.base.ContractID; +import com.hedera.hapi.node.base.FileID; +import com.hedera.hapi.node.base.ScheduleID; +import com.hedera.hapi.node.base.TokenID; +import com.hedera.hapi.node.base.TopicID; +import com.hedera.hapi.node.state.consensus.Topic; +import com.hedera.hapi.node.state.contract.Bytecode; +import com.hedera.hapi.node.state.file.File; +import com.hedera.hapi.node.state.schedule.Schedule; +import com.hedera.hapi.node.state.token.Account; +import com.hedera.hapi.node.state.token.Token; +import com.hedera.hapi.platform.state.StateKey; +import com.hedera.node.app.service.consensus.ConsensusService; +import com.hedera.node.app.service.contract.ContractService; +import com.hedera.node.app.service.file.FileService; +import com.hedera.node.app.service.schedule.ScheduleService; +import com.hedera.node.app.service.token.TokenService; +import com.hedera.pbj.runtime.ParseException; +import com.hedera.statevalidation.poc.util.ValidationAssertions; +import com.hedera.statevalidation.poc.validator.api.LeafBytesValidator; +import com.swirlds.state.MerkleNodeState; +import com.swirlds.state.spi.ReadableKVState; +import com.swirlds.virtualmap.datasource.VirtualLeafBytes; +import edu.umd.cs.findbugs.annotations.NonNull; +import java.util.concurrent.atomic.AtomicInteger; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +public class EntityIdUniquenessValidator implements LeafBytesValidator { + + private static final Logger log = LogManager.getLogger(EntityIdUniquenessValidator.class); + + public static final String ENTITY_ID_UNIQUENESS_TAG = "entityIdUniqueness"; + private static final long IMPERMISSIBLE_ENTITY_ID = -1L; + + private ReadableKVState tokensState; + private ReadableKVState accountState; + private ReadableKVState smartContractState; + private ReadableKVState topicState; + private ReadableKVState fileState; + private ReadableKVState scheduleState; + + private final AtomicInteger issuesFound = new AtomicInteger(0); + + @Override + public String getTag() { + return ENTITY_ID_UNIQUENESS_TAG; + } + + @Override + public void initialize(@NonNull MerkleNodeState state) { + this.tokensState = state.getReadableStates(TokenService.NAME).get(TOKENS_STATE_ID); + this.accountState = state.getReadableStates(TokenService.NAME).get(ACCOUNTS_STATE_ID); + this.smartContractState = state.getReadableStates(ContractService.NAME).get(BYTECODE_STATE_ID); + this.topicState = state.getReadableStates(ConsensusService.NAME).get(TOPICS_STATE_ID); + this.fileState = state.getReadableStates(FileService.NAME).get(FILES_STATE_ID); + this.scheduleState = state.getReadableStates(ScheduleService.NAME).get(SCHEDULES_BY_ID_STATE_ID); + } + + @Override + public void processLeafBytes(long dataLocation, @NonNull VirtualLeafBytes leafBytes) { + long entityId = IMPERMISSIBLE_ENTITY_ID; + + try { + StateKey key = StateKey.PROTOBUF.parse(leafBytes.keyBytes()); + switch (key.key().kind()) { + case TOKENSERVICE_I_TOKENS -> { + final TokenID tokenId = key.key().as(); + entityId = tokenId.tokenNum(); + } + case TOKENSERVICE_I_ACCOUNTS -> { + final AccountID accountId = key.key().as(); + entityId = accountId.accountNumOrElse(IMPERMISSIBLE_ENTITY_ID); + } + case CONTRACTSERVICE_I_BYTECODE -> { + final ContractID contractId = key.key().as(); + entityId = contractId.contractNumOrElse(IMPERMISSIBLE_ENTITY_ID); + } + case CONSENSUSSERVICE_I_TOPICS -> { + final TopicID topicId = key.key().as(); + entityId = topicId.topicNum(); + } + case 
FILESERVICE_I_FILES -> { + final FileID fileId = key.key().as(); + entityId = fileId.fileNum(); + } + case SCHEDULESERVICE_I_SCHEDULES_BY_ID -> { + final ScheduleID scheduleId = key.key().as(); + entityId = scheduleId.scheduleNum(); + } + } + } catch (ParseException e) { + throw new RuntimeException(e); + } + + if (entityId != IMPERMISSIBLE_ENTITY_ID) { + checkEntityUniqueness(entityId); + } + } + + @Override + public void validate() { + ValidationAssertions.requireEqual(0, issuesFound.get(), ENTITY_ID_UNIQUENESS_TAG); + } + + private void checkEntityUniqueness(long entityId) { + int counter = 0; + final Token token = tokensState.get(new TokenID(0, 0, entityId)); + if (token != null) { + counter++; + } + + final Account account = + accountState.get(AccountID.newBuilder().accountNum(entityId).build()); + if (account != null) { + counter++; + } + + final Bytecode contract = smartContractState.get( + ContractID.newBuilder().contractNum(entityId).build()); + + if (contract != null) { + counter++; + } + + final Topic topic = + topicState.get(TopicID.newBuilder().topicNum(entityId).build()); + + if (topic != null) { + counter++; + } + + final File file = fileState.get(FileID.newBuilder().fileNum(entityId).build()); + if (file != null) { + counter++; + } + + final Schedule schedule = scheduleState.get(new ScheduleID(0, 0, entityId)); + if (schedule != null) { + counter++; + } + + if (counter > 1) { + if (account != null && contract != null) { + // if it's a smart contract account, we expect it to have a contract with matching id + return; + } + + final String errorMessage = String.format( + """ + Entity ID %d is not unique, found %d entities.\s + Token = %s, \ + \s + Account = %s,\s + Contract = %s, \s + Topic = %s,\s + File = %s,\s + Schedule = %s + """, + entityId, counter, token, account, contract, topic, file, schedule); + log.info(errorMessage); + issuesFound.incrementAndGet(); + } + } +} diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/HashRecordIntegrityValidator.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/HashRecordIntegrityValidator.java new file mode 100644 index 000000000000..e395066be053 --- /dev/null +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/HashRecordIntegrityValidator.java @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: Apache-2.0 +package com.hedera.statevalidation.poc.validator; + +import com.hedera.statevalidation.poc.util.ValidationAssertions; +import com.hedera.statevalidation.poc.validator.api.HashRecordValidator; +import com.swirlds.state.MerkleNodeState; +import com.swirlds.virtualmap.datasource.VirtualHashRecord; +import edu.umd.cs.findbugs.annotations.NonNull; +import java.util.concurrent.atomic.AtomicInteger; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +public class HashRecordIntegrityValidator implements HashRecordValidator { + + private static final Logger log = LogManager.getLogger(HashRecordIntegrityValidator.class); + + public static final String INTERNAL_TAG = "internal"; + + private final AtomicInteger totalEntriesProcessed = new AtomicInteger(0); + + @Override + public String getTag() { + return INTERNAL_TAG; + } + + @Override + public void initialize(@NonNull final MerkleNodeState state) {} + + @Override + public void processHashRecord(@NonNull final VirtualHashRecord hashRecord) { + ValidationAssertions.requireNonNull(hashRecord.hash(), INTERNAL_TAG); + totalEntriesProcessed.incrementAndGet(); + } 
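+ // Each hash is asserted non-null as records stream through processHashRecord(), so failures surface immediately rather than in validate().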
+ + @Override + public void validate() { + log.debug("Successfully checked {} VirtualHashRecord entries", totalEntriesProcessed.get()); + } +} diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/HdhmBucketIntegrityValidator.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/HdhmBucketIntegrityValidator.java new file mode 100644 index 000000000000..7530a9d68e12 --- /dev/null +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/HdhmBucketIntegrityValidator.java @@ -0,0 +1,234 @@ +// SPDX-License-Identifier: Apache-2.0 +package com.hedera.statevalidation.poc.validator; + +import static com.hedera.statevalidation.util.ConfigUtils.COLLECTED_INFO_THRESHOLD; +import static com.hedera.statevalidation.util.LogUtils.printFileDataLocationError; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +import com.hedera.hapi.platform.state.StateKey; +import com.hedera.pbj.runtime.ParseException; +import com.hedera.pbj.runtime.io.buffer.BufferedData; +import com.hedera.pbj.runtime.io.buffer.Bytes; +import com.hedera.statevalidation.poc.util.ValidationAssertions; +import com.hedera.statevalidation.poc.validator.api.HdhmBucketValidator; +import com.hedera.statevalidation.util.reflect.BucketIterator; +import com.swirlds.merkledb.MerkleDbDataSource; +import com.swirlds.merkledb.collections.LongList; +import com.swirlds.merkledb.files.DataFileCollection; +import com.swirlds.merkledb.files.hashmap.ParsedBucket; +import com.swirlds.state.MerkleNodeState; +import com.swirlds.virtualmap.VirtualMap; +import com.swirlds.virtualmap.datasource.VirtualLeafBytes; +import edu.umd.cs.findbugs.annotations.NonNull; +import java.util.Objects; +import java.util.concurrent.CopyOnWriteArrayList; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +public class HdhmBucketIntegrityValidator implements HdhmBucketValidator { + + private static final Logger log = LogManager.getLogger(HdhmBucketIntegrityValidator.class); + + public static final String HDHM_TAG = "hdhm"; + + private DataFileCollection keyToPathDfc; + private DataFileCollection pathToKeyValueDfc; + private LongList pathToDiskLocationLeafNodes; + + private final CopyOnWriteArrayList stalePathsInfos = new CopyOnWriteArrayList<>(); + private final CopyOnWriteArrayList nullLeafsInfo = new CopyOnWriteArrayList<>(); + private final CopyOnWriteArrayList unexpectedKeyInfos = new CopyOnWriteArrayList<>(); + private final CopyOnWriteArrayList pathMismatchInfos = new CopyOnWriteArrayList<>(); + private final CopyOnWriteArrayList hashCodeMismatchInfos = new CopyOnWriteArrayList<>(); + + @Override + public String getTag() { + return HDHM_TAG; + } + + @Override + public void initialize(@NonNull final MerkleNodeState state) { + final VirtualMap virtualMap = (VirtualMap) state.getRoot(); + assertNotNull(virtualMap); + final MerkleDbDataSource vds = (MerkleDbDataSource) virtualMap.getDataSource(); + + this.pathToKeyValueDfc = vds.getPathToKeyValue().getFileCollection(); + this.keyToPathDfc = vds.getKeyToPath().getFileCollection(); + + this.pathToDiskLocationLeafNodes = vds.getPathToDiskLocationLeafNodes(); + } + + @Override + public void processBucket(long bucketLocation, @NonNull final ParsedBucket bucket) { + Objects.requireNonNull(pathToKeyValueDfc); + Objects.requireNonNull(keyToPathDfc); + Objects.requireNonNull(pathToDiskLocationLeafNodes); + + final int bucketIndex = bucket.getBucketIndex(); + + try { + var bucketIterator = new 
BucketIterator(bucket); + while (bucketIterator.hasNext()) { + final ParsedBucket.BucketEntry entry = bucketIterator.next(); + final Bytes keyBytes = entry.getKeyBytes(); + final long path = entry.getValue(); + // get path -> dataLocation + var dataLocation = pathToDiskLocationLeafNodes.get(path); + if (dataLocation == 0) { + printFileDataLocationError(log, "Stale path", keyToPathDfc, bucketLocation); + collectInfo(new StalePathInfo(path, parseKey(keyBytes)), stalePathsInfos); + continue; + } + final BufferedData leafData = pathToKeyValueDfc.readDataItem(dataLocation); + if (leafData == null) { + printFileDataLocationError(log, "Null leaf", keyToPathDfc, bucketLocation); + collectInfo(new NullLeafInfo(path, parseKey(keyBytes)), nullLeafsInfo); + continue; + } + final VirtualLeafBytes leafBytes = VirtualLeafBytes.parseFrom(leafData); + if (!keyBytes.equals(leafBytes.keyBytes())) { + printFileDataLocationError(log, "Leaf key mismatch", keyToPathDfc, bucketLocation); + collectInfo( + new UnexpectedKeyInfo(path, parseKey(keyBytes), parseKey(leafBytes.keyBytes())), + unexpectedKeyInfos); + } + if (leafBytes.path() != path) { + printFileDataLocationError(log, "Leaf path mismatch", keyToPathDfc, bucketLocation); + collectInfo(new PathMismatchInfo(path, leafBytes.path(), parseKey(keyBytes)), pathMismatchInfos); + continue; + } + final int hashCode = entry.getHashCode(); + if ((hashCode & bucketIndex) != bucketIndex) { + printFileDataLocationError(log, "Hash code mismatch", keyToPathDfc, bucketLocation); + collectInfo(new HashCodeMismatchInfo(hashCode, bucketIndex), hashCodeMismatchInfos); + } + } + } catch (Exception e) { + if (bucketLocation != 0) { + printFileDataLocationError(log, e.getMessage(), keyToPathDfc, bucketLocation); + } + } + } + + @Override + public void validate() { + if (!stalePathsInfos.isEmpty()) { + log.error("Stale path info:\n{}", stalePathsInfos); + log.error( + "There are {} records with stale paths, please check the logs for more info", + stalePathsInfos.size()); + } + + if (!nullLeafsInfo.isEmpty()) { + log.error("Null leaf info:\n{}", nullLeafsInfo); + log.error( + "There are {} records with null leaves, please check the logs for more info", + nullLeafsInfo.size()); + } + + if (!unexpectedKeyInfos.isEmpty()) { + log.error("Unexpected key info:\n{}", unexpectedKeyInfos); + log.error( + "There are {} records with unexpected keys, please check the logs for more info", + unexpectedKeyInfos.size()); + } + + if (!pathMismatchInfos.isEmpty()) { + log.error("Path mismatch info:\n{}", pathMismatchInfos); + log.error( + "There are {} records with mismatched paths, please check the logs for more info", + pathMismatchInfos.size()); + } + + if (!hashCodeMismatchInfos.isEmpty()) { + log.error("Hash code mismatch info:\n{}", hashCodeMismatchInfos); + log.error( + "There are {} records with mismatched hash codes, please check the logs for more info", + hashCodeMismatchInfos.size()); + } + + ValidationAssertions.requireTrue( + stalePathsInfos.isEmpty() + && nullLeafsInfo.isEmpty() + && unexpectedKeyInfos.isEmpty() + && pathMismatchInfos.isEmpty() + && hashCodeMismatchInfos.isEmpty(), + HDHM_TAG, + "One of the test conditions has not been met. 
" + + "Conditions: " + + ("stalePathsInfos.isEmpty() = %s, " + + "nullLeafsInfo.isEmpty() = %s, " + + "unexpectedKeyInfos.isEmpty() = %s, " + + "pathMismatchInfos.isEmpty() = %s, " + + "hashCodeMismatchInfos.isEmpty() = %s") + .formatted( + stalePathsInfos.isEmpty(), + nullLeafsInfo.isEmpty(), + unexpectedKeyInfos.isEmpty(), + pathMismatchInfos.isEmpty(), + hashCodeMismatchInfos.isEmpty())); + } + + // --- + + private static StateKey parseKey(Bytes keyBytes) throws ParseException { + return StateKey.PROTOBUF.parse(keyBytes); + } + + private static void collectInfo(T info, CopyOnWriteArrayList list) { + if (COLLECTED_INFO_THRESHOLD == 0 || list.size() < COLLECTED_INFO_THRESHOLD) { + list.add(info); + } + } + + // Bucket entry path is not found in the leaf index + record StalePathInfo(long path, StateKey key) { + @Override + @NonNull + public String toString() { + return "StalePathInfo{" + "path=" + path + ", key=" + key + "}\n"; + } + } + + // Bucket entry path is in the leaf index, but leaf data cannot be loaded + private record NullLeafInfo(long path, StateKey key) { + @Override + @NonNull + public String toString() { + return "NullLeafInfo{" + "path=" + path + ", key=" + key + "}\n"; + } + } + + // Bucket entry key doesn't match leaf key, leaf is loaded by entry path + record UnexpectedKeyInfo(long path, StateKey expectedKey, StateKey actualKey) { + @Override + @NonNull + public String toString() { + return "UnexpectedKeyInfo{" + "path=" + + path + ", expectedKey=" + + expectedKey + ", actualKey=" + + actualKey + "}\n"; + } + } + + // Bucket entry path doesn't match leaf path, leaf is loaded by entry path + private record PathMismatchInfo(long expectedPath, long actualPath, StateKey key) { + @Override + @NonNull + public String toString() { + return "PathMismatchInfo{" + "expectedPath=" + + expectedPath + ", actualPath=" + + actualPath + ", key=" + + key + "}\n"; + } + } + + // Bucket entry hash code doesn't match bucket index (modulo HDHM resize) + private record HashCodeMismatchInfo(int entryHashCode, int bucketIndex) { + @Override + @NonNull + public String toString() { + return "HashCodeMismatchInfo{" + "entryHashCode=" + entryHashCode + ", bucketIndex=" + bucketIndex + "}\n"; + } + } +} diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/LeafBytesIntegrityValidator.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/LeafBytesIntegrityValidator.java new file mode 100644 index 000000000000..4e1f0ef501cf --- /dev/null +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/LeafBytesIntegrityValidator.java @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: Apache-2.0 +package com.hedera.statevalidation.poc.validator; + +import static com.hedera.statevalidation.util.LogUtils.printFileDataLocationError; + +import com.hedera.pbj.runtime.io.buffer.Bytes; +import com.hedera.statevalidation.poc.util.ValidationAssertions; +import com.hedera.statevalidation.poc.validator.api.LeafBytesValidator; +import com.swirlds.merkledb.MerkleDbDataSource; +import com.swirlds.merkledb.files.DataFileCollection; +import com.swirlds.merkledb.files.hashmap.HalfDiskHashMap; +import com.swirlds.state.MerkleNodeState; +import com.swirlds.virtualmap.VirtualMap; +import com.swirlds.virtualmap.datasource.VirtualLeafBytes; +import edu.umd.cs.findbugs.annotations.NonNull; +import java.io.IOException; +import java.util.Objects; +import java.util.concurrent.atomic.AtomicInteger; +import org.apache.logging.log4j.LogManager; 
+import org.apache.logging.log4j.Logger; + +public class LeafBytesIntegrityValidator implements LeafBytesValidator { + + private static final Logger log = LogManager.getLogger(LeafBytesIntegrityValidator.class); + + public static final String LEAF_TAG = "leaf"; + + private VirtualMap virtualMap; + private DataFileCollection pathToKeyValueDfc; + private HalfDiskHashMap keyToPath; + + private final AtomicInteger successCount = new AtomicInteger(0); + private final AtomicInteger exceptionCount = new AtomicInteger(0); + + @Override + public String getTag() { + return LEAF_TAG; + } + + @Override + public void initialize(@NonNull final MerkleNodeState state) { + this.virtualMap = (VirtualMap) state.getRoot(); + final MerkleDbDataSource vds = (MerkleDbDataSource) virtualMap.getDataSource(); + this.pathToKeyValueDfc = vds.getPathToKeyValue().getFileCollection(); + this.keyToPath = vds.getKeyToPath(); + } + + @Override + public void processLeafBytes(long dataLocation, @NonNull final VirtualLeafBytes leafBytes) { + Objects.requireNonNull(virtualMap); + Objects.requireNonNull(pathToKeyValueDfc); + Objects.requireNonNull(keyToPath); + + try { + final Bytes keyBytes = leafBytes.keyBytes(); + final Bytes valueBytes = leafBytes.valueBytes(); + final long p2KvPath = leafBytes.path(); + long k2pPath = keyToPath.get(keyBytes, -1); + + ValidationAssertions.requireEqual(p2KvPath, k2pPath, LEAF_TAG); + ValidationAssertions.requireEqual(valueBytes, virtualMap.getBytes(keyBytes), LEAF_TAG); + + successCount.incrementAndGet(); + } catch (IOException e) { + exceptionCount.incrementAndGet(); + printFileDataLocationError(log, e.getMessage(), pathToKeyValueDfc, dataLocation); + } + } + + @Override + public void validate() { + log.debug("Successfully checked {} VirtualLeafBytes entries", successCount.get()); + ValidationAssertions.requireEqual(0, exceptionCount.get(), LEAF_TAG, "Some read operations failed"); + } +} diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/TokenRelationsIntegrityValidator.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/TokenRelationsIntegrityValidator.java new file mode 100644 index 000000000000..ec083047ff37 --- /dev/null +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/TokenRelationsIntegrityValidator.java @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: Apache-2.0 +package com.hedera.statevalidation.poc.validator; + +import static com.swirlds.state.merkle.StateUtils.getStateKeyForKv; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +import com.hedera.hapi.node.base.AccountID; +import com.hedera.hapi.node.base.TokenID; +import com.hedera.hapi.node.state.common.EntityIDPair; +import com.hedera.hapi.node.state.token.TokenRelation; +import com.hedera.node.app.service.entityid.EntityIdService; +import com.hedera.node.app.service.entityid.ReadableEntityIdStore; +import com.hedera.node.app.service.entityid.impl.ReadableEntityIdStoreImpl; +import com.hedera.node.app.service.token.impl.schemas.V0490TokenSchema; +import com.hedera.pbj.runtime.ParseException; +import com.hedera.pbj.runtime.io.buffer.Bytes; +import com.hedera.statevalidation.poc.util.ValidationAssertions; +import com.hedera.statevalidation.poc.validator.api.LeafBytesValidator; +import com.swirlds.state.MerkleNodeState; +import com.swirlds.state.merkle.StateKeyUtils; +import com.swirlds.state.merkle.StateValue; +import 
com.swirlds.virtualmap.VirtualMap; +import com.swirlds.virtualmap.datasource.VirtualLeafBytes; +import edu.umd.cs.findbugs.annotations.NonNull; +import java.util.Objects; +import java.util.concurrent.atomic.AtomicInteger; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +public class TokenRelationsIntegrityValidator implements LeafBytesValidator { + + private static final Logger log = LogManager.getLogger(TokenRelationsIntegrityValidator.class); + + public static final String TOKEN_RELATIONS_TAG = "tokenRelations"; + + private VirtualMap virtualMap; + private long numTokenRelations = 0L; + + private final AtomicInteger objectsProcessed = new AtomicInteger(0); + private final AtomicInteger accountFailCounter = new AtomicInteger(0); + private final AtomicInteger tokenFailCounter = new AtomicInteger(0); + + @Override + public String getTag() { + return TOKEN_RELATIONS_TAG; + } + + @Override + public void initialize(@NonNull MerkleNodeState state) { + this.virtualMap = (VirtualMap) state.getRoot(); + + final ReadableEntityIdStore entityCounters = + new ReadableEntityIdStoreImpl(state.getReadableStates(EntityIdService.NAME)); + + this.numTokenRelations = entityCounters.numTokenRelations(); + log.debug("Number of token relations: {}", numTokenRelations); + } + + @Override + public void processLeafBytes(long dataLocation, @NonNull VirtualLeafBytes leafBytes) { + Objects.requireNonNull(virtualMap); + + final Bytes keyBytes = leafBytes.keyBytes(); + final Bytes valueBytes = leafBytes.valueBytes(); + final int readKeyStateId = StateKeyUtils.extractStateIdFromStateKeyOneOf(keyBytes); + final int readValueStateId = StateValue.extractStateIdFromStateValueOneOf(valueBytes); + if ((readKeyStateId == V0490TokenSchema.TOKEN_RELS_STATE_ID) + && (readValueStateId == V0490TokenSchema.TOKEN_RELS_STATE_ID)) { + try { + final com.hedera.hapi.platform.state.StateKey stateKey = + com.hedera.hapi.platform.state.StateKey.PROTOBUF.parse(keyBytes); + + final EntityIDPair entityIDPair = stateKey.key().as(); + final AccountID accountId1 = entityIDPair.accountId(); + final TokenID tokenId1 = entityIDPair.tokenId(); + + final com.hedera.hapi.platform.state.StateValue stateValue = + com.hedera.hapi.platform.state.StateValue.PROTOBUF.parse(valueBytes); + final TokenRelation tokenRelation = stateValue.value().as(); + final AccountID accountId2 = tokenRelation.accountId(); + final TokenID tokenId2 = tokenRelation.tokenId(); + + assertNotNull(accountId1); + assertNotNull(tokenId1); + assertNotNull(accountId2); + assertNotNull(tokenId2); + + assertEquals(accountId1, accountId2); + assertEquals(tokenId1, tokenId2); + + if (!virtualMap.containsKey( + getStateKeyForKv(V0490TokenSchema.ACCOUNTS_STATE_ID, accountId1, AccountID.PROTOBUF))) { + accountFailCounter.incrementAndGet(); + } + + if (!virtualMap.containsKey( + getStateKeyForKv(V0490TokenSchema.TOKENS_STATE_ID, tokenId1, TokenID.PROTOBUF))) { + tokenFailCounter.incrementAndGet(); + } + objectsProcessed.incrementAndGet(); + } catch (final ParseException e) { + throw new RuntimeException("Failed to parse a key", e); + } + } + } + + @Override + public void validate() { + ValidationAssertions.requireEqual(objectsProcessed.get(), numTokenRelations, TOKEN_RELATIONS_TAG); + ValidationAssertions.requireEqual(0, accountFailCounter.get(), TOKEN_RELATIONS_TAG); + ValidationAssertions.requireEqual(0, tokenFailCounter.get(), TOKEN_RELATIONS_TAG); + } +} diff --git 
a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/api/HashRecordValidator.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/api/HashRecordValidator.java new file mode 100644 index 000000000000..b9b496f1deca --- /dev/null +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/api/HashRecordValidator.java @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: Apache-2.0 +package com.hedera.statevalidation.poc.validator.api; + +import com.swirlds.virtualmap.datasource.VirtualHashRecord; +import edu.umd.cs.findbugs.annotations.NonNull; + +/** + * Marker interface for validators that can process virtual hash records + * to validate internal indexes. + */ +public interface HashRecordValidator extends Validator { + void processHashRecord(@NonNull VirtualHashRecord virtualHashRecord); +} diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/api/HdhmBucketValidator.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/api/HdhmBucketValidator.java new file mode 100644 index 000000000000..78fea87e6f80 --- /dev/null +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/api/HdhmBucketValidator.java @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: Apache-2.0 +package com.hedera.statevalidation.poc.validator.api; + +import com.swirlds.merkledb.files.hashmap.ParsedBucket; +import edu.umd.cs.findbugs.annotations.NonNull; + +public interface HdhmBucketValidator extends Validator { + void processBucket(long bucketLocation, @NonNull ParsedBucket bucket); +} diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/api/LeafBytesValidator.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/api/LeafBytesValidator.java new file mode 100644 index 000000000000..4a0640788ef1 --- /dev/null +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/api/LeafBytesValidator.java @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: Apache-2.0 +package com.hedera.statevalidation.poc.validator.api; + +import com.swirlds.virtualmap.datasource.VirtualLeafBytes; +import edu.umd.cs.findbugs.annotations.NonNull; + +public interface LeafBytesValidator extends Validator { + void processLeafBytes(long dataLocation, @NonNull VirtualLeafBytes leafBytes); +} diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/api/Validator.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/api/Validator.java new file mode 100644 index 000000000000..19b53d350525 --- /dev/null +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/api/Validator.java @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: Apache-2.0 +package com.hedera.statevalidation.poc.validator.api; + +import com.swirlds.state.MerkleNodeState; +import edu.umd.cs.findbugs.annotations.NonNull; + +/** + * Base interface for all validators with a clear lifecycle. + */ +public interface Validator { + + String getTag(); + + void initialize(@NonNull MerkleNodeState state); + + /** + * Finalize validation and assert results. + * Called once after all data processing is complete. 
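+ * Implementations typically report failures by throwing a ValidationException (for example via ValidationAssertions).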
+ */ + void validate(); +} diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/util/LogUtils.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/util/LogUtils.java index d7de7b7b1e33..be370e49c040 100644 --- a/hedera-state-validator/src/main/java/com/hedera/statevalidation/util/LogUtils.java +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/util/LogUtils.java @@ -3,6 +3,7 @@ import static com.swirlds.merkledb.files.DataFileCommon.dataLocationToString; +import com.hedera.statevalidation.poc.model.ItemData; import com.swirlds.merkledb.files.DataFileCollection; import com.swirlds.merkledb.files.DataFileReader; import edu.umd.cs.findbugs.annotations.NonNull; @@ -29,4 +30,24 @@ public static void printFileDataLocationError( logger.error("Metadata: {}", a.getMetadata()); }); } + + // poc + public static void printFileDataLocationErrorPoc( + @NonNull final Logger logger, + @NonNull final String message, + @NonNull final DataFileCollection dfc, + @NonNull final ItemData itemData) { + final List dataFiles = dfc.getAllCompletedFiles(); + logger.error("Error! Details: {}", message); + logger.error("Item Data: {}", itemData); + if (itemData.location() != -1) { + logger.error("Data location: {}", dataLocationToString(itemData.location())); + } + logger.error("Data file collection: "); + dataFiles.forEach(a -> { + logger.error("File: {}", a.getPath()); + logger.error("Size: {}", a.getSize()); + logger.error("Metadata: {}", a.getMetadata()); + }); + } } diff --git a/hedera-state-validator/src/main/resources/log4j2.xml b/hedera-state-validator/src/main/resources/log4j2.xml index 81a4b27fb800..32c2b8ac0aac 100644 --- a/hedera-state-validator/src/main/resources/log4j2.xml +++ b/hedera-state-validator/src/main/resources/log4j2.xml @@ -1,6 +1,11 @@ + + + + + @@ -29,8 +34,10 @@ + - + + From 644f502147c965b70cc2fb491315e345a664e9e1 Mon Sep 17 00:00:00 2001 From: Nikita Lebedev Date: Fri, 5 Dec 2025 11:53:06 +0200 Subject: [PATCH 4/6] concurrency and exception handling Signed-off-by: Nikita Lebedev --- .../statevalidation/Validate2Command.java | 378 +++++++++--------- .../poc/listener/ValidationListener.java | 6 + .../poc/pipeline/ChunkedFileIterator.java | 83 ++-- .../poc/pipeline/ProcessorTask.java | 123 +++--- .../poc/util/ValidationException.java | 3 + .../validator/AccountAndSupplyValidator.java | 18 +- .../poc/validator/EntityIdCountValidator.java | 53 ++- .../EntityIdUniquenessValidator.java | 25 +- .../HashRecordIntegrityValidator.java | 2 +- .../HdhmBucketIntegrityValidator.java | 9 +- .../LeafBytesIntegrityValidator.java | 6 +- .../TokenRelationsIntegrityValidator.java | 24 +- .../poc/validator/api/Validator.java | 8 + 13 files changed, 387 insertions(+), 351 deletions(-) diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/Validate2Command.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/Validate2Command.java index 2c2f3c878dfe..c28ccb2e0017 100644 --- a/hedera-state-validator/src/main/java/com/hedera/statevalidation/Validate2Command.java +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/Validate2Command.java @@ -45,11 +45,11 @@ import java.util.Set; import java.util.concurrent.BlockingQueue; import java.util.concurrent.CopyOnWriteArraySet; -import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; +import java.util.concurrent.Future; import 
java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -83,7 +83,7 @@ public class Validate2Command implements Runnable { @Option( names = {"-q", "--queue-capacity"}, description = "Queue capacity for backpressure control.") - private int queueCapacity = 1000; + private int queueCapacity = 100; @Option( names = {"-b", "--batch-size"}, @@ -121,196 +121,202 @@ private Validate2Command() {} @Override public void run() { - try { - try (ExecutorService ioPool = Executors.newFixedThreadPool(ioThreads)) { - try (ExecutorService processPool = Executors.newFixedThreadPool(processThreads)) { - final BlockingQueue> dataQueue = new LinkedBlockingQueue<>(queueCapacity); - - final long startTime = System.currentTimeMillis(); - final AtomicLong totalBoundarySearchMillis = new AtomicLong(0L); - - // Initialize state and get data file collections - parent.initializeStateDir(); - final DeserializedSignedState deserializedSignedState = StateUtils.getDeserializedSignedState(); - final MerkleNodeState state = - deserializedSignedState.reservedSignedState().get().getState(); - final VirtualMap virtualMap = (VirtualMap) state.getRoot(); - final MerkleDbDataSource vds = (MerkleDbDataSource) virtualMap.getDataSource(); - - final DataFileCollection pathToKeyValueDfc = - vds.getPathToKeyValue().getFileCollection(); - final DataFileCollection pathToHashDfc = - vds.getHashStoreDisk().getFileCollection(); - final DataFileCollection keyToPathDfc = vds.getKeyToPath().getFileCollection(); - - // Initialize validators and listeners - final List validationListeners = List.of(new LoggingValidationListener()); - final Map> validators = - createAndInitValidators(state, tags, validationListeners); - - int totalFiles = 0; - long globalTotalSize = 0L; - final List fileReadTasks = new ArrayList<>(); - - if (validators.containsKey(Type.P2KV)) { - totalFiles += pathToKeyValueDfc.getAllCompletedFiles().size(); - globalTotalSize += pathToKeyValueDfc.getAllCompletedFiles().stream() - .mapToLong(DataFileReader::getSize) - .sum(); - log.debug( - "P2KV file count: {}", - pathToKeyValueDfc.getAllCompletedFiles().size()); - } - if (validators.containsKey(Type.P2H)) { - totalFiles += pathToHashDfc.getAllCompletedFiles().size(); - globalTotalSize += pathToHashDfc.getAllCompletedFiles().stream() - .mapToLong(DataFileReader::getSize) - .sum(); - log.debug( - "P2H file count: {}", - pathToHashDfc.getAllCompletedFiles().size()); - } - if (validators.containsKey(Type.K2P)) { - totalFiles += keyToPathDfc.getAllCompletedFiles().size(); - globalTotalSize += keyToPathDfc.getAllCompletedFiles().stream() - .mapToLong(DataFileReader::getSize) - .sum(); - log.debug( - "K2P file count: {}", - keyToPathDfc.getAllCompletedFiles().size()); - } + try (ExecutorService ioPool = Executors.newFixedThreadPool(ioThreads)) { + try (ExecutorService processPool = Executors.newFixedThreadPool(processThreads)) { + final BlockingQueue> dataQueue = new LinkedBlockingQueue<>(queueCapacity); + + final long startTime = System.currentTimeMillis(); + final AtomicLong totalBoundarySearchMillis = new AtomicLong(0L); + + // Initialize state and get data file collections + parent.initializeStateDir(); + final DeserializedSignedState deserializedSignedState = StateUtils.getDeserializedSignedState(); + final MerkleNodeState state = + deserializedSignedState.reservedSignedState().get().getState(); + final VirtualMap virtualMap 
= (VirtualMap) state.getRoot(); + final MerkleDbDataSource vds = (MerkleDbDataSource) virtualMap.getDataSource(); + + final DataFileCollection pathToKeyValueDfc = + vds.getPathToKeyValue().getFileCollection(); + final DataFileCollection pathToHashDfc = vds.getHashStoreDisk().getFileCollection(); + final DataFileCollection keyToPathDfc = vds.getKeyToPath().getFileCollection(); + + // Initialize validators and listeners + final List validationListeners = List.of(new LoggingValidationListener()); + final Map> validators = + createAndInitValidators(state, tags, validationListeners); + + int totalFiles = 0; + long globalTotalSize = 0L; + final var fileReadTasks = new ArrayList(); + + if (validators.containsKey(Type.P2KV)) { + totalFiles += pathToKeyValueDfc.getAllCompletedFiles().size(); + globalTotalSize += pathToKeyValueDfc.getAllCompletedFiles().stream() + .mapToLong(DataFileReader::getSize) + .sum(); + log.debug( + "P2KV file count: {}", + pathToKeyValueDfc.getAllCompletedFiles().size()); + } + if (validators.containsKey(Type.P2H)) { + totalFiles += pathToHashDfc.getAllCompletedFiles().size(); + globalTotalSize += pathToHashDfc.getAllCompletedFiles().stream() + .mapToLong(DataFileReader::getSize) + .sum(); + log.debug( + "P2H file count: {}", + pathToHashDfc.getAllCompletedFiles().size()); + } + if (validators.containsKey(Type.K2P)) { + totalFiles += keyToPathDfc.getAllCompletedFiles().size(); + globalTotalSize += keyToPathDfc.getAllCompletedFiles().stream() + .mapToLong(DataFileReader::getSize) + .sum(); + log.debug( + "K2P file count: {}", + keyToPathDfc.getAllCompletedFiles().size()); + } - // Plan all file read tasks (calculate chunks for each file) - if (validators.containsKey(Type.P2KV)) { - fileReadTasks.addAll(planTasksFor(pathToKeyValueDfc, Type.P2KV, ioThreads, globalTotalSize)); - } - if (validators.containsKey(Type.P2H)) { - fileReadTasks.addAll(planTasksFor(pathToHashDfc, Type.P2H, ioThreads, globalTotalSize)); - } - if (validators.containsKey(Type.K2P)) { - fileReadTasks.addAll(planTasksFor(keyToPathDfc, Type.K2P, ioThreads, globalTotalSize)); - } + // Plan all file read tasks (calculate chunks for each file) + if (validators.containsKey(Type.P2KV)) { + fileReadTasks.addAll(planTasksFor(pathToKeyValueDfc, Type.P2KV, ioThreads, globalTotalSize)); + } + if (validators.containsKey(Type.P2H)) { + fileReadTasks.addAll(planTasksFor(pathToHashDfc, Type.P2H, ioThreads, globalTotalSize)); + } + if (validators.containsKey(Type.K2P)) { + fileReadTasks.addAll(planTasksFor(keyToPathDfc, Type.K2P, ioThreads, globalTotalSize)); + } - log.debug("File count: {}", totalFiles); - log.debug("Total data size: {} MB", globalTotalSize * BYTES_TO_MEBIBYTES); + log.debug("File count: {}", totalFiles); + log.debug("Total data size: {} MB", globalTotalSize * BYTES_TO_MEBIBYTES); - // Sort tasks: largest chunks first (better thread utilization) - fileReadTasks.sort((a, b) -> Long.compare(b.endByte - b.startByte, a.endByte - a.startByte)); + // Sort tasks: largest chunks first (better thread utilization) + fileReadTasks.sort((a, b) -> Long.compare(b.endByte - b.startByte, a.endByte - a.startByte)); - final int totalFileReadTasks = fileReadTasks.size(); + final int totalFileReadTasks = fileReadTasks.size(); - log.debug("Total file read tasks: {}", totalFileReadTasks); + log.debug("Total file read tasks: {}", totalFileReadTasks); - final CountDownLatch readerLatch = new CountDownLatch(totalFileReadTasks); - final CountDownLatch processorsLatch = new CountDownLatch(processThreads); + final DataStats 
dataStats = new DataStats(); - final DataStats dataStats = new DataStats(); + final List> processorFutures = new ArrayList<>(); + final List> ioFutures = new ArrayList<>(); - // Start processor threads - for (int i = 0; i < processThreads; i++) { - processPool.submit(new ProcessorTask( - validators, validationListeners, dataQueue, vds, dataStats, processorsLatch)); - } + // Start processor threads + for (int i = 0; i < processThreads; i++) { + processorFutures.add(processPool.submit( + new ProcessorTask(validators, validationListeners, dataQueue, vds, dataStats))); + } - // Submit all planned file read tasks to read file in chunks - for (final FileReadTask task : fileReadTasks) { - ioPool.submit(() -> { - try { - readFileChunk( - task.reader, - dataQueue, - task.type, - task.startByte, - task.endByte, - totalBoundarySearchMillis); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - e.printStackTrace(); // TODO: double check this exception - throw new RuntimeException("Reader interrupted", e); - } catch (Exception e) { - e.printStackTrace(); // TODO: double check this exception - throw new RuntimeException( - "Reader failed for chunk " + task.startByte + "-" + task.endByte, e); - } finally { - readerLatch.countDown(); - } - }); - } + // Submit all planned file read tasks + for (final FileReadTask task : fileReadTasks) { + ioFutures.add(ioPool.submit(() -> { + readFileChunk( + task.reader, + dataQueue, + task.type, + task.startByte, + task.endByte, + totalBoundarySearchMillis); + return null; + })); + } - // Wait for all readers to finish - readerLatch.await(); - ioPool.shutdown(); - if (!ioPool.awaitTermination(1, TimeUnit.MINUTES)) { - throw new RuntimeException("IO pool did not terminate within timeout"); + for (final Future future : ioFutures) { + try { + future.get(); + } catch (final ExecutionException e) { + ioPool.shutdownNow(); + processPool.shutdownNow(); + throw new RuntimeException("IO Task failed", e.getCause() != null ? e.getCause() : e); } + } - // Send one poison pill per processor - for (int i = 0; i < processThreads; i++) { - dataQueue.put(List.of(ItemData.poisonPill())); - } + // Send one poison pill per processor + for (int i = 0; i < processThreads; i++) { + dataQueue.put(List.of(ItemData.poisonPill())); + } - // Wait for processors to finish - processorsLatch.await(); - processPool.shutdown(); - if (!processPool.awaitTermination(1, TimeUnit.MINUTES)) { - throw new RuntimeException("Process pool did not terminate within timeout"); + for (final Future future : processorFutures) { + try { + future.get(); + } catch (final ExecutionException e) { + throw new RuntimeException("Processor Task failed", e.getCause() != null ? 
e.getCause() : e); } + } - validators - .values() - .forEach(validatorSet -> validatorSet.forEach(validator -> { - try { - validator.validate(); - validationListeners.forEach( - listener -> listener.onValidationCompleted(validator.getTag())); - } catch (ValidationException e) { - log.error("Validation failed: {}", e.getMessage()); - } - })); - - if (validators.containsKey(Type.P2KV)) { - log.info( - "P2KV (Path -> Key/Value) Data Stats: \n {}", - dataStats.getP2kv().toStringContent()); - } - if (validators.containsKey(Type.P2H)) { - log.info( - "P2H (Path -> Hash) Data Stats: \n {}", - dataStats.getP2h().toStringContent()); - } - if (validators.containsKey(Type.K2P)) { - log.info( - "K2P (Key -> Path) Data Stats: \n {}", - dataStats.getK2p().toStringContent()); + boolean anyValidationFailed = false; + for (var validatorSet : validators.values()) { + for (var validator : validatorSet) { + try { + validator.validate(); + validationListeners.forEach(listener -> listener.onValidationCompleted(validator.getTag())); + } catch (final ValidationException e) { + anyValidationFailed = true; + validationListeners.forEach(listener -> listener.onValidationFailed(e)); + } catch (final Exception e) { + anyValidationFailed = true; + validationListeners.forEach(listener -> listener.onValidationFailed(new ValidationException( + validator.getTag(), + "Unexpected exception during validation: " + e.getMessage(), + e))); + } } + } - log.info(dataStats); + if (validators.containsKey(Type.P2KV)) { + log.info( + "P2KV (Path -> Key/Value) Data Stats: \n {}", + dataStats.getP2kv().toStringContent()); + } + if (validators.containsKey(Type.P2H)) { + log.info( + "P2H (Path -> Hash) Data Stats: \n {}", + dataStats.getP2h().toStringContent()); + } + if (validators.containsKey(Type.K2P)) { + log.info( + "K2P (Key -> Path) Data Stats: \n {}", + dataStats.getK2p().toStringContent()); + } - // common validation for error reads - if (dataStats.hasErrorReads()) { - throw new RuntimeException("Error reads found. Full info: \n " + dataStats); - } + log.info(dataStats); + + // common validation for error reads + if (dataStats.hasErrorReads()) { + throw new RuntimeException("Error reads found. Full info: \n " + dataStats); + } - log.debug("Total boundary search time: {} ms", totalBoundarySearchMillis.get()); - log.debug("Total processing time: {} ms", System.currentTimeMillis() - startTime); + if (anyValidationFailed) { + throw new ValidationException("*", "One or more validators failed. Check logs for details."); } + + log.debug("Total boundary search time: {} ms", totalBoundarySearchMillis.get()); + log.debug("Total processing time: {} ms", System.currentTimeMillis() - startTime); } - } catch (Exception e) { - throw new RuntimeException(e); + } catch (final RuntimeException e) { + throw e; + } catch (final InterruptedException e) { + Thread.currentThread().interrupt(); + throw new IllegalStateException("Validation interrupted", e); + } catch (final Exception e) { + throw new IllegalStateException("Validation failed unexpectedly", e); } } - private Map> createAndInitValidators( + private Map> createAndInitValidators( @NonNull final MerkleNodeState state, @NonNull final String[] tags, @NonNull final List validationListeners) { final Set tagSet = Set.of(tags); - final Map> validatorsMap = new HashMap<>(); + final Map> validatorsMap = new HashMap<>(); // 1. 
Populate map with validators that match supplied tags - final Set hashRecordValidators = new CopyOnWriteArraySet<>(); - final Validator hashRecordValidator = new HashRecordIntegrityValidator(); + final var hashRecordValidators = new CopyOnWriteArraySet(); + final var hashRecordValidator = new HashRecordIntegrityValidator(); if (tagSet.contains(hashRecordValidator.getTag())) { hashRecordValidators.add(hashRecordValidator); } @@ -318,8 +324,8 @@ private Map> createAndInitValidators( validatorsMap.put(Type.P2H, hashRecordValidators); } // hdhm - final Set hdhmBucketValidators = new CopyOnWriteArraySet<>(); - final Validator hdhmBucketValidator = new HdhmBucketIntegrityValidator(); + final var hdhmBucketValidators = new CopyOnWriteArraySet(); + final var hdhmBucketValidator = new HdhmBucketIntegrityValidator(); if (tagSet.contains(hdhmBucketValidator.getTag())) { hdhmBucketValidators.add(hdhmBucketValidator); } @@ -327,33 +333,33 @@ private Map> createAndInitValidators( validatorsMap.put(Type.K2P, hdhmBucketValidators); } // leaf, etc. - final Set leafBytesValidators = new CopyOnWriteArraySet<>(); - final Validator leafBytesValidator = new LeafBytesIntegrityValidator(); + final var leafBytesValidators = new CopyOnWriteArraySet(); + final var leafBytesValidator = new LeafBytesIntegrityValidator(); if (tagSet.contains(leafBytesValidator.getTag())) { leafBytesValidators.add(leafBytesValidator); } - final Validator accountValidator = new AccountAndSupplyValidator(); + final var accountValidator = new AccountAndSupplyValidator(); if (tagSet.contains(accountValidator.getTag())) { leafBytesValidators.add(accountValidator); } if (!leafBytesValidators.isEmpty()) { validatorsMap.put(Type.P2KV, leafBytesValidators); } - final Validator tokenRelationsValidator = new TokenRelationsIntegrityValidator(); + final var tokenRelationsValidator = new TokenRelationsIntegrityValidator(); if (tagSet.contains(tokenRelationsValidator.getTag())) { leafBytesValidators.add(tokenRelationsValidator); } if (!leafBytesValidators.isEmpty()) { validatorsMap.put(Type.P2KV, leafBytesValidators); } - final Validator entityIdCountValidator = new EntityIdCountValidator(); + final var entityIdCountValidator = new EntityIdCountValidator(); if (tagSet.contains(entityIdCountValidator.getTag())) { leafBytesValidators.add(entityIdCountValidator); } if (!leafBytesValidators.isEmpty()) { validatorsMap.put(Type.P2KV, leafBytesValidators); } - final Validator entityIdUniquenessValidator = new EntityIdUniquenessValidator(); + final var entityIdUniquenessValidator = new EntityIdUniquenessValidator(); if (tagSet.contains(entityIdUniquenessValidator.getTag())) { leafBytesValidators.add(entityIdUniquenessValidator); } @@ -362,32 +368,22 @@ private Map> createAndInitValidators( } // 2. 
Initialize validators and remove if initialization fails - // Use an iterator on the map values to allow safe removal of empty sets - final java.util.Iterator> mapIterator = - validatorsMap.values().iterator(); - while (mapIterator.hasNext()) { - final Set validatorSet = mapIterator.next(); - final java.util.Iterator validatorIterator = validatorSet.iterator(); - - while (validatorIterator.hasNext()) { - final Validator validator = validatorIterator.next(); + validatorsMap.values().removeIf(validatorSet -> { + validatorSet.removeIf(validator -> { validationListeners.forEach(listener -> listener.onValidationStarted(validator.getTag())); try { validator.initialize(state); - } catch (ValidationException e) { - validationListeners.forEach(listener -> listener.onValidationFailed(e)); - // 3. Remove validator entry if initialization failed - validatorIterator.remove(); + return false; // keep validator + } catch (final Exception e) { + validationListeners.forEach(listener -> listener.onValidationFailed( + new ValidationException(validator.getTag(), "Unexpected exception: " + e.getMessage(), e))); + return true; // remove validator } - } - - // Clean up: remove the entry from the map if no validators remain for this type - if (validatorSet.isEmpty()) { - mapIterator.remove(); - } - } + }); + return validatorSet.isEmpty(); // remove entry if no validators remain + }); - // 4. Return the fully initialized and cleaned map + // 3. Return the fully initialized and cleaned map return validatorsMap; } diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/listener/ValidationListener.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/listener/ValidationListener.java index 63da2c0aa6ce..fba64f6fc6b0 100644 --- a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/listener/ValidationListener.java +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/listener/ValidationListener.java @@ -3,6 +3,12 @@ import com.hedera.statevalidation.poc.util.ValidationException; +/** + * Listener for validation lifecycle events. + * + *

Thread Safety: Implementations must be thread-safe as callbacks + * may be invoked concurrently from multiple processor threads. + */ public interface ValidationListener { default void onValidationStarted(String tag) {} diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/pipeline/ChunkedFileIterator.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/pipeline/ChunkedFileIterator.java index 0805d70ab061..fe7e96df0263 100644 --- a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/pipeline/ChunkedFileIterator.java +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/pipeline/ChunkedFileIterator.java @@ -28,6 +28,7 @@ import java.util.concurrent.atomic.AtomicLong; public class ChunkedFileIterator implements AutoCloseable { + // move to cmd params? private static final int BUFFER_SIZE = 128 * 1024; private final FileChannel channel; @@ -53,26 +54,37 @@ public ChunkedFileIterator( @NonNull final AtomicLong totalBoundarySearchMillis) throws IOException { this.channel = FileChannel.open(path, StandardOpenOption.READ); - this.metadata = metadata; - - this.startByte = startByte; - this.endByte = endByte; - - this.dataType = dataType; - - if (startByte > 0) { - // Find boundary, then position channel and open streams - final long startTime = System.currentTimeMillis(); - this.startByte += findBoundaryOffset(); - long boundaryOffsetSearchTime = System.currentTimeMillis() - startTime; - // System.out.println("Found boundary offset in:" + boundaryOffsetSearchTime + " ms"); - totalBoundarySearchMillis.addAndGet(boundaryOffsetSearchTime); - channel.position(this.startByte); - openStreams(); - } else { - // At file start - channel.position(startByte); - openStreams(); + try { + this.metadata = metadata; + + this.startByte = startByte; + this.endByte = endByte; + + this.dataType = dataType; + + if (startByte > 0) { + // Find boundary, then position channel and open streams + final long startTime = System.currentTimeMillis(); + this.startByte += findBoundaryOffset(); + // FIXME: update to nanos + final long boundaryOffsetSearchTime = System.currentTimeMillis() - startTime; + // System.out.println("Found boundary offset in:" + boundaryOffsetSearchTime + " ms"); + totalBoundarySearchMillis.addAndGet(boundaryOffsetSearchTime); + channel.position(this.startByte); + openStreams(); + } else { + // At file start + channel.position(startByte); + openStreams(); + } + } catch (final Exception e) { + // Ensure channel is closed if constructor fails after opening + try { + channel.close(); + } catch (final IOException closeEx) { + e.addSuppressed(closeEx); + } + throw e; } } @@ -143,44 +155,33 @@ private boolean isValidDataItem(@NonNull final BufferedData buffer) { } return switch (dataType) { + // Parsing without exception means valid data case P2H -> validateVirtualHashRecord(buffer); case P2KV -> validateVirtualLeafBytes(buffer); case K2P -> validateBucket(buffer); - default -> throw new IllegalStateException("Unexpected data type: " + dataType); + default -> false; }; - } catch (Exception e) { + } catch (final Exception e) { // Any parsing exception means invalid data return false; } } private boolean validateVirtualHashRecord(@NonNull final BufferedData buffer) { - try { - VirtualHashRecord.parseFrom(buffer); - return true; - } catch (Exception e) { - return false; - } + VirtualHashRecord.parseFrom(buffer); + return true; } private boolean validateVirtualLeafBytes(@NonNull final BufferedData buffer) { - try { - 
VirtualLeafBytes.parseFrom(buffer); - return true; - } catch (Exception e) { - return false; - } + VirtualLeafBytes.parseFrom(buffer); + return true; } private boolean validateBucket(@NonNull final BufferedData buffer) { - try { - final Bucket bucket = new ParsedBucket(); - bucket.readFrom(buffer); - return true; - } catch (Exception e) { - return false; - } + final Bucket bucket = new ParsedBucket(); + bucket.readFrom(buffer); + return true; } public boolean next() throws IOException { diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/pipeline/ProcessorTask.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/pipeline/ProcessorTask.java index 4aeae14eaa42..9f174ec5282e 100644 --- a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/pipeline/ProcessorTask.java +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/pipeline/ProcessorTask.java @@ -19,21 +19,21 @@ import edu.umd.cs.findbugs.annotations.NonNull; import java.util.List; import java.util.Map; -import java.util.Set; import java.util.concurrent.BlockingQueue; -import java.util.concurrent.CountDownLatch; +import java.util.concurrent.Callable; +import java.util.concurrent.CopyOnWriteArraySet; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -public class ProcessorTask implements Runnable { +public class ProcessorTask implements Callable { private static final Logger log = LogManager.getLogger(ProcessorTask.class); private final List validationListeners; - private final Set p2kvValidators; - private final Set p2hValidators; - private final Set k2pValidators; + private final CopyOnWriteArraySet p2kvValidators; + private final CopyOnWriteArraySet p2hValidators; + private final CopyOnWriteArraySet k2pValidators; private final MerkleDbDataSource vds; @@ -45,15 +45,12 @@ public class ProcessorTask implements Runnable { private final DataStats dataStats; - private final CountDownLatch processorsLatch; - public ProcessorTask( - @NonNull final Map> validators, + @NonNull final Map> validators, @NonNull final List validationListeners, @NonNull final BlockingQueue> dataQueue, @NonNull final MerkleDbDataSource vds, - @NonNull final DataStats dataStats, - @NonNull final CountDownLatch processorsLatch) { + @NonNull final DataStats dataStats) { this.validationListeners = validationListeners; this.p2kvValidators = validators.get(Type.P2KV); @@ -69,12 +66,10 @@ public ProcessorTask( this.bucketIndexToBucketLocation = (LongList) vds.getKeyToPath().getBucketIndexToBucketLocation(); this.dataStats = dataStats; - - this.processorsLatch = processorsLatch; } @Override - public void run() { + public Void call() { try { while (true) { final List batch = dataQueue.take(); @@ -93,12 +88,11 @@ public void run() { break; } } - } catch (InterruptedException e) { - e.printStackTrace(); + } catch (final InterruptedException e) { Thread.currentThread().interrupt(); - } finally { - processorsLatch.countDown(); + log.error("Processor task interrupted."); } + return null; } private void processChunk(@NonNull final ItemData data) { @@ -116,20 +110,29 @@ private void processVirtualLeafBytes(@NonNull final ItemData data) { final VirtualLeafBytes virtualLeafBytes = VirtualLeafBytes.parseFrom(data.bytes().toReadableSequentialData()); - long path = virtualLeafBytes.path(); + final long path = virtualLeafBytes.path(); if (data.location() == pathToDiskLocationLeafNodes.get(path)) { // live object, perform ops on it... - try { - // Explicitly cast here. 
This is safe, explicit, and has negligible performance cost. - p2kvValidators.forEach(validator -> - ((LeafBytesValidator) validator).processLeafBytes(data.location(), virtualLeafBytes)); - } catch (ValidationException e) { - // remove validator from the set, so it won't be used again - p2kvValidators.removeIf(validator -> validator.getTag().equals(e.getValidatorTag())); - // notify listeners about the error, so they can log, etc. - validationListeners.forEach(listener -> listener.onValidationFailed(e)); + if (p2kvValidators == null || p2kvValidators.isEmpty()) { + return; } + p2kvValidators.forEach(validator -> { + try { + ((LeafBytesValidator) validator).processLeafBytes(data.location(), virtualLeafBytes); + } catch (final ValidationException e) { + // Remove validator and notify listeners only once (removeIf returns true only for the thread + // that removes) + if (p2kvValidators.removeIf(v -> v.getTag().equals(validator.getTag()))) { + validationListeners.forEach(listener -> listener.onValidationFailed(e)); + } + } catch (final Exception e) { + if (p2kvValidators.removeIf(v -> v.getTag().equals(validator.getTag()))) { + validationListeners.forEach(listener -> listener.onValidationFailed(new ValidationException( + validator.getTag(), "Unexpected exception: " + e.getMessage(), e))); + } + } + }); } else if (data.location() == -1) { dataStats.getP2kv().incrementInvalidLocationCount(); LogUtils.printFileDataLocationErrorPoc( @@ -142,7 +145,7 @@ private void processVirtualLeafBytes(@NonNull final ItemData data) { dataStats.getP2kv().addObsoleteSpaceSize(data.bytes().length()); dataStats.getP2kv().incrementObsoleteItemCount(); } - } catch (Exception e) { + } catch (final Exception e) { dataStats.getP2kv().incrementParseErrorCount(); LogUtils.printFileDataLocationErrorPoc( log, e.getMessage(), vds.getPathToKeyValue().getFileCollection(), data); @@ -160,16 +163,27 @@ private void processVirtualHashRecord(@NonNull final ItemData data) { if (data.location() == pathToDiskLocationInternalNodes.get(path)) { // live object, perform ops on it... - try { - // Explicitly cast here. This is safe, explicit, and has negligible performance cost. - p2hValidators.forEach( - validator -> ((HashRecordValidator) validator).processHashRecord(virtualHashRecord)); - } catch (ValidationException e) { - // remove validator from the set, so it won't be used again - p2hValidators.removeIf(validator -> validator.getTag().equals(e.getValidatorTag())); - // notify listeners about the error, so they can log, etc. 
- validationListeners.forEach(listener -> listener.onValidationFailed(e)); + if (p2hValidators == null || p2hValidators.isEmpty()) { + return; } + p2hValidators.forEach(validator -> { + try { + ((HashRecordValidator) validator).processHashRecord(virtualHashRecord); + } catch (final ValidationException e) { + // Remove validator and notify listeners only once (removeIf returns true only for the thread + // that removes) + if (p2hValidators.removeIf(v -> v.getTag().equals(validator.getTag()))) { + validationListeners.forEach(listener -> listener.onValidationFailed(e)); + } + } catch (final Exception e) { + // Remove validator and notify listeners only once (removeIf returns true only for the thread + // that removes) + if (p2hValidators.removeIf(v -> v.getTag().equals(validator.getTag()))) { + validationListeners.forEach(listener -> listener.onValidationFailed(new ValidationException( + validator.getTag(), "Unexpected exception: " + e.getMessage(), e))); + } + } + }); } else if (data.location() == -1) { dataStats.getP2h().incrementInvalidLocationCount(); LogUtils.printFileDataLocationErrorPoc( @@ -182,7 +196,7 @@ private void processVirtualHashRecord(@NonNull final ItemData data) { dataStats.getP2h().addObsoleteSpaceSize(data.bytes().length()); dataStats.getP2h().incrementObsoleteItemCount(); } - } catch (Exception e) { + } catch (final Exception e) { dataStats.getP2h().incrementParseErrorCount(); LogUtils.printFileDataLocationErrorPoc( log, e.getMessage(), vds.getHashStoreDisk().getFileCollection(), data); @@ -199,16 +213,27 @@ private void processBucket(@NonNull final ItemData data) { if (data.location() == bucketIndexToBucketLocation.get(bucket.getBucketIndex())) { // live object, perform ops on it... - try { - // Explicitly cast here. This is safe, explicit, and has negligible performance cost. - k2pValidators.forEach( - validator -> ((HdhmBucketValidator) validator).processBucket(data.location(), bucket)); - } catch (ValidationException e) { - // remove validator from the set, so it won't be used again - k2pValidators.removeIf(validator -> validator.getTag().equals(e.getValidatorTag())); - // notify listeners about the error, so they can log, etc. 
- validationListeners.forEach(listener -> listener.onValidationFailed(e)); + if (k2pValidators == null || k2pValidators.isEmpty()) { + return; } + k2pValidators.forEach(validator -> { + try { + ((HdhmBucketValidator) validator).processBucket(data.location(), bucket); + } catch (final ValidationException e) { + // Remove validator and notify listeners only once (removeIf returns true only for the thread + // that removes) + if (k2pValidators.removeIf(v -> v.getTag().equals(validator.getTag()))) { + validationListeners.forEach(listener -> listener.onValidationFailed(e)); + } + } catch (final Exception e) { + // Remove validator and notify listeners only once (removeIf returns true only for the thread + // that removes) + if (k2pValidators.removeIf(v -> v.getTag().equals(validator.getTag()))) { + validationListeners.forEach(listener -> listener.onValidationFailed(new ValidationException( + validator.getTag(), "Unexpected exception: " + e.getMessage(), e))); + } + } + }); } else if (data.location() == -1) { dataStats.getK2p().incrementInvalidLocationCount(); LogUtils.printFileDataLocationErrorPoc( @@ -221,7 +246,7 @@ private void processBucket(@NonNull final ItemData data) { dataStats.getK2p().addObsoleteSpaceSize(data.bytes().length()); dataStats.getK2p().incrementObsoleteItemCount(); } - } catch (Exception e) { + } catch (final Exception e) { dataStats.getK2p().incrementParseErrorCount(); LogUtils.printFileDataLocationErrorPoc( log, e.getMessage(), vds.getKeyToPath().getFileCollection(), data); diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/util/ValidationException.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/util/ValidationException.java index b0de881e8526..891c088ae68c 100644 --- a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/util/ValidationException.java +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/util/ValidationException.java @@ -1,6 +1,9 @@ // SPDX-License-Identifier: Apache-2.0 package com.hedera.statevalidation.poc.util; +/** + * Exception thrown when a validation check fails. 
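For orientation, a hedged sketch of how a failed check typically surfaces as this exception, using the ValidationAssertions helpers that appear throughout this patch; the counter variables and the "account" tag below are illustrative placeholders, not code from the patch itself:

    // Illustrative only: a failed assertion raises a ValidationException carrying the
    // validator's tag, which the pipeline uses to drop that validator and notify listeners.
    ValidationAssertions.requireEqual(expectedCount, actualCount, "account", "Account count is unexpected");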
+ */ public class ValidationException extends RuntimeException { private final String validatorTag; diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/AccountAndSupplyValidator.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/AccountAndSupplyValidator.java index 1919e4345c87..9f4daf632c58 100644 --- a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/AccountAndSupplyValidator.java +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/AccountAndSupplyValidator.java @@ -1,9 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 package com.hedera.statevalidation.poc.validator; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertTrue; - import com.hedera.hapi.node.base.AccountID; import com.hedera.hapi.node.state.token.Account; import com.hedera.node.app.service.entityid.EntityIdService; @@ -22,6 +19,7 @@ import com.swirlds.virtualmap.VirtualMap; import com.swirlds.virtualmap.datasource.VirtualLeafBytes; import edu.umd.cs.findbugs.annotations.NonNull; +import java.util.Objects; import java.util.concurrent.atomic.AtomicLong; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -48,17 +46,17 @@ public String getTag() { } @Override - public void initialize(@NonNull MerkleNodeState state) { + public void initialize(@NonNull final MerkleNodeState state) { final VirtualMap virtualMap = (VirtualMap) state.getRoot(); - assertNotNull(virtualMap); + Objects.requireNonNull(virtualMap); final ReadableEntityIdStore entityCounters = new ReadableEntityIdStoreImpl(state.getReadableStates(EntityIdService.NAME)); final ReadableKVState accounts = state.getReadableStates(TokenServiceImpl.NAME).get(V0490TokenSchema.ACCOUNTS_STATE_ID); - assertNotNull(accounts); - assertNotNull(entityCounters); + Objects.requireNonNull(accounts); + Objects.requireNonNull(entityCounters); this.numAccounts = entityCounters.numAccounts(); log.debug("Number of accounts: {}", numAccounts); @@ -77,7 +75,7 @@ public void processLeafBytes(long dataLocation, @NonNull final VirtualLeafBytes com.hedera.hapi.platform.state.StateValue.PROTOBUF.parse(valueBytes); final Account account = stateValue.value().as(); final long tinybarBalance = account.tinybarBalance(); - assertTrue(tinybarBalance >= 0); + ValidationAssertions.requireTrue(tinybarBalance >= 0, getTag()); totalBalance.addAndGet(tinybarBalance); accountsCreated.incrementAndGet(); } catch (final ParseException e) { @@ -88,7 +86,7 @@ public void processLeafBytes(long dataLocation, @NonNull final VirtualLeafBytes @Override public void validate() { - ValidationAssertions.requireEqual(TOTAL_tHBAR_SUPPLY, totalBalance.get(), ACCOUNT_TAG); - ValidationAssertions.requireEqual(accountsCreated.get(), numAccounts, ACCOUNT_TAG); + ValidationAssertions.requireEqual(TOTAL_tHBAR_SUPPLY, totalBalance.get(), getTag()); + ValidationAssertions.requireEqual(accountsCreated.get(), numAccounts, getTag()); } } diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/EntityIdCountValidator.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/EntityIdCountValidator.java index 220a358de2bd..f88b78b9ffa6 100644 --- a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/EntityIdCountValidator.java +++ 
b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/EntityIdCountValidator.java @@ -13,6 +13,7 @@ import com.swirlds.state.spi.ReadableSingletonState; import com.swirlds.virtualmap.datasource.VirtualLeafBytes; import edu.umd.cs.findbugs.annotations.NonNull; +import java.util.Objects; import java.util.concurrent.atomic.AtomicLong; public class EntityIdCountValidator implements LeafBytesValidator { @@ -35,7 +36,7 @@ public class EntityIdCountValidator implements LeafBytesValidator { private final AtomicLong contractStorageCount = new AtomicLong(0); private final AtomicLong contractBytecodeCount = new AtomicLong(0); private final AtomicLong hookCount = new AtomicLong(0); - private final AtomicLong labmbdaStorageCount = new AtomicLong(0); + private final AtomicLong lambdaStorageCount = new AtomicLong(0); @Override public String getTag() { @@ -43,16 +44,16 @@ public String getTag() { } @Override - public void initialize(@NonNull MerkleNodeState state) { + public void initialize(@NonNull final MerkleNodeState state) { final ReadableSingletonState entityIdSingleton = state.getReadableStates(EntityIdService.NAME).getSingleton(ENTITY_COUNTS_STATE_ID); - this.entityCounts = entityIdSingleton.get(); + this.entityCounts = Objects.requireNonNull(entityIdSingleton.get()); } @Override - public void processLeafBytes(long dataLocation, @NonNull VirtualLeafBytes leafBytes) { + public void processLeafBytes(long dataLocation, @NonNull final VirtualLeafBytes leafBytes) { try { - StateKey key = StateKey.PROTOBUF.parse(leafBytes.keyBytes()); + final StateKey key = StateKey.PROTOBUF.parse(leafBytes.keyBytes()); switch (key.key().kind()) { case TOKENSERVICE_I_ACCOUNTS -> accountCount.incrementAndGet(); case TOKENSERVICE_I_ALIASES -> aliasesCount.incrementAndGet(); @@ -68,7 +69,7 @@ public void processLeafBytes(long dataLocation, @NonNull VirtualLeafBytes leafBy case CONTRACTSERVICE_I_STORAGE -> contractStorageCount.incrementAndGet(); case CONTRACTSERVICE_I_BYTECODE -> contractBytecodeCount.incrementAndGet(); case CONTRACTSERVICE_I_EVM_HOOK_STATES -> hookCount.incrementAndGet(); - case CONTRACTSERVICE_I_LAMBDA_STORAGE -> labmbdaStorageCount.incrementAndGet(); + case CONTRACTSERVICE_I_LAMBDA_STORAGE -> lambdaStorageCount.incrementAndGet(); } } catch (ParseException e) { throw new RuntimeException(e); @@ -77,52 +78,46 @@ public void processLeafBytes(long dataLocation, @NonNull VirtualLeafBytes leafBy @Override public void validate() { - ValidationAssertions.requireNonNull(entityCounts, ENTITY_ID_COUNT_TAG); + ValidationAssertions.requireNonNull(entityCounts, getTag()); ValidationAssertions.requireEqual( - entityCounts.numAccounts(), accountCount.get(), ENTITY_ID_COUNT_TAG, "Account count is unexpected"); + entityCounts.numAccounts(), accountCount.get(), getTag(), "Account count is unexpected"); ValidationAssertions.requireEqual( - entityCounts.numAliases(), aliasesCount.get(), ENTITY_ID_COUNT_TAG, "Alias count is unexpected"); + entityCounts.numAliases(), aliasesCount.get(), getTag(), "Alias count is unexpected"); ValidationAssertions.requireEqual( - entityCounts.numTokens(), tokenCount.get(), ENTITY_ID_COUNT_TAG, "Token count is unexpected"); + entityCounts.numTokens(), tokenCount.get(), getTag(), "Token count is unexpected"); ValidationAssertions.requireEqual( - entityCounts.numTokenRelations(), - tokenRelCount.get(), - ENTITY_ID_COUNT_TAG, - "Token relations count is unexpected"); + entityCounts.numTokenRelations(), tokenRelCount.get(), getTag(), "Token relations count is unexpected"); 
ValidationAssertions.requireEqual( - entityCounts.numNfts(), nftsCount.get(), ENTITY_ID_COUNT_TAG, "NFTs count is unexpected"); + entityCounts.numNfts(), nftsCount.get(), getTag(), "NFTs count is unexpected"); ValidationAssertions.requireEqual( - entityCounts.numAirdrops(), airdropsCount.get(), ENTITY_ID_COUNT_TAG, "Airdrops count is unexpected"); + entityCounts.numAirdrops(), airdropsCount.get(), getTag(), "Airdrops count is unexpected"); ValidationAssertions.requireEqual( - entityCounts.numStakingInfos(), - stakingInfoCount.get(), - ENTITY_ID_COUNT_TAG, - "Staking infos count is unexpected"); + entityCounts.numStakingInfos(), stakingInfoCount.get(), getTag(), "Staking infos count is unexpected"); ValidationAssertions.requireEqual( - entityCounts.numTopics(), topicCount.get(), ENTITY_ID_COUNT_TAG, "Topic count is unexpected"); + entityCounts.numTopics(), topicCount.get(), getTag(), "Topic count is unexpected"); ValidationAssertions.requireEqual( - entityCounts.numFiles(), fileCount.get(), ENTITY_ID_COUNT_TAG, "File count is unexpected"); + entityCounts.numFiles(), fileCount.get(), getTag(), "File count is unexpected"); ValidationAssertions.requireEqual( - entityCounts.numNodes(), nodesCount.get(), ENTITY_ID_COUNT_TAG, "Nodes count is unexpected"); + entityCounts.numNodes(), nodesCount.get(), getTag(), "Nodes count is unexpected"); // To be investigated - https://github.com/hiero-ledger/hiero-consensus-node/issues/20993 - // ValidationAssertions.requireEqual(entityCounts.numSchedules(), scheduleCount.get(), ENTITY_ID_COUNT_TAG, + // ValidationAssertions.requireEqual(entityCounts.numSchedules(), scheduleCount.get(), getTag(), // "Schedule count is unexpected"); // ValidationAssertions.requireEqual( // entityCounts.numContractStorageSlots(), // contractStorageCount.get(), - // ENTITY_ID_COUNT_TAG, + // getTag(), // "Contract storage count is unexpected"); ValidationAssertions.requireEqual( entityCounts.numContractBytecodes(), contractBytecodeCount.get(), - ENTITY_ID_COUNT_TAG, + getTag(), "Contract count is unexpected"); ValidationAssertions.requireEqual( - entityCounts.numHooks(), hookCount.get(), ENTITY_ID_COUNT_TAG, "Hook count is unexpected"); + entityCounts.numHooks(), hookCount.get(), getTag(), "Hook count is unexpected"); ValidationAssertions.requireEqual( entityCounts.numLambdaStorageSlots(), - labmbdaStorageCount.get(), - ENTITY_ID_COUNT_TAG, + lambdaStorageCount.get(), + getTag(), "Lambda slot count is unexpected"); } } diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/EntityIdUniquenessValidator.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/EntityIdUniquenessValidator.java index acc8c6a9af47..ca3dc4144e94 100644 --- a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/EntityIdUniquenessValidator.java +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/EntityIdUniquenessValidator.java @@ -33,6 +33,7 @@ import com.swirlds.state.spi.ReadableKVState; import com.swirlds.virtualmap.datasource.VirtualLeafBytes; import edu.umd.cs.findbugs.annotations.NonNull; +import java.util.Objects; import java.util.concurrent.atomic.AtomicInteger; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -59,17 +60,23 @@ public String getTag() { } @Override - public void initialize(@NonNull MerkleNodeState state) { - this.tokensState = state.getReadableStates(TokenService.NAME).get(TOKENS_STATE_ID); - this.accountState = 
state.getReadableStates(TokenService.NAME).get(ACCOUNTS_STATE_ID); - this.smartContractState = state.getReadableStates(ContractService.NAME).get(BYTECODE_STATE_ID); - this.topicState = state.getReadableStates(ConsensusService.NAME).get(TOPICS_STATE_ID); - this.fileState = state.getReadableStates(FileService.NAME).get(FILES_STATE_ID); - this.scheduleState = state.getReadableStates(ScheduleService.NAME).get(SCHEDULES_BY_ID_STATE_ID); + public void initialize(@NonNull final MerkleNodeState state) { + this.tokensState = Objects.requireNonNull( + state.getReadableStates(TokenService.NAME).get(TOKENS_STATE_ID)); + this.accountState = Objects.requireNonNull( + state.getReadableStates(TokenService.NAME).get(ACCOUNTS_STATE_ID)); + this.smartContractState = Objects.requireNonNull( + state.getReadableStates(ContractService.NAME).get(BYTECODE_STATE_ID)); + this.topicState = Objects.requireNonNull( + state.getReadableStates(ConsensusService.NAME).get(TOPICS_STATE_ID)); + this.fileState = + Objects.requireNonNull(state.getReadableStates(FileService.NAME).get(FILES_STATE_ID)); + this.scheduleState = Objects.requireNonNull( + state.getReadableStates(ScheduleService.NAME).get(SCHEDULES_BY_ID_STATE_ID)); } @Override - public void processLeafBytes(long dataLocation, @NonNull VirtualLeafBytes leafBytes) { + public void processLeafBytes(long dataLocation, @NonNull final VirtualLeafBytes leafBytes) { long entityId = IMPERMISSIBLE_ENTITY_ID; try { @@ -111,7 +118,7 @@ public void processLeafBytes(long dataLocation, @NonNull VirtualLeafBytes leafBy @Override public void validate() { - ValidationAssertions.requireEqual(0, issuesFound.get(), ENTITY_ID_UNIQUENESS_TAG); + ValidationAssertions.requireEqual(0, issuesFound.get(), getTag()); } private void checkEntityUniqueness(long entityId) { diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/HashRecordIntegrityValidator.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/HashRecordIntegrityValidator.java index e395066be053..da8e04b61040 100644 --- a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/HashRecordIntegrityValidator.java +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/HashRecordIntegrityValidator.java @@ -28,7 +28,7 @@ public void initialize(@NonNull final MerkleNodeState state) {} @Override public void processHashRecord(@NonNull final VirtualHashRecord hashRecord) { - ValidationAssertions.requireNonNull(hashRecord.hash(), INTERNAL_TAG); + ValidationAssertions.requireNonNull(hashRecord.hash(), getTag()); totalEntriesProcessed.incrementAndGet(); } diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/HdhmBucketIntegrityValidator.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/HdhmBucketIntegrityValidator.java index 7530a9d68e12..ff5b65431e50 100644 --- a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/HdhmBucketIntegrityValidator.java +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/HdhmBucketIntegrityValidator.java @@ -3,7 +3,6 @@ import static com.hedera.statevalidation.util.ConfigUtils.COLLECTED_INFO_THRESHOLD; import static com.hedera.statevalidation.util.LogUtils.printFileDataLocationError; -import static org.junit.jupiter.api.Assertions.assertNotNull; import com.hedera.hapi.platform.state.StateKey; import com.hedera.pbj.runtime.ParseException; @@ -49,7 +48,7 @@ public String 
getTag() { @Override public void initialize(@NonNull final MerkleNodeState state) { final VirtualMap virtualMap = (VirtualMap) state.getRoot(); - assertNotNull(virtualMap); + Objects.requireNonNull(virtualMap); final MerkleDbDataSource vds = (MerkleDbDataSource) virtualMap.getDataSource(); this.pathToKeyValueDfc = vds.getPathToKeyValue().getFileCollection(); @@ -67,13 +66,13 @@ public void processBucket(long bucketLocation, @NonNull final ParsedBucket bucke final int bucketIndex = bucket.getBucketIndex(); try { - var bucketIterator = new BucketIterator(bucket); + final BucketIterator bucketIterator = new BucketIterator(bucket); while (bucketIterator.hasNext()) { final ParsedBucket.BucketEntry entry = bucketIterator.next(); final Bytes keyBytes = entry.getKeyBytes(); final long path = entry.getValue(); // get path -> dataLocation - var dataLocation = pathToDiskLocationLeafNodes.get(path); + final long dataLocation = pathToDiskLocationLeafNodes.get(path); if (dataLocation == 0) { printFileDataLocationError(log, "Stale path", keyToPathDfc, bucketLocation); collectInfo(new StalePathInfo(path, parseKey(keyBytes)), stalePathsInfos); @@ -153,7 +152,7 @@ public void validate() { && unexpectedKeyInfos.isEmpty() && pathMismatchInfos.isEmpty() && hashCodeMismatchInfos.isEmpty(), - HDHM_TAG, + getTag(), "One of the test condition hasn't been met. " + "Conditions: " + ("stalePathsInfos.isEmpty() = %s, " diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/LeafBytesIntegrityValidator.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/LeafBytesIntegrityValidator.java index 4e1f0ef501cf..0b59c31cea4f 100644 --- a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/LeafBytesIntegrityValidator.java +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/LeafBytesIntegrityValidator.java @@ -57,8 +57,8 @@ public void processLeafBytes(long dataLocation, @NonNull final VirtualLeafBytes final long p2KvPath = leafBytes.path(); long k2pPath = keyToPath.get(keyBytes, -1); - ValidationAssertions.requireEqual(p2KvPath, k2pPath, LEAF_TAG); - ValidationAssertions.requireEqual(valueBytes, virtualMap.getBytes(keyBytes), LEAF_TAG); + ValidationAssertions.requireEqual(p2KvPath, k2pPath, getTag()); + ValidationAssertions.requireEqual(valueBytes, virtualMap.getBytes(keyBytes), getTag()); successCount.incrementAndGet(); } catch (IOException e) { @@ -70,6 +70,6 @@ public void processLeafBytes(long dataLocation, @NonNull final VirtualLeafBytes @Override public void validate() { log.debug("Successfully checked {} VirtualLeafBytes entries", successCount.get()); - ValidationAssertions.requireEqual(0, exceptionCount.get(), LEAF_TAG, "Some read operations failed"); + ValidationAssertions.requireEqual(0, exceptionCount.get(), getTag(), "Some read operations failed"); } } diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/TokenRelationsIntegrityValidator.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/TokenRelationsIntegrityValidator.java index ec083047ff37..dfe9b6c26272 100644 --- a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/TokenRelationsIntegrityValidator.java +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/TokenRelationsIntegrityValidator.java @@ -2,8 +2,6 @@ package com.hedera.statevalidation.poc.validator; import static 
com.swirlds.state.merkle.StateUtils.getStateKeyForKv; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; import com.hedera.hapi.node.base.AccountID; import com.hedera.hapi.node.base.TokenID; @@ -47,7 +45,7 @@ public String getTag() { } @Override - public void initialize(@NonNull MerkleNodeState state) { + public void initialize(@NonNull final MerkleNodeState state) { this.virtualMap = (VirtualMap) state.getRoot(); final ReadableEntityIdStore entityCounters = @@ -58,7 +56,7 @@ public void initialize(@NonNull MerkleNodeState state) { } @Override - public void processLeafBytes(long dataLocation, @NonNull VirtualLeafBytes leafBytes) { + public void processLeafBytes(long dataLocation, @NonNull final VirtualLeafBytes leafBytes) { Objects.requireNonNull(virtualMap); final Bytes keyBytes = leafBytes.keyBytes(); @@ -81,13 +79,13 @@ public void processLeafBytes(long dataLocation, @NonNull VirtualLeafBytes leafBy final AccountID accountId2 = tokenRelation.accountId(); final TokenID tokenId2 = tokenRelation.tokenId(); - assertNotNull(accountId1); - assertNotNull(tokenId1); - assertNotNull(accountId2); - assertNotNull(tokenId2); + ValidationAssertions.requireNonNull(accountId1, getTag()); + ValidationAssertions.requireNonNull(tokenId1, getTag()); + ValidationAssertions.requireNonNull(accountId2, getTag()); + ValidationAssertions.requireNonNull(tokenId2, getTag()); - assertEquals(accountId1, accountId2); - assertEquals(tokenId1, tokenId2); + ValidationAssertions.requireEqual(accountId1, accountId2, getTag()); + ValidationAssertions.requireEqual(tokenId1, tokenId2, getTag()); if (!virtualMap.containsKey( getStateKeyForKv(V0490TokenSchema.ACCOUNTS_STATE_ID, accountId1, AccountID.PROTOBUF))) { @@ -107,8 +105,8 @@ public void processLeafBytes(long dataLocation, @NonNull VirtualLeafBytes leafBy @Override public void validate() { - ValidationAssertions.requireEqual(objectsProcessed.get(), numTokenRelations, TOKEN_RELATIONS_TAG); - ValidationAssertions.requireEqual(0, accountFailCounter.get(), TOKEN_RELATIONS_TAG); - ValidationAssertions.requireEqual(0, tokenFailCounter.get(), TOKEN_RELATIONS_TAG); + ValidationAssertions.requireEqual(objectsProcessed.get(), numTokenRelations, getTag()); + ValidationAssertions.requireEqual(0, accountFailCounter.get(), getTag()); + ValidationAssertions.requireEqual(0, tokenFailCounter.get(), getTag()); } } diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/api/Validator.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/api/Validator.java index 19b53d350525..61f413f6d578 100644 --- a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/api/Validator.java +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/api/Validator.java @@ -6,6 +6,14 @@ /** * Base interface for all validators with a clear lifecycle. + * + *
Thread Safety Contract: Validator implementations are invoked concurrently
+ * from multiple processor threads. They are safe to use because:
+ * <ul>
+ *   <li>The state being validated is read-only (no concurrent writes)</li>
+ *   <li>All counters/accumulators must use atomic types</li>
+ *   <li>The underlying MerkleDB infrastructure supports concurrent reads</li>
+ * </ul>
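To make the contract above concrete, here is a hedged sketch of a validator that follows it; the class name, tag, and counting logic are invented for illustration, while the interface methods and the ValidationAssertions call mirror those introduced in this patch:

import com.hedera.statevalidation.poc.util.ValidationAssertions;
import com.hedera.statevalidation.poc.validator.api.LeafBytesValidator;
import com.swirlds.state.MerkleNodeState;
import com.swirlds.virtualmap.datasource.VirtualLeafBytes;
import java.util.concurrent.atomic.AtomicLong;

// Hypothetical example: the only mutable state is an atomic counter, so concurrent
// processLeafBytes() calls from multiple processor threads are safe.
public final class LeafCountingValidator implements LeafBytesValidator {

    private final AtomicLong leavesSeen = new AtomicLong();

    @Override
    public String getTag() {
        return "leafCount"; // illustrative tag, not one of the tags defined by this patch
    }

    @Override
    public void initialize(MerkleNodeState state) {
        // read-only setup only; no shared mutable state is created here
    }

    @Override
    public void processLeafBytes(long dataLocation, VirtualLeafBytes leafBytes) {
        leavesSeen.incrementAndGet();
    }

    @Override
    public void validate() {
        ValidationAssertions.requireTrue(leavesSeen.get() > 0, getTag());
    }
}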
*/ public interface Validator { From 31e7790d678446752903b438c38b6037d9af50a5 Mon Sep 17 00:00:00 2001 From: Nikita Lebedev Date: Mon, 8 Dec 2025 11:58:26 +0200 Subject: [PATCH 5/6] validation flow Signed-off-by: Nikita Lebedev --- .../statevalidation/Validate2Command.java | 28 +++++++++---------- ....java => ValidationExecutionListener.java} | 11 ++++++-- 2 files changed, 23 insertions(+), 16 deletions(-) rename hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/listener/{LoggingValidationListener.java => ValidationExecutionListener.java} (74%) diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/Validate2Command.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/Validate2Command.java index c28ccb2e0017..837183557118 100644 --- a/hedera-state-validator/src/main/java/com/hedera/statevalidation/Validate2Command.java +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/Validate2Command.java @@ -13,7 +13,7 @@ import com.hedera.pbj.runtime.io.buffer.BufferedData; import com.hedera.pbj.runtime.io.buffer.Bytes; -import com.hedera.statevalidation.poc.listener.LoggingValidationListener; +import com.hedera.statevalidation.poc.listener.ValidationExecutionListener; import com.hedera.statevalidation.poc.listener.ValidationListener; import com.hedera.statevalidation.poc.model.DataStats; import com.hedera.statevalidation.poc.model.ItemData; @@ -44,6 +44,7 @@ import java.util.Map; import java.util.Set; import java.util.concurrent.BlockingQueue; +import java.util.concurrent.Callable; import java.util.concurrent.CopyOnWriteArraySet; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; @@ -63,7 +64,7 @@ name = "validate2", mixinStandardHelpOptions = true, description = "Validate command v2. 
Validates the state by running some of the validators in parallel.") -public class Validate2Command implements Runnable { +public class Validate2Command implements Callable { private static final Logger log = LogManager.getLogger(Validate2Command.class); @@ -120,7 +121,8 @@ public class Validate2Command implements Runnable { private Validate2Command() {} @Override - public void run() { + public Integer call() { + final var validationExecutionListener = new ValidationExecutionListener(); try (ExecutorService ioPool = Executors.newFixedThreadPool(ioThreads)) { try (ExecutorService processPool = Executors.newFixedThreadPool(processThreads)) { final BlockingQueue> dataQueue = new LinkedBlockingQueue<>(queueCapacity); @@ -142,7 +144,7 @@ public void run() { final DataFileCollection keyToPathDfc = vds.getKeyToPath().getFileCollection(); // Initialize validators and listeners - final List validationListeners = List.of(new LoggingValidationListener()); + final List validationListeners = List.of(validationExecutionListener); final Map> validators = createAndInitValidators(state, tags, validationListeners); @@ -247,17 +249,14 @@ public void run() { } } - boolean anyValidationFailed = false; for (var validatorSet : validators.values()) { for (var validator : validatorSet) { try { validator.validate(); validationListeners.forEach(listener -> listener.onValidationCompleted(validator.getTag())); } catch (final ValidationException e) { - anyValidationFailed = true; validationListeners.forEach(listener -> listener.onValidationFailed(e)); } catch (final Exception e) { - anyValidationFailed = true; validationListeners.forEach(listener -> listener.onValidationFailed(new ValidationException( validator.getTag(), "Unexpected exception during validation: " + e.getMessage(), @@ -284,17 +283,15 @@ public void run() { log.info(dataStats); + log.debug("Total boundary search time: {} ms", totalBoundarySearchMillis.get()); + log.debug("Total processing time: {} ms", System.currentTimeMillis() - startTime); + // common validation for error reads if (dataStats.hasErrorReads()) { - throw new RuntimeException("Error reads found. Full info: \n " + dataStats); - } - - if (anyValidationFailed) { - throw new ValidationException("*", "One or more validators failed. Check logs for details."); + return 1; } - log.debug("Total boundary search time: {} ms", totalBoundarySearchMillis.get()); - log.debug("Total processing time: {} ms", System.currentTimeMillis() - startTime); + return validationExecutionListener.isFailed() ? 
1 : 0; } } catch (final RuntimeException e) { throw e; @@ -374,6 +371,9 @@ private Map> createAndInitValidators( try { validator.initialize(state); return false; // keep validator + } catch (final ValidationException e) { + validationListeners.forEach(listener -> listener.onValidationFailed(e)); + return true; // remove validator } catch (final Exception e) { validationListeners.forEach(listener -> listener.onValidationFailed( new ValidationException(validator.getTag(), "Unexpected exception: " + e.getMessage(), e))); diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/listener/LoggingValidationListener.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/listener/ValidationExecutionListener.java similarity index 74% rename from hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/listener/LoggingValidationListener.java rename to hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/listener/ValidationExecutionListener.java index 94b908257461..963014a03f4f 100644 --- a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/listener/LoggingValidationListener.java +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/listener/ValidationExecutionListener.java @@ -6,9 +6,11 @@ import org.apache.logging.log4j.Logger; // update logging format -public class LoggingValidationListener implements ValidationListener { +public class ValidationExecutionListener implements ValidationListener { - private static final Logger log = LogManager.getLogger(LoggingValidationListener.class); + private static final Logger log = LogManager.getLogger(ValidationExecutionListener.class); + + private volatile boolean failed = false; @Override public void onValidationStarted(String tag) { @@ -22,6 +24,7 @@ public void onValidationCompleted(String tag) { @Override public void onValidationFailed(ValidationException error) { + this.failed = true; log.debug(framedString(error.getValidatorTag() + " failed")); } @@ -29,4 +32,8 @@ private String framedString(String stringToFrame) { String frame = " ".repeat(stringToFrame.length() + 6); return String.format("\n%s\n %s \n%s", frame, stringToFrame, frame); } + + public boolean isFailed() { + return failed; + } } From 5e2b38ecd43a1c0787b306a6eec169772f415b7b Mon Sep 17 00:00:00 2001 From: Nikita Lebedev Date: Mon, 8 Dec 2025 19:23:36 +0200 Subject: [PATCH 6/6] refactoring & javadoc Signed-off-by: Nikita Lebedev --- .../statevalidation/Validate2Command.java | 141 ++++++---- .../listener/ValidationExecutionListener.java | 40 ++- .../poc/listener/ValidationListener.java | 66 ++++- .../statevalidation/poc/model/ItemData.java | 24 ++ .../poc/pipeline/ChunkedFileIterator.java | 248 +++++++++++++----- .../poc/pipeline/ProcessorTask.java | 194 ++++++++++---- .../poc/util/ValidationAssertions.java | 6 +- .../poc/util/ValidationException.java | 7 +- .../validator/AccountAndSupplyValidator.java | 19 +- .../poc/validator/EntityIdCountValidator.java | 19 +- .../EntityIdUniquenessValidator.java | 19 +- .../HashRecordIntegrityValidator.java | 17 +- .../HdhmBucketIntegrityValidator.java | 17 +- .../LeafBytesIntegrityValidator.java | 19 +- .../TokenRelationsIntegrityValidator.java | 19 +- .../validator/api/HashRecordValidator.java | 14 +- .../validator/api/HdhmBucketValidator.java | 15 ++ .../poc/validator/api/LeafBytesValidator.java | 17 +- .../poc/validator/api/Validator.java | 87 +++++- 19 files changed, 780 insertions(+), 208 deletions(-) diff --git 
a/hedera-state-validator/src/main/java/com/hedera/statevalidation/Validate2Command.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/Validate2Command.java index 837183557118..2d8adb638150 100644 --- a/hedera-state-validator/src/main/java/com/hedera/statevalidation/Validate2Command.java +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/Validate2Command.java @@ -10,6 +10,7 @@ import static com.hedera.statevalidation.poc.validator.TokenRelationsIntegrityValidator.TOKEN_RELATIONS_TAG; import static com.swirlds.base.units.UnitConstants.BYTES_TO_MEBIBYTES; import static com.swirlds.base.units.UnitConstants.MEBIBYTES_TO_BYTES; +import static com.swirlds.base.units.UnitConstants.NANOSECONDS_TO_MILLISECONDS; import com.hedera.pbj.runtime.io.buffer.BufferedData; import com.hedera.pbj.runtime.io.buffer.Bytes; @@ -73,24 +74,40 @@ public class Validate2Command implements Callable { @Option( names = {"-io", "--io-threads"}, - description = "Number of IO threads for reading from disk.") + description = "Number of IO threads for reading from disk. Default: 4.") private int ioThreads = 4; @Option( names = {"-p", "--process-threads"}, - description = "Number of CPU threads for processing chunks.") + description = "Number of CPU threads for processing chunks. Default: 6.") private int processThreads = 6; @Option( names = {"-q", "--queue-capacity"}, - description = "Queue capacity for backpressure control.") + description = "Queue capacity for backpressure control. Default: 100.") private int queueCapacity = 100; @Option( names = {"-b", "--batch-size"}, - description = "Batch size for processing items.") + description = "Batch size for processing items. Default: 10.") private int batchSize = 10; + @Option( + names = {"-mcs", "--min-chunk-size-mib"}, + description = "Minimum chunk size in mebibytes (MiB) for file reading. Default: 128 MiB.") + private int minChunkSizeMib = 128; + + @Option( + names = {"-c", "--chunk-multiplier"}, + description = + "Multiplier for IO threads to determine target number of chunks (higher value = more, smaller chunks). Default: 2.") + private int chunkMultiplier = 2; + + @Option( + names = {"-bs", "--buffer-size-kib"}, + description = "Buffer size in kibibytes (KiB) for file reading operations. 
Default: 128 KiB.") + private int bufferSizeKib = 128; + @CommandLine.Parameters( arity = "1..*", description = "Tag to run: [" @@ -122,17 +139,14 @@ private Validate2Command() {} @Override public Integer call() { - final var validationExecutionListener = new ValidationExecutionListener(); try (ExecutorService ioPool = Executors.newFixedThreadPool(ioThreads)) { try (ExecutorService processPool = Executors.newFixedThreadPool(processThreads)) { - final BlockingQueue> dataQueue = new LinkedBlockingQueue<>(queueCapacity); - - final long startTime = System.currentTimeMillis(); - final AtomicLong totalBoundarySearchMillis = new AtomicLong(0L); + final long startTime = System.nanoTime(); // Initialize state and get data file collections parent.initializeStateDir(); final DeserializedSignedState deserializedSignedState = StateUtils.getDeserializedSignedState(); + //noinspection resource -- doesn't matter in this context final MerkleNodeState state = deserializedSignedState.reservedSignedState().get().getState(); final VirtualMap virtualMap = (VirtualMap) state.getRoot(); @@ -140,79 +154,82 @@ public Integer call() { final DataFileCollection pathToKeyValueDfc = vds.getPathToKeyValue().getFileCollection(); + //noinspection DataFlowIssue final DataFileCollection pathToHashDfc = vds.getHashStoreDisk().getFileCollection(); final DataFileCollection keyToPathDfc = vds.getKeyToPath().getFileCollection(); // Initialize validators and listeners + final var validationExecutionListener = new ValidationExecutionListener(); final List validationListeners = List.of(validationExecutionListener); final Map> validators = createAndInitValidators(state, tags, validationListeners); - int totalFiles = 0; - long globalTotalSize = 0L; + // Calculate file count and total size + int dataFileCount = 0; + long dataTotalSizeBytes = 0L; final var fileReadTasks = new ArrayList(); if (validators.containsKey(Type.P2KV)) { - totalFiles += pathToKeyValueDfc.getAllCompletedFiles().size(); - globalTotalSize += pathToKeyValueDfc.getAllCompletedFiles().stream() + dataFileCount += pathToKeyValueDfc.getAllCompletedFiles().size(); + dataTotalSizeBytes += pathToKeyValueDfc.getAllCompletedFiles().stream() .mapToLong(DataFileReader::getSize) .sum(); log.debug( - "P2KV file count: {}", + "P2KV data file count: {}", pathToKeyValueDfc.getAllCompletedFiles().size()); } if (validators.containsKey(Type.P2H)) { - totalFiles += pathToHashDfc.getAllCompletedFiles().size(); - globalTotalSize += pathToHashDfc.getAllCompletedFiles().stream() + dataFileCount += pathToHashDfc.getAllCompletedFiles().size(); + dataTotalSizeBytes += pathToHashDfc.getAllCompletedFiles().stream() .mapToLong(DataFileReader::getSize) .sum(); log.debug( - "P2H file count: {}", + "P2H data file count: {}", pathToHashDfc.getAllCompletedFiles().size()); } if (validators.containsKey(Type.K2P)) { - totalFiles += keyToPathDfc.getAllCompletedFiles().size(); - globalTotalSize += keyToPathDfc.getAllCompletedFiles().stream() + dataFileCount += keyToPathDfc.getAllCompletedFiles().size(); + dataTotalSizeBytes += keyToPathDfc.getAllCompletedFiles().stream() .mapToLong(DataFileReader::getSize) .sum(); log.debug( - "K2P file count: {}", + "K2P data file count: {}", keyToPathDfc.getAllCompletedFiles().size()); } // Plan all file read tasks (calculate chunks for each file) if (validators.containsKey(Type.P2KV)) { - fileReadTasks.addAll(planTasksFor(pathToKeyValueDfc, Type.P2KV, ioThreads, globalTotalSize)); + fileReadTasks.addAll(planTasksFor(pathToKeyValueDfc, Type.P2KV, ioThreads)); } if 
(validators.containsKey(Type.P2H)) { - fileReadTasks.addAll(planTasksFor(pathToHashDfc, Type.P2H, ioThreads, globalTotalSize)); + fileReadTasks.addAll(planTasksFor(pathToHashDfc, Type.P2H, ioThreads)); } if (validators.containsKey(Type.K2P)) { - fileReadTasks.addAll(planTasksFor(keyToPathDfc, Type.K2P, ioThreads, globalTotalSize)); + fileReadTasks.addAll(planTasksFor(keyToPathDfc, Type.K2P, ioThreads)); } - log.debug("File count: {}", totalFiles); - log.debug("Total data size: {} MB", globalTotalSize * BYTES_TO_MEBIBYTES); + log.debug("Total file count: {}", dataFileCount); + log.debug("Total data size: {} MB", dataTotalSizeBytes * BYTES_TO_MEBIBYTES); + log.debug("Total file read tasks: {}", fileReadTasks.size()); // Sort tasks: largest chunks first (better thread utilization) fileReadTasks.sort((a, b) -> Long.compare(b.endByte - b.startByte, a.endByte - a.startByte)); - final int totalFileReadTasks = fileReadTasks.size(); + // Initialize data structures for file chunks processing + final var dataStats = new DataStats(); + final var totalBoundarySearchNanos = new AtomicLong(0L); - log.debug("Total file read tasks: {}", totalFileReadTasks); + final var dataQueue = new LinkedBlockingQueue>(queueCapacity); + final var processorFutures = new ArrayList>(); + final var ioFutures = new ArrayList>(); - final DataStats dataStats = new DataStats(); - - final List> processorFutures = new ArrayList<>(); - final List> ioFutures = new ArrayList<>(); - - // Start processor threads + // Start process threads for (int i = 0; i < processThreads; i++) { processorFutures.add(processPool.submit( new ProcessorTask(validators, validationListeners, dataQueue, vds, dataStats))); } - // Submit all planned file read tasks + // Submit file read tasks for (final FileReadTask task : fileReadTasks) { ioFutures.add(ioPool.submit(() -> { readFileChunk( @@ -221,11 +238,12 @@ public Integer call() { task.type, task.startByte, task.endByte, - totalBoundarySearchMillis); + totalBoundarySearchNanos); return null; })); } + // Wait for all io tasks to complete for (final Future future : ioFutures) { try { future.get(); @@ -241,6 +259,7 @@ public Integer call() { dataQueue.put(List.of(ItemData.poisonPill())); } + // Wait for all processor tasks to complete for (final Future future : processorFutures) { try { future.get(); @@ -249,8 +268,9 @@ public Integer call() { } } - for (var validatorSet : validators.values()) { - for (var validator : validatorSet) { + // Perform final validations + for (final var validatorSet : validators.values()) { + for (final var validator : validatorSet) { try { validator.validate(); validationListeners.forEach(listener -> listener.onValidationCompleted(validator.getTag())); @@ -265,6 +285,7 @@ public Integer call() { } } + // Output only relevant data stats if (validators.containsKey(Type.P2KV)) { log.info( "P2KV (Path -> Key/Value) Data Stats: \n {}", @@ -281,10 +302,16 @@ public Integer call() { dataStats.getK2p().toStringContent()); } - log.info(dataStats); + // Don't log total aggregate stats if only one validator is present + if (validators.size() > 1) { + log.info(dataStats); + } - log.debug("Total boundary search time: {} ms", totalBoundarySearchMillis.get()); - log.debug("Total processing time: {} ms", System.currentTimeMillis() - startTime); + log.debug( + "Total boundary search time: {} ms", + totalBoundarySearchNanos.get() * NANOSECONDS_TO_MILLISECONDS); + log.debug( + "Total processing time: {} ms", (System.nanoTime() - startTime) * NANOSECONDS_TO_MILLISECONDS); // common validation for 
error reads if (dataStats.hasErrorReads()) { @@ -389,24 +416,21 @@ private Map> createAndInitValidators( // Helper: Plan tasks for one collection private List planTasksFor( - @NonNull final DataFileCollection dfc, - @NonNull final ItemData.Type dataType, - final int ioThreads, - final long globalTotalSize) { - + @NonNull final DataFileCollection dfc, @NonNull final Type dataType, final int ioThreads) { final List tasks = new ArrayList<>(); final long collectionTotalSize = dfc.getAllCompletedFiles().stream() .mapToLong(DataFileReader::getSize) .sum(); + // Calculate chunks for each file for (final DataFileReader reader : dfc.getAllCompletedFiles()) { final long fileSize = reader.getSize(); if (fileSize == 0) { continue; } - final int chunks = calculateOptimalChunks(reader, ioThreads, collectionTotalSize); + final int chunks = calculateOptimalChunks(reader, collectionTotalSize, ioThreads); final long chunkSize = (fileSize + chunks - 1) / chunks; log.debug( @@ -432,32 +456,45 @@ private List planTasksFor( return tasks; } + // Helper: Calculate the optimal number of chunks for the file private int calculateOptimalChunks( - @NonNull final DataFileReader reader, final int ioThreads, final long globalTotalDataSize) { - + @NonNull final DataFileReader reader, final long collectionTotalSize, final int ioThreads) { final long fileSize = reader.getSize(); - // literals here can be extracted to params - final long targetChunkSize = Math.max(globalTotalDataSize / (ioThreads * 2), 128 * MEBIBYTES_TO_BYTES); + final int minChunkSize = minChunkSizeMib * MEBIBYTES_TO_BYTES; + + // Calculate target chunk size: divide total collection size by (ioThreads * chunkMultiplier) + // to distribute work evenly across threads, but ensure it's at least minChunkSize + final long targetChunkSize = Math.max(collectionTotalSize / (ioThreads * chunkMultiplier), minChunkSize); + // If file is smaller than target chunk size, process it as a single chunk if (fileSize < targetChunkSize) { return 1; } + // Otherwise, divide file into chunks of approximately targetChunkSize (round up) return (int) Math.ceil((double) fileSize / targetChunkSize); } + // Helper: Read the file chunk and put data into the queue private void readFileChunk( @NonNull final DataFileReader reader, @NonNull final BlockingQueue> dataQueue, @NonNull final Type dataType, final long startByte, final long endByte, - @NonNull final AtomicLong totalBoundarySearchMillis) + @NonNull final AtomicLong totalBoundarySearchNanos) throws IOException, InterruptedException { + final int bufferSizeBytes = bufferSizeKib * 1024; try (ChunkedFileIterator iterator = new ChunkedFileIterator( - reader.getPath(), reader.getMetadata(), dataType, startByte, endByte, totalBoundarySearchMillis)) { + reader.getPath(), + reader.getMetadata(), + dataType, + startByte, + endByte, + bufferSizeBytes, + totalBoundarySearchNanos)) { List batch = new ArrayList<>(batchSize); while (iterator.next()) { diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/listener/ValidationExecutionListener.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/listener/ValidationExecutionListener.java index 963014a03f4f..61c097fd98e4 100644 --- a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/listener/ValidationExecutionListener.java +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/listener/ValidationExecutionListener.java @@ -2,37 +2,53 @@ package com.hedera.statevalidation.poc.listener; import 
com.hedera.statevalidation.poc.util.ValidationException; +import edu.umd.cs.findbugs.annotations.NonNull; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -// update logging format +/** + * A {@link ValidationListener} implementation that logs validation lifecycle events + * and tracks overall validation failure status. + */ public class ValidationExecutionListener implements ValidationListener { private static final Logger log = LogManager.getLogger(ValidationExecutionListener.class); private volatile boolean failed = false; + /** + * {@inheritDoc} + *
Logs the validator start event at INFO level. + */ @Override - public void onValidationStarted(String tag) { - log.debug(framedString(tag + " started")); + public void onValidationStarted(@NonNull final String tag) { + log.info("Validator [{}] started", tag); } + /** + * {@inheritDoc} + *
Logs the validator completion event at INFO level. + */ @Override - public void onValidationCompleted(String tag) { - log.debug(framedString(tag + " finished")); + public void onValidationCompleted(@NonNull final String tag) { + log.info("Validator [{}] completed successfully", tag); } + /** + * {@inheritDoc} + *
Sets the failed flag and logs the failure event at ERROR level. + */ @Override - public void onValidationFailed(ValidationException error) { + public void onValidationFailed(@NonNull final ValidationException error) { this.failed = true; - log.debug(framedString(error.getValidatorTag() + " failed")); - } - - private String framedString(String stringToFrame) { - String frame = " ".repeat(stringToFrame.length() + 6); - return String.format("\n%s\n %s \n%s", frame, stringToFrame, frame); + log.error("Validator [{}] failed: {}", error.getValidatorTag(), error.getMessage(), error); } + /** + * Returns whether any validator has failed. + * + * @return {@code true} if at least one validator failed, {@code false} otherwise + */ public boolean isFailed() { return failed; } diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/listener/ValidationListener.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/listener/ValidationListener.java index fba64f6fc6b0..5c1179f81868 100644 --- a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/listener/ValidationListener.java +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/listener/ValidationListener.java @@ -2,18 +2,74 @@ package com.hedera.statevalidation.poc.listener; import com.hedera.statevalidation.poc.util.ValidationException; +import edu.umd.cs.findbugs.annotations.NonNull; /** - * Listener for validation lifecycle events. + * Listener interface for receiving notifications about validation lifecycle events. + * + *
This interface enables observers to track the progress and outcome of validators + * during state validation. Listeners are registered with the validation command and + * receive callbacks at key points in each validator's lifecycle: + *
+ * <ul>
+ *   <li>When a validator starts initialization</li>
+ *   <li>When a validator successfully completes validation</li>
+ *   <li>When a validator fails during initialization or validation</li>
+ * </ul>
+ *
Thread Safety: Implementations must be thread-safe as callbacks - * may be invoked concurrently from multiple processor threads. + * may be invoked concurrently from multiple processor threads. The validation + * pipeline processes data items in parallel, and validators may fail at any + * point during concurrent processing. + * + *
Lifecycle: Listeners are notified in the following order for each validator:
+ * <ol>
+ *   <li>{@link #onValidationStarted(String)} - Called before validator initialization</li>
+ *   <li>Either {@link #onValidationCompleted(String)} on success, or
+ *       {@link #onValidationFailed(ValidationException)} on failure</li>
+ * </ol>
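To make this lifecycle concrete, a hedged sketch of a small listener follows; the class name and counter are invented for illustration (the ValidationExecutionListener added in this patch is the implementation actually wired into the command):

import java.util.concurrent.atomic.AtomicInteger;

// Hypothetical listener that only counts failures; callbacks may arrive concurrently,
// so the counter must be atomic per the thread-safety note above.
final class FailureCountingListener implements ValidationListener {

    private final AtomicInteger failures = new AtomicInteger();

    @Override
    public void onValidationFailed(ValidationException error) {
        failures.incrementAndGet();
    }

    public int failureCount() {
        return failures.get();
    }
}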
+ * + * @see ValidationException */ public interface ValidationListener { - default void onValidationStarted(String tag) {} + /** + * Called when a validator begins its validation process. + * + *
This callback is invoked before the validator's initialization phase. + * It provides an opportunity to log or track which validators are being executed. + * + * @param tag the unique identifier tag of the validator that is starting, + * such as "internal", "leaf", "hdhm", "account", "tokenRelations", + * "entityIdCount", or "entityIdUniqueness" + */ + default void onValidationStarted(@NonNull String tag) {} - default void onValidationCompleted(String tag) {} + /** + * Called when a validator successfully completes its validation. + * + *
This callback is invoked after the validator's {@code validate()} method + * returns without throwing an exception, indicating that all validation + * assertions passed. + * + * @param tag the unique identifier tag of the validator that completed successfully + */ + default void onValidationCompleted(@NonNull String tag) {} - default void onValidationFailed(ValidationException error) {} + /** + * Called when a validator fails during initialization, processing, or final validation. + * + *
This callback is invoked when:
+ * <ul>
+ *   <li>A validator throws a {@link ValidationException} during initialization</li>
+ *   <li>A validator throws a {@link ValidationException} while processing data items</li>
+ *   <li>A validator throws a {@link ValidationException} during final validation</li>
+ *   <li>An unexpected exception occurs (wrapped in a {@link ValidationException})</li>
+ * </ul>
+ *
When a validator fails, it is automatically removed from the active validator set + * to prevent blocking the processing of subsequent data items. + * + * @param error the validation exception containing the validator tag and failure details + */ + default void onValidationFailed(@NonNull ValidationException error) {} } diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/model/ItemData.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/model/ItemData.java index 2425caf30210..41e9135de3da 100644 --- a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/model/ItemData.java +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/model/ItemData.java @@ -3,19 +3,43 @@ import com.hedera.pbj.runtime.io.buffer.Bytes; +/** + * Immutable data item read from a MerkleDB data file chunk for validation processing. + * + * @param type the MerkleDB data type + * @param bytes the serialized data content + * @param location the packed data location (file index + byte offset), or -1 for terminators + */ public record ItemData(Type type, Bytes bytes, long location) { + /** + * MerkleDB data file types used in the validation pipeline. + */ public enum Type { + /** Path to Key/Value - contains {@code VirtualLeafBytes} */ P2KV, + /** Path to Hash - contains {@code VirtualHashRecord} */ P2H, + /** Key to Path - contains HDHM {@code Bucket} entries */ K2P, + /** Sentinel value signaling processor threads to terminate */ TERMINATOR } + /** + * Creates a terminator item to signal processor thread shutdown. + * + * @return a poison pill item + */ public static ItemData poisonPill() { return new ItemData(Type.TERMINATOR, Bytes.EMPTY, -1L); } + /** + * Checks if this item signals thread termination. + * + * @return true if this is a poison pill + */ public boolean isPoisonPill() { return type == Type.TERMINATOR; } diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/pipeline/ChunkedFileIterator.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/pipeline/ChunkedFileIterator.java index fe7e96df0263..5ca30985618c 100644 --- a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/pipeline/ChunkedFileIterator.java +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/pipeline/ChunkedFileIterator.java @@ -27,30 +27,78 @@ import java.nio.file.StandardOpenOption; import java.util.concurrent.atomic.AtomicLong; +/** + * Iterator class for iterating over data items in a specific byte range (chunk) of a data file + * created by {@link com.swirlds.merkledb.files.DataFileWriter}. It is designed to be used in a + * {@code while(iter.next()){...}} loop, where you can then read the data items info for the + * current item with {@link #getDataItemData()} and {@link #getDataItemDataLocation()}. + * + *
Unlike {@link com.swirlds.merkledb.files.DataFileIterator} which reads an entire file + * sequentially, this iterator operates on a defined byte range, enabling parallel processing + * of large data files by creating multiple iterator instances working on different chunks + * of the same file concurrently. + * + *
When starting from a non-zero byte offset, the iterator automatically scans forward to + * locate a valid data item boundary by validating the protobuf structure of encountered data. + * Supported data types for boundary validation include {@link VirtualHashRecord}, + * {@link VirtualLeafBytes}, and {@link Bucket}. + * + *
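To show the intended read loop in context, here is a hedged usage sketch; the reader, byte range, buffer size, and counter variables are assumptions for illustration, while the constructor signature and iteration methods are the ones introduced by this patch:

// Illustrative read loop over one chunk of a data file; 'reader' is a DataFileReader and
// 'startByte'/'endByte' define the chunk assigned to this IO task.
final AtomicLong boundarySearchNanos = new AtomicLong();
try (ChunkedFileIterator it = new ChunkedFileIterator(
        reader.getPath(), reader.getMetadata(), ItemData.Type.P2KV,
        startByte, endByte, 128 * 1024, boundarySearchNanos)) {
    while (it.next()) {
        final BufferedData item = it.getDataItemData();      // shared buffer, do not retain
        final long location = it.getDataItemDataLocation();  // packed file index + offset
        // hand a copy of the item and its location to the processing queue here
    }
}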
Each iterator instance should be used from a single thread, but multiple instances + * can safely operate on different byte ranges of the same file in parallel. + * + * @see com.swirlds.merkledb.files.DataFileIterator + * @see com.swirlds.merkledb.files.DataFileReader + */ public class ChunkedFileIterator implements AutoCloseable { - // move to cmd params? - private static final int BUFFER_SIZE = 128 * 1024; - + /** File channel used for reading the data file and positioning within the byte range */ private final FileChannel channel; + /** The file metadata providing file index for data location calculation */ private final DataFileMetadata metadata; + /** The starting byte offset in the file for this chunk, adjusted to the nearest valid data item boundary */ private long startByte; + /** The ending byte offset in the file for this chunk (exclusive) */ private final long endByte; + /** The type of data items in this file, used for boundary validation when starting mid-file */ private final ItemData.Type dataType; + /** Buffer size in bytes for both boundary scanning and stream reading operations */ + private final int bufferSizeBytes; + + /** Buffered input stream this iterator is reading from */ private BufferedInputStream bufferedInputStream; + /** Readable sequential data on top of the buffered input stream */ private ReadableSequentialData in; + /** Buffer that is reused for reading each data item */ private BufferedData dataItemBuffer; + /** The offset in bytes from start of file to the beginning of the current data item */ private long currentDataItemFilePosition; + /** True if this iterator has been closed */ private boolean closed = false; + /** + * Create a new ChunkedFileIterator for a specific byte range of an existing data file. + * + *
If {@code startByte} is greater than zero, the constructor will scan forward from that + * position to find a valid data item boundary before beginning iteration. + * + * @param path the path to the data file to read + * @param metadata the file metadata providing the file index + * @param dataType the type of data items in this file, used for boundary validation + * @param startByte the starting byte offset in the file (will be adjusted to nearest boundary if non-zero) + * @param endByte the ending byte offset in the file (exclusive) + * @param bufferSizeBytes the buffer size for both boundary scanning and stream reading + * @param totalBoundarySearchMillis atomic counter to accumulate boundary search time in nanoseconds + * @throws IOException if there was a problem opening the file or finding a valid boundary + */ public ChunkedFileIterator( @NonNull final Path path, @NonNull final DataFileMetadata metadata, @NonNull final Type dataType, long startByte, long endByte, + int bufferSizeBytes, @NonNull final AtomicLong totalBoundarySearchMillis) throws IOException { this.channel = FileChannel.open(path, StandardOpenOption.READ); @@ -62,14 +110,14 @@ public ChunkedFileIterator( this.dataType = dataType; + this.bufferSizeBytes = bufferSizeBytes; + if (startByte > 0) { // Find boundary, then position channel and open streams - final long startTime = System.currentTimeMillis(); + final long startTimeNanos = System.nanoTime(); this.startByte += findBoundaryOffset(); - // FIXME: update to nanos - final long boundaryOffsetSearchTime = System.currentTimeMillis() - startTime; - // System.out.println("Found boundary offset in:" + boundaryOffsetSearchTime + " ms"); - totalBoundarySearchMillis.addAndGet(boundaryOffsetSearchTime); + final long boundaryOffsetSearchTimeNanos = System.nanoTime() - startTimeNanos; + totalBoundarySearchMillis.addAndGet(boundaryOffsetSearchTimeNanos); channel.position(this.startByte); openStreams(); } else { @@ -88,16 +136,107 @@ public ChunkedFileIterator( } } + /** + * Advance to the next data item within this chunk's byte range. + * + * @return true if a data item was read, or false if the end of the chunk has been reached + * @throws IOException if there was a problem reading from the file + * @throws IllegalStateException if the iterator has been closed + * @throws IllegalArgumentException if an unknown data file field is encountered + */ + public boolean next() throws IOException { + if (closed) { + throw new IllegalStateException("Cannot read from a closed iterator"); + } + + while (in.hasRemaining()) { + currentDataItemFilePosition = startByte + in.position(); + + if (currentDataItemFilePosition >= endByte) { + return false; + } + + final int tag = in.readVarInt(false); + final int fieldNum = tag >> TAG_FIELD_OFFSET; + + if (fieldNum == FIELD_DATAFILE_ITEMS.number()) { + final int dataItemSize = in.readVarInt(false); + dataItemBuffer = fillBuffer(dataItemSize); + return true; + } else if (fieldNum == FIELD_DATAFILE_METADATA.number()) { + final int metadataSize = in.readVarInt(false); + in.skip(metadataSize); + } else { + throw new IllegalArgumentException("Unknown data file field: " + fieldNum); + } + } + + return false; + } + + /** + * Get the current data item's data. This is a shared buffer and must NOT be leaked from + * the call site or modified directly. 
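Since the returned buffer is reused between iterations, a caller that needs to keep an item beyond the current loop pass would typically copy it first; a minimal sketch, assuming the PBJ getBytes call yields a detached copy and that 'iterator' is a ChunkedFileIterator positioned on an item:

// Copy the shared buffer into an independent Bytes value before retaining or queueing it.
final BufferedData data = iterator.getDataItemData();
final Bytes copy = data.getBytes(0, data.remaining());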
+ * + * @return buffer containing the data item bytes, or null if the iterator has been closed + * or is in the before-first or after-last states + */ + public BufferedData getDataItemData() { + return dataItemBuffer; + } + + /** + * Get the data location (file index + byte offset) for the current data item. + * + * @return current data item location encoded as a long value + */ + public long getDataItemDataLocation() { + return DataFileCommon.dataLocation(metadata.getIndex(), currentDataItemFilePosition); + } + + /** + * Close the iterator, releasing all resources including the file channel and streams. + * + * @throws IOException if this resource cannot be closed + */ + @Override + public void close() throws IOException { + if (!closed) { + closed = true; + dataItemBuffer = null; + if (bufferedInputStream != null) { + bufferedInputStream.close(); + } + channel.close(); + } + } + + // ================================================================================================================= + // Private methods + + /** + * Opens buffered input streams on top of the file channel for sequential reading. + */ private void openStreams() { final var channelStream = Channels.newInputStream(channel); - this.bufferedInputStream = new BufferedInputStream(channelStream, BUFFER_SIZE); + this.bufferedInputStream = new BufferedInputStream(channelStream, bufferSizeBytes); this.in = new ReadableStreamingData(bufferedInputStream); } + /** + * Scans forward from the current {@code startByte} position to find the offset to the nearest + * valid data item boundary. Uses buffered reads to minimize disk I/O. + * + *

The method reads a chunk of data and scans byte-by-byte looking for a valid protobuf tag + * followed by data that can be successfully parsed according to the {@code dataType}. + * + * @return the offset from {@code startByte} to the nearest valid data item boundary + * @throws IOException if no valid boundary is found within the buffer or if reading fails + */ private long findBoundaryOffset() throws IOException { // Use buffer to minimize disk I/O and channel repositioning // It should account for boundary + full data item to validate its proto schema - final ByteBuffer scanBuffer = ByteBuffer.allocate(BUFFER_SIZE); + final ByteBuffer scanBuffer = ByteBuffer.allocate(bufferSizeBytes); // Read large chunk at current position scanBuffer.clear(); @@ -148,6 +287,12 @@ private long findBoundaryOffset() throws IOException { throw new IOException("No valid data item boundary found in chunk"); } + /** + * Validates whether the buffer contains a valid data item of the expected type. + * + * @param buffer the buffer containing potential data item bytes + * @return true if the buffer contains valid data that can be parsed, false otherwise + */ private boolean isValidDataItem(@NonNull final BufferedData buffer) { try { if (!buffer.hasRemaining()) { @@ -168,77 +313,54 @@ private boolean isValidDataItem(@NonNull final BufferedData buffer) { } } + /** + * Attempts to parse the buffer as a {@link VirtualHashRecord}. + * + * @param buffer the buffer containing potential hash record bytes + * @return true if parsing succeeds + */ private boolean validateVirtualHashRecord(@NonNull final BufferedData buffer) { VirtualHashRecord.parseFrom(buffer); return true; } + /** + * Attempts to parse the buffer as a {@link VirtualLeafBytes}. + * + * @param buffer the buffer containing potential leaf bytes + * @return true if parsing succeeds + */ private boolean validateVirtualLeafBytes(@NonNull final BufferedData buffer) { VirtualLeafBytes.parseFrom(buffer); return true; } - private boolean validateBucket(@NonNull final BufferedData buffer) { - final Bucket bucket = new ParsedBucket(); - bucket.readFrom(buffer); - return true; - } - - public boolean next() throws IOException { - if (closed) { - throw new IllegalStateException("Cannot read from a closed iterator"); - } - - while (in.hasRemaining()) { - currentDataItemFilePosition = startByte + in.position(); - - if (currentDataItemFilePosition >= endByte) { - return false; - } - - final int tag = in.readVarInt(false); - final int fieldNum = tag >> TAG_FIELD_OFFSET; - - if (fieldNum == FIELD_DATAFILE_ITEMS.number()) { - final int dataItemSize = in.readVarInt(false); - dataItemBuffer = fillBuffer(dataItemSize); - return true; - } else if (fieldNum == FIELD_DATAFILE_METADATA.number()) { - final int metadataSize = in.readVarInt(false); - in.skip(metadataSize); - } else { - throw new IllegalArgumentException("Unknown data file field: " + fieldNum); - } - } - - return false; - } - - public BufferedData getDataItemData() { - return dataItemBuffer; - } - - public long getDataItemDataLocation() { - return DataFileCommon.dataLocation(metadata.getIndex(), currentDataItemFilePosition); - } - - @Override - public void close() throws IOException { - if (!closed) { - closed = true; - dataItemBuffer = null; - if (bufferedInputStream != null) { - bufferedInputStream.close(); - } - channel.close(); + /** + * Attempts to parse the buffer as a {@link Bucket}. 
+ * + * @param buffer the buffer containing potential bucket bytes + * @return true if parsing succeeds + */ + private boolean validateBucket(@NonNull final BufferedData buffer) throws IOException { + try (final Bucket bucket = new ParsedBucket()) { + bucket.readFrom(buffer); + return true; } } + /** + * Reads the specified number of bytes from the current position into a buffer. + * + * @param bytesToRead number of bytes to read + * @return buffer containing the requested bytes + * @throws IOException if the requested bytes cannot be read or if bytesToRead is invalid + */ private BufferedData fillBuffer(int bytesToRead) throws IOException { if (bytesToRead <= 0) { throw new IOException("Malformed data, requested bytes: " + bytesToRead); } + // Create or resize the buffer if necessary if (dataItemBuffer == null || dataItemBuffer.capacity() < bytesToRead) { dataItemBuffer = BufferedData.allocate(bytesToRead); } diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/pipeline/ProcessorTask.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/pipeline/ProcessorTask.java index 9f174ec5282e..05eb23723608 100644 --- a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/pipeline/ProcessorTask.java +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/pipeline/ProcessorTask.java @@ -25,26 +25,70 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +/** + * A concurrent task that processes batches of data items from a blocking queue and dispatches them + * to appropriate validators based on data type. + * + *

This class is designed to run as part of a parallel validation pipeline where multiple
+ * {@code ProcessorTask} instances consume data items from a shared queue. Each task processes
+ * three types of MerkleDB data:
+ * <ul>
+ *   <li>P2KV (Path to Key/Value) - Virtual leaf bytes processed by {@link LeafBytesValidator}</li>
+ *   <li>P2H (Path to Hash) - Virtual hash records processed by {@link HashRecordValidator}</li>
+ *   <li>K2P (Key to Path) - HDHM buckets processed by {@link HdhmBucketValidator}</li>
+ * </ul>
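+ *
+ * <p>A rough wiring sketch (illustrative only; the validator map, listener list, data source,
+ * stats object and the poison-pill batch are assumptions about the surrounding pipeline code,
+ * not part of this class):
+ * <pre>{@code
+ * BlockingQueue<List<ItemData>> queue = new LinkedBlockingQueue<>(1024);
+ * ExecutorService pool = Executors.newFixedThreadPool(processThreads);
+ * for (int i = 0; i < processThreads; i++) {
+ *     pool.submit(new ProcessorTask(validators, listeners, queue, dataSource, stats));
+ * }
+ * // reader threads put batches of ItemData on the queue;
+ * // once they finish, one poison-pill batch per task is enqueued so every task terminates
+ * }</pre>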

The processor validates each data item's location against the corresponding index to determine + * if it represents a live object or obsolete data. Live objects are passed to registered validators, + * while obsolete items are tracked in statistics. Thread-safe validator sets ({@link CopyOnWriteArraySet}) + * allow validators to be removed dynamically if they fail during processing. + * + *

The task terminates gracefully when it receives a poison pill item in the queue. + * + * @see Validator + * @see ValidationListener + * @see DataStats + */ public class ProcessorTask implements Callable { private static final Logger log = LogManager.getLogger(ProcessorTask.class); + /** List of listeners to notify about validation events such as start, completion, or failure */ private final List validationListeners; + /** Thread-safe set of validators for P2KV (Path to Key/Value) data items, may be null if no P2KV validators configured */ private final CopyOnWriteArraySet p2kvValidators; + /** Thread-safe set of validators for P2H (Path to Hash) data items, may be null if no P2H validators configured */ private final CopyOnWriteArraySet p2hValidators; + /** Thread-safe set of validators for K2P (Key to Path) data items, may be null if no K2P validators configured */ private final CopyOnWriteArraySet k2pValidators; + /** The MerkleDB data source providing access to file collections for error logging */ private final MerkleDbDataSource vds; + /** Blocking queue from which batches of data items are consumed for processing */ private final BlockingQueue> dataQueue; + /** Index mapping leaf node paths to their disk locations, used to determine if P2KV items are live */ private final LongList pathToDiskLocationLeafNodes; + /** Index mapping internal node paths to their disk locations, used to determine if P2H items are live */ private final LongList pathToDiskLocationInternalNodes; + /** Index mapping bucket indexes to their disk locations, used to determine if K2P items are live */ private final LongList bucketIndexToBucketLocation; + /** Statistics collector for tracking item counts, space usage, and error counts per data type */ private final DataStats dataStats; + /** + * Creates a new ProcessorTask that consumes data items from a queue and dispatches them to validators. + * + * @param validators map of data types to their corresponding validator sets; validators may be + * dynamically removed from these sets if they fail during processing + * @param validationListeners listeners to notify about validation lifecycle events + * @param dataQueue the blocking queue from which batches of data items are consumed + * @param vds the MerkleDB data source providing location indexes and file collections + * @param dataStats statistics collector for tracking processing metrics + */ public ProcessorTask( @NonNull final Map> validators, @NonNull final List validationListeners, @@ -68,6 +112,16 @@ public ProcessorTask( this.dataStats = dataStats; } + /** + * Executes the processor task, continuously consuming and processing batches of data items + * from the queue until a poison pill is received or the thread is interrupted. + * + *

Each batch is processed sequentially, with individual items dispatched to the appropriate + * processing method based on their data type. The task terminates gracefully when it encounters + * a poison pill item in any batch. + * + * @return always returns {@code null} upon completion + */ @Override public Void call() { try { @@ -95,6 +149,11 @@ public Void call() { return null; } + /** + * Dispatches a single data item to the appropriate processing method based on its type. + * + * @param data the data item to process + */ private void processChunk(@NonNull final ItemData data) { switch (data.type()) { case P2KV -> processVirtualLeafBytes(data); @@ -103,17 +162,32 @@ private void processChunk(@NonNull final ItemData data) { } } + /** + * Processes a P2KV (Path to Key/Value) data item containing virtual leaf bytes. + * + *

This method performs the following operations:
+ * <ol>
+ *   <li>Updates space and item count statistics</li>
+ *   <li>For live items, passes them to all registered P2KV validators</li>
+ * </ol>
+ *

If a validator throws an exception during processing, it is removed from the validator set + * and listeners are notified of the failure. This ensures that a failing validator does not + * block processing of subsequent items. + * + * @param data the P2KV data item to process + */ private void processVirtualLeafBytes(@NonNull final ItemData data) { try { dataStats.getP2kv().addSpaceSize(data.bytes().length()); dataStats.getP2kv().incrementItemCount(); - final VirtualLeafBytes virtualLeafBytes = + final VirtualLeafBytes virtualLeafBytes = VirtualLeafBytes.parseFrom(data.bytes().toReadableSequentialData()); final long path = virtualLeafBytes.path(); if (data.location() == pathToDiskLocationLeafNodes.get(path)) { - // live object, perform ops on it... + // Live object, perform ops on it... if (p2kvValidators == null || p2kvValidators.isEmpty()) { return; } @@ -121,12 +195,12 @@ private void processVirtualLeafBytes(@NonNull final ItemData data) { try { ((LeafBytesValidator) validator).processLeafBytes(data.location(), virtualLeafBytes); } catch (final ValidationException e) { - // Remove validator and notify listeners only once (removeIf returns true only for the thread - // that removes) + // Remove validator and notify listeners only once if (p2kvValidators.removeIf(v -> v.getTag().equals(validator.getTag()))) { validationListeners.forEach(listener -> listener.onValidationFailed(e)); } } catch (final Exception e) { + // Remove validator and notify listeners only once if (p2kvValidators.removeIf(v -> v.getTag().equals(validator.getTag()))) { validationListeners.forEach(listener -> listener.onValidationFailed(new ValidationException( validator.getTag(), "Unexpected exception: " + e.getMessage(), e))); @@ -141,7 +215,7 @@ private void processVirtualLeafBytes(@NonNull final ItemData data) { vds.getPathToKeyValue().getFileCollection(), data); } else { - // add to wasted items/space + // Add to wasted items/space dataStats.getP2kv().addObsoleteSpaceSize(data.bytes().length()); dataStats.getP2kv().incrementObsoleteItemCount(); } @@ -152,6 +226,21 @@ private void processVirtualLeafBytes(@NonNull final ItemData data) { } } + /** + * Processes a P2H (Path to Hash) data item containing a virtual hash record. + * + *

This method performs the following operations:
+ * <ol>
+ *   <li>Updates space and item count statistics</li>
+ *   <li>For live items, passes them to all registered P2H validators</li>
+ * </ol>
+ *

If a validator throws an exception during processing, it is removed from the validator set + * and listeners are notified of the failure. This ensures that a failing validator does not + * block processing of subsequent items. + * + * @param data the P2H data item to process + */ private void processVirtualHashRecord(@NonNull final ItemData data) { try { dataStats.getP2h().addSpaceSize(data.bytes().length()); @@ -162,7 +251,7 @@ private void processVirtualHashRecord(@NonNull final ItemData data) { final long path = virtualHashRecord.path(); if (data.location() == pathToDiskLocationInternalNodes.get(path)) { - // live object, perform ops on it... + // Live object, perform ops on it... if (p2hValidators == null || p2hValidators.isEmpty()) { return; } @@ -170,14 +259,12 @@ private void processVirtualHashRecord(@NonNull final ItemData data) { try { ((HashRecordValidator) validator).processHashRecord(virtualHashRecord); } catch (final ValidationException e) { - // Remove validator and notify listeners only once (removeIf returns true only for the thread - // that removes) + // Remove validator and notify listeners only once if (p2hValidators.removeIf(v -> v.getTag().equals(validator.getTag()))) { validationListeners.forEach(listener -> listener.onValidationFailed(e)); } } catch (final Exception e) { - // Remove validator and notify listeners only once (removeIf returns true only for the thread - // that removes) + // Remove validator and notify listeners only once if (p2hValidators.removeIf(v -> v.getTag().equals(validator.getTag()))) { validationListeners.forEach(listener -> listener.onValidationFailed(new ValidationException( validator.getTag(), "Unexpected exception: " + e.getMessage(), e))); @@ -192,7 +279,7 @@ private void processVirtualHashRecord(@NonNull final ItemData data) { vds.getHashStoreDisk().getFileCollection(), data); } else { - // add to wasted items/space + // Add to wasted items/space dataStats.getP2h().addObsoleteSpaceSize(data.bytes().length()); dataStats.getP2h().incrementObsoleteItemCount(); } @@ -203,48 +290,63 @@ private void processVirtualHashRecord(@NonNull final ItemData data) { } } + /** + * Processes a K2P (Key to Path) data item containing an HDHM bucket. + * + *

This method performs the following operations:
+ * <ol>
+ *   <li>Updates space and item count statistics</li>
+ *   <li>For live items, passes them to all registered K2P validators</li>
+ * </ol>
+ *

If a validator throws an exception during processing, it is removed from the validator set + * and listeners are notified of the failure. This ensures that a failing validator does not + * block processing of subsequent items. + * + * @param data the K2P data item to process + */ private void processBucket(@NonNull final ItemData data) { try { dataStats.getK2p().addSpaceSize(data.bytes().length()); dataStats.getK2p().incrementItemCount(); - final ParsedBucket bucket = new ParsedBucket(); - bucket.readFrom(data.bytes().toReadableSequentialData()); + try (final ParsedBucket bucket = new ParsedBucket()) { + bucket.readFrom(data.bytes().toReadableSequentialData()); - if (data.location() == bucketIndexToBucketLocation.get(bucket.getBucketIndex())) { - // live object, perform ops on it... - if (k2pValidators == null || k2pValidators.isEmpty()) { - return; - } - k2pValidators.forEach(validator -> { - try { - ((HdhmBucketValidator) validator).processBucket(data.location(), bucket); - } catch (final ValidationException e) { - // Remove validator and notify listeners only once (removeIf returns true only for the thread - // that removes) - if (k2pValidators.removeIf(v -> v.getTag().equals(validator.getTag()))) { - validationListeners.forEach(listener -> listener.onValidationFailed(e)); - } - } catch (final Exception e) { - // Remove validator and notify listeners only once (removeIf returns true only for the thread - // that removes) - if (k2pValidators.removeIf(v -> v.getTag().equals(validator.getTag()))) { - validationListeners.forEach(listener -> listener.onValidationFailed(new ValidationException( - validator.getTag(), "Unexpected exception: " + e.getMessage(), e))); - } + if (data.location() == bucketIndexToBucketLocation.get(bucket.getBucketIndex())) { + // Live object, perform ops on it... 
+ if (k2pValidators == null || k2pValidators.isEmpty()) { + return; } - }); - } else if (data.location() == -1) { - dataStats.getK2p().incrementInvalidLocationCount(); - LogUtils.printFileDataLocationErrorPoc( - log, - "data.location() was -1 for K2P entry", - vds.getKeyToPath().getFileCollection(), - data); - } else { - // add to wasted items/space - dataStats.getK2p().addObsoleteSpaceSize(data.bytes().length()); - dataStats.getK2p().incrementObsoleteItemCount(); + k2pValidators.forEach(validator -> { + try { + ((HdhmBucketValidator) validator).processBucket(data.location(), bucket); + } catch (final ValidationException e) { + // Remove validator and notify listeners only once + if (k2pValidators.removeIf(v -> v.getTag().equals(validator.getTag()))) { + validationListeners.forEach(listener -> listener.onValidationFailed(e)); + } + } catch (final Exception e) { + // Remove validator and notify listeners only once + if (k2pValidators.removeIf(v -> v.getTag().equals(validator.getTag()))) { + validationListeners.forEach( + listener -> listener.onValidationFailed(new ValidationException( + validator.getTag(), "Unexpected exception: " + e.getMessage(), e))); + } + } + }); + } else if (data.location() == -1) { + dataStats.getK2p().incrementInvalidLocationCount(); + LogUtils.printFileDataLocationErrorPoc( + log, + "data.location() was -1 for K2P entry", + vds.getKeyToPath().getFileCollection(), + data); + } else { + // Add to wasted items/space + dataStats.getK2p().addObsoleteSpaceSize(data.bytes().length()); + dataStats.getK2p().incrementObsoleteItemCount(); + } } } catch (final Exception e) { dataStats.getK2p().incrementParseErrorCount(); diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/util/ValidationAssertions.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/util/ValidationAssertions.java index 5659bf1def7c..c3fc20ebabaa 100644 --- a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/util/ValidationAssertions.java +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/util/ValidationAssertions.java @@ -6,14 +6,10 @@ /** * Utility class providing assertion-like methods for state validation. - * Unlike JUnit assertions, these are designed for production validation scenarios - * and provide detailed error context suitable for operational debugging. */ public final class ValidationAssertions { - private ValidationAssertions() { - // Utility class - no instantiation - } + private ValidationAssertions() {} /** * Validates that an object is not null. diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/util/ValidationException.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/util/ValidationException.java index 891c088ae68c..15b475f2c5a1 100644 --- a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/util/ValidationException.java +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/util/ValidationException.java @@ -1,6 +1,8 @@ // SPDX-License-Identifier: Apache-2.0 package com.hedera.statevalidation.poc.util; +import edu.umd.cs.findbugs.annotations.NonNull; + /** * Exception thrown when a validation check fails. 
*/ @@ -8,12 +10,13 @@ public class ValidationException extends RuntimeException { private final String validatorTag; - public ValidationException(String validatorTag, String message) { + public ValidationException(@NonNull final String validatorTag, @NonNull final String message) { super(String.format("[%s] Validation failed: %s", validatorTag, message)); this.validatorTag = validatorTag; } - public ValidationException(String validatorTag, String message, Throwable cause) { + public ValidationException( + @NonNull final String validatorTag, @NonNull final String message, @NonNull final Throwable cause) { super(String.format("[%s] Validation failed at: %s", validatorTag, message), cause); this.validatorTag = validatorTag; } diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/AccountAndSupplyValidator.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/AccountAndSupplyValidator.java index 9f4daf632c58..3df228df7507 100644 --- a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/AccountAndSupplyValidator.java +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/AccountAndSupplyValidator.java @@ -24,6 +24,9 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +/** + * @see LeafBytesValidator + */ public class AccountAndSupplyValidator implements LeafBytesValidator { private static final Logger log = LogManager.getLogger(AccountAndSupplyValidator.class); @@ -40,11 +43,17 @@ public class AccountAndSupplyValidator implements LeafBytesValidator { private long numAccounts; + /** + * {@inheritDoc} + */ @Override - public String getTag() { + public @NonNull String getTag() { return ACCOUNT_TAG; } + /** + * {@inheritDoc} + */ @Override public void initialize(@NonNull final MerkleNodeState state) { final VirtualMap virtualMap = (VirtualMap) state.getRoot(); @@ -62,8 +71,11 @@ public void initialize(@NonNull final MerkleNodeState state) { log.debug("Number of accounts: {}", numAccounts); } + /** + * {@inheritDoc} + */ @Override - public void processLeafBytes(long dataLocation, @NonNull final VirtualLeafBytes leafBytes) { + public void processLeafBytes(long dataLocation, @NonNull final VirtualLeafBytes leafBytes) { final Bytes keyBytes = leafBytes.keyBytes(); final Bytes valueBytes = leafBytes.valueBytes(); final int readKeyStateId = StateKeyUtils.extractStateIdFromStateKeyOneOf(keyBytes); @@ -84,6 +96,9 @@ public void processLeafBytes(long dataLocation, @NonNull final VirtualLeafBytes } } + /** + * {@inheritDoc} + */ @Override public void validate() { ValidationAssertions.requireEqual(TOTAL_tHBAR_SUPPLY, totalBalance.get(), getTag()); diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/EntityIdCountValidator.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/EntityIdCountValidator.java index f88b78b9ffa6..f2fbf1f9631d 100644 --- a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/EntityIdCountValidator.java +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/EntityIdCountValidator.java @@ -16,6 +16,9 @@ import java.util.Objects; import java.util.concurrent.atomic.AtomicLong; +/** + * @see LeafBytesValidator + */ public class EntityIdCountValidator implements LeafBytesValidator { public static final String ENTITY_ID_COUNT_TAG = "entityIdCount"; @@ -38,11 +41,17 @@ public class EntityIdCountValidator implements 
LeafBytesValidator { private final AtomicLong hookCount = new AtomicLong(0); private final AtomicLong lambdaStorageCount = new AtomicLong(0); + /** + * {@inheritDoc} + */ @Override - public String getTag() { + public @NonNull String getTag() { return ENTITY_ID_COUNT_TAG; } + /** + * {@inheritDoc} + */ @Override public void initialize(@NonNull final MerkleNodeState state) { final ReadableSingletonState entityIdSingleton = @@ -50,8 +59,11 @@ public void initialize(@NonNull final MerkleNodeState state) { this.entityCounts = Objects.requireNonNull(entityIdSingleton.get()); } + /** + * {@inheritDoc} + */ @Override - public void processLeafBytes(long dataLocation, @NonNull final VirtualLeafBytes leafBytes) { + public void processLeafBytes(long dataLocation, @NonNull final VirtualLeafBytes leafBytes) { try { final StateKey key = StateKey.PROTOBUF.parse(leafBytes.keyBytes()); switch (key.key().kind()) { @@ -76,6 +88,9 @@ public void processLeafBytes(long dataLocation, @NonNull final VirtualLeafBytes } } + /** + * {@inheritDoc} + */ @Override public void validate() { ValidationAssertions.requireNonNull(entityCounts, getTag()); diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/EntityIdUniquenessValidator.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/EntityIdUniquenessValidator.java index ca3dc4144e94..3f93f7db151d 100644 --- a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/EntityIdUniquenessValidator.java +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/EntityIdUniquenessValidator.java @@ -38,6 +38,9 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +/** + * @see LeafBytesValidator + */ public class EntityIdUniquenessValidator implements LeafBytesValidator { private static final Logger log = LogManager.getLogger(EntityIdUniquenessValidator.class); @@ -54,11 +57,17 @@ public class EntityIdUniquenessValidator implements LeafBytesValidator { private final AtomicInteger issuesFound = new AtomicInteger(0); + /** + * {@inheritDoc} + */ @Override - public String getTag() { + public @NonNull String getTag() { return ENTITY_ID_UNIQUENESS_TAG; } + /** + * {@inheritDoc} + */ @Override public void initialize(@NonNull final MerkleNodeState state) { this.tokensState = Objects.requireNonNull( @@ -75,8 +84,11 @@ public void initialize(@NonNull final MerkleNodeState state) { state.getReadableStates(ScheduleService.NAME).get(SCHEDULES_BY_ID_STATE_ID)); } + /** + * {@inheritDoc} + */ @Override - public void processLeafBytes(long dataLocation, @NonNull final VirtualLeafBytes leafBytes) { + public void processLeafBytes(long dataLocation, @NonNull final VirtualLeafBytes leafBytes) { long entityId = IMPERMISSIBLE_ENTITY_ID; try { @@ -116,6 +128,9 @@ public void processLeafBytes(long dataLocation, @NonNull final VirtualLeafBytes } } + /** + * {@inheritDoc} + */ @Override public void validate() { ValidationAssertions.requireEqual(0, issuesFound.get(), getTag()); diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/HashRecordIntegrityValidator.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/HashRecordIntegrityValidator.java index da8e04b61040..b1936cb67489 100644 --- a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/HashRecordIntegrityValidator.java +++ 
b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/HashRecordIntegrityValidator.java @@ -10,6 +10,9 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +/** + * @see HashRecordValidator + */ public class HashRecordIntegrityValidator implements HashRecordValidator { private static final Logger log = LogManager.getLogger(HashRecordIntegrityValidator.class); @@ -18,20 +21,32 @@ public class HashRecordIntegrityValidator implements HashRecordValidator { private final AtomicInteger totalEntriesProcessed = new AtomicInteger(0); + /** + * {@inheritDoc} + */ @Override - public String getTag() { + public @NonNull String getTag() { return INTERNAL_TAG; } + /** + * {@inheritDoc} + */ @Override public void initialize(@NonNull final MerkleNodeState state) {} + /** + * {@inheritDoc} + */ @Override public void processHashRecord(@NonNull final VirtualHashRecord hashRecord) { ValidationAssertions.requireNonNull(hashRecord.hash(), getTag()); totalEntriesProcessed.incrementAndGet(); } + /** + * {@inheritDoc} + */ @Override public void validate() { log.debug("Successfully checked {} VirtualHashRecord entries", totalEntriesProcessed.get()); diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/HdhmBucketIntegrityValidator.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/HdhmBucketIntegrityValidator.java index ff5b65431e50..a1195f77e137 100644 --- a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/HdhmBucketIntegrityValidator.java +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/HdhmBucketIntegrityValidator.java @@ -24,6 +24,9 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +/** + * @see HdhmBucketValidator + */ public class HdhmBucketIntegrityValidator implements HdhmBucketValidator { private static final Logger log = LogManager.getLogger(HdhmBucketIntegrityValidator.class); @@ -40,11 +43,17 @@ public class HdhmBucketIntegrityValidator implements HdhmBucketValidator { private final CopyOnWriteArrayList pathMismatchInfos = new CopyOnWriteArrayList<>(); private final CopyOnWriteArrayList hashCodeMismatchInfos = new CopyOnWriteArrayList<>(); + /** + * {@inheritDoc} + */ @Override - public String getTag() { + public @NonNull String getTag() { return HDHM_TAG; } + /** + * {@inheritDoc} + */ @Override public void initialize(@NonNull final MerkleNodeState state) { final VirtualMap virtualMap = (VirtualMap) state.getRoot(); @@ -57,6 +66,9 @@ public void initialize(@NonNull final MerkleNodeState state) { this.pathToDiskLocationLeafNodes = vds.getPathToDiskLocationLeafNodes(); } + /** + * {@inheritDoc} + */ @Override public void processBucket(long bucketLocation, @NonNull final ParsedBucket bucket) { Objects.requireNonNull(pathToKeyValueDfc); @@ -109,6 +121,9 @@ public void processBucket(long bucketLocation, @NonNull final ParsedBucket bucke } } + /** + * {@inheritDoc} + */ @Override public void validate() { if (!stalePathsInfos.isEmpty()) { diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/LeafBytesIntegrityValidator.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/LeafBytesIntegrityValidator.java index 0b59c31cea4f..ef0d9f770a7c 100644 --- a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/LeafBytesIntegrityValidator.java +++ 
b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/LeafBytesIntegrityValidator.java @@ -19,6 +19,9 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +/** + * @see LeafBytesValidator + */ public class LeafBytesIntegrityValidator implements LeafBytesValidator { private static final Logger log = LogManager.getLogger(LeafBytesIntegrityValidator.class); @@ -32,11 +35,17 @@ public class LeafBytesIntegrityValidator implements LeafBytesValidator { private final AtomicInteger successCount = new AtomicInteger(0); private final AtomicInteger exceptionCount = new AtomicInteger(0); + /** + * {@inheritDoc} + */ @Override - public String getTag() { + public @NonNull String getTag() { return LEAF_TAG; } + /** + * {@inheritDoc} + */ @Override public void initialize(@NonNull final MerkleNodeState state) { this.virtualMap = (VirtualMap) state.getRoot(); @@ -45,8 +54,11 @@ public void initialize(@NonNull final MerkleNodeState state) { this.keyToPath = vds.getKeyToPath(); } + /** + * {@inheritDoc} + */ @Override - public void processLeafBytes(long dataLocation, @NonNull final VirtualLeafBytes leafBytes) { + public void processLeafBytes(long dataLocation, @NonNull final VirtualLeafBytes leafBytes) { Objects.requireNonNull(virtualMap); Objects.requireNonNull(pathToKeyValueDfc); Objects.requireNonNull(keyToPath); @@ -67,6 +79,9 @@ public void processLeafBytes(long dataLocation, @NonNull final VirtualLeafBytes } } + /** + * {@inheritDoc} + */ @Override public void validate() { log.debug("Successfully checked {} VirtualLeafBytes entries", successCount.get()); diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/TokenRelationsIntegrityValidator.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/TokenRelationsIntegrityValidator.java index dfe9b6c26272..4befdfd1e78a 100644 --- a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/TokenRelationsIntegrityValidator.java +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/TokenRelationsIntegrityValidator.java @@ -26,6 +26,9 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +/** + * @see LeafBytesValidator + */ public class TokenRelationsIntegrityValidator implements LeafBytesValidator { private static final Logger log = LogManager.getLogger(TokenRelationsIntegrityValidator.class); @@ -39,11 +42,17 @@ public class TokenRelationsIntegrityValidator implements LeafBytesValidator { private final AtomicInteger accountFailCounter = new AtomicInteger(0); private final AtomicInteger tokenFailCounter = new AtomicInteger(0); + /** + * {@inheritDoc} + */ @Override - public String getTag() { + public @NonNull String getTag() { return TOKEN_RELATIONS_TAG; } + /** + * {@inheritDoc} + */ @Override public void initialize(@NonNull final MerkleNodeState state) { this.virtualMap = (VirtualMap) state.getRoot(); @@ -55,8 +64,11 @@ public void initialize(@NonNull final MerkleNodeState state) { log.debug("Number of token relations: {}", numTokenRelations); } + /** + * {@inheritDoc} + */ @Override - public void processLeafBytes(long dataLocation, @NonNull final VirtualLeafBytes leafBytes) { + public void processLeafBytes(long dataLocation, @NonNull final VirtualLeafBytes leafBytes) { Objects.requireNonNull(virtualMap); final Bytes keyBytes = leafBytes.keyBytes(); @@ -103,6 +115,9 @@ public void processLeafBytes(long dataLocation, @NonNull final VirtualLeafBytes } } + 
/** + * {@inheritDoc} + */ @Override public void validate() { ValidationAssertions.requireEqual(objectsProcessed.get(), numTokenRelations, getTag()); diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/api/HashRecordValidator.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/api/HashRecordValidator.java index b9b496f1deca..9a897b4514b8 100644 --- a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/api/HashRecordValidator.java +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/api/HashRecordValidator.java @@ -5,9 +5,19 @@ import edu.umd.cs.findbugs.annotations.NonNull; /** - * Marker interface for validators that can process virtual hash records - * to validate internal indexes. + * Validator interface for processing P2H (Path to Hash) data items. + * + *

Implementations receive {@link VirtualHashRecord} entries containing internal node hashes + * from the MerkleDB hash store. Called concurrently from multiple processor threads. + * + * @see Validator */ public interface HashRecordValidator extends Validator { + + /** + * Processes a single virtual hash record entry. + * + * @param virtualHashRecord the parsed hash record containing path and hash data + */ void processHashRecord(@NonNull VirtualHashRecord virtualHashRecord); } diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/api/HdhmBucketValidator.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/api/HdhmBucketValidator.java index 78fea87e6f80..a857faf6ea4f 100644 --- a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/api/HdhmBucketValidator.java +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/api/HdhmBucketValidator.java @@ -4,6 +4,21 @@ import com.swirlds.merkledb.files.hashmap.ParsedBucket; import edu.umd.cs.findbugs.annotations.NonNull; +/** + * Validator interface for processing K2P (Key to Path) data items. + * + *

Implementations receive {@link ParsedBucket} entries from the HalfDiskHashMap (HDHM) + * key-to-path index. Called concurrently from multiple processor threads. + * + * @see Validator + */ public interface HdhmBucketValidator extends Validator { + + /** + * Processes a single HDHM bucket entry. + * + * @param bucketLocation the packed data location (file index + byte offset) of this bucket + * @param bucket the parsed bucket containing key-to-path mappings + */ void processBucket(long bucketLocation, @NonNull ParsedBucket bucket); } diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/api/LeafBytesValidator.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/api/LeafBytesValidator.java index 4a0640788ef1..2195b177a2d0 100644 --- a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/api/LeafBytesValidator.java +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/api/LeafBytesValidator.java @@ -4,6 +4,21 @@ import com.swirlds.virtualmap.datasource.VirtualLeafBytes; import edu.umd.cs.findbugs.annotations.NonNull; +/** + * Validator interface for processing P2KV (Path to Key/Value) data items. + * + *

Implementations receive {@link VirtualLeafBytes} containing serialized key-value pairs + * from the MerkleDB leaf store. Called concurrently from multiple processor threads. + * + * @see Validator + */ public interface LeafBytesValidator extends Validator { - void processLeafBytes(long dataLocation, @NonNull VirtualLeafBytes leafBytes); + + /** + * Processes a single virtual leaf bytes entry. + * + * @param dataLocation the packed data location (file index + byte offset) of this entry + * @param leafBytes the parsed leaf data containing path, key bytes, and value bytes + */ + void processLeafBytes(long dataLocation, @NonNull VirtualLeafBytes leafBytes); } diff --git a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/api/Validator.java b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/api/Validator.java index 61f413f6d578..e4ddb7f897a4 100644 --- a/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/api/Validator.java +++ b/hedera-state-validator/src/main/java/com/hedera/statevalidation/poc/validator/api/Validator.java @@ -5,25 +5,96 @@ import edu.umd.cs.findbugs.annotations.NonNull; /** - * Base interface for all validators with a clear lifecycle. + * Base interface for all validators with a clear lifecycle used in the parallel state validation pipeline. * - *

Thread Safety Contract: Validator implementations are invoked concurrently - * from multiple processor threads. They are safe to use because: + *

<h2>Validator Lifecycle</h2>
+ *
+ * <p>Each validator follows a three-phase lifecycle:
+ * <ol>
+ *   <li>Initialization - {@link #initialize(MerkleNodeState)} is called once before any data
+ * processing begins. Validators should extract required state references and initialize counters.</li>
+ *   <li>Processing - data items are streamed concurrently to the validator's type-specific
+ * processing method.</li>
+ *   <li>Validation - {@link #validate()} is called once after all data processing is complete
+ * to perform final assertions and report results.</li>
+ * </ol>
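+ *
+ * <p>Seen from the pipeline, the phases are invoked roughly like this (simplified sketch, not
+ * an exact excerpt of the pipeline code):
+ * <pre>{@code
+ * validator.initialize(state);   // phase 1: single-threaded setup
+ * // phase 2: processor threads stream data items to the validator concurrently
+ * validator.validate();          // phase 3: final assertions after all items are processed
+ * }</pre>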
+ * + *

<h2>Thread Safety Contract</h2>
+ *
+ * <p>Validator implementations are invoked concurrently from multiple processor threads.
+ * They are safe to use because:
  * <ul>
- *   <li>The state being validated is read-only (no concurrent writes)</li>
- *   <li>All counters/accumulators must use atomic types</li>
- *   <li>The underlying MerkleDB infrastructure supports concurrent reads</li>
+ *   <li>The state being validated is read-only (no concurrent writes)</li>
+ *   <li>All counters/accumulators must use atomic types (e.g., {@code AtomicLong}, {@code AtomicInteger})</li>
+ *   <li>The underlying MerkleDB infrastructure supports concurrent reads</li>
+ *   <li>Validators are stored in {@code CopyOnWriteArraySet} allowing safe removal on failure</li>
  * </ul>
+ * + *

<h2>Error Handling</h2>
+ *
+ * <p>Validators should throw {@link com.hedera.statevalidation.poc.util.ValidationException} when
+ * validation fails. When an exception is thrown:
+ * <ul>
+ *   <li>The validator is automatically removed from the active validator set</li>
+ *   <li>Registered {@link com.hedera.statevalidation.poc.listener.ValidationListener listeners}
+ * are notified of the failure</li>
+ *   <li>Processing continues for remaining validators</li>
+ * </ul>
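+ *
+ * <p>A minimal implementation sketch (illustrative only; the tag, the counted quantity and its
+ * expected value are placeholders, not part of this change):
+ * <pre>{@code
+ * public final class LeafCountValidator implements LeafBytesValidator {
+ *     private final AtomicLong seen = new AtomicLong();
+ *     private long expected;
+ *
+ *     public String getTag() { return "leafCount"; }
+ *
+ *     public void initialize(MerkleNodeState state) {
+ *         expected = 42; // placeholder: derive the expected count from the state here
+ *     }
+ *
+ *     public void processLeafBytes(long dataLocation, VirtualLeafBytes leafBytes) {
+ *         seen.incrementAndGet(); // called concurrently, so only atomic updates
+ *     }
+ *
+ *     public void validate() {
+ *         ValidationAssertions.requireEqual(expected, seen.get(), getTag());
+ *     }
+ * }
+ * }</pre>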
+ * + * @see com.hedera.statevalidation.poc.util.ValidationException + * @see com.hedera.statevalidation.poc.listener.ValidationListener */ public interface Validator { + /** + * Returns the unique identifier tag for this validator. + * + *

The tag is used for:
+ * <ul>
+ *   <li>Filtering which validators to run via command-line parameters</li>
+ *   <li>Logging and error reporting to identify which validator produced output</li>
+ *   <li>Listener notifications about validation lifecycle events</li>
+ * </ul>
+ * + * @return a non-null, unique string identifier for this validator + */ + @NonNull String getTag(); + /** + * Initializes the validator with access to the Merkle node state. + * + *

This method is called once before any data processing begins. Implementations should:
+ * <ul>
+ *   <li>Extract and store references to required state components (e.g., readable states,
+ * virtual maps, entity stores)</li>
+ *   <li>Initialize any atomic counters or thread-safe collections needed for tracking</li>
+ *   <li>Perform any pre-validation setup or initial state queries</li>
+ * </ul>
+ *
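+ * <p>For example (sketch; the fields shown here are hypothetical):
+ * <pre>{@code
+ * public void initialize(MerkleNodeState state) {
+ *     this.virtualMap = (VirtualMap) state.getRoot();  // read-only reference used during processing
+ *     this.processed = new AtomicLong();
+ * }
+ * }</pre>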

If initialization fails, the validator should throw a + * {@link com.hedera.statevalidation.poc.util.ValidationException} and will be excluded + * from further processing. + * + * @param state the Merkle node state providing read-only access to all service states, + * virtual maps, and data sources; must not be null + * @throws com.hedera.statevalidation.poc.util.ValidationException if initialization fails + * and the validator cannot proceed + */ void initialize(@NonNull MerkleNodeState state); /** - * Finalize validation and assert results. - * Called once after all data processing is complete. + * Finalizes validation and asserts results. + * + *

This method is called once after all data processing is complete. Implementations should:
+ * <ul>
+ *   <li>Perform final assertions using
+ * {@link com.hedera.statevalidation.poc.util.ValidationAssertions}</li>
+ *   <li>Log summary statistics or results</li>
+ *   <li>Compare accumulated counts against expected values from state metadata</li>
+ * </ul>
+ *

If any validation assertion fails, throw a + * {@link com.hedera.statevalidation.poc.util.ValidationException} with details about the failure. + * + * @throws com.hedera.statevalidation.poc.util.ValidationException if any validation + * assertion fails */ void validate(); }