diff --git a/scripts/rocksdb-compatibility/.gitignore b/scripts/rocksdb-compatibility/.gitignore new file mode 100644 index 00000000000..13d5ab46f9f --- /dev/null +++ b/scripts/rocksdb-compatibility/.gitignore @@ -0,0 +1,2 @@ +/.venv +/__pycache__ diff --git a/scripts/rocksdb-compatibility/README.md b/scripts/rocksdb-compatibility/README.md new file mode 100644 index 00000000000..3c78a798df1 --- /dev/null +++ b/scripts/rocksdb-compatibility/README.md @@ -0,0 +1,3 @@ +To run this test: +1. Run `install-rocksdb.sh` (preferably in a docker because it installs to system library) to ensure rocksdb dyn lib are installed +2. Run `test.py` inside a venv where everything in `requirements.txt` is installed diff --git a/scripts/rocksdb-compatibility/install-rocksdb.sh b/scripts/rocksdb-compatibility/install-rocksdb.sh new file mode 100755 index 00000000000..289317ebfc9 --- /dev/null +++ b/scripts/rocksdb-compatibility/install-rocksdb.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +set -euox pipefail + +ROCKSDB_VERSION=10.5.1 + +ROCKSDB_SOURCE=`mktemp -d --tmpdir rocksdb-$ROCKSDB_VERSION.XXXXXX` +trap "rm -rf $ROCKSDB_SOURCE" EXIT + +curl -L https://github.com/facebook/rocksdb/archive/refs/tags/v${ROCKSDB_VERSION}.tar.gz | tar xz -C $ROCKSDB_SOURCE + +cd $ROCKSDB_SOURCE/rocksdb-${ROCKSDB_VERSION} + +# NOTE: +# `-Wno-unused-parameter` is to fix this error: +# util/compression.cc:684:40: error: unused parameter ‘args’ [-Werror=unused-parameter] +# 684 | Status ExtractUncompressedSize(Args& args) override { +# | ~~~~~~^~~~ +EXTRA_CXXFLAGS="-Wno-unused-parameter" make -j$(nproc) install diff --git a/scripts/rocksdb-compatibility/requirements.txt b/scripts/rocksdb-compatibility/requirements.txt new file mode 100644 index 00000000000..40fdc62a1d5 --- /dev/null +++ b/scripts/rocksdb-compatibility/requirements.txt @@ -0,0 +1,3 @@ +cffi==2.0.0 +tqdm==4.65 +pycurl==7.45.7 diff --git a/scripts/rocksdb-compatibility/rocksdb.py b/scripts/rocksdb-compatibility/rocksdb.py new file mode 100644 index 00000000000..0469029fc96 --- /dev/null +++ b/scripts/rocksdb-compatibility/rocksdb.py @@ -0,0 +1,93 @@ +from cffi import FFI +from contextlib import contextmanager + +ffi = FFI() + +ffi.cdef(""" +typedef struct rocksdb_t rocksdb_t; +typedef struct rocksdb_options_t rocksdb_options_t; +typedef struct rocksdb_readoptions_t rocksdb_readoptions_t; +typedef struct rocksdb_iterator_t rocksdb_iterator_t; + +rocksdb_options_t* rocksdb_options_create(void); +void rocksdb_options_destroy(rocksdb_options_t*); +void rocksdb_options_set_create_if_missing(rocksdb_options_t*, unsigned char); + +rocksdb_t* rocksdb_open(const rocksdb_options_t* options, const char* name, char** errptr); +void rocksdb_close(rocksdb_t* db); + +rocksdb_readoptions_t* rocksdb_readoptions_create(void); +void rocksdb_readoptions_destroy(rocksdb_readoptions_t*); + +rocksdb_iterator_t* rocksdb_create_iterator(rocksdb_t* db, const rocksdb_readoptions_t* options); +void rocksdb_iter_destroy(rocksdb_iterator_t* iter); +void rocksdb_iter_seek_to_first(rocksdb_iterator_t* iter); +unsigned char rocksdb_iter_valid(const rocksdb_iterator_t* iter); +void rocksdb_iter_next(rocksdb_iterator_t* iter); +const char* rocksdb_iter_key(const rocksdb_iterator_t* iter, size_t* klen); +const char* rocksdb_iter_value(const rocksdb_iterator_t* iter, size_t* vlen); +""") + +# Load the library +rocksdb = ffi.dlopen("librocksdb.so") + +@contextmanager +def rocksdb_options(create_if_missing=False): + opts = rocksdb.rocksdb_options_create() + rocksdb.rocksdb_options_set_create_if_missing(opts, int(create_if_missing)) + try: + yield opts + finally: + rocksdb.rocksdb_options_destroy(opts) + +@contextmanager +def open_db(path, options): + err_ptr = ffi.new("char**") + db = rocksdb.rocksdb_open(options, path.encode('utf-8'), err_ptr) + if err_ptr[0] != ffi.NULL: + raise RuntimeError("Open error: " + ffi.string(err_ptr[0]).decode()) + try: + yield db + finally: + rocksdb.rocksdb_close(db) + +@contextmanager +def read_iter(db): + ropts = rocksdb.rocksdb_readoptions_create() + iter_ = rocksdb.rocksdb_create_iterator(db, ropts) + try: + yield iter_ + finally: + rocksdb.rocksdb_iter_destroy(iter_) + rocksdb.rocksdb_readoptions_destroy(ropts) + +def test(path, rounds): + """ + Iterate over a RocksDB database and print key-value pairs in hexadecimal. + + Args: + path (str): Path to the RocksDB database. + rounds (int): Number of key-value pairs to read from the start of the database. + + Behavior: + - Opens the database in read-only mode (does not create a new DB). + - Uses a RocksDB iterator to traverse from the first key. + - Prints each key-value pair as hexadecimal strings. + - Stops early if the iterator reaches the end of the DB before 'rounds' entries. + """ + with rocksdb_options(create_if_missing=False) as opts, open_db(path, opts) as db, read_iter(db) as it: + rocksdb.rocksdb_iter_seek_to_first(it) + for _ in range(rounds): + if not rocksdb.rocksdb_iter_valid(it): + break + + klen = ffi.new("size_t*") + vlen = ffi.new("size_t*") + key_ptr = rocksdb.rocksdb_iter_key(it, klen) + val_ptr = rocksdb.rocksdb_iter_value(it, vlen) + + key_buf = ffi.buffer(key_ptr, klen[0]) + val_buf = ffi.buffer(val_ptr, vlen[0]) + print(f"Found KV-pair: {key_buf[:].hex()} -> {val_buf[:].hex()}") + + rocksdb.rocksdb_iter_next(it) diff --git a/scripts/rocksdb-compatibility/test.py b/scripts/rocksdb-compatibility/test.py new file mode 100644 index 00000000000..eaeca114301 --- /dev/null +++ b/scripts/rocksdb-compatibility/test.py @@ -0,0 +1,101 @@ +import os +import random +import tarfile +import tempfile +import xml.etree.ElementTree as ET +from io import BytesIO +from typing import List +from urllib.parse import urljoin + +import pycurl +from tqdm import tqdm + +import rocksdb + +NUM_LEDGER_TARS = 5 +NUM_KV_PER_LEDGER = 10 + +# Match keys starting with "genesis_ledger" or "epoch_ledger" and ending with ".tar.gz" +def matches_pattern(key: str) -> bool: + return (key.startswith("genesis_ledger") or key.startswith("epoch_ledger")) and key.endswith(".tar.gz") + + +def download_file(url: str, dest_path: str) -> None: + with open(dest_path, "wb") as f: + # Create a progress bar (tqdm) + pbar = tqdm(unit="B", unit_scale=True, unit_divisor=1024, ncols=80) + + def progress(download_t, download_d, _upload_t, _upload_d): + _ = (_upload_t, _upload_d) # Make pyright happier + if download_t > 0: + pbar.total = download_t + pbar.update(download_d - pbar.n) + + c = pycurl.Curl() + c.setopt(pycurl.URL, url) + c.setopt(pycurl.WRITEDATA, f) + c.setopt(pycurl.FOLLOWLOCATION, True) + c.setopt(pycurl.NOPROGRESS, False) + c.setopt(pycurl.XFERINFOFUNCTION, progress) + c.perform() + c.close() + + pbar.close() + + +def extract_tar_gz(tar_path: str, target_dir: str) -> None: + with tarfile.open(tar_path, "r:gz") as tar: + tar.extractall(path=target_dir) + +# TODO: figure out how to enable SSL here +def list_s3_keys(url, matches_pattern) -> List[str] : + buffer = BytesIO() + c = pycurl.Curl() + c.setopt(pycurl.URL, url) + c.setopt(pycurl.WRITEDATA, buffer) + c.setopt(pycurl.FOLLOWLOCATION, True) + c.setopt(pycurl.SSL_VERIFYPEER, False) + c.setopt(pycurl.SSL_VERIFYHOST, 0) + c.perform() + status_code = c.getinfo(pycurl.RESPONSE_CODE) + c.close() + + if status_code != 200: + raise RuntimeError(f"Failed to list S3 bucket: {status_code}") + + data = buffer.getvalue() + root = ET.fromstring(data) + ns = {"s3": "http://s3.amazonaws.com/doc/2006-03-01/"} + tar_keys = [ + text + for elem in root.findall(".//s3:Contents/s3:Key", ns) + if (text := elem.text) is not None and matches_pattern(text) + ] + return tar_keys + +def main(): + tar_keys = list_s3_keys("https://snark-keys.o1test.net.s3.amazonaws.com/", matches_pattern) + + if not tar_keys: + raise RuntimeError("No ledger tar files found.") + + for tar_key in random.sample(tar_keys, min(NUM_LEDGER_TARS, len(tar_keys))): + tar_uri = urljoin("https://s3-us-west-2.amazonaws.com/snark-keys.o1test.net/", tar_key) + print(f"Testing RocksDB compatibility on {tar_uri}") + + with tempfile.TemporaryDirectory() as tmpdir: + tar_path = os.path.join(tmpdir, os.path.basename(tar_key)) + print(f" Downloading to {tar_path}...") + download_file(tar_uri, tar_path) + + db_path = os.path.join(tmpdir, "extracted") + os.makedirs(db_path, exist_ok=True) + print(f" Extracting to {db_path}...") + extract_tar_gz(tar_path, db_path) + + print(f" Testing extracted RocksDB at {db_path}") + rocksdb.test(db_path, NUM_KV_PER_LEDGER) + + +if __name__ == "__main__": + main()