Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions scripts/rocksdb-compatibility/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
/.venv
/__pycache__
3 changes: 3 additions & 0 deletions scripts/rocksdb-compatibility/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
To run this test:
1. Run `install-rocksdb.sh` (preferably in a docker because it installs to system library) to ensure rocksdb dyn lib are installed
2. Run `test.py` inside a venv where everything in `requirements.txt` is installed
19 changes: 19 additions & 0 deletions scripts/rocksdb-compatibility/install-rocksdb.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/usr/bin/env bash

set -euox pipefail

ROCKSDB_VERSION=10.5.1

ROCKSDB_SOURCE=`mktemp -d --tmpdir rocksdb-$ROCKSDB_VERSION.XXXXXX`
trap "rm -rf $ROCKSDB_SOURCE" EXIT

curl -L https://github.com/facebook/rocksdb/archive/refs/tags/v${ROCKSDB_VERSION}.tar.gz | tar xz -C $ROCKSDB_SOURCE

cd $ROCKSDB_SOURCE/rocksdb-${ROCKSDB_VERSION}

# NOTE:
# `-Wno-unused-parameter` is to fix this error:
# util/compression.cc:684:40: error: unused parameter ‘args’ [-Werror=unused-parameter]
# 684 | Status ExtractUncompressedSize(Args& args) override {
# | ~~~~~~^~~~
EXTRA_CXXFLAGS="-Wno-unused-parameter" make -j$(nproc) install
3 changes: 3 additions & 0 deletions scripts/rocksdb-compatibility/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
cffi==2.0.0
tqdm==4.65
pycurl==7.45.7
93 changes: 93 additions & 0 deletions scripts/rocksdb-compatibility/rocksdb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
from cffi import FFI
from contextlib import contextmanager

ffi = FFI()

ffi.cdef("""
typedef struct rocksdb_t rocksdb_t;
typedef struct rocksdb_options_t rocksdb_options_t;
typedef struct rocksdb_readoptions_t rocksdb_readoptions_t;
typedef struct rocksdb_iterator_t rocksdb_iterator_t;

rocksdb_options_t* rocksdb_options_create(void);
void rocksdb_options_destroy(rocksdb_options_t*);
void rocksdb_options_set_create_if_missing(rocksdb_options_t*, unsigned char);

rocksdb_t* rocksdb_open(const rocksdb_options_t* options, const char* name, char** errptr);
void rocksdb_close(rocksdb_t* db);

rocksdb_readoptions_t* rocksdb_readoptions_create(void);
void rocksdb_readoptions_destroy(rocksdb_readoptions_t*);

rocksdb_iterator_t* rocksdb_create_iterator(rocksdb_t* db, const rocksdb_readoptions_t* options);
void rocksdb_iter_destroy(rocksdb_iterator_t* iter);
void rocksdb_iter_seek_to_first(rocksdb_iterator_t* iter);
unsigned char rocksdb_iter_valid(const rocksdb_iterator_t* iter);
void rocksdb_iter_next(rocksdb_iterator_t* iter);
const char* rocksdb_iter_key(const rocksdb_iterator_t* iter, size_t* klen);
const char* rocksdb_iter_value(const rocksdb_iterator_t* iter, size_t* vlen);
""")

# Load the library
rocksdb = ffi.dlopen("librocksdb.so")

@contextmanager
def rocksdb_options(create_if_missing=False):
opts = rocksdb.rocksdb_options_create()
rocksdb.rocksdb_options_set_create_if_missing(opts, int(create_if_missing))
try:
yield opts
finally:
rocksdb.rocksdb_options_destroy(opts)

@contextmanager
def open_db(path, options):
err_ptr = ffi.new("char**")
db = rocksdb.rocksdb_open(options, path.encode('utf-8'), err_ptr)
if err_ptr[0] != ffi.NULL:
raise RuntimeError("Open error: " + ffi.string(err_ptr[0]).decode())
try:
yield db
finally:
rocksdb.rocksdb_close(db)

@contextmanager
def read_iter(db):
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should better be a generator, I think. Should leave as a follow-up.

BTW, the reason I'm hand-rolling a binding is because other existing libraries is not using dynamic linking.

ropts = rocksdb.rocksdb_readoptions_create()
iter_ = rocksdb.rocksdb_create_iterator(db, ropts)
try:
yield iter_
finally:
rocksdb.rocksdb_iter_destroy(iter_)
rocksdb.rocksdb_readoptions_destroy(ropts)

def test(path, rounds):
"""
Iterate over a RocksDB database and print key-value pairs in hexadecimal.

Args:
path (str): Path to the RocksDB database.
rounds (int): Number of key-value pairs to read from the start of the database.

Behavior:
- Opens the database in read-only mode (does not create a new DB).
- Uses a RocksDB iterator to traverse from the first key.
- Prints each key-value pair as hexadecimal strings.
- Stops early if the iterator reaches the end of the DB before 'rounds' entries.
"""
with rocksdb_options(create_if_missing=False) as opts, open_db(path, opts) as db, read_iter(db) as it:
rocksdb.rocksdb_iter_seek_to_first(it)
for _ in range(rounds):
if not rocksdb.rocksdb_iter_valid(it):
break

klen = ffi.new("size_t*")
vlen = ffi.new("size_t*")
key_ptr = rocksdb.rocksdb_iter_key(it, klen)
val_ptr = rocksdb.rocksdb_iter_value(it, vlen)

key_buf = ffi.buffer(key_ptr, klen[0])
val_buf = ffi.buffer(val_ptr, vlen[0])
print(f"Found KV-pair: {key_buf[:].hex()} -> {val_buf[:].hex()}")

rocksdb.rocksdb_iter_next(it)
101 changes: 101 additions & 0 deletions scripts/rocksdb-compatibility/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import os
import random
import tarfile
import tempfile
import xml.etree.ElementTree as ET
from io import BytesIO
from typing import List
from urllib.parse import urljoin

import pycurl
from tqdm import tqdm

import rocksdb

NUM_LEDGER_TARS = 5
NUM_KV_PER_LEDGER = 10

# Match keys starting with "genesis_ledger" or "epoch_ledger" and ending with ".tar.gz"
def matches_pattern(key: str) -> bool:
return (key.startswith("genesis_ledger") or key.startswith("epoch_ledger")) and key.endswith(".tar.gz")


def download_file(url: str, dest_path: str) -> None:
with open(dest_path, "wb") as f:
# Create a progress bar (tqdm)
pbar = tqdm(unit="B", unit_scale=True, unit_divisor=1024, ncols=80)

def progress(download_t, download_d, _upload_t, _upload_d):
_ = (_upload_t, _upload_d) # Make pyright happier
if download_t > 0:
pbar.total = download_t
pbar.update(download_d - pbar.n)

c = pycurl.Curl()
c.setopt(pycurl.URL, url)
c.setopt(pycurl.WRITEDATA, f)
c.setopt(pycurl.FOLLOWLOCATION, True)
c.setopt(pycurl.NOPROGRESS, False)
c.setopt(pycurl.XFERINFOFUNCTION, progress)
c.perform()
c.close()

pbar.close()


def extract_tar_gz(tar_path: str, target_dir: str) -> None:
with tarfile.open(tar_path, "r:gz") as tar:
tar.extractall(path=target_dir)

# TODO: figure out how to enable SSL here
def list_s3_keys(url, matches_pattern) -> List[str] :
buffer = BytesIO()
c = pycurl.Curl()
c.setopt(pycurl.URL, url)
c.setopt(pycurl.WRITEDATA, buffer)
c.setopt(pycurl.FOLLOWLOCATION, True)
c.setopt(pycurl.SSL_VERIFYPEER, False)
c.setopt(pycurl.SSL_VERIFYHOST, 0)
c.perform()
status_code = c.getinfo(pycurl.RESPONSE_CODE)
c.close()

if status_code != 200:
raise RuntimeError(f"Failed to list S3 bucket: {status_code}")

data = buffer.getvalue()
root = ET.fromstring(data)
ns = {"s3": "http://s3.amazonaws.com/doc/2006-03-01/"}
tar_keys = [
text
for elem in root.findall(".//s3:Contents/s3:Key", ns)
if (text := elem.text) is not None and matches_pattern(text)
]
return tar_keys

def main():
tar_keys = list_s3_keys("https://snark-keys.o1test.net.s3.amazonaws.com/", matches_pattern)

if not tar_keys:
raise RuntimeError("No ledger tar files found.")

for tar_key in random.sample(tar_keys, min(NUM_LEDGER_TARS, len(tar_keys))):
tar_uri = urljoin("https://s3-us-west-2.amazonaws.com/snark-keys.o1test.net/", tar_key)
print(f"Testing RocksDB compatibility on {tar_uri}")

with tempfile.TemporaryDirectory() as tmpdir:
tar_path = os.path.join(tmpdir, os.path.basename(tar_key))
print(f" Downloading to {tar_path}...")
download_file(tar_uri, tar_path)

db_path = os.path.join(tmpdir, "extracted")
os.makedirs(db_path, exist_ok=True)
print(f" Extracting to {db_path}...")
extract_tar_gz(tar_path, db_path)

print(f" Testing extracted RocksDB at {db_path}")
rocksdb.test(db_path, NUM_KV_PER_LEDGER)


if __name__ == "__main__":
main()