Skip to content
Open
Show file tree
Hide file tree
Changes from 23 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
a1f2153
Add BloatNet tests
gballet Aug 14, 2025
02d65b4
try building the contract
gballet Aug 14, 2025
e721cc6
fix: SSTORE 0 -> 1 match all values in the state
gballet Aug 14, 2025
d1cad25
add the tx for 0 -> 1 and 1 -> 2
gballet Aug 14, 2025
16f6d30
fix: linter issues
gballet Aug 14, 2025
374e08a
remove more whitespaces
gballet Aug 14, 2025
333c876
fix formatting
gballet Aug 15, 2025
79a95b8
move to benchmarks
gballet Aug 21, 2025
8131e98
fix linter value
gballet Aug 22, 2025
5f805fd
use the gas limit from the environment
gballet Aug 22, 2025
090a400
parameterize the written value in SSTORE
gballet Aug 26, 2025
cd02a02
fix linter issues
gballet Aug 26, 2025
1f3c381
update CHANGELOG.md
gballet Aug 26, 2025
f6def7e
fix format
gballet Aug 26, 2025
7e20a50
simplify syntax
gballet Aug 26, 2025
c24ad35
fix: start with an empty contract storage
gballet Aug 26, 2025
fc27e53
more fixes, but the result is still incorrect
gballet Aug 26, 2025
7d87262
fix: finally fix the tests
gballet Aug 26, 2025
8556014
linter fix
gballet Aug 27, 2025
326915e
add SLOAD tests
gballet Aug 27, 2025
bc7ee84
Merge branch 'main' into bloatnet-test-SSTORE
gballet Aug 28, 2025
4164f3c
review feedback
gballet Aug 28, 2025
6d12da6
more review feedback
gballet Aug 29, 2025
b3aa527
refactor(tests): Proposed patch for bloatnet SSTORE tests (#1)
fselmo Sep 2, 2025
add1a36
mandatory linter issue fix
gballet Sep 2, 2025
134dc8c
fix SLOAD test
gballet Sep 2, 2025
20c47b7
fix linter issue
gballet Sep 2, 2025
f668f86
break sload tests in multiple txs
gballet Sep 2, 2025
edfc381
fix doc issue in CI
gballet Sep 3, 2025
65d500f
Apply suggestions from code review
gballet Sep 9, 2025
6df6895
fix linter
gballet Sep 9, 2025
f3f7a0d
fix remaining issue
gballet Sep 9, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ Users can select any of the artifacts depending on their testing needs for their

### 🧪 Test Cases

- ✨ [BloatNet](https://bloatnet.info)/Multidimensional Metering: Add benchmarks to be used as part of the BloatNet project and also for Multidimensional Metering.
- ✨ [EIP-7951](https://eips.ethereum.org/EIPS/eip-7951): Add additional test cases for modular comparison and initcode context ([#2023](https://github.com/ethereum/execution-spec-tests/pull/2023), & [#2068](https://github.com/ethereum/execution-spec-tests/pull/2068)).
- 🔀 Refactored `BLOBHASH` opcode context tests to use the `pre_alloc` plugin in order to avoid contract and EOA address collisions ([#1637](https://github.com/ethereum/execution-spec-tests/pull/1637)).
- 🔀 Refactored `SELFDESTRUCT` opcode collision tests to use the `pre_alloc` plugin in order to avoid contract and EOA address collisions ([#1643](https://github.com/ethereum/execution-spec-tests/pull/1643)).
Expand Down
165 changes: 165 additions & 0 deletions tests/benchmark/test_bloatnet.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
"""
abstract: Tests that benchmark EVMs to estimate the costs of stateful opcodes.
    Tests that benchmark EVMs to estimate the costs of stateful opcodes.
"""

import pytest

from ethereum_test_base_types import HashInt
from ethereum_test_forks import Fork
from ethereum_test_tools import (
Account,
Alloc,
Block,
BlockchainTestFiller,
Environment,
Storage,
Transaction,
)
from ethereum_test_tools.vm.opcode import Opcodes as Op


@pytest.mark.valid_from("Prague")
@pytest.mark.parametrize("final_storage_value", [0x02 << 248, 0x02])
def test_bloatnet(
    blockchain_test: BlockchainTestFiller, pre: Alloc, fork: Fork, final_storage_value: int
):
    """
    Benchmark a contract that performs as many SSTOREs as fit in one block.

    The contract reads the value to store from calldata and writes it to
    consecutive storage slots. Two blocks are produced:

    - block 1: every slot goes from 0 to `final_storage_value // 2`
    - block 2: every slot goes from `final_storage_value // 2` to
      `final_storage_value`
    """
    gas_costs = fork.gas_costs()
    gas_limit = Environment().gas_limit

    # The value to store is passed as calldata. Trailing zero bytes are
    # stripped: CALLDATALOAD right-pads short calldata with zeros, so the
    # contract still reads the full 32-byte word.
    calldata_half = (final_storage_value // 2).to_bytes(32, "big").rstrip(b"\x00")
    calldata_final = final_storage_value.to_bytes(32, "big").rstrip(b"\x00")

    # Fixed overhead: PUSH0 + CALLDATALOAD at the start, POP at the end,
    # plus the intrinsic gas of the transaction.
    total_gas = gas_costs.G_BASE * 2 + gas_costs.G_VERY_LOW
    total_gas += fork.transaction_intrinsic_cost_calculator()(calldata=calldata_final)

    # Per slot: DUP1 + PUSH of the slot index, then an SSTORE that sets a
    # cold, empty slot (the more expensive of the two transactions).
    gas_increment = gas_costs.G_VERY_LOW * 2 + gas_costs.G_STORAGE_SET + gas_costs.G_COLD_SLOAD

    expected_storage = Storage()
    sstore_code = Op.PUSH0 + Op.CALLDATALOAD
    storage_slot: int = 0
    while total_gas + gas_increment < gas_limit:
        total_gas += gas_increment
        sstore_code = sstore_code + Op.SSTORE(storage_slot, Op.DUP1)
        expected_storage[storage_slot] = final_storage_value
        storage_slot += 1

    sstore_code = sstore_code + Op.POP  # Drop last value on the stack

    sender = pre.fund_eoa()
    # The contract starts with empty storage so the first tx really does 0 -> x.
    contract_address = pre.deploy_contract(
        code=sstore_code,
        storage=Storage(),
    )

    tx_0_1 = Transaction(
        to=contract_address,
        gas_limit=gas_limit,
        data=calldata_half,
        value=0,
        sender=sender,
    )
    tx_1_2 = Transaction(
        to=contract_address,
        gas_limit=gas_limit,
        data=calldata_final,
        value=0,
        sender=sender,
    )

    post = {contract_address: Account(storage=expected_storage)}

    blockchain_test(pre=pre, blocks=[Block(txs=[tx_0_1]), Block(txs=[tx_1_2])], post=post)


# Warm reads are very cheap, which means a block can hold a very large number
# of them. test_bloatnet_sload_warm therefore divides the number of warm loads
# that would fit in the block by SPEEDUP instead of filling the block entirely.
SPEEDUP: int = 100


Copy link
Collaborator

@jsign jsign Aug 31, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We already have covered cold and warm SSTORE/SLOAD with same and different values.

Are these tests different in some way?

Copy link
Collaborator

@fselmo fselmo Aug 31, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm yeah, good observation. The biggest question I suppose is whether those worst case scenarios are the same as the ones in the bloatnet doc here? If they are, maybe creating a marker on existing relevant tests, so we don't have to redefine them, would be a good approach. Then, instead of trying to run only the tests in this file, we could use a marker like -m bloatnet and if we mark those existing, relevant, param cases with this marker, they get included in all the tests you want to run. We could then mark this whole file (test_bloatnet.py) with the appropriate marker for your use so that you get all the test cases relevant to you in one command, whether they are defined here or elsewhere.

Just a thought.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's not conflate two different things together, the objectives are clearly different:

  • BloatNet looks for the performance of regular execution
  • the zk benchmarks are benchmarking zkevms, a widely different environment

Adding an extra coupling here is causing more work for no benefit, since these tests are maintained by different sets of people.

Regarding the worst case, the tests are doing different things since the code itself is different. The goal of the bloatnet test is to measure the sole performance of SSTORE in a client, whereas when I read the zkvm-specific code, it is doing extra stuff like jumps. It's normal, you wouldn't be able to load so much code as in our test inside a zkvm. But we can.

Copy link
Collaborator

@jsign jsign Sep 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@gballet, the tests in benchmark are not specific to zkvms. (They are even used for PerfNet).

The tests I linked execute blocks where the full gas limit is used to do cold or warm reads, or to write to existing or non-existent storage slots. There's nothing specific to zkvms there, which is why I ask how these tests are different -- mainly to avoid duplication, or to better explain which different variant is being benchmarked.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This needs to be addressed before merging!

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I consider it has already been addressed by my first comment: it's not the same bytecode, not the same objectives (worst case vs perf), and not the same constraints. Ignacio's tests might not be specific to zkvms, but we do need something specific for us.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the answer here is bloatnet could benefit from its own marker. It seems clear that not all benchmark tests (for example, bloatnet) will want to be run by zkvm / perfnet and also the same is true for bloatnet tests not wanting to run all benchmark tests.

I think it would be a good idea to apply a pytest marker for bloatnet which only runs bloatnet marked tests (which this whole file can make use of). And I think we need to alter the marker for benchmark and make it so it does not run bloatnet tests by default.

@LouisTsai-Csie, do you agree here? I know we discussed some of this but I just want a sanity check. And if you do agree, do you think you can help implement this here? I think this will be important for us to establish some organization in the codebase before merging this and then having tests leak to different teams that don't care about each other's test cases.

I know also that @marioevz expressed a very similar direction for this but he is not available the next few weeks. I think this will help us get all of this in, in an organized fashion that doesn't change how the benchmark teams use their marker and would also help better categorize bloatnet tests.

@pytest.mark.valid_from("Prague")
def test_bloatnet_sload_warm(blockchain_test: BlockchainTestFiller, pre: Alloc, fork: Fork):
    """
    Test that loads warm storage locations many times.

    The contract first reads each of `num_slots` pre-filled slots once (cold
    access), then keeps re-reading them round-robin. The number of warm reads
    is the amount that fits in the remaining gas, divided by `SPEEDUP`.
    Storage is only read, so the post-state storage equals the pre-state.
    """
    gas_costs = fork.gas_costs()
    gas_limit = Environment().gas_limit

    # Pre-fill storage with values
    num_slots = 100  # Number of storage slots to warm up
    storage = Storage({HashInt(i): HashInt(0xDEADBEEF + i) for i in range(num_slots)})

    total_gas = fork.transaction_intrinsic_cost_calculator()(calldata=b"")

    # First pass - warm up all slots: PUSH + cold SLOAD + POP per slot.
    total_gas += num_slots * (
        gas_costs.G_VERY_LOW + gas_costs.G_COLD_SLOAD + gas_costs.G_BASE
    )

    # Every later access: PUSH + warm SLOAD + POP. The PUSH of the slot index
    # is counted here, matching the accounting in the cold SLOAD test.
    gas_increment = gas_costs.G_VERY_LOW + gas_costs.G_WARM_SLOAD + gas_costs.G_BASE
    remaining_gas = gas_limit - total_gas
    num_warm_loads = remaining_gas // (SPEEDUP * gas_increment)

    # Build the complete code: warmup + repeated warm loads
    sload_code = Op.SLOAD(0) + Op.POP
    for i in range(1, num_slots):
        sload_code = sload_code + Op.SLOAD(i) + Op.POP
    for i in range(num_warm_loads):
        sload_code = sload_code + Op.SLOAD(i % num_slots) + Op.POP

    sender = pre.fund_eoa()
    contract_address = pre.deploy_contract(
        code=sload_code,
        storage=storage,
    )

    tx = Transaction(
        to=contract_address,
        gas_limit=gas_limit,
        data=b"",
        value=0,
        sender=sender,
    )

    post = {contract_address: Account(storage=storage)}
    blockchain_test(pre=pre, blocks=[Block(txs=[tx])], post=post)


@pytest.mark.valid_from("Prague")
def test_bloatnet_sload_cold(blockchain_test: BlockchainTestFiller, pre: Alloc, fork: Fork):
    """
    Benchmark cold SLOADs by reading as many distinct storage slots as fit in a block.

    Every slot is pre-filled and read exactly once. Storage is only read, so
    the expected post-state storage is the same as the pre-state storage.
    """
    block_gas_limit = Environment().gas_limit
    costs = fork.gas_costs()

    # Gas available for reads once the intrinsic cost of an empty-calldata tx is paid.
    intrinsic_gas = fork.transaction_intrinsic_cost_calculator()(calldata=b"")
    # One read is PUSH + cold SLOAD + POP.
    gas_per_read = costs.G_VERY_LOW + costs.G_COLD_SLOAD + costs.G_BASE
    slot_count = (block_gas_limit - intrinsic_gas) // gas_per_read

    # Pre-state: one distinct value per slot that will be read.
    prefilled = Storage({HashInt(slot): HashInt(0xC0FFEE + slot) for slot in range(slot_count)})

    # Bytecode: one SLOAD/POP pair per slot, or a bare STOP if nothing fits.
    if slot_count > 0:
        bytecode = Op.SLOAD(0) + Op.POP
        for slot in range(1, slot_count):
            bytecode = bytecode + Op.SLOAD(slot) + Op.POP
    else:
        bytecode = Op.STOP

    sender = pre.fund_eoa()
    target = pre.deploy_contract(code=bytecode, storage=prefilled)

    read_tx = Transaction(
        sender=sender,
        to=target,
        value=0,
        data=b"",
        gas_limit=block_gas_limit,
    )

    blockchain_test(
        pre=pre,
        blocks=[Block(txs=[read_tx])],
        post={target: Account(storage=prefilled)},
    )
Loading