Skip to content

Commit 9fe6047

Browse files
committed
update webtable post
1 parent 1a4a2ee commit 9fe6047

File tree

1 file changed

+13
-12
lines changed

1 file changed

+13
-12
lines changed

src/content/blog/2025-03-14_webtable.md

Lines changed: 13 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -7,11 +7,12 @@ tags:
77
- wide column
88
- data modelling
99
published_at: 2025-03-21T15:55:27.431Z
10-
last_modified_at: 2025-03-21T15:55:27.431Z
10+
last_modified_at: 2025-08-04T23:55:27.431Z
1111
image: /media/thumbs/google.jpg
1212
---
1313

14-
Everyone knows Google. For almost the entire’s internet lifetime, the Google search page has been the starting point of almost any internet user’s surfing session.
14+
Everyone knows Google.
15+
For almost the internet’s entire lifetime, the Google search page has been the starting point of almost any internet user’s surfing session.
1516
To be able to serve search requests of (almost) anything that can be found on the internet, Google has to manage a huge search index.
1617
Storing a large portion of the internet requires petabytes, [if not exabytes](https://cloud.google.com/blog/topics/developers-practitioners/how-big-cloud-bigtable?hl=en), of storage.
1718

@@ -39,7 +40,7 @@ The row key defines how the table is ordered, so it is the only indexing mechani
3940

4041
## The descendants of Bigtable: LevelDB & RocksDB
4142

42-
LevelDB, an embeddable LSM-tree based key-value storage engine by Google, was released in 2011 and a [direct descendent of Bigtable code](https://opensource.googleblog.com/2011/07/leveldb-fast-persistent-key-value-store.html).
43+
LevelDB, an embeddable LSM-tree-based key-value storage engine by Google, was released in 2011 and is a [direct descendant of Bigtable code](https://opensource.googleblog.com/2011/07/leveldb-fast-persistent-key-value-store.html).
4344
It is now in [maintenance-only mode](https://github.com/google/leveldb/commit/aa5479bbf47e9df86e0afbb89e6246085f22cdd4).
4445

4546
> Fun fact: [Every Chromium browser ships with LevelDB](https://chromium.googlesource.com/chromium/src/+/refs/heads/main/content/browser/indexed_db/indexed_db_leveldb_coding.cc).
@@ -108,9 +109,9 @@ And to retrieve a row, we do:
108109
for kv in webtable.prefix("jupiter\0") {
109110
let (k, v) = kv?;
110111
let mut splits = k.split(|&x| x == b'\0');
111-
let row_key = std::str::from_utf8(splits.next().unwrap()).unwrap();
112-
let col_fam = std::str::from_utf8(splits.next().unwrap()).unwrap();
113-
let col_qua = std::str::from_utf8(splits.next().unwrap()).unwrap();
112+
let row_key = std::str::from_utf8(splits.next()?)?;
113+
let col_fam = std::str::from_utf8(splits.next()?)?;
114+
let col_qua = std::str::from_utf8(splits.next()?)?;
114115

115116
let mut buf = [0; std::mem::size_of::<Timestamp>()];
116117
buf.copy_from_slice(&k[k.len() - std::mem::size_of::<Timestamp>()..]);
@@ -200,10 +201,10 @@ impl WideColumnTable {
200201
// ignore the TableCell::new syntax, it's some nasty self_cell stuff
201202
Ok(TableCell::new(kv?, |(k, v)| {
202203
let mut splits = k.split(|&x| x == b'\0');
203-
let row_key = std::str::from_utf8(splits.next().unwrap()).unwrap();
204-
let column_family = std::str::from_utf8(splits.next().unwrap()).unwrap();
205-
let column_qualifier = std::str::from_utf8(splits.next().unwrap()).unwrap();
206-
let ts_bytes = splits.next().unwrap();
204+
let row_key = std::str::from_utf8(splits.next()?)?;
205+
let column_family = std::str::from_utf8(splits.next()?)?;
206+
let column_qualifier = std::str::from_utf8(splits.next()?)?;
207+
let ts_bytes = splits.next()?;
207208

208209
let mut buf = [0; std::mem::size_of::<Timestamp>()];
209210
buf.copy_from_slice(ts_bytes);
@@ -449,10 +450,10 @@ let test_urls = ["https://vedur.is", "https://news.ycombinator.com"];
449450

450451
for url in test_urls {
451452
eprintln!("Scraping {url:?}");
452-
let res = reqwest::blocking::get(url).unwrap();
453+
let res = reqwest::blocking::get(url)?;
453454

454455
if res.status().is_success() {
455-
let html = res.text().unwrap();
456+
let html = res.text()?;
456457
webtable.insert(url, &html)?;
457458
}
458459
}

0 commit comments

Comments
 (0)