|
7 | 7 | - wide column |
8 | 8 | - data modelling |
9 | 9 | published_at: 2025-03-21T15:55:27.431Z |
10 | | -last_modified_at: 2025-03-21T15:55:27.431Z |
| 10 | +last_modified_at: 2025-08-04T23:55:27.431Z |
11 | 11 | image: /media/thumbs/google.jpg |
12 | 12 | --- |
13 | 13 |
|
14 | | -Everyone knows Google. For almost the entire’s internet lifetime, the Google search page has been the starting point of almost any internet user’s surfing session. |
| 14 | +Everyone knows Google. |
| 15 | +For almost the internet’s entire lifetime, the Google search page has been the starting point of almost any internet user’s surfing session. |
15 | 16 | To be able to serve search requests of (almost) anything that can be found on the internet, Google has to manage a huge search index. |
16 | 17 | Storing a large portion of the internet requires petabytes, [if not exabytes](https://cloud.google.com/blog/topics/developers-practitioners/how-big-cloud-bigtable?hl=en), of storage. |
17 | 18 |
|
@@ -39,7 +40,7 @@ The row key defines how the table is ordered, so it is the only indexing mechani |
39 | 40 |
|
40 | 41 | ## The descendants of Bigtable: LevelDB & RocksDB |
41 | 42 |
|
42 | | -LevelDB, an embeddable LSM-tree based key-value storage engine by Google, was released in 2011 and a [direct descendent of Bigtable code](https://opensource.googleblog.com/2011/07/leveldb-fast-persistent-key-value-store.html). |
| 43 | +LevelDB, an embeddable LSM-tree based key-value storage engine by Google, was released in 2011 and is a [direct descendant of Bigtable code](https://opensource.googleblog.com/2011/07/leveldb-fast-persistent-key-value-store.html). |
43 | 44 | It is now in [maintenance-only mode](https://github.com/google/leveldb/commit/aa5479bbf47e9df86e0afbb89e6246085f22cdd4). |
44 | 45 |
|
45 | 46 | > Fun fact: [Every Chromium browser ships with LevelDB](https://chromium.googlesource.com/chromium/src/+/refs/heads/main/content/browser/indexed_db/indexed_db_leveldb_coding.cc). |
@@ -108,9 +109,9 @@ And to retrieve a row, we do: |
108 | 109 | for kv in webtable.prefix("jupiter\0") { |
109 | 110 | let (k, v) = kv?; |
110 | 111 | let mut splits = k.split(|&x| x == b'\0'); |
111 | | - let row_key = std::str::from_utf8(splits.next().unwrap()).unwrap(); |
112 | | - let col_fam = std::str::from_utf8(splits.next().unwrap()).unwrap(); |
113 | | - let col_qua = std::str::from_utf8(splits.next().unwrap()).unwrap(); |
| 112 | + let row_key = std::str::from_utf8(splits.next()?)?; |
| 113 | + let col_fam = std::str::from_utf8(splits.next()?)?; |
| 114 | + let col_qua = std::str::from_utf8(splits.next()?)?; |
114 | 115 |
|
115 | 116 | let mut buf = [0; std::mem::size_of::<Timestamp>()]; |
116 | 117 | buf.copy_from_slice(&k[k.len() - std::mem::size_of::<Timestamp>()..]); |
@@ -200,10 +201,10 @@ impl WideColumnTable { |
200 | 201 | // ignore the TableCell::new syntax, it's some nasty self_cell stuff |
201 | 202 | Ok(TableCell::new(kv?, |(k, v)| { |
202 | 203 | let mut splits = k.split(|&x| x == b'\0'); |
203 | | - let row_key = std::str::from_utf8(splits.next().unwrap()).unwrap(); |
204 | | - let column_family = std::str::from_utf8(splits.next().unwrap()).unwrap(); |
205 | | - let column_qualifier = std::str::from_utf8(splits.next().unwrap()).unwrap(); |
206 | | - let ts_bytes = splits.next().unwrap(); |
| 204 | + let row_key = std::str::from_utf8(splits.next()?)?; |
| 205 | + let column_family = std::str::from_utf8(splits.next()?)?; |
| 206 | + let column_qualifier = std::str::from_utf8(splits.next()?)?; |
| 207 | + let ts_bytes = splits.next()?; |
207 | 208 |
|
208 | 209 | let mut buf = [0; std::mem::size_of::<Timestamp>()]; |
209 | 210 | buf.copy_from_slice(ts_bytes); |
@@ -449,10 +450,10 @@ let test_urls = ["https://vedur.is", "https://news.ycombinator.com"]; |
449 | 450 |
|
450 | 451 | for url in test_urls { |
451 | 452 | eprintln!("Scraping {url:?}"); |
452 | | - let res = reqwest::blocking::get(url).unwrap(); |
| 453 | + let res = reqwest::blocking::get(url)?; |
453 | 454 |
|
454 | 455 | if res.status().is_success() { |
455 | | - let html = res.text().unwrap(); |
| 456 | + let html = res.text()?; |
456 | 457 | webtable.insert(url, &html)?; |
457 | 458 | } |
458 | 459 | } |
|
0 commit comments