Skip to content

Commit 5ea81f5

Browse files
committed
libexpr: Actually cache line information in PosTable
Previous code had a sneaky bug due to which no caching actually happened: ```cpp auto linesForInput = (*lines)[origin->offset]; ``` That should have been: ```cpp auto & linesForInput = (*lines)[origin->offset]; ``` See [1]. Now that caching actually works, it also makes sense to bound the cache in size, so that we do not memoize all the sources without ever freeing any memory. The default cache size has been chosen somewhat arbitrarily to be ~64k origins. For reference, 25.05 nixpkgs has ~50k .nix files. Simple benchmark: ```nix let pkgs = import <nixpkgs> { }; in builtins.foldl' (acc: el: acc + el.line) 0 ( builtins.genList (x: builtins.unsafeGetAttrPos "gcc" pkgs) 10000 ) ``` (After) ``` $ hyperfine "result/bin/nix eval -f ./test.nix" Benchmark 1: result/bin/nix eval -f ./test.nix Time (mean ± σ): 292.7 ms ± 3.9 ms [User: 131.0 ms, System: 120.5 ms] Range (min … max): 288.1 ms … 300.5 ms 10 runs ``` (Before) ``` hyperfine "nix eval -f ./test.nix" Benchmark 1: nix eval -f ./test.nix Time (mean ± σ): 666.7 ms ± 6.4 ms [User: 428.3 ms, System: 191.2 ms] Range (min … max): 659.7 ms … 681.3 ms 10 runs ``` If the origin happens to be an `all-packages.nix` or a file of similar size, then the difference is much more dramatic. [1]: https://www.github.com/lix-project/lix/commit/22e3f0e9875082be7f4eec8e3caeb134a7f1c05f
1 parent 4711720 commit 5ea81f5

File tree

2 files changed

+44
-14
lines changed

2 files changed

+44
-14
lines changed

src/libutil/include/nix/util/pos-table.hh

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include <cstdint>
55
#include <vector>
66

7+
#include "nix/util/lru-cache.hh"
78
#include "nix/util/pos-idx.hh"
89
#include "nix/util/position.hh"
910
#include "nix/util/sync.hh"
@@ -37,10 +38,20 @@ public:
3738
};
3839

3940
private:
41+
/**
42+
* Vector of byte offsets (in the virtual input buffer) of initial line character's position.
43+
* Sorted by construction. Binary search over it allows for efficient translation of arbitrary
44+
* byte offsets in the virtual input buffer to its line + column position.
45+
*/
4046
using Lines = std::vector<uint32_t>;
47+
/**
48+
* Cache from byte offset in the virtual buffer of Origins -> @ref Lines in that origin.
49+
*/
50+
using LinesCache = LRUCache<uint32_t, Lines>;
4151

4252
std::map<uint32_t, Origin> origins;
43-
mutable Sync<std::map<uint32_t, Lines>> lines;
53+
54+
mutable Sync<LinesCache> linesCache;
4455

4556
const Origin * resolve(PosIdx p) const
4657
{
@@ -56,6 +67,11 @@ private:
5667
}
5768

5869
public:
70+
PosTable(std::size_t linesCacheCapacity = 65536)
71+
: linesCache(linesCacheCapacity)
72+
{
73+
}
74+
5975
Origin addOrigin(Pos::Origin origin, size_t size)
6076
{
6177
uint32_t offset = 0;

src/libutil/pos-table.cc

Lines changed: 27 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -15,21 +15,35 @@ Pos PosTable::operator[](PosIdx p) const
1515
const auto offset = origin->offsetOf(p);
1616

1717
Pos result{0, 0, origin->origin};
18-
auto lines = this->lines.lock();
19-
auto linesForInput = (*lines)[origin->offset];
20-
21-
if (linesForInput.empty()) {
22-
auto source = result.getSource().value_or("");
23-
const char * begin = source.data();
24-
for (Pos::LinesIterator it(source), end; it != end; it++)
25-
linesForInput.push_back(it->data() - begin);
26-
if (linesForInput.empty())
27-
linesForInput.push_back(0);
18+
auto linesCache = this->linesCache.lock();
19+
20+
/* Try the origin's line cache */
21+
const auto * linesForInput = linesCache->getOrNullptr(origin->offset);
22+
23+
auto fillCacheForOrigin = [](std::string_view content) {
24+
auto contentLines = Lines();
25+
26+
const char * begin = content.data();
27+
for (Pos::LinesIterator it(content), end; it != end; it++)
28+
contentLines.push_back(it->data() - begin);
29+
if (contentLines.empty())
30+
contentLines.push_back(0);
31+
32+
return contentLines;
33+
};
34+
35+
/* Calculate line offsets and fill the cache */
36+
if (!linesForInput) {
37+
auto originContent = result.getSource().value_or("");
38+
linesCache->upsert(origin->offset, fillCacheForOrigin(originContent));
39+
linesForInput = linesCache->getOrNullptr(origin->offset);
2840
}
29-
// as above: the first line starts at byte 0 and is always present
30-
auto lineStartOffset = std::prev(std::upper_bound(linesForInput.begin(), linesForInput.end(), offset));
3141

32-
result.line = 1 + (lineStartOffset - linesForInput.begin());
42+
assert(linesForInput);
43+
44+
// as above: the first line starts at byte 0 and is always present
45+
auto lineStartOffset = std::prev(std::upper_bound(linesForInput->begin(), linesForInput->end(), offset));
46+
result.line = 1 + (lineStartOffset - linesForInput->begin());
3347
result.column = 1 + (offset - *lineStartOffset);
3448
return result;
3549
}

0 commit comments

Comments
 (0)