Skip to content

Commit 9dee10d

Browse files
committed
Optimize purge. ~30% for large batches.
Use a map to store and look up FDI records in `couch_db_updater`. The map helps in two ways:

* In `apply_purge_reqs` we avoid removing then re-adding the active FDI record to the list. Looking up the record is a faster operation, especially with large batches, and a simple update vs remove + replace helps with generating less garbage.
* In `purge_docs` we avoid nested `lists:keyfind/3` lookups when building the FDI pairs list. For instance, with 1000 docs we replace 1000 calls to keyfind, an O(n) operation, with 1000 map lookups, O(log n) operations.

Benchmark:

```
./conflicts.py -a adm:pass -q 1 -n 100000 -x 1.0 -z -c 0
docs: 100k, purge batch size: 1000 q:1 all deleted no conflicts
```

Results (7 calls with main and 10 calls with the PR)

Unoptimized (main)

*** purging 100000 docs 15 sec, rate = 6466/sec
*** purging 100000 docs 15 sec, rate = 6880/sec
*** purging 100000 docs 14 sec, rate = 7019/sec
*** purging 100000 docs 17 sec, rate = 6056/sec
*** purging 100000 docs 14 sec, rate = 7239/sec
*** purging 100000 docs 14 sec, rate = 7124/sec
*** purging 100000 docs 14 sec, rate = 7121/sec

Average: 6844

Optimized (pr)

*** purging 100000 docs 11 sec, rate = 9003/sec
*** purging 100000 docs 12 sec, rate = 8591/sec
*** purging 100000 docs 10 sec, rate = 9692/sec
*** purging 100000 docs 11 sec, rate = 9442/sec
*** purging 100000 docs 12 sec, rate = 8364/sec
*** purging 100000 docs 11 sec, rate = 8784/sec
*** purging 100000 docs 11 sec, rate = 9103/sec

Average: 8926

Speedup: 30%
1 parent 784574b commit 9dee10d

File tree

1 file changed

+16
-17
lines changed

1 file changed

+16
-17
lines changed

src/couch/src/couch_db_updater.erl

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -811,22 +811,22 @@ purge_docs(Db, PurgeReqs) ->
811811
FDIs = couch_db_engine:open_docs(Db, Ids),
812812
USeq = couch_db_engine:get_update_seq(Db),
813813

814-
IdFDIs = lists:zip(Ids, FDIs),
814+
IdFDIs = maps:from_list(lists:zip(Ids, FDIs)),
815815
{NewIdFDIs, Replies} = apply_purge_reqs(PurgeReqs, IdFDIs, USeq, []),
816816

817-
Pairs = lists:flatmap(
818-
fun({DocId, OldFDI}) ->
819-
{DocId, NewFDI} = lists:keyfind(DocId, 1, NewIdFDIs),
820-
case {OldFDI, NewFDI} of
821-
{not_found, not_found} ->
822-
[];
823-
{#full_doc_info{} = A, #full_doc_info{} = A} ->
824-
[];
825-
{#full_doc_info{}, _} ->
826-
[{OldFDI, NewFDI}]
827-
end
828-
end,
829-
IdFDIs
817+
Pairs = lists:sort(
818+
maps:fold(
819+
fun(DocId, OldFDI, Acc) ->
820+
#{DocId := NewFDI} = NewIdFDIs,
821+
case {OldFDI, NewFDI} of
822+
{not_found, not_found} -> Acc;
823+
{#full_doc_info{} = A, #full_doc_info{} = A} -> Acc;
824+
{#full_doc_info{}, _} -> [{OldFDI, NewFDI} | Acc]
825+
end
826+
end,
827+
[],
828+
IdFDIs
829+
)
830830
),
831831

832832
PSeq = couch_db_engine:get_purge_seq(Db),
@@ -850,7 +850,7 @@ apply_purge_reqs([], IdFDIs, _USeq, Replies) ->
850850
{IdFDIs, lists:reverse(Replies)};
851851
apply_purge_reqs([Req | RestReqs], IdFDIs, USeq, Replies) ->
852852
{_UUID, DocId, Revs} = Req,
853-
{value, {_, FDI0}, RestIdFDIs} = lists:keytake(DocId, 1, IdFDIs),
853+
#{DocId := FDI0} = IdFDIs,
854854
{NewFDI, RemovedRevs, NewUSeq} =
855855
case FDI0 of
856856
#full_doc_info{rev_tree = Tree} ->
@@ -888,9 +888,8 @@ apply_purge_reqs([Req | RestReqs], IdFDIs, USeq, Replies) ->
888888
% Not found means nothing to change
889889
{not_found, [], USeq}
890890
end,
891-
NewIdFDIs = [{DocId, NewFDI} | RestIdFDIs],
892891
NewReplies = [{ok, RemovedRevs} | Replies],
893-
apply_purge_reqs(RestReqs, NewIdFDIs, NewUSeq, NewReplies).
892+
apply_purge_reqs(RestReqs, IdFDIs#{DocId := NewFDI}, NewUSeq, NewReplies).
894893

895894
update_time_seq(#db{time_seq = TSeq} = Db, Seq) when is_integer(Seq) ->
896895
Timestamp = couch_time_seq:timestamp(),

0 commit comments

Comments
 (0)