Skip to content

Commit 8c7ac33

Browse files
committed
askrene: implement reduce_num_flows in refine, using increase_flows().
Now we simply call it at the end. We need to check it hasn't violated fee maxima, but otherwise it's simple. Signed-off-by: Rusty Russell <[email protected]> Changelog-Fixed: Plugins: `askrene` now handles limits on number of htlcs much more gracefully.
1 parent 050b149 commit 8c7ac33

File tree

4 files changed

+83
-87
lines changed

4 files changed

+83
-87
lines changed

plugins/askrene/mcf.c

Lines changed: 30 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1377,34 +1377,14 @@ linear_routes(const tal_t *ctx, struct route_query *rq,
13771377
struct reserve_hop *reservations = new_reservations(working_ctx, rq);
13781378

13791379
while (!amount_msat_is_zero(amount_to_deliver)) {
1380-
size_t num_parts, parts_slots, excess_parts;
1381-
u32 bottleneck_idx;
1382-
13831380
if (timemono_after(time_mono(), deadline)) {
13841381
error_message = rq_log(ctx, rq, LOG_BROKEN,
13851382
"%s: timed out after deadline",
13861383
__func__);
13871384
goto fail;
13881385
}
13891386

1390-
/* FIXME: This algorithm to limit the number of parts is dumb
1391-
* for two reasons:
1392-
* 1. it does not take into account that several loop
1393-
* iterations here may produce two flows along the same
1394-
* path that after "squash_flows" become a single flow.
1395-
* 2. limiting the number of "slots" to 1 makes us fail to
1396-
* see some solutions that use more than one of those
1397-
* existing paths.
1398-
*
1399-
* A better approach could be to run MCF, remove the excess
1400-
* paths and then recompute a MCF on a network where each arc is
1401-
* one of the previously remaining paths, ie. redistributing the
1402-
* payment amount among the selected paths in a cost-efficient
1403-
* way. */
14041387
new_flows = tal_free(new_flows);
1405-
num_parts = tal_count(*flows);
1406-
assert(num_parts < rq->maxparts);
1407-
parts_slots = rq->maxparts - num_parts;
14081388

14091389
/* If the amount_to_deliver is very small we better use a single
14101390
* path computation because:
@@ -1415,8 +1395,7 @@ linear_routes(const tal_t *ctx, struct route_query *rq,
14151395
* do not deliver the entire payment amount by just a small
14161396
* amount. */
14171397
if (amount_msat_less_eq(amount_to_deliver,
1418-
SINGLE_PATH_THRESHOLD) ||
1419-
parts_slots == 1) {
1398+
SINGLE_PATH_THRESHOLD)) {
14201399
new_flows = single_path_flow(working_ctx, rq, srcnode,
14211400
dstnode, amount_to_deliver,
14221401
mu, delay_feefactor);
@@ -1433,7 +1412,7 @@ linear_routes(const tal_t *ctx, struct route_query *rq,
14331412
}
14341413

14351414
error_message =
1436-
refine_flows(ctx, rq, amount_to_deliver, &new_flows, &bottleneck_idx);
1415+
refine_flows(ctx, rq, amount_to_deliver, &new_flows, NULL);
14371416
if (error_message)
14381417
goto fail;
14391418

@@ -1457,32 +1436,6 @@ linear_routes(const tal_t *ctx, struct route_query *rq,
14571436
assert(!amount_msat_is_zero(new_flows[i]->delivers));
14581437
}
14591438

1460-
if (tal_count(new_flows) > parts_slots) {
1461-
/* Remove the excees of parts and leave one slot for the
1462-
* next round of computations. */
1463-
excess_parts = 1 + tal_count(new_flows) - parts_slots;
1464-
} else if (tal_count(new_flows) == parts_slots &&
1465-
amount_msat_less(all_deliver, amount_to_deliver)) {
1466-
/* Leave exactly 1 slot for the next round of
1467-
* computations. */
1468-
excess_parts = 1;
1469-
} else
1470-
excess_parts = 0;
1471-
if (excess_parts > 0) {
1472-
/* If we removed all the flows we found, avoid selecting them again,
1473-
* by disabling one. */
1474-
if (excess_parts == tal_count(new_flows))
1475-
bitmap_set_bit(rq->disabled_chans, bottleneck_idx);
1476-
if (!remove_flows(&new_flows, excess_parts)) {
1477-
error_message = rq_log(ctx, rq, LOG_BROKEN,
1478-
"%s: failed to remove %zu"
1479-
" flows from a set of %zu",
1480-
__func__, excess_parts,
1481-
tal_count(new_flows));
1482-
goto fail;
1483-
}
1484-
}
1485-
14861439
/* Is this set of flows too expensive?
14871440
* We can check if the new flows are within the fee budget,
14881441
* however in some cases we have discarded some flows at this
@@ -1606,6 +1559,34 @@ linear_routes(const tal_t *ctx, struct route_query *rq,
16061559
/* all set! Now squash flows that use the same path */
16071560
squash_flows(ctx, rq, flows);
16081561

1562+
/* If we're over the number of parts, try to cram excess into the
1563+
* largest-capacity parts */
1564+
if (tal_count(*flows) > rq->maxparts) {
1565+
struct amount_msat fee;
1566+
1567+
error_message = reduce_num_flows(rq, rq, flows, amount, rq->maxparts);
1568+
if (error_message) {
1569+
*flows = tal_free(*flows);
1570+
return error_message;
1571+
}
1572+
1573+
/* Check fee budget! */
1574+
fee = flowset_fee(rq->plugin, *flows);
1575+
if (amount_msat_greater(fee, maxfee)) {
1576+
error_message = rq_log(rq, rq, LOG_INFORM,
1577+
"After reducing the flows to %zu (i.e. maxparts),"
1578+
" we had a fee of %s, greater than "
1579+
"max of %s.",
1580+
tal_count(*flows),
1581+
fmt_amount_msat(tmpctx, fee),
1582+
fmt_amount_msat(tmpctx, maxfee));
1583+
if (error_message) {
1584+
*flows = tal_free(*flows);
1585+
return error_message;
1586+
}
1587+
}
1588+
}
1589+
16091590
/* flows_probability re-does a temporary reservation so we need to call
16101591
* it after we have cleaned the reservations we used to build the flows
16111592
* hence after we freed working_ctx. */

plugins/askrene/refine.c

Lines changed: 38 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include "config.h"
22
#include <ccan/asort/asort.h>
3+
#include <ccan/cast/cast.h>
34
#include <ccan/tal/str/str.h>
45
#include <common/gossmap.h>
56
#include <plugins/askrene/askrene.h>
@@ -597,29 +598,43 @@ double flows_probability(const tal_t *ctx, struct route_query *rq,
597598
return probability;
598599
}
599600

600-
/* Compare flows by deliver amount */
601-
static int reverse_cmp_flows(struct flow *const *fa, struct flow *const *fb,
602-
void *unused UNUSED)
601+
const char *reduce_num_flows(const tal_t *ctx,
602+
const struct route_query *rq,
603+
struct flow ***flows,
604+
struct amount_msat deliver,
605+
size_t num_parts)
603606
{
604-
if (amount_msat_eq((*fa)->delivers, (*fb)->delivers))
605-
return 0;
606-
if (amount_msat_greater((*fa)->delivers, (*fb)->delivers))
607-
return -1;
608-
return 1;
609-
}
607+
/* Keep the largest flows (not as I originally implemented, the largest
608+
* capacity flows). Here's Lagrang3's analysis:
609+
*
610+
* I think it is better to keep the largest-deliver flows. If we only
611+
* go for the highest capacity we may throw away the low cost benefits
612+
* of the MCF.
613+
614+
* Hypothetical scenario: MCF finds 3 flows but maxparts=2,
615+
* flow 1: deliver=10, cost=0, capacity=0
616+
* flow 2: deliver=7, cost=1, capacity=5
617+
* flow 3: deliver=1, cost=10, capacity=100
618+
*
619+
* It is better to keep flows 1 and 2 by accomodating 1 more unit of
620+
* flow in flow2 at 1 value expense (per flow), than to keep flows 2 and
621+
* 3 by accomodating 5 more units of flow in flow2 at cost 1 and 5 in
622+
* flow3 at cost 100.
623+
*
624+
* The trade-off is: if we prioritize the delivery value already
625+
* computed by MCF then we find better solutions, but we might fail to
626+
* find feasible solutions sometimes. If we prioritize capacity then we
627+
* generally find bad solutions though we find feasibility more often
628+
* than the alternative.
629+
*/
630+
size_t orig_num_flows = tal_count(*flows);
631+
asort(*flows, orig_num_flows, revcmp_flows, NULL);
632+
tal_resize(flows, num_parts);
633+
634+
if (!increase_flows(rq, *flows, deliver, -1.0))
635+
return rq_log(ctx, rq, LOG_INFORM,
636+
"Failed to reduce %zu flows down to maxparts (%zu)",
637+
orig_num_flows, num_parts);
610638

611-
bool remove_flows(struct flow ***flows, u32 n)
612-
{
613-
if (n == 0)
614-
goto fail;
615-
if (n > tal_count(*flows))
616-
goto fail;
617-
asort(*flows, tal_count(*flows), reverse_cmp_flows, NULL);
618-
for (size_t count = tal_count(*flows); n > 0; n--, count--) {
619-
assert(count > 0);
620-
tal_arr_remove(flows, count - 1);
621-
}
622-
return true;
623-
fail:
624-
return false;
639+
return NULL;
625640
}

plugins/askrene/refine.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,10 @@ void squash_flows(const tal_t *ctx, struct route_query *rq,
3838
double flows_probability(const tal_t *ctx, struct route_query *rq,
3939
struct flow ***flows);
4040

41-
/* Helper function: removes n flows from the set. It will remove those flows
42-
* with the lowest amount values. */
43-
bool remove_flows(struct flow ***flows, u32 n);
41+
/* Modify flows so only N remain, if we can. Returns an error if we cannot. */
42+
const char *reduce_num_flows(const tal_t *ctx,
43+
const struct route_query *rq,
44+
struct flow ***flows,
45+
struct amount_msat deliver,
46+
size_t num_parts);
4447
#endif /* LIGHTNING_PLUGINS_ASKRENE_REFINE_H */

tests/test_askrene.py

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1782,8 +1782,7 @@ def test_simple_dummy_channel(node_factory):
17821782

17831783
def test_maxparts_infloop(node_factory, bitcoind):
17841784
# Three paths from l1 -> l5.
1785-
# FIXME: enhance explain_failure!
1786-
l1, l2, l3, l4, l5 = node_factory.get_nodes(5, opts=[{'broken_log': 'plugin-cln-askrene.*the obvious route'}] + [{}] * 4)
1785+
l1, l2, l3, l4, l5 = node_factory.get_nodes(5)
17871786

17881787
for intermediate in (l2, l3, l4):
17891788
node_factory.join_nodes([l1, intermediate, l5])
@@ -1805,16 +1804,14 @@ def test_maxparts_infloop(node_factory, bitcoind):
18051804
final_cltv=5)
18061805
assert len(route['routes']) == 3
18071806

1808-
# Now with maxparts == 2. Usually askrene can't figure out why it failed,
1809-
# but sometimes it gets a theory.
1810-
with pytest.raises(RpcError):
1811-
l1.rpc.getroutes(source=l1.info['id'],
1812-
destination=l5.info['id'],
1813-
amount_msat=amount,
1814-
layers=[],
1815-
maxfee_msat=amount,
1816-
final_cltv=5,
1817-
maxparts=2)
1807+
route = l1.rpc.getroutes(source=l1.info['id'],
1808+
destination=l5.info['id'],
1809+
amount_msat=amount,
1810+
layers=[],
1811+
maxfee_msat=amount,
1812+
final_cltv=5,
1813+
maxparts=2)
1814+
assert len(route['routes']) == 2
18181815

18191816

18201817
def test_askrene_timeout(node_factory, bitcoind):

0 commit comments

Comments
 (0)