Skip to content

Commit dd18627

Browse files
authored
Merge pull request #8638 from neo-technology/2.6-node-sim-config
Merge new node sim configs under single 'useComponents'
2 parents 0229769 + 972d574 commit dd18627

File tree

9 files changed

+178
-43
lines changed

9 files changed

+178
-43
lines changed
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
/*
2+
* Copyright (c) "Neo4j"
3+
* Neo4j Sweden AB [http://neo4j.com]
4+
*
5+
* This file is part of Neo4j.
6+
*
7+
* Neo4j is free software: you can redistribute it and/or modify
8+
* it under the terms of the GNU General Public License as published by
9+
* the Free Software Foundation, either version 3 of the License, or
10+
* (at your option) any later version.
11+
*
12+
* This program is distributed in the hope that it will be useful,
13+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15+
* GNU General Public License for more details.
16+
*
17+
* You should have received a copy of the GNU General Public License
18+
* along with this program. If not, see <http://www.gnu.org/licenses/>.
19+
*/
20+
package org.neo4j.gds.similarity.nodesim;
21+
22+
import org.jetbrains.annotations.NotNull;
23+
24+
import static org.neo4j.gds.utils.StringFormatting.formatWithLocale;
25+
26+
/**
27+
* Specification for how to treat components when computing NodeSimilarity.
28+
* Components can be used or not used.
29+
* If components are used, they can be computed or provided.
30+
*/
31+
public final class ComponentSpec {
32+
/**
33+
* Valid user input is {@code java.lang.Boolean} or {@code java.lang.String}.
34+
*
35+
* False means no components are used.
36+
* True means components are used, computed internally.
37+
* String is a property, representing pre-computed components.
38+
*/
39+
public static ComponentSpec parse(Object userInput) {
40+
if (userInput instanceof ComponentSpec) { // the config default calls in with a pre-parsed spec. it's annoying.
41+
return (ComponentSpec) userInput;
42+
}
43+
if (userInput instanceof Boolean) {
44+
return new ComponentSpec((Boolean) userInput, null);
45+
}
46+
if (userInput == null) {
47+
throw new IllegalArgumentException("Invalid component spec: cannot parse null as node property");
48+
}
49+
if (userInput instanceof String) {
50+
return parse((String) userInput);
51+
}
52+
throw new IllegalArgumentException(formatWithLocale("Invalid component spec: cannot parse type %s with value %s", userInput.getClass().getSimpleName(), userInput.toString()));
53+
}
54+
55+
private static ComponentSpec parse(@NotNull String userInput) {
56+
if (userInput.isBlank()) {
57+
throw new IllegalArgumentException("Invalid component spec: expected a valid node property");
58+
}
59+
return new ComponentSpec(Boolean.TRUE, userInput);
60+
}
61+
62+
public static String render(ComponentSpec spec) {
63+
return spec.componentProperty == null ? spec.useComponents.toString() : spec.componentProperty;
64+
}
65+
66+
static final ComponentSpec NO = new ComponentSpec(false, null);
67+
68+
private final Boolean useComponents;
69+
private final String componentProperty;
70+
71+
private ComponentSpec(Boolean useComponents, String componentProperty) {
72+
this.useComponents = useComponents;
73+
this.componentProperty = componentProperty;
74+
}
75+
76+
public boolean computeComponents() {
77+
return useComponents && null == componentProperty;
78+
}
79+
80+
public boolean useComponents() {
81+
return useComponents;
82+
}
83+
84+
public boolean usePreComputedComponents() {
85+
return useComponents && null != componentProperty;
86+
}
87+
88+
public String componentProperty() {
89+
return componentProperty;
90+
}
91+
}

algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarity.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -239,14 +239,15 @@ private Stream<SimilarityResult> computeParallel() {
239239
}
240240

241241
private LongUnaryOperator initComponents() {
242-
if (!config.enableComponentsOptimization()) {
242+
var componentsUsage = config.useComponents();
243+
if (!componentsUsage.useComponents()) {
243244
// considering everything as within the same component
244245
return n -> 0;
245246
}
246247

247-
if (config.componentProperty() != null) {
248+
if (componentsUsage.usePreComputedComponents()) {
248249
// extract component info from property
249-
NodePropertyValues nodeProperties = graph.nodeProperties(config.componentProperty());
250+
NodePropertyValues nodeProperties = graph.nodeProperties(componentsUsage.componentProperty());
250251
return initComponentIdMapping(graph, nodeProperties::longValue);
251252
}
252253

algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarityBaseConfig.java

Lines changed: 10 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
package org.neo4j.gds.similarity.nodesim;
2121

2222
import org.immutables.value.Value;
23-
import org.jetbrains.annotations.Nullable;
2423
import org.neo4j.gds.NodeLabel;
2524
import org.neo4j.gds.RelationshipType;
2625
import org.neo4j.gds.annotation.Configuration;
@@ -31,8 +30,6 @@
3130

3231
import java.util.Collection;
3332

34-
import static org.neo4j.gds.core.StringIdentifierValidations.emptyToNull;
35-
import static org.neo4j.gds.core.StringIdentifierValidations.validateNoWhiteCharacter;
3633
import static org.neo4j.gds.utils.StringFormatting.formatWithLocale;
3734

3835
@Configuration
@@ -52,8 +49,6 @@ public interface NodeSimilarityBaseConfig extends AlgoBaseConfig, RelationshipWe
5249

5350
String COMPONENT_PROPERTY_KEY = "componentProperty";
5451

55-
String APPLY_WCC = "applyWcc";
56-
5752
@Configuration.DoubleRange(min = 0, max = 1)
5853
default double similarityCutoff() {
5954
return 1E-42;
@@ -99,12 +94,11 @@ default int bottomN() {
9994
return BOTTOM_N_DEFAULT;
10095
}
10196

102-
@Configuration.ConvertWith(method = "validatePropertyName")
103-
@Configuration.Key(COMPONENT_PROPERTY_KEY)
104-
default @Nullable String componentProperty() { return null; }
105-
106-
@Configuration.Key(APPLY_WCC)
107-
default boolean applyWcc() {return false;}
97+
@Configuration.ConvertWith(method = "org.neo4j.gds.similarity.nodesim.ComponentSpec#parse")
98+
@Configuration.ToMapValue( "org.neo4j.gds.similarity.nodesim.ComponentSpec#render")
99+
default ComponentSpec useComponents() {
100+
return ComponentSpec.NO;
101+
}
108102

109103
@Configuration.Ignore
110104
default int normalizedK() {
@@ -168,30 +162,26 @@ default void validate() {
168162
}
169163
}
170164

171-
static @Nullable String validatePropertyName(String input) {
172-
return validateNoWhiteCharacter(emptyToNull(input), COMPONENT_PROPERTY_KEY);
173-
}
174-
175165
@Configuration.GraphStoreValidationCheck
176166
default void validateComponentProperty(
177167
GraphStore graphStore,
178168
Collection<NodeLabel> selectedLabels,
179169
Collection<RelationshipType> selectedRelationshipTypes
180170
) {
181-
String componentProperty = componentProperty();
182-
if (componentProperty != null) {
183-
ConfigNodesValidations.validateNodePropertyExists(graphStore, selectedLabels, "Component property", componentProperty);
171+
var componentsUsage = useComponents();
172+
if (componentsUsage.usePreComputedComponents()) {
173+
ConfigNodesValidations.validateNodePropertyExists(graphStore, selectedLabels, "Component property", componentsUsage.componentProperty());
184174
}
185175
}
186176

187177
@Configuration.Ignore
188178
default boolean actuallyRunWCC() {
189-
return enableComponentsOptimization() && componentProperty() == null;
179+
return useComponents().computeComponents();
190180
}
191181

192182
@Configuration.Ignore
193183
default boolean enableComponentsOptimization() {
194-
return applyWcc() || componentProperty() != null;
184+
return useComponents().useComponents();
195185
}
196186

197187
}

algo/src/test/java/org/neo4j/gds/similarity/filterednodesim/FilteredNodeSimilarityTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ void shouldSurviveIoannisFurtherObjections(boolean enableWcc) {
139139
var config = FilteredNodeSimilarityStreamConfigImpl.builder()
140140
.sourceNodeFilter(NodeFilterSpecFactory.create(sourceNodeFilter))
141141
.concurrency(1)
142-
.applyWcc(enableWcc)
142+
.useComponents(enableWcc)
143143
.topK(1)
144144
.topN(10)
145145
.build();

algo/src/test/java/org/neo4j/gds/similarity/nodesim/ComponentPropertyNodeSimilarityTest.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -149,8 +149,7 @@ void shouldComputeMemrecWithOrWithoutComponentMapping(boolean componentPropertyS
149149
.topK(TOP_K_DEFAULT)
150150
.writeProperty("writeProperty")
151151
.writeRelationshipType("writeRelationshipType")
152-
.applyWcc(true)
153-
.componentProperty(componentPropertySet ? "compid" : null)
152+
.useComponents(componentPropertySet ? "compid" : true)
154153
.build();
155154

156155
MemoryTree actual = new NodeSimilarityFactory<>().memoryEstimation(config).estimate(dimensions, 1);
@@ -198,7 +197,7 @@ void shouldOptimizeForDistinctComponentsProperty(Orientation orientation, int co
198197
Graph graph = orientation == NATURAL ? naturalGraph : reverseGraph;
199198
var config = NodeSimilarityStreamConfigImpl.builder()
200199
.similarityCutoff(0.0)
201-
.componentProperty("compid")
200+
.useComponents("compid")
202201
.concurrency(concurrency)
203202
.build();
204203

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
/*
2+
* Copyright (c) "Neo4j"
3+
* Neo4j Sweden AB [http://neo4j.com]
4+
*
5+
* This file is part of Neo4j.
6+
*
7+
* Neo4j is free software: you can redistribute it and/or modify
8+
* it under the terms of the GNU General Public License as published by
9+
* the Free Software Foundation, either version 3 of the License, or
10+
* (at your option) any later version.
11+
*
12+
* This program is distributed in the hope that it will be useful,
13+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15+
* GNU General Public License for more details.
16+
*
17+
* You should have received a copy of the GNU General Public License
18+
* along with this program. If not, see <http://www.gnu.org/licenses/>.
19+
*/
20+
package org.neo4j.gds.similarity.nodesim;
21+
22+
import org.junit.jupiter.api.Test;
23+
24+
import static org.assertj.core.api.Assertions.assertThat;
25+
import static org.assertj.core.api.Assertions.assertThatThrownBy;
26+
27+
class ComponentSpecTest {
28+
29+
@Test
30+
void shouldTurnOffFeature() {
31+
var falseSpec = ComponentSpec.parse(false);
32+
assertThat(falseSpec.useComponents()).isFalse();
33+
assertThat(falseSpec.computeComponents()).isFalse();
34+
}
35+
36+
@Test
37+
void shouldTurnOnFeatureForComputedComponents() {
38+
var spec = ComponentSpec.parse(true);
39+
assertThat(spec.useComponents()).isTrue();
40+
assertThat(spec.computeComponents()).isTrue();
41+
}
42+
43+
@Test
44+
void shouldTurnOnFeatureForPreComputedComponents() {
45+
var spec = ComponentSpec.parse("prop");
46+
assertThat(spec.useComponents()).isTrue();
47+
assertThat(spec.computeComponents()).isFalse();
48+
}
49+
50+
@Test
51+
void shouldThrowOnNullAndEmptyString() {
52+
assertThatThrownBy(() -> ComponentSpec.parse(""))
53+
.hasMessageContaining("Invalid component spec: expected a valid node property");
54+
assertThatThrownBy(() -> ComponentSpec.parse(null))
55+
.hasMessageContaining("Invalid component spec: cannot parse null as node property");
56+
}
57+
58+
@Test
59+
void shouldThrowOnUnexpectedInputs() {
60+
assertThatThrownBy(() -> ComponentSpec.parse(42)).hasMessageContaining("Invalid component spec");
61+
}
62+
}

algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodeSimilarityBaseConfigTest.java

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -35,26 +35,17 @@ void shouldNotRequestAnythingIfNotEnabled(){
3535

3636
@Test
3737
void shouldRequestWccIfEnabled(){
38-
var config = NodeSimilarityStreamConfigImpl.builder().applyWcc(true).build();
38+
var config = NodeSimilarityStreamConfigImpl.builder().useComponents(true).build();
3939

4040
assertThat(config.actuallyRunWCC()).isTrue();
4141
assertThat(config.enableComponentsOptimization()).isTrue();
4242
}
4343

4444
@Test
4545
void shouldNotRequestWccIfPropertyGiven(){
46-
var config = NodeSimilarityStreamConfigImpl.builder().componentProperty("foo").build();
46+
var config = NodeSimilarityStreamConfigImpl.builder().useComponents("foo").build();
4747

4848
assertThat(config.actuallyRunWCC()).isFalse();
4949
assertThat(config.enableComponentsOptimization()).isTrue();
5050
}
51-
52-
@Test
53-
void shouldNotRequestWccIfPropertyGivenAndWccProvided(){
54-
var config = NodeSimilarityStreamConfigImpl.builder().applyWcc(true).componentProperty("foo").build();
55-
56-
assertThat(config.actuallyRunWCC()).isFalse();
57-
assertThat(config.enableComponentsOptimization()).isTrue();
58-
}
59-
6051
}

algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodeSimilarityTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -878,7 +878,7 @@ void shouldLogProgress(int concurrency) {
878878
void shouldLogProgressForWccOptimization() {
879879
var graph = naturalGraph;
880880
var config = NodeSimilarityStreamConfigImpl.builder()
881-
.applyWcc(true)
881+
.useComponents(true)
882882
.concurrency(4)
883883
.build();
884884
var progressTask = new NodeSimilarityFactory<>().progressTask(graph, config);

doc/modules/ROOT/partials/algorithms/node-similarity/specific-configuration.adoc

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ If unspecified, the algorithm runs unweighted.
2121
| similarityMetric
2222
| String | JACCARD | yes | The metric used to compute similarity.
2323
Can be either `JACCARD`, `OVERLAP` or `COSINE`.
24-
| [[runwcc-{instance}]] runWcc | Boolean | false | yes | If enabled, xref:algorithms/wcc.adoc[WCC] is run as a pre-processing step to obtain the graph components.
25-
This can increase performance for multi-component graphs, since nodes in distinct components always have zero similarity and do not need to be compared.
26-
If the components are already stored in a node property, use the `componentProperty` configuration parameter instead.
27-
| [[component-property-{instance}]] componentProperty | String | null | yes | Node property containing the ID of the graph components. If not available, use the `runWcc` configuration parameter instead.
24+
| [[useComponents-{instance}]] useComponents | Boolean or String | false | yes
25+
| If enabled, Node Similarity will use components to improve the performance of the computation, skipping comparisons of nodes in different components.
26+
Set to `false` (Default): the algorithm does not use components, but computes similarity across the entire graph.
27+
Set to `true`: the algorithm uses components, and will compute these components before computing similarity.
28+
Set to *String*: use pre-computed components stored in the graph, with *String* as the key for a node property representing components.

0 commit comments

Comments
 (0)