Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
from __future__ import annotations

from abc import ABC, abstractmethod
from types import TracebackType
from typing import List, NamedTuple, Optional, Type

from graphdatascience.procedure_surface.api.base_result import BaseResult
from graphdatascience.procedure_surface.api.catalog.graph_api import GraphV2


class GraphSamplingEndpoints(ABC):
    """
    Abstract base class defining the API for graph sampling operations.

    Both sampling methods share the same parameters and return a `GraphWithSamplingResult`.
    """

    @abstractmethod
    def rwr(
        self,
        G: GraphV2,
        graph_name: str,
        start_nodes: Optional[List[int]] = None,
        restart_probability: Optional[float] = None,
        sampling_ratio: Optional[float] = None,
        node_label_stratification: Optional[bool] = None,
        relationship_weight_property: Optional[str] = None,
        relationship_types: Optional[List[str]] = None,
        node_labels: Optional[List[str]] = None,
        sudo: Optional[bool] = None,
        log_progress: Optional[bool] = None,
        username: Optional[str] = None,
        concurrency: Optional[int] = None,
        job_id: Optional[str] = None,
    ) -> GraphWithSamplingResult:
        """
        Random walk with restarts (RWR) samples the graph by taking random walks from a set of start nodes.

        On each step of a random walk, there is a probability that the walk stops, and a new walk from one of the start
        nodes starts instead (i.e. the walk restarts). Each node visited on these walks will be part of the sampled
        subgraph. The resulting subgraph is stored as a new graph in the Graph Catalog.

        Parameters
        ----------
        G : GraphV2
            The input graph to be sampled.
        graph_name : str
            The name of the new graph that is stored in the graph catalog.
        start_nodes : list of int, optional
            IDs of the initial set of nodes in the original graph from which the sampling random walks will start.
            By default, a single node is chosen uniformly at random.
        restart_probability : float, optional
            The probability that a sampling random walk restarts from one of the start nodes.
            Default is 0.1.
        sampling_ratio : float, optional
            The fraction of nodes in the original graph to be sampled.
            Default is 0.15.
        node_label_stratification : bool, optional
            If true, preserves the node label distribution of the original graph.
            Default is False.
        relationship_weight_property : str, optional
            Name of the relationship property to use as weights. If unspecified, the algorithm runs unweighted.
        relationship_types : list of str, optional
            Filter the named graph using the given relationship types. Relationships with any of the given types will be
            included.
        node_labels : list of str, optional
            Filter the named graph using the given node labels. Nodes with any of the given labels will be included.
        sudo : bool, optional
            Bypass heap control. Use with caution.
            Default is False.
        log_progress : bool, optional
            Turn percentage logging on or off while running the procedure.
            Default is True.
        username : str, optional
            Use Administrator access to run an algorithm on a graph owned by another user.
            Default is None.
        concurrency : int, optional
            The number of concurrent threads used for running the algorithm.
            Default is 4.
        job_id : str, optional
            An ID that can be provided to more easily track the algorithm’s progress.
            By default, a random job id is generated.

        Returns
        -------
        GraphWithSamplingResult
            Tuple of the graph object and the result of the Random Walk with Restart (RWR), including the dimensions
            of the sampled graph.
        """
        pass

    @abstractmethod
    def cnarw(
        self,
        G: GraphV2,
        graph_name: str,
        start_nodes: Optional[List[int]] = None,
        restart_probability: Optional[float] = None,
        sampling_ratio: Optional[float] = None,
        node_label_stratification: Optional[bool] = None,
        relationship_weight_property: Optional[str] = None,
        relationship_types: Optional[List[str]] = None,
        node_labels: Optional[List[str]] = None,
        sudo: Optional[bool] = None,
        log_progress: Optional[bool] = None,
        username: Optional[str] = None,
        concurrency: Optional[int] = None,
        job_id: Optional[str] = None,
    ) -> GraphWithSamplingResult:
        """
        Common Neighbour Aware Random Walk (CNARW) samples the graph by taking random walks from a set of start nodes.

        CNARW is a graph sampling technique that involves optimizing the selection of the next-hop node. It takes into
        account the number of common neighbours between the current node and the next-hop candidates. On each step of a
        random walk, there is a probability that the walk stops, and a new walk from one of the start nodes starts
        instead (i.e. the walk restarts). Each node visited on these walks will be part of the sampled subgraph. The
        resulting subgraph is stored as a new graph in the Graph Catalog.

        Parameters
        ----------
        G : GraphV2
            The input graph to be sampled.
        graph_name : str
            The name of the new graph that is stored in the graph catalog.
        start_nodes : list of int, optional
            IDs of the initial set of nodes in the original graph from which the sampling random walks will start.
            By default, a single node is chosen uniformly at random.
        restart_probability : float, optional
            The probability that a sampling random walk restarts from one of the start nodes.
            Default is 0.1.
        sampling_ratio : float, optional
            The fraction of nodes in the original graph to be sampled.
            Default is 0.15.
        node_label_stratification : bool, optional
            If true, preserves the node label distribution of the original graph.
            Default is False.
        relationship_weight_property : str, optional
            Name of the relationship property to use as weights. If unspecified, the algorithm runs unweighted.
        relationship_types : list of str, optional
            Filter the named graph using the given relationship types. Relationships with any of the given types will be
            included.
        node_labels : list of str, optional
            Filter the named graph using the given node labels. Nodes with any of the given labels will be included.
        sudo : bool, optional
            Bypass heap control. Use with caution.
            Default is False.
        log_progress : bool, optional
            Turn percentage logging on or off while running the procedure.
            Default is True.
        username : str, optional
            Use Administrator access to run an algorithm on a graph owned by another user.
            Default is None.
        concurrency : int, optional
            The number of concurrent threads used for running the algorithm.
            Default is 4.
        job_id : str, optional
            An ID that can be provided to more easily track the algorithm’s progress.
            By default, a random job id is generated.

        Returns
        -------
        GraphWithSamplingResult
            Tuple of the graph object and the result of the Common Neighbour Aware Random Walk (CNARW), including the
            dimensions of the sampled graph.
        """
        pass


class GraphSamplingResult(BaseResult):
    """
    Result fields reported by a graph sampling call (`rwr` / `cnarw`).

    NOTE(review): the per-field meanings below are inferred from the field names only; confirm them against the
    documented output of the sampling procedures.
    """

    graph_name: str  # presumably the name of the newly created (sampled) graph
    from_graph_name: str  # presumably the name of the graph that was sampled from
    node_count: int  # presumably the number of nodes in the sampled graph
    relationship_count: int  # presumably the number of relationships in the sampled graph
    start_node_count: int  # presumably the number of start nodes used by the random walks
    project_millis: int  # presumably the time spent creating the sampled graph, in milliseconds


class GraphWithSamplingResult(NamedTuple):
    """
    Pair of a sampled graph and the result of the sampling call that produced it.

    Unpacks as ``(graph, result)``. It also works as a context manager: entering yields the graph, and leaving the
    ``with`` block calls ``drop()`` on that graph.
    """

    graph: GraphV2
    result: GraphSamplingResult

    def __enter__(self) -> GraphV2:
        """Return the graph so it can be bound with ``with ... as G``."""
        sampled_graph = self.graph
        return sampled_graph

    def __exit__(
        self,
        exception_type: Optional[Type[BaseException]],
        exception_value: Optional[BaseException],
        traceback: Optional[TracebackType],
    ) -> None:
        """Call ``drop()`` on the graph; returns None, so any in-flight exception propagates."""
        sampled_graph = self.graph
        sampled_graph.drop()
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from graphdatascience.procedure_surface.api.catalog.node_label_endpoints import NodeLabelEndpoints
from graphdatascience.procedure_surface.api.catalog.node_properties_endpoints import NodePropertiesEndpoints
from graphdatascience.procedure_surface.api.catalog.relationships_endpoints import RelationshipsEndpoints
from graphdatascience.procedure_surface.api.graph_sampling_endpoints import GraphSamplingEndpoints
from graphdatascience.procedure_surface.api.catalog.graph_sampling_endpoints import GraphSamplingEndpoints


class CatalogEndpoints(ABC):
Expand Down
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,10 @@
from graphdatascience.procedure_surface.api.base_result import BaseResult
from graphdatascience.procedure_surface.api.catalog.graph_api import GraphV2

from .estimation_result import EstimationResult
from graphdatascience.procedure_surface.api.estimation_result import EstimationResult


class ArticleRankEndpoints(ABC):
"""
Abstract base class defining the API for the ArticleRank algorithm.
"""

@abstractmethod
def mutate(
Expand All @@ -36,40 +33,44 @@ def mutate(
source_nodes: Optional[Any] = None,
) -> ArticleRankMutateResult:
"""
Executes the ArticleRank algorithm and writes the results back to the graph as a node property.
Runs the Article Rank algorithm and stores the results in the graph catalog as a new node property.

ArticleRank is a variant of the Page Rank algorithm, which measures the transitive influence of nodes.
Page Rank follows the assumption that relationships originating from low-degree nodes have a higher influence than relationships from high-degree nodes.
Article Rank lowers the influence of low-degree nodes by lowering the scores being sent to their neighbors in each iteration.

Parameters
----------
G : GraphV2
The graph to run the algorithm on
mutate_property : str
The property name to store the ArticleRank score for each node
Name of the node property to store the results in.
damping_factor : Optional[float], default=None
The damping factor controls the probability of a random jump to a random node
Probability of a jump to a random node.
tolerance : Optional[float], default=None
Minimum change in scores between iterations
Minimum change in scores between iterations.
max_iterations : Optional[int], default=None
The maximum number of iterations to run
Maximum number of iterations to run.
scaler : Optional[Any], default=None
Configuration for scaling the scores
Name of the scaler applied on the resulting scores.
relationship_types : Optional[List[str]], default=None
The relationships types used to select relationships for this algorithm run
Filter the graph using the given relationship types. Relationships with any of the given types will be included.
node_labels : Optional[List[str]], default=None
The node labels used to select nodes for this algorithm run
Filter the graph using the given node labels. Nodes with any of the given labels will be included.
sudo : Optional[bool], default=None
Override memory estimation limits
Disable the memory guard.
log_progress : Optional[bool], default=None
Whether to log progress
Display progress logging.
username : Optional[str], default=None
The username to attribute the procedure run to
concurrency : Optional[Any], default=None
The number of concurrent threads
Number of threads to use for running the algorithm.
job_id : Optional[Any], default=None
An identifier for the job
Identifier for the job.
relationship_weight_property : Optional[str], default=None
The property name that contains weight
Name of the property to be used as weights.
source_nodes : Optional[Any], default=None
The source nodes for personalized ArticleRank
List of node ids to use as starting points. Use a list of list pairs to associate each node with a bias > 0.

Returns
-------
Expand All @@ -96,38 +97,42 @@ def stats(
source_nodes: Optional[Any] = None,
) -> ArticleRankStatsResult:
"""
Executes the ArticleRank algorithm and returns result statistics without writing the result to Neo4j.
Runs the Article Rank algorithm and returns result statistics without storing the results.

ArticleRank is a variant of the Page Rank algorithm, which measures the transitive influence of nodes.
Page Rank follows the assumption that relationships originating from low-degree nodes have a higher influence than relationships from high-degree nodes.
Article Rank lowers the influence of low-degree nodes by lowering the scores being sent to their neighbors in each iteration.

Parameters
----------
G : GraphV2
The graph to run the algorithm on
damping_factor : Optional[float], default=None
The damping factor controls the probability of a random jump to a random node
Probability of a jump to a random node.
tolerance : Optional[float], default=None
Minimum change in scores between iterations
Minimum change in scores between iterations.
max_iterations : Optional[int], default=None
The maximum number of iterations to run
Maximum number of iterations to run.
scaler : Optional[Any], default=None
Configuration for scaling the scores
Name of the scaler applied on the resulting scores.
relationship_types : Optional[List[str]], default=None
The relationships types used to select relationships for this algorithm run
Filter the graph using the given relationship types. Relationships with any of the given types will be included.
node_labels : Optional[List[str]], default=None
The node labels used to select nodes for this algorithm run
Filter the graph using the given node labels. Nodes with any of the given labels will be included.
sudo : Optional[bool], default=None
Override memory estimation limits
Disable the memory guard.
log_progress : Optional[bool], default=None
Whether to log progress
Display progress logging.
username : Optional[str], default=None
The username to attribute the procedure run to
concurrency : Optional[Any], default=None
The number of concurrent threads
Number of threads to use for running the algorithm.
job_id : Optional[Any], default=None
An identifier for the job
Identifier for the job.
relationship_weight_property : Optional[str], default=None
The property name that contains weight
Name of the property to be used as weights.
source_nodes : Optional[Any], default=None
The source nodes for personalized ArticleRank
List of node ids to use as starting points. Use a list of list pairs to associate each node with a bias > 0.

Returns
-------
Expand Down Expand Up @@ -214,7 +219,11 @@ def write(
write_concurrency: Optional[int] = None,
) -> ArticleRankWriteResult:
"""
Executes the ArticleRank algorithm and writes the results to Neo4j.
Runs the Article Rank algorithm and stores the result in the Neo4j database as a new node property.

ArticleRank is a variant of the Page Rank algorithm, which measures the transitive influence of nodes.
Page Rank follows the assumption that relationships originating from low-degree nodes have a higher influence than relationships from high-degree nodes.
Article Rank lowers the influence of low-degree nodes by lowering the scores being sent to their neighbors in each iteration.

Parameters
----------
Expand All @@ -223,31 +232,31 @@ def write(
write_property : str
The property name to write the ArticleRank score for each node
damping_factor : Optional[float], default=None
The damping factor controls the probability of a random jump to a random node
Probability of a jump to a random node.
tolerance : Optional[float], default=None
Minimum change in scores between iterations
Minimum change in scores between iterations.
max_iterations : Optional[int], default=None
The maximum number of iterations to run
Maximum number of iterations to run.
scaler : Optional[Any], default=None
Configuration for scaling the scores
Name of the scaler applied on the resulting scores.
relationship_types : Optional[List[str]], default=None
The relationships types used to select relationships for this algorithm run
Filter the graph using the given relationship types. Relationships with any of the given types will be included.
node_labels : Optional[List[str]], default=None
The node labels used to select nodes for this algorithm run
Filter the graph using the given node labels. Nodes with any of the given labels will be included.
sudo : Optional[bool], default=None
Override memory estimation limits
Disable the memory guard.
log_progress : Optional[bool], default=None
Whether to log progress
Display progress logging.
username : Optional[str], default=None
The username to attribute the procedure run to
concurrency : Optional[Any], default=None
The number of concurrent threads
Number of threads to use for running the algorithm.
job_id : Optional[Any], default=None
An identifier for the job
Identifier for the job.
relationship_weight_property : Optional[str], default=None
The property name that contains weight
Name of the property to be used as weights.
source_nodes : Optional[Any], default=None
The source nodes for personalized ArticleRank
List of node ids to use as starting points. Use a list of list pairs to associate each node with a bias > 0.
write_concurrency : Optional[int], default=None
The number of concurrent threads used for writing

Expand Down
Loading