Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
56764fc
First stab at creating bubbles, with source and sink labels and relat…
Balletie Jun 16, 2015
da47b66
Replace test graph with one that conserves flow. Update tests accordi…
Balletie Jun 16, 2015
15ca256
Remove visited set and keep track of processed rels using property
Balletie Jun 16, 2015
d1e4a10
Create bubbles in PathExpander instead, correctly this time.
Balletie Jun 17, 2015
e72becc
Test the bubbles with a new graph, update and enable old tests
Balletie Jun 17, 2015
2fde348
Also test for multiple source nodes in bubbles. Currently fails
Balletie Jun 17, 2015
9120094
Add some comments, split some functions.
gfokkema Jun 18, 2015
df682eb
Add back propagateSourceIDs, and also store the propagatedSources for…
Balletie Jun 18, 2015
c99a7e9
WIP: Initial clustering implementation for bubbles
Balletie Jun 18, 2015
b7d94c9
Trim source and sink of clustered bubbles, return source and sink as …
Balletie Jun 18, 2015
dc82975
Fix some performance issues: get rid of nested transactions, do explic…
Balletie Jun 18, 2015
c695e29
Test correctly, and fix bugs that arose (see description)
Balletie Jun 18, 2015
998e07f
Set the interestingness property. Get the individual score in Cluster…
Balletie Jun 18, 2015
eccd398
Extend the test for individual nodes, and fix bug that arose
Balletie Jun 19, 2015
07fbc0a
Performance improvement: use a query when clustering a large bubble
Balletie Jun 19, 2015
27ad184
Extend clustertest with tests for 1) duplicates 2) missing 3) nested …
Balletie Jun 20, 2015
a4093a2
Keep a map from bubble IDs to their nested bubble IDs. also merge eve…
Balletie Jun 20, 2015
ed0c423
Completely rewrite clustering to use a recursive traversal.
Balletie Jun 20, 2015
90796cb
Also test the combined graphs for correct clustering
Balletie Jun 20, 2015
e41a259
Fix off-by-one error in AllClustersQuery. Test passing.
Balletie Jun 20, 2015
deffb97
Keep track of sink nodes that are visited, so there are no duplicates
Balletie Jun 20, 2015
7c19a06
Merge remote-tracking branch 'jente/redo-strain' into skip-cluster-bu…
gfokkema Jun 20, 2015
4332cb2
Continue clustering on visited nodes, pretend source is not there whe…
Balletie Jun 22, 2015
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -69,12 +69,12 @@ public interface Graph extends AnnotationCollection {

/**
 * Return the clusters found between the given start and end.
 * @param start the start of the range to cluster (presumably a rank, mirroring
 *              {@code end}; confirm against AllClustersQuery)
 * @param end the maximum rank of the cluster
 * @param threshold the clustering threshold
 * @return a map from an integer key to the list of clusters stored under that key
 */
Map<Integer, List<Cluster>> getAllClusters(int start, int end, int threshold);

/**
* Sets the interestingness strategy which calculates the interestingness when
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -169,9 +169,9 @@ public int getRankFromBasePair(int base) {
}

@Override
public Map<Integer, List<Cluster>> getAllClusters(List<String> startNodes,
public Map<Integer, List<Cluster>> getAllClusters(int start,
int end, int threshold) {
return query(new AllClustersQuery(startNodes, end, threshold, is));
return query(new AllClustersQuery(start, end, threshold, is));
}

@Override
Expand Down Expand Up @@ -268,8 +268,12 @@ public void setInterestingnessStrategy(InterestingnessStrategy is) {
* order, to assign ranks and scores to nodes.
*/
protected void analyze() {
    // Rank the graph exactly once, inside a single transaction. The root iterator is
    // declared as a resource so it is closed (before the transaction) on every exit
    // path instead of being left open.
    try (Transaction tx = service.beginTx();
            ResourceIterator<Node> roots = rootIterator()) {
        new AnalyzeCommand(roots).execute(service);
        tx.success();
    }
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,6 @@ public enum NodeLabels implements Label {
ANNOTATION,
DRMUTATION,
SOURCE,
NODE
NODE,
BUBBLE_SOURCE,
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,6 @@ public enum RelTypes implements RelationshipType {
ANNOTATED,
NEXT,
SOURCE,
MUTATION,
BUBBLE_SOURCE_OF
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,12 @@
import nl.tudelft.dnainator.graph.impl.properties.SourceProperties;
import nl.tudelft.dnainator.graph.interestingness.Scores;

import org.neo4j.collection.primitive.Primitive;
import org.neo4j.collection.primitive.PrimitiveLongSet;
import org.neo4j.graphdb.Direction;
import org.neo4j.graphdb.GraphDatabaseService;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.Relationship;
import org.neo4j.graphdb.ResourceIterator;
import org.neo4j.graphdb.Transaction;
import org.neo4j.graphdb.traversal.InitialBranchState.State;
import org.neo4j.graphdb.traversal.InitialBranchState;
import org.neo4j.graphdb.traversal.Uniqueness;

import static nl.tudelft.dnainator.graph.impl.properties.SequenceProperties.BASE_DIST;
Expand All @@ -30,7 +27,6 @@
* ranks the nodes in the Neo4j database accordingly.
*/
public class AnalyzeCommand implements Command {
private static final int INIT_CAP = 4096;
private static final String LABEL = "n";
private static final String GET_NODES_BASEDIST =
"MATCH (n:" + NodeLabels.NODE.name() + ")-[:" + RelTypes.SOURCE.name() + "]-s, "
Expand All @@ -55,16 +51,12 @@ public AnalyzeCommand(ResourceIterator<Node> roots) {
* @param service the database service
* @return a topological ordering, starting from the roots
*/
@SuppressWarnings("unchecked")
public Iterable<Node> topologicalOrder(GraphDatabaseService service) {
return topologicalOrder(service, Primitive.longSet());
}

private Iterable<Node> topologicalOrder(GraphDatabaseService service,
PrimitiveLongSet processed) {
return service.traversalDescription()
// Depth first order, for creating bubbles.
.depthFirst()
.expand(new TopologicalPathExpander()
, new State<>(processed, null))
.expand(new TopologicalPathExpander(), InitialBranchState.NO_STATE)
// We manage uniqueness for ourselves.
.uniqueness(Uniqueness.NONE)
.traverse(loop(roots))
Expand All @@ -73,18 +65,10 @@ private Iterable<Node> topologicalOrder(GraphDatabaseService service,

@Override
public void execute(GraphDatabaseService service) {
try (
Transaction tx = service.beginTx();
// Our set is located "off heap", i.e. not managed by the garbage collector.
// It is automatically closed after the try block, which frees the allocated memory.
PrimitiveLongSet processed = Primitive.offHeapLongSet(INIT_CAP)
) {
for (Node n : topologicalOrder(service, processed)) {
rankDest(n);
}
scoreDRMutations(service);
tx.success();
for (Node n : topologicalOrder(service)) {
rankDest(n);
}
scoreDRMutations(service);
}

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -1,50 +1,164 @@
package nl.tudelft.dnainator.graph.impl.command;

import nl.tudelft.dnainator.graph.impl.NodeLabels;
import nl.tudelft.dnainator.graph.impl.RelTypes;
import org.neo4j.collection.primitive.PrimitiveLongSet;
import nl.tudelft.dnainator.graph.impl.properties.BubbleProperties;
import nl.tudelft.dnainator.graph.impl.properties.SequenceProperties;

import org.neo4j.graphdb.Direction;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.Path;
import org.neo4j.graphdb.PathExpander;
import org.neo4j.graphdb.Relationship;
import org.neo4j.graphdb.traversal.BranchState;

import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

/**
* PathExpander for determining the topological ordering.
*/
public class TopologicalPathExpander implements PathExpander<PrimitiveLongSet> {
private boolean hasUnprocessedIncoming(PrimitiveLongSet processed, Node n) {
public class TopologicalPathExpander implements PathExpander<Object> {
private static final String PROCESSED = "PROCESSED";
private Map<Long, Set<Long>> relIDtoSourceIDs;
private Map<Long, Set<Long>> bubbleSourceIDtoEndIDs;

/**
 * Creates a {@link TopologicalPathExpander} whose two bookkeeping maps
 * (relationship id to source ids, bubble source id to end ids) start out empty.
 */
public TopologicalPathExpander() {
    relIDtoSourceIDs = new HashMap<>();
    bubbleSourceIDtoEndIDs = new HashMap<>();
}

private boolean hasUnprocessedIncoming(Node n) {
Iterable<Relationship> in = n.getRelationships(RelTypes.NEXT, Direction.INCOMING);
for (Relationship r : in) {
if (!processed.contains(r.getId())) {
if (!r.hasProperty(PROCESSED)) {
return true;
}
}
// Clean up after ourselves.
in.forEach(rel -> rel.removeProperty(PROCESSED));
// All incoming edges have been processed.
return false;
}

@Override
public Iterable<Relationship> expand(Path path,
BranchState<PrimitiveLongSet> state) {
BranchState<Object> noState) {
Node from = path.endNode();
// Propagate all unclosed bubbles and the newly created ones.
Set<Long> toPropagate = getSourcesToPropagate(from);

// For each unclosed bubble source, remove the current node from the endings and
// add outgoing nodes to the ending nodes, thereby advancing the bubble endings.
toPropagate.forEach(e -> advanceEnds(e, from));
// Store in this node the bubbles in which it is nested.
storeOuterBubbles(from, toPropagate);
// Create a new bubblesource, that will have its own bubble endings.
createBubbleSource(from, toPropagate);

// Encode the unclosed propagated bubbles on the edges.
from.getRelationships(RelTypes.NEXT, Direction.OUTGOING)
.forEach(out -> propagateSourceIDs(toPropagate, out));

List<Relationship> expand = new LinkedList<>();
for (Relationship r : from.getRelationships(RelTypes.NEXT, Direction.OUTGOING)) {
PrimitiveLongSet processed = state.getState();
processed.add(r.getId());
if (!hasUnprocessedIncoming(processed, r.getEndNode())) {
for (Relationship out : from.getRelationships(RelTypes.NEXT, Direction.OUTGOING)) {
setNumStrainsThrough(out);
out.setProperty(PROCESSED, true);
if (!hasUnprocessedIncoming(out.getEndNode())) {
createBubbleSink(out.getEndNode());
// All of the dependencies of this node have been added to the result.
expand.add(r);
expand.add(out);
}
}
return expand;
}

/**
 * Writes the given bubble source ids onto the node as a long array, under the
 * BUBBLE_SOURCE_IDS property.
 * @param from the node to annotate
 * @param toPropagate the ids to store
 */
private void storeOuterBubbles(Node from, Set<Long> toPropagate) {
    // Copy the ids into a primitive array, in the set's iteration order.
    long[] outerSourceIds = new long[toPropagate.size()];
    int next = 0;
    for (Long sourceId : toPropagate) {
        outerSourceIds[next++] = sourceId;
    }
    from.setProperty(BubbleProperties.BUBBLE_SOURCE_IDS.name(), outerSourceIds);
}

/**
 * Collects the bubble source ids recorded for the node's incoming NEXT relationships,
 * keeping only sources that still have an entry in the end-id map. The record of each
 * incoming relationship is removed from the map as it is read.
 * @param from the node whose incoming relationships are consulted
 * @return a new set holding the still-open bubble source ids
 */
private Set<Long> getSourcesToPropagate(Node from) {
    Set<Long> unclosed = new HashSet<>();
    for (Relationship incoming : from.getRelationships(RelTypes.NEXT, Direction.INCOMING)) {
        Set<Long> carried = relIDtoSourceIDs.remove(Long.valueOf(incoming.getId()));
        for (Long sourceId : carried) {
            // A source without an end-id entry is no longer tracked; drop it.
            if (bubbleSourceIDtoEndIDs.get(sourceId) != null) {
                unclosed.add(sourceId);
            }
        }
    }
    return unclosed;
}

/**
 * Moves the tracked end ids of a bubble source one step past the given node: the node's
 * own id is removed and the ids of the end nodes of its outgoing NEXT relationships are
 * added. Does nothing when the source has no end-id entry.
 * @param bubbleSource the id of the bubble source
 * @param endnode the node that is being left behind
 */
private void advanceEnds(Long bubbleSource, Node endnode) {
    Set<Long> ends = bubbleSourceIDtoEndIDs.get(bubbleSource);
    if (ends == null) {
        return;
    }
    ends.remove(Long.valueOf(endnode.getId()));

    // The original author flagged (FIXME) that ids are usually added more than once
    // here; the set absorbs the duplicates.
    for (Relationship next : endnode.getRelationships(RelTypes.NEXT, Direction.OUTGOING)) {
        ends.add(Long.valueOf(next.getEndNode().getId()));
    }
}

/**
 * Turns the node into a bubble source when it has at least two outgoing NEXT
 * relationships: its id is added to the given set, it receives the BUBBLE_SOURCE label,
 * and the ids of its direct successors are registered as its end ids.
 * @param n the candidate node
 * @param toPropagate the set of source ids, extended in place with the node's id
 */
private void createBubbleSource(Node n, Set<Long> toPropagate) {
    int branches = n.getDegree(RelTypes.NEXT, Direction.OUTGOING);
    if (branches < 2) {
        return;
    }
    Long sourceId = Long.valueOf(n.getId());
    toPropagate.add(sourceId);
    n.addLabel(NodeLabels.BUBBLE_SOURCE);

    Set<Long> ends = new HashSet<>(branches);
    for (Relationship branch : n.getRelationships(RelTypes.NEXT, Direction.OUTGOING)) {
        ends.add(Long.valueOf(branch.getEndNode().getId()));
    }
    bubbleSourceIDtoEndIDs.put(sourceId, ends);
}

/**
 * Records the given set of source ids for the relationship, keyed by the relationship
 * id. The set instance itself is stored; no copy is made.
 * @param propagatedUnique the source ids to record
 * @param out the relationship to record them for
 */
private void propagateSourceIDs(Set<Long> propagatedUnique, Relationship out) {
    Long relId = Long.valueOf(out.getId());
    relIDtoSourceIDs.put(relId, propagatedUnique);
}

/**
 * Treats the node as a bubble sink when it has at least two incoming NEXT
 * relationships. Every source id recorded on those relationships whose end-id set has
 * exactly one element is dropped from the end-id map and linked to the node with a
 * BUBBLE_SOURCE_OF relationship.
 * @param n the candidate sink node
 */
private void createBubbleSink(Node n) {
    if (n.getDegree(RelTypes.NEXT, Direction.INCOMING) < 2) {
        return;
    }
    // First pass: gather the sources that have a single remaining end.
    Set<Long> closing = new HashSet<>();
    for (Relationship incoming : n.getRelationships(RelTypes.NEXT, Direction.INCOMING)) {
        Set<Long> carried = relIDtoSourceIDs.get(Long.valueOf(incoming.getId()));
        for (Long sourceId : carried) {
            if (bubbleSourceIDtoEndIDs.get(sourceId).size() == 1) {
                closing.add(sourceId);
            }
        }
    }
    // Second pass: stop tracking each gathered source and connect it to the sink.
    for (Long sourceId : closing) {
        if (bubbleSourceIDtoEndIDs.get(sourceId).size() == 1) {
            bubbleSourceIDtoEndIDs.remove(sourceId);
        }
        Node source = n.getGraphDatabase().getNodeById(sourceId.longValue());
        source.createRelationshipTo(n, RelTypes.BUBBLE_SOURCE_OF);
    }
}

/**
 * Stores on the relationship, under the EDGE_NUM_STRAINS property, the absolute
 * difference between the number of SOURCE relationships of its start node and of its
 * end node.
 * @param r the relationship to annotate
 */
private void setNumStrainsThrough(Relationship r) {
    int startSources = r.getStartNode().getDegree(RelTypes.SOURCE);
    int endSources = r.getEndNode().getDegree(RelTypes.SOURCE);
    int difference = Math.abs(startSources - endSources);
    r.setProperty(SequenceProperties.EDGE_NUM_STRAINS.name(), difference);
}

@Override
public PathExpander<PrimitiveLongSet> reverse() {
public PathExpander<Object> reverse() {
throw new UnsupportedOperationException();
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
package nl.tudelft.dnainator.graph.impl.properties;

/**
 * Properties of nodes within bubbles.
 * The constant's {@code name()} is used as the property key.
 */
public enum BubbleProperties {
/**
 * Node property holding, as a long array, the ids of the bubble sources that
 * TopologicalPathExpander propagated to the node.
 */
BUBBLE_SOURCE_IDS
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,6 @@ public enum SequenceProperties {
SEQUENCE,
BASE_DIST,
RANK,
INTERESTINGNESS
INTERESTINGNESS,
EDGE_NUM_STRAINS
}
Loading