sparse_mkl backend integration

maniospas · maniospas · commit 69874f3c6bc4 · 2022-10-17T22:26:21.000+03:00
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -22,7 +22,6 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          sudo apt install intel-basekit
           pip install sklearn
           pip install scipy
           pip install numpy
diff --git a/documentation/measures.md b/documentation/measures.md
@@ -372,7 +372,7 @@ Graph conductance (information flow) of scores.
 Assumes a fuzzy set of subgraphs whose nodes are included with probability proportional to their scores, 
 as per the formulation of [krasanakis2019linkauc] and calculates E[outgoing edges] / E[internal edges] of 
 the fuzzy rank subgraph. To avoid potential optimization towards filling the whole graph, the measure is 
-evaluated to infinity if either denomator *or* the nominator is zero (this means that whole connected components 
+evaluated to infinity if either the denominator *or* the numerator is zero (this means that whole connected components 
 should not be extracted). 
 If scores assume binary values, E[.] becomes set size and this calculates the induced subgraph Conductance. The constructor initializes the Conductance measure. 
 
diff --git a/documentation/tips.md b/documentation/tips.md
@@ -48,7 +48,7 @@ algorithms.
 ### My graph is already a scipy sparse matrix.
 Note that node ranking algorithms and graph signals
 typically require graphs. However, sometimes
-it is more computationally efficient to constuct
+it is more computationally efficient to construct
 and move around sparse scipy adjacency matrices, 
 for example to avoid additional memory allocations.
 
@@ -61,8 +61,8 @@ import pygrank as pg
 adj = ...  # a square sparse scipy array
 graph = pg.AdjacencyWrapper(adj, directed=True)
 ```
-In this case, the graph's nodes are consider to be
+In this case, the graph's nodes are considered to be
 the numerical values *0,1,..,adj.shape[0]-1*.
 The *directed* argument in the constructor only
 affects the type of *"auto"* normalization in
-preprocessors.
+preprocessors.
diff --git a/examples/playground/compare_filter_tuning.py b/examples/playground/compare_filter_tuning.py
@@ -1,6 +1,6 @@
 import pygrank as pg
 
-pg.load_backend("numpy")
+pg.load_backend("sparse_dot_mkl")
 
 datasets = ["amazon", "citeseer", "maven"]
 datasets = ["citeseer"]
diff --git a/pygrank/core/backend/sparse_dot_mkl.py b/pygrank/core/backend/sparse_dot_mkl.py
@@ -1,6 +1,7 @@
 import numpy as np
 from numpy import abs, sum, exp, log, copy, repeat, min, max, dot, mean, diag, ones
 from scipy.sparse import eye
+import warnings
 import os
 if "MKL_NUM_THREADS" not in os.environ:
     os.environ["MKL_NUM_THREADS"] = str(os.cpu_count())
@@ -12,7 +13,7 @@ def backend_init():
 
 
 def graph_dropout(M, _):
-    return M.to_csr()
+    return M
 
 
 def separate_cols(x):
@@ -24,7 +25,7 @@ def combine_cols(cols):
 
 
 def backend_name():
-    return "numpy"
+    return "sparse_dot_mkl"
 
 
 def scipy_sparse_to_backend(M):
@@ -60,9 +61,23 @@ def self_normalize(obj):
         obj = obj / np_sum
     return obj
 
+__pygrank_sparse_dot_mkl_warning = False
+
 
 def conv(signal, M):
-    return sparse_dot_mkl.dot_product_mkl(signal, M)
+    global __pygrank_sparse_dot_mkl_warning
+    if __pygrank_sparse_dot_mkl_warning:
+        return signal * M
+    try:
+        return sparse_dot_mkl.dot_product_mkl(signal, M)
+    except Exception as e:
+        if not __pygrank_sparse_dot_mkl_warning:
+            __pygrank_sparse_dot_mkl_warning = True
+            warnings.warn("sparse_dot_mkl failed to link for sparse matrix multiplication.\n"
+                          "Please check your environment setup."
+                          "Falling back to numpy implementation for this backend.")
+            warnings.warn(str(e))
+        return signal * M
 
 
 def length(x):
diff --git a/pygrank/core/utils/preprocessing.py b/pygrank/core/utils/preprocessing.py
@@ -119,7 +119,7 @@ def to_sparse_matrix(G,
         elif normalization != "none":
             raise Exception("Supported normalizations: none, col, symmetric, both, laplacian, auto")
     M = transform_adjacency(M)
-    ret = M if backend.backend_name() == "numpy" else backend.scipy_sparse_to_backend(M)
+    ret = M if backend.backend_name() == "numpy" or backend.backend_name() == "sparse_dot_mkl" else backend.scipy_sparse_to_backend(M)
     ret._pygrank_node2id = {v: i for i, v in enumerate(G)}
     if cors:
         ret.__pygrank_preprocessed = {backend.backend_name(): ret, "numpy": M}
diff --git a/pygrank/measures/unsupervised.py b/pygrank/measures/unsupervised.py
@@ -55,7 +55,7 @@ class Conductance(Unsupervised):
     Assumes a fuzzy set of subgraphs whose nodes are included with probability proportional to their scores,
     as per the formulation of [krasanakis2019linkauc] and calculates E[outgoing edges] / E[internal edges] of
     the fuzzy rank subgraph. To avoid potential optimization towards filling the whole graph, the measure is
-    evaluated to infinity if either denomator *or* the nominator is zero (this means that whole connected components
+    evaluated to infinity if either the denominator *or* the numerator is zero (this means that whole connected components
     should not be extracted).
     If scores assume binary values, E[.] becomes set size and this calculates the induced subgraph Conductance.
     """
diff --git a/setup.py b/setup.py
@@ -12,7 +12,7 @@
 
 setuptools.setup(
     name='pygrank',
-    version='0.2.9',
+    version='0.2.10',
     author="Emmanouil (Manios) Krasanakis",
     author_email="maniospas@hotmail.com",
     description="Recommendation algorithms for large graphs",