Skip to content

Commit

Permalink
v0.3.1 - added typing and fixed joblib requirement
Browse files (browse the repository at this point in the history)
  • Loading branch information
eliorc committed Sep 13, 2019
1 parent 8408324 commit 2d38080
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 37 deletions.
2 changes: 1 addition & 1 deletion node2vec/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .node2vec import Node2Vec
from . import edges

__version__ = '0.2.1'
__version__ = '0.3.1'
23 changes: 10 additions & 13 deletions node2vec/edges.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,27 +8,24 @@

class EdgeEmbedder(ABC):

def __init__(self, keyed_vectors, quiet=False):
def __init__(self, keyed_vectors: KeyedVectors, quiet: bool = False):
"""
:param keyed_vectors: KeyedVectors containing nodes and embeddings to calculate edges for
:type keyed_vectors: gensim.models.KeyedVectors
"""

self.kv = keyed_vectors
self.quiet = quiet

@abstractmethod
def _embed(self, edge):
def _embed(self, edge: tuple) -> np.ndarray:
"""
Abstract method for implementing the embedding method
:param edge: tuple of two nodes
:type edge: tuple
:return: Edge embedding
:rtype: numpy.array
"""
pass

def __getitem__(self, edge):
def __getitem__(self, edge) -> np.ndarray:
if not isinstance(edge, tuple) or not len(edge) == 2:
raise ValueError('edge must be a tuple of two nodes')

Expand All @@ -40,10 +37,10 @@ def __getitem__(self, edge):

return self._embed(edge)

def as_keyed_vectors(self):
def as_keyed_vectors(self) -> KeyedVectors:
"""
Generated a KeyedVectors instance with all
:return:
Generates a KeyedVectors instance with all the possible edge embeddings
:return: Edge embeddings
"""

edge_generator = combinations_with_replacement(self.kv.index2word, r=2)
Expand Down Expand Up @@ -79,7 +76,7 @@ class AverageEmbedder(EdgeEmbedder):
Average node features
"""

def _embed(self, edge):
def _embed(self, edge: tuple):
return (self.kv[edge[0]] + self.kv[edge[1]]) / 2


Expand All @@ -88,7 +85,7 @@ class HadamardEmbedder(EdgeEmbedder):
Hadamard product node features
"""

def _embed(self, edge):
def _embed(self, edge: tuple):
return self.kv[edge[0]] * self.kv[edge[1]]


Expand All @@ -97,7 +94,7 @@ class WeightedL1Embedder(EdgeEmbedder):
Weighted L1 node features
"""

def _embed(self, edge):
def _embed(self, edge: tuple):
return np.abs(self.kv[edge[0]] - self.kv[edge[1]])


Expand All @@ -106,5 +103,5 @@ class WeightedL2Embedder(EdgeEmbedder):
Weighted L2 node features
"""

def _embed(self, edge):
def _embed(self, edge: tuple):
return (self.kv[edge[0]] - self.kv[edge[1]]) ** 2
26 changes: 11 additions & 15 deletions node2vec/node2vec.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
import os
from collections import defaultdict

import numpy as np
import gensim, os
from joblib import Parallel, delayed, load, dump
import networkx as nx
import gensim
from joblib import Parallel, delayed
from tqdm import tqdm

from .parallel import parallel_generate_walks


Expand All @@ -16,31 +20,23 @@ class Node2Vec:
P_KEY = 'p'
Q_KEY = 'q'

def __init__(self, graph, dimensions=128, walk_length=80, num_walks=10, p=1, q=1, weight_key='weight',
workers=1, sampling_strategy=None, quiet=False, temp_folder=None):
def __init__(self, graph: nx.Graph, dimensions: int = 128, walk_length: int = 80, num_walks: int = 10, p: float = 1,
q: float = 1, weight_key: str = 'weight', workers: int = 1, sampling_strategy: dict = None,
quiet: bool = False, temp_folder: str = None):
"""
Initiates the Node2Vec object, precomputes walking probabilities and generates the walks.
:param graph: Input graph
:type graph: Networkx Graph
:param dimensions: Embedding dimensions (default: 128)
:type dimensions: int
:param walk_length: Number of nodes in each walk (default: 80)
:type walk_length: int
:param num_walks: Number of walks per node (default: 10)
:type num_walks: int
:param p: Return hyper parameter (default: 1)
:type p: float
:param q: Inout parameter (default: 1)
:type q: float
:param weight_key: On weighted graphs, this is the key for the weight attribute (default: 'weight')
:type weight_key: str
:param workers: Number of workers for parallel execution (default: 1)
:type workers: int
:param sampling_strategy: Node specific sampling strategies, supports setting node specific 'q', 'p', 'num_walks' and 'walk_length'.
Use these keys exactly. If not set, will use the global ones which were passed on the object initialization
:param temp_folder: Path to a folder with enough space to hold the memory map of self.d_graph (for big graphs); passed to joblib.Parallel as its temp_folder argument
:type temp_folder: str
"""

self.graph = graph
Expand Down Expand Up @@ -131,7 +127,7 @@ def _precompute_probabilities(self):
# Save neighbors
d_graph[current_node][self.NEIGHBORS_KEY] = d_neighbors

def _generate_walks(self):
def _generate_walks(self) -> list:
"""
Generates the random walks which will be used as the skip-gram input.
:return: List of walks. Each walk is a list of nodes.
Expand Down Expand Up @@ -161,7 +157,7 @@ def _generate_walks(self):

return walks

def fit(self, **skip_gram_params):
def fit(self, **skip_gram_params) -> gensim.models.Word2Vec:
"""
Creates the embeddings using gensim's Word2Vec.
:param skip_gram_params: Parameters for gensim.models.Word2Vec - do not supply 'size'; it is taken from the Node2Vec 'dimensions' parameter
Expand Down
13 changes: 7 additions & 6 deletions node2vec/parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,19 @@
import numpy as np
from tqdm import tqdm

def parallel_generate_walks(d_graph, global_walk_length, num_walks, cpu_num, sampling_strategy=None,
num_walks_key=None, walk_length_key=None, neighbors_key=None, probabilities_key=None,
first_travel_key=None, quiet=False):

def parallel_generate_walks(d_graph: dict, global_walk_length: int, num_walks: int, cpu_num: int,
sampling_strategy: dict = None, num_walks_key: str = None, walk_length_key: str = None,
neighbors_key: str = None, probabilities_key: str = None, first_travel_key: str = None,
quiet: bool = False) -> list:
"""
Generates the random walks which will be used as the skip-gram input.
:return: List of walks. Each walk is a list of nodes.
"""

walks = list()



if not quiet:
pbar = tqdm(total=num_walks, desc='Generating walks (CPU: {})'.format(cpu_num))

Expand Down Expand Up @@ -70,4 +71,4 @@ def parallel_generate_walks(d_graph, global_walk_length, num_walks, cpu_num, sam
if not quiet:
pbar.close()

return walks
return walks
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
setup(
name='node2vec',
packages=['node2vec'],
version='0.3.0',
version='0.3.1',
description='Implementation of the node2vec algorithm.',
author='Elior Cohen',
author_email='[email protected]',
Expand All @@ -14,7 +14,7 @@
'gensim',
'numpy',
'tqdm',
'joblib'
'joblib>=0.13.2'
],
keywords=['machine learning', 'embeddings'],
)

0 comments on commit 2d38080

Please sign in to comment.