Commit 0fc33092 authored by Zahra Najafabadi's avatar Zahra Najafabadi

initial multilayer commit

parent 85e47ff6
@@ -3,3 +3,4 @@
**/.idea
*.log
**/env
**/venv
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
This package implements community detection.
The package is named community, but on PyPI it is distributed as python-louvain.
"""
# from .community_louvain import (
# partition_at_level,
# modularity,
# best_partition,
# generate_dendrogram,
# induced_graph,
# load_binary,
# )
__author__ = """Thomas Aynaud (thomas.aynaud@lip6.fr)"""
# Copyright (C) 2009 by
# Thomas Aynaud <thomas.aynaud@lip6.fr>
# All rights reserved.
# BSD license.
# coding=utf-8
class Status(object):
"""
    Holds several pieces of data in one structure.
    Could be replaced by a named tuple, but we don't want to depend on Python 2.6
"""
node2com = {}
total_weight = 0
internals = {}
degrees = {}
gdegrees = {}
def __init__(self):
self.node2com = dict([])
self.total_weight = 0
self.degrees = dict([])
self.gdegrees = dict([])
self.internals = dict([])
self.loops = dict([])
def __str__(self):
return ("node2com : " + str(self.node2com) + " degrees : "
+ str(self.degrees) + " internals : " + str(self.internals)
+ " total_weight : " + str(self.total_weight))
def copy(self):
"""Perform a deep copy of status"""
new_status = Status()
new_status.node2com = self.node2com.copy()
new_status.internals = self.internals.copy()
new_status.degrees = self.degrees.copy()
new_status.gdegrees = self.gdegrees.copy()
        new_status.total_weight = self.total_weight
        return new_status
def init(self, graph, weight, part=None):
"""Initialize the status of a graph with every node in one community"""
count = 0
self.node2com = dict([])
self.total_weight = 0
self.degrees = dict([])
self.gdegrees = dict([])
self.internals = dict([])
self.total_weight = graph.size(weight=weight)
if part is None:
for node in graph.nodes():
self.node2com[node] = count
deg = float(graph.degree(node, weight=weight))
if deg < 0:
error = "Bad node degree ({})".format(deg)
raise ValueError(error)
self.degrees[count] = deg
self.gdegrees[node] = deg
edge_data = graph.get_edge_data(node, node, default={weight: 0})
self.loops[node] = float(edge_data.get(weight, 1))
self.internals[count] = self.loops[node]
count += 1
else:
for node in graph.nodes():
com = part[node]
self.node2com[node] = com
deg = float(graph.degree(node, weight=weight))
self.degrees[com] = self.degrees.get(com, 0) + deg
self.gdegrees[node] = deg
inc = 0.
for neighbor, datas in graph[node].items():
edge_weight = datas.get(weight, 1)
if edge_weight <= 0:
error = "Bad graph type ({})".format(type(graph))
raise ValueError(error)
if part[neighbor] == com:
if neighbor == node:
inc += float(edge_weight)
else:
inc += float(edge_weight) / 2.
self.internals[com] = self.internals.get(com, 0) + inc
## a framework for community-based node ranking
import networkx as nx
import numpy as np
from sklearn.cluster import AffinityPropagation
import multiprocessing as mp
from ..network_ranking.node_ranking import sparse_page_rank,modularity,stochastic_normalization
from scipy.cluster.hierarchy import dendrogram, linkage
from scipy.cluster.hierarchy import fcluster
import scipy.sparse as sp
def page_rank_kernel(index_row):
## call as results = p.map(pr_kernel, batch)
pr = sparse_page_rank(_RANK_GRAPH, [index_row],
epsilon=1e-6,
max_steps=100000,
damping=0.90,
spread_step=10,
spread_percent=0.1,
try_shrink=True)
norm = np.linalg.norm(pr, 2)
if norm > 0:
pr = pr / np.linalg.norm(pr, 2)
return (index_row,pr)
else:
        return (index_row, np.zeros(_RANK_GRAPH.shape[1]))
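## Minimal usage sketch (illustration only): map the kernel above over all node
## indices with a multiprocessing pool, assuming _RANK_GRAPH has already been set
## to a stochastically normalized sparse adjacency matrix (as done in the
## __main__ block below). Returns {node index: personalized PageRank vector}.
def _demo_page_rank_kernel(num_workers=4):
    n = _RANK_GRAPH.shape[1]
    with mp.Pool(processes=num_workers) as pool:
        results = pool.map(page_rank_kernel, range(n))
    return dict(results)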
def create_tree(centers):
clusters = {}
    to_merge = linkage(centers, method='single')
    n_points = len(centers)
    for i, merge in enumerate(to_merge):
        if merge[0] < n_points:
            # if it is an original point, read it from the centers array
            a = centers[int(merge[0])]
        else:
            # otherwise read the cluster that has already been created
            a = clusters[int(merge[0])]
        if merge[1] < n_points:
            b = centers[int(merge[1])]
        else:
            b = clusters[int(merge[1])]
        # scipy indexes newly created clusters from n_points upwards
        clusters[n_points + i] = {
            'children': [a, b]
        }
        # ^ you could optionally store other info here (e.g. distances)
return clusters
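## Minimal usage sketch (illustration only): build the nested cluster
## dictionary for a handful of 2-D points; the coordinates are arbitrary.
def _demo_create_tree():
    example_centers = np.array([[0.0, 0.0], [0.1, 0.0],
                                [5.0, 5.0], [5.1, 5.0]])
    return create_tree(example_centers)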
def return_infomap_communities(network):
infomapWrapper = infomap.Infomap("--two-level --silent")
# Add link weight as an optional third argument
for e in network.edges():
infomapWrapper.addLink(e[0], e[1])
infomapWrapper.run()
tree = infomapWrapper.tree
print("Found %d modules with codelength: %f" % (tree.numTopModules(), tree.codelength()))
print("\n#node module")
part = defaultdict(list)
for node in tree.leafIter():
part[node.moduleIndex()].append(node.physIndex)
return list(part.values())
if __name__ == "__main__":
from infomap import infomap
from collections import defaultdict
from itertools import product
import community
from networkx.algorithms.community import LFR_benchmark_graph
from sklearn.cluster import AffinityPropagation,DBSCAN,MiniBatchKMeans
from scipy import cluster
from scipy.cluster.hierarchy import fcluster
from scipy.spatial.distance import pdist
global _RANK_GRAPH
print("Generating communities..")
n = 500
tau1 = 4
tau2 = 1.5
mu = 0.1
# _RANK_GRAPH = nx.windmill_graph(20, 5)
_RANK_GRAPH = LFR_benchmark_graph(n,
tau1,
tau2,
mu,
average_degree=5,
min_community=30,
seed=10)
print(nx.info(_RANK_GRAPH))
A = _RANK_GRAPH.copy()
_RANK_GRAPH = nx.to_scipy_sparse_matrix(_RANK_GRAPH)
_RANK_GRAPH = stochastic_normalization(_RANK_GRAPH) ## normalize
n = _RANK_GRAPH.shape[1]
with mp.Pool(processes=mp.cpu_count()) as p:
results = p.map(page_rank_kernel,range(n))
vectors = np.zeros((n, n))
for pr_vector in results:
        if pr_vector is not None:
vectors[pr_vector[0],:] = pr_vector[1]
vectors = np.nan_to_num(vectors)
option = "cpu"
dx_rc = defaultdict(list)
dx_lx = defaultdict(list)
dx_hc = defaultdict(list)
if option == "cpu":
mx_opt = 0
for nclust in range(2,_RANK_GRAPH.shape[0]):
clustering_algorithm = MiniBatchKMeans(n_clusters=nclust)
clusters = clustering_algorithm.fit_predict(vectors)
for a, b in zip(clusters,A.nodes()):
dx_rc[a].append(b)
partitions = dx_rc.values()
mx = modularity(A, partitions, weight='weight')
if mx > mx_opt:
mx_opt = mx
dx_rc = defaultdict(list)
print("KM: {}".format(mx_opt))
Z = linkage(vectors, 'ward')
mod_hc_opt = 0
        for nclust in range(3,_RANK_GRAPH.shape[0]):
            dx_hc = defaultdict(list)
            try:
k = nclust
cls = fcluster(Z, k, criterion='maxclust')
for a,b in zip(cls,A.nodes()):
dx_hc[a].append(b)
partition_hi = dx_hc.values()
mod = modularity(A, partition_hi, weight='weight')
if mod > mod_hc_opt:
mod_hc_opt = mod
except:
pass
print("Hierarchical: {}".format(mod))
## the louvain partition
partition = community.best_partition(A)
for a,b in partition.items():
dx_lx[b].append(a)
partition_louvain = dx_lx.values()
print("Louvain: {}".format(modularity(A, partition_louvain, weight='weight')))
parts_im = return_infomap_communities(A)
print("Infomap: {}".format(modularity(A, parts_im, weight='weight')))
## high level interface for community detection algorithms
from .community_louvain import *
def louvain_communities(network,input_type="mat",verbose=True):
# if input_type == "mat":
# network = nx.from_scipy_sparse_matrix(network)
if verbose:
print ("Detecting communities..")
partition = best_partition(network)
return partition
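## Minimal usage sketch (illustration only): run the wrapper on a small networkx
## graph, assuming best_partition is exported by the bundled community_louvain
## module imported above.
def _demo_louvain_communities():
    import networkx as nx
    toy_graph = nx.karate_club_graph()
    partition = louvain_communities(toy_graph, verbose=False)
    ## partition maps each node to its community id
    return partition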
## node ranking algorithms
import numpy as np
import networkx as nx
import scipy.sparse as sp
#from networkx.algorithms.community.community_utils import is_partition
from itertools import product
# def stochastic_normalization(matrix):
# matrix = matrix.tolil()
# try:
# matrix.setdiag(0)
# except TypeError:
# matrix.setdiag(np.zeros(matrix.shape[0]))
# matrix = matrix.tocsr()
# d = matrix.sum(axis=1).getA1()
# nzs = np.where(d > 0)
# d[nzs] = 1 / d[nzs]
# matrix = (sp.diags(d, 0).tocsc().dot(matrix)).transpose()
# return matrix
def stochastic_normalization(matrix):
matrix = matrix.tolil()
try:
matrix.setdiag(0)
except TypeError:
matrix.setdiag(np.zeros(matrix.shape[0]))
matrix = matrix.tocsr()
d = matrix.sum(axis=1).getA1()
nzs = np.where(d > 0)
k = 1/d[nzs]
matrix = (sp.diags(k, 0).tocsc().dot(matrix)).transpose()
return matrix
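## Minimal sketch (illustration only): check on a tiny adjacency matrix that
## the returned matrix is column-stochastic, which is what sparse_page_rank
## below assumes.
def _demo_stochastic_normalization():
    toy_adjacency = sp.csr_matrix(np.array([[0., 1., 1.],
                                            [1., 0., 0.],
                                            [1., 0., 0.]]))
    transition = stochastic_normalization(toy_adjacency)
    ## every column that has at least one edge now sums to (approximately) 1
    return transition.sum(axis=0)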
def stochastic_normalization_hin(matrix):
matrix = matrix.tolil()
try:
matrix.setdiag(0)
except TypeError:
matrix.setdiag(np.zeros(matrix.shape[0]))
matrix = matrix.tocsr()
d = matrix.sum(axis=1).getA1()
nzs = np.where(d > 0)
d[nzs] = 1 / d[nzs]
matrix = (sp.diags(d, 0).tocsc().dot(matrix)).transpose()
return matrix
def modularity(G, communities, weight='weight'):
    # if not is_partition(G, communities):
    #     raise NotAPartition(G, communities)
    multigraph = G.is_multigraph()
    directed = G.is_directed()
    m = G.size(weight=weight)
    if directed:
        out_degree = dict(G.out_degree(weight=weight))
        in_degree = dict(G.in_degree(weight=weight))
        norm = 1 / m
    else:
        out_degree = dict(G.degree(weight=weight))
        in_degree = out_degree
        norm = 1 / (2 * m)
    def val(u, v):
        try:
            if multigraph:
                w = sum(d.get(weight, 1) for k, d in G[u][v].items())
            else:
                w = G[u][v].get(weight, 1)
        except KeyError:
            w = 0
        # Double count self-loops if the graph is undirected.
        if u == v and not directed:
            w *= 2
        return w - in_degree[u] * out_degree[v] * norm
    Q = sum(val(u, v) for c in communities for u, v in product(c, repeat=2))
    return Q * norm
def page_rank_kernel(index_row):
    ## call as results = p.map(pr_kernel, batch)
    ## the module-level globals G (a stochastically normalized sparse matrix)
    ## and the *_hyper parameters are expected to be set by the caller before use
pr = sparse_page_rank(G, [index_row],
epsilon=1e-6,
max_steps=100000,
damping=damping_hyper,
spread_step=spread_step_hyper,
spread_percent=spread_percent_hyper,
try_shrink=True)
norm = np.linalg.norm(pr, 2)
if norm > 0:
pr = pr / np.linalg.norm(pr, 2)
return (index_row,pr)
else:
        return (index_row, np.zeros(G.shape[1]))
def sparse_page_rank(matrix, start_nodes,
epsilon=1e-6,
max_steps=100000,
damping=0.5,
spread_step=10,
spread_percent=0.3,
try_shrink=False):
    # start_nodes may be None (meaning all nodes) or a non-empty list of seed indices
    assert start_nodes is None or len(start_nodes) > 0
    # this method assumes that column sums are all equal to 1 (stochastic normalization!)
size = matrix.shape[0]
if start_nodes is None:
start_nodes = range(size)
nz = size
else:
nz = len(start_nodes)
start_vec = np.zeros((size, 1))
start_vec[start_nodes] = 1
start_rank = start_vec / len(start_nodes)
rank_vec = start_vec / len(start_nodes)
# calculate the max spread:
shrink = False
which = np.zeros(0)
if try_shrink:
v = start_vec / len(start_nodes)
steps = 0
while nz < size * spread_percent and steps < spread_step:
steps += 1
v += matrix.dot(v)
nz_new = np.count_nonzero(v)
if nz_new == nz:
shrink = True
break
nz = nz_new
rr = np.arange(matrix.shape[0])
which = (v[rr] > 0).reshape(size)
if shrink:
start_rank = start_rank[which]
rank_vec = rank_vec[which]
matrix = matrix[:, which][which, :]
    diff = np.inf
steps = 0
while diff > epsilon and steps < max_steps: # not converged yet
steps += 1
new_rank = matrix.dot(rank_vec)
rank_sum = np.sum(new_rank)
if rank_sum < 0.999999999:
new_rank += start_rank * (1 - rank_sum)
new_rank = damping * new_rank + (1 - damping) * start_rank
new_diff = np.linalg.norm(rank_vec - new_rank, 1)
diff = new_diff
rank_vec = new_rank
if try_shrink and shrink:
ret = np.zeros(size)
rank_vec = rank_vec.T[0] ## this works for both python versions
ret[which] = rank_vec
ret[start_nodes] = 0
return ret.flatten()
else:
rank_vec[start_nodes] = 0
return rank_vec.flatten()
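## Minimal usage sketch (illustration only): personalized PageRank from a
## single, arbitrarily chosen seed node of a toy graph, using the same
## normalization as elsewhere in this module.
def _demo_sparse_page_rank(seed_node=0):
    toy_graph = nx.karate_club_graph()
    transition = stochastic_normalization(nx.to_scipy_sparse_matrix(toy_graph))
    scores = sparse_page_rank(transition, [seed_node],
                              epsilon=1e-6,
                              max_steps=100000,
                              damping=0.85,
                              try_shrink=True)
    ## scores[i] is the visiting probability of node i; the seed itself is zeroed out
    return scores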
def hubs_and_authorities(graph):
return nx.hits_scipy(graph)
def hub_matrix(graph):
return nx.hub_matrix(graph)
def authority_matrix(graph):
return nx.authority_matrix(graph)
## Compute many possible network statistics
import scipy.io
import networkx as nx
import pandas as pd
import numpy as np
from collections import Counter
from operator import itemgetter
def identify_n_hubs(G,top_n=100,node_type=None):
if node_type is not None:
target_nodes = []
for n in G.nodes(data=True):
try:
if n[1]['type'] == node_type:
target_nodes.append(n[0])
            except KeyError:
                pass
else:
target_nodes = G.nodes()
degree_dict = {x : G.degree(x) for x in target_nodes}
top_n_id = {x[0]:x[1] for e,x in enumerate(sorted(degree_dict.items(), key = itemgetter(1), reverse = True)) if e < top_n}
return top_n_id
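## Minimal usage sketch (illustration only): the five highest-degree nodes of a
## toy graph, returned as a {node: degree} mapping.
def _demo_identify_n_hubs():
    toy_graph = nx.karate_club_graph()
    return identify_n_hubs(toy_graph, top_n=5)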
def core_network_statistics(G,labels=None,name="example"):
rframe = pd.DataFrame(columns=["Name",
"classes",
"nodes",
"edges",
"degree",
"diameter",
"connected components",
"clustering coefficient",
"density",
"flow_hierarchy"])
nodes = len(G.nodes())
edges = len(G.edges())
    cc = len(list(nx.connected_components(G.to_undirected())))
    try:
        clustering = nx.average_clustering(G.to_undirected())
    except Exception:
        clustering = None
    try:
        dx = nx.density(G)
    except Exception:
        dx = None
if labels is not None:
number_of_classes = labels.shape[1]
else:
number_of_classes = None
node_degree_vector = list(dict(nx.degree(G)).values())
mean_degree = np.mean(node_degree_vector)
try:
diameter = nx.diameter(G)
except:
diameter = "intractable"
try:
flow_hierarchy = nx.flow_hierarchy(G)
except:
flow_hierarchy = "intractable"
point = {"Name": name,
"classes":number_of_classes,
"nodes":nodes,
"edges":edges,
"diameter":diameter,
"degree":mean_degree,
"flow hierarchy":flow_hierarchy,
"connected components":cc,
"clustering coefficient":clustering,
"density":dx}
rframe = rframe.append(point,ignore_index=True)
return rframe
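## Minimal usage sketch (illustration only): collect the summary statistics of
## a small undirected graph into a one-row data frame.
def _demo_core_network_statistics():
    toy_graph = nx.karate_club_graph()
    return core_network_statistics(toy_graph, name="karate club")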
## implement scale-free network estimation
## do this according to the WGCNA paper (weighted gene co-expression network analysis)
import numpy as np
from collections import Counter
from scipy import stats
def pick_threshold(matrix):
    ## scan candidate thresholds and keep the one whose thresholded network has
    ## the most scale-free degree distribution (best log-log fit, WGCNA-style)
    current_r_opt = 0
    best_threshold = 0
    rho, pval = stats.spearmanr(matrix)
    for j in np.linspace(0,1,50):
        tmp_array = rho.copy()
        tmp_array[tmp_array > j] = 1
        tmp_array[tmp_array < j] = 0
        np.fill_diagonal(tmp_array, 0) ## remove self loops
        rw_sum = np.sum(tmp_array,axis=0)
        rw_sum = rw_sum[rw_sum > 0] ## drop isolated nodes before the log-log fit
        counts = Counter(rw_sum)
        key_counts = np.log(list(counts.keys()))
        counts = np.log(list(counts.values()))
        slope, intercept, r_value, p_value, std_err = stats.linregress(key_counts,counts)
        r_squared = r_value ** 2
        if r_squared > current_r_opt:
            print("Updating R^2: {}".format(r_squared))
            current_r_opt = r_squared
            best_threshold = j
            if r_squared > 0.80:
                return j
    return best_threshold
def default_correlation_to_network(matrix,input_type="matrix",preprocess="standard"):
    if preprocess == "standard":
        matrix = (matrix - np.mean(matrix, axis=0)) / np.std(matrix, axis=0)
    optimal_threshold = pick_threshold(matrix)
    print("Correlation threshold {}".format(optimal_threshold))
    ## threshold the Spearman correlation matrix to obtain the network adjacency matrix
    rho, pval = stats.spearmanr(matrix)
    rho[rho > optimal_threshold] = 1
    rho[rho <= optimal_threshold] = 0
    np.fill_diagonal(rho, 0)
    return rho
if __name__ == "__main__":
from numpy import genfromtxt
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--filename",default="/home/skblaz/Downloads/expression.tsv")
args = parser.parse_args()
datta = args.filename
a = genfromtxt(datta, delimiter='\t',skip_header=4)
a = np.nan_to_num(a)
print("Read the data..")
# idx = np.random.randint(1000,size=5000)
# a = a[:,idx]
print(default_correlation_to_network(a).shape)
## compute enrichment via FET
## first for only simple terms,
## continue with communities
## Compute many possible network statistics
import scipy.io
import networkx as nx
import pandas as pd
import numpy as np
def core_network_statistics(G,labels=None,name="example"):
rframe = pd.DataFrame(columns=["Name",
"classes",
"nodes",
"edges",
"degree",
"diameter",
"connected components",
"clustering coefficient",
"density",
"flow_hierarchy"])
nodes = len(G.nodes())
edges = len(G.edges())
    cc = len(list(nx.connected_components(G.to_undirected())))
    try:
        clustering = nx.average_clustering(G.to_undirected())
    except Exception:
        clustering = None
    try:
        dx = nx.density(G)
    except Exception:
        dx = None
if labels is not None:
number_of_classes = labels.shape[1]
else:
number_of_classes = None
    node_degree_vector = list(dict(nx.degree(G)).values())
    mean_degree = np.mean(node_degree_vector)
    try:
        diameter = nx.diameter(G)
    except Exception:
        diameter = "intractable"
    try:
        flow_hierarchy = nx.flow_hierarchy(G)
    except Exception:
        flow_hierarchy = "intractable"
point = {"Name": name,
"classes":number_of_classes,
"nodes":nodes,
"edges":edges,
"diameter":diameter,
"degree":mean_degree,
"flow hierarchy":flow_hierarchy,
"connected components":cc,
"clustering coefficient":clustering,
"density":dx}
rframe = rframe.append(point,ignore_index=True)
return rframe
### ncd
import networkx as nx
import numpy as np
import tqdm
from sklearn.cluster import AffinityPropagation
import sklearn.metrics.pairwise
import multiprocessing as mp
from .node_ranking import sparse_page_rank,modularity,stochastic_normalization
from scipy.cluster.hierarchy import dendrogram, linkage
from scipy.spatial.distance import pdist
from scipy.cluster.hierarchy import fcluster
import scipy.sparse as sp
from collections import defaultdict
from itertools import product
#import community
from networkx.algorithms.community import LFR_benchmark_graph
from sklearn.cluster import AffinityPropagation,DBSCAN,MiniBatchKMeans
from scipy import cluster
from scipy.spatial.distance import pdist
global _RANK_GRAPH
def page_rank_kernel(index_row):
## call as results = p.map(pr_kernel, batch)
pr = sparse_page_rank(_RANK_GRAPH, [index_row],
epsilon=1e-6,
max_steps=100000,
damping=0.90,
spread_step=10,
spread_percent=0.1,
try_shrink=True)
norm = np.linalg.norm(pr, 2)
if norm > 0:
pr = pr / np.linalg.norm(pr, 2)
return (index_row,pr)
else:
        return (index_row, np.zeros(_RANK_GRAPH.shape[1]))
def create_tree(centers):
clusters = {}
    to_merge = linkage(centers, method='single')
    n_points = len(centers)
    for i, merge in enumerate(to_merge):
        if merge[0] < n_points:
            # if it is an original point, read it from the centers array
            a = centers[int(merge[0])]
        else:
            # otherwise read the cluster that has already been created
            a = clusters[int(merge[0])]
        if merge[1] < n_points:
            b = centers[int(merge[1])]
        else:
            b = clusters[int(merge[1])]
        # scipy indexes newly created clusters from n_points upwards
        clusters[n_points + i] = {
            'children' : [a, b]
        }
        # ^ you could optionally store other info here (e.g. distances)
return clusters
def sum(X, v):
    ## in-place helper: subtract v[row] from every stored (nonzero) entry of row
    ## `row` of the CSR matrix X (note that this shadows the builtin sum here)
    from numpy.lib.stride_tricks import as_strided
    rows, cols = X.shape
    row_start_stop = as_strided(X.indptr, shape=(rows, 2),
                                strides=2*X.indptr.strides)
for row, (start, stop) in enumerate(row_start_stop):
data = X.data[start:stop]
data -= v[row]
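## Minimal sketch (illustration only) of the in-place helper above: subtract a
## per-row value from the stored entries of a small CSR matrix; after the call
## X.data holds [0.5, 1.5, 2.0].
def _demo_rowwise_subtract():
    X = sp.csr_matrix(np.array([[1., 0., 2.],
                                [0., 3., 0.]]))
    offsets = np.array([0.5, 1.0])
    sum(X, offsets)  ## `sum` here is the CSR helper defined above
    return X.data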
def NoRC_communities_main(input_graph, clustering_scheme="hierarchical", max_com_num=100,
                          verbose=False, sparsify=True, parallel_step=6, prob_threshold=0.0005,
                          community_range=(1, 3, 5, 7, 11, 20, 40, 50, 100, 200, 300),
                          fine_range=3, lag_threshold=10):
if verbose:
print("Walking..")
global _RANK_GRAPH
_RANK_GRAPH = input_graph
A = _RANK_GRAPH.copy()
_RANK_GRAPH = nx.to_scipy_sparse_matrix(_RANK_GRAPH)
_RANK_GRAPH = stochastic_normalization(_RANK_GRAPH) ## normalize
n = _RANK_GRAPH.shape[1]
edgelist_triplets = []
jobs = [range(n)[i:i + parallel_step] for i in range(0, n, parallel_step)]
with mp.Pool(processes=parallel_step) as p:
for batch in tqdm.tqdm(jobs):
results = p.map(page_rank_kernel,batch)
for nid, result_vector in results:
cols = np.argwhere(result_vector > prob_threshold).flatten().astype(int)
vals = result_vector[cols].flatten()
ixx = np.repeat(nid,cols.shape[0]).flatten().astype(int)
arx = np.vstack((ixx,cols,vals)).T
edgelist_triplets.append(arx)
sparse_edgelist = np.concatenate(edgelist_triplets,axis=0)
print("Compressed to {}% of the initial size".format((sparse_edgelist.shape[0]*100)/n**2))
vectors = sp.coo_matrix((sparse_edgelist[:,2], (sparse_edgelist[:,0].astype(int),sparse_edgelist[:,1].astype(int)))).tocsr()
mx_opt = 0
if clustering_scheme == "kmeans":
if verbose:
print("Doing kmeans search")
nopt = 0
lag_num = 0
for nclust in tqdm.tqdm(community_range):
dx_hc = defaultdict(list)
clustering_algorithm = MiniBatchKMeans(n_clusters=nclust)
clusters = clustering_algorithm.fit_predict(vectors)
for a, b in zip(clusters,A.nodes()):
dx_hc[a].append(b)
partitions = dx_hc.values()
mx = modularity(A, partitions, weight='weight')
if mx > mx_opt:
lag_num = 0
if verbose:
print("Improved modularity: {}, found {} communities.".format(mx,len(partitions)))
mx_opt = mx
opt_clust = dx_hc
nopt = nclust
if mx == 1:
nopt = nclust
return opt_clust
else:
lag_num+=1
if verbose:
print("No improvement for {} iterations.".format(lag_num))
if lag_num > lag_threshold:
break
## fine grained search
if verbose:
print("Fine graining around {}".format(nopt))
        for nclust in range(max(2, nopt - fine_range), nopt + fine_range):
if nclust != nopt:
dx_hc = defaultdict(list)
clustering_algorithm = MiniBatchKMeans(n_clusters=nclust)
clusters = clustering_algorithm.fit_predict(vectors)
for a, b in zip(clusters,A.nodes()):
dx_hc[a].append(b)
partitions = dx_hc.values()
mx = modularity(A, partitions, weight='weight')
if mx > mx_opt:
if verbose:
print("Improved modularity: {}, found {} communities.".format(mx,len(partitions)))
mx_opt = mx
opt_clust = dx_hc
if mx == 1:
nopt = nclust
return opt_clust
return opt_clust
if clustering_scheme == "hierarchical":
Z = linkage(vectors.todense(), 'average')
mod_hc_opt = 0
for nclust in tqdm.tqdm(community_range):
dx_hc = defaultdict(list)
try:
cls = fcluster(Z, nclust, criterion='maxclust')
for a,b in zip(cls,A.nodes()):
dx_hc[a].append(b)
partition_hi = dx_hc.values()
mod = modularity(A, partition_hi, weight='weight')
if mod > mod_hc_opt:
if verbose:
print("\nImproved modularity: {}, communities: {}".format(mod, len(partition_hi)))
mod_hc_opt = mod
opt_clust = dx_hc
if mod == 1:
return opt_clust
except Exception as es:
print (es)
return opt_clust
if __name__ == "__main__":
# n = 50
# tau1 = 4
# tau2 = 1.5
# mu = 0.1
# graph = LFR_benchmark_graph(n,
# tau1,
# tau2,
# mu,
# average_degree=5,
# min_community=30,
# seed=10)
graph = nx.powerlaw_cluster_graph(1000,5,0.1)
print(nx.info(graph))
communities1 = NoRC_communities_main(graph,verbose=True,clustering_scheme="kmeans")
communities1 = NoRC_communities_main(graph,verbose=True,clustering_scheme="hierarchical")
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
This package implements community detection.
The package is named community, but on PyPI it is distributed as python-louvain.
"""
# from .community_louvain import (
# partition_at_level,
# modularity,
# best_partition,
# generate_dendrogram,
# induced_graph,
# load_binary,
# )
__author__ = """Thomas Aynaud (thomas.aynaud@lip6.fr)"""
# Copyright (C) 2009 by
# Thomas Aynaud <thomas.aynaud@lip6.fr>
# All rights reserved.
# BSD license.
# coding=utf-8
class Status(object):
"""
    Holds several pieces of data in one structure.
    Could be replaced by a named tuple, but we don't want to depend on Python 2.6
"""
node2com = {}
total_weight = 0
internals = {}
degrees = {}
gdegrees = {}
def __init__(self):
self.node2com = dict([])
self.total_weight = 0
self.degrees = dict([])
self.gdegrees = dict([])
self.internals = dict([])
self.loops = dict([])
def __str__(self):
return ("node2com : " + str(self.node2com) + " degrees : "
+ str(self.degrees) + " internals : " + str(self.internals)
+ " total_weight : " + str(self.total_weight))
def copy(self):
"""Perform a deep copy of status"""
new_status = Status()
new_status.node2com = self.node2com.copy()
new_status.internals = self.internals.copy()
new_status.degrees = self.degrees.copy()
new_status.gdegrees = self.gdegrees.copy()
        new_status.total_weight = self.total_weight
        return new_status
def init(self, graph, weight, part=None):
"""Initialize the status of a graph with every node in one community"""
count = 0
self.node2com = dict([])
self.total_weight = 0
self.degrees = dict([])
self.gdegrees = dict([])
self.internals = dict([])
self.total_weight = graph.size(weight=weight)
if part is None:
for node in graph.nodes():
self.node2com[node] = count
deg = float(graph.degree(node, weight=weight))
if deg < 0:
error = "Bad node degree ({})".format(deg)
raise ValueError(error)
self.degrees[count] = deg
self.gdegrees[node] = deg
edge_data = graph.get_edge_data(node, node, default={weight: 0})
self.loops[node] = float(edge_data.get(weight, 1))
self.internals[count] = self.loops[node]
count += 1
else:
for node in graph.nodes():
com = part[node]
self.node2com[node] = com
deg = float(graph.degree(node, weight=weight))
self.degrees[com] = self.degrees.get(com, 0) + deg
self.gdegrees[node] = deg
inc = 0.
for neighbor, datas in graph[node].items():
edge_weight = datas.get(weight, 1)
if edge_weight <= 0:
error = "Bad graph type ({})".format(type(graph))
raise ValueError(error)
if part[neighbor] == com:
if neighbor == node:
inc += float(edge_weight)
else:
inc += float(edge_weight) / 2.
self.internals[com] = self.internals.get(com, 0) + inc
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
This package implements community detection.
The package is named community, but on PyPI it is distributed as python-louvain.
"""
# from .community_louvain import (
# partition_at_level,
# modularity,
# best_partition,
# generate_dendrogram,
# induced_graph,
# load_binary,
# )
__author__ = """Thomas Aynaud (thomas.aynaud@lip6.fr)"""
# Copyright (C) 2009 by
# Thomas Aynaud <thomas.aynaud@lip6.fr>
# All rights reserved.
# BSD license.
## a set of measures for assessing community quality
import numpy as np
from sklearn.metrics import silhouette_score
from itertools import product
def modularity(G, communities, weight='weight'):
communities = list(communities.values())
multigraph = G.is_multigraph()
directed = G.is_directed()
m = G.size(weight=weight)
if directed:
out_degree = dict(G.out_degree(weight=weight))
in_degree = dict(G.in_degree(weight=weight))
norm = 1 / m
else:
out_degree = dict(G.degree(weight=weight))
in_degree = out_degree
norm = 1 / (2 * m)
def val(u, v):
try:
if multigraph:
w = sum(d.get(weight, 1) for k, d in G[u][v].items())
else:
w = G[u][v].get(weight, 1)
except KeyError:
w = 0
# Double count self-loops if the graph is undirected.
if u == v and not directed:
w *= 2
return w - in_degree[u] * out_degree[v] * norm
    Q = sum(val(u, v) for c in communities for u, v in product(c, repeat=2))
return Q * norm
def size_distribution(network_partition):
return np.array([len(x) for x in network_partition.values()])
def number_of_communities(network_partition):
return len(network_partition)
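## Minimal usage sketch (illustration only): score a two-community split of a
## toy graph. The partition is passed as a dictionary mapping a community id to
## the list of its member nodes, as modularity() above expects.
def _demo_modularity():
    import networkx as nx
    toy_graph = nx.karate_club_graph()
    two_blocks = {0: [n for n in toy_graph if n < 17],
                  1: [n for n in toy_graph if n >= 17]}
    return modularity(toy_graph, two_blocks)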