Commit b365f613 authored by zahra

Semantic Linking: Created Classes for semantic linking logic

parent 2caa4a25
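For context: this commit turns the script-level clustering code in HyperGraph.py into a classify() function and wraps the graph construction, label propagation and plotting in a SemanticLinking class. A minimal usage sketch follows, assuming mult_in_out.json is in the working directory and both modules live in the initialdemo package; the exact import paths and calls are assumptions for illustration, not part of the diff:

    # Hypothetical usage of the refactored modules (sketch, not part of the commit).
    # Option 1: run only the clustering step.
    import initialdemo.HyperGraph as hg
    hg.classify()                 # reads mult_in_out.json and fills hg.clusterlabels,
                                  # hg.labalvlues and hg.destclusterlabel
    print(hg.labalvlues)          # one representative label per cluster of input ids

    # Option 2 (separate process): build and draw the transaction graph.
    # SemanticLinking.__init__ calls hg.classify() itself, and the module
    # instantiates and runs the class at import time:
    import initialdemo.SemanticLinking   # runs SemanticLinking().main() on import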
Index: data-hub/semantic-linking-microservice/app/initialdemo/HyperGraph.py
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- data-hub/semantic-linking-microservice/app/initialdemo/HyperGraph.py (date 1568037363000)
+++ data-hub/semantic-linking-microservice/app/initialdemo/HyperGraph.py (date 1568038969230)
@@ -1,140 +1,137 @@
-import networkx as nx
-import matplotlib.pyplot as plt
-import pandas as pd
import json
-
-
-with open("mult_in_out.json", "r") as json_file:
- df_nodes = json.load(json_file)
-
-
nodeIds = []
-destIds= []
-clusterlabels= []
+destIds = []
+clusterlabels = []
destclusterlabel = []
-cluster= []
+cluster = []
labalvlues = []
-i = 0
+
+def classify():
+
+ with open("mult_in_out.json", "r") as json_file:
+ df_nodes = json.load(json_file)
+
-for row in df_nodes:
+ for row in df_nodes:
- for j in range(len(row['TransactionFrom'])):
- print(" Input Ids: ", row['TransactionFrom'][j])
- nodeIds.append(row['TransactionFrom'])
- print("This is nodes: ", nodeIds)
+ for j in range(len(row['TransactionFrom'])):
+ print(" Input Ids: ", row['TransactionFrom'][j])
+ nodeIds.append(row['TransactionFrom'])
+ print("This is nodes: ", nodeIds)
-for row in df_nodes:
- destIds.append(row['TransactionTo'])
+ for row in df_nodes:
+ destIds.append(row['TransactionTo'])
-for row in range(len(nodeIds)):
- print(nodeIds[row])
+ for row in range(len(nodeIds)):
+ print(nodeIds[row])
-print("Finish InputIDs")
-for row in range(len(nodeIds)):
+ print("Finish InputIDs")
+ i = 0
+ for row in range(len(nodeIds)):
- clusterlabels.append(row)
- i += 1
-print(i)
+ clusterlabels.append(row)
+ i += 1
+ print(i)
-"""" classifying Inputs"""
-"""" Labaling inputs"""
-for row in range(len(nodeIds)):
+ """" classifying Inputs"""
+ """" Labaling inputs"""
+ for row in range(len(nodeIds)):
- for rown in range(len(nodeIds[row])):
+ for rown in range(len(nodeIds[row])):
- for row1 in range(len(nodeIds)):
- for rown1 in range(len(nodeIds[row1])):
- if(nodeIds[row][rown]==nodeIds[row1][rown1]):
- # print("row: ",row,"row1: ",row1)
- if(row < row1):
- for row2 in clusterlabels:
- if( clusterlabels[row1]== clusterlabels[row2]):
- clusterlabels[row2]=clusterlabels[row]
- clusterlabels[row1] = clusterlabels[row]
+ for row1 in range(len(nodeIds)):
+ for rown1 in range(len(nodeIds[row1])):
+ if(nodeIds[row][rown]==nodeIds[row1][rown1]):
+ # print("row: ",row,"row1: ",row1)
+ if(row < row1):
+ for row2 in clusterlabels:
+ if( clusterlabels[row1]== clusterlabels[row2]):
+ clusterlabels[row2]=clusterlabels[row]
+ clusterlabels[row1] = clusterlabels[row]
- else:
- for row2 in clusterlabels:
- if (clusterlabels[row] == clusterlabels[row2]):
- clusterlabels[row2] = clusterlabels[row1]
- clusterlabels[row] = clusterlabels[row1]
+ else:
+ for row2 in clusterlabels:
+ if (clusterlabels[row] == clusterlabels[row2]):
+ clusterlabels[row2] = clusterlabels[row1]
+ clusterlabels[row] = clusterlabels[row1]
-print(clusterlabels)
-print("cluster labels:", len(clusterlabels))
-print("NodeIDs: ", len(nodeIds))
+ print(clusterlabels)
+ print("cluster labels:", len(clusterlabels))
+ print("NodeIDs: ", len(nodeIds))
-"""" Calculating the number of clusters"""
-clusternum = 1
-labalvlues.append(clusterlabels[0])
-for row in range(len(clusterlabels)):
- flag = True
- for row1 in range(len(labalvlues)):
- if(clusterlabels[row]== labalvlues[row1]):
- flag = False
+ """" Calculating the number of clusters"""
+ clusternum = 1
+ labalvlues.append(clusterlabels[0])
+ for row in range(len(clusterlabels)):
+ flag = True
+ for row1 in range(len(labalvlues)):
+ if(clusterlabels[row]== labalvlues[row1]):
+ flag = False
- if (flag):
- clusternum = + 1
- labalvlues.append(clusterlabels[row])
+ if (flag):
+ clusternum += 1
+ labalvlues.append(clusterlabels[row])
-print("label values (source Ids in the network): ", labalvlues, " and the number of clusters is: ", len(labalvlues))
+ print("label values (source Ids in the network): ", labalvlues, " and the number of clusters is: ", len(labalvlues))
-"""" clustering Ids according to their labels"""
+ """" clustering Ids according to their labels"""
-for row in range(len(labalvlues)):
- cluster.append([])
- for row3 in range(len(nodeIds)):
- if (labalvlues[row] == clusterlabels[row3]):
- cluster[row].extend(nodeIds[row3])
-print("clusters: ", cluster)
+ for row in range(len(labalvlues)):
+ cluster.append([])
+ for row3 in range(len(nodeIds)):
+ if (labalvlues[row] == clusterlabels[row3]):
+ cluster[row].extend(nodeIds[row3])
+ print("clusters: ", cluster)
-""" Removing duplicating items in cluster"""
+ """ Removing duplicating items in cluster"""
-flag = True
-while(flag):
- for row in range(len(cluster)):
+ flag = True
+ while(flag):
+ for row in range(len(cluster)):
- flag= False
- for row1 in range(len(cluster[row])):
- flag= False
- for row2 in range (len(cluster[row])):
- if(row1 != row2):
- if(cluster[row][row1] == cluster[row][row2]):
- del cluster[row][row2]
- flag=True
- break
- if(flag):
- break
- if(flag):
- break
+ flag= False
+ for row1 in range(len(cluster[row])):
+ flag= False
+ for row2 in range (len(cluster[row])):
+ if(row1 != row2):
+ if(cluster[row][row1] == cluster[row][row2]):
+ del cluster[row][row2]
+ flag=True
+ break
+ if(flag):
+ break
+ if(flag):
+ break
-print("cluster:", cluster)
+ print("cluster:", cluster)
-"""" Clustering Destination Ids """
-for row in range(len(destIds)):
- destclusterlabel.append([])
- for row2 in range(len(destIds[row])):
- flag = True
- for rownum in range(len(labalvlues)):
- for row1 in range(len(cluster[rownum])):
+ """" Clustering Destination Ids """
+ for row in range(len(destIds)):
+ destclusterlabel.append([])
+ for row2 in range(len(destIds[row])):
+ flag = True
+ for rownum in range(len(labalvlues)):
+ for row1 in range(len(cluster[rownum])):
- if(destIds[row][row2]== cluster[rownum][row1]):
- destclusterlabel[row].append(labalvlues[rownum])
- flag = False
- if(flag):
- destclusterlabel.append(destIds[row][row2])
+ if(destIds[row][row2]== cluster[rownum][row1]):
+ destclusterlabel[row].append(labalvlues[rownum])
+ flag = False
+ if(flag):
+ destclusterlabel.append(destIds[row][row2])
-print("destination labels (destination Ids): ", destclusterlabel)
+ print("destination labels (destination Ids): ", destclusterlabel)
Index: data-hub/semantic-linking-microservice/app/initialdemo/SemanticLinking.py
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- data-hub/semantic-linking-microservice/app/initialdemo/SemanticLinking.py (date 1568037363000)
+++ data-hub/semantic-linking-microservice/app/initialdemo/SemanticLinking.py (date 1568040344378)
@@ -1,7 +1,7 @@
import networkx as nx
import matplotlib.pyplot as plt
from collections import Counter
-import HyperGraph as hg
+import initialdemo.HyperGraph as hg
import pandas as pd
import json
import warnings
@@ -12,194 +12,198 @@
import values as values
from matplotlib import colors
-def _color_network(G):
- """Colors the network so that neighboring nodes all have distinct colors.
+class SemanticLinking:
+
+ def __init__(self):
+ hg.classify()
+
+ def _color_network(self, G):
+ """Colors the network so that neighboring nodes all have distinct colors.
- Returns a dict keyed by color to a set of nodes with that color.
- """
- coloring = dict() # color => set(node)
- colors = nx.coloring.greedy_color(G)
- for node, color in colors.items():
- if color in coloring:
- coloring[color].add(node)
- else:
- coloring[color] = set([node])
- return coloring
+ Returns a dict keyed by color to a set of nodes with that color.
+ """
+ coloring = dict() # color => set(node)
+ colors = nx.coloring.greedy_color(G)
+ for node, color in colors.items():
+ if color in coloring:
+ coloring[color].add(node)
+ else:
+ coloring[color] = set([node])
+ return coloring
-def _labeling_complete(labeling, G):
- """Determines whether or not LPA is done.
+ def _labeling_complete(self, labeling, G):
+ """Determines whether or not LPA is done.
- Label propagation is complete when all nodes have a label that is
- in the set of highest frequency labels amongst its neighbors.
+ Label propagation is complete when all nodes have a label that is
+ in the set of highest frequency labels amongst its neighbors.
- Nodes with no neighbors are considered complete.
- """
- return all(labeling[v] in _most_frequent_labels(v, labeling, G)
- for v in G if len(G[v]) > 0)
+ Nodes with no neighbors are considered complete.
+ """
+ return all(labeling[v] in self._most_frequent_labels(v, labeling, G)
+ for v in G if len(G[v]) > 0)
-def _most_frequent_labels(node, labeling, G):
- """Returns a set of all labels with maximum frequency in `labeling`.
+ def _most_frequent_labels(self, node, labeling, G):
+ """Returns a set of all labels with maximum frequency in `labeling`.
- Input `labeling` should be a dict keyed by node to labels.
- """
- if not G[node]:
- # Nodes with no neighbors are themselves a community and are labeled
- # accordingly, hence the immediate if statement.
- return {labeling[node]}
+ Input `labeling` should be a dict keyed by node to labels.
+ """
+ if not G[node]:
+ # Nodes with no neighbors are themselves a community and are labeled
+ # accordingly, hence the immediate if statement.
+ return {labeling[node]}
- # Compute the frequencies of all neighbours of node
- freqs = Counter(labeling[q] for q in G[node])
- max_freq = max(freqs.values())
- return {label for label, freq in freqs.items() if freq == max_freq}
+ # Compute the frequencies of all neighbours of node
+ freqs = Counter(labeling[q] for q in G[node])
+ max_freq = max(freqs.values())
+ return {label for label, freq in freqs.items() if freq == max_freq}
-def _update_label(node, labeling, G):
- """Updates the label of a node using the Prec-Max tie breaking algorithm
+ def _update_label(self, node, labeling, G):
+ """Updates the label of a node using the Prec-Max tie breaking algorithm
- The algorithm is explained in: 'Community Detection via Semi-Synchronous
- Label Propagation Algorithms' Cordasco and Gargano, 2011
- """
- high_labels = _most_frequent_labels(node, labeling, G)
- if len(high_labels) == 1:
- labeling[node] = high_labels.pop()
- elif len(high_labels) > 1:
- # Prec-Max
- if labeling[node] not in high_labels:
+ The algorithm is explained in: 'Community Detection via Semi-Synchronous
+ Label Propagation Algorithms' Cordasco and Gargano, 2011
+ """
+ high_labels = self._most_frequent_labels(node, labeling, G)
+ if len(high_labels) == 1:
+ labeling[node] = high_labels.pop()
+ elif len(high_labels) > 1:
+ # Prec-Max
+ if labeling[node] not in high_labels:
- labeling[node] = max(high_labels)
+ labeling[node] = max(high_labels)
-warnings.filterwarnings('ignore')
+ warnings.filterwarnings('ignore')
-#G = nx.DiGraph(directed=True)
-G = nx.MultiDiGraph(day="Stackoverflow")
+ #G = nx.DiGraph(directed=True)
+ G = nx.MultiDiGraph(day="Stackoverflow")
-df_nodes = hg.clusterlabels
-destf_nodes = hg.destclusterlabel
-color_map = {1: '#f09494', 2: '#eebcbc', 3: '#72bbd0', 4: '#91f0a1', 5: '#629fff', 6: '#bcc2f2',
- 7: '#eebcbc', 8: '#f1f0c0', 9: '#d2ffe7', 10: '#caf3a6', 11: '#ffdf55', 12: '#ef77aa',
- 13: '#d6dcff', 14: '#d2f5f0'}
-i=0
+ df_nodes = hg.clusterlabels
+ destf_nodes = hg.destclusterlabel
+ color_map = {1: '#f09494', 2: '#eebcbc', 3: '#72bbd0', 4: '#91f0a1', 5: '#629fff', 6: '#bcc2f2',
+ 7: '#eebcbc', 8: '#f1f0c0', 9: '#d2ffe7', 10: '#caf3a6', 11: '#ffdf55', 12: '#ef77aa',
+ 13: '#d6dcff', 14: '#d2f5f0'}
+ i=0
-graphedge=[]
-weigth=[]
-sourcedestination = []
-source = []
-dest = []
-edge_width = []
-weight1 = []
+ graphedge = []
+ weigth = []
+ sourcedestination = []
+ source = []
+ dest = []
+ edge_width = []
+ weight1 = []
+ node_adjacencies = []
-""""drawing edges in graph"""
+ def drawedges(self):
-for drow in range(len(df_nodes)):
- for row in range(len(destf_nodes[drow])):
- G.add_edge(df_nodes[drow], destf_nodes[drow][row])
+ """drawing edges in graph"""
+
+ for drow in range(len(self.df_nodes)):
+ for row in range(len(self.destf_nodes[drow])):
+ self.G.add_edge(self.df_nodes[drow], self.destf_nodes[drow][row])
-for row in range(len(hg.labalvlues)):
- for row1 in range(len(hg.labalvlues)):
- weight1.append(G.number_of_edges(hg.labalvlues[row], hg.labalvlues[row1]))
- print("The number of coccurance from node ", hg.labalvlues[row],"to node ", hg.labalvlues[row1], ": ", weight1[row1])
-
-G.__setattr__('weight', weight1)
+ for row in range(len(hg.labalvlues)):
+ for row1 in range(len(hg.labalvlues)):
+ self.weight1.append(self.G.number_of_edges(hg.labalvlues[row], hg.labalvlues[row1]))
+ print("The number of coccurance from node ", hg.labalvlues[row],"to node ", hg.labalvlues[row1], ": ", self.weight1[row1])
- # print(float(row['Timestamp']))
- #G.add_weighted_edges_from([(row['TransactionFrom'], row['TransactionTo'], i*j)])
+ self.G.__setattr__('weight', self.weight1)
-#print dict_pos
-
-"""label_propagation_communities(G) """
+ def labeling(self):
+ """label_propagation_communities(G) """
-coloring = _color_network(G)
- # Create a unique label for each node in the graph
-labeling = {v: k for k, v in enumerate(G)}
-print("lable value: ", labeling.values())
-while not _labeling_complete(labeling, G):
-# Update the labels of every node with the same color.
- print("lable value: ", labeling.values())
- for color, nodes in coloring.items():
- for n in nodes:
- _update_label(n, labeling, G)
- for label in set(labeling.values()):
- print("lable value: ", labeling.values())
+ coloring = self._color_network(self.G)
+ # Create a unique label for each node in the graph
+ labeling = {v: k for k, v in enumerate(self.G)}
+ print("lable value: ", labeling.values())
+ while not self._labeling_complete(labeling, self.G):
+ # Update the labels of every node with the same color.
+ print("lable value: ", labeling.values())
+ for color, nodes in coloring.items():
+ for n in nodes:
+ self._update_label(n, labeling, self.G)
+ for label in set(labeling.values()):
+ print("lable value: ", labeling.values())
-
-""" findig nodes' adjecencies"""
-node_adjacencies = []
-node_text = []
-for node, adjacencies in enumerate(G.adjacency()):
- node_adjacencies.append(len(adjacencies[1]))
- node_text.append('# of connections: '+str(len(adjacencies[1])))
+ def findigneighbors(self):
+ """ findig nodes' adjecencies"""
+ node_text = []
+ for node, adjacencies in enumerate(self.G.adjacency()):
+ self.node_adjacencies.append(len(adjacencies[1]))
+ node_text.append('# of connections: '+str(len(adjacencies[1])))
-G.color = node_adjacencies
+ self.G.color = self.node_adjacencies
-
-plt.figure(figsize=(25, 25))
-options = {
- 'with_labels': True,
- 'font_weight': 'regular',
-}
+ def result(self):
+ plt.figure(figsize=(25, 25))
+ options = {
+ 'with_labels': True,
+ 'font_weight': 'regular',
+ }
-#colors = [color_map[G.node[node][1]] for node in G]
-#sizes = [G.node[node]['Timestamp'] * 10 for node in G]
+ # colors = [color_map[G.node[node][1]] for node in G]
+ # sizes = [G.node[node]['Timestamp'] * 10 for node in G]
-
-d = nx.degree_centrality(G)
-d_list= list(d.values())
-print ("node centrality: ",d_list)
-print("node adjacencies: ", node_adjacencies)
-for row in range(len(weigth)):
- edge_width.append([])
- for drow in range(len(weigth[row])):
- edge_width[row].append(weigth[row][drow])
-node_size = [v * 80 for v in d.values()] #setting node size based on node centrality
-edge_width = [row * 0.5 for row in weight1]
-
-print("Nodes' Degree: ", nx.degree(G))
-print("Nodes' Betweeness ", nx.edge_betweenness_centrality(G))
-print("Nodes' Betweeness-centrality: ", nx.betweenness_centrality(G))
+ d = nx.degree_centrality(self.G)
+ d_list = list(d.values())
+ print("node centrality: ", d_list)
+ print("node adjacencies: ", self.node_adjacencies)
+ for row in range(len(self.weigth)):
+ self.edge_width.append([])
+ for drow in range(len(self.weigth[row])):
+ self.edge_width[row].append(self.weigth[row][drow])
+ node_size = [v * 80 for v in d.values()] # setting node size based on node centrality
+ edge_width = [row * 0.5 for row in self.weight1]
-
+ print("Nodes' Degree: ", nx.degree(self.G))
+ print("Nodes' Betweeness ", nx.edge_betweenness_centrality(self.G))
+ print("Nodes' Betweeness-centrality: ", nx.betweenness_centrality(self.G))
-"""
-Using the spring layout :
-- k controls the distance between the nodes and varies between 0 and 1
-- iterations is the number of times simulated annealing is run
-default k=0.1 and iterations=50
-"""
+ """
+ Using the spring layout :
+ - k controls the distance between the nodes and varies between 0 and 1
+ - iterations is the number of times simulated annealing is run
+ default k=0.1 and iterations=50
+ """
-labels2 = {}
+ labels2 = {}
-for idx, edge in enumerate(G.edges):
- labels2[edge] = "s"
+ for idx, edge in enumerate(self.G.edges):
+ labels2[edge] = "s"
-pos_nodes=nx.spring_layout(G, k=0.25, iterations=50)
-ax = plt.gca()
+ pos_nodes = nx.spring_layout(self.G, k=0.25, iterations=50)
-nx.draw(G, pos_nodes,node_color= node_adjacencies, node_size=node_size, width=2, arrowstyle='->',arrowsize=10, weight=weight1, edge_color='gray',**options)
-edge_labels = nx.get_edge_attributes(G, 'weight')
+ nx.draw(self.G, pos_nodes, node_color=self.node_adjacencies, node_size=node_size, width=2, arrowstyle='->',
+ arrowsize=10, weight=self.weight1, edge_color='gray', **options)
+ edge_labels = nx.get_edge_attributes(self.G, 'weight')
-pos_attrs = {}
-for node, coords in pos_nodes.items():
- pos_attrs[node] = (coords[0], coords[1] + 0.02)
-nx.draw_networkx_edge_labels(G, pos_nodes, edge_labels=edge_labels, font_size=10, font_color='red')
-nx.draw_networkx_labels(G, pos_attrs, labels=labeling,font_size=10, font_color='red')
-
-
+ pos_attrs = {}
+ for node, coords in pos_nodes.items():
+ pos_attrs[node] = (coords[0], coords[1] + 0.02)
+ nx.draw_networkx_edge_labels(self.G, pos_nodes, edge_labels=edge_labels, font_size=10, font_color='red')
+ nx.draw_networkx_labels(self.G, pos_attrs, labels=self.node_labels, font_size=10, font_color='red')
-ax = plt.gca()
-ax.collections[0].set_edgecolor("#555555")
-plt.show()
+ ax = plt.gca()
+ ax.collections[0].set_edgecolor("#555555")
+ plt.show()
+ def main(self):
+ self.drawedges()
+ self.labeling()
+ self.findigneighbors()
+ self.result()
-
+linking = SemanticLinking()
+linking.main()
\ No newline at end of file
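The _color_network / _labeling_complete / _most_frequent_labels / _update_label methods above re-implement semi-synchronous label propagation with Prec-Max tie breaking (Cordasco and Gargano, 2011), as their docstrings note. For a quick cross-check, NetworkX ships a built-in version of this algorithm; a small sketch follows (it requires an undirected graph, whereas the class builds a MultiDiGraph, so a toy graph stands in here):

    import networkx as nx
    from networkx.algorithms.community import label_propagation_communities

    # Toy undirected graph standing in for the transaction graph
    G = nx.Graph()
    G.add_edges_from([(1, 2), (2, 3), (3, 1), (4, 5)])

    communities = label_propagation_communities(G)
    print([sorted(c) for c in communities])   # e.g. [[1, 2, 3], [4, 5]]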