Commit b365f613 authored by zahra

Semantic Linking: Created Classes for semantic linking logic

parent 2caa4a25
Index: data-hub/semantic-linking-microservice/app/initialdemo/HyperGraph.py
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- data-hub/semantic-linking-microservice/app/initialdemo/HyperGraph.py (date 1568037363000)
+++ data-hub/semantic-linking-microservice/app/initialdemo/HyperGraph.py (date 1568038969230)
@@ -1,140 +1,137 @@
-import networkx as nx
-import matplotlib.pyplot as plt
-import pandas as pd
import json
-
-
-with open("mult_in_out.json", "r") as json_file:
- df_nodes = json.load(json_file)
-
-
nodeIds = []
-destIds= []
-clusterlabels= []
+destIds = []
+clusterlabels = []
destclusterlabel = []
-cluster= []
+cluster = []
labalvlues = []
-i = 0
+
+def classify():
+
+ with open("mult_in_out.json", "r") as json_file:
+ df_nodes = json.load(json_file)
+
-for row in df_nodes:
+ for row in df_nodes:
- for j in range(len(row['TransactionFrom'])):
- print(" Input Ids: ", row['TransactionFrom'][j])
- nodeIds.append(row['TransactionFrom'])
- print("This is nodes: ", nodeIds)
+ for j in range(len(row['TransactionFrom'])):
+ print(" Input Ids: ", row['TransactionFrom'][j])
+ nodeIds.append(row['TransactionFrom'])
+ print("This is nodes: ", nodeIds)
-for row in df_nodes:
- destIds.append(row['TransactionTo'])
+ for row in df_nodes:
+ destIds.append(row['TransactionTo'])
-for row in range(len(nodeIds)):
- print(nodeIds[row])
+ for row in range(len(nodeIds)):
+ print(nodeIds[row])
-print("Finish InputIDs")
-for row in range(len(nodeIds)):
+ print("Finish InputIDs")
+ i = 0
+ for row in range(len(nodeIds)):
- clusterlabels.append(row)
- i += 1
-print(i)
+ clusterlabels.append(row)
+ i += 1
+ print(i)
-"""" classifying Inputs"""
-"""" Labaling inputs"""
-for row in range(len(nodeIds)):
+ """" classifying Inputs"""
+ """" Labaling inputs"""
+ for row in range(len(nodeIds)):
- for rown in range(len(nodeIds[row])):
+ for rown in range(len(nodeIds[row])):
- for row1 in range(len(nodeIds)):
- for rown1 in range(len(nodeIds[row1])):
- if(nodeIds[row][rown]==nodeIds[row1][rown1]):
- # print("row: ",row,"row1: ",row1)
- if(row < row1):
- for row2 in clusterlabels:
- if( clusterlabels[row1]== clusterlabels[row2]):
- clusterlabels[row2]=clusterlabels[row]
- clusterlabels[row1] = clusterlabels[row]
+ for row1 in range(len(nodeIds)):
+ for rown1 in range(len(nodeIds[row1])):
+ if(nodeIds[row][rown]==nodeIds[row1][rown1]):
+ # print("row: ",row,"row1: ",row1)
+ if(row < row1):
+ for row2 in clusterlabels:
+ if( clusterlabels[row1]== clusterlabels[row2]):
+ clusterlabels[row2]=clusterlabels[row]
+ clusterlabels[row1] = clusterlabels[row]
- else:
- for row2 in clusterlabels:
- if (clusterlabels[row] == clusterlabels[row2]):
- clusterlabels[row2] = clusterlabels[row1]
- clusterlabels[row] = clusterlabels[row1]
+ else:
+ for row2 in clusterlabels:
+ if (clusterlabels[row] == clusterlabels[row2]):
+ clusterlabels[row2] = clusterlabels[row1]
+ clusterlabels[row] = clusterlabels[row1]
-print(clusterlabels)
-print("cluster labels:", len(clusterlabels))
-print("NodeIDs: ", len(nodeIds))
+ print(clusterlabels)
+ print("cluster labels:", len(clusterlabels))
+ print("NodeIDs: ", len(nodeIds))
-"""" Calculating the number of clusters"""
-clusternum = 1
-labalvlues.append(clusterlabels[0])
-for row in range(len(clusterlabels)):
- flag = True
- for row1 in range(len(labalvlues)):
- if(clusterlabels[row]== labalvlues[row1]):
- flag = False
+ """" Calculating the number of clusters"""
+ clusternum = 1
+ labalvlues.append(clusterlabels[0])
+ for row in range(len(clusterlabels)):
+ flag = True
+ for row1 in range(len(labalvlues)):
+ if(clusterlabels[row]== labalvlues[row1]):
+ flag = False
- if (flag):
- clusternum = + 1
- labalvlues.append(clusterlabels[row])
+ if (flag):
+ clusternum = + 1
+ labalvlues.append(clusterlabels[row])
-print("label values (source Ids in the network): ", labalvlues, " and the number of clusters is: ", len(labalvlues))
+ print("label values (source Ids in the network): ", labalvlues, " and the number of clusters is: ", len(labalvlues))
-"""" clustering Ids according to their labels"""
+ """" clustering Ids according to their labels"""
-for row in range(len(labalvlues)):
- cluster.append([])
- for row3 in range(len(nodeIds)):
- if (labalvlues[row] == clusterlabels[row3]):
- cluster[row].extend(nodeIds[row3])
-print("clusters: ", cluster)
+ for row in range(len(labalvlues)):
+ cluster.append([])
+ for row3 in range(len(nodeIds)):
+ if (labalvlues[row] == clusterlabels[row3]):
+ cluster[row].extend(nodeIds[row3])
+ print("clusters: ", cluster)
-""" Removing duplicating items in cluster"""
+ """ Removing duplicating items in cluster"""
-flag = True
-while(flag):
- for row in range(len(cluster)):
+ flag = True
+ while(flag):
+ for row in range(len(cluster)):
- flag= False
- for row1 in range(len(cluster[row])):
- flag= False
- for row2 in range (len(cluster[row])):
- if(row1 != row2):
- if(cluster[row][row1] == cluster[row][row2]):
- del cluster[row][row2]
- flag=True
- break
- if(flag):
- break
- if(flag):
- break
+ flag= False
+ for row1 in range(len(cluster[row])):
+ flag= False
+ for row2 in range (len(cluster[row])):
+ if(row1 != row2):
+ if(cluster[row][row1] == cluster[row][row2]):
+ del cluster[row][row2]
+ flag=True
+ break
+ if(flag):
+ break
+ if(flag):
+ break
-print("cluster:", cluster)
+ print("cluster:", cluster)
-"""" Clustering Destination Ids """
-for row in range(len(destIds)):
- destclusterlabel.append([])
- for row2 in range(len(destIds[row])):
- flag = True
- for rownum in range(len(labalvlues)):
- for row1 in range(len(cluster[rownum])):
+ """" Clustering Destination Ids """
+ for row in range(len(destIds)):
+ destclusterlabel.append([])
+ for row2 in range(len(destIds[row])):
+ flag = True
+ for rownum in range(len(labalvlues)):
+ for row1 in range(len(cluster[rownum])):
- if(destIds[row][row2]== cluster[rownum][row1]):
- destclusterlabel[row].append(labalvlues[rownum])
- flag = False
- if(flag):
- destclusterlabel.append(destIds[row][row2])
+ if(destIds[row][row2]== cluster[rownum][row1]):
+ destclusterlabel[row].append(labalvlues[rownum])
+ flag = False
+ if(flag):
+ destclusterlabel.append(destIds[row][row2])
-print("destination labels (destination Ids): ", destclusterlabel)
+ print("destination labels (destination Ids): ", destclusterlabel)
Index: data-hub/semantic-linking-microservice/app/initialdemo/SemanticLinking.py
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- data-hub/semantic-linking-microservice/app/initialdemo/SemanticLinking.py (date 1568037363000)
+++ data-hub/semantic-linking-microservice/app/initialdemo/SemanticLinking.py (date 1568040344378)
@@ -1,7 +1,7 @@
import networkx as nx
import matplotlib.pyplot as plt
from collections import Counter
-import HyperGraph as hg
+import initialdemo.HyperGraph as hg
import pandas as pd
import json
import warnings
@@ -12,194 +12,198 @@
import values as values
from matplotlib import colors
-def _color_network(G):
- """Colors the network so that neighboring nodes all have distinct colors.
+class SemanticLinking:
+
+ def __init__(self):
+ hg.classify()
+
+ def _color_network(self, G):
+ """Colors the network so that neighboring nodes all have distinct colors.
- Returns a dict keyed by color to a set of nodes with that color.
- """
- coloring = dict() # color => set(node)
- colors = nx.coloring.greedy_color(G)
- for node, color in colors.items():
- if color in coloring:
- coloring[color].add(node)
- else:
- coloring[color] = set([node])
- return coloring
+ Returns a dict keyed by color to a set of nodes with that color.
+ """
+ coloring = dict() # color => set(node)
+ colors = nx.coloring.greedy_color(G)
+ for node, color in colors.items():
+ if color in coloring:
+ coloring[color].add(node)
+ else:
+ coloring[color] = set([node])
+ return coloring
-def _labeling_complete(labeling, G):
- """Determines whether or not LPA is done.
+ def _labeling_complete(self, labeling, G):
+ """Determines whether or not LPA is done.
- Label propagation is complete when all nodes have a label that is
- in the set of highest frequency labels amongst its neighbors.
+ Label propagation is complete when all nodes have a label that is
+ in the set of highest frequency labels amongst its neighbors.
- Nodes with no neighbors are considered complete.
- """
- return all(labeling[v] in _most_frequent_labels(v, labeling, G)
- for v in G if len(G[v]) > 0)
+ Nodes with no neighbors are considered complete.
+ """
+ return all(labeling[v] in self._most_frequent_labels(v, labeling, G)
+ for v in G if len(G[v]) > 0)
-def _most_frequent_labels(node, labeling, G):
- """Returns a set of all labels with maximum frequency in `labeling`.
+ def _most_frequent_labels(self, node, labeling, G):
+ """Returns a set of all labels with maximum frequency in `labeling`.
- Input `labeling` should be a dict keyed by node to labels.
- """
- if not G[node]:
- # Nodes with no neighbors are themselves a community and are labeled
- # accordingly, hence the immediate if statement.
- return {labeling[node]}
+ Input `labeling` should be a dict keyed by node to labels.
+ """
+ if not G[node]:
+ # Nodes with no neighbors are themselves a community and are labeled
+ # accordingly, hence the immediate if statement.
+ return {labeling[node]}
- # Compute the frequencies of all neighbours of node
- freqs = Counter(labeling[q] for q in G[node])
- max_freq = max(freqs.values())
- return {label for label, freq in freqs.items() if freq == max_freq}
+ # Compute the frequencies of all neighbours of node
+ freqs = Counter(labeling[q] for q in G[node])
+ max_freq = max(freqs.values())
+ return {label for label, freq in freqs.items() if freq == max_freq}
-def _update_label(node, labeling, G):
- """Updates the label of a node using the Prec-Max tie breaking algorithm
+ def _update_label(self, node, labeling, G):
+ """Updates the label of a node using the Prec-Max tie breaking algorithm
- The algorithm is explained in: 'Community Detection via Semi-Synchronous
- Label Propagation Algorithms' Cordasco and Gargano, 2011
- """
- high_labels = _most_frequent_labels(node, labeling, G)
- if len(high_labels) == 1:
- labeling[node] = high_labels.pop()
- elif len(high_labels) > 1:
- # Prec-Max
- if labeling[node] not in high_labels:
+ The algorithm is explained in: 'Community Detection via Semi-Synchronous
+ Label Propagation Algorithms' Cordasco and Gargano, 2011
+ """
+ high_labels = self._most_frequent_labels(node, labeling, G)
+ if len(high_labels) == 1:
+ labeling[node] = high_labels.pop()
+ elif len(high_labels) > 1:
+ # Prec-Max
+ if labeling[node] not in high_labels:
- labeling[node] = max(high_labels)
+ labeling[node] = max(high_labels)
-warnings.filterwarnings('ignore')
+ warnings.filterwarnings('ignore')
-#G = nx.DiGraph(directed=True)
-G = nx.MultiDiGraph(day="Stackoverflow")
+ #G = nx.DiGraph(directed=True)
+ G = nx.MultiDiGraph(day="Stackoverflow")
-df_nodes = hg.clusterlabels
-destf_nodes = hg.destclusterlabel
-color_map = {1: '#f09494', 2: '#eebcbc', 3: '#72bbd0', 4: '#91f0a1', 5: '#629fff', 6: '#bcc2f2',
- 7: '#eebcbc', 8: '#f1f0c0', 9: '#d2ffe7', 10: '#caf3a6', 11: '#ffdf55', 12: '#ef77aa',
- 13: '#d6dcff', 14: '#d2f5f0'}
-i=0
+ df_nodes = hg.clusterlabels
+ destf_nodes = hg.destclusterlabel
+ color_map = {1: '#f09494', 2: '#eebcbc', 3: '#72bbd0', 4: '#91f0a1', 5: '#629fff', 6: '#bcc2f2',
+ 7: '#eebcbc', 8: '#f1f0c0', 9: '#d2ffe7', 10: '#caf3a6', 11: '#ffdf55', 12: '#ef77aa',
+ 13: '#d6dcff', 14: '#d2f5f0'}
+ i=0
-graphedge=[]
-weigth=[]
-sourcedestination = []
-source = []
-dest = []
-edge_width = []
-weight1 = []
+ graphedge = []
+ weigth = []
+ sourcedestination = []
+ source = []
+ dest = []
+ edge_width = []
+ weight1 = []
+ node_adjacencies = []
-""""drawing edges in graph"""
+ def drawedges(self):
-for drow in range(len(df_nodes)):
- for row in range(len(destf_nodes[drow])):
- G.add_edge(df_nodes[drow], destf_nodes[drow][row])
+ """drawing edges in graph"""
+
+ for drow in range(len(self.df_nodes)):
+ for row in range(len(self.destf_nodes[drow])):
+ self.G.add_edge(self.df_nodes[drow], self.destf_nodes[drow][row])
-for row in range(len(hg.labalvlues)):
- for row1 in range(len(hg.labalvlues)):
- weight1.append(G.number_of_edges(hg.labalvlues[row], hg.labalvlues[row1]))
- print("The number of coccurance from node ", hg.labalvlues[row],"to node ", hg.labalvlues[row1], ": ", weight1[row1])
-
-G.__setattr__('weight', weight1)
+ for row in range(len(hg.labalvlues)):
+ for row1 in range(len(hg.labalvlues)):
+ self.weight1.append(self.G.number_of_edges(hg.labalvlues[row], hg.labalvlues[row1]))
+ print("The number of coccurance from node ", hg.labalvlues[row],"to node ", hg.labalvlues[row1], ": ", self.weight1[row1])
- # print(float(row['Timestamp']))
- #G.add_weighted_edges_from([(row['TransactionFrom'], row['TransactionTo'], i*j)])
+ self.G.__setattr__('weight', self.weight1)
-#print dict_pos
-
-"""label_propagation_communities(G) """
+ def labeling(self):
+ """label_propagation_communities(G) """
-coloring = _color_network(G)
- # Create a unique label for each node in the graph
-labeling = {v: k for k, v in enumerate(G)}
-print("lable value: ", labeling.values())
-while not _labeling_complete(labeling, G):
-# Update the labels of every node with the same color.
- print("lable value: ", labeling.values())
- for color, nodes in coloring.items():
- for n in nodes:
- _update_label(n, labeling, G)
- for label in set(labeling.values()):
- print("lable value: ", labeling.values())
+ coloring = self._color_network(self.G)
+ # Create a unique label for each node in the graph
+ labeling = {v: k for k, v in enumerate(self.G)}
+ print("lable value: ", labeling.values())
+ while not self._labeling_complete(labeling, self.G):
+ # Update the labels of every node with the same color.
+ print("lable value: ", labeling.values())
+ for color, nodes in coloring.items():
+ for n in nodes:
+ self._update_label(n, labeling, self.G)
+ for label in set(labeling.values()):
+ print("lable value: ", labeling.values())
-
-""" findig nodes' adjecencies"""
-node_adjacencies = []
-node_text = []
-for node, adjacencies in enumerate(G.adjacency()):
- node_adjacencies.append(len(adjacencies[1]))
- node_text.append('# of connections: '+str(len(adjacencies[1])))
+ def findigneighbors(self):
+ """ findig nodes' adjecencies"""
+ node_text = []
+ for node, adjacencies in enumerate(self.G.adjacency()):
+ self.node_adjacencies.append(len(adjacencies[1]))
+ node_text.append('# of connections: '+str(len(adjacencies[1])))
-G.color = node_adjacencies
+ self.G.color = self.node_adjacencies
-
-plt.figure(figsize=(25, 25))
-options = {
- 'with_labels': True,
- 'font_weight': 'regular',
-}
+ def result(self):
+ plt.figure(figsize=(25, 25))
+ options = {
+ 'with_labels': True,
+ 'font_weight': 'regular',
+ }
-#colors = [color_map[G.node[node][1]] for node in G]
-#sizes = [G.node[node]['Timestamp'] * 10 for node in G]
+ # colors = [color_map[G.node[node][1]] for node in G]
+ # sizes = [G.node[node]['Timestamp'] * 10 for node in G]
-
-d = nx.degree_centrality(G)
-d_list= list(d.values())
-print ("node centrality: ",d_list)
-print("node adjacencies: ", node_adjacencies)
-for row in range(len(weigth)):
- edge_width.append([])
- for drow in range(len(weigth[row])):
- edge_width[row].append(weigth[row][drow])
-node_size = [v * 80 for v in d.values()] #setting node size based on node centrality
-edge_width = [row * 0.5 for row in weight1]
-
-print("Nodes' Degree: ", nx.degree(G))
-print("Nodes' Betweeness ", nx.edge_betweenness_centrality(G))
-print("Nodes' Betweeness-centrality: ", nx.betweenness_centrality(G))
+ d = nx.degree_centrality(self.G)
+ d_list = list(d.values())
+ print("node centrality: ", d_list)
+ print("node adjacencies: ", self.node_adjacencies)
+ for row in range(len(self.weigth)):
+ self.edge_width.append([])
+ for drow in range(len(self.weigth[row])):
+ self.edge_width[row].append(self.weigth[row][drow])
+ node_size = [v * 80 for v in d.values()] # setting node size based on node centrality
+ edge_width = [row * 0.5 for row in self.weight1]
-
+ print("Nodes' Degree: ", nx.degree(self.G))
+ print("Nodes' Betweeness ", nx.edge_betweenness_centrality(self.G))
+ print("Nodes' Betweeness-centrality: ", nx.betweenness_centrality(self.G))
-"""
-Using the spring layout :
-- k controls the distance between the nodes and varies between 0 and 1
-- iterations is the number of times simulated annealing is run
-default k=0.1 and iterations=50
-"""
+ """
+ Using the spring layout :
+ - k controls the distance between the nodes and varies between 0 and 1
+ - iterations is the number of times simulated annealing is run
+ default k=0.1 and iterations=50
+ """
-labels2 = {}
+ labels2 = {}
-for idx, edge in enumerate(G.edges):
- labels2[edge] = "s"
+ for idx, edge in enumerate(self.G.edges):
+ labels2[edge] = "s"
-pos_nodes=nx.spring_layout(G, k=0.25, iterations=50)
-ax = plt.gca()
+ pos_nodes = nx.spring_layout(self.G, k=0.25, iterations=50)
-nx.draw(G, pos_nodes,node_color= node_adjacencies, node_size=node_size, width=2, arrowstyle='->',arrowsize=10, weight=weight1, edge_color='gray',**options)
-edge_labels = nx.get_edge_attributes(G, 'weight')
+ nx.draw(self.G, pos_nodes, node_color=self.node_adjacencies, node_size=node_size, width=2, arrowstyle='->',
+ arrowsize=10, weight=self.weight1, edge_color='gray', **options)
+ edge_labels = nx.get_edge_attributes(self.G, 'weight')
-pos_attrs = {}
-for node, coords in pos_nodes.items():
- pos_attrs[node] = (coords[0], coords[1] + 0.02)
-nx.draw_networkx_edge_labels(G, pos_nodes, edge_labels=edge_labels, font_size=10, font_color='red')
-nx.draw_networkx_labels(G, pos_attrs, labels=labeling,font_size=10, font_color='red')
-
-
+ pos_attrs = {}
+ for node, coords in pos_nodes.items():
+ pos_attrs[node] = (coords[0], coords[1] + 0.02)
+ nx.draw_networkx_edge_labels(self.G, pos_nodes, edge_labels=edge_labels, font_size=10, font_color='red')
+ nx.draw_networkx_labels(self.G, pos_attrs, labels=self.labeling, font_size=10, font_color='red')
-ax = plt.gca()
-ax.collections[0].set_edgecolor("#555555")
-plt.show()
+ ax = plt.gca()
+ ax.collections[0].set_edgecolor("#555555")
+ plt.show()
+ def main(self):
+ self.drawedges()
+ self.labeling()
+ self.findigneighbors()
+ self.result()
-
+linking = SemanticLinking()
+linking.main()
\ No newline at end of file
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
import json
with open("mult_in_out.json", "r") as json_file:
df_nodes = json.load(json_file)
nodeIds = []
destIds= []
clusterlabels= []
destIds = []
clusterlabels = []
destclusterlabel = []
cluster= []
cluster = []
labalvlues = []
i = 0
def classify():
for row in df_nodes:
with open("mult_in_out.json", "r") as json_file:
df_nodes = json.load(json_file)
for j in range(len(row['TransactionFrom'])):
print(" Input Ids: ", row['TransactionFrom'][j])
nodeIds.append(row['TransactionFrom'])
print("This is nodes: ", nodeIds)
for row in df_nodes:
destIds.append(row['TransactionTo'])
for row in df_nodes:
for j in range(len(row['TransactionFrom'])):
print(" Input Ids: ", row['TransactionFrom'][j])
nodeIds.append(row['TransactionFrom'])
print("This is nodes: ", nodeIds)
for row in range(len(nodeIds)):
print(nodeIds[row])
print("Finish InputIDs")
for row in range(len(nodeIds)):
for row in df_nodes:
destIds.append(row['TransactionTo'])
clusterlabels.append(row)
i += 1
print(i)
"""" classifying Inputs"""
"""" Labaling inputs"""
for row in range(len(nodeIds)):
for row in range(len(nodeIds)):
print(nodeIds[row])
for rown in range(len(nodeIds[row])):
print("Finish InputIDs")
i = 0
for row in range(len(nodeIds)):
for row1 in range(len(nodeIds)):
for rown1 in range(len(nodeIds[row1])):
if(nodeIds[row][rown]==nodeIds[row1][rown1]):
# print("row: ",row,"row1: ",row1)
if(row < row1):
for row2 in clusterlabels:
if( clusterlabels[row1]== clusterlabels[row2]):
clusterlabels[row2]=clusterlabels[row]
clusterlabels[row1] = clusterlabels[row]
clusterlabels.append(row)
i += 1
print(i)
else:
for row2 in clusterlabels:
if (clusterlabels[row] == clusterlabels[row2]):
clusterlabels[row2] = clusterlabels[row1]
clusterlabels[row] = clusterlabels[row1]
"""" classifying Inputs"""
"""" Labaling inputs"""
for row in range(len(nodeIds)):
for rown in range(len(nodeIds[row])):
print(clusterlabels)
print("cluster labels:", len(clusterlabels))
print("NodeIDs: ", len(nodeIds))
for row1 in range(len(nodeIds)):
for rown1 in range(len(nodeIds[row1])):
if(nodeIds[row][rown]==nodeIds[row1][rown1]):
# print("row: ",row,"row1: ",row1)
if(row < row1):
for row2 in clusterlabels:
if( clusterlabels[row1]== clusterlabels[row2]):
clusterlabels[row2]=clusterlabels[row]
clusterlabels[row1] = clusterlabels[row]
else:
for row2 in clusterlabels:
if (clusterlabels[row] == clusterlabels[row2]):
clusterlabels[row2] = clusterlabels[row1]
clusterlabels[row] = clusterlabels[row1]
"""" Calculating the number of clusters"""
clusternum = 1
labalvlues.append(clusterlabels[0])
for row in range(len(clusterlabels)):
flag = True
for row1 in range(len(labalvlues)):
if(clusterlabels[row]== labalvlues[row1]):
flag = False
print(clusterlabels)
print("cluster labels:", len(clusterlabels))
print("NodeIDs: ", len(nodeIds))
if (flag):
clusternum = + 1
labalvlues.append(clusterlabels[row])
print("label values (source Ids in the network): ", labalvlues, " and the number of clusters is: ", len(labalvlues))
"""" Calculating the number of clusters"""
clusternum = 1
labalvlues.append(clusterlabels[0])
for row in range(len(clusterlabels)):
flag = True
for row1 in range(len(labalvlues)):
if(clusterlabels[row]== labalvlues[row1]):
flag = False
if (flag):
clusternum = + 1
labalvlues.append(clusterlabels[row])
print("label values (source Ids in the network): ", labalvlues, " and the number of clusters is: ", len(labalvlues))
"""" clustering Ids according to their labels"""
"""" clustering Ids according to their labels"""
for row in range(len(labalvlues)):
cluster.append([])
for row3 in range(len(nodeIds)):
if (labalvlues[row] == clusterlabels[row3]):
cluster[row].extend(nodeIds[row3])
print("clusters: ", cluster)
for row in range(len(labalvlues)):
cluster.append([])
for row3 in range(len(nodeIds)):
if (labalvlues[row] == clusterlabels[row3]):
cluster[row].extend(nodeIds[row3])
print("clusters: ", cluster)
""" Removing duplicating items in cluster"""
""" Removing duplicating items in cluster"""
flag = True
while(flag):
for row in range(len(cluster)):
flag = True
while(flag):
for row in range(len(cluster)):
flag= False
for row1 in range(len(cluster[row])):
flag= False
for row2 in range (len(cluster[row])):
if(row1 != row2):
if(cluster[row][row1] == cluster[row][row2]):
del cluster[row][row2]
flag=True
break
for row1 in range(len(cluster[row])):
flag= False
for row2 in range (len(cluster[row])):
if(row1 != row2):
if(cluster[row][row1] == cluster[row][row2]):
del cluster[row][row2]
flag=True
break
if(flag):
break
if(flag):
break
if(flag):
break
print("cluster:", cluster)
print("cluster:", cluster)
"""" Clustering Destination Ids """
for row in range(len(destIds)):
destclusterlabel.append([])
for row2 in range(len(destIds[row])):
flag = True
for rownum in range(len(labalvlues)):
for row1 in range(len(cluster[rownum])):
"""" Clustering Destination Ids """
for row in range(len(destIds)):
destclusterlabel.append([])
for row2 in range(len(destIds[row])):
flag = True
for rownum in range(len(labalvlues)):
for row1 in range(len(cluster[rownum])):
if(destIds[row][row2]== cluster[rownum][row1]):
destclusterlabel[row].append(labalvlues[rownum])
flag = False
if(flag):
destclusterlabel.append(destIds[row][row2])
if(destIds[row][row2]== cluster[rownum][row1]):
destclusterlabel[row].append(labalvlues[rownum])
flag = False
if(flag):
destclusterlabel.append(destIds[row][row2])
print("destination labels (destination Ids): ", destclusterlabel)
print("destination labels (destination Ids): ", destclusterlabel)
import networkx as nx
import matplotlib.pyplot as plt
from collections import Counter
import HyperGraph as hg
import initialdemo.HyperGraph as hg
import pandas as pd
import json
import warnings
......@@ -12,194 +12,198 @@ import mplleaflet
import values as values
from matplotlib import colors
def _color_network(G):
"""Colors the network so that neighboring nodes all have distinct colors.
class SemanticLinking:
Returns a dict keyed by color to a set of nodes with that color.
"""
coloring = dict() # color => set(node)
colors = nx.coloring.greedy_color(G)
for node, color in colors.items():
if color in coloring:
coloring[color].add(node)
else:
coloring[color] = set([node])
return coloring
def __init__(self):
hg.classify()
def _color_network(self, G):
"""Colors the network so that neighboring nodes all have distinct colors.
def _labeling_complete(labeling, G):
"""Determines whether or not LPA is done.
Returns a dict keyed by color to a set of nodes with that color.
"""
coloring = dict() # color => set(node)
colors = nx.coloring.greedy_color(G)
for node, color in colors.items():
if color in coloring:
coloring[color].add(node)
else:
coloring[color] = set([node])
return coloring
Label propagation is complete when all nodes have a label that is
in the set of highest frequency labels amongst its neighbors.
Nodes with no neighbors are considered complete.
"""
return all(labeling[v] in _most_frequent_labels(v, labeling, G)
for v in G if len(G[v]) > 0)
def _labeling_complete(self, labeling, G):
"""Determines whether or not LPA is done.
Label propagation is complete when all nodes have a label that is
in the set of highest frequency labels amongst its neighbors.
def _most_frequent_labels(node, labeling, G):
"""Returns a set of all labels with maximum frequency in `labeling`.
Nodes with no neighbors are considered complete.
"""
return all(labeling[v] in self._most_frequent_labels(v, labeling, G)
for v in G if len(G[v]) > 0)
Input `labeling` should be a dict keyed by node to labels.
"""
if not G[node]:
# Nodes with no neighbors are themselves a community and are labeled
# accordingly, hence the immediate if statement.
return {labeling[node]}
# Compute the frequencies of all neighbours of node
freqs = Counter(labeling[q] for q in G[node])
max_freq = max(freqs.values())
return {label for label, freq in freqs.items() if freq == max_freq}
def _most_frequent_labels(self, node, labeling, G):
"""Returns a set of all labels with maximum frequency in `labeling`.
Input `labeling` should be a dict keyed by node to labels.
"""
if not G[node]:
# Nodes with no neighbors are themselves a community and are labeled
# accordingly, hence the immediate if statement.
return {labeling[node]}
def _update_label(node, labeling, G):
"""Updates the label of a node using the Prec-Max tie breaking algorithm
# Compute the frequencies of all neighbours of node
freqs = Counter(labeling[q] for q in G[node])
max_freq = max(freqs.values())
return {label for label, freq in freqs.items() if freq == max_freq}
The algorithm is explained in: 'Community Detection via Semi-Synchronous
Label Propagation Algorithms' Cordasco and Gargano, 2011
"""
high_labels = _most_frequent_labels(node, labeling, G)
if len(high_labels) == 1:
labeling[node] = high_labels.pop()
elif len(high_labels) > 1:
# Prec-Max
if labeling[node] not in high_labels:
labeling[node] = max(high_labels)
def _update_label(self, node, labeling, G):
"""Updates the label of a node using the Prec-Max tie breaking algorithm
The algorithm is explained in: 'Community Detection via Semi-Synchronous
Label Propagation Algorithms' Cordasco and Gargano, 2011
"""
high_labels = self._most_frequent_labels(node, labeling, G)
if len(high_labels) == 1:
labeling[node] = high_labels.pop()
elif len(high_labels) > 1:
# Prec-Max
if labeling[node] not in high_labels:
warnings.filterwarnings('ignore')
labeling[node] = max(high_labels)
warnings.filterwarnings('ignore')
#G = nx.DiGraph(directed=True)
G = nx.MultiDiGraph(day="Stackoverflow")
df_nodes = hg.clusterlabels
destf_nodes = hg.destclusterlabel
color_map = {1: '#f09494', 2: '#eebcbc', 3: '#72bbd0', 4: '#91f0a1', 5: '#629fff', 6: '#bcc2f2',
7: '#eebcbc', 8: '#f1f0c0', 9: '#d2ffe7', 10: '#caf3a6', 11: '#ffdf55', 12: '#ef77aa',
13: '#d6dcff', 14: '#d2f5f0'}
i=0
graphedge=[]
weigth=[]
sourcedestination = []
source = []
dest = []
edge_width = []
weight1 = []
#G = nx.DiGraph(directed=True)
G = nx.MultiDiGraph(day="Stackoverflow")
df_nodes = hg.clusterlabels
destf_nodes = hg.destclusterlabel
color_map = {1: '#f09494', 2: '#eebcbc', 3: '#72bbd0', 4: '#91f0a1', 5: '#629fff', 6: '#bcc2f2',
7: '#eebcbc', 8: '#f1f0c0', 9: '#d2ffe7', 10: '#caf3a6', 11: '#ffdf55', 12: '#ef77aa',
13: '#d6dcff', 14: '#d2f5f0'}
i=0
""""drawing edges in graph"""
graphedge = []
weigth = []
sourcedestination = []
source = []
dest = []
edge_width = []
weight1 = []
for drow in range(len(df_nodes)):
for row in range(len(destf_nodes[drow])):
G.add_edge(df_nodes[drow], destf_nodes[drow][row])
node_adjacencies = []
for row in range(len(hg.labalvlues)):
for row1 in range(len(hg.labalvlues)):
weight1.append(G.number_of_edges(hg.labalvlues[row], hg.labalvlues[row1]))
print("The number of coccurance from node ", hg.labalvlues[row],"to node ", hg.labalvlues[row1], ": ", weight1[row1])
def drawedges(self):
G.__setattr__('weight', weight1)
"""drawing edges in graph"""
# print(float(row['Timestamp']))
#G.add_weighted_edges_from([(row['TransactionFrom'], row['TransactionTo'], i*j)])
for drow in range(len(self.df_nodes)):
for row in range(len(self.destf_nodes[drow])):
self.G.add_edge(self.df_nodes[drow], self.destf_nodes[drow][row])
#print dict_pos
for row in range(len(hg.labalvlues)):
for row1 in range(len(hg.labalvlues)):
self.weight1.append(self.G.number_of_edges(hg.labalvlues[row], hg.labalvlues[row1]))
print("The number of coccurance from node ", hg.labalvlues[row],"to node ", hg.labalvlues[row1], ": ", self.weight1[row1])
"""label_propagation_communities(G) """
self.G.__setattr__('weight', self.weight1)
def labeling(self):
"""label_propagation_communities(G) """
coloring = _color_network(G)
# Create a unique label for each node in the graph
labeling = {v: k for k, v in enumerate(G)}
print("lable value: ", labeling.values())
while not _labeling_complete(labeling, G):
# Update the labels of every node with the same color.
print("lable value: ", labeling.values())
for color, nodes in coloring.items():
for n in nodes:
_update_label(n, labeling, G)
for label in set(labeling.values()):
print("lable value: ", labeling.values())
coloring = self._color_network(self.G)
# Create a unique label for each node in the graph
labeling = {v: k for k, v in enumerate(self.G)}
print("lable value: ", labeling.values())
while not self._labeling_complete(labeling, self.G):
# Update the labels of every node with the same color.
print("lable value: ", labeling.values())
for color, nodes in coloring.items():
for n in nodes:
self._update_label(n, labeling, self.G)
for label in set(labeling.values()):
print("lable value: ", labeling.values())
""" findig nodes' adjecencies"""
node_adjacencies = []
node_text = []
for node, adjacencies in enumerate(G.adjacency()):
node_adjacencies.append(len(adjacencies[1]))
node_text.append('# of connections: '+str(len(adjacencies[1])))
def findigneighbors(self):
""" findig nodes' adjecencies"""
node_text = []
for node, adjacencies in enumerate(self.G.adjacency()):
self.node_adjacencies.append(len(adjacencies[1]))
node_text.append('# of connections: '+str(len(adjacencies[1])))
G.color = node_adjacencies
self.G.color = self.node_adjacencies
def result(self):
plt.figure(figsize=(25, 25))
options = {
'with_labels': True,
'font_weight': 'regular',
}
plt.figure(figsize=(25, 25))
options = {
'with_labels': True,
'font_weight': 'regular',
}
# colors = [color_map[G.node[node][1]] for node in G]
# sizes = [G.node[node]['Timestamp'] * 10 for node in G]
#colors = [color_map[G.node[node][1]] for node in G]
#sizes = [G.node[node]['Timestamp'] * 10 for node in G]
d = nx.degree_centrality(self.G)
d_list = list(d.values())
print("node centrality: ", d_list)
print("node adjacencies: ", self.node_adjacencies)
for row in range(len(self.weigth)):
self.edge_width.append([])
for drow in range(len(self.weigth[row])):
self.edge_width[row].append(self.weigth[row][drow])
node_size = [v * 80 for v in d.values()] # setting node size based on node centrality
edge_width = [row * 0.5 for row in self.weight1]
print("Nodes' Degree: ", nx.degree(self.G))
print("Nodes' Betweeness ", nx.edge_betweenness_centrality(self.G))
print("Nodes' Betweeness-centrality: ", nx.betweenness_centrality(self.G))
d = nx.degree_centrality(G)
d_list= list(d.values())
print ("node centrality: ",d_list)
print("node adjacencies: ", node_adjacencies)
for row in range(len(weigth)):
edge_width.append([])
for drow in range(len(weigth[row])):
edge_width[row].append(weigth[row][drow])
node_size = [v * 80 for v in d.values()] #setting node size based on node centrality
edge_width = [row * 0.5 for row in weight1]
"""
Using the spring layout :
- k controls the distance between the nodes and varies between 0 and 1
- iterations is the number of times simulated annealing is run
default k=0.1 and iterations=50
"""
print("Nodes' Degree: ", nx.degree(G))
print("Nodes' Betweeness ", nx.edge_betweenness_centrality(G))
print("Nodes' Betweeness-centrality: ", nx.betweenness_centrality(G))
labels2 = {}
for idx, edge in enumerate(self.G.edges):
labels2[edge] = "s"
pos_nodes = nx.spring_layout(self.G, k=0.25, iterations=50)
nx.draw(self.G, pos_nodes, node_color=self.node_adjacencies, node_size=node_size, width=2, arrowstyle='->',
arrowsize=10, weight=self.weight1, edge_color='gray', **options)
edge_labels = nx.get_edge_attributes(self.G, 'weight')
pos_attrs = {}
for node, coords in pos_nodes.items():
pos_attrs[node] = (coords[0], coords[1] + 0.02)
nx.draw_networkx_edge_labels(self.G, pos_nodes, edge_labels=edge_labels, font_size=10, font_color='red')
nx.draw_networkx_labels(self.G, pos_attrs, labels=self.labeling, font_size=10, font_color='red')
"""
Using the spring layout :
- k controls the distance between the nodes and varies between 0 and 1
- iterations is the number of times simulated annealing is run
default k=0.1 and iterations=50
"""
labels2 = {}
for idx, edge in enumerate(G.edges):
labels2[edge] = "s"
pos_nodes=nx.spring_layout(G, k=0.25, iterations=50)
ax = plt.gca()
nx.draw(G, pos_nodes,node_color= node_adjacencies, node_size=node_size, width=2, arrowstyle='->',arrowsize=10, weight=weight1, edge_color='gray',**options)
edge_labels = nx.get_edge_attributes(G, 'weight')
pos_attrs = {}
for node, coords in pos_nodes.items():
pos_attrs[node] = (coords[0], coords[1] + 0.02)
nx.draw_networkx_edge_labels(G, pos_nodes, edge_labels=edge_labels, font_size=10, font_color='red')
nx.draw_networkx_labels(G, pos_attrs, labels=labeling,font_size=10, font_color='red')
ax = plt.gca()
ax.collections[0].set_edgecolor("#555555")
plt.show()
ax = plt.gca()
ax.collections[0].set_edgecolor("#555555")
plt.show()
def main(self):
self.drawedges()
self.labeling()
self.findigneighbors()
self.result()
linking = SemanticLinking()
linking.main()
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment