Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
SMART
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
3
Issues
3
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Registry
Registry
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
UNI-KLU
SMART
Commits
b365f613
Commit
b365f613
authored
Sep 09, 2019
by
zahra
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Semantic Linking: Created Classes for semantic linking logic
parent
2caa4a25
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
843 additions
and
240 deletions
+843
-240
Created_classes_for_semantic_linking_logic.patch
...vice/app/Created_classes_for_semantic_linking_logic.patch
+602
-0
HyperGraph.py
...mantic-linking-microservice/app/initialdemo/HyperGraph.py
+92
-95
SemanticLinking.py
...c-linking-microservice/app/initialdemo/SemanticLinking.py
+149
-145
No files found.
data-hub/semantic-linking-microservice/app/Created_classes_for_semantic_linking_logic.patch
0 → 100644
View file @
b365f613
Index: data-hub/semantic-linking-microservice/app/initialdemo/HyperGraph.py
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- data-hub/semantic-linking-microservice/app/initialdemo/HyperGraph.py (date 1568037363000)
+++ data-hub/semantic-linking-microservice/app/initialdemo/HyperGraph.py (date 1568038969230)
@@ -1,140 +1,137 @@
-import networkx as nx
-import matplotlib.pyplot as plt
-import pandas as pd
import json
-
-
-with open("mult_in_out.json", "r") as json_file:
- df_nodes = json.load(json_file)
-
-
nodeIds = []
-destIds= []
-clusterlabels= []
+destIds = []
+clusterlabels = []
destclusterlabel = []
-cluster= []
+cluster = []
labalvlues = []
-i = 0
+
+def classify():
+
+ with open("mult_in_out.json", "r") as json_file:
+ df_nodes = json.load(json_file)
+
-for row in df_nodes:
+ for row in df_nodes:
- for j in range(len(row['TransactionFrom'])):
- print(" Input Ids: ", row['TransactionFrom'][j])
- nodeIds.append(row['TransactionFrom'])
- print("This is nodes: ", nodeIds)
+ for j in range(len(row['TransactionFrom'])):
+ print(" Input Ids: ", row['TransactionFrom'][j])
+ nodeIds.append(row['TransactionFrom'])
+ print("This is nodes: ", nodeIds)
-for row in df_nodes:
- destIds.append(row['TransactionTo'])
+ for row in df_nodes:
+ destIds.append(row['TransactionTo'])
-for row in range(len(nodeIds)):
- print(nodeIds[row])
+ for row in range(len(nodeIds)):
+ print(nodeIds[row])
-print("Finish InputIDs")
-for row in range(len(nodeIds)):
+ print("Finish InputIDs")
+ i = 0
+ for row in range(len(nodeIds)):
- clusterlabels.append(row)
- i += 1
-print(i)
+ clusterlabels.append(row)
+ i += 1
+ print(i)
-"""" classifying Inputs"""
-"""" Labaling inputs"""
-for row in range(len(nodeIds)):
+ """" classifying Inputs"""
+ """" Labaling inputs"""
+ for row in range(len(nodeIds)):
- for rown in range(len(nodeIds[row])):
+ for rown in range(len(nodeIds[row])):
- for row1 in range(len(nodeIds)):
- for rown1 in range(len(nodeIds[row1])):
- if(nodeIds[row][rown]==nodeIds[row1][rown1]):
- # print("row: ",row,"row1: ",row1)
- if(row < row1):
- for row2 in clusterlabels:
- if( clusterlabels[row1]== clusterlabels[row2]):
- clusterlabels[row2]=clusterlabels[row]
- clusterlabels[row1] = clusterlabels[row]
+ for row1 in range(len(nodeIds)):
+ for rown1 in range(len(nodeIds[row1])):
+ if(nodeIds[row][rown]==nodeIds[row1][rown1]):
+ # print("row: ",row,"row1: ",row1)
+ if(row < row1):
+ for row2 in clusterlabels:
+ if( clusterlabels[row1]== clusterlabels[row2]):
+ clusterlabels[row2]=clusterlabels[row]
+ clusterlabels[row1] = clusterlabels[row]
- else:
- for row2 in clusterlabels:
- if (clusterlabels[row] == clusterlabels[row2]):
- clusterlabels[row2] = clusterlabels[row1]
- clusterlabels[row] = clusterlabels[row1]
+ else:
+ for row2 in clusterlabels:
+ if (clusterlabels[row] == clusterlabels[row2]):
+ clusterlabels[row2] = clusterlabels[row1]
+ clusterlabels[row] = clusterlabels[row1]
-print(clusterlabels)
-print("cluster labels:", len(clusterlabels))
-print("NodeIDs: ", len(nodeIds))
+ print(clusterlabels)
+ print("cluster labels:", len(clusterlabels))
+ print("NodeIDs: ", len(nodeIds))
-"""" Calculating the number of clusters"""
-clusternum = 1
-labalvlues.append(clusterlabels[0])
-for row in range(len(clusterlabels)):
- flag = True
- for row1 in range(len(labalvlues)):
- if(clusterlabels[row]== labalvlues[row1]):
- flag = False
+ """" Calculating the number of clusters"""
+ clusternum = 1
+ labalvlues.append(clusterlabels[0])
+ for row in range(len(clusterlabels)):
+ flag = True
+ for row1 in range(len(labalvlues)):
+ if(clusterlabels[row]== labalvlues[row1]):
+ flag = False
- if (flag):
- clusternum = + 1
- labalvlues.append(clusterlabels[row])
+ if (flag):
+ clusternum = + 1
+ labalvlues.append(clusterlabels[row])
-print("label values (source Ids in the network): ", labalvlues, " and the number of clusters is: ", len(labalvlues))
+ print("label values (source Ids in the network): ", labalvlues, " and the number of clusters is: ", len(labalvlues))
-"""" clustering Ids according to their labels"""
+ """" clustering Ids according to their labels"""
-for row in range(len(labalvlues)):
- cluster.append([])
- for row3 in range(len(nodeIds)):
- if (labalvlues[row] == clusterlabels[row3]):
- cluster[row].extend(nodeIds[row3])
-print("clusters: ", cluster)
+ for row in range(len(labalvlues)):
+ cluster.append([])
+ for row3 in range(len(nodeIds)):
+ if (labalvlues[row] == clusterlabels[row3]):
+ cluster[row].extend(nodeIds[row3])
+ print("clusters: ", cluster)
-""" Removing duplicating items in cluster"""
+ """ Removing duplicating items in cluster"""
-flag = True
-while(flag):
- for row in range(len(cluster)):
+ flag = True
+ while(flag):
+ for row in range(len(cluster)):
- flag= False
- for row1 in range(len(cluster[row])):
- flag= False
- for row2 in range (len(cluster[row])):
- if(row1 != row2):
- if(cluster[row][row1] == cluster[row][row2]):
- del cluster[row][row2]
- flag=True
- break
- if(flag):
- break
- if(flag):
- break
+ flag= False
+ for row1 in range(len(cluster[row])):
+ flag= False
+ for row2 in range (len(cluster[row])):
+ if(row1 != row2):
+ if(cluster[row][row1] == cluster[row][row2]):
+ del cluster[row][row2]
+ flag=True
+ break
+ if(flag):
+ break
+ if(flag):
+ break
-print("cluster:", cluster)
+ print("cluster:", cluster)
-"""" Clustering Destination Ids """
-for row in range(len(destIds)):
- destclusterlabel.append([])
- for row2 in range(len(destIds[row])):
- flag = True
- for rownum in range(len(labalvlues)):
- for row1 in range(len(cluster[rownum])):
+ """" Clustering Destination Ids """
+ for row in range(len(destIds)):
+ destclusterlabel.append([])
+ for row2 in range(len(destIds[row])):
+ flag = True
+ for rownum in range(len(labalvlues)):
+ for row1 in range(len(cluster[rownum])):
- if(destIds[row][row2]== cluster[rownum][row1]):
- destclusterlabel[row].append(labalvlues[rownum])
- flag = False
- if(flag):
- destclusterlabel.append(destIds[row][row2])
+ if(destIds[row][row2]== cluster[rownum][row1]):
+ destclusterlabel[row].append(labalvlues[rownum])
+ flag = False
+ if(flag):
+ destclusterlabel.append(destIds[row][row2])
-print("destination labels (destination Ids): ", destclusterlabel)
+ print("destination labels (destination Ids): ", destclusterlabel)
Index: data-hub/semantic-linking-microservice/app/initialdemo/SemanticLinking.py
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- data-hub/semantic-linking-microservice/app/initialdemo/SemanticLinking.py (date 1568037363000)
+++ data-hub/semantic-linking-microservice/app/initialdemo/SemanticLinking.py (date 1568040344378)
@@ -1,7 +1,7 @@
import networkx as nx
import matplotlib.pyplot as plt
from collections import Counter
-import HyperGraph as hg
+import initialdemo.HyperGraph as hg
import pandas as pd
import json
import warnings
@@ -12,194 +12,198 @@
import values as values
from matplotlib import colors
-def _color_network(G):
- """Colors the network so that neighboring nodes all have distinct colors.
+class SemanticLinking:
+
+ def __init__(self):
+ hg.classify()
+
+ def _color_network(self, G):
+ """Colors the network so that neighboring nodes all have distinct colors.
- Returns a dict keyed by color to a set of nodes with that color.
- """
- coloring = dict() # color => set(node)
- colors = nx.coloring.greedy_color(G)
- for node, color in colors.items():
- if color in coloring:
- coloring[color].add(node)
- else:
- coloring[color] = set([node])
- return coloring
+ Returns a dict keyed by color to a set of nodes with that color.
+ """
+ coloring = dict() # color => set(node)
+ colors = nx.coloring.greedy_color(G)
+ for node, color in colors.items():
+ if color in coloring:
+ coloring[color].add(node)
+ else:
+ coloring[color] = set([node])
+ return coloring
-def _labeling_complete(labeling, G):
- """Determines whether or not LPA is done.
+ def _labeling_complete(self, labeling, G):
+ """Determines whether or not LPA is done.
- Label propagation is complete when all nodes have a label that is
- in the set of highest frequency labels amongst its neighbors.
+ Label propagation is complete when all nodes have a label that is
+ in the set of highest frequency labels amongst its neighbors.
- Nodes with no neighbors are considered complete.
- """
- return all(labeling[v] in _most_frequent_labels(v, labeling, G)
- for v in G if len(G[v]) > 0)
+ Nodes with no neighbors are considered complete.
+ """
+ return all(labeling[v] in self._most_frequent_labels(v, labeling, G)
+ for v in G if len(G[v]) > 0)
-def _most_frequent_labels(node, labeling, G):
- """Returns a set of all labels with maximum frequency in `labeling`.
+ def _most_frequent_labels(self, node, labeling, G):
+ """Returns a set of all labels with maximum frequency in `labeling`.
- Input `labeling` should be a dict keyed by node to labels.
- """
- if not G[node]:
- # Nodes with no neighbors are themselves a community and are labeled
- # accordingly, hence the immediate if statement.
- return {labeling[node]}
+ Input `labeling` should be a dict keyed by node to labels.
+ """
+ if not G[node]:
+ # Nodes with no neighbors are themselves a community and are labeled
+ # accordingly, hence the immediate if statement.
+ return {labeling[node]}
- # Compute the frequencies of all neighbours of node
- freqs = Counter(labeling[q] for q in G[node])
- max_freq = max(freqs.values())
- return {label for label, freq in freqs.items() if freq == max_freq}
+ # Compute the frequencies of all neighbours of node
+ freqs = Counter(labeling[q] for q in G[node])
+ max_freq = max(freqs.values())
+ return {label for label, freq in freqs.items() if freq == max_freq}
-def _update_label(node, labeling, G):
- """Updates the label of a node using the Prec-Max tie breaking algorithm
+ def _update_label(self, node, labeling, G):
+ """Updates the label of a node using the Prec-Max tie breaking algorithm
- The algorithm is explained in: 'Community Detection via Semi-Synchronous
- Label Propagation Algorithms' Cordasco and Gargano, 2011
- """
- high_labels = _most_frequent_labels(node, labeling, G)
- if len(high_labels) == 1:
- labeling[node] = high_labels.pop()
- elif len(high_labels) > 1:
- # Prec-Max
- if labeling[node] not in high_labels:
+ The algorithm is explained in: 'Community Detection via Semi-Synchronous
+ Label Propagation Algorithms' Cordasco and Gargano, 2011
+ """
+ high_labels = self._most_frequent_labels(node, labeling, G)
+ if len(high_labels) == 1:
+ labeling[node] = high_labels.pop()
+ elif len(high_labels) > 1:
+ # Prec-Max
+ if labeling[node] not in high_labels:
- labeling[node] = max(high_labels)
+ labeling[node] = max(high_labels)
-warnings.filterwarnings('ignore')
+ warnings.filterwarnings('ignore')
-#G = nx.DiGraph(directed=True)
-G = nx.MultiDiGraph(day="Stackoverflow")
+ #G = nx.DiGraph(directed=True)
+ G = nx.MultiDiGraph(day="Stackoverflow")
-df_nodes = hg.clusterlabels
-destf_nodes = hg.destclusterlabel
-color_map = {1: '#f09494', 2: '#eebcbc', 3: '#72bbd0', 4: '#91f0a1', 5: '#629fff', 6: '#bcc2f2',
- 7: '#eebcbc', 8: '#f1f0c0', 9: '#d2ffe7', 10: '#caf3a6', 11: '#ffdf55', 12: '#ef77aa',
- 13: '#d6dcff', 14: '#d2f5f0'}
-i=0
+ df_nodes = hg.clusterlabels
+ destf_nodes = hg.destclusterlabel
+ color_map = {1: '#f09494', 2: '#eebcbc', 3: '#72bbd0', 4: '#91f0a1', 5: '#629fff', 6: '#bcc2f2',
+ 7: '#eebcbc', 8: '#f1f0c0', 9: '#d2ffe7', 10: '#caf3a6', 11: '#ffdf55', 12: '#ef77aa',
+ 13: '#d6dcff', 14: '#d2f5f0'}
+ i=0
-graphedge=[]
-weigth=[]
-sourcedestination = []
-source = []
-dest = []
-edge_width = []
-weight1 = []
+ graphedge = []
+ weigth = []
+ sourcedestination = []
+ source = []
+ dest = []
+ edge_width = []
+ weight1 = []
+ node_adjacencies = []
-""""drawing edges in graph"""
+ def drawedges(self):
-for drow in range(len(df_nodes)):
- for row in range(len(destf_nodes[drow])):
- G.add_edge(df_nodes[drow], destf_nodes[drow][row])
+ """drawing edges in graph"""
+
+ for drow in range(len(self.df_nodes)):
+ for row in range(len(self.destf_nodes[drow])):
+ self.G.add_edge(self.df_nodes[drow], self.destf_nodes[drow][row])
-for row in range(len(hg.labalvlues)):
- for row1 in range(len(hg.labalvlues)):
- weight1.append(G.number_of_edges(hg.labalvlues[row], hg.labalvlues[row1]))
- print("The number of coccurance from node ", hg.labalvlues[row],"to node ", hg.labalvlues[row1], ": ", weight1[row1])
-
-G.__setattr__('weight', weight1)
+ for row in range(len(hg.labalvlues)):
+ for row1 in range(len(hg.labalvlues)):
+ self.weight1.append(self.G.number_of_edges(hg.labalvlues[row], hg.labalvlues[row1]))
+ print("The number of coccurance from node ", hg.labalvlues[row],"to node ", hg.labalvlues[row1], ": ", self.weight1[row1])
- # print(float(row['Timestamp']))
- #G.add_weighted_edges_from([(row['TransactionFrom'], row['TransactionTo'], i*j)])
+ self.G.__setattr__('weight', self.weight1)
-#print dict_pos
-
-"""label_propagation_communities(G) """
+ def labeling(self):
+ """label_propagation_communities(G) """
-coloring = _color_network(G)
- # Create a unique label for each node in the graph
-labeling = {v: k for k, v in enumerate(G)}
-print("lable value: ", labeling.values())
-while not _labeling_complete(labeling, G):
-# Update the labels of every node with the same color.
- print("lable value: ", labeling.values())
- for color, nodes in coloring.items():
- for n in nodes:
- _update_label(n, labeling, G)
- for label in set(labeling.values()):
- print("lable value: ", labeling.values())
+ coloring = self._color_network(self.G)
+ # Create a unique label for each node in the graph
+ labeling = {v: k for k, v in enumerate(self.G)}
+ print("lable value: ", labeling.values())
+ while not self._labeling_complete(labeling, self.G):
+ # Update the labels of every node with the same color.
+ print("lable value: ", labeling.values())
+ for color, nodes in coloring.items():
+ for n in nodes:
+ self._update_label(n, labeling, self.G)
+ for label in set(labeling.values()):
+ print("lable value: ", labeling.values())
-
-""" findig nodes' adjecencies"""
-node_adjacencies = []
-node_text = []
-for node, adjacencies in enumerate(G.adjacency()):
- node_adjacencies.append(len(adjacencies[1]))
- node_text.append('# of connections: '+str(len(adjacencies[1])))
+ def findigneighbors(self):
+ """ findig nodes' adjecencies"""
+ node_text = []
+ for node, adjacencies in enumerate(self.G.adjacency()):
+ self.node_adjacencies.append(len(adjacencies[1]))
+ node_text.append('# of connections: '+str(len(adjacencies[1])))
-G.color = node_adjacencies
+ self.G.color = self.node_adjacencies
-
-plt.figure(figsize=(25, 25))
-options = {
- 'with_labels': True,
- 'font_weight': 'regular',
-}
+ def result(self):
+ plt.figure(figsize=(25, 25))
+ options = {
+ 'with_labels': True,
+ 'font_weight': 'regular',
+ }
-#colors = [color_map[G.node[node][1]] for node in G]
-#sizes = [G.node[node]['Timestamp'] * 10 for node in G]
+ # colors = [color_map[G.node[node][1]] for node in G]
+ # sizes = [G.node[node]['Timestamp'] * 10 for node in G]
-
-d = nx.degree_centrality(G)
-d_list= list(d.values())
-print ("node centrality: ",d_list)
-print("node adjacencies: ", node_adjacencies)
-for row in range(len(weigth)):
- edge_width.append([])
- for drow in range(len(weigth[row])):
- edge_width[row].append(weigth[row][drow])
-node_size = [v * 80 for v in d.values()] #setting node size based on node centrality
-edge_width = [row * 0.5 for row in weight1]
-
-print("Nodes' Degree: ", nx.degree(G))
-print("Nodes' Betweeness ", nx.edge_betweenness_centrality(G))
-print("Nodes' Betweeness-centrality: ", nx.betweenness_centrality(G))
+ d = nx.degree_centrality(self.G)
+ d_list = list(d.values())
+ print("node centrality: ", d_list)
+ print("node adjacencies: ", self.node_adjacencies)
+ for row in range(len(self.weigth)):
+ self.edge_width.append([])
+ for drow in range(len(self.weigth[row])):
+ self.edge_width[row].append(self.weigth[row][drow])
+ node_size = [v * 80 for v in d.values()] # setting node size based on node centrality
+ edge_width = [row * 0.5 for row in self.weight1]
-
+ print("Nodes' Degree: ", nx.degree(self.G))
+ print("Nodes' Betweeness ", nx.edge_betweenness_centrality(self.G))
+ print("Nodes' Betweeness-centrality: ", nx.betweenness_centrality(self.G))
-"""
-Using the spring layout :
-- k controls the distance between the nodes and varies between 0 and 1
-- iterations is the number of times simulated annealing is run
-default k=0.1 and iterations=50
-"""
+ """
+ Using the spring layout :
+ - k controls the distance between the nodes and varies between 0 and 1
+ - iterations is the number of times simulated annealing is run
+ default k=0.1 and iterations=50
+ """
-labels2 = {}
+ labels2 = {}
-for idx, edge in enumerate(G.edges):
- labels2[edge] = "s"
+ for idx, edge in enumerate(self.G.edges):
+ labels2[edge] = "s"
-pos_nodes=nx.spring_layout(G, k=0.25, iterations=50)
-ax = plt.gca()
+ pos_nodes = nx.spring_layout(self.G, k=0.25, iterations=50)
-nx.draw(G, pos_nodes,node_color= node_adjacencies, node_size=node_size, width=2, arrowstyle='->',arrowsize=10, weight=weight1, edge_color='gray',**options)
-edge_labels = nx.get_edge_attributes(G, 'weight')
+ nx.draw(self.G, pos_nodes, node_color=self.node_adjacencies, node_size=node_size, width=2, arrowstyle='->',
+ arrowsize=10, weight=self.weight1, edge_color='gray', **options)
+ edge_labels = nx.get_edge_attributes(self.G, 'weight')
-pos_attrs = {}
-for node, coords in pos_nodes.items():
- pos_attrs[node] = (coords[0], coords[1] + 0.02)
-nx.draw_networkx_edge_labels(G, pos_nodes, edge_labels=edge_labels, font_size=10, font_color='red')
-nx.draw_networkx_labels(G, pos_attrs, labels=labeling,font_size=10, font_color='red')
-
-
+ pos_attrs = {}
+ for node, coords in pos_nodes.items():
+ pos_attrs[node] = (coords[0], coords[1] + 0.02)
+ nx.draw_networkx_edge_labels(self.G, pos_nodes, edge_labels=edge_labels, font_size=10, font_color='red')
+ nx.draw_networkx_labels(self.G, pos_attrs, labels=self.labeling, font_size=10, font_color='red')
-ax = plt.gca()
-ax.collections[0].set_edgecolor("#555555")
-plt.show()
+ ax = plt.gca()
+ ax.collections[0].set_edgecolor("#555555")
+ plt.show()
+ def main(self):
+ self.drawedges()
+ self.labeling()
+ self.findigneighbors()
+ self.result()
-
+linking = SemanticLinking()
+linking.main()
\ No newline at end of file
data-hub/semantic-linking-microservice/app/initialdemo/HyperGraph.py
View file @
b365f613
import
networkx
as
nx
import
matplotlib.pyplot
as
plt
import
pandas
as
pd
import
json
with
open
(
"mult_in_out.json"
,
"r"
)
as
json_file
:
df_nodes
=
json
.
load
(
json_file
)
nodeIds
=
[]
destIds
=
[]
clusterlabels
=
[]
destIds
=
[]
clusterlabels
=
[]
destclusterlabel
=
[]
cluster
=
[]
cluster
=
[]
labalvlues
=
[]
i
=
0
def
classify
():
for
row
in
df_nodes
:
with
open
(
"mult_in_out.json"
,
"r"
)
as
json_file
:
df_nodes
=
json
.
load
(
json_file
)
for
j
in
range
(
len
(
row
[
'TransactionFrom'
])):
print
(
" Input Ids: "
,
row
[
'TransactionFrom'
][
j
])
nodeIds
.
append
(
row
[
'TransactionFrom'
])
print
(
"This is nodes: "
,
nodeIds
)
for
row
in
df_nodes
:
destIds
.
append
(
row
[
'TransactionTo'
])
for
row
in
df_nodes
:
for
j
in
range
(
len
(
row
[
'TransactionFrom'
])):
print
(
" Input Ids: "
,
row
[
'TransactionFrom'
][
j
])
nodeIds
.
append
(
row
[
'TransactionFrom'
])
print
(
"This is nodes: "
,
nodeIds
)
for
row
in
range
(
len
(
nodeIds
)):
print
(
nodeIds
[
row
])
print
(
"Finish InputIDs"
)
for
row
in
range
(
len
(
nodeIds
)):
for
row
in
df_nodes
:
destIds
.
append
(
row
[
'TransactionTo'
])
clusterlabels
.
append
(
row
)
i
+=
1
print
(
i
)
"""" classifying Inputs"""
"""" Labaling inputs"""
for
row
in
range
(
len
(
nodeIds
)):
for
row
in
range
(
len
(
nodeIds
)):
print
(
nodeIds
[
row
])
for
rown
in
range
(
len
(
nodeIds
[
row
])):
print
(
"Finish InputIDs"
)
i
=
0
for
row
in
range
(
len
(
nodeIds
)):
for
row1
in
range
(
len
(
nodeIds
)):
for
rown1
in
range
(
len
(
nodeIds
[
row1
])):
if
(
nodeIds
[
row
][
rown
]
==
nodeIds
[
row1
][
rown1
]):
# print("row: ",row,"row1: ",row1)
if
(
row
<
row1
):
for
row2
in
clusterlabels
:
if
(
clusterlabels
[
row1
]
==
clusterlabels
[
row2
]):
clusterlabels
[
row2
]
=
clusterlabels
[
row
]
clusterlabels
[
row1
]
=
clusterlabels
[
row
]
clusterlabels
.
append
(
row
)
i
+=
1
print
(
i
)
else
:
for
row2
in
clusterlabels
:
if
(
clusterlabels
[
row
]
==
clusterlabels
[
row2
]):
clusterlabels
[
row2
]
=
clusterlabels
[
row1
]
clusterlabels
[
row
]
=
clusterlabels
[
row1
]
"""" classifying Inputs"""
"""" Labaling inputs"""
for
row
in
range
(
len
(
nodeIds
)):
for
rown
in
range
(
len
(
nodeIds
[
row
])):
print
(
clusterlabels
)
print
(
"cluster labels:"
,
len
(
clusterlabels
))
print
(
"NodeIDs: "
,
len
(
nodeIds
))
for
row1
in
range
(
len
(
nodeIds
)):
for
rown1
in
range
(
len
(
nodeIds
[
row1
])):
if
(
nodeIds
[
row
][
rown
]
==
nodeIds
[
row1
][
rown1
]):
# print("row: ",row,"row1: ",row1)
if
(
row
<
row1
):
for
row2
in
clusterlabels
:
if
(
clusterlabels
[
row1
]
==
clusterlabels
[
row2
]):
clusterlabels
[
row2
]
=
clusterlabels
[
row
]
clusterlabels
[
row1
]
=
clusterlabels
[
row
]
else
:
for
row2
in
clusterlabels
:
if
(
clusterlabels
[
row
]
==
clusterlabels
[
row2
]):
clusterlabels
[
row2
]
=
clusterlabels
[
row1
]
clusterlabels
[
row
]
=
clusterlabels
[
row1
]
"""" Calculating the number of clusters"""
clusternum
=
1
labalvlues
.
append
(
clusterlabels
[
0
])
for
row
in
range
(
len
(
clusterlabels
)):
flag
=
True
for
row1
in
range
(
len
(
labalvlues
)):
if
(
clusterlabels
[
row
]
==
labalvlues
[
row1
]):
flag
=
False
print
(
clusterlabels
)
print
(
"cluster labels:"
,
len
(
clusterlabels
))
print
(
"NodeIDs: "
,
len
(
nodeIds
))
if
(
flag
):
clusternum
=
+
1
labalvlues
.
append
(
clusterlabels
[
row
])
print
(
"label values (source Ids in the network): "
,
labalvlues
,
" and the number of clusters is: "
,
len
(
labalvlues
))
"""" Calculating the number of clusters"""
clusternum
=
1
labalvlues
.
append
(
clusterlabels
[
0
])
for
row
in
range
(
len
(
clusterlabels
)):
flag
=
True
for
row1
in
range
(
len
(
labalvlues
)):
if
(
clusterlabels
[
row
]
==
labalvlues
[
row1
]):
flag
=
False
if
(
flag
):
clusternum
=
+
1
labalvlues
.
append
(
clusterlabels
[
row
])
print
(
"label values (source Ids in the network): "
,
labalvlues
,
" and the number of clusters is: "
,
len
(
labalvlues
))
"""" clustering Ids according to their labels"""
"""" clustering Ids according to their labels"""
for
row
in
range
(
len
(
labalvlues
)):
cluster
.
append
([])
for
row3
in
range
(
len
(
nodeIds
)):
if
(
labalvlues
[
row
]
==
clusterlabels
[
row3
]):
cluster
[
row
]
.
extend
(
nodeIds
[
row3
])
print
(
"clusters: "
,
cluster
)
for
row
in
range
(
len
(
labalvlues
)):
cluster
.
append
([])
for
row3
in
range
(
len
(
nodeIds
)):
if
(
labalvlues
[
row
]
==
clusterlabels
[
row3
]):
cluster
[
row
]
.
extend
(
nodeIds
[
row3
])
print
(
"clusters: "
,
cluster
)
""" Removing duplicating items in cluster"""
""" Removing duplicating items in cluster"""
flag
=
True
while
(
flag
):
for
row
in
range
(
len
(
cluster
)):
flag
=
True
while
(
flag
):
for
row
in
range
(
len
(
cluster
)):
flag
=
False
for
row1
in
range
(
len
(
cluster
[
row
])):
flag
=
False
for
row2
in
range
(
len
(
cluster
[
row
])):
if
(
row1
!=
row2
):
if
(
cluster
[
row
][
row1
]
==
cluster
[
row
][
row2
]):
del
cluster
[
row
][
row2
]
flag
=
True
break
for
row1
in
range
(
len
(
cluster
[
row
])):
flag
=
False
for
row2
in
range
(
len
(
cluster
[
row
])):
if
(
row1
!=
row2
):
if
(
cluster
[
row
][
row1
]
==
cluster
[
row
][
row2
]):
del
cluster
[
row
][
row2
]
flag
=
True
break
if
(
flag
):
break
if
(
flag
):
break
if
(
flag
):
break
print
(
"cluster:"
,
cluster
)
print
(
"cluster:"
,
cluster
)
"""" Clustering Destination Ids """
for
row
in
range
(
len
(
destIds
)):
destclusterlabel
.
append
([])
for
row2
in
range
(
len
(
destIds
[
row
])):
flag
=
True
for
rownum
in
range
(
len
(
labalvlues
)):
for
row1
in
range
(
len
(
cluster
[
rownum
])):
"""" Clustering Destination Ids """
for
row
in
range
(
len
(
destIds
)):
destclusterlabel
.
append
([])
for
row2
in
range
(
len
(
destIds
[
row
])):
flag
=
True
for
rownum
in
range
(
len
(
labalvlues
)):
for
row1
in
range
(
len
(
cluster
[
rownum
])):
if
(
destIds
[
row
][
row2
]
==
cluster
[
rownum
][
row1
]):
destclusterlabel
[
row
]
.
append
(
labalvlues
[
rownum
])
flag
=
False
if
(
flag
):
destclusterlabel
.
append
(
destIds
[
row
][
row2
])
if
(
destIds
[
row
][
row2
]
==
cluster
[
rownum
][
row1
]):
destclusterlabel
[
row
]
.
append
(
labalvlues
[
rownum
])
flag
=
False
if
(
flag
):
destclusterlabel
.
append
(
destIds
[
row
][
row2
])
print
(
"destination labels (destination Ids): "
,
destclusterlabel
)
print
(
"destination labels (destination Ids): "
,
destclusterlabel
)
data-hub/semantic-linking-microservice/app/initialdemo/SemanticLinking.py
View file @
b365f613
import
networkx
as
nx
import
matplotlib.pyplot
as
plt
from
collections
import
Counter
import
HyperGraph
as
hg
import
initialdemo.
HyperGraph
as
hg
import
pandas
as
pd
import
json
import
warnings
...
...
@@ -12,194 +12,198 @@ import mplleaflet
import
values
as
values
from
matplotlib
import
colors
def
_color_network
(
G
):
"""Colors the network so that neighboring nodes all have distinct colors.
class
SemanticLinking
:
Returns a dict keyed by color to a set of nodes with that color.
"""
coloring
=
dict
()
# color => set(node)
colors
=
nx
.
coloring
.
greedy_color
(
G
)
for
node
,
color
in
colors
.
items
():
if
color
in
coloring
:
coloring
[
color
]
.
add
(
node
)
else
:
coloring
[
color
]
=
set
([
node
])
return
coloring
def
__init__
(
self
):
hg
.
classify
()
def
_color_network
(
self
,
G
):
"""Colors the network so that neighboring nodes all have distinct colors.
def
_labeling_complete
(
labeling
,
G
):
"""Determines whether or not LPA is done.
Returns a dict keyed by color to a set of nodes with that color.
"""
coloring
=
dict
()
# color => set(node)
colors
=
nx
.
coloring
.
greedy_color
(
G
)
for
node
,
color
in
colors
.
items
():
if
color
in
coloring
:
coloring
[
color
]
.
add
(
node
)
else
:
coloring
[
color
]
=
set
([
node
])
return
coloring
Label propagation is complete when all nodes have a label that is
in the set of highest frequency labels amongst its neighbors.
Nodes with no neighbors are considered complete.
"""
return
all
(
labeling
[
v
]
in
_most_frequent_labels
(
v
,
labeling
,
G
)
for
v
in
G
if
len
(
G
[
v
])
>
0
)
def
_labeling_complete
(
self
,
labeling
,
G
):
"""Determines whether or not LPA is done.
Label propagation is complete when all nodes have a label that is
in the set of highest frequency labels amongst its neighbors.
def
_most_frequent_labels
(
node
,
labeling
,
G
):
"""Returns a set of all labels with maximum frequency in `labeling`.
Nodes with no neighbors are considered complete.
"""
return
all
(
labeling
[
v
]
in
self
.
_most_frequent_labels
(
v
,
labeling
,
G
)
for
v
in
G
if
len
(
G
[
v
])
>
0
)
Input `labeling` should be a dict keyed by node to labels.
"""
if
not
G
[
node
]:
# Nodes with no neighbors are themselves a community and are labeled
# accordingly, hence the immediate if statement.
return
{
labeling
[
node
]}
# Compute the frequencies of all neighbours of node
freqs
=
Counter
(
labeling
[
q
]
for
q
in
G
[
node
])
max_freq
=
max
(
freqs
.
values
())
return
{
label
for
label
,
freq
in
freqs
.
items
()
if
freq
==
max_freq
}
def
_most_frequent_labels
(
self
,
node
,
labeling
,
G
):
"""Returns a set of all labels with maximum frequency in `labeling`.
Input `labeling` should be a dict keyed by node to labels.
"""
if
not
G
[
node
]:
# Nodes with no neighbors are themselves a community and are labeled
# accordingly, hence the immediate if statement.
return
{
labeling
[
node
]}
def
_update_label
(
node
,
labeling
,
G
):
"""Updates the label of a node using the Prec-Max tie breaking algorithm
# Compute the frequencies of all neighbours of node
freqs
=
Counter
(
labeling
[
q
]
for
q
in
G
[
node
])
max_freq
=
max
(
freqs
.
values
())
return
{
label
for
label
,
freq
in
freqs
.
items
()
if
freq
==
max_freq
}
The algorithm is explained in: 'Community Detection via Semi-Synchronous
Label Propagation Algorithms' Cordasco and Gargano, 2011
"""
high_labels
=
_most_frequent_labels
(
node
,
labeling
,
G
)
if
len
(
high_labels
)
==
1
:
labeling
[
node
]
=
high_labels
.
pop
()
elif
len
(
high_labels
)
>
1
:
# Prec-Max
if
labeling
[
node
]
not
in
high_labels
:
labeling
[
node
]
=
max
(
high_labels
)
def
_update_label
(
self
,
node
,
labeling
,
G
):
"""Updates the label of a node using the Prec-Max tie breaking algorithm
The algorithm is explained in: 'Community Detection via Semi-Synchronous
Label Propagation Algorithms' Cordasco and Gargano, 2011
"""
high_labels
=
self
.
_most_frequent_labels
(
node
,
labeling
,
G
)
if
len
(
high_labels
)
==
1
:
labeling
[
node
]
=
high_labels
.
pop
()
elif
len
(
high_labels
)
>
1
:
# Prec-Max
if
labeling
[
node
]
not
in
high_labels
:
warnings
.
filterwarnings
(
'ignore'
)
labeling
[
node
]
=
max
(
high_labels
)
warnings
.
filterwarnings
(
'ignore'
)
#G = nx.DiGraph(directed=True)
G
=
nx
.
MultiDiGraph
(
day
=
"Stackoverflow"
)
df_nodes
=
hg
.
clusterlabels
destf_nodes
=
hg
.
destclusterlabel
color_map
=
{
1
:
'#f09494'
,
2
:
'#eebcbc'
,
3
:
'#72bbd0'
,
4
:
'#91f0a1'
,
5
:
'#629fff'
,
6
:
'#bcc2f2'
,
7
:
'#eebcbc'
,
8
:
'#f1f0c0'
,
9
:
'#d2ffe7'
,
10
:
'#caf3a6'
,
11
:
'#ffdf55'
,
12
:
'#ef77aa'
,
13
:
'#d6dcff'
,
14
:
'#d2f5f0'
}
i
=
0
graphedge
=
[]
weigth
=
[]
sourcedestination
=
[]
source
=
[]
dest
=
[]
edge_width
=
[]
weight1
=
[]
#G = nx.DiGraph(directed=True)
G
=
nx
.
MultiDiGraph
(
day
=
"Stackoverflow"
)
df_nodes
=
hg
.
clusterlabels
destf_nodes
=
hg
.
destclusterlabel
color_map
=
{
1
:
'#f09494'
,
2
:
'#eebcbc'
,
3
:
'#72bbd0'
,
4
:
'#91f0a1'
,
5
:
'#629fff'
,
6
:
'#bcc2f2'
,
7
:
'#eebcbc'
,
8
:
'#f1f0c0'
,
9
:
'#d2ffe7'
,
10
:
'#caf3a6'
,
11
:
'#ffdf55'
,
12
:
'#ef77aa'
,
13
:
'#d6dcff'
,
14
:
'#d2f5f0'
}
i
=
0
""""drawing edges in graph"""
graphedge
=
[]
weigth
=
[]
sourcedestination
=
[]
source
=
[]
dest
=
[]
edge_width
=
[]
weight1
=
[]
for
drow
in
range
(
len
(
df_nodes
)):
for
row
in
range
(
len
(
destf_nodes
[
drow
])):
G
.
add_edge
(
df_nodes
[
drow
],
destf_nodes
[
drow
][
row
])
node_adjacencies
=
[]
for
row
in
range
(
len
(
hg
.
labalvlues
)):
for
row1
in
range
(
len
(
hg
.
labalvlues
)):
weight1
.
append
(
G
.
number_of_edges
(
hg
.
labalvlues
[
row
],
hg
.
labalvlues
[
row1
]))
print
(
"The number of coccurance from node "
,
hg
.
labalvlues
[
row
],
"to node "
,
hg
.
labalvlues
[
row1
],
": "
,
weight1
[
row1
])
def
drawedges
(
self
):
G
.
__setattr__
(
'weight'
,
weight1
)
"""drawing edges in graph"""
# print(float(row['Timestamp']))
#G.add_weighted_edges_from([(row['TransactionFrom'], row['TransactionTo'], i*j)])
for
drow
in
range
(
len
(
self
.
df_nodes
)):
for
row
in
range
(
len
(
self
.
destf_nodes
[
drow
])):
self
.
G
.
add_edge
(
self
.
df_nodes
[
drow
],
self
.
destf_nodes
[
drow
][
row
])
#print dict_pos
for
row
in
range
(
len
(
hg
.
labalvlues
)):
for
row1
in
range
(
len
(
hg
.
labalvlues
)):
self
.
weight1
.
append
(
self
.
G
.
number_of_edges
(
hg
.
labalvlues
[
row
],
hg
.
labalvlues
[
row1
]))
print
(
"The number of coccurance from node "
,
hg
.
labalvlues
[
row
],
"to node "
,
hg
.
labalvlues
[
row1
],
": "
,
self
.
weight1
[
row1
])
"""label_propagation_communities(G) """
self
.
G
.
__setattr__
(
'weight'
,
self
.
weight1
)
def
labeling
(
self
):
"""label_propagation_communities(G) """
coloring
=
_color_network
(
G
)
# Create a unique label for each node in the graph
labeling
=
{
v
:
k
for
k
,
v
in
enumerate
(
G
)}
print
(
"lable value: "
,
labeling
.
values
())
while
not
_labeling_complete
(
labeling
,
G
):
# Update the labels of every node with the same color.
print
(
"lable value: "
,
labeling
.
values
())
for
color
,
nodes
in
coloring
.
items
():
for
n
in
nodes
:
_update_label
(
n
,
labeling
,
G
)
for
label
in
set
(
labeling
.
values
()):
print
(
"lable value: "
,
labeling
.
values
())
coloring
=
self
.
_color_network
(
self
.
G
)
# Create a unique label for each node in the graph
labeling
=
{
v
:
k
for
k
,
v
in
enumerate
(
self
.
G
)}
print
(
"lable value: "
,
labeling
.
values
())
while
not
self
.
_labeling_complete
(
labeling
,
self
.
G
):
# Update the labels of every node with the same color.
print
(
"lable value: "
,
labeling
.
values
())
for
color
,
nodes
in
coloring
.
items
():
for
n
in
nodes
:
self
.
_update_label
(
n
,
labeling
,
self
.
G
)
for
label
in
set
(
labeling
.
values
()):
print
(
"lable value: "
,
labeling
.
values
())
""" findig nodes' adjecencies"""
node_adjacencies
=
[]
node_text
=
[]
for
node
,
adjacencies
in
enumerate
(
G
.
adjacency
()):
node_adjacencies
.
append
(
len
(
adjacencies
[
1
]))
node_text
.
append
(
'# of connections: '
+
str
(
len
(
adjacencies
[
1
])))
def
findigneighbors
(
self
):
""" findig nodes' adjecencies"""
node_text
=
[]
for
node
,
adjacencies
in
enumerate
(
self
.
G
.
adjacency
()):
self
.
node_adjacencies
.
append
(
len
(
adjacencies
[
1
]))
node_text
.
append
(
'# of connections: '
+
str
(
len
(
adjacencies
[
1
])))
G
.
color
=
node_adjacencies
self
.
G
.
color
=
self
.
node_adjacencies
def
result
(
self
):
plt
.
figure
(
figsize
=
(
25
,
25
))
options
=
{
'with_labels'
:
True
,
'font_weight'
:
'regular'
,
}
plt
.
figure
(
figsize
=
(
25
,
25
))
options
=
{
'with_labels'
:
True
,
'font_weight'
:
'regular'
,
}
# colors = [color_map[G.node[node][1]] for node in G]
# sizes = [G.node[node]['Timestamp'] * 10 for node in G]
#colors = [color_map[G.node[node][1]] for node in G]
#sizes = [G.node[node]['Timestamp'] * 10 for node in G]
d
=
nx
.
degree_centrality
(
self
.
G
)
d_list
=
list
(
d
.
values
())
print
(
"node centrality: "
,
d_list
)
print
(
"node adjacencies: "
,
self
.
node_adjacencies
)
for
row
in
range
(
len
(
self
.
weigth
)):
self
.
edge_width
.
append
([])
for
drow
in
range
(
len
(
self
.
weigth
[
row
])):
self
.
edge_width
[
row
]
.
append
(
self
.
weigth
[
row
][
drow
])
node_size
=
[
v
*
80
for
v
in
d
.
values
()]
# setting node size based on node centrality
edge_width
=
[
row
*
0.5
for
row
in
self
.
weight1
]
print
(
"Nodes' Degree: "
,
nx
.
degree
(
self
.
G
))
print
(
"Nodes' Betweeness "
,
nx
.
edge_betweenness_centrality
(
self
.
G
))
print
(
"Nodes' Betweeness-centrality: "
,
nx
.
betweenness_centrality
(
self
.
G
))
d
=
nx
.
degree_centrality
(
G
)
d_list
=
list
(
d
.
values
())
print
(
"node centrality: "
,
d_list
)
print
(
"node adjacencies: "
,
node_adjacencies
)
for
row
in
range
(
len
(
weigth
)):
edge_width
.
append
([])
for
drow
in
range
(
len
(
weigth
[
row
])):
edge_width
[
row
]
.
append
(
weigth
[
row
][
drow
])
node_size
=
[
v
*
80
for
v
in
d
.
values
()]
#setting node size based on node centrality
edge_width
=
[
row
*
0.5
for
row
in
weight1
]
"""
Using the spring layout :
- k controls the distance between the nodes and varies between 0 and 1
- iterations is the number of times simulated annealing is run
default k=0.1 and iterations=50
"""
print
(
"Nodes' Degree: "
,
nx
.
degree
(
G
))
print
(
"Nodes' Betweeness "
,
nx
.
edge_betweenness_centrality
(
G
))
print
(
"Nodes' Betweeness-centrality: "
,
nx
.
betweenness_centrality
(
G
))
labels2
=
{}
for
idx
,
edge
in
enumerate
(
self
.
G
.
edges
):
labels2
[
edge
]
=
"s"
pos_nodes
=
nx
.
spring_layout
(
self
.
G
,
k
=
0.25
,
iterations
=
50
)
nx
.
draw
(
self
.
G
,
pos_nodes
,
node_color
=
self
.
node_adjacencies
,
node_size
=
node_size
,
width
=
2
,
arrowstyle
=
'->'
,
arrowsize
=
10
,
weight
=
self
.
weight1
,
edge_color
=
'gray'
,
**
options
)
edge_labels
=
nx
.
get_edge_attributes
(
self
.
G
,
'weight'
)
pos_attrs
=
{}
for
node
,
coords
in
pos_nodes
.
items
():
pos_attrs
[
node
]
=
(
coords
[
0
],
coords
[
1
]
+
0.02
)
nx
.
draw_networkx_edge_labels
(
self
.
G
,
pos_nodes
,
edge_labels
=
edge_labels
,
font_size
=
10
,
font_color
=
'red'
)
nx
.
draw_networkx_labels
(
self
.
G
,
pos_attrs
,
labels
=
self
.
labeling
,
font_size
=
10
,
font_color
=
'red'
)
"""
Using the spring layout :
- k controls the distance between the nodes and varies between 0 and 1
- iterations is the number of times simulated annealing is run
default k=0.1 and iterations=50
"""
labels2
=
{}
for
idx
,
edge
in
enumerate
(
G
.
edges
):
labels2
[
edge
]
=
"s"
pos_nodes
=
nx
.
spring_layout
(
G
,
k
=
0.25
,
iterations
=
50
)
ax
=
plt
.
gca
()
nx
.
draw
(
G
,
pos_nodes
,
node_color
=
node_adjacencies
,
node_size
=
node_size
,
width
=
2
,
arrowstyle
=
'->'
,
arrowsize
=
10
,
weight
=
weight1
,
edge_color
=
'gray'
,
**
options
)
edge_labels
=
nx
.
get_edge_attributes
(
G
,
'weight'
)
pos_attrs
=
{}
for
node
,
coords
in
pos_nodes
.
items
():
pos_attrs
[
node
]
=
(
coords
[
0
],
coords
[
1
]
+
0.02
)
nx
.
draw_networkx_edge_labels
(
G
,
pos_nodes
,
edge_labels
=
edge_labels
,
font_size
=
10
,
font_color
=
'red'
)
nx
.
draw_networkx_labels
(
G
,
pos_attrs
,
labels
=
labeling
,
font_size
=
10
,
font_color
=
'red'
)
ax
=
plt
.
gca
()
ax
.
collections
[
0
]
.
set_edgecolor
(
"#555555"
)
plt
.
show
()
ax
=
plt
.
gca
()
ax
.
collections
[
0
]
.
set_edgecolor
(
"#555555"
)
plt
.
show
()
def
main
(
self
):
self
.
drawedges
()
self
.
labeling
()
self
.
findigneighbors
()
self
.
result
()
linking
=
SemanticLinking
()
linking
.
main
()
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment