Commit 48c4791d authored by Alexander Lercher

Merge branch 'feature/semantic-linking-refactoring' into develop

parents 5f88eba2 b08e6606
Pipeline #10 failed with stages
@@ -28,3 +28,39 @@ paths:
      responses:
        200:
          description: "Successful echo of request data"
  /graphinfo:
    get:
      operationId: "rest.graphinfo.get"
      tags:
      - "GraphInfo"
      summary: "Get info about clustered nodes"
      description: "Returns multiple metrics for all nodes created by analyzing and clustering the blockchain traces"
      parameters: []
      responses:
        200:
          description: "Successful operation"
          schema:
            $ref: "#/definitions/NodeInfo"

definitions:
  NodeInfo:
    type: "object"
    properties:
      label:
        type: string
      centrality:
        type: number
      adjacencies:
        type: integer
      degree:
        type: number
      betweenness:
        type: object
        properties:
          to_node:
            type: integer
          value:
            type: number
      betweenness_centrality:
        type: number
\ No newline at end of file
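Once the connexion app serves this spec, the new endpoint can be smoke-tested from Python. A minimal sketch, assuming the service runs locally on port 5000 with the spec mounted at the application root (host, port, and base path are assumptions, not taken from this diff):

import requests  # assumed to be available; not part of this commit

# Query the new endpoint; the URL is an illustration only.
response = requests.get("http://localhost:5000/graphinfo")
response.raise_for_status()

for node in response.json():
    # Each entry should match the NodeInfo definition above.
    print(node["label"], node["centrality"], node["betweenness_centrality"])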
# initialdemo/HyperGraph.py
import json


class HyperGraph:
    cluster_labels = []
    dest_cluster_labels = []
    label_values = []

    def __init__(self):
        pass

    def classify(self):
        df_nodes = self.load_values()

        ret_val = self.init(df_nodes)
        nodeIds = ret_val['nodeIds']
        clusterlabels = ret_val['clusterlabels']
        destIds = ret_val['destIds']

        clusterlabels = self.classify_input(nodeIds, clusterlabels)
        labelvals = self.calc_cluster_num(clusterlabels)

        cluster = self.cluster_with_labels(nodeIds, clusterlabels, labelvals)
        cluster = self.remove_duplicates(cluster)

        destclusterlabel = self.cluster_dest_ids(labelvals, cluster, destIds)

        self.cluster_labels = clusterlabels
        self.dest_cluster_labels = destclusterlabel
        self.label_values = labelvals  # read by SemanticLinking as hg.label_values

    def load_values(self):
        with open("mult_in_out_large.json", "r") as json_file:
            df_nodes = json.load(json_file)
        return df_nodes

    def init(self, df_nodes):
        nodeIds = []
        clusterlabels = []
        destIds = []

        for row in df_nodes:
            for j in range(len(row['TransactionFrom'])):
                print(" Input Ids: ", row['TransactionFrom'][j])
            nodeIds.append(row['TransactionFrom'])
        print("This is nodes: ", nodeIds)

        for row in df_nodes:
            destIds.append(row['TransactionTo'])

        for row in range(len(nodeIds)):
            print(nodeIds[row])
        print("Finish InputIDs")

        # one preliminary cluster label per input row
        i = 0
        for row in range(len(nodeIds)):
            clusterlabels.append(row)
            i += 1
        print(i)

        return {'nodeIds': nodeIds,
                'clusterlabels': clusterlabels,
                'destIds': destIds}

    def classify_input(self, nodeIds, clusterlabels):
        """Classify and label inputs: rows sharing an id get their labels merged."""
        for row in range(len(nodeIds)):
            for rown in range(len(nodeIds[row])):
                for row1 in range(len(nodeIds)):
                    for rown1 in range(len(nodeIds[row1])):
                        if nodeIds[row][rown] == nodeIds[row1][rown1]:
                            if row < row1:
                                for row2 in clusterlabels:
                                    if clusterlabels[row1] == clusterlabels[row2]:
                                        clusterlabels[row2] = clusterlabels[row]
                                clusterlabels[row1] = clusterlabels[row]
                            else:
                                for row2 in clusterlabels:
                                    if clusterlabels[row] == clusterlabels[row2]:
                                        clusterlabels[row2] = clusterlabels[row1]
                                clusterlabels[row] = clusterlabels[row1]

        print(clusterlabels)
        print("cluster labels:", len(clusterlabels))
        print("NodeIDs: ", len(nodeIds))
        return clusterlabels

    def calc_cluster_num(self, clusterlabels):
        """Calculate the number of clusters, i.e. the distinct label values."""
        labelvals = [clusterlabels[0]]
        for row in range(len(clusterlabels)):
            flag = True
            for row1 in range(len(labelvals)):
                if clusterlabels[row] == labelvals[row1]:
                    flag = False
            if flag:
                labelvals.append(clusterlabels[row])
        print("label values (source Ids in the network): ", labelvals,
              " and the number of clusters is: ", len(labelvals))
        return labelvals

    def cluster_with_labels(self, nodeIds, clusterlabels, labelvals):
        """Cluster ids according to their labels."""
        cluster = []
        for row in range(len(labelvals)):
            cluster.append([])
            for row3 in range(len(nodeIds)):
                if labelvals[row] == clusterlabels[row3]:
                    cluster[row].extend(nodeIds[row3])
        print("clusters: ", cluster)
        return cluster

    def remove_duplicates(self, cluster):
        """Remove duplicate items within each cluster."""
        flag = True
        while flag:
            for row in range(len(cluster)):
                flag = False
                for row1 in range(len(cluster[row])):
                    flag = False
                    for row2 in range(len(cluster[row])):
                        if row1 != row2:
                            if cluster[row][row1] == cluster[row][row2]:
                                del cluster[row][row2]
                                flag = True
                                break
                    if flag:
                        break
                if flag:
                    break
        print("cluster:", cluster)
        return cluster

    def cluster_dest_ids(self, labelvals, cluster, destIds):
        """Cluster destination ids against the input clusters."""
        destclusterlabel = []
        for row in range(len(destIds)):
            destclusterlabel.append([])
            for row2 in range(len(destIds[row])):
                flag = True
                for rownum in range(len(labelvals)):
                    for row1 in range(len(cluster[rownum])):
                        if destIds[row][row2] == cluster[rownum][row1]:
                            destclusterlabel[row].append(labelvals[rownum])
                            flag = False
                if flag:
                    destclusterlabel.append(destIds[row][row2])
        print("destination labels (destination Ids): ", destclusterlabel)
        return destclusterlabel
\ No newline at end of file
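For orientation, a minimal sketch of driving the refactored class end to end. The two-trace sample below is invented to match the shape load_values() expects; the write-out step is an illustration only, not part of the commit:

import json
from initialdemo.HyperGraph import HyperGraph

# Invented sample: one record per trace, with input and output id lists.
sample = [
    {"TransactionFrom": [1, 2], "TransactionTo": [3]},
    {"TransactionFrom": [2, 4], "TransactionTo": [5, 6]},
]
with open("mult_in_out_large.json", "w") as f:
    json.dump(sample, f)

hg = HyperGraph()
hg.classify()
print(hg.cluster_labels)       # one (merged) label per input row
print(hg.dest_cluster_labels)  # cluster label(s) per destination id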
# initialdemo/NodeInfo.py
class NodeInfo:
    '''Contains information about the individual nodes in the generated graph'''
    label = None
    centrality = None
    adjacencies = None
    degree = None
    betweenness = None
    betweenness_centrality = None

    def __init__(self):
        self.label = 'Node123'
        self.centrality = 0
        self.adjacencies = 0
        self.degree = 0
        self.betweenness = None
        self.betweenness_centrality = 0
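Because the rest.graphinfo.get handler further down returns ni.__dict__, the instance attributes assigned in __init__ are exactly what the endpoint serializes. A quick sketch of the resulting payload, which lines up with the swagger NodeInfo definition above:

import json
from initialdemo.NodeInfo import NodeInfo

ni = NodeInfo()
print(json.dumps(ni.__dict__))
# {"label": "Node123", "centrality": 0, "adjacencies": 0,
#  "degree": 0, "betweenness": null, "betweenness_centrality": 0}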
import networkx as nx
import matplotlib.pyplot as plt
from collections import Counter
from HyperGraph import HyperGraph
import warnings

# pip install networkx
@@ -13,9 +13,29 @@ import warnings

class SemanticLinking:
    hg: HyperGraph = None
    df_nodes = []
    destf_nodes = []
    G: nx.MultiDiGraph = None

    color_map = {1: '#f09494', 2: '#eebcbc', 3: '#72bbd0', 4: '#91f0a1', 5: '#629fff', 6: '#bcc2f2',
                 7: '#eebcbc', 8: '#f1f0c0', 9: '#d2ffe7', 10: '#caf3a6', 11: '#ffdf55', 12: '#ef77aa',
                 13: '#d6dcff', 14: '#d2f5f0'}

    def __init__(self):
        warnings.filterwarnings('ignore')

        # init HyperGraph
        self.hg = HyperGraph()
        self.hg.classify()
        self.df_nodes = self.hg.cluster_labels
        self.destf_nodes = self.hg.dest_cluster_labels

        # init visual graph
        self.G = nx.MultiDiGraph(day="Stackoverflow")

    def _color_network(self, G):
        """Colors the network so that neighboring nodes all have distinct colors.
@@ -30,7 +50,6 @@ class SemanticLinking:
            coloring[color] = set([node])
        return coloring

    def _labeling_complete(self, labeling, G):
        """Determines whether or not LPA is done.
@@ -42,7 +61,6 @@ class SemanticLinking:
        return all(labeling[v] in self._most_frequent_labels(v, labeling, G)
                   for v in G if len(G[v]) > 0)

    def _most_frequent_labels(self, node, labeling, G):
        """Returns a set of all labels with maximum frequency in `labeling`.
@@ -58,7 +76,6 @@ class SemanticLinking:
        max_freq = max(freqs.values())
        return {label for label, freq in freqs.items() if freq == max_freq}

    def _update_label(self, node, labeling, G):
        """Updates the label of a node using the Prec-Max tie breaking algorithm
@@ -71,57 +88,30 @@ class SemanticLinking:
        elif len(high_labels) > 1:
            # Prec-Max
            if labeling[node] not in high_labels:
                labeling[node] = max(high_labels)

    def drawedges(self):
        """Draw edges in the graph."""
        labelvalues = self.hg.label_values
        weight1 = []

        for drow in range(len(self.df_nodes)):
            for row in range(len(self.destf_nodes[drow])):
                self.G.add_edge(self.df_nodes[drow], self.destf_nodes[drow][row])

        for row in range(len(labelvalues)):
            for row1 in range(len(labelvalues)):
                weight1.append(self.G.number_of_edges(labelvalues[row], labelvalues[row1]))
                # print the count just appended
                print("The number of co-occurrences from node ", labelvalues[row],
                      " to node ", labelvalues[row1], ": ", weight1[-1])

        self.G.weight = weight1
        return weight1

    def dolabeling(self):
        """label_propagation_communities(G)"""
        coloring = self._color_network(self.G)
        # Create a unique label for each node in the graph
        labeling = {v: k for k, v in enumerate(self.G)}
        print("label values: ", labeling.values())
        while not self._labeling_complete(labeling, self.G):
@@ -132,43 +122,46 @@ class SemanticLinking:
            self._update_label(n, labeling, self.G)

        for label in set(labeling.values()):
            print("label value: ", labeling.values())

        return labeling

    def findigneighbors(self):
        """Find the nodes' adjacencies."""
        node_text = []
        node_adjacencies = []
        for node, adjacencies in enumerate(self.G.adjacency()):
            node_adjacencies.append(len(adjacencies[1]))
            node_text.append('# of connections: ' + str(len(adjacencies[1])))

        self.G.color = node_adjacencies
        return node_adjacencies

    def print_metrics(self, weight1, labeling, node_adjacencies):
        plt.figure(figsize=(25, 25))

        # colors = [color_map[G.node[node][1]] for node in G]
        # sizes = [G.node[node]['Timestamp'] * 10 for node in G]
        d = nx.degree_centrality(self.G)
        d_list = list(d.values())
        print("node centrality: ", d_list)
        print("node adjacencies: ", node_adjacencies)

        edge_width = [row * 0.5 for row in weight1]
        print("Nodes' Degree: ", nx.degree(self.G))
        print("Nodes' Betweenness: ", nx.edge_betweenness_centrality(self.G))
        print("Nodes' Betweenness-centrality: ", nx.betweenness_centrality(self.G))

    def draw_edges(self, weight1, labeling, node_adjacencies):
        """
        Using the spring layout:
        - k controls the distance between the nodes and varies between 0 and 1
@@ -176,16 +169,22 @@ class SemanticLinking:
        default k=0.1 and iterations=50
        """
        labels2 = {}
        options = {
            'with_labels': True,
            'font_weight': 'regular',
        }
        d = nx.degree_centrality(self.G)
        node_size = [v * 80 for v in d.values()]  # setting node size based on node centrality

        for idx, edge in enumerate(self.G.edges):
            labels2[edge] = "s"
        pos_nodes = nx.spring_layout(self.G, k=0.25, iterations=50)
        nx.draw(self.G, pos_nodes, node_color=node_adjacencies, node_size=node_size, width=2, arrowstyle='->',
                arrowsize=10, weight=weight1, edge_color='gray', **options)

        edge_labels = nx.get_edge_attributes(self.G, 'weight')
@@ -193,18 +192,23 @@ class SemanticLinking:
        for node, coords in pos_nodes.items():
            pos_attrs[node] = (coords[0], coords[1] + 0.02)

        nx.draw_networkx_edge_labels(self.G, pos_nodes, edge_labels=edge_labels, font_size=10, font_color='red')
        nx.draw_networkx_labels(self.G, pos_attrs, labels=labeling, font_size=10, font_color='red')

        ax = plt.gca()
        ax.collections[0].set_edgecolor("#555555")
        plt.show()

    def main(self):
        weight1 = self.drawedges()
        labeling = self.dolabeling()
        node_adjacencies = self.findigneighbors()
        self.print_metrics(weight1, labeling, node_adjacencies)
        self.draw_edges(weight1, labeling, node_adjacencies)


if __name__ == '__main__':
    linking = SemanticLinking()
    linking.main()
\ No newline at end of file
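The private helpers above implement label propagation with Prec-Max tie breaking; networkx ships its own label_propagation_communities, which can serve as a sanity check against the custom implementation on a toy graph. A sketch, not part of the commit (note that LPA itself is non-deterministic):

import networkx as nx
from networkx.algorithms.community import label_propagation_communities

# Two triangles joined by a single bridge edge.
G = nx.Graph([(1, 2), (2, 3), (1, 3),
              (4, 5), (5, 6), (4, 6),
              (3, 4)])

communities = label_propagation_communities(G)
print([sorted(c) for c in communities])
# typically [[1, 2, 3], [4, 5, 6]]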
# rest/graphinfo.py
from flask import request, Response
from initialdemo.NodeInfo import NodeInfo
import pickle as json


def get():
    # TODO return real graph infos
    ni = NodeInfo()
    return [ni.__dict__]
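Since operationId: "rest.graphinfo.get" in the spec routes to this function, it can also be exercised without a running server; a sketch assuming the package layout implied by the spec:

from rest import graphinfo

payload = graphinfo.get()
print(payload)  # [{'label': 'Node123', 'centrality': 0, ...}]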
# manage_sys_paths.py: add modules folders to the interpreter path
import sys
import os

modules_paths = ['../app/', '../../../modules/']

for path in modules_paths:
    if os.path.exists(path):
        sys.path.insert(1, path)
        print(f"added {path}")
import unittest
import manage_sys_paths
import json

from initialdemo.HyperGraph import HyperGraph


class Test_HyperGraph(unittest.TestCase):
    hypergraph: HyperGraph = None

    def setUp(self):
        self.hypergraph = HyperGraph()

    def test_removeDuplicates_noDupOrdered_sameContent(self):
        list_ = [[1,2,3]]
        set_ = self.hypergraph.remove_duplicates(list_)
        self.assertEqual(list_, set_)

    def test_removeDuplicates_oneDupOrdered_removed(self):
        list_ = [[1,2,3,3]]
        set_ = self.hypergraph.remove_duplicates(list_)
        self.assertEqual([[1,2,3]], set_)

    def test_removeDuplicates_multDupOrdered_allRemoved(self):
        list_ = [[1,1,2,3,3,4]]
        set_ = self.hypergraph.remove_duplicates(list_)
        self.assertEqual([[1,2,3,4]], set_)

    def test_removeDuplicates_noDupUnordered_sameContent(self):
        list_ = [[1,2,3,5,9,4,30,15]]
        set_ = self.hypergraph.remove_duplicates(list_)
        self.assertEqual(list_, set_)

    def test_removeDuplicates_oneDupUnordered_removed(self):
        list_ = [[1,2,3,5,9,4,30,5,15]]
        set_ = self.hypergraph.remove_duplicates(list_)
        self.assertEqual([[1,2,3,5,9,4,30,15]], set_)

    def test_removeDuplicates_multDupUnordered_allRemoved(self):
        list_ = [[1,2,5,3,1,70,25,-1,7,-1]]
        set_ = self.hypergraph.remove_duplicates(list_)
        self.assertEqual([[1,2,5,3,70,25,-1,7]], set_)

    def test_removeDuplicates_oneDupOrderedMultDim_removed(self):
        list_ = [[1,1,2],[2,2,3]]
        set_ = self.hypergraph.remove_duplicates(list_)
        self.assertEqual([[1,2],[2,3]], set_)

    def test_removeDuplicates_multDupOrderedMultDim_allRemoved(self):
        list_ = [[1,1,2,3,3],[2,2,3,4,4,5]]
        set_ = self.hypergraph.remove_duplicates(list_)
        self.assertEqual([[1,2,3],[2,3,4,5]], set_)

    def test_removeDuplicates_multDupUnorderedMultDim_allRemoved(self):
        list_ = [[1,2,5,2,7,3],[-10,5,3,20,-10,-7]]
        set_ = self.hypergraph.remove_duplicates(list_)
        self.assertEqual([[1,2,5,7,3],[-10,5,3,20,-7]], set_)

    def test_removeDuplicates_multDupUnorderedMultDim2_allRemoved(self):
        list_ = [[1,2,5,2,7,3],[-10,5,3,20,-10,-7],[1,2]]
        set_ = self.hypergraph.remove_duplicates(list_)
        self.assertEqual([[1,2,5,7,3],[-10,5,3,20,-7],[1,2]], set_)

    def test_removeDuplicates_multDupUnorderedTripleDim_noDupRemoved(self):
        list_ = [[[1,2,5,2,7,3],[-10,5,3,20,-10,-7],[1,2]]]
        set_ = self.hypergraph.remove_duplicates(list_)
        self.assertEqual(list_, set_)


if __name__ == '__main__':
    unittest.main()
\ No newline at end of file