Commit 48c4791d authored by Alexander Lercher's avatar Alexander Lercher

Merge branch 'feature/semantic-linking-refactoring' into develop

parents 5f88eba2 b08e6606
Pipeline #10 failed with stages
...@@ -28,3 +28,39 @@ paths: ...@@ -28,3 +28,39 @@ paths:
responses: responses:
200: 200:
description: "Successful echo of request data" description: "Successful echo of request data"
/graphinfo:
get:
operationId: "rest.graphinfo.get"
tags:
- "GraphInfo"
summary: "Get info about clustered nodes"
description: "Returns multiple metrics for all nodes created by analyzing and clustering the blockchain traces"
parameters: []
responses:
200:
description: "Successful operation"
schema:
$ref: "#/definitions/NodeInfo"
definitions:
NodeInfo:
type: "object"
properties:
label:
type: string
centrality:
type: number
adjacencies:
type: integer
degree:
type: number
betweenness:
type: object
properties:
to_node:
type: integer
value:
type: number
betweenness_centrality:
type: number
\ No newline at end of file
import json import json
nodeIds = [] class HyperGraph:
destIds = []
clusterlabels = []
destclusterlabel = []
cluster = []
labalvlues = []
def classify(): cluster_labels = []
df_nodes = load_values() dest_cluster_labels = []
label_values = []
for row in df_nodes: def __init__(self):
pass
def classify(self):
    """Run the full clustering pipeline and publish the results on the instance.

    Loads the transaction records, labels the input ids, groups them into
    clusters, de-duplicates the clusters and finally labels the destination
    ids, storing everything on instance attributes for consumers.
    """
    df_nodes = self.load_values()
    ret_val = self.init(df_nodes)
    nodeIds = ret_val['nodeIds']
    clusterlabels = ret_val['clusterlabels']
    destIds = ret_val['destIds']
    clusterlabels = self.classify_input(nodeIds, clusterlabels)
    labelvals = self.calc_cluster_num(clusterlabels)
    cluster = self.cluster_with_labels(nodeIds, clusterlabels, labelvals)
    cluster = self.remove_duplicates(cluster)
    destclusterlabel = self.cluster_dest_ids(labelvals, cluster, destIds)
    self.cluster_labels = clusterlabels
    self.dest_cluster_labels = destclusterlabel
    # Store the label values under BOTH names: previously only `labelvals`
    # was assigned, so SemanticLinking.drawedges — which reads
    # `hg.label_values` — always saw the empty class-level default.
    self.labelvals = labelvals
    self.label_values = labelvals
def load_values(self):
    """Load and return the blockchain transaction records from the JSON input file.

    Returns the parsed JSON document (a list of row dicts with
    'TransactionFrom'/'TransactionTo' keys, as consumed by init()).
    """
    # NOTE(review): path is hard-coded and resolved against the current
    # working directory — confirm where callers expect this file to live.
    with open("mult_in_out_large.json", "r") as json_file:
        df_nodes = json.load(json_file)
    return df_nodes
def init(self, df_nodes):
nodeIds = []
clusterlabels = []
destIds = []
for row in df_nodes:
for j in range(len(row['TransactionFrom'])): for j in range(len(row['TransactionFrom'])):
print(" Input Ids: ", row['TransactionFrom'][j]) print(" Input Ids: ", row['TransactionFrom'][j])
nodeIds.append(row['TransactionFrom']) nodeIds.append(row['TransactionFrom'])
print("This is nodes: ", nodeIds) print("This is nodes: ", nodeIds)
for row in df_nodes: for row in df_nodes:
destIds.append(row['TransactionTo']) destIds.append(row['TransactionTo'])
for row in range(len(nodeIds)): for row in range(len(nodeIds)):
print(nodeIds[row]) print(nodeIds[row])
print("Finish InputIDs") print("Finish InputIDs")
i = 0 i = 0
for row in range(len(nodeIds)): for row in range(len(nodeIds)):
clusterlabels.append(row) clusterlabels.append(row)
i += 1 i += 1
print(i) print(i)
return {'nodeIds': nodeIds,
'clusterlabels': clusterlabels,
'destIds': destIds}
def classify_input(self, nodeIds, clusterlabels):
"""" classifying Inputs""" """" classifying Inputs"""
"""" Labaling inputs""" """" Labaling inputs"""
for row in range(len(nodeIds)): for row in range(len(nodeIds)):
...@@ -55,42 +88,43 @@ def classify(): ...@@ -55,42 +88,43 @@ def classify():
clusterlabels[row2] = clusterlabels[row1] clusterlabels[row2] = clusterlabels[row1]
clusterlabels[row] = clusterlabels[row1] clusterlabels[row] = clusterlabels[row1]
print(clusterlabels) print(clusterlabels)
print("cluster labels:", len(clusterlabels)) print("cluster labels:", len(clusterlabels))
print("NodeIDs: ", len(nodeIds)) print("NodeIDs: ", len(nodeIds))
return clusterlabels
def calc_cluster_num(self, clusterlabels):
    """Return the distinct cluster labels in first-seen order.

    Fixes two issues in the original:
    - empty input raised IndexError on `clusterlabels[0]`; now returns [].
    - the O(n^2) membership rescans are replaced by an order-preserving
      dict-based de-duplication (dicts preserve insertion order).
    """
    labelvals = list(dict.fromkeys(clusterlabels))
    print("label values (source Ids in the network): ", labelvals, " and the number of clusters is: ", len(labelvals))
    return labelvals
def cluster_with_labels(self, nodeIds, clusterlabels, labelvals):
    """Group the node-id lists by their cluster label.

    Returns one list per entry of `labelvals`, containing the concatenation
    of every `nodeIds[i]` whose `clusterlabels[i]` equals that label.
    Replaces the original O(labels * nodes) rescan with a single pass.
    """
    grouped = {label: [] for label in labelvals}
    for ids, label in zip(nodeIds, clusterlabels):
        if label in grouped:
            grouped[label].extend(ids)
    cluster = [grouped[label] for label in labelvals]
    print("clusters: ", cluster)
    return cluster
def remove_duplicates(self, cluster):
""" Removing duplicating items in cluster""" """ Removing duplicating items in cluster"""
flag = True flag = True
...@@ -113,27 +147,25 @@ def classify(): ...@@ -113,27 +147,25 @@ def classify():
print("cluster:", cluster) print("cluster:", cluster)
return cluster
def cluster_dest_ids(self, labelvals, cluster, destIds):
    """Map every destination id to the label of the cluster containing it.

    Returns one list per row of `destIds`; matched ids are replaced by the
    label of the cluster they were found in, unmatched ids are kept as-is.
    """
    destclusterlabel = []
    for row in range(len(destIds)):
        destclusterlabel.append([])
        for row2 in range(len(destIds[row])):
            flag = True
            for rownum in range(len(labelvals)):
                for row1 in range(len(cluster[rownum])):
                    if destIds[row][row2] == cluster[rownum][row1]:
                        destclusterlabel[row].append(labelvals[rownum])
                        flag = False
            if flag:
                # Bug fix: unmatched ids were appended to the OUTER list
                # (`destclusterlabel.append(...)`), mixing bare ids in with
                # the per-row lists and corrupting the row structure that
                # SemanticLinking iterates. Keep them in the current row.
                destclusterlabel[row].append(destIds[row][row2])
    print("destination labels (destination Ids): ", destclusterlabel)
    return destclusterlabel
with open("mult_in_out_large.json", "r") as json_file: \ No newline at end of file
df_nodes = json.load(json_file)
return df_nodes
\ No newline at end of file
class NodeInfo:
    '''Contains information about the individual nodes in the generated graph'''

    def __init__(self):
        # Placeholder values — TODO populate from the real graph analysis.
        # (The redundant class-level attribute copies were removed: they were
        # always shadowed by these instance attributes and only duplicated state.)
        self.label = 'Node123'
        self.centrality = 0
        self.adjacencies = 0
        self.degree = 0
        # Per the API spec this is an object mapping to_node -> value.
        self.betweenness = None
        self.betweenness_centrality = 0
import networkx as nx import networkx as nx
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from collections import Counter from collections import Counter
import HyperGraph as hg from HyperGraph import HyperGraph
import warnings import warnings
# pip install networkx # pip install networkx
...@@ -13,8 +13,28 @@ import warnings ...@@ -13,8 +13,28 @@ import warnings
class SemanticLinking: class SemanticLinking:
hg: HyperGraph = None
df_nodes = []
destf_nodes = []
G: nx.MultiDiGraph = None
color_map = {1: '#f09494', 2: '#eebcbc', 3: '#72bbd0', 4: '#91f0a1', 5: '#629fff', 6: '#bcc2f2',
7: '#eebcbc', 8: '#f1f0c0', 9: '#d2ffe7', 10: '#caf3a6', 11: '#ffdf55', 12: '#ef77aa',
13: '#d6dcff', 14: '#d2f5f0'}
def __init__(self): def __init__(self):
hg.classify() warnings.filterwarnings('ignore')
# init HyperGraph
self.hg = HyperGraph()
self.hg.classify()
self.df_nodes = self.hg.cluster_labels
self.destf_nodes = self.hg.dest_cluster_labels
# init visual graph
self.G = nx.MultiDiGraph(day="Stackoverflow")
def _color_network(self, G): def _color_network(self, G):
"""Colors the network so that neighboring nodes all have distinct colors. """Colors the network so that neighboring nodes all have distinct colors.
...@@ -30,7 +50,6 @@ class SemanticLinking: ...@@ -30,7 +50,6 @@ class SemanticLinking:
coloring[color] = set([node]) coloring[color] = set([node])
return coloring return coloring
def _labeling_complete(self, labeling, G): def _labeling_complete(self, labeling, G):
"""Determines whether or not LPA is done. """Determines whether or not LPA is done.
...@@ -42,7 +61,6 @@ class SemanticLinking: ...@@ -42,7 +61,6 @@ class SemanticLinking:
return all(labeling[v] in self._most_frequent_labels(v, labeling, G) return all(labeling[v] in self._most_frequent_labels(v, labeling, G)
for v in G if len(G[v]) > 0) for v in G if len(G[v]) > 0)
def _most_frequent_labels(self, node, labeling, G): def _most_frequent_labels(self, node, labeling, G):
"""Returns a set of all labels with maximum frequency in `labeling`. """Returns a set of all labels with maximum frequency in `labeling`.
...@@ -58,7 +76,6 @@ class SemanticLinking: ...@@ -58,7 +76,6 @@ class SemanticLinking:
max_freq = max(freqs.values()) max_freq = max(freqs.values())
return {label for label, freq in freqs.items() if freq == max_freq} return {label for label, freq in freqs.items() if freq == max_freq}
def _update_label(self, node, labeling, G): def _update_label(self, node, labeling, G):
"""Updates the label of a node using the Prec-Max tie breaking algorithm """Updates the label of a node using the Prec-Max tie breaking algorithm
...@@ -71,55 +88,28 @@ class SemanticLinking: ...@@ -71,55 +88,28 @@ class SemanticLinking:
elif len(high_labels) > 1: elif len(high_labels) > 1:
# Prec-Max # Prec-Max
if labeling[node] not in high_labels: if labeling[node] not in high_labels:
labeling[node] = max(high_labels) labeling[node] = max(high_labels)
warnings.filterwarnings('ignore')
#G = nx.DiGraph(directed=True)
G = nx.MultiDiGraph(day="Stackoverflow")
df_nodes = hg.clusterlabels
destf_nodes = hg.destclusterlabel
color_map = {1: '#f09494', 2: '#eebcbc', 3: '#72bbd0', 4: '#91f0a1', 5: '#629fff', 6: '#bcc2f2',
7: '#eebcbc', 8: '#f1f0c0', 9: '#d2ffe7', 10: '#caf3a6', 11: '#ffdf55', 12: '#ef77aa',
13: '#d6dcff', 14: '#d2f5f0'}
i=0
graphedge = []
weigth = []
sourcedestination = []
source = []
dest = []
edge_width = []
weight1 = []
node_adjacencies = []
labeling = {}
def drawedges(self):
    """drawing edges in graph

    Adds one edge per (source label, destination label) pair, then counts
    the parallel edges between every pair of label values. Returns the flat
    list of counts (also attached to the graph as `G.weight`).
    """
    labelvalues = self.hg.label_values
    weight1 = []
    for drow in range(len(self.df_nodes)):
        for row in range(len(self.destf_nodes[drow])):
            self.G.add_edge(self.df_nodes[drow], self.destf_nodes[drow][row])
    for row in range(len(labelvalues)):
        for row1 in range(len(labelvalues)):
            count = self.G.number_of_edges(labelvalues[row], labelvalues[row1])
            weight1.append(count)
            # Bug fix: the original printed `weight1[row1]`, a stale entry
            # from the first outer iteration once row > 0; print the count
            # that was just computed instead.
            print("The number of coccurance from node ", labelvalues[row], "to node ", labelvalues[row1], ": ", count)
    self.G.weight = weight1
    return weight1
def dolabeling(self): def dolabeling(self):
"""label_propagation_communities(G) """ """label_propagation_communities(G) """
coloring = self._color_network(self.G) coloring = self._color_network(self.G)
# Create a unique label for each node in the graph # Create a unique label for each node in the graph
labeling = {v: k for k, v in enumerate(self.G)} labeling = {v: k for k, v in enumerate(self.G)}
...@@ -132,24 +122,26 @@ class SemanticLinking: ...@@ -132,24 +122,26 @@ class SemanticLinking:
self._update_label(n, labeling, self.G) self._update_label(n, labeling, self.G)
for label in set(labeling.values()): for label in set(labeling.values()):
print("lable value: ", labeling.values()) print("lable value: ", labeling.values())
self.labeling = labeling
return labeling
def findigneighbors(self):
    """Collect each node's adjacency count; returns the list of counts.

    Also stores the counts on the graph as `G.color` (used for node coloring).
    """
    connection_texts = []
    adjacency_counts = []
    for _node, neighbours in self.G.adjacency():
        degree = len(neighbours)
        adjacency_counts.append(degree)
        connection_texts.append('# of connections: ' + str(degree))
    self.G.color = adjacency_counts
    return adjacency_counts
def print_metrics(self, weight1, labeling, node_adjacencies):
weigth = []
edge_width = []
def result(self):
plt.figure(figsize=(25, 25)) plt.figure(figsize=(25, 25))
options = {
'with_labels': True,
'font_weight': 'regular',
}
# colors = [color_map[G.node[node][1]] for node in G] # colors = [color_map[G.node[node][1]] for node in G]
# sizes = [G.node[node]['Timestamp'] * 10 for node in G] # sizes = [G.node[node]['Timestamp'] * 10 for node in G]
...@@ -157,18 +149,19 @@ class SemanticLinking: ...@@ -157,18 +149,19 @@ class SemanticLinking:
d = nx.degree_centrality(self.G) d = nx.degree_centrality(self.G)
d_list = list(d.values()) d_list = list(d.values())
print("node centrality: ", d_list) print("node centrality: ", d_list)
print("node adjacencies: ", self.node_adjacencies) print("node adjacencies: ", node_adjacencies)
for row in range(len(self.weigth)): for row in range(len(weigth)):
self.edge_width.append([]) edge_width.append([])
for drow in range(len(self.weigth[row])): for drow in range(len(weigth[row])):
self.edge_width[row].append(self.weigth[row][drow]) edge_width[row].append(weigth[row][drow])
node_size = [v * 80 for v in d.values()] # setting node size based on node centrality
edge_width = [row * 0.5 for row in self.weight1] edge_width = [row * 0.5 for row in weight1]
print("Nodes' Degree: ", nx.degree(self.G)) print("Nodes' Degree: ", nx.degree(self.G))
print("Nodes' Betweeness ", nx.edge_betweenness_centrality(self.G)) print("Nodes' Betweeness ", nx.edge_betweenness_centrality(self.G))
print("Nodes' Betweeness-centrality: ", nx.betweenness_centrality(self.G)) print("Nodes' Betweeness-centrality: ", nx.betweenness_centrality(self.G))
def draw_edges(self, weight1, labeling, node_adjacencies):
""" """
Using the spring layout : Using the spring layout :
- k controls the distance between the nodes and varies between 0 and 1 - k controls the distance between the nodes and varies between 0 and 1
...@@ -177,15 +170,21 @@ class SemanticLinking: ...@@ -177,15 +170,21 @@ class SemanticLinking:
""" """
labels2 = {} labels2 = {}
options = {
'with_labels': True,
'font_weight': 'regular',
}
d = nx.degree_centrality(self.G)
node_size = [v * 80 for v in d.values()] # setting node size based on node centrality
for idx, edge in enumerate(self.G.edges): for idx, edge in enumerate(self.G.edges):
labels2[edge] = "s" labels2[edge] = "s"
pos_nodes = nx.spring_layout(self.G, k=0.25, iterations=50) pos_nodes = nx.spring_layout(self.G, k=0.25, iterations=50)
nx.draw(self.G, pos_nodes, node_color=node_adjacencies, node_size=node_size, width=2, arrowstyle='->',
nx.draw(self.G, pos_nodes, node_color=self.node_adjacencies, node_size=node_size, width=2, arrowstyle='->', arrowsize=10, weight=weight1, edge_color='gray', **options)
arrowsize=10, weight=self.weight1, edge_color='gray', **options)
edge_labels = nx.get_edge_attributes(self.G, 'weight') edge_labels = nx.get_edge_attributes(self.G, 'weight')
...@@ -193,18 +192,23 @@ class SemanticLinking: ...@@ -193,18 +192,23 @@ class SemanticLinking:
for node, coords in pos_nodes.items(): for node, coords in pos_nodes.items():
pos_attrs[node] = (coords[0], coords[1] + 0.02) pos_attrs[node] = (coords[0], coords[1] + 0.02)
nx.draw_networkx_edge_labels(self.G, pos_nodes, edge_labels=edge_labels, font_size=10, font_color='red') nx.draw_networkx_edge_labels(self.G, pos_nodes, edge_labels=edge_labels, font_size=10, font_color='red')
nx.draw_networkx_labels(self.G, pos_attrs, labels=self.labeling, font_size=10, font_color='red') nx.draw_networkx_labels(self.G, pos_attrs, labels=labeling, font_size=10, font_color='red')
ax = plt.gca() ax = plt.gca()
ax.collections[0].set_edgecolor("#555555") ax.collections[0].set_edgecolor("#555555")
plt.show() plt.show()
def main(self):
    """Run the whole linking pipeline: edges, labeling, metrics, drawing."""
    edge_weights = self.drawedges()
    node_labels = self.dolabeling()
    adjacency_counts = self.findigneighbors()
    self.print_metrics(edge_weights, node_labels, adjacency_counts)
    self.draw_edges(edge_weights, node_labels, adjacency_counts)
linking = SemanticLinking() if __name__ == '__main__':
linking.main() linking = SemanticLinking()
\ No newline at end of file linking.main()
\ No newline at end of file
from flask import request, Response
from initialdemo.NodeInfo import NodeInfo
import pickle as json
def get():
    """Handle GET /graphinfo: return the node-info list for the clustered graph."""
    # TODO return real graph infos
    # NOTE(review): this module imports `pickle as json` but never uses it —
    # confirm whether that misleading import can be dropped.
    node = NodeInfo()
    return [node.__dict__]
# add modules folder to interpreter path
import sys
import os
# Prepend every existing modules directory to the interpreter search path
# so sibling packages resolve when running from the tests directory.
modules_paths = ['../app/', '../../../modules/']
for candidate in modules_paths:
    if not os.path.exists(candidate):
        continue
    sys.path.insert(1, candidate)
    print(f"added {candidate}")
import unittest
import manage_sys_paths
import json
from initialdemo.HyperGraph import HyperGraph
class Test_HyperGraph(unittest.TestCase):
    """Unit tests for HyperGraph.remove_duplicates (order-preserving dedup per row)."""

    hypergraph: HyperGraph = None

    def setUp(self):
        self.hypergraph = HyperGraph()

    def _dedup(self, data):
        """Shortcut for the method under test."""
        return self.hypergraph.remove_duplicates(data)

    def test_removeDuplicates_noDupOrdered_sameContent(self):
        data = [[1, 2, 3]]
        result = self._dedup(data)
        self.assertEqual(data, result)

    def test_removeDuplicates_oneDupOrdered_removed(self):
        result = self._dedup([[1, 2, 3, 3]])
        self.assertEqual([[1, 2, 3]], result)

    def test_removeDuplicates_multDupOrdered_allRemoved(self):
        result = self._dedup([[1, 1, 2, 3, 3, 4]])
        self.assertEqual([[1, 2, 3, 4]], result)

    def test_removeDuplicates_noDupUnordered_sameContent(self):
        data = [[1, 2, 3, 5, 9, 4, 30, 15]]
        result = self._dedup(data)
        self.assertEqual(data, result)

    def test_removeDuplicates_oneDupUnordered_removed(self):
        result = self._dedup([[1, 2, 3, 5, 9, 4, 30, 5, 15]])
        self.assertEqual([[1, 2, 3, 5, 9, 4, 30, 15]], result)

    def test_removeDuplicates_multDupUnordered_allRemoved(self):
        result = self._dedup([[1, 2, 5, 3, 1, 70, 25, -1, 7, -1]])
        self.assertEqual([[1, 2, 5, 3, 70, 25, -1, 7]], result)

    def test_removeDuplicates_oneDupOrderedMultDim_removed(self):
        result = self._dedup([[1, 1, 2], [2, 2, 3]])
        self.assertEqual([[1, 2], [2, 3]], result)

    def test_removeDuplicates_multDupOrderedMultDim_allRemoved(self):
        result = self._dedup([[1, 1, 2, 3, 3], [2, 2, 3, 4, 4, 5]])
        self.assertEqual([[1, 2, 3], [2, 3, 4, 5]], result)

    def test_removeDuplicates_multDupUnorderedMultDim_allRemoved(self):
        result = self._dedup([[1, 2, 5, 2, 7, 3], [-10, 5, 3, 20, -10, -7]])
        self.assertEqual([[1, 2, 5, 7, 3], [-10, 5, 3, 20, -7]], result)

    def test_removeDuplicates_multDupUnorderedMultDim2_allRemoved(self):
        result = self._dedup([[1, 2, 5, 2, 7, 3], [-10, 5, 3, 20, -10, -7], [1, 2]])
        self.assertEqual([[1, 2, 5, 7, 3], [-10, 5, 3, 20, -7], [1, 2]], result)

    def test_removeDuplicates_multDupUnorderedTripleDim_noDupRemoved(self):
        # Dedup applies one level deep only: inner lists are compared as items.
        data = [[[1, 2, 5, 2, 7, 3], [-10, 5, 3, 20, -10, -7], [1, 2]]]
        result = self._dedup(data)
        self.assertEqual(data, result)
if __name__ == '__main__':
unittest.main()
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment