Commit 48c4791d authored by Alexander Lercher's avatar Alexander Lercher

Merge branch 'feature/semantic-linking-refactoring' into develop

parents 5f88eba2 b08e6606
Pipeline #10 failed with stages
......@@ -28,3 +28,39 @@ paths:
responses:
200:
description: "Successful echo of request data"
/graphinfo:
get:
operationId: "rest.graphinfo.get"
tags:
- "GraphInfo"
summary: "Get info about clustered nodes"
description: "Returns multiple metrics for all nodes created by analyzing and clustering the blockchain traces"
parameters: []
responses:
200:
description: "Successful operation"
schema:
$ref: "#/definitions/NodeInfo"
definitions:
NodeInfo:
type: "object"
properties:
label:
type: string
centrality:
type: number
adjacencies:
type: integer
degree:
type: number
betweenness:
type: object
properties:
to_node:
type: integer
value:
type: number
betweenness_centrality:
type: number
\ No newline at end of file
import json
nodeIds = []
destIds = []
clusterlabels = []
destclusterlabel = []
cluster = []
labalvlues = []
class HyperGraph:
def classify():
df_nodes = load_values()
cluster_labels = []
dest_cluster_labels = []
label_values = []
for row in df_nodes:
def __init__(self):
pass
for j in range(len(row['TransactionFrom'])):
print(" Input Ids: ", row['TransactionFrom'][j])
nodeIds.append(row['TransactionFrom'])
print("This is nodes: ", nodeIds)
def classify(self):
df_nodes = self.load_values()
ret_val = self.init(df_nodes)
nodeIds = ret_val['nodeIds']
clusterlabels = ret_val['clusterlabels']
destIds = ret_val['destIds']
clusterlabels = self.classify_input(nodeIds, clusterlabels)
for row in df_nodes:
destIds.append(row['TransactionTo'])
labelvals = self.calc_cluster_num(clusterlabels)
cluster = self.cluster_with_labels(nodeIds, clusterlabels, labelvals)
for row in range(len(nodeIds)):
print(nodeIds[row])
cluster = self.remove_duplicates(cluster)
print("Finish InputIDs")
i = 0
for row in range(len(nodeIds)):
destclusterlabel = self.cluster_dest_ids(labelvals, cluster, destIds)
clusterlabels.append(row)
i += 1
print(i)
self.cluster_labels = clusterlabels
self.dest_cluster_labels = destclusterlabel
self.labelvals = labelvals
"""" classifying Inputs"""
"""" Labaling inputs"""
for row in range(len(nodeIds)):
def load_values(self):
with open("mult_in_out_large.json", "r") as json_file:
df_nodes = json.load(json_file)
for rown in range(len(nodeIds[row])):
return df_nodes
for row1 in range(len(nodeIds)):
for rown1 in range(len(nodeIds[row1])):
if(nodeIds[row][rown]==nodeIds[row1][rown1]):
# print("row: ",row,"row1: ",row1)
if(row < row1):
for row2 in clusterlabels:
if( clusterlabels[row1]== clusterlabels[row2]):
clusterlabels[row2]=clusterlabels[row]
clusterlabels[row1] = clusterlabels[row]
def init(self, df_nodes):
nodeIds = []
clusterlabels = []
destIds = []
else:
for row2 in clusterlabels:
if (clusterlabels[row] == clusterlabels[row2]):
clusterlabels[row2] = clusterlabels[row1]
clusterlabels[row] = clusterlabels[row1]
for row in df_nodes:
for j in range(len(row['TransactionFrom'])):
print(" Input Ids: ", row['TransactionFrom'][j])
nodeIds.append(row['TransactionFrom'])
print("This is nodes: ", nodeIds)
for row in df_nodes:
destIds.append(row['TransactionTo'])
print(clusterlabels)
print("cluster labels:", len(clusterlabels))
print("NodeIDs: ", len(nodeIds))
"""" Calculating the number of clusters"""
clusternum = 1
labalvlues.append(clusterlabels[0])
for row in range(len(clusterlabels)):
flag = True
for row1 in range(len(labalvlues)):
if(clusterlabels[row]== labalvlues[row1]):
flag = False
if (flag):
clusternum = + 1
labalvlues.append(clusterlabels[row])
print("label values (source Ids in the network): ", labalvlues, " and the number of clusters is: ", len(labalvlues))
for row in range(len(nodeIds)):
print(nodeIds[row])
print("Finish InputIDs")
i = 0
for row in range(len(nodeIds)):
clusterlabels.append(row)
i += 1
print(i)
return {'nodeIds': nodeIds,
'clusterlabels': clusterlabels,
'destIds': destIds}
def classify_input(self, nodeIds, clusterlabels):
"""" classifying Inputs"""
"""" Labaling inputs"""
for row in range(len(nodeIds)):
"""" clustering Ids according to their labels"""
for rown in range(len(nodeIds[row])):
for row in range(len(labalvlues)):
cluster.append([])
for row3 in range(len(nodeIds)):
if (labalvlues[row] == clusterlabels[row3]):
cluster[row].extend(nodeIds[row3])
print("clusters: ", cluster)
for row1 in range(len(nodeIds)):
for rown1 in range(len(nodeIds[row1])):
if(nodeIds[row][rown]==nodeIds[row1][rown1]):
# print("row: ",row,"row1: ",row1)
if(row < row1):
for row2 in clusterlabels:
if( clusterlabels[row1]== clusterlabels[row2]):
clusterlabels[row2]=clusterlabels[row]
clusterlabels[row1] = clusterlabels[row]
else:
for row2 in clusterlabels:
if (clusterlabels[row] == clusterlabels[row2]):
clusterlabels[row2] = clusterlabels[row1]
clusterlabels[row] = clusterlabels[row1]
print(clusterlabels)
print("cluster labels:", len(clusterlabels))
print("NodeIDs: ", len(nodeIds))
return clusterlabels
""" Removing duplicating items in cluster"""
def calc_cluster_num(self, clusterlabels):
"""" Calculating the number of clusters"""
labelvals = []
flag = True
while(flag):
for row in range(len(cluster)):
flag= False
for row1 in range(len(cluster[row])):
flag= False
for row2 in range (len(cluster[row])):
if(row1 != row2):
if(cluster[row][row1] == cluster[row][row2]):
del cluster[row][row2]
flag=True
break
if(flag):
break
if(flag):
break
labelvals.append(clusterlabels[0])
for row in range(len(clusterlabels)):
flag = True
for row1 in range(len(labelvals)):
if(clusterlabels[row]== labelvals[row1]):
flag = False
print("cluster:", cluster)
if (flag):
labelvals.append(clusterlabels[row])
print("label values (source Ids in the network): ", labelvals, " and the number of clusters is: ", len(labelvals))
return labelvals
def cluster_with_labels(self, nodeIds, clusterlabels, labelvals):
"""" clustering Ids according to their labels"""
cluster = []
"""" Clustering Destination Ids """
for row in range(len(destIds)):
destclusterlabel.append([])
for row2 in range(len(destIds[row])):
flag = True
for rownum in range(len(labalvlues)):
for row1 in range(len(cluster[rownum])):
for row in range(len(labelvals)):
cluster.append([])
for row3 in range(len(nodeIds)):
if (labelvals[row] == clusterlabels[row3]):
cluster[row].extend(nodeIds[row3])
print("clusters: ", cluster)
if(destIds[row][row2]== cluster[rownum][row1]):
destclusterlabel[row].append(labalvlues[rownum])
flag = False
if(flag):
destclusterlabel.append(destIds[row][row2])
return cluster
print("destination labels (destination Ids): ", destclusterlabel)
def remove_duplicates(self, cluster):
""" Removing duplicating items in cluster"""
def load_values():
with open("mult_in_out_large.json", "r") as json_file:
df_nodes = json.load(json_file)
return df_nodes
\ No newline at end of file
flag = True
while(flag):
for row in range(len(cluster)):
flag= False
for row1 in range(len(cluster[row])):
flag= False
for row2 in range (len(cluster[row])):
if(row1 != row2):
if(cluster[row][row1] == cluster[row][row2]):
del cluster[row][row2]
flag=True
break
if(flag):
break
if(flag):
break
print("cluster:", cluster)
return cluster
def cluster_dest_ids(self, labelvals, cluster, destIds):
    """Label each destination id with the label of the input cluster containing it.

    :param labelvals: cluster label values, aligned index-for-index with cluster
    :param cluster: list of clusters, each a list of input ids
    :param destIds: list of per-transaction destination-id lists
    :return: per-transaction lists of matched cluster labels; unmatched ids
        end up appended to the outer result list (see NOTE below)
    """
    destclusterlabel = []
    for row in range(len(destIds)):
        # One label bucket per transaction's destination-id list.
        destclusterlabel.append([])
        for row2 in range(len(destIds[row])):
            flag = True
            # Linear scan of every cluster for an id match; O(len(cluster)*ids).
            for rownum in range(len(labelvals)):
                for row1 in range(len(cluster[rownum])):
                    if(destIds[row][row2]== cluster[rownum][row1]):
                        destclusterlabel[row].append(labelvals[rownum])
                        flag = False
            if(flag):
                # NOTE(review): unmatched ids are appended to the OUTER list,
                # not to destclusterlabel[row] like the matched branch --
                # the result mixes nested lists and bare ids; confirm intended.
                destclusterlabel.append(destIds[row][row2])
    print("destination labels (destination Ids): ", destclusterlabel)
    return destclusterlabel
\ No newline at end of file
class NodeInfo:
    """Holds the per-node metrics of the generated graph (label, centrality,
    adjacency count, degree, betweenness, betweenness centrality)."""

    # Class-level defaults so attribute access is safe even before __init__.
    label = None
    centrality = None
    adjacencies = None
    degree = None
    betweenness = None
    betweenness_centrality = None

    def __init__(self):
        # Placeholder values; real metrics are filled in by the graph analysis.
        self.label = 'Node123'
        self.betweenness = None
        for metric in ('centrality', 'adjacencies', 'degree', 'betweenness_centrality'):
            setattr(self, metric, 0)
import networkx as nx
import matplotlib.pyplot as plt
from collections import Counter
import HyperGraph as hg
from HyperGraph import HyperGraph
import warnings
# pip install networkx
......@@ -13,9 +13,29 @@ import warnings
class SemanticLinking:
def __init__(self):
hg.classify()
hg: HyperGraph = None
df_nodes = []
destf_nodes = []
G: nx.MultiDiGraph = None
color_map = {1: '#f09494', 2: '#eebcbc', 3: '#72bbd0', 4: '#91f0a1', 5: '#629fff', 6: '#bcc2f2',
7: '#eebcbc', 8: '#f1f0c0', 9: '#d2ffe7', 10: '#caf3a6', 11: '#ffdf55', 12: '#ef77aa',
13: '#d6dcff', 14: '#d2f5f0'}
def __init__(self):
warnings.filterwarnings('ignore')
# init HyperGraph
self.hg = HyperGraph()
self.hg.classify()
self.df_nodes = self.hg.cluster_labels
self.destf_nodes = self.hg.dest_cluster_labels
# init visual graph
self.G = nx.MultiDiGraph(day="Stackoverflow")
def _color_network(self, G):
"""Colors the network so that neighboring nodes all have distinct colors.
......@@ -30,7 +50,6 @@ class SemanticLinking:
coloring[color] = set([node])
return coloring
def _labeling_complete(self, labeling, G):
"""Determines whether or not LPA is done.
......@@ -42,7 +61,6 @@ class SemanticLinking:
return all(labeling[v] in self._most_frequent_labels(v, labeling, G)
for v in G if len(G[v]) > 0)
def _most_frequent_labels(self, node, labeling, G):
"""Returns a set of all labels with maximum frequency in `labeling`.
......@@ -58,7 +76,6 @@ class SemanticLinking:
max_freq = max(freqs.values())
return {label for label, freq in freqs.items() if freq == max_freq}
def _update_label(self, node, labeling, G):
"""Updates the label of a node using the Prec-Max tie breaking algorithm
......@@ -71,57 +88,30 @@ class SemanticLinking:
elif len(high_labels) > 1:
# Prec-Max
if labeling[node] not in high_labels:
labeling[node] = max(high_labels)
warnings.filterwarnings('ignore')
#G = nx.DiGraph(directed=True)
G = nx.MultiDiGraph(day="Stackoverflow")
df_nodes = hg.clusterlabels
destf_nodes = hg.destclusterlabel
color_map = {1: '#f09494', 2: '#eebcbc', 3: '#72bbd0', 4: '#91f0a1', 5: '#629fff', 6: '#bcc2f2',
7: '#eebcbc', 8: '#f1f0c0', 9: '#d2ffe7', 10: '#caf3a6', 11: '#ffdf55', 12: '#ef77aa',
13: '#d6dcff', 14: '#d2f5f0'}
i=0
graphedge = []
weigth = []
sourcedestination = []
source = []
dest = []
edge_width = []
weight1 = []
node_adjacencies = []
labeling = {}
labeling[node] = max(high_labels)
def drawedges(self):
"""drawing edges in graph"""
labelvalues = self.hg.label_values
weight1 = []
for drow in range(len(self.df_nodes)):
for row in range(len(self.destf_nodes[drow])):
self.G.add_edge(self.df_nodes[drow], self.destf_nodes[drow][row])
for row in range(len(hg.labalvlues)):
for row1 in range(len(hg.labalvlues)):
self.weight1.append(self.G.number_of_edges(hg.labalvlues[row], hg.labalvlues[row1]))
print("The number of coccurance from node ", hg.labalvlues[row],"to node ", hg.labalvlues[row1], ": ", self.weight1[row1])
for row in range(len(labelvalues)):
for row1 in range(len(labelvalues)):
weight1.append(self.G.number_of_edges(labelvalues[row], labelvalues[row1]))
print("The number of coccurance from node ", labelvalues[row],"to node ", labelvalues[row1], ": ", weight1[row1])
self.G.__setattr__('weight', self.weight1)
self.G.weight = weight1
return weight1
def dolabeling(self):
"""label_propagation_communities(G) """
coloring = self._color_network(self.G)
# Create a unique label for each node in the graph
# Create a unique label for each node in the graph
labeling = {v: k for k, v in enumerate(self.G)}
print("lable value: ", labeling.values())
while not self._labeling_complete(labeling, self.G):
......@@ -132,43 +122,46 @@ class SemanticLinking:
self._update_label(n, labeling, self.G)
for label in set(labeling.values()):
print("lable value: ", labeling.values())
self.labeling = labeling
return labeling
def findigneighbors(self):
""" findig nodes' adjecencies"""
node_text = []
node_adjacencies = []
for node, adjacencies in enumerate(self.G.adjacency()):
self.node_adjacencies.append(len(adjacencies[1]))
node_adjacencies.append(len(adjacencies[1]))
node_text.append('# of connections: '+str(len(adjacencies[1])))
self.G.color = self.node_adjacencies
self.G.color = node_adjacencies
return node_adjacencies
def result(self):
plt.figure(figsize=(25, 25))
options = {
'with_labels': True,
'font_weight': 'regular',
}
def print_metrics(self, weight1, labeling, node_adjacencies):
weigth = []
edge_width = []
plt.figure(figsize=(25, 25))
# colors = [color_map[G.node[node][1]] for node in G]
# sizes = [G.node[node]['Timestamp'] * 10 for node in G]
d = nx.degree_centrality(self.G)
d_list = list(d.values())
print("node centrality: ", d_list)
print("node adjacencies: ", self.node_adjacencies)
for row in range(len(self.weigth)):
self.edge_width.append([])
for drow in range(len(self.weigth[row])):
self.edge_width[row].append(self.weigth[row][drow])
node_size = [v * 80 for v in d.values()] # setting node size based on node centrality
edge_width = [row * 0.5 for row in self.weight1]
print("node adjacencies: ", node_adjacencies)
for row in range(len(weigth)):
edge_width.append([])
for drow in range(len(weigth[row])):
edge_width[row].append(weigth[row][drow])
edge_width = [row * 0.5 for row in weight1]
print("Nodes' Degree: ", nx.degree(self.G))
print("Nodes' Betweeness ", nx.edge_betweenness_centrality(self.G))
print("Nodes' Betweeness-centrality: ", nx.betweenness_centrality(self.G))
def draw_edges(self, weight1, labeling, node_adjacencies):
"""
Using the spring layout :
- k controls the distance between the nodes and varies between 0 and 1
......@@ -176,16 +169,22 @@ class SemanticLinking:
default k=0.1 and iterations=50
"""
labels2 = {}
labels2 = {}
options = {
'with_labels': True,
'font_weight': 'regular',
}
d = nx.degree_centrality(self.G)
node_size = [v * 80 for v in d.values()] # setting node size based on node centrality
for idx, edge in enumerate(self.G.edges):
labels2[edge] = "s"
pos_nodes = nx.spring_layout(self.G, k=0.25, iterations=50)
nx.draw(self.G, pos_nodes, node_color=self.node_adjacencies, node_size=node_size, width=2, arrowstyle='->',
arrowsize=10, weight=self.weight1, edge_color='gray', **options)
nx.draw(self.G, pos_nodes, node_color=node_adjacencies, node_size=node_size, width=2, arrowstyle='->',
arrowsize=10, weight=weight1, edge_color='gray', **options)
edge_labels = nx.get_edge_attributes(self.G, 'weight')
......@@ -193,18 +192,23 @@ class SemanticLinking:
for node, coords in pos_nodes.items():
pos_attrs[node] = (coords[0], coords[1] + 0.02)
nx.draw_networkx_edge_labels(self.G, pos_nodes, edge_labels=edge_labels, font_size=10, font_color='red')
nx.draw_networkx_labels(self.G, pos_attrs, labels=self.labeling, font_size=10, font_color='red')
nx.draw_networkx_labels(self.G, pos_attrs, labels=labeling, font_size=10, font_color='red')
ax = plt.gca()
ax.collections[0].set_edgecolor("#555555")
plt.show()
def main(self):
self.drawedges()
self.dolabeling()
self.findigneighbors()
self.result()
weight1 = self.drawedges()
labeling = self.dolabeling()
node_adjacencies = self.findigneighbors()
self.print_metrics(weight1, labeling, node_adjacencies)
self.draw_edges(weight1, labeling, node_adjacencies)
linking = SemanticLinking()
linking.main()
\ No newline at end of file
if __name__ == '__main__':
linking = SemanticLinking()
linking.main()
\ No newline at end of file
from flask import request, Response
from initialdemo.NodeInfo import NodeInfo
import pickle as json
def get():
    """REST handler for /graphinfo: return node metrics as a list of dicts.

    Currently serves a single placeholder NodeInfo instance.
    """
    # TODO return real graph infos
    placeholder = NodeInfo()
    return [vars(placeholder)]
# add modules folder to interpreter path
import sys
import os

# Candidate locations of the shared modules folder, relative to the CWD.
modules_paths = ['../app/', '../../../modules/']
for candidate in modules_paths:
    if not os.path.exists(candidate):
        continue
    # Insert after the script dir (index 1) so local files still win lookups.
    sys.path.insert(1, candidate)
    print(f"added {candidate}")
import unittest
import manage_sys_paths
import json
from initialdemo.HyperGraph import HyperGraph
class Test_HyperGraph(unittest.TestCase):
    """Unit tests for HyperGraph.remove_duplicates."""

    hypergraph: HyperGraph = None

    def setUp(self):
        # Fresh instance per test; remove_duplicates keeps no state but this
        # mirrors standard unittest hygiene.
        self.hypergraph = HyperGraph()

    def _assert_dedup(self, data, expected):
        # Helper: running remove_duplicates on data must yield expected.
        self.assertEqual(expected, self.hypergraph.remove_duplicates(data))

    def test_removeDuplicates_noDupOrdered_sameContent(self):
        data = [[1, 2, 3]]
        self._assert_dedup(data, data)

    def test_removeDuplicates_oneDupOrdered_removed(self):
        self._assert_dedup([[1, 2, 3, 3]], [[1, 2, 3]])

    def test_removeDuplicates_multDupOrdered_allRemoved(self):
        self._assert_dedup([[1, 1, 2, 3, 3, 4]], [[1, 2, 3, 4]])

    def test_removeDuplicates_noDupUnordered_sameContent(self):
        data = [[1, 2, 3, 5, 9, 4, 30, 15]]
        self._assert_dedup(data, data)

    def test_removeDuplicates_oneDupUnordered_removed(self):
        self._assert_dedup([[1, 2, 3, 5, 9, 4, 30, 5, 15]],
                           [[1, 2, 3, 5, 9, 4, 30, 15]])

    def test_removeDuplicates_multDupUnordered_allRemoved(self):
        self._assert_dedup([[1, 2, 5, 3, 1, 70, 25, -1, 7, -1]],
                           [[1, 2, 5, 3, 70, 25, -1, 7]])

    def test_removeDuplicates_oneDupOrderedMultDim_removed(self):
        self._assert_dedup([[1, 1, 2], [2, 2, 3]], [[1, 2], [2, 3]])

    def test_removeDuplicates_multDupOrderedMultDim_allRemoved(self):
        self._assert_dedup([[1, 1, 2, 3, 3], [2, 2, 3, 4, 4, 5]],
                           [[1, 2, 3], [2, 3, 4, 5]])

    def test_removeDuplicates_multDupUnorderedMultDim_allRemoved(self):
        self._assert_dedup([[1, 2, 5, 2, 7, 3], [-10, 5, 3, 20, -10, -7]],
                           [[1, 2, 5, 7, 3], [-10, 5, 3, 20, -7]])

    def test_removeDuplicates_multDupUnorderedMultDim2_allRemoved(self):
        self._assert_dedup([[1, 2, 5, 2, 7, 3], [-10, 5, 3, 20, -10, -7], [1, 2]],
                           [[1, 2, 5, 7, 3], [-10, 5, 3, 20, -7], [1, 2]])

    def test_removeDuplicates_multDupUnorderedTripleDim_noDupRemoved(self):
        # Deduplication is only one level deep: inner-inner lists untouched.
        data = [[[1, 2, 5, 2, 7, 3], [-10, 5, 3, 20, -10, -7], [1, 2]]]
        self._assert_dedup(data, data)


if __name__ == '__main__':
    unittest.main()
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment