Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
SMART
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
3
Issues
3
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Registry
Registry
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
UNI-KLU
SMART
Commits
2caa4a25
Commit
2caa4a25
authored
Sep 09, 2019
by
zahra
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Semantic linking was added.
parent
2cae7884
Changes
5
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
11497 additions
and
10 deletions
+11497
-10
.gitignore
.gitignore
+2
-1
HyperGraph.py
...mantic-linking-microservice/app/initialdemo/HyperGraph.py
+140
-0
SemanticLinking.py
...c-linking-microservice/app/initialdemo/SemanticLinking.py
+205
-0
mult_in_out.json
...tic-linking-microservice/app/initialdemo/mult_in_out.json
+11150
-0
Processor.py
...-linking-microservice/app/intelligence_zahra/Processor.py
+0
-9
No files found.
.gitignore
View file @
2caa4a25
**/__pycache__
**/.vscode
*.log
\ No newline at end of file
**/.idea
*.log
data-hub/semantic-linking-microservice/app/initialdemo/HyperGraph.py
0 → 100644
View file @
2caa4a25
import
networkx
as
nx
import
matplotlib.pyplot
as
plt
import
pandas
as
pd
import
json
# Load the transaction records shipped next to this script; every record
# carries a 'TransactionFrom' and a 'TransactionTo' list of ids.
with open("mult_in_out.json", "r") as json_file:
    df_nodes = json.load(json_file)

# Module-level accumulators.  SemanticLinking imports this module as `hg`
# and reads clusterlabels, destclusterlabel and labalvlues from it, so
# these (typo'd) names are part of the module's public interface.
nodeIds, destIds = [], []                    # per-row input / output id lists
clusterlabels, destclusterlabel = [], []     # cluster label(s) per row
cluster, labalvlues = [], []                 # id buckets / distinct labels
i = 0                                        # row counter (printed later)
# Gather the input-id list of every transaction (echoing each id as the
# original did), then the output-id list, and give each input row a
# provisional cluster label equal to its own index.
for row in df_nodes:
    for input_id in row['TransactionFrom']:
        print(" Input Ids: ", input_id)
    nodeIds.append(row['TransactionFrom'])
print("This is nodes: ", nodeIds)

for row in df_nodes:
    destIds.append(row['TransactionTo'])

for ids in nodeIds:
    print(ids)
print("Finish InputIDs")

# Provisional labels 0..n-1; the merging pass below unifies the labels
# of rows that share an input id.  `i` keeps the row count.
clusterlabels.extend(range(len(nodeIds)))
i += len(nodeIds)
print(i)
"""" classifying Inputs"""
"""" Labaling inputs"""
for
row
in
range
(
len
(
nodeIds
)):
for
rown
in
range
(
len
(
nodeIds
[
row
])):
for
row1
in
range
(
len
(
nodeIds
)):
for
rown1
in
range
(
len
(
nodeIds
[
row1
])):
if
(
nodeIds
[
row
][
rown
]
==
nodeIds
[
row1
][
rown1
]):
# print("row: ",row,"row1: ",row1)
if
(
row
<
row1
):
for
row2
in
clusterlabels
:
if
(
clusterlabels
[
row1
]
==
clusterlabels
[
row2
]):
clusterlabels
[
row2
]
=
clusterlabels
[
row
]
clusterlabels
[
row1
]
=
clusterlabels
[
row
]
else
:
for
row2
in
clusterlabels
:
if
(
clusterlabels
[
row
]
==
clusterlabels
[
row2
]):
clusterlabels
[
row2
]
=
clusterlabels
[
row1
]
clusterlabels
[
row
]
=
clusterlabels
[
row1
]
print
(
clusterlabels
)
print
(
"cluster labels:"
,
len
(
clusterlabels
))
print
(
"NodeIDs: "
,
len
(
nodeIds
))
"""" Calculating the number of clusters"""
clusternum
=
1
labalvlues
.
append
(
clusterlabels
[
0
])
for
row
in
range
(
len
(
clusterlabels
)):
flag
=
True
for
row1
in
range
(
len
(
labalvlues
)):
if
(
clusterlabels
[
row
]
==
labalvlues
[
row1
]):
flag
=
False
if
(
flag
):
clusternum
=
+
1
labalvlues
.
append
(
clusterlabels
[
row
])
print
(
"label values (source Ids in the network): "
,
labalvlues
,
" and the number of clusters is: "
,
len
(
labalvlues
))
"""" clustering Ids according to their labels"""
for
row
in
range
(
len
(
labalvlues
)):
cluster
.
append
([])
for
row3
in
range
(
len
(
nodeIds
)):
if
(
labalvlues
[
row
]
==
clusterlabels
[
row3
]):
cluster
[
row
]
.
extend
(
nodeIds
[
row3
])
print
(
"clusters: "
,
cluster
)
""" Removing duplicating items in cluster"""
flag
=
True
while
(
flag
):
for
row
in
range
(
len
(
cluster
)):
flag
=
False
for
row1
in
range
(
len
(
cluster
[
row
])):
flag
=
False
for
row2
in
range
(
len
(
cluster
[
row
])):
if
(
row1
!=
row2
):
if
(
cluster
[
row
][
row1
]
==
cluster
[
row
][
row2
]):
del
cluster
[
row
][
row2
]
flag
=
True
break
if
(
flag
):
break
if
(
flag
):
break
print
(
"cluster:"
,
cluster
)
"""" Clustering Destination Ids """
for
row
in
range
(
len
(
destIds
)):
destclusterlabel
.
append
([])
for
row2
in
range
(
len
(
destIds
[
row
])):
flag
=
True
for
rownum
in
range
(
len
(
labalvlues
)):
for
row1
in
range
(
len
(
cluster
[
rownum
])):
if
(
destIds
[
row
][
row2
]
==
cluster
[
rownum
][
row1
]):
destclusterlabel
[
row
]
.
append
(
labalvlues
[
rownum
])
flag
=
False
if
(
flag
):
destclusterlabel
.
append
(
destIds
[
row
][
row2
])
print
(
"destination labels (destination Ids): "
,
destclusterlabel
)
data-hub/semantic-linking-microservice/app/initialdemo/SemanticLinking.py
0 → 100644
View file @
2caa4a25
import
networkx
as
nx
import
matplotlib.pyplot
as
plt
from
collections
import
Counter
import
HyperGraph
as
hg
import
pandas
as
pd
import
json
import
warnings
import
csv
import
community
import
time
import
mplleaflet
import
values
as
values
from
matplotlib
import
colors
def _color_network(G):
    """Color the network so that neighboring nodes all have distinct colors.

    Returns a dict keyed by color to a set of nodes with that color.
    """
    coloring = dict()  # color => set(node)
    colors = nx.coloring.greedy_color(G)
    for node, color in colors.items():
        # setdefault creates the color bucket on first sight, replacing
        # the original check-then-add branches.
        coloring.setdefault(color, set()).add(node)
    return coloring
def
_labeling_complete
(
labeling
,
G
):
"""Determines whether or not LPA is done.
Label propagation is complete when all nodes have a label that is
in the set of highest frequency labels amongst its neighbors.
Nodes with no neighbors are considered complete.
"""
return
all
(
labeling
[
v
]
in
_most_frequent_labels
(
v
,
labeling
,
G
)
for
v
in
G
if
len
(
G
[
v
])
>
0
)
def
_most_frequent_labels
(
node
,
labeling
,
G
):
"""Returns a set of all labels with maximum frequency in `labeling`.
Input `labeling` should be a dict keyed by node to labels.
"""
if
not
G
[
node
]:
# Nodes with no neighbors are themselves a community and are labeled
# accordingly, hence the immediate if statement.
return
{
labeling
[
node
]}
# Compute the frequencies of all neighbours of node
freqs
=
Counter
(
labeling
[
q
]
for
q
in
G
[
node
])
max_freq
=
max
(
freqs
.
values
())
return
{
label
for
label
,
freq
in
freqs
.
items
()
if
freq
==
max_freq
}
def _update_label(node, labeling, G):
    """Update the label of *node* using the Prec-Max tie-breaking rule.

    The algorithm is explained in: 'Community Detection via
    Semi-Synchronous Label Propagation Algorithms', Cordasco and
    Gargano, 2011.
    """
    winners = _most_frequent_labels(node, labeling, G)
    if len(winners) == 1:
        labeling[node] = winners.pop()
        return
    if len(winners) > 1 and labeling[node] not in winners:
        # Prec-Max: keep the current label when it is among the tied
        # winners, otherwise adopt the largest winning label.
        labeling[node] = max(winners)
# ---------------------------------------------------------------------------
# Script body: build a multigraph from the clusters HyperGraph computed at
# import time, run semi-synchronous label propagation over it, and draw the
# result with matplotlib.
# ---------------------------------------------------------------------------
warnings.filterwarnings('ignore')

#G = nx.DiGraph(directed=True)
G = nx.MultiDiGraph(day="Stackoverflow")

# Per-row cluster labels and per-row destination-label lists computed by
# HyperGraph when it was imported above.
df_nodes = hg.clusterlabels
destf_nodes = hg.destclusterlabel

# Palette keyed by small ints; only referenced from the commented-out
# node-coloring line further down.
color_map = {1: '#f09494', 2: '#eebcbc', 3: '#72bbd0', 4: '#91f0a1',
             5: '#629fff', 6: '#bcc2f2', 7: '#eebcbc', 8: '#f1f0c0',
             9: '#d2ffe7', 10: '#caf3a6', 11: '#ffdf55', 12: '#ef77aa',
             13: '#d6dcff', 14: '#d2f5f0'}

i = 0
graphedge = []
# NOTE(review): `weigth` (sic) is never appended to anywhere below, so
# the edge_width copy loop near the bottom never runs; the real edge
# counts live in `weight1`.
weigth = []
# NOTE(review): i, graphedge, sourcedestination, source and dest are
# never read again in this script.
sourcedestination = []
source = []
dest = []
edge_width = []
weight1 = []

""""drawing edges in graph"""
# One edge from each row's cluster label to every entry in that row's
# destination-label list (parallel edges are kept by the MultiDiGraph).
for drow in range(len(df_nodes)):
    for row in range(len(destf_nodes[drow])):
        G.add_edge(df_nodes[drow], destf_nodes[drow][row])

# Count parallel edges between every ordered pair of cluster labels.
for row in range(len(hg.labalvlues)):
    for row1 in range(len(hg.labalvlues)):
        weight1.append(G.number_of_edges(hg.labalvlues[row], hg.labalvlues[row1]))
        # NOTE(review): weight1 keeps growing across the outer loop but
        # is indexed by row1 only, so from the second outer iteration on
        # this prints the FIRST row's counts — looks like a bug; the
        # intended index is presumably row * len(hg.labalvlues) + row1.
        print("The number of coccurance from node ", hg.labalvlues[row], "to node ", hg.labalvlues[row1], ": ", weight1[row1])

# Stores the flat count list as a plain Python attribute on the graph
# object; this does NOT attach per-edge 'weight' data.
G.__setattr__('weight', weight1)

# print(float(row['Timestamp']))
#G.add_weighted_edges_from([(row['TransactionFrom'], row['TransactionTo'], i*j)])
#print dict_pos

"""label_propagation_communities(G) """
# Semi-synchronous LPA: a proper coloring guarantees nodes of one color
# share no edge, so all nodes of a color can be updated together.
coloring = _color_network(G)
# Create a unique label for each node in the graph
labeling = {v: k for k, v in enumerate(G)}
print("lable value: ", labeling.values())
while not _labeling_complete(labeling, G):
    # Update the labels of every node with the same color.
    print("lable value: ", labeling.values())
    for color, nodes in coloring.items():
        for n in nodes:
            _update_label(n, labeling, G)
    # NOTE(review): this loop only re-prints the same dict view once per
    # distinct label; it performs no further work.
    for label in set(labeling.values()):
        print("lable value: ", labeling.values())

""" findig nodes' adjecencies"""
# adjacencies is a (node, neighbor-dict) pair; its length is the degree.
node_adjacencies = []
node_text = []
for node, adjacencies in enumerate(G.adjacency()):
    node_adjacencies.append(len(adjacencies[1]))
    node_text.append('# of connections: ' + str(len(adjacencies[1])))
# Plain attribute on the graph object; not read by the drawing calls.
G.color = node_adjacencies

plt.figure(figsize=(25, 25))
options = {
    'with_labels': True,
    'font_weight': 'regular',
}
#colors = [color_map[G.node[node][1]] for node in G]
#sizes = [G.node[node]['Timestamp'] * 10 for node in G]
d = nx.degree_centrality(G)
d_list = list(d.values())
print("node centrality: ", d_list)
print("node adjacencies: ", node_adjacencies)

# NOTE(review): weigth is always empty (see above), so this loop never
# executes; edge_width is rebuilt from weight1 a few lines below.
for row in range(len(weigth)):
    edge_width.append([])
    for drow in range(len(weigth[row])):
        edge_width[row].append(weigth[row][drow])

node_size = [v * 80 for v in d.values()]
#setting node size based on node centrality
edge_width = [row * 0.5 for row in weight1]

print("Nodes' Degree: ", nx.degree(G))
print("Nodes' Betweeness ", nx.edge_betweenness_centrality(G))
print("Nodes' Betweeness-centrality: ", nx.betweenness_centrality(G))

"""
Using the spring layout :
- k controls the distance between the nodes and varies between 0 and 1
- iterations is the number of times simulated annealing is run
default k=0.1 and iterations=50
"""
# NOTE(review): labels2 is built here but never passed to any drawing
# call below.
labels2 = {}
for idx, edge in enumerate(G.edges):
    labels2[edge] = "s"
pos_nodes = nx.spring_layout(G, k=0.25, iterations=50)
ax = plt.gca()
# NOTE(review): `weight=` is not a documented nx.draw keyword — confirm
# it actually affects the rendering.
nx.draw(G, pos_nodes, node_color=node_adjacencies, node_size=node_size,
        width=2, arrowstyle='->', arrowsize=10, weight=weight1,
        edge_color='gray', **options)
edge_labels = nx.get_edge_attributes(G, 'weight')
pos_attrs = {}
for node, coords in pos_nodes.items():
    # Shift each node label slightly above its node position.
    pos_attrs[node] = (coords[0], coords[1] + 0.02)
nx.draw_networkx_edge_labels(G, pos_nodes, edge_labels=edge_labels, font_size=10, font_color='red')
nx.draw_networkx_labels(G, pos_attrs, labels=labeling, font_size=10, font_color='red')
ax = plt.gca()
ax.collections[0].set_edgecolor("#555555")
plt.show()
data-hub/semantic-linking-microservice/app/initialdemo/mult_in_out.json
0 → 100644
View file @
2caa4a25
This diff is collapsed.
Click to expand it.
data-hub/semantic-linking-microservice/app/intelligence_zahra/Processor.py
deleted
100644 → 0
View file @
2cae7884
import
logging
LOGGER
=
logging
.
getLogger
(
__name__
)
class Processor:
    """Placeholder processing stage for incoming trace batches."""

    def __init__(self):
        pass

    def process(self, traces: list):
        """Process a batch of traces.

        Currently only logs the call; the actual processing logic is
        still to be implemented.

        :param traces: list of trace payloads to process
        """
        # Lazy %-formatting: the argument is rendered only when INFO is
        # enabled (the original eagerly built an f-string).
        LOGGER.info("called processing with: %s", traces)
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment