Semantic Linking: Created Classes for semantic linking logic

b365f613 · zahra · 2caa4a25 · b365f613 · b365f613 · b365f613
Commit b365f613 authored Sep 09, 2019 by zahra
3 changed files
--- a/data-hub/semantic-linking-microservice/app/Created_classes_for_semantic_linking_logic.patch
+++ b/data-hub/semantic-linking-microservice/app/Created_classes_for_semantic_linking_logic.patch
+Index: data-hub/semantic-linking-microservice/app/initialdemo/HyperGraph.py
+IDEA additional info:
+Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
+<+>UTF-8
+===================================================================
+--- data-hub/semantic-linking-microservice/app/initialdemo/HyperGraph.py	(date 1568037363000)
+++ data-hub/semantic-linking-microservice/app/initialdemo/HyperGraph.py	(date 1568038969230)
+@@ -1,140 +1,137 @@
+-import networkx as nx
+-import matplotlib.pyplot as plt
+-import pandas as pd
+ import json
+-
+-
+-with open("mult_in_out.json", "r") as json_file:
+-    df_nodes = json.load(json_file)
+-
+-
+ nodeIds = []
+-destIds= []
+-clusterlabels= []
+destIds = []
+clusterlabels = []
+ destclusterlabel = []
+-cluster= []
+cluster = []
+ labalvlues = []
+-i = 0
+
+def classify():
+
+    with open("mult_in_out.json", "r") as json_file:
+        df_nodes = json.load(json_file)
+
+-for row in df_nodes:
+    for row in df_nodes:
+-    for j in range(len(row['TransactionFrom'])):
+-        print("   Input Ids:  ", row['TransactionFrom'][j])
+-    nodeIds.append(row['TransactionFrom'])
+-    print("This is nodes:  ", nodeIds)
+        for j in range(len(row['TransactionFrom'])):
+            print("   Input Ids:  ", row['TransactionFrom'][j])
+        nodeIds.append(row['TransactionFrom'])
+        print("This is nodes:  ", nodeIds)
+-for row in df_nodes:
+-   destIds.append(row['TransactionTo'])
+    for row in df_nodes:
+       destIds.append(row['TransactionTo'])
+-for row in range(len(nodeIds)):
+-    print(nodeIds[row])
+    for row in range(len(nodeIds)):
+        print(nodeIds[row])
+-print("Finish InputIDs")
+-for row in range(len(nodeIds)):
+    print("Finish InputIDs")
+    i = 0
+    for row in range(len(nodeIds)):
+-    clusterlabels.append(row)
+-    i += 1
+-print(i)
+        clusterlabels.append(row)
+        i += 1
+    print(i)
+-"""" classifying Inputs"""
+-"""" Labaling inputs"""
+-for row in range(len(nodeIds)):
+    """" classifying Inputs"""
+    """" Labaling inputs"""
+    for row in range(len(nodeIds)):
+-    for rown in range(len(nodeIds[row])):
+        for rown in range(len(nodeIds[row])):
+-        for row1 in range(len(nodeIds)):
+-            for rown1 in range(len(nodeIds[row1])):
+-               if(nodeIds[row][rown]==nodeIds[row1][rown1]):
+-                  # print("row:  ",row,"row1:  ",row1)
+-                  if(row < row1):
+-                      for row2 in clusterlabels:
+-                          if( clusterlabels[row1]== clusterlabels[row2]):
+-                              clusterlabels[row2]=clusterlabels[row]
+-                      clusterlabels[row1] = clusterlabels[row]
+            for row1 in range(len(nodeIds)):
+                for rown1 in range(len(nodeIds[row1])):
+                   if(nodeIds[row][rown]==nodeIds[row1][rown1]):
+                      # print("row:  ",row,"row1:  ",row1)
+                      if(row < row1):
+                          for row2 in clusterlabels:
+                              if( clusterlabels[row1]== clusterlabels[row2]):
+                                  clusterlabels[row2]=clusterlabels[row]
+                          clusterlabels[row1] = clusterlabels[row]
+-                  else:
+-                      for row2 in clusterlabels:
+-                          if (clusterlabels[row] == clusterlabels[row2]):
+-                              clusterlabels[row2] = clusterlabels[row1]
+-                      clusterlabels[row] = clusterlabels[row1]
+                      else:
+                          for row2 in clusterlabels:
+                              if (clusterlabels[row] == clusterlabels[row2]):
+                                  clusterlabels[row2] = clusterlabels[row1]
+                          clusterlabels[row] = clusterlabels[row1]
+-print(clusterlabels)
+-print("cluster labels:", len(clusterlabels))
+-print("NodeIDs:   ", len(nodeIds))
+    print(clusterlabels)
+    print("cluster labels:", len(clusterlabels))
+    print("NodeIDs:   ", len(nodeIds))
+-"""" Calculating the number of clusters"""
+-clusternum = 1
+-labalvlues.append(clusterlabels[0])
+-for row in range(len(clusterlabels)):
+-    flag = True
+-    for row1 in range(len(labalvlues)):
+-      if(clusterlabels[row]== labalvlues[row1]):
+-         flag = False
+    """" Calculating the number of clusters"""
+    clusternum = 1
+    labalvlues.append(clusterlabels[0])
+    for row in range(len(clusterlabels)):
+        flag = True
+        for row1 in range(len(labalvlues)):
+          if(clusterlabels[row]== labalvlues[row1]):
+             flag = False
+-    if (flag):
+-        clusternum = + 1
+-        labalvlues.append(clusterlabels[row])
+        if (flag):
+            clusternum = + 1
+            labalvlues.append(clusterlabels[row])
+-print("label values (source Ids in the network):  ", labalvlues, " and the number of clusters is: ", len(labalvlues))
+    print("label values (source Ids in the network):  ", labalvlues, " and the number of clusters is: ", len(labalvlues))
+-"""" clustering Ids according to their labels"""
+    """" clustering Ids according to their labels"""
+-for row in range(len(labalvlues)):
+-    cluster.append([])
+-    for row3 in range(len(nodeIds)):
+-        if (labalvlues[row] == clusterlabels[row3]):
+-            cluster[row].extend(nodeIds[row3])
+-print("clusters:   ", cluster)
+    for row in range(len(labalvlues)):
+        cluster.append([])
+        for row3 in range(len(nodeIds)):
+            if (labalvlues[row] == clusterlabels[row3]):
+                cluster[row].extend(nodeIds[row3])
+    print("clusters:   ", cluster)
+-""" Removing duplicating items in cluster"""
+    """ Removing duplicating items in cluster"""
+-flag = True
+-while(flag):
+-  for row in range(len(cluster)):
+    flag = True
+    while(flag):
+      for row in range(len(cluster)):
+-    flag= False
+-    for row1 in range(len(cluster[row])):
+-        flag= False
+-        for row2 in range (len(cluster[row])):
+-             if(row1 != row2):
+-                    if(cluster[row][row1] == cluster[row][row2]):
+-                       del cluster[row][row2]
+-                       flag=True
+-                       break
+-        if(flag):
+-            break
+-    if(flag):
+-        break
+        flag= False
+        for row1 in range(len(cluster[row])):
+            flag= False
+            for row2 in range (len(cluster[row])):
+                 if(row1 != row2):
+                        if(cluster[row][row1] == cluster[row][row2]):
+                           del cluster[row][row2]
+                           flag=True
+                           break
+            if(flag):
+                break
+        if(flag):
+            break
+-print("cluster:", cluster)
+    print("cluster:", cluster)
+-"""" Clustering  Destination Ids """
+-for row in range(len(destIds)):
+-    destclusterlabel.append([])
+-    for row2 in range(len(destIds[row])):
+-        flag = True
+-        for rownum in range(len(labalvlues)):
+-            for row1 in range(len(cluster[rownum])):
+    """" Clustering  Destination Ids """
+    for row in range(len(destIds)):
+        destclusterlabel.append([])
+        for row2 in range(len(destIds[row])):
+            flag = True
+            for rownum in range(len(labalvlues)):
+                for row1 in range(len(cluster[rownum])):
+-               if(destIds[row][row2]== cluster[rownum][row1]):
+-                    destclusterlabel[row].append(labalvlues[rownum])
+-                    flag = False
+-        if(flag):
+-            destclusterlabel.append(destIds[row][row2])
+                   if(destIds[row][row2]== cluster[rownum][row1]):
+                        destclusterlabel[row].append(labalvlues[rownum])
+                        flag = False
+            if(flag):
+                destclusterlabel.append(destIds[row][row2])
+-print("destination labels (destination Ids):  ", destclusterlabel)
+    print("destination labels (destination Ids):  ", destclusterlabel)
+Index: data-hub/semantic-linking-microservice/app/initialdemo/SemanticLinking.py
+IDEA additional info:
+Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
+<+>UTF-8
+===================================================================
+--- data-hub/semantic-linking-microservice/app/initialdemo/SemanticLinking.py	(date 1568037363000)
+++ data-hub/semantic-linking-microservice/app/initialdemo/SemanticLinking.py	(date 1568040344378)
+@@ -1,7 +1,7 @@
+ import networkx as nx
+ import matplotlib.pyplot as plt
+ from collections import Counter
+-import HyperGraph as hg
+import initialdemo.HyperGraph as hg
+ import pandas as pd
+ import json
+ import warnings
+@@ -12,194 +12,198 @@
+ import values as values
+ from matplotlib import colors
+-def _color_network(G):
+-    """Colors the network so that neighboring nodes all have distinct colors.
+class SemanticLinking:
+
+    def __init__(self):
+        hg.classify()
+
+    def _color_network(self, G):
+        """Colors the network so that neighboring nodes all have distinct colors.
+-       Returns a dict keyed by color to a set of nodes with that color.
+-    """
+-    coloring = dict()  # color => set(node)
+-    colors = nx.coloring.greedy_color(G)
+-    for node, color in colors.items():
+-        if color in coloring:
+-            coloring[color].add(node)
+-        else:
+-            coloring[color] = set([node])
+-    return coloring
+           Returns a dict keyed by color to a set of nodes with that color.
+        """
+        coloring = dict()  # color => set(node)
+        colors = nx.coloring.greedy_color(G)
+        for node, color in colors.items():
+            if color in coloring:
+                coloring[color].add(node)
+            else:
+                coloring[color] = set([node])
+        return coloring
+-def _labeling_complete(labeling, G):
+-    """Determines whether or not LPA is done.
+    def _labeling_complete(self, labeling, G):
+        """Determines whether or not LPA is done.
+-       Label propagation is complete when all nodes have a label that is
+-       in the set of highest frequency labels amongst its neighbors.
+           Label propagation is complete when all nodes have a label that is
+           in the set of highest frequency labels amongst its neighbors.
+-       Nodes with no neighbors are considered complete.
+-    """
+-    return all(labeling[v] in _most_frequent_labels(v, labeling, G)
+-               for v in G if len(G[v]) > 0)
+           Nodes with no neighbors are considered complete.
+        """
+        return all(labeling[v] in self._most_frequent_labels(v, labeling, G)
+                   for v in G if len(G[v]) > 0)
+-def _most_frequent_labels(node, labeling, G):
+-    """Returns a set of all labels with maximum frequency in `labeling`.
+    def _most_frequent_labels(self, node, labeling, G):
+        """Returns a set of all labels with maximum frequency in `labeling`.
+-       Input `labeling` should be a dict keyed by node to labels.
+-    """
+-    if not G[node]:
+-        # Nodes with no neighbors are themselves a community and are labeled
+-        # accordingly, hence the immediate if statement.
+-        return {labeling[node]}
+           Input `labeling` should be a dict keyed by node to labels.
+        """
+        if not G[node]:
+            # Nodes with no neighbors are themselves a community and are labeled
+            # accordingly, hence the immediate if statement.
+            return {labeling[node]}
+-    # Compute the frequencies of all neighbours of node
+-    freqs = Counter(labeling[q] for q in G[node])
+-    max_freq = max(freqs.values())
+-    return {label for label, freq in freqs.items() if freq == max_freq}
+        # Compute the frequencies of all neighbours of node
+        freqs = Counter(labeling[q] for q in G[node])
+        max_freq = max(freqs.values())
+        return {label for label, freq in freqs.items() if freq == max_freq}
+-def _update_label(node, labeling, G):
+-    """Updates the label of a node using the Prec-Max tie breaking algorithm
+    def _update_label(self, node, labeling, G):
+        """Updates the label of a node using the Prec-Max tie breaking algorithm
+-       The algorithm is explained in: 'Community Detection via Semi-Synchronous
+-       Label Propagation Algorithms' Cordasco and Gargano, 2011
+-    """
+-    high_labels = _most_frequent_labels(node, labeling, G)
+-    if len(high_labels) == 1:
+-        labeling[node] = high_labels.pop()
+-    elif len(high_labels) > 1:
+-        # Prec-Max
+-        if labeling[node] not in high_labels:
+           The algorithm is explained in: 'Community Detection via Semi-Synchronous
+           Label Propagation Algorithms' Cordasco and Gargano, 2011
+        """
+        high_labels = self._most_frequent_labels(node, labeling, G)
+        if len(high_labels) == 1:
+            labeling[node] = high_labels.pop()
+        elif len(high_labels) > 1:
+            # Prec-Max
+            if labeling[node] not in high_labels:
+-         labeling[node] = max(high_labels)
+             labeling[node] = max(high_labels)
+-warnings.filterwarnings('ignore')
+    warnings.filterwarnings('ignore')
+-#G = nx.DiGraph(directed=True)
+-G = nx.MultiDiGraph(day="Stackoverflow")
+    #G = nx.DiGraph(directed=True)
+    G = nx.MultiDiGraph(day="Stackoverflow")
+-df_nodes = hg.clusterlabels
+-destf_nodes = hg.destclusterlabel
+-color_map = {1: '#f09494', 2: '#eebcbc', 3: '#72bbd0', 4: '#91f0a1', 5: '#629fff', 6: '#bcc2f2',
+-             7: '#eebcbc', 8: '#f1f0c0', 9: '#d2ffe7', 10: '#caf3a6', 11: '#ffdf55', 12: '#ef77aa',
+-             13: '#d6dcff', 14: '#d2f5f0'}
+-i=0
+    df_nodes = hg.clusterlabels
+    destf_nodes = hg.destclusterlabel
+    color_map = {1: '#f09494', 2: '#eebcbc', 3: '#72bbd0', 4: '#91f0a1', 5: '#629fff', 6: '#bcc2f2',
+                 7: '#eebcbc', 8: '#f1f0c0', 9: '#d2ffe7', 10: '#caf3a6', 11: '#ffdf55', 12: '#ef77aa',
+                 13: '#d6dcff', 14: '#d2f5f0'}
+    i=0
+-graphedge=[]
+-weigth=[]
+-sourcedestination = []
+-source = []
+-dest = []
+-edge_width = []
+-weight1 = []
+    graphedge = []
+    weigth = []
+    sourcedestination = []
+    source = []
+    dest = []
+    edge_width = []
+    weight1 = []
+    node_adjacencies = []
+-""""drawing edges in graph"""
+    def drawedges(self):
+-for drow in range(len(df_nodes)):
+-    for row in range(len(destf_nodes[drow])):
+-       G.add_edge(df_nodes[drow], destf_nodes[drow][row])
+        """drawing edges in graph"""
+
+        for drow in range(len(self.df_nodes)):
+            for row in range(len(self.destf_nodes[drow])):
+               self.G.add_edge(self.df_nodes[drow], self.destf_nodes[drow][row])
+-for row in range(len(hg.labalvlues)):
+-    for row1 in range(len(hg.labalvlues)):
+-       weight1.append(G.number_of_edges(hg.labalvlues[row], hg.labalvlues[row1]))
+-       print("The number of coccurance from node ", hg.labalvlues[row],"to node ", hg.labalvlues[row1], ": ", weight1[row1])
+-
+-G.__setattr__('weight', weight1)
+        for row in range(len(hg.labalvlues)):
+            for row1 in range(len(hg.labalvlues)):
+               self.weight1.append(self.G.number_of_edges(hg.labalvlues[row], hg.labalvlues[row1]))
+               print("The number of coccurance from node ", hg.labalvlues[row],"to node ", hg.labalvlues[row1], ": ", self.weight1[row1])
+-   # print(float(row['Timestamp']))
+-    #G.add_weighted_edges_from([(row['TransactionFrom'], row['TransactionTo'], i*j)])
+        self.G.__setattr__('weight', self.weight1)
+-#print dict_pos
+-
+-"""label_propagation_communities(G) """
+    def labeling(self):
+        """label_propagation_communities(G) """
+-coloring = _color_network(G)
+-    # Create a unique label for each node in the graph
+-labeling = {v: k for k, v in enumerate(G)}
+-print("lable value: ", labeling.values())
+-while not _labeling_complete(labeling, G):
+-# Update the labels of every node with the same color.
+-    print("lable value: ", labeling.values())
+-    for color, nodes in coloring.items():
+-      for n in nodes:
+-        _update_label(n, labeling, G)
+-    for label in set(labeling.values()):
+-      print("lable value: ", labeling.values())
+        coloring = self._color_network(self.G)
+            # Create a unique label for each node in the graph
+        labeling = {v: k for k, v in enumerate(self.G)}
+        print("lable value: ", labeling.values())
+        while not self._labeling_complete(labeling, self.G):
+        # Update the labels of every node with the same color.
+            print("lable value: ", labeling.values())
+            for color, nodes in coloring.items():
+              for n in nodes:
+                self._update_label(n, labeling, self.G)
+            for label in set(labeling.values()):
+              print("lable value: ", labeling.values())
+-
+-""" findig nodes' adjecencies"""
+-node_adjacencies = []
+-node_text = []
+-for node, adjacencies in enumerate(G.adjacency()):
+-    node_adjacencies.append(len(adjacencies[1]))
+-    node_text.append('# of connections: '+str(len(adjacencies[1])))
+    def findigneighbors(self):
+        """ findig nodes' adjecencies"""
+        node_text = []
+        for node, adjacencies in enumerate(self.G.adjacency()):
+            self.node_adjacencies.append(len(adjacencies[1]))
+            node_text.append('# of connections: '+str(len(adjacencies[1])))
+-G.color = node_adjacencies
+        self.G.color = self.node_adjacencies
+-
+-plt.figure(figsize=(25, 25))
+-options = {
+-    'with_labels': True,
+-    'font_weight': 'regular',
+-}
+    def result(self):
+        plt.figure(figsize=(25, 25))
+        options = {
+            'with_labels': True,
+            'font_weight': 'regular',
+        }
+-#colors = [color_map[G.node[node][1]] for node in G]
+-#sizes = [G.node[node]['Timestamp'] * 10 for node in G]
+        # colors = [color_map[G.node[node][1]] for node in G]
+        # sizes = [G.node[node]['Timestamp'] * 10 for node in G]
+-
+-d = nx.degree_centrality(G)
+-d_list= list(d.values())
+-print ("node centrality: ",d_list)
+-print("node adjacencies: ", node_adjacencies)
+-for row in range(len(weigth)):
+-    edge_width.append([])
+-    for drow in range(len(weigth[row])):
+-        edge_width[row].append(weigth[row][drow])
+-node_size =  [v * 80 for v in d.values()] #setting node size based on node centrality
+-edge_width = [row * 0.5 for row in weight1]
+-
+-print("Nodes' Degree:  ", nx.degree(G))
+-print("Nodes' Betweeness  ", nx.edge_betweenness_centrality(G))
+-print("Nodes' Betweeness-centrality:  ", nx.betweenness_centrality(G))
+        d = nx.degree_centrality(self.G)
+        d_list = list(d.values())
+        print("node centrality: ", d_list)
+        print("node adjacencies: ", self.node_adjacencies)
+        for row in range(len(self.weigth)):
+            self.edge_width.append([])
+            for drow in range(len(self.weigth[row])):
+                self.edge_width[row].append(self.weigth[row][drow])
+        node_size = [v * 80 for v in d.values()]  # setting node size based on node centrality
+        edge_width = [row * 0.5 for row in self.weight1]
+-
+        print("Nodes' Degree:  ", nx.degree(self.G))
+        print("Nodes' Betweeness  ", nx.edge_betweenness_centrality(self.G))
+        print("Nodes' Betweeness-centrality:  ", nx.betweenness_centrality(self.G))
+-"""
+-Using the spring layout : 
+-- k controls the distance between the nodes and varies between 0 and 1
+-- iterations is the number of times simulated annealing is run
+-default k=0.1 and iterations=50
+-"""
+        """
+        Using the spring layout : 
+        - k controls the distance between the nodes and varies between 0 and 1
+        - iterations is the number of times simulated annealing is run
+        default k=0.1 and iterations=50
+        """
+-labels2 = {}
+        labels2 = {}
+-for idx, edge in enumerate(G.edges):
+-    labels2[edge] = "s"
+        for idx, edge in enumerate(self.G.edges):
+            labels2[edge] = "s"
+-pos_nodes=nx.spring_layout(G, k=0.25, iterations=50)
+-ax = plt.gca()
+        pos_nodes = nx.spring_layout(self.G, k=0.25, iterations=50)
+-nx.draw(G, pos_nodes,node_color= node_adjacencies, node_size=node_size, width=2, arrowstyle='->',arrowsize=10, weight=weight1, edge_color='gray',**options)
+-edge_labels = nx.get_edge_attributes(G, 'weight')
+        nx.draw(self.G, pos_nodes, node_color=self.node_adjacencies, node_size=node_size, width=2, arrowstyle='->',
+                arrowsize=10, weight=self.weight1, edge_color='gray', **options)
+        edge_labels = nx.get_edge_attributes(self.G, 'weight')
+-pos_attrs = {}
+-for node, coords in pos_nodes.items():
+-    pos_attrs[node] = (coords[0], coords[1] + 0.02)
+-nx.draw_networkx_edge_labels(G, pos_nodes, edge_labels=edge_labels, font_size=10, font_color='red')
+-nx.draw_networkx_labels(G, pos_attrs, labels=labeling,font_size=10, font_color='red')
+-
+-
+        pos_attrs = {}
+        for node, coords in pos_nodes.items():
+            pos_attrs[node] = (coords[0], coords[1] + 0.02)
+        nx.draw_networkx_edge_labels(self.G, pos_nodes, edge_labels=edge_labels, font_size=10, font_color='red')
+        nx.draw_networkx_labels(self.G, pos_attrs, labels=self.labeling, font_size=10, font_color='red')
+-ax = plt.gca()
+-ax.collections[0].set_edgecolor("#555555")
+-plt.show()
+        ax = plt.gca()
+        ax.collections[0].set_edgecolor("#555555")
+        plt.show()
+    def main(self):
+        self.drawedges()
+        self.labeling()
+        self.findigneighbors()
+        self.result()
+-
+linking = SemanticLinking()
+linking.main()
+\ No newline at end of file
--- a/data-hub/semantic-linking-microservice/app/initialdemo/HyperGraph.py
+++ b/data-hub/semantic-linking-microservice/app/initialdemo/HyperGraph.py
-import networkx as nx
-import matplotlib.pyplot as plt
-import pandas as pd
 import json
-with open("mult_in_out.json", "r") as json_file:
-    df_nodes = json.load(json_file)
 nodeIds = []
-destIds= []
+destIds = []
-clusterlabels= []
+clusterlabels = []
 destclusterlabel = []
-cluster= []
+cluster = []
 labalvlues = []
-i = 0
+def classify():
-for row in df_nodes:
+    with open("mult_in_out.json", "r") as json_file:
+        df_nodes = json.load(json_file)
-    for j in range(len(row['TransactionFrom'])):
-        print("   Input Ids:  ", row['TransactionFrom'][j])
-    nodeIds.append(row['TransactionFrom'])
-    print("This is nodes:  ", nodeIds)
-for row in df_nodes:
+    for row in df_nodes:
-   destIds.append(row['TransactionTo'])
+        for j in range(len(row['TransactionFrom'])):
+            print("   Input Ids:  ", row['TransactionFrom'][j])
+        nodeIds.append(row['TransactionFrom'])
+        print("This is nodes:  ", nodeIds)
-for row in range(len(nodeIds)):
-    print(nodeIds[row])
-print("Finish InputIDs")
+    for row in df_nodes:
-for row in range(len(nodeIds)):
+       destIds.append(row['TransactionTo'])
-    clusterlabels.append(row)
-    i += 1
-print(i)
-"""" classifying Inputs"""
+    for row in range(len(nodeIds)):
-"""" Labaling inputs"""
+        print(nodeIds[row])
-for row in range(len(nodeIds)):
-    for rown in range(len(nodeIds[row])):
+    print("Finish InputIDs")
+    i = 0
+    for row in range(len(nodeIds)):
-        for row1 in range(len(nodeIds)):
+        clusterlabels.append(row)
-            for rown1 in range(len(nodeIds[row1])):
+        i += 1
-               if(nodeIds[row][rown]==nodeIds[row1][rown1]):
+    print(i)
-                  # print("row:  ",row,"row1:  ",row1)
-                  if(row < row1):
-                      for row2 in clusterlabels:
-                          if( clusterlabels[row1]== clusterlabels[row2]):
-                              clusterlabels[row2]=clusterlabels[row]
-                      clusterlabels[row1] = clusterlabels[row]
-                  else:
+    """" classifying Inputs"""
-                      for row2 in clusterlabels:
+    """" Labaling inputs"""
-                          if (clusterlabels[row] == clusterlabels[row2]):
+    for row in range(len(nodeIds)):
-                              clusterlabels[row2] = clusterlabels[row1]
-                      clusterlabels[row] = clusterlabels[row1]
+        for rown in range(len(nodeIds[row])):
-print(clusterlabels)
+            for row1 in range(len(nodeIds)):
-print("cluster labels:", len(clusterlabels))
+                for rown1 in range(len(nodeIds[row1])):
-print("NodeIDs:   ", len(nodeIds))
+                   if(nodeIds[row][rown]==nodeIds[row1][rown1]):
+                      # print("row:  ",row,"row1:  ",row1)
+                      if(row < row1):
+                          for row2 in clusterlabels:
+                              if( clusterlabels[row1]== clusterlabels[row2]):
+                                  clusterlabels[row2]=clusterlabels[row]
+                          clusterlabels[row1] = clusterlabels[row]
+                      else:
+                          for row2 in clusterlabels:
+                              if (clusterlabels[row] == clusterlabels[row2]):
+                                  clusterlabels[row2] = clusterlabels[row1]
+                          clusterlabels[row] = clusterlabels[row1]
-"""" Calculating the number of clusters"""
+    print(clusterlabels)
-clusternum = 1
+    print("cluster labels:", len(clusterlabels))
-labalvlues.append(clusterlabels[0])
+    print("NodeIDs:   ", len(nodeIds))
-for row in range(len(clusterlabels)):
-    flag = True
-    for row1 in range(len(labalvlues)):
-      if(clusterlabels[row]== labalvlues[row1]):
-         flag = False
-    if (flag):
-        clusternum = + 1
-        labalvlues.append(clusterlabels[row])
-print("label values (source Ids in the network):  ", labalvlues, " and the number of clusters is: ", len(labalvlues))
+    """" Calculating the number of clusters"""
+    clusternum = 1
+    labalvlues.append(clusterlabels[0])
+    for row in range(len(clusterlabels)):
+        flag = True
+        for row1 in range(len(labalvlues)):
+          if(clusterlabels[row]== labalvlues[row1]):
+             flag = False
+        if (flag):
+            clusternum = + 1
+            labalvlues.append(clusterlabels[row])
+    print("label values (source Ids in the network):  ", labalvlues, " and the number of clusters is: ", len(labalvlues))
-"""" clustering Ids according to their labels"""
+    """" clustering Ids according to their labels"""
-for row in range(len(labalvlues)):
+    for row in range(len(labalvlues)):
-    cluster.append([])
+        cluster.append([])
-    for row3 in range(len(nodeIds)):
+        for row3 in range(len(nodeIds)):
-        if (labalvlues[row] == clusterlabels[row3]):
+            if (labalvlues[row] == clusterlabels[row3]):
-            cluster[row].extend(nodeIds[row3])
+                cluster[row].extend(nodeIds[row3])
-print("clusters:   ", cluster)
+    print("clusters:   ", cluster)
-""" Removing duplicating items in cluster"""
+    """ Removing duplicating items in cluster"""
-flag = True
+    flag = True
-while(flag):
+    while(flag):
-  for row in range(len(cluster)):
+      for row in range(len(cluster)):
-    flag= False
-    for row1 in range(len(cluster[row])):
        flag= False
-        for row2 in range (len(cluster[row])):
+        for row1 in range(len(cluster[row])):
-             if(row1 != row2):
+            flag= False
-                    if(cluster[row][row1] == cluster[row][row2]):
+            for row2 in range (len(cluster[row])):
-                       del cluster[row][row2]
+                 if(row1 != row2):
-                       flag=True
+                        if(cluster[row][row1] == cluster[row][row2]):
-                       break
+                           del cluster[row][row2]
+                           flag=True
+                           break
+            if(flag):
+                break
        if(flag):
            break
-    if(flag):
-        break
-print("cluster:", cluster)
+    print("cluster:", cluster)
-"""" Clustering  Destination Ids """
+    """" Clustering  Destination Ids """
-for row in range(len(destIds)):
+    for row in range(len(destIds)):
-    destclusterlabel.append([])
+        destclusterlabel.append([])
-    for row2 in range(len(destIds[row])):
+        for row2 in range(len(destIds[row])):
-        flag = True
+            flag = True
-        for rownum in range(len(labalvlues)):
+            for rownum in range(len(labalvlues)):
-            for row1 in range(len(cluster[rownum])):
+                for row1 in range(len(cluster[rownum])):
-               if(destIds[row][row2]== cluster[rownum][row1]):
+                   if(destIds[row][row2]== cluster[rownum][row1]):
-                    destclusterlabel[row].append(labalvlues[rownum])
+                        destclusterlabel[row].append(labalvlues[rownum])
-                    flag = False
+                        flag = False
-        if(flag):
+            if(flag):
-            destclusterlabel.append(destIds[row][row2])
+                destclusterlabel.append(destIds[row][row2])
-print("destination labels (destination Ids):  ", destclusterlabel)
+    print("destination labels (destination Ids):  ", destclusterlabel)
--- a/data-hub/semantic-linking-microservice/app/initialdemo/SemanticLinking.py
+++ b/data-hub/semantic-linking-microservice/app/initialdemo/SemanticLinking.py
 import networkx as nx
 import matplotlib.pyplot as plt
 from collections import Counter
-import HyperGraph as hg
+import initialdemo.HyperGraph as hg
 import pandas as pd
 import json
 import warnings
@@ -12,194 +12,198 @@ import mplleaflet
 import values as values
 from matplotlib import colors
-def _color_network(G):
+class SemanticLinking:
+    """Graph construction, label propagation and plotting in one class.
+
+    ``main`` runs the steps in order: ``drawedges``, ``labeling``,
+    ``findigneighbors`` and ``result``. The graph and the work lists are
+    class-level attributes defined further down in the class body.
+    """
-    """Colors the network so that neighboring nodes all have distinct colors.
-       Returns a dict keyed by color to a set of nodes with that color.
+    def __init__(self):
+        """Run ``hg.classify()`` once when an instance is created.
+
+        NOTE(review): presumably this is what fills ``hg.clusterlabels``,
+        ``hg.destclusterlabel`` and ``hg.labalvlues`` that the other methods
+        read -- confirm against HyperGraph.classify.
+        """
-    """
+        hg.classify()
-    coloring = dict()  # color => set(node)
-    colors = nx.coloring.greedy_color(G)
-    for node, color in colors.items():
-        if color in coloring:
-            coloring[color].add(node)
-        else:
-            coloring[color] = set([node])
-    return coloring
+    def _color_network(self, G):
+        """Group the nodes of ``G`` by the color a greedy coloring assigns.
+
+        Neighboring nodes all get distinct colors. Returns a dict keyed by
+        color to the set of nodes with that color.
+        """
+        nodes_by_color = {}  # color => set(node)
+        for vertex, shade in nx.coloring.greedy_color(G).items():
+            nodes_by_color.setdefault(shade, set()).add(vertex)
+        return nodes_by_color
-def _labeling_complete(labeling, G):
-    """Determines whether or not LPA is done.
-       Label propagation is complete when all nodes have a label that is
-       in the set of highest frequency labels amongst its neighbors.
-       Nodes with no neighbors are considered complete.
+    def _labeling_complete(self, labeling, G):
+        """Report whether label propagation (LPA) has finished.
+
+        It is finished when every node that has at least one neighbor
+        carries a label from the set of most frequent labels among its
+        neighbors. Nodes with no neighbors are skipped, i.e. complete.
+        """
+        for vertex in G:
+            if len(G[vertex]) > 0:
+                current = labeling[vertex]
+                if current not in self._most_frequent_labels(vertex, labeling, G):
+                    return False
+        return True
-    """
-    return all(labeling[v] in _most_frequent_labels(v, labeling, G)
-               for v in G if len(G[v]) > 0)
-def _most_frequent_labels(node, labeling, G):
-    """Returns a set of all labels with maximum frequency in `labeling`.
-       Input `labeling` should be a dict keyed by node to labels.
-    """
-    if not G[node]:
-        # Nodes with no neighbors are themselves a community and are labeled
-        # accordingly, hence the immediate if statement.
-        return {labeling[node]}
-    # Compute the frequencies of all neighbours of node
+    def _most_frequent_labels(self, node, labeling, G):
+        """Return the set of labels that occur most often among the
+        neighbors of ``node``.
+
+        ``labeling`` should be a dict keyed by node to labels.
+        """
+        neighbours = G[node]
+        if not neighbours:
+            # A node without neighbors is a community of its own, so its
+            # current label is the only candidate.
+            return {labeling[node]}
+        # Tally the labels carried by the neighbors and keep the top ones.
+        tally = Counter(labeling[q] for q in neighbours)
+        top = max(tally.values())
+        return {lbl for lbl, cnt in tally.items() if cnt == top}
-    freqs = Counter(labeling[q] for q in G[node])
-    max_freq = max(freqs.values())
-    return {label for label, freq in freqs.items() if freq == max_freq}
-def _update_label(node, labeling, G):
-    """Updates the label of a node using the Prec-Max tie breaking algorithm
-       The algorithm is explained in: 'Community Detection via Semi-Synchronous
-       Label Propagation Algorithms' Cordasco and Gargano, 2011
-    """
-    high_labels = _most_frequent_labels(node, labeling, G)
-    if len(high_labels) == 1:
-        labeling[node] = high_labels.pop()
-    elif len(high_labels) > 1:
-        # Prec-Max
-        if labeling[node] not in high_labels:
-         labeling[node] = max(high_labels)
+    def _update_label(self, node, labeling, G):
+        """Relabel ``node`` in place using the Prec-Max tie breaking algorithm.
+
+        The algorithm is explained in: 'Community Detection via
+        Semi-Synchronous Label Propagation Algorithms', Cordasco and
+        Gargano, 2011.
+        """
+        candidates = self._most_frequent_labels(node, labeling, G)
+        if not candidates:
+            return
+        if len(candidates) == 1:
+            labeling[node] = next(iter(candidates))
+            return
+        # Prec-Max: keep the current label when it is one of the winners,
+        # otherwise take the largest winning label.
+        if labeling[node] not in candidates:
+            labeling[node] = max(candidates)
-warnings.filterwarnings('ignore')
+    # The statements below run once, when the class body is executed, and
+    # define class attributes that are shared by every instance. Methods
+    # mutate some of them in place through ``self`` (e.g. G and weight1).
+    # Silences all warnings for the whole process.
+    warnings.filterwarnings('ignore')
-#G = nx.DiGraph(directed=True)
-G = nx.MultiDiGraph(day="Stackoverflow")
-df_nodes = hg.clusterlabels
-destf_nodes = hg.destclusterlabel
-color_map = {1: '#f09494', 2: '#eebcbc', 3: '#72bbd0', 4: '#91f0a1', 5: '#629fff', 6: '#bcc2f2',
-             7: '#eebcbc', 8: '#f1f0c0', 9: '#d2ffe7', 10: '#caf3a6', 11: '#ffdf55', 12: '#ef77aa',
-             13: '#d6dcff', 14: '#d2f5f0'}
-i=0
-graphedge=[]
+    #G = nx.DiGraph(directed=True)
-weigth=[]
+    # Graph that drawedges() fills with edges.
+    G = nx.MultiDiGraph(day="Stackoverflow")
-sourcedestination = []
-source = []
-dest = []
-edge_width = []
-weight1 = []
+    # References to the HyperGraph module-level lists, bound when the class
+    # body runs (before any instance calls hg.classify()).
+    df_nodes = hg.clusterlabels
+    destf_nodes = hg.destclusterlabel
+    # NOTE(review): color_map, i, graphedge, sourcedestination, source and
+    # dest are not read by any method visible in this class -- confirm
+    # before removing.
+    color_map = {1: '#f09494', 2: '#eebcbc', 3: '#72bbd0', 4: '#91f0a1', 5: '#629fff', 6: '#bcc2f2',
+                 7: '#eebcbc', 8: '#f1f0c0', 9: '#d2ffe7', 10: '#caf3a6', 11: '#ffdf55', 12: '#ef77aa',
+                 13: '#d6dcff', 14: '#d2f5f0'}
+    i=0
-""""drawing edges in graph"""
+    graphedge = []
+    # Iterated by result(); nothing visible here ever appends to it.
+    weigth = []
+    sourcedestination = []
+    source = []
+    dest = []
+    # Appended to by result() while it walks ``weigth``.
+    edge_width = []
+    # Edge counts appended by drawedges(), one per ordered label pair.
+    weight1 = []
-for drow in range(len(df_nodes)):
+    # Per-node adjacency counts, populated by findigneighbors().
+    node_adjacencies = []
-    for row in range(len(destf_nodes[drow])):
-       G.add_edge(df_nodes[drow], destf_nodes[drow][row])
-for row in range(len(hg.labalvlues)):
+    def drawedges(self):
+        """Add the cluster-to-destination edges to the graph and count them.
+
+        Each entry of ``self.df_nodes`` is linked to every entry of the list
+        at the same index in ``self.destf_nodes``. Then, for every ordered
+        pair taken from ``hg.labalvlues``, the number of edges between the
+        two is appended to ``self.weight1`` and printed.
+        """
+        for drow, source_label in enumerate(self.df_nodes):
+            for destination in self.destf_nodes[drow]:
+                self.G.add_edge(source_label, destination)
+        for origin in hg.labalvlues:
+            for target in hg.labalvlues:
+                count = self.G.number_of_edges(origin, target)
+                self.weight1.append(count)
+                # Print the count just computed: weight1 is a flat list, so
+                # indexing it by the inner loop position would only hit the
+                # right entry while processing the first origin.
+                print("The number of coccurance from node ", origin,"to node ", target, ": ", count)
+        # Stored as a plain attribute on the graph object, not as edge data.
+        self.G.weight = self.weight1
-    for row1 in range(len(hg.labalvlues)):
-       weight1.append(G.number_of_edges(hg.labalvlues[row], hg.labalvlues[row1]))
-       print("The number of coccurance from node ", hg.labalvlues[row],"to node ", hg.labalvlues[row1], ": ", weight1[row1])
-G.__setattr__('weight', weight1)
-   # print(float(row['Timestamp']))
-    #G.add_weighted_edges_from([(row['TransactionFrom'], row['TransactionTo'], i*j)])
-#print dict_pos
-"""label_propagation_communities(G) """
+    def labeling(self):
+        """Run label propagation over ``self.G`` and keep the final labels.
+
+        Every node starts with a unique integer label. Nodes are then
+        relabelled one color class at a time until ``_labeling_complete``
+        reports that propagation is done. The resulting node -> label dict
+        is stored on ``self.node_labels`` and also returned.
+        """
+        coloring = self._color_network(self.G)
+        # Create a unique label for each node in the graph
+        labeling = {v: k for k, v in enumerate(self.G)}
+        print("lable value: ", labeling.values())
+        while not self._labeling_complete(labeling, self.G):
+            print("lable value: ", labeling.values())
+            # Update the labels of every node with the same color.
+            for nodes in coloring.values():
+                for n in nodes:
+                    self._update_label(n, labeling, self.G)
+            for label in set(labeling.values()):
+                print("lable value: ", labeling.values())
+        # result() needs the labels for drawing, so keep them on the instance
+        # under a name that does not clash with this method.
+        self.node_labels = labeling
+        return labeling
-coloring = _color_network(G)
-    # Create a unique label for each node in the graph
-labeling = {v: k for k, v in enumerate(G)}
-print("lable value: ", labeling.values())
-while not _labeling_complete(labeling, G):
-# Update the labels of every node with the same color.
-    print("lable value: ", labeling.values())
-    for color, nodes in coloring.items():
-      for n in nodes:
-        _update_label(n, labeling, G)
-    for label in set(labeling.values()):
-      print("lable value: ", labeling.values())
-""" findig nodes' adjecencies"""
+    def findigneighbors(self):
+        """Record the size of each node's adjacency mapping in ``self.G``.
+
+        The counts, in graph iteration order, are stored on
+        ``self.node_adjacencies`` and mirrored on ``self.G.color``.
+        """
+        # Rebuild the list on every call instead of appending to the shared
+        # class-level list, so a repeated call (or a second instance) cannot
+        # leave more entries than the graph has nodes.
+        self.node_adjacencies = [len(neighbors) for _, neighbors in self.G.adjacency()]
+        self.G.color = self.node_adjacencies
-node_adjacencies = []
-node_text = []
-for node, adjacencies in enumerate(G.adjacency()):
-    node_adjacencies.append(len(adjacencies[1]))
-    node_text.append('# of connections: '+str(len(adjacencies[1])))
-G.color = node_adjacencies
+    def result(self):
+        """Print centrality statistics and draw the graph with matplotlib.
+
+        Node size follows degree centrality and node color follows
+        ``self.node_adjacencies``. Node labels are the label propagation
+        result stored by ``labeling()``; when that has not been run,
+        ``labels=None`` is passed and networkx falls back to its default
+        node labels.
+        """
+        plt.figure(figsize=(25, 25))
+        options = {
+            'with_labels': True,
+            'font_weight': 'regular',
+        }
+        d = nx.degree_centrality(self.G)
+        d_list = list(d.values())
+        print("node centrality: ", d_list)
+        print("node adjacencies: ", self.node_adjacencies)
+        for row in range(len(self.weigth)):
+            self.edge_width.append([])
+            for drow in range(len(self.weigth[row])):
+                self.edge_width[row].append(self.weigth[row][drow])
+        node_size = [v * 80 for v in d.values()]  # setting node size based on node centrality
+        print("Nodes' Degree:  ", nx.degree(self.G))
+        print("Nodes' Betweeness  ", nx.edge_betweenness_centrality(self.G))
+        print("Nodes' Betweeness-centrality:  ", nx.betweenness_centrality(self.G))
+        # Using the spring layout:
+        # - k controls the distance between the nodes
+        # - iterations is the number of times simulated annealing is run
+        # Here k=0.25 and iterations=50.
+        pos_nodes = nx.spring_layout(self.G, k=0.25, iterations=50)
+        nx.draw(self.G, pos_nodes, node_color=self.node_adjacencies, node_size=node_size, width=2, arrowstyle='->',
+                arrowsize=10, weight=self.weight1, edge_color='gray', **options)
+        edge_labels = nx.get_edge_attributes(self.G, 'weight')
+        # Shift the propagation labels slightly above the node positions.
+        pos_attrs = {}
+        for node, coords in pos_nodes.items():
+            pos_attrs[node] = (coords[0], coords[1] + 0.02)
+        nx.draw_networkx_edge_labels(self.G, pos_nodes, edge_labels=edge_labels, font_size=10, font_color='red')
+        # ``self.labeling`` is the bound method, not the label dict, so read
+        # the dict that labeling() stored on the instance.
+        node_labels = getattr(self, 'node_labels', None)
+        nx.draw_networkx_labels(self.G, pos_attrs, labels=node_labels, font_size=10, font_color='red')
+        ax = plt.gca()
+        ax.collections[0].set_edgecolor("#555555")
+        plt.show()
-plt.figure(figsize=(25, 25))
-options = {
-    'with_labels': True,
-    'font_weight': 'regular',
-}
-#colors = [color_map[G.node[node][1]] for node in G]
-#sizes = [G.node[node]['Timestamp'] * 10 for node in G]
-d = nx.degree_centrality(G)
-d_list= list(d.values())
-print ("node centrality: ",d_list)
-print("node adjacencies: ", node_adjacencies)
-for row in range(len(weigth)):
-    edge_width.append([])
-    for drow in range(len(weigth[row])):
-        edge_width[row].append(weigth[row][drow])
-node_size =  [v * 80 for v in d.values()] #setting node size based on node centrality
-edge_width = [row * 0.5 for row in weight1]
-print("Nodes' Degree:  ", nx.degree(G))
-print("Nodes' Betweeness  ", nx.edge_betweenness_centrality(G))
-print("Nodes' Betweeness-centrality:  ", nx.betweenness_centrality(G))
-"""
-Using the spring layout : 
- k controls the distance between the nodes and varies between 0 and 1
- iterations is the number of times simulated annealing is run
-default k=0.1 and iterations=50
-"""
-labels2 = {}
-for idx, edge in enumerate(G.edges):
-    labels2[edge] = "s"
-pos_nodes=nx.spring_layout(G, k=0.25, iterations=50)
-ax = plt.gca()
-nx.draw(G, pos_nodes,node_color= node_adjacencies, node_size=node_size, width=2, arrowstyle='->',arrowsize=10, weight=weight1, edge_color='gray',**options)
-edge_labels = nx.get_edge_attributes(G, 'weight')
-pos_attrs = {}
-for node, coords in pos_nodes.items():
-    pos_attrs[node] = (coords[0], coords[1] + 0.02)
-nx.draw_networkx_edge_labels(G, pos_nodes, edge_labels=edge_labels, font_size=10, font_color='red')
-nx.draw_networkx_labels(G, pos_attrs, labels=labeling,font_size=10, font_color='red')
-ax = plt.gca()
-ax.collections[0].set_edgecolor("#555555")
-plt.show()
+    def main(self):
+        """Run the pipeline: draw edges, propagate labels, count
+        adjacencies, then plot the result."""
+        pipeline = (
+            self.drawedges,
+            self.labeling,
+            self.findigneighbors,
+            self.result,
+        )
+        for step in pipeline:
+            step()
+# Module-level entry point: these statements run on import as well as when
+# the file is executed. Constructing the class calls hg.classify(), and
+# main() runs every step through to plt.show().
+linking = SemanticLinking()
+linking.main()
\ No newline at end of file