Semantic Linking: Created Classes for semantic linking logic

b365f613 · zahra · 2caa4a25 · b365f613 · b365f613 · b365f613
Commit b365f613 authored Sep 09, 2019 by zahra
3 changed files
--- a/data-hub/semantic-linking-microservice/app/Created_classes_for_semantic_linking_logic.patch
+++ b/data-hub/semantic-linking-microservice/app/Created_classes_for_semantic_linking_logic.patch
+Index: data-hub/semantic-linking-microservice/app/initialdemo/HyperGraph.py
+IDEA additional info:
+Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
+<+>UTF-8
+===================================================================
+--- data-hub/semantic-linking-microservice/app/initialdemo/HyperGraph.py	(date 1568037363000)
+++ data-hub/semantic-linking-microservice/app/initialdemo/HyperGraph.py	(date 1568038969230)
+@@ -1,140 +1,137 @@
+-import networkx as nx
+-import matplotlib.pyplot as plt
+-import pandas as pd
+ import json
+ 
+-
+-
+-with open("mult_in_out.json", "r") as json_file:
+-    df_nodes = json.load(json_file)
+-
+-
+ nodeIds = []
+-destIds= []
+-clusterlabels= []
+destIds = []
+clusterlabels = []
+ destclusterlabel = []
+-cluster= []
+cluster = []
+ labalvlues = []
+-i = 0
+
+def classify():
+
+    with open("mult_in_out.json", "r") as json_file:
+        df_nodes = json.load(json_file)
+
+ 
+ 
+-for row in df_nodes:
+    for row in df_nodes:
+ 
+-    for j in range(len(row['TransactionFrom'])):
+-        print("   Input Ids:  ", row['TransactionFrom'][j])
+-    nodeIds.append(row['TransactionFrom'])
+-    print("This is nodes:  ", nodeIds)
+        for j in range(len(row['TransactionFrom'])):
+            print("   Input Ids:  ", row['TransactionFrom'][j])
+        nodeIds.append(row['TransactionFrom'])
+        print("This is nodes:  ", nodeIds)
+ 
+ 
+-for row in df_nodes:
+-   destIds.append(row['TransactionTo'])
+    for row in df_nodes:
+       destIds.append(row['TransactionTo'])
+ 
+ 
+-for row in range(len(nodeIds)):
+-    print(nodeIds[row])
+    for row in range(len(nodeIds)):
+        print(nodeIds[row])
+ 
+-print("Finish InputIDs")
+-for row in range(len(nodeIds)):
+    print("Finish InputIDs")
+    i = 0
+    for row in range(len(nodeIds)):
+ 
+-    clusterlabels.append(row)
+-    i += 1
+-print(i)
+        clusterlabels.append(row)
+        i += 1
+    print(i)
+ 
+-"""" classifying Inputs"""
+-"""" Labaling inputs"""
+-for row in range(len(nodeIds)):
+    """" classifying Inputs"""
+    """" Labaling inputs"""
+    for row in range(len(nodeIds)):
+ 
+-    for rown in range(len(nodeIds[row])):
+        for rown in range(len(nodeIds[row])):
+ 
+-        for row1 in range(len(nodeIds)):
+-            for rown1 in range(len(nodeIds[row1])):
+-               if(nodeIds[row][rown]==nodeIds[row1][rown1]):
+-                  # print("row:  ",row,"row1:  ",row1)
+-                  if(row < row1):
+-                      for row2 in clusterlabels:
+-                          if( clusterlabels[row1]== clusterlabels[row2]):
+-                              clusterlabels[row2]=clusterlabels[row]
+-                      clusterlabels[row1] = clusterlabels[row]
+            for row1 in range(len(nodeIds)):
+                for rown1 in range(len(nodeIds[row1])):
+                   if(nodeIds[row][rown]==nodeIds[row1][rown1]):
+                      # print("row:  ",row,"row1:  ",row1)
+                      if(row < row1):
+                          for row2 in clusterlabels:
+                              if( clusterlabels[row1]== clusterlabels[row2]):
+                                  clusterlabels[row2]=clusterlabels[row]
+                          clusterlabels[row1] = clusterlabels[row]
+ 
+-                  else:
+-                      for row2 in clusterlabels:
+-                          if (clusterlabels[row] == clusterlabels[row2]):
+-                              clusterlabels[row2] = clusterlabels[row1]
+-                      clusterlabels[row] = clusterlabels[row1]
+                      else:
+                          for row2 in clusterlabels:
+                              if (clusterlabels[row] == clusterlabels[row2]):
+                                  clusterlabels[row2] = clusterlabels[row1]
+                          clusterlabels[row] = clusterlabels[row1]
+ 
+ 
+-print(clusterlabels)
+-print("cluster labels:", len(clusterlabels))
+-print("NodeIDs:   ", len(nodeIds))
+    print(clusterlabels)
+    print("cluster labels:", len(clusterlabels))
+    print("NodeIDs:   ", len(nodeIds))
+ 
+ 
+ 
+-"""" Calculating the number of clusters"""
+-clusternum = 1
+-labalvlues.append(clusterlabels[0])
+-for row in range(len(clusterlabels)):
+-    flag = True
+-    for row1 in range(len(labalvlues)):
+-      if(clusterlabels[row]== labalvlues[row1]):
+-         flag = False
+    """" Calculating the number of clusters"""
+    clusternum = 1
+    labalvlues.append(clusterlabels[0])
+    for row in range(len(clusterlabels)):
+        flag = True
+        for row1 in range(len(labalvlues)):
+          if(clusterlabels[row]== labalvlues[row1]):
+             flag = False
+ 
+-    if (flag):
+-        clusternum = + 1
+-        labalvlues.append(clusterlabels[row])
+        if (flag):
+            clusternum = + 1
+            labalvlues.append(clusterlabels[row])
+ 
+-print("label values (source Ids in the network):  ", labalvlues, " and the number of clusters is: ", len(labalvlues))
+    print("label values (source Ids in the network):  ", labalvlues, " and the number of clusters is: ", len(labalvlues))
+ 
+ 
+ 
+ 
+-"""" clustering Ids according to their labels"""
+    """" clustering Ids according to their labels"""
+ 
+-for row in range(len(labalvlues)):
+-    cluster.append([])
+-    for row3 in range(len(nodeIds)):
+-        if (labalvlues[row] == clusterlabels[row3]):
+-            cluster[row].extend(nodeIds[row3])
+-print("clusters:   ", cluster)
+    for row in range(len(labalvlues)):
+        cluster.append([])
+        for row3 in range(len(nodeIds)):
+            if (labalvlues[row] == clusterlabels[row3]):
+                cluster[row].extend(nodeIds[row3])
+    print("clusters:   ", cluster)
+ 
+ 
+ 
+-""" Removing duplicating items in cluster"""
+    """ Removing duplicating items in cluster"""
+ 
+-flag = True
+-while(flag):
+-  for row in range(len(cluster)):
+    flag = True
+    while(flag):
+      for row in range(len(cluster)):
+ 
+-    flag= False
+-    for row1 in range(len(cluster[row])):
+-        flag= False
+-        for row2 in range (len(cluster[row])):
+-             if(row1 != row2):
+-                    if(cluster[row][row1] == cluster[row][row2]):
+-                       del cluster[row][row2]
+-                       flag=True
+-                       break
+-        if(flag):
+-            break
+-    if(flag):
+-        break
+        flag= False
+        for row1 in range(len(cluster[row])):
+            flag= False
+            for row2 in range (len(cluster[row])):
+                 if(row1 != row2):
+                        if(cluster[row][row1] == cluster[row][row2]):
+                           del cluster[row][row2]
+                           flag=True
+                           break
+            if(flag):
+                break
+        if(flag):
+            break
+ 
+-print("cluster:", cluster)
+    print("cluster:", cluster)
+ 
+ 
+ 
+ 
+-"""" Clustering  Destination Ids """
+-for row in range(len(destIds)):
+-    destclusterlabel.append([])
+-    for row2 in range(len(destIds[row])):
+-        flag = True
+-        for rownum in range(len(labalvlues)):
+-            for row1 in range(len(cluster[rownum])):
+    """" Clustering  Destination Ids """
+    for row in range(len(destIds)):
+        destclusterlabel.append([])
+        for row2 in range(len(destIds[row])):
+            flag = True
+            for rownum in range(len(labalvlues)):
+                for row1 in range(len(cluster[rownum])):
+ 
+-               if(destIds[row][row2]== cluster[rownum][row1]):
+-                    destclusterlabel[row].append(labalvlues[rownum])
+-                    flag = False
+-        if(flag):
+-            destclusterlabel.append(destIds[row][row2])
+                   if(destIds[row][row2]== cluster[rownum][row1]):
+                        destclusterlabel[row].append(labalvlues[rownum])
+                        flag = False
+            if(flag):
+                destclusterlabel.append(destIds[row][row2])
+ 
+-print("destination labels (destination Ids):  ", destclusterlabel)
+    print("destination labels (destination Ids):  ", destclusterlabel)
+Index: data-hub/semantic-linking-microservice/app/initialdemo/SemanticLinking.py
+IDEA additional info:
+Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
+<+>UTF-8
+===================================================================
+--- data-hub/semantic-linking-microservice/app/initialdemo/SemanticLinking.py	(date 1568037363000)
+++ data-hub/semantic-linking-microservice/app/initialdemo/SemanticLinking.py	(date 1568040344378)
+@@ -1,7 +1,7 @@
+ import networkx as nx
+ import matplotlib.pyplot as plt
+ from collections import Counter
+-import HyperGraph as hg
+import initialdemo.HyperGraph as hg
+ import pandas as pd
+ import json
+ import warnings
+@@ -12,194 +12,198 @@
+ import values as values
+ from matplotlib import colors
+ 
+-def _color_network(G):
+-    """Colors the network so that neighboring nodes all have distinct colors.
+class SemanticLinking:
+
+    def __init__(self):
+        hg.classify()
+
+    def _color_network(self, G):
+        """Colors the network so that neighboring nodes all have distinct colors.
+ 
+-       Returns a dict keyed by color to a set of nodes with that color.
+-    """
+-    coloring = dict()  # color => set(node)
+-    colors = nx.coloring.greedy_color(G)
+-    for node, color in colors.items():
+-        if color in coloring:
+-            coloring[color].add(node)
+-        else:
+-            coloring[color] = set([node])
+-    return coloring
+           Returns a dict keyed by color to a set of nodes with that color.
+        """
+        coloring = dict()  # color => set(node)
+        colors = nx.coloring.greedy_color(G)
+        for node, color in colors.items():
+            if color in coloring:
+                coloring[color].add(node)
+            else:
+                coloring[color] = set([node])
+        return coloring
+ 
+ 
+-def _labeling_complete(labeling, G):
+-    """Determines whether or not LPA is done.
+    def _labeling_complete(self, labeling, G):
+        """Determines whether or not LPA is done.
+ 
+-       Label propagation is complete when all nodes have a label that is
+-       in the set of highest frequency labels amongst its neighbors.
+           Label propagation is complete when all nodes have a label that is
+           in the set of highest frequency labels amongst its neighbors.
+ 
+-       Nodes with no neighbors are considered complete.
+-    """
+-    return all(labeling[v] in _most_frequent_labels(v, labeling, G)
+-               for v in G if len(G[v]) > 0)
+           Nodes with no neighbors are considered complete.
+        """
+        return all(labeling[v] in self._most_frequent_labels(v, labeling, G)
+                   for v in G if len(G[v]) > 0)
+ 
+ 
+-def _most_frequent_labels(node, labeling, G):
+-    """Returns a set of all labels with maximum frequency in `labeling`.
+    def _most_frequent_labels(self, node, labeling, G):
+        """Returns a set of all labels with maximum frequency in `labeling`.
+ 
+-       Input `labeling` should be a dict keyed by node to labels.
+-    """
+-    if not G[node]:
+-        # Nodes with no neighbors are themselves a community and are labeled
+-        # accordingly, hence the immediate if statement.
+-        return {labeling[node]}
+           Input `labeling` should be a dict keyed by node to labels.
+        """
+        if not G[node]:
+            # Nodes with no neighbors are themselves a community and are labeled
+            # accordingly, hence the immediate if statement.
+            return {labeling[node]}
+ 
+-    # Compute the frequencies of all neighbours of node
+-    freqs = Counter(labeling[q] for q in G[node])
+-    max_freq = max(freqs.values())
+-    return {label for label, freq in freqs.items() if freq == max_freq}
+        # Compute the frequencies of all neighbours of node
+        freqs = Counter(labeling[q] for q in G[node])
+        max_freq = max(freqs.values())
+        return {label for label, freq in freqs.items() if freq == max_freq}
+ 
+ 
+-def _update_label(node, labeling, G):
+-    """Updates the label of a node using the Prec-Max tie breaking algorithm
+    def _update_label(self, node, labeling, G):
+        """Updates the label of a node using the Prec-Max tie breaking algorithm
+ 
+-       The algorithm is explained in: 'Community Detection via Semi-Synchronous
+-       Label Propagation Algorithms' Cordasco and Gargano, 2011
+-    """
+-    high_labels = _most_frequent_labels(node, labeling, G)
+-    if len(high_labels) == 1:
+-        labeling[node] = high_labels.pop()
+-    elif len(high_labels) > 1:
+-        # Prec-Max
+-        if labeling[node] not in high_labels:
+           The algorithm is explained in: 'Community Detection via Semi-Synchronous
+           Label Propagation Algorithms' Cordasco and Gargano, 2011
+        """
+        high_labels = self._most_frequent_labels(node, labeling, G)
+        if len(high_labels) == 1:
+            labeling[node] = high_labels.pop()
+        elif len(high_labels) > 1:
+            # Prec-Max
+            if labeling[node] not in high_labels:
+ 
+-         labeling[node] = max(high_labels)
+             labeling[node] = max(high_labels)
+ 
+ 
+-warnings.filterwarnings('ignore')
+    warnings.filterwarnings('ignore')
+ 
+ 
+ 
+ 
+-#G = nx.DiGraph(directed=True)
+-G = nx.MultiDiGraph(day="Stackoverflow")
+    #G = nx.DiGraph(directed=True)
+    G = nx.MultiDiGraph(day="Stackoverflow")
+ 
+-df_nodes = hg.clusterlabels
+-destf_nodes = hg.destclusterlabel
+-color_map = {1: '#f09494', 2: '#eebcbc', 3: '#72bbd0', 4: '#91f0a1', 5: '#629fff', 6: '#bcc2f2',
+-             7: '#eebcbc', 8: '#f1f0c0', 9: '#d2ffe7', 10: '#caf3a6', 11: '#ffdf55', 12: '#ef77aa',
+-             13: '#d6dcff', 14: '#d2f5f0'}
+-i=0
+    df_nodes = hg.clusterlabels
+    destf_nodes = hg.destclusterlabel
+    color_map = {1: '#f09494', 2: '#eebcbc', 3: '#72bbd0', 4: '#91f0a1', 5: '#629fff', 6: '#bcc2f2',
+                 7: '#eebcbc', 8: '#f1f0c0', 9: '#d2ffe7', 10: '#caf3a6', 11: '#ffdf55', 12: '#ef77aa',
+                 13: '#d6dcff', 14: '#d2f5f0'}
+    i=0
+ 
+-graphedge=[]
+-weigth=[]
+-sourcedestination = []
+-source = []
+-dest = []
+-edge_width = []
+-weight1 = []
+    graphedge = []
+    weigth = []
+    sourcedestination = []
+    source = []
+    dest = []
+    edge_width = []
+    weight1 = []
+ 
+    node_adjacencies = []
+ 
+-""""drawing edges in graph"""
+    def drawedges(self):
+ 
+-for drow in range(len(df_nodes)):
+-    for row in range(len(destf_nodes[drow])):
+-       G.add_edge(df_nodes[drow], destf_nodes[drow][row])
+        """drawing edges in graph"""
+
+        for drow in range(len(self.df_nodes)):
+            for row in range(len(self.destf_nodes[drow])):
+               self.G.add_edge(self.df_nodes[drow], self.destf_nodes[drow][row])
+ 
+-for row in range(len(hg.labalvlues)):
+-    for row1 in range(len(hg.labalvlues)):
+-       weight1.append(G.number_of_edges(hg.labalvlues[row], hg.labalvlues[row1]))
+-       print("The number of coccurance from node ", hg.labalvlues[row],"to node ", hg.labalvlues[row1], ": ", weight1[row1])
+-
+-G.__setattr__('weight', weight1)
+        for row in range(len(hg.labalvlues)):
+            for row1 in range(len(hg.labalvlues)):
+               self.weight1.append(self.G.number_of_edges(hg.labalvlues[row], hg.labalvlues[row1]))
+               print("The number of coccurance from node ", hg.labalvlues[row],"to node ", hg.labalvlues[row1], ": ", self.weight1[row1])
+ 
+-   # print(float(row['Timestamp']))
+-    #G.add_weighted_edges_from([(row['TransactionFrom'], row['TransactionTo'], i*j)])
+        self.G.__setattr__('weight', self.weight1)
+ 
+-#print dict_pos
+-
+-"""label_propagation_communities(G) """
+    def labeling(self):
+        """label_propagation_communities(G) """
+ 
+ 
+-coloring = _color_network(G)
+-    # Create a unique label for each node in the graph
+-labeling = {v: k for k, v in enumerate(G)}
+-print("lable value: ", labeling.values())
+-while not _labeling_complete(labeling, G):
+-# Update the labels of every node with the same color.
+-    print("lable value: ", labeling.values())
+-    for color, nodes in coloring.items():
+-      for n in nodes:
+-        _update_label(n, labeling, G)
+-    for label in set(labeling.values()):
+-      print("lable value: ", labeling.values())
+        coloring = self._color_network(self.G)
+            # Create a unique label for each node in the graph
+        labeling = {v: k for k, v in enumerate(self.G)}
+        print("lable value: ", labeling.values())
+        while not self._labeling_complete(labeling, self.G):
+        # Update the labels of every node with the same color.
+            print("lable value: ", labeling.values())
+            for color, nodes in coloring.items():
+              for n in nodes:
+                self._update_label(n, labeling, self.G)
+            for label in set(labeling.values()):
+              print("lable value: ", labeling.values())
+ 
+ 
+-
+-""" findig nodes' adjecencies"""
+-node_adjacencies = []
+-node_text = []
+-for node, adjacencies in enumerate(G.adjacency()):
+-    node_adjacencies.append(len(adjacencies[1]))
+-    node_text.append('# of connections: '+str(len(adjacencies[1])))
+    def findigneighbors(self):
+        """ findig nodes' adjecencies"""
+        node_text = []
+        for node, adjacencies in enumerate(self.G.adjacency()):
+            self.node_adjacencies.append(len(adjacencies[1]))
+            node_text.append('# of connections: '+str(len(adjacencies[1])))
+ 
+-G.color = node_adjacencies
+        self.G.color = self.node_adjacencies
+ 
+-
+-plt.figure(figsize=(25, 25))
+-options = {
+-    'with_labels': True,
+-    'font_weight': 'regular',
+-}
+    def result(self):
+        plt.figure(figsize=(25, 25))
+        options = {
+            'with_labels': True,
+            'font_weight': 'regular',
+        }
+ 
+-#colors = [color_map[G.node[node][1]] for node in G]
+-#sizes = [G.node[node]['Timestamp'] * 10 for node in G]
+        # colors = [color_map[G.node[node][1]] for node in G]
+        # sizes = [G.node[node]['Timestamp'] * 10 for node in G]
+ 
+-
+-d = nx.degree_centrality(G)
+-d_list= list(d.values())
+-print ("node centrality: ",d_list)
+-print("node adjacencies: ", node_adjacencies)
+-for row in range(len(weigth)):
+-    edge_width.append([])
+-    for drow in range(len(weigth[row])):
+-        edge_width[row].append(weigth[row][drow])
+-node_size =  [v * 80 for v in d.values()] #setting node size based on node centrality
+-edge_width = [row * 0.5 for row in weight1]
+-
+-print("Nodes' Degree:  ", nx.degree(G))
+-print("Nodes' Betweeness  ", nx.edge_betweenness_centrality(G))
+-print("Nodes' Betweeness-centrality:  ", nx.betweenness_centrality(G))
+        d = nx.degree_centrality(self.G)
+        d_list = list(d.values())
+        print("node centrality: ", d_list)
+        print("node adjacencies: ", self.node_adjacencies)
+        for row in range(len(self.weigth)):
+            self.edge_width.append([])
+            for drow in range(len(self.weigth[row])):
+                self.edge_width[row].append(self.weigth[row][drow])
+        node_size = [v * 80 for v in d.values()]  # setting node size based on node centrality
+        edge_width = [row * 0.5 for row in self.weight1]
+ 
+-
+        print("Nodes' Degree:  ", nx.degree(self.G))
+        print("Nodes' Betweeness  ", nx.edge_betweenness_centrality(self.G))
+        print("Nodes' Betweeness-centrality:  ", nx.betweenness_centrality(self.G))
+ 
+-"""
+-Using the spring layout : 
+-- k controls the distance between the nodes and varies between 0 and 1
+-- iterations is the number of times simulated annealing is run
+-default k=0.1 and iterations=50
+-"""
+        """
+        Using the spring layout : 
+        - k controls the distance between the nodes and varies between 0 and 1
+        - iterations is the number of times simulated annealing is run
+        default k=0.1 and iterations=50
+        """
+ 
+-labels2 = {}
+        labels2 = {}
+ 
+-for idx, edge in enumerate(G.edges):
+-    labels2[edge] = "s"
+        for idx, edge in enumerate(self.G.edges):
+            labels2[edge] = "s"
+ 
+-pos_nodes=nx.spring_layout(G, k=0.25, iterations=50)
+-ax = plt.gca()
+        pos_nodes = nx.spring_layout(self.G, k=0.25, iterations=50)
+ 
+-nx.draw(G, pos_nodes,node_color= node_adjacencies, node_size=node_size, width=2, arrowstyle='->',arrowsize=10, weight=weight1, edge_color='gray',**options)
+ 
+-edge_labels = nx.get_edge_attributes(G, 'weight')
+        nx.draw(self.G, pos_nodes, node_color=self.node_adjacencies, node_size=node_size, width=2, arrowstyle='->',
+                arrowsize=10, weight=self.weight1, edge_color='gray', **options)
+ 
+        edge_labels = nx.get_edge_attributes(self.G, 'weight')
+ 
+-pos_attrs = {}
+-for node, coords in pos_nodes.items():
+-    pos_attrs[node] = (coords[0], coords[1] + 0.02)
+-nx.draw_networkx_edge_labels(G, pos_nodes, edge_labels=edge_labels, font_size=10, font_color='red')
+-nx.draw_networkx_labels(G, pos_attrs, labels=labeling,font_size=10, font_color='red')
+-
+-
+        pos_attrs = {}
+        for node, coords in pos_nodes.items():
+            pos_attrs[node] = (coords[0], coords[1] + 0.02)
+        nx.draw_networkx_edge_labels(self.G, pos_nodes, edge_labels=edge_labels, font_size=10, font_color='red')
+        nx.draw_networkx_labels(self.G, pos_attrs, labels=self.labeling, font_size=10, font_color='red')
+ 
+-ax = plt.gca()
+-ax.collections[0].set_edgecolor("#555555")
+-plt.show()
+        ax = plt.gca()
+        ax.collections[0].set_edgecolor("#555555")
+        plt.show()
+ 
+ 
+    def main(self):
+        self.drawedges()
+        self.labeling()
+        self.findigneighbors()
+        self.result()
+ 
+-
+linking = SemanticLinking()
+linking.main()
+\ No newline at end of file
--- a/data-hub/semantic-linking-microservice/app/initialdemo/HyperGraph.py
+++ b/data-hub/semantic-linking-microservice/app/initialdemo/HyperGraph.py
-import networkx as nx
-import matplotlib.pyplot as plt
-import pandas as pd
 import json

+nodeIds = []
+destIds = []
+clusterlabels = []
+destclusterlabel = []
+cluster = []
+labalvlues = []

+def classify():

-with open("mult_in_out.json", "r") as json_file:
+    with open("mult_in_out.json", "r") as json_file:
        df_nodes = json.load(json_file)


-nodeIds = []
-destIds= []
-clusterlabels= []
-destclusterlabel = []
-cluster= []
-labalvlues = []
-i = 0
-

-for row in df_nodes:
+    for row in df_nodes:

        for j in range(len(row['TransactionFrom'])):
            print("   Input Ids:  ", row['TransactionFrom'][j])
@@ -26,23 +22,24 @@ for row in df_nodes:
        print("This is nodes:  ", nodeIds)


-for row in df_nodes:
+    for row in df_nodes:
       destIds.append(row['TransactionTo'])


-for row in range(len(nodeIds)):
+    for row in range(len(nodeIds)):
        print(nodeIds[row])

-print("Finish InputIDs")
-for row in range(len(nodeIds)):
+    print("Finish InputIDs")
+    i = 0
+    for row in range(len(nodeIds)):

        clusterlabels.append(row)
        i += 1
-print(i)
+    print(i)

-"""" classifying Inputs"""
-"""" Labaling inputs"""
-for row in range(len(nodeIds)):
+    """" classifying Inputs"""
+    """" Labaling inputs"""
+    for row in range(len(nodeIds)):

        for rown in range(len(nodeIds[row])):

@@ -63,16 +60,16 @@ for row in range(len(nodeIds)):
                          clusterlabels[row] = clusterlabels[row1]


-print(clusterlabels)
-print("cluster labels:", len(clusterlabels))
-print("NodeIDs:   ", len(nodeIds))
+    print(clusterlabels)
+    print("cluster labels:", len(clusterlabels))
+    print("NodeIDs:   ", len(nodeIds))



-"""" Calculating the number of clusters"""
-clusternum = 1
-labalvlues.append(clusterlabels[0])
-for row in range(len(clusterlabels)):
+    """" Calculating the number of clusters"""
+    clusternum = 1
+    labalvlues.append(clusterlabels[0])
+    for row in range(len(clusterlabels)):
        flag = True
        for row1 in range(len(labalvlues)):
          if(clusterlabels[row]== labalvlues[row1]):
@@ -82,26 +79,26 @@ for row in range(len(clusterlabels)):
            clusternum = + 1
            labalvlues.append(clusterlabels[row])

-print("label values (source Ids in the network):  ", labalvlues, " and the number of clusters is: ", len(labalvlues))
+    print("label values (source Ids in the network):  ", labalvlues, " and the number of clusters is: ", len(labalvlues))




-"""" clustering Ids according to their labels"""
+    """" clustering Ids according to their labels"""

-for row in range(len(labalvlues)):
+    for row in range(len(labalvlues)):
        cluster.append([])
        for row3 in range(len(nodeIds)):
            if (labalvlues[row] == clusterlabels[row3]):
                cluster[row].extend(nodeIds[row3])
-print("clusters:   ", cluster)
+    print("clusters:   ", cluster)



-""" Removing duplicating items in cluster"""
+    """ Removing duplicating items in cluster"""

-flag = True
-while(flag):
+    flag = True
+    while(flag):
      for row in range(len(cluster)):

        flag= False
@@ -118,13 +115,13 @@ while(flag):
        if(flag):
            break

-print("cluster:", cluster)
+    print("cluster:", cluster)




-"""" Clustering  Destination Ids """
-for row in range(len(destIds)):
+    """" Clustering  Destination Ids """
+    for row in range(len(destIds)):
        destclusterlabel.append([])
        for row2 in range(len(destIds[row])):
            flag = True
@@ -137,4 +134,4 @@ for row in range(len(destIds)):
            if(flag):
                destclusterlabel.append(destIds[row][row2])

-print("destination labels (destination Ids):  ", destclusterlabel)
+    print("destination labels (destination Ids):  ", destclusterlabel)
--- a/data-hub/semantic-linking-microservice/app/initialdemo/SemanticLinking.py
+++ b/data-hub/semantic-linking-microservice/app/initialdemo/SemanticLinking.py
 import networkx as nx
 import matplotlib.pyplot as plt
 from collections import Counter
-import HyperGraph as hg
+import initialdemo.HyperGraph as hg
 import pandas as pd
 import json
 import warnings
@@ -12,7 +12,12 @@ import mplleaflet
 import values as values
 from matplotlib import colors

-def _color_network(G):
+class SemanticLinking:
+
+    def __init__(self):
+        hg.classify()
+
+    def _color_network(self, G):
        """Colors the network so that neighboring nodes all have distinct colors.

           Returns a dict keyed by color to a set of nodes with that color.
@@ -27,7 +32,7 @@ def _color_network(G):
        return coloring


-def _labeling_complete(labeling, G):
+    def _labeling_complete(self, labeling, G):
        """Determines whether or not LPA is done.

           Label propagation is complete when all nodes have a label that is
@@ -35,11 +40,11 @@ def _labeling_complete(labeling, G):

           Nodes with no neighbors are considered complete.
        """
-    return all(labeling[v] in _most_frequent_labels(v, labeling, G)
+        return all(labeling[v] in self._most_frequent_labels(v, labeling, G)
                   for v in G if len(G[v]) > 0)


-def _most_frequent_labels(node, labeling, G):
+    def _most_frequent_labels(self, node, labeling, G):
        """Returns a set of all labels with maximum frequency in `labeling`.

           Input `labeling` should be a dict keyed by node to labels.
@@ -55,13 +60,13 @@ def _most_frequent_labels(node, labeling, G):
        return {label for label, freq in freqs.items() if freq == max_freq}


-def _update_label(node, labeling, G):
+    def _update_label(self, node, labeling, G):
        """Updates the label of a node using the Prec-Max tie breaking algorithm

           The algorithm is explained in: 'Community Detection via Semi-Synchronous
           Label Propagation Algorithms' Cordasco and Gargano, 2011
        """
-    high_labels = _most_frequent_labels(node, labeling, G)
+        high_labels = self._most_frequent_labels(node, labeling, G)
        if len(high_labels) == 1:
            labeling[node] = high_labels.pop()
        elif len(high_labels) > 1:
@@ -71,135 +76,134 @@ def _update_label(node, labeling, G):
             labeling[node] = max(high_labels)


-warnings.filterwarnings('ignore')
+    warnings.filterwarnings('ignore')




-#G = nx.DiGraph(directed=True)
-G = nx.MultiDiGraph(day="Stackoverflow")
+    #G = nx.DiGraph(directed=True)
+    G = nx.MultiDiGraph(day="Stackoverflow")

-df_nodes = hg.clusterlabels
-destf_nodes = hg.destclusterlabel
-color_map = {1: '#f09494', 2: '#eebcbc', 3: '#72bbd0', 4: '#91f0a1', 5: '#629fff', 6: '#bcc2f2',
+    df_nodes = hg.clusterlabels
+    destf_nodes = hg.destclusterlabel
+    color_map = {1: '#f09494', 2: '#eebcbc', 3: '#72bbd0', 4: '#91f0a1', 5: '#629fff', 6: '#bcc2f2',
                 7: '#eebcbc', 8: '#f1f0c0', 9: '#d2ffe7', 10: '#caf3a6', 11: '#ffdf55', 12: '#ef77aa',
                 13: '#d6dcff', 14: '#d2f5f0'}
-i=0
+    i=0

-graphedge=[]
-weigth=[]
-sourcedestination = []
-source = []
-dest = []
-edge_width = []
-weight1 = []
+    graphedge = []
+    weigth = []
+    sourcedestination = []
+    source = []
+    dest = []
+    edge_width = []
+    weight1 = []

+    node_adjacencies = []

-""""drawing edges in graph"""
+    def drawedges(self):

-for drow in range(len(df_nodes)):
-    for row in range(len(destf_nodes[drow])):
-       G.add_edge(df_nodes[drow], destf_nodes[drow][row])
+        """Draw the edges of the graph."""

-for row in range(len(hg.labalvlues)):
-    for row1 in range(len(hg.labalvlues)):
-       weight1.append(G.number_of_edges(hg.labalvlues[row], hg.labalvlues[row1]))
-       print("The number of coccurance from node ", hg.labalvlues[row],"to node ", hg.labalvlues[row1], ": ", weight1[row1])
+        for drow in range(len(self.df_nodes)):
+            for row in range(len(self.destf_nodes[drow])):
+               self.G.add_edge(self.df_nodes[drow], self.destf_nodes[drow][row])

-G.__setattr__('weight', weight1)
-
-   # print(float(row['Timestamp']))
-    #G.add_weighted_edges_from([(row['TransactionFrom'], row['TransactionTo'], i*j)])
+        for row in range(len(hg.labalvlues)):
+            for row1 in range(len(hg.labalvlues)):
+               self.weight1.append(self.G.number_of_edges(hg.labalvlues[row], hg.labalvlues[row1]))
+               print("The number of coccurance from node ", hg.labalvlues[row],"to node ", hg.labalvlues[row1], ": ", self.weight1[row1])

-#print dict_pos
+        self.G.__setattr__('weight', self.weight1)

-"""label_propagation_communities(G) """
+    def labeling(self):
+        """Propagate labels through the graph until labeling is complete (cf. label_propagation_communities(G))."""


-coloring = _color_network(G)
+        coloring = self._color_network(self.G)
            # Create a unique label for each node in the graph
-labeling = {v: k for k, v in enumerate(G)}
-print("lable value: ", labeling.values())
-while not _labeling_complete(labeling, G):
-# Update the labels of every node with the same color.
+        labeling = {v: k for k, v in enumerate(self.G)}
+        print("lable value: ", labeling.values())
+        while not self._labeling_complete(labeling, self.G):
+        # Update the labels of every node with the same color.
            print("lable value: ", labeling.values())
            for color, nodes in coloring.items():
              for n in nodes:
-        _update_label(n, labeling, G)
+                self._update_label(n, labeling, self.G)
            for label in set(labeling.values()):
              print("lable value: ", labeling.values())


-
-""" findig nodes' adjecencies"""
-node_adjacencies = []
-node_text = []
-for node, adjacencies in enumerate(G.adjacency()):
-    node_adjacencies.append(len(adjacencies[1]))
+    def findigneighbors(self):
+        """Find each node's adjacencies."""
+        node_text = []
+        for node, adjacencies in enumerate(self.G.adjacency()):
+            self.node_adjacencies.append(len(adjacencies[1]))
            node_text.append('# of connections: '+str(len(adjacencies[1])))

-G.color = node_adjacencies
-
+        self.G.color = self.node_adjacencies

-plt.figure(figsize=(25, 25))
-options = {
+    def result(self):
+        plt.figure(figsize=(25, 25))
+        options = {
            'with_labels': True,
            'font_weight': 'regular',
-}
-
-#colors = [color_map[G.node[node][1]] for node in G]
-#sizes = [G.node[node]['Timestamp'] * 10 for node in G]
-
+        }
+
+        # colors = [color_map[G.node[node][1]] for node in G]
+        # sizes = [G.node[node]['Timestamp'] * 10 for node in G]
+
+        d = nx.degree_centrality(self.G)
+        d_list = list(d.values())
+        print("node centrality: ", d_list)
+        print("node adjacencies: ", self.node_adjacencies)
+        for row in range(len(self.weigth)):
+            self.edge_width.append([])
+            for drow in range(len(self.weigth[row])):
+                self.edge_width[row].append(self.weigth[row][drow])
+        node_size = [v * 80 for v in d.values()]  # setting node size based on node centrality
+        edge_width = [row * 0.5 for row in self.weight1]
+
+        print("Nodes' Degree:  ", nx.degree(self.G))
+        print("Nodes' Betweeness  ", nx.edge_betweenness_centrality(self.G))
+        print("Nodes' Betweeness-centrality:  ", nx.betweenness_centrality(self.G))

-d = nx.degree_centrality(G)
-d_list= list(d.values())
-print ("node centrality: ",d_list)
-print("node adjacencies: ", node_adjacencies)
-for row in range(len(weigth)):
-    edge_width.append([])
-    for drow in range(len(weigth[row])):
-        edge_width[row].append(weigth[row][drow])
-node_size =  [v * 80 for v in d.values()] #setting node size based on node centrality
-edge_width = [row * 0.5 for row in weight1]
-
-print("Nodes' Degree:  ", nx.degree(G))
-print("Nodes' Betweeness  ", nx.edge_betweenness_centrality(G))
-print("Nodes' Betweeness-centrality:  ", nx.betweenness_centrality(G))
-
-
-
-"""
-Using the spring layout : 
- k controls the distance between the nodes and varies between 0 and 1
- iterations is the number of times simulated annealing is run
-default k=0.1 and iterations=50
-"""
+        """
+        Using the spring layout : 
+        - k controls the distance between the nodes and varies between 0 and 1
+        - iterations is the number of times simulated annealing is run
+        default k=0.1 and iterations=50
+        """

-labels2 = {}
+        labels2 = {}

-for idx, edge in enumerate(G.edges):
+        for idx, edge in enumerate(self.G.edges):
            labels2[edge] = "s"

-pos_nodes=nx.spring_layout(G, k=0.25, iterations=50)
-ax = plt.gca()
+        pos_nodes = nx.spring_layout(self.G, k=0.25, iterations=50)

-nx.draw(G, pos_nodes,node_color= node_adjacencies, node_size=node_size, width=2, arrowstyle='->',arrowsize=10, weight=weight1, edge_color='gray',**options)

-edge_labels = nx.get_edge_attributes(G, 'weight')
+        nx.draw(self.G, pos_nodes, node_color=self.node_adjacencies, node_size=node_size, width=2, arrowstyle='->',
+                arrowsize=10, weight=self.weight1, edge_color='gray', **options)

+        edge_labels = nx.get_edge_attributes(self.G, 'weight')

-pos_attrs = {}
-for node, coords in pos_nodes.items():
+        pos_attrs = {}
+        for node, coords in pos_nodes.items():
            pos_attrs[node] = (coords[0], coords[1] + 0.02)
-nx.draw_networkx_edge_labels(G, pos_nodes, edge_labels=edge_labels, font_size=10, font_color='red')
-nx.draw_networkx_labels(G, pos_attrs, labels=labeling,font_size=10, font_color='red')
-
-
-
-ax = plt.gca()
-ax.collections[0].set_edgecolor("#555555")
-plt.show()
+        nx.draw_networkx_edge_labels(self.G, pos_nodes, edge_labels=edge_labels, font_size=10, font_color='red')
+        nx.draw_networkx_labels(self.G, pos_attrs, labels=self.labeling, font_size=10, font_color='red')

+        ax = plt.gca()
+        ax.collections[0].set_edgecolor("#555555")
+        plt.show()


+    def main(self):
+        self.drawedges()
+        self.labeling()
+        self.findigneighbors()
+        self.result()

+linking = SemanticLinking()
+linking.main()
\ No newline at end of file