Commit 567d499c authored by Bogdan's avatar Bogdan

Connected Cluster and Similarity functionalities

parent d728e14e
......@@ -9,7 +9,7 @@ consumes:
produces:
- "application/json"
basePath: "/api"
# basePath: "/api"
paths:
/debug:
......@@ -173,11 +173,51 @@ paths:
summary: "Insert locations from AGI, create clusters for starting time and location layers, create graphs for the location clusters"
parameters: []
responses:
204:
200:
description: "Successful operation"
#endregion
################################################################################
/connectedClusters:
get:
operationId: "routes.connClusters.get_conn_clusters"
tags:
- "Connected"
summary: "Get connected Clusters data"
description: "Returns a dictionary of cluster. The clusters contain the associated connected clusters and connected nodes data."
responses:
200:
description: "Successful operation"
schema:
$ref: "#/definitions/ConnectedDict"
/clusterSimilarity:
get:
operationId: "routes.similarity.get_similarity"
tags:
- "Similarity"
summary: "Get data of the similarity between clusters"
description: "Returns a dictionary where the key is a tuple of cluster_labels (i.e. [0,319]) and the value is the computed similarity between 2 clusters in the tuple, in regard to each layer in the input. \n Note: the tuple clusters have the same layer and the computed similarity is in regard to clusters from OTHER layers."
responses:
200:
description: "Successful operation"
schema:
$ref: "#/definitions/ClusterSimilarityArray"
/clusterRunArray:
get:
operationId: "routes.connRun.get_connected_run"
tags:
- "RunId"
summary: "Get RunId"
description: "Returns the RunId and the associated datetime when a connection of clusters/simillarity of clusters was computed."
responses:
200:
description: "Successful operation"
schema:
$ref: "#/definitions/ClusterRunArray"
definitions:
Cluster:
......@@ -263,4 +303,117 @@ definitions:
TimeSliceCollection:
type: array
items:
$ref: "#/definitions/TimeSlice"
\ No newline at end of file
$ref: "#/definitions/TimeSlice"
##################################################################
ConnectedDict:
type: array
items:
$ref: "#/definitions/ConnectedCluster"
ConnectedCluster:
type: object
properties:
cluster_label:
type: string
example: "6"
cluster_layer:
type: string
example: "Price_Layer"
cluster_runId:
type: string
example: "5efdc04ac43add0aba567d76"
cluster_containedNodesDict:
$ref: "#/definitions/ConnectedNode"
cluster_connNodesDict:
$ref: "#/definitions/ConnectedNode"
cluster_connClustDict:
type: object
additionalProperties:
type: number
example:
"cluster_label": nrOfConnectedNodes
#"-1": 42
"0": 39
"6969": 1
#not used, should be removed?
#cluster_connectionsNr
ConnectedNode:
type: object
properties:
cluster_label:
type: string
node_layer:
type: string
uniqueID:
type: string
example:
"cluster_label": "2230"
"node_layer": "Destination_Layer"
"uniqueID": "a95075f5042b1b27060080156d87"
#not used, should be removed?
#finished_time
#latitude_Destination
#longitude_Destination
#travelID
#travelPrice
#userID
ClusterSimilarityArray:
type: array
items:
$ref: "#/definitions/ClusterSimilarityDictionary"
ClusterSimilarityDictionary:
properties:
clusterTuple:
type: array
items:
type: string
minItems: 2
maxItems: 2
example: [
# cluster_label1
"0",
# cluster_label2
"319"
]
similarityValues:
type: object
additionalProperties:
type: number
example:
"layer_name": similarityValue
"StartingPoint_Layer": 39.0,
"StartingTime_Layer": 99.0101004948485
runId:
type: string
example: "5efdc04ac43add0aba567d76"
ClusterRunArray:
type: array
items:
$ref: "#/definitions/ClusterRun"
ClusterRun:
type: object
properties:
_id:
type: string
example: "5efdc04ac43add0aba567d76"
Datetime:
type: string
example: "2020-07-02 14:19:51.651764"
# Added by API Auto Mocking Plugin
host: virtserver.swaggerhub.com
basePath: /NumeDeOrganizatie/Smart/1.0.0
schemes:
- https
\ No newline at end of file
from db.entities.location import Location
from db.entities.popular_location import PopularLocation
from db.entities.cluster import Cluster
from db.entities.clusterset import ClusterSet
from db.entities.user_cluster_graph import UserClusterGraph
from db.entities.layer import Layer
from db.entities.timeslice import TimeSlice
\ No newline at end of file
class ClusterC:
    def __init__(self, cluster_label, cluster_layer, cluster_runId, cluster_containedNodesDict, cluster_connNodesDict, cluster_connClustDict):
        self.cluster_label = cluster_label
        self.cluster_layer = cluster_layer
        self.cluster_runId = cluster_runId
        self.cluster_containedNodesDict = cluster_containedNodesDict  # TODO: rename to curClNodesDict. Keys are frozensets (tuples) of uniqueID and cluster label
        self.cluster_connNodesDict = cluster_connNodesDict  # Keys are frozensets (tuples) of uniqueID and cluster label. Problem: if the newNodes and oldNodes lists are removed, there may be duplicates
        self.cluster_connClustDict = cluster_connClustDict  # dictionary: layer -> (dict2: cluster_label -> nrOfConnections) OR dictionary: cluster_label -> nrOfConnections
        #cluster_connClustDict ------> look at both newNodes and oldNodes
class LayerC:
def __init__(self,layer_name,cluster_Dict):
self.layer_name = layer_name
self.cluster_Dict = cluster_Dict
\ No newline at end of file
class NodeC:
def __init__(self, cluster_label, node_layer, finished_time, latitude_Destination, longitude_Destination, travelID, travelPrice, uniqueID, userID):
self.cluster_label = cluster_label
self.node_layer = node_layer
self.finished_time = finished_time
self.latitude_Destination = latitude_Destination
self.longitude_Destination = longitude_Destination
self.travelID = travelID
self.travelPrice = travelPrice
self.uniqueID = uniqueID
self.userID = userID
\ No newline at end of file
from datetime import datetime
class ConnectedRun:
def __init__(self,run_id,timeOfExec):
self.run_id = run_id
self.timeOfExec = timeOfExec
\ No newline at end of file
......@@ -3,7 +3,11 @@ import network_constants as netconst
from database.MongoRepositoryBase import MongoRepositoryBase
import json
from db.entities.layer import *
from db.entities.cluster import *
from db.entities.timeslice import *
from db.entities import *
from processing.similarityFiles.miscFunctions import *
from typing import List
......@@ -19,6 +23,9 @@ class Repository(MongoRepositoryBase):
self._layer_nodes_collection = 'layer_nodes'
self._clusters_collection = 'clusters'
self._time_slice_collection = 'time_slices'
self._connected_clusters_collection ='connected_clusters'
self._similarity_collection = 'similarity'
self._connected_run = 'connected_run'
#region Layers
def add_layer(self, layer: Layer):
......@@ -78,4 +85,72 @@ class Repository(MongoRepositoryBase):
def remove_all_time_slices(self):
super().drop_collection(self._time_slice_collection)
#endregion
#region clusterConnected
def add_connected_clusters(self, clusterDictArray):
''' Add Connected Clusters Data to DB '''
result = super().insert_many(self._connected_clusters_collection, clusterDictArray)
return result
def get_connected_clusters(self, run_id=None): #, layer_name: str):
    ''' Get Connected Clusters Data from DB '''
    if run_id is None:
        entries = super().get_entries(self._connected_clusters_collection) #, projection={'Price_Layer': 1})
    else:
        # filter the connected_clusters collection by the run that produced it (was previously querying the similarity collection)
        entries = super().get_entries(self._connected_clusters_collection, selection={'cluster_runId': run_id})
    return [ent for ent in entries]
#return [Cluster(cluster_dict=e, from_db=True) for e in entries]
#endregion
#region similarity
def add_similarity(self, inputDict):
''' Add Similarity Data to DB '''
#checkIfConnClustDictIsSerializable(outputJSON)
result = super().insert_many(self._similarity_collection, inputDict)
#print(str(result))
#super().insert_entry(self._connected_clusters_collection, outputJSON)
return result
#TODO
def get_similarity(self, run_id=None):
    ''' Get Similarity Data from DB '''
    if run_id is None:
        entries = super().get_entries(self._similarity_collection, projection={'_id': 0})
    else:
        entries = super().get_entries(self._similarity_collection, selection={'runId': run_id})
    return [e for e in entries]
#endregion
#region connected_run
def add_connected_run(self, conRunTimestamp):
''' Add Connected Run Data to DB '''
result = super().insert_entry(self._connected_run, conRunTimestamp)
return result
def get_connected_run(self, run_id=None):
    ''' Get Connected Run Data from DB '''
    if run_id is None:
        entries = super().get_entries(self._connected_run)
    else:
        entries = super().get_entries(self._connected_run, selection={'_id': run_id})
    return [e for e in entries]
#endregion
\ No newline at end of file
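# --- Hedged usage sketch (not part of the original commit) ---
# Shows how the new collections could be exercised together. It assumes this sketch
# sits next to the Repository class defined above (db/repository.py) and that a
# MongoDB instance is reachable as configured in network_constants. The document
# contents below are made up.
def _demo_connected_collections():
    repo = Repository()
    # insert a run entry; insert_entry now returns the pymongo result, so inserted_id is available
    run_id = str(repo.add_connected_run({"Datetime": "2020-07-02 14:19:51"}).inserted_id)
    # insert one similarity document tagged with that run
    repo.add_similarity([{"clusterTuple": ["0", "319"],
                          "similarityValues": {"Destination_Layer": 3.0},
                          "runId": run_id}])
    # read it back, filtered by the run id
    print(repo.get_similarity(run_id=run_id))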
# __init__.py
from similarityFiles.calculateSimilarity import *
from similarityFiles.calculateWeights import *
from similarityFiles.populateWithNewNodes import *
from similarityFiles.miscFunctions import *
from similarityFiles.test import *
from db.entities.connected_cluster import *
from db.entities.connected_layer import *
from db.entities.connected_node import *
#This file contains the methods for calculating the similarity between clusters
import math
from db.entities.connected_node import NodeC
from db.entities.connected_cluster import ClusterC
from db.entities.connected_layer import LayerC
from typing import Dict
def minMaxFunction(iIndex, jIndex, clusterList) -> Dict[str,int]:
    ''' minMax metric for calculating the similarity between 2 clusters.
    The clusters must be from the same layer, and are compared to clusters from different layers (cluster_layer attribute).
    :param int iIndex: The index of the first cluster in "clusterList"
    :param int jIndex: The index of the second cluster in "clusterList"
    :param List[ClusterC] clusterList: The list of clusters the 2 clusters are compared to
    :returns: Dictionary with the layer name as KEY and the computed similarity between the 2 clusters with regard to that layer as VALUE.
    :rtype: Dict{str,int}
    '''
    iCluster = clusterList[iIndex]
    jCluster = clusterList[jIndex]
    outputDict = dict()
    #calculate the minMax similarity per layer
    for curCluster in clusterList: #jCluster.cluster_layer == iCluster.cluster_layer, so only iCluster's layer is compared against
        curLayer = curCluster.cluster_layer
        if ((curLayer != iCluster.cluster_layer)
                and (iCluster.cluster_label in curCluster.cluster_connClustDict)
                and (jCluster.cluster_label in curCluster.cluster_connClustDict)):
            # min part: the weaker of the two connections to curCluster
            curMin = min(curCluster.cluster_connClustDict[iCluster.cluster_label], curCluster.cluster_connClustDict[jCluster.cluster_label])
            # max part: keep the largest such minimum per layer
            if curLayer not in outputDict or outputDict[curLayer] < curMin:
                outputDict[curLayer] = curMin
    return outputDict
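# --- Hedged usage sketch (not part of the original commit) ---
# Illustrates how minMaxFunction could be called on a tiny, made-up cluster list.
# The labels, layer names, runId and connection counts below are hypothetical.
def _demo_minMaxFunction():
    from db.entities.connected_cluster import ClusterC  # also imported at module level above
    a = ClusterC("0",   "Price_Layer",       "run1", {}, {}, {})
    b = ClusterC("319", "Price_Layer",       "run1", {}, {}, {})
    # c lives in another layer and is connected to a (5 shared nodes) and to b (2 shared nodes)
    c = ClusterC("7",   "Destination_Layer", "run1", {}, {}, {"0": 5, "319": 2})
    clusterList = [a, b, c]
    print(minMaxFunction(0, 1, clusterList))  # expected: {'Destination_Layer': 2}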
def calcEuclideanDist(iIndex, jIndex, clusterList) -> Dict[str,float]:
    ''' Euclidean distance metric for calculating the similarity between 2 clusters.
    The clusters must be from the same layer, and are compared to clusters from different layers (cluster_layer attribute).
    :param int iIndex: The index of the first cluster in "clusterList"
    :param int jIndex: The index of the second cluster in "clusterList"
    :param List[ClusterC] clusterList: The list of clusters the 2 clusters are compared to
    :returns: Dictionary with the layer name as KEY and the computed similarity between the 2 clusters with regard to that layer as VALUE.
    :rtype: Dict{str,float}
    '''
    iCluster = clusterList[iIndex]
    jCluster = clusterList[jIndex]
    outputDict = dict()
    #calculate the distance //parallelizable
    for curCluster in clusterList: #jCluster.cluster_layer == iCluster.cluster_layer, so only iCluster's layer is compared against
        curLayer = curCluster.cluster_layer
        #consider only clusters from other layers for the distance calculation
        if curLayer != iCluster.cluster_layer:
            #open question: if neither cluster has a connection in a layer, the contribution is 0,
            #i.e. they are treated as identical with regard to that layer. Correct or not?
            iVal = 0
            jVal = 0
            connectedClusters = False
            if iCluster.cluster_label in curCluster.cluster_connClustDict:
                iVal = curCluster.cluster_connClustDict[iCluster.cluster_label]
                connectedClusters = True
            if jCluster.cluster_label in curCluster.cluster_connClustDict:
                jVal = curCluster.cluster_connClustDict[jCluster.cluster_label]
                connectedClusters = True
            if not connectedClusters:
                #clusters aren't connected => assign the max int value if there is no prior entry for this layer
                if curLayer not in outputDict:
                    outputDict[curLayer] = 2147483647 #notConnected
            else:
                #clusters ARE connected => add the squared difference to the running sum for this layer
                if curLayer not in outputDict:
                    #first element
                    outputDict[curLayer] = (iVal - jVal)**2
                else:
                    #further elements
                    outputDict[curLayer] += (iVal - jVal)**2
    for layer in outputDict:
        outputDict[layer] = math.sqrt(outputDict[layer])
    return outputDict
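# --- Hedged worked example (not part of the original commit) ---
# Shows the per-layer accumulation of calcEuclideanDist on made-up data:
# two Destination_Layer clusters are connected to the pair ("0", "319"),
# contributing (5-2)^2 and (1-0)^2, so the distance is sqrt(9 + 1) ~= 3.162.
def _demo_calcEuclideanDist():
    from db.entities.connected_cluster import ClusterC  # also imported at module level above
    a  = ClusterC("0",   "Price_Layer",       "run1", {}, {}, {})
    b  = ClusterC("319", "Price_Layer",       "run1", {}, {}, {})
    c1 = ClusterC("7",   "Destination_Layer", "run1", {}, {}, {"0": 5, "319": 2})
    c2 = ClusterC("8",   "Destination_Layer", "run1", {}, {}, {"0": 1})
    print(calcEuclideanDist(0, 1, [a, b, c1, c2]))  # expected: {'Destination_Layer': 3.1622776601683795}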
# returned dict: key = frozenset((iClusterLabel, jClusterLabel)), value = {layer: similarity}
#def calculateSimilarity(inputLayerDict) -> Dict[frozenset((str,str)), Dict[str,int]]:
def calculateSimilarity(inputLayerDict):
    ''' Calculates the similarity between the clusters contained in "inputLayerDict". The similarity is calculated for each combination of 2 clusters from the SAME layer.
    :param Dict{layername: LayerC} inputLayerDict: Contains the associated Layer and Cluster objects. The dictionary KEY is the layer name, the VALUE is a LayerC object. The LayerC object has an attribute cluster_Dict which stores the clusters of the layer.
    :returns: Dict{frozenset(cluster_label1, cluster_label2): Dict{layername: similarityValue}}. A dictionary with a frozenset of the 2 cluster labels as KEY, and a dictionary with the computed similarity of the clusters with regard to each layer as VALUE.
    :rtype: Dict{frozenset(str,str): Dict{str: float}}
    '''
    print("Entered calculateSimilarity")
    similarityDict = dict() #the key is a frozenset (clusterLabel1, clusterLabel2)
    clusterList = list()
    for curLayer in inputLayerDict.values():
        for curCluster in curLayer.cluster_Dict.values():
            clusterList.append(curCluster)
    #print(" Nr. of clusters: "+str(len(clusterList)))
    #go through every combination of 2 clusters and calculate the similarity between them with regard to each layer;
    #clusterList is grouped by layer, so the inner loop can stop at the first cluster from a different layer
    i = 0
    while i < len(clusterList):
        iCluster = clusterList[i]
        j = i + 1
        while j < len(clusterList):
            jCluster = clusterList[j]
            if iCluster.cluster_layer == jCluster.cluster_layer: #calculate similarity only within the same layer
                key = frozenset((iCluster.cluster_label, jCluster.cluster_label))
                #### EUCLIDEAN DISTANCE /minMax
                similarityDict[key] = calcEuclideanDist(i, j, clusterList)
                #print("#### similarityDict i:"+str(i)+" j:"+str(j))
                #print("#### "+str(similarityDict))
            else:
                break
            j += 1
        i += 1
    print("Finished calculateSimilarity")
    return similarityDict
\ No newline at end of file
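# --- Hedged usage sketch (not part of the original commit) ---
# End-to-end call of calculateSimilarity on a minimal, made-up layer dictionary.
# The import path below is the one used in main.py; all labels and values are hypothetical.
def _demo_calculateSimilarity():
    from processing.similarityFiles.calculateSimilarity import calculateSimilarity
    from db.entities.connected_cluster import ClusterC
    from db.entities.connected_layer import LayerC
    a = ClusterC("0",   "Price_Layer",       "run1", {}, {}, {})
    b = ClusterC("319", "Price_Layer",       "run1", {}, {}, {})
    c = ClusterC("7",   "Destination_Layer", "run1", {}, {}, {"0": 5, "319": 2})
    layerDict = {
        "Price_Layer":       LayerC("Price_Layer",       {"0": a, "319": b}),
        "Destination_Layer": LayerC("Destination_Layer", {"7": c}),
    }
    # expected: {frozenset({'0', '319'}): {'Destination_Layer': 3.0}}
    print(calculateSimilarity(layerDict))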
from db.entities.connected_node import NodeC
from db.entities.connected_cluster import ClusterC
from db.entities.connected_layer import LayerC
from typing import Dict
def sortFunctByNode(node):
    ''' Sort key for nodes: the node's uniqueID. Logs the node and falls back to "" if the attribute is missing. '''
    try:
        return node.uniqueID
    except AttributeError:
        print(node.cluster_label)
        print(node.node_layer)
        return ""
def calculateWeights(inputLayerDict) -> Dict[str,LayerC]:
    ''' Calculates the number of connections/weights between the clusters contained in "inputLayerDict". Connections are made between clusters from DIFFERENT layers.
    :param Dict{string: LayerC} inputLayerDict: Contains the associated Layer and Cluster objects. The dictionary KEY is the layer name, the VALUE is a LayerC object. The LayerC object has an attribute cluster_Dict which stores the clusters of the layer.
    :returns: Dict{layername: LayerC}. Returns the inputLayerDict with the connections added to the attributes cluster_connClustDict and cluster_connNodesDict
    :rtype: Dict{string: LayerC}
    '''
    #the input dictates which clusters are updated: every cluster included in inputLayerDict is updated;
    #updating a single cluster without considering the rest would require a separate method
    print("Entered calculateWeights")
nodeList = []
for curLayer in inputLayerDict.values():
for curCluster in curLayer.cluster_Dict.values():
for curNode in curCluster.cluster_containedNodesDict.values():
nodeList.append(curNode)
#if curNode != None:
#if(curNode.uniqueID!= None):
#print(" Nr. of nodes: " + str(len(nodeList)))
nodeList.sort(key=sortFunctByNode)
    i = 0
    while i < len(nodeList):
        iNode = nodeList[i]
        j = i + 1
        while j < len(nodeList):
            jNode = nodeList[j]
            #there is a connection if the same entity (uniqueID) appears in two different layers
            #print("\n ### \n"+iNode.uniqueID +" "+ iNode.node_layer +"\n"+ jNode.uniqueID +" "+ jNode.node_layer )
            if (iNode.node_layer != jNode.node_layer) and (iNode.uniqueID == jNode.uniqueID):
                iOldKey = frozenset((iNode.uniqueID, iNode.cluster_label))
                jOldKey = frozenset((jNode.uniqueID, jNode.cluster_label))
                iNodeCluster = inputLayerDict[iNode.node_layer].cluster_Dict[iNode.cluster_label]
                jNodeCluster = inputLayerDict[jNode.node_layer].cluster_Dict[jNode.cluster_label]
                #check whether the cluster has already seen the OTHER node; if not, count the connection and remember the node
                if jOldKey not in iNodeCluster.cluster_connNodesDict:
                    #add node j at cluster i
                    if jNode.cluster_label in iNodeCluster.cluster_connClustDict:
                        iNodeCluster.cluster_connClustDict[jNode.cluster_label] += 1
                    else:
                        iNodeCluster.cluster_connClustDict[jNode.cluster_label] = 1
                    #remember node j as already seen
                    iNodeCluster.cluster_connNodesDict[jOldKey] = jNode
                if iOldKey not in jNodeCluster.cluster_connNodesDict:
                    #add node i at cluster j
                    if iNode.cluster_label in jNodeCluster.cluster_connClustDict:
                        jNodeCluster.cluster_connClustDict[iNode.cluster_label] += 1
                    else:
                        jNodeCluster.cluster_connClustDict[iNode.cluster_label] = 1
                    #remember node i as already seen
                    jNodeCluster.cluster_connNodesDict[iOldKey] = iNode
            j += 1
        i += 1
#deleting cluster_containedNodesDicts/// No longer needed
#for curLayer in inputLayerDict.values():
# for curCluster in curLayer.cluster_Dict.values():
# inputLayerDict[curCluster.cluster_layer].cluster_Dict[curCluster.cluster_label].cluster_containedNodesDict = dict()
print("Finished calculateWeights")
#store weights in database?
return inputLayerDict
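# --- Hedged usage sketch (not part of the original commit) ---
# Two layers with one cluster each; both clusters contain a node with the same
# uniqueID, so calculateWeights connects them with a weight of 1. NodeC/ClusterC/LayerC
# are the entity classes imported at the top of this file; all values are made up.
def _demo_calculateWeights():
    n1 = NodeC("0", "Price_Layer",       None, None, None, None, None, "trip-42", None)
    n2 = NodeC("9", "Destination_Layer", None, None, None, None, None, "trip-42", None)
    cA = ClusterC("0", "Price_Layer",       "run1", {("trip-42", "0"): n1}, {}, {})
    cB = ClusterC("9", "Destination_Layer", "run1", {("trip-42", "9"): n2}, {}, {})
    layerDict = {
        "Price_Layer":       LayerC("Price_Layer",       {"0": cA}),
        "Destination_Layer": LayerC("Destination_Layer", {"9": cB}),
    }
    layerDict = calculateWeights(layerDict)
    print(layerDict["Price_Layer"].cluster_Dict["0"].cluster_connClustDict)  # expected: {'9': 1}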
#Misc util functions
import json
import requests
import datetime
from routes.connClusters import add_conn_clusters
from routes.similarity import add_similarity
from routes.connRun import add_connected_run
from processing.similarityFiles.miscFunctions import *
def outputFileLayerFunction(layerDict,limitNrNodes,limitNrCluster,runId):
''' Writes the layerDict data to a JSON file.
:param Dict{string: Layer} layerDict: Object which contains Data about the Layers, Clusters and Nodes
:param int limitNrNodes: How many nodes are contained in layerDict. Used in creating the name of the File
:param int limitNrCluster: How many clusters are contained in layerDict. Used in creating the name of the File
:param string runId: Id of the Run
'''
layerJSON = convertLayerDictToJSON(layerDict,runId)
outputJSON = json.dumps(layerJSON, default=lambda o: o.__dict__, indent=4)
    try:
        with open('resultLayerDictN'+str(limitNrNodes)+'C'+str(limitNrCluster)+'.json', 'w') as outfile:
            outfile.write(outputJSON)
    except OSError:
        print("Error occurred when writing the resultLayerDict file")
def outputFileSimilFunction(similarityDict,limitNrNodes,limitNrCluster,runId):
''' Writes the similarityDict data to a JSON file.
:param Dict{(cluster_label1, cluster_label2): Dict{layername: value}} similarityDict: Object which contains data about the similarity between the clusters
:param int limitNrNodes: How many nodes are contained in layerDict. Used in creating the name of the File
:param int limitNrCluster: How many clusters are contained in layerDict. Used in creating the name of the File
:param string runId: Id of the Run
'''
similJSON = convertSimilarityDictToJSON(similarityDict,runId)
outputJSON = json.dumps(similJSON, default=lambda o: o.__dict__, indent=4)
    try:
        with open('resultSimilarityDictN'+str(limitNrNodes)+'C'+str(limitNrCluster)+'.json', 'w') as outfile2:
            outfile2.write(outputJSON)
    except OSError:
        print("Error occurred when writing the resultSimilarityDict file")
def outputFileTimeFunction(timelist,limitNrNodes,limitNrCluster,runId):
''' Writes execution time to a file.
:param List[datetime] timelist: Contains timestamps about the execution time of functions and the program.
:param int limitNrNodes: How many nodes are considered. Used in creating the name of the File
:param int limitNrCluster: How many clusters are considered. Used in creating the name of the File
:param string runId: Id of the Run
'''
stringToWrite = "StartTime: "+ str(timelist[0])
stringToWrite += "\nFinishTime: " + str((timelist[3])) +"\n"
stringToWrite += "\nPopulateWithNewNodes: " + str((timelist[1]-timelist[0]).total_seconds())
stringToWrite += "\nCalculateWeights: " + str((timelist[2]-timelist[1]).total_seconds())
stringToWrite += "\nCalculateSimilarity: " + str((timelist[3]-timelist[2]).total_seconds())
stringToWrite += "\nTotalTime: " + str((timelist[3]-timelist[0]).total_seconds())
stringToWrite += "\nRunId: " +str(runId)
#aux = str(timelist[0]) + " :PopulateWithNewNodes\n"+ str(timelist[1]) + " :CalculateWeights\n" + str(timelist[2]) + " :CalculateSimilarity\n"+ str(timelist[3]) + " :Finish"
    try:
        with open('resultTimeExecN'+str(limitNrNodes)+'C'+str(limitNrCluster)+'.txt', 'w') as outfile3:
            outfile3.write(stringToWrite)
    except OSError:
        print("Error occurred when writing the resultTimeExec file")
def outputMongoConnClustDict(inputDict,runId):
''' Stores connected_clusters in the database.
:param Dict() inputDict: Contains the data to insert
:param string runId: Id of the Run
'''
#inputDict["Timestamp"] = str(datetime.datetime.now())
add_conn_clusters(inputDict,runId)
def outputMongoSimilarity(inputDict,runId):
''' Stores cluster_similarity in the database.
:param Dict() inputDict: Contains the data to insert
:param string runId: Id of the Run
'''
add_similarity(inputDict,runId)
\ No newline at end of file
#Misc util functions
import json
import requests
import datetime
def currentTime():
ts = datetime.datetime.now()
print(ts)
return ts
def totalNumberOfNodes(inputLayerDict):
''' Computes the total number of nodes in the inputLayerDict.
:param Dict{string: LayerC} inputLayerDict: Dictionary of layers; each layer contains the clusters in which the nodes are stored
:returns: The number of nodes
:rtype: int
'''
nodeCount = 0
for curLayer in inputLayerDict.values():
for curCluster in curLayer.cluster_Dict.values():
nodeCount+=len(curCluster.cluster_containedNodesDict.values())
return nodeCount
def totalNumberOfClusters(inputLayerDict):
''' Computes the total number of clusters in the inputLayerDict.
:param Dict{string: LayerC} inputLayerDict: Dictionary of layers in which the clusters are stored
:returns: The number of clusters
:rtype: int
'''
clustCount = 0
for curLayer in inputLayerDict.values():
clustCount+= len(curLayer.cluster_Dict.values())
return clustCount
def convertLayerDictToJSON(layerDict, runId):
''' Converts a layerDict object into a JSON-serializable format: a flat list of cluster dictionaries.
:param Dict{string: LayerC} layerDict: Object which contains data about the layers, clusters and nodes
:param string runId: Id of the Run
:rtype: List[Dict]
Output shape (illustrative):
[
    {
        "cluster_label": "0123400",
        "cluster_layer": "layer1",
        "cluster_runId": "5efdc04ac43add0aba567d76",
        "cluster_connClustDict": {"0123456": 98, "1234567": 12},
        "cluster_connNodesDict": [{"node_id": "abcd", "node_cluster": "0123456"}, ...],
        "cluster_containedNodesDict": ["abcd", "sgre", "dgre", "ddhr", "yyrh"]
    },
    ...
]
'''
outputJSON = []
for curLayer in layerDict.values():
for curCluster in curLayer.cluster_Dict.values():
outputJSON.append({
"cluster_label" : curCluster.cluster_label,
"cluster_layer" : curCluster.cluster_layer,
"cluster_runId" : runId,
"cluster_connClustDict" : changeDictKeysToString(curCluster.cluster_connClustDict),
"cluster_connNodesDict" : getFrozensetFromConnNodesDict(curCluster.cluster_connNodesDict),
"cluster_containedNodesDict" : getNodeIdListFromContainedNodesDict(curCluster.cluster_containedNodesDict),
})
#outputJSON = json.dumps(outputJSON, default=lambda o: o.__dict__, indent=4)
return outputJSON
def changeDictKeysToString(inputDict):
keys_values = inputDict.items()
outputDict = { str(key): value for key,value in keys_values}
return outputDict
def getNodeIdListFromContainedNodesDict(inputDict):
output = []
for curNode in inputDict.values():
output.append(curNode.uniqueID)
return output
def getFrozensetFromConnNodesDict(inputDict):
output = []
for curNode in inputDict.values():
auxDict = {}
auxDict["node_id"]= curNode.uniqueID
auxDict["node_cluster"] = curNode.cluster_label
output.append(auxDict)
return output
def convertSimilarityDictToJSON(inputDict,runId):
    ''' Converts a similarityDict (keyed by frozensets of 2 cluster labels) into a JSON-serializable list. '''
    similList = []
    for compositeKey in inputDict:
        #the key is a frozenset of 2 cluster_labels; note that the iteration order of a frozenset is not guaranteed
        clusterTuple = list(compositeKey)
        similList.append({
            "clusterTuple" : clusterTuple,
            "similarityValues" : inputDict[compositeKey],
            "runId": runId
        })
    #outputJSON = json.dumps(similList, default=lambda o: o.__dict__, indent=4)
    return similList
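# --- Hedged usage sketch (not part of the original commit) ---
# Shows the shape convertSimilarityDictToJSON produces for a single, made-up entry.
def _demo_convertSimilarityDictToJSON():
    similarityDict = {frozenset(("0", "319")): {"Destination_Layer": 3.0}}
    print(convertSimilarityDictToJSON(similarityDict, "5efdc04ac43add0aba567d76"))
    # e.g. [{'clusterTuple': ['0', '319'],
    #        'similarityValues': {'Destination_Layer': 3.0},
    #        'runId': '5efdc04ac43add0aba567d76'}]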
from db.entities.connected_node import NodeC
from db.entities.connected_cluster import ClusterC
from db.entities.connected_layer import LayerC
from typing import Dict
#from db.repository import Repository
import json
import requests
from routes.clustersets import get_by_name
from db import repository
StartTime: 2020-07-02 12:05:47.067975
FinishTime: 2020-07-02 12:05:54.561853
PopulateWithNewNodes: 2.495718
CalculateWeights: 4.590413
CalculateSimilarity: 0.407747
TotalTime: 7.493878
\ No newline at end of file
StartTime: 2020-07-06 10:43:32.240013
FinishTime: 2020-07-06 10:43:39.110333
PopulateWithNewNodes: 2.399582
CalculateWeights: 4.422768
CalculateSimilarity: 0.04797
TotalTime: 6.87032
RunId: 5f02e43b53a73a48d0eaaed5
\ No newline at end of file
from flask import request, Response
from db.repository import Repository
from db.entities import ClusterSet
from db.entities import clusterset
repo = Repository()
......
from flask import request, Response
from db.repository import Repository
from routes.connRun import add_connected_run
from processing.similarityFiles.miscFunctions import *
repo = Repository()
def add_conn_clusters(inputDict,runId):
''' Stores connected_clusters in the database.
:param Dict() inputDict: Contains the data to insert
:param string runId: Id of the Run
'''
outputJSON = convertLayerDictToJSON(inputDict,runId)
repo.add_connected_clusters(outputJSON)
def get_conn_clusters():
''' Gets connected_clusters from the database.
:returns: Returns connected cluster objects from the DB
:rtype: List[Dict]
'''
result = repo.get_connected_clusters()
if result is None or len(result) == 0:
print("MongoDb Get Error: Response 404")
return Response(status=404)
else:
return result
from flask import request, Response
from db.repository import Repository
from db.entities import connected_run
import datetime
#from db.entities import clusterset #REMOVE?
repo = Repository()
def add_connected_run():
'''
Inserts Run with current Time into the DB
:returns: Returns the _id of the connected_run entry in the DB
:rtype: string
'''
currentTime = datetime.datetime.now()
runDict = {"Datetime" : str(currentTime)}
inserted_result = repo.add_connected_run(runDict)
return str(inserted_result.inserted_id)
def get_connected_run():
    ''' Gets the connected Runs from the database.
    :returns: Returns Run objects ({_id, Datetime}) from the DB
    :rtype: List[Dict]
    '''
    result = repo.get_connected_run()
    if result is None or len(result) == 0:
        print("MongoDb Get Error: Response 404")
        return Response(status=404)
    else:
        return result
from flask import request, Response
from db.repository import Repository
from processing.similarityFiles.miscFunctions import convertSimilarityDictToJSON
#from db.entities import clusterset #REMOVE?
repo = Repository()
def add_similarity(inputDict,runId):
''' Stores cluster_similarity in the database.
:param Dict() inputDict: Contains the data to insert
:param string runId: Id of the Run
'''
outputJSON = convertSimilarityDictToJSON(inputDict,runId)
repo.add_similarity(outputJSON)
def get_similarity():
''' Gets cluster_similarity from the database.
:returns: Returns similarity objects from the DB
:rtype: List[Dict]
'''
result = repo.get_similarity()
if result is None or len(result) == 0:
print("MongoDb Get Error: Response 404")
return Response(status=404)
else:
return result
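# --- Hedged client-side sketch (not part of the original commit) ---
# Shows how the new /clusterSimilarity endpoint could be queried over HTTP.
# The base URL below is an assumption (host, port and basePath depend on the
# deployment, cf. network_constants.py and the commented-out basePath in the
# swagger file); adjust it to the running service.
def _demo_fetch_similarity(base_url="http://role-stage-discovery:80/api"):
    import requests
    # GET /clusterSimilarity returns a list of {clusterTuple, similarityValues, runId}
    # objects (see the ClusterSimilarityArray definition in the swagger file)
    resp = requests.get(base_url + "/clusterSimilarity")
    resp.raise_for_status()
    for entry in resp.json():
        print(entry["clusterTuple"], entry["similarityValues"], entry["runId"])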
"""
for each cluster in the layer
for each other cluster from all the other layers
find the number of connections
save them into a dictionary (ClusterID(from other layer) -> Nr of connections)
save all the dictionaries in a map? ( ClusterID1 -> dictionary1, ClusterID2 -> dictionary2 )
have a map per layer? (Nr of maps = nr of layers)
Each cluster has a dictionary of connCluster -> nrConnections
Each layer has a dictionary of clusters -> dictionaries of nodes/connections
"""
import os
import sys
import math
import datetime
from typing import Dict
##################AUX
modules_path = '../../../modules/'
if os.path.exists(modules_path):
sys.path.insert(1, modules_path)
#### TO BE DELETED #### ^
from db.entities.connected_node import NodeC
from db.entities.connected_cluster import ClusterC
from db.entities.connected_layer import LayerC
from processing.similarityFiles.dataInput import *
from processing.similarityFiles.calculateWeights import *
from processing.similarityFiles.calculateSimilarity import *
from processing.similarityFiles.miscFunctions import *
from processing.similarityFiles.dataOutput import *
from routes.connRun import connected_run
def main():
print("\nEntered Main")
timelist = []
timelist.append(currentTime())#starting time
"""
Current Layers
Price_Layer
FinishedTime_Layer
Destination_Layer
StartingPoint_Layer
Reputation_Layer
StartingTime_Layer
User_Layer
"""
layerNameList = ["Price_Layer","FinishedTime_Layer","Destination_Layer"] #Get it from somewhere else?
limitNrCluster = 20 #per Layer
limitNrNodes = 1000 #per Layer
layerDict = getClusterDataFromMongo(layerNameList,limitNrCluster,limitNrNodes)
#layerDict = getClusterDataFromSwagger(limitNrCluster,limitNrNodes) #for Swagger, Change URLs inside the function for different input Data
totalNodes = totalNumberOfNodes(layerDict)
print("Nr. of nodes: " +str(totalNodes))
totalClusters = totalNumberOfClusters(layerDict)
print("Nr. of clusters: " + str(totalClusters))
timelist.append(currentTime())
#calculates the weights between the clusters (weight == number of connections) #return is displayed in outputLayerFunction
layerDict = calculateWeights(layerDict)
timelist.append(currentTime())
#calculates the similarity between the clusters #returns dictionary[ tuple(cluster_label1,cluster_label2),
# listOfSimilarity(layer1,layer2,layer3) ]
similarityDict = calculateSimilarity(layerDict)
timelist.append(currentTime()) #Finishing time
#Write to files
runId = add_connected_run()
print("Outputing data")
outputFileLayerFunction(layerDict,totalNodes,totalClusters,runId)
outputFileSimilFunction(similarityDict,totalNodes,totalClusters,runId)
outputFileTimeFunction(timelist,totalNodes,totalClusters,runId)
#Output to DB
outputMongoConnClustDict(layerDict,runId)
outputMongoSimilarity(similarityDict,runId)
#Currently not used, developed for possible future uses
connClustersFromMongo = getConnClusterDataFromMongo()
similarityArrFromMongo = getSimilarityDataFromMOngo()
print("FINISHED")
return
##########START##########
main()
#########FINISH##########
......@@ -17,7 +17,7 @@ class MongoRepositoryBase:
def insert_entry(self, collection_name, content: dict):
collection = self._database[collection_name]
collection.insert_one(content)
return collection.insert_one(content)
def insert_many(self, collection_name, content: list):
collection = self._database[collection_name]
......
......@@ -18,6 +18,6 @@ SEMANTIC_LINKING_DB_PORT = 27017
## Role Stage Discovery
ROLESTAGE_DISCOVERY_HOSTNAME = 'role-stage-discovery'
ROLESTAGE_DISCOVERY_REST_PORT = 80
ROLESTAGE_DISCOVERY_DB_HOSTNAME = f'{ROLESTAGE_DISCOVERY_HOSTNAME}-db'
ROLESTAGE_DISCOVERY_DB_PORT = 27017
\ No newline at end of file
ROLESTAGE_DISCOVERY_REST_PORT = 30103
ROLESTAGE_DISCOVERY_DB_HOSTNAME = 'articonf1.itec.aau.at'
ROLESTAGE_DISCOVERY_DB_PORT = 30104
\ No newline at end of file