Commit 567d499c authored by Bogdan

Connected Cluster and Similarity functionalities

parent d728e14e
......@@ -9,7 +9,7 @@ consumes:
produces:
- "application/json"
basePath: "/api"
# basePath: "/api"
paths:
/debug:
......@@ -173,11 +173,51 @@ paths:
summary: "Insert locations from AGI, create clusters for starting time and location layers, create graphs for the location clusters"
parameters: []
responses:
204:
200:
description: "Successful operation"
#endregion
################################################################################
/connectedClusters:
get:
operationId: "routes.connClusters.get_conn_clusters"
tags:
- "Connected"
summary: "Get connected Clusters data"
description: "Returns a dictionary of cluster. The clusters contain the associated connected clusters and connected nodes data."
responses:
200:
description: "Successful operation"
schema:
$ref: "#/definitions/ConnectedDict"
/clusterSimilarity:
get:
operationId: "routes.similarity.get_similarity"
tags:
- "Similarity"
summary: "Get data of the similarity between clusters"
description: "Returns a dictionary where the key is a tuple of cluster_labels (i.e. [0,319]) and the value is the computed similarity between 2 clusters in the tuple, in regard to each layer in the input. \n Note: the tuple clusters have the same layer and the computed similarity is in regard to clusters from OTHER layers."
responses:
200:
description: "Successful operation"
schema:
$ref: "#/definitions/ClusterSimilarityArray"
/clusterRunArray:
get:
operationId: "routes.connRun.get_connected_run"
tags:
- "RunId"
summary: "Get RunId"
description: "Returns the RunId and the associated datetime when a connection of clusters/simillarity of clusters was computed."
responses:
200:
description: "Successful operation"
schema:
$ref: "#/definitions/ClusterRunArray"
definitions:
Cluster:
......@@ -264,3 +304,116 @@ definitions:
type: array
items:
$ref: "#/definitions/TimeSlice"
##################################################################
ConnectedDict:
type: array
items:
$ref: "#/definitions/ConnectedCluster"
ConnectedCluster:
type: object
properties:
cluster_label:
type: string
example: "6"
cluster_layer:
type: string
example: "Price_Layer"
cluster_runId:
type: string
example: "5efdc04ac43add0aba567d76"
cluster_containedNodesDict:
$ref: "#/definitions/ConnectedNode"
cluster_connNodesDict:
$ref: "#/definitions/ConnectedNode"
cluster_connClustDict:
type: object
additionalProperties:
type: number
example:
"cluster_label": nrOfConnectedNodes
#"-1": 42
"0": 39
"6969": 1
#not used, should be removed?
#cluster_connectionsNr
ConnectedNode:
type: object
properties:
cluster_label:
type: string
node_layer:
type: string
uniqueID:
type: string
example:
"cluster_label": "2230"
"node_layer": "Destination_Layer"
"uniqueID": "a95075f5042b1b27060080156d87"
#not used, should be removed?
#finished_time
#latitude_Destination
#longitude_Destination
#travelID
#travelPrice
#userID
ClusterSimilarityArray:
type: array
items:
$ref: "#/definitions/ClusterSimilarityDictionary"
ClusterSimilarityDictionary:
properties:
clusterTuple:
type: array
items:
type: string
minItems: 2
maxItems: 2
example: [
# cluster_label1
"0",
# cluster_label2
"319"
]
similarityValues:
type: object
additionalProperties:
type: number
example:
"layer_name": similarityValue
"StartingPoint_Layer": 39.0,
"StartingTime_Layer": 99.0101004948485
runId:
type: string
example: "5efdc04ac43add0aba567d76"
ClusterRunArray:
type: array
items:
$ref: "#/definitions/ClusterRun"
ClusterRun:
type: object
properties:
_id:
type: string
example: "5efdc04ac43add0aba567d76"
Datetime:
type: string
example: "2020-07-02 14:19:51.651764"
# Added by API Auto Mocking Plugin
host: virtserver.swaggerhub.com
basePath: /NumeDeOrganizatie/Smart/1.0.0
schemes:
- https
\ No newline at end of file
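For orientation, a minimal sketch of how the three new endpoints could be queried from Python; the host, port and basePath below are assumptions taken from the data-input URLs used later in this commit, not something the spec itself guarantees.
import requests
BASE_URL = 'http://articonf1.itec.aau.at:30103/api'  #assumed deployment, mirrors the listURLs in dataInput.py
conn_clusters = requests.get(BASE_URL + '/connectedClusters', timeout=30).json()
similarities = requests.get(BASE_URL + '/clusterSimilarity', timeout=30).json()
runs = requests.get(BASE_URL + '/clusterRunArray', timeout=30).json()
print(len(conn_clusters), "connected clusters,", len(similarities), "similarity entries,", len(runs), "runs")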
from db.entities.location import Location
from db.entities.popular_location import PopularLocation
from db.entities.cluster import Cluster
from db.entities.clusterset import ClusterSet
from db.entities.user_cluster_graph import UserClusterGraph
from db.entities.layer import Layer
from db.entities.timeslice import TimeSlice
\ No newline at end of file
class ClusterC:
def __init__(self,cluster_label,cluster_layer,cluster_runId,cluster_containedNodesDict,cluster_connNodesDict,cluster_connClustDict):
self.cluster_label = cluster_label
self.cluster_layer = cluster_layer
self.cluster_runId = cluster_runId
self.cluster_containedNodesDict = cluster_containedNodesDict ###RENAME TO curClNodesDict #Keys are frozensets (tuples) of (uniqueID, cluster_label)
self.cluster_connNodesDict = cluster_connNodesDict #Keys are frozensets (tuples) of (uniqueID, cluster_label) #problem if the newNodes and oldNodes lists are removed: there may be duplicates
self.cluster_connClustDict = cluster_connClustDict #dictionary: layer -> (dict2: cluster_label -> nrOfConnections) OR dictionary: cluster_label -> nrOfConnections
#cluster_connClustDict ------> look at both newNodes and oldNodes
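A small sketch of how a ClusterC could be instantiated; the label, layer and node key are invented, and the frozenset key follows the (uniqueID, cluster_label) convention described in the comments above.
node_key = frozenset(("a95075f5042b1b27060080156d87", "6"))  #hypothetical (uniqueID, cluster_label) key
example_cluster = ClusterC(
    cluster_label="6",
    cluster_layer="Price_Layer",
    cluster_runId=None,                           #filled in once a run is stored
    cluster_containedNodesDict={node_key: None},  #values would normally be NodeC objects
    cluster_connNodesDict=dict(),
    cluster_connClustDict=dict())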
class LayerC:
def __init__(self,layer_name,cluster_Dict):
self.layer_name = layer_name
self.cluster_Dict = cluster_Dict
\ No newline at end of file
class NodeC:
def __init__(self, cluster_label, node_layer, finished_time, latitude_Destination, longitude_Destination, travelID, travelPrice, uniqueID, userID):
self.cluster_label = cluster_label
self.node_layer = node_layer
self.finished_time = finished_time
self.latitude_Destination = latitude_Destination
self.longitude_Destination = longitude_Destination
self.travelID = travelID
self.travelPrice = travelPrice
self.uniqueID = uniqueID
self.userID = userID
\ No newline at end of file
from datetime import datetime
class ConnectedRun:
def __init__(self,run_id,timeOfExec):
self.run_id = run_id
self.timeOfExec = timeOfExec
\ No newline at end of file
......@@ -3,7 +3,11 @@ import network_constants as netconst
from database.MongoRepositoryBase import MongoRepositoryBase
import json
from db.entities.layer import *
from db.entities.cluster import *
from db.entities.timeslice import *
from db.entities import *
from processing.similarityFiles.miscFunctions import *
from typing import List
......@@ -19,6 +23,9 @@ class Repository(MongoRepositoryBase):
self._layer_nodes_collection = 'layer_nodes'
self._clusters_collection = 'clusters'
self._time_slice_collection = 'time_slices'
self._connected_clusters_collection ='connected_clusters'
self._similarity_collection = 'similarity'
self._connected_run = 'connected_run'
#region Layers
def add_layer(self, layer: Layer):
......@@ -79,3 +86,71 @@ class Repository(MongoRepositoryBase):
super().drop_collection(self._time_slice_collection)
#endregion
#region clusterConnected
def add_connected_clusters(self, clusterDictArray):
''' Add Connected Clusters Data to DB '''
result = super().insert_many(self._connected_clusters_collection, clusterDictArray)
return result
def get_connected_clusters(self, run_id=None):
''' Get Connected Clusters Data from DB '''
if run_id is None:
entries = super().get_entries(self._connected_clusters_collection)
else:
entries = super().get_entries(self._connected_clusters_collection, selection={'cluster_runId': run_id})
return list(entries)
#return [Cluster(cluster_dict=e, from_db=True) for e in entries]
#endregion
#region similarity
def add_similarity(self, inputDict):
''' Add Similarity Data to DB '''
#checkIfConnClustDictIsSerializable(outputJSON)
result = super().insert_many(self._similarity_collection, inputDict)
#print(str(result))
#super().insert_entry(self._connected_clusters_collection, outputJSON)
return result
#TODO
def get_similarity(self, run_id=None):
''' Get Similarity Data from DB '''
if run_id is None:
entries = super().get_entries(self._similarity_collection, projection={'_id': 0})
else:
entries = super().get_entries(self._similarity_collection, selection={'runId': run_id})
return list(entries)
#endregion
#region connected_run
def add_connected_run(self, conRunTimestamp):
''' Add Connected Run Data to DB '''
result = super().insert_entry(self._connected_run, conRunTimestamp)
return result
def get_connected_run(self, run_id=None):
''' Get Connected Run Data from DB '''
if run_id is None:
entries = super().get_entries(self._connected_run)
else:
entries = super().get_entries(self._connected_run, selection={'_id': run_id})
return list(entries)
#endregion
\ No newline at end of file
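A hedged usage sketch of the new repository regions; the documents and the run id below are invented, only the method names come from the code above.
repo = Repository()
repo.add_connected_clusters([{"cluster_label": "6", "cluster_layer": "Price_Layer", "cluster_runId": "5efdc04ac43add0aba567d76"}])
all_connected = repo.get_connected_clusters()                             #every stored connected-cluster document
run_similarity = repo.get_similarity(run_id="5efdc04ac43add0aba567d76")   #similarity entries of one run only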
# __init__.py
from similarityFiles.calculateSimilarity import *
from similarityFiles.calculateWeights import *
from similarityFiles.populateWithNewNodes import *
from similarityFiles.miscFunctions import *
from similarityFiles.test import *
from db.entities.connected_cluster import *
from db.entities.connected_layer import *
from db.entities.connected_node import *
#This file contains the methods for calculating the similarity between clusters
import math
from db.entities.connected_node import NodeC
from db.entities.connected_cluster import ClusterC
from db.entities.connected_layer import LayerC
from typing import Dict
def minMaxFunction(iIndex,jIndex,clusterList) -> Dict[str,int]:
''' minMax Metric for calculating similarity between 2 clusters.
Clusters must be from the same layer, and will be compared to clusters from different layers (cluster_layer attribute)
:param int iIndex: The index of the first Cluster in the "clusterList"
:param int jIndex: The index of the second Cluster in the "clusterList"
:param List[Cluster] clusterList: A list of clusters to which the 2 clusters will be compared to
:returns: Dictionary with layername as KEY, and the computed similarity value between the 2 clusters in regard to the layer as the VALUE of the Dict.
:rtype: Dict{str,int}
'''
iCluster= clusterList[iIndex]
jCluster= clusterList[jIndex]
outputDict = dict()
#calculate the min/max over clusters from the other layers
for curCluster in clusterList: #jCluster.cluster_layer == iCluster.cluster_layer, so i only compare to one
curLayer = curCluster.cluster_layer
curLabel = curCluster.cluster_label
if(( curLayer != iCluster.cluster_layer)
and ( curCluster.cluster_connClustDict.__contains__(iCluster.cluster_label))
and ( curCluster.cluster_connClustDict.__contains__(jCluster.cluster_label))):
# min part
curMin = min(curCluster.cluster_connClustDict[iCluster.cluster_label],curCluster.cluster_connClustDict[jCluster.cluster_label])
if(outputDict.__contains__(curLayer) == False):
outputDict[curLayer]= curMin
else: # max part
if(outputDict[curLayer]<curMin):
outputDict[curLayer] = curMin
return outputDict
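A hypothetical worked example of the minMax value for one other layer: two clusters k1 and k2 from that layer connect to cluster i with 4 and 1 shared nodes and to cluster j with 1 and 1, so the layer value is the largest pairwise minimum. All counts are invented.
conn_to_i = {"k1": 4, "k2": 1}   #invented connection counts towards cluster i
conn_to_j = {"k1": 1, "k2": 1}   #invented connection counts towards cluster j
layer_value = max(min(conn_to_i[k], conn_to_j[k]) for k in conn_to_i)   # == 1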
def calcEuclideanDist(iIndex,jIndex,clusterList) -> Dict[str,float]:
''' Euclidean Distance Metric for calculating similarity between 2 clusters.
Clusters must be from the same layer, and will be compared to clusters from different layers (cluster_layer attribute)
:param int iIndex: The index of the first Cluster in the "clusterList"
:param int jIndex: The index of the second Cluster in the "clusterList"
:param List[Cluster] clusterList: A list of clusters to which the 2 clusters will be compared to
:returns: Dictionary with layername as KEY, and the computed similarity value between the 2 clusters in regard to the layer as the VALUE of the Dict.
:rtype: Dict{str,float}
'''
iCluster= clusterList[iIndex]
jCluster= clusterList[jIndex]
outputDict = dict()
#calculate the distance (parallelizable)
for curCluster in clusterList: #jCluster.cluster_layer == iCluster.cluster_layer, so i only compare to one
curLayer = curCluster.cluster_layer
curLabel = curCluster.cluster_label #debugOnly
#considering only clusters from other layers for distance calc
if( curLayer != iCluster.cluster_layer):
###### BUG: what if they don't share a connection?
###### if in a layer both clusters don't have a connection --> distance of 0. Identical in regard to that layer. Correct or false?
iVal = 0
jVal = 0
connectedClusters = False
if(curCluster.cluster_connClustDict.__contains__(iCluster.cluster_label)):
iVal = curCluster.cluster_connClustDict[iCluster.cluster_label]
connectedClusters = True
if(curCluster.cluster_connClustDict.__contains__(jCluster.cluster_label)):
jVal = curCluster.cluster_connClustDict[jCluster.cluster_label]
connectedClusters = True
if (connectedClusters == False):
#clusters aren't connected => assign the max int value if there are no prior elements in list
if(outputDict.__contains__(curLayer) == False):
outputDict[curLayer]= 2147483647 #notConnected
else:
#clusters ARE connected => add the squares part of the euclid distance to the value of the similarity
if(outputDict.__contains__(curLayer) == False):
#first element
outputDict[curLayer]= (iVal - jVal)**2
else:
#further elements
outputDict[curLayer]+= (iVal - jVal)**2
for layer in outputDict:
outputDict[layer] = math.sqrt(outputDict[layer])
return outputDict
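The same hypothetical counts worked through the Euclidean metric above: the per-layer value is the square root of the summed squared differences, here sqrt((4-1)**2 + (1-1)**2) == 3.0.
conn_to_i = {"k1": 4, "k2": 1}   #invented connection counts towards cluster i
conn_to_j = {"k1": 1, "k2": 1}   #invented connection counts towards cluster j
layer_value = math.sqrt(sum((conn_to_i[k] - conn_to_j[k]) ** 2 for k in conn_to_i))   # == 3.0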
# frozenset(tuple) dict
# [(iClusterLabel,jClusterLabel), (layer,similarity)]
#def calculateSimilarity(inputLayerDict) -> Dict[frozenset((str,str)),Dict[str,int]]:
def calculateSimilarity(inputLayerDict):
''' Calculates the similarity between clusters contained in the "inputLayerDict". Similarity is calculated for each combination of 2 clusters from the SAME layer.
:param Dict{layername: Layer} inputLayerDict: Contains the associated Layer and Clusters objects. The dictionary KEY is layername, the Value is a Layer Object. The Layer object has an attribute cluster_Dict which stores the clusters in the Layer.
:returns: Dict{tuple(cluster_label1, cluster_label2) : Dict{layername, similarityValue}}. Returns a Dictionary with a tuple of 2 clusters as KEY, and a Dictionary with the computed similarity of the clusters in regard to each layer as VALUE
:rtype: Dict{(string,string): Dict{str:float}}
'''
print("Entered calculateSimilarity")
similarityDict = dict() #the key is a frozenset(Tuple) (clusterLabel1,clusterLabel2)
clusterList = list()
for curLayer in inputLayerDict.values():
for curCluster in curLayer.cluster_Dict.values():
clusterList.append(curCluster)
#print(" Nr. of clusters: "+str(len(clusterList)))
#go through every combination of 2 clusters and calculate the similarity between them in regard to each layer
i=0
while( i < len(clusterList) ):
iCluster = clusterList[i]
j=i+1
while ( j<len(clusterList)):
jCluster = clusterList[j]
if (iCluster.cluster_layer == jCluster.cluster_layer): #calculate similarity only from the same layer
tuplekey = (clusterList[i].cluster_label,clusterList[j].cluster_label)
key = frozenset(tuplekey)
#### EUCLIDEAN DISTANCE /minMax
similarityDict[key]=calcEuclideanDist(i,j,clusterList)
#print("#### similarityDict i:"+str(i)+" j:"+str(j))
#print("#### "+str(similarityDict))
else:
j = len(clusterList)
j+=1
i+=1
print("Finished calculateSimilarity")
return similarityDict
\ No newline at end of file
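For illustration, the returned similarityDict has the following shape; the labels and per-layer values mirror the swagger example, and the pairing of labels is not a claim about real data.
example_similarityDict = {
    frozenset(("0", "319")): {"StartingPoint_Layer": 39.0, "StartingTime_Layer": 99.0101004948485},
}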
from db.entities.connected_node import NodeC
from db.entities.connected_cluster import ClusterC
from db.entities.connected_layer import LayerC
from typing import Dict
def sortFunctByNode(node):
try :
return node.uniqueID
except:
print(node.cluster_label)
print(node.node_layer)
print(node.uniqueID)
def calculateWeights(inputLayerDict) -> Dict[str,LayerC]:
''' Calculates the nr of connections/weights between the clusters contained in the "inputLayerDict". Connections are made between clusters from DIFFERENT layers.
:param Dict{string: Layer} inputLayerDict: Contains the associated Layer and Clusters objects. The dictionary KEY is layername, the Value is a Layer Object. The Layer object has an attribute cluster_Dict which stores the clusters in the Layer.
:returns: Dict{layername: Layer}. Returns the inputLayerDict with the added connections in the attributes cluster_connClustDict and cluster_connNodesDict
:rtype: Dict{string: Layer}
'''
#the input dictates which clusters are updated; however it will update all the included clusters
#if I only want to update a single cluster without considering the rest, I should create a new method?
print("Entered calculateWeights")
nodeList = []
for curLayer in inputLayerDict.values():
for curCluster in curLayer.cluster_Dict.values():
for curNode in curCluster.cluster_containedNodesDict.values():
nodeList.append(curNode)
#if curNode != None:
#if(curNode.uniqueID!= None):
#print(" Nr. of nodes: " + str(len(nodeList)))
nodeList.sort(key=sortFunctByNode)
i=0
while( i < len(nodeList) ):
iNode = nodeList[i]
j=i+1
while ( j<len(nodeList)):
jNode = nodeList[j]
#if there is a connection
#print("\n ### \n"+iNode.uniqueID +" "+ iNode.node_layer +"\n"+ jNode.uniqueID +" "+ jNode.node_layer )
if (iNode.node_layer != jNode.node_layer) and (iNode.uniqueID == jNode.uniqueID):
iOldTuple = (iNode.uniqueID,iNode.cluster_label)
jOldTuple= (jNode.uniqueID,jNode.cluster_label)
iOldKey = frozenset(iOldTuple)
jOldKey = frozenset(jOldTuple)
#Check if old node dicts has this node: if not add to ConnDictionary and to OldNodesDict
# Layer . Cluster . OldNodesDict . Does not contain the OTHER node
if (inputLayerDict[iNode.node_layer].cluster_Dict[iNode.cluster_label].cluster_connNodesDict.__contains__(jOldKey) == False):
#add node j at cluster i
if (inputLayerDict[iNode.node_layer].cluster_Dict[iNode.cluster_label].cluster_connClustDict.__contains__(jNode.cluster_label)):
inputLayerDict[iNode.node_layer].cluster_Dict[iNode.cluster_label].cluster_connClustDict[jNode.cluster_label]+=1
else:
inputLayerDict[iNode.node_layer].cluster_Dict[iNode.cluster_label].cluster_connClustDict[jNode.cluster_label]=1
#add node to old nodes
inputLayerDict[iNode.node_layer].cluster_Dict[iNode.cluster_label].cluster_connNodesDict[jOldKey]=jNode
if (inputLayerDict[jNode.node_layer].cluster_Dict[jNode.cluster_label].cluster_connNodesDict.__contains__(iOldKey) == False):
#add node i at cluster j
if (inputLayerDict[jNode.node_layer].cluster_Dict[jNode.cluster_label].cluster_connClustDict.__contains__(iNode.cluster_label)):
inputLayerDict[jNode.node_layer].cluster_Dict[jNode.cluster_label].cluster_connClustDict[iNode.cluster_label]+=1
else:
inputLayerDict[jNode.node_layer].cluster_Dict[jNode.cluster_label].cluster_connClustDict[iNode.cluster_label]=1
#add node to old nodes
inputLayerDict[jNode.node_layer].cluster_Dict[jNode.cluster_label].cluster_connNodesDict[iOldKey]=iNode
j+=1
i+=1
#deleting cluster_containedNodesDicts/// No longer needed
#for curLayer in inputLayerDict.values():
# for curCluster in curLayer.cluster_Dict.values():
# inputLayerDict[curCluster.cluster_layer].cluster_Dict[curCluster.cluster_label].cluster_containedNodesDict = dict()
print("Finished calculateWeights")
#store weights in database?
return inputLayerDict
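A minimal end-to-end sketch of calculateWeights, assuming two layers with one node each that share a uniqueID, so exactly one connection is recorded in each direction; all identifiers below are invented.
nA = NodeC("0", "Price_Layer", None, None, None, None, None, "uid-1", None)
nB = NodeC("5", "Destination_Layer", None, None, None, None, None, "uid-1", None)
clA = ClusterC("0", "Price_Layer", None, {frozenset(("uid-1", "0")): nA}, dict(), dict())
clB = ClusterC("5", "Destination_Layer", None, {frozenset(("uid-1", "5")): nB}, dict(), dict())
layers = {"Price_Layer": LayerC("Price_Layer", {"0": clA}),
          "Destination_Layer": LayerC("Destination_Layer", {"5": clB})}
layers = calculateWeights(layers)
print(clA.cluster_connClustDict)   # {'5': 1}
print(clB.cluster_connClustDict)   # {'0': 1}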
#This file contains the methods which add the data (layers,clusters,nodes)
# from the input JSON to the "layerDict" dictionary for further processing
from db.entities.connected_node import NodeC
from db.entities.connected_cluster import ClusterC
from db.entities.connected_layer import LayerC
from typing import Dict
#from db.repository import Repository
import json
import requests
from routes.clustersets import get_by_name
#TEST ONLY
from routes.connClusters import get_conn_clusters
from routes.similarity import get_similarity
def getClusterDataFromSwagger(limitNrCluster,limitNrNodes):
''' Gets cluster data for several layers from the REST API (see listURLs below) and builds the layerDict.
:param int limitNrCluster: Limits the number of Clusters considered per layer. None or <0 values == No limit
:param int limitNrNodes: Limits the number of Nodes considered per layer. None or <0 values == No limit
:returns: Dict{layername: Layer}. Returns a Dict with the data gathered from the REST API
:rtype: Dict{string: Layer}
'''
print("Entered dataInput")
# ??? OBSOLETE ???
# oldBigTestClusters https://drive.google.com/uc?export=download&id=1l4gHBwrG_N4pCL5-MfWJk2szNrF3VnpG it takes a while to download
# smallTestClusters https://drive.google.com/uc?export=download&id=1cMoGtmi-XouSDM9DRl-ddmPkf2Bm7sk7
# smallTestOnlyLocationClusters https://drive.google.com/uc?export=download&id=1wBT9vi7aS4rE4qOWHEyLEfQ2KbmpBK9e
# smallTestOnlyPriceClusters https://drive.google.com/uc?export=download&id=1g9pEOOpDMBj6yZOlFj7HfOoMOAFTvPOW
# smallTestOnlyTimeClusters https://drive.google.com/uc?export=download&id=1XKXQHEC5ubJHmntQBNnzgfpEZl6OXE_B
listURLs = []
#"""
listURLs.append('http://articonf1.itec.aau.at:30103/api/layers/Price_Layer/clusters')
listURLs.append('http://articonf1.itec.aau.at:30103/api/layers/FinishedTime_Layer/clusters')
listURLs.append('http://articonf1.itec.aau.at:30103/api/layers/Destination_Layer/clusters')
#listURLs.append('http://articonf1.itec.aau.at:30103/api/layers/StartingPoint_Layer/clusters')
#listURLs.append('http://articonf1.itec.aau.at:30103/api/layers/Reputation_Layer/clusters')
#listURLs.append('http://articonf1.itec.aau.at:30103/api/layers/StartingTime_Layer/clusters')
#listURLs.append('http://articonf1.itec.aau.at:30103/api/layers/User_Layer/clusters')
#"""
#Maximum of these nodes PER Layer will be considered
if (limitNrCluster is None) or (limitNrCluster < 0):
limitNrCluster = 9223372036854775807 #per Layer (LLONG_MAX == 2^63 - 1)
if (limitNrNodes is None) or (limitNrNodes < 0):
limitNrNodes = 9223372036854775807 #per Layer (LLONG_MAX == 2^63 - 1)
layerDict = dict()
#imports and translates the data from JSON into a useful format
#returns layerDict -> Layer -> clusterDict -> Cluster -> nodesDict -> Nodes
for url in listURLs:
newData = loadJson(url)
layerDict = populateWithNewNodesSingleLayer(newData[0:limitNrCluster],layerDict,limitNrNodes)
return layerDict
def loadJson(url) :
res = requests.get(url, timeout=30)
jsonData = json.loads(res.content)
return jsonData
def getClusterDataFromMongo(layerNameList,limitNrCluster,limitNrNodes):
''' Gets cluster data for the given layers from the DB and builds the layerDict.
:param List[string] layerNameList: Name of the layers to pull from the DB
:param int limitNrCluster: Limits the number of Clusters considered per layer. None or <0 values == No limit
:param int limitNrNodes: Limits the number of Nodes considered per layer. None or <0 values == No limit
:returns: Dict{layername: Layer}. Returns a Dict with the data gathered from the DB
:rtype: Dict{string: Layer}
'''
layerDict = dict()
#Maximum of these nodes PER Layer will be considered
if (limitNrCluster is None) or (limitNrCluster < 0):
limitNrCluster = 9223372036854775807 #per Layer (LLONG_MAX == 2^63 - 1)
if (limitNrNodes is None) or (limitNrNodes < 0):
limitNrNodes = 9223372036854775807 #per Layer (LLONG_MAX == 2^63 - 1)
layerDict = dict()
#imports and translates the data from JSON into a useful format
#returns layerDict -> Layer -> clusterDict -> Cluster -> nodesDict -> Nodes
for name in layerNameList:
newData = get_by_name(name)
layerDict = populateWithNewNodesSingleLayer(newData[0:limitNrCluster],layerDict,limitNrNodes)
return layerDict
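#NOTE: the 2-parameter populateWithNewNodesSingleLayer below is shadowed by the 3-parameter version defined further down (Python keeps the last definition), so the callers above, which pass limitNrNodes, use that one.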
def populateWithNewNodesSingleLayer(inputData, layerDict) -> Dict[str,LayerC]:
''' Gets Layer, Cluster and Node data from a JSON format and appends it to the "layerDict" dictionary. A single Layer only.
:param inputData: JSON data to be formatted.
:param Dict{string: Layer} layerDict: If it's empty/null a new one will be created. Otherwise contains the associated Layer and Clusters objects to which data is appended. The dictionary KEY is layername, the Value is a Layer Object. The Layer object has an attribute cluster_Dict which stores the clusters in the Layer.
:returns: Dict{layername: Layer}. Returns the inputLayerDict with the added data from the JSON
:rtype: Dict{string: Layer}
'''
print("Entered populateWithNewNodes")
if(layerDict == None):
layerDict = dict()
print(" Layer: "+inputData[0].get("layer_name"))
curLayerName = None
#newClusterDict
#clusterDict = layerDict.get(curCluster.get("layer_name"),dict())
for curCluster in inputData:
if(curCluster.get("layer_name")!= curLayerName):
clusterDict = layerDict.get(curCluster.get("layer_name"),dict())
curLayerName = curCluster.get("layer_name")
oldCluster = clusterDict.get(curCluster.get("cluster_label"),None)
if oldCluster is None: #means this is a new cluster
cluster_containedNodesDict = dict()
else: #means this is an already existing cluster
cluster_containedNodesDict = oldCluster.cluster_containedNodesDict
for curNode in curCluster.get("nodes"):
#totalNodesCount+=1
newNode = NodeC(
curCluster.get("cluster_label"),
curLayerName,
curNode.get("Finished_time"),
curNode.get("Latitude_Destination"),
curNode.get("Longitude_Destination"),
curNode.get("TravelID"),
curNode.get("TravelPrice"),
curNode.get("UniqueID"),
curNode.get("UserID"))
if(newNode != None):
if(newNode.uniqueID!= None and newNode.cluster_label!= None and newNode.node_layer!= None):
auxtuple = (newNode.uniqueID,newNode.cluster_label)
key = frozenset(auxtuple)
cluster_containedNodesDict[key]= newNode #overwrite if already there
#finished node
if oldCluster is None:
# def __init__(self,cluster_label,cluster_layer,cluster_containedNodesDict,cluster_connNodesDict, cluster_connectionsNr,cluster_connClustDict):
newCluster = ClusterC(
curCluster.get("cluster_label"),
curLayerName,
None,
cluster_containedNodesDict,
dict(), #will populate the dict fields later
dict()) #may not be empty anymore
clusterDict[newCluster.cluster_label] = newCluster
else:
oldCluster.cluster_containedNodesDict = cluster_containedNodesDict
clusterDict[curCluster.get("cluster_label")] = oldCluster
#finished cluster
newLayer = LayerC(curLayerName,clusterDict)
layerDict[curLayerName]= newLayer
##########TEST THIS
return layerDict
def populateWithNewNodesSingleLayer(inputData, layerDict, limitNrNodes) -> Dict[str,LayerC]:
''' Gets Layer, Cluster and Node data from a JSON format and appends it to the "layerDict" dictionary.
:param inputData: JSON data to be formatted.
:param Dict{string: Layer} layerDict: If it's empty/null a new one will be created. Otherwise contains the associated Layer and Clusters objects to which JSON data is appended. The dictionary KEY is layername, the Value is a Layer Object. The Layer object has an attribute cluster_Dict which stores the clusters in the Layer.
:param int limitNrNodes: How many maximum nodes PER layer will be considered.
:returns: Dict{layername: Layer}. Returns the inputLayerDict with the added data from the JSON
:rtype: Dict{string: Layer}
'''
print("Entered populateWithNewNodes")
if(layerDict == None):
layerDict = dict()
print(" Layer: "+inputData[0].get("layer_name"))
curLayerName = None
#newClusterDict
#clusterDict = layerDict.get(curCluster.get("layer_name"),dict())
for curCluster in inputData:
if(curCluster.get("layer_name")!= curLayerName):
clusterDict = layerDict.get(curCluster.get("layer_name"),dict())
curLayerName = curCluster.get("layer_name")
oldCluster = clusterDict.get(curCluster.get("cluster_label"),None)
if oldCluster is None: #means this is a new cluster
cluster_containedNodesDict = dict()
else: #means this is an already existing cluster
cluster_containedNodesDict = oldCluster.cluster_containedNodesDict
for curNode in curCluster.get("nodes"):
#totalNodesCount+=1
newNode = NodeC(
curCluster.get("cluster_label"),
curLayerName,
curNode.get("Finished_time"),
curNode.get("Latitude_Destination"),
curNode.get("Longitude_Destination"),
curNode.get("TravelID"),
curNode.get("TravelPrice"),
curNode.get("UniqueID"),
curNode.get("UserID"))
if(newNode != None):
if(newNode.uniqueID!= None and newNode.cluster_label!= None and newNode.node_layer!= None):
if( limitNrNodes>0):
auxtuple = (newNode.uniqueID,newNode.cluster_label)
key = frozenset(auxtuple)
cluster_containedNodesDict[key]= newNode #overwrite if already there
limitNrNodes-=1
#finished node
if oldCluster is None:
# def __init__(self,cluster_label,cluster_layer,cluster_containedNodesDict,cluster_connNodesDict, cluster_connectionsNr,cluster_connClustDict):
newCluster = ClusterC(
curCluster.get("cluster_label"),
curLayerName,
None,
cluster_containedNodesDict,
dict(),
dict()) #may not be empty anymore
clusterDict[newCluster.cluster_label] = newCluster
else:
oldCluster.cluster_containedNodesDict = cluster_containedNodesDict
clusterDict[curCluster.get("cluster_label")] = oldCluster
#finished cluster
newLayer = LayerC(curLayerName,clusterDict)
layerDict[curLayerName]= newLayer
return layerDict
#deprecated
def populateWithNewNodesAllLayers(inputData,layerDict) -> Dict[str,LayerC]:
''' Gets Layer, Cluster and Node data from a JSON format and appends it to the "layerDict" dictionary.
:param inputData: JSON data to be formatted.
:param Dict{string: Layer} layerDict: If it's empty/null a new one will be created. Otherwise contains the associated Layer and Clusters objects to which data is appended. The dictionary KEY is layername, the Value is a Layer Object. The Layer object has an attribute cluster_Dict which stores the clusters in the Layer.
:returns: Dict{layername: Layer}. Returns the inputLayerDict with the added data from the JSON
:rtype: Dict{string: Layer}
'''
print("Entered populateWithNewNodes")
if(layerDict == None):
layerDict = dict()
for curLayer in inputData:
clusterDict = layerDict.get(curLayer.get("layer_name"),dict()) #gets the Old dict or an empty if none is found i.e for a new layer
curLayerName = curLayer.get("layer_name")
for curCluster in curLayer.get("clusters"):
oldCluster = clusterDict.get(curCluster.get("cluster_label"),None)
if oldCluster is None: #means this is a new cluster
cluster_containedNodesDict = dict()
else: #means this is an already existing cluster
cluster_containedNodesDict = oldCluster.cluster_containedNodesDict
#SORT NODES?
for curNode in curCluster.get("nodes"):
if(curNode != None):
if(curNode.uniqueID != None):
newNode = NodeC(
curCluster.get("cluster_label"),
curLayerName,
curNode.get("Finished_time"),
curNode.get("Latitude_Destination"),
curNode.get("Longitude_Destination"),
curNode.get("TravelID"),
curNode.get("TravelPrice"),
curNode.get("UniqueID"),
curNode.get("UserID"))
auxtuple = (newNode.uniqueID,newNode.cluster_label)
key = frozenset(auxtuple)
cluster_containedNodesDict[key]= newNode #overwrite if already there
#finished node
if oldCluster is None:
newCluster = ClusterC(
curCluster.get("cluster_label"),
curLayer.get("layer_name"),
None,
cluster_containedNodesDict,
dict(),
dict()) #may not be empty anymore
clusterDict[newCluster.cluster_label] = newCluster
else:
#only cluster_containedNodesDict should change
oldCluster.cluster_containedNodesDict = cluster_containedNodesDict
#cluster_connNr and clusterConnDict should stay the same
clusterDict[curCluster.get("cluster_label")] = oldCluster
#finished cluster
newLayer = LayerC(curLayer.get("layer_name"),clusterDict)
layerDict[curLayer.get("layer_name")]= newLayer
#finished layer
print("Finished populateWithNewNodes")
return layerDict
def getConnClusterDataFromMongo():
mongoArray = get_conn_clusters()
outputDict = convertRetrievedClustersFromMongo(mongoArray)
return outputDict
def getSimilarityDataFromMongo():
result = get_similarity()
return result
def convertRetrievedClustersFromMongo(inputArray):
####TODO#### Not tested thoroughly
LayerDict = {}
for entry in inputArray:
if not(entry['cluster_layer'] in LayerDict):
LayerDict[entry['cluster_layer']] = []
cl = ClusterC(
entry['cluster_label'],
entry['cluster_layer'],
entry['cluster_runId'],
entry['cluster_containedNodesDict'],
entry['cluster_connNodesDict'],
entry['cluster_connClustDict'])
LayerDict[entry['cluster_layer']].append(cl)
return LayerDict
\ No newline at end of file
#Misc util functions
import json
import requests
import datetime
from routes.connClusters import add_conn_clusters
from routes.similarity import add_similarity
from routes.connRun import add_connected_run
from processing.similarityFiles.miscFunctions import *
def outputFileLayerFunction(layerDict,limitNrNodes,limitNrCluster,runId):
''' Writes the layerDict data to a JSON file.
:param Dict{string: Layer} layerDict: Object which contains Data about the Layers, Clusters and Nodes
:param int limitNrNodes: How many nodes are contained in layerDict. Used in creating the name of the File
:param int limitNrCluster: How many clusters are contained in layerDict. Used in creating the name of the File
:param string runId: Id of the Run
'''
layerJSON = convertLayerDictToJSON(layerDict,runId)
outputJSON = json.dumps(layerJSON, default=lambda o: o.__dict__, indent=4)
try:
with open('resultLayerDictN'+str(limitNrNodes)+'C'+str(limitNrCluster)+'.json', 'w') as outfile:
outfile.write(outputJSON)
except ValueError:
print("Error occured when writing the resultLayerDict file")
def outputFileSimilFunction(similarityDict,limitNrNodes,limitNrCluster,runId):
''' Writes the similarityDict data to a JSON file.
:param Dict{(cluster_label1, cluster_label2): Dict{layername: value}} similarityDict: Object which contains Data about the similarity between the clusters
:param int limitNrNodes: How many nodes are contained in layerDict. Used in creating the name of the File
:param int limitNrCluster: How many clusters are contained in layerDict. Used in creating the name of the File
:param string runId: Id of the Run
'''
similJSON = convertSimilarityDictToJSON(similarityDict,runId)
outputJSON = json.dumps(similJSON, default=lambda o: o.__dict__, indent=4)
try:
with open('resultSimilarityDictN'+str(limitNrNodes)+'C'+str(limitNrCluster)+'.json', 'w') as outfile2:
outfile2.write(outputJSON)
except ValueError:
print("Error occured when writing the resultSimilarityDict file")
def outputFileTimeFunction(timelist,limitNrNodes,limitNrCluster,runId):
''' Writes execution time to a file.
:param List[datetime] timelist: Contains timestamps about the execution time of functions and the program.
:param int limitNrNodes: How many nodes are considered. Used in creating the name of the File
:param int limitNrCluster: How many clusters are considered. Used in creating the name of the File
:param string runId: Id of the Run
'''
stringToWrite = "StartTime: "+ str(timelist[0])
stringToWrite += "\nFinishTime: " + str((timelist[3])) +"\n"
stringToWrite += "\nPopulateWithNewNodes: " + str((timelist[1]-timelist[0]).total_seconds())
stringToWrite += "\nCalculateWeights: " + str((timelist[2]-timelist[1]).total_seconds())
stringToWrite += "\nCalculateSimilarity: " + str((timelist[3]-timelist[2]).total_seconds())
stringToWrite += "\nTotalTime: " + str((timelist[3]-timelist[0]).total_seconds())
stringToWrite += "\nRunId: " +str(runId)
#aux = str(timelist[0]) + " :PopulateWithNewNodes\n"+ str(timelist[1]) + " :CalculateWeights\n" + str(timelist[2]) + " :CalculateSimilarity\n"+ str(timelist[3]) + " :Finish"
try:
with open('resultTimeExecN'+str(limitNrNodes)+'C'+str(limitNrCluster)+'.txt', 'w') as outfile3:
outfile3.write(stringToWrite)
except ValueError:
print("Error occured when writing the resultTimeExec file")
def outputMongoConnClustDict(inputDict,runId):
''' Stores connected_clusters in the database.
:param Dict() inputDict: Contains the data to insert
:param string runId: Id of the Run
'''
#inputDict["Timestamp"] = str(datetime.datetime.now())
add_conn_clusters(inputDict,runId)
def outputMongoSimilarity(inputDict,runId):
''' Stores cluster_similarity in the database.
:param Dict() inputDict: Contains the data to insert
:param string runId: Id of the Run
'''
add_similarity(inputDict,runId)
\ No newline at end of file
#Misc util functions
import json
import requests
import datetime
def currentTime():
ts = datetime.datetime.now()
print(ts)
return ts
def totalNumberOfNodes(inputLayerDict):
''' Computes total number of nodes in the inputLayerDict.
:param Dict{string: Layer} inputLayerDict: Layers in which the Clusters (and their Nodes) are stored
:returns: Returns nr of Nodes
:rtype: int
'''
nodeCount = 0
for curLayer in inputLayerDict.values():
for curCluster in curLayer.cluster_Dict.values():
nodeCount+=len(curCluster.cluster_containedNodesDict.values())
return nodeCount
def totalNumberOfClusters(inputLayerDict):
''' Computes total number of clusters in the inputLayerDict.
:param Dict{string: Layer} inputLayerDict: Layers in which the Clusters are stored
:returns: Returns nr of Clusters
:rtype: int
'''
clustCount = 0
for curLayer in inputLayerDict.values():
clustCount+= len(curLayer.cluster_Dict.values())
return clustCount
def convertLayerDictToJSON(layerDict, runId):
''' Converts the layerDict into a JSON-serializable list of cluster dictionaries.
:param Dict{string: Layer} layerDict: Object which contains Data about the Layers, Clusters and Nodes
:param string runId: Id of the Run
:rtype: List[Dict] (one dictionary per cluster)
'''
'''
{
layer1 : {
[
{
cluster_label1 : 0123400,
cluster_layer: layer1,
"cluster_connClustDict": {
"0123456": 98
"1234567": 12
},
cluster_containedNodesDict : {
[
abcd,
sgre,
dgre,
ddhr,
yyrh
]
}
},
{
},
{
}
]
},
layer2 : {
}
}
'''
outputJSON = []
for curLayer in layerDict.values():
for curCluster in curLayer.cluster_Dict.values():
outputJSON.append({
"cluster_label" : curCluster.cluster_label,
"cluster_layer" : curCluster.cluster_layer,
"cluster_runId" : runId,
"cluster_connClustDict" : changeDictKeysToString(curCluster.cluster_connClustDict),
"cluster_connNodesDict" : getFrozensetFromConnNodesDict(curCluster.cluster_connNodesDict),
"cluster_containedNodesDict" : getNodeIdListFromContainedNodesDict(curCluster.cluster_containedNodesDict),
})
#outputJSON = json.dumps(outputJSON, default=lambda o: o.__dict__, indent=4)
return outputJSON
def changeDictKeysToString(inputDict):
keys_values = inputDict.items()
outputDict = { str(key): value for key,value in keys_values}
return outputDict
def getNodeIdListFromContainedNodesDict(inputDict):
output = []
for curNode in inputDict.values():
output.append(curNode.uniqueID)
return output
def getFrozensetFromConnNodesDict(inputDict):
output = []
for curNode in inputDict.values():
auxDict = {}
auxDict["node_id"]= curNode.uniqueID
auxDict["node_cluster"] = curNode.cluster_label
output.append(auxDict)
return output
def convertSimilarityDictToJSON(inputDict,runId):
similList = []
for compositeKey in inputDict:
frozensetString =list()
#key is a tuple of cluster_labels
for key in compositeKey:
frozensetString.append(key)
similList.append({
"clusterTuple" : frozensetString,
"similarityValues" : inputDict[compositeKey],
"runId": runId
})
similToJSON = similList
#outputJSON = json.dumps(similToJSON, default=lambda o: o.__dict__, indent=4)
return similToJSON
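For reference, one serialized entry matches the ClusterSimilarityDictionary definition in the swagger file; the values below mirror the swagger example, and the order of the two labels in clusterTuple is not guaranteed because the key is a frozenset.
example_entry = {
    "clusterTuple": ["0", "319"],
    "similarityValues": {"StartingPoint_Layer": 39.0, "StartingTime_Layer": 99.0101004948485},
    "runId": "5efdc04ac43add0aba567d76"
}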
from db.entities.connected_node import NodeC
from db.entities.connected_cluster import ClusterC
from db.entities.connected_layer import LayerC
from typing import Dict
#from db.repository import Repository
import json
import requests
from routes.clustersets import get_by_name
from db import repository
(4 source diffs not shown: too large to display)
StartTime: 2020-07-02 12:05:47.067975
FinishTime: 2020-07-02 12:05:54.561853
PopulateWithNewNodes: 2.495718
CalculateWeights: 4.590413
CalculateSimilarity: 0.407747
TotalTime: 7.493878
\ No newline at end of file
StartTime: 2020-07-06 10:43:32.240013
FinishTime: 2020-07-06 10:43:39.110333
PopulateWithNewNodes: 2.399582
CalculateWeights: 4.422768
CalculateSimilarity: 0.04797
TotalTime: 6.87032
RunId: 5f02e43b53a73a48d0eaaed5
\ No newline at end of file
from flask import request, Response
from db.repository import Repository
from db.entities import ClusterSet
from db.entities import clusterset
repo = Repository()
......
from flask import request, Response
from db.repository import Repository
from routes.connRun import add_connected_run
from processing.similarityFiles.miscFunctions import *
repo = Repository()
def add_conn_clusters(inputDict,runId):
''' Stores connected_clusters in the database.
:param Dict() inputDict: Contains the data to insert
:param string runId: Id of the Run
'''
outputJSON = convertLayerDictToJSON(inputDict,runId)
repo.add_connected_clusters(outputJSON)
def get_conn_clusters():
''' Gets connected_clusters from the database.
:returns: Returns connected cluster objects from the DB
:rtype: Dict
'''
result = repo.get_connected_clusters()
if result is None or len(result) == 0:
print("MongoDb Get Error: Response 404")
return Response(status=404)
else:
return result
from flask import request, Response
from db.repository import Repository
from db.entities import connected_run
import datetime
#from db.entities import clusterset #REMOVE?
repo = Repository()
def add_connected_run():
'''
Inserts Run with current Time into the DB
:returns: Returns the _id of the connected_run entry in the DB
:rtype: string
'''
currentTime = datetime.datetime.now()
runDict = {"Datetime" : str(currentTime)}
inserted_result = repo.add_connected_run(runDict)
return str(inserted_result.inserted_id)
def get_connected_run(): ########TODO#################
''' ##TODO## Gets Run from the database.
:returns: Returns Run objects from the DB
:rtype: Dict{_id,datetime}
'''
"""
result = repo.get_connected_clusters()
if result is None or result.retrieved == 0:
print("#### Response 404")
return Response(status=404)
else:
return result
conRun = ConnectedRun(result.sdfsdf)
"""
from flask import request, Response
from db.repository import Repository
from processing.similarityFiles.miscFunctions import convertSimilarityDictToJSON
#from db.entities import clusterset #REMOVE?
repo = Repository()
def add_similarity(inputDict,runId):
''' Stores cluster_similarity in the database.
:param Dict() inputDict: Contains the data to insert
:param string runId: Id of the Run
'''
outputJSON = convertSimilarityDictToJSON(inputDict,runId)
repo.add_similarity(outputJSON)
def get_similarity():
''' Gets cluster_similarity from the database.
:returns: Returns similarity objects from the DB
:rtype: Dict
'''
result = repo.get_similarity()
if result is None or len(result) == 0:
print("MongoDb Get Error: Response 404")
return Response(status=404)
else:
return result
"""
for each cluster in the layer
for each other cluster from all the other layers
find the number of connections
save them into a dictionary (ClusterID(from other layer) -> Nr of connections)
save all the dictionaries in a map? ( ClusterID1 -> dictionary1, ClusterID2 -> dictionary2 )
have a map per layer? (Nr of maps = nr of layers)
Each cluster has a dictionary of connCluster-> nrConections
Each layer has a dictionary of clusters -> dictionaries of nodes/connections
"""
import os
import sys
import math
import datetime
from typing import Dict
##################AUX
modules_path = '../../../modules/'
if os.path.exists(modules_path):
sys.path.insert(1, modules_path)
#### TO BE DELETED #### ^
from db.entities.connected_node import NodeC
from db.entities.connected_cluster import ClusterC
from db.entities.connected_layer import LayerC
from processing.similarityFiles.dataInput import *
from processing.similarityFiles.calculateWeights import *
from processing.similarityFiles.calculateSimilarity import *
from processing.similarityFiles.miscFunctions import *
from processing.similarityFiles.dataOutput import *
from routes.connRun import add_connected_run
def main():
print("\nEntered Main")
timelist = []
timelist.append(currentTime())#starting time
"""
Current Layers
Price_Layer
FinishedTime_Layer
Destination_Layer
StartingPoint_Layer
Reputation_Layer
StartingTime_Layer
User_Layer
"""
layerNameList = ["Price_Layer","FinishedTime_Layer","Destination_Layer"] #Get it from somewhere else?
limitNrCluster = 20 #per Layer
limitNrNodes = 1000 #per Layer
layerDict = getClusterDataFromMongo(layerNameList,limitNrCluster,limitNrNodes)
#layerDict = getClusterDataFromSwagger(limitNrCluster,limitNrNodes) #for Swagger, Change URLs inside the function for different input Data
totalNodes = totalNumberOfNodes(layerDict)
print("Nr. of nodes: " +str(totalNodes))
totalClusters = totalNumberOfClusters(layerDict)
print("Nr. of clusters: " + str(totalClusters))
timelist.append(currentTime())
#calculates the weights between the clusters (weight == number of connections) #return is displayed in outputLayerFunction
layerDict = calculateWeights(layerDict)
timelist.append(currentTime())
#calculates the similarity between the clusters #returns dictionary[ tuple(cluster_label1,cluster_label2),
# listOfSimilarity(layer1,layer2,layer3) ]
similarityDict = calculateSimilarity(layerDict)
timelist.append(currentTime()) #Finishing time
#Write to files
runId = add_connected_run()
print("Outputing data")
outputFileLayerFunction(layerDict,totalNodes,totalClusters,runId)
outputFileSimilFunction(similarityDict,totalNodes,totalClusters,runId)
outputFileTimeFunction(timelist,totalNodes,totalClusters,runId)
#Output to DB
outputMongoConnClustDict(layerDict,runId)
outputMongoSimilarity(similarityDict,runId)
#Currently not used, developed for possible future uses
connClustersFromMongo = getConnClusterDataFromMongo()
similarityArrFromMongo = getSimilarityDataFromMongo()
print("FINISHED")
return
##########START##########
main()
#########FINISH##########
......@@ -17,7 +17,7 @@ class MongoRepositoryBase:
def insert_entry(self, collection_name, content: dict):
collection = self._database[collection_name]
collection.insert_one(content)
return collection.insert_one(content)
def insert_many(self, collection_name, content: list):
collection = self._database[collection_name]
......
......@@ -18,6 +18,6 @@ SEMANTIC_LINKING_DB_PORT = 27017
## Role Stage Discovery
ROLESTAGE_DISCOVERY_HOSTNAME = 'role-stage-discovery'
ROLESTAGE_DISCOVERY_REST_PORT = 80
ROLESTAGE_DISCOVERY_DB_HOSTNAME = f'{ROLESTAGE_DISCOVERY_HOSTNAME}-db'
ROLESTAGE_DISCOVERY_DB_PORT = 27017
\ No newline at end of file
ROLESTAGE_DISCOVERY_REST_PORT = 30103
ROLESTAGE_DISCOVERY_DB_HOSTNAME = f'articonf1.itec.aau.at'
ROLESTAGE_DISCOVERY_DB_PORT = 30104
\ No newline at end of file