Created Test (Assertion) + other minor changes

eab8c906 · Bogdan · 3b390ffb · eab8c906 · eab8c906 · eab8c906
Commit eab8c906 authored Jul 08, 2020 by Bogdan
19 changed files
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,5 @@
 *.log
 **/env
 **/venv
+
+src/data-hub/role-stage-discovery-microservice/app/resultSimilarityDictN14992C221.json
--- a/src/data-hub/role-stage-discovery-microservice/app/configs/swagger.yml
+++ b/src/data-hub/role-stage-discovery-microservice/app/configs/swagger.yml
@@ -328,36 +328,51 @@ definitions:
        type: string
        example: "5efdc04ac43add0aba567d76"
      cluster_containedNodesDict:
-        $ref: "#/definitions/ConnectedNode"
+        type: array
+        items: 
+          type: string
+          example: "2696718d7a33ab3dbf28e9c88411afcfe9a933a45e57ec9159bc0668543f1568"
      cluster_connNodesDict:
-        $ref: "#/definitions/ConnectedNode"
+        type: array
+        items:
+          $ref: "#/definitions/ConnectedNode"
      cluster_connClustDict:
-        type: object
-        additionalProperties:
-          type: number
-        example:
-          "cluster_label": nrOfConnectedNodes
-          #"-1": 42
-          "0": 39
-          "6969": 1
+        type: array
+        items:
+          $ref: "#/definitions/ConnectedClusterAux"
          
-      #not used, should be removed?
-      #cluster_connectionsNr
+          
+  ConnectedClusterAux:
+    type: object
+    properties:
+      cluster_label:
+        type: string
+        example: "-1"
+      cluster_layer:
+        type: string
+        example: "FinishedTime_Layer"
+      connectionWeight:
+        type: number
+        example: 42
+      
+  
      

  ConnectedNode:
    type: object
    properties:
-      cluster_label:
+      node_id:
        type: string
-      node_layer:
+      node_cluster:
        type: string
-      uniqueID:
+      node_layer:
        type: string
+      
    example:
-        "cluster_label": "2230"
+        "node_id": "27a08ed0facc7d68a0818c7695dad391cf48d6095e57ec9159bc0668543f159b"
+        "node_cluster": "2230"
        "node_layer": "Destination_Layer"
-        "uniqueID": "a95075f5042b1b27060080156d87"
+        
      #not used, should be removed?
      #finished_time
      #latitude_Destination
@@ -373,18 +388,16 @@ definitions:
  
  ClusterSimilarityDictionary:
    properties:
-      clusterTuple:
-        type: array
-        items:
-          type: string
-        minItems: 2
-        maxItems: 2
-        example: [
-          #cluster_label1
-          0,
-          #cluster_label2
-          319
-          ]   
+      cluster1_label:
+        type: string
+        example: "0"
+      cluster2_label:
+        type: string
+        example: "1"
+      cluster_layer:
+        type: string
+        example: "Price_layer"
+      
      similarityValues:
        type: object
        additionalProperties:

--- a/src/data-hub/role-stage-discovery-microservice/app/db/entities/connected_cluster.py
+++ b/src/data-hub/role-stage-discovery-microservice/app/db/entities/connected_cluster.py
@@ -7,7 +7,7 @@ class ClusterC:
    :param cluster_layer: The layer name of the 'source' cluster
    :param cluster_runId: The run which calculated the connections
    :param cluster_containedNodesDict: Ids of nodes contained in the 'source' cluster
-    :param cluster_connNodesDict: Ids of nodes contained in 'dest' clusters, where the 'dest' cluster is uniquely identifiable by layer name and cluster label
+    :param cluster_connNodesDict: Node Objects contained in 'dest' clusters, where the 'dest' cluster is uniquely identifiable by layer name and cluster label
    :param cluster_connClustDict: Layer name, cluster label and weight for each 'dest' cluster
    '''

@@ -16,7 +16,6 @@ class ClusterC:
        self.cluster_label = cluster_label
        self.cluster_layer = cluster_layer
        self.cluster_runId = cluster_runId
-        self.cluster_containedNodesDict = cluster_containedNodesDict ###RENAME TO curClNodesDict    #Keys are frozensets(touples)  uniqueID and cluster  #
-        self.cluster_connNodesDict = cluster_connNodesDict #Keys are frozensets(touples)  uniqueID and cluster  #problem if you remove newNodes and oldNodes lists.. there may be duplicates
-        self.cluster_connClustDict = cluster_connClustDict #dictionary: layer -> (dict2: cluster_label -> nrOfConnections ) OR dictionary: cluster_label -> nrOfConnections
-        #cluster_connClustDict ------> look at both newNodes and oldNodes
+        self.cluster_containedNodesDict = cluster_containedNodesDict #Keys are frozensets(tuples) == frozenset(uniqueID, cluster and layer); values are the node objects (their .uniqueID is read when serializing)
+        self.cluster_connNodesDict = cluster_connNodesDict #Keys are frozensets(tuples)  (uniqueID:str,node_cluster:str,node_layer:str) Values are NodeC Objects
+        self.cluster_connClustDict = cluster_connClustDict #dictionary: (dict[(cluster_label,cluster_layer)] -> nrOfConnections/weightOfTheConnection )
--- a/src/data-hub/role-stage-discovery-microservice/app/db/entities/connected_layer.py
+++ b/src/data-hub/role-stage-discovery-microservice/app/db/entities/connected_layer.py
 class LayerC:
-    def __init__(self,layer_name,cluster_Dict):
-        self.layer_name = layer_name
-        self.cluster_Dict = cluster_Dict
\ No newline at end of file
+    def __init__(self,layer_name:str,cluster_Dict):
+        '''
+        This class represents the Layer which contains the connected clusters.
+        
+        :param layer_name: The layer name which contains the clusters
+        :param cluster_Dict: The connected_clusters contained in this layer.
+
+        '''
+        self.layer_name = layer_name 
+        self.cluster_Dict = cluster_Dict # Dict[cluster_label] --> ClusterC object
\ No newline at end of file
--- a/src/data-hub/role-stage-discovery-microservice/app/db/entities/connected_node.py
+++ b/src/data-hub/role-stage-discovery-microservice/app/db/entities/connected_node.py

 class NodeC:
+    '''
+    This class represents the Node data contained in a Cluster.
+
+    :param node_layer: The layer name which contains the node
+    :param node_cluster: The cluster_label of the connected cluster which contains the node.
+    :param uniqueID: Id of the node. Only unique inside a single cluster. NOT unique between multiple clusters/layers.
+
+    '''
    
-    def __init__(self, cluster_label, node_layer, uniqueID):
-        self.cluster_label = cluster_label
-        self.node_layer = node_layer
-        self.uniqueID = uniqueID        
\ No newline at end of file
+    def __init__(self, node_cluster, node_layer, uniqueID):
+        self.node_cluster = node_cluster # str
+        self.node_layer = node_layer # str
+        self.uniqueID = uniqueID   # str     
\ No newline at end of file
--- a/src/data-hub/role-stage-discovery-microservice/app/db/entities/connected_run.py
+++ b/src/data-hub/role-stage-discovery-microservice/app/db/entities/connected_run.py
 from datetime import datetime

 class ConnectedRun:
+    '''
+    This class represents a single run: its RunId and the time at which connecting the clusters and calculating the similarity between clusters was executed.
+    
+    :param run_id: The MongoDB _id of the Run to uniquely identify it.
+    :param timeOfExec: Datetime object containing info when the run was finished.
+
+'''
    def __init__(self,run_id,timeOfExec):
        self.run_id = run_id
        self.timeOfExec = timeOfExec
\ No newline at end of file
--- a/src/data-hub/role-stage-discovery-microservice/app/main.py
+++ b/src/data-hub/role-stage-discovery-microservice/app/main.py
@@ -14,6 +14,7 @@ LOGGER = logging.getLogger(__name__)
 #############################
 import connexion

+
 # load swagger config 
 app = connexion.App(__name__, specification_dir='configs/')
 app.add_api('swagger.yml')

--- a/src/data-hub/role-stage-discovery-microservice/app/processing/similarityFiles/calculateSimilarity.py
+++ b/src/data-hub/role-stage-discovery-microservice/app/processing/similarityFiles/calculateSimilarity.py
@@ -29,13 +29,18 @@ def minMaxFunction(iIndex,jIndex,clusterList) -> Dict[str,int]:
    #calculate th
    for curCluster in clusterList:      #jCluster.cluster_layer == iCluster.cluster_layer, so i only compare to one

+        iClusterTuple = (iCluster.cluster_label,iCluster.cluster_layer)
+        jClusterTuple = (jCluster.cluster_label,jCluster.cluster_layer)
+        #iClusterKey = frozenset(iClusterTuple)
+        #jClusterKey = frozenset(jClusterTuple)
+
        curLayer = curCluster.cluster_layer
        if(( curLayer != iCluster.cluster_layer) 
-        and ( curCluster.cluster_connClustDict.__contains__(iCluster.cluster_label)) 
-        and ( curCluster.cluster_connClustDict.__contains__(jCluster.cluster_label))):
+        and ( curCluster.cluster_connClustDict.__contains__(iClusterTuple)) 
+        and ( curCluster.cluster_connClustDict.__contains__(jClusterTuple))):

            # min part
-            curMin = min(curCluster.cluster_connClustDict[iCluster.cluster_label],curCluster.cluster_connClustDict[jCluster.cluster_label])
+            curMin = min(curCluster.cluster_connClustDict[iClusterTuple],curCluster.cluster_connClustDict[jClusterTuple])
            if(outputDict.__contains__(curLayer) == False):
                outputDict[curLayer]= curMin
            else: # max part
@@ -68,28 +73,32 @@ def calcEuclideanDist(iIndex,jIndex,clusterList) -> Dict[str,float]:

    #calculate the distance  //paralelizable
    for curCluster in clusterList:      #jCluster.cluster_layer == iCluster.cluster_layer, so i only compare to one
+
+        iClusterTuple = (iCluster.cluster_label,iCluster.cluster_layer)
+        jClusterTuple = (jCluster.cluster_label,jCluster.cluster_layer)
+        #iClusterKey = frozenset(iClusterTuple)
+        #jClusterKey = frozenset(jClusterTuple)
        
        curLayer = curCluster.cluster_layer

        #considering only clusters from other layers for distance calc
        if( curLayer != iCluster.cluster_layer):
-            ######BUUUG, WHAT IF THEY DON'T SHARE A CONNECTION?
            ###### if in a layer both cluster don't have a connection --> distance of 0. Identical in regard to that layer. correct or false?
            iVal = 0
            jVal = 0
            connectedClusters = False
-            if(curCluster.cluster_connClustDict.__contains__(iCluster.cluster_label)):
-                iVal = curCluster.cluster_connClustDict[iCluster.cluster_label]
+            if(curCluster.cluster_connClustDict.__contains__(iClusterTuple)):
+                iVal = curCluster.cluster_connClustDict[iClusterTuple]
                connectedClusters = True
-            if(curCluster.cluster_connClustDict.__contains__(jCluster.cluster_label)):
-                jVal = curCluster.cluster_connClustDict[jCluster.cluster_label]
+            if(curCluster.cluster_connClustDict.__contains__(jClusterTuple)):
+                jVal = curCluster.cluster_connClustDict[jClusterTuple]
                connectedClusters = True

            
            if (connectedClusters == False):
                #clusters aren't connected => assign the max int value if there are no prior elements in list
                if(outputDict.__contains__(curLayer) == False):
-                    outputDict[curLayer]= 2147483647 #notConnected
+                    outputDict[curLayer]= 2147483647 #notConnected to that particular layer at all
            else:
                #clusters ARE connected => add the squares part of the euclid distance to the value of the similarity
                if(outputDict.__contains__(curLayer) == False):
@@ -140,10 +149,10 @@ def calculateSimilarity(inputLayerDict):
        while ( j<len(clusterList)):
            jCluster = clusterList[j]
            if (iCluster.cluster_layer == jCluster.cluster_layer): #calculate similarity only from the same layer
-                tuplekey = (clusterList[i].cluster_label,clusterList[j].cluster_label)
-                key = frozenset(tuplekey)
+                tuplekey = (clusterList[i].cluster_label,clusterList[j].cluster_label,iCluster.cluster_layer)
+                
                #### EUCLIDEAN DISTANCE /minMax
-                similarityDict[key]=calcEuclideanDist(i,j,clusterList)
+                similarityDict[tuplekey]=calcEuclideanDist(i,j,clusterList)

                #print("#### similarityDict  i:"+str(i)+" j:"+str(j))
                #print("#### "+str(similarityDict))

--- a/src/data-hub/role-stage-discovery-microservice/app/processing/similarityFiles/calculateWeights.py
+++ b/src/data-hub/role-stage-discovery-microservice/app/processing/similarityFiles/calculateWeights.py
@@ -7,7 +7,7 @@ def sortFunctByNode(node):
    try :
        return node.uniqueID
    except:
-        print(node.cluster_label)
+        print(node.node_cluster)
        print(node.node_layer)
        print(node.uniqueID)

@@ -55,35 +55,37 @@ def calculateWeights(inputLayerDict) -> Dict[str,LayerC]:
            #Compute a connection 

            if (iNode.node_layer != jNode.node_layer) and (iNode.uniqueID == jNode.uniqueID):
-                iOldTuple = (iNode.uniqueID,iNode.cluster_label)
-                jOldTuple= (jNode.uniqueID,jNode.cluster_label)
-                iOldKey = frozenset(iOldTuple)
-                jOldKey = frozenset(jOldTuple)
-                #iForeignKey =
-                #jForeignKey =
+                iNodeTuple = (iNode.uniqueID,iNode.node_cluster,iNode.node_layer)
+                jNodeTuple= (jNode.uniqueID,jNode.node_cluster,jNode.node_layer)
+                iNodeKey = frozenset(iNodeTuple)
+                jNodeKey = frozenset(jNodeTuple)
+                iClusterTuple = (iNode.node_cluster,iNode.node_layer)
+                jClusterTuple = (jNode.node_cluster,jNode.node_layer)
+                #iClusterKey = frozenset(iClusterTuple)
+                #jClusterKey = frozenset(jClusterTuple)
                #Check if old node dicts has this node: if not add to ConnDictionary and to OldNodesDict
                #              Layer                .             Cluster             .    OldNodesDict    .    Does not contain the OTHER node
                
-                if (inputLayerDict[iNode.node_layer].cluster_Dict[iNode.cluster_label].cluster_connNodesDict.__contains__(jOldKey) == False):
+                if (inputLayerDict[iNode.node_layer].cluster_Dict[iNode.node_cluster].cluster_connNodesDict.__contains__(jNodeKey) == False):
                    #add node j at cluster i
-                    if (inputLayerDict[iNode.node_layer].cluster_Dict[iNode.cluster_label].cluster_connClustDict.__contains__(jNode.cluster_label)):
-                        inputLayerDict[iNode.node_layer].cluster_Dict[iNode.cluster_label].cluster_connClustDict[jNode.cluster_label]+=1
+                    if (inputLayerDict[iNode.node_layer].cluster_Dict[iNode.node_cluster].cluster_connClustDict.__contains__(jClusterTuple)):
+                        inputLayerDict[iNode.node_layer].cluster_Dict[iNode.node_cluster].cluster_connClustDict[jClusterTuple]+=1
                    else:
-                        inputLayerDict[iNode.node_layer].cluster_Dict[iNode.cluster_label].cluster_connClustDict[jNode.cluster_label]=1
+                        inputLayerDict[iNode.node_layer].cluster_Dict[iNode.node_cluster].cluster_connClustDict[jClusterTuple]=1
                    #add node to old nodes
-                    inputLayerDict[iNode.node_layer].cluster_Dict[iNode.cluster_label].cluster_connNodesDict[jOldKey]=jNode
+                    inputLayerDict[iNode.node_layer].cluster_Dict[iNode.node_cluster].cluster_connNodesDict[jNodeKey]=jNode
                    

                    
                
-                if (inputLayerDict[jNode.node_layer].cluster_Dict[jNode.cluster_label].cluster_connNodesDict.__contains__(iOldKey) == False):
+                if (inputLayerDict[jNode.node_layer].cluster_Dict[jNode.node_cluster].cluster_connNodesDict.__contains__(iNodeKey) == False):
                    #add node i at cluster j
-                    if (inputLayerDict[jNode.node_layer].cluster_Dict[jNode.cluster_label].cluster_connClustDict.__contains__(iNode.cluster_label)):
-                        inputLayerDict[jNode.node_layer].cluster_Dict[jNode.cluster_label].cluster_connClustDict[iNode.cluster_label]+=1
+                    if (inputLayerDict[jNode.node_layer].cluster_Dict[jNode.node_cluster].cluster_connClustDict.__contains__(iClusterTuple)):
+                        inputLayerDict[jNode.node_layer].cluster_Dict[jNode.node_cluster].cluster_connClustDict[iClusterTuple]+=1
                    else:
-                        inputLayerDict[jNode.node_layer].cluster_Dict[jNode.cluster_label].cluster_connClustDict[iNode.cluster_label]=1
+                        inputLayerDict[jNode.node_layer].cluster_Dict[jNode.node_cluster].cluster_connClustDict[iClusterTuple]=1
                    #add node to old nodes
-                    inputLayerDict[jNode.node_layer].cluster_Dict[jNode.cluster_label].cluster_connNodesDict[iOldKey]=iNode
+                    inputLayerDict[jNode.node_layer].cluster_Dict[jNode.node_cluster].cluster_connNodesDict[iNodeKey]=iNode

            j+=1   
        i+=1

--- a/src/data-hub/role-stage-discovery-microservice/app/processing/similarityFiles/dataInput.py
+++ b/src/data-hub/role-stage-discovery-microservice/app/processing/similarityFiles/dataInput.py
--- a/src/data-hub/role-stage-discovery-microservice/app/processing/similarityFiles/miscFunctions.py
+++ b/src/data-hub/role-stage-discovery-microservice/app/processing/similarityFiles/miscFunctions.py
@@ -50,42 +50,6 @@ def convertLayerDictToJSON(layerDict, runId):
    :rtype: Dict{string: [Cluster1, Cluster2, ...]}
    '''

-    '''
-    {
-        layer1 : {
-            [
-                {
-                    cluster_label1 : 0123400,
-                    cluster_layer: layer1,
-                    "cluster_connClustDict": {
-                        "0123456": 98
-                        "1234567": 12
-                    },
-
-                    cluster_containedNodesDict : {
-                        [
-                            abcd,
-                            sgre,
-                            dgre,
-                            ddhr,
-                            yyrh
-                        ]
-                    }
-                },
-                {
-
-                },
-                {
-                    
-                }            
-            ]
-        },
-
-        layer2 : {
-
-        }
-    }
-    '''
    outputJSON = []

    for curLayer in layerDict.values():        
@@ -94,52 +58,74 @@ def convertLayerDictToJSON(layerDict, runId):
                "cluster_label" : curCluster.cluster_label,
                "cluster_layer" : curCluster.cluster_layer, 
                "cluster_runId" : runId,
-                "cluster_connClustDict" : changeDictKeysToString(curCluster.cluster_connClustDict),
-                "cluster_connNodesDict" : getFrozensetFromConnNodesDict(curCluster.cluster_connNodesDict),
-                "cluster_containedNodesDict" : getNodeIdListFromContainedNodesDict(curCluster.cluster_containedNodesDict),
+                "cluster_connClustDict" : changeTupleDictToDictList(curCluster.cluster_connClustDict),
+                "cluster_connNodesDict" : getFrozensetFromConnNodesDict(curCluster.cluster_connNodesDict), #Don
+                "cluster_containedNodesDict" : getNodeIdListFromContainedNodesDict(curCluster.cluster_containedNodesDict)
            })

    #outputJSON = json.dumps(outputJSON, default=lambda o: o.__dict__, indent=4)

    return outputJSON

-def changeDictKeysToString(inputDict):
+def changeTupleDictToDictList(inputDict):
+    '''
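+    Helper function that converts a dict keyed by (cluster_label, cluster_layer) tuples into a list of dicts (cluster_label, cluster_layer, connectionWeight) for JSON output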
+
+    '''
+
+    outputList = []
+    for tupleKey in inputDict:
+        auxDict = dict()
+        auxDict["cluster_label"]= tupleKey[0]
+        auxDict["cluster_layer"]= tupleKey[1]
+        auxDict["connectionWeight"] = inputDict[tupleKey]
+        outputList.append(auxDict)            

-    keys_values = inputDict.items()
-    outputDict = { str(key): value for key,value in keys_values}
-    return outputDict
+    return outputList

 def getNodeIdListFromContainedNodesDict(inputDict):
+    '''
+    Helper function that collects the uniqueID of every node in the dict into a list for JSON output
+
+    '''
    output = []
    for curNode in inputDict.values():
        output.append(curNode.uniqueID)
    return output

 def getFrozensetFromConnNodesDict(inputDict):
+    '''
+    Helper function that converts every node in the dict into a dict (node_id, node_cluster, node_layer) and returns them as a list for JSON output
+
+    '''
    output = []
    
    for curNode in inputDict.values(): 
        auxDict = {}
        auxDict["node_id"]= curNode.uniqueID
-        auxDict["node_cluster"] = curNode.cluster_label
+        auxDict["node_cluster"] = curNode.node_cluster
+        auxDict["node_layer"] = curNode.node_layer
        output.append(auxDict)
    return output


 def convertSimilarityDictToJSON(inputDict,runId):
+    ''' Converts a Similarity Dictionary to JSON format. For outputting to DB
+
+    :param Dict{} inputDict: Maps (cluster1_label, cluster2_label, cluster_layer) tuples to the computed similarity values between the two clusters
+
+    :rtype: List[Dicts]
+    '''

    similList = []
-    for compositeKey in inputDict:
-        frozensetString =list()
-        #key is a tuple of cluster_labels
-        for key in compositeKey:
-            frozensetString.append(key)
-
-        similList.append({
-            "clusterTuple" : frozensetString,
-            "similarityValues" : inputDict[compositeKey],
-            "runId": runId
-        })
+    for tupleKey in inputDict:
+        auxDict = dict()
+        auxDict["cluster1_label"]= tupleKey[0]
+        auxDict["cluster2_label"]= tupleKey[1]
+        auxDict["cluster_layer"] = tupleKey[2]  
+        auxDict["similarityValues"] = inputDict[tupleKey]    
+        auxDict["runId"] = runId
+        similList.append(auxDict)
    similToJSON = similList
    #outputJSON = json.dumps(similToJSON, default=lambda o: o.__dict__, indent=4)


--- a/src/data-hub/role-stage-discovery-microservice/app/processing/similarityFiles/testSimilarity.py
+++ b/src/data-hub/role-stage-discovery-microservice/app/processing/similarityFiles/testSimilarity.py
--- a/src/data-hub/role-stage-discovery-microservice/app/resultLayerDictN2999C121.json
+++ b/src/data-hub/role-stage-discovery-microservice/app/resultLayerDictN2999C121.json
--- a/src/data-hub/role-stage-discovery-microservice/app/resultLayerDictN2999C60.json
+++ b/src/data-hub/role-stage-discovery-microservice/app/resultLayerDictN2999C60.json
--- a/src/data-hub/role-stage-discovery-microservice/app/resultSimilarityDictN2999C121.json
+++ b/src/data-hub/role-stage-discovery-microservice/app/resultSimilarityDictN2999C121.json
--- a/src/data-hub/role-stage-discovery-microservice/app/resultSimilarityDictN2999C60.json
+++ b/src/data-hub/role-stage-discovery-microservice/app/resultSimilarityDictN2999C60.json
--- a/src/data-hub/role-stage-discovery-microservice/app/resultTimeExecN2999C121.txt
+++ b/src/data-hub/role-stage-discovery-microservice/app/resultTimeExecN2999C121.txt
-StartTime: 2020-07-02 12:05:47.067975
-FinishTime: 2020-07-02 12:05:54.561853
+StartTime: 2020-07-07 16:55:42.418309
+FinishTime: 2020-07-07 16:55:49.746628

-PopulateWithNewNodes: 2.495718
-CalculateWeights: 4.590413
-CalculateSimilarity: 0.407747
-TotalTime: 7.493878
\ No newline at end of file
+PopulateWithNewNodes: 2.321926
+CalculateWeights: 4.499367
+CalculateSimilarity: 0.507026
+TotalTime: 7.328319
+RunId: 5f048cf587e0ee319fa894ed
\ No newline at end of file
--- a/src/data-hub/role-stage-discovery-microservice/app/resultTimeExecN2999C60.txt
+++ b/src/data-hub/role-stage-discovery-microservice/app/resultTimeExecN2999C60.txt
-StartTime: 2020-07-06 16:16:11.525479
-FinishTime: 2020-07-06 16:16:18.213974
+StartTime: 2020-07-07 14:57:32.942331
+FinishTime: 2020-07-07 14:57:39.489324

-PopulateWithNewNodes: 2.206513
-CalculateWeights: 4.435216
-CalculateSimilarity: 0.046766
-TotalTime: 6.688495
-RunId: 5f033232366be85ec1afca7b
\ No newline at end of file
+PopulateWithNewNodes: 2.102823
+CalculateWeights: 4.382948
+CalculateSimilarity: 0.061222
+TotalTime: 6.546993
+RunId: 5f0471438b27390711e31c70
\ No newline at end of file
--- a/src/data-hub/role-stage-discovery-microservice/app/similarityMain.py
+++ b/src/data-hub/role-stage-discovery-microservice/app/similarityMain.py
@@ -30,12 +30,17 @@ from processing.similarityFiles.calculateWeights import *
 from processing.similarityFiles.calculateSimilarity import *
 from processing.similarityFiles.miscFunctions import *
 from processing.similarityFiles.dataOutput import *
-from routes.connRun import connected_run
+
+
+#####TEST ONLY#####
+from processing.similarityFiles.testSimilarity import *


 def main():   
    print("\nEntered Main")

+    outputToFileFLAG = False
+
    timelist = []
    timelist.append(currentTime())#starting time
    
@@ -54,8 +59,9 @@ def main():
    limitNrNodes = 1000 #per Layer

    layerDict = getClusterDataFromMongo(layerNameList,limitNrCluster,limitNrNodes)
-    #layerDict = getClusterDataFromSwagger(limitNrCluster,limitNrNodes) #for Swagger, Change URLs inside the function for different input Data

+    #URLlist = None
+    #layerDict = getClusterDataFromSwagger(limitNrCluster,limitNrNodes, URLlist) #for Swagger, Change URLs inside the function for different input Data or provide a list with URLS 
    totalNodes = totalNumberOfNodes(layerDict)
    print("Nr. of nodes: " +str(totalNodes))
    totalClusters = totalNumberOfClusters(layerDict)
@@ -76,25 +82,34 @@ def main():
    
    #Write to files
    runId = add_connected_run()
+
    
-    print("Outputing data")
-    outputFileLayerFunction(layerDict,totalNodes,totalClusters,runId)
-    outputFileSimilFunction(similarityDict,totalNodes,totalClusters,runId)
-    outputFileTimeFunction(timelist,totalNodes,totalClusters,runId)
+    if (outputToFileFLAG == True):
+        print("Outputing data")
+        outputFileLayerFunction(layerDict,totalNodes,totalClusters,runId)
+        outputFileSimilFunction(similarityDict,totalNodes,totalClusters,runId)
+        outputFileTimeFunction(timelist,totalNodes,totalClusters,runId)

    #Output to DB
    outputMongoConnClustDict(layerDict,runId)
    outputMongoSimilarity(similarityDict,runId)

-    #Currently not used, developed for possible future uses
+
+    #Currently not used in the calculation of connections/similarity, developed for possible future uses
    connClustersFromMongo = getConnClusterDataFromMongo()    
+    similarityDictFromMongo = calculateSimilarity(connClustersFromMongo)
+
    similarityArrFromMongo = getSimilarityDataFromMongo()
    connectedRunFromMongo = getConnectedRunDataFromMongo()
+    
+
    print("FINISHED")

    return

-
+def test():
+    testInputData()
 ##########START##########
-main()
+#main()
+test()
 #########FINISH##########