Refactored part of Connected/Similarity code

16d9711d · Bogdan · d24d7dc4 · 16d9711d · 16d9711d · 16d9711d
Commit 16d9711d authored Jul 06, 2020 by Bogdan
13 changed files
--- a/src/data-hub/role-stage-discovery-microservice/app/db/entities/connected_node.py
+++ b/src/data-hub/role-stage-discovery-microservice/app/db/entities/connected_node.py
 class NodeC:
-    def __init__(self, cluster_label, node_layer, finished_time, latitude_Destination, longitude_Destination, travelID, travelPrice, uniqueID, userID):
+    def __init__(self, cluster_label, node_layer, uniqueID):
        self.cluster_label = cluster_label
        self.node_layer = node_layer
-        self.finished_time = finished_time
+        self.uniqueID = uniqueID        
-        self.latitude_Destination = latitude_Destination
\ No newline at end of file
-        self.longitude_Destination = longitude_Destination
-        self.travelID = travelID
-        self.travelPrice = travelPrice
-        self.uniqueID = uniqueID        
-        self.userID = userID
\ No newline at end of file
--- a/src/data-hub/role-stage-discovery-microservice/app/db/repository.py
+++ b/src/data-hub/role-stage-discovery-microservice/app/db/repository.py
@@ -90,7 +90,7 @@ class Repository(MongoRepositoryBase):
        result = super().insert_many(self._connected_clusters_collection, clusterDictArray)
        return result
-    def get_connected_clusters(self, run_id=None):#, layer_name: str):
+    def get_connected_clusters(self, run_id: str=None):#, layer_name: str):
        ''' Get Connected Clusters Data from DB '''
        if (run_id == None):
            entries = super().get_entries(self._connected_clusters_collection, projection={'_id': 0})
@@ -115,8 +115,7 @@ class Repository(MongoRepositoryBase):
        #super().insert_entry(self._connected_clusters_collection, outputJSON)
        return result
-    #TODO
+    def get_similarity(self, run_id: str=None):
-    def get_similarity(self, run_id=None):
        ''' Get Similarity Data from DB '''
        if (run_id == None):
            entries = super().get_entries(self._similarity_collection, projection={'_id': 0})
@@ -138,12 +137,12 @@ class Repository(MongoRepositoryBase):
        result = super().insert_entry(self._connected_run, conRunTimestamp)
        return result
-    def get_connected_run(self, run_id= None):
+    def get_connected_run(self, run_id: str= None):
        ''' Get Connected Run Data from DB '''
        if (run_id == None):
            entries = super().get_entries(self._connected_run)
        else:
-            entries = super().get_entries(self._connected_run, selection={'_id' : run_id})
+            entries = super().get_entries(self._connected_run, selection={'_id' : run_id}, projection={'_id': 1, 'Datetime': 1})
        output = []
        for e in entries:

--- a/src/data-hub/role-stage-discovery-microservice/app/processing/similarityFiles/calculateSimilarity.py
+++ b/src/data-hub/role-stage-discovery-microservice/app/processing/similarityFiles/calculateSimilarity.py
@@ -30,7 +30,6 @@ def minMaxFunction(iIndex,jIndex,clusterList) -> Dict[str,int]:
    for curCluster in clusterList:      #jCluster.cluster_layer == iCluster.cluster_layer, so i only compare to one
        curLayer = curCluster.cluster_layer
-        curLabel = curCluster.cluster_label
        if(( curLayer != iCluster.cluster_layer) 
        and ( curCluster.cluster_connClustDict.__contains__(iCluster.cluster_label)) 
        and ( curCluster.cluster_connClustDict.__contains__(jCluster.cluster_label))):
@@ -71,7 +70,6 @@ def calcEuclideanDist(iIndex,jIndex,clusterList) -> Dict[str,float]:
    for curCluster in clusterList:      #jCluster.cluster_layer == iCluster.cluster_layer, so i only compare to one
        curLayer = curCluster.cluster_layer
-        curLabel = curCluster.cluster_label #debugOnly
        #considering only clusters from other layers for distance calc
        if( curLayer != iCluster.cluster_layer):
@@ -116,7 +114,7 @@ def calculateSimilarity(inputLayerDict):
    ''' Calculates the similarity between clusters contained in the "inputLayerDict". Similarity is calculated for each combination of 2 clusters from the SAME layer.
-    :param Dict{layername: Layer} inputLayerDict: Contains the associated Layer and Clusters objects. The dictonary KEY is layername, the Value is a Layer Object. The Layer object has an attribute cluster_Dict which stores the clusters in the Layer.
+    :param Dict{layername: LayerC} inputLayerDict: Contains the associated Layer and Clusters objects. The dictionary KEY is layername, the Value is a LayerC Object. The LayerC object has an attribute cluster_Dict which stores the clusters in the LayerC.
    :returns: Dict{tuple(cluster_label1, cluster_label2) : Dict{layername, similarityValue}}. Returns a Dictionary with a tuple of 2 clusters as KEY, and a Dictionary with the computed similarity of the clusters in regard to each layer as VALUE
@@ -129,8 +127,8 @@ def calculateSimilarity(inputLayerDict):
    clusterList = list()
-    for curLayer in inputLayerDict.values():        
+    for curLayerC in inputLayerDict.values():        
-        for curCluster in curLayer.cluster_Dict.values():
+        for curCluster in curLayerC.cluster_Dict.values():
            clusterList.append(curCluster)
    #print(" Nr. of clusters: "+str(len(clusterList)))

--- a/src/data-hub/role-stage-discovery-microservice/app/processing/similarityFiles/calculateWeights.py
+++ b/src/data-hub/role-stage-discovery-microservice/app/processing/similarityFiles/calculateWeights.py
@@ -15,12 +15,12 @@ def calculateWeights(inputLayerDict) -> Dict[str,LayerC]:
    ''' Calculates the nr of connections/weights between the clusters contained in the "inputLayerDict". Connections are made between clusters from DIFFERENT layers.
-    :param Dict{string: Layer} inputLayerDict: Contains the associated Layer and Clusters objects. The dictonary KEY is layername, the Value is a Layer Object. The Layer object has an attribute cluster_Dict which stores the clusters in the Layer.
+    :param Dict{string: LayerC} inputLayerDict: Contains the associated LayerC and ConnClusters objects. The dictionary KEY is layername, the Value is a LayerC Object. The LayerC object has an attribute cluster_Dict which stores the clusters in the LayerC.
-    :returns: Dict{layername: Layer}. Returns the inputLayerDict with the added connections in the attributes cluster_connClustDict and cluster_connNodesDict
+    :returns: Dict{layername: LayerC}. Returns the inputLayerDict with the added connections in the attributes cluster_connClustDict and cluster_connNodesDict
-    :rtype: Dict{string: Layer}
+    :rtype: Dict{string: LayerC}
    '''    
@@ -52,13 +52,15 @@ def calculateWeights(inputLayerDict) -> Dict[str,LayerC]:
        while ( j<len(nodeList)):
            jNode = nodeList[j]
            #if there is a connection
-            #print("\n ### \n"+iNode.uniqueID +" "+ iNode.node_layer +"\n"+ jNode.uniqueID +" "+ jNode.node_layer )
+            #Compute a connection 
            if (iNode.node_layer != jNode.node_layer) and (iNode.uniqueID == jNode.uniqueID):
                iOldTuple = (iNode.uniqueID,iNode.cluster_label)
                jOldTuple= (jNode.uniqueID,jNode.cluster_label)
                iOldKey = frozenset(iOldTuple)
                jOldKey = frozenset(jOldTuple)
+                #iForeignKey =
+                #jForeignKey =
                #Check if old node dicts has this node: if not add to ConnDictionary and to OldNodesDict
                #              Layer                .             Cluster             .    OldNodesDict    .    Does not contain the OTHER node
@@ -86,11 +88,6 @@ def calculateWeights(inputLayerDict) -> Dict[str,LayerC]:
            j+=1   
        i+=1
-    #deleting cluster_containedNodesDicts/// No longer needed
-    #for curLayer in inputLayerDict.values():
-     #   for curCluster in curLayer.cluster_Dict.values():
-     #       inputLayerDict[curCluster.cluster_layer].cluster_Dict[curCluster.cluster_label].cluster_containedNodesDict = dict()
    print("Finished calculateWeights")
    #store weights in database?

--- a/src/data-hub/role-stage-discovery-microservice/app/processing/similarityFiles/dataInput.py
+++ b/src/data-hub/role-stage-discovery-microservice/app/processing/similarityFiles/dataInput.py
--- a/src/data-hub/role-stage-discovery-microservice/app/processing/similarityFiles/dataOutput.py
+++ b/src/data-hub/role-stage-discovery-microservice/app/processing/similarityFiles/dataOutput.py
@@ -2,10 +2,9 @@
 import json
 import requests
 import datetime
-from routes.connClusters import add_conn_clusters
-from routes.similarity import add_similarity
-from routes.connRun import add_connected_run
 from processing.similarityFiles.miscFunctions import *
+from db.repository import Repository
+repo = Repository()
 def outputFileLayerFunction(layerDict,limitNrNodes,limitNrCluster,runId):
    ''' Writes the layerDict data to a JSON file.
@@ -100,4 +99,43 @@ def outputMongoSimilarity(inputDict,runId):
    :param string runId: Id of the Run 
    ''' 
    add_similarity(inputDict,runId)
\ No newline at end of file
+def add_connected_run():
+    '''
+        Inserts Run with current Time into the DB
+        :returns: Returns the _id of the connected_run entry in the DB
+        :rtype: string
+    '''    
+    currentTime = datetime.datetime.now()
+    runDict = {"Datetime" : str(currentTime)}
+    inserted_result = repo.add_connected_run(runDict)
+    return str(inserted_result.inserted_id)
+def add_conn_clusters(inputDict,runId):
+    ''' Stores connected_clusters in the database.
+    :param Dict() inputDict: Contains the data to insert
+    :param string runId: Id of the Run 
+    ''' 
+    outputJSON = convertLayerDictToJSON(inputDict,runId)
+    repo.add_connected_clusters(outputJSON)
+def add_similarity(inputDict,runId):
+    ''' Stores cluster_similarity in the database.
+    :param Dict() inputDict: Contains the data to insert
+    :param string runId: Id of the Run 
+    ''' 
+    outputJSON = convertSimilarityDictToJSON(inputDict,runId)
+    repo.add_similarity(outputJSON)
\ No newline at end of file
--- a/src/data-hub/role-stage-discovery-microservice/app/resultLayerDictN2999C60.json
+++ b/src/data-hub/role-stage-discovery-microservice/app/resultLayerDictN2999C60.json
--- a/src/data-hub/role-stage-discovery-microservice/app/resultSimilarityDictN2999C60.json
+++ b/src/data-hub/role-stage-discovery-microservice/app/resultSimilarityDictN2999C60.json
--- a/src/data-hub/role-stage-discovery-microservice/app/resultTimeExecN2999C60.txt
+++ b/src/data-hub/role-stage-discovery-microservice/app/resultTimeExecN2999C60.txt
-StartTime: 2020-07-06 10:43:32.240013
+StartTime: 2020-07-06 16:16:11.525479
-FinishTime: 2020-07-06 10:43:39.110333
+FinishTime: 2020-07-06 16:16:18.213974
-PopulateWithNewNodes: 2.399582
+PopulateWithNewNodes: 2.206513
-CalculateWeights: 4.422768
+CalculateWeights: 4.435216
-CalculateSimilarity: 0.04797
+CalculateSimilarity: 0.046766
-TotalTime: 6.87032
+TotalTime: 6.688495
-RunId: 5f02e43b53a73a48d0eaaed5
+RunId: 5f033232366be85ec1afca7b
\ No newline at end of file
--- a/src/data-hub/role-stage-discovery-microservice/app/routes/connClusters.py
+++ b/src/data-hub/role-stage-discovery-microservice/app/routes/connClusters.py
 from flask import request, Response
 from db.repository import Repository
-from routes.connRun import add_connected_run
-from processing.similarityFiles.miscFunctions import *
 repo = Repository()
-def add_conn_clusters(inputDict,runId):
-    ''' Stores connected_clusters in the database.
-    :param Dict() inputDict: Contains the data to insert
-    :param string runId: Id of the Run 
-    ''' 
-    outputJSON = convertLayerDictToJSON(inputDict,runId)
-    repo.add_connected_clusters(outputJSON)
 def get_conn_clusters():
    ''' Gets connected_clusters from the database.

--- a/src/data-hub/role-stage-discovery-microservice/app/routes/connRun.py
+++ b/src/data-hub/role-stage-discovery-microservice/app/routes/connRun.py
@@ -7,34 +7,17 @@ import datetime
 repo = Repository()
-def add_connected_run():
+def get_connected_run():
-    '''
+    ''' Gets Run from the database.
-        Inserts Run with current Time into the DB
-        :returns: Returns the _id of the connected_run entry in the DB
-        :rtype: string
-    '''    
-    currentTime = datetime.datetime.now()
-    runDict = {"Datetime" : str(currentTime)}
-    inserted_result = repo.add_connected_run(runDict)
-    return str(inserted_result.inserted_id)
-def get_connected_run(): ########TODO#################
-    ''' ##TODO## Gets Run from the database.
        :returns: Returns Run objects from the DB
        :rtype: Dict{_id,datetime}
    '''  
-    """
+    result = repo.get_connected_run()
-    result = repo.get_connected_clusters()
+    if result is None or len(result) == 0:        
-    if result is None or result.retrieved == 0:        
        print("#### Response 404")
        return Response(status=404)
    else:
        return result
-    conRun = ConnectedRun(result.sdfsdf)
-    """
--- a/src/data-hub/role-stage-discovery-microservice/app/routes/similarity.py
+++ b/src/data-hub/role-stage-discovery-microservice/app/routes/similarity.py
@@ -5,19 +5,6 @@ from processing.similarityFiles.miscFunctions import convertSimilarityDictToJSON
 repo = Repository()
-def add_similarity(inputDict,runId):
-    ''' Stores cluster_similarity in the database.
-    :param Dict() inputDict: Contains the data to insert
-    :param string runId: Id of the Run 
-    ''' 
-    outputJSON = convertSimilarityDictToJSON(inputDict,runId)
-    repo.add_similarity(outputJSON)
 def get_similarity(): 
    ''' Gets cluster_similarity from the database.

--- a/src/data-hub/role-stage-discovery-microservice/app/similarityMain.py
+++ b/src/data-hub/role-stage-discovery-microservice/app/similarityMain.py
@@ -88,7 +88,8 @@ def main():
    #Currently not used, developed for possible future uses
    connClustersFromMongo = getConnClusterDataFromMongo()    
-    similarityArrFromMongo = getSimilarityDataFromMOngo()
+    similarityArrFromMongo = getSimilarityDataFromMongo()
+    connectedRunFromMongo = getConnectedRunDataFromMongo()
    print("FINISHED")
    return