Commit eab8c906 authored by Bogdan

Created Test (Assertion) + other minor changes

parent 3b390ffb
@@ -4,3 +4,5 @@
 *.log
 **/env
 **/venv
+src/data-hub/role-stage-discovery-microservice/app/resultSimilarityDictN14992C221.json
@@ -328,36 +328,51 @@ definitions:
         type: string
         example: "5efdc04ac43add0aba567d76"
       cluster_containedNodesDict:
-        $ref: "#/definitions/ConnectedNode"
+        type: array
+        items:
+          type: string
+          example: "2696718d7a33ab3dbf28e9c88411afcfe9a933a45e57ec9159bc0668543f1568"
       cluster_connNodesDict:
+        type: array
+        items:
          $ref: "#/definitions/ConnectedNode"
       cluster_connClustDict:
-        type: object
-        additionalProperties:
-          type: number
-        example:
-          "cluster_label": nrOfConnectedNodes
-          #"-1": 42
-          "0": 39
-          "6969": 1
-          #not used, should be removed?
-          #cluster_connectionsNr
+        type: array
+        items:
+          $ref: "#/definitions/ConnectedClusterAux"
+  ConnectedClusterAux:
+    type: object
+    properties:
+      cluster_label:
+        type: string
+        example: "-1"
+      cluster_layer:
+        type: string
+        example: "FinishedTime_Layer"
+      connectionWeight:
+        type: number
+        example: 42
   ConnectedNode:
     type: object
     properties:
-      cluster_label:
+      node_id:
         type: string
-      node_layer:
+      node_cluster:
         type: string
-      uniqueID:
+      node_layer:
         type: string
     example:
-      "cluster_label": "2230"
+      "node_id": "27a08ed0facc7d68a0818c7695dad391cf48d6095e57ec9159bc0668543f159b"
+      "node_cluster": "2230"
       "node_layer": "Destination_Layer"
-      "uniqueID": "a95075f5042b1b27060080156d87"
       #not used, should be removed?
       #finished_time
      #latitude_Destination
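For reference, a connected-cluster document matching the updated definitions would look roughly as follows. This is an editor's sketch assembled from the example values above (not part of the commit), written as a Python literal since the service handles these documents as plain dicts; the cluster_label / cluster_layer / cluster_runId fields are assumed to be defined just above the excerpted hunk.

connected_cluster_example = {
    "cluster_label": "0",
    "cluster_layer": "Price_Layer",
    "cluster_runId": "5efdc04ac43add0aba567d76",
    # node ids contained in the 'source' cluster
    "cluster_containedNodesDict": [
        "2696718d7a33ab3dbf28e9c88411afcfe9a933a45e57ec9159bc0668543f1568"
    ],
    # full ConnectedNode objects found in 'dest' clusters
    "cluster_connNodesDict": [
        {
            "node_id": "27a08ed0facc7d68a0818c7695dad391cf48d6095e57ec9159bc0668543f159b",
            "node_cluster": "2230",
            "node_layer": "Destination_Layer"
        }
    ],
    # one ConnectedClusterAux entry per 'dest' cluster
    "cluster_connClustDict": [
        {"cluster_label": "-1", "cluster_layer": "FinishedTime_Layer", "connectionWeight": 42}
    ]
}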
@@ -373,18 +388,16 @@ definitions:
   ClusterSimilarityDictionary:
     properties:
-      clusterTuple:
-        type: array
-        items:
-          type: string
-        minItems: 2
-        maxItems: 2
-        example: [
-          #cluster_label1
-          0,
-          #cluster_label2
-          319
-        ]
+      cluster1_label:
+        type: string
+        example: "0"
+      cluster2_label:
+        type: string
+        example: "1"
+      cluster_layer:
+        type: string
+        example: "Price_layer"
       similarityValues:
         type: object
         additionalProperties:
...
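A stored similarity document under the new ClusterSimilarityDictionary shape would then look roughly like this (editor's sketch, not part of the commit; the per-layer numeric values follow what calcEuclideanDist produces further down, including its 2147483647 "not connected" sentinel):

cluster_similarity_example = {
    "cluster1_label": "0",
    "cluster2_label": "1",
    "cluster_layer": "Price_layer",
    "similarityValues": {
        "Destination_Layer": 3.0,
        "FinishedTime_Layer": 2147483647
    },
    "runId": "5f0471438b27390711e31c70"
}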
@@ -7,7 +7,7 @@ class ClusterC:
     :param cluster_layer: The layer name of the 'source' cluster
     :param cluster_runId: The run which calculated the connections
     :param cluster_containedNodesDict: Ids of nodes contained in the 'source' cluster
-    :param cluster_connNodesDict: Ids of nodes contained in 'dest' clusters, where the 'dest' cluster is uniquely identifiable by layer name and cluster label
+    :param cluster_connNodesDict: Node Objects contained in 'dest' clusters, where the 'dest' cluster is uniquely identifiable by layer name and cluster label
     :param cluster_connClustDict: Layer name, cluster label and weight for each 'dest' cluster
     '''
@@ -16,7 +16,6 @@ class ClusterC:
         self.cluster_label = cluster_label
         self.cluster_layer = cluster_layer
         self.cluster_runId = cluster_runId
-        self.cluster_containedNodesDict = cluster_containedNodesDict ###RENAME TO curClNodesDict #Keys are frozensets(touples) uniqueID and cluster #
+        self.cluster_containedNodesDict = cluster_containedNodesDict #Keys are frozensets(touples) == frozenset(uniqueID, cluster and layer) Value is the Node UniqueId
-        self.cluster_connNodesDict = cluster_connNodesDict #Keys are frozensets(touples) uniqueID and cluster #problem if you remove newNodes and oldNodes lists.. there may be duplicates
+        self.cluster_connNodesDict = cluster_connNodesDict #Keys are frozensets(touples) (uniqueID:str,node_cluster:str,node_layer:str) Values are NodeC Objects
-        self.cluster_connClustDict = cluster_connClustDict #dictionary: layer -> (dict2: cluster_label -> nrOfConnections ) OR dictionary: cluster_label -> nrOfConnections
+        self.cluster_connClustDict = cluster_connClustDict #dictionary: (dict[(cluster_label,clusterlayer)] -> nrOfConnections/weightOfTheConnection )
-        #cluster_connClustDict ------> look at both newNodes and oldNodes
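To make the new key shapes concrete, here is an editor's sketch (sample labels, layers and ids, not repository code) of how the three dictionaries are keyed after this commit; the shapes follow populateWithNewNodesSingleLayer and calculateWeights further below.

# ClusterC / NodeC are the model classes shown in this diff (module path not shown in the excerpt)
node = NodeC("0", "Price_Layer", "abc123")                                   # node_cluster, node_layer, uniqueID
containedNodes = {frozenset(("abc123", "0", "Price_Layer")): node}           # frozenset of (uniqueID, cluster, layer)
connNodes = {frozenset(("def456", "7", "Destination_Layer")): NodeC("7", "Destination_Layer", "def456")}
connClusters = {("7", "Destination_Layer"): 3}                               # (cluster_label, cluster_layer) -> weight
cluster = ClusterC("0", "Price_Layer", None, containedNodes, connNodes, connClusters)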
 class LayerC:
-    def __init__(self,layer_name,cluster_Dict):
+    def __init__(self,layer_name:str,cluster_Dict):
+        '''
+        This class represents the Layer which contains the connected clusters.
+        :param layer_name: The layer name which contains the clusters
+        :param cluster_Dict: The connected_clusters contained in this layer.
+        '''
         self.layer_name = layer_name
-        self.cluster_Dict = cluster_Dict
+        self.cluster_Dict = cluster_Dict # Dict[cluster_label] --> ClusterC object
\ No newline at end of file
 class NodeC:
-    def __init__(self, cluster_label, node_layer, uniqueID):
-        self.cluster_label = cluster_label
-        self.node_layer = node_layer
-        self.uniqueID = uniqueID
\ No newline at end of file
+    '''
+    This class represents the Node data contained in a Cluster.
+    :param node_layer: The layer name which contains the node
+    :param node_cluster: The connected_cluster.cluster_label which contains in this layer.
+    :param uniqueID: Id of the node. Only unique in inside a single cluster. NOT unique between multiple clusters/layers.
+    '''
+    def __init__(self, node_cluster, node_layer, uniqueID):
+        self.node_cluster = node_cluster # str
+        self.node_layer = node_layer # str
+        self.uniqueID = uniqueID # str
\ No newline at end of file
 from datetime import datetime
 class ConnectedRun:
+    '''
+    This class represents the RunId and Time when the Connecting of the Clusters and Calculating the Similarity between clusters is executed.
+    :param run_id: The MongoDB _id of the Run to uniquely identify it.
+    :param timeOfExec: Datetime object containing info when the run was finished.
+    '''
     def __init__(self,run_id,timeOfExec):
         self.run_id = run_id
         self.timeOfExec = timeOfExec
\ No newline at end of file
@@ -14,6 +14,7 @@ LOGGER = logging.getLogger(__name__)
 #############################
 import connexion
 # load swagger config
 app = connexion.App(__name__, specification_dir='configs/')
 app.add_api('swagger.yml')
...
@@ -29,13 +29,18 @@ def minMaxFunction(iIndex,jIndex,clusterList) -> Dict[str,int]:
     #calculate th
     for curCluster in clusterList: #jCluster.cluster_layer == iCluster.cluster_layer, so i only compare to one
+        iClusterTuple = (iCluster.cluster_label,iCluster.cluster_layer)
+        jClusterTuple = (jCluster.cluster_label,jCluster.cluster_layer)
+        #iClusterKey = frozenset(iClusterTuple)
+        #jClusterKey = frozenset(jClusterTuple)
         curLayer = curCluster.cluster_layer
         if(( curLayer != iCluster.cluster_layer)
-            and ( curCluster.cluster_connClustDict.__contains__(iCluster.cluster_label))
-            and ( curCluster.cluster_connClustDict.__contains__(jCluster.cluster_label))):
+            and ( curCluster.cluster_connClustDict.__contains__(iClusterTuple))
+            and ( curCluster.cluster_connClustDict.__contains__(jClusterTuple))):
             # min part
-            curMin = min(curCluster.cluster_connClustDict[iCluster.cluster_label],curCluster.cluster_connClustDict[jCluster.cluster_label])
+            curMin = min(curCluster.cluster_connClustDict[iClusterTuple],curCluster.cluster_connClustDict[jClusterTuple])
             if(outputDict.__contains__(curLayer) == False):
                 outputDict[curLayer]= curMin
             else: # max part
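The practical effect of the new keys: a weight lookup now needs both the label and the layer, so clusters that happen to share a label in different layers no longer collide. A small editor's sketch with made-up weights:

weights = {("0", "Price_Layer"): 5, ("1", "Price_Layer"): 2}     # a curCluster.cluster_connClustDict
iClusterTuple = ("0", "Price_Layer")
jClusterTuple = ("1", "Price_Layer")
if iClusterTuple in weights and jClusterTuple in weights:
    curMin = min(weights[iClusterTuple], weights[jClusterTuple]) # -> 2, the "min part" above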
@@ -69,27 +74,31 @@ def calcEuclideanDist(iIndex,jIndex,clusterList) -> Dict[str,float]:
     #calculate the distance //paralelizable
     for curCluster in clusterList: #jCluster.cluster_layer == iCluster.cluster_layer, so i only compare to one
+        iClusterTuple = (iCluster.cluster_label,iCluster.cluster_layer)
+        jClusterTuple = (jCluster.cluster_label,jCluster.cluster_layer)
+        #iClusterKey = frozenset(iClusterTuple)
+        #jClusterKey = frozenset(jClusterTuple)
         curLayer = curCluster.cluster_layer
         #considering only clusters from other layers for distance calc
         if( curLayer != iCluster.cluster_layer):
-            ######BUUUG, WHAT IF THEY DON'T SHARE A CONNECTION?
             ###### if in a layer both cluster don't have a connection --> distance of 0. Identical in regard to that layer. correct or false?
             iVal = 0
             jVal = 0
             connectedClusters = False
-            if(curCluster.cluster_connClustDict.__contains__(iCluster.cluster_label)):
-                iVal = curCluster.cluster_connClustDict[iCluster.cluster_label]
+            if(curCluster.cluster_connClustDict.__contains__(iClusterTuple)):
+                iVal = curCluster.cluster_connClustDict[iClusterTuple]
                 connectedClusters = True
-            if(curCluster.cluster_connClustDict.__contains__(jCluster.cluster_label)):
-                jVal = curCluster.cluster_connClustDict[jCluster.cluster_label]
+            if(curCluster.cluster_connClustDict.__contains__(jClusterTuple)):
+                jVal = curCluster.cluster_connClustDict[jClusterTuple]
                 connectedClusters = True
             if (connectedClusters == False):
                 #clusters aren't connected => assign the max int value if there are no prior elements in list
                 if(outputDict.__contains__(curLayer) == False):
-                    outputDict[curLayer]= 2147483647 #notConnected
+                    outputDict[curLayer]= 2147483647 #notConnected to that particular layer at all
             else:
                 #clusters ARE connected => add the squares part of the euclid distance to the value of the similarity
                 if(outputDict.__contains__(curLayer) == False):
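For orientation, the visible part of this hunk boils down to the following per-layer handling (editor's sketch with made-up weights; the squared-difference accumulation itself is outside the excerpt):

curConnClustDict = {("0", "Price_Layer"): 4}              # jCluster ("1", "Price_Layer") is not connected here
iVal = curConnClustDict.get(("0", "Price_Layer"), 0)      # 4
jVal = curConnClustDict.get(("1", "Price_Layer"), 0)      # 0, a missing connection counts as weight 0
connected = ("0", "Price_Layer") in curConnClustDict or ("1", "Price_Layer") in curConnClustDict
notConnectedSentinel = 2147483647                         # used only when neither cluster touches this layer at all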
@@ -140,10 +149,10 @@ def calculateSimilarity(inputLayerDict):
             while ( j<len(clusterList)):
                 jCluster = clusterList[j]
                 if (iCluster.cluster_layer == jCluster.cluster_layer): #calculate similarity only from the same layer
-                    tuplekey = (clusterList[i].cluster_label,clusterList[j].cluster_label)
-                    key = frozenset(tuplekey)
+                    tuplekey = (clusterList[i].cluster_label,clusterList[j].cluster_label,iCluster.cluster_layer)
                     #### EUCLIDEAN DISTANCE /minMax
-                    similarityDict[key]=calcEuclideanDist(i,j,clusterList)
+                    similarityDict[tuplekey]=calcEuclideanDist(i,j,clusterList)
                     #print("#### similarityDict i:"+str(i)+" j:"+str(j))
                     #print("#### "+str(similarityDict))
...
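In short, similarity entries are now keyed by an ordered (cluster_label_i, cluster_label_j, cluster_layer) tuple rather than a frozenset of two labels, which keeps the pair order and records the layer; convertSimilarityDictToJSON in dataOutput later unpacks exactly these three positions. An editor's sketch with sample values:

similarityDict = {}
similarityDict[("0", "1", "Price_Layer")] = {"Destination_Layer": 2.0}   # value: per-layer distances from calcEuclideanDist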
@@ -7,7 +7,7 @@ def sortFunctByNode(node):
     try :
         return node.uniqueID
     except:
-        print(node.cluster_label)
+        print(node.node_cluster)
         print(node.node_layer)
         print(node.uniqueID)
@@ -55,35 +55,37 @@ def calculateWeights(inputLayerDict) -> Dict[str,LayerC]:
             #Compute a connection
             if (iNode.node_layer != jNode.node_layer) and (iNode.uniqueID == jNode.uniqueID):
-                iOldTuple = (iNode.uniqueID,iNode.cluster_label)
-                jOldTuple= (jNode.uniqueID,jNode.cluster_label)
-                iOldKey = frozenset(iOldTuple)
-                jOldKey = frozenset(jOldTuple)
-                #iForeignKey =
-                #jForeignKey =
+                iNodeTuple = (iNode.uniqueID,iNode.node_cluster,iNode.node_layer)
+                jNodeTuple= (jNode.uniqueID,jNode.node_cluster,jNode.node_layer)
+                iNodeKey = frozenset(iNodeTuple)
+                jNodeKey = frozenset(jNodeTuple)
+                iClusterTuple = (iNode.node_cluster,iNode.node_layer)
+                jClusterTuple = (jNode.node_cluster,jNode.node_layer)
+                #iClusterKey = frozenset(iClusterTuple)
+                #jClusterKey = frozenset(jClusterTuple)
                 #Check if old node dicts has this node: if not add to ConnDictionary and to OldNodesDict
                 # Layer . Cluster . OldNodesDict . Does not contain the OTHER node
-                if (inputLayerDict[iNode.node_layer].cluster_Dict[iNode.cluster_label].cluster_connNodesDict.__contains__(jOldKey) == False):
+                if (inputLayerDict[iNode.node_layer].cluster_Dict[iNode.node_cluster].cluster_connNodesDict.__contains__(jNodeKey) == False):
                     #add node j at cluster i
-                    if (inputLayerDict[iNode.node_layer].cluster_Dict[iNode.cluster_label].cluster_connClustDict.__contains__(jNode.cluster_label)):
-                        inputLayerDict[iNode.node_layer].cluster_Dict[iNode.cluster_label].cluster_connClustDict[jNode.cluster_label]+=1
+                    if (inputLayerDict[iNode.node_layer].cluster_Dict[iNode.node_cluster].cluster_connClustDict.__contains__(jClusterTuple)):
+                        inputLayerDict[iNode.node_layer].cluster_Dict[iNode.node_cluster].cluster_connClustDict[jClusterTuple]+=1
                     else:
-                        inputLayerDict[iNode.node_layer].cluster_Dict[iNode.cluster_label].cluster_connClustDict[jNode.cluster_label]=1
+                        inputLayerDict[iNode.node_layer].cluster_Dict[iNode.node_cluster].cluster_connClustDict[jClusterTuple]=1
                     #add node to old nodes
-                    inputLayerDict[iNode.node_layer].cluster_Dict[iNode.cluster_label].cluster_connNodesDict[jOldKey]=jNode
+                    inputLayerDict[iNode.node_layer].cluster_Dict[iNode.node_cluster].cluster_connNodesDict[jNodeKey]=jNode
-                if (inputLayerDict[jNode.node_layer].cluster_Dict[jNode.cluster_label].cluster_connNodesDict.__contains__(iOldKey) == False):
+                if (inputLayerDict[jNode.node_layer].cluster_Dict[jNode.node_cluster].cluster_connNodesDict.__contains__(iNodeKey) == False):
                     #add node i at cluster j
-                    if (inputLayerDict[jNode.node_layer].cluster_Dict[jNode.cluster_label].cluster_connClustDict.__contains__(iNode.cluster_label)):
-                        inputLayerDict[jNode.node_layer].cluster_Dict[jNode.cluster_label].cluster_connClustDict[iNode.cluster_label]+=1
+                    if (inputLayerDict[jNode.node_layer].cluster_Dict[jNode.node_cluster].cluster_connClustDict.__contains__(iClusterTuple)):
+                        inputLayerDict[jNode.node_layer].cluster_Dict[jNode.node_cluster].cluster_connClustDict[iClusterTuple]+=1
                     else:
-                        inputLayerDict[jNode.node_layer].cluster_Dict[jNode.cluster_label].cluster_connClustDict[iNode.cluster_label]=1
+                        inputLayerDict[jNode.node_layer].cluster_Dict[jNode.node_cluster].cluster_connClustDict[iClusterTuple]=1
                     #add node to old nodes
-                    inputLayerDict[jNode.node_layer].cluster_Dict[jNode.cluster_label].cluster_connNodesDict[iOldKey]=iNode
+                    inputLayerDict[jNode.node_layer].cluster_Dict[jNode.node_cluster].cluster_connNodesDict[iNodeKey]=iNode
             j+=1
         i+=1
...
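Putting the weight bookkeeping together, an editor's sketch of what one matching node pair does to the 'i' side (sample ids and labels; ClusterC and NodeC are the model classes shown earlier, module path not shown in the excerpt):

iNode = NodeC("0", "Price_Layer", "abc123")
jNode = NodeC("7", "Destination_Layer", "abc123")                 # same uniqueID, different layer
priceCluster = ClusterC("0", "Price_Layer", None, dict(), dict(), dict())
if iNode.node_layer != jNode.node_layer and iNode.uniqueID == jNode.uniqueID:
    jClusterTuple = (jNode.node_cluster, jNode.node_layer)
    jNodeKey = frozenset((jNode.uniqueID, jNode.node_cluster, jNode.node_layer))
    if jNodeKey not in priceCluster.cluster_connNodesDict:        # count each foreign node only once
        priceCluster.cluster_connClustDict[jClusterTuple] = priceCluster.cluster_connClustDict.get(jClusterTuple, 0) + 1
        priceCluster.cluster_connNodesDict[jNodeKey] = jNode
# priceCluster.cluster_connClustDict == {("7", "Destination_Layer"): 1}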
@@ -8,15 +8,15 @@ from typing import Dict
 #from db.repository import Repository
 import json
 import requests
-from routes.clustersets import get_by_name
-#Only used for testing
-from routes.connClusters import get_conn_clusters
-from routes.similarity import get_similarity
-from routes.connRun import get_connected_run
+from db.repository import *
+repo = Repository()
-def getClusterDataFromSwagger(limitNrCluster,limitNrNodes):
+def getClusterDataFromSwagger(limitNrCluster:int,limitNrNodes:int,listURLs:List[str] = None):
     ''' Calculates the nr of connections/weights between the clusters contained in the "inputLayerDict". Connections are made between clusters from DIFFERENT layers.
@@ -24,12 +24,16 @@ def getClusterDataFromSwagger(limitNrCluster,limitNrNodes):
     :param int limitNrNodes: Limits Clusters considered. None or <0 values == No limit
+    :param List[str] listURLs : Specifies from which URLs to download data. If not provided/None => uses Default URLs
     :returns: Dict{layername: LayerC}. Returns a Dict with the data gathered from the DB
     :rtype: Dict{string: LayerC}
     '''
     print("Entered dataInput")
+    if(listURLs == None):
         listURLs = []
         #"""
         listURLs.append('http://articonf1.itec.aau.at:30103/api/layers/Price_Layer/clusters')
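A usage sketch for the new parameter (editor's illustration, not part of the commit): the Price_Layer endpoint is the one shown above, the second URL is an assumed endpoint following the same pattern.

urls = [
    'http://articonf1.itec.aau.at:30103/api/layers/Price_Layer/clusters',
    'http://articonf1.itec.aau.at:30103/api/layers/FinishedTime_Layer/clusters'   # assumed endpoint
]
layerDict = getClusterDataFromSwagger(limitNrCluster=100, limitNrNodes=1000, listURLs=urls)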
@@ -57,7 +61,7 @@ def getClusterDataFromSwagger(limitNrCluster,limitNrNodes):
     return layerDict
-def loadJson(url) :
+def loadJson(url:str) :
     res = requests.get(url, timeout=30)
     jsonData = json.loads(res.content)
     return jsonData
@@ -77,7 +81,6 @@ def getClusterDataFromMongo(layerNameList,limitNrCluster,limitNrNodes):
     :rtype: Dict{string: LayerC}
     '''
-    layerDict = dict()
     #Maximum of these nodes PER Layer will be considered
@@ -90,7 +93,7 @@ def getClusterDataFromMongo(layerNameList,limitNrCluster,limitNrNodes):
     #imports and translates the data from JSON into usefull format
     #returns layerdiction -> Layer -> clusterDict -> Cluster -> nodesDict -> Nodes
     for name in layerNameList:
-        newData = get_by_name(name)
+        newData = get_mongoDB_cluster_by_layerName(name)#repo.get_clusters_for_layer(name)
         layerDict = populateWithNewNodesSingleLayer(newData[0:limitNrCluster],layerDict,limitNrNodes)
     return layerDict
@@ -136,11 +139,11 @@ def populateWithNewNodesSingleLayer(inputData, layerDict, limitNrNodes) -> Dict[
                 curLayerName,
                 curNode.get("UniqueID"))
             if(newNode != None):
-                if(newNode.uniqueID!= None and newNode.cluster_label!= None and newNode.node_layer!= None):
+                if(newNode.uniqueID!= None and newNode.node_cluster!= None and newNode.node_layer!= None):
                     if( limitNrNodes>0):
-                        auxtuple = (newNode.uniqueID,newNode.cluster_label)
+                        auxtuple = (newNode.uniqueID,newNode.node_cluster,newNode.node_layer) #unique because the input data is only from one layer
                         key = frozenset(auxtuple)
-                        #check for duplicates #TODO
+                        #check for duplicates
                         cluster_containedNodesDict[key]= newNode #overwrite if already there
                         limitNrNodes-=1
@@ -148,15 +151,15 @@ def populateWithNewNodesSingleLayer(inputData, layerDict, limitNrNodes) -> Dict[
         if oldCluster is None:
             # def __init__(self,cluster_label,cluster_layer,cluster_containedNodesDict,cluster_connNodesDict, cluster_connectionsNr,cluster_connClustDict):
-            newCluster = ClusterC(
+            newClusterC = ClusterC(
                 curCluster.get("cluster_label"),
                 curLayerName,
                 None,
                 cluster_containedNodesDict,
                 dict(),
-                dict()) #may not be empty anymore)
+                dict())
-            clusterDict[newCluster.cluster_label] = newCluster
+            clusterDict[newClusterC.cluster_label] = newClusterC
         else:
             oldCluster.cluster_containedNodesDict = cluster_containedNodesDict
             clusterDict[curCluster.get("cluster_label")] = oldCluster
@@ -168,33 +171,117 @@ def populateWithNewNodesSingleLayer(inputData, layerDict, limitNrNodes) -> Dict[
 def getConnClusterDataFromMongo():
-    mongoArray = get_conn_clusters()
+    '''
+    Gets the Connected_clusters data from the MongoDB database. The data is found in the collection "conected_clusters"
+    Data returned can be used as input for calculateSimilarity function
+    :returns: Dict{layername: Layer}. Returns the layerDict containing connected_clusters from DB
+    :rtype: Dict{string: Layer}
+    '''
+    mongoArray = repo.get_connected_clusters()
     outputDict = convertRetrievedClustersFromMongo(mongoArray)
     return outputDict
 def getSimilarityDataFromMongo():
-    result = get_similarity()
+    '''
+    Gets the computed Similarity between clusters data from the MongoDB database. The data is found in the collection "similarity"
+    :returns: Dict. Returns the a dictionary from DB containing the computed similarity.
+    :rtype: Dict
+    '''
+    result = repo.get_similarity()
     return result
 def getConnectedRunDataFromMongo():
-    result = get_connected_run()
+    '''
+    Gets the connected_run data from the MongoDB database.
+    The data is found in the collection "connected_run"
+    :returns: List[Dict]. Returns the a List with run objects from DB.
+    :rtype: List
+    '''
+    result = repo.get_connected_run()
     return result
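For orientation, a minimal sketch (not part of the commit) of how these repository-backed getters chain together, mirroring the read-back block added to main.py further below:

# imports as in main.py: from processing.similarityFiles.* import *
connClustersFromMongo = getConnClusterDataFromMongo()          # Dict{layer_name: LayerC}
similarityFromMongo = getSimilarityDataFromMongo()             # stored similarity documents
runsFromMongo = getConnectedRunDataFromMongo()                 # list of run documents
recomputed = calculateSimilarity(connClustersFromMongo)        # same call main() now makes on the read-back data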
 def convertRetrievedClustersFromMongo(inputArray):
-    LayerDict = {}
-    for entry in inputArray:
-        if not(entry['cluster_layer'] in LayerDict):
-            LayerDict[entry['cluster_layer']] = []
-        cl = ClusterC(
-            entry['cluster_label'],
-            entry['cluster_layer'],
-            entry['cluster_runId'],
-            entry['cluster_containedNodesDict'],
-            entry['cluster_connNodesDict'],
-            entry['cluster_connClustDict'])
-        LayerDict[entry['cluster_layer']].append(cl)
-    return LayerDict
\ No newline at end of file
+    '''
+    Helper function used to convert the JSON from the DB into the Python Code structure used in the functions
+    '''
+    layerDict = {}
+    for entry in inputArray:
+        curLayerName = entry["cluster_layer"]
+        #if it's a new layer, create it
+        if not(curLayerName in layerDict):
+            clusterDict = dict()
+            layerDict[curLayerName] = LayerC(curLayerName,clusterDict)
+        #get the new cluster
+        newClusterC = ClusterC(entry["cluster_label"],
+                               entry["cluster_layer"],
+                               entry["cluster_runId"],
+                               makeChangeNodesDict(entry["cluster_containedNodesDict"],entry["cluster_label"],entry["cluster_layer"]),
+                               makeFrozensetForConnNodesDict(entry["cluster_connNodesDict"]), ##Change from list of dicts to dicts[frozenset(layer_name,node_id,cluster_label)]
+                               changeDictListToTupleDict(entry["cluster_connClustDict"]))
+        #add cluster to the layerDict
+        layerDict[curLayerName].cluster_Dict[newClusterC.cluster_label]=newClusterC
+    return layerDict
+def makeFrozensetForConnNodesDict(inputList):
+    '''
+    Helper function used to convert the JSON from the DB into the Python Code structure used in the functions
+    '''
+    outputDict = dict()
+    for dic in inputList:
+        newNodeC = NodeC(dic["node_cluster"],dic["node_layer"],dic["node_id"])
+        tuppleKey = (dic["node_cluster"],dic["node_layer"],dic["node_id"])
+        frozenSetKey = frozenset(tuppleKey)
+        outputDict[frozenSetKey] = newNodeC
+    return outputDict
+def changeDictListToTupleDict(inputDictList):
+    '''
+    Helper function used to convert the JSON from the DB into the Python Code structure used in the functions
+    '''
+    outputDict = dict()
+    for entry in inputDictList:
+        tuppleKey=(entry["cluster_label"],entry["cluster_layer"])
+        outputDict[tuppleKey] = entry["connectionWeight"]
+    return outputDict
+def makeChangeNodesDict(inputList,cluster_label,cluster_layer):
+    '''
+    Helper function used to convert the JSON from the DB into the Python Code structure used in the functions
+    '''
+    outputDict = dict()
+    for entry in inputList:
+        newNode = NodeC(cluster_label,cluster_layer,entry)
+        auxtuple = (newNode.uniqueID,newNode.node_cluster,newNode.node_layer) #unique because the input data is only from one layer
+        key = frozenset(auxtuple)
+        #check for duplicates
+        outputDict[key]= newNode
+    return outputDict
+def get_mongoDB_cluster_by_layerName(name):
+    res = repo.get_clusters_for_layer(name)
+    return [c.to_serializable_dict() for c in res]
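The two directions are symmetric: changeDictListToTupleDict (here) undoes changeTupleDictToDictList (in dataOutput below). A quick editor's check, using the label and layer from the Swagger examples and a sample weight:

dbList = [{"cluster_label": "2230", "cluster_layer": "Destination_Layer", "connectionWeight": 3}]
assert changeDictListToTupleDict(dbList) == {("2230", "Destination_Layer"): 3}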
@@ -50,42 +50,6 @@ def convertLayerDictToJSON(layerDict, runId):
     :rtype: Dict{string: [Cluster1, Cluster2, ...]}
     '''
-    '''
-    {
-        layer1 : {
-            [
-                {
-                    cluster_label1 : 0123400,
-                    cluster_layer: layer1,
-                    "cluster_connClustDict": {
-                        "0123456": 98
-                        "1234567": 12
-                    },
-                    cluster_containedNodesDict : {
-                        [
-                            abcd,
-                            sgre,
-                            dgre,
-                            ddhr,
-                            yyrh
-                        ]
-                    }
-                },
-                {
-                },
-                {
-                }
-            ]
-        },
-        layer2 : {
-        }
-    }
-    '''
     outputJSON = []
     for curLayer in layerDict.values():
@@ -94,52 +58,74 @@ def convertLayerDictToJSON(layerDict, runId):
             "cluster_label" : curCluster.cluster_label,
             "cluster_layer" : curCluster.cluster_layer,
             "cluster_runId" : runId,
-            "cluster_connClustDict" : changeDictKeysToString(curCluster.cluster_connClustDict),
+            "cluster_connClustDict" : changeTupleDictToDictList(curCluster.cluster_connClustDict),
-            "cluster_connNodesDict" : getFrozensetFromConnNodesDict(curCluster.cluster_connNodesDict),
+            "cluster_connNodesDict" : getFrozensetFromConnNodesDict(curCluster.cluster_connNodesDict), #Don
-            "cluster_containedNodesDict" : getNodeIdListFromContainedNodesDict(curCluster.cluster_containedNodesDict),
+            "cluster_containedNodesDict" : getNodeIdListFromContainedNodesDict(curCluster.cluster_containedNodesDict)
         })
     #outputJSON = json.dumps(outputJSON, default=lambda o: o.__dict__, indent=4)
     return outputJSON
-def changeDictKeysToString(inputDict):
-    keys_values = inputDict.items()
-    outputDict = { str(key): value for key,value in keys_values}
-    return outputDict
+def changeTupleDictToDictList(inputDict):
+    '''
+    Helper function used to convert the code into JSON format
+    '''
+    outputList = []
+    for tupleKey in inputDict:
+        auxDict = dict()
+        auxDict["cluster_label"]= tupleKey[0]
+        auxDict["cluster_layer"]= tupleKey[1]
+        auxDict["connectionWeight"] = inputDict[tupleKey]
+        outputList.append(auxDict)
+    return outputList
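Conversely, the tuple-keyed dict used in memory serialises to the list-of-dicts shape defined by ConnectedClusterAux; a short editor's sketch with the example values from the Swagger section:

connClustDict = {("-1", "FinishedTime_Layer"): 42}        # (cluster_label, cluster_layer) -> weight
print(changeTupleDictToDictList(connClustDict))
# [{'cluster_label': '-1', 'cluster_layer': 'FinishedTime_Layer', 'connectionWeight': 42}]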
 def getNodeIdListFromContainedNodesDict(inputDict):
+    '''
+    Helper function used to convert the code into JSON format
+    '''
     output = []
     for curNode in inputDict.values():
         output.append(curNode.uniqueID)
     return output
 def getFrozensetFromConnNodesDict(inputDict):
+    '''
+    Helper function used to convert the code into JSON format
+    '''
     output = []
     for curNode in inputDict.values():
         auxDict = {}
         auxDict["node_id"]= curNode.uniqueID
-        auxDict["node_cluster"] = curNode.cluster_label
+        auxDict["node_cluster"] = curNode.node_cluster
+        auxDict["node_layer"] = curNode.node_layer
         output.append(auxDict)
     return output
 def convertSimilarityDictToJSON(inputDict,runId):
+    ''' Converts a Similarity Dictionary to JSON format. For outputting to DB
+    :param Dict{} similarityDict: Object which contains Data about the Computed similarities between Clusters
+    :rtype: List[Dicts]
+    '''
     similList = []
-    for compositeKey in inputDict:
-        frozensetString =list()
-        #key is a tuple of cluster_labels
-        for key in compositeKey:
-            frozensetString.append(key)
-        similList.append({
-            "clusterTuple" : frozensetString,
-            "similarityValues" : inputDict[compositeKey],
-            "runId": runId
-        })
+    for tupleKey in inputDict:
+        auxDict = dict()
+        auxDict["cluster1_label"]= tupleKey[0]
+        auxDict["cluster2_label"]= tupleKey[1]
+        auxDict["cluster_layer"] = tupleKey[2]
+        auxDict["similarityValues"] = inputDict[tupleKey]
+        auxDict["runId"] = runId
+        similList.append(auxDict)
     similToJSON = similList
     #outputJSON = json.dumps(similToJSON, default=lambda o: o.__dict__, indent=4)
...
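A usage sketch for the reworked serialiser (editor's illustration with a sample key; the runId is one of the ids from the run logs below, and the snippet assumes the function returns the list it builds, since the return statement is outside the excerpt):

similarityDict = {("0", "1", "Price_Layer"): {"Destination_Layer": 2.0, "FinishedTime_Layer": 2147483647}}
similJSON = convertSimilarityDictToJSON(similarityDict, "5f0471438b27390711e31c70")
# each entry now carries cluster1_label, cluster2_label, cluster_layer, similarityValues and runId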
-StartTime: 2020-07-02 12:05:47.067975
-FinishTime: 2020-07-02 12:05:54.561853
-PopulateWithNewNodes: 2.495718
-CalculateWeights: 4.590413
-CalculateSimilarity: 0.407747
-TotalTime: 7.493878
\ No newline at end of file
+StartTime: 2020-07-07 16:55:42.418309
+FinishTime: 2020-07-07 16:55:49.746628
+PopulateWithNewNodes: 2.321926
+CalculateWeights: 4.499367
+CalculateSimilarity: 0.507026
+TotalTime: 7.328319
+RunId: 5f048cf587e0ee319fa894ed
\ No newline at end of file
-StartTime: 2020-07-06 16:16:11.525479
-FinishTime: 2020-07-06 16:16:18.213974
-PopulateWithNewNodes: 2.206513
-CalculateWeights: 4.435216
-CalculateSimilarity: 0.046766
-TotalTime: 6.688495
-RunId: 5f033232366be85ec1afca7b
+StartTime: 2020-07-07 14:57:32.942331
+FinishTime: 2020-07-07 14:57:39.489324
+PopulateWithNewNodes: 2.102823
+CalculateWeights: 4.382948
+CalculateSimilarity: 0.061222
+TotalTime: 6.546993
+RunId: 5f0471438b27390711e31c70
\ No newline at end of file
@@ -30,12 +30,17 @@ from processing.similarityFiles.calculateWeights import *
 from processing.similarityFiles.calculateSimilarity import *
 from processing.similarityFiles.miscFunctions import *
 from processing.similarityFiles.dataOutput import *
+from routes.connRun import connected_run
+#####TEST ONLY#####
+from processing.similarityFiles.testSimilarity import *
 def main():
     print("\nEntered Main")
+    outputToFileFLAG = False
     timelist = []
     timelist.append(currentTime())#starting time
@@ -54,8 +59,9 @@ def main():
     limitNrNodes = 1000 #per Layer
     layerDict = getClusterDataFromMongo(layerNameList,limitNrCluster,limitNrNodes)
-    #layerDict = getClusterDataFromSwagger(limitNrCluster,limitNrNodes) #for Swagger, Change URLs inside the function for different input Data
+    #URLlist = None
+    #layerDict = getClusterDataFromSwagger(limitNrCluster,limitNrNodes, URLlist) #for Swagger, Change URLs inside the function for different input Data or provide a list with URLS
     totalNodes = totalNumberOfNodes(layerDict)
     print("Nr. of nodes: " +str(totalNodes))
     totalClusters = totalNumberOfClusters(layerDict)
@@ -77,6 +83,8 @@ def main():
     #Write to files
     runId = add_connected_run()
+    if (outputToFileFLAG == True):
         print("Outputing data")
         outputFileLayerFunction(layerDict,totalNodes,totalClusters,runId)
         outputFileSimilFunction(similarityDict,totalNodes,totalClusters,runId)
@@ -86,15 +94,22 @@ def main():
     outputMongoConnClustDict(layerDict,runId)
     outputMongoSimilarity(similarityDict,runId)
-    #Currently not used, developed for possible future uses
+    #Currently not used in the calculation of connections/similarity, developed for possible future uses
     connClustersFromMongo = getConnClusterDataFromMongo()
+    similarityDictFromMongo = calculateSimilarity(connClustersFromMongo)
     similarityArrFromMongo = getSimilarityDataFromMongo()
     connectedRunFromMongo = getConnectedRunDataFromMongo()
     print("FINISHED")
     return
+def test():
+    testInputData()
 ##########START##########
-main()
+#main()
+test()
 #########FINISH##########
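testSimilarity.py itself is not included in this excerpt, so the content of testInputData() is unknown; purely as an illustration, an assertion-style check in the same spirit could exercise the conversion helpers whose code is fully shown above (module paths are assumed from the import style in main.py, sample ids and labels are made up):

from processing.similarityFiles.dataInput import *     # assumed path; provides makeFrozensetForConnNodesDict
from processing.similarityFiles.dataOutput import *    # shown in main.py; provides getFrozensetFromConnNodesDict

def testInputData_sketch():
    dbNodes = [{"node_id": "abc123", "node_cluster": "2230", "node_layer": "Destination_Layer"}]
    nodeDict = makeFrozensetForConnNodesDict(dbNodes)
    key = frozenset(("2230", "Destination_Layer", "abc123"))
    assert key in nodeDict
    assert nodeDict[key].uniqueID == "abc123"
    # and the inverse direction used when writing back to MongoDB:
    assert getFrozensetFromConnNodesDict(nodeDict) == dbNodes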