Commit 7ca4a448 authored by Alexander Lercher's avatar Alexander Lercher

Merge branch 'developBogdan' into 'develop'

Connected Cluster and Similarity functionalities

See merge request !12
parents d728e14e 8f1f8dc7
...@@ -178,6 +178,58 @@ paths:
#endregion
################################################################################
/connectedClusters:
get:
operationId: "routes.connClusters.get_conn_clusters"
tags:
- "Connected"
summary: "Get connected Clusters data"
description: "Returns a dictionary of cluster. The clusters contain the associated connected clusters and connected nodes data."
responses:
200:
description: "Successful operation"
schema:
$ref: "#/definitions/ConnectedDict"
/clusterSimilarity:
get:
operationId: "routes.similarity.get_similarity"
tags:
- "Similarity"
summary: "Get data of the similarity between clusters."
parameters:
- name: "layer_name"
in: "query"
description: "Name of the layer"
required: true
type: "string"
- name: "batchNr"
in: "query"
description: "Batch number (starting from 0)"
required: true
type: "integer"
description: "Data is returned in batches of size 1000. Returns a dictionary where the key is a tuple of cluster_labels (i.e. [0,319]) and the value is the computed similarity between 2 clusters in the tuple, in regard to each layer in the input. \n Note: the tuple clusters have the same layer and the computed similarity is in regard to clusters from OTHER layers."
responses:
200:
description: "Successful operation"
schema:
$ref: "#/definitions/ClusterSimilarityArray"
/clusterRunArray:
get:
operationId: "routes.connRun.get_connected_run"
tags:
- "RunId"
summary: "Get RunId"
description: "Returns the RunId and the associated datetime when a connection of clusters/simillarity of clusters was computed."
responses:
200:
description: "Successful operation"
schema:
$ref: "#/definitions/ClusterRunArray"
definitions:
Cluster:
...@@ -264,3 +316,124 @@ definitions:
type: array
items:
$ref: "#/definitions/TimeSlice"
##################################################################
ConnectedDict:
type: array
items:
$ref: "#/definitions/ConnectedCluster"
ConnectedCluster:
type: object
properties:
cluster_label:
type: string
example: "6"
cluster_layer:
type: string
example: "Price_Layer"
cluster_runId:
type: string
example: "5efdc04ac43add0aba567d76"
cluster_containedNodesDict:
type: array
items:
type: string
example: "2696718d7a33ab3dbf28e9c88411afcfe9a933a45e57ec9159bc0668543f1568"
cluster_connNodesDict:
type: array
items:
$ref: "#/definitions/ConnectedNode"
cluster_connClustDict:
type: array
items:
$ref: "#/definitions/ConnectedClusterAux"
ConnectedClusterAux:
type: object
properties:
cluster_label:
type: string
example: "-1"
cluster_layer:
type: string
example: "FinishedTime_Layer"
connectionWeight:
type: number
example: 42
ConnectedNode:
type: object
properties:
node_id:
type: string
node_cluster:
type: string
node_layer:
type: string
example:
"node_id": "27a08ed0facc7d68a0818c7695dad391cf48d6095e57ec9159bc0668543f159b"
"node_cluster": "2230"
"node_layer": "Destination_Layer"
#not used, should be removed?
#finished_time
#latitude_Destination
#longitude_Destination
#travelID
#travelPrice
#userID
ClusterSimilarityArray:
type: array
items:
$ref: "#/definitions/ClusterSimilarityDictionary"
ClusterSimilarityDictionary:
properties:
cluster1_label:
type: string
example: "0"
cluster2_label:
type: string
example: "1"
cluster_layer:
type: string
example: "Price_Layer"
similarityValues:
type: object
additionalProperties:
type: number
example:
"layer_name": similarityValue
"StartingPoint_Layer": 39.0,
"StartingTime_Layer": 99.0101004948485
runId:
type: string
example: "5efdc04ac43add0aba567d76"
ClusterRunArray:
type: array
items:
$ref: "#/definitions/ClusterRun"
ClusterRun:
type: object
properties:
_id:
type: string
example: "5efdc04ac43add0aba567d76"
Datetime:
type: string
example: "2020-07-02 14:19:51.651764"
\ No newline at end of file
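For orientation, a hedged sketch of how a client might call the three new endpoints with the requests library; the base URL is a placeholder and depends on the deployment, the layer name is taken from the examples above.

```python
import requests

BASE = "http://localhost:5000/api"  # hypothetical base URL, adjust to the deployment

# all connected clusters and the list of runs
clusters = requests.get(f"{BASE}/connectedClusters").json()
runs = requests.get(f"{BASE}/clusterRunArray").json()

# similarities are returned in batches of 1000; batchNr starts at 0
sim_batch = requests.get(
    f"{BASE}/clusterSimilarity",
    params={"layer_name": "Price_Layer", "batchNr": 0},
).json()
```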
class ClusterC:
'''
This class represents connected clusters between layers where weights are the number of shared nodes.
Connections are stored with one 'source' cluster which is connected to n 'destination' clusters, with n >= 0.
:param cluster_label: The cluster label of the 'source' cluster
:param cluster_layer: The layer name of the 'source' cluster
:param cluster_runId: The run which calculated the connections
:param cluster_containedNodesDict: Ids of nodes contained in the 'source' cluster
:param cluster_connNodesDict: Node Objects contained in 'dest' clusters, where the 'dest' cluster is uniquely identifiable by layer name and cluster label
:param cluster_connClustDict: Layer name, cluster label and weight for each 'dest' cluster
'''
def __init__(self,cluster_label,cluster_layer,cluster_runId,cluster_containedNodesDict,cluster_connNodesDict,cluster_connClustDict):
self.cluster_label = cluster_label
self.cluster_layer = cluster_layer
self.cluster_runId = cluster_runId
self.cluster_containedNodesDict = cluster_containedNodesDict #Keys are frozensets of (uniqueID, cluster, layer) tuples; values are the NodeC objects contained in this cluster
self.cluster_connNodesDict = cluster_connNodesDict #Keys are frozensets of (uniqueID:str, node_cluster:str, node_layer:str) tuples; values are NodeC objects
self.cluster_connClustDict = cluster_connClustDict #dictionary: dict[(cluster_label, cluster_layer)] -> nrOfConnections/weightOfTheConnection
class LayerC:
def __init__(self,layer_name:str,cluster_Dict):
'''
This class represents the Layer which contains the connected clusters.
:param layer_name: The layer name which contains the clusters
:param cluster_Dict: The connected_clusters contained in this layer.
'''
self.layer_name = layer_name
self.cluster_Dict = cluster_Dict # Dict[cluster_label] --> ClusterC object
\ No newline at end of file
class NodeC:
'''
This class represents the Node data contained in a Cluster.
:param node_layer: The layer name which contains the node
:param node_cluster: The cluster_label of the connected_cluster which contains this node in the given layer.
:param uniqueID: Id of the node. Only unique inside a single cluster, NOT unique across multiple clusters/layers.
'''
def __init__(self, node_cluster, node_layer, uniqueID):
self.node_cluster = node_cluster # str
self.node_layer = node_layer # str
self.uniqueID = uniqueID # str
\ No newline at end of file
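A minimal sketch of how these three entities nest, assuming the dictionary key conventions described in the comments above; the label, layer, run id and node id values mirror the Swagger examples and are illustrative only.

```python
from db.entities.connected_cluster import ClusterC
from db.entities.connected_layer import LayerC
from db.entities.connected_node import NodeC

node_id = "2696718d7a33ab3dbf28e9c88411afcfe9a933a45e57ec9159bc0668543f1568"
node = NodeC(node_cluster="6", node_layer="Price_Layer", uniqueID=node_id)

cluster = ClusterC(
    cluster_label="6",
    cluster_layer="Price_Layer",
    cluster_runId="5efdc04ac43add0aba567d76",
    cluster_containedNodesDict={frozenset((node_id, "6", "Price_Layer")): node},
    cluster_connNodesDict={},   # filled by calculateWeights
    cluster_connClustDict={},   # (cluster_label, cluster_layer) -> weight, filled by calculateWeights
)

layer = LayerC(layer_name="Price_Layer", cluster_Dict={"6": cluster})
```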
from datetime import datetime
class ConnectedRun:
'''
This class represents the RunId and the time at which the connecting of the clusters and the calculation of the similarity between clusters was executed.
:param run_id: The MongoDB _id of the Run to uniquely identify it.
:param timeOfExec: Datetime object containing info when the run was finished.
'''
def __init__(self,run_id,timeOfExec):
self.run_id = run_id
self.timeOfExec = timeOfExec
\ No newline at end of file
...@@ -4,6 +4,7 @@ from database.MongoRepositoryBase import MongoRepositoryBase
import json
from db.entities import *
# from processing.similarityFiles.miscFunctions import *
from typing import List
...@@ -19,6 +20,9 @@ class Repository(MongoRepositoryBase):
self._layer_nodes_collection = 'layer_nodes'
self._clusters_collection = 'clusters'
self._time_slice_collection = 'time_slices'
self._connected_clusters_collection = 'connected_clusters'
self._similarity_collection = 'similarity'
self._connected_run = 'connected_run'
#region Layers
def add_layer(self, layer: Layer):
...@@ -79,3 +83,87 @@ class Repository(MongoRepositoryBase):
super().drop_collection(self._time_slice_collection)
#endregion
#region clusterConnected
def add_connected_clusters(self, clusterDictArray):
''' Add Connected Clusters Data to DB '''
result = super().insert_many(self._connected_clusters_collection, clusterDictArray)
return result
def get_connected_clusters(self, run_id: str = None):
''' Get Connected Clusters Data from DB '''
if run_id is None:
entries = super().get_entries(self._connected_clusters_collection, projection={'_id': 0})
else:
# note: this must query the connected_clusters collection, not the similarity collection
entries = super().get_entries(self._connected_clusters_collection, selection={'cluster_runId' : run_id}, projection={'_id': 0})
return list(entries)
#endregion
#region similarity
def add_similarity(self, inputDict):
''' Add Similarity Data to DB '''
#checkIfConnClustDictIsSerializable(outputJSON)
result = super().insert_many(self._similarity_collection, inputDict)
#print(str(result))
#super().insert_entry(self._connected_clusters_collection, outputJSON)
return result
def get_similarity(self, skipNr, batchSize, cluster_layer: str = None, run_id: str = None):
''' Get Similarity Data from DB '''
if run_id is None:
if cluster_layer is None:
entries = super().get_entries(self._similarity_collection, projection={'_id': 0})
else:
entries = super().get_entries(self._similarity_collection, selection={'cluster_layer' : cluster_layer}, projection={'_id': 0})
else:
if cluster_layer is None:
entries = super().get_entries(self._similarity_collection, selection={'runId' : run_id}, projection={'_id': 0})
else:
entries = super().get_entries(self._similarity_collection, selection={'cluster_layer' : cluster_layer, 'runId' : run_id}, projection={'_id': 0})
return list(entries.sort([('_id', -1)]).skip(skipNr).limit(batchSize))
#endregion
#region connected_run
def convert_run_to_json_support(self, run_from_db: dict) -> dict:
''' Converts the ObjectId from MongoDb to its string repr. '''
run_from_db['_id'] = str(run_from_db['_id'])
return run_from_db
def add_connected_run(self, conRunTimestamp):
''' Add Connected Run Data to DB '''
result = super().insert_entry(self._connected_run, conRunTimestamp)
return result
def get_connected_run(self, run_id: str = None):
''' Get Connected Run Data from DB '''
if run_id is None:
entries = super().get_entries(self._connected_run)
else:
entries = super().get_entries(self._connected_run, selection={'_id' : run_id}, projection={'_id': 1, 'Datetime': 1})
return [self.convert_run_to_json_support(e) for e in entries]
#endregion
\ No newline at end of file
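A hedged usage sketch of the new repository methods; it assumes a reachable MongoDB behind MongoRepositoryBase, and the Datetime value mirrors the Swagger example.

```python
from db.repository import Repository

repo = Repository()

# register a run and keep its id for later inserts
run = repo.add_connected_run({"Datetime": "2020-07-02 14:19:51.651764"})
run_id = str(run.inserted_id)

# fetch the first batch (up to 1000 documents) of similarities for one layer
batch = repo.get_similarity(skipNr=0, batchSize=1000, cluster_layer="Price_Layer")
print(len(batch), "similarity entries")
```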
# __init__.py
from similarityFiles.calculateSimilarity import *
from similarityFiles.calculateWeights import *
from similarityFiles.populateWithNewNodes import *
from similarityFiles.miscFunctions import *
from similarityFiles.test import *
from db.entities.connected_cluster import *
from db.entities.connected_layer import *
from db.entities.connected_node import *
#This file contains the methods for calculating the similarity between clusters
import math
from db.entities.connected_node import NodeC
from db.entities.connected_cluster import ClusterC
from db.entities.connected_layer import LayerC
from typing import Dict
def minMaxFunction(iIndex,jIndex,clusterList) -> Dict[str,int]:
''' minMax Metric for calculating similarity between 2 clusters.
Clusters must be from the same layer, and will be compared to clusters from different layers (cluster_layer attribute).
:param int iIndex: The index of the first Cluster in the "clusterList"
:param int jIndex: The index of the second Cluster in the "clusterList"
:param List[ClusterC] clusterList: A list of clusters against which the 2 clusters will be compared
:returns: Dictionary with the layer name as KEY, and the computed similarity value between the 2 clusters with respect to that layer as VALUE.
:rtype: Dict{str,int}
'''
iCluster= clusterList[iIndex]
jCluster= clusterList[jIndex]
outputDict = dict()
#calculate the min over shared connections, then keep the max per layer
for curCluster in clusterList: #jCluster.cluster_layer == iCluster.cluster_layer, so comparing against iCluster's layer is sufficient
iClusterTuple = (iCluster.cluster_label,iCluster.cluster_layer)
jClusterTuple = (jCluster.cluster_label,jCluster.cluster_layer)
#iClusterKey = frozenset(iClusterTuple)
#jClusterKey = frozenset(jClusterTuple)
curLayer = curCluster.cluster_layer
if (( curLayer != iCluster.cluster_layer)
and (iClusterTuple in curCluster.cluster_connClustDict)
and (jClusterTuple in curCluster.cluster_connClustDict)):
# min part
curMin = min(curCluster.cluster_connClustDict[iClusterTuple], curCluster.cluster_connClustDict[jClusterTuple])
if curLayer not in outputDict:
outputDict[curLayer] = curMin
else: # max part
if outputDict[curLayer] < curMin:
outputDict[curLayer] = curMin
return outputDict
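A self-contained sketch of the minMax idea on stand-in objects (SimpleNamespace instead of the project classes, values invented for illustration): for every other-layer cluster connected to both clusters, take the minimum of the two weights, and keep the maximum of these minima per layer.

```python
from types import SimpleNamespace

def min_max_sketch(i, j, clusters):
    out = {}
    for cur in clusters:
        if cur.cluster_layer == i.cluster_layer:
            continue
        ki = (i.cluster_label, i.cluster_layer)
        kj = (j.cluster_label, j.cluster_layer)
        if ki in cur.cluster_connClustDict and kj in cur.cluster_connClustDict:
            cur_min = min(cur.cluster_connClustDict[ki], cur.cluster_connClustDict[kj])
            out[cur.cluster_layer] = max(out.get(cur.cluster_layer, cur_min), cur_min)
    return out

i = SimpleNamespace(cluster_label="0", cluster_layer="Price_Layer", cluster_connClustDict={})
j = SimpleNamespace(cluster_label="1", cluster_layer="Price_Layer", cluster_connClustDict={})
c = SimpleNamespace(cluster_label="7", cluster_layer="Destination_Layer",
                    cluster_connClustDict={("0", "Price_Layer"): 3, ("1", "Price_Layer"): 5})
d = SimpleNamespace(cluster_label="8", cluster_layer="Destination_Layer",
                    cluster_connClustDict={("0", "Price_Layer"): 7, ("1", "Price_Layer"): 2})

print(min_max_sketch(i, j, [i, j, c, d]))  # {'Destination_Layer': 3}: min(3,5)=3, min(7,2)=2, max=3
```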
def calcEuclideanDist(iIndex,jIndex,clusterList) -> Dict[str,float]:
''' Euclidean Distance Metric for calculating similarity between 2 clusters.
Clusters must be from the same layer, and will be compared to clusters from different layers (cluster_layer attribute).
:param int iIndex: The index of the first Cluster in the "clusterList"
:param int jIndex: The index of the second Cluster in the "clusterList"
:param List[ClusterC] clusterList: A list of clusters against which the 2 clusters will be compared
:returns: Dictionary with the layer name as KEY, and the computed similarity value between the 2 clusters with respect to that layer as VALUE.
:rtype: Dict{str,float}
'''
iCluster= clusterList[iIndex]
jCluster= clusterList[jIndex]
outputDict = dict()
#calculate the distance (parallelizable)
for curCluster in clusterList: #jCluster.cluster_layer == iCluster.cluster_layer, so comparing against iCluster's layer is sufficient
iClusterTuple = (iCluster.cluster_label,iCluster.cluster_layer)
jClusterTuple = (jCluster.cluster_label,jCluster.cluster_layer)
#iClusterKey = frozenset(iClusterTuple)
#jClusterKey = frozenset(jClusterTuple)
curLayer = curCluster.cluster_layer
#considering only clusters from other layers for the distance calculation
if( curLayer != iCluster.cluster_layer):
###### if in a layer neither cluster has a connection --> distance of 0, i.e. identical with respect to that layer. Correct or false?
iVal = 0
jVal = 0
connectedClusters = False
if iClusterTuple in curCluster.cluster_connClustDict:
iVal = curCluster.cluster_connClustDict[iClusterTuple]
connectedClusters = True
if jClusterTuple in curCluster.cluster_connClustDict:
jVal = curCluster.cluster_connClustDict[jClusterTuple]
connectedClusters = True
if not connectedClusters:
#clusters aren't connected => assign the max int value if there is no prior entry for this layer
if curLayer not in outputDict:
outputDict[curLayer] = 2147483647 #not connected to that particular layer at all
else:
#clusters ARE connected => add the squared term of the euclidean distance to the similarity value
if curLayer not in outputDict:
#first element
outputDict[curLayer] = (iVal - jVal)**2
else:
#further elements
outputDict[curLayer] += (iVal - jVal)**2
for layer in outputDict:
outputDict[layer] = math.sqrt(outputDict[layer])
return outputDict
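Worked example of the Euclidean variant, reusing the invented weights from the sketch above: towards Destination_Layer, cluster 0 has weights 3 and 7 and cluster 1 has weights 5 and 2, so the per-layer value is sqrt((3-5)^2 + (7-2)^2) = sqrt(29) ≈ 5.39.

```python
import math

print(math.sqrt((3 - 5) ** 2 + (7 - 2) ** 2))  # 5.385... for Destination_Layer
```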
# frozenset(tuple) dict
# [(iClusterLabel,jClusterLabel), (layer,similarity)]
#def calculateSimilarity(inputLayerDict) -> Dict[frozenset((str,str)),Dict[str,int]]:
def calculateSimilarity(inputLayerDict):
''' Calculates the similarity between clusters contained in the "inputLayerDict". Similarity is calculated for each combination of 2 clusters from the SAME layer.
:param Dict{layername: LayerC} inputLayerDict: Contains the associated Layer and Cluster objects. The dictionary KEY is the layer name, the VALUE is a LayerC object. The LayerC object has an attribute cluster_Dict which stores the clusters in that LayerC.
:returns: Dict{(cluster_label1, cluster_label2, cluster_layer): Dict{layername: similarityValue}}. Returns a dictionary with a tuple of the 2 cluster labels and their shared layer as KEY, and a dictionary with the computed similarity of the clusters with respect to each other layer as VALUE
:rtype: Dict{(string,string,string): Dict{str:float}}
'''
print("Entered calculateSimilarity")
similarityDict = dict() #the key is a tuple (clusterLabel1, clusterLabel2, clusterLayer)
clusterList = list()
for curLayerC in inputLayerDict.values():
for curCluster in curLayerC.cluster_Dict.values():
clusterList.append(curCluster)
#print(" Nr. of clusters: "+str(len(clusterList)))
#go through every combination of 2 clusters and calculate the similarity between them with respect to each layer
i=0
while( i < len(clusterList) ):
iCluster = clusterList[i]
j=i+1
while ( j<len(clusterList)):
jCluster = clusterList[j]
if (iCluster.cluster_layer == jCluster.cluster_layer): #calculate similarity only from the same layer
tuplekey = (clusterList[i].cluster_label,clusterList[j].cluster_label,iCluster.cluster_layer)
#### EUCLIDEAN DISTANCE /minMax
similarityDict[tuplekey]=calcEuclideanDist(i,j,clusterList)
#print("#### similarityDict i:"+str(i)+" j:"+str(j))
#print("#### "+str(similarityDict))
else:
j = len(clusterList) #clusterList is grouped by layer, so once the layers differ there are no further same-layer partners for iCluster
j+=1
i+=1
print("Finished calculateSimilarity")
return similarityDict
\ No newline at end of file
from db.entities.connected_node import NodeC
from db.entities.connected_cluster import ClusterC
from db.entities.connected_layer import LayerC
from typing import Dict
def sortFunctByNode(node):
try :
return node.uniqueID
except:
print(node.node_cluster)
print(node.node_layer)
print(node.uniqueID)
def calculateWeights(inputLayerDict) -> Dict[str,LayerC]:
''' Calculates the number of connections/weights between the clusters contained in the "inputLayerDict". Connections are made between clusters from DIFFERENT layers.
:param Dict{string: LayerC} inputLayerDict: Contains the associated LayerC and connected cluster objects. The dictionary KEY is the layer name, the VALUE is a LayerC object. The LayerC object has an attribute cluster_Dict which stores the clusters in that LayerC.
:returns: Dict{layername: LayerC}. Returns the inputLayerDict with the added connections in the attributes cluster_connClustDict and cluster_connNodesDict
:rtype: Dict{string: LayerC}
'''
#The input dictates which clusters are updated; however, all clusters included in it will be updated.
#If only a single cluster should be updated without considering the rest, a separate method would be needed.
print("Entered calculateWeights")
nodeList = []
for curLayer in inputLayerDict.values():
for curCluster in curLayer.cluster_Dict.values():
for curNode in curCluster.cluster_containedNodesDict.values():
nodeList.append(curNode)
#if curNode != None:
#if(curNode.uniqueID!= None):
#print(" Nr. of nodes: " + str(len(nodeList)))
nodeList.sort(key=sortFunctByNode)
i=0
while( i < len(nodeList) ):
iNode = nodeList[i]
j=i+1
while ( j<len(nodeList)):
jNode = nodeList[j]
#if there is a connection
#Compute a connection
if (iNode.node_layer != jNode.node_layer) and (iNode.uniqueID == jNode.uniqueID):
iNodeTuple = (iNode.uniqueID,iNode.node_cluster,iNode.node_layer)
jNodeTuple= (jNode.uniqueID,jNode.node_cluster,jNode.node_layer)
iNodeKey = frozenset(iNodeTuple)
jNodeKey = frozenset(jNodeTuple)
iClusterTuple = (iNode.node_cluster,iNode.node_layer)
jClusterTuple = (jNode.node_cluster,jNode.node_layer)
#iClusterKey = frozenset(iClusterTuple)
#jClusterKey = frozenset(jClusterTuple)
#Check if the old-nodes dict already has this node: if not, add it to the connection dictionary and to the old-nodes dict
# Layer . Cluster . OldNodesDict . does not contain the OTHER node
if (inputLayerDict[iNode.node_layer].cluster_Dict[iNode.node_cluster].cluster_connNodesDict.__contains__(jNodeKey) == False):
#add node j at cluster i
if (inputLayerDict[iNode.node_layer].cluster_Dict[iNode.node_cluster].cluster_connClustDict.__contains__(jClusterTuple)):
inputLayerDict[iNode.node_layer].cluster_Dict[iNode.node_cluster].cluster_connClustDict[jClusterTuple]+=1
else:
inputLayerDict[iNode.node_layer].cluster_Dict[iNode.node_cluster].cluster_connClustDict[jClusterTuple]=1
#add node to old nodes
inputLayerDict[iNode.node_layer].cluster_Dict[iNode.node_cluster].cluster_connNodesDict[jNodeKey]=jNode
if (inputLayerDict[jNode.node_layer].cluster_Dict[jNode.node_cluster].cluster_connNodesDict.__contains__(iNodeKey) == False):
#add node i at cluster j
if (inputLayerDict[jNode.node_layer].cluster_Dict[jNode.node_cluster].cluster_connClustDict.__contains__(iClusterTuple)):
inputLayerDict[jNode.node_layer].cluster_Dict[jNode.node_cluster].cluster_connClustDict[iClusterTuple]+=1
else:
inputLayerDict[jNode.node_layer].cluster_Dict[jNode.node_cluster].cluster_connClustDict[iClusterTuple]=1
#add node to old nodes
inputLayerDict[jNode.node_layer].cluster_Dict[jNode.node_cluster].cluster_connNodesDict[iNodeKey]=iNode
j+=1
i+=1
print("Finished calculateWeights")
#store weights in database?
return inputLayerDict
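The pairwise scan above accumulates connection counts node by node. As a cross-check, a minimal sketch of the intended result, assuming node ids are unique within a cluster: the weight between two clusters from different layers is the number of node ids they share.

```python
def weight_sketch(clusters):
    # clusters: list of (cluster_label, layer_name, set_of_node_ids)
    weights = {}
    for label_a, layer_a, nodes_a in clusters:
        for label_b, layer_b, nodes_b in clusters:
            if layer_a != layer_b:
                weights[((label_a, layer_a), (label_b, layer_b))] = len(nodes_a & nodes_b)
    return weights

clusters = [
    ("0", "Price_Layer", {"n1", "n2", "n3"}),
    ("5", "Destination_Layer", {"n2", "n3", "n4"}),
]
print(weight_sketch(clusters))  # both directions get weight 2 (shared ids n2 and n3)
```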
#Output functions for files and MongoDB
import json
import requests
import datetime
from processing.similarityFiles.miscFunctions import *
from db.repository import Repository
repo = Repository()
def outputFileLayerFunction(layerDict,limitNrNodes,limitNrCluster,runId):
''' Writes the layerDict data to a JSON file.
:param Dict{string: Layer} layerDict: Object which contains Data about the Layers, Clusters and Nodes
:param int limitNrNodes: How many nodes are contained in layerDict. Used in creating the name of the File
:param int limitNrCluster: How many clusters are contained in layerDict. Used in creating the name of the File
'''
layerJSON = convertLayerDictToJSON(layerDict,runId)
outputJSON = json.dumps(layerJSON, default=lambda o: o.__dict__, indent=4)
try:
with open('resultLayerDictN'+str(limitNrNodes)+'C'+str(limitNrCluster)+'.json', 'w') as outfile:
outfile.write(outputJSON)
except OSError:
print("Error occurred when writing the resultLayerDict file")
def outputFileSimilFunction(similarityDict,limitNrNodes,limitNrCluster,runId):
''' Writes the similarityDict data to a JSON file.
:param Dict{(cluster_label1, cluster_label2): Dict{layername: value}} similarityDict: Object which contains the computed similarity data between the clusters
:param int limitNrNodes: How many nodes are contained in layerDict. Used in creating the name of the File
:param int limitNrCluster: How many clusters are contained in layerDict. Used in creating the name of the File
'''
similJSON = convertSimilarityDictToJSON(similarityDict,runId)
outputJSON = json.dumps(similJSON, default=lambda o: o.__dict__, indent=4)
try:
with open('resultSimilarityDictN'+str(limitNrNodes)+'C'+str(limitNrCluster)+'.json', 'w') as outfile2:
outfile2.write(outputJSON)
except OSError:
print("Error occurred when writing the resultSimilarityDict file")
def outputFileTimeFunction(timelist,limitNrNodes,limitNrCluster,runId):
''' Writes execution time to a file.
:param List[datetime] timelist: Contains timestamps about the execution time of functions and the program.
:param int limitNrNodes: How many nodes are considered. Used in creating the name of the File
:param int limitNrCluster: How many clusters are considered. Used in creating the name of the File
'''
stringToWrite = "StartTime: "+ str(timelist[0])
stringToWrite += "\nFinishTime: " + str((timelist[3])) +"\n"
stringToWrite += "\nPopulateWithNewNodes: " + str((timelist[1]-timelist[0]).total_seconds())
stringToWrite += "\nCalculateWeights: " + str((timelist[2]-timelist[1]).total_seconds())
stringToWrite += "\nCalculateSimilarity: " + str((timelist[3]-timelist[2]).total_seconds())
stringToWrite += "\nTotalTime: " + str((timelist[3]-timelist[0]).total_seconds())
stringToWrite += "\nRunId: " +str(runId)
#aux = str(timelist[0]) + " :PopulateWithNewNodes\n"+ str(timelist[1]) + " :CalculateWeights\n" + str(timelist[2]) + " :CalculateSimilarity\n"+ str(timelist[3]) + " :Finish"
try:
with open('resultTimeExecN'+str(limitNrNodes)+'C'+str(limitNrCluster)+'.txt', 'w') as outfile3:
outfile3.write(stringToWrite)
except OSError:
print("Error occurred when writing the resultTimeExec file")
def outputMongoConnClustDict(inputDict,runId):
''' Stores connected_clusters in the database.
:param Dict() inputDict: Contains the data to insert
:param string runId: Id of the Run
'''
#inputDict["Timestamp"] = str(datetime.datetime.now())
add_conn_clusters(inputDict,runId)
def outputMongoSimilarity(inputDict,runId):
''' Stores cluster_similarity in the database.
:param Dict() inputDict: Contains the data to insert
:param string runId: Id of the Run
'''
add_similarity(inputDict,runId)
def add_connected_run():
'''
Inserts Run with current Time into the DB
:returns: Returns the _id of the connected_run entry in the DB
:rtype: string
'''
currentTime = datetime.datetime.now()
runDict = {"Datetime" : str(currentTime)}
inserted_result = repo.add_connected_run(runDict)
return str(inserted_result.inserted_id)
def add_conn_clusters(inputDict,runId):
''' Stores connected_clusters in the database.
:param Dict() inputDict: Contains the data to insert
:param string runId: Id of the Run
'''
outputJSON = convertLayerDictToJSON(inputDict,runId)
repo.add_connected_clusters(outputJSON)
def add_similarity(inputDict,runId):
''' Stores cluster_similarity in the database.
:param Dict() inputDict: Contains the data to insert
:param string runId: Id of the Run
'''
outputJSON = convertSimilarityDictToJSON(inputDict,runId)
repo.add_similarity(outputJSON)
\ No newline at end of file
#Misc util functions
import json
import requests
import datetime
def currentTime():
ts = datetime.datetime.now()
print(ts)
return ts
def totalNumberOfNodes(inputLayerDict):
''' Computes total number of nodes in the inputLayerDict.
:param Dict{string: Layer} inputLayerDict: Layer in which the Clusters in which the Nodes are stored
:returns: Returns nr of Nodes
:rtype: int
'''
nodeCount = 0
for curLayer in inputLayerDict.values():
for curCluster in curLayer.cluster_Dict.values():
nodeCount+=len(curCluster.cluster_containedNodesDict.values())
return nodeCount
def totalNumberOfClusters(inputLayerDict):
''' Computes total number of clusters in the inputLayerDict.
:param Dict{string: Layer} inputLayerDict: Layer in which the Clusters are stored
:returns: Returns nr of Clusters
:rtype: int
'''
clustCount = 0
for curLayer in inputLayerDict.values():
clustCount+= len(curLayer.cluster_Dict.values())
return clustCount
def convertLayerDictToJSON(layerDict, runId):
''' Converts a Layer dictionary to JSON format.
:param Dict{string: LayerC} layerDict: Object which contains data about the Layers, Clusters and Nodes
:param string runId: Id of the Run
:rtype: List[Dict], one entry per cluster
'''
outputJSON = []
for curLayer in layerDict.values():
for curCluster in curLayer.cluster_Dict.values():
outputJSON.append({
"cluster_label" : curCluster.cluster_label,
"cluster_layer" : curCluster.cluster_layer,
"cluster_runId" : runId,
"cluster_connClustDict" : changeTupleDictToDictList(curCluster.cluster_connClustDict),
"cluster_connNodesDict" : getFrozensetFromConnNodesDict(curCluster.cluster_connNodesDict), #Don
"cluster_containedNodesDict" : getNodeIdListFromContainedNodesDict(curCluster.cluster_containedNodesDict)
})
#outputJSON = json.dumps(outputJSON, default=lambda o: o.__dict__, indent=4)
return outputJSON
def changeTupleDictToDictList(inputDict):
'''
Helper function used to convert the connected-cluster weight dictionary ((cluster_label, cluster_layer) -> weight) into a JSON-serializable list
'''
outputList = []
for tupleKey in inputDict:
auxDict = dict()
auxDict["cluster_label"]= tupleKey[0]
auxDict["cluster_layer"]= tupleKey[1]
auxDict["connectionWeight"] = inputDict[tupleKey]
outputList.append(auxDict)
return outputList
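A short usage sketch of the helper just defined, using the values from the ConnectedClusterAux example in the Swagger spec:

```python
sample = {("-1", "FinishedTime_Layer"): 42}
print(changeTupleDictToDictList(sample))
# [{'cluster_label': '-1', 'cluster_layer': 'FinishedTime_Layer', 'connectionWeight': 42}]
```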
def getNodeIdListFromContainedNodesDict(inputDict):
'''
Helper function used to extract the node ids from the contained-nodes dictionary for JSON output
'''
output = []
for curNode in inputDict.values():
output.append(curNode.uniqueID)
return output
def getFrozensetFromConnNodesDict(inputDict):
'''
Helper function used to convert the connected-nodes dictionary into a JSON-serializable list of node dicts
'''
output = []
for curNode in inputDict.values():
auxDict = {}
auxDict["node_id"]= curNode.uniqueID
auxDict["node_cluster"] = curNode.node_cluster
auxDict["node_layer"] = curNode.node_layer
output.append(auxDict)
return output
def convertSimilarityDictToJSON(inputDict,runId):
''' Converts a Similarity Dictionary to JSON format for outputting to the DB.
:param Dict{} inputDict: Object which contains data about the computed similarities between Clusters
:param string runId: Id of the Run
:rtype: List[Dict]
'''
similList = []
for tupleKey in inputDict:
auxDict = dict()
auxDict["cluster1_label"]= tupleKey[0]
auxDict["cluster2_label"]= tupleKey[1]
auxDict["cluster_layer"] = tupleKey[2]
auxDict["similarityValues"] = inputDict[tupleKey]
auxDict["runId"] = runId
similList.append(auxDict)
similToJSON = similList
#outputJSON = json.dumps(similToJSON, default=lambda o: o.__dict__, indent=4)
return similToJSON
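For reference, a hedged sketch of what a single converted similarity entry looks like; the values mirror the Swagger ClusterSimilarityDictionary example.

```python
entry = {
    "cluster1_label": "0",
    "cluster2_label": "1",
    "cluster_layer": "Price_Layer",
    "similarityValues": {"StartingPoint_Layer": 39.0, "StartingTime_Layer": 99.0101004948485},
    "runId": "5efdc04ac43add0aba567d76",
}
```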
from flask import request, Response
from db.repository import Repository
repo = Repository()
def get_conn_clusters():
''' Gets connected_clusters from the database.
:returns: Returns connected cluster objects from the DB
:rtype: List[Dict]
'''
result = repo.get_connected_clusters()
if result is None or len(result) == 0:
print("MongoDb Get Error: Response 404")
return Response(status=404)
else:
return result
from flask import request, Response
from db.repository import Repository
from db.entities import connected_run
import datetime
#from db.entities import clusterset #REMOVE?
repo = Repository()
def get_connected_run():
''' Gets Run from the database.
:returns: Returns Run objects from the DB
:rtype: List[Dict{_id, Datetime}]
'''
result = repo.get_connected_run()
if result is None or len(result) == 0:
print("#### Response 404")
return Response(status=404)
else:
return result
from flask import request, Response
from db.repository import Repository
from processing.similarityFiles.miscFunctions import convertSimilarityDictToJSON
#from db.entities import clusterset #REMOVE?
repo = Repository()
def get_similarity(layer_name,batchNr):
''' Gets cluster_similarity from the database, in batches of 1000 entries.
:returns: Returns similarity objects from the DB
:rtype: List[Dict]
'''
batchSize = 1000
if int(batchNr) < 0:
print("Batch number needs to be a non-negative integer")
return Response(status=404)
skipNr = batchSize*int(batchNr)
#get_similarity(self,skipNr,batchSize, cluster_layer: str= None, run_id: str=None)
result = repo.get_similarity(skipNr, batchSize, layer_name)
if result is None or len(result) == 0:
print("MongoDb Get Error: Response 404")
return Response(status=404)
else:
return result
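The batching contract in numbers: batchSize is fixed at 1000, so batchNr=2 maps to documents 2000..2999 (skip = 2000, limit = 1000). A tiny illustration:

```python
batchSize = 1000
batchNr = "2"                      # query parameters arrive as strings
skipNr = batchSize * int(batchNr)
assert skipNr == 2000
```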
"""
for each cluster in the layer
for each other cluster from all the other layers
find the number of connexions
save them into a dictionary (ClusterID(from other layer) -> Nr of connections)
save all the dictionaries in a map? ( ClusterID1 -> dictionary1, ClusterID2 -> dicitonary2 )
have a map per layer? (Nr of maps = nr of layers)
Each cluster has a dictionary of connCluster-> nrConections
Each layer has a dictionary of clusters -> dictionaries of nodes/connections
"""
import os
import sys
import math
import datetime
from typing import Dict
##################AUX
modules_path = '../../../modules/'
if os.path.exists(modules_path):
sys.path.insert(1, modules_path)
#### TO BE DELETED #### ^
from db.entities.connected_node import NodeC
from db.entities.connected_cluster import ClusterC
from db.entities.connected_layer import LayerC
from processing.similarityFiles.dataInput import *
from processing.similarityFiles.calculateWeights import *
from processing.similarityFiles.calculateSimilarity import *
from processing.similarityFiles.miscFunctions import *
from processing.similarityFiles.dataOutput import *
def main():
print("\nEntered Main")
outputToFileFLAG = False
timelist = []
timelist.append(currentTime())#starting time
"""
Current Layers
Price_Layer
FinishedTime_Layer
Destination_Layer
StartingPoint_Layer
Reputation_Layer
StartingTime_Layer
User_Layer
"""
layerNameList = ["Price_Layer","FinishedTime_Layer","Destination_Layer"] #Get it from somewhere else?
limitNrCluster = -1 #per Layer; values < 0 mean no limit
limitNrNodes = -1 #per Layer; values < 0 mean no limit
layerDict = getClusterDataFromMongo(layerNameList,limitNrCluster,limitNrNodes)
#URLlist = None
#layerDict = getClusterDataFromSwagger(limitNrCluster,limitNrNodes, URLlist) #for Swagger, Change URLs inside the function for different input Data or provide a list with URLS
totalNodes = totalNumberOfNodes(layerDict)
print("Nr. of nodes: " +str(totalNodes))
totalClusters = totalNumberOfClusters(layerDict)
print("Nr. of clusters: " + str(totalClusters))
timelist.append(currentTime())
#calculates the weights between the clusters (weight == number of connections) #return is displayed in outputLayerFunction
layerDict = calculateWeights(layerDict)
timelist.append(currentTime())
#calculates the similarity between the clusters #returns dictionary[ tuple(cluster_label1,cluster_label2),
# listOfSimilarity(layer1,layer2,layer3) ]
similarityDict = calculateSimilarity(layerDict)
timelist.append(currentTime()) #Finishing time
#Write to files
runId = add_connected_run()
if (outputToFileFLAG == True):
print("Outputing data")
outputFileLayerFunction(layerDict,totalNodes,totalClusters,runId)
outputFileSimilFunction(similarityDict,totalNodes,totalClusters,runId)
outputFileTimeFunction(timelist,totalNodes,totalClusters,runId)
#Output to DB
outputMongoConnClustDict(layerDict,runId)
outputMongoSimilarity(similarityDict,runId)
#Currently not used in the calculation of connections/similarity, developed for possible future uses
#connClustersFromMongo = getConnClusterDataFromMongo()
#similarityDictFromMongo = calculateSimilarity(connClustersFromMongo)
#similarityArrFromMongo = getSimilarityDataFromMongo("Price_Layer") # only 220 similarities, but there are about 20 clusters total
#similarityArrFromMongo = getSimilarityDataFromMongo("Destination_Layer") # ~2.500k similarities
#similarityArrFromMongo = getSimilarityDataFromMongo("FinishedTime_Layer")# should have the rest of similarities => 15.000k
#connectedRunFromMongo = getConnectedRunDataFromMongo()
print("FINISHED")
return
##########START##########
main()
#########FINISH##########
import unittest
import sys
-sys.path.insert(1, '../')
+for path in ['../', './']:
+    sys.path.insert(1, path)
# python -m unittest discover
from db.entities import Cluster
...
...@@ -17,7 +17,7 @@ class MongoRepositoryBase:
def insert_entry(self, collection_name, content: dict):
collection = self._database[collection_name]
-collection.insert_one(content)
+return collection.insert_one(content)
def insert_many(self, collection_name, content: list):
collection = self._database[collection_name]
...
...@@ -18,6 +18,6 @@ SEMANTIC_LINKING_DB_PORT = 27017
## Role Stage Discovery
ROLESTAGE_DISCOVERY_HOSTNAME = 'role-stage-discovery'
-ROLESTAGE_DISCOVERY_REST_PORT = 80
+ROLESTAGE_DISCOVERY_REST_PORT = 30103
-ROLESTAGE_DISCOVERY_DB_HOSTNAME = f'{ROLESTAGE_DISCOVERY_HOSTNAME}-db'
+ROLESTAGE_DISCOVERY_DB_HOSTNAME = f'articonf1.itec.aau.at'
-ROLESTAGE_DISCOVERY_DB_PORT = 27017
+ROLESTAGE_DISCOVERY_DB_PORT = 30104
\ No newline at end of file