Commit 567d499c authored by Bogdan's avatar Bogdan

Connected Cluster and Similarity functionalities

parent d728e14e
......@@ -9,7 +9,7 @@ consumes:
produces:
- "application/json"
basePath: "/api"
# basePath: "/api"
paths:
/debug:
......@@ -173,11 +173,51 @@ paths:
summary: "Insert locations from AGI, create clusters for starting time and location layers, create graphs for the location clusters"
parameters: []
responses:
204:
200:
description: "Successful operation"
#endregion
################################################################################
/connectedClusters:
get:
operationId: "routes.connClusters.get_conn_clusters"
tags:
- "Connected"
summary: "Get connected Clusters data"
description: "Returns a dictionary of cluster. The clusters contain the associated connected clusters and connected nodes data."
responses:
200:
description: "Successful operation"
schema:
$ref: "#/definitions/ConnectedDict"
/clusterSimilarity:
get:
operationId: "routes.similarity.get_similarity"
tags:
- "Similarity"
summary: "Get data of the similarity between clusters"
description: "Returns a dictionary where the key is a tuple of cluster_labels (i.e. [0,319]) and the value is the computed similarity between 2 clusters in the tuple, in regard to each layer in the input. \n Note: the tuple clusters have the same layer and the computed similarity is in regard to clusters from OTHER layers."
responses:
200:
description: "Successful operation"
schema:
$ref: "#/definitions/ClusterSimilarityArray"
/clusterRunArray:
get:
operationId: "routes.connRun.get_connected_run"
tags:
- "RunId"
summary: "Get RunId"
description: "Returns the RunId and the associated datetime when a connection of clusters/simillarity of clusters was computed."
responses:
200:
description: "Successful operation"
schema:
$ref: "#/definitions/ClusterRunArray"
definitions:
Cluster:
......@@ -264,3 +304,116 @@ definitions:
type: array
items:
$ref: "#/definitions/TimeSlice"
##################################################################
ConnectedDict:
type: array
items:
$ref: "#/definitions/ConnectedCluster"
ConnectedCluster:
type: object
properties:
cluster_label:
type: string
example: "6"
cluster_layer:
type: string
example: "Price_Layer"
cluster_runId:
type: string
example: "5efdc04ac43add0aba567d76"
cluster_containedNodesDict:
$ref: "#/definitions/ConnectedNode"
cluster_connNodesDict:
$ref: "#/definitions/ConnectedNode"
cluster_connClustDict:
type: object
additionalProperties:
type: number
example:
"cluster_label": nrOfConnectedNodes
#"-1": 42
"0": 39
"6969": 1
#not used, should be removed?
#cluster_connectionsNr
ConnectedNode:
type: object
properties:
cluster_label:
type: string
node_layer:
type: string
uniqueID:
type: string
example:
"cluster_label": "2230"
"node_layer": "Destination_Layer"
"uniqueID": "a95075f5042b1b27060080156d87"
#not used, should be removed?
#finished_time
#latitude_Destination
#longitude_Destination
#travelID
#travelPrice
#userID
ClusterSimilarityArray:
type: array
items:
$ref: "#/definitions/ClusterSimilarityDictionary"
ClusterSimilarityDictionary:
properties:
clusterTuple:
type: array
items:
type: string
minItems: 2
maxItems: 2
example: ["0", "319"]  # [cluster_label1, cluster_label2]
similarityValues:
type: object
additionalProperties:
type: number
example:
"layer_name": similarityValue
"StartingPoint_Layer": 39.0,
"StartingTime_Layer": 99.0101004948485
runId:
type: string
example: "5efdc04ac43add0aba567d76"
ClusterRunArray:
type: array
items:
$ref: "#/definitions/ClusterRun"
ClusterRun:
type: object
properties:
_id:
type: string
example: "5efdc04ac43add0aba567d76"
Datetime:
type: string
example: "2020-07-02 14:19:51.651764"
# Added by API Auto Mocking Plugin
host: virtserver.swaggerhub.com
basePath: /NumeDeOrganizatie/Smart/1.0.0
schemes:
- https
\ No newline at end of file
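A minimal client sketch for the three new endpoints above (illustrative only; the base URL is an assumption and must be adjusted to the deployed host and basePath):

import requests

# hypothetical base URL; the spec above also lists a SwaggerHub mock host
BASE_URL = "http://localhost:5000/api"

connected = requests.get(f"{BASE_URL}/connectedClusters").json()    # ConnectedDict
similarity = requests.get(f"{BASE_URL}/clusterSimilarity").json()   # ClusterSimilarityArray
runs = requests.get(f"{BASE_URL}/clusterRunArray").json()           # ClusterRunArray
print(len(connected), len(similarity), len(runs))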
from db.entities.location import Location
from db.entities.popular_location import PopularLocation
from db.entities.cluster import Cluster
from db.entities.clusterset import ClusterSet
from db.entities.user_cluster_graph import UserClusterGraph
from db.entities.layer import Layer
from db.entities.timeslice import TimeSlice
\ No newline at end of file
class ClusterC:
def __init__(self,cluster_label,cluster_layer,cluster_runId,cluster_containedNodesDict,cluster_connNodesDict,cluster_connClustDict):
self.cluster_label = cluster_label
self.cluster_layer = cluster_layer
self.cluster_runId = cluster_runId
self.cluster_containedNodesDict = cluster_containedNodesDict  # RENAME TO curClNodesDict? Keys are frozensets (tuples) of (uniqueID, cluster_label)
self.cluster_connNodesDict = cluster_connNodesDict  # Keys are frozensets (tuples) of (uniqueID, cluster_label); removing the newNodes/oldNodes lists may introduce duplicates
self.cluster_connClustDict = cluster_connClustDict  # dictionary: layer -> (dict2: cluster_label -> nrOfConnections) OR dictionary: cluster_label -> nrOfConnections
# cluster_connClustDict ------> consider both newNodes and oldNodes
class LayerC:
def __init__(self,layer_name,cluster_Dict):
self.layer_name = layer_name
self.cluster_Dict = cluster_Dict
\ No newline at end of file
class NodeC:
def __init__(self, cluster_label, node_layer, finished_time, latitude_Destination, longitude_Destination, travelID, travelPrice, uniqueID, userID):
self.cluster_label = cluster_label
self.node_layer = node_layer
self.finished_time = finished_time
self.latitude_Destination = latitude_Destination
self.longitude_Destination = longitude_Destination
self.travelID = travelID
self.travelPrice = travelPrice
self.uniqueID = uniqueID
self.userID = userID
\ No newline at end of file
from datetime import datetime
class ConnectedRun:
def __init__(self,run_id,timeOfExec):
self.run_id = run_id
self.timeOfExec = timeOfExec
\ No newline at end of file
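A small construction sketch (illustrative values, not part of the commit) showing how the NodeC, ClusterC and LayerC entities above nest into the layerDict consumed by the processing code:

from db.entities.connected_node import NodeC
from db.entities.connected_cluster import ClusterC
from db.entities.connected_layer import LayerC

node = NodeC("6", "Price_Layer", None, None, None, None, None, "a95075f5042b1b27060080156d87", None)
cluster = ClusterC("6", "Price_Layer", "run0",
                   {frozenset((node.uniqueID, node.cluster_label)): node},  # contained nodes
                   {},   # connected nodes, filled by calculateWeights
                   {})   # connected clusters, filled by calculateWeights
layerDict = {"Price_Layer": LayerC("Price_Layer", {"6": cluster})}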
......@@ -3,7 +3,11 @@ import network_constants as netconst
from database.MongoRepositoryBase import MongoRepositoryBase
import json
from db.entities.layer import *
from db.entities.cluster import *
from db.entities.timeslice import *
from db.entities import *
from processing.similarityFiles.miscFunctions import *
from typing import List
......@@ -19,6 +23,9 @@ class Repository(MongoRepositoryBase):
self._layer_nodes_collection = 'layer_nodes'
self._clusters_collection = 'clusters'
self._time_slice_collection = 'time_slices'
self._connected_clusters_collection = 'connected_clusters'
self._similarity_collection = 'similarity'
self._connected_run = 'connected_run'
#region Layers
def add_layer(self, layer: Layer):
......@@ -79,3 +86,71 @@ class Repository(MongoRepositoryBase):
super().drop_collection(self._time_slice_collection)
#endregion
#region clusterConnected
def add_connected_clusters(self, clusterDictArray):
''' Add Connected Clusters Data to DB '''
result = super().insert_many(self._connected_clusters_collection, clusterDictArray)
return result
def get_connected_clusters(self, run_id=None):#, layer_name: str):
''' Get Connected Clusters Data from DB '''
if run_id is None:
    entries = super().get_entries(self._connected_clusters_collection)
else:
    entries = super().get_entries(self._connected_clusters_collection, selection={'cluster_runId': run_id})
output = []
for ent in entries:
    output.append(ent)
return output
#return [Cluster(cluster_dict=e, from_db=True) for e in entries]
#endregion
#region similarity
def add_similarity(self, inputDict):
''' Add Similarity Data to DB '''
#checkIfConnClustDictIsSerializable(outputJSON)
result = super().insert_many(self._similarity_collection, inputDict)
#print(str(result))
#super().insert_entry(self._connected_clusters_collection, outputJSON)
return result
#TODO
def get_similarity(self, run_id=None):
''' Get Similarity Data from DB '''
if run_id is None:
entries = super().get_entries(self._similarity_collection, projection={'_id': 0})
else:
entries = super().get_entries(self._similarity_collection, selection={'runId' : run_id})
output = []
for e in entries:
output.append(e)
return output
#endregion
#region connected_run
def add_connected_run(self, conRunTimestamp):
''' Add Connected Run Data to DB '''
result = super().insert_entry(self._connected_run, conRunTimestamp)
return result
def get_connected_run(self, run_id=None):
''' Get Connected Run Data from DB '''
if run_id is None:
entries = super().get_entries(self._connected_run)
else:
entries = super().get_entries(self._connected_run, selection={'_id' : run_id})
output = []
for e in entries:
output.append(e)
return output
#endregion
\ No newline at end of file
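A usage sketch for the new repository methods (illustrative; assumes a reachable MongoDB configured as in network_constants):

import datetime
from db.repository import Repository

repo = Repository()
run = repo.add_connected_run({"Datetime": str(datetime.datetime.now())})
run_id = str(run.inserted_id)
clusters = repo.get_connected_clusters(run_id)   # cluster_runId is stored as a string
similarities = repo.get_similarity(run_id)       # runId is stored as a string
runs = repo.get_connected_run(run.inserted_id)   # _id lookup expects the ObjectId itself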
# __init__.py
from similarityFiles.calculateSimilarity import *
from similarityFiles.calculateWeights import *
from similarityFiles.populateWithNewNodes import *
from similarityFiles.miscFunctions import *
from similarityFiles.test import *
from db.entities.connected_cluster import *
from db.entities.connected_layer import *
from db.entities.connected_node import *
# This file contains the methods for calculating the similarity between clusters
import math
from db.entities.connected_node import NodeC
from db.entities.connected_cluster import ClusterC
from db.entities.connected_layer import LayerC
from typing import Dict
def minMaxFunction(iIndex,jIndex,clusterList) -> Dict[str,int]:
''' minMax metric for calculating the similarity between 2 clusters.
Clusters must be from the same layer and are compared against clusters from different layers (cluster_layer attribute).
:param int iIndex: Index of the first cluster in "clusterList"
:param int jIndex: Index of the second cluster in "clusterList"
:param List[Cluster] clusterList: List of clusters against which the 2 clusters are compared
:returns: Dictionary with the layer name as KEY and the computed similarity value between the 2 clusters with regard to that layer as VALUE.
:rtype: Dict{str,int}
'''
iCluster= clusterList[iIndex]
jCluster= clusterList[jIndex]
outputDict = dict()
# calculate the per-layer minMax value
for curCluster in clusterList:  # jCluster.cluster_layer == iCluster.cluster_layer, so comparing against iCluster's layer is sufficient
curLayer = curCluster.cluster_layer
curLabel = curCluster.cluster_label
if ((curLayer != iCluster.cluster_layer)
        and (iCluster.cluster_label in curCluster.cluster_connClustDict)
        and (jCluster.cluster_label in curCluster.cluster_connClustDict)):
    # min part: minimum of the two connection counts towards curCluster
    curMin = min(curCluster.cluster_connClustDict[iCluster.cluster_label], curCluster.cluster_connClustDict[jCluster.cluster_label])
    # max part: keep the largest per-layer minimum
    if curLayer not in outputDict or outputDict[curLayer] < curMin:
        outputDict[curLayer] = curMin
return outputDict
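# Usage sketch for minMaxFunction (hypothetical helper with illustrative data, not part of the commit):
# clusters "0" and "1" from Price_Layer connect to the same Destination_Layer cluster with 5 and 2
# nodes respectively, so the minMax similarity with regard to Destination_Layer is min(5, 2) = 2.
def _minMaxUsageSketch():
    dest = ClusterC("7", "Destination_Layer", "run0", {}, {}, {"0": 5, "1": 2})
    c0 = ClusterC("0", "Price_Layer", "run0", {}, {}, {})
    c1 = ClusterC("1", "Price_Layer", "run0", {}, {}, {})
    return minMaxFunction(0, 1, [c0, c1, dest])  # -> {"Destination_Layer": 2}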
def calcEuclideanDist(iIndex,jIndex,clusterList) -> Dict[str,float]:
''' Euclidean distance metric for calculating the similarity between 2 clusters.
Clusters must be from the same layer and are compared against clusters from different layers (cluster_layer attribute).
:param int iIndex: Index of the first cluster in "clusterList"
:param int jIndex: Index of the second cluster in "clusterList"
:param List[Cluster] clusterList: List of clusters against which the 2 clusters are compared
:returns: Dictionary with the layer name as KEY and the computed similarity value between the 2 clusters with regard to that layer as VALUE.
:rtype: Dict{str,float}
'''
iCluster= clusterList[iIndex]
jCluster= clusterList[jIndex]
outputDict = dict()
# calculate the distance (parallelizable)
for curCluster in clusterList:  # jCluster.cluster_layer == iCluster.cluster_layer, so comparing against iCluster's layer is sufficient
curLayer = curCluster.cluster_layer
curLabel = curCluster.cluster_label #debugOnly
#considering only clusters from other layers for distance calc
if( curLayer != iCluster.cluster_layer):
# BUG? What if they don't share a connection?
# If in a layer both clusters don't have a connection --> distance of 0, i.e. identical with regard to that layer. Correct or false?
iVal = 0
jVal = 0
connectedClusters = False
if iCluster.cluster_label in curCluster.cluster_connClustDict:
    iVal = curCluster.cluster_connClustDict[iCluster.cluster_label]
    connectedClusters = True
if jCluster.cluster_label in curCluster.cluster_connClustDict:
    jVal = curCluster.cluster_connClustDict[jCluster.cluster_label]
    connectedClusters = True
if not connectedClusters:
    # clusters aren't connected => assign the max int value if there is no prior entry for this layer
    if curLayer not in outputDict:
        outputDict[curLayer] = 2147483647  # notConnected
else:
    # clusters ARE connected => add the squared difference to the running per-layer sum of the Euclidean distance
    if curLayer not in outputDict:
        # first element
        outputDict[curLayer] = (iVal - jVal) ** 2
    else:
        # further elements
        outputDict[curLayer] += (iVal - jVal) ** 2
for layer in outputDict:
outputDict[layer] = math.sqrt(outputDict[layer])
return outputDict
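# Usage sketch for calcEuclideanDist (hypothetical helper with illustrative data, not part of the commit):
# with connection counts 5 and 2 towards the same Destination_Layer cluster, the distance with regard
# to that layer is sqrt((5 - 2)**2) = 3.0; layers where neither cluster is connected keep the sentinel value.
def _euclideanDistUsageSketch():
    dest = ClusterC("7", "Destination_Layer", "run0", {}, {}, {"0": 5, "1": 2})
    c0 = ClusterC("0", "Price_Layer", "run0", {}, {}, {})
    c1 = ClusterC("1", "Price_Layer", "run0", {}, {}, {})
    return calcEuclideanDist(0, 1, [c0, c1, dest])  # -> {"Destination_Layer": 3.0}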
# frozenset(tuple) dict
# [(iClusterLabel,jClusterLabel), (layer,similarity)]
#def calculateSimilarity(inputLayerDict) -> Dict[frozenset((str,str)),Dict[str,int]]:
def calculateSimilarity(inputLayerDict):
''' Calculates the similarity between clusters contained in the "inputLayerDict". Similarity is calculated for each combination of 2 clusters from the SAME layer.
:param Dict{layername: Layer} inputLayerDict: Contains the associated Layer and Cluster objects. The dictionary KEY is the layer name, the VALUE is a Layer object. The Layer object has an attribute cluster_Dict which stores the clusters in the Layer.
:returns: Dict{tuple(cluster_label1, cluster_label2) : Dict{layername, similarityValue}}. Returns a Dictionary with a tuple of 2 clusters as KEY, and a Dictionary with the computed similarity of the clusters in regard to each layer as VALUE
:rtype: Dict{(string,string): Dict{str:float}}
'''
print("Entered calculateSimilarity")
similarityDict = dict() #the key is a frozenset(Tuple) (clusterLabel1,clusterLabel2)
clusterList = list()
for curLayer in inputLayerDict.values():
for curCluster in curLayer.cluster_Dict.values():
clusterList.append(curCluster)
#print(" Nr. of clusters: "+str(len(clusterList)))
# go through every combination of 2 clusters and calculate the similarity between them with regard to each layer
i=0
while( i < len(clusterList) ):
iCluster = clusterList[i]
j=i+1
while ( j<len(clusterList)):
jCluster = clusterList[j]
if iCluster.cluster_layer == jCluster.cluster_layer:  # calculate similarity only for clusters from the same layer
tuplekey = (clusterList[i].cluster_label,clusterList[j].cluster_label)
key = frozenset(tuplekey)
#### EUCLIDEAN DISTANCE /minMax
similarityDict[key]=calcEuclideanDist(i,j,clusterList)
#print("#### similarityDict i:"+str(i)+" j:"+str(j))
#print("#### "+str(similarityDict))
else:
    # clusterList is grouped by layer, so once the layer changes no later cluster can share iCluster's layer; stop the inner loop
    j = len(clusterList)
j+=1
i+=1
print("Finished calculateSimilarity")
return similarityDict
\ No newline at end of file
from db.entities.connected_node import NodeC
from db.entities.connected_cluster import ClusterC
from db.entities.connected_layer import LayerC
from typing import Dict
def sortFunctByNode(node):
    ''' Sort key for nodes: the node's uniqueID. '''
    try:
        return node.uniqueID
    except AttributeError:
        # debug output for malformed nodes; re-raise so sorting fails loudly instead of comparing None keys
        print(node.cluster_label)
        print(node.node_layer)
        raise
def calculateWeights(inputLayerDict) -> Dict[str,LayerC]:
''' Calculates the nr of connections/weights between the clusters contained in the "inputLayerDict". Connections are made between clusters from DIFFERENT layers.
:param Dict{string: Layer} inputLayerDict: Contains the associated Layer and Cluster objects. The dictionary KEY is the layer name, the VALUE is a Layer object. The Layer object has an attribute cluster_Dict which stores the clusters in the Layer.
:returns: Dict{layername: Layer}. Returns the inputLayerDict with the added connections in the attributes cluster_connClustDict and cluster_connNodesDict
:rtype: Dict{string: Layer}
'''
# The input dictates which clusters are updated; however, all clusters included in the input are updated.
# If only a single cluster should be updated without considering the rest, a separate method would be needed.
print("Entered calculateWeights")
nodeList = []
for curLayer in inputLayerDict.values():
for curCluster in curLayer.cluster_Dict.values():
for curNode in curCluster.cluster_containedNodesDict.values():
nodeList.append(curNode)
#if curNode != None:
#if(curNode.uniqueID!= None):
#print(" Nr. of nodes: " + str(len(nodeList)))
nodeList.sort(key=sortFunctByNode)
i=0
while( i < len(nodeList) ):
iNode = nodeList[i]
j=i+1
while ( j<len(nodeList)):
jNode = nodeList[j]
#if there is a connection
#print("\n ### \n"+iNode.uniqueID +" "+ iNode.node_layer +"\n"+ jNode.uniqueID +" "+ jNode.node_layer )
if (iNode.node_layer != jNode.node_layer) and (iNode.uniqueID == jNode.uniqueID):
iOldTuple = (iNode.uniqueID,iNode.cluster_label)
jOldTuple= (jNode.uniqueID,jNode.cluster_label)
iOldKey = frozenset(iOldTuple)
jOldKey = frozenset(jOldTuple)
# Check if the old-nodes dict already contains the OTHER node: if not, add it to the connection dictionary and to the old-nodes dict
# Layer . Cluster . OldNodesDict . does not contain the OTHER node
iClusterObj = inputLayerDict[iNode.node_layer].cluster_Dict[iNode.cluster_label]
jClusterObj = inputLayerDict[jNode.node_layer].cluster_Dict[jNode.cluster_label]
if jOldKey not in iClusterObj.cluster_connNodesDict:
    # add node j at cluster i
    if jNode.cluster_label in iClusterObj.cluster_connClustDict:
        iClusterObj.cluster_connClustDict[jNode.cluster_label] += 1
    else:
        iClusterObj.cluster_connClustDict[jNode.cluster_label] = 1
    # add node to old nodes
    iClusterObj.cluster_connNodesDict[jOldKey] = jNode
if iOldKey not in jClusterObj.cluster_connNodesDict:
    # add node i at cluster j
    if iNode.cluster_label in jClusterObj.cluster_connClustDict:
        jClusterObj.cluster_connClustDict[iNode.cluster_label] += 1
    else:
        jClusterObj.cluster_connClustDict[iNode.cluster_label] = 1
    # add node to old nodes
    jClusterObj.cluster_connNodesDict[iOldKey] = iNode
j+=1
i+=1
#deleting cluster_containedNodesDicts/// No longer needed
#for curLayer in inputLayerDict.values():
# for curCluster in curLayer.cluster_Dict.values():
# inputLayerDict[curCluster.cluster_layer].cluster_Dict[curCluster.cluster_label].cluster_containedNodesDict = dict()
print("Finished calculateWeights")
#store weights in database?
return inputLayerDict
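# Usage sketch for calculateWeights (hypothetical helper with illustrative data, not part of the commit):
# the same travel (uniqueID "t1") appears in a Price_Layer cluster and a Destination_Layer cluster,
# so after calculateWeights each of the two clusters records 1 connection towards the other.
def _calculateWeightsSketch():
    n1 = NodeC("0", "Price_Layer", None, None, None, None, None, "t1", None)
    n2 = NodeC("9", "Destination_Layer", None, None, None, None, None, "t1", None)
    layerDict = {
        "Price_Layer": LayerC("Price_Layer", {"0": ClusterC("0", "Price_Layer", "run0",
                              {frozenset(("t1", "0")): n1}, {}, {})}),
        "Destination_Layer": LayerC("Destination_Layer", {"9": ClusterC("9", "Destination_Layer", "run0",
                              {frozenset(("t1", "9")): n2}, {}, {})}),
    }
    layerDict = calculateWeights(layerDict)
    return layerDict["Price_Layer"].cluster_Dict["0"].cluster_connClustDict  # -> {"9": 1}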
#Misc util functions
import json
import requests
import datetime
from routes.connClusters import add_conn_clusters
from routes.similarity import add_similarity
from routes.connRun import add_connected_run
from processing.similarityFiles.miscFunctions import *
def outputFileLayerFunction(layerDict,limitNrNodes,limitNrCluster,runId):
''' Writes the layerDict data to a JSON file.
:param Dict{string: Layer} layerDict: Object which contains Data about the Layers, Clusters and Nodes
:param int limitNrNodes: How many nodes are contained in layerDict. Used in creating the name of the File
:param int limitNrCluster: How many clusters are contained in layerDict. Used in creating the name of the File
'''
layerJSON = convertLayerDictToJSON(layerDict,runId)
outputJSON = json.dumps(layerJSON, default=lambda o: o.__dict__, indent=4)
try:
with open('resultLayerDictN'+str(limitNrNodes)+'C'+str(limitNrCluster)+'.json', 'w') as outfile:
outfile.write(outputJSON)
except OSError:
    print("Error occurred when writing the resultLayerDict file")
def outputFileSimilFunction(similarityDict,limitNrNodes,limitNrCluster,runId):
''' Writes the similarityDict data to a JSON file.
:param Dict{(cluster_label1, cluster_label2): Dict{layername: value}} similarityDict: Contains the computed similarity between each pair of clusters with regard to each layer
:param int limitNrNodes: How many nodes are contained in layerDict. Used in creating the name of the File
:param int limitNrCluster: How many clusters are contained in layerDict. Used in creating the name of the File
'''
similJSON = convertSimilarityDictToJSON(similarityDict,runId)
outputJSON = json.dumps(similJSON, default=lambda o: o.__dict__, indent=4)
try:
with open('resultSimilarityDictN'+str(limitNrNodes)+'C'+str(limitNrCluster)+'.json', 'w') as outfile2:
outfile2.write(outputJSON)
except OSError:
    print("Error occurred when writing the resultSimilarityDict file")
def outputFileTimeFunction(timelist,limitNrNodes,limitNrCluster,runId):
''' Writes execution time to a file.
:param List[datetime] timelist: Contains timestamps about the execution time of functions and the program.
:param int limitNrNodes: How many nodes are considered. Used in creating the name of the File
:param int limitNrCluster: How many clusters are considered. Used in creating the name of the File
'''
stringToWrite = "StartTime: "+ str(timelist[0])
stringToWrite += "\nFinishTime: " + str((timelist[3])) +"\n"
stringToWrite += "\nPopulateWithNewNodes: " + str((timelist[1]-timelist[0]).total_seconds())
stringToWrite += "\nCalculateWeights: " + str((timelist[2]-timelist[1]).total_seconds())
stringToWrite += "\nCalculateSimilarity: " + str((timelist[3]-timelist[2]).total_seconds())
stringToWrite += "\nTotalTime: " + str((timelist[3]-timelist[0]).total_seconds())
stringToWrite += "\nRunId: " +str(runId)
#aux = str(timelist[0]) + " :PopulateWithNewNodes\n"+ str(timelist[1]) + " :CalculateWeights\n" + str(timelist[2]) + " :CalculateSimilarity\n"+ str(timelist[3]) + " :Finish"
try:
with open('resultTimeExecN'+str(limitNrNodes)+'C'+str(limitNrCluster)+'.txt', 'w') as outfile3:
outfile3.write(stringToWrite)
except OSError:
    print("Error occurred when writing the resultTimeExec file")
def outputMongoConnClustDict(inputDict,runId):
''' Stores connected_clusters in the database.
:param Dict() inputDict: Contains the data to insert
:param string runId: Id of the Run
'''
#inputDict["Timestamp"] = str(datetime.datetime.now())
add_conn_clusters(inputDict,runId)
def outputMongoSimilarity(inputDict,runId):
''' Stores cluster_similarity in the database.
:param Dict() inputDict: Contains the data to insert
:param string runId: Id of the Run
'''
add_similarity(inputDict,runId)
\ No newline at end of file
#Misc util functions
import json
import requests
import datetime
def currentTime():
ts = datetime.datetime.now()
print(ts)
return ts
def totalNumberOfNodes(inputLayerDict):
''' Computes total number of nodes in the inputLayerDict.
:param Dict{string: Layer} inputLayerDict: Layers containing the Clusters in which the Nodes are stored
:returns: Returns nr of Nodes
:rtype: int
'''
nodeCount = 0
for curLayer in inputLayerDict.values():
for curCluster in curLayer.cluster_Dict.values():
nodeCount+=len(curCluster.cluster_containedNodesDict.values())
return nodeCount
def totalNumberOfClusters(inputLayerDict):
''' Computes total number of clusters in the inputLayerDict.
:param Dict{string: Layer} inputLayerDict: Layer in which the Clusters are stored
:returns: Returns nr of Clusters
:rtype: int
'''
clustCount = 0
for curLayer in inputLayerDict.values():
clustCount+= len(curLayer.cluster_Dict.values())
return clustCount
def convertLayerDictToJSON(layerDict, runId):
''' Converts a Layer object to JSON format.
:param Dict{string: Layer} layerDict: Object which contains Data about the Layers, Clusters and Nodes
:rtype: List[Dict] (one entry per cluster)
'''
'''
Illustrative shape of the returned list (one entry per cluster):
[
    {
        "cluster_label": "0123400",
        "cluster_layer": "layer1",
        "cluster_runId": "<runId>",
        "cluster_connClustDict": {
            "0123456": 98,
            "1234567": 12
        },
        "cluster_connNodesDict": [
            {"node_id": "abcd", "node_cluster": "0123456"}
        ],
        "cluster_containedNodesDict": [
            "abcd", "sgre", "dgre", "ddhr", "yyrh"
        ]
    },
    ...
]
'''
outputJSON = []
for curLayer in layerDict.values():
for curCluster in curLayer.cluster_Dict.values():
outputJSON.append({
"cluster_label" : curCluster.cluster_label,
"cluster_layer" : curCluster.cluster_layer,
"cluster_runId" : runId,
"cluster_connClustDict" : changeDictKeysToString(curCluster.cluster_connClustDict),
"cluster_connNodesDict" : getFrozensetFromConnNodesDict(curCluster.cluster_connNodesDict),
"cluster_containedNodesDict" : getNodeIdListFromContainedNodesDict(curCluster.cluster_containedNodesDict),
})
#outputJSON = json.dumps(outputJSON, default=lambda o: o.__dict__, indent=4)
return outputJSON
def changeDictKeysToString(inputDict):
keys_values = inputDict.items()
outputDict = { str(key): value for key,value in keys_values}
return outputDict
def getNodeIdListFromContainedNodesDict(inputDict):
output = []
for curNode in inputDict.values():
output.append(curNode.uniqueID)
return output
def getFrozensetFromConnNodesDict(inputDict):
output = []
for curNode in inputDict.values():
auxDict = {}
auxDict["node_id"]= curNode.uniqueID
auxDict["node_cluster"] = curNode.cluster_label
output.append(auxDict)
return output
def convertSimilarityDictToJSON(inputDict, runId):
    ''' Converts the similarity dictionary into a JSON-serializable list of
    {clusterTuple, similarityValues, runId} entries.
    :param Dict{(cluster_label1, cluster_label2): Dict{layername: value}} inputDict: Computed similarity data
    :param string runId: Id of the Run
    '''
similList = []
for compositeKey in inputDict:
frozensetString =list()
#key is a tuple of cluster_labels
for key in compositeKey:
frozensetString.append(key)
similList.append({
"clusterTuple" : frozensetString,
"similarityValues" : inputDict[compositeKey],
"runId": runId
})
similToJSON = similList
#outputJSON = json.dumps(similToJSON, default=lambda o: o.__dict__, indent=4)
return similToJSON
from db.entities.connected_node import NodeC
from db.entities.connected_cluster import ClusterC
from db.entities.connected_layer import LayerC
from typing import Dict
#from db.repository import Repository
import json
import requests
from routes.clustersets import get_by_name
from db import repository
StartTime: 2020-07-02 12:05:47.067975
FinishTime: 2020-07-02 12:05:54.561853
PopulateWithNewNodes: 2.495718
CalculateWeights: 4.590413
CalculateSimilarity: 0.407747
TotalTime: 7.493878
\ No newline at end of file
StartTime: 2020-07-06 10:43:32.240013
FinishTime: 2020-07-06 10:43:39.110333
PopulateWithNewNodes: 2.399582
CalculateWeights: 4.422768
CalculateSimilarity: 0.04797
TotalTime: 6.87032
RunId: 5f02e43b53a73a48d0eaaed5
\ No newline at end of file
from flask import request, Response
from db.repository import Repository
from db.entities import ClusterSet
from db.entities import clusterset
repo = Repository()
......
from flask import request, Response
from db.repository import Repository
from routes.connRun import add_connected_run
from processing.similarityFiles.miscFunctions import *
repo = Repository()
def add_conn_clusters(inputDict,runId):
''' Stores connected_clusters in the database.
:param Dict() inputDict: Contains the data to insert
:param string runId: Id of the Run
'''
outputJSON = convertLayerDictToJSON(inputDict,runId)
repo.add_connected_clusters(outputJSON)
def get_conn_clusters():
''' Gets connected_clusters from the database.
:returns: Returns connected cluster objects from the DB
:rtype: Dict
'''
result = repo.get_connected_clusters()
if result is None or len(result) == 0:
print("MongoDb Get Error: Response 404")
return Response(status=404)
else:
return result
from flask import request, Response
from db.repository import Repository
from db.entities import connected_run
import datetime
#from db.entities import clusterset #REMOVE?
repo = Repository()
def add_connected_run():
'''
Inserts Run with current Time into the DB
:returns: Returns the _id of the connected_run entry in the DB
:rtype: string
'''
currentTime = datetime.datetime.now()
runDict = {"Datetime" : str(currentTime)}
inserted_result = repo.add_connected_run(runDict)
return str(inserted_result.inserted_id)
def get_connected_run(): ########TODO#################
''' ##TODO## Gets Run from the database.
:returns: Returns Run objects from the DB
:rtype: Dict{_id,datetime}
'''
"""
result = repo.get_connected_clusters()
if result is None or result.retrieved == 0:
print("#### Response 404")
return Response(status=404)
else:
return result
conRun = ConnectedRun(result.sdfsdf)
"""
from flask import request, Response
from db.repository import Repository
from processing.similarityFiles.miscFunctions import convertSimilarityDictToJSON
#from db.entities import clusterset #REMOVE?
repo = Repository()
def add_similarity(inputDict,runId):
''' Stores cluster_similarity in the database.
:param Dict() inputDict: Contains the data to insert
:param string runId: Id of the Run
'''
outputJSON = convertSimilarityDictToJSON(inputDict,runId)
repo.add_similarity(outputJSON)
def get_similarity():
''' Gets cluster_similarity from the database.
:returns: Returns similarity objects from the DB
:rtype: Dict
'''
result = repo.get_similarity()
if result is None or len(result) == 0:
print("MongoDb Get Error: Response 404")
return Response(status=404)
else:
return result
"""
for each cluster in the layer
    for each other cluster from all the other layers
        find the number of connections
        save them into a dictionary (ClusterID (from the other layer) -> nr of connections)
    save all the dictionaries in a map? (ClusterID1 -> dictionary1, ClusterID2 -> dictionary2)
have a map per layer? (nr of maps = nr of layers)

Each cluster has a dictionary of connCluster -> nrConnections
Each layer has a dictionary of clusters -> dictionaries of nodes/connections
"""
import os
import sys
import math
import datetime
from typing import Dict
##################AUX
modules_path = '../../../modules/'
if os.path.exists(modules_path):
sys.path.insert(1, modules_path)
#### TO BE DELETED #### ^
from db.entities.connected_node import NodeC
from db.entities.connected_cluster import ClusterC
from db.entities.connected_layer import LayerC
from processing.similarityFiles.dataInput import *
from processing.similarityFiles.calculateWeights import *
from processing.similarityFiles.calculateSimilarity import *
from processing.similarityFiles.miscFunctions import *
from processing.similarityFiles.dataOutput import *
from routes.connRun import connected_run
def main():
print("\nEntered Main")
timelist = []
timelist.append(currentTime())#starting time
"""
Current Layers
Price_Layer
FinishedTime_Layer
Destination_Layer
StartingPoint_Layer
Reputation_Layer
StartingTime_Layer
User_Layer
"""
layerNameList = ["Price_Layer","FinishedTime_Layer","Destination_Layer"] #Get it from somewhere else?
limitNrCluster = 20 #per Layer
limitNrNodes = 1000 #per Layer
layerDict = getClusterDataFromMongo(layerNameList,limitNrCluster,limitNrNodes)
#layerDict = getClusterDataFromSwagger(limitNrCluster,limitNrNodes) #for Swagger, Change URLs inside the function for different input Data
totalNodes = totalNumberOfNodes(layerDict)
print("Nr. of nodes: " +str(totalNodes))
totalClusters = totalNumberOfClusters(layerDict)
print("Nr. of clusters: " + str(totalClusters))
timelist.append(currentTime())
#calculates the weights between the clusters (weight == number of connections) #return is displayed in outputLayerFunction
layerDict = calculateWeights(layerDict)
timelist.append(currentTime())
#calculates the similarity between the clusters #returns dictionary[ tuple(cluster_label1,cluster_label2),
# listOfSimilarity(layer1,layer2,layer3) ]
similarityDict = calculateSimilarity(layerDict)
timelist.append(currentTime()) #Finishing time
#Write to files
runId = add_connected_run()
print("Outputing data")
outputFileLayerFunction(layerDict,totalNodes,totalClusters,runId)
outputFileSimilFunction(similarityDict,totalNodes,totalClusters,runId)
outputFileTimeFunction(timelist,totalNodes,totalClusters,runId)
#Output to DB
outputMongoConnClustDict(layerDict,runId)
outputMongoSimilarity(similarityDict,runId)
#Currently not used, developed for possible future uses
connClustersFromMongo = getConnClusterDataFromMongo()
similarityArrFromMongo = getSimilarityDataFromMOngo()
print("FINISHED")
return
##########START##########
main()
#########FINISH##########
......@@ -17,7 +17,7 @@ class MongoRepositoryBase:
def insert_entry(self, collection_name, content: dict):
collection = self._database[collection_name]
collection.insert_one(content)
return collection.insert_one(content)
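# Usage sketch (illustrative): the returned InsertOneResult exposes the id of the new document,
# which routes.connRun.add_connected_run turns into the run id:
#   result = repo.insert_entry('connected_run', {"Datetime": "..."})
#   run_id = str(result.inserted_id)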
def insert_many(self, collection_name, content: list):
collection = self._database[collection_name]
......
......@@ -18,6 +18,6 @@ SEMANTIC_LINKING_DB_PORT = 27017
## Role Stage Discovery
ROLESTAGE_DISCOVERY_HOSTNAME = 'role-stage-discovery'
ROLESTAGE_DISCOVERY_REST_PORT = 80
ROLESTAGE_DISCOVERY_DB_HOSTNAME = f'{ROLESTAGE_DISCOVERY_HOSTNAME}-db'
ROLESTAGE_DISCOVERY_DB_PORT = 27017
\ No newline at end of file
ROLESTAGE_DISCOVERY_REST_PORT = 30103
ROLESTAGE_DISCOVERY_DB_HOSTNAME = f'articonf1.itec.aau.at'
ROLESTAGE_DISCOVERY_DB_PORT = 30104
\ No newline at end of file