Commit 567d499c authored by Bogdan

Connected Cluster and Similarity functionalities

parent d728e14e
......@@ -9,7 +9,7 @@ consumes:
produces:
- "application/json"
basePath: "/api"
# basePath: "/api"
paths:
/debug:
......@@ -173,11 +173,51 @@ paths:
summary: "Insert locations from AGI, create clusters for starting time and location layers, create graphs for the location clusters"
parameters: []
responses:
204:
200:
description: "Successful operation"
#endregion
################################################################################
/connectedClusters:
get:
operationId: "routes.connClusters.get_conn_clusters"
tags:
- "Connected"
summary: "Get connected Clusters data"
description: "Returns a dictionary of cluster. The clusters contain the associated connected clusters and connected nodes data."
responses:
200:
description: "Successful operation"
schema:
$ref: "#/definitions/ConnectedDict"
/clusterSimilarity:
get:
operationId: "routes.similarity.get_similarity"
tags:
- "Similarity"
summary: "Get data of the similarity between clusters"
description: "Returns a dictionary where the key is a tuple of cluster_labels (i.e. [0,319]) and the value is the computed similarity between 2 clusters in the tuple, in regard to each layer in the input. \n Note: the tuple clusters have the same layer and the computed similarity is in regard to clusters from OTHER layers."
responses:
200:
description: "Successful operation"
schema:
$ref: "#/definitions/ClusterSimilarityArray"
/clusterRunArray:
get:
operationId: "routes.connRun.get_connected_run"
tags:
- "RunId"
summary: "Get RunId"
description: "Returns the RunId and the associated datetime when a connection of clusters/simillarity of clusters was computed."
responses:
200:
description: "Successful operation"
schema:
$ref: "#/definitions/ClusterRunArray"
definitions:
Cluster:
......@@ -264,3 +304,116 @@ definitions:
type: array
items:
$ref: "#/definitions/TimeSlice"
##################################################################
ConnectedDict:
type: array
items:
$ref: "#/definitions/ConnectedCluster"
ConnectedCluster:
type: object
properties:
cluster_label:
type: string
example: "6"
cluster_layer:
type: string
example: "Price_Layer"
cluster_runId:
type: string
example: "5efdc04ac43add0aba567d76"
cluster_containedNodesDict:
$ref: "#/definitions/ConnectedNode"
cluster_connNodesDict:
$ref: "#/definitions/ConnectedNode"
cluster_connClustDict:
type: object
additionalProperties:
type: number
example:
"cluster_label": nrOfConnectedNodes
#"-1": 42
"0": 39
"6969": 1
#not used, should be removed?
#cluster_connectionsNr
ConnectedNode:
type: object
properties:
cluster_label:
type: string
node_layer:
type: string
uniqueID:
type: string
example:
"cluster_label": "2230"
"node_layer": "Destination_Layer"
"uniqueID": "a95075f5042b1b27060080156d87"
#not used, should be removed?
#finished_time
#latitude_Destination
#longitude_Destination
#travelID
#travelPrice
#userID
ClusterSimilarityArray:
type: array
items:
$ref: "#/definitions/ClusterSimilarityDictionary"
ClusterSimilarityDictionary:
properties:
clusterTuple:
type: array
items:
type: string
minItems: 2
maxItems: 2
example: [
# cluster_label1
"0",
# cluster_label2
"319"
]
similarityValues:
type: object
additionalProperties:
type: number
example:
"layer_name": similarityValue
"StartingPoint_Layer": 39.0,
"StartingTime_Layer": 99.0101004948485
runId:
type: string
example: "5efdc04ac43add0aba567d76"
ClusterRunArray:
type: array
items:
$ref: "#/definitions/ClusterRun"
ClusterRun:
type: object
properties:
_id:
type: string
example: "5efdc04ac43add0aba567d76"
Datetime:
type: string
example: "2020-07-02 14:19:51.651764"
# Added by API Auto Mocking Plugin
host: virtserver.swaggerhub.com
basePath: /NumeDeOrganizatie/Smart/1.0.0
schemes:
- https
\ No newline at end of file
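For orientation, a minimal sketch of how the three new endpoints could be queried from Python; the host, port and basePath below are assumptions taken from the data-input URLs used later in this commit, not something the spec itself guarantees.
import requests
BASE_URL = 'http://articonf1.itec.aau.at:30103/api'  #assumed deployment, mirrors the listURLs in dataInput.py
conn_clusters = requests.get(BASE_URL + '/connectedClusters', timeout=30).json()
similarities = requests.get(BASE_URL + '/clusterSimilarity', timeout=30).json()
runs = requests.get(BASE_URL + '/clusterRunArray', timeout=30).json()
print(len(conn_clusters), "connected clusters,", len(similarities), "similarity entries,", len(runs), "runs")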
from db.entities.location import Location
from db.entities.popular_location import PopularLocation
from db.entities.cluster import Cluster
from db.entities.clusterset import ClusterSet
from db.entities.user_cluster_graph import UserClusterGraph
from db.entities.layer import Layer
from db.entities.timeslice import TimeSlice
\ No newline at end of file
class ClusterC:
def __init__(self,cluster_label,cluster_layer,cluster_runId,cluster_containedNodesDict,cluster_connNodesDict,cluster_connClustDict):
self.cluster_label = cluster_label
self.cluster_layer = cluster_layer
self.cluster_runId = cluster_runId
self.cluster_containedNodesDict = cluster_containedNodesDict ###RENAME TO curClNodesDict #Keys are frozensets (tuples) of (uniqueID, cluster_label)
self.cluster_connNodesDict = cluster_connNodesDict #Keys are frozensets (tuples) of (uniqueID, cluster_label) #problem if the newNodes and oldNodes lists are removed: there may be duplicates
self.cluster_connClustDict = cluster_connClustDict #dictionary: layer -> (dict2: cluster_label -> nrOfConnections) OR dictionary: cluster_label -> nrOfConnections
#cluster_connClustDict ------> look at both newNodes and oldNodes
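A small sketch of how a ClusterC could be instantiated; the label, layer and node key are invented, and the frozenset key follows the (uniqueID, cluster_label) convention described in the comments above.
node_key = frozenset(("a95075f5042b1b27060080156d87", "6"))  #hypothetical (uniqueID, cluster_label) key
example_cluster = ClusterC(
    cluster_label="6",
    cluster_layer="Price_Layer",
    cluster_runId=None,                           #filled in once a run is stored
    cluster_containedNodesDict={node_key: None},  #values would normally be NodeC objects
    cluster_connNodesDict=dict(),
    cluster_connClustDict=dict())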
class LayerC:
def __init__(self,layer_name,cluster_Dict):
self.layer_name = layer_name
self.cluster_Dict = cluster_Dict
\ No newline at end of file
class NodeC:
def __init__(self, cluster_label, node_layer, finished_time, latitude_Destination, longitude_Destination, travelID, travelPrice, uniqueID, userID):
self.cluster_label = cluster_label
self.node_layer = node_layer
self.finished_time = finished_time
self.latitude_Destination = latitude_Destination
self.longitude_Destination = longitude_Destination
self.travelID = travelID
self.travelPrice = travelPrice
self.uniqueID = uniqueID
self.userID = userID
\ No newline at end of file
from datetime import datetime
class ConnectedRun:
def __init__(self,run_id,timeOfExec):
self.run_id = run_id
self.timeOfExec = timeOfExec
\ No newline at end of file
......@@ -3,7 +3,11 @@ import network_constants as netconst
from database.MongoRepositoryBase import MongoRepositoryBase
import json
from db.entities.layer import *
from db.entities.cluster import *
from db.entities.timeslice import *
from db.entities import *
from processing.similarityFiles.miscFunctions import *
from typing import List
......@@ -19,6 +23,9 @@ class Repository(MongoRepositoryBase):
self._layer_nodes_collection = 'layer_nodes'
self._clusters_collection = 'clusters'
self._time_slice_collection = 'time_slices'
self._connected_clusters_collection ='connected_clusters'
self._similarity_collection = 'similarity'
self._connected_run = 'connected_run'
#region Layers
def add_layer(self, layer: Layer):
......@@ -79,3 +86,71 @@ class Repository(MongoRepositoryBase):
super().drop_collection(self._time_slice_collection)
#endregion
#region clusterConnected
def add_connected_clusters(self, clusterDictArray):
''' Add Connected Clusters Data to DB '''
result = super().insert_many(self._connected_clusters_collection, clusterDictArray)
return result
def get_connected_clusters(self, run_id=None):
''' Get Connected Clusters Data from DB '''
if run_id is None:
entries = super().get_entries(self._connected_clusters_collection)
else:
entries = super().get_entries(self._connected_clusters_collection, selection={'cluster_runId': run_id})
return list(entries)
#return [Cluster(cluster_dict=e, from_db=True) for e in entries]
#endregion
#region similarity
def add_similarity(self, inputDict):
''' Add Similarity Data to DB '''
#checkIfConnClustDictIsSerializable(outputJSON)
result = super().insert_many(self._similarity_collection, inputDict)
#print(str(result))
#super().insert_entry(self._connected_clusters_collection, outputJSON)
return result
#TODO
def get_similarity(self, run_id=None):
''' Get Similarity Data from DB '''
if run_id is None:
entries = super().get_entries(self._similarity_collection, projection={'_id': 0})
else:
entries = super().get_entries(self._similarity_collection, selection={'runId': run_id})
return list(entries)
#endregion
#region connected_run
def add_connected_run(self, conRunTimestamp):
''' Add Connected Run Data to DB '''
result = super().insert_entry(self._connected_run, conRunTimestamp)
return result
def get_connected_run(self, run_id=None):
''' Get Connected Run Data from DB '''
if run_id is None:
entries = super().get_entries(self._connected_run)
else:
entries = super().get_entries(self._connected_run, selection={'_id': run_id})
return list(entries)
#endregion
\ No newline at end of file
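A hedged usage sketch of the new repository regions; the documents and the run id below are invented, only the method names come from the code above.
repo = Repository()
repo.add_connected_clusters([{"cluster_label": "6", "cluster_layer": "Price_Layer", "cluster_runId": "5efdc04ac43add0aba567d76"}])
all_connected = repo.get_connected_clusters()                             #every stored connected-cluster document
run_similarity = repo.get_similarity(run_id="5efdc04ac43add0aba567d76")   #similarity entries of one run only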
# __init__.py
from similarityFiles.calculateSimilarity import *
from similarityFiles.calculateWeights import *
from similarityFiles.populateWithNewNodes import *
from similarityFiles.miscFunctions import *
from similarityFiles.test import *
from db.entities.connected_cluster import *
from db.entities.connected_layer import *
from db.entities.connected_node import *
#This file contains the methods for calculating the similarity between clusters
import math
from db.entities.connected_node import NodeC
from db.entities.connected_cluster import ClusterC
from db.entities.connected_layer import LayerC
from typing import Dict
def minMaxFunction(iIndex,jIndex,clusterList) -> Dict[str,int]:
''' minMax Metric for calculating similarity between 2 clusters.
Clusters must be from the same layer, and will be compared to clusters from different layers (cluster_layer attribute)
:param int iIndex: The index of the first Cluster in the "clusterList"
:param int jIndex: The index of the second Cluster in the "clusterList"
:param List[Cluster] clusterList: A list of clusters to which the 2 clusters will be compared to
:returns: Dictionary with layername as KEY, and the computed similarity value between the 2 clusters in regard to the layer as the VALUE of the Dict.
:rtype: Dict{str,int}
'''
iCluster= clusterList[iIndex]
jCluster= clusterList[jIndex]
outputDict = dict()
#calculate the min/max over clusters from the other layers
for curCluster in clusterList: #jCluster.cluster_layer == iCluster.cluster_layer, so i only compare to one
curLayer = curCluster.cluster_layer
curLabel = curCluster.cluster_label
if(( curLayer != iCluster.cluster_layer)
and ( curCluster.cluster_connClustDict.__contains__(iCluster.cluster_label))
and ( curCluster.cluster_connClustDict.__contains__(jCluster.cluster_label))):
# min part
curMin = min(curCluster.cluster_connClustDict[iCluster.cluster_label],curCluster.cluster_connClustDict[jCluster.cluster_label])
if(outputDict.__contains__(curLayer) == False):
outputDict[curLayer]= curMin
else: # max part
if(outputDict[curLayer]<curMin):
outputDict[curLayer] = curMin
return outputDict
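A hypothetical worked example of the minMax value for one other layer: two clusters k1 and k2 from that layer connect to cluster i with 4 and 1 shared nodes and to cluster j with 1 and 1, so the layer value is the largest pairwise minimum. All counts are invented.
conn_to_i = {"k1": 4, "k2": 1}   #invented connection counts towards cluster i
conn_to_j = {"k1": 1, "k2": 1}   #invented connection counts towards cluster j
layer_value = max(min(conn_to_i[k], conn_to_j[k]) for k in conn_to_i)   # == 1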
def calcEuclideanDist(iIndex,jIndex,clusterList) -> Dict[str,float]:
''' Euclidean Distance Metric for calculating similarity between 2 clusters.
Clusters must be from the same layer, and will be compared to clusters from different layers (cluster_layer attribute)
:param int iIndex: The index of the first Cluster in the "clusterList"
:param int jIndex: The index of the second Cluster in the "clusterList"
:param List[Cluster] clusterList: A list of clusters to which the 2 clusters will be compared to
:returns: Dictionary with layername as KEY, and the computed similarity value between the 2 clusters in regard to the layer as the VALUE of the Dict.
:rtype: Dict{str,float}
'''
iCluster= clusterList[iIndex]
jCluster= clusterList[jIndex]
outputDict = dict()
#calculate the distance (parallelizable)
for curCluster in clusterList: #jCluster.cluster_layer == iCluster.cluster_layer, so i only compare to one
curLayer = curCluster.cluster_layer
curLabel = curCluster.cluster_label #debugOnly
#considering only clusters from other layers for distance calc
if( curLayer != iCluster.cluster_layer):
###### BUG: what if they don't share a connection?
###### if in a layer both clusters don't have a connection --> distance of 0. Identical in regard to that layer. Correct or false?
iVal = 0
jVal = 0
connectedClusters = False
if(curCluster.cluster_connClustDict.__contains__(iCluster.cluster_label)):
iVal = curCluster.cluster_connClustDict[iCluster.cluster_label]
connectedClusters = True
if(curCluster.cluster_connClustDict.__contains__(jCluster.cluster_label)):
jVal = curCluster.cluster_connClustDict[jCluster.cluster_label]
connectedClusters = True
if (connectedClusters == False):
#clusters aren't connected => assign the max int value if there are no prior elements in list
if(outputDict.__contains__(curLayer) == False):
outputDict[curLayer]= 2147483647 #notConnected
else:
#clusters ARE connected => add the squares part of the euclid distance to the value of the similarity
if(outputDict.__contains__(curLayer) == False):
#first element
outputDict[curLayer]= (iVal - jVal)**2
else:
#further elements
outputDict[curLayer]+= (iVal - jVal)**2
for layer in outputDict:
outputDict[layer] = math.sqrt(outputDict[layer])
return outputDict
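The same hypothetical counts worked through the Euclidean metric above: the per-layer value is the square root of the summed squared differences, here sqrt((4-1)**2 + (1-1)**2) == 3.0.
conn_to_i = {"k1": 4, "k2": 1}   #invented connection counts towards cluster i
conn_to_j = {"k1": 1, "k2": 1}   #invented connection counts towards cluster j
layer_value = math.sqrt(sum((conn_to_i[k] - conn_to_j[k]) ** 2 for k in conn_to_i))   # == 3.0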
# frozenset(tuple) dict
# [(iClusterLabel,jClusterLabel), (layer,similarity)]
#def calculateSimilarity(inputLayerDict) -> Dict[frozenset((str,str)),Dict[str,int]]:
def calculateSimilarity(inputLayerDict):
''' Calculates the similarity between clusters contained in the "inputLayerDict". Similarity is calculated for each combination of 2 clusters from the SAME layer.
:param Dict{layername: Layer} inputLayerDict: Contains the associated Layer and Clusters objects. The dictionary KEY is layername, the Value is a Layer Object. The Layer object has an attribute cluster_Dict which stores the clusters in the Layer.
:returns: Dict{tuple(cluster_label1, cluster_label2) : Dict{layername, similarityValue}}. Returns a Dictionary with a tuple of 2 clusters as KEY, and a Dictionary with the computed similarity of the clusters in regard to each layer as VALUE
:rtype: Dict{(string,string): Dict{str:float}}
'''
print("Entered calculateSimilarity")
similarityDict = dict() #the key is a frozenset(Tuple) (clusterLabel1,clusterLabel2)
clusterList = list()
for curLayer in inputLayerDict.values():
for curCluster in curLayer.cluster_Dict.values():
clusterList.append(curCluster)
#print(" Nr. of clusters: "+str(len(clusterList)))
#go through every combination of 2 clusters and calculate the similarity between them in regard to each layer
i=0
while( i < len(clusterList) ):
iCluster = clusterList[i]
j=i+1
while ( j<len(clusterList)):
jCluster = clusterList[j]
if (iCluster.cluster_layer == jCluster.cluster_layer): #calculate similarity only from the same layer
tuplekey = (clusterList[i].cluster_label,clusterList[j].cluster_label)
key = frozenset(tuplekey)
#### EUCLIDEAN DISTANCE /minMax
similarityDict[key]=calcEuclideanDist(i,j,clusterList)
#print("#### similarityDict i:"+str(i)+" j:"+str(j))
#print("#### "+str(similarityDict))
else:
j = len(clusterList)
j+=1
i+=1
print("Finished calculateSimilarity")
return similarityDict
\ No newline at end of file
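For illustration, the returned similarityDict has the following shape; the labels and per-layer values mirror the swagger example, and the pairing of labels is not a claim about real data.
example_similarityDict = {
    frozenset(("0", "319")): {"StartingPoint_Layer": 39.0, "StartingTime_Layer": 99.0101004948485},
}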
from db.entities.connected_node import NodeC
from db.entities.connected_cluster import ClusterC
from db.entities.connected_layer import LayerC
from typing import Dict
def sortFunctByNode(node):
try :
return node.uniqueID
except:
print(node.cluster_label)
print(node.node_layer)
print(node.uniqueID)
def calculateWeights(inputLayerDict) -> Dict[str,LayerC]:
''' Calculates the nr of connections/weights between the clusters contained in the "inputLayerDict". Connections are made between clusters from DIFFERENT layers.
:param Dict{string: Layer} inputLayerDict: Contains the associated Layer and Clusters objects. The dictionary KEY is layername, the Value is a Layer Object. The Layer object has an attribute cluster_Dict which stores the clusters in the Layer.
:returns: Dict{layername: Layer}. Returns the inputLayerDict with the added connections in the attributes cluster_connClustDict and cluster_connNodesDict
:rtype: Dict{string: Layer}
'''
#the input dictates which clusters are updated; however it will update all the included clusters
#if I only want to update a single cluster without considering the rest, I should create a new method?
print("Entered calculateWeights")
nodeList = []
for curLayer in inputLayerDict.values():
for curCluster in curLayer.cluster_Dict.values():
for curNode in curCluster.cluster_containedNodesDict.values():
nodeList.append(curNode)
#if curNode != None:
#if(curNode.uniqueID!= None):
#print(" Nr. of nodes: " + str(len(nodeList)))
nodeList.sort(key=sortFunctByNode)
i=0
while( i < len(nodeList) ):
iNode = nodeList[i]
j=i+1
while ( j<len(nodeList)):
jNode = nodeList[j]
#if there is a connection
#print("\n ### \n"+iNode.uniqueID +" "+ iNode.node_layer +"\n"+ jNode.uniqueID +" "+ jNode.node_layer )
if (iNode.node_layer != jNode.node_layer) and (iNode.uniqueID == jNode.uniqueID):
iOldTuple = (iNode.uniqueID,iNode.cluster_label)
jOldTuple= (jNode.uniqueID,jNode.cluster_label)
iOldKey = frozenset(iOldTuple)
jOldKey = frozenset(jOldTuple)
#Check if old node dicts has this node: if not add to ConnDictionary and to OldNodesDict
# Layer . Cluster . OldNodesDict . Does not contain the OTHER node
if (inputLayerDict[iNode.node_layer].cluster_Dict[iNode.cluster_label].cluster_connNodesDict.__contains__(jOldKey) == False):
#add node j at cluster i
if (inputLayerDict[iNode.node_layer].cluster_Dict[iNode.cluster_label].cluster_connClustDict.__contains__(jNode.cluster_label)):
inputLayerDict[iNode.node_layer].cluster_Dict[iNode.cluster_label].cluster_connClustDict[jNode.cluster_label]+=1
else:
inputLayerDict[iNode.node_layer].cluster_Dict[iNode.cluster_label].cluster_connClustDict[jNode.cluster_label]=1
#add node to old nodes
inputLayerDict[iNode.node_layer].cluster_Dict[iNode.cluster_label].cluster_connNodesDict[jOldKey]=jNode
if (inputLayerDict[jNode.node_layer].cluster_Dict[jNode.cluster_label].cluster_connNodesDict.__contains__(iOldKey) == False):
#add node i at cluster j
if (inputLayerDict[jNode.node_layer].cluster_Dict[jNode.cluster_label].cluster_connClustDict.__contains__(iNode.cluster_label)):
inputLayerDict[jNode.node_layer].cluster_Dict[jNode.cluster_label].cluster_connClustDict[iNode.cluster_label]+=1
else:
inputLayerDict[jNode.node_layer].cluster_Dict[jNode.cluster_label].cluster_connClustDict[iNode.cluster_label]=1
#add node to old nodes
inputLayerDict[jNode.node_layer].cluster_Dict[jNode.cluster_label].cluster_connNodesDict[iOldKey]=iNode
j+=1
i+=1
#deleting cluster_containedNodesDicts/// No longer needed
#for curLayer in inputLayerDict.values():
# for curCluster in curLayer.cluster_Dict.values():
# inputLayerDict[curCluster.cluster_layer].cluster_Dict[curCluster.cluster_label].cluster_containedNodesDict = dict()
print("Finished calculateWeights")
#store weights in database?
return inputLayerDict
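A minimal end-to-end sketch of calculateWeights, assuming two layers with one node each that share a uniqueID, so exactly one connection is recorded in each direction; all identifiers below are invented.
nA = NodeC("0", "Price_Layer", None, None, None, None, None, "uid-1", None)
nB = NodeC("5", "Destination_Layer", None, None, None, None, None, "uid-1", None)
clA = ClusterC("0", "Price_Layer", None, {frozenset(("uid-1", "0")): nA}, dict(), dict())
clB = ClusterC("5", "Destination_Layer", None, {frozenset(("uid-1", "5")): nB}, dict(), dict())
layers = {"Price_Layer": LayerC("Price_Layer", {"0": clA}),
          "Destination_Layer": LayerC("Destination_Layer", {"5": clB})}
layers = calculateWeights(layers)
print(clA.cluster_connClustDict)   # {'5': 1}
print(clB.cluster_connClustDict)   # {'0': 1}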
#This file contains the methods which add the data (layers,clusters,nodes)
# from the input JSON to the "layerDict" dictionary for further processing
from db.entities.connected_node import NodeC
from db.entities.connected_cluster import ClusterC
from db.entities.connected_layer import LayerC
from typing import Dict
#from db.repository import Repository
import json
import requests
from routes.clustersets import get_by_name
#TEST ONLY
from routes.connClusters import get_conn_clusters
from routes.similarity import get_similarity
def getClusterDataFromSwagger(limitNrCluster,limitNrNodes):
''' Gets cluster data for several layers from the REST API (see listURLs below) and builds the layerDict.
:param int limitNrCluster: Limits the number of Clusters considered per layer. None or <0 values == No limit
:param int limitNrNodes: Limits the number of Nodes considered per layer. None or <0 values == No limit
:returns: Dict{layername: Layer}. Returns a Dict with the data gathered from the REST API
:rtype: Dict{string: Layer}
'''
print("Entered dataInput")
# ??? OBSOLETE ???
# oldBigTestClusters https://drive.google.com/uc?export=download&id=1l4gHBwrG_N4pCL5-MfWJk2szNrF3VnpG it takes a while to download
# smallTestClusters https://drive.google.com/uc?export=download&id=1cMoGtmi-XouSDM9DRl-ddmPkf2Bm7sk7
# smallTestOnlyLocationClusters https://drive.google.com/uc?export=download&id=1wBT9vi7aS4rE4qOWHEyLEfQ2KbmpBK9e
# smallTestOnlyPriceClusters https://drive.google.com/uc?export=download&id=1g9pEOOpDMBj6yZOlFj7HfOoMOAFTvPOW
# smallTestOnlyTimeClusters https://drive.google.com/uc?export=download&id=1XKXQHEC5ubJHmntQBNnzgfpEZl6OXE_B
listURLs = []
#"""
listURLs.append('http://articonf1.itec.aau.at:30103/api/layers/Price_Layer/clusters')
listURLs.append('http://articonf1.itec.aau.at:30103/api/layers/FinishedTime_Layer/clusters')
listURLs.append('http://articonf1.itec.aau.at:30103/api/layers/Destination_Layer/clusters')
#listURLs.append('http://articonf1.itec.aau.at:30103/api/layers/StartingPoint_Layer/clusters')
#listURLs.append('http://articonf1.itec.aau.at:30103/api/layers/Reputation_Layer/clusters')
#listURLs.append('http://articonf1.itec.aau.at:30103/api/layers/StartingTime_Layer/clusters')
#listURLs.append('http://articonf1.itec.aau.at:30103/api/layers/User_Layer/clusters')
#"""
#Maximum of these nodes PER Layer will be considered
if (limitNrCluster is None) or (limitNrCluster < 0):
limitNrCluster = 9223372036854775807 #per Layer (LLONG_MAX == 2^63 - 1)
if (limitNrNodes is None) or (limitNrNodes < 0):
limitNrNodes = 9223372036854775807 #per Layer (LLONG_MAX == 2^63 - 1)
layerDict = dict()
#imports and translates the data from JSON into a useful format
#returns layerDict -> Layer -> clusterDict -> Cluster -> nodesDict -> Nodes
for url in listURLs:
newData = loadJson(url)
layerDict = populateWithNewNodesSingleLayer(newData[0:limitNrCluster],layerDict,limitNrNodes)
return layerDict
def loadJson(url) :
res = requests.get(url, timeout=30)
jsonData = json.loads(res.content)
return jsonData
def getClusterDataFromMongo(layerNameList,limitNrCluster,limitNrNodes):
''' Gets cluster data for the given layers from the DB and builds the layerDict.
:param List[string] layerNameList: Name of the layers to pull from the DB
:param int limitNrCluster: Limits the number of Clusters considered per layer. None or <0 values == No limit
:param int limitNrNodes: Limits the number of Nodes considered per layer. None or <0 values == No limit
:returns: Dict{layername: Layer}. Returns a Dict with the data gathered from the DB
:rtype: Dict{string: Layer}
'''
layerDict = dict()
#Maximum of these nodes PER Layer will be considered
if (limitNrCluster is None) or (limitNrCluster < 0):
limitNrCluster = 9223372036854775807 #per Layer (LLONG_MAX == 2^63 - 1)
if (limitNrNodes is None) or (limitNrNodes < 0):
limitNrNodes = 9223372036854775807 #per Layer (LLONG_MAX == 2^63 - 1)
layerDict = dict()
#imports and translates the data from JSON into a useful format
#returns layerDict -> Layer -> clusterDict -> Cluster -> nodesDict -> Nodes
for name in layerNameList:
newData = get_by_name(name)
layerDict = populateWithNewNodesSingleLayer(newData[0:limitNrCluster],layerDict,limitNrNodes)
return layerDict
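#NOTE: the 2-parameter populateWithNewNodesSingleLayer below is shadowed by the 3-parameter version defined further down (Python keeps the last definition), so the callers above, which pass limitNrNodes, use that one.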
def populateWithNewNodesSingleLayer(inputData, layerDict) -> Dict[str,LayerC]:
''' Gets Layer, Cluster and Node data from a JSON format and appends it to the "layerDict" dictionary. A single Layer only.
:param inputData: JSON data to be formatted.
:param Dict{string: Layer} layerDict: If it's empty/null a new one will be created. Otherwise contains the associated Layer and Clusters objects to which data is appended. The dictionary KEY is layername, the Value is a Layer Object. The Layer object has an attribute cluster_Dict which stores the clusters in the Layer.
:returns: Dict{layername: Layer}. Returns the inputLayerDict with the added data from the JSON
:rtype: Dict{string: Layer}
'''
print("Entered populateWithNewNodes")
if(layerDict == None):
layerDict = dict()
print(" Layer: "+inputData[0].get("layer_name"))
curLayerName = None
#newClusterDict
#clusterDict = layerDict.get(curCluster.get("layer_name"),dict())
for curCluster in inputData:
if(curCluster.get("layer_name")!= curLayerName):
clusterDict = layerDict.get(curCluster.get("layer_name"),dict())
curLayerName = curCluster.get("layer_name")
oldCluster = clusterDict.get(curCluster.get("cluster_label"),None)
if oldCluster is None: #means this is a new cluster
cluster_containedNodesDict = dict()
else: #means this is an already existing cluster
cluster_containedNodesDict = oldCluster.cluster_containedNodesDict
for curNode in curCluster.get("nodes"):
#totalNodesCount+=1
newNode = NodeC(
curCluster.get("cluster_label"),
curLayerName,
curNode.get("Finished_time"),
curNode.get("Latitude_Destination"),
curNode.get("Longitude_Destination"),
curNode.get("TravelID"),
curNode.get("TravelPrice"),
curNode.get("UniqueID"),
curNode.get("UserID"))
if(newNode != None):
if(newNode.uniqueID!= None and newNode.cluster_label!= None and newNode.node_layer!= None):
auxtuple = (newNode.uniqueID,newNode.cluster_label)
key = frozenset(auxtuple)
cluster_containedNodesDict[key]= newNode #overwrite if already there
#finished node
if oldCluster is None:
# def __init__(self,cluster_label,cluster_layer,cluster_containedNodesDict,cluster_connNodesDict, cluster_connectionsNr,cluster_connClustDict):
newCluster = ClusterC(
curCluster.get("cluster_label"),
curLayerName,
None,
cluster_containedNodesDict,
dict(), #will populate the dict fields later
dict()) #may not be empty anymore
clusterDict[newCluster.cluster_label] = newCluster
else:
oldCluster.cluster_containedNodesDict = cluster_containedNodesDict
clusterDict[curCluster.get("cluster_label")] = oldCluster
#finished cluster
newLayer = LayerC(curLayerName,clusterDict)
layerDict[curLayerName]= newLayer
##########TEST THIS
return layerDict
def populateWithNewNodesSingleLayer(inputData, layerDict, limitNrNodes) -> Dict[str,LayerC]:
''' Gets Layer, Cluster and Node data from a JSON format and appends it to the "layerDict" dictionary.
:param inputData: JSON data to be formatted.
:param Dict{string: Layer} layerDict: If it's empty/null a new one will be created. Otherwise contains the associated Layer and Clusters objects to which JSON data is appended. The dictionary KEY is layername, the Value is a Layer Object. The Layer object has an attribute cluster_Dict which stores the clusters in the Layer.
:param int limitNrNodes: How many maximum nodes PER layer will be considered.
:returns: Dict{layername: Layer}. Returns the inputLayerDict with the added data from the JSON
:rtype: Dict{string: Layer}
'''
print("Entered populateWithNewNodes")
if(layerDict == None):
layerDict = dict()
print(" Layer: "+inputData[0].get("layer_name"))
curLayerName = None
#newClusterDict
#clusterDict = layerDict.get(curCluster.get("layer_name"),dict())
for curCluster in inputData:
if(curCluster.get("layer_name")!= curLayerName):
clusterDict = layerDict.get(curCluster.get("layer_name"),dict())
curLayerName = curCluster.get("layer_name")
oldCluster = clusterDict.get(curCluster.get("cluster_label"),None)
if oldCluster is None: #means this is a new cluster
cluster_containedNodesDict = dict()
else: #means this is an already existing cluster
cluster_containedNodesDict = oldCluster.cluster_containedNodesDict
for curNode in curCluster.get("nodes"):
#totalNodesCount+=1
newNode = NodeC(
curCluster.get("cluster_label"),
curLayerName,
curNode.get("Finished_time"),
curNode.get("Latitude_Destination"),
curNode.get("Longitude_Destination"),
curNode.get("TravelID"),
curNode.get("TravelPrice"),
curNode.get("UniqueID"),
curNode.get("UserID"))
if(newNode != None):
if(newNode.uniqueID!= None and newNode.cluster_label!= None and newNode.node_layer!= None):
if( limitNrNodes>0):
auxtuple = (newNode.uniqueID,newNode.cluster_label)
key = frozenset(auxtuple)
cluster_containedNodesDict[key]= newNode #overwrite if already there
limitNrNodes-=1
#finished node
if oldCluster is None:
# def __init__(self,cluster_label,cluster_layer,cluster_containedNodesDict,cluster_connNodesDict, cluster_connectionsNr,cluster_connClustDict):
newCluster = ClusterC(
curCluster.get("cluster_label"),
curLayerName,
None,
cluster_containedNodesDict,
dict(),
dict()) #may not be empty anymore
clusterDict[newCluster.cluster_label] = newCluster
else:
oldCluster.cluster_containedNodesDict = cluster_containedNodesDict
clusterDict[curCluster.get("cluster_label")] = oldCluster
#finished cluster
newLayer = LayerC(curLayerName,clusterDict)
layerDict[curLayerName]= newLayer
return layerDict
#deprecated
def populateWithNewNodesAllLayers(inputData,layerDict) -> Dict[str,LayerC]:
''' Gets Layer, Cluster and Node data from a JSON format and appends it to the "layerDict" dictionary.
:param inputData: JSON data to be formatted.
:param Dict{string: Layer} layerDict: If it's empty/null a new one will be created. Otherwise contains the associated Layer and Clusters objects to which data is appended. The dictionary KEY is layername, the Value is a Layer Object. The Layer object has an attribute cluster_Dict which stores the clusters in the Layer.
:returns: Dict{layername: Layer}. Returns the inputLayerDict with the added data from the JSON
:rtype: Dict{string: Layer}
'''
print("Entered populateWithNewNodes")
if(layerDict == None):
layerDict = dict()
for curLayer in inputData:
clusterDict = layerDict.get(curLayer.get("layer_name"),dict()) #gets the Old dict or an empty if none is found i.e for a new layer
curLayerName = curLayer.get("layer_name")
for curCluster in curLayer.get("clusters"):
oldCluster = clusterDict.get(curCluster.get("cluster_label"),None)
if oldCluster is None: #means this is a new cluster
cluster_containedNodesDict = dict()
else: #means this is an already existing cluster
cluster_containedNodesDict = oldCluster.cluster_containedNodesDict
#SORT NODES?
for curNode in curCluster.get("nodes"):
if(curNode != None):
if(curNode.uniqueID != None):
newNode = NodeC(
curCluster.get("cluster_label"),
curLayerName,
curNode.get("Finished_time"),
curNode.get("Latitude_Destination"),
curNode.get("Longitude_Destination"),
curNode.get("TravelID"),
curNode.get("TravelPrice"),
curNode.get("UniqueID"),
curNode.get("UserID"))
auxtuple = (newNode.uniqueID,newNode.cluster_label)
key = frozenset(auxtuple)
cluster_containedNodesDict[key]= newNode #overwrite if already there
#finished node
if oldCluster is None:
newCluster = ClusterC(
curCluster.get("cluster_label"),
curLayer.get("layer_name"),
None,
cluster_containedNodesDict,
dict(),
dict()) #may not be empty anymore
clusterDict[newCluster.cluster_label] = newCluster
else:
#only cluster_containedNodesDict should change
oldCluster.cluster_containedNodesDict = cluster_containedNodesDict
#cluster_connNr and clusterConnDict should stay the same
clusterDict[curCluster.get("cluster_label")] = oldCluster
#finished cluster
newLayer = LayerC(curLayer.get("layer_name"),clusterDict)
layerDict[curLayer.get("layer_name")]= newLayer
#finished layer
print("Finished populateWithNewNodes")
return layerDict
def getConnClusterDataFromMongo():
mongoArray = get_conn_clusters()
outputDict = convertRetrievedClustersFromMongo(mongoArray)
return outputDict
def getSimilarityDataFromMongo():
result = get_similarity()
return result
def convertRetrievedClustersFromMongo(inputArray):
####TODO#### Not tested thoroughly
LayerDict = {}
for entry in inputArray:
if not(entry['cluster_layer'] in LayerDict):
LayerDict[entry['cluster_layer']] = []
cl = ClusterC(
entry['cluster_label'],
entry['cluster_layer'],
entry['cluster_runId'],
entry['cluster_containedNodesDict'],
entry['cluster_connNodesDict'],
entry['cluster_connClustDict'])
LayerDict[entry['cluster_layer']].append(cl)
return LayerDict
\ No newline at end of file
#Misc util functions
import json
import requests
import datetime
from routes.connClusters import add_conn_clusters
from routes.similarity import add_similarity
from routes.connRun import add_connected_run
from processing.similarityFiles.miscFunctions import *
def outputFileLayerFunction(layerDict,limitNrNodes,limitNrCluster,runId):
''' Writes the layerDict data to a JSON file.
:param Dict{string: Layer} layerDict: Object which contains Data about the Layers, Clusters and Nodes
:param int limitNrNodes: How many nodes are contained in layerDict. Used in creating the name of the File
:param int limitNrCluster: How many clusters are contained in layerDict. Used in creating the name of the File
:param string runId: Id of the Run
'''
layerJSON = convertLayerDictToJSON(layerDict,runId)
outputJSON = json.dumps(layerJSON, default=lambda o: o.__dict__, indent=4)
try:
with open('resultLayerDictN'+str(limitNrNodes)+'C'+str(limitNrCluster)+'.json', 'w') as outfile:
outfile.write(outputJSON)
except ValueError:
print("Error occured when writing the resultLayerDict file")
def outputFileSimilFunction(similarityDict,limitNrNodes,limitNrCluster,runId):
''' Writes the similarityDict data to a JSON file.
:param Dict{(cluster_label1, cluster_label2): Dict{layername: value}} similarityDict: Object which contains Data about the similarity between the clusters
:param int limitNrNodes: How many nodes are contained in layerDict. Used in creating the name of the File
:param int limitNrCluster: How many clusters are contained in layerDict. Used in creating the name of the File
:param string runId: Id of the Run
'''
similJSON = convertSimilarityDictToJSON(similarityDict,runId)
outputJSON = json.dumps(similJSON, default=lambda o: o.__dict__, indent=4)
try:
with open('resultSimilarityDictN'+str(limitNrNodes)+'C'+str(limitNrCluster)+'.json', 'w') as outfile2:
outfile2.write(outputJSON)
except ValueError:
print("Error occured when writing the resultSimilarityDict file")
def outputFileTimeFunction(timelist,limitNrNodes,limitNrCluster,runId):
''' Writes execution time to a file.
:param List[datetime] timelist: Contains timestamps about the execution time of functions and the program.
:param int limitNrNodes: How many nodes are considered. Used in creating the name of the File
:param int limitNrCluster: How many clusters are considered. Used in creating the name of the File
:param string runId: Id of the Run
'''
stringToWrite = "StartTime: "+ str(timelist[0])
stringToWrite += "\nFinishTime: " + str((timelist[3])) +"\n"
stringToWrite += "\nPopulateWithNewNodes: " + str((timelist[1]-timelist[0]).total_seconds())
stringToWrite += "\nCalculateWeights: " + str((timelist[2]-timelist[1]).total_seconds())
stringToWrite += "\nCalculateSimilarity: " + str((timelist[3]-timelist[2]).total_seconds())
stringToWrite += "\nTotalTime: " + str((timelist[3]-timelist[0]).total_seconds())
stringToWrite += "\nRunId: " +str(runId)
#aux = str(timelist[0]) + " :PopulateWithNewNodes\n"+ str(timelist[1]) + " :CalculateWeights\n" + str(timelist[2]) + " :CalculateSimilarity\n"+ str(timelist[3]) + " :Finish"
try:
with open('resultTimeExecN'+str(limitNrNodes)+'C'+str(limitNrCluster)+'.txt', 'w') as outfile3:
outfile3.write(stringToWrite)
except ValueError:
print("Error occured when writing the resultTimeExec file")
def outputMongoConnClustDict(inputDict,runId):
''' Stores connected_clusters in the database.
:param Dict() inputDict: Contains the data to insert
:param string runId: Id of the Run
'''
#inputDict["Timestamp"] = str(datetime.datetime.now())
add_conn_clusters(inputDict,runId)
def outputMongoSimilarity(inputDict,runId):
''' Stores cluster_similarity in the database.
:param Dict() inputDict: Contains the data to insert
:param string runId: Id of the Run
'''
add_similarity(inputDict,runId)
\ No newline at end of file
#Misc util functions
import json
import requests
import datetime
def currentTime():
ts = datetime.datetime.now()
print(ts)
return ts
def totalNumberOfNodes(inputLayerDict):
''' Computes total number of nodes in the inputLayerDict.
:param Dict{string: Layer} inputLayerDict: Layers in which the Clusters (and their Nodes) are stored
:returns: Returns nr of Nodes
:rtype: int
'''
nodeCount = 0
for curLayer in inputLayerDict.values():
for curCluster in curLayer.cluster_Dict.values():
nodeCount+=len(curCluster.cluster_containedNodesDict.values())
return nodeCount
def totalNumberOfClusters(inputLayerDict):
''' Computes total number of clusters in the inputLayerDict.
:param Dict{string: Layer} inputLayerDict: Layers in which the Clusters are stored
:returns: Returns nr of Clusters
:rtype: int
'''
clustCount = 0
for curLayer in inputLayerDict.values():
clustCount+= len(curLayer.cluster_Dict.values())
return clustCount
def convertLayerDictToJSON(layerDict, runId):
''' Converts the layerDict into a JSON-serializable list of cluster dictionaries.
:param Dict{string: Layer} layerDict: Object which contains Data about the Layers, Clusters and Nodes
:param string runId: Id of the Run
:rtype: List[Dict] (one dictionary per cluster)
'''
'''
{
layer1 : {
[
{
cluster_label1 : 0123400,
cluster_layer: layer1,
"cluster_connClustDict": {
"0123456": 98
"1234567": 12
},
cluster_containedNodesDict : {
[
abcd,
sgre,
dgre,
ddhr,
yyrh
]
}
},
{
},
{
}
]
},
layer2 : {
}
}
'''
outputJSON = []
for curLayer in layerDict.values():
for curCluster in curLayer.cluster_Dict.values():
outputJSON.append({
"cluster_label" : curCluster.cluster_label,
"cluster_layer" : curCluster.cluster_layer,
"cluster_runId" : runId,
"cluster_connClustDict" : changeDictKeysToString(curCluster.cluster_connClustDict),
"cluster_connNodesDict" : getFrozensetFromConnNodesDict(curCluster.cluster_connNodesDict),
"cluster_containedNodesDict" : getNodeIdListFromContainedNodesDict(curCluster.cluster_containedNodesDict),
})
#outputJSON = json.dumps(outputJSON, default=lambda o: o.__dict__, indent=4)
return outputJSON
def changeDictKeysToString(inputDict):
keys_values = inputDict.items()
outputDict = { str(key): value for key,value in keys_values}
return outputDict
def getNodeIdListFromContainedNodesDict(inputDict):
output = []
for curNode in inputDict.values():
output.append(curNode.uniqueID)
return output
def getFrozensetFromConnNodesDict(inputDict):
output = []
for curNode in inputDict.values():
auxDict = {}
auxDict["node_id"]= curNode.uniqueID
auxDict["node_cluster"] = curNode.cluster_label
output.append(auxDict)
return output
def convertSimilarityDictToJSON(inputDict,runId):
similList = []
for compositeKey in inputDict:
frozensetString =list()
#key is a tuple of cluster_labels
for key in compositeKey:
frozensetString.append(key)
similList.append({
"clusterTuple" : frozensetString,
"similarityValues" : inputDict[compositeKey],
"runId": runId
})
similToJSON = similList
#outputJSON = json.dumps(similToJSON, default=lambda o: o.__dict__, indent=4)
return similToJSON
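For reference, one serialized entry matches the ClusterSimilarityDictionary definition in the swagger file; the values below mirror the swagger example, and the order of the two labels in clusterTuple is not guaranteed because the key is a frozenset.
example_entry = {
    "clusterTuple": ["0", "319"],
    "similarityValues": {"StartingPoint_Layer": 39.0, "StartingTime_Layer": 99.0101004948485},
    "runId": "5efdc04ac43add0aba567d76"
}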
from db.entities.connected_node import NodeC
from db.entities.connected_cluster import ClusterC
from db.entities.connected_layer import LayerC
from typing import Dict
#from db.repository import Repository
import json
import requests
from routes.clustersets import get_by_name
from db import repository
(4 source diffs not shown: too large to display)
StartTime: 2020-07-02 12:05:47.067975
FinishTime: 2020-07-02 12:05:54.561853
PopulateWithNewNodes: 2.495718
CalculateWeights: 4.590413
CalculateSimilarity: 0.407747
TotalTime: 7.493878
\ No newline at end of file
StartTime: 2020-07-06 10:43:32.240013
FinishTime: 2020-07-06 10:43:39.110333
PopulateWithNewNodes: 2.399582
CalculateWeights: 4.422768
CalculateSimilarity: 0.04797
TotalTime: 6.87032
RunId: 5f02e43b53a73a48d0eaaed5
\ No newline at end of file
from flask import request, Response
from db.repository import Repository
from db.entities import ClusterSet
from db.entities import clusterset
repo = Repository()
......
from flask import request, Response
from db.repository import Repository
from routes.connRun import add_connected_run
from processing.similarityFiles.miscFunctions import *
repo = Repository()
def add_conn_clusters(inputDict,runId):
''' Stores connected_clusters in the database.
:param Dict() inputDict: Contains the data to insert
:param string runId: Id of the Run
'''
outputJSON = convertLayerDictToJSON(inputDict,runId)
repo.add_connected_clusters(outputJSON)
def get_conn_clusters():
''' Gets connected_clusters from the database.
:returns: Returns connected cluster objects from the DB
:rtype: Dict
'''
result = repo.get_connected_clusters()
if result is None or len(result) == 0:
print("MongoDb Get Error: Response 404")
return Response(status=404)
else:
return result
from flask import request, Response
from db.repository import Repository
from db.entities import connected_run
import datetime
#from db.entities import clusterset #REMOVE?
repo = Repository()
def add_connected_run():
'''
Inserts Run with current Time into the DB
:returns: Returns the _id of the connected_run entry in the DB
:rtype: string
'''
currentTime = datetime.datetime.now()
runDict = {"Datetime" : str(currentTime)}
inserted_result = repo.add_connected_run(runDict)
return str(inserted_result.inserted_id)
def get_connected_run(): ########TODO#################
''' ##TODO## Gets Run from the database.
:returns: Returns Run objects from the DB
:rtype: Dict{_id,datetime}
'''
"""
result = repo.get_connected_clusters()
if result is None or result.retrieved == 0:
print("#### Response 404")
return Response(status=404)
else:
return result
conRun = ConnectedRun(result.sdfsdf)
"""
from flask import request, Response
from db.repository import Repository
from processing.similarityFiles.miscFunctions import convertSimilarityDictToJSON
#from db.entities import clusterset #REMOVE?
repo = Repository()
def add_similarity(inputDict,runId):
''' Stores cluster_similarity in the database.
:param Dict() inputDict: Contains the data to insert
:param string runId: Id of the Run
'''
outputJSON = convertSimilarityDictToJSON(inputDict,runId)
repo.add_similarity(outputJSON)
def get_similarity():
''' Gets cluster_similarity from the database.
:returns: Returns similarity objects from the DB
:rtype: Dict
'''
result = repo.get_similarity()
if result is None or len(result) == 0:
print("MongoDb Get Error: Response 404")
return Response(status=404)
else:
return result
"""
for each cluster in the layer
for each other cluster from all the other layers
find the number of connections
save them into a dictionary (ClusterID(from other layer) -> Nr of connections)
save all the dictionaries in a map? ( ClusterID1 -> dictionary1, ClusterID2 -> dictionary2 )
have a map per layer? (Nr of maps = nr of layers)
Each cluster has a dictionary of connCluster-> nrConections
Each layer has a dictionary of clusters -> dictionaries of nodes/connections
"""
import os
import sys
import math
import datetime
from typing import Dict
##################AUX
modules_path = '../../../modules/'
if os.path.exists(modules_path):
sys.path.insert(1, modules_path)
#### TO BE DELETED #### ^
from db.entities.connected_node import NodeC
from db.entities.connected_cluster import ClusterC
from db.entities.connected_layer import LayerC
from processing.similarityFiles.dataInput import *
from processing.similarityFiles.calculateWeights import *
from processing.similarityFiles.calculateSimilarity import *
from processing.similarityFiles.miscFunctions import *
from processing.similarityFiles.dataOutput import *
from routes.connRun import add_connected_run
def main():
print("\nEntered Main")
timelist = []
timelist.append(currentTime())#starting time
"""
Current Layers
Price_Layer
FinishedTime_Layer
Destination_Layer
StartingPoint_Layer
Reputation_Layer
StartingTime_Layer
User_Layer
"""
layerNameList = ["Price_Layer","FinishedTime_Layer","Destination_Layer"] #Get it from somewhere else?
limitNrCluster = 20 #per Layer
limitNrNodes = 1000 #per Layer
layerDict = getClusterDataFromMongo(layerNameList,limitNrCluster,limitNrNodes)
#layerDict = getClusterDataFromSwagger(limitNrCluster,limitNrNodes) #for Swagger, Change URLs inside the function for different input Data
totalNodes = totalNumberOfNodes(layerDict)
print("Nr. of nodes: " +str(totalNodes))
totalClusters = totalNumberOfClusters(layerDict)
print("Nr. of clusters: " + str(totalClusters))
timelist.append(currentTime())
#calculates the weights between the clusters (weight == number of connections) #return is displayed in outputLayerFunction
layerDict = calculateWeights(layerDict)
timelist.append(currentTime())
#calculates the similarity between the clusters #returns dictionary[ tuple(cluster_label1,cluster_label2),
# listOfSimilarity(layer1,layer2,layer3) ]
similarityDict = calculateSimilarity(layerDict)
timelist.append(currentTime()) #Finishing time
#Write to files
runId = add_connected_run()
print("Outputing data")
outputFileLayerFunction(layerDict,totalNodes,totalClusters,runId)
outputFileSimilFunction(similarityDict,totalNodes,totalClusters,runId)
outputFileTimeFunction(timelist,totalNodes,totalClusters,runId)
#Output to DB
outputMongoConnClustDict(layerDict,runId)
outputMongoSimilarity(similarityDict,runId)
#Currently not used, developed for possible future uses
connClustersFromMongo = getConnClusterDataFromMongo()
similarityArrFromMongo = getSimilarityDataFromMongo()
print("FINISHED")
return
##########START##########
main()
#########FINISH##########
......@@ -17,7 +17,7 @@ class MongoRepositoryBase:
def insert_entry(self, collection_name, content: dict):
collection = self._database[collection_name]
collection.insert_one(content)
return collection.insert_one(content)
def insert_many(self, collection_name, content: list):
collection = self._database[collection_name]
......
......@@ -18,6 +18,6 @@ SEMANTIC_LINKING_DB_PORT = 27017
## Role Stage Discovery
ROLESTAGE_DISCOVERY_HOSTNAME = 'role-stage-discovery'
ROLESTAGE_DISCOVERY_REST_PORT = 80
ROLESTAGE_DISCOVERY_DB_HOSTNAME = f'{ROLESTAGE_DISCOVERY_HOSTNAME}-db'
ROLESTAGE_DISCOVERY_DB_PORT = 27017
\ No newline at end of file
ROLESTAGE_DISCOVERY_REST_PORT = 30103
ROLESTAGE_DISCOVERY_DB_HOSTNAME = f'articonf1.itec.aau.at'
ROLESTAGE_DISCOVERY_DB_PORT = 30104
\ No newline at end of file