Commit a039212b authored by Bogdan

Added Batch feature for Similarity GET

parent eab8c906
...@@ -197,8 +197,20 @@ paths: ...@@ -197,8 +197,20 @@ paths:
operationId: "routes.similarity.get_similarity" operationId: "routes.similarity.get_similarity"
tags: tags:
- "Similarity" - "Similarity"
summary: "Get data of the similarity between clusters" summary: "Get data of the similarity between clusters."
description: "Returns a dictionary where the key is a tuple of cluster_labels (i.e. [0,319]) and the value is the computed similarity between 2 clusters in the tuple, in regard to each layer in the input. \n Note: the tuple clusters have the same layer and the computed similarity is in regard to clusters from OTHER layers." parameters:
- name: "layer_name"
in: "query"
description: "Name of the layer"
required: true
type: "string"
- name: "batchNr"
in: "query"
description: "Batch number (starting from 0)"
required: true
type: "integer"
description: "Data is returned in batches of size 1000. Returns a dictionary where the key is a tuple of cluster_labels (i.e. [0,319]) and the value is the computed similarity between 2 clusters in the tuple, in regard to each layer in the input. \n Note: the tuple clusters have the same layer and the computed similarity is in regard to clusters from OTHER layers."
responses: responses:
200: 200:
description: "Successful operation" description: "Successful operation"
...@@ -396,7 +408,7 @@ definitions: ...@@ -396,7 +408,7 @@ definitions:
example: "1" example: "1"
cluster_layer: cluster_layer:
type: string type: string
example: "Price_layer" example: "Price_Layer"
similarityValues: similarityValues:
type: object type: object
......
...@@ -115,17 +115,28 @@ class Repository(MongoRepositoryBase): ...@@ -115,17 +115,28 @@ class Repository(MongoRepositoryBase):
#super().insert_entry(self._connected_clusters_collection, outputJSON) #super().insert_entry(self._connected_clusters_collection, outputJSON)
return result return result
def get_similarity(self, run_id: str=None): def get_similarity(self,skipNr,batchSize, cluster_layer: str= None, run_id: str=None):
''' Get Similarity Data from DB ''' ''' Get Similarity Data from DB '''
if (run_id == None): if (run_id == None):
entries = super().get_entries(self._similarity_collection, projection={'_id': 0}) if(cluster_layer == None):
entries = super().get_entries(self._similarity_collection, projection={'_id': 0})
else:
entries = super().get_entries(self._similarity_collection, selection={'cluster_layer' : cluster_layer}, projection={'_id': 0})
else: else:
entries = super().get_entries(self._similarity_collection, selection={'runId' : run_id}, projection={'_id': 0}) if(cluster_layer == None):
entries = super().get_entries(self._similarity_collection, selection={'runId' : run_id}, projection={'_id': 0})
else:
entries = super().get_entries(self._similarity_collection, selection={'cluster_layer' : cluster_layer, 'runId' : run_id}, projection={'_id': 0})
#
return list(entries.sort([('_id', -1)]).skip(skipNr).limit(batchSize))
"""
output = [] output = []
for e in entries: for e in entries:
output.append(e) output.append(e)
return output return output
"""
#endregion #endregion
......
...@@ -186,7 +186,7 @@ def getConnClusterDataFromMongo(): ...@@ -186,7 +186,7 @@ def getConnClusterDataFromMongo():
return outputDict return outputDict
def getSimilarityDataFromMongo(): def getSimilarityDataFromMongo(cluster_layer: str= None, batchSize: int=1000, run_id: str=None):
''' '''
Gets the computed Similarity between clusters data from the MongoDB database. The data is found in the collection "similarity" Gets the computed Similarity between clusters data from the MongoDB database. The data is found in the collection "similarity"
...@@ -194,7 +194,15 @@ def getSimilarityDataFromMongo(): ...@@ -194,7 +194,15 @@ def getSimilarityDataFromMongo():
:rtype: Dict :rtype: Dict
''' '''
result = repo.get_similarity() skipNr = 0
result = []
batchResult = repo.get_similarity(skipNr,batchSize,cluster_layer,run_id)
result.extend(batchResult)
while len(batchResult) == batchSize:
skipNr += batchSize
batchResult = repo.get_similarity(skipNr,batchSize,cluster_layer,run_id)
result.extend(batchResult)
return result return result
def getConnectedRunDataFromMongo(): def getConnectedRunDataFromMongo():
......
This source diff could not be displayed because it is too large. You can view the blob instead.
StartTime: 2020-07-07 16:55:42.418309
FinishTime: 2020-07-07 16:55:49.746628
PopulateWithNewNodes: 2.321926
CalculateWeights: 4.499367
CalculateSimilarity: 0.507026
TotalTime: 7.328319
RunId: 5f048cf587e0ee319fa894ed
\ No newline at end of file
StartTime: 2020-07-07 14:57:32.942331
FinishTime: 2020-07-07 14:57:39.489324
PopulateWithNewNodes: 2.102823
CalculateWeights: 4.382948
CalculateSimilarity: 0.061222
TotalTime: 6.546993
RunId: 5f0471438b27390711e31c70
\ No newline at end of file
...@@ -5,13 +5,19 @@ from processing.similarityFiles.miscFunctions import convertSimilarityDictToJSON ...@@ -5,13 +5,19 @@ from processing.similarityFiles.miscFunctions import convertSimilarityDictToJSON
repo = Repository() repo = Repository()
def get_similarity(): def get_similarity(layer_name,batchNr):
''' Gets cluster_similarity from the database. ''' Gets cluster_similarity from the database.
:returns: Returns similarity objects from the DB :returns: Returns similarity objects from the DB
:rtype: Dict :rtype: Dict
''' '''
result = repo.get_similarity() batchSize = 1000
if int(batchNr)<0:
print("Batch number needs to be a positive integer")
return Response(status=404)
skipNr = batchSize*int(batchNr)
#get_similarity(self,skipNr,batchSize, cluster_layer: str= None, run_id: str=None)
result = repo.get_similarity(skipNr, batchSize, layer_name)
if result is None or len(result) == 0: if result is None or len(result) == 0:
print("MongoDb Get Error: Response 404") print("MongoDb Get Error: Response 404")
return Response(status=404) return Response(status=404)
......
...@@ -55,9 +55,10 @@ def main(): ...@@ -55,9 +55,10 @@ def main():
User_Layer User_Layer
""" """
layerNameList = ["Price_Layer","FinishedTime_Layer","Destination_Layer"] #Get it from somewhere else? layerNameList = ["Price_Layer","FinishedTime_Layer","Destination_Layer"] #Get it from somewhere else?
limitNrCluster = 20 #per Layer limitNrCluster = -1 #per Layer # 0< equals noLimit
limitNrNodes = 1000 #per Layer limitNrNodes = -1 #per Layer
"""
layerDict = getClusterDataFromMongo(layerNameList,limitNrCluster,limitNrNodes) layerDict = getClusterDataFromMongo(layerNameList,limitNrCluster,limitNrNodes)
#URLlist = None #URLlist = None
...@@ -93,13 +94,15 @@ def main(): ...@@ -93,13 +94,15 @@ def main():
#Output to DB #Output to DB
outputMongoConnClustDict(layerDict,runId) outputMongoConnClustDict(layerDict,runId)
outputMongoSimilarity(similarityDict,runId) outputMongoSimilarity(similarityDict,runId)
"""
#Currently not used in the calculation of connections/similarity, developed for possible future uses #Currently not used in the calculation of connections/similarity, developed for possible future uses
connClustersFromMongo = getConnClusterDataFromMongo() #connClustersFromMongo = getConnClusterDataFromMongo()
similarityDictFromMongo = calculateSimilarity(connClustersFromMongo) #similarityDictFromMongo = calculateSimilarity(connClustersFromMongo)
similarityArrFromMongo = getSimilarityDataFromMongo() #similarityArrFromMongo = getSimilarityDataFromMongo("Price_Layer") # only 220 similarities, but there are about 20 clusters total
#similarityArrFromMongo = getSimilarityDataFromMongo("Destination_Layer") # ~2.500k similarities
similarityArrFromMongo = getSimilarityDataFromMongo("FinishedTime_Layer")# should have the rest of similarities => 15.000k
connectedRunFromMongo = getConnectedRunDataFromMongo() connectedRunFromMongo = getConnectedRunDataFromMongo()
...@@ -110,6 +113,6 @@ def main(): ...@@ -110,6 +113,6 @@ def main():
def test(): def test():
testInputData() testInputData()
##########START########## ##########START##########
#main() main()
test() #test()
#########FINISH########## #########FINISH##########
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment