Commit a039212b authored by Bogdan

Added Batch feature for Similarity GET

parent eab8c906
......@@ -197,8 +197,20 @@ paths:
operationId: "routes.similarity.get_similarity"
tags:
- "Similarity"
summary: "Get data of the similarity between clusters"
description: "Returns a dictionary where the key is a tuple of cluster_labels (i.e. [0,319]) and the value is the computed similarity between 2 clusters in the tuple, in regard to each layer in the input. \n Note: the tuple clusters have the same layer and the computed similarity is in regard to clusters from OTHER layers."
summary: "Get data of the similarity between clusters."
parameters:
- name: "layer_name"
in: "query"
description: "Name of the layer"
required: true
type: "string"
- name: "batchNr"
in: "query"
description: "Batch number (starting from 0)"
required: true
type: "integer"
description: "Data is returned in batches of size 1000. Returns a dictionary where the key is a tuple of cluster_labels (i.e. [0,319]) and the value is the computed similarity between 2 clusters in the tuple, in regard to each layer in the input. \n Note: the tuple clusters have the same layer and the computed similarity is in regard to clusters from OTHER layers."
responses:
200:
description: "Successful operation"
......@@ -396,7 +408,7 @@ definitions:
example: "1"
cluster_layer:
type: string
example: "Price_layer"
example: "Price_Layer"
similarityValues:
type: object
......
......@@ -115,17 +115,28 @@ class Repository(MongoRepositoryBase):
#super().insert_entry(self._connected_clusters_collection, outputJSON)
return result
def get_similarity(self, skipNr, batchSize, cluster_layer: str = None, run_id: str = None):
    '''
    Get one batch of Similarity Data from DB.

    Entries are sorted by '_id' in descending order; the first `skipNr`
    entries are skipped and `batchSize` is applied as the cursor limit.
    The '_id' field is excluded from every returned entry.

    :param skipNr: number of entries to skip before the batch starts
    :param batchSize: value passed to the cursor's limit()
    :param cluster_layer: if given, only entries whose 'cluster_layer' equals this value are selected
    :param run_id: if given, only entries whose 'runId' equals this value are selected
    :returns: the selected entries of this batch
    :rtype: List
    '''
    # Build the filter incrementally instead of spelling out all four
    # (cluster_layer, run_id) combinations as separate queries.
    selection = {}
    if cluster_layer is not None:
        selection['cluster_layer'] = cluster_layer
    if run_id is not None:
        selection['runId'] = run_id

    # Only pass `selection` when a filter was requested, so the unfiltered
    # call keeps relying on get_entries' own default for that argument.
    query_args = {'projection': {'_id': 0}}
    if selection:
        query_args['selection'] = selection

    entries = super().get_entries(self._similarity_collection, **query_args)
    return list(entries.sort([('_id', -1)]).skip(skipNr).limit(batchSize))
#endregion
......
......@@ -186,7 +186,7 @@ def getConnClusterDataFromMongo():
return outputDict
def getSimilarityDataFromMongo(cluster_layer: str = None, batchSize: int = 1000, run_id: str = None):
    '''
    Gets the computed Similarity between clusters data from the MongoDB database. The data is found in the collection "similarity"

    The data is requested from the repository batch by batch and the
    batches are concatenated in the order they were fetched.

    :param cluster_layer: forwarded to repo.get_similarity as the layer filter (None = no layer filter)
    :param batchSize: number of entries requested per repository call
    :param run_id: forwarded to repo.get_similarity as the run filter (None = no run filter)
    :returns: all entries collected over the fetched batches
    :rtype: List
    '''
    result = []
    skipNr = 0
    while True:
        batchResult = repo.get_similarity(skipNr, batchSize, cluster_layer, run_id)
        result.extend(batchResult)
        # A batch whose length differs from batchSize is the last one.
        # An empty batch also means nothing is left to fetch; checking it
        # explicitly prevents an endless loop when batchSize is 0, where an
        # empty batch would otherwise satisfy len(batch) == batchSize forever.
        if not batchResult or len(batchResult) != batchSize:
            break
        skipNr += batchSize
    return result
def getConnectedRunDataFromMongo():
......
This source diff could not be displayed because it is too large. You can view the blob instead.
StartTime: 2020-07-07 16:55:42.418309
FinishTime: 2020-07-07 16:55:49.746628
PopulateWithNewNodes: 2.321926
CalculateWeights: 4.499367
CalculateSimilarity: 0.507026
TotalTime: 7.328319
RunId: 5f048cf587e0ee319fa894ed
\ No newline at end of file
StartTime: 2020-07-07 14:57:32.942331
FinishTime: 2020-07-07 14:57:39.489324
PopulateWithNewNodes: 2.102823
CalculateWeights: 4.382948
CalculateSimilarity: 0.061222
TotalTime: 6.546993
RunId: 5f0471438b27390711e31c70
\ No newline at end of file
......@@ -5,13 +5,19 @@ from processing.similarityFiles.miscFunctions import convertSimilarityDictToJSON
repo = Repository()
def get_similarity():
def get_similarity(layer_name,batchNr):
''' Gets cluster_similarity from the database.
:returns: Returns similarity objects from the DB
:rtype: Dict
'''
result = repo.get_similarity()
batchSize = 1000
if int(batchNr)<0:
print("Batch number needs to be a positive integer")
return Response(status=404)
skipNr = batchSize*int(batchNr)
#get_similarity(self,skipNr,batchSize, cluster_layer: str= None, run_id: str=None)
result = repo.get_similarity(skipNr, batchSize, layer_name)
if result is None or len(result) == 0:
print("MongoDb Get Error: Response 404")
return Response(status=404)
......
......@@ -55,9 +55,10 @@ def main():
User_Layer
"""
layerNameList = ["Price_Layer","FinishedTime_Layer","Destination_Layer"] #Get it from somewhere else?
limitNrCluster = 20 #per Layer
limitNrNodes = 1000 #per Layer
limitNrCluster = -1 #per Layer # 0< equals noLimit
limitNrNodes = -1 #per Layer
"""
layerDict = getClusterDataFromMongo(layerNameList,limitNrCluster,limitNrNodes)
#URLlist = None
......@@ -93,13 +94,15 @@ def main():
#Output to DB
outputMongoConnClustDict(layerDict,runId)
outputMongoSimilarity(similarityDict,runId)
"""
#Currently not used in the calculation of connections/similarity, developed for possible future uses
connClustersFromMongo = getConnClusterDataFromMongo()
similarityDictFromMongo = calculateSimilarity(connClustersFromMongo)
#connClustersFromMongo = getConnClusterDataFromMongo()
#similarityDictFromMongo = calculateSimilarity(connClustersFromMongo)
similarityArrFromMongo = getSimilarityDataFromMongo()
#similarityArrFromMongo = getSimilarityDataFromMongo("Price_Layer") # only 220 similarities, but there are about 20 clusters total
#similarityArrFromMongo = getSimilarityDataFromMongo("Destination_Layer") # ~2.500k similarities
similarityArrFromMongo = getSimilarityDataFromMongo("FinishedTime_Layer")# should have the rest of similarities => 15.000k
connectedRunFromMongo = getConnectedRunDataFromMongo()
......@@ -110,6 +113,6 @@ def main():
def test():
    '''
    Alternative entry point: delegates to testInputData(), which is
    defined outside the visible part of this file.
    '''
    testInputData()
##########START##########
#main()
test()
main()
#test()
#########FINISH##########
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment