Commit a039212b authored by Bogdan's avatar Bogdan

Added Batch feature for Similarity GET

parent eab8c906
......@@ -197,8 +197,20 @@ paths:
operationId: "routes.similarity.get_similarity"
tags:
- "Similarity"
summary: "Get data of the similarity between clusters"
description: "Returns a dictionary where the key is a tuple of cluster_labels (i.e. [0,319]) and the value is the computed similarity between 2 clusters in the tuple, in regard to each layer in the input. \n Note: the tuple clusters have the same layer and the computed similarity is in regard to clusters from OTHER layers."
summary: "Get data of the similarity between clusters."
parameters:
- name: "layer_name"
in: "query"
description: "Name of the layer"
required: true
type: "string"
- name: "batchNr"
in: "query"
description: "Batch number (starting from 0)"
required: true
type: "integer"
description: "Data is returned in batches of size 1000. Returns a dictionary where the key is a tuple of cluster_labels (i.e. [0,319]) and the value is the computed similarity between 2 clusters in the tuple, in regard to each layer in the input. \n Note: the tuple clusters have the same layer and the computed similarity is in regard to clusters from OTHER layers."
responses:
200:
description: "Successful operation"
......@@ -396,7 +408,7 @@ definitions:
example: "1"
cluster_layer:
type: string
example: "Price_layer"
example: "Price_Layer"
similarityValues:
type: object
......
......@@ -115,17 +115,28 @@ class Repository(MongoRepositoryBase):
#super().insert_entry(self._connected_clusters_collection, outputJSON)
return result
def get_similarity(self, run_id: str=None):
def get_similarity(self,skipNr,batchSize, cluster_layer: str= None, run_id: str=None):
''' Get Similarity Data from DB '''
if (run_id == None):
entries = super().get_entries(self._similarity_collection, projection={'_id': 0})
if(cluster_layer == None):
entries = super().get_entries(self._similarity_collection, projection={'_id': 0})
else:
entries = super().get_entries(self._similarity_collection, selection={'cluster_layer' : cluster_layer}, projection={'_id': 0})
else:
entries = super().get_entries(self._similarity_collection, selection={'runId' : run_id}, projection={'_id': 0})
if(cluster_layer == None):
entries = super().get_entries(self._similarity_collection, selection={'runId' : run_id}, projection={'_id': 0})
else:
entries = super().get_entries(self._similarity_collection, selection={'cluster_layer' : cluster_layer, 'runId' : run_id}, projection={'_id': 0})
#
return list(entries.sort([('_id', -1)]).skip(skipNr).limit(batchSize))
"""
output = []
for e in entries:
output.append(e)
return output
"""
#endregion
......
......@@ -186,7 +186,7 @@ def getConnClusterDataFromMongo():
return outputDict
def getSimilarityDataFromMongo():
def getSimilarityDataFromMongo(cluster_layer: str= None, batchSize: int=1000, run_id: str=None):
'''
Gets the computed Similarity between clusters data from the MongoDB database. The data is found in the collection "similarity"
......@@ -194,7 +194,15 @@ def getSimilarityDataFromMongo():
:rtype: Dict
'''
result = repo.get_similarity()
skipNr = 0
result = []
batchResult = repo.get_similarity(skipNr,batchSize,cluster_layer,run_id)
result.extend(batchResult)
while len(batchResult) == batchSize:
skipNr += batchSize
batchResult = repo.get_similarity(skipNr,batchSize,cluster_layer,run_id)
result.extend(batchResult)
return result
def getConnectedRunDataFromMongo():
......
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
StartTime: 2020-07-07 16:55:42.418309
FinishTime: 2020-07-07 16:55:49.746628
PopulateWithNewNodes: 2.321926
CalculateWeights: 4.499367
CalculateSimilarity: 0.507026
TotalTime: 7.328319
RunId: 5f048cf587e0ee319fa894ed
\ No newline at end of file
StartTime: 2020-07-07 14:57:32.942331
FinishTime: 2020-07-07 14:57:39.489324
PopulateWithNewNodes: 2.102823
CalculateWeights: 4.382948
CalculateSimilarity: 0.061222
TotalTime: 6.546993
RunId: 5f0471438b27390711e31c70
\ No newline at end of file
......@@ -5,13 +5,19 @@ from processing.similarityFiles.miscFunctions import convertSimilarityDictToJSON
repo = Repository()
def get_similarity():
def get_similarity(layer_name,batchNr):
''' Gets cluster_similarity from the database.
:returns: Returns similarity objects from the DB
:rtype: Dict
'''
result = repo.get_similarity()
batchSize = 1000
if int(batchNr)<0:
print("Batch number needs to be a positive integer")
return Response(status=404)
skipNr = batchSize*int(batchNr)
#get_similarity(self,skipNr,batchSize, cluster_layer: str= None, run_id: str=None)
result = repo.get_similarity(skipNr, batchSize, layer_name)
if result is None or len(result) == 0:
print("MongoDb Get Error: Response 404")
return Response(status=404)
......
......@@ -55,9 +55,10 @@ def main():
User_Layer
"""
layerNameList = ["Price_Layer","FinishedTime_Layer","Destination_Layer"] #Get it from somewhere else?
limitNrCluster = 20 #per Layer
limitNrNodes = 1000 #per Layer
limitNrCluster = -1 #per Layer # 0< equals noLimit
limitNrNodes = -1 #per Layer
"""
layerDict = getClusterDataFromMongo(layerNameList,limitNrCluster,limitNrNodes)
#URLlist = None
......@@ -93,13 +94,15 @@ def main():
#Output to DB
outputMongoConnClustDict(layerDict,runId)
outputMongoSimilarity(similarityDict,runId)
"""
#Currently not used in the calculation of connections/similarity, developed for possible future uses
connClustersFromMongo = getConnClusterDataFromMongo()
similarityDictFromMongo = calculateSimilarity(connClustersFromMongo)
#connClustersFromMongo = getConnClusterDataFromMongo()
#similarityDictFromMongo = calculateSimilarity(connClustersFromMongo)
similarityArrFromMongo = getSimilarityDataFromMongo()
#similarityArrFromMongo = getSimilarityDataFromMongo("Price_Layer") # only 220 similarities, but there are about 20 clusters total
#similarityArrFromMongo = getSimilarityDataFromMongo("Destination_Layer") # ~2.500k similarities
similarityArrFromMongo = getSimilarityDataFromMongo("FinishedTime_Layer")# should have the rest of similarities => 15.000k
connectedRunFromMongo = getConnectedRunDataFromMongo()
......@@ -110,6 +113,6 @@ def main():
def test():
testInputData()
##########START##########
#main()
test()
main()
#test()
#########FINISH##########
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment