Commit 8f11d009 authored by Bogdan's avatar Bogdan

Refactored Rolestage Endpoints and documentation

Added documentation and changed the endpoints to match REST conventions; added query attributes for use-cases, tables and layers. Also adjusted the repository and similarity-calculation code to work with the refactored/added use-cases and tables.
parent 20247ffe
......@@ -51,6 +51,31 @@ This microservice contains the nodes from the transactions preprocessed as defin
# Role Stage Discovery Microservice
This microservice contains the communities based on clusters and similarities between communities. It additionally contains time slices with subsets of clusters, which's transaction happened in the corresponding time.
This microservice contains the communities based on clusters and similarities between communities. It additionally contains time slices with subsets of clusters whose transactions happened during the corresponding time period. Schemas and input data are supplied by the Business Logic microservice, the Semantic Linking microservice and the Trace Retrieval microservice.
The endpoints are currently refactored, so please check the Swagger UI autogenerated documentation on its website.
\ No newline at end of file
## RunId
When a similarity computation is executed, it has an associated RunId which is used to uniquely identify that execution.
```GET``` returns all RunIds in the db.
## Similarity
Returns the computed similarity. Two clusters belonging to the SAME layer will be given a similarity value by comparing them to another cluster belonging to a DIFFERENT layer. This is done for every cluster in the input data. This query returns all calculated similarity values that match the given criteria (e.g. belonging to a use-case, table, etc.).
```GET .../{use_case}/tables/{table}/clusterSimilarity``` returns all similarity values for the given use-case and table.
## Connected Cluster
Intermediary data-structure used only by the function which computes the similarity. Clusters are connected only to other clusters belonging to a DIFFERENT layer.
```GET .../{use_case}/tables/{table}/connectedClusters``` returns all connected clusters for the given use-case and table.
## Input and Schemas
Returns the Schemas and/or Input data used for calculating the clustering which is further used for calculating the similarity.
```GET .../{use_case}/layers``` returns the layer schemas for the given use-case.
```GET .../{use_case}/tables/{use_case_table}/layers/{layer_name}/clusters``` returns the processed cluster input data.
......@@ -130,7 +130,49 @@ class Repository(MongoRepositoryBase):
if (run_id == None):
entries = super().get_entries(self._connected_clusters_collection, projection={'_id': 0})
entries = super().get_entries(self._similarity_collection, selection={'cluster_runId' : run_id}, projection={'_id': 0})
entries = super().get_entries(self._connected_clusters_collection, selection={'cluster_runId' : run_id}, projection={'_id': 0})
output = []
for ent in entries:
return output
# print(ent)
#return [Cluster(cluster_dict=e, from_db=True) for e in entries]
def get_connected_clusters_for_use_case(self,use_case, run_id: str=None):#, layer_name: str):
''' Get Connected Clusters Data given the Use-Case from DB '''
if (run_id == None):
entries = super().get_entries(self._connected_clusters_collection, selection={'cluster_use_case': use_case}, projection={'_id': 0})
entries = super().get_entries(self._connected_clusters_collection, selection={'cluster_runId' : run_id, 'cluster_use_case': use_case}, projection={'_id': 0})
output = []
for ent in entries:
return output
# print(ent)
#return [Cluster(cluster_dict=e, from_db=True) for e in entries]
def get_connected_clusters_for_table(self,use_case,table, run_id: str=None):#, layer_name: str):
''' Get Connected Clusters Data given the Use-Case and Table from DB '''
if (run_id == None):
entries = super().get_entries(self._connected_clusters_collection, selection={'cluster_use_case': use_case,'cluster_table': table}, projection={'_id': 0})
entries = super().get_entries(self._connected_clusters_collection, selection={'cluster_runId' : run_id,'cluster_use_case': use_case,'cluster_table': table}, projection={'_id': 0})
output = []
for ent in entries:
return output
# print(ent)
#return [Cluster(cluster_dict=e, from_db=True) for e in entries]
def get_connected_clusters_by_name(self,use_case, table, layer_name, run_id: str=None):#, layer_name: str):
''' Get Connected Clusters Data from DB '''
if (run_id == None):
entries = super().get_entries(self._connected_clusters_collection, selection={'cluster_use_case': use_case,'cluster_table': table, 'cluster_layer' : layer_name}, projection={'_id': 0})
entries = super().get_entries(self._connected_clusters_collection, selection={'cluster_runId' : run_id,'cluster_use_case': use_case,'cluster_table': table, 'cluster_layer' : layer_name}, projection={'_id': 0})
output = []
for ent in entries:
......@@ -175,8 +217,38 @@ class Repository(MongoRepositoryBase):
return output
def get_similarity_use_case(self,skipNr,batchSize,use_case, run_id: str=None):
''' Get Similarity Data from DB '''
if (run_id == None):
entries = super().get_entries(self._similarity_collection, selection={'use_case' : use_case}, projection={'_id': 0})
entries = super().get_entries(self._similarity_collection, selection={'use_case' : use_case, 'runId' : run_id}, projection={'_id': 0})
return list(entries.sort([('_id', -1)]).skip(skipNr).limit(batchSize))
def get_similarity_table(self,skipNr,batchSize,use_case,table, run_id: str=None):
''' Get Similarity Data from DB '''
if (run_id == None):
entries = super().get_entries(self._similarity_collection, selection={'use_case' : use_case, 'table': table}, projection={'_id': 0})
entries = super().get_entries(self._similarity_collection, selection={'use_case' : use_case, 'table': table, 'runId' : run_id}, projection={'_id': 0})
return list(entries.sort([('_id', -1)]).skip(skipNr).limit(batchSize))
def get_similarity_layer(self,skipNr,batchSize,use_case,table,layer, run_id: str=None):
''' Get Similarity Data from DB '''
if (run_id == None):
entries = super().get_entries(self._similarity_collection, selection={'use_case' : use_case, 'table': table, 'cluster_layer' : layer}, projection={'_id': 0})
entries = super().get_entries(self._similarity_collection, selection={'use_case' : use_case, 'table': table, 'cluster_layer' : layer, 'runId' : run_id}, projection={'_id': 0})
return list(entries.sort([('_id', -1)]).skip(skipNr).limit(batchSize))
#region connected_run
......@@ -67,7 +67,7 @@ def loadJson(url:str) :
return jsonData
def getClusterDataFromMongo(layerNameList,limitNrCluster,limitNrNodes):
def getClusterDataFromMongo(layerNameList,limitNrCluster,limitNrNodes,use_case,table):
''' Calculates the nr of connections/weights between the clusters contained in the "inputLayerDict". Connections are made between clusters from DIFFERENT layers.
:param List[string] layerNameList: Name of the layers to pull from the DB
......@@ -93,7 +93,7 @@ def getClusterDataFromMongo(layerNameList,limitNrCluster,limitNrNodes):
#imports and translates the data from JSON into usefull format
#returns layerdiction -> Layer -> clusterDict -> Cluster -> nodesDict -> Nodes
for name in layerNameList:
newData = get_mongoDB_cluster_by_layerName(name)#repo.get_clusters_for_layer(name)
newData = get_mongoDB_cluster_by_layerName(use_case,table,name)#repo.get_clusters_for_layer(name)
if newData is not None and len(newData) != 0:
layerDict = populateWithNewNodesSingleLayer(newData[0:limitNrCluster],layerDict,limitNrNodes)
......@@ -290,7 +290,7 @@ def makeChangeNodesDict(inputList,cluster_label,cluster_layer):
outputDict[key]= newNode
return outputDict
def get_mongoDB_cluster_by_layerName(name):
res = repo.get_clusters_for_layer(name)
def get_mongoDB_cluster_by_layerName(use_case, table , layer_name):
res = repo.get_clusters_for_layer(use_case, table, layer_name)
return [c.to_serializable_dict() for c in res]
......@@ -6,7 +6,7 @@ from processing.similarityFiles.miscFunctions import *
from db.repository import Repository
repo = Repository()
def outputFileLayerFunction(layerDict,limitNrNodes,limitNrCluster,runId):
def outputFileLayerFunction(layerDict,limitNrNodes,limitNrCluster,runId,table,use_case):
''' Writes the layerDict data to a JSON file.
:param Dict{string: Layer} layerDict: Object which contains Data about the Layers, Clusters and Nodes
......@@ -17,7 +17,7 @@ def outputFileLayerFunction(layerDict,limitNrNodes,limitNrCluster,runId):
layerJSON = convertLayerDictToJSON(layerDict,runId)
layerJSON = convertLayerDictToJSON(layerDict,runId,table,use_case)
outputJSON = json.dumps(layerJSON, default=lambda o: o.__dict__, indent=4)
......@@ -28,7 +28,7 @@ def outputFileLayerFunction(layerDict,limitNrNodes,limitNrCluster,runId):
def outputFileSimilFunction(similarityDict,limitNrNodes,limitNrCluster,runId):
def outputFileSimilFunction(similarityDict,limitNrNodes,limitNrCluster,runId,table,use_case):
''' Writes the similarityDict data to a JSON file.
......@@ -40,7 +40,7 @@ def outputFileSimilFunction(similarityDict,limitNrNodes,limitNrCluster,runId):
similJSON = convertSimilarityDictToJSON(similarityDict,runId)
similJSON = convertSimilarityDictToJSON(similarityDict,runId,table,use_case)
outputJSON = json.dumps(similJSON, default=lambda o: o.__dict__, indent=4)
......@@ -77,7 +77,7 @@ def outputFileTimeFunction(timelist,limitNrNodes,limitNrCluster,runId):
print("Error occured when writing the resultTimeExec file")
def outputMongoConnClustDict(inputDict,runId):
def outputMongoConnClustDict(inputDict,runId,table,use_case):
''' Stores connected_clusters in the database.
......@@ -89,9 +89,9 @@ def outputMongoConnClustDict(inputDict,runId):
#inputDict["Timestamp"] = str(
def outputMongoSimilarity(inputDict,runId):
def outputMongoSimilarity(inputDict,runId,table,use_case):
''' Stores cluster_similarity in the database.
:param Dict() inputDict: Contains the data to insert
......@@ -99,7 +99,7 @@ def outputMongoSimilarity(inputDict,runId):
:param string runId: Id of the Run
def add_connected_run():
......@@ -116,7 +116,7 @@ def add_connected_run():
inserted_result = repo.add_connected_run(runDict)
return str(inserted_result.inserted_id)
def add_conn_clusters(inputDict,runId):
def add_conn_clusters(inputDict,runId,table,use_case):
''' Stores connected_clusters in the database.
:param Dict() inputDict: Contains the data to insert
......@@ -125,11 +125,11 @@ def add_conn_clusters(inputDict,runId):
outputJSON = convertLayerDictToJSON(inputDict,runId)
outputJSON = convertLayerDictToJSON(inputDict,runId,table,use_case)
for element in outputJSON:
def add_similarity(inputDict,runId):
def add_similarity(inputDict,runId,table,use_case):
''' Stores cluster_similarity in the database.
:param Dict() inputDict: Contains the data to insert
......@@ -138,6 +138,6 @@ def add_similarity(inputDict,runId):
outputJSON = convertSimilarityDictToJSON(inputDict,runId)
outputJSON = convertSimilarityDictToJSON(inputDict,runId,table,use_case)
for element in outputJSON:
\ No newline at end of file
......@@ -42,7 +42,7 @@ def totalNumberOfClusters(inputLayerDict):
return clustCount
def convertLayerDictToJSON(layerDict, runId):
def convertLayerDictToJSON(layerDict, runId,table,use_case):
''' Converts a Layer object to JSON format.
:param Dict{string: Layer} layerDict: Object which contains Data about the Layers, Clusters and Nodes
......@@ -56,7 +56,9 @@ def convertLayerDictToJSON(layerDict, runId):
for curCluster in curLayer.cluster_Dict.values():
"cluster_label" : curCluster.cluster_label,
"cluster_layer" : curCluster.cluster_layer,
"cluster_layer" : curCluster.cluster_layer,
"cluster_table" : table,
"cluster_use_case": use_case,
"cluster_runId" : runId,
"cluster_connClustDict" : changeTupleDictToDictList(curCluster.cluster_connClustDict),
"cluster_connNodesDict" : getFrozensetFromConnNodesDict(curCluster.cluster_connNodesDict), #Don
......@@ -109,7 +111,7 @@ def getFrozensetFromConnNodesDict(inputDict):
return output
def convertSimilarityDictToJSON(inputDict,runId):
def convertSimilarityDictToJSON(inputDict,runId,table,use_case):
''' Converts a Similarity Dictionary to JSON format. For outputting to DB
:param Dict{} similarityDict: Object which contains Data about the Computed similarities between Clusters
......@@ -125,6 +127,8 @@ def convertSimilarityDictToJSON(inputDict,runId):
auxDict["cluster_layer"] = tupleKey[2]
auxDict["similarityValues"] = inputDict[tupleKey]
auxDict["runId"] = runId
auxDict["table"] = table
auxDict["use_case"] = use_case
similToJSON = similList
#outputJSON = json.dumps(similToJSON, default=lambda o: o.__dict__, indent=4)
......@@ -39,7 +39,7 @@ from processing.similarityFiles.dataOutput import *
outputToFileFLAG = True
def main(layerNameList:List[str] = ["Price_Layer","FinishedTime_Layer","Destination_Layer"]):
def main(layerNameList:List[str] , table:str , use_case: str):
Executes the similarity calculation by calculating weights between clusters in different layers.
Then calculating the Euclidean distance between nodes in the same layer based on one other layer each.
......@@ -48,7 +48,8 @@ def main(layerNameList:List[str] = ["Price_Layer","FinishedTime_Layer","Destinat
:param layerNameList: The list of layer names as strings
print("Entered Similarity Main")
if len(layerNameList)==0:
timelist = []
timelist.append(currentTime())#starting time
......@@ -67,7 +68,7 @@ def main(layerNameList:List[str] = ["Price_Layer","FinishedTime_Layer","Destinat
limitNrNodes = -1 #per Layer
layerDict = getClusterDataFromMongo(layerNameList,limitNrCluster,limitNrNodes)
layerDict = getClusterDataFromMongo(layerNameList,limitNrCluster,limitNrNodes,use_case,table)
if layerDict is None or len(layerDict) == 0:
LOGGER.error(f"No data for any of the following layers existed: {str(layerNameList)}. Similarity calculation was not performed.")
......@@ -98,13 +99,13 @@ def main(layerNameList:List[str] = ["Price_Layer","FinishedTime_Layer","Destinat
if (outputToFileFLAG == True):
print("Outputing data")
#Output to DB
#Currently not used in the calculation of connections/similarity, developed for possible future uses
......@@ -122,6 +123,6 @@ def main(layerNameList:List[str] = ["Price_Layer","FinishedTime_Layer","Destinat
if __name__ is '__main__':
#if __name__ is '__main__':
......@@ -4,8 +4,8 @@ from db.entities import ClusterSet
repo = Repository()
def get_by_name(use_case, use_case_table, name):
res = repo.get_clusters_for_layer(use_case, use_case_table, name)
def get_by_name(use_case, table, layer_name):
res = repo.get_clusters_for_layer(use_case, table, layer_name)
if res is None or len(res) == 0:
return Response(status=404)
......@@ -16,3 +16,45 @@ def get_conn_clusters():
return result
def get_conn_clusters_use_case(use_case):
''' Gets connected_clusters from the database.
:returns: Returns similarity objects from the DB
:rtype: Dict
result = repo.get_connected_clusters_for_use_case(use_case)
if result is None or len(result) == 0:
print("MongoDb Get Error: Response 404")
return Response(status=404)
return result
def get_conn_clusters_table(use_case,table):
''' Gets connected_clusters from the database.
:returns: Returns similarity objects from the DB
:rtype: Dict
result = repo.get_connected_clusters_for_table(use_case, table)
if result is None or len(result) == 0:
print("MongoDb Get Error: Response 404")
return Response(status=404)
return result
def get_conn_clusters_name(use_case,table,layer_name):
''' Gets connected_clusters from the database.
:returns: Returns similarity objects from the DB
:rtype: Dict
result = repo.get_connected_clusters_by_name(use_case,table,layer_name)
if result is None or len(result) == 0:
print("MongoDb Get Error: Response 404")
return Response(status=404)
return result
......@@ -26,15 +26,15 @@ def get_by_use_case(use_case):
return Response(status=404)
def get_by_table(use_case, use_case_table):
res = repo.get_layers_for_table(use_case, use_case_table)
def get_by_table(use_case, table):
res = repo.get_layers_for_table(use_case, table)
if len(res) > 0:
return [l.to_serializable_dict() for l in res]
return Response(status=404)
def get_by_name(use_case, use_case_table, name):
res = repo.get_layer_by_name(use_case, use_case_table, name)
def get_by_name(use_case, table, layer_name):
res = repo.get_layer_by_name(use_case, table, layer_name)
if res is not None:
return res.to_serializable_dict()
......@@ -43,8 +43,8 @@ def get_by_name(use_case, use_case_table, name):
#region nodes
def get_nodes(use_case, use_case_table, name):
res = repo.get_layer_nodes(use_case, use_case_table, name)
def get_nodes(use_case, table, layer_name):
res = repo.get_layer_nodes(use_case, table, layer_name)
# print(res)
return res
......@@ -23,3 +23,60 @@ def get_similarity(layer_name,batchNr):
return Response(status=404)
return result
def get_similarity_use_case(use_case,batchNr):
''' Gets cluster_similarity from the database.
:returns: Returns similarity objects from the DB
:rtype: Dict
batchSize = 1000
if int(batchNr)<0:
print("Batch number needs to be a positive integer")
return Response(status=404)
skipNr = batchSize*int(batchNr)
#get_similarity(self,skipNr,batchSize, cluster_layer: str= None, run_id: str=None)
result = repo.get_similarity_use_case(skipNr, batchSize, use_case)
if result is None or len(result) == 0:
print("MongoDb Get Error: Response 404")
return Response(status=404)
return result
def get_similarity_table(use_case,table,batchNr):
''' Gets cluster_similarity from the database.
:returns: Returns similarity objects from the DB
:rtype: Dict
batchSize = 1000
if int(batchNr)<0:
print("Batch number needs to be a positive integer")
return Response(status=404)
skipNr = batchSize*int(batchNr)
#get_similarity(self,skipNr,batchSize, cluster_layer: str= None, run_id: str=None)
result = repo.get_similarity_table(skipNr, batchSize, use_case,table)
if result is None or len(result) == 0:
print("MongoDb Get Error: Response 404")
return Response(status=404)
return result
def get_similarity_layer(use_case,table,layer_name,batchNr):
''' Gets cluster_similarity from the database.
:returns: Returns similarity objects from the DB
:rtype: Dict
batchSize = 1000
if int(batchNr)<0:
print("Batch number needs to be a positive integer")
return Response(status=404)
skipNr = batchSize*int(batchNr)
#get_similarity(self,skipNr,batchSize, cluster_layer: str= None, run_id: str=None)
result = repo.get_similarity_layer(skipNr, batchSize,use_case,table, layer_name)
if result is None or len(result) == 0:
print("MongoDb Get Error: Response 404")
return Response(status=404)
return result
......@@ -4,8 +4,8 @@ from db.entities import TimeSlice
repo = Repository()
def get_by_name(use_case, use_case_table, name):
res = repo.get_time_slices_by_name(use_case, use_case_table, name)
def get_by_name(use_case, table, layer_name):
res = repo.get_time_slices_by_name(use_case, table, layer_name)
if res is not None and len(res) != 0:
return [e.to_serializable_dict() for e in res]
......@@ -7,17 +7,38 @@ repo = Repository()
def run_similarity_calc_per_use_case():
layers = repo.get_layers()
uc_layers = {}
# uc_layers = {}
# for layer in layers:
# uc = layer.use_case
# if uc not in uc_layers:
# uc_layers[uc] = []
# uc_layers[uc].append(layer.layer_name)
# for key in uc_layers:
# layers2 = uc_layers[key]
# print(f"Running for use case {key} with layers {str(layers2)}.")
# SimilarityCalc.main(layerNameList=layers2)
uc_dict = dict()
# use_case[table[layer_name]]
for layer in layers:
uc = layer.use_case
if uc not in uc_layers:
uc_layers[uc] = []
for key in uc_layers:
layers = uc_layers[key]
print(f"Running for use case {key} with layers {str(layers)}.")
use_case = layer.use_case
table = layer.use_case_table
if use_case not in uc_dict:
uc_dict[use_case] = dict()
#aux = uc_dict[use_case]
if table not in uc_dict[use_case]:
uc_dict[use_case][table] = []
for uc in uc_dict:
for table in uc_dict[uc]:
layers2 = uc_dict[uc][table]
print(f"Running for use case {uc}, table {table}, with layers {str(layers2)}.")
if __name__ == '__main__':
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment