Commit f12f43ff authored by Bogdan

Fixed similarity calculation, worked on the dummy data upload, added a delete function to the repository

parent 9c4399c4
......@@ -11,10 +11,11 @@ class ClusterC:
:param cluster_connClustDict: Layer name, cluster label and weight for each 'dest' cluster
'''
def __init__(self,cluster_label,cluster_layer,cluster_runId,cluster_containedNodesDict,cluster_connNodesDict,cluster_connClustDict):
def __init__(self,cluster_label,cluster_layer,use_case,cluster_runId,cluster_containedNodesDict,cluster_connNodesDict,cluster_connClustDict):
self.cluster_label = cluster_label
self.cluster_layer = cluster_layer
self.use_case = use_case
self.cluster_runId = cluster_runId
self.cluster_containedNodesDict = cluster_containedNodesDict #Keys are frozensets of (uniqueID, cluster, layer) tuples; value is the node's UniqueID
self.cluster_connNodesDict = cluster_connNodesDict #Keys are frozensets of tuples (uniqueID:str, node_cluster:str, node_layer:str); values are NodeC objects
......
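For orientation, a hedged illustration of the key convention described in the comments above; the diff does not pin down whether the frozenset wraps the whole tuple or its three elements, so treat the layout below as an assumption:

```python
# Hypothetical key construction for cluster_containedNodesDict; the exact
# frozenset layout (wrapping the tuple vs. its elements) is an assumption.
key = frozenset(("node-42", "cluster_3", "Price_Layer"))  # uniqueID, cluster, layer
cluster_containedNodesDict = {key: "node-42"}             # value: the node's UniqueID
```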
......@@ -9,7 +9,8 @@ class NodeC:
'''
def __init__(self, node_cluster, node_layer, uniqueID):
def __init__(self, node_cluster, node_layer, uniqueID,use_case):
self.node_cluster = node_cluster # str
self.node_layer = node_layer # str
self.uniqueID = uniqueID # str
\ No newline at end of file
self.uniqueID = uniqueID # str
self.use_case = use_case # str
\ No newline at end of file
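Both constructors now take a use_case argument, so every call site has to be updated. A minimal sketch, assuming the classes are importable (the module paths below are hypothetical):

```python
from entities.nodeC import NodeC        # hypothetical import path
from entities.clusterC import ClusterC  # hypothetical import path

# NodeC: node_cluster, node_layer, uniqueID, plus the new use_case
node = NodeC("cluster_3", "Price_Layer", "node-42", "paper")

# ClusterC: use_case now sits third in the parameter list
cluster = ClusterC(
    cluster_label="cluster_3",
    cluster_layer="Price_Layer",
    use_case="paper",
    cluster_runId=None,
    cluster_containedNodesDict={},
    cluster_connNodesDict={},
    cluster_connClustDict={},
)
```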
......@@ -28,6 +28,15 @@ class Repository(MongoRepositoryBase):
self._connected_run = 'connected_run'
#TODO find a better name
def delete_FIND_A_BETTER_NAME(self, use_case: str):
colList = [self._layer_collection, self._clusters_collection, self._connected_clusters_collection, self._layer_nodes_collection, self._similarity_collection]
criteria = {"use_case": use_case}
for col in colList:
res = super().delete_entry(col, criteria, True)
print("###MASS DELETE " + col + " " + str(res))
#region Layers
def add_layer(self, layer: Layer):
super().insert_entry(self._layer_collection, layer.to_serializable_dict())
......@@ -65,6 +74,9 @@ class Repository(MongoRepositoryBase):
criteria = {"use_case" : use_case}
res = super().delete_entry(collection_name,criteria,True)
print ("###REPOSITORY: res= "+ str(res))
#endregion Layers
......@@ -96,6 +108,11 @@ class Repository(MongoRepositoryBase):
def add_clusters(self, clusters: List[Cluster]):
cluster_dicts = [c.to_serializable_dict(for_db=True) for c in clusters]
super().insert_many(self._clusters_collection, cluster_dicts)
def get_clusters_for_layer_of_use_case(self, use_case: str,layer_name: str):
entries = super().get_entries(self._clusters_collection, selection={'use_case': use_case, 'layer_name': layer_name}, projection={'_id': 0})
return [Cluster(cluster_dict=e, from_db=True) for e in entries]
def get_clusters_for_layer(self, use_case: str, use_case_table: str, layer_name: str) -> List[Cluster]:
entries = super().get_entries(self._clusters_collection, selection={'use_case': use_case, 'use_case_table': use_case_table, 'layer_name': layer_name}, projection={'_id': 0})
......@@ -184,7 +201,8 @@ class Repository(MongoRepositoryBase):
output.append(e)
return output
"""
def delete_all_similarity_data(self):
super().drop_collection(self._similarity_collection)
#endregion
......
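Taken together, the new repository methods allow wiping and re-querying a use case. A hedged usage sketch (the import mirrors the `from db.repository import *` appearing later in this commit; use case and layer names are examples):

```python
from db.repository import Repository

repo = Repository()

# Query clusters of one layer, now filtered by use case as well
clusters = repo.get_clusters_for_layer_of_use_case("paper", "Price_Layer")

# Mass-delete every document of the use case across the five collections
# (method name still carries the TODO from the commit)
repo.delete_FIND_A_BETTER_NAME("paper")

# Drop the whole similarity collection
repo.delete_all_similarity_data()
```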
......@@ -67,7 +67,7 @@ def loadJson(url:str) :
return jsonData
def getClusterDataFromMongo(layerNameList,limitNrCluster,limitNrNodes):
def getClusterDataFromMongo(layerNameList,limitNrCluster,limitNrNodes,use_case:str):
''' Fetches the cluster data for the given layers from the database and translates it into the internal layer dictionary. Connection weights are later computed between clusters from DIFFERENT layers.
:param List[string] layerNameList: Name of the layers to pull from the DB
......@@ -93,7 +93,7 @@ def getClusterDataFromMongo(layerNameList,limitNrCluster,limitNrNodes):
#imports and translates the data from JSON into a useful format
#returns layerDict -> Layer -> clusterDict -> Cluster -> nodesDict -> Nodes
for name in layerNameList:
newData = get_mongoDB_cluster_by_layerName(name)#repo.get_clusters_for_layer(name)
newData = repo.get_clusters_for_layer_of_use_case(use_case,name)#repo.get_clusters_for_layer(name)
if newData is not None and len(newData) != 0:
layerDict = populateWithNewNodesSingleLayer(newData[0:limitNrCluster],layerDict,limitNrNodes)
......@@ -117,28 +117,29 @@ def populateWithNewNodesSingleLayer(inputData, layerDict, limitNrNodes) -> Dict[
print("Entered populateWithNewNodes")
if(layerDict == None):
layerDict = dict()
print(" Layer: "+inputData[0].get("layer_name"))
print(" Layer: "+inputData[0].layer_name)
curLayerName = None
#newClusterDict
#clusterDict = layerDict.get(curCluster.get("layer_name"),dict())
for curCluster in inputData:
if(curCluster.get("layer_name")!= curLayerName):
clusterDict = layerDict.get(curCluster.get("layer_name"),dict())
curLayerName = curCluster.get("layer_name")
if(curCluster.layer_name!= curLayerName):
clusterDict = layerDict.get(curCluster.layer_name,dict())
curLayerName = curCluster.layer_name
oldCluster = clusterDict.get(curCluster.get("cluster_label"),None)
oldCluster = clusterDict.get(curCluster.cluster_label,None)
if oldCluster is None: #means this is a new cluster
cluster_containedNodesDict = dict()
else: #means this is an already existing cluster
cluster_containedNodesDict = oldCluster.cluster_containedNodesDict
for curNode in curCluster.get("nodes"):
for curNode in curCluster.nodes:
#totalNodesCount+=1
newNode = NodeC(
curCluster.get("cluster_label"),
curCluster.cluster_label,
curLayerName,
curNode.get("UniqueID"))
curNode.get("UniqueID"),
curCluster.use_case
)
if(newNode != None):
if(newNode.uniqueID!= None and newNode.node_cluster!= None and newNode.node_layer!= None):
if( limitNrNodes>0):
......@@ -151,10 +152,11 @@ def populateWithNewNodesSingleLayer(inputData, layerDict, limitNrNodes) -> Dict[
#finished node
if oldCluster is None:
# def __init__(self,cluster_label,cluster_layer,cluster_containedNodesDict,cluster_connNodesDict, cluster_connectionsNr,cluster_connClustDict):
# def __init__(self,cluster_label,cluster_layer,use_case,cluster_containedNodesDict,cluster_connNodesDict, cluster_connectionsNr,cluster_connClustDict):
newClusterC = ClusterC(
curCluster.get("cluster_label"),
curCluster.cluster_label,
curLayerName,
curCluster.use_case,
None,
cluster_containedNodesDict,
dict(),
......@@ -163,7 +165,7 @@ def populateWithNewNodesSingleLayer(inputData, layerDict, limitNrNodes) -> Dict[
clusterDict[newClusterC.cluster_label] = newClusterC
else:
oldCluster.cluster_containedNodesDict = cluster_containedNodesDict
clusterDict[curCluster.get("cluster_label")] = oldCluster
clusterDict[curCluster.cluster_label] = oldCluster
#finished cluster
newLayer = LayerC(curLayerName,clusterDict)
layerDict[curLayerName]= newLayer
......
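A hedged sketch of how the reworked loader is invoked after this change; layer names and limit values are illustrative, and the module alias mirrors the `import processing.similarityFiles.dataInput as inputData` seen later in this commit:

```python
import processing.similarityFiles.dataInput as inputData

layerDict = inputData.getClusterDataFromMongo(
    layerNameList=["Price_Layer", "FinishedTime_Layer", "Destination_Layer"],
    limitNrCluster=-1,  # main() uses -1 here
    limitNrNodes=-1,    # the per-cluster node limit only applies when > 0
    use_case="paper",
)
# Expected shape: {layer_name: LayerC}, each LayerC built from the layer name
# and a clusterDict of ClusterC objects keyed by cluster_label.
```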
......@@ -28,7 +28,7 @@ def outputFileLayerFunction(layerDict,limitNrNodes,limitNrCluster,runId):
def outputFileSimilFunction(similarityDict,limitNrNodes,limitNrCluster,runId):
def outputFileSimilFunction(similarityDict,limitNrNodes,limitNrCluster,runId,use_case):
''' Writes the similarityDict data to a JSON file.
......@@ -40,7 +40,7 @@ def outputFileSimilFunction(similarityDict,limitNrNodes,limitNrCluster,runId):
'''
similJSON = convertSimilarityDictToJSON(similarityDict,runId)
similJSON = convertSimilarityDictToJSON(similarityDict,runId,use_case)
outputJSON = json.dumps(similJSON, default=lambda o: o.__dict__, indent=4)
try:
......@@ -91,7 +91,7 @@ def outputMongoConnClustDict(inputDict,runId):
add_conn_clusters(inputDict,runId)
def outputMongoSimilarity(inputDict,runId):
def outputMongoSimilarity(inputDict,runId,use_case):
''' Stores cluster_similarity in the database.
:param Dict() inputDict: Contains the data to insert
......@@ -99,7 +99,7 @@ def outputMongoSimilarity(inputDict,runId):
:param string runId: Id of the Run
'''
add_similarity(inputDict,runId)
add_similarity(inputDict,runId,use_case)
def add_connected_run():
......@@ -129,7 +129,7 @@ def add_conn_clusters(inputDict,runId):
for element in outputJSON:
repo.add_connected_cluster(element)
def add_similarity(inputDict,runId):
def add_similarity(inputDict,runId,use_case):
''' Stores cluster_similarity in the database.
:param Dict() inputDict: Contains the data to insert
......@@ -138,6 +138,6 @@ def add_similarity(inputDict,runId):
'''
outputJSON = convertSimilarityDictToJSON(inputDict,runId)
outputJSON = convertSimilarityDictToJSON(inputDict,runId,use_case)
for element in outputJSON:
repo.add_single_similarity(element)
\ No newline at end of file
......@@ -109,7 +109,7 @@ def getFrozensetFromConnNodesDict(inputDict):
return output
def convertSimilarityDictToJSON(inputDict,runId):
def convertSimilarityDictToJSON(inputDict,runId,use_case):
''' Converts a similarity dictionary to JSON format for output to the DB.
:param Dict{} inputDict: Contains data about the computed similarities between clusters
......@@ -125,6 +125,8 @@ def convertSimilarityDictToJSON(inputDict,runId):
auxDict["cluster_layer"] = tupleKey[2]
auxDict["similarityValues"] = inputDict[tupleKey]
auxDict["runId"] = runId
auxDict["use_case"] = use_case
similList.append(auxDict)
similToJSON = similList
#outputJSON = json.dumps(similToJSON, default=lambda o: o.__dict__, indent=4)
......
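With the added field, each similarity document written to the DB now carries the use case. An illustrative document (values invented; any fields assigned in the unshown part of the loop are omitted):

```python
example_similarity_doc = {
    "cluster_layer": "Price_Layer",       # tupleKey[2]
    "similarityValues": [1.0, 3.5],       # inputDict[tupleKey]
    "runId": "5f886e7e35b70a1704c728e6",  # id of the run
    "use_case": "paper",                  # newly added field
}
```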
......@@ -39,7 +39,7 @@ from processing.similarityFiles.dataOutput import *
outputToFileFLAG = True
def main(layerNameList:List[str] = ["Price_Layer","FinishedTime_Layer","Destination_Layer"]):
def main(use_case: str, layerNameList: List[str]):
'''
Executes the similarity calculation by calculating weights between clusters in different layers,
then computing the Euclidean distance between nodes in the same layer based on one other layer each.
......@@ -67,7 +67,7 @@ def main(layerNameList:List[str] = ["Price_Layer","FinishedTime_Layer","Destinat
limitNrNodes = -1 #per Layer
layerDict = getClusterDataFromMongo(layerNameList,limitNrCluster,limitNrNodes)
layerDict = getClusterDataFromMongo(layerNameList,limitNrCluster,limitNrNodes,use_case)
if layerDict is None or len(layerDict) == 0:
LOGGER.error(f"No data for any of the following layers existed: {str(layerNameList)}. Similarity calculation was not performed.")
return
......@@ -99,12 +99,12 @@ def main(layerNameList:List[str] = ["Price_Layer","FinishedTime_Layer","Destinat
if (outputToFileFLAG == True):
print("Outputing data")
outputFileLayerFunction(layerDict,totalNodes,totalClusters,runId)
outputFileSimilFunction(similarityDict,totalNodes,totalClusters,runId)
outputFileSimilFunction(similarityDict,totalNodes,totalClusters,runId,use_case)
outputFileTimeFunction(timelist,totalNodes,totalClusters,runId)
#Output to DB
outputMongoConnClustDict(layerDict,runId)
outputMongoSimilarity(similarityDict,runId)
outputMongoSimilarity(similarityDict,runId,use_case)
#Currently not used in the calculation of connections/similarity, developed for possible future uses
......
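Since the default layer list was removed from main(), callers must now supply both the use case and the layers. A hedged invocation sketch (the module name in the import is an assumption; only the `SimilarityCalc` alias appears in the run script):

```python
import processing.similarityFiles.similarityMain as SimilarityCalc  # module name assumed

SimilarityCalc.main("paper", ["Price_Layer", "FinishedTime_Layer", "Destination_Layer"])
```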
StartTime: 2020-10-15 17:44:59.074263
FinishTime: 2020-10-15 17:45:02.969112
PopulateWithNewNodes: 3.82889
CalculateWeights: 0.054965
CalculateSimilarity: 0.010994
TotalTime: 3.894849
RunId: 5f886e7e35b70a1704c728e6
\ No newline at end of file
......@@ -33,7 +33,7 @@ def run_similarity_calc_for_Paper_case():
for key in uc_layers:
layers = uc_layers[key]
print(f"Running for use case {key} with layers {str(layers)}.")
SimilarityCalc.main(layerNameList=layers)
SimilarityCalc.main(key, layerNameList=layers)
if __name__ == '__main__':
run_similarity_calc_per_use_case()
\ No newline at end of file
......@@ -21,35 +21,25 @@ import processing.fetching.fetching as fetchy
import processing.similarityFiles.dataInput as inputData
##################################
#TODO: get certificates/authorization
# SSL configuration
# certificate_path = get_resources_path()
# context = (os.path.normpath(f'{certificate_path}/articonf1.crt'), os.path.normpath(f'{certificate_path}/articonf1.key')) # certificate and key files
# if is_running_locally():
# print("Running locally...")
# app.add_api(swagger_util.get_bundled_specs(Path("configs/swagger_local.yml")),
# resolver = connexion.RestyResolver("cms_rest_api"))
# else:
# app.add_api(swagger_util.get_bundled_specs(Path("configs/swagger.yml")),
# resolver = connexion.RestyResolver("cms_rest_api"))
##################################
from db.repository import *
repo = Repository()
def deleteWholeData():
print("test")
repo.delete_all_similarity_data()
def mainViz():
#procesing data
#fetchy.fetch_nodes_from_semantic_linking()
#clustering.run_clustering_for_Paper_case()
similCalc.run_similarity_calc_for_Paper_case()
fetchy.fetch_nodes_from_semantic_linking()
clustering.run_clustering_for_Paper_case()
similCalc.run_similarity_calc_for_Paper_case() #TODO FIX
inputData.getClusterDataFromMongo("Paper",None,None)
#inputData.getClusterDataFromMongo("Paper",None,None)
#inputData.getSimilarityDataFromMongo(cluster_layer: str= None, batchSize: int=1000, run_id: str=None)
#similarityArrFromMongo = getSimilarityDataFromMongo("Paper")
mainViz()
\ No newline at end of file
deleteWholeData()
#mainViz()
\ No newline at end of file
......@@ -104,6 +104,7 @@ class Repository(MongoRepositoryBase):
'''Deletes all nodes of the given use_case'''
collection = self._database[self._layer_nodes_collection]
collection.delete_many({"use_case": use_case})
def delete_nodes_for_table(self, use_case, table_name):
'''Deletes nodes from the given table of the use-case'''
......@@ -111,4 +112,11 @@ class Repository(MongoRepositoryBase):
collection.delete_many({"use_case": use_case, "table": table_name})
# def delete_use_case_layers(self, use_case: str):
# collection_name = self._layer_collection
# criteria = {"use_case" : use_case}
# res = super().delete_entry(collection_name,criteria,True)
# print ("###REPOSITORY: res= "+ str(res))
# endregion
import csv
import hashlib
import datetime
import sys
import os
......@@ -13,6 +14,9 @@ from messaging.MessageHandler import MessageHandler
CSV_FILE = r'Energy_Dataset.csv'
handler = MessageHandler()
from db.repository import *
repo = Repository()
def upload_transaction(transaction):
'''{"type": "new-trace",
......@@ -31,14 +35,19 @@ def upload_transaction(transaction):
if __name__ == '__main__':
repo.delete_nodes_for_use_case("paper")
print ("###DELETED NODES FROM SEMANTIC LINKING - PAPER USE CASE")
with open(CSV_FILE, 'r') as file:
reader = csv.reader(file)
titles = next(reader)
#################
maxCustomers = 5
maxRows = 10
maxCustomers = 10
maxRowsPerMonth = 120 # 24Hr * 5Days
month_memory=[0,0,0,0,0,0,0,0,0,0,0,0]
##################
old_c = None
......@@ -55,13 +64,25 @@ if __name__ == '__main__':
if transaction['Customer'] != old_c:
customerCount +=1
rowCount = 0
for i in range(12):
month_memory[i]=maxRowsPerMonth
old_c = transaction['Customer']
if (customerCount>maxCustomers):
print("\nMAX customers reached")
break
upload_condition = False
if(rowCount<maxRows):
datetime_obj= datetime.datetime.strptime(transaction['Timestamp'],'%Y-%m-%d %H:%M:%S')
if(month_memory[datetime_obj.month-1]>0): #MAX XX times per month considered per customer
if(datetime_obj.minute==0): #upload only hourly
month_memory[datetime_obj.month-1]-=1
upload_condition = True
#datetime_obj.month
if(upload_condition):
upload_transaction(transaction)
print(f"uploading for {old_c}")
rowCount+=1
......
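The new upload gate replaces the flat per-customer row cap with a per-month budget plus an on-the-hour filter. A self-contained sketch of that predicate, with the budget reset that happens whenever a new customer starts:

```python
import datetime

maxRowsPerMonth = 120  # 24 hours * 5 days
month_memory = [maxRowsPerMonth] * 12  # re-initialized for every new customer

def should_upload(timestamp: str) -> bool:
    '''Mirror of the upload_condition logic in the loop above.'''
    dt = datetime.datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S')
    if dt.minute != 0:                   # upload only hourly readings
        return False
    if month_memory[dt.month - 1] <= 0:  # monthly budget for this customer exhausted
        return False
    month_memory[dt.month - 1] -= 1
    return True

print(should_upload('2013-07-01 14:00:00'))  # True while July's budget remains
```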