User_Demand Visualizations Bug Fixes

118ddb39 · Bogdan · 8155f69d · 118ddb39 · 118ddb39 · 118ddb39
Commit 118ddb39 authored Oct 29, 2020 by Bogdan
4 changed files
--- a/src/data-hub/role-stage-discovery-microservice/app/db/repository.py
+++ b/src/data-hub/role-stage-discovery-microservice/app/db/repository.py
@@ -73,7 +73,7 @@ class Repository(MongoRepositoryBase):
        collection_name = self._layer_collection
        criteria = {"use_case" : use_case}
        res = super().delete_entry(collection_name,criteria,True)
-        print ("###REPOSITORY: res= "+ str(res))
+        print (" Deleting UseCase: "+ str(use_case))

    


--- a/src/data-hub/role-stage-discovery-microservice/app/visualization/vis_cluster_results.py
+++ b/src/data-hub/role-stage-discovery-microservice/app/visualization/vis_cluster_results.py
@@ -46,9 +46,9 @@ def show_clustering_result(ax, min_pts, clusters: dict):
    colors = [distinct_colors[node['cluster_label']] for node in nodes]

    ax.set_title(f'Clustering Result with MinPts={min_pts}')
-    ax.set_xlabel('Total_Demand_MWh')
+    ax.set_xlabel('User_Demand_kWh')
    ax.set_ylabel('Customer')
-    ax.scatter( [n['Total_Demand_MWh'] for n in nodes], 
+    ax.scatter( [n['User_Demand_kWh'] for n in nodes], 
                [n['Customer'] for n in nodes], 
                c=colors)

@@ -72,14 +72,14 @@ def show_clusteringSingleFeature_result(ax, min_pts, clusters: dict):
    colors = [distinct_colors[node['cluster_label']] for node in nodes]

    ax.set_title(f'Clustering Result with MinPts={min_pts}')
-    ax.set_xlabel('Total_Demand_MWh')
+    ax.set_xlabel('User_Demand_kWh')
    ax.grid(True, axis='x')
    #ax.set_ylabel('Customer')
-    ax.scatter( [n['Total_Demand_MWh'] for n in nodes], 
+    ax.scatter( [n['User_Demand_kWh'] for n in nodes], 
                [0 for n in nodes],
                c=colors)

-def show_clusteringSingleFeatureDensity_result(ax, min_pts, clusters: dict):
+def show_clusteringSingleFeatureDensityBoxPlot_result(ax, min_pts, clusters: dict):
    labels = clusters.keys()
    # flatten values in dict

@@ -95,7 +95,7 @@ def show_clusteringSingleFeatureDensity_result(ax, min_pts, clusters: dict):
        if not checkKey(mydict,n['cluster_label']):
            mydict[n['cluster_label']] = []

-        mydict[n['cluster_label']].append(n['Total_Demand_MWh'])
+        mydict[n['cluster_label']].append(n['User_Demand_kWh'])

    

@@ -130,10 +130,10 @@ def show_clusteringSingleFeatureDensity_result(ax, min_pts, clusters: dict):
            plt.setp(bp['medians'][0], color=distinct_colors[cluster_label])
    
    ax.set_title(f'Clustering Result with MinPts={min_pts}')
-    ax.set_xlabel('Total_Demand_MWh')
+    ax.set_xlabel('User_Demand_kWh')
    #ax.set_ylabel('Cluster Label')
    ax.grid(True, axis='x')
-    # ax.scatter( [n['Total_Demand_MWh'] for n in nodes], 
+    # ax.scatter( [n['User_Demand_kWh'] for n in nodes], 
    #             [0 for n in nodes],
    #             c=colors)

@@ -143,14 +143,14 @@ def run_clustering(min_points, dataset):
    clusterer = Clusterer(min_points=min_points)
    return clusterer.cluster_dataset(
        dataset=dataset,
-        features=['Total_Demand_MWh','Customer']
+        features=['User_Demand_kWh','Customer']
    )

 def run_clustering_SingleFeature(min_points, dataset):
    clusterer = Clusterer(min_points=min_points)
    return clusterer.cluster_dataset(
        dataset=dataset,
-        features=['Total_Demand_MWh']
+        features=['User_Demand_kWh']
    )

    # res: Dict[Any, ClusterResult] = clusterer.cluster_dataset(
@@ -173,7 +173,7 @@ def createDataset(inputDict):
    for entry in inputSimListOfDict:
        newDict = {}
        try: 
-            newDict["Total_Demand_MWh"] = float(entry["Total_Demand_MWh"])
+            newDict["User_Demand_kWh"] = float(entry["User_Demand_kWh"])
            newDict["Customer"] = float(entry["Customer"])
            dataset.append(newDict)
        except:
@@ -204,10 +204,10 @@ if __name__ == '__main__':
    #dataset = [{'1':n[0], '2':n[1]} for n in nodes]

    JWT_TOKEN = "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJ1c2VybmFtZSI6InJlZ3VsYXJAaXRlYy5hYXUuYXQiLCJjcmVhdGVkX2F0IjoiMjAyMC0xMC0yMCAxNDoyNDoxMi45MzI3OTAiLCJ2YWxpZF91bnRpbCI6IjIwMjAtMTAtMjEgMTQ6MjQ6MTIuOTMyNzkwIn0.qzaDauyEA4pAnw8K8ik6jTtbEOY24q159GDYbvByaJ4"
-    #r = requests.get('https://articonf1.itec.aau.at:30103/api/paper/paper/layers/Demand_Layer/nodes', timeout=15)
+
    r = requests.get(
        
-        url = 'https://articonf1.itec.aau.at:30103/api/paper/paper/layers/Demand_Layer/nodes',
+        url = 'https://articonf1.itec.aau.at:30103/api/paper/paper/layers/User_Demand_Layer/nodes',
        timeout=15,
        headers = {"Authorization": f"Bearer {JWT_TOKEN}"},
        verify = False # ignore ssl error)
@@ -243,66 +243,66 @@ if __name__ == '__main__':
    # dataset = createDataset(inputSimListOfDict)
    # print("Started TEST Clustering") #500 = 4 clusters
    # clusters = run_clustering_SingleFeature(250, dataset)
-    # show_clusteringSingleFeatureDensity_result(ax1,250,clusters)
+    # show_clusteringSingleFeatureDensityBoxPlot_result(ax1,250,clusters)
   
    # dataset = createDataset(inputSimListOfDict)
    # print("Started TEST Clustering") #500 = 4 clusters
    # clusters = run_clustering_SingleFeature(500, dataset)
-    # show_clusteringSingleFeatureDensity_result(ax2,500,clusters)
+    # show_clusteringSingleFeatureDensityBoxPlot_result(ax2,500,clusters)

    # dataset = createDataset(inputSimListOfDict)
    # print("Started TEST Clustering") #500 = 4 clusters
    # clusters = run_clustering_SingleFeature(750, dataset)
-    # show_clusteringSingleFeatureDensity_result(ax3,750,clusters)
+    # show_clusteringSingleFeatureDensityBoxPlot_result(ax3,750,clusters)

    # dataset = createDataset(inputSimListOfDict)
    # print("Started TEST Clustering") #500 = 4 clusters
    # clusters = run_clustering_SingleFeature(1000, dataset)
-    # show_clusteringSingleFeatureDensity_result(ax4,1000,clusters)
+    # show_clusteringSingleFeatureDensityBoxPlot_result(ax4,1000,clusters)

    

-    print("Started 1st Clustering")
-    dataset = createDataset(inputSimListOfDict)
-    clusters = run_clustering_SingleFeature(50, dataset)
-    show_clusteringSingleFeature_result(ax1, 50, clusters)
+    # print("Started 1st Clustering")
+    # dataset = createDataset(inputSimListOfDict)
+    # clusters = run_clustering_SingleFeature(15, dataset)
+    # show_clusteringSingleFeature_result(ax1, 15, clusters)

-    print("Started 2nd Clustering")
-    dataset = createDataset(inputSimListOfDict)
-    clusters = run_clustering_SingleFeature(100, dataset)
-    show_clusteringSingleFeature_result(ax2, 100, clusters)
+    # print("Started 2nd Clustering")
+    # dataset = createDataset(inputSimListOfDict)
+    # clusters = run_clustering_SingleFeature(25, dataset)
+    # show_clusteringSingleFeature_result(ax2, 25, clusters)

    
-    print("Started 3rd Clustering")
-    dataset = createDataset(inputSimListOfDict)
-    clusters = run_clustering_SingleFeature(150, dataset)
-    show_clusteringSingleFeature_result(ax3, 150, clusters)
+    # print("Started 3rd Clustering")
+    # dataset = createDataset(inputSimListOfDict)
+    # clusters = run_clustering_SingleFeature(50, dataset)
+    # show_clusteringSingleFeature_result(ax3, 50, clusters)

-    print("Started 4th Clustering")
-    dataset = createDataset(inputSimListOfDict)
-    clusters = run_clustering_SingleFeature(250, dataset)
-    show_clusteringSingleFeature_result(ax4, 250, clusters)
+    # print("Started 4th Clustering")
+    # dataset = createDataset(inputSimListOfDict)
+    # clusters = run_clustering_SingleFeature(100, dataset)
+    # show_clusteringSingleFeature_result(ax4, 100, clusters)

    
-    # dataset = createDataset(inputSimListOfDict)    
-    # print("Started 1st Clustering")
-    # clusters = run_clustering(10, dataset)
-    # show_clustering_result(ax1, 10, clusters)
+    dataset = createDataset(inputSimListOfDict)    
+    print("Started 1st Clustering")
+    clusters = run_clustering(10, dataset)
+    show_clustering_result(ax1, 10, clusters)
    
-    # dataset = createDataset(inputSimListOfDict)
-    # print("Started 2nd Clustering")
-    # clusters = run_clustering(15, dataset)
-    # show_clustering_result(ax2, 15, clusters)
+    dataset = createDataset(inputSimListOfDict)
+    print("Started 2nd Clustering")
+    clusters = run_clustering(15, dataset)
+    show_clustering_result(ax2, 15, clusters)
    
-    # dataset = createDataset(inputSimListOfDict)
-    # print("Started 3rd Clustering")
-    # clusters = run_clustering(25, dataset)
-    # show_clustering_result(ax3, 25, clusters)
+    dataset = createDataset(inputSimListOfDict)
+    print("Started 3rd Clustering")
+    clusters = run_clustering(25, dataset)
+    show_clustering_result(ax3, 25, clusters)

-    # dataset = createDataset(inputSimListOfDict)
-    # print("Started 4th Clustering")
-    # clusters = run_clustering(50, dataset)
-    # show_clustering_result(ax4, 50, clusters)
+    dataset = createDataset(inputSimListOfDict)
+    print("Started 4th Clustering")
+    clusters = run_clustering(50, dataset)
+    show_clustering_result(ax4, 50, clusters)

    #agePhysics = [ 25, 31, 31, 31, 12,28,29,31,33,34,35,36,34,39,40,41,48 ]
    # basic plot

--- a/src/data-hub/role-stage-discovery-microservice/app/visualization/visualizeRawData.py
+++ b/src/data-hub/role-stage-discovery-microservice/app/visualization/visualizeRawData.py
@@ -15,7 +15,8 @@ def mainViz():
    distributionEnergy = dict()
    distributionHeating = dict()
    distributionPrice = dict()    
-    distributionDemand = dict()
+    distributionTotalDemand = dict()
+    distributionUserDemand = dict()

    for entry in inputDict:
        try:
@@ -57,10 +58,21 @@ def mainViz():
        
        try:
            tdVal=round(float(entry['Total_Demand_MWh']),2)
-            if(checkKey(distributionDemand,tdVal)):
-                distributionDemand[tdVal] += 1
+            if(checkKey(distributionTotalDemand,tdVal)):
+                distributionTotalDemand[tdVal] += 1
            else:
-                distributionDemand[tdVal] = 1
+                distributionTotalDemand[tdVal] = 1
+        except:
+            pass
+
+        try:
+            eVal=round(float(entry['Energy_Consumption_kWh']),2)
+            hVal=round(float(entry['Heating_Consumption_kWh']),2)
+            udVal = round((eVal+hVal),2)
+            if(checkKey(distributionUserDemand,udVal)):
+                distributionUserDemand[udVal] += 1
+            else:
+                distributionUserDemand[udVal] = 1
        except:
            pass

@@ -95,13 +107,13 @@ def mainViz():
    #plt.xticks(np.linspace(0,4.5,num=20))

    ## Heating
-    list1 = sorted(distributionHeating.items())
-    x2,y2 = zip(*list1)
-    plt.bar(x2,y2,color='red',label="Heating", width=0.01)
-    plt.legend()
-    plt.title('Heating Consumption')
-    plt.xlabel('kWh')
-    plt.show()
+    # list1 = sorted(distributionHeating.items())
+    # x2,y2 = zip(*list1)
+    # plt.bar(x2,y2,color='red',label="Heating", width=0.01)
+    # plt.legend()
+    # plt.title('Heating Consumption')
+    # plt.xlabel('kWh')
+    # plt.show()

    ## Price
    # #plt.xticks(np.linspace(0,100,num=10))
@@ -114,13 +126,24 @@ def mainViz():
    # plt.show()

    ### Demand
+    plt.xticks(np.linspace(0,100,num=10))
+    list1 = sorted(distributionTotalDemand.items())
+    x2,y2 = zip(*list1)
+    plt.bar(x2,y2,color='black',label="Total Demand", width=5)
+    plt.legend()
+    plt.title('Total Demand')
+    plt.xlabel('MWh')
+    plt.show()
+
+    ## User Demand
+
    #plt.xticks(np.linspace(0,100,num=10))
-    # list1 = sorted(distributionDemand.items())
+    # list1 = sorted(distributionUserDemand.items())
    # x2,y2 = zip(*list1)
-    # plt.bar(x2,y2,color='gray',label="Demand", width=5)
+    # plt.bar(x2,y2,color='orange',label="User Demand", width=0.01)
    # plt.legend()
-    # plt.title('Total Demand')
-    # plt.xlabel('MWh')
+    # plt.title('User Demand')
+    # plt.xlabel('kWh')
    # plt.show()



--- a/src/data-hub/semantic-linking-microservice/app/dummy_upload.py
+++ b/src/data-hub/semantic-linking-microservice/app/dummy_upload.py
@@ -38,7 +38,7 @@ if __name__ == '__main__':

    repo.delete_nodes_for_use_case("paper")
    print ("###DELETED NODES FROM SEMANTIC LINKING - PAPER USE CASE")
-
+    transactionList = []
    with open(CSV_FILE, 'r') as file:
        reader = csv.reader(file)

@@ -60,6 +60,11 @@ if __name__ == '__main__':
            transaction['docType'] = 'paper'
            for idx in range(len(row)):
                transaction[titles[idx]] = row[idx]
+                
+            # also include the user demand, as Total_Demand_MWh is not per user
+            energy_cons = float(transaction['Energy_Consumption_kWh']) if transaction['Energy_Consumption_kWh'] is not None and transaction['Energy_Consumption_kWh'] != "" else 0
+            heating_cons = float(transaction['Heating_Consumption_kWh']) if transaction['Heating_Consumption_kWh'] is not None and transaction['Heating_Consumption_kWh'] != "" else 0
+            transaction['User_Demand_kWh'] = heating_cons + energy_cons

            if transaction['Customer'] != old_c:  
                customerCount +=1    
@@ -84,9 +89,16 @@ if __name__ == '__main__':
            
            if(upload_condition):
                upload_transaction(transaction)
-                print(f"uploading for {old_c}")
+                transactionList.append(transaction)
+                #print(f"uploading for {old_c}")
            rowCount+=1
-            
-            


+    outputJSON = json.dumps(transactionList, default=lambda o: o.__dict__, indent=4)
+
+    try: 
+        with open('EnergyDataSubset.json', 'w') as outfile:
+            outfile.write(outputJSON)
+    except ValueError:
+        print("Error occured when writing the resultSimilarityDict file")
+