Updated Visualisations Methods

931efc95 · Bogdan · 41cd4ebb · 931efc95 · 931efc95 · 931efc95
Commit 931efc95 authored Oct 22, 2020 by Bogdan
7 changed files
--- a/src/data-hub/community-detection-microservice/app/visualization/vis_cluster_results.py
+++ b/src/data-hub/community-detection-microservice/app/visualization/vis_cluster_results.py
+# clustering of generated nodes
+import sys
+import os
+import requests
+import json
+modules_path = './'
+if os.path.exists(modules_path):
+    sys.path.insert(1, modules_path)
+
+import matplotlib.pyplot as plt
+import sklearn.datasets
+import numpy as np
+from processing.clustering.clusterer import Clusterer
+
+# parameters for data generation
+N_SAMPLES = 1000
+N_FEATURES = 2
+N_CENTERS = 3
+STD_DEVIATION = 1.0
+
+def show_generated_data(ax, nodes, labels):
+    """Draw the generated nodes on ``ax``, one colour per label.
+
+    ``nodes`` is indexed as a 2-D array (columns 0 and 1 are plotted) and each
+    entry of ``labels`` is used as an index into a palette of ``N_CENTERS``
+    rainbow colours.
+    """
+    palette = plt.cm.rainbow(np.linspace(0, 1, N_CENTERS))
+    point_colors = [palette[label] for label in labels]
+
+    ax.set_title('Generated Dataset')
+    ax.set_xlabel('Feature 1')
+    ax.set_ylabel('Feature 2')
+    x_values, y_values = nodes[:, 0], nodes[:, 1]
+    ax.scatter(x_values, y_values, c=point_colors)
+
+def show_clustering_result(ax, min_pts, clusters: dict):
+    """Scatter the clustered nodes on ``ax`` (Total_Demand_MWh against Customer).
+
+    ``clusters`` maps a cluster label to the nodes of that cluster; each node
+    is read through its 'cluster_label', 'Total_Demand_MWh' and 'Customer'
+    keys. ``min_pts`` is only used in the plot title.
+    """
+    # flatten the per-cluster node collections into one list
+    all_nodes = []
+    for members in clusters.values():
+        all_nodes.extend(members)
+
+    cluster_count = len(clusters)
+    if -1 in clusters:
+        # label -1 is the noise bucket: give the real clusters rainbow colours
+        # and append black as the last palette row (index -1 selects that row,
+        # presumably because noise nodes carry cluster_label -1 -- confirm)
+        palette = plt.cm.rainbow(np.linspace(0, 1, cluster_count - 1))
+        palette = np.append(palette, [[0, 0, 0, 1]], axis=0)
+    else:
+        palette = plt.cm.rainbow(np.linspace(0, 1, cluster_count))
+    point_colors = [palette[node['cluster_label']] for node in all_nodes]
+
+    ax.set_title(f'Clustering Result with MinPts={min_pts}')
+    ax.set_xlabel('Total_Demand_MWh')
+    ax.set_ylabel('Customer')
+    demand_values = [node['Total_Demand_MWh'] for node in all_nodes]
+    customer_values = [node['Customer'] for node in all_nodes]
+    ax.scatter(demand_values, customer_values, c=point_colors)
+
+def show_clusteringSingleFeature_result(ax, min_pts, clusters: dict):
+    """Scatter the clustered nodes on ``ax`` along the Total_Demand_MWh axis only.
+
+    ``clusters`` maps a cluster label to the nodes of that cluster; each node
+    is read through its 'cluster_label' and 'Total_Demand_MWh' keys. Every
+    point is drawn at y=1. ``min_pts`` is only used in the plot title.
+    """
+    # flatten the per-cluster node collections into one list
+    all_nodes = []
+    for members in clusters.values():
+        all_nodes.extend(members)
+
+    cluster_count = len(clusters)
+    if -1 in clusters:
+        # label -1 is the noise bucket: give the real clusters rainbow colours
+        # and append black as the last palette row (index -1 selects that row,
+        # presumably because noise nodes carry cluster_label -1 -- confirm)
+        palette = plt.cm.rainbow(np.linspace(0, 1, cluster_count - 1))
+        palette = np.append(palette, [[0, 0, 0, 1]], axis=0)
+    else:
+        palette = plt.cm.rainbow(np.linspace(0, 1, cluster_count))
+    point_colors = [palette[node['cluster_label']] for node in all_nodes]
+
+    ax.set_title(f'Clustering Result with MinPts={min_pts}')
+    ax.set_xlabel('Total_Demand_MWh')
+    # NOTE(review): the y axis is labelled 'Customer' although every point is
+    # plotted at the constant y=1 -- confirm the label is intended.
+    ax.set_ylabel('Customer')
+    demand_values = [node['Total_Demand_MWh'] for node in all_nodes]
+    constant_y = [1] * len(all_nodes)
+    ax.scatter(demand_values, constant_y, c=point_colors)
+
+
+def run_clustering(min_points, dataset):
+    """Cluster ``dataset`` on the 'Total_Demand_MWh' and 'Customer' features.
+
+    Builds a ``Clusterer`` with the given ``min_points`` and returns whatever
+    its ``cluster_dataset`` call returns.
+    """
+    feature_names = ['Total_Demand_MWh', 'Customer']
+    return Clusterer(min_points=min_points).cluster_dataset(
+        dataset=dataset,
+        features=feature_names,
+    )
+
+def run_clustering_SingleFeature(min_points, dataset):
+    """Cluster ``dataset`` on the single feature 'Total_Demand_MWh'.
+
+    Builds a ``Clusterer`` with the given ``min_points`` and returns whatever
+    its ``cluster_dataset`` call returns.
+    """
+    feature_names = ['Total_Demand_MWh']
+    return Clusterer(min_points=min_points).cluster_dataset(
+        dataset=dataset,
+        features=feature_names,
+    )
+
+    # res: Dict[Any, ClusterResult] = clusterer.cluster_dataset(
+    #     nodes,
+    #     layer.properties
+    # )
+
+
+if __name__ == '__main__':
+    # Script entry point: download the Demand_Layer nodes, keep the entries
+    # whose 'Total_Demand_MWh' and 'Customer' parse as floats, cluster them on
+    # the single demand feature for four MinPts values and show the results in
+    # a 2x2 grid.
+
+    # The bearer token is short-lived and must not be committed to source
+    # control; read it from the environment and fail fast when it is missing.
+    JWT_TOKEN = os.environ.get('JWT_TOKEN')
+    if not JWT_TOKEN:
+        raise SystemExit("Set the JWT_TOKEN environment variable to a valid bearer token")
+
+    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2)
+    fig.tight_layout(pad=3.0)
+
+    r = requests.get(
+        url='https://articonf1.itec.aau.at:30103/api/paper/paper/layers/Demand_Layer/nodes',
+        timeout=15,
+        headers={"Authorization": f"Bearer {JWT_TOKEN}"},
+        # NOTE(review): certificate verification is switched off to ignore an
+        # SSL error; this sends the token over an unauthenticated TLS channel.
+        # Prefer passing the server's CA bundle via `verify=` once available.
+        verify=False,
+    )
+    # surface 401/403/5xx answers instead of parsing an error body as node data
+    r.raise_for_status()
+    print("Downloaded JSON")
+    inputSimListOfDict = json.loads(r.content)
+
+    # Sample entry (the loop below only reads "Total_Demand_MWh" and "Customer"):
+    # {
+    # "Customer": "13",
+    # "Postcode": "2261",
+    # "Timestamp": "2012-07-02 09:00:00",
+    # "Total_Demand_MWh": "10513.24",
+    # "UniqueID": "f5a4eb614bf3d794211970c65365aeeec7afe6750b7623e3de4d174f9ef0d6e1",
+    # "layer_name": "Demand_Layer",
+    # "use_case": "paper",
+    # "use_case_table": "paper"
+    # }
+
+    dataset = []
+    skippedCounter = 0
+    for entry in inputSimListOfDict:
+        try:
+            dataset.append({
+                "Total_Demand_MWh": float(entry["Total_Demand_MWh"]),
+                "Customer": float(entry["Customer"]),
+            })
+        except (KeyError, TypeError, ValueError):
+            # missing field or non-numeric value: leave this node out
+            skippedCounter += 1
+
+    print(f"Warning: Skipped {skippedCounter} Badly formated nodes")
+
+    # One single-feature clustering per subplot, each with its own MinPts.
+    # For the two-feature variant use run_clustering / show_clustering_result.
+    for ordinal, axis, min_pts in zip(('1st', '2nd', '3rd', '4th'),
+                                      (ax1, ax2, ax3, ax4),
+                                      (25, 50, 100, 300)):
+        print(f"Started {ordinal} Clustering")
+        clusters = run_clustering_SingleFeature(min_pts, dataset)
+        show_clusteringSingleFeature_result(axis, min_pts, clusters)
+
+    plt.show()
+    print("#FINISH")
\ No newline at end of file
--- a/src/data-hub/community-detection-microservice/app/visualization/vis_execution_time.py
+++ b/src/data-hub/community-detection-microservice/app/visualization/vis_execution_time.py
+import matplotlib.pyplot as plt
+
+# clustering timings, one row per measurement: [n, finished, dest, price]
+times = [
+    [1000, 0.9823, 1.0420, 0.9656],
+    [5000, 7.8716, 8.8916, 8.2609],
+    [10000, 24.7394, 29.0521, 24.3734],
+    [20000, 86.0519, 104.0453, 85.4891],
+    [50000, 489.4964, 574.7641, 468.8706],
+]
+
+# slicing timings in the same row layout (not plotted by this script)
+times2 = [
+    [1000, 0.010159840000000031, 0.008385740000001363, 0.008584839999997484],
+    [5000, 0.044350359999999256, 0.04146890000000099, 0.04291390000000206],
+    [10000, 0.07776566000000074, 0.07954154000000102, 0.07955803999999489],
+    [20000, 0.15964476000000047, 0.16679267999999894, 0.15759418000000097],
+    [50000, 0.4081138799999998, 0.4278634399999987, 0.41363941999999554],
+]
+
+
+# transpose the clustering rows into one list per column
+n, finished, dest, price = map(list, zip(*times))
+
+
+# one line per measured series over the number of nodes
+fig, ax = plt.subplots()
+ax.set_title('Execution Time for Clustering')
+ax.set_xlabel('Number of Nodes')
+ax.set_ylabel('Time in Seconds')
+ax.plot(n, dest, label='Destination')
+ax.plot(n, finished, label='Finished Time')
+ax.plot(n, price, label='Price')
+ax.legend()
+
+plt.show()
\ No newline at end of file
--- a/src/data-hub/community-detection-microservice/app/visualization/visualize_time_slices.py
+++ b/src/data-hub/community-detection-microservice/app/visualization/visualize_time_slices.py
+import sys
+import os
+for path in ['../', './', '../../../modules/']:
+    if os.path.exists(path):
+        sys.path.insert(1, path)
+
+import matplotlib.pyplot as plt
+from db.repository import Repository
+from db.entities import TimeSlice
+from typing import List
+
+
+def plt_show_circles(time_slices: List[TimeSlice], cluster_no):
+    """Scatter one cluster's nodes for each time slice, pausing 0.5 s per slice.
+
+    For every slice the nodes of cluster ``str(cluster_no)`` are plotted at
+    ('Longitude_Destination', 'Latitude_Destination'); a missing coordinate is
+    drawn as 0. The figure is not cleared between slices.
+    """
+    cluster_key = str(cluster_no)
+
+    for time_slice in time_slices:
+        members = time_slice.get_nodes_for_cluster(cluster_key)
+
+        plt.title(str(time_slice.time))
+
+        longitudes = [m['Longitude_Destination'] if 'Longitude_Destination' in m else 0
+                      for m in members]
+        latitudes = [m['Latitude_Destination'] if 'Latitude_Destination' in m else 0
+                     for m in members]
+        # every marker gets the same size, scaled by the cluster's node count
+        member_count = len(members)
+        marker_sizes = [member_count * 100] * member_count
+        plt.scatter(longitudes, latitudes, s=marker_sizes)
+
+        plt.pause(0.5)
+
+
+def plt_show_bars(time_slices: List[TimeSlice], cluster_no):
+    """Show a bar chart of one cluster's node count in every time slice.
+
+    Bars are placed at the slice's position in ``time_slices``; every 10th
+    position is labelled with that slice's ``time``.
+    """
+    cluster_key = str(cluster_no)
+    tick_step = 10
+
+    slice_times = [time_slice.time for time_slice in time_slices]
+    positions = range(len(slice_times))
+
+    cluster_sizes = []
+    for time_slice in time_slices:
+        cluster_sizes.append(len(time_slice.get_nodes_for_cluster(cluster_key)))
+
+    _, ax = plt.subplots()
+    ax.bar(x=positions, height=cluster_sizes)
+
+    ax.set_ylabel('Size')
+    ax.set_title(f'Cluster-{cluster_key} size over time')
+    # label only every tick_step-th bar to keep the axis readable
+    ax.set_xticks(positions[::tick_step])
+    ax.set_xticklabels(slice_times[::tick_step])
+
+    plt.show()
+
+
+if __name__ == "__main__":
+    # Load the Destination_Layer time slices, order them by time and plot the
+    # size of cluster 0 over time.
+    time_slices = Repository().get_time_slices_by_name("Destination_Layer")
+
+    # chronological order
+    # NOTE(review): eval() executes whatever string is stored in `time`; if the
+    # repository content is not fully trusted, parse it with a safe parser
+    # (e.g. ast.literal_eval) instead -- confirm the stored format first.
+    chronological_key = lambda time_slice: eval(time_slice.time)
+    time_slices.sort(key=chronological_key)
+
+    print(len(time_slices))
+    plt_show_bars(time_slices, cluster_no=0)
\ No newline at end of file
--- a/src/data-hub/role-stage-discovery-microservice/app/visualisationPaper.py
+++ b/src/data-hub/role-stage-discovery-microservice/app/visualisationPaper.py
@@ -114,37 +114,43 @@ def mainViz():


    #TRY TO PLOT
-    fig, axs = plt.subplots(1,5, sharex = True)
-    fig.suptitle('Choose A title??? ')
-    fig.text(0.5, 0.04, 'Euclidean Distance', ha='center', va='center')
+    #fig, axs = plt.subplots(, sharex = True)
+    plt.xlabel('Eucledian Distance')
+    plt.ylabel('Nr. of Cluster combinations')
+    #fig.suptitle('')
+    #fig.text(0.5, 0.04, 'Euclidean Distance', ha='center', va='center')

    list1 = sorted(distributionSolar.items())
    x2,y2 = zip(*list1)
-    axs[0].bar(x2,y2,color='purple',label="Solar", width=0.2)
-    axs[0].legend()
-    axs[0].set_title('Solar')
-    axs[0].set(ylabel='Nr. of Similarity connections between two Clusters')
-
-    list1 = sorted(distributionEnergy.items())
-    x,y = zip(*list1)
-    axs[1].bar(x, y, color='blue',label="Energy", width=0.2)
-    axs[1].legend()
-
-    list1 = sorted(distributionHeating.items())
-    x3,y3 = zip(*list1)
-    axs[2].bar(x3,y3,color='red',label="Heating", width=0.2)
-    axs[2].legend()
+    plt.bar(x2,y2,color='purple',label="Solar", width=0.2)
+    plt.legend()
+    plt.title('Solar')  # pyplot exposes title(); set_title() exists only on Axes and raised AttributeError here
    

-    list1 = sorted(distributionPrice.items())
-    x4,y4 = zip(*list1)
-    axs[3].bar(x4,y4,color='green',label="Price", width=0.2)
-    axs[3].legend()
+    # list1 = sorted(distributionEnergy.items())
+    # x,y = zip(*list1)
+    # plt.bar(x, y, color='blue',label="Energy", width=0.2)
+    # plt.legend()
+    # plt.set_title('Energy')
+
+    # list1 = sorted(distributionHeating.items())
+    # x3,y3 = zip(*list1)
+    # plt.bar(x3,y3,color='red',label="Heating", width=0.2)
+    # plt.legend()
+    # plt.set_title('Heating')
+    

-    list1 = sorted(distributionPosition.items())
-    x5,y5 = zip(*list1)
-    axs[4].bar(x5,y5,color='grey',label="Location", width=0.2)
-    axs[4].legend()
+    # list1 = sorted(distributionPrice.items())
+    # x4,y4 = zip(*list1)
+    # plt.bar(x4,y4,color='green',label="Price", width=0.2)
+    # plt.legend()
+    # plt.set_title('Price')
+
+    # list1 = sorted(distributionPosition.items())
+    # x5,y5 = zip(*list1)
+    # plt.bar(x5,y5,color='grey',label="Location", width=0.2)
+    # plt.legend()
+    # plt.set_title('Position')


    

--- a/src/data-hub/role-stage-discovery-microservice/app/visualization/vis_cluster_results.py
+++ b/src/data-hub/role-stage-discovery-microservice/app/visualization/vis_cluster_results.py
--- a/src/data-hub/role-stage-discovery-microservice/app/visualization/vis_execution_time.py
+++ b/src/data-hub/role-stage-discovery-microservice/app/visualization/vis_execution_time.py
+import matplotlib.pyplot as plt
+
+# clustering timings, one row per measurement: [n, finished, dest, price]
+times = [
+    [1000, 0.9823, 1.0420, 0.9656],
+    [5000, 7.8716, 8.8916, 8.2609],
+    [10000, 24.7394, 29.0521, 24.3734],
+    [20000, 86.0519, 104.0453, 85.4891],
+    [50000, 489.4964, 574.7641, 468.8706],
+]
+
+# slicing timings in the same row layout (not plotted by this script)
+times2 = [
+    [1000, 0.010159840000000031, 0.008385740000001363, 0.008584839999997484],
+    [5000, 0.044350359999999256, 0.04146890000000099, 0.04291390000000206],
+    [10000, 0.07776566000000074, 0.07954154000000102, 0.07955803999999489],
+    [20000, 0.15964476000000047, 0.16679267999999894, 0.15759418000000097],
+    [50000, 0.4081138799999998, 0.4278634399999987, 0.41363941999999554],
+]
+
+
+# transpose the clustering rows into one list per column
+n, finished, dest, price = map(list, zip(*times))
+
+
+# one line per measured series over the number of nodes
+fig, ax = plt.subplots()
+ax.set_title('Execution Time for Clustering')
+ax.set_xlabel('Number of Nodes')
+ax.set_ylabel('Time in Seconds')
+ax.plot(n, dest, label='Destination')
+ax.plot(n, finished, label='Finished Time')
+ax.plot(n, price, label='Price')
+ax.legend()
+
+plt.show()
\ No newline at end of file
--- a/src/data-hub/role-stage-discovery-microservice/app/visualization/visualize_time_slices.py
+++ b/src/data-hub/role-stage-discovery-microservice/app/visualization/visualize_time_slices.py
+import sys
+import os
+for path in ['../', './', '../../../modules/']:
+    if os.path.exists(path):
+        sys.path.insert(1, path)
+
+import matplotlib.pyplot as plt
+from db.repository import Repository
+from db.entities import TimeSlice
+from typing import List
+
+
+def plt_show_circles(time_slices: List[TimeSlice], cluster_no):
+    """Scatter one cluster's nodes for each time slice, pausing 0.5 s per slice.
+
+    For every slice the nodes of cluster ``str(cluster_no)`` are plotted at
+    ('Longitude_Destination', 'Latitude_Destination'); a missing coordinate is
+    drawn as 0. The figure is not cleared between slices.
+    """
+    cluster_key = str(cluster_no)
+
+    for time_slice in time_slices:
+        members = time_slice.get_nodes_for_cluster(cluster_key)
+
+        plt.title(str(time_slice.time))
+
+        longitudes = [m['Longitude_Destination'] if 'Longitude_Destination' in m else 0
+                      for m in members]
+        latitudes = [m['Latitude_Destination'] if 'Latitude_Destination' in m else 0
+                     for m in members]
+        # every marker gets the same size, scaled by the cluster's node count
+        member_count = len(members)
+        marker_sizes = [member_count * 100] * member_count
+        plt.scatter(longitudes, latitudes, s=marker_sizes)
+
+        plt.pause(0.5)
+
+
+def plt_show_bars(time_slices: List[TimeSlice], cluster_no):
+    """Show a bar chart of one cluster's node count in every time slice.
+
+    Bars are placed at the slice's position in ``time_slices``; every 10th
+    position is labelled with that slice's ``time``.
+    """
+    cluster_key = str(cluster_no)
+    tick_step = 10
+
+    slice_times = [time_slice.time for time_slice in time_slices]
+    positions = range(len(slice_times))
+
+    cluster_sizes = []
+    for time_slice in time_slices:
+        cluster_sizes.append(len(time_slice.get_nodes_for_cluster(cluster_key)))
+
+    _, ax = plt.subplots()
+    ax.bar(x=positions, height=cluster_sizes)
+
+    ax.set_ylabel('Size')
+    ax.set_title(f'Cluster-{cluster_key} size over time')
+    # label only every tick_step-th bar to keep the axis readable
+    ax.set_xticks(positions[::tick_step])
+    ax.set_xticklabels(slice_times[::tick_step])
+
+    plt.show()
+
+
+if __name__ == "__main__":
+    # Load the Destination_Layer time slices, order them by time and plot the
+    # size of cluster 0 over time.
+    time_slices = Repository().get_time_slices_by_name("Destination_Layer")
+
+    # chronological order
+    # NOTE(review): eval() executes whatever string is stored in `time`; if the
+    # repository content is not fully trusted, parse it with a safe parser
+    # (e.g. ast.literal_eval) instead -- confirm the stored format first.
+    chronological_key = lambda time_slice: eval(time_slice.time)
+    time_slices.sort(key=chronological_key)
+
+    print(len(time_slices))
+    plt_show_bars(time_slices, cluster_no=0)
\ No newline at end of file