UNI-KLU / SMART

Commit 931efc95, authored Oct 22, 2020 by Bogdan
parent 41cd4ebb

Updated Visualisations Methods

Showing 7 changed files with 723 additions and 25 deletions (+723 -25):
src/data-hub/community-detection-microservice/app/visualization/vis_cluster_results.py    +182 -0
src/data-hub/community-detection-microservice/app/visualization/vis_execution_time.py     +34 -0
src/data-hub/community-detection-microservice/app/visualization/visualize_time_slices.py  +64 -0
src/data-hub/role-stage-discovery-microservice/app/visualisationPaper.py                  +31 -25
src/data-hub/role-stage-discovery-microservice/app/visualization/vis_cluster_results.py   +314 -0
src/data-hub/role-stage-discovery-microservice/app/visualization/vis_execution_time.py    +34 -0
src/data-hub/role-stage-discovery-microservice/app/visualization/visualize_time_slices.py +64 -0
src/data-hub/community-detection-microservice/app/visualization/vis_cluster_results.py (new file, mode 100644)
# clustering of generated nodes
import sys
import os
import requests
import json

modules_path = './'
if os.path.exists(modules_path):
    sys.path.insert(1, modules_path)

import matplotlib.pyplot as plt
import sklearn.datasets
import numpy as np
from processing.clustering.clusterer import Clusterer

# parameters for data generation
N_SAMPLES = 1000
N_FEATURES = 2
N_CENTERS = 3
STD_DEVIATION = 1.0


def show_generated_data(ax, nodes, labels):
    distinct_colors = plt.cm.rainbow(np.linspace(0, 1, N_CENTERS))
    colors = [distinct_colors[label] for label in labels]

    ax.set_title('Generated Dataset')
    ax.set_xlabel('Feature 1')
    ax.set_ylabel('Feature 2')
    ax.scatter(nodes[:, 0], nodes[:, 1], c=colors)


def show_clustering_result(ax, min_pts, clusters: dict):
    labels = clusters.keys()
    # flatten values in dict
    nodes = [node for subset in clusters.values() for node in subset]

    if -1 in labels:
        # clustering contains noise, add them in black
        distinct_colors = plt.cm.rainbow(np.linspace(0, 1, len(set(labels)) - 1))
        distinct_colors = np.append(distinct_colors, [[0, 0, 0, 1]], axis=0)
    else:
        distinct_colors = plt.cm.rainbow(np.linspace(0, 1, len(set(labels))))

    colors = [distinct_colors[node['cluster_label']] for node in nodes]

    ax.set_title(f'Clustering Result with MinPts={min_pts}')
    ax.set_xlabel('Total_Demand_MWh')
    ax.set_ylabel('Customer')
    ax.scatter([n['Total_Demand_MWh'] for n in nodes],
               [n['Customer'] for n in nodes],
               c=colors)


def show_clusteringSingleFeature_result(ax, min_pts, clusters: dict):
    labels = clusters.keys()
    # flatten values in dict
    nodes = [node for subset in clusters.values() for node in subset]

    if -1 in labels:
        # clustering contains noise, add them in black
        distinct_colors = plt.cm.rainbow(np.linspace(0, 1, len(set(labels)) - 1))
        distinct_colors = np.append(distinct_colors, [[0, 0, 0, 1]], axis=0)
    else:
        distinct_colors = plt.cm.rainbow(np.linspace(0, 1, len(set(labels))))

    colors = [distinct_colors[node['cluster_label']] for node in nodes]

    ax.set_title(f'Clustering Result with MinPts={min_pts}')
    ax.set_xlabel('Total_Demand_MWh')
    ax.set_ylabel('Customer')
    ax.scatter([n['Total_Demand_MWh'] for n in nodes],
               [1 for n in nodes],
               c=colors)


def run_clustering(min_points, dataset):
    clusterer = Clusterer(min_points=min_points)
    return clusterer.cluster_dataset(
        dataset=dataset,
        features=['Total_Demand_MWh', 'Customer']
    )


def run_clustering_SingleFeature(min_points, dataset):
    clusterer = Clusterer(min_points=min_points)
    return clusterer.cluster_dataset(
        dataset=dataset,
        features=['Total_Demand_MWh']
    )

# res: Dict[Any, ClusterResult] = clusterer.cluster_dataset(
#     nodes,
#     layer.properties
# )


if __name__ == '__main__':
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2)
    fig.tight_layout(pad=3.0)

    nodes, labels = sklearn.datasets.make_blobs(n_samples=N_SAMPLES,
                                                n_features=N_FEATURES,
                                                centers=N_CENTERS,
                                                cluster_std=STD_DEVIATION)
    # nodes = np.multiply(nodes, .1)

    # get nodes from swagger
    # r.request(link)
    # nodes = blahb lbah
    # TODO get a list of values (total demand)
    # nodes =
    # USELESS NOW
    # show_generated_data(ax1, nodes, labels)
    # dataset = [{'1': n[0], '2': n[1]} for n in nodes]

    JWT_TOKEN = "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJ1c2VybmFtZSI6InJlZ3VsYXJAaXRlYy5hYXUuYXQiLCJjcmVhdGVkX2F0IjoiMjAyMC0xMC0yMCAxNDoyNDoxMi45MzI3OTAiLCJ2YWxpZF91bnRpbCI6IjIwMjAtMTAtMjEgMTQ6MjQ6MTIuOTMyNzkwIn0.qzaDauyEA4pAnw8K8ik6jTtbEOY24q159GDYbvByaJ4"

    # r = requests.get('https://articonf1.itec.aau.at:30103/api/paper/paper/layers/Demand_Layer/nodes', timeout=15)
    r = requests.get(url='https://articonf1.itec.aau.at:30103/api/paper/paper/layers/Demand_Layer/nodes',
                     timeout=15,
                     headers={"Authorization": f"Bearer {JWT_TOKEN}"},
                     verify=False)  # ignore ssl error
    # TODO NEED PERMISSION HOW DO I GET PERMISSION
    print("Downloaded JSON")

    inputSimListOfDict = json.loads(r.content)
    # {
    #     "Customer": "13",
    #     "Postcode": "2261",
    #     "Timestamp": "2012-07-02 09:00:00",
    #     "Total_Demand_MWh": "10513.24",
    #     "UniqueID": "f5a4eb614bf3d794211970c65365aeeec7afe6750b7623e3de4d174f9ef0d6e1",
    #     "layer_name": "Demand_Layer",
    #     "use_case": "paper",
    #     "use_case_table": "paper"
    # }

    dataset = []
    skippedCounter = 0
    for entry in inputSimListOfDict:
        newDict = {}
        try:
            newDict["Total_Demand_MWh"] = float(entry["Total_Demand_MWh"])
            newDict["Customer"] = float(entry["Customer"])
            dataset.append(newDict)
        except:
            skippedCounter += 1
            # print("Warning: Skipped badly formatted node")
    print("Warning: Skipped " + str(skippedCounter) + " badly formatted nodes")

    print("Started 1st Clustering")
    clusters = run_clustering_SingleFeature(25, dataset)
    show_clusteringSingleFeature_result(ax1, 25, clusters)

    print("Started 2nd Clustering")
    clusters = run_clustering_SingleFeature(50, dataset)
    show_clusteringSingleFeature_result(ax2, 50, clusters)

    print("Started 3rd Clustering")
    clusters = run_clustering_SingleFeature(100, dataset)
    show_clusteringSingleFeature_result(ax3, 100, clusters)

    print("Started 4th Clustering")
    clusters = run_clustering_SingleFeature(300, dataset)
    show_clusteringSingleFeature_result(ax4, 300, clusters)

    # print("Started 1st Clustering")
    # clusters = run_clustering(5, dataset)
    # show_clustering_result(ax1, 5, clusters)
    # print("Started 2nd Clustering")
    # clusters = run_clustering(10, dataset)
    # show_clustering_result(ax2, 10, clusters)
    # print("Started 3rd Clustering")
    # clusters = run_clustering(15, dataset)
    # show_clustering_result(ax3, 15, clusters)
    # print("Started 4th Clustering")
    # clusters = run_clustering(25, dataset)
    # show_clustering_result(ax4, 25, clusters)

    plt.show()
    print("#FINISH")
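The script above depends on the repo-internal Clusterer; the plotting helpers only assume that clustering returns a dict mapping each cluster label to a list of node dicts that carry a 'cluster_label' key next to the original features. A minimal stand-in with that shape, sketched on scikit-learn's DBSCAN (an assumption suggested by the MinPts parameter, not code from this commit):

from sklearn.cluster import DBSCAN

def cluster_nodes_sketch(dataset, features, min_points):
    # Hypothetical stand-in for Clusterer.cluster_dataset: groups node dicts by
    # DBSCAN label; label -1 collects noise points (drawn in black above).
    points = [[node[f] for f in features] for node in dataset]
    labels = DBSCAN(min_samples=min_points).fit_predict(points)
    result = {}
    for node, label in zip(dataset, labels):
        labelled = {**node, 'cluster_label': int(label)}
        result.setdefault(int(label), []).append(labelled)
    return result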
src/data-hub/community-detection-microservice/app/visualization/vis_execution_time.py (new file, mode 100644)
import matplotlib.pyplot as plt

# clustering
times = [[1000, 0.9823, 1.0420, 0.9656],
         [5000, 7.8716, 8.8916, 8.2609],
         [10000, 24.7394, 29.0521, 24.3734],
         [20000, 86.0519, 104.0453, 85.4891],
         [50000, 489.4964, 574.7641, 468.8706]]

# slicing
times2 = [[1000, 0.010159840000000031, 0.008385740000001363, 0.008584839999997484],
          [5000, 0.044350359999999256, 0.04146890000000099, 0.04291390000000206],
          [10000, 0.07776566000000074, 0.07954154000000102, 0.07955803999999489],
          [20000, 0.15964476000000047, 0.16679267999999894, 0.15759418000000097],
          [50000, 0.4081138799999998, 0.4278634399999987, 0.41363941999999554]]

n = [t[0] for t in times]
finished = [t[1] for t in times]
dest = [t[2] for t in times]
price = [t[3] for t in times]
# print(f"{t[0]}: {t[1]} {t[2]} {t[3]}")

fig, ax = plt.subplots()
ax.set_title('Execution Time for Clustering')
ax.set_xlabel('Number of Nodes')
ax.set_ylabel('Time in Seconds')
ax.plot(n, dest, label='Destination')
ax.plot(n, finished, label='Finished Time')
ax.plot(n, price, label='Price')
ax.legend()

plt.show()
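The slicing timings in times2 are defined in this file but never drawn. Under the assumption that its columns follow the same layout as times (node count, then the finished/destination/price runs), a second figure could reuse the same pattern (a sketch only, reusing times2 and plt from the script above):

n2 = [t[0] for t in times2]
fig2, ax2 = plt.subplots()
ax2.set_title('Execution Time for Time Slicing')
ax2.set_xlabel('Number of Nodes')
ax2.set_ylabel('Time in Seconds')
ax2.plot(n2, [t[2] for t in times2], label='Destination')
ax2.plot(n2, [t[1] for t in times2], label='Finished Time')
ax2.plot(n2, [t[3] for t in times2], label='Price')
ax2.legend()
plt.show()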
src/data-hub/community-detection-microservice/app/visualization/visualize_time_slices.py (new file, mode 100644)
import sys
import os

for path in ['../', './', '../../../modules/']:
    if os.path.exists(path):
        sys.path.insert(1, path)

import matplotlib.pyplot as plt
from db.repository import Repository
from db.entities import TimeSlice
from typing import List


def plt_show_circles(time_slices: List[TimeSlice], cluster_no):
    cluster_no = str(cluster_no)

    for slice_ in time_slices:
        nodes = slice_.get_nodes_for_cluster(cluster_no)
        # print(f"{slice_.time} number elements for cluster {cluster_no}: {len(nodes)}")

        plt.title(str(slice_.time))
        plt.scatter([n['Longitude_Destination'] if 'Longitude_Destination' in n else 0 for n in nodes],
                    [n['Latitude_Destination'] if 'Latitude_Destination' in n else 0 for n in nodes],
                    s=[len(nodes) * 100] * len(nodes))
        plt.pause(0.5)


def plt_show_bars(time_slices: List[TimeSlice], cluster_no):
    cluster_no = str(cluster_no)

    labels = [ts.time for ts in time_slices]
    x_axis_label_stepsize = 10

    nodes_per_slice_for_single_cluster = \
        [len(time_slice.get_nodes_for_cluster(cluster_no)) for time_slice in time_slices]

    fig, ax = plt.subplots()
    ax.bar(x=range(len(labels)), height=nodes_per_slice_for_single_cluster)

    ax.set_ylabel('Size')
    ax.set_title(f'Cluster-{cluster_no} size over time')
    ax.set_xticks(range(len(labels))[::x_axis_label_stepsize])
    ax.set_xticklabels(labels[::x_axis_label_stepsize])

    plt.show()


if __name__ == "__main__":
    repo = Repository()
    time_slices = repo.get_time_slices_by_name("Destination_Layer")

    # chronological order
    time_slices.sort(key=lambda ts: eval(ts.time))

    print(len(time_slices))

    plt_show_bars(time_slices, cluster_no=0)
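The chronological sort above goes through eval(ts.time). Assuming ts.time stores a Python literal (for example a stringified tuple such as "(2020, 25)"), ast.literal_eval is a safer drop-in because it accepts only literals and rejects arbitrary expressions (a sketch, not part of this commit):

import ast

# drop-in replacement for the sort key used in __main__ above
time_slices.sort(key=lambda ts: ast.literal_eval(ts.time))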
src/data-hub/role-stage-discovery-microservice/app/visualisationPaper.py
@@ -114,37 +114,43 @@ def mainViz():
 ...
     #TRY TO PLOT
-    plt.xlabel('Eucledian Distance')
-    plt.ylabel('Nr. of Cluster combinations')
+    fig, axs = plt.subplots(1, 5, sharex=True)
+    #fig, axs = plt.subplots(, sharex = True)
+    fig.suptitle('Choose A title??? ')
+    fig.text(0.5, 0.04, 'Euclidean Distance', ha='center', va='center')
+    #fig.suptitle('')
+    #fig.text(0.5, 0.04, 'Euclidean Distance', ha='center', va='center')
     list1 = sorted(distributionSolar.items())
     x2, y2 = zip(*list1)
-    plt.bar(x2, y2, color='purple', label="Solar", width=0.2)
-    plt.legend()
-    plt.set_title('Solar')
+    axs[0].bar(x2, y2, color='purple', label="Solar", width=0.2)
+    axs[0].legend()
+    axs[0].set_title('Solar')
+    axs[0].set(ylabel='Nr. of Similarity connections between two Clusters')
-    # list1 = sorted(distributionEnergy.items())
-    # x,y = zip(*list1)
-    # plt.bar(x, y, color='blue',label="Energy", width=0.2)
-    # plt.legend()
-    # plt.set_title('Energy')
+    list1 = sorted(distributionEnergy.items())
+    x, y = zip(*list1)
+    axs[1].bar(x, y, color='blue', label="Energy", width=0.2)
+    axs[1].legend()
-    # list1 = sorted(distributionHeating.items())
-    # x3,y3 = zip(*list1)
-    # plt.bar(x3,y3,color='red',label="Heating", width=0.2)
-    # plt.legend()
-    # plt.set_title('Heating')
+    list1 = sorted(distributionHeating.items())
+    x3, y3 = zip(*list1)
+    axs[2].bar(x3, y3, color='red', label="Heating", width=0.2)
+    axs[2].legend()
-    # list1 = sorted(distributionPrice.items())
-    # x4,y4 = zip(*list1)
-    # plt.bar(x4,y4,color='green',label="Price", width=0.2)
-    # plt.legend()
-    # plt.set_title('Price')
+    list1 = sorted(distributionPrice.items())
+    x4, y4 = zip(*list1)
+    axs[3].bar(x4, y4, color='green', label="Price", width=0.2)
+    axs[3].legend()
-    # list1 = sorted(distributionPosition.items())
-    # x5,y5 = zip(*list1)
-    # plt.bar(x5,y5,color='grey',label="Location", width=0.2)
-    # plt.legend()
-    # plt.set_title('Position')
+    list1 = sorted(distributionPosition.items())
+    x5, y5 = zip(*list1)
+    axs[4].bar(x5, y5, color='grey', label="Location", width=0.2)
+    axs[4].legend()
 ...
src/data-hub/role-stage-discovery-microservice/app/visualization/vis_cluster_results.py (new file, mode 100644)
# clustering of generated nodes
import sys
import os
import requests
import json

modules_path = './'
if os.path.exists(modules_path):
    sys.path.insert(1, modules_path)

import matplotlib.pyplot as plt
import sklearn.datasets
import numpy as np
from processing.clustering.clusterer import Clusterer
#from datascience import stats

# parameters for data generation
N_SAMPLES = 1000
N_FEATURES = 2
N_CENTERS = 3
STD_DEVIATION = 1.0


def show_generated_data(ax, nodes, labels):
    distinct_colors = plt.cm.rainbow(np.linspace(0, 1, N_CENTERS))
    colors = [distinct_colors[label] for label in labels]

    ax.set_title('Generated Dataset')
    ax.set_xlabel('Feature 1')
    ax.set_ylabel('Feature 2')
    ax.scatter(nodes[:, 0], nodes[:, 1], c=colors)


def show_clustering_result(ax, min_pts, clusters: dict):
    labels = clusters.keys()
    # flatten values in dict
    #nodes = [node for subset in clusters.values() for node in subset]
    # ^^^ bugged | replaced vvv
    nodes = []
    for lbl in labels:
        nodes.extend(clusters[lbl].nodes)

    if -1 in labels:
        # clustering contains noise, add them in black
        distinct_colors = plt.cm.rainbow(np.linspace(0, 1, len(set(labels)) - 1))
        distinct_colors = np.append(distinct_colors, [[0, 0, 0, 1]], axis=0)
    else:
        distinct_colors = plt.cm.rainbow(np.linspace(0, 1, len(set(labels))))

    colors = [distinct_colors[node['cluster_label']] for node in nodes]

    ax.set_title(f'Clustering Result with MinPts={min_pts}')
    ax.set_xlabel('Total_Demand_MWh')
    ax.set_ylabel('Customer')
    ax.scatter([n['Total_Demand_MWh'] for n in nodes],
               [n['Customer'] for n in nodes],
               c=colors)


def show_clusteringSingleFeature_result(ax, min_pts, clusters: dict):
    labels = clusters.keys()
    # flatten values in dict
    #nodes = [node for subset in clusters.values() for node in subset]
    # ^^^ bugged | replaced vvv
    nodes = []
    for lbl in labels:
        nodes.extend(clusters[lbl].nodes)

    if -1 in labels:
        # clustering contains noise, add them in black
        distinct_colors = plt.cm.rainbow(np.linspace(0, 1, len(set(labels)) - 1))
        distinct_colors = np.append(distinct_colors, [[0, 0, 0, 1]], axis=0)
    else:
        distinct_colors = plt.cm.rainbow(np.linspace(0, 1, len(set(labels))))

    colors = [distinct_colors[node['cluster_label']] for node in nodes]

    ax.set_title(f'Clustering Result with MinPts={min_pts}')
    ax.set_xlabel('Total_Demand_MWh')
    ax.grid(True, axis='x')
    #ax.set_ylabel('Customer')
    ax.scatter([n['Total_Demand_MWh'] for n in nodes],
               [0 for n in nodes],
               c=colors)


def show_clusteringSingleFeatureDensity_result(ax, min_pts, clusters: dict):
    labels = clusters.keys()
    # flatten values in dict
    #nodes = [node for subset in clusters.values() for node in subset]
    # ^^^ bugged | replaced vvv
    nodes = []
    mydict = dict()
    for lbl in labels:
        nodes.extend(clusters[lbl].nodes)

    for n in nodes:
        # group nodes per cluster
        if not checkKey(mydict, n['cluster_label']):
            mydict[n['cluster_label']] = []
        mydict[n['cluster_label']].append(n['Total_Demand_MWh'])

    if -1 in labels:
        # clustering contains noise, add them in black
        distinct_colors = plt.cm.rainbow(np.linspace(0, 1, len(set(labels)) - 1))
        distinct_colors = np.append(distinct_colors, [[0, 0, 0, 1]], axis=0)
    else:
        distinct_colors = plt.cm.rainbow(np.linspace(0, 1, len(set(labels))))

    colors = [distinct_colors[node['cluster_label']] for node in nodes]

    for cLabel, cValue in mydict.items():
        cluster_label = int(cLabel)
        if cluster_label != -1:
            bp = ax.boxplot(cValue, positions=[0], vert=False, widths=0.15)
            plt.setp(bp['boxes'][0], color=distinct_colors[cluster_label])
            plt.setp(bp['caps'][0], color=distinct_colors[cluster_label])
            plt.setp(bp['caps'][1], color=distinct_colors[cluster_label])
            plt.setp(bp['whiskers'][0], color=distinct_colors[cluster_label])
            plt.setp(bp['whiskers'][1], color=distinct_colors[cluster_label])
            try:
                plt.setp(bp['fliers'][0], color=distinct_colors[cluster_label])
            except:
                print('')
            try:
                plt.setp(bp['fliers'][1], color=distinct_colors[cluster_label])
            except:
                print('')
            plt.setp(bp['medians'][0], color=distinct_colors[cluster_label])

    ax.set_title(f'Clustering Result with MinPts={min_pts}')
    ax.set_xlabel('Total_Demand_MWh')
    #ax.set_ylabel('Cluster Label')
    ax.grid(True, axis='x')
    # ax.scatter([n['Total_Demand_MWh'] for n in nodes],
    #            [0 for n in nodes],
    #            c=colors)


def run_clustering(min_points, dataset):
    clusterer = Clusterer(min_points=min_points)
    return clusterer.cluster_dataset(
        dataset=dataset,
        features=['Total_Demand_MWh', 'Customer']
    )


def run_clustering_SingleFeature(min_points, dataset):
    clusterer = Clusterer(min_points=min_points)
    return clusterer.cluster_dataset(
        dataset=dataset,
        features=['Total_Demand_MWh']
    )

# res: Dict[Any, ClusterResult] = clusterer.cluster_dataset(
#     nodes,
#     layer.properties
# )


def checkKey(dict, key):
    if key in dict.keys():
        #print("Present, ", end=" ")
        #print(str(key) + " : " + str(dict[key]))
        return True
    else:
        #print("Not present")
        return False


def createDataset(inputDict):
    dataset = []
    skippedCounter = 0
    for entry in inputDict:
        newDict = {}
        try:
            newDict["Total_Demand_MWh"] = float(entry["Total_Demand_MWh"])
            newDict["Customer"] = float(entry["Customer"])
            dataset.append(newDict)
        except:
            skippedCounter += 1
            # print("Warning: Skipped badly formatted node")
    print("Warning: Skipped " + str(skippedCounter) + " badly formatted nodes")
    return dataset


if __name__ == '__main__':
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2)
    fig.tight_layout(pad=3.0)

    nodes, labels = sklearn.datasets.make_blobs(n_samples=N_SAMPLES,
                                                n_features=N_FEATURES,
                                                centers=N_CENTERS,
                                                cluster_std=STD_DEVIATION)
    # nodes = np.multiply(nodes, .1)

    # get nodes from swagger
    # r.request(link)
    # nodes = blahb lbah
    # TODO get a list of values (total demand)
    # nodes =
    # USELESS NOW
    # show_generated_data(ax1, nodes, labels)
    # dataset = [{'1': n[0], '2': n[1]} for n in nodes]

    JWT_TOKEN = "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJ1c2VybmFtZSI6InJlZ3VsYXJAaXRlYy5hYXUuYXQiLCJjcmVhdGVkX2F0IjoiMjAyMC0xMC0yMCAxNDoyNDoxMi45MzI3OTAiLCJ2YWxpZF91bnRpbCI6IjIwMjAtMTAtMjEgMTQ6MjQ6MTIuOTMyNzkwIn0.qzaDauyEA4pAnw8K8ik6jTtbEOY24q159GDYbvByaJ4"

    # r = requests.get('https://articonf1.itec.aau.at:30103/api/paper/paper/layers/Demand_Layer/nodes', timeout=15)
    r = requests.get(url='https://articonf1.itec.aau.at:30103/api/paper/paper/layers/Demand_Layer/nodes',
                     timeout=15,
                     headers={"Authorization": f"Bearer {JWT_TOKEN}"},
                     verify=False)  # ignore ssl error
    # TODO NEED PERMISSION HOW DO I GET PERMISSION
    print("Downloaded JSON")

    inputSimListOfDict = json.loads(r.content)
    # {
    #     "Customer": "13",
    #     "Postcode": "2261",
    #     "Timestamp": "2012-07-02 09:00:00",
    #     "Total_Demand_MWh": "10513.24",
    #     "UniqueID": "f5a4eb614bf3d794211970c65365aeeec7afe6750b7623e3de4d174f9ef0d6e1",
    #     "layer_name": "Demand_Layer",
    #     "use_case": "paper",
    #     "use_case_table": "paper"
    # }

    # dataset = []
    # skippedCounter = 0
    # for entry in inputSimListOfDict:
    #     newDict = {}
    #     try:
    #         newDict["Total_Demand_MWh"] = float(entry["Total_Demand_MWh"])
    #         newDict["Customer"] = float(entry["Customer"])
    #         dataset.append(newDict)
    #     except:
    #         skippedCounter += 1
    #         # print("Warning: Skipped badly formatted node")

    # dataset = createDataset(inputSimListOfDict)
    # print("Started TEST Clustering")  # 500 = 4 clusters
    # clusters = run_clustering_SingleFeature(250, dataset)
    # show_clusteringSingleFeatureDensity_result(ax1, 250, clusters)
    # dataset = createDataset(inputSimListOfDict)
    # print("Started TEST Clustering")  # 500 = 4 clusters
    # clusters = run_clustering_SingleFeature(500, dataset)
    # show_clusteringSingleFeatureDensity_result(ax2, 500, clusters)
    # dataset = createDataset(inputSimListOfDict)
    # print("Started TEST Clustering")  # 500 = 4 clusters
    # clusters = run_clustering_SingleFeature(750, dataset)
    # show_clusteringSingleFeatureDensity_result(ax3, 750, clusters)
    # dataset = createDataset(inputSimListOfDict)
    # print("Started TEST Clustering")  # 500 = 4 clusters
    # clusters = run_clustering_SingleFeature(1000, dataset)
    # show_clusteringSingleFeatureDensity_result(ax4, 1000, clusters)

    print("Started 1st Clustering")
    dataset = createDataset(inputSimListOfDict)
    clusters = run_clustering_SingleFeature(50, dataset)
    show_clusteringSingleFeature_result(ax1, 50, clusters)

    print("Started 2nd Clustering")
    dataset = createDataset(inputSimListOfDict)
    clusters = run_clustering_SingleFeature(100, dataset)
    show_clusteringSingleFeature_result(ax2, 100, clusters)

    print("Started 3rd Clustering")
    dataset = createDataset(inputSimListOfDict)
    clusters = run_clustering_SingleFeature(150, dataset)
    show_clusteringSingleFeature_result(ax3, 150, clusters)

    print("Started 4th Clustering")
    dataset = createDataset(inputSimListOfDict)
    clusters = run_clustering_SingleFeature(250, dataset)
    show_clusteringSingleFeature_result(ax4, 250, clusters)

    # dataset = createDataset(inputSimListOfDict)
    # print("Started 1st Clustering")
    # clusters = run_clustering(10, dataset)
    # show_clustering_result(ax1, 10, clusters)
    # dataset = createDataset(inputSimListOfDict)
    # print("Started 2nd Clustering")
    # clusters = run_clustering(15, dataset)
    # show_clustering_result(ax2, 15, clusters)
    # dataset = createDataset(inputSimListOfDict)
    # print("Started 3rd Clustering")
    # clusters = run_clustering(25, dataset)
    # show_clustering_result(ax3, 25, clusters)
    # dataset = createDataset(inputSimListOfDict)
    # print("Started 4th Clustering")
    # clusters = run_clustering(50, dataset)
    # show_clustering_result(ax4, 50, clusters)

    # agePhysics = [25, 31, 31, 31, 12, 28, 29, 31, 33, 34, 35, 36, 34, 39, 40, 41, 48]
    # basic plot
    # plt.boxplot(agePhysics)
    # plt.boxplot(agePhysics, showmeans=True)

    plt.show()
    print("#FINISH")
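The "# ^^^ bugged | replaced vvv" markers above record a shape change: per the commented type hint Dict[Any, ClusterResult], cluster_dataset in this microservice returns ClusterResult objects, so the old comprehension iterated over those objects instead of their nodes. A small self-contained illustration of the two flattening styles (ClusterResult here is a hypothetical minimal stand-in, not the repo class):

from dataclasses import dataclass
from typing import Dict, List

@dataclass
class ClusterResult:
    nodes: List[dict]  # hypothetical stand-in for the repo-internal result type

clusters: Dict[int, ClusterResult] = {
    0: ClusterResult(nodes=[{'cluster_label': 0, 'Total_Demand_MWh': 1.0}]),
    1: ClusterResult(nodes=[{'cluster_label': 1, 'Total_Demand_MWh': 9.0}]),
}

# old flatten: iterates the ClusterResult objects themselves, so it raises TypeError
# nodes = [node for subset in clusters.values() for node in subset]

# replacement used above: walk each result's .nodes list explicitly
nodes = []
for lbl in clusters.keys():
    nodes.extend(clusters[lbl].nodes)
print(len(nodes))  # 2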
src/data-hub/role-stage-discovery-microservice/app/visualization/vis_execution_time.py (new file, mode 100644)
import matplotlib.pyplot as plt

# clustering
times = [[1000, 0.9823, 1.0420, 0.9656],
         [5000, 7.8716, 8.8916, 8.2609],
         [10000, 24.7394, 29.0521, 24.3734],
         [20000, 86.0519, 104.0453, 85.4891],
         [50000, 489.4964, 574.7641, 468.8706]]

# slicing
times2 = [[1000, 0.010159840000000031, 0.008385740000001363, 0.008584839999997484],
          [5000, 0.044350359999999256, 0.04146890000000099, 0.04291390000000206],
          [10000, 0.07776566000000074, 0.07954154000000102, 0.07955803999999489],
          [20000, 0.15964476000000047, 0.16679267999999894, 0.15759418000000097],
          [50000, 0.4081138799999998, 0.4278634399999987, 0.41363941999999554]]

n = [t[0] for t in times]
finished = [t[1] for t in times]
dest = [t[2] for t in times]
price = [t[3] for t in times]
# print(f"{t[0]}: {t[1]} {t[2]} {t[3]}")

fig, ax = plt.subplots()
ax.set_title('Execution Time for Clustering')
ax.set_xlabel('Number of Nodes')
ax.set_ylabel('Time in Seconds')
ax.plot(n, dest, label='Destination')
ax.plot(n, finished, label='Finished Time')
ax.plot(n, price, label='Price')
ax.legend()

plt.show()
src/data-hub/role-stage-discovery-microservice/app/visualization/visualize_time_slices.py (new file, mode 100644)
import sys
import os

for path in ['../', './', '../../../modules/']:
    if os.path.exists(path):
        sys.path.insert(1, path)

import matplotlib.pyplot as plt
from db.repository import Repository
from db.entities import TimeSlice
from typing import List


def plt_show_circles(time_slices: List[TimeSlice], cluster_no):
    cluster_no = str(cluster_no)

    for slice_ in time_slices:
        nodes = slice_.get_nodes_for_cluster(cluster_no)
        # print(f"{slice_.time} number elements for cluster {cluster_no}: {len(nodes)}")

        plt.title(str(slice_.time))
        plt.scatter([n['Longitude_Destination'] if 'Longitude_Destination' in n else 0 for n in nodes],
                    [n['Latitude_Destination'] if 'Latitude_Destination' in n else 0 for n in nodes],
                    s=[len(nodes) * 100] * len(nodes))
        plt.pause(0.5)


def plt_show_bars(time_slices: List[TimeSlice], cluster_no):
    cluster_no = str(cluster_no)

    labels = [ts.time for ts in time_slices]
    x_axis_label_stepsize = 10

    nodes_per_slice_for_single_cluster = \
        [len(time_slice.get_nodes_for_cluster(cluster_no)) for time_slice in time_slices]

    fig, ax = plt.subplots()
    ax.bar(x=range(len(labels)), height=nodes_per_slice_for_single_cluster)

    ax.set_ylabel('Size')
    ax.set_title(f'Cluster-{cluster_no} size over time')
    ax.set_xticks(range(len(labels))[::x_axis_label_stepsize])
    ax.set_xticklabels(labels[::x_axis_label_stepsize])

    plt.show()


if __name__ == "__main__":
    repo = Repository()
    time_slices = repo.get_time_slices_by_name("Destination_Layer")

    # chronological order
    time_slices.sort(key=lambda ts: eval(ts.time))

    print(len(time_slices))

    plt_show_bars(time_slices, cluster_no=0)