Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
SMART
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
3
Issues
3
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Registry
Registry
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
UNI-KLU
SMART
Commits
9885bbf7
Commit
9885bbf7
authored
Apr 22, 2020
by
Alexander Lercher
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Extracted visualization from slicing
parent
e7061d7f
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
103 additions
and
80 deletions
+103
-80
timeslice.py
...unity-detection-microservice/app/db/entities/timeslice.py
+6
-3
run_time_slicing.py
.../community-detection-microservice/app/run_time_slicing.py
+33
-77
visualize_time_slices.py
...n-microservice/app/visualization/visualize_time_slices.py
+64
-0
No files found.
src/data-hub/community-detection-microservice/app/db/entities/timeslice.py
View file @
9885bbf7
...
@@ -16,19 +16,22 @@ class TimeSlice:
...
@@ -16,19 +16,22 @@ class TimeSlice:
time_slice_dict
:
Dict
=
None
,
from_db
=
False
):
time_slice_dict
:
Dict
=
None
,
from_db
=
False
):
self
.
time
=
str
(
time
)
self
.
time
=
str
(
time
)
self
.
layer_name
=
layer_name
self
.
layer_name
=
layer_name
self
.
clusters
:
Dict
[
int
,
List
[
Node
]]
=
{}
self
.
clusters
:
Dict
[
str
,
List
[
Node
]]
=
{}
if
time_slice_dict
is
not
None
:
if
time_slice_dict
is
not
None
:
self
.
from_serializable_dict
(
time_slice_dict
,
from_db
)
self
.
from_serializable_dict
(
time_slice_dict
,
from_db
)
def
add_node_to_cluster
(
self
,
cluster_label
:
int
,
node
):
def
add_node_to_cluster
(
self
,
cluster_label
:
str
,
node
):
# only string keys can be stored in json
cluster_label
=
str
(
cluster_label
)
if
cluster_label
not
in
self
.
clusters
:
if
cluster_label
not
in
self
.
clusters
:
self
.
clusters
[
cluster_label
]
=
[]
self
.
clusters
[
cluster_label
]
=
[]
node
=
self
.
_get_unique_id
(
node
)
node
=
self
.
_get_unique_id
(
node
)
self
.
clusters
[
cluster_label
]
.
append
(
node
)
self
.
clusters
[
cluster_label
]
.
append
(
node
)
def
get_nodes_for_cluster
(
self
,
cluster_label
:
int
):
def
get_nodes_for_cluster
(
self
,
cluster_label
:
str
):
if
cluster_label
in
self
.
clusters
:
if
cluster_label
in
self
.
clusters
:
return
self
.
clusters
[
cluster_label
]
return
self
.
clusters
[
cluster_label
]
else
:
else
:
...
...
src/data-hub/community-detection-microservice/app/run_time_slicing.py
View file @
9885bbf7
...
@@ -6,96 +6,52 @@ if os.path.exists(modules_path):
...
@@ -6,96 +6,52 @@ if os.path.exists(modules_path):
import
json
import
json
from
datetime
import
datetime
,
date
from
datetime
import
datetime
,
date
import
matplotlib.pyplot
as
plt
from
db.repository
import
Repository
from
db.repository
import
Repository
from
db.entities.timeslice
import
TimeSlice
from
db.entities.timeslice
import
TimeSlice
from
db.entities
import
ClusterSet
from
db.entities
import
ClusterSet
from
typing
import
Tuple
,
Dict
from
typing
import
Tuple
,
Dict
,
Any
repo
=
Repository
()
def
convert_to_time_slice_key
(
timestamp
:
str
)
->
Tuple
[
int
,
int
]:
def
convert_to_time_slice_key
(
timestamp
:
str
)
->
Tuple
[
int
,
int
]:
'''Returns the tuple (year, week_of_year) from a timestamp.'''
'''Returns the tuple (year, week_of_year) from a timestamp.
This is used as the key for the slicing.
'''
timestamp
=
datetime
.
fromtimestamp
(
float
(
timestamp
[
0
:
10
]))
timestamp
=
datetime
.
fromtimestamp
(
float
(
timestamp
[
0
:
10
]))
(
y
,
w
,
_
)
=
timestamp
.
isocalendar
()
(
y
,
w
,
_
)
=
timestamp
.
isocalendar
()
return
(
y
,
w
)
return
(
y
,
w
)
def
get_clusterset
():
def
split_clustersets_by_time
(
clustersets
)
->
Dict
[
Any
,
TimeSlice
]:
clusterset
=
repo
.
get_clusterset
(
'Destination_Layer'
)
'''
# with open('clustering_results/optics/clusterset_Destination_Layer.txt') as file:
Partitions all nodes of each clusterset into individual time slices based on their timestamp. The information about the cluster is kept.
# clusterset = ClusterSet(cluster_set_dict=json.loads(file.read()))
return
clusterset
def
plt_show_circles
(
keys
,
time_slices
,
cluster_no
):
for
k
in
keys
:
slice_
=
time_slices
[
k
]
if
cluster_no
in
slice_
.
nodes
:
nodes
=
slice_
.
nodes
[
cluster_no
]
else
:
nodes
=
[]
# print(f"{slice_.time} number elements for cluster {cluster_no}: {len(nodes)}")
plt
.
title
(
str
(
k
))
plt
.
scatter
([
n
[
'Longitude_Destination'
]
for
n
in
nodes
],
[
n
[
'Latitude_Destination'
]
for
n
in
nodes
],
s
=
[
len
(
nodes
)
*
100
]
*
len
(
nodes
))
plt
.
pause
(
0.5
)
def
plt_show_bars
(
keys
,
time_slices
,
cluster_no
):
x_axis_label_stepsize
=
10
nodes_per_slice_for_single_cluster
=
\
[
len
(
time_slices
[
k
]
.
get_nodes_for_cluster
(
cluster_no
))
for
k
in
keys
]
fig
,
ax
=
plt
.
subplots
()
:param clustersets: The clustersets whose nodes are split
ax
.
bar
(
x
=
range
(
len
(
keys
)),
:returns: A dict of time slices where the key is the time info and value is the information about the time slice
height
=
nodes_per_slice_for_single_cluster
)
'''
cnt
=
0
ax
.
set_ylabel
(
'Size'
)
time_slices
:
Dict
[
Any
,
TimeSlice
]
=
{}
ax
.
set_title
(
f
'Cluster-{cluster_no} size over time'
)
for
clusterset
in
clustersets
:
ax
.
set_xticks
(
range
(
len
(
keys
))[::
x_axis_label_stepsize
])
for
cluster_no
in
clusterset
.
clusters
:
ax
.
set_xticklabels
(
keys
[::
x_axis_label_stepsize
])
plt
.
show
()
clusterset
=
get_clusterset
()
cnt
=
0
time_slices
=
{}
# for clusterset in clustersets:
for
cluster_no
in
clusterset
.
clusters
:
for
node
in
cluster_no
.
nodes
:
for
node
in
cluster_no
.
nodes
:
# assign the nodes to time slices and recreate the clusters there
# assign the nodes to time slices and recreate the clusters there
# TODO use start and end time for assignment
time_key
=
convert_to_time_slice_key
(
str
(
node
[
'Finished_time'
]))
time_key
=
convert_to_time_slice_key
(
str
(
node
[
'Finished_time'
]))
if
time_key
not
in
time_slices
:
if
time_key
not
in
time_slices
:
time_slices
[
time_key
]
=
TimeSlice
(
time_key
,
clusterset
.
layer_name
)
time_slices
[
time_key
]
=
TimeSlice
(
time_key
,
clusterset
.
layer_name
)
time_slices
[
time_key
]
.
add_node_to_cluster
(
cluster_no
.
cluster_label
,
node
)
time_slices
[
time_key
]
.
add_node_to_cluster
(
cluster_no
.
cluster_label
,
node
)
return
time_slices
# sort chronologically
if
__name__
==
"__main__"
:
keys
=
list
(
time_slices
.
keys
())
repo
=
Repository
()
keys
.
sort
()
clustersets
=
[
repo
.
get_clusterset
(
'Destination_Layer'
)]
time_slices
=
split_clustersets_by_time
(
clustersets
)
repo
.
remove_all_time_slices
()
# sort chronologically
for
k
,
v
in
time_slices
.
items
():
keys
=
list
(
time_slices
.
keys
())
repo
.
add_time_slice
(
v
)
keys
.
sort
(
)
repo
.
remove_all_time_slices
()
print
(
len
(
time_slices
))
for
k
,
v
in
time_slices
.
items
():
plt_show_bars
(
keys
,
time_slices
,
cluster_no
=
0
)
repo
.
add_time_slice
(
v
)
src/data-hub/community-detection-microservice/app/visualization/visualize_time_slices.py
0 → 100644
View file @
9885bbf7
import
sys
import
os
# Make the app package and the shared modules importable when this file is
# executed as a script: every candidate directory that exists (relative to the
# current working directory) is inserted at position 1 of sys.path.
for path in ('../', './', '../../../modules/'):
    if not os.path.exists(path):
        continue
    sys.path.insert(1, path)
import
matplotlib.pyplot
as
plt
from
db.repository
import
Repository
from
db.entities
import
TimeSlice
from
typing
import
List
def plt_show_circles(time_slices: List[TimeSlice], cluster_no):
    '''
    Draws the destination coordinates of one cluster as a scatter plot,
    one time slice after the other, pausing 0.5 seconds after each slice.

    :param time_slices: The time slices to draw, in the order given
    :param cluster_no: Label of the cluster to show; converted to str before lookup
    '''
    label = str(cluster_no)
    lon_key = 'Longitude_Destination'
    lat_key = 'Latitude_Destination'

    for current in time_slices:
        members = current.get_nodes_for_cluster(label)
        count = len(members)

        plt.title(str(current.time))

        # nodes lacking a coordinate are plotted at 0 on that axis
        xs = [m[lon_key] if lon_key in m else 0 for m in members]
        ys = [m[lat_key] if lat_key in m else 0 for m in members]

        # every marker of a slice gets the same size, scaled by the node count
        plt.scatter(xs, ys, s=[count * 100] * count)
        plt.pause(0.5)
def plt_show_bars(time_slices: List[TimeSlice], cluster_no):
    '''
    Shows a bar chart with the number of nodes of one cluster per time slice.

    :param time_slices: The time slices to plot, one bar each, in the order given
    :param cluster_no: Label of the cluster to show; converted to str before lookup
    '''
    label = str(cluster_no)
    tick_step = 10  # only every 10th slice gets an x-axis label

    slice_names = []
    sizes = []
    for ts in time_slices:
        slice_names.append(ts.time)
        sizes.append(len(ts.get_nodes_for_cluster(label)))

    positions = range(len(slice_names))

    _, axes = plt.subplots()
    axes.bar(x=positions, height=sizes)
    axes.set_ylabel('Size')
    axes.set_title(f'Cluster-{label} size over time')
    axes.set_xticks(positions[::tick_step])
    axes.set_xticklabels(slice_names[::tick_step])
    plt.show()
if __name__ == "__main__":
    # Script entry point: load all stored time slices of the destination
    # layer, order them chronologically and plot the size of cluster 0.
    import ast

    repo = Repository()

    time_slices = repo.get_time_slices_by_name("Destination_Layer")

    # chronological order
    # ts.time is a string; presumably the str() of the (year, week) slicing
    # key -- confirm against the stored data. It comes from the database, so
    # it is parsed with ast.literal_eval (literals only) instead of eval(),
    # which would execute arbitrary expressions found in the stored value.
    time_slices.sort(key=lambda ts: ast.literal_eval(ts.time))

    print(len(time_slices))

    plt_show_bars(time_slices, cluster_no=0)
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment