Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
SMART
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
3
Issues
3
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Registry
Registry
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
UNI-KLU
SMART
Commits
9885bbf7
Commit
9885bbf7
authored
Apr 22, 2020
by
Alexander Lercher
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Extracted visualization from slicing
parent
e7061d7f
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
103 additions
and
80 deletions
+103
-80
timeslice.py
...unity-detection-microservice/app/db/entities/timeslice.py
+6
-3
run_time_slicing.py
.../community-detection-microservice/app/run_time_slicing.py
+33
-77
visualize_time_slices.py
...n-microservice/app/visualization/visualize_time_slices.py
+64
-0
No files found.
src/data-hub/community-detection-microservice/app/db/entities/timeslice.py
View file @
9885bbf7
...
...
@@ -16,19 +16,22 @@ class TimeSlice:
time_slice_dict
:
Dict
=
None
,
from_db
=
False
):
self
.
time
=
str
(
time
)
self
.
layer_name
=
layer_name
self
.
clusters
:
Dict
[
int
,
List
[
Node
]]
=
{}
self
.
clusters
:
Dict
[
str
,
List
[
Node
]]
=
{}
if
time_slice_dict
is
not
None
:
self
.
from_serializable_dict
(
time_slice_dict
,
from_db
)
def add_node_to_cluster(self, cluster_label: str, node):
    '''Appends a node to the cluster with the given label, creating the cluster on first use.

    :param cluster_label: Label of the target cluster; it is converted with str() before use
    :param node: The node to add; it is passed through self._get_unique_id() before being stored
    '''
    # only string keys can be stored in json
    label_key = str(cluster_label)

    # create the (empty) cluster first, then resolve and store the node
    members = self.clusters.setdefault(label_key, [])
    members.append(self._get_unique_id(node))
def
get_nodes_for_cluster
(
self
,
cluster_label
:
int
):
def
get_nodes_for_cluster
(
self
,
cluster_label
:
str
):
if
cluster_label
in
self
.
clusters
:
return
self
.
clusters
[
cluster_label
]
else
:
...
...
src/data-hub/community-detection-microservice/app/run_time_slicing.py
View file @
9885bbf7
...
...
@@ -6,96 +6,52 @@ if os.path.exists(modules_path):
import
json
from
datetime
import
datetime
,
date
import
matplotlib.pyplot
as
plt
from
db.repository
import
Repository
from
db.entities.timeslice
import
TimeSlice
from
db.entities
import
ClusterSet
from
typing
import
Tuple
,
Dict
repo
=
Repository
()
from
typing
import
Tuple
,
Dict
,
Any
def convert_to_time_slice_key(timestamp: str) -> Tuple[int, int]:
    '''Returns the tuple (year, week_of_year) from a timestamp.
    This is used as the key for the slicing.

    Only the first ten characters of the timestamp are used; they are parsed
    as a Unix timestamp in seconds, so longer (e.g. millisecond) strings are truncated.
    The conversion uses datetime.fromtimestamp() without a timezone, i.e. local time.
    '''
    seconds_since_epoch = float(timestamp[:10])
    moment = datetime.fromtimestamp(seconds_since_epoch)

    # isocalendar() yields (ISO year, ISO week number, ISO weekday); the weekday is not needed
    iso_year, iso_week, _weekday = moment.isocalendar()
    return (iso_year, iso_week)
def get_clusterset():
    '''Returns the clusterset of the 'Destination_Layer' layer from the module-level repository.'''
    # Offline alternative that was used before: read
    # 'clustering_results/optics/clusterset_Destination_Layer.txt' and build
    # ClusterSet(cluster_set_dict=json.loads(...)) from its content.
    layer_name = 'Destination_Layer'
    return repo.get_clusterset(layer_name)
def plt_show_circles(keys, time_slices, cluster_no):
    '''Shows the nodes of one cluster as a scatter plot, one frame per time slice key.

    :param keys: The time slice keys to show, in display order
    :param time_slices: Mapping from key to time slice; each slice exposes its clusters via .nodes
    :param cluster_no: The cluster whose nodes are drawn; a slice without it is drawn empty
    '''
    for key in keys:
        current = time_slices[key]
        members = current.nodes[cluster_no] if cluster_no in current.nodes else []

        plt.title(str(key))

        longitudes = [member['Longitude_Destination'] for member in members]
        latitudes = [member['Latitude_Destination'] for member in members]
        # every marker gets the same size, which grows with the number of nodes in the slice
        marker_sizes = [len(members) * 100] * len(members)
        plt.scatter(longitudes, latitudes, s=marker_sizes)

        # short pause so each slice is visible as one animation frame
        plt.pause(0.5)
def
plt_show_bars
(
keys
,
time_slices
,
cluster_no
):
x_axis_label_stepsize
=
10
nodes_per_slice_for_single_cluster
=
\
[
len
(
time_slices
[
k
]
.
get_nodes_for_cluster
(
cluster_no
))
for
k
in
keys
]
def
split_clustersets_by_time
(
clustersets
)
->
Dict
[
Any
,
TimeSlice
]:
'''
Partitions all nodes of each clusterset into individual time slices based on their timestamp. The information about the cluster is kept.
fig
,
ax
=
plt
.
subplots
()
ax
.
bar
(
x
=
range
(
len
(
keys
)),
height
=
nodes_per_slice_for_single_cluster
)
ax
.
set_ylabel
(
'Size'
)
ax
.
set_title
(
f
'Cluster-{cluster_no} size over time'
)
ax
.
set_xticks
(
range
(
len
(
keys
))[::
x_axis_label_stepsize
])
ax
.
set_xticklabels
(
keys
[::
x_axis_label_stepsize
])
plt
.
show
()
clusterset
=
get_clusterset
()
cnt
=
0
time_slices
=
{}
# for clusterset in clustersets:
for
cluster_no
in
clusterset
.
clusters
:
:params clustersets: The clustersets whose nodes are split
:returns: A dict of time slices where the key is the time info and value is the information about the time slice
'''
cnt
=
0
time_slices
:
Dict
[
Any
,
TimeSlice
]
=
{}
for
clusterset
in
clustersets
:
for
cluster_no
in
clusterset
.
clusters
:
for
node
in
cluster_no
.
nodes
:
# assign the nodes to time slices and recreate the clusters there
# TODO use start and end time for assignment
time_key
=
convert_to_time_slice_key
(
str
(
node
[
'Finished_time'
]))
if
time_key
not
in
time_slices
:
time_slices
[
time_key
]
=
TimeSlice
(
time_key
,
clusterset
.
layer_name
)
time_slices
[
time_key
]
.
add_node_to_cluster
(
cluster_no
.
cluster_label
,
node
)
return
time_slices
# sort chronologically
keys
=
list
(
time_slices
.
keys
())
keys
.
sort
()
if __name__ == "__main__":
    repo = Repository()

    # slice the clusters of the destination layer by time
    clustersets = [repo.get_clusterset('Destination_Layer')]
    time_slices = split_clustersets_by_time(clustersets)

    # replace all previously stored time slices with the freshly computed ones
    repo.remove_all_time_slices()
    for time_slice in time_slices.values():
        repo.add_time_slice(time_slice)
# sort chronologically
keys
=
list
(
time_slices
.
keys
())
keys
.
sort
(
)
print
(
len
(
time_slices
))
plt_show_bars
(
keys
,
time_slices
,
cluster_no
=
0
)
repo
.
remove_all_time_slices
()
for
k
,
v
in
time_slices
.
items
():
repo
.
add_time_slice
(
v
)
src/data-hub/community-detection-microservice/app/visualization/visualize_time_slices.py
0 → 100644
View file @
9885bbf7
import
sys
import
os
for
path
in
[
'../'
,
'./'
,
'../../../modules/'
]:
if
os
.
path
.
exists
(
path
):
sys
.
path
.
insert
(
1
,
path
)
import
matplotlib.pyplot
as
plt
from
db.repository
import
Repository
from
db.entities
import
TimeSlice
from
typing
import
List
def plt_show_circles(time_slices: List[TimeSlice], cluster_no):
    '''Shows the nodes of one cluster as a scatter plot, one frame per time slice.

    :param time_slices: The time slices to show, in display order
    :param cluster_no: The cluster label; it is converted with str() before the lookup
    '''
    cluster_key = str(cluster_no)

    def coordinate(member, field):
        # nodes without the coordinate field are drawn at 0
        return member[field] if field in member else 0

    for time_slice in time_slices:
        members = time_slice.get_nodes_for_cluster(cluster_key)

        plt.title(str(time_slice.time))

        longitudes = [coordinate(member, 'Longitude_Destination') for member in members]
        latitudes = [coordinate(member, 'Latitude_Destination') for member in members]
        # every marker gets the same size, which grows with the number of nodes in the slice
        marker_sizes = [len(members) * 100] * len(members)
        plt.scatter(longitudes, latitudes, s=marker_sizes)

        # short pause so each slice is visible as one animation frame
        plt.pause(0.5)
def plt_show_bars(time_slices: List[TimeSlice], cluster_no, x_axis_label_stepsize: int = 10):
    '''Shows a bar chart with the size of one cluster in every time slice.

    :param time_slices: The time slices to plot, in display order (one bar per slice)
    :param cluster_no: The cluster label; it is converted with str() before the lookup
    :param x_axis_label_stepsize: Only every n-th bar gets an x-axis tick and label.
        Defaults to 10, the value that was hard-coded before. Must not be 0.
    '''
    cluster_no = str(cluster_no)

    labels = [ts.time for ts in time_slices]
    nodes_per_slice_for_single_cluster = [
        len(time_slice.get_nodes_for_cluster(cluster_no)) for time_slice in time_slices
    ]
    positions = range(len(labels))

    _fig, ax = plt.subplots()
    ax.bar(x=positions, height=nodes_per_slice_for_single_cluster)
    ax.set_ylabel('Size')
    ax.set_title(f'Cluster-{cluster_no} size over time')

    # thin out ticks and labels with the same step so they stay aligned
    ax.set_xticks(positions[::x_axis_label_stepsize])
    ax.set_xticklabels(labels[::x_axis_label_stepsize])

    plt.show()
if __name__ == "__main__":
    # TimeSlice stores its time as str(...) of the slicing key, e.g. "(2020, 17)".
    # literal_eval parses such literals without executing arbitrary code,
    # unlike eval() on a value that was loaded from the database.
    from ast import literal_eval

    repo = Repository()

    time_slices = repo.get_time_slices_by_name("Destination_Layer")

    # chronological order
    time_slices.sort(key=lambda ts: literal_eval(ts.time))

    print(len(time_slices))

    plt_show_bars(time_slices, cluster_no=0)
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment