Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
SMART
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
3
Issues
3
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Registry
Registry
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
UNI-KLU
SMART
Commits
0e20ca32
Commit
0e20ca32
authored
Apr 16, 2020
by
Alexander Lercher
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Splitting clusters based on stages
currently weeks of the year
parent
0beaf5bb
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
188 additions
and
0 deletions
+188
-0
timeslice.py
...unity-detection-microservice/app/db/entities/timeslice.py
+48
-0
repository.py
...hub/community-detection-microservice/app/db/repository.py
+3
-0
run_time_slicing.py
.../community-detection-microservice/app/run_time_slicing.py
+137
-0
No files found.
src/data-hub/community-detection-microservice/app/db/entities/timeslice.py
0 → 100644
View file @
0e20ca32
import
json
from
typing
import
List
,
Dict
,
TypeVar
,
Any
from
datetime
import
date
,
datetime
Node
=
TypeVar
(
'Node'
)
class TimeSlice:
    '''Collects the nodes that belong to one time window, grouped by cluster label.

    ``time`` is stored unchanged and only used as the identifier of the
    slice, e.g. a ``(year, week_of_year)`` tuple.
    '''

    def __init__(self, time, nodes=None, cluster_set_dict: Dict = None, from_db=False):
        '''Creates an empty time slice.

        :param time: identifier of the time window, stored as-is
        :param nodes: accepted but currently ignored; the slice always starts empty
        :param cluster_set_dict: accepted but currently ignored (deserialization is not implemented)
        :param from_db: accepted but currently ignored (deserialization is not implemented)
        '''
        self.time = time
        # cluster label -> nodes assigned to that cluster within this time window
        self.nodes: Dict[int, List[Node]] = {}

        # TODO: once from_serializable_dict exists, restore the state from
        # cluster_set_dict (honoring from_db) when a dict is passed.

    def add_node_to_cluster(self, cluster_label, node):
        '''Appends node to the cluster with the given label, creating the cluster on first use.'''
        self.nodes.setdefault(cluster_label, []).append(node)

    # TODO: implement to_serializable_dict(for_db=False) and
    # from_serializable_dict(cluster_set_dict, from_db=False) for this entity.

    def __repr__(self):
        '''Returns the same text as __str__.'''
        return str(self)

    def __str__(self):
        '''Returns the slice's time key followed by the size of every cluster.'''
        cluster_sizes = [len(members) for members in self.nodes.values()]
        return f"TimeSlice({self.time}, {cluster_sizes})"
src/data-hub/community-detection-microservice/app/db/repository.py
View file @
0e20ca32
...
...
@@ -94,14 +94,17 @@ class Repository(MongoRepositoryBase):
super
()
.
insert_entry
(
self
.
_clusterset_collection
,
cluster_set
.
to_serializable_dict
())
def get_clustersets(self) -> List[ClusterSet]:
    '''Fetches every entry of the clusterset collection as a ClusterSet.'''
    return [
        ClusterSet(cluster_set_dict=stored_entry)
        for stored_entry in super().get_entries(self._clusterset_collection)
    ]
def get_clusterset_names(self) -> List[str]:
    '''Fetches the layer_name of every entry in the clusterset collection.'''
    # Only request the name field from the collection.
    name_only = {'layer_name': 1}
    stored_entries = super().get_entries(
        self._clusterset_collection, projection=name_only)

    layer_names = []
    for stored_entry in stored_entries:
        layer_names.append(stored_entry['layer_name'])
    return layer_names
def
get_clusterset
(
self
,
layer_name
)
->
ClusterSet
:
'''Returns a single clusterset with the given name or None otherwise.'''
entries
=
super
()
.
get_entries
(
self
.
_clusterset_collection
,
selection
=
{
'layer_name'
:
layer_name
})
entries
=
[
ClusterSet
(
cluster_set_dict
=
e
)
for
e
in
entries
]
...
...
src/data-hub/community-detection-microservice/app/run_time_slicing.py
0 → 100644
View file @
0e20ca32
import
sys
import
os
# Put the shared modules directory on the import path, but only if that
# relative path exists (it is resolved against the current working directory).
modules_path = '../../../modules/'
if os.path.exists(modules_path):
    sys.path.insert(1, modules_path)
import
json
from
datetime
import
datetime
,
date
import
matplotlib.pyplot
as
plt
from
db.repository
import
Repository
from
db.entities.timeslice
import
TimeSlice
from
db.entities
import
ClusterSet
from
typing
import
Tuple
# repo = Repository()
def convert_to_time_slice_key(timestamp: str) -> Tuple[int, int]:
    '''Maps a timestamp string to its ISO calendar (year, week_of_year) tuple.

    Only the first 10 characters of the string are used; they are parsed as
    seconds since the epoch and any remaining digits are dropped.
    '''
    # NOTE(review): fromtimestamp() converts to local time, so the week of a
    # timestamp close to a week boundary depends on the machine's timezone.
    epoch_seconds = float(timestamp[:10])
    iso_date = datetime.fromtimestamp(epoch_seconds).isocalendar()
    return (iso_date[0], iso_date[1])
def get_clusterset():
    '''Loads the clusterset of the Destination_Layer from the local results file.

    The file path is relative, so it is resolved against the current working
    directory.

    :returns: ClusterSet built from the JSON content of the results file
    '''
    # clusterset = repo.get_clusterset('Destination_Layer')
    with open('clustering_results/optics/clusterset_Destination_Layer.txt') as file:
        return ClusterSet(cluster_set_dict=json.load(file))


def _get_sample_clusterset():
    '''Returns a tiny hard-coded Destination_Layer clusterset (one cluster, three nodes).

    This data used to sit unreachable behind the return statement of
    get_clusterset(); it is kept here so it can be used for manual testing
    without the results file.
    '''
    return ClusterSet(cluster_set_dict={
        "clusters": [{
            "cluster_label": 0,
            "nodes": [
                {
                    "Finished_time": 1579143634812589,
                    "Latitude_Destination": -5.95081,
                    "Longitude_Destination": 37.415281,
                    "TravelID": "5e57ec9159bc0668543f1568",
                    "TravelPrice": 19,
                    "UniqueID": "2696718d7a33ab3dbf28e9c88411afcfe9a933a45e57ec9159bc0668543f1568",
                    "UserID": "2696718d7a33ab3dbf28e9c88411afcfe9a933a4",
                    "cluster_label": 0,
                },
                {
                    "Finished_time": 1582709512112368,
                    "Latitude_Destination": -5.95081,
                    "Longitude_Destination": 37.415281,
                    "TravelID": "5e57ec9159bc0668543f15cf",
                    "TravelPrice": 16,
                    "UniqueID": "98dcb2717ddae152d5b359c6ea97e4fe34a29d4c5e57ec9159bc0668543f15cf",
                    "UserID": "98dcb2717ddae152d5b359c6ea97e4fe34a29d4c",
                    "cluster_label": 0,
                },
                {
                    "Finished_time": 1582709512112367,
                    "Latitude_Destination": -5.95081,
                    "Longitude_Destination": 37.415281,
                    "TravelID": "5e57ec9159bc0668543f15cf",
                    "TravelPrice": 16,
                    "UniqueID": "98dcb2717ddae152d5b359c6ea97e4fe34a29d4c5e57ec9159bc0668543f15cd",
                    "UserID": "98dcb2717ddae152d5b359c6ea97e4fe34a29d4c",
                    "cluster_label": 0,
                },
            ],
        }],
        "layer_name": "Destination_Layer",
    })
def plt_show_circles(keys, time_slices, cluster_no):
    '''Plots the destinations of one cluster as a scatter plot, one time slice after the other.

    Every frame is titled with its time slice key and followed by a
    plt.pause(0.5). All markers of a frame share one size that is
    proportional to the number of nodes in that frame.

    :param keys: time slice keys in the order they should be shown
    :param time_slices: mapping of time slice key to the slice object
    :param cluster_no: label of the cluster to plot
    '''
    for key in keys:
        # A slice without this cluster is drawn as an empty frame.
        members = time_slices[key].nodes.get(cluster_no, [])
        member_count = len(members)
        # print(f"{slice_.time} number elements for cluster {cluster_no}: {len(nodes)}")

        longitudes = [member['Longitude_Destination'] for member in members]
        latitudes = [member['Latitude_Destination'] for member in members]
        marker_sizes = [member_count * 100] * member_count

        plt.title(str(key))
        plt.scatter(longitudes, latitudes, s=marker_sizes)
        plt.pause(0.5)
def plt_show_bars(keys, time_slices, cluster_no, x_axis_label_stepsize=10):
    '''Shows a bar chart of the size of one cluster in every time slice.

    :param keys: time slice keys in the order they should appear on the x axis
    :param time_slices: mapping of time slice key to the slice object
    :param cluster_no: label of the cluster whose size is plotted
    :param x_axis_label_stepsize: only every n-th bar gets an x axis label;
        must not be 0 (slicing with step 0 raises ValueError)
    '''
    # A slice that does not contain the cluster contributes a bar of height 0.
    nodes_per_slice_for_single_cluster = [
        len(time_slices[k].nodes.get(cluster_no, ())) for k in keys]

    bar_positions = range(len(keys))

    _fig, ax = plt.subplots()
    ax.bar(x=bar_positions, height=nodes_per_slice_for_single_cluster)
    ax.set_ylabel('Size')
    ax.set_title(f'Cluster-{cluster_no} size over time')

    # Tick positions and labels use the same step so they stay aligned.
    ax.set_xticks(bar_positions[::x_axis_label_stepsize])
    # Labels are passed as strings; the keys themselves may be tuples.
    ax.set_xticklabels([str(k) for k in keys[::x_axis_label_stepsize]])

    plt.show()
clusterset = get_clusterset()
# print(clusterset.layer_name)

# time slice key -> TimeSlice holding the nodes of that period, grouped by cluster
time_slices = {}

# for clusterset in clustersets:
for cluster in clusterset.clusters:
    for node in cluster.nodes:
        # assign the nodes to time slices and recreate the clusters there
        time_key = convert_to_time_slice_key(str(node['Finished_time']))

        if time_key not in time_slices:
            time_slices[time_key] = TimeSlice(time_key)

        time_slices[time_key].add_node_to_cluster(cluster.cluster_label, node)

# sort chronologically
keys = sorted(time_slices)

plt_show_bars(keys, time_slices, cluster_no=20)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment