Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
SMART
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
3
Issues
3
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Registry
Registry
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
UNI-KLU
SMART
Commits
bd4aa55b
Commit
bd4aa55b
authored
Apr 22, 2020
by
Alexander Lercher
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'feature/network-stages' into develop
parents
0e20ca32
c66bd0dd
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
261 additions
and
154 deletions
+261
-154
swagger.yml
.../community-detection-microservice/app/configs/swagger.yml
+65
-3
__init__.py
...munity-detection-microservice/app/db/entities/__init__.py
+1
-0
timeslice.py
...unity-detection-microservice/app/db/entities/timeslice.py
+54
-39
repository.py
...hub/community-detection-microservice/app/db/repository.py
+19
-0
clustersets.py
...ommunity-detection-microservice/app/routes/clustersets.py
+2
-2
timeslices.py
...community-detection-microservice/app/routes/timeslices.py
+19
-0
run_time_slicing.py
.../community-detection-microservice/app/run_time_slicing.py
+34
-110
visualize_time_slices.py
...n-microservice/app/visualization/visualize_time_slices.py
+64
-0
MongoRepositoryBase.py
src/modules/database/MongoRepositoryBase.py
+3
-0
No files found.
src/data-hub/community-detection-microservice/app/configs/swagger.yml
View file @
bd4aa55b
...
...
@@ -228,14 +228,14 @@ paths:
items
:
type
:
string
/clustersets/{name}
:
/clustersets/{
layer
name}
:
get
:
operationId
:
"
routes.clustersets.get_by_name"
tags
:
-
"
Clusters"
summary
:
"
Get
clusterset
for
layer-name"
parameters
:
-
name
:
"
name"
-
name
:
"
layer
name"
in
:
"
path"
description
:
"
Name
of
the
layer
to
return
the
clusterset
for"
required
:
true
...
...
@@ -262,6 +262,41 @@ paths:
schema
:
$ref
:
"
#/definitions/UserClusterGraphCollection"
# Time slices
/timeslices
:
get
:
operationId
:
"
routes.timeslices.get"
tags
:
-
"
Time
Slices"
summary
:
"
Get
all
time
slices
based
on
individual
layers
containing
clusters
with
nodes
for
that
time"
parameters
:
[]
responses
:
200
:
description
:
"
Successful
operation"
schema
:
$ref
:
"
#/definitions/TimeSliceCollection"
/timeslices/{layername}
:
get
:
operationId
:
"
routes.timeslices.get_by_name"
tags
:
-
"
Time
Slices"
summary
:
"
Get
all
time
slices
for
one
layer"
parameters
:
-
name
:
"
layername"
in
:
"
path"
description
:
"
Name
of
the
layer
to
return
the
time
slices
for"
required
:
true
type
:
"
string"
responses
:
200
:
description
:
"
Successful
operation"
schema
:
$ref
:
"
#/definitions/TimeSliceCollection"
404
:
description
:
"
No
time
slices
found
for
layername"
# Function Calls
/rfc/run
:
post
:
...
...
@@ -422,4 +457,31 @@ definitions:
ClusterSetCollection
:
type
:
array
items
:
$ref
:
"
#/definitions/ClusterSet"
\ No newline at end of file
$ref
:
"
#/definitions/ClusterSet"
TimeSlice
:
type
:
object
properties
:
time
:
type
:
object
example
:
"
(2020,
52)"
layer_name
:
type
:
string
clusters
:
type
:
object
additionalProperties
:
type
:
array
items
:
type
:
object
properties
:
UniqueID
:
type
:
string
example
:
"
0"
:
-
UniqueID
:
abc
-
UniqueID
:
def
TimeSliceCollection
:
type
:
array
items
:
$ref
:
"
#/definitions/TimeSlice"
\ No newline at end of file
src/data-hub/community-detection-microservice/app/db/entities/__init__.py
View file @
bd4aa55b
...
...
@@ -4,3 +4,4 @@ from db.entities.cluster import Cluster, LocationCluster, TimeCluster
from
db.entities.clusterset
import
ClusterSet
from
db.entities.user_cluster_graph
import
UserClusterGraph
from
db.entities.layer
import
Layer
from
db.entities.timeslice
import
TimeSlice
\ No newline at end of file
src/data-hub/community-detection-microservice/app/db/entities/timeslice.py
View file @
bd4aa55b
import
json
from
typing
import
List
,
Dict
,
TypeVar
,
Any
from
typing
import
List
,
Dict
,
NewType
,
Any
from
datetime
import
date
,
datetime
Node
=
TypeVar
(
'Node'
)
Node
=
NewType
(
'Node'
,
dict
)
class
TimeSlice
:
def
__init__
(
self
,
time
,
nodes
=
None
,
cluster_set_dict
:
Dict
=
None
,
from_db
=
False
):
self
.
time
=
time
self
.
nodes
:
Dict
[
int
,
List
[
Node
]]
=
{}
# if cluster_set_dict is not None:
# self.from_serializable_dict(cluster_set_dict, from_db)
def
add_node_to_cluster
(
self
,
cluster_label
,
node
):
if
cluster_label
not
in
self
.
nodes
:
self
.
nodes
[
cluster_label
]
=
[]
self
.
nodes
[
cluster_label
]
.
append
(
node
)
# todo
# def to_serializable_dict(self, for_db=False) -> Dict:
# serialized_dict_clusters = [cluster.to_serializable_dict(for_db)
# for cluster in self.clusters]
# return {
# "layer_name": self.layer_name,
# "clusters": json.dumps(serialized_dict_clusters) if for_db else serialized_dict_clusters
# }
# def from_serializable_dict(self, cluster_set_dict: Dict, from_db=False):
# self.layer_name = cluster_set_dict["layer_name"]
# serialized_dict_clusters = json.loads(cluster_set_dict["clusters"]) \
# if from_db else cluster_set_dict["clusters"]
# self.clusters = [Cluster(cluster_dict=cluster_dict, from_db=from_db)
# for cluster_dict in serialized_dict_clusters]
'''
A time slice for a single layer containing all nodes for that time.
:param time: The tag indicating the time
:param layer_name: The name of the layer the nodes belong to
'''
def
__init__
(
self
,
time
:
Any
,
layer_name
:
str
,
time_slice_dict
:
Dict
=
None
,
from_db
=
False
):
self
.
time
=
str
(
time
)
self
.
layer_name
=
layer_name
self
.
clusters
:
Dict
[
str
,
List
[
Node
]]
=
{}
if
time_slice_dict
is
not
None
:
self
.
from_serializable_dict
(
time_slice_dict
,
from_db
)
def add_node_to_cluster(self, cluster_label: str, node):
    '''
    Appends the reduced form of node to the cluster with the given label.

    :param cluster_label: The label of the cluster; converted with str()
        because only string keys can be stored in json
    :param node: The node to add; only the result of _get_unique_id(node)
        is stored
    '''
    label_key = str(cluster_label)
    # create the cluster's list on first use, then append to it
    members = self.clusters.setdefault(label_key, [])
    members.append(self._get_unique_id(node))
def get_nodes_for_cluster(self, cluster_label: str):
    '''
    Returns the stored node list for cluster_label.

    :param cluster_label: The key to look up in self.clusters
    :returns: The list stored for that key, or a new empty list if the key
        is not present
    '''
    return self.clusters.get(cluster_label, [])
def
_get_unique_id
(
self
,
node
:
Dict
)
->
Dict
:
'''Returns a new dict with the unique id only.'''
uid_key
=
'UniqueID'
if
uid_key
in
node
:
return
{
uid_key
:
node
[
uid_key
]}
def to_serializable_dict(self, for_db=False) -> Dict:
    '''
    Builds a dict with the keys "time", "layer_name" and "clusters".

    :param for_db: If truthy, "clusters" holds the json string of
        self.clusters, otherwise self.clusters itself
    '''
    if for_db:
        clusters_value = json.dumps(self.clusters)
    else:
        clusters_value = self.clusters

    serialized = {}
    serialized["time"] = self.time
    serialized['layer_name'] = self.layer_name
    serialized["clusters"] = clusters_value
    return serialized
def from_serializable_dict(self, dict: Dict, from_db=False):
    '''
    Overwrites time, layer_name and clusters with the values from dict.

    :param dict: Serialized time slice with the keys "time", "layer_name"
        and "clusters" (the name shadows the builtin but is part of the
        signature)
    :param from_db: If truthy, dict["clusters"] is parsed with json.loads,
        otherwise it is used as-is
    '''
    payload = dict
    self.time = payload["time"]
    self.layer_name = payload['layer_name']

    if from_db:
        self.clusters = json.loads(payload['clusters'])
    else:
        self.clusters = payload['clusters']
def
__repr__
(
self
):
return
self
.
__str__
()
# return {'time': self.time, "#nodes": len(self.nodes)}
# json.dumps(self.to_serializable_dict())
return
json
.
dumps
(
self
.
to_serializable_dict
())
def
__str__
(
self
):
return
f
"TimeSlice({self.
time}, {[len(v) for k, v in self.nodes.items()]
})"
return
f
"TimeSlice({self.
__repr__()
})"
src/data-hub/community-detection-microservice/app/db/repository.py
View file @
bd4aa55b
...
...
@@ -23,6 +23,7 @@ class Repository(MongoRepositoryBase):
self
.
_user_cluster_graph_collection
=
'user_cluster_graph'
self
.
_layer_collection
=
'layer'
self
.
_clusterset_collection
=
'cluster_set'
self
.
_time_slice_collection
=
'time_slice'
self
.
agi_repo
=
AgiRepository
()
...
...
@@ -113,3 +114,21 @@ class Repository(MongoRepositoryBase):
else
:
return
None
#endregion
#region TimeSlice
def add_time_slice(self, timeslice: TimeSlice):
    '''Inserts the db-serialized form of timeslice into the time slice collection.'''
    db_entry = timeslice.to_serializable_dict(for_db=True)
    super().insert_entry(self._time_slice_collection, db_entry)
def get_time_slices(self) -> List[TimeSlice]:
    '''Returns all time slices.'''
    stored_entries = super().get_entries(self._time_slice_collection)

    time_slices = []
    for entry in stored_entries:
        # time and layer_name are taken from the entry itself
        time_slices.append(
            TimeSlice(None, None, time_slice_dict=entry, from_db=True))
    return time_slices
def get_time_slices_by_name(self, layer_name) -> List[TimeSlice]:
    '''Returns all time slices with the given layer_name.'''
    layer_filter = {'layer_name': layer_name}
    stored_entries = super().get_entries(
        self._time_slice_collection, selection=layer_filter)

    time_slices = []
    for entry in stored_entries:
        # time and layer_name are taken from the entry itself
        time_slices.append(
            TimeSlice(None, None, time_slice_dict=entry, from_db=True))
    return time_slices
def remove_all_time_slices(self):
    '''Drops the whole time slice collection.'''
    collection_name = self._time_slice_collection
    super().drop_collection(collection_name)
#endregion
\ No newline at end of file
src/data-hub/community-detection-microservice/app/routes/clustersets.py
View file @
bd4aa55b
...
...
@@ -10,8 +10,8 @@ def get():
def
get_names
():
return
repo
.
get_clusterset_names
()
def
get_by_name
(
name
):
res
=
repo
.
get_clusterset
(
name
)
def
get_by_name
(
layer
name
):
res
=
repo
.
get_clusterset
(
layer
name
)
if
res
is
not
None
:
return
res
.
to_serializable_dict
()
else
:
...
...
src/data-hub/community-detection-microservice/app/routes/timeslices.py
0 → 100644
View file @
bd4aa55b
from
flask
import
request
,
Response
from
db.repository
import
Repository
from
db.entities
import
TimeSlice
repo
=
Repository
()
def get():
    '''Returns all time slices from the repository as serializable dicts.'''
    serialized = []
    for time_slice in repo.get_time_slices():
        serialized.append(time_slice.to_serializable_dict())
    return serialized
def get_by_name(layername):
    '''
    Returns all time slices of one layer as serializable dicts.

    :param layername: Name of the layer to return the time slices for
    :returns: A list of serialized time slices, or an empty 404 response if
        the repository returned None or no entries
    '''
    res = repo.get_time_slices_by_name(layername)

    # The former debug print called len(res) before the None check, which
    # would raise for a None result and wrote to stdout on every request.
    if not res:
        return Response(status=404)

    return [e.to_serializable_dict() for e in res]
src/data-hub/community-detection-microservice/app/run_time_slicing.py
View file @
bd4aa55b
...
...
@@ -6,132 +6,56 @@ if os.path.exists(modules_path):
import
json
from
datetime
import
datetime
,
date
import
matplotlib.pyplot
as
plt
from
db.repository
import
Repository
from
db.entities.timeslice
import
TimeSlice
from
db.entities
import
ClusterSet
from
typing
import
Tuple
from
typing
import
Tuple
,
Dict
,
Any
# repo = Repository()
TimeSliceKey
=
Tuple
[
int
,
int
]
def
convert_to_time_slice_key
(
timestamp
:
str
)
->
Tuple
[
int
,
int
]:
'''Returns the tuple (year, week_of_year) from a timestamp.'''
timestamp
=
datetime
.
fromtimestamp
(
float
(
timestamp
[
0
:
10
]))
(
y
,
w
,
_
)
=
timestamp
.
isocalendar
()
def convert_to_time_slice_key(timestamp: str) -> "TimeSliceKey":
    '''
    Returns the tuple (year, week_of_year) from a timestamp. This is used as the key for the slicing.

    :param timestamp: Timestamp string; only its first 10 characters are
        parsed, as seconds since the epoch
    :returns: The ISO calendar (year, week) of that instant in UTC
    '''
    # Local import keeps the block self-contained; the file only imports
    # datetime and date from the datetime module.
    from datetime import timezone

    seconds = float(timestamp[0:10])
    # Timezone-aware replacement for the deprecated datetime.utcfromtimestamp;
    # the resulting ISO year and week are the same.
    time = datetime.fromtimestamp(seconds, tz=timezone.utc)
    (y, w, _) = time.isocalendar()
    return (y, w)
def
get_clusterset
():
# clusterset = repo.get_clusterset('Destination_Layer')
with
open
(
'clustering_results/optics/clusterset_Destination_Layer.txt'
)
as
file
:
clusterset
=
ClusterSet
(
cluster_set_dict
=
json
.
loads
(
file
.
read
()))
return
clusterset
clusterset
=
ClusterSet
(
cluster_set_dict
=
{
"clusters"
:
[{
"cluster_label"
:
0
,
"nodes"
:
[{
"Finished_time"
:
1579143634812589
,
"Latitude_Destination"
:
-
5.95081
,
"Longitude_Destination"
:
37.415281
,
"TravelID"
:
"5e57ec9159bc0668543f1568"
,
"TravelPrice"
:
19
,
"UniqueID"
:
"2696718d7a33ab3dbf28e9c88411afcfe9a933a45e57ec9159bc0668543f1568"
,
"UserID"
:
"2696718d7a33ab3dbf28e9c88411afcfe9a933a4"
,
"cluster_label"
:
0
},
{
"Finished_time"
:
1582709512112368
,
"Latitude_Destination"
:
-
5.95081
,
"Longitude_Destination"
:
37.415281
,
"TravelID"
:
"5e57ec9159bc0668543f15cf"
,
"TravelPrice"
:
16
,
"UniqueID"
:
"98dcb2717ddae152d5b359c6ea97e4fe34a29d4c5e57ec9159bc0668543f15cf"
,
"UserID"
:
"98dcb2717ddae152d5b359c6ea97e4fe34a29d4c"
,
"cluster_label"
:
0
},
{
"Finished_time"
:
1582709512112367
,
"Latitude_Destination"
:
-
5.95081
,
"Longitude_Destination"
:
37.415281
,
"TravelID"
:
"5e57ec9159bc0668543f15cf"
,
"TravelPrice"
:
16
,
"UniqueID"
:
"98dcb2717ddae152d5b359c6ea97e4fe34a29d4c5e57ec9159bc0668543f15cd"
,
"UserID"
:
"98dcb2717ddae152d5b359c6ea97e4fe34a29d4c"
,
"cluster_label"
:
0
}]
}],
"layer_name"
:
"Destination_Layer"
})
return
clusterset
def
plt_show_circles
(
keys
,
time_slices
,
cluster_no
):
for
k
in
keys
:
slice_
=
time_slices
[
k
]
if
cluster_no
in
slice_
.
nodes
:
nodes
=
slice_
.
nodes
[
cluster_no
]
else
:
nodes
=
[]
# print(f"{slice_.time} number elements for cluster {cluster_no}: {len(nodes)}")
plt
.
title
(
str
(
k
))
plt
.
scatter
([
n
[
'Longitude_Destination'
]
for
n
in
nodes
],
[
n
[
'Latitude_Destination'
]
for
n
in
nodes
],
s
=
[
len
(
nodes
)
*
100
]
*
len
(
nodes
))
plt
.
pause
(
0.5
)
def
split_clusterset_by_time
(
clustersets
)
->
Dict
[
TimeSliceKey
,
TimeSlice
]:
'''
Distributes all nodes of a single clusterset into individual time slices based on their timestamps.
If a node spans over multiple slices it will be added to all of them.
Information about clusters and the nodes in the clusters will not be changed.
:params clustersets: The clusterset whichs nodes are split
:returns: A dict of time slices where the key is the time info and value is the information about the time slice
'''
def
plt_show_bars
(
keys
,
time_slices
,
cluster_no
):
x_axis_label_stepsize
=
10
time_slices
:
Dict
[
Any
,
TimeSlice
]
=
{}
for
cluster_no
in
clusterset
.
clusters
:
for
node
in
cluster_no
.
nodes
:
nodes_per_slice_for_single_cluster
=
\
[
len
(
time_slices
[
k
]
.
nodes
[
cluster_no
])
if
cluster_no
in
time_slices
[
k
]
.
nodes
else
0
for
k
in
keys
]
time_keys
=
{
convert_to_time_slice_key
(
str
(
node
[
'Finished_time'
])),
convert_to_time_slice_key
(
str
(
node
[
'Starting_time'
]))
}
fig
,
ax
=
plt
.
subplots
()
ax
.
bar
(
x
=
range
(
len
(
keys
)),
height
=
nodes_per_slice_for_single_cluster
)
for
time_key
in
time_keys
:
if
time_key
not
in
time_slices
:
time_slices
[
time_key
]
=
TimeSlice
(
time_key
,
clusterset
.
layer_name
)
ax
.
set_ylabel
(
'Size'
)
ax
.
set_title
(
f
'Cluster-{cluster_no} size over time'
)
ax
.
set_xticks
(
range
(
len
(
keys
))[::
x_axis_label_stepsize
])
ax
.
set_xticklabels
(
keys
[::
x_axis_label_stepsize
])
time_slices
[
time_key
]
.
add_node_to_cluster
(
cluster_no
.
cluster_label
,
node
)
plt
.
show
()
clusterset
=
get_clusterset
()
# print(clusterset.layer_name)
cnt
=
0
time_slices
=
{}
# for clusterset in clustersets:
for
cluster_no
in
clusterset
.
clusters
:
for
node
in
cluster_no
.
nodes
:
# assign the nodes to time slices and recreate the clusters there
time_key
=
convert_to_time_slice_key
(
str
(
node
[
'Finished_time'
]))
return
time_slices
if
time_key
not
in
time_slices
:
time_slices
[
time_key
]
=
TimeSlice
(
time_key
)
time_slices
[
time_key
]
.
add_node_to_cluster
(
cluster_no
.
cluster_label
,
node
)
# sort chronologically
keys
=
list
(
time_slices
.
keys
())
keys
.
sort
()
if
__name__
==
"__main__"
:
repo
=
Repository
()
plt_show_bars
(
keys
,
time_slices
,
cluster_no
=
20
)
repo
.
remove_all_time_slices
(
)
clustersets
=
repo
.
get_clustersets
()
for
clusterset
in
clustersets
:
time_slices
=
split_clusterset_by_time
(
clusterset
)
for
k
,
v
in
time_slices
.
items
():
repo
.
add_time_slice
(
v
)
src/data-hub/community-detection-microservice/app/visualization/visualize_time_slices.py
0 → 100644
View file @
bd4aa55b
import
sys
import
os
for
path
in
[
'../'
,
'./'
,
'../../../modules/'
]:
if
os
.
path
.
exists
(
path
):
sys
.
path
.
insert
(
1
,
path
)
import
matplotlib.pyplot
as
plt
from
db.repository
import
Repository
from
db.entities
import
TimeSlice
from
typing
import
List
def plt_show_circles(time_slices: List[TimeSlice], cluster_no):
    '''
    Draws one scatter plot per time slice for a single cluster, pausing
    0.5 after each one.

    :param time_slices: The time slices to draw, in the given order
    :param cluster_no: The cluster label; converted with str() for the lookup
    '''
    lon_key = 'Longitude_Destination'
    lat_key = 'Latitude_Destination'
    cluster_no = str(cluster_no)

    for slice_ in time_slices:
        nodes = slice_.get_nodes_for_cluster(cluster_no)
        node_count = len(nodes)

        # nodes without a coordinate are plotted at 0 on that axis
        longitudes = [n[lon_key] if lon_key in n else 0 for n in nodes]
        latitudes = [n[lat_key] if lat_key in n else 0 for n in nodes]
        # every marker gets the same size, scaled by the number of nodes
        marker_sizes = [node_count * 100] * node_count

        plt.title(str(slice_.time))
        plt.scatter(longitudes, latitudes, s=marker_sizes)
        plt.pause(0.5)
def plt_show_bars(time_slices: List[TimeSlice], cluster_no):
    '''
    Shows a bar chart with the number of nodes of one cluster per time slice.

    :param time_slices: The time slices, one bar each, in the given order
    :param cluster_no: The cluster label; converted with str() for the lookup
    '''
    cluster_no = str(cluster_no)

    labels = [ts.time for ts in time_slices]
    # only every 10th bar gets a tick and label on the x axis
    x_axis_label_stepsize = 10

    bar_heights = []
    for time_slice in time_slices:
        bar_heights.append(len(time_slice.get_nodes_for_cluster(cluster_no)))

    _, ax = plt.subplots()
    ax.bar(x=range(len(labels)), height=bar_heights)

    ax.set_ylabel('Size')
    ax.set_title(f'Cluster-{cluster_no} size over time')
    ax.set_xticks(range(len(labels))[::x_axis_label_stepsize])
    ax.set_xticklabels(labels[::x_axis_label_stepsize])

    plt.show()
if __name__ == "__main__":
    # Local import: only this script entry point needs it.
    import ast

    repo = Repository()

    time_slices = repo.get_time_slices_by_name("Destination_Layer")

    # chronological order: ts.time is a stored string that is parsed back
    # into a sortable value. literal_eval only accepts Python literals, so
    # a string from the database can never execute code here as it could
    # with eval.
    time_slices.sort(key=lambda ts: ast.literal_eval(ts.time))

    print(len(time_slices))

    plt_show_bars(time_slices, cluster_no=0)
\ No newline at end of file
src/modules/database/MongoRepositoryBase.py
View file @
bd4aa55b
...
...
@@ -12,6 +12,9 @@ class MongoRepositoryBase:
self
.
_mongo_client
=
MongoClient
(
f
"mongodb://{username}:{password}@{hostname}:{port}/"
)
self
.
_database
=
self
.
_mongo_client
[
database_name
]
def drop_collection(self, collection_name):
    '''Drops the collection with the given name from the database.'''
    collection = self._database[collection_name]
    collection.drop()
def
insert_entry
(
self
,
collection_name
,
content
:
dict
):
collection
=
self
.
_database
[
collection_name
]
collection
.
insert_one
(
content
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment