Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
SMART
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
3
Issues
3
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Registry
Registry
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
UNI-KLU
SMART
Commits
e7061d7f
Commit
e7061d7f
authored
Apr 21, 2020
by
Alexander Lercher
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Improved data schema for time slices, storing and loading from mongodb
parent
0e20ca32
Changes
8
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
173 additions
and
93 deletions
+173
-93
swagger.yml
.../community-detection-microservice/app/configs/swagger.yml
+65
-3
__init__.py
...munity-detection-microservice/app/db/entities/__init__.py
+1
-0
timeslice.py
...unity-detection-microservice/app/db/entities/timeslice.py
+51
-39
repository.py
...hub/community-detection-microservice/app/db/repository.py
+19
-0
clustersets.py
...ommunity-detection-microservice/app/routes/clustersets.py
+2
-2
timeslices.py
...community-detection-microservice/app/routes/timeslices.py
+19
-0
run_time_slicing.py
.../community-detection-microservice/app/run_time_slicing.py
+13
-49
MongoRepositoryBase.py
src/modules/database/MongoRepositoryBase.py
+3
-0
No files found.
src/data-hub/community-detection-microservice/app/configs/swagger.yml
View file @
e7061d7f
...
@@ -228,14 +228,14 @@ paths:
...
@@ -228,14 +228,14 @@ paths:
items
:
items
:
type
:
string
type
:
string
/clustersets/{name}
:
/clustersets/{
layer
name}
:
get
:
get
:
operationId
:
"
routes.clustersets.get_by_name"
operationId
:
"
routes.clustersets.get_by_name"
tags
:
tags
:
-
"
Clusters"
-
"
Clusters"
summary
:
"
Get
clusterset
for
layer-name"
summary
:
"
Get
clusterset
for
layer-name"
parameters
:
parameters
:
-
name
:
"
name"
-
name
:
"
layer
name"
in
:
"
path"
in
:
"
path"
description
:
"
Name
of
the
layer
to
return
the
clusterset
for"
description
:
"
Name
of
the
layer
to
return
the
clusterset
for"
required
:
true
required
:
true
...
@@ -262,6 +262,41 @@ paths:
...
@@ -262,6 +262,41 @@ paths:
schema
:
schema
:
$ref
:
"
#/definitions/UserClusterGraphCollection"
$ref
:
"
#/definitions/UserClusterGraphCollection"
# Time slices
/timeslices
:
get
:
operationId
:
"
routes.timeslices.get"
tags
:
-
"
Time
Slices"
summary
:
"
Get
all
time
slices
based
on
individual
layers
containing
clusters
with
nodes
for
that
time"
parameters
:
[]
responses
:
200
:
description
:
"
Successful
operation"
schema
:
$ref
:
"
#/definitions/TimeSliceCollection"
/timeslices/{layername}
:
get
:
operationId
:
"
routes.timeslices.get_by_name"
tags
:
-
"
Time
Slices"
summary
:
"
Get
all
time
slices
for
one
layer"
parameters
:
-
name
:
"
layername"
in
:
"
path"
description
:
"
Name
of
the
layer
to
return
the
time
slices
for"
required
:
true
type
:
"
string"
responses
:
200
:
description
:
"
Successful
operation"
schema
:
$ref
:
"
#/definitions/TimeSliceCollection"
404
:
description
:
"
No
time
slices
found
for
layername"
# Function Calls
# Function Calls
/rfc/run
:
/rfc/run
:
post
:
post
:
...
@@ -423,3 +458,30 @@ definitions:
...
@@ -423,3 +458,30 @@ definitions:
type
:
array
type
:
array
items
:
items
:
$ref
:
"
#/definitions/ClusterSet"
$ref
:
"
#/definitions/ClusterSet"
TimeSlice
:
type
:
object
properties
:
time:
  # The time tag is serialized as a string (str() of the slicing key), not as an object.
  type: string
  example: "(2020, 52)"
layer_name
:
type
:
string
clusters
:
type
:
object
additionalProperties
:
type
:
array
items
:
type
:
object
properties
:
UniqueID
:
type
:
string
example
:
"
0"
:
-
UniqueID
:
abc
-
UniqueID
:
def
TimeSliceCollection
:
type
:
array
items
:
$ref
:
"
#/definitions/TimeSlice"
\ No newline at end of file
src/data-hub/community-detection-microservice/app/db/entities/__init__.py
View file @
e7061d7f
...
@@ -4,3 +4,4 @@ from db.entities.cluster import Cluster, LocationCluster, TimeCluster
...
@@ -4,3 +4,4 @@ from db.entities.cluster import Cluster, LocationCluster, TimeCluster
from
db.entities.clusterset
import
ClusterSet
from
db.entities.clusterset
import
ClusterSet
from
db.entities.user_cluster_graph
import
UserClusterGraph
from
db.entities.user_cluster_graph
import
UserClusterGraph
from
db.entities.layer
import
Layer
from
db.entities.layer
import
Layer
from
db.entities.timeslice
import
TimeSlice
\ No newline at end of file
src/data-hub/community-detection-microservice/app/db/entities/timeslice.py
View file @
e7061d7f
import
json
import
json
from
typing
import
List
,
Dict
,
TypeVar
,
Any
from
typing
import
List
,
Dict
,
NewType
,
Any
from
datetime
import
date
,
datetime
from
datetime
import
date
,
datetime
Node
=
TypeVar
(
'Node'
)
Node
=
NewType
(
'Node'
,
dict
)
class TimeSlice:
    '''
    A time slice for a single layer containing all nodes for that time.

    :param time: The tag indicating the time (stored as its str() representation)
    :param layer_name: The name of the layer the nodes belong to
    :param time_slice_dict: Optional serialized time slice; if given, its values
        replace time, layer_name and clusters
    :param from_db: Whether time_slice_dict has the db format (clusters as JSON string)
    '''

    def __init__(self, time: Any, layer_name: str,
                 time_slice_dict: Dict = None, from_db=False):
        self.time = str(time)
        self.layer_name = layer_name
        # Maps cluster label -> list of reduced nodes ({'UniqueID': ...}).
        self.clusters: Dict[int, List[Node]] = {}

        if time_slice_dict is not None:
            self.from_serializable_dict(time_slice_dict, from_db)

    def add_node_to_cluster(self, cluster_label: int, node):
        '''Appends the node (reduced to its unique id) to the cluster with the given label.'''
        reduced_node = self._get_unique_id(node)
        self.clusters.setdefault(cluster_label, []).append(reduced_node)

    def get_nodes_for_cluster(self, cluster_label: int):
        '''Returns the nodes stored for the cluster label, or an empty list if the label is unknown.'''
        return self.clusters.get(cluster_label, [])

    def _get_unique_id(self, node: Dict) -> Dict:
        '''
        Returns a new dict with the unique id only.

        Returns None if the node has no 'UniqueID' key.
        '''
        uid_key = 'UniqueID'
        if uid_key in node:
            return {uid_key: node[uid_key]}
        # NOTE(review): nodes without a unique id end up as None in the cluster list - confirm this is intended.
        return None

    @staticmethod
    def _restore_label(label):
        '''Converts a JSON object key back to an int cluster label where possible.'''
        try:
            return int(label)
        except (TypeError, ValueError):
            return label

    def to_serializable_dict(self, for_db=False) -> Dict:
        '''
        Returns the time slice as dict.

        :param for_db: If True, the clusters are encoded as a JSON string
        '''
        return {
            "time": self.time,
            'layer_name': self.layer_name,
            "clusters": json.dumps(self.clusters) if for_db else self.clusters
        }

    def from_serializable_dict(self, dict: Dict, from_db=False):
        '''
        Overwrites time, layer_name and clusters with the values from the dict.

        :param dict: A dict as created by to_serializable_dict()
        :param from_db: If True, the clusters are decoded from a JSON string
        '''
        self.time = dict["time"]
        self.layer_name = dict['layer_name']

        if from_db:
            # JSON object keys are always strings, so the int labels used by
            # add_node_to_cluster()/get_nodes_for_cluster() must be restored.
            decoded_clusters = json.loads(dict['clusters'])
            self.clusters = {self._restore_label(label): nodes
                             for label, nodes in decoded_clusters.items()}
        else:
            self.clusters = dict['clusters']

    def __repr__(self):
        return json.dumps(self.to_serializable_dict())

    def __str__(self):
        return f"TimeSlice({self.__repr__()})"
src/data-hub/community-detection-microservice/app/db/repository.py
View file @
e7061d7f
...
@@ -23,6 +23,7 @@ class Repository(MongoRepositoryBase):
...
@@ -23,6 +23,7 @@ class Repository(MongoRepositoryBase):
self
.
_user_cluster_graph_collection
=
'user_cluster_graph'
self
.
_user_cluster_graph_collection
=
'user_cluster_graph'
self
.
_layer_collection
=
'layer'
self
.
_layer_collection
=
'layer'
self
.
_clusterset_collection
=
'cluster_set'
self
.
_clusterset_collection
=
'cluster_set'
self
.
_time_slice_collection
=
'time_slice'
self
.
agi_repo
=
AgiRepository
()
self
.
agi_repo
=
AgiRepository
()
...
@@ -113,3 +114,21 @@ class Repository(MongoRepositoryBase):
...
@@ -113,3 +114,21 @@ class Repository(MongoRepositoryBase):
else
:
else
:
return
None
return
None
#endregion
#endregion
#region TimeSlice
def add_time_slice(self, timeslice: TimeSlice):
    '''Inserts the time slice in its db format into the time slice collection.'''
    serialized_slice = timeslice.to_serializable_dict(for_db=True)
    super().insert_entry(self._time_slice_collection, serialized_slice)
def get_time_slices(self) -> List[TimeSlice]:
    '''Returns all time slices.'''
    time_slices = []
    for entry in super().get_entries(self._time_slice_collection):
        # time and layer_name are taken from the stored entry itself.
        time_slices.append(
            TimeSlice(None, None, time_slice_dict=entry, from_db=True))
    return time_slices
def get_time_slices_by_name(self, layer_name) -> List[TimeSlice]:
    '''Returns all time slices with the given layer_name.'''
    layer_filter = {'layer_name': layer_name}
    matching_entries = super().get_entries(
        self._time_slice_collection, selection=layer_filter)

    time_slices = []
    for entry in matching_entries:
        # time and layer_name are taken from the stored entry itself.
        time_slices.append(
            TimeSlice(None, None, time_slice_dict=entry, from_db=True))
    return time_slices
def remove_all_time_slices(self):
    '''Drops the whole time slice collection.'''
    collection_name = self._time_slice_collection
    super().drop_collection(collection_name)
#endregion
\ No newline at end of file
src/data-hub/community-detection-microservice/app/routes/clustersets.py
View file @
e7061d7f
...
@@ -10,8 +10,8 @@ def get():
...
@@ -10,8 +10,8 @@ def get():
def get_names():
    '''Returns the clusterset names provided by the repository.'''
    clusterset_names = repo.get_clusterset_names()
    return clusterset_names
def
get_by_name
(
name
):
def
get_by_name
(
layer
name
):
res
=
repo
.
get_clusterset
(
name
)
res
=
repo
.
get_clusterset
(
layer
name
)
if
res
is
not
None
:
if
res
is
not
None
:
return
res
.
to_serializable_dict
()
return
res
.
to_serializable_dict
()
else
:
else
:
...
...
src/data-hub/community-detection-microservice/app/routes/timeslices.py
0 → 100644
View file @
e7061d7f
from
flask
import
request
,
Response
from
db.repository
import
Repository
from
db.entities
import
TimeSlice
repo
=
Repository
()
def get():
    '''Returns all stored time slices as serializable dicts.'''
    time_slices = repo.get_time_slices()
    return [time_slice.to_serializable_dict() for time_slice in time_slices]
def get_by_name(layername):
    '''
    Returns all time slices of one layer as serializable dicts.

    :param layername: Name of the layer to return the time slices for
    :return: The list of time slices, or an empty 404 response if none exist
    '''
    res = repo.get_time_slices_by_name(layername)

    # Covers both None and an empty list; the former debug print called
    # len(res) before this check and wrote to stdout on every request.
    if not res:
        return Response(status=404)

    return [e.to_serializable_dict() for e in res]
src/data-hub/community-detection-microservice/app/run_time_slicing.py
View file @
e7061d7f
...
@@ -10,9 +10,9 @@ import matplotlib.pyplot as plt
...
@@ -10,9 +10,9 @@ import matplotlib.pyplot as plt
from
db.repository
import
Repository
from
db.repository
import
Repository
from
db.entities.timeslice
import
TimeSlice
from
db.entities.timeslice
import
TimeSlice
from
db.entities
import
ClusterSet
from
db.entities
import
ClusterSet
from
typing
import
Tuple
from
typing
import
Tuple
,
Dict
#
repo = Repository()
repo
=
Repository
()
def
convert_to_time_slice_key
(
timestamp
:
str
)
->
Tuple
[
int
,
int
]:
def
convert_to_time_slice_key
(
timestamp
:
str
)
->
Tuple
[
int
,
int
]:
...
@@ -23,46 +23,9 @@ def convert_to_time_slice_key(timestamp: str) -> Tuple[int, int]:
...
@@ -23,46 +23,9 @@ def convert_to_time_slice_key(timestamp: str) -> Tuple[int, int]:
def get_clusterset():
    '''Loads the clusterset of the 'Destination_Layer' from the repository.'''
    return repo.get_clusterset('Destination_Layer')
...
@@ -90,9 +53,7 @@ def plt_show_bars(keys, time_slices, cluster_no):
...
@@ -90,9 +53,7 @@ def plt_show_bars(keys, time_slices, cluster_no):
x_axis_label_stepsize
=
10
x_axis_label_stepsize
=
10
nodes_per_slice_for_single_cluster
=
\
nodes_per_slice_for_single_cluster
=
\
[
len
(
time_slices
[
k
]
.
nodes
[
cluster_no
])
[
len
(
time_slices
[
k
]
.
get_nodes_for_cluster
(
cluster_no
))
if
cluster_no
in
time_slices
[
k
]
.
nodes
else
0
for
k
for
k
in
keys
]
in
keys
]
...
@@ -110,7 +71,7 @@ def plt_show_bars(keys, time_slices, cluster_no):
...
@@ -110,7 +71,7 @@ def plt_show_bars(keys, time_slices, cluster_no):
clusterset
=
get_clusterset
()
clusterset
=
get_clusterset
()
# print(clusterset.layer_name)
cnt
=
0
cnt
=
0
time_slices
=
{}
time_slices
=
{}
...
@@ -121,7 +82,7 @@ for cluster_no in clusterset.clusters:
...
@@ -121,7 +82,7 @@ for cluster_no in clusterset.clusters:
time_key
=
convert_to_time_slice_key
(
str
(
node
[
'Finished_time'
]))
time_key
=
convert_to_time_slice_key
(
str
(
node
[
'Finished_time'
]))
if
time_key
not
in
time_slices
:
if
time_key
not
in
time_slices
:
time_slices
[
time_key
]
=
TimeSlice
(
time_key
)
time_slices
[
time_key
]
=
TimeSlice
(
time_key
,
clusterset
.
layer_name
)
time_slices
[
time_key
]
.
add_node_to_cluster
(
cluster_no
.
cluster_label
,
node
)
time_slices
[
time_key
]
.
add_node_to_cluster
(
cluster_no
.
cluster_label
,
node
)
...
@@ -131,7 +92,10 @@ keys = list(time_slices.keys())
...
@@ -131,7 +92,10 @@ keys = list(time_slices.keys())
keys
.
sort
()
keys
.
sort
()
repo
.
remove_all_time_slices
()
plt_show_bars
(
keys
,
time_slices
,
cluster_no
=
20
)
for
k
,
v
in
time_slices
.
items
():
repo
.
add_time_slice
(
v
)
print
(
len
(
time_slices
))
plt_show_bars
(
keys
,
time_slices
,
cluster_no
=
0
)
src/modules/database/MongoRepositoryBase.py
View file @
e7061d7f
...
@@ -12,6 +12,9 @@ class MongoRepositoryBase:
...
@@ -12,6 +12,9 @@ class MongoRepositoryBase:
self
.
_mongo_client
=
MongoClient
(
f
"mongodb://{username}:{password}@{hostname}:{port}/"
)
self
.
_mongo_client
=
MongoClient
(
f
"mongodb://{username}:{password}@{hostname}:{port}/"
)
self
.
_database
=
self
.
_mongo_client
[
database_name
]
self
.
_database
=
self
.
_mongo_client
[
database_name
]
def drop_collection(self, collection_name):
    '''Drops the collection with the given name from the database.'''
    collection = self._database[collection_name]
    collection.drop()
def
insert_entry
(
self
,
collection_name
,
content
:
dict
):
def
insert_entry
(
self
,
collection_name
,
content
:
dict
):
collection
=
self
.
_database
[
collection_name
]
collection
=
self
.
_database
[
collection_name
]
collection
.
insert_one
(
content
)
collection
.
insert_one
(
content
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment