Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
SMART
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
3
Issues
3
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Registry
Registry
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
UNI-KLU
SMART
Commits
31f80acb
Commit
31f80acb
authored
Apr 29, 2020
by
Alexander Lercher
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'feature/handle-large-datasets' into develop
parents
bd4aa55b
cf555c9f
Changes
10
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
237 additions
and
72 deletions
+237
-72
swagger.yml
.../community-detection-microservice/app/configs/swagger.yml
+120
-33
cluster.py
...mmunity-detection-microservice/app/db/entities/cluster.py
+5
-1
layer.py
...community-detection-microservice/app/db/entities/layer.py
+1
-4
repository.py
...hub/community-detection-microservice/app/db/repository.py
+27
-3
clustersets.py
...ommunity-detection-microservice/app/routes/clustersets.py
+10
-2
layers.py
...hub/community-detection-microservice/app/routes/layers.py
+24
-6
timeslices.py
...community-detection-microservice/app/routes/timeslices.py
+10
-1
run_clustering.py
...ub/community-detection-microservice/app/run_clustering.py
+12
-8
run_time_slicing.py
.../community-detection-microservice/app/run_time_slicing.py
+24
-14
MongoRepositoryBase.py
src/modules/database/MongoRepositoryBase.py
+4
-0
No files found.
src/data-hub/community-detection-microservice/app/configs/swagger.yml
View file @
31f80acb
...
@@ -80,16 +80,17 @@ paths:
...
@@ -80,16 +80,17 @@ paths:
400
:
400
:
description
:
"
Invalid
input"
description
:
"
Invalid
input"
# Layers
#region Layers
/layers
:
/layers
:
post
:
post
:
operationId
:
"
routes.layers.post"
operationId
:
"
routes.layers.post"
tags
:
tags
:
-
"
Layers"
-
"
Layers"
summary
:
"
Add
a
new
layer
or
overwrite
an
existing
one
"
summary
:
"
Add
a
new
layer
[TODO:
or
overwrite
an
existing
one]
"
parameters
:
parameters
:
-
in
:
body
-
in
:
body
name
:
"
L
ayer"
name
:
"
l
ayer"
description
:
"
The
layer
data
to
be
added"
description
:
"
The
layer
data
to
be
added"
required
:
true
required
:
true
schema
:
schema
:
...
@@ -111,41 +112,109 @@ paths:
...
@@ -111,41 +112,109 @@ paths:
schema
:
schema
:
$ref
:
"
#/definitions/LayerCollection"
$ref
:
"
#/definitions/LayerCollection"
/layers/
names
:
/layers/
{name}
:
get
:
get
:
operationId
:
"
routes.layers.get_
names
"
operationId
:
"
routes.layers.get_
by_name
"
tags
:
tags
:
-
"
Layers"
-
"
Layers"
summary
:
"
Get
all
layer
names"
summary
:
"
Get
single
layer
data"
parameters
:
[]
parameters
:
-
name
:
"
name"
in
:
"
path"
description
:
"
Name
of
the
requested
layer"
required
:
true
type
:
"
string"
responses
:
responses
:
200
:
200
:
description
:
"
Successful
operation"
description
:
"
Successful
operation"
schema
:
schema
:
type
:
array
$ref
:
"
#/definitions/Layer"
items
:
404
:
type
:
string
description
:
"
Layer
not
found"
/layers/{name}
:
/layers/{name}
/nodes
:
get
:
get
:
operationId
:
"
routes.layers.get_
by_name
"
operationId
:
"
routes.layers.get_
nodes
"
tags
:
tags
:
-
"
Layers"
-
"
Layers"
summary
:
"
Get
layer
data
for
layer-name
"
summary
:
"
Get
all
individual
nodes
for
the
layer
"
parameters
:
parameters
:
-
name
:
"
name"
-
name
:
"
name"
in
:
"
path"
in
:
"
path"
description
:
"
Name
of
the
layer
to
return
"
description
:
"
Name
of
the
layer"
required
:
true
required
:
true
type
:
"
string"
type
:
"
string"
responses
:
responses
:
200
:
200
:
description
:
"
Successful
operation"
description
:
"
Successful
operation"
schema
:
schema
:
$ref
:
"
#/definitions/
Layer
"
$ref
:
"
#/definitions/
NodeCollection
"
404
:
404
:
description
:
"
Layer
not
found"
description
:
"
Layer
not
found"
post
:
operationId
:
"
routes.layers.post_nodes"
tags
:
-
"
Layers"
summary
:
"
Adds
a
single
or
multiple
nodes
to
the
layer"
parameters
:
-
name
:
"
name"
in
:
"
path"
description
:
"
Name
of
the
layer"
required
:
true
type
:
"
string"
-
name
:
"
node"
in
:
body
description
:
"
The
node(s)
to
be
added"
required
:
true
schema
:
$ref
:
"
#/definitions/NodeCollection"
responses
:
201
:
description
:
"
Successful
operation"
400
:
description
:
"
Invalid
input"
/layers/{name}/clusters
:
get
:
operationId
:
"
routes.clustersets.get_by_name2"
tags
:
-
"
Layers"
summary
:
"
Get
all
clusters
for
the
layer"
parameters
:
-
name
:
"
name"
in
:
"
path"
description
:
"
Name
of
the
layer"
required
:
true
type
:
"
string"
responses
:
200
:
description
:
"
Successful
operation"
schema
:
$ref
:
"
#/definitions/ClusterCollection"
404
:
description
:
"
Layer
not
found"
/layers/{name}/timeslices
:
get
:
operationId
:
"
routes.timeslices.get_by_name2"
tags
:
-
"
Layers"
summary
:
"
Get
all
timeslices
for
the
layer"
parameters
:
-
name
:
"
name"
in
:
"
path"
description
:
"
Name
of
the
layer"
required
:
true
type
:
"
string"
responses
:
200
:
description
:
"
Successful
operation"
schema
:
$ref
:
"
#/definitions/TimeSliceCollection"
404
:
description
:
"
Layer
not
found"
#endregion
# Clusters
# Clusters
# TODO remove partially
# TODO remove partially
/location-clusters
:
/location-clusters
:
...
@@ -200,6 +269,7 @@ paths:
...
@@ -200,6 +269,7 @@ paths:
# 200:
# 200:
# description: "Successful operation"
# description: "Successful operation"
# TODO remove
/clustersets
:
/clustersets
:
get
:
get
:
operationId
:
"
routes.clustersets.get"
operationId
:
"
routes.clustersets.get"
...
@@ -248,6 +318,7 @@ paths:
...
@@ -248,6 +318,7 @@ paths:
404
:
404
:
description
:
"
Clusterset
not
found"
description
:
"
Clusterset
not
found"
# TODO remove
# TODO remove
/user-cluster-graphs
:
/user-cluster-graphs
:
get
:
get
:
...
@@ -335,20 +406,20 @@ definitions:
...
@@ -335,20 +406,20 @@ definitions:
Cluster
:
Cluster
:
type
:
object
type
:
object
properties
:
properties
:
layer_name
:
type
:
string
cluster_label
:
cluster_label
:
type
:
number
type
:
number
nodes
:
nodes
:
type
:
array
type
:
array
items
:
items
:
type
:
object
$ref
:
"
#/definitions/Node"
example
:
"
Finished_time"
:
1576631193265951
"
Latitude_Destination"
:
-5.973257
ClusterCollection
:
"
Longitude_Destination"
:
37.416316
type
:
array
"
TravelID"
:
"
5e57ec9159bc0668543f156a"
items
:
"
TravelPrice"
:
15
$ref
:
"
#/definitions/Cluster"
"
UniqueID"
:
"
a95075f5042b1b27060080156d87fe34ec7e712c5e57ec9159bc0668543f156a"
"
UserID"
:
"
a95075f5042b1b27060080156d87fe34ec7e712c"
LocationCluster
:
LocationCluster
:
type
:
object
type
:
object
...
@@ -416,10 +487,10 @@ definitions:
...
@@ -416,10 +487,10 @@ definitions:
properties
:
properties
:
LayerName
:
LayerName
:
type
:
string
type
:
string
Nodes
:
#
Nodes:
type
:
array
#
type: array
items
:
#
items:
type
:
object
#
type: object
Properties
:
Properties
:
type
:
array
type
:
array
items
:
items
:
...
@@ -430,10 +501,10 @@ definitions:
...
@@ -430,10 +501,10 @@ definitions:
properties
:
properties
:
layer_name
:
layer_name
:
type
:
string
type
:
string
nodes
:
#
nodes:
type
:
array
#
type: array
items
:
#
items:
type
:
object
#
type: object
properties
:
properties
:
type
:
array
type
:
array
items
:
items
:
...
@@ -444,6 +515,22 @@ definitions:
...
@@ -444,6 +515,22 @@ definitions:
items
:
items
:
$ref
:
"
#/definitions/Layer"
$ref
:
"
#/definitions/Layer"
Node
:
type
:
object
example
:
"
Finished_time"
:
1576631193265951
"
Latitude_Destination"
:
-5.973257
"
Longitude_Destination"
:
37.416316
"
TravelID"
:
"
5e57ec9159bc0668543f156a"
"
TravelPrice"
:
15
"
UniqueID"
:
"
a95075f5042b1b27060080156d87fe34ec7e712c5e57ec9159bc0668543f156a"
"
UserID"
:
"
a95075f5042b1b27060080156d87fe34ec7e712c"
NodeCollection
:
type
:
array
items
:
$ref
:
"
#/definitions/Node"
ClusterSet
:
ClusterSet
:
type
:
object
type
:
object
properties
:
properties
:
...
...
src/data-hub/community-detection-microservice/app/db/entities/cluster.py
View file @
31f80acb
...
@@ -7,12 +7,14 @@ class Cluster:
...
@@ -7,12 +7,14 @@ class Cluster:
'''
'''
A cluster for an arbitrary layer containing some nodes.
A cluster for an arbitrary layer containing some nodes.
:param layer_name: The name of the layer in which the cluster is located
:param cluster_label: The label of the cluster unique for the layer
:param cluster_label: The label of the cluster unique for the layer
:param nodes: The individual nodes of the cluster
:param nodes: The individual nodes of the cluster
'''
'''
def
__init__
(
self
,
cluster_label
:
int
=
None
,
nodes
:
List
=
None
,
def
__init__
(
self
,
layer_name
:
str
=
None
,
cluster_label
:
int
=
None
,
nodes
:
List
[
Dict
]
=
None
,
cluster_dict
:
Dict
=
None
,
from_db
=
False
):
cluster_dict
:
Dict
=
None
,
from_db
=
False
):
self
.
layer_name
=
layer_name
self
.
cluster_label
=
cluster_label
self
.
cluster_label
=
cluster_label
self
.
nodes
=
nodes
self
.
nodes
=
nodes
...
@@ -21,11 +23,13 @@ class Cluster:
...
@@ -21,11 +23,13 @@ class Cluster:
def
to_serializable_dict
(
self
,
for_db
=
False
)
->
Dict
:
def
to_serializable_dict
(
self
,
for_db
=
False
)
->
Dict
:
return
{
return
{
"layer_name"
:
self
.
layer_name
,
"cluster_label"
:
self
.
cluster_label
,
"cluster_label"
:
self
.
cluster_label
,
"nodes"
:
json
.
dumps
(
self
.
nodes
)
if
for_db
else
self
.
nodes
"nodes"
:
json
.
dumps
(
self
.
nodes
)
if
for_db
else
self
.
nodes
}
}
def
from_serializable_dict
(
self
,
cluster_dict
:
Dict
,
from_db
=
False
):
def
from_serializable_dict
(
self
,
cluster_dict
:
Dict
,
from_db
=
False
):
self
.
layer_name
=
cluster_dict
[
"layer_name"
]
self
.
cluster_label
=
cluster_dict
[
"cluster_label"
]
self
.
cluster_label
=
cluster_dict
[
"cluster_label"
]
self
.
nodes
=
json
.
loads
(
cluster_dict
[
"nodes"
])
\
self
.
nodes
=
json
.
loads
(
cluster_dict
[
"nodes"
])
\
if
from_db
else
cluster_dict
[
"nodes"
]
if
from_db
else
cluster_dict
[
"nodes"
]
...
...
src/data-hub/community-detection-microservice/app/db/entities/layer.py
View file @
31f80acb
...
@@ -17,15 +17,12 @@ class Layer:
...
@@ -17,15 +17,12 @@ class Layer:
def
to_serializable_dict
(
self
,
for_db
=
False
)
->
Dict
:
def
to_serializable_dict
(
self
,
for_db
=
False
)
->
Dict
:
return
{
return
{
"layer_name"
:
self
.
layer_name
,
"layer_name"
:
self
.
layer_name
,
"properties"
:
self
.
properties
,
"properties"
:
self
.
properties
"nodes"
:
json
.
dumps
(
self
.
nodes
)
if
for_db
else
self
.
nodes
}
}
def
from_serializable_dict
(
self
,
layer_info
:
Dict
,
from_db
=
False
):
def
from_serializable_dict
(
self
,
layer_info
:
Dict
,
from_db
=
False
):
self
.
layer_name
=
layer_info
[
'layer_name'
]
self
.
layer_name
=
layer_info
[
'layer_name'
]
self
.
properties
=
layer_info
[
'properties'
]
self
.
properties
=
layer_info
[
'properties'
]
self
.
nodes
=
json
.
loads
(
layer_info
[
"nodes"
])
\
if
from_db
else
layer_info
[
"nodes"
]
def
__repr__
(
self
):
def
__repr__
(
self
):
return
json
.
dumps
(
self
.
to_serializable_dict
())
return
json
.
dumps
(
self
.
to_serializable_dict
())
...
...
src/data-hub/community-detection-microservice/app/db/repository.py
View file @
31f80acb
...
@@ -21,9 +21,10 @@ class Repository(MongoRepositoryBase):
...
@@ -21,9 +21,10 @@ class Repository(MongoRepositoryBase):
self
.
_location_cluster_collection
=
'location_cluster'
self
.
_location_cluster_collection
=
'location_cluster'
self
.
_time_cluster_collection
=
'time_cluster'
self
.
_time_cluster_collection
=
'time_cluster'
self
.
_user_cluster_graph_collection
=
'user_cluster_graph'
self
.
_user_cluster_graph_collection
=
'user_cluster_graph'
self
.
_layer_collection
=
'layer'
self
.
_layer_collection
=
'layer-new'
self
.
_clusterset_collection
=
'cluster_set'
self
.
_layer_nodes_collection
=
'layer_nodes-new'
self
.
_time_slice_collection
=
'time_slice'
self
.
_clusterset_collection
=
'cluster_set-new'
self
.
_time_slice_collection
=
'time_slice-new'
self
.
agi_repo
=
AgiRepository
()
self
.
agi_repo
=
AgiRepository
()
...
@@ -88,9 +89,22 @@ class Repository(MongoRepositoryBase):
...
@@ -88,9 +89,22 @@ class Repository(MongoRepositoryBase):
return
entries
[
0
]
return
entries
[
0
]
else
:
else
:
return
None
return
None
def
add_layer_node
(
self
,
node
:
dict
):
super
()
.
insert_entry
(
self
.
_layer_nodes_collection
,
node
)
def
add_layer_nodes
(
self
,
nodes
:
List
[
dict
]):
super
()
.
insert_many
(
self
.
_layer_nodes_collection
,
nodes
)
def
get_layer_nodes
(
self
,
layer_name
:
str
)
->
dict
:
'''Returns all nodes for the layer.'''
entries
=
super
()
.
get_entries
(
self
.
_layer_nodes_collection
,
selection
=
{
'layer_name'
:
layer_name
},
projection
=
{
'_id'
:
0
})
return
[
e
for
e
in
entries
]
#endregion
#endregion
#region ClusterSet
#region ClusterSet
# TODO cleanup
def
add_clusterset
(
self
,
cluster_set
:
ClusterSet
):
def
add_clusterset
(
self
,
cluster_set
:
ClusterSet
):
super
()
.
insert_entry
(
self
.
_clusterset_collection
,
cluster_set
.
to_serializable_dict
())
super
()
.
insert_entry
(
self
.
_clusterset_collection
,
cluster_set
.
to_serializable_dict
())
...
@@ -113,6 +127,16 @@ class Repository(MongoRepositoryBase):
...
@@ -113,6 +127,16 @@ class Repository(MongoRepositoryBase):
return
entries
[
0
]
return
entries
[
0
]
else
:
else
:
return
None
return
None
def
add_clusters
(
self
,
clusters
:
List
[
Cluster
]):
cluster_dicts
=
[
c
.
to_serializable_dict
(
for_db
=
True
)
for
c
in
clusters
]
super
()
.
insert_many
(
self
.
_clusterset_collection
,
cluster_dicts
)
def
get_clusters_for_layer
(
self
,
layer_name
:
str
)
->
List
[
Cluster
]:
entries
=
super
()
.
get_entries
(
self
.
_clusterset_collection
,
selection
=
{
'layer_name'
:
layer_name
},
projection
=
{
'_id'
:
0
})
return
[
Cluster
(
cluster_dict
=
e
,
from_db
=
True
)
for
e
in
entries
]
#endregion
#endregion
#region TimeSlice
#region TimeSlice
...
...
src/data-hub/community-detection-microservice/app/routes/clustersets.py
View file @
31f80acb
...
@@ -10,8 +10,16 @@ def get():
...
@@ -10,8 +10,16 @@ def get():
def
get_names
():
def
get_names
():
return
repo
.
get_clusterset_names
()
return
repo
.
get_clusterset_names
()
def
get_by_name
(
layername
):
res
=
repo
.
get_clusterset
(
layername
)
def
get_by_name2
(
name
):
res
=
repo
.
get_clusters_for_layer
(
name
)
if
res
is
None
or
len
(
res
)
==
0
:
return
Response
(
status
=
404
)
else
:
return
[
c
.
to_serializable_dict
()
for
c
in
res
]
def
get_by_name
(
name
):
res
=
repo
.
get_clusterset
(
name
)
if
res
is
not
None
:
if
res
is
not
None
:
return
res
.
to_serializable_dict
()
return
res
.
to_serializable_dict
()
else
:
else
:
...
...
src/data-hub/community-detection-microservice/app/routes/layers.py
View file @
31f80acb
...
@@ -4,15 +4,18 @@ from db.entities import Layer
...
@@ -4,15 +4,18 @@ from db.entities import Layer
repo
=
Repository
()
repo
=
Repository
()
#region layers
def
post
():
def
post
():
'''Insert a new layer or overwrite an existing one.'''
# TODO overwrite
body
=
request
.
json
body
=
request
.
json
_insert_layer
(
body
)
_insert_layer
(
body
)
return
Response
(
status
=
201
)
return
Response
(
status
=
201
)
def
_insert_layer
(
layer_data
:
dict
):
def
_insert_layer
(
layer_data
:
dict
):
# convert object keys from ext source
'''Converts object keys from external source and inserts into database.'''
layer_data
[
'layer_name'
]
=
layer_data
.
pop
(
'LayerName'
)
layer_data
[
'layer_name'
]
=
layer_data
.
pop
(
'LayerName'
)
layer_data
[
'nodes'
]
=
layer_data
.
pop
(
'Nodes'
)
#
layer_data['nodes'] = layer_data.pop('Nodes')
layer_data
[
'properties'
]
=
layer_data
.
pop
(
'Properties'
)
layer_data
[
'properties'
]
=
layer_data
.
pop
(
'Properties'
)
repo
.
add_layer
(
Layer
(
layer_data
))
repo
.
add_layer
(
Layer
(
layer_data
))
...
@@ -20,12 +23,27 @@ def _insert_layer(layer_data: dict):
...
@@ -20,12 +23,27 @@ def _insert_layer(layer_data: dict):
def
get
():
def
get
():
return
[
l
.
to_serializable_dict
()
for
l
in
repo
.
get_layers
()]
return
[
l
.
to_serializable_dict
()
for
l
in
repo
.
get_layers
()]
def
get_names
():
return
repo
.
get_layer_names
()
def
get_by_name
(
name
):
def
get_by_name
(
name
):
res
=
repo
.
get_layer
(
name
)
res
=
repo
.
get_layer
(
name
)
if
res
is
not
None
:
if
res
is
not
None
:
return
res
.
to_serializable_dict
()
return
res
.
to_serializable_dict
()
else
:
else
:
return
Response
(
status
=
404
)
return
Response
(
status
=
404
)
\ No newline at end of file
#endregion
#region nodes
def
get_nodes
(
name
):
res
=
repo
.
get_layer_nodes
(
name
)
# print(res)
return
res
def
post_nodes
(
name
):
body
=
request
.
json
for
node
in
body
:
node
[
'layer_name'
]
=
name
repo
.
add_layer_nodes
(
body
)
return
Response
(
status
=
201
)
#endregion nodes
\ No newline at end of file
src/data-hub/community-detection-microservice/app/routes/timeslices.py
View file @
31f80acb
...
@@ -11,9 +11,18 @@ def get():
...
@@ -11,9 +11,18 @@ def get():
def
get_by_name
(
layername
):
def
get_by_name
(
layername
):
res
=
repo
.
get_time_slices_by_name
(
layername
)
res
=
repo
.
get_time_slices_by_name
(
layername
)
print
(
len
(
res
))
#
print(len(res))
if
res
is
not
None
and
len
(
res
)
!=
0
:
if
res
is
not
None
and
len
(
res
)
!=
0
:
return
[
e
.
to_serializable_dict
()
for
e
in
res
]
return
[
e
.
to_serializable_dict
()
for
e
in
res
]
else
:
else
:
return
Response
(
status
=
404
)
return
Response
(
status
=
404
)
def
get_by_name2
(
name
):
res
=
repo
.
get_time_slices_by_name
(
name
)
# print(len(res))
if
res
is
not
None
and
len
(
res
)
!=
0
:
return
[
e
.
to_serializable_dict
()
for
e
in
res
]
else
:
return
Response
(
status
=
404
)
\ No newline at end of file
src/data-hub/community-detection-microservice/app/run_clustering.py
View file @
31f80acb
...
@@ -22,27 +22,30 @@ def run_generic_clustering():
...
@@ -22,27 +22,30 @@ def run_generic_clustering():
all_layers
:
List
[
Layer
]
=
repo
.
get_layers
()
all_layers
:
List
[
Layer
]
=
repo
.
get_layers
()
for
layer
in
all_layers
:
for
layer
in
all_layers
:
print
(
f
"Clustering {layer.layer_name}"
)
if
layer
.
properties
is
None
or
len
(
layer
.
properties
)
==
0
:
if
layer
.
properties
is
None
or
len
(
layer
.
properties
)
==
0
:
print
(
"skipping"
)
continue
continue
print
(
f
"Clustering {layer.layer_name}"
)
clusters
=
run_clustering_for_layer
(
layer
)
clusters
=
run_clustering_for_layer
(
layer
)
cluster_set
=
ClusterSet
(
layer
.
layer_name
,
clusters
)
#
cluster_set = ClusterSet(layer.layer_name, clusters)
store_
clusterset
(
cluster_set
)
store_
generic_clusters
(
clusters
)
def
run_clustering_for_layer
(
layer
:
Layer
)
->
List
[
Cluster
]:
def
run_clustering_for_layer
(
layer
:
Layer
)
->
List
[
Cluster
]:
clusterer
=
Clusterer
(
)
nodes
=
repo
.
get_layer_nodes
(
layer
.
layer_name
)
clusterer
=
Clusterer
()
res
=
clusterer
.
cluster_dataset
(
res
=
clusterer
.
cluster_dataset
(
layer
.
nodes
,
nodes
,
layer
.
properties
layer
.
properties
)
)
return
[
Cluster
(
key
,
value
)
for
key
,
value
in
res
.
items
()]
return
[
Cluster
(
layer
.
layer_name
,
key
,
value
)
for
key
,
value
in
res
.
items
()]
def
store_
clusterset
(
cluster_set
:
ClusterSet
):
def
store_
generic_clusters
(
clusters
:
List
[
Cluster
]
):
repo
.
add_clusters
et
(
cluster_set
)
repo
.
add_clusters
(
clusters
)
# with open(f'clusterset_{cluster_set.layer_name}.txt', 'w') as file:
# with open(f'clusterset_{cluster_set.layer_name}.txt', 'w') as file:
# file.write(json.dumps(cluster_set.to_serializable_dict()))
# file.write(json.dumps(cluster_set.to_serializable_dict()))
...
@@ -109,5 +112,6 @@ def store_clusters(type: str, clusters: List):
...
@@ -109,5 +112,6 @@ def store_clusters(type: str, clusters: List):
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
run_generic_clustering
()
run_generic_clustering
()
# TODO cleanup
# run_location_clustering()
# run_location_clustering()
# run_time_clustering()
# run_time_clustering()
src/data-hub/community-detection-microservice/app/run_time_slicing.py
View file @
31f80acb
...
@@ -8,8 +8,8 @@ import json
...
@@ -8,8 +8,8 @@ import json
from
datetime
import
datetime
,
date
from
datetime
import
datetime
,
date
from
db.repository
import
Repository
from
db.repository
import
Repository
from
db.entities.timeslice
import
TimeSlice
from
db.entities.timeslice
import
TimeSlice
from
db.entities
import
ClusterSet
from
db.entities
import
ClusterSet
,
Cluster
from
typing
import
Tuple
,
Dict
,
Any
from
typing
import
Tuple
,
Dict
,
Any
,
List
TimeSliceKey
=
Tuple
[
int
,
int
]
TimeSliceKey
=
Tuple
[
int
,
int
]
...
@@ -20,28 +20,30 @@ def convert_to_time_slice_key(timestamp: str) -> TimeSliceKey:
...
@@ -20,28 +20,30 @@ def convert_to_time_slice_key(timestamp: str) -> TimeSliceKey:
return
(
y
,
w
)
return
(
y
,
w
)
def
split_clusterset_by_time
(
clustersets
)
->
Dict
[
TimeSliceKey
,
TimeSlice
]:
def
split_clusterset_by_time
(
layer_name
:
str
,
clusters
:
List
[
Cluster
]
)
->
Dict
[
TimeSliceKey
,
TimeSlice
]:
'''
'''
Distributes all nodes
of a single clusterset
into individual time slices based on their timestamps.
Distributes all nodes
in clusters of a single layer
into individual time slices based on their timestamps.
If a node spans over multiple slices it will be added to all of them.
If a node spans over multiple slices it will be added to all of them.
Information about clusters and the nodes in the clusters will not be changed.
Information about clusters and the nodes in the clusters will not be changed.
:params clusters
ets: The clusterset
whichs nodes are split
:params clusters
: The clusters
whichs nodes are split
:returns: A dict of time slices where the key is the time info and value is the information about the time slice
:returns: A dict of time slices where the key is the time info and value is the information about the time slice
'''
'''
time_property_names
=
[
'Finished_time'
,
'Starting_time'
]
time_slices
:
Dict
[
Any
,
TimeSlice
]
=
{}
time_slices
:
Dict
[
Any
,
TimeSlice
]
=
{}
for
cluster_no
in
clusters
et
.
clusters
:
for
cluster_no
in
clusters
:
for
node
in
cluster_no
.
nodes
:
for
node
in
cluster_no
.
nodes
:
time_keys
=
{
# retrieve times the node is located in based on the defined time properties in the schema
convert_to_time_slice_key
(
str
(
node
[
'Finished_time'
])),
time_keys
=
set
()
convert_to_time_slice_key
(
str
(
node
[
'Starting_time'
]))
for
time_property
in
time_property_names
:
}
if
time_property
in
node
:
time_keys
.
add
(
convert_to_time_slice_key
(
str
(
node
[
time_property
])))
for
time_key
in
time_keys
:
for
time_key
in
time_keys
:
if
time_key
not
in
time_slices
:
if
time_key
not
in
time_slices
:
time_slices
[
time_key
]
=
TimeSlice
(
time_key
,
clusterset
.
layer_name
)
time_slices
[
time_key
]
=
TimeSlice
(
time_key
,
layer_name
)
time_slices
[
time_key
]
.
add_node_to_cluster
(
cluster_no
.
cluster_label
,
node
)
time_slices
[
time_key
]
.
add_node_to_cluster
(
cluster_no
.
cluster_label
,
node
)
...
@@ -53,9 +55,17 @@ if __name__ == "__main__":
...
@@ -53,9 +55,17 @@ if __name__ == "__main__":
repo
.
remove_all_time_slices
()
repo
.
remove_all_time_slices
()
clustersets
=
repo
.
get_clustersets
()
layers
=
repo
.
get_layers
()
for
clusterset
in
clustersets
:
for
layer
in
layers
:
time_slices
=
split_clusterset_by_time
(
clusterset
)
layer_name
=
layer
.
layer_name
print
(
f
"Working on {layer_name}"
)
clusters_for_layer
=
repo
.
get_clusters_for_layer
(
layer_name
)
# if no clusters were generated use one large cluster instead of skipping the layer
if
clusters_for_layer
is
None
or
len
(
clusters_for_layer
)
==
0
:
clusters_for_layer
=
[
Cluster
(
layer_name
,
-
1
,
repo
.
get_layer_nodes
(
layer_name
))]
time_slices
=
split_clusterset_by_time
(
layer_name
,
clusters_for_layer
)
for
k
,
v
in
time_slices
.
items
():
for
k
,
v
in
time_slices
.
items
():
repo
.
add_time_slice
(
v
)
repo
.
add_time_slice
(
v
)
src/modules/database/MongoRepositoryBase.py
View file @
31f80acb
...
@@ -19,6 +19,10 @@ class MongoRepositoryBase:
...
@@ -19,6 +19,10 @@ class MongoRepositoryBase:
collection
=
self
.
_database
[
collection_name
]
collection
=
self
.
_database
[
collection_name
]
collection
.
insert_one
(
content
)
collection
.
insert_one
(
content
)
def
insert_many
(
self
,
collection_name
,
content
:
list
):
collection
=
self
.
_database
[
collection_name
]
collection
.
insert_many
(
content
)
def
get_entries
(
self
,
collection_name
,
selection
:
dict
=
{},
projection
:
dict
=
{
'_'
:
0
})
->
cursor
.
Cursor
:
def
get_entries
(
self
,
collection_name
,
selection
:
dict
=
{},
projection
:
dict
=
{
'_'
:
0
})
->
cursor
.
Cursor
:
collection
=
self
.
_database
[
collection_name
]
collection
=
self
.
_database
[
collection_name
]
return
collection
.
find
(
selection
,
projection
)
return
collection
.
find
(
selection
,
projection
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment