Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
SMART
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
3
Issues
3
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Registry
Registry
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
UNI-KLU
SMART
Commits
c94aba76
Commit
c94aba76
authored
Feb 07, 2020
by
Alexander
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
preparation for mongodb storage
parent
72491c09
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
20 additions
and
18 deletions
+20
-18
clusterer.py
...munity-detection-microservice/app/processing/clusterer.py
+12
-15
run_clustering.py
...ub/community-detection-microservice/app/run_clustering.py
+8
-3
No files found.
src/data-hub/community-detection-microservice/app/processing/clusterer.py
View file @
c94aba76
...
@@ -43,6 +43,7 @@ class Clusterer:
...
@@ -43,6 +43,7 @@ class Clusterer:
return
fig
return
fig
# TODO refactor for other input
def
create_labels
(
self
,
locations
:
List
)
->
List
:
def
create_labels
(
self
,
locations
:
List
)
->
List
:
if
locations
is
None
or
len
(
locations
)
==
0
:
if
locations
is
None
or
len
(
locations
)
==
0
:
return
locations
# trash in trash out
return
locations
# trash in trash out
...
@@ -58,6 +59,7 @@ class Clusterer:
...
@@ -58,6 +59,7 @@ class Clusterer:
def
extract_location_data
(
self
,
locations
:
List
[
dict
])
->
np
.
ndarray
:
def
extract_location_data
(
self
,
locations
:
List
[
dict
])
->
np
.
ndarray
:
return
np
.
asarray
([(
float
(
l
[
'latitude'
]),
float
(
l
[
'longitude'
]))
for
l
in
locations
])
return
np
.
asarray
([(
float
(
l
[
'latitude'
]),
float
(
l
[
'longitude'
]))
for
l
in
locations
])
# TODO refactor for other input
def
label_locations
(
self
,
locations
:
List
[
Dict
],
labels
:
List
)
->
List
:
def
label_locations
(
self
,
locations
:
List
[
Dict
],
labels
:
List
)
->
List
:
if
locations
is
None
or
labels
is
None
:
if
locations
is
None
or
labels
is
None
:
return
return
...
@@ -84,24 +86,19 @@ class Clusterer:
...
@@ -84,24 +86,19 @@ class Clusterer:
return
clusters
return
clusters
def
cluster_times
(
self
,
times
:
List
[
Dict
])
->
Dict
[
int
,
List
[
Dict
]]:
def
cluster_times
(
self
,
times
:
List
[
Dict
])
->
Dict
[
int
,
List
[
Dict
]]:
times1
=
np
.
asarray
([((
t
[
'timestamp'
]),
0
)
for
t
in
times
])
# times.sort(key=lambda x: x['timestamp'])
# TODO refactor for other input
times1
=
np
.
asarray
([(
float
(
t
[
'timestamp'
]),
float
(
0
))
for
t
in
times
])
# print(times)
dbsc
=
DBSCAN
(
eps
=
self
.
epsilon
,
min_samples
=
self
.
min_points
)
dbsc
=
DBSCAN
(
eps
=
self
.
epsilon
,
min_samples
=
self
.
min_points
)
dbsc
=
dbsc
.
fit
(
times1
)
dbsc
=
dbsc
.
fit
(
times1
)
labels
=
dbsc
.
labels_
labels
=
dbsc
.
labels_
.
tolist
()
print
(
labels
)
self
.
label_locations
(
times
,
labels
)
self
.
label_locations
(
times
,
labels
)
times
=
[
t
for
t
in
times
if
t
[
'cluster_label'
]
!=
-
1
]
print
(
times
)
info
=
[
l
for
l
in
labels
if
l
!=
-
1
]
clusters
=
{}
print
(
info
)
for
label
in
labels
:
clusters
[
label
]
=
[
l
for
l
in
times
if
l
[
'cluster_label'
]
==
label
]
times1
=
np
.
asarray
([(
float
(
t
[
'timestamp'
]),
float
(
0
))
for
t
in
times
])
# fig = self._draw_locations(locations=times1, partition_info=labels)
fig
=
self
.
_draw_locations
(
locations
=
times1
,
partition_info
=
info
)
# fig.savefig('img.png')
fig
.
savefig
(
'img.png'
)
return
clusters
\ No newline at end of file
\ No newline at end of file
src/data-hub/community-detection-microservice/app/run_clustering.py
View file @
c94aba76
...
@@ -10,7 +10,7 @@ from db.repository import Repository
...
@@ -10,7 +10,7 @@ from db.repository import Repository
from
processing.clusterer
import
Clusterer
from
processing.clusterer
import
Clusterer
DEBUG
=
Fals
e
DEBUG
=
Tru
e
repo
=
Repository
()
repo
=
Repository
()
...
@@ -30,11 +30,16 @@ def run_location_clustering():
...
@@ -30,11 +30,16 @@ def run_location_clustering():
def
run_time_clustering
():
def
run_time_clustering
():
user_clusterer
=
Clusterer
(
epsilon
=
10
**
5.8
)
all_location_traces
=
repo
.
get_agi_locations
()
all_location_traces
=
repo
.
get_agi_locations
()
user_clusterer
=
Clusterer
(
epsilon
=
10
**
5.8
)
cluster_result
=
user_clusterer
.
cluster_times
([
l
.
to_serializable_dict
()
for
l
in
all_location_traces
]
)
user_clusterer
.
cluster_times
([
l
.
to_serializable_dict
()
for
l
in
all_location_traces
])
clusters
=
[
UserCluster
(
key
,
value
)
for
key
,
value
in
cluster_result
.
items
()]
store_user_clusters
(
clusters
)
def
store_user_clusters
(
user_clusters
:
List
[
UserCluster
]):
def
store_user_clusters
(
user_clusters
:
List
[
UserCluster
]):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment