Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
SMART
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
3
Issues
3
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Registry
Registry
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
UNI-KLU
SMART
Commits
e4993f0c
Commit
e4993f0c
authored
Feb 10, 2020
by
Alexander
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
generalized clusterer methods
parent
8287b767
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
53 additions
and
48 deletions
+53
-48
clusterer.py
...munity-detection-microservice/app/processing/clusterer.py
+31
-36
run_clustering.py
...ub/community-detection-microservice/app/run_clustering.py
+10
-3
test_clusterer.py
...munity-detection-microservice/app/tests/test_clusterer.py
+12
-9
No files found.
src/data-hub/community-detection-microservice/app/processing/clusterer.py
View file @
e4993f0c
...
...
@@ -43,32 +43,38 @@ class Clusterer:
return
fig
# TODO refactor for other input
def
create_labels
(
self
,
locations
:
List
)
->
List
:
if
locations
is
None
or
len
(
locations
)
==
0
:
return
locations
# trash in trash out
locations
=
self
.
extract_location_data
(
locations
)
def
create_labels
(
self
,
features
:
np
.
ndarray
)
->
List
:
if
features
is
None
or
len
(
features
)
==
0
:
return
features
# trash in trash out
dbsc
=
DBSCAN
(
eps
=
self
.
epsilon
,
min_samples
=
self
.
min_points
)
dbsc
=
dbsc
.
fit
(
location
s
)
dbsc
=
dbsc
.
fit
(
feature
s
)
labels
=
dbsc
.
labels_
return
labels
.
tolist
()
def
extract_location_
data
(
self
,
locations
:
List
[
dict
])
->
np
.
ndarray
:
def
extract_location_
features
(
self
,
locations
:
List
[
dict
])
->
np
.
ndarray
:
return
np
.
asarray
([(
float
(
l
[
'latitude'
]),
float
(
l
[
'longitude'
]))
for
l
in
locations
])
# TODO refactor for other input
def
label_locations
(
self
,
locations
:
List
[
Dict
],
labels
:
List
)
->
List
:
if
locations
is
None
or
labels
is
None
:
def
extract_time_features
(
self
,
times
:
List
[
Dict
])
->
np
.
ndarray
:
return
np
.
asarray
([((
t
[
'timestamp'
]),
0
)
for
t
in
times
])
def
label_dataset
(
self
,
dataset
:
List
[
Dict
],
labels
:
List
)
->
List
:
if
dataset
is
None
or
labels
is
None
:
return
if
len
(
locations
)
!=
len
(
labels
):
raise
ValueError
(
"
locations
and labels has to have same length"
)
if
len
(
dataset
)
!=
len
(
labels
):
raise
ValueError
(
"
dataset
and labels has to have same length"
)
for
i
in
range
(
len
(
locations
)):
locations
[
i
][
'cluster_label'
]
=
labels
[
i
]
for
i
in
range
(
len
(
dataset
)):
dataset
[
i
][
'cluster_label'
]
=
labels
[
i
]
def
group_by_clusters
(
self
,
dataset
:
List
[
Dict
],
labels
:
List
)
->
Dict
[
int
,
List
[
Dict
]]:
clusters
=
{}
for
label
in
labels
:
clusters
[
label
]
=
[
ds
for
ds
in
dataset
if
ds
[
'cluster_label'
]
==
label
]
return
clusters
def
cluster_locations
(
self
,
locations
:
List
[
Dict
])
->
Dict
[
int
,
List
[
Dict
]]:
'''Returns a dictionary with identified clusters and their locations copied from the input'''
...
...
@@ -76,29 +82,18 @@ class Clusterer:
# raise Exception("locations has to contain something")
return
{}
labels
=
self
.
create_labels
(
locations
)
self
.
label_locations
(
locations
,
labels
)
clusters
=
{}
for
label
in
labels
:
clusters
[
label
]
=
[
l
for
l
in
locations
if
l
[
'cluster_label'
]
==
label
]
features
=
self
.
extract_location_features
(
locations
)
return
clusters
labels
=
self
.
create_labels
(
features
)
self
.
label_dataset
(
locations
,
labels
)
return
self
.
group_by_clusters
(
locations
,
labels
)
def
cluster_times
(
self
,
times
:
List
[
Dict
])
->
Dict
[
int
,
List
[
Dict
]]:
times1
=
np
.
asarray
([((
t
[
'timestamp'
]),
0
)
for
t
in
times
])
'''Returns a dictionary with identified clusters and their times copied from the input'''
features
=
self
.
extract_time_features
(
times
)
# TODO refactor for other input
dbsc
=
DBSCAN
(
eps
=
self
.
epsilon
,
min_samples
=
self
.
min_points
)
dbsc
=
dbsc
.
fit
(
times1
)
labels
=
dbsc
.
labels_
.
tolist
()
labels
=
self
.
create_labels
(
features
)
self
.
label_dataset
(
times
,
labels
)
self
.
label_locations
(
times
,
labels
)
clusters
=
{}
for
label
in
labels
:
clusters
[
label
]
=
[
l
for
l
in
times
if
l
[
'cluster_label'
]
==
label
]
# fig = self._draw_locations(locations=times1, partition_info=labels)
# fig.savefig('img.png')
return
clusters
\ No newline at end of file
return
self
.
group_by_clusters
(
times
,
labels
)
\ No newline at end of file
src/data-hub/community-detection-microservice/app/run_clustering.py
View file @
e4993f0c
...
...
@@ -10,15 +10,20 @@ from db.repository import Repository
from
processing.clusterer
import
Clusterer
DEBUG
=
Tru
e
DEBUG
=
Fals
e
repo
=
Repository
()
# locs = repo.get_agi_locations()
# for l in locs:
# repo.add_location(l)
# exit()
def
run_location_clustering
():
user_clusterer
=
Clusterer
()
all_location_traces
=
repo
.
get_
agi_
locations
()
all_location_traces
=
repo
.
get_locations
()
cluster_result
=
user_clusterer
.
cluster_locations
(
[
l
.
to_serializable_dict
()
for
l
in
all_location_traces
])
...
...
@@ -32,7 +37,7 @@ def run_location_clustering():
def
run_time_clustering
():
user_clusterer
=
Clusterer
(
epsilon
=
10
**
5.8
)
all_location_traces
=
repo
.
get_
agi_
locations
()
all_location_traces
=
repo
.
get_locations
()
cluster_result
=
user_clusterer
.
cluster_times
([
l
.
to_serializable_dict
()
for
l
in
all_location_traces
])
...
...
@@ -43,6 +48,7 @@ def run_time_clustering():
repo
.
add_time_cluster
(
c
)
# TODO make abstract for other features
def
store_user_clusters
(
user_clusters
:
List
[
UserCluster
]):
if
DEBUG
:
print
(
user_clusters
)
...
...
@@ -54,3 +60,4 @@ def store_user_clusters(user_clusters: List[UserCluster]):
if
__name__
==
"__main__"
:
run_time_clustering
()
run_location_clustering
()
src/data-hub/community-detection-microservice/app/tests/test_clusterer.py
View file @
e4993f0c
...
...
@@ -20,13 +20,15 @@ class TestClusterer(unittest.TestCase):
self
.
assertEqual
([],
labels
)
def
test_create_labels_singleInput_singleCluster
(
self
):
labels
=
self
.
clusterer
.
create_labels
([
self
.
location
(
1
,
2
)])
features
=
self
.
clusterer
.
extract_location_features
([
self
.
location
(
1
,
2
)])
labels
=
self
.
clusterer
.
create_labels
(
features
)
self
.
assertEqual
(
1
,
len
(
labels
))
def
test_create_labels_nearInputs_singleCluster
(
self
):
locations
=
[
self
.
location
(
1
,
2
),
self
.
location
(
2
,
2
)]
labels
=
self
.
clusterer
.
create_labels
(
locations
)
features
=
self
.
clusterer
.
extract_location_features
(
locations
)
labels
=
self
.
clusterer
.
create_labels
(
features
)
self
.
assertEqual
(
2
,
len
(
labels
))
self
.
assertEqual
(
labels
[
0
],
labels
[
1
])
...
...
@@ -34,36 +36,37 @@ class TestClusterer(unittest.TestCase):
def
test_create_labels_nearInputs_twoClusters
(
self
):
locations
=
[
self
.
location
(
1
,
2
),
self
.
location
(
2
,
2
),
self
.
location
(
20
,
20
)]
labels
=
self
.
clusterer
.
create_labels
(
locations
)
features
=
self
.
clusterer
.
extract_location_features
(
locations
)
labels
=
self
.
clusterer
.
create_labels
(
features
)
self
.
assertEqual
(
3
,
len
(
labels
))
self
.
assertEqual
(
labels
[
0
],
labels
[
1
])
self
.
assertNotEqual
(
labels
[
0
],
labels
[
2
])
def
test_label_locations_NoneLocations_NoException
(
self
):
self
.
clusterer
.
label_
locations
(
None
,
[])
self
.
clusterer
.
label_
dataset
(
None
,
[])
def
test_label_locations_NoneLabels_NoException
(
self
):
self
.
clusterer
.
label_
locations
([],
None
)
self
.
clusterer
.
label_
dataset
([],
None
)
def
test_label_locations_emptyInput_emptyOutput
(
self
):
locations
=
[]
self
.
clusterer
.
label_
locations
(
locations
,
[])
self
.
clusterer
.
label_
dataset
(
locations
,
[])
self
.
assertEqual
(
0
,
len
(
locations
))
def
test_label_locations_diffInputLengths_ValueError_1
(
self
):
with
self
.
assertRaises
(
ValueError
):
self
.
clusterer
.
label_
locations
([],
[
1
])
self
.
clusterer
.
label_
dataset
([],
[
1
])
def
test_label_locations_diffInputLengths_ValueError_2
(
self
):
with
self
.
assertRaises
(
ValueError
):
self
.
clusterer
.
label_
locations
([
self
.
location
(
1
,
2
)],
[])
self
.
clusterer
.
label_
dataset
([
self
.
location
(
1
,
2
)],
[])
def
test_label_locations_multInput_correctlyLabeled
(
self
):
locations
=
[
self
.
location
(
1
,
2
),
self
.
location
(
2
,
2
),
self
.
location
(
20
,
20
)]
labels
=
[
17
,
2
,
20
]
self
.
clusterer
.
label_
locations
(
locations
,
labels
)
self
.
clusterer
.
label_
dataset
(
locations
,
labels
)
self
.
assertEqual
(
3
,
len
(
locations
))
self
.
assertHaveLabelsAsNewKey
(
locations
,
labels
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment