Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
SMART
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
3
Issues
3
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Registry
Registry
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
UNI-KLU
SMART
Commits
a31e702c
Commit
a31e702c
authored
Jul 27, 2021
by
Alexander Lercher
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Added unit tests
parent
b7097db8
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
636 additions
and
0 deletions
+636
-0
test_ClusterMetricsCalculator.py
...n-microservice/app/tests/test_ClusterMetricsCalculator.py
+37
-0
test_cluster.py
...ommunity-detection-microservice/app/tests/test_cluster.py
+331
-0
test_layer.py
...-community-detection-microservice/app/tests/test_layer.py
+268
-0
No files found.
src/data-hub/proactive-community-detection-microservice/app/tests/test_ClusterMetricsCalculator.py
0 → 100644
View file @
a31e702c
import
unittest
import
sys
for
path
in
[
'../'
,
'./'
]:
sys
.
path
.
insert
(
1
,
path
)
# python -m unittest discover
from
processing
import
ClusterMetricsCalculator2D
class TestClusterMetricsCalculator(unittest.TestCase):
    """Unit tests for ``ClusterMetricsCalculator2D`` (standard deviation and range)."""

    def test__get_standard_deviation__same_points_many_decimals__zero_and_not_nan(self):
        # Nine separate but identical nodes with many decimal places.
        nodes = [{'f1': -8.58564, 'f2': 41.148567} for _ in range(9)]
        feature_names = ['f1', 'f2']

        calc = ClusterMetricsCalculator2D(nodes, feature_names, len(nodes), 1)
        std_dev = calc.get_standard_deviation()

        self.assertAlmostEqual(0, std_dev)

    def test__get_range__almost_linear_distribution_in_2d__euclidean_distance(self):
        coordinates = [
            (-8.657802, 41.160978),
            (-8.65782, 41.160969),
            (-8.657838, 41.16096),
        ]
        nodes = [{'f1': f1, 'f2': f2} for f1, f2 in coordinates]
        feature_names = ['f1', 'f2']

        calc = ClusterMetricsCalculator2D(nodes, feature_names, len(nodes), 1)
        actual_range = calc.get_range()

        # https://www.calculatorsoup.com/calculators/geometry-plane/distance-two-points.php
        self.assertAlmostEqual(4.0E-5, actual_range, 5)
if
__name__
==
'__main__'
:
unittest
.
main
()
src/data-hub/proactive-community-detection-microservice/app/tests/test_cluster.py
0 → 100644
View file @
a31e702c
import
unittest
import
sys
for
path
in
[
'../'
,
'./'
]:
sys
.
path
.
insert
(
1
,
path
)
# python -m unittest discover
from
entities
import
Cluster
,
TimeWindow
from
typing
import
Any
,
Tuple
from
datetime
import
date
,
datetime
import
json
from
math
import
sqrt
import
statistics
as
stat
class TestCluster(unittest.TestCase):
    """Tests for ``Cluster.__init__`` and ``Cluster.create_multiple_from_time_window``.

    Expected metric values are written as tuples in the order consumed by
    :meth:`assert_cluster`: ``(size, std_dev, scarcity, importance1, importance2)``.

    Every test that compares created clusters against an expected list first
    checks that the number of created clusters matches, because ``zip`` stops
    at the shorter input and would otherwise let a test pass without
    comparing anything.
    """

    def test__init__single_cluster__all_values_set(self):
        tw, centers = self._get_timewindow_single_cluster_same_feature()
        cluster_nodes = list(tw.clusters.values())[0]

        c = Cluster("time_abc", "clusterId 1", cluster_nodes, "feature",
                    nr_layer_nodes=3, layer_diversity=1,
                    global_cluster_center=centers['1'])

        self.assertEqual("time_abc", c.time_window_id)
        self.assertEqual("clusterId 1", c.cluster_id)
        self.assert_cluster((3, 0, 0, 1, 1), c)

        self.assertEqual(c.range_, 0)
        self.assertEqual(c.center, (1, 0))
        self.assertEqual(c.global_center_distance, 0)

    def test__create_multiple_from_time_window__single_cluster__all_values_set(self):
        tw, centers = self._get_timewindow_single_cluster_same_feature()

        clusters = self._create_clusters(tw, "feature", centers, expected_count=1)

        c = clusters[0]
        self.assertEqual("KW1", c.time_window_id)
        self.assertEqual("1", c.cluster_id)
        self.assert_cluster((3, 0, 0, 1, 1), c)

    def test__create_multiple_from_time_window__two_clusters__correct_time_id_cluster_id(self):
        tw, centers = self._get_timewindow_two_clusters_same_feature()
        expected = [("KW1", "1"), ("KW1", "2")]

        clusters = self._create_clusters(tw, "feature", centers, len(expected))

        for c, (exp_time_window_id, exp_cluster_id) in zip(clusters, expected):
            self.assertEqual(exp_time_window_id, c.time_window_id)
            self.assertEqual(exp_cluster_id, c.cluster_id)

    def test__create_multiple_from_time_window__two_clusters_same_features__correct_calculation(self):
        tw, centers = self._get_timewindow_two_clusters_same_feature()

        expected = [(3, 0, 0, 3/5, 1/2),
                    (2, 0, 0, 2/5, 1/2)]
        self._assert_created_clusters(tw, "feature", centers, expected)

    def test__create_multiple_from_time_window__two_clusters_same_features_and_feature_names_list__correct_calculation(self):
        tw, centers = self._get_timewindow_two_clusters_same_feature()

        # Same as above, but the feature name is passed as a list.
        expected = [(3, 0, 0, 3/5, 1/2),
                    (2, 0, 0, 2/5, 1/2)]
        self._assert_created_clusters(tw, ["feature"], centers, expected)

    def test__create_multiple_from_time_window__two_clusters_different_features__correct_calculation(self):
        tw = self._build_time_window("CW1", [
            ("1", [{"feature": 1}, {"feature": 2}, {"feature": 3}]),
            ("2", [{"feature": 70}, {"feature": 75}]),
        ])
        centers = {'1': (2, 0), '2': (72.5, 0)}

        # variance for stddev calculated with: http://www.alcula.com/calculators/statistics/variance/
        expected = [(3, sqrt(2.0/3), 2.0/3, 3/5, 1/2),
                    (2, sqrt(6.25), 5.0/2, 2/5, 1/2)]
        self._assert_created_clusters(tw, "feature", centers, expected)

    def test__create_multiple_from_time_window__empty_cluster__all_zero_for_empty_cluster(self):
        tw = self._build_time_window("CW1", [
            ("1", [{"feature": 1}, {"feature": 2}, {"feature": 3}]),
            ("2", [{"feature": 70}, {"feature": 75}]),
        ])
        tw.clusters["3"] = []
        centers = {'1': (2, 0), '2': (72.5, 0), '3': (0, 0)}

        expected = [(3, sqrt(2.0/3), 2.0/3, 3/5, 1/2),  # diversity is still 2 as len=0 is ignored
                    (2, sqrt(6.25), 5.0/2, 2/5, 1/2),
                    (0, 0, 0, 0, 0)]  # len 0 -> everything 0
        self._assert_created_clusters(tw, "feature", centers, expected)

    def test__create_multiple_from_time_window__2d_clustering_single_feature_value__no_stddev_no_scarcity(self):
        tw = self._build_time_window("CW1", [
            ("1", [{"f1": 1, "f2": 1}, {"f1": 1, "f2": 1}, {"f1": 1, "f2": 1}]),
            ("2", [{"f1": 70, "f2": 70}, {"f1": 70, "f2": 70}]),
        ])
        centers = {'1': (1, 1), '2': (70, 70)}

        expected = [(3, 0, 0, 3/5, 1/2),
                    (2, 0, 0, 2/5, 1/2)]
        self._assert_created_clusters(tw, ["f1", "f2"], centers, expected)

    def test__create_multiple_from_time_window__2d_clustering__correct_stddev_and_scarcity(self):
        tw = self._build_time_window("CW1", [
            ("1", [{"f1": 1, "f2": 1}, {"f1": 2, "f2": 1}, {"f1": 1, "f2": 3}]),
            ("2", [{"f1": 70, "f2": 70}, {"f1": 72, "f2": 75}]),
        ])
        centers = {'1': (4/3, 5/3), '2': (71, 72.5)}

        # stddev calculated manually as in: https://glenbambrick.com/tag/standard-distance/
        # area of the polygon calculated with: https://www.mathopenref.com/coordpolygonareacalc.html
        expected = [(3, sqrt(2/9 + 8/9), sqrt(1/3), 3/5, 1/2),
                    (2, sqrt(7.25), sqrt(2*2 + 5*5)/2, 2/5, 1/2)]
        self._assert_created_clusters(tw, ["f1", "f2"], centers, expected)

    def test__create_multiple_from_time_window__2d_clustering_complex__correct_stddev_and_scarcity(self):
        tw = self._build_time_window("CW1", [
            ("1", [{"f1": 0, "f2": 0},
                   {"f1": 1, "f2": 3},
                   {"f1": 3, "f2": 2},
                   {"f1": 0, "f2": 2},
                   {"f1": 1, "f2": 2},  # inside the convex hull
                   {"f1": 2, "f2": 2},  # inside the convex hull
                   {"f1": 2, "f2": 1}]),
        ])
        centers = {'1': ((1+3+1+2+2)/7, (3+2+2+2+2+1)/7)}

        # stddev calculated manually as in: https://glenbambrick.com/tag/standard-distance/
        X = [0, 1, 3, 0, 1, 2, 2]
        Y = [0, 3, 2, 2, 2, 2, 1]
        x_mean = stat.mean(X)
        y_mean = stat.mean(Y)
        sum_x = sum((x - x_mean)**2 for x in X)
        sum_y = sum((y - y_mean)**2 for y in Y)
        sd = sqrt(sum_x/7 + sum_y/7)

        # area of the polygon calculated with: https://www.mathopenref.com/coordpolygonareacalc.html
        area = 5
        scarcity = sqrt(area / 7)

        expected = [(7, sd, scarcity, 1, 1)]
        self._assert_created_clusters(tw, ["f1", "f2"], centers, expected)

    def test__create_multiple_from_time_window__2d_clustering_1d_single_feature_value__correct_calculation(self):
        tw = self._build_time_window("CW1", [
            ("1", [{"f1": 1, "f2": 1}, {"f1": 1, "f2": 2}, {"f1": 1, "f2": 3}]),
            ("2", [{"f1": 70, "f2": 70}, {"f1": 75, "f2": 70},
                   {"f1": 72, "f2": 70}, {"f1": 71, "f2": 70}]),
        ])
        centers = {'1': (1, 2), '2': ((70+75+72+71)/4, 70)}

        # variance/stddev calculated as for 1d cluster (as f1/f2 is always the same)
        # scarcity calculated as for 1d cluster
        expected = [(3, sqrt(2/3), 2/3, 3/7, 1/2),
                    (4, sqrt(3.5), 5/4, 4/7, 1/2)]
        self._assert_created_clusters(tw, ["f1", "f2"], centers, expected)

    def test__create_multiple_from_time_window__1d_clusters__correct_cluster_range_and_center(self):
        tw = self._build_time_window("CW1", [
            ("1", [{"f1": 1}, {"f1": 2}, {"f1": 1}]),
            ("2", [{"f1": 70}, {"f1": 72}]),
        ])
        centers = {'1': (4/3, 0), '2': (71, 0)}
        expected = [(1, (4/3, 0)), (2, (71, 0))]  # (range, center)

        clusters = self._create_clusters(tw, ["f1"], centers, len(expected))

        for c, (exp_range, exp_center) in zip(clusters, expected):
            self.assertEqual(c.range_, exp_range)
            self.assertEqual(c.center, exp_center)

    def test__create_multiple_from_time_window__2d_clusters__correct_cluster_range_and_center(self):
        tw = self._build_time_window("CW1", [
            ("1", [{"f1": 1, "f2": 1}, {"f1": 2, "f2": 1}, {"f1": 1, "f2": 3}]),
            ("2", [{"f1": 70, "f2": 70}, {"f1": 72, "f2": 75}]),
        ])
        centers = {'1': (4/3, 5/3), '2': (71, 72.5)}

        # https://www.triangle-calculator.com/de/?what=vc&a=1&a1=1&3dd=3D&a2=&b=2&b1=1&b2=&c=1&c1=3&c2=&submit=Berechnen&3d=0
        # https://www.calculatorsoup.com/calculators/geometry-plane/distance-two-points.php
        expected = [(1, (4/3, 5/3)), (5.385165, (71, 72.5))]  # (range, center)

        clusters = self._create_clusters(tw, ["f1", "f2"], centers, len(expected))

        for c, (exp_range, exp_center) in zip(clusters, expected):
            self.assertAlmostEqual(c.range_, exp_range, places=6)
            self.assertEqual(c.center, exp_center)

    def test__create_multiple_from_time_window__2d_clusters__correct_linear_global_center_distance(self):
        tw = self._build_time_window("CW1", [
            ("1", [{"f1": 1, "f2": 2}, {"f1": 1, "f2": 2}, {"f1": 1, "f2": 2}]),
            ("2", [{"f1": 70, "f2": 70}, {"f1": 72, "f2": 75}]),
        ])
        centers = {'1': (1, 1), '2': (71, 70)}

        expected = [1, 2.5]  # euclidean dist
        self._assert_global_center_distances(tw, ["f1", "f2"], centers, expected)

    def test__create_multiple_from_time_window__1d_clusters__correct_euclidean_global_center_distance(self):
        tw = self._build_time_window("CW1", [
            ("1", [{"f1": 1}, {"f1": 1}, {"f1": 1}]),
            ("2", [{"f1": 70}, {"f1": 72}]),
        ])
        centers = {'1': (1, 0), '2': (70, 0)}

        # https://www.calculatorsoup.com/calculators/geometry-plane/distance-two-points.php
        expected = [0, 1]  # euclidean dist
        self._assert_global_center_distances(tw, ["f1"], centers, expected)

    def test__create_multiple_from_time_window__2d_clusters__correct_euclidean_global_center_distance(self):
        tw = self._build_time_window("CW1", [
            ("1", [{"f1": 1, "f2": 2}, {"f1": 1, "f2": 2}, {"f1": 1, "f2": 2}]),
            ("2", [{"f1": 70, "f2": 70}, {"f1": 72, "f2": 75}]),
        ])
        centers = {'1': (0, 0), '2': (70, 70)}

        # https://www.calculatorsoup.com/calculators/geometry-plane/distance-two-points.php
        expected = [2.236068, 2.692582]  # euclidean dist
        self._assert_global_center_distances(tw, ["f1", "f2"], centers, expected)

    #region setup methods
    @staticmethod
    def _build_time_window(time_id, nodes_per_cluster) -> "TimeWindow":
        '''
        Returns a TimeWindow(time_id, "uc", "uct", "ln") filled with the given nodes.

        :param time_id: First constructor argument of the TimeWindow.
        :param nodes_per_cluster: List of (cluster_id, [node, ...]) pairs; nodes are
            added via add_node_to_cluster in exactly the given order.
        '''
        tw = TimeWindow(time_id, "uc", "uct", "ln")
        for cluster_id, nodes in nodes_per_cluster:
            for node in nodes:
                tw.add_node_to_cluster(cluster_id, node)
        return tw

    def _create_clusters(self, tw, feature_names, centers, expected_count):
        '''
        Runs Cluster.create_multiple_from_time_window and returns the result as a list.

        Fails the test if the number of created clusters differs from expected_count,
        so that a following zip() over clusters and expectations cannot silently
        skip comparisons.
        '''
        clusters = list(Cluster.create_multiple_from_time_window(tw, feature_names, centers))
        self.assertEqual(expected_count, len(clusters))
        return clusters

    def _get_timewindow_single_cluster_same_feature(self) -> Tuple["TimeWindow", dict]:
        '''
        Returns a TimeWindow with time=KW1 and three nodes in cluster 1, all feature values = 1,
        together with the matching global cluster centers dict.
        '''
        tw = self._build_time_window("KW1", [
            ("1", [{"feature": 1}, {"feature": 1}, {"feature": 1}]),
        ])

        global_centers = {'1': (1, 0)}
        return tw, global_centers

    def _get_timewindow_two_clusters_same_feature(self) -> Tuple["TimeWindow", dict]:
        '''
        Returns a TimeWindow with time=KW1 and:
        Three nodes in cluster 1, all feature values = 1.
        Two nodes in cluster 2, all feature values = 2.
        The second return value is the matching global cluster centers dict.
        '''
        tw = self._build_time_window("KW1", [
            ("1", [{"feature": 1}, {"feature": 1}, {"feature": 1}]),
            ("2", [{"feature": 2}, {"feature": 2}]),
        ])

        global_centers = {'1': (1, 0), '2': (2, 0)}
        return tw, global_centers
    #endregion setup methods

    #region custom asserts
    def _assert_created_clusters(self, tw, feature_names, centers, expected):
        '''Creates the clusters for tw and checks each one against its expected metric tuple.'''
        clusters = self._create_clusters(tw, feature_names, centers, len(expected))
        for cluster, exp in zip(clusters, expected):
            self.assert_cluster(exp, cluster)

    def _assert_global_center_distances(self, tw, feature_names, centers, expected):
        '''Creates the clusters for tw and checks each global_center_distance to 6 places.'''
        clusters = self._create_clusters(tw, feature_names, centers, len(expected))
        for cluster, exp in zip(clusters, expected):
            self.assertAlmostEqual(cluster.global_center_distance, exp, places=6)

    def assert_cluster(self, expected_values: Tuple[Any, ...], cluster: "Cluster"):
        """
        Checks if the cluster values equal the expected_values.

        The size is compared exactly, all other values with assertAlmostEqual.

        :param expected_values: A tuple (exp_size, exp_stddev, exp_scarcity, exp_import1, exp_import2)
        :param cluster: The cluster whose size, std_dev, scarcity, importance1 and importance2 are checked.
        """
        self.assertEqual(expected_values[0], cluster.size)
        self.assertAlmostEqual(expected_values[1], cluster.std_dev)
        self.assertAlmostEqual(expected_values[2], cluster.scarcity)
        self.assertAlmostEqual(expected_values[3], cluster.importance1)
        self.assertAlmostEqual(expected_values[4], cluster.importance2)
    #endregion custom asserts
if
__name__
==
'__main__'
:
unittest
.
main
()
src/data-hub/proactive-community-detection-microservice/app/tests/test_layer.py
0 → 100644
View file @
a31e702c
import
unittest
import
sys
for
path
in
[
'../'
,
'./'
]:
sys
.
path
.
insert
(
1
,
path
)
# python -m unittest discover
from
entities
import
Layer
,
TimeWindow
from
entities.layer
import
InternalCluster
from
typing
import
Any
,
Tuple
,
List
from
datetime
import
date
,
datetime
import
json
from
math
import
sqrt
import
statistics
as
stat
class TestInternalCluster(unittest.TestCase):
    """Tests for ``InternalCluster``: id, size and distance to the global cluster center."""

    def _make_cluster(self, cluster_nodes, feature_names, global_cluster_center):
        # Every test uses cluster id "123" and treats the cluster's own nodes
        # as the complete layer (n_layer_nodes == number of cluster nodes).
        return InternalCluster("123", cluster_nodes,
                               feature_names=feature_names,
                               global_cluster_center=global_cluster_center,
                               n_layer_nodes=len(cluster_nodes))

    def test__init__1d_features__all_values_set(self):
        nodes = [{"feature": 1}, {"feature": 1}, {"feature": 1}]

        cluster = self._make_cluster(nodes, ["feature"], (1.5, 0))

        self.assert_internal_cluster(cluster, '123', 3, .5)

    def test__init__2d_features__all_values_set(self):
        nodes = [{"feature1": 1, 'feature2': 1},
                 {"feature1": 1, 'feature2': 1},
                 {"feature1": 1, 'feature2': 1}]

        cluster = self._make_cluster(nodes, ["feature1", 'feature2'], (1.5, 1.5))

        # distance: https://www.calculatorsoup.com/calculators/geometry-plane/distance-two-points.php
        expected_distance = sqrt(.5)
        self.assert_internal_cluster(cluster, '123', 3, expected_distance)

    def test__get_current_cluster_center__1d(self):
        nodes = [{"feature": 1}, {"feature": 2}, {"feature": 3}]

        cluster = self._make_cluster(nodes, ["feature"], (2, 0))

        self.assert_internal_cluster(cluster, '123', 3, 0)

    def test__get_current_cluster_center__1d_weighted_result(self):
        nodes = [{"feature": 1}, {"feature": 1}, {"feature": 3}]

        cluster = self._make_cluster(nodes, ["feature"], (5/3, 0))

        self.assert_internal_cluster(cluster, '123', 3, 0)

    def test__get_current_cluster_center__2d_weighted_result(self):
        nodes = [{"feature1": 1, "feature2": 1},
                 {"feature1": 1, "feature2": 1},
                 {"feature1": 2, "feature2": 2},
                 {"feature1": 3, "feature2": 1}]

        cluster = self._make_cluster(nodes, ["feature1", 'feature2'], (1.75, 1.25))

        self.assert_internal_cluster(cluster, '123', 4, 0)

    def assert_internal_cluster(self, actual_cluster: InternalCluster, expected_id, expected_size, expected_distance):
        """
        Checks cluster_id and size exactly and global_center_distance approximately.

        :param actual_cluster: The cluster under test.
        :param expected_id: Expected value of actual_cluster.cluster_id.
        :param expected_size: Expected value of actual_cluster.size.
        :param expected_distance: Expected value of actual_cluster.global_center_distance.
        """
        self.assertEqual(expected_id, actual_cluster.cluster_id)
        self.assertEqual(expected_size, actual_cluster.size)
        self.assertAlmostEqual(expected_distance, actual_cluster.global_center_distance)
class TestLayer(unittest.TestCase):
    """Tests for ``Layer.__init__`` and ``Layer.create_from_time_window``.

    The two-cluster fixtures that were repeated across tests are built by the
    ``_get_timewindow_two_clusters_*`` helpers. Comparisons against expected
    lists always check the lengths first, since ``zip`` stops at the shorter
    input and would otherwise skip missing entries without failing.
    """

    def test__init__1d_single_cluster(self):
        tw = self._get_timewindow_single_cluster_1d_same_feature()
        cluster_nodes = list(tw.clusters.values())[0]
        c = InternalCluster("123", cluster_nodes, feature_names=["feature"],
                            global_cluster_center=(1, 0), n_layer_nodes=len(cluster_nodes))

        layer = Layer('123', [c])

        self.assert_layer(layer, [1], 0, [0])

    def test__create_from_time_window__1d_single_cluster(self):
        tw = self._get_timewindow_single_cluster_1d_same_feature()

        layer = Layer.create_from_time_window(tw, feature_names=['feature'],
                                              global_cluster_centers={'1': (1, 0)})

        self.assert_layer(layer, [1], 0, [0])

    def test__create_from_time_window__2d_single_cluster(self):
        tw = self._get_timewindow_single_cluster_2d_same_feature()

        layer = Layer.create_from_time_window(tw, feature_names=['feature1', 'feature2'],
                                              global_cluster_centers={'1': (1, 1)})

        self.assert_layer(layer, [1], 0, [0])

    def test__create_from_time_window__1d_two_clusters(self):
        tw = self._build_time_window([
            ("1", [{"feature1": 1}, {"feature1": 1}]),
            ("2", [{"feature1": 5}, {"feature1": 5}, {"feature1": 7}, {"feature1": 6}]),
        ])

        layer = Layer.create_from_time_window(tw, feature_names=['feature1'],
                                              global_cluster_centers={'1': (1.5, 0), '2': (5, 0)})

        # entropy: https://planetcalc.com/2476/
        # distance: https://www.calculatorsoup.com/calculators/geometry-plane/distance-two-points.php
        self.assert_layer(layer, [2/6, 4/6], 0.91829583, [.5, .75])

    def test__create_from_time_window__2d_two_clusters(self):
        tw = self._get_timewindow_two_clusters_2d()

        layer = Layer.create_from_time_window(tw, feature_names=['feature1', 'feature2'],
                                              global_cluster_centers={'1': (1, 1), '2': (6.5, 5)})

        # entropy: https://planetcalc.com/2476/
        # distance: https://www.calculatorsoup.com/calculators/geometry-plane/distance-two-points.php
        self.assert_layer(layer, [3/5, 2/5], 0.97095059, [2/3, sqrt(.5)])

    def test__create_from_time_window__1d_clusters_correct_nodes_clusters(self):
        tw = self._get_timewindow_two_clusters_1d()

        layer = Layer.create_from_time_window(tw, feature_names=['feature1'],
                                              global_cluster_centers={'1': (1, 1), '2': (6.5, 5)})

        self.assertEqual(layer.n_nodes, 5)
        self.assertEqual(layer.n_clusters, 2)

    def test__create_from_time_window__2d_clusters_correct_nodes_clusters(self):
        tw = self._get_timewindow_two_clusters_2d()

        layer = Layer.create_from_time_window(tw, feature_names=['feature1', 'feature2'],
                                              global_cluster_centers={'1': (1, 1), '2': (6.5, 5)})

        self.assertEqual(layer.n_nodes, 5)
        self.assertEqual(layer.n_clusters, 2)

    def test__create_from_time_window__1d_clusters__correct_size_aggregate_metrics(self):
        tw = self._get_timewindow_two_clusters_1d()

        layer = Layer.create_from_time_window(tw, feature_names=['feature1'],
                                              global_cluster_centers={'1': (1, 0), '2': (6.5, 5)})

        self._assert_size_aggregate_metrics(layer)

    def test__create_from_time_window__2d_clusters__correct_size_aggregate_metrics(self):
        tw = self._get_timewindow_two_clusters_2d()

        layer = Layer.create_from_time_window(tw, feature_names=['feature1', 'feature2'],
                                              global_cluster_centers={'1': (1, 1), '2': (6.5, 5)})

        self._assert_size_aggregate_metrics(layer)

    def test__create_from_time_window__1d_clusters__correct_center_distance_aggregate_metrics(self):
        tw = self._get_timewindow_two_clusters_1d()

        layer = Layer.create_from_time_window(tw, feature_names=['feature1'],
                                              global_cluster_centers={'1': (1.1, 0), '2': (6.5, 0)})

        expected = {'min': 0.1, 'max': 0.5, 'sum': 0.6, 'avg': 0.3}
        self._assert_center_distance_aggregate_metrics(layer, expected)

    def test__create_from_time_window__2d_clusters__correct_center_distance_aggregate_metrics(self):
        tw = self._get_timewindow_two_clusters_2d()

        layer = Layer.create_from_time_window(tw, feature_names=['feature1', 'feature2'],
                                              global_cluster_centers={'1': (1, 1), '2': (6.5, 5)})

        expected = {'min': 2/3, 'max': sqrt(.5), 'sum': sqrt(.5) + 2/3, 'avg': (sqrt(.5) + 2/3)/2}
        self._assert_center_distance_aggregate_metrics(layer, expected)

    def test__create_from_time_window__empty_layer__correct_size_and_centerdist_aggregate_metrics(self):
        # No nodes are added, so the time window has no cluster content at all.
        tw = TimeWindow("KW1", "uc", "uct", "ln")

        layer = Layer.create_from_time_window(tw, feature_names=['feature1'],
                                              global_cluster_centers={'1': (1, 0), '2': (6.5, 5)})

        expected = {'min': 0, 'max': 0, 'sum': 0, 'avg': 0}
        self.assertEqual(layer.cluster_size_agg_metrics, expected)
        self.assertEqual(layer.cluster_relative_size_agg_metrics, expected)
        self.assertEqual(layer.cluster_center_distance_agg_metrics, expected)

    def test__create_from_time_window__1d_clusters_correct_centers(self):
        # The nodes carry feature2 as well, but only feature1 is clustered on.
        tw = self._get_timewindow_two_clusters_2d()

        layer = Layer.create_from_time_window(tw, feature_names=['feature1'],
                                              global_cluster_centers={'1': (1, 1), '2': (6.5, 5)})

        self._assert_centers(layer, [(1, 0), (6, 0)])

    def test__create_from_time_window__2d_clusters_correct_centers(self):
        tw = self._get_timewindow_two_clusters_2d()

        layer = Layer.create_from_time_window(tw, feature_names=['feature1', 'feature2'],
                                              global_cluster_centers={'1': (1, 1), '2': (6.5, 5)})

        self._assert_centers(layer, [(1, 1.66666666666), (6, 4.5)])

    #region setup methods
    @staticmethod
    def _build_time_window(nodes_per_cluster) -> "TimeWindow":
        '''
        Returns a TimeWindow("KW1", "uc", "uct", "ln") filled with the given nodes.

        :param nodes_per_cluster: List of (cluster_id, [node, ...]) pairs; nodes are
            added via add_node_to_cluster in exactly the given order.
        '''
        tw = TimeWindow("KW1", "uc", "uct", "ln")
        for cluster_id, nodes in nodes_per_cluster:
            for node in nodes:
                tw.add_node_to_cluster(cluster_id, node)
        return tw

    def _get_timewindow_single_cluster_1d_same_feature(self) -> "TimeWindow":
        '''Returns a TimeWindow with time=KW1 and three nodes in cluster 1, all feature values = 1.'''
        return self._build_time_window([
            ("1", [{"feature": 1}, {"feature": 1}, {"feature": 1}]),
        ])

    def _get_timewindow_single_cluster_2d_same_feature(self) -> "TimeWindow":
        '''Returns a TimeWindow with time=KW1 and three nodes in cluster 1, all feature1 & feature2 values = 1.'''
        return self._build_time_window([
            ("1", [{"feature1": 1, "feature2": 1},
                   {"feature1": 1, "feature2": 1},
                   {"feature1": 1, "feature2": 1}]),
        ])

    def _get_timewindow_two_clusters_1d(self) -> "TimeWindow":
        '''
        Returns a TimeWindow with time=KW1 and:
        Three nodes in cluster 1, all feature1 values = 1.
        Two nodes in cluster 2 with feature1 = 5 and 7; the first one also has feature2 = 5.
        '''
        return self._build_time_window([
            ("1", [{"feature1": 1}, {"feature1": 1}, {"feature1": 1}]),
            ("2", [{"feature1": 5, "feature2": 5}, {"feature1": 7}]),
        ])

    def _get_timewindow_two_clusters_2d(self) -> "TimeWindow":
        '''
        Returns a TimeWindow with time=KW1 and:
        Three nodes in cluster 1 with (feature1, feature2) = (1,1), (1,2), (1,2).
        Two nodes in cluster 2 with (feature1, feature2) = (5,5), (7,4).
        '''
        return self._build_time_window([
            ("1", [{"feature1": 1, "feature2": 1},
                   {"feature1": 1, "feature2": 2},
                   {"feature1": 1, "feature2": 2}]),
            ("2", [{"feature1": 5, "feature2": 5},
                   {"feature1": 7, "feature2": 4}]),
        ])
    #endregion setup methods

    #region custom asserts
    def _assert_size_aggregate_metrics(self, layer):
        '''Checks the size aggregates expected for a layer with one 3-node and one 2-node cluster.'''
        self.assertEqual(layer.cluster_size_agg_metrics,
                         {'min': 2, 'max': 3, 'sum': 5, 'avg': 2.5})
        self.assertEqual(layer.cluster_relative_size_agg_metrics,
                         {'min': 2/5, 'max': 3/5, 'sum': 1, 'avg': (1/2)})

    def _assert_center_distance_aggregate_metrics(self, layer, expected):
        '''Checks that exactly four center-distance aggregates exist and that min/max/sum/avg match.'''
        actual = layer.cluster_center_distance_agg_metrics
        self.assertEqual(len(actual.items()), 4)
        for k in ['min', 'max', 'sum', 'avg']:
            self.assertAlmostEqual(actual[k], expected[k])

    def _assert_centers(self, layer, expected_centers):
        '''
        Checks layer.centers against expected_centers dimension by dimension.

        The number of centers and the number of dimensions per center are asserted
        before zipping, so missing centers or dimensions fail the test.
        '''
        actual_centers = list(layer.centers)
        self.assertEqual(len(expected_centers), len(actual_centers))
        for exp, act in zip(expected_centers, actual_centers):
            act = tuple(act)
            self.assertEqual(len(exp), len(act))
            for exp_dim, act_dim in zip(exp, act):
                self.assertAlmostEqual(exp_dim, act_dim)

    def assert_layer(self, actual_layer: "Layer", relative_sizes: List[float], entropy: float, center_dist: List[float]):
        '''
        Checks relative cluster sizes, entropy and distances from the global centers of a layer.

        :param actual_layer: The layer under test.
        :param relative_sizes: Expected actual_layer.relative_cluster_sizes, compared by index.
        :param entropy: Expected actual_layer.entropy.
        :param center_dist: Expected actual_layer.distances_from_global_centers, compared by index.
        '''
        self.assertEqual(len(actual_layer.relative_cluster_sizes), len(relative_sizes))
        for i, exp_size in enumerate(relative_sizes):
            self.assertAlmostEqual(exp_size, actual_layer.relative_cluster_sizes[i])

        self.assertAlmostEqual(entropy, actual_layer.entropy)

        self.assertEqual(len(actual_layer.distances_from_global_centers), len(center_dist))
        for i, exp_dist in enumerate(center_dist):
            self.assertAlmostEqual(exp_dist, actual_layer.distances_from_global_centers[i])
    #endregion custom asserts
if
__name__
==
'__main__'
:
unittest
.
main
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment