Commit a31e702c authored by Alexander Lercher's avatar Alexander Lercher

Added unit tests

parent b7097db8
import unittest
import sys
for path in ['../', './']:
sys.path.insert(1, path)
# python -m unittest discover
from processing import ClusterMetricsCalculator2D
class TestClusterMetricsCalculator(unittest.TestCase):
def test__get_standard_deviation__same_points_many_decimals__zero_and_not_nan(self):
nodes = [{'f1': -8.58564, 'f2': 41.148567},
{'f1': -8.58564, 'f2': 41.148567},
{'f1': -8.58564, 'f2': 41.148567},
{'f1': -8.58564, 'f2': 41.148567},
{'f1': -8.58564, 'f2': 41.148567},
{'f1': -8.58564, 'f2': 41.148567},
{'f1': -8.58564, 'f2': 41.148567},
{'f1': -8.58564, 'f2': 41.148567},
{'f1': -8.58564, 'f2': 41.148567}]
calc = ClusterMetricsCalculator2D(nodes, ['f1','f2'], len(nodes), 1)
self.assertAlmostEqual(0, calc.get_standard_deviation())
def test__get_range__almost_linear_distribution_in_2d__euclidean_distance(self):
l = [(-8.657802, 41.160978), (-8.65782, 41.160969), (-8.657838, 41.16096)]
nodes = [{'f1': e[0], 'f2': e[1]} for e in l]
calc = ClusterMetricsCalculator2D(nodes, ['f1','f2'], len(nodes), 1)
# https://www.calculatorsoup.com/calculators/geometry-plane/distance-two-points.php
self.assertAlmostEqual(4.0E-5, calc.get_range(), 5)
if __name__ == '__main__':
unittest.main()
import unittest
import sys
for path in ['../', './']:
sys.path.insert(1, path)
# python -m unittest discover
from entities import Cluster, TimeWindow
from typing import Any, Tuple
from datetime import date, datetime
import json
from math import sqrt
import statistics as stat
class TestCluster(unittest.TestCase):
def test__init__single_cluster__all_values_set(self):
tw, centers = self._get_timewindow_single_cluster_same_feature()
c = Cluster("time_abc", "clusterId 1", list(tw.clusters.values())[0], "feature", nr_layer_nodes=3, layer_diversity=1, global_cluster_center=centers['1'])
self.assertEqual("time_abc", c.time_window_id)
self.assertEqual("clusterId 1", c.cluster_id)
self.assert_cluster((3, 0, 0, 1, 1), c)
self.assertEqual(c.range_, 0)
self.assertEqual(c.center, (1,0))
self.assertEqual(c.global_center_distance, 0)
def test__create_multiple_from_time_window__single_cluster__all_values_set(self):
tw, centers = self._get_timewindow_single_cluster_same_feature()
clusters = list(Cluster.create_multiple_from_time_window(tw, "feature", centers))
self.assertEqual(1, len(clusters))
c = clusters[0]
self.assertEqual("KW1", c.time_window_id)
self.assertEqual("1", c.cluster_id)
self.assert_cluster((3, 0, 0, 1, 1), c)
def test__create_multiple_from_time_window__two_clusters__correct_time_id_cluster_id(self):
tw, centers = self._get_timewindow_two_clusters_same_feature()
clusters = Cluster.create_multiple_from_time_window(tw, "feature", centers)
expected = [("KW1", "1"), ("KW1", "2")]
for c, exp in zip(clusters, expected):
self.assertEqual(exp[0], c.time_window_id)
self.assertEqual(exp[1], c.cluster_id)
def test__create_multiple_from_time_window__two_clusters_same_features__correct_calculation(self):
tw, centers = self._get_timewindow_two_clusters_same_feature()
clusters = Cluster.create_multiple_from_time_window(tw, "feature", centers)
expected = [(3, 0, 0, 3/5, 1/2), (2, 0, 0, 2/5, 1/2)]
for c, exp in zip(clusters, expected):
self.assert_cluster(exp, c)
def test__create_multiple_from_time_window__two_clusters_same_features_and_feature_names_list__correct_calculation(self):
tw, centers = self._get_timewindow_two_clusters_same_feature()
clusters = Cluster.create_multiple_from_time_window(tw, ["feature"], centers)
expected = [(3, 0, 0, 3/5, 1/2), (2, 0, 0, 2/5, 1/2)]
for c, exp in zip(clusters, expected):
self.assert_cluster(exp, c)
def test__create_multiple_from_time_window__two_clusters_different_features__correct_calculation(self):
tw = TimeWindow("CW1", "uc", "uct", "ln")
tw.add_node_to_cluster("1", {"feature":1})
tw.add_node_to_cluster("1", {"feature":2})
tw.add_node_to_cluster("1", {"feature":3})
tw.add_node_to_cluster("2", {"feature":70})
tw.add_node_to_cluster("2", {"feature":75})
centers = {'1':(2,0),'2':(72.5,0)}
clusters = Cluster.create_multiple_from_time_window(tw, "feature", centers)
# variance for stddev calculated with: http://www.alcula.com/calculators/statistics/variance/
expected = [(3, sqrt(2.0/3), 2.0/3, 3/5, 1/2), (2, sqrt(6.25), 5.0/2, 2/5, 1/2)]
for cluster, exp in zip(clusters, expected):
self.assert_cluster(exp, cluster)
def test__create_multiple_from_time_window__empty_cluster__all_zero_for_empty_cluster(self):
tw = TimeWindow("CW1", "uc", "uct", "ln")
tw.add_node_to_cluster("1", {"feature":1})
tw.add_node_to_cluster("1", {"feature":2})
tw.add_node_to_cluster("1", {"feature":3})
tw.add_node_to_cluster("2", {"feature":70})
tw.add_node_to_cluster("2", {"feature":75})
tw.clusters["3"] = []
centers = {'1':(2,0),'2':(72.5,0),'3':(0,0)}
clusters = Cluster.create_multiple_from_time_window(tw, "feature", centers)
expected = [(3, sqrt(2.0/3), 2.0/3, 3/5, 1/2), # diversity is still 2 as len=0 is ignored
(2, sqrt(6.25), 5.0/2, 2/5, 1/2),
(0, 0, 0, 0, 0)] # len 0 -> everything 0
for cluster, exp in zip(clusters, expected):
self.assert_cluster(exp, cluster)
def test__create_multiple_from_time_window__2d_clustering_single_feature_value__no_stddev_no_scarcity(self):
tw = TimeWindow("CW1", "uc", "uct", "ln")
tw.add_node_to_cluster("1", {"f1":1, "f2":1})
tw.add_node_to_cluster("1", {"f1":1, "f2":1})
tw.add_node_to_cluster("1", {"f1":1, "f2":1})
tw.add_node_to_cluster("2", {"f1":70, "f2":70})
tw.add_node_to_cluster("2", {"f1":70, "f2":70})
centers = {'1':(1,1),'2':(70,70)}
clusters = Cluster.create_multiple_from_time_window(tw, ["f1", "f2"], centers)
expected = [(3, 0, 0, 3/5, 1/2), (2, 0, 0, 2/5, 1/2)]
for cluster, exp in zip(clusters, expected):
self.assert_cluster(exp, cluster)
def test__create_multiple_from_time_window__2d_clustering__correct_stddev_and_scarcity(self):
tw = TimeWindow("CW1", "uc", "uct", "ln")
tw.add_node_to_cluster("1", {"f1":1, "f2":1})
tw.add_node_to_cluster("1", {"f1":2, "f2":1})
tw.add_node_to_cluster("1", {"f1":1, "f2":3})
tw.add_node_to_cluster("2", {"f1":70, "f2":70})
tw.add_node_to_cluster("2", {"f1":72, "f2":75})
centers = {'1':(4/3,5/3),'2':(71,72.5)}
clusters = Cluster.create_multiple_from_time_window(tw, ["f1", "f2"], centers)
# stddev calculated manually as in: https://glenbambrick.com/tag/standard-distance/
# area of the polygon calculated with: https://www.mathopenref.com/coordpolygonareacalc.html
expected = [(3, sqrt(2/9+8/9), sqrt(1/3), 3/5, 1/2), (2, sqrt(7.25), sqrt(2*2+5*5)/2, 2/5, 1/2)]
for cluster, exp in zip(clusters, expected):
self.assert_cluster(exp, cluster)
def test__create_multiple_from_time_window__2d_clustering_complex__correct_stddev_and_scarcity(self):
tw = TimeWindow("CW1", "uc", "uct", "ln")
tw.add_node_to_cluster("1", {"f1":0, "f2":0})
tw.add_node_to_cluster("1", {"f1":1, "f2":3})
tw.add_node_to_cluster("1", {"f1":3, "f2":2})
tw.add_node_to_cluster("1", {"f1":0, "f2":2})
tw.add_node_to_cluster("1", {"f1":1, "f2":2}) # inside the convex hull
tw.add_node_to_cluster("1", {"f1":2, "f2":2}) # inside the convex hull
tw.add_node_to_cluster("1", {"f1":2, "f2":1})
centers = {'1':((1+3+1+2+2)/7,(3+2+2+2+2+1)/7)}
clusters = Cluster.create_multiple_from_time_window(tw, ["f1", "f2"], centers)
# stddev calculated manually as in: https://glenbambrick.com/tag/standard-distance/
X = [0,1,3,0,1,2,2]
Y = [0,3,2,2,2,2,1]
x_mean = stat.mean(X)
y_mean = stat.mean(Y)
sum_x = 0
for x in X:
sum_x += (x - x_mean)**2
sum_y = 0
for y in Y:
sum_y += (y - y_mean)**2
sd = sqrt(sum_x/7 + sum_y/7)
# area of the polygon calculated with: https://www.mathopenref.com/coordpolygonareacalc.html
area = 5
scarcity = sqrt(area / 7)
expected = [[7, sd, scarcity, 1, 1]]
for cluster, exp in zip(clusters, expected):
self.assert_cluster(exp, cluster)
def test__create_multiple_from_time_window__2d_clustering_1d_single_feature_value__correct_calculation(self):
tw = TimeWindow("CW1", "uc", "uct", "ln")
tw.add_node_to_cluster("1", {"f1":1, "f2":1})
tw.add_node_to_cluster("1", {"f1":1, "f2":2})
tw.add_node_to_cluster("1", {"f1":1, "f2":3})
tw.add_node_to_cluster("2", {"f1":70, "f2":70})
tw.add_node_to_cluster("2", {"f1":75, "f2":70})
tw.add_node_to_cluster("2", {"f1":72, "f2":70})
tw.add_node_to_cluster("2", {"f1":71, "f2":70})
centers = {'1':(1,2),'2':((70+75+72+71)/4,70)}
clusters = Cluster.create_multiple_from_time_window(tw, ["f1", "f2"], centers)
# variance/stddev calculated as for 1d cluster (as f1/f2 is always the same)
# scarcity calculated as for 1d cluster
expected = [(3, sqrt(2/3), 2/3, 3/7, 1/2),
(4, sqrt(3.5), 5/4, 4/7, 1/2)]
for cluster, exp in zip(clusters, expected):
self.assert_cluster(exp, cluster)
def test__create_multiple_from_time_window__1d_clusters__correct_cluster_range_and_center(self):
tw = TimeWindow("CW1", "uc", "uct", "ln")
tw.add_node_to_cluster("1", {"f1":1})
tw.add_node_to_cluster("1", {"f1":2})
tw.add_node_to_cluster("1", {"f1":1})
tw.add_node_to_cluster("2", {"f1":70})
tw.add_node_to_cluster("2", {"f1":72})
centers = {'1':(4/3, 0),'2':(71,0)}
clusters = Cluster.create_multiple_from_time_window(tw, ["f1"], centers)
expected = [(1, (4/3,0)), (2, (71,0))] # (range, center)
for c, exp in zip(clusters, expected):
self.assertEqual(c.range_, exp[0])
self.assertEqual(c.center, exp[1])
def test__create_multiple_from_time_window__2d_clusters__correct_cluster_range_and_center(self):
tw = TimeWindow("CW1", "uc", "uct", "ln")
tw.add_node_to_cluster("1", {"f1":1, "f2":1})
tw.add_node_to_cluster("1", {"f1":2, "f2":1})
tw.add_node_to_cluster("1", {"f1":1, "f2":3})
tw.add_node_to_cluster("2", {"f1":70, "f2":70})
tw.add_node_to_cluster("2", {"f1":72, "f2":75})
centers = {'1':(4/3,5/3),'2':(71,72.5)}
clusters = Cluster.create_multiple_from_time_window(tw, ["f1","f2"], centers)
# https://www.triangle-calculator.com/de/?what=vc&a=1&a1=1&3dd=3D&a2=&b=2&b1=1&b2=&c=1&c1=3&c2=&submit=Berechnen&3d=0
# https://www.calculatorsoup.com/calculators/geometry-plane/distance-two-points.php
expected = [(1, (4/3,5/3)), (5.385165, (71,72.5))] # (range, center)
for c, exp in zip(clusters, expected):
self.assertAlmostEqual(c.range_, exp[0], places=6)
self.assertEqual(c.center, exp[1])
def test__create_multiple_from_time_window__2d_clusters__correct_linear_global_center_distance(self):
tw = TimeWindow("CW1", "uc", "uct", "ln")
tw.add_node_to_cluster("1", {"f1":1, "f2":2})
tw.add_node_to_cluster("1", {"f1":1, "f2":2})
tw.add_node_to_cluster("1", {"f1":1, "f2":2})
tw.add_node_to_cluster("2", {"f1":70, "f2":70})
tw.add_node_to_cluster("2", {"f1":72, "f2":75})
centers = {'1':(1,1),'2':(71,70)}
clusters = Cluster.create_multiple_from_time_window(tw, ["f1","f2"], centers)
expected = [1, 2.5] # euclidean dist
for c, exp in zip(clusters, expected):
self.assertAlmostEqual(c.global_center_distance, exp, places=6)
def test__create_multiple_from_time_window__1d_clusters__correct_euclidean_global_center_distance(self):
tw = TimeWindow("CW1", "uc", "uct", "ln")
tw.add_node_to_cluster("1", {"f1":1})
tw.add_node_to_cluster("1", {"f1":1})
tw.add_node_to_cluster("1", {"f1":1})
tw.add_node_to_cluster("2", {"f1":70})
tw.add_node_to_cluster("2", {"f1":72})
centers = {'1':(1,0),'2':(70,0)}
clusters = Cluster.create_multiple_from_time_window(tw, ["f1"], centers)
# https://www.calculatorsoup.com/calculators/geometry-plane/distance-two-points.php
expected = [0, 1] # euclidean dist
for c, exp in zip(clusters, expected):
self.assertAlmostEqual(c.global_center_distance, exp, places=6)
def test__create_multiple_from_time_window__2d_clusters__correct_euclidean_global_center_distance(self):
tw = TimeWindow("CW1", "uc", "uct", "ln")
tw.add_node_to_cluster("1", {"f1":1, "f2":2})
tw.add_node_to_cluster("1", {"f1":1, "f2":2})
tw.add_node_to_cluster("1", {"f1":1, "f2":2})
tw.add_node_to_cluster("2", {"f1":70, "f2":70})
tw.add_node_to_cluster("2", {"f1":72, "f2":75})
centers = {'1':(0,0),'2':(70,70)}
clusters = Cluster.create_multiple_from_time_window(tw, ["f1","f2"], centers)
# https://www.calculatorsoup.com/calculators/geometry-plane/distance-two-points.php
expected = [2.236068, 2.692582] # euclidean dist
for c, exp in zip(clusters, expected):
self.assertAlmostEqual(c.global_center_distance, exp, places=6)
#region setup methods
def _get_timewindow_single_cluster_same_feature(self) -> TimeWindow:
'''Returns a TimeWindow with time=KW1 and three nodes in cluster 1, all feature values = 1.'''
tw = TimeWindow("KW1", "uc", "uct", "ln")
tw.add_node_to_cluster("1", {"feature":1})
tw.add_node_to_cluster("1", {"feature":1})
tw.add_node_to_cluster("1", {"feature":1})
global_centers = {'1':(1,0)}
return tw, global_centers
def _get_timewindow_two_clusters_same_feature(self) -> TimeWindow:
'''
Returns a TimeWindow with time=KW1 and:
Three nodes in cluster 1, all feature values = 1.
Two nodes in cluster 2, all feature values = 2.
'''
tw = TimeWindow("KW1", "uc", "uct", "ln")
tw.add_node_to_cluster("1", {"feature":1})
tw.add_node_to_cluster("1", {"feature":1})
tw.add_node_to_cluster("1", {"feature":1})
tw.add_node_to_cluster("2", {"feature":2})
tw.add_node_to_cluster("2", {"feature":2})
global_centers = {'1':(1,0),'2':(2,0)}
return tw, global_centers
#endregion setup methods
#region custom asserts
def assert_cluster(self, expected_values: Tuple[Any], cluster: Cluster):
"""
Checks if the cluster values equal the expected_values.
:param expected_values: A tuple (exp_size, exp_stddev, exp_scarcity, exp_import1, exp_import2)
"""
self.assertEqual(expected_values[0], cluster.size)
self.assertAlmostEqual(expected_values[1], cluster.std_dev)
self.assertAlmostEqual(expected_values[2], cluster.scarcity)
self.assertAlmostEqual(expected_values[3], cluster.importance1)
self.assertAlmostEqual(expected_values[4], cluster.importance2)
#endregion custom asserts
if __name__ == '__main__':
unittest.main()
import unittest
import sys
for path in ['../', './']:
sys.path.insert(1, path)
# python -m unittest discover
from entities import Layer, TimeWindow
from entities.layer import InternalCluster
from typing import Any, Tuple, List
from datetime import date, datetime
import json
from math import sqrt
import statistics as stat
class TestInternalCluster(unittest.TestCase):
def test__init__1d_features__all_values_set(self):
cluster_nodes = [{"feature":1}, {"feature":1}, {"feature":1}]
c = InternalCluster("123", cluster_nodes, feature_names=["feature"], global_cluster_center=(1.5,0), n_layer_nodes=len(cluster_nodes))
self.assert_internal_cluster(c, '123', 3, .5)
def test__init__2d_features__all_values_set(self):
cluster_nodes = [{"feature1":1,'feature2':1}, {"feature1":1,'feature2':1}, {"feature1":1,'feature2':1}]
c = InternalCluster("123", cluster_nodes, feature_names=["feature1", 'feature2'], global_cluster_center=(1.5,1.5), n_layer_nodes=len(cluster_nodes))
# distance: https://www.calculatorsoup.com/calculators/geometry-plane/distance-two-points.php
self.assert_internal_cluster(c, '123', 3, sqrt(.5))
def test__get_current_cluster_center__1d(self):
cluster_nodes = [{"feature":1}, {"feature":2}, {"feature":3}]
c = InternalCluster("123", cluster_nodes, feature_names=["feature"], global_cluster_center=(2, 0), n_layer_nodes=len(cluster_nodes))
self.assert_internal_cluster(c, '123', 3, 0)
def test__get_current_cluster_center__1d_weighted_result(self):
cluster_nodes = [{"feature":1}, {"feature":1}, {"feature":3}]
c = InternalCluster("123", cluster_nodes, feature_names=["feature"], global_cluster_center=(5/3, 0), n_layer_nodes=len(cluster_nodes))
self.assert_internal_cluster(c, '123', 3, 0)
def test__get_current_cluster_center__2d_weighted_result(self):
cluster_nodes = [{"feature1":1,"feature2":1},
{"feature1":1,"feature2":1},
{"feature1":2,"feature2":2},
{"feature1":3,"feature2":1}]
c = InternalCluster("123", cluster_nodes, feature_names=["feature1", 'feature2'], global_cluster_center=(1.75, 1.25), n_layer_nodes=len(cluster_nodes))
self.assert_internal_cluster(c, '123', 4, 0)
def assert_internal_cluster(self, actual_cluster: InternalCluster, expected_id, expected_size, expected_distance):
self.assertEqual(expected_id, actual_cluster.cluster_id)
self.assertEqual(expected_size, actual_cluster.size)
self.assertAlmostEqual(expected_distance, actual_cluster.global_center_distance)
class TestLayer(unittest.TestCase):
def test__init__1d_single_cluster(self):
cluster_nodes = list(self._get_timewindow_single_cluster_1d_same_feature().clusters.values())[0]
c = InternalCluster("123", cluster_nodes, feature_names=["feature"], global_cluster_center=(1,0), n_layer_nodes=len(cluster_nodes))
l = Layer('123', [c])
self.assert_layer(l, [1], 0, [0])
def test__create_from_time_window__1d_single_cluster(self):
tw = self._get_timewindow_single_cluster_1d_same_feature()
l = Layer.create_from_time_window(tw, feature_names=['feature'], global_cluster_centers={'1': (1,0)})
self.assert_layer(l, [1], 0, [0])
def test__create_from_time_window__2d_single_cluster(self):
tw = self._get_timewindow_single_cluster_2d_same_feature()
l = Layer.create_from_time_window(tw, feature_names=['feature1', 'feature2'], global_cluster_centers={'1': (1,1)})
self.assert_layer(l, [1], 0, [0])
def test__create_from_time_window__1d_two_clusters(self):
tw = TimeWindow("KW1", "uc", "uct", "ln")
tw.add_node_to_cluster("1", {"feature1":1})
tw.add_node_to_cluster("1", {"feature1":1})
tw.add_node_to_cluster("2", {"feature1":5})
tw.add_node_to_cluster("2", {"feature1":5})
tw.add_node_to_cluster("2", {"feature1":7})
tw.add_node_to_cluster("2", {"feature1":6})
l = Layer.create_from_time_window(tw, feature_names=['feature1'], global_cluster_centers={'1': (1.5,0), '2': (5,0)})
# entropy: https://planetcalc.com/2476/
# distance: https://www.calculatorsoup.com/calculators/geometry-plane/distance-two-points.php
self.assert_layer(l, [2/6, 4/6], 0.91829583, [.5, .75])
def test__create_from_time_window__2d_two_clusters(self):
tw = TimeWindow("KW1", "uc", "uct", "ln")
tw.add_node_to_cluster("1", {"feature1":1,"feature2":1})
tw.add_node_to_cluster("1", {"feature1":1,"feature2":2})
tw.add_node_to_cluster("1", {"feature1":1,"feature2":2})
tw.add_node_to_cluster("2", {"feature1":5,"feature2":5})
tw.add_node_to_cluster("2", {"feature1":7,"feature2":4})
l = Layer.create_from_time_window(tw, feature_names=['feature1', 'feature2'], global_cluster_centers={'1': (1,1), '2': (6.5,5)})
# entropy: https://planetcalc.com/2476/
# distance: https://www.calculatorsoup.com/calculators/geometry-plane/distance-two-points.php
self.assert_layer(l, [3/5, 2/5], 0.97095059, [2/3, sqrt(.5)])
def test__create_from_time_window__1d_clusters_correct_nodes_clusters(self):
tw = TimeWindow("KW1", "uc", "uct", "ln")
tw.add_node_to_cluster("1", {"feature1":1})
tw.add_node_to_cluster("1", {"feature1":1})
tw.add_node_to_cluster("1", {"feature1":1})
tw.add_node_to_cluster("2", {"feature1":5,"feature2":5})
tw.add_node_to_cluster("2", {"feature1":7})
l = Layer.create_from_time_window(tw, feature_names=['feature1'], global_cluster_centers={'1': (1,1), '2': (6.5,5)})
self.assertEqual(l.n_nodes, 5)
self.assertEqual(l.n_clusters, 2)
def test__create_from_time_window__2d_clusters_correct_nodes_clusters(self):
tw = TimeWindow("KW1", "uc", "uct", "ln")
tw.add_node_to_cluster("1", {"feature1":1,"feature2":1})
tw.add_node_to_cluster("1", {"feature1":1,"feature2":2})
tw.add_node_to_cluster("1", {"feature1":1,"feature2":2})
tw.add_node_to_cluster("2", {"feature1":5,"feature2":5})
tw.add_node_to_cluster("2", {"feature1":7,"feature2":4})
l = Layer.create_from_time_window(tw, feature_names=['feature1', 'feature2'], global_cluster_centers={'1': (1,1), '2': (6.5,5)})
self.assertEqual(l.n_nodes, 5)
self.assertEqual(l.n_clusters, 2)
def test__create_from_time_window__1d_clusters__correct_size_aggregate_metrics(self):
tw = TimeWindow("KW1", "uc", "uct", "ln")
tw.add_node_to_cluster("1", {"feature1":1})
tw.add_node_to_cluster("1", {"feature1":1})
tw.add_node_to_cluster("1", {"feature1":1})
tw.add_node_to_cluster("2", {"feature1":5,"feature2":5})
tw.add_node_to_cluster("2", {"feature1":7})
l = Layer.create_from_time_window(tw, feature_names=['feature1'], global_cluster_centers={'1': (1,0), '2': (6.5,5)})
self.assertEqual(l.cluster_size_agg_metrics, {'min':2, 'max':3, 'sum':5, 'avg':2.5})
self.assertEqual(l.cluster_relative_size_agg_metrics, {'min':2/5, 'max':3/5, 'sum':1, 'avg':(1/2)})
def test__create_from_time_window__2d_clusters__correct_size_aggregate_metrics(self):
tw = TimeWindow("KW1", "uc", "uct", "ln")
tw.add_node_to_cluster("1", {"feature1":1,"feature2":1})
tw.add_node_to_cluster("1", {"feature1":1,"feature2":2})
tw.add_node_to_cluster("1", {"feature1":1,"feature2":2})
tw.add_node_to_cluster("2", {"feature1":5,"feature2":5})
tw.add_node_to_cluster("2", {"feature1":7,"feature2":4})
l = Layer.create_from_time_window(tw, feature_names=['feature1', 'feature2'], global_cluster_centers={'1': (1,1), '2': (6.5,5)})
self.assertEqual(l.cluster_size_agg_metrics, {'min':2, 'max':3, 'sum':5, 'avg':2.5})
self.assertEqual(l.cluster_relative_size_agg_metrics, {'min':2/5, 'max':3/5, 'sum':1, 'avg':(1/2)})
def test__create_from_time_window__1d_clusters__correct_center_distance_aggregate_metrics(self):
tw = TimeWindow("KW1", "uc", "uct", "ln")
tw.add_node_to_cluster("1", {"feature1":1})
tw.add_node_to_cluster("1", {"feature1":1})
tw.add_node_to_cluster("1", {"feature1":1})
tw.add_node_to_cluster("2", {"feature1":5,"feature2":5})
tw.add_node_to_cluster("2", {"feature1":7})
l = Layer.create_from_time_window(tw, feature_names=['feature1'], global_cluster_centers={'1': (1.1,0), '2': (6.5,0)})
expected = {'min':0.1, 'max':0.5, 'sum':0.6, 'avg':0.3}
self.assertEqual(len(l.cluster_center_distance_agg_metrics.items()), 4)
for k in ['min', 'max', 'sum', 'avg']:
self.assertAlmostEqual(l.cluster_center_distance_agg_metrics[k], expected[k])
def test__create_from_time_window__2d_clusters__correct_center_distance_aggregate_metrics(self):
tw = TimeWindow("KW1", "uc", "uct", "ln")
tw.add_node_to_cluster("1", {"feature1":1,"feature2":1})
tw.add_node_to_cluster("1", {"feature1":1,"feature2":2})
tw.add_node_to_cluster("1", {"feature1":1,"feature2":2})
tw.add_node_to_cluster("2", {"feature1":5,"feature2":5})
tw.add_node_to_cluster("2", {"feature1":7,"feature2":4})
l = Layer.create_from_time_window(tw, feature_names=['feature1', 'feature2'], global_cluster_centers={'1': (1,1), '2': (6.5,5)})
expected = {'min':2/3, 'max':sqrt(.5), 'sum':sqrt(.5)+2/3, 'avg':(sqrt(.5)+2/3)/2}
self.assertEqual(len(l.cluster_center_distance_agg_metrics.items()), 4)
for k in ['min', 'max', 'sum', 'avg']:
self.assertAlmostEqual(l.cluster_center_distance_agg_metrics[k], expected[k])
def test__create_from_time_window__empty_layer__correct_size_and_centerdist_aggregate_metrics(self):
tw = TimeWindow("KW1", "uc", "uct", "ln")
l = Layer.create_from_time_window(tw, feature_names=['feature1'], global_cluster_centers={'1': (1,0), '2': (6.5,5)})
expected = {'min':0, 'max':0, 'sum':0, 'avg':0}
self.assertEqual(l.cluster_size_agg_metrics, expected)
self.assertEqual(l.cluster_relative_size_agg_metrics, expected)
self.assertEqual(l.cluster_center_distance_agg_metrics, expected)
def test__create_from_time_window__1d_clusters_correct_centers(self):
tw = TimeWindow("KW1", "uc", "uct", "ln")
tw.add_node_to_cluster("1", {"feature1":1,"feature2":1})
tw.add_node_to_cluster("1", {"feature1":1,"feature2":2})
tw.add_node_to_cluster("1", {"feature1":1,"feature2":2})
tw.add_node_to_cluster("2", {"feature1":5,"feature2":5})
tw.add_node_to_cluster("2", {"feature1":7,"feature2":4})
l = Layer.create_from_time_window(tw, feature_names=['feature1'], global_cluster_centers={'1': (1,1), '2': (6.5,5)})
for exp, act in zip([(1,0), (6,0)], l.centers):
for exp_dim, act_dim in zip(exp, act):
self.assertAlmostEqual(exp_dim, act_dim)
def test__create_from_time_window__2d_clusters_correct_centers(self):
tw = TimeWindow("KW1", "uc", "uct", "ln")
tw.add_node_to_cluster("1", {"feature1":1,"feature2":1})
tw.add_node_to_cluster("1", {"feature1":1,"feature2":2})
tw.add_node_to_cluster("1", {"feature1":1,"feature2":2})
tw.add_node_to_cluster("2", {"feature1":5,"feature2":5})
tw.add_node_to_cluster("2", {"feature1":7,"feature2":4})
l = Layer.create_from_time_window(tw, feature_names=['feature1', 'feature2'], global_cluster_centers={'1': (1,1), '2': (6.5,5)})
for exp, act in zip([(1,1.66666666666), (6,4.5)], l.centers):
for exp_dim, act_dim in zip(exp, act):
self.assertAlmostEqual(exp_dim, act_dim)
#region setup methods
def _get_timewindow_single_cluster_1d_same_feature(self) -> TimeWindow:
'''Returns a TimeWindow with time=KW1 and three nodes in cluster 1, all feature values = 1.'''
tw = TimeWindow("KW1", "uc", "uct", "ln")
tw.add_node_to_cluster("1", {"feature":1})
tw.add_node_to_cluster("1", {"feature":1})
tw.add_node_to_cluster("1", {"feature":1})
return tw
def _get_timewindow_single_cluster_2d_same_feature(self) -> TimeWindow:
'''Returns a TimeWindow with time=KW1 and three nodes in cluster 1, all feature1 & feature2 values = 1.'''
tw = TimeWindow("KW1", "uc", "uct", "ln")
tw.add_node_to_cluster("1", {"feature1":1, "feature2":1})
tw.add_node_to_cluster("1", {"feature1":1, "feature2":1})
tw.add_node_to_cluster("1", {"feature1":1, "feature2":1})
return tw
#endregion setup methods
def assert_layer(self, actual_layer: Layer, relative_sizes: List[float], entropy: float, center_dist: List[float]):
self.assertEqual(len(actual_layer.relative_cluster_sizes), len(relative_sizes))
for i in range(len(relative_sizes)):
self.assertAlmostEqual(relative_sizes[i], actual_layer.relative_cluster_sizes[i])
self.assertAlmostEqual(entropy, actual_layer.entropy)
self.assertEqual(len(actual_layer.distances_from_global_centers), len(center_dist))
for i in range(len(center_dist)):
self.assertAlmostEqual(center_dist[i], actual_layer.distances_from_global_centers[i])
if __name__ == '__main__':
unittest.main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment