Commit 0beaf5bb authored by Alexander Lercher

Using OPTICS clustering instead of DBSCAN

parent 54a425e8
import json import json
import numpy as np import numpy as np
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN from sklearn.cluster import DBSCAN, OPTICS
from typing import List, Dict, Any, TypeVar from typing import List, Dict, Any, TypeVar
from deprecated import deprecated from deprecated import deprecated
...@@ -59,7 +59,7 @@ class Clusterer: ...@@ -59,7 +59,7 @@ class Clusterer:
if features is None or len(features) == 0: if features is None or len(features) == 0:
return features # trash in trash out return features # trash in trash out
dbsc = DBSCAN(eps = self.epsilon, min_samples = self.min_points) dbsc = OPTICS() # DBSCAN(eps = self.epsilon, min_samples = self.min_points)
dbsc = dbsc.fit(features) dbsc = dbsc.fit(features)
labels = dbsc.labels_ labels = dbsc.labels_
......
...@@ -4,6 +4,7 @@ modules_path = '../../../modules/' ...@@ -4,6 +4,7 @@ modules_path = '../../../modules/'
if os.path.exists(modules_path): if os.path.exists(modules_path):
sys.path.insert(1, modules_path) sys.path.insert(1, modules_path)
import json
from db.entities import * from db.entities import *
from typing import List, Dict, Tuple from typing import List, Dict, Tuple
from db.repository import Repository, AgiRepository from db.repository import Repository, AgiRepository
...@@ -21,11 +22,13 @@ def run_generic_clustering(): ...@@ -21,11 +22,13 @@ def run_generic_clustering():
all_layers:List[Layer] = repo.get_layers() all_layers:List[Layer] = repo.get_layers()
for layer in all_layers: for layer in all_layers:
if layer.properties is None or len(layer.properties) == 0:
continue
print(f"Clustering {layer.layer_name}") print(f"Clustering {layer.layer_name}")
clusters = run_clustering_for_layer(layer) clusters = run_clustering_for_layer(layer)
cluster_set = ClusterSet(layer.layer_name, clusters) cluster_set = ClusterSet(layer.layer_name, clusters)
repo.add_clusterset(cluster_set) store_clusterset(cluster_set)
def run_clustering_for_layer(layer: Layer) -> List[Cluster]: def run_clustering_for_layer(layer: Layer) -> List[Cluster]:
...@@ -38,6 +41,11 @@ def run_clustering_for_layer(layer: Layer) -> List[Cluster]: ...@@ -38,6 +41,11 @@ def run_clustering_for_layer(layer: Layer) -> List[Cluster]:
return [Cluster(key, value) for key, value in res.items()] return [Cluster(key, value) for key, value in res.items()]
def store_clusterset(cluster_set: ClusterSet):
    """Persist one cluster set by passing it to ``repo.add_clusterset``.

    This is the single place where clustering results are written out, so
    the storage target can be switched here without touching the callers.

    Args:
        cluster_set: The cluster set to store.
    """
    # NOTE(review): `repo` is not defined in this function; presumably it is
    # a module-level repository instance -- confirm against the full file.
    repo.add_clusterset(cluster_set)
    # Disabled alternative sink: dump the cluster set as JSON into a local
    # text file named after its layer instead of using the repository.
    # with open(f'clusterset_{cluster_set.layer_name}.txt', 'w') as file:
    #     file.write(json.dumps(cluster_set.to_serializable_dict()))
def run_location_clustering(): def run_location_clustering():
user_clusterer = Clusterer() user_clusterer = Clusterer()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment