Commit e2deb911 authored by Alexander Lercher

[RoleStage] Implemented selection for fetching and clustering scripts

parent b028f0df
......@@ -86,20 +86,23 @@ def _fetch_nodes(use_case: str, table: str, layer_name: str) -> List[Dict]:
return response.json()
def fetch_nodes_from_semantic_linking():
def fetch_nodes_from_semantic_linking(selected_use_cases: List[str] = None, selected_use_case_tables: List[str] = None):
'''Empties the db and inserts layers and nodes from BusinessLogic and SemanticLinking'''
repository = Repository()
# please dont delete all layers/ nodes anymore @10.11.2020
# repository.delete_all_layers()
# repository.delete_all_nodes()
use_cases = _fetch_use_cases()
for use_case in use_cases:
if selected_use_cases is not None and use_case not in selected_use_cases:
continue
print(f"Fetching for use-case {use_case}")
tables = _fetch_tables(use_case)
for table in tables:
if table != 'bank-app':
if selected_use_case_tables is not None and table not in selected_use_case_tables:
continue
layers = _fetch_layers(use_case, table)
......@@ -107,6 +110,7 @@ def fetch_nodes_from_semantic_linking():
try:
print(f"Fetching nodes for layer {use_case}//{table}//{layer.layer_name}.")
# check if layer already exists in DB, add it if not
reference_layer = repository.get_layer_by_name(use_case, table, layer.layer_name)
if reference_layer == None:
......
......@@ -9,9 +9,11 @@ from db.entities import Cluster
from typing import List
from db.repository import Repository
LAYER_FILES = ['User_Demand_Layer.json']
repo = Repository()
def get_clusters(layer_file) -> List[Cluster]:
with open(layer_file, 'r') as file:
clusters = json.loads(file.read())
......@@ -19,10 +21,6 @@ def get_clusters(layer_file) -> List[Cluster]:
def store_generic_clusters(clusters: List[Cluster], layer):
print([c.label for c in clusters][0:10])
return
try:
with open(f'{layer}.json', 'w') as file:
cluster_dicts = [c.to_serializable_dict(for_db=False) for c in clusters]
......@@ -37,9 +35,6 @@ def store_generic_clusters(clusters: List[Cluster], layer):
print(f"failed uploading {layer}")
layers = ['User_Demand_Layer.json']
for layer in layers:
for layer in LAYER_FILES:
clusts: List[Cluster] = get_clusters(layer)
# print(len(clusts))
store_generic_clusters(clusts, layer)
\ No newline at end of file
......@@ -14,17 +14,20 @@ from processing.clustering import Clusterer, ClusterResult
repo = Repository()
def run_generic_clustering():
def run_generic_clustering(selected_use_cases: List[str] = None, selected_use_case_tables: List[str] = None, selected_layer_names: List[str] = None):
'''Runs the clustering for all layers found in the repository.'''
all_layers:List[Layer] = repo.get_layers()
all_layers = [l for l in all_layers
if l.layer_name in ['User_Demand_Layer'] and l.use_case == 'smart-energy']
layers = [l for l in all_layers
if (selected_use_cases is None or l.use_case in selected_use_cases)
and (selected_use_case_tables is None or l.use_case_table in selected_use_case_tables)
and (selected_layer_names is None or l.layer_name in selected_layer_names)
]
for layer in all_layers:
for layer in layers:
print(f"Clustering {layer.use_case}//{layer.use_case_table}//{layer.layer_name}.")
if layer.properties is None or len(layer.properties) == 0:
print("skipping")
print("skipping, no properties to cluster")
continue
try:
......@@ -54,15 +57,16 @@ def store_generic_clusters(clusters: List[Cluster], layer):
cluster_dicts = [c.to_serializable_dict(for_db=False) for c in clusters]
file.write(json.dumps(cluster_dicts))
except:
pass
print(f"Error while writing json for {layer}")
try:
for cluster in clusters:
repo.add_cluster(cluster)
except:
pass
print(f"Error while storing cluster in db for {layer}")
if __name__ == "__main__":
# please dont delete all clusters anymore @10.11.2020
# repo.delete_all_clusters()
run_generic_clustering()
run_generic_clustering(selected_use_cases=[], selected_use_case_tables=[], selected_layer_names=[])
......@@ -11,4 +11,4 @@ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
import processing.fetching.fetching as f
if __name__ == "__main__":
f.fetch_nodes_from_semantic_linking()
\ No newline at end of file
f.fetch_nodes_from_semantic_linking(selected_use_cases=[], selected_use_case_tables=[])
\ No newline at end of file
......@@ -166,7 +166,7 @@ def add_layers(use_case:str, table_name: str):
print(url+": "+str(response.status_code))
def main(use_case: str = "smart-energy", table_name: str = "smart-energy-paper"):
def main(use_case: str = "smart-energy", table_name: str = "smart-energy"):
print("SMART-ENERGY")
add_table(use_case, table_name)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment