Commit e2deb911 authored by Alexander Lercher

[RoleStage] Implemented selection for fetching and clustering scripts

parent b028f0df
@@ -86,20 +86,23 @@ def _fetch_nodes(use_case: str, table: str, layer_name: str) -> List[Dict]:
     return response.json()

-def fetch_nodes_from_semantic_linking():
+def fetch_nodes_from_semantic_linking(selected_use_cases: List[str] = None, selected_use_case_tables: List[str] = None):
     '''Empties the db and inserts layers and nodes from BusinessLogic and SemanticLinking'''
     repository = Repository()
+    # please dont delete all layers/ nodes anymore @10.11.2020
     # repository.delete_all_layers()
     # repository.delete_all_nodes()

     use_cases = _fetch_use_cases()
     for use_case in use_cases:
+        if selected_use_cases is not None and use_case not in selected_use_cases:
+            continue
         print(f"Fetching for use-case {use_case}")
         tables = _fetch_tables(use_case)

         for table in tables:
-            if table != 'bank-app':
+            if selected_use_case_tables is not None and table not in selected_use_case_tables:
                 continue
             layers = _fetch_layers(use_case, table)
@@ -107,6 +110,7 @@ def fetch_nodes_from_semantic_linking():
             try:
                 print(f"Fetching nodes for layer {use_case}//{table}//{layer.layer_name}.")
+                # check if layer already exists in DB, add it if not
                 reference_layer = repository.get_layer_by_name(use_case, table, layer.layer_name)
                 if reference_layer == None:
...
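To make the new selection semantics explicit, here is a minimal sketch; the helper below is hypothetical and not part of the codebase. None leaves a dimension unfiltered, a populated list acts as a whitelist, and an empty list matches nothing.

# Hypothetical helper, written only to illustrate the selection rule used above.
from typing import List, Optional

def _selected(value: str, selection: Optional[List[str]]) -> bool:
    # None means "no filter"; otherwise only values contained in the list pass
    return selection is None or value in selection

assert _selected('smart-energy', None) is True             # no filter: everything passes
assert _selected('smart-energy', ['smart-energy']) is True # whitelisted: fetched
assert _selected('bank-app', ['smart-energy']) is False    # not whitelisted: skipped
assert _selected('bank-app', []) is False                  # empty list: nothing is fetched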
@@ -9,9 +9,11 @@ from db.entities import Cluster
 from typing import List
 from db.repository import Repository

+LAYER_FILES = ['User_Demand_Layer.json']

 repo = Repository()

 def get_clusters(layer_file) -> List[Cluster]:
     with open(layer_file, 'r') as file:
         clusters = json.loads(file.read())
@@ -19,10 +21,6 @@ def get_clusters(layer_file) -> List[Cluster]:
 def store_generic_clusters(clusters: List[Cluster], layer):
-    print([c.label for c in clusters][0:10])
-    return
     try:
         with open(f'{layer}.json', 'w') as file:
             cluster_dicts = [c.to_serializable_dict(for_db=False) for c in clusters]
@@ -37,9 +35,6 @@ def store_generic_clusters(clusters: List[Cluster], layer):
         print(f"failed uploading {layer}")

-layers = ['User_Demand_Layer.json']
-for layer in layers:
+for layer in LAYER_FILES:
     clusts: List[Cluster] = get_clusters(layer)
-    # print(len(clusts))
     store_generic_clusters(clusts, layer)
\ No newline at end of file
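A small sketch of how this script would be pointed at more layers: extend LAYER_FILES with additional cluster dumps. The second file name below is made up for illustration only.

LAYER_FILES = [
    'User_Demand_Layer.json',
    'Solar_Production_Layer.json',  # hypothetical additional layer dump
]

for layer in LAYER_FILES:
    clusts: List[Cluster] = get_clusters(layer)
    store_generic_clusters(clusts, layer)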
@@ -14,17 +14,20 @@ from processing.clustering import Clusterer, ClusterResult
 repo = Repository()

-def run_generic_clustering():
+def run_generic_clustering(selected_use_cases: List[str] = None, selected_use_case_tables: List[str] = None, selected_layer_names: List[str] = None):
     '''Runs the clustering for all layers found in the repository.'''
     all_layers:List[Layer] = repo.get_layers()
-    all_layers = [l for l in all_layers
-                  if l.layer_name in ['User_Demand_Layer'] and l.use_case == 'smart-energy']
+    layers = [l for l in all_layers
+              if (selected_use_cases is None or l.use_case in selected_use_cases)
+              and (selected_use_case_tables is None or l.use_case_table in selected_use_case_tables)
+              and (selected_layer_names is None or l.layer_name in selected_layer_names)
+              ]

-    for layer in all_layers:
+    for layer in layers:
         print(f"Clustering {layer.use_case}//{layer.use_case_table}//{layer.layer_name}.")

         if layer.properties is None or len(layer.properties) == 0:
-            print("skipping")
+            print("skipping, no properties to cluster")
             continue

         try:
@@ -54,15 +57,16 @@ def store_generic_clusters(clusters: List[Cluster], layer):
             cluster_dicts = [c.to_serializable_dict(for_db=False) for c in clusters]
             file.write(json.dumps(cluster_dicts))
     except:
-        pass
+        print(f"Error while writing json for {layer}")

     try:
         for cluster in clusters:
             repo.add_cluster(cluster)
     except:
-        pass
+        print(f"Error while storing cluster in db for {layer}")

 if __name__ == "__main__":
+    # please dont delete all clusters anymore @10.11.2020
     # repo.delete_all_clusters()
-    run_generic_clustering()
+    run_generic_clustering(selected_use_cases=[], selected_use_case_tables=[], selected_layer_names=[])
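A hedged usage sketch for the new parameters; only 'smart-energy' and 'User_Demand_Layer' are taken from this commit, everything else is assumed. None clusters everything, a list restricts clustering to its entries, and the empty lists passed in the __main__ block above match no layers until they are filled in.

# Cluster every layer of every use case (no filtering at all).
run_generic_clustering()

# Reproduce the previously hard-coded behaviour: one layer of one use case.
run_generic_clustering(selected_use_cases=['smart-energy'],
                       selected_layer_names=['User_Demand_Layer'])

# Empty lists match nothing, so this call is a placeholder to be edited before running.
run_generic_clustering(selected_use_cases=[], selected_use_case_tables=[], selected_layer_names=[])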
@@ -11,4 +11,4 @@ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 import processing.fetching.fetching as f

 if __name__ == "__main__":
-    f.fetch_nodes_from_semantic_linking()
+    f.fetch_nodes_from_semantic_linking(selected_use_cases=[], selected_use_case_tables=[])
\ No newline at end of file
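For reference, a sketch of how the previously hard-coded behaviour (fetching only the 'bank-app' table) can now be requested explicitly; the call below is an assumption about intended usage, not part of the commit.

import processing.fetching.fetching as f

if __name__ == "__main__":
    # Fetch every use case, but only its 'bank-app' tables.
    f.fetch_nodes_from_semantic_linking(selected_use_case_tables=['bank-app'])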
@@ -166,7 +166,7 @@ def add_layers(use_case:str, table_name: str):
     print(url+": "+str(response.status_code))

-def main(use_case: str = "smart-energy", table_name: str = "smart-energy-paper"):
+def main(use_case: str = "smart-energy", table_name: str = "smart-energy"):
     print("SMART-ENERGY")
     add_table(use_case, table_name)
...
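A brief usage note on the changed default (sketch only): main() now targets the 'smart-energy' table, while the old 'smart-energy-paper' table can still be selected explicitly.

main()                                       # use_case='smart-energy', table_name='smart-energy'
main("smart-energy", "smart-energy-paper")   # previous default table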