Implemented JSON responses

parent 18dde0e8
@@ -73,11 +73,11 @@ paths:
description: "Successful Request"
'404':
description: "Use case train session data does not exist"
/Owners/use_cases/{use_case}/upload:
/Owners/use_cases/{use_case}/upload_and_train:
post:
security:
- JwtAdmin: []
operationId: "routes.owners.upload"
operationId: "routes.owners.upload_and_train"
tags:
- "Owners"
summary: "Upload the files required for the federated training"
@@ -202,7 +202,7 @@ paths:
post:
security:
- JwtAdmin: []
operationId: "routes.user.check_article"
operationId: "routes.users.check_article"
tags:
- "Users"
summary: "Use the trained model to evaluate an input"
@@ -10,8 +10,8 @@ print(os.getcwd())
import global_hyperparams as globals
from preprocessing import get_preprocessed_train_test_data
from federated_algorithm import federated_computation_new, federated_computation_continue, save_state_to_file, load_state_from_file
from checkpoint_manager import save_to_file_CSV#,save_state_to_file, load_state_from_file
from federated_algorithm import federated_computation_new, federated_computation_continue#, save_state_to_file, load_state_from_file
from checkpoint_manager import save_to_file_CSV,save_state_to_file, load_state_from_file
@@ -34,8 +34,10 @@ from checkpoint_manager import save_to_file_CSV#,save_state_to_file, load_state_
# print(type(metrics))
# print("DONE2")
def start_processing(developer_id:int = 0):
globals.initialize()
def start_processing(use_case, developer_id:int = 0):
globals.initialize(use_case,developer_id)
globals.TRAINER_ID = developer_id
train_dataset, test_dataset= get_preprocessed_train_test_data()
@@ -43,7 +45,6 @@ def start_processing(developer_id:int = 0):
trained_metrics= metrics['train']
timestamp = save_state_to_file(state)
globals.TRAINER_ID = developer_id
globals.DATASET_ID = timestamp
written_row = save_to_file_CSV(globals.TRAINER_ID,timestamp,globals.DATASET_ID,trained_metrics['sparse_categorical_accuracy'],trained_metrics['loss'])
# Copyright 2019, The TensorFlow Federated Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities for saving and loading experiment checkpoints."""
import os.path
import re
from typing import Any, List, Tuple, Union
from absl import logging
import tensorflow as tf
class FileCheckpointManager():
"""A checkpoint manager backed by a file system.
This checkpoint manager is a utility to save and load checkpoints. While
the checkpoint manager is compatible with any nested structure supported by
`tf.convert_to_tensor`, checkpoints may often represent the output of a
`tff.templates.IterativeProcess`. For example, one possible use case would
be to save the `ServerState` output of an iterative process created via
`tff.learning`. This is comparable to periodically saving model weights and
optimizer states during non-federated training.
The implementation you find here is slightly different from
`tf.train.CheckpointManager`. This implementation yields nested structures
that are immutable whereas `tf.train.CheckpointManager` is used to manage
`tf.train.Checkpoint` objects, which are mutable collections. Additionally,
this implementation allows retaining the initial checkpoint as part of the
total number of checkpoints that are kept.
The checkpoint manager is intended only for allowing simulations to be
resumed after interruption. In particular, it is intended to only restart the
same simulation, run with the same version of TensorFlow Federated.
"""
def __init__(self,
root_dir: str,
prefix: str = 'ckpt_',
step: int = 1,
keep_total: int = 5,
keep_first: bool = True):
"""Returns an initialized `FileCheckpointManager`.
Args:
root_dir: A path on the filesystem to store checkpoints.
prefix: A string to use as the prefix for checkpoint names.
step: How often the checkpoint manager should save a checkpoint. When
calling `FileCheckpointManager.save_checkpoint`, a checkpoint will only
be written for round numbers divisible by `step`.
keep_total: An integer representing the total number of checkpoints to
keep.
keep_first: A boolean indicating if the first checkpoint should be kept,
irrespective of whether it is in the last `keep_total` checkpoints. This
is desirable in settings where you would like to ensure full
reproducibility of the simulation, especially in settings where
model weights or optimizer states are initialized randomly. By loading
from the initial checkpoint, one can avoid re-initializing and obtaining
different results.
"""
self._root_dir = root_dir
self._prefix = prefix
self._step = step
self._keep_total = keep_total
self._keep_first = keep_first
path = re.escape(os.path.join(root_dir, prefix))
self._round_num_expression = re.compile(r'{}([0-9]+)$'.format(path))
def load_latest_checkpoint_or_default(self, default: Any) -> Tuple[Any, int]:
"""Loads latest checkpoint, loading `default` if no checkpoints exist.
Saves `default` as the 0th checkpoint if no checkpoints exist.
Args:
default: A nested structure which `tf.convert_to_tensor` supports to use
as a template when reconstructing the loaded template. This structure
will be saved as the checkpoint for round number 0 and returned if there
are no pre-existing saved checkpoints.
Returns:
A `tuple` of `(state, round_num)` where `state` matches the Python
structure in `structure`, and `round_num` is an integer. If no
checkpoints have been written, returns `(default, 0)`.
"""
state, round_num = self.load_latest_checkpoint(default)
if state is None:
state = default
round_num = 0
self.save_checkpoint(state, round_num)
return state, round_num
def load_latest_checkpoint(self,
structure: Any) -> Tuple[Any, Union[int, None]]:
"""Loads the latest state and round number.
Args:
structure: A nested structure which `tf.convert_to_tensor` supports to use
as a template when reconstructing the loaded template.
Returns:
A `tuple` of `(state, round_num)` where `state` matches the Python
structure in `structure`, and `round_num` is an integer. If no checkpoints
have been previously saved, returns the tuple `(None, None)`.
"""
checkpoint_paths = self._get_all_checkpoint_paths()
if checkpoint_paths:
checkpoint_path = max(checkpoint_paths, key=self._round_num)
return self._load_checkpoint_from_path(structure, checkpoint_path)
return None, None
def load_checkpoint(self, structure: Any, round_num: int) -> Any:
"""Returns the checkpointed state for the given `round_num`.
Args:
structure: A nested structure which `tf.convert_to_tensor` supports to use
as a template when reconstructing the loaded template.
round_num: An integer representing the round to load from.
"""
basename = '{}{}'.format(self._prefix, round_num)
checkpoint_path = os.path.join(self._root_dir, basename)
state, _ = self._load_checkpoint_from_path(structure, checkpoint_path)
return state
def _load_checkpoint_from_path(self, structure: Any,
checkpoint_path: str) -> Tuple[Any, int]:
"""Returns the state and round number for the given `checkpoint_path`.
Args:
structure: A nested structure which `tf.convert_to_tensor` supports to use
as a template when reconstructing the loaded template.
checkpoint_path: A path on the filesystem to load.
Raises:
FileNotFoundError: If a checkpoint for given `checkpoint_path` doesn't
exist.
"""
if not tf.io.gfile.exists(checkpoint_path):
raise FileNotFoundError(
'No such file or directory: {}'.format(checkpoint_path))
model = tf.saved_model.load(checkpoint_path)
flat_obj = model.build_obj_fn()
state = tf.nest.pack_sequence_as(structure, flat_obj)
round_num = self._round_num(checkpoint_path)
logging.info('Checkpoint loaded: %s', checkpoint_path)
return state, round_num
def _save_checkpoint(self, state: Any, round_num: int) -> None:
"""Internal function to save a new checkpoint.
Args:
state: A nested structure which `tf.convert_to_tensor` supports.
round_num: An integer representing the current training round.
"""
basename = '{}{}'.format(self._prefix, round_num)
checkpoint_path = os.path.join(self._root_dir, basename)
flat_obj = tf.nest.flatten(state)
model = tf.Module()
model.obj = flat_obj
model.build_obj_fn = tf.function(lambda: model.obj, input_signature=())
# First write to a temporary directory.
temp_basename = '.temp_{}'.format(basename)
temp_path = os.path.join(self._root_dir, temp_basename)
try:
tf.io.gfile.rmtree(temp_path)
except tf.errors.NotFoundError:
pass
tf.io.gfile.makedirs(temp_path)
tf.saved_model.save(model, temp_path, signatures={})
# Rename the temp directory to the final location atomically.
tf.io.gfile.rename(temp_path, checkpoint_path)
logging.info('Checkpoint saved: %s', checkpoint_path)
self._clear_old_checkpoints()
def save_checkpoint(self, state: Any, round_num: int) -> None:
"""Saves a new checkpointed `state` for the given `round_num`.
Note that a checkpoint is only written if `round_num` is divisible by the
`step` initialization argument of the manager.
Args:
state: A nested structure which `tf.convert_to_tensor` supports.
round_num: An integer representing the current training round.
"""
if round_num % self._step == 0:
self._save_checkpoint(state, round_num)
def _clear_old_checkpoints(self) -> None:
"""Removes old checkpoints."""
checkpoint_paths = self._get_all_checkpoint_paths()
if len(checkpoint_paths) > self._keep_total:
checkpoint_paths = sorted(checkpoint_paths, key=self._round_num)
start = 1 if self._keep_first else 0
stop = start - self._keep_total
for checkpoint_path in checkpoint_paths[start:stop]:
tf.io.gfile.rmtree(checkpoint_path)
logging.info('Checkpoint removed: %s', checkpoint_path)
def _round_num(self, checkpoint_path: str) -> int:
"""Returns the round number for the given `checkpoint_path`, or `-1`."""
match = self._round_num_expression.match(checkpoint_path)
if match is None:
logging.debug(
'Could not extract round number from: \'%s\' using the following '
'pattern: \'%s\'', checkpoint_path,
self._round_num_expression.pattern)
return -1
return int(match.group(1))
def _get_all_checkpoint_paths(self) -> List[str]:
"""Returns all the checkpoint paths managed by the instance."""
# Due to tensorflow/issues/19378, we cannot use `tf.io.gfile.glob` here
# because it returns directory contents recursively on Windows.
if tf.io.gfile.exists(self._root_dir):
root_dir_entries = tf.io.gfile.listdir(self._root_dir)
return [
os.path.join(self._root_dir, e)
for e in root_dir_entries
if e.startswith(self._prefix)
]
else:
return []
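As a quick illustration of the class above, a minimal usage sketch; the directory, template structure, and round number are made up for the example.

# Minimal FileCheckpointManager usage sketch (illustrative values only).
import tensorflow as tf  # already imported at the top of this file

manager = FileCheckpointManager(root_dir="/tmp/ckpts", prefix="ckpt_", keep_total=3)
template = {"weights": tf.zeros([2, 2]), "round": tf.constant(0)}
manager.save_checkpoint(template, round_num=0)            # written because 0 % step == 0
state, round_num = manager.load_latest_checkpoint(template)
print(round_num)                                          # -> 0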
############################################################################################################
import csv
import os
def save_to_file_CSV(trainer_id,model_id,dataset_id,accuracy,loss):
filename = "processing/text_processing/ledger.csv"
row = [str(trainer_id),str(model_id),str(dataset_id),str(accuracy),str(loss)]
if not (os.path.exists(filename)):
fields = ['Trainer_id','Model_id','Dataset_id','Accuracy','Loss']
with open(filename, 'w') as csvfile:
csvwriter =csv.writer(csvfile)
csvwriter.writerow(fields)
csvwriter.writerow(row)
else:
with open(filename, 'a') as csvfile:
csvwriter =csv.writer(csvfile)
csvwriter.writerow(row)
return row
import time
def save_state_to_file(state):
time_stamp = int(time.time())
ckpt_manager = FileCheckpointManager("processing/text_processing/models", prefix="ckpt_")
ckpt_manager.save_checkpoint(state,time_stamp)
return time_stamp
def load_state_from_file(model_filename):
iterative_process = tff.learning.build_federated_averaging_process(model_fn,client_optimizer_fn=lambda: tf.keras.optimizers.SGD(lr=0.5))
state = iterative_process.initialize()
ckpt_manager = FileCheckpointManager("processing/text_processing/models", prefix="ckpt_")
restored_state = ckpt_manager.load_latest_checkpoint(state)
return restored_state[0]
import os
#from processing.text_processing.federated_algorithm import federated_computation_continue
#from processing.text_processing.version_handler import save_state_to_file
print(os.getcwd())
#import processing.text_processing.global_hyperparams as globals
#from processing.text_processing.preprocessing import get_preprocessed_train_test_data
import global_hyperparams as globals
from preprocessing import get_preprocessed_train_test_data
from federated_algorithm import federated_computation_new, federated_computation_continue, save_state_to_file, load_state_from_file
from checkpoint_manager import save_to_file_CSV#,save_state_to_file, load_state_from_file
# globals.initialize()
# train_dataset, test_dataset= get_preprocessed_train_test_data()
# state,metrics = federated_computation_new(train_dataset,test_dataset)
# last_model_id = save_state_to_file(state)
# #model_filename = "ckpt_1622721644"
# #restored_state = load_state_from_file(model_filename)
# #state,metrics = federated_computation_continue(train_dataset, test_dataset, restored_state)
# #last_model_id = save_state_to_file(state)
# trained_metrics= metrics['train']
# save_to_file_CSV(globals.TRAINER_ID,last_model_id,globals.DATASET_ID,trained_metrics['sparse_categorical_accuracy'],trained_metrics['loss'])
# print("DONE")
# print(type(state))
# print(type(metrics))
# print("DONE2")
def start_processing(developer_id:int = 0):
globals.initialize()
train_dataset, test_dataset= get_preprocessed_train_test_data()
state,metrics = federated_computation_new(train_dataset,test_dataset)
trained_metrics= metrics['train']
timestamp = save_state_to_file(state)
globals.TRAINER_ID = developer_id
globals.DATASET_ID = timestamp
written_row = save_to_file_CSV(globals.TRAINER_ID,timestamp,globals.DATASET_ID,trained_metrics['sparse_categorical_accuracy'],trained_metrics['loss'])
return written_row
\ No newline at end of file
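The commented-out block above also hints at resuming training from a saved checkpoint; a hedged sketch of that flow, following this file's conventions (the checkpoint filename is a placeholder mirroring the commented example):

def continue_processing(developer_id: int = 0, model_filename: str = "ckpt_1622721644"):
    # Sketch only: resume federated training from a previously saved state.
    globals.initialize()
    globals.TRAINER_ID = developer_id
    train_dataset, test_dataset = get_preprocessed_train_test_data()
    restored_state = load_state_from_file(model_filename)
    state, metrics = federated_computation_continue(train_dataset, test_dataset, restored_state)
    trained_metrics = metrics['train']
    timestamp = save_state_to_file(state)
    globals.DATASET_ID = timestamp
    return save_to_file_CSV(globals.TRAINER_ID, timestamp, globals.DATASET_ID,
                            trained_metrics['sparse_categorical_accuracy'], trained_metrics['loss'])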
Two source diffs could not be displayed because they are too large. You can view the blobs instead.
def initialize():
def initialize(use_case,trainer_id = 0,dataset_id = 0):
global MAX_LENGTH #Length of sentences to be fed into the NN. Similar to image size, i.e. 100 pixels x 100 pixels, but 1D.
MAX_LENGTH = 40
@@ -20,8 +20,8 @@ def initialize():
global EPOCHS #number of epochs the model will be trained
EPOCHS = 5
global TRAINER_ID # ID of the trainer entity.
TRAINER_ID = 0 #0 = Owner of the use_case
TRAINER_ID = trainer_id #0 = Owner of the use_case
global DATASET_ID # ID of the dataset used
DATASET_ID = 0 #0 = "Main"/Original dataset
DATASET_ID = dataset_id #0 = "Main"/Original dataset
global USE_CASE #Use_case name
USE_CASE = None
\ No newline at end of file
USE_CASE = use_case
\ No newline at end of file
Trainer_id,Model_id,Dataset_id,Accuracy,Loss
0,1623160388,0,0.25,nan
0,1623160474,0,0.5,nan
1,1623333361,0,0.5,nan
1,1623406445,0,0.5,nan
0,1623419415,1623419415,0.0,nan
0,1623766462,1623766462,0.5,nan
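The ledger above is what the last() routes later in this commit read back with pandas; a minimal sketch of serializing its last row to JSON in the same Trainer_id/Model_id/Dataset_id/Accuracy/Loss shape (the path is the one hard-coded in save_to_file_CSV):

# Sketch: read the ledger and emit its last row as JSON.
import json
import pandas as pd

df = pd.read_csv("processing/text_processing/ledger.csv")
last_row = df.tail(1).iloc[0]
print(json.dumps({column: str(last_row[column]) for column in df.columns}))
# e.g. {"Trainer_id": "0", "Model_id": "1623766462", "Dataset_id": "1623766462", "Accuracy": "0.5", "Loss": "nan"}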
@@ -2,14 +2,16 @@ import os
#from processing.text_processing.federated_algorithm import federated_computation_continue
#from processing.text_processing.version_handler import save_state_to_file
print(os.getcwd())
#import processing.text_processing.global_hyperparams as globals
#from processing.text_processing.preprocessing import get_preprocessed_train_test_data
import global_hyperparams as globals
from preprocessing import get_preprocessed_train_test_data
from federated_algorithm import federated_computation_new, federated_computation_continue
from checkpoint_manager import save_to_file_CSV, save_state_to_file, load_state_from_file
from federated_algorithm import federated_computation_new, federated_computation_continue#, save_state_to_file, load_state_from_file
from checkpoint_manager import save_to_file_CSV,save_state_to_file, load_state_from_file
@@ -32,9 +34,10 @@ from checkpoint_manager import save_to_file_CSV, save_state_to_file, load_state_
# print(type(metrics))
# print("DONE2")
def start_processing(use_case:str,developer_id:int = 0):
globals.initialize()
globals.USE_CASE = use_case
def start_processing(use_case, developer_id:int = 0):
globals.initialize(use_case,developer_id)
globals.TRAINER_ID = developer_id
train_dataset, test_dataset= get_preprocessed_train_test_data()
@@ -42,10 +45,7 @@ def start_processing(use_case:str,developer_id:int = 0):
trained_metrics= metrics['train']
timestamp = save_state_to_file(state)
globals.TRAINER_ID = developer_id
globals.DATASET_ID = timestamp
written_row = save_to_file_CSV(globals.TRAINER_ID,timestamp,globals.DATASET_ID,trained_metrics['sparse_categorical_accuracy'],trained_metrics['loss'])
return written_row
start_processing("text_processing")
\ No newline at end of file
return written_row
\ No newline at end of file
# import pandas as pd
# train_df = pd.read_csv('processing/fake-news/train.csv', header=0)
# test_df = pd.read_csv('processing/fake-news/test.csv', header=0)
# train_df = train_df.fillna(' ')
# test_df = test_df.fillna(' ')
# train_df['all_info'] = train_df['text'] + train_df['title'] + train_df['author']
# test_df['all_info'] = test_df['text'] + test_df['title'] + test_df['author']
# target = train_df['label'].values
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Embedding
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import collections
import numpy as np
import tensorflow as tf
import tensorflow_federated as tff
real = pd.read_csv("processing/fake_news/prototype_db_fake_real/True.csv")
fake = pd.read_csv("processing/fake_news/prototype_db_fake_real/Fake.csv")
# dropping rows whose text/date columns contain URLs (the real set's dates look fine), and rows that have no text
fake_drop = fake.drop(index=[9358,15507,15508,18933])
fake_drop = fake_drop.drop(fake_drop.loc[fake_drop.text == ' '].index)
real_drop = real.drop(real.loc[real.text == ' '].index)
# Give labels to data before combining
fake['label'] = 1
real['label'] = 0
combined = pd.concat([fake, real])
no_reuters = combined.copy()
no_reuters.text = no_reuters.text.str.replace('Reuters', '')
combined = no_reuters.copy()
## train/test split the text data and labels
df_text = combined['text'] #features is now
labels = combined['label'] #or maybe use target?
target = combined['label'].values
print("##################label")
print(type(labels))
print(labels)
print("###########")
print(type(combined['label'].values))
print(combined['label'].values)
print("df_text_type:")
print(type(df_text))
############################ ^ORIGINAL DB
# train_df = pd.read_csv('processing/text_processing/prototype_db_fake_real/train.csv', header=0)
# test_df = pd.read_csv('processing/text_processing/prototype_db_fake_real/test.csv', header=0)
# train_df = train_df.fillna(' ')
# test_df = test_df.fillna(' ')
# train_df['all_info'] = train_df['text'] + train_df['title'] + train_df['author']
# test_df['all_info'] = test_df['text'] + test_df['title'] + test_df['author']
# target = train_df['label'].values
# print(type(train_df['label'].values))
# print(train_df['label'].values)
# df_text = train_df['all_info']
######################################################################################
tokenizer = Tokenizer(oov_token = "<OOV>", num_words=6000)
tokenizer.fit_on_texts(df_text)
MAX_LENGTH = 40
VOCAB_SIZE = 6000
sequences_train = tokenizer.texts_to_sequences(df_text)
padded_train = pad_sequences(sequences_train, padding = 'post', maxlen=MAX_LENGTH)
#data_train, data_test, label_train, label_test
X_train, X_test, y_train, y_test = train_test_split(padded_train, target, test_size=0.2)
X_train = tf.convert_to_tensor(X_train)
X_test = tf.convert_to_tensor(X_test)
y_train = tf.convert_to_tensor(y_train)
y_test = tf.convert_to_tensor(y_test)
print(X_train.shape)
print(y_train.shape)
print("Type of X_train, X_test, y_train, y_test")
print(type(X_train))
print(type(X_test))
print(type(y_train))
print(type(y_test))
###################################################################################\
#FED PREPROCESSING
NUM_CLIENTS = 4
SHUFFLE_BUFFER = 5000
BATCH_SIZE = 512
def preprocess(dataset):
def element_fn(x, y):
return collections.OrderedDict([
('x', x),
('y', y)#tf.cast(tf.reshape(y, [1]), tf.float32))
])
return dataset.map(element_fn).shuffle(
SHUFFLE_BUFFER).batch(BATCH_SIZE)
def generate_clients_datasets(n, source_x, source_y):
clients_dataset=[]
for i in range(n):
dataset=tf.data.Dataset.from_tensor_slices(([source_x[i]], [source_y[i]]))
dataset=preprocess(dataset)
clients_dataset.append(dataset)
return clients_dataset
train_dataset=generate_clients_datasets(NUM_CLIENTS, X_train, y_train)
test_dataset=generate_clients_datasets(NUM_CLIENTS, X_test, y_test)
# Grab a single batch of data so that TFF knows what data looks like.
# sample_batch = tf.nest.map_structure(
# lambda x: x.numpy(), iter(train_dataset[0]).next())
INPUT_SPEC = train_dataset[0].element_spec
print("DONE PREPROCESSING")
#################################################################################
EMBED_DIM = 10
def get_simple_LSTM_model():
model = Sequential()
model.add(Embedding(VOCAB_SIZE, EMBED_DIM, input_length=MAX_LENGTH))
model.add(Dropout(0.3))
model.add(LSTM(100))
model.add(Dropout(0.3))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(1, activation='sigmoid'))
# model.compile(loss='binary_crossentropy',
# optimizer='adam',
# metrics=[tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])
return model
def model_fn():
keras_model = get_simple_LSTM_model()
#return tff.learning.from_compiled_keras_model(keras_model, sample_batch) original
return tff.learning.from_keras_model(
keras_model,
input_spec=INPUT_SPEC,
loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])
# Training and evaluating the model
iterative_process = tff.learning.build_federated_averaging_process(model_fn,client_optimizer_fn=lambda: tf.keras.optimizers.SGD(lr=0.5))
state = iterative_process.initialize()
EPOCHS = 5
for n in range(EPOCHS):
state, metrics = iterative_process.next(state, train_dataset)
print('round {}, training metrics={}'.format(n+1, metrics))
evaluation = tff.learning.build_federated_evaluation(model_fn)
eval_metrics = evaluation(state.model, train_dataset)
print('Training evaluation metrics={}'.format(eval_metrics))
test_metrics = evaluation(state.model, test_dataset)
print('Test evaluation metrics={}'.format(test_metrics))
# model = get_simple_LSTM_model()
# print(model.summary())
# best_model_file_name = "processing/text_processing/models/best_model_LSTM.hdf5"
# callbacks=[
# tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=15,
# verbose=1, mode="min", restore_best_weights=True),
# tf.keras.callbacks.ModelCheckpoint(filepath=best_model_file_name, verbose=1, save_best_only=True)
# ]
# history = model.fit(X_train,
# y_train,
# epochs=EPOCHS,
# validation_data=(X_test, y_test),
# callbacks=callbacks)
# model.save(best_model_file_name)
# model = tf.keras.models.load_model(best_model_file_name)
# import pandas as pd
# train_df = pd.read_csv('processing/fake-news/train.csv', header=0)
# test_df = pd.read_csv('processing/fake-news/test.csv', header=0)
# train_df = train_df.fillna(' ')
# test_df = test_df.fillna(' ')
# train_df['all_info'] = train_df['text'] + train_df['title'] + train_df['author']
# test_df['all_info'] = test_df['text'] + test_df['title'] + test_df['author']
# target = train_df['label'].values
import pandas as pd
import re
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Activation, Dense, Dropout, Input, Embedding
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.sequence import pad_sequences
from nltk import word_tokenize
import collections
import numpy as np
import tensorflow as tf
import tensorflow_federated as tff
real = pd.read_csv("processing/fake_news/prototype_db_fake_real/True.csv")
fake = pd.read_csv("processing/fake_news/prototype_db_fake_real/Fake.csv")
# dropping rows whose text/date columns contain URLs (the real set's dates look fine), and rows that have no text
fake_drop = fake.drop(index=[9358,15507,15508,18933])
fake_drop = fake_drop.drop(fake_drop.loc[fake_drop.text == ' '].index)
real_drop = real.drop(real.loc[real.text == ' '].index)
# Give labels to data before combining
fake['label'] = 1
real['label'] = 0
combined = pd.concat([fake, real])
no_reuters = combined.copy()
no_reuters.text = no_reuters.text.str.replace('Reuters', '')
combined = no_reuters.copy()
## train/test split the text data and labels
train_df_text = combined['text'] #features is now
labels = combined['label'] #or maybe use target?
target = combined['label'].values
print("##################label")
print(type(labels))
print(labels)
print("###########")
print(type(combined['label'].values))
print(combined['label'].values)
print("train_df_type:")
print(type(train_df_text))
############################ ^ORIGINAL DB
# train_df = pd.read_csv('processing/text_processing/prototype_db_fake_real/train.csv', header=0)
# test_df = pd.read_csv('processing/text_processing/prototype_db_fake_real/test.csv', header=0)
# train_df = train_df.fillna(' ')
# test_df = test_df.fillna(' ')
# train_df['all_info'] = train_df['text'] + train_df['title'] + train_df['author']
# test_df['all_info'] = test_df['text'] + test_df['title'] + test_df['author']
# target = train_df['label'].values
# print(type(train_df['label'].values))
# print(train_df['label'].values)
# train_df_text = train_df['all_info']
######################################################################################
tokenizer = Tokenizer(oov_token = "<OOV>", num_words=6000)
tokenizer.fit_on_texts(train_df_text)
max_length = 40
vocab_size = 6000
sequences_train = tokenizer.texts_to_sequences(train_df_text)
padded_train = pad_sequences(sequences_train, padding = 'post', maxlen=max_length)
X_train, X_test, y_train, y_test = train_test_split(padded_train, target, test_size=0.2)
print(X_train.shape)
print(y_train.shape)
#################################################################################
embed_dim = 10
def get_simple_LSTM_model():
model = Sequential()
model.add(Embedding(vocab_size, embed_dim, input_length=max_length))
model.add(Dropout(0.3))
model.add(LSTM(100))
model.add(Dropout(0.3))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(1, activation='sigmoid'))
return model
model = get_simple_LSTM_model()
print(model.summary())
best_model_file_name = "processing/text_processing/models/best_model_LSTM.hdf5"
callbacks=[
tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=15,
verbose=1, mode="min", restore_best_weights=True),
tf.keras.callbacks.ModelCheckpoint(filepath=best_model_file_name, verbose=1, save_best_only=True)
]
model.compile(loss='binary_crossentropy',
optimizer='adam',
metrics=[tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])
history = model.fit(X_train,
y_train,
epochs=5,
validation_data=(X_test, y_test),
callbacks=callbacks)
model.save(best_model_file_name)
model = tf.keras.models.load_model(best_model_file_name)
@@ -15,7 +15,8 @@ def last(use_case: str):
bottom = df.tail(1)
bottom = str(bottom)
print(bottom)
return Response(status=200, response=bottom)
metricsJson = trainMetricsToJSON(bottom)
return Response(status=200, response=metricsJson)
except Exception as e:
print(e)
return Response(status=400, response="Trained model data doesn't exist")
@@ -35,6 +36,21 @@ def upload_and_train(use_case: str, developer_id: int):
#THEN start processing
last_train_metrics = main_proc.start_processing(use_case,developer_id)
print (last_train_metrics)
return Response(status=200, response=last_train_metrics)
#Trainer_id,Model_id,Dataset_id,Accuracy,Loss
#0,1623160388,0,0.25,nan
metricsJson = trainMetricsToJSON(last_train_metrics)
return Response(status=200, response=metricsJson)
def trainMetricsToJSON(last_train_metrics : list):
metricsDict = dict()
metricsDict["Trainer_id"] = last_train_metrics[0]
metricsDict["Model_id"] = last_train_metrics[1]
metricsDict["Dataset_id"] = last_train_metrics[2]
metricsDict["Accuracy"] = last_train_metrics[3]
metricsDict["Loss"] = last_train_metrics[4]
return json.dumps(metricsDict)
#upload_and_train("text_processing",1)
upload_and_train("text_processing",1)
\ No newline at end of file
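For reference, what trainMetricsToJSON produces for a row as returned by save_to_file_CSV (values taken from the ledger above; everything is serialized as strings):

row = ["0", "1623766462", "1623766462", "0.5", "nan"]   # shape returned by save_to_file_CSV
print(trainMetricsToJSON(row))
# {"Trainer_id": "0", "Model_id": "1623766462", "Dataset_id": "1623766462", "Accuracy": "0.5", "Loss": "nan"}

If the response should advertise its content type, Flask's Response also accepts mimetype='application/json'; the routes above leave it at the default.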
@@ -13,7 +13,8 @@ def last(use_case: str):
bottom = df.tail(1)
bottom = str(bottom)
print(bottom)
return Response(status=200, response=bottom)
metricsJson = trainMetricsToJSON(bottom)
return Response(status=200, response=metricsJson)
except Exception as e:
print(e)
return Response(status=400, response="Trained model data doesn't exist")
@@ -21,13 +22,13 @@ def last(use_case: str):
def upload_and_train(use_case: str):
use_case_path = './processing/'+use_case
use_case_path = './processing/'+use_case+'/'
#Remove old files
try:
if os.path.exists(use_case_path):
print("Use_case path")
print(use_case_path)
shutil.rmtree(use_case_path)
shutil.rmtree(use_case_path)#Deletes old folder with all the files
except OSError as error:
print(error)
return Response(status=400, response="Error occured when deleteing the old use_case directory")
@@ -62,8 +63,17 @@ def upload_and_train(use_case: str):
last_train_metrics = main_proc.start_processing(use_case,0)
print (last_train_metrics)
return Response(status=200, response=last_train_metrics)
metricsJson = trainMetricsToJSON(last_train_metrics)
return Response(status=200, response=metricsJson)
last("text_processing")
def trainMetricsToJSON(last_train_metrics : list):
metricsDict = dict()
metricsDict["Trainer_id"] = last_train_metrics[0]
metricsDict["Model_id"] = last_train_metrics[1]
metricsDict["Dataset_id"] = last_train_metrics[2]
metricsDict["Accuracy"] = last_train_metrics[3]
metricsDict["Loss"] = last_train_metrics[4]
return json.dumps(metricsDict)
#last("text_processing")
upload_and_train("test")
\ No newline at end of file
#upload_and_train("text_processing") #warning it deletes the files
\ No newline at end of file
@@ -5,11 +5,11 @@ from flask import Response, request
def check_article(use_case: str):
#body = request.STRING
#TODO Working on it
#FOR USE_CASE {use_case}
#insert body into the trained model
#get the result
result = None #bool True/False
return Response(status=400, response=str(result))
\ No newline at end of file
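Since check_article is still a stub, a purely hypothetical sketch of the evaluation step the TODO describes, assuming the prototype's saved Keras LSTM and its fitted tokenizer are passed in (neither is wired up in this commit):

# Hypothetical helper, not part of the commit: score an article with the prototype LSTM.
from tensorflow.keras.preprocessing.sequence import pad_sequences

def check_article_sketch(article_text: str, model, tokenizer, max_length: int = 40) -> bool:
    sequences = tokenizer.texts_to_sequences([article_text])
    padded = pad_sequences(sequences, padding='post', maxlen=max_length)
    score = float(model.predict(padded)[0][0])   # sigmoid output of the Dense(1) head
    return score >= 0.5                          # True -> classified as fake (label 1)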