Implemented Reddit Upload; Updated Reddit Schema

776d16a1 · Bogdan · 81958c3d · 776d16a1 · 81958c3d · 776d16a1
Commit 776d16a1 authored Feb 04, 2021 by Bogdan
4 changed files
--- a/src/participation-hub/business-logic-microservice/app/_add_use_case_scripts/reddit/tables/add_reddit_posts.py
+++ b/src/participation-hub/business-logic-microservice/app/_add_use_case_scripts/reddit/tables/add_reddit_posts.py
@@ -7,16 +7,25 @@ def add_table(use_case: str, table_name: str):
    '''
    columns = {}
+    use_case = "reddit"
+    columns = [
+        # "doctype",
+        "id",
+        "user_id",
+        "title",
+        "content",
+        "permalink",
+        "upvotes",
+        "percentage_upvoted",
+        "n_comments",
+        "subreddit"
+    ]
+    columns = { c : c for c in columns }
-    columns["UniqueID"] = "posts//subreddit+//posts//id"
+    columns["UniqueID"] = "user_id+subreddit+id"
-    columns["subreddit"] = "posts/subreddit"
-    columns["user_id"] = "posts//user_id"
-    columns["title"] = "posts//title"
-    columns["content"] = "posts//content"
-    columns["permalink]"] = "posts//permalink"
-    columns["upvotes"] = "posts//upvotes"
-    columns["percentage_upvoted"] = "posts//percentage_upvoted"
-    columns["n_comments"] = "posts//n_comments"
    table = {
        "name": table_name,
@@ -25,6 +34,18 @@ def add_table(use_case: str, table_name: str):
    postTableToSwagger(use_case,table)
+# {    #EXAMPLE JSON
+#       "id":"larjmz",
+#       "user_id":"R3V0LV3Rocelot",
+#       "title":"Outrage As Robinhood CEO Confesses To Elon Musk: DTCC Shut Down Stocks In Gamestop; AMC Surge",
+#       "content":"",
+#       "permalink":"/r/news/comments/larjmz/outrage_as_robinhood_ceo_confesses_to_elon_musk/",
+#       "upvotes":57,
+#       "percentage_upvoted":0.86,
+#       "n_comments":19,
+#       "subreddit":"news"
+#    }
 def add_layers(use_case:str, table_name: str):
    layers = [
        {
@@ -49,7 +70,7 @@ def add_layers(use_case:str, table_name: str):
        {
            "use_case": use_case,
            "table": table_name,
-            "name": "User_Post_Layer",
+            "name": "User_Layer",
            "properties": [
                "UniqueID",
                "subreddit",
@@ -81,13 +102,14 @@ def add_layers(use_case:str, table_name: str):
                "n_comments"
            ],
            "cluster_properties": [
-                "upvotes"
+                "upvotes",
+                "percentage_upvoted"
            ]
        },
        {
            "use_case": use_case,
            "table": table_name,
-            "name": "Liked_Layer",
+            "name": "Percentage_Layer",
            "properties": [
                "UniqueID",
                "subreddit",
@@ -127,7 +149,7 @@ def add_layers(use_case:str, table_name: str):
    postLayersToSwagger(use_case,layers)    
 def main(use_case: str):
-    print("posts")
+    print("reddit")
-    table_name = "posts"
+    table_name = "reddit"
    add_table(use_case,table_name)
    add_layers(use_case,table_name)
--- a/src/participation-hub/business-logic-microservice/app/_add_use_case_scripts/reddit/tables/add_reddit_users.py
+++ b/src/participation-hub/business-logic-microservice/app/_add_use_case_scripts/reddit/tables/add_reddit_users.py
-from _add_use_case_scripts.requestPost  import postLayersToSwagger, postTableToSwagger
-def add_table(use_case: str, table_name: str):
-    '''
-    take the columns and add the mappings at the server
-    replace all "/"'s in the internal representation with a "_"
-    '''
-    columns = {}
-    columns["UniqueID"] = "users//id"
-    columns["name"] = "users//id"
-    #TODO should it have some other attributes from posts// ????
-    # columns["subreddit"] = "posts/subreddit"
-    # columns["user_id"] = "posts//user_id"
-    # columns["title"] = "posts//title"
-    # columns["content"] = "posts//content"
-    # columns["permalink]"] = "posts//permalink"
-    # columns["upvotes"] = "posts//upvotes"
-    # columns["percentage_upvoted"] = "posts//percentage_upvoted"
-    # columns["n_comments"] = "posts//n_comments" 
-    table = {
-        "name": table_name,
-        "mappings": columns
-    }
-    postTableToSwagger(use_case,table)
-def add_layers(use_case:str, table_name: str):
-    layers = [
-        {
-            "use_case": use_case,
-            "table": table_name,
-            "name": "Name_Layer",
-            "properties": [
-                "UniqueID",
-                "name"
-            ],
-            "cluster_properties": [
-                "name"
-            ]
-        }
-    ]
-    postLayersToSwagger(use_case,layers)    
-def main(use_case: str):
-    print("users")
-    table_name = "users"
-    add_table(use_case,table_name)
-    add_layers(use_case,table_name)
--- a/tools/reddit-upload/reddit_dataset.json
+++ b/tools/reddit-upload/reddit_dataset.json
--- a/tools/reddit-upload/upload_data.py
+++ b/tools/reddit-upload/upload_data.py
+''' This script uploads all entries of the Reddit JSON dataset to our pipeline.'''
+import csv
+import requests
+import sys
+import os
+import json
+modules_path = '../../../modules/'
+if os.path.exists(modules_path):
+    sys.path.insert(1, modules_path)
+#import network_constants as nc
+#from security.token_manager import TokenManager
+def send_transaction_to_rest_gateway(transaction: dict):
+    '''
+    POST a single transaction to the REST gateway trace endpoint.
+    The bearer token is read from the JWT_TOKEN environment variable,
+    because the TokenManager import of this script is disabled.
+    :param transaction: dict sent as the JSON body of the request
+    :raises RuntimeError: if the JWT_TOKEN environment variable is not set
+    :raises Exception: if the gateway answers with a status code >= 400
+    '''
+    # token from Rest Gateway to authorize; never hardcode it in this file
+    #JWT_TOKEN = TokenManager.getInstance().getToken()
+    JWT_TOKEN = os.environ.get('JWT_TOKEN')
+    if not JWT_TOKEN:
+        raise RuntimeError("JWT_TOKEN environment variable is not set")
+    res = requests.post(
+        url = 'https://articonf1.itec.aau.at:30401/api/trace',
+        json = transaction,
+        headers = {"Authorization": f"Bearer {JWT_TOKEN}"},
+        verify = False # ignore ssl error
+    )
+    if res.status_code >= 400:
+        raise Exception(f"Error while uploading: {str(res.content)}")
+    print(res)
+# file to read the data from
+JSON_DATASET = r'reddit_dataset.json'
+if __name__ == '__main__':
+    # the dataset is expected to be a JSON list of objects, one per transaction
+    with open(JSON_DATASET, 'r', encoding='utf-8') as json_file:
+        json_data_list = json.load(json_file)
+    uploaded = 0
+    for uploaded, obj_dict in enumerate(json_data_list, start=1):
+        # keys of the dataset entry override the two defaults below
+        transaction = {
+            'ApplicationType': 'reddit',
+            'docType': 'reddit',
+            **obj_dict,
+        }
+        send_transaction_to_rest_gateway(transaction)
+        # progress report every 100 uploads
+        if uploaded % 100 == 0:
+            print ("Uploaded " + str(uploaded) + " transactions.")
+    print ("TOTAL Uploaded " + str(uploaded) + " transactions.")
\ No newline at end of file