Commit 776d16a1 authored by Bogdan

Implemented Reddit Upload; Updated Reddit Schema

parent 81958c3d
......@@ -7,16 +7,25 @@ def add_table(use_case: str, table_name: str):
'''
columns = {}
use_case = "reddit"
columns = [
# "doctype",
"id",
"user_id",
"title",
"content",
"permalink",
"upvotes",
"percentage_upvoted",
"n_comments",
"subreddit"
]
columns = { c : c for c in columns }
columns["UniqueID"] = "posts//subreddit+//posts//id"
columns["subreddit"] = "posts/subreddit"
columns["user_id"] = "posts//user_id"
columns["title"] = "posts//title"
columns["content"] = "posts//content"
columns["permalink]"] = "posts//permalink"
columns["upvotes"] = "posts//upvotes"
columns["percentage_upvoted"] = "posts//percentage_upvoted"
columns["n_comments"] = "posts//n_comments"
columns["UniqueID"] = "user_id+subreddit+id"
table = {
"name": table_name,
......@@ -25,6 +34,18 @@ def add_table(use_case: str, table_name: str):
postTableToSwagger(use_case,table)
# { #EXAMPLE JSON
# "id":"larjmz",
# "user_id":"R3V0LV3Rocelot",
# "title":"Outrage As Robinhood CEO Confesses To Elon Musk: DTCC Shut Down Stocks In Gamestop; AMC Surge",
# "content":"",
# "permalink":"/r/news/comments/larjmz/outrage_as_robinhood_ceo_confesses_to_elon_musk/",
# "upvotes":57,
# "percentage_upvoted":0.86,
# "n_comments":19,
# "subreddit":"news"
# }
def add_layers(use_case:str, table_name: str):
layers = [
{
......@@ -49,7 +70,7 @@ def add_layers(use_case:str, table_name: str):
{
"use_case": use_case,
"table": table_name,
"name": "User_Post_Layer",
"name": "User_Layer",
"properties": [
"UniqueID",
"subreddit",
......@@ -81,13 +102,14 @@ def add_layers(use_case:str, table_name: str):
"n_comments"
],
"cluster_properties": [
"upvotes"
"upvotes",
"percentage_upvoted"
]
},
{
"use_case": use_case,
"table": table_name,
"name": "Liked_Layer",
"name": "Percentage_Layer",
"properties": [
"UniqueID",
"subreddit",
......@@ -127,7 +149,7 @@ def add_layers(use_case:str, table_name: str):
postLayersToSwagger(use_case,layers)
def main(use_case: str):
print("posts")
table_name = "posts"
print("reddit")
table_name = "reddit"
add_table(use_case,table_name)
add_layers(use_case,table_name)
from _add_use_case_scripts.requestPost import postLayersToSwagger, postTableToSwagger
def add_table(use_case: str, table_name: str):
    '''
    Build the column mappings of the table and add them at the server.

    Any "/" in the internal representation is to be replaced with a "_".

    use_case:   use case the table is posted for
    table_name: value stored under the table's "name" key
    '''
    # Both internal columns are currently read from the same source path.
    mappings = {
        "UniqueID": "users//id",
        "name": "users//id",
    }
    # TODO should it have some other attributes from posts// ????
    #      (subreddit, user_id, title, content, permalink, upvotes,
    #       percentage_upvoted, n_comments)

    postTableToSwagger(use_case, {"name": table_name, "mappings": mappings})
def add_layers(use_case: str, table_name: str):
    '''
    Describe the layers of the table and hand them to postLayersToSwagger.

    use_case:   use case stored in every layer and passed along with the list
    table_name: table the layers belong to
    '''
    # Single layer: exposes UniqueID and name, clusters on name only.
    name_layer = {
        "use_case": use_case,
        "table": table_name,
        "name": "Name_Layer",
        "properties": ["UniqueID", "name"],
        "cluster_properties": ["name"],
    }

    postLayersToSwagger(use_case, [name_layer])
def main(use_case: str):
    '''
    Add the "users" table and then its layers for the given use case.

    use_case: use case passed on to add_table and add_layers
    '''
    table_name = "users"
    print(table_name)  # progress output: name of the table being added

    # The table is added first, then the layers that refer to it.
    add_table(use_case, table_name)
    add_layers(use_case, table_name)
This diff is collapsed.
''' This script uploads all entries of the Reddit JSON dataset to our pipeline.'''
import csv
import requests
import sys
import os
import json
modules_path = '../../../modules/'
if os.path.exists(modules_path):
sys.path.insert(1, modules_path)
#import network_constants as nc
#from security.token_manager import TokenManager
def send_transaction_to_rest_gateway(transaction: dict, jwt_token: str = None):
    '''
    Upload a single transaction to the trace endpoint of the REST gateway.

    transaction: dict sent as the JSON body of the POST request
    jwt_token:   bearer token used for the Authorization header; when omitted,
                 a module-level JWT_TOKEN is used if one is defined, otherwise
                 the JWT_TOKEN environment variable

    Raises RuntimeError if no token can be determined, and Exception if the
    gateway answers with a status code >= 400. On success the response
    object is printed.
    '''
    # The token used to be read from a global JWT_TOKEN that was never
    # assigned (both assignments were commented out), so every call failed
    # with a NameError. Resolve it explicitly instead and never hard-code a
    # token in the source.
    if jwt_token is None:
        jwt_token = globals().get("JWT_TOKEN") or os.environ.get("JWT_TOKEN")
    if not jwt_token:
        raise RuntimeError(
            "No JWT token available: pass jwt_token or set the JWT_TOKEN environment variable"
        )

    res = requests.post(
        url='https://articonf1.itec.aau.at:30401/api/trace',
        json=transaction,
        headers={"Authorization": f"Bearer {jwt_token}"},
        # NOTE: certificate verification is switched off to ignore the ssl
        # error of the gateway; the connection is therefore not authenticated.
        verify=False,
    )

    if res.status_code >= 400:
        raise Exception(f"Error while uploading: {str(res.content)}")

    print(res)
# Path of the JSON file the transactions are read from.
JSON_DATASET = r'reddit_dataset.json'

if __name__ == '__main__':
    # Parse the whole dataset up front; the loop below iterates it and calls
    # .items() on each element, so a JSON list of objects is expected.
    with open(JSON_DATASET, 'r') as dataset_file:
        records = json.load(dataset_file)

    uploaded = 0
    for record in records:
        print("a")

        # The two fixed fields come first; the record's own fields follow
        # and win if a key occurs in both.
        payload = {'ApplicationType': 'reddit', 'docType': 'reddit'}
        payload.update(record.items())

        send_transaction_to_rest_gateway(payload)
        uploaded += 1

        # Progress report after every 100 uploads.
        if uploaded % 100 == 0:
            print("Uploaded ", uploaded, " transactions.", sep="")

    print("TOTAL Uploaded ", uploaded, " transactions.", sep="")
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment