Commit 776d16a1 authored by Bogdan

Implemented Reddit Upload; Updated Reddit Schema

parent 81958c3d
......@@ -8,15 +8,24 @@ def add_table(use_case: str, table_name: str):
columns = {}
columns["UniqueID"] = "posts//subreddit+//posts//id"
columns["subreddit"] = "posts/subreddit"
columns["user_id"] = "posts//user_id"
columns["title"] = "posts//title"
columns["content"] = "posts//content"
columns["permalink]"] = "posts//permalink"
columns["upvotes"] = "posts//upvotes"
columns["percentage_upvoted"] = "posts//percentage_upvoted"
columns["n_comments"] = "posts//n_comments"
use_case = "reddit"
columns = [
# "doctype",
"id",
"user_id",
"title",
"content",
"permalink",
"upvotes",
"percentage_upvoted",
"n_comments",
"subreddit"
]
columns = { c : c for c in columns }
columns["UniqueID"] = "user_id+subreddit+id"
table = {
"name": table_name,
......@@ -25,6 +34,18 @@ def add_table(use_case: str, table_name: str):
postTableToSwagger(use_case,table)
# { #EXAMPLE JSON
# "id":"larjmz",
# "user_id":"R3V0LV3Rocelot",
# "title":"Outrage As Robinhood CEO Confesses To Elon Musk: DTCC Shut Down Stocks In Gamestop; AMC Surge",
# "content":"",
# "permalink":"/r/news/comments/larjmz/outrage_as_robinhood_ceo_confesses_to_elon_musk/",
# "upvotes":57,
# "percentage_upvoted":0.86,
# "n_comments":19,
# "subreddit":"news"
# }
def add_layers(use_case:str, table_name: str):
layers = [
{
......@@ -49,7 +70,7 @@ def add_layers(use_case:str, table_name: str):
{
"use_case": use_case,
"table": table_name,
"name": "User_Post_Layer",
"name": "User_Layer",
"properties": [
"UniqueID",
"subreddit",
......@@ -81,13 +102,14 @@ def add_layers(use_case:str, table_name: str):
"n_comments"
],
"cluster_properties": [
"upvotes"
"upvotes",
"percentage_upvoted"
]
},
{
"use_case": use_case,
"table": table_name,
"name": "Liked_Layer",
"name": "Percentage_Layer",
"properties": [
"UniqueID",
"subreddit",
......@@ -127,7 +149,7 @@ def add_layers(use_case:str, table_name: str):
postLayersToSwagger(use_case,layers)
def main(use_case: str):
    '''
    Create the "reddit" table and its layers for the given use case.

    The pre-commit version used the table name "posts"; those superseded
    lines (a stale print and an assignment that was immediately
    overwritten) are dropped so only the current name remains.

    :param use_case: name of the use case the table and layers belong to
    '''
    print("reddit")
    table_name = "reddit"
    add_table(use_case, table_name)
    add_layers(use_case, table_name)
from _add_use_case_scripts.requestPost import postLayersToSwagger, postTableToSwagger
def add_table(use_case: str, table_name: str):
    '''
    Register the column mappings for ``table_name`` at the server.

    Two columns are mapped, "UniqueID" and "name", and both point at the
    source path "users//id".
    Original author's note: all "/"'s in the internal representation are
    replaced with a "_" (that replacement is not performed in this function).
    '''
    # TODO should it have some other attributes from posts// ????
    #      (subreddit, user_id, title, content, permalink, upvotes,
    #       percentage_upvoted, n_comments)
    mappings = {
        "UniqueID": "users//id",
        "name": "users//id",
    }
    postTableToSwagger(use_case, {"name": table_name, "mappings": mappings})
def add_layers(use_case:str, table_name: str):
    '''
    Post the layer definitions for ``table_name``.

    Only one layer is defined here, "Name_Layer", which exposes the
    "UniqueID" and "name" properties and clusters on "name".
    '''
    name_layer = dict(
        use_case=use_case,
        table=table_name,
        name="Name_Layer",
        properties=["UniqueID", "name"],
        cluster_properties=["name"],
    )
    postLayersToSwagger(use_case, [name_layer])
def main(use_case: str):
    '''
    Create the "users" table and then its layers for the given use case.
    '''
    users_table = "users"
    print(users_table)
    add_table(use_case, users_table)
    add_layers(use_case, users_table)
This source diff could not be displayed because it is too large. You can view the blob instead.
''' This script adds all data from BitYoga's csv to our pipeline.'''
import csv
import requests
import sys
import os
import json
modules_path = '../../../modules/'
if os.path.exists(modules_path):
sys.path.insert(1, modules_path)
#import network_constants as nc
#from security.token_manager import TokenManager
def send_transaction_to_rest_gateway(transaction: dict):
    '''
    Upload a single transaction to the REST gateway's trace endpoint.

    The bearer token is taken from a module-level ``JWT_TOKEN`` if one is
    defined, otherwise from the ``JWT_TOKEN`` environment variable.

    :param transaction: JSON-serialisable payload sent as the request body
    :raises RuntimeError: if no token is available
    :raises Exception: if the gateway answers with a status code >= 400
    '''
    # The token used to be referenced as a global that was never assigned
    # (both assignments were commented out), so every call died with a
    # NameError. Credentials must not be hard-coded in the source; supply
    # them via the environment instead.
    # NOTE(review): TokenManager.getInstance().getToken() looks like the
    # intended source once the security module import is re-enabled - confirm.
    token = globals().get("JWT_TOKEN") or os.environ.get("JWT_TOKEN")
    if not token:
        raise RuntimeError(
            "No REST gateway token available: set the JWT_TOKEN environment variable"
        )

    res = requests.post(
        url = 'https://articonf1.itec.aau.at:30401/api/trace',
        json = transaction,
        headers = {"Authorization": f"Bearer {token}"},
        verify = False  # ignore ssl error
    )

    if res.status_code >= 400:
        raise Exception(f"Error while uploading: {str(res.content)}")

    print(res)
# file to read the data from
JSON_DATASET = r'reddit_dataset.json'

if __name__ == '__main__':
    # JSON is UTF-8 by specification; do not depend on the platform's
    # default encoding when reading the dataset.
    with open(JSON_DATASET, 'r', encoding='utf-8') as json_file:
        json_data_list = json.load(json_file)

    uploaded = 0  # stays 0 when the dataset is empty
    for uploaded, obj_dict in enumerate(json_data_list, start=1):
        # Routing fields first; every key of the dataset entry is copied
        # afterwards and therefore wins on a name collision.
        transaction = {
            'ApplicationType': 'reddit',
            'docType': 'reddit',
            **obj_dict,
        }
        send_transaction_to_rest_gateway(transaction)

        # progress report every 100 uploads
        if uploaded % 100 == 0:
            print(f"Uploaded {uploaded} transactions.")

    print(f"TOTAL Uploaded {uploaded} transactions.")
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment