Commit dd9f898f authored by Bogdan

Merge remote-tracking branch 'origin/feauture/schema-reddit' into develop

parents 2f7e09be d4ce41ed
import sys
import os
from pathlib import Path
from typing import Dict, Any
import requests
# Put the shared modules directory on the import path, but only when that
# relative directory exists from the current working directory.
modules_path = '../../../modules/'
if os.path.exists(modules_path):
    sys.path.insert(1, modules_path)
import network_constants as nc
from security.token_manager import TokenManager
import tables.add_reddit as reddit
def add_use_case(use_case: str):
    '''
    Register `use_case` at the use-case REST API.

    POSTs {"name": use_case} to the use-cases endpoint, authorised with the
    bearer token obtained from TokenManager, then prints the URL together
    with the HTTP status code of the response.
    '''
    endpoint = "https://articonf1.itec.aau.at:30420/api/use-cases"
    token = TokenManager.getInstance().getToken()
    auth_header = {"Authorization": f"Bearer {token}"}

    # Certificate verification is switched off and both proxies are unset.
    reply = requests.post(
        endpoint,
        verify=False,
        proxies={"http": None, "https": None},
        headers=auth_header,
        json={"name": use_case},
    )
    print(f"{endpoint}: {reply.status_code}")
if __name__ == "__main__":
    # disable ssl warnings :)
    requests.packages.urllib3.disable_warnings()

    # Create the use case first, then let the reddit module add its
    # table and layers underneath it.
    use_case = "reddit"
    add_use_case(use_case)
    reddit.main(use_case)
\ No newline at end of file
import network_constants as nc
from security.token_manager import TokenManager
import requests
def add_table(use_case: str, table_name: str):
    '''
    Register the reddit table and its column mappings at the server.

    Every column is mapped to itself, and an additional "UniqueID" mapping
    is built from "user_id+subreddit+id". The table is POSTed to the tables
    endpoint of `use_case`; the URL and the response status code are printed.

    :param use_case: name of the use case the table is created under
    :param table_name: name of the table to create
    '''
    # NOTE: `use_case` is taken from the caller. It must not be reassigned
    # here, otherwise the table always ends up under the same use case no
    # matter what was passed in.
    column_names = [
        # "doctype",
        "id",
        "user_id",
        "title",
        "content",
        "permalink",
        "upvotes",
        "percentage_upvoted",
        "n_comments",
        "subreddit",
        "created_at",
    ]

    # Identity mapping: each column keeps its own name.
    mappings = {name: name for name in column_names}
    mappings["UniqueID"] = "user_id+subreddit+id"

    table = {
        "name": table_name,
        "mappings": mappings,
    }

    url = f"https://articonf1.itec.aau.at:30420/api/use-cases/{use_case}/tables"
    jwt = TokenManager.getInstance().getToken()

    response = requests.post(
        url,
        verify=False,
        proxies={"http": None, "https": None},
        headers={"Authorization": f"Bearer {jwt}"},
        json=table,
    )
    print(f"{url}: {response.status_code}")
def add_layers(use_case: str, table_name: str):
    '''
    Create the reddit layers at the server.

    All layers expose the same properties and differ only in their name and
    in the properties they cluster on. One POST per layer is sent to the
    layers endpoint; the URL and the response status code are printed after
    each request.
    '''
    shared_properties = [
        "UniqueID",
        "subreddit",
        "user_id",
        "title",
        "content",
        "permalink",
        "upvotes",
        "percentage_upvoted",
        "n_comments",
        "created_at",
    ]

    # (layer name, properties to cluster on)
    # A "Subreddit_Layer" clustering on "subreddit" is not created:
    # subreddit is a string and cannot be clustered.
    layer_specs = [
        # TODO Probably do something like Total Votes? so we can get a popularity?
        ("Upvotes_Layer", ["upvotes", "percentage_upvoted"]),
        ("Percentage_Layer", ["percentage_upvoted"]),
        ("Engagement_Layer", ["n_comments"]),
        ("Time_Layer", ["created_at"]),
    ]

    payloads = [
        {
            "use_case": use_case,
            "table": table_name,
            "name": layer_name,
            "properties": list(shared_properties),
            "cluster_properties": cluster_on,
        }
        for layer_name, cluster_on in layer_specs
    ]

    endpoint = "https://articonf1.itec.aau.at:30420/api/layers"
    token = TokenManager.getInstance().getToken()
    auth_header = {"Authorization": f"Bearer {token}"}

    for payload in payloads:
        reply = requests.post(
            endpoint,
            verify=False,
            proxies={"http": None, "https": None},
            headers=auth_header,
            json=payload,
        )
        print(f"{endpoint}: {reply.status_code}")
def main(use_case: str):
    '''Add the "reddit" table and then its layers to `use_case`.'''
    print("reddit")

    # Table and layers share the same table name.
    reddit_table = "reddit"
    add_table(use_case=use_case, table_name=reddit_table)
    add_layers(use_case=use_case, table_name=reddit_table)
This diff is collapsed.
''' This script adds all data from BitYoga's csv to our pipeline.'''
import csv
import requests
import sys
import os
import json
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# modules_path = '../../../modules/'
# if os.path.exists(modules_path):
# sys.path.insert(1, modules_path)
# import network_constants as nc
# from security.token_manager import TokenManager
def send_transaction_to_rest_gateway(transaction: dict, jwt_token: str = None):
    '''
    Upload a single transaction to the trace endpoint of the REST gateway.

    :param transaction: transaction to send as the JSON body of the request
    :param jwt_token: bearer token used to authorise at the REST gateway;
        when omitted, the token embedded in this function is used
    :raises Exception: if the gateway answers with a status code >= 400
    '''
    if jwt_token is None:
        # Fallback kept so existing single-argument calls behave as before.
        # NOTE(review): this credential is committed to source and presumably
        # expires - prefer passing a fresh token via `jwt_token`.
        jwt_token = 'eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJ1c2VybmFtZSI6InJlZ3VsYXJAaXRlYy5hYXUuYXQiLCJjcmVhdGVkX2F0IjoiMjAyMS0wMi0wOCAxMzo0NzoxOC40NzUxMjEiLCJ2YWxpZF91bnRpbCI6IjIwMjEtMDItMDkgMTM6NDc6MTguNDc1MTIxIn0.DWY9c0X2XQJDz0Ef35-k1IVY6GWf00ogaVOCeX8Irlo'

    res = requests.post(
        url='https://articonf1.itec.aau.at:30401/api/trace',
        json=transaction,
        headers={"Authorization": f"Bearer {jwt_token}"},
        verify=False,  # ignore ssl error
    )

    if res.status_code >= 400:
        raise Exception(f"Error while uploading: {str(res.content)}")

    # print(res) is left out on purpose: lots of spam
# file to read the data from
JSON_DATASET = r'reddit_dataset.json'

if __name__ == '__main__':
    # The dataset is loaded completely before the upload starts.
    with open(JSON_DATASET, 'r') as dataset_file:
        records = json.load(dataset_file)

    uploaded = 0
    for uploaded, record in enumerate(records, start=1):
        # Tag the record as a reddit transaction; keys coming from the record
        # itself are applied last and therefore win over the two tags.
        send_transaction_to_rest_gateway({
            'ApplicationType': 'reddit',
            'docType': 'reddit',
            **record,
        })

        # Progress report every 1000 uploads.
        if uploaded % 1000 == 0:
            print(f"Uploaded {uploaded} transactions.")

    print(f"TOTAL Uploaded {uploaded} transactions.")
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment