Commit dd9f898f authored by Bogdan's avatar Bogdan

Merge remote-tracking branch 'origin/feauture/schema-reddit' into develop

parents 2f7e09be d4ce41ed
import sys
import os
from pathlib import Path
from typing import Dict, Any
import requests
modules_path = '../../../modules/'
if os.path.exists(modules_path):
sys.path.insert(1, modules_path)
import network_constants as nc
from security.token_manager import TokenManager
import tables.add_reddit as reddit
def add_use_case(use_case: str):
    '''
    Register a use case at the use-case endpoint and print the HTTP status.

    The request carries the bearer token handed out by TokenManager, is sent
    without certificate verification and without any proxy, and has the body
    {"name": use_case}.
    '''
    token = TokenManager.getInstance().getToken()
    endpoint = "https://articonf1.itec.aau.at:30420/api/use-cases"

    auth_header = {"Authorization": f"Bearer {token}"}
    no_proxy = {"http": None, "https": None}
    payload = {"name": use_case}

    reply = requests.post(
        endpoint,
        verify=False,
        proxies=no_proxy,
        headers=auth_header,
        json=payload,
    )

    # one status line per request: "<url>: <status code>"
    print(f"{endpoint}: {reply.status_code}")
if __name__ == "__main__":
    # disable ssl warnings before any request is sent
    requests.packages.urllib3.disable_warnings()

    # the use case is created first, then its reddit table and layers
    use_case = "reddit"
    add_use_case(use_case)
    reddit.main(use_case)
\ No newline at end of file
import network_constants as nc
from security.token_manager import TokenManager
import requests
def add_table(use_case: str, table_name: str):
    '''
    Register the reddit table and its column mappings at the server.

    Every column listed below is mapped to itself (none of the names contains
    a "/", so the internal and external names are identical). In addition the
    key "UniqueID" is mapped to the string "user_id+subreddit+id".

    use_case   -- name of the use case the table is registered under; it is
                  part of the request URL
    table_name -- name of the table to create

    The HTTP status code of the response is printed; nothing is returned.
    '''
    column_names = [
        # "doctype",
        "id",
        "user_id",
        "title",
        "content",
        "permalink",
        "upvotes",
        "percentage_upvoted",
        "n_comments",
        "subreddit",
        "created_at"
    ]

    mappings = {name: name for name in column_names}
    mappings["UniqueID"] = "user_id+subreddit+id"

    table = {
        "name": table_name,
        "mappings": mappings
    }

    # use_case comes from the caller; it used to be overwritten with a
    # hard-coded "reddit" here, which made the parameter meaningless
    url = f"https://articonf1.itec.aau.at:30420/api/use-cases/{use_case}/tables"

    jwt = TokenManager.getInstance().getToken()
    response = requests.post(
        url,
        verify=False,
        proxies={"http": None, "https": None},
        headers={"Authorization": f"Bearer {jwt}"},
        json=table
    )

    print(url+": "+str(response.status_code))
def add_layers(use_case:str, table_name: str):
    '''
    Post the reddit layer definitions to the layers endpoint.

    One request is sent per layer; after each request the URL and the HTTP
    status code of the response are printed. All layers expose the same
    properties and differ only in their name and cluster properties.
    '''
    shared_properties = [
        "UniqueID",
        "subreddit",
        "user_id",
        "title",
        "content",
        "permalink",
        "upvotes",
        "percentage_upvoted",
        "n_comments",
        "created_at",
    ]

    # (layer name, properties the layer is clustered on)
    # There is no "Subreddit_Layer" clustered on "subreddit":
    # subreddit is string cannot cluster
    layer_specs = [
        # TODO Probably do something like Total Votes? so we can get a popularity?
        ("Upvotes_Layer", ["upvotes", "percentage_upvoted"]),
        ("Percentage_Layer", ["percentage_upvoted"]),
        ("Engagement_Layer", ["n_comments"]),
        ("Time_Layer", ["created_at"]),
    ]

    layer_payloads = [
        {
            "use_case": use_case,
            "table": table_name,
            "name": layer_name,
            "properties": list(shared_properties),
            "cluster_properties": cluster_on,
        }
        for layer_name, cluster_on in layer_specs
    ]

    token = TokenManager.getInstance().getToken()
    endpoint = "https://articonf1.itec.aau.at:30420/api/layers"

    for payload in layer_payloads:
        reply = requests.post(
            endpoint,
            verify=False,
            proxies={"http": None, "https": None},
            headers={"Authorization": f"Bearer {token}"},
            json=payload,
        )
        print(f"{endpoint}: {reply.status_code}")
def main(use_case: str):
    '''
    Create the "reddit" table for the given use case, then its layers.
    '''
    print("reddit")

    reddit_table = "reddit"
    # the table has to be registered before the layers that refer to it
    for setup_step in (add_table, add_layers):
        setup_step(use_case, reddit_table)
This source diff could not be displayed because it is too large. You can view the blob instead.
''' This script uploads all entries of the reddit JSON dataset to our pipeline.'''
import csv
import requests
import sys
import os
import json
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# modules_path = '../../../modules/'
# if os.path.exists(modules_path):
# sys.path.insert(1, modules_path)
# import network_constants as nc
# from security.token_manager import TokenManager
def send_transaction_to_rest_gateway(transaction: dict):
    '''
    POST a single transaction to the trace endpoint of the Rest Gateway.

    The bearer token is taken from the environment variable
    REST_GATEWAY_JWT_TOKEN when it is set and non-empty; otherwise the token
    embedded below is used, exactly as before.

    transaction -- dict that is sent as the JSON body of the request

    Raises Exception when the response status code is 400 or higher; the
    message contains the response body.
    '''
    # token from Rest Gateway to authorize
    # NOTE(review): a credential committed to the repository cannot be
    # replaced without editing this file, and its payload appears to carry a
    # fixed validity window - supply a current token through the environment
    # variable instead of changing this literal.
    DEFAULT_JWT_TOKEN = 'eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJ1c2VybmFtZSI6InJlZ3VsYXJAaXRlYy5hYXUuYXQiLCJjcmVhdGVkX2F0IjoiMjAyMS0wMi0wOCAxMzo0NzoxOC40NzUxMjEiLCJ2YWxpZF91bnRpbCI6IjIwMjEtMDItMDkgMTM6NDc6MTguNDc1MTIxIn0.DWY9c0X2XQJDz0Ef35-k1IVY6GWf00ogaVOCeX8Irlo'
    jwt_token = os.environ.get('REST_GATEWAY_JWT_TOKEN') or DEFAULT_JWT_TOKEN

    res = requests.post(
        url='https://articonf1.itec.aau.at:30401/api/trace',
        json=transaction,
        headers={"Authorization": f"Bearer {jwt_token}"},
        verify=False  # ignore ssl error
    )

    if res.status_code >= 400:
        raise Exception(f"Error while uploading: {str(res.content)}")

    #print(res) Lots of spam
# file to read the data from
JSON_DATASET = r'reddit_dataset.json'

if __name__ == '__main__':
    # JSON text is UTF-8 by specification; naming the encoding keeps the
    # platform default codec from being used to decode the dataset.
    # The file is closed again before the (long) upload starts.
    with open(JSON_DATASET, 'r', encoding='utf-8') as json_file:
        # iterated below as a sequence of dicts, one per transaction
        json_data_list = json.load(json_file)

    summ = 0
    for obj_dict in json_data_list:
        # the two constant fields come first; the dataset fields are copied
        # afterwards and win if a key occurs in both
        transaction = {'ApplicationType': 'reddit', 'docType': 'reddit'}
        transaction.update(obj_dict)

        send_transaction_to_rest_gateway(transaction)
        summ += 1

        # progress report every 1000 uploads
        if summ % 1000 == 0:
            print("Uploaded " + str(summ) + " transactions.")

    print("TOTAL Uploaded " + str(summ) + " transactions.")
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment