Commit b0499784 authored by Alexander Lercher

[SemanticLinking] Added youtube dummy upload

parent 271733ea
# this file contains all the training data but is large.
videos.csv
\ No newline at end of file
import csv
import hashlib
import sys
import os
# Candidate folders to import the project modules from; both are relative
# paths, so which of them exists depends on the current working directory.
modules_paths = ['.', '../../../modules/']
for modules_path in modules_paths:
    if not os.path.exists(modules_path):
        continue
    # insert at index 1 so that sys.path[0] stays the first lookup location
    sys.path.insert(1, modules_path)
from messaging.MessageHandler import MessageHandler
from db.repository import Repository
# file to read the data from; the path is relative, so it is resolved against
# the working directory the script is started from
CSV_FILE = r'dummy_upload/community-prediction-youtube/videos.csv'
# handler that receives every trace built by upload_transaction()
handler = MessageHandler(Repository())
import csv
import json
from datetime import datetime
from typing import Iterator
import pandas as pd
from pandas import DataFrame
def load_csv_content() -> Iterator:
    '''Reads CSV_FILE with pandas and returns the iterrows() iterator of the resulting DataFrame.

    Every item of the iterator is an (index, row) pair where the row is a pandas Series.
    '''
    return pd.read_csv(CSV_FILE).iterrows()
def upload_transaction(transaction):
    '''Renames the fields of one video entry and passes it to the handler as a new trace.

    The given dict is modified in place and is used as the 'properties' of the trace.
    '''
    video_id = transaction['video_id']

    # manual flattening based on the table mapping: add the target fields first ...
    transaction['UniqueID'] = video_id
    transaction['trend_delay'] = transaction['trend_duration']
    transaction['timestamp'] = transaction['trending_timestamp']
    # ... then remove the source fields they were copied from
    for source_field in ('trend_duration', 'trending_timestamp'):
        del transaction[source_field]

    # use case and table share the same name
    target = 'community-prediction-youtube-n'
    handler.handle_new_trace({
        'use_case': target,
        'table': target,
        'id': video_id,
        'properties': transaction,
    })
if __name__ == '__main__':
    # Denominator of the progress output; presumably the number of rows in
    # videos.csv - TODO confirm whenever the file changes.
    total_rows = 375942

    # upload every row of the csv file as one transaction
    for idx, row in load_csv_content():
        upload_transaction(row.to_dict())

        # report the progress every 1000 rows
        if idx % 1000 == 0:
            # scale the ratio by 100 so that the value matches its '%' label
            print(f"Progress: {100 * idx / total_rows:.2f} %")
\ No newline at end of file
import requests
# silence the urllib3 warnings; presumably the ones raised because httpget()
# sends its requests with verify=False
requests.packages.urllib3.disable_warnings()
from icecream import ic
# use case whose tables and transactions are queried by this script
uc = 'community-prediction-youtube'
def httpget(url, token=None, timeout=None):
    '''Sends a GET request with a bearer token to url and returns the response.

    url: address to request.
    token: JWT for the Authorization header; if omitted, the token embedded below is used.
    timeout: seconds forwarded to requests.get; None (default) means no timeout.

    The response is returned as is, its status code is not checked.
    '''
    if token is None:
        # NOTE(review): a credential committed to the source. The payload of this
        # JWT contains a 'valid_until' field (2021-05-06), so it has presumably
        # expired - pass a fresh token via the parameter instead.
        token = 'eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJ1c2VybmFtZSI6InJlZ3VsYXJAaXRlYy5hYXUuYXQiLCJjcmVhdGVkX2F0IjoiMjAyMS0wNS0wNSAxMTozNjozOC4yMzAxODEiLCJ2YWxpZF91bnRpbCI6IjIwMjEtMDUtMDYgMTE6MzY6MzguMjMwMTgxIn0.Fz6iPpA0CnrXlOCj-VuCHFzc58H9Of2cBYHOb_RqvzI'

    # NOTE(review): verify=False turns off the TLS certificate verification;
    # kept because the callers rely on it, but it should be confirmed as intended.
    return requests.get(
        url,
        verify=False,
        headers={"Authorization": f"Bearer {token}"},
        timeout=timeout,
    )
# list the names of all tables returned for the use case
res = httpget(url=f'https://articonf1.itec.aau.at:30420/api/use-cases/{uc}/tables')
print("Tables: ", [table['name'] for table in res.json()])
# count pushed data
def count_data(json_res, table_identifier='table'):
    '''Counts the entries of json_res per value of their table_identifier field and prints the counts with ic.

    Nothing is returned; the counts are only printed.
    '''
    tables = {}
    for entry in json_res:
        key = entry[table_identifier]
        tables[key] = tables.get(key, 0) + 1
    # keep the name `tables`: ic() prints the argument expression next to its value
    ic(tables)
# transactions of the use case, counted per 'table'
res = httpget(url=f'https://articonf1.itec.aau.at:30001/api/use_cases/{uc}/transactions')
count_data(res.json(), table_identifier='table')
# failed transactions are counted per 'docType' instead of 'table'
res_f = httpget(url=f'https://articonf1.itec.aau.at:30001/api/use_cases/{uc}/transactions-failed')
count_data(res_f.json(), table_identifier='docType')
# duplicated transactions, counted per 'table'
res_d = httpget(url=f'https://articonf1.itec.aau.at:30001/api/use_cases/{uc}/transactions-duplicated')
count_data(res_d.json(), table_identifier='table')
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment