Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
SMART
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
3
Issues
3
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Registry
Registry
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
UNI-KLU
SMART
Commits
776d16a1
Commit
776d16a1
authored
Feb 04, 2021
by
Bogdan
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Implemented Reddit Upload; Updated Reddit Schema
parent
81958c3d
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
96 additions
and
67 deletions
+96
-67
add_reddit.py
...ice/app/_add_use_case_scripts/reddit/tables/add_reddit.py
+36
-14
add_reddit_users.py
...p/_add_use_case_scripts/reddit/tables/add_reddit_users.py
+0
-53
reddit_dataset.json
tools/reddit-upload/reddit_dataset.json
+1
-0
upload_data.py
tools/reddit-upload/upload_data.py
+59
-0
No files found.
src/participation-hub/business-logic-microservice/app/_add_use_case_scripts/reddit/tables/add_reddit
_posts
.py
→
src/participation-hub/business-logic-microservice/app/_add_use_case_scripts/reddit/tables/add_reddit.py
View file @
776d16a1
...
@@ -7,16 +7,25 @@ def add_table(use_case: str, table_name: str):
...
@@ -7,16 +7,25 @@ def add_table(use_case: str, table_name: str):
'''
'''
columns
=
{}
columns
=
{}
use_case
=
"reddit"
columns
=
[
# "doctype",
"id"
,
"user_id"
,
"title"
,
"content"
,
"permalink"
,
"upvotes"
,
"percentage_upvoted"
,
"n_comments"
,
"subreddit"
]
columns
=
{
c
:
c
for
c
in
columns
}
columns
[
"UniqueID"
]
=
"posts//subreddit+//posts//id"
columns
[
"UniqueID"
]
=
"user_id+subreddit+id"
columns
[
"subreddit"
]
=
"posts/subreddit"
columns
[
"user_id"
]
=
"posts//user_id"
columns
[
"title"
]
=
"posts//title"
columns
[
"content"
]
=
"posts//content"
columns
[
"permalink]"
]
=
"posts//permalink"
columns
[
"upvotes"
]
=
"posts//upvotes"
columns
[
"percentage_upvoted"
]
=
"posts//percentage_upvoted"
columns
[
"n_comments"
]
=
"posts//n_comments"
table
=
{
table
=
{
"name"
:
table_name
,
"name"
:
table_name
,
...
@@ -25,6 +34,18 @@ def add_table(use_case: str, table_name: str):
...
@@ -25,6 +34,18 @@ def add_table(use_case: str, table_name: str):
postTableToSwagger
(
use_case
,
table
)
postTableToSwagger
(
use_case
,
table
)
# { #EXAMPLE JSON
# "id":"larjmz",
# "user_id":"R3V0LV3Rocelot",
# "title":"Outrage As Robinhood CEO Confesses To Elon Musk: DTCC Shut Down Stocks In Gamestop; AMC Surge",
# "content":"",
# "permalink":"/r/news/comments/larjmz/outrage_as_robinhood_ceo_confesses_to_elon_musk/",
# "upvotes":57,
# "percentage_upvoted":0.86,
# "n_comments":19,
# "subreddit":"news"
# }
def
add_layers
(
use_case
:
str
,
table_name
:
str
):
def
add_layers
(
use_case
:
str
,
table_name
:
str
):
layers
=
[
layers
=
[
{
{
...
@@ -49,7 +70,7 @@ def add_layers(use_case:str, table_name: str):
...
@@ -49,7 +70,7 @@ def add_layers(use_case:str, table_name: str):
{
{
"use_case"
:
use_case
,
"use_case"
:
use_case
,
"table"
:
table_name
,
"table"
:
table_name
,
"name"
:
"User_
Post_
Layer"
,
"name"
:
"User_Layer"
,
"properties"
:
[
"properties"
:
[
"UniqueID"
,
"UniqueID"
,
"subreddit"
,
"subreddit"
,
...
@@ -81,13 +102,14 @@ def add_layers(use_case:str, table_name: str):
...
@@ -81,13 +102,14 @@ def add_layers(use_case:str, table_name: str):
"n_comments"
"n_comments"
],
],
"cluster_properties"
:
[
"cluster_properties"
:
[
"upvotes"
"upvotes"
,
"percentage_upvoted"
]
]
},
},
{
{
"use_case"
:
use_case
,
"use_case"
:
use_case
,
"table"
:
table_name
,
"table"
:
table_name
,
"name"
:
"
Liked
_Layer"
,
"name"
:
"
Percentage
_Layer"
,
"properties"
:
[
"properties"
:
[
"UniqueID"
,
"UniqueID"
,
"subreddit"
,
"subreddit"
,
...
@@ -127,7 +149,7 @@ def add_layers(use_case:str, table_name: str):
...
@@ -127,7 +149,7 @@ def add_layers(use_case:str, table_name: str):
postLayersToSwagger
(
use_case
,
layers
)
postLayersToSwagger
(
use_case
,
layers
)
def
main
(
use_case
:
str
):
def
main
(
use_case
:
str
):
print
(
"
posts
"
)
print
(
"
reddit
"
)
table_name
=
"
posts
"
table_name
=
"
reddit
"
add_table
(
use_case
,
table_name
)
add_table
(
use_case
,
table_name
)
add_layers
(
use_case
,
table_name
)
add_layers
(
use_case
,
table_name
)
src/participation-hub/business-logic-microservice/app/_add_use_case_scripts/reddit/tables/add_reddit_users.py
deleted
100644 → 0
View file @
81958c3d
from
_add_use_case_scripts.requestPost
import
postLayersToSwagger
,
postTableToSwagger
def add_table(use_case: str, table_name: str):
    '''
    Build the column mappings for the table and post them to the server.

    Both "UniqueID" and "name" are mapped to the "users//id" source path.
    '''
    # TODO: decide whether attributes from posts// should be mapped here as
    # well (subreddit, user_id, title, content, permalink, upvotes,
    # percentage_upvoted, n_comments).
    mappings = {
        "UniqueID": "users//id",
        "name": "users//id",
    }

    postTableToSwagger(use_case, {"name": table_name, "mappings": mappings})
def add_layers(use_case: str, table_name: str):
    '''Post the single "Name_Layer" layer definition for the given table.'''
    name_layer = {
        "use_case": use_case,
        "table": table_name,
        "name": "Name_Layer",
        "properties": ["UniqueID", "name"],
        "cluster_properties": ["name"],
    }

    postLayersToSwagger(use_case, [name_layer])
def main(use_case: str):
    '''Announce the "users" table, then register the table and its layers.'''
    users_table = "users"
    print(users_table)

    # The table has to be registered before the layers that refer to it.
    for register in (add_table, add_layers):
        register(use_case, users_table)
tools/reddit-upload/reddit_dataset.json
0 → 100644
View file @
776d16a1
This source diff could not be displayed because it is too large. You can
view the blob
instead.
tools/reddit-upload/upload_data.py
0 → 100644
View file @
776d16a1
''' This script uploads all records from the Reddit JSON dataset (reddit_dataset.json) to our pipeline.'''
import
csv
import
requests
import
sys
import
os
import
json
modules_path
=
'../../../modules/'
if
os
.
path
.
exists
(
modules_path
):
sys
.
path
.
insert
(
1
,
modules_path
)
#import network_constants as nc
#from security.token_manager import TokenManager
def send_transaction_to_rest_gateway(transaction: dict):
    '''
    POST a single transaction to the REST gateway's trace endpoint.

    The bearer token used to authorize the request is read from the
    JWT_TOKEN environment variable; it is deliberately not hard-coded here.

    :param transaction: JSON-serializable payload sent as the request body.
    :raises RuntimeError: if no token is configured in the environment.
    :raises Exception: if the gateway answers with a status code >= 400.
    '''
    # JWT_TOKEN used to be assigned only in commented-out lines, so building
    # the Authorization header failed with a NameError on every call.
    jwt_token = os.environ.get('JWT_TOKEN')
    if not jwt_token:
        raise RuntimeError(
            "No JWT token configured: set the JWT_TOKEN environment variable "
            "to a token issued by the REST gateway."
        )

    res = requests.post(
        url='https://articonf1.itec.aau.at:30401/api/trace',
        json=transaction,
        headers={"Authorization": f"Bearer {jwt_token}"},
        # NOTE(review): certificate verification is switched off to ignore an
        # SSL error on this host; that leaves the token open to interception.
        verify=False,
    )

    if res.status_code >= 400:
        raise Exception(f"Error while uploading: {str(res.content)}")

    print(res)
# file to read the data from (resolved relative to the current working directory)
JSON_DATASET = r'reddit_dataset.json'


def build_transaction(obj_dict: dict) -> dict:
    '''
    Return the upload payload for one dataset record.

    The payload starts with the 'ApplicationType' and 'docType' markers, both
    set to 'reddit', followed by every key of the record. A record key with
    the same name as a marker overrides the marker value. The record itself
    is not modified.
    '''
    return {'ApplicationType': 'reddit', 'docType': 'reddit', **obj_dict}


if __name__ == '__main__':
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(JSON_DATASET, 'r', encoding='utf-8') as json_file:
        json_data_list = json.load(json_file)

    # Stays 0 when the dataset is empty so the final summary is still correct.
    summ = 0
    for summ, obj_dict in enumerate(json_data_list, start=1):
        send_transaction_to_rest_gateway(build_transaction(obj_dict))

        # Progress report every 100 uploaded records.
        if summ % 100 == 0:
            print(f"Uploaded {summ} transactions.")

    print(f"TOTAL Uploaded {summ} transactions.")
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment