Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
SMART
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
3
Issues
3
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Registry
Registry
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
UNI-KLU
SMART
Commits
dd9f898f
Commit
dd9f898f
authored
Feb 15, 2021
by
Bogdan
Browse files
Options
Browse Files
Download
Plain Diff
Merge remote-tracking branch 'origin/feauture/schema-reddit' into develop
parents
2f7e09be
d4ce41ed
Changes
4
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
274 additions
and
0 deletions
+274
-0
add_smart_reddit_schema.py
...p/_add_use_case_scripts/reddit/add_smart_reddit_schema.py
+37
-0
add_reddit.py
...ice/app/_add_use_case_scripts/reddit/tables/add_reddit.py
+175
-0
reddit_dataset.json
tools/reddit-upload/reddit_dataset.json
+1
-0
upload_data.py
tools/reddit-upload/upload_data.py
+61
-0
No files found.
src/participation-hub/business-logic-microservice/app/_add_use_case_scripts/reddit/add_smart_reddit_schema.py
0 → 100644
View file @
dd9f898f
import
sys
import
os
from
pathlib
import
Path
from
typing
import
Dict
,
Any
import
requests
modules_path
=
'../../../modules/'
if
os
.
path
.
exists
(
modules_path
):
sys
.
path
.
insert
(
1
,
modules_path
)
import
network_constants
as
nc
from
security.token_manager
import
TokenManager
import
tables.add_reddit
as
reddit
def add_use_case(use_case: str):
    """Register ``use_case`` by POSTing its name to the use-cases endpoint.

    A bearer token is fetched from ``TokenManager`` and sent in the
    ``Authorization`` header. The request is made with certificate
    verification switched off and with both proxy entries set to ``None``.
    The endpoint URL and the HTTP status code of the reply are printed.
    """
    endpoint = "https://articonf1.itec.aau.at:30420/api/use-cases"
    token = TokenManager.getInstance().getToken()

    auth_header = {"Authorization": f"Bearer {token}"}
    proxy_settings = {"http": None, "https": None}
    payload = {"name": use_case}

    reply = requests.post(
        endpoint,
        verify=False,
        proxies=proxy_settings,
        headers=auth_header,
        json=payload,
    )

    print(f"{endpoint}: {reply.status_code}")
if __name__ == "__main__":
    # The requests in this script are sent with verify=False; silence the
    # urllib3 warnings that would otherwise be printed for each of them.
    requests.packages.urllib3.disable_warnings()

    target_use_case = "reddit"

    # Register the use case first, then create its table and layers.
    add_use_case(target_use_case)
    reddit.main(target_use_case)
\ No newline at end of file
src/participation-hub/business-logic-microservice/app/_add_use_case_scripts/reddit/tables/add_reddit.py
0 → 100644
View file @
dd9f898f
import
network_constants
as
nc
from
security.token_manager
import
TokenManager
import
requests
def add_table(use_case: str, table_name: str):
    """Create ``table_name`` for ``use_case`` with its column mappings.

    Each source column is mapped onto itself, and an additional ``UniqueID``
    mapping is defined as the concatenation ``user_id+subreddit+id``. The
    table definition is POSTed to the tables endpoint of ``use_case``; the
    URL and the HTTP status code of the reply are printed.

    Args:
        use_case: Name of the use case the table belongs to. It is used in
            the endpoint URL (previously it was overwritten with the
            hard-coded value ``"reddit"`` and therefore ignored).
        table_name: Name of the table to create.
    """
    source_columns = [
        # "doctype",
        "id",
        "user_id",
        "title",
        "content",
        "permalink",
        "upvotes",
        "percentage_upvoted",
        "n_comments",
        "subreddit",
        "created_at",
    ]

    # Identity mapping: every column keeps its own name.
    mappings = {column: column for column in source_columns}
    mappings["UniqueID"] = "user_id+subreddit+id"

    table = {
        "name": table_name,
        "mappings": mappings,
    }

    url = f"https://articonf1.itec.aau.at:30420/api/use-cases/{use_case}/tables"
    jwt = TokenManager.getInstance().getToken()

    response = requests.post(
        url,
        verify=False,
        proxies={"http": None, "https": None},
        headers={"Authorization": f"Bearer {jwt}"},
        json=table,
    )

    print(url + ": " + str(response.status_code))
def add_layers(use_case: str, table_name: str):
    """POST the layer definitions of ``table_name`` to the layers endpoint.

    Four layers are created. They all expose the same list of properties and
    differ only in their name and in the properties they are clustered on.
    One bearer token is fetched from ``TokenManager`` and reused for every
    request; after each request the URL and the HTTP status code are printed.
    """
    shared_properties = [
        "UniqueID",
        "subreddit",
        "user_id",
        "title",
        "content",
        "permalink",
        "upvotes",
        "percentage_upvoted",
        "n_comments",
        "created_at",
    ]

    # (layer name, properties the layer is clustered on)
    # A "Subreddit_Layer" clustered on "subreddit" is not created:
    # subreddit is a string and cannot be clustered.
    layer_specs = [
        # TODO Probably do something like Total Votes? so we can get a popularity?
        ("Upvotes_Layer", ["upvotes", "percentage_upvoted"]),
        ("Percentage_Layer", ["percentage_upvoted"]),
        ("Engagement_Layer", ["n_comments"]),
        ("Time_Layer", ["created_at"]),
    ]

    layers = [
        {
            "use_case": use_case,
            "table": table_name,
            "name": layer_name,
            "properties": list(shared_properties),
            "cluster_properties": cluster_on,
        }
        for layer_name, cluster_on in layer_specs
    ]

    token = TokenManager.getInstance().getToken()
    endpoint = "https://articonf1.itec.aau.at:30420/api/layers"
    auth_header = {"Authorization": f"Bearer {token}"}

    for layer_definition in layers:
        reply = requests.post(
            endpoint,
            verify=False,
            proxies={"http": None, "https": None},
            headers=auth_header,
            json=layer_definition,
        )
        print(f"{endpoint}: {reply.status_code}")
def main(use_case: str):
    """Create the ``reddit`` table and then its layers for ``use_case``."""
    print("reddit")

    reddit_table = "reddit"
    # Table first, layers second: both steps take the same arguments.
    for setup_step in (add_table, add_layers):
        setup_step(use_case, reddit_table)
tools/reddit-upload/reddit_dataset.json
0 → 100644
View file @
dd9f898f
This diff is collapsed.
Click to expand it.
tools/reddit-upload/upload_data.py
0 → 100644
View file @
dd9f898f
'''Upload every record from the reddit JSON dataset to our pipeline.'''
import
csv
import
requests
import
sys
import
os
import
json
import
urllib3
urllib3
.
disable_warnings
(
urllib3
.
exceptions
.
InsecureRequestWarning
)
# modules_path = '../../../modules/'
# if os.path.exists(modules_path):
# sys.path.insert(1, modules_path)
# import network_constants as nc
# from security.token_manager import TokenManager
def send_transaction_to_rest_gateway(transaction: dict):
    """POST one transaction to the REST gateway's trace endpoint.

    The bearer token is read from the ``REST_GATEWAY_JWT`` environment
    variable; when that variable is unset or empty, the token embedded below
    is used, so existing invocations keep working.

    Args:
        transaction: JSON-serialisable payload sent as the request body.

    Raises:
        Exception: If the gateway answers with a status code of 400 or above.
        requests.exceptions.RequestException: If the request itself fails,
            including when no response arrives within the timeout.
    """
    # NOTE(review): this credential is committed to source control and, judging
    # by its payload, looks like a short-lived token - confirm and prefer
    # supplying a fresh one through REST_GATEWAY_JWT.
    fallback_token = 'eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJ1c2VybmFtZSI6InJlZ3VsYXJAaXRlYy5hYXUuYXQiLCJjcmVhdGVkX2F0IjoiMjAyMS0wMi0wOCAxMzo0NzoxOC40NzUxMjEiLCJ2YWxpZF91bnRpbCI6IjIwMjEtMDItMDkgMTM6NDc6MTguNDc1MTIxIn0.DWY9c0X2XQJDz0Ef35-k1IVY6GWf00ogaVOCeX8Irlo'
    # token from Rest Gateway to authorize
    jwt_token = os.environ.get('REST_GATEWAY_JWT') or fallback_token

    res = requests.post(
        url='https://articonf1.itec.aau.at:30401/api/trace',
        json=transaction,
        headers={"Authorization": f"Bearer {jwt_token}"},
        verify=False,  # ignore ssl error
        timeout=60,  # without a timeout a stalled connection blocks the upload forever
    )

    if res.status_code >= 400:
        raise Exception(f"Error while uploading: {str(res.content)}")

    # Successful responses are not printed on purpose: too much output.
# Path of the JSON file holding the records to upload.
JSON_DATASET = r'reddit_dataset.json'

if __name__ == '__main__':
    with open(JSON_DATASET, 'r') as json_file:
        records = json.load(json_file)

    uploaded = 0
    for record in records:
        # The two fixed routing fields come first; the record's own keys
        # follow and take precedence if a name collides.
        payload = {'ApplicationType': 'reddit', 'docType': 'reddit', **record}
        send_transaction_to_rest_gateway(payload)

        uploaded += 1
        # Report progress once per 1000 uploaded records.
        if uploaded % 1000 == 0:
            print(f"Uploaded {uploaded} transactions.")

    print(f"TOTAL Uploaded {uploaded} transactions.")
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment