Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
SMART
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
3
Issues
3
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Registry
Registry
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
UNI-KLU
SMART
Commits
b0499784
Commit
b0499784
authored
May 05, 2021
by
Alexander Lercher
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[SemanticLinking] Added youtube dummy upload
parent
271733ea
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
97 additions
and
0 deletions
+97
-0
.gitignore
.../app/dummy_upload/community-prediction-youtube/.gitignore
+2
-0
dummy_upload.py
...dummy_upload/community-prediction-youtube/dummy_upload.py
+60
-0
check_uc.py
tools/check-use-case-data/check_uc.py
+35
-0
No files found.
src/data-hub/semantic-linking-microservice/app/dummy_upload/community-prediction-youtube/.gitignore
0 → 100644
View file @
b0499784
# videos.csv contains all the training data but is too large to commit.
videos.csv
\ No newline at end of file
src/data-hub/semantic-linking-microservice/app/dummy_upload/community-prediction-youtube/dummy_upload.py
0 → 100644
View file @
b0499784
import
csv
import
hashlib
import
sys
import
os
# Candidate directories to add to the import search path. Relative paths are
# resolved against the current working directory, so which ones exist depends
# on where the script is started from.
modules_paths = ['.', '../../../modules/']
for modules_path in modules_paths:
    # only existing directories are added to the import search path
    if os.path.exists(modules_path):
        # Insert at index 1 (right after sys.path[0]); because every hit is
        # inserted at the same index, a later entry of modules_paths ends up
        # in front of an earlier one.
        sys.path.insert(1, modules_path)
from
messaging.MessageHandler
import
MessageHandler
from
db.repository
import
Repository
# file to read the data from
# NOTE(review): relative path - presumably the script has to be started from
# the microservice's app/ directory; confirm.
CSV_FILE = r'dummy_upload/community-prediction-youtube/videos.csv'

# Module-level handler that upload_transaction() passes every trace to.
handler = MessageHandler(Repository())
import
csv
import
json
from
datetime
import
datetime
from
typing
import
Iterator
import
pandas
as
pd
from
pandas
import
DataFrame
def load_csv_content(csv_file=None) -> Iterator:
    '''Returns an iterator over all rows of a csv file.

    The file is read completely into a pandas DataFrame first; the returned
    iterator then yields one ``(index, row)`` tuple per data row, where
    ``row`` is a pandas Series keyed by the csv column names.

    Note that ``DataFrame.iterrows()`` does not preserve the column dtypes
    across a row (a row Series has a single dtype for all its values), so
    values may be upcast compared to the column they come from.

    :param csv_file: path of the csv file to read. Defaults to the module
        level ``CSV_FILE`` when omitted or None.
    :returns: iterator of ``(index, pandas.Series)`` tuples.
    '''
    if csv_file is None:
        # resolved at call time so the default always follows CSV_FILE
        csv_file = CSV_FILE

    dfs: DataFrame = pd.read_csv(csv_file)
    return dfs.iterrows()
def upload_transaction(transaction):
    '''Wraps a single csv row into a trace and passes it to the message handler.

    The row is flattened manually based on the table mapping:
    - ``video_id`` is additionally stored as ``UniqueID`` and used as trace id,
    - ``trend_duration`` is renamed to ``trend_delay``,
    - ``trending_timestamp`` is renamed to ``timestamp``.

    The passed mapping itself is not modified; the renaming happens on a
    shallow copy.

    :param transaction: mapping of column name to value for one row. Must
        contain the keys ``video_id``, ``trend_duration`` and
        ``trending_timestamp``.
    :raises KeyError: if one of the required keys is missing.
    '''
    uid = transaction['video_id']

    # manually flatten based on table mapping
    # (copy first so the caller's dict is left untouched)
    properties = dict(transaction)
    properties['UniqueID'] = uid
    properties['trend_delay'] = properties.pop('trend_duration')
    properties['timestamp'] = properties.pop('trending_timestamp')

    t = {
        'use_case': 'community-prediction-youtube-n',
        'table': 'community-prediction-youtube-n',
        'id': uid,
        'properties': properties,
    }

    handler.handle_new_trace(t)
if __name__ == '__main__':
    # Uploads every row of the csv file and reports the progress every
    # 1000 rows.

    # NOTE(review): presumably the number of rows in videos.csv; it is only
    # used to scale the progress output - confirm when the file changes.
    total_rows = 375942

    entries = load_csv_content()

    for idx, transaction in entries:
        upload_transaction(transaction.to_dict())

        if idx % 1000 == 0:
            # scale the fraction by 100 so the printed value really is a percentage
            print(f"Progress: {100 * idx / total_rows:.2f} %")
\ No newline at end of file
tools/check-use-case-data/check_uc.py
0 → 100644
View file @
b0499784
import requests
# Silences the warnings emitted by urllib3; httpget() below sends its
# requests with verify=False.
requests.packages.urllib3.disable_warnings()
from icecream import ic

# name of the use case that is inserted into all request urls below
uc = 'community-prediction-youtube'
def httpget(url, token=None, verify=False, timeout=None):
    '''Sends an authorized GET request and returns the response.

    :param url: url to request.
    :param token: bearer token for the Authorization header. If omitted, the
        environment variable ``SMART_API_TOKEN`` is used; if that is not set
        either, the token embedded below is used as before.
    :param verify: passed to ``requests.get``. Defaults to False (no TLS
        certificate verification) to keep the previous behaviour.
    :param timeout: passed to ``requests.get``. Defaults to None (wait
        forever) to keep the previous behaviour.
    :returns: the ``requests.Response``; the status code is NOT checked.
    '''
    import os  # local import: only needed for the environment lookup

    if token is None:
        # SECURITY NOTE(review): a credential is committed to the repository
        # here. It is only kept as last fallback for backward compatibility
        # and should be revoked and removed. Its payload appears to encode a
        # validity of a single day - confirm; if so, a fresh token has to be
        # supplied via the parameter or SMART_API_TOKEN anyway.
        token = os.environ.get(
            'SMART_API_TOKEN',
            'eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJ1c2VybmFtZSI6InJlZ3VsYXJAaXRlYy5hYXUuYXQiLCJjcmVhdGVkX2F0IjoiMjAyMS0wNS0wNSAxMTozNjozOC4yMzAxODEiLCJ2YWxpZF91bnRpbCI6IjIwMjEtMDUtMDYgMTE6MzY6MzguMjMwMTgxIn0.Fz6iPpA0CnrXlOCj-VuCHFzc58H9Of2cBYHOb_RqvzI',
        )

    # SECURITY NOTE(review): verify=False disables TLS certificate checks;
    # pass verify=True (or a CA bundle path) once the servers have
    # certificates that validate.
    res = requests.get(
        url,
        verify=verify,
        headers={"Authorization": f"Bearer {token}"},
        timeout=timeout,
    )
    return res
# list tables
# The response body is expected to be a JSON list whose entries have a
# 'name' key; only the names are printed.
res = httpget(url=f'https://articonf1.itec.aau.at:30420/api/use-cases/{uc}/tables')
print("Tables: ", [entry['name'] for entry in res.json()])
# count pushed data
def count_data(json_res, table_identifier='table'):
    '''Counts the entries of json_res grouped by one of their fields.

    The resulting counts are printed with ``ic`` and also returned, so they
    can be used programmatically.

    :param json_res: iterable of mappings (e.g. the decoded JSON list of
        transactions).
    :param table_identifier: key whose value is used to group the entries.
    :returns: dict mapping each distinct ``entry[table_identifier]`` value to
        its number of occurrences, in order of first appearance.
    :raises KeyError: if an entry has no ``table_identifier`` key.
    '''
    from collections import Counter  # local import: stdlib helper only used here

    # converted back to a plain dict so the ic() output keeps its format
    tables = dict(Counter(entry[table_identifier] for entry in json_res))

    ic(tables)
    return tables
# Count the entries returned by the three transaction endpoints of the use
# case. Each call prints its counts via count_data().
# NOTE(review): these urls use port 30001 and 'use_cases' while the table
# listing above uses port 30420 and 'use-cases' - presumably two different
# services; confirm.

# transactions: grouped by the default 'table' key
res = httpget(url=f'https://articonf1.itec.aau.at:30001/api/use_cases/{uc}/transactions')
count_data(res.json())

# failed transactions: grouped by their 'docType' key instead
res_f = httpget(url=f'https://articonf1.itec.aau.at:30001/api/use_cases/{uc}/transactions-failed')
count_data(res_f.json(), 'docType')

# duplicated transactions: grouped by the default 'table' key
res_d = httpget(url=f'https://articonf1.itec.aau.at:30001/api/use_cases/{uc}/transactions-duplicated')
count_data(res_d.json())
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment