Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
SMART
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
3
Issues
3
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Registry
Registry
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
UNI-KLU
SMART
Commits
688215a0
Commit
688215a0
authored
Dec 01, 2020
by
Alexander Lercher
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Upload of data set to semantic linking
parent
b1b2337a
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
98 additions
and
0 deletions
+98
-0
.gitignore
...ice/app/dummy_upload/community-prediction-taxi/.gitignore
+2
-0
dummy_upload.py
...pp/dummy_upload/community-prediction-taxi/dummy_upload.py
+96
-0
No files found.
src/data-hub/semantic-linking-microservice/app/dummy_upload/community-prediction-taxi/.gitignore
0 → 100644
View file @
688215a0
# train.csv contains all the training data; it is excluded from the repository because it is around 1.8 GB.
train.csv
\ No newline at end of file
src/data-hub/semantic-linking-microservice/app/dummy_upload/community-prediction-taxi/dummy_upload.py
0 → 100644
View file @
688215a0
import
csv
import
hashlib
import
sys
import
os
# Candidate directories to put on the import path for the modules imported
# further down in this file (messaging, db).
modules_paths = ['.', '../../../modules/']
for modules_path in modules_paths:
    # a candidate that does not exist (relative to the current working directory) is ignored
    if not os.path.exists(modules_path):
        continue
    # index 1 leaves sys.path[0] untouched but still ranks before the remaining entries
    sys.path.insert(1, modules_path)
from
messaging.MessageHandler
import
MessageHandler
from
db.repository
import
Repository
# file to read the data from
# (relative path, so it is resolved against the working directory the script is started from)
CSV_FILE = r'dummy_upload/community-prediction-taxi/train.csv'

# handler that every flattened transaction is passed to via handle_new_trace();
# it is constructed with a fresh Repository instance
handler = MessageHandler(Repository())
import
csv
import
json
from
datetime
import
datetime
from
typing
import
Iterator
# Integer codes for the single-letter values of the enum-like csv columns
# (call_type and day_type are translated through this table while loading).
enum_mapping = dict(A=1, B=2, C=3)
def load_csv_content(csv_path=None) -> Iterator:
    '''Returns a generator for all lines in the csv file with correct field types.

    The first row is used as header; its names are lower-cased and become the
    keys of every yielded dict. Fields are converted by column position:

    - 0 (trip_id): kept as string
    - 1, 6 (call_type, day_type): translated through enum_mapping
    - 2, 3, 4 (origin_call, origin_stand, taxi_id): int, or "" if the field is empty
    - 5 (timestamp): int (kept numeric because datetime is not serializable)
    - 7 (missing_data): bool, True only for the case-insensitive text 'true'
    - 8 (polyline): parsed with json.loads
    - any further column: left unchanged

    An empty file yields nothing and completely blank rows are skipped.

    :param csv_path: path of the csv file to read; CSV_FILE is used if omitted
    :returns: iterator yielding one dict per data row
    :raises KeyError: if an enum column holds a value missing from enum_mapping
    :raises ValueError: if an int column is not numeric or the polyline is not valid JSON
    :raises IndexError: if a non-blank row has fewer fields than a converted column needs
    '''
    def to_enum(value):
        return enum_mapping[value]

    def to_optional_int(value):
        return int(value) if value != "" else ""

    def to_bool(value):
        return value.lower() == 'true'

    # column index -> conversion applied to the raw csv string
    converters = {
        1: to_enum,
        2: to_optional_int,
        3: to_optional_int,
        4: to_optional_int,
        5: int,
        6: to_enum,
        7: to_bool,
        8: json.loads,
    }

    path = CSV_FILE if csv_path is None else csv_path
    # newline='' is what the csv module expects, otherwise line breaks inside
    # quoted fields are altered; the encoding is fixed so that reading does not
    # depend on the platform default
    with open(path, newline='', encoding='utf-8') as csv_file:
        reader = csv.reader(csv_file)

        # next() without default would leak StopIteration out of the generator,
        # which Python turns into a RuntimeError
        header_row = next(reader, None)
        if header_row is None:
            return
        headers = [h.lower() for h in header_row]

        # only columns announced by the header are converted, in ascending order
        active = [(i, convert) for i, convert in sorted(converters.items())
                  if i < len(headers)]

        for line in reader:
            if not line:
                # csv.reader returns [] for a blank line
                continue
            for i, convert in active:
                line[i] = convert(line[i])
            yield dict(zip(headers, line))
def upload_transaction(transaction):
    '''Flattens one transaction and passes it to the handler as a new trace.

    The trip id is stored as 'UniqueID', the polyline is reduced to the two
    coordinates of its first and of its last point, and the 'trip_id' and
    'polyline' entries are removed afterwards. A transaction whose polyline is
    empty is printed and skipped (it still receives the 'UniqueID' entry).

    The passed dict is modified in place and becomes the trace's 'properties'.
    '''
    trip_uid = transaction['trip_id']
    transaction['UniqueID'] = trip_uid

    route = transaction['polyline']
    if len(route) == 0:
        print(f"skipping transaction: {transaction}")
        return

    # manual flattening based on the table mapping: only the first and the last
    # point of the route are kept
    # NOTE(review): index 0 is stored as latitude and index 1 as longitude; if the
    # polyline points are [longitude, latitude] pairs these are swapped - confirm
    # against the data set description and the table mapping.
    first_point, last_point = route[0], route[-1]
    transaction['start_location_lat'] = first_point[0]
    transaction['start_location_long'] = first_point[1]
    transaction['end_location_lat'] = last_point[0]
    transaction['end_location_long'] = last_point[1]

    # the source fields are replaced by the flattened ones above
    for obsolete_key in ('trip_id', 'polyline'):
        del transaction[obsolete_key]

    handler.handle_new_trace({
        'use_case': 'community-prediction-taxi',
        'table': 'community-prediction-taxi',
        'id': trip_uid,
        'properties': transaction,
    })
if __name__ == '__main__':
    # Uploads every row of the csv file and reports the progress every 1000 rows.

    # divisor for the progress output - presumably the number of data rows in
    # train.csv (TODO confirm)
    total_rows = 1710671

    entries = load_csv_content()
    for idx, transaction in enumerate(entries):
        upload_transaction(transaction)

        if idx % 1000 == 0:
            # idx / total_rows is a fraction between 0 and 1, so it has to be
            # scaled by 100 to match the '%' unit that is printed
            print(f"Progress: {100 * idx / total_rows:.2f} %")
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment