Commit d4ce41ed authored by Bogdan

Updated the schema

parent 17f6cf8b
......@@ -23,7 +23,8 @@ def add_table(use_case: str, table_name: str):
"upvotes",
"percentage_upvoted",
"n_comments",
"subreddit"
"subreddit",
"created_at"
]
columns = { c : c for c in columns }
......@@ -50,10 +51,29 @@ def add_table(use_case: str, table_name: str):
def add_layers(use_case:str, table_name: str):
layers = [
# { #subreddit is string cannot cluster
# "use_case": use_case,
# "table": table_name,
# "name": "Subreddit_Layer",
# "properties": [
# "UniqueID",
# "subreddit",
# "user_id",
# "title",
# "content",
# "permalink",
# "upvotes",
# "percentage_upvoted",
# "n_comments"
# ],
# "cluster_properties": [
# "subreddit"
# ]
# },
{
"use_case": use_case,
"table": table_name,
"name": "Subreddit_Layer",
"name": "Upvotes_Layer", #TODO Probably do something like Total Votes? so we can get a popularity?
"properties": [
"UniqueID",
"subreddit",
......@@ -63,35 +83,18 @@ def add_layers(use_case:str, table_name: str):
"permalink",
"upvotes",
"percentage_upvoted",
"n_comments"
"n_comments",
"created_at"
],
"cluster_properties": [
"subreddit"
]
},
{
"use_case": use_case,
"table": table_name,
"name": "User_Layer",
"properties": [
"UniqueID",
"subreddit",
"user_id",
"title",
"content",
"permalink",
"upvotes",
"percentage_upvoted",
"n_comments"
],
"cluster_properties": [
"user_id"
"percentage_upvoted"
]
},
{
"use_case": use_case,
"table": table_name,
"name": "Upvotes_Layer", #TODO Probably do something like Total Votes? so we can get a popularity?
"name": "Percentage_Layer",
"properties": [
"UniqueID",
"subreddit",
......@@ -101,17 +104,17 @@ def add_layers(use_case:str, table_name: str):
"permalink",
"upvotes",
"percentage_upvoted",
"n_comments"
"n_comments",
"created_at"
],
"cluster_properties": [
"upvotes",
"percentage_upvoted"
]
},
{
"use_case": use_case,
"table": table_name,
"name": "Percentage_Layer",
"name": "Engagement_Layer",
"properties": [
"UniqueID",
"subreddit",
......@@ -121,16 +124,17 @@ def add_layers(use_case:str, table_name: str):
"permalink",
"upvotes",
"percentage_upvoted",
"n_comments"
"n_comments",
"created_at"
],
"cluster_properties": [
"percentage_upvoted"
"n_comments"
]
},
{
"use_case": use_case,
"table": table_name,
"name": "Engagement_Layer",
"name": "Time_Layer",
"properties": [
"UniqueID",
"subreddit",
......@@ -140,10 +144,11 @@ def add_layers(use_case:str, table_name: str):
"permalink",
"upvotes",
"percentage_upvoted",
"n_comments"
"n_comments",
"created_at"
],
"cluster_properties": [
"n_comments"
"created_at"
]
}
]
......
This diff is collapsed.
[{"id": "lat2of", "user_id": "JamesKBoyd", "title": "California man falls to his death while canyoneering at Death Valley", "content": "", "permalink": "/r/news/comments/lat2of/california_man_falls_to_his_death_while/", "upvotes": 0, "percentage_upvoted": 0.5, "n_comments": 3, "subreddit": "news"}, {"id": "lasy7g", "user_id": "watercolornightmares", "title": "How Rich Hospitals Profit From Patients in Car Crashes", "content": "", "permalink": "/r/news/comments/lasy7g/how_rich_hospitals_profit_from_patients_in_car/", "upvotes": 2, "percentage_upvoted": 0.57, "n_comments": 3, "subreddit": "news"}, {"id": "lasrjq", "user_id": "wilmots1", "title": "Moscow court hears case for jailing Putin critic Navalny", "content": "", "permalink": "/r/news/comments/lasrjq/moscow_court_hears_case_for_jailing_putin_critic/", "upvotes": 9, "percentage_upvoted": 0.84, "n_comments": 0, "subreddit": "news"}]
\ No newline at end of file
......@@ -30,7 +30,7 @@ def send_transaction_to_rest_gateway(transaction: dict):
if res.status_code >= 400:
raise Exception(f"Error while uploading: {str(res.content)}")
print(res)
#print(res) Lots of spam
# file to read the data from
JSON_DATASET = r'reddit_dataset.json'
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment