Skip to content

Commit

Permalink
dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
dknoma committed May 12, 2020
1 parent ee7a892 commit b4a91d0
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 0 deletions.
19 changes: 19 additions & 0 deletions data/combine.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import pandas as pd
# Build the combined dataset: per-game metadata joined with its reviews.
#   Inputs : steam_games_titles.csv, steam_reviews.csv (current directory)
#   Output : steam_combined_final.csv

# Game-metadata columns removed before the merge.
GAME_COLUMNS_TO_DROP = [
    "url", "languages", "popular_tags", "game_details", "release_date",
    "developer", "publisher", "game_description", "achievements",
    "mature_content", "recent_reviews", "minimum_requirements",
    "recommended_requirements", "types", "desc_snippet", "original_price",
    "discount_price",
]
# Review columns removed before the merge.
REVIEW_COLUMNS_TO_DROP = ["funny", "is_early_access_review", "review"]
# Reviews posted in this calendar year are left out of the output.
EXCLUDED_REVIEW_YEAR = 2019

# Malformed rows in the games file are skipped with a warning instead of
# aborting the load. `on_bad_lines` (pandas >= 1.3) replaces the
# `error_bad_lines=False` flag, which was removed in pandas 2.0.
games = pd.read_csv('steam_games_titles.csv', on_bad_lines='warn')
reviews = pd.read_csv('steam_reviews.csv')

# Keep only the games whose title appears in the reviews file.
reviewed_titles = reviews['title'].unique()
games = games.loc[games['title'].isin(reviewed_titles)]

# Reassign instead of dropping in place so the filtered frame is never
# mutated through a possible view of the original.
games = games.drop(columns=GAME_COLUMNS_TO_DROP)
reviews = reviews.drop(columns=REVIEW_COLUMNS_TO_DROP)

# read_csv does not parse dates unless asked to, so the `.dt` accessor is not
# available on the raw column. Parse into a temporary Series so the values
# written to the output keep their original text.
# NOTE(review): assumes date_posted holds date strings that pandas can
# parse -- confirm the format against the source data.
review_year = pd.to_datetime(reviews['date_posted']).dt.year
reviews = reviews[review_year != EXCLUDED_REVIEW_YEAR]

# Inner join (the merge default) on the shared title column.
combined = games.merge(reviews, on='title')

combined.to_csv('steam_combined_final.csv')
11 changes: 11 additions & 0 deletions data/reduce.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import pandas as pd
# Build the reduced games dataset: only games that have reviews, with the
# bulky descriptive columns removed.
#   Inputs : steam_games_titles.csv, steam_reviews.csv (current directory)
#   Output : steam_reduced_final.csv

# Game-metadata columns removed from the reduced dataset.
GAME_COLUMNS_TO_DROP = [
    "url", "all_reviews", "genre", "languages", "popular_tags",
    "game_details", "release_date", "developer", "publisher",
    "game_description", "achievements", "mature_content", "recent_reviews",
    "minimum_requirements", "recommended_requirements", "types",
    "desc_snippet", "original_price", "discount_price",
]

# Malformed rows in the games file are skipped with a warning instead of
# aborting the load. `on_bad_lines` (pandas >= 1.3) replaces the
# `error_bad_lines=False` flag, which was removed in pandas 2.0.
games = pd.read_csv('steam_games_titles.csv', on_bad_lines='warn')
reviews = pd.read_csv('steam_reviews.csv')

# Keep only the games whose title appears in the reviews file.
reviewed_titles = reviews['title'].unique()
games = games.loc[games['title'].isin(reviewed_titles)]

# Reassign instead of dropping in place so the filtered frame is never
# mutated through a possible view of the original.
games = games.drop(columns=GAME_COLUMNS_TO_DROP)

games.to_csv('steam_reduced_final.csv')
13 changes: 13 additions & 0 deletions dataset.html
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,19 @@ <h1>Steam Games Complete Dataset</h1>
<li>The title of the game.</li>
</ul></li>
</ol>

<blockquote>
<p>
"This data is made available under the Public Domain Dedication and License v1.0 whose full text can be found at: <a href="http://www.opendatacommons.org/licenses/pddl/1.0/">www.opendatacommons.org/licenses/pddl/1.0/</a>"
</p>
<cite>Resource retrieved from <a href="https://www.kaggle.com/trolukovich/steam-games-complete-dataset">
https://www.kaggle.com/trolukovich/steam-games-complete-dataset</a>, accessed May 5, 2020</cite>
</blockquote>
<div>
<h3>Note</h3>
<p>I included the reduced and combined datasets (steam_combined_final.csv and
steam_reduced_with_genre_no_reviews.json), along with the scripts used to do the wrangling.</p>
</div>
</div>
<!-- End page content -->
</div>
Expand Down

0 comments on commit b4a91d0

Please sign in to comment.