-
Notifications
You must be signed in to change notification settings - Fork 97
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implements MovieRecommender class #72
Open
ghost
wants to merge
7
commits into
rilopez:master
Choose a base branch
from
unknown repository
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
07ce8dd
Test pass check (with faked results).
cc4eaa7
Open file, read and print first 20 lines
50e84c2
Generate .csv file with required information
250221c
Computes and prints recommendations
4242ed2
Passes test successfully
8a1d551
Added comments and cleaned up code
5e9616b
Code refactoring
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
71 changes: 71 additions & 0 deletions
71
src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
package nearsoft.academy.bigdata.recommendation; | ||
|
||
import org.apache.mahout.cf.taste.common.TasteException; | ||
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel; | ||
import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood; | ||
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender; | ||
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity; | ||
import org.apache.mahout.cf.taste.model.DataModel; | ||
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood; | ||
import org.apache.mahout.cf.taste.recommender.RecommendedItem; | ||
import org.apache.mahout.cf.taste.recommender.UserBasedRecommender; | ||
import org.apache.mahout.cf.taste.similarity.UserSimilarity; | ||
|
||
import java.util.LinkedList; | ||
import java.util.List; | ||
import java.io.*; | ||
|
||
public class MovieRecommender { | ||
private ReviewInfo reviewInfo; | ||
|
||
public MovieRecommender(String filePath) { | ||
ReviewDataReader reader = new ReviewDataReader(); | ||
|
||
this.reviewInfo = new ReviewInfo(); | ||
reader.readData(filePath, this.reviewInfo); | ||
} | ||
|
||
/** | ||
* @return Total number of reviews. | ||
*/ | ||
public int getTotalReviews() { | ||
return this.reviewInfo.getTotalReviews(); | ||
} | ||
|
||
/** | ||
* @return Total number of products. | ||
*/ | ||
public int getTotalProducts() { | ||
return this.reviewInfo.getProducts().size(); | ||
} | ||
|
||
/** | ||
* @return Total number of users. | ||
*/ | ||
public int getTotalUsers() { | ||
return this.reviewInfo.getUsers().size(); | ||
} | ||
|
||
/** | ||
* Gets 3 recommendations for a given user. | ||
* | ||
* @param userID The ID of the user. | ||
* @return List of IDs of the recommended products. | ||
*/ | ||
public List<String> getRecommendationsForUser(String userID) throws IOException, TasteException { | ||
DataModel model = new FileDataModel(new File("movies.csv")); | ||
UserSimilarity similarity = new PearsonCorrelationSimilarity(model); | ||
UserNeighborhood neighborhood = new ThresholdUserNeighborhood(0.1, similarity, model); | ||
UserBasedRecommender recommender = new GenericUserBasedRecommender(model, neighborhood, similarity); | ||
|
||
List<String> recommendedTitles = new LinkedList<String>(); // List of alphanumeric product IDs. | ||
List<RecommendedItem> recommendations = recommender.recommend(this.reviewInfo.getUserIndex(userID), 3); | ||
|
||
for (RecommendedItem recommendation : recommendations) { | ||
recommendedTitles.add(this.reviewInfo.getProducts().get(recommendation.getItemID())); | ||
} | ||
|
||
return recommendedTitles; | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
58 changes: 58 additions & 0 deletions
58
src/test/java/nearsoft/academy/bigdata/recommendation/ReviewDataReader.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
package nearsoft.academy.bigdata.recommendation; | ||
|
||
import java.io.*; | ||
import java.util.zip.GZIPInputStream; | ||
|
||
public class ReviewDataReader { | ||
|
||
/** | ||
* Reads the "movies.txt.gz" file. | ||
* Generates a .csv file with the relevant information in the correct format. | ||
* Loads the users, products and reviews info. | ||
*/ | ||
public void readData(String filePath, ReviewInfo info) { | ||
File file = new File(filePath); | ||
|
||
try { | ||
InputStream in = new GZIPInputStream(new FileInputStream(file)); | ||
Reader decoder = new InputStreamReader(in, "US-ASCII"); | ||
BufferedReader br = new BufferedReader(decoder); | ||
String line; | ||
String productID = ""; | ||
String userID = ""; | ||
|
||
File fout = new File("movies.csv"); | ||
FileOutputStream fos = new FileOutputStream(fout); | ||
BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fos)); | ||
String newLine; | ||
|
||
int productCounter = 0; | ||
int userCounter = 0; | ||
|
||
while ((line = br.readLine()) != null) { | ||
if (line.startsWith("product/productId")) { | ||
productID = line.split(": ")[1]; | ||
if (!info.containsProduct(productID)) { | ||
info.putProduct(new Long(productCounter), productID); | ||
productCounter++; | ||
} | ||
} else if (line.startsWith("review/userId")) { | ||
userID = line.split(": ")[1]; | ||
if (!info.containsUser(userID)) { | ||
info.putUser(new Long(userCounter), userID); | ||
userCounter++; | ||
} | ||
} else if (line.startsWith("review/score")) { | ||
newLine = info.getUserIndex(userID) + "," + info.getProductIndex(productID) + "," + line.split(": ")[1]; | ||
bw.write(newLine); | ||
bw.newLine(); | ||
info.setTotalReviews(info.getTotalReviews() + 1); | ||
} | ||
} | ||
bw.close(); | ||
|
||
} catch (IOException e) { | ||
e.printStackTrace(); | ||
} | ||
} | ||
} |
104 changes: 104 additions & 0 deletions
104
src/test/java/nearsoft/academy/bigdata/recommendation/ReviewInfo.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
package nearsoft.academy.bigdata.recommendation; | ||
|
||
import com.google.common.collect.BiMap; | ||
import com.google.common.collect.HashBiMap; | ||
|
||
/** | ||
* Stores the Amazon movie reviews information | ||
*/ | ||
public class ReviewInfo { | ||
|
||
private BiMap<Long, String> users; // Collection of distinct users present in the reviews | ||
private BiMap<Long, String> products; // Collection of distinct products present in the reviews | ||
private int totalReviews; // Number of total reviews | ||
|
||
public ReviewInfo() { | ||
this.users = HashBiMap.create(); | ||
this.products = HashBiMap.create(); | ||
this.totalReviews = 0; | ||
} | ||
|
||
public BiMap<Long, String> getUsers() { | ||
return users; | ||
} | ||
|
||
public void setUsers(BiMap<Long, String> users) { | ||
this.users = users; | ||
} | ||
|
||
/** | ||
* Checks if the specified user exists in the reviews | ||
* | ||
* @param userID Alphanumeric user ID | ||
* @return TRUE if the user is found, FALSE otherwise | ||
*/ | ||
public boolean containsUser(String userID) { | ||
return this.users.containsValue(userID); | ||
} | ||
|
||
/** | ||
* Adds a new user to the users collection | ||
* | ||
* @param key BiMap key | ||
* @param userID Alphanumeric user ID | ||
*/ | ||
public void putUser(Long key, String userID) { | ||
this.users.put(key, userID); | ||
} | ||
|
||
/** | ||
* Finds the index of the specified user in the users collection | ||
* | ||
* @param userID Alphanumeric user ID | ||
* @return Index position of the user | ||
*/ | ||
public Long getUserIndex(String userID) { | ||
return this.users.inverse().get(userID); | ||
} | ||
|
||
public BiMap<Long, String> getProducts() { | ||
return products; | ||
} | ||
|
||
public void setProducts(BiMap<Long, String> products) { | ||
this.products = products; | ||
} | ||
|
||
/** | ||
* Checks if the specified product exists in the reviews | ||
* | ||
* @param productID Alphanumeric product ID | ||
* @return TRUE if the product is found, FALSE otherwise | ||
*/ | ||
public boolean containsProduct(String productID) { | ||
return this.products.containsValue(productID); | ||
} | ||
|
||
/** | ||
* Adds a new product to the products collection | ||
* | ||
* @param key BiMap key | ||
* @param productID Alphanumeric product ID | ||
*/ | ||
public void putProduct(Long key, String productID) { | ||
this.products.put(key, productID); | ||
} | ||
|
||
/** | ||
* Finds the index of the specified product in the product collection | ||
* | ||
* @param productID Alphanumeric product ID | ||
* @return Index position of the product | ||
*/ | ||
public Long getProductIndex(String productID) { | ||
return this.products.inverse().get(productID); | ||
} | ||
|
||
public int getTotalReviews() { | ||
return totalReviews; | ||
} | ||
|
||
public void setTotalReviews(int totalReviews) { | ||
this.totalReviews = totalReviews; | ||
} | ||
} |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
move version to properties