Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implements MovieRecommender class #72

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,10 @@
<version>4.7</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>21.0</version>
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Move the Guava version into the POM's `<properties>` section and reference it here as a property (e.g. `${guava.version}`).

</dependency>
</dependencies>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
package nearsoft.academy.bigdata.recommendation;

import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.UserBasedRecommender;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;

import java.util.LinkedList;
import java.util.List;
import java.io.*;

/**
 * Facade over the movie review data set: exposes aggregate counts gathered
 * while reading the reviews and produces user-based recommendations from the
 * "movies.csv" file generated by {@link ReviewDataReader}.
 */
public class MovieRecommender {
    private final ReviewInfo reviewInfo;

    /**
     * Reads the review file and collects its users, products and review count.
     * As a side effect of {@link ReviewDataReader#readData}, a "movies.csv"
     * file is written to the working directory.
     *
     * @param filePath Path of the gzip-compressed reviews file.
     */
    public MovieRecommender(String filePath) {
        ReviewDataReader reader = new ReviewDataReader();

        this.reviewInfo = new ReviewInfo();
        reader.readData(filePath, this.reviewInfo);
    }

    /**
     * @return Total number of reviews.
     */
    public int getTotalReviews() {
        return this.reviewInfo.getTotalReviews();
    }

    /**
     * @return Total number of products.
     */
    public int getTotalProducts() {
        return this.reviewInfo.getProducts().size();
    }

    /**
     * @return Total number of users.
     */
    public int getTotalUsers() {
        return this.reviewInfo.getUsers().size();
    }

    /**
     * Gets 3 recommendations for a given user.
     *
     * @param userID The ID of the user.
     * @return List of IDs of the recommended products.
     * @throws IllegalArgumentException if the user ID was not present in the reviews.
     * @throws IOException              if "movies.csv" cannot be read.
     * @throws TasteException           if the recommender fails.
     */
    public List<String> getRecommendationsForUser(String userID) throws IOException, TasteException {
        // Resolve the numeric index first: an unknown ID yields null, which would
        // otherwise surface as an opaque NullPointerException when unboxed below,
        // and only after the whole data model had already been loaded.
        Long userIndex = this.reviewInfo.getUserIndex(userID);
        if (userIndex == null) {
            throw new IllegalArgumentException("Unknown user ID: " + userID);
        }

        DataModel model = new FileDataModel(new File("movies.csv"));
        UserSimilarity similarity = new PearsonCorrelationSimilarity(model);
        UserNeighborhood neighborhood = new ThresholdUserNeighborhood(0.1, similarity, model);
        UserBasedRecommender recommender = new GenericUserBasedRecommender(model, neighborhood, similarity);

        List<String> recommendedTitles = new LinkedList<String>(); // List of alphanumeric product IDs.
        List<RecommendedItem> recommendations = recommender.recommend(userIndex.longValue(), 3);

        // Translate the numeric item indexes used in the CSV back to product IDs.
        for (RecommendedItem recommendation : recommendations) {
            recommendedTitles.add(this.reviewInfo.getProducts().get(recommendation.getItemID()));
        }

        return recommendedTitles;
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@
public class MovieRecommenderTest {
@Test
public void testDataInfo() throws IOException, TasteException {
//download movies.txt.gz from
// http://snap.stanford.edu/data/web-Movies.html
MovieRecommender recommender = new MovieRecommender("/path/to/movies.txt.gz");
// download movies.txt.gz from
// http://snap.stanford.edu/data/web-Movies.html
// file must be placed in the root directory of this repository
MovieRecommender recommender = new MovieRecommender("movies.txt.gz");
assertEquals(7911684, recommender.getTotalReviews());
assertEquals(253059, recommender.getTotalProducts());
assertEquals(889176, recommender.getTotalUsers());
Expand All @@ -24,7 +25,6 @@ public void testDataInfo() throws IOException, TasteException {
assertThat(recommendations, hasItem("B0002O7Y8U"));
assertThat(recommendations, hasItem("B00004CQTF"));
assertThat(recommendations, hasItem("B000063W82"));

}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
package nearsoft.academy.bigdata.recommendation;

import java.io.*;
import java.util.zip.GZIPInputStream;

/**
 * Parses the gzip-compressed Amazon movie reviews dump and converts it into
 * the numeric CSV consumed by the recommender.
 */
public class ReviewDataReader {

    /**
     * Reads the "movies.txt.gz" file.
     * Generates a .csv file with the relevant information in the correct format.
     * Loads the users, products and reviews info.
     *
     * <p>Each "review/score" line produces one CSV row of the form
     * {@code userIndex,productIndex,score}, written to "movies.csv" in the
     * working directory. Users and products receive consecutive numeric
     * indexes in order of first appearance.
     *
     * <p>An {@link IOException} is not propagated: its stack trace is printed
     * and the method returns, leaving {@code info} with whatever was loaded
     * up to that point.
     *
     * @param filePath Path of the gzip-compressed reviews file.
     * @param info     Container that receives the users, products and review count.
     */
    public void readData(String filePath, ReviewInfo info) {
        File file = new File(filePath);

        // Every stream is declared as its own resource so that each one is closed
        // even when a later wrapper's constructor or the parsing loop throws.
        // The input chain is opened first, so a missing input file does not
        // create an empty "movies.csv".
        try (InputStream fileIn = new FileInputStream(file);
             InputStream in = new GZIPInputStream(fileIn);
             Reader decoder = new InputStreamReader(in, "US-ASCII");
             BufferedReader br = new BufferedReader(decoder);
             FileOutputStream fos = new FileOutputStream(new File("movies.csv"));
             BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fos))) {

            String line;
            String productID = "";
            String userID = "";
            String newLine;

            long productCounter = 0;
            long userCounter = 0;

            while ((line = br.readLine()) != null) {
                if (line.startsWith("product/productId")) {
                    productID = line.split(": ")[1];
                    if (!info.containsProduct(productID)) {
                        info.putProduct(Long.valueOf(productCounter), productID);
                        productCounter++;
                    }
                } else if (line.startsWith("review/userId")) {
                    userID = line.split(": ")[1];
                    if (!info.containsUser(userID)) {
                        info.putUser(Long.valueOf(userCounter), userID);
                        userCounter++;
                    }
                } else if (line.startsWith("review/score")) {
                    // Uses the most recently seen product and user IDs.
                    newLine = info.getUserIndex(userID) + "," + info.getProductIndex(productID) + "," + line.split(": ")[1];
                    bw.write(newLine);
                    bw.newLine();
                    info.setTotalReviews(info.getTotalReviews() + 1);
                }
            }

        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
104 changes: 104 additions & 0 deletions src/test/java/nearsoft/academy/bigdata/recommendation/ReviewInfo.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
package nearsoft.academy.bigdata.recommendation;

import com.google.common.collect.BiMap;
import com.google.common.collect.HashBiMap;

/**
 * Holds what was learned from the Amazon movie reviews: the distinct users,
 * the distinct products and the number of reviews.
 */
public class ReviewInfo {

    /** Distinct users, mapping numeric key to alphanumeric user ID. */
    private BiMap<Long, String> users;

    /** Distinct products, mapping numeric key to alphanumeric product ID. */
    private BiMap<Long, String> products;

    /** Running count of reviews. */
    private int totalReviews = 0;

    /** Creates an instance with no users, no products and zero reviews. */
    public ReviewInfo() {
        users = HashBiMap.create();
        products = HashBiMap.create();
    }

    /**
     * @return The users collection (key to user ID)
     */
    public BiMap<Long, String> getUsers() {
        return users;
    }

    /**
     * Replaces the users collection.
     *
     * @param users New key to user ID mapping
     */
    public void setUsers(BiMap<Long, String> users) {
        this.users = users;
    }

    /**
     * Tells whether a user ID has already been registered.
     *
     * @param userID Alphanumeric user ID
     * @return TRUE when the ID is present among the users, FALSE otherwise
     */
    public boolean containsUser(String userID) {
        BiMap<String, Long> keysByUser = users.inverse();
        return keysByUser.containsKey(userID);
    }

    /**
     * Registers a user under the given key.
     *
     * @param key    BiMap key
     * @param userID Alphanumeric user ID
     */
    public void putUser(Long key, String userID) {
        users.put(key, userID);
    }

    /**
     * Looks up the key a user ID was registered under.
     *
     * @param userID Alphanumeric user ID
     * @return Key of the user, or null when the ID is not registered
     */
    public Long getUserIndex(String userID) {
        BiMap<String, Long> keysByUser = users.inverse();
        return keysByUser.get(userID);
    }

    /**
     * @return The products collection (key to product ID)
     */
    public BiMap<Long, String> getProducts() {
        return products;
    }

    /**
     * Replaces the products collection.
     *
     * @param products New key to product ID mapping
     */
    public void setProducts(BiMap<Long, String> products) {
        this.products = products;
    }

    /**
     * Tells whether a product ID has already been registered.
     *
     * @param productID Alphanumeric product ID
     * @return TRUE when the ID is present among the products, FALSE otherwise
     */
    public boolean containsProduct(String productID) {
        BiMap<String, Long> keysByProduct = products.inverse();
        return keysByProduct.containsKey(productID);
    }

    /**
     * Registers a product under the given key.
     *
     * @param key       BiMap key
     * @param productID Alphanumeric product ID
     */
    public void putProduct(Long key, String productID) {
        products.put(key, productID);
    }

    /**
     * Looks up the key a product ID was registered under.
     *
     * @param productID Alphanumeric product ID
     * @return Key of the product, or null when the ID is not registered
     */
    public Long getProductIndex(String productID) {
        BiMap<String, Long> keysByProduct = products.inverse();
        return keysByProduct.get(productID);
    }

    /**
     * @return Number of reviews recorded so far
     */
    public int getTotalReviews() {
        return totalReviews;
    }

    /**
     * @param totalReviews New number of reviews
     */
    public void setTotalReviews(int totalReviews) {
        this.totalReviews = totalReviews;
    }
}