diff --git a/pom.xml b/pom.xml
index 8169ff7..d7f932e 100644
--- a/pom.xml
+++ b/pom.xml
@@ -26,5 +26,10 @@ 4.7 test + + com.google.guava + guava + 21.0 +
diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java
new file mode 100644
index 0000000..9ffe9b0
--- /dev/null
+++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java
@@ -0,0 +1,95 @@
+package nearsoft.academy.bigdata.recommendation;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
+import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood;
+import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
+import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.cf.taste.recommender.UserBasedRecommender;
+import org.apache.mahout.cf.taste.similarity.UserSimilarity;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Recommends products to users from the Amazon movie reviews data.
+ * The constructor parses the reviews file; recommendations are computed from the
+ * "movies.csv" file that ReviewDataReader writes while parsing.
+ */
+public class MovieRecommender {
+    // Ratings file written by ReviewDataReader and loaded as the Mahout data model.
+    private static final String RATINGS_FILE = "movies.csv";
+    // Similarity threshold handed to ThresholdUserNeighborhood.
+    private static final double SIMILARITY_THRESHOLD = 0.1;
+    // Number of recommendations requested per user.
+    private static final int RECOMMENDATION_COUNT = 3;
+
+    private final ReviewInfo reviewInfo;
+
+    /**
+     * Parses the reviews file and collects its users, products and review count.
+     *
+     * @param filePath Path to the gzipped reviews file.
+     */
+    public MovieRecommender(String filePath) {
+        this.reviewInfo = new ReviewInfo();
+        new ReviewDataReader().readData(filePath, this.reviewInfo);
+    }
+
+    /**
+     * @return Total number of reviews.
+     */
+    public int getTotalReviews() {
+        return this.reviewInfo.getTotalReviews();
+    }
+
+    /**
+     * @return Total number of products.
+     */
+    public int getTotalProducts() {
+        return this.reviewInfo.getProducts().size();
+    }
+
+    /**
+     * @return Total number of users.
+     */
+    public int getTotalUsers() {
+        return this.reviewInfo.getUsers().size();
+    }
+
+    /**
+     * Gets up to 3 recommendations for a given user.
+     *
+     * @param userID The alphanumeric ID of the user.
+     * @return List of IDs of the recommended products; empty if the user is unknown.
+     * @throws IOException If the ratings file cannot be read.
+     * @throws TasteException If the recommender fails.
+     */
+    public List<String> getRecommendationsForUser(String userID) throws IOException, TasteException {
+        List<String> recommendedProducts = new ArrayList<String>();
+
+        // An unknown user has no numeric index; unboxing the null would throw a NullPointerException.
+        Long userIndex = this.reviewInfo.getUserIndex(userID);
+        if (userIndex == null) {
+            return recommendedProducts;
+        }
+
+        DataModel model = new FileDataModel(new File(RATINGS_FILE));
+        UserSimilarity similarity = new PearsonCorrelationSimilarity(model);
+        UserNeighborhood neighborhood = new ThresholdUserNeighborhood(SIMILARITY_THRESHOLD, similarity, model);
+        UserBasedRecommender recommender = new GenericUserBasedRecommender(model, neighborhood, similarity);
+
+        List<RecommendedItem> recommendations = recommender.recommend(userIndex, RECOMMENDATION_COUNT);
+        for (RecommendedItem recommendation : recommendations) {
+            // Translate the numeric item ID from the ratings file back to the product ID.
+            recommendedProducts.add(this.reviewInfo.getProducts().get(recommendation.getItemID()));
+        }
+
+        return recommendedProducts;
+    }
+}
diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java
index 0d0b1fe..da498a0 100644
--- a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java
+++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java
@@ -13,9 +13,10 @@ public class MovieRecommenderTest { @Test public void testDataInfo() throws IOException, TasteException { - //download movies.txt.gz from - // http://snap.stanford.edu/data/web-Movies.html - MovieRecommender recommender = new MovieRecommender("/path/to/movies.txt.gz"); + // download movies.txt.gz from + // http://snap.stanford.edu/data/web-Movies.html + // file must be placed in the root directory of this repository + MovieRecommender recommender = new MovieRecommender("movies.txt.gz"); assertEquals(7911684, recommender.getTotalReviews()); assertEquals(253059, recommender.getTotalProducts()); assertEquals(889176, recommender.getTotalUsers()); @@ -24,7 +25,6 @@ public void testDataInfo() throws IOException, 
TasteException { assertThat(recommendations, hasItem("B0002O7Y8U")); assertThat(recommendations, hasItem("B00004CQTF")); assertThat(recommendations, hasItem("B000063W82")); - } }
diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/ReviewDataReader.java b/src/test/java/nearsoft/academy/bigdata/recommendation/ReviewDataReader.java
new file mode 100644
index 0000000..bc7a5c0
--- /dev/null
+++ b/src/test/java/nearsoft/academy/bigdata/recommendation/ReviewDataReader.java
@@ -0,0 +1,104 @@
+package nearsoft.academy.bigdata.recommendation;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.Closeable;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.util.zip.GZIPInputStream;
+
+/**
+ * Converts the gzipped reviews file into a numeric .csv file and collects the review information.
+ */
+public class ReviewDataReader {
+
+    private static final String OUTPUT_FILE = "movies.csv";
+    private static final String CHARSET = "US-ASCII";
+    private static final String SEPARATOR = ": ";
+
+    /**
+     * Reads the "movies.txt.gz" file.
+     * Generates a .csv file with one "userIndex,productIndex,score" line per review.
+     * Loads the users, products and reviews info.
+     *
+     * @param filePath Path to the gzipped reviews file
+     * @param info Receives the users, products and number of reviews found in the file
+     * @throws IllegalStateException If the reviews file cannot be read or the .csv file cannot be written
+     * @throws IllegalArgumentException If a product, user or score line has no value
+     */
+    public void readData(String filePath, ReviewInfo info) {
+        FileInputStream source = null;
+        BufferedReader reader = null;
+        BufferedWriter writer = null;
+
+        try {
+            source = new FileInputStream(filePath);
+            reader = new BufferedReader(new InputStreamReader(new GZIPInputStream(source), CHARSET));
+            writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(OUTPUT_FILE), CHARSET));
+
+            String productID = "";
+            String userID = "";
+            long nextProductIndex = 0;
+            long nextUserIndex = 0;
+            String line;
+
+            while ((line = reader.readLine()) != null) {
+                if (line.startsWith("product/productId")) {
+                    productID = fieldValue(line);
+                    if (!info.containsProduct(productID)) {
+                        info.putProduct(Long.valueOf(nextProductIndex), productID);
+                        nextProductIndex++;
+                    }
+                } else if (line.startsWith("review/userId")) {
+                    userID = fieldValue(line);
+                    if (!info.containsUser(userID)) {
+                        info.putUser(Long.valueOf(nextUserIndex), userID);
+                        nextUserIndex++;
+                    }
+                } else if (line.startsWith("review/score")) {
+                    writer.write(info.getUserIndex(userID) + "," + info.getProductIndex(productID) + "," + fieldValue(line));
+                    writer.newLine();
+                    info.setTotalReviews(info.getTotalReviews() + 1);
+                }
+            }
+
+            // Close on the success path inside the try so that a failed final flush is reported.
+            writer.close();
+        } catch (IOException e) {
+            throw new IllegalStateException("Could not convert " + filePath + " to " + OUTPUT_FILE, e);
+        } finally {
+            closeQuietly(writer);
+            closeQuietly(reader);
+            closeQuietly(source);
+        }
+    }
+
+    /**
+     * Extracts the value of a "field: value" line.
+     */
+    private static String fieldValue(String line) {
+        int separatorIndex = line.indexOf(SEPARATOR);
+        int valueStart = separatorIndex + SEPARATOR.length();
+        if (separatorIndex < 0 || valueStart >= line.length()) {
+            throw new IllegalArgumentException("Review line has no value: " + line);
+        }
+        return line.substring(valueStart);
+    }
+
+    /**
+     * Closes a resource, if it was opened, without reporting a failure of the close itself.
+     */
+    private static void closeQuietly(Closeable resource) {
+        if (resource == null) {
+            return;
+        }
+        try {
+            resource.close();
+        } catch (IOException ignored) {
+            // Deliberately ignored: it must not mask an exception that is already propagating.
+        }
+    }
+}
diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/ReviewInfo.java b/src/test/java/nearsoft/academy/bigdata/recommendation/ReviewInfo.java
new file mode 100644
index 0000000..ab1c0ce
--- /dev/null
+++ b/src/test/java/nearsoft/academy/bigdata/recommendation/ReviewInfo.java
@@ -0,0 +1,104 @@
+package nearsoft.academy.bigdata.recommendation;
+
+import com.google.common.collect.BiMap;
+import com.google.common.collect.HashBiMap;
+
+/**
+ * Stores the Amazon movie reviews information: distinct users, distinct products and review count
+ */
+public class ReviewInfo {
+
+    private BiMap<Long, String> users; // Distinct users present in the reviews (index -> user ID)
+    private BiMap<Long, String> products; // Distinct products present in the reviews (index -> product ID)
+    private int totalReviews; // Number of total reviews
+
+    public ReviewInfo() {
+        this.users = HashBiMap.create();
+        this.products = HashBiMap.create();
+        this.totalReviews = 0;
+    }
+
+    public BiMap<Long, String> getUsers() {
+        return users;
+    }
+
+    public void setUsers(BiMap<Long, String> users) {
+        this.users = users;
+    }
+
+    /**
+     * Checks if the specified user exists in the reviews
+     *
+     * @param userID Alphanumeric user ID
+     * @return TRUE if the user is found, FALSE otherwise
+     */
+    public boolean containsUser(String userID) {
+        return this.users.containsValue(userID);
+    }
+
+    /**
+     * Adds a new user to the users collection
+     *
+     * @param key BiMap key
+     * @param userID Alphanumeric user ID
+     */
+    public void putUser(Long key, String userID) {
+        this.users.put(key, userID);
+    }
+
+    /**
+     * Finds the index of the specified user in the users collection
+     *
+     * @param userID Alphanumeric user ID
+     * @return Index position of the user, or null if the user is not in the collection
+     */
+    public Long getUserIndex(String userID) {
+        return this.users.inverse().get(userID);
+    }
+
+    public BiMap<Long, String> getProducts() {
+        return products;
+    }
+
+    public void setProducts(BiMap<Long, String> products) {
+        this.products = products;
+    }
+
+    /**
+     * Checks if the specified product exists in the reviews
+     *
+     * @param productID Alphanumeric product ID
+     * @return TRUE if the product is found, FALSE otherwise
+     */
+    public boolean containsProduct(String productID) {
+        return this.products.containsValue(productID);
+    }
+
+    /**
+     * Adds a new product to the products collection
+     *
+     * @param key BiMap key
+     * @param productID Alphanumeric product ID
+     */
+    public void putProduct(Long key, String productID) {
+        this.products.put(key, productID);
+    }
+
+    /**
+     * Finds the index of the specified product in the product collection
+     *
+     * @param productID Alphanumeric product ID
+     * @return Index position of the product, or null if the product is not in the collection
+     */
+    public Long getProductIndex(String productID) {
+        return this.products.inverse().get(productID);
+    }
+
+    public int getTotalReviews() {
+        return totalReviews;
+    }
+
+    public void setTotalReviews(int totalReviews) {
+        this.totalReviews = totalReviews;
+    }
+}