diff --git a/pom.xml b/pom.xml
index 8169ff7..d7f932e 100644
--- a/pom.xml
+++ b/pom.xml
@@ -26,5 +26,10 @@
4.7
test
+        <dependency>
+            <groupId>com.google.guava</groupId>
+            <artifactId>guava</artifactId>
+            <version>21.0</version>
+        </dependency>
diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java
new file mode 100644
index 0000000..9ffe9b0
--- /dev/null
+++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java
@@ -0,0 +1,71 @@
+package nearsoft.academy.bigdata.recommendation;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
+import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood;
+import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
+import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.cf.taste.recommender.UserBasedRecommender;
+import org.apache.mahout.cf.taste.similarity.UserSimilarity;
+
+import java.util.LinkedList;
+import java.util.List;
+import java.io.*;
+
+/**
+ * Entry point for querying the movie-review data set.
+ *
+ * <p>Construction parses the review file through {@link ReviewDataReader}, which fills a
+ * {@link ReviewInfo} and writes the numeric ratings file ({@code movies.csv}) that
+ * {@link #getRecommendationsForUser(String)} later loads into Mahout.
+ */
+public class MovieRecommender {
+    /** Ratings file written by {@link ReviewDataReader} and read back by Mahout's file data model. */
+    private static final String RATINGS_CSV = "movies.csv";
+
+    /** Number of recommendations requested per user. */
+    private static final int RECOMMENDATION_COUNT = 3;
+
+    /** Minimum user similarity for a user to be part of the neighborhood. */
+    private static final double SIMILARITY_THRESHOLD = 0.1;
+
+    private final ReviewInfo reviewInfo;
+
+    /**
+     * Reads the review file and collects the user, product and review information.
+     *
+     * @param filePath path of the gzip-compressed review file passed to the reader
+     */
+    public MovieRecommender(String filePath) {
+        this.reviewInfo = new ReviewInfo();
+        new ReviewDataReader().readData(filePath, this.reviewInfo);
+    }
+
+    /**
+     * @return Total number of reviews.
+     */
+    public int getTotalReviews() {
+        return this.reviewInfo.getTotalReviews();
+    }
+
+    /**
+     * @return Total number of products.
+     */
+    public int getTotalProducts() {
+        return this.reviewInfo.getProducts().size();
+    }
+
+    /**
+     * @return Total number of users.
+     */
+    public int getTotalUsers() {
+        return this.reviewInfo.getUsers().size();
+    }
+
+    /**
+     * Gets 3 recommendations for a given user.
+     *
+     * @param userID The alphanumeric ID of the user.
+     * @return List of alphanumeric IDs of the recommended products.
+     * @throws IOException    if the ratings file cannot be read
+     * @throws TasteException if the user is not present in the reviews or Mahout fails
+     */
+    public List<String> getRecommendationsForUser(String userID) throws IOException, TasteException {
+        // Resolve the numeric index first: an unknown user maps to null, and letting that
+        // null be auto-unboxed for recommend(long, int) would raise a NullPointerException.
+        Long userIndex = this.reviewInfo.getUserIndex(userID);
+        if (userIndex == null) {
+            throw new TasteException("Unknown user: " + userID);
+        }
+
+        DataModel model = new FileDataModel(new File(RATINGS_CSV));
+        UserSimilarity similarity = new PearsonCorrelationSimilarity(model);
+        UserNeighborhood neighborhood =
+                new ThresholdUserNeighborhood(SIMILARITY_THRESHOLD, similarity, model);
+        UserBasedRecommender recommender =
+                new GenericUserBasedRecommender(model, neighborhood, similarity);
+
+        List<RecommendedItem> recommendations =
+                recommender.recommend(userIndex, RECOMMENDATION_COUNT);
+
+        // Translate Mahout's numeric item IDs back to the alphanumeric product IDs.
+        List<String> recommendedTitles = new LinkedList<>();
+        for (RecommendedItem recommendation : recommendations) {
+            // The cast keeps this line valid whether getProducts() is declared raw or parameterized.
+            recommendedTitles.add(
+                    (String) this.reviewInfo.getProducts().get(recommendation.getItemID()));
+        }
+
+        return recommendedTitles;
+    }
+
+}
diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java
index 0d0b1fe..da498a0 100644
--- a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java
+++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java
@@ -13,9 +13,10 @@
public class MovieRecommenderTest {
@Test
public void testDataInfo() throws IOException, TasteException {
- //download movies.txt.gz from
- // http://snap.stanford.edu/data/web-Movies.html
- MovieRecommender recommender = new MovieRecommender("/path/to/movies.txt.gz");
+ // download movies.txt.gz from
+ // http://snap.stanford.edu/data/web-Movies.html
+ // file must be placed in the root directory of this repository
+ MovieRecommender recommender = new MovieRecommender("movies.txt.gz");
assertEquals(7911684, recommender.getTotalReviews());
assertEquals(253059, recommender.getTotalProducts());
assertEquals(889176, recommender.getTotalUsers());
@@ -24,7 +25,6 @@ public void testDataInfo() throws IOException, TasteException {
assertThat(recommendations, hasItem("B0002O7Y8U"));
assertThat(recommendations, hasItem("B00004CQTF"));
assertThat(recommendations, hasItem("B000063W82"));
-
}
}
diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/ReviewDataReader.java b/src/test/java/nearsoft/academy/bigdata/recommendation/ReviewDataReader.java
new file mode 100644
index 0000000..bc7a5c0
--- /dev/null
+++ b/src/test/java/nearsoft/academy/bigdata/recommendation/ReviewDataReader.java
@@ -0,0 +1,58 @@
+package nearsoft.academy.bigdata.recommendation;
+
+import java.io.*;
+import java.util.zip.GZIPInputStream;
+
+public class ReviewDataReader {
+
+ /**
+ * Reads the "movies.txt.gz" file.
+ * Generates a .csv file with the relevant information in the correct format.
+ * Loads the users, products and reviews info.
+ */
+ public void readData(String filePath, ReviewInfo info) {
+ File file = new File(filePath);
+
+ try {
+ InputStream in = new GZIPInputStream(new FileInputStream(file));
+ Reader decoder = new InputStreamReader(in, "US-ASCII");
+ BufferedReader br = new BufferedReader(decoder);
+ String line;
+ String productID = "";
+ String userID = "";
+
+ File fout = new File("movies.csv");
+ FileOutputStream fos = new FileOutputStream(fout);
+ BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fos));
+ String newLine;
+
+ int productCounter = 0;
+ int userCounter = 0;
+
+ while ((line = br.readLine()) != null) {
+ if (line.startsWith("product/productId")) {
+ productID = line.split(": ")[1];
+ if (!info.containsProduct(productID)) {
+ info.putProduct(new Long(productCounter), productID);
+ productCounter++;
+ }
+ } else if (line.startsWith("review/userId")) {
+ userID = line.split(": ")[1];
+ if (!info.containsUser(userID)) {
+ info.putUser(new Long(userCounter), userID);
+ userCounter++;
+ }
+ } else if (line.startsWith("review/score")) {
+ newLine = info.getUserIndex(userID) + "," + info.getProductIndex(productID) + "," + line.split(": ")[1];
+ bw.write(newLine);
+ bw.newLine();
+ info.setTotalReviews(info.getTotalReviews() + 1);
+ }
+ }
+ bw.close();
+
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+}
diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/ReviewInfo.java b/src/test/java/nearsoft/academy/bigdata/recommendation/ReviewInfo.java
new file mode 100644
index 0000000..ab1c0ce
--- /dev/null
+++ b/src/test/java/nearsoft/academy/bigdata/recommendation/ReviewInfo.java
@@ -0,0 +1,104 @@
+package nearsoft.academy.bigdata.recommendation;
+
+import com.google.common.collect.BiMap;
+import com.google.common.collect.HashBiMap;
+
+/**
+ * Stores the Amazon movie reviews information
+ */
+public class ReviewInfo {
+
+    // Distinct users present in the reviews: running index -> alphanumeric user ID
+    private BiMap<Long, String> users;
+    // Distinct products present in the reviews: running index -> alphanumeric product ID
+    private BiMap<Long, String> products;
+    // Number of total reviews
+    private int totalReviews;
+
+    /**
+     * Creates an empty store: no users, no products and a review count of zero.
+     */
+    public ReviewInfo() {
+        this.users = HashBiMap.create();
+        this.products = HashBiMap.create();
+        this.totalReviews = 0;
+    }
+
+    /**
+     * @return The live index-to-ID map of users (not a copy)
+     */
+    public BiMap<Long, String> getUsers() {
+        return users;
+    }
+
+    /**
+     * Replaces the users collection
+     *
+     * @param users New index-to-ID map of users
+     */
+    public void setUsers(BiMap<Long, String> users) {
+        this.users = users;
+    }
+
+    /**
+     * Checks if the specified user exists in the reviews
+     *
+     * @param userID Alphanumeric user ID
+     * @return TRUE if the user is found, FALSE otherwise
+     */
+    public boolean containsUser(String userID) {
+        return this.users.containsValue(userID);
+    }
+
+    /**
+     * Adds a new user to the users collection
+     *
+     * @param key    BiMap key
+     * @param userID Alphanumeric user ID
+     */
+    public void putUser(Long key, String userID) {
+        this.users.put(key, userID);
+    }
+
+    /**
+     * Finds the index of the specified user in the users collection
+     *
+     * @param userID Alphanumeric user ID
+     * @return Index position of the user, or null if the user is not in the collection
+     */
+    public Long getUserIndex(String userID) {
+        return this.users.inverse().get(userID);
+    }
+
+    /**
+     * @return The live index-to-ID map of products (not a copy)
+     */
+    public BiMap<Long, String> getProducts() {
+        return products;
+    }
+
+    /**
+     * Replaces the products collection
+     *
+     * @param products New index-to-ID map of products
+     */
+    public void setProducts(BiMap<Long, String> products) {
+        this.products = products;
+    }
+
+    /**
+     * Checks if the specified product exists in the reviews
+     *
+     * @param productID Alphanumeric product ID
+     * @return TRUE if the product is found, FALSE otherwise
+     */
+    public boolean containsProduct(String productID) {
+        return this.products.containsValue(productID);
+    }
+
+    /**
+     * Adds a new product to the products collection
+     *
+     * @param key       BiMap key
+     * @param productID Alphanumeric product ID
+     */
+    public void putProduct(Long key, String productID) {
+        this.products.put(key, productID);
+    }
+
+    /**
+     * Finds the index of the specified product in the product collection
+     *
+     * @param productID Alphanumeric product ID
+     * @return Index position of the product, or null if the product is not in the collection
+     */
+    public Long getProductIndex(String productID) {
+        return this.products.inverse().get(productID);
+    }
+
+    /**
+     * @return Number of reviews counted so far
+     */
+    public int getTotalReviews() {
+        return totalReviews;
+    }
+
+    /**
+     * @param totalReviews New number of total reviews
+     */
+    public void setTotalReviews(int totalReviews) {
+        this.totalReviews = totalReviews;
+    }
+}