diff --git a/cs105_lab2_apache_log.dbc b/cs105_lab2_apache_log.dbc index 8bdc3f8..112090f 100644 Binary files a/cs105_lab2_apache_log.dbc and b/cs105_lab2_apache_log.dbc differ diff --git a/cs105_lab2_apache_log.py b/cs105_lab2_apache_log.py index b24a202..f450f28 100644 --- a/cs105_lab2_apache_log.py +++ b/cs105_lab2_apache_log.py @@ -539,7 +539,7 @@ def parse_clf_time(s): paths_counts = (paths_df .select('path', 'count') - .map(lambda r: (r[0], r[1])) + .rdd.map(lambda r: (r[0], r[1])) .collect()) paths, counts = zip(*paths_counts) @@ -726,7 +726,7 @@ def parse_clf_time(s): # TEST Number of unique daily hosts (4c) daily_hosts_list = (daily_hosts_df - .map(lambda r: (r[0], r[1])) + .rdd.map(lambda r: (r[0], r[1])) .take(30)) Test.assertEquals(day_to_host_pair_df.count(), total_log_entries, 'incorrect row count for day_to_host_pair_df')