version bump to v0.4.0

rudeboybert · Feb 11, 2018 · 8c59082 · 8c59082
1 parent 9597a8a
commit 8c59082
Show file tree

Hide file tree

Showing 197 changed files with 2,068 additions and 415 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -4,7 +4,7 @@ Description: Datasets and code published by the data journalism website
     'FiveThirtyEight' available at <https://github.com/fivethirtyeight/data>. 
     Note that while we received guidance from editors at 'FiveThirtyEight', this 
     package is not officially published by 'FiveThirtyEight'.
-Version: 0.3.0.9000
+Version: 0.4.0
 Authors@R: c(
     person("Albert Y.", "Kim", email = "[email protected]", role = c("aut", "cre")),
     person("Chester", "Ismay", email = "[email protected]", role = "aut"),

diff --git a/NEWS.md b/NEWS.md
@@ -1,6 +1,7 @@
-# fivethirtyeight 0.3.0.9000
+# fivethirtyeight 0.4.0
 
-* Added new and missing data sets:
+* Package website now at <http://fivethirtyeight-r.netlify.com/>
+* Added new data sets:
     + By Meredith Manley: `ahca_polls`, `bachelorette`, `candy_rankings`, 
     `chess_transfers`, `generic_polllist`, `generic_topline`, `mlb_elo`, 
     `ratings`, `spi_global_rankings`, `spi_matches`, `tweets`
@@ -10,9 +11,20 @@
     `tv_hurricanes`, `tv_hurricanes_by_network`, `tv_states`, `sandy_311`,
     `senators`, `trump_approval_poll`, `trump_approval_trend`, 
     `trumpworld_issues`, `trumpworld_polls`, `twitter_presidents`, `undefeated`
-* Package website now at <http://fivethirtyeight-r.netlify.com/>
+* Note that for now `mayweather_mcgregor_tweets`, `mlb_elo`, `nba_carmelo`,
+`nfl_elo`, and `senators` only include the first 10 rows of the data because of R
+package file size restrictions (< 5MB tarballs); code to download/process the
+entire datasets is included in the help files. We hope to include full versions of
+these datasets in v0.5.0 of the package using the `drat` package.
 * Converted all ordinal categorical variables to `factor(..., ordered = TRUE)`.
 Ex: `bechdel$clean_test`
+* Vignette notes:
+    + Added a vignette that has a link to all user-contributed vignettes, which 
+    are only available in the development version of the package and on GitHub due to 
+    CRAN package size restrictions.
+    + For all datasets that have a user-contributed vignette, Added a link to it 
+    in the corresponding help/roxygen code file.
+* Added `CITATION`
 
 
 # fivethirtyeight 0.3.0

diff --git a/R/data_albert.R b/R/data_albert.R
@@ -139,7 +139,7 @@
 #'   \item{wine_servings}{Servings of wine in average serving sizes per person}
 #'   \item{total_litres_of_pure_alcohol}{Total litres of pure alcohol per person}
 #' }
-#' @source World Health Organisation, Global Information System on Alcohol and Health (GISAH), 2010.
+#' @source World Health Organization, Global Information System on Alcohol and Health (GISAH), 2010.
 #' @examples
 #' # To convert data frame to tidy data (long) format, run:
 #' library(tidyverse)
@@ -159,6 +159,8 @@
 #' The raw data behind the story
 #' "'Straight Outta Compton' Is The Rare Biopic Not About White Dudes"
 #' \url{https://fivethirtyeight.com/features/straight-outta-compton-is-the-rare-biopic-not-about-white-dudes/}.
+#' An analysis using this data was contributed by Pradeep Adhokshaja as a package vignette at
+#' \url{http://fivethirtyeight-r.netlify.com/articles/biopics.html}.
 #'
 #' @format A data frame with 761 rows representing movies and 14 variables:
 #' \describe{
@@ -187,6 +189,8 @@
 #' The raw data behind the story
 #' "A Statistical Analysis of the Work of Bob Ross"
 #' \url{https://fivethirtyeight.com/features/a-statistical-analysis-of-the-work-of-bob-ross/}.
+#' An analysis using this data was contributed by Jonathan Bouchet as a package vignette at
+#' \url{http://fivethirtyeight-r.netlify.com/articles/bob_ross.html}.
 #'
 #' @format A data frame with 403 rows representing episodes and 71 variables:
 #' \describe{
@@ -335,7 +339,7 @@
 #'   \item{employed_fulltime_yearround}{Employed at least 50 weeks (WKW == 1) and at least 35 hours (WKHP >= 35)}
 #'   \item{unemployed}{Number unemployed (ESR == 3)}
 #'   \item{unemployment_rate}{Unemployed / (Unemployed + Employed)}
-#'   \item{p25th}{25th percentile of earnigns}
+#'   \item{p25th}{25th percentile of earnings}
 #'   \item{median}{Median earnings of full-time, year-round workers}
 #'   \item{p75th}{75th percentile of earnings}
 #' }
@@ -362,15 +366,15 @@
 #'   \item{grad_employed_fulltime_yearround}{Employed at least 50 weeks (WKW == 1) and at least 35 hours (WKHP >= 35)}
 #'   \item{grad_unemployed}{Number unemployed (ESR == 3)}
 #'   \item{grad_unemployment_rate}{Unemployed / (Unemployed + Employed)}
-#'   \item{grad_p25th}{25th percentile of earnigns}
+#'   \item{grad_p25th}{25th percentile of earnings}
 #'   \item{grad_median}{Median earnings of full-time, year-round workers}
 #'   \item{grad_p75th}{75th percentile of earnings}
 #'   \item{nongrad_total}{Total number of people with major}
 #'   \item{nongrad_employed}{Number employed (ESR == 1 or 2)}
 #'   \item{nongrad_employed_fulltime_yearround}{Employed at least 50 weeks (WKW == 1) and at least 35 hours (WKHP >= 35)}
 #'   \item{nongrad_unemployed}{Number unemployed (ESR == 3)}
 #'   \item{nongrad_unemployment_rate}{Unemployed / (Unemployed + Employed)}
-#'   \item{nongrad_p25th}{25th percentile of earnigns}
+#'   \item{nongrad_p25th}{25th percentile of earnings}
 #'   \item{nongrad_median}{Median earnings of full-time, year-round workers}
 #'   \item{nongrad_p75th}{75th percentile of earnings}
 #'   \item{grad_share}{grad_total / (grad_total + nongrad_total)}
@@ -405,7 +409,7 @@
 #'   \item{employed_fulltime_yearround}{Employed at least 50 weeks (WKW == 1) and at least 35 hours (WKHP >= 35)}
 #'   \item{unemployed}{Number unemployed (ESR == 3)}
 #'   \item{unemployment_rate}{Unemployed / (Unemployed + Employed)}
-#'   \item{p25th}{25th percentile of earnigns}
+#'   \item{p25th}{25th percentile of earnings}
 #'   \item{median}{Median earnings of full-time, year-round workers}
 #'   \item{p75th}{75th percentile of earnings}
 #'   \item{college_jobs}{Number with job requiring a college degree}
@@ -424,6 +428,8 @@
 #' The raw data behind the story
 #' "Comic Books Are Still Made By Men, For Men And About Men"
 #' \url{https://fivethirtyeight.com/features/women-in-comic-books/}.
+#' An analysis using this data was contributed by Jonathan Bouchet as a package vignette at
+#' \url{http://fivethirtyeight-r.netlify.com/articles/comics_gender.html}.
 #'
 #' @format A data frame with 23272 rows representing characters and 16 variables:
 #' \describe{
@@ -476,7 +482,7 @@
 #'   \item{care_data}{How much, if at all, do you care about the debate over the use of the word "data" as a singular or plural noun?}
 #'   \item{care_proper_grammar}{In your opinion, how important or unimportant is proper use of grammar?}
 #' }
-#' @source See \url{https://github.com/fivethirtyeight/data/tree/master/comma-survey-data}.
+#' @source See \url{https://github.com/fivethirtyeight/data/tree/master/comma-survey}.
 "comma_survey"
 
 
@@ -537,9 +543,9 @@
 #'   \item{google_knowledge_occupation}{Their occupation or office, according to Google's Knowledge Graph or, if they're not in there, how Stewart introduced them on the program.}
 #'   \item{show}{Air date of episode. Not unique, as some shows had more than one guest}
 #'   \item{group}{A larger group designation for the occupation. For instance, us senators, us presidents, and former presidents are all under "politicians"}
-#'   \item{raw_guest_list}{The person or list of people who appeared on the show, according to Wikipedia. The GoogleKnowlege_Occupation only refers to one of them in a given row.}
+#'   \item{raw_guest_list}{The person or list of people who appeared on the show, according to Wikipedia. The GoogleKnowledge_Occupation only refers to one of them in a given row.}
 #' }
-#' @source Google Knowlege Graph, The Daily Show clip library, Wikipedia.
+#' @source Google Knowledge Graph, The Daily Show clip library, Wikipedia.
 "daily_show_guests"
 
 

diff --git a/R/data_chester.R b/R/data_chester.R
@@ -196,7 +196,7 @@
 #'   \item{gamenum}{Order of All-Star Game for the season (in years w/ multiple ASGs; set to 0 when only 1 per year)}
 #'   \item{gameid}{Game ID at Baseball-Reference.com}
 #'   \item{lgid}{League of All-Star team}
-#'   \item{startingpos}{Postion (according to baseball convention; 1=pitcher, 2=catcher, etc.) if starter}
+#'   \item{startingpos}{Position (according to baseball convention; 1=pitcher, 2=catcher, etc.) if starter}
 #'   \item{off600}{Estimate of offensive talent, in runs above league average per 600 plate appearances}
 #'   \item{def600}{Estimate of fielding talent, in runs above league average per 600 plate appearances}
 #'   \item{pitch200}{Estimate of pitching talent, in runs above league average per 200 innings pitched}
@@ -257,6 +257,8 @@
 #' The raw data behind the story
 #' "Projecting The Top 50 Players In The 2015 NBA Draft Class"
 #' \url{https://fivethirtyeight.com/features/projecting-the-top-50-players-in-the-2015-nba-draft-class/}.
+#' An analysis using this data was contributed by G. Elliott Morris as a package vignette at
+#' \url{http://fivethirtyeight-r.netlify.com/articles/nba.html}.
 #'
 #' @format A data frame with 1090 rows representing National Basketball Association players/prospects and 9 variables:
 #' \describe{
@@ -333,7 +335,7 @@
 #'   \item{name}{first initial.last name}
 #'   \item{team}{team at time of suspension}
 #'   \item{games}{number of games suspended (one regular season = 16 games)}
-#'   \item{category}{personal conduct, substance abuse, peformance enhancing drugs or in-game violence}
+#'   \item{category}{personal conduct, substance abuse, performance enhancing drugs or in-game violence}
 #'   \item{description}{description of suspension}
 #'   \item{year}{year of suspension}
 #'   \item{source}{news source}

diff --git a/R/data_jen.R b/R/data_jen.R
@@ -215,6 +215,9 @@
 #' The raw data behind the story
 #' "A Complete Catalog Of Every Time Someone Cursed Or Bled Out In A Quentin Tarantino Movie"
 #' \url{https://fivethirtyeight.com/features/complete-catalog-curses-deaths-quentin-tarantino-films/}.
+#' An analysis using this data was contributed by Olivia Barrows, Jojo Miller, and Jayla Nakayama
+#' as a package vignette at
+#' \url{http://fivethirtyeight-r.netlify.com/articles/tarantino_swears.html}.
 #'
 #' @format A data frame with 1894 rows representing curse/death instances and 4 variables:
 #' \describe{
@@ -303,7 +306,9 @@
 #' The raw data behind the story
 #' "The World's Favorite Donald Trump Tweets"
 #' \url{https://fivethirtyeight.com/features/the-worlds-favorite-donald-trump-tweets/}.
-#' Tweets posted on twitter by Donald Trump (@@realDonaldTrump)
+#' Tweets posted on Twitter by Donald Trump (@@realDonaldTrump).
+#' An analysis using this data was contributed by Adam Spannbauer as a package vignette at
+#' \url{http://fivethirtyeight-r.netlify.com/articles/trump_twitter.html}.
 #'
 #' @format A data frame with 448 rows representing tweets and 3 variables:
 #' \describe{
@@ -461,7 +466,7 @@
 #'   \item{mehs}{Mehs}
 #'   \item{league_average_gpct}{League-average goose percentage}
 #'   \item{ppf}{Pitcher park factor}
-#'   \item{replacement_gpct}{Replacement-level goose pecentage}
+#'   \item{replacement_gpct}{Replacement-level goose percentage}
 #'   \item{gwar}{Goose Wins Above Replacement}
 #'   \item{key_retro}{Retrosheet unique player identifier}
 #' }