From 625516612db5ff76574ac223de6cf21744170e3d Mon Sep 17 00:00:00 2001 From: gcohenfr Date: Tue, 21 Nov 2023 07:47:55 -0800 Subject: [PATCH] tutorial12 --- materials/tutorial_12/tests_tutorial_12.R | 316 +++++ materials/tutorial_12/tutorial_12.ipynb | 1516 +++++++++++++++++++++ 2 files changed, 1832 insertions(+) create mode 100644 materials/tutorial_12/tests_tutorial_12.R create mode 100644 materials/tutorial_12/tutorial_12.ipynb diff --git a/materials/tutorial_12/tests_tutorial_12.R b/materials/tutorial_12/tests_tutorial_12.R new file mode 100644 index 0000000..965565b --- /dev/null +++ b/materials/tutorial_12/tests_tutorial_12.R @@ -0,0 +1,316 @@
+# Autograder checks for Tutorial 12. Expected values are stored as digest()
+# hashes, so each check compares the hash of a rounded or truncated summary.
+library(digest)
+library(testthat)
+
+# test_1.1: checks the Crabs_vs_width_scatterplot ggplot (first layer, x aesthetic, data, labels).
+test_1.1 <- function() {
+  test_that('Did not assign answer to an object called "Crabs_vs_width_scatterplot"', {
+    expect_true(exists("Crabs_vs_width_scatterplot"))
+  })
+
+  test_that("Solution should be a ggplot object", {
+    expect_true(is.ggplot(Crabs_vs_width_scatterplot))
+  })
+
+  # Merge layer-level and plot-level aesthetics so `x` is found wherever aes() was supplied.
+  properties <- c(Crabs_vs_width_scatterplot$layers[[1]]$mapping, Crabs_vs_width_scatterplot$mapping)
+
+  test_that("Plot should have width on the x-axis", {
+    expect_true("width" == rlang::get_expr(properties$x))
+  })
+
+  test_that("Plot does not have the correct layers", {
+    expect_true(inherits(Crabs_vs_width_scatterplot$layers[[1]]$geom, "GeomPoint"))
+  })
+
+  test_that("Plot does not use the correct data", {
+    expect_equal(digest(nrow(Crabs_vs_width_scatterplot$data)), "8c2afe893b01f3a8c63e1aef3b5aad9e")
+    expect_equal(digest(round(sum(Crabs_vs_width_scatterplot$data$width))), "17a701ffb51e7429bcd57678dd80b402")
+  })
+
+  test_that("x-axis label should be descriptive and human readable", {
+    expect_false(Crabs_vs_width_scatterplot$labels$x == toString(rlang::get_expr(properties$x)))
+  })
+
+  test_that("Plot should have a title", {
+    expect_true("title" %in% names(Crabs_vs_width_scatterplot$labels))
+  })
+
+  print("Success!")
+}
+
+# test_1.2: checks the class, column names, row count and mean_n_males sum of Crabs_group_avg_width.
+test_1.2 <- function() {
+  test_that('Did not assign answer to an object called "Crabs_group_avg_width"', {
+    expect_true(exists("Crabs_group_avg_width"))
+  })
+
+  test_that("Solution should be a data frame", {
+    expect_true(inherits(Crabs_group_avg_width, "data.frame"))
+  })
+
+  expected_colnames <- c("width_intervals", "mean_n_males")
+  given_colnames <- colnames(Crabs_group_avg_width)
+  test_that("Data frame does not have the correct columns", {
+    expect_equal(length(setdiff(
+      union(expected_colnames, given_colnames),
+      intersect(expected_colnames, given_colnames)
+    )), 0)
+  })
+
+  test_that("Data frame does not contain the correct number of rows", {
+    expect_equal(digest(as.integer(nrow(Crabs_group_avg_width))), "71db8a6cad03244e6e50f0ad8bc95a65")
+  })
+
+  test_that("Data frame does not contain the correct data", {
+    # Keep the 10e4 (= 1e5) factor as is: the stored hash no longer matches if it changes.
+    expect_equal(digest(as.integer(sum(Crabs_group_avg_width$mean_n_males) * 10e4)), "12273d78c0faff1ea67c8f8e9c42edd0")
+  })
+
+  print("Success!")
+}
+
+# test_1.3: checks the Crabs_avg_width_scatterplot ggplot (first layer, x aesthetic, data, labels).
+test_1.3 <- function() {
+  test_that('Did not assign answer to an object called "Crabs_avg_width_scatterplot"', {
+    expect_true(exists("Crabs_avg_width_scatterplot"))
+  })
+
+  test_that("Solution should be a ggplot object", {
+    expect_true(is.ggplot(Crabs_avg_width_scatterplot))
+  })
+
+  properties <- c(Crabs_avg_width_scatterplot$layers[[1]]$mapping, Crabs_avg_width_scatterplot$mapping)
+
+  test_that("Plot should have width_intervals on the x-axis", {
+    expect_true("width_intervals" == rlang::get_expr(properties$x))
+  })
+
+  test_that("Plot does not have the correct layers", {
+    expect_true(inherits(Crabs_avg_width_scatterplot$layers[[1]]$geom, "GeomPoint"))
+  })
+
+  test_that("Plot does not use the correct data", {
+    expect_equal(digest(nrow(Crabs_avg_width_scatterplot$data)), "71db8a6cad03244e6e50f0ad8bc95a65")
+    expect_equal(digest(round(sum(Crabs_avg_width_scatterplot$data$mean_n_males))), "e1d9279a9999b2d3bb972ab3267b49c7")
+  })
+
+  test_that("x-axis label should be descriptive and human readable", {
+    expect_false(Crabs_avg_width_scatterplot$labels$x == toString(rlang::get_expr(properties$x)))
+  })
+
+  test_that("Plot should have a title", {
+    expect_true("title" %in% names(Crabs_avg_width_scatterplot$labels))
+  })
+
+  print("Success!")
+}
+
+# test_1.4: checks that answer1.4 is one of "A"/"B"/"C" (any case) and matches the stored hash.
+test_1.4 <- function() {
+  test_that('Did not assign answer to an object called "answer1.4"', {
+    expect_true(exists("answer1.4"))
+  })
+
+  test_that('Solution should be a single character ("A", "B", or "C")', {
+    expect_match(answer1.4, "^[abc]$", ignore.case = TRUE)
+  })
+
+  answer_hash <- digest(tolower(answer1.4))
+
+  test_that("Solution is incorrect", {
+    expect_equal(answer_hash, "127a2ec00989b9f7faf671ed470be7f8")
+  })
+
+  print("Success!")
+}
+
+# test_1.5: re-checks Crabs_vs_width_scatterplot and requires the added geom_smooth() layer.
+test_1.5 <- function() {
+  test_that('Did not assign answer to an object called "Crabs_vs_width_scatterplot"', {
+    expect_true(exists("Crabs_vs_width_scatterplot"))
+  })
+
+  test_that("Solution should be a ggplot object", {
+    expect_true(is.ggplot(Crabs_vs_width_scatterplot))
+  })
+
+  properties <- c(Crabs_vs_width_scatterplot$layers[[1]]$mapping, Crabs_vs_width_scatterplot$mapping)
+
+  test_that("Plot should have width on the x-axis", {
+    expect_true("width" == rlang::get_expr(properties$x))
+  })
+
+  test_that("Plot does not have the correct layers", {
+    expect_true(inherits(Crabs_vs_width_scatterplot$layers[[1]]$geom, "GeomPoint"))
+    # Question 1.5 adds geom_smooth(); accept it in any layer because the cell may be re-run.
+    expect_true(any(vapply(Crabs_vs_width_scatterplot$layers, function(layer) inherits(layer$geom, "GeomSmooth"), logical(1))))
+  })
+
+  test_that("Plot does not use the correct data", {
+    expect_equal(digest(nrow(Crabs_vs_width_scatterplot$data)), "8c2afe893b01f3a8c63e1aef3b5aad9e")
+    expect_equal(digest(round(sum(Crabs_vs_width_scatterplot$data$width))), "17a701ffb51e7429bcd57678dd80b402")
+  })
+
+  test_that("x-axis label should be descriptive and human readable", {
+    expect_false(Crabs_vs_width_scatterplot$labels$x == toString(rlang::get_expr(properties$x)))
+  })
+
+  test_that("Plot should have a title", {
+    expect_true("title" %in% names(Crabs_vs_width_scatterplot$labels))
+  })
+
+  print("Success!")
+}
+
+# test_1.6: checks that crabs_Poisson_model is a glm whose summed residuals match the stored hash.
+test_1.6 <- function() {
+  test_that('Did not assign answer to an object called "crabs_Poisson_model"', {
+    expect_true(exists("crabs_Poisson_model"))
+  })
+
+  test_that("Solution should be a glm object", {
+    expect_true(inherits(crabs_Poisson_model, "glm"))
+  })
+
+  test_that("Model does not have the correct residuals", {
+    expect_equal(digest(as.integer(sum(crabs_Poisson_model$residuals) * 10e4)), "5ab35ebc157c4f75476569c445d5a1cc")
+  })
+
+  print("Success!")
+}
+
+# test_1.7: checks the columns, row count and summed CI bounds/statistics of crabs_Poisson_model_results.
+test_1.7 <- function() {
+  test_that('Did not assign answer to an object called "crabs_Poisson_model_results"', {
+    expect_true(exists("crabs_Poisson_model_results"))
+  })
+
+  test_that("Solution should be a data frame", {
+    expect_true(inherits(crabs_Poisson_model_results, "data.frame"))
+  })
+
+  expected_colnames <- c('term','estimate','std.error','statistic','p.value','conf.low','conf.high')
+
+  given_colnames <- colnames(crabs_Poisson_model_results)
+  test_that("Data frame does not have the correct columns", {
+    expect_equal(length(setdiff(
+      union(expected_colnames, given_colnames),
+      intersect(expected_colnames, given_colnames)
+    )), 0)
+  })
+
+  test_that("Data frame does not contain the correct number of rows", {
+    expect_equal(digest(as.integer(nrow(crabs_Poisson_model_results))), "dd4ad37ee474732a009111e3456e7ed7")
+  })
+
+  test_that("Data frame does not contain the correct data", {
+    expect_equal(digest(as.integer(sum(crabs_Poisson_model_results$conf.low) * 10e6)), "79cef645627327fe2632a29728949fa8")
+    expect_equal(digest(as.integer(sum(crabs_Poisson_model_results$conf.high) * 10e6)), "b2f64ff96ebb309861a640e306397224")
+    expect_equal(digest(as.integer(sum(crabs_Poisson_model_results$statistic) * 10e6)), "09ad422316ee480535d0735aedfb2543")
+  })
+
+  print("Success!")
+}
+
+# test_1.8: checks crabs_Poisson_model_results once the exp.* columns exist (columns, rows, exp.* sums).
+test_1.8 <- function() {
+  test_that('Did not assign answer to an object called "crabs_Poisson_model_results"', {
+    expect_true(exists("crabs_Poisson_model_results"))
+  })
+
+  test_that("Solution should be a data frame", {
+    expect_true(inherits(crabs_Poisson_model_results, "data.frame"))
+  })
+
+  expected_colnames <- c('term','estimate','std.error','statistic','p.value','conf.low','conf.high', 'exp.estimate', 'exp.conf.low', 'exp.conf.high')
+
+  given_colnames <- colnames(crabs_Poisson_model_results)
+  test_that("Data frame does not have the correct columns", {
+    expect_equal(length(setdiff(
+      union(expected_colnames, given_colnames),
+      intersect(expected_colnames, given_colnames)
+    )), 0)
+  })
+
+  test_that("Data frame does not contain the correct number of rows", {
+    expect_equal(digest(as.integer(nrow(crabs_Poisson_model_results))), "dd4ad37ee474732a009111e3456e7ed7")
+  })
+
+  test_that("Data frame does not contain the correct data", {
+    expect_equal(digest(as.integer(sum(crabs_Poisson_model_results$exp.estimate) * 10e6)), "dd50342cffd6d0cde245787cbdd97021")
+    expect_equal(digest(as.integer(sum(crabs_Poisson_model_results$exp.conf.low) * 10e6)), "5aa9e6879f68968bdb958bdc1a4cdf32")
+    expect_equal(digest(as.integer(sum(crabs_Poisson_model_results$exp.conf.high) * 10e6)), "b6866c669fd240e038b16d7e35b90592")
+  })
+
+  print("Success!")
+}
+
+# test_1.9: checks that the lower-cased answer1.9 matches the stored hash.
+test_1.9 <- function() {
+  test_that('Did not assign answer to an object called "answer1.9"', {
+    expect_true(exists("answer1.9"))
+  })
+
+  answer_hash <- digest(tolower(answer1.9))
+  test_that("Solution is incorrect", {
+    expect_equal(answer_hash, "fd4d64bc84d8d1ac10b94c23bda1a016")
+  })
+
+  print("Success!")
+}
+
+# test_1.10: checks that answer1.10 is one of "A"/"B"/"C"/"D" (any case) and matches the stored hash.
+test_1.10 <- function() {
+  test_that('Did not assign answer to an object called "answer1.10"', {
+    expect_true(exists("answer1.10"))
+  })
+
+  test_that('Solution should be a single character ("A", "B", "C", or "D")', {
+    expect_match(answer1.10, "^[abcd]$", ignore.case = TRUE)
+  })
+
+  answer_hash <- digest(tolower(answer1.10))
+  test_that("Solution is incorrect", {
+    expect_equal(answer_hash, "6e7a8c1c098e8817e3df3fd1b21149d1")
+  })
+
+  print("Success!")
+}
+
+# test_1.11: checks that answer1.11 is one of "A"/"B"/"C"/"D" (any case) and matches the stored hash.
+test_1.11 <- function() {
+  test_that('Did not assign answer to an object called "answer1.11"', {
+    expect_true(exists("answer1.11"))
+  })
+
+  test_that('Solution should be a single character ("A", "B", "C", or "D")', {
+    expect_match(answer1.11, "^[abcd]$", ignore.case = TRUE)
+  })
+
+  answer_hash <- digest(tolower(answer1.11))
+  test_that("Solution is incorrect", {
+    expect_equal(answer_hash, "ddf100612805359cd81fdc5ce3b9fbba")
+  })
+
+  print("Success!")
+}
+
+# test_1.12: checks that answer1.12 converts to a number and matches the stored hash after scaling.
+test_1.12 <- function() {
+  test_that('Did not assign answer to an object called "answer1.12"', {
+    expect_true(exists("answer1.12"))
+  })
+
+  answer_as_numeric <- as.numeric(answer1.12)
+  test_that("Solution should be a number", {
+    expect_false(is.na(answer_as_numeric))
+  })
+
+  test_that("Solution is incorrect", {
+    expect_equal(digest(as.integer(answer_as_numeric * 10e6)), "07660fdc17d69e9b645c10b4a1f810be")
+  })
+
+  print("Success!")
+}
diff --git a/materials/tutorial_12/tutorial_12.ipynb b/materials/tutorial_12/tutorial_12.ipynb new file mode 100644 index 0000000..1130586 --- /dev/null +++ b/materials/tutorial_12/tutorial_12.ipynb @@ -0,0 +1,1516 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "2d0f31d79866375b7f3161d0277a32a3", + "grade": false, + "grade_id": "cell-f1e1d845873036f4", + "locked": true, + "schema_version": 3, + 
"solution": false, + "task": false + } + }, + "source": [ + "# Tutorial 12: Binary or Discrete Counts Responses" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "d94e81925b1456028fb504d00e3a9284", + "grade": false, + "grade_id": "cell-82d9926086d47a80", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "#### Lecture and Tutorial Learning Goals:\n", + "After completing this week's lecture and tutorial work, you will be able to:\n", + "\n", + "1. Describe the logistic regression estimation procedure (categorical data as the response variable and explanatory variables), and Poisson regression estimation procedure (discrete counts as the response variable and explanatory variables).\n", + "2. Discuss the relationship between linear regression and logistic and Poisson regression. Discuss the consequences of modeling data that is more suitable for logistic and Poisson regression models as a linear regression model.\n", + "3. Interpret the coefficients and $p$-values in the logistic and Poisson regression settings.\n", + "4. Discuss useful diagnostics for logistic and Poisson regression and explain why they should be performed.\n", + "5. Write a computer script to perform logistic and Poisson regression and perform model diagnostics. Interpret and communicate the results from that computer script." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "6dd126a878384f00ec2c6c116565c9ff", + "grade": false, + "grade_id": "cell-a2a153352bc44a68", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Run this cell before continuing.\n", + "library(tidyverse)\n", + "library(repr)\n", + "library(infer)\n", + "library(gridExtra)\n", + "library(mlbench)\n", + "library(AER)\n", + "library(ISLR)\n", + "library(broom)\n", + "library(qqplotr)\n", + "library(performance)\n", + "library(see)\n", + "library(MASS)\n", + "library(glmbb)\n", + "library(cowplot)\n", + "source(\"tests_tutorial_12.R\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "d500b40fbbd8d191dc155e0b17c231df", + "grade": false, + "grade_id": "cell-801b28434bf00853", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "## 1. Poisson Regression\n", + "\n", + "Let us proceed with Poisson regression. This class of GLM is intended for cases where the response is a count (i.e., an integer). What do we mean by this? Firstly, we will load the dataset to be used in this section. 
The data frame `crabs` ([Brockmann, 1996](https://ubc.summon.serialssolutions.com/2.0.0/link/0/eLvHCXMwrV3JasMwEBUlpZBLl7Sl6YY_oE4sy5sgFEpICKU9NadcjFYSmtghCyRf0t-tJC_EPhRaepMHS8jSSPM0M3oGALkdx67tCYRyKV0GacCp9CBX60RCySMcYiigy2qpOq_F1ZiMLqL0v-mFYrZvvd4JXXcPEnOUZbU1z6W-fxdoh6eO-XQUvDyGCnXrbK_hxCkDDCHKgs9KH20FelDOR_pzWxXbdYhljTEanoF10e8iC6V2SbDK9Pg_H3gOTnPsar1kynYBjkTSAs1yC923wMkkNaVL8NVbkNXn8wcxjJ8b0euaZysTvyuzVJUYH9i6Kpsl1ijVOSbTtPZ2f6V6nYuectnbbLGdb2tNLNP5fjkVi1J-BcbDwbg_svOfQNhMIRdsu4KHEnIsIiQYJoGQvsKQmAuOsE8lojRnrPEIlZQHHIUEEs8PBVHQjaJr0EjSRNwAS8ebmMNk4DHkRYgQSt2ARoxRnzuhJG2AitmNlxnVR3x4REI41sMf6-GP8-GPd20QmVn7RZV4MB7p0u3fq96BZpY-rn1B96CxWW3FgyGOeDR6_g1rIQSN)) is a dataset detailing the **counts** of satellite male crabs residing around a female crab nest: `n_males`. \n", + "\n", + "> The data frame `crabs` contains 173 observations on horseshoe crabs (*Limulus polyphemus*). The response is the count of male crabs (`n_males`) around a female breeding nest. It is subject to four input variables: a factor for the `color` of the prosoma with four levels, a factor for the condition of the posterior `spine` with three levels, the continuous variables for carapace `width` (cm), and `weight` (g).\n", + "\n", + "Run the cell below before proceeding." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "d7f194427d74287ab36175b312da4159", + "grade": false, + "grade_id": "cell-c42393c3147ab6d0", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + }, + "tags": [] + }, + "outputs": [], + "source": [ + "data(crabs)\n", + "crabs <- crabs %>%\n", + " rename(n_males = satell) %>%\n", + " dplyr::select(-y)\n", + "\n", + "str(crabs)\n", + "head(crabs)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "53940b5e4147ecc0a1d918a66012392c", + "grade": false, + "grade_id": "cell-3245a397a3c0a56f", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 1.1**\n", + "
{points: 1}\n", + "\n", + "Create a scatterplot of `n_males` versus carapace `width` (via `geom_point()`), even though `n_males` is not continuous. The `ggplot()` object's name will be `Crabs_vs_width_scatterplot`. Recall that the response must be placed on the $y$-axis, whereas the continuous input must be on the $x$-axis. Include proper axis labels and title.\n", + "\n", + "*Fill out those parts indicated with `...`, uncomment the corresponding code in the cell below, and run it.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "4e3dcaff4a02b8b0c12a3587d988c936", + "grade": false, + "grade_id": "cell-267744b8584d386b", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Crabs_vs_width_scatterplot <- \n", + "# ... %>%\n", + "# ggplot() +\n", + "# ...(aes(..., ...)) +\n", + "# labs(y = ..., x = ...) +\n", + "# ggtitle(...) 
+\n", + "# theme(\n", + "# text = element_text(size = 16.5),\n", + "# plot.title = element_text(face = \"bold\"),\n", + "# axis.title = element_text(face = \"bold\"),\n", + "# legend.title = element_text(face = \"bold\")) + \n", + "# scale_x_continuous(breaks = seq(20, 34, 2))\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "Crabs_vs_width_scatterplot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "0ab5a2b196ff6c3c1ed3a2b30946f711", + "grade": true, + "grade_id": "cell-4ea95eb36fa39c91", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + "task": false + }, + "tags": [] + }, + "outputs": [], + "source": [ + "test_1.1()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "6142c87aa6d0d6518e09687fd9fa8908", + "grade": false, + "grade_id": "cell-c3c5c48f76ee8a52", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "Note the characteristic horizontal pattern in the points of `Crabs_vs_width_scatterplot`, since the $y$-axis has repeated counts associated with different `width` values. Graphically speaking, is the carapace `width` associated with `n_males`?\n", + "\n", + "From the `Crabs_vs_width_scatterplot` above, it is hard to graphically conclude something about the relationship between `n_males` and carapace `width`. Hence, let us plot the average `n_males` by non-overlapping carapace `width` groups. 
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "5033c071993b6ed60129dcf2bbd9196c", + "grade": false, + "grade_id": "cell-d8dc6bd50a1e51f1", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 1.2**\n", + "
{points: 1}\n", + "\n", + "Create a data frame called `Crabs_group_avg_width`, which is created from `crabs` and has two columns:\n", + "\n", + "- `width_intervals`: a column created with column `width` via function `cut()` with `breaks = 10` (i.e., bins).\n", + "- `mean_n_males`: the average `n_males` by each bin.\n", + "\n", + "*Fill out those parts indicated with `...`, uncomment the corresponding code in the cell below, and run it.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "e6cfebcffc2b10cd7d5b47614f365c70", + "grade": false, + "grade_id": "cell-e136e294390e75db", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Crabs_group_avg_width <- \n", + "# ... %>%\n", + "# ...(width_intervals = ...(..., ...)) %>%\n", + "# group_by(...) %>% \n", + "# summarise(... = ...(...)) \n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "Crabs_group_avg_width" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "429c9f614730ce806230cd149ce6cd13", + "grade": true, + "grade_id": "cell-24bf5f18b3ee2717", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + "task": false + }, + "tags": [] + }, + "outputs": [], + "source": [ + "test_1.2()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "5ab1e5a66cefd3b5a20e56692594c572", + "grade": false, + "grade_id": "cell-fcf23138e5f9e32f", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 1.3**\n", + "
{points: 1}\n", + "\n", + "Create another scatterplot of `mean_n_males` on the $y$-axis versus the carapace `width_intervals` on the $x$-axis using `Crabs_group_avg_width` with `geom_point()`. The `ggplot()` object's name will be `Crabs_avg_width_scatterplot`. Include proper axis labels and title.\n", + "\n", + "*Fill out those parts indicated with `...`, uncomment the corresponding code in the cell below, and run it.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "062821791d38686c9c2f8ec6bba4af44", + "grade": false, + "grade_id": "cell-72dcede930e14421", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Crabs_avg_width_scatterplot <- ... %>%\n", + "# ggplot() +\n", + "# ...(aes(..., ...), colour = \"red\", size = 4) +\n", + "# labs(y = ..., x = ...) +\n", + "# ggtitle(...) +\n", + "# theme(\n", + "# text = element_text(size = 14),\n", + "# plot.title = element_text(face = \"bold\"),\n", + "# axis.title = element_text(face = \"bold\"),\n", + "# legend.title = element_text(face = \"bold\"),\n", + "# axis.text.x = element_text(angle = 45, hjust = 1)\n", + "# )\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "options(repr.plot.width = 15, repr.plot.height = 7)\n", + "plot_grid(Crabs_vs_width_scatterplot, Crabs_avg_width_scatterplot, ncol = 2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "06d23e2fc7522963d9ad92d2bc5ceedc", + "grade": true, + "grade_id": "cell-c470e7f855f8f4c8", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + "task": false + }, + "tags": [] + }, + "outputs": [], + "source": [ + "test_1.3()" + ] + }, + { + "cell_type": "markdown", + "metadata": { 
+ "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "a227e6065d0254d0e966f2b955bdde66", + "grade": false, + "grade_id": "cell-58f301f5cab01d1a", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 1.4**\n", + "
{points: 1}\n", + "\n", + "By looking at `Crabs_avg_width_scatterplot`, graphically speaking, what is the relationship between `n_males` and carapace `width`?\n", + "\n", + "**A.** Positive.\n", + "\n", + "**B.** Negative.\n", + "\n", + "**C.** No relationship.\n", + "\n", + "*Assign your answer to the object `answer1.4` (character type surrounded by quotes).*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "d417bb44aa9ac1ad7ea23c927da2496f", + "grade": false, + "grade_id": "cell-5f4531deea0754da", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# answer1.4 <- ...\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "answer1.4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "3fe969a9a11de52d0812d9d263da9ee1", + "grade": true, + "grade_id": "cell-32224ab4e0d00a59", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + "task": false + }, + "tags": [] + }, + "outputs": [], + "source": [ + "test_1.4()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "ca474bd507ac23a98657c19cea770765", + "grade": false, + "grade_id": "cell-98e957ee7f6dfc9c", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "If we compare `Crabs_vs_width_scatterplot` and `Crabs_avg_width_scatterplot`, we can see that working with the averages of `n_males` by carapace `width` intervals gives us a clearer perspective of the relationship between these two variables. 
Nonetheless, we need to find a suitable model to confirm this statistically.\n", + "\n", + "Recall that the residual component in an ordinary linear regression model, namely $\\varepsilon_i$, is assumed to be Normally distributed, making the response $Y_i$ Normally distributed. In this case, our response variable is the \"Number of male crabs\" (count!). Count distributions can be asymmetric, and they are non-negative, thus the Normal distribution might not be adequate. Logistic regression is not appropriate either, since we are not estimating proportions. \n", + "\n", + "A very useful distribution to model counts (not the only one!) is the Poisson distribution. " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "5790238897dcac0b0d03098c3cb3fa8f", + "grade": false, + "grade_id": "cell-d07f008bc4129b8f", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**The Poisson Regression**\n", + "\n", + "A Poisson random variable takes discrete non-negative integer values that count something in a given timeframe or even in a space such as a geographic unit. A particularity of the Poisson distribution is that its mean is equal to its variance. Thus, any factor that affects the mean will also affect the variance. This fact could be a potential drawback for using a Poisson regression model.\n", + "\n", + "The Poisson regression model is given by:\n", + "\n", + "$$Y_i|\\mathbf{X}_i \\sim \\text{Poisson}(\\lambda_i),$$\n", + "\n", + "$$\\log(\\lambda_i) = \\beta_0 + \\beta_1X_{1,i} + \\ldots + \\beta_pX_{p,i}$$\n", + "\n", + "or equivalently,\n", + "\n", + "$$\\lambda_i = e^{\\beta_0 + \\beta_1X_{1,i} + \\ldots + \\beta_pX_{p,i}}$$\n", + "\n", + "where each response $Y_i$ has its own mean, $\\lambda_i$, and variance, also $\\lambda_i$. 
The parameter $\\lambda_i$ is interpreted as the risk of an event occurring in a given timeframe or even a space. Note that $\\lambda_i$ cannot be negative; hence, the logarithm is a useful function to link $E[Y_i|\\mathbf{X}_i]=\\lambda_i$ with the linear component." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "e4b6ad031020360f52082c24f73240fe", + "grade": false, + "grade_id": "cell-27605b7706aedb71", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 1.5**\n", + "
{points: 1}\n", + "\n", + "For our `crabs` dataset the events are the number of male crabs, `n_males`, around a space: the female breeding nest. Suppose we want to make inference on whether the carapace `width` is related to the response `n_males`. Thus, we could use Poisson regression. Let $X_{\\texttt{width}_i}$ be the $i$th value for the input `width` in our dataset `crabs`. The model's regression equation will be:\n", + "\n", + "$$\\log(\\lambda_i) = \\beta_0 + \\beta_1 X_{\\texttt{width}_i}.$$\n", + "\n", + "Let us plot the predictions of this model on top of `Crabs_vs_width_scatterplot`. Use `geom_smooth()` with `method = \"glm\"` and `method.args = list(family = poisson)`.\n", + "\n", + "*Fill out those parts indicated with `...`, uncomment the corresponding code in the cell below, and run it.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "dcf2caa2b54bc8b29705e3acac978768", + "grade": false, + "grade_id": "cell-3a1e6d7e495e79d0", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Crabs_vs_width_scatterplot <- \n", + "# Crabs_vs_width_scatterplot +\n", + "# ...(\n", + "# aes(..., ...), \n", + "# ..., se = FALSE,\n", + "# ...)\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "Crabs_vs_width_scatterplot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "419c75caf1de4a00fff5719224bc8cb6", + "grade": true, + "grade_id": "cell-e6f45c72012f6548", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_1.5()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + 
"nbgrader": { + "cell_type": "markdown", + "checksum": "29bc289c41116e36fe2426e5055d88eb", + "grade": false, + "grade_id": "cell-04bbdae892ba5783", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "As seen in other models, the parameters $\\beta_0, \\beta_1, \\dots, \\beta_{p}$ are unknown population coefficients that we want to estimate using data. \n", + "\n", + "In order to fit a Poisson regression model, we can also use the function `glm()` and its argument `family = poisson` (required to specify the Poisson nature of the response), which obtains the estimates $\\hat{\\beta}_0, \\hat{\\beta}_1, \\dots \\hat{\\beta}_{p}$. The estimates are obtained through maximum likelihood where we assume a Poisson joint probability mass function of the $n$ responses $Y_i$." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "7709c39efbfc26544edb44656db777bd", + "grade": false, + "grade_id": "cell-1cd6b2d8388adfad", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 1.6**\n", + "
{points: 1}\n", + "\n", + "Using `glm()`, estimate a Poisson regression model with `n_males` as a response and two input variables: `width` ($X_{\\texttt{width}_i}$) and `color` ($X_{\\texttt{color_darker}_i}$, $X_{\\texttt{color_light}_i}$, and $X_{\\texttt{color_medium}_i}$) for the $i$th observation:\n", + "\n", + "$$\n", + "\\log(\\lambda_i) = \\beta_0 + \\beta_1 X_{\\texttt{width}_i} + \\beta_2 X_{\\texttt{color_darker}_i} + \\beta_3 X_{\\texttt{color_light}_i} + \\beta_4 X_{\\texttt{color_medium}_i}.\n", + "$$\n", + "\n", + "Note that the ordinal input `color` has four levels (where `dark` is the baseline): " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "ef6561481dd7f355a99408c62d1dcd9a", + "grade": false, + "grade_id": "cell-a5c51d7457cd83a8", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "levels(crabs$color)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "9f11986d9d874da9b2c6d7a4a127d49d", + "grade": false, + "grade_id": "cell-3474c3e0f1563e34", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "Therefore, we have three dummy variables: $X_{\\texttt{color_darker}_i}$, $X_{\\texttt{color_light}_i}$, and $X_{\\texttt{color_medium}_i}$.
Depending on the `color`, these dummy variables take on the following values:\n", + "\n", + "- When `color` is `darker`, then $X_{\\texttt{color_darker}_i} = 1$ while the other two dummy variables $X_{\\texttt{color_light}_i} = X_{\\texttt{color_medium}_i} = 0$.\n", + "- When `color` is `light`, then $X_{\\texttt{color_light}_i} = 1$ while the other two dummy variables $X_{\\texttt{color_darker}_i} = X_{\\texttt{color_medium}_i} = 0$.\n", + "- When `color` is `medium`, then $X_{\\texttt{color_medium}_i} = 1$ while the other two dummy variables $X_{\\texttt{color_darker}_i} = X_{\\texttt{color_light}_i} = 0$.\n", + "\n", + "Call the model `crabs_Poisson_model`.\n", + " \n", + "*Fill out those parts indicated with `...`, uncomment the corresponding code in the cell below, and run it.* " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "0c23fb53ad786e0805a83daabc30eb8a", + "grade": false, + "grade_id": "cell-cce6669279499835", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# crabs_Poisson_model <- ...(\n", + "# ...,\n", + "# ...,\n", + "# ...\n", + "# )\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "summary(crabs_Poisson_model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "01ca38e188374d6711e79151a5fec892", + "grade": true, + "grade_id": "cell-97cc9b41beb464bf", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + "task": false + }, + "tags": [] + }, + "outputs": [], + "source": [ + "test_1.6()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum":
"a275f0f6e422f602fbd0bcfa871d3289", + "grade": false, + "grade_id": "cell-b7f592bf64da6cf8", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 1.7**\n", + "
{points: 1}\n", + "\n", + "Report the estimated coefficients, their standard errors, and corresponding $p$-values using `tidy()` with `crabs_Poisson_model`. Include the corresponding asymptotic 95% confidence intervals. Store the results in the variable `crabs_Poisson_model_results`.\n", + "\n", + "*Fill out those parts indicated with `...`, uncomment the corresponding code in the cell below, and run it.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "b8ee3cebef5e66c1989656ce43ab8951", + "grade": false, + "grade_id": "cell-4deafdcc73d1ddc4", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# crabs_Poisson_model_results <- \n", + "# ...(..., conf.int = TRUE) %>%\n", + "# mutate_if(is.numeric, round, 3)\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "crabs_Poisson_model_results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "338e86ff29326cb30c941ede1a46255c", + "grade": true, + "grade_id": "cell-441e381e29ca858b", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_1.7()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "a9c205d86ae2d22d334014e8ae098f8b", + "grade": false, + "grade_id": "cell-f95b8a861b29b15b", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 1.8**\n", + "
{points: 1}\n", + "\n", + "Since the link function is logarithmic, we can also interpret the exponentiated coefficients. Add to `crabs_Poisson_model_results` the estimate, and 95% confidence interval, of $e^{\\beta_j}, j=0,...,p$. \n", + "\n", + "*Fill out those parts indicated with `...`, uncomment the corresponding code in the cell below, and run it.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "4ae1de2b5efd7ca21e55ad9390c55f1b", + "grade": false, + "grade_id": "cell-faa353ab9211c114", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# crabs_Poisson_model_results <- \n", + "# crabs_Poisson_model_results %>%\n", + "# mutate(\n", + "# exp.estimate = ...,\n", + "# exp.conf.low = ...,\n", + "# exp.conf.high = ...) %>%\n", + "# mutate_if(is.numeric, round, 4)\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "crabs_Poisson_model_results\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "a615caefcc12fee9864b794e383c0a3d", + "grade": true, + "grade_id": "cell-1822a6990bcbbd0f", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + "task": false + }, + "tags": [] + }, + "outputs": [], + "source": [ + "test_1.8()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "7a2c5ac3641b01bf800bd44b047d7fdc", + "grade": false, + "grade_id": "cell-d029850491a79223", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "Note that you can also get the exponentiated estimated coefficients using `tidy` when the link function is `log` or 
`logit`. Note that `std.error` and `statistic` are not adjusted and it does not report the raw coefficients." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "72d9f39879dae06280ccf6b1d8876395", + "grade": false, + "grade_id": "cell-d41aa0a8719a312b", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + }, + "tags": [] + }, + "outputs": [], + "source": [ + "tidy(crabs_Poisson_model, exponentiate = FALSE,conf.int = TRUE)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "44c3dd5dd57ecf3fa13ce816a7cac29e", + "grade": false, + "grade_id": "cell-87f78f36becf5ba3", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "tidy(crabs_Poisson_model, exponentiate = TRUE,conf.int = TRUE)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "2f9677618f8b5a521ccf7c8e681f0609", + "grade": false, + "grade_id": "cell-7c2691fb066e9a1e", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 1.9**\n", + "
{points: 1}\n", + "\n", + "Using a **significance level $\\alpha = 0.05$**, and the output in `crabs_Poisson_model_results`, which of the following statements are TRUE?\n", + "\n", + "**A.** There's enough evidence to reject the null hypothesis that the coefficient of carapace `width` is zero.\n", + "\n", + "**B.** There's enough evidence to reject the null hypothesis that, for any width, the mean numbers of male crabs with `dark` and `darker` colors of the prosoma are equal.\n", + "\n", + "**C.** There's enough evidence to reject the null hypothesis that, for any width, the mean numbers of male crabs with `dark` and `light` colors of the prosoma are equal. \n", + "\n", + "**D.** There's enough evidence to reject the null hypothesis that, for any width, the mean numbers of male crabs with `dark` and `medium` colors of the prosoma are equal. \n", + "\n", + "*Assign your answers to the object `answer1.9`. Your answers have to be included in a single string indicating the correct options **in alphabetical order** and surrounded by quotes (e.g., `\"ABCD\"` indicates you are selecting the four options).*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "d65d30b90231faecc44c6d528236c653", + "grade": false, + "grade_id": "cell-d1466876e62cbd8d", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "# answer1.9 <- \n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "answer1.9" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "9a60387f4dcfb40d6906afa52ea7a93e", + "grade": true, + "grade_id": "cell-c4b2bdbb07660385", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + "task": false + } + }, +
"outputs": [], + "source": [ + "test_1.9()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "2bfbb69a735c6be77b3a76f62ea9b164", + "grade": false, + "grade_id": "cell-3cce4716202a3ac2", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Interpretation of estimated coefficients for continuous covariates** \n", + "\n", + "Firstly, let us focus on the coefficient interpretation corresponding to carapace `width`, *while keeping `color` constant*. Consider an observation with a given value $X_{\\texttt{width}} = \\texttt{w}$ cm, and another observation with a given $X_{\\texttt{width + 1}} = \\texttt{w} + 1$ cm (i.e., an increase of $1$ cm). Then we have their corresponding regression equations:\n", + "\n", + "$$\n", + "\\log \\lambda_{\\texttt{width}} = \\beta_0 + \\beta_1 \\overbrace{\\texttt{w}}^{X_{\\texttt{width}}} + \\overbrace{\\beta_2 X_{\\texttt{color_darker}} + \\beta_3 X_{\\texttt{color_light}} + \\beta_4 X_{\\texttt{color_medium}}}^{\\text{Constant}}\n", + "$$\n", + "$$\n", + "\\log \\lambda_{\\texttt{width + 1}} = \\beta_0 + \\beta_1 \\underbrace{(\\texttt{w} + 1)}_{X_{\\texttt{width + 1}}} + \\underbrace{\\beta_2 X_{\\texttt{color_darker}} + \\beta_3 X_{\\texttt{color_light}} + \\beta_4 X_{\\texttt{color_medium}}.}_{\\text{Constant}}\n", + "$$\n", + "\n", + "We take the difference between both equations as:\n", + "\n", + "\\begin{align*}\n", + "\\log \\lambda_{\\texttt{width + 1}} - \\log \\lambda_{\\texttt{width}} &= \\beta_1 (\\texttt{w} + 1) - \\beta_1 \\texttt{w} \\\\\n", + "&= \\beta_1.\n", + "\\end{align*}\n", + "\n", + "Then, we apply the logarithm property for a ratio:\n", + "\n", + "\\begin{align*}\n", + "\\log \\frac{\\lambda_{\\texttt{width + 1}} }{\\lambda_{\\texttt{width}}} &= \\log \\lambda_{\\texttt{width + 1}} - \\log \\lambda_{\\texttt{width}} \\\\\n", + "&= \\beta_1.\n", + 
"\\end{align*}\n", + "\n", + "Finally, we have to exponentiate the previous equation:\n", + "\n", + "$$\n", + "\\frac{\\lambda_{\\texttt{width + 1}} }{\\lambda_{\\texttt{width}}} = e^{\\beta_1}.\n", + "$$\n", + "\n", + "This expression indicates that the mean count varies in a multiplicative way when a continuous covariate increases by 1 unit, i.e., $\\lambda_{\\texttt{width + 1}}= e^{\\beta_1}\\lambda_{\\texttt{width}}$." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "d18dc4097eb487b67385f4107bf2cafc", + "grade": false, + "grade_id": "cell-30680ad0b12b005a", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 1.10**\n", + "
{points: 1}\n", + "\n", + "**Using the column `exp.estimate` from `crabs_Poisson_model_results`**, what is the correct interpretation of the regression equation's estimated slope for `width`?\n", + "\n", + "**A.** The mean count of male crabs (`n_males`) around a female breeding nest decreases by $161\\%$ when increasing the carapace `width` by $1$ cm, *while keeping `color` constant*.\n", + "\n", + "**B.** The mean count of male crabs (`n_males`) around a female breeding nest increases by $161\\%$ when increasing the carapace `width` by $1$ cm, *while keeping `color` constant*.\n", + "\n", + "**C.** The mean count of male crabs (`n_males`) around a female breeding nest increases by $16.1\\%$ when increasing the carapace `width` by $1$ cm, *while keeping `color` constant*.\n", + "\n", + "**D.** The mean count of male crabs (`n_males`) around a female breeding nest decreases by $16.1\\%$ when increasing the carapace `width` by $1$ cm, *while keeping `color` constant*.\n", + "\n", + "**E.** The mean count of *dark* male crabs (`n_males`) around a female breeding nest increases by $161\\%$ when increasing the carapace `width` by $1$ cm.\n", + "\n", + "**F.** The mean count of *dark* male crabs (`n_males`) around a female breeding nest increases by $16.1\\%$ when increasing the carapace `width` by $1$ cm.\n", + "\n", + "*Assign your answer to the object `answer1.10`. 
Your answer should be one of `\"A\"`, `\"B\"`, `\"C\"`, `\"D\"`, `\"E\"`, or `\"F\"` surrounded by quotes.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "52bf049475f2209a74c35c60713d28d2", + "grade": false, + "grade_id": "cell-813f996317ff61dd", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# answer1.10 <- ...\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "answer1.10" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "f7233916807a2fc8edae2069861a8ade", + "grade": true, + "grade_id": "cell-01b57f216ff8f262", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + "task": false + }, + "tags": [] + }, + "outputs": [], + "source": [ + "test_1.10()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "f52d9e399a01697d185382fab3d2c8e2", + "grade": false, + "grade_id": "cell-01590258a807acee", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Interpretation of estimated coefficients of dummy variables** \n", + "\n", + "*Keeping `width` constant*, at any value (recall assumption of additive models!) 
consider an observation from the baseline `dark` group and another from the `darker` group. \n", + "\n", + "Then we have their corresponding regression equations:\n", + "\n", + "$$\n", + "\\log \\lambda_{\\texttt{width}} = \\beta_0 + \\beta_1 \\overbrace{\\texttt{w}}^{\\text{Constant}} + \\beta_2 X_{\\texttt{color_darker}} + \\beta_3 X_{\\texttt{color_light}} + \\beta_4 X_{\\texttt{color_medium}}\n", + "$$\n", + "\n", + "$$\n", + "\\log \\lambda_{\\texttt{width,dark}} = \\beta_0 + \\beta_1 \\texttt{w} + \\beta_2 \\times 0 + \\beta_3 \\times 0 + \\beta_4 \\times 0 \n", + "$$\n", + "\n", + "$$\n", + "\\log \\lambda_{\\texttt{width,darker}} = \\beta_0 + \\beta_1 \\texttt{w} + \\beta_2 \\times 1 + \\beta_3 \\times 0 + \\beta_4 \\times 0.\n", + "$$\n", + "\n", + "We take the difference between both equations as:\n", + "\n", + "\\begin{align*}\n", + "\\log \\lambda_{\\texttt{width,darker}} - \\log \\lambda_{\\texttt{width,dark}} = \\beta_2.\n", + "\\end{align*}\n", + "\n", + "Then, we apply the logarithm property for a ratio:\n", + "\n", + "\\begin{align*}\n", + "\\log \\frac{\\lambda_{\\texttt{width,darker}} }{\\lambda_{\\texttt{width,dark}}} &= \\log \\lambda_{\\texttt{width,darker}} - \\log \\lambda_{\\texttt{width,dark}} \\\\\n", + "&= \\beta_2.\n", + "\\end{align*}\n", + "\n", + "Finally, we have to exponentiate the previous equation:\n", + "\n", + "$$\n", + "\\frac{\\lambda_{\\texttt{width,darker}} }{\\lambda_{\\texttt{width,dark}}} = e^{\\beta_2}.\n", + "$$\n", + "\n", + "The expression $\\frac{\\lambda_{\\texttt{width,darker}} }{\\lambda_{\\texttt{width,dark}}} = e^{\\beta_2}$ indicates that the mean count changes in a multiplicative way between the two groups."
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "2730f7c988816ff03732f5225d70d2eb", + "grade": false, + "grade_id": "cell-f8af88cea66668b0", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 1.11**\n", + "
{points: 1}\n", + "\n", + "Let us move on to the interpretation of the coefficient corresponding to `light` from `color` (with reference level `dark`).\n", + "\n", + "Using the `crabs_Poisson_model_results` tibble, what is the correct interpretation of the regression estimated coefficient for the dummy variable `light`?\n", + "\n", + "**A.** The mean count of male crabs (`n_males`) around a female breeding nest is $54.7\\%$ lower in the `light` prosoma group compared to `dark` group, *while keeping the carapace `width` constant (for any width value).*\n", + "\n", + "**B.** The mean count of male crabs (`n_males`) around a female breeding nest is $54.7\\%$ higher in the `light` prosoma group compared to `dark` group, *while keeping the carapace `width` constant (for any width value).*\n", + "\n", + "**C.** The mean count of male crabs (`n_males`) around a female breeding nest is $154.7\\%$ higher in the `light` prosoma group compared to `dark` group, *while keeping the carapace `width` constant (for any width value).*\n", + "\n", + "**D.** The mean count of male crabs (`n_males`) around a female breeding nest is $154.7\\%$ lower in the `light` prosoma group compared to `dark` group, *while keeping the carapace `width` constant (for any width value).*\n", + "\n", + "**E.** The mean count of male crabs (`n_males`) around a female breeding nest is $54.7\\%$ lower in the `dark` prosoma group compared to `light` group, *while keeping the carapace `width` constant (for any width value).*\n", + "\n", + "**F.** The mean count of male crabs (`n_males`) around a female breeding nest is $54.7\\%$ higher in the `dark` prosoma group compared to `light` group, *while keeping the carapace `width` constant (for any width value).*\n", + "\n", + "*Assign your answer to the object `answer1.11`.
Your answer should be one of `\"A\"`, `\"B\"`, `\"C\"`, `\"D\"`, `\"E\"`, or `\"F\"` surrounded by quotes.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "ff760b89269d4912c27982a3fa0e2af6", + "grade": false, + "grade_id": "cell-9dd1384a6af3a36a", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# answer1.11 <- ...\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "answer1.11" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "d9edace5350e6c726a32663d2322356c", + "grade": true, + "grade_id": "cell-27d30eed0b7b8bae", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + "task": false + }, + "tags": [] + }, + "outputs": [], + "source": [ + "test_1.11()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "0ebd56f5239783cd05b4b3d5763e2d14", + "grade": false, + "grade_id": "cell-96565a64571f2a45", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 1.12**\n", + "
{points: 1}\n", + "\n", + "Suppose we want to predict the mean count of male crabs (`n_males`) around a female breeding nest with a carapace `width` of $27.5$ cm and a `light` `color` of the prosoma. Then, using the function `predict()` with the object `crabs_Poisson_model`, obtain the corresponding prediction.\n", + "\n", + "> **Hint:** Check the argument `type` when coding this prediction.\n", + "\n", + "*Assign your answer to the object `answer1.12`. Fill out those parts indicated with `...`, uncomment the corresponding code in the cell below, and run it.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "e495e0ababf6655472414e9b0822ac7a", + "grade": false, + "grade_id": "cell-5f4ccaec6bf73492", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# answer1.12 <- \n", + "# ...(...,\n", + "# tibble(..., ..., ...),\n", + "# type = ...\n", + "# )\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "answer1.12" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "81c218f35362c5b32755ab5d436812c3", + "grade": true, + "grade_id": "cell-0df8c510b063bb57", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + "task": false + }, + "tags": [] + }, + "outputs": [], + "source": [ + "test_1.12()" + ] + } + ], + "metadata": { + "jupytext": { + "formats": "ipynb,Rmd" + }, + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "codemirror_mode": "r", + "file_extension": ".r", + "mimetype": "text/x-r-source", + "name": "R", + "pygments_lexer": "r", + "version": "4.2.3" + }, + "latex_envs": { + "LaTeX_envs_menu_present": true, + "autoclose": false, + 
"autocomplete": true, + "bibliofile": "biblio.bib", + "cite_by": "apalike", + "current_citInitial": 1, + "eqLabelWithNumbers": true, + "eqNumInitial": 1, + "hotkeys": { + "equation": "Ctrl-E", + "itemize": "Ctrl-I" + }, + "labels_anchors": false, + "latex_user_defs": false, + "report_style_numbering": false, + "user_envs_cfg": false + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}