From d1216fc823f100a1edc1d19955ed21524cc442f3 Mon Sep 17 00:00:00 2001 From: Harrison Curtis Date: Mon, 1 Jul 2024 14:55:18 +0100 Subject: [PATCH] Major commit --- R/data_anal_gam_ext.r | 44 +++++++++++++++++++++++++++++++------------ R/data_anal_vis.r | 1 + README.md | 19 ++++++++++++++++--- 3 files changed, 49 insertions(+), 15 deletions(-) diff --git a/R/data_anal_gam_ext.r b/R/data_anal_gam_ext.r index 2198b97..82a6841 100644 --- a/R/data_anal_gam_ext.r +++ b/R/data_anal_gam_ext.r @@ -1,6 +1,9 @@ library(mgcv) library(tidyverse) library(gratia) +library(fable) +library(feasts) +library(tsibble) # Import data---- @@ -38,11 +41,10 @@ fitgam <- gam(TotalState ~ s(t), method = "REML", data = df_sapre) # AIC measure of the model fit--- AIC(fitl,fitgam) - newdata <- data.frame( t = c(164:179) ) -forecasts <- predict(fitgam, newdata, se.fit = TRUE) +forecasts <- predict.gam(fitgam, newdata, se.fit = TRUE) # Extract the fit and standard errors forecastmean <- forecasts$fit @@ -75,31 +77,49 @@ lost_revenue_upper = totalupper * cattle_levy lost_revenue_lower = totallower * cattle_levy # Model plots---- -fitgam %>% draw() + xlab() +fitgam %>% draw() fitgam %>% appraise() +# Required change for plot belwo to be consistnet with other R scripts. +# Thisc ould be factored out an functionalised. +df_sa <- df_sa %>% + mutate(Date = yearquarter(Date)) %>% + as_tsibble(index = Date) post_impact <- ggplot() + geom_line(data = df_sa, aes(x = Date, y = TotalState), color = "blue", linetype = "solid") + geom_line(aes(x = forcast_df$Date, y = forcast_df$forecastmean), color = "blue") + geom_ribbon(aes(x = forcast_df$Date, ymin = forcast_df$lower, ymax = forcast_df$upper,), fill = "blue", alpha = 0.3) + - geom_vline(xintercept = as.Date("2020-03-01"), color = "red", linetype = "dashed") + geom_vline(xintercept = as.Date("2020-03-01"), color = "red", linetype = "dashed") + + labs(x = "Date", y = "Number of Cattle Slaughtered ('000)", title = "Total number of cattle (excl calves) across all Australian States") + + theme( + plot.title = element_text(size = 10), # Title font size + axis.title = element_text(size = 8), # Axis titles font size + axis.text = element_text(size = 7) # Axis text font size + ) + + annotate("text", x = yearquarter("2000 Q2"), y = max(df_sa$TotalState) * 1.05, label = "Pre-COVID", color = "black", size = 2) + + annotate("text", x = yearquarter("2023 Q2"), y = max(df_sa$TotalState) * 1.05, label = "Post-COVID", color = "black", size = 2) +# Work out this plot. causal_impact_plot <- ggplot() + geom_line(data = drop_na(df_sa), aes(x = Date, y = TotalState), color = "blue") + geom_ribbon(data = forcast_df, - aes(x = Date, ymin = post_covid$NumberSlaughteredCATTLEexclcalvesTotalState), - ymax = yhat, fill = "red", alpha = 1) + + aes(x = Date, ymin = df_sapost$TotalState), + ymax = forecastmean, fill = "red", alpha = 1) + labs(y = "Number of Cattle Slaughtered ('000)", title = "Total number of cattle (excl calves) across all Australian States") + theme( plot.title = element_text(size = 10), # Title font size axis.title = element_text(size = 8), # Axis titles font size axis.text = element_text(size = 7) # Axis text font size - ) # -#+ - # geom_vline(xintercept = as.Date("2020-03-01"), color = "red", linetype = "dashed") + - #annotate("text", x = yearquarter("2000 Q2"), y = max(df_sa$NumberSlaughteredCATTLEexclcalvesTotalState) * 1.05, label = "Pre-COVID", color = "black", size = 2) + - #annotate("text", x = yearquarter("2023 Q2"), y = max(df_sa$NumberSlaughteredCATTLEexclcalvesTotalState) * 1.05, label = "Post-COVID", color = "black", size = 2) - + ) + + geom_vline(xintercept = as.Date("2020-03-01"), color = "red", linetype = "dashed") + + annotate("text", x = yearquarter("2000 Q2"), y = max(df_sa$TotalState) * 1.05, label = "Pre-COVID", color = "black", size = 2) + + annotate("text", x = yearquarter("2023 Q2"), y = max(df_sa$TotalState) * 1.05, label = "Post-COVID", color = "black", size = 2) + +# Save plots out----- +ggsave(filename = "/home/harrison/Desktop/gitHubRepos/cattlecovidcausal/img/gamforecast.png", + plot = post_impact, width = 6, height = 4, units = "in", dpi = 300) +ggsave(filename = "/home/harrison/Desktop/gitHubRepos/cattlecovidcausal/img/causal_impact_gam.png", + plot = causal_impact_plot, width = 6, height = 4, units = "in", dpi = 300) diff --git a/R/data_anal_vis.r b/R/data_anal_vis.r index 5125843..5b43f30 100644 --- a/R/data_anal_vis.r +++ b/R/data_anal_vis.r @@ -2,6 +2,7 @@ library(ggplot2) library(fable) library(feasts) +library(tsibble) #Import data---- # Import data from github repo. diff --git a/README.md b/README.md index 4e6f76a..acd642d 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,8 @@ post_covid <- df_sa %>% ``` The R code above shows that to conduct the analysis here the data had to be split between pre and post Covid-19. The date of 2020-03-01 was the date selected to be in line with Australias' Covid lockdowns (March 23, 2020) and the fidelity of sampling (quarterly) within the data collected by the ABS (Quarterly). -### Modelling +### Linear Modelling + ``` # Fit linear model for seasonal adjusted data. @@ -39,7 +40,6 @@ fitlinear <- pre_covid |> ``` As the code above shows a linear model was applied to the timeseries data in order estimate the causal effect. -### Model complexity This a simple example of an ITS analysis similar to that applied within the CausalPy package. As noted in their documentation more complex timeseries models can be applied to ITS analysis. None of these are applied here as the analysis is being conducted on the seasonally adjusted data provided by the ABS. Seasonal adjustments are very common within timeseries analyses (Hyndham & Athanasopoulos, 2021) they allow for modelling of the data to be easier by extracting variablity that in specific cases is not the focus of the analysis. @@ -100,6 +100,19 @@ lost_revenue_upper = totalupper * cattle_levy lost_revenue_lower = totallower * cattle_levy ``` +### Generalised additive modelling +The following plots are the result of using the MGCV package and associated methods to fit a additive model to avoid repetition not all of the code is not provided in detail here. See, the R directory within the repository for that. But much of it is just variation of the same general workflow. + +``` +# Fit of genralied additive model smoother on timeseries data denoted s(t) using mgcv. +fitgam <- gam(TotalState ~ s(t), method = "REML", data = df_sapre) +``` + +![x](https://github.com/HPCurtis/causalcovidcattle/blob/main/img/gamforecast.png?raw=true) +![y](https://github.com/HPCurtis/causalcovidcattle/blob/main/img/gamforecast.png?raw=true) + +## Model results + | |Mean|Lower|Upper| |------------------------|------|---------|---| | Total Cattle Slaughtered Linear|-7,408,400|-2,663,278|-12,153,521| @@ -126,7 +139,7 @@ Hyndman, R.J., & Athanasopoulos, G. (2021) *Forecasting: principles and practice Simpson, G. L. (2018). Modelling palaeoecological time series using generalised additive models. Frontiers in Ecology and Evolution, 6, 149. -Thompson, E. (2022). *How to escape from model land. How mathematical models can lead us astray and what we can do about. Basic Books. +Thompson, E. (2022). *How to escape from model land. How mathematical models can lead us astray and what we can do about.* Basic Books. Web resources