From 40e76a2114f8cc04e5e2111d93e4b352ded6dbe3 Mon Sep 17 00:00:00 2001 From: Harrison Curtis Date: Fri, 21 Jun 2024 13:33:06 +0100 Subject: [PATCH] all --- R/data_anal_vis.r | 46 +++++++++++++++------------------------- R/data_analyis.r | 0 data/.~lock.cattle.xlsx# | 1 + 3 files changed, 18 insertions(+), 29 deletions(-) delete mode 100644 R/data_analyis.r create mode 100644 data/.~lock.cattle.xlsx# diff --git a/R/data_anal_vis.r b/R/data_anal_vis.r index 69ffdea..89b4fff 100644 --- a/R/data_anal_vis.r +++ b/R/data_anal_vis.r @@ -1,37 +1,32 @@ +# Import relevant library(ggplot2) library(fable) +library(feasts) #Import data---- # Import data from github repo. -df_og <- read.csv("https://raw.githubusercontent.com/HPCurtis/causalcovidcattle/main/data/cattle_og.csv") df_sa <- read.csv("https://raw.githubusercontent.com/HPCurtis/causalcovidcattle/main/data/cattle_sa.csv") # Convert to date form. -df_og$Date <- as.Date(df_og$Date) df_sa$Date <- as.Date(df_sa$Date) +df_sa <- df_sa %>% drop_na() %>% + as_tsibble( index = Date) %>% + mutate(Date = yearquarter(Date)) + #Pre/Post Covid---- pre_covid <- df_sa %>% - drop_na() %>% - filter(Date < ymd("2020-03-01")) %>% + filter(Date < yearquarter("2020 Q2")) %>% mutate(Date = yearquarter(Date)) %>% as_tsibble(index = Date) post_covid <- df_sa %>% - drop_na() %>% - filter(Date >= ymd("2020-03-01") )%>% + filter(Date >= yearquarter("2020 Q2"))%>% mutate(Date = yearquarter(Date)) %>% as_tsibble(index = Date) # Visualisations for the project -#Time series Decomposition------ -# Conduct STL decomposition. -dcmp <- drop_na(df_sa) |> - model(stl = STL(NumberSlaughteredCATTLEexclcalvesTotalState)) - -components(dcmp) |> autoplot() - # Fit linear model for seasonal adjusted data. fitlinear <- pre_covid |> model(trend_model = TSLM(NumberSlaughteredCATTLEexclcalvesTotalState ~ trend())) @@ -42,27 +37,20 @@ fc <- fitlinear %>% # Get forecast intervals hilo() -# Convert Data column to approriate data type. -fc$Date <- as_date(fc$Date) - -plinear <- fc %>% autoplot(drop_na(df_sa)) + - labs(y = "Number of Cattle Slaughtered", - title = "Total number of cattle (excl calves) across all Australian States") + - theme( - plot.title = element_text(size = 10), # Title font size - axis.title = element_text(size = 8), # Axis titles font size - axis.text = element_text(size = 7) # Axis text font size - ) + geom_vline(xintercept = as.Date("2020-03-01")) - -# Save plot out -ggsave(filename = "/home/harrison/Desktop/gitHubRepos/cattlecovidcausal/img/linearforecast.png", plot = plinear, width = 6, height = 4, units = "in", dpi = 300) +plinear <- ggplot() + + geom_line(data = df_sa, aes(x = Date, y = NumberSlaughteredCATTLEexclcalvesTotalState), color = "blue", linetype = "solid") + + geom_line(aes(x = fc$Date, y = fc$.mean), color = "blue") + + geom_ribbon(aes(x = fc$Date, ymin = fc$`95%`$lower, ymax = fc$`95%`$upper), + fill = "blue", alpha = 0.3) + + geom_vline(xintercept = as.Date("2020-03-01"), color = "red", linetype = "dashed") + + labs(x = "Date", y = "Number of Cattle Slaughtered ('000)", title = "Total number of cattle (excl calves) across all Australian States") + #Calculate Causal impact---- yhat <- fc$.mean yhatupper <- fc$`95%`$upper yhatlower <- fc$`95%`$lower -# Clauate mean and lower and upper bounds +# Calculate mean and lower and upper bounds Totalslaughteredimpact <- post_covid$NumberSlaughteredCATTLEexclcalvesTotalState - yhat Totalslaughteredimpactupper <- post_covid$NumberSlaughteredCATTLEexclcalvesTotalState - yhatupper Totalslaughteredimpactlower <- post_covid$NumberSlaughteredCATTLEexclcalvesTotalState - yhatlower @@ -78,7 +66,7 @@ causal_impact_plot <- ggplot() + geom_ribbon(data = fc, aes(x = Date, ymin = post_covid$NumberSlaughteredCATTLEexclcalvesTotalState), ymax = yhat, fill = "blue", alpha = 0.2) + - labs(y = "Number of Cattle Slaughtered", + labs(y = "Number of Cattle Slaughtered ('000)", title = "Total number of cattle (excl calves) across all Australian States") + theme( plot.title = element_text(size = 10), # Title font size diff --git a/R/data_analyis.r b/R/data_analyis.r deleted file mode 100644 index e69de29..0000000 diff --git a/data/.~lock.cattle.xlsx# b/data/.~lock.cattle.xlsx# new file mode 100644 index 0000000..f3198f0 --- /dev/null +++ b/data/.~lock.cattle.xlsx# @@ -0,0 +1 @@ +,harrison,harrison-HP-Laptop-14-cf1xxx,21.06.2024 13:18,file:///home/harrison/.config/libreoffice/4; \ No newline at end of file