Supplemental4_Analysis.Rmd

---
title: "RampCodes_Supplemental6"
output: html_document
---

```{r setup, include=FALSE}
# Set the default chunk option so that the source code of every chunk is echoed.
knitr::opts_chunk$set(echo = TRUE)
```


### ------------------------------------------------------------------------------------------------- ### 

## This script plots the properties and proportions of cells classified as positive or negative by the linear model, and plots schematic elements that explain the shuffled data.

### ------------------------------------------------------------------------------------------------- ### 

How many neurons are ramp cells? 

1. Get the total number of ramp cells (cells classified as Negative or Positive)
```{r}
# Number of cells whose lm_group_b is "Negative" or "Positive". Rows for which
# the comparison evaluates to NA are not counted.
ramp_num <- with(
  spatial_firing,
  sum(lm_group_b == "Negative" | lm_group_b == "Positive", na.rm = TRUE)
)
```

### ------------------------------------------------------------------------------------------------- ### 

Now we want to visualise the coefficients of all neurons and all shuffled datasets

Now let's find and plot the proportion of cells according to the classification

1. extract proportion of cells that meet each criteria
```{r}
# Percentage of all cells in each lm_group_b class: the class count is divided
# by the total number of rows and scaled to percent. Rows where the comparison
# is NA are not counted in any class.
start <- 100 * (
  with(spatial_firing, sum(lm_group_b == "Negative", na.rm = TRUE)) /
    nrow(spatial_firing)
)
reward <- 100 * (
  with(spatial_firing, sum(lm_group_b == "Positive", na.rm = TRUE)) /
    nrow(spatial_firing)
)
nonslope <- 100 * (
  with(spatial_firing, sum(lm_group_b == "Unclassified", na.rm = TRUE)) /
    nrow(spatial_firing)
)
```

2. Get the total number of cells in each group
```{r}
# Number of cells in each lm_group_b class (rows with an NA comparison are not
# counted).
start_num <- with(spatial_firing, sum(lm_group_b == "Negative", na.rm = TRUE))
reward_num <- with(spatial_firing, sum(lm_group_b == "Positive", na.rm = TRUE))
nonslope_num <- with(
  spatial_firing,
  sum(lm_group_b == "Unclassified", na.rm = TRUE)
)

```

3. Put into a tibble 
```{r}
# One row per class: percentage of cells, cell count, and the class label
# (stored twice, as ramp_id and ramp_type, for use as fill and x aesthetics).
proportions_mixed_ramps <- tibble(
  perc = c(start, reward, nonslope),
  num = c(start_num, reward_num, nonslope_num),
  ramp_id = c("Start", "ToReward", "Unclassified"),
  ramp_type = ramp_id
)
```


4. Plot bar graph of proportions
```{r}
# Bar chart of the percentage of cells in each class, with the number of cells
# in each class written on the bar.
ggplot(proportions_mixed_ramps, aes(x = ramp_type, y = perc, fill = factor(ramp_id))) +
  geom_bar(stat = "identity", width = 0.9, alpha = .4) +
  labs(y = "Percent", x = "") +
  scale_fill_manual(values = c("violetred2", "chartreuse3", "grey62")) +
  # `srt` is a base-graphics parameter that geom_text() does not know, so the
  # rotation was silently dropped (with an "unknown parameters" warning).
  # The ggplot2 name for text rotation is `angle`.
  geom_text(aes(label = num), hjust = 1.5, vjust = 0.5, angle = 90, size = 6,
            position = position_dodge(-0.5)) +
  theme_classic() +
  theme(axis.text.x = element_text(size = 19),
        axis.text.y = element_text(size = 20),
        legend.position = "bottom",
        legend.title = element_blank(),
        text = element_text(size = 19),
        legend.text = element_text(size = 19),
        axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)))
# Write the figure to disk only when figure saving is switched on.
if (save_figures == 1) {
  ggsave(filename = "plots/Outbound_ramp_proportions_update.png", width = 3, height = 5.5)
}
```


We might also want to visualise the coefficients for the real and shuffled dataset as a histogram.

1. First, make stacked histogram of slope values for real dataset 
```{r}
level_order <- c("Negative", "Positive", "Unclassified")

# Colours are keyed by group name. With unnamed values ggplot2 hands out the
# colours in order to the levels that are actually present in the data, so a
# plot of only the "Positive" cells would be drawn in the "Negative" colour.
lm_group_colours <- c(Negative = "violetred2",
                      Positive = "chartreuse3",
                      Unclassified = "grey62")

# Stacked histogram of the fitted slopes of all cells, filled by lm_group_b.
# Bar heights are counts divided by the total count (proportions).
(exp_slopes_plot <- ggplot(data = spatial_firing,
                           aes(x = asr_b_o_rewarded_fit_slope,
                               fill = factor(unlist(lm_group_b), levels = level_order))) +
    coord_cartesian(xlim = c(-0.45, 0.45), ylim = c(0, 0.4)) +
    geom_histogram(aes(y = ..count.. / sum(..count..)), binwidth = 0.01, alpha = 1) +
    labs(x = "Slope (Hz/cm)") +
    ylab("Proportion") +
    scale_y_continuous(breaks = scales::pretty_breaks(n = 4)) +
    scale_fill_manual(values = lm_group_colours) +
    theme_classic() +
    theme(axis.text.x = element_text(size = 16),
          axis.text.y = element_text(size = 16),
          legend.title = element_blank(),
          legend.position = "none",
          text = element_text(size = 16),
          legend.text = element_text(size = 16),
          axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0))))
if (save_figures == 1) {
  ggsave(filename = "plots/Outbound_slope_histogram_update.png", width = 4, height = 2)
}

# Same histogram restricted to cells classified as "Positive"; proportions are
# relative to the number of cells in this subset.
ggplot(data = subset(spatial_firing, lm_group_b == "Positive"),
       aes(x = asr_b_o_rewarded_fit_slope,
           fill = factor(unlist(lm_group_b), levels = level_order))) +
  coord_cartesian(xlim = c(-0.45, 0.45)) +
  geom_histogram(aes(y = ..count.. / sum(..count..)), binwidth = 0.01, alpha = 0.5) +
  ylab("Proportion") +
  scale_y_continuous(breaks = scales::pretty_breaks(n = 4)) +
  scale_fill_manual(values = lm_group_colours) +
  theme_classic() +
  theme(axis.text.x = element_text(size = 16),
        axis.text.y = element_text(size = 16),
        legend.title = element_blank(),
        legend.position = "none",
        text = element_text(size = 16),
        legend.text = element_text(size = 16),
        axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)))
if (save_figures == 1) {
  ggsave(filename = "plots/Outbound_slope_histogram_update_pos.png", width = 4, height = 2)
}


# Same histogram restricted to cells classified as "Negative".
ggplot(data = subset(spatial_firing, lm_group_b == "Negative"),
       aes(x = asr_b_o_rewarded_fit_slope,
           fill = factor(unlist(lm_group_b), levels = level_order))) +
  coord_cartesian(xlim = c(-0.45, 0.45)) +
  geom_histogram(aes(y = ..count.. / sum(..count..)), binwidth = 0.01, alpha = 0.5) +
  ylab("Proportion") +
  scale_y_continuous(breaks = scales::pretty_breaks(n = 4)) +
  scale_fill_manual(values = lm_group_colours) +
  theme_classic() +
  theme(axis.text.x = element_text(size = 16),
        axis.text.y = element_text(size = 16),
        legend.title = element_blank(),
        legend.position = "none",
        text = element_text(size = 16),
        legend.text = element_text(size = 16),
        axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)))
if (save_figures == 1) {
  ggsave(filename = "plots/Outbound_slope_histogram_update_neg.png", width = 4, height = 2)
}
```

## Extract the homebound classifications for the shuffled data
```{r}
#' Load the homebound circular-shuffle fit results for the cells in `df_in`.
#'
#' Reads the shuffle table at `cs_path`, nests the `*_hb` fit statistics of
#' every shuffle by cell (separately for the beaconed, non-beaconed and probe
#' columns), and returns the nested results for the cells of `df_in`, in the
#' row order of `df_in`. Cells are identified by pasting `session_id` and
#' `cluster_id` together.
#'
#' @param df_in Data frame with `session_id` and `cluster_id` columns.
#' @param cs_path Path to a feather file with `session_id`, `cluster_id`,
#'   `shuffle_id` and the `*_r2_hb`, `*_slope_hb` and `*_p_val_hb` columns for
#'   the `beaconed`, `non_beaconed` and `probe` prefixes.
#' @return A tibble with one row per cell of `df_in` found in the shuffle data
#'   and the columns `unique_cell_id`, `shuffle_results_b_h`,
#'   `shuffled_results_nb_h` and `shuffled_results_p_h`. Each result column is
#'   a list of tibbles with the columns `neuron`, `r.squared`, `slope` and
#'   `p.value`. Cells without shuffle data are dropped with a warning.
local_circ_shuffles_hb <- function(df_in, cs_path) {
  shuffled_df <- read_feather(cs_path)

  # Add a unique id (session id + cluster id) for each cell.
  shuffled_df$unique_cell_id <- paste(shuffled_df$session_id, shuffled_df$cluster_id)
  number_of_cells <- length(unique(shuffled_df$unique_cell_id))
  print('Number of cells in spike-level shuffle data:')
  print(number_of_cells)

  # Reformat the shuffled data: one row per cell, with that cell's shuffles
  # nested in the `data` list column.
  shuffled_b <- shuffled_df %>%
    select(unique_cell_id, shuffle_id, beaconed_r2_hb, beaconed_slope_hb, beaconed_p_val_hb) %>%
    rename(neuron = "shuffle_id", slope = "beaconed_slope_hb", r.squared = "beaconed_r2_hb", p.value = "beaconed_p_val_hb") %>%
    group_by(unique_cell_id) %>%
    nest() %>%
    ungroup()
  shuffled_nb <- shuffled_df %>%
    select(unique_cell_id, shuffle_id, non_beaconed_r2_hb, non_beaconed_slope_hb, non_beaconed_p_val_hb) %>%
    rename(neuron = "shuffle_id", slope = "non_beaconed_slope_hb", r.squared = "non_beaconed_r2_hb", p.value = "non_beaconed_p_val_hb") %>%
    group_by(unique_cell_id) %>%
    nest() %>%
    ungroup()
  shuffled_p <- shuffled_df %>%
    select(unique_cell_id, shuffle_id, probe_r2_hb, probe_slope_hb, probe_p_val_hb) %>%
    rename(neuron = "shuffle_id", slope = "probe_slope_hb", r.squared = "probe_r2_hb", p.value = "probe_p_val_hb") %>%
    group_by(unique_cell_id) %>%
    nest() %>%
    ungroup()

  # Reference cell ids from the experimental data, in its row order.
  unique_cell_ids <- paste(df_in$session_id, df_in$cluster_id)

  # Pick the nested rows for the experimental cells, in experimental order.
  # Each result is taken from its own nested table; previously the
  # non-beaconed and probe results were both read from the beaconed table.
  rows_for_cells <- function(nested) {
    rows <- match(unique_cell_ids, nested$unique_cell_id)
    nested[rows[!is.na(rows)], ]
  }
  shuffled_results_b <- rows_for_cells(shuffled_b)
  shuffled_results_nb <- rows_for_cells(shuffled_nb)
  shuffled_results_p <- rows_for_cells(shuffled_p)

  n_missing <- length(unique_cell_ids) - nrow(shuffled_results_b)
  if (n_missing > 0) {
    warning(n_missing, " cell(s) in the input have no shuffle data and were dropped",
            call. = FALSE)
  }

  tibble(unique_cell_id = shuffled_results_b$unique_cell_id,
         shuffle_results_b_h = shuffled_results_b$data,
         shuffled_results_nb_h = shuffled_results_nb$data,
         shuffled_results_p_h = shuffled_results_p$data)
}

```

```{r}
# To load circular shuffle results
if (shuffle_type == "circular") {
  cs_path <- "data_in/all_mice_concatenated_shuffle_data_rewarded_unsmoothened.feather"
  spatial_firing_circ <- local_circ_shuffles_hb(spatial_firing, cs_path)

  # The shuffle columns are attached by row position below, so the loaded rows
  # must correspond one-to-one, and in the same order, to the rows of
  # spatial_firing. Fail loudly instead of mis-assigning or recycling results.
  stopifnot(identical(
    as.character(spatial_firing_circ$unique_cell_id),
    paste(spatial_firing$session_id, spatial_firing$cluster_id)
  ))

  # Drop any previously attached homebound shuffle columns, then attach the
  # freshly loaded ones. (The probe pattern used to read 'shuffle_results_pb_h',
  # which matches no column.)
  spatial_firing <- spatial_firing %>%
    select(-contains('shuffle_results_b_h')) %>%
    select(-contains('shuffle_results_nb_h')) %>%
    select(-contains('shuffle_results_p_h')) %>%
    mutate(shuffle_results_b_h = spatial_firing_circ$shuffle_results_b_h,
           shuffle_results_nb_h = spatial_firing_circ$shuffled_results_nb_h,
           shuffle_results_p_h = spatial_firing_circ$shuffled_results_p_h)
  # Remove unused frame
  rm(spatial_firing_circ)
}

# Check to see if the column shuffle_results_b_h exists.
# It will exist if shuffles have been pre-loaded or if circular shuffle results have been loaded.
# If it doesn't exist then shuffle_rates is applied to the averaged rates of each cell
# (called with the arguments 110, 170 and shuffles).
if (!"shuffle_results_b_h" %in% colnames(spatial_firing)) {
  spatial_firing <- spatial_firing %>%
    mutate(shuffle_results_b_h = future_pmap(list(Rates_averaged_rewarded_b, 110, 170, shuffles), shuffle_rates)) %>%
    mutate(shuffle_results_nb_h = future_pmap(list(Rates_averaged_rewarded_nb, 110, 170, shuffles), shuffle_rates)) %>%
    mutate(shuffle_results_p_h = future_pmap(list(Rates_averaged_rewarded_p, 110, 170, shuffles), shuffle_rates))
}
```
```{r}
# Spread the components of each shuffle_results_b_h entry into their own
# columns, named with the prefix "shuffle_results_b_h_".
spatial_firing <- unnest_wider(
  spatial_firing,
  shuffle_results_b_h,
  names_sep = "_",
  names_repair = "universal"
)
```


### How much of the shuffled dataset passes the classification criteria?
1. Extract shuffled slopes and rsquared values. 
```{r}
# Number of shuffled fits stored for each cell. The per-cell slope bounds are
# repeated this many times so they line up with the flattened shuffle values;
# this replaces a hard-coded 1000 shuffles per cell.
shuffles_per_cell <- vapply(
  spatial_firing$shuffle_results_b_o_slope,
  function(x) length(unlist(x)),
  integer(1)
)

# Long table with one row per (cell, shuffle): the b_o shuffle fit statistics,
# the cell's min/max slope bounds, and the b_h shuffle fit statistics.
shuff_slopes <- tibble(slopes = unlist(spatial_firing$shuffle_results_b_o_slope),
                       r2 = unlist(spatial_firing$shuffle_results_b_o_r.squared),
                       pval = unlist(spatial_firing$shuffle_results_b_o_p.value),
                       min_slope = rep(spatial_firing$shuffle_min_slope_b_o, times = shuffles_per_cell),
                       max_slope = rep(spatial_firing$shuffle_max_slope_b_o, times = shuffles_per_cell),

                       slopes_h = unlist(spatial_firing$shuffle_results_b_h_slope),
                       r2_h = unlist(spatial_firing$shuffle_results_b_h_r.squared),
                       pval_h = unlist(spatial_firing$shuffle_results_b_h_p.value))
#shuff_slopes <- head(shuff_slopes, n =1000)         
```

2. Function to classify shuffled cells based on shuffled distribution
```{r}
# Classify every shuffled fit with compare_slopes, once for the b_o values
# (slopes, pval) and once for the b_h values (slopes_h, pval_h), then derive
# the combined track category from the two classifications.
# NOTE(review): the b_h classification reuses min_slope/max_slope, which are
# taken from the b_o shuffle bounds -- confirm this is intended.
shuff_slopes <- shuff_slopes %>%
  mutate(
    shuff_lm_group_b = pmap(list(min_slope, max_slope, slopes, pval), compare_slopes),
    shuff_lm_group_b_h = pmap(list(min_slope, max_slope, slopes_h, pval_h), compare_slopes)
  ) %>%
  # Remove track-category columns left over from an earlier run.
  select(-contains('track_category')) %>%
  mutate(
    track_category = future_map2(shuff_lm_group_b, shuff_lm_group_b_h, mark_track_category),
    track_category_numeric = map2(shuff_lm_group_b, shuff_lm_group_b_h, mark_numeric_track_category)
  )
```

2. Plot pre reward vs post reward slope for shuffled dataset
```{r}
# Shuffles classified as "Positive" or "Negative" on the first (x-axis) slope.
position_shuff_slopes <- shuff_slopes %>%
  filter(shuff_lm_group_b == "Positive" | shuff_lm_group_b == "Negative") %>%
  select(slopes, slopes_h, track_category, shuff_lm_group_b)


# Scatter of slopes (x) against slopes_h (y), coloured by shuff_lm_group_b.
# Every point layer shares the same mapping, so it is declared once here; the
# layers differ only in which rows they draw and in the point shape.
ggplot(mapping = aes(x = as.numeric(unlist(slopes)),
                     y = as.numeric(unlist(slopes_h)),
                     color = factor(unlist(shuff_lm_group_b)))) +
  # Same classification for both slopes (default shape)
  geom_point(data = filter(position_shuff_slopes,
                           track_category == "pospos" | track_category == "negneg"),
             alpha = 0.8) +
  # Opposite classifications (shape 2)
  geom_point(data = filter(position_shuff_slopes,
                           track_category == "posneg" | track_category == "negpos"),
             shape = 2, alpha = 0.8) +
  # "posnon" / "negnon" categories (shape 3)
  geom_point(data = filter(position_shuff_slopes,
                           track_category == "posnon" | track_category == "negnon"),
             shape = 3, alpha = 0.8) +
  # Shuffles that are "Unclassified" on the first slope (shape 4)
  geom_point(data = filter(shuff_slopes, shuff_lm_group_b == "Unclassified"),
             shape = 4, alpha = 0.8) +
  coord_cartesian(ylim = c(-.45, .61), xlim = c(-.45, .45)) +
  # Dashed reference lines y = x and y = -x
  geom_abline(intercept = 0, slope = 1, colour = "grey", linetype = "dashed") +
  geom_abline(intercept = 0, slope = -1, colour = "grey", linetype = "dashed") +
  labs(x = "Pre-reward slope", y = "Post-reward slope") +
  theme_classic() +
  scale_color_manual(values = c("violetred2", "chartreuse3", "grey81")) +
  theme(
    axis.text.x = element_text(size = 18),
    axis.text.y = element_text(size = 18),
    legend.position = "bottom",
    legend.title = element_blank(),
    text = element_text(size = 17),
    legend.text = element_text(size = 16),
    axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0))
  )

if (save_figures == 1) {
  ggsave(filename = "plots/shuffle_slope_comparison_reset.png", width = 4, height = 4)
}

```


2. Plot histogram of slopes
```{r}
level_order <- c("Negative", "Positive", "Unclassified")

# Stacked histogram of the shuffled slopes, filled by the shuffle
# classification. Bar heights are counts divided by the total count.
shuffle_slopes_plot <- ggplot(
  data = shuff_slopes,
  aes(x = slopes,
      fill = factor(unlist(shuff_lm_group_b), levels = level_order))
) +
  geom_histogram(aes(y = ..count.. / sum(..count..)), binwidth = 0.01) +
  coord_cartesian(xlim = c(-0.45, 0.45), ylim = c(0, 0.4)) +
  labs(x = "Slope (Hz/cm)", y = "Proportion") +
  scale_fill_manual(values = c("violetred2", "chartreuse3", "grey62")) +
  scale_y_continuous(breaks = scales::pretty_breaks(n = 4)) +
  theme_classic() +
  theme(
    text = element_text(size = 16),
    axis.text.x = element_text(size = 16),
    axis.text.y = element_text(size = 16),
    axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0)),
    legend.title = element_blank(),
    legend.text = element_text(size = 16),
    legend.position = "none"
  )
# Display the plot in the knitted output.
shuffle_slopes_plot
if (save_figures == 1) {
  ggsave(filename = "plots/Outbound_slope_histogram_shuffled.png",
         plot = shuffle_slopes_plot, width = 4, height = 2)
}
```


3. Make stacked histogram of rsquared values for real dataset
```{r}
level_order <- c("Negative", "Positive", "Unclassified")

# Stacked histogram of the fitted R-squared values of all cells, filled by
# lm_group_b. Bar heights are counts divided by the total count.
exp_r2_plot <- ggplot(
  data = spatial_firing,
  aes(x = asr_b_o_rewarded_fit_r.squared,
      fill = factor(unlist(lm_group_b), levels = level_order))
) +
  geom_histogram(aes(y = ..count.. / sum(..count..)), binwidth = 0.01) +
  coord_cartesian(xlim = c(0, 1), ylim = c(0, 0.12)) +
  labs(x = expression(R^2), y = "Proportion") +
  scale_fill_manual(values = c("violetred2", "chartreuse3", "grey62")) +
  scale_y_continuous(breaks = scales::pretty_breaks(n = 4)) +
  theme_classic() +
  theme(
    text = element_text(size = 16),
    axis.text.x = element_text(size = 16),
    axis.text.y = element_text(size = 16),
    axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0)),
    legend.title = element_blank(),
    legend.text = element_text(size = 16),
    legend.position = "none"
  )
# Display the plot in the knitted output.
exp_r2_plot

if (save_figures == 1) {
  ggsave(filename = "plots/Outbound_rsquared_histogram_update.png",
         plot = exp_r2_plot, width = 4, height = 2)
}
```

4. Same as above but for shuffled datasets
```{r}
# Stacked histogram of the shuffled R-squared values, filled by the shuffle
# classification (factor levels in default sorted order). Bar heights are
# counts divided by the total count.
shuffle_r2_plot <- ggplot(
  data = shuff_slopes,
  aes(x = r2, fill = factor(unlist(shuff_lm_group_b)))
) +
  geom_histogram(aes(y = ..count.. / sum(..count..)), binwidth = 0.01) +
  coord_cartesian(xlim = c(0, 1), ylim = c(0, 0.12)) +
  labs(x = expression(R^2), y = "Proportion") +
  scale_fill_manual(values = c("violetred2", "chartreuse3", "grey62")) +
  scale_y_continuous(breaks = scales::pretty_breaks(n = 3)) +
  theme_classic() +
  theme(
    text = element_text(size = 16),
    axis.text.x = element_text(size = 16),
    axis.text.y = element_text(size = 16),
    axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0)),
    legend.title = element_blank(),
    legend.text = element_text(size = 16),
    legend.position = "none"
  )
# Display the plot in the knitted output.
shuffle_r2_plot

if (save_figures == 1) {
  ggsave(filename = "plots/Outbound_rsquared_histogram_shuffled.png",
         plot = shuffle_r2_plot, width = 4, height = 2)
}
```


Make a figure
```{r}
# Combine the experimental and shuffled slope / R-squared histograms into one
# four-panel figure and display it.
exp_vs_shuffle_grid <- cowplot::plot_grid(
  exp_slopes_plot, shuffle_slopes_plot, exp_r2_plot, shuffle_r2_plot,
  labels = c("A", "B", "C", "D"), label_size = 16
)
exp_vs_shuffle_grid
# Save only when figure saving is switched on, like every other figure in this
# script, and pass the grid explicitly rather than relying on the last plot.
if (save_figures == 1) {
  ggsave(filename = "plots/exp_vs_shuffle.jpg", plot = exp_vs_shuffle_grid)
}
```


Look at p values too
```{r}
# Stacked histogram of the fitted p-values of all cells, filled by lm_group_b
# (uses level_order defined in an earlier chunk). Bar heights are counts
# divided by the total count.
exp_pval_plot <- ggplot(
  data = spatial_firing,
  aes(x = asr_b_o_rewarded_fit_p.value,
      fill = factor(unlist(lm_group_b), levels = level_order))
) +
  geom_histogram(aes(y = ..count.. / sum(..count..)), binwidth = 0.005) +
  coord_cartesian(xlim = c(0, 1), ylim = c(0, 0.7)) +
  labs(x = expression(p), y = "Proportion") +
  scale_fill_manual(values = c("violetred2", "chartreuse3", "grey62")) +
  scale_y_continuous(breaks = scales::pretty_breaks(n = 4)) +
  theme_classic() +
  theme(
    text = element_text(size = 16),
    axis.text.x = element_text(size = 16),
    axis.text.y = element_text(size = 16),
    axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0)),
    legend.title = element_blank(),
    legend.text = element_text(size = 16),
    legend.position = "none"
  )
# Display the plot in the knitted output.
exp_pval_plot

```

```{r}
# Stacked histogram of the shuffled p-values, filled by the shuffle
# classification (factor levels in default sorted order). Bar heights are
# counts divided by the total count.
shuffle_pval_plot <- ggplot(
  data = shuff_slopes,
  aes(x = pval, fill = factor(unlist(shuff_lm_group_b)))
) +
  geom_histogram(aes(y = ..count.. / sum(..count..)), binwidth = 0.005) +
  coord_cartesian(xlim = c(0, 1), ylim = c(0, 0.7)) +
  labs(x = expression(p), y = "Proportion") +
  scale_fill_manual(values = c("violetred2", "chartreuse3", "grey62")) +
  scale_y_continuous(breaks = scales::pretty_breaks(n = 3)) +
  theme_classic() +
  theme(
    text = element_text(size = 16),
    axis.text.x = element_text(size = 16),
    axis.text.y = element_text(size = 16),
    axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0)),
    legend.title = element_blank(),
    legend.text = element_text(size = 16),
    legend.position = "none"
  )
# Display the plot in the knitted output.
shuffle_pval_plot


```

```{r}
# Show the experimental and shuffled p-value histograms side by side.
exp_vs_shuffle_pvals_grid <- cowplot::plot_grid(
  exp_pval_plot, shuffle_pval_plot,
  labels = c("A", "B"), label_size = 16
)
exp_vs_shuffle_pvals_grid
# Save only when figure saving is switched on, like every other figure in this
# script, and pass the grid explicitly rather than relying on the last plot.
if (save_figures == 1) {
  ggsave(filename = "plots/exp_vs_shuffle_pvals.jpg", plot = exp_vs_shuffle_pvals_grid)
}
```