# iPA_template_completer.R

#<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
#' Author:  Fabian Schwendinger                                <
#' Intended for interpretablePA R-package                      <
#' GitHub:  https://github.com/FSchwendinger/interpretablePA   <
#' Purpose: Automate interpretablePA results generation based  <
#'          on GGIR part 2 output.                             <
#<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<

#' INTRODUCTION
#' Use this script to create a csv file that can be used as input for 
#' the interpretablePA R-package. The interpretablePA package will process
#' this csv to produce another csv file that allows you to compare your 
#' accelerometer data (average acceleration and intensity gradient) against 
#' our reference values, which are specific to age and sex. 
#' 
#' The output will be presented in terms of percentages of the predicted 
#' values and Z-scores. These metrics are useful for assessing whether an 
#' individual's physical activity level is within the expected range 
#' compared to individuals of similar age and sex in the general population.
#' 
#' After the full script has run, the interpretablePA Shiny app is launched.
#' In the app, go to '1) User data' and 'Cohort-level data (raw)'
#' and upload the csv file generated by this script. The csv file is stored in 
#' the working directory as 'interpretablePA_template_DATE.csv'.



#<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
# USER INPUT SECTION
#<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<

# DETAILS ON DATA PREPARATION
#' To derive metrics relative to age- and sex-specific reference values, 
#' interpretablePA needs the age and sex for each ID from your 
#' part2_summary.csv file, which is produced by GGIR. Ensure that the IDs 
#' match across both files. Specify the path to your file containing
#' the ID, age, and sex information. Also, provide the corresponding 
#' column names in your data file to ensure accurate data processing. 
#' The subject characteristics file may be in csv or xlsx/xls format.
 
# Set working directory.
# The generated 'interpretablePA_template_DATE.csv' is written here because
# the script writes it using a relative file name.
setwd("EXAMPLE_PATH")

# Path to your subject characteristics file (csv, xls, or xlsx; any other
# extension makes the script stop with "Unsupported file type.")
dat_path <- "EXAMPLE_PATH/data/subject_characteristics.csv" 
col_name_id <- "ID"     # Name of the ID column in that file
col_name_sex <- "sex"   # Name of the sex column in that file
col_name_age <- "age"   # Name of the age column in that file

# Specify how male and female are coded in your file.
# These two codes are used as the factor levels that get relabelled to
# "m" and "f"; any other value in the sex column becomes NA.
user_sex_male <- "0"    # encoding of male sex
user_sex_female <- "1"  # encoding of female sex

# Path to your part2_summary.csv file generated by GGIR (read with read.csv)
part_2_path <- "EXAMPLE_PATH/data/part2_summary.csv"

#' No more user input is required beyond this line. After providing
#' the information above, you can use the 'Source button' to run the full script.






#<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
# DO NOT MODIFY!!!
#<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<

#' Attach a package, installing it first if it is not available.
#'
#' Availability is checked with requireNamespace() so that a missing package
#' does not trigger the warning require() would emit. The package is then
#' attached with library(), which raises an error if the installation did
#' not succeed instead of continuing silently.
#'
#' @param package Name of the package as a single character string.
#' @return Called for its side effect of attaching `package`; returns the
#'   value of library() invisibly.
install_and_load <- function(package) {
  stopifnot(is.character(package), length(package) == 1L, !is.na(package))

  # Only hit the package repository when the package is really missing
  if (!requireNamespace(package, quietly = TRUE)) {
    install.packages(package)
  }

  library(package, character.only = TRUE)
}

# Load necessary packages
install_and_load("dplyr")
install_and_load("readxl")

# Generate an empty template that fixes the column names and types of the
# interpretablePA input: ID (character), sex (factor m/f), age (numeric)
temp <- data.frame(
  ID = character(),
  sex = factor(levels = c("m", "f")),
  age = numeric()
)

# Determine the file type based on the (lower-cased) file extension
file_extension <- tolower(tools::file_ext(dat_path))

# Read subject characteristics data based on file type
if (identical(file_extension, "csv")) {
  subject_data <- read.csv(dat_path, stringsAsFactors = FALSE)
} else if (file_extension %in% c("xls", "xlsx")) {
  subject_data <- readxl::read_excel(dat_path)
} else {
  stop("Unsupported file type.")
}

# Fail early with a clear message if a user-specified column is absent.
# Columns are looked up by name in the data only, so a missing column can
# never be silently replaced by an unrelated object of the same name.
required_cols <- c(col_name_id, col_name_sex, col_name_age)
missing_cols <- setdiff(required_cols, names(subject_data))
if (length(missing_cols) > 0) {
  stop(
    "Column(s) not found in the subject characteristics file: ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

# Extract and transform the three needed columns to match the template.
# IDs are trimmed so that they match the (also trimmed) GGIR part 2 IDs.
raw_sex <- subject_data[[col_name_sex]]
subject_data <- data.frame(
  ID = trimws(as.character(subject_data[[col_name_id]])),
  sex = factor(
    raw_sex,
    levels = c(user_sex_male, user_sex_female),
    labels = c("m", "f")
  ),
  age = as.numeric(subject_data[[col_name_age]]),
  stringsAsFactors = FALSE
)

# Values that match neither user_sex_male nor user_sex_female become NA;
# tell the user instead of dropping that information silently
n_unmapped_sex <- sum(is.na(subject_data$sex) & !is.na(raw_sex))
if (n_unmapped_sex > 0) {
  warning(
    n_unmapped_sex,
    " value(s) in the sex column match neither 'user_sex_male' nor ",
    "'user_sex_female' and were set to NA.",
    call. = FALSE
  )
}

# Combine the loaded and formatted data with the template
temp <- rbind(temp, subject_data)

# Read GGIR part 2 data. read.csv() makes column names syntactically valid,
# which is presumably why the metric columns below end in "0.24hr"
# (verify against your GGIR output if the column check fails).
part_2 <- read.csv(part_2_path, stringsAsFactors = FALSE)

# Columns needed from the GGIR output
ggir_cols <- c("ID", "AD_mean_ENMO_mg_0.24hr", "AD_ig_gradient_ENMO_0.24hr")
missing_ggir_cols <- setdiff(ggir_cols, names(part_2))
if (length(missing_ggir_cols) > 0) {
  stop(
    "Column(s) not found in the GGIR part 2 file: ",
    paste(missing_ggir_cols, collapse = ", "),
    call. = FALSE
  )
}

# Keep only the needed columns: trimmed ID, average acceleration (avacc),
# and intensity gradient (ig)
part_2 <- data.frame(
  ID = trimws(part_2[["ID"]]),
  avacc = as.numeric(part_2[["AD_mean_ENMO_mg_0.24hr"]]),
  ig = as.numeric(part_2[["AD_ig_gradient_ENMO_0.24hr"]]),
  stringsAsFactors = FALSE
)

# merge() below keeps only IDs present in both files; report the IDs that
# will be dropped so that mismatches do not go unnoticed
ids_without_ggir <- setdiff(temp$ID, part_2$ID)
if (length(ids_without_ggir) > 0) {
  warning(
    length(ids_without_ggir),
    " ID(s) from the subject characteristics file have no match in the ",
    "GGIR part 2 file and are dropped (first up to 10): ",
    paste(head(ids_without_ggir, 10), collapse = ", "),
    call. = FALSE
  )
}
ids_without_subject <- setdiff(part_2$ID, temp$ID)
if (length(ids_without_subject) > 0) {
  warning(
    length(ids_without_subject),
    " ID(s) from the GGIR part 2 file have no match in the subject ",
    "characteristics file and are dropped (first up to 10): ",
    paste(head(ids_without_subject, 10), collapse = ", "),
    call. = FALSE
  )
}

# Merge temp with part_2 data
temp <- merge(temp, part_2, by = "ID")

# Build the dated output file name, then save the merged data (without row
# names) as a csv file in the working directory
output_file <- sprintf("interpretablePA_template_%s.csv", Sys.Date())
write.csv(temp, file = output_file, row.names = FALSE)

# Install the interpretablePA package from GitHub if not already installed.
# 'remotes' is only installed when it is missing as well.
if (!requireNamespace("interpretablePA", quietly = TRUE)) {
  if (!requireNamespace("remotes", quietly = TRUE)) {
    install.packages("remotes")
  }
  remotes::install_github("FSchwendinger/interpretablePA")
}

# Load interpretablePA; library() stops with an error if the installation
# above did not succeed
library(interpretablePA)
interpret.pa() # This will call the interpretablePA Shiny app

#<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
# END.
#<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<