diff --git a/data/ALSECYPIAMH_WU_2022.r b/data/ALSECYPIAMH_WU_2022.r
new file mode 100644
index 0000000..f9c37cd
--- /dev/null
+++ b/data/ALSECYPIAMH_WU_2022.r
@@ -0,0 +1,161 @@
+# Data: https://osf.io/jqzbx/
+# Paper:
+library(haven)
+library(dplyr)
+library(tidyr)
+library(openxlsx)
+library(readr)
+library(readxl)
+library(sas7bdat)
+
+# Drop the rows of `df` in which every column other than `id` is NA.
+# `df` is a wide data frame with an `id` column plus the response columns.
+# Returns `df` restricted to rows holding at least one non-missing response.
+remove_na <- function(df) {
+  item_cols <- setdiff(names(df), "id")
+  # drop = FALSE keeps a data frame even if only one response column is left.
+  all_missing <- rowSums(!is.na(df[, item_cols, drop = FALSE])) == 0
+  df[!all_missing, , drop = FALSE]
+}
+
+# Remove the "label" attribute from every column of `df` and return `df`.
+strip_labels <- function(df) {
+  df[] <- lapply(df, function(col) {
+    attr(col, "label") <- NULL
+    col
+  })
+  df
+}
+
+# Read each study file, strip the column labels and rename PID to id.
+preStudy_df <- read_sav("CPS Pre-Study.sav") |>
+  strip_labels() |>
+  rename(id = PID)
+study1 <- read_sav("CPS Study 1.sav") |>
+  strip_labels() |>
+  rename(id = PID)
+study2 <- read_sav("CPS Study 2.sav") |>
+  strip_labels() |>
+  rename(id = PID)
+
+# ---------- Process CPS Datasets ----------
+# The pre-study CPS columns are renamed by position, so this step relies on
+# the file holding exactly these 12 CPS columns in this order.
+CPS_PRE_df <- preStudy_df |>
+  select(starts_with("CPS"), id)
+colnames(CPS_PRE_df) <- c(
+  "CPS_BS1", "CPS_BS2", "CPS_BS3", "CPS_BS4",
+  "CPS_GD1", "CPS_GD2", "CPS_GD3", "CPS_GD4",
+  "CPS_M1", "CPS_M2", "CPS_M3", "CPS_M4",
+  "id"
+)
+CPS_PRE_df <- remove_na(CPS_PRE_df)
+CPS_PRE_df <- pivot_longer(CPS_PRE_df, cols = -id, names_to = "item", values_to = "resp")
+
+CPS_Study1_df <- study1 |>
+  select(starts_with("CPS"), id)
+CPS_Study1_df <- remove_na(CPS_Study1_df)
+CPS_Study1_df <- pivot_longer(CPS_Study1_df, cols = -id, names_to = "item", values_to = "resp")
+
+# CPS, CPS_M, CPS_GD and CPS_BTS (presumably scale scores, not items - TODO
+# confirm) are dropped before remove_na() so they cannot keep an empty row.
+CPS_Study2_df <- study2 |>
+  select(starts_with("CPS"), id, -CPS, -CPS_M, -CPS_GD, -CPS_BTS)
+CPS_Study2_df <- remove_na(CPS_Study2_df)
+CPS_Study2_df <- pivot_longer(CPS_Study2_df, cols = -id, names_to = "item", values_to = "resp")
+
+# NOTE(review): ids come from three separate files; confirm PID values are not
+# reused across studies, otherwise id is only unique within a group.
+CPS_PRE_df$group <- "pre_study"
+CPS_Study1_df$group <- "study1"
+CPS_Study2_df$group <- "study2"
+
+CPS_df <- rbind(CPS_PRE_df, CPS_Study1_df, CPS_Study2_df)
+
+save(CPS_df, file="ALSECYPIAMH_WU_2022_CPS.Rdata")
+write.csv(CPS_df, "ALSECYPIAMH_WU_2022_CPS.csv", row.names=FALSE)
+
+# ---------- Process SDQ Datasets ----------
+SDQ_df <- study2 |>
+  select(starts_with("SDQ"), id)
+SDQ_df <- remove_na(SDQ_df)
+SDQ_df <- pivot_longer(SDQ_df, cols = -id, names_to = "item", values_to = "resp")
+
+save(SDQ_df, file="ALSECYPIAMH_WU_2022_SDQ.Rdata")
+write.csv(SDQ_df, "ALSECYPIAMH_WU_2022_SDQ.csv", row.names=FALSE)
+
+# ---------- Process SWEMWBS Datasets ----------
+SWEMWBS_df <- study2 |>
+  select(starts_with("SWEMWBS"), id, -SWEMWBS)
+SWEMWBS_df <- remove_na(SWEMWBS_df)
+SWEMWBS_df <- pivot_longer(SWEMWBS_df, cols = -id, names_to = "item", values_to = "resp")
+
+save(SWEMWBS_df, file="ALSECYPIAMH_WU_2022_SWEMWBS.Rdata")
+write.csv(SWEMWBS_df, "ALSECYPIAMH_WU_2022_SWEMWBS.csv", row.names=FALSE)
+
+# ---------- Process SWLS Datasets ----------
+SWLS_df <- study2 |>
+  select(starts_with("SWLS"), id, -SWLS)
+SWLS_df <- remove_na(SWLS_df)
+SWLS_df <- pivot_longer(SWLS_df, cols = -id, names_to = "item", values_to = "resp")
+
+save(SWLS_df, file="ALSECYPIAMH_WU_2022_SWLS.Rdata")
+write.csv(SWLS_df, "ALSECYPIAMH_WU_2022_SWLS.csv", row.names=FALSE)
+
+# ---------- Process PEI Datasets ----------
+# PEI (presumably the scale score - TODO confirm) is dropped before remove_na().
+PEI_df <- study2 |>
+  select(starts_with("PEI"), id, -PEI)
+PEI_df <- remove_na(PEI_df)
+PEI_df <- pivot_longer(PEI_df, cols = -id, names_to = "item", values_to = "resp")
+
+save(PEI_df, file="ALSECYPIAMH_WU_2022_PEI.Rdata")
+write.csv(PEI_df, "ALSECYPIAMH_WU_2022_PEI.csv", row.names=FALSE)
+
+# ---------- Process NEI Datasets ----------
+# NEI (presumably the scale score - TODO confirm) is dropped before remove_na().
+NEI_df <- study2 |>
+  select(starts_with("NEI"), id, -NEI)
+NEI_df <- remove_na(NEI_df)
+NEI_df <- pivot_longer(NEI_df, cols = -id, names_to = "item", values_to = "resp")
+
+save(NEI_df, file="ALSECYPIAMH_WU_2022_NEI.Rdata")
+write.csv(NEI_df, "ALSECYPIAMH_WU_2022_NEI.csv", row.names=FALSE)
+
+# ---------- Process PHQ Datasets ----------
+PHQ_df <- study2 |>
+  select(starts_with("PHQ"), id)
+PHQ_df <- remove_na(PHQ_df)
+PHQ_df <- pivot_longer(PHQ_df, cols=-c(id), names_to="item", values_to="resp")
+
+save(PHQ_df, file="ALSECYPIAMH_WU_2022_PHQ.Rdata")
+write.csv(PHQ_df, "ALSECYPIAMH_WU_2022_PHQ.csv", row.names=FALSE)
+
+# ---------- Process Empathy Datasets ----------
+Empathy_df <- study2 |>
+  select(starts_with("Empathy"), id, -ends_with("r"),-Empathy)
+Empathy_df <- remove_na(Empathy_df)
+Empathy_df <- pivot_longer(Empathy_df, cols=-c(id), names_to="item", values_to="resp")
+
+save(Empathy_df, file="ALSECYPIAMH_WU_2022_Empathy.Rdata")
+write.csv(Empathy_df, "ALSECYPIAMH_WU_2022_Empathy.csv", row.names=FALSE)
+
+# ---------- Process MIL Datasets ----------
+MIL_df <- study2 |>
+  select(starts_with("MIL"), id)
+MIL_df <- remove_na(MIL_df)
+MIL_df <- pivot_longer(MIL_df, cols=-c(id), names_to="item", values_to="resp")
+
+save(MIL_df, file="ALSECYPIAMH_WU_2022_MIL.Rdata")
+write.csv(MIL_df, "ALSECYPIAMH_WU_2022_MIL.csv", row.names=FALSE)
+
+# ---------- Process PIL Datasets ----------
+# Drop the PIL column (presumably the scale total, not an item - TODO confirm)
+# before remove_na(), matching the SWEMWBS and SWLS sections of this script.
+PIL_df <- study2 |>
+  select(starts_with("PIL"), id, -PIL)
+PIL_df <- remove_na(PIL_df)
+PIL_df <- pivot_longer(PIL_df, cols=-c(id), names_to="item", values_to="resp")
+
+save(PIL_df, file="ALSECYPIAMH_WU_2022_PIL.Rdata")
+write.csv(PIL_df, "ALSECYPIAMH_WU_2022_PIL.csv", row.names=FALSE)
\ No newline at end of file
diff --git a/data/CHEXI_Lin_2019.r b/data/CHEXI_Lin_2019.r
new file mode 100644
index 0000000..f6fa0cd
--- /dev/null
+++ b/data/CHEXI_Lin_2019.r
@@ -0,0 +1,19 @@
+# Data: https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/YRLEAY
+library(haven)
+library(dplyr)
+library(tidyr)
+library(openxlsx)
+library(readxl)
+
+df <- read_excel("no_Names_coding-0623_forupload.xlsx", sheet = "data")
+df$id <- seq_len(nrow(df))
+df <- df |>
+  select(id, starts_with("item"))
+# 9999 is recoded to NA first, then rows with no remaining response are dropped.
+df[df == 9999] <- NA
+item_cols <- setdiff(names(df), "id")
+df <- df[rowSums(!is.na(df[, item_cols, drop = FALSE])) > 0, ]
+df <- pivot_longer(df, cols = -id, names_to = "item", values_to = "resp")
+
+save(df, file="CHEXI_Lin_2019.Rdata")
+write.csv(df, "CHEXI_Lin_2019.csv", row.names=FALSE)
diff --git a/data/Ellipse_Corssley_2024.r b/data/Ellipse_Corssley_2024.r
new file mode 100644
index 0000000..5a0906f
--- /dev/null
+++ b/data/Ellipse_Corssley_2024.r
@@ -0,0 +1,34 @@
+# Data: https://github.com/scrosseye/ELLIPSE-Corpus
+library(haven)
+library(dplyr)
+library(tidyr)
+
+df <- read.csv("ellipsis_raw_rater_scores_anon_all_essay.csv")
+df <- df |>
+  rename(id = Filename) |>
+  select(-Text)
+
+# Split the wide file in two: columns ending in "1" go to df1 and columns
+# ending in "2" go to df2 (each selection is its own statement, no trailing pipe).
+df1 <- df |>
+  select(id, ends_with("1"))
+df2 <- df |>
+  select(id, ends_with("2"))
+
+# Strip the "_1"/"_2" suffix so both data frames can be stacked by rbind();
+# "id" carries no suffix and is left unchanged by sub().
+colnames(df1) <- sub("_1$", "", colnames(df1))
+colnames(df2) <- sub("_2$", "", colnames(df2))
+
+df1 <- df1 |>
+  rename(rater = Rater)
+df2 <- df2 |>
+  rename(rater = Rater)
+
+df1 <- pivot_longer(df1, cols = -c(id, rater), names_to = "item", values_to = "resp")
+df2 <- pivot_longer(df2, cols = -c(id, rater), names_to = "item", values_to = "resp")
+
+final_df <- rbind(df1, df2)
+
+save(final_df, file="Ellipse_Corssley_2024.Rdata")
+write.csv(final_df, "Ellipse_Corssley_2024.csv", row.names=FALSE)
\ No newline at end of file
diff --git a/data/MGSISGCLQ_Hollyhead_2018.r b/data/MGSISGCLQ_Hollyhead_2018.r
new file mode 100644
index 0000000..44604dc
--- /dev/null
+++ b/data/MGSISGCLQ_Hollyhead_2018.r
@@ -0,0 +1,51 @@
+# Data: https://osf.io/5vhju/
+# Paper:
+library(haven)
+library(dplyr)
+library(tidyr)
+library(openxlsx)
+
+# Recode "Yes" to 1 and "No" to 0, then coerce the whole vector to numeric.
+yes_no_to_numeric <- function(x) {
+  as.numeric(ifelse(x == "Yes", 1, ifelse(x == "No", 0, x)))
+}
+
+df <- read.xlsx("MGSIS-5 and GCLQ Data.xlsx")
+df <- df[, colSums(!is.na(df)) > 0]
+df$id <- seq_len(nrow(df))
+df <- df |>
+  select(-Start.time, -Completion.time, -Email, -`Are.you.a.resident.of.the.UK?`, -`How.old.are.you?`) |>
+  rename(cov_medical_condition = `Do.you.have.a.medical.condition.which.affects.your.lower.body.or.could.impact.how.you.feel.about.your.genitals?`)
+# Drop respondents with no answer in any remaining column.
+answer_cols <- setdiff(names(df), "id")
+df <- df[rowSums(!is.na(df[, answer_cols, drop = FALSE])) > 0, ]
+# Recode the covariate once, before the split, so both long tables carry the
+# same numeric column when they are stacked with rbind().
+df$cov_medical_condition <- yes_no_to_numeric(df$cov_medical_condition)
+
+# starts_with() ignores case by default, so "I" also matches `id`; that is why
+# `id` is listed explicitly in both selections.
+mgsis_df <- df |>
+  select(id, starts_with("I"), cov_medical_condition)
+GCLQ_df <- df |>
+  select(-starts_with("I"), id)
+
+GCLQ_df <- GCLQ_df |>
+  mutate(across(everything(), yes_no_to_numeric))
+GCLQ_df <- pivot_longer(GCLQ_df, cols = -c(id, cov_medical_condition), names_to = "item", values_to = "resp")
+
+likert_map <- c(
+  "Strongly Disagree" = 1,
+  "Disagree" = 2,
+  "Agree" = 3,
+  "Strongly Agree" = 4
+)
+
+mgsis_df <- pivot_longer(mgsis_df, cols = -c(id, cov_medical_condition), names_to = "item", values_to = "resp")
+# unname() keeps the response labels from being stored as element names.
+mgsis_df$resp <- unname(likert_map[mgsis_df$resp])
+
+final_df <- rbind(mgsis_df, GCLQ_df)
+
+save(final_df, file="MGSISGCLQ_Hollyhead_2018.Rdata")
+write.csv(final_df, "MGSISGCLQ_Hollyhead_2018.csv", row.names=FALSE)
\ No newline at end of file
diff --git a/data/MMF_Solis_2020.r b/data/MMF_Solis_2020.r
new file mode 100644
index 0000000..dedc14f
--- /dev/null
+++ b/data/MMF_Solis_2020.r
@@ -0,0 +1,17 @@
+# Paper: https://www.cambridge.org/core/journals/british-journal-of-political-science/article/measuring-media-freedom-an-item-response-theory-analysis-of-existing-indicators/4A6D5AE5E6F4E78D0642BFF882C1FBF6
+# Data: https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/ENOEQS
+# Issue: https://github.com/ben-domingue/irw/issues/599
+
+library(haven)
+library(dplyr)
+library(tidyr)
+
+# Row number becomes the id; cow and year are dropped; NA rows go after reshaping.
+df <- read.csv("irt_full.csv") |>
+  mutate(id = seq_len(n())) |>
+  select(-cow, -year) |>
+  pivot_longer(cols = -id, names_to = "item", values_to = "resp") |>
+  na.omit()
+
+save(df, file="MMF_Solis_2020.Rdata")
+write.csv(df, "MMF_Solis_2020.csv", row.names=FALSE)
\ No newline at end of file
diff --git a/data/PPSRS_Caliciuri_2024.r b/data/PPSRS_Caliciuri_2024.r
new file mode 100644
index 0000000..e329699
--- /dev/null
+++ b/data/PPSRS_Caliciuri_2024.r
@@ -0,0 +1,14 @@
+# Poster: https://www.researchgate.net/profile/Rossella-Caliciuri/publication/382182050_Psychometric_Properties_of_the_Scientific_Reasoning_Scale/links/6690f2a1b15ba55907539c5a/Psychometric-Properties-of-the-Scientific-Reasoning-Scale.pdf
+# Data: https://osf.io/jk9dp/
+library(haven)
+library(dplyr)
+library(tidyr)
+library(openxlsx)
+
+# Keep ID (renamed to id) and the SRS columns, then reshape to long format.
+df <- read_sav("3. CFA&IRT_SRS (n=337).sav") |>
+  select(id = ID, starts_with("SRS")) |>
+  pivot_longer(cols = -id, names_to = "item", values_to = "resp")
+
+save(df, file="PPSRS_Caliciuri_2024.Rdata")
+write.csv(df, "PPSRS_Caliciuri_2024.csv", row.names=FALSE)
\ No newline at end of file
diff --git a/data/SBD_Smith_2020.r b/data/SBD_Smith_2020.r
new file mode 100644
index 0000000..f7b234d
--- /dev/null
+++ b/data/SBD_Smith_2020.r
@@ -0,0 +1,18 @@
+# Paper: https://www.sciencedirect.com/science/article/pii/S0165032719302046
+# Data: https://osf.io/c4v7g/
+library(haven)
+library(dplyr)
+library(tidyr)
+library(openxlsx)
+
+# Columns are named MBDS1..MBDS22 by position; 999 is recoded to NA after reshaping.
+df <- read.table("bpses_pre_for_factor.dat", header = TRUE, sep = ",", stringsAsFactors = FALSE)
+names(df) <- paste0("MBDS", 1:22)
+df <- df |>
+  mutate(id = seq_len(n())) |>
+  pivot_longer(cols = -id, names_to = "item", values_to = "resp") |>
+  mutate(resp = replace(resp, resp == 999, NA))
+
+save(df, file="SBD_Smith_2020.Rdata")
+write.csv(df, "SBD_Smith_2020.csv", row.names=FALSE)
+
diff --git a/data/SCS_Suh_2023.r b/data/SCS_Suh_2023.r
new file mode 100644
index 0000000..daa303d
--- /dev/null
+++ b/data/SCS_Suh_2023.r
@@ -0,0 +1,59 @@
+# Data: https://osf.io/wsjkb/
+# Paper: https://link.springer.com/article/10.1007/s41811-024-00214-3
+library(haven)
+library(dplyr)
+library(tidyr)
+
+# Load both samples and rename the ID column to id.
+ea_df <- rename(read_sav("SCS EA dataset.sav"), id = ID)
+korea_df <- rename(read_sav("SCS Korean dataset.sav"), id = ID)
+
+# ---------- Process SCS Dataset ----------
+# Columns starting with SCS_Short are excluded from the EA sample only.
+ea_scs <- ea_df |>
+  select(id, starts_with("SCS"), -starts_with("SCS_Short")) |>
+  pivot_longer(cols = -id, names_to = "item", values_to = "resp") |>
+  mutate(group = "US")
+korea_scs <- korea_df |>
+  select(id, starts_with("SCS")) |>
+  pivot_longer(cols = -id, names_to = "item", values_to = "resp") |>
+  mutate(group = "Korea")
+
+scs_df <- rbind(ea_scs, korea_scs)
+
+save(scs_df, file="SCS_Suh_2023_SCS.Rdata")
+write.csv(scs_df, "SCS_Suh_2023_SCS.csv", row.names=FALSE)
+
+# ---------- Process SIAPS Dataset ----------
+# 999 is recoded to NA across the wide table (id included) before reshaping.
+ea_siaps <- ea_df |>
+  select(id, starts_with("SIAPS"), -ends_with(c("F2", "F1")))
+ea_siaps[ea_siaps == 999] <- NA
+ea_siaps <- pivot_longer(ea_siaps, cols = -id, names_to = "item", values_to = "resp")
+
+save(ea_siaps, file="SCS_Suh_2023_SIAPS.Rdata")
+write.csv(ea_siaps, "SCS_Suh_2023_SIAPS.csv", row.names=FALSE)
+
+# ---------- Process BFNE Dataset ----------
+# 999 (and, for the Korean sample, 0) becomes NA; NA responses are then dropped.
+ea_bfne <- ea_df |>
+  select(id, starts_with("BFNE"), -ends_with("T"))
+ea_bfne[ea_bfne == 999] <- NA
+ea_bfne <- ea_bfne |>
+  pivot_longer(cols = -id, names_to = "item", values_to = "resp") |>
+  filter(!is.na(resp)) |>
+  mutate(group = "US")
+
+korea_bfne <- korea_df |>
+  select(id, starts_with("BFNE"), -BFNE)
+korea_bfne[korea_bfne == 999] <- NA
+korea_bfne[korea_bfne == 0] <- NA
+korea_bfne <- korea_bfne |>
+  pivot_longer(cols = -id, names_to = "item", values_to = "resp") |>
+  filter(!is.na(resp)) |>
+  mutate(group = "Korea")
+
+bfne_df <- rbind(korea_bfne, ea_bfne)
+
+save(bfne_df, file="SCS_Suh_2023_BFNE.Rdata")
+write.csv(bfne_df, "SCS_Suh_2023_BFNE.csv", row.names=FALSE)
\ No newline at end of file