Skip to content

Commit

Permalink
Merge branch 'main' of github.com:ben-domingue/irw into main
Browse files Browse the repository at this point in the history
  • Loading branch information
ben-domingue committed Nov 28, 2024
2 parents 27696fd + 633e4f9 commit f818ec7
Show file tree
Hide file tree
Showing 8 changed files with 359 additions and 0 deletions.
157 changes: 157 additions & 0 deletions data/ALSECYPIAMH_WU_2022.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
# Data: https://osf.io/jqzbx/
# Paper:
library(haven)
library(dplyr)
library(tidyr)
library(openxlsx)
library(readr)
library(readxl)
library(sas7bdat)

#' Drop rows that contain no item responses at all.
#'
#' @param df A data frame or tibble of item responses, optionally carrying an
#'   `id` column that identifies the respondent.
#' @return `df` without the rows in which every column other than `id` is `NA`.
remove_na <- function(df) {
  # Pick the item columns by name. The previous `-which(...)` form selected
  # *no* columns when `id` was absent (so nothing was ever dropped), and
  # `df[, j]` collapsed a single item column of a base data.frame to a
  # vector, which made rowSums() fail.
  item_cols <- setdiff(names(df), "id")
  all_missing <- rowSums(is.na(df[, item_cols, drop = FALSE])) == length(item_cols)
  df[!all_missing, , drop = FALSE]
}


# Read the three SPSS files and standardise the participant key `PID` to `id`.
preStudy_df <- rename(read_sav("CPS Pre-Study.sav"), id = PID)
study1 <- rename(read_sav("CPS Study 1.sav"), id = PID)
study2 <- rename(read_sav("CPS Study 2.sav"), id = PID)

# Remove the "label" attribute from every column of each data set.
preStudy_df[] <- lapply(preStudy_df, \(column) `attr<-`(column, "label", NULL))
study1[] <- lapply(study1, \(column) `attr<-`(column, "label", NULL))
study2[] <- lapply(study2, \(column) `attr<-`(column, "label", NULL))

# ---------- Process CPS Datasets ----------
# Pre-study: keep the CPS columns plus `id`, then overwrite the column names
# positionally (this assumes select() returns exactly twelve CPS columns
# followed by `id`).
CPS_PRE_df <- preStudy_df |>
  select(starts_with("CPS"), id)
colnames(CPS_PRE_df) <- c("CPS_BS1", "CPS_BS2", "CPS_BS3", "CPS_BS4", "CPS_GD1", "CPS_GD2", "CPS_GD3", "CPS_GD4", "CPS_M1", "CPS_M2", "CPS_M3", "CPS_M4", "id")
CPS_PRE_df <- remove_na(CPS_PRE_df)
CPS_PRE_df <- pivot_longer(CPS_PRE_df, cols = -c(id), names_to = "item", values_to = "resp")

# Study 1: CPS columns are used under their original names.
CPS_Study1_df <- study1 |>
  select(starts_with("CPS"), id)
CPS_Study1_df <- remove_na(CPS_Study1_df)
CPS_Study1_df <- pivot_longer(CPS_Study1_df, cols = -c(id), names_to = "item", values_to = "resp")

# Study 2: drop CPS, CPS_M, CPS_GD and CPS_BTS (presumably scale/subscale
# totals -- confirm against the codebook) *before* the all-missing filter, so
# that the filter looks at item columns only, as the SWEMWBS and SWLS
# sections of this script already do.
CPS_Study2_df <- study2 |>
  select(starts_with("CPS"), id, -CPS, -CPS_M, -CPS_GD, -CPS_BTS)
CPS_Study2_df <- remove_na(CPS_Study2_df)
CPS_Study2_df <- pivot_longer(CPS_Study2_df, cols = -c(id), names_to = "item", values_to = "resp")

# Tag each long table with its source study; ids are only meaningful
# together with `group` once the tables are stacked.
CPS_PRE_df$group <- "pre_study"
CPS_Study1_df$group <- "study1"
CPS_Study2_df$group <- "study2"

CPS_df <- rbind(CPS_PRE_df, CPS_Study1_df, CPS_Study2_df)

save(CPS_df, file = "ALSECYPIAMH_WU_2022_CPS.Rdata")
write.csv(CPS_df, "ALSECYPIAMH_WU_2022_CPS.csv", row.names = FALSE)

# ---------- Process SDQ Datasets ----------
# Study 2 SDQ columns -> drop all-missing respondents -> long format.
SDQ_df <- study2 |>
  select(starts_with("SDQ"), id) |>
  remove_na() |>
  pivot_longer(cols = -id, names_to = "item", values_to = "resp")

save(SDQ_df, file = "ALSECYPIAMH_WU_2022_SDQ.Rdata")
write.csv(SDQ_df, "ALSECYPIAMH_WU_2022_SDQ.csv", row.names = FALSE)

# ---------- Process SWEMWBS Datasets ----------
# The column named exactly `SWEMWBS` is excluded before filtering/reshaping.
SWEMWBS_df <- study2 |>
  select(starts_with("SWEMWBS"), id, -SWEMWBS) |>
  remove_na() |>
  pivot_longer(cols = -id, names_to = "item", values_to = "resp")

save(SWEMWBS_df, file = "ALSECYPIAMH_WU_2022_SWEMWBS.Rdata")
write.csv(SWEMWBS_df, "ALSECYPIAMH_WU_2022_SWEMWBS.csv", row.names = FALSE)

# ---------- Process SWLS Datasets ----------
# The column named exactly `SWLS` is excluded before filtering/reshaping.
SWLS_df <- study2 |>
  select(starts_with("SWLS"), id, -SWLS) |>
  remove_na() |>
  pivot_longer(cols = -id, names_to = "item", values_to = "resp")

save(SWLS_df, file = "ALSECYPIAMH_WU_2022_SWLS.Rdata")
write.csv(SWLS_df, "ALSECYPIAMH_WU_2022_SWLS.csv", row.names = FALSE)

# ---------- Process PEI Datasets ----------
# The column named exactly `PEI` (presumably the scale total -- confirm
# against the codebook) is excluded *before* remove_na(), so the all-missing
# filter is evaluated on item columns only, matching the SWEMWBS/SWLS
# sections of this script.
PEI_df <- study2 |>
  select(starts_with("PEI"), id, -PEI)
PEI_df <- remove_na(PEI_df)
PEI_df <- pivot_longer(PEI_df, cols = -c(id), names_to = "item", values_to = "resp")

save(PEI_df, file = "ALSECYPIAMH_WU_2022_PEI.Rdata")
write.csv(PEI_df, "ALSECYPIAMH_WU_2022_PEI.csv", row.names = FALSE)

# ---------- Process NEI Datasets ----------
# Same treatment as PEI: exclude the `NEI` column first, then filter and
# reshape.
NEI_df <- study2 |>
  select(starts_with("NEI"), id, -NEI)
NEI_df <- remove_na(NEI_df)
NEI_df <- pivot_longer(NEI_df, cols = -c(id), names_to = "item", values_to = "resp")

save(NEI_df, file = "ALSECYPIAMH_WU_2022_NEI.Rdata")
write.csv(NEI_df, "ALSECYPIAMH_WU_2022_NEI.csv", row.names = FALSE)

# ---------- Process PHQ Datasets ----------
# Study 2 PHQ columns -> drop all-missing respondents -> long format.
PHQ_df <- study2 |>
  select(starts_with("PHQ"), id) |>
  remove_na() |>
  pivot_longer(cols = -id, names_to = "item", values_to = "resp")

save(PHQ_df, file = "ALSECYPIAMH_WU_2022_PHQ.Rdata")
write.csv(PHQ_df, "ALSECYPIAMH_WU_2022_PHQ.csv", row.names = FALSE)

# ---------- Process Empathy Datasets ----------
# Columns ending in "r" and the column named exactly `Empathy` are excluded
# before filtering/reshaping.
Empathy_df <- study2 |>
  select(starts_with("Empathy"), id, -ends_with("r"), -Empathy) |>
  remove_na() |>
  pivot_longer(cols = -id, names_to = "item", values_to = "resp")

save(Empathy_df, file = "ALSECYPIAMH_WU_2022_Empathy.Rdata")
write.csv(Empathy_df, "ALSECYPIAMH_WU_2022_Empathy.csv", row.names = FALSE)

# ---------- Process MIL Datasets ----------
# Study 2 MIL columns -> drop all-missing respondents -> long format.
MIL_df <- study2 |>
  select(starts_with("MIL"), id) |>
  remove_na() |>
  pivot_longer(cols = -id, names_to = "item", values_to = "resp")

save(MIL_df, file = "ALSECYPIAMH_WU_2022_MIL.Rdata")
write.csv(MIL_df, "ALSECYPIAMH_WU_2022_MIL.csv", row.names = FALSE)

# ---------- Process PIL Datasets ----------
# The column named exactly `PIL` (presumably the scale total -- confirm
# against the codebook) is excluded *before* remove_na(), so the all-missing
# filter is evaluated on item columns only, matching the SWEMWBS/SWLS
# sections of this script.
PIL_df <- study2 |>
  select(starts_with("PIL"), id, -PIL)
PIL_df <- remove_na(PIL_df)
PIL_df <- pivot_longer(PIL_df, cols = -c(id), names_to = "item", values_to = "resp")

save(PIL_df, file = "ALSECYPIAMH_WU_2022_PIL.Rdata")
write.csv(PIL_df, "ALSECYPIAMH_WU_2022_PIL.csv", row.names = FALSE)
17 changes: 17 additions & 0 deletions data/CHEXI_Lin_2019.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Data: https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/YRLEAY
library(haven)
library(dplyr)
library(tidyr)
library(openxlsx)
library(readxl)

# Read the "data" sheet and use the row number as the respondent id.
df <- read_excel("no_Names_coding-0623_forupload.xlsx", sheet = "data")
df$id <- seq_len(nrow(df))
df <- df |>
  select(id, starts_with("item"))

# 9999 is recoded to NA in the item columns only. Recoding the whole frame
# would also blank the generated `id` of row 9999 in a large enough file.
item_cols <- setdiff(names(df), "id")
df[item_cols][df[item_cols] == 9999] <- NA

# Keep respondents with at least one non-missing item (rowSums() avoids
# apply()'s coercion of the data frame to a matrix row by row).
df <- df[rowSums(!is.na(df[item_cols])) > 0, ]

# `cols` is spelled out in full rather than relying on partial matching.
df <- pivot_longer(df, cols = -id, values_to = "resp", names_to = "item")

save(df, file = "CHEXI_Lin_2019.Rdata")
write.csv(df, "CHEXI_Lin_2019.csv", row.names = FALSE)
37 changes: 37 additions & 0 deletions data/Ellipse_Corssley_2024.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Data: https://github.com/scrosseye/ELLIPSE-Corpus
library(haven)
library(dplyr)
library(tidyr)

# Load the raw rater scores; `Filename` becomes the `id` column and the
# `Text` column is dropped.
df <- read.csv("ellipsis_raw_rater_scores_anon_all_essay.csv")
df <- df |>
  rename(id = Filename) |>
  select(-Text)

# Split into one data frame per rater slot: columns ending in "1" and
# columns ending in "2", each alongside `id`.
# The first select() previously ended in a dangling `|>`, which piped into
# the `df2 <- ...` assignment and made the script fail to parse.
df1 <- df |>
  select(id, ends_with("1"))
df2 <- df |>
  select(id, ends_with("2"))

# Strip the rater-slot suffix so both halves share the same column names.
# The pattern is anchored at the end of the name, so `id` is left untouched.
names(df1) <- sub("_1$", "", names(df1))
names(df2) <- sub("_2$", "", names(df2))

# Lower-case the rater column, then go long: one row per essay x rater x item.
df1 <- df1 |>
  rename(rater = Rater) |>
  pivot_longer(cols = -c(id, rater), names_to = "item", values_to = "resp")
df2 <- df2 |>
  rename(rater = Rater) |>
  pivot_longer(cols = -c(id, rater), names_to = "item", values_to = "resp")

# Stack the two rater slots and export.
final_df <- rbind(df1, df2)

save(final_df, file = "Ellipse_Corssley_2024.Rdata")
write.csv(final_df, "Ellipse_Corssley_2024.csv", row.names = FALSE)
41 changes: 41 additions & 0 deletions data/MGSISGCLQ_Hollyhead_2018.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Data: https://osf.io/5vhju/
# Paper:
library(haven)
library(dplyr)
library(tidyr)
library(openxlsx)

# Load the workbook and discard columns that contain no data at all.
df <- read.xlsx("MGSIS-5 and GCLQ Data.xlsx")
df <- df[, colSums(!is.na(df)) > 0]

# Row number serves as the respondent id.
df$id <- seq_len(nrow(df))

# Remove the timestamp, e-mail, residency and age columns.
df <- select(
  df,
  -c(
    Start.time,
    Completion.time,
    Email,
    `Are.you.a.resident.of.the.UK?`,
    `How.old.are.you?`
  )
)

# Keep respondents with at least one non-missing value besides `id`.
df <- df[rowSums(!is.na(df[, names(df) != "id"])) > 0, ]

# Vectorised recode: "Yes" -> 1, "No" -> 0, anything else is passed through
# as.numeric(as.character()) unchanged (non-numeric text becomes NA).
yes_no_to_numeric <- function(x) {
  as.numeric(as.character(ifelse(x == "Yes", 1, ifelse(x == "No", 0, x))))
}

# MGSIS half: `id`, every column whose name starts with "I" (starts_with()
# ignores case by default), and the medical-condition question as covariate.
mgsis_df <- df |>
  select(id, starts_with("I"), `Do.you.have.a.medical.condition.which.affects.your.lower.body.or.could.impact.how.you.feel.about.your.genitals?`) |>
  rename(cov_medical_condition = `Do.you.have.a.medical.condition.which.affects.your.lower.body.or.could.impact.how.you.feel.about.your.genitals?`)

# GCLQ half: everything else. `-starts_with("I")` also removes `id` because
# of the case-insensitive match, so `id` is added back explicitly.
GCLQ_df <- df |>
  select(-starts_with("I"), id) |>
  rename(cov_medical_condition = `Do.you.have.a.medical.condition.which.affects.your.lower.body.or.could.impact.how.you.feel.about.your.genitals?`)

# Every GCLQ column (covariate included) becomes numeric with Yes/No -> 1/0.
GCLQ_df[] <- lapply(GCLQ_df, yes_no_to_numeric)
GCLQ_df <- pivot_longer(GCLQ_df, cols = -c(id, cov_medical_condition), names_to = "item", values_to = "resp")

likert_map <- c(
  "Strongly Disagree" = 1,
  "Disagree" = 2,
  "Agree" = 3,
  "Strongly Agree" = 4
)

# Recode the covariate here as well, so both halves carry a numeric column
# and rbind() does not have to coerce numeric 1/0 back to character.
mgsis_df$cov_medical_condition <- yes_no_to_numeric(mgsis_df$cov_medical_condition)
mgsis_df <- pivot_longer(mgsis_df, cols = -c(id, cov_medical_condition), names_to = "item", values_to = "resp")
# unname(): indexing a named vector would otherwise leave the response
# labels attached to `resp` as a names attribute.
mgsis_df$resp <- unname(likert_map[mgsis_df$resp])

final_df <- rbind(mgsis_df, GCLQ_df)

save(final_df, file = "MGSISGCLQ_Hollyhead_2018.Rdata")
write.csv(final_df, "MGSISGCLQ_Hollyhead_2018.csv", row.names = FALSE)
17 changes: 17 additions & 0 deletions data/MMF_Solis_2020.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Paper: https://www.cambridge.org/core/journals/british-journal-of-political-science/article/measuring-media-freedom-an-item-response-theory-analysis-of-existing-indicators/4A6D5AE5E6F4E78D0642BFF882C1FBF6
# Data: https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/ENOEQS
# Issue: https://github.com/ben-domingue/irw/issues/599

library(haven)
library(dplyr)
library(tidyr)

# Load the indicator table; each row gets its row number as `id`, and the
# `cow` and `year` columns are dropped before reshaping.
df <- read.csv("irt_full.csv") |>
  mutate(id = row_number()) |>
  select(-c(cow, year)) |>
  pivot_longer(cols = -id, names_to = "item", values_to = "resp")

# Discard rows with any missing value.
df <- na.omit(df)

save(df, file = "MMF_Solis_2020.Rdata")
write.csv(df, "MMF_Solis_2020.csv", row.names = FALSE)
15 changes: 15 additions & 0 deletions data/PPSRS_Caliciuri_2024.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Poster: https://www.researchgate.net/profile/Rossella-Caliciuri/publication/382182050_Psychometric_Properties_of_the_Scientific_Reasoning_Scale/links/6690f2a1b15ba55907539c5a/Psychometric-Properties-of-the-Scientific-Reasoning-Scale.pdf
# Data: https://osf.io/jk9dp/
library(haven)
library(dplyr)
library(tidyr)
library(openxlsx)

# Keep the participant key (renamed to `id` inside select()) and the SRS
# columns, then reshape to one row per participant x item.
df <- read_sav("3. CFA&IRT_SRS (n=337).sav") |>
  select(id = ID, starts_with("SRS")) |>
  pivot_longer(cols = -id, names_to = "item", values_to = "resp")

save(df, file = "PPSRS_Caliciuri_2024.Rdata")
write.csv(df, "PPSRS_Caliciuri_2024.csv", row.names = FALSE)
16 changes: 16 additions & 0 deletions data/SBD_Smith_2020.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Paper: https://www.sciencedirect.com/science/article/pii/S0165032719302046
# Data: https://osf.io/c4v7g/
library(haven)
library(dplyr)
library(tidyr)
library(openxlsx)

# Comma-separated file with a header row; the columns are relabelled
# MBDS1..MBDS22 by position.
df <- read.table("bpses_pre_for_factor.dat", header = TRUE, sep = ",", stringsAsFactors = FALSE)
names(df) <- paste0("MBDS", seq_len(22))

# Row number serves as the respondent id.
df$id <- seq_len(nrow(df))

# Long format, then recode 999 to NA in the response column.
df <- pivot_longer(df, cols = -id, names_to = "item", values_to = "resp")
df$resp <- replace(df$resp, df$resp == 999, NA)

save(df, file = "SBD_Smith_2020.Rdata")
write.csv(df, "SBD_Smith_2020.csv", row.names = FALSE)

59 changes: 59 additions & 0 deletions data/SCS_Suh_2023.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Data: https://osf.io/wsjkb/
# Paper: https://link.springer.com/article/10.1007/s41811-024-00214-3
library(haven)
library(dplyr)
library(tidyr)

# Read both SPSS files and standardise the participant key `ID` to `id`.
ea_df <- rename(read_sav("SCS EA dataset.sav"), id = ID)
korea_df <- rename(read_sav("SCS Korean dataset.sav"), id = ID)

# ---------- Process SCS Dataset ----------
# EA file: SCS columns except those starting with "SCS_Short"; tagged "US".
ea_scs <- ea_df |>
  select(id, starts_with("SCS"), -starts_with("SCS_Short")) |>
  pivot_longer(cols = -id, names_to = "item", values_to = "resp") |>
  mutate(group = "US")

# Korean file: all SCS columns; tagged "Korea".
korea_scs <- korea_df |>
  select(id, starts_with("SCS")) |>
  pivot_longer(cols = -id, names_to = "item", values_to = "resp") |>
  mutate(group = "Korea")

# Stack both samples and export.
scs_df <- rbind(ea_scs, korea_scs)

save(scs_df, file = "SCS_Suh_2023_SCS.Rdata")
write.csv(scs_df, "SCS_Suh_2023_SCS.csv", row.names = FALSE)

# ---------- Process SIAPS Dataset ----------
# SIAPS columns from the EA file, excluding those ending in "F1" or "F2".
ea_siaps <- ea_df |>
  select(id, starts_with("SIAPS"), -ends_with("F2"), -ends_with("F1"))

# 999 is recoded to NA in the item columns only. Recoding the whole frame
# would also blank the `id` of a participant whose id is 999.
siaps_items <- setdiff(names(ea_siaps), "id")
ea_siaps[siaps_items][ea_siaps[siaps_items] == 999] <- NA

ea_siaps <- pivot_longer(ea_siaps, cols = -id, names_to = "item", values_to = "resp")

save(ea_siaps, file = "SCS_Suh_2023_SIAPS.Rdata")
write.csv(ea_siaps, "SCS_Suh_2023_SIAPS.csv", row.names = FALSE)

# ---------- Process BFNE Dataset ----------
# EA file: BFNE columns except those ending in "T"; tagged "US".
ea_bfne <- ea_df |>
  select(id, starts_with("BFNE"), -ends_with("T"))

# Recode 999 to NA in the item columns only, so that a participant whose id
# happens to be 999 keeps it.
ea_bfne_items <- setdiff(names(ea_bfne), "id")
ea_bfne[ea_bfne_items][ea_bfne[ea_bfne_items] == 999] <- NA

ea_bfne <- pivot_longer(ea_bfne, cols = -id, names_to = "item", values_to = "resp")
ea_bfne <- ea_bfne[!is.na(ea_bfne$resp), ]
ea_bfne$group <- "US"

# Korean file: BFNE columns except the one named exactly `BFNE`; tagged
# "Korea". Both 999 and 0 are recoded to NA here, again in item columns only.
korea_bfne <- korea_df |>
  select(id, starts_with("BFNE"), -BFNE)

korea_bfne_items <- setdiff(names(korea_bfne), "id")
korea_bfne[korea_bfne_items][korea_bfne[korea_bfne_items] == 999] <- NA
korea_bfne[korea_bfne_items][korea_bfne[korea_bfne_items] == 0] <- NA

korea_bfne <- pivot_longer(korea_bfne, cols = -id, names_to = "item", values_to = "resp")
korea_bfne <- korea_bfne[!is.na(korea_bfne$resp), ]
korea_bfne$group <- "Korea"

# Stack both samples (missing responses already removed) and export.
bfne_df <- rbind(korea_bfne, ea_bfne)

save(bfne_df, file = "SCS_Suh_2023_BFNE.Rdata")
write.csv(bfne_df, "SCS_Suh_2023_BFNE.csv", row.names = FALSE)

0 comments on commit f818ec7

Please sign in to comment.