From c8b154a1678563dae04ad410216bc7bc62eb4451 Mon Sep 17 00:00:00 2001 From: Al-Murphy Date: Mon, 16 Sep 2024 11:47:41 +0100 Subject: [PATCH] Risk allele mappings --- DESCRIPTION | 2 +- NEWS.md | 5 +++++ R/data.R | 30 +++++++++++++++++++++++++----- R/sysdata.rda | Bin 2018 -> 2084 bytes data/sumstatsColHeaders.rda | Bin 2056 -> 2105 bytes man/sumstatsColHeaders.Rd | 30 +++++++++++++++++++++++++----- 6 files changed, 56 insertions(+), 11 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index e33a0bad..37b72a6d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: MungeSumstats Type: Package Title: Standardise summary statistics from GWAS -Version: 1.13.5 +Version: 1.13.6 Authors@R: c(person(given = "Alan", family = "Murphy", diff --git a/NEWS.md b/NEWS.md index 7d1c822f..fb1e93e6 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +## CHANGES IN VERSION 1.13.6 + +### New features +* Mappings added to mapping file for risk and non risk allele. + ## CHANGES IN VERSION 1.13.3 ### Bug fix diff --git a/R/data.R b/R/data.R index d23a389d..9678b139 100644 --- a/R/data.R +++ b/R/data.R @@ -12,11 +12,31 @@ #' # Most the data in the below table comes from the LDSC github wiki #' data("sumstatsColHeaders") #' # Make additions to sumstatsColHeaders using github version of MungeSumstats- +#' # Shown is an example of adding new A1 and A2 naming +#' a1_name <- c("NON","RISK","ALLELE") +#' a2_name <- c("RISK","ALLELE") +#' all_delims <- c("_",".",""," ","-") +#' all_uncorr_a1 <- vector(mode="list",length = length(all_delims)) +#' all_corr_a1 <- vector(mode="list",length = length(all_delims)) +#' all_uncorr_a2 <- vector(mode="list",length = length(all_delims)) +#' all_corr_a2 <- vector(mode="list",length = length(all_delims)) +#' for(i in seq_along(all_delims)){ +#' delim <- all_delims[i] +#' a1 <- unlist(paste(a1_name,collapse=delim)) +#' a2 <- unlist(paste(a2_name,collapse=delim)) +#' all_uncorr_a1[[i]] <- a1 +#' all_uncorr_a2[[i]] <- a2 +#' all_corr_a1[[i]] <- "A1" +#' all_corr_a2[[i]] <- "A2" +#' } +#' se_cols <- data.frame("Uncorrected"=c(unlist(all_uncorr_a1),unlist(all_uncorr_a2)), +#' "Corrected"=c(unlist(all_corr_a1),unlist(all_corr_a2))) +#' # Or another example ..... #' # shown is an example of adding columns for Standard Error (SE) -#' #se_cols <- data.frame("Uncorrected"=c("SE","se","STANDARD.ERROR", -#' # "STANDARD_ERROR","STANDARD-ERROR"), -#' # "Corrected"=rep("SE",5)) -#' #sumstatsColHeaders <- rbind(sumstatsColHeaders,se_cols) +#' se_cols <- data.frame("Uncorrected"=c("SE","se","STANDARD.ERROR", +#' "STANDARD_ERROR","STANDARD-ERROR"), +#' "Corrected"=rep("SE",5)) +#' sumstatsColHeaders <- rbind(sumstatsColHeaders,se_cols) #' #Once additions are made, order & save the new mapping dataset #' #now sort ordering -important for logic that #' # uncorrected=corrected comes first @@ -27,7 +47,7 @@ #' sumstatsColHeaders$ordering,decreasing = TRUE),] #' rownames(sumstatsColHeaders)<-1:nrow(sumstatsColHeaders) #' sumstatsColHeaders$ordering <- NULL -#' #manually move FRWQUENCY to above MAR - github issue 95 +#' #manually move FREQUENCY to above MAR - github issue 95 #' frequency <- sumstatsColHeaders[sumstatsColHeaders$Uncorrected=="FREQUENCY",] #' maf <- sumstatsColHeaders[sumstatsColHeaders$Uncorrected=="MAF",] #' if(as.integer(rownames(frequency))>as.integer(rownames(maf))){ diff --git a/R/sysdata.rda b/R/sysdata.rda index 863e7a0badd1ac2f4ec37052dad7107e18230bd6..2339876f2f81ca1aae5a2c35c55bf7d9a52e1655 100644 GIT binary patch literal 2084 zcmV+<2;28UT4*^jL0KkKSxx`CHUJdO|KR`s`QS(sf8am=|8TDdo`66A01yBHU*oCQ8G zo)pT!1SNfUR3K#hq*!#R={)iAJ%56@64O*1B5*lP1SumO6%0iU6bxA33BUGD$*O z{0=k8mV60e>N(zdt(M0aV1U4d=-_EW1 z^DbL36r{NrEJ1!O1XCvBn_1>F$C}cx-}uv4U*Rc>yOV;w?&Es{e_# z6hJGKgVz{f=4j;zp@Zic`Tc0TKxXZnB+a$wA_tT7}KDLZQjKuD*`tep?;6v#$5OB+ugIj7cQAh zLw4k=La^v&Hj+6+A$908YFrOjh0slpgBu$vju>>HGFNj}WeXot>dxY=c%Upm9;VB9 z11M=u&7s5bG3Iz?1+GbwWSEphj_OPstzbAbRh4BKLBm5$B_@k7B+#R5F;NwkSQSK6 zR0^UL6$5ia+t9PVjIi1s=1mEJ(owHorrUcf2-!56kR6-n!C6nv+gwX04sJXSH}>Yh z`X_PA_vx3dqN}}HuQ=o(E$QO9IcsgJL_kQwWWqYSJdRM?`9X;B2qi9y7g~-9yB6@;vG$v6I_83 z@}j>Hz}Kw}rIVLGgB&zyh?Mk*CdB`a?q>qDv(GzE(@E1Cs1e)D39Z@3oY2Qwf#EEI zH)UcF&=$TD$|dU+fLM`B5;~NqG@7ajr3OTit&JO$78%IIW*h*EG-jJ_r3UO0D#!(} z&#CC~W^+sORLL(n6t5mq0+T2pp-a&o_u!AR+5#wmpglE^L9}0AIFUmZ1(6RI6PQC3 zH7CW5(V`qM!**D-Pm&Nw(?YSb;l3z!SoIl!5n<;Msl-SlH}tbFONWcq=$x!B&;sr> zYuGOu$KEmIpA2@%7wHAfxDLX4>;p^o8M%0^zJnR=$JHcqBVdbf;;Z%oDX%qtS}6j z*zG#Z!s2Q!K{jbkS0%?qV;XaS`>l zCJi+#XNW_Rwv2e`t`E#4i+WswdjaB+j4`2&43y!L>Z#n3M7Jy47>q!^^@oN5#sqmK#mZ8J zv%fJHXv_&=2ABb*w)VnKD}CS(h1tpE=-R9bogGQmuZ{L|F?*D&$mDRy;Z#GQtz`r3z4Qp%saOGKPAQkwjXA(V1g}*j)HWQ&D{@ z>ElGfF+J|?&hFFC*%ve;4Lo~#rzIw6>i@s6PIk|Z!P4g=zj(0VTUpcCuDmM4lenLe O{}*yaI8cyH|GGBTW7+2b literal 2018 zcmV<82OaoAT4*^jL0KkKSxCl!kN^|{|G@wM`QS(sf8am=|8TDdo`66E00CeK-aCN% zt;|h)>dqhnD0017K0DjHIiPr#ZUSQ1x z97qIOAw?7F1PC;!nj`CWxT8r8Xb}dq0xjCEO%Xxex7H~feWKf=(ISvHG2jHXS`iwiM+y!|p(6LYGeXcRz#USqK4_o`&@0axVO;D0LRZ;#Ap?T(o`IFG zV2to4Vbc;YApXW1I-QHlaY`7p7y(6+!ZVgtm+Fl`+$B(T+{ z8B0w`DPOwx=a~Vr&mLA4V|_@~kwA7z7QVW#X<`+g_RQtGf~zggS1q&=kQ69cX^n$! zMw|vxDCS|3!xaqsU4GE?&C=R$%V5ruKoo(5DMXBDNhnLOj#SO92$K~iAzZwA;3{NA zjLKndCY{PMnurtv$CniW!Q})<8*dj7{^w@;hZ%+hMA)ut3l;L7xUN>vGS2G^6=ln- zyevSfHkD8qKtQGm1PX&WNKqjrdk~FG5C=w)g5!pVzkLQxL8*?q%o{)*MY+iQj zH{W}e7?N@c4>P97NPUqXtQ`<{iKK*Kzl6SX!IhO-7Edo*+q(N3h81Rpx{(2Zb4b99 zz%RRGhBxVJtqHW$+1;I!AuRC8Akg-RgcDjAM(ql8(Cl~&6kwwRfaB{%4Ue^Y6p@?a z(%j^X6;>; zq;Okj@8_uZu{bbhWDs5x1&ww(KI9=j(9l^0d@1XswqxmTMiUs*poneTVM&U>M$;58 z&HGv<<-2yz4P7)jaFoP1XEjY4jeZXESN)q4md`^AQG=?nhUCX&i&~ew80V<{gVj_^GC`e3^LRMjO zEYPEDG0_#3@G6L?s1-ygCjpOh+uX6X?RV@CR8oc2aHjj4`njk3G}hzFvhb2r9d=`7lT>Os_n=?E!vb91-;@y+MpY@W zHS5u@DVHSNFu!baIkw97G#Ij6fMG~t48}Vx9d@F*iH<`hr)XFW7z|UHAln>dB*;j) zFJHb8sxJs0P91|sWX8Q(o#Ds`OxPy{%8i3~XlAiu7mkYEa3IK7Wwd^LFt|kJ?Rxd# z%Lj)FCLTjhs{)ST1U}gnzBUZ=AtHH6`(7T$6F|u9CkVkNl3-06QP$uTJkWsL#M&EDqJ?@8CQS*UJ8C2e z;`pn&;8)eOl57i^rbnxvdhdENk(1DT62qq2Lb{m~xa}mRDPU5u?3|$?klLP+90mQ} zAtMoc0^~<~l|dMzR(%%UDI;05XTznNpAbmgx!AU~X$CyI9MH^()!+nm7)b(#@xPAm zAoBn!;|&7oO)zE#0WH@%<<5qY7w6X?IaZwPm6UT?K`|}ndt9Lnp9YfKw`hVb?z1)* zcu69o8yu|O?l%k&@FGnOpt5PG5Sr_!p&_yyEq>MI5fKgCvL`9KkS`&Hq2#*VzE_~$z;w(YXw6=t0#9W+xTEu)gfn)cU8%3k&i zMRjeC4lPZQk_6(BCfiI+iGxET7)G0D)3u~p0)?8jiDtF3i42I?P0^O`~j%>n&dj$|i z?J3#Bmf9-er%*LIDxk(lLt@l#@9<>LxpxsPz^fJe2s5$`j9tgCJXFjYEm;p>hf3;l zWV0)nrYM0h96T8{FdOIw-F8kRE(#q80}Yhhk^v5}C84Z$M)r-S=5Du?8;?e25gM$q zSt`(k$gLqr29=8@Fk{96fMueXMZh50jX-5J5MoH0S7T((t13B2$~F&B#@u9bg<>>@ zQ>PfB3F-%-BSbNwjSQ6GlH*gsB8hEQ+6+d3E!?m1yfc9va?Q$8g{!6_Eoj66rlR!# z6t?$lB_y+$1DfqGfjYO9f~Nk&>|^7Fp4#n9n#f4AOS?E^14Firq1~=@0%;NumZ*ue zj);0^c?QTRu(dTyig;MiipfG60cw$h#9oC|;Zcrk@mkgNlccy(fW&CalaG`7H~8nb z6b9c{XO{UnL{2~VWb3vyXt2FcYaEx$535HFRr9^;#YA5az!{$kVwXWkYbFV A#{d8T diff --git a/data/sumstatsColHeaders.rda b/data/sumstatsColHeaders.rda index c1bbc662d74479213b99c58ced8c4964ab9a7d1e..c8c9a048a5d6d4f6390ca6bfc6c62be91ad486e9 100644 GIT binary patch literal 2105 zcmV-92*&pxiwFP!000001MON{Z`?K(9?#8li4)faw(XmMye%NYV>`RfBS+%QsMSal zNfRV~A&kU*Y8$}LVqf=n^v5oiq;w7sMY(9&ASpI3kmh{fxsiug4O7Ig&S$S?yZ_A2T zE*o~TK_|;xc1YxLB?;=60zE69#k`1_C<^Q?va*v6JK2D;hh~alnsF(XqT&|GcpWw9 zSfMf%Vi)fVUL|?f@JA4s#vPZH#xQa|~l`g;+QE$*!{mvz=V#X(iL`7Df&`I?9WXpIIO&U=EornM|@(UTirb59>xkpC@WVl6v|R%vv~X;eg-~-Oz?Q9aKKq z?)H`0A$Sz2y&{4IG~cT8iq~y}Mr}kz_MdSLq413oH?Nr@+?`Ktb&3OE)Fg6JzNkoK z1@QzwBWBVnbch(H!bE{+LzJuHg*`0~>72<_+1dI(MEPAm`U>pv0 z91FjuffTJh!BWe_(bbx${FM_Gp&OOI@}gQtS36KqK~y1OV6??vWH_?AGIEbTR@|b` zkVHMvvQ%3^rEU~;qhJfwfw~m6OWlnQmoCLO4k_DhKJnEMv8}-x^D|;L(g?uuJz3^{ z_?Z{Ck5!teLDk13RvMbJvPmr%D=k4hC5Dd`()VAp(OH`$i>QYr(WX%MmD^C#^odSE zCy$qeNLl7fb%&AbBHEn))2A*mn7t{T81;@Pg>1z#8m2X@OA}~K z(3PM0`9q%ye2O`(B#xK9BT$FSoz%kwP&G&{G3-=9Cl3a{c@^?_R8zNGe9yB4PPu4{?09&3U33nenE)G&0Rv~WnioDX25Kdkp0 z3|~W7s`jd`eLFubo2$Ac!GR(d;QA|;*c__M7j@CQxN;*deGXgHx6UzHA1qP6$FCf! z4{LpRpFXJd0o5OyBjmT^vRcpDeqH&F&C$|r-i?XZ{=*#XC5osh4J`fQA_ zpm3o+>bi=}%(a6ueSxD@^WZ;e8heK6@mPIiE=SCXb5!36!Y?G*M7)hCxwZf>zm+ucBQE8VXUQd2je%IXFzlJW=X zQ?2rA$~Ovp$}M%DFR*onLITShTi@L5BN;TECENpprSQcWXCN@zD)t%fil~T6dk{VGH{Z@a5o=%5KCOgLU7uI5Pg0*&avzlb zD01G9K0YAtMV|)XW6}SI(Djt^XE1$Dxle9AoP1C=H2b>&71c(ZyRbFA>js?-oE=cgCIvb$bWkJzk^@f jncn<#eFvF16NiwFP!000001MON{Z`?K(9?#8l$;EYn-R+xzye%NYV>`RfBS+$lwX2a7 zNfRV~A&kU*Y8${#(bxR}{mqLdDV@VZQ7)P`NV>Z&kmh{fxsm5kBsEGIzdl>MUJM3< z(O^7M|DPyvFxl6e@M16+4;45Vj0bb|_wn72-`{<>{%{w)|L*eU`rXa#owEO0X|&am zGW@x7{8%ak$6zI3B`BnlNRaw!B6D1~b3&v%I@}#UJg2j%P z{}g4$cLD<%1`?GOSSq({zgr7r=E%UPRZ0?Rh*_pe!RP>(qmon=I4X;l&A1pz&{EQ; z0z%D;Qp8DA=P=Ag63ck4f}g*zGjXRjdWLxqU-2^GSq;uDbjF;-;P|9UwwZ`!wck}W zuU$6mWrJRpx$Kz8(^?WVtOR=2Jd1f5Gf|c}TV!P~8}_mRWe?2~!!*-MtVPW&lJPcb z(UC$`Dnt?QOI{~=*7A=bFpWE{YK@_D4NCJZ8XUk{o))ioy2pp3kQ9!!5_(}&+T=j07?TaMRd1&kC6)St+(Hep>XFU{T0*p4Kwm?Vxiw(Mev0{L}(T0Yk9J z-xaB-k}F7Xqz0_1TC^%SQS{6jg>*H;%GxlnVO{S4G(lGr zf+ln|OR?FAs7_u9ti?D}oNW)!c>3l_#H7zo1XTz#NeI|Ny*EN_@^z)Uf;)h5m!upm z#e6FCsen)mud*sV&Iop;OeWbTFL#`fhs_{i&NHj9mTD`g)s3PV6l|e7P@kf4X{OQR(x>>dL(1-$k9{>n>}s&a{FInYGy-sRPnM-0 ze&NL(VwGm9srs12O2be#HmLaR*9cK-Ztuz?nNo|;TGt=2{p7BL5f6s0wD z`V!IxG73T(pV@OrXQL=hNvGnZ)EhqDDMpHb^S9|i*W#OYAt5i^f4Sev`qD+Z;fRH3hOV$HTXEg%rjU z?+j~LFynQR<4cbfXG%;+S(a;chmqSdx;*>4PhDUzds8|#>VhYw?8Gt}rZucj6KGA) zm!J9hL!SzKiaDbsju*ZoP>1V6>NWvX1CnbDJ6F)jg9P|5bjW)ca9Y4Ddl>MVCP)ppG)IMvUg?Y2((z0c`Y#&7Ond z8wii8y{c>9-dD@^sxC=zpvVQdd5a}BhwAc0UGy%l+=$DN!xr@y=a_5`mMGujTMpHS zjXu0jA2j-a>W|D3^2>2qqi0>ezI;dKXz91_%m0MtcUZJ`Sj>s+h-0yi$y-HpHbz)b zxKKatyNWH$wSzK4f!%6&@SnAVJ;n5RtUfZABj&_;+}sJmKGMxN;+S{tn_1Oh|J?ibI zYuR=y>~xRW6+%R`TG$5#MnP|zJ-vQx&c*IcjfaHmWBVr zFQ4YR<g_CZ>?Ws-o8O0f~8dNt`R@ke=mDLSM0D$BFpcdA7!IuG(F!zWoi zVUTR3k~yPyTQO1b(GB{!vi0*@C_&S1VZG z!><)#-v%(p;gQNSuNQ&GbY`gw-P6aV8NEY2#f&bE5!~^qRNjEyb)J)Tae|8o_zC2HL+JZJ@Ov=*MsT0p`5f?xA^9BeQSjG)6!?|E{Q~g25dLug zTM_+*|3Q2ZKSuO^hyD}<`RVzOAljdlAEf_hiT=X*rQ{*`z3t0Orw`J9rL=mG{-Dxd zKe^=o^WDi0*WcgV4a{#NgJDZ9_dmRSe|vlL_QTD)mYPRMxB=$3?|)(twvu|&eze4}kfBpm784wSAEC2wt$P1eQ diff --git a/man/sumstatsColHeaders.Rd b/man/sumstatsColHeaders.Rd index caada99e..81ef3484 100644 --- a/man/sumstatsColHeaders.Rd +++ b/man/sumstatsColHeaders.Rd @@ -13,11 +13,31 @@ The code to prepare the .Rda file file from the marker file is: # Most the data in the below table comes from the LDSC github wiki data("sumstatsColHeaders") # Make additions to sumstatsColHeaders using github version of MungeSumstats- +# Shown is an example of adding new A1 and A2 naming +a1_name <- c("NON","RISK","ALLELE") +a2_name <- c("RISK","ALLELE") +all_delims <- c("_",".",""," ","-") +all_uncorr_a1 <- vector(mode="list",length = length(all_delims)) +all_corr_a1 <- vector(mode="list",length = length(all_delims)) +all_uncorr_a2 <- vector(mode="list",length = length(all_delims)) +all_corr_a2 <- vector(mode="list",length = length(all_delims)) +for(i in seq_along(all_delims)){ +delim <- all_delims[i] +a1 <- unlist(paste(a1_name,collapse=delim)) +a2 <- unlist(paste(a2_name,collapse=delim)) +all_uncorr_a1[[i]] <- a1 +all_uncorr_a2[[i]] <- a2 +all_corr_a1[[i]] <- "A1" + all_corr_a2[[i]] <- "A2" +} +se_cols <- data.frame("Uncorrected"=c(unlist(all_uncorr_a1),unlist(all_uncorr_a2)), + "Corrected"=c(unlist(all_corr_a1),unlist(all_corr_a2))) +# Or another example ..... # shown is an example of adding columns for Standard Error (SE) -#se_cols <- data.frame("Uncorrected"=c("SE","se","STANDARD.ERROR", -# "STANDARD_ERROR","STANDARD-ERROR"), -# "Corrected"=rep("SE",5)) -#sumstatsColHeaders <- rbind(sumstatsColHeaders,se_cols) +se_cols <- data.frame("Uncorrected"=c("SE","se","STANDARD.ERROR", + "STANDARD_ERROR","STANDARD-ERROR"), + "Corrected"=rep("SE",5)) +sumstatsColHeaders <- rbind(sumstatsColHeaders,se_cols) #Once additions are made, order & save the new mapping dataset #now sort ordering -important for logic that # uncorrected=corrected comes first @@ -28,7 +48,7 @@ sumstatsColHeaders <- sumstatsColHeaders$ordering,decreasing = TRUE),] rownames(sumstatsColHeaders)<-1:nrow(sumstatsColHeaders) sumstatsColHeaders$ordering <- NULL -#manually move FRWQUENCY to above MAR - github issue 95 +#manually move FREQUENCY to above MAR - github issue 95 frequency <- sumstatsColHeaders[sumstatsColHeaders$Uncorrected=="FREQUENCY",] maf <- sumstatsColHeaders[sumstatsColHeaders$Uncorrected=="MAF",] if(as.integer(rownames(frequency))>as.integer(rownames(maf))){