Skip to content

Commit

Permalink
update CPP files
Browse files Browse the repository at this point in the history
  • Loading branch information
MingzhiYe16 committed Jun 26, 2024
1 parent 1e70eb1 commit ca9c48c
Show file tree
Hide file tree
Showing 35 changed files with 9,233 additions and 228 deletions.
Binary file modified .DS_Store
Binary file not shown.
Binary file added src/.DS_Store
Binary file not shown.
1 change: 1 addition & 0 deletions src/.Rapp.history
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
load("/Users/mingzhiye/CppProjects/cit/src/symbols.rds")
6 changes: 3 additions & 3 deletions src/Makevars
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
CXX_STD=CXX11
PKG_LIBS = `gsl-config --libs`
PKG_CFLAGS = `gsl-config --cflags`
CXX_STD=CXX14
PKG_LIBS = $(shell gsl-config --libs)
PKG_CPPFLAGS = $(shell gsl-config --cflags)
2 changes: 1 addition & 1 deletion src/Makevars.win
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
CXX_STD=CXX11
CXX_STD=CXX14
PKG_CPPFLAGS=-I$(LIB_GSL)/include
PKG_LIBS=-L$(LIB_GSL)/lib -lgsl -lgslcblas
27 changes: 0 additions & 27 deletions src/cit_init.c

This file was deleted.

315 changes: 315 additions & 0 deletions src/citbin_v1.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,315 @@
#include <R.h>
#include <Rmath.h>
#include <vector>
#include <algorithm>
#include <gsl/gsl_fit.h>
#include <gsl/gsl_multifit.h>
#include <gsl/gsl_statistics.h>
#include <gsl/gsl_math.h>
#include <gsl/gsl_cdf.h>
#include <iostream>
#include <random> // std::default_random_engine
#include "logisticfunc.h"
#include <Rcpp.h>
#include "maxElementWithNan.h"

using namespace Rcpp;
using namespace std;


/*
L: matrix of continuous instrumental variables
G: matrix of candidate causal mediators
T: matrix of 0/1 variables
Programmer: Joshua Millstein
*/



// [[Rcpp::export]]
void citbin( Rcpp::NumericVector L, Rcpp::NumericVector G, Rcpp::NumericVector T, int &maxit, int &nrow,
int &ncol, Rcpp::NumericVector pval1, Rcpp::NumericVector pval2, Rcpp::NumericVector pval3, Rcpp::NumericVector pval4 , Rcpp::NumericVector pval3nc, int &rseed)
{
unsigned seed = rseed;
int rw, cl, i, rind, df, df1, df2, nobs, ip, npos, nperm, nmiss, stride;
double rss2, rss3, rss5, F, pv, pvp, maxp, tmp, rhs, testval;
bool aa, bb, cc, dd, converged;
const int firstloop = 1000;
const int posno = 20;
const double alpha = .1;
vector<vector<double> > LL;
vector<double> pvec;
vector<double> gpred;
vector<double> gresid;

gsl_matrix *Lm, *cov, *X;
gsl_vector *Gm, *Tm, *Gp, *c;

double *designmat = new double[ nrow * (ncol + 2) ];
double *phenovec = new double[ nrow ];

LL.resize( nrow );
GetRNGstate();

for(rw = 0; rw < nrow; rw++) {
LL[rw].resize( ncol );
}

for(cl = 0; cl < ncol; cl++) {
for(rw = 0; rw < nrow; rw++) {
LL[rw][cl] = L[rw + nrow * cl];
}
}


// create analysis vectors w/no missing data
nobs = 0;
for(rw = 0; rw < nrow; rw++) {
nmiss = 0;
for(cl = 0; cl < ncol; cl++) {
if( LL[rw][cl] == -9999 ) {
nmiss++;
}
}
aa = nmiss == 0;
bb = G[rw] != -9999;
cc = T[rw] != -9999;
if(aa && bb && cc) {
nobs++;
}
}

Lm = gsl_matrix_alloc (nobs, ncol);
Gm = gsl_vector_alloc (nobs);
Tm = gsl_vector_alloc (nobs);
rind = 0;
for(rw = 0; rw < nrow; rw++) {
nmiss = 0;
for(cl = 0; cl < ncol; cl++) {
if( LL[rw][cl] == -9999 ) {
nmiss++;
}
}
aa = nmiss == 0;
bb = G[rw] != -9999;
cc = T[rw] != -9999;

if(aa && bb && cc) {
for(cl = 0; cl < ncol; cl++) {
gsl_matrix_set(Lm, rind, cl, LL[rw][cl]);
}
gsl_vector_set(Gm, rind, G[rw]);
gsl_vector_set(Tm, rind, T[rw]);
rind++;
}
}
// fit model T ~ L
ip = 1 + ncol; // intercept + multiple L variable
for(rw = 0; rw < nobs; rw++) {
phenovec[ rw ] = gsl_vector_get(Tm, rw );
designmat[ rw * ip ] = 1; // intercept
for(cl = 0; cl < ncol; cl++) {
designmat[ rw * ip + 1 + cl ] = gsl_matrix_get (Lm, rw, cl);
}
}
df = ncol;
converged = logisticReg( pv, phenovec, designmat, nobs, ip, df );
if(!converged)Rcpp::Rcout<< "Warning: Cannot Converge when doing regression for calculating P-value." << std::endl;
pv = ( converged ) ? pv : std::numeric_limits<double>::quiet_NaN();
pvec.push_back( pv ); // pval for T ~ L, 9 if it did not converge, p1

// fit model T ~ L + G
stride = ip + 1;
for(rw = 0; rw < nobs; rw++) {
designmat[ rw * stride ] = 1; // intercept
for(cl = 0; cl < ncol; cl++) {
designmat[ rw * stride + 1 + cl ] = gsl_matrix_get (Lm, rw, cl);
}
designmat[ rw * stride + 1 + ncol ] = gsl_vector_get(Gm, rw );
}

df = 1;
converged = logisticReg( pv, phenovec, designmat, nobs, stride, df );
if(!converged)Rcpp::Rcout<< "Warning: Cannot Converge when doing regression for calculating P-value." << std::endl;
pv = ( converged ) ? pv : std::numeric_limits<double>::quiet_NaN();
pvec.push_back( pv ); // pval for T ~ G|L, 9 if it did not converge, p2

// fit model G ~ T
X = gsl_matrix_alloc (nobs,2);
for(rw = 0; rw < nobs; rw++) {
gsl_matrix_set(X, rw, 0, 1.0); // intercept
gsl_matrix_set(X, rw, 1, gsl_vector_get (Tm, rw));
}
c = gsl_vector_alloc (2);
cov = gsl_matrix_alloc (2, 2);
gsl_multifit_linear_workspace * work = gsl_multifit_linear_alloc (nobs, 2);
gsl_multifit_linear (X, Gm, c, cov, &rss2, work);
gsl_multifit_linear_free (work);
gsl_matrix_free (X);
gsl_matrix_free (cov);
gsl_vector_free (c);

// fit model G ~ L + T
X = gsl_matrix_alloc (nobs, ip + 1);
for(rw = 0; rw < nobs; rw++) {
gsl_matrix_set(X, rw, 0, 1.0); // intercept
for(cl = 0; cl < ncol; cl++) {
gsl_matrix_set(X, rw, cl + 1, gsl_matrix_get (Lm, rw, cl));
}
gsl_matrix_set(X, rw, ip, gsl_vector_get (Tm, rw));
}
c = gsl_vector_alloc (ip + 1);
cov = gsl_matrix_alloc (ip + 1, ip + 1);
work = gsl_multifit_linear_alloc (nobs, ip + 1);
gsl_multifit_linear (X, Gm, c, cov, &rss3, work);
gsl_multifit_linear_free (work);
gsl_matrix_free (X);
gsl_matrix_free (cov);
gsl_vector_free (c);
df1 = ncol;
df2 = nobs - ip -1;
F = df2*(rss2-rss3)/(rss3*df1);
pv = gsl_cdf_fdist_Q(F, df1, df2);
pvec.push_back( pv ); // pval for G ~ L|T, p3

// fit model T ~ G + L to test L
stride = ip + 1;
for(rw = 0; rw < nobs; rw++) {
designmat[ rw * stride ] = 1; // intercept
designmat[ rw * stride + 1 ] = gsl_vector_get(Gm, rw );
for(cl = 0; cl < ncol; cl++) {
designmat[ rw * stride + 2 + cl ] = gsl_matrix_get (Lm, rw, cl);
}
}
df = ncol;
converged = logisticReg( pv, phenovec, designmat, nobs, stride, df );
if(!converged)Rcpp::Rcout<< "Warning: Cannot Converge when doing regression for calculating P-value." << std::endl;
pv = ( converged ) ? pv : std::numeric_limits<double>::quiet_NaN(); // p-value for T ~ L|G
pval3nc[0] = pv; // pvalue to be used for non-centrality parameter

// fit model G ~ L
X = gsl_matrix_alloc (nobs, ip );
for(rw = 0; rw < nobs; rw++) {
gsl_matrix_set(X, rw, 0, 1.0); // intercept
for(cl = 0; cl < ncol; cl++) {
gsl_matrix_set(X, rw, cl + 1, gsl_matrix_get (Lm, rw, cl));
}
}
c = gsl_vector_alloc (ip);
cov = gsl_matrix_alloc (ip, ip);
work = gsl_multifit_linear_alloc (nobs, ip);
gsl_multifit_linear (X, Gm, c, cov, &rss5, work);
gsl_multifit_linear_free (work);
gsl_matrix_free (cov);

// residuals for G ~ L
for(rw = 0; rw < nobs; rw++) {
rhs = 0;
for(cl = 0; cl < ip; cl++) {
rhs += gsl_vector_get (c, cl) * gsl_matrix_get (X, rw, cl);
}

gpred.push_back(rhs);
tmp = gsl_vector_get (Gm, rw) - rhs;
gresid.push_back(tmp);
}
gsl_vector_free (c);

// Conduct an initial set of permutations

Gp = gsl_vector_alloc (nobs);
npos = 0;
for(i = 0; i < firstloop; i++){
// randomly permute residuals

shuffle( gresid.begin(), gresid.end(), std::default_random_engine(seed) );

// compute G* based on marginal L effects and permuted residuals
for(rw = 0; rw < nobs; rw++) {
gsl_vector_set(Gp, rw, gpred[rw] + gresid[rw] );
}

// Recompute p-value for T ~ L|G based on G*
// fit model T ~ G* + L to test L
stride = ip + 1;
for(rw = 0; rw < nobs; rw++) {
designmat[ rw * stride ] = 1; // intercept
designmat[ rw * stride + 1 ] = gsl_vector_get(Gp, rw );
for(cl = 0; cl < ncol; cl++) {
designmat[ rw * stride + 2 + cl ] = gsl_matrix_get (Lm, rw, cl);
}
}

df = ncol;
converged = logisticReg( pvp, phenovec, designmat, nobs, stride, df );
if(!converged)Rcpp::Rcout<< "Warning: Cannot Converge when doing regression for calculating P-value." << std::endl;
pvp = ( converged ) ? pvp : std::numeric_limits<double>::quiet_NaN(); // p-value for T ~ L|G*
if( pvp > pv ) npos++;

} // end initial permutation loop

// Conduct additional permutations if there is some indication of statistical significance
maxp = maxElementWithNan(pvec);
nperm = firstloop;
aa = npos < posno;
bb = maxp < alpha;
cc = nperm < maxit;
maxp = maxElementWithNan(pvec);
testval = (double) (npos + 1) / nperm ;
dd = maxp < testval; // check that other component p-values are small

if(aa && bb && cc && dd){
while(aa && cc) {

// randomly permute residuals

shuffle( gresid.begin(), gresid.end(), std::default_random_engine(seed) );
// compute G* based on marginal L effects and permuted residuals
for(rw = 0; rw < nobs; rw++) {
gsl_vector_set(Gp, rw, gpred[rw] + gresid[rw] );
}

// Recompute p-value for T ~ L|G based on G*
// fit model T ~ G* + L to test L
stride = ip + 1;
for(rw = 0; rw < nobs; rw++) {
designmat[ rw * stride ] = 1; // intercept
designmat[ rw * stride + 1 ] = gsl_vector_get(Gp, rw );
for(cl = 0; cl < ncol; cl++) {
designmat[ rw * stride + 2 + cl ] = gsl_matrix_get (Lm, rw, cl);
}
}

df = ncol;
converged = logisticReg( pvp, phenovec, designmat, nobs, stride, df );
if(!converged)Rcpp::Rcout<< "Warning: Cannot Converge when doing regression for calculating P-value." << std::endl;
pvp = ( converged ) ? pvp : std::numeric_limits<double>::quiet_NaN(); // p-value for T ~ L|G*
if( pvp > pv ) npos++;

aa = npos < posno;
cc = nperm < ( maxit - 1 );
nperm++;
} // end 'while' permutation loop
} // End if
pv = 1.0 * npos / nperm;
pvec.push_back(pv); // pval for L ind T|G

pval1[0] = pvec[0]; // pval for T ~ L
pval2[0] = pvec[1]; // pval for T ~ G|L
pval3[0] = pvec[2]; // pval for G ~ L|T
pval4[0] = pvec[3]; // pval for L ind T|G

pvec.clear();
gresid.clear();
gpred.clear();
gsl_matrix_free (Lm);
gsl_vector_free (Gm);
gsl_vector_free (Tm);
gsl_vector_free (Gp);

delete [] designmat;
delete [] phenovec;

LL.clear();

} // End citbin
Loading

0 comments on commit ca9c48c

Please sign in to comment.