working on cran submission.

cleanzr · Sep 30, 2020 · deeac53 · deeac53
1 parent 596797d
commit deeac53
Show file tree

Hide file tree

Showing 4 changed files with 500 additions and 1 deletion.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: blink
 Type: Package
 Title: Record Linkage for Empirically Motivated Priors
-Version: 0.1.0
+Version: 1.1.0
 Authors@R: person("Rebecca", "Steorts", email = "[email protected]",
   role = c("aut", "cre"))
 Depends: R (>= 3.0.2),

diff --git a/data-raw/.Rapp.history b/data-raw/.Rapp.history
@@ -0,0 +1,15 @@
+library(RecordLinkage)
+data(RLdata500)
+save(file="RLdata500-new.csv", header=TRUE, sep=",")
+write.table(file="RLdata500-new.csv", header=TRUE, sep=",")
+write.table(file="RLdata500-new.csv")
+head(RLdata500)
+write.table(RLdata500, file="RLdata500-new.csv", header=TRUE, sep=",", row.names=FALSE)
+write.table(RLdata500, file="RLdata500-new.csv", sep=",", row.names=FALSE)
+write.table(RLdata500, file="RLdata500-new.csv", sep=",", row.names=FALSE, quote="FALSE")
+?write.table()
+write.table(RLdata500, file="RLdata500-new.csv", sep=",", row.names=FALSE, quote=FALSE)
+write.table(RLdata500, file="RLdata500-new.csv", sep=",", row.names=FALSE, quote=FALSE)
+write.table(identity.RLdata500, file="identity.RLdata500.csv", quote=FALSE)
+write.table(identity.RLdata500, file="identity.RLdata500.csv", quote=FALSE, row.names=FALSE)
+write.table(identity.RLdata500, file="identity.RLdata500.csv", quote=FALSE, row.names=FALSE)
diff --git a/vignettes/introEBLink.R b/vignettes/introEBLink.R
@@ -0,0 +1,55 @@
+## ---- echo = FALSE------------------------------------------------------------
+knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
+
+## ---- echo=TRUE, message=FALSE, knitr::opts_chunk$set(cache=TRUE)-------------
+library(blink)
+data(RLdata500)
+head(RLdata500)
+
+## -----------------------------------------------------------------------------
+# X.c contains the categorical variables
+# X.s contains the string variables 
+# p.c is the number of categorical variables
+# p.s contains the number of string variables
+X.c <- RLdata500[c("by","bm","bd")]
+X.c <- as.matrix(RLdata500[,"bd"],ncol=1)
+p.c <- ncol(X.c)
+X.s <- as.matrix(RLdata500[-c(2,4,7)])
+p.s <- ncol(X.s)
+
+## -----------------------------------------------------------------------------
+# File number identifier
+# Note: Recall that X.c and X.s include all files "stacked" on top of each other.
+# The vector below keeps track of which rows of X.c and X.s are in which files.
+file.num <- rep(c(1,2,3),c(200,150,150))
+
+## -----------------------------------------------------------------------------
+# Subjective choices for distortion probability prior
+a <-1
+b <- 999
+
+## -----------------------------------------------------------------------------
+d <- function(string1,string2){adist(string1,string2)}
+
+## -----------------------------------------------------------------------------
+c <- 1
+
+## ----results="hide"-----------------------------------------------------------
+library(knitr)
+library(blink)
+library(plyr)
+Sys.setenv(TMPDIR="/tmp/")
+configure.vars="TMPDIR=/tmp/"
+lam.gs <- rl.gibbs(file.num=file.num,X.s=X.s,X.c=X.c,num.gs=2,a=a,b=b,c=c,d=d, M=500)
+#system.time(lam.gs <- rl.gibbs(file.num=file.num,X.s=X.s,X.c=X.c,num.gs=2,a=a,b=b,c=c,d=d, M=500))
+
+## ---- fig.show="hold", fig.cap="The red line is the ground truth (450), which is not close to the estimate (500) since we only ran 10 Gibbs sampling iterations."----
+#estLink <- tempfile(pattern = "lam.gs")
+estLink <- lam.gs
+estPopSize <- apply(estLink , 1, function(x) {length(unique(x))})
+plot(density(estPopSize),xlim=c(300,500),main="",lty=1, "Observed Population Size", ylim= c(0,1))
+abline(v=450,col="red")
+abline(v=mean(estPopSize),col="black",lty=2)
+mean(estPopSize)
+sd(estPopSize)
+