From a5b93e403581ee3f864335ebd17a8d28c901dcd8 Mon Sep 17 00:00:00 2001 From: Sebastian Jeworutzki Date: Fri, 9 Oct 2015 07:37:54 +0200 Subject: [PATCH 01/21] added importFrom directives - again... --- NAMESPACE | 6 ++++++ R/dbcal.R | 1 + R/read.R | 2 ++ R/save.R | 1 + R/tools.R | 2 ++ 5 files changed, 12 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index cf8526cf..25f5ea62 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,4 +13,10 @@ export(set.lang) export(stata_pre13_save) export(stbcal) import(Rcpp) +importFrom(stats,complete.cases) +importFrom(stats,na.omit) +importFrom(utils,download.file) +importFrom(utils,localeToCharset) +importFrom(utils,setTxtProgressBar) +importFrom(utils,txtProgressBar) useDynLib(readstata13) diff --git a/R/dbcal.R b/R/dbcal.R index f507a4e8..aac4e585 100644 --- a/R/dbcal.R +++ b/R/dbcal.R @@ -43,6 +43,7 @@ #' @author Sebastian Jeworutzki \email{sebastian.jeworutzki@@ruhr-uni-bochum.de} #' @examples #' sp500 <- stbcal(system.file("extdata/sp500.stbcal", package="readstata13")) +#' @importFrom stats complete.cases #' @export stbcal <- function(stbcalfile) { diff --git a/R/read.R b/R/read.R index 2c0f40f3..5a22583e 100644 --- a/R/read.R +++ b/R/read.R @@ -105,6 +105,8 @@ #' @author Jan Marvin Garbuszus \email{jan.garbuszus@@ruhr-uni-bochum.de} #' @author Sebastian Jeworutzki \email{sebastian.jeworutzki@@ruhr-uni-bochum.de} #' @useDynLib readstata13 +#' @importFrom utils download.file +#' @importFrom stats na.omit #' @export read.dta13 <- function(file, convert.factors = TRUE, generate.factors=FALSE, encoding = NULL, fromEncoding=NULL, diff --git a/R/save.R b/R/save.R index 04790057..28b011e8 100644 --- a/R/save.R +++ b/R/save.R @@ -61,6 +61,7 @@ #' @author Jan Marvin Garbuszus \email{jan.garbuszus@@ruhr-uni-bochum.de} #' @author Sebastian Jeworutzki \email{sebastian.jeworutzki@@ruhr-uni-bochum.de} #' @useDynLib readstata13 +#' @importFrom utils localeToCharset #' @export save.dta13 <- function(data, file, data.label=NULL, time.stamp=TRUE, 
convert.factors=FALSE, convert.dates=TRUE, tz="GMT", diff --git a/R/tools.R b/R/tools.R index 5febe16e..469dff8a 100644 --- a/R/tools.R +++ b/R/tools.R @@ -258,6 +258,8 @@ get.varlabel <- function(dat, var.name=NULL, lang=NA) { #' get.varlabel(datDE) #' @author Jan Marvin Garbuszus \email{jan.garbuszus@@ruhr-uni-bochum.de} #' @author Sebastian Jeworutzki \email{sebastian.jeworutzki@@ruhr-uni-bochum.de} +#' @importFrom stats na.omit +#' @importFrom utils txtProgressBar setTxtProgressBar #' @export set.lang <- function(dat, lang=NA, generate.factors=FALSE) { if (is.na(lang) | lang == get.lang(dat, F)$default) { From 8d6d691307574a92e51fccc2ac34e4f6f93c6b2b Mon Sep 17 00:00:00 2001 From: Sebastian Jeworutzki Date: Fri, 9 Oct 2015 08:11:16 +0200 Subject: [PATCH 02/21] Fixed some comments --- R/read.R | 14 ++++++-------- man/read.dta13.Rd | 16 +++++++--------- 2 files changed, 13 insertions(+), 17 deletions(-) diff --git a/R/read.R b/R/read.R index 5a22583e..c61c6797 100644 --- a/R/read.R +++ b/R/read.R @@ -47,7 +47,7 @@ #' @details If the filename is a url, the file will be downloaded as a temporary #' file and read afterwards. #' -#' Stata files are encoded in ansinew. Depending on your system default encoding +#' Stata files are encoded in ansinew. Depending on your system's default encoding #' certain characters may appear wrong. Using a correct encoding may fix these. #' #' Variable names stored in the dta-file will be used in the resulting @@ -67,14 +67,12 @@ #' #' In R, you may use rownames to store characters (see for instance #' \code{data(swiss)}). In Stata, this is not possible and rownames have to be -#' stored as a variable. If this is the case for your file and you want to use -#' rownames, \code{add.rownames=TRUE} will convert the first variable of the -#' dta-file into rownames of the resulting data.frame. +#' stored as a variable. If you want to use rownames, set add.rownames to TRUE. 
+#' Then the first variable of the dta-file will hold the rownames of the resulting +#' data.frame. #' -#' Beginning with Stata 13 (format 117), a new dta-format was introduced, which -#' was not handled by foreign at the time. It was implemented in this package -#' therefore the package got its name. Reading dta-files from earlier Stata -#' versions was not implemented until version 0.8. +#' Reading dta-files of older and newer versions than 13 was introduced +#' with version 0.8. #' @return The function returns a data.frame with attributes. The attributes #' include #' \describe{ diff --git a/man/read.dta13.Rd b/man/read.dta13.Rd index 0cd4f779..52d7fd90 100644 --- a/man/read.dta13.Rd +++ b/man/read.dta13.Rd @@ -75,7 +75,7 @@ The function returns a data.frame with attributes. The attributes If the filename is a url, the file will be downloaded as a temporary file and read afterwards. -Stata files are encoded in ansinew. Depending on your system default encoding +Stata files are encoded in ansinew. Depending on your system's default encoding certain characters may appear wrong. Using a correct encoding may fix these. Variable names stored in the dta-file will be used in the resulting @@ -95,14 +95,12 @@ Stata 13 introduced a new character type called strL. strLs are able to store In R, you may use rownames to store characters (see for instance \code{data(swiss)}). In Stata, this is not possible and rownames have to be - stored as a variable. If this is the case for your file and you want to use - rownames, \code{add.rownames=TRUE} will convert the first variable of the - dta-file into rownames of the resulting data.frame. - -Beginning with Stata 13 (format 117), a new dta-format was introduced, which - was not handled by foreign at the time. It was implemented in this package - therefore the package got its name. Reading dta-files from earlier Stata - versions was not implemented until version 0.8. + stored as a variable. 
If you want to use rownames, set add.rownames to TRUE. + Then the first variable of the dta-file will hold the rownames of the resulting + data.frame. + +Reading dta-files of older and newer versions than 13 was introduced + with version 0.8. } \note{ read.dta13 uses GPL 2 licensed code by Thomas Lumley and R-core members From 44352f4ae05c3bc6400b1e334a11b353876c5409 Mon Sep 17 00:00:00 2001 From: Sebastian Jeworutzki Date: Fri, 9 Oct 2015 08:48:28 +0200 Subject: [PATCH 03/21] bump version number --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index f991bf9b..ebe61c1c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: readstata13 Type: Package Title: Import Stata Data Files -Version: 0.7.1 +Version: 0.8.1 Authors@R: c( person("Jan Marvin", "Garbuszus", email = "jan.garbuszus@ruhr-uni-bochum.de", role = c("aut")), From bf6e46392ee2b5a75f4529e47a7cb75e3f92a202 Mon Sep 17 00:00:00 2001 From: Sebastian Jeworutzki Date: Fri, 9 Oct 2015 09:01:05 +0200 Subject: [PATCH 04/21] fixed typo in warnings --- src/rcpp_pre13_savestata.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/rcpp_pre13_savestata.cpp b/src/rcpp_pre13_savestata.cpp index f059f12e..7e1e4d18 100644 --- a/src/rcpp_pre13_savestata.cpp +++ b/src/rcpp_pre13_savestata.cpp @@ -121,7 +121,7 @@ int stata_pre13_save(const char * filePath, Rcpp::DataFrame dat) /* write a datalabel */ if (datalabel.size() > ndlabel) - Rcpp::warning("Datalabel to long. Resizing. Max size is %d.", + Rcpp::warning("Datalabel too long. Resizing. Max size is %d.", ndlabel - 1); dta.write(datalabel.c_str(), ndlabel); @@ -131,7 +131,7 @@ int stata_pre13_save(const char * filePath, Rcpp::DataFrame dat) { if (timestamp.size() > 18) { - Rcpp::warning("Timestamp to long. Dropping."); + Rcpp::warning("Timestamp too long. 
Dropping."); timestamp = ""; } dta.write(timestamp.c_str(),timestamp.size()); @@ -189,7 +189,7 @@ int stata_pre13_save(const char * filePath, Rcpp::DataFrame dat) string nvarname = as(nvarnames[i]); if (nvarname.size() > nvarnameslen) - Rcpp::warning("Varname to long. Resizing. Max size is %d", + Rcpp::warning("Varname too long. Resizing. Max size is %d", nvarnameslen - 1); dta.write(nvarname.c_str(),nvarnameslen); @@ -210,7 +210,7 @@ int stata_pre13_save(const char * filePath, Rcpp::DataFrame dat) string nformats = as(formats[i]); if (nformats.size() > nformatslen) - Rcpp::warning("Formats to long. Resizing. Max size is %d", + Rcpp::warning("Formats too long. Resizing. Max size is %d", nformatslen - 1); dta.write(nformats.c_str(),nformatslen); @@ -222,7 +222,7 @@ int stata_pre13_save(const char * filePath, Rcpp::DataFrame dat) string nvalLabels = as(valLabels[i]); if (nvalLabels.size() > nvalLabelslen) - Rcpp::warning("Vallabel to long. Resizing. Max size is %d", + Rcpp::warning("Vallabel too long. Resizing. Max size is %d", nvalLabelslen - 1); dta.write(nvalLabels.c_str(), nvalLabelslen); @@ -237,7 +237,7 @@ int stata_pre13_save(const char * filePath, Rcpp::DataFrame dat) nvarLabels = as(varLabels[i]); if (nvarLabels.size() > nvarLabelslen) - Rcpp::warning("Varlabel to long. Resizing. Max size is %d", + Rcpp::warning("Varlabel too long. Resizing. Max size is %d", nvarLabelslen - 1); } dta.write(nvarLabels.c_str(),nvarLabelslen); @@ -391,7 +391,7 @@ int stata_pre13_save(const char * filePath, Rcpp::DataFrame dat) // Stata 6-12 can only store 244 byte strings if(val_s.size()>maxstrsize) { - Rcpp::warning("Character Var.to long. Resizing. Max size is %d.", + Rcpp::warning("Character value too long. Resizing. Max size is %d.", maxstrsize); // val_s.resize(244); } @@ -464,7 +464,7 @@ int stata_pre13_save(const char * filePath, Rcpp::DataFrame dat) string labtext = as(labelText[i]); if (labtext.size() > maxlabelsize) { - Rcpp::warning("Label to long. Resizing. 
Max size is %d", + Rcpp::warning("Label too long. Resizing. Max size is %d", maxlabelsize); labtext.resize(maxlabelsize); // labtext[labtext.size()] = '\0'; From 0ab3dcd5246a98989034bf6e3a7dee87a305e644 Mon Sep 17 00:00:00 2001 From: Sebastian Jeworutzki Date: Fri, 9 Oct 2015 10:09:28 +0200 Subject: [PATCH 05/21] make factor conversion for float and double values an option, fixes in help text for read.dta13 --- R/read.R | 25 ++++++++++++++++--------- man/read.dta13.Rd | 14 +++++++++----- 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/R/read.R b/R/read.R index c61c6797..3eb901d5 100644 --- a/R/read.R +++ b/R/read.R @@ -43,6 +43,8 @@ #' converted. #' @param add.rownames \emph{logical.} If \code{TRUE}, the first column will be #' used as rownames. Variable will be dropped afterwards. +#' @param nonint.factors \emph{logical.} If \code{TRUE}, factors labels +#' will be assigned to variables of type float and double. #' #' @details If the filename is a url, the file will be downloaded as a temporary #' file and read afterwards. @@ -61,9 +63,10 @@ #' dates. #' #' Stata 13 introduced a new character type called strL. strLs are able to store -#' strings of any size up to 2 billion characters. While R is able to store -#' strings of this size in a character, certain data.frames may appear messed, -#' if long strings are inserted default is \code{FALSE}. +#' strings up to 2 billion characters. While R is able to store +#' strings of this size in a character vector, the printed representation of such +#' vectors looks rather cluttered, so by default only a reference is saved in the +#' data.frame (\code{replace.strl=FALSE}). #' #' In R, you may use rownames to store characters (see for instance #' \code{data(swiss)}). 
In Stata, this is not possible and rownames have to be @@ -110,7 +113,7 @@ read.dta13 <- function(file, convert.factors = TRUE, generate.factors=FALSE, encoding = NULL, fromEncoding=NULL, convert.underscore = FALSE, missing.type = FALSE, convert.dates = TRUE, replace.strl = FALSE, - add.rownames = FALSE) { + add.rownames = FALSE, nonint.factors=FALSE) { # Check if path is a url if (length(grep("^(http|ftp|https)://", file))) { tmp <- tempfile() @@ -319,8 +322,13 @@ read.dta13 <- function(file, convert.factors = TRUE, generate.factors=FALSE, vartype <- types[i] labtable <- label[[labname]] #don't convert columns of type double or float to factor - if (labname %in% names(label) & !(vartype == sdouble | vartype == sfloat)) - { + if (labname %in% names(label)) { + if((vartype == sdouble | vartype == sfloat)) { + if(!nonint.factors) { + warning(paste0("\n ",vnames[i], ":\n Factor codes of type double or float detected - no labels assigned.\n Set option nonint.factors to TRUE to assign labels anyway.")) + next + } + } # get unique values / omit NA varunique <- na.omit(unique(data[, i])) # assign label if label set is complete @@ -335,13 +343,12 @@ read.dta13 <- function(file, convert.factors = TRUE, generate.factors=FALSE, data[, i] <- factor(data[, i], levels=gen.lab, labels=names(gen.lab)) } else { - warning(paste(vnames[i], "Missing factor labels - no labels assigned. 
- Set option generate.factors=T to generate labels.")) + warning(paste0("\n ",vnames[i], ":\n Missing factor labels - no labels assigned.\n Set option generate.factors=T to generate labels.")) } } } } - + if (add.rownames) { rownames(data) <- data[[1]] data[[1]] <- NULL diff --git a/man/read.dta13.Rd b/man/read.dta13.Rd index 52d7fd90..48c8a9fb 100644 --- a/man/read.dta13.Rd +++ b/man/read.dta13.Rd @@ -7,7 +7,7 @@ read.dta13(file, convert.factors = TRUE, generate.factors = FALSE, encoding = NULL, fromEncoding = NULL, convert.underscore = FALSE, missing.type = FALSE, convert.dates = TRUE, replace.strl = FALSE, - add.rownames = FALSE) + add.rownames = FALSE, nonint.factors = FALSE) } \arguments{ \item{file}{\emph{character.} Path to the dta file you want to import.} @@ -42,7 +42,10 @@ a strL string in the data.frame with the actual value. The strl attribute will be removed from the data.frame.} \item{add.rownames}{\emph{logical.} If \code{TRUE}, the first column will be - used as rownames. Variable will be dropped afterwards.} +used as rownames. Variable will be dropped afterwards.} + +\item{nonint.factors}{\emph{logical.} If \code{TRUE}, factors labels + will be assigned to variables of type float and double.} } \value{ The function returns a data.frame with attributes. The attributes @@ -89,9 +92,10 @@ Stata dates are converted to R's Date class the same way foreign handles dates. Stata 13 introduced a new character type called strL. strLs are able to store - strings of any size up to 2 billion characters. While R is able to store - strings of this size in a character, certain data.frames may appear messed, - if long strings are inserted default is \code{FALSE}. + strings up to 2 billion characters. While R is able to store + strings of this size in a character vector, the printed representation of such + vectors looks rather cluttered, so by default only a reference is saved in the + data.frame (\code{replace.strl=FALSE}). 
In R, you may use rownames to store characters (see for instance \code{data(swiss)}). In Stata, this is not possible and rownames have to be From 1e43c341b0531354dc109e140f0a412ea60bafd8 Mon Sep 17 00:00:00 2001 From: Marvin Date: Fri, 9 Oct 2015 12:15:16 +0200 Subject: [PATCH 06/21] Implement a pre R-3.2 version of dir.exists(). Will be replaced sometime in the future. --- R/save.R | 2 +- R/tools.R | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/R/save.R b/R/save.R index 28b011e8..0afca23e 100644 --- a/R/save.R +++ b/R/save.R @@ -71,7 +71,7 @@ save.dta13 <- function(data, file, data.label=NULL, time.stamp=TRUE, if (!is.data.frame(data)) stop("The object \"data\" must have class data.frame") - if (!dir.exists(dirname(file))) + if (!dir.exists13(dirname(file))) stop("Path is invalid. Possibly a non existend directory.") # Allow writing version as Stata version not Stata format diff --git a/R/tools.R b/R/tools.R index 469dff8a..08660460 100644 --- a/R/tools.R +++ b/R/tools.R @@ -33,6 +33,12 @@ save.encoding <- function(x, encoding) { sub="byte") } +# Function to check if directory exists +# @param x file path +dir.exists13 <-function(x) { + path <- dirname(x) + return(file.exists(path)) +} # Construct File Path # From efd1496fb537d150c94489b864848744d5e52971 Mon Sep 17 00:00:00 2001 From: Sebastian Jeworutzki Date: Sun, 11 Oct 2015 21:34:40 +0200 Subject: [PATCH 07/21] update see also section in r help --- R/read.R | 4 ++-- R/save.R | 4 ++-- man/read.dta13.Rd | 4 ++-- man/save.dta13.Rd | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/R/read.R b/R/read.R index 3eb901d5..094b8323 100644 --- a/R/read.R +++ b/R/read.R @@ -99,8 +99,8 @@ #' } #' @note read.dta13 uses GPL 2 licensed code by Thomas Lumley and R-core members #' from foreign::read.dta(). -#' @seealso \code{\link{read.dta}} and \code{memisc} for dta files from Stata -#' versions < 13. 
+#' @seealso \code{\link[foreign]{read.dta}} in package \code{foreign} and \code{memisc} for dta files from Stata +#' versions < 13 and \code{\link[haven]{read_dta}} in package \code{haven} for Stata version >= 13. #' @references Stata Corp (2014): Description of .dta file format #' \url{http://www.stata.com/help.cgi?dta} #' @author Jan Marvin Garbuszus \email{jan.garbuszus@@ruhr-uni-bochum.de} diff --git a/R/save.R b/R/save.R index 0afca23e..fd4a3f1b 100644 --- a/R/save.R +++ b/R/save.R @@ -54,8 +54,8 @@ #' type. The first element is the identifier and the second element the #' string.} #' } -#' @seealso \code{\link[foreign]{write.dta}} and \code{memisc} for dta files -#' from Stata versions < 13. +#' @seealso \code{\link[foreign]{read.dta}} in package \code{foreign} and \code{memisc} for dta files from Stata +#' versions < 13 and \code{\link[haven]{read_dta}} in package \code{haven} for Stata version >= 13. #' @references Stata Corp (2014): Description of .dta file format #' \url{http://www.stata.com/help.cgi?dta} #' @author Jan Marvin Garbuszus \email{jan.garbuszus@@ruhr-uni-bochum.de} diff --git a/man/read.dta13.Rd b/man/read.dta13.Rd index 48c8a9fb..dc6828e3 100644 --- a/man/read.dta13.Rd +++ b/man/read.dta13.Rd @@ -120,7 +120,7 @@ Stata Corp (2014): Description of .dta file format \url{http://www.stata.com/help.cgi?dta} } \seealso{ -\code{\link{read.dta}} and \code{memisc} for dta files from Stata -versions < 13. +\code{\link[foreign]{read.dta}} in package \code{foreign} and \code{memisc} for dta files from Stata +versions < 13 and \code{\link[haven]{read_dta}} in package \code{haven} for Stata version >= 13. } diff --git a/man/save.dta13.Rd b/man/save.dta13.Rd index 3dc6ac80..3bab679a 100644 --- a/man/save.dta13.Rd +++ b/man/save.dta13.Rd @@ -72,7 +72,7 @@ Stata Corp (2014): Description of .dta file format \url{http://www.stata.com/help.cgi?dta} } \seealso{ -\code{\link[foreign]{write.dta}} and \code{memisc} for dta files -from Stata versions < 13. 
+\code{\link[foreign]{read.dta}} in package \code{foreign} and \code{memisc} for dta files from Stata +versions < 13 and \code{\link[haven]{read_dta}} in package \code{haven} for Stata version >= 13. } From 49332b6433195a0be126b76ffdd8787dfa54573f Mon Sep 17 00:00:00 2001 From: Sebastian Jeworutzki Date: Thu, 15 Oct 2015 22:02:42 +0000 Subject: [PATCH 08/21] changed fseeks to test. respect initial reading of tags in testing of end tags --- inst/include/readstata.h | 17 +++++++++ inst/include/swap_endian.h | 2 +- src/read_dta.cpp | 75 ++++++++++++++++++++++++++------------ 3 files changed, 69 insertions(+), 25 deletions(-) diff --git a/inst/include/readstata.h b/inst/include/readstata.h index ad93f9b5..599e2cb2 100644 --- a/inst/include/readstata.h +++ b/inst/include/readstata.h @@ -21,7 +21,23 @@ #include #include #include + +#define GCC_VERSION (__GNUC__ * 10000 \ ++ __GNUC_MINOR__ * 100 \ ++ __GNUC_PATCHLEVEL__) + +/* Test for GCC < 4.9.0 */ +#if GCC_VERSION < 40900 & !__clang__ + typedef signed char int8_t; + typedef unsigned char uint8_t; + typedef signed short int16_t; + typedef unsigned short uint16_t; + typedef signed int int32_t; + typedef unsigned int uint32_t; +#else #include +#endif + #include "read_dta.h" #include "read_pre13_dta.h" @@ -76,6 +92,7 @@ inline void test(std::string testme, FILE * file) readstring(test,file, test.size()); if (testme.compare(test)!=0) { + Rcpp::warning("\n testme:%s \n test: %s\n", testme.c_str(), test.c_str()); Rcpp::stop("When attempting to read %s: Something went wrong!", testme.c_str()); } } diff --git a/inst/include/swap_endian.h b/inst/include/swap_endian.h index d7189ce0..343465b3 100644 --- a/inst/include/swap_endian.h +++ b/inst/include/swap_endian.h @@ -1,7 +1,7 @@ #ifndef SWAP_ENDIAN #define SWAP_ENDIAN -#include +/*#include */ #include #define GCC_VERSION (__GNUC__ * 10000 \ diff --git a/src/read_dta.cpp b/src/read_dta.cpp index c6791814..b230946b 100644 --- a/src/read_dta.cpp +++ b/src/read_dta.cpp @@ -21,7 
+21,8 @@ using namespace Rcpp; using namespace std; List read_dta(FILE * file, const bool missing) { - fseek(file, 18, SEEK_CUR);// stata_dta>
+ //fseek(file, 18, SEEK_CUR);// stata_dta><header>
+ test("stata_dta><header>
", file); test("", file); /* @@ -73,7 +74,8 @@ List read_dta(FILE * file, const bool missing) { break; } - fseek(file, 10, SEEK_CUR); // + //fseek(file, 10, SEEK_CUR); // + test("", file); test("", file); /* @@ -83,7 +85,8 @@ List read_dta(FILE * file, const bool missing) { std::string byteorder(3, '\0'); readstring(byteorder,file, byteorder.size()); - fseek(file, 12, SEEK_CUR); // + //fseek(file, 12, SEEK_CUR); // + test("", file); test("", file); bool swapit = 0; @@ -96,7 +99,8 @@ List read_dta(FILE * file, const bool missing) { uint16_t k = 0; k = readbin(k, file, swapit); - fseek(file, 4, SEEK_CUR); // + //fseek(file, 4, SEEK_CUR); // + test("", file); test("", file); /* @@ -112,7 +116,8 @@ List read_dta(FILE * file, const bool missing) { n = readbin(n, file, swapit); } - fseek(file, 4, SEEK_CUR); // + //fseek(file, 4, SEEK_CUR); // + test("", file); test(" + //fseek(file, 8, SEEK_CUR); // + test("", file); test("", file); /* @@ -164,7 +170,8 @@ List read_dta(FILE * file, const bool missing) { } CharacterVector timestampCV = timestamp; - fseek(file, 21, SEEK_CUR); //
+ //fseek(file, 21, SEEK_CUR); //</timestamp></header>
+ test("</timestamp></header>
", file); test("", file); /* @@ -194,7 +201,8 @@ List read_dta(FILE * file, const bool missing) { map[i] = nmap; } - fseek(file, 6, SEEK_CUR); // + //fseek(file, 6, SEEK_CUR); // + test("", file); test("", file); /* @@ -216,7 +224,8 @@ List read_dta(FILE * file, const bool missing) { vartype[i] = nvartype; } - fseek(file, 17, SEEK_CUR); // + //fseek(file, 17, SEEK_CUR); // + test("", file); test("", file); /* @@ -232,7 +241,8 @@ List read_dta(FILE * file, const bool missing) { varnames[i] = nvarnames; } - fseek(file, 11, SEEK_CUR); // + //fseek(file, 11, SEEK_CUR); // + test("", file); test("", file); /* @@ -252,7 +262,8 @@ List read_dta(FILE * file, const bool missing) { sortlist[i] = nsortlist; } - fseek(file, 11, SEEK_CUR); // + //fseek(file, 11, SEEK_CUR); // + test("", file); test("", file); /* @@ -269,7 +280,8 @@ List read_dta(FILE * file, const bool missing) { formats[i] = nformats; } - fseek(file, 10, SEEK_CUR); // + //fseek(file, 10, SEEK_CUR); // + test("", file); test("",file); /* @@ -287,7 +299,8 @@ List read_dta(FILE * file, const bool missing) { valLabels[i] = nvalLabels; } - fseek(file, 20, SEEK_CUR); // + //fseek(file, 20, SEEK_CUR); // + test("", file); test("", file); /* @@ -303,7 +316,8 @@ List read_dta(FILE * file, const bool missing) { varLabels[i] = nvarLabels; } - fseek(file, 18, SEEK_CUR); // + //fseek(file, 18, SEEK_CUR); // + test("", file); test("", file); /* @@ -324,11 +338,13 @@ List read_dta(FILE * file, const bool missing) { List ch = List(); CharacterVector chs(3); - - while (chtag.compare(tago)==0) + + if(chtag.compare(tago)==0) { - uint32_t nocharacter = 0; - nocharacter = readbin(nocharacter, file, swapit); + while (chtag.compare(tago)==0) + { + uint32_t nocharacter = 0; + nocharacter = readbin(nocharacter, file, swapit); std::string chvarname(chlen, '\0'); std::string chcharact(chlen, '\0'); @@ -354,7 +370,9 @@ List read_dta(FILE * file, const bool missing) { readstring(tago, file, tago.size()); } - fseek(file, 14, SEEK_CUR); //[ 
+ //fseek(file, 14, SEEK_CUR); //[ + } + test("aracteristics>", file); test("", file); /* @@ -515,7 +533,8 @@ List read_dta(FILE * file, const bool missing) { df.attr("names") = varnames; df.attr("class") = "data.frame"; - fseek(file, 7, SEEK_CUR); // + //fseek(file, 7, SEEK_CUR); // + test("", file); test("", file); /* @@ -533,7 +552,8 @@ List read_dta(FILE * file, const bool missing) { readstring(tags, file, tags.size()); List strlstable = List(); //put strLs into this list - +if(gso.compare(tags)==0) +{ while(gso.compare(tags)==0) { CharacterVector strls(2); @@ -588,7 +608,11 @@ List read_dta(FILE * file, const bool missing) { } // after strls - fseek(file, 5, SEEK_CUR); //[ + //fseek(file, 5, SEEK_CUR); //[ + //test("", file); +} + test("trls>", file); + test("", file); /* @@ -607,7 +631,7 @@ List read_dta(FILE * file, const bool missing) { readstring(tag, file, tag.size()); List labelList = List(); //put labels into this list - +if(lbltag.compare(tag)==0) { while(lbltag.compare(tag)==0) { int32_t nlen = 0, labn = 0, txtlen = 0, noff = 0, val = 0; @@ -695,7 +719,10 @@ List read_dta(FILE * file, const bool missing) { * close the file */ - fseek(file, 10, SEEK_CUR); // [ + //fseek(file, 10, SEEK_CUR); // [# + //test("", file); +} + test("ue_labels>", file); test("", file); From b721e04951fd15b47faad96434350040eb0eb1c3 Mon Sep 17 00:00:00 2001 From: Sebastian Jeworutzki Date: Fri, 16 Oct 2015 00:13:51 +0200 Subject: [PATCH 09/21] Update DESCRIPTION --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index ebe61c1c..c303f42b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: readstata13 Type: Package Title: Import Stata Data Files -Version: 0.8.1 +Version: 0.8.2 Authors@R: c( person("Jan Marvin", "Garbuszus", email = "jan.garbuszus@ruhr-uni-bochum.de", role = c("aut")), From 96f768526c442cefc0ca0c2b1a7d34caf388ed96 Mon Sep 17 00:00:00 2001 From: Sebastian Jeworutzki Date: Fri, 16 Oct 2015 
09:49:02 +0200 Subject: [PATCH 10/21] removed not needed ifs --- src/read_dta.cpp | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/read_dta.cpp b/src/read_dta.cpp index b230946b..6d5ccd3f 100644 --- a/src/read_dta.cpp +++ b/src/read_dta.cpp @@ -339,8 +339,6 @@ List read_dta(FILE * file, const bool missing) { List ch = List(); CharacterVector chs(3); - if(chtag.compare(tago)==0) - { while (chtag.compare(tago)==0) { uint32_t nocharacter = 0; @@ -371,7 +369,7 @@ List read_dta(FILE * file, const bool missing) { } //fseek(file, 14, SEEK_CUR); //[ - } + test("aracteristics>", file); test("", file); @@ -552,8 +550,7 @@ List read_dta(FILE * file, const bool missing) { readstring(tags, file, tags.size()); List strlstable = List(); //put strLs into this list -if(gso.compare(tags)==0) -{ + while(gso.compare(tags)==0) { CharacterVector strls(2); @@ -610,7 +607,7 @@ if(gso.compare(tags)==0) // after strls //fseek(file, 5, SEEK_CUR); //[ //test("", file); -} + test("trls>", file); test("", file); @@ -631,7 +628,7 @@ if(gso.compare(tags)==0) readstring(tag, file, tag.size()); List labelList = List(); //put labels into this list -if(lbltag.compare(tag)==0) { + while(lbltag.compare(tag)==0) { int32_t nlen = 0, labn = 0, txtlen = 0, noff = 0, val = 0; @@ -721,7 +718,7 @@ if(lbltag.compare(tag)==0) { //fseek(file, 10, SEEK_CUR); // [# //test("", file); -} + test("ue_labels>", file); test("", file); From 03e3b8a73c7918c90af29031e8dfddd604356e6c Mon Sep 17 00:00:00 2001 From: Sebastian Jeworutzki Date: Mon, 19 Oct 2015 09:32:21 +0200 Subject: [PATCH 11/21] bump version / add download badge --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index cabc66a1..73d592bb 100644 --- a/README.md +++ b/README.md @@ -35,12 +35,12 @@ users need to install ```R # install.packages("devtools") -devtools::install_github("sjewo/readstata13", ref="0.8") +devtools::install_github("sjewo/readstata13", ref="0.8.1") 
``` Older Versions of devtools require a username option: ```R -install_github("readstata13", username="sjewo", ref="0.8") +install_github("readstata13", username="sjewo", ref="0.8.1") ``` To install the current development version from github: @@ -53,7 +53,7 @@ devtools::install_github("sjewo/readstata13", ref="testing") ## Current Status [![Build Status](https://travis-ci.org/sjewo/readstata13.svg?branch=master)](https://travis-ci.org/sjewo/readstata13) - +[![CRAN Downloads](http://cranlogs.r-pkg.org/badges/readstata13)](https://cran.r-project.org/web/packages/readstata13/index.html) ### Working features * reading data files from disk or url and create a data.frame From af7b3e9f449e3e8d7f6927041914359d8c5943e1 Mon Sep 17 00:00:00 2001 From: Sebastian Jeworutzki Date: Mon, 19 Oct 2015 10:58:21 +0200 Subject: [PATCH 12/21] change version to 0.8.1 --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index c303f42b..ebe61c1c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: readstata13 Type: Package Title: Import Stata Data Files -Version: 0.8.2 +Version: 0.8.1 Authors@R: c( person("Jan Marvin", "Garbuszus", email = "jan.garbuszus@ruhr-uni-bochum.de", role = c("aut")), From 1b8eff69f8eedcba580f1f9b63ab50473bcf636e Mon Sep 17 00:00:00 2001 From: Marvin Date: Wed, 4 Nov 2015 13:35:20 +0100 Subject: [PATCH 13/21] Comment cleanup. --- src/read_dta.cpp | 48 +++++++++++++++++++++--------------------------- 1 file changed, 21 insertions(+), 27 deletions(-) diff --git a/src/read_dta.cpp b/src/read_dta.cpp index 6d5ccd3f..dddb99a3 100644 --- a/src/read_dta.cpp +++ b/src/read_dta.cpp @@ -21,7 +21,7 @@ using namespace Rcpp; using namespace std; List read_dta(FILE * file, const bool missing) { - //fseek(file, 18, SEEK_CUR);// stata_dta>
+ // stata_dta><header>
test("stata_dta><header>
", file); test("", file); @@ -74,7 +74,7 @@ List read_dta(FILE * file, const bool missing) { break; } - //fseek(file, 10, SEEK_CUR); // + // test("", file); test("", file); @@ -85,7 +85,7 @@ List read_dta(FILE * file, const bool missing) { std::string byteorder(3, '\0'); readstring(byteorder,file, byteorder.size()); - //fseek(file, 12, SEEK_CUR); // + // test("", file); test("", file); @@ -99,7 +99,7 @@ List read_dta(FILE * file, const bool missing) { uint16_t k = 0; k = readbin(k, file, swapit); - //fseek(file, 4, SEEK_CUR); // + // test("", file); test("", file); @@ -116,7 +116,7 @@ List read_dta(FILE * file, const bool missing) { n = readbin(n, file, swapit); } - //fseek(file, 4, SEEK_CUR); // + // test("", file); test(" + // test("", file); test("", file); @@ -170,7 +170,7 @@ List read_dta(FILE * file, const bool missing) { } CharacterVector timestampCV = timestamp; - //fseek(file, 21, SEEK_CUR); //
+ //</timestamp></header>
test("</timestamp></header>
", file); test("", file); @@ -201,7 +201,7 @@ List read_dta(FILE * file, const bool missing) { map[i] = nmap; } - //fseek(file, 6, SEEK_CUR); // + // test("", file); test("", file); @@ -224,7 +224,7 @@ List read_dta(FILE * file, const bool missing) { vartype[i] = nvartype; } - //fseek(file, 17, SEEK_CUR); // + // test("", file); test("", file); @@ -241,7 +241,7 @@ List read_dta(FILE * file, const bool missing) { varnames[i] = nvarnames; } - //fseek(file, 11, SEEK_CUR); // + // test("", file); test("", file); @@ -262,7 +262,7 @@ List read_dta(FILE * file, const bool missing) { sortlist[i] = nsortlist; } - //fseek(file, 11, SEEK_CUR); // + // test("", file); test("", file); @@ -280,7 +280,7 @@ List read_dta(FILE * file, const bool missing) { formats[i] = nformats; } - //fseek(file, 10, SEEK_CUR); // + // test("", file); test("",file); @@ -299,7 +299,7 @@ List read_dta(FILE * file, const bool missing) { valLabels[i] = nvalLabels; } - //fseek(file, 20, SEEK_CUR); // + // test("", file); test("", file); @@ -316,7 +316,7 @@ List read_dta(FILE * file, const bool missing) { varLabels[i] = nvarLabels; } - //fseek(file, 18, SEEK_CUR); // + // test("", file); test("", file); @@ -338,7 +338,7 @@ List read_dta(FILE * file, const bool missing) { List ch = List(); CharacterVector chs(3); - + while (chtag.compare(tago)==0) { uint32_t nocharacter = 0; @@ -361,15 +361,14 @@ List read_dta(FILE * file, const bool missing) { // add characteristics to the list ch.push_front( chs ); - //fseek(file, 5, SEEK_CUR); // + // test("", file); // read next tag readstring(tago, file, tago.size()); } - //fseek(file, 14, SEEK_CUR); //[ - + //[ test("aracteristics>", file); test("", file); @@ -531,7 +530,7 @@ List read_dta(FILE * file, const bool missing) { df.attr("names") = varnames; df.attr("class") = "data.frame"; - //fseek(file, 7, SEEK_CUR); // + //
test("", file); test("", file); @@ -605,11 +604,8 @@ List read_dta(FILE * file, const bool missing) { } // after strls - //fseek(file, 5, SEEK_CUR); //[ - //test("", file); - + //[ test("trls>", file); - test("", file); /* @@ -628,7 +624,7 @@ List read_dta(FILE * file, const bool missing) { readstring(tag, file, tag.size()); List labelList = List(); //put labels into this list - + while(lbltag.compare(tag)==0) { int32_t nlen = 0, labn = 0, txtlen = 0, noff = 0, val = 0; @@ -716,9 +712,7 @@ List read_dta(FILE * file, const bool missing) { * close the file */ - //fseek(file, 10, SEEK_CUR); // [# - //test("", file); - + // [ test("ue_labels>", file); test("", file); From 64c68c883a4db397ae3106cf80372b4f0c588572 Mon Sep 17 00:00:00 2001 From: Marvin Date: Thu, 5 Nov 2015 17:03:44 +0100 Subject: [PATCH 14/21] Replace remaining fseeks with a simple skip function. --- inst/include/readstata.h | 11 +++++++++++ src/read_dta.cpp | 4 ++-- src/read_pre13_dta.cpp | 2 +- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/inst/include/readstata.h b/inst/include/readstata.h index 599e2cb2..4ee2995e 100644 --- a/inst/include/readstata.h +++ b/inst/include/readstata.h @@ -85,6 +85,17 @@ static void readstring(std::string &mystring, FILE * fp, int nchar) Rcpp::warning("char: a binary read error occurred"); } +static void skip(uint32_t sbit, FILE * file) +{ + uint8_t bit = 0; + + if (fread(&bit, sbit, 1, file) != 1){ + if (ferror(file)){ + Rcpp::warning("num: a binary read error occurred."); + } + } +} + inline void test(std::string testme, FILE * file) { std::string test(testme.size(), '\0'); diff --git a/src/read_dta.cpp b/src/read_dta.cpp index dddb99a3..82a9009d 100644 --- a/src/read_dta.cpp +++ b/src/read_dta.cpp @@ -639,7 +639,7 @@ List read_dta(FILE * file, const bool missing) { readstring(nlabname, file, nlabname.size()); //padding - fseek(file, 3, SEEK_CUR); + skip(3, file); // value_label_table for actual label set labn = readbin(labn, file, swapit); @@ -702,7 
+702,7 @@ List read_dta(FILE * file, const bool missing) { // add this set to output list labelList.push_front( code, labset); - fseek(file, 6, SEEK_CUR); // + skip(6, file); // readstring(tag, file, tag.size()); } diff --git a/src/read_pre13_dta.cpp b/src/read_pre13_dta.cpp index 593a40a3..274d915e 100644 --- a/src/read_pre13_dta.cpp +++ b/src/read_pre13_dta.cpp @@ -510,7 +510,7 @@ List read_pre13_dta(FILE * file, const bool missing) readstring(nlabname, file, nlabname.size()); //padding - fseek(file, 3, SEEK_CUR); + skip(3, file); // value_label_table for actual label set labn = readbin(labn, file, swapit); From 1b8880b7e7ee31800ebe0a33ee74c88017ffe100 Mon Sep 17 00:00:00 2001 From: Marvin Date: Thu, 5 Nov 2015 17:18:29 +0100 Subject: [PATCH 15/21] If test() calls stop() make sure to close the file. Otherwise memory will be polluted. Maybe a additional call to gc() is required. However stopping should not happen in the first place. --- inst/include/readstata.h | 1 + 1 file changed, 1 insertion(+) diff --git a/inst/include/readstata.h b/inst/include/readstata.h index 4ee2995e..cf482283 100644 --- a/inst/include/readstata.h +++ b/inst/include/readstata.h @@ -103,6 +103,7 @@ inline void test(std::string testme, FILE * file) readstring(test,file, test.size()); if (testme.compare(test)!=0) { + fclose(file); Rcpp::warning("\n testme:%s \n test: %s\n", testme.c_str(), test.c_str()); Rcpp::stop("When attempting to read %s: Something went wrong!", testme.c_str()); } From 63b9bba70fce2a1dd562603ebefaa3f1b8e97786 Mon Sep 17 00:00:00 2001 From: Sebastian Jeworutzki Date: Mon, 9 Nov 2015 18:45:36 +0100 Subject: [PATCH 16/21] save strl attribute as named character vector, not as list --- R/read.R | 5 ++--- src/read_dta.cpp | 23 +++++++++++++---------- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/R/read.R b/R/read.R index 094b8323..97ec29ea 100644 --- a/R/read.R +++ b/R/read.R @@ -89,9 +89,8 @@ #' \item{var.labels:}{Variable labels} #' \item{version:}{dta 
file format version} #' \item{label.table:}{List of value labels.} -#' \item{strl:}{List of character vectors for the new strl string variable -#' type. The first element is the identifier and -#' the second element the string.} +#' \item{strl:}{Character vector with long strings for the new strl string variable +#' type. The name of every element is the identifier.} #' \item{expansion.fields:}{list providing variable name, characteristic name #' and the contents of Stata characteristic field.} #' \item{missing:}{List of numeric vectors with Stata missing type for each diff --git a/src/read_dta.cpp b/src/read_dta.cpp index 82a9009d..7a451356 100644 --- a/src/read_dta.cpp +++ b/src/read_dta.cpp @@ -548,12 +548,14 @@ List read_dta(FILE * file, const bool missing) { std::string tags(3, '\0'); readstring(tags, file, tags.size()); - List strlstable = List(); //put strLs into this list + //put strLs into a named vector + CharacterVector strlvalues(0); + CharacterVector strlnames(0); while(gso.compare(tags)==0) { CharacterVector strls(2); - char erg[22]; + char ref[22]; // FixMe: Strl in 118 switch (release) @@ -565,7 +567,7 @@ List read_dta(FILE * file, const bool missing) { v = readbin(v, file, swapit); o = readbin(o, file, swapit); - sprintf(erg, "%010d%010d", v, o); + sprintf(ref, "%010d%010d", v, o); break; } case 118: @@ -577,12 +579,11 @@ List read_dta(FILE * file, const bool missing) { o = readbin(o, file, swapit); // z = readbin(z, file, swapit); - sprintf(erg, "%010d%010ld", v, o); - // sprintf(erg, "%010ld", z); + sprintf(ref, "%010d%010ld", v, o); + // sprintf(ref, "%010ld", z); break; } } - strls(0) = erg; // (129 = binary) | (130 = ascii) uint8_t t = 0; @@ -596,13 +597,15 @@ List read_dta(FILE * file, const bool missing) { std::string strl(len, '\0'); readstring(strl, file, strl.size()); - strls(1) = strl; - - strlstable.push_back( strls ); + strlvalues.push_back( strl ); + strlnames.push_back( ref ); readstring(tags, file, tags.size()); } + // set 
identifier as name + strlvalues.attr("names") = strlnames; + // after strls //[ test("trls>", file); @@ -731,7 +734,7 @@ List read_dta(FILE * file, const bool missing) { df.attr("version") = versionIV; df.attr("label.table") = labelList; df.attr("expansion.fields") = ch; - df.attr("strl") = strlstable; + df.attr("strl") = strlvalues; df.attr("byteorder") = wrap(byteorder); return df; From f7f90176e4c1e5b206f80aeb83288bad66cfc0ac Mon Sep 17 00:00:00 2001 From: Sebastian Jeworutzki Date: Mon, 9 Nov 2015 23:13:16 +0100 Subject: [PATCH 17/21] Revert "Replace remaining fseeks with a simple skip function." This reverts commit 64c68c883a4db397ae3106cf80372b4f0c588572. --- inst/include/readstata.h | 11 ----------- src/read_dta.cpp | 4 ++-- src/read_pre13_dta.cpp | 2 +- 3 files changed, 3 insertions(+), 14 deletions(-) diff --git a/inst/include/readstata.h b/inst/include/readstata.h index cf482283..08256908 100644 --- a/inst/include/readstata.h +++ b/inst/include/readstata.h @@ -85,17 +85,6 @@ static void readstring(std::string &mystring, FILE * fp, int nchar) Rcpp::warning("char: a binary read error occurred"); } -static void skip(uint32_t sbit, FILE * file) -{ - uint8_t bit = 0; - - if (fread(&bit, sbit, 1, file) != 1){ - if (ferror(file)){ - Rcpp::warning("num: a binary read error occurred."); - } - } -} - inline void test(std::string testme, FILE * file) { std::string test(testme.size(), '\0'); diff --git a/src/read_dta.cpp b/src/read_dta.cpp index 7a451356..ae7a4287 100644 --- a/src/read_dta.cpp +++ b/src/read_dta.cpp @@ -642,7 +642,7 @@ List read_dta(FILE * file, const bool missing) { readstring(nlabname, file, nlabname.size()); //padding - skip(3, file); + fseek(file, 3, SEEK_CUR); // value_label_table for actual label set labn = readbin(labn, file, swapit); @@ -705,7 +705,7 @@ List read_dta(FILE * file, const bool missing) { // add this set to output list labelList.push_front( code, labset); - skip(6, file); // + fseek(file, 6, SEEK_CUR); // readstring(tag, file, 
tag.size()); } diff --git a/src/read_pre13_dta.cpp b/src/read_pre13_dta.cpp index 274d915e..593a40a3 100644 --- a/src/read_pre13_dta.cpp +++ b/src/read_pre13_dta.cpp @@ -510,7 +510,7 @@ List read_pre13_dta(FILE * file, const bool missing) readstring(nlabname, file, nlabname.size()); //padding - skip(3, file); + fseek(file, 3, SEEK_CUR); // value_label_table for actual label set labn = readbin(labn, file, swapit); From 684be2b735a81c4dbfc78c96f108e03e1625d571 Mon Sep 17 00:00:00 2001 From: Sebastian Jeworutzki Date: Mon, 9 Nov 2015 23:15:24 +0100 Subject: [PATCH 18/21] More efficient strl replacement --- R/read.R | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/R/read.R b/R/read.R index 97ec29ea..2b96de9d 100644 --- a/R/read.R +++ b/R/read.R @@ -260,21 +260,12 @@ read.dta13 <- function(file, convert.factors = TRUE, generate.factors=FALSE, if (replace.strl) { if (version >= 117L) { - strl <- do.call(rbind, attr(data,"strl")) + strl <- c("") + names(strl) <- "00000000000000000000" + strl <- c(strl, attr(data,"strl")) for (j in seq(ncol(data))[types == 32768] ) { - refs <- unique(data[, j]) - for (ref in refs) { - if (length(strl[strl[,1] == ref,2]) != 0){ - data[data[, j] == ref, j] <- strl[strl[, 1] == ref, 2] - } - } + data[, j] <- strl[data[,j]] } - - # recode strL 0 to void - for (v in (1:ncol(data))[types == sstrl]) { - data[[v]] <- gsub("00000000000000000000","", data[[v]] ) - } - # if strls are in data.frame remove attribute strl attr(data, "strl") <- NULL } else { From 853c078e370be663ddfebb3eabe8a9ea6cf5f192 Mon Sep 17 00:00:00 2001 From: Sebastian Jeworutzki Date: Sat, 14 Nov 2015 14:37:21 +0100 Subject: [PATCH 19/21] prepare cran release 0.8.1 --- DESCRIPTION | 3 ++- README.md | 8 +++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index ebe61c1c..22bf26d6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -7,7 +7,8 @@ Authors@R: c( email = "jan.garbuszus@ruhr-uni-bochum.de", role = 
c("aut")), person("Sebastian", "Jeworutzki", email="Sebastian.Jeworutzki@ruhr-uni-bochum.de", role = c("aut", "cre")), - person("R Core Team", role="cph") + person("R Core Team", role="cph"), + person("Magnus Thor", "Torfason", role="ctb") ) Description: Function to read and write the Stata file format. URL: https://github.com/sjewo/readstata13 diff --git a/README.md b/README.md index 73d592bb..c616a5f6 100644 --- a/README.md +++ b/README.md @@ -54,12 +54,18 @@ devtools::install_github("sjewo/readstata13", ref="testing") [![Build Status](https://travis-ci.org/sjewo/readstata13.svg?branch=master)](https://travis-ci.org/sjewo/readstata13) [![CRAN Downloads](http://cranlogs.r-pkg.org/badges/readstata13)](https://cran.r-project.org/web/packages/readstata13/index.html) + ### Working features +* [new in 0.8.1] convert non-integer variables to factors (```nonint.factors=T```) +* [new in 0.8.1] handle large datasets +* [new in 0.8.1] working with strL variables is now a lot faster + + * reading data files from disk or url and create a data.frame * saving dta files to disk - most features of the dta file format are supported * assign variable names -* read the new strL strings and save them as attribute +* read the new strL strings and save them as attribute * convert stata label to factors and save them as attribute * read some meta data (timestamp, dataset label, formats,...) * convert strings to system encoding From e3457d5173d19891183f1537a1989f1acc230a91 Mon Sep 17 00:00:00 2001 From: Sebastian Jeworutzki Date: Sat, 14 Nov 2015 14:38:59 +0100 Subject: [PATCH 20/21] update manpage for strL --- man/read.dta13.Rd | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/man/read.dta13.Rd b/man/read.dta13.Rd index dc6828e3..8565448c 100644 --- a/man/read.dta13.Rd +++ b/man/read.dta13.Rd @@ -61,9 +61,8 @@ The function returns a data.frame with attributes. 
The attributes \item{var.labels:}{Variable labels} \item{version:}{dta file format version} \item{label.table:}{List of value labels.} - \item{strl:}{List of character vectors for the new strl string variable - type. The first element is the identifier and - the second element the string.} + \item{strl:}{Character vector with long strings for the new strl string variable + type. The name of every element is the identifier.} \item{expansion.fields:}{list providing variable name, characteristic name and the contents of Stata characteristic field.} \item{missing:}{List of numeric vectors with Stata missing type for each From 856250df57289c30c856c105b956091bdbd7a90e Mon Sep 17 00:00:00 2001 From: Sebastian Jeworutzki Date: Sat, 14 Nov 2015 14:42:46 +0100 Subject: [PATCH 21/21] typos in readme.md --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index c616a5f6..5439cf69 100644 --- a/README.md +++ b/README.md @@ -61,7 +61,6 @@ devtools::install_github("sjewo/readstata13", ref="testing") * [new in 0.8.1] handle large datasets * [new in 0.8.1] working with strL variables is now a lot faster - * reading data files from disk or url and create a data.frame * saving dta files to disk - most features of the dta file format are supported * assign variable names