-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_import.r
115 lines (99 loc) · 4.16 KB
/
data_import.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
source("clean_states.r")
## utils package (default)
my.file.path = file.path(".","data","states.csv")
my.file.path
?read.table
readLines(con=my.file.path,n=5)
states <- read.table(my.file.path,header=T,sep=",")
states <- clean.states(states)
str(states)
head(states)
my.file.path0 = file.path(".","data","states0.csv")
readLines(con=my.file.path0,n=5)
#states0 <- read.table(my.file.path0,header=T,sep=",",stringsAsFactors=F)
states0 <- read.csv(my.file.path0,stringsAsFactors=F)
states0 <- clean.states0(states0)
str(states0)
tail(states0)
states0[which.min(states0$Statehood),]
states0[which.max(states0$Statehood),]
mfp <- file.path(".","data","states.txt")
states1 <- read.delim(mfp)
str(states1)
states1 <- clean.states(states1)
## with the usage of colClasses, certain columns can be excluded:
states0 <- read.table(my.file.path0,header=T,sep=",",stringsAsFactors=F)
## note: read.csv2 and read.delim2 has the field separator as ; in place of ,
## readr package:
if (!require(readr)) install.packages("readr")
library(readr)
?read_delim
vignette("column-types")
states <- read_delim(my.file.path,delim=",") # utils is better, col_types is a pain when there are too many columns
str(states)
## skip and nmax helps to process huge data in chunks
## read_csv and read_tsv are also available
## data.table
#if (!require(data.table)) install.packages("data.table")
if (!require(dtplyr)) install.packages("dtplyr")
library(dtplyr)
## fread is the key function here:
states <- fread(my.file.path) # again, a pain: numbers are not recognised, factors are not recognised.
str(states)
class(fread(my.file.path)) # "data.table" "data.frame"
class(read_tsv(mfp)) # "tbl_df" "tbl" "data.frame"
if(!require(xlsx)) install.packages("xlsx")
library(xlsx)
##vignette("xlsx") # not much
?xlsx::read.xlsx
xfp <- file.path("data","sample.xlsx")
nuOfSheets <- length(getSheets(loadWorkbook(xfp)))
nuOfSheets
if(!require(readODS)) install.packages("readODS")
library(readODS)
ofp <- file.path("data","sample.ods")
ls(package:readODS)
ods_sheets(ofp) # list of sheet names
get_num_sheet_in_ods(ofp) # how many
getNrOfSheetsInODS(ofp) # same
?read.ods # by default, all sheets are read into list
?read_ods # more parameters
if(!require(gnumeric)) install.packages("gnumeric")
library(gnumeric)
ls(package:gnumeric)
## ?read.gnumeric.range
## ?read.gnumeric.sheet.info
gfp <- file.path("data","sample.gnumeric")
read.gnumeric.sheet.info(gfp) # concise and useful info
## ?read.gnumeric.sheet # almost the same as range
?read.gnumeric.sheets
gsl <- read.gnumeric.sheets(gfp)
str(gsl) # list of two, automatic column names
if(!require(readxl)) install.packages("readxl")
library(readxl)
ls(package:readxl)
## ?excel_sheets # list sheets
excel_sheets(xfp) # list of sheet names
?read_excel # both xls and xlsx are read
## lapply to read sheets:
xlsx.list <- lapply(excel_sheets(xfp),read_excel,path=xfp,col_names=F) # very stupid: col names are T by default
str(xlsx.list)
if(!require(gdata)) install.packages("gdata")
library(gdata)
ls(package:gdata) # lots of functions, just like xlsx
help(gdata) # only xls is handled, xlsx is not
if(!require(XLConnect)) install.packages("XLConnect")
library(XLConnect)
ls(package:XLConnect) # feature-rich
wb <- loadWorkbook(xfp)
class(wb) # "workbook", a connection to the file
str(wb)
getSheets(wb) # a vector of the sheet names
sh1 <- readWorksheet(wb,getSheets(wb)[1], header=F)
str(sh1) # incorrect (lack of) name recognition
## partial read:
sh1p <- readWorksheet(wb,1,startRow=10,endRow=19,startCol=5,endCol=9,header=F)
sh1p # a manageable subset
createSheet(wb,"added")
writeWorksheet(wb,mtcars,"added") # complete with formatted header
saveWorkbook(wb,file.path("data","sample_modified.xlsx")) # otherwise, data is lost