From ff268acc62f2edf5579932fedcf64c95e57c8248 Mon Sep 17 00:00:00 2001 From: Carlos Macasaet Date: Fri, 20 Dec 2013 11:46:02 -0800 Subject: [PATCH 1/4] Function to scan an HBase table and return a data frame. The row names of the resulting data frame are the keys of the HBase table. The column names are those specified in colspec. If a row does not have a value for a column specified in colspec, then that cell will be populated with NA. Prep work to allow all columns in a column family. Use cols instead of colspec when getting column values. modularise helper methods --- pkg/R/hbase.r | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/pkg/R/hbase.r b/pkg/R/hbase.r index 0945bbf..149eff0 100644 --- a/pkg/R/hbase.r +++ b/pkg/R/hbase.r @@ -269,3 +269,44 @@ hb.get.data.frame <- function(tablename, start,end=NULL,columns=NULL){ } } +.hb.get_value <- function( row, column_name ) +{ + indices <- which( row[[ 2 ]] == column_name ) + index <- ifelse( length( indices ) == 1, indices[[ 1 ]], 0 ) + ifelse( index == 0, NA, row[[ 3 ]][[ index ]] ) +} + +.hb.create_column_index_values_getter <- function( columns, raw_result ) +{ + function( column_index ) + { + column_name <- columns[[ column_index ]] + unlist( lapply( raw_result, .hb.get_value, column_name ) ) + } +} + + +hb.scan.data.frame <- function( tablename, startrow, end=NULL, colspec, + sz=hb.defaults("sz"), + usz=hb.defaults("usz"), + hbc=hb.defaults("hbc") ) +{ + scn <- hb.scan( tablename, startrow, end, colspec, sz, usz, hbc ) + f <- scn$get() + cols <- ifelse( length( colspec ) == 1, f[[ 1 ]][[ 2 ]], colspec ) + if( length( colspec ) == 1 ) { + cols <- f[[ 1 ]][[ 2 ]] + } else { + cols <- colspec + } + df <- + as.data.frame( lapply( 1:length( cols ), + .hb.create_column_index_values_getter( cols, + f ) ) ) + rownames( df ) <- unlist( lapply( f, "[[", 1 ) ) + colnames( df ) <- cols + df +} + +#hb.scan.ex <- function(tablename, startrow="", end="", colspec=character(0), timestamp=0, caching=0, filterstring=character(0), sz=hb.defaults("sz"), us z=hb.defaults("usz"), +#227 hbc=hb.defaults("hbc")){ From 5e55f3ee3b4bf2c1615370735d1002f330091a31 Mon Sep 17 00:00:00 2001 From: Carlos Macasaet Date: Sat, 8 Feb 2014 15:34:39 -0800 Subject: [PATCH 2/4] Extract helper method and introduce method to get data frame from scan.ex. --- pkg/R/hbase.r | 64 +++++++++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 28 deletions(-) diff --git a/pkg/R/hbase.r b/pkg/R/hbase.r index 149eff0..c58c35c 100644 --- a/pkg/R/hbase.r +++ b/pkg/R/hbase.r @@ -269,44 +269,52 @@ hb.get.data.frame <- function(tablename, start,end=NULL,columns=NULL){ } } -.hb.get_value <- function( row, column_name ) +.hb.as.data.frame <- function( scan, colspec ) { - indices <- which( row[[ 2 ]] == column_name ) - index <- ifelse( length( indices ) == 1, indices[[ 1 ]], 0 ) - ifelse( index == 0, NA, row[[ 3 ]][[ index ]] ) -} - -.hb.create_column_index_values_getter <- function( columns, raw_result ) -{ - function( column_index ) + raw_result <- scan$get() + # interpret column specification + if( length( colspec ) == 1 ) { + cols <- raw_result[[ 1 ]][[ 2 ]] + } else { + cols <- colspec + } + # define column vectors + get_column_index_values <- function( column_index ) { - column_name <- columns[[ column_index ]] - unlist( lapply( raw_result, .hb.get_value, column_name ) ) + get_value <- function( row, column_name ) + { + indices <- which( row[[ 2 ]] == column_name ) + index <- ifelse( length( indices ) == 1, indices[[ 1 ]], 0 ) + ifelse( index == 0, NA, row[[ 3 ]][[ index ]] ) + } + column_name <- cols[[ column_index ]] + unlist( lapply( raw_result, get_value, column_name ) ) } + # define resulting data frame + df <- as.data.frame( lapply( 1:length( cols ), get_column_index_values ) ) + rownames( df ) <- unlist( lapply( raw_result, "[[", 1 ) ) + colnames( df ) <- cols + df } - hb.scan.data.frame <- function( tablename, startrow, end=NULL, colspec, sz=hb.defaults("sz"), usz=hb.defaults("usz"), hbc=hb.defaults("hbc") ) { scn <- hb.scan( tablename, startrow, end, colspec, sz, usz, hbc ) - f <- scn$get() - cols <- ifelse( length( colspec ) == 1, f[[ 1 ]][[ 2 ]], colspec ) - if( length( colspec ) == 1 ) { - cols <- f[[ 1 ]][[ 2 ]] - } else { - cols <- colspec - } - df <- - as.data.frame( lapply( 1:length( cols ), - .hb.create_column_index_values_getter( cols, - f ) ) ) - rownames( df ) <- unlist( lapply( f, "[[", 1 ) ) - colnames( df ) <- cols - df + .hb.as.data.frame( scn, colspec ) } -#hb.scan.ex <- function(tablename, startrow="", end="", colspec=character(0), timestamp=0, caching=0, filterstring=character(0), sz=hb.defaults("sz"), us z=hb.defaults("usz"), -#227 hbc=hb.defaults("hbc")){ +hb.scan.ex.data.frame <- function( tablename, startrow='', end='', + colspec, timestamp=0, caching=0, + filterstring=character(0), + sz=hb.defaults("sz"), + usz=hb.defaults("usz"), + hbc=hb.defaults("hbc") ) +{ + scn <- + hb.scan.ex( tablename, startrow, end, colspec, timestamp, caching, + filterstring, sz, usz, hbc ) + .hb.as.data.frame( scn, colspec ) +} From 69d059d39e73bca5232c1bc6008b7ac1077f60a5 Mon Sep 17 00:00:00 2001 From: Carlos Macasaet Date: Wed, 21 May 2014 21:20:27 -0700 Subject: [PATCH 3/4] Revert "Extract helper method and introduce method to get data frame from scan.ex." This reverts commit 5e55f3ee3b4bf2c1615370735d1002f330091a31. --- pkg/R/hbase.r | 64 ++++++++++++++++++++++----------------------------- 1 file changed, 28 insertions(+), 36 deletions(-) diff --git a/pkg/R/hbase.r b/pkg/R/hbase.r index c58c35c..149eff0 100644 --- a/pkg/R/hbase.r +++ b/pkg/R/hbase.r @@ -269,52 +269,44 @@ hb.get.data.frame <- function(tablename, start,end=NULL,columns=NULL){ } } -.hb.as.data.frame <- function( scan, colspec ) +.hb.get_value <- function( row, column_name ) { - raw_result <- scan$get() - # interpret column specification - if( length( colspec ) == 1 ) { - cols <- raw_result[[ 1 ]][[ 2 ]] - } else { - cols <- colspec - } - # define column vectors - get_column_index_values <- function( column_index ) + indices <- which( row[[ 2 ]] == column_name ) + index <- ifelse( length( indices ) == 1, indices[[ 1 ]], 0 ) + ifelse( index == 0, NA, row[[ 3 ]][[ index ]] ) +} + +.hb.create_column_index_values_getter <- function( columns, raw_result ) +{ + function( column_index ) { - get_value <- function( row, column_name ) - { - indices <- which( row[[ 2 ]] == column_name ) - index <- ifelse( length( indices ) == 1, indices[[ 1 ]], 0 ) - ifelse( index == 0, NA, row[[ 3 ]][[ index ]] ) - } - column_name <- cols[[ column_index ]] - unlist( lapply( raw_result, get_value, column_name ) ) + column_name <- columns[[ column_index ]] + unlist( lapply( raw_result, .hb.get_value, column_name ) ) } - # define resulting data frame - df <- as.data.frame( lapply( 1:length( cols ), get_column_index_values ) ) - rownames( df ) <- unlist( lapply( raw_result, "[[", 1 ) ) - colnames( df ) <- cols - df } + hb.scan.data.frame <- function( tablename, startrow, end=NULL, colspec, sz=hb.defaults("sz"), usz=hb.defaults("usz"), hbc=hb.defaults("hbc") ) { scn <- hb.scan( tablename, startrow, end, colspec, sz, usz, hbc ) - .hb.as.data.frame( scn, colspec ) + f <- scn$get() + cols <- ifelse( length( colspec ) == 1, f[[ 1 ]][[ 2 ]], colspec ) + if( length( colspec ) == 1 ) { + cols <- f[[ 1 ]][[ 2 ]] + } else { + cols <- colspec + } + df <- + as.data.frame( lapply( 1:length( cols ), + .hb.create_column_index_values_getter( cols, + f ) ) ) + rownames( df ) <- unlist( lapply( f, "[[", 1 ) ) + colnames( df ) <- cols + df } -hb.scan.ex.data.frame <- function( tablename, startrow='', end='', - colspec, timestamp=0, caching=0, - filterstring=character(0), - sz=hb.defaults("sz"), - usz=hb.defaults("usz"), - hbc=hb.defaults("hbc") ) -{ - scn <- - hb.scan.ex( tablename, startrow, end, colspec, timestamp, caching, - filterstring, sz, usz, hbc ) - .hb.as.data.frame( scn, colspec ) -} +#hb.scan.ex <- function(tablename, startrow="", end="", colspec=character(0), timestamp=0, caching=0, filterstring=character(0), sz=hb.defaults("sz"), us z=hb.defaults("usz"), +#227 hbc=hb.defaults("hbc")){ From d526acbe46b4bbf20ca06867d6ebb5f3e4ab929a Mon Sep 17 00:00:00 2001 From: Carlos Macasaet Date: Sat, 8 Feb 2014 15:34:39 -0800 Subject: [PATCH 4/4] Extract helper method and introduce method to get data frame from scan.ex. --- pkg/R/hbase.r | 64 +++++++++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 28 deletions(-) diff --git a/pkg/R/hbase.r b/pkg/R/hbase.r index b159a18..0b9dffc 100644 --- a/pkg/R/hbase.r +++ b/pkg/R/hbase.r @@ -269,44 +269,52 @@ hb.get.data.frame <- function(tablename, start,end=NULL,columns=NULL){ } } -.hb.get_value <- function( row, column_name ) +.hb.as.data.frame <- function( scan, colspec ) { - indices <- which( row[[ 2 ]] == column_name ) - index <- ifelse( length( indices ) == 1, indices[[ 1 ]], 0 ) - ifelse( index == 0, NA, row[[ 3 ]][[ index ]] ) -} - -.hb.create_column_index_values_getter <- function( columns, raw_result ) -{ - function( column_index ) + raw_result <- scan$get() + # interpret column specification + if( length( colspec ) == 1 ) { + cols <- raw_result[[ 1 ]][[ 2 ]] + } else { + cols <- colspec + } + # define column vectors + get_column_index_values <- function( column_index ) { - column_name <- columns[[ column_index ]] - unlist( lapply( raw_result, .hb.get_value, column_name ) ) + get_value <- function( row, column_name ) + { + indices <- which( row[[ 2 ]] == column_name ) + index <- ifelse( length( indices ) == 1, indices[[ 1 ]], 0 ) + ifelse( index == 0, NA, row[[ 3 ]][[ index ]] ) + } + column_name <- cols[[ column_index ]] + unlist( lapply( raw_result, get_value, column_name ) ) } + # define resulting data frame + df <- as.data.frame( lapply( 1:length( cols ), get_column_index_values ) ) + rownames( df ) <- unlist( lapply( raw_result, "[[", 1 ) ) + colnames( df ) <- cols + df } - hb.scan.data.frame <- function( tablename, startrow, end=NULL, colspec, sz=hb.defaults("sz"), usz=hb.defaults("usz"), hbc=hb.defaults("hbc") ) { scn <- hb.scan( tablename, startrow, end, colspec, sz, usz, hbc ) - f <- scn$get() - cols <- ifelse( length( colspec ) == 1, f[[ 1 ]][[ 2 ]], colspec ) - if( length( colspec ) == 1 ) { - cols <- f[[ 1 ]][[ 2 ]] - } else { - cols <- colspec - } - df <- - as.data.frame( lapply( 1:length( cols ), - .hb.create_column_index_values_getter( cols, - f ) ) ) - rownames( df ) <- unlist( lapply( f, "[[", 1 ) ) - colnames( df ) <- cols - df + .hb.as.data.frame( scn, colspec ) } -#hb.scan.ex <- function(tablename, startrow="", end="", colspec=character(0), timestamp=0, caching=0, filterstring=character(0), sz=hb.defaults("sz"), us z=hb.defaults("usz"), -#227 hbc=hb.defaults("hbc")){ +hb.scan.ex.data.frame <- function( tablename, startrow='', end='', + colspec, timestamp=0, caching=0, + filterstring=character(0), + sz=hb.defaults("sz"), + usz=hb.defaults("usz"), + hbc=hb.defaults("hbc") ) +{ + scn <- + hb.scan.ex( tablename, startrow, end, colspec, timestamp, caching, + filterstring, sz, usz, hbc ) + .hb.as.data.frame( scn, colspec ) +}