-
Notifications
You must be signed in to change notification settings - Fork 84
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
v1.7.4 #149
v1.7.4 #149
Changes from 8 commits
df43af1
db5ecda
7b9ae5f
9a9377d
bed76d5
48ce5f7
05d004f
9bbbeae
53c4bf4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,8 +10,8 @@ Description: Provides easier interaction with | |
format and manages throttling by 'Socrata'. | ||
Users can upload data to Socrata portals directly | ||
from R. | ||
Version: 1.7.3-2 | ||
Date: 2017-06-22 | ||
Version: 1.7.4-4 | ||
Date: 2017-12-11 | ||
Author: Hugh Devlin, Ph. D., Tom Schenk, Jr., and John Malc | ||
Maintainer: "Tom Schenk Jr." <[email protected]> | ||
Depends: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,38 @@ logMsg <- function(s) { | |
cat(format(Sys.time(), "%Y-%m-%d %H:%M:%OS3 "), as.character(sys.call(-1))[1], ": ", s, '\n', sep='') | ||
} | ||
|
||
#' Compiles the information to be used in HTTP headers | ||
#' | ||
#' Grabs the headers (RSocrata version, OS, and R version) to be used while | ||
#' making HTTP requests with Socrata. This enables Socrata's team to track | ||
#' the usage of RSocrata. | ||
#' @return a string | ||
#' @importFrom utils packageVersion | ||
#' @author Tom Schenk Jr \email{tom.schenk@@cityofchicago.org} | ||
#' @noRd | ||
fetch_user_agent <- function() { | ||
rSocrataVersion <- packageVersion("RSocrata") | ||
operatingSystem <- Sys.info()[["sysname"]] | ||
operatingSystemVersion <- paste(Sys.info()[["release"]], Sys.info()[["version"]]) | ||
rVersion <- paste0(R.version$major, | ||
".", | ||
R.version$minor, | ||
ifelse( # Checks if version has status, e.g., "rev" | ||
R.version$status == "", | ||
"", | ||
paste0("-",R.version$status)) | ||
) | ||
|
||
header <- paste0( "RSocrata/", | ||
rSocrataVersion, " (", | ||
operatingSystem, "/", | ||
operatingSystemVersion, "; ", | ||
"R/", rVersion, | ||
")" | ||
) | ||
return(header) | ||
} | ||
|
||
#' Checks the validity of the syntax for a potential Socrata dataset Unique Identifier, also known as a 4x4. | ||
#' | ||
#' Will check the validity of a potential dataset unique identifier | ||
|
@@ -79,6 +111,16 @@ validateUrl <- function(url, app_token) { | |
parsedUrl$query <- NULL | ||
} | ||
|
||
# if /data appended to URL, remove it | ||
pathLength <- nchar(parsedUrl$path) | ||
if(substr(parsedUrl$path, pathLength - 4, pathLength) == '/data') { | ||
parsedUrl$path <- substr(parsedUrl$path, 1, pathLength - 5) | ||
} | ||
if(substr(parsedUrl$path, pathLength - 5, pathLength) == '/data/') { | ||
parsedUrl$path <- substr(parsedUrl$path, 1, pathLength - 6) | ||
} | ||
|
||
|
||
fourByFour <- basename(parsedUrl$path) | ||
if(!isFourByFour(fourByFour)) | ||
stop(fourByFour, " is not a valid Socrata dataset unique identifier.") | ||
|
@@ -162,15 +204,15 @@ no_deniro <- function(x) { | |
#' @param email - Optional. The email to the Socrata account with read access to the dataset. | ||
#' @param password - Optional. The password associated with the email to the Socrata account | ||
#' @return httr response object | ||
#' @importFrom httr http_status GET content stop_for_status | ||
#' @importFrom httr http_status GET content stop_for_status user_agent | ||
#' @author Hugh J. Devlin, Ph. D. \email{Hugh.Devlin@@cityofchicago.org} | ||
#' @noRd | ||
getResponse <- function(url, email = NULL, password = NULL) { | ||
|
||
if(is.null(email) && is.null(password)){ | ||
response <- httr::GET(url) | ||
response <- httr::GET(url, user_agent(fetch_user_agent())) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. no There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok, agree with this. Will make the change (though strikes me as an odd style, it's semantically appropriate). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wish I had never suggested the namespace thing because we don't consistently follow it (there are two places outside the review that don't have it). So, we get the downside of reduced readability without the benefit of having all foreign calls fully specified. So I have mixed feelings about even making this comment. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it makes sense, the use of |
||
} else { # email and password are not NULL | ||
response <- httr::GET(url, httr::authenticate(email, password)) | ||
response <- httr::GET(url, httr::authenticate(email, password), user_agent(fetch_user_agent())) | ||
} | ||
|
||
# status <- httr::http_status(response) | ||
|
@@ -355,15 +397,18 @@ read.socrata <- function(url, app_token = NULL, email = NULL, password = NULL, | |
#' # Check schema definition for metadata | ||
#' attributes(df) | ||
#' @importFrom jsonlite fromJSON | ||
#' @importFrom httr parse_url | ||
#' @importFrom httr GET build_url parse_url content user_agent | ||
#' @export | ||
ls.socrata <- function(url) { | ||
url <- as.character(url) | ||
parsedUrl <- httr::parse_url(url) | ||
if(is.null(parsedUrl$scheme) | is.null(parsedUrl$hostname)) | ||
stop(url, " does not appear to be a valid URL.") | ||
parsedUrl$path <- "data.json" | ||
data_dot_json <- jsonlite::fromJSON(httr::build_url(parsedUrl)) | ||
#Download data | ||
response <- httr::GET(httr::build_url(parsedUrl), user_agent(fetch_user_agent())) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. no There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agree, I'll make the change. |
||
data_dot_json <- jsonlite::fromJSON(content(response, "text")) | ||
|
||
data_df <- as.data.frame(data_dot_json$dataset) | ||
# Assign Catalog Fields as attributes | ||
attr(data_df, "@context") <- data_dot_json$`@context` | ||
|
@@ -389,20 +434,22 @@ ls.socrata <- function(url) { | |
#' @param password - password associated with Socrata account (will need write access to dataset) | ||
#' @param app_token - optional app_token associated with Socrata account | ||
#' @return httr a response object | ||
#' @importFrom httr GET | ||
#' @importFrom httr GET POST PUT authenticate user_agent add_headers | ||
#' | ||
#' @noRd | ||
checkUpdateResponse <- function(json_data_to_upload, url, http_verb, email, password, app_token = NULL) { | ||
if(http_verb == "POST"){ | ||
response <- httr::POST(url, | ||
body = json_data_to_upload, | ||
httr::authenticate(email, password), | ||
httr::user_agent(fetch_user_agent()), | ||
httr::add_headers("X-App-Token" = app_token, | ||
"Content-Type" = "application/json")) #, verbose()) | ||
} else if(http_verb == "PUT"){ | ||
response <- httr::PUT(url, | ||
body = json_data_to_upload, | ||
httr::authenticate(email, password), | ||
httr::user_agent(fetch_user_agent()), | ||
httr::add_headers("X-App-Token" = app_token, | ||
"Content-Type" = "application/json")) # , verbose()) | ||
} | ||
|
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -234,13 +234,13 @@ test_that("Read URL provided by data.json from ls.socrata() - JSON", { | |
expect_equal(9, ncol(df), label="columns") | ||
}) | ||
|
||
test_that("Read data with missing dates", { # See issue #24 & #27 | ||
# Query below will pull Boston's 311 requests from early July 2011. Contains NA dates. | ||
df <- read.socrata("https://data.cityofboston.gov/resource/awu8-dc52.csv?$where=case_enquiry_id< 101000295717") | ||
expect_equal(99, nrow(df), label="rows") | ||
na_time_rows <- df[is.na(df$TARGET_DT), ] | ||
expect_equal(33, length(na_time_rows), label="rows with missing TARGET_DT dates") | ||
}) | ||
# test_that("Read data with missing dates", { # See issue #24 & #27 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Note: we could
However this is fine as it stands There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, good point, we should make a reference to the issue instead of just commenting-out. Will make the change. |
||
# # Query below will pull Boston's 311 requests from early July 2011. Contains NA dates. | ||
# df <- read.socrata("https://data.cityofboston.gov/resource/awu8-dc52.csv?$where=case_enquiry_id< 101000295717") | ||
# expect_equal(99, nrow(df), label="rows") | ||
# na_time_rows <- df[is.na(df$TARGET_DT), ] | ||
# expect_equal(33, length(na_time_rows), label="rows with missing TARGET_DT dates") | ||
# }) | ||
|
||
test_that("format is not supported", { | ||
# Unsupported data formats | ||
|
@@ -419,6 +419,17 @@ test_that("incorrect API Query Human Readable", { | |
expect_equal(9, ncol(df), label="columns") | ||
}) | ||
|
||
context("URL suffixes from Socrata are handled") | ||
|
||
test_that("Handle /data suffix", { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since we're only testing the ability to correctly parse URLs that end in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm inclined to agree. Can be a later touch-up. /cc @nicklucius |
||
df1 <- read.socrata('https://soda.demo.socrata.com/dataset/USGS-Earthquake-Reports/4334-bgaj/data') | ||
expect_equal(1007, nrow(df1), label="rows") | ||
expect_equal(9, ncol(df1), label="columns") | ||
df2 <- read.socrata('https://soda.demo.socrata.com/dataset/USGS-Earthquake-Reports/4334-bgaj/data/') | ||
expect_equal(1007, nrow(df2), label="rows") | ||
expect_equal(9, ncol(df2), label="columns") | ||
}) | ||
|
||
context("ls.socrata functions correctly") | ||
|
||
test_that("List datasets available from a Socrata domain", { | ||
|
@@ -478,14 +489,11 @@ test_that("add a row to a dataset", { | |
df_in <- data.frame(x,y) | ||
|
||
# write to dataset | ||
write.socrata(df_in,datasetToAddToUrl,"UPSERT",socrataEmail,socrataPassword) | ||
|
||
# read from dataset and store last (most recent) row for comparisons / tests | ||
df_out <- read.socrata(url = datasetToAddToUrl, email = socrataEmail, password = socrataPassword) | ||
df_out_last_row <- tail(df_out, n=1) | ||
res <- write.socrata(df_in,datasetToAddToUrl,"UPSERT",socrataEmail,socrataPassword) | ||
|
||
# Check that the dataset was written without error | ||
expect_equal(res$status_code, 200L) | ||
|
||
expect_equal(df_in$x, as.numeric(df_out_last_row$x), label = "x value") | ||
expect_equal(df_in$y, as.numeric(df_out_last_row$y), label = "y value") | ||
}) | ||
|
||
|
||
|
@@ -498,15 +506,10 @@ test_that("fully replace a dataset", { | |
df_in <- data.frame(x,y) | ||
|
||
# write to dataset | ||
write.socrata(df_in,datasetToReplaceUrl,"REPLACE",socrataEmail,socrataPassword) | ||
|
||
# read from dataset for comparisons / tests | ||
df_out <- read.socrata(url = datasetToReplaceUrl, email = socrataEmail, password = socrataPassword) | ||
res <- write.socrata(df_in,datasetToReplaceUrl,"REPLACE",socrataEmail,socrataPassword) | ||
|
||
expect_equal(ncol(df_in), ncol(df_out), label="columns") | ||
expect_equal(nrow(df_in), nrow(df_out), label="rows") | ||
expect_equal(df_in$x, as.numeric(df_out$x), label = "x values") | ||
expect_equal(df_in$y, as.numeric(df_out$y), label = "y values") | ||
# Check that the dataset was written without error | ||
expect_equal(res$status_code, 200L) | ||
}) | ||
|
||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would put the call to
httr::user_agent
here, but there is no error as it stands.e.g. something like this:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm a little less inclined for this change. If we make the change, the call from
GET
would be:GET( url, fetch_user_agent() )
But, I think I prefer the literal call-out in the current method:
GET( url, user_agent( fetch_user_agent() ) )