Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue #153: fix warnings about usage of deprecated "dplyr::arrange_()" and "dplyr::select_()" functions #155

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions dataCompareR/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,13 @@ Imports:
dplyr (>= 0.5.0),
knitr,
stringi,
markdown
markdown,
tidyselect
URL: https://github.com/capitalone/dataCompareR
BugReports: https://github.com/capitalone/dataCompareR/issues
License: Apache License 2.0 | file LICENSE
RoxygenNote: 7.1.2
RoxygenNote: 7.2.3
Encoding: UTF-8
Suggests:
testthat,
data.table,
Expand Down
5 changes: 3 additions & 2 deletions dataCompareR/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,19 @@ import(knitr)
import(markdown)
import(stringi)
importFrom(dplyr,"%>%")
importFrom(dplyr,across)
importFrom(dplyr,arrange)
importFrom(dplyr,arrange_)
importFrom(dplyr,distinct)
importFrom(dplyr,filter)
importFrom(dplyr,funs)
importFrom(dplyr,inner_join)
importFrom(dplyr,mutate)
importFrom(dplyr,mutate_all)
importFrom(dplyr,rename_with)
importFrom(dplyr,sample_n)
importFrom(dplyr,select)
importFrom(dplyr,select_)
importFrom(dplyr,summarise)
importFrom(tidyselect,all_of)
importFrom(utils,capture.output)
importFrom(utils,head)
importFrom(utils,packageVersion)
Expand Down
5 changes: 5 additions & 0 deletions dataCompareR/NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# dataCompareR 0.1.5

Fix warnings about usage of deprecated `dplyr::arrange_()` and `dplyr::select_()`
functions (#153).

# dataCompareR 0.1.4

Bug fix and minor improvement:
Expand Down
24 changes: 12 additions & 12 deletions dataCompareR/R/cd_compareData.R
Original file line number Diff line number Diff line change
@@ -1,30 +1,30 @@
# SPDX-Copyright: Copyright (c) Capital One Services, LLC
# SPDX-License-Identifier: Apache-2.0
# Copyright 2017 Capital One Services, LLC
# SPDX-Copyright: Copyright (c) Capital One Services, LLC
# SPDX-License-Identifier: Apache-2.0
# Copyright 2017 Capital One Services, LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
#
# You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
# You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
# OF ANY KIND, either express or implied.
#
# See the License for the specific language governing permissions and limitations under the License.
# OF ANY KIND, either express or implied.
#
# See the License for the specific language governing permissions and limitations under the License.


#' Compare data. Wrapper for comparison functionality.
#'
#' @param DFA dataframe as returned from prepareData
#' @param DFB dataframe as returned from prepareData
#' @param keys vector of chars - names of index variables
#' @param maxMismatches Integer. The max number of mismatches to assess, after which dataCompareR will stop
#' @param maxMismatches Integer. The max number of mismatches to assess, after which dataCompareR will stop
#' (without producing a dataCompareR object). Designed to improve performance for large datasets.
#' @return mismatchObject containing mismatch data for each of the variables in
#' the dataframes
#'
#' @examples
#' @examples
#'\dontrun{compareData(iris, iris)}
#'
#'\dontrun{iris2 <- iris}
Expand Down
32 changes: 16 additions & 16 deletions dataCompareR/R/cd_createMismatchObject.R
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
# SPDX-Copyright: Copyright (c) Capital One Services, LLC
# SPDX-License-Identifier: Apache-2.0
# Copyright 2017 Capital One Services, LLC
# SPDX-Copyright: Copyright (c) Capital One Services, LLC
# SPDX-License-Identifier: Apache-2.0
# Copyright 2017 Capital One Services, LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
#
# You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
# You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
# OF ANY KIND, either express or implied.
#
# OF ANY KIND, either express or implied.
#

#' Create mismatch object
#'
Expand All @@ -24,8 +24,8 @@
#' @examples
#'\dontrun{createMismatchObject(dataA, dataB, mism, idx)}
createMismatchObject <- function(dat_a, dat_b, dat_eq, str_index) {


# Initialise output object
out <- list()

Expand All @@ -34,8 +34,8 @@ createMismatchObject <- function(dat_a, dat_b, dat_eq, str_index) {
if(nrow(dat_a)==0) {
return(out)
}


# Loop over variables to create output, ignoring index variable
varnames <- names(dat_a)[!(names(dat_a) %in% str_index)]
for (v in varnames) {
Expand Down Expand Up @@ -78,7 +78,7 @@ variableMismatches <- function(varname, vals_a, vals_b, vector_eq) {
if ("__temprowname__" %in% names(d)) {
d[, "__temprowname__"] <- NULL
}

d[,'variable'] <- varname
return(d)
}
Expand All @@ -89,7 +89,7 @@ variableMismatches <- function(varname, vals_a, vals_b, vector_eq) {
#'
#' @return mismatch details
variableDetails <- function(dat) {


class_a <- collapseClasses(dat[, "valueA"])
class_b <- collapseClasses(dat[, "valueB"])
Expand All @@ -106,6 +106,6 @@ variableDetails <- function(dat) {
} else {
dat[, "diffAB"] <- ""
}

return(dat)
}
112 changes: 56 additions & 56 deletions dataCompareR/R/cd_locateMismatches.R
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
# SPDX-Copyright: Copyright (c) Capital One Services, LLC
# SPDX-License-Identifier: Apache-2.0
# Copyright 2017 Capital One Services, LLC
# SPDX-Copyright: Copyright (c) Capital One Services, LLC
# SPDX-License-Identifier: Apache-2.0
# Copyright 2017 Capital One Services, LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
#
# You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
# You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
# OF ANY KIND, either express or implied.
# OF ANY KIND, either express or implied.

#' collapseClasses. Collapse the classes of an object to a single string
#'
#' @param x any object
#' @return a string listing the classes of x, separated by commas
#'
#' @examples
#' @examples
#'\dontrun{collapseClasses(iris)}
#'\dontrun{collapseClasses("hello")}
collapseClasses <- function(x) {
Expand All @@ -41,15 +41,15 @@ mismatchHighStop <- function(trueFalseMatrix, maxMismatches) {
}

#' Checks whether elements in two input data frames are equal.
#'
#'
#' @param DFA input data frame
#' @param DFB input data frame
#' @param maxMismatches Integer. The max number of mismatches to assess, after which dataCompareR will stop
#' @param DFB input data frame
#' @param maxMismatches Integer. The max number of mismatches to assess, after which dataCompareR will stop
#' (without producing a dataCompareR object). Designed to improve performance for large datasets.
#' @param keys character vector of index variables
#'
#'
#' @importFrom dplyr mutate_all
#'
#'
#' @return data frame containing keys and boolean logic of match/no match for each element
#' If data types are not equal returns FALSE. Treats NA and NaN as unequal.
locateMismatches <- function(DFA, DFB, keys=NULL, maxMismatches=NA){
Expand All @@ -58,84 +58,84 @@ locateMismatches <- function(DFA, DFB, keys=NULL, maxMismatches=NA){
if(nrow(DFA)==0) {
return(data.frame())
}

# col names
colNames <- names(DFA)

# drop keys
colCompare <- setdiff(colNames,keys)

#print(dim(DFA))
#print(dim(DFB))

# find vars where type different excluding keys
colTypeDiff <- sapply(select_(DFA,.dots=colCompare), collapseClasses) == sapply(select_(DFB,.dots=colCompare), collapseClasses)
colTypeDiff <- sapply(select(DFA, all_of(colCompare)), collapseClasses) == sapply(select(DFB, all_of(colCompare)), collapseClasses)
cols2Compare <- names(colTypeDiff[colTypeDiff==T])

# select columns to compare
if(length(cols2Compare)>0) {

# First find matching cols with identical
matchingCols <- vector(mode = 'logical', length = length(cols2Compare))
for(i in 0:length(cols2Compare)) {
matchingCols[i] <- identical(DFA[,cols2Compare[i]], DFB[,cols2Compare[i]])
}

# Get names of full matches
colsFullMatch <- cols2Compare[matchingCols]

# Create a list of cols with diffs
cols2Diff <- setdiff(cols2Compare, colsFullMatch)

if(length(cols2Diff) > 0) {
# Now handle the cases where we're not equal

# Now handle the cases where we're not equal

# Get these cols once
subsetA <- select_(DFA,.dots = cols2Diff)
subsetB <- select_(DFB,.dots = cols2Diff)
subsetA <- select(DFA, all_of(cols2Diff))
subsetB <- select(DFB, all_of(cols2Diff))

# Look for NA's
isNA_A <- mutate_all(subsetA, .funs = is.na)
isNA_B <- mutate_all(subsetB, .funs = is.na)

# Find any cells impacted by NA's
anyNA <- isNA_A | isNA_B

# and repeat the above for NaN's
isNaN_A <- mutate_all(subsetA, .funs = is.nan)
isNaN_B <- mutate_all(subsetB, .funs = is.nan)
anyNaN <- isNaN_A | isNaN_B

# find matching NA or NaNs
matchNA <- isNA_A == isNA_B
matchNaN <- isNaN_A == isNaN_B

# Create a naive summary of matches first
compareTF <- subsetA == subsetB

# Check for mismatch count, stop if exceeded
mismatchHighStop(compareTF,maxMismatches)

# And then a somewhat confusing hierarchy...
# If we get a true or a false from matchAnyway, this is correct
# Otherwise, if they are both NA, we need to look at NA and NaN

# Cells where either side is NA - overwrite with NA matching status
compareTF[anyNA] <- matchNA[anyNA]

# Check for mismatch count, stop if exceeded
mismatchHighStop(compareTF,maxMismatches)

# and then for NaN's - overwrite with NaN matching status
compareTF[anyNaN] <- matchNaN[anyNaN]

# Check for mismatch count, stop if exceeded
mismatchHighStop(compareTF,maxMismatches)

# Make a DF
compareTF <- as.data.frame(compareTF)

# Add in cols that full match as all T
compareTF[,colsFullMatch] <- TRUE
}
Expand All @@ -149,21 +149,21 @@ locateMismatches <- function(DFA, DFB, keys=NULL, maxMismatches=NA){
compareTF <- data.frame()
}
}



}
else {
compareTF <- data.frame()
}

# not compared
colsNot2Compare <- names(colTypeDiff[colTypeDiff==F])

# ID only
mismatchOut <- data.frame(DFA[,keys], stringsAsFactors = FALSE)
names(mismatchOut) <- keys

if(nrow(compareTF) > 0) {
# We have some matching rows, proceed as normal
mismatchOut <- cbind(mismatchOut,compareTF)
Expand All @@ -174,22 +174,22 @@ locateMismatches <- function(DFA, DFB, keys=NULL, maxMismatches=NA){
for(i in colsNot2Compare) {
mismatchOut[i] <- FALSE
}


}



# output columns in same order as input data frames
# if statement handles cases where we have no overlap
if(nrow(mismatchOut) == 0) {
mismatchOut <- data.frame()
} else {
mismatchOut <- mismatchOut %>% select_(.dots=colNames)
mismatchOut <- mismatchOut %>% select(all_of(colNames))
}

return(mismatchOut)

}


Loading