Skip to contents

Compares 2 dataframes and outputs any differences.

Usage

diffdf(
  base,
  compare,
  keys = NULL,
  suppress_warnings = FALSE,
  strict_numeric = TRUE,
  strict_factor = TRUE,
  file = NULL,
  tolerance = sqrt(.Machine$double.eps),
  scale = NULL,
  check_column_order = FALSE,
  check_df_class = FALSE
)

Arguments

base

input dataframe

compare

comparison dataframe

keys

vector of variables (as strings) that define a unique row in the base and compare dataframes

suppress_warnings

Do you want to suppress warnings? (logical)

strict_numeric

Flag for strict numeric to numeric comparisons (default = TRUE). If FALSE, diffdf will cast integers to doubles where required for comparisons. Note that variables specified in the keys will never be cast.

strict_factor

Flag for strict factor to character comparisons (default = TRUE). If FALSE, diffdf will cast factors to characters where required for comparisons. Note that variables specified in the keys will never be cast.

file

Location and name of a text file to output the results to. Setting to NULL will cause no file to be produced.

tolerance

Set tolerance for numeric comparisons. Note that comparisons fail if abs(x - y) / scale > tolerance.

scale

Set scale for numeric comparisons. Note that comparisons fail if abs(x - y) / scale > tolerance. Setting scale to NULL is a slightly more efficient equivalent of scale = 1.

check_column_order

Should the column ordering be checked? (logical)

check_df_class

Do you want to check for differences in the class between base and compare? (logical)

Examples

x <- subset(iris, -Species)
#> Warning: ‘-’ not meaningful for factors
x[1, 2] <- 5
COMPARE <- diffdf(iris, x)
#> Warning: 
#> There are rows in BASE that are not in COMPARE !!
#> Not all Values Compared Equal
print(COMPARE)
#> Differences found between the objects!
#> 
#> Summary of BASE and COMPARE
#>   ====================================
#>     PROPERTY      BASE        COMP    
#>   ------------------------------------
#>       Name        iris         x      
#>      Class     data.frame  data.frame 
#>     Rows(#)       150          1      
#>    Columns(#)      5           5      
#>   ------------------------------------
#> 
#> 
#> There are rows in BASE that are not in COMPARE !!
#> First 10 of 149 rows are shown in table below
#>   ===============
#>    ..ROWNUMBER.. 
#>   ---------------
#>          2       
#>          3       
#>          4       
#>          5       
#>          6       
#>          7       
#>          8       
#>          9       
#>         10       
#>         11       
#>   ---------------
#> 
#> 
#> Not all Values Compared Equal
#>   =================================
#>      Variable    No of Differences 
#>   ---------------------------------
#>    Sepal.Length          1         
#>    Sepal.Width           1         
#>    Petal.Length          1         
#>    Petal.Width           1         
#>      Species             1         
#>   ---------------------------------
#> 
#> 
#>   ============================================
#>      VARIABLE    ..ROWNUMBER..  BASE  COMPARE 
#>   --------------------------------------------
#>    Sepal.Length        1        5.1     NA    
#>   --------------------------------------------
#> 
#> 
#>   ===========================================
#>     VARIABLE    ..ROWNUMBER..  BASE  COMPARE 
#>   -------------------------------------------
#>    Sepal.Width        1        3.5      5    
#>   -------------------------------------------
#> 
#> 
#>   ============================================
#>      VARIABLE    ..ROWNUMBER..  BASE  COMPARE 
#>   --------------------------------------------
#>    Petal.Length        1        1.4     NA    
#>   --------------------------------------------
#> 
#> 
#>   ===========================================
#>     VARIABLE    ..ROWNUMBER..  BASE  COMPARE 
#>   -------------------------------------------
#>    Petal.Width        1        0.2     NA    
#>   -------------------------------------------
#> 
#> 
#>   ==========================================
#>    VARIABLE  ..ROWNUMBER..   BASE   COMPARE 
#>   ------------------------------------------
#>    Species         1        setosa   <NA>   
#>   ------------------------------------------
#> 
#> 

#### Sample data frames

DF1 <- data.frame(
    id = c(1, 2, 3, 4, 5, 6),
    v1 = letters[1:6],
    v2 = c(NA, NA, 1, 2, 3, NA)
)

DF2 <- data.frame(
    id = c(1, 2, 3, 4, 5, 7),
    v1 = letters[1:6],
    v2 = c(NA, NA, 1, 2, NA, NA),
    v3 = c(NA, NA, 1, 2, NA, 4)
)

diffdf(DF1, DF1, keys = "id")
#> No issues were found!

# We can control matching with scale/tolerance, for example:

DF1 <- data.frame(
    id = c(1, 2, 3, 4, 5, 6),
    v1 = letters[1:6],
    v2 = c(1, 2, 3, 4, 5, 6)
)
DF2 <- data.frame(
    id = c(1, 2, 3, 4, 5, 6),
    v1 = letters[1:6],
    v2 = c(1.1, 2, 3, 4, 5, 6)
)

diffdf(DF1, DF2, keys = "id")
#> Warning: 
#> Not all Values Compared Equal
#> Differences found between the objects!
#> 
#> Summary of BASE and COMPARE
#>   ====================================
#>     PROPERTY      BASE        COMP    
#>   ------------------------------------
#>       Name        DF1         DF2     
#>      Class     data.frame  data.frame 
#>     Rows(#)        6           6      
#>    Columns(#)      3           3      
#>   ------------------------------------
#> 
#> 
#> Not all Values Compared Equal
#>   =============================
#>    Variable  No of Differences 
#>   -----------------------------
#>       v2             1         
#>   -----------------------------
#> 
#> 
#>   =============================
#>    VARIABLE  id  BASE  COMPARE 
#>   -----------------------------
#>       v2     1    1      1.1   
#>   -----------------------------
#> 
#> 
diffdf(DF1, DF2, keys = "id", tolerance = 0.2)
#> No issues were found!
diffdf(DF1, DF2, keys = "id", scale = 10, tolerance = 0.2)
#> No issues were found!

# We can use strict_factor to compare factors with characters, for example:

DF1 <- data.frame(
    id = c(1, 2, 3, 4, 5, 6),
    v1 = letters[1:6],
    v2 = c(NA, NA, 1, 2, 3, NA),
    stringsAsFactors = FALSE
)

DF2 <- data.frame(
    id = c(1, 2, 3, 4, 5, 6),
    v1 = letters[1:6],
    v2 = c(NA, NA, 1, 2, 3, NA)
)

diffdf(DF1, DF2, keys = "id", strict_factor = TRUE)
#> No issues were found!
diffdf(DF1, DF2, keys = "id", strict_factor = FALSE)
#> No issues were found!