| 1 | ||
| 2 | ||
| 3 | 
                      #' identify_extra_rows  | 
                  |
| 4 | 
                      #'  | 
                  |
| 5 | 
                      #' Identifies rows that are in a baseline dataset but not in a comparator dataset  | 
                  |
| 6 | 
                      #' @param DS1 Baseline dataset (data frame)  | 
                  |
| 7 | 
                      #' @param DS2 Comparator dataset (data frame)  | 
                  |
| 8 | 
                      #' @param KEYS List of variables that define a unique row within the datasets (strings)  | 
                  |
| 9 | 
                      identify_extra_rows <- function(DS1, DS2 , KEYS){
                     | 
                  |
| 10 | 258x | 
                      DS2[["..FLAG.."]] <- "Y"  | 
                  
| 11 | 258x | 
                      dat <- merge(  | 
                  
| 12 | 258x | 
                      subset( DS1 , select = KEYS) ,  | 
                  
| 13 | 258x | 
                      subset( DS2 , select = c(KEYS, "..FLAG..")) ,  | 
                  
| 14 | 258x | 
                      by = KEYS, all.x = T,  | 
                  
| 15 | 258x | 
                      sort = TRUE  | 
                  
| 16 | 
                      )  | 
                  |
| 17 | 258x | 
                          dat <- dat[do.call("order", dat[KEYS]), ]
                     | 
                  
| 18 | 
                       | 
                  |
| 19 | 258x | 
                      dat[ is.na(dat[["..FLAG.."]]) , KEYS, drop=FALSE]  | 
                  
| 20 | 
                      }  | 
                  |
| 21 | ||
| 22 | ||
| 23 | ||
| 24 | 
                      #' identify_extra_cols  | 
                  |
| 25 | 
                      #'  | 
                  |
| 26 | 
                      #' Identifies columns that are in a baseline dataset but not in a comparator dataset  | 
                  |
| 27 | 
                      #' @param DS1 Baseline dataset (data frame)  | 
                  |
| 28 | 
                      #' @param DS2 Comparator dataset (data frame)  | 
                  |
| 29 | 
                      #' @importFrom tibble tibble  | 
                  |
| 30 | 
                      identify_extra_cols <- function(DS1 , DS2){
                     | 
                  |
| 31 | 258x | 
                      match.cols <- sapply ( names(DS1), "%in%", names(DS2))  | 
                  
| 32 | 258x | 
                          if (  !all(is.logical(match.cols)) ){
                     | 
                  
| 33 | ! | 
                              stop("Assumption of logical return type is not true")
                     | 
                  
| 34 | 
                      }  | 
                  |
| 35 | 258x | 
                      tibble(  | 
                  
| 36 | 258x | 
                      COLUMNS = names(DS1)[ !match.cols]  | 
                  
| 37 | 
                      )  | 
                  |
| 38 | 
                      }  | 
                  |
| 39 | ||
| 40 | ||
| 41 | ||
| 42 | ||
| 43 | ||
| 44 | ||
| 45 | ||
| 46 | 
                      #' identify_matching_cols  | 
                  |
| 47 | 
                      #'  | 
                  |
| 48 | 
                      #' Identifies columns with the same name in two data frames  | 
                  |
| 49 | 
                      #' @param DS1 Input dataset 1 (data frame)  | 
                  |
| 50 | 
                      #' @param DS2 Input dataset 2 (data frame)  | 
                  |
| 51 | 
                      #' @param EXCLUDE Columns to ignore  | 
                  |
| 52 | 
                      identify_matching_cols <- function(DS1, DS2 , EXCLUDE = ""){
                     | 
                  |
| 53 | 826x | 
                      match_cols <- sapply( names(DS1), "%in%" , names(DS2))  | 
                  
| 54 | 826x | 
                      exclude_cols <- sapply( names(DS1), "%in%" , EXCLUDE)  | 
                  
| 55 | 826x | 
                      names(DS1)[ match_cols & !exclude_cols ]  | 
                  
| 56 | 
                      }  | 
                  |
| 57 | ||
| 58 | ||
| 59 | ||
| 60 | ||
| 61 | ||
| 62 | 
                      #' identify_unsupported_cols  | 
                  |
| 63 | 
                      #'  | 
                  |
| 64 | 
                      #' Identifies any columns for which the package is not setup to handle  | 
                  |
| 65 | 
                      #' @param dsin input dataset  | 
                  |
| 66 | 
                      identify_unsupported_cols <- function(dsin){
                     | 
                  |
| 67 | 258x | 
                      dat <- subset(  | 
                  
| 68 | 258x | 
                      identify_properties(dsin) ,  | 
                  
| 69 | 258x | 
                              select = c("VARIABLE", "MODE")
                     | 
                  
| 70 | 
                      )  | 
                  |
| 71 | 
                       | 
                  |
| 72 | 258x | 
                          dat[ !dat[["MODE"]] %in% c('numeric', 'character', 'logical') ,, drop=FALSE]
                     | 
                  
| 73 | ||
| 74 | 
                      }  | 
                  |
| 75 | ||
| 76 | ||
| 77 | ||
| 78 | 
                      #' identify_mode_differences  | 
                  |
| 79 | 
                      #'  | 
                  |
| 80 | 
                      #' Identifies any mode differences between two data frames  | 
                  |
| 81 | 
                      #' @param BASE Base dataset for comparison (data.frame)  | 
                  |
| 82 | 
                      #' @param COMP Comparator dataset to compare base against (data.frame)  | 
                  |
| 83 | 
                      identify_mode_differences <- function( BASE, COMP ){
                     | 
                  |
| 84 | ||
| 85 | 429x | 
                      matching_cols <- identify_matching_cols( BASE , COMP )  | 
                  
| 86 | 
                       | 
                  |
| 87 | 429x | 
                      dat <- merge(  | 
                  
| 88 | 429x | 
                      x = identify_properties(BASE),  | 
                  
| 89 | 429x | 
                      y = identify_properties(COMP),  | 
                  
| 90 | 429x | 
                      by = "VARIABLE",  | 
                  
| 91 | 429x | 
                      all = TRUE,  | 
                  
| 92 | 429x | 
                              suffixes = c(".BASE", ".COMP"),
                     | 
                  
| 93 | 429x | 
                      sort = TRUE  | 
                  
| 94 | 
                      )  | 
                  |
| 95 | 429x | 
                          dat <-  subset( dat, select = c("VARIABLE" , "MODE.BASE" , "MODE.COMP"))
                     | 
                  
| 96 | 
                       | 
                  |
| 97 | 429x | 
                      KEEP1 <- dat[["VARIABLE"]] %in% matching_cols  | 
                  
| 98 | 429x | 
                      KEEP2 <- dat[["MODE.BASE"]] != dat[["MODE.COMP"]]  | 
                  
| 99 | 
                       | 
                  |
| 100 | 429x | 
                      dat[ KEEP1 & KEEP2 ,, drop=FALSE]  | 
                  
| 101 | ||
| 102 | 
                      }  | 
                  |
| 103 | ||
| 104 | ||
| 105 | ||
| 106 | 
                      #' identify_class_differences  | 
                  |
| 107 | 
                      #'  | 
                  |
| 108 | 
                      #' Identifies any class differences between two data frames  | 
                  |
| 109 | 
                      #' @param BASE Base dataset for comparison (data.frame)  | 
                  |
| 110 | 
                      #' @param COMP Comparator dataset to compare base against (data.frame)  | 
                  |
| 111 | 
                      identify_class_differences <- function( BASE, COMP ){
                     | 
                  |
| 112 | 
                       | 
                  |
| 113 | 129x | 
                      matching_cols <- identify_matching_cols( BASE , COMP )  | 
                  
| 114 | 
                       | 
                  |
| 115 | 129x | 
                      dat <- merge(  | 
                  
| 116 | 129x | 
                      x = identify_properties(BASE),  | 
                  
| 117 | 129x | 
                      y = identify_properties(COMP),  | 
                  
| 118 | 129x | 
                      by = "VARIABLE",  | 
                  
| 119 | 129x | 
                      all = TRUE,  | 
                  
| 120 | 129x | 
                      sort = TRUE,  | 
                  
| 121 | 129x | 
                              suffixes =  c(".BASE", ".COMP")
                     | 
                  
| 122 | 
                      )  | 
                  |
| 123 | 
                       | 
                  |
| 124 | 129x | 
                          dat <- subset( dat , select = c("VARIABLE" , "CLASS.BASE" , "CLASS.COMP")) 
                     | 
                  
| 125 | 
                       | 
                  |
| 126 | 129x | 
                      KEEP1 <- dat[["VARIABLE"]] %in% matching_cols  | 
                  
| 127 | 129x | 
                      KEEP2 <- !mapply(  | 
                  
| 128 | 129x | 
                      identical,  | 
                  
| 129 | 129x | 
                      dat[["CLASS.BASE"]] ,  | 
                  
| 130 | 129x | 
                      dat[["CLASS.COMP"]]  | 
                  
| 131 | 
                      )  | 
                  |
| 132 | 
                       | 
                  |
| 133 | 129x | 
                      dat[ KEEP1 & KEEP2 ,, drop=FALSE]  | 
                  
| 134 | ||
| 135 | 
                      }  | 
                  |
| 136 | ||
| 137 | ||
| 138 | ||
| 139 | 
                      #' identify_att_differences  | 
                  |
| 140 | 
                      #'  | 
                  |
| 141 | 
                      #' Identifies any attribute differences between two data frames  | 
                  |
| 142 | 
                      #' @param BASE Base dataset for comparison (data.frame)  | 
                  |
| 143 | 
                      #' @param COMP Comparator dataset to compare base against (data.frame)  | 
                  |
| 144 | 
                      #' @param exclude_cols Columns to exclude from comparison  | 
                  |
| 145 | 
                      #' @importFrom tibble tibble  | 
                  |
| 146 | 
                      identify_att_differences <- function( BASE, COMP , exclude_cols = "" ){
                     | 
                  |
| 147 | 
                       | 
                  |
| 148 | 139x | 
                      matching_cols <- identify_matching_cols( BASE , COMP , exclude_cols )  | 
                  
| 149 | 
                       | 
                  |
| 150 | 139x | 
                      PROPS <- merge(  | 
                  
| 151 | 139x | 
                      x = identify_properties(BASE) ,  | 
                  
| 152 | 139x | 
                      y = identify_properties(COMP) ,  | 
                  
| 153 | 139x | 
                      by = "VARIABLE",  | 
                  
| 154 | 139x | 
                      all = TRUE,  | 
                  
| 155 | 139x | 
                      sort = TRUE,  | 
                  
| 156 | 139x | 
                              suffixes = c(".BASE", ".COMP")
                     | 
                  
| 157 | 
                      )  | 
                  |
| 158 | 
                       | 
                  |
| 159 | 139x | 
                          PROPS <- subset( PROPS , select =  c("VARIABLE", "ATTRIBS.BASE" , "ATTRIBS.COMP")) 
                     | 
                  
| 160 | 
                       | 
                  |
| 161 | 139x | 
                      PROPS <- PROPS[ PROPS[["VARIABLE"]] %in% matching_cols,, drop = FALSE]  | 
                  
| 162 | 
                       | 
                  |
| 163 | 
                       | 
                  |
| 164 | 
                      ### Setup dummy return value  | 
                  |
| 165 | 139x | 
                      RETURN <- tibble(  | 
                  
| 166 | 139x | 
                      VARIABLE = character(),  | 
                  
| 167 | 139x | 
                      ATTR_NAME = character(),  | 
                  
| 168 | 139x | 
                      VALUES.BASE = list(),  | 
                  
| 169 | 139x | 
                      VALUES.COMP = list()  | 
                  
| 170 | 
                      )  | 
                  |
| 171 | 
                       | 
                  |
| 172 | 139x | 
                          for ( i in  PROPS[["VARIABLE"]] ){
                     | 
                  
| 173 | 
                       | 
                  |
| 174 | 1237x | 
                      PROPS_filt <- PROPS[ PROPS[["VARIABLE"]] == i ,, drop=FALSE]  | 
                  
| 175 | ||
| 176 | 
                      ### Get a vector of all available attributes across both variables  | 
                  |
| 177 | 1237x | 
                      ATTRIB_NAMES = unique(c(  | 
                  
| 178 | 1237x | 
                      names(PROPS_filt[["ATTRIBS.BASE"]][[1]]) ,  | 
                  
| 179 | 1237x | 
                      names(PROPS_filt[["ATTRIBS.COMP"]][[1]])  | 
                  
| 180 | 
                      ))  | 
                  |
| 181 | 
                       | 
                  |
| 182 | 
                      ### If variable has no attributes move onto the next variable  | 
                  |
| 183 | 904x | 
                      if ( is.null(ATTRIB_NAMES) ) next()  | 
                  
| 184 | 
                       | 
                  |
| 185 | 
                      ### Loop over each attribute checking if they are identical and outputing  | 
                  |
| 186 | 
                      ### anyones that arn't  | 
                  |
| 187 | 333x | 
                              for ( j in ATTRIB_NAMES){
                     | 
                  
| 188 | 
                       | 
                  |
| 189 | 574x | 
                      ATTRIB_BASE = PROPS_filt[["ATTRIBS.BASE"]][[1]][j]  | 
                  
| 190 | 574x | 
                      ATTRIB_COMP = PROPS_filt[["ATTRIBS.COMP"]][[1]][j]  | 
                  
| 191 | 
                       | 
                  |
| 192 | 574x | 
                                  if ( !identical(ATTRIB_BASE , ATTRIB_COMP) ){
                     | 
                  
| 193 | 
                       | 
                  |
| 194 | 89x | 
                      ATT_DIFFS <- tibble(  | 
                  
| 195 | 89x | 
                      VARIABLE = i ,  | 
                  
| 196 | 89x | 
                      ATTR_NAME = j ,  | 
                  
| 197 | 89x | 
                      VALUES.BASE = ifelse( is.null(ATTRIB_BASE) , list() , ATTRIB_BASE),  | 
                  
| 198 | 89x | 
                      VALUES.COMP = ifelse( is.null(ATTRIB_COMP) , list() , ATTRIB_COMP)  | 
                  
| 199 | 
                      )  | 
                  |
| 200 | 
                       | 
                  |
| 201 | 89x | 
                      RETURN <- rbind(RETURN , ATT_DIFFS)  | 
                  
| 202 | 
                      }  | 
                  |
| 203 | 
                      }  | 
                  |
| 204 | 
                      }  | 
                  |
| 205 | 139x | 
                      return(RETURN)  | 
                  
| 206 | 
                      }  | 
                  |
| 207 | ||
| 208 | ||
| 209 | ||
| 210 | ||
| 211 | ||
| 212 | 
                      #' identify_differences  | 
                  |
| 213 | 
                      #'  | 
                  |
| 214 | 
                      #' Compares each column within 2 datasets to identify any values which they  | 
                  |
| 215 | 
                      #' mismatch on.  | 
                  |
| 216 | 
                      #' @param BASE Base dataset for comparison (data.frame)  | 
                  |
| 217 | 
                      #' @param COMP Comparator dataset to compare base against (data.frame)  | 
                  |
| 218 | 
                      #' @param KEYS List of variables that define a unique row within the datasets (strings)  | 
                  |
| 219 | 
                      #' @param exclude_cols Columns to exclude from comparison  | 
                  |
| 220 | 
                      #' @param tolerance Level of tolerance for numeric differences between two variables  | 
                  |
| 221 | 
                      #' @param scale Scale that tolerance should be set on. If NULL assume absolute  | 
                  |
| 222 | 
                      identify_differences <- function( BASE , COMP , KEYS, exclude_cols,  | 
                  |
| 223 | 
                      tolerance = sqrt(.Machine$double.eps),  | 
                  |
| 224 | 
                                                        scale = NULL ) {
                     | 
                  |
| 225 | 
                       | 
                  |
| 226 | 129x | 
                      matching_cols <- identify_matching_cols( BASE , COMP , c(KEYS, exclude_cols))  | 
                  
| 227 | 
                       | 
                  |
| 228 | 1x | 
                      if( length(matching_cols) == 0 ) return ( tibble() )  | 
                  
| 229 | 
                       | 
                  |
| 230 | 128x | 
                      DAT = merge(  | 
                  
| 231 | 128x | 
                      x = BASE ,  | 
                  
| 232 | 128x | 
                      y = COMP ,  | 
                  
| 233 | 128x | 
                      by = KEYS ,  | 
                  
| 234 | 128x | 
                              suffix = c(".x", ".y"),
                     | 
                  
| 235 | 128x | 
                      sort = TRUE  | 
                  
| 236 | 
                      )  | 
                  |
| 237 | 128x | 
                          DAT <- DAT[do.call("order", DAT[KEYS]), ]
                     | 
                  
| 238 | 
                       | 
                  |
| 239 | ||
| 240 | 128x | 
                      matching_list <- mapply(  | 
                  
| 241 | 128x | 
                      is_variable_different ,  | 
                  
| 242 | 128x | 
                      matching_cols,  | 
                  
| 243 | 128x | 
                      MoreArgs = list(  | 
                  
| 244 | 128x | 
                      keynames = KEYS,  | 
                  
| 245 | 128x | 
                      datain = DAT,  | 
                  
| 246 | 128x | 
                      tolerance = tolerance ,  | 
                  
| 247 | 128x | 
                      scale = scale  | 
                  
| 248 | 
                      ),  | 
                  |
| 249 | 128x | 
                      SIMPLIFY = FALSE  | 
                  
| 250 | 
                      )  | 
                  |
| 251 | 
                       | 
                  |
| 252 | 128x | 
                      matching_list  | 
                  
| 253 | 
                      }  | 
                  |
| 254 | ||
| 255 | ||
| 256 | ||
| 257 | ||
| 258 | ||
| 259 | ||
| 260 | ||
| 261 | ||
| 262 | ||
| 263 | 
                      #' identify_properties  | 
                  |
| 264 | 
                      #'  | 
                  |
| 265 | 
                      #' Returns a dataframe of metadata for a given dataset.  | 
                  |
| 266 | 
                      #' Returned values include variable names , class , mode , type & attributes  | 
                  |
| 267 | 
                      #' @param dsin input dataframe that you want to get the metadata from  | 
                  |
| 268 | 
                      #' @importFrom tibble tibble  | 
                  |
| 269 | 
                      identify_properties <- function(dsin){
                     | 
                  |
| 270 | 
                       | 
                  |
| 271 | 
                      ### If missing or null return empty dataset  | 
                  |
| 272 | 1652x | 
                          if( is.null(dsin) ) {
                     | 
                  
| 273 | ! | 
                      x <- tibble(  | 
                  
| 274 | ! | 
                      VARIABLE = character(),  | 
                  
| 275 | ! | 
                      CLASS = list(),  | 
                  
| 276 | ! | 
                      MODE = character(),  | 
                  
| 277 | ! | 
                      TYPE = character() ,  | 
                  
| 278 | ! | 
                      ATTRIBS = list()  | 
                  
| 279 | 
                      )  | 
                  |
| 280 | ! | 
                      return(x)  | 
                  
| 281 | 
                      }  | 
                  |
| 282 | 
                       | 
                  |
| 283 | 1652x | 
                      tibble(  | 
                  
| 284 | 1652x | 
                      VARIABLE = names(dsin),  | 
                  
| 285 | 1652x | 
                      CLASS = lapply(dsin, class),  | 
                  
| 286 | 1652x | 
                      MODE = sapply(dsin , mode),  | 
                  
| 287 | 1652x | 
                      TYPE = sapply(dsin , typeof) ,  | 
                  
| 288 | 1652x | 
                      ATTRIBS = lapply( dsin , attributes)  | 
                  
| 289 | 
                      )  | 
                  |
| 290 | 
                      }  | 
                  |
| 291 | ||
| 292 | ||
| 293 | ||
| 294 | ||
| 295 | ||
| 296 | 
| 1 | 
                      #' diffdf  | 
                  |
| 2 | 
                      #' @description  | 
                  |
| 3 | 
                      #' Compares 2 dataframes and outputs any differences.  | 
                  |
| 4 | 
                      #' @param base input dataframe  | 
                  |
| 5 | 
                      #' @param compare comparison dataframe  | 
                  |
| 6 | 
                      #' @param keys vector of variables (as strings) that defines a unique row in the base and compare dataframes  | 
                  |
| 7 | 
                      #' @param strict_numeric Flag for strict numeric to numeric comparisons (default = TRUE). If False diffdf will cast integer to double where required for comparisons. Note that variables specified in the keys will never be casted.  | 
                  |
| 8 | 
                      #' @param strict_factor Flag for strict factor to character comparisons (default = TRUE). If False diffdf will cast factors to characters where required for comparisons. Note that variables specified in the keys will never be casted.  | 
                  |
| 9 | 
                      #' @param suppress_warnings Do you want to suppress warnings? (logical)  | 
                  |
| 10 | 
                      #' @param file Location and name of a text file to output the results to. Setting to NULL will cause no file to be produced.  | 
                  |
| 11 | 
                      #' @param tolerance Set tolerance for numeric comparisons. Note that comparisons fail if (x-y)/scale > tolerance.  | 
                  |
| 12 | 
                      #' @param scale Set scale for numeric comparisons. Note that comparisons fail if (x-y)/scale > tolerance. Setting as NULL is a slightly more efficient version of scale = 1.  | 
                  |
| 13 | 
                      #' @examples  | 
                  |
| 14 | 
                      #' x <- subset( iris, -Species)  | 
                  |
| 15 | 
                      #' x[1,2] <- 5  | 
                  |
| 16 | 
                      #' COMPARE <- diffdf( iris, x)  | 
                  |
| 17 | 
                      #' print( COMPARE )  | 
                  |
| 18 | 
                      #' print( COMPARE , "Sepal.Length" )  | 
                  |
| 19 | 
                      #'  | 
                  |
| 20 | 
                      #' #### Sample data frames  | 
                  |
| 21 | 
                      #'  | 
                  |
| 22 | 
                      #' DF1 <- data.frame(  | 
                  |
| 23 | 
                      #' id = c(1,2,3,4,5,6),  | 
                  |
| 24 | 
                      #' v1 = letters[1:6],  | 
                  |
| 25 | 
                      #' v2 = c(NA , NA , 1 , 2 , 3 , NA)  | 
                  |
| 26 | 
                      #' )  | 
                  |
| 27 | 
                      #'  | 
                  |
| 28 | 
                      #' DF2 <- data.frame(  | 
                  |
| 29 | 
                      #' id = c(1,2,3,4,5,7),  | 
                  |
| 30 | 
                      #' v1 = letters[1:6],  | 
                  |
| 31 | 
                      #' v2 = c(NA , NA , 1 , 2 , NA , NA),  | 
                  |
| 32 | 
                      #' v3 = c(NA , NA , 1 , 2 , NA , 4)  | 
                  |
| 33 | 
                      #' )  | 
                  |
| 34 | 
                      #'  | 
                  |
| 35 | 
                      #' diffdf(DF1 , DF1 , keys = "id")  | 
                  |
| 36 | 
                      #'  | 
                  |
| 37 | 
                      #' # We can control matching with scale/location for example:  | 
                  |
| 38 | 
                      #'  | 
                  |
| 39 | 
                      #' DF1 <- data.frame(  | 
                  |
| 40 | 
                      #' id = c(1,2,3,4,5,6),  | 
                  |
| 41 | 
                      #' v1 = letters[1:6],  | 
                  |
| 42 | 
                      #' v2 = c(1,2,3,4,5,6)  | 
                  |
| 43 | 
                      #' )  | 
                  |
| 44 | 
                      #' DF2 <- data.frame(  | 
                  |
| 45 | 
                      #' id = c(1,2,3,4,5,6),  | 
                  |
| 46 | 
                      #' v1 = letters[1:6],  | 
                  |
| 47 | 
                      #' v2 = c(1.1,2,3,4,5,6)  | 
                  |
| 48 | 
                      #' )  | 
                  |
| 49 | 
                      #'  | 
                  |
| 50 | 
                      #' diffdf(DF1 , DF2 , keys = "id")  | 
                  |
| 51 | 
                      #' diffdf(DF1 , DF2 , keys = "id", tolerance = 0.2)  | 
                  |
| 52 | 
                      #' diffdf(DF1 , DF2 , keys = "id", scale = 10, tolerance = 0.2)  | 
                  |
| 53 | 
                      #'  | 
                  |
| 54 | 
                      #' # We can use strict_factor to compare factors with characters for example:  | 
                  |
| 55 | 
                      #'  | 
                  |
| 56 | 
                      #' DF1 <- data.frame(  | 
                  |
| 57 | 
                      #' id = c(1,2,3,4,5,6),  | 
                  |
| 58 | 
                      #' v1 = letters[1:6],  | 
                  |
| 59 | 
                      #' v2 = c(NA , NA , 1 , 2 , 3 , NA),  | 
                  |
| 60 | 
                      #' stringsAsFactors = FALSE  | 
                  |
| 61 | 
                      #' )  | 
                  |
| 62 | 
                      #'  | 
                  |
| 63 | 
                      #' DF2 <- data.frame(  | 
                  |
| 64 | 
                      #' id = c(1,2,3,4,5,6),  | 
                  |
| 65 | 
                      #' v1 = letters[1:6],  | 
                  |
| 66 | 
                      #' v2 = c(NA , NA , 1 , 2 , 3 , NA)  | 
                  |
| 67 | 
                      #' )  | 
                  |
| 68 | 
                      #'  | 
                  |
| 69 | 
                      #' diffdf(DF1 , DF2 , keys = "id", strict_factor = TRUE)  | 
                  |
| 70 | 
                      #' diffdf(DF1 , DF2 , keys = "id", strict_factor = FALSE)  | 
                  |
| 71 | 
                      #'  | 
                  |
| 72 | 
                      #' @export  | 
                  |
| 73 | 
                      diffdf <- function (  | 
                  |
| 74 | 
                      base ,  | 
                  |
| 75 | 
                      compare ,  | 
                  |
| 76 | 
                      keys = NULL,  | 
                  |
| 77 | 
                      suppress_warnings = FALSE,  | 
                  |
| 78 | 
                      strict_numeric = TRUE,  | 
                  |
| 79 | 
                      strict_factor = TRUE,  | 
                  |
| 80 | 
                      file = NULL,  | 
                  |
| 81 | 
                      tolerance = sqrt(.Machine$double.eps),  | 
                  |
| 82 | 
                      scale = NULL  | 
                  |
| 83 | 
                      ){
                     | 
                  |
| 84 | 
                       | 
                  |
| 85 | 134x | 
                      BASE = base  | 
                  
| 86 | 134x | 
                      COMP = compare  | 
                  
| 87 | 134x | 
                      KEYS = keys  | 
                  
| 88 | 134x | 
                      SUPWARN = suppress_warnings  | 
                  
| 89 | 
                       | 
                  |
| 90 | 
                       | 
                  |
| 91 | 
                      ### Initatiate output object  | 
                  |
| 92 | 134x | 
                      COMPARE <- list()  | 
                  
| 93 | 134x | 
                          class(COMPARE) <- c("diffdf" , "list") 
                     | 
                  
| 94 | 
                       | 
                  |
| 95 | 134x | 
                      is_derived <- FALSE  | 
                  
| 96 | 
                       | 
                  |
| 97 | 
                      ### If no key is suplied match values based upon row number  | 
                  |
| 98 | 134x | 
                          if (is.null(KEYS)){
                     | 
                  
| 99 | 122x | 
                      is_derived <- TRUE  | 
                  
| 100 | 122x | 
                      keyname <- generate_keyname(BASE, COMP)  | 
                  
| 101 | 122x | 
                      BASE[[keyname]] <- 1:nrow(BASE)  | 
                  
| 102 | 122x | 
                      COMP[[keyname]] <- 1:nrow(COMP)  | 
                  
| 103 | 122x | 
                      KEYS <- keyname  | 
                  
| 104 | 
                      }  | 
                  |
| 105 | 134x | 
                      attr(COMPARE, 'keys') <- list(value = KEYS, is_derived = is_derived)  | 
                  
| 106 | 
                       | 
                  |
| 107 | 
                       | 
                  |
| 108 | 
                       | 
                  |
| 109 | 134x | 
                          if (!is.numeric(tolerance)) {
                     | 
                  
| 110 | 2x | 
                              stop("'tolerance' should be numeric")
                     | 
                  
| 111 | 
                      }  | 
                  |
| 112 | 
                       | 
                  |
| 113 | 132x | 
                          if (!is.numeric(scale) && !is.null(scale)) {
                     | 
                  
| 114 | 2x | 
                              stop("'scale' should be numeric or NULL")
                     | 
                  
| 115 | 
                      }  | 
                  |
| 116 | 
                       | 
                  |
| 117 | ||
| 118 | 
                       | 
                  |
| 119 | 130x | 
                          if ( !has_unique_rows(BASE , KEYS) ){
                     | 
                  
| 120 | 1x | 
                      stop( "BY variables in BASE do not result in unique observations")  | 
                  
| 121 | 
                      }  | 
                  |
| 122 | 
                       | 
                  |
| 123 | 129x | 
                          if ( !has_unique_rows(COMP , KEYS) ){
                     | 
                  
| 124 | ! | 
                      stop( "BY variables in COMPARE do not result in unique observations")  | 
                  
| 125 | 
                      }  | 
                  |
| 126 | 
                       | 
                  |
| 127 | 
                       | 
                  |
| 128 | ||
| 129 | 
                      #### Check essential variable properties (class & mode)  | 
                  |
| 130 | 
                       | 
                  |
| 131 | 129x | 
                      COMPARE[["UnsupportedColsBase"]] <- construct_issue(  | 
                  
| 132 | 129x | 
                      value = identify_unsupported_cols(BASE) ,  | 
                  
| 133 | 129x | 
                      message = "There are columns in BASE with unsupported modes !!"  | 
                  
| 134 | 
                      )  | 
                  |
| 135 | 
                       | 
                  |
| 136 | 
                       | 
                  |
| 137 | 129x | 
                      COMPARE[["UnsupportedColsComp"]] <- construct_issue(  | 
                  
| 138 | 129x | 
                      value = identify_unsupported_cols(COMP) ,  | 
                  
| 139 | 129x | 
                      message = "There are columns in COMPARE with unsupported modes !!"  | 
                  
| 140 | 
                      )  | 
                  |
| 141 | ||
| 142 | 
                       | 
                  |
| 143 | 
                      # cast variables if strict is off  | 
                  |
| 144 | 129x | 
                          if ( !strict_factor | !strict_numeric ){
                     | 
                  
| 145 | 
                       | 
                  |
| 146 | 9x | 
                      casted_df <- cast_variables(  | 
                  
| 147 | 9x | 
                      BASE = BASE,  | 
                  
| 148 | 9x | 
                      COMPARE = COMP,  | 
                  
| 149 | 9x | 
                      ignore_vars = KEYS,  | 
                  
| 150 | 9x | 
                      cast_integers = !strict_numeric ,  | 
                  
| 151 | 9x | 
                      cast_factors = !strict_factor  | 
                  
| 152 | 
                      )  | 
                  |
| 153 | 
                       | 
                  |
| 154 | 9x | 
                      BASE <- casted_df$BASE  | 
                  
| 155 | 9x | 
                      COMP <- casted_df$COMP  | 
                  
| 156 | 
                       | 
                  |
| 157 | 
                      }  | 
                  |
| 158 | 
                       | 
                  |
| 159 | 
                       | 
                  |
| 160 | 129x | 
                      COMPARE[["VarModeDiffs"]] <- construct_issue(  | 
                  
| 161 | 129x | 
                      value = identify_mode_differences( BASE, COMP ) ,  | 
                  
| 162 | 129x | 
                      message = "There are columns in BASE and COMPARE with different modes !!"  | 
                  
| 163 | 
                      )  | 
                  |
| 164 | 
                       | 
                  |
| 165 | 
                       | 
                  |
| 166 | 129x | 
                      COMPARE[["VarClassDiffs"]] <- construct_issue(  | 
                  
| 167 | 129x | 
                      value = identify_class_differences(BASE, COMP) ,  | 
                  
| 168 | 129x | 
                      message = "There are columns in BASE and COMPARE with different classes !!"  | 
                  
| 169 | 
                      )  | 
                  |
| 170 | 
                       | 
                  |
| 171 | ||
| 172 | 129x | 
                      exclude_cols <- c(  | 
                  
| 173 | 129x | 
                      COMPARE[["UnsupportedColsBase"]]$VARIABLE ,  | 
                  
| 174 | 129x | 
                      COMPARE[["UnsupportedColsComp"]]$VARIABLE,  | 
                  
| 175 | 129x | 
                      COMPARE[["VarClassDiffs"]]$VARIABLE,  | 
                  
| 176 | 129x | 
                      COMPARE[["VarModeDiffs"]]$VARIABLE  | 
                  
| 177 | 
                      )  | 
                  |
| 178 | 
                       | 
                  |
| 179 | ||
| 180 | 
                      ##### Check Validity of Keys  | 
                  |
| 181 | 
                       | 
                  |
| 182 | 129x | 
                      BASE_keys <- names(BASE)[names(BASE) %in% KEYS]  | 
                  
| 183 | 129x | 
                      COMP_keys <- names(COMP)[names(COMP) %in% KEYS]  | 
                  
| 184 | 
                       | 
                  |
| 185 | 
                       | 
                  |
| 186 | 129x | 
                          if ( length(BASE_keys) != length(KEYS)  ){
                     | 
                  
| 187 | ! | 
                      stop( "BASE is missing variables specified in KEYS")  | 
                  
| 188 | 
                      }  | 
                  |
| 189 | 
                       | 
                  |
| 190 | 129x | 
                          if ( length(COMP_keys) != length(KEYS) ){
                     | 
                  
| 191 | ! | 
                      stop( "COMP is missing variables specified in KEYS")  | 
                  
| 192 | 
                      }  | 
                  |
| 193 | 
                       | 
                  |
| 194 | 129x | 
                          if( any(KEYS %in% exclude_cols)){
                     | 
                  
| 195 | ! | 
                              stop("KEYS are either an invalid or contain different modes between BASE and COMP")
                     | 
                  
| 196 | 
                      }  | 
                  |
| 197 | 
                       | 
                  |
| 198 | ||
| 199 | 
                      ##### Check Attributes  | 
                  |
| 200 | ||
| 201 | 
                       | 
                  |
| 202 | 129x | 
                      COMPARE[["AttribDiffs"]] <- construct_issue(  | 
                  
| 203 | 129x | 
                      value = identify_att_differences(BASE, COMP , exclude_cols) ,  | 
                  
| 204 | 129x | 
                      message = "There are columns in BASE and COMPARE with differing attributes !!"  | 
                  
| 205 | 
                      )  | 
                  |
| 206 | 
                       | 
                  |
| 207 | 
                       | 
                  |
| 208 | 
                      ##### Check data  | 
                  |
| 209 | 
                       | 
                  |
| 210 | 129x | 
                      BASE <- factor_to_character(BASE , KEYS)  | 
                  
| 211 | 129x | 
                      COMP <- factor_to_character(COMP , KEYS)  | 
                  
| 212 | 
                       | 
                  |
| 213 | 
                       | 
                  |
| 214 | 129x | 
                      COMPARE[["ExtRowsBase"]] <- construct_issue(  | 
                  
| 215 | 129x | 
                      value = identify_extra_rows( BASE, COMP, KEYS ) ,  | 
                  
| 216 | 129x | 
                      message = "There are rows in BASE that are not in COMPARE !!"  | 
                  
| 217 | 
                      )  | 
                  |
| 218 | 
                       | 
                  |
| 219 | 
                       | 
                  |
| 220 | 129x | 
                      COMPARE[["ExtRowsComp"]] <- construct_issue(  | 
                  
| 221 | 129x | 
                      value = identify_extra_rows( COMP, BASE, KEYS ) ,  | 
                  
| 222 | 129x | 
                      message = "There are rows in COMPARE that are not in BASE !!"  | 
                  
| 223 | 
                      )  | 
                  |
| 224 | 
                       | 
                  |
| 225 | 
                       | 
                  |
| 226 | 
                       | 
                  |
| 227 | 129x | 
                      COMPARE[["ExtColsBase"]] <- construct_issue(  | 
                  
| 228 | 129x | 
                      value = identify_extra_cols(BASE, COMP) ,  | 
                  
| 229 | 129x | 
                      message = "There are columns in BASE that are not in COMPARE !!"  | 
                  
| 230 | 
                      )  | 
                  |
| 231 | 
                       | 
                  |
| 232 | 
                       | 
                  |
| 233 | 129x | 
                      COMPARE[["ExtColsComp"]] <- construct_issue(  | 
                  
| 234 | 129x | 
                      value = identify_extra_cols(COMP, BASE) ,  | 
                  
| 235 | 129x | 
                      message = "There are columns in COMPARE that are not in BASE !!"  | 
                  
| 236 | 
                      )  | 
                  |
| 237 | 
                       | 
                  |
| 238 | 
                       | 
                  |
| 239 | 129x | 
                      VALUE_DIFFERENCES <- identify_differences(  | 
                  
| 240 | 129x | 
                      BASE, COMP , KEYS, exclude_cols,  | 
                  
| 241 | 129x | 
                      tolerance = tolerance,  | 
                  
| 242 | 129x | 
                      scale = scale  | 
                  
| 243 | 
                      )  | 
                  |
| 244 | ||
| 245 | 
                       | 
                  |
| 246 | 
                       | 
                  |
| 247 | 
                      ## Summarise the number of mismatching rows per variable  | 
                  |
| 248 | ||
| 249 | 129x | 
                          if ( length(VALUE_DIFFERENCES) ){
                     | 
                  
| 250 | 128x | 
                      NDIFF <- sapply( VALUE_DIFFERENCES , nrow )  | 
                  
| 251 | 128x | 
                      COMPARE[["NumDiff"]] <- construct_issue(  | 
                  
| 252 | 128x | 
                      value = convert_to_issue(NDIFF),  | 
                  
| 253 | 128x | 
                      message = "Not all Values Compared Equal"  | 
                  
| 254 | 
                      )  | 
                  |
| 255 | 
                      }  | 
                  |
| 256 | ||
| 257 | ||
| 258 | 129x | 
                          for ( i in names(VALUE_DIFFERENCES) ){
                     | 
                  
| 259 | 991x | 
                      COMPARE[[ paste0( "VarDiff_", i)]] <- construct_issue(  | 
                  
| 260 | 991x | 
                      value = VALUE_DIFFERENCES[[i]] ,  | 
                  
| 261 | 991x | 
                      message = ""  | 
                  
| 262 | 
                      )  | 
                  |
| 263 | 
                      }  | 
                  |
| 264 | 
                       | 
                  |
| 265 | 
                      ## Get all issue messages, remove blank message, and collapse into single string  | 
                  |
| 266 | 129x | 
                      ISSUE_MSGS <- sapply(COMPARE, function(x) get_issue_message(x))  | 
                  
| 267 | 129x | 
                      ISSUE_MSGS <- ISSUE_MSGS[ ISSUE_MSGS != ""]  | 
                  
| 268 | 
                       | 
                  |
| 269 | 129x | 
                          if( length(ISSUE_MSGS) != 0 ){
                     | 
                  
| 270 | 95x | 
                              if(!SUPWARN) {
                     | 
                  
| 271 | 36x | 
                      ISSUE_MSGS <- paste(ISSUE_MSGS, collapse ='\n' )  | 
                  
| 272 | 36x | 
                                  warning( c("\n" , ISSUE_MSGS))
                     | 
                  
| 273 | 
                      }  | 
                  |
| 274 | 
                      }  | 
                  |
| 275 | 
                       | 
                  |
| 276 | 
                       | 
                  |
| 277 | 129x | 
                          if (!is.null(file)){
                     | 
                  
| 278 | ! | 
                      x <- print(COMPARE , as_string = TRUE)  | 
                  
| 279 | 
                       | 
                  |
| 280 | ! | 
                      tryCatch(  | 
                  
| 281 | 
                                  {
                     | 
                  |
| 282 | ! | 
                      sink(file)  | 
                  
| 283 | ! | 
                      cat(x, sep = "\n")  | 
                  
| 284 | ! | 
                      sink()  | 
                  
| 285 | 
                      },  | 
                  |
| 286 | ! | 
                                  warning = function(w){
                     | 
                  
| 287 | ! | 
                      sink()  | 
                  
| 288 | ! | 
                      warning(w)  | 
                  
| 289 | 
                      },  | 
                  |
| 290 | ! | 
                                  error = function(e){
                     | 
                  
| 291 | ! | 
                      sink()  | 
                  
| 292 | ! | 
                      stop(e)  | 
                  
| 293 | 
                      }  | 
                  |
| 294 | 
                      )  | 
                  |
| 295 | ! | 
                      return(invisible(COMPARE))  | 
                  
| 296 | 
                       | 
                  |
| 297 | 
                      }  | 
                  |
| 298 | 
                       | 
                  |
| 299 | 129x | 
                      return(COMPARE)  | 
                  
| 300 | 
                      }  | 
                  |
| 301 | ||
| 302 | ||
| 303 | ||
| 304 | ||
| 305 | 
                      #' diffdf_has_issues  | 
                  |
| 306 | 
                      #'  | 
                  |
| 307 | 
                      #' Utility function which returns TRUE if an diffdf  | 
                  |
| 308 | 
                      #' object has issues or FALSE if an diffdf object does not have issues  | 
                  |
| 309 | 
                      #' @param x diffdf object  | 
                  |
| 310 | 
                      #' @examples  | 
                  |
| 311 | 
                      #'  | 
                  |
| 312 | 
                      #' # Example with no issues  | 
                  |
| 313 | 
                      #' x <- diffdf( iris, iris )  | 
                  |
| 314 | 
                      #' diffdf_has_issues(x)  | 
                  |
| 315 | 
                      #'  | 
                  |
| 316 | 
                      #' # Example with issues  | 
                  |
| 317 | 
                      #' iris2 <- iris  | 
                  |
| 318 | 
                      #' iris2[2,2] <- NA  | 
                  |
| 319 | 
                      #' x <- diffdf( iris , iris2 , suppress_warnings = TRUE)  | 
                  |
| 320 | 
                      #' diffdf_has_issues(x)  | 
                  |
| 321 | 
                      #' @export  | 
                  |
| 322 | 
                      diffdf_has_issues <- function(x){
                     | 
                  |
| 323 | ! | 
                      if ( class(x)[[1]] != "diffdf" ) stop( "x is not an diffdf object")  | 
                  
| 324 | 21x | 
                      return( length(x) != 0 )  | 
                  
| 325 | 
                      }  | 
                  |
| 326 | 
| 1 | ||
| 2 | ||
| 3 | ||
| 4 | ||
| 5 | ||
| 6 | 
                      #' diffdf_issuerows  | 
                  |
| 7 | 
                      #'  | 
                  |
| 8 | 
                      #' This function takes a diffdf object and a dataframe and subsets  | 
                  |
| 9 | 
                      #' the dataframe for problem rows as identified in the comparison object.  | 
                  |
| 10 | 
                      #' If \code{vars} has been specified only issue rows associated with those
                     | 
                  |
| 11 | 
                      #' variable(s) will be returned.  | 
                  |
| 12 | 
                      #' @param df dataframe to be subsetted  | 
                  |
| 13 | 
                      #' @param diff diffdf object  | 
                  |
| 14 | 
                      #' @param vars (optional) character vector containing names of issue variables to subset dataframe  | 
                  |
| 15 | 
                      #' on. A value of NULL (default) will be taken to mean available issue variables.  | 
                  |
| 16 | 
                      #' @examples  | 
                  |
| 17 | 
                      #' iris2 <- iris  | 
                  |
| 18 | 
                      #' for ( i in 1:3) iris2[i,i] <- 99  | 
                  |
| 19 | 
                      #' x <- diffdf( iris , iris2, suppress_warnings = TRUE)  | 
                  |
| 20 | 
                      #' diffdf_issuerows( iris , x)  | 
                  |
| 21 | 
                      #' diffdf_issuerows( iris2 , x)  | 
                  |
| 22 | 
                      #' diffdf_issuerows( iris2 , x , vars = "Sepal.Length")  | 
                  |
| 23 | 
                      #' diffdf_issuerows( iris2 , x , vars = c("Sepal.Length" , "Sepal.Width"))
                     | 
                  |
| 24 | 
                      #' @details  | 
                  |
| 25 | 
                      #' Note that diffdf_issuerows can be used to subset against any dataframe. The only  | 
                  |
| 26 | 
                      #' requirement is that the original variables specified in the keys argument to diffdf  | 
                  |
| 27 | 
                      #' are present on the dataframe you are subsetting against. However please note that if  | 
                  |
| 28 | 
                      #' no keys were specified in diffdf then the row number is used. This means using  | 
                  |
| 29 | 
                      #' diffdf_issuerows without a keys against an arbitrary dataset can easily result in  | 
                  |
| 30 | 
                      #' nonsense rows being returned. It is always recommended to supply keys to diffdf.  | 
                  |
| 31 | 
                      #' @export  | 
                  |
| 32 | 
                      diffdf_issuerows <- function( df , diff, vars = NULL){    
                     | 
                  |
| 33 | ||
| 34 | 19x | 
                          if ( class(diff)[[1]] != "diffdf") {
                     | 
                  
| 35 | ! | 
                              stop("diff should be an diffdf object")
                     | 
                  
| 36 | 
                      }  | 
                  |
| 37 | 
                       | 
                  |
| 38 | 19x | 
                      KEYS_ATT = attr(diff, "keys")  | 
                  
| 39 | 
                       | 
                  |
| 40 | 19x | 
                          if( is.null(KEYS_ATT) ) {
                     | 
                  
| 41 | ! | 
                              stop("diff is missing the keys attribute")
                     | 
                  
| 42 | 
                      }  | 
                  |
| 43 | 
                       | 
                  |
| 44 | 19x | 
                      issue_vars <- names(diff)[grep( "^VarDiff_", names(diff))]  | 
                  
| 45 | 
                       | 
                  |
| 46 | 19x | 
                          if ( is.null(vars)){
                     | 
                  
| 47 | 15x | 
                      vars <- issue_vars  | 
                  
| 48 | 
                          }  else {
                     | 
                  |
| 49 | 4x | 
                              vars <- paste0("VarDiff_" , vars)
                     | 
                  
| 50 | 
                      }  | 
                  |
| 51 | 
                       | 
                  |
| 52 | 19x | 
                          if ( length(issue_vars) == 0 | sum( vars %in% issue_vars) == 0){
                     | 
                  
| 53 | 7x | 
                      return(df[FALSE,])  | 
                  
| 54 | 
                      }  | 
                  |
| 55 | 
                       | 
                  |
| 56 | 12x | 
                      KEEP <- mapply(  | 
                  
| 57 | 12x | 
                      FUN = get_issue_dataset,  | 
                  
| 58 | 12x | 
                      issue = vars,  | 
                  
| 59 | 12x | 
                      diff = list(diff) ,  | 
                  
| 60 | 12x | 
                      SIMPLIFY = F  | 
                  
| 61 | 
                      )  | 
                  |
| 62 | 
                       | 
                  |
| 63 | 12x | 
                      KEEP <- recursive_reduce( KEEP , rbind)  | 
                  
| 64 | 12x | 
                      KEEP <- KEEP[!duplicated(KEEP),]  | 
                  
| 65 | 
                       | 
                  |
| 66 | 12x | 
                          if ( KEYS_ATT$is_derived ){
                     | 
                  
| 67 | 2x | 
                      df[[KEYS_ATT$value]] <- 1:nrow(df)  | 
                  
| 68 | 
                      }  | 
                  |
| 69 | ||
| 70 | 12x | 
                      keys <- KEYS_ATT$value  | 
                  
| 71 | 
                       | 
                  |
| 72 | 12x | 
                          if ( any( ! keys %in% names(df))){
                     | 
                  
| 73 | 1x | 
                              stop("df does not contain all variables specified as keys in diff")
                     | 
                  
| 74 | 
                      }  | 
                  |
| 75 | 
                       | 
                  |
| 76 | 11x | 
                      RET <- merge(  | 
                  
| 77 | 11x | 
                      x = df,  | 
                  
| 78 | 11x | 
                      y = KEEP,  | 
                  
| 79 | 11x | 
                      sort = TRUE  | 
                  
| 80 | 
                      )  | 
                  |
| 81 | 
                       | 
                  |
| 82 | 11x | 
                          RET <- RET[do.call("order", RET[keys]), ]
                     | 
                  
| 83 | 
                       | 
                  |
| 84 | 11x | 
                          if ( KEYS_ATT$is_derived ){
                     | 
                  
| 85 | 2x | 
                      keep_vars <- !names(RET) %in% KEYS_ATT$value  | 
                  
| 86 | 2x | 
                      RET <- RET[, keep_vars , drop = FALSE]  | 
                  
| 87 | 
                      }  | 
                  |
| 88 | 
                       | 
                  |
| 89 | 11x | 
                      return(RET)  | 
                  
| 90 | 
                      }  | 
                  |
| 91 | ||
| 92 | ||
| 93 | ||
| 94 | ||
| 95 | 
                      #' get_issue_dataset  | 
                  |
| 96 | 
                      #'  | 
                  |
| 97 | 
                      #' Internal function used by diffdf_issuerows to extract the dataframe  | 
                  |
| 98 | 
                      #' from each a target issue. In particular it also strips off any  | 
                  |
| 99 | 
                      #' non-key variables  | 
                  |
| 100 | 
                      #' @param issue name of issue to extract the dataset from diff  | 
                  |
| 101 | 
                      #' @param diff diffdf object which contains issues  | 
                  |
| 102 | 
                      get_issue_dataset <- function(issue, diff){
                     | 
                  |
| 103 | 20x | 
                      issue_df <- diff[[issue]]  | 
                  
| 104 | 20x | 
                          keep <- names(issue_df)[ !(names(issue_df) %in% c("BASE", "COMPARE", "VARIABLE"))]
                     | 
                  
| 105 | 20x | 
                      issue_df[,keep , drop=FALSE]  | 
                  
| 106 | 
                      }  | 
                  
| 1 | ||
| 2 | ||
| 3 | 
                      #' string_pad  | 
                  |
| 4 | 
                      #'  | 
                  |
| 5 | 
                      #' Utility function used to replicate str_pad. Adds white space to either end  | 
                  |
| 6 | 
                      #' of a string to get it to equal the desired length  | 
                  |
| 7 | 
                      #' @param x string  | 
                  |
| 8 | 
                      #' @param width desired length  | 
                  |
| 9 | 
                      string_pad <- function(x , width){
                     | 
                  |
| 10 | ! | 
                      if ( nchar(x) >= width) return(x)  | 
                  
| 11 | 814x | 
                      width <- width - nchar(x)  | 
                  
| 12 | 814x | 
                      left <- paste0( rep ( " " , floor( width/2) ) , collapse= "")  | 
                  
| 13 | 814x | 
                      right <- paste0( rep ( " " , ceiling( width/2) ) , collapse= "")  | 
                  
| 14 | 814x | 
                      paste0( left , x ,right , collapse = "")  | 
                  
| 15 | 
                      }  | 
                  |
| 16 | ||
| 17 | ||
| 18 | 
                      #' recursive_reduce  | 
                  |
| 19 | 
                      #'  | 
                  |
| 20 | 
                      #' Utility function used to replicated purrr::reduce. Recursively applies a  | 
                  |
| 21 | 
                      #' function to a list of elements until only 1 element remains  | 
                  |
| 22 | 
                      #' @param .l list of values to apply a function to  | 
                  |
| 23 | 
                      #' @param .f function to apply to each each element of the list in turn  | 
                  |
| 24 | 
                      #' i.e. .l[[1]] <- .f( .l[[1]] , .l[[2]]) ; .l[[1]] <- .f( .l[[1]] , .l[[3]])  | 
                  |
| 25 | 
                      recursive_reduce <- function(.l , .f){
                     | 
                  |
| 26 | 557x | 
                          if (length(.l) != 1){
                     | 
                  
| 27 | 377x | 
                      .l[[2]] <- .f( .l[[1]] , .l[[2]])  | 
                  
| 28 | 377x | 
                      return( recursive_reduce( .l[-1] , .f))  | 
                  
| 29 | 
                          } else {
                     | 
                  |
| 30 | 180x | 
                      return(.l[[1]])  | 
                  
| 31 | 
                      }  | 
                  |
| 32 | 
                      }  | 
                  |
| 33 | ||
| 34 | 
                      #' invert  | 
                  |
| 35 | 
                      #'  | 
                  |
| 36 | 
                      #' Utility function used to replicated purrr::transpose. Turns a list inside  | 
                  |
| 37 | 
                      #' out.  | 
                  |
| 38 | 
                      #' @param x list  | 
                  |
| 39 | 
                      invert <- function(x){
                     | 
                  |
| 40 | 56x | 
                      x2 <- list()  | 
                  
| 41 | 56x | 
                      cnames <- names(x)  | 
                  
| 42 | 56x | 
                      tnames <- names(x[[1]])  | 
                  
| 43 | 56x | 
                          for ( i in tnames ){
                     | 
                  
| 44 | 168x | 
                      x2[[i]] <- list()  | 
                  
| 45 | 168x | 
                              for (j in cnames){
                     | 
                  
| 46 | 537x | 
                      x2[[i]][[j]] <- x[[j]][[i]]  | 
                  
| 47 | 
                      }  | 
                  |
| 48 | 
                      }  | 
                  |
| 49 | 56x | 
                      return(x2)  | 
                  
| 50 | 
                      }  | 
                  |
| 51 | ||
| 52 | ||
| 53 | ||
| 54 | ||
| 55 | 
                      #' as_ascii_table  | 
                  |
| 56 | 
                      #'  | 
                  |
| 57 | 
                      #' This function takes a data.frame and attempts to convert it into  | 
                  |
| 58 | 
                      #' a simple ascii format suitable for printing to the screen  | 
                  |
| 59 | 
                      #' It is assumed all variable values have a as.character() method  | 
                  |
| 60 | 
                      #' in order to cast them to character.  | 
                  |
| 61 | 
                      #' @param dat Input dataset to convert into a ascii table  | 
                  |
| 62 | 
                      #' @param line_prefix Symbols to prefix infront of every line of the table  | 
                  |
| 63 | 
                      as_ascii_table <- function(dat, line_prefix = "  "){
                     | 
                  |
| 64 | 
                       | 
                  |
| 65 | 
                       | 
                  |
| 66 | 
                      ## Convert every value to character and crop to a suitable length  | 
                  |
| 67 | 56x | 
                      dat <- as_tibble(apply(dat, c(1, 2), as_cropped_char))  | 
                  
| 68 | 
                       | 
                  |
| 69 | 56x | 
                      hold <- list()  | 
                  
| 70 | 56x | 
                      COLS <- colnames(dat)  | 
                  
| 71 | 
                       | 
                  |
| 72 | 
                      ### For each column extract core elements (width, values , title) and pad out  | 
                  |
| 73 | 
                      ### each string to be a suitable length  | 
                  |
| 74 | 56x | 
                          for ( i in 1:ncol(dat)){
                     | 
                  
| 75 | 179x | 
                      COL <- COLS[i]  | 
                  
| 76 | 179x | 
                      VALUES <- dat[[i]]  | 
                  
| 77 | 
                       | 
                  |
| 78 | 179x | 
                      JOINT <- c(COL , VALUES)  | 
                  
| 79 | 179x | 
                      WIDTH <- max( sapply(JOINT, nchar)) + 2  | 
                  
| 80 | 
                       | 
                  |
| 81 | 179x | 
                      hold[[COL]] <- list()  | 
                  
| 82 | 179x | 
                      hold[[COL]]$WIDTH <- WIDTH  | 
                  
| 83 | 179x | 
                      hold[[COL]]$VALUES <- sapply( VALUES ,string_pad, width = WIDTH )  | 
                  
| 84 | 179x | 
                      hold[[COL]]$HEADER <- sapply( COL ,string_pad, width = WIDTH )  | 
                  
| 85 | 
                      }  | 
                  |
| 86 | 
                       | 
                  |
| 87 | 
                      ### Collapse into a single value per component ( title , values, width )  | 
                  |
| 88 | 56x | 
                      thold <- invert(hold)  | 
                  
| 89 | 56x | 
                      tvals <- recursive_reduce( thold$VALUES , paste0 )  | 
                  
| 90 | 56x | 
                      thead <- recursive_reduce( thold$HEADER , paste0)  | 
                  
| 91 | 56x | 
                      twidth <- recursive_reduce( thold$WIDTH , sum)  | 
                  
| 92 | 
                       | 
                  |
| 93 | 
                      ### Create header and footer lines  | 
                  |
| 94 | 56x | 
                          TLINE <- paste0(rep("=" , twidth), collapse = "")
                     | 
                  
| 95 | 56x | 
                          LINE  <- paste0(rep("-" , twidth), collapse = "")
                     | 
                  
| 96 | 56x | 
                      FVALS <- paste0(line_prefix, tvals , collapse = "\n")  | 
                  
| 97 | 
                       | 
                  |
| 98 | 
                      ### Output table  | 
                  |
| 99 | 56x | 
                      paste0(  | 
                  
| 100 | 56x | 
                      "\n",  | 
                  
| 101 | 56x | 
                      line_prefix, TLINE, "\n",  | 
                  
| 102 | 56x | 
                      line_prefix, thead, "\n",  | 
                  
| 103 | 56x | 
                      line_prefix, LINE, "\n",  | 
                  
| 104 | 56x | 
                      FVALS, "\n",  | 
                  
| 105 | 56x | 
                      line_prefix, LINE  | 
                  
| 106 | 
                      )  | 
                  |
| 107 | 
                      }  | 
                  |
| 108 | ||
| 109 | ||
| 110 | ||
| 111 | ||
| 112 | ||
| 113 | ||
| 114 | ||
| 115 | ||
| 116 | 
                      #' as_cropped_char  | 
                  |
| 117 | 
                      #'  | 
                  |
| 118 | 
                      #' Makes any character string above x chars  | 
                  |
| 119 | 
                      #' Reduce down to a x char string with ...  | 
                  |
| 120 | 
                      #' @param inval a single element value  | 
                  |
| 121 | 
                      #' @param crop_at character limit  | 
                  |
| 122 | 
                      as_cropped_char <- function(inval, crop_at = 30 ){
                     | 
                  |
| 123 | 
                       | 
                  |
| 124 | 635x | 
                          if ( is.null(inval) ){
                     | 
                  
| 125 | 
                       | 
                  |
| 126 | ! | 
                      inval <- "<NULL>"  | 
                  
| 127 | 
                       | 
                  |
| 128 | 635x | 
                          } else if ( is.na(inval)){
                     | 
                  
| 129 | 
                       | 
                  |
| 130 | 1x | 
                      inval <- "<NA>"  | 
                  
| 131 | 
                       | 
                  |
| 132 | 
                          } else {
                     | 
                  |
| 133 | 
                       | 
                  |
| 134 | 634x | 
                      inval <- as.character(inval)  | 
                  
| 135 | 
                       | 
                  |
| 136 | 
                      }  | 
                  |
| 137 | 
                       | 
                  |
| 138 | 635x | 
                      charlength <- sapply(inval, nchar)  | 
                  
| 139 | 
                       | 
                  |
| 140 | 635x | 
                          if (charlength > crop_at ){
                     | 
                  
| 141 | 
                       | 
                  |
| 142 | 10x | 
                      outval <- substr(inval, 1, crop_at )  | 
                  
| 143 | 10x | 
                      outval <- paste0(outval, '...')  | 
                  
| 144 | 
                       | 
                  |
| 145 | 
                          } else {
                     | 
                  |
| 146 | 
                       | 
                  |
| 147 | 625x | 
                      outval <- inval  | 
                  
| 148 | 
                       | 
                  |
| 149 | 
                      }  | 
                  |
| 150 | 
                       | 
                  |
| 151 | 635x | 
                      outval  | 
                  
| 152 | 
                      }  | 
                  |
| 153 | ||
| 154 | ||
| 155 | ||
| 156 | 
                      #' get_table  | 
                  |
| 157 | 
                      #'  | 
                  |
| 158 | 
                      #' Generate nice looking table from a data frame  | 
                  |
| 159 | 
                      #' @param dsin dataset  | 
                  |
| 160 | 
                      #' @param row_limit Maximum number of rows displayed in dataset  | 
                  |
| 161 | 
                      get_table <- function(dsin , row_limit = 10){
                     | 
                  |
| 162 | 
                       | 
                  |
| 163 | 56x | 
                          if( nrow(dsin) == 0 ) {
                     | 
                  
| 164 | ! | 
                              return("")
                     | 
                  
| 165 | 
                      }  | 
                  |
| 166 | 
                       | 
                  |
| 167 | 56x | 
                      display_table <- subset(dsin , 1:nrow(dsin) < (row_limit + 1) )  | 
                  
| 168 | 
                       | 
                  |
| 169 | 56x | 
                          if ( nrow(dsin) > row_limit ){
                     | 
                  
| 170 | 
                       | 
                  |
| 171 | 4x | 
                      add_message <- paste0(  | 
                  
| 172 | 4x | 
                      'First ',  | 
                  
| 173 | 4x | 
                      row_limit,  | 
                  
| 174 | 4x | 
                      " of " ,  | 
                  
| 175 | 4x | 
                      nrow(dsin),  | 
                  
| 176 | 4x | 
                      ' rows are shown in table below'  | 
                  
| 177 | 
                      )  | 
                  |
| 178 | 
                       | 
                  |
| 179 | 
                          } else {
                     | 
                  |
| 180 | 52x | 
                      add_message <- 'All rows are shown in table below'  | 
                  
| 181 | 
                      }  | 
                  |
| 182 | 
                       | 
                  |
| 183 | 56x | 
                      msg <- paste(  | 
                  
| 184 | 56x | 
                      c(  | 
                  
| 185 | 56x | 
                      add_message,  | 
                  
| 186 | 56x | 
                      as_ascii_table(display_table),  | 
                  
| 187 | 56x | 
                      '\n'  | 
                  
| 188 | 
                      ),  | 
                  |
| 189 | 56x | 
                      collapse = '\n'  | 
                  
| 190 | 
                      )  | 
                  |
| 191 | 
                       | 
                  |
| 192 | 56x | 
                      return(msg)  | 
                  
| 193 | 
                      }  | 
                  |
| 194 | ||
| 195 | 
| 1 | ||
| 2 | 
                      #' Print diffdf objects  | 
                  |
| 3 | 
                      #'  | 
                  |
| 4 | 
                      #' Print nicely formatted version of an diffdf object  | 
                  |
| 5 | 
                      #' @param x comparison object created by diffdf().  | 
                  |
| 6 | 
                      #' @param ... Additional arguments (not used)  | 
                  |
| 7 | 
                      #' @param as_string Return printed message as an R character vector?  | 
                  |
| 8 | 
                      #' @examples  | 
                  |
| 9 | 
                      #' x <- subset( iris , -Species )  | 
                  |
| 10 | 
                      #' x[1,2] <- 5  | 
                  |
| 11 | 
                      #' COMPARE <- diffdf( iris, x)  | 
                  |
| 12 | 
                      #' print( COMPARE )  | 
                  |
| 13 | 
                      #' print( COMPARE , "Sepal.Length" )  | 
                  |
| 14 | 
                      #' @export  | 
                  |
| 15 | 
                      print.diffdf <- function(x, ..., as_string = FALSE){
                     | 
                  |
| 16 | 21x | 
                      COMPARE <- x  | 
                  
| 17 | ||
| 18 | 21x | 
                          if ( length(COMPARE) == 0 ){
                     | 
                  
| 19 | 2x | 
                      outtext <- "No issues were found!\n"  | 
                  
| 20 | 
                       | 
                  |
| 21 | 
                          } else {
                     | 
                  |
| 22 | 
                       | 
                  |
| 23 | 19x | 
                      start_text <- paste0(  | 
                  
| 24 | 19x | 
                      'Differences found between the objects!\n\n',  | 
                  
| 25 | 19x | 
                      'A summary is given below.\n\n'  | 
                  
| 26 | 
                      )  | 
                  |
| 27 | ||
| 28 | 19x | 
                      end_text <- lapply(COMPARE, function(x) get_print_message(x) )  | 
                  
| 29 | 19x | 
                      end_text <- paste0(unlist(end_text), collapse = "")  | 
                  
| 30 | ||
| 31 | 19x | 
                      outtext <- paste0(start_text, end_text)  | 
                  
| 32 | 
                      }  | 
                  |
| 33 | 
                       | 
                  |
| 34 | 21x | 
                          if ( as_string){
                     | 
                  
| 35 | 21x | 
                      return(strsplit(outtext, '\n')[[1]])  | 
                  
| 36 | 
                          } else {
                     | 
                  |
| 37 | ! | 
                      cat(outtext)  | 
                  
| 38 | ! | 
                      return(invisible(COMPARE))  | 
                  
| 39 | 
                      }  | 
                  |
| 40 | 
                      }  | 
                  |
| 41 | ||
| 42 | ||
| 43 | 
| 1 | 
                      #' construct_issue  | 
                  |
| 2 | 
                      #'  | 
                  |
| 3 | 
                      #' Make an s3 object with class issue and possible additional class,  | 
                  |
| 4 | 
                      #' and assign other arguments to attributes  | 
                  |
| 5 | 
                      #' @param value the value of the object  | 
                  |
| 6 | 
                      #' @param message the value of the message attribute  | 
                  |
| 7 | 
                      #' @param add_class additional class to add  | 
                  |
| 8 | 
                      construct_issue <- function(value, message, add_class = NULL){
                     | 
                  |
| 9 | 2280x | 
                      x <- value  | 
                  
| 10 | 
                       | 
                  |
| 11 | 
                      ### If nothing has been provided return nothing !  | 
                  |
| 12 | 2074x | 
                      if ( nrow(x) == 0 ) return(NULL)  | 
                  
| 13 | 
                       | 
                  |
| 14 | 206x | 
                      class(x) <- c(add_class, "issue", class(x))  | 
                  
| 15 | 206x | 
                      attributes(x)[["message"]] <- message  | 
                  
| 16 | 206x | 
                      return(x)  | 
                  
| 17 | 
                      }  | 
                  |
| 18 | ||
| 19 | ||
| 20 | 
                      #' get_issue_message  | 
                  |
| 21 | 
                      #'  | 
                  |
| 22 | 
                      #' Simple function to grab the issue message  | 
                  |
| 23 | 
                      #' @param object inputted object of class issue  | 
                  |
| 24 | 
                      #' @param ... other arguments  | 
                  |
| 25 | 
                      get_issue_message <- function (object, ...) {
                     | 
                  |
| 26 | 206x | 
                      return( attr(object,"message"))  | 
                  
| 27 | 
                      }  | 
                  |
| 28 | ||
| 29 | ||
| 30 | 
                      #' get_print_message  | 
                  |
| 31 | 
                      #'  | 
                  |
| 32 | 
                      #' Get the required text depending on type of issue  | 
                  |
| 33 | 
                      #' @param object inputted object of class issue  | 
                  |
| 34 | 
                      #' @param ... other arguments  | 
                  |
| 35 | 
                      get_print_message <- function (object, ...) {
                     | 
                  |
| 36 | 56x | 
                          UseMethod("get_print_message", object)
                     | 
                  
| 37 | 
                      }  | 
                  |
| 38 | ||
| 39 | ||
| 40 | 
                      #' get_print_message.default  | 
                  |
| 41 | 
                      #'  | 
                  |
| 42 | 
                      #' Errors, as this should only ever be given an issue  | 
                  |
| 43 | 
                      #' @param object issue  | 
                  |
| 44 | 
                      get_print_message.default <- function(object) {
                     | 
                  |
| 45 | ! | 
                          stop("Error: An issue has not been provided to this function!")
                     | 
                  
| 46 | 
                      }  | 
                  |
| 47 | ||
| 48 | ||
| 49 | 
                      #' get_print_message.issue  | 
                  |
| 50 | 
                      #'  | 
                  |
| 51 | 
                      #' Get text from a basic issue, based on the class of the value of the issue  | 
                  |
| 52 | 
                      #'  | 
                  |
| 53 | 
                      #' @param object an object of class issue_basic  | 
                  |
| 54 | 
                      get_print_message.issue <- function(object){
                     | 
                  |
| 55 | 56x | 
                      paste(  | 
                  
| 56 | 56x | 
                      c(attr(object, "message"), get_table(object) ),  | 
                  
| 57 | 56x | 
                      collapse = '\n'  | 
                  
| 58 | 
                      )  | 
                  |
| 59 | 
                      }  | 
                  |
| 60 | ||
| 61 | ||
| 62 | ||
| 63 | ||
| 64 | ||
| 65 | ||
| 66 | ||
| 67 | ||
| 68 | ||
| 69 | 
| 1 | 
                      #' is_variable_different  | 
                  |
| 2 | 
                      #'  | 
                  |
| 3 | 
                      #' This subsets the data set on the variable name, picks out differences and returns a tibble  | 
                  |
| 4 | 
                      #' of differences for the given variable  | 
                  |
| 5 | 
                      #' @importFrom tibble as_tibble  | 
                  |
| 6 | 
                      #' @param variablename name of variable being compared  | 
                  |
| 7 | 
                      #' @param keynames name of keys  | 
                  |
| 8 | 
                      #' @param datain Inputted dataset with base and compare vectors  | 
                  |
| 9 | 
                      #' @param ... Additional arguments which might be passed through (numerical accuracy)  | 
                  |
| 10 | 
                      #' @return A boolean vector which is T if target and current are different  | 
                  |
| 11 | 
                      is_variable_different <- function (variablename, keynames, datain, ...) {
                     | 
                  |
| 12 | ||
| 13 | 991x | 
                      xvar <- paste0(variablename,'.x')  | 
                  
| 14 | 991x | 
                      yvar <- paste0(variablename,'.y')  | 
                  
| 15 | 
                       | 
                  |
| 16 | 991x | 
                          if ( ! xvar %in% names(datain) | ! yvar %in% names(datain)){
                     | 
                  
| 17 | ! | 
                              stop("Variable does not exist within input dataset")
                     | 
                  
| 18 | 
                      }  | 
                  |
| 19 | 
                       | 
                  |
| 20 | 991x | 
                      target <- datain[[xvar]]  | 
                  
| 21 | 991x | 
                      current <- datain[[yvar]]  | 
                  
| 22 | 991x | 
                      outvect <- find_difference(target, current, ...)  | 
                  
| 23 | 
                       | 
                  |
| 24 | 991x | 
                      datain[["VARIABLE"]] <- variablename  | 
                  
| 25 | 
                       | 
                  |
| 26 | 991x | 
                          names(datain)[names(datain) %in% c(xvar, yvar)] <- c("BASE", "COMPARE")
                     | 
                  
| 27 | 
                       | 
                  |
| 28 | 991x | 
                      x <- as_tibble(  | 
                  
| 29 | 991x | 
                      subset(  | 
                  
| 30 | 991x | 
                      datain,  | 
                  
| 31 | 991x | 
                      outvect,  | 
                  
| 32 | 991x | 
                                  select = c("VARIABLE", keynames, "BASE", "COMPARE")
                     | 
                  
| 33 | 
                      )  | 
                  |
| 34 | 
                      )  | 
                  |
| 35 | 
                       | 
                  |
| 36 | 991x | 
                      return(x)  | 
                  
| 37 | 
                      }  | 
                  |
| 38 | ||
| 39 | 
                      #' compare_vectors  | 
                  |
| 40 | 
                      #'  | 
                  |
| 41 | 
                      #' Compare two vectors looking for differences  | 
                  |
| 42 | 
                      #'  | 
                  |
| 43 | 
                      #' @param target the base vector  | 
                  |
| 44 | 
                      #' @param current a vector to compare target to  | 
                  |
| 45 | 
                      #' @param ... Additional arguments which might be passed through (numerical accuracy)  | 
                  |
| 46 | 
                      compare_vectors <- function (target, current, ...) {
                     | 
                  |
| 47 | 1020x | 
                          UseMethod("compare_vectors")
                     | 
                  
| 48 | 
                      }  | 
                  |
| 49 | ||
| 50 | ||
| 51 | 
                      #' find_difference  | 
                  |
| 52 | 
                      #'  | 
                  |
| 53 | 
                      #' This determines if two vectors are different. It expects vectors of the same  | 
                  |
| 54 | 
                      #' length and type, and is intended to be used after checks have already been done  | 
                  |
| 55 | 
                      #' Initially picks out any nas (matching nas count as a match)  | 
                  |
| 56 | 
                      #' Then compares remaining vector  | 
                  |
| 57 | 
                      #'  | 
                  |
| 58 | 
                      #' @param target the base vector  | 
                  |
| 59 | 
                      #' @param current a vector to compare target to  | 
                  |
| 60 | 
                      #' @param ... Additional arguments which might be passed through (numerical accuracy)  | 
                  |
| 61 | 
                      find_difference <- function (target, current, ...) {
                     | 
                  |
| 62 | 
                       | 
                  |
| 63 | 1023x | 
                          if( length(target) != length(current)){
                     | 
                  
| 64 | 2x | 
                              warning("Inputs are not of the same length")
                     | 
                  
| 65 | 2x | 
                      return(NULL)  | 
                  
| 66 | 
                      }  | 
                  |
| 67 | 
                       | 
                  |
| 68 | 1021x | 
                          if( is.null(target) | is.null(current) ){
                     | 
                  
| 69 | 1x | 
                      return( is.null(target) != is.null(current) )  | 
                  
| 70 | 
                      }  | 
                  |
| 71 | 
                       | 
                  |
| 72 | 
                      ### Initalise output, assume problem unless evidence otherwise  | 
                  |
| 73 | 1020x | 
                      return_vector <- rep(TRUE, length(target))  | 
                  
| 74 | 
                       | 
                  |
| 75 | 1020x | 
                      nas_t <- is.na(target)  | 
                  
| 76 | 1020x | 
                      nas_c <- is.na(current)  | 
                  
| 77 | 
                       | 
                  |
| 78 | 
                      ## compare missing values  | 
                  |
| 79 | 1020x | 
                      nacompare <- nas_t != nas_c  | 
                  
| 80 | 1020x | 
                      naselect <- nas_t|nas_c  | 
                  
| 81 | 1020x | 
                      return_vector[naselect] <- nacompare[naselect]  | 
                  
| 82 | 
                       | 
                  |
| 83 | 
                      ## compare non-missing values  | 
                  |
| 84 | 1020x | 
                      selectvector <- as.logical( (!nas_t) * (!nas_c) )  | 
                  
| 85 | 
                       | 
                  |
| 86 | 1020x | 
                      comparevect <- compare_vectors(  | 
                  
| 87 | 1020x | 
                      target[selectvector] ,  | 
                  
| 88 | 1020x | 
                      current[selectvector],  | 
                  
| 89 | 
                      ...  | 
                  |
| 90 | 
                      )  | 
                  |
| 91 | 
                       | 
                  |
| 92 | 1020x | 
                      return_vector[selectvector] <- comparevect  | 
                  
| 93 | 
                       | 
                  |
| 94 | 1020x | 
                      return(return_vector)  | 
                  
| 95 | 
                      }  | 
                  |
| 96 | ||
| 97 | ||
| 98 | ||
| 99 | ||
| 100 | ||
| 101 | ||
| 102 | ||
| 103 | 
                      #' compare_vectors.default  | 
                  |
| 104 | 
                      #'  | 
                  |
| 105 | 
                      #' Default method, if the vector is not numeric or factor. Basic comparison  | 
                  |
| 106 | 
                      #' @param target the base vector  | 
                  |
| 107 | 
                      #' @param current a vector to compare target to  | 
                  |
| 108 | 
                      #' @param ... Additional arguments which might be passed through (numerical accuracy)  | 
                  |
| 109 | 
                      compare_vectors.default <- function(target, current, ...){
                     | 
                  |
| 110 | 445x | 
                      target != current  | 
                  
| 111 | 
                      }  | 
                  |
| 112 | ||
| 113 | ||
| 114 | ||
| 115 | ||
| 116 | 
                      #' compare_vectors.factor  | 
                  |
| 117 | 
                      #'  | 
                  |
| 118 | 
                      #' Compares factors. Sets them as character and then compares  | 
                  |
| 119 | 
                      #' @param target the base vector  | 
                  |
| 120 | 
                      #' @param current a vector to compare target to  | 
                  |
| 121 | 
                      #' @param ... Additional arguments which might be passed through (numerical accuracy)  | 
                  |
| 122 | 
                      compare_vectors.factor <- function(target, current, ...){
                     | 
                  |
| 123 | 90x | 
                      as.character(target) != as.character(current)  | 
                  
| 124 | 
                      }  | 
                  |
| 125 | ||
| 126 | ||
| 127 | ||
| 128 | ||
| 129 | ||
| 130 | 
                      #' compare_vectors.numeric  | 
                  |
| 131 | 
                      #'  | 
                  |
| 132 | 
                      #' This is a modified version of the all.equal function  | 
                  |
| 133 | 
                      #' which returns a vector rather than a message  | 
                  |
| 134 | 
                      #' @param target the base vector  | 
                  |
| 135 | 
                      #' @param current a vector to compare target to  | 
                  |
| 136 | 
                      #' @param tolerance Level of tolerance for differences between two variables  | 
                  |
| 137 | 
                      #' @param scale Scale that tolerance should be set on. If NULL assume absolute  | 
                  |
| 138 | 
                      compare_vectors.numeric <- function(  | 
                  |
| 139 | 
                      target,  | 
                  |
| 140 | 
                      current,  | 
                  |
| 141 | 
                      tolerance = sqrt(.Machine$double.eps),  | 
                  |
| 142 | 
                      scale = NULL  | 
                  |
| 143 | 
                      ){
                     | 
                  |
| 144 | ||
| 145 | 485x | 
                      out <- target == current  | 
                  
| 146 | 
                       | 
                  |
| 147 | 485x | 
                          if (all(out)) {
                     | 
                  
| 148 | 451x | 
                      return(!out)  | 
                  
| 149 | 
                      }  | 
                  |
| 150 | 
                       | 
                  |
| 151 | 34x | 
                          if (is.integer(target) || is.integer(current)){ 
                     | 
                  
| 152 | 11x | 
                      target <- as.double(target)  | 
                  
| 153 | 11x | 
                      current <- as.double(current)  | 
                  
| 154 | 
                      }  | 
                  |
| 155 | 
                       | 
                  |
| 156 | 34x | 
                      xy <- abs(target - current)  | 
                  
| 157 | 
                       | 
                  |
| 158 | 34x | 
                          if (!is.null(scale)) {
                     | 
                  
| 159 | 2x | 
                      xy <- xy/scale  | 
                  
| 160 | 
                      }  | 
                  |
| 161 | 
                       | 
                  |
| 162 | 34x | 
                      return(xy > tolerance)  | 
                  
| 163 | ||
| 164 | 
                      }  | 
                  |
| 165 | ||
| 166 | ||
| 167 | ||
| 168 | 
| 1 | 
                      #'generate_keyname  | 
                  |
| 2 | 
                      #'  | 
                  |
| 3 | 
                      #'Function to generate a name for the keys if not provided  | 
                  |
| 4 | 
                      #'  | 
                  |
| 5 | 
                      #'@param BASE base dataset  | 
                  |
| 6 | 
                      #'@param COMP comparison dataset  | 
                  |
| 7 | 
                      #'@param replace_names a vector of replacement names. Used for recursion, should be edited in function for clarity  | 
                  |
| 8 | 
                      #'  | 
                  |
| 9 | 
                      generate_keyname <- function(  | 
                  |
| 10 | 
                      BASE,  | 
                  |
| 11 | 
                      COMP,  | 
                  |
| 12 | 
                          replace_names = c("..ROWNUMBER..", "..RN..", "..ROWN..", "..N..")
                     | 
                  |
| 13 | 
                      ){
                     | 
                  |
| 14 | 
                       | 
                  |
| 15 | 213x | 
                          if ( class(replace_names) != "character"){
                     | 
                  
| 16 | ! | 
                      stop( "replace_names is not a character vector")  | 
                  
| 17 | 
                      }  | 
                  |
| 18 | 
                       | 
                  |
| 19 | 213x | 
                          if (length(replace_names) == 0) {
                     | 
                  
| 20 | 8x | 
                              stop("All default row names are in use in BASE/COMPARE. Please provide a KEY argument")
                     | 
                  
| 21 | 
                      }  | 
                  |
| 22 | 
                       | 
                  |
| 23 | 205x | 
                      key_name <- replace_names[1]  | 
                  
| 24 | 
                       | 
                  |
| 25 | 205x | 
                          if (!is.null(BASE[[key_name]]) | !is.null( COMP[[key_name]])){
                     | 
                  
| 26 | 66x | 
                      key_name <- generate_keyname(BASE, COMP, replace_names[-1])  | 
                  
| 27 | 
                      }  | 
                  |
| 28 | 
                       | 
                  |
| 29 | 173x | 
                      return(key_name)  | 
                  
| 30 | 
                       | 
                  |
| 31 | 
                      }  | 
                  
| 1 | ||
| 2 | 
                      #' sort_then_join  | 
                  |
| 3 | 
                      #'  | 
                  |
| 4 | 
                      #' Convenience function to sort two strings and paste them together  | 
                  |
| 5 | 
                      #' @param string1 first string  | 
                  |
| 6 | 
                      #' @param string2 second string  | 
                  |
| 7 | 
                      sort_then_join <- function(string1, string2){
                     | 
                  |
| 8 | 14x | 
                      paste0(sort(c(string1, string2)), collapse = '')  | 
                  
| 9 | 
                      }  | 
                  |
| 10 | ||
| 11 | ||
| 12 | 
                      #' class_merge  | 
                  |
| 13 | 
                      #'  | 
                  |
| 14 | 
                      #' Convenience function to put all classes an object has into one string  | 
                  |
| 15 | 
                      #' @param x an object  | 
                  |
| 16 | 
                      class_merge <- function(x){
                     | 
                  |
| 17 | 304x | 
                      paste(class(x), collapse = "_")  | 
                  
| 18 | 
                      }  | 
                  |
| 19 | ||
| 20 | ||
| 21 | 
                      get_message <- function( colname , whichdat, totype){
                     | 
                  |
| 22 | 12x | 
                      message(paste0(  | 
                  
| 23 | 12x | 
                      "NOTE: Variable " , colname, " in " , tolower(whichdat) , " was casted to " , totype  | 
                  
| 24 | 
                      ))  | 
                  |
| 25 | 
                      }  | 
                  |
| 26 | ||
| 27 | ||
| 28 | 
                      #' get_casted_vector  | 
                  |
| 29 | 
                      #'  | 
                  |
| 30 | 
                      #' casts a vector depending on its type and input  | 
                  |
| 31 | 
                      #' @param colin column to cast  | 
                  |
| 32 | 
                      #' @param colname name of vector  | 
                  |
| 33 | 
                      #' @param whichdat whether base or compare is being casted (used for messages)  | 
                  |
| 34 | 
                      get_casted_vector <- function(colin, colname,  whichdat){
                     | 
                  |
| 35 | 
                       | 
                  |
| 36 | 24x | 
                          if ( class(colin) == "factor"){
                     | 
                  
| 37 | 6x | 
                      get_message( colname , whichdat , "character")  | 
                  
| 38 | 6x | 
                      return(as.character(colin))  | 
                  
| 39 | 
                      }  | 
                  |
| 40 | 
                       | 
                  |
| 41 | 18x | 
                          if ( class(colin) == "integer"){
                     | 
                  
| 42 | 6x | 
                      get_message( colname , whichdat , "numeric")  | 
                  
| 43 | 6x | 
                      return(as.numeric(colin))  | 
                  
| 44 | 
                      }  | 
                  |
| 45 | 
                       | 
                  |
| 46 | 12x | 
                      colin  | 
                  
| 47 | 
                      }  | 
                  |
| 48 | ||
| 49 | ||
| 50 | ||
| 51 | 
                      #' get_casted_dataset  | 
                  |
| 52 | 
                      #'  | 
                  |
| 53 | 
                      #' Internal utility function to loop across a dataset casting all target  | 
                  |
| 54 | 
                      #' variables  | 
                  |
| 55 | 
                      #' @param df dataset to be casted  | 
                  |
| 56 | 
                      #' @param columns columns to be casted  | 
                  |
| 57 | 
                      #' @param whichdat whether base or compare is being casted (used for messages)  | 
                  |
| 58 | 
                      get_casted_dataset <- function(df , columns , whichdat){
                     | 
                  |
| 59 | 22x | 
                          for ( col in  columns ){
                     | 
                  
| 60 | 24x | 
                      df[[col]] <- get_casted_vector( df[[col]] , col , whichdat )  | 
                  
| 61 | 
                      }  | 
                  |
| 62 | 22x | 
                      return(df)  | 
                  
| 63 | 
                      }  | 
                  |
| 64 | ||
| 65 | ||
| 66 | ||
| 67 | ||
| 68 | ||
| 69 | ||
| 70 | 
                      #' cast_variables  | 
                  |
| 71 | 
                      #'  | 
                  |
| 72 | 
                      #' Function to cast datasets columns if they have differing types  | 
                  |
| 73 | 
                      #' Restricted to specific cases, currently integer and double, and character and factor  | 
                  |
| 74 | 
                      #' @param BASE base dataset  | 
                  |
| 75 | 
                      #' @param COMPARE comparison dataset  | 
                  |
| 76 | 
                      #' @param ignore_vars Variables not to be considered for casting  | 
                  |
| 77 | 
                      #' @param cast_integers Logical - Whether integers should be cased to double when compared to doubles  | 
                  |
| 78 | 
                      #' @param cast_factors Logical - Whether characters should be casted to characters when compared to characters  | 
                  |
| 79 | 
                      cast_variables <- function(  | 
                  |
| 80 | 
                      BASE,  | 
                  |
| 81 | 
                      COMPARE,  | 
                  |
| 82 | 
                      ignore_vars = NULL,  | 
                  |
| 83 | 
                      cast_integers = FALSE ,  | 
                  |
| 84 | 
                      cast_factors = FALSE  | 
                  |
| 85 | 
                      ){
                     | 
                  |
| 86 | ||
| 87 | 13x | 
                      allowed_class_casts <- c( "integernumeric" , "characterfactor")[c(cast_integers, cast_factors)]  | 
                  
| 88 | 
                       | 
                  |
| 89 | 13x | 
                      BASE_class <- data.frame(  | 
                  
| 90 | 13x | 
                      class_BASE = sapply(BASE, class_merge),  | 
                  
| 91 | 13x | 
                      colname = names(BASE),  | 
                  
| 92 | 13x | 
                      stringsAsFactors = FALSE  | 
                  
| 93 | 
                      )  | 
                  |
| 94 | 13x | 
                      BASE_class <- BASE_class[!BASE_class[["colname"]] %in% ignore_vars,, drop=FALSE]  | 
                  
| 95 | 
                       | 
                  |
| 96 | 
                       | 
                  |
| 97 | 13x | 
                      COMPARE_class <- data.frame(  | 
                  
| 98 | 13x | 
                      class_COMPARE = sapply(COMPARE, class_merge),  | 
                  
| 99 | 13x | 
                      colname = names(COMPARE),  | 
                  
| 100 | 13x | 
                      stringsAsFactors = FALSE  | 
                  
| 101 | 
                      )  | 
                  |
| 102 | 13x | 
                      COMPARE_class <- COMPARE_class[!COMPARE_class[["colname"]] %in% ignore_vars ,, drop=FALSE]  | 
                  
| 103 | 
                       | 
                  |
| 104 | 13x | 
                      common_class <- merge(  | 
                  
| 105 | 13x | 
                      x = BASE_class,  | 
                  
| 106 | 13x | 
                      y = COMPARE_class,  | 
                  
| 107 | 13x | 
                      by = "colname"  | 
                  
| 108 | 
                      )  | 
                  |
| 109 | 
                       | 
                  |
| 110 | 
                       | 
                  |
| 111 | 13x | 
                      diff_class <- common_class[ common_class[["class_BASE"]] != common_class[["class_COMPARE"]] ,,drop=FALSE]  | 
                  
| 112 | ||
| 113 | 
                       | 
                  |
| 114 | 13x | 
                      diff_class$classmerge <- mapply(  | 
                  
| 115 | 13x | 
                      sort_then_join,  | 
                  
| 116 | 13x | 
                      diff_class$class_COMPARE,  | 
                  
| 117 | 13x | 
                      diff_class$class_BASE  | 
                  
| 118 | 
                      )  | 
                  |
| 119 | 
                       | 
                  |
| 120 | 
                       | 
                  |
| 121 | 13x | 
                      cast_columns <- diff_class[ diff_class[["classmerge"]] %in% allowed_class_casts ,,drop=FALSE]  | 
                  
| 122 | 
                       | 
                  |
| 123 | 
                       | 
                  |
| 124 | 13x | 
                      DATASETS <- list(  | 
                  
| 125 | 13x | 
                      "BASE" = BASE,  | 
                  
| 126 | 13x | 
                      "COMPARE" = COMPARE  | 
                  
| 127 | 
                      )  | 
                  |
| 128 | 
                       | 
                  |
| 129 | ||
| 130 | 13x | 
                          if( nrow(cast_columns) == 0 ){
                     | 
                  
| 131 | 2x | 
                      return( DATASETS )  | 
                  
| 132 | 
                      }  | 
                  |
| 133 | 
                       | 
                  |
| 134 | 
                       | 
                  |
| 135 | 11x | 
                          for ( i in names(DATASETS)){
                     | 
                  
| 136 | 22x | 
                      DATASETS[[i]] <- get_casted_dataset( DATASETS[[i]] , cast_columns$colname , i)  | 
                  
| 137 | 
                      }  | 
                  |
| 138 | 
                       | 
                  |
| 139 | 
                       | 
                  |
| 140 | 11x | 
                      return(DATASETS)  | 
                  
| 141 | 
                      }  | 
                  |
| 142 | ||
| 143 | ||
| 144 | ||
| 145 | ||
| 146 | ||
| 147 | ||
| 148 | ||
| 149 | ||
| 150 | ||
| 151 | ||
| 152 | ||
| 153 | ||
| 154 | ||
| 155 | ||
| 156 | ||
| 157 | ||
| 158 | ||
| 159 | ||
| 160 | ||
| 161 | ||
| 162 | ||
| 163 | ||
| 164 | ||
| 165 | ||
| 166 | ||
| 167 | ||
| 168 | ||
| 169 | ||
| 170 | ||
| 171 | ||
| 172 | ||
| 173 | 
| 1 | ||
| 2 | ||
| 3 | ||
| 4 | 
                      #' factor_to_character  | 
                  |
| 5 | 
                      #'  | 
                  |
| 6 | 
                      #' Takes a dataframe and converts any factor variables to character  | 
                  |
| 7 | 
                      #' @param dsin input dataframe  | 
                  |
| 8 | 
                      #' @param vars variables to consider for conversion. Default NULL will consider  | 
                  |
| 9 | 
                      #' every variable within the dataset  | 
                  |
| 10 | 
                      factor_to_character <- function( dsin , vars = NULL){
                     | 
                  |
| 11 | 
                       | 
                  |
| 12 | ! | 
                      if ( is.null(vars) ) vars = names(dsin)  | 
                  
| 13 | 
                       | 
                  |
| 14 | 258x | 
                          for (var in vars){
                     | 
                  
| 15 | 272x | 
                              if(  is.factor(dsin[[var]])){
                     | 
                  
| 16 | ! | 
                      dsin[[var]] <- as.character(dsin[[var]])  | 
                  
| 17 | 
                      }  | 
                  |
| 18 | 
                      }  | 
                  |
| 19 | 258x | 
                      return(dsin)  | 
                  
| 20 | 
                      }  | 
                  |
| 21 | ||
| 22 | ||
| 23 | ||
| 24 | ||
| 25 | 
                      #' has_unique_rows  | 
                  |
| 26 | 
                      #'  | 
                  |
| 27 | 
                      #' Check if a data sets rows are unique  | 
                  |
| 28 | 
                      #' @param DAT input data set (data frame)  | 
                  |
| 29 | 
                      #' @param KEYS Set of keys which should be unique  | 
                  |
| 30 | 
                      has_unique_rows <- function(DAT , KEYS){
                     | 
                  |
| 31 | 259x | 
                      DUPS <- duplicated( subset(DAT , select= KEYS) )  | 
                  
| 32 | 259x | 
                      NDUPS <- sum( DUPS)  | 
                  
| 33 | 259x | 
                      return( NDUPS == 0 )  | 
                  
| 34 | 
                      }  | 
                  |
| 35 | ||
| 36 | 
                      #'convert_to_issue  | 
                  |
| 37 | 
                      #'  | 
                  |
| 38 | 
                      #'converts the count value into the correct issue format  | 
                  |
| 39 | 
                      #'@param datin data inputted  | 
                  |
| 40 | 
                      #'@importFrom tibble rownames_to_column  | 
                  |
| 41 | 
                      convert_to_issue <- function(datin){
                     | 
                  |
| 42 | 128x | 
                      datin_tibble <- tibble(  | 
                  
| 43 | 128x | 
                      `Variable` = names(datin),  | 
                  
| 44 | 128x | 
                      `No of Differences` = datin  | 
                  
| 45 | 
                      )  | 
                  |
| 46 | 
                       | 
                  |
| 47 | 128x | 
                      datin_tibble_reduced <- datin_tibble[ datin_tibble[["No of Differences"]] > 0, , drop = FALSE]  | 
                  
| 48 | 128x | 
                      return(datin_tibble_reduced)  | 
                  
| 49 | 
                      }  | 
                  |
| 50 |