1 |
#' Identify Issue Rows |
|
2 |
#' |
|
3 |
#' This function takes a `diffdf` object and a dataframe and subsets |
|
4 |
#' the `data.frame` for problem rows as identified in the comparison object. |
|
5 |
#' If \code{vars} has been specified only issue rows associated with those |
|
6 |
#' variable(s) will be returned. |
|
7 |
#' @param df dataframe to be subsetted |
|
8 |
#' @param diff diffdf object |
|
9 |
#' @param vars (optional) character vector containing names of issue variables to subset dataframe |
|
10 |
#' on. A value of NULL (default) will be taken to mean available issue variables. |
|
11 |
#' @examples |
|
12 |
#' iris2 <- iris |
|
13 |
#' for (i in 1:3) iris2[i, i] <- 99 |
|
14 |
#' x <- diffdf(iris, iris2, suppress_warnings = TRUE) |
|
15 |
#' diffdf_issuerows(iris, x) |
|
16 |
#' diffdf_issuerows(iris2, x) |
|
17 |
#' diffdf_issuerows(iris2, x, vars = "Sepal.Length") |
|
18 |
#' diffdf_issuerows(iris2, x, vars = c("Sepal.Length", "Sepal.Width")) |
|
19 |
#' @details |
|
20 |
#' Note that `diffdf_issuerows` can be used to subset against any dataframe. The only |
|
21 |
#' requirement is that the original variables specified in the keys argument to diffdf |
|
22 |
#' are present on the dataframe you are subsetting against. However please note that if |
|
23 |
#' no keys were specified in diffdf then the row number is used. This means using |
|
24 |
#' `diffdf_issuerows` without a keys against an arbitrary dataset can easily result in |
|
25 |
#' nonsense rows being returned. It is always recommended to supply keys to diffdf. |
|
26 |
#' @export |
|
27 |
diffdf_issuerows <- function(df, diff, vars = NULL) { |
|
28 | ||
29 | 19x |
assertthat::assert_that( |
30 | 19x |
class(diff)[[1]] == "diffdf" |
31 |
) |
|
32 | ||
33 | ||
34 | 19x |
KEYS_ATT <- attr(diff, "keys") |
35 | ||
36 | 19x |
assertthat::assert_that( |
37 | 19x |
!is.null(KEYS_ATT), |
38 | 19x |
msg = "diff is missing the keys attribute" |
39 |
) |
|
40 | ||
41 | ||
42 | 19x |
issue_vars <- names(diff)[grep("^VarDiff_", names(diff))] |
43 | ||
44 | 19x |
if (is.null(vars)) { |
45 | 15x |
vars <- issue_vars |
46 |
} else { |
|
47 | 4x |
vars <- paste0("VarDiff_", vars) |
48 |
} |
|
49 | ||
50 | 19x |
if (length(issue_vars) == 0 || sum(vars %in% issue_vars) == 0) { |
51 | 7x |
return(df[FALSE, ]) |
52 |
} |
|
53 | ||
54 | 12x |
KEEP <- mapply( |
55 | 12x |
FUN = get_issue_dataset, |
56 | 12x |
issue = vars, |
57 | 12x |
diff = list(diff), |
58 | 12x |
SIMPLIFY = FALSE |
59 |
) |
|
60 | ||
61 | 12x |
KEEP <- recursive_reduce(KEEP, rbind) |
62 | 12x |
KEEP <- KEEP[!duplicated(KEEP), ] |
63 | ||
64 | 12x |
if (KEYS_ATT$is_derived) { |
65 | 2x |
df[[KEYS_ATT$value]] <- seq_len(nrow(df)) |
66 |
} |
|
67 | ||
68 | 12x |
keys <- KEYS_ATT$value |
69 | ||
70 | 12x |
if (any(!keys %in% names(df))) { |
71 | 1x |
stop("df does not contain all variables specified as keys in diff") |
72 |
} |
|
73 | ||
74 | 11x |
RET <- merge( |
75 | 11x |
x = df, |
76 | 11x |
y = KEEP, |
77 | 11x |
sort = TRUE |
78 |
) |
|
79 | ||
80 | 11x |
RET <- RET[do.call("order", RET[keys]), ] |
81 | ||
82 | 11x |
if (KEYS_ATT$is_derived) { |
83 | 2x |
keep_vars <- !names(RET) %in% KEYS_ATT$value |
84 | 2x |
RET <- RET[, keep_vars, drop = FALSE] |
85 |
} |
|
86 | ||
87 | 11x |
return(RET) |
88 |
} |
|
89 | ||
90 | ||
91 | ||
92 | ||
93 |
#' get_issue_dataset |
|
94 |
#' |
|
95 |
#' Internal function used by `diffdf_issuerows` to extract the dataframe |
|
96 |
#' from each a target issue. In particular it also strips off any |
|
97 |
#' non-key variables |
|
98 |
#' @param issue name of issue to extract the dataset from diff |
|
99 |
#' @param diff diffdf object which contains issues |
|
100 |
#' @keywords internal |
|
101 |
get_issue_dataset <- function(issue, diff) { |
|
102 | 20x |
issue_df <- diff[[issue]] |
103 | 20x |
keep <- names(issue_df)[!(names(issue_df) %in% c("BASE", "COMPARE", "VARIABLE"))] |
104 | 20x |
issue_df[, keep, drop = FALSE] |
105 |
} |
1 |
#' diffdf |
|
2 |
#' @description |
|
3 |
#' Compares 2 dataframes and outputs any differences. |
|
4 |
#' @param base input dataframe |
|
5 |
#' @param compare comparison dataframe |
|
6 |
#' @param keys vector of variables (as strings) that defines a unique row in |
|
7 |
#' the base and compare dataframes |
|
8 |
#' @param strict_numeric Flag for strict numeric to numeric comparisons |
|
9 |
#' (default = TRUE). If False diffdf will cast integer to double where |
|
10 |
#' required for comparisons. Note that variables specified in the keys |
|
11 |
#' will never be casted. |
|
12 |
#' @param strict_factor Flag for strict factor to character comparisons |
|
13 |
#' (default = TRUE). If False diffdf will cast factors to characters where |
|
14 |
#' required for comparisons. Note that variables specified in the keys will |
|
15 |
#' never be casted. |
|
16 |
#' @param suppress_warnings Do you want to suppress warnings? (logical) |
|
17 |
#' @param file Location and name of a text file to output the results to. |
|
18 |
#' Setting to NULL will cause no file to be produced. |
|
19 |
#' @param tolerance Set tolerance for numeric comparisons. Note that |
|
20 |
#' comparisons fail if (x-y)/scale > tolerance. |
|
21 |
#' @param scale Set scale for numeric comparisons. Note that comparisons fail |
|
22 |
#' if (x-y)/scale > tolerance. Setting as NULL is a slightly more efficient |
|
23 |
#' version of scale = 1. |
|
24 |
#' @param check_column_order Should the column ordering be checked? (logical) |
|
25 |
#' @param check_df_class Do you want to check for differences in the class |
|
26 |
#' between `base` and `compare`? (logical) |
|
27 |
#' @examples |
|
28 |
#' x <- subset(iris, -Species) |
|
29 |
#' x[1, 2] <- 5 |
|
30 |
#' COMPARE <- diffdf(iris, x) |
|
31 |
#' print(COMPARE) |
|
32 |
#' |
|
33 |
#' #### Sample data frames |
|
34 |
#' |
|
35 |
#' DF1 <- data.frame( |
|
36 |
#' id = c(1, 2, 3, 4, 5, 6), |
|
37 |
#' v1 = letters[1:6], |
|
38 |
#' v2 = c(NA, NA, 1, 2, 3, NA) |
|
39 |
#' ) |
|
40 |
#' |
|
41 |
#' DF2 <- data.frame( |
|
42 |
#' id = c(1, 2, 3, 4, 5, 7), |
|
43 |
#' v1 = letters[1:6], |
|
44 |
#' v2 = c(NA, NA, 1, 2, NA, NA), |
|
45 |
#' v3 = c(NA, NA, 1, 2, NA, 4) |
|
46 |
#' ) |
|
47 |
#' |
|
48 |
#' diffdf(DF1, DF1, keys = "id") |
|
49 |
#' |
|
50 |
#' # We can control matching with scale/location for example: |
|
51 |
#' |
|
52 |
#' DF1 <- data.frame( |
|
53 |
#' id = c(1, 2, 3, 4, 5, 6), |
|
54 |
#' v1 = letters[1:6], |
|
55 |
#' v2 = c(1, 2, 3, 4, 5, 6) |
|
56 |
#' ) |
|
57 |
#' DF2 <- data.frame( |
|
58 |
#' id = c(1, 2, 3, 4, 5, 6), |
|
59 |
#' v1 = letters[1:6], |
|
60 |
#' v2 = c(1.1, 2, 3, 4, 5, 6) |
|
61 |
#' ) |
|
62 |
#' |
|
63 |
#' diffdf(DF1, DF2, keys = "id") |
|
64 |
#' diffdf(DF1, DF2, keys = "id", tolerance = 0.2) |
|
65 |
#' diffdf(DF1, DF2, keys = "id", scale = 10, tolerance = 0.2) |
|
66 |
#' |
|
67 |
#' # We can use strict_factor to compare factors with characters for example: |
|
68 |
#' |
|
69 |
#' DF1 <- data.frame( |
|
70 |
#' id = c(1, 2, 3, 4, 5, 6), |
|
71 |
#' v1 = letters[1:6], |
|
72 |
#' v2 = c(NA, NA, 1, 2, 3, NA), |
|
73 |
#' stringsAsFactors = FALSE |
|
74 |
#' ) |
|
75 |
#' |
|
76 |
#' DF2 <- data.frame( |
|
77 |
#' id = c(1, 2, 3, 4, 5, 6), |
|
78 |
#' v1 = letters[1:6], |
|
79 |
#' v2 = c(NA, NA, 1, 2, 3, NA) |
|
80 |
#' ) |
|
81 |
#' |
|
82 |
#' diffdf(DF1, DF2, keys = "id", strict_factor = TRUE) |
|
83 |
#' diffdf(DF1, DF2, keys = "id", strict_factor = FALSE) |
|
84 |
#' |
|
85 |
#' @export |
|
86 |
diffdf <- function( |
|
87 |
base, |
|
88 |
compare, |
|
89 |
keys = NULL, |
|
90 |
suppress_warnings = FALSE, |
|
91 |
strict_numeric = TRUE, |
|
92 |
strict_factor = TRUE, |
|
93 |
file = NULL, |
|
94 |
tolerance = sqrt(.Machine$double.eps), |
|
95 |
scale = NULL, |
|
96 |
check_column_order = FALSE, |
|
97 |
check_df_class = FALSE |
|
98 |
) { |
|
99 | ||
100 | 124x |
assertthat::assert_that( |
101 | 124x |
assertthat::is.flag(check_df_class), |
102 | 124x |
!is.na(check_df_class), |
103 | 124x |
msg = "`check_df_class` must be a length 1 logical" |
104 |
) |
|
105 | ||
106 | 124x |
BASE <- base |
107 | 124x |
COMP <- compare |
108 | 124x |
KEYS <- keys |
109 | 124x |
SUPWARN <- suppress_warnings |
110 | ||
111 |
### Initatiate output object |
|
112 | 124x |
COMPARE <- list() |
113 | 124x |
class(COMPARE) <- c("diffdf", "list") |
114 | ||
115 | ||
116 | 124x |
BASE_NAME <- deparse(substitute(base)) |
117 | 124x |
COMP_NAME <- deparse(substitute(compare)) |
118 | 124x |
COMPARE[["DataSummary"]] <- construct_issue( |
119 | 124x |
value = describe_dataframe(BASE, COMP, BASE_NAME, COMP_NAME), |
120 | 124x |
message = "Summary of BASE and COMPARE" |
121 |
) |
|
122 | ||
123 | ||
124 | 124x |
is_derived <- FALSE |
125 | ||
126 |
### If no key is suplied match values based upon row number |
|
127 | 124x |
if (is.null(KEYS)) { |
128 | 102x |
is_derived <- TRUE |
129 | 102x |
keyname <- generate_keyname(BASE, COMP) |
130 | 102x |
BASE[[keyname]] <- seq_len(nrow(BASE)) |
131 | 102x |
COMP[[keyname]] <- seq_len(nrow(COMP)) |
132 | 102x |
KEYS <- keyname |
133 |
} |
|
134 | 124x |
attr(COMPARE, "keys") <- list(value = KEYS, is_derived = is_derived) |
135 | ||
136 | 124x |
assertthat::assert_that( |
137 | 124x |
is.numeric(tolerance), |
138 | 124x |
is.numeric(scale) || is.null(scale) |
139 |
) |
|
140 | ||
141 | 120x |
missing_keys_base <- KEYS[!KEYS %in% names(BASE)] |
142 | 120x |
assertthat::assert_that( |
143 | 120x |
length(missing_keys_base) == 0, |
144 | 120x |
msg = sprintf( |
145 | 120x |
"The following KEYS are not available in BASE:\n %s", |
146 | 120x |
paste(missing_keys_base, collapse = "\n ") |
147 |
) |
|
148 |
) |
|
149 | ||
150 | 119x |
missing_keys_comp <- KEYS[!KEYS %in% names(COMP)] |
151 | 119x |
assertthat::assert_that( |
152 | 119x |
length(missing_keys_comp) == 0, |
153 | 119x |
msg = sprintf( |
154 | 119x |
"The following KEYS are not available in COMPARE:\n %s", |
155 | 119x |
paste(missing_keys_comp, collapse = "\n ") |
156 |
) |
|
157 |
) |
|
158 | ||
159 | 118x |
assertthat::assert_that( |
160 | 118x |
has_unique_rows(BASE, KEYS), |
161 | 118x |
msg = "BY variables in BASE do not result in unique observations" |
162 |
) |
|
163 | ||
164 | 117x |
assertthat::assert_that( |
165 | 117x |
has_unique_rows(COMP, KEYS), |
166 | 117x |
msg = "BY variables in COMPARE do not result in unique observations" |
167 |
) |
|
168 | ||
169 | ||
170 | ||
171 |
#### Check essential variable properties (class & mode) |
|
172 | ||
173 | 117x |
COMPARE[["UnsupportedColsBase"]] <- construct_issue( |
174 | 117x |
value = identify_unsupported_cols(BASE), |
175 | 117x |
message = "There are columns in BASE with unsupported modes !!" |
176 |
) |
|
177 | ||
178 | ||
179 | 117x |
COMPARE[["UnsupportedColsComp"]] <- construct_issue( |
180 | 117x |
value = identify_unsupported_cols(COMP), |
181 | 117x |
message = "There are columns in COMPARE with unsupported modes !!" |
182 |
) |
|
183 | ||
184 | ||
185 |
# cast variables if strict is off |
|
186 | 117x |
if (!strict_factor || !strict_numeric) { |
187 | 9x |
casted_df <- cast_variables( |
188 | 9x |
BASE = BASE, |
189 | 9x |
COMPARE = COMP, |
190 | 9x |
ignore_vars = KEYS, |
191 | 9x |
cast_integers = !strict_numeric, |
192 | 9x |
cast_factors = !strict_factor |
193 |
) |
|
194 | ||
195 | 9x |
BASE <- casted_df$BASE |
196 | 9x |
COMP <- casted_df$COMP |
197 |
} |
|
198 | ||
199 | ||
200 | 117x |
COMPARE[["VarModeDiffs"]] <- construct_issue( |
201 | 117x |
value = identify_mode_differences(BASE, COMP), |
202 | 117x |
message = "There are columns in BASE and COMPARE with different modes !!" |
203 |
) |
|
204 | ||
205 | ||
206 | 117x |
COMPARE[["VarClassDiffs"]] <- construct_issue( |
207 | 117x |
value = identify_class_differences(BASE, COMP), |
208 | 117x |
message = "There are columns in BASE and COMPARE with different classes !!" |
209 |
) |
|
210 | ||
211 | ||
212 | ||
213 | ||
214 |
##### Check Validity of Keys |
|
215 | ||
216 | 117x |
BASE_keys <- names(BASE)[names(BASE) %in% KEYS] |
217 | 117x |
COMP_keys <- names(COMP)[names(COMP) %in% KEYS] |
218 | ||
219 | 117x |
assertthat::assert_that( |
220 | 117x |
length(BASE_keys) == length(KEYS), |
221 | 117x |
msg = "BASE is missing variables specified in KEYS" |
222 |
) |
|
223 | ||
224 | 117x |
assertthat::assert_that( |
225 | 117x |
length(COMP_keys) == length(KEYS), |
226 | 117x |
msg = "COMP is missing variables specified in KEYS" |
227 |
) |
|
228 | ||
229 | ||
230 | 117x |
assert_valid_keys( |
231 | 117x |
COMPARE, KEYS, "UnsupportedColsBase", |
232 | 117x |
"The following KEYS in BASE have an unsupported mode (see `?mode()`)" |
233 |
) |
|
234 | 116x |
assert_valid_keys( |
235 | 116x |
COMPARE, KEYS, "UnsupportedColsComp", |
236 | 116x |
"The following KEYS in COMPARE have an unsupported mode (see `?mode()`)" |
237 |
) |
|
238 | 115x |
assert_valid_keys( |
239 | 115x |
COMPARE, KEYS, "VarModeDiffs", |
240 | 115x |
"The following KEYS have different modes between BASE and COMPARE" |
241 |
) |
|
242 | 113x |
assert_valid_keys( |
243 | 113x |
COMPARE, KEYS, "VarClassDiffs", |
244 | 113x |
"The following KEYS have different classes between BASE and COMPARE" |
245 |
) |
|
246 | ||
247 | ||
248 | 112x |
exclude_cols <- c( |
249 | 112x |
COMPARE[["UnsupportedColsBase"]]$VARIABLE, |
250 | 112x |
COMPARE[["UnsupportedColsComp"]]$VARIABLE, |
251 | 112x |
COMPARE[["VarClassDiffs"]]$VARIABLE, |
252 | 112x |
COMPARE[["VarModeDiffs"]]$VARIABLE |
253 |
) |
|
254 | ||
255 | 112x |
if (check_column_order) { |
256 | 5x |
if (attr(COMPARE, "keys")$is_derived) { |
257 | 4x |
keep_vars_base <- !(names(BASE) %in% attr(COMPARE, "keys")$value) |
258 | 4x |
keep_vars_comp <- !(names(COMP) %in% attr(COMPARE, "keys")$value) |
259 |
} else { |
|
260 | 1x |
keep_vars_base <- TRUE |
261 | 1x |
keep_vars_comp <- TRUE |
262 |
} |
|
263 | 5x |
COMPARE[["ColumnOrder"]] <- construct_issue( |
264 | 5x |
value = identify_column_order_differences( |
265 | 5x |
BASE[, keep_vars_base, drop = FALSE], |
266 | 5x |
COMP[, keep_vars_comp, drop = FALSE] |
267 |
), |
|
268 | 5x |
message = "There are differences in the column ordering between BASE and COMPARE !!" |
269 |
) |
|
270 |
} |
|
271 | ||
272 | ||
273 |
##### Check Attributes |
|
274 | 112x |
COMPARE[["AttribDiffs"]] <- construct_issue( |
275 | 112x |
value = identify_att_differences(BASE, COMP, exclude_cols), |
276 | 112x |
message = "There are columns in BASE and COMPARE with differing attributes !!" |
277 |
) |
|
278 | ||
279 | ||
280 |
##### Check data |
|
281 | ||
282 | 112x |
BASE <- factor_to_character(BASE, KEYS) |
283 | 112x |
COMP <- factor_to_character(COMP, KEYS) |
284 | ||
285 | ||
286 | 112x |
COMPARE[["ExtRowsBase"]] <- construct_issue( |
287 | 112x |
value = identify_extra_rows(BASE, COMP, KEYS), |
288 | 112x |
message = "There are rows in BASE that are not in COMPARE !!" |
289 |
) |
|
290 | ||
291 | ||
292 | 112x |
COMPARE[["ExtRowsComp"]] <- construct_issue( |
293 | 112x |
value = identify_extra_rows(COMP, BASE, KEYS), |
294 | 112x |
message = "There are rows in COMPARE that are not in BASE !!" |
295 |
) |
|
296 | ||
297 | ||
298 | 112x |
COMPARE[["ExtColsBase"]] <- construct_issue( |
299 | 112x |
value = identify_extra_cols(BASE, COMP), |
300 | 112x |
message = "There are columns in BASE that are not in COMPARE !!" |
301 |
) |
|
302 | ||
303 | ||
304 | 112x |
COMPARE[["ExtColsComp"]] <- construct_issue( |
305 | 112x |
value = identify_extra_cols(COMP, BASE), |
306 | 112x |
message = "There are columns in COMPARE that are not in BASE !!" |
307 |
) |
|
308 | ||
309 | ||
310 | 112x |
VALUE_DIFFERENCES <- identify_differences( |
311 | 112x |
BASE, COMP, KEYS, exclude_cols, |
312 | 112x |
tolerance = tolerance, |
313 | 112x |
scale = scale |
314 |
) |
|
315 | ||
316 | ||
317 | ||
318 |
## Summarise the number of mismatching rows per variable |
|
319 | ||
320 | 112x |
if (length(VALUE_DIFFERENCES)) { |
321 | 106x |
NDIFF <- sapply(VALUE_DIFFERENCES, nrow) |
322 | 106x |
COMPARE[["NumDiff"]] <- construct_issue( |
323 | 106x |
value = convert_to_issue(NDIFF), |
324 | 106x |
message = "Not all Values Compared Equal" |
325 |
) |
|
326 |
} |
|
327 | ||
328 | ||
329 | 112x |
for (i in names(VALUE_DIFFERENCES)) { |
330 | 917x |
COMPARE[[paste0("VarDiff_", i)]] <- construct_issue( |
331 | 917x |
value = VALUE_DIFFERENCES[[i]], |
332 | 917x |
message = NULL |
333 |
) |
|
334 |
} |
|
335 | ||
336 | ||
337 |
# suppress warning message of data summary if user didn't request to check it |
|
338 |
# we leave the issue in the main compare object though for printing purposes |
|
339 | 112x |
COMPARE_WARNINGS <- COMPARE |
340 | 112x |
attr(COMPARE_WARNINGS[["DataSummary"]], "message") <- c( |
341 | 112x |
"There are differences between the class of BASE and COMPARE" |
342 |
) |
|
343 | 112x |
if (!check_df_class || identical(class(base), class(compare))) { |
344 | 110x |
COMPARE_WARNINGS["DataSummary"] <- NULL |
345 |
} |
|
346 | ||
347 |
# Get all issue messages, remove blank message, and collapse into single string |
|
348 | 112x |
ISSUE_MSGS <- sapply(COMPARE_WARNINGS, function(x) get_issue_message(x)) |
349 | 112x |
ISSUE_MSGS <- Filter(function(x) !is.null(x), ISSUE_MSGS) |
350 | 112x |
ISSUE_MSGS <- Filter(function(x) x != "", ISSUE_MSGS) |
351 | ||
352 | 112x |
if (length(ISSUE_MSGS) != 0) { |
353 | 73x |
if (!SUPWARN) { |
354 | 45x |
ISSUE_MSGS <- paste(ISSUE_MSGS, collapse = "\n") |
355 | 45x |
warning(c("\n", ISSUE_MSGS)) |
356 |
} |
|
357 |
} |
|
358 | ||
359 |
# If the classes are the same and it is the only entry in the compare |
|
360 |
# object then remove it in order to trigger "no issues found" |
|
361 | 112x |
if (identical(class(base), class(compare)) && length(COMPARE) == 1) { |
362 | 38x |
COMPARE["DataSummary"] <- NULL |
363 |
} |
|
364 | ||
365 |
# If the summary is the only item and the user didn't want to check classes |
|
366 |
# then remove the object to trigger the "no issues found" |
|
367 | 112x |
if (!check_df_class && length(COMPARE) == 1) { |
368 | 1x |
COMPARE["DataSummary"] <- NULL |
369 |
} |
|
370 | ||
371 | ||
372 | 112x |
if (!is.null(file)) { |
373 | ! |
x <- print(COMPARE, as_string = TRUE) |
374 | ||
375 | ! |
tryCatch( |
376 |
{ |
|
377 | ! |
sink(file) |
378 | ! |
cat(x, sep = "\n") |
379 | ! |
sink() |
380 |
}, |
|
381 | ! |
warning = function(w) { |
382 | ! |
sink() |
383 | ! |
warning(w) |
384 |
}, |
|
385 | ! |
error = function(e) { |
386 | ! |
sink() |
387 | ! |
stop(e) |
388 |
} |
|
389 |
) |
|
390 | ! |
return(invisible(COMPARE)) |
391 |
} |
|
392 | ||
393 | 112x |
return(COMPARE) |
394 |
} |
|
395 | ||
396 | ||
397 | ||
398 | ||
399 |
#' diffdf_has_issues |
|
400 |
#' |
|
401 |
#' Utility function which returns TRUE if an diffdf |
|
402 |
#' object has issues or FALSE if an diffdf object does not have issues |
|
403 |
#' @param x diffdf object |
|
404 |
#' @examples |
|
405 |
#' |
|
406 |
#' # Example with no issues |
|
407 |
#' x <- diffdf(iris, iris) |
|
408 |
#' diffdf_has_issues(x) |
|
409 |
#' |
|
410 |
#' # Example with issues |
|
411 |
#' iris2 <- iris |
|
412 |
#' iris2[2, 2] <- NA |
|
413 |
#' x <- diffdf(iris, iris2, suppress_warnings = TRUE) |
|
414 |
#' diffdf_has_issues(x) |
|
415 |
#' @export |
|
416 |
diffdf_has_issues <- function(x) { |
|
417 | ! |
if (class(x)[[1]] != "diffdf") stop("x is not an diffdf object") |
418 | 3x |
return(length(x) != 0) |
419 |
} |
|
420 | ||
421 | ||
422 |
#' Assert that keys are valid |
|
423 |
#' |
|
424 |
#' Utility function to check that user provided "keys" aren't listed as a problem |
|
425 |
#' variable of the current list of issues. |
|
426 |
#' @param COMPARE (`list`)\cr A named list of which each element is a `data.frame` with the |
|
427 |
#' column `VARIABLE` |
|
428 |
#' @param KEYS (`character`)\cr name of key variables to check to make sure they don't contain |
|
429 |
#' any issues |
|
430 |
#' @param component (`character`)\cr name of the component within `COMPARE` to check against |
|
431 |
#' @param msg (`character`)\cr error message to print if any of `KEYS` are found within |
|
432 |
#' `COMPARE[component]$VARIABLE` |
|
433 |
#' @keywords internal |
|
434 |
assert_valid_keys <- function(COMPARE, KEYS, component, msg) { |
|
435 | 461x |
keys_reduced <- KEYS[KEYS %in% COMPARE[[component]]$VARIABLE] |
436 | 461x |
assertthat::assert_that( |
437 | 461x |
length(keys_reduced) == 0, |
438 | 461x |
msg = sprintf( |
439 | 461x |
"%s:\n%s", |
440 | 461x |
msg, |
441 | 461x |
paste0("`", paste0(keys_reduced, collapse = "`, `"), "`") |
442 |
) |
|
443 |
) |
|
444 |
} |
1 |
#' factor_to_character |
|
2 |
#' |
|
3 |
#' Takes a dataframe and converts any factor variables to character |
|
4 |
#' @param dsin input dataframe |
|
5 |
#' @param vars variables to consider for conversion. Default NULL will consider |
|
6 |
#' every variable within the dataset |
|
7 |
#' @keywords internal |
|
8 |
factor_to_character <- function(dsin, vars = NULL) { |
|
9 | ! |
if (is.null(vars)) vars <- names(dsin) |
10 | ||
11 | 224x |
for (var in vars) { |
12 | 236x |
if (is.factor(dsin[[var]])) { |
13 | ! |
dsin[[var]] <- as.character(dsin[[var]]) |
14 |
} |
|
15 |
} |
|
16 | 224x |
return(dsin) |
17 |
} |
|
18 | ||
19 | ||
20 | ||
21 | ||
22 |
#' has_unique_rows |
|
23 |
#' |
|
24 |
#' Check if a data sets rows are unique |
|
25 |
#' @param DAT input data set (data frame) |
|
26 |
#' @param KEYS Set of keys which should be unique |
|
27 |
#' @keywords internal |
|
28 |
has_unique_rows <- function(DAT, KEYS) { |
|
29 | 235x |
DUPS <- duplicated(subset(DAT, select = KEYS)) |
30 | 235x |
NDUPS <- sum(DUPS) |
31 | 235x |
return(NDUPS == 0) |
32 |
} |
|
33 | ||
34 |
#' convert_to_issue |
|
35 |
#' |
|
36 |
#' converts the count value into the correct issue format |
|
37 |
#' @param datin data inputted |
|
38 |
#' @importFrom tibble rownames_to_column |
|
39 |
#' @keywords internal |
|
40 |
convert_to_issue <- function(datin) { |
|
41 | 106x |
datin_tibble <- tibble( |
42 | 106x |
`Variable` = names(datin), |
43 | 106x |
`No of Differences` = datin |
44 |
) |
|
45 | ||
46 | 106x |
datin_tibble_reduced <- datin_tibble[datin_tibble[["No of Differences"]] > 0, , drop = FALSE] |
47 | 106x |
return(datin_tibble_reduced) |
48 |
} |
|
49 | ||
50 |
#' Describe the datasets being compared |
|
51 |
#' |
|
52 |
#' This function is used to produce a basic summary table of the core |
|
53 |
#' features of the two `data.frame`'s being compared. |
|
54 |
#' @param base (`data.frame`)\cr base dataset to be described |
|
55 |
#' @param comp (`data.frame`)\cr comparison dataset to be described |
|
56 |
#' @param base_name (`character`)\cr name of the base dataset |
|
57 |
#' @param comp_name (`character`)\cr name of the comparison dataset |
|
58 |
#' @keywords internal |
|
59 |
describe_dataframe <- function(base, comp, base_name, comp_name) { |
|
60 | 124x |
tibble( |
61 | 124x |
PROPERTY = list( |
62 | 124x |
"Name", |
63 | 124x |
"Class", |
64 | 124x |
"Rows(#)", |
65 | 124x |
"Columns(#)" |
66 |
), |
|
67 | 124x |
BASE = c( |
68 | 124x |
base_name, |
69 | 124x |
paste(class(base), collapse = ", "), |
70 | 124x |
as.character(nrow(base)), |
71 | 124x |
as.character(ncol(base)) |
72 |
), |
|
73 | 124x |
COMP = c( |
74 | 124x |
comp_name, |
75 | 124x |
paste(class(comp), collapse = ", "), |
76 | 124x |
as.character(nrow(comp)), |
77 | 124x |
as.character(ncol(comp)) |
78 |
) |
|
79 |
) |
|
80 |
} |
1 |
#' identify_extra_rows |
|
2 |
#' |
|
3 |
#' Identifies rows that are in a baseline dataset but not in a comparator dataset |
|
4 |
#' @param DS1 Baseline dataset (data frame) |
|
5 |
#' @param DS2 Comparator dataset (data frame) |
|
6 |
#' @param KEYS List of variables that define a unique row within the datasets (strings) |
|
7 |
#' @keywords internal |
|
8 |
identify_extra_rows <- function(DS1, DS2, KEYS) { |
|
9 | 224x |
if (nrow(DS2) == 0 || nrow(DS1) == 0) { |
10 | 10x |
return(DS1[, KEYS, drop = FALSE]) |
11 |
} |
|
12 | 214x |
DS2[["..FLAG.."]] <- "Y" |
13 | 214x |
dat <- merge( |
14 | 214x |
subset(DS1, select = KEYS), |
15 | 214x |
subset(DS2, select = c(KEYS, "..FLAG..")), |
16 | 214x |
by = KEYS, all.x = TRUE, |
17 | 214x |
sort = TRUE |
18 |
) |
|
19 | 214x |
dat <- dat[do.call("order", dat[KEYS]), ] |
20 | ||
21 | 214x |
dat[is.na(dat[["..FLAG.."]]), KEYS, drop = FALSE] |
22 |
} |
|
23 | ||
24 | ||
25 | ||
26 |
#' identify_extra_cols |
|
27 |
#' |
|
28 |
#' Identifies columns that are in a baseline dataset but not in a comparator dataset |
|
29 |
#' @param DS1 Baseline dataset (data frame) |
|
30 |
#' @param DS2 Comparator dataset (data frame) |
|
31 |
#' @importFrom tibble tibble |
|
32 |
#' @keywords internal |
|
33 |
identify_extra_cols <- function(DS1, DS2) { |
|
34 | 224x |
match.cols <- sapply(names(DS1), "%in%", names(DS2)) |
35 | 224x |
assertthat::assert_that( |
36 | 224x |
all(is.logical(match.cols)), |
37 | 224x |
msg = "Assumption of logical return type is not true" |
38 |
) |
|
39 | 224x |
tibble( |
40 | 224x |
COLUMNS = names(DS1)[!match.cols] |
41 |
) |
|
42 |
} |
|
43 | ||
44 | ||
45 | ||
46 | ||
47 | ||
48 | ||
49 | ||
50 |
#' identify_matching_cols |
|
51 |
#' |
|
52 |
#' Identifies columns with the same name in two data frames |
|
53 |
#' @param DS1 Input dataset 1 (data frame) |
|
54 |
#' @param DS2 Input dataset 2 (data frame) |
|
55 |
#' @param EXCLUDE Columns to ignore |
|
56 |
#' @keywords internal |
|
57 |
identify_matching_cols <- function(DS1, DS2, EXCLUDE = "") { |
|
58 | 768x |
match_cols <- sapply(names(DS1), "%in%", names(DS2)) |
59 | 768x |
exclude_cols <- sapply(names(DS1), "%in%", EXCLUDE) |
60 | 768x |
names(DS1)[match_cols & !exclude_cols] |
61 |
} |
|
62 | ||
63 | ||
64 | ||
65 | ||
66 | ||
67 |
#' identify_unsupported_cols |
|
68 |
#' |
|
69 |
#' Identifies any columns for which the package is not setup to handle |
|
70 |
#' @param dsin input dataset |
|
71 |
#' @keywords internal |
|
72 |
identify_unsupported_cols <- function(dsin) { |
|
73 | 234x |
dat <- subset( |
74 | 234x |
identify_properties(dsin), |
75 | 234x |
select = c("VARIABLE", "MODE") |
76 |
) |
|
77 | ||
78 | 234x |
dat[!dat[["MODE"]] %in% c("numeric", "character", "logical"), , drop = FALSE] |
79 |
} |
|
80 | ||
81 | ||
82 | ||
83 |
#' identify_mode_differences |
|
84 |
#' |
|
85 |
#' Identifies any mode differences between two data frames |
|
86 |
#' @param BASE Base dataset for comparison (data.frame) |
|
87 |
#' @param COMP Comparator dataset to compare base against (data.frame) |
|
88 |
#' @keywords internal |
|
89 |
identify_mode_differences <- function(BASE, COMP) { |
|
90 | 417x |
matching_cols <- identify_matching_cols(BASE, COMP) |
91 | ||
92 | 417x |
dat <- merge( |
93 | 417x |
x = identify_properties(BASE), |
94 | 417x |
y = identify_properties(COMP), |
95 | 417x |
by = "VARIABLE", |
96 | 417x |
all = TRUE, |
97 | 417x |
suffixes = c(".BASE", ".COMP"), |
98 | 417x |
sort = TRUE |
99 |
) |
|
100 | 417x |
dat <- subset(dat, select = c("VARIABLE", "MODE.BASE", "MODE.COMP")) |
101 | ||
102 | 417x |
KEEP1 <- dat[["VARIABLE"]] %in% matching_cols |
103 | 417x |
KEEP2 <- dat[["MODE.BASE"]] != dat[["MODE.COMP"]] |
104 | ||
105 | 417x |
dat[KEEP1 & KEEP2, , drop = FALSE] |
106 |
} |
|
107 | ||
108 | ||
109 | ||
110 |
#' identify_class_differences |
|
111 |
#' |
|
112 |
#' Identifies any class differences between two data frames |
|
113 |
#' @param BASE Base dataset for comparison (data.frame) |
|
114 |
#' @param COMP Comparator dataset to compare base against (data.frame) |
|
115 |
#' @keywords internal |
|
116 |
identify_class_differences <- function(BASE, COMP) { |
|
117 | 117x |
matching_cols <- identify_matching_cols(BASE, COMP) |
118 | ||
119 | 117x |
dat <- merge( |
120 | 117x |
x = identify_properties(BASE), |
121 | 117x |
y = identify_properties(COMP), |
122 | 117x |
by = "VARIABLE", |
123 | 117x |
all = TRUE, |
124 | 117x |
sort = TRUE, |
125 | 117x |
suffixes = c(".BASE", ".COMP") |
126 |
) |
|
127 | ||
128 | 117x |
dat <- subset(dat, select = c("VARIABLE", "CLASS.BASE", "CLASS.COMP")) |
129 | ||
130 | 117x |
KEEP1 <- dat[["VARIABLE"]] %in% matching_cols |
131 | 117x |
KEEP2 <- !mapply( |
132 | 117x |
identical, |
133 | 117x |
dat[["CLASS.BASE"]], |
134 | 117x |
dat[["CLASS.COMP"]] |
135 |
) |
|
136 | ||
137 | 117x |
dat[KEEP1 & KEEP2, , drop = FALSE] |
138 |
} |
|
139 | ||
140 | ||
141 | ||
142 |
#' Identify differences in attributes |
|
143 |
#' |
|
144 |
#' Identifies any attribute differences between two data frames |
|
145 |
#' @param BASE Base dataset for comparison (data.frame) |
|
146 |
#' @param COMP Comparator dataset to compare base against (data.frame) |
|
147 |
#' @param exclude_cols Columns to exclude from comparison |
|
148 |
#' @importFrom tibble tibble |
|
149 |
#' @keywords internal |
|
150 |
identify_att_differences <- function(BASE, COMP, exclude_cols = "") { |
|
151 | 122x |
matching_cols <- identify_matching_cols(BASE, COMP, exclude_cols) |
152 | ||
153 | 122x |
PROPS <- merge( |
154 | 122x |
x = identify_properties(BASE), |
155 | 122x |
y = identify_properties(COMP), |
156 | 122x |
by = "VARIABLE", |
157 | 122x |
all = TRUE, |
158 | 122x |
sort = TRUE, |
159 | 122x |
suffixes = c(".BASE", ".COMP") |
160 |
) |
|
161 | ||
162 | 122x |
PROPS <- subset(PROPS, select = c("VARIABLE", "ATTRIBS.BASE", "ATTRIBS.COMP")) |
163 | ||
164 | 122x |
PROPS <- PROPS[PROPS[["VARIABLE"]] %in% matching_cols, , drop = FALSE] |
165 | ||
166 | ||
167 |
### Setup dummy return value |
|
168 | 122x |
RETURN <- tibble( |
169 | 122x |
VARIABLE = character(), |
170 | 122x |
ATTR_NAME = character(), |
171 | 122x |
VALUES.BASE = list(), |
172 | 122x |
VALUES.COMP = list() |
173 |
) |
|
174 | ||
175 | 122x |
for (i in PROPS[["VARIABLE"]]) { |
176 | 1157x |
PROPS_filt <- PROPS[PROPS[["VARIABLE"]] == i, , drop = FALSE] |
177 | ||
178 |
### Get a vector of all available attributes across both variables |
|
179 | 1157x |
ATTRIB_NAMES <- unique(c( |
180 | 1157x |
names(PROPS_filt[["ATTRIBS.BASE"]][[1]]), |
181 | 1157x |
names(PROPS_filt[["ATTRIBS.COMP"]][[1]]) |
182 |
)) |
|
183 | ||
184 |
### If variable has no attributes move onto the next variable |
|
185 | 850x |
if (is.null(ATTRIB_NAMES)) next() |
186 | ||
187 |
### Loop over each attribute checking if they are identical and outputing |
|
188 |
### anyones that arn't |
|
189 | 307x |
for (j in ATTRIB_NAMES) { |
190 | 528x |
ATTRIB_BASE <- PROPS_filt[["ATTRIBS.BASE"]][[1]][j] |
191 | 528x |
ATTRIB_COMP <- PROPS_filt[["ATTRIBS.COMP"]][[1]][j] |
192 | ||
193 | 528x |
if (!identical(ATTRIB_BASE, ATTRIB_COMP)) { |
194 | 64x |
ATT_DIFFS <- tibble( |
195 | 64x |
VARIABLE = i, |
196 | 64x |
ATTR_NAME = j, |
197 | 64x |
VALUES.BASE = ifelse(is.null(ATTRIB_BASE), list(), ATTRIB_BASE), |
198 | 64x |
VALUES.COMP = ifelse(is.null(ATTRIB_COMP), list(), ATTRIB_COMP) |
199 |
) |
|
200 | ||
201 | 64x |
RETURN <- rbind(RETURN, ATT_DIFFS) |
202 |
} |
|
203 |
} |
|
204 |
} |
|
205 | 122x |
return(RETURN) |
206 |
} |
|
207 | ||
208 | ||
209 | ||
210 | ||
211 | ||
212 |
#' identify_differences |
|
213 |
#' |
|
214 |
#' Compares each column within 2 datasets to identify any values which they |
|
215 |
#' mismatch on. |
|
216 |
#' @param BASE Base dataset for comparison (data.frame) |
|
217 |
#' @param COMP Comparator dataset to compare base against (data.frame) |
|
218 |
#' @param KEYS List of variables that define a unique row within the datasets (strings) |
|
219 |
#' @param exclude_cols Columns to exclude from comparison |
|
220 |
#' @param tolerance Level of tolerance for numeric differences between two variables |
|
221 |
#' @param scale Scale that tolerance should be set on. If NULL assume absolute |
|
222 |
#' @keywords internal |
|
223 |
identify_differences <- function( |
|
224 |
BASE, |
|
225 |
COMP, |
|
226 |
KEYS, |
|
227 |
exclude_cols, |
|
228 |
tolerance = sqrt(.Machine$double.eps), |
|
229 |
scale = NULL |
|
230 |
) { |
|
231 | ||
232 | 112x |
matching_cols <- identify_matching_cols(BASE, COMP, c(KEYS, exclude_cols)) |
233 | ||
234 | 112x |
if (length(matching_cols) == 0) { |
235 | 2x |
return(tibble()) |
236 |
} |
|
237 | ||
238 | 110x |
DAT <- merge( |
239 | 110x |
x = BASE, |
240 | 110x |
y = COMP, |
241 | 110x |
by = KEYS, |
242 | 110x |
suffix = c(".x", ".y"), |
243 | 110x |
sort = TRUE |
244 |
) |
|
245 | 110x |
if (nrow(DAT) == 0) { |
246 | 4x |
return(tibble()) |
247 |
} |
|
248 | 106x |
DAT <- DAT[do.call("order", DAT[KEYS]), ] |
249 | ||
250 | 106x |
matching_list <- mapply( |
251 | 106x |
is_variable_different, |
252 | 106x |
matching_cols, |
253 | 106x |
MoreArgs = list( |
254 | 106x |
keynames = KEYS, |
255 | 106x |
datain = DAT, |
256 | 106x |
tolerance = tolerance, |
257 | 106x |
scale = scale |
258 |
), |
|
259 | 106x |
SIMPLIFY = FALSE |
260 |
) |
|
261 | ||
262 | 106x |
matching_list |
263 |
} |
|
264 | ||
265 | ||
266 | ||
267 | ||
268 | ||
269 | ||
270 | ||
271 | ||
272 | ||
273 |
#' identify_properties |
|
274 |
#' |
|
275 |
#' Returns a dataframe of metadata for a given dataset. |
|
276 |
#' Returned values include variable names , class , mode , type & attributes |
|
277 |
#' @param dsin input dataframe that you want to get the metadata from |
|
278 |
#' @importFrom tibble tibble |
|
279 |
#' @keywords internal |
|
280 |
identify_properties <- function(dsin) { |
|
281 |
### If missing or null return empty dataset |
|
282 | 1546x |
if (is.null(dsin)) { |
283 | ! |
x <- tibble( |
284 | ! |
VARIABLE = character(), |
285 | ! |
CLASS = list(), |
286 | ! |
MODE = character(), |
287 | ! |
TYPE = character(), |
288 | ! |
ATTRIBS = list() |
289 |
) |
|
290 | ! |
return(x) |
291 |
} |
|
292 | ||
293 | 1546x |
tibble( |
294 | 1546x |
VARIABLE = names(dsin), |
295 | 1546x |
CLASS = lapply(dsin, class), |
296 | 1546x |
MODE = sapply(dsin, mode), |
297 | 1546x |
TYPE = sapply(dsin, typeof), |
298 | 1546x |
ATTRIBS = lapply(dsin, attributes) |
299 |
) |
|
300 |
} |
|
301 | ||
302 | ||
303 |
#' Find column ordering differences |
|
304 |
#' |
|
305 |
#' Compares two datasets and outputs a table listing any differences in the column |
|
306 |
#' orders between the two datasets. Columns that are not contained within both |
|
307 |
#' are ignored however column ordering is derived prior to removing these columns. |
|
308 |
#' |
|
309 |
#' @param BASE (`data.frame`)\cr Base dataset for comparison |
|
310 |
#' @param COMP (`data.frame`)\cr Comparator dataset to compare base against |
|
311 |
#' @keywords internal |
|
312 |
identify_column_order_differences <- function(BASE, COMP) { |
|
313 | 8x |
base_cols <- tibble( |
314 | 8x |
COLUMN = names(BASE), |
315 | 8x |
"BASE-INDEX" = seq_along(names(BASE)) |
316 |
) |
|
317 | 8x |
comp_cols <- tibble( |
318 | 8x |
COLUMN = names(COMP), |
319 | 8x |
"COMPARE-INDEX" = seq_along(names(COMP)) |
320 |
) |
|
321 | 8x |
col_index <- merge( |
322 | 8x |
base_cols, |
323 | 8x |
comp_cols, |
324 | 8x |
by = c("COLUMN"), |
325 | 8x |
all = TRUE, |
326 | 8x |
sort = FALSE |
327 |
) |
|
328 | 8x |
keep_rows <- col_index[["BASE-INDEX"]] != col_index[["COMPARE-INDEX"]] |
329 | 8x |
keep_rows[is.na(keep_rows)] <- FALSE |
330 | 8x |
col_index[keep_rows, , drop = FALSE] |
331 |
} |
1 |
#' sort_then_join |
|
2 |
#' |
|
3 |
#' Convenience function to sort two strings and paste them together |
|
4 |
#' @param string1 first string |
|
5 |
#' @param string2 second string |
|
6 |
#' @keywords internal |
|
7 |
sort_then_join <- function(string1, string2) { |
|
8 | 14x |
paste0(sort(c(string1, string2)), collapse = "") |
9 |
} |
|
10 | ||
11 | ||
12 |
#' class_merge |
|
13 |
#' |
|
14 |
#' Convenience function to put all classes an object has into one string |
|
15 |
#' @param x an object |
|
16 |
#' @keywords internal |
|
17 |
class_merge <- function(x) { |
|
18 | 304x |
paste(class(x), collapse = "_") |
19 |
} |
|
20 | ||
21 | ||
22 |
get_message <- function(colname, whichdat, totype) { |
|
23 | 12x |
message(paste0( |
24 | 12x |
"NOTE: Variable ", colname, " in ", tolower(whichdat), " was casted to ", totype |
25 |
)) |
|
26 |
} |
|
27 | ||
28 | ||
29 |
#' get_casted_vector |
|
30 |
#' |
|
31 |
#' casts a vector depending on its type and input |
|
32 |
#' @param colin column to cast |
|
33 |
#' @param colname name of vector |
|
34 |
#' @param whichdat whether base or compare is being casted (used for messages) |
|
35 |
#' @importFrom methods is |
|
36 |
#' @keywords internal |
|
37 |
get_casted_vector <- function(colin, colname, whichdat) { |
|
38 | 24x |
if (is(colin, "factor")) { |
39 | 6x |
get_message(colname, whichdat, "character") |
40 | 6x |
return(as.character(colin)) |
41 |
} |
|
42 | ||
43 | 18x |
if (is(colin, "integer")) { |
44 | 6x |
get_message(colname, whichdat, "numeric") |
45 | 6x |
return(as.numeric(colin)) |
46 |
} |
|
47 | ||
48 | 12x |
colin |
49 |
} |
|
50 | ||
51 | ||
52 | ||
53 |
#' get_casted_dataset |
|
54 |
#' |
|
55 |
#' Internal utility function to loop across a dataset casting all target |
|
56 |
#' variables |
|
57 |
#' @param df dataset to be casted |
|
58 |
#' @param columns columns to be casted |
|
59 |
#' @param whichdat whether base or compare is being casted (used for messages) |
|
60 |
#' @keywords internal |
|
61 |
get_casted_dataset <- function(df, columns, whichdat) { |
|
62 | 22x |
for (col in columns) { |
63 | 24x |
df[[col]] <- get_casted_vector(df[[col]], col, whichdat) |
64 |
} |
|
65 | 22x |
return(df) |
66 |
} |
|
67 | ||
68 | ||
69 | ||
70 | ||
71 | ||
72 | ||
73 |
#' cast_variables |
|
74 |
#' |
|
75 |
#' Function to cast datasets columns if they have differing types |
|
76 |
#' Restricted to specific cases, currently integer and double, and character and factor |
|
77 |
#' @param BASE base dataset |
|
78 |
#' @param COMPARE comparison dataset |
|
79 |
#' @param ignore_vars Variables not to be considered for casting |
|
80 |
#' @param cast_integers Logical - Whether integers should be cased to double when compared to doubles |
|
81 |
#' @param cast_factors Logical - Whether characters should be casted to characters when compared to characters |
|
82 |
#' @keywords internal |
|
83 |
cast_variables <- function( |
|
84 |
BASE, |
|
85 |
COMPARE, |
|
86 |
ignore_vars = NULL, |
|
87 |
cast_integers = FALSE, |
|
88 |
cast_factors = FALSE |
|
89 |
) { |
|
90 | ||
91 | 13x |
allowed_class_casts <- c("integernumeric", "characterfactor")[c(cast_integers, cast_factors)] |
92 | ||
93 | 13x |
BASE_class <- data.frame( |
94 | 13x |
class_BASE = sapply(BASE, class_merge), |
95 | 13x |
colname = names(BASE), |
96 | 13x |
stringsAsFactors = FALSE |
97 |
) |
|
98 | 13x |
BASE_class <- BASE_class[!BASE_class[["colname"]] %in% ignore_vars, , drop = FALSE] |
99 | ||
100 | ||
101 | 13x |
COMPARE_class <- data.frame( |
102 | 13x |
class_COMPARE = sapply(COMPARE, class_merge), |
103 | 13x |
colname = names(COMPARE), |
104 | 13x |
stringsAsFactors = FALSE |
105 |
) |
|
106 | 13x |
COMPARE_class <- COMPARE_class[!COMPARE_class[["colname"]] %in% ignore_vars, , drop = FALSE] |
107 | ||
108 | 13x |
common_class <- merge( |
109 | 13x |
x = BASE_class, |
110 | 13x |
y = COMPARE_class, |
111 | 13x |
by = "colname" |
112 |
) |
|
113 | ||
114 | ||
115 | 13x |
diff_class <- common_class[common_class[["class_BASE"]] != common_class[["class_COMPARE"]], , drop = FALSE] |
116 | ||
117 | ||
118 | 13x |
diff_class$classmerge <- mapply( |
119 | 13x |
sort_then_join, |
120 | 13x |
diff_class$class_COMPARE, |
121 | 13x |
diff_class$class_BASE |
122 |
) |
|
123 | ||
124 | ||
125 | 13x |
cast_columns <- diff_class[diff_class[["classmerge"]] %in% allowed_class_casts, , drop = FALSE] |
126 | ||
127 | ||
128 | 13x |
DATASETS <- list( |
129 | 13x |
"BASE" = BASE, |
130 | 13x |
"COMPARE" = COMPARE |
131 |
) |
|
132 | ||
133 | 13x |
if (nrow(cast_columns) == 0) { |
134 | 2x |
return(DATASETS) |
135 |
} |
|
136 | ||
137 | ||
138 | 11x |
for (i in names(DATASETS)) { |
139 | 22x |
DATASETS[[i]] <- get_casted_dataset(DATASETS[[i]], cast_columns$colname, i) |
140 |
} |
|
141 | ||
142 | 11x |
return(DATASETS) |
143 |
} |
1 |
#' Pad String |
|
2 |
#' |
|
3 |
#' Utility function used to replicate `str_pad`. Adds white space to either end |
|
4 |
#' of a string to get it to equal the desired length |
|
5 |
#' @param x string |
|
6 |
#' @param width desired length |
|
7 |
#' @keywords internal |
|
8 |
string_pad <- function(x, width) { |
|
9 | 1027x |
if (nchar(x) >= width) { |
10 | ! |
return(x) |
11 |
} |
|
12 | 1027x |
width <- width - nchar(x) |
13 | 1027x |
left <- paste0(rep(" ", floor(width / 2)), collapse = "") |
14 | 1027x |
right <- paste0(rep(" ", ceiling(width / 2)), collapse = "") |
15 | 1027x |
paste0(left, x, right, collapse = "") |
16 |
} |
|
17 | ||
18 | ||
19 |
#' recursive_reduce |
|
20 |
#' |
|
21 |
#' Utility function used to replicated `purrr::reduce`. Recursively applies a |
|
22 |
#' function to a list of elements until only 1 element remains |
|
23 |
#' @param .l list of values to apply a function to |
|
24 |
#' @param .f function to apply to each each element of the list in turn. See details. |
|
25 |
#' @details |
|
26 |
#' This function is essentially performing the following operation: |
|
27 |
#' ``` |
|
28 |
#' .l[[1]] <- .f( .l[[1]] , .l[[2]]) ; .l[[1]] <- .f( .l[[1]] , .l[[3]]) |
|
29 |
#' ``` |
|
30 |
#' @keywords internal |
|
31 |
recursive_reduce <- function(.l, .f) { |
|
32 | 365x |
if (length(.l) != 1) { |
33 | 233x |
.l[[2]] <- .f(.l[[1]], .l[[2]]) |
34 | 233x |
return(recursive_reduce(.l[-1], .f)) |
35 |
} else { |
|
36 | 132x |
return(.l[[1]]) |
37 |
} |
|
38 |
} |
|
39 | ||
40 |
#' invert |
|
41 |
#' |
|
42 |
#' Utility function used to replicated `purrr::transpose`. Turns a list inside |
|
43 |
#' out. |
|
44 |
#' @param x list |
|
45 |
#' @keywords internal |
|
46 |
invert <- function(x) { |
|
47 | 40x |
x2 <- list() |
48 | 40x |
cnames <- names(x) |
49 | 40x |
tnames <- names(x[[1]]) |
50 | 40x |
for (i in tnames) { |
51 | 120x |
x2[[i]] <- list() |
52 | 120x |
for (j in cnames) { |
53 | 345x |
x2[[i]][[j]] <- x[[j]][[i]] |
54 |
} |
|
55 |
} |
|
56 | 40x |
return(x2) |
57 |
} |
|
58 | ||
59 | ||
60 | ||
61 | ||
62 |
#' as_ascii_table |
|
63 |
#' |
|
64 |
#' This function takes a `data.frame` and attempts to convert it into |
|
65 |
#' a simple ascii format suitable for printing to the screen |
|
66 |
#' It is assumed all variable values have a `as.character()` method |
|
67 |
#' in order to cast them to character. |
|
68 |
#' @param dat Input dataset to convert into a ascii table |
|
69 |
#' @param line_prefix Symbols to prefix in front of every line of the table |
|
70 |
#' @keywords internal |
|
71 |
as_ascii_table <- function(dat, line_prefix = " ") { |
|
72 | 40x |
n_col <- ncol(dat) |
73 | 40x |
n_row <- nrow(dat) |
74 | ||
75 |
## Convert every value to character and crop to a suitable length |
|
76 | 40x |
dat_char <- lapply(dat, as_fmt_char) |
77 | ||
78 | ||
79 | 40x |
hold <- list() |
80 | 40x |
COLS <- colnames(dat) |
81 | ||
82 |
### For each column extract core elements (width, values , title) and pad out |
|
83 |
### each string to be a suitable length |
|
84 | 40x |
for (i in seq_len(n_col)) { |
85 | 115x |
COL <- COLS[i] |
86 | 115x |
VALUES <- dat_char[[i]] |
87 | ||
88 | 115x |
JOINT <- c(COL, VALUES) |
89 | 115x |
WIDTH <- max(sapply(JOINT, nchar)) + 2 |
90 | ||
91 | 115x |
hold[[COL]] <- list() |
92 | 115x |
hold[[COL]]$WIDTH <- WIDTH |
93 | 115x |
hold[[COL]]$VALUES <- sapply(VALUES, string_pad, width = WIDTH) |
94 | 115x |
hold[[COL]]$HEADER <- sapply(COL, string_pad, width = WIDTH) |
95 |
} |
|
96 | ||
97 |
### Collapse into a single value per component ( title , values, width ) |
|
98 | 40x |
thold <- invert(hold) |
99 | 40x |
tvals <- recursive_reduce(thold$VALUES, paste0) |
100 | 40x |
thead <- recursive_reduce(thold$HEADER, paste0) |
101 | 40x |
twidth <- recursive_reduce(thold$WIDTH, sum) |
102 | ||
103 |
### Create header and footer lines |
|
104 | 40x |
TLINE <- paste0(rep("=", twidth), collapse = "") |
105 | 40x |
LINE <- paste0(rep("-", twidth), collapse = "") |
106 | 40x |
FVALS <- paste0(line_prefix, tvals, collapse = "\n") |
107 | ||
108 |
### Output table |
|
109 | 40x |
paste0( |
110 | 40x |
line_prefix, TLINE, "\n", |
111 | 40x |
line_prefix, thead, "\n", |
112 | 40x |
line_prefix, LINE, "\n", |
113 | 40x |
FVALS, "\n", |
114 | 40x |
line_prefix, LINE |
115 |
) |
|
116 |
} |
|
117 | ||
118 | ||
119 |
#' as_character |
|
120 |
#' |
|
121 |
#' Stub function to enable mocking in unit tests |
|
122 |
as_character <- as.character |
|
123 | ||
124 |
#' Format vector to printable string |
|
125 |
#' |
|
126 |
#' Coerces a vector of any type into a printable string. The most |
|
127 |
#' significant transformation is performed on existing character |
|
128 |
#' vectors which will be truncated, have newlines converted |
|
129 |
#' to explicit symbols and will be wrapped in quotes if they |
|
130 |
#' contain white space. |
|
131 |
#' |
|
132 |
#' @param x (`vector`) \cr vector to be converted to character |
|
133 |
#' @param add_quotes (`logical`) \cr if true will wrap strings that contain |
|
134 |
#' whitespace with quotes |
|
135 |
#' @param crop_at (`numeric`) \cr specifies the limit at which strings should |
|
136 |
#' be truncated to |
|
137 |
#' @param ... additional arguments (not currently used) |
|
138 |
#' |
|
139 |
#' @name as_fmt_char |
|
140 |
#' @keywords internal |
|
141 |
as_fmt_char <- function(x, ...) { |
|
142 | 180x |
UseMethod("as_fmt_char") |
143 |
} |
|
144 | ||
145 |
#' @rdname as_fmt_char |
|
146 |
#' @export |
|
147 |
as_fmt_char.numeric <- function(x, ...) { |
|
148 | 34x |
format(x, digits = 7, justify = "right") |
149 |
} |
|
150 | ||
151 |
#' @rdname as_fmt_char |
|
152 |
#' @export |
|
153 |
as_fmt_char.NULL <- function(x, ...) { |
|
154 | ! |
"<NULL>" |
155 |
} |
|
156 | ||
157 |
#' @importFrom utils capture.output |
|
158 |
#' @rdname as_fmt_char |
|
159 |
#' @export |
|
160 |
as_fmt_char.list <- function(x, ...) { |
|
161 | 15x |
vapply( |
162 | 15x |
x, |
163 | 15x |
function(x) { |
164 | 54x |
if (is.numeric(x)) { |
165 | ! |
return(as_fmt_char(x)) |
166 |
} |
|
167 | 54x |
if (is.character(x) & length(x) == 1) { |
168 | 54x |
return(as_fmt_char(x)) |
169 |
} |
|
170 | ! |
as_fmt_char( |
171 | ! |
paste(capture.output(dput(x)), collapse = " "), |
172 | ! |
add_quotes = FALSE |
173 |
) |
|
174 |
}, |
|
175 | 15x |
character(1) |
176 |
) |
|
177 |
} |
|
178 | ||
179 |
#' @rdname as_fmt_char |
|
180 |
#' @export |
|
181 |
as_fmt_char.factor <- function(x, ...) { |
|
182 | 3x |
as_fmt_char(as.character(x)) |
183 |
} |
|
184 | ||
185 |
#' @rdname as_fmt_char |
|
186 |
#' @export |
|
187 |
as_fmt_char.character <- function(x, add_quotes = TRUE, crop_at = 30, ...) { |
|
188 | 123x |
needs_quotes <- grepl("\\s", x) & add_quotes |
189 | ||
190 | 123x |
x[is.na(x)] <- "<NA>" |
191 | ||
192 |
# Replace \nl \cr with tags to stop print message splitting over |
|
193 |
# multiple lines |
|
194 | 123x |
x <- gsub("\x0D", "<cr>", x) |
195 | 123x |
x <- gsub("\x0A", "<nl>", x) |
196 | ||
197 | 123x |
charlength <- vapply(x, nchar, numeric(1)) |
198 | 123x |
x <- substr(x, 1, crop_at) |
199 | 123x |
x[charlength > crop_at] <- paste0(x[charlength > crop_at], "...") |
200 | ||
201 |
# Add enclosing " " around strings with white space so that it can be |
|
202 |
# clearly identified in the printed output |
|
203 | 123x |
x[needs_quotes] <- paste0('"', x[needs_quotes], '"') |
204 | ||
205 | 123x |
return(x) |
206 |
} |
|
207 | ||
208 | ||
209 |
#' @rdname as_fmt_char |
|
210 |
#' @export |
|
211 |
as_fmt_char.default <- function(x, ...) { |
|
212 | 6x |
x_char <- as_character(x) |
213 | 6x |
assertthat::assert_that( |
214 | 6x |
is.character(x_char), |
215 | 6x |
msg = sprintf( |
216 | 6x |
"Unable to convert class `'%s'` to character for printing purposes", |
217 | 6x |
paste(class(x), collapse = "', '") |
218 |
) |
|
219 |
) |
|
220 | 5x |
as_fmt_char.character(x_char, add_quotes = FALSE) |
221 |
} |
|
222 | ||
223 | ||
224 |
#' @rdname as_fmt_char |
|
225 |
#' @export |
|
226 |
as_fmt_char.POSIXt <- function(x, ...) { |
|
227 | 4x |
format(x, "%Y-%m-%d %H:%M:%S %Z") |
228 |
} |
|
229 | ||
230 | ||
231 |
#' get_table |
|
232 |
#' |
|
233 |
#' Generate nice looking table from a data frame |
|
234 |
#' @param dsin dataset |
|
235 |
#' @inheritParams print.diffdf |
|
236 |
#' @keywords internal |
|
237 |
get_table <- function(dsin, row_limit = 10) { |
|
238 | 37x |
if (nrow(dsin) == 0) { |
239 | ! |
return("") |
240 |
} |
|
241 | 37x |
if (!is.null(row_limit)) { |
242 | 34x |
assertthat::assert_that( |
243 | 34x |
assertthat::is.number(row_limit), |
244 | 34x |
row_limit > 0, |
245 | 34x |
msg = "row_limit must be a positive integer" |
246 |
) |
|
247 |
} |
|
248 | 37x |
if (is.null(row_limit)) { |
249 | 3x |
display_table <- dsin |
250 |
} else { |
|
251 | 34x |
display_table <- subset(dsin, seq_len(nrow(dsin)) < (row_limit + 1)) |
252 |
} |
|
253 | ||
254 | 37x |
add_message <- if (!is.null(row_limit) && nrow(dsin) > row_limit) { |
255 | 5x |
paste0( |
256 | 5x |
"First ", |
257 | 5x |
row_limit, |
258 | 5x |
" of ", |
259 | 5x |
nrow(dsin), |
260 | 5x |
" rows are shown in table below" |
261 |
) |
|
262 |
} else { |
|
263 | 32x |
NULL |
264 |
} |
|
265 | ||
266 | 37x |
msg <- paste( |
267 | 37x |
c( |
268 | 37x |
add_message, |
269 | 37x |
as_ascii_table(display_table) |
270 |
), |
|
271 | 37x |
collapse = "\n" |
272 |
) |
|
273 | 37x |
return(msg) |
274 |
} |
1 |
#' is_variable_different |
|
2 |
#' |
|
3 |
#' This subsets the data set on the variable name, picks out differences and returns a `tibble` |
|
4 |
#' of differences for the given variable |
|
5 |
#' @importFrom tibble as_tibble |
|
6 |
#' @param variablename name of variable being compared |
|
7 |
#' @param keynames name of keys |
|
8 |
#' @param datain Inputted dataset with base and compare vectors |
|
9 |
#' @param ... Additional arguments which might be passed through (numerical accuracy) |
|
10 |
#' @return A boolean vector which is T if target and current are different |
|
11 |
#' @keywords internal |
|
12 |
is_variable_different <- function(variablename, keynames, datain, ...) { |
|
13 | 917x |
xvar <- paste0(variablename, ".x") |
14 | 917x |
yvar <- paste0(variablename, ".y") |
15 | ||
16 | 917x |
assertthat::assert_that( |
17 | 917x |
xvar %in% names(datain) && yvar %in% names(datain), |
18 | 917x |
msg = "Variable does not exist within input dataset" |
19 |
) |
|
20 | ||
21 | 917x |
target <- datain[[xvar]] |
22 | 917x |
current <- datain[[yvar]] |
23 | 917x |
outvect <- find_difference(target, current, ...) |
24 | ||
25 | 917x |
datain[["VARIABLE"]] <- variablename |
26 | ||
27 | 917x |
names(datain)[names(datain) %in% c(xvar, yvar)] <- c("BASE", "COMPARE") |
28 | ||
29 | 917x |
x <- as_tibble( |
30 | 917x |
subset( |
31 | 917x |
datain, |
32 | 917x |
outvect, |
33 | 917x |
select = c("VARIABLE", keynames, "BASE", "COMPARE") |
34 |
) |
|
35 |
) |
|
36 | ||
37 | 917x |
return(x) |
38 |
} |
|
39 | ||
40 |
#' compare_vectors |
|
41 |
#' |
|
42 |
#' Compare two vectors looking for differences |
|
43 |
#' |
|
44 |
#' @param target the base vector |
|
45 |
#' @param current a vector to compare target to |
|
46 |
#' @param ... Additional arguments which might be passed through (numerical accuracy) |
|
47 |
#' @keywords internal |
|
48 |
compare_vectors <- function(target, current, ...) { |
|
49 | 950x |
UseMethod("compare_vectors") |
50 |
} |
|
51 | ||
52 | ||
53 |
#' find_difference |
|
54 |
#' |
|
55 |
#' This determines if two vectors are different. It expects vectors of the same |
|
56 |
#' length and type, and is intended to be used after checks have already been done |
|
57 |
#' Initially picks out any `NA`'s (matching `NA`'s count as a match) |
|
58 |
#' Then compares remaining vector |
|
59 |
#' |
|
60 |
#' @param target the base vector |
|
61 |
#' @param current a vector to compare target to |
|
62 |
#' @param ... Additional arguments which might be passed through (numerical accuracy) |
|
63 |
#' @keywords internal |
|
64 |
find_difference <- function(target, current, ...) { |
|
65 | 953x |
if (length(target) != length(current)) { |
66 | 2x |
warning("Inputs are not of the same length") |
67 | 2x |
return(NULL) |
68 |
} |
|
69 | ||
70 | 951x |
if (is.null(target) || is.null(current)) { |
71 | 1x |
return(is.null(target) != is.null(current)) |
72 |
} |
|
73 | ||
74 |
### Initalise output, assume problem unless evidence otherwise |
|
75 | 950x |
return_vector <- rep(TRUE, length(target)) |
76 | ||
77 | 950x |
nas_t <- is.na(target) |
78 | 950x |
nas_c <- is.na(current) |
79 | ||
80 |
## compare missing values |
|
81 | 950x |
nacompare <- nas_t != nas_c |
82 | 950x |
naselect <- nas_t | nas_c |
83 | 950x |
return_vector[naselect] <- nacompare[naselect] |
84 | ||
85 |
## compare non-missing values |
|
86 | 950x |
selectvector <- as.logical((!nas_t) * (!nas_c)) |
87 | ||
88 | 950x |
comparevect <- compare_vectors( |
89 | 950x |
target[selectvector], |
90 | 950x |
current[selectvector], |
91 |
... |
|
92 |
) |
|
93 | ||
94 | 950x |
return_vector[selectvector] <- comparevect |
95 | ||
96 | 950x |
return(return_vector) |
97 |
} |
|
98 | ||
99 | ||
100 | ||
101 | ||
102 | ||
103 | ||
104 | ||
105 |
#' compare_vectors.default |
|
106 |
#' |
|
107 |
#' Default method, if the vector is not numeric or factor. Basic comparison |
|
108 |
#' @param target the base vector |
|
109 |
#' @param current a vector to compare target to |
|
110 |
#' @param ... Additional arguments which might be passed through (numerical accuracy) |
|
111 |
#' @keywords internal |
|
112 |
compare_vectors.default <- function(target, current, ...) { |
|
113 | 408x |
target != current |
114 |
} |
|
115 | ||
116 | ||
117 | ||
118 | ||
119 |
#' compare_vectors.factor |
|
120 |
#' |
|
121 |
#' Compares factors. Sets them as character and then compares |
|
122 |
#' @param target the base vector |
|
123 |
#' @param current a vector to compare target to |
|
124 |
#' @param ... Additional arguments which might be passed through (numerical accuracy) |
|
125 |
#' @keywords internal |
|
126 |
compare_vectors.factor <- function(target, current, ...) { |
|
127 | 83x |
as.character(target) != as.character(current) |
128 |
} |
|
129 | ||
130 | ||
131 | ||
132 | ||
133 | ||
134 |
#' compare_vectors.numeric |
|
135 |
#' |
|
136 |
#' This is a modified version of the all.equal function |
|
137 |
#' which returns a vector rather than a message |
|
138 |
#' @param target the base vector |
|
139 |
#' @param current a vector to compare target to |
|
140 |
#' @param tolerance Level of tolerance for differences between two variables |
|
141 |
#' @param scale Scale that tolerance should be set on. If NULL assume absolute |
|
142 |
#' @param ... Not used |
|
143 |
#' @keywords internal |
|
144 |
compare_vectors.numeric <- function( |
|
145 |
target, |
|
146 |
current, |
|
147 |
tolerance = sqrt(.Machine$double.eps), |
|
148 |
scale = NULL, |
|
149 |
... |
|
150 |
) { |
|
151 | ||
152 | 459x |
out <- target == current |
153 | ||
154 | 459x |
if (all(out)) { |
155 | 433x |
return(!out) |
156 |
} |
|
157 | ||
158 | 26x |
if (is.integer(target) || is.integer(current)) { |
159 | 7x |
target <- as.double(target) |
160 | 7x |
current <- as.double(current) |
161 |
} |
|
162 | ||
163 | 26x |
xy <- abs(target - current) |
164 | ||
165 | 26x |
if (!is.null(scale)) { |
166 | 4x |
xy <- xy / scale |
167 |
} |
|
168 | ||
169 | 26x |
return(xy > tolerance) |
170 |
} |
|
171 | ||
172 |
#' compare_vectors.int64 |
|
173 |
#' |
|
174 |
#' Handle int64 vectors. Uses numeric comparison |
|
175 |
#' @param target the base vector |
|
176 |
#' @param current a vector to compare target to |
|
177 |
#' @param tolerance Level of tolerance for differences between two variables |
|
178 |
#' @param scale Scale that tolerance should be set on. If NULL assume absolute |
|
179 |
#' @param ... Not used |
|
180 |
#' @keywords internal |
|
181 |
compare_vectors.integer64 <- function( |
|
182 |
target, |
|
183 |
current, |
|
184 |
tolerance = sqrt(.Machine$double.eps), |
|
185 |
scale = NULL, |
|
186 |
... |
|
187 |
) { |
|
188 | 4x |
compare_vectors.numeric(target, current, tolerance, scale) |
189 |
} |
1 |
#' construct_issue |
|
2 |
#' |
|
3 |
#' Make an s3 object with class issue and possible additional class, |
|
4 |
#' and assign other arguments to attributes |
|
5 |
#' @param value the value of the object |
|
6 |
#' @param message the value of the message attribute |
|
7 |
#' @param add_class additional class to add |
|
8 |
#' @keywords internal |
|
9 |
construct_issue <- function(value, message, add_class = NULL) { |
|
10 | 2180x |
x <- value |
11 | ||
12 |
### If nothing has been provided return nothing ! |
|
13 | 2180x |
if (nrow(x) == 0) { |
14 | 1913x |
return(NULL) |
15 |
} |
|
16 | ||
17 | 267x |
class(x) <- c(add_class, "issue", class(x)) |
18 | 267x |
attributes(x)[["message"]] <- message |
19 | 267x |
return(x) |
20 |
} |
|
21 | ||
22 | ||
23 |
#' get_issue_message |
|
24 |
#' |
|
25 |
#' Simple function to grab the issue message |
|
26 |
#' @param object inputted object of class issue |
|
27 |
#' @param ... other arguments |
|
28 |
#' @keywords internal |
|
29 |
get_issue_message <- function(object, ...) { |
|
30 | 134x |
return(attr(object, "message")) |
31 |
} |
|
32 | ||
33 | ||
34 |
#' get_print_message |
|
35 |
#' |
|
36 |
#' Get the required text depending on type of issue |
|
37 |
#' @param object inputted object of class issue |
|
38 |
#' @param ... other arguments |
|
39 |
#' @keywords internal |
|
40 |
get_print_message <- function(object, ...) { |
|
41 | 37x |
UseMethod("get_print_message", object) |
42 |
} |
|
43 | ||
44 | ||
45 |
#' get_print_message.default |
|
46 |
#' |
|
47 |
#' Errors, as this should only ever be given an issue |
|
48 |
#' @param object issue |
|
49 |
#' @param ... Not used |
|
50 |
#' @keywords internal |
|
51 |
get_print_message.default <- function(object, ...) { |
|
52 | ! |
stop("Error: An issue has not been provided to this function!") |
53 |
} |
|
54 | ||
55 | ||
56 |
#' get_print_message.issue |
|
57 |
#' |
|
58 |
#' Get text from a basic issue, based on the class of the value of the issue |
|
59 |
#' |
|
60 |
#' @param object an object of class issue_basic |
|
61 |
#' @inheritParams print.diffdf |
|
62 |
#' @keywords internal |
|
63 |
get_print_message.issue <- function(object, row_limit, ...) { |
|
64 | 37x |
paste( |
65 | 37x |
c( |
66 | 37x |
attr(object, "message"), |
67 | 37x |
get_table(object, row_limit = row_limit), |
68 | 37x |
"\n\n" |
69 |
), |
|
70 | 37x |
collapse = "\n" |
71 |
) |
|
72 |
} |
1 |
#' Print diffdf objects |
|
2 |
#' |
|
3 |
#' Print nicely formatted version of an diffdf object |
|
4 |
#' @param x comparison object created by diffdf(). |
|
5 |
#' @param ... Additional arguments (not used) |
|
6 |
#' @param row_limit Max row limit for difference tables (NULL to show all rows) |
|
7 |
#' @param as_string Return printed message as an R character vector? |
|
8 |
#' @examples |
|
9 |
#' x <- subset(iris, -Species) |
|
10 |
#' x[1, 2] <- 5 |
|
11 |
#' COMPARE <- diffdf(iris, x) |
|
12 |
#' print(COMPARE) |
|
13 |
#' print(COMPARE, row_limit = 5) |
|
14 |
#' @export |
|
15 |
print.diffdf <- function(x, row_limit = 10, as_string = FALSE, ...) { |
|
16 | 25x |
if (!is.null(row_limit)) { |
17 | 24x |
assertthat::assert_that( |
18 | 24x |
assertthat::is.number(row_limit), |
19 | 24x |
row_limit > 0, |
20 | 24x |
msg = "row_limit must be a positive integer" |
21 |
) |
|
22 |
} |
|
23 | 21x |
assertthat::assert_that( |
24 | 21x |
assertthat::is.flag(as_string) |
25 |
) |
|
26 | 19x |
COMPARE <- x |
27 | ||
28 | 19x |
if (length(COMPARE) == 0) { |
29 | 6x |
outtext <- "No issues were found!\n" |
30 |
} else { |
|
31 | 13x |
start_text <- paste0("Differences found between the objects!\n\n") |
32 | 13x |
end_text <- lapply(COMPARE, function(x) get_print_message(x, row_limit)) |
33 | 13x |
end_text <- paste0(unlist(end_text), collapse = "") |
34 | 13x |
outtext <- paste0(start_text, end_text) |
35 |
} |
|
36 | 19x |
if (as_string) { |
37 | 7x |
return(strsplit(outtext, "\n")[[1]]) |
38 |
} else { |
|
39 | 12x |
cat(outtext) |
40 | 12x |
return(invisible(COMPARE)) |
41 |
} |
|
42 |
} |
1 |
#' Generate unique key name |
|
2 |
#' |
|
3 |
#' Function to generate a name for the keys if not provided |
|
4 |
#' |
|
5 |
#' @param BASE base dataset |
|
6 |
#' @param COMP comparison dataset |
|
7 |
#' @param replace_names a vector of replacement names. Used for recursion, should be edited in function for clarity |
|
8 |
#' |
|
9 |
#' @keywords internal |
|
10 |
generate_keyname <- function( |
|
11 |
BASE, |
|
12 |
COMP, |
|
13 |
replace_names = c("..ROWNUMBER..", "..RN..", "..ROWN..", "..N..") |
|
14 |
) { |
|
15 | 193x |
assertthat::assert_that( |
16 | 193x |
is(replace_names, "character"), |
17 | 193x |
msg = "replace_names is not a character vector" |
18 |
) |
|
19 | ||
20 | 193x |
assertthat::assert_that( |
21 | 193x |
length(replace_names) != 0, |
22 | 193x |
msg = "All default row names are in use in BASE/COMPARE. Please provide a KEY argument" |
23 |
) |
|
24 | ||
25 | ||
26 | 185x |
key_name <- replace_names[1] |
27 | ||
28 | 185x |
if (!is.null(BASE[[key_name]]) || !is.null(COMP[[key_name]])) { |
29 | 66x |
key_name <- generate_keyname(BASE, COMP, replace_names[-1]) |
30 |
} |
|
31 | 153x |
return(key_name) |
32 |
} |