From e04e67fcd096fcc39ec3f99ef3198d1aaab11c3a Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sun, 3 Nov 2024 22:03:03 +0100 Subject: [PATCH 01/25] fix type coercion in bmerge --- R/bmerge.R | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/R/bmerge.R b/R/bmerge.R index aa7ea4103..82353bed0 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -71,7 +71,13 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, xclass, iname, iclass) } if (xclass == iclass) { - if (verbose) catf("%s has same type (%s) as %s. No coercion needed.\n", iname, xclass, xname) + if (length(icols)>1 && class(x[[xc]])=="Date" && class(i[[ic]]=="Date")) { + set(x, j=xc, value=as.double(x[[xc]])) + set(i, j=ic, value=as.double(i[[ic]])) + if (verbose) catf("%s and %s are both Dates. R does not guarentee a type for Date internally, hence, coercing to double.\n", iname, xname) + } else { + if (verbose) catf("%s has same type (%s) as %s. No coercion needed.\n", iname, xclass, xname) + } next } if (xclass=="character" || iclass=="character" || From c55bfcc86f6eea12959e07c8d118287515d19650 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sun, 3 Nov 2024 22:04:36 +0100 Subject: [PATCH 02/25] fix bracket --- R/bmerge.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/bmerge.R b/R/bmerge.R index 82353bed0..fba00669f 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -71,7 +71,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, xclass, iname, iclass) } if (xclass == iclass) { - if (length(icols)>1 && class(x[[xc]])=="Date" && class(i[[ic]]=="Date")) { + if (length(icols)>1 && class(x[[xc]])=="Date" && class(i[[ic]])=="Date") { set(x, j=xc, value=as.double(x[[xc]])) set(i, j=ic, value=as.double(i[[ic]])) if (verbose) catf("%s and %s are both Dates. R does not guarentee a type for Date internally, hence, coercing to double.\n", iname, xname) From 272fc592f20ca3a0ea5e060b38c5078bd5673762 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sun, 3 Nov 2024 22:24:16 +0100 Subject: [PATCH 03/25] add test cases --- inst/tests/tests.Rraw | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index fef8f1fff..aa9caedd2 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -20596,3 +20596,12 @@ test(2295.3, is.data.table(d2)) # #6588: .checkTypos used to give arbitrary strings to stopf as the first argument test(2296, d2[x %no such operator% 1], error = '%no such operator%') + +# coerce Dates to double if join on multiple columns, #6602 +date_int = seq(as.Date('2015-01-01'), as.Date('2015-01-01'), by="day") +date_dbl = as.Date('2015-01-01') +x = data.table(a=date_int, b=1L) +y = data.table(c=date_int, d=date_dbl) + +test(2297.1, options=c(datatable.verbose=TRUE), y[x, on=.(c == a)], data.table(c=date_int, d=date_dbl, b=1L), output="No coercion needed.") +test(2297.2, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=date_dbl, d=date_dbl, b=1L), output="coercing to double.") From 3aba1cc45433c6ecb03899b2559e2ffa8662ae5e Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sun, 3 Nov 2024 22:25:45 +0100 Subject: [PATCH 04/25] fix lint --- R/bmerge.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/bmerge.R b/R/bmerge.R index fba00669f..de1d5cedc 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -71,7 +71,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, xclass, iname, iclass) } if (xclass == iclass) { - if (length(icols)>1 && class(x[[xc]])=="Date" && class(i[[ic]])=="Date") { + if (length(icols)>1 && is(x[[xc]], "Date") && is(i[[ic]], "Date")) { set(x, j=xc, value=as.double(x[[xc]])) set(i, j=ic, value=as.double(i[[ic]])) if (verbose) catf("%s and %s are both Dates. R does not guarentee a type for Date internally, hence, coercing to double.\n", iname, xname) From 1d7a8c2289e5b8b765ae48ef2129d78d3394216d Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sun, 3 Nov 2024 22:39:12 +0100 Subject: [PATCH 05/25] fix old test case --- inst/tests/tests.Rraw | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index aa9caedd2..43497c6f6 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15727,7 +15727,8 @@ DT = data.table(z = 1i) test(2069.33, DT[DT, on = 'z'], error = "Type 'complex' is not supported for joining/merging") # forder verbose message when !isReallyReal Date, #1738 -DT = data.table(d=sample(seq(as.Date("2015-01-01"), as.Date("2015-01-05"), by="days"), 20, replace=TRUE)) +date_dbl = as.Date(as.double(seq(as.Date("2015-01-01"), as.Date("2015-01-05"), by="days")), origin="1970-01-01") +DT = data.table(d=sample(date_dbl, 20, replace=TRUE)) test(2070.01, typeof(DT$d), "double") test(2070.02, DT[, .N, keyby=d, verbose=TRUE], output="Column 1.*date.*8 byte double.*no fractions are present.*4 byte integer.*to save space and time") @@ -20604,4 +20605,4 @@ x = data.table(a=date_int, b=1L) y = data.table(c=date_int, d=date_dbl) test(2297.1, options=c(datatable.verbose=TRUE), y[x, on=.(c == a)], data.table(c=date_int, d=date_dbl, b=1L), output="No coercion needed.") -test(2297.2, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=date_dbl, d=date_dbl, b=1L), output="coercing to double.") +test(2297.2, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=date_int, d=date_int, b=1L), output="coercing to double.") From ff934ccb13325f76b6aabbfa085c975798dc9fcb Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sun, 24 Nov 2024 15:58:04 +0100 Subject: [PATCH 06/25] rename x/i class --- R/bmerge.R | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/R/bmerge.R b/R/bmerge.R index de1d5cedc..cd6e8ca41 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -41,26 +41,26 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos # Note that if i is keyed, if this coerces i's key gets dropped by set() ic = icols[a] xc = xcols[a] - xclass = getClass(x[[xc]]) - iclass = getClass(i[[ic]]) + x_merge_type = getClass(x[[xc]]) + i_merge_type = getClass(i[[ic]]) xname = paste0("x.", names(x)[xc]) iname = paste0("i.", names(i)[ic]) - if (!xclass %chin% supported) stopf("%s is type %s which is not supported by data.table join", xname, xclass) - if (!iclass %chin% supported) stopf("%s is type %s which is not supported by data.table join", iname, iclass) - if (xclass=="factor" || iclass=="factor") { + if (!x_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", xname, x_merge_type) + if (!i_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", iname, i_merge_type) + if (x_merge_type=="factor" || i_merge_type=="factor") { if (roll!=0.0 && a==length(icols)) stopf("Attempting roll join on factor column when joining %s to %s. Only integer, double or character columns may be roll joined.", xname, iname) - if (xclass=="factor" && iclass=="factor") { + if (x_merge_type=="factor" && i_merge_type=="factor") { if (verbose) catf("Matching %s factor levels to %s factor levels.\n", iname, xname) set(i, j=ic, value=chmatch(levels(i[[ic]]), levels(x[[xc]]), nomatch=0L)[i[[ic]]]) # nomatch=0L otherwise a level that is missing would match to NA values next } else { - if (xclass=="character") { + if (x_merge_type=="character") { if (verbose) catf("Coercing factor column %s to type character to match type of %s.\n", iname, xname) set(i, j=ic, value=val<-as.character(i[[ic]])) set(callersi, j=ic, value=val) # factor in i joining to character in x will return character and not keep x's factor; e.g. for antaresRead #3581 next - } else if (iclass=="character") { + } else if (i_merge_type=="character") { if (verbose) catf("Matching character column %s to factor levels in %s.\n", iname, xname) newvalue = chmatch(i[[ic]], levels(x[[xc]]), nomatch=0L) if (anyNA(i[[ic]])) newvalue[is.na(i[[ic]])] = NA_integer_ # NA_character_ should match to NA in factor, #3809 @@ -68,43 +68,43 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos next } } - stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, xclass, iname, iclass) + stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, x_merge_type, iname, i_merge_type) } - if (xclass == iclass) { + if (x_merge_type == i_merge_type) { if (length(icols)>1 && is(x[[xc]], "Date") && is(i[[ic]], "Date")) { set(x, j=xc, value=as.double(x[[xc]])) set(i, j=ic, value=as.double(i[[ic]])) if (verbose) catf("%s and %s are both Dates. R does not guarentee a type for Date internally, hence, coercing to double.\n", iname, xname) } else { - if (verbose) catf("%s has same type (%s) as %s. No coercion needed.\n", iname, xclass, xname) + if (verbose) catf("%s has same type (%s) as %s. No coercion needed.\n", iname, x_merge_type, xname) } next } - if (xclass=="character" || iclass=="character" || - xclass=="logical" || iclass=="logical" || - xclass=="factor" || iclass=="factor") { + if (x_merge_type=="character" || i_merge_type=="character" || + x_merge_type=="logical" || i_merge_type=="logical" || + x_merge_type=="factor" || i_merge_type=="factor") { if (anyNA(i[[ic]]) && allNA(i[[ic]])) { - if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", iname, iclass, xclass, xname) - set(i, j=ic, value=match.fun(paste0("as.", xclass))(i[[ic]])) + if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", iname, i_merge_type, x_merge_type, xname) + set(i, j=ic, value=match.fun(paste0("as.", x_merge_type))(i[[ic]])) next } else if (anyNA(x[[xc]]) && allNA(x[[xc]])) { - if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", xname, xclass, iclass, iname) - set(x, j=xc, value=match.fun(paste0("as.", iclass))(x[[xc]])) + if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", xname, x_merge_type, i_merge_type, iname) + set(x, j=xc, value=match.fun(paste0("as.", i_merge_type))(x[[xc]])) next } - stopf("Incompatible join types: %s (%s) and %s (%s)", xname, xclass, iname, iclass) + stopf("Incompatible join types: %s (%s) and %s (%s)", xname, x_merge_type, iname, i_merge_type) } - if (xclass=="integer64" || iclass=="integer64") { + if (x_merge_type=="integer64" || i_merge_type=="integer64") { nm = c(iname, xname) - if (xclass=="integer64") { w=i; wc=ic; wclass=iclass; } else { w=x; wc=xc; wclass=xclass; nm=rev(nm) } # w is which to coerce + if (x_merge_type=="integer64") { w=i; wc=ic; wclass=i_merge_type; } else { w=x; wc=xc; wclass=x_merge_type; nm=rev(nm) } # w is which to coerce if (wclass=="integer" || (wclass=="double" && !isReallyReal(w[[wc]]))) { if (verbose) catf("Coercing %s column %s%s to type integer64 to match type of %s.\n", wclass, nm[1L], if (wclass=="double") " (which contains no fractions)" else "", nm[2L]) set(w, j=wc, value=bit64::as.integer64(w[[wc]])) } else stopf("Incompatible join types: %s is type integer64 but %s is type double and contains fractions", nm[2L], nm[1L]) } else { # just integer and double left - if (iclass=="double") { + if (i_merge_type=="double") { if (!isReallyReal(i[[ic]])) { # common case of ad hoc user-typed integers missing L postfix joining to correct integer keys # we've always coerced to int and returned int, for convenience. From a75fb0ede218460c98cb2c3757e2ce60b53ca0f7 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sun, 24 Nov 2024 16:20:41 +0100 Subject: [PATCH 07/25] add minimal test --- inst/tests/tests.Rraw | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 43497c6f6..98e65e383 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -20599,10 +20599,8 @@ test(2295.3, is.data.table(d2)) test(2296, d2[x %no such operator% 1], error = '%no such operator%') # coerce Dates to double if join on multiple columns, #6602 -date_int = seq(as.Date('2015-01-01'), as.Date('2015-01-01'), by="day") -date_dbl = as.Date('2015-01-01') -x = data.table(a=date_int, b=1L) -y = data.table(c=date_int, d=date_dbl) +x = data.table(a=1L) +y = data.table(c=1L, d=2) +y[x, on=.(c == a, d == a)] -test(2297.1, options=c(datatable.verbose=TRUE), y[x, on=.(c == a)], data.table(c=date_int, d=date_dbl, b=1L), output="No coercion needed.") -test(2297.2, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=date_int, d=date_int, b=1L), output="coercing to double.") +test(2297.1, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=date_int, d=date_int, b=1L), output="coercing to double.") From 1bb60bf371a64569b860946072cc02cca5995c8c Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sun, 24 Nov 2024 16:44:53 +0100 Subject: [PATCH 08/25] indent loop --- R/bmerge.R | 150 ++++++++++++++++++++++++++--------------------------- 1 file changed, 73 insertions(+), 77 deletions(-) diff --git a/R/bmerge.R b/R/bmerge.R index cd6e8ca41..4460eb033 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -34,92 +34,88 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos ans } - if (nrow(i)) for (a in seq_along(icols)) { - # - check that join columns have compatible types - # - do type coercions if necessary on just the shallow local copies for the purpose of join - # - handle factor columns appropriately - # Note that if i is keyed, if this coerces i's key gets dropped by set() - ic = icols[a] - xc = xcols[a] - x_merge_type = getClass(x[[xc]]) - i_merge_type = getClass(i[[ic]]) - xname = paste0("x.", names(x)[xc]) - iname = paste0("i.", names(i)[ic]) - if (!x_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", xname, x_merge_type) - if (!i_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", iname, i_merge_type) - if (x_merge_type=="factor" || i_merge_type=="factor") { - if (roll!=0.0 && a==length(icols)) - stopf("Attempting roll join on factor column when joining %s to %s. Only integer, double or character columns may be roll joined.", xname, iname) - if (x_merge_type=="factor" && i_merge_type=="factor") { - if (verbose) catf("Matching %s factor levels to %s factor levels.\n", iname, xname) - set(i, j=ic, value=chmatch(levels(i[[ic]]), levels(x[[xc]]), nomatch=0L)[i[[ic]]]) # nomatch=0L otherwise a level that is missing would match to NA values - next - } else { - if (x_merge_type=="character") { - if (verbose) catf("Coercing factor column %s to type character to match type of %s.\n", iname, xname) - set(i, j=ic, value=val<-as.character(i[[ic]])) - set(callersi, j=ic, value=val) # factor in i joining to character in x will return character and not keep x's factor; e.g. for antaresRead #3581 - next - } else if (i_merge_type=="character") { - if (verbose) catf("Matching character column %s to factor levels in %s.\n", iname, xname) - newvalue = chmatch(i[[ic]], levels(x[[xc]]), nomatch=0L) - if (anyNA(i[[ic]])) newvalue[is.na(i[[ic]])] = NA_integer_ # NA_character_ should match to NA in factor, #3809 - set(i, j=ic, value=newvalue) + if (nrow(i)) { + for (a in seq_along(icols)) { + # - check that join columns have compatible types + # - do type coercions if necessary on just the shallow local copies for the purpose of join + # - handle factor columns appropriately + # Note that if i is keyed, if this coerces i's key gets dropped by set() + ic = icols[a] + xc = xcols[a] + x_merge_type = getClass(x[[xc]]) + i_merge_type = getClass(i[[ic]]) + xname = paste0("x.", names(x)[xc]) + iname = paste0("i.", names(i)[ic]) + if (!x_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", xname, x_merge_type) + if (!i_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", iname, i_merge_type) + if (x_merge_type=="factor" || i_merge_type=="factor") { + if (roll!=0.0 && a==length(icols)) + stopf("Attempting roll join on factor column when joining %s to %s. Only integer, double or character columns may be roll joined.", xname, iname) + if (x_merge_type=="factor" && i_merge_type=="factor") { + if (verbose) catf("Matching %s factor levels to %s factor levels.\n", iname, xname) + set(i, j=ic, value=chmatch(levels(i[[ic]]), levels(x[[xc]]), nomatch=0L)[i[[ic]]]) # nomatch=0L otherwise a level that is missing would match to NA values next + } else { + if (x_merge_type=="character") { + if (verbose) catf("Coercing factor column %s to type character to match type of %s.\n", iname, xname) + set(i, j=ic, value=val<-as.character(i[[ic]])) + set(callersi, j=ic, value=val) # factor in i joining to character in x will return character and not keep x's factor; e.g. for antaresRead #3581 + next + } else if (i_merge_type=="character") { + if (verbose) catf("Matching character column %s to factor levels in %s.\n", iname, xname) + newvalue = chmatch(i[[ic]], levels(x[[xc]]), nomatch=0L) + if (anyNA(i[[ic]])) newvalue[is.na(i[[ic]])] = NA_integer_ # NA_character_ should match to NA in factor, #3809 + set(i, j=ic, value=newvalue) + next + } } + stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, x_merge_type, iname, i_merge_type) } - stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, x_merge_type, iname, i_merge_type) - } - if (x_merge_type == i_merge_type) { - if (length(icols)>1 && is(x[[xc]], "Date") && is(i[[ic]], "Date")) { - set(x, j=xc, value=as.double(x[[xc]])) - set(i, j=ic, value=as.double(i[[ic]])) - if (verbose) catf("%s and %s are both Dates. R does not guarentee a type for Date internally, hence, coercing to double.\n", iname, xname) - } else { + if (x_merge_type == i_merge_type) { if (verbose) catf("%s has same type (%s) as %s. No coercion needed.\n", iname, x_merge_type, xname) - } - next - } - if (x_merge_type=="character" || i_merge_type=="character" || - x_merge_type=="logical" || i_merge_type=="logical" || - x_merge_type=="factor" || i_merge_type=="factor") { - if (anyNA(i[[ic]]) && allNA(i[[ic]])) { - if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", iname, i_merge_type, x_merge_type, xname) - set(i, j=ic, value=match.fun(paste0("as.", x_merge_type))(i[[ic]])) next } - else if (anyNA(x[[xc]]) && allNA(x[[xc]])) { - if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", xname, x_merge_type, i_merge_type, iname) - set(x, j=xc, value=match.fun(paste0("as.", i_merge_type))(x[[xc]])) - next + if (x_merge_type=="character" || i_merge_type=="character" || + x_merge_type=="logical" || i_merge_type=="logical" || + x_merge_type=="factor" || i_merge_type=="factor") { + if (anyNA(i[[ic]]) && allNA(i[[ic]])) { + if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", iname, i_merge_type, x_merge_type, xname) + set(i, j=ic, value=match.fun(paste0("as.", x_merge_type))(i[[ic]])) + next + } + else if (anyNA(x[[xc]]) && allNA(x[[xc]])) { + if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", xname, x_merge_type, i_merge_type, iname) + set(x, j=xc, value=match.fun(paste0("as.", i_merge_type))(x[[xc]])) + next + } + stopf("Incompatible join types: %s (%s) and %s (%s)", xname, x_merge_type, iname, i_merge_type) } - stopf("Incompatible join types: %s (%s) and %s (%s)", xname, x_merge_type, iname, i_merge_type) - } - if (x_merge_type=="integer64" || i_merge_type=="integer64") { - nm = c(iname, xname) - if (x_merge_type=="integer64") { w=i; wc=ic; wclass=i_merge_type; } else { w=x; wc=xc; wclass=x_merge_type; nm=rev(nm) } # w is which to coerce - if (wclass=="integer" || (wclass=="double" && !isReallyReal(w[[wc]]))) { - if (verbose) catf("Coercing %s column %s%s to type integer64 to match type of %s.\n", wclass, nm[1L], if (wclass=="double") " (which contains no fractions)" else "", nm[2L]) - set(w, j=wc, value=bit64::as.integer64(w[[wc]])) - } else stopf("Incompatible join types: %s is type integer64 but %s is type double and contains fractions", nm[2L], nm[1L]) - } else { - # just integer and double left - if (i_merge_type=="double") { - if (!isReallyReal(i[[ic]])) { - # common case of ad hoc user-typed integers missing L postfix joining to correct integer keys - # we've always coerced to int and returned int, for convenience. - if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", iname, xname) - val = as.integer(i[[ic]]) - if (!is.null(attributes(i[[ic]]))) attributes(val) = attributes(i[[ic]]) # to retain Date for example; 3679 - set(i, j=ic, value=val) - set(callersi, j=ic, value=val) # change the shallow copy of i up in [.data.table to reflect in the result, too. + if (x_merge_type=="integer64" || i_merge_type=="integer64") { + nm = c(iname, xname) + if (x_merge_type=="integer64") { w=i; wc=ic; wclass=i_merge_type; } else { w=x; wc=xc; wclass=x_merge_type; nm=rev(nm) } # w is which to coerce + if (wclass=="integer" || (wclass=="double" && !isReallyReal(w[[wc]]))) { + if (verbose) catf("Coercing %s column %s%s to type integer64 to match type of %s.\n", wclass, nm[1L], if (wclass=="double") " (which contains no fractions)" else "", nm[2L]) + set(w, j=wc, value=bit64::as.integer64(w[[wc]])) + } else stopf("Incompatible join types: %s is type integer64 but %s is type double and contains fractions", nm[2L], nm[1L]) + } else { + # just integer and double left + if (i_merge_type=="double") { + if (!isReallyReal(i[[ic]])) { + # common case of ad hoc user-typed integers missing L postfix joining to correct integer keys + # we've always coerced to int and returned int, for convenience. + if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", iname, xname) + val = as.integer(i[[ic]]) + if (!is.null(attributes(i[[ic]]))) attributes(val) = attributes(i[[ic]]) # to retain Date for example; 3679 + set(i, j=ic, value=val) + set(callersi, j=ic, value=val) # change the shallow copy of i up in [.data.table to reflect in the result, too. + } else { + if (verbose) catf("Coercing integer column %s to type double to match type of %s which contains fractions.\n", xname, iname) + set(x, j=xc, value=as.double(x[[xc]])) + } } else { - if (verbose) catf("Coercing integer column %s to type double to match type of %s which contains fractions.\n", xname, iname) - set(x, j=xc, value=as.double(x[[xc]])) + if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", iname, xname) + set(i, j=ic, value=as.double(i[[ic]])) } - } else { - if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", iname, xname) - set(i, j=ic, value=as.double(i[[ic]])) } } } From 7ee12aa69a8757e40c981239be1cd181f293b272 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sun, 24 Nov 2024 17:25:39 +0100 Subject: [PATCH 09/25] add fix in one direction --- R/bmerge.R | 22 +++++++++++++++++----- inst/tests/tests.Rraw | 4 ++-- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/R/bmerge.R b/R/bmerge.R index 4460eb033..f7eafd047 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -34,7 +34,11 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos ans } - if (nrow(i)) { + if (nrow(i)) { + x_merge_types = vapply_1c(x[0L, ..xcols], getClass) + i_merge_types = vapply_1c(x[0L, ..icols], getClass) + xnames = paste0("x.", names(x)[xcols]) + inames = paste0("i.", names(i)[icols]) for (a in seq_along(icols)) { # - check that join columns have compatible types # - do type coercions if necessary on just the shallow local copies for the purpose of join @@ -42,10 +46,10 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos # Note that if i is keyed, if this coerces i's key gets dropped by set() ic = icols[a] xc = xcols[a] - x_merge_type = getClass(x[[xc]]) - i_merge_type = getClass(i[[ic]]) - xname = paste0("x.", names(x)[xc]) - iname = paste0("i.", names(i)[ic]) + x_merge_type = x_merge_types[a] + i_merge_type = i_merge_types[a] + xname = xnames[a] + iname = inames[a] if (!x_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", xname, x_merge_type) if (!i_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", iname, i_merge_type) if (x_merge_type=="factor" || i_merge_type=="factor") { @@ -115,6 +119,14 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos } else { if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", iname, xname) set(i, j=ic, value=as.double(i[[ic]])) + ic_idx = which(ic == icols) + if (length(ic_idx)>1) { + for (b in which(x_merge_types[ic_idx] != "double")) { + xb = xcols[b] + if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", xnames[b], xname) + set(x, j=xb, value=as.double(x[[xb]])) + } + } } } } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 98e65e383..77a8cf967 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -20601,6 +20601,6 @@ test(2296, d2[x %no such operator% 1], error = '%no such operator%') # coerce Dates to double if join on multiple columns, #6602 x = data.table(a=1L) y = data.table(c=1L, d=2) -y[x, on=.(c == a, d == a)] -test(2297.1, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=date_int, d=date_int, b=1L), output="coercing to double.") +test(2297.1, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=1L, d=1L), output="Coercing .*c to type double") +test(2297.2, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=1L, d=1L), output="Coercing .*c to type double") From 562a9fd7972cb87aa21555a8b6ee251970deafa6 Mon Sep 17 00:00:00 2001 From: ben-schwen Date: Tue, 26 Nov 2024 22:06:43 +0100 Subject: [PATCH 10/25] remove indent to cater for diff --- R/bmerge.R | 177 ++++++++++++++++++++++++++--------------------------- 1 file changed, 87 insertions(+), 90 deletions(-) diff --git a/R/bmerge.R b/R/bmerge.R index f7eafd047..7505f1526 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -35,102 +35,99 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos } if (nrow(i)) { - x_merge_types = vapply_1c(x[0L, ..xcols], getClass) - i_merge_types = vapply_1c(x[0L, ..icols], getClass) - xnames = paste0("x.", names(x)[xcols]) - inames = paste0("i.", names(i)[icols]) - for (a in seq_along(icols)) { - # - check that join columns have compatible types - # - do type coercions if necessary on just the shallow local copies for the purpose of join - # - handle factor columns appropriately - # Note that if i is keyed, if this coerces i's key gets dropped by set() - ic = icols[a] - xc = xcols[a] - x_merge_type = x_merge_types[a] - i_merge_type = i_merge_types[a] - xname = xnames[a] - iname = inames[a] - if (!x_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", xname, x_merge_type) - if (!i_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", iname, i_merge_type) - if (x_merge_type=="factor" || i_merge_type=="factor") { - if (roll!=0.0 && a==length(icols)) - stopf("Attempting roll join on factor column when joining %s to %s. Only integer, double or character columns may be roll joined.", xname, iname) - if (x_merge_type=="factor" && i_merge_type=="factor") { - if (verbose) catf("Matching %s factor levels to %s factor levels.\n", iname, xname) - set(i, j=ic, value=chmatch(levels(i[[ic]]), levels(x[[xc]]), nomatch=0L)[i[[ic]]]) # nomatch=0L otherwise a level that is missing would match to NA values - next - } else { - if (x_merge_type=="character") { - if (verbose) catf("Coercing factor column %s to type character to match type of %s.\n", iname, xname) - set(i, j=ic, value=val<-as.character(i[[ic]])) - set(callersi, j=ic, value=val) # factor in i joining to character in x will return character and not keep x's factor; e.g. for antaresRead #3581 - next - } else if (i_merge_type=="character") { - if (verbose) catf("Matching character column %s to factor levels in %s.\n", iname, xname) - newvalue = chmatch(i[[ic]], levels(x[[xc]]), nomatch=0L) - if (anyNA(i[[ic]])) newvalue[is.na(i[[ic]])] = NA_integer_ # NA_character_ should match to NA in factor, #3809 - set(i, j=ic, value=newvalue) - next - } - } - stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, x_merge_type, iname, i_merge_type) - } - if (x_merge_type == i_merge_type) { - if (verbose) catf("%s has same type (%s) as %s. No coercion needed.\n", iname, x_merge_type, xname) + xhead = x[0, ..xcols] + ihead = i[0, ..icols] + xtypes = vapply_1c(xhead, getClass) + itypes = vapply_1c(ihead, getClass) + for (a in seq_along(icols)) { + # - check that join columns have compatible types + # - do type coercions if necessary on just the shallow local copies for the purpose of join + # - handle factor columns appropriately + # Note that if i is keyed, if this coerces i's key gets dropped by set() + ic = icols[a] + xc = xcols[a] + xtype = xtypes[a] + itype = itypes[a] + xname = paste0("x.", names(xhead)[a]) + iname = paste0("i.", names(ihead)[a]) + if (!xtype %chin% supported) stopf("%s is type %s which is not supported by data.table join", xname, xtype) + if (!itype %chin% supported) stopf("%s is type %s which is not supported by data.table join", iname, itype) + if (xtype=="factor" || itype=="factor") { + if (roll!=0.0 && a==length(icols)) + stopf("Attempting roll join on factor column when joining %s to %s. Only integer, double or character columns may be roll joined.", xname, iname) + if (xtype=="factor" && itype=="factor") { + if (verbose) catf("Matching %s factor levels to %s factor levels.\n", iname, xname) + set(i, j=ic, value=chmatch(levels(i[[ic]]), levels(x[[xc]]), nomatch=0L)[i[[ic]]]) # nomatch=0L otherwise a level that is missing would match to NA values + next + } else { + if (xtype=="character") { + if (verbose) catf("Coercing factor column %s to type character to match type of %s.\n", iname, xname) + set(i, j=ic, value=val<-as.character(i[[ic]])) + set(callersi, j=ic, value=val) # factor in i joining to character in x will return character and not keep x's factor; e.g. for antaresRead #3581 + next + } else if (itype=="character") { + if (verbose) catf("Matching character column %s to factor levels in %s.\n", iname, xname) + newvalue = chmatch(i[[ic]], levels(x[[xc]]), nomatch=0L) + if (anyNA(i[[ic]])) newvalue[is.na(i[[ic]])] = NA_integer_ # NA_character_ should match to NA in factor, #3809 + set(i, j=ic, value=newvalue) next } - if (x_merge_type=="character" || i_merge_type=="character" || - x_merge_type=="logical" || i_merge_type=="logical" || - x_merge_type=="factor" || i_merge_type=="factor") { - if (anyNA(i[[ic]]) && allNA(i[[ic]])) { - if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", iname, i_merge_type, x_merge_type, xname) - set(i, j=ic, value=match.fun(paste0("as.", x_merge_type))(i[[ic]])) - next - } - else if (anyNA(x[[xc]]) && allNA(x[[xc]])) { - if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", xname, x_merge_type, i_merge_type, iname) - set(x, j=xc, value=match.fun(paste0("as.", i_merge_type))(x[[xc]])) - next - } - stopf("Incompatible join types: %s (%s) and %s (%s)", xname, x_merge_type, iname, i_merge_type) - } - if (x_merge_type=="integer64" || i_merge_type=="integer64") { - nm = c(iname, xname) - if (x_merge_type=="integer64") { w=i; wc=ic; wclass=i_merge_type; } else { w=x; wc=xc; wclass=x_merge_type; nm=rev(nm) } # w is which to coerce - if (wclass=="integer" || (wclass=="double" && !isReallyReal(w[[wc]]))) { - if (verbose) catf("Coercing %s column %s%s to type integer64 to match type of %s.\n", wclass, nm[1L], if (wclass=="double") " (which contains no fractions)" else "", nm[2L]) - set(w, j=wc, value=bit64::as.integer64(w[[wc]])) - } else stopf("Incompatible join types: %s is type integer64 but %s is type double and contains fractions", nm[2L], nm[1L]) + } + stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, xtype, iname, itype) + } + if (xtype == itype) { + if (anyDuplicated(icols) && !all() && duplicated(icols, fromLast=TRUE)[a]) { + set(x, j=xc, value=as.double(x[[xc]])) + set(i, j=ic, value=as.double(i[[ic]])) + if (verbose) catf("%s and %s are both Dates. R does not guarentee a type for Date internally, hence, coercing to double.\n", iname, xname) + } else { + if (verbose) catf("%s has same type (%s) as %s. No coercion needed.\n", iname, xtype, xname) + } + next + } + if (xtype=="character" || itype=="character" || + xtype=="logical" || itype=="logical" || + xtype=="factor" || itype=="factor") { + if (anyNA(i[[ic]]) && allNA(i[[ic]])) { + if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", iname, itype, xtype, xname) + set(i, j=ic, value=match.fun(paste0("as.", xtype))(i[[ic]])) + next + } + else if (anyNA(x[[xc]]) && allNA(x[[xc]])) { + if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", xname, xtype, itype, iname) + set(x, j=xc, value=match.fun(paste0("as.", itype))(x[[xc]])) + next + } + stopf("Incompatible join types: %s (%s) and %s (%s)", xname, xtype, iname, itype) + } + if (xtype=="integer64" || itype=="integer64") { + nm = c(iname, xname) + if (xtype=="integer64") { w=i; wc=ic; wclass=itype; } else { w=x; wc=xc; wclass=xtype; nm=rev(nm) } # w is which to coerce + if (wclass=="integer" || (wclass=="double" && !isReallyReal(w[[wc]]))) { + if (verbose) catf("Coercing %s column %s%s to type integer64 to match type of %s.\n", wclass, nm[1L], if (wclass=="double") " (which contains no fractions)" else "", nm[2L]) + set(w, j=wc, value=bit64::as.integer64(w[[wc]])) + } else stopf("Incompatible join types: %s is type integer64 but %s is type double and contains fractions", nm[2L], nm[1L]) + } else { + # just integer and double left + if (itype=="double") { + if (!isReallyReal(i[[ic]])) { + # common case of ad hoc user-typed integers missing L postfix joining to correct integer keys + # we've always coerced to int and returned int, for convenience. + if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", iname, xname) + val = as.integer(i[[ic]]) + if (!is.null(attributes(i[[ic]]))) attributes(val) = attributes(i[[ic]]) # to retain Date for example; 3679 + set(i, j=ic, value=val) + set(callersi, j=ic, value=val) # change the shallow copy of i up in [.data.table to reflect in the result, too. } else { - # just integer and double left - if (i_merge_type=="double") { - if (!isReallyReal(i[[ic]])) { - # common case of ad hoc user-typed integers missing L postfix joining to correct integer keys - # we've always coerced to int and returned int, for convenience. - if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", iname, xname) - val = as.integer(i[[ic]]) - if (!is.null(attributes(i[[ic]]))) attributes(val) = attributes(i[[ic]]) # to retain Date for example; 3679 - set(i, j=ic, value=val) - set(callersi, j=ic, value=val) # change the shallow copy of i up in [.data.table to reflect in the result, too. - } else { - if (verbose) catf("Coercing integer column %s to type double to match type of %s which contains fractions.\n", xname, iname) - set(x, j=xc, value=as.double(x[[xc]])) - } - } else { - if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", iname, xname) - set(i, j=ic, value=as.double(i[[ic]])) - ic_idx = which(ic == icols) - if (length(ic_idx)>1) { - for (b in which(x_merge_types[ic_idx] != "double")) { - xb = xcols[b] - if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", xnames[b], xname) - set(x, j=xb, value=as.double(x[[xb]])) - } - } - } + if (verbose) catf("Coercing integer column %s to type double to match type of %s which contains fractions.\n", xname, iname) + set(x, j=xc, value=as.double(x[[xc]])) } + } else { + if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", iname, xname) + set(i, j=ic, value=as.double(i[[ic]])) } } +}} ## after all modifications of x, check if x has a proper key on all xcols. ## If not, calculate the order. Also for non-equi joins, the order must be calculated. From 49f849befdb49c4ae302b49671fc175f7c0a499d Mon Sep 17 00:00:00 2001 From: ben-schwen Date: Tue, 26 Nov 2024 22:11:36 +0100 Subject: [PATCH 11/25] Revert "remove indent to cater for diff" This reverts commit 562a9fd7972cb87aa21555a8b6ee251970deafa6. --- R/bmerge.R | 177 +++++++++++++++++++++++++++-------------------------- 1 file changed, 90 insertions(+), 87 deletions(-) diff --git a/R/bmerge.R b/R/bmerge.R index 7505f1526..f7eafd047 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -35,99 +35,102 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos } if (nrow(i)) { - xhead = x[0, ..xcols] - ihead = i[0, ..icols] - xtypes = vapply_1c(xhead, getClass) - itypes = vapply_1c(ihead, getClass) - for (a in seq_along(icols)) { - # - check that join columns have compatible types - # - do type coercions if necessary on just the shallow local copies for the purpose of join - # - handle factor columns appropriately - # Note that if i is keyed, if this coerces i's key gets dropped by set() - ic = icols[a] - xc = xcols[a] - xtype = xtypes[a] - itype = itypes[a] - xname = paste0("x.", names(xhead)[a]) - iname = paste0("i.", names(ihead)[a]) - if (!xtype %chin% supported) stopf("%s is type %s which is not supported by data.table join", xname, xtype) - if (!itype %chin% supported) stopf("%s is type %s which is not supported by data.table join", iname, itype) - if (xtype=="factor" || itype=="factor") { - if (roll!=0.0 && a==length(icols)) - stopf("Attempting roll join on factor column when joining %s to %s. Only integer, double or character columns may be roll joined.", xname, iname) - if (xtype=="factor" && itype=="factor") { - if (verbose) catf("Matching %s factor levels to %s factor levels.\n", iname, xname) - set(i, j=ic, value=chmatch(levels(i[[ic]]), levels(x[[xc]]), nomatch=0L)[i[[ic]]]) # nomatch=0L otherwise a level that is missing would match to NA values - next - } else { - if (xtype=="character") { - if (verbose) catf("Coercing factor column %s to type character to match type of %s.\n", iname, xname) - set(i, j=ic, value=val<-as.character(i[[ic]])) - set(callersi, j=ic, value=val) # factor in i joining to character in x will return character and not keep x's factor; e.g. for antaresRead #3581 - next - } else if (itype=="character") { - if (verbose) catf("Matching character column %s to factor levels in %s.\n", iname, xname) - newvalue = chmatch(i[[ic]], levels(x[[xc]]), nomatch=0L) - if (anyNA(i[[ic]])) newvalue[is.na(i[[ic]])] = NA_integer_ # NA_character_ should match to NA in factor, #3809 - set(i, j=ic, value=newvalue) + x_merge_types = vapply_1c(x[0L, ..xcols], getClass) + i_merge_types = vapply_1c(x[0L, ..icols], getClass) + xnames = paste0("x.", names(x)[xcols]) + inames = paste0("i.", names(i)[icols]) + for (a in seq_along(icols)) { + # - check that join columns have compatible types + # - do type coercions if necessary on just the shallow local copies for the purpose of join + # - handle factor columns appropriately + # Note that if i is keyed, if this coerces i's key gets dropped by set() + ic = icols[a] + xc = xcols[a] + x_merge_type = x_merge_types[a] + i_merge_type = i_merge_types[a] + xname = xnames[a] + iname = inames[a] + if (!x_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", xname, x_merge_type) + if (!i_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", iname, i_merge_type) + if (x_merge_type=="factor" || i_merge_type=="factor") { + if (roll!=0.0 && a==length(icols)) + stopf("Attempting roll join on factor column when joining %s to %s. Only integer, double or character columns may be roll joined.", xname, iname) + if (x_merge_type=="factor" && i_merge_type=="factor") { + if (verbose) catf("Matching %s factor levels to %s factor levels.\n", iname, xname) + set(i, j=ic, value=chmatch(levels(i[[ic]]), levels(x[[xc]]), nomatch=0L)[i[[ic]]]) # nomatch=0L otherwise a level that is missing would match to NA values + next + } else { + if (x_merge_type=="character") { + if (verbose) catf("Coercing factor column %s to type character to match type of %s.\n", iname, xname) + set(i, j=ic, value=val<-as.character(i[[ic]])) + set(callersi, j=ic, value=val) # factor in i joining to character in x will return character and not keep x's factor; e.g. for antaresRead #3581 + next + } else if (i_merge_type=="character") { + if (verbose) catf("Matching character column %s to factor levels in %s.\n", iname, xname) + newvalue = chmatch(i[[ic]], levels(x[[xc]]), nomatch=0L) + if (anyNA(i[[ic]])) newvalue[is.na(i[[ic]])] = NA_integer_ # NA_character_ should match to NA in factor, #3809 + set(i, j=ic, value=newvalue) + next + } + } + stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, x_merge_type, iname, i_merge_type) + } + if (x_merge_type == i_merge_type) { + if (verbose) catf("%s has same type (%s) as %s. No coercion needed.\n", iname, x_merge_type, xname) next } - } - stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, xtype, iname, itype) - } - if (xtype == itype) { - if (anyDuplicated(icols) && !all() && duplicated(icols, fromLast=TRUE)[a]) { - set(x, j=xc, value=as.double(x[[xc]])) - set(i, j=ic, value=as.double(i[[ic]])) - if (verbose) catf("%s and %s are both Dates. R does not guarentee a type for Date internally, hence, coercing to double.\n", iname, xname) - } else { - if (verbose) catf("%s has same type (%s) as %s. No coercion needed.\n", iname, xtype, xname) - } - next - } - if (xtype=="character" || itype=="character" || - xtype=="logical" || itype=="logical" || - xtype=="factor" || itype=="factor") { - if (anyNA(i[[ic]]) && allNA(i[[ic]])) { - if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", iname, itype, xtype, xname) - set(i, j=ic, value=match.fun(paste0("as.", xtype))(i[[ic]])) - next - } - else if (anyNA(x[[xc]]) && allNA(x[[xc]])) { - if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", xname, xtype, itype, iname) - set(x, j=xc, value=match.fun(paste0("as.", itype))(x[[xc]])) - next - } - stopf("Incompatible join types: %s (%s) and %s (%s)", xname, xtype, iname, itype) - } - if (xtype=="integer64" || itype=="integer64") { - nm = c(iname, xname) - if (xtype=="integer64") { w=i; wc=ic; wclass=itype; } else { w=x; wc=xc; wclass=xtype; nm=rev(nm) } # w is which to coerce - if (wclass=="integer" || (wclass=="double" && !isReallyReal(w[[wc]]))) { - if (verbose) catf("Coercing %s column %s%s to type integer64 to match type of %s.\n", wclass, nm[1L], if (wclass=="double") " (which contains no fractions)" else "", nm[2L]) - set(w, j=wc, value=bit64::as.integer64(w[[wc]])) - } else stopf("Incompatible join types: %s is type integer64 but %s is type double and contains fractions", nm[2L], nm[1L]) - } else { - # just integer and double left - if (itype=="double") { - if (!isReallyReal(i[[ic]])) { - # common case of ad hoc user-typed integers missing L postfix joining to correct integer keys - # we've always coerced to int and returned int, for convenience. - if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", iname, xname) - val = as.integer(i[[ic]]) - if (!is.null(attributes(i[[ic]]))) attributes(val) = attributes(i[[ic]]) # to retain Date for example; 3679 - set(i, j=ic, value=val) - set(callersi, j=ic, value=val) # change the shallow copy of i up in [.data.table to reflect in the result, too. + if (x_merge_type=="character" || i_merge_type=="character" || + x_merge_type=="logical" || i_merge_type=="logical" || + x_merge_type=="factor" || i_merge_type=="factor") { + if (anyNA(i[[ic]]) && allNA(i[[ic]])) { + if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", iname, i_merge_type, x_merge_type, xname) + set(i, j=ic, value=match.fun(paste0("as.", x_merge_type))(i[[ic]])) + next + } + else if (anyNA(x[[xc]]) && allNA(x[[xc]])) { + if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", xname, x_merge_type, i_merge_type, iname) + set(x, j=xc, value=match.fun(paste0("as.", i_merge_type))(x[[xc]])) + next + } + stopf("Incompatible join types: %s (%s) and %s (%s)", xname, x_merge_type, iname, i_merge_type) + } + if (x_merge_type=="integer64" || i_merge_type=="integer64") { + nm = c(iname, xname) + if (x_merge_type=="integer64") { w=i; wc=ic; wclass=i_merge_type; } else { w=x; wc=xc; wclass=x_merge_type; nm=rev(nm) } # w is which to coerce + if (wclass=="integer" || (wclass=="double" && !isReallyReal(w[[wc]]))) { + if (verbose) catf("Coercing %s column %s%s to type integer64 to match type of %s.\n", wclass, nm[1L], if (wclass=="double") " (which contains no fractions)" else "", nm[2L]) + set(w, j=wc, value=bit64::as.integer64(w[[wc]])) + } else stopf("Incompatible join types: %s is type integer64 but %s is type double and contains fractions", nm[2L], nm[1L]) } else { - if (verbose) catf("Coercing integer column %s to type double to match type of %s which contains fractions.\n", xname, iname) - set(x, j=xc, value=as.double(x[[xc]])) + # just integer and double left + if (i_merge_type=="double") { + if (!isReallyReal(i[[ic]])) { + # common case of ad hoc user-typed integers missing L postfix joining to correct integer keys + # we've always coerced to int and returned int, for convenience. + if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", iname, xname) + val = as.integer(i[[ic]]) + if (!is.null(attributes(i[[ic]]))) attributes(val) = attributes(i[[ic]]) # to retain Date for example; 3679 + set(i, j=ic, value=val) + set(callersi, j=ic, value=val) # change the shallow copy of i up in [.data.table to reflect in the result, too. + } else { + if (verbose) catf("Coercing integer column %s to type double to match type of %s which contains fractions.\n", xname, iname) + set(x, j=xc, value=as.double(x[[xc]])) + } + } else { + if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", iname, xname) + set(i, j=ic, value=as.double(i[[ic]])) + ic_idx = which(ic == icols) + if (length(ic_idx)>1) { + for (b in which(x_merge_types[ic_idx] != "double")) { + xb = xcols[b] + if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", xnames[b], xname) + set(x, j=xb, value=as.double(x[[xb]])) + } + } + } } - } else { - if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", iname, xname) - set(i, j=ic, value=as.double(i[[ic]])) } } -}} ## after all modifications of x, check if x has a proper key on all xcols. ## If not, calculate the order. Also for non-equi joins, the order must be calculated. From cfa5d7134b467ca66679d3e566ed6ddff8e1e327 Mon Sep 17 00:00:00 2001 From: ben-schwen Date: Tue, 26 Nov 2024 22:13:02 +0100 Subject: [PATCH 12/25] remove indent --- R/bmerge.R | 160 ++++++++++++++++++++++++++--------------------------- 1 file changed, 80 insertions(+), 80 deletions(-) diff --git a/R/bmerge.R b/R/bmerge.R index f7eafd047..65d532170 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -40,96 +40,96 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos xnames = paste0("x.", names(x)[xcols]) inames = paste0("i.", names(i)[icols]) for (a in seq_along(icols)) { - # - check that join columns have compatible types - # - do type coercions if necessary on just the shallow local copies for the purpose of join - # - handle factor columns appropriately - # Note that if i is keyed, if this coerces i's key gets dropped by set() - ic = icols[a] - xc = xcols[a] - x_merge_type = x_merge_types[a] - i_merge_type = i_merge_types[a] - xname = xnames[a] - iname = inames[a] - if (!x_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", xname, x_merge_type) - if (!i_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", iname, i_merge_type) - if (x_merge_type=="factor" || i_merge_type=="factor") { - if (roll!=0.0 && a==length(icols)) - stopf("Attempting roll join on factor column when joining %s to %s. Only integer, double or character columns may be roll joined.", xname, iname) - if (x_merge_type=="factor" && i_merge_type=="factor") { - if (verbose) catf("Matching %s factor levels to %s factor levels.\n", iname, xname) - set(i, j=ic, value=chmatch(levels(i[[ic]]), levels(x[[xc]]), nomatch=0L)[i[[ic]]]) # nomatch=0L otherwise a level that is missing would match to NA values + # - check that join columns have compatible types + # - do type coercions if necessary on just the shallow local copies for the purpose of join + # - handle factor columns appropriately + # Note that if i is keyed, if this coerces i's key gets dropped by set() + ic = icols[a] + xc = xcols[a] + x_merge_type = x_merge_types[a] + i_merge_type = i_merge_types[a] + xname = xnames[a] + iname = inames[a] + if (!x_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", xname, x_merge_type) + if (!i_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", iname, i_merge_type) + if (x_merge_type=="factor" || i_merge_type=="factor") { + if (roll!=0.0 && a==length(icols)) + stopf("Attempting roll join on factor column when joining %s to %s. Only integer, double or character columns may be roll joined.", xname, iname) + if (x_merge_type=="factor" && i_merge_type=="factor") { + if (verbose) catf("Matching %s factor levels to %s factor levels.\n", iname, xname) + set(i, j=ic, value=chmatch(levels(i[[ic]]), levels(x[[xc]]), nomatch=0L)[i[[ic]]]) # nomatch=0L otherwise a level that is missing would match to NA values + next + } else { + if (x_merge_type=="character") { + if (verbose) catf("Coercing factor column %s to type character to match type of %s.\n", iname, xname) + set(i, j=ic, value=val<-as.character(i[[ic]])) + set(callersi, j=ic, value=val) # factor in i joining to character in x will return character and not keep x's factor; e.g. for antaresRead #3581 + next + } else if (i_merge_type=="character") { + if (verbose) catf("Matching character column %s to factor levels in %s.\n", iname, xname) + newvalue = chmatch(i[[ic]], levels(x[[xc]]), nomatch=0L) + if (anyNA(i[[ic]])) newvalue[is.na(i[[ic]])] = NA_integer_ # NA_character_ should match to NA in factor, #3809 + set(i, j=ic, value=newvalue) next - } else { - if (x_merge_type=="character") { - if (verbose) catf("Coercing factor column %s to type character to match type of %s.\n", iname, xname) - set(i, j=ic, value=val<-as.character(i[[ic]])) - set(callersi, j=ic, value=val) # factor in i joining to character in x will return character and not keep x's factor; e.g. for antaresRead #3581 - next - } else if (i_merge_type=="character") { - if (verbose) catf("Matching character column %s to factor levels in %s.\n", iname, xname) - newvalue = chmatch(i[[ic]], levels(x[[xc]]), nomatch=0L) - if (anyNA(i[[ic]])) newvalue[is.na(i[[ic]])] = NA_integer_ # NA_character_ should match to NA in factor, #3809 - set(i, j=ic, value=newvalue) - next - } } - stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, x_merge_type, iname, i_merge_type) } - if (x_merge_type == i_merge_type) { - if (verbose) catf("%s has same type (%s) as %s. No coercion needed.\n", iname, x_merge_type, xname) + stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, x_merge_type, iname, i_merge_type) + } + if (x_merge_type == i_merge_type) { + if (verbose) catf("%s has same type (%s) as %s. No coercion needed.\n", iname, x_merge_type, xname) + next + } + if (x_merge_type=="character" || i_merge_type=="character" || + x_merge_type=="logical" || i_merge_type=="logical" || + x_merge_type=="factor" || i_merge_type=="factor") { + if (anyNA(i[[ic]]) && allNA(i[[ic]])) { + if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", iname, i_merge_type, x_merge_type, xname) + set(i, j=ic, value=match.fun(paste0("as.", x_merge_type))(i[[ic]])) next } - if (x_merge_type=="character" || i_merge_type=="character" || - x_merge_type=="logical" || i_merge_type=="logical" || - x_merge_type=="factor" || i_merge_type=="factor") { - if (anyNA(i[[ic]]) && allNA(i[[ic]])) { - if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", iname, i_merge_type, x_merge_type, xname) - set(i, j=ic, value=match.fun(paste0("as.", x_merge_type))(i[[ic]])) - next - } - else if (anyNA(x[[xc]]) && allNA(x[[xc]])) { - if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", xname, x_merge_type, i_merge_type, iname) - set(x, j=xc, value=match.fun(paste0("as.", i_merge_type))(x[[xc]])) - next - } - stopf("Incompatible join types: %s (%s) and %s (%s)", xname, x_merge_type, iname, i_merge_type) + else if (anyNA(x[[xc]]) && allNA(x[[xc]])) { + if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", xname, x_merge_type, i_merge_type, iname) + set(x, j=xc, value=match.fun(paste0("as.", i_merge_type))(x[[xc]])) + next } - if (x_merge_type=="integer64" || i_merge_type=="integer64") { - nm = c(iname, xname) - if (x_merge_type=="integer64") { w=i; wc=ic; wclass=i_merge_type; } else { w=x; wc=xc; wclass=x_merge_type; nm=rev(nm) } # w is which to coerce - if (wclass=="integer" || (wclass=="double" && !isReallyReal(w[[wc]]))) { - if (verbose) catf("Coercing %s column %s%s to type integer64 to match type of %s.\n", wclass, nm[1L], if (wclass=="double") " (which contains no fractions)" else "", nm[2L]) - set(w, j=wc, value=bit64::as.integer64(w[[wc]])) - } else stopf("Incompatible join types: %s is type integer64 but %s is type double and contains fractions", nm[2L], nm[1L]) - } else { - # just integer and double left - if (i_merge_type=="double") { - if (!isReallyReal(i[[ic]])) { - # common case of ad hoc user-typed integers missing L postfix joining to correct integer keys - # we've always coerced to int and returned int, for convenience. - if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", iname, xname) - val = as.integer(i[[ic]]) - if (!is.null(attributes(i[[ic]]))) attributes(val) = attributes(i[[ic]]) # to retain Date for example; 3679 - set(i, j=ic, value=val) - set(callersi, j=ic, value=val) # change the shallow copy of i up in [.data.table to reflect in the result, too. - } else { - if (verbose) catf("Coercing integer column %s to type double to match type of %s which contains fractions.\n", xname, iname) - set(x, j=xc, value=as.double(x[[xc]])) - } + stopf("Incompatible join types: %s (%s) and %s (%s)", xname, x_merge_type, iname, i_merge_type) + } + if (x_merge_type=="integer64" || i_merge_type=="integer64") { + nm = c(iname, xname) + if (x_merge_type=="integer64") { w=i; wc=ic; wclass=i_merge_type; } else { w=x; wc=xc; wclass=x_merge_type; nm=rev(nm) } # w is which to coerce + if (wclass=="integer" || (wclass=="double" && !isReallyReal(w[[wc]]))) { + if (verbose) catf("Coercing %s column %s%s to type integer64 to match type of %s.\n", wclass, nm[1L], if (wclass=="double") " (which contains no fractions)" else "", nm[2L]) + set(w, j=wc, value=bit64::as.integer64(w[[wc]])) + } else stopf("Incompatible join types: %s is type integer64 but %s is type double and contains fractions", nm[2L], nm[1L]) + } else { + # just integer and double left + if (i_merge_type=="double") { + if (!isReallyReal(i[[ic]])) { + # common case of ad hoc user-typed integers missing L postfix joining to correct integer keys + # we've always coerced to int and returned int, for convenience. + if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", iname, xname) + val = as.integer(i[[ic]]) + if (!is.null(attributes(i[[ic]]))) attributes(val) = attributes(i[[ic]]) # to retain Date for example; 3679 + set(i, j=ic, value=val) + set(callersi, j=ic, value=val) # change the shallow copy of i up in [.data.table to reflect in the result, too. } else { - if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", iname, xname) - set(i, j=ic, value=as.double(i[[ic]])) - ic_idx = which(ic == icols) - if (length(ic_idx)>1) { - for (b in which(x_merge_types[ic_idx] != "double")) { - xb = xcols[b] - if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", xnames[b], xname) - set(x, j=xb, value=as.double(x[[xb]])) - } + if (verbose) catf("Coercing integer column %s to type double to match type of %s which contains fractions.\n", xname, iname) + set(x, j=xc, value=as.double(x[[xc]])) + } + } else { + if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", iname, xname) + set(i, j=ic, value=as.double(i[[ic]])) + ic_idx = which(ic == icols) + if (length(ic_idx)>1) { + for (b in which(x_merge_types[ic_idx] != "double")) { + xb = xcols[b] + if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", xnames[b], xname) + set(x, j=xb, value=as.double(x[[xb]])) } } } - } + } + } } ## after all modifications of x, check if x has a proper key on all xcols. From 6b8f11fed6a895862547a97dee8604c3de9fb8b3 Mon Sep 17 00:00:00 2001 From: ben-schwen Date: Tue, 26 Nov 2024 23:01:40 +0100 Subject: [PATCH 13/25] add 2nd case --- R/bmerge.R | 43 ++++++++++++++++++++++++++++++------------- inst/tests/tests.Rraw | 5 ++++- 2 files changed, 34 insertions(+), 14 deletions(-) diff --git a/R/bmerge.R b/R/bmerge.R index 65d532170..1ced26ef3 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -103,29 +103,46 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos } else stopf("Incompatible join types: %s is type integer64 but %s is type double and contains fractions", nm[2L], nm[1L]) } else { # just integer and double left + ic_idx = which(ic == icols) if (i_merge_type=="double") { + coerce_x = FALSE if (!isReallyReal(i[[ic]])) { # common case of ad hoc user-typed integers missing L postfix joining to correct integer keys # we've always coerced to int and returned int, for convenience. - if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", iname, xname) - val = as.integer(i[[ic]]) - if (!is.null(attributes(i[[ic]]))) attributes(val) = attributes(i[[ic]]) # to retain Date for example; 3679 - set(i, j=ic, value=val) - set(callersi, j=ic, value=val) # change the shallow copy of i up in [.data.table to reflect in the result, too. - } else { + for (b in which(x_merge_types[ic_idx] == "double")) { + xb = xcols[b] + if (isReallyReal(x[[xb]])) { + coerce_x = TRUE + break + } + } + if (!coerce_x) { + if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", iname, xname) + val = as.integer(i[[ic]]) + if (!is.null(attributes(i[[ic]]))) attributes(val) = attributes(i[[ic]]) # to retain Date for example; 3679 + set(i, j=ic, value=val) + set(callersi, j=ic, value=val) # change the shallow copy of i up in [.data.table to reflect in the result, too. + for (b in which(x_merge_types[ic_idx] == "double")) { + xb = xcols[b] + val = as.integer(x[[xb]]) + if (!is.null(attributes(x[[xb]]))) attributes(val) = attributes(x[[xb]]) + if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", xnames[b], xname) + set(x, j=xb, value=val) + } + } + } + if (coerce_x) { + ic_idx = which() if (verbose) catf("Coercing integer column %s to type double to match type of %s which contains fractions.\n", xname, iname) set(x, j=xc, value=as.double(x[[xc]])) } } else { if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", iname, xname) set(i, j=ic, value=as.double(i[[ic]])) - ic_idx = which(ic == icols) - if (length(ic_idx)>1) { - for (b in which(x_merge_types[ic_idx] != "double")) { - xb = xcols[b] - if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", xnames[b], xname) - set(x, j=xb, value=as.double(x[[xb]])) - } + for (b in which(x_merge_types[ic_idx] == "integer")) { + xb = xcols[b] + if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", xnames[b], xname) + set(x, j=xb, value=as.double(x[[xb]])) } } } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 77a8cf967..46c56e19a 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -20601,6 +20601,9 @@ test(2296, d2[x %no such operator% 1], error = '%no such operator%') # coerce Dates to double if join on multiple columns, #6602 x = data.table(a=1L) y = data.table(c=1L, d=2) - test(2297.1, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=1L, d=1L), output="Coercing .*c to type double") test(2297.2, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=1L, d=1L), output="Coercing .*c to type double") +x = data.table(a=1) +y = data.table(c=1, d=2L) +test(2297.3, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=1L, d=1L), output="Coercing double column x.c (which contains no fractions) to type integer") +test(2297.4, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=1L, d=1L), output="Coercing double column x.c (which contains no fractions) to type integer") From a651b03bb606310dceb50c0c0b0f474a3825cb16 Mon Sep 17 00:00:00 2001 From: ben-schwen Date: Tue, 26 Nov 2024 23:05:18 +0100 Subject: [PATCH 14/25] remove trailing ws --- R/bmerge.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/bmerge.R b/R/bmerge.R index 1ced26ef3..7f543ade4 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -110,7 +110,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos # common case of ad hoc user-typed integers missing L postfix joining to correct integer keys # we've always coerced to int and returned int, for convenience. for (b in which(x_merge_types[ic_idx] == "double")) { - xb = xcols[b] + xb = xcols[b] if (isReallyReal(x[[xb]])) { coerce_x = TRUE break @@ -145,7 +145,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos set(x, j=xb, value=as.double(x[[xb]])) } } - } + } } } From 839463eebded28ab9943537041c7071432894cf1 Mon Sep 17 00:00:00 2001 From: ben-schwen Date: Tue, 26 Nov 2024 23:28:58 +0100 Subject: [PATCH 15/25] update all cases --- R/bmerge.R | 55 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 29 insertions(+), 26 deletions(-) diff --git a/R/bmerge.R b/R/bmerge.R index 7f543ade4..652ddd236 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -34,22 +34,17 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos ans } - if (nrow(i)) { - x_merge_types = vapply_1c(x[0L, ..xcols], getClass) - i_merge_types = vapply_1c(x[0L, ..icols], getClass) - xnames = paste0("x.", names(x)[xcols]) - inames = paste0("i.", names(i)[icols]) - for (a in seq_along(icols)) { + if (nrow(i)) for (a in seq_along(icols)) { # - check that join columns have compatible types # - do type coercions if necessary on just the shallow local copies for the purpose of join # - handle factor columns appropriately # Note that if i is keyed, if this coerces i's key gets dropped by set() ic = icols[a] xc = xcols[a] - x_merge_type = x_merge_types[a] - i_merge_type = i_merge_types[a] - xname = xnames[a] - iname = inames[a] + x_merge_type = getClass(x[[xc]]) + i_merge_type = getClass(i[[ic]]) + xname = paste0("x.", names(x)[xc]) + iname = paste0("i.", names(i)[ic]) if (!x_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", xname, x_merge_type) if (!i_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", iname, i_merge_type) if (x_merge_type=="factor" || i_merge_type=="factor") { @@ -109,11 +104,14 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos if (!isReallyReal(i[[ic]])) { # common case of ad hoc user-typed integers missing L postfix joining to correct integer keys # we've always coerced to int and returned int, for convenience. - for (b in which(x_merge_types[ic_idx] == "double")) { - xb = xcols[b] - if (isReallyReal(x[[xb]])) { - coerce_x = TRUE - break + if (length(ic_idx)>1L) { + xc_idx = xcols[ic_idx] + for (b in which(vapply_1c(x[0L, ..xc_idx], getClass) == "double")) { + xb = xcols[b] + if (isReallyReal(x[[xb]])) { + coerce_x = TRUE + break + } } } if (!coerce_x) { @@ -122,12 +120,15 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos if (!is.null(attributes(i[[ic]]))) attributes(val) = attributes(i[[ic]]) # to retain Date for example; 3679 set(i, j=ic, value=val) set(callersi, j=ic, value=val) # change the shallow copy of i up in [.data.table to reflect in the result, too. - for (b in which(x_merge_types[ic_idx] == "double")) { - xb = xcols[b] - val = as.integer(x[[xb]]) - if (!is.null(attributes(x[[xb]]))) attributes(val) = attributes(x[[xb]]) - if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", xnames[b], xname) - set(x, j=xb, value=val) + if (length(ic_idx)>1L) { + xc_idx = xcols[ic_idx] + for (b in which(vapply_1c(x[0L, ..xc_idx], getClass) == "double")) { + xb = xcols[b] + val = as.integer(x[[xb]]) + if (!is.null(attributes(x[[xb]]))) attributes(val) = attributes(x[[xb]]) + if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", paste0("x.", names(x)[xb]), xname) + set(x, j=xb, value=val) + } } } } @@ -139,15 +140,17 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos } else { if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", iname, xname) set(i, j=ic, value=as.double(i[[ic]])) - for (b in which(x_merge_types[ic_idx] == "integer")) { - xb = xcols[b] - if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", xnames[b], xname) - set(x, j=xb, value=as.double(x[[xb]])) + if (length(ic_idx)>1L) { + xc_idx = xcols[ic_idx] + for (b in which(vapply_1c(x[0L, ..xc_idx], getClass) == "integer")) { + xb = xcols[b] + if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", paste0("x.", names(x)[xb]), xname) + set(x, j=xb, value=as.double(x[[xb]])) + } } } } } - } ## after all modifications of x, check if x has a proper key on all xcols. ## If not, calculate the order. Also for non-equi joins, the order must be calculated. From 935ac607e028206a81bfb781008ffff225829174 Mon Sep 17 00:00:00 2001 From: ben-schwen Date: Tue, 26 Nov 2024 23:41:40 +0100 Subject: [PATCH 16/25] fix typo --- R/bmerge.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/bmerge.R b/R/bmerge.R index 652ddd236..658ff53ec 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -102,19 +102,20 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos if (i_merge_type=="double") { coerce_x = FALSE if (!isReallyReal(i[[ic]])) { + coerce_x = TRUE # common case of ad hoc user-typed integers missing L postfix joining to correct integer keys # we've always coerced to int and returned int, for convenience. if (length(ic_idx)>1L) { xc_idx = xcols[ic_idx] for (b in which(vapply_1c(x[0L, ..xc_idx], getClass) == "double")) { xb = xcols[b] - if (isReallyReal(x[[xb]])) { - coerce_x = TRUE + if (!isReallyReal(x[[xb]])) { + coerce_x = FALSE break } } } - if (!coerce_x) { + if (coerce_x) { if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", iname, xname) val = as.integer(i[[ic]]) if (!is.null(attributes(i[[ic]]))) attributes(val) = attributes(i[[ic]]) # to retain Date for example; 3679 @@ -132,8 +133,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos } } } - if (coerce_x) { - ic_idx = which() + if (!coerce_x) { if (verbose) catf("Coercing integer column %s to type double to match type of %s which contains fractions.\n", xname, iname) set(x, j=xc, value=as.double(x[[xc]])) } From f1997e0f531df5a368d2819fb36e2b8c97608852 Mon Sep 17 00:00:00 2001 From: ben-schwen Date: Tue, 26 Nov 2024 23:57:58 +0100 Subject: [PATCH 17/25] fix test cases --- inst/tests/tests.Rraw | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 46c56e19a..8d5a98633 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -20600,10 +20600,10 @@ test(2296, d2[x %no such operator% 1], error = '%no such operator%') # coerce Dates to double if join on multiple columns, #6602 x = data.table(a=1L) -y = data.table(c=1L, d=2) +y = data.table(c=1L, d=1) test(2297.1, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=1L, d=1L), output="Coercing .*c to type double") test(2297.2, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=1L, d=1L), output="Coercing .*c to type double") x = data.table(a=1) -y = data.table(c=1, d=2L) -test(2297.3, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=1L, d=1L), output="Coercing double column x.c (which contains no fractions) to type integer") -test(2297.4, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=1L, d=1L), output="Coercing double column x.c (which contains no fractions) to type integer") +y = data.table(c=1, d=1L) +test(2297.3, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=1, d=1), output="Coercing .*d to type double") +test(2297.4, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=1, d=1), output="Coercing .*d to type double") From 25eaa659689ec765e176bcc357235d98fab05ee2 Mon Sep 17 00:00:00 2001 From: ben-schwen Date: Wed, 27 Nov 2024 00:04:10 +0100 Subject: [PATCH 18/25] update testcases --- inst/tests/tests.Rraw | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 8d5a98633..71c43aa67 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -20599,11 +20599,21 @@ test(2295.3, is.data.table(d2)) test(2296, d2[x %no such operator% 1], error = '%no such operator%') # coerce Dates to double if join on multiple columns, #6602 +d_int = .Date(1L) +d_dbl = .Date(1) x = data.table(a=1L) y = data.table(c=1L, d=1) -test(2297.1, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=1L, d=1L), output="Coercing .*c to type double") -test(2297.2, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=1L, d=1L), output="Coercing .*c to type double") +test(2297.01, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=1L, d=1L), output="Coercing .*c to type double") +test(2297.02, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=1L, d=1L), output="Coercing .*c to type double") +x = data.table(a=d_int) +y = data.table(c=d_int, d=d_dbl) +test(2297.03, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=d_int, d=d_int), output="Coercing .*c to type double") +test(2297.04, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=d_int, d=d_int), output="Coercing .*c to type double") x = data.table(a=1) y = data.table(c=1, d=1L) -test(2297.3, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=1, d=1), output="Coercing .*d to type double") -test(2297.4, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=1, d=1), output="Coercing .*d to type double") +test(2297.11, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=1, d=1), output="Coercing .*d to type double") +test(2297.12, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=1, d=1), output="Coercing .*d to type double") +x = data.table(a=d_dbl) +y = data.table(c=d_dbl, d=d_int) +test(2297.13, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=d_dbl, d=d_dbl), output="Coercing .*d to type double") +test(2297.14, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=d_dbl, d=d_dbl), output="Coercing .*d to type double") From f0219c4a77017c683426c45dfbe127c0c8ff328f Mon Sep 17 00:00:00 2001 From: ben-schwen Date: Wed, 27 Nov 2024 00:08:40 +0100 Subject: [PATCH 19/25] update copying attributes from int to dbl --- R/bmerge.R | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/R/bmerge.R b/R/bmerge.R index 658ff53ec..fbe1408c5 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -139,13 +139,17 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos } } else { if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", iname, xname) - set(i, j=ic, value=as.double(i[[ic]])) + val = as.double(i[[ic]]) + if (!is.null(attributes(i[[ic]]))) attributes(val) = attributes(i[[ic]]) # to retain Date for example; 3679 + set(i, j=ic, value=val) if (length(ic_idx)>1L) { xc_idx = xcols[ic_idx] for (b in which(vapply_1c(x[0L, ..xc_idx], getClass) == "integer")) { xb = xcols[b] + val = as.double(x[[xb]]) + if (!is.null(attributes(x[[xb]]))) attributes(val) = attributes(x[[xb]]) if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", paste0("x.", names(x)[xb]), xname) - set(x, j=xb, value=as.double(x[[xb]])) + set(x, j=xb, value=val) } } } From ca6756f7ca3faffcf4064a7f4c3095e6980f0b85 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Wed, 27 Nov 2024 09:52:46 +0100 Subject: [PATCH 20/25] start modularize --- R/bmerge.R | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/R/bmerge.R b/R/bmerge.R index fbe1408c5..571a18ec7 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -34,6 +34,12 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos ans } + cast_with_atts = function(x, f) { + ans = f(x) + if (!is.null(attributes(x))) attributes(ans) = attributes(x) + ans + } + if (nrow(i)) for (a in seq_along(icols)) { # - check that join columns have compatible types # - do type coercions if necessary on just the shallow local copies for the purpose of join @@ -117,18 +123,15 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos } if (coerce_x) { if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", iname, xname) - val = as.integer(i[[ic]]) - if (!is.null(attributes(i[[ic]]))) attributes(val) = attributes(i[[ic]]) # to retain Date for example; 3679 + val = cast_with_atts(i[[ic]], as.integer) # to retain Date for example; 3679 set(i, j=ic, value=val) set(callersi, j=ic, value=val) # change the shallow copy of i up in [.data.table to reflect in the result, too. if (length(ic_idx)>1L) { xc_idx = xcols[ic_idx] for (b in which(vapply_1c(x[0L, ..xc_idx], getClass) == "double")) { xb = xcols[b] - val = as.integer(x[[xb]]) - if (!is.null(attributes(x[[xb]]))) attributes(val) = attributes(x[[xb]]) if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", paste0("x.", names(x)[xb]), xname) - set(x, j=xb, value=val) + set(x, j=xb, value=cast_with_atts(x[[xb]], as.integer)) } } } @@ -139,17 +142,14 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos } } else { if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", iname, xname) - val = as.double(i[[ic]]) - if (!is.null(attributes(i[[ic]]))) attributes(val) = attributes(i[[ic]]) # to retain Date for example; 3679 + val = cast_with_atts(i[[ic]], as.double) set(i, j=ic, value=val) if (length(ic_idx)>1L) { xc_idx = xcols[ic_idx] for (b in which(vapply_1c(x[0L, ..xc_idx], getClass) == "integer")) { xb = xcols[b] - val = as.double(x[[xb]]) - if (!is.null(attributes(x[[xb]]))) attributes(val) = attributes(x[[xb]]) if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", paste0("x.", names(x)[xb]), xname) - set(x, j=xb, value=val) + set(x, j=xb, value=cast_with_atts(x[[xb]], as.double)) } } } From d41bbf26f0fd4a7deebb1b83ee2bcac31b20bf8f Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Wed, 27 Nov 2024 10:08:12 +0100 Subject: [PATCH 21/25] fix cases --- R/bmerge.R | 4 ++-- inst/tests/tests.Rraw | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/bmerge.R b/R/bmerge.R index 571a18ec7..b6e0f4afb 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -88,7 +88,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos set(i, j=ic, value=match.fun(paste0("as.", x_merge_type))(i[[ic]])) next } - else if (anyNA(x[[xc]]) && allNA(x[[xc]])) { + if (anyNA(x[[xc]]) && allNA(x[[xc]])) { if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", xname, x_merge_type, i_merge_type, iname) set(x, j=xc, value=match.fun(paste0("as.", i_merge_type))(x[[xc]])) next @@ -115,7 +115,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos xc_idx = xcols[ic_idx] for (b in which(vapply_1c(x[0L, ..xc_idx], getClass) == "double")) { xb = xcols[b] - if (!isReallyReal(x[[xb]])) { + if (isReallyReal(x[[xb]])) { coerce_x = FALSE break } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 71c43aa67..f77c3fbcb 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -20611,9 +20611,9 @@ test(2297.03, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], dat test(2297.04, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=d_int, d=d_int), output="Coercing .*c to type double") x = data.table(a=1) y = data.table(c=1, d=1L) -test(2297.11, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=1, d=1), output="Coercing .*d to type double") -test(2297.12, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=1, d=1), output="Coercing .*d to type double") +test(2297.11, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=1L, d=1L), output="Coercing double column x.c (which contains no fractions) to type integer") +test(2297.12, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=1L, d=1L), output="Coercing double column x.c (which contains no fractions) to type integer") x = data.table(a=d_dbl) y = data.table(c=d_dbl, d=d_int) -test(2297.13, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=d_dbl, d=d_dbl), output="Coercing .*d to type double") -test(2297.14, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=d_dbl, d=d_dbl), output="Coercing .*d to type double") +test(2297.13, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=d_int, d=d_int), output="Coercing double column x.c (which contains no fractions) to type integer") +test(2297.14, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=d_int, d=d_int), output="Coercing double column x.c (which contains no fractions) to type integer") From 33d700916da2cfc4a4b4724e307c5070de37b18a Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Wed, 27 Nov 2024 11:01:49 +0100 Subject: [PATCH 22/25] ensure same types for test --- R/bmerge.R | 6 +++--- inst/tests/tests.Rraw | 5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/R/bmerge.R b/R/bmerge.R index b6e0f4afb..d24d8cb94 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -113,7 +113,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos # we've always coerced to int and returned int, for convenience. if (length(ic_idx)>1L) { xc_idx = xcols[ic_idx] - for (b in which(vapply_1c(x[0L, ..xc_idx], getClass) == "double")) { + for (b in which(vapply_1c(x[0L, xc_idx, with=FALSE], getClass) == "double")) { xb = xcols[b] if (isReallyReal(x[[xb]])) { coerce_x = FALSE @@ -128,7 +128,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos set(callersi, j=ic, value=val) # change the shallow copy of i up in [.data.table to reflect in the result, too. if (length(ic_idx)>1L) { xc_idx = xcols[ic_idx] - for (b in which(vapply_1c(x[0L, ..xc_idx], getClass) == "double")) { + for (b in which(vapply_1c(x[0L, xc_idx, with=FALSE], getClass) == "double")) { xb = xcols[b] if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", paste0("x.", names(x)[xb]), xname) set(x, j=xb, value=cast_with_atts(x[[xb]], as.integer)) @@ -146,7 +146,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos set(i, j=ic, value=val) if (length(ic_idx)>1L) { xc_idx = xcols[ic_idx] - for (b in which(vapply_1c(x[0L, ..xc_idx], getClass) == "integer")) { + for (b in which(vapply_1c(x[0L, xc_idx, with=FALSE], getClass) == "integer")) { xb = xcols[b] if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", paste0("x.", names(x)[xb]), xname) set(x, j=xb, value=cast_with_atts(x[[xb]], as.double)) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index f77c3fbcb..2ff7d7504 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -12226,10 +12226,11 @@ DT1 = data.table(RANDOM_STRING = rand_strings(n), DATE = sample(seq(as.Date('2016-01-01'), as.Date('2016-12-31'), by="day"), n, replace=TRUE)) DT2 = data.table(RANDOM_STRING = rand_strings(n), START_DATE = sample(seq(as.Date('2015-01-01'), as.Date('2017-12-31'), by="day"), n, replace=TRUE)) +as.intDate = function(x) .Date(as.integer(as.Date(x))) DT2[, EXPIRY_DATE := START_DATE + floor(runif(1000, 200,300))] -DT1[, DT1_ID := .I][, DATE := as.Date(DATE)] +DT1[, DT1_ID := .I][, DATE := as.intDate(DATE)] cols = c("START_DATE", "EXPIRY_DATE") -DT2[, DT2_ID := .I][, (cols) := lapply(.SD, as.Date), .SDcols=cols] +DT2[, DT2_ID := .I][, (cols) := lapply(.SD, as.intDate), .SDcols=cols] ans1 = DT2[DT1, on=.(RANDOM_STRING, START_DATE <= DATE, EXPIRY_DATE >= DATE), .N, by=.EACHI ]$N > 0L tmp = DT1[DT2, on=.(RANDOM_STRING, DATE >= START_DATE, DATE <= EXPIRY_DATE), which=TRUE, nomatch=0L] ans2 = DT1[, DT1_ID %in% tmp] From 91c53c56100efddf758a89707bc42ce2e3bb1480 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Wed, 27 Nov 2024 11:13:50 +0100 Subject: [PATCH 23/25] add test for codecov --- inst/tests/tests.Rraw | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 2ff7d7504..6dbfa66a5 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -20600,21 +20600,27 @@ test(2295.3, is.data.table(d2)) test(2296, d2[x %no such operator% 1], error = '%no such operator%') # coerce Dates to double if join on multiple columns, #6602 -d_int = .Date(1L) -d_dbl = .Date(1) x = data.table(a=1L) y = data.table(c=1L, d=1) test(2297.01, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=1L, d=1L), output="Coercing .*c to type double") test(2297.02, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=1L, d=1L), output="Coercing .*c to type double") -x = data.table(a=d_int) -y = data.table(c=d_int, d=d_dbl) -test(2297.03, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=d_int, d=d_int), output="Coercing .*c to type double") -test(2297.04, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=d_int, d=d_int), output="Coercing .*c to type double") x = data.table(a=1) y = data.table(c=1, d=1L) -test(2297.11, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=1L, d=1L), output="Coercing double column x.c (which contains no fractions) to type integer") -test(2297.12, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=1L, d=1L), output="Coercing double column x.c (which contains no fractions) to type integer") +test(2297.03, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=1L, d=1L), output="Coercing double column x.c (which contains no fractions) to type integer") +test(2297.04, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=1L, d=1L), output="Coercing double column x.c (which contains no fractions) to type integer") +# dates +d_int = .Date(1L) +d_dbl = .Date(1) +x = data.table(a=d_int) +y = data.table(c=d_int, d=d_dbl) +test(2297.11, y[x, on=.(c == a, d == a)], data.table(c=d_int, d=d_int)) +test(2297.12, y[x, on=.(d == a, c == a)], data.table(c=d_int, d=d_int)) x = data.table(a=d_dbl) y = data.table(c=d_dbl, d=d_int) -test(2297.13, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=d_int, d=d_int), output="Coercing double column x.c (which contains no fractions) to type integer") -test(2297.14, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=d_int, d=d_int), output="Coercing double column x.c (which contains no fractions) to type integer") +test(2297.13, y[x, on=.(c == a, d == a)], data.table(c=d_int, d=d_int)) +test(2297.14, y[x, on=.(d == a, c == a)], data.table(c=d_int, d=d_int)) +# real double +x = data.table(a=1) +y = data.table(c=1.5, d=1L) +test(2297.21, y[x, on=.(c == a, d == a)], data.table(c=1, d=1)) +test(2297.22, y[x, on=.(d == a, c == a)], data.table(c=1, d=1)) From 6c68fb720e4963d7f27af233c5a45a76cf852402 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Wed, 27 Nov 2024 13:25:43 +0100 Subject: [PATCH 24/25] simplify --- R/bmerge.R | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/R/bmerge.R b/R/bmerge.R index d24d8cb94..7b8d84021 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -34,8 +34,8 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos ans } - cast_with_atts = function(x, f) { - ans = f(x) + cast_with_atts = function(x, as.f) { + ans = as.f(x) if (!is.null(attributes(x))) attributes(ans) = attributes(x) ans } @@ -113,8 +113,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos # we've always coerced to int and returned int, for convenience. if (length(ic_idx)>1L) { xc_idx = xcols[ic_idx] - for (b in which(vapply_1c(x[0L, xc_idx, with=FALSE], getClass) == "double")) { - xb = xcols[b] + for (xb in xcols[which(vapply_1c(x[0L, xc_idx, with=FALSE], getClass) == "double")]) { if (isReallyReal(x[[xb]])) { coerce_x = FALSE break @@ -128,8 +127,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos set(callersi, j=ic, value=val) # change the shallow copy of i up in [.data.table to reflect in the result, too. if (length(ic_idx)>1L) { xc_idx = xcols[ic_idx] - for (b in which(vapply_1c(x[0L, xc_idx, with=FALSE], getClass) == "double")) { - xb = xcols[b] + for (xb in xcols[which(vapply_1c(x[0L, xc_idx, with=FALSE], getClass) == "double")]) { if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", paste0("x.", names(x)[xb]), xname) set(x, j=xb, value=cast_with_atts(x[[xb]], as.integer)) } @@ -146,8 +144,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos set(i, j=ic, value=val) if (length(ic_idx)>1L) { xc_idx = xcols[ic_idx] - for (b in which(vapply_1c(x[0L, xc_idx, with=FALSE], getClass) == "integer")) { - xb = xcols[b] + for (xb in xcols[which(vapply_1c(x[0L, xc_idx, with=FALSE], getClass) == "integer")]) { if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", paste0("x.", names(x)[xb]), xname) set(x, j=xb, value=cast_with_atts(x[[xb]], as.double)) } From 5aaee922d0408c8dfa4c36ac08eae2ae322e6bf4 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Wed, 27 Nov 2024 15:40:59 +0100 Subject: [PATCH 25/25] fix test on windows --- R/bmerge.R | 8 ++++---- inst/tests/tests.Rraw | 5 ++--- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/R/bmerge.R b/R/bmerge.R index 7b8d84021..21c7345da 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -104,7 +104,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos } else stopf("Incompatible join types: %s is type integer64 but %s is type double and contains fractions", nm[2L], nm[1L]) } else { # just integer and double left - ic_idx = which(ic == icols) + ic_idx = which(ic == icols) # check if on is joined on multiple conditions if (i_merge_type=="double") { coerce_x = FALSE if (!isReallyReal(i[[ic]])) { @@ -113,7 +113,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos # we've always coerced to int and returned int, for convenience. if (length(ic_idx)>1L) { xc_idx = xcols[ic_idx] - for (xb in xcols[which(vapply_1c(x[0L, xc_idx, with=FALSE], getClass) == "double")]) { + for (xb in xc_idx[which(vapply_1c(x[0L, xc_idx, with=FALSE], getClass) == "double")]) { if (isReallyReal(x[[xb]])) { coerce_x = FALSE break @@ -127,7 +127,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos set(callersi, j=ic, value=val) # change the shallow copy of i up in [.data.table to reflect in the result, too. if (length(ic_idx)>1L) { xc_idx = xcols[ic_idx] - for (xb in xcols[which(vapply_1c(x[0L, xc_idx, with=FALSE], getClass) == "double")]) { + for (xb in xc_idx[which(vapply_1c(x[0L, xc_idx, with=FALSE], getClass) == "double")]) { if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", paste0("x.", names(x)[xb]), xname) set(x, j=xb, value=cast_with_atts(x[[xb]], as.integer)) } @@ -144,7 +144,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos set(i, j=ic, value=val) if (length(ic_idx)>1L) { xc_idx = xcols[ic_idx] - for (xb in xcols[which(vapply_1c(x[0L, xc_idx, with=FALSE], getClass) == "integer")]) { + for (xb in xc_idx[which(vapply_1c(x[0L, xc_idx, with=FALSE], getClass) == "integer")]) { if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", paste0("x.", names(x)[xb]), xname) set(x, j=xb, value=cast_with_atts(x[[xb]], as.double)) } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 6dbfa66a5..9f9aa2aa0 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -12226,11 +12226,10 @@ DT1 = data.table(RANDOM_STRING = rand_strings(n), DATE = sample(seq(as.Date('2016-01-01'), as.Date('2016-12-31'), by="day"), n, replace=TRUE)) DT2 = data.table(RANDOM_STRING = rand_strings(n), START_DATE = sample(seq(as.Date('2015-01-01'), as.Date('2017-12-31'), by="day"), n, replace=TRUE)) -as.intDate = function(x) .Date(as.integer(as.Date(x))) DT2[, EXPIRY_DATE := START_DATE + floor(runif(1000, 200,300))] -DT1[, DT1_ID := .I][, DATE := as.intDate(DATE)] +DT1[, DT1_ID := .I][, DATE := as.Date(DATE)] cols = c("START_DATE", "EXPIRY_DATE") -DT2[, DT2_ID := .I][, (cols) := lapply(.SD, as.intDate), .SDcols=cols] +DT2[, DT2_ID := .I][, (cols) := lapply(.SD, as.Date), .SDcols=cols] ans1 = DT2[DT1, on=.(RANDOM_STRING, START_DATE <= DATE, EXPIRY_DATE >= DATE), .N, by=.EACHI ]$N > 0L tmp = DT1[DT2, on=.(RANDOM_STRING, DATE >= START_DATE, DATE <= EXPIRY_DATE), which=TRUE, nomatch=0L] ans2 = DT1[, DT1_ID %in% tmp]