Skip to content

Commit

Permalink
dcast handles fun.aggregate argument better (#3384)
Browse files Browse the repository at this point in the history
  • Loading branch information
arunsrinivasan authored and mattdowle committed Feb 15, 2019
1 parent 1111d46 commit 28e31f5
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 11 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

5. `foverlaps()` returned incorrect results overlapping on POSIXct objects which were <= `1970-01-01`, i.e., datetime values that were represented internally as -ve numeric values. This is now fixed. Closes [#3349](https://github.com/Rdatatable/data.table/issues/3349). Thanks to @lux5 for reporting.

6. Several issues were filed regarding limitations of `dcast.data.table` in handling `fun.aggregate` argument when the functions are not directly provided to the argument as `fun.aggregate <- list(sum, mean)` and instead are stored in a variable, e.g., `funs <- list(sum, mean)` and referred to as `fun.aggregate=funs`. This fix closes several issues [#1974](https://github.com/Rdatatable/data.table/issues/1974), [#1369](https://github.com/Rdatatable/data.table/issues/1369), [#2064](https://github.com/Rdatatable/data.table/issues/2064) and [#2949](https://github.com/Rdatatable/data.table/issues/2949). Thanks to @sunbee, @Ping2016, @smidelius and @d0rg0ld for reporting.

#### NOTES

1. When upgrading to 1.12.0 some Windows users might have seen `CdllVersion not found` in some circumstances. We found a way to catch that so the [helpful message](https://twitter.com/MattDowle/status/1084528873549705217) now occurs for those upgrading from versions prior to 1.12.0 too, as well as those upgrading from 1.12.0 to a later version. See item 1 in notes section of 1.12.0 below for more background.
Expand Down
11 changes: 7 additions & 4 deletions R/data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -1724,7 +1724,8 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) {
gfuns = c("sum", "prod", "mean", "median", "var", "sd", ".N", "min", "max", "head", "last", "first", "tail", "[") # added .N for #5760
.ok <- function(q) {
if (dotN(q)) return(TRUE) # For #5760
cond = is.call(q) && length(q1c <- as.character(q[[1L]]))==1L && q1c %chin% gfuns && !is.call(q[[2L]])
# Need is.symbol() check. See #1369, #1974 or #2949 issues and explanation below by searching for one of these issues.
cond = is.call(q) && is.symbol(q[[1]]) && (q1c <- as.character(q[[1]])) %chin% gfuns && !is.call(q[[2L]])
# run GForce for simple f(x) calls and f(x, na.rm = TRUE)-like calls
ans = cond && (length(q)==2L || identical("na",substring(names(q)[3L], 1L, 2L)))
if (identical(ans, TRUE)) return(ans)
Expand Down Expand Up @@ -1761,9 +1762,11 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) {
oldjsub = jsub
if (jsub[[1L]]=="list") {
for (ii in seq_along(jsub)[-1L]) {
if (dotN(jsub[[ii]])) next; # For #5760
if (is.call(jsub[[ii]]) && jsub[[ii]][[1L]]=="mean")
jsub[[ii]] = .optmean(jsub[[ii]])
this_jsub = jsub[[ii]]
if (dotN(this_jsub)) next; # For #5760
# Addressing #1369, #2949 and #1974. Added is.symbol() check to handle cases where expanded function definition is used insead of function names. #1369 results in (function(x) sum(x)) as jsub[[.]] from dcast.data.table.
if (is.call(this_jsub) && is.symbol(this_jsub[[1L]]) && this_jsub[[1L]]=="mean")
jsub[[ii]] = .optmean(this_jsub)
}
} else if (jsub[[1L]]=="mean") {
jsub = .optmean(jsub)
Expand Down
20 changes: 13 additions & 7 deletions R/fcast.R
Original file line number Diff line number Diff line change
Expand Up @@ -65,20 +65,20 @@ value_vars <- function(value.var, varnames) {
aggregate_funs <- function(funs, vals, sep="_", ...) {
if (is.call(funs) && funs[[1L]] == "eval")
funs = eval(funs[[2L]], parent.frame(2L), parent.frame(2L))
if (is.call(funs) && as.character(funs[[1L]]) %chin% c("c", "list"))
if (is.call(funs) && as.character(funs[[1L]]) %chin% c("c", "list")) {
funs = lapply(as.list(funs)[-1L], function(x) {
if (is.call(x) && as.character(x[[1L]]) %chin% c("c", "list")) as.list(x)[-1L] else x
})
else funs = list(funs)
} else funs = eval(funs, parent.frame(2L), parent.frame(2L))
if(is.function(funs)) funs = list(funs) # needed for cases as shown in test#1700.1
if (length(funs) != length(vals)) {
if (length(vals) == 1L)
vals = replicate(length(funs), vals)
else stop("When 'fun.aggregate' and 'value.var' are both lists, 'value.var' must be either of length =1 or =length(fun.aggregate).")
}
only_one_fun = length(unlist(funs)) == 1L
dots = list(...)
construct_funs <- function(fun, val) {
if (!is.list(fun)) fun = list(fun)
construct_funs <- function(fun, nm, val) {
ans = vector("list", length(fun)*length(val))
nms = vector("character", length(ans))
k = 1L
Expand All @@ -89,14 +89,20 @@ aggregate_funs <- function(funs, vals, sep="_", ...) {
expr = c(expr, dots)
ans[[k]] = as.call(expr)
# changed order of arguments here, #1153
nms[k] = if (only_one_fun) j else
paste(j, all.names(i, max.names=1L, functions=TRUE), sep=sep)
nms[k] = if (only_one_fun) j else paste(j, nm, sep=sep)
k = k+1L;
}
}
setattr(ans, 'names', nms)
}
ans = mapply(construct_funs, funs, vals, SIMPLIFY=FALSE)
ans = lapply(seq_along(funs), function(i) {
nm <- names(funs[i])
if (is.null(nm) || !nzchar(nm)) {
nm <- all.names(funs[[i]], max.names=1L, functions=TRUE)
}
if (!length(nm)) nm <- paste0("fun", i)
construct_funs(funs[i], nm, vals[[i]])
})
as.call(c(quote(list), unlist(ans)))
}

Expand Down
32 changes: 32 additions & 0 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -13427,6 +13427,38 @@ DT = as.data.table(mtcars)[1]
test(1986.3, DT[, colMeans(.SD), by=gear], data.table(gear=4, V1=c(21,6,160,110,3.9,2.62,16.46,0,1,4)))
test(1986.4, DT[, as.list(colMeans(.SD)), by=gear], cbind(DT[,"gear"],DT[,-"gear"]))

# tests for #2949, #1974 and #1369 - dcast not able to handle functions referred to by a variable
dt = data.table(
x=sample(5,20,TRUE),
y=sample(2,20,TRUE),
z=sample(letters[1:2], 20,TRUE),
d1 = runif(20),
d2=1L
)
myFun1 <- function(data, vars) {
mySum <- function(x) sum(x)
dcast.data.table(data, "x + y ~ z", value.var=vars, fun.aggregate=mySum)
}
myFun2 <- function(data, vars) {
myFuns <- list(f1=sum, first=function(x) x[1L])
dcast.data.table(data, "x + y ~ z", value.var=vars, fun.aggregate=myFuns)
}
funs = list(sum, mean)
vars = list("d1", "d2")
test(1987.1,
names(dcast.data.table(dt, x + y ~ z, fun=funs, value.var=vars)),
c("x", "y", "d1_fun1_a", "d1_fun1_b", "d2_fun2_a", "d2_fun2_b")
)
test(1987.2,
dcast.data.table(dt, x + y ~ z, fun=sum, value.var=vars[[1]]),
myFun1(dt, vars[[1]])
)
test(1987.3,
dcast.data.table(dt, x + y ~ z, fun=list(f1=sum, first=function(x) x[1L]), value.var=vars),
myFun2(dt, vars)
)



###################################
# Add new tests above this line #
Expand Down

0 comments on commit 28e31f5

Please sign in to comment.