# example data
DT = data.table(iris)
DT[, Bin := cut(Sepal.Length, c(4,6,8))]
To apply the same summarizing function to every column by group, we can use lapply
and .SD
DT[, lapply(.SD, median), by=.(Species, Bin)]
# Species Bin Sepal.Length Sepal.Width Petal.Length Petal.Width
# 1: setosa (4,6] 5.0 3.4 1.50 0.2
# 2: versicolor (6,8] 6.4 2.9 4.60 1.4
# 3: versicolor (4,6] 5.6 2.7 4.05 1.3
# 4: virginica (6,8] 6.7 3.0 5.60 2.1
# 5: virginica (4,6] 5.8 2.7 5.00 1.9
We can filter the columns in .SD
with the .SDcols
argument:
DT[, lapply(.SD, median), by=.(Species, Bin), .SDcols="Petal.Length"]
# Species Bin Petal.Length
# 1: setosa (4,6] 1.50
# 2: versicolor (6,8] 4.60
# 3: versicolor (4,6] 4.05
# 4: virginica (6,8] 5.60
# 5: virginica (4,6] 5.00
Currently, the simplest extension to multiple functions is perhaps:
DT[, unlist(recursive=FALSE, lapply(
.(med = median, iqr = IQR),
function(f) lapply(.SD, f)
)), by=.(Species, Bin), .SDcols=Petal.Length:Petal.Width]
# Species Bin med.Petal.Length med.Petal.Width iqr.Petal.Length iqr.Petal.Width
# 1: setosa (4,6] 1.50 0.2 0.175 0.100
# 2: versicolor (6,8] 4.60 1.4 0.300 0.200
# 3: versicolor (4,6] 4.05 1.3 0.525 0.275
# 4: virginica (6,8] 5.60 2.1 0.700 0.500
# 5: virginica (4,6] 5.00 1.9 0.200 0.200
If you want the names to be like Petal.Length.med
instead of med.Petal.Length
, change the order:
DT[, unlist(recursive=FALSE, lapply(
.SD,
function(x) lapply(.(med = median, iqr = IQR), function(f) f(x))
)), by=.(Species, Bin), .SDcols=Petal.Length:Petal.Width]
# Species Bin Petal.Length.med Petal.Length.iqr Petal.Width.med Petal.Width.iqr
# 1: setosa (4,6] 1.50 0.175 0.2 0.100
# 2: versicolor (6,8] 4.60 0.300 1.4 0.200
# 3: versicolor (4,6] 4.05 0.525 1.3 0.275
# 4: virginica (6,8] 5.60 0.700 2.1 0.500
# 5: virginica (4,6] 5.00 0.200 1.9 0.200