A lazy data.table captures the intent of dplyr verbs, only actually performing computation when requested (with collect(), pull(), as.data.frame(), data.table::as.data.table(), or tibble::as_tibble()). This allows dtplyr to convert dplyr verbs into as few data.table expressions as possible, which leads to a high-performance translation.

See vignette("translation") for the details of the translation.

lazy_dt(x, name = NULL, immutable = TRUE, key_by = NULL)

Arguments

x

A data table (or something that can be coerced to a data table).

name

Optionally, supply a name to be used in generated expressions. For expert use only.

immutable

If TRUE, x is treated as immutable and will never be modified by any code generated by dtplyr. Alternatively, you can set immutable = FALSE to allow dtplyr to modify the input object.

key_by

Set keys for the data frame, using select() semantics (e.g. key_by = c(key1, key2)).

This uses data.table::setkey() to sort the table and build an index. This will considerably improve performance for subsets, summaries, and joins that use the keys.

See vignette("datatable-keys-fast-subset") for more details.

Examples

library(dplyr, warn.conflicts = FALSE)

# If you have a data.table, using it with any dplyr generic will
# automatically convert it to a lazy_dt object
dt <- data.table::data.table(x = 1:10, y = 10:1)
dt %>% filter(x == y)
#> Source: local data table [0 x 2] #> Call: `_DT17`[x == y] #> #> # … with 2 variables: x <int>, y <int> #> #> # Use as.data.table()/as.data.frame()/as_tibble() to access results
dt %>% mutate(z = x + y)
#> Source: local data table [10 x 3] #> Call: copy(`_DT18`)[, `:=`(z = x + y)] #> #> x y z #> <int> <int> <int> #> 1 1 10 11 #> 2 2 9 11 #> 3 3 8 11 #> 4 4 7 11 #> 5 5 6 11 #> 6 6 5 11 #> # … with 4 more rows #> #> # Use as.data.table()/as.data.frame()/as_tibble() to access results
# Note that dtplyr will avoid mutating the input data.table, so the
# previous translation includes an automatic copy(). You can avoid this
# with a manual call to lazy_dt()
dt %>% lazy_dt(immutable = FALSE) %>% mutate(z = x + y)
#> Source: local data table [10 x 3] #> Call: `_DT19`[, `:=`(z = x + y)] #> #> x y z #> <int> <int> <int> #> 1 1 10 11 #> 2 2 9 11 #> 3 3 8 11 #> 4 4 7 11 #> 5 5 6 11 #> 6 6 5 11 #> # … with 4 more rows #> #> # Use as.data.table()/as.data.frame()/as_tibble() to access results
# If you have a data frame, you can use lazy_dt() to convert it to
# a data.table:
mtcars2 <- lazy_dt(mtcars)
mtcars2
#> Source: local data table [32 x 11] #> Call: `_DT20` #> #> mpg cyl disp hp drat wt qsec vs am gear carb #> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 #> # … with 26 more rows #> #> # Use as.data.table()/as.data.frame()/as_tibble() to access results
mtcars2 %>% select(mpg:cyl)
#> Source: local data table [32 x 2] #> Call: `_DT20`[, .(mpg, cyl)] #> #> mpg cyl #> <dbl> <dbl> #> 1 21 6 #> 2 21 6 #> 3 22.8 4 #> 4 21.4 6 #> 5 18.7 8 #> 6 18.1 6 #> # … with 26 more rows #> #> # Use as.data.table()/as.data.frame()/as_tibble() to access results
mtcars2 %>% select(x = mpg, y = cyl)
#> Source: local data table [32 x 2] #> Call: `_DT20`[, .(x = mpg, y = cyl)] #> #> x y #> <dbl> <dbl> #> 1 21 6 #> 2 21 6 #> 3 22.8 4 #> 4 21.4 6 #> 5 18.7 8 #> 6 18.1 6 #> # … with 26 more rows #> #> # Use as.data.table()/as.data.frame()/as_tibble() to access results
mtcars2 %>% filter(cyl == 4) %>% select(mpg)
#> Source: local data table [11 x 1] #> Call: `_DT20`[cyl == 4, .(mpg)] #> #> mpg #> <dbl> #> 1 22.8 #> 2 24.4 #> 3 22.8 #> 4 32.4 #> 5 30.4 #> 6 33.9 #> # … with 5 more rows #> #> # Use as.data.table()/as.data.frame()/as_tibble() to access results
mtcars2 %>% select(mpg, cyl) %>% filter(cyl == 4)
#> Source: local data table [11 x 2] #> Call: `_DT20`[, .(mpg, cyl)][cyl == 4] #> #> mpg cyl #> <dbl> <dbl> #> 1 22.8 4 #> 2 24.4 4 #> 3 22.8 4 #> 4 32.4 4 #> 5 30.4 4 #> 6 33.9 4 #> # … with 5 more rows #> #> # Use as.data.table()/as.data.frame()/as_tibble() to access results
mtcars2 %>% mutate(cyl2 = cyl * 2, cyl4 = cyl2 * 2)
#> Source: local data table [32 x 13] #> Call: copy(`_DT20`)[, `:=`(c("cyl2", "cyl4"), { #> cyl2 <- cyl * 2 #> cyl4 <- cyl2 * 2 #> .(cyl2, cyl4) #> })] #> #> mpg cyl disp hp drat wt qsec vs am gear carb cyl2 cyl4 #> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 12 24 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 12 24 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 8 16 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 12 24 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 16 32 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 12 24 #> # … with 26 more rows #> #> # Use as.data.table()/as.data.frame()/as_tibble() to access results
mtcars2 %>% transmute(cyl2 = cyl * 2, vs2 = vs * 2)
#> Source: local data table [32 x 2] #> Call: `_DT20`[, .(cyl2 = cyl * 2, vs2 = vs * 2)] #> #> cyl2 vs2 #> <dbl> <dbl> #> 1 12 0 #> 2 12 0 #> 3 8 2 #> 4 12 2 #> 5 16 0 #> 6 12 2 #> # … with 26 more rows #> #> # Use as.data.table()/as.data.frame()/as_tibble() to access results
mtcars2 %>% filter(cyl == 8) %>% mutate(cyl2 = cyl * 2)
#> Source: local data table [14 x 12] #> Call: `_DT20`[cyl == 8][, `:=`(cyl2 = cyl * 2)] #> #> mpg cyl disp hp drat wt qsec vs am gear carb cyl2 #> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 16 #> 2 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 16 #> 3 16.4 8 276. 180 3.07 4.07 17.4 0 0 3 3 16 #> 4 17.3 8 276. 180 3.07 3.73 17.6 0 0 3 3 16 #> 5 15.2 8 276. 180 3.07 3.78 18 0 0 3 3 16 #> 6 10.4 8 472 205 2.93 5.25 18.0 0 0 3 4 16 #> # … with 8 more rows #> #> # Use as.data.table()/as.data.frame()/as_tibble() to access results
# Learn more about translation in vignette("translation")
by_cyl <- mtcars2 %>% group_by(cyl)
by_cyl %>% summarise(mpg = mean(mpg))
#> Source: local data table [3 x 2] #> Call: `_DT20`[, .(mpg = mean(mpg)), keyby = .(cyl)] #> #> cyl mpg #> <dbl> <dbl> #> 1 4 26.7 #> 2 6 19.7 #> 3 8 15.1 #> #> # Use as.data.table()/as.data.frame()/as_tibble() to access results
by_cyl %>% mutate(mpg = mean(mpg))
#> Source: local data table [32 x 11] #> Groups: cyl #> Call: copy(`_DT20`)[, `:=`(mpg = mean(mpg)), by = .(cyl)] #> #> mpg cyl disp hp drat wt qsec vs am gear carb #> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 19.7 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 19.7 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 3 26.7 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 4 19.7 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 15.1 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 19.7 6 225 105 2.76 3.46 20.2 1 0 3 1 #> # … with 26 more rows #> #> # Use as.data.table()/as.data.frame()/as_tibble() to access results
by_cyl %>% filter(mpg < mean(mpg)) %>% summarise(hp = mean(hp))
#> Source: local data table [3 x 2] #> Call: `_DT20`[`_DT20`[, .I[mpg < mean(mpg)], by = .(cyl)]$V1, .(hp = mean(hp)), #> keyby = .(cyl)] #> #> cyl hp #> <dbl> <dbl> #> 1 4 91.2 #> 2 6 132. #> 3 8 246. #> #> # Use as.data.table()/as.data.frame()/as_tibble() to access results