Skip to content

A lazy data.table captures the intent of dplyr verbs, only actually performing computation when requested (with collect(), pull(), as.data.frame(), data.table::as.data.table(), or tibble::as_tibble()). This allows dtplyr to convert dplyr verbs into as few data.table expressions as possible, which leads to a high-performance translation.

See vignette("translation") for the details of the translation.

Usage

lazy_dt(x, name = NULL, immutable = TRUE, key_by = NULL)

Arguments

x

A data table (or something that can be coerced to a data table).

name

Optionally, supply a name to be used in generated expressions. For expert use only.

immutable

If TRUE, x is treated as immutable and will never be modified by any code generated by dtplyr. Alternatively, you can set immutable = FALSE to allow dtplyr to modify the input object.

key_by

Set keys for data frame, using select() semantics (e.g. key_by = c(key1, key2)).

This uses data.table::setkey() to sort the table and build an index. This will considerably improve performance for subsets, summaries, and joins that use the keys.

See vignette("datatable-keys-fast-subset") for more details.

Examples

library(dplyr, warn.conflicts = FALSE)

# If you have a data.table, using it with any dplyr generic will
# automatically convert it to a lazy_dt object
dt <- data.table::data.table(x = 1:10, y = 10:1)
dt %>% filter(x == y)
#> Empty data.table (0 rows and 2 cols): x,y
dt %>% mutate(z = x + y)
#>      x  y  z
#>  1:  1 10 11
#>  2:  2  9 11
#>  3:  3  8 11
#>  4:  4  7 11
#>  5:  5  6 11
#>  6:  6  5 11
#>  7:  7  4 11
#>  8:  8  3 11
#>  9:  9  2 11
#> 10: 10  1 11

# Note that dtplyr will avoid mutating the input data.table, so the
# previous translation includes an automatic copy(). You can avoid this
# with a manual call to lazy_dt()
dt %>%
  lazy_dt(immutable = FALSE) %>%
  mutate(z = x + y)
#> Source: local data table [10 x 3]
#> Call:   `_DT20`[, `:=`(z = x + y)]
#> 
#>       x     y     z
#>   <int> <int> <int>
#> 1     1    10    11
#> 2     2     9    11
#> 3     3     8    11
#> 4     4     7    11
#> 5     5     6    11
#> 6     6     5    11
#> # … with 4 more rows
#> 
#> # Use as.data.table()/as.data.frame()/as_tibble() to access results

# If you have a data frame, you can use lazy_dt() to convert it to
# a data.table:
mtcars2 <- lazy_dt(mtcars)
mtcars2
#> Source: local data table [32 x 11]
#> Call:   `_DT21`
#> 
#>     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
#>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1  21       6   160   110  3.9   2.62  16.5     0     1     4     4
#> 2  21       6   160   110  3.9   2.88  17.0     0     1     4     4
#> 3  22.8     4   108    93  3.85  2.32  18.6     1     1     4     1
#> 4  21.4     6   258   110  3.08  3.22  19.4     1     0     3     1
#> 5  18.7     8   360   175  3.15  3.44  17.0     0     0     3     2
#> 6  18.1     6   225   105  2.76  3.46  20.2     1     0     3     1
#> # … with 26 more rows
#> 
#> # Use as.data.table()/as.data.frame()/as_tibble() to access results
mtcars2 %>% select(mpg:cyl)
#> Source: local data table [32 x 2]
#> Call:   `_DT21`[, .(mpg, cyl)]
#> 
#>     mpg   cyl
#>   <dbl> <dbl>
#> 1  21       6
#> 2  21       6
#> 3  22.8     4
#> 4  21.4     6
#> 5  18.7     8
#> 6  18.1     6
#> # … with 26 more rows
#> 
#> # Use as.data.table()/as.data.frame()/as_tibble() to access results
mtcars2 %>% select(x = mpg, y = cyl)
#> Source: local data table [32 x 2]
#> Call:   `_DT21`[, .(x = mpg, y = cyl)]
#> 
#>       x     y
#>   <dbl> <dbl>
#> 1  21       6
#> 2  21       6
#> 3  22.8     4
#> 4  21.4     6
#> 5  18.7     8
#> 6  18.1     6
#> # … with 26 more rows
#> 
#> # Use as.data.table()/as.data.frame()/as_tibble() to access results
mtcars2 %>% filter(cyl == 4) %>% select(mpg)
#> Source: local data table [11 x 1]
#> Call:   `_DT21`[cyl == 4, .(mpg)]
#> 
#>     mpg
#>   <dbl>
#> 1  22.8
#> 2  24.4
#> 3  22.8
#> 4  32.4
#> 5  30.4
#> 6  33.9
#> # … with 5 more rows
#> 
#> # Use as.data.table()/as.data.frame()/as_tibble() to access results
mtcars2 %>% select(mpg, cyl) %>% filter(cyl == 4)
#> Source: local data table [11 x 2]
#> Call:   `_DT21`[, .(mpg, cyl)][cyl == 4]
#> 
#>     mpg   cyl
#>   <dbl> <dbl>
#> 1  22.8     4
#> 2  24.4     4
#> 3  22.8     4
#> 4  32.4     4
#> 5  30.4     4
#> 6  33.9     4
#> # … with 5 more rows
#> 
#> # Use as.data.table()/as.data.frame()/as_tibble() to access results
mtcars2 %>% mutate(cyl2 = cyl * 2, cyl4 = cyl2 * 2)
#> Source: local data table [32 x 13]
#> Call:   copy(`_DT21`)[, `:=`(c("cyl2", "cyl4"), {
#>     cyl2 <- cyl * 2
#>     cyl4 <- cyl2 * 2
#>     .(cyl2, cyl4)
#> })]
#> 
#>     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb  cyl2
#>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1  21       6   160   110  3.9   2.62  16.5     0     1     4     4    12
#> 2  21       6   160   110  3.9   2.88  17.0     0     1     4     4    12
#> 3  22.8     4   108    93  3.85  2.32  18.6     1     1     4     1     8
#> 4  21.4     6   258   110  3.08  3.22  19.4     1     0     3     1    12
#> 5  18.7     8   360   175  3.15  3.44  17.0     0     0     3     2    16
#> 6  18.1     6   225   105  2.76  3.46  20.2     1     0     3     1    12
#> # … with 26 more rows, and 1 more variable: cyl4 <dbl>
#> 
#> # Use as.data.table()/as.data.frame()/as_tibble() to access results
mtcars2 %>% transmute(cyl2 = cyl * 2, vs2 = vs * 2)
#> Source: local data table [32 x 2]
#> Call:   `_DT21`[, .(cyl2 = cyl * 2, vs2 = vs * 2)]
#> 
#>    cyl2   vs2
#>   <dbl> <dbl>
#> 1    12     0
#> 2    12     0
#> 3     8     2
#> 4    12     2
#> 5    16     0
#> 6    12     2
#> # … with 26 more rows
#> 
#> # Use as.data.table()/as.data.frame()/as_tibble() to access results
mtcars2 %>% filter(cyl == 8) %>% mutate(cyl2 = cyl * 2)
#> Source: local data table [14 x 12]
#> Call:   `_DT21`[cyl == 8][, `:=`(cyl2 = cyl * 2)]
#> 
#>     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb  cyl2
#>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1  18.7     8  360    175  3.15  3.44  17.0     0     0     3     2    16
#> 2  14.3     8  360    245  3.21  3.57  15.8     0     0     3     4    16
#> 3  16.4     8  276.   180  3.07  4.07  17.4     0     0     3     3    16
#> 4  17.3     8  276.   180  3.07  3.73  17.6     0     0     3     3    16
#> 5  15.2     8  276.   180  3.07  3.78  18       0     0     3     3    16
#> 6  10.4     8  472    205  2.93  5.25  18.0     0     0     3     4    16
#> # … with 8 more rows
#> 
#> # Use as.data.table()/as.data.frame()/as_tibble() to access results

# Learn more about translation in vignette("translation")
by_cyl <- mtcars2 %>% group_by(cyl)
by_cyl %>% summarise(mpg = mean(mpg))
#> Source: local data table [3 x 2]
#> Call:   `_DT21`[, .(mpg = mean(mpg)), keyby = .(cyl)]
#> 
#>     cyl   mpg
#>   <dbl> <dbl>
#> 1     4  26.7
#> 2     6  19.7
#> 3     8  15.1
#> 
#> # Use as.data.table()/as.data.frame()/as_tibble() to access results
by_cyl %>% mutate(mpg = mean(mpg))
#> Source: local data table [32 x 11]
#> Groups: cyl
#> Call:   copy(`_DT21`)[, `:=`(mpg = mean(mpg)), by = .(cyl)]
#> 
#>     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
#>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1  19.7     6   160   110  3.9   2.62  16.5     0     1     4     4
#> 2  19.7     6   160   110  3.9   2.88  17.0     0     1     4     4
#> 3  26.7     4   108    93  3.85  2.32  18.6     1     1     4     1
#> 4  19.7     6   258   110  3.08  3.22  19.4     1     0     3     1
#> 5  15.1     8   360   175  3.15  3.44  17.0     0     0     3     2
#> 6  19.7     6   225   105  2.76  3.46  20.2     1     0     3     1
#> # … with 26 more rows
#> 
#> # Use as.data.table()/as.data.frame()/as_tibble() to access results
by_cyl %>%
  filter(mpg < mean(mpg)) %>%
  summarise(hp = mean(hp))
#> Source: local data table [3 x 2]
#> Call:   `_DT21`[`_DT21`[, .I[mpg < mean(mpg)], by = .(cyl)]$V1, .(hp = mean(hp)), 
#>     keyby = .(cyl)]
#> 
#>     cyl    hp
#>   <dbl> <dbl>
#> 1     4  91.2
#> 2     6 132. 
#> 3     8 246. 
#> 
#> # Use as.data.table()/as.data.frame()/as_tibble() to access results