This is a method for the tidyr pivot_wider() generic. It is translated to data.table::dcast()

# S3 method for dtplyr_step
pivot_wider(
  data,
  id_cols = NULL,
  names_from = name,
  names_prefix = "",
  names_sep = "_",
  names_glue = NULL,
  names_sort = FALSE,
  names_repair = "check_unique",
  values_from = value,
  values_fill = NULL,
  values_fn = NULL,
  ...
)

Arguments

data

A lazy_dt().

id_cols

<tidy-select> A set of columns that uniquely identifies each observation. Defaults to all columns in data except for the columns specified in names_from and values_from. Typically used when you have redundant variables, i.e. variables whose values are perfectly correlated with existing variables.

names_from

<tidy-select> A pair of arguments describing which column (or columns) to get the name of the output column (names_from), and which column (or columns) to get the cell values from (values_from).

If values_from contains multiple values, the value will be added to the front of the output column.

names_prefix

String added to the start of every variable name. This is particularly useful if names_from is a numeric vector and you want to create syntactic variable names.

names_sep

If names_from or values_from contains multiple variables, this will be used to join their values together into a single string to use as a column name.

names_glue

Instead of names_sep and names_prefix, you can supply a glue specification that uses the names_from columns (and special .value) to create custom column names.

names_sort

Should the column names be sorted? If FALSE, the default, column names are ordered by first appearance.

names_repair

What happens if the output has invalid column names? The default, "check_unique" is to error if the columns are duplicated. Use "minimal" to allow duplicates in the output, or "unique" to de-duplicated by adding numeric suffixes. See vctrs::vec_as_names() for more options.

values_from

<tidy-select> A pair of arguments describing which column (or columns) to get the name of the output column (names_from), and which column (or columns) to get the cell values from (values_from).

If values_from contains multiple values, the value will be added to the front of the output column.

values_fill

Optionally, a (scalar) value that specifies what each value should be filled in with when missing.

This can be a named list if you want to apply different aggregations to different value columns.

values_fn

A function, the default is length(). Note this is different behavior than tidyr::pivot_wider(), which returns a list column by default.

...

Additional arguments passed on to methods.

Examples

library(tidyr) fish_encounters_dt <- lazy_dt(fish_encounters) fish_encounters_dt
#> Source: local data table [114 x 3] #> Call: `_DT28` #> #> fish station seen #> <fct> <fct> <int> #> 1 4842 Release 1 #> 2 4842 I80_1 1 #> 3 4842 Lisbon 1 #> 4 4842 Rstr 1 #> 5 4842 Base_TD 1 #> 6 4842 BCE 1 #> # … with 108 more rows #> #> # Use as.data.table()/as.data.frame()/as_tibble() to access results
fish_encounters_dt %>% pivot_wider(names_from = station, values_from = seen)
#> Source: local data table [19 x 12] #> Call: dcast(`_DT28`, formula = fish ~ station, value.var = "seen") #> #> fish Release I80_1 Lisbon Rstr Base_TD BCE BCW BCE2 BCW2 MAE MAW #> <fct> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> #> 1 4842 1 1 1 1 1 1 1 1 1 1 1 #> 2 4843 1 1 1 1 1 1 1 1 1 1 1 #> 3 4844 1 1 1 1 1 1 1 1 1 1 1 #> 4 4845 1 1 1 1 1 NA NA NA NA NA NA #> 5 4847 1 1 1 NA NA NA NA NA NA NA NA #> 6 4848 1 1 1 1 NA NA NA NA NA NA NA #> # … with 13 more rows #> #> # Use as.data.table()/as.data.frame()/as_tibble() to access results
# Fill in missing values fish_encounters_dt %>% pivot_wider(names_from = station, values_from = seen, values_fill = 0)
#> Source: local data table [19 x 12] #> Call: dcast(`_DT28`, formula = fish ~ station, value.var = "seen", #> fill = 0) #> #> fish Release I80_1 Lisbon Rstr Base_TD BCE BCW BCE2 BCW2 MAE MAW #> <fct> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> #> 1 4842 1 1 1 1 1 1 1 1 1 1 1 #> 2 4843 1 1 1 1 1 1 1 1 1 1 1 #> 3 4844 1 1 1 1 1 1 1 1 1 1 1 #> 4 4845 1 1 1 1 1 0 0 0 0 0 0 #> 5 4847 1 1 1 0 0 0 0 0 0 0 0 #> 6 4848 1 1 1 1 0 0 0 0 0 0 0 #> # … with 13 more rows #> #> # Use as.data.table()/as.data.frame()/as_tibble() to access results
# Generate column names from multiple variables us_rent_income_dt <- lazy_dt(us_rent_income) us_rent_income_dt
#> Source: local data table [104 x 5] #> Call: `_DT29` #> #> GEOID NAME variable estimate moe #> <chr> <chr> <chr> <dbl> <dbl> #> 1 01 Alabama income 24476 136 #> 2 01 Alabama rent 747 3 #> 3 02 Alaska income 32940 508 #> 4 02 Alaska rent 1200 13 #> 5 04 Arizona income 27517 148 #> 6 04 Arizona rent 972 4 #> # … with 98 more rows #> #> # Use as.data.table()/as.data.frame()/as_tibble() to access results
us_rent_income_dt %>% pivot_wider(names_from = variable, values_from = c(estimate, moe))
#> Source: local data table [52 x 6] #> Call: dcast(`_DT29`, formula = GEOID + NAME ~ variable, value.var = c("estimate", #> "moe")) #> #> GEOID NAME estimate_income estimate_rent moe_income moe_rent #> <chr> <chr> <dbl> <dbl> <dbl> <dbl> #> 1 01 Alabama 24476 747 136 3 #> 2 02 Alaska 32940 1200 508 13 #> 3 04 Arizona 27517 972 148 4 #> 4 05 Arkansas 23789 709 165 5 #> 5 06 California 29454 1358 109 3 #> 6 08 Colorado 32401 1125 109 5 #> # … with 46 more rows #> #> # Use as.data.table()/as.data.frame()/as_tibble() to access results
# When there are multiple `names_from` or `values_from`, you can use # use `names_sep` or `names_glue` to control the output variable names us_rent_income_dt %>% pivot_wider( names_from = variable, names_sep = ".", values_from = c(estimate, moe) )
#> Source: local data table [52 x 6] #> Call: dcast(`_DT29`, formula = GEOID + NAME ~ variable, value.var = c("estimate", #> "moe"), sep = ".") #> #> GEOID NAME estimate.income estimate.rent moe.income moe.rent #> <chr> <chr> <dbl> <dbl> <dbl> <dbl> #> 1 01 Alabama 24476 747 136 3 #> 2 02 Alaska 32940 1200 508 13 #> 3 04 Arizona 27517 972 148 4 #> 4 05 Arkansas 23789 709 165 5 #> 5 06 California 29454 1358 109 3 #> 6 08 Colorado 32401 1125 109 5 #> # … with 46 more rows #> #> # Use as.data.table()/as.data.frame()/as_tibble() to access results
# Can perform aggregation with values_fn warpbreaks_dt <- lazy_dt(as_tibble(warpbreaks[c("wool", "tension", "breaks")])) warpbreaks_dt
#> Source: local data table [54 x 3] #> Call: `_DT30` #> #> wool tension breaks #> <fct> <fct> <dbl> #> 1 A L 26 #> 2 A L 30 #> 3 A L 54 #> 4 A L 25 #> 5 A L 70 #> 6 A L 52 #> # … with 48 more rows #> #> # Use as.data.table()/as.data.frame()/as_tibble() to access results
warpbreaks_dt %>% pivot_wider( names_from = wool, values_from = breaks, values_fn = mean )
#> Source: local data table [3 x 3] #> Call: dcast(`_DT30`, formula = tension ~ wool, value.var = "breaks", #> fun.aggregate = function (x, ...) #> UseMethod("mean")) #> #> tension A B #> <fct> <dbl> <dbl> #> 1 L 44.6 28.2 #> 2 M 24 28.8 #> 3 H 24.6 18.8 #> #> # Use as.data.table()/as.data.frame()/as_tibble() to access results