Split mixed tables, i.e. tables that hold both long data (N values per patient) and short data (one value per patient, duplicated on N lines), into one long table and one short table.
Usage
split_mixed_datasets(
datasets = get_datasets(),
id = get_subjid_cols(),
...,
ignore_cols = get_meta_cols(0.95),
output_code = FALSE,
verbose = TRUE
)
Arguments
- datasets
a dataframe or a list of dataframes to split. Default to all the datasets from
.lookup
.- id
the patient identifier, probably "subjid". Should be shared by all datasets. Case-insensitive.
- ...
not used
- ignore_cols
columns to ignore when considering a table as long. Default to
getOption("edc_cols_crfname", "CRFNAME")
. Case-insensitive.- output_code
whether to print the code to explicitly write. Can also be a file path.
- verbose
whether to print informations about the process.
Value
a list of the new long and short tables. Use load_list()
to load them into the global environment.
Examples
#tm = read_trialmaster("filename.zip", pw="xx")
tm = edc_example()
#> Warning: Option "edc_lookup" has been overwritten.
names(tm)
#> [1] "enrol" "db1" "db2"
#> [4] "db3" "short" "long_pure"
#> [7] "long_mixed" "ae" "datetime_extraction"
#> [10] "date_extraction" ".lookup"
#load_list(tm)
print(tm$long_mixed) #`val1` and `val2` are long but `val3` is short
#> # A tibble: 100 × 5
#> subjid crfname val1b val2b val3b
#> <int> <chr> <dbl> <dbl> <chr>
#> 1 1 both short and long data 1.33 11.0 B
#> 2 1 both short and long data -0.869 10.9 B
#> 3 2 both short and long data 0.0555 10.0 C
#> 4 2 both short and long data 0.0491 10.1 C
#> 5 3 both short and long data -0.578 9.28 D
#> 6 3 both short and long data -0.999 9.80 D
#> 7 4 both short and long data -0.00243 8.97 E
#> 8 4 both short and long data 0.656 9.03 E
#> 9 5 both short and long data 1.48 8.78 F
#> 10 5 both short and long data -1.91 10.8 F
#> # ℹ 90 more rows
mixed_data = split_mixed_datasets(tm, id="subjid", verbose=TRUE)
#> ✔ There were 5 short tables:
#> "enrol", "db1", "db2", "db3", and "short"
#> ✔ There was 1 pure long table:
#> "long_pure"
#> ✔ There were 2 mixed (short+long) tables:
#> "long_mixed" and "ae"
#> → Use `EDCimport::load_list()` on the result to get separated long and short
#> data.
load_list(mixed_data)
print(long_mixed_short)
#> # A tibble: 50 × 3
#> subjid crfname val3b
#> <int> <chr> <chr>
#> 1 1 both short and long data B
#> 2 2 both short and long data C
#> 3 3 both short and long data D
#> 4 4 both short and long data E
#> 5 5 both short and long data F
#> 6 6 both short and long data G
#> 7 7 both short and long data H
#> 8 8 both short and long data I
#> 9 9 both short and long data J
#> 10 10 both short and long data K
#> # ℹ 40 more rows
print(long_mixed_long)
#> # A tibble: 100 × 3
#> subjid val1b val2b
#> <int> <dbl> <dbl>
#> 1 1 1.33 11.0
#> 2 1 -0.869 10.9
#> 3 2 0.0555 10.0
#> 4 2 0.0491 10.1
#> 5 3 -0.578 9.28
#> 6 3 -0.999 9.80
#> 7 4 -0.00243 8.97
#> 8 4 0.656 9.03
#> 9 5 1.48 8.78
#> 10 5 -1.91 10.8
#> # ℹ 90 more rows
#alternatively, get the code and only use the datasets you need
split_mixed_datasets(tm, id="subjid", output_code=TRUE)
#> ✔ There were 5 short tables:
#> "enrol", "db1", "db2", "db3", and "short"
#> ✔ There was 1 pure long table:
#> "long_pure"
#> ✔ There were 2 mixed (short+long) tables:
#> "long_mixed" and "ae"
#> → Copy the following code in your script to separate long and short data:
#> ## `long_mixed` (dim=100x5) ----
#>
#> long_mixed_short = long_mixed %>%
#> select(subjid, crfname, val3b) %>%
#> group_by(subjid) %>%
#> summarise(across(everything(), unify)) #dim=50x3
#>
#> long_mixed_long = long_mixed %>%
#> select(subjid, val1b, val2b) #dim=100x3
#>
#>
#> ## `ae` (dim=175x6) ----
#>
#> ae_short = ae %>%
#> select(subjid, crfname, n_ae) %>%
#> group_by(subjid) %>%
#> summarise(across(everything(), unify)) #dim=48x3
#>
#> ae_long = ae %>%
#> select(subjid, aesoc, aegr, sae) #dim=175x4
filename = tempfile("mixed_code", fileext=".R")
split_mixed_datasets(tm, id="subjid", output_code=filename)
#> ✔ There were 5 short tables:
#> "enrol", "db1", "db2", "db3", and "short"
#> ✔ There was 1 pure long table:
#> "long_pure"
#> ✔ There were 2 mixed (short+long) tables:
#> "long_mixed" and "ae"
#> → Copy the code from /tmp/RtmpCVbBBg/mixed_code1d6e27ea654d.R in your script to
#> separate long and short data:
#> `utils::browseURL(/tmp/RtmpCVbBBg/mixed_code1d6e27ea654d.R)`
readLines(filename)
#> [1] "## `long_mixed` (dim=100x5) ---- "
#> [2] ""
#> [3] "long_mixed_short = long_mixed %>% "
#> [4] " select(subjid, crfname, val3b) %>% "
#> [5] " group_by(subjid) %>% "
#> [6] " summarise(across(everything(), unify)) #dim=50x3 "
#> [7] ""
#> [8] " long_mixed_long = long_mixed %>% "
#> [9] " select(subjid, val1b, val2b) #dim=100x3"
#> [10] ""
#> [11] ""
#> [12] "## `ae` (dim=175x6) ---- "
#> [13] ""
#> [14] "ae_short = ae %>% "
#> [15] " select(subjid, crfname, n_ae) %>% "
#> [16] " group_by(subjid) %>% "
#> [17] " summarise(across(everything(), unify)) #dim=48x3 "
#> [18] ""
#> [19] " ae_long = ae %>% "
#> [20] " select(subjid, aesoc, aegr, sae) #dim=175x4 "