Data Merge

NEST CoreDev

Joining datasets is an essential step when working with relational datasets.

To support this, two functions are provided depending on how to process the data_extract_spec object:

  1. merge_expression_module can be used when there is no need to process the list of data_extract_spec. This function reads the data and the list of data_extract_spec objects and applies the merging. Essentially, it serves as a wrapper that combines data_extract_multiple_srv() and merge_expression_srv().
  2. merge_expression_srv and data_extract_multiple_srv can be used in scenarios where additional processing of the list of data_extract_spec is necessary or data_extract_srv() to customize the selector_list input.

The following sections provide examples for both scenarios.

merge_expression_module

Using merge_expression_module alone requires a list of data_extract_spec objects for the data_extract argument, a list of reactive or non-reactive data.frame objects, and a list of join keys corresponding to each data.frame object.

Step 1/5 - Preparing the Data

library(teal.transform)
library(teal.data)
library(shiny)

# Define data.frame objects
ADSL <- teal.transform::rADSL
ADTTE <- teal.transform::rADTTE

# create a list of reactive data.frame objects
datasets <- list(
  ADSL = reactive(ADSL),
  ADTTE = reactive(ADTTE)
)

# create  join_keys
join_keys <- join_keys(
  join_key("ADSL", "ADSL", c("STUDYID", "USUBJID")),
  join_key("ADSL", "ADTTE", c("STUDYID", "USUBJID")),
  join_key("ADTTE", "ADTTE", c("STUDYID", "USUBJID", "PARAMCD"))
)

Step 2/5 - Creating the Data Extracts

adsl_extract <- data_extract_spec(
  dataname = "ADSL",
  select = select_spec(
    label = "Select variable:",
    choices = c("AGE", "BMRKR1"),
    selected = "AGE",
    multiple = TRUE,
    fixed = FALSE
  )
)

adtte_extract <- data_extract_spec(
  dataname = "ADTTE",
  select = select_spec(
    choices = c("AVAL", "ASEQ"),
    selected = "AVAL",
    multiple = TRUE,
    fixed = FALSE
  )
)

data_extracts <- list(adsl_extract = adsl_extract, adtte_extract = adtte_extract)

Step 3/5 - Creating the UI

merge_ui <- function(id, data_extracts) {
  ns <- NS(id)
  sidebarLayout(
    sidebarPanel(
      h3("Encoding"),
      div(
        data_extract_ui(
          ns("adsl_extract"), # must correspond with data_extracts list names
          label = "ADSL extract",
          data_extracts[[1]]
        ),
        data_extract_ui(
          ns("adtte_extract"), # must correspond with data_extracts list names
          label = "ADTTE extract",
          data_extracts[[2]]
        )
      )
    ),
    mainPanel(
      h3("Output"),
      verbatimTextOutput(ns("expr")),
      dataTableOutput(ns("data"))
    )
  )
}

Step 4/5 - Creating the Server Logic

merge_srv <- function(id, datasets, data_extracts, join_keys) {
  moduleServer(id, function(input, output, session) {
    merged_data <- merge_expression_module(
      data_extract = data_extracts,
      datasets = datasets,
      join_keys = join_keys,
      merge_function = "dplyr::left_join"
    )

    ANL <- reactive({
      data_list <- lapply(datasets, function(ds) ds())
      eval(envir = list2env(data_list), expr = as.expression(merged_data()$expr))
    })
    output$expr <- renderText(paste(merged_data()$expr, collapse = "\n"))
    output$data <- renderDataTable(ANL())
  })
}

Step 5/5 - Creating the shiny App

shinyApp(
  ui = fluidPage(merge_ui("data_merge", data_extracts)),
  server = function(input, output, session) {
    merge_srv("data_merge", datasets, data_extracts, join_keys)
  }
)

Shiny app output for Data Extract


data_extract_multiple_srv + merge_expression_srv

In the scenario above, if the user deselects the ADTTE variable, the merging between ADTTE and ADSL would still occur, even though ADTTE is not used or needed. Here, the developer might update the selector_list input in a reactive manner so that it gets updated based on conditions set by the developer. Below, we reuse the input from above and update the app server so that the adtte_extract is removed from the selector_list input when no ADTTE variable is selected. The reactive_selector_list is then passed to merge_expression_srv:

Modifying the Server Logic

merge_srv <- function(id, datasets, data_extracts, join_keys) {
  moduleServer(id, function(input, output, session) {
    selector_list <- data_extract_multiple_srv(data_extracts, datasets, join_keys)
    reactive_selector_list <- reactive({
      if (is.null(selector_list()$adtte_extract) || length(selector_list()$adtte_extract()$select) == 0) {
        selector_list()[names(selector_list()) != "adtte_extract"]
      } else {
        selector_list()
      }
    })

    merged_data <- merge_expression_srv(
      selector_list = reactive_selector_list,
      datasets = datasets,
      join_keys = join_keys,
      merge_function = "dplyr::left_join"
    )

    ANL <- reactive({
      data_list <- lapply(datasets, function(ds) ds())
      eval(envir = list2env(data_list), expr = as.expression(merged_data()$expr))
    })
    output$expr <- renderText(paste(merged_data()$expr, collapse = "\n"))
    output$data <- renderDataTable(ANL())
  })
}

Updating the shiny app

shinyApp(
  ui = fluidPage(merge_ui("data_merge", data_extracts)),
  server = function(input, output, session) {
    merge_srv("data_merge", datasets, data_extracts, join_keys)
  }
)