---
title: "Creation of standard in-text tables"
author: "Laure Cougnaud"
date: "`r format(Sys.Date(), '%B %d, %Y')`"
output: 
  rmarkdown::html_document:
    toc: true
    toc_float: true
    toc_depth: 5
    number_sections: true
vignette: >
  %\VignetteIndexEntry{03 - Creation of standard in-text tables}
  %\VignetteEngine{knitr::rmarkdown}
  \usepackage[utf8]{inputenc}
---

# Introduction

```{r options, echo = FALSE}
	
	library(knitr)
	opts_chunk$set(
		echo = TRUE, results = 'asis', warning = FALSE, 
		# stop document execution if error (not the default)
		error = FALSE, message = FALSE, cache = FALSE,
		fig.width = 8, fig.height = 7,
		fig.path = "./figures_vignette/",
		fig.align = 'center')
	# instead of warn = 0 by default
	# include warnings when they occur in the document
	options(width = 170)
	
```

```{r loadPackages, warning = FALSE}

	library(clinUtils)
	library(tools)# toTitleCase
	library(plyr) # for ddply, rbind.fill
	library(inTextSummaryTable)

```

## Data format

The package is demonstrated with a subset of the ADaM datasets 
from the CDISC Pilot 01 dataset, available in the `clinUtils` package.

```{r loadData}	

	# load example data
    library(clinUtils)

    # load example data
    data(dataADaMCDISCP01)

    dataAll <- dataADaMCDISCP01
    labelVars <- attr(dataAll, "labelVars")

```

Typical in-text table for the CSR are included in the following sections.

Please note that the **table content** e.g. variables, statistics of interest
**depends strongly on the study at hand and personal preferences**.

# Subject information

## Subject disposition

```{r table-subjectDisposition}
	
	# data of interest
	dataDM <- dataAll$ADSL
	
	varDMFL <- grep("FL$", colnames(dataDM), value = TRUE)
	varDMFLLabel <- sub(" Flag$", "", labelVars[varDMFL])
	
	getSummaryStatisticsTable(
		data = dataDM,
		var = varDMFL, varFlag = varDMFL, varGeneralLab = "Analysis Set, N", 
		varLab = varDMFLLabel,
		stats = getStats("n (%)"),
		colVar = "TRT01P",
		labelVars = labelVars,
		colTotalInclude = TRUE, colTotalLab = "All subjects",
		varInclude0 = TRUE,
		title = toTitleCase("Table: subject disposition"),
		file = file.path("tables_CSR", "Table_subjectDisposition.docx")
	)

```

## Demographics

```{r table-demography}

	# data of interest
	dataDM <- subset(dataAll$ADSL, SAFFL == "Y")
	
	# variables of interest
	# Note: if available: ethnicity is included
	varsDM <- c(
		"SEX", "AGE", "AGEGR1",
		"RACE", "ETHNIC",
		"HEIGHTBL", "WEIGHTBL", 
		"BMIBL", "BMIBLGR1"
	)

	# Sort variables according to corresponding numeric variable
	dataDM$AGEGR1 <- with(dataDM, reorder(AGEGR1, AGEGR1N))
	dataDM$RACE <- with(dataDM, reorder(RACE, RACEN))
	dataDM$TRT01P <- with(dataDM, reorder(TRT01P, TRT01PN))
	
	## Define set of statistics of interest:
	statsDM <- getStatsData(
		data = dataDM, var = varsDM,
		# different for continuous and categorical variable
		type = c(cont = "median (range)", cat = "n (%)"),
		# for categorical variable, statistic name (here: 'n (%)')
		# should not be included in the table
		args = list(cat = list(includeName = FALSE))
	)

	## create the table:
	
	getSummaryStatisticsTable(
		data = dataDM, 
		# variables to summarize
		var = varsDM, 
		varGeneralLab = "Parameter",
		# column
		colVar = "TRT01P", colTotalInclude = TRUE, colTotalLab = "All subjects",
		# statistics
		stats = statsDM,
		statsGeneralLab = "",
		labelVars = labelVars,
		# if only one category, should be included in separated row (e.g. RACE: White)
		rowAutoMerge = FALSE,
		rowInclude0 = FALSE, emptyValue = 0,
		title = toTitleCase("Table: Demographic Data (safety Analysis Set)"),
		file = file.path("tables_CSR", "Table_demographicData.docx")
	)
	
```

## Baseline disease characteristics

Please note that the content of the table strongly depends on the study.

```{r table-baselineDiseaseCharacteristics}

	# data of interest
	dataBDC <- subset(dataAll$ADSL, SAFFL == "Y")
	
	# create table
	getSummaryStatisticsTable(
		data = dataBDC,
		var = c("DURDIS", "EDUCLVL"), varGeneralLab = "Parameter", 
		colVar = "TRT01P", colTotalInclude = TRUE, colTotalLab = "All subjects",
		stats = getStats("median\n(range)"), statsGeneralLab = "",
		rowAutoMerge = FALSE,
		labelVars = labelVars,
		title = toTitleCase("Table: Baseline Disease Characteristics (safety analysis set)"),
		file = file.path("tables_CSR", "Table_BaselineCharacteristics.docx")
	)

```

## Medical History and Concomitant Diseases

```{r table-MH}

	dataCM <- subset(dataAll$ADCM, SAFFL == "Y")

	# sort variable according to corresponding numeric variables
	dataCM$TRTA <- with(dataCM, reorder(TRTA, TRTAN))
	
	# Terms should be in lower-case
	dataCM$CMDECOD <- simpleCap(tolower(dataCM$CMDECOD))
	dataCM$CMCLAS <- simpleCap(tolower(dataCM$CMCLAS))
			
	getSummaryStatisticsTable(
		data = dataCM,
		colVar = "TRTA", colTotalInclude = TRUE, colTotalLab = "All subjects",
		rowVar = c("CMCLAS", "CMDECOD"), 
		# include total across generic terms and across ATC4 classes
		rowVarTotalInclude = c("CMCLAS", "CMDECOD"), 
		rowTotalLab = "Any prior and concomitant medication",
		stats = getStats("n (%)"),
		# sort rows based on counts of subjects in the total column 
		rowOrder = "total",
		labelVars = labelVars,
		emptyValue = 0,
		title = toTitleCase(paste("Prior and concomitant therapies",
			"by medication class and generic term (safety analyis set)"
		)),
		file = file.path("tables_CSR", "Table_CM.docx")
	)
	
```

# Efficacy Analyses

The example dataset has has two primary endpoints:

* ADAS-Cog (11), a.k.a Alzheimer's Disease Assessment Scale - Cognitive Subscale
a metric containing 11 items, available in the `ADQSADAS` dataset
* CIBIC+ score a.k.a Video-referenced Clinician's Interview-based Impression of Change
 available in the `ADQSCIBC` dataset
 
```{r table-efficacy}

	dataAdasCog11 <- subset(dataAll$ADQSADAS, PARAMCD == "ACTOT")
	dataCIBIC <- subset(dataAll$ADQSCIBC, PARAMCD == "CIBICVAL")
	
	dataEfficacy <- plyr::rbind.fill(dataAdasCog11, dataCIBIC)
	
	dataEfficacy$TRTP <- with(dataEfficacy, reorder(TRTP, TRTPN))
	dataEfficacy$AVISIT <- with(dataEfficacy, reorder(AVISIT, AVISITN))
	
	stats <- getStatsData(
		data = dataEfficacy, 
		var = c("AVAL", "CHG"), 
		type = c("n", "mean (se)", "median (range)")
	)
	
	getSummaryStatisticsTable(
		data = dataEfficacy,
		rowVar = "PARAM",
		colVar = c("TRTP", "AVISIT"),
		var = c("AVAL", "CHG"), 
		stats = stats,
		labelVars = labelVars,
		title = paste("Table: efficacy endpoints", 
			toTitleCase("actual value and changes from baseline per time point"				
		)),
		file = file.path("tables_CSR", "Table_efficacy.docx")
	)
	
```

# Safety Analyses

## Adverse Events

### Treatment-emergent summary table

```{r table-summaryTable}

	## data of interest: safety analysis set and treatment-emergent
	dataTEAE <- subset(dataAll$ADAE, SAFFL == "Y" & TRTEMFL == "Y")
	
	# order treatment and severity categories
	dataTEAE$TRTA <- with(dataTEAE, reorder(TRTA, TRTAN))
	
	## data considered for the total
	dataTotalAE <- subset(dataAll$ADSL, SAFFL == "Y")
	dataTotalAE$TRTA <- with(dataTotalAE, reorder(TRT01A, TRT01AN))
	
	# TEAE with worst intensity
	# build worst-case scenario
	dataTEAE$AESEV <- factor(dataTEAE$AESEV, levels = c("MILD", "MODERATE", "SEVERE"))
	dataTEAE$AESEVN <- as.numeric(dataTEAE$AESEV)
	dataTEAE <- ddply(dataTEAE, c("USUBJID", "TRTA"), function(x)
		cbind.data.frame(x, 
			WORSTINT = with(x, ifelse(AESEVN == max(AESEVN), as.character(AESEV), NA_character_))
	))
	dataTEAE$WORSTINT <- factor(dataTEAE$WORSTINT, levels = levels(dataTEAE$AESEV))
	
	## specify labels for each variable:
	varsAE <- c("TRTEMFL", "AESER", "AESDTH", "AEREL")
	
	# create the table
	getSummaryStatisticsTable(
		data = dataTEAE,
		colVar = "TRTA",
		# define variables to compute statistics on
		var = c("TRTEMFL", "AESER", "WORSTINT", "AESDTH", "AEREL"), 
		varFlag = c("TRTEMFL", "AESER", "AESDTH"),
		varLab = c(TRTEMFL = "Treatment-Emergent", WORSTINT = "Worst-case severity:"),
		varGeneralLab = "Subjects with, n(%):",
		# force the inclusion of lines for variable without count:
		varInclude0 = TRUE,
		# include the total for the worst-case scenario
		varTotalInclude = "WORSTINT",
		# statistics:
		stats = getStats('n (%)'),
		emptyValue = "0",
		labelVars = labelVars,
		# dataset used for the total in the header column (and for percentage as default)
		dataTotal = dataTotalAE,
		# title/export
		title = toTitleCase("Table: Summary Table of Treatment-emergent Adverse Events (safety analysis set)"),
		file = file.path("tables_CSR", "Table_TEAE_summary.docx")
	)
	
```

### Treatment-emergent incidence table

#### Events occuring in at least one subject

```{r table-TEAE}

	dataTEAE <- subset(dataAll$ADAE, SAFFL == "Y" & TRTEMFL == "Y")
	
	# order treatment and severity categories
	dataTEAE$TRTA <- with(dataTEAE, reorder(TRTA, TRTAN))
	
	## data considered for the total
	dataTotalAE <- subset(dataAll$ADSL, SAFFL == "Y")
	dataTotalAE$TRTA <- with(dataTotalAE, reorder(TRT01A, TRT01AN))
	
	getSummaryStatisticsTable(
		data = dataTEAE,
		rowVar = c("AESOC", "AEDECOD"),
		colVar = "TRTA",
		## total
		# data
		dataTotal = dataTotalAE,
		# row total
		rowVarTotalInclude = c("AESOC", "AEDECOD"), rowTotalLab = "Any TEAE",
		stats = getStats("n (%)"),
		labelVars = labelVars,
		rowVarLab = c('AESOC' = "TEAE by SOC and Preferred Term,\nn (%)"),
		# sort rows based on the total column:
		rowOrder = "total", 
		rowOrderTotalFilterFct = function(x) subset(x, TRTA == "Total"),
		title = paste("Table: Treatment-emergent Adverse Events by System Organ Class",
			"and Preferred Term (Safety Analysis Set)"
		),
		file = file.path("tables_CSR", "Table_TEAE_SOCPT_atLeast1Subject.docx")
	)

```

#### Events occuring in at least 25% of all subjects

```{r table-TEAE-inAtLeast25Percent}

	getSummaryStatisticsTable(
		data = dataTEAE,
		rowVar = c("AESOC", "AEDECOD"),
		colVar = "TRTA",
		## total
		# data
		dataTotal = dataTotalAE, 
		# row total
		rowVarTotalInclude = c("AESOC", "AEDECOD"), rowTotalLab = "Any TEAE",
		stats = getStats("n (%)"),
		labelVars = labelVars,
		rowVarLab = c('AESOC' = "SOC and Preferred Term,\nn (%)"),
		# sort rows based on the total column:
		rowOrder = "total", 
		rowOrderTotalFilterFct = function(x) subset(x, TRTA == "Total"),
		title = paste("Table: Treatment-emergent Adverse Events by System Organ Class",
			"and Preferred Term reported in at least 25% of the subjects",
			"in any treatment group (Safety Analysis Set)"
		),
		file = file.path("tables_CSR", "Table_TEAE_SOCPT_atLeast25PercentsSubject.docx"),
		# include only events occuring in at least 25% for at least one preferred term:
		filterFct = function(x)
			ddply(x, "AESOC", function(x){ # per AESOC to include the total
				ddply(x, "AEDECOD", function(y){
					yTotal <- subset(y, grepl("Total", TRTA))
					if(any(yTotal$statPercN >= 25))	y
				})
			})
	)

```

### Treatment-emergent worst-case table

```{r tableTEAE-worstCase}

	dataTEAE <- subset(dataAll$ADAE, SAFFL == "Y" & TRTEMFL == "Y")
	
	# order treatment and severity categories
	dataTEAE$TRTA <- with(dataTEAE, reorder(TRTA, TRTAN))
	
	## data considered for the total
	dataTotalAE <- subset(dataAll$ADSL, SAFFL == "Y")
	dataTotalAE$TRTA <- with(dataTotalAE, reorder(TRT01A, TRT01AN))
	
	# TEAE with worst intensity
	dataTEAE$AESEV <- factor(dataTEAE$AESEV, levels = c("MILD", "MODERATE", "SEVERE"))
	dataTEAE$AESEVN <- as.numeric(dataTEAE$AESEV)
	
	# extract worst-case scenario data (only one record if multiple with same severity)
	dataAEWC <- ddply(dataTEAE, c("AESOC", "AEDECOD", "USUBJID", "TRTA"), function(x){
		x[which.max(x$AESEVN), ]
	})
	# worst-case scenario in lower case
	dataAEWC$WORSTINT <- simpleCap(tolower(dataAEWC$AESEV))
	labelVars["WORSTINT"] <- "Worst-case scenario"

	## datasets used for the total: 
	# for total: compute worst-case across SOC and across AE term
	# (otherwise patient counted in multiple categories if present different categories for different AEs)
	dataTotalRow <- list(
		# within SOC (across AEDECOD)
		'AEDECOD' = ddply(dataAEWC, c("AESOC", "USUBJID", "TRTA"), function(x){	
			x[which.max(x$AESEVN), ]
		}),
		# across SOC
		'AESOC' = ddply(dataAEWC, c("USUBJID", "TRTA"), function(x){	
			x[which.max(x$AESEVN), ]
		})
	)
	
	getSummaryStatisticsTable(
		data = dataAEWC,
		## row variables:
		rowVar = c("AESOC", "AEDECOD", "WORSTINT"), rowVarInSepCol = "WORSTINT",
		# include total across SOC and across AEDECOD
		rowVarTotalInclude = c("AESOC", "AEDECOD"), dataTotalRow = dataTotalRow, 
		rowVarTotalByVar = "WORSTINT", # count for each severity category for the total
		rowTotalLab = "Any TEAE", rowVarLab = c(AESOC = "Subjects with, n(%):", WORSTINT = "Worst-case scenario"),
		# sort per total in the total column
		rowOrder = "total", 
		## column variables
		colVar = "TRTA", 
		stats = getStats("n (%)"),
		emptyValue = "0",
		labelVars = labelVars,
		dataTotal = dataTotalAE,
		title = toTitleCase(paste("Table: Treatment-emergent Adverse",
			"Events by system organ",
			"and preferred term by worst-case (safety Analysis Set)"
		)),
		file = file.path("tables_CSR", "Table_TEAE_Severity.docx")
	)
	
```

## Laboratory safety

### Table of laboratory abnormalities

```{r tableLab}

	dataLBAbn <- subset(dataAll$ADLBC, SAFFL == "Y" & LBNRIND != "NORMAL")
	
	dataLBAbn$PARAM <- with(dataLBAbn, reorder(PARAM, PARAMN))
	dataLBAbn$TRTA <- with(dataLBAbn, reorder(TRTA, TRTAN))
	dataLBAbn$LBNRIND <- factor(dataLBAbn$LBNRIND, levels = c("LOW", "HIGH"))

	dataLBAbnTotal <- subset(dataAll$ADSL, SAFFL == "Y")
	dataLBAbnTotal$TRTA <- with(dataLBAbnTotal, reorder(TRT01A, TRT01AN))
	
	getSummaryStatisticsTable(
		data = dataLBAbn,
		rowVar = c("PARCAT1", "PARAM"), 
		rowVarTotalInclude = c("PARCAT1", "PARAM"),
		colVar = "TRTA", 
		var = "LBNRIND", 
		rowVarInSepCol = "variableGroup", varSubgroupLab = "Abnormality",
		rowVarLab = c('PARCAT1' = "Laboratory Parameter\nn (%)"),
		stats = getStats("n (%)"),
		labelVars = labelVars,
		rowOrder = c("PARCAT1" = "total", "PARAM" = "total", "variableGroup" = "auto"),
		dataTotal = dataLBAbnTotal, 
		title = toTitleCase(paste("Table: Treatment-emergent",
			"Worst-case Laboratory Abnormalities (safety analysis set)"
		)),
		emptyValue = "0",
		file = file.path("tables_CSR", "Table_Lab_Severity.docx")
	)

```

## Electrocardiogram

Please note that there is no ECG dataset in the CDISC Pilot dataset
used for the examples, so this table is not effectively created
in the vignette.

Nevertheless, an example code is provided below to create a standard
table of summary statistics for the ECG parameters.

```{r ECG-formatData, eval = FALSE}

	# data of interest
	paramsECG <- c("QT", "QTCF", "QRS", "PR", "RR", "EGHR")

	dataECG <- subset(dataAll$ADEG, SAFFL == "Y" & PARAMCD %in% paramsECG)
	dataECG$TRTA <- with(dataECG, reorder(TRTA, TRTAN))
	dataECG$PARAM <- with(dataECG, reorder(PARAM, PARAMN))
	
	# consider all non-missing post-baseline records
	dataECGPostBaseline <- subset(dataECG, 
		AVISIT %in% c("Screening", "Baseline", "Worst-case post-baseline")
	)
	
	# worst-case scenario:
	dataECGWC <- subset(dataECG, AVISIT == "Worst-case post-baseline")
	# treatment-emergent
	dataECGWC$TRTEMFL <- with(dataECGWC, ifelse(BASECAT1 != CHGCAT1, "Y", "N"))
	dataECGWCTE <- subset(dataECGWC, TRTEMFL == "Y")
	dataECGWC <- convertVarToFactor(dataECGWC, 
		var = c("AVALCAT1", "CHGCAT1"), 
		varNum = c("AVALCA1N", "CHGCAT1N")
	)
	
	# create the table
	getSummaryStatisticsTable(
		data = dataECGWC,
		# layout:
		colVar = "TRTA",
		rowVar = "PARAM", rowVarLab = c('PARAM' = "ECG Parameter"),
		# metrics to compute statistics on
		var = c("AVALCAT1", "CHGCAT1"),
		# in a separated column
		rowVarInSepCol = c("variable", "variableGroup"),
		# labels
		varGeneralLab = "Abnormality",
		varSubgroupLab = "Worst-Case Post-Baseline",
		stats = getStats("n (%)"),
		labelVars = labelVars,
		# total: all post-baseline
		dataTotal = dataECGPostBaseline, 
		emptyValue = "0",
		rowVarTotalPerc = "PARAM", # total per parameter
		# ensure that categories are below the type of abnormality
		rowAutoMerge = FALSE,
		# only retain abnormalities:
		filterFct = function(x){
			subset(x, !variableGroup %in% c("<= 450 msec", "<= 30 msec"))
		},
		title = toTitleCase(paste("Table: Treatment-emergent worst-case",
			"ECG abnormalities and change from baseline ECG abnormalities (safety analysis set)"
		)),
		file = file.path("tables_CSR", "Table_ECG.docx")
	)
	
```

## Vital signs

### Treatment-emergent vital signs abnormalities

```{r tableVitalSigns}

	# analyis set and parameters of interest
	dataVS <- subset(dataAll$ADVS, 
		SAFFL == "Y" & ANL01FL == "Y" & VISIT != "BASELINE"
	)
	
	dataVS$PARAM <- with(dataVS, reorder(PARAM, PARAMN))
	dataVS$ANRIND <- with(dataVS, reorder(PARAM, PARAMN))
	dataVS$TRTA <- with(dataVS, reorder(TRTA, TRTAN))
	dataVS$SHIFT1 <- with(dataVS, factor(ifelse(SHIFT1 == "", NA_character_, SHIFT1)))
			
	getSummaryStatisticsTable(
		data = dataVS,
		rowVar = "PARAM", 
		rowVarInSepCol = "variableGroup", 
		rowVarInclude0 = TRUE,
		colVar = "TRTA", 
		var = "SHIFT1", varTotalInclude = TRUE,
		emptyValue = 0,
		stats = getStats("n (%)"),
		rowVarTotalPerc = "PARAM",
		labelVars = labelVars,
		title = toTitleCase(paste("Table: Treatment-emergent Worst-case",
			"Vital Sign Abnormalities (Safety Analysis Set)"
		)),
		file = file.path("tables_CSR", "Table_VitalSigns_Severity.docx")
	)

```

# Pharmacokinetics analysis

Please note that this example pharmacodynamics dataset contains different
subjects than the other datasets used in the vignette.

```{r PK}

	paramcdPK <- c("AUCINFO", "CMAX", "TMAX")
	dataPK <- subset(dataAll$ADPP, PKFL == "Y" & PARAMCD %in% paramcdPK)
	
	dataPK$PARCAT1 <- with(dataPK, reorder(PARCAT1, PARCAT1N))
	dataPK$PARAMCD <- with(dataPK, reorder(PARAMCD, PARAMN))
	dataPK$TRTA <- with(dataPK, reorder(TRTA, TRTAN))
	dataPK$PARAMCD <- with(dataPK, reorder(PARAMCD, PARAMN))
	
	# build pretty labels
	labelsPK <- c(
		AUCINFO = "AUC_{Inf,obs}\n(h*ng/mL)",
		CMAX = "C_{max}\n(ng/mL)",
		TMAX = "t_{max}\n(h)"
	)
	dataPK$PARAM <- factor(dataPK$PARAMCD, 
		levels = levels(dataPK$PARAMCD), 
		labels = labelsPK[levels(dataPK$PARAMCD)]
	)
	
	statsPK <- dlply(dataPK, "PARAM", function(dataParam){
		getStatsData(
			data = dataParam,
			var = "AVAL",
			type = "median\n(range)",
			includeName = FALSE
		)[[1]]
	})
	
	getSummaryStatisticsTable(
		data = dataPK,
		rowVar = c("PARCAT1", "PARAM"), colVar = "TRTA",
		var = "AVAL",
#		rowVarLab = c('PARCAT1' = "PK parameters"),
		stats = statsPK, statsVarBy = "PARAM",
		emptyValue = "-",
		title = toTitleCase("Table: Summary of PK parameters (pharmacokinetics analysis set)"),
		file = file.path("tables_CSR", "Table_PK_Parameters.docx"),
		labelVars = labelVars
	)

```

# Appendix

## Session information

```{r includeSessionInfo, echo = FALSE, results = "asis"}
print(sessionInfo())
```