% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/nci_multivar_summary.R
\name{nci_multivar_summary}
\alias{nci_multivar_summary}
\title{Summarize simulated usual intakes}
\usage{
nci_multivar_summary(
  input.data,
  variables = NULL,
  mean.variables = NULL,
  quantile.variables = NULL,
  density.plot.variables = NULL,
  ecdf.plot.variables = NULL,
  row.subset = NULL,
  population.name = "All",
  weight = NULL,
  do.means = TRUE,
  do.quantiles = TRUE,
  quantiles = c(0.05, 0.25, 0.5, 0.75, 0.95),
  do.proportions = TRUE,
  lower.thresholds = NULL,
  upper.thresholds = NULL,
  do.density.plots = FALSE,
  do.ecdf.plots = FALSE
)
}
\arguments{
\item{input.data}{A data frame.}

\item{variables}{Vector of variables to use for all enabled summary
functions.}

\item{mean.variables}{Vector of variables to calculate means. Overrides
\code{variables} for calculating means.}

\item{quantile.variables}{Vector of variables to calculate quantiles.
Overrides \code{variables} for calculating quantiles.}

\item{density.plot.variables}{Vector of variables to generate density plots.
Overrides \code{variables} for generating density plots.}

\item{ecdf.plot.variables}{Vector of variables to generate ECDF plots.
Overrides \code{variables} for generating ECDF plots.}

\item{row.subset}{Logical vector of length \code{nrow(input.data)} indicating which rows
of \code{input.data} to use when calculating summary statistics or plots.}

\item{population.name}{A character string to identify a population. Included
as a column in the output so that populations can be distinguished when
output datasets are combined. (default = \code{"All"})}

\item{weight}{Variable with weighting for each subject.}

\item{do.means}{Flag specifying whether means should be calculated. (default
= \code{TRUE})}

\item{do.quantiles}{Flag specifying whether quantiles should be calculated.
(default = \code{TRUE})}

\item{quantiles}{Numeric vector of quantiles (between 0 and 1) to calculate.
No effect when \code{do.quantiles} is \code{FALSE}. Default calculates 5th, 25th,
50th, 75th, and 95th percentiles. (default = \code{c(0.05, 0.25, 0.5, 0.75, 0.95)})}

\item{do.proportions}{Flag specifying whether proportions of observations
above/below thresholds should be calculated. At least one of
\code{lower.thresholds} and \code{upper.thresholds} must be specified to calculate
proportions. (default = \code{TRUE})}

\item{lower.thresholds}{Named list of vectors specifying thresholds to
calculate the proportion of subjects above those thresholds. The names of
the list are the variables to apply the thresholds to.}

\item{upper.thresholds}{Named list of vectors specifying thresholds to
calculate the proportion of subjects below those thresholds. The names of
the list are the variables to apply the thresholds to.}

\item{do.density.plots}{Flag specifying whether density plots should be
generated. (default = \code{FALSE})}

\item{do.ecdf.plots}{Flag specifying whether ECDF plots should be generated.
(default = \code{FALSE})}
}
\value{
A data frame with the following columns:
\itemize{
\item population: The name of the population given by \code{population.name}.
\item variable: The name of the variable being summarized.
\item statistic: The name of the summary statistic.
\item value: The value of the summary statistic.
}

If plots are generated, they are output as PDFs in the current working
directory.
}
\description{
A utility that calculates means, quantiles, and proportions
above/below a threshold for data frame columns. The output data frame is
structured so that the summary statistic values are in a single column to
make variance calculation from replicate methods more efficient (for an
example, see the daily nutrient analysis vignette).
}
\details{
This function is intended to summarize simulated usual intakes from
\code{\link[=nci_multivar_distrib]{nci_multivar_distrib()}} and variables derived from them. By default, the
5th, 25th, 50th, 75th, and 95th percentiles and the mean are calculated for
the input variables. In addition, the proportion of subjects above lower
threshold(s) and/or below upper threshold(s) for variables can be
calculated.

Supplemental plots can also be generated to visualize the distributions of
the variables. Density plots use kernel density estimation via
\code{\link[stats:density]{stats::density()}} to visualize a variable's probability density
function. Empirical cumulative distribution function (ECDF) plots use
\code{\link[stats:ecdf]{stats::ecdf()}} to visualize a variable's cumulative distribution function.
}
\examples{
#subset NHANES data
nhanes.subset <- nhcvd[nhcvd$SDMVSTRA \%in\% c(48, 60, 72),]

boxcox.lambda.data <- boxcox_survey(input.data=nhanes.subset,
                                    row.subset=(nhanes.subset$DAY == 1),
                                    variable="TSODI",
                                    id="SEQN",
                                    repeat.obs="DAY",
                                    weight="WTDRD1")

minimum.amount.data <- calculate_minimum_amount(input.data=nhanes.subset,
                                                row.subset=(nhanes.subset$DAY == 1),
                                                daily.variables="TSODI")

pre.mcmc.data <- nci_multivar_preprocessor(input.data=nhanes.subset,
                                           daily.variables="TSODI",
                                           boxcox.lambda.data=boxcox.lambda.data,
                                           minimum.amount.data=minimum.amount.data)

mcmc.output <- nci_multivar_mcmc(pre.mcmc.data=pre.mcmc.data,
                                 id="SEQN",
                                 weight="WTDRD1",
                                 repeat.obs="DAY",
                                 daily.variables="TSODI",
                                 num.mcmc.iterations=1000,
                                 num.burn=500,
                                 num.thin=1)

#use first instance of each subject as population base
mcmc.input.data <- pre.mcmc.data$mcmc.input
population.base <- mcmc.input.data[!duplicated(mcmc.input.data$SEQN),]

distrib.output <- nci_multivar_distrib(multivar.mcmc.model=mcmc.output,
                                       distrib.population=population.base,
                                       id="SEQN",
                                       weight="WTDRD1",
                                       num.simulated.u=100)

usual.intake.summary <- nci_multivar_summary(input.data=distrib.output,
                                             variables="usual.intake.TSODI",
                                             weight="WTDRD1",
                                             do.means=TRUE,
                                             do.quantiles=TRUE,
                                             quantiles=c(0.05, 0.25, 0.5, 0.75, 0.95),
                                             do.proportions=TRUE,
                                             lower.thresholds=list(usual.intake.TSODI=2200),
                                             upper.thresholds=list(usual.intake.TSODI=3600))

usual.intake.summary
}
