## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## -----------------------------------------------------------------------------
library(ncimultivar)

## -----------------------------------------------------------------------------
#subset data
input.dataset <- nhcvd[nhcvd$SDMVSTRA %in% c(48, 54, 60, 66, 72, 78),]

#Define indicator for Day 2
input.dataset$Day2 <- (input.dataset$DAY == 2)

#remove subjects that are missing any covariates or variables
missing.covariates <- is.na(input.dataset$SMK_REC) | is.na(input.dataset$RIDAGEYR) | is.na(input.dataset$RIAGENDR) | 
                      is.na(input.dataset$Weekend) | is.na(input.dataset$Day2)
missing.variables <- is.na(input.dataset$G_WHOLE) | is.na(input.dataset$TKCAL)

input.dataset <- input.dataset[!missing.covariates & !missing.variables,]

#break down smoking status into binary indicators
input.dataset$Current.Smoker <- as.numeric(input.dataset$SMK_REC == 1)
input.dataset$Former.Smoker <- as.numeric(input.dataset$SMK_REC == 2)
input.dataset$Never.Smoker <- as.numeric(input.dataset$SMK_REC == 3)

#rename whole grain and energy variables for readability
input.dataset$Whole.Grain <- input.dataset$G_WHOLE
input.dataset$Energy <- input.dataset$TKCAL

## -----------------------------------------------------------------------------
fay.factor <- 0.7

input.dataset <- brr_weights(input.data=input.dataset,
                             id="SEQN",
                             strata="SDMVSTRA",
                             psu="SDMVPSU",
                             cell="PSCELL",
                             weight="WTDRD1",
                             fay.factor=fay.factor)

## -----------------------------------------------------------------------------
wins.whole.grain <- boxcox_survey(input.data=input.dataset,
                                  row.subset=(input.dataset$Day2 == 0),
                                  variable="Whole.Grain",
                                  is.episodic=TRUE,
                                  weight="RepWt_0",
                                  do.winsorization=TRUE,
                                  iqr.multiple=2,
                                  id="SEQN",
                                  repeat.obs="DAY")

wins.energy <- boxcox_survey(input.data=input.dataset,
                             row.subset=(input.dataset$Day2 == 0),
                             variable="Energy",
                             weight="RepWt_0",
                             do.winsorization=TRUE,
                             iqr.multiple=2,
                             id="SEQN",
                             repeat.obs="DAY")

#Winsorize whole grain
input.dataset$Whole.Grain <- pmin(input.dataset$Whole.Grain, 10.71163)

#Winsorize energy
input.dataset$Energy <- pmax(input.dataset$Energy, 269.0701)
input.dataset$Energy <- pmin(input.dataset$Energy, 8026.0436)

## -----------------------------------------------------------------------------
boxcox.whole.grain <- boxcox_survey(input.data=input.dataset,
                                    row.subset=(input.dataset$Day2 == 0),
                                    variable="Whole.Grain",
                                    covariates=c("Current.Smoker", "Former.Smoker", "RIDAGEYR", "RIAGENDR", "Weekend"),
                                    weight="RepWt_0")

boxcox.energy <- boxcox_survey(input.data=input.dataset,
                               row.subset=(input.dataset$Day2 == 0),
                               variable="Energy",
                               covariates=c("Current.Smoker", "Former.Smoker", "RIDAGEYR", "RIAGENDR", "Weekend"),
                               weight="RepWt_0")

boxcox.lambda.data <- rbind(boxcox.whole.grain, boxcox.energy)

## -----------------------------------------------------------------------------
minimum.amount.data <- calculate_minimum_amount(input.data=input.dataset,
                                                row.subset=(input.dataset$Day2 == 0),
                                                episodic.variables="Whole.Grain",
                                                daily.variables="Energy")

## -----------------------------------------------------------------------------
pre.mcmc.data <- nci_multivar_preprocessor(input.data=input.dataset,
                                           episodic.variables="Whole.Grain",
                                           daily.variables="Energy",
                                           continuous.covariates="RIDAGEYR",
                                           boxcox.lambda.data=boxcox.lambda.data,
                                           minimum.amount.data=minimum.amount.data)

## -----------------------------------------------------------------------------
head(pre.mcmc.data$mcmc.input[,c("SEQN", "DAY", "ind.Whole.Grain", "amt.Whole.Grain")], 10)

## -----------------------------------------------------------------------------
num.brr <- 8

mcmc.brr <- vector(mode="list", length=num.brr + 1)

for(brr.rep in 0:num.brr) {
  
  print(paste0("Starting Iteration ", brr.rep))
  
  mcmc.brr[[brr.rep + 1]] <- nci_multivar_mcmc(pre.mcmc.data=pre.mcmc.data,
                                               id="SEQN",
                                               repeat.obs="DAY",
                                               weight=paste0("RepWt_", brr.rep),
                                               episodic.variables="Whole.Grain",
                                               daily.variables="Energy",
                                               default.covariates=c("Current.Smoker", "Former.Smoker", "std.RIDAGEYR", "RIAGENDR", "Day2", "Weekend"),
                                               num.mcmc.iterations=4000,
                                               num.burn=2000,
                                               num.thin=2,
                                               mcmc.seed=(9999 + brr.rep))
}

## -----------------------------------------------------------------------------
#get first instance of each subject
mcmc.input.data <- pre.mcmc.data$mcmc.input
distrib.pop <- mcmc.input.data[!duplicated(mcmc.input.data$SEQN),]
num.subjects <- nrow(distrib.pop)

#Set Day 2 to zero to factor out the effect of Day 2 recalls
distrib.pop$Day2 <- 0

#create repeats of each subject for weekday and weekend consumption
distrib.pop <- distrib.pop[rep(seq_len(num.subjects), each=2),]
distrib.pop$Weekend <- rep(c(0,1), num.subjects)
distrib.pop$Weekend.Weight <- rep(c(4,3), num.subjects)

## -----------------------------------------------------------------------------
summary.brr <- vector(mode="list", length=num.brr + 1)

for(brr.rep in 0:num.brr) {
  
  print(paste0("Starting Iteration ", brr.rep))
  
  #create dataset with 200 simulated usual intakes for each subject
  distrib.data <- nci_multivar_distrib(multivar.mcmc.model=mcmc.brr[[brr.rep + 1]],
                                       distrib.population=distrib.pop,
                                       id="SEQN",
                                       weight=paste0("RepWt_", brr.rep),
                                       nuisance.weight="Weekend.Weight",
                                       num.simulated.u=200,
                                       distrib.seed=(99999 + brr.rep))
  
  #compute means, quantiles, and proportions for simulated whole grain and energy intakes
  summary.brr[[brr.rep + 1]] <- nci_multivar_summary(input.data=distrib.data,
                                                     variables=c("usual.intake.Whole.Grain", "usual.intake.Energy"),
                                                     weight=paste0("RepWt_", brr.rep),
                                                     do.means=TRUE,
                                                     do.quantiles=TRUE,
                                                     quantiles=c(0.05, 0.25, 0.5, 0.75, 0.95))
}

## -----------------------------------------------------------------------------
#calculate degrees of freedom
df <- length(unique(input.dataset$SDMVSTRA))

#extract point estimate and BRR replicates
point <- summary.brr[[1]]$value
reps <- vapply(summary.brr[-1], function(brr.i) brr.i$value, point)

#calculate BRR standard error
std.error <- sqrt(rowSums((reps - point)^2)/(num.brr*fay.factor^2))

#95% confidence intervals
confidence.lower <- point + qt(0.025, df)*std.error
confidence.upper <- point + qt(0.975, df)*std.error

#create summary report
summary.report <- data.frame(summary.brr[[1]], 
                             std.error=std.error,
                             confidence.lower=confidence.lower,
                             confidence.upper=confidence.upper)

summary.report

