## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## -----------------------------------------------------------------------------
library(ncimultivar)

## -----------------------------------------------------------------------------
#subset data
input.dataset <- nhcvd[nhcvd$SDMVSTRA %in% c(48, 54, 60, 66, 72, 78),]

#Define indicator for Day 2
input.dataset$Day2 <- (input.dataset$DAY == 2)

#remove subjects that are missing any covariates, variables, or outcomes
missing.covariates <- is.na(input.dataset$SMK_REC) | is.na(input.dataset$RIDAGEYR) | is.na(input.dataset$RIAGENDR) | 
                      is.na(input.dataset$Weekend) | is.na(input.dataset$Day2)
missing.variables <- is.na(input.dataset$TPOTA)
missing.outcomes <- is.na(input.dataset$BPSY_AVG)

input.dataset <- input.dataset[!missing.covariates & !missing.variables & !missing.outcomes,]

#break down smoking status into binary indicators
input.dataset$Current.Smoker <- as.numeric(input.dataset$SMK_REC == 1)
input.dataset$Former.Smoker <- as.numeric(input.dataset$SMK_REC == 2)
input.dataset$Never.Smoker <- as.numeric(input.dataset$SMK_REC == 3)

#rename potassium variable for readability
input.dataset$Potassium <- input.dataset$TPOTA

## -----------------------------------------------------------------------------
#generate BRR weights
fay.factor <- 0.7

input.dataset <- brr_weights(input.data=input.dataset,
                             id="SEQN",
                             strata="SDMVSTRA",
                             psu="SDMVPSU",
                             cell="PSCELL",
                             weight="WTDRD1",
                             fay.factor=fay.factor)

## -----------------------------------------------------------------------------
#Winsorize extreme values of potassium intake
wins.potassium <- boxcox_survey(input.data=input.dataset,
                                row.subset=(input.dataset$Day2 == 0),
                                variable="Potassium",
                                weight="RepWt_0",
                                do.winsorization=TRUE,
                                id="SEQN",
                                repeat.obs="DAY")

input.dataset$Potassium <- pmax(input.dataset$Potassium, 42.45263)

#run Box-Cox survey and create Box-Cox lambda data using the first recall
boxcox.lambda.data <- boxcox_survey(input.data=input.dataset,
                                    row.subset=(input.dataset$Day2 == 0),
                                    variable="Potassium",
                                    covariates=c("Current.Smoker", "Former.Smoker", "RIDAGEYR", "RIAGENDR", "Weekend"),
                                    weight="RepWt_0")

#Calculate minimum amount of potassium intake in the first recall
minimum.amount.data <- calculate_minimum_amount(input.data=input.dataset,
                                                row.subset=(input.dataset$Day2 == 0),
                                                daily.variables="Potassium")

#Run MCMC pre-preprocessor
pre.mcmc.data <- nci_multivar_preprocessor(input.data=input.dataset,
                                           daily.variables="Potassium",
                                           continuous.covariates="RIDAGEYR",
                                           boxcox.lambda.data=boxcox.lambda.data,
                                           minimum.amount.data=minimum.amount.data)

## -----------------------------------------------------------------------------
num.brr <- 8

mcmc.brr <- vector(mode="list", length=num.brr + 1)

for(brr.rep in 0:num.brr) {
  
  print(paste0("Starting Iteration ", brr.rep))
  
  mcmc.brr[[brr.rep + 1]] <- nci_multivar_mcmc(pre.mcmc.data=pre.mcmc.data,
                                               id="SEQN",
                                               repeat.obs="DAY",
                                               weight=paste0("RepWt_", brr.rep),
                                               daily.variables="Potassium",
                                               default.covariates=c("Current.Smoker", "Former.Smoker", "std.RIDAGEYR", "RIAGENDR", "Day2", "Weekend", "BPSY_AVG"),
                                               num.mcmc.iterations=3000,
                                               num.burn=1000,
                                               num.thin=2,
                                               save.u.main=TRUE,
                                               mcmc.seed=(9999 + brr.rep))
}

## -----------------------------------------------------------------------------
#get first instance of each subject
mcmc.input.data <- pre.mcmc.data$mcmc.input
distrib.pop <- mcmc.input.data[!duplicated(mcmc.input.data$SEQN),]
num.subjects <- nrow(distrib.pop)

#Set Day 2 to zero to factor out the effect of Day 2 recalls
distrib.pop$Day2 <- 0

#create repeats of each subject for weekday and weekend consumption
distrib.pop <- distrib.pop[rep(seq_len(num.subjects), each=2),]
distrib.pop$Weekend <- rep(c(0,1), num.subjects)
distrib.pop$Weekend.Weight <- rep(c(4,3), num.subjects)

## -----------------------------------------------------------------------------
summary.brr <- vector(mode="list", length=num.brr + 1)

for(brr.rep in 0:num.brr) {
  
  print(paste0("Starting Iteration ", brr.rep))
  
  #draw 10 sets of parameters from MCMC parameter distribution
  num.sets <- 10
  mcmc.parameter.sets <- draw_parameters(mcmc.brr[[brr.rep + 1]], num.draws=num.sets)
  
  model.coefficients <- vector(mode="list", length=num.sets)
  
  for(set.num in 1:num.sets) {
    
    #simulate 1 usual intake for each subject using the U matrix from the model parameter set
    regression.data <- nci_multivar_distrib(multivar.mcmc.model=mcmc.parameter.sets[[set.num]],
                                            distrib.population=distrib.pop,
                                            id="SEQN",
                                            weight=paste0("RepWt_", brr.rep),
                                            nuisance.weight="Weekend.Weight",
                                            additional.output=c("Current.Smoker", "Former.Smoker", "RIDAGEYR", "RIAGENDR", "BPSY_AVG"),
                                            use.mcmc.u.matrices=TRUE,
                                            distrib.seed=(99999 + num.sets*brr.rep + set.num))
    
    #scale down simulated potassium intake by 1000 to show the effect per 1,000 mg of potassium
    regression.data$usual.intake.Potassium <- regression.data$usual.intake.Potassium/1000
    
    #fit linear model of systolic blood pressure against the average simulated potassium intakes and save coefficients
    bp.model <- lm(BPSY_AVG ~ usual.intake.Potassium + Current.Smoker + Former.Smoker + RIDAGEYR + RIAGENDR, 
                   data=regression.data, 
                   weights=regression.data[,paste0("RepWt_", brr.rep)])
    
    model.coefficients[[set.num]] <- summary_coefficients(model=bp.model)
  }
  
  #save average of predicted values across parameter sets
  coefficient.values <- vapply(model.coefficients, function(set) set$value, model.coefficients[[1]]$value)
  mean.coefficients <- rowMeans(coefficient.values)
  
  summary.brr[[brr.rep + 1]] <- data.frame(model.coefficients[[1]][,c("population", "variable", "statistic")],
                                           value=mean.coefficients)
}

## -----------------------------------------------------------------------------
#calculate degrees of freedom
df <- length(unique(input.dataset$SDMVSTRA))

#extract point estimate and BRR replicates
point <- summary.brr[[1]]$value
reps <- vapply(summary.brr[-1], function(brr.i) brr.i$value, point)

#calculate BRR standard error
std.error <- sqrt(rowSums((reps - point)^2)/(num.brr*fay.factor^2))

#95% confidence intervals
confidence.lower <- point + qt(0.025, df)*std.error
confidence.upper <- point + qt(0.975, df)*std.error

#create summary report
summary.report <- data.frame(summary.brr[[1]], 
                             std.error=std.error,
                             confidence.lower=confidence.lower,
                             confidence.upper=confidence.upper)

summary.report

