## ----setup, include = FALSE---------------------------------------------------
#knitr chunk defaults: collapse source and output, prefix output with "#>"
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## -----------------------------------------------------------------------------
library(ncimultivar)

## -----------------------------------------------------------------------------
#keep only the records that belong to the selected SDMVSTRA strata
input.dataset <- nhcvd[is.element(nhcvd$SDMVSTRA, c(48, 54, 60, 66, 72, 78)), ]

#logical indicator: TRUE on records whose DAY value is 2
input.dataset[["Day2"]] <- input.dataset[["DAY"]] == 2

#flag records with a missing covariate or a missing grain variable, then
#drop every flagged record
missing.covariates <- !complete.cases(
  input.dataset[c("SMK_REC", "RIDAGEYR", "RIAGENDR", "Weekend", "Day2")]
)
missing.variables <- !complete.cases(input.dataset[c("G_TOTAL", "G_REFINED")])

input.dataset <- input.dataset[!(missing.covariates | missing.variables), ]

#expand smoking status (SMK_REC codes 1, 2 and 3) into 0/1 indicator columns
input.dataset[["Current.Smoker"]] <- as.numeric(input.dataset[["SMK_REC"]] == 1)
input.dataset[["Former.Smoker"]] <- as.numeric(input.dataset[["SMK_REC"]] == 2)
input.dataset[["Never.Smoker"]] <- as.numeric(input.dataset[["SMK_REC"]] == 3)

#copy the grain variables into columns with more readable names
input.dataset[["Total.Grain"]] <- input.dataset[["G_TOTAL"]]
input.dataset[["Refined.Grain"]] <- input.dataset[["G_REFINED"]]

## -----------------------------------------------------------------------------
#Run boxcox_survey() with winsorization switched on for each grain variable,
#using only the rows where Day2 is 0
wins.total.grain <- boxcox_survey(
  input.data = input.dataset,
  row.subset = input.dataset$Day2 == 0,
  variable = "Total.Grain",
  id = "SEQN",
  repeat.obs = "DAY",
  weight = "WTDRD1",
  do.winsorization = TRUE
)

wins.refined.grain <- boxcox_survey(
  input.data = input.dataset,
  row.subset = input.dataset$Day2 == 0,
  variable = "Refined.Grain",
  id = "SEQN",
  repeat.obs = "DAY",
  weight = "WTDRD1",
  do.winsorization = TRUE
)

#Cap Total.Grain at 51.05915 on every row; Refined.Grain is left untouched
#NOTE(review): the cutoff is hard-coded, presumably copied from the
#winsorization output above - confirm it still applies if the data change
input.dataset[["Total.Grain"]] <- pmin(input.dataset[["Total.Grain"]], 51.05915)


#Search for a Box-Cox lambda for each grain variable on the rows where Day2
#is 0, adjusting for the smoking, age, sex and weekend covariates
boxcox.total.grain <- boxcox_survey(
  input.data = input.dataset,
  row.subset = input.dataset$Day2 == 0,
  variable = "Total.Grain",
  covariates = c(
    "Current.Smoker", "Former.Smoker", "RIDAGEYR", "RIAGENDR", "Weekend"
  ),
  weight = "WTDRD1"
)

boxcox.refined.grain <- boxcox_survey(
  input.data = input.dataset,
  row.subset = input.dataset$Day2 == 0,
  variable = "Refined.Grain",
  covariates = c(
    "Current.Smoker", "Former.Smoker", "RIDAGEYR", "RIAGENDR", "Weekend"
  ),
  weight = "WTDRD1"
)

#stack the two results (Total.Grain first) into one lambda table
boxcox.lambda.data <- rbind(boxcox.total.grain, boxcox.refined.grain)


#minimum amounts for both grain variables, computed on the rows where Day2 is 0
minimum.amount.data <- calculate_minimum_amount(
  input.data = input.dataset,
  row.subset = input.dataset$Day2 == 0,
  daily.variables = c("Total.Grain", "Refined.Grain")
)

#build the MCMC input data from the full dataset, the Box-Cox lambdas and the
#minimum amounts; RIDAGEYR is passed as a continuous covariate
#NOTE(review): later model calls use "std.RIDAGEYR", presumably the
#standardized column created by this step - confirm against the package docs
pre.mcmc.data <- nci_multivar_preprocessor(
  input.data = input.dataset,
  daily.variables = c("Total.Grain", "Refined.Grain"),
  continuous.covariates = "RIDAGEYR",
  boxcox.lambda.data = boxcox.lambda.data,
  minimum.amount.data = minimum.amount.data
)

## -----------------------------------------------------------------------------
#reference fit: Total.Grain only, 3000 iterations with 1000 burn-in and
#thinning of 2, fixed seed
good.model <- nci_multivar_mcmc(
  pre.mcmc.data = pre.mcmc.data,
  id = "SEQN",
  repeat.obs = "DAY",
  weight = "WTDRD1",
  daily.variables = "Total.Grain",
  default.covariates = c(
    "Current.Smoker", "Former.Smoker", "std.RIDAGEYR",
    "RIAGENDR", "Day2", "Weekend"
  ),
  num.mcmc.iterations = 3000,
  num.burn = 1000,
  num.thin = 2,
  mcmc.seed = 9999
)

#draw the trace plots for the fitted model
trace_plots(multivar.mcmc.model = good.model)

## -----------------------------------------------------------------------------
#same model settings as the reference fit, but with a much shorter chain:
#300 iterations, of which 100 are burn-in
low.burn.in <- nci_multivar_mcmc(
  pre.mcmc.data = pre.mcmc.data,
  id = "SEQN",
  repeat.obs = "DAY",
  weight = "WTDRD1",
  daily.variables = "Total.Grain",
  default.covariates = c(
    "Current.Smoker", "Former.Smoker", "std.RIDAGEYR",
    "RIAGENDR", "Day2", "Weekend"
  ),
  num.mcmc.iterations = 300,
  num.burn = 100,
  num.thin = 2,
  mcmc.seed = 9999
)

#draw the trace plots for the short-chain model
trace_plots(multivar.mcmc.model = low.burn.in)

## -----------------------------------------------------------------------------
#subset input dataset so that only the first 10 subjects have a second recall
#head() is used instead of [1:10,] so that, if fewer than 10 second-recall
#rows exist, no all-NA padding rows are created
first.recall <- input.dataset[input.dataset$Day2 == 0,]
second.recall <- head(input.dataset[input.dataset$Day2 == 1,], 10)
input.subset <- rbind(first.recall, second.recall)

#rebuild the MCMC input data from the reduced dataset, reusing the Box-Cox
#lambdas and minimum amounts that were computed from the full dataset
pre.mcmc.subset <- nci_multivar_preprocessor(input.data=input.subset,
                                             daily.variables="Total.Grain",
                                             continuous.covariates="RIDAGEYR",
                                             boxcox.lambda.data=boxcox.lambda.data,
                                             minimum.amount.data=minimum.amount.data)

#fit the Total.Grain model on the reduced data: 3000 iterations, 1000 burn-in,
#thinning of 2, fixed seed
small.sample <- nci_multivar_mcmc(pre.mcmc.data=pre.mcmc.subset,
                                  id="SEQN",
                                  repeat.obs="DAY",
                                  weight="WTDRD1",
                                  daily.variables="Total.Grain",
                                  default.covariates=c("Current.Smoker", "Former.Smoker", "std.RIDAGEYR", "RIAGENDR", "Day2", "Weekend"),
                                  num.mcmc.iterations=3000,
                                  num.burn=1000,
                                  num.thin=2,
                                  mcmc.seed=9999)

#draw the trace plots for the small-sample model
trace_plots(multivar.mcmc.model=small.sample)

## -----------------------------------------------------------------------------
#two-variable fit: Total.Grain and Refined.Grain modelled together, with the
#same covariates and chain settings as the single-variable reference fit
correlated.model <- nci_multivar_mcmc(
  pre.mcmc.data = pre.mcmc.data,
  id = "SEQN",
  repeat.obs = "DAY",
  weight = "WTDRD1",
  daily.variables = c("Total.Grain", "Refined.Grain"),
  default.covariates = c(
    "Current.Smoker", "Former.Smoker", "std.RIDAGEYR",
    "RIAGENDR", "Day2", "Weekend"
  ),
  num.mcmc.iterations = 3000,
  num.burn = 1000,
  num.thin = 2,
  mcmc.seed = 9999
)

#draw the trace plots for the two-variable model
trace_plots(multivar.mcmc.model = correlated.model)

## -----------------------------------------------------------------------------
#run gelman_rubin() with 5 chains for the Total.Grain model, using the same
#covariates and chain settings as the reference fit and an initial seed of 9999
gr.statistics <- gelman_rubin(
  num.chains = 5,
  pre.mcmc.data = pre.mcmc.data,
  id = "SEQN",
  repeat.obs = "DAY",
  weight = "WTDRD1",
  daily.variables = "Total.Grain",
  default.covariates = c(
    "Current.Smoker", "Former.Smoker", "std.RIDAGEYR",
    "RIAGENDR", "Day2", "Weekend"
  ),
  num.mcmc.iterations = 3000,
  num.burn = 1000,
  num.thin = 2,
  initial.mcmc.seed = 9999
)

#print the resulting statistics
gr.statistics

