/* MERGENOBY=NOWARN: do not issue a log message for a MERGE statement that has */
/* no BY statement.  The init_parms DATA step below performs such a merge.     */
options mergenoby=nowarn;

/*****************************************************************************/
/*****************************************************************************/
/* The following file is required to run this example:                       */
/*   nlmixed_bivariate_macro_v1.2.sas                                        */
/*****************************************************************************/
/*                                                                           */
/*****************************************************************************/
/* This example analysis uses regression calibration and fits a logistic     */
/* regression model to assess the relationships between two dietary          */
/* components and a health outcome.  For this analysis, the example data     */
/* were simulated to include data from 100,000 participants, and the food    */
/* frequency questionnaire (FFQ) is the main dietary instrument.  The        */
/* dietary components are red meat and energy, and the health outcome is a   */
/* binary event variable.  The simulated data include a calibration substudy */
/* of 1,000 participants with repeated intake measurements from 24-hour      */
/* dietary recalls.                                                          */
/*                                                                           */
/* This program fits a bivariate measurement error model to obtain values    */
/* needed in a subsequent analysis step.                                     */
/*                                                                           */
/* This analysis uses bootstrap variance estimation, so the bivariate        */
/* measurement error model and logistic model are fit using the original     */
/* data set and 200 replicate data sets.  The following replicfirst and      */
/* repliclast macro variables allow specification of a range of replicates.  */
/* This example program considers the original data set (i.e. replicate 0).  */
/*****************************************************************************/

%let replicfirst = 0;
%let repliclast  = 0;



title1 "Fit Univariate and Bivariate Measurement Error Models Using MLE with FFQ as Main Instrument";
title2 "Predict Intake and Perform Regression Calibration";
title3 "Assess Relationships between Two Dietary Components and a Health Outcome";



***********************************************************************;
*** The path must be modified in the following lines to specify the ***;
*** location of the SAS macros, the SAS data files, and the SAS     ***;
*** library that will be used for output                            ***;
***********************************************************************;

*** Root directory; the macro include file and the output library are located beneath it ***;
%let home = /prj/dcp/statprog/meas.err/develop.public.resources.stat.meth;

*** Include the required macros ***;
*** (this file is expected to define the NLMIXED_BIVARIATE macro called later in the program) ***;
%include "&home/include.files.macros/nlmixed_bivariate_macro_v1.2.sas";

*** Output data library ***;
*** (this program also reads simdata_and_bootdata, _parms_u1 and _parms_u2 from this library) ***;
libname outlib "&home/bivar_epidemiology_example3_mle_mainffq/outlib";



*************************************************************************************************;
*** Global macro variable to test for the successful execution of the NLMIXED_BIVARIATE macro ***;
*************************************************************************************************;

%global status;



*********************************************************************;
*** Import the simulated data and bootstrap data for this example ***;
*********************************************************************;

*** Copy the permanent data set from OUTLIB into a data set with a one-level name ***;
data simdata_and_bootdata;
  set outlib.simdata_and_bootdata;
run;



**********************************************;
*** Create a calibration substudy data set ***;
**********************************************;

data calib_and_bootdata;

  set simdata_and_bootdata;

  *** Drop every record on which all four 24-hour recall variables are missing ***;
  if n(r11, r12, r21, r22) = 0 then delete;

run;



*****************************************************************************************************************;
*** Import the data set with the initial parameter estimates for dietary component 1 and dietary component 2. ***;
*** Rename the variables needed for the NLMIXED_BIVARIATE macro.                                              ***;
*****************************************************************************************************************;

data init_parms;

  *** The MERGE has no BY statement, so observations are paired by position.         ***;
  *** Component 1 names receive the p1_ and a1_ prefixes, component 2 names receive  ***;
  *** the a2_ prefix, and logsdu2 from the component 2 data set becomes logsdu3 so   ***;
  *** that it does not overwrite a logsdu2 variable coming from the component 1 data ***;
  *** set (presumably present there, since logsdu2 is kept below -- confirm).        ***;

  merge outlib._parms_u1(rename = (p_intercept = p1_intercept   p_q1     = p1_q1
                                   p_q2        = p1_q2          a_q1     = a1_q1
                                   a_intercept = a1_intercept   a_q2     = a1_q2
                                   a_lambda    = a1_lambda      a_logsde = a1_logsde))

        outlib._parms_u2(rename = (a_intercept = a2_intercept   a_q1     = a2_q1
                                   a_lambda    = a2_lambda      a_q2     = a2_q2
                                   a_logsde    = a2_logsde      logsdu2  = logsdu3));

  *** Only the initial estimates required by the NLMIXED_BIVARIATE macro are retained ***;

  keep p1_intercept p1_q1 p1_q2 logsdu1
       a1_intercept a1_q1 a1_q2 a1_lambda a1_logsde logsdu2
       a2_intercept a2_q1 a2_q2 a2_lambda a2_logsde logsdu3
       corr_u1u2;

run;



*********************************************************************************;
*** The macro fit_models_replicate_loop is used to call the NLMIXED_BIVARIATE ***;
*** macro using the original study data (i.e. replicate data set 0) and each  ***;
*** of the replicate data sets for bootstrap variance estimation              ***;
*** (i.e. replicate data sets 1, 2, ..., 200)                                 ***;
*********************************************************************************;

%macro fit_models_replicate_loop;

  /*****************************************************************************
  * For each replicate number from the global macro variable REPLICFIRST to    *
  * the global macro variable REPLICLAST, this macro:                          *
  *   1. selects the records of that replicate from CALIB_AND_BOOTDATA,        *
  *   2. builds CALIBMREC with one record per available 24-hour recall,        *
  *   3. computes the smallest positive recall value of each component,        *
  *   4. calls the NLMIXED_BIVARIATE macro,                                    *
  *   5. reads the status variable from CONV_B and, when it equals 0, saves    *
  *      PARMS_B together with the two minimum amounts as                      *
  *      OUTLIB._PARMS_B<replicate number>; otherwise any data set of that     *
  *      name is deleted from OUTLIB.                                          *
  *                                                                            *
  * Inputs : data sets CALIB_AND_BOOTDATA and INIT_PARMS, global macro         *
  *          variables REPLICFIRST, REPLICLAST and STATUS.                     *
  * Outputs: OUTLIB._PARMS_B<replicate number> for each successful replicate.  *
  *****************************************************************************/

  /* Keep the loop index in the local symbol table of this macro so that it   */
  /* cannot overwrite a global macro variable that has the same name.         */
  %local replicnum;

  %do replicnum = &replicfirst %to &repliclast;

    title4 "Replicate &replicnum";
    data select_replicate;
      set calib_and_bootdata (where = (replicate=&replicnum));
    run;



    *****************************************************************************************************;
    *** Create a data set for the NLMIXED_BIVARIATE macro.                                            ***;
    *** This macro requires an input data set that includes one or more observations for each person. ***;
    *****************************************************************************************************;

    data calibmrec;

      set select_replicate;

      ********************************************;
      *** Output record for 1st 24-hour recall ***;
      ********************************************;

      if n(of r11 r21) > 0 then do;

        repeat = 1;

        R1 = r11;
        R2 = r21;

        output;

      end;

      ********************************************;
      *** Output record for 2nd 24-hour recall ***;
      ********************************************;

      if n(of r12 r22) > 0 then do;

        repeat = 2;

        R1 = r12;
        R2 = r22;

        output;

      end;

    run;



    ***************************************************************************************;
    *** Calculate the smallest positive 24-hour recall value for each dietary component ***;
    ***************************************************************************************;

    proc univariate data=calibmrec noprint;
      where R1 > 0;
      var R1;
      output out=outmin_R1 min=min_a1;
    run;

    proc univariate data=calibmrec noprint;
      where R2 > 0;
      var R2;
      output out=outmin_R2 min=min_a2;
    run;



    /*******************************************************************************
    *                                                                              *
    * SAS macro NLMixed_Bivariate fits a bivariate model for two foods/nutrients.  *
    * The first food/nutrient can be episodically consumed or consumed every       *
    * day, while the second food/nutrient is assumed to be consumed every day.     *
    *                                                                              *
    * Model for episodically consumed foods/nutrients (two-part model):            *
    * For episodically consumed foods/nutrients, the macro fits a two-part         *
    * nonlinear mixed model, where the first part is the probability to            *
    * consume and the second part is the amount consumed on a consumption day.     *
    * The model allows for covariates in each part, includes a random effect       *
    * for each part, and allows the random effects to be correlated.               *
    *                                                                              *
    * Model for foods/nutrients consumed every day (one-part model):               *
    * For foods/nutrients consumed every day, the macro fits a one-part            *
    * nonlinear mixed model of the amount consumed (the probability to consume     *
    * is assumed to be 1). The model allows for covariates and includes a          *
    * random effect.                                                               *
    *                                                                              *
    * For a food/nutrient that is consumed nearly every day by nearly everyone,    *
    * so that the number of zero values is small, it may be preferable to use      *
    * the one-part (consumed every day) model, since the two-part model may        *
    * have trouble modeling the probability to consume in such a situation.        *
    *                                                                              *
    * Note, however, that the one-part model requires all responses to be          *
    * greater than zero (zero values are treated as missing values).               *
    * Before fitting the one-part model to a food/nutrient that has some zero      *
    * values, replace the zero values with a small positive value, such as         *
    * half the smallest observed nonzero value.                                    *
    *                                                                              *
    * Note: Initial parameter estimates must be supplied by the user.              *
    * They can be estimated using SAS macro NLMixed_Univariate.                    *
    *                                                                              *
    * The macro calls the NLMixed procedure to fit the model.                      *
    *                                                                              *
    ********************************************************************************
    *                                                                              *
    * Macro Parameters:                                                            *
    *                                                                              *
    *    Required Parameters:                                                      *
    *       data          = name of SAS data set containing the data to be         *
    *                       analyzed. The data set has multiple observations       *
    *                       for each subject, one for each repetition of the       *
    *                       24-hour recall (or other dietary instrument).          *
    *       subject       = name of the variable that uniquely identifies each     *
    *                       subject (i.e., ID variable).                           *
    *       repeat        = name of the variable that indexes repeated             *
    *                       observations for each subject.                         *
    *       response1     = name of first food/nutrient variable to be modeled     *
    *                       (24-hour recall variable for first food/nutrient).     *
    *       response2     = name of second food/nutrient variable to be modeled    *
    *                       (24-hour recall variable for second food/nutrient).    *
    *       modeltype     = model for first food/nutrient:                         *
    *                       to fit the two-part (episodic) model, specify          *
    *                          modeltype = TWOPART                                 *
    *                       to fit the one-part (every day) model, specify         *
    *                          modeltype = ONEPART                                 *
    *       init_parms    = name of SAS data set that contains initial             *
    *                       parameter estimates. See the description of output     *
    *                       data set parms_b (below) for further information.      *
    *                                                                              *
    *    Optional Parameters:                                                      *
    *       covars_prob1  = list of variables that are covariates in the           *
    *                       probability part of the two-part model for the         *
    *                       first food/nutrient.                                   *
    *                       if modeltype = ONEPART, then covars_prob1 is ignored.  *
    *       covars_amt1   = list of variables that are covariates in the           *
    *                       one-part model or the amount part of the               *
    *                       two-part model for the first food/nutrient.            *
    *       covars_amt2   = list of variables that are covariates in the           *
    *                       one-part model for the second food/nutrient            *
    *       link          = link function for the probability part of the two-     *
    *                       part model for the first food/nutrient.                *
    *                       to fit a logistic model, specify                       *
    *                          link = logit                                        *
    *                       to fit a probit model, specify                         *
    *                          link = probit                                       *
    *                       by default, link = probit.                             *
    *                       if modeltype = ONEPART, then link is ignored.          *
    *       lambda1       = Box-Cox transformation parameter for the first         *
    *                       food/nutrient. If lambda1 is not specified, then       *
    *                       it is estimated as part of the model.                  *
    *       lambda2       = Box-Cox transformation parameter for the second        *
    *                       food/nutrient. If lambda2 is not specified, then       *
    *                       it is estimated as part of the model.                  *
    *       var_u1        = variance of the random effect in the probability       *
    *                       part of the model for the first food/nutrient.         *
    *                       If var_u1 is not specified, then it is estimated       *
    *                       as part of the model.                                  *
    *                       if modeltype = ONEPART, then var_u1 is ignored.        *
    *       var_u2        = variance of the random effect in the amount            *
    *                       part of the model for the first food/nutrient.         *
    *                       If var_u2 is not specified, then it is estimated       *
    *                       as part of the model.                                  *
    *       var_u3        = variance of the random effect in the amount            *
    *                       part of the model for the second food/nutrient.        *
    *                       If var_u3 is not specified, then it is estimated       *
    *                       as part of the model.                                  *
    *       threshold     = Y to fit a latent variable threshold model.            *
    *                     = N otherwise. by default, threshold = Y.                *
    *                       if threshold = Y, then the probit model is fit.        *
    *                       if modeltype = ONEPART, then threshold is ignored.     *
    *       replicate_var = name of the sampling weight variable if the data       *
    *                       is from a complex survey with weights.                 *
    *                       by default, the macro performs an unweighted           *
    *                       analysis (assumes a simple random sample).             *
    *       nloptions     = options for the NLMixed procedure that are             *
    *                       appended to the PROC NLMIXED statement, e.g.,          *
    *                        nloptions = technique=newrap maxiter=200.             *
    *       print         = Y to print the output from the model.                  *
    *                     = N to suppress printing the output from the model.      *
    *                     = V (verbose) to print extra output.                     *
    *                       by default, print = Y.                                 *
    *       ntitle        = number of titles defined by the user.                  *
    *                       by default, ntitle = 2.                                *
    *                                                                              *
    ********************************************************************************
    *                                                                              *
    * Output Data Sets:                                                            *
    *                                                                              *
    *   parms_b = data set containing parameter estimates for the model.           *
    *             parms_b contains the following variables:                        *
    *                                                                              *
    *                 A1_Intercept = intercept in the amount part of the model     *
    *                                for the first food/nutrient.                  *
    *                 A1_varname   = regression slope for covariate "varname"      *
    *                                in the amount part of the model for the       *
    *                                first food/nutrient.                          *
    *                 A2_Intercept = intercept for the second food/nutrient.       *
    *                 A2_varname   = regression slope for covariate "varname"      *
    *                                for the second food/nutrient.                 *
    *                 A1_LogSDe    = Log(Sqrt(Var_e2))                             *
    *                 A2_LogSDe    = Log(Sqrt(Var_e3))                             *
    *                 z_e2e3       = Fisher transformation of Corr_e2e3:           *
    *                                  z = ln[(1+corr)/(1-corr)] / 2               *
    *                 Var_e2       = variance of within-person error e2 (amount    *
    *                                part of model for first food/nutrient).       *
    *                 Var_e3       = variance of within-person error e3 (second    *
    *                                food/nutrient).                               *
    *                 Var_u2       = variance of random effect u2 (amount part     *
    *                                of model for first food/nutrient).            *
    *                 Var_u3       = variance of random effect u3 (second          *
    *                                food/nutrient).                               *
    *                 Cov_e2e3     = covariance of random errors e2 and e3.        *
    *                 Corr_e2e3    = correlation of random errors e2 and e3.       *
    *                 Cov_u2u3     = covariance of random effects u2 and u3.       *
    *                 Corr_u2u3    = correlation of random effects u2 and u3.      *
    *                                                                              *
    *             if fitting the two-part model for the first food/nutrient,       *
    *             then parms_b also contains the following variables:              *
    *                                                                              *
    *                 P1_Intercept = intercept in the prob. part of the model      *
    *                                for the first food/nutrient.                  *
    *                 P1_varname   = regression slope for covariate "varname"      *
    *                                in the prob. part of the model for the        *
    *                                first food/nutrient.                          *
    *                 Var_u1       = variance of random effect u1 (prob. part      *
    *                                of model for first food/nutrient).            *
    *                 Cov_u1u2     = covariance of random effects u1 and u2.       *
    *                 Cov_u1u3     = covariance of random effects u1 and u3.       *
    *                 Corr_u1u2    = correlation of random effects u1 and u2.      *
    *                 Corr_u1u3    = correlation of random effects u1 and u3.      *
    *                                                                              *
    *             note: initial parameter estimates must be supplied by the        *
    *                   user using the init_parms option.                          *
    *                   the user-supplied data set will have the same variables    *
    *                   as data set parms_b, except it should not include the      *
    *                   following variables:                                       *
    *                         z_e2e3                                               *
    *                         Cov_e2e3                                             *
    *                         Corr_e2e3                                            *
    *                         Cov_u1u2 Cov_u1u3 Cov_u2u3                           *
    *                         Corr_u1u3 Corr_u2u3                                  *
    *                   All the necessary initial parameter estimates can be       *
    *                   estimated using the SAS macro NLMixed_Univariate.          *
    *                                                                              *
    *   pred_x_b = data set containing predicted values for the model.             *
    *              pred_x_b contains all the variables in the input data set,      *
    *              plus the following variables:                                   *
    *                                                                              *
    *                 pred_x_a1 = predicted mean amount on consumption day for     *
    *                             the first food/nutrient.                         *
    *                 pred_x_a2 = predicted mean amount for the second             *
    *                             food/nutrient.                                   *
    *                                                                              *
    *             if fitting the two-part model for the first food/nutrient,       *
    *             then pred_x_b also contains the following variable:              *
    *                                                                              *
    *                 pred_x_p1 = predicted probability of consumption for         *
    *                             the first food/nutrient.                         *
    *                                                                              *
    *******************************************************************************/

    ******************************************************************************************;
    *** Remove any CONV_B and PARMS_B data sets left over from an earlier replicate so     ***;
    *** that results from a previous fit cannot be saved under the current replicate       ***;
    *** number if the macro call below stops before it recreates them                      ***;
    ******************************************************************************************;

    proc datasets nolist nowarn lib=work;
      delete conv_b parms_b;
    run;
    quit;



    ***********************************************************************************************************************;
    *** Call NLMIXED_BIVARIATE to fit a bivariate nonlinear mixed model for dietary component 1 and dietary component 2 ***;
    ***********************************************************************************************************************;

    /* Start from the failure value; it is replaced only by the status that is read from CONV_B below. */
    %let status = 1;

    %nlmixed_bivariate(data          = calibmrec,
                       subject       = replicaterowid,
                       repeat        = repeat,
                       response1     = R1,
                       response2     = R2,
                       modeltype     = TWOPART,
                       init_parms    = init_parms,
                       covars_prob1  = q1 q2,
                       covars_amt1   = q1 q2,
                       covars_amt2   = q1 q2,
                       link          = PROBIT,
                       lambda1       = ,
                       lambda2       = ,
                       var_u1        = ,
                       var_u2        = ,
                       var_u3        = ,
                       corr_u1u2     = ,
                       corr_u1u3     = ,
                       corr_u2u3     = ,
                       threshold     = Y,
                       replicate_var = ,
                       nloptions     = technique=newrap,
                       print         = Y,
                       ntitle        = 4
                       );



    ********************************************************************************************;
    *** Read the status of the NLMIXED_BIVARIATE run from CONV_B.  If CONV_B was not created ***;
    *** then the status keeps the value 1 and the replicate is treated as unsuccessful       ***;
    ********************************************************************************************;

    %if %sysfunc(exist(conv_b)) %then %do;

      data _null_;
        set conv_b;
        call symput("status", strip(put(status, 6.)));
      run;

    %end;



    **************************************************************************************************;
    *** If NLMIXED_BIVARIATE is successful, save the bivariate parameter estimates and min amounts ***;
    **************************************************************************************************;

    %if &status = 0 %then %do;

      data outlib._parms_b&replicnum;
        if _n_ = 1 then do;
          set outmin_R1(keep = min_a1);
          set outmin_R2(keep = min_a2);
        end;
        set parms_b;
      run;

    %end;



    %else %do;

      ****************************************************************************************;
      *** Delete parameter estimates for the current iteration if NLMIXED_BIVARIATE fails  ***;
      ****************************************************************************************;

      %put NLMIXED_BIVARIATE was not successful for replicate data set &replicnum;

      proc datasets nolist nowarn lib=outlib;
        delete _parms_b&replicnum;
      run;
      quit;

    %end;

  %end;   *** End of the replicate loop ***;

%mend fit_models_replicate_loop;



************************************************;
*** Call the fit_models_replicate_loop macro ***;
************************************************;

%fit_models_replicate_loop;
