/* MERGENOBY=NOWARN: no log message is issued when a MERGE statement is used without a BY statement. */
options mergenoby=nowarn;

/***************************************************************************/
/* The following files are required to run this example:                   */
/*   nlmixed_univariate_macro_v1.2.sas                                     */
/*   boxcox_survey.macro.v1.2.sas                                          */
/*   t_0104nc_may7.sas7bdat                                                */
/***************************************************************************/
/*                                                                         */
/***************************************************************************/
/* This example analysis fits univariate and bivariate measurement error   */
/* models for two dietary components that are consumed almost daily.  It   */
/* also estimates the distribution of the ratio of usual intakes for two   */
/* dietary components.  For this analysis, the example data is from the    */
/* 2001-2004 NHANES public use data and includes children ages 1-8, males  */
/* aged 9+ years, and females aged 9+ years.  The 24-hour dietary recall   */
/* is the main instrument and the two dietary components are saturated fat */
/* and energy.  The example data include repeated intake measurements from */
/* 24-hour dietary recalls.                                                */
/*                                                                         */
/* This analysis uses balanced repeated replication (BRR) variance         */
/* estimation, so the bivariate measurement error model and distribution   */
/* of usual intake use the original weight and 32 BRR weights.  The        */
/* following replicfirst and repliclast macro variables allow              */
/* specification of a range of replicates.  This program considers the     */
/* original data set (i.e. replicate 0).                                   */
/***************************************************************************/

/* First and last replicate numbers looped over by fit_models_replicate_loop. */
/* Replicate 0 uses the original weight, so 0 to 0 fits the original data only. */
%let replicfirst = 0;
%let repliclast  = 0;



/* Titles 1 and 2 are set once here; title3 (stratum) and title4 (replicate) */
/* are assigned later inside fit_models_replicate_loop.                      */
title1 "Fit Univariate Measurement Error Model for Energy Using MLE with 24-Hour Recall as Main Instrument";
title2 "Estimate Distribution of the Ratio of Usual Intakes for Two Dietary Components";



***********************************************************************;
*** The path must be modified in the following lines to specify the ***;
*** location of the SAS macros, the SAS data files, and the SAS     ***;
*** library that will be used for output                            ***;
***********************************************************************;

/* Root directory under which the macros, the input data and the output library are located. */
%let home = /prj/dcp/statprog/meas.err/develop.public.resources.stat.meth;

*** Include the required macros ***;
%include "&home/include.files.macros/nlmixed_univariate_macro_v1.2.sas";
%include "&home/include.files.macros/boxcox_survey.macro.v1.2.sas";

*** Input data library ***;
/* The program only reads from this library, so it is assigned read-only */
/* to protect the source data from being overwritten by accident.        */
libname inlib "&home/data" access=readonly;

*** Output data library ***;
libname outlib "&home/bivar_surveillance_example1_mle_main24hr/outlib";



***************;
*** Formats ***;
***************;

/* Display labels for the numeric stratum and sex codes. */
proc format;

  value stratfmt 1 = "Children 1-8"
                 2 = "Males 9+"
                 3 = "Females 9+";

  value sexfmt   1 = "Male"
                 2 = "Female";

run;



************************************************************************************;
*** A macro is used to loop the original weight variable and the 32 BRR weights  ***;
*** through calls to the NLMIXED_UNIVARIATE macro.  To accomplish this           ***;
*** seamlessly the name of the original weight variable and the BRR weight       ***;
*** variables must have the same root and must end in 0-32, e.g. rndw0-rndw32,   ***;
*** where rndw0 is the original weight. (In this example the original weight is  ***;
*** rndw0.) The following macro variables are used to ensure proper naming.      ***;
************************************************************************************;

/* Name of the original survey weight variable as it is stored in the input data set. */
%let weight_var_orig = rndw;
/* Common root of the weight variable names; the replicate number is appended to this root. */
%let weight_var      = rndw;



/* Build the analysis data set: give the original weight a 0 suffix, copy the   */
/* survey variables to analysis names, restrict to strata 1-3, and derive the   */
/* indicator covariates used by the models.                                     */
data nhanes (keep = id stratum sex female age agegrp race
                    repeat repeat2 dayofweek weekend recall_dc
                    age_1to3 age_4to8 age_9to13 age_14to18
                    age_19to30 age_31to50 age_51to70 age_71plus
                    rndw0 rndw1-rndw32);

  /* The original weight name has no numeric suffix.  It is renamed as it is */
  /* read so that it shares the root of the BRR weights with 0 appended.     */
  set inlib.t_0104nc_may7 (rename = (&weight_var_orig = &weight_var.0));

  /* Copy the survey variables to the analysis names. */
  stratum   = stra;
  id        = seqn;
  sex       = riagendr;
  age       = ridageyr;
  race      = ridreth1;
  repeat    = drddaycd;
  dayofweek = drdday;
  weekend   = wkend;

  /* Only strata 1, 2 and 3 are analyzed. */
  if not (stratum in (1, 2, 3)) then delete;

  /* Dietary component reported on the recall. */
  recall_dc = DRXTKCAL;

  /* Indicator of the second recall: 1 when repeat is 2, otherwise 0. */
  repeat2 = (repeat = 2);

  /* Indicator of female sex; left missing when sex is neither 1 nor 2. */
  if sex in (1, 2) then female = (sex = 2);

  /* One indicator per age group: all set to 0, then the one indexed by agegrp set to 1. */
  array age_ind (*) age_1to3 age_4to8 age_9to13 age_14to18
                    age_19to30 age_31to50 age_51to70 age_71plus;
  do _k = 1 to dim(age_ind);
    age_ind(_k) = 0;
  end;
  age_ind(agegrp) = 1;

  format sex sexfmt. stratum stratfmt.;

run;

/* Order the records by stratum, subject and recall number for the BY-group steps that follow. */
proc sort data = nhanes out = nhanes;
  by stratum id repeat;
run;



*****************************************************************;
*** Get minimum amount on consumption day and add to data set ***;
*****************************************************************;

/* Smallest positive recall value within each stratum, written to data set min_a. */
proc summary data = nhanes (where = (recall_dc > 0));
  by stratum;
  var recall_dc;
  output out = min_a (keep = stratum min_a) min(recall_dc) = min_a;
run;

/* Attach the stratum minimum to every record and replace zero recalls. */
data nhanes;
  merge nhanes min_a;
    by stratum;

  ***********************************************************************************;
  *** For one-part models, a 24HR value of zero is replaced by half of the        ***;
  *** minimum nonzero value of the stratum                                        ***;
  ***********************************************************************************;

  modeltype = "ONEPART";

  if modeltype = "ONEPART" then do;
    if recall_dc = 0 then recall_dc = min_a / 2;
  end;

run;



**********************************************************************************;
*** The macro fit_models_replicate_loop is used to call the NLMIXED_UNIVARIATE ***;
*** macro and fit a univariate measurement error model using the original      ***;
*** study data (i.e. replicate data set 0) and each of the replicate datasets  ***;
*** (i.e. replicate datasets 1, 2, ..., 32) for balanced repeated replication  ***;
*** (BRR) variance estimation.                                                 ***;
**********************************************************************************;

%macro fit_models_replicate_loop;

  /*****************************************************************************/
  /* For each replicate number from replicfirst to repliclast this macro:      */
  /*   1. finds a Box-Cox lambda for each of the three strata by calling       */
  /*      BOXCOX_SURVEY on the first record of each subject,                   */
  /*   2. fits the one-part model in each stratum by calling                   */
  /*      NLMIXED_UNIVARIATE with that lambda and the replicate weight,        */
  /*   3. stacks the three parms_u data sets, merges the stratum minimum       */
  /*      (min_a), and saves the result as outlib.parms_u_energy<replicate>.   */
  /*                                                                           */
  /* Macro variables read from the calling environment:                        */
  /*   replicfirst, repliclast, weight_var.                                    */
  /* Data sets read: nhanes, min_a.                                            */
  /*****************************************************************************/

  /* Keep the working macro variables local so that they cannot overwrite */
  /* variables of the same name in an outer scope.                        */
  %local replicnum i replicvar lambda1 lambda2 lambda3;

  %do replicnum = &replicfirst %to &repliclast;

    title4 "Replicate &replicnum";


    **********************************************;
    *** Use macro variable to represent the    ***;
    *** replicate variables in subsequent code ***;
    **********************************************;

    %let replicvar=&weight_var&replicnum ;


    ***************************************************************;
    *** Find best Box-Cox transformation for 24HR (for details, ***;
    *** see documentation for the BOXCOX_SURVEY macro).         ***;
    *** For two-part models, delete zero values.                ***;
    ***************************************************************;

    data nhanes_boxcox;
      set nhanes;
        by stratum id;
      if first.id;
      if (modeltype ^= "ONEPART" & recall_dc = 0) then delete;
    run;

    %do i=1 %to 3;

      /* Clear the lambda of this stratum so that a value from an earlier */
      /* replicate can never be carried forward silently.                 */
      %let lambda&i = ;

      data stra_boxcox;
        set nhanes_boxcox;
        if stratum = &i;
      run;

      %boxcox_survey (data    = stra_boxcox,
                      subject = id,
                      var     = recall_dc,
                      weight  = &replicvar,
                      print   = Y,
                      ntitle  = 2);

      ************************************************************;
      *** Create macro variables for the Box-Cox lambda values ***;
      ************************************************************;

      data _null_;
        set _lambda;
        call symput("lambda&i",strip(put(lambda_recall_dc,4.2)));
      run;

      /* A blank lambda means no row was read from _lambda.  A blank value   */
      /* makes NLMIXED_UNIVARIATE estimate lambda, so flag this in the log.  */
      %if %length(&&lambda&i) = 0 %then
        %put WARNING: No Box-Cox lambda was found for stratum &i in replicate &replicnum.. Lambda will be estimated by NLMIXED_UNIVARIATE.;

    %end;



    ***********************************************************************************;
    *** Create a separate data set for each stratum                                 ***;
    *** (children 1-8, males 9+, females 9+)                                        ***;
    ***********************************************************************************;

    data nhanes1 nhanes2 nhanes3;
      set nhanes;
      if stratum = 1 then output nhanes1;
      if stratum = 2 then output nhanes2;
      if stratum = 3 then output nhanes3;
    run;



    /*****************************************************************************/
    /*                                                                           */
    /* SAS macro NLMixed_Univariate fits a univariate model for a food/nutrient. */
    /* The food/nutrient can be episodically consumed or consumed every day.     */
    /*                                                                           */
    /* Model for episodically consumed foods/nutrients (two-part model):         */
    /* For episodically consumed foods/nutrients, the macro fits a two-part      */
    /* nonlinear mixed model, where the first part is the probability to         */
    /* consume and the second part is the amount consumed on a consumption day.  */
    /* The model allows for covariates in each part, includes a random effect    */
    /* for each part, and allows the random effects to be correlated.            */
    /*                                                                           */
    /* Model for foods/nutrients consumed every day (one-part model):            */
    /* For foods/nutrients consumed every day, the macro fits a one-part         */
    /* nonlinear mixed model of the amount consumed (the probability to consume  */
    /* is assumed to be 1). The model allows for covariates and includes a       */
    /* random effect.                                                            */
    /*                                                                           */
    /* For a food/nutrient that is consumed nearly every day by nearly everyone, */
    /* so that the number of zero values is small, it may be preferable to use   */
    /* the one-part (consumed every day) model, since the two-part model may     */
    /* have trouble modeling the probability to consume in such a situation.     */
    /*                                                                           */
    /* Note, however, that the one-part model requires all responses to be       */
    /* greater than zero (zero values are treated as missing values).            */
    /* Before fitting the one-part model to a food/nutrient that has some zero   */
    /* values, replace the zero values with a small positive value, such as      */
    /* half the smallest observed nonzero value.                                 */
    /*                                                                           */
    /* The macro calls the NLMixed procedure to fit the model.                   */
    /*                                                                           */
    /*****************************************************************************/
    /*                                                                           */
    /* Macro Parameters:                                                         */
    /*                                                                           */
    /*    Required Parameters:                                                   */
    /*       data          = name of SAS data set containing the data to be      */
    /*                       analyzed. The data set has multiple observations    */
    /*                       for each subject, one for each repetition of the    */
    /*                       24-hour recall (or other dietary instrument).       */
    /*       subject       = name of the variable that uniquely identifies each  */
    /*                       subject (i.e., ID variable).                        */
    /*       repeat        = name of the variable that indexes repeated          */
    /*                       observations for each subject.                      */
    /*       response      = name of the food/nutrient variable to be modeled    */
    /*                       (24-hour recall variable for the food/nutrient).    */
    /*       modeltype     = model for food/nutrient:                            */
    /*                       to fit the two-part (episodic) model, specify       */
    /*                          modeltype = TWOPART                              */
    /*                       to fit the one-part (every day) model, specify      */
    /*                          modeltype = ONEPART                              */
    /*                                                                           */
    /*    Optional Parameters:                                                   */
    /*       covars_prob   = list of variables that are covariates in the        */
    /*                       probability part of the two-part model.             */
    /*                       if modeltype=ONEPART, then covars_prob is ignored.  */
    /*       covars_amt    = list of variables that are covariates in the        */
    /*                       one-part model or the amount part of the            */
    /*                       two-part model.                                     */
    /*       link          = link function for the probability part of the two-  */
    /*                       part model. to fit a logistic model, specify        */
    /*                          link = logit                                     */
    /*                       to fit a probit model, specify                      */
    /*                          link = probit                                    */
    /*                       by default, link = probit.                          */
    /*                       if modeltype = ONEPART, then link is ignored.       */
    /*       lambda        = Box-Cox transformation parameter for the amount     */
    /*                       part of the model. If lambda is not specified,      */
    /*                       then it is estimated as part of the model.          */
    /*       var_u1        = variance of the random effect in the probability    */
    /*                       part of the two-part model.                         */
    /*                       If var_u1 is not specified, then it is estimated    */
    /*                       as part of the model.                               */
    /*                       if modeltype = ONEPART, then var_u1 is ignored.     */
    /*       var_u2        = variance of the random effect in the one-part model */
    /*                       or the amount part of the two-part model.           */
    /*                       If var_u2 is not specified, then it is estimated    */
    /*                       as part of the model.                               */
    /*       indep_u       = Y if random effects u1 and u2 are independent.      */
    /*                     = N if random effects u1 and u2 are dependent.        */
    /*                       by default, indep_u = N.                            */
    /*                       if modeltype = ONEPART, then indep_u is ignored.    */
    /*       replicate_var = name of the sampling weight variable if the data    */
    /*                       is from a complex survey with weights.              */
    /*                       by default, the macro performs an unweighted        */
    /*                       analysis (assumes a simple random sample).          */
    /*       nloptions     = options for the NLMixed procedure that are          */
    /*                       appended to the PROC NLMIXED statement, e.g.,       */
    /*                          nloptions = technique=newrap maxiter=200,        */
    /*       init_parms    = name of SAS data set that contains initial          */
    /*                       parameter estimates. See the description of output  */
    /*                       data set parms_u (below) for further information.   */
    /*                       if init_parms is not specified, then the macro      */
    /*                       calculates initial parameter estimates.             */
    /*       print         = Y to print the output from the model.               */
    /*                     = N to suppress printing the output from the model.   */
    /*                     = V (verbose) to print extra output.                  */
    /*                       by default, print = Y.                              */
    /*       ntitle        = number of titles defined by the user.               */
    /*                       by default, ntitle = 2.                             */
    /*                                                                           */
    /*****************************************************************************/
    /*                                                                           */
    /* Output Data Sets:                                                         */
    /*                                                                           */
    /*   parms_u = data set containing parameter estimates for the model.        */
    /*             parms_u contains the following variables:                     */
    /*                                                                           */
    /*                 A_Intercept = intercept in the amount part of the model.  */
    /*                 A_varname   = regression slope for covariate "varname"    */
    /*                               in the amount part of the model.            */
    /*                 A_LogSDe    = Log(Sqrt(Var_e))                            */
    /*                 LogSDu2     = Log(Sqrt(Var_u2))                           */
    /*                 Var_e       = variance of the within-person error in the  */
    /*                               amount part of the model.                   */
    /*                 Var_u2      = variance of the random effect in the        */
    /*                               amount part of the model.                   */
    /*                                                                           */
    /*             if fitting the two-part model, then parms_u also contains     */
    /*             the following variables:                                      */
    /*                                                                           */
    /*                 P_Intercept = intercept in the prob. part of the model.   */
    /*                 P_varname   = regression slope for covariate "varname"    */
    /*                               in the prob. part of the model.             */
    /*                 LogSDu1     = Log(Sqrt(Var_u1))                           */
    /*                 z_u1u2      = Fisher transformation of Corr_u1u2:         */
    /*                                  z = ln[(1+corr)/(1-corr)] / 2            */
    /*                 Var_u1      = variance of the random effect in the        */
    /*                               prob. part of the model.                    */
    /*                 Cov_u1u2    = covariance of random effects u1 and u2.     */
    /*                 Corr_u1u2   = correlation of random effects u1 and u2.    */
    /*                                                                           */
    /*             note: if specifying initial parameter estimates using the     */
    /*                   init_parms option, the init_parms data set should have  */
    /*                   the same variables as parms_u, except it should not     */
    /*                   include var_e, var_u2, var_u1, cov_u1u2 or corr_u1u2    */
    /*                   (these are derived parameters, i.e., functions of the   */
    /*                    other parameters).                                     */
    /*                                                                           */
    /*   pred_x_u = data set containing predicted values for the model.          */
    /*              pred_x_u contains all the variables in the input data set,   */
    /*              plus the following variable:                                 */
    /*                                                                           */
    /*                 pred_x_a = predicted mean amount on consumption day.      */
    /*                                                                           */
    /*              if fitting the two-part model, then pred_x_u also contains   */
    /*              the following variable:                                      */
    /*                                                                           */
    /*                  pred_x_p = predicted probability of consumption.         */
    /*                                                                           */
    /*****************************************************************************/

    ***********************************************************************************;
    *** Call macro NLMIXED_UNIVARIATE to fit nonlinear mixed model in each stratum. ***;
    *** In this example, we specify a fixed value for parameter lambda (Box-Cox     ***;
    *** transformation of amount consumed). Alternatively, one can let the macro    ***;
    *** estimate lambda by not specifying a value for macro parameter "lambda".     ***;
    ***********************************************************************************;

    title3 "Stratum 1 = Children 1-8";

    %nlmixed_univariate(data          = nhanes1,
                        subject       = id,
                        repeat        = repeat,
                        response      = recall_dc,
                        modeltype     = ONEPART,
                        covars_prob   = repeat2 weekend female age_4to8,
                        covars_amt    = repeat2 weekend female age_4to8,
                        link          = ,
                        lambda        = &lambda1,
                        var_u1        = ,
                        var_u2        = ,
                        indep_u       = N,
                        replicate_var = &replicvar,
                        nloptions     = technique=trureg,
                        init_parms    = ,
                        print         = Y,
                        ntitle        = 3
                        );

    /* Tag the estimates with their stratum before the next call replaces parms_u. */
    data parms_u1;
      stratum = 1;
      set parms_u;
    run;



    title3 "Stratum 2 = Males 9+";

    %nlmixed_univariate(data          = nhanes2,
                        subject       = id,
                        repeat        = repeat,
                        response      = recall_dc,
                        modeltype     = ONEPART,
                        covars_prob   = repeat2 weekend age_14to18 age_19to30 age_31to50 age_51to70 age_71plus,
                        covars_amt    = repeat2 weekend age_14to18 age_19to30 age_31to50 age_51to70 age_71plus,
                        link          = ,
                        lambda        = &lambda2,
                        var_u1        = ,
                        var_u2        = ,
                        indep_u       = N,
                        replicate_var = &replicvar,
                        nloptions     = technique=trureg,
                        init_parms    = ,
                        print         = Y,
                        ntitle        = 3
                        );

    data parms_u2;
      stratum = 2;
      set parms_u;
    run;



    title3 "Stratum 3 = Females 9+";

    %nlmixed_univariate(data          = nhanes3,
                        subject       = id,
                        repeat        = repeat,
                        response      = recall_dc,
                        modeltype     = ONEPART,
                        covars_prob   = repeat2 weekend age_14to18 age_19to30 age_31to50 age_51to70 age_71plus,
                        covars_amt    = repeat2 weekend age_14to18 age_19to30 age_31to50 age_51to70 age_71plus,
                        link          = ,
                        lambda        = &lambda3,
                        var_u1        = ,
                        var_u2        = ,
                        indep_u       = N,
                        replicate_var = &replicvar,
                        nloptions     = technique=trureg,
                        init_parms    = ,
                        print         = Y,
                        ntitle        = 3
                        );

    data parms_u3;
      stratum = 3;
      set parms_u;
    run;



    ************************************************;
    *** Combine data sets of parameter estimates ***;
    ************************************************;

    data parms_u;
      set parms_u1 parms_u2 parms_u3;
        by stratum;
    run;



    **********************************************;
    *** Keep minimum amount on consumption day ***;
    **********************************************;

    data parms_u;
      merge parms_u min_a;
        by stratum;
    run;



    ********************************;
    *** Save parameter estimates ***;
    ********************************;

    data outlib.parms_u_energy&replicnum;
      set parms_u;
    run;


  %end;


%mend;



/* Run the model fitting for the replicate range given by replicfirst and repliclast. */
%fit_models_replicate_loop;
