/* MERGENOBY=NOWARN: do not issue a warning or error when a MERGE */
/* statement is used without an accompanying BY statement.        */
options mergenoby=nowarn;

/***************************************************************************/
/* The following files are required to run this example:                   */
/*   distrib_bivariate.macro.v1.1.sas                                      */
/*   percentiles_survey.macro.v1.1.sas                                     */
/***************************************************************************/
/*                                                                         */
/***************************************************************************/
/* This example analysis fits univariate and bivariate measurement error   */
/* models for two dietary components that are consumed almost daily.  It   */
/* also estimates the distribution of the ratio of usual intakes for two   */
/* dietary components.  For this analysis, the example data is from the    */
/* 2001-2004 NHANES public use data and includes children ages 1-8, males  */
/* aged 9+ years, and females aged 9+ years.  The 24-hour dietary recall   */
/* is the main instrument and the two dietary components are saturated fat */
/* and energy.  The example data include repeated intake measurements from */
/* 24-hour dietary recalls.                                                */
/*                                                                         */
/* This analysis uses balanced repeated replication (BRR) variance         */
/* estimation, so the bivariate measurement error model and distribution   */
/* of usual intake use the original weight and 32 BRR weights.  The        */
/* following replicfirst and repliclast macro variables allow              */
/* specification of a range of replicates.  This program considers the     */
/* original data set (i.e. replicate 0).                                   */
/***************************************************************************/

/* Range of replicates processed by the replicate loop below.             */
/* Replicate 0 is the original data; replicates 1-32 are the BRR weights. */
%let replicfirst = 0;
%let repliclast  = 0;



/* Titles 1 and 2 are set once here; title lines 3 and 4 are set inside the loop. */
title1 "Fit Bivariate Measurement Error Model Using MLE with 24-Hour Recall as Main Instrument";
title2 "Estimate Distribution of the Ratio of Usual Intakes for Two Dietary Components";



***********************************************************************;
*** The path must be modified in the following lines to specify the ***;
*** location of the SAS macros, the SAS data files, and the SAS     ***;
*** library that will be used for output                            ***;
***********************************************************************;

/* Root directory; the macro include paths and the outlib path are built from it. */
%let home = /prj/dcp/statprog/meas.err/develop.public.resources.stat.meth;

*** Include the required macros ***;
%include "&home/include.files.macros/distrib_bivariate.macro.v1.1.sas";
%include "&home/include.files.macros/percentiles_survey.macro.v1.1.sas";

*** Output data library ***;
/* outlib is both read (parms_b_sfat_energy<n>, pred_b_sfat_energy<n>) */
/* and written (pctl_b_sfat_energy<n>) by the replicate loop.          */
libname outlib "&home/bivar_surveillance_example1_mle_main24hr/outlib";



***************;
*** Formats ***;
***************;

/* Display formats for the coded variables used in this program. */
proc format;
  /* sexfmt: applied to variable sex in the predicted-values data set. */
  value sexfmt
    1 = "Male"
    2 = "Female";
  /* stratfmt: applied to variable stratum (the three analysis strata). */
  value stratfmt
    1 = "Children 1-8"
    2 = "Males 9+"
    3 = "Females 9+";
  /* subfmt: applied to variable Subpopulation in the percentile tables.   */
  /* Codes 9 and 16 (adults by sex) and 17 (everyone) are assigned in the  */
  /* Table 2 and Table 3 steps; the other codes are derived from agegrp    */
  /* and sex in the Table 1 step.                                          */
  value subfmt
    1 = "Children 1-3"
    2 = "Children 4-8"
    3 = "Males 9-13"
    4 = "Males 14-18"
    5 = "Males 19-30"
    6 = "Males 31-50"
    7 = "Males 51-70"
    8 = "Males 71+"
    9 = "Males 19+"
   10 = "Females 9-13"
   11 = "Females 14-18"
   12 = "Females 19-30"
   13 = "Females 31-50"
   14 = "Females 51-70"
   15 = "Females 71+"
   16 = "Females 19+"
   17 = "All 1+";
run;



***********************************************************************************;
*** The macro distrib_pctl_replicate_loop is used to call the DISTRIB_BIVARIATE ***;
*** and PERCENTILES_SURVEY macros to estimate the percentiles of the population ***;
*** distribution of the ratio of usual intakes of saturated fat and energy.     ***;
*** The macro loop uses the original study data (i.e. replicate data set 0) and ***;
*** each of the replicate datasets (i.e. replicate datasets 1, 2, ..., 32) for  ***;
*** balanced repeated replication (BRR) variance estimation                     ***;
***********************************************************************************;

/* Driver macro: for every replicate from the global macro variable        */
/* replicfirst through repliclast, read the stored model output from       */
/* outlib, simulate usual intakes per stratum, compute percentiles of the  */
/* usual-intake ratio and save them back to outlib.  Takes no parameters.  */
%macro distrib_pctl_replicate_loop;

  /* Keep the loop index local so that a replicnum macro variable defined */
  /* outside this macro is never overwritten by the loop.                 */
  %local replicnum;

  %do replicnum = &replicfirst %to &repliclast;

    /* NOTE(review): the title3 statements later in the loop cancel       */
    /* higher-numbered title lines, so this title4 appears to be replaced */
    /* before any output is printed - confirm whether it is still needed. */
    title4 "Replicate &replicnum";


      ******************************************************************************************;
      *** Get parameter estimates and predicted values calculated by macro NLMIXED_BIVARIATE ***;
      ******************************************************************************************;

      /* Copy this replicate's parameter estimates from outlib into WORK */
      /* and attach the stratum label format.                            */
      data parms;
        set outlib.parms_b_sfat_energy&replicnum;
        format stratum stratfmt.;
      run;

      /* Copy this replicate's predicted values from outlib into WORK */
      /* and attach the sex and stratum label formats.                */
      data pred;
        set outlib.pred_b_sfat_energy&replicnum;
        format sex sexfmt. stratum stratfmt.;
      run;

      /* Order by subject within stratum, recalls in repeat order, so that */
      /* the later BY-group steps (first.id, first.stratum) work.          */
      proc sort data=pred;
        by stratum id repeat;
      run;



      *************************************************************************************************;
      *** For predicted values, create two records per subject, one for weekday and one for weekend ***;
      *************************************************************************************************;

      /* Attach the stratum-level repeat2 and weekend values (a1_*, a2_*)  */
      /* from parms to every predicted-value record of that stratum.       */
      /* NOTE(review): parms is not sorted in this program; the BY merge   */
      /* assumes it is already stored in stratum order - confirm.          */
      data pred;
        merge pred parms(keep=stratum a1_repeat2 a2_repeat2 a1_weekend a2_weekend);
          by stratum;
        run;

      /* Reduce to one record per subject, normalise its predictions to    */
      /* repeat2 = 0 and weekend = 0, then write two records: a weekday    */
      /* record and a weekend record, each carrying its day weight.        */
      data pred (drop=a1_repeat2 a2_repeat2 a1_weekend a2_weekend);
        set pred;
          by stratum id;

        if first.id;  /* keep first record per subject. */


        ***************************************************************************************;
        *** The following code assumes that "repeat2" is a covariate in the model:          ***;
        ***   repeat2 = 0 for the first administered 24-hour dietary recall (drddaycd = 1)  ***;
        ***   repeat2 = 1 for the second administered 24-hour dietary recall (drddaycd = 2) ***;
        ***************************************************************************************;

        ********************************************************************;
        *** For each subject, calculate predicted value when repeat2 = 0 ***;
        ********************************************************************;

        if repeat2 = 1 then do;
          repeat  = 1;
          repeat2 = 0;
          pred_x_a1 = pred_x_a1 - a1_repeat2;
          pred_x_a2 = pred_x_a2 - a2_repeat2;
        end;


        ******************************************************************************;
        *** The following code assumes that "weekend" is a covariate in the model: ***;
        ***   weekend = 0 if the 24-hour dietary recall was for Monday-Thursday    ***;
        ***   weekend = 1 if the 24-hour dietary recall was for Friday-Sunday      ***;
        ******************************************************************************;

        *************************************;
        *** Create record for weekend = 0 ***;
        *************************************;

        if weekend = 1 then do;
          weekend = 0;
          pred_x_a1 = pred_x_a1 - a1_weekend;
          pred_x_a2 = pred_x_a2 - a2_weekend;
          end;

        /* Monday-Thursday is 4 of the 7 days of the week. */
        day_wgt = 4/7;
        output;


        *************************************;
        *** Create record for weekend = 1 ***;
        *************************************;

        /* The record still holds the weekend = 0 predictions written above, */
        /* so adding the weekend values gives the weekend = 1 predictions.   */
        weekend = 1;
        pred_x_a1 = pred_x_a1 + a1_weekend;
        pred_x_a2 = pred_x_a2 + a2_weekend;
        /* Friday-Sunday is 3 of the 7 days of the week. */
        day_wgt = 3/7;
        output;

      run;



      *****************************************************************************************************;
      *** For each dietary component, create macro variable that equals half the minimum nonzero amount ***;
      *****************************************************************************************************;

      /* Store half of min_a1 and half of min_a2 for each stratum in macro */
      /* variables named min_a<stratum><component>, for example min_a21    */
      /* for stratum 2, component 1.  The values are taken from the first  */
      /* record of each stratum in pred.                                   */
      data _null_;
        set pred;
          by stratum;

        if first.stratum and stratum in (1, 2, 3) then do;
          call symput(cats("min_a", stratum, "1"), strip(put(min_a1 / 2, best12.)));
          call symput(cats("min_a", stratum, "2"), strip(put(min_a2 / 2, best12.)));
        end;
      run;


      *************************************************;
      *** Create separate data set for each stratum ***;
      *************************************************;

      /* Route each parameter record to the data set of its stratum. */
      /* Records with any other stratum value are written nowhere.   */
      data parms1 parms2 parms3;
        set parms;
        select (stratum);
          when (1) output parms1;
          when (2) output parms2;
          when (3) output parms3;
          otherwise;
        end;
      run;

      /* Same routing for the predicted-value records. */
      data pred1 pred2 pred3;
        set pred;
        select (stratum);
          when (1) output pred1;
          when (2) output pred2;
          when (3) output pred3;
          otherwise;
        end;
      run;



    *********************************;
    *** Delete unneeded data sets ***;
    *********************************;

    /* parms and pred have been split by stratum above, so the combined  */
    /* WORK copies are removed.  PROC DATASETS is an interactive         */
    /* procedure: RUN executes the DELETE but does not end the           */
    /* procedure, so QUIT is issued to terminate it explicitly.          */
    proc datasets lib=work nolist;
      delete parms pred;
    run;
    quit;



    /****************************************************************************
    *                                                                           *
    * SAS Macro Distrib_Bivariate estimates the bivariate distribution of true  *
    * usual intake of two foods/nutrients using Monte Carlo simulation.         *
    * The first food/nutrient can be episodically consumed or consumed every    *
    * day, while the second food/nutrient is assumed to be consumed every day.  *
    *                                                                           *
    * Macro Distrib_Bivariate reads data sets parms_b and pred_x_b output by    *
    * SAS macro NLMixed_Bivariate, and uses Monte Carlo simulation of the       *
    * random effects to generate the distribution of true usual intake.         *
    *                                                                           *
    * The method for two foods/nutrients that are consumed every day is         *
    * described in:                                                             *
    *    Freedman LS, et al. The population distribution of ratios of usual     *
    *    intake of dietary components that are consumed every day can be        *
    *    estimated from repeated 24-hour recalls. J Nutr. 2010;140:111-116.     *
    *                                                                           *
    *****************************************************************************
    *                                                                           *
    * Macro Parameters:                                                         *
    *                                                                           *
    *    Required Parameters:                                                   *
    *       param      = name of SAS data set containing the estimated model    *
    *                    parameters from macro NLMixed_Bivariate.               *
    *       predicted  = name of SAS data set containing the predicted values   *
    *                    calculated by macro NLMixed_Bivariate.                 *
    *                    typically, each subject has only one predicted value,  *
    *                    so has only one observation in the data set.           *
    *                    some models, however, allow a subject to have          *
    *                    different predicted values on different days (e.g.,    *
    *                    subjects may eat differently on weekends than on       *
    *                    weekdays). for such models, each subject should have   *
    *                    one observation for each unique predicted value.       *
    *                    macro Distrib_Bivariate will then calculate true       *
    *                    usual intake as a weighted average of usual intake on  *
    *                    the different days (see day_wgt, below).               *
    *       subject    = name of the variable that uniquely identifies each     *
    *                    subject in the predicted data set (i.e., ID variable). *
    *       modeltype  = model for first food/nutrient:                         *
    *                       to fit the two-part (episodic) model, specify       *
    *                          modeltype = TWOPART                              *
    *                       to fit the one-part (every day) model, specify      *
    *                          modeltype = ONEPART                              *
    *                                                                           *
    *    Optional Parameters:                                                   *
    *       link       = link function for the probability part of the two-     *
    *                    part model. to fit a logistic model, specify           *
    *                       link = logit                                        *
    *                    to fit a probit model, specify                         *
    *                       link = probit                                       *
    *                    by default, link = logit.                              *
    *                    if modeltype = ONEPART, then link is ignored.          *
    *       nsim_mc    = number of pseudo-individuals to simulate for each      *
    *                    real individual in the data set. if the data set has   *
    *                    n subjects, then the monte carlo distribution will     *
    *                    have nsim_mc * n pseudo-individuals.                   *
    *                    by default, nsim_mc = 1.                               *
    *       day_wgt    = name of the "day weight" variable in the predicted     *
    *                    data set. if the model allows subjects to have         *
    *                    different predicted values on different days (see      *
    *                    predicted, above), then macro distrib will calculate   *
    *                    each subject's true usual intake as a weighted average *
    *                    of usual intake on the different days (using the       *
    *                    weights in the "day weight" variable) . for example,   *
    *                    if each subject has one predicted value for weekends   *
    *                    (Friday-Sunday) and one predicted value for weekdays   *
    *                    (Monday-Thursday), then the day weight variable should *
    *                    equal 3/7 for the weekend predicted value and 4/7 for  *
    *                    the weekday predicted value.                           *
    *                    by default, if subjects have multiple predicted        *
    *                    values, then true usual intake is calculated as an     *
    *                    unweighted average of usual intake on different days.  *
    *       min_a1     = minimum true usual intake for a one-part model, or     *
    *                    minimum true usual amount on consumption day for a     *
    *                    two-part model, for the first food/nutrient.           *
    *                    the monte carlo method generates amount consumed on    *
    *                    a transformed scale, then back-transforms it to the    *
    *                    original scale. occasionally, the generated amount is  *
    *                    too small to be back-transformed. when this happens,   *
    *                    amount on the original scale is set to min_a1.         *
    *                    by default, min_a1 = 0.                                *
    *       min_a2     = minimum true usual intake for the second food/nutrient.*
    *                    by default, min_a2 = 0.                                *
    *       backtran   = 1 to use a numerical integration method to integrate   *
    *                    back-transformed reported intake over the distribution *
    *                    of within-person error.                                *
    *                  = 2 to use a Taylor linearization approximation to the   *
    *                    integral (not recommended).                            *
    *                  = 3 to back-transform without integrating over the       *
    *                    distribution of within-person error. (not recommended).*
    *                    by default, backtran = 1.                              *
    *       print      = Y to print summary of the monte carlo distribution.    *
    *                  = N to suppress printing summary of the distribution.    *
    *                    by default, print = Y.                                 *
    *       ntitle     = number of titles defined by the user.                  *
    *                    by default, ntitle = 2.                                *
    *                                                                           *
    *****************************************************************************
    *                                                                           *
    * Output Data Set:                                                          *
    *                                                                           *
    *   _mcsim = data set containing the monte carlo distribution of true usual *
    *            intake of the food/nutrient.  if the predicted data set        *
    *            has n subjects, _mcsim will have nsim_mc * n pseudo-subjects.  *
    *            _mcsim contains all the numeric variables in the predicted     *
    *            data set, plus the following variables:                        *
    *                                                                           *
    *               t1    = true usual intake of first food/nutrient.           *
    *               t2    = true usual intake of second food/nutrient.          *
    *               a1    = true usual amount on consumption day for first      *
    *                       food/nutrient.                                      *
    *               bc_a1 = Box-Cox transformed true usual amount a1.           *
    *               bc_t2 = Box-Cox transformed true usual intake t2.           *
    *                                                                           *
    *            if fitting the two-part model, then _mcsim also contains       *
    *            the following variables:                                       *
    *                                                                           *
    *               p1             = true probability to consume.               *
    *               linear_pred_p1 = transformed true probability to consume:   *
    *                                 = log(p1/(1-p1))  for logistic model      *
    *                                 = probit(p1)      for probit model        *
    *                                                                           *
    ****************************************************************************/

    ***********************************************************************;
    *** Set seed for random number generator for Monte Carlo simulation ***;
    ***********************************************************************;

    /* seed is made global, presumably so that macro DISTRIB_BIVARIATE can */
    /* read it - confirm against the macro source.  Because it is assigned */
    /* inside the replicate loop, every replicate starts from this same    */
    /* seed value.                                                         */
    %global seed;
    %let seed = 6594165;


    **************************************************************;
    *** Call macro DISTRIB_BIVARIATE to generate a Monte Carlo ***;
    *** distribution of usual intake for each stratum          ***;
    **************************************************************;

    /* Title text corrected to "Children" so that it matches the stratfmt */
    /* label for stratum 1.                                               */
    title3 "Stratum 1 = Children 1-8";

    /* Generate the Monte Carlo distribution for stratum 1 using the     */
    /* stratum 1 parameters, predicted values and minimum amounts.       */
    /* Both components use the one-part model, and the weekday/weekend   */
    /* records are combined using day_wgt.                               */
    %distrib_bivariate(param          = parms1,
                       predicted      = pred1,
                       subject        = id,
                       modeltype      = ONEPART,
                       nsim_mc        = 100,
                       day_wgt        = day_wgt,
                       min_a1         = &min_a11,
                       min_a2         = &min_a12,
                       print          = N,
                       ntitle         = 3
                       );

    /* Start the combined Monte Carlo data set with the stratum 1 output. */
    data mcsim;
      set _mcsim;
    run;


    title3 "Stratum 2 = Males 9+";

    /* Monte Carlo distribution for stratum 2; the macro writes _mcsim. */
    %distrib_bivariate(modeltype      = ONEPART,
                       subject        = id,
                       param          = parms2,
                       predicted      = pred2,
                       day_wgt        = day_wgt,
                       min_a1         = &min_a21,
                       min_a2         = &min_a22,
                       nsim_mc        = 100,
                       ntitle         = 3,
                       print          = N
                       );


    /* Interleave the stratum 2 output with the strata collected so far. */
    data mcsim;
      set mcsim
          _mcsim;
      by stratum;
    run;


    title3 "Stratum 3 = Females 9+";

    /* Monte Carlo distribution for stratum 3; the macro writes _mcsim. */
    %distrib_bivariate(modeltype      = ONEPART,
                       subject        = id,
                       param          = parms3,
                       predicted      = pred3,
                       day_wgt        = day_wgt,
                       min_a1         = &min_a31,
                       min_a2         = &min_a32,
                       nsim_mc        = 100,
                       ntitle         = 3,
                       print          = N
                       );


    /* Interleave the stratum 3 output with the strata collected so far. */
    data mcsim;
      set mcsim
          _mcsim;
      by stratum;
    run;


    /* All three strata are now in mcsim, so the per-call output _mcsim  */
    /* is removed.  PROC DATASETS is an interactive procedure: RUN       */
    /* executes the DELETE but does not end the procedure, so QUIT is    */
    /* issued to terminate it explicitly.                                */
    proc datasets lib=work nolist;
      delete _mcsim;
    run;
    quit;

    /* Clears title line 2 and every higher-numbered title line.         */
    /* NOTE(review): this also removes the title2 text set at the top of */
    /* the program for the rest of the run - confirm that clearing from  */
    /* line 2 rather than line 3 is intended.                            */
    title2;



    ****************************************;
    *** Calculate usual nutrient density ***;
    ****************************************;

    /* Add t_density, the usual nutrient density, to every simulated record. */
    data mcsim;
      set mcsim;

      /* Alternative definition, nutrient/food per 1000 kcal: */
      /*   t_density = 1000 * (t1 / t2)                        */

      /* Percent calories from saturated fat.  t1 is multiplied by 9     */
      /* before dividing by t2 (presumably 9 kcal per gram - confirm the */
      /* units of t1 and t2).                                            */
      t_density = 100 * (9 * t1 / t2);
    run;



    /*****************************************************************************/
    /*                                                                           */
    /* SAS Macro Percentiles_Survey estimates means, standard deviations and     */
    /* percentiles for survey data.                                              */
    /*                                                                           */
    /* if sampling weights are specified, estimates are based on a weighted      */
    /* empirical distribution. Otherwise, they are based on an unweighted        */
    /* empirical distribution.                                                   */
    /*                                                                           */
    /*****************************************************************************/
    /*                                                                           */
    /* Macro Parameters:                                                         */
    /*                                                                           */
    /*    Required Parameters:                                                   */
    /*       data          = name of SAS data set containing the data to be      */
    /*                       analyzed.                                           */
    /*       var           = name of variable for which means and percentiles    */
    /*                       are to be estimated.                                */
    /*                                                                           */
    /*    Optional Parameters:                                                   */
    /*       byvar         = list of variables which define by-groups in the     */
    /*                       data. means and percentiles will be estimated       */
    /*                       separately for each by-group.                       */
    /*       weight        = name of the sampling weight variable, if the data   */
    /*                       is from a complex survey with weights.              */
    /*                       by default, the macro assumes equal weights.        */
    /*       cutpoints     = list of values for which cut-point probabilities    */
    /*                       are to be calculated (Prob(X <= cut-point)).        */
    /*                       by default no cut-point probabilities are computed. */
    /*       print         = Y to print means and percentiles.                   */
    /*                       N to suppress printing means and percentiles.       */
    /*                       by default, print = Y.                              */
    /*       ntitle        = number of titles defined by the user.               */
    /*                       by default, ntitle = 2.                             */
    /*                                                                           */
    /*****************************************************************************/
    /*                                                                           */
    /* Output Data Set:                                                          */
    /*                                                                           */
    /*   _percentiles = data set containing estimated means and percentiles.     */
    /*                  _percentiles contains the following variables:           */
    /*                                                                           */
    /*                    mean     = estimated mean.                             */
    /*                    variance = estimated variance.                         */
    /*                    stddev   = estimated standard deviation.               */
    /*                    min      = minimum value.                              */
    /*                    max      = maximum value.                              */
    /*                    Pctile1  = estimated first percentile.                 */
    /*                    Pctile2  = estimated second percentile.                */
    /*                    ...                                                    */
    /*                    Pctile99 = estimated 99th percentile.                  */
    /*                                                                           */
    /*                 if cut-points are specified, then _percentiles also       */
    /*                 contains the following variables:                         */
    /*                                                                           */
    /*                    Prob1    = probability for first cut-point.            */
    /*                    Prob2    = probability for second cut-point.           */
    /*                    ...                                                    */
    /*                                                                           */
    /*                 if by-group variables are specified, they are also        */
    /*                 included in data set _percentiles.                        */
    /*                                                                           */
    /*****************************************************************************/

    ****************************************************************************************;
    *** Call macro PERCENTILES_SURVEY to calculate percentiles of usual nutrient density ***;
    ****************************************************************************************;

    title3 "Table 1: Percentiles by Age and Sex";

    /* Subpopulation starts from the agegrp code; for females (sex = 2)  */
    /* in age groups 3 and above the code is shifted up by 7, which maps */
    /* them onto the female labels of subfmt.                            */
    data mcsim2;
      set mcsim;

      if sex = 2 and agegrp >= 3 then Subpopulation = agegrp + 7;
      else Subpopulation = agegrp;

      format Subpopulation subfmt.;
    run;

    /* Weighted percentiles and cut-point probabilities of t_density, */
    /* using the weight variable that belongs to this replicate.      */
    %percentiles_survey(data       = mcsim2,
                        var        = t_density,
                        byvar      = Subpopulation,
                        weight     = rndw&replicnum,
                        cutpoints  = 10 12 15,
                        ntitle     = 3,
                        print      = N
                        );

    /* Start the summary table with the Table 1 rows. */
    data pctl;
      set _percentiles;
      by Subpopulation;
    run;


    title3 "Table 2: Percentiles for Adults 19+, by Sex";

    /* Keep adults only (records with missing age are dropped as well) */
    /* and code them 9 for males and 16 for females.                   */
    data mcsim2;
      set mcsim;

      if age >= 19;

      select (sex);
        when (1) Subpopulation = 9;
        when (2) Subpopulation = 16;
        otherwise;
      end;

      format Subpopulation subfmt.;
    run;

    %percentiles_survey(data       = mcsim2,
                        var        = t_density,
                        byvar      = Subpopulation,
                        weight     = rndw&replicnum,
                        cutpoints  = 10 12 15,
                        ntitle     = 3,
                        print      = N
                        );

    /* Interleave the Table 2 rows into the summary table. */
    data pctl;
      set pctl
          _percentiles;
      by subpopulation;
    run;


    title3 "Table 3: Percentiles for All Persons 1+";

    /* Every simulated record belongs to the single subpopulation 17. */
    data mcsim2;
      set mcsim;
      Subpopulation = 17;
      format Subpopulation subfmt.;
    run;

    %percentiles_survey(data       = mcsim2,
                        var        = t_density,
                        byvar      = Subpopulation,
                        weight     = rndw&replicnum,
                        cutpoints  = 10 12 15,
                        ntitle     = 3,
                        print      = N
                        );

    /* Interleave the Table 3 row into the summary table. */
    data pctl;
      set pctl
          _percentiles;
      by subpopulation;
    run;



    ************************************************;
    *** Print the summary table by subpopulation ***;
    ************************************************;

    /* The summary table is printed for the original data (replicate 0) only. */
    %if &replicnum = 0 %then %do;
      title3 "Estimated Mean, Standard Deviation, Percentiles, and Cut-Points";
      title4 "By Subpopulation";

      proc print data=pctl label;
        id subpopulation;
        var Mean StdDev
            Pctile5 Pctile10 Pctile25 Pctile50 Pctile75 Pctile90 Pctile95
            Prob1-Prob3;
        format Mean StdDev
               Pctile5 Pctile10 Pctile25 Pctile50 Pctile75 Pctile90 Pctile95
               Prob1-Prob3 7.2;
        label Prob1 = "Prob(X <= 10)"
              Prob2 = "Prob(X <= 12)"
              Prob3 = "Prob(X <= 15)";
      run;
    %end;



    ***********************************************************;
    *** Save means, percentiles and cut-point probabilities ***;
    ***********************************************************;

    /* Write the combined summary table to outlib, one data set per */
    /* replicate (the replicate number is the name suffix).         */
    data outlib.pctl_b_sfat_energy&replicnum;
      set pctl;
    run;


  %end;   /* end of the replicate loop */


%mend distrib_pctl_replicate_loop;



/* Run the analysis for replicates replicfirst through repliclast. */
%distrib_pctl_replicate_loop;
