/*****************************************************************************/
/*** The following files are required for this example:                    ***/
/***   brr_pvalue_ci_macro_v1.1.sas                                        ***/
/***   _percentiles_all_foods0-_percentiles_all_foods16                    ***/
/***                                                                       ***/
/*** This example includes 5 programs.  Programs 1a, 1b, 1c can be used to ***/
/*** fit a multivariate measurement error model, calculate mean usual      ***/
/*** Healthy Eating Index-2010 (HEI-2010) scores, and perform t-tests      ***/
/*** comparing mean usual HEI-2010 scores using NHANES 2003-2004 data.     ***/
/*** Programs 1d and 1e can be used to extend the analysis to also         ***/
/*** calculate percentiles and cutpoint probabilities for both the         ***/
/*** HEI-2010 component scores and the HEI-2010 densities, conditional on  ***/
/*** categories of HEI-2010 total scores.                                  ***/
/***                                                                       ***/
/*** This SAS program illustrates the use of the BRR_PVALUE_CI macro to    ***/
/*** perform balanced repeated replication (BRR) variance estimation.  The ***/
/*** input data set includes mean usual HEI-2010 densities and scores      ***/
/*** calculated using the original weights (i.e. weights for replicate 0)  ***/
/*** and using the 14 valid BRR replicates (i.e. 1, 2, ..., 11, 13, 14,    ***/
/*** 16).                                                                  ***/
/*****************************************************************************/


/*****************************************/
/*** Specify a title for this example. ***/
/*****************************************/

title1 "Fit Multivariate Measurement Error Model Using MCMC with 24-Hour Recall as Main Instrument";
title2 "Calculate Mean Usual HEI2010 Scores and Perform T-Tests Comparing Mean Usual HEI2010 Scores";
title3 "Calculate Conditional Distributions and Cutpoint Probabilities for HEI-2010 Densities and Scores";


/***********************************************************************/
/*** The path must be modified in the following lines to specify the ***/
/*** location of the SAS macro and the SAS data file.                ***/
/***********************************************************************/

%let home = /prj/dcp/statprog/meas.err/develop.public.resources.stat.meth;

*** Include the required macros ***;
%include "&home/include.files.macros/brr_pvalue_ci_macro_v1.1.sas";

*** Input data library ***;
libname inlib  "&home/multivar_surveillance_example1_mcmc_nnc_main24hr/outlib/example1d";

*** Output data library ***;
libname outlib "&home/multivar_surveillance_example1_mcmc_nnc_main24hr/outlib/example1e";



/******************************************************************************/
/*** Import the data file of intermediate results needed for this example   ***/
/*** and create a data set with one record including distribution estimates ***/
/*** from the original data and a data set with records 1, 2, ..., 16       ***/
/*** including distribution estimates from replicate analyses performed for ***/
/*** BRR variance estimation.                                               ***/
/******************************************************************************/

data origwt_pctls(keep = replicate hei_total_score_gt50 hei_food mean min max
                         pctile1 pctile5 pctile10 pctile25 pctile50 pctile75
                         pctile90 pctile95 pctile99 prob1 prob2 prob3);
  set inlib._percentiles_all_foods0;
run;


data keep_brr_pctls(keep = replicate hei_total_score_gt50 hei_food mean min max
                         pctile1 pctile5 pctile10 pctile25 pctile50 pctile75
                         pctile90 pctile95 pctile99 prob1 prob2 prob3)
     del_brr_pctls;
  set inlib._percentiles_all_foods1-inlib._percentiles_all_foods16;

  if replicate in (12, 14) then output del_brr_pctls;
  else output keep_brr_pctls;
run;



********************************************************************;
*** To calculate the confidence intervals for the percentiles    ***;
*** the BRR_PVALUE_CI macro expects only one observation per     ***;
*** replicate.  In this example we have three groups of data,    ***;
*** total_hei_score <= 50, total_hei_score > 50, and an overall  ***;
*** estimate for total_hei_score.  In addition we have a number  ***;
*** of foods.  To handle this data structure in the macro we     ***;
*** treat the foods as subgroups and execute the macro           ***;
*** separately for each group described above.  To accomplish    ***;
*** this a utility macro "RUN_BRR_LOOP_BY_SUBGRP" will be used.  ***;
********************************************************************;

************************************************;
*** Create the appropriate grouped data sets ***;
************************************************;

data origwt_le50 origwt_gt50 origwt;
  set origwt_pctls;
  if      hei_total_score_gt50=0 then output origwt_le50;
  else if hei_total_score_gt50=1 then output origwt_gt50;
  else if hei_total_score_gt50=2 then output origwt;
run;

data brr_le50 brr_gt50 brrwt;
  set keep_brr_pctls;
  if      hei_total_score_gt50=0 then output brr_le50;
  else if hei_total_score_gt50=1 then output brr_gt50;
  else if hei_total_score_gt50=2 then output brrwt;
run;


**************************************************;
*** Use a macro variable to list the variables ***;
*** to be processed in the macro               ***;
**************************************************;

%let desc_vars = mean min max pctile5 pctile10 pctile25 pctile50 pctile75 pctile90 pctile95 prob1 prob2 prob3;



******************************************************************************;
***     Create the utility macro RUN_BRR_LOOP_BY_SUBGRP to prepare the     ***;
***                 data and call the macro BRR_PVALUE_CI                  ***;
******************************************************************************;

/*****************************************************************************/
/*****************************************************************************/
/*                                                                           */
/* Description of the BRR_PVALUE_CI macro                                    */
/*                                                                           */
/*****************************************************************************/
/*                                                                           */
/*                                                                           */
/* The BRR_PVALUE_CI macro performs balanced repeated replication (BRR)      */
/* variance estimation, calculates a p-value for each user specified         */
/* parameter to test the null hypothesis that the parameter value is zero,   */
/* and calculates a confidence interval for each user specified parameter.   */
/* The macro produces an output data set including the parameter estimates,  */
/* the BRR standard error estimates, the p-value used to test the null       */
/* hypothesis that the parameter value is zero versus the alternative        */
/* hypothesis that the parameter value is not zero, and the lower and upper  */
/* confidence limits.                                                        */
/*                                                                           */
/* The macro input includes a data set with one record including parameter   */
/* estimates from the original data and a data set with records 1, 2, ..., B */
/* including parameter estimates from replicate analyses performed for BRR   */
/* variance estimation.  Optionally, Fay's BRR method can be specified.      */
/*                                                                           */
/* Reference:                                                                */
/*                                                                           */
/*   Korn EL, Graubard BI. Analysis of Health Surveys, John Wiley & Sons,    */
/*   Inc., New York, 1999.                                                   */
/*                                                                           */
/*                                                                           */
/* The syntax for calling the BRR_PVALUE_CI macro is:                        */
/*                                                                           */
/* %brr_pvalue_ci(data_orig_estimates_1rec =,                                */
/*                data_brr_estimates_brecs =,                                */
/*                param_estimate_names     =,                                */
/*                set_f_method_fay         =,                                */
/*                set_confidence_level     =,                                */
/*                set_degrees_freedom      =,                                */
/*                print                    =,                                */
/*                titles                   =                                 */
/*                );                                                         */
/*                                                                           */
/*  where                                                                    */
/*                                                                           */
/*  "data_orig_estimates_1rec"    Specifies an input data set that includes  */
/*                                one record with parameter estimates from   */
/*                                the original data.                         */
/*                                                                           */
/*  "data_brr_estimates_brecs"    Specifies an input data set that includes  */
/*                                records 1, 2, ..., B with parameter        */
/*                                estimates from replicate analyses          */
/*                                performed for balanced repeated            */
/*                                replication (BRR) variance estimation.     */
/*                                For example, record 1 includes the         */
/*                                parameter estimates obtained from          */
/*                                replicate 1, and record B includes the     */
/*                                parameter estimates obtained from          */
/*                                replicate B, i.e. the final replicate.     */
/*                                                                           */
/*  "param_estimate_names"        Specifies a list of parameter estimates    */
/*                                that are included as variables in the      */
/*                                input data sets specified for the          */
/*                                "data_orig_estimates_1rec" and             */
/*                                "data_brr_estimates_brecs" macro           */
/*                                parameters.                                */
/*                                                                           */
/*  "set_f_method_fay"            Specifies a value f that will be used to   */
/*                                define an adjustment factor 1/[(1-f)**2]   */
/*                                used to perform an appropriate adjustment  */
/*                                if Fay's BRR method was used to create the */
/*                                replicate weights.  The specified value    */
/*                                should be nonnegative and less than 1.     */
/*                                The default value is "0" so by default,    */
/*                                the BRR calculations are based on the      */
/*                                assumption that Fay's method was not used  */
/*                                to create the replicate weights.           */
/*                                                                           */
/*  "set_confidence_level"        Specifies the confidence level.  The       */
/*                                default value is "0.95".                   */
/*                                                                           */
/*  "set_degrees_freedom"         Specifies the degrees of freedom for the t */
/*                                distribution and for the denominator of    */
/*                                the F distribution.                        */
/*                                                                           */
/*  "print"                       If "print=n" or "print=N" then macro       */
/*                                results are not printed.  The default      */
/*                                value is "y".                              */
/*                                                                           */
/*  "titles"                      Specifies the number of title lines to be  */
/*                                reserved for the user's titles.  One       */
/*                                additional title line is used by the       */
/*                                macro.  The default value is "0".          */
/*                                                                           */
/*****************************************************************************/
/*                                                                           */
/* Macro Output Data Set:  brr_pvalue_ci_out                                 */
/*                                                                           */
/* The BRR_PVALUE_CI macro produces a SAS data set "brr_pvalue_ci_out" that  */
/* includes the following variables:                                         */
/*               _name_, estimate, brr_se, pvalue_param0_probf_df_1_r,       */
/*               lowerlim_t_df_r, and upperlim_t_df_r                        */
/* where r is the value that was specified using the "set_degrees_freedom"   */
/* macro parameter.  The variable "_name_" identifies each parameter, and    */
/* the variable "estimate" includes the parameter estimates from the data    */
/* set specified using the "data_orig_estimates_1rec" macro parameter.  The  */
/* variable "brr_se" includes the BRR standard error estimates.  The         */
/* variable "pvalue_param0_probf_df_1_r" is the p-value used to test the     */
/* null hypothesis that the parameter value is zero versus the alternative   */
/* hypothesis that the parameter value is not zero.  The p-value is          */
/* calculated using the F distribution with 1 numerator degree of freedom    */
/* and r denominator degrees of freedom.  An equivalent p-value could be     */
/* obtained using the t distribution with r degrees of freedom.  The         */
/* variables "lowerlim_t_df_r" and "upperlim_t_df_r" are the lower and upper */
/* confidence limits which are calculated using the t distribution with r    */
/* degrees of freedom.                                                       */
/*                                                                           */
/*****************************************************************************/


%macro RUN_BRR_LOOP_BY_SUBGRP(subgrp=, numsubgrps=, baseda=, repda=, grp=);

  /***************************************************************************/
  /* The macro RUN_BRR_LOOP_BY_SUBGRP selects the data into separate data    */
  /* sets per subgroup category, identifies the variables to be used in      */
  /* the BRR_PVALUE_CI macro, and calls that macro. Additionally all the     */
  /* data sets output by each call are finally concatenated and saved into   */
  /* one data set.                                                           */
  /*                                                                         */
  /* The syntax for calling the macro is:                                    */
  /*   %run_brr_loop_by_subgrp(subgrp=, numsubgrps=, baseda=, repda=, grp=); */
  /* where:                                                                  */
  /*   subgroup is used to identify the name of the subgroup variable        */
  /*   numsubgrps is the number of subgroups                                 */
  /*   baseda    is the data set with replicate 0 (original wts) data        */
  /*   repda     is the data set with brr replicates data                    */
  /*   grp        is used to name the final data set, indicating the group   */
  /*                                                                         */
  /*                                                                         */
  /***************************************************************************/

  **********************************************************************;
  *** Create an identifying variable based on the number of subgroup ***;
  *** categories to make it easy to select the records.              ***;
  *** Save this on the base data set and create a data set with only ***;
  *** the subgroup information and the record_id for later use.      ***;
  **********************************************************************;

  *********************;
  *** Base data set ***;
  *********************;

  data &baseda subgrps(keep=record_id &subgrp);
    set &baseda;
      record_id = _n_;
    output &baseda subgrps;
  run;


  *********************************************************************************;
  *** Replicate data sets                                                       ***;
  *** Add the record_id associated with each subgrp value to the replicate data ***;
  *********************************************************************************;

  proc sort data=subgrps;
    by &subgrp;
  run;

  proc sort data=&repda;
    by &subgrp;
  run;

  data &repda;
    merge subgrps &repda;
    by &subgrp;
  run;

  ***********************************************************************************;
  *** Create data sets for the CI macro, each data set can have only one subgroup ***;
  ***********************************************************************************;

  %do a = 1 %to &numsubgrps;

    *********************;
    *** Base data set ***;
    *********************;

    data base(drop=record_id &subgrp);
      set &baseda;
      if &a = record_id;
    run;

    ***************************;
    *** Replicate data sets ***;
    ***************************;
    data replicate(drop=record_id &subgrp);
      set &repda;
      if &a = record_id;
    run;



    ************************************;
    *** Call the BRR_PVALUE_CI macro ***;
    ************************************;

    title5 "Output for Subgroup &a, Group &grp";

    /*****************************************************************************/
    /*** If all 16 replicates had yielded usable results, the degrees of       ***/
    /*** freedom would have been limited to 15 (i.e. the number of sampled     ***/
    /*** primary sampling units (PSUs) minus the number of strata is 30-15=15  ***/
    /*** for NHANES 2003-2004).  The degrees of freedom associated with        ***/
    /*** replication variance estimators is discussed by Korn and Graubard     ***/
    /*** (1999, Analysis of Health Surveys, pg. 34) and in the documentation   ***/
    /*** for the SAS survey procedures.                                        ***/
    /*****************************************************************************/

    %brr_pvalue_ci(data_orig_estimates_1rec = base,
                   data_brr_estimates_brecs = replicate,
                   param_estimate_names     = &desc_vars,
                   set_f_method_fay         = 0.3,
                   set_confidence_level     = 0.95,
                   set_degrees_freedom      = 14,
                   print                    = y,
                   titles                   = 5
                   );

    ***************************************************;
    *** Keep each iteration of the output data sets ***;
    ***************************************************;

    data brr_pvalue_ci_out_pct&a;
      set brr_pvalue_ci_out;
      record_id = &a;
    run;


    *********************************************************************;
    *** Delete data sets that will be recreated in the next iteration ***;
    *********************************************************************;

    proc datasets nolist lib=work;
      delete brr_pvalue_ci_out base replicate;
    quit;


    ******************************************************************;
    *** When all subgroups categories have been processed combine  ***;
    *** the data set of CIs for the "descript" data, add the       ***;
    *** subgroup values appropriately, and save the final data set ***;
    ******************************************************************;

    %if &a = &numsubgrps %then %do;

      data brr_pvalue_ci_pct_all;
        set brr_pvalue_ci_out_pct1-brr_pvalue_ci_out_pct&a;
      run;

      proc sort data = brr_pvalue_ci_pct_all;
        by record_id;
      run;

      proc sort data=subgrps;
        by record_id;
      run;

      data outlib.brr_pvalue_ci_out_&grp(drop = record_id);
        merge brr_pvalue_ci_pct_all subgrps;
        by record_id;

        rename _name_ = Tested_Variable;

        length group $8.;
        group = "&grp";
      run;

    %end;


  %end; /* of iterations for each subgroup */


%mend; /* of RUN_BRR_LOOP_BY_SUBGRP macro */



*************************************************************************;
*** Call the utility macro RUN_BRR_LOOP_BY_SUBGRP once for each group ***;
*************************************************************************;

%run_brr_loop_by_subgrp(subgrp=hei_food, numsubgrps=22, baseda=origwt_le50, repda=brr_le50, grp=le50);
%run_brr_loop_by_subgrp(subgrp=hei_food, numsubgrps=22, baseda=origwt_gt50, repda=brr_gt50, grp=gt50);
%run_brr_loop_by_subgrp(subgrp=hei_food, numsubgrps=1,  baseda=origwt,      repda=brrwt,    grp=none);



/*******************************************************/
/*** Concatenate the output data sets for all groups ***/
/*******************************************************/

proc sort data=outlib.brr_pvalue_ci_out_le50 out=le50;
  by hei_food group;
run;

proc sort data=outlib.brr_pvalue_ci_out_gt50 out=gt50;
  by hei_food group;
run;

proc sort data=outlib.brr_pvalue_ci_out_none out=none;
  by hei_food group;
run;

data outlib.brr_pvalue_ci_out_all;
  set le50 gt50 none;
  by hei_food group;
run;

proc datasets nolist lib=outlib;
  delete brr_pvalue_ci_out_le50
         brr_pvalue_ci_out_gt50
         brr_pvalue_ci_out_none;
quit;
