/*****************************************************************************/
/*** The following files are required for this example:                    ***/
/***   brr_pvalue_ci_macro_v1.1.sas                                        ***/
/***   _percentiles_all_dc0-_percentiles_all_dc32                          ***/
/***                                                                       ***/
/*** This program fits a multivariate measurement error model and          ***/
/*** calculates mean usual intake using NHANES 2003-2004 data.  It also    ***/
/*** calculates percentiles and cutpoint probabilities for usual intake,   ***/
/*** conditional on quintiles of sodium intake.  For this analysis, the    ***/
/*** example data include males, ages 9+ from NHANES 2003-2004, and the    ***/
/*** 24-hour dietary recall is the main dietary instrument.  This analysis ***/
/*** allows for participants that never consume alcohol.  The dietary      ***/
/*** components are alcohol, meat, whole grains, and sodium.               ***/
/***                                                                       ***/
/*** This SAS program illustrates the use of the BRR_PVALUE_CI macro to    ***/
/*** perform balanced repeated replication (BRR) variance estimation.  The ***/
/*** input data set includes mean usual intakes, percentiles, and cutpoint ***/
/*** probabilities calculated using the original weights (i.e. weights for ***/
/*** replicate 0) and using the 32 BRR replicates (i.e. 1, 2, ..., 32).    ***/
/*****************************************************************************/



/* Report titles 1 and 2 are fixed here for the whole program.  A third   */
/* title line is set later, once per subgroup, just before each call to   */
/* the BRR_PVALUE_CI macro.                                               */
title1 "Fit Multivariate Measurement Error Model Using MCMC with 24-Hour Recall as Main Instrument, Allowing for Never Consumers";
title2 "Calculate Usual Intake Conditional Distributions and Cutpoint Probabilities";



/***********************************************************************/
/*** The path must be modified in the following lines to specify the ***/
/*** location of the SAS macro and the SAS data file.                ***/
/***********************************************************************/

/* Root directory; the macro include path and the data library below  */
/* are both built from this value.                                    */
%let home = /prj/dcp/statprog/meas.err/develop.public.resources.stat.meth;

*** Include the required macros ***;
%include "&home/include.files.macros/brr_pvalue_ci_macro_v1.1.sas";

*** Output data library ***;
/* This library is also the input location: the _percentiles_all_dc0  */
/* through _percentiles_all_dc32 data sets are read from it.          */
libname outlib "&home/multivar_surveillance_example2_mcmc_nc_main24hr/outlib";



/******************************************************************************/
/*** Import the data file of intermediate results needed for this example   ***/
/*** and create a data set with one record including distribution estimates ***/
/*** from the original data and a data set with records 1, 2, ..., 32       ***/
/*** including distribution estimates from replicate analyses performed for ***/
/*** BRR variance estimation.                                               ***/
/******************************************************************************/

/* Estimates computed with the original weights (replicate 0). */
data origwt_pctls;
  set outlib._percentiles_all_dc0;

  /* Retain only the identifiers and the distribution estimates. */
  keep replicate dc sodium_quintile
       mean min max
       pctile1 pctile5 pctile10 pctile25 pctile50
       pctile75 pctile90 pctile95 pctile99
       prob1 prob2 prob3;
run;


/* Estimates computed with BRR replicate weights 1 through 32, stacked. */
data brr_pctls;
  set outlib._percentiles_all_dc1-outlib._percentiles_all_dc32;

  /* Same variable selection as for the original-weight estimates. */
  keep replicate dc sodium_quintile
       mean min max
       pctile1 pctile5 pctile10 pctile25 pctile50
       pctile75 pctile90 pctile95 pctile99
       prob1 prob2 prob3;
run;



***********************************************************************;
*** To calculate the confidence intervals for the percentiles the   ***;
*** BRR_PVALUE_CI macro expects only one observation per replicate. ***;
*** In this example we have five groups of data for each quintile   ***;
*** of sodium intake.  In addition we have a number of dietary      ***;
*** components.  To handle this data structure in the macro we      ***;
*** treat the dietary components as subgroups and run the macro     ***;
*** separately for each group described above.  To accomplish this  ***;
*** a utility macro "BRR_LOOP_BY_SUBGRP" will be used.              ***;
***********************************************************************;

************************************************;
*** Create the appropriate grouped data sets ***;
************************************************;

data origwt_sq1 origwt_sq2 origwt_sq3 origwt_sq4 origwt_sq5;
  set origwt_pctls;

  /* Route each original-weight record to the data set of its sodium */
  /* quintile.                                                       */
  select (sodium_quintile);
    when (1) output origwt_sq1;
    when (2) output origwt_sq2;
    when (3) output origwt_sq3;
    when (4) output origwt_sq4;
    when (5) output origwt_sq5;
    otherwise;  /* any other value, including missing, is written nowhere */
  end;
run;

data brr_sq1 brr_sq2 brr_sq3 brr_sq4 brr_sq5;
  set brr_pctls;

  /* Route each BRR replicate record to the data set of its sodium */
  /* quintile.                                                     */
  select (sodium_quintile);
    when (1) output brr_sq1;
    when (2) output brr_sq2;
    when (3) output brr_sq3;
    when (4) output brr_sq4;
    when (5) output brr_sq5;
    otherwise;  /* any other value, including missing, is written nowhere */
  end;
run;


**************************************************;
*** Use a macro variable to list the variables ***;
*** to be processed in the macro               ***;
**************************************************;

/* This list is passed unchanged as param_estimate_names in the call to   */
/* the BRR_PVALUE_CI macro.  Note that pctile1 and pctile99 are kept in   */
/* the imported data sets but are not part of this list, so no standard   */
/* errors or confidence limits are requested for them.                    */
%let desc_vars = mean min max pctile5 pctile10 pctile25 pctile50 pctile75 pctile90 pctile95 prob1 prob2 prob3;



******************************************************************************;
***       Create the utility macro BRR_LOOP_BY_SUBGRP to prepare the       ***;
***                 data and call the macro BRR_PVALUE_CI                  ***;
******************************************************************************;

/*****************************************************************************/
/*****************************************************************************/
/*                                                                           */
/* Description of the BRR_PVALUE_CI macro                                    */
/*                                                                           */
/*****************************************************************************/
/*                                                                           */
/*                                                                           */
/* The BRR_PVALUE_CI macro performs balanced repeated replication (BRR)      */
/* variance estimation, calculates a p-value for each user specified         */
/* parameter to test the null hypothesis that the parameter value is zero,   */
/* and calculates a confidence interval for each user specified parameter.   */
/* The macro produces an output data set including the parameter estimates,  */
/* the BRR standard error estimates, the p-value used to test the null       */
/* hypothesis that the parameter value is zero versus the alternative        */
/* hypothesis that the parameter value is not zero, and the lower and upper  */
/* confidence limits.                                                        */
/*                                                                           */
/* The macro input includes a data set with one record including parameter   */
/* estimates from the original data and a data set with records 1, 2, ..., B */
/* including parameter estimates from replicate analyses performed for BRR   */
/* variance estimation.  Optionally, Fay's BRR method can be specified.      */
/*                                                                           */
/* Reference:                                                                */
/*                                                                           */
/*   Korn EL, Graubard BI. Analysis of Health Surveys, John Wiley & Sons,    */
/*   Inc., New York, 1999.                                                   */
/*                                                                           */
/*                                                                           */
/* The syntax for calling the BRR_PVALUE_CI macro is:                        */
/*                                                                           */
/* %brr_pvalue_ci(data_orig_estimates_1rec =,                                */
/*                data_brr_estimates_brecs =,                                */
/*                param_estimate_names     =,                                */
/*                set_f_method_fay         =,                                */
/*                set_confidence_level     =,                                */
/*                set_degrees_freedom      =,                                */
/*                print                    =,                                */
/*                titles                   =                                 */
/*                );                                                         */
/*                                                                           */
/*  where                                                                    */
/*                                                                           */
/*  "data_orig_estimates_1rec"    Specifies an input data set that includes  */
/*                                one record with parameter estimates from   */
/*                                the original data.                         */
/*                                                                           */
/*  "data_brr_estimates_brecs"    Specifies an input data set that includes  */
/*                                records 1, 2, ..., B with parameter        */
/*                                estimates from replicate analyses          */
/*                                performed for balanced repeated            */
/*                                replication (BRR) variance estimation.     */
/*                                For example, record 1 includes the         */
/*                                parameter estimates obtained from          */
/*                                replicate 1, and record B includes the     */
/*                                parameter estimates obtained from          */
/*                                replicate B, i.e. the final replicate.     */
/*                                                                           */
/*  "param_estimate_names"        Specifies a list of parameter estimates    */
/*                                that are included as variables in the      */
/*                                input data sets specified for the          */
/*                                "data_orig_estimates_1rec" and             */
/*                                "data_brr_estimates_brecs" macro           */
/*                                parameters.                                */
/*                                                                           */
/*  "set_f_method_fay"            Specifies a value f that will be used to   */
/*                                define an adjustment factor 1/[(1-f)**2]   */
/*                                used to perform an appropriate adjustment  */
/*                                if Fay's BRR method was used to create the */
/*                                replicate weights.  The specified value    */
/*                                should be nonnegative and less than 1.     */
/*                                The default value is "0" so by default,    */
/*                                the BRR calculations are based on the      */
/*                                assumption that Fay's method was not used  */
/*                                to create the replicate weights.           */
/*                                                                           */
/*  "set_confidence_level"        Specifies the confidence level.  The       */
/*                                default value is "0.95".                   */
/*                                                                           */
/*  "set_degrees_freedom"         Specifies the degrees of freedom for the t */
/*                                distribution and for the denominator of    */
/*                                the F distribution.                        */
/*                                                                           */
/*  "print"                       If "print=n" or "print=N" then macro       */
/*                                results are not printed.  The default      */
/*                                value is "y".                              */
/*                                                                           */
/*  "titles"                      Specifies the number of title lines to be  */
/*                                reserved for the user's titles.  One       */
/*                                additional title line is used by the       */
/*                                macro.  The default value is "0".          */
/*                                                                           */
/*****************************************************************************/
/*                                                                           */
/* Macro Output Data Set:  brr_pvalue_ci_out                                 */
/*                                                                           */
/* The BRR_PVALUE_CI macro produces a SAS data set "brr_pvalue_ci_out" that  */
/* includes the following variables:                                         */
/*               _name_, estimate, brr_se, pvalue_param0_probf_df_1_r,       */
/*               lowerlim_t_df_r, and upperlim_t_df_r                        */
/* where r is the value that was specified using the "set_degrees_freedom"   */
/* macro parameter.  The variable "_name_" identifies each parameter, and    */
/* the variable "estimate" includes the parameter estimates from the data    */
/* set specified using the "data_orig_estimates_1rec" macro parameter.  The  */
/* variable "brr_se" includes the BRR standard error estimates.  The         */
/* variable "pvalue_param0_probf_df_1_r" is the p-value used to test the     */
/* null hypothesis that the parameter value is zero versus the alternative   */
/* hypothesis that the parameter value is not zero.  The p-value is          */
/* calculated using the F distribution with 1 numerator degree of freedom    */
/* and r denominator degrees of freedom.  An equivalent p-value could be     */
/* obtained using the t distribution with r degrees of freedom.  The         */
/* variables "lowerlim_t_df_r" and "upperlim_t_df_r" are the lower and upper */
/* confidence limits which are calculated using the t distribution with r    */
/* degrees of freedom.                                                       */
/*****************************************************************************/


%macro brr_loop_by_subgrp(subgrp=, numsubgrps=, baseda=, repda=, grp=);

  /***************************************************************************/
  /* The macro BRR_LOOP_BY_SUBGRP splits the data into one data set per      */
  /* subgroup category, calls the BRR_PVALUE_CI macro once per subgroup,     */
  /* and finally concatenates the output data sets of all calls and saves    */
  /* them as one data set, outlib.brr_pvalue_ci_out_<grp>.                   */
  /*                                                                         */
  /* The syntax for calling the macro is:                                    */
  /*   %brr_loop_by_subgrp(subgrp=, numsubgrps=, baseda=, repda=, grp=);     */
  /* where:                                                                  */
  /*   subgrp     is the name of the subgroup variable                       */
  /*   numsubgrps is the number of subgroups                                 */
  /*   baseda     is the data set with replicate 0 (original wts) data;      */
  /*              it is assumed to hold one record per subgroup value        */
  /*   repda      is the data set with brr replicates data                   */
  /*   grp        is used to name the final data set, indicating the group;  */
  /*              it is also stored in the output variable "group"           */
  /*                                                                         */
  /* Side effects: the data sets named by baseda and repda are rewritten     */
  /* with an added record_id variable, and the repda data set is left sorted */
  /* by the subgroup variable.  The macro reads the macro variable           */
  /* desc_vars and writes to the libref outlib, so both must be defined      */
  /* before the call.                                                        */
  /***************************************************************************/

  /* Keep the loop index private so that a macro variable named "a" in an */
  /* enclosing scope is never overwritten.                                */
  %local a;

  /**********************************************************************/
  /* Create an identifying variable based on the number of subgroup     */
  /* categories to make it easy to select the records.                  */
  /* Save this on the base data set and create a data set with only     */
  /* the subgroup information and the record_id for later use.          */
  /**********************************************************************/

  /* Base data set: record_id is simply the record position. */
  data &baseda subgrps(keep=record_id &subgrp);
    set &baseda;
      record_id = _n_;
    output &baseda subgrps;
  run;


  /*******************************************************************************/
  /* Replicate data sets                                                         */
  /* Add the record_id associated with each subgrp value to the replicate data   */
  /*******************************************************************************/

  proc sort data=subgrps;
    by &subgrp;
  run;

  proc sort data=&repda;
    by &subgrp;
  run;

  /* One-to-many merge: every replicate record inherits the record_id of */
  /* the base record that has the same subgroup value.                   */
  data &repda;
    merge subgrps &repda;
    by &subgrp;
  run;

  /*********************************************************************************/
  /* Create data sets for the CI macro, each data set can have only one subgroup   */
  /*********************************************************************************/

  %do a = 1 %to &numsubgrps;

    /* Base data set: the single original-weight record of subgroup &a. */
    data base(drop=record_id &subgrp);
      set &baseda;
      if &a = record_id;
    run;

    /* Replicate data set: the BRR replicate records of subgroup &a. */
    data replicate(drop=record_id &subgrp);
      set &repda;
      if &a = record_id;
    run;



    title3 "Summary of Parameter Estimates and BRR Variance Estimation for Subgroup &a, &grp";

    /****************************************************************************/
    /*        Call the BRR_PVALUE_CI macro to calculate standard errors,        */
    /*                    p-values, and confidence intervals                    */
    /****************************************************************************/
    /* Note that the degrees of freedom is specified as 30 (i.e. the number     */
    /* of sampled primary sampling units (PSUs) minus the number of strata is   */
    /* 60-30=30 for NHANES 2001-2004). The degrees of freedom associated with   */
    /* replication variance estimators is discussed by Korn and Graubard        */
    /* (1999, Analysis of Health Surveys, pg. 34) and in the documentation      */
    /* for the SAS survey procedures.                                           */
    /*                                                                          */
    /* NOTE(review): the program header describes the data as NHANES 2003-2004  */
    /* while this note cites NHANES 2001-2004 -- confirm that 30 degrees of     */
    /* freedom matches the survey design that was actually analyzed.            */
    /****************************************************************************/

    %brr_pvalue_ci(data_orig_estimates_1rec = base,
                   data_brr_estimates_brecs = replicate,
                   param_estimate_names     = &desc_vars,
                   set_f_method_fay         = 0.3,
                   set_confidence_level     = 0.95,
                   set_degrees_freedom      = 30,
                   print                    = y,
                   titles                   = 3
                   );

    /* Keep each iteration of the output data sets, tagged with the */
    /* record_id of the subgroup it belongs to.                     */
    data brr_pvalue_ci_out_pct&a;
      set brr_pvalue_ci_out;
      record_id = &a;
    run;


    /* Delete data sets that will be recreated in the next iteration. */
    proc datasets nolist lib=work;
      delete brr_pvalue_ci_out base replicate;
    quit;


    /******************************************************************/
    /* When all subgroups categories have been processed combine      */
    /* the data set of CIs for the "descript" data, add the           */
    /* subgroup values appropriately, and save the final data set     */
    /******************************************************************/

    %if &a = &numsubgrps %then %do;

      data brr_pvalue_ci_pct_all;
        set brr_pvalue_ci_out_pct1-brr_pvalue_ci_out_pct&a;
      run;

      proc sort data = brr_pvalue_ci_pct_all;
        by record_id;
      run;

      proc sort data=subgrps;
        by record_id;
      run;

      /* Attach the subgroup value to every result row through record_id. */
      data outlib.brr_pvalue_ci_out_&grp(drop = record_id);
        merge brr_pvalue_ci_pct_all subgrps;
        by record_id;

        rename _name_ = Tested_Variable;

        /* The label must be wide enough for the whole grp value.  A     */
        /* length of 8 cut "quintile1" (9 characters) down to "quintile" */
        /* so the groups could no longer be told apart in the output.    */
        length group $32;
        group = "&grp";
      run;

    %end;


  %end; /* of iterations for each subgroup */


%mend; /* of BRR_LOOP_BY_SUBGRP macro */



*********************************************************************;
*** Call the utility macro BRR_LOOP_BY_SUBGRP once for each group ***;
*********************************************************************;

/* One call per sodium quintile.  In every call the dietary component     */
/* variable dc is the subgroup variable, and the results are saved as     */
/* outlib.brr_pvalue_ci_out_<grp>.                                        */
/* NOTE(review): numsubgrps=3 is assumed to equal the number of dc        */
/* records in each origwt_sq data set -- confirm against the input data,  */
/* since subgroups beyond this count would be silently left out.          */
%brr_loop_by_subgrp(subgrp=dc, numsubgrps=3, baseda=origwt_sq1, repda=brr_sq1, grp=quintile1);
%brr_loop_by_subgrp(subgrp=dc, numsubgrps=3, baseda=origwt_sq2, repda=brr_sq2, grp=quintile2);
%brr_loop_by_subgrp(subgrp=dc, numsubgrps=3, baseda=origwt_sq3, repda=brr_sq3, grp=quintile3);
%brr_loop_by_subgrp(subgrp=dc, numsubgrps=3, baseda=origwt_sq4, repda=brr_sq4, grp=quintile4);
%brr_loop_by_subgrp(subgrp=dc, numsubgrps=3, baseda=origwt_sq5, repda=brr_sq5, grp=quintile5);



/*******************************************************/
/*** Concatenate the output data sets for all groups ***/
/*******************************************************/

/* Sort a work copy of each per-quintile result set so that the five */
/* copies can be interleaved by dietary component and group below.   */
proc sort data=outlib.brr_pvalue_ci_out_quintile1 out=sq1;
  by dc group;
run;

proc sort data=outlib.brr_pvalue_ci_out_quintile2 out=sq2;
  by dc group;
run;

proc sort data=outlib.brr_pvalue_ci_out_quintile3 out=sq3;
  by dc group;
run;

proc sort data=outlib.brr_pvalue_ci_out_quintile4 out=sq4;
  by dc group;
run;

proc sort data=outlib.brr_pvalue_ci_out_quintile5 out=sq5;
  by dc group;
run;

/* Interleave the sorted copies into the final combined data set. */
data outlib.brr_pvalue_ci_out_all;
  set sq1 sq2 sq3 sq4 sq5;
  by dc group;
run;

/* Remove the per-quintile permanent data sets now that they have been   */
/* combined.  The names must match the data sets that were actually      */
/* written to outlib (brr_pvalue_ci_out_quintile1 through quintile5);    */
/* the names ending in sq1 through sq5 never exist in this library, so   */
/* deleting those removed nothing.                                       */
proc datasets nolist lib=outlib;
  delete brr_pvalue_ci_out_quintile1
         brr_pvalue_ci_out_quintile2
         brr_pvalue_ci_out_quintile3
         brr_pvalue_ci_out_quintile4
         brr_pvalue_ci_out_quintile5;
quit;
