/*
Fix NHANES Strata for BRR

Description:

	Corrects specific NHANES strata that do not have exactly 2
	primary sampling units (PSUs) so that balanced repeated replication (BRR)
	weights can be generated for the data.
	
Parameters:

	- input_data: A SAS data set of NHANES data.
								It must contain the following variables:
								- SDMVSTRA: Strata
								- SDMVPSU: Primary Sampling Unit (PSU)
	- outlib: The SAS library to store output datasets. (default = WORK)
	- outname: The name of the output dataset.
	
Output:

	The following SAS data set is created in outlib:
		
		outname: A SAS data set with the same records as input_data with SDMVPSU modified so that selected strata have exactly 2 PSUs.
						 Sorted by SDMVSTRA and SDMVPSU.

Details:

	Some NHANES cycles have strata that contain 3 primary sampling units
	(PSUs). Since balanced repeated replication (BRR) requires exactly 2 PSUs
	for all strata, PSUs in these strata must be reconfigured for BRR to be
	used. This function corrects specific NHANES strata that have 3 PSUs. Two
	of the three PSUs in each of the strata to be corrected were randomly
	selected to be combined. If the function detects other strata that it
	was not able to correct, a warning is given to the user.
	
	This function currently corrects the following strata/PSU combinations:
	- SDMVSTRA = 1, Combine SDMVPSU 1 and 2
	- SDMVSTRA = 86, Combine SDMVPSU 2 and 3
	- SDMVSTRA = 90, Combine SDMVPSU 2 and 3
	- SDMVSTRA = 91, Combine SDMVPSU 1 and 3
	- SDMVSTRA = 92, Combine SDMVPSU 2 and 3
	- SDMVSTRA = 156, Combine SDMVPSU 1 and 3
*/
%macro fix_nhanes_strata(input_data=,
									 			 outlib=WORK,
									 			 outname=);
	/*
	Recodes SDMVPSU in a fixed list of NHANES strata (1, 86, 90, 91, 92, 156)
	so that each of them has exactly 2 PSUs, writes the result to
	&outlib..&outname sorted by SDMVSTRA and SDMVPSU, and prints a WARNING to
	the log listing any strata that still do not have exactly 2 PSUs.

	Parameters:
		input_data - input data set; must contain SDMVSTRA and SDMVPSU.
		outlib     - library for the output data set (default WORK).
		outname    - name of the output data set.

	Side effects: creates the helper data sets strata_psu_records,
	strata_psu_levels and _psu_counts in WORK.
	*/
	
	%local fix1 fix86 fix90 fix91 fix92 fix156;
	
	/* Required parameters: an empty value would otherwise produce an
	   invalid SET or DATA statement further down. */
	%if %length(&input_data.) = 0 or %length(&outname.) = 0 %then %do;
		%put ERROR: fix_nhanes_strata requires both input_data= and outname= to be specified.;
		%return;
	%end;
	
	/* Build the list of distinct stratum/PSU combinations in the input. */
	data strata_psu_records (keep = SDMVSTRA SDMVPSU);
		set &input_data.;
	run;
	
	proc sort data=strata_psu_records; by SDMVSTRA SDMVPSU; run;
	
	data strata_psu_levels;
		set strata_psu_records;
		by SDMVSTRA SDMVPSU;
		if first.SDMVPSU;
	run;
	
	/* A known stratum is flagged for correction only when a PSU numbered 3
	   is actually present in it; the flags are exported as macro variables
	   so the recode statements below are generated conditionally. */
	data _NULL_;
		set strata_psu_levels end=last_obs;
		
		retain fix1 fix86 fix90 fix91 fix92 fix156;
		
		if _N_ = 1 then do;
		
			fix1 = 0;
			fix86 = 0;
			fix90 = 0;
			fix91 = 0;
			fix92 = 0;
			fix156 = 0;
		end;
		
		if SDMVSTRA = 1 and SDMVPSU = 3 then fix1 = 1;
		if SDMVSTRA = 86 and SDMVPSU = 3 then fix86 = 1;
		if SDMVSTRA = 90 and SDMVPSU = 3 then fix90 = 1;
		if SDMVSTRA = 91 and SDMVPSU = 3 then fix91 = 1;
		if SDMVSTRA = 92 and SDMVPSU = 3 then fix92 = 1;
		if SDMVSTRA = 156 and SDMVPSU = 3 then fix156 = 1;
		
		if last_obs = 1 then do;
			
			call symputx("fix1", fix1);
			call symputx("fix86", fix86);
			call symputx("fix90", fix90);
			call symputx("fix91", fix91);
			call symputx("fix92", fix92);
			call symputx("fix156", fix156);
		end;
	run;
	
	/* Recode PSUs in the flagged strata so that only PSU values 1 and 2
	   remain. */
	data &outlib..&outname.;
		set &input_data.;
		
		/* Stratum 1: combine PSUs 1 and 2, then renumber PSU 3 as 2. */
		%if &fix1. = 1 %then %do;
			if SDMVSTRA = 1 and SDMVPSU = 2 then SDMVPSU = 1;
			else if SDMVSTRA = 1 and SDMVPSU = 3 then SDMVPSU = 2;
		%end;
		
		/* Stratum 86: combine PSUs 2 and 3. */
		%if &fix86. = 1 %then %do;
			if SDMVSTRA = 86 and SDMVPSU = 3 then SDMVPSU = 2;
		%end;
		
		/* Stratum 90: combine PSUs 2 and 3.
		   (Previously tested the misspelled variable SMDVPSU, which is always
		   missing, so this stratum was never corrected.) */
		%if &fix90. = 1 %then %do;
			if SDMVSTRA = 90 and SDMVPSU = 3 then SDMVPSU = 2;
		%end;
		
		/* Stratum 91: combine PSUs 1 and 3. */
		%if &fix91. = 1 %then %do;
			if SDMVSTRA = 91 and SDMVPSU = 3 then SDMVPSU = 1;
		%end;
		
		/* Stratum 92: combine PSUs 2 and 3. */
		%if &fix92. = 1 %then %do;
			if SDMVSTRA = 92 and SDMVPSU = 3 then SDMVPSU = 2;
		%end;

		/* Stratum 156: combine PSUs 1 and 3. */
		%if &fix156. = 1 %then %do;
			if SDMVSTRA = 156 and SDMVPSU = 3 then SDMVPSU = 1;
		%end;
	run;
	
	/* Check if any more strata do not have exactly 2 PSUs. */
	proc sort data=&outlib..&outname; by SDMVSTRA SDMVPSU; run;
	
	/* One row per stratum with the number of distinct PSUs it contains. */
	data _psu_counts (keep = SDMVSTRA psu_count);
		set &outlib..&outname.;
		by SDMVSTRA SDMVPSU;
		
		retain psu_count;
		
		if first.SDMVSTRA then psu_count = 0;
		
		if first.SDMVPSU then psu_count = psu_count + 1;
		
		if last.SDMVSTRA then output;
	run;
	
	/* Collect the offending strata into one comma-separated list and write a
	   single WARNING line to the log. */
	data _NULL_;
		set _psu_counts end=last_obs;
		
		/* warning_text needs an explicit length: a variable first created by
		   CAT() defaults to 200 bytes, which is too short for the message
		   prefix plus the 2000-byte not_fixed list. */
		length not_fixed $2000. warning_text $2100.;
		
		retain not_fixed;
		
		if _N_ = 1 then not_fixed = "";
		
		if psu_count ^= 2 then not_fixed = catx(", ", not_fixed, SDMVSTRA);
		
		if last_obs = 1 and not_fixed ^= "" then do;
		
			warning_text = cat("WARNING: The following strata do not have exactly 2 PSUs but could not be fixed: ", not_fixed);
			put warning_text;
		end;
	run;
%mend fix_nhanes_strata;