	(*Mathematica:: Version 8.0      *)
(*:Context:: "rufitdata`"             *)
(*:Title::  data sets for relative utility methods *)
(*:Summary::                       *)
(*:References::                    *)
(*Date   2016 Stuart G. Baker *)

   	      
BeginPackage["rufitdata`"]  

(* Usage messages. Mentioning a symbol here, between BeginPackage and Begin,
   creates it in the rufitdata` context and thereby exports it.
   Each string shows the call pattern of the corresponding generator. *)
GenDataCS::usage="GenDataCS[show_:True]"
GenDataSBP::usage="GenDataSBP[show_:True]"
GenDataCRP::usage="GenDataCRP[show_:True]"
GenDataBD::usage="GenDataBD[show_:True]"
GenDataCRPPH::usage="GenDataCRPPH[show_:True]"
GenDataHDL::usage="GenDataHDL[show_:True]"
GenDataGENE::usage="GenDataGENE[show_:True]"
GenDataSNP1::usage="GenDataSNP1[show_:True]"  
GenDataSNP2::usage="GenDataSNP2[show_:True]"  
GenDataSNP3::usage="GenDataSNP3[show_:True]"  
GenDataSim1::usage="GenDataSim1[n_,show_:True]"
GenDataSim2::usage="GenDataSim2[n_,show_:True]"

(* Remove definitions left over from an earlier load of this file.
   Clear removes values and rules but keeps the usage messages set above. *)
Clear[GenDataSBP,GenDataCRP, GenDataBD,GenDataCRPPH, GenDataHDL,
GenDataGENE,GenDataCS,GenDataSNP1,GenDataSNP2,GenDataSNP3,GenDataSim1,GenDataSim2]
					
(* NOTE(review): "Private`" has no leading backquote, so it is an absolute
   context: helper symbols are created in Private` and not in
   rufitdata`Private`. The conventional form is Begin["`Private`"]; it is left
   unchanged here in case other packages rely on the shared Private` context. *)
Begin["Private`"]	  



(* GenDataW[show] builds the waist-size / diabetes reclassification data set.
   show (default True): when True, prints the data set label and citation.
   Returns the list
     {xmat, ymat, riskscore, riskscorename, model1name, model2name,
      markername, datasetname, "table"}
   xmat and ymat are 5 x 5 count tables; the fifth row of each is all zeros.
   NOTE(review): presumably xmat holds events and ymat non-events,
   cross-classified by the risk categories of the two models -- confirm
   against the code that consumes the table.
   Every working symbol is local to the Module so nothing leaks into the
   package context. *)
GenDataW[show_:True]:=
Module[{namex,xmat,ymat,riskscore,riskscorename,
        model1name,model2name,markername,datasetname,data},
namex="waist size risk factor for diabetes:  M\[UDoubleDot]hlenbruch K1, et al  Eur J Epidemiol 28:25-33, 2013 ";
xmat={{12,7,9,2,0},{17,90,92,41,8},{2,57,170,142,49},
         {0,3,51,67,30}, {0,0,0,0,0}};
ymat={{4512, 515,94,18,3},
          {4978,3984,1373,201,32},
          {814,2695,2598,854,120},
          {16,241,689,489,92},
          {0,0,0,0,0}};
(* Midpoints between the successive cut-points 0, .88, 2.37, 6.30, 16.21
   (percent); the last category uses its lower bound. Converted to fractions. *)
riskscore={.88/2, (.88+2.37)/2, (2.37+6.30)/2,(6.3+16.21)/2,16.21}/100//N;
riskscorename={"1","2","3","4","5"};
model1name="Model 1";
model2name="Model 2";
markername="waist size";
datasetname="W";
data={xmat,ymat,riskscore,riskscorename,model1name,model2name,markername,datasetname,"table"};
If[show,Print[StringJoin["data",datasetname],": ",namex]];
data]





(* GenDataSBP[show] builds the systolic blood pressure reclassification table.
   show (default True): when True, prints the data set label and citation.
   Returns the list
     {xmat, ymat, riskscore, riskscorename, model1name, model2name,
      markername, datasetname, "table"}
   nmat holds the cell counts and rmat the cell rates (entered in percent and
   converted to fractions); xmat = rmat*nmat and ymat = (1-rmat)*nmat
   elementwise, so xmat + ymat reproduces nmat. The products are not rounded.
   NOTE(review): presumably rmat is the observed event rate per cell and the
   two table dimensions are the risk categories of the two models -- confirm.
   Every working symbol is local to the Module so nothing leaks into the
   package context. *)
GenDataSBP[show_:True]:=
Module[{namex,rmat,nmat,xmat,ymat,riskscore,riskscorename,
        model1name,model2name,markername,datasetname,data},
namex="systolic blood pressure: Cook et al,  Annals of Internal Medicine 2009";
rmat={{1.3,6.8,0,0},{4.4,8.4,14.6,17.5},{0,4.3,14.3,34.2},{0,0,25.0,29.4}}/100//N;
nmat={{20372,696,23,0},{635,1441,307,7},{4,204,519,90},{0,2,54,204}};
xmat=rmat nmat;
ymat=(1-rmat) nmat;
(* One representative risk (as a fraction) per category named below. *)
riskscore={2.5, 7.5, 15, 20}/100//N;
riskscorename={"0%-5%","5%-10%","10%-20%",">20%"};
model1name="Model 1";
model2name="Model 2";
markername="SBP";
datasetname="SBP";
data={xmat,ymat,riskscore,riskscorename,model1name,model2name,markername,datasetname,"table"};
If[show,Print[StringJoin["data",datasetname],": ",namex]];
data]



(* GenDataCRP[show] builds the C-reactive protein reclassification table.
   show (default True): when True, prints the data set label and citation.
   Returns the list
     {xmat, ymat, riskscore, riskscorename, model1name, model2name,
      markername, datasetname, "table"}
   nmat holds the cell counts and rmat the cell rates (entered in percent and
   converted to fractions); xmat = rmat*nmat and ymat = (1-rmat)*nmat
   elementwise, without rounding.
   NOTE(review): presumably rmat is the observed event rate per cell -- confirm.
   Every working symbol is local to the Module so nothing leaks into the
   package context. *)
GenDataCRP[show_:True]:=
Module[{namex,rmat,nmat,xmat,ymat,riskscore,riskscorename,
        model1name,model2name,markername,datasetname,data},
namex="C-reactive protein: Cook et al, Annals of Internal Medicine 2006 ";
rmat= {{1.6,5.8,0,0},{2.4,7.8,15.2,0},{0,6.8,11.5,19.9},{0,0,18.8,27.1}}/100//N;
nmat={{23174,488,0,0},{267,1773,213,0},{0,110,653,40},{0,0,30,179}};
xmat=rmat nmat;
ymat=(1-rmat) nmat;
(* One representative risk (as a fraction) per category named below. *)
riskscore={2.5, 7.5, 15, 20}/100//N;
riskscorename={"0%-5%","5%-10%","10%-20%",">20%"};
model1name="Model 1";
model2name="Model 2";
markername="CRP";
datasetname="CRP";
data={xmat,ymat,riskscore,riskscorename,model1name,model2name,markername,datasetname,"table"};
If[show,Print[StringJoin["data",datasetname],": ",namex]];
data]


(* GenDataBD[show] builds the breast density reclassification table.
   show (default True): when True, prints the data set label and citation.
   Returns the list
     {xmat, ymat, riskscore, riskscorename, model1name, model2name,
      markername, datasetname, "table"}
   Unlike the rate-based data sets, xmat and the cell totals nmat are entered
   directly; rmat = xmat/nmat and ymat = (1-rmat)*nmat elementwise.
   Every working symbol is local to the Module so nothing leaks into the
   package context. *)
GenDataBD[show_:True]:=
Module[{namex,rmat,nmat,xmat,ymat,riskscore,riskscorename,
        model1name,model2name,markername,datasetname,data},
namex="breast density: Janes et al, Annals of Internal Medicine  2008";
xmat= {{1161, 415,0,0},
    {526,1328,754,32},
   {74,609,1419,621},  
   {0,38,340,1467}};
(* NOTE(review): the 0.01 in row 1, column 4 looks like a stand-in for an empty
   cell, presumably so xmat/nmat does not evaluate 0/0. As a consequence
   ymat[[1,4]] comes out as 0.01 rather than 0. *)
nmat={{176831,38500,71,0.01},
       {64297,99456,37149,1025},
       {8741,45478,71309,23267},
        {90,2672,15891,44452}};
rmat=xmat/nmat;
ymat=(1-rmat) nmat;
(* One representative risk (as a fraction) per category named below. *)
riskscore={2.5, 7.5, 15, 20}/100//N;
riskscorename={"0%-5%","5%-10%","10%-20%",">20%"};
model1name="Model 1";
model2name="Model 2";
markername="breast density";
datasetname="BD";
data={xmat,ymat,riskscore,riskscorename,model1name,model2name,markername,datasetname,"table"};
If[show,Print[StringJoin["data",datasetname],": ",namex]];
data]



(* GenDataHDL[show] builds the high density lipoprotein reclassification table.
   show (default True): when True, prints the data set label and citation.
   Returns the list
     {xmat, ymat, riskscore, riskscorename, model1name, model2name,
      markername, datasetname, "table"}
   nmat holds the cell counts and rmat the cell rates (entered in percent and
   converted to fractions); xmat = Round[rmat*nmat] and
   ymat = Round[(1-rmat)*nmat], so both tables are integer valued.
   NOTE(review): presumably rmat is the observed event rate per cell -- confirm.
   Every working symbol is local to the Module so nothing leaks into the
   package context. *)
GenDataHDL[show_:True]:=
Module[{namex,rmat,nmat,xmat,ymat,riskscore,riskscorename,
        model1name,model2name,markername,datasetname,data},
namex="high density lipoprotein:  Cook et al, Circulation 2007 ";
markername="HDL";
datasetname="HDL";
model1name="Model 1";
model2name="Model 2";
nmat={{22655,696,6,0},{593,1712,291,0},{3,214,512,76},{0,0,41,102}};
rmat={{1.5,5.9,0,0},{3.7,7.6,14.7,0},{0,7.5,10.7,23.3},{0,0,28.7,71.3}}/100//N;
xmat=Round[rmat nmat];
ymat=Round[(1-rmat) nmat];
(* One representative risk (as a fraction) per category named below. *)
riskscore={2.5, 7.5, 15, 20}/100//N;
riskscorename={"0%-5%","5%-10%","10%-20%",">20%"};
data={xmat,ymat,riskscore,riskscorename,model1name,model2name,markername,datasetname,"table"};
If[show,Print[StringJoin["data",datasetname],": ",namex]];
data]



(* GenDataGENE[show] builds the SNP / ATP III reclassification table.
   show (default True): when True, prints the data set label and citation.
   Returns the list
     {xmat, ymat, riskscore, riskscorename, model1name, model2name,
      markername, datasetname, "table"}
   nmat holds the cell counts and rmat the cell rates (entered in percent and
   converted to fractions); xmat = Round[rmat*nmat] and
   ymat = Round[(1-rmat)*nmat], so both tables are integer valued.
   NOTE(review): presumably rmat is the observed event rate per cell -- confirm.
   Every working symbol is local to the Module so nothing leaks into the
   package context. *)
GenDataGENE[show_:True]:=
Module[{namex,rmat,nmat,xmat,ymat,riskscore,riskscorename,
        model1name,model2name,markername,datasetname,data},
namex="SNP ATP III data: Paynter et al, Annals of Internal Med 2009  ";
nmat={{18609,205,0,0},{181,1933,83,0},{0,80,697,31},{0,0,26,284}};
rmat={{1.5,8.0,0,0},{4.9,8.0,19.3,0},{0,10.9,12.9,23.6},{0,0,15.3,31.0}}/100//N;
xmat=Round[rmat nmat];
ymat=Round[(1-rmat) nmat];
(* One representative risk (as a fraction) per category named below. *)
riskscore={2.5, 7.5, 15, 20}/100//N;
riskscorename={"0%-5%","5%-10%","10%-20%",">20%"};
model1name="Model 1";
model2name="Model 2";
markername="SNP";
datasetname="GENE";
data={xmat,ymat,riskscore,riskscorename,model1name,model2name,markername,datasetname,"table"};
If[show,Print[StringJoin["data",datasetname],": ",namex]];
data]



(* GenDataCRPPH[show] builds the C-reactive protein + parental history
   reclassification table.
   show (default True): when True, prints the data set label and citation.
   Returns the list
     {xmat, ymat, riskscore, riskscorename, model1name, model2name,
      markername, datasetname, "table"}
   nmat holds the cell counts and rmat the cell rates (entered in percent and
   converted to fractions); xmat = Round[rmat*nmat] and
   ymat = Round[(1-rmat)*nmat], so both tables are integer valued.
   NOTE(review): presumably rmat is the observed event rate per cell -- confirm.
   Every working symbol is local to the Module so nothing leaks into the
   package context. *)
GenDataCRPPH[show_:True]:=
Module[{namex,rmat,nmat,xmat,ymat,riskscore,riskscorename,
        model1name,model2name,markername,datasetname,data},
namex="C-reactive protein + parental history (no hypertension): Ridker et al, Circulation 2008 ";
nmat={{1592,204,0,0},{380,2642,306,0},{0,422,2850,284},{0,0,308,176}};
(* The rate 2 in row 2, column 4 multiplies an empty cell of nmat, so it has
   no effect on xmat or ymat. *)
rmat={{2.5,7,0,0},{3,6.6,16,2},{0,5.8,15.1,20.6},{0,0,20,38.6}}/100//N;
xmat=Round[rmat nmat];
ymat=Round[(1-rmat) nmat];
(* One representative risk (as a fraction) per category named below. *)
riskscore={2.5, 7.5, 15, 20}/100//N;
riskscorename={"0%-5%","5%-10%","10%-20%",">20%"};
model1name="Model 1";
model2name="Model 2";
markername="CRP + parent history";
datasetname="CRPPH";
data={xmat,ymat,riskscore,riskscorename,model1name,model2name,markername,datasetname,"table"};
If[show,Print[StringJoin["data",datasetname],": ",namex]];
data]




(* GenDataCS[show] builds the early C-section reclassification table.
   show (default True): when True, prints the data set label and citation.
   Returns the list
     {xmat, ymat, riskscore, riskscorename, model1name, model2name,
      markername, datasetname, "table"}
   xmat and ymat are 9 x 9 count tables over the ten-point risk bands named in
   riskscorename.
   NOTE(review): presumably xmat holds events and ymat non-events,
   cross-classified by the risk categories of the two models -- confirm.
   Every working symbol is local to the Module so nothing leaks into the
   package context. *)
GenDataCS[show_:True]:=
Module[{namex,xmat,ymat,riskscore,riskscorename,
        model1name,model2name,markername,datasetname,data},
namex="early C-section: Schuit et al,   BJOG Int J Obst Gyn 2012"; 
xmat={{78,17,0,0,0,0,0,0,0},{2,31,18,1,0,0,0,0,0},{0,49,82,75,19,0,0,0,0},
{0,9,105,188,162,32,1,0,0},{0,0,18,94,166,137,37,0,0},{0,0,2,14,37,70,56,17,0},
{0,0,0,0,1,18,21,16,1},{0,0,0,0,0,0,0,9,6},{0,0,0,0,0,0,0,0,1}};
ymat={{1295,110,0,0,0,0,0,0,0},{59,204,38,3,0,0,0,0,0},{0,222,276,163,20,0,0,0,0},
{0,41,262,387,203,32,2,0,0},{0,1,88,145,180,130,23,0,0},{0,0,2,21,46,49,27,6,0},
{0,0,0,2,3,14,14,6,0},{0,0,0,0,0,0,2,0,0},{0,0,0,0,0,0,0,1,0}};
(* Midpoint of every band (percent), converted to a fraction: .05, .15, ..., .85 *)
riskscore=N[(Mean /@ {{0,10},{10,20},{20,30},{30,40},{40,50},{50,60},{60,70},{70,80},{80,90}})/100];
riskscorename={"0-10%","10-20%","20-30%","30-40%","40-50%","50-60%","60-70%","70-80%","80-90%"};
model1name="Model 1";
model2name="Model 2";
markername="intrapartum";
datasetname="CS";
data={xmat,ymat,riskscore,riskscorename,model1name,model2name,markername,datasetname,"table"};
If[show,Print[StringJoin["data",datasetname],": ",namex]];
data]




(* GenDataSNP1[show] builds the first SNP / breast cancer reclassification
   table (Mealiffe et al).
   show (default True): when True, prints the data set label and citation.
   Returns the list
     {xmat, ymat, riskscore, riskscorename, model1name, model2name,
      markername, datasetname, "table"}
   xmat and ymat are 3 x 3 count tables.
   NOTE(review): presumably xmat holds cases and ymat controls,
   cross-classified by the risk categories of the two models -- confirm.
   Every working symbol is local to the Module so nothing leaks into the
   package context. *)
GenDataSNP1[show_:True]:=
Module[{namex,xmat,ymat,riskscore,riskscorename,
        model1name,model2name,markername,datasetname,data},
namex="SNP breast cancer Mealiffe et al, JNCI 2010";
xmat={{455,133,41},{155,172,157},{19,64,468}};
ymat={{605,108,23},{196,170,106},{24,65,339}};
(* Boundary value for the two open-ended categories, midpoint for the middle
   one; percent converted to fractions. *)
riskscore={1.5, 1.75, 2}/100//N;
riskscorename={"<1.5","1.5%-2%",">2%"};
model1name="Model 1";
model2name="Model 2";
markername="SNPs";
datasetname="SNP1";
data={xmat,ymat,riskscore,riskscorename,model1name,model2name,markername,datasetname,"table"};
If[show,Print[StringJoin["data",datasetname],": ",namex]];
data]

(* GenDataSNP2[show] builds the second SNP / breast cancer reclassification
   table (Dite et al).
   show (default True): when True, prints the data set label and citation.
   Returns the list
     {xmat, ymat, riskscore, riskscorename, model1name, model2name,
      markername, datasetname, "table"}
   xmat and ymat are 3 x 3 count tables.
   NOTE(review): presumably xmat holds cases and ymat controls,
   cross-classified by the risk categories of the two models -- confirm.
   Every working symbol is local to the Module so nothing leaks into the
   package context. *)
GenDataSNP2[show_:True]:=
Module[{namex,xmat,ymat,riskscore,riskscorename,
        model1name,model2name,markername,datasetname,data},
namex="SNP breast cancer Dite et al, Breast Cancer Res Treat 2013";
xmat={{769,66,17},{21,29,20},{2,9,29}};
ymat={{411,18,5},{6,10,5},{0,1,7}};
(* Boundary value for the two open-ended categories, midpoint for the middle
   one; percent converted to fractions. *)
riskscore={1.5, 1.75, 2}/100//N;
riskscorename={"<1.5","1.5%-2%",">2%"};
model1name="Model 1";
model2name="Model 2";
markername="SNPs";
datasetname="SNP2";
data={xmat,ymat,riskscore,riskscorename,model1name,model2name,markername,datasetname,"table"};
If[show,Print[StringJoin["data",datasetname],": ",namex]];
data]



(* GenDataSNP3[show] builds the third SNP / breast cancer reclassification
   table (Lee et al).
   show (default True): when True, prints the data set label and citation.
   Returns the list
     {xmat, ymat, riskscore, riskscorename, model1name, model2name,
      markername, datasetname, "table"}
   xmat (cases) and ymat are 5 x 5 count tables.
   NOTE(review): ymat is presumably the matching table of controls -- confirm.
   Every working symbol is local to the Module so nothing leaks into the
   package context. *)
GenDataSNP3[show_:True]:=
Module[{namex,xmat,ymat,riskscore,riskscorename,
        model1name,model2name,markername,datasetname,data},
namex="SNP breast cancer Lee et al, Breast Cancer Res 2014";
(*xmat is cases*)
xmat={{109,46, 0,0,0},{36,123,39,0,0},{0,13,19,12,0},{0,0,4,2,3},{0,0,0,1,4}};
ymat={{511, 84,0,0,0},{90,356,33,0,0},{0,53,50,12,0}, {0,0,12,3,1},{0,0,0,2,5}};
(* Boundary value for the two open-ended categories, midpoints for the inner
   ones (percent, converted to fractions). The top category ">2.5%" uses 2.5;
   the earlier value 2 was below the preceding category's 2.25. *)
riskscore={1, 1.25, 1.75, 2.25, 2.5}/100//N;
riskscorename={"<1",  "1%-1.5%", "1.5%-2%", "2%-2.5%", ">2.5%"};
model1name="Model 1";
model2name="Model 2";
markername="SNPs";
(* Was "SNP2", copied from GenDataSNP2; the printed label is now dataSNP3. *)
datasetname="SNP3";
data={xmat,ymat,riskscore,riskscorename,model1name,model2name,markername,datasetname,"table"};
If[show,Print[StringJoin["data",datasetname],": ",namex]];
data]




(* GenDataSim1[n, show] simulates a two-model risk prediction data set.
   n: number of observations in the training draw and again in the test draw.
   show (default True): when True, prints the data set label at the end. The
     sample size, prevalence and fitted parameters are printed regardless.
   Steps: seed the generator with 18 so the result is reproducible; draw a
   training sample {x1, x2, y} from GenCore; fit Model 1 (intercept + x1) and
   Model 2 (intercept + x1 + x2) with LogitModelFit on explicit design
   matrices; draw a second sample from GenCore and evaluate both fitted
   logistic risks on it.
   Returns {yTEST, r1, r2, model1name, model2name, markername, datasetname, "list"}.
   Every working symbol is local to the Module so nothing leaks into the
   package context. *)
GenDataSim1[n_,show_:True]:=
Module[{name,onevec,p,x1,x2,y,xmat1,model1,a0,a1,xmat2,model2,b0,b1,b2,
        x1TEST,x2TEST,yTEST,part1,r1,part2,r2,
        model1name,model2name,markername,datasetname,data},
SeedRandom[18];
name="simulated1";
(* Intercept column. ConstantArray is used because the Table[expr,n] form is
   not accepted by older kernels such as the Version 8.0 named in the header. *)
onevec=ConstantArray[1,n];
p=.2;
Print["Sim sample size ",n];
Print["Sim prevalence= ",p];
(* training sample *)
{x1,x2,y}=GenCore[p,n];
xmat1=Transpose@{onevec,x1};
model1=LogitModelFit[{xmat1,y}];
{a0,a1}=model1["BestFitParameters"];
Print["Sim1 Model 1 parameters ", Round[{a0,a1} 1000]/1000//N ];
xmat2=Transpose@{onevec,x1,x2};
model2=LogitModelFit[{xmat2,y}];
{b0,b1,b2}=model2["BestFitParameters"];
Print["Sim1 Model 2 parameters ", Round[{b0,b1,b2} 1000]/1000//N];
(* test sample: a second draw from the same seeded random stream *)
{x1TEST,x2TEST,yTEST}=GenCore[p,n];
part1=a0+a1 x1TEST;
r1=Exp[part1]/(1+Exp[part1]);
part2=b0+b1 x1TEST+b2 x2TEST;
r2=Exp[part2]/(1+Exp[part2]);
model1name="Model 1";
model2name="Model 2";
markername="marker2";
datasetname=name;
data={yTEST,r1,r2,model1name,model2name,markername,datasetname,"list"};
If[show,Print[StringJoin["data",datasetname],": ",name]];
data]


(* GenDataSim2[n, show] simulates a two-model risk prediction data set in
   which the baseline model uses predictor 2.
   n: number of observations in the training draw and again in the test draw.
   show (default True): when True, prints the data set label at the end. The
     sample size, prevalence and fitted parameters are printed regardless.
   Steps: seed the generator with 18 so the result is reproducible; draw a
   training sample {x1, x2, y} from GenCore; fit Model 1 (intercept + x2) and
   Model 2 (intercept + x1 + x2) with LogitModelFit on explicit design
   matrices; draw a second sample from GenCore and evaluate both fitted
   logistic risks on it.
   Returns {yTEST, r1, r2, model1name, model2name, markername, datasetname, "list"}.
   Every working symbol is local to the Module so nothing leaks into the
   package context. *)
GenDataSim2[n_,show_:True]:=
Module[{name,onevec,p,x1,x2,y,xmat1,model1,a0,a1,xmat2,model2,b0,b1,b2,
        x1TEST,x2TEST,yTEST,part1,r1,part2,r2,
        model1name,model2name,markername,datasetname,data},
SeedRandom[18];
name="simulated2";
(* Intercept column. ConstantArray is used because the Table[expr,n] form is
   not accepted by older kernels such as the Version 8.0 named in the header. *)
onevec=ConstantArray[1,n];
p=.2;
Print["Sim sample size ",n];
Print["Sim prevalence= ",p];
(* training sample *)
{x1,x2,y}=GenCore[p,n];
(* use predictor 2 for the baseline model *)
xmat1=Transpose@{onevec,x2};
model1=LogitModelFit[{xmat1,y}];
{a0,a1}=model1["BestFitParameters"];
Print["Sim2 Model 1 parameters ", Round[{a0,a1} 1000]/1000//N ];
xmat2=Transpose@{onevec,x1,x2};
model2=LogitModelFit[{xmat2,y}];
{b0,b1,b2}=model2["BestFitParameters"];
Print["Sim2 Model 2 parameters ", Round[{b0,b1,b2} 1000]/1000//N];
(* test sample: a second draw from the same seeded random stream *)
{x1TEST,x2TEST,yTEST}=GenCore[p,n];
(* Model 1 was fitted on x2, so it has to be scored on x2TEST; the earlier
   code applied the x2 coefficient to x1TEST. *)
part1=a0+a1 x2TEST;
r1=Exp[part1]/(1+Exp[part1]);
part2=b0+b1 x1TEST+b2 x2TEST;
r2=Exp[part2]/(1+Exp[part2]);
model1name="Model 1";
model2name="Model 2";
markername="marker2";
datasetname=name;
data={yTEST,r1,r2,model1name,model2name,markername,datasetname,"list"};
If[show,Print[StringJoin["data",datasetname],": ",name]];
data]




(* GenCore[p, max] draws one simulated sample of size max.
   p: success probability of the Bernoulli outcome.
   max: number of observations.
   y is a sorted vector of Bernoulli[p] draws, so all 0s (controls) come before
   all 1s (cases). For each predictor, values are drawn separately for the two
   groups and joined controls first, which lines them up with the sorted y:
     x1: Normal[0,1] when y=1, Normal[1,1] when y=0
     x2: Normal[0,1] when y=1, Normal[.5,1] when y=0
   Returns {x1, x2, y}.
   The Module list now names maxcontrol, the symbol the body actually assigns
   (it previously declared maxcontrols, leaving maxcontrol global). *)
GenCore[p_,max_]:=
Module[{x1case,x1control,x2case,x2control,x1,x2,y,maxcase,maxcontrol},
y=Sort@RandomVariate[BernoulliDistribution[p],max];
maxcase=Total[y];
maxcontrol=max-maxcase;
x1case=RandomVariate[NormalDistribution[0,1], maxcase];
x1control=RandomVariate[NormalDistribution[1,1], maxcontrol];
x2case=RandomVariate[NormalDistribution[0,1], maxcase];
x2control=RandomVariate[NormalDistribution[.5,1], maxcontrol];
x1=Join[x1control, x1case];
x2=Join[x2control, x2case];
{x1,x2,y}]



End[] 
EndPackage[]



(* Load-time script: build every data set once, without printing the
   citations, and store each one in a global symbol named data<NAME>. *)
showname=False;

(* Table-type data sets: every generator takes only the show flag. *)
{dataCS,dataSBP,dataCRP,dataCRPPH,dataHDL,dataGENE,dataBD,dataSNP1,dataSNP2,dataSNP3}=
  Map[#[showname]&,
      {GenDataCS,GenDataSBP,GenDataCRP,GenDataCRPPH,GenDataHDL,GenDataGENE,GenDataBD,
       GenDataSNP1,GenDataSNP2,GenDataSNP3}];

(* Simulated data sets: 10000 observations each. *)
{dataSim1,dataSim2}=Map[#[10000,showname]&,{GenDataSim1,GenDataSim2}];
 

(* List the symbols that are now available. *)
Print["dataCS, dataSBP,dataCRP,dataCRPPH,dataHDL, dataGENE,dataBD,dataSNP1,dataSNP2,dataSNP3"];
Print["dataSim1  dataSim2"];

