(*Mathematica:: Version 8.0      *)
(*:Context:: "rufit`"        *)
(*:Title::  relative utility  *)
(*:Summary::       Decision analsyis for evaluating risk prediction via relative utility curves*)
(*:References::                  *)
(*Date   2016 Stuart G. Baker *)
(*Input
RUTableFitKey	            rufitkey.m*)

(*Package List
   rufit.m				main packages 
   rufitkey.m			main processing function
   rufitcore.m 			computes concave ROC curve and corresponding RU curve
   rufitplot.m			plots of ROC and RU curves and calibration
   rufitdata.m			generates data examples from the literature
   rufitreport.m			table estimates and table data
   rufitrange.m			calculations for range of test tradeoff
   Proofs
   rufitapprox.m
   rufittangentcondition.m   
   *)
   
   	      
BeginPackage["rufit`","rufitkey`", "rufitdata`","rufitcore`", "rufitplot`", "rufitreport`","rufitrange`","rufitapprox`","rufittangentcondition`"]
                           
                         


RUFit::usage="RUFit[data,options] where either 
       data={xmat,ymat,riskscorevec,riskscorename,modelname1,modelname2,markername,  datasetname,table} 
       data= {y,r1,r2,modelname1,modelname2,markername,datasetname,list} "


Options[RUFit]= {P->Automatic,ShowPlot->"Standard"}
 

P::usage="probability of developing disease: either Automatic or a number between 0 and 1"
ShowPlot::usage="All or Standard or Basic or Talk, with only the latter two applying to categorical data"

Clear[RUFit]
					
Begin["Private`"]	  



RUFit[dataF_,options___Rule]:=
Module[{core,len,markername,datasetname,datatype,resvec,corex,fracmaxRU,format,p0,showplot},
         {p0,showplot}={P,ShowPlot}/.{options}/.Options[RUFit];
         If[showplot=="Talk",format="talk", format="manu"];
             core={p0,format,showplot};
          (*non-user options*)
          fracmaxRU=.1;  (*mininum RU for prediction instead of no prediciton*)
          fracmaxRU=.02;  (*mininum RU for prediction instead of no prediciton*)
           pointsize=.01;      (*for ROC and RU curves*)
          showmap=False;  (*interpolation steps*)
           showzone=True;  (*treat all and treat none labels on RU*)
          showdata=False;
          showest=False;  (* estimates of interemediate quantities*)
          corex={showdata,showest,pointsize,showzone,showmap,fracmaxRU};
         (*special option*)
          numcut=50;      (*number of cutpoints for continuous data*)       
     	 len=Length[dataF];
 	 markername=dataF[[-3]];
  	datasetname=dataF[[-2]];
 	datatype=dataF[[-1]];
 	format=core[[2]];
 	 Print["DATA SET NAME: ",datasetname, "  MARKER NAME: ",markername, "  data type = ",datatype];
 	 If[datatype=="table" && format=="manu", Print["only ROC and RU curves for table data"]];
 	 If[datatype=="list",             RUListFit[dataF,core,corex,numcut]];
 	 If[datatype=="table",          RUTableFit[dataF,core,corex]];
 	 If[datatype=!="list" && datatype=!="table", Print["invalid datatype"]];
Return[Null]]


(*--------------------------Table Fit--------------------------------------------------*)   


RUTableFit[dataF0_,core_,corex_]:=
Module[{resvec,xmat,ymat,riskscorevec,riskscorename,  modelname1,modelname2,markername,
          datasetname,datatype, check1,check0,check},
    (*check inputs*)
        check0=CheckInput[core];
        check1=CheckDataTable[dataF0];
        check=check0&& check1;
    If[check ==True,
         {xmat,ymat,riskscorevec,riskscorename,  modelname1,modelname2,markername,
         datasetname,datatype}=dataF0;
        (*new data format with two risk scores*)
        dataF= {xmat,ymat,riskscorevec,riskscorevec,riskscorename,riskscorename,
              modelname1,modelname2,markername,datasetname,datatype};
              plotpdf=None;
        resvec=RUTableFitKey[dataF,core,corex,plotpdf]];
    If[check==False, Print["invalid input or data format"]];    
        
  Return[resvec]]
         
         
         
         
         
(*--------------------------List Fit--------------------------------------------------*)   
   
 RUListFit[dataF0_,core_,corex_,numcut_]:=
Module[{check0,check1,check,
         set,set0,set1, xmat,ymat,P,formatQ,
         P0,showdataQ,showestQ,showzone,format,
         y,r1,r2,modelname1,modelname2,markername,datasetname,datatype,
         riskscorevec1,riskscorename1,riskscorevec2,riskscorename2,
          dataF,resvec},
     (*check input*)        
                check1=CheckDataList[dataF0];
                check0=CheckInput[core];
                 check=check0 &&  check1;
     If[check, 
         {y,r1,r2,modelname1,modelname2,markername,datasetname,datatype}=dataF0;
         (*split into event and no event*)
        	 set=Transpose[{y,r1,r2}];
        	 set0=Select[set,(#[[1]]==0)&];
        	  set1=Select[set,(#[[1]]==1)&];
         (*histogram*)
         {P,formatQ,showplot}=core;
         plotpdf=GenDist[set0,set1];
         (*create tables*)
               xmat=GenTableFromList[set1,numcut];
               ymat=GenTableFromList[set0,numcut];
               {riskscorevec1,riskscorename1,riskscorevec2,riskscorename2}=GenRiskVecFromList[set,numcut];
       (*new data*)
            dataF={xmat,ymat,riskscorevec1,riskscorevec2,  riskscorename1,riskscorename2,
                    modelname1,modelname2,markername,datasetname,datatype};
          resvec=RUTableFitKey[dataF,core,corex,plotpdf]];
     Return[resvec]]

GenDist[setN_,setE_]:=
Module[{yN,rN1,rN2,yE,rE1,rE2,hist1,hist2,plot,d1,plot2d1},
{yN,rN1,rN2}=Transpose[setN];
{yE,rE1,rE2}=Transpose[setE];
hist1=GenHDPair[{rN1,rE1},"Model 1"];
hist2=GenHDPair[{rN2,rE2},"Model 2"];
plot=Show[GraphicsRow[{hist1,hist2}],ImageSize->Large];
Return[plot]]

GenHDPair[{rN1_,rE1_},modname_]:=
Module[{plotN,plotE},
minr=Min@Join[rN1,rE1];
maxr=Max@Join[rN1,rE1];

plotN=GenHD[rN1,Red,modname];
plotE=GenHD[rE1,Blue,modname];
plot=Show[plotN,plotE,PlotRange->{{minr,maxr},All}, Frame->True,
FrameLabel->{"score","density", modname,None},
AxesOrigin->{minr,0}];
Return[plot]];




GenHD[rN1_,color_,modname_]:=
Module[{min,max,d1,plotdvec,plotd1},
min=Min[rN1];
max=Max[rN1];
d1=HistogramDistribution[rN1];
maxd=Max[d1];
plotd1=DiscretePlot[PDF[d1,x], {x, min, max, .05}, 
 PlotStyle->color, 
Joined->True, Axes->True, AspectRatio->1];


Return[plotd1]]


GenTableFromList[matraw_,numcut_]:=
Module[{y,r1,r2,matpair,maxr,topr,inc,tab,rvec1,rvec2,i,j},
  {y,r1,r2}=Transpose[matraw];
  matpair=Transpose[{r1,r2}];
  maxr1=Max@r1;
  maxr2=Max@r2;
  topr1=Ceiling[maxr1 numcut]/numcut//N;
  topr2=Ceiling[maxr2 numcut]/numcut//N;
  inc1=topr1/numcut;
  inc2=topr2/numcut;
  tab=BinCounts[matpair,{0, topr1,inc1},{0,topr2,inc2}];
Return[tab]] 


GenRiskVecFromList[matraw_,numcut_]:=
Module[{y,r1,r2,matpair,maxr,topr,inc,riskvec,riskpairvec,riskscorevec0,riskscorevec,riskscorename},
  {y,r1,r2}=Transpose[matraw];
  matpair=Transpose[{r1,r2}];
    maxr1=Max@r1;
    maxr2=Max@r2;
    topr1=Ceiling[maxr1 numcut]/numcut//N;
    topr2=Ceiling[maxr2 numcut]/numcut//N;
    inc1=topr1/numcut;
  inc2=topr2/numcut;
  {riskscorevec1,riskscorename1}=GenRiskScore[topr1,inc1];
  {riskscorevec2,riskscorename2}=GenRiskScore[topr2,inc2];
Return[{riskscorevec1,riskscorename1,riskscorevec2,riskscorename2}]] 


GenRiskScore[topr_,inc_]:=
Module[{riskvec,riskpairvec,riskscorevec0,riskscorevec,riskscorename,riskscorevecDROP},
 riskvec=Table[x,{x,0,topr,inc}];
 riskpairvec=Partition[riskvec,2,1];
 (*RISK SCORE IS MEAN OF PAIRS*)
   riskscorevec0=Mean[#]& /@ riskpairvec;
   riskscorevec=(Round[# 10000]/10000//N) & /@  riskscorevec0;
(*RISK NAME IS INTERVAL*) 
  riskscorename=StringGen[#]& /@ riskpairvec;
Return[{riskscorevec,riskscorename}]]

 StringGen[{a_,b_}]:=StringJoin[ToString[a],"-",ToString[b]];     
      



CheckInput[core_]:=
Module[{P,format, check1,check2,check},
     {P0,format,showplot}=core;
     If[(NumberQ[P0] && (P0>0 && P0 <1)) || P0===Automatic, check1=True, check1=False; Print["invalid P"]];
    If[format=="talk" || format=="manu", check2=True, check2=False; Print["invalid format"]];     
    If[showplot=="All" || showplot=="Basic" || showplot=="Standard" || showplot=="Talk", check3=True, check3=False; Print["invalid showplot"]];     
    check=check1 &&  check2 && check3;      
      Return[check]]


CheckDataTable[dataF_]:=
Module[{len,d1,d2,d3,d4,d5,d6,d7,d8,d9,showtab,
         check1,check2,check3,check4,check5},
            len=Length[dataF];
         If[len==9, check1=True, check1=False; Print["invalid length of data input"]]; 
         If[check1==True,
          {d1,d2,d3,d4,d5,d6,d7,d8,d9}=Dimensions[#]& /@ dataF;
           If[Length[d1]==2 && Length[d2]==2  &&
           d1[[1]]==d1[[2]] && d1[[1]]==d2[[1]] &&  d1[[1]]==d2[[2]] && 
           d1[[1]]==d3[[1]]  &&  d1[[1]]==d4[[1]],
           check2=True, 
           (*else*)
           check2=False; Print["invalid input"]]];
      check=check1 && check2;
      Return[check]]
      
      
CheckDataList[dataF_]:=
Module[{d1,d2,d3,d4,d5,d6,d7,d8,yindvec,resvec,
            set,set0,set1,y,r1,r2,modelname1,modelname2,datansetname,
            showtab,len,check1,check2,check3,check4,check5,checky,check,checkr1,checkr2},
      (*CHECK DATA INPUT*)
         len=Length[dataF];
         If[len==8, check1=True, 
             check1=False; check2=False; checky=False; checkr1=False; checkr2=False;
             Print["invalid length of data input"]]; 
         If[check1==True,
          {d1,d2,d3,d4,d4,d6,d7,d8}=Dimensions[#]& /@ dataF;
           If[d1==d2 && d1==d3,check2=True, 
                check2=False; checky=False; checkr1=False; checkr2=False; Print["invalid input"]]];
         If[check2==True,  
            {y,r1,r2,modelname1,modelname2,markername,datasetname,datatype}=dataF;
           If[Max[r1]<=1 && Min[r1] >=0, checkr1=True, checkr1=False; Print["invalid risk 1"]];
           If[Max[r2]<=1 && Min[r2] >=0, checkr2=True, checkr2=False; Print["invalid risk 2"]];
            yindvec=MemberQ[{0,1},#]& /@ y;
            If[Union[yindvec][[1]]==True, checky=True, checky=False; Print["invalid y"]]];  
         check=checky && checkr1&& checkr2;
Return[check]]




End[] 
EndPackage[]

Print["for main program:  RUFit[data,options]"];