(*:Mathematica:: Version 7     *)
(*:Context: "swirlcore`"       *)
(*:Swirl-and-Ripple            *)
(*:Version  1                  *)
(*:Input: 
  swirlg.m           TrainGAutomatic
  swirlw.m           TrainWAutomatic
  swirlreport.m      ReportGoal2
                     ReportGoal1
                     ReportProgress1
                     ComputeRuleDist
                     ComputeGeneFreq
                     ComputeFinalModel
  swirlroc.m         ComputeROCTestBoot
                     ComputeROCTestSample
                     PlotROCGoal1
                     PlotROCGoal2
  swirlplot.m        PlotTwoGenes           *)
(*:Author: 2010 Stuart G. Baker             *)



(* Open the swirlcore` package context; the remaining arguments are the sibling
   Swirl package contexts that are needed by the definitions below. *)
BeginPackage["swirlcore`","swirlg`", "swirlw`", "swirlsup`","swirlroc`","swirlplot`","swirlreport`"]

					 
(* The usage text lists all seven arguments in the same order as the definition
   of ClassifyCore (the earlier text omitted dname and misspelled genename). *)
ClassifyCore::usage="ClassifyCore[data,spec1,spec2,genename,dname,showsplitw,maxboot]"  

Clear[ClassifyCore]


(* NOTE(review): "Private`" has no leading backquote, so this is the absolute
   context Private` rather than swirlcore`Private`. Any other package that also
   does Begin["Private`"] would share these symbols -- confirm whether the
   sibling packages rely on that before changing it. *)
Begin["Private`"]

		

(* ClassifyCore: top-level driver.
   Arguments
     data        passed unchanged to ClassifyOneSift and ReportGoal2
     spec1       list {parsym,maxgenes,maxsplits,split,splitw,maxsplitsw,topnum,delta,phi,pi}
     spec2       list {fset,showgenekey,showprog1,showprog2,show2Dplot,showfinalmodel,showw,goal}
     genename, dname, showsplitw, maxboot
                 passed unchanged to ClassifyOneSift
   Runs ClassifyOneSift once with sift "Greedy" and once with sift "Wrapper";
   when goal==2 it additionally calls ReportGoal2 on the two results.
   Returns {repplot1,repplot2,ruplot1,ruplot2}, i.e. the first two elements of
   each ClassifyOneSift result (Greedy = 1, Wrapper = 2). *)
ClassifyCore[data_,spec1_,spec2_,genename_,dname_,showsplitw_,maxboot_]:=
Module[{parsym,maxgenes,maxsplits,split,splitw,maxsplitsw,topnum,delta,phi,pi,
        fset,showgenekey,showprog1,showprog2,show2Dplot,showfinalmodel,showw,goal,
        deltaw,res1,res2,
        repplot1,ruplot1,reprule1,repfreq1,repfin1,
        repplot2,ruplot2,reprule2,repfreq2,repfin2},
    (*inputs: showprog2 and goal are now Module locals; previously they were
      assigned here without being localized and leaked out of the function*)
    {parsym,maxgenes,maxsplits,split,splitw,maxsplitsw,topnum,delta,phi,pi}=spec1;
    {fset,showgenekey,showprog1,showprog2,show2Dplot,showfinalmodel,showw,goal}=spec2;
    (*ADJUST performance measure for Wrapper*)
    deltaw=phi delta;
    (*CALCULATION all feature selection algorithms; the trailing False is the
      show argument of ClassifyOneSift*)
    {res1,res2}=ClassifyOneSift[data,{parsym,maxgenes,maxsplits,split,splitw,maxsplitsw,topnum,#,delta,deltaw},
                                {fset,showprog1,showprog2,show2Dplot,showfinalmodel,showw,goal,pi},
                                genename,dname,showsplitw,maxboot,False]& /@ {"Greedy","Wrapper"};
    (*unpack both results; only the first two elements of each are returned*)
    {repplot1,ruplot1,reprule1,repfreq1,repfin1}=res1;
    {repplot2,ruplot2,reprule2,repfreq2,repfin2}=res2;
    If[goal==2,ReportGoal2[data,genename,res1,res2,showfinalmodel,showgenekey]];
    {repplot1,repplot2,ruplot1,ruplot2}]


 

(*---------------CLASSIFY FOR EACH FEATURE SELECTION (SIFT) ALGORITHM----------------------------------------------*)
																  

(* ClassifyOneSift: run one feature-selection algorithm (sift = "Greedy" or
   "Wrapper") over maxsplits random splits and build the report pieces.
   The first list argument carries the numeric/spec settings, the second the
   display flags plus goal and pi; the remaining arguments are forwarded to the
   helpers called below.
   Returns {repplot,ruplot,reprule,repfreq,repfin}.
     goal==1: plots come from PlotROCGoal1 using the first split only, and
              reprule, repfreq, repfin are all set to 0.
     goal==2: plots come from PlotROCGoal2 over all splits, and reprule,
              repfreq, repfin come from ComputeRuleDist, ComputeGeneFreq and
              ComputeFinalModel. *)
ClassifyOneSift[data_,{parsym_,maxgenes_,maxsplits_,split_,splitw_,maxsplitsw_,topnum_,sift_,delta_,deltaw_},
               {fset_,showprog1_,showprog2_,show2Dplot_,showfinalmodel_,showw_,goal_,pi_},
                genename_,dname_,showsplitw_,maxboot_,show_]:=
Module[{specs,cutvec,cutvectab,
        auclist,tprlist1,tprlist2,scorelist,distancelist,posveclist,
        repplot,ruplot,reprule,repfreq,repfin,i,tset,tsetx,siftname},
    Print["Feature Selection via ",sift];
    specs={parsym,maxgenes,maxsplits,split,splitw,maxsplitsw,topnum,sift,delta,deltaw};
    (*lower-case label handed to the ROC plotting routines*)
    If[sift=="Greedy", siftname="greedy"];
    If[sift=="Wrapper", siftname="wrapper"];
    (*Iterate over splits; fixed seed so both sift algorithms start from the
      same random state*)
    SeedRandom[18];
    {auclist,tprlist1,tprlist2,scorelist,distancelist,posveclist,tset,cutvectab}=
        Transpose@Table[ClassifyEachSplit[data,specs,fset,pi,showprog1,showprog2,show2Dplot,
                                          showw,goal,genename,showsplitw,maxboot,i],{i,1,maxsplits}];
    If[goal==1,
        tsetx=tset[[1]];   (*for bootstraps*)
        cutvec=cutvectab[[1]];
        {repplot,ruplot}=PlotROCGoal1[fset,tsetx,siftname,pi,dname,cutvec];
        (*all three placeholders are part of the goal==1 branch; a comma in
          place of the last semicolon used to turn repfin=0 into the else
          branch of this If, leaving repfin unset when goal==1*)
        reprule=0; repfreq=0; repfin=0];
    If[goal==2,
        {repplot,ruplot}=PlotROCGoal2[fset,tset,siftname,pi,dname];
        reprule=ComputeRuleDist[sift,scorelist,distancelist,posveclist,maxsplits,maxgenes];
        repfreq=ComputeGeneFreq[genename,posveclist,show];
        repfin=ComputeFinalModel[data,genename,{maxgenes,topnum,sift,splitw,maxsplitsw,delta,deltaw},
                                 fset,pi,showprog1,showsplitw,showw,goal]];
    {repplot,ruplot,reprule,repfreq,repfin}]
									


																			 

(*-----------CLASSIFY FOR EACH SPLIT WITHIN A FEATURE SELECTION ALGORITHM---------------------------------------*)


(* ClassifyEachSplit: process one random training/test split (index i).
   Steps, in order: RanSample splits data; TrainAutomatic builds the rule on
   the training part; ReportProgress1 is called when showprog1 is set;
   ComputeROCTestSample evaluates the rule on the test part; for goal==1
   ComputeROCTestBoot is run as well and its tset replaces the test-sample
   one; PlotTwoGenes is called when exactly two genes were selected and
   show2Dplot is set.
   Returns {aucTEST,tprTEST1,tprTEST2,score,distance,posvec,tsetTEST,cutvectab}.
   Every intermediate is a Module local; postab, tsetTEST0 and the bootstrap
   results were previously assigned without being localized. *)
ClassifyEachSplit[data_,{parsym_,maxgenes_,maxsplits_,split_,splitw_,maxsplitsw_,topnum_,sift_,delta_,deltaw_},fset_,pi_,
   showprog1_,showprog2_,show2Dplot_,showw_,goal_,genename_,showsplitw_,maxboot_,i_]:=
Module[{dataTRAIN,dataTEST,resTRAIN,posvec,trainsubjects0,trainsubjects1,
        ROCsetTEST,ROCsetTESTboot,tsetTEST,tsetTEST0,
        aucTEST,tprTEST1,tprTEST2,
        aucTESTboot,tprTEST1boot,tprTEST2boot,tsetTESTboot,
        cutvectab,score,distance,auctab,postab},
    (*random split*)
    {dataTRAIN,dataTEST,{trainsubjects0,trainsubjects1}}=RanSample[data,split,False];
    (*compute for feature selection*)
    {resTRAIN,score,distance,auctab,postab,cutvectab}=
        TrainAutomatic[dataTRAIN,{maxgenes,topnum,sift,splitw,maxsplitsw,delta,deltaw},
                       {fset,pi,genename,showsplitw,showw,goal,showprog2}];
    (*Identify final list of genes --by number*)
    posvec=resTRAIN[[1]];
    (*Show Progress: AUC on each split*)
    If[showprog1,
       ReportProgress1[auctab,postab,maxgenes,i,score,distance,posvec,sift,trainsubjects0,trainsubjects1,showprog2]];
    (*Compute ROC curves TEST SAMPLE for 2 scores x 2 data sets for each distance*)
    ROCsetTEST=ComputeROCTestSample[dataTEST,resTRAIN,{score,distance},fset,False];
    {aucTEST,tprTEST1,tprTEST2,tsetTEST0}=ROCsetTEST;
    (*Bootstrap ROC for Goal 1: only the bootstrap tset is kept*)
    If[goal==1,
       ROCsetTESTboot=ComputeROCTestBoot[dataTEST,resTRAIN,{score,distance},fset,maxboot];
       {aucTESTboot,tprTEST1boot,tprTEST2boot,tsetTESTboot}=ROCsetTESTboot;
       tsetTEST=tsetTESTboot];
    If[goal==2, tsetTEST=tsetTEST0];
    (*Plot 2 genes if show2Dplot*)
    If[Length[posvec]==2 && show2Dplot,PlotTwoGenes[dataTRAIN,dataTEST,parsym,resTRAIN,showsplitw]];
    (*Output: note posvec is original positions*)
    {aucTEST,tprTEST1,tprTEST2,score,distance,posvec,tsetTEST,cutvectab}]


 (*----FORMULATE CLASSIFICATION RULE FOR TRAINING SAMPLE------------------------------*)				

(* TrainAutomatic: build the classification rule on a training sample.
   Dispatches on sift: "Greedy" calls TrainGAutomatic (with delta), "Wrapper"
   calls TrainWAutomatic (with maxsplitsw and deltaw). For goal==1 the result
   is also passed to ReportGoal1. Finally ComputeScore is applied to the same
   data to obtain the cut-point vector.
   Returns {res,score,distance,auctab,postab,cutvec}.
   postab is now a Module local; it used to be assigned without being
   localized. If sift is neither string, none of the training results are set. *)
TrainAutomatic[data_,{maxgenes_,topnum_,sift_,splitw_,maxsplitsw_,delta_,deltaw_},
              {fset_,pi_,genename_,showsplitw_,showw_,goal_,showprog2_}]:=
Module[{res,score,distance,auctab,postab,cutvec},
    If[sift=="Greedy",
       {res,score,distance,auctab,postab}=
           TrainGAutomatic[data,{maxgenes,topnum,sift,splitw,delta},showprog2]];
    If[sift=="Wrapper",
       {res,score,distance,auctab,postab}=
           TrainWAutomatic[data,{maxgenes,topnum,sift,splitw,maxsplitsw,deltaw},
                           {fset,pi,genename,showsplitw,showw,goal},showprog2]];
    If[goal==1,ReportGoal1[data,{res,score,distance,auctab,postab},genename]];
    (*cut points of the score on the training data*)
    cutvec=ComputeScore[data,res,{score,distance},fset];
    {res,score,distance,auctab,postab,cutvec}]





(* ComputeScore: cut points of the classification score.
   Arguments
     {x0,x1}     the two data sets; rows posIN are selected from each
     {posIN,posOUT,tlistx,xm0,xm1,vm0,vm1}
                 training result; only posIN, xm0, xm1, vm0, vm1 are used here
     {score,distance}
                 score is "Swirl" or "Ripple"; distance is forwarded to
                 SumOfDistanceSquaredTest
     fset        quantile levels; 0 and 1 are added at the two ends
     show        optional, default False; not used in this function
   Returns Quantile of the pooled scores of both data sets at {0,fset...,1}.
   All intermediates are Module locals; the previous list misspelled xtest0
   and omitted d11sq, fsetx and cutvec, so those were not localized. *)
ComputeScore[{x0_,x1_},{posIN_,posOUT_,tlistx_,xm0_,xm1_,vm0_,vm1_},{score_,distance_},fset_,show_:False]:=
Module[{xtest0,xtest1,d00,d01,d10,d11,d00sq,d01sq,d10sq,d11sq,d0sum,d1sum,r0,r1,fsetx},
    (*selected genes from training sample*)
    xtest0=x0[[posIN]];
    xtest1=x1[[posIN]];
    (*distance measures of selected genes from training sample*)
    {d00sq,d01sq,d10sq,d11sq}=SumOfDistanceSquaredTest[{xtest0,xtest1},{xm0,xm1,vm0,vm1},distance];
    If[score=="Swirl",
       d00=Sqrt[d00sq];
       d01=Sqrt[d01sq];
       d10=Sqrt[d10sq];
       d11=Sqrt[d11sq];
       (*ReplaceZero is applied to the denominators before dividing*)
       d0sum=ReplaceZero[d00+d01];
       d1sum=ReplaceZero[d10+d11];
       r0=d00/d0sum;    (*probability classify x0 as 0 = true positive rate*)
       r1=d10/d1sum];   (*probability classify x1 as 0 = false positive rate*)
    If[score=="Ripple",
       r0=d00sq-d01sq;
       r1=d10sq-d11sq];
    fsetx=Join[{0},fset,{1}];
    Quantile[Join[r0,r1],fsetx]]

(* SumOfDistanceSquaredTest: variance-scaled squared distances to two centroids.
   x0 and x1 are matrices (first dimension genes, second dimension subjects);
   xc0, xc1 are the centroids and vm0, vm1 the variances, one entry per gene.
   distance==1 (default) scales by the pooled variance of vm0 and vm1 weighted
   by (subjects-1); distance==2 scales distances to centroid 0 by vm0 and to
   centroid 1 by vm1.
   Returns {d00sq,d01sq,d10sq,d11sq}, each summed over genes so that there is
   one value per subject; dABsq is the distance of xA to centroid B. *)
SumOfDistanceSquaredTest[{x0_,x1_},{xc0_,xc1_,vm0_,vm1_},distance_:1]:=
Module[{nGenes0,nSubj0,nGenes1,nSubj1,var0,var1,pooled,scaledSq},
   (*first dimension is genes, second is subjects*)
   {nGenes0,nSubj0}=Dimensions[x0];
   {nGenes1,nSubj1}=Dimensions[x1];
   (*variance used for scaling*)
   If[distance==1,
      pooled=(vm0 (nSubj0-1) + vm1 (nSubj1-1) )/(nSubj0+nSubj1-2);
      var0=pooled;
      var1=pooled];
   If[distance==2,
      var0=vm0;
      var1=vm1];
   (*sum over genes of (value - centroid)^2/variance: one entry per subject*)
   scaledSq=Function[{x,centre,var}, Plus @@ ((x-centre)^2/var)];
Return[{scaledSq[x0,xc0,var0],    (*distance of x0 to centroid 0*)
        scaledSq[x0,xc1,var1],    (*distance of x0 to centroid 1*)
        scaledSq[x1,xc0,var0],    (*distance of x1 to centroid 0*)
        scaledSq[x1,xc1,var1]}]]; (*distance of x1 to centroid 1*)
		  

(* Leave the private implementation context entered by the Begin call above. *)
End[] 
(* Close the package opened by BeginPackage. *)
EndPackage[]

