(*:Mathematica:: Version 7     *)
(*:Context: "swirl`"           *)
(*:Swirls and Ripples          *)
(*:Version  1                  *)
(*:Input:                      *)
(*:Author: 2010 Stuart G. Baker *)



(* Open the swirlreport` package context; swirlroc` is listed as a required
   package so that its exported symbols are visible here. *)
BeginPackage["swirlreport`","swirlroc`"]

(* Exported symbols. Mentioning each name in a ::usage assignment creates the
   symbol in the package context before the private section begins. *)

(*Goal 1 = discovery and testing*)
ReportGoal1::usage="ReportGoal1"
ReportWrapper::usage="ReportWrapper"

(*Goal 2 = discovery only: reporting*)
 ReportGoal2::usage="ReportGoal2"
 ReportProgress1::usage="ReportProgress1" 
 
(*Goal 2 = discovery only: computation*) 
 ComputeRuleDist::usage="ComputeRuleDist"
 ComputeGeneFreq::usage="ComputeGeneFreq"
 ComputeFinalModel::usage="ComputeFinalModel"

(* Remove definitions left over from an earlier load of this file.
   NOTE(review): ReportWrapper is exported above but is not in this list;
   confirm whether that omission is intentional. *)
Clear[ReportProgress1,ReportGoal1,ReportGoal2,
  ComputeRuleDist,ComputeGeneFreq,ComputeFinalModel]


(* Everything below is implementation. The context name has no leading
   backquote, so this opens the top-level context Private` rather than
   swirlreport`Private`.
   NOTE(review): any other package that also uses Begin["Private`"] shares
   these helper symbols; confirm this is intended before changing it. *)
Begin["Private`"]


(*===============GOAL 1 DISCOVERY AND TESTING==============================================*)
 	


(* ReportGoal1: print the fitted classification rule for goal 1 (discovery and
   testing), followed by a key mapping the selected gene numbers to names.
     data     - passed unchanged to ReportRule
     set      - {res, metric, distance, auctab, postab}; res[[1]] holds the
                selected gene numbers (posIN)
     genename - list of gene names, indexed by gene number
   Returns Null; all output is produced with Print. *)
ReportGoal1[data_,set_,genename_]:=
Module[{res,repfin,metric,distance,auctab,postab,genenumlist,genenamelist,genepair,insert},
    (*basics*)
    {res,metric,distance,auctab,postab}=set;
    (* ReportRule expects {metric,distance} followed by the fitted components.
       The list is held in the Module local repfin so that nothing is left
       behind in the enclosing context after the call. *)
    repfin={metric,distance} ~Join~ res;
    (*CLASSIFICATION RULE*)
    ReportRule[" ",data,repfin];
    (*GENE KEY*)
    genenumlist=res[[1]]; (*posIN*)
    genenamelist=genename[[genenumlist]];
    (* one " = " separator per selected gene *)
    insert=Table[" = ",{Length[genenumlist]}];
    genepair=Transpose[{genenumlist,insert,genenamelist}];
    Print["          Key to genes selected"];
    Print["            ",TableForm[genepair]];
    Return[Null]]


  (* ReportRule: print one classification rule as a table of per-gene class
     centroids and standard errors.
       name   - label printed in front of the rule
       data   - {x0,x1}; the second dimension of each matrix is used as the
                class sample size when pooling variances
       repfin - {metric,distance,posIN,posOUT,tlist,mlist0,mlist1,vm0,vm1}
     distance==1 reports the pooled variance for both classes; distance==2
     reports each class's own variance. Any other value triggers a message and
     the standard-error columns show "-".
     Returns Null; all output is produced with Print. *)
  ReportRule::distance="Distance code `1` is not 1 or 2; standard errors are not reported."
  ReportRule[name_,data_,repfin_]:=
		Module[{metric,distance,posIN,posINx,posOUT,tlist,mlist0,mlist1,vm0,vm1,x0,x1,p0,n0,p1,n1,
		 vm,mlist0r,mlist1r,slist0r,slist1r,matr,disname},
		  (*components; every name is a Module local, so values from an
		    earlier call can never be picked up by a later one*)
		  {metric,distance,posIN,posOUT,tlist,mlist0,mlist1,vm0,vm1}=repfin;
		  {x0,x1}=data;
		  {p0,n0}=Dimensions[x0];
		  {p1,n1}=Dimensions[x1];
		  mlist0r=rxx[mlist0];
		  mlist1r=rxx[mlist1];
		  Which[
		    distance==1,
		      (*pooled variance, shared by both classes*)
		      vm=(vm0 (n0-1) + vm1 (n1-1) )/(n0+n1-2);
		      slist0r=rxx[Sqrt[vm]];
		      slist1r=slist0r;
		      disname="D=1",
		    distance==2,
		      (*separate variance for each class*)
		      slist0r=rxx[Sqrt[vm0]];
		      slist1r=rxx[Sqrt[vm1]];
		      disname="D=2",
		    True,
		      (*unknown code: still show the centroids, but mark the
		        standard errors as unavailable instead of printing
		        unassigned symbols*)
		      Message[ReportRule::distance,distance];
		      slist0r=Table["-",{Length[mlist0]}];
		      slist1r=slist0r;
		      disname="D="<>ToString[distance]];
		  (*the comma before disname matters: without it the two strings
		    are multiplied together instead of printed one after the other*)
		  Print[" ",name,"        ",metric," ",disname];
		  (*NOTE(review): a one-element posIN is wrapped in an extra list
		    before transposing - confirm this is the intended display*)
		  If[Length[posIN]==1,
		  posINx={posIN},posINx=posIN];
		  matr=Transpose@{posINx,mlist0r,mlist1r,slist0r,slist1r};
		  Print["       ",TableForm[matr,TableHeadings->{None,{"gene","centroid-0","centroid-1","se-0","se-1"}}]];
		Return[Null]]

	
		  
		  (* rxx: round x (a number or a list of numbers) to one decimal place
		     and return the result as an approximate number *)
		  rxx[x_]:=N[Round[10 x]/10]
		  

  

	(* ReportWrapper: print the goal-1 summary over the random splits: ROC/RU
	   plots, the distribution of selected rules, and the most frequently
	   selected genes.
	     mat        - one row per split; each row is {auc, posvec, tset, res,
	                  metric, distance, auctab, postab}
	     fset, pi   - passed to PlotROC together with the per-split test sets
	     sift       - algorithm label, passed to PlotROC and ComputeRuleDist
	     maxsplitsw, maxgenes - passed to ComputeRuleDist
	   Returns Null; all output is produced with Print. *)
	ReportWrapper[mat_,fset_,sift_,pi_,maxsplitsw_,maxgenes_]:=
	  Module[{auclist,posveclist,tsetlist,resist,metriclist,distancelist,auctablist,postablist,
	         repplot,ruplot,reprule,nr,nc,max,range,reprule1,ranges,ranges1,repfreq,heading},
	    (*input: turn the per-split rows into per-quantity columns*)
	    {auclist,posveclist,tsetlist,resist,metriclist,distancelist,auctablist,postablist}=Transpose[mat];
	    (*ROC CURVES FOR RANDOM SPLITS*)
	    (*repplot and ruplot are Module locals so the plots do not linger
	      in the enclosing context after the call*)
	    {repplot,ruplot}=PlotROC[fset,tsetlist,sift,pi];
	    Print["        ROC and RU curves in training-test sample (95% CI)"];
	    Print["          ",GraphicsRow@Join[repplot,ruplot]];
	    (*DISTRIBUTION OF RULES*)
	    reprule=ComputeRuleDist[sift,metriclist,distancelist,posveclist,maxsplitsw,maxgenes];
	    Print["      Rule distributions in training-training sample"];
	    {nr,nc}=Dimensions[reprule];
	    (*show the three label columns plus at most five gene-count columns.
	      NOTE(review): the nc-1 bound leaves out the last column of
	      reprule - confirm that this is intended*)
	    max=Min[8,nc-1];
	    range=Range[max-3];
	    reprule1=Take[#,max]& /@ reprule;
	    ranges=ToString[#]& /@ range;
	    ranges1={"algorithm","boundary","distance"} ~Join~ ranges;
	    Print["       ",TableForm[reprule1,TableHeadings->{Automatic,ranges1}]];
	    (*GENES WITH HIGHEST FREQUENCIES*)
	    (*ComputeGeneFreq does not read its first argument, and this function
	      has no gene names or show flag of its own, so pass an explicit
	      placeholder and rely on the default show->False rather than handing
	      over unbound symbols*)
	    repfreq=ComputeGeneFreq[None,posveclist];
	    Print["      Most frequently selected genes in training-training sample"];
	    heading={{"gene","probablity"},{"gene 1","gene 2","gene 3"}};
	    Print["       ",TableForm[repfreq, TableHeadings->heading]];
	    Return[Null]]


(*=================GOAL 2 DISCOVERY ONLY================================*)



(* ReportGoal2: print the goal-2 (discovery only) report for two algorithms.
     data            - passed unchanged to ReportRule
     genename        - list of gene names, indexed by gene number
     res1, res2      - {repplot, ruplot, reprule, repfreq, repfin} for the
                       first ("Greedy") and second ("Wrapper") algorithm
     showfinalmodel  - True to print the rule fitted to all data
     showgenekey     - True to print the gene-number/gene-name key
   Returns Null; all output is produced with Print. *)
ReportGoal2[data_,genename_,res1_,res2_,showfinalmodel_,showgenekey_]:=
Module[{genenumlist1,genenumlist2,genenumlist,genenamelist,genenumlistfinal,genepair,
        repplot1,reprule1,reprule1x,repfreq1,repfin1,ruplot1,
        repplot2,reprule2,reprule2x,repfreq2,repfin2,ruplot2,
        alg,matfreq,matfreq1,numshow,plotrow,
        coremat,colmat,repmatx,repmat,colnum,colname,colnamex,
        fracvec,minvec,maxvec,meanvec},
    (*Input*)
    {repplot1,ruplot1,reprule1,repfreq1,repfin1}=res1;
    {repplot2,ruplot2,reprule2,repfreq2,repfin2}=res2;
    (*start empty so the gene key can still be built when the final
      model is not shown*)
    genenumlistfinal={};
    (*ROC and RU CURVES*)
    Print["==================RESULTS IN TEST SAMPLES-----------------"];
    Print["ROC curves in test sample (95% CI)"];
    plotrow=GraphicsRow[{repplot1,repplot2,ruplot1,ruplot2}];
    Print@Show[plotrow];
    (*DISTRIBUTION OF RULES*)
    Print["==================RESULTS IN TRAINING SAMPLES-----------------"];
    Print["Rule distributions in training sample"];
    repmat=Join[reprule1,reprule2];
    (*one {frac,min,max,mean} summary per rule row*)
    coremat=RuleSummary[#]& /@ repmat;
    {fracvec,minvec,maxvec,meanvec}=Transpose@coremat;
    (*show gene-count columns up to the largest count in use, at most 10*)
    numshow=Min[10,Max[maxvec]];
    reprule1x=Take[#,numshow+3]& /@ reprule1;
    reprule2x=Take[#,numshow+3]& /@ reprule2;
    repmatx=Join[reprule1x,reprule2x];
    colnum=ToString[#]& /@ Range[numshow];
    colname=Join[{"algorithm","metric","distance"},colnum];
    Print@TableForm[repmatx,TableHeadings->{None,colname}];
    (*second table: the three label columns next to the summaries*)
    colmat=Take[#,3]& /@ repmat;
    repmatx=Transpose[Transpose[colmat] ~Join~ Transpose[coremat]];
    colnamex={"algorithm","metric","distance","frac","min","max","mean"};
    Print@TableForm[repmatx,TableHeadings->{None,colnamex}];
    (*GENE FREQUENCIES*)
    Print["Most frequently selected genes in training sample"];
    matfreq={Flatten[repfreq1],Flatten[repfreq2]};
    alg={"Greedy","Wrapper"};
    matfreq1=Transpose[Join[{alg},Transpose[matfreq]]];
    Print@TableForm[matfreq1, TableHeadings->{None,{"algorithm","gene 1","gene 2","gene 3","frac 1"," frac 2","frac 3"}}];
    (*FINAL MODEL*)
    If[showfinalmodel,
        Print["========FINAL MODEL FIT TO ALL DATA==========="];
        ReportRule["Greedy",data,repfin1];
        ReportRule["Wrapper",data,repfin2];
        (*element 3 of each repfin is posIN, the selected gene numbers*)
        genenumlistfinal= repfin1[[3]] ~Join~ repfin2[[3]]];
    (*NAMES OF GENES CORRESPONDING TO RULES*)
    If[showgenekey,
        Print["=====================Definitions of Reported Gene Numbers====================="];
        genenumlist1=repfreq1[[1]];
        genenumlist2=repfreq2[[1]];
        (*the frequency tables are zero-padded to three genes; 0 is not a
          gene number and genename[[0]] would return the head List, so the
          padding is removed before the names are looked up*)
        genenumlist=DeleteCases[Union[genenumlist1,genenumlist2,genenumlistfinal],0];
        genenamelist=genename[[genenumlist]];
        genepair=Transpose[{genenumlist,genenamelist}];
        Print@TableForm[genepair]];
    Return[Null]]


	  	  
	 (* RuleSummary: summarise one row of a rule-distribution table.
	    reprule is a row whose first three entries are labels; the remaining
	    entries are the values for 1, 2, 3, ... genes.
	    Returns {frac,min,max,mean}: the sum of the values, the first and last
	    positions holding a positive value, and the position-weighted mean. *)
	 RuleSummary[reprule_]:=
	 Module[{dist,hits,total,weighted},
	   (*strip the three label columns*)
	   dist=Drop[reprule,3];
	   (*positions whose value has sign +1*)
	   hits=Flatten[Position[Sign[dist],1]];
	   total=Plus @@ dist;
	   (*each position weighted by its share of the row total*)
	   weighted=Range[Length[dist]] dist/total;
	   {total,Min[hits],Max[hits],Plus @@ weighted}]


		
  

 (*-----Report AUC on each split------------------------*)		  
 	   
  (* ReportProgress1: print a progress report for one split: the selection
     made, followed by side-by-side tables of AUC values and gene numbers.
       auctab, postab  - tables with one row per entry of {"S1","S2","R1","R2"};
                         only the first five columns are displayed, and zero
                         entries are shown as "-"
       maxgenes        - accepted for interface compatibility; not used here
       i               - split number
       metric, distance, posvec - the selection reported for this split
       sift            - label printed at the start of the line
       trainsubjects0, trainsubjects1 - training subjects of class 0 and 1,
                         printed (sorted) only when showprog2 is True
     Returns Null; all output is produced with Print. *)
  ReportProgress1[auctab_,postab_,maxgenes_,i_,metric_,distance_,posvec_,sift_,trainsubjects0_,trainsubjects1_,showprog2_]:=
   Module[{auctab1,auctabr,auctabx,postab1,postabr,postabx,head},
	  (*display at most 5 values per row; rows shorter than 5 are shown in
	    full instead of making Take fail*)
	  auctab1=Take[#,Min[5,Length[#]]]& /@ auctab;
	  auctabr=Map[modaucb,auctab1,{2}];
	  postab1=Take[#,Min[5,Length[#]]]& /@ postab;
	  postabr=Map[modtabb,postab1,{2}];
	  head={"S1","S2","R1","R2"};
	  auctabx=TableForm[auctabr,TableHeadings->{head,Automatic}];
	  postabx=TableForm[postabr,TableHeadings->{head,Automatic}];
	  Print[sift," split ",i,"  selection is  " ,{metric,distance},"  genes ",posvec];
	  Print["       AUC                               Genes       "];
	  Print["   ",auctabx,"        ",postabx];
	  If[showprog2,
	    Print["    train class 0  ", Sort[trainsubjects0]];
	    Print["    train class 1  ", Sort[trainsubjects1]]];
    Return[Null]]


  (* modaucb: display form of an AUC value - "-" for zero, otherwise the value
     rounded to three decimal places *)
  modaucb[x_]:=If[x==0,"-",N[Round[1000 x]/1000]];
  (* modtabb: display form of a table entry - "-" for zero, otherwise the
     entry unchanged *)
  modtabb[x_]:=If[x==0,"-",x];



 														   
 
 
  (*----------Compute Classification Rules-----------------------*)

	(* ComputeRuleDist: tabulate how often each kind of rule was selected.
	   A rule is classified by boundary ("Ripple" or "Swirl"), distance code
	   (1 to 3) and the number of genes it uses.
	     sift         - algorithm label placed in the first column of every row
	     metriclist   - boundary chosen at each split
	     distancelist - distance code chosen at each split
	     posveclist   - selected gene numbers at each split
	     maxsplits    - number of splits; counts are divided by this
	     maxgenes     - rows are zero-padded to this many gene-count columns
	   Returns one row per (boundary, distance) combination that occurred at
	   least once: {sift, boundary, distance, f1, f2, ...}, where fk is the
	   fraction of splits (rounded to two decimals) that selected k genes.
	   NOTE(review): assumes every split falls into one of the six
	   boundary/distance categories with between 1 and Max gene count genes;
	   any other category would shift the table. *)
	ComputeRuleDist[sift_,metriclist_,distancelist_,posveclist_,maxsplits_,maxgenes_]:=
		Module[{lenlist,maxlen,observed,base,counts,fracs,mat,
		        aname,bname,cname,labelled,padded,kept},
		   (*number of genes selected at each split*)
		   lenlist=Length[#]& /@ posveclist;
		   maxlen=Max[lenlist];
		   (*one {boundary,distance,genecount} triple per split*)
		   observed=Transpose[{metriclist,distancelist,lenlist}];
		   (*one copy of every possible category, so that categories that
		     never occurred still get a table cell*)
		   base=Flatten[Table[{b,d,j},{b,{"Ripple","Swirl"}},{d,1,3},{j,1,maxlen}],2];
		   (*sorted tally; take off the one count contributed by base*)
		   counts=(Last /@ Sort[Tally[Join[observed,base]]])-1;
		   (*express as a fraction of splits, rounded to two decimals*)
		   fracs=counts/maxsplits;
		   fracs=N[Round[fracs 100]/100];
		   (*one row per boundary/distance pair, one column per gene count*)
		   mat=Partition[fracs,maxlen];
		   (*row labels, in the same order as the sorted tally*)
		   aname={sift,sift,sift,sift,sift,sift};
		   bname={"Ripple","Ripple","Ripple","Swirl","Swirl","Swirl"};
		   cname={1,2,3,1,2,3};
		   labelled=Transpose@Join[{aname,bname,cname},Transpose[mat]];
		   (*pad up to maxgenes per row; pad appends the row sum last*)
		   padded=pad[labelled,maxgenes,maxlen];
		   (*keep rows that occurred at least once, then drop the sum*)
		   kept=Select[padded,(#[[-1]]>0)&];
		   Return[Drop[#,-1]& /@ kept]];
	


		(* pad: extend every row of mat with maxgenes-maxlen zeros followed by
		   the sum of that row's last maxlen original entries (see padrow) *)
		pad[mat_,maxgenes_,maxlen_]:=
		With[{filler=Table[0,{maxgenes-maxlen}]},
		   Map[padrow[#,filler,maxlen]&,mat]]


		(* padrow: return vec followed by the entries of zero and then by the
		   sum of the last maxlen entries of vec *)
		padrow[vec_,zero_,maxlen_]:=
		Join[vec,zero,{Plus @@ Take[vec,-maxlen]}]


		(* adj: turn a {category,count} pair into {category,fraction}, where
		   fraction is count/len rounded to two decimal places *)
		adj[{a_,b_},len_]:=
		With[{share=b/len},
		   {a,N[Round[share 100]/100]}]

	


 (*----------------Compute Gene Frequencies based on rule selected at each split-----------------*)
 																	
   (* ComputeGeneFreq: find the (up to) three most frequently selected genes.
        genename   - not read by this function
        posveclist - selected gene numbers, one list per split
        show       - True to also print the table (default False)
      Returns {genes,fractions}, each zero-padded to length three by padvec and
      ordered from most to least frequent. A gene's fraction is its count
      divided by the total number of gene selections over all splits, rounded
      to two decimals. *)
   ComputeGeneFreq[genename_,posveclist_,show_:False]:=
	Module[{picked,total,ranked,ntop,top,padded,display},
	   picked=Flatten[posveclist];
	   total=Length[picked];
	   (*{fraction,gene} pairs, sorted ascending by fraction and then gene*)
	   ranked=Sort[Map[flipdiv[#,total]&,Tally[picked]]];
	   (*take top 3, or fewer if there are fewer than 3 distinct genes*)
	   ntop=Min[Length[ranked],3];
	   (*highest fraction first, each pair turned back into {gene,fraction}*)
	   top=Map[Reverse,Reverse[Take[ranked,-ntop]]];
	   (*separate gene and fraction rows, zero-padded to three entries*)
	   padded=Map[padvec,Transpose[top]];
	   (*table for intermediate output if desired*)
	   display=TableForm[padded,TableHeadings->{{"gene","frequency"},Automatic}];
	   If[show,
	      Print["  Genes with ",ntop," highest frequencies"];
	      Print["    ",display]];
	   padded]

	(* padvec: pad the list x on the right with zeros to length three.
	   Lists that already have three or more entries are returned unchanged,
	   and an empty list becomes {0,0,0}; no input length leaves the result
	   undefined. *)
	padvec[x_]:=PadRight[x,Max[3,Length[x]]]


   (* flipdiv: turn an {item,count} pair into {fraction,item}, where fraction
      is count/len rounded to two decimal places *)
   flipdiv[{a_,b_},len_]:={N[Round[100 b/len]/100],a}


 (*-------------Compute Final Results ---------------------------*)
	
	(* ComputeFinalModel: fit the final rule with TrainAutomatic and return it
	   in the layout that ReportRule reads.
	     data, genename - passed to TrainAutomatic
	     {maxgenes,topnum,sift,splitw,maxsplitsw,delta,deltaw} - tuning
	                      settings, passed to TrainAutomatic as one list
	     fset, pi, showsplitw, showw, goal - passed to TrainAutomatic
	     showprog1      - accepted for interface compatibility; not used here
	   Progress output is switched off for this fit (showprog2 is False).
	   Returns {metric,distance,posIN,posOUT,tlist,mlist0,mlist1,vlist0,vlist1}. *)
	ComputeFinalModel[data_,genename_,{maxgenes_,topnum_,sift_,splitw_,maxsplitsw_,delta_,deltaw_},
	     fset_,pi_,showprog1_,showsplitw_,showw_,goal_]:=
	 Module[{res,metric,distance,auctab,postab,cutvec,showprog2,
	        posIN,posOUT,tlist,mlist0,mlist1,vlist0,vlist1,repfin},
	 (*fit model*)
	   Print["   .... performing final calculation"];
	   showprog2=False;
	   (*cutvec is not used afterwards but must be a Module local so that the
	     six-part destructuring leaves nothing behind; showsplitw is this
	     function's own parameter, passed on by its exact name*)
	   {res,metric,distance,auctab,postab,cutvec}=
		  TrainAutomatic[data,{maxgenes,topnum,sift,splitw,maxsplitsw,delta,deltaw},
		     {fset,pi,genename,showsplitw,showw,goal,showprog2}];
	   {posIN,posOUT,tlist,mlist0,mlist1,vlist0,vlist1}=res;
	   repfin={metric,distance,posIN,posOUT,tlist,mlist0,mlist1,vlist0,vlist1};
	Return[repfin]]


(* Leave the implementation context opened by Begin above. *)
End[] 
(* Close the package opened by BeginPackage at the top of the file. *)
EndPackage[]

