(*Mathematica:: Version 8.0      *)
(*:Context:: "mfit`"                     *)
(*:Title::  MFit            		*)
(*:Summary:: fitting algorithm for gene sequences *)
(*:References::  Finding biologically relevant response patterns in high-throughput experiments*)
(*Date 2013   Stuart G. Baker   *)
(*Packages
mfit.m                      main program; calls all other programs
mfitpair.m                compute results for model pairs
mfitsupport.m          support functions
mfitform.m	        compute heterochrony and heterometry
mfitkey.m                model selection
mfitplot.m                compute plots
mfitplotoverlay.m    overlay plot
mfitplotside.m		side-by-side plot
mfitreport.m		report lists of genes by model pair
mfitcore.m               core fitting program
mfitdiagnostic		plot of RPE versus AIC for good-fitting models*)

(*Data for example 
mfitdata.m               generate data in correct format from raw data and genename
mfitrawdata1.m	raw data for frog example 1
mfitrawdata2.m	raw data for frog example 2
mfitgenename.m    gene names for frog examples*)

(*Function Inputs
PlotTargetGene	      from mfitplot.m
MFitPairReport	      from mfitpair.m
MFitKey			      from mfitkey.m*)    

(*dataset={data,geneid,genename,times,xname,yname, plotname,dataname};
   data = rows indicate genes;   columns indicate times 
   geneid=  a list of gene identification numbers corresponding to data rows,  
   genename = a list of gene names corresponding to data rows,
   times= list of times of data collection,
   xname=name of x axis on plot
   yname= name of y axis on plot
   plotname= name of data set for plot
   dataname=  name of data set for storing files*)



(* Open the mfit` package context. The remaining context names are the companion
   packages listed in the header comment; presumably they are treated as needed
   packages and loaded here - confirm against the Mathematica version in use. *)
BeginPackage["mfit`","mfitpair`","mfitkey`","mfitcore`","mfitdata`","mfitplot`","mfitplotoverlay`",
   "mfitplotside`","mfitform`","mfitsupport`","mfitreport`","mfitdiagnostic`"]  

(* Usage messages for the two exported entry points. *)
MFit::usage="MFit[dataset1,dataset2,options]"
MFitPlotGene::usage="MFitPlotGene[dataset1,dataset2,geneidset,options]"

(* Remove definitions left over from an earlier load of the package. *)
Clear[MFit,MFitPlotGene]
  
(* Default option values; MFit and MFitPlotGene both resolve their options against this list. *)
Options[MFit]=
 { ShowPlot->False,
 ShowRPEAIC->False,
 ShowSummary->True,
 ShowReport->False,
  SplitType->"alternating",
  SplitFitSet->{1, 4, 7, 10,13},
   NewFitQ->False,
    ModelSetQ->"full+poly",
   NewNameCORE->"mfit",
  StoredNameCORE->"mfit"}

(* Usage messages for the option names. *)
ShowPlot::usage="True or False for showing plots"
ShowSummary::usage="True or False for showing summary tables"
ShowReport::usage="True or False for showing  tables for each gene pair"
NewFitQ::usage="True or False for fitting a new model or using stored fit"
NewNameCORE::usage="name which prepended to data set name"
StoredNameCORE::usage="name which is prepended to data set name"
ShowRPEAIC::usage="True or False for showing relation of RPE to AIC"
(* The three accepted values match the model lists in MFit and the validation in
   CheckDataInput; the previous text omitted the default, full+poly. *)
ModelSetQ::usage="reduced, full, or full+poly"  
SplitType::usage="alternating or custom"
SplitFitSet::usage="list of ranks for fitting used with SplitType->custom"
(* NOTE(review): the context name has no leading backquote, so this opens the absolute
   context Private` rather than mfit`Private`. Left unchanged because the companion
   packages may rely on sharing that context - confirm before changing. *)
Begin["Private`"]	  


(* MFit[dataset1,dataset2,options]
   Main entry point. Resolves the options against Options[MFit], validates both data
   sets with CheckDataInput, obtains the best-fitting-model results for each data set
   from MFitKey, prints the data set description via ReportX and hands both results
   to MFitPairReport.
     dataset1, dataset2 : lists of the form
         {data,geneid,genename,times,xname,yname,plotname,dataname}
     options            : rules overriding Options[MFit]
   Returns Null; all results are printed by the called functions.
   Every working variable is declared in the Module so that nothing is left behind in
   the enclosing context between calls (previously about a dozen symbols, including a
   mistyped "best2", were assigned globally). *)
MFit[dataset1_,dataset2_,options___Rule]:=
Module[{showplot,showsummary,showreport,showRPEAIC,newfitQ,
        newnameCORE,storednameCORE,modelsetQ,splittype,splitfitset,splitQ,
        exportplotQ,zbest,znext,newbestQ,par,showdatainput,kslope,kform,
        check1,check2,check,modelset,
        zbestset1,zbestset2,checkbest1,checkbest2,
        zbestmat1,zbestmat2,splitQ1,format},
  (*FROM OPTIONS: explicit options first, package defaults second*) 
  {showplot,showsummary,showreport,showRPEAIC,newfitQ,
   newnameCORE,storednameCORE,modelsetQ,splittype,splitfitset}=
     {ShowPlot,ShowSummary,ShowReport,ShowRPEAIC,NewFitQ,NewNameCORE,StoredNameCORE,
      ModelSetQ,SplitType,SplitFitSet}/. {options}/.Options[MFit];  
  (*SPLIT INFO FIXED VS EVALUATION*)
  splitQ={splittype,splitfitset};
  (*INPUTS FIXED*)
  {exportplotQ,zbest,znext,newbestQ,par,showdatainput,kslope,kform}=FixedInputs[];
  (*CHECK INPUTS: both data sets are always checked so that both get diagnosed*)      
  check1=CheckDataInput[dataset1,newfitQ,newnameCORE,storednameCORE,modelsetQ,splitQ];
  check2=CheckDataInput[dataset2,newfitQ,newnameCORE,storednameCORE,modelsetQ,splitQ];
  check=check1 && check2;
  If[check1===False, Print["data 1 incorrect"]];
  If[check2===False, Print["data 2 incorrect"]];

  (*MODEL LIST: an unrecognised modelsetQ gives an empty list; CheckDataInput has
    already rejected that case, so the list is then never used*) 
  modelset=Which[
    modelsetQ==="full+poly",
      {"flat",   "lineD", "transitionD", "hockeyD", "sigmoidD", "impulseD", "stepD","impulse+D","step+D",
       "lineU", "transitionU", "hockeyU", "sigmoidU", "impulseU", "stepU", "impulse+U","step+U",
       "polynomial3","polynomial5","polynomial7"},
    modelsetQ==="full",
      {"flat",   "lineD", "transitionD", "hockeyD", "sigmoidD", "impulseD", "stepD","impulse+D","step+D",
       "lineU", "transitionU", "hockeyU", "sigmoidU", "impulseU", "stepU", "impulse+U","step+U"},
    modelsetQ==="reduced",
      {"flat",   "lineD", "transitionD", "hockeyD", "sigmoidD", "impulseD", "stepD",
       "lineU", "transitionU", "hockeyU", "sigmoidU", "impulseU", "stepU"},
    True, {}];
  Print["modelset ",modelsetQ];

  (*RETRIEVE DATA ON BEST FITTING MODELS*) 
  If[TrueQ[check], 
    zbestset1=MFitKey[dataset1,{splitQ,exportplotQ,newfitQ,newbestQ},showdatainput, modelsetQ,kslope,
           {newnameCORE,storednameCORE},{zbest,znext},par];
    zbestset2=MFitKey[dataset2,{splitQ,exportplotQ,newfitQ,newbestQ},showdatainput, modelsetQ,kslope,
           {newnameCORE,storednameCORE},{zbest,znext},par];
    ReportX[dataset1,splitQ];        
    checkbest1=CheckBest[zbestset1];
    checkbest2=CheckBest[zbestset2];
    If[checkbest1 && checkbest2,
      (*each result is {zbestmat,zbest,znext,splitQ}; only the matrix of each set and
        the split information of the first set are used below*)
      {zbestmat1,splitQ1}=zbestset1[[{1,4}]];
      zbestmat2=zbestset2[[1]];
      (*PAIR REPORT*)
      format="small";
      MFitPairReport[modelset,{zbestmat1,zbestmat2,dataset1,dataset2,par,splitQ1,exportplotQ,modelsetQ,kform},
          showsummary,showreport,showplot,showRPEAIC,format],
     (*else*)
      Print["invalid best fitting input"]]];
  Null]







(*------------------------------------------------MFitPlotGene--------------------------------------------*)

(* MFitPlotGene[dataset1,dataset2,geneset,options]
   Calls MFitPlotGeneKey once for every gene identifier in geneset, in order.
     dataset1, dataset2 : data set lists as described for MFit
     geneset            : list of gene identifiers; a single identifier that is not
                          a list is treated as a one-element list
     options            : rules overriding Options[MFit]
   Returns Null.
   The split variables are declared in the Module so they no longer leak into the
   enclosing context. *)
MFitPlotGene[dataset1_,dataset2_,geneset_,options___Rule]:=
Module[{showplot,showsummary,showreport,showRPEAIC,newfitQ,
        newnameCORE,storednameCORE,modelsetQ,splittype,splitfitset,splitQ,
        targettype,genes},
  (*import options: explicit options first, package defaults second*)
  {showplot,showsummary,showreport,showRPEAIC,newfitQ,
   newnameCORE,storednameCORE,modelsetQ,splittype,splitfitset}=
     {ShowPlot,ShowSummary,ShowReport,ShowRPEAIC,NewFitQ,NewNameCORE,StoredNameCORE,
      ModelSetQ,SplitType,SplitFitSet}/. {options}/.Options[MFit];  
  (*SPLIT INFO FIXED VS EVALUATION*)
  splitQ={splittype,splitfitset};    
  (*genes are looked up by identifier, not by name*)
  targettype="GeneID";
  genes=If[ListQ[geneset],geneset,{geneset}];
  (*plot: the per-gene results are not used, so Scan replaces the former Map*)
  Scan[
    MFitPlotGeneKey[dataset1,dataset2,#,targettype,
        {newfitQ,showsummary,showreport,modelsetQ,splitQ,
         newnameCORE,storednameCORE}]&,
    genes];
  Null]


(* MFitPlotGeneKey[dataset1,dataset2,genetarget,targettype,{settings}]
   Plots and exports the figures for one gene.
     genetarget : gene identifier or gene name to look for in dataset1
     targettype : "GeneID" to search the geneid list, "GeneName" to search genename
     settings   : {newfitQ,showsummary,showreport,modelsetQ,splitQ,
                   newnameCORE,storednameCORE}
   The gene position found in dataset1 is passed to PlotTargetGene for both data sets.
   Returns Null.
   Changes from the earlier version: all working variables are local (pos0x in
   particular could previously carry a stale value from one call to the next), an
   unknown targettype is reported as "not found" instead of leaving the test
   unevaluated, and the MFitKey results are checked with CheckBest before use, as
   MFit does. *)
MFitPlotGeneKey[dataset1_,dataset2_,genetarget_,targettype_,
       {newfitQ_,showsummary_,showreport_,modelsetQ_,splitQ_,newnameCORE_,storednameCORE_}]:=
Module[{exportplotQ,zbest,znext,newbestQ,par,showdatainput,kslope,kform,
        zbestset1,zbestset2,zbestmat1,zbestmat2,
        geneid,genename,pos0x,pos0,geneid0,format,plotres},
  (*INPUTS FIXED*)
  {exportplotQ,zbest,znext,newbestQ,par,showdatainput,kslope,kform}=FixedInputs[];
  (*DATA ON BEST FITTING MODELS*) 
  zbestset1=MFitKey[dataset1,{splitQ,exportplotQ,newfitQ,newbestQ},showdatainput, modelsetQ,kslope,
         {newnameCORE,storednameCORE},{zbest,znext},par];
  zbestset2=MFitKey[dataset2,{splitQ,exportplotQ,newfitQ,newbestQ},showdatainput, modelsetQ,kslope,
         {newnameCORE,storednameCORE},{zbest,znext},par];
  If[CheckBest[zbestset1] && CheckBest[zbestset2],
    (*each result is {zbestmat,zbest,znext,splitQ}; only the matrices are needed here*)
    zbestmat1=zbestset1[[1]];
    zbestmat2=zbestset2[[1]];
    geneid=dataset1[[2]];
    genename=dataset1[[3]];
    pos0x=Which[
      targettype==="GeneID",   Position[geneid,genetarget],
      targettype==="GeneName", Position[genename,genetarget],
      True,                    {}];
    If[pos0x==={},
      Print["gene not found among good fits"],
     (*else: use the first match*)
      pos0=pos0x[[1,1]];
      geneid0=geneid[[pos0]];
      format="small";
      plotres=PlotTargetGene[pos0,zbestmat1,zbestmat2,dataset1,dataset2,par,splitQ,exportplotQ,
                    kform,format];
      Switch[format,
        "small", ExportSmall[plotres,geneid0],
        "large", ExportLarge[plotres,geneid0]]],
   (*else*)
    Print["invalid best fitting input"]];
  Null]

(*----------------------------Export------------------------------*)

(* ExportLarge[plotset,geneid0]
   plotset holds three plots. All three are printed first and then each one is
   exported through ExportX to mfit<geneid0>Fig1.jpg, ...Fig2.jpg and ...Fig3.jpg,
   in that order. Returns Null. *)
ExportLarge[plotset_,geneid0_]:=
Module[{figA,figB,figC,panels,tag,files},
  {figA,figB,figC}=plotset;
  panels={figA,figB,figC};
  tag=ToString[geneid0];
  files=("mfit"<>tag<>#)& /@ {"Fig1.jpg","Fig2.jpg","Fig3.jpg"};
  (*show everything before writing any file*)
  Scan[Print,panels];
  MapThread[ExportX,{files,panels}];
  Null]

(* ExportSmall[plotset,geneid0]
   Lays the plots in plotset out as one row, prints the combined figure and exports
   it through ExportX to mfit<geneid0>FigSet.eps. Returns Null. *)
ExportSmall[plotset_,geneid0_]:=
Module[{figure,filename},
  figure=Show[GraphicsRow[plotset],ImageSize->Large];
  filename="mfit"<>ToString[geneid0]<>"FigSet.eps";
  Print[figure];
  ExportX[filename,figure];
  Null]

(* ExportX[plotname,plot]
   Writes plot to the file plotname with Export and then prints a line naming the
   file. Returns Null. *)
ExportX[plotname_,plot_]:=
 (Export[plotname,plot];
  Print["exporting plot named ",plotname];
  Null)


(*------------------------------------------Check Input----------------------------------*)



(* CheckBest[zbestset]
   True when zbestset has exactly four parts, False otherwise. *)
CheckBest[zbestset_]:=Length[zbestset]==4


(* CheckDataInput[dataset,newfitQ,newnameCORE,storednameCORE,modelsetQ,splitQ]
   Validates one data set and the settings that go with it. Prints a message for
   every failed check and returns True only when all checks pass.
     check0  dataset has 8 parts
     check1  number of data rows equals the lengths of geneid and genename
     check2  number of data columns equals the number of times
     check3  newfitQ is True, or the stored-fit file storednameCORE<>dataname<>".m" exists
     check4  modelsetQ is "full+poly", "full" or "reduced"
     check5  split type is "alternating" or "custom"
     check6  for split type "custom", every entry of the fit set is a rank in
             1..number of times
   Changes from the earlier version:
     - all working variables are local, so no value survives from a previous call;
     - the fit-set range test is skipped when the data set failed the length check
       (the number of times is unknown then) and when the split type is not
       "custom", since SplitFitSet is documented as used with SplitType->custom
       only; previously the default fit set {1,4,7,10,13} rejected any
       "alternating" run on data with fewer than 13 time points;
     - data that is not a matrix is reported instead of breaking the Dimensions
       assignment;
     - settings of an unexpected kind yield False instead of an unevaluated test;
     - the fit-set message spells the option name correctly. *)
CheckDataInput[dataset_,newfitQ_,newnameCORE_,storednameCORE_,modelsetQ_,splitQ_]:=
Module[{check0,check1,check2,check3,check4,check5,check6,check,
        data,geneid,genename,times,dataname,storedname,
        ngene,nspecimen,ntimes,splittype,splitfitset,range},
  (*LENGTH CHECK*) 
  check0=(Length[dataset]==8);
  (*the dependent checks stay False unless they are reached and pass*)
  check1=False; check2=False; check3=False;
  If[check0,
    (*DIMENSION CHECK*)
    {data,geneid,genename,times}=dataset[[{1,2,3,4}]];
    dataname=dataset[[-1]];
    ntimes=Length[times];
    If[MatrixQ[data],
      {ngene,nspecimen}=Dimensions[data];
      check1=(ngene==Length[geneid] && ngene==Length[genename]);
      If[!check1, Print["data problem: inconsistent gene numbers and names"]];
      check2=(nspecimen==ntimes);
      If[!check2, Print["data problem: number of columns not equal number of times"]],
     (*else*)
      Print["data problem: data is not a matrix"]];
    (*STORED FILE CHECK: only needed when an existing fit is to be reused*)
    If[TrueQ[newfitQ],
      check3=True,
     (*else check if stored file exists*)
      storedname=StringJoin[storednameCORE,dataname,".m"];
      check3=TrueQ[FileExistsQ[storedname]];
      If[!check3, Print["stored file ",storedname," does not exist"]]],
   (*else*)
    Print["invalid length of data list"]];
  (*ModelSetQ*)
  check4=MemberQ[{"full+poly","full","reduced"},modelsetQ];
  If[!check4, Print["invalid modelsetQ"]];
  (*SplitQ*)
  If[ListQ[splitQ] && Length[splitQ]==2,
    {splittype,splitfitset}=splitQ,
    splittype=Null; splitfitset=Null];
  check5=MemberQ[{"alternating","custom"},splittype];
  If[!check5, Print["invalid splittype"]];
  (*the rank test needs a known number of times and applies to custom splits only*)
  If[check0 && splittype==="custom",
    range=Range[ntimes];
    check6=ListQ[splitfitset] && (And @@ (MemberQ[range,#]& /@ splitfitset)),
    check6=True];
  If[!check6, Print["invalid splitfitset"]];
  (*COMBINE CHECKS*)
  check=check0 && check1 && check2 && check3 && check4 && check5 && check6;
  If[!check,
    Print["Check Set ",{check0,check1,check2,check3,check4,check5,check6}];
    Print["INVALID INPUT"]];   
  check]

 (* FixedInputs[]
    Returns the settings that are not exposed as options, as the list
      {exportplotQ,zbest,znext,newbestQ,par,showdatainput,kslope,kform}.
    kform is now declared in the Module and the result is returned directly, so the
    call no longer assigns kform and res outside the function; the unused local
    modelsetQ was dropped. *)
 FixedInputs[]:=
 Module[{exportplotQ,zbest,znext,newbestQ,par,showdatainput,kslope,kform},
   exportplotQ=False; (*not used*)
   zbest=.10; (*maximum Z value for model to be declared good fit*)
   znext=.05;  (*maximum increase in Z value for next best fitting model to be declared good fit*)
   newbestQ=False;  (*only comes into use for storing best fit data*)     
   par={a,b,c,d,e,f,g,h,s}; (*symbols handed on to MFitKey, MFitPairReport and PlotTargetGene*)
   showdatainput=True; 
   kslope=.1; (*minimum slope for fitting hockey stick*)
   kform=.1;  (*for computing heterochrony and heterometry*)
   {exportplotQ,zbest,znext,newbestQ,par,showdatainput,kslope,kform}]  


(* ReportX[dataset,splitQ] or ReportX[dataset,splitQ,showsummary]
   Prints a description of the run: when the summary is enabled, a table with the
   number of genes and times and a table with the thresholds from FixedInputs;
   then, always, how the time points are split into fit and evaluation points.
     dataset     : {data,geneid,genename,times,xname,yname,plotname,dataname}
     splitQ      : {splittype,splitfitset}
     showsummary : optional True/False; when omitted, the current ShowSummary
                   setting in Options[MFit] is used
   Returns Null.
   Previously showsummary was a local variable that never received a value, so the
   test on it never evaluated and the two summary tables were never printed. *)
ReportX[dataset_,splitQ_,showsummary_:Automatic]:=
Module[{data,times,fixed,zbest,znext,kslope,kform,summaryQ,
        numgenes,numtimes,mat,matc,colname,colnamec,splittype,splitfitset},
  data=dataset[[1]];
  times=dataset[[4]];
  (*FixedInputs returns {exportplotQ,zbest,znext,newbestQ,par,showdatainput,kslope,kform}*)
  fixed=FixedInputs[];
  {zbest,znext}=fixed[[{2,3}]];
  {kslope,kform}=fixed[[{7,8}]];
  summaryQ=If[showsummary===Automatic,
     TrueQ[ShowSummary/.Options[MFit]],
     TrueQ[showsummary]];
  If[summaryQ,    
    numgenes=Length[data];
    numtimes=Length[times];
    mat={{numgenes,numtimes}};
    colname=  {" number of genes", " number of times"};
    Print["Data set characteristics"];
    Print["     ",TableForm[mat,TableHeadings->{None,colname}]];
    matc={{kslope,kform,zbest,znext}};
    colnamec=  {"hockey slope", "HC HM", "RPEbest","RPEaddfornext"};
    Print["Thresholds"];
    Print["     ",TableForm[matc,TableHeadings->{None,colnamec}]]];
  Print["Splitting into fixed and evaluation points"];
  {splittype,splitfitset}=splitQ;
  If[splittype=="alternating", Print["alternating times: odd for fit; even for evaluation"]];
  If[splittype=="custom", Print["ranks for fitting ",splitfitset]];
  Null]        
 




(* Close the context opened by Begin above, then close the package context opened by
   BeginPackage. *)
End[] 
EndPackage[]

(*for manuscript: a list of four gene identifiers. The assignment comes after
  EndPackage[], so the symbol is created in whatever context is current once the
  package has been closed, not inside the package contexts. Presumably meant as the
  gene list argument of MFitPlotGene - confirm.*)
geneset={15426,18046,3544,2414};
