(*:Mathematica:: Version 7*)
(*:Context: "swirldata`" *)
(*:Swirl-and-Ripple       *)
(*: Version 1 *)
(*: Input data read by this file:
               swirldata1.m
               swirldata20.m
               swirldata21.m
               swirldata3.m
               swirldata4.m
               swirldata50.m
               swirldata51.m

               swirldataname1.m
               swirldataname2.m
               swirldataname3.m
               swirldataname4.m
               swirldataname5.m*)
(*Author: 2010 Stuart G. Baker *)





BeginPackage["swirldata`"]
   
CreateDataSets::usage="CreateDataSets[]"
createdata1::usage="createdata1[]"
createdata2::usage="createdata2[]"
createdata3::usage="createdata3[]"
createdata4::usage="createdata4[]"
createdata5::usage="createdata5[]"

Clear[CreateDataSets,createdata1,createdata2,createdata3,createdata4,createdata5]  


Begin["Private`"]

CreateDataSets[]:=
Module[{data1,data2,data3,data4,dtaat5,res},
(*colon cancer data Alon et al *)
    data1=createdata1[];
(*leukemia data Golub at al *)
    data2=createdata2[];  
(*medulloblastoma*)
    data3=createdata3[];
(*prostate cancer data*)
    data4=createdata4[];
 (*leukemia St Jude*)
    data5=createdata5[];
res={data1,data2,data3,data4,data5};
Return[res]]



(*------------COLON CANCER DATA--------------------------*)

createdata1[]:=
Module[{mat,index,pairx,pair0,pair1,index0,index1,pos0,pos1,dat0,dat1,
data1x0,data1x1,data1x,list1,data1,name1,datalist1},
(*original data e**3 = 1000, etc*)
    mat=ReadList["swirldata1.m", Number, RecordLists -> True];
(*positive is normal; negative is tumor*)
   index=
 {-1,1,-2,2,-3,3,-4,4,-5,5,-6,6,-7,7,-8,8,-9,9,-10,10,-11,11,-12,12,-13,-14,-15,-16,-17,-18,-19,-20,
  -21,-22,-23,-24,-25,-26,27,-27,-28,28,29,-29,-30,-31,-32,32,-33,33,34,-34,-35,35,36,-36,-37,-38,
  -39,39,-40,40};
 (*posititions of two classes*)
   pairx=Transpose[{index,Range[62]}];
   pair0=Select[pairx,(#[[1]]>0)&];
  {index0,pos0}=Transpose[pair0];
  pair1=Select[pairx,(#[[1]]<0)&];
  {index1,pos1}=Transpose[pair1];
(*identify two classes*)
  data1x0= Transpose[(Transpose[mat])[[pos0]]];
  data1x1= Transpose[(Transpose[mat])[[pos1]]];
  data1x={data1x0,data1x1};
(*gene names*)
  list1= ReadList["swirldataname1.m", String];
 (*output*)
 name1={"colon cancer","normal","tumor"};
  datalist1={data1x,list1,name1};
 report[datalist1];
Return[datalist1]]

(*
datalist1= 
Microarray data on colon cancer with 2000 genes,
22 normal specimens, and 40 tumor specimens at 
http://microarray.princeton.edu/oncology (Alon et al 1999). 
Alon, U., Barkai, N., Notterman, D. A., Gish, K., Ybarra, S., Mack, D.,
   and Levine, A. J. (1999).  Broad patterns of gene expression revealed by clustering
    analysis of tumor and normal colon tissues probed by oligonucleotide arrays.
    Proceedings of the  National Academy of Sciences  96, 6745-6750.
*)


(*----LEUKEMIA DATA-------------------------------*)
									 
createdata2[]:=
Module[{mata,matb,ma,mb,ma0,ma1,mb0,mb1,mat0,mat1,res,dim0,dim1,name2,data2,datalist2},
  (*read in data*)  
    mata = ReadList["swirldata20.m", Number, RecordLists -> True];
    matb = ReadList["swirldata21.m", Number, RecordLists -> True];
  (*data sets a and b are original training and test*)
  (*first row is just list of column numbers and needs to be removed*)
     ma=Drop[mata,1];
     mb=Drop[matb,1];
  (*first 27 columns in test data set are ALL*)
     ma0=SelectColumns[ma,Range[27]];
     ma1=RemoveColumns[ma,Range[27]];
  (*first 20 columns in independent data set are ALL*)
     mb0=SelectColumns[mb,Range[20]];
     mb1=RemoveColumns[mb,Range[20]];
  (*need to combine orginal training and test sample to create one sample*)
     mat0=Hcat[ma0,mb0];
     mat1=Hcat[ma1,mb1];
	(*combine*)
	  data2={mat0,mat1}//N;
 	(*gene names*)
  	  list2= ReadList["swirldataname2.m", String];
  (*output*)
	  name2={"leukemia 1","ALL","AML"};
     datalist2={data2,list2,name2};			  	
     report[datalist2];
  Return[datalist2]]



SelectRows[matrix_List?MatrixQ,indices_List?VectorQ] :=    matrix[[#]]& /@ indices 
                
                   
SelectColumns[matrix_List?MatrixQ, indices_List?VectorQ] :=  
   Transpose @ SelectRows[Transpose[matrix],indices] 


RemoveRows[matrix_List?MatrixQ,indices_List?VectorQ] :=
   Delete[matrix,List /@ indices]
 
RemoveColumns[matrix_List?MatrixQ, indices_List?VectorQ] :=           
   Transpose[RemoveRows[Transpose[matrix],indices]] 


	Hcat[mat1_,mat2_]:=Transpose@ Join[Transpose[mat1],Transpose[mat2]]


 (*
datalist2=
Golub, T. R., Slonim, D. K., Tamayo,  P., Huard, C., Gaasenbeek, M.,
    Mesirov, J. P., Coller, H., Loh, M. L., Downing, J. R., Caligiuri, M. A.,
    Bloomfield, C. D., and Lander, E. S. (1999). Molecular classification of
    cancer: class discovery and class prediction by gene expression monitoring. Science
    286, 531-537.
Microarray data on leukemia with 7219 genes, 
47 acute lymphoblastic leukemia specimens and 25 acute myeloid leukemia specimens at 
http://www.broad.mit.edu/cgi-bin/cancer/datasets.cgi (Golub et al 1999).
	*)


(*-----MEDULLOBLASTOMA DATA-----------------------*)

createdata3[]:=
Module[{mat1,mat2,mat3,rmata,rmatb,rmatc,data3x,data3,list3,name3},
 (*read in data gene by subject*)
    mat1 = ReadList["swirldata3.m", Number, RecordLists -> True];
  (*identify the classes*)  
	 mat3=Transpose[mat1];
	 rmata=Transpose@Take[mat3,21];  (*D*)
    rmatb=Transpose@Drop[mat3,21];  (*A*)
    data3={rmata,rmatb};
  (*gene names*)
  	 list3= ReadList["swirldataname3.m", String];
  (*output*)
	 name3={"medulloblastoma","survivor","death"};
  	 datalist3={data3,list3,name3};
	 report[datalist3];
 Return[datalist3]]

(*
Pomeroy SL, Tamayo P, Gaasenbeek M, Sturla LM, Angelo M, McLaughlin ME, Kim JYH, Goumnerova LC,
Black PM, Lau C, Allen JC, Zagzag D, Olson JM, Curran T, Wetmore C, Biegel JA, Poggio T, Mukherjee S, Rifkin R,
Califano A, Stolovitzky G, Louis DN, Mesirov JP, Lander ES, Golub TR:
Prediction of central nervous system embryonal tumour outcome based on gene expression.
Nature 2002, 415:436-42. 
http://www.broadinstitute.org/mpr/publications/projects/Cancer_Susceptibility/medullo_datasetC_outcome.gct
*)


(*----------------PROSTATE CANCER DATA----------------*)

createdata4[]:=
Module[{mat,index,pairx,pair0,pair1,index0,index1,pos0,pos1,dat0,dat1,
data4x0,data4x1,data4x,list4,data4,dim0,dim1,name4,datalist4},
(*original data e**3 = 1000, etc*)
    mat=ReadList["swirldata4.m", Number, RecordLists -> True];
(*first fifty no tumor*)
   pos0=Range[50];
(*second fifty tumor*)
   pos1=Range[52]+50;
(*identify two classes*)
  data4x0= Transpose[(Transpose[mat])[[pos0]]];
  data4x1= Transpose[(Transpose[mat])[[pos1]]];
  data4x={data4x0,data4x1};
(*gene names*)
  list4= ReadList["swirldataname4.m", String];
 (*output*)
   name4={"prostate cancer","non-tumor","tumor"};
  datalist4={data4x,list4,name4};
  report[datalist4];
Return[datalist4]]



(*
datalist4=
Dinesh Singh, Phillip G. Febbo, Kenneth Ross, Donald G. Jackson, Judith Manola, 
Christine Ladd, Pablo Tamayo, Andrew A. Renshaw, Anthony V. D'Amico, Jerome P. Richie, Eric S. Lander, 
Massimo Loda, Philip W. Kantoff, Todd R. Golub, William R. Sellers (2002)
Gene expression correlates of clinical prostate cancer behavior.
Cancer Cell 1(2) 203-209	 
52 of these prostate tumors and 50 nontumor prostate samples
 using oligonucleotide microarrays containing probes for approximately 12,600 genes
http://www.broadinstitute.org/cgi-bin/cancer/publications/pub_paper.cgi?mode=view&paper_id=75
*)


 



(*----------------Leukemia data (St Jude)----------------*)

createdata5[]:=
Module[{x0,x1,list5,datalist5,dim0,dim1,name5},
 (*data*)
   x0=ReadList["swirldata50.m", Number, RecordLists -> True];
    x1=ReadList["swirldata51.m", Number, RecordLists -> True];
(*gene names*)
  list5= ReadList["swirldataname5.m", String];
 (*names*)
   name5={"leukemia 2","T-ALL","TEL-AML"};
 (*output*)
	datalist5={{x0,x1},list5,name5};
	report[datalist5];
Return[datalist5]]



(*
Classification, subtype discovery, and prediction of outcome in pediatric acute lymphoblastic leukemia
by gene expression profiling  Eng-Juh Yeoh1, 7, 11, Mary E Ross2, 11, Sheila A Shurtleff1,
W.Kent Williams1, Divyen Patel6, Rami Mahfouz1, Fred G Behm1, Susana C Raimondi1, Mary V Relling3,
Anami Patel1, Cheng Cheng4, Dario Campana1, 2, Dawn Wilkins8, Xiaodong Zhou8, Jinyan Li9,
Huiqing Liu9, Ching-Hon Pui2, William E Evans3, Clayton Naeve6, Limsoon Wong9 and 
James R Downing
Cancer Cell
Volume 1, Issue 2, March 2002, Pages 133-143 
43 cases of T-ALL  	
79 cases of TEL-AML1
http://www.stjuderesearch.org/data/ALL1/all_datafiles.html
*)



report[{data_,list_,name_}]:=
Module[{x0,x1,dim0,dim1,dname,name0,name1,p0,n0,p1,n1},
    {x0,x1}=data;
    {dname,name0,name1}=name;
    {p0,n0}=Dimensions[x0];
	 {p1,n1}=Dimensions[x1];
    Print[dname," ",p0,"  genes ",name0," ",n0,"  ",name1," ",n1];
 Return[Null]]

End[] 
EndPackage[]






{datalist1,datalist2,datalist3,datalist4,datalist5}=CreateDataSets[]; 
