/******************************************************************************
* Macro to demonstrate how Affymetrix GeneChip *.CEL files can be converted   *
* to ROOT Trees and stored in a ROOT file.                                    *
*                                                                             *
* Copyright(c) 2000-2003, Dr. Christian Stratowa, Vienna, Austria.            *
* Author: Christian Stratowa, Vienna, Austria.                                *
* Created: 15 Mar 2003                           Last modified: 18 April 2003 *
*                                                                             *
* This macro is distributed for demo purposes only,                           *
* but WITHOUT ANY WARRANTY; without even the implied warranty of              *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.                        *
*                                                                             *
* Based on: "The ROOT System", All rights reserved.                           *
* Authors: Rene Brun and Fons Rademakers.                                     *
* For the licensing terms of "The ROOT System" see $ROOTSYS/AA_LICENSE.       *
* For the list of contributors to "The ROOT System" see $ROOTSYS/AA_CREDITS.  *
******************************************************************************/

///////////////////////////////////////////////////////////////////////////////
//
// The intention of this macro is to demonstrate, how a common standard
// to store Affymetrix *.CEL files as ROOT trees could be established.
//
// Note: This macro was created and tested to run correctly on MacOS X,
// and Rene Brun was so kind to make some minor corrections and to test
// it on Linux (gcc3.2), Alpha/osf, Sun/Solaris and Windows/XP (VC++)
// However, any remaining inconsistencies and bugs are the responsibility
// of the author, so please, send bug reports to: cstrato@aon.at
//
// When running as interpreted code, you will get some warning messages,
// since CINT cannot create a dictionary for the interpreted classes:
// XTestManager and XCell
// When compiled, using ACLiC, there should be no warnings.
//
// Before you run this macro you need to download the CEL-files 
// for the MIT prostate cancer study from the following web-site:
//    http://www-genome.wi.mit.edu/cgi-bin/cancer/datasets.cgi
// Download the following file:
//    prostate_normal_N01-N31.CEL.tar.gz
// Unzip the files into a directory called "prostate", and
// copy macro "ImportHybridizations.C" to directory "prostate", too.
//
// Change to the prostate directory: 
//    cd /mypath/prostate
//
// 1. Use CINT C/C++ interpreter to interpret the macro:
// Start a new root session:
//   root
// From within root, run the macro by typing (type ".x" i.e. dot x):
//   .x ImportHybridizations.C
// Note: Importing all CEL-files takes some time, a finished message 
// will be displayed, and the root [1] prompt will appear.
// Note: Drawing the chip-image also takes some time.
// Quit: To quit root, type: .q (i.e. dot q)
//
// 2. Use ACLiC compiler to compile the macro:
// Start a new root session:
//   root
// From within root, run the macro by typing (note the "+" at the end!!):
//   .x ImportHybridizations.C+
// Note: Adding ".C+" is all you need for compilation and creation of
// a shared library including a dictionary for the new classes.
//
// 3. To inspect the contents of the created file "ProstateFile.root",
// you can start the RootBrowser from the commandline by typing:
//   TBrowser b
// Note: When running the macro, the Browser is started by the macro.
// You need to double-click on "ROOT Files" to see file contents
// Double clicking on e.g. leaf "fInten" will open a histogram.
//
// Comments:
// 1, Note that the trees are automatically compressed (default = 1).
// Compare the size of file "ProstateFile.root" with the size of the
// sum of the imported *.CEL files!
//
// 2, You can save the created image by selecting "Save as" from 
// the Canvas menu: You can not only save drawings as .ps, .eps, .svg,
// or .gif files for publication quality figures, but also as .C file, 
// which creates the C++ source code of the drawing for easily adding 
// features later. Note, that saving the current image as C++ file
// takes some time, because it produces about 14MB source code.
//
// 3, Zoom image: Move the cursor to the x-axis until it changes to
// a hand symbol. Then click and drag the range of the x-axis that you
// want to zoom-in. In the same way you can zoom-in the y-axis.
// To unzoom. move the cursor to the axis again until it changes to a hand.
// Then right-click the mouse to get a context menu and select "UnZoom".
// Do the same for the y-axis.
//
///////////////////////////////////////////////////////////////////////////////

//#ifndef __CINT__
#include "TFile.h"
#include "TTree.h"
#include "TBranch.h"
#include "TLeaf.h"
#include "TSystem.h"
#include "TCanvas.h"
#include "TStyle.h"
#include "TH2.h"
#include "TBrowser.h"
//#endif
#include <Riostream.h>

//////////////////////////////////////////////////////////////////////////
//                                                                      //
// XTestManager                                                         //
//                                                                      //
// Test class for managing files                                        //
//                                                                      //
//////////////////////////////////////////////////////////////////////////
class XTestManager: public TNamed {

   protected:
      TFile    *fFile;           //! ROOT file 
      TTree    *fTree;           //! current tree
      TString   fChipName;       //name of chip type
      TString   fDataTreeName;   //name of tree containing raw data
      Int_t     fNRows;          //number of rows of CEL-file
      Int_t     fNCols;          //number of columns of CEL-file
      Int_t     fNCells;         //number of cells in CEL-file
      TCanvas  *fCanvas;         //canvas for drawing
      Bool_t    fAbort;          //abort further actions

   protected:
      virtual Int_t ReadHeader(ifstream &input, char delim);
      virtual Int_t ReadData(ifstream &input, char delim, Int_t split);

   public:
      XTestManager();
      XTestManager(const char *name, const char *title = "");
      virtual ~XTestManager();

      virtual void   New(const char *name, const char *dir, const char *type);
      virtual Int_t  Import(const char *name, const char *infile, 
                        char delim = '\n', Int_t split = 99);
      virtual void   DrawImage(const char *canvasname, const char *treename,
                        Option_t *opt, Double_t min = 0, Double_t max = 10000);
     
#if !defined (__CINT__) || defined (__MAKECINT__)
      ClassDef(XTestManager,1) //TestManager
#endif
};

//////////////////////////////////////////////////////////////////////////
//                                                                      //
// XCell                                                                //
//                                                                      //
// Class describing a single GeneChip cell                              //
//                                                                      //
//////////////////////////////////////////////////////////////////////////
class XCell: public TObject {

   protected:
      Int_t      fX;            //x-coordinate of feature cell
      Int_t      fY;            //y-coordinate of feature cell
      Double_t   fInten;        //cell intensity
      Double_t   fStdev;        //standard deviation
      Int_t      fNPixels;      //number of pixels used for calculation

   public:
      XCell() {}
      virtual ~XCell() {}

      void     SetX(Int_t x)                {fX       = x;}
      void     SetY(Int_t y)                {fY       = y;}
      void     SetIntensity(Double_t inten) {fInten   = inten;}
      void     SetStdev(Double_t stdev)     {fStdev   = stdev;}
      void     SetNumPixels(Int_t numpix)   {fNPixels = numpix;}
      Int_t    GetX()                 const {return fX;}
      Int_t    GetY()                 const {return fY;}
      Double_t GetIntensity()         const {return fInten;}
      Double_t GetStdev()             const {return fStdev;}
      Int_t    GetNumPixels()         const {return fNPixels;}
      
#if !defined (__CINT__) || defined (__MAKECINT__)
      ClassDef(XCell,1) //Cell
#endif
};

#if !defined (__CINT__) || defined (__MAKECINT__)
ClassImp(XTestManager);
ClassImp(XCell);
#endif

const Int_t  kBufSize = 512;


//////////////////////////////////////////////////////////////////////////
//                                                                      //
// XTestManager                                                         //
//                                                                      //
// Test class to create a root file and import CEl-files as root trees  //
//                                                                      //
//////////////////////////////////////////////////////////////////////////

//______________________________________________________________________________
XTestManager::XTestManager()
             :TNamed()
{
   // Default Manager constructor

   fFile   = 0;
   fTree   = 0;
   fCanvas = 0;
   fAbort  = kFALSE;
}//Constructor

//______________________________________________________________________________
XTestManager::XTestManager(const char *name, const char *title)
             :TNamed(name, title)
{
   // Normal Manager constructor

   fFile   = 0;
   fTree   = 0;
   fCanvas = 0;
   fAbort  = kFALSE;
}//Constructor

//______________________________________________________________________________
XTestManager::~XTestManager()
{
   // Manager destructor

}//Destructor

//______________________________________________________________________________
void XTestManager::New(const char *name, const char *dir, const char *type)
{
   // Create new root file

   fAbort = kTRUE;

// Note: RECREATE overwrites existing file!
   //TString fullname = TString(name) + ".root";
   TString fullname = name;
   fullname += ".root";
   fFile = new TFile(fullname, "RECREATE", GetTitle());
   if (!fFile) return;

   SetTitle(type);
   fAbort = kFALSE;
}//New

//______________________________________________________________________________
Int_t XTestManager::Import(const char *name, const char *infile, char delim, Int_t split)
{
   // Import CEL-file

   if (fAbort) return 1;
   fFile->cd();

// Set tree name
   fDataTreeName = TString(name);

// Open infile containing data
   ifstream input(infile, ios::in);
   if (!input) {
      Error("Import","File <%s> does not exist.You have to give the full pathname!",infile);
      fAbort = kTRUE;
      return 1;
   }//if

// Import CEL-file
   Int_t err = 0;
   printf("Reading file <%s>...\n",infile);
   if (!err) err = this->ReadHeader(input, delim);
   if (!err) err = this->ReadData(input, delim, split);
   printf("New data <%s> is added...\n",name);

   input.close();
   return err;
}//Import

//______________________________________________________________________________
void XTestManager::DrawImage(const char *canvasname, const char *treename,
                   Option_t *opt, Double_t min, Double_t max)
{
   // Draw image for CEL-file

   if (fAbort) return;

// Append extension .cel to treename
   TString tname = TString(treename);
   if (!tname.Contains(".cel")) {
      tname += ".cel";
      treename = tname.Data();
   }//if

// Get tree with treename
   TTree *tree = (TTree*)fFile->Get(treename);
   if (!tree) {
      Error("DrawImage","Tree <%s> is not found.",treename);
      return;
   }//if

// Get tree branches for leaf fX, fY and fInten
// This allows us to get the tree data even if we do not know
// the class used for filling the tree, here class XCell
   TLeaf *leaf1 = tree->FindLeaf("fX");
   TLeaf *leaf2 = tree->FindLeaf("fY");
   TLeaf *leaf3 = tree->FindLeaf("fInten");
// Alternatively, set the branch address:
//   XCell *cell = 0;
//   tree->SetBranchAddress("DataBranch",&cell);

// Initialize image array (check if number of tree entries is correct)
   Int_t entries = (Int_t)(tree->GetEntries());
   if ((fNRows * fNCols) != entries) {
      Error("DrawImage","Number of rows times columns is not equal to <%d>",entries); 
      return;
   }//if
   Double_t *img = new Double_t[entries]; 

// Fill image array
   Int_t x, y, z;
   Double_t v;
   for (Int_t i=0; i<entries; i++) { 
      tree->GetEntry(i);
      x = (Int_t)(leaf1->GetValue());
      y = (Int_t)(leaf2->GetValue());
      v = leaf3->GetValue();
// Alternatively, get the values from cell:
//      x = cell->GetX();
//      y = cell->GetY();
//      v = cell->GetIntensity();
      z = x * fNRows + y;
      if (v > 0) {img[z] = TMath::Log10(v);} else {img[z] = 0.1;}
   }//for_i

// Create new canvas
   fCanvas = new TCanvas(canvasname, treename, 210,10,800,800);
   gStyle->SetPalette(1,0);
   gStyle->SetOptStat(0000000);

// Draw image
   TString title = "Image: ";
   title += tname;
   TH2D *h2 = new TH2D("H2",title,fNCols,0,fNCols,fNRows,0,fNRows);
   h2->SetMinimum(min);
   h2->SetMaximum(max);
   h2->GetXaxis()->SetLabelSize(0.03);
   h2->GetYaxis()->SetLabelSize(0.03);
   h2->GetZaxis()->SetLabelSize(0.02); //palette
   for (Int_t i=0; i<fNCols; i++) {
      for (Int_t j=0; j<fNRows; j++) {
         h2->SetCellContent(i+1,fNRows-j,img[i*fNRows+j]);
      }//for_j
   }//for_i
   h2->SetEntries(1);
   h2->Draw(opt); 

   delete [] img;
}//Draw

//______________________________________________________________________________
Int_t XTestManager::ReadHeader(ifstream &input, char delim)
{
   // Read header from input. 

   char  nextline[kBufSize];
   Int_t i;

// Check for first line [CEL]
   input.getline(nextline, kBufSize, delim);
   if ( strncmp("[CEL]", nextline, 5) != 0) return 1;

// Check version of CEL-file
   input.getline(nextline, kBufSize, delim);
   if (!input.good()) return 1;
   if ( strncmp("Version=", nextline, 8) == 0) {
      sscanf(&(nextline[8]), "%d", &i);
      if (i != 3) {
         Error("readheader","Currently, only version=3 of CEL-file is supported");
         return 1;
      }//if
   }//if

// Check for header line [HEADER]
   while (strncmp("[HEADER]", nextline, 8) != 0) {
      input.getline(nextline, kBufSize, delim);
      if (input.eof()) return 1;
   }//while

   // get column number
   input.getline(nextline, kBufSize, delim);
   if ( strncmp("Cols=", nextline, 5) != 0) return 1;
   sscanf(&(nextline[5]), "%d", &fNCols);
   printf("   Number of columns = %d\n",fNCols);

   // get row number
   input.getline(nextline, kBufSize, delim);
   if ( strncmp("Rows=", nextline, 5) != 0) return 1;
   sscanf(&(nextline[5]), "%d", &fNRows);
   printf("   Number of rows    = %d\n",fNRows);

// Get chip type from DatHeader
   while (strncmp("DatHeader=", nextline, 10) != 0) {
      input.getline(nextline, kBufSize, delim);
      if (input.eof()) return 1;
   }//while
   TString name = &nextline[0];
   Int_t start, end;
   for(Int_t j =0;j<2;j++){
      start = name.Index("\x14");
      name = &name[start+2];
   }//for_j
   // remove extension ".lsq"
   end = name.Index(".");
   fChipName = TString(name.Data(),end);
   printf("   Chip name is     <%s>\n",fChipName.Data());

// Check for intensity line [INTENSITY]
   while (strncmp("[INTENSITY]", nextline, 11) != 0) {
      input.getline(nextline, kBufSize, delim);
      if (input.eof()) return 1;
   }//while

// Get number of cells
   input.getline(nextline, kBufSize, delim);
   if (strncmp("NumberCells=", nextline, 12) != 0) return 1;
   sscanf(&(nextline[12]), "%d", &fNCells);
   printf("   Number of cells   = %d\n",fNCells);

   return 0;
}//ReadHeader

//______________________________________________________________________________
Int_t XTestManager::ReadData(ifstream &input, char delim, Int_t split)
{
   // Read data from input and store in data tree. 

   char     nextline[kBufSize];
   Int_t    i, x, y, numpix;
   Double_t inten, stdev;
   Int_t    err    = 0;
   Int_t    numcel = 0;
   Double_t min    = 999999999.9;  //= DBL_MAX;
   Double_t max    = 0;
   Int_t    nummin = 0;
   Int_t    nummax = 0;
   Int_t    maxpix = 0;

// Create data tree
   fDataTreeName += ".cel";
   TTree *datatree = new TTree(fDataTreeName, fChipName);
   XCell *cell     = new XCell();
   datatree->Branch("DataBranch", "XCell", &cell, 64000, split);
   printf("   Tree name is  <%s>\n",fDataTreeName.Data());

// Read header line containing column names
   input.getline(nextline, kBufSize, delim);
   if (strncmp("CellHeader=", nextline, 11) != 0) return 1;

// Read data and store in data tree
   for (i=0; i<fNCells; i++) {
      input.getline(nextline, kBufSize, delim);
      if (!input.good()) {err = 1; break;}
      sscanf(nextline,"%i %i %lf %lf %i", &x, &y, &inten, &stdev, &numpix);

   // number of cells with minimal intensity
      if (inten < min) {
         min = inten;
         nummin = 1;
      } else if (inten == min) {
         nummin++;
      }//if
   // number of cells with maximal intensity
      if (inten > max) {
         max = inten;
         nummax = 1;
      } else if (inten == max) {
         nummax++;
      }//if
   // maximal pixel number
      if (numpix > maxpix) {
         maxpix = numpix;
      }//if

   // fill data tree
      cell->SetX(x);
      cell->SetY(y);
      cell->SetIntensity(inten);
      cell->SetStdev(stdev);
      cell->SetNumPixels(numpix);
      datatree->Fill(); 

      numcel++;
   }//for_i

   printf("   Hybridization statistics:\n");
   printf("      %d cells with minimal intensity <%f>\n" ,nummin, min);
   printf("      %d cells with maximal intensity <%f>\n" ,nummax, max);

// Write data tree to file if all data are read
   if (numcel == fNCells) {
   // Write data tree to file 
      datatree->Write();
   } else {
      fDataTreeName = "NA";
      err = 1;
      Error("ReadData","number of lines read <%d> is not equal to to number of cells <%d>",numcel,fNCells);
  }//if

// Delete data tree from RAM
   datatree->Delete("");
   datatree = 0;
   delete cell;

   return err;
}//ReadData


//______________________________________________________________________________
//______________________________________________________________________________

// The following is the main function "ImportHybridizations()", analogous
// to the scripts shown in the paper (see e.g. Script 2).
// This code can be written in two ways:
// 1. Stack-based: Create class manager on the stack, it is automatically
// deleted when the function is finished
// 2. Heap-based: Create class manager on the heap with "new". Then you
// are responsible to delete the object with "delete"

// 1. Stack-based code: code looks similar to Java code
//______________________________________________________________________________
void ImportHybridizations()
{
// Import Affymetrix *.CEL files into root file

// create new manager
   XTestManager manager("MyManager");

// create new root data file 
   manager.New("ProstateFile","","GeneChip");

// store *.CEL data for prostate normal as tree in data file
   manager.Import("ProstateN01","N01__normal.CEL");
   manager.Import("ProstateN02","N02__normal.CEL");
   manager.Import("ProstateN03","N03__normal.CEL");
// alternatively, you can give the full path and use your own CEL-files:
//   manager.Import("ProstateN01","/fullpath to cel_files/prostate/N01__normal.CEL");

// for prostate tumor samples, download: prostate_tumor_T01-T30.CEL.tar.gz
//   manager.Import("ProstateT01","T01__tumor.CEL");
//   manager.Import("ProstateT02","T02__tumor.CEL");

// draw image for tree ProstateN01.cel
   manager.DrawImage("TestCanvas","ProstateN01","COLZ",1.5,4.5);

// open RootBrowser to inspect the file contents
   new TBrowser;
   
   printf("Macro is finished, to see the file contents, double click on \"ROOT Files\" in the browser\n");
}//ImportHybridizations


// 2. Heap-based code: Compare with script 2
/*
//______________________________________________________________________________
void ImportHybridizations1()
{
// Import Affymetrix *.CEL files into root file

// create new manager
   XTestManager *manager = new XTestManager("MyManager");

// create new root data file 
   manager->New("ProstateFile","","GeneChip");

// store *.CEL data for prostate normal as tree in data file
   manager->Import("ProstateN01","N01__normal.CEL");
   manager->Import("ProstateN02","N02__normal.CEL");
   manager->Import("ProstateN03","N03__normal.CEL");
// alternatively, you can give the full path and use your own CEL-files:
//   manager->Import("ProstateN01","/fullpath to cel_files/prostate/N01__normal.CEL");

// for prostate tumor samples, download: prostate_tumor_T01-T30.CEL.tar.gz
//   manager->Import("ProstateT01","T01__tumor.CEL");
//   manager->Import("ProstateT02","T02__tumor.CEL");

// draw image for tree ProstateN01.cel
   manager->DrawImage("TestCanvas","ProstateN01","COLZ",1.5,4.5);

// open RootBrowser to inspect the file contents
   new TBrowser;
   
   printf("Macro is finished, to see the file contents, double click on \"ROOT Files\" in the browser\n");

// cleanup
   delete manager;
}//ImportHybridizations
*/