loess.normalize <- function(mat,subset=sample(1:(dim(mat)[2]),5000),
                      epsilon=10^-2,maxit=1,log.it=TRUE,verbose=TRUE,span=2/3,
                      family.loess="symmetric"){
  J <- dim(mat)[2]
  II <- dim(mat)[1]
  newData <- mat
  if(log.it){
    mat <- log2(mat)
    newData <- log2(newData)
  }
  change <- epsilon +1
  fs <- matrix(0,II,J)##contains what we substract
  iter <- 0
  w <- c(0,rep(1,length(subset)),0) ##this way we give 0 weight to the
  ##extremes added so that we can interpolate
  while(iter < maxit){
    iter <- iter+1
    means <- matrix(0,II,J) ##contains temp of what we substract
    for(j in 1:(J-1)){
      for(k in (j+1):J){
        y <- newData[,j]-newData[,k]
        x <-(newData[,j]+newData[,k])/2
        index <- c(order(x)[1],subset,order(-x)[1])
        ##put endpoints in so we can interpolate
        xx <- x[index]
        yy <- y[index]
        aux <-loess(yy~xx,span=span,degree=1,weights=w,family=family.loess)
        aux <- predict.loess(aux,data.frame(xx=x))/J
        means[,j] <- means[,j] + aux 
        means[,k] <- means[,k] - aux
        if(verbose) cat("Done with",j,"vs",k," in iteration ",iter,"\n")
      }
    }
    fs <- fs+means
    newData <- mat-fs
    change <- max(apply((means[subset,])^2,2,mean))
    if(verbose) cat(iter,change,"\n")
    oldfs <- fs
  }
  if(change>epsilon & maxit>1) warning(paste("No convergence after",maxit,"iterations.\n"))
  if(log.it) return(2^newData)
  else return(newData)
}


library(modreg)
##*******************************************************************************************
#**********  maffy.normalise     *****
maffy.normalise <- function(data,subset,verbose=FALSE,span=0.25,family="symmetric",log.it=TRUE){

k <- dim(data)[2]   ### Number of chips

####   Create the transformation matrix 
t1 <- 1/sqrt(k)
t2 <- (k-2-t1)/(k-1)
t3 <- -(1+t1)/(k-1)

transmat <- matrix(t3,k,k)
for(i in 1:k){ 
     transmat[1,i]<-t1
     transmat[i,1]<-t1
} 
for(i in 2:k) transmat[i,i]<-t2 

#### Find normalizing curve   

if(verbose) cat("Fitting normalizing curve\n")
n<- length(subset)
data.subset <- data[subset,]

data.subset <- log(data.subset)%*%t(transmat)
index <- order(data.subset[,1])
data.subset <- data.subset[index,]

if( k>2) curve <- multiloess(data.subset[,2:k]~data.subset[,1],span=span,family=family,surface="direct")
else     curve <-      loess(data.subset[,2:k]~data.subset[,1],span=span,family=family,surface="direct")

### Transform the normalizing curve before and after normalization
scaled   <- cbind(data.subset[,1],matrix(0,n,k-1)) %*%(transmat)
unscaled <- cbind(data.subset[,1],curve$fitted)      %*%(transmat)

w <-c(0,rep(1,n,n),0)

data.scaled <- NULL

### Normalize each array
for(i in 1:k){
    if(verbose) cat("Normalizing chip ",i,"\n")
    if(log.it){
        mini  <- log(min(data[,i]))
        maxi  <- log(max(data[,i]))
    }
    else{
        mini  <- min(data[,i])
        maxi  <- max(data[,i])
    }

    curve <- loess(c(mini,scaled[,i],maxi)~c(mini,unscaled[,i],maxi),weights=w,span=span)

    if(log.it) 
         temp <-  exp(predict.loess(curve,log(data[,i])))
    else
         temp <-      predict.loess(curve,data[,i])

    data.scaled <- cbind(data.scaled,temp) 
}

data.scaled

}


##*******************************************************************************************
#**********  Select A subset with small rank-range over arrays  *****

maffy.subset <- function(data,subset.size=5000,maxit=100,subset.delta=max(round(subset.size/100),25),verbose=FALSE){


k     <- dim(data)[2]   ### Number of chips
n     <- dim(data)[1]   ## Size of starting subset, i.e. all rows

if(verbose)
      cat("Data size",n,"x",k,"Desired subset size",subset.size,"+-",subset.delta,"\n")       

means <- data%*%(rep(1,k,k)/k)

index0 <- order(means)

data.sorted <- data[index0,]

## Init
set <- rep(TRUE,n,n)      ## Set-indicator
index.set <- 1:n       ## Indexes for subset 
nprev <- n+1           
iter  <- 1
part.of.n <- 1

## loop
while(nprev>n & n>(subset.size+subset.delta) & iter <maxit){
    if(verbose)
      cat("Comuting ranks of old subset....")       
    ranks <-apply(data.sorted[index.set,],2,rank)              ## Compute ranks, chip by chip.
    ranks.range <- apply(ranks,1,function(r) max(r)-min(r) )   ## Range of ranks over chips

    q <-min((n*part.of.n+subset.size)/((1+part.of.n)*n),1)     ## Select quantiles
    low <- quantile(ranks.range[1:(n*0.2)+n*0.0],probs=q,names=FALSE)/n  
    high <-quantile(ranks.range[n+1-(1:(n*0.2))],probs=q,names=FALSE)/n
    
    newset <-  ranks.range < (low*n+(0:n-1)*(high-low))        ## Set-indicator of new set

    if(sum(newset)<subset.size-subset.delta){                  ## To small?
       part.of.n <- 1+part.of.n
       if(verbose)
         cat("\nSize of newset to small (",sum(newset),"). Increasing part.of.n.\n")
    }
    else{                                                      ## New set OK
       set <- newset
       index.set <- subset(index.set,set)
       index.set <- index.set[!is.na(index.set)] 
       nprev <- n
       n <- length(index.set)
       if(verbose)
          cat("Size of new subset: ",n,"\n")       
   }

   iter <- iter+1
}
##end loop

if(!iter <maxit) warning("Maximum number of iterations reached, result my not be correct\n")

list(subset=index0[index.set])

}




##*******************************************************************************************
multiloess <-
function(formula, data=NULL, weights, subset, na.action, model = FALSE,
	 span = 0.75, enp.target, degree = 2,
	 normalize = TRUE,
	 family = c("gaussian", "symmetric"),
	 method = c("loess", "model.frame"),
	 control = loess.control(...), ...)
{
    parametric <- FALSE
    drop.square <- FALSE

    mt <- terms(formula, data = data)
    mf <- match.call(expand.dots=FALSE)
    mf$model <- mf$span <- mf$enp.target <- mf$degree <-
	mf$parametric <- mf$drop.square <- mf$normalize <- mf$family <-
	    mf$control <- mf$... <- NULL
    mf[[1]] <- as.name("model.frame")
    mf <- eval(mf, parent.frame())
    if (match.arg(method) == "model.frame") return(mf)
    y <- model.response(mf, "numeric")

    if(is.vector(y))  stop("The respons variable is not a matrix, use loess")

    w <- model.weights(mf)
    if(is.null(w)) w <- rep(1, NROW(y))
    nmx <- as.character(attr(mt, "variables"))[-(1:2)]
    x <- mf[, nmx, drop=FALSE]
    if(any(sapply(x, is.factor))) stop("predictors must all be numeric")
    x <- as.matrix(x)
    D <- ncol(x)
    nmx <- colnames(x)
    names(nmx) <- nmx
    drop.square <- match(nmx, nmx[drop.square], 0) > 0
    parametric <- match(nmx, nmx[parametric], 0) > 0
    if(!match(degree, 0:2, 0)) stop("degree must be 0, 1 or 2")
    iterations <- if(family=="gaussian") 1 else control$iterations
    if(!missing(enp.target))
	if(!missing(span))
	    warning("both span and enp.target specified: span will be used")
	else {				# White book p.321
	    tau <- switch(degree+1, 1, D+1, (D+1)*(D+2)/2) - sum(drop.square)
	    span <- 1.2 * tau/enp.target
	}
    fit <- simplemultiLoess(y, x, w, span, degree,
		       normalize, control$statistics, control$surface,
		       control$cell, iterations, control$trace.hat)
    fit$call <- match.call()
    fit$terms <- mt
    fit$xnames <- nmx
    fit$x <- x
    fit$y <- y
    fit$weights <- w
    if(model) fit$model <- mf
    fit
}



##*******************************************************************************************
simplemultiLoess <- function(y, x, weights, span = 0.75, degree = 2,
	   normalize = TRUE,
	   statistics = "approximate", surface = "interpolate",
	   cell = 0.2, iterations = 1, trace.hat = "exact")
{
 
    ## Extra init 
    parametric <- FALSE
    drop.square <- FALSE
    
    M <- NCOL(y)
    A <- rep(1,M,M)

    D <- NCOL(x)
    N <- NROW(x)

    fitted.all <- matrix(1,N,M)
    fitted.residuals <- matrix(1,N,M)
    pseudo.resid.all <-  matrix(1,N,M)

    if(!N || !D)	stop("invalid `x'")
    if(!length(y))	stop("invalid `y'")
    x <- as.matrix(x)
    max.kd <-  max(N, 200)
    robust <- rep(1, N)
    divisor<- rep(1, D)
    if(normalize && D > 1) {
	trim <- ceiling(0.1 * N)
	divisor <-
	    sqrt(apply(apply(x, 2, sort)[seq(trim+1, N-trim), , drop = FALSE],
		       2, var))
	x <- x/rep(divisor, rep(N, D))
    }
    sum.drop.sqr <- sum(drop.square)
    sum.parametric <- sum(parametric)
    nonparametric <- sum(!parametric)
    order.parametric <- order(parametric)
    x <- x[, order.parametric]
    order.drop.sqr <- (2 - drop.square)[order.parametric]
    if(degree==1 && sum.drop.sqr)
	stop("Specified the square of a factor predictor to be dropped when degree = 1")
    if(D == 1 && sum.drop.sqr)
	stop("Specified the square of a predictor to be dropped with only one numeric predictor")
    if(sum.parametric == D) stop("Specified parametric for all predictors")

    if(iterations)
    for(j in 1:iterations) {
	robust <- weights * robust
	if(j > 1) statistics <- "none"
	if(surface == "interpolate" && statistics == "approximate")
	    statistics <- if(trace.hat == "approximate") "2.approx"
	    else if(trace.hat == "exact") "1.approx"
	surf.stat <- paste(surface, statistics, sep="/")
        for(k in 1:M) {        
        	z <- .C("loess_raw",
			as.double(y[,k]),
			as.double(x),
			as.double(weights),
			as.double(robust),
			as.integer(D),
			as.integer(N),
			as.double(span),
			as.integer(degree),
			as.integer(nonparametric),
			as.integer(order.drop.sqr),
			as.integer(sum.drop.sqr),
			as.double(span*cell),
			as.character(surf.stat),
			fitted.values = double(N),
			parameter = integer(7),
			a = integer(max.kd),
			xi = double(max.kd),
			vert = double(2*D),
			vval = double((D+1)*max.kd),
			diagonal = double(N),
			trL = double(1),
			delta1 = double(1),
			delta2 = double(1),
			as.integer(surf.stat == "interpolate/exact"),
			PACKAGE="modreg")
			fitted.all[,k] <- z$fitted.values
	}

	if(j==1) {
	    trace.hat.out <- z$trL
	    one.delta <- z$delta1
	    two.delta <- z$delta2
	}

	residuals.all <- (y-fitted.all)
	fitted.residuals <- sqrt((residuals.all^2)%*%A)

	if(j < iterations)
	    robust <- .Fortran("lowesw",
			       as.double(fitted.residuals),
			       as.integer(N),
			       robust = double(N),
			       double(N),
			       PACKAGE="modreg")$robust
    }
    if(surface == "interpolate")
    {
	pars <- z$parameter
	names(pars) <- c("d", "n", "vc", "nc", "nv", "liv", "lv")
	enough <- (D + 1) * pars["nv"]
	fit.kd <- list(parameter=pars, a=z$a[1:pars[4]], xi=z$xi[1:pars[4]],
		       vert=z$vert, vval=z$vval[1:enough])
    }
    if(iterations > 1) {
        for(k in 1:M) {        
		pseudovalues <- .Fortran("lowesp",
					 as.integer(N),
					 as.double(y[,k]),
					 as.double(fitted.all[,k]),
					 as.double(weights),
					 as.double(robust),
					 double(N),
					 pseudovalues = double(N),
					 PACKAGE="modreg")$pseudovalues
		zz <- .C("loess_raw",
			as.double(pseudovalues),
			as.double(x),
			as.double(weights),
			as.double(weights),
			as.integer(D),
			as.integer(N),
			as.double(span),
			as.integer(degree),
			as.integer(nonparametric),
			as.integer(order.drop.sqr),
			as.integer(sum.drop.sqr),
			as.integer(span*cell),
			as.character(surf.stat),
			temp = double(N),
			parameter = integer(7),
			a = integer(max.kd),
			xi = double(max.kd),
			vert = double(2*D),
			vval = double((D+1)*max.kd),
			diagonal = double(N),
			trL = double(1),
			delta1 = double(1),
			delta2 = double(1),
			as.integer(0),
			PACKAGE="modreg")
		pseudo.resid.all[,k] <- pseudovalues-zz$temp
	}

	pseudo.resid <- sqrt((pseudo.resid.all^2)%*%A)

    }
    sum.squares <- if(iterations <= 1) sum(weights * fitted.residuals^2)
    else sum(weights * pseudo.resid^2)
    enp <- one.delta + 2*trace.hat.out - N
    s <- sqrt(sum.squares/one.delta)
    pars <- list(robust=robust, span=span, degree=degree, normalize=normalize,
		 parametric=parametric, drop.square=drop.square,
		 surface=surface, cell=cell, family=
		 if(iterations <= 1) "gaussian" else "symmetric",
		 iterations=iterations)
    fit <- list(n=N, fitted=fitted.all, residuals=residuals.all,
		enp=enp, s=s, one.delta=one.delta, two.delta=two.delta,
		trace.hat=trace.hat.out, divisor=divisor)
    fit$pars <- pars
    if(surface == "interpolate") fit$kd <- fit.kd
    class(fit) <- "loess"
    fit

}


##*******************************************************************************************





merge.AffyBatch <- function(x, y, annotation=paste(x@annotation, y@annotation),
                            description=NULL,
                            notes=paste(x@notes, y@notes), ...) {

  adim <- dim(intensity(x))[1]

  if ((x@nrow != y@nrow) || (x@ncol != y@ncol))
    stop("cannot merge chips of different sizes !")

  if (x@cdfName != y@cdfName)
    warning("cdfName mismatch (using the cdfName of x)!")

  if (is.null(description))
    description <- paste("merged")
                         
  lx <- length(x)
  ly <- length(y)

  phenodata <- phenoData(x)
  pData(phenodata) <- rbind(pData(x),pData(y))
  return(new("AffyBatch",
             exprs=cbind(intensity(x),intensity(y)),
             cdfName=x@cdfName,
             nrow=x@nrow,
             ncol=x@ncol,
             phenoData=phenodata,
             annotation=x@annotation,
             description=x@description, ##need to write a merge for MIAME
             notes=paste(x@notes,y@notes))
         )
}
library(modreg)
mva.pairs <- function(x,labels=colnames(x),log.it=TRUE,span=2/3,family.loess="symmetric",digits=3,line.col=2,main="MVA plot",...){
  if(log.it) x <-log2(x)
  J <- dim(x)[2]
  frame()
  old.par <- par(no.readonly = TRUE)
   on.exit(par(old.par))
par(mfrow=c(J,J),mgp=c(0,.2,0),mar=c(1,1,1,1),oma=c(1,1.4,2,1))
  for(j in 1:(J-1)){
    par(mfg=c(j,j));plot(1,1,type="n",xaxt="n",yaxt="n",xlab="",ylab="");text(1,1,labels[j],cex=2)
    for(k in (j+1):J){
      par(mfg=c(j,k))
      yy <- x[,j]-x[,k]
      xx <-(x[,j]+x[,k])/2
      xx <- xx
      yy <- yy
      aux <- loess(yy~xx,degree=1,span=span,family=family.loess)$fitted
      plot(xx,yy,pch=".",xlab="",ylab="",tck=0,...)
      o <- order(xx)
      lines(approx(xx[o],aux[o]),col=line.col)
      par(mfg=c(k,j))
      sigma <- quantile(yy,.75)-quantile(yy,.25)
      txt <- format(c(sigma,0.123456789),digits=digits)
      plot(c(0,1),c(0,1),type="n",ylab="",xlab="",xaxt="n",yaxt="n")
      text(0.5,0.5,txt,cex=2)
    }
  }
  par(mfg=c(J,J));plot(1,1,type="n",xaxt="n",yaxt="n",xlab="",ylab="");
  text(1,1,labels[J],cex=2)
  mtext("A",1,outer=TRUE,cex=1.5)
  mtext("M",2,outer=TRUE,cex=1.5,las=1)
  mtext(main,3,outer=TRUE,cex=1.5)
  invisible()
}

normalize.AffyBatch.constantsubset <- function(abatch, refindex=1, FUN=mean, na.rm=TRUE, stable.probes) {

  #print(stable.probes)

  n <- length( abatch )
  
  if (! (refindex %in% 1:n)) stop("invalid reference index for normalization")
  refconstant <- FUN(intensity(abatch[[refindex]]), na.rm=na.rm)
  
  #set.na.spotsd(abatch)
                             
  for (i in (1:n)[-refindex]) {
    m <- normalize.constantsubset(intensity(abatch[[i]]), refconstant, FUN=FUN, na.rm=na.rm, stable.probes=stable.probes)
    myhistory <- list(name="normalized by constant",
                      constant=attr(m,"constant"))
    attr(m,"constant") <- NULL
    intensity(abatch)[, i] <- m
    ##history(abatch)[[i]] <- myhistory
  }
  return(abatch)
}       


normalize.constantsubset <- function(x, refconstant, FUN=mean, na.rm=TRUE, stable.probes) {
  thisconstant <- FUN(x, na.rm=na.rm)
  r <- x / thisconstant * refconstant
  attr(r,"constant") <- thisconstant * refconstant
  print(stable.probes)
  return(r)
}




normalize.AffyBatch.contrasts <- function(abatch,span=2/3,choose.subset=TRUE,subset.size=5000,verbose=TRUE,family="symmetric",pmonly=FALSE) {
  
  if(pmonly)
    Index <- unlist(pmindex(abatch))
  else
    Index <- unlist(indexProbes(abatch,"both"))
  
  
  ##we need default argumetns becuase they are used in this transitional file
  alldata <- intensity(abatch)[Index,]
  
  if(choose.subset)
    subset1 <- maffy.subset(alldata,verbose=verbose,subset.size=subset.size)$subset
  else
    subset1 <- sample(1:dim(alldata)[1],subset.size)
  aux <-   maffy.normalise(alldata,subset=subset1,verbose=verbose,span=span,family=family)
  
  intensity(abatch)[Index,] <- aux
  
  return(abatch)
}









## Laurent 2002

## The idea would be to have the function related to one particular normalization
## technique into one single file (to avoid to run after many different files).
##
## Strongly suggested naming convention:
## (This is not that particular naming convention that matters but that there is A naming
## convention... reasons upon requests for the skeptics...).
## All the functions in such a 'scaling function' file must be like
## 'normalize.xxx.yyy' or 'normalize.yyy'. 'xxx' has to be whether 'Cel' for Affymetrix worshipers,
## whether 'cdna' cDNA arrays devotees (other names will come if other popular formats show up).

## As you probably already suspect it from above, there is one single function 'normalize.yyy'
## per file. This last function is the data-format-independant algorithm.
## The 'normalize.xxx.yyy' function will communicate with the
## 'normalize.yyy' function according the nature of their data. Of course it may exist functions
## that are not suitable for a particular format 'xxx'. The corresponding function will just
## be absent.
## note: In the case of only one format 'xxx' is by a particular 'yyy' method,
## it may still be wise to have a 'normalize.yyy' function (1- for the future possible formats to
## come, 2- for the re-usability of the code).
##
## The attribute 'history' of the 'Cel' object is used to store informations about the
## procedure (note: they come from the 'scale.yyy' as an attribute). This is probably
## a temporary (but decent) solution.
##

normalize.AffyBatch.invariantset <- function(abatch, prd.td=c(0.003,0.007), progress=FALSE) {

  require(modreg, quietly=TRUE)
  
  w.pm <- unlist(indexProbes(abatch, which="pm"))             # boolean to find the PM probes
  i.pm <- rep(FALSE, abatch@nrow * abatch@ncol)
  i.pm[w.pm] <- TRUE
  rm(w.pm)
  
  np <- sum(i.pm)                                     # number of PM probes
  nc  <-  length(abatch)                                 # number of CEL files
  
  # take as a reference the array having the median overall intensity
  m <- vector("numeric", length=nc)
  for (i in 1:nc)
    m[i] <- mean(intensity(abatch)[, i][i.pm])
  refindex <- trunc(median(rank(m)))
  rm(m)           

  if (progress) cat("Data from", chipNames(abatch)[refindex], "used as baseline.\n")
  
  ##set.na.spotsd(cel.container)
  
  ## loop over the CEL files and normalize them
  for (i in (1:nc)[-refindex]) {
  
    if (progress) cat("normalizing array", chipNames(abatch)[i], "...")
    
    ##temporary
    tmp <- normalize.invariantset(c(intensity(abatch)[, i])[i.pm],
                                  c(intensity(abatch)[, refindex])[i.pm],
                                  prd.td)
    i.set <- which(i.pm)[tmp$i.set]
    tmp <- as.numeric(approx(tmp$n.curve$y, tmp$n.curve$x,
                             xout=intensity(abatch)[, i], rule=2)$y)
    attr(tmp,"invariant.set") <- NULL
    intensity(abatch)[, i] <- tmp

    ## storing information about what has been done
    ##history(abatch)[[i]] <- list(name="normalized by invariant set",
    ##                                   invariantset=i.set)
    
    if (progress) cat("done.\n")
    
  }
  ##history(abatch)[[refindex]] <- list(name="reference for the invariant set")
  
  return(abatch)
}



##  The 'common-to-all' part of the algorithm. Operates on two vectors of numeric data
##
normalize.invariantset <- function(data, ref, prd.td=c(0.003,0.007)) {

  np <- length(data)
  r.ref <- rank(ref)
  r.array <- rank(data)
  
  ## init
  prd.td.adj <- prd.td*10                           # adjusted threshold things
  i.set <- rep(TRUE, np)                            # index all the PM probes as being in the invariant set
  ns <- sum(i.set)                                  # number of probes in the invariant set
  ns.old <- ns+50+1                                 # number of probes previously in the invariant set
    
  ## iterate while the number of genes in the invariant set (ns) still varies...
  while ( (ns.old-ns) > 50 ) {
    air <- (r.ref[i.set] + r.array[i.set]) / (2*ns)  # average intensity rank for the probe intensities
    prd <- abs(r.ref[i.set] - r.array[i.set]) / ns
    threshold <- (prd.td.adj[2]-prd.td[1]) * air + prd.td.adj[1]
    i.set[i.set] <- (prd < threshold)
    
    ns.old <- ns
    ns <- sum(i.set)
    
    if (prd.td.adj[1] > prd.td[1])
      prd.td.adj <- prd.td.adj * 0.9  # update the adjusted threshold parameters
  }
  
  ## the index i.set corresponds to the 'invariant genes'
  n.curve <- smooth.spline(ref[i.set], data[i.set])
  ## n.curve$x contains smoothed reference intensities
  ## n.curve$y contains smoothed i-th array intensities
  
  ##data <- as.numeric(approx(n.curve$y, n.curve$x, xout=data)$y)
  ##attr(data,"invariant.set") <- i.set
  ##return(data)
  return(list(n.curve=n.curve, i.set=i.set))
}







normalize.AffyBatch.loess <- function(abatch, ...) {
  
  
  Index <- unlist(indexProbes(abatch,"both"))
  intensity(abatch)[Index,] <- normalize.loess(intensity(abatch)[Index,], ...)

  ##set.na.spotsd(listcel) # set 'sd' to nothing (meaningless after normalization)
  ##cat(cols,rows)


  ##need to use MIAME
  ##for (i in 1:abatch@nexp) {
  ##  history(abatch)[[i]] <- list(name="normalized by loess")
  ##}

  return(abatch)
}



normalize.loess <- function(mat, subset=sample(1:(dim(mat)[1]), min(c(5000, nrow(mat)))),
                            epsilon=10^-2, maxit=1, log.it=TRUE, verbose=TRUE, span=2/3,
                            family.loess="symmetric"){
  
  J <- dim(mat)[2]
  II <- dim(mat)[1]
  newData <- mat
  if(log.it){
    mat <- log2(mat)
    newData <- log2(newData)
  }
  
  change <- epsilon +1
  fs <- matrix(0, II, J)##contains what we substract
  iter <- 0
  w <- c(0, rep(1,length(subset)), 0) ##this way we give 0 weight to the
                                      ##extremes added so that we can interpolate
  
  while(iter < maxit){
    iter <- iter + 1
    means <- matrix(0,II,J) ##contains temp of what we substract
    
    for (j in 1:(J-1)){
      for (k in (j+1):J){
        y <- newData[,j] - newData[,k]
        x <- (newData[,j] + newData[,k]) / 2
        index <- c(order(x)[1], subset, order(-x)[1])
        ##put endpoints in so we can interpolate
        xx <- x[index]
        yy <- y[index]
        aux <-loess(yy~xx, span=span, degree=1, weights=w, family=family.loess)
        aux <- predict.loess(aux, data.frame(xx=x)) / J
        means[, j] <- means[, j] + aux 
        means[, k] <- means[, k] - aux
        if (verbose)
          cat("Done with",j,"vs",k," in iteration ",iter,"\n")
      }
    }
    fs <- fs + means
    newData <- mat - fs
    change <- max(apply((means[subset,])^2, 2, mean))
    
    if(verbose)
      cat(iter, change,"\n")
    
    oldfs <- fs
    
  }
  
  if ((change > epsilon) & (maxit > 1))
    warning(paste("No convergence after", maxit, "iterations.\n"))
  
  if(log.it) {
    return(2^newData)
  } else
    return(newData)
}
normalize.AffyBatch.qspline <- function(abatch, ...) {
  intensity(abatch) <- normalize.qspline(t(intensity(abatch)), ...)
  
  #set.na.spotsd(listcel)
  
  ##need to use MIAME for this
  ##  for (i in 1:length(abatch)) {
  ##    history(abatch)[[i]] <- list(name="normalized by qspline")
  ## }
  
  return(abatch)
}

normalize.qspline <- function(x,
                              target        = NULL,
                              samples       = NULL,
                              fit.iters     = 5, 
                              min.offset    = 5,
                              spline.method = "natural", # c("fmm", "natural", "periodic")
                              smooth        = TRUE,
                              spar          = 0,     # smoothing parameter 
                              p.min         = 0, 
                              p.max         = 1.0, 
                              incl.ends     = TRUE,
                              converge      = FALSE,
                              verbose       = TRUE,
                              na.rm         = FALSE
                              ){

  require(modreg)
  
  if (is.null(target))
    target <- exp(apply(log(x), 1, mean))
  
  x.n <- dim(x)[1]
  m   <- dim(x)[2]

  if (is.null(samples))
    samples <- max(round(x.n/1000), 100)
  else
    if (samples < 1)
      samples <- round(samples * x.n)
  
  p <- 1:samples / samples
  p <- p[ which(p <= p.max) & which(p >= p.min) ]
  samples <- length(p)
  
  k <- fit.iters
  
  if (na.rm==TRUE)
    y.n <- sum(!is.na(target))
  else
    y.n <- length(target)
  
  py.inds  <- as.integer(p * y.n)
  y.offset <- round(py.inds[1]/fit.iters)
  
  if (y.offset <= min.offset) { 
    y.offset <- min.offset;
    k <- round(py.inds[1]/min.offset)
  }

  if (k < 1) {
    warning("'k' found is non-sense. using default")
    k <- fit.iters
  }
  
  y.offset <- c(0, array(y.offset, (k-1)))
  y.order <- order(target)

  fx <- matrix(0, x.n,m)
  if(verbose==TRUE)
    print(paste("samples=",samples, "k=", k, "first=", py.inds[1]))
  
  for (i in 1:m) {
                                        # to handel NA values for each array
    if (na.rm==TRUE)
      x.valid <- which(!is.na(x[,i])) 
    else
      x.valid <- 1:x.n
    
    x.n <- length(x.valid)
    px.inds  <- as.integer(p * x.n)
      x.offset <- round(px.inds[1]/fit.iters)
    
    if (x.offset<=min.offset) { 
      x.offset <- min.offset; 
      k <- min(round(px.inds[1]/min.offset), k) 
    }
    
    x.offset <- c(0, array(x.offset, (k-1)))
    x.order  <- order(x[,i]) # NA's at the end (?)
    
    y.inds   <- py.inds ## must be reset each iteration
    x.inds   <- px.inds 

    for (j in 1:k) {
         y.inds <- y.inds - y.offset[j]
         x.inds <- x.inds - x.offset[j]
         ty.inds <- y.inds
         tx.inds <- x.inds
         if (verbose==TRUE)
           print(paste("sampling(array=", i, "iter=", j, "off=",
                       x.inds[1], -x.offset[j], y.inds[1], -y.offset[j], ")"))
         
         if (converge==TRUE) {
           ty.inds <- as.integer(c(1, y.inds))
           tx.inds <- as.integer(c(1, x.inds))
           
           if (j > 1) {
             ty.inds <- c(ty.inds, y.n)
             tx.inds <- c(tx.inds, x.n)
           }
         }
         qy <- target[y.order[ty.inds]]
         qx <-  x[x.order[tx.inds],i]
         
         if (smooth==TRUE) {
           sspl <- smooth.spline(qx, qy, spar=spar)
           qx <- sspl$x
           qy <- sspl$y
         }
         
         fcn <- splinefun(qx, qy, method=spline.method)
         fx[x.valid,i] <- fx[x.valid,i] + fcn(x[x.valid,i])/k
       }
    
    if (na.rm==TRUE) {
      invalid <- which(is.na(x[,i]))
      fx[invalid,i] <- NA
    }
  }
  return(fx)
}
normalize.AffyBatch.quantiles <- function(abatch,pmonly=FALSE) {

  pms <- unlist(pmindex(abatch))
  noNA <- apply(intensity(abatch)[pms,],1,function(x) all(!is.na(x)))
  pms <- pms[noNA]
  intensity(abatch)[pms,] <- normalize.quantiles(intensity(abatch)[pms, ])
  if(!pmonly){ 
    mms <- unlist(mmindex(abatch))
    noNA <- apply(intensity(abatch)[mms,],1,function(x) all(!is.na(x)))
    mms <- mms[noNA]

    intensity(abatch)[mms,] <- normalize.quantiles(intensity(abatch)[mms, ])
  }
  
  ##this is MIAME we need to decide how to do this properly.
  ##for (i in 1:length(abatch)) {
  ##  history(abatch)[[i]]$name <- "normalized by quantiles"
  ##}

                return(abatch)
}
  
normalize.quantiles <- function(x){

  rows <- dim(x)[1]
  cols <- dim(x)[2]
  
  matrix(.C("qnorm_c", as.double(as.vector(x)), as.integer(rows), as.integer(cols))[[1]], rows, cols)
}


normalize.AffyBatch.quantiles.robust <- function(abatch, pmonly=FALSE) {

  pms <- unlist(pmindex(abatch))
  intensity(abatch)[pms, ] <- normalize.quantiles.robust(intensity(abatch)[pms, ])
  if(!pmonly){ 
    mms <- unlist(mmindex(abatch))
    intensity(abatch)[mms, ] <- normalize.quantiles.robust(intensity(abatch)[mms, ])
  }
  
  ##this is MIAME we need to decide how to do this properly.
  ##for (i in 1:length(abatch)) {
  ##  history(abatch)[[i]]$name <- "normalized by quantiles"
  ##}

  return(abatch)
}

normalize.quantiles.robust <- function(x,weights=NULL,remove.extreme=c("variance","mean","both","none"),n.remove=1,approx.meth = FALSE,...){
  
  calc.var.ratios <- function(x){
    cols <- dim(x)[2]
    vars <- apply(x,2,var)
    results <- matrix(0,cols,cols)
    for (i in 1:cols-1)
      for (j in (i+1):cols){
        results[i,j] <- vars[i]/vars[j]
        results[j,i] <- vars[j]/vars[i]
      }
    results
  }

  calc.mean.dists <- function(x){
    cols <- dim(x)[2]
    means <- apply(x,2,mean)
    results <- matrix(0,cols,cols)
    for (i in 1:cols-1)
      for (j in (i+1):cols){
        results[i,j] <- means[i] - means[j]
        results[j,i] <- means[j] - means[i]
      }
    results
  }
  
  rows <- dim(x)[1]
  cols <- dim(x)[2]
  
  if (is.null(weights)){
    weights <- rep(1,cols)
    if (remove.extreme == "variance"){
      var.ratios <- calc.var.ratios(x)
      vars.big <- apply(var.ratios,1,sum)
      vars.small <- apply(var.ratios,2,sum)
      var.adj <- vars.big + vars.small
      remove.order <- order(-var.adj)
      weights[remove.order[1:n.remove]] <- 0
    }
    if (remove.extreme == "mean"){
      means <- abs(apply(calc.mean.dists(x),2,sum))
      remove.order <- order(-means)
      weights[remove.order[1:n.remove]] <- 0
    }
    if (remove.extreme == "both"){
      var.ratios <- calc.var.ratios(x)
      vars.big <- apply(var.ratios,1,sum)
      vars.small <- apply(var.ratios,2,sum)
      var.adj <- vars.big + vars.small
      means <- abs(apply(calc.mean.dists(x),2,sum))
      # by convention we will remove first the most extreme variance, then the most extreme mean
      remove.order <- order(-var.adj)
      weights[remove.order[1]] <- 0
      remove.order <- order(-means)
      weights[remove.order[1]] <- 0
    }
  }
  if (length(weights) != cols){
    stop("Weights vector incorrect length\n")
  }
  if (sum(weights > 0) < 2){
    stop("Need at least two non negative weights\n")
  }
  cat("Chip weights are ",weights,"\n") 
  if (approx.meth == FALSE){
    matrix(.C("qnorm_robust_c",as.double(as.vector(x)),as.double(weights),as.integer(rows),as.integer(cols))[[1]],rows,cols)
  } else {
    cat("Approximation currently not implemented \nFalling back to standard Quantile method\n")
    matrix(.C("qnorm_robust_c",as.double(as.vector(x)),as.double(weights),as.integer(rows),as.integer(cols))[[1]],rows,cols)
  }
}
pairs.AffyBatch <- function(x, panel=points, ..., transfo=I, main=NULL, oma=NULL,
                            font.main = par("font.main"), cex.main = par("cex.main"),
                            cex.labels = NULL, 
                            lower.panel=panel, upper.panel=NULL,
                            diag.panel=NULL,
                                        #text.panel = textPanel,
                                        #label.pos = 0.5 + has.diag/3,                                
                            font.labels = 1, row1attop = TRUE, gap = 1) {

  #label1 <- chipNames(x)
  #label2 <- unlist(lapply(history(x), function(z) z$name))
  
  #textPanel <- function(x = 0.5, y = 0.5, txt, cex, font) {
  #  text(x, y, txt, cex = cex, font = font)
  #}
  
  ##labels <- paste(sampleNames(x), unlist(lapply(history(x), function(z) if (is.null(z$name)) "" else z$name)), sep="\n")
  labels <- sampleNames(x)
  ##y <- matrix(intensity(x)[, , seq(along=x)], ncol=length(x))
  y <- intensity(x)
  
  pairs(transfo(y), labels=labels,
        panel=panel, ..., main=main, oma=oma,
        font.main = font.main, cex.main = cex.main,
        lower.panel=lower.panel, upper.panel=upper.panel, diag.panel=diag.panel,
        #text.panel = text.panel,
        #label.pos = label.pos,
        cex.labels = cex.labels, 
        font.labels = font.labels, row1attop = row1attop, gap = gap
        )
   
}
plotDensity <- function(mat,
                        ylab="density", xlab="x", ...) {
  
  x.density <- apply(mat, 2, density)

  all.x <- do.call("cbind", lapply(x.density, function(x) x$x))
  all.y <- do.call("cbind", lapply(x.density, function(x) x$y))
  
  matplot(all.x, all.y, ylab=ylab, xlab=xlab, ...)
}
 

plotDensity.AffyBatch <- function(x, col=rainbow(length(x)), log=TRUE,
                                  which=c("pm","mm","both"),
                                  ylab="density",
                                  xlab=NULL,
                                  ...){
  
  Index <- unlist(indexProbes(x, which=which))
  
  x <- intensity(x)[Index, ]
  
  if(log){
    x <- log2(x)
    if(is.null(xlab)) xlab <- "log intensity"
  }
  else  if(is.null(xlab)) xlab <- "intensity"
  
  
  plotDensity(x, ylab=ylab, xlab=xlab, col=col, ...)
}
plotLocation <- function(x, col="green", pch=22, ...) {
  if (is.list(x)) {
    x <- cbind(unlist(lapply(x, function(x) x[,1])),
               unlist(lapply(x, function(x) x[,2])))
  }
  points(x[,1], x[,2]
         , pch=pch, col=col, ...)
}
pmcorrect.mas <- function(object, contrast.tau=0.03, scale.tau=10, delta=9.536743e-07)
  {
    all.pps.pm <- pm(object)
    all.pps.mm <- mm(object)
    
    diff <- log2(all.pps.pm) - log2(all.pps.mm)

    delta <- rep(delta, nrow(diff))
    ## segfaults
    ##sb <- .C("Tukey_Biweight", as.double(diff), as.integer(length(diff)))
    for (i in 1:ncol(diff)) {
      sb <- tukey.biweight(diff[, i])
      pps.pm <- all.pps.pm[, i]
      pps.mm <- all.pps.mm[, i]
      pps.im <- pps.mm
      
      j <- (pps.mm >= pps.pm) & (sb > contrast.tau)
      pps.im[j] <- pps.pm[j] / 2^sb
      
      j <- (pps.mm >= pps.pm) & (sb <= contrast.tau)
      pps.im[j] <- pps.pm[j] / 2^(contrast.tau / (1 + (contrast.tau - sb) / scale.tau))
      
      pm.corrected <- apply(cbind(pps.pm, pps.im, delta), 1, max)
      
      diff[, i] <- pm.corrected
    }
    return(diff)
  }







# originally called 'getnature' in affyR

pmormm <- function(cdf) {
  a.i <- which(cdf@pbase.levels == "A")
  t.i <- which(cdf@pbase.levels == "T")
  g.i <- which(cdf@pbase.levels == "G")
  c.i <- which(cdf@pbase.levels == "C")
  md <- dim(cdf@name)

  # init to 'NA'
  nature <- matrix(NA,md[1],md[2])
  i <- which(
             ((cdf@pbase == a.i) & (cdf@tbase == t.i)) | 
             ((cdf@pbase == t.i) & (cdf@tbase == a.i)) |
             ((cdf@pbase == g.i) & (cdf@tbase == c.i)) |
             ((cdf@pbase == c.i) & (cdf@tbase == g.i))
             )
  # set the PM to TRUE
  nature[i] <- TRUE
  i <- which((! is.na(cdf@atom)) & is.na(nature))
  # set the MM to FALSE
  nature[i] <- FALSE
  return(nature)   
}


















pwidget.selector <- function(choices, title="Select Window", sub="choices", choices.help=choices)  {#htmlhelp=getOption("htmlhelp")) {

  require(tkWidgets) || stop("requires the package tkWigets !")
  require(tcltk) || stop("requires the package tcltk !")
  
  ## pre -- hook
  if ((! is.null(choices.help)) & (length(choices) != length(choices.help))) {
    stop("length mismatch between choices and choices.help")
  }
  old.htmlhelp <- options()$htmlhelp
  tt <- tktoplevel();
  ## post -- hook
  on.exit(tkdestroy(tt))
  on.exit(options(htmlhelp=old.htmlhelp), add=TRUE)
  ## ---------
  ##help.start() ##out for now
  tkwm.title(tt, title)
  done <- tclVar(0)
  norm.but <- tkbutton(tt, text="ok",
                       command=function()tclvalue(done)<-1)
  ## -- help button (if wanted)
  if (! is.null(choices.help)) {
    help.but <- tkbutton(tt, text="help",
                         command=function()tclvalue(done)<-2)
  }
  
  alt.rbuts.meth <- tkframe(tt)
  tkpack(tklabel(alt.rbuts.meth, text=sub))
  alt.meth <- tclVar(choices[1])
  for ( i in choices){
    tmp<-tkradiobutton(alt.rbuts.meth, text=i, variable=alt.meth, value=i)
    tkpack(tmp,anchor="w")
  }
  
  if (! is.null(choices.help)) {
    tkgrid(norm.but, alt.rbuts.meth, help.but)
  } else {
     tkgrid(norm.but, alt.rbuts.meth)
  }
  
  tkbind(tt, "<Destroy>", function()tclvalue(done)<-9)

  while (tclvalue(done)!="1") {
    tkwait.variable(done)
    
    if(tclvalue(done)=="9")
      stop("aborted")
    if(tclvalue(done)=="2") {
      help.ok <- try(do.call("help",list(choices.help[match(tclvalue(alt.meth), choices)])))
      ##if (inherits(help.ok, "try-error"))
      ##  cat(paste("Error: no help file", choices.help[match(tclvalue(alt.meth), choices)] , "!\n"))
      }
  }
  
  return(tclvalue(alt.meth))
}
projvec <- function(vec1,vec2){
  sum(vec1*vec2)/sum(vec2^2) * vec2
}

reorder.vec <- function(vec1,ordering){
  vec1[ordering]
}
read.affy <- function(file,chip,chip.names=NULL){
  Names <- scan(file,nlines=1,what="c")
  nchips <- length(Names) - 5 ##first five are info not intensities
  if(is.null(chip.names)) chip.names <- Names[-c(1:5)]
  else{
    if(length(chip.names)!=nchips){
      warning("Not the same number of chips than chip names. Assigning names from file.\n") 
      chip.names <- Names[-c(1:5)]
    }
  }
  Data <- matrix(scan(file,skip=1),ncol=length(Names),byrow=TRUE)
  ncol <- max(Data[,4])+1
  nrow <- max(Data[,5])+1
  n <- dim(Data)[1]
  pmindex <- (1:n)[Data[,3] == 1]
  mmindex <- (1:n)[Data[,3] == 0]
  if(any((pmindex+ncol)-mmindex))
    warning("MMs are not underneath PMs\n")
  pm <-matrix(as.matrix(Data[pmindex,-c(1:5)]),ncol=nchips)##contains all pms
  mm <-matrix(as.matrix(Data[mmindex,-c(1:5)]),ncol=nchips)##contains all mms
  dimnames(pm)<- list(NULL,chip.names)
  dimnames(mm)<- list(NULL,chip.names)
  probe.names <- Data[pmindex,1]
  x <- Data[pmindex,4]
  y <- Data[pmindex,5]
  probe.numbers <- Data[pmindex,2]
  if(any(pm<=0)){
    warning("Non positive pms found. Substituting with minimum.\n")
     pm[pm<=0] <- min(pm[pm>0])
   }
  if(any(mm<=0)){
    warning("Non positive mms found. Substituting with minimum.\n")
    mm[mm<=0] <- min(mm[mm>0])
  }
  probe.names <- scan(chip,what="c")
  return(list(pm=pm,mm=mm,id=Data[pmindex,1],
              numbers=probe.numbers,names=probe.names,n=length(probe.names),
              x=x,y=y,ncol=ncol,nrow=nrow,nchips=nchips,
              nprob=length(pmindex),chip.names=chip.names))
}







read.affybatch <- function(..., filenames=character(0),
                           ##sd=FALSE,
                           phenoData=new("phenoData"),
                           description=NULL,
                           notes="",
                           compress=getOption("BioC")$affy$compress.cel,
                           rm.mask=FALSE, rm.outliers=FALSE, rm.extra=FALSE,
                           hdf5=FALSE, hdf5FilePath=NULL,
                           ##widget = FALSE, ##now a separate function to get filenames
                           verbose=FALSE) {
  
  auxnames <- as.list(substitute(list(...)))[-1]
  filenames <- .Primitive("c")(filenames, auxnames)
  
  n <- length(filenames)
  
  ## error if no file name !
  if (n == 0)
    stop("No file name given !")
  
  pdata <- pData(phenoData)
  ##try to read sample names form phenoData. if not there use CEL filenames
  if(dim(pdata)[1]!=n){#if empty pdata filename are samplenames
    warning("Incompatible phenoData object. Created a new one.\n")
    
    samplenames <- sub("^/?([^/]*/)*", "", unlist(filenames), extended=TRUE)
    pdata <- data.frame(sample=1:n,row.names=samplenames)
    phenoData <- new("phenoData",pData=pdata,varLabels=list(sample="arbitrary numbering"))
  }
  else samplenames <- rownames(pdata)
  
  if (is.null(description))
    {
      description <- new("MIAME")
      description@preprocessing$filenames <- filenames
      description@preprocessing$affyversion <- library(help=affy)$info[[2]][[2]][2]
    }
  ## read the first file to see what we have
  if (verbose) cat(1, "reading",filenames[[1]],"...")
  
  cel <- read.celfile(filenames[[1]],
                      ##sd=sd,
                      compress=compress,
                      rm.mask = rm.mask,
                      rm.outliers = rm.outliers,
                      rm.extra = rm.extra)
  if (verbose) cat("done.\n")
  
  ##now we use the length
  firstintensity <- intensity(cel)
  dim.intensity <- dim(firstintensity)
  ##if (sd)
  ##  dim.sd <- dim.intensity
  ##else
  ##  dim.sd <- c(1,1)
  
  if (hdf5) {
    require(rhdf5) || stop("The package rhdf5 is required !")
    if (is.null(hdf5FilePath))
      stop("A path for tmp files must be specified")
    if (! is.na(file.info(hdf5FilePath)$size)) {
      warning(paste("The file \"", hdf5FilePath, "\" already exists !", sep=""))
    }
    conty <- new.AffyBatch.hdf5(n, prod(dim.intensity), ##prod cause no 2d array
                                hdfile.group="raw",
                                hdfile.name=hdf5FilePath,
                                cdfName = cel@cdfName
                                )
    
  } else {
    conty <- new("AffyBatch",
                 exprs  = array(NA, dim=c(prod(dim.intensity), n)),
                 ##se.exprs = array(NA, dim=dim.sd),
                 cdfName    = cel@cdfName,
                 phenoData  = phenoData,
                 nrow       = dim.intensity[1],
                 ncol       = dim.intensity[2],
                 annotation = cleancdfname(cel@cdfName,addcdf=FALSE),
                 description= description,
                 notes      = notes)
    ##           history    = vector("list", length=n)) we need to put this in MIAME
    ##we have to use phenoData here: dimnames(intensity(conty)) <- list(NULL, NULL, rep("", n))
    
  }
  
  intensity(conty)[, 1] <- as.vector(firstintensity)
  
  ##if (sd)
  ##  spotsd(conty)[, , 1] <- spotsd(cel)

  ##We need to get names from phenoData
  ##c.names <- rep("", n)
  ##c.names[1] <- cel@name
  ##outliers(conty)[[1]] <- outliers(cel)
  ##masks(conty)[[1]] <- masks(cel)
  ##history(conty)[[1]] <- history(cel) ###this must be done through MIAME

  ## finish if only one file.. we have to make phenoData agree. cant return just yet
  ##if (n == 1)
  ##  return(conty)

  if(n>1) {
    for (i in 2:n) {
      
      if (verbose) cat(i, "reading",filenames[[i]],"...")
      cel <- read.celfile(filenames[[i]],
                          ##sd=sd,
                          compress=compress, rm.mask=rm.mask,
                          rm.outliers=rm.outliers, rm.extra=rm.extra)
      if (dim(intensity(cel)) != dim.intensity)
        stop(paste("CEL file dimension mismatch !\n(file",filenames[[i]],")"))
      if (verbose) cat("done.\n")
      
      if (cel@cdfName != conty@cdfName)
        warning(paste("cdfName mismatch !\n(", filenames[[i]], ")"))
      
      intensity(conty)[, i] <- as.vector(intensity(cel))
      ##      dimnames(intensity(conty))[[3]][i] <- cel@name ##now through phenoData
      ##if (sd)
      ##  spotsd(conty)[, , i] <- spotsd(cel)
      
      ##c.names[i] <- cel@name ##from phenoData now
      ##outliers(conty)[[i]] <- outliers(cel)
      ##masks(conty)[[i]] <- masks(cel)
      ##history(conty)[[i]] <- history(cel) now through MIAME
    }
    ##dim(intensity(conty)) <- c(prod(dim.intensity), n) ##alread done. by definition
    ##chipNames(conty) <- c.names  ##now phenoData
  }
  
  colnames(intensity(conty)) <- samplenames
  return(conty)
}

list.celfiles <-   function(...){
  files <- list.files(...)
  return(files[grep("\.[cC][eE][lL]\.gz$|\.[cC][eE][lL]$", files)])
}

###this is user friendly wrapper for read.affybatch
ReadAffy <- function(..., filenames=character(0),
                     widget=getOption("BioC")$affy$use.widgets,
                     compress=getOption("BioC")$affy$compress.cel,
                     celfile.path=getwd(),
                     sampleNames=NULL,
                     phenoData=NULL,
                     description=NULL,
                     notes="",
                     rm.mask=FALSE, rm.outliers=FALSE, rm.extra=FALSE,
                     hdf5=FALSE, hdf5FilePath=NULL,verbose=FALSE){
  ##first figure out filenames
  auxnames <- unlist(as.list(substitute(list(...)))[-1])

  if (widget){
    require(tkWidgets)
    widgetfiles <- fileBrowser(textToShow="Choose CEL files",
                               testFun=hasSuffix("[cC][eE][lL]"))
  }
  else
    widgetfiles <- character(0)
  
  filenames <- .Primitive("c")(filenames, auxnames, widgetfiles)
  
  if(length(filenames)==0) filenames <- list.celfiles(celfile.path,full.names=TRUE)
  
  if(length(filenames)==0) stop("No cel filennames specified and no cel files in specified directory:",celfile.path,"\n")
  
  
  ##now assign sampleNames if phenoData not given
  if(is.null(phenoData)){
    if(is.null(sampleNames)){
      if(widget){
        require(tkWidgets)
        tksn <- tkSampleNames(filenames=filenames)
        sampleNames <- tksn[,1]
        ##notice that a description of the files is ingored for now
        ##soon to go into MIAME
      }
      else{
        sampleNames <- sub("^/?([^/]*/)*", "", filenames, extended=TRUE)
      }
    }
    else{
      if(length(sampleNames)!=length(filenames)){
        warning("sampleNames not same length as filenames. Using filenames as sampleNames instead\n")
        sampleNames <- sub("^/?([^/]*/)*", "", filenames, extended=TRUE)
      }
    }
  }
  
  ##now get phenoData
  if(is.character(phenoData)) ##if character read file
    phenoData <- read.phenoData(filename=phenoData)
  else{
    if(class(phenoData)!="phenoData"){
      if(widget){
        require(tkWidgets)
        phenoData <- read.phenoData(sampleNames=sampleNames,widget=TRUE)
      }
      else
        phenoData <- read.phenoData(sampleNames=sampleNames,widget=FALSE)
    }
  }
  
  ##get MIAME information
  if(is.character(description)){
    description <- read.MIAME(filename=description,widget=FALSE)
  }
  else{
    if(class(description)!="MIAME"){
      if(widget){
        require(tkWidgets)
        description <- read.MIAME(widget=TRUE)
      }
      else
        description <- new("MIAME")
    }
  }
  
  ##MIAME stuff
  description@preprocessing$filenames <- filenames
  if(exists("tksn")) description@samples$description <- tksn[,2]
  description@preprocessing$affyversion <- library(help=affy)$info[[2]][[2]][2]

  ##and now we are ready to read cel files
  return(read.affybatch(filenames=filenames,
                        phenoData=phenoData,
                        description=description,
                        notes=notes,
                        compress=compress,
                        rm.mask=rm.mask,
                        rm.outliers=rm.outliers,
                        rm.extra=rm.extra,
                        hdf5=hdf5,
                        hdf5FilePath=hdf5FilePath,
                        verbose=verbose))
}








read.cdffile <- function(file, compress=getOption("BioC")$affy$compress.cdf) {

  ff <- new("Cdf")

  ## ---------
  ## the extra operation on the string are done to match what is done
  ## is 'whatcdf'
  tmp <- getInfoInAffyFile(file, "CDF", unit="Chip", property="Name", compress=compress)

  
  tmp <- substr(tmp, 1, nchar(tmp)-2)
  ##we will use cleancdfname later
  ##tmp <- gsub("_","",tmp)
  ##tmp <- tolower(tmp)
  ff@cdfName <- tmp
  ##----------
  tmp <- .Call("readCDFfile", as.character(file),
               as.integer(3), as.integer(compress))
  tmp[tmp == ""] <- NA
  mydim <- dim(tmp)

  tmp <- factor(tmp)
  
  ff@name <- array(as.integer(tmp), mydim)

  ff@name.levels <- levels(tmp)
  rm(tmp)
  gc()
  tmp <- .Call("readCDFfile", as.character(file),
               as.integer(7), as.integer(compress))
  tmp[tmp == ""] <- NA
  mydim <- dim(tmp)
  
  tmp <- factor(tmp)
  
  ff@pbase <- array(as.integer(tmp), mydim)

  ff@pbase.levels <- levels(tmp)
  rm(tmp)
  gc()
  tmp <- .Call("readCDFfile", as.character(file),
               as.integer(8), as.integer(compress))
  tmp[tmp == ""] <- NA
  mydim <- dim(tmp)
  tmp <- factor(tmp)

  ff@tbase <- array(as.integer(tmp), mydim)

  ff@tbase.levels <- levels(tmp)
  rm(tmp)
  gc()
  
  tmp <- .Call("readCDFfile", as.character(file),
               as.integer(9), as.integer(compress))
  tmp[tmp == ""] <- NA
  mydim <- dim(tmp)

  ff@atom <- array(as.integer(tmp), mydim)

  gc() 

  return(ff)
}

read.celfile <- function(file, compress=getOption("BioC")$affy$compress.cel,
                         sd=FALSE, name=NULL,
                         rm.mask=FALSE, rm.outliers=FALSE, rm.extra=FALSE) {
  
  ff <- new("Cel", intensity=matrix(), sd=matrix(), name="",
            masks=matrix(), outliers=matrix())

  ff@cdfName <- whatcdf(file, compress=compress)
  
  intensity(ff) <- .Call("readCELfile", as.character(file),
                         as.integer(1), as.integer(compress))
  
  if (sd) {
    spotsd(ff) <- .Call("readCELfile",as.character(file),as.integer(2),
                   as.integer(compress))
  }

  masks(ff) <- .Call("getIndexExtraFromCEL", as.character(file),
                     as.character("MASKS"),
                     as.integer(compress))
  
  outliers(ff) <- .Call("getIndexExtraFromCEL", as.character(file),
                        as.character("OUTLIERS"),
                        as.integer(compress))
  
  if (rm.mask | rm.extra) {
    intensity(ff)[masks(ff)] <- NA
    if (! is.na.spotsd(ff))
      spotsd(ff)[masks(ff)] <- NA
  }
  
  if (rm.outliers | rm.extra) {
    intensity(ff)[outliers(ff)] <- NA
    if (! is.na.spotsd(ff))
      spotsd(ff)[outliers(ff)] <- NA
  }

  history(ff)$name <- paste("read from file:",sub("^/?([^/]*/)*", "", file, extended=TRUE))
  
  if (is.null(name)) {
    name <- sub("^/?([^/]*/)*", "", file, extended=TRUE)
  }
  
  ff@name <- name
  
  return(ff)
}

######################################################
#
# rma - RMA interface to c code
#
# the RMA method implemented in c code
#
# this code serves as interface to the c code.
# currently
# implemented (version 0.25) background correction
#
# Background correction code has been added.
#
# note this function does not leave the supplied
# AffyBatch unchanged if you select DESTRUCTIVE=TRUE. this is 
# for memory purposes but can be quite
# dangerous if you are not careful. Use destructive=FALSE if this is
# deemed likely to be a problem.
#
########################################################

rma <- function(object,subset=NULL, verbose=TRUE, destructive = FALSE,normalize=TRUE,...){

  rows <- length(probeNames(object))
  cols <- length(object)
 
  ngenes <- length(geneNames(object))
  
  #background correction
  bg.dens <- function(x){density(x,kernel="epanechnikov",n=2^14)}

  if (destructive){
  	exprs <- .Call("rma_c_complete",pm(object),mm(object),probeNames(object),ngenes,body(bg.dens),new.env(),normalize)
  } else {
	exprs <- .Call("rma_c_complete_copy",pm(object),mm(object),probeNames(object),ngenes,body(bg.dens),new.env(),normalize)
  }
  colnames(exprs) <- sampleNames(object)
  se.exprs <- array(NA, dim(exprs)) # to be fixed later, besides which don't believe much in nominal se's with medianpolish
  
  phenodata <- phenoData(object)
  annotation <- annotation(object)
  description <- description(object) 
  notes <- notes(object)
  
  new("exprSet", exprs = exprs, se.exprs = se.exprs, phenoData = phenodata, 
       annotation = annotation, description = description, notes = notes)
}
##took split from Biobase for consistency
split.AffyBatch <- function(x, f) {
  lenf <- length(f)
  exs <- exprs(x)
  pD <- phenoData(x)
  aN <- annotation(x)
  nc <- ncol(x)
  nr <- nrow(x)
  cdf <- x@cdfName
  if( (nrow(exs) %% lenf == 0 ) ) {
    splitexprs <- lapply(split(1:nrow(exs), f),
                         function(ind) exs[ind, , drop =
                                           FALSE])
    nsplit<-length(splitexprs)
    for(i in 1:nsplit) {
      splitexprs[[i]] <- new("exprSet",
                             exprs=splitexprs[[i]],
                             phenoData = pD, annotation= aN,
                             ncol=nc, nrow=nr, cdfName=cdf)
    }
    return(splitexprs)
  }  ##split the expressions
  if( (nrow(pData(x)) %% lenf ==0) ) {
    npD <- split(pD, f)
    nEx <- lapply(split(1:ncol(exs), f),
                  function(ind) exs[,ind,drop=FALSE])
    nsplit <- length(npD)
    for( i in 1:nsplit)
      npD[[i]] <- new("exprSet", exprs=nEx[[i]],
                      phenoData=npD[[i]],
                      annotation=aN,
                      ncol=nc, nrow=nr, cdfName=cdf)
    return(npD)
  }
  else
    stop("could not split")
}


  ##     if (is.list(f)) 
#         f <- interaction(f)
#     f <- factor(f)
#     lf <- levels(f)
#     y <- vector("list", length(lf))
#     names(y) <- lf

#     ###need to fix phenoData
#     for (k in lf) {
#       nchips <- sum(f == k)
#       y[[k]] <- new("AffyBatch",
#                     exprs = intensity(x)[, f == k, drop=FALSE],
#                     ##chipNames = sampleNames(x)[ f == k ],
#                     phenoData = new("phenoData"),
#                     cdfName = x@cdfName,
#                     nrow = x@nrow,
#                     ncol = x@ncol,
#                     ##nexp = nchips,
#                     annotation = paste(x@annotation, "(split)"),
#                     description = "split",
#                     notes = x@notes,
#                     ##history = history(x)[ f == k ]
#                     )
#     }
    
#     return(y)
# }






###these are summary functions they take matrices of probes x chips
###and return expression and se (when applicable)

##DEBUG: appending the se to the expression values in a same vector
##       is too much hackish (I think)... we need to think about something
##       better

avdiff <- function(x,constant=3){
  e <- apply(x,2,function(y){
    o <- order(y)
    yy <- y[-c(o[1],o[length(y)])] #take out biggest and smallest
    if(length(yy)<2)  # SK, some genes have only one probe
      mean(y)
    else
      mean(y[abs(y-mean(yy))<constant*sd(yy)])
  })
  list(exprs=e,se.exprs=apply(x,2,sd)/sqrt(nrow(x)))
}

li.wong <- function(data.matrix,remove.outliers=TRUE,
                    normal.array.quantile=0.5,
                    normal.resid.quantile=0.9,
                    large.threshold=3,
                    large.variation=0.8,
                    outlier.fraction=0.14,
                    delta = 1e-06,maxit=50,outer.maxit=50,verbose=FALSE){

  e <-  fit.li.wong(t(data.matrix),remove.outliers,normal.array.quantile,normal.resid.quantile,large.threshold,large.variation,outlier.fraction,delta,maxit,outer.maxit,verbose)
  list(exprs=e$theta,se.exprs=e$sigma.theta)
}


medianpolish <- function(x, ...){
  tmp <- medpolish(log2(x), trace.iter=FALSE, ...)
  ##rough estimate
  sigma <- 1.483*median(abs(as.vector(tmp$residuals)))/sqrt(nrow(x))
  list(exprs=tmp$overall + tmp$col,se.exprs=rep(sigma, ncol(x)))
}

tukeybiweight <- function(x, c=5, epsilon=0.0001){
  tmp <- function(x, c=5, epsilon=0.0001)
    {
      m <- median(x)
      s <- median(abs(x - m))
      u <- (x - m) / (c * s + epsilon)
      w <- rep(0, length(x))
      i <- abs(u) <= 1
      w[i] <- ((1 - u^2)^2)[i]
      t.bi <- sum(w * x) / sum(w)
      return(t.bi)
    }
  list(exprs=apply(log2(x),2,tmp),se.exprs=rep(NA,ncol(x)))
}


tukey.biweight <- function(x, c=5, epsilon=0.0001)
  {
    m <- median(x)
    s <- median(abs(x - m))
    u <- (x - m) / (c * s + epsilon)
    w <- rep(0, length(x))
    i <- abs(u) <= 1
    w[i] <- ((1 - u^2)^2)[i]
    t.bi <- sum(w * x) / sum(w)
    return(t.bi)
  }

tukeybiweight <-  function(x, c=5, epsilon=0.0001)
  list(exprs=apply(x,2,tukey.biweight,c=c,epsilon=epsilon),se.exprs=rep(NA,ncol(x)))


##this function changes the affymetrix cdf file name to the Bioconductor
##annotation name for that cdf file
## note: we had a hard time finding exact rules to match what is in the
## CEL file with what is in the CDF file
## ex: CEL says 'ecoli' while CDF says 'ecoligenome'
## or: CEL says '' while CDF says hu6800.1sq
cleancdfname <- function(cdfname, addcdf=TRUE) {
  i <- match(cdfname, mapCdfName$inCDF)
  if (is.na(i)) {
    tmp <- tolower(cdfname) #make lower case
    tmp <- gsub("_", "", tmp) #take out underscore
    tmp <- gsub("-", "", tmp) #take out underscore
    tmp <- gsub("\ ", "", tmp) ##take out spaces
    if(addcdf) tmp <- paste(tmp, "cdf", sep="")
  } else {
    tmp <- mapCdfName$inBioC[1]
  }
  return(tmp)
}
##this funnction gets the cdf from a celfile

whatcdf <- function(filename, compress=getOption("BioC")$affy$compress.cel){
  
  ##finds what cdf environment to use with cdf file
  tmp <- getInfoInAffyFile(filename,"CEL","HEADER","DatHeader",compress=compress) ##find appropriate line
  tmp <- strsplit(tmp," ")[[1]] #split by space
  tmp <- tmp[grep(".1sq",tmp)] #pick the one with 1sq (from experience)
  if (identical(tmp, character(0))) {
    warning("could not find CDF name, setting it to 'unknown'")
    tmp <- "unknown"
  }
  else {
    tmp <- gsub("\.1sq","",tmp) #take out .1sq
  }
  return(tmp)
}

  
write.celfile <- function(cel, file,
                          header.title="foo",
                          header.cdfName=cel@cdfName,
                          header.algorithm="Percentile") {

  
  n.row <- nrow(intensity(cel))
  n.col <- ncol(intensity(cel))
  
  cat("[CEL]\nVersion=3",
      "",
      "[HEADER]",
      paste("Cols=", n.col, sep=""),
      paste("Rows=", n.row, sep=""),
      paste("TotalX=", n.col, sep=""),
      paste("TotalY=", n.row, sep=""),
      "OffsetX=0",
      "OffsetY=0",
      "GridCornerUL=234 234",
      "GridCornerUR=4495 245",
      "GridCornerLR=4492 4511",
      "GridCornerLL=231 4500",
      "Axis-invertX=0",
      "AxisInvertY=0",
      "swapXY=0",
      paste("DatHeader=[52..46146]  ", header.title, ":CLS=4733 RWS=4733 XIN=3  YIN=3  VE=17        2.0 02/16/01 12:52:10    \t\t ", header.cdfName," \t \t \t \t \t \t \t \t 6", sep=""), # not really correct...
      paste("Algorithm", header.algorithm, sep="="),
      "AlgorithmParameters=Percentile:75;CellMargin:2;OutlierHigh:1.500;OutlierLow:1.004",
      "",
      "[INTENSITY]",
      paste("NumberCells=", n.row * n.col, sep=""),
      "CellHeader=X\tY\tMEAN\tSTDV\tNPIXELS",
      file=file,
      sep="\n")

  x <- intensity(cel)
  
  if (is.na.spotsd(cel))
    y <- rep(0, length(x))
  else
    y <- spotsd(cel)
  
  write.table(cbind(c(row(x))-1, c(col(x))-1, c(x), c(y), 36),
              file=file,
              append=TRUE,
              col.names=FALSE,
              row.names=FALSE,
              quote=FALSE,
              sep="\t")

  cat("\n", file=file, append=TRUE, sep="")
  
  cat("[MASKS]",
      paste("NumberCells=", length(masks(cel)), sep=""),   
      "CellHeader=X    Y",
      file=file,
      append=TRUE,
      sep="\n")

  x <- masks(cel)
  write.table(cbind(c(row(x))-1, c(col(x))-1),
              file=file,
              append=TRUE,
              col.names=FALSE,
              row.names=FALSE,
              quote=FALSE,
              sep="\t")  

  cat("\n", file=file, append=TRUE, sep="")
  
  cat("[OUTLIERS]",
      paste("NumberCells=", length(outliers(cel)), sep=""),
      "CellHeader=X    Y",
      file=file,
      append=TRUE,
      sep="\n")
  
  x <- outliers(cel)
  write.table(cbind(c(row(x))-1, c(col(x))-1),
              file=file,
              append=TRUE,
              col.names=FALSE,
              row.names=FALSE,
              quote=FALSE,
              sep="\t")  
  
}
.initNormalize <- function(where, all.affy) {
  if (debug.affy123) cat("-->detecting normalization methods from naming convention\n")
  
  ## this could move into the respective methods of AffyBatch later

  start <- nchar("normalize.AffyBatch.")
  assign("normalize.AffyBatch.methods",
         substr(all.affy[grep("normalize\.AffyBatch\.*", all.affy)], start+1, 100),
         envir=as.environment(where)) 
}

.initExpression <- function(where, all.affy) {
  if (debug.affy123) cat("-->detecting expression value methods from naming convention\n")
  
  ## the first one is deprecated (well... "should be"...)
  assign("generateExprSet.methods",
         substr(all.affy[grep("generateExprVal\.method\.*", all.affy)], 24,100),
         envir=as.environment(where))
  assign("express.summary.stat.methods",
         substr(all.affy[grep("generateExprVal\.method\.*", all.affy)], 24,100),
         envir=as.environment(where))
}

.initBackgroundCorrect <- function(where, all.affy) {
  if (debug.affy123) cat("-->detecting background correction methods from naming convention\n")
  ##assign("bg.correct.methods",
  ##       substr(ls(where)[grep("bg.correct\.*", ls(where))], 12,100),
  ##       envir=as.environment(where))
  start <- nchar("bg.correct.")
  assign("bgcorrect.methods",
         substr(all.affy[grep("bg\.correct\.*", all.affy)], start+1, 100),
         envir=as.environment(where))
       }

.initPmCorrect <- function(where, all.affy) {
  if (debug.affy123) cat("-->detecting pm correction methods from naming convention\n")
  start <- nchar("pmcorrect.")
  assign("pmcorrect.methods",
         substr(all.affy[grep("pmcorrect\.*", all.affy)], start+1, 100),
         envir=as.environment(where))
}

.initMapCdfName <- function(where) {
  filepath <- file.path(.path.package("affy"), "data", "mapCdfName.tab")
  mapCdfName <- read.table(filepath, colClasses=rep("character", 3), quote="\"", sep="\t", comment="#", row.names=NULL, header=TRUE)
  assign("mapCdfName", mapCdfName, envir=as.environment(where))
}

.First.lib <- function(libname, pkgname, where) {
  

  where <- match(paste("package:", pkgname, sep=""), search())
  all.affy <- ls(where)
  
  ## DEBUG flag
  ##assign("debug.affy123", TRUE, envir=as.environment(where))
  assign("debug.affy123", FALSE, envir=as.environment(where))
  
  message <- FALSE
  
  if (message) {
    cat(rep("*",13),"\n",sep="")
    cat("affy: development version\n")
    cat(rep("*",13),"\n",sep="")
    cat("The package is under major changes.\n")
    cat("unpack the package and read the file NEWS to know more....\n")
    cat("The draft for the new vignette (called 'affy2') is distributed with the pacakge\n")
    cat(rep("*",13),"\n",sep="")
    cat("demo(affy.tour) will eventually work and give an overview...\n")
    cat(rep("*",13),"\n",sep="")
    cat("IMPORTANT: you need the latest versions of the required packages too.\n")
    cat(rep("*",13),"\n",sep="")
  }
  
  library.dynam("affy", pkgname, libname)
  
  require(Biobase, quietly=TRUE) ##Biobase uses methods
  require(modreg, quietly=TRUE)
  require(eda, quietly=TRUE)

  ##i was having troulbes, and changing where to
  ###match(paste("package:", pkgname, sep=""), search()) fixed.. thanx to RG
  
  .initNormalize(match(paste("package:", pkgname, sep=""), search()), all.affy)
  .initExpression(match(paste("package:", pkgname, sep=""), search()), all.affy)
  .initBackgroundCorrect(match(paste("package:", pkgname, sep=""), search()), all.affy)
  .initPmCorrect(match(paste("package:", pkgname, sep=""), search()), all.affy)
  .initMapCdfName(match(paste("package:", pkgname, sep=""), search()))
  .initCdf(match(paste("package:", pkgname, sep=""), search()))
  .initCel(match(paste("package:", pkgname, sep=""), search()))
  .initAffyBatch(match(paste("package:", pkgname, sep=""), search()))
  .initProbeSet(match(paste("package:", pkgname, sep=""), search()))


  ## add affy specific options
  ## (not unlike what is done in 'Biobase')
  if (is.null(getOption("BioC"))) {
    BioC <- list()
    class(BioC) <- "BioCOptions"
    options("BioC"=BioC)
  }
  
  ##affy$urls <- list( bioc = "http://www.bioconductor.org")

  probesloc.first <- list(what="environment", where=.GlobalEnv)
  probesloc.second <- list(what="package", where=NULL, probesloc.autoload=TRUE)
  probesloc.third <- list(what="data", where="affy")

  
  ## i added use.widgets=FALSE. Shuold it be true?
  ## --> I do not think so. Let's keep it FALSE. 
  affy <- list(compress.cdf=FALSE, compress.cel=FALSE, use.widgets=FALSE,
               probesloc = list(probesloc.first, probesloc.second, probesloc.third))
  class(affy) <- "BioCPkg"
  
  BioC <- getOption("BioC")
  BioC$affy <- affy
  options("BioC"=BioC)
  ## ---

  cacheMetaData(as.environment(where))

}

.Last.lib <- function(libpath) {
  options("BioC")$affy <- NULL
  dyn.unload(file.path(libpath, "libs",
                       paste("affy", .Platform$"dynlib.ext", sep="")))
  .Dyn.libs <- .Dyn.libs[- which(.Dyn.libs == "affy")]
}





