# Lurking variable plot for arbitrary covariate.
#
#
# $Revision: 1.26 $ $Date: 2008/07/25 22:23:53 $
#

lurking <- function(object, covariate, type="eem",
                    cumulative=TRUE,
                    clipwindow=default.clipwindow(object),
                    rv = NULL,
                    plot.sd=is.poisson.ppm(object), plot.it=TRUE,
                    typename,
                    covname, oldstyle=FALSE,
                    check=TRUE, ..., splineargs=list(spar=0.5)) {
  
  # validate object
  if(is.ppp(object))
    object <- ppm(object, ~1, forcefit=TRUE)
  verifyclass(object, "ppm")

  # match type argument
  type <- pickoption("type", type,
                     c(eem="eem",
                       raw="raw",
                       inverse="inverse",
                       pearson="pearson",
                       Pearson="pearson"))
  if(missing(typename))
    typename <- switch(type,
                       eem="exponential energy weights",
                       raw="raw residuals",
                       inverse="inverse-lambda residuals",
                       pearson="Pearson residuals")

  # may need to refit the model
  if(plot.sd && is.null(getglmfit(object)))
    object <- update(object, forcefit=TRUE)
  
  # extract spatial locations 
  Q <- quad.ppm(object, drop=TRUE)
  datapoints <- Q$data
  quadpoints <- union.quad(Q)
  Z <- is.data(Q)
  wts <- w.quad(Q)

  #################################################################
  # compute the covariate

  if(is.im(covariate)) {
    covvalues <- covariate[quadpoints, drop=FALSE]
  } else if(is.vector(covariate) && is.numeric(covariate)) {
    covvalues <- covariate
    if(length(covvalues) != quadpoints$n)
      stop("Length of covariate vector,", length(covvalues), "!=",
           quadpoints$n, ", number of quadrature points")
  } else if(is.expression(covariate)) {
    # Expression involving covariates in the model
    # Set up environment for evaluating expression
    if(!is.null(object$covariates)) {
      # Expression may involve an external covariate
      # Recompute model, extracting all covariates 
      object <- update(object, allcovar=TRUE)
      # harmonise, just in case
      Q <- quad.ppm(object, drop=TRUE)
      datapoints <- Q$data
      quadpoints <- union.quad(Q)
      Z <- is.data(Q)
      wts <- w.quad(Q)
      # 
    }
    glmdata <- getglmdata(object, drop=TRUE)
    # Fix special cases
    if(is.null(glmdata)) {
      # default 
      glmdata <- data.frame(x=quadpoints$x, y=quadpoints$y)
      if(is.marked(quadpoints))
        glmdata$marks <- marks(quadpoints)
    }
    # ensure x and y are in data frame 
    if(!all(c("x","y") %in% names(glmdata))) {
      glmdata$x <- quadpoints$x
      glmdata$y <- quadpoints$y
    } 
    # Evaluate expression
    sp <- parent.frame()
    covvalues <- eval(covariate, envir= glmdata, enclos=sp)
    if(!is.numeric(covvalues))
      stop("The evaluated covariate is not numeric")
  } else 
    stop(paste("The", sQuote("covariate"), "should be either",
               "a pixel image, an expression or a numeric vector"))

  #################################################################
  # Validate covariate values

  if(naughty <- any(nbg <- is.na(covvalues))) {
    # remove NA's
    if(is.im(covariate))
      warning(paste(sum(nbg), "out of", length(nbg),
                  "quadrature points discarded because",
                  ngettext(sum(nbg), "it lies", "they lie"),
                 "outside the domain of the covariate image"))
    else
      warning(paste(sum(nbg), "out of", length(nbg),
                 "covariate values discarded because",
                 ngettext(sum(nbg), "it is NA", "they are NA")))
    # remove offending points
    ok <- !nbg
    Q <- Q[ok]
    covvalues <- covvalues[ok]
    quadpoints <- quadpoints[ok]
    # adjust
    Z <- is.data(Q)
    wts <- w.quad(Q)
  }
  if(any(is.infinite(covvalues) | is.nan(covvalues)))
    stop("covariate contains Inf or NaN values")

  # Quadrature points marked by covariate value
  covq <- quadpoints %mark% as.numeric(covvalues)

  ################################################################
  # Residuals/marks attached to appropriate locations.
  # Stoyan-Grabarnik weights are attached to the data points only.
  # Others (residuals) are attached to all quadrature points.

  resvalues <- 
    if(!is.null(rv)) rv
    else if(type=="eem") eem(object, check=check)
    else residuals.ppm(object, type=type, drop=TRUE, check=check)
  if(naughty && type != "eem")
    resvalues <- resvalues[ok]

  res <- (if(type == "eem") datapoints else quadpoints) %mark% as.numeric(resvalues)

  # ... and the same locations marked by the covariate
  covres <- if(type == "eem") covq[Z] else covq

  # NAMES OF THINGS
  # name of the covariate
  if(missing(covname)) 
    covname <- if(is.expression(covariate)) paste(covariate) else "covariate"
  # type of residual/mark
  if(missing(typename)) 
    typename <- if(!is.null(rv)) "rv" else attr(resvalues, "typename")

  #######################################################################
  # START ANALYSIS
  # Clip to subwindow if needed
  clip <- !is.poisson.ppm(object) ||
              (!missing(clipwindow) && !is.null(clipwindow))
  if(clip) {
    covq <- covq[clipwindow]
    res <- res[clipwindow]
    covres <- covres[clipwindow]
    clipquad <- inside.owin(quadpoints$x, quadpoints$y, clipwindow)
    wts <- wts[ clipquad ]
  }

  # -----------------------------------------------------------------------
  # (A) EMPIRICAL CUMULATIVE FUNCTION
  # based on data points if type="eem", otherwise on quadrature points

    # cumulative sums which ignore NA's
    cumsumna <- function(x) {
      x[is.na(x)] <- 0
      return(cumsum(x))
    }

      # Reorder the data/quad points in order of increasing covariate value
      # and then compute the cumulative sum of their residuals/marks
    markscovres <- marks(covres)
    o <- order(markscovres)
    covsort <- markscovres[o]
    cummark <- cumsumna(marks(res)[o])
      # we'll plot(covsort, cummark) in the cumulative case

  # (B) THEORETICAL MEAN CUMULATIVE FUNCTION
  # based on all quadrature points
    
      # Range of covariate values
    covqmarks <- marks(covq)
    covrange <- range(covqmarks, na.rm=TRUE)
      # Suitable breakpoints
    cvalues <- seq(covrange[1], covrange[2], length=100)
    csmall <- cvalues[1] - diff(cvalues[1:2])
    cbreaks <- c(csmall, cvalues)
      # cumulative area as function of covariate values
    covclass <- cut(covqmarks, breaks=cbreaks)
    increm <- tapply(wts, covclass, sum)
    cumarea <- cumsumna(increm)
      # compute theoretical mean (when model is true)
    mean0 <- if(type == "eem") cumarea else rep(0, length(cumarea))
      # we'll plot(cvalues, mean0) in the cumulative case

  # (A'),(B') DERIVATIVES OF (A) AND (B)
  #  Required if cumulative=FALSE  
  #  Estimated by spline smoothing (with x values jittered)
    if(!cumulative) {
      # fit smoothing spline to (A) 
      ss <- do.call("smooth.spline",
                    append(list(covsort, cummark),
                           splineargs)
                    )
      # estimate derivative of (A)
      derivmark <- predict(ss, covsort, deriv=1)$y 
      # similarly for (B) 
      ss <- do.call("smooth.spline",
                    append(list(cvalues, mean0),
                           splineargs)
                    )
      derivmean <- predict(ss, cvalues, deriv=1)$y
    }
  
  # -----------------------------------------------------------------------
  # Store what will be plotted
  
   if(cumulative) {
     empirical <- data.frame(covariate=covsort, value=cummark)
     theoretical <- data.frame(covariate=cvalues, mean=mean0)
   } else {
     empirical <- data.frame(covariate=covsort, value=derivmark)
     theoretical <- data.frame(covariate=cvalues, mean=derivmean)
   }

  # ------------------------------------------------------------------------
  
    # (C) STANDARD DEVIATION if desired
    # (currently implemented only for Poisson)
    # (currently implemented only for cumulative case)

    if(plot.sd && !is.poisson.ppm(object))
      warning(paste("standard deviation is calculated for Poisson model;",
                    "not valid for this model"))

    if(plot.sd && cumulative) {
      # Fitted intensity at quadrature points
      lambda <- fitted.ppm(object, type="trend", drop=TRUE, check=check)
      if(naughty) lambda <- lambda[ok]
      # Fisher information for coefficients
      asymp <- vcov(object,what="internals")
      Fisher <- asymp$fisher
      # Local sufficient statistic at quadrature points
      suff <- asymp$suff
      if(naughty) suff <- suff[ok, ,drop=FALSE]
      # Clip if required
      if(clip) {
        lambda <- lambda[clipquad]
        suff   <- suff[clipquad, , drop=FALSE]  # suff is a matrix
      }
      # First term: integral of lambda^(2p+1)
      switch(type,
             pearson={
               varI <- cumarea
             },
             raw={
               # Compute sum of w*lambda for quadrature points in each interval
               dvar <- tapply(wts * lambda, covclass, sum)
               # tapply() returns NA when the table is empty
               dvar[is.na(dvar)] <- 0
               # Cumulate
               varI <- cumsum(dvar)
             },
             inverse=, # same as eem
             eem={
               # Compute sum of w/lambda for quadrature points in each interval
               dvar <- tapply(wts / lambda, covclass, sum)
               # tapply() returns NA when the table is empty
               dvar[is.na(dvar)] <- 0
               # Cumulate
               varI <- cumsum(dvar)
             })

      # variance-covariance matrix of coefficients
      V <- try(solve(Fisher), silent=TRUE)
      if(inherits(V, "try-error")) {
        warning("Fisher information is singular; reverting to oldstyle=TRUE")
        oldstyle <- TRUE
      }
      
      # Second term: B' V B
      if(oldstyle) {
        varII <- 0
      } else {
        # lamp = lambda^(p + 1)
        lamp <- switch(type,
                       raw     = lambda, 
                       pearson = sqrt(lambda),
                       inverse =,
                       eem     = ifelse(lambda > 0, 1, 0))
        # Compute sum of w * lamp * suff for quad points in intervals
        Bcontrib <- as.vector(wts * lamp) * suff
        dB <- matrix(, nrow=length(cumarea), ncol=ncol(Bcontrib))
        for(j in seq(ncol(dB))) 
          dB[,j] <- tapply(Bcontrib[,j], covclass, sum)
        # tapply() returns NA when the table is empty
        dB[is.na(dB)] <- 0
        # Cumulate columns
        B <- apply(dB, 2, cumsum)
        # compute B' V B for each i 
        varII <- diag(B %*% V %*% t(B))
      }
      #
      # variance of residuals
      varR <- varI - varII
      # trap numerical errors
      nbg <- (varR < 0)
      if(any(nbg)) {
        ran <- range(varR)
        varR[nbg] <- 0
        relerr <- abs(ran[1]/ran[2])
        nerr <- sum(nbg)
        if(relerr > 1e-6) {
          browser()
          warning(paste(nerr, "negative",
                        ngettext(nerr, "value (", "values (min="),
                        signif(ran[1], 4), ")",
                        "of residual variance reset to zero",
                        "(out of", length(varR), "values)"))
        }
      }
      theoretical$sd <- sqrt(varR)
    }

    # ---------------  PLOT THEM  ----------------------------------
    if(plot.it) {
      # work out plot range
      mr <- range(c(0, empirical$value, theoretical$mean), na.rm=TRUE)
      if(!is.null(theoretical$sd))
        mr <- range(c(mr, theoretical$mean + 2 * theoretical$sd,
                          theoretical$mean - 2 * theoretical$sd),
                    na.rm=TRUE)

      # start plot
      vname <- paste(if(cumulative)"cumulative" else "marginal", typename)
      do.call("plot",
              resolve.defaults(
                               list(covrange, mr),
                               list(type="n"),
                               list(...),
                               list(xlab=covname, ylab=vname)))
      # (A)/(A') Empirical
      lines(value ~ covariate, empirical, ...)
      # (B)/(B') Theoretical mean
      do.call("lines",
              resolve.defaults(
                               list(mean ~ covariate, theoretical),
                               list(...),
                               list(lty=2)))
      # (C) Standard deviation 
      if(!is.null(theoretical$sd)) {
        do.call("lines",
                resolve.defaults(
                                 list(mean + 2 * sd ~ covariate, theoretical),
                                 list(...),
                                 list(lty=3)))
        do.call("lines",
                resolve.defaults(
                                 list(mean - 2 * sd ~ covariate, theoretical),
                                 list(...),
                                 list(lty=3)))
      }
    }
  
    # ----------------  RETURN COORDINATES ----------------------------
  stuff <- list(empirical=empirical, theoretical=theoretical)

  return(invisible(stuff))
}

