#
# Test out the "user mode" functions, using a log-rank test
#   y is a survival object (2 column matrix)
#

# This function returns the node label, for which I have chosen to use
#  the overall event rate and the number of events.
# It also returns the "deviance", for which I am using
#   sum[ sqrt(observed) - sqrt(expected)]^2
# based on the fact that the square root of the number of events has approx
# constant variance.
temp1 <- function(y, wt, parms) {
    rate <-  sum(y[,2]*wt)/ sum(y[,1]*wt)
    expect <- rate * y[,1]  #expected number of events
    rss <- sum(wt* (sqrt(y[,2]) - sqrt(expect))^2)  #scaled obs-exp events
    list(label= c(rate, sum(y[,2]*wt)), deviance=rss)
    }

# This routine splits the node
# Key trick: the numerator of the log-rank statistic is sum(M_i) within the
#  left hand group, where M are the martingale residuals from a NULL model.
# The scaled statistic is approximately (O-E)^2/E
temp2 <- function(y, wt, x, parms, continuous) {
    # Compute the martingale residuals
    n <- nrow(y)
    ord <- order(y[,1], -y[,2])
    temp <- .C("coxmart", as.integer(n),
		          as.integer(0),
		          as.double(y[ord,1]),
		          as.integer(y[ord,2]),
		          as.integer(rep(0,n)),
		          as.double( rep(1.0,n)),
		          as.double(wt[ord]),
		          resid=double(n))$resid
    mresid <- double(n)
    mresid[ord] <- temp
    status <- y[,2]

    # The logrank statistic is approx a scaled sum of the residuals
    if (continuous) {
	lr <- cumsum((mresid*wt)[-n])
	expect <- cumsum((status-mresid)*wt)[-n]
	denom <- 1/expect + 1/(sum(status*wt) - expect)
	list(goodness= sqrt(denom * lr^2), direction=sign(lr))
	}
    else {
	# Categorical X variable
	ux <- sort(unique(x))
	sum1 <- tapply(y[,1]*wt, x, sum)
	sum2 <- tapply(y[2,]*wt, x, sum)
	sum3 <- tapply(mresid*wt,x, sum)
	rates <- sum2/sum1

	# We can order the categories by their rates
	#  then use the same code as for a non-categorical
	ord <- order(rates)
	n <- length(ord)
	lr <- cumsum((sum3[ord])[-n])
	expect <- cumsum(sum2-sum3)[-n]
	denom <- 1/expect + 1/(sum(status*wt) - expect)
	list(goodness= sqrt(denom * lr^2), direction = ux[ord])
	}
    }
	
#Initialize: rescale the data so that the event rate for the top node is =1
temp3 <- function(y, offset, parms, wt) {
    if (!is.null(offset)) stop ("Offset not allowed for logrank")
    if (!is.Surv(y)) stop("Y must be a survival object")
    ytime <- y[,1]
    ystat <- y[,2]
    scale <- sum(ystat)/sum(ytime)
    list(y=cbind(ytime*scale, ystat), parms=0, numresp=2, numy=2,
	      summary= function(yval, dev, wt, ylevel, digits ) {
		  paste("  rate=", formatg(yval[,1], digits),
			" (", formatg(yval[,2], digits), " events)",
			", MSE=" , formatg(dev/wt, digits),
			sep='')
	     },	
	 text= function(yval, dev, wt, ylevel, digits, n, use.n ) {
	     if(use.n) paste(formatg(yval,digits),"\nn=", n, sep="")
	     else      paste(formatg(yval,digits))}
    )}


lrlist <- list(eval=temp1, split=temp2, init=temp3)

fit1 <- rpart(Surv(time, status) ~ age + sex + ph.ecog + ph.karno + 
	           pat.karno + meal.cal + wt.loss,
	      lung, control=rpart.control(minsplit=10, xval=0, cp=.011),
	     method=lrlist)

fit2 <- rpart(Surv(time, status) ~ age + sex + ph.ecog + ph.karno + 
	           pat.karno + meal.cal + wt.loss,
	      lung, control=rpart.control(minsplit=10, xval=0, cp=.012))

# For the top splits, the two methods should agree quite closely based on
#  the theory of LeBlanc and Crowley's paper.  They even agree fairly well
#  further down, at which time the log-rank test has implicitly rescaled the
#  data.

fit3 <- rpart(Surv(time, status) ~ age+ sex + ph.ecog + ph.karno + 
	           pat.karno + meal.cal + wt.loss, cost=1 + 1:7/10,
	      lung, control=rpart.control(minsplit=10, xval=0, cp=.012))

print(fit1)
print(fit2)
print(fit3)
