bsml {unknown}R Documentation

Basis Selection from Multiple Libraries

Description

This is the implementation of the namesake methodology. You can supply as many basis libraries as you want that can potentially fit the data well. The program will adaptively select basis functions from each library and find the optimal model complexity by using the Generalized Degree of Freedom.

Usage

bsml(y, baseslist, method = "bsmlc", sub.maxbas = c(), maxbas = 30, crit = "GCV", idf = 1.2, backward = F, elim.crit = "AIC", ridge = F, approach = "gdf", gdf.control = list(nrep = 100, tauhat = "Rice"), gcvpk.control = list(delta = 0.1, job = 1005, lamlim = c(-6, 2), tau = 10, ntbl = 100, maxtbl = 100))

Arguments

y Responses.
baseslist List of libraries.
method Three methods available: HAS, BSML-C and BSML-S.
sub.maxbas Maximum number of bases to be pre-selected for each library.
maxbas Maximum number of bases to be selected.
crit Basis selection criterion for HAS. Available options are "AIC", "BIC", "Cp" and "GCV".
idf IDF value for HAS. Default is 1.2. If value NA is supplied, it will be estimated.
backward Backward elimination enabler. True or False. If enabled, a further backward elimination step will be carried out using the criterion specified in "elim.crit".
elim.crit Backward elimination criterion. Available options are "AIC", "BIC", "Cp" and "GCV".
ridge Use ridge regression or not. HAS only.
approach GDF approach or Covariance Penalty approach. For BSML-C only.
gdf.control Controlling parameters for GDF estimation.
gcvpk.control Controlling parameters for HAS.

Value

coefficients Coefficients
residuals Residuals
fitted.values Fitted Values
chosen.bases.full Full list of indices of chosen bases, excluding null bases.
chosen.bases.trim List of indices of chosen bases in the final model, excluding null bases.
chosen.bases.matrix Matrix of chosen bases in final model, excluding null bases.
score Scores used to determine optimal model complexity. The first score is for the null model.
idf Inflated Degree of Freedom for each chosen basis
gdf Cumulated Generalized Degree of Freedom for the corresponding number of chosen bases in the model.
nb Total number of bases in the model, including both null and chosen bases.
index.bas HAS only. Indices of chosen bases in one-dimensional designations. For internal use.

Author(s)

Junqing Wu, Jeff Sklar, Wendy Meiring, Yuedong Wang

Examples

##############################
# Heavisine Function Example #
##############################
n=128
x <- seq(0,1,length=n)
pt <- (rep(1,n)%o%x)[,-c(n)]
y <- 2.2*(4*sin(4*pi*x)-sign(x-.3)-sign(.72-x))
set.seed(123)
sigma=3
y.e <- y + sigma*rnorm(n)
null.bas.per <- cbind(rep(1,length=n))
lib.tp0  <- 1*cbind((x-pt)>0)
baseslist=list(null.bas.per,lib.tp0)
has.obj=bsml(y.e,baseslist,method="has")
bsmlc.obj=bsml(y.e,baseslist,method="bsmlc")
bsmls.obj=bsml(y.e,baseslist,method="bsmls")
plot(x,y.e)
lines(x,has.obj$fit,col="red")
lines(x,bsmlc.obj$fit,col="green")
lines(x,bsmls.obj$fit,col="blue")

#######################
# Geyser Data Example #
#######################
library(assist)
library(MASS)
data(geyser)
n <- nrow(geyser)
y <- geyser[,1]
x <- (geyser[,2]-min(geyser[,2]))/(max(geyser[,2])-min(geyser[,2]))/1.1
bas.nul <- as.matrix(cbind(1,x-.5))
bas.cub <- as.matrix(cubic(x,unique(x)))
# remove the first few and the last few points
loc <- seq(.2,.8,len=10)
pt <- rep(1,n)%o%loc
bas.con <- as.matrix((x-pt)*(x-pt>0))
baslist <- list(bas.nul,bas.cub,bas.con) 

# HAS
baslist.has <- list(bas.nul,bas.cub)
has.obj <- bsml(y, baslist.has, maxbas=50, method="has",gcvpk.control=list(delta=.001,job=1001,lamlim=c(-6,2),tau=10,ntbl=100,maxtbl=100))
# BSML-C
bsmlc.obj <- bsml(y, baslist, maxbas=30, gdf.control=list(nrep=500,tauhat="Rice"),method="bsmlc")
# BSML-S
bsmls.obj <- bsml(y, baslist, maxbas=30, gdf.control=list(nrep=500,tauhat="Rice"),method="bsmls")

#postscript(file="geyser.ps",height=4,width=6.5,horizontal=F,pointsize=9)
par(mfrow=c(1,1), mgp=c(2,1,0), mar=c(3,3,3,1)+0.1)
plot(geyser[,2], y, xlab="duration (min)", ylab="waiting time (min)",
     xlim=c(0.8,5.5), type="n", axes=F)
box()
axis(2)
axis(1, at=c(1,2,3,4,5), label=as.character(c(1,2,3,4,5)), cex=.5)
points(geyser[,2], y, cex=.4)
f <- ssr(y~x, cubic(x))
xx <- x*(max(geyser[,2])-min(geyser[,2]))+min(geyser[,2])
ord <- order(xx)
xx <- xx[ord]
lines(xx,f$fit[ord],lty=2)
lines(xx,bsmls.obj$fit[ord],lty=1,col="blue")
lines(xx,bsmlc.obj$fit[ord],lty=3,col="green")
lines(xx,has.obj$fit[ord],lty=4,col="red")
legend(1,65,lty=c(2,1,3,4),legend=c("spline","BSML-S","BSML-C","HAS"),cex=.8)
xlab <- seq(0,1,by=.2)
xxlab <- xlab*(max(geyser[,2])-min(geyser[,2]))+min(geyser[,2])
axis(3, at=xxlab, label=c("0.0","0.2","0.4","0.6","0.8","1.0"),cex=0.5)
#dev.off()

###########################
# Motorcycle Data Example #
###########################
library(assist)
library(MASS)
data(mcycle)
n <- nrow(mcycle)
y <- mcycle[,2]
x <- (mcycle[,1]-min(mcycle[,1]))/(max(mcycle[,1])-min(mcycle[,1]))/1.1
bas.nul <- as.matrix(cbind(1,x-.5))
bas.cub <- as.matrix(cubic(x,unique(x)))
# remove the first few and the last few points
pt <- (rep(1,n)%o%unique(x))[,-c(1:19,86:length(unique(x)))]
bas.con <- as.matrix((x-pt)*(x-pt>0))
baslist <- list(bas.nul,bas.cub,bas.con) 

# HAS
baslist.has <- list(bas.nul,bas.cub)
has.obj <- bsml(y, baslist.has, maxbas=50, method="has",gcvpk.control=list(delta=.001,job=1001,lamlim=c(-6,2),tau=10,ntbl=100,maxtbl=100))
# BSML-C
bsmlc.obj <- bsml(y, baslist, maxbas=30, gdf.control=list(nrep=500,tauhat="Rice"),method="bsmlc")
# BSML-S
bsmls.obj <- bsml(y, baslist, maxbas=30, gdf.control=list(nrep=500,tauhat="Rice"),method="bsmls")
#postscript(file="mcycle.ps",height=4,width=6.5,horizontal=F,pointsize=9)
par(mfrow=c(1,1), mgp=c(2,1,0), mar=c(3,3,3,1)+0.1)
plot(mcycle[,1], y, xlab="time (ms)", ylab="acceleration (g)", xlim=c(0,60), type="n", axes=F)
box()
axis(2)
axis(1, at=c(0,10,20,30,40,50,60), label=as.character(c(0,10,20,30,40,50,60)), cex=.5)
points(mcycle[,1], y, cex=.4)
f <- ssr(y~x, cubic(x))
xx <- x*(max(mcycle[,1])-min(mcycle[,1]))+min(mcycle[,1])
lines(xx,f$fit,lty=2)
lines(xx,bsmls.obj$fit,lty=1,col="blue")
lines(xx,bsmlc.obj$fit,lty=3,col="green")
lines(xx,has.obj$fit,lty=4,col="red")
legend(40,-80,lty=c(2,1,3,4),legend=c("spline","BSML-S","BSML-C","HAS"),cex=.8)
xlab <- seq(0,1,by=.2)
xxlab <- xlab*(max(mcycle[,1])-min(mcycle[,1]))+min(mcycle[,1])
axis(3, at=xxlab, label=c("0.0","0.2","0.4","0.6","0.8","1.0"),cex=0.5)
#dev.off()

######################
# Ozone Data Example #
######################
library(assist)
library(mda)
data(Arosa)
attach(Arosa)
csmonth <- (month-0.5)/12
csyear <- (year-1)/45
x <- (month+12*(year-1)-.5)/(46*12)
grid <- seq(0,1,len=100)
bas.nul <- as.matrix(cbind(1,x-.5))
bas.per <- as.matrix(periodic(csmonth,unique(csmonth)))
bas.cub <- as.matrix(cubic(x,unique(x)))
baslist <- list(bas.nul,bas.per,bas.cub) 

#### fit with one variable as long time series
# fit SS ANOVA
ozone.ssanova.fit1 <- ssr(thick~x, rk=list(periodic(csmonth),cubic(x)))
ssanovafit <- ozone.ssanova.fit1$fit
m1 <- sapply(split(thick,month),mean)
m1 <- m1-mean(m1)
m2 <- sapply(split(thick,year),mean)
m2 <- m2-mean(m2)
grid1 <- data.frame(csmonth=grid, x=.5)
grid2 <- data.frame(csmonth=.5, x=grid)
p.ozone.ssanova11 <- predict(ozone.ssanova.fit1,grid1,terms=c(0,0,1,0))
p.ozone.ssanova12 <- predict(ozone.ssanova.fit1,grid2,terms=c(0,1,0,1))
ssanovafitmonth <- p.ozone.ssanova11$fit <- p.ozone.ssanova11$fit-mean(p.ozone.ssanova11$fit)
ssanovafityear <- p.ozone.ssanova12$fit <- p.ozone.ssanova12$fit-mean(p.ozone.ssanova12$fit)

# fit HAS
ozone.bsmlfit1 <- bsml(thick,baslist,method="has")
hasfit <- ozone.bsmlfit1$fit
finsel <- ozone.bsmlfit1$chosen.bases.matrix
finselper <- ozone.bsmlfit1$chosen.bases.trim[,1]==2
finselcub <- ozone.bsmlfit1$chosen.bases.trim[,1]==3
finbasper <- ozone.bsmlfit1$chosen.bases.matrix[,finselper]
finbascub <- ozone.bsmlfit1$chosen.bases.matrix[,finselcub]
finco <- ozone.bsmlfit1$coef[!is.na(ozone.bsmlfit1$coef)][-c(1:ncol(bas.nul))]
fincoper <- finco[finselper]
fincocub <- finco[finselcub]
hasfityear <- ozone.bsmlfit1$coef[2]*grid+cubic(grid,unique(x)[ozone.bsmlfit1$chosen.bases.trim[,2][ozone.bsmlfit1$chosen.bases.trim[,1]==3]])%*%fincocub
hasfitmonth <- periodic(grid,unique(csmonth)[ozone.bsmlfit1$chosen.bases.trim[,2][ozone.bsmlfit1$chosen.bases.trim[,1]==2]])%*%fincoper
hasfityear <- hasfityear-mean(hasfityear)
hasfitmonth <- hasfitmonth-mean(hasfitmonth)

# fit BSML-C
ozone.bsmlfit2 <- bsml(thick,baslist,method="bsmlc")
bsmlcfit <- ozone.bsmlfit2$fit
finsel <- ozone.bsmlfit2$chosen.bases.matrix
finselper <- ozone.bsmlfit2$chosen.bases.trim[,1]==2
finselcub <- ozone.bsmlfit2$chosen.bases.trim[,1]==3
finbasper <- ozone.bsmlfit2$chosen.bases.matrix[,finselper]
finbascub <- ozone.bsmlfit2$chosen.bases.matrix[,finselcub]
finco <- ozone.bsmlfit2$coef[!is.na(ozone.bsmlfit2$coef)][-c(1:ncol(bas.nul))]
fincoper <- finco[finselper]
fincocub <- finco[finselcub]
bsmlcfityear <- ozone.bsmlfit2$coef[2]*grid+cubic(grid,unique(x)[ozone.bsmlfit2$chosen.bases.trim[,2][ozone.bsmlfit2$chosen.bases.trim[,1]==3]])%*%fincocub
bsmlcfitmonth <- periodic(grid,unique(csmonth)[ozone.bsmlfit2$chosen.bases.trim[,2][ozone.bsmlfit2$chosen.bases.trim[,1]==2]])%*%fincoper
bsmlcfityear <- bsmlcfityear-mean(bsmlcfityear)
bsmlcfitmonth <- bsmlcfitmonth-mean(bsmlcfitmonth)

# fit BSML-S
ozone.bsmlfit3 <- bsml(thick,baslist,method="bsmls")
bsmlsfit <-ozone.bsmlfit3$fit
finsel <- ozone.bsmlfit3$chosen.bases.matrix
finselper <- ozone.bsmlfit3$chosen.bases.trim[,1]==2
finselcub <- ozone.bsmlfit3$chosen.bases.trim[,1]==3
finbasper <- ozone.bsmlfit3$chosen.bases.matrix[,finselper]
finbascub <- ozone.bsmlfit3$chosen.bases.matrix[,finselcub]
finco <- ozone.bsmlfit3$coef[!is.na(ozone.bsmlfit3$coef)][-c(1:ncol(bas.nul))]
fincoper <- finco[finselper]
fincocub <- finco[finselcub]
bsmlsfityear <- ozone.bsmlfit3$coef[2]*grid
bsmlsfitmonth <- periodic(grid,unique(csmonth)[ozone.bsmlfit3$chosen.bases.trim[,2][ozone.bsmlfit3$chosen.bases.trim[,1]==2]])%*%fincoper
bsmlsfityear <- bsmlsfityear-mean(bsmlsfityear)
bsmlsfitmonth <- bsmlsfitmonth-mean(bsmlsfitmonth)

#pdf(file="ozone1.pdf",height=3,width=6.5,horizontal=F,pointsize=8)
#postscript(file="ozone1.ps",height=3,width=6.5,horizontal=F,pointsize=8)
plot(x,thick,xlab="time",ylab="thickness",pch=".",axes=F,type="l")
box()
axis(1,at=unique(csyear)[c(1,9,18,27,36,45)],label=c("1926","1935","1944","1953","1962","1971"),cex=0.5)
axis(2,cex=.5)
axis(3,at=unique(csyear)[c(1,9,18,27,36,45)],label=c("0.0","0.2","0.4","0.6","0.8","1.0"),cex=0.5)
lines(x,ssanovafit)
lines(x, hasfit,col="red")
lines(x, bsmlcfit,col="green")
lines(x, bsmlsfit,col="blue")
mtext("Observations and overall fit",cex=1.2)
#dev.off()

#pdf(file="ozone2.pdf",height=3,width=6.5,horizontal=F,pointsize=8)
#postscript(file="ozone2.ps",height=3,width=6.5,horizontal=F,pointsize=8)
par(mfrow=c(1,2), mgp=c(2,1,0), mar=c(3,3,3,1)+0.1)
plot(unique(csmonth),m1,xlim=c(0,1),ylim=c(-55,50),xlab="month",ylab="thickness",axes=F,type="n")
points(unique(csmonth),m1,pch="o",cex=0.6)
box()
axis(2,cex=.5)
axis(1,at=unique(csmonth), label=c("J","F","M","A","M","J","J","A","S","O","N","D"),cex=0.2)
axis(3,at=unique(csmonth), label=as.character(1:12),cex=0.5)

lines(grid, ssanovafitmonth, lty=2)
lines(grid, hasfitmonth, lty=4,col="red")
lines(grid, bsmlcfitmonth, lty=3,col="green")
lines(grid, bsmlsfitmonth, lty=1,col="blue")
legend(.1,44,lty=c(2,1,3,4),legend=c("spline","BSML-S","BSML-C","HAS"),cex=.8)
mtext("Main effect of month",cex=1.2)

plot(unique(csyear),m2,xlim=c(0,1),ylim=c(-55,50),xlab="time",ylab="thickness",axes=F,type="n")
points(unique(csyear),m2,pch="o",cex=0.6)
box()
axis(2,cex=.5)
axis(1,at=unique(csyear)[c(1,9,18,27,36,45)],label=c("1926","1935","1944","1953","1962","1971"),cex=0.5)
axis(3,at=unique(csyear)[c(1,9,18,27,36,45)],label=c("0.0","0.2","0.4","0.6","0.8","1.0"),cex=0.5)
lines(grid, ssanovafityear, lty=2)
lines(grid, hasfityear, lty=4,col="red")
lines(grid, bsmlcfityear, lty=3,col="green")
lines(grid, bsmlsfityear, lty=1,col="blue")
mtext("Main effect of year",cex=1.2)
#dev.off()

[Package Index]