bsml {bsml}R Documentation

Basis Selection from Multiple Libraries Ver. 1.0-5

Description

This is the implementation of the namesake methodology. Multiple libraries may be used to approximate the mean function. The program will adaptively select basis functions from each library and find the optimal model complexity by using the Generalized Degree of Freedom.

Usage

bsml(y,baseslist,method="bsmlc",maxbas=30,sub.maxbas=c(),backward=NULL,
                 has.control=list(crit="GCV",idf=1.2,ridge=F),
                 bsmlc.approach="gdf",
                 gdf.control=list(nrep=100,tauhat="Rice"),
                 gcvpk.control=list(delta=.1,job=1005,lamlim=c(-6,2),tau=10,ntbl=100,maxtbl=100))

Arguments

y Responses.
baseslist List of libraries.
method Three methods available: HAS, BSML-C and BSML-S. Use "has", "bsmlc" and "bsmls" to specify.
maxbas Maximum number of bases to be selected.
sub.maxbas Maximum number of bases to be pre-selected for each library.
backward Backward elimination criterion. Available options are "AIC", "BIC", "Cp" and "GCV". Default is NULL, when no backward elimination is carried out.
has.control Includes three controlling arguments: "crit" for basis selection criterion for HAS; Available options are "AIC", "BIC", "Cp" and "GCV". "idf" for IDF value for HAS; Default is 1.2; If value NA is supplied, it will be estimated. "ridge" for use ridge regression or not. TRUE or FALSE.
bsmlc.approach GDF approach or Covariance Penalty approach. For BSML-C only.
gdf.control Controlling parameters for GDF estimation.
gcvpk.control Controlling parameters for HAS.

Value

coefficients Coefficients
residuals Residuals
fitted.values Fitted Values
chosen.bases.full Full list of indices of chosen bases, excluding null bases.
chosen.bases.trim List of indices of chosen bases in the final model, excluding null bases.
chosen.bases.matrix Matrix of chosen bases in final model, excluding null bases.
score Scores used to determine optimal model complexity. The first score is for the null model.
idf Inflated Degree of Freedom for each chosen basis
gdf Cumulated Generalized Degree of Freedom for the corresponding number of chosen bases in the model.
nb Total number of bases in the model, including both null and chosen bases.
sigma_sq Estimated variance.
dof Degree of Freedom for the model.
index.bas HAS only. Indices of chosen bases in one-dimensional designations. For internal use.

Author(s)

Junqing Wu, Jeff Sklar, Wendy Meiring, Yuedong Wang

Examples

##############################
# Heavisine Function Example #
##############################
# Install packages nlme and assist first
library(assist)
n=128
x=seq(0,1,length=n)
pt=(rep(1,n)%o%x)[,-c(n)]
f=2.2*(4*sin(4*pi*x)-sign(x-.3)-sign(.72-x))
set.seed(123)
sigma=3
y=f+sigma*rnorm(n)
lib1=stdz(cbind(rep(1,length=n)))
lib2=stdz(1*cbind((x-pt)>0))
lib3=stdz(periodic(x)[,-n])
baseslist=list(lib1,lib2,lib3)
has.obj=bsml(y,baseslist,method="has")
bsmlc.obj=bsml(y,baseslist,method="bsmlc")
bsmls.obj=bsml(y,baseslist,method="bsmls")
plot(x,y)
lines(x,has.obj$fit,col="red")
lines(x,bsmlc.obj$fit,col="green")
lines(x,bsmls.obj$fit,col="blue")

#######################
# Geyser Data Example #
#######################
# Install packages assist, nlme and MASS first
library(assist)
library(MASS)
data(geyser)
n <- nrow(geyser)
y <- geyser[,1]
x <- (geyser[,2]-min(geyser[,2]))/(max(geyser[,2])-min(geyser[,2]))/1.1
bas.nul <- stdz(cbind(1,x-.5))
bas.cub <- stdz(cubic(x,unique(x)))
# remove the first few and the last few points
loc <- seq(.2,.8,len=10)
pt <- rep(1,n)%o%loc
bas.con <- stdz((x-pt)*(x-pt>0))
baslist <- list(bas.nul,bas.cub,bas.con) 

# HAS
baslist.has <- list(bas.nul,bas.cub)
has.obj <- bsml(y, baslist.has, maxbas=50, method="has",gcvpk.control=list(delta=.001,job=1001,lamlim=c(-6,2),tau=10,ntbl=100,maxtbl=100))
# BSML-C
bsmlc.obj <- bsml(y, baslist, maxbas=30, gdf.control=list(nrep=500,tauhat="Rice"),method="bsmlc")
# BSML-S
bsmls.obj <- bsml(y, baslist, maxbas=30, gdf.control=list(nrep=500,tauhat="Rice"),method="bsmls")

plot(geyser[,2], y, xlab="duration (min)", ylab="waiting time (min)", xlim=c(0.8,5.5), type="n")
points(geyser[,2], y, cex=.4)
f <- ssr(y~x, cubic(x))
xx <- x*(max(geyser[,2])-min(geyser[,2]))+min(geyser[,2])
ord <- order(xx)
xx <- xx[ord]
lines(xx,f$fit[ord],lty=2)
lines(xx,bsmls.obj$fit[ord],lty=1,col="blue")
lines(xx,bsmlc.obj$fit[ord],lty=3,col="green")
lines(xx,has.obj$fit[ord],lty=4,col="red")
legend(1,65,lty=c(2,1,3,4),legend=c("spline","BSML-S","BSML-C","HAS"),cex=.8,col=c("black","blue","green","red"))

###########################
# Motorcycle Data Example #
###########################
# Install packages assist, MASS and nlme first
library(assist)
library(MASS)
data(mcycle)
n <- nrow(mcycle)
y <- mcycle[,2]
x <- (mcycle[,1]-min(mcycle[,1]))/(max(mcycle[,1])-min(mcycle[,1]))/1.1
bas.nul <- stdz(cbind(1,x-.5))
bas.cub <- stdz(cubic(x,unique(x)))
# remove the first few and the last few points
pt <- (rep(1,n)%o%unique(x))[,-c(1:19,86:length(unique(x)))]
bas.con <- stdz((x-pt)*(x-pt>0))
baslist <- list(bas.nul,bas.cub,bas.con) 

# HAS
baslist.has <- list(bas.nul,bas.cub)
has.obj <- bsml(y, baslist.has, maxbas=50, method="has",gcvpk.control=list(delta=.001,job=1001,lamlim=c(-6,2),tau=10,ntbl=100,maxtbl=100))
# BSML-C
bsmlc.obj <- bsml(y, baslist, maxbas=30, gdf.control=list(nrep=500,tauhat="Rice"),method="bsmlc")
# BSML-S
bsmls.obj <- bsml(y, baslist, maxbas=30, gdf.control=list(nrep=500,tauhat="Rice"),method="bsmls")

plot(mcycle[,1], y, xlab="time (ms)", ylab="acceleration (g)", xlim=c(0,60), type="n")
points(mcycle[,1], y, cex=.4)
f <- ssr(y~x, cubic(x))
xx <- x*(max(mcycle[,1])-min(mcycle[,1]))+min(mcycle[,1])
lines(xx,f$fit,lty=2)
lines(xx,bsmls.obj$fit,lty=1,col="blue")
lines(xx,bsmlc.obj$fit,lty=3,col="green")
lines(xx,has.obj$fit,lty=4,col="red")
legend(40,-80,lty=c(2,1,3,4),legend=c("spline","BSML-S","BSML-C","HAS"),cex=.8,col=c("black","blue","green","red"))

######################
# Ozone Data Example #
######################
# Install packages assist, nlme and mda first
library(assist)
library(mda)
data(Arosa)
attach(Arosa)
csmonth <- (month-0.5)/12
csyear <- (year-1)/45
x <- (month+12*(year-1)-.5)/(46*12)
grid <- seq(0,1,len=100)
bas.nul <- cbind(1,x-.5)
bas.per <- periodic(csmonth,unique(csmonth))
bas.cub <- cubic(x,unique(x))
baslist <- list(bas.nul,bas.per,bas.cub) 

#### fit with one variable as long time series
# fit SS ANOVA
ozone.ssanova.fit1 <- ssr(thick~x, rk=list(periodic(csmonth),cubic(x)))
ssanovafit <- ozone.ssanova.fit1$fit
m1 <- sapply(split(thick,month),mean)
m1 <- m1-mean(m1)
m2 <- sapply(split(thick,year),mean)
m2 <- m2-mean(m2)
grid1 <- data.frame(csmonth=grid, x=.5)
grid2 <- data.frame(csmonth=.5, x=grid)
p.ozone.ssanova11 <- predict(ozone.ssanova.fit1,grid1,terms=c(0,0,1,0))
p.ozone.ssanova12 <- predict(ozone.ssanova.fit1,grid2,terms=c(0,1,0,1))
ssanovafitmonth <- p.ozone.ssanova11$fit <- p.ozone.ssanova11$fit-mean(p.ozone.ssanova11$fit)
ssanovafityear <- p.ozone.ssanova12$fit <- p.ozone.ssanova12$fit-mean(p.ozone.ssanova12$fit)

# fit HAS
ozone.bsmlfit1 <- bsml(thick,baslist,method="has")
hasfit <- ozone.bsmlfit1$fit
finsel <- ozone.bsmlfit1$chosen.bases.matrix
finselper <- ozone.bsmlfit1$chosen.bases.trim[,1]==2
finselcub <- ozone.bsmlfit1$chosen.bases.trim[,1]==3
finbasper <- ozone.bsmlfit1$chosen.bases.matrix[,finselper]
finbascub <- ozone.bsmlfit1$chosen.bases.matrix[,finselcub]
finco <- ozone.bsmlfit1$coef[!is.na(ozone.bsmlfit1$coef)][-c(1:ncol(bas.nul))]
fincoper <- finco[finselper]
fincocub <- finco[finselcub]
hasfityear <- ozone.bsmlfit1$coef[2]*grid+cubic(grid,unique(x)[ozone.bsmlfit1$chosen.bases.trim[,2][ozone.bsmlfit1$chosen.bases.trim[,1]==3]])%*%fincocub
hasfitmonth <- periodic(grid,unique(csmonth)[ozone.bsmlfit1$chosen.bases.trim[,2][ozone.bsmlfit1$chosen.bases.trim[,1]==2]])%*%fincoper
hasfityear <- hasfityear-mean(hasfityear)
hasfitmonth <- hasfitmonth-mean(hasfitmonth)

# fit BSML-C
ozone.bsmlfit2 <- bsml(thick,baslist,method="bsmlc")
bsmlcfit <- ozone.bsmlfit2$fit
finsel <- ozone.bsmlfit2$chosen.bases.matrix
finselper <- ozone.bsmlfit2$chosen.bases.trim[,1]==2
finselcub <- ozone.bsmlfit2$chosen.bases.trim[,1]==3
finbasper <- ozone.bsmlfit2$chosen.bases.matrix[,finselper]
finbascub <- ozone.bsmlfit2$chosen.bases.matrix[,finselcub]
finco <- ozone.bsmlfit2$coef[!is.na(ozone.bsmlfit2$coef)][-c(1:ncol(bas.nul))]
fincoper <- finco[finselper]
fincocub <- finco[finselcub]
bsmlcfityear <- ozone.bsmlfit2$coef[2]*grid+cubic(grid,unique(x)[ozone.bsmlfit2$chosen.bases.trim[,2][ozone.bsmlfit2$chosen.bases.trim[,1]==3]])%*%fincocub
bsmlcfitmonth <- periodic(grid,unique(csmonth)[ozone.bsmlfit2$chosen.bases.trim[,2][ozone.bsmlfit2$chosen.bases.trim[,1]==2]])%*%fincoper
bsmlcfityear <- bsmlcfityear-mean(bsmlcfityear)
bsmlcfitmonth <- bsmlcfitmonth-mean(bsmlcfitmonth)

# fit BSML-S
ozone.bsmlfit3 <- bsml(thick,baslist,method="bsmls")
bsmlsfit <-ozone.bsmlfit3$fit
finsel <- ozone.bsmlfit3$chosen.bases.matrix
finselper <- ozone.bsmlfit3$chosen.bases.trim[,1]==2
finselcub <- ozone.bsmlfit3$chosen.bases.trim[,1]==3
finbasper <- ozone.bsmlfit3$chosen.bases.matrix[,finselper]
finbascub <- ozone.bsmlfit3$chosen.bases.matrix[,finselcub]
finco <- ozone.bsmlfit3$coef[!is.na(ozone.bsmlfit3$coef)][-c(1:ncol(bas.nul))]
fincoper <- finco[finselper]
fincocub <- finco[finselcub]
bsmlsfityear <- ozone.bsmlfit3$coef[2]*grid
bsmlsfitmonth <- periodic(grid,unique(csmonth)[ozone.bsmlfit3$chosen.bases.trim[,2][ozone.bsmlfit3$chosen.bases.trim[,1]==2]])%*%fincoper
bsmlsfityear <- bsmlsfityear-mean(bsmlsfityear)
bsmlsfitmonth <- bsmlsfitmonth-mean(bsmlsfitmonth)

# Overall fit
plot(x,thick,xlab="time",ylab="thickness",pch=".",type="l")
lines(x,ssanovafit)
lines(x, hasfit,col="red")
lines(x, bsmlcfit,col="green")
lines(x, bsmlsfit,col="blue")
mtext("Observations and overall fit",cex=1.2)

# Fits of components
par(mfrow=c(1,2), mgp=c(2,1,0), mar=c(3,3,3,1)+0.1)
plot(unique(csmonth),m1,xlim=c(0,1),ylim=c(-55,50),xlab="month",ylab="thickness",type="n")
points(unique(csmonth),m1,pch="o",cex=0.6)
lines(grid, ssanovafitmonth, lty=2)
lines(grid, hasfitmonth, lty=4,col="red")
lines(grid, bsmlcfitmonth, lty=3,col="green")
lines(grid, bsmlsfitmonth, lty=1,col="blue")
legend(.1,44,lty=c(2,1,3,4),legend=c("spline","BSML-S","BSML-C","HAS"),cex=.8,col=c("black","blue","green","red"))
mtext("Main effect of month",cex=1.2)

plot(unique(csyear),m2,xlim=c(0,1),ylim=c(-55,50),xlab="year",ylab="thickness",type="n")
points(unique(csyear),m2,pch="o",cex=0.6)
lines(grid, ssanovafityear, lty=2)
lines(grid, hasfityear, lty=4,col="red")
lines(grid, bsmlcfityear, lty=3,col="green")
lines(grid, bsmlsfityear, lty=1,col="blue")
mtext("Main effect of year",cex=1.2)

[Package Index]