| bsml {unknown} | R Documentation |
Basis Selection from Multiple Libraries
Description
This is the implementation of the namesake methodology. You can supply as many basis libraries as you want that can potentially fit the data well. The program will adaptively select basis functions from each library and find the optimal model complexity by using the Generalized Degree of Freedom.
Usage
bsml(y, baseslist, method = "bsmlc", sub.maxbas = c(), maxbas = 30, crit = "GCV", idf = 1.2, backward = F, elim.crit = "AIC", ridge = F, approach = "gdf", gdf.control = list(nrep = 100, tauhat = "Rice"), gcvpk.control = list(delta = 0.1, job = 1005, lamlim = c(-6, 2), tau = 10, ntbl = 100, maxtbl = 100))
Arguments
y |
Responses. |
baseslist |
List of libraries. |
method |
Three methods available: HAS, BSML-C and BSML-S. |
sub.maxbas |
Maximum number of bases to be pre-selected for each library. |
maxbas |
Maximum number of bases to be selected. |
crit |
Basis selection criterion for HAS. Available options are "AIC", "BIC", "Cp" and "GCV". |
idf |
IDF value for HAS. Default is 1.2. If value NA is supplied, it will be estimated. |
backward |
Backward elimination enabler. True or False. If enabled, a further backward elimination step will be carried out using the criterion specified in "elim.crit". |
elim.crit |
Backward elimination criterion. Available options are "AIC", "BIC", "Cp" and "GCV". |
ridge |
Use ridge regression or not. HAS only. |
approach |
GDF approach or Covariance Penalty approach. For BSML-C only. |
gdf.control |
Controlling parameters for GDF estimation. |
gcvpk.control |
Controlling parameters for HAS. |
Value
coefficients |
Coefficients |
residuals |
Residuals |
fitted.values |
Fitted Values |
chosen.bases.full |
Full list of indices of chosen bases, excluding null bases. |
chosen.bases.trim |
List of indices of chosen bases in the final model, excluding null bases. |
chosen.bases.matrix |
Matrix of chosen bases in final model, excluding null bases. |
score |
Scores used to determine optimal model complexity. The first score is for the null model. |
idf |
Inflated Degree of Freedom for each chosen basis |
gdf |
Cumulated Generalized Degree of Freedom for the corresponding number of chosen bases in the model. |
nb |
Total number of bases in the model, including both null and chosen bases. |
index.bas |
HAS only. Indices of chosen bases in one-dimensional designations. For internal use. |
Author(s)
Junqing Wu, Jeff Sklar, Wendy Meiring, Yuedong Wang
Examples
##############################
# Heavisine Function Example #
##############################
n=128
x <- seq(0,1,length=n)
pt <- (rep(1,n)%o%x)[,-c(n)]
y <- 2.2*(4*sin(4*pi*x)-sign(x-.3)-sign(.72-x))
set.seed(123)
sigma=3
y.e <- y + sigma*rnorm(n)
null.bas.per <- cbind(rep(1,length=n))
lib.tp0 <- 1*cbind((x-pt)>0)
baseslist=list(null.bas.per,lib.tp0)
has.obj=bsml(y.e,baseslist,method="has")
bsmlc.obj=bsml(y.e,baseslist,method="bsmlc")
bsmls.obj=bsml(y.e,baseslist,method="bsmls")
plot(x,y.e)
lines(x,has.obj$fit,col="red")
lines(x,bsmlc.obj$fit,col="green")
lines(x,bsmls.obj$fit,col="blue")
#######################
# Geyser Data Example #
#######################
library(assist)
library(MASS)
data(geyser)
n <- nrow(geyser)
y <- geyser[,1]
x <- (geyser[,2]-min(geyser[,2]))/(max(geyser[,2])-min(geyser[,2]))/1.1
bas.nul <- as.matrix(cbind(1,x-.5))
bas.cub <- as.matrix(cubic(x,unique(x)))
# remove the first few and the last few points
loc <- seq(.2,.8,len=10)
pt <- rep(1,n)%o%loc
bas.con <- as.matrix((x-pt)*(x-pt>0))
baslist <- list(bas.nul,bas.cub,bas.con)
# HAS
baslist.has <- list(bas.nul,bas.cub)
has.obj <- bsml(y, baslist.has, maxbas=50, method="has",gcvpk.control=list(delta=.001,job=1001,lamlim=c(-6,2),tau=10,ntbl=100,maxtbl=100))
# BSML-C
bsmlc.obj <- bsml(y, baslist, maxbas=30, gdf.control=list(nrep=500,tauhat="Rice"),method="bsmlc")
# BSML-S
bsmls.obj <- bsml(y, baslist, maxbas=30, gdf.control=list(nrep=500,tauhat="Rice"),method="bsmls")
#postscript(file="geyser.ps",height=4,width=6.5,horizontal=F,pointsize=9)
par(mfrow=c(1,1), mgp=c(2,1,0), mar=c(3,3,3,1)+0.1)
plot(geyser[,2], y, xlab="duration (min)", ylab="waiting time (min)",
xlim=c(0.8,5.5), type="n", axes=F)
box()
axis(2)
axis(1, at=c(1,2,3,4,5), label=as.character(c(1,2,3,4,5)), cex=.5)
points(geyser[,2], y, cex=.4)
f <- ssr(y~x, cubic(x))
xx <- x*(max(geyser[,2])-min(geyser[,2]))+min(geyser[,2])
ord <- order(xx)
xx <- xx[ord]
lines(xx,f$fit[ord],lty=2)
lines(xx,bsmls.obj$fit[ord],lty=1,col="blue")
lines(xx,bsmlc.obj$fit[ord],lty=3,col="green")
lines(xx,has.obj$fit[ord],lty=4,col="red")
legend(1,65,lty=c(2,1,3,4),legend=c("spline","BSML-S","BSML-C","HAS"),cex=.8)
xlab <- seq(0,1,by=.2)
xxlab <- xlab*(max(geyser[,2])-min(geyser[,2]))+min(geyser[,2])
axis(3, at=xxlab, label=c("0.0","0.2","0.4","0.6","0.8","1.0"),cex=0.5)
#dev.off()
###########################
# Motorcycle Data Example #
###########################
library(assist)
library(MASS)
data(mcycle)
n <- nrow(mcycle)
y <- mcycle[,2]
x <- (mcycle[,1]-min(mcycle[,1]))/(max(mcycle[,1])-min(mcycle[,1]))/1.1
bas.nul <- as.matrix(cbind(1,x-.5))
bas.cub <- as.matrix(cubic(x,unique(x)))
# remove the first few and the last few points
pt <- (rep(1,n)%o%unique(x))[,-c(1:19,86:length(unique(x)))]
bas.con <- as.matrix((x-pt)*(x-pt>0))
baslist <- list(bas.nul,bas.cub,bas.con)
# HAS
baslist.has <- list(bas.nul,bas.cub)
has.obj <- bsml(y, baslist.has, maxbas=50, method="has",gcvpk.control=list(delta=.001,job=1001,lamlim=c(-6,2),tau=10,ntbl=100,maxtbl=100))
# BSML-C
bsmlc.obj <- bsml(y, baslist, maxbas=30, gdf.control=list(nrep=500,tauhat="Rice"),method="bsmlc")
# BSML-S
bsmls.obj <- bsml(y, baslist, maxbas=30, gdf.control=list(nrep=500,tauhat="Rice"),method="bsmls")
#postscript(file="mcycle.ps",height=4,width=6.5,horizontal=F,pointsize=9)
par(mfrow=c(1,1), mgp=c(2,1,0), mar=c(3,3,3,1)+0.1)
plot(mcycle[,1], y, xlab="time (ms)", ylab="acceleration (g)", xlim=c(0,60), type="n", axes=F)
box()
axis(2)
axis(1, at=c(0,10,20,30,40,50,60), label=as.character(c(0,10,20,30,40,50,60)), cex=.5)
points(mcycle[,1], y, cex=.4)
f <- ssr(y~x, cubic(x))
xx <- x*(max(mcycle[,1])-min(mcycle[,1]))+min(mcycle[,1])
lines(xx,f$fit,lty=2)
lines(xx,bsmls.obj$fit,lty=1,col="blue")
lines(xx,bsmlc.obj$fit,lty=3,col="green")
lines(xx,has.obj$fit,lty=4,col="red")
legend(40,-80,lty=c(2,1,3,4),legend=c("spline","BSML-S","BSML-C","HAS"),cex=.8)
xlab <- seq(0,1,by=.2)
xxlab <- xlab*(max(mcycle[,1])-min(mcycle[,1]))+min(mcycle[,1])
axis(3, at=xxlab, label=c("0.0","0.2","0.4","0.6","0.8","1.0"),cex=0.5)
#dev.off()
######################
# Ozone Data Example #
######################
library(assist)
library(mda)
data(Arosa)
attach(Arosa)
csmonth <- (month-0.5)/12
csyear <- (year-1)/45
x <- (month+12*(year-1)-.5)/(46*12)
grid <- seq(0,1,len=100)
bas.nul <- as.matrix(cbind(1,x-.5))
bas.per <- as.matrix(periodic(csmonth,unique(csmonth)))
bas.cub <- as.matrix(cubic(x,unique(x)))
baslist <- list(bas.nul,bas.per,bas.cub)
#### fit with one variable as long time series
# fit SS ANOVA
ozone.ssanova.fit1 <- ssr(thick~x, rk=list(periodic(csmonth),cubic(x)))
ssanovafit <- ozone.ssanova.fit1$fit
m1 <- sapply(split(thick,month),mean)
m1 <- m1-mean(m1)
m2 <- sapply(split(thick,year),mean)
m2 <- m2-mean(m2)
grid1 <- data.frame(csmonth=grid, x=.5)
grid2 <- data.frame(csmonth=.5, x=grid)
p.ozone.ssanova11 <- predict(ozone.ssanova.fit1,grid1,terms=c(0,0,1,0))
p.ozone.ssanova12 <- predict(ozone.ssanova.fit1,grid2,terms=c(0,1,0,1))
ssanovafitmonth <- p.ozone.ssanova11$fit <- p.ozone.ssanova11$fit-mean(p.ozone.ssanova11$fit)
ssanovafityear <- p.ozone.ssanova12$fit <- p.ozone.ssanova12$fit-mean(p.ozone.ssanova12$fit)
# fit HAS
ozone.bsmlfit1 <- bsml(thick,baslist,method="has")
hasfit <- ozone.bsmlfit1$fit
finsel <- ozone.bsmlfit1$chosen.bases.matrix
finselper <- ozone.bsmlfit1$chosen.bases.trim[,1]==2
finselcub <- ozone.bsmlfit1$chosen.bases.trim[,1]==3
finbasper <- ozone.bsmlfit1$chosen.bases.matrix[,finselper]
finbascub <- ozone.bsmlfit1$chosen.bases.matrix[,finselcub]
finco <- ozone.bsmlfit1$coef[!is.na(ozone.bsmlfit1$coef)][-c(1:ncol(bas.nul))]
fincoper <- finco[finselper]
fincocub <- finco[finselcub]
hasfityear <- ozone.bsmlfit1$coef[2]*grid+cubic(grid,unique(x)[ozone.bsmlfit1$chosen.bases.trim[,2][ozone.bsmlfit1$chosen.bases.trim[,1]==3]])%*%fincocub
hasfitmonth <- periodic(grid,unique(csmonth)[ozone.bsmlfit1$chosen.bases.trim[,2][ozone.bsmlfit1$chosen.bases.trim[,1]==2]])%*%fincoper
hasfityear <- hasfityear-mean(hasfityear)
hasfitmonth <- hasfitmonth-mean(hasfitmonth)
# fit BSML-C
ozone.bsmlfit2 <- bsml(thick,baslist,method="bsmlc")
bsmlcfit <- ozone.bsmlfit2$fit
finsel <- ozone.bsmlfit2$chosen.bases.matrix
finselper <- ozone.bsmlfit2$chosen.bases.trim[,1]==2
finselcub <- ozone.bsmlfit2$chosen.bases.trim[,1]==3
finbasper <- ozone.bsmlfit2$chosen.bases.matrix[,finselper]
finbascub <- ozone.bsmlfit2$chosen.bases.matrix[,finselcub]
finco <- ozone.bsmlfit2$coef[!is.na(ozone.bsmlfit2$coef)][-c(1:ncol(bas.nul))]
fincoper <- finco[finselper]
fincocub <- finco[finselcub]
bsmlcfityear <- ozone.bsmlfit2$coef[2]*grid+cubic(grid,unique(x)[ozone.bsmlfit2$chosen.bases.trim[,2][ozone.bsmlfit2$chosen.bases.trim[,1]==3]])%*%fincocub
bsmlcfitmonth <- periodic(grid,unique(csmonth)[ozone.bsmlfit2$chosen.bases.trim[,2][ozone.bsmlfit2$chosen.bases.trim[,1]==2]])%*%fincoper
bsmlcfityear <- bsmlcfityear-mean(bsmlcfityear)
bsmlcfitmonth <- bsmlcfitmonth-mean(bsmlcfitmonth)
# fit BSML-S
ozone.bsmlfit3 <- bsml(thick,baslist,method="bsmls")
bsmlsfit <-ozone.bsmlfit3$fit
finsel <- ozone.bsmlfit3$chosen.bases.matrix
finselper <- ozone.bsmlfit3$chosen.bases.trim[,1]==2
finselcub <- ozone.bsmlfit3$chosen.bases.trim[,1]==3
finbasper <- ozone.bsmlfit3$chosen.bases.matrix[,finselper]
finbascub <- ozone.bsmlfit3$chosen.bases.matrix[,finselcub]
finco <- ozone.bsmlfit3$coef[!is.na(ozone.bsmlfit3$coef)][-c(1:ncol(bas.nul))]
fincoper <- finco[finselper]
fincocub <- finco[finselcub]
bsmlsfityear <- ozone.bsmlfit3$coef[2]*grid
bsmlsfitmonth <- periodic(grid,unique(csmonth)[ozone.bsmlfit3$chosen.bases.trim[,2][ozone.bsmlfit3$chosen.bases.trim[,1]==2]])%*%fincoper
bsmlsfityear <- bsmlsfityear-mean(bsmlsfityear)
bsmlsfitmonth <- bsmlsfitmonth-mean(bsmlsfitmonth)
#pdf(file="ozone1.pdf",height=3,width=6.5,horizontal=F,pointsize=8)
#postscript(file="ozone1.ps",height=3,width=6.5,horizontal=F,pointsize=8)
plot(x,thick,xlab="time",ylab="thickness",pch=".",axes=F,type="l")
box()
axis(1,at=unique(csyear)[c(1,9,18,27,36,45)],label=c("1926","1935","1944","1953","1962","1971"),cex=0.5)
axis(2,cex=.5)
axis(3,at=unique(csyear)[c(1,9,18,27,36,45)],label=c("0.0","0.2","0.4","0.6","0.8","1.0"),cex=0.5)
lines(x,ssanovafit)
lines(x, hasfit,col="red")
lines(x, bsmlcfit,col="green")
lines(x, bsmlsfit,col="blue")
mtext("Observations and overall fit",cex=1.2)
#dev.off()
#pdf(file="ozone2.pdf",height=3,width=6.5,horizontal=F,pointsize=8)
#postscript(file="ozone2.ps",height=3,width=6.5,horizontal=F,pointsize=8)
par(mfrow=c(1,2), mgp=c(2,1,0), mar=c(3,3,3,1)+0.1)
plot(unique(csmonth),m1,xlim=c(0,1),ylim=c(-55,50),xlab="month",ylab="thickness",axes=F,type="n")
points(unique(csmonth),m1,pch="o",cex=0.6)
box()
axis(2,cex=.5)
axis(1,at=unique(csmonth), label=c("J","F","M","A","M","J","J","A","S","O","N","D"),cex=0.2)
axis(3,at=unique(csmonth), label=as.character(1:12),cex=0.5)
lines(grid, ssanovafitmonth, lty=2)
lines(grid, hasfitmonth, lty=4,col="red")
lines(grid, bsmlcfitmonth, lty=3,col="green")
lines(grid, bsmlsfitmonth, lty=1,col="blue")
legend(.1,44,lty=c(2,1,3,4),legend=c("spline","BSML-S","BSML-C","HAS"),cex=.8)
mtext("Main effect of month",cex=1.2)
plot(unique(csyear),m2,xlim=c(0,1),ylim=c(-55,50),xlab="time",ylab="thickness",axes=F,type="n")
points(unique(csyear),m2,pch="o",cex=0.6)
box()
axis(2,cex=.5)
axis(1,at=unique(csyear)[c(1,9,18,27,36,45)],label=c("1926","1935","1944","1953","1962","1971"),cex=0.5)
axis(3,at=unique(csyear)[c(1,9,18,27,36,45)],label=c("0.0","0.2","0.4","0.6","0.8","1.0"),cex=0.5)
lines(grid, ssanovafityear, lty=2)
lines(grid, hasfityear, lty=4,col="red")
lines(grid, bsmlcfityear, lty=3,col="green")
lines(grid, bsmlsfityear, lty=1,col="blue")
mtext("Main effect of year",cex=1.2)
#dev.off()