Chapter 6 - Linear Model Selection Regularization q8

(a)¶

set.seed(1)
X = rnorm(100)
error = rnorm(100)

(b)¶

b0=3
b1=2
b2=-4
b3=0.4
Y = b0+b1*X+b2*X^2+b3*X^3+error
sim_data = data.frame(y=Y,x=X)

(c)¶

library(leaps)
bestsubset.fit = regsubsets(Y~poly(X,10,raw=T),data=sim_data,nvmax=10)
bestsubset_summary = summary(bestsubset.fit)
bestsubset_summary

Subset selection object
Call: regsubsets.formula(Y ~ poly(X, 10, raw = T), data = sim_data, 
    nvmax = 10)
10 Variables  (and intercept)
                       Forced in Forced out
poly(X, 10, raw = T)1      FALSE      FALSE
poly(X, 10, raw = T)2      FALSE      FALSE
poly(X, 10, raw = T)3      FALSE      FALSE
poly(X, 10, raw = T)4      FALSE      FALSE
poly(X, 10, raw = T)5      FALSE      FALSE
poly(X, 10, raw = T)6      FALSE      FALSE
poly(X, 10, raw = T)7      FALSE      FALSE
poly(X, 10, raw = T)8      FALSE      FALSE
poly(X, 10, raw = T)9      FALSE      FALSE
poly(X, 10, raw = T)10     FALSE      FALSE
1 subsets of each size up to 10
Selection Algorithm: exhaustive
          poly(X, 10, raw = T)1 poly(X, 10, raw = T)2 poly(X, 10, raw = T)3
1  ( 1 )  " "                   "*"                   " "                  
2  ( 1 )  "*"                   "*"                   " "                  
3  ( 1 )  "*"                   "*"                   " "                  
4  ( 1 )  "*"                   "*"                   " "                  
5  ( 1 )  "*"                   "*"                   " "                  
6  ( 1 )  "*"                   "*"                   " "                  
7  ( 1 )  "*"                   "*"                   " "                  
8  ( 1 )  "*"                   "*"                   "*"                  
9  ( 1 )  "*"                   "*"                   " "                  
10  ( 1 ) "*"                   "*"                   "*"                  
          poly(X, 10, raw = T)4 poly(X, 10, raw = T)5 poly(X, 10, raw = T)6
1  ( 1 )  " "                   " "                   " "                  
2  ( 1 )  " "                   " "                   " "                  
3  ( 1 )  " "                   "*"                   " "                  
4  ( 1 )  " "                   "*"                   "*"                  
5  ( 1 )  " "                   " "                   " "                  
6  ( 1 )  " "                   "*"                   "*"                  
7  ( 1 )  "*"                   "*"                   "*"                  
8  ( 1 )  "*"                   " "                   "*"                  
9  ( 1 )  "*"                   "*"                   "*"                  
10  ( 1 ) "*"                   "*"                   "*"                  
          poly(X, 10, raw = T)7 poly(X, 10, raw = T)8 poly(X, 10, raw = T)9
1  ( 1 )  " "                   " "                   " "                  
2  ( 1 )  " "                   " "                   " "                  
3  ( 1 )  " "                   " "                   " "                  
4  ( 1 )  " "                   " "                   " "                  
5  ( 1 )  "*"                   "*"                   "*"                  
6  ( 1 )  " "                   "*"                   " "                  
7  ( 1 )  " "                   "*"                   " "                  
8  ( 1 )  " "                   "*"                   "*"                  
9  ( 1 )  "*"                   "*"                   "*"                  
10  ( 1 ) "*"                   "*"                   "*"                  
          poly(X, 10, raw = T)10
1  ( 1 )  " "                   
2  ( 1 )  " "                   
3  ( 1 )  " "                   
4  ( 1 )  " "                   
5  ( 1 )  " "                   
6  ( 1 )  "*"                   
7  ( 1 )  "*"                   
8  ( 1 )  "*"                   
9  ( 1 )  "*"                   
10  ( 1 ) "*"

names(bestsubset_summary)

par(mfrow=c(2,2))
#adjusted R-squared
plot(bestsubset_summary$adjr2,xlab="Number of variables",ylab="adjr2",type="l")
max_adjr2 = which.max(bestsubset_summary$adjr2)
points(max_adjr2,bestsubset_summary$adjr2[max_adjr2],col="red",cex=2,pch=20)
#cp
plot(bestsubset_summary$cp,xlab="Number of Variables",ylab="cp",type="l")
min_cp = which.min(bestsubset_summary$cp)
points(min_cp,bestsubset_summary$cp[min_cp],col="red",cex=2,pch=20)
#bic
plot(bestsubset_summary$bic,xlab="Number of Variables",ylab="bic",type="l")
min_bic = which.min(bestsubset_summary$bic)
points(min_bic,bestsubset_summary$bic[min_bic],col="red",cex=2,pch=20)

max_adjr2
min_bic
min_cp

coef(bestsubset.fit,3)

(d)¶

#forward stepwise selection
forward.fit = regsubsets(Y~poly(x,10,raw=T),data=sim_data,nvmax=10,method="forward")
forward_summary = summary(forward.fit)
forward_summary

Subset selection object
Call: regsubsets.formula(Y ~ poly(x, 10, raw = T), data = sim_data, 
    nvmax = 10, method = "forward")
10 Variables  (and intercept)
                       Forced in Forced out
poly(x, 10, raw = T)1      FALSE      FALSE
poly(x, 10, raw = T)2      FALSE      FALSE
poly(x, 10, raw = T)3      FALSE      FALSE
poly(x, 10, raw = T)4      FALSE      FALSE
poly(x, 10, raw = T)5      FALSE      FALSE
poly(x, 10, raw = T)6      FALSE      FALSE
poly(x, 10, raw = T)7      FALSE      FALSE
poly(x, 10, raw = T)8      FALSE      FALSE
poly(x, 10, raw = T)9      FALSE      FALSE
poly(x, 10, raw = T)10     FALSE      FALSE
1 subsets of each size up to 10
Selection Algorithm: forward
          poly(x, 10, raw = T)1 poly(x, 10, raw = T)2 poly(x, 10, raw = T)3
1  ( 1 )  " "                   "*"                   " "                  
2  ( 1 )  "*"                   "*"                   " "                  
3  ( 1 )  "*"                   "*"                   " "                  
4  ( 1 )  "*"                   "*"                   " "                  
5  ( 1 )  "*"                   "*"                   " "                  
6  ( 1 )  "*"                   "*"                   " "                  
7  ( 1 )  "*"                   "*"                   " "                  
8  ( 1 )  "*"                   "*"                   "*"                  
9  ( 1 )  "*"                   "*"                   "*"                  
10  ( 1 ) "*"                   "*"                   "*"                  
          poly(x, 10, raw = T)4 poly(x, 10, raw = T)5 poly(x, 10, raw = T)6
1  ( 1 )  " "                   " "                   " "                  
2  ( 1 )  " "                   " "                   " "                  
3  ( 1 )  " "                   "*"                   " "                  
4  ( 1 )  " "                   "*"                   "*"                  
5  ( 1 )  " "                   "*"                   "*"                  
6  ( 1 )  " "                   "*"                   "*"                  
7  ( 1 )  "*"                   "*"                   "*"                  
8  ( 1 )  "*"                   "*"                   "*"                  
9  ( 1 )  "*"                   "*"                   "*"                  
10  ( 1 ) "*"                   "*"                   "*"                  
          poly(x, 10, raw = T)7 poly(x, 10, raw = T)8 poly(x, 10, raw = T)9
1  ( 1 )  " "                   " "                   " "                  
2  ( 1 )  " "                   " "                   " "                  
3  ( 1 )  " "                   " "                   " "                  
4  ( 1 )  " "                   " "                   " "                  
5  ( 1 )  " "                   " "                   "*"                  
6  ( 1 )  "*"                   " "                   "*"                  
7  ( 1 )  "*"                   " "                   "*"                  
8  ( 1 )  "*"                   " "                   "*"                  
9  ( 1 )  "*"                   "*"                   "*"                  
10  ( 1 ) "*"                   "*"                   "*"                  
          poly(x, 10, raw = T)10
1  ( 1 )  " "                   
2  ( 1 )  " "                   
3  ( 1 )  " "                   
4  ( 1 )  " "                   
5  ( 1 )  " "                   
6  ( 1 )  " "                   
7  ( 1 )  " "                   
8  ( 1 )  " "                   
9  ( 1 )  " "                   
10  ( 1 ) "*"

max_adjr2 = which.max(forward_summary$adjr2)
max_adjr2

min_bic = which.min(forward_summary$bic)
min_bic

min_cp = which.min(forward_summary$cp)
min_cp

coef(forward.fit,3)

The result of forward stepwise regression is not different from best subset regression.

#backward stepwise regression
backward.fit = regsubsets(Y~poly(x,10,raw=T),data=sim_data,nvmax=10,method="backward")
backward_summary = summary(backward.fit)
backward_summary

Subset selection object
Call: regsubsets.formula(Y ~ poly(x, 10, raw = T), data = sim_data, 
    nvmax = 10, method = "backward")
10 Variables  (and intercept)
                       Forced in Forced out
poly(x, 10, raw = T)1      FALSE      FALSE
poly(x, 10, raw = T)2      FALSE      FALSE
poly(x, 10, raw = T)3      FALSE      FALSE
poly(x, 10, raw = T)4      FALSE      FALSE
poly(x, 10, raw = T)5      FALSE      FALSE
poly(x, 10, raw = T)6      FALSE      FALSE
poly(x, 10, raw = T)7      FALSE      FALSE
poly(x, 10, raw = T)8      FALSE      FALSE
poly(x, 10, raw = T)9      FALSE      FALSE
poly(x, 10, raw = T)10     FALSE      FALSE
1 subsets of each size up to 10
Selection Algorithm: backward
          poly(x, 10, raw = T)1 poly(x, 10, raw = T)2 poly(x, 10, raw = T)3
1  ( 1 )  " "                   "*"                   " "                  
2  ( 1 )  "*"                   "*"                   " "                  
3  ( 1 )  "*"                   "*"                   " "                  
4  ( 1 )  "*"                   "*"                   " "                  
5  ( 1 )  "*"                   "*"                   " "                  
6  ( 1 )  "*"                   "*"                   " "                  
7  ( 1 )  "*"                   "*"                   " "                  
8  ( 1 )  "*"                   "*"                   " "                  
9  ( 1 )  "*"                   "*"                   " "                  
10  ( 1 ) "*"                   "*"                   "*"                  
          poly(x, 10, raw = T)4 poly(x, 10, raw = T)5 poly(x, 10, raw = T)6
1  ( 1 )  " "                   " "                   " "                  
2  ( 1 )  " "                   " "                   " "                  
3  ( 1 )  " "                   "*"                   " "                  
4  ( 1 )  " "                   "*"                   " "                  
5  ( 1 )  " "                   "*"                   " "                  
6  ( 1 )  " "                   "*"                   "*"                  
7  ( 1 )  "*"                   "*"                   "*"                  
8  ( 1 )  "*"                   "*"                   "*"                  
9  ( 1 )  "*"                   "*"                   "*"                  
10  ( 1 ) "*"                   "*"                   "*"                  
          poly(x, 10, raw = T)7 poly(x, 10, raw = T)8 poly(x, 10, raw = T)9
1  ( 1 )  " "                   " "                   " "                  
2  ( 1 )  " "                   " "                   " "                  
3  ( 1 )  " "                   " "                   " "                  
4  ( 1 )  " "                   "*"                   " "                  
5  ( 1 )  " "                   "*"                   " "                  
6  ( 1 )  " "                   "*"                   " "                  
7  ( 1 )  " "                   "*"                   " "                  
8  ( 1 )  " "                   "*"                   "*"                  
9  ( 1 )  "*"                   "*"                   "*"                  
10  ( 1 ) "*"                   "*"                   "*"                  
          poly(x, 10, raw = T)10
1  ( 1 )  " "                   
2  ( 1 )  " "                   
3  ( 1 )  " "                   
4  ( 1 )  " "                   
5  ( 1 )  "*"                   
6  ( 1 )  "*"                   
7  ( 1 )  "*"                   
8  ( 1 )  "*"                   
9  ( 1 )  "*"                   
10  ( 1 ) "*"

which.max(backward_summary$adjr2)

which.min(backward_summary$bic)

which.min(backward_summary$cp)

coef(backward.fit,3)

The result of backward stepwise regression is not different from best subset selection

(e)¶

X = model.matrix(Y~poly(x,10,raw=T),data=sim_data)[,-1]
Y = sim_data[,1]
#creating a trianing data
set.seed(1)
train = sample(1:100,50)

set.seed(1)
cv.output = cv.glmnet(X[train,],Y[train],alpha=1)
plot(cv.output)

best.lambda = cv.output$lambda.min
lasso.fit = glmnet(X,Y,alpha=1)
coef.pred = predict(lasso.fit,type="coefficients",s=best.lambda)[1:11,]
coef.pred

coef.pred[coef.pred!=0]

The Lasso has picked the variables x^1,x^2,x^3 and x^5

(f)¶

b0=4
b7=8
Y = b0 + b7*X^7 + error
sim_data = data.frame(y=Y,x=X)

#using best subset selection
best.subset.fit = regsubsets(Y~poly(x,10,raw=T),data=sim_data,nvmax=10)
best.subset.summary = summary(best.subset.fit)
names(best.subset.summary)

which.min(best.subset.summary$cp)

which.min(best.subset.summary$bic)

which.max(best.subset.summary$adjr2)

The best subset selection produces the following models according to cp,bic and adjusted R-squared

coef(best.subset.fit,2)

coef(best.subset.fit,1)

coef(best.subset.fit,4)

#using lasso
b0=4
b7=8
Y = b0 + b7*X^7 + error
sim_data = data.frame(y=Y,x=X)
X = model.matrix(Y~poly(x,10,raw=T),data=sim_data)[,-1]
Y = sim_data[,1]
#creating training data
set.seed(1)
train = sample(1:100,50)

cv.output = cv.glmnet(X[train,],Y[train],alpha=1)
best.lambda=cv.output$lambda.min
best.lambda

lasso.fit = glmnet(X,Y,alpha=1)
coef.pred = predict(lasso.fit,type="coefficients",s=best.lambda)[1:11,]
coef.pred

However the lasso produces the following one model with only one variable x^7 which is similar to the original model.

coef.pred[coef.pred!=0]