Chapter 6 - Linear Model Selection Regularization q8

(a)

In [167]:
set.seed(1)
X = rnorm(100)
error = rnorm(100)

(b)

In [168]:
b0=3
b1=2
b2=-4
b3=0.4
Y = b0+b1*X+b2*X^2+b3*X^3+error
sim_data = data.frame(y=Y,x=X)

(c)

In [169]:
library(leaps)
bestsubset.fit = regsubsets(Y~poly(X,10,raw=T),data=sim_data,nvmax=10)
bestsubset_summary = summary(bestsubset.fit)
bestsubset_summary
Subset selection object
Call: regsubsets.formula(Y ~ poly(X, 10, raw = T), data = sim_data, 
    nvmax = 10)
10 Variables  (and intercept)
                       Forced in Forced out
poly(X, 10, raw = T)1      FALSE      FALSE
poly(X, 10, raw = T)2      FALSE      FALSE
poly(X, 10, raw = T)3      FALSE      FALSE
poly(X, 10, raw = T)4      FALSE      FALSE
poly(X, 10, raw = T)5      FALSE      FALSE
poly(X, 10, raw = T)6      FALSE      FALSE
poly(X, 10, raw = T)7      FALSE      FALSE
poly(X, 10, raw = T)8      FALSE      FALSE
poly(X, 10, raw = T)9      FALSE      FALSE
poly(X, 10, raw = T)10     FALSE      FALSE
1 subsets of each size up to 10
Selection Algorithm: exhaustive
          poly(X, 10, raw = T)1 poly(X, 10, raw = T)2 poly(X, 10, raw = T)3
1  ( 1 )  " "                   "*"                   " "                  
2  ( 1 )  "*"                   "*"                   " "                  
3  ( 1 )  "*"                   "*"                   " "                  
4  ( 1 )  "*"                   "*"                   " "                  
5  ( 1 )  "*"                   "*"                   " "                  
6  ( 1 )  "*"                   "*"                   " "                  
7  ( 1 )  "*"                   "*"                   " "                  
8  ( 1 )  "*"                   "*"                   "*"                  
9  ( 1 )  "*"                   "*"                   " "                  
10  ( 1 ) "*"                   "*"                   "*"                  
          poly(X, 10, raw = T)4 poly(X, 10, raw = T)5 poly(X, 10, raw = T)6
1  ( 1 )  " "                   " "                   " "                  
2  ( 1 )  " "                   " "                   " "                  
3  ( 1 )  " "                   "*"                   " "                  
4  ( 1 )  " "                   "*"                   "*"                  
5  ( 1 )  " "                   " "                   " "                  
6  ( 1 )  " "                   "*"                   "*"                  
7  ( 1 )  "*"                   "*"                   "*"                  
8  ( 1 )  "*"                   " "                   "*"                  
9  ( 1 )  "*"                   "*"                   "*"                  
10  ( 1 ) "*"                   "*"                   "*"                  
          poly(X, 10, raw = T)7 poly(X, 10, raw = T)8 poly(X, 10, raw = T)9
1  ( 1 )  " "                   " "                   " "                  
2  ( 1 )  " "                   " "                   " "                  
3  ( 1 )  " "                   " "                   " "                  
4  ( 1 )  " "                   " "                   " "                  
5  ( 1 )  "*"                   "*"                   "*"                  
6  ( 1 )  " "                   "*"                   " "                  
7  ( 1 )  " "                   "*"                   " "                  
8  ( 1 )  " "                   "*"                   "*"                  
9  ( 1 )  "*"                   "*"                   "*"                  
10  ( 1 ) "*"                   "*"                   "*"                  
          poly(X, 10, raw = T)10
1  ( 1 )  " "                   
2  ( 1 )  " "                   
3  ( 1 )  " "                   
4  ( 1 )  " "                   
5  ( 1 )  " "                   
6  ( 1 )  "*"                   
7  ( 1 )  "*"                   
8  ( 1 )  "*"                   
9  ( 1 )  "*"                   
10  ( 1 ) "*"                   
In [170]:
names(bestsubset_summary)
  1. 'which'
  2. 'rsq'
  3. 'rss'
  4. 'adjr2'
  5. 'cp'
  6. 'bic'
  7. 'outmat'
  8. 'obj'
In [171]:
par(mfrow=c(2,2))
#adjusted R-squared
plot(bestsubset_summary$adjr2,xlab="Number of variables",ylab="adjr2",type="l")
max_adjr2 = which.max(bestsubset_summary$adjr2)
points(max_adjr2,bestsubset_summary$adjr2[max_adjr2],col="red",cex=2,pch=20)
#cp
plot(bestsubset_summary$cp,xlab="Number of Variables",ylab="cp",type="l")
min_cp = which.min(bestsubset_summary$cp)
points(min_cp,bestsubset_summary$cp[min_cp],col="red",cex=2,pch=20)
#bic
plot(bestsubset_summary$bic,xlab="Number of Variables",ylab="bic",type="l")
min_bic = which.min(bestsubset_summary$bic)
points(min_bic,bestsubset_summary$bic[min_bic],col="red",cex=2,pch=20)
In [172]:
max_adjr2
min_bic
min_cp
3
3
3
In [174]:
coef(bestsubset.fit,3)
(Intercept)
3.07187220886589
poly(X, 10, raw = T)1
2.34563538332697
poly(X, 10, raw = T)2
-4.15241772557949
poly(X, 10, raw = T)5
0.0738342630773857

(d)

In [176]:
#forward stepwise selection
forward.fit = regsubsets(Y~poly(x,10,raw=T),data=sim_data,nvmax=10,method="forward")
forward_summary = summary(forward.fit)
forward_summary
Subset selection object
Call: regsubsets.formula(Y ~ poly(x, 10, raw = T), data = sim_data, 
    nvmax = 10, method = "forward")
10 Variables  (and intercept)
                       Forced in Forced out
poly(x, 10, raw = T)1      FALSE      FALSE
poly(x, 10, raw = T)2      FALSE      FALSE
poly(x, 10, raw = T)3      FALSE      FALSE
poly(x, 10, raw = T)4      FALSE      FALSE
poly(x, 10, raw = T)5      FALSE      FALSE
poly(x, 10, raw = T)6      FALSE      FALSE
poly(x, 10, raw = T)7      FALSE      FALSE
poly(x, 10, raw = T)8      FALSE      FALSE
poly(x, 10, raw = T)9      FALSE      FALSE
poly(x, 10, raw = T)10     FALSE      FALSE
1 subsets of each size up to 10
Selection Algorithm: forward
          poly(x, 10, raw = T)1 poly(x, 10, raw = T)2 poly(x, 10, raw = T)3
1  ( 1 )  " "                   "*"                   " "                  
2  ( 1 )  "*"                   "*"                   " "                  
3  ( 1 )  "*"                   "*"                   " "                  
4  ( 1 )  "*"                   "*"                   " "                  
5  ( 1 )  "*"                   "*"                   " "                  
6  ( 1 )  "*"                   "*"                   " "                  
7  ( 1 )  "*"                   "*"                   " "                  
8  ( 1 )  "*"                   "*"                   "*"                  
9  ( 1 )  "*"                   "*"                   "*"                  
10  ( 1 ) "*"                   "*"                   "*"                  
          poly(x, 10, raw = T)4 poly(x, 10, raw = T)5 poly(x, 10, raw = T)6
1  ( 1 )  " "                   " "                   " "                  
2  ( 1 )  " "                   " "                   " "                  
3  ( 1 )  " "                   "*"                   " "                  
4  ( 1 )  " "                   "*"                   "*"                  
5  ( 1 )  " "                   "*"                   "*"                  
6  ( 1 )  " "                   "*"                   "*"                  
7  ( 1 )  "*"                   "*"                   "*"                  
8  ( 1 )  "*"                   "*"                   "*"                  
9  ( 1 )  "*"                   "*"                   "*"                  
10  ( 1 ) "*"                   "*"                   "*"                  
          poly(x, 10, raw = T)7 poly(x, 10, raw = T)8 poly(x, 10, raw = T)9
1  ( 1 )  " "                   " "                   " "                  
2  ( 1 )  " "                   " "                   " "                  
3  ( 1 )  " "                   " "                   " "                  
4  ( 1 )  " "                   " "                   " "                  
5  ( 1 )  " "                   " "                   "*"                  
6  ( 1 )  "*"                   " "                   "*"                  
7  ( 1 )  "*"                   " "                   "*"                  
8  ( 1 )  "*"                   " "                   "*"                  
9  ( 1 )  "*"                   "*"                   "*"                  
10  ( 1 ) "*"                   "*"                   "*"                  
          poly(x, 10, raw = T)10
1  ( 1 )  " "                   
2  ( 1 )  " "                   
3  ( 1 )  " "                   
4  ( 1 )  " "                   
5  ( 1 )  " "                   
6  ( 1 )  " "                   
7  ( 1 )  " "                   
8  ( 1 )  " "                   
9  ( 1 )  " "                   
10  ( 1 ) "*"                   
In [184]:
max_adjr2 = which.max(forward_summary$adjr2)
max_adjr2
3
In [185]:
min_bic = which.min(forward_summary$bic)
min_bic
3
In [186]:
min_cp = which.min(forward_summary$cp)
min_cp
3
In [187]:
coef(forward.fit,3)
(Intercept)
3.07187220886589
poly(x, 10, raw = T)1
2.34563538332697
poly(x, 10, raw = T)2
-4.15241772557949
poly(x, 10, raw = T)5
0.0738342630773856

The result of forward stepwise regression is not different from best subset regression.

In [189]:
#backward stepwise regression
backward.fit = regsubsets(Y~poly(x,10,raw=T),data=sim_data,nvmax=10,method="backward")
backward_summary = summary(backward.fit)
backward_summary
Subset selection object
Call: regsubsets.formula(Y ~ poly(x, 10, raw = T), data = sim_data, 
    nvmax = 10, method = "backward")
10 Variables  (and intercept)
                       Forced in Forced out
poly(x, 10, raw = T)1      FALSE      FALSE
poly(x, 10, raw = T)2      FALSE      FALSE
poly(x, 10, raw = T)3      FALSE      FALSE
poly(x, 10, raw = T)4      FALSE      FALSE
poly(x, 10, raw = T)5      FALSE      FALSE
poly(x, 10, raw = T)6      FALSE      FALSE
poly(x, 10, raw = T)7      FALSE      FALSE
poly(x, 10, raw = T)8      FALSE      FALSE
poly(x, 10, raw = T)9      FALSE      FALSE
poly(x, 10, raw = T)10     FALSE      FALSE
1 subsets of each size up to 10
Selection Algorithm: backward
          poly(x, 10, raw = T)1 poly(x, 10, raw = T)2 poly(x, 10, raw = T)3
1  ( 1 )  " "                   "*"                   " "                  
2  ( 1 )  "*"                   "*"                   " "                  
3  ( 1 )  "*"                   "*"                   " "                  
4  ( 1 )  "*"                   "*"                   " "                  
5  ( 1 )  "*"                   "*"                   " "                  
6  ( 1 )  "*"                   "*"                   " "                  
7  ( 1 )  "*"                   "*"                   " "                  
8  ( 1 )  "*"                   "*"                   " "                  
9  ( 1 )  "*"                   "*"                   " "                  
10  ( 1 ) "*"                   "*"                   "*"                  
          poly(x, 10, raw = T)4 poly(x, 10, raw = T)5 poly(x, 10, raw = T)6
1  ( 1 )  " "                   " "                   " "                  
2  ( 1 )  " "                   " "                   " "                  
3  ( 1 )  " "                   "*"                   " "                  
4  ( 1 )  " "                   "*"                   " "                  
5  ( 1 )  " "                   "*"                   " "                  
6  ( 1 )  " "                   "*"                   "*"                  
7  ( 1 )  "*"                   "*"                   "*"                  
8  ( 1 )  "*"                   "*"                   "*"                  
9  ( 1 )  "*"                   "*"                   "*"                  
10  ( 1 ) "*"                   "*"                   "*"                  
          poly(x, 10, raw = T)7 poly(x, 10, raw = T)8 poly(x, 10, raw = T)9
1  ( 1 )  " "                   " "                   " "                  
2  ( 1 )  " "                   " "                   " "                  
3  ( 1 )  " "                   " "                   " "                  
4  ( 1 )  " "                   "*"                   " "                  
5  ( 1 )  " "                   "*"                   " "                  
6  ( 1 )  " "                   "*"                   " "                  
7  ( 1 )  " "                   "*"                   " "                  
8  ( 1 )  " "                   "*"                   "*"                  
9  ( 1 )  "*"                   "*"                   "*"                  
10  ( 1 ) "*"                   "*"                   "*"                  
          poly(x, 10, raw = T)10
1  ( 1 )  " "                   
2  ( 1 )  " "                   
3  ( 1 )  " "                   
4  ( 1 )  " "                   
5  ( 1 )  "*"                   
6  ( 1 )  "*"                   
7  ( 1 )  "*"                   
8  ( 1 )  "*"                   
9  ( 1 )  "*"                   
10  ( 1 ) "*"                   
In [190]:
which.max(backward_summary$adjr2)
3
In [191]:
which.min(backward_summary$bic)
3
In [192]:
which.min(backward_summary$cp)
3
In [193]:
coef(backward.fit,3)
(Intercept)
3.07187220886589
poly(x, 10, raw = T)1
2.34563538332697
poly(x, 10, raw = T)2
-4.15241772557949
poly(x, 10, raw = T)5
0.0738342630773856

The result of backward stepwise regression is not different from best subset selection

(e)

In [208]:
X = model.matrix(Y~poly(x,10,raw=T),data=sim_data)[,-1]
Y = sim_data[,1]
#creating a trianing data
set.seed(1)
train = sample(1:100,50)
In [218]:
set.seed(1)
cv.output = cv.glmnet(X[train,],Y[train],alpha=1)
plot(cv.output)
In [228]:
best.lambda = cv.output$lambda.min
lasso.fit = glmnet(X,Y,alpha=1)
coef.pred = predict(lasso.fit,type="coefficients",s=best.lambda)[1:11,]
coef.pred
(Intercept)
3.03150152239386
poly(x, 10, raw = T)1
2.25288346271139
poly(x, 10, raw = T)2
-4.09202422874494
poly(x, 10, raw = T)3
0.0647925691298561
poly(x, 10, raw = T)4
0
poly(x, 10, raw = T)5
0.0590268390507903
poly(x, 10, raw = T)6
0
poly(x, 10, raw = T)7
0
poly(x, 10, raw = T)8
0
poly(x, 10, raw = T)9
0
poly(x, 10, raw = T)10
0
In [227]:
coef.pred[coef.pred!=0]
(Intercept)
3.03150152239386
poly(x, 10, raw = T)1
2.25288346271139
poly(x, 10, raw = T)2
-4.09202422874494
poly(x, 10, raw = T)3
0.0647925691298561
poly(x, 10, raw = T)5
0.0590268390507903

The Lasso has picked the variables x^1,x^2,x^3 and x^5

(f)

In [229]:
b0=4
b7=8
Y = b0 + b7*X^7 + error
sim_data = data.frame(y=Y,x=X)
In [232]:
#using best subset selection
best.subset.fit = regsubsets(Y~poly(x,10,raw=T),data=sim_data,nvmax=10)
best.subset.summary = summary(best.subset.fit)
names(best.subset.summary)
  1. 'which'
  2. 'rsq'
  3. 'rss'
  4. 'adjr2'
  5. 'cp'
  6. 'bic'
  7. 'outmat'
  8. 'obj'
In [233]:
which.min(best.subset.summary$cp)
2
In [234]:
which.min(best.subset.summary$bic)
1
In [238]:
which.max(best.subset.summary$adjr2)
4

The best subset selection produces the following models according to cp,bic and adjusted R-squared

In [240]:
coef(best.subset.fit,2)
(Intercept)
4.07049036762636
poly(x, 10, raw = T)2
-0.141708425295761
poly(x, 10, raw = T)7
8.00155518856387
In [241]:
coef(best.subset.fit,1)
(Intercept)
3.95894024674507
poly(x, 10, raw = T)7
8.00077047427056
In [242]:
coef(best.subset.fit,4)
(Intercept)
4.07625244968331
poly(x, 10, raw = T)1
0.291401607645746
poly(x, 10, raw = T)2
-0.161767130528661
poly(x, 10, raw = T)3
-0.252652678282311
poly(x, 10, raw = T)7
8.00913375439679
In [252]:
#using lasso
b0=4
b7=8
Y = b0 + b7*X^7 + error
sim_data = data.frame(y=Y,x=X)
X = model.matrix(Y~poly(x,10,raw=T),data=sim_data)[,-1]
Y = sim_data[,1]
#creating training data
set.seed(1)
train = sample(1:100,50)
In [253]:
cv.output = cv.glmnet(X[train,],Y[train],alpha=1)
best.lambda=cv.output$lambda.min
best.lambda
18.5313758208065
In [254]:
lasso.fit = glmnet(X,Y,alpha=1)
coef.pred = predict(lasso.fit,type="coefficients",s=best.lambda)[1:11,]
coef.pred
(Intercept)
5.24932820548313
poly(x, 10, raw = T)1
0
poly(x, 10, raw = T)2
0
poly(x, 10, raw = T)3
0
poly(x, 10, raw = T)4
0
poly(x, 10, raw = T)5
0
poly(x, 10, raw = T)6
0
poly(x, 10, raw = T)7
7.69501722253953
poly(x, 10, raw = T)8
0
poly(x, 10, raw = T)9
0
poly(x, 10, raw = T)10
0

However the lasso produces the following one model with only one variable x^7 which is similar to the original model.

In [255]:
coef.pred[coef.pred!=0]
(Intercept)
5.24932820548313
poly(x, 10, raw = T)7
7.69501722253953