Chapter 7 - Moving Beyond Linearity, Question 9

In [3]:
library(ISLR)
library(MASS)
summary(Boston)
      crim                zn             indus            chas        
 Min.   : 0.00632   Min.   :  0.00   Min.   : 0.46   Min.   :0.00000  
 1st Qu.: 0.08204   1st Qu.:  0.00   1st Qu.: 5.19   1st Qu.:0.00000  
 Median : 0.25651   Median :  0.00   Median : 9.69   Median :0.00000  
 Mean   : 3.61352   Mean   : 11.36   Mean   :11.14   Mean   :0.06917  
 3rd Qu.: 3.67708   3rd Qu.: 12.50   3rd Qu.:18.10   3rd Qu.:0.00000  
 Max.   :88.97620   Max.   :100.00   Max.   :27.74   Max.   :1.00000  
      nox               rm             age              dis        
 Min.   :0.3850   Min.   :3.561   Min.   :  2.90   Min.   : 1.130  
 1st Qu.:0.4490   1st Qu.:5.886   1st Qu.: 45.02   1st Qu.: 2.100  
 Median :0.5380   Median :6.208   Median : 77.50   Median : 3.207  
 Mean   :0.5547   Mean   :6.285   Mean   : 68.57   Mean   : 3.795  
 3rd Qu.:0.6240   3rd Qu.:6.623   3rd Qu.: 94.08   3rd Qu.: 5.188  
 Max.   :0.8710   Max.   :8.780   Max.   :100.00   Max.   :12.127  
      rad              tax           ptratio          black       
 Min.   : 1.000   Min.   :187.0   Min.   :12.60   Min.   :  0.32  
 1st Qu.: 4.000   1st Qu.:279.0   1st Qu.:17.40   1st Qu.:375.38  
 Median : 5.000   Median :330.0   Median :19.05   Median :391.44  
 Mean   : 9.549   Mean   :408.2   Mean   :18.46   Mean   :356.67  
 3rd Qu.:24.000   3rd Qu.:666.0   3rd Qu.:20.20   3rd Qu.:396.23  
 Max.   :24.000   Max.   :711.0   Max.   :22.00   Max.   :396.90  
     lstat            medv      
 Min.   : 1.73   Min.   : 5.00  
 1st Qu.: 6.95   1st Qu.:17.02  
 Median :11.36   Median :21.20  
 Mean   :12.65   Mean   :22.53  
 3rd Qu.:16.95   3rd Qu.:25.00  
 Max.   :37.97   Max.   :50.00  

(a)

In [10]:
lm.fit = lm(nox~poly(dis,3),data=Boston)
dis.lims = range(Boston$dis)
dis.grid = seq(dis.lims[1],dis.lims[2],0.1)
pred = predict(lm.fit,newdata=list(dis=dis.grid))
In [13]:
plot(Boston$dis,Boston$nox,col="darkgrey",xlab="weighted distance", ylab="nox ppm")
lines(dis.grid,pred,col="blue")

(b)

In [31]:
par(mfrow=c(4,4))
rss = c()
for(d in 1:10){
    lm.fit = lm(nox~poly(dis,d),data=Boston)
    pred = predict(lm.fit,newdata=list(dis=dis.grid))
    plot(Boston$dis,Boston$nox,col="darkgrey",xlab="weighted distance", ylab="nox ppm")
    lines(dis.grid,pred,col="blue")
    rss[d]=deviance(lm.fit)
}
In [32]:
#RSS
print(rss)
 [1] 2.768563 2.035262 1.934107 1.932981 1.915290 1.878257 1.849484 1.835630
 [9] 1.833331 1.832171

(c)

In [43]:
library(boot)
set.seed(3)
cverr = c()
for(i in 1:10){
    glm.model = glm(nox~poly(dis,i),data=Boston)
    cverr[i] = cv.glm(Boston,glm.model,K=10)$delta[2]
}
plot(cverr,type="l",xlab="no. of degrees")
min_point = which.min(cverr)
points(min_point,cverr[min_point],col="red")
In [45]:
# No. of degrees of polynomial giving the least cross validation error
min_point
3

(d)

In [48]:
library(splines)
spline.model = lm(nox~bs(dis,df=4),data=Boston)
pred = predict(spline.model,newdata=list(dis=dis.grid))
plot(Boston$dis,Boston$nox,col="darkgrey",xlab="weighted distance", ylab="nox ppm")
lines(dis.grid,pred,col="blue")

The choice of the knots depend on the number of degrees of freedom

(e)

In [50]:
rss=c()
par(mfrow=c(3,3))
for(i in 3:10){
    spline.model = lm(nox~bs(dis,i),data=Boston)
    pred = predict(spline.model,newdata=list(dis=dis.grid))
    plot(Boston$dis,Boston$nox,col="darkgrey",xlab="weighted distance", ylab="nox ppm")
    lines(dis.grid,pred,col="blue")
    rss[i]=deviance(spline.model)
}
In [51]:
#rss
rss
  1. NA
  2. NA
  3. 1.93410670717907
  4. 1.92277499281193
  5. 1.84017280148852
  6. 1.83396590316021
  7. 1.82988444592328
  8. 1.81699505672523
  9. 1.82565251038706
  10. 1.79253488955613

(f)

In [57]:
cverr = c()
set.seed(1)
for(i in 3:10){
    spline.model = glm(nox~bs(dis,i),data=Boston)
    cverr[i] = cv.glm(Boston,spline.model,K=10)$delta[2]
}
plot(cverr,type="l")
Warning message in bs(dis, degree = 3L, knots = numeric(0), Boundary.knots = c(1.1296, :
“some 'x' values beyond boundary knots may cause ill-conditioned bases”Warning message in bs(dis, degree = 3L, knots = numeric(0), Boundary.knots = c(1.1296, :
“some 'x' values beyond boundary knots may cause ill-conditioned bases”Warning message in bs(dis, degree = 3L, knots = numeric(0), Boundary.knots = c(1.137, :
“some 'x' values beyond boundary knots may cause ill-conditioned bases”Warning message in bs(dis, degree = 3L, knots = numeric(0), Boundary.knots = c(1.137, :
“some 'x' values beyond boundary knots may cause ill-conditioned bases”Warning message in bs(dis, degree = 3L, knots = structure(3.2157, .Names = "50%"), :
“some 'x' values beyond boundary knots may cause ill-conditioned bases”Warning message in bs(dis, degree = 3L, knots = structure(3.2157, .Names = "50%"), :
“some 'x' values beyond boundary knots may cause ill-conditioned bases”Warning message in bs(dis, degree = 3L, knots = structure(3.1827, .Names = "50%"), :
“some 'x' values beyond boundary knots may cause ill-conditioned bases”Warning message in bs(dis, degree = 3L, knots = structure(3.1827, .Names = "50%"), :
“some 'x' values beyond boundary knots may cause ill-conditioned bases”Warning message in bs(dis, degree = 3L, knots = structure(c(2.38403333333333, 4.2474:
“some 'x' values beyond boundary knots may cause ill-conditioned bases”Warning message in bs(dis, degree = 3L, knots = structure(c(2.38403333333333, 4.2474:
“some 'x' values beyond boundary knots may cause ill-conditioned bases”Warning message in bs(dis, degree = 3L, knots = structure(c(2.39256666666667, 4.26416666666667:
“some 'x' values beyond boundary knots may cause ill-conditioned bases”Warning message in bs(dis, degree = 3L, knots = structure(c(2.39256666666667, 4.26416666666667:
“some 'x' values beyond boundary knots may cause ill-conditioned bases”Warning message in bs(dis, degree = 3L, knots = structure(c(2.1169, 3.23925, 5.212575:
“some 'x' values beyond boundary knots may cause ill-conditioned bases”Warning message in bs(dis, degree = 3L, knots = structure(c(2.1169, 3.23925, 5.212575:
“some 'x' values beyond boundary knots may cause ill-conditioned bases”Warning message in bs(dis, degree = 3L, knots = structure(c(2.0941, 3.2628, 5.21325:
“some 'x' values beyond boundary knots may cause ill-conditioned bases”Warning message in bs(dis, degree = 3L, knots = structure(c(2.0941, 3.2628, 5.21325:
“some 'x' values beyond boundary knots may cause ill-conditioned bases”Warning message in bs(dis, degree = 3L, knots = structure(c(1.9799, 2.7147, 3.945, :
“some 'x' values beyond boundary knots may cause ill-conditioned bases”Warning message in bs(dis, degree = 3L, knots = structure(c(1.9799, 2.7147, 3.945, :
“some 'x' values beyond boundary knots may cause ill-conditioned bases”Warning message in bs(dis, degree = 3L, knots = structure(c(1.94154, 2.59694, 3.79176, :
“some 'x' values beyond boundary knots may cause ill-conditioned bases”Warning message in bs(dis, degree = 3L, knots = structure(c(1.94154, 2.59694, 3.79176, :
“some 'x' values beyond boundary knots may cause ill-conditioned bases”Warning message in bs(dis, degree = 3L, knots = structure(c(1.86156666666667, 2.3727, :
“some 'x' values beyond boundary knots may cause ill-conditioned bases”Warning message in bs(dis, degree = 3L, knots = structure(c(1.86156666666667, 2.3727, :
“some 'x' values beyond boundary knots may cause ill-conditioned bases”Warning message in bs(dis, degree = 3L, knots = structure(c(1.86223333333333, 2.39623333333333, :
“some 'x' values beyond boundary knots may cause ill-conditioned bases”Warning message in bs(dis, degree = 3L, knots = structure(c(1.86223333333333, 2.39623333333333, :
“some 'x' values beyond boundary knots may cause ill-conditioned bases”Warning message in bs(dis, degree = 3L, knots = structure(c(1.80062857142857, 2.219, :
“some 'x' values beyond boundary knots may cause ill-conditioned bases”Warning message in bs(dis, degree = 3L, knots = structure(c(1.80062857142857, 2.219, :
“some 'x' values beyond boundary knots may cause ill-conditioned bases”Warning message in bs(dis, degree = 3L, knots = structure(c(1.7912, 2.1705, 2.741, :
“some 'x' values beyond boundary knots may cause ill-conditioned bases”Warning message in bs(dis, degree = 3L, knots = structure(c(1.7912, 2.1705, 2.741, :
“some 'x' values beyond boundary knots may cause ill-conditioned bases”Warning message in bs(dis, degree = 3L, knots = structure(c(1.748425, 2.10525, 2.50505, :
“some 'x' values beyond boundary knots may cause ill-conditioned bases”Warning message in bs(dis, degree = 3L, knots = structure(c(1.748425, 2.10525, 2.50505, :
“some 'x' values beyond boundary knots may cause ill-conditioned bases”Warning message in bs(dis, degree = 3L, knots = structure(c(1.74115, 2.0643, 2.4718, :
“some 'x' values beyond boundary knots may cause ill-conditioned bases”Warning message in bs(dis, degree = 3L, knots = structure(c(1.74115, 2.0643, 2.4718, :
“some 'x' values beyond boundary knots may cause ill-conditioned bases”

The 6 degrees of freedom gives the lowest cross validation error

In [ ]: