What is deviance?

Deviance is the error measure that tree() reports and that cv.tree() cross-validates. For a regression tree it is the residual sum of squares within a node; for a classification tree it is minus twice the multinomial log-likelihood, -2 * sum_k n_k * log(p_k), where n_k and p_k are the count and proportion of class k in the node.
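As a quick numerical check, the root deviance of the fgl classification tree printed further down (645.700 for 214 cases) can be reproduced from the class frequencies alone. A minimal sketch, assuming only that the MASS package is installed:

# reproduce the root-node deviance of the fgl tree from its class counts
data(fgl, package = "MASS")
tab <- table(fgl$type)   # class counts at the root (n = 214)
p <- tab / sum(tab)      # class proportions, as shown in the root's (yprob)
-2 * sum(tab * log(p))   # about 645.7, matching "1) root 214 645.700 ..." below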
> library(MASS); library(tree)
> cpus.ltr <- tree(log10(perf) ~ syct + mmin + mmax + cach + chmin + chmax, data = cpus)
> cv.tree(cpus.ltr, , prune.tree)
$size
[1] 10  8  7  6  5  4  3  2  1

$dev
[1]  9.626327 11.656761 11.696385 11.591726 12.860608
[6] 13.059687 20.178303 20.605124 43.299480

$k
[1]      -Inf 0.6808309 0.7243056 0.8000558 1.1607588
[6] 1.4148749 3.7783549 3.8519002 23.6820624

$method
[1] "deviance"

attr(,"class")
[1] "prune"         "tree.sequence"
> head(cpus)
            name syct mmin  mmax cach chmin chmax perf estperf
1  ADVISOR 32/60  125  256  6000  256    16   128  198     199
2  AMDAHL 470V/7   29 8000 32000   32     8    32  269     253
3  AMDAHL 470/7A   29 8000 32000   32     8    32  220     253
4 AMDAHL 470V/7B   29 8000 32000   32     8    32  172     253
5 AMDAHL 470V/7C   29 8000 16000   32     8    16  132     132
6  AMDAHL 470V/8   26 8000 32000   64     8    32  318     290
> plot(cpus.ltr)
> a1.cv<-cv.tree(cpus.ltr, , prune.tree)
> plot(a1.cv)
> data(fgl, package="MASS")
> fgl.tr <- tree(type ~ ., fgl)
> plot(print(fgl.tr))
node), split, n, deviance, yval, (yprob)
      * denotes terminal node

 1) root 214 645.700 WinNF ( 0.327103 0.355140 0.079439 0.060748 0.042056 0.135514 )
   2) Mg < 2.695 61 159.200 Head ( 0.000000 0.213115 0.000000 0.213115 0.147541 0.426230 )
     4) Na < 13.785 24 40.160 Con ( 0.000000 0.458333 0.000000 0.500000 0.000000 0.041667 )
       8) Al < 1.38 8 6.028 WinNF ( 0.000000 0.875000 0.000000 0.000000 0.000000 0.125000 ) *
       9) Al > 1.38 16 17.990 Con ( 0.000000 0.250000 0.000000 0.750000 0.000000 0.000000 )
        18) Fe < 0.085 10 0.000 Con ( 0.000000 0.000000 0.000000 1.000000 0.000000 0.000000 ) *
        19) Fe > 0.085 6 7.638 WinNF ( 0.000000 0.666667 0.000000 0.333333 0.000000 0.000000 ) *
     5) Na > 13.785 37 63.940 Head ( 0.000000 0.054054 0.000000 0.027027 0.243243 0.675676 )
      10) Ba < 0.2 12 17.320 Tabl ( 0.000000 0.166667 0.000000 0.000000 0.750000 0.083333 )
        20) RI < 1.265 7 0.000 Tabl ( 0.000000 0.000000 0.000000 0.000000 1.000000 0.000000 ) *
        21) RI > 1.265 5 10.550 WinNF ( 0.000000 0.400000 0.000000 0.000000 0.400000 0.200000 ) *
      11) Ba > 0.2 25 8.397 Head ( 0.000000 0.000000 0.000000 0.040000 0.000000 0.960000 ) *
   3) Mg > 2.695 153 319.600 WinF ( 0.457516 0.411765 0.111111 0.000000 0.000000 0.019608 )
     6) Al < 1.42 101 189.000 WinF ( 0.633663 0.227723 0.128713 0.000000 0.000000 0.009901 )
      12) RI < -0.93 14 28.970 Veh ( 0.214286 0.285714 0.500000 0.000000 0.000000 0.000000 )
        24) RI < -1.885 5 6.730 WinF ( 0.600000 0.000000 0.400000 0.000000 0.000000 0.000000 ) *
        25) RI > -1.885 9 12.370 Veh ( 0.000000 0.444444 0.555556 0.000000 0.000000 0.000000 ) *
      13) RI > -0.93 87 142.200 WinF ( 0.701149 0.218391 0.068966 0.000000 0.000000 0.011494 )
        26) K < 0.29 28 42.500 WinF ( 0.714286 0.071429 0.214286 0.000000 0.000000 0.000000 )
          52) Ca < 9.67 17 22.070 WinF ( 0.647059 0.000000 0.352941 0.000000 0.000000 0.000000 ) *
          53) Ca > 9.67 11 10.430 WinF ( 0.818182 0.181818 0.000000 0.000000 0.000000 0.000000 ) *
        27) K > 0.29 59 80.310 WinF ( 0.694915 0.288136 0.000000 0.000000 0.000000 0.016949 )
          54) Mg < 3.75 49 49.640 WinF ( 0.836735 0.142857 0.000000 0.000000 0.000000 0.020408 )
           108) Fe < 0.145 38 18.440 WinF ( 0.947368 0.026316 0.000000 0.000000 0.000000 0.026316 )
             216) RI < 1.045 33 0.000 WinF ( 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ) *
             217) RI > 1.045 5 9.503 WinF ( 0.600000 0.200000 0.000000 0.000000 0.000000 0.200000 ) *
           109) Fe > 0.145 11 15.160 WinNF ( 0.454545 0.545455 0.000000 0.000000 0.000000 0.000000 )
             218) Al < 1.17 5 0.000 WinNF ( 0.000000 1.000000 0.000000 0.000000 0.000000 0.000000 ) *
             219) Al > 1.17 6 5.407 WinF ( 0.833333 0.166667 0.000000 0.000000 0.000000 0.000000 ) *
          55) Mg > 3.75 10 0.000 WinNF ( 0.000000 1.000000 0.000000 0.000000 0.000000 0.000000 ) *
     7) Al > 1.42 52 80.450 WinNF ( 0.115385 0.769231 0.076923 0.000000 0.000000 0.038462 )
      14) Mg < 3.455 17 29.710 WinNF ( 0.000000 0.647059 0.235294 0.000000 0.000000 0.117647 )
        28) Si < 72.84 8 16.640 Veh ( 0.000000 0.250000 0.500000 0.000000 0.000000 0.250000 ) *
        29) Si > 72.84 9 0.000 WinNF ( 0.000000 1.000000 0.000000 0.000000 0.000000 0.000000 ) *
      15) Mg > 3.455 35 32.070 WinNF ( 0.171429 0.828571 0.000000 0.000000 0.000000 0.000000 )
        30) Na < 12.835 7 9.561 WinF ( 0.571429 0.428571 0.000000 0.000000 0.000000 0.000000 ) *
        31) Na > 12.835 28 14.410 WinNF ( 0.071429 0.928571 0.000000 0.000000 0.000000 0.000000 )
          62) K < 0.55 6 7.638 WinNF ( 0.333333 0.666667 0.000000 0.000000 0.000000 0.000000 ) *
          63) K > 0.55 22 0.000 WinNF ( 0.000000 1.000000 0.000000 0.000000 0.000000 0.000000 ) *
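Each line gives the node number, the defining split, the number of cases, the node deviance, the fitted class, and the vector of class probabilities; a * marks a terminal node. To gauge how well the tree fits the training data, a sketch (pred is a new name):

# training-set confusion matrix and misclassification rate for fgl.tr
pred <- predict(fgl.tr, type = "class")
table(predicted = pred, actual = fgl$type)
mean(pred != fgl$type)   # training misclassification rate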
> fgl.cv <- cv.tree(fgl.tr,, prune.tree)
> for(i in 2:5) fgl.cv$dev <- fgl.cv$dev +
+ cv.tree(fgl.tr,, prune.tree)$dev
> fgl.cv$dev <- fgl.cv$dev/5
> plot(fgl.cv)
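cv.tree assigns cases to folds at random, so each call returns somewhat different deviances; the loop above averages five runs to stabilise the curve. The same averaging can be written more compactly (devs is a new name):

# average the cross-validated deviance over five independent runs
devs <- replicate(5, cv.tree(fgl.tr, , prune.tree)$dev)
fgl.cv$dev <- rowMeans(devs)
plot(fgl.cv)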
> ir.tr <- tree(Species ~ ., iris)
> plot(ir.tr)
> text(ir.tr)
> library(rpart)
> fit <- rpart(Kyphosis ~ Age + Number + Start, data = kyphosis)
> fit2 <- rpart(Kyphosis ~ Age + Number + Start, data = kyphosis,
+     parms = list(prior = c(0.65, 0.35), split = "information"))
> fit3 <- rpart(Kyphosis ~ Age + Number + Start, data = kyphosis,
+     control = rpart.control(cp = 0.05))
> par(mfrow = c(1,2), xpd = TRUE)
> plot(fit)
> text(fit, use.n = TRUE)
> plot(fit2)
> text(fit2, use.n = TRUE)
> plot(fit3)
> text(fit3, use.n = TRUE)
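The three fits differ only in their options: fit uses the defaults, fit2 sets the class priors to 0.65/0.35 and splits on information (entropy) rather than the default Gini index, and fit3 raises the complexity parameter cp to 0.05 so that only splits improving the overall fit by at least 5% are kept, which yields a smaller tree. One quick way to compare tree sizes, as a sketch:

# number of leaves in each of the three fitted trees
sapply(list(fit, fit2, fit3), function(f) sum(f$frame$var == "<leaf>"))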
> text(ir.tr, use.n = TRUE)
Warning messages:
1: In text.default(xy$x[ind], xy$y[ind] + 0.5 * charht, rows[ind], :
  "use.n" is not a graphical parameter
2: In text.default(xy$x[leaves], xy$y[leaves] - 0.5 * charht, labels = stat, :
  "use.n" is not a graphical parameter

(The warnings appear because use.n is an argument of text.rpart, not of text.tree; text.tree passes the unrecognised argument through to text(), which rejects it.)
> z.auto <- rpart(Mileage ~ Weight, car.test.frame)
> meanvar(z.auto, log = 'xy')   # plots node variance (deviance/n) against node mean for each leaf, on log-log axes
> fit <- rpart(Kyphosis ~ Age + Number + Start, data = kyphosis)
> print(fit)
n= 81

node), split, n, loss, yval, (yprob)
      * denotes terminal node

 1) root 81 17 absent (0.79012346 0.20987654)
   2) Start>=8.5 62 6 absent (0.90322581 0.09677419)
     4) Start>=14.5 29 0 absent (1.00000000 0.00000000) *
     5) Start< 14.5 33 6 absent (0.81818182 0.18181818)
      10) Age< 55 12 0 absent (1.00000000 0.00000000) *
      11) Age>=55 21 6 absent (0.71428571 0.28571429)
        22) Age>=111 14 2 absent (0.85714286 0.14285714) *
        23) Age< 111 7 3 present (0.42857143 0.57142857) *
   3) Start< 8.5 19 8 present (0.42105263 0.57894737) *
> path.rpart(fit, node = c(11, 22))
 node number: 11
   root
   Start>=8.5
   Start< 14.5
   Age>=55

 node number: 22
   root
   Start>=8.5
   Start< 14.5
   Age>=55
   Age>=111
> fit <- rpart(Price ~ Mileage + Type + Country, cu.summary)
> par(xpd = TRUE)
> plot(fit, compress = TRUE)
> text(fit, use.n = TRUE)
> z.auto <- rpart(Mileage ~ Weight, car.test.frame)
> post(z.auto, file = "")   # file = "" draws the presentation plot on the current device
> post(z.auto, file = "pretty.ps", title = " ")   # writes a PostScript file instead
> z.hp <- rpart(Mileage ~ Weight + HP, car.test.frame)
> post(z.hp)   # with no file argument, writes a .ps file named after the object
> fit <- rpart(Mileage ~ Weight, car.test.frame)
> xmat <- xpred.rpart(fit)
> xerr <- (xmat - car.test.frame$Mileage)^2
> apply(xerr, 2, sum)
0.79767456 0.28300396 0.04154257 0.01132626
1370.3189 780.0400 544.1634 535.9519
> apply(xerr, 2, sum)/var(car.test.frame$Mileage)
0.79767456 0.28300396 0.04154257 0.01132626
59.68538 33.97529 23.70149 23.34383
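xpred.rpart returns one column of cross-validated predictions per complexity-parameter value (the numbers printed above the sums), so apply(xerr, 2, sum) is the cross-validated error sum at each cp. Dividing by var() as above gives one convenient scale; dividing by the root-node error instead puts the figures on the same scale as the xerror column of printcp below. A sketch (root.err is a new name; cross-validation is random, so the values only roughly match):

# cross-validated relative error, comparable to printcp's xerror column
root.err <- sum((car.test.frame$Mileage - mean(car.test.frame$Mileage))^2)  # 1354.6
apply(xerr, 2, sum) / root.err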
> printcp(fit)
Regression tree:
rpart(formula = Mileage ~ Weight, data = car.test.frame)
Variables actually used in tree construction:
[1] Weight
Root node error: 1354.6/60 = 22.576
n= 60
CP nsplit rel error xerror xstd
1 0.595349 0 1.00000 1.06870 0.184571
2 0.134528 1 0.40465 0.58480 0.086640
3 0.012828 2 0.27012 0.48111 0.080818
4 0.010000 3 0.25729 0.48100 0.080856
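Each row of the cp table describes one subtree: its complexity parameter, number of splits, resubstitution error (rel error), and cross-validated error (xerror) with its standard error (xstd), all relative to the root-node error. A common rule is to prune at the cp with the smallest xerror. A sketch, with cp.tab, best.cp and fit.pruned as new names:

# prune at the cp value with the lowest cross-validated error
cp.tab <- fit$cptable
best.cp <- cp.tab[which.min(cp.tab[, "xerror"]), "CP"]
fit.pruned <- prune(fit, cp = best.cp)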
> z.auto <- rpart(Mileage ~ Weight, car.test.frame)
> predict(z.auto)
Eagle Summit 4                  30.93333
Ford Escort 4                   30.93333
Ford Festiva 4                  30.93333
Honda Civic 4                   30.93333
Mazda Protege 4                 30.93333
Mercury Tracer 4                30.93333
Nissan Sentra 4                 30.93333
Pontiac LeMans 4                30.93333
Subaru Loyale 4                 30.93333
Subaru Justy 3                  30.93333
Toyota Corolla 4                30.93333
Toyota Tercel 4                 30.93333
Volkswagen Jetta 4              30.93333
Chevrolet Camaro V8             20.40909
Dodge Daytona                   23.80000
Ford Mustang V8                 20.40909
Ford Probe                      25.62500
Honda Civic CRX Si 4            30.93333
Honda Prelude Si 4WS 4          25.62500
Nissan 240SX 4                  23.80000
Plymouth Laser                  23.80000
Subaru XT 4                     30.93333
Audi 80 4                       25.62500
Buick Skylark 4                 25.62500
Chevrolet Beretta 4             25.62500
Chrysler Le Baron V6            23.80000
Ford Tempo 4                    23.80000
Honda Accord 4                  23.80000
Mazda 626 4                     23.80000
Mitsubishi Galant 4             25.62500
Mitsubishi Sigma V6             20.40909
Nissan Stanza 4                 23.80000
Oldsmobile Calais 4             25.62500
Peugeot 405 4                   25.62500
Subaru Legacy 4                 23.80000
Toyota Camry 4                  23.80000
Volvo 240 4                     23.80000
Acura Legend V6                 20.40909
Buick Century 4                 23.80000
Chrysler Le Baron Coupe         23.80000
Chrysler New Yorker V6          20.40909
Eagle Premier V6                20.40909
Ford Taurus V6                  20.40909
Ford Thunderbird V6             20.40909
Hyundai Sonata 4                23.80000
Mazda 929 V6                    20.40909
Nissan Maxima V6                20.40909
Oldsmobile Cutlass Ciera 4      23.80000
Oldsmobile Cutlass Supreme V6   20.40909
Toyota Cressida 6               20.40909
Buick Le Sabre V6               20.40909
Chevrolet Caprice V8            20.40909
Ford LTD Crown Victoria V8      20.40909
Chevrolet Lumina APV V6         20.40909
Dodge Grand Caravan V6          20.40909
Ford Aerostar V6                20.40909
Mazda MPV V6                    20.40909
Mitsubishi Wagon 4              20.40909
Nissan Axxess 4                 20.40909
Nissan Van 4                    20.40909
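Every case in the same leaf receives the same fitted value, the leaf mean, so these 60 predictions take only four distinct values, one per leaf of z.auto. To list the leaf means together with the number of cars in each leaf:

# distinct fitted values are the leaf means; counts are the leaf sizes
table(predict(z.auto))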
> fit <- rpart(Kyphosis ~ Age + Number + Start, data = kyphosis)
> predict(fit, type = "prob")
absent present
1 0.4210526 0.5789474
2 0.8571429 0.1428571
3 0.4210526 0.5789474
4 0.4210526 0.5789474
5 1.0000000 0.0000000
6 1.0000000 0.0000000
7 1.0000000 0.0000000
8 1.0000000 0.0000000
9 1.0000000 0.0000000
10 0.4285714 0.5714286
11 0.4285714 0.5714286
12 1.0000000 0.0000000
13 0.4210526 0.5789474
14 1.0000000 0.0000000
15 1.0000000 0.0000000
16 1.0000000 0.0000000
17 1.0000000 0.0000000
18 0.8571429 0.1428571
19 1.0000000 0.0000000
20 1.0000000 0.0000000
21 1.0000000 0.0000000
22 0.4210526 0.5789474
23 0.4285714 0.5714286
24 0.4210526 0.5789474
25 0.4210526 0.5789474
26 1.0000000 0.0000000
27 0.4210526 0.5789474
28 0.4285714 0.5714286
29 1.0000000 0.0000000
30 1.0000000 0.0000000
31 1.0000000 0.0000000
32 0.8571429 0.1428571
33 0.8571429 0.1428571
34 1.0000000 0.0000000
35 0.8571429 0.1428571
36 1.0000000 0.0000000
37 1.0000000 0.0000000
38 0.4210526 0.5789474
39 1.0000000 0.0000000
40 0.4285714 0.5714286
41 0.4210526 0.5789474
42 1.0000000 0.0000000
43 0.4210526 0.5789474
44 0.4210526 0.5789474
45 1.0000000 0.0000000
46 0.8571429 0.1428571
47 1.0000000 0.0000000
48 0.8571429 0.1428571
49 0.4210526 0.5789474
50 0.8571429 0.1428571
51 0.4285714 0.5714286
52 1.0000000 0.0000000
53 0.4210526 0.5789474
54 1.0000000 0.0000000
55 1.0000000 0.0000000
56 1.0000000 0.0000000
57 1.0000000 0.0000000
58 0.4210526 0.5789474
59 1.0000000 0.0000000
60 0.4285714 0.5714286
61 0.4210526 0.5789474
62 0.4210526 0.5789474
63 0.4210526 0.5789474
64 1.0000000 0.0000000
65 1.0000000 0.0000000
66 1.0000000 0.0000000
67 1.0000000 0.0000000
68 0.8571429 0.1428571
69 1.0000000 0.0000000
70 1.0000000 0.0000000
71 0.8571429 0.1428571
72 0.8571429 0.1428571
73 1.0000000 0.0000000
74 0.8571429 0.1428571
75 1.0000000 0.0000000
76 1.0000000 0.0000000
77 0.8571429 0.1428571
78 1.0000000 0.0000000
79 0.8571429 0.1428571
80 0.4210526 0.5789474
81 1.0000000 0.0000000
> predict(fit, type = "vector")
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
2 1 2 2 1 1 1 1 1 2 2 1 2 1 1 1 1
18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
1 1 1 1 2 2 2 2 1 2 2 1 1 1 1 1 1
35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51
1 1 1 2 1 2 2 1 2 2 1 1 1 1 2 1 2
52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
1 2 1 1 1 1 2 1 2 2 2 2 1 1 1 1 1
69 70 71 72 73 74 75 76 77 78 79 80 81
1 1 1 1 1 1 1 1 1 1 1 2 1
> predict(fit, type = "class")
1 2 3 4 5 6
present absent present present absent absent
7 8 9 10 11 12
absent absent absent present present absent
13 14 15 16 17 18
present absent absent absent absent absent
19 20 21 22 23 24
absent absent absent present present present
25 26 27 28 29 30
present absent present present absent absent
31 32 33 34 35 36
absent absent absent absent absent absent
37 38 39 40 41 42
absent present absent present present absent
43 44 45 46 47 48
present present absent absent absent absent
49 50 51 52 53 54
present absent present absent present absent
55 56 57 58 59 60
absent absent absent present absent present
61 62 63 64 65 66
present present present absent absent absent
67 68 69 70 71 72
absent absent absent absent absent absent
73 74 75 76 77 78
absent absent absent absent absent absent
79 80 81
absent present absent
Levels: absent present
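For a classification rpart, type = "vector" returns the predicted class as its level number, so it carries the same information as type = "class". A quick check:

# the level codes from type = "vector" agree with the labels from type = "class"
all(predict(fit, type = "vector") == as.numeric(predict(fit, type = "class")))   # TRUE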
> predict(fit, type = "matrix")
[,1] [,2] [,3] [,4] [,5] [,6]
1 2 8 11 0.4210526 0.5789474 0.23456790
2 1 12 2 0.8571429 0.1428571 0.17283951
3 2 8 11 0.4210526 0.5789474 0.23456790
4 2 8 11 0.4210526 0.5789474 0.23456790
5 1 29 0 1.0000000 0.0000000 0.35802469
6 1 29 0 1.0000000 0.0000000 0.35802469
7 1 29 0 1.0000000 0.0000000 0.35802469
8 1 29 0 1.0000000 0.0000000 0.35802469
9 1 29 0 1.0000000 0.0000000 0.35802469
10 2 3 4 0.4285714 0.5714286 0.08641975
11 2 3 4 0.4285714 0.5714286 0.08641975
12 1 29 0 1.0000000 0.0000000 0.35802469
13 2 8 11 0.4210526 0.5789474 0.23456790
14 1 12 0 1.0000000 0.0000000 0.14814815
15 1 29 0 1.0000000 0.0000000 0.35802469
16 1 29 0 1.0000000 0.0000000 0.35802469
17 1 29 0 1.0000000 0.0000000 0.35802469
18 1 12 2 0.8571429 0.1428571 0.17283951
19 1 29 0 1.0000000 0.0000000 0.35802469
20 1 12 0 1.0000000 0.0000000 0.14814815
21 1 29 0 1.0000000 0.0000000 0.35802469
22 2 8 11 0.4210526 0.5789474 0.23456790
23 2 3 4 0.4285714 0.5714286 0.08641975
24 2 8 11 0.4210526 0.5789474 0.23456790
25 2 8 11 0.4210526 0.5789474 0.23456790
26 1 12 0 1.0000000 0.0000000 0.14814815
27 2 8 11 0.4210526 0.5789474 0.23456790
28 2 3 4 0.4285714 0.5714286 0.08641975
29 1 29 0 1.0000000 0.0000000 0.35802469
30 1 29 0 1.0000000 0.0000000 0.35802469
31 1 29 0 1.0000000 0.0000000 0.35802469
32 1 12 2 0.8571429 0.1428571 0.17283951
33 1 12 2 0.8571429 0.1428571 0.17283951
34 1 29 0 1.0000000 0.0000000 0.35802469
35 1 12 2 0.8571429 0.1428571 0.17283951
36 1 29 0 1.0000000 0.0000000 0.35802469
37 1 12 0 1.0000000 0.0000000 0.14814815
38 2 8 11 0.4210526 0.5789474 0.23456790
39 1 12 0 1.0000000 0.0000000 0.14814815
40 2 3 4 0.4285714 0.5714286 0.08641975
41 2 8 11 0.4210526 0.5789474 0.23456790
42 1 12 0 1.0000000 0.0000000 0.14814815
43 2 8 11 0.4210526 0.5789474 0.23456790
44 2 8 11 0.4210526 0.5789474 0.23456790
45 1 29 0 1.0000000 0.0000000 0.35802469
46 1 12 2 0.8571429 0.1428571 0.17283951
47 1 29 0 1.0000000 0.0000000 0.35802469
48 1 12 2 0.8571429 0.1428571 0.17283951
49 2 8 11 0.4210526 0.5789474 0.23456790
50 1 12 2 0.8571429 0.1428571 0.17283951
51 2 3 4 0.4285714 0.5714286 0.08641975
52 1 29 0 1.0000000 0.0000000 0.35802469
53 2 8 11 0.4210526 0.5789474 0.23456790
54 1 29 0 1.0000000 0.0000000 0.35802469
55 1 29 0 1.0000000 0.0000000 0.35802469
56 1 29 0 1.0000000 0.0000000 0.35802469
57 1 12 0 1.0000000 0.0000000 0.14814815
58 2 8 11 0.4210526 0.5789474 0.23456790
59 1 12 0 1.0000000 0.0000000 0.14814815
60 2 3 4 0.4285714 0.5714286 0.08641975
61 2 8 11 0.4210526 0.5789474 0.23456790
62 2 8 11 0.4210526 0.5789474 0.23456790
63 2 8 11 0.4210526 0.5789474 0.23456790
64 1 29 0 1.0000000 0.0000000 0.35802469
65 1 29 0 1.0000000 0.0000000 0.35802469
66 1 12 0 1.0000000 0.0000000 0.14814815
67 1 29 0 1.0000000 0.0000000 0.35802469
68 1 12 2 0.8571429 0.1428571 0.17283951
69 1 12 0 1.0000000 0.0000000 0.14814815
70 1 29 0 1.0000000 0.0000000 0.35802469
71 1 12 2 0.8571429 0.1428571 0.17283951
72 1 12 2 0.8571429 0.1428571 0.17283951
73 1 29 0 1.0000000 0.0000000 0.35802469
74 1 12 2 0.8571429 0.1428571 0.17283951
75 1 29 0 1.0000000 0.0000000 0.35802469
76 1 29 0 1.0000000 0.0000000 0.35802469
77 1 12 2 0.8571429 0.1428571 0.17283951
78 1 12 0 1.0000000 0.0000000 0.14814815
79 1 12 2 0.8571429 0.1428571 0.17283951
80 2 8 11 0.4210526 0.5789474 0.23456790
81 1 12 0 1.0000000 0.0000000 0.14814815
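For a classification tree, type = "matrix" returns the full leaf information for each observation: column 1 is the predicted class number, columns 2-3 the class counts (absent, present) in the leaf, columns 4-5 the class probabilities, and column 6 the leaf's share of all n = 81 cases. Row 1 above is node 3) of the printed tree, and its entries can be checked by hand:

# node 3): 8 absent + 11 present = 19 cases out of 81
c(8/19, 11/19, 19/81)   # 0.4210526 0.5789474 0.2345679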