r - Variation on "How to plot decision boundary of a k-nearest neighbor classifier from Elements of Statistical Learning?"
This question is related to https://stats.stackexchange.com/questions/21572/how-to-plot-decision-boundary-of-a-k-nearest-neighbor-classifier-from-elements-o
For completeness, here is the original example from that link:
library(ElemStatLearn)
require(class)
x <- mixture.example$x
g <- mixture.example$y
xnew <- mixture.example$xnew
mod15 <- knn(x, xnew, g, k=15, prob=TRUE)
prob <- attr(mod15, "prob")
prob <- ifelse(mod15=="1", prob, 1-prob)
px1 <- mixture.example$px1
px2 <- mixture.example$px2
prob15 <- matrix(prob, length(px1), length(px2))
par(mar=rep(2,4))
contour(px1, px2, prob15, levels=0.5, labels="", xlab="", ylab="",
        main="15-nearest neighbour", axes=FALSE)
points(x, col=ifelse(g==1, "coral", "cornflowerblue"))
gd <- expand.grid(x=px1, y=px2)
points(gd, pch=".", cex=1.2, col=ifelse(prob15>0.5, "coral", "cornflowerblue"))
box()
I've been playing with this example and am trying to make it work with 3 classes. I can change the values of g like
g[8:16] <- 2
to just pretend there are samples from a third class. I can't make the plot work, though. I guess I need to change the lines that deal with the proportion of votes for the winning class (with two classes, prob vs. 1 - prob recovers the probability of class 1, but that trick does not generalize to three classes):
prob <- attr(mod15, "prob")
prob <- ifelse(mod15=="1", prob, 1-prob)
and the levels on the contour:
contour(px1, px2, prob15, levels=0.5, labels="", xlab="", ylab="",
        main="15-nearest neighbour", axes=FALSE)
I'm not sure contour is the right tool for this. One alternative that works is to create a matrix of data covering the region I'm interested in, classify each point of that matrix, and plot it with a large marker and different colors, similar to what is being done in the points(gd...) bit.
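For example, something along these lines is what I have in mind (a rough sketch, reusing the xnew grid, the modified g from above, and an arbitrary third color):

library(ElemStatLearn)
require(class)
x <- mixture.example$x
g <- mixture.example$y
g[8:16] <- 2                           # pretend these are a third class
xnew <- mixture.example$xnew
px1 <- mixture.example$px1
px2 <- mixture.example$px2
mod15 <- knn(x, xnew, g, k=15, prob=TRUE)
cols <- c("coral", "cornflowerblue", "darkolivegreen4")
# colour every grid point by its predicted class instead of contouring
plot(x, col=cols[g+1], xlab="", ylab="", main="15-nearest neighbour, 3 classes")
gd <- expand.grid(x=px1, y=px2)
points(gd, pch=".", cex=1.2, col=cols[as.numeric(as.character(mod15))+1])
box()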
The final purpose is to be able to show the different decision boundaries generated by different classifiers. Can someone point me in the right direction?
Thanks, Rafael
Separating the main parts of the code, here is an outline of how to achieve this:
Training data with 3 classes
train <- rbind(iris3[1:25,1:2,1], iris3[1:25,1:2,2], iris3[1:25,1:2,3])
cl <- factor(c(rep("s",25), rep("c",25), rep("v",25)))
Test data covering a grid
require(MASS)
test <- expand.grid(x=seq(min(train[,1]-1), max(train[,1]+1), by=0.1),
                    y=seq(min(train[,2]-1), max(train[,2]+1), by=0.1))
Classification of the grid (3 classes, obviously)
require(class)
classif <- knn(train, test, cl, k = 3, prob=TRUE)
prob <- attr(classif, "prob")
Data structure for plotting: one copy of the grid per class, with prob_cls marking whether each grid point was classified as that class
require(dplyr)
dataf <- bind_rows(mutate(test, prob=prob, cls="c", prob_cls=ifelse(classif==cls, 1, 0)),
                   mutate(test, prob=prob, cls="v", prob_cls=ifelse(classif==cls, 1, 0)),
                   mutate(test, prob=prob, cls="s", prob_cls=ifelse(classif==cls, 1, 0)))
Plot the classified grid, the training points, and one boundary contour per class (bins=2 in geom_contour draws the single contour between prob_cls = 0 and prob_cls = 1)
require(ggplot2)
ggplot(dataf) +
    geom_point(aes(x=x, y=y, col=cls), data = mutate(test, cls=classif), size=1.2) +
    geom_contour(aes(x=x, y=y, z=prob_cls, group=cls, color=cls),
                 bins=2, data=dataf) +
    geom_point(aes(x=x, y=y, col=cls), size=3,
               data=data.frame(x=train[,1], y=train[,2], cls=cl))
We can get a little fancier and plot the probability of class membership as an indication of "confidence".
ggplot(dataf) +
    geom_point(aes(x=x, y=y, col=cls, size=prob), data = mutate(test, cls=classif)) +
    scale_size(range=c(0.8, 2)) +
    geom_contour(aes(x=x, y=y, z=prob_cls, group=cls, color=cls),
                 bins=2, data=dataf) +
    geom_point(aes(x=x, y=y, col=cls), size=3,
               data=data.frame(x=train[,1], y=train[,2], cls=cl)) +
    geom_point(aes(x=x, y=y), size=3, shape=1,
               data=data.frame(x=train[,1], y=train[,2], cls=cl))
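Since the final purpose is to compare boundaries from different classifiers, the same grid-and-contour recipe carries over to any model that predicts a class for each grid point. Here is a minimal sketch swapping knn for lda from MASS (an arbitrary choice for illustration, with new names lda_fit, classif_lda, dataf_lda), reusing the train, test, and cl objects above:

# LDA instead of knn: only the line producing the predicted classes changes;
# the grid data structure and the plotting code stay the same.
lda_fit <- lda(train, grouping = cl)
classif_lda <- predict(lda_fit, test)$class
dataf_lda <- bind_rows(mutate(test, cls="c", prob_cls=ifelse(classif_lda==cls, 1, 0)),
                       mutate(test, cls="v", prob_cls=ifelse(classif_lda==cls, 1, 0)),
                       mutate(test, cls="s", prob_cls=ifelse(classif_lda==cls, 1, 0)))
ggplot(dataf_lda) +
    geom_point(aes(x=x, y=y, col=cls), data = mutate(test, cls=classif_lda), size=1.2) +
    geom_contour(aes(x=x, y=y, z=prob_cls, group=cls, color=cls),
                 bins=2, data=dataf_lda) +
    geom_point(aes(x=x, y=y, col=cls), size=3,
               data=data.frame(x=train[,1], y=train[,2], cls=cl))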