Studi Kasus: KNN pada RStudio

Download Data Training



Lakukan perintah dasar pada RStudio

> getwd()
[1] "C:/Users/ACER/Documents"

> MyData <- read.csv(file="dataTraining.csv", header=TRUE, sep=",")
> View(MyData)
> names(MyData)
 [1] "Win.Loss"       "Optimism"       "Pessimism"      "PastUsed"       "FutureUsed"     "PresentUsed" 
 [7] "OwnPartyCount"  "OppPartyCount"  "NumericContent" "Extra"          "Emoti"          "Agree"       
[13] "Consc"          "Openn" 
    
> attributes(MyData)
$names
 [1] "Win.Loss"       "Optimism"       "Pessimism"      "PastUsed"       "FutureUsed"     "PresentUsed" 
 [7] "OwnPartyCount"  "OppPartyCount"  "NumericContent" "Extra"          "Emoti"          "Agree"       
[13] "Consc"          "Openn"       

$class
[1] "data.frame"

$row.names
   [1]    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15   16   17   18   19   20   21
  [22]   22   23   24   25   26   27   28   29   30   31   32   33   34   35   36   37   38   39   40   41   42
  [43]   43   44   45   46   47   48   49   50   51   52   53   54   55   56   57   58   59   60   61   62   63
  [64]   64   65   66   67   68   69   70   71   72   73   74   75   76   77   78   79   80   81   82   83   84
  [85]   85   86   87   88   89   90   91   92   93   94   95   96   97   98   99  100  101  102  103  104  105
 [106]  106  107  108  109  110  111  112  113  114  115  116  117  118  119  120  121  122  123  124  125  126
 [127]  127  128  129  130  131  132  133  134  135  136  137  138  139  140  141  142  143  144  145  146  147

> dim(MyData)
[1] 1524   14

> typeof(MyData)
[1] "list"


> data1=MyData
> data1$Win.Loss = as.factor(data1$Win.Loss)
> data1$Win.Loss
   [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
  [54] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
 [107] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1
 [160] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 [213] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 [266] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 [319] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 [372] 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 [425] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 [478] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 [531] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 [584] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 [637] 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 [690] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 [743] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 [796] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 [849] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 [902] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 [955] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 [ reached getOption("max.print") -- omitted 524 entries ]
Levels: 0 1


> index = createDataPartition(data1$Win.Loss, p = 0.7, list = F )
Error in createDataPartition(data1$Win.Loss, p = 0.7, list = F) :
  could not find function "createDataPartition"

#jika ada kesalahan createDataPartition, maka kita butuh import library caret
> library(caret) 

> index = createDataPartition(data1$Win.Loss, p = 0.7, list = F )
> dim(index)
[1] 1068    1
> typeof(index)
[1] "integer"

> index[1:30,1]
 [1]  1  3  5  7  9 10 11 12 13 14 15 16 18 19 20 22 24 25 29 30 32 33 34 35 36 37 38 39 40 41

#atau bagian akhirnya:
> index
[993,]      1419
 [994,]      1420
 [995,]      1421
 [996,]      1422
 [997,]      1423
 [998,]      1424
 [999,]      1425
[1000,]      1426
 [ reached getOption("max.print") -- omitted 68 rows ]

> train = data1[index,]
> train
     Win.Loss    Optimism   Pessimism   PastUsed FutureUsed PresentUsed OwnPartyCount OppPartyCount
1           1 0.104504505 0.050450450 0.43814433 0.49484536  0.06701031             2             2
3           1 0.112571898 0.049301561 0.41596639 0.51680672  0.06722689             1             1
5           1 0.105826397 0.051724138 0.33426184 0.58217270  0.08356546             3             4
7           1 0.098382749 0.064016173 0.32407407 0.60185185  0.07407407             6             4
9           1 0.106107341 0.046884639 0.36335404 0.53726708  0.09937888             2             5

 > dim(train)
[1] 1068   14


> validation = data1[-index,]
> validation
     Win.Loss   Optimism   Pessimism   PastUsed FutureUsed PresentUsed OwnPartyCount OppPartyCount
2           1 0.11457521 0.059236165 0.29126214 0.62135922  0.08737864             1             4
4           1 0.10723350 0.046319797 0.46349206 0.46666667  0.06984127             1             3
6           1 0.07586207 0.034482759 0.28000000 0.52000000  0.20000000             0             0
8           1 0.10377924 0.056388722 0.36927224 0.54986523  0.08086253             2             4

> dim(validation)
[1] 456  14


> head(data1)
  Win.Loss   Optimism  Pessimism  PastUsed FutureUsed PresentUsed OwnPartyCount OppPartyCount NumericContent
1        1 0.10450450 0.05045045 0.4381443  0.4948454  0.06701031             2             2    0.001877543
2        1 0.11457521 0.05923617 0.2912621  0.6213592  0.08737864             1             4    0.001418909
3        1 0.11257190 0.04930156 0.4159664  0.5168067  0.06722689             1             1    0.002131163
4        1 0.10723350 0.04631980 0.4634921  0.4666667  0.06984127             1             3    0.001871715
5        1 0.10582640 0.05172414 0.3342618  0.5821727  0.08356546             3             4    0.002229220
6        1 0.07586207 0.03448276 0.2800000  0.5200000  0.20000000             0             0    0.003290827

  Extra Emoti Agree Consc Openn
1 4.041 4.049 3.469 2.450 2.548
2 3.446 3.633 3.528 2.402 2.831
3 3.463 4.039 3.284 2.159 2.465
4 4.195 4.661 4.007 2.801 3.067
5 4.658 4.023 3.283 2.415 2.836
6 2.843 3.563 3.075 1.769 1.479



> levels(train$Win.Loss) <- make.names(levels(factor(train$Win.Loss)))
> levels(train$Win.Loss)
[1] "X0" "X1"

> head(validation)
   Win.Loss   Optimism  Pessimism  PastUsed FutureUsed PresentUsed OwnPartyCount OppPartyCount NumericContent
2         1 0.11457521 0.05923617 0.2912621  0.6213592  0.08737864             1             4    0.001418909
4         1 0.10723350 0.04631980 0.4634921  0.4666667  0.06984127             1             3    0.001871715
6         1 0.07586207 0.03448276 0.2800000  0.5200000  0.20000000             0             0    0.003290827
8         1 0.10377924 0.05638872 0.3692722  0.5498652  0.08086253             2             4    0.002215028
17        1 0.11289199 0.05505227 0.3891051  0.5214008  0.08949416             2             7    0.001165647
21        1 0.11466373 0.03858875 0.2736842  0.6210526  0.10526316             1             7    0.003105161
   Extra Emoti Agree Consc Openn
2  3.446 3.633 3.528 2.402 2.831
4  4.195 4.661 4.007 2.801 3.067
6  2.843 3.563 3.075 1.769 1.479
8  4.027 4.631 3.920 2.417 2.291
17 4.086 4.173 3.368 2.348 2.412
21 3.770 3.858 2.874 1.949 2.006


> levels(validation$Win.Loss) <- make.names(levels(factor(validation$Win.Loss)))
> levels(validation$Win.Loss)
[1] "X0" "X1"


#Membuat Method

> repeats = 3
> numbers = 10
> tunel = 10
> set.seed(1234)
> x = trainControl(method = "repeatedcv",number = numbers,repeats = repeats,classProbs = TRUE,summaryFunction = twoClassSummary)
> dim(x)
NULL
> x
$method
[1] "repeatedcv"

$number
[1] 10

$repeats
[1] 3

$search
[1] "grid"

$p
[1] 0.75

$initialWindow
NULL

$horizon
[1] 1

$fixedWindow
[1] TRUE

$skip
[1] 0

$verboseIter
[1] FALSE

$returnData
[1] TRUE

$returnResamp
[1] "final"

$savePredictions
[1] FALSE

$classProbs
[1] TRUE

$summaryFunction
function (data, lev = NULL, model = NULL) 
{
    lvls <- levels(data$obs)
    if (length(lvls) > 2) 
        stop(paste("Your outcome has", length(lvls), "levels. The twoClassSummary() function isn't appropriate."))
    requireNamespaceQuietStop("ModelMetrics")
    if (!all(levels(data[, "pred"]) == lvls)) 
        stop("levels of observed and predicted data do not match")
    rocAUC <- ModelMetrics::auc(ifelse(data$obs == lev[2], 0, 
        1), data[, lvls[1]])
    out <- c(rocAUC, sensitivity(data[, "pred"], data[, "obs"], 
        lev[1]), specificity(data[, "pred"], data[, "obs"], lev[2]))
    names(out) <- c("ROC", "Sens", "Spec")
    out
}
<environment: namespace:caret>

$selectionFunction
[1] "best"

$preProcOptions
$preProcOptions$thresh
[1] 0.95

$preProcOptions$ICAcomp
[1] 3

$preProcOptions$k
[1] 5

$preProcOptions$freqCut
[1] 19

$preProcOptions$uniqueCut
[1] 10

$preProcOptions$cutoff
[1] 0.9


$sampling
NULL

$index
NULL

$indexOut
NULL

$indexFinal
NULL

$timingSamps
[1] 0

$predictionBounds
[1] FALSE FALSE

$seeds
[1] NA

$adaptive
$adaptive$min
[1] 5

$adaptive$alpha
[1] 0.05

$adaptive$method
[1] "gls"

$adaptive$complete
[1] TRUE


$trim
[1] FALSE

$allowParallel
[1] TRUE

> model1 <- train(Win.Loss~. , data = train, method = "knn",preProcess = c("center","scale"),trControl = x,metric = "ROC",tuneLength = tunel)
> model1
k-Nearest Neighbors 

1068 samples
  13 predictor
   2 classes: 'X0', 'X1' 

Pre-processing: centered (13), scaled (13) 
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 962, 961, 961, 961, 961, 961, ... 
Resampling results across tuning parameters:

  k   ROC        Sens       Spec     
   5  0.8387421  0.6956446  0.8320591
   7  0.8465032  0.6686218  0.8499611
   9  0.8435422  0.6654665  0.8417793
  11  0.8448189  0.6620790  0.8453846
  13  0.8482900  0.6532133  0.8561150
  15  0.8486402  0.6397019  0.8586946
  17  0.8441997  0.6324623  0.8653380
  19  0.8407904  0.6196864  0.8658042
  21  0.8411525  0.6236934  0.8688967
  23  0.8411559  0.6165118  0.8714530

ROC was used to select the optimal model using the largest value.
The final value used for the model was k = 15.
> plot(model1)

Nilai ROC maksimum dicapai saat k = 15 (lihat output di atas: "The final value used for the model was k = 15"), artinya model KNN mencapai performa tertinggi pada k = 15.
> data1[11,1]
[1] 1

Kategori 1 (catatan: data1[11,1] hanya menampilkan nilai Win.Loss pada baris ke-11 dari data, dan tidak berkaitan dengan nilai parameter k hasil tuning)


Cat:
> data1[1:5,1:6]
  Win.Loss  Optimism  Pessimism  PastUsed FutureUsed PresentUsed
1        1 0.1045045 0.05045045 0.4381443  0.4948454  0.06701031
2        1 0.1145752 0.05923617 0.2912621  0.6213592  0.08737864
3        1 0.1125719 0.04930156 0.4159664  0.5168067  0.06722689
4        1 0.1072335 0.04631980 0.4634921  0.4666667  0.06984127
5        1 0.1058264 0.05172414 0.3342618  0.5821727  0.08356546
> train[1:5,1:6]
  Win.Loss   Optimism  Pessimism  PastUsed FutureUsed PresentUsed
1       X1 0.10450450 0.05045045 0.4381443  0.4948454  0.06701031
3       X1 0.11257190 0.04930156 0.4159664  0.5168067  0.06722689
5       X1 0.10582640 0.05172414 0.3342618  0.5821727  0.08356546
7       X1 0.09838275 0.06401617 0.3240741  0.6018519  0.07407407
9       X1 0.10610734 0.04688464 0.3633540  0.5372671  0.09937888
> validation[1:5,1:6]
   Win.Loss   Optimism  Pessimism  PastUsed FutureUsed PresentUsed
2        X1 0.11457521 0.05923617 0.2912621  0.6213592  0.08737864
4        X1 0.10723350 0.04631980 0.4634921  0.4666667  0.06984127
6        X1 0.07586207 0.03448276 0.2800000  0.5200000  0.20000000
8        X1 0.10377924 0.05638872 0.3692722  0.5498652  0.08086253
17       X1 0.11289199 0.05505227 0.3891051  0.5214008  0.08949416
> 


NB2:
library(ROCR)
Error in library(ROCR) : there is no package called ‘ROCR’
> install.packages("ROCR",dep=T)

+++++++++++++++++++++STUDI KASUS LAIN

download data latih: Nodal Involvement in Prostate Cancer





Komentar

Postingan populer dari blog ini

Filtering DataFrame Manual

Data Frame