Kmeans Rstudio
> library(datasets)
> head(iris)
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1 5.1 3.5 1.4 0.2 setosa
2 4.9 3.0 1.4 0.2 setosa
3 4.7 3.2 1.3 0.2 setosa
4 4.6 3.1 1.5 0.2 setosa
5 5.0 3.6 1.4 0.2 setosa
6 5.4 3.9 1.7 0.4 setosa
> tail(iris,15)
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
136 7.7 3.0 6.1 2.3 virginica
137 6.3 3.4 5.6 2.4 virginica
138 6.4 3.1 5.5 1.8 virginica
139 6.0 3.0 4.8 1.8 virginica
140 6.9 3.1 5.4 2.1 virginica
141 6.7 3.1 5.6 2.4 virginica
142 6.9 3.1 5.1 2.3 virginica
143 5.8 2.7 5.1 1.9 virginica
144 6.8 3.2 5.9 2.3 virginica
145 6.7 3.3 5.7 2.5 virginica
146 6.7 3.0 5.2 2.3 virginica
147 6.3 2.5 5.0 1.9 virginica
148 6.5 3.0 5.2 2.0 virginica
149 6.2 3.4 5.4 2.3 virginica
150 5.9 3.0 5.1 1.8 virginica
> library("ggplot2")
#melihat data iris dalam bentuk scatter plot (Petal.Length vs Petal.Width), diwarnai per Species
>ggplot(iris, aes(Petal.Length, Petal.Width, color = Species)) + geom_point()
> I=iris[, 3:4] #mengambil semua baris untuk kolom ke-3 dan ke-4 (Petal.Length, Petal.Width)
> I
Petal.Length Petal.Width
1 1.4 0.2
2 1.4 0.2
3 1.3 0.2
4 1.5 0.2
5 1.4 0.2
6 1.7 0.4
7 1.4 0.3
8 1.5 0.2
9 1.4 0.2
10 1.5 0.1
11 1.5 0.2
12 1.6 0.2
13 1.4 0.1
14 1.1 0.1
15 1.2 0.2
16 1.5 0.4
17 1.3 0.4
18 1.4 0.3
19 1.7 0.3
20 1.5 0.3
21 1.7 0.2
22 1.5 0.4
23 1.0 0.2
24 1.7 0.5
25 1.9 0.2
26 1.6 0.2
27 1.6 0.4
28 1.5 0.2
29 1.4 0.2
30 1.6 0.2
31 1.6 0.2
32 1.5 0.4
33 1.5 0.1
34 1.4 0.2
35 1.5 0.2
36 1.2 0.2
37 1.3 0.2
38 1.4 0.1
39 1.3 0.2
40 1.5 0.2
41 1.3 0.3
42 1.3 0.3
43 1.3 0.2
44 1.6 0.6
45 1.9 0.4
46 1.4 0.3
47 1.6 0.2
48 1.4 0.2
49 1.5 0.2
50 1.4 0.2
51 4.7 1.4
52 4.5 1.5
53 4.9 1.5
54 4.0 1.3
55 4.6 1.5
56 4.5 1.3
57 4.7 1.6
58 3.3 1.0
59 4.6 1.3
60 3.9 1.4
61 3.5 1.0
62 4.2 1.5
63 4.0 1.0
64 4.7 1.4
65 3.6 1.3
66 4.4 1.4
67 4.5 1.5
68 4.1 1.0
69 4.5 1.5
70 3.9 1.1
71 4.8 1.8
72 4.0 1.3
73 4.9 1.5
74 4.7 1.2
75 4.3 1.3
76 4.4 1.4
77 4.8 1.4
78 5.0 1.7
79 4.5 1.5
80 3.5 1.0
81 3.8 1.1
82 3.7 1.0
83 3.9 1.2
84 5.1 1.6
85 4.5 1.5
86 4.5 1.6
87 4.7 1.5
88 4.4 1.3
89 4.1 1.3
90 4.0 1.3
91 4.4 1.2
92 4.6 1.4
93 4.0 1.2
94 3.3 1.0
95 4.2 1.3
96 4.2 1.2
97 4.2 1.3
98 4.3 1.3
99 3.0 1.1
100 4.1 1.3
101 6.0 2.5
102 5.1 1.9
103 5.9 2.1
104 5.6 1.8
105 5.8 2.2
106 6.6 2.1
107 4.5 1.7
108 6.3 1.8
109 5.8 1.8
110 6.1 2.5
111 5.1 2.0
112 5.3 1.9
113 5.5 2.1
114 5.0 2.0
115 5.1 2.4
116 5.3 2.3
117 5.5 1.8
118 6.7 2.2
119 6.9 2.3
120 5.0 1.5
121 5.7 2.3
122 4.9 2.0
123 6.7 2.0
124 4.9 1.8
125 5.7 2.1
126 6.0 1.8
127 4.8 1.8
128 4.9 1.8
129 5.6 2.1
130 5.8 1.6
131 6.1 1.9
132 6.4 2.0
133 5.6 2.2
134 5.1 1.5
135 5.6 1.4
136 6.1 2.3
137 5.6 2.4
138 5.5 1.8
139 4.8 1.8
140 5.4 2.1
141 5.6 2.4
142 5.1 2.3
143 5.1 1.9
144 5.9 2.3
145 5.7 2.5
146 5.2 2.3
147 5.0 1.9
148 5.2 2.0
149 5.4 2.3
150 5.1 1.8
> irisCluster <- kmeans(I, 3, nstart = 20)
> typeof(irisCluster)
[1] "list"
> irisCluster
K-means clustering with 3 clusters of sizes 50, 52, 48
Cluster means:
Petal.Length Petal.Width
1 1.462000 0.246000
2 4.269231 1.342308
3 5.595833 2.037500
Clustering vector:
[1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2
[60] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 2 2 2 2 2 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 2 3 3 3 3 3 3 3 3 3 3 3
[119] 3 2 3 3 3 3 3 3 2 3 3 3 3 3 3 3 3 3 3 3 2 3 3 3 3 3 3 3 3 3 3 3
Within cluster sum of squares by cluster:
[1] 2.02200 13.05769 16.29167
(between_SS / total_SS = 94.3 %)
Available components:
[1] "cluster" "centers" "totss" "withinss" "tot.withinss" "betweenss" "size" "iter" "ifault"
################################
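isi data (catatan: nomor label cluster k-means bersifat acak di setiap run; output di bawah ini berasal dari run yang berbeda, sehingga label 2 dan 3 tertukar dibandingkan output lainnya di halaman ini):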
> irisCluster$cluster
[1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 3 3 3 3 3 3 3 3
[60] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 3 3 3 3 3 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 2 2 2 2 2 3 2 2 2 2 2 2 2 2 2 2 2
[119] 2 3 2 2 2 2 2 2 3 2 2 2 2 2 2 2 2 2 2 2 3 2 2 2 2 2 2 2 2 2 2 2
> iris$Species
[1] setosa setosa setosa setosa setosa setosa setosa setosa setosa setosa
[11] setosa setosa setosa setosa setosa setosa setosa setosa setosa setosa
[21] setosa setosa setosa setosa setosa setosa setosa setosa setosa setosa
[31] setosa setosa setosa setosa setosa setosa setosa setosa setosa setosa
[41] setosa setosa setosa setosa setosa setosa setosa setosa setosa setosa
[51] versicolor versicolor versicolor versicolor versicolor versicolor versicolor versicolor versicolor versicolor
[61] versicolor versicolor versicolor versicolor versicolor versicolor versicolor versicolor versicolor versicolor
[71] versicolor versicolor versicolor versicolor versicolor versicolor versicolor versicolor versicolor versicolor
[81] versicolor versicolor versicolor versicolor versicolor versicolor versicolor versicolor versicolor versicolor
[91] versicolor versicolor versicolor versicolor versicolor versicolor versicolor versicolor versicolor versicolor
[101] virginica virginica virginica virginica virginica virginica virginica virginica virginica virginica
[111] virginica virginica virginica virginica virginica virginica virginica virginica virginica virginica
[121] virginica virginica virginica virginica virginica virginica virginica virginica virginica virginica
[131] virginica virginica virginica virginica virginica virginica virginica virginica virginica virginica
[141] virginica virginica virginica virginica virginica virginica virginica virginica virginica virginica
Levels: setosa versicolor virginica
>
################################
> table(irisCluster$cluster, iris$Species)
setosa versicolor virginica
1 50 0 0
2 0 48 4
3 0 2 46
> bakso<- as.factor(irisCluster$cluster)
> bakso
[1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2
[60] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 2 2 2 2 2 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 2 3 3 3 3 3 3 3 3 3 3 3
[119] 3 2 3 3 3 3 3 3 2 3 3 3 3 3 3 3 3 3 3 3 2 3 3 3 3 3 3 3 3 3 3 3
Levels: 1 2 3
>ggplot(iris, aes(Petal.Length, Petal.Width, color = bakso)) + geom_point()
Ohhhhhh mudah nyaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa....
NB VIEW :
# Quick reference: inspecting, filtering and plotting the built-in iris data.
#
# filter(), select(), mutate() and arrange() below are dplyr verbs. dplyr must
# be attached first; otherwise filter() resolves to stats::filter() and the
# other three are undefined.
library(dplyr)

# iris column names are case-sensitive: Sepal.Length, Sepal.Width,
# Petal.Length, Petal.Width, Species.

# Inspection ----
iris[1:5, ]                 # rows 1 to 5, all columns
iris[1:5]                   # all rows, columns 1 to 5
str(iris)                   # structure
summary(iris)               # summary statistics
dim(iris)                   # dimensions
names(iris)                 # column names
attributes(iris)            # $names, $row.names, $class (data.frame)
iris$Sepal.Length[1:10]     # first 10 values of the Sepal.Length column
hist(iris$Sepal.Width)      # histogram
table(iris$Species)         # number of rows per species
pie(table(iris$Species))    # pie chart of the per-species counts

# Filtering and selecting ----
# Keep only the virginica rows.
virginica <- filter(iris, Species == "virginica")
# Keep virginica rows whose sepal length is greater than 6.
sepalLength6 <- filter(iris, Species == "virginica", Sepal.Length > 6)
# Select columns by name.
selected <- select(iris, Sepal.Length, Sepal.Width, Petal.Length)
# Select the same columns as a contiguous range.
selected2 <- select(iris, Sepal.Length:Petal.Length)
identical(selected, selected2)  # check that both selections are the same

# Derived column and sorting ----
# New logical column: is the sepal width more than half the sepal length?
newCol <- mutate(iris, COMPARENYA = Sepal.Width > 0.5 * Sepal.Length)
# Sort rows by ascending petal width.
tabelBaru <- arrange(newCol, Petal.Width)

# Plots ----
plot(iris)                  # scatter-plot matrix of all columns
plot(iris$Sepal.Width, iris$Sepal.Length)
hist(iris$Sepal.Width)

Komentar
Posting Komentar