[=> Case Study] | 8.3 Case Study
(-) Create example dataset.
# Generate data
# Case-study table: one row per flavor, one numeric column per rating criterion.
mydatc8_case <- data.frame(
  Flavor = c(
    "Milk", "Espresso", "Biscuit", "Orange", "Strawberry", "Mango",
    "Cappuccino", "Mousse", "Caramel", "Nougat", "Nut"
  ),
  Price = c(
    4.5, 5.1667, 5.0588, 3.8, 3.4444, 3.5,
    5.25, 5.8571, 5.0833, 5.2727, 4.5
  ),
  Refreshing = c(
    4, 4.25, 3.8235, 5.4, 5.0556, 3.5,
    3.4167, 4.4286, 4.0833, 3.6, 4
  ),
  Delicious = c(
    4.375, 3.8333, 4.7647, 3.8, 3.7778, 3.875,
    4.5833, 4.9286, 4.6667, 3.9091, 4.2
  ),
  Healthy = c(
    3.875, 3.8333, 3.4375, 2.4, 3.7647, 4,
    3.9167, 3.8571, 4, 4.0909, 3.9
  ),
  Bitter = c(
    3.25, 2.1667, 4.2353, 5, 3.9444, 4.625,
    4.3333, 4.0714, 4, 4.0909, 3.7
  ),
  Light = c(
    3.75, 3.75, 4.4706, 5, 5.3889, 5.25,
    4.4167, 5.0714, 4.25, 4.0909, 3.9
  ),
  Crunchy = c(
    4, 3.2727, 3.7647, 5, 5.0556, 5.5,
    4.6667, 2.9286, 3.8182, 4.5455, 3.6
  ),
  Exotic = c(
    2.375, 2.3333, 2.7059, 4.4, 4.9444, 6,
    3.6667, 2.0909, 1.5455, 1.7273, 2.2
  ),
  Sweet = c(
    4.625, 3.75, 3.5294, 4, 4.2222, 4.75,
    4.5, 4.5714, 3.75, 3.9091, 3.5
  ),
  Fruity = c(
    4.125, 3.4167, 3.5294, 4.6, 5.2778, 5.375,
    3.5833, 3.7857, 4.1667, 3.8182, 3.7
  )
)
# Show the complete table
print(mydatc8_case)
## Flavor Price Refreshing Delicious Healthy Bitter Light Crunchy Exotic
## 1 Milk 4.5000 4.0000 4.3750 3.8750 3.2500 3.7500 4.0000 2.3750
## 2 Espresso 5.1667 4.2500 3.8333 3.8333 2.1667 3.7500 3.2727 2.3333
## 3 Biscuit 5.0588 3.8235 4.7647 3.4375 4.2353 4.4706 3.7647 2.7059
## 4 Orange 3.8000 5.4000 3.8000 2.4000 5.0000 5.0000 5.0000 4.4000
## 5 Strawberry 3.4444 5.0556 3.7778 3.7647 3.9444 5.3889 5.0556 4.9444
## 6 Mango 3.5000 3.5000 3.8750 4.0000 4.6250 5.2500 5.5000 6.0000
## 7 Cappuccino 5.2500 3.4167 4.5833 3.9167 4.3333 4.4167 4.6667 3.6667
## 8 Mousse 5.8571 4.4286 4.9286 3.8571 4.0714 5.0714 2.9286 2.0909
## 9 Caramel 5.0833 4.0833 4.6667 4.0000 4.0000 4.2500 3.8182 1.5455
## 10 Nougat 5.2727 3.6000 3.9091 4.0909 4.0909 4.0909 4.5455 1.7273
## 11 Nut 4.5000 4.0000 4.2000 3.9000 3.7000 3.9000 3.6000 2.2000
## Sweet Fruity
## 1 4.6250 4.1250
## 2 3.7500 3.4167
## 3 3.5294 3.5294
## 4 4.0000 4.6000
## 5 4.2222 5.2778
## 6 4.7500 5.3750
## 7 4.5000 3.5833
## 8 4.5714 3.7857
## 9 3.7500 4.1667
## 10 3.9091 3.8182
## 11 3.5000 3.7000
(-) Proximity measure # Squared Euclidean distances
(-) Hierarchical clustering # Single Linkage Algorithm
(-) Figure 8.22 - Agglomeration schedule of Ward’s method for the case study
(-) Determine number of clusters
(-) Figure 8.24 - Development of the heterogeneity measure in the case study
# Absolute values differ from SPSS (Height = value of the heterogeneity measure used, here: variance criterion)
# Merge matrix of the clustering result: one row per agglomeration step.
hc_w[["merge"]]
## [,1] [,2]
## [1,] -9 -11
## [2,] -10 1
## [3,] -3 2
## [4,] -1 3
## [5,] -4 -5
## [6,] -8 4
## [7,] -7 6
## [8,] -2 7
## [9,] -6 5
## [10,] 8 9
# Show the second and first components of the clustering result, in that
# order (printed as $height followed by $merge).
hc_w[c(2, 1)]
## $height
## [1] 1.543777 2.152186 2.772340 3.406401 4.181745 6.372982 6.845778
## [8] 8.160512 8.987961 43.415330
##
## $merge
## [,1] [,2]
## [1,] -9 -11
## [2,] -10 1
## [3,] -3 2
## [4,] -1 3
## [5,] -4 -5
## [6,] -8 4
## [7,] -7 6
## [8,] -2 7
## [9,] -6 5
## [10,] 8 9
# Heights of the successive merges, one value per agglomeration step.
hc_w[["height"]]
## [1] 1.543777 2.152186 2.772340 3.406401 4.181745 6.372982 6.845778
## [8] 8.160512 8.987961 43.415330
# Agglomeration schedule as a table: merge height and merged pair per step,
# plus the step number and the number of clusters left after that step.
# The step count is taken from the table itself (n()) instead of being
# hard-coded to 10, so the code keeps working for a different number of
# observations.
clust_steps_data <- data.frame(hc_w[2:1]) %>%
  mutate(
    nclust = rev(seq_len(n())),
    step = seq_len(n())
  )
print(clust_steps_data)
## height merge.1 merge.2 nclust step
## 1 1.543777 -9 -11 10 1
## 2 2.152186 -10 1 9 2
## 3 2.772340 -3 2 8 3
## 4 3.406401 -1 3 7 4
## 5 4.181745 -4 -5 6 5
## 6 6.372982 -8 4 5 6
## 7 6.845778 -7 6 4 7
## 8 8.160512 -2 7 3 8
## 9 8.987961 -6 5 2 9
## 10 43.415330 8 9 1 10
# Elbow plot: merge height against the number of clusters
with(
  clust_steps_data,
  plot(
    nclust, height,
    type = "b",
    xlab = "Number of clusters",
    ylab = "Scaled Height",
    cex.main = 1,
    cex.lab = 1,
    cex.axis = 1
  )
)
# Put a tick on every cluster count from 1 to 10
axis(side = 1, at = seq(1, 10, 1))
(-) 2 Cluster Solution
# Hierarchical clustering with Ward method
# NOTE(review): method "ward.D2" squares the dissimilarities internally; the
# section heading mentions squared Euclidean distances, so confirm that
# `distdata` holds unsquared distances (or that "ward.D" is not intended).
hc_w <- hclust(distdata, method = "ward.D2")
# Dendrogram with the default leaf labels
plot(hc_w, ylab = "Scaled Height", cex.main = 1, cex.lab = 1, cex.axis = 1)
# Use the flavor names as leaf labels of the dendrogram
hc_w[["labels"]] <- mydatc8_case[["Flavor"]]
# 2 Cluster Solution (k = 2): redraw the dendrogram and frame the two clusters
plot(hc_w, ylab = "Scaled Height", cex.main = 1, cex.lab = 1, cex.axis = 1)
rect.hclust(hc_w, k = 2, border = "red")
(-) Cluster assignment for 2,3,4,5 cluster solution
(-) Optimization of the cluster solution with K-Means
(-) Figure 8.26 - Cluster membership and final cluster centers according to the k-means method
# Select K
# Fit k-means for k = 1, ..., 10 on the rating columns (column 1, Flavor, is
# dropped). Each fit uses the best of 25 random starts: with a single random
# start k-means can stop in a local optimum, which makes the
# between_SS / total_SS curve non-monotonic in k and misleads the choice of k.
input <- mydatc8_case
k <- lapply(1:10, function(n_clusters) {
  kmeans(input[, -1], centers = n_clusters, nstart = 25)
})
print(k)
## [[1]]
## K-means clustering with 1 clusters of sizes 11
##
## Cluster means:
## Price Refreshing Delicious Healthy Bitter Light Crunchy Exotic
## 1 4.675727 4.141609 4.246682 3.734109 3.947 4.485318 4.195636 3.089909
## Sweet Fruity
## 1 4.100645 4.125255
##
## Clustering vector:
## [1] 1 1 1 1 1 1 1 1 1 1 1
##
## Within cluster sum of squares by cluster:
## [1] 58.14504
## (between_SS / total_SS = 0.0 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
##
## [[2]]
## K-means clustering with 2 clusters of sizes 8, 3
##
## Cluster means:
## Price Refreshing Delicious Healthy Bitter Light Crunchy Exotic
## 1 5.086075 3.950263 4.407587 3.863812 3.730950 4.212450 3.82455 2.330575
## 2 3.581467 4.651867 3.817600 3.388233 4.523133 5.212967 5.18520 5.114800
## Sweet Fruity
## 1 4.016863 3.765625
## 2 4.324067 5.084267
##
## Clustering vector:
## [1] 1 1 1 2 2 2 1 1 1 1 1
##
## Within cluster sum of squares by cluster:
## [1] 15.97533 6.39791
## (between_SS / total_SS = 61.5 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
##
## [[3]]
## K-means clustering with 3 clusters of sizes 3, 1, 7
##
## Cluster means:
## Price Refreshing Delicious Healthy Bitter Light Crunchy Exotic
## 1 3.581467 4.651867 3.817600 3.388233 4.523133 5.212967 5.185200 5.114800
## 2 5.166700 4.250000 3.833300 3.833300 2.166700 3.750000 3.272700 2.333300
## 3 5.074557 3.907443 4.489629 3.868171 3.954414 4.278514 3.903386 2.330186
## Sweet Fruity
## 1 4.324067 5.084267
## 2 3.750000 3.416700
## 3 4.054986 3.815471
##
## Clustering vector:
## [1] 3 2 3 1 1 1 3 3 3 3 3
##
## Within cluster sum of squares by cluster:
## [1] 6.39791 0.00000 11.87781
## (between_SS / total_SS = 68.6 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
##
## [[4]]
## K-means clustering with 4 clusters of sizes 1, 5, 3, 2
##
## Cluster means:
## Price Refreshing Delicious Healthy Bitter Light Crunchy Exotic
## 1 3.500000 3.500000 3.87500 4.000000 4.62500 5.25000 5.500000 6.000000
## 2 5.304380 3.870420 4.57048 3.860440 4.14618 4.45992 3.944740 2.347260
## 3 4.722233 4.083333 4.13610 3.869433 3.03890 3.80000 3.624233 2.302767
## 4 3.622200 5.227800 3.78890 3.082350 4.47220 5.19445 5.027800 4.672200
## Sweet Fruity
## 1 4.750000 5.375000
## 2 4.051980 3.776660
## 3 3.958333 3.747233
## 4 4.111100 4.938900
##
## Clustering vector:
## [1] 3 3 2 4 4 1 2 2 2 2 3
##
## Within cluster sum of squares by cluster:
## [1] 0.000000 8.586505 2.984639 2.090872
## (between_SS / total_SS = 76.5 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
##
## [[5]]
## K-means clustering with 5 clusters of sizes 1, 1, 1, 3, 5
##
## Cluster means:
## Price Refreshing Delicious Healthy Bitter Light Crunchy Exotic
## 1 5.857100 4.428600 4.9286 3.857100 4.071400 5.071400 2.92860 2.09090
## 2 5.250000 3.416700 4.5833 3.916700 4.333300 4.416700 4.66670 3.66670
## 3 5.166700 4.250000 3.8333 3.833300 2.166700 3.750000 3.27270 2.33330
## 4 3.581467 4.651867 3.8176 3.388233 4.523133 5.212967 5.18520 5.11480
## 5 4.882960 3.901360 4.3831 3.860680 3.855240 4.092300 3.94568 2.11074
## Sweet Fruity
## 1 4.571400 3.785700
## 2 4.500000 3.583300
## 3 3.750000 3.416700
## 4 4.324067 5.084267
## 5 3.862700 3.867860
##
## Clustering vector:
## [1] 5 3 5 4 4 4 2 1 5 5 5
##
## Within cluster sum of squares by cluster:
## [1] 0.000000 0.000000 0.000000 6.397910 4.905746
## (between_SS / total_SS = 80.6 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
##
## [[6]]
## K-means clustering with 6 clusters of sizes 4, 1, 1, 1, 1, 3
##
## Cluster means:
## Price Refreshing Delicious Healthy Bitter Light Crunchy Exotic
## 1 4.978700 3.876700 4.385125 3.857100 4.006550 4.177875 3.9321 2.044675
## 2 5.857100 4.428600 4.928600 3.857100 4.071400 5.071400 2.9286 2.090900
## 3 5.166700 4.250000 3.833300 3.833300 2.166700 3.750000 3.2727 2.333300
## 4 4.500000 4.000000 4.375000 3.875000 3.250000 3.750000 4.0000 2.375000
## 5 5.250000 3.416700 4.583300 3.916700 4.333300 4.416700 4.6667 3.666700
## 6 3.581467 4.651867 3.817600 3.388233 4.523133 5.212967 5.1852 5.114800
## Sweet Fruity
## 1 3.672125 3.803575
## 2 4.571400 3.785700
## 3 3.750000 3.416700
## 4 4.625000 4.125000
## 5 4.500000 3.583300
## 6 4.324067 5.084267
##
## Clustering vector:
## [1] 4 3 1 6 6 6 5 2 1 1 1
##
## Within cluster sum of squares by cluster:
## [1] 3.205558 0.000000 0.000000 0.000000 0.000000 6.397910
## (between_SS / total_SS = 83.5 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
##
## [[7]]
## K-means clustering with 7 clusters of sizes 1, 2, 1, 1, 1, 1, 4
##
## Cluster means:
## Price Refreshing Delicious Healthy Bitter Light Crunchy Exotic
## 1 5.0588 3.823500 4.7647 3.437500 4.235300 4.470600 3.764700 2.70590
## 2 3.6222 5.227800 3.7889 3.082350 4.472200 5.194450 5.027800 4.67220
## 3 5.1667 4.250000 3.8333 3.833300 2.166700 3.750000 3.272700 2.33330
## 4 5.2500 3.416700 4.5833 3.916700 4.333300 4.416700 4.666700 3.66670
## 5 3.5000 3.500000 3.8750 4.000000 4.625000 5.250000 5.500000 6.00000
## 6 5.8571 4.428600 4.9286 3.857100 4.071400 5.071400 2.928600 2.09090
## 7 4.8390 3.920825 4.2877 3.966475 3.760225 3.997725 3.990925 1.96195
## Sweet Fruity
## 1 3.529400 3.529400
## 2 4.111100 4.938900
## 3 3.750000 3.416700
## 4 4.500000 3.583300
## 5 4.750000 5.375000
## 6 4.571400 3.785700
## 7 3.946025 3.952475
##
## Clustering vector:
## [1] 7 3 1 2 2 5 4 6 7 7 7
##
## Within cluster sum of squares by cluster:
## [1] 0.000000 2.090872 0.000000 0.000000 0.000000 0.000000 3.328431
## (between_SS / total_SS = 90.7 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
##
## [[8]]
## K-means clustering with 8 clusters of sizes 1, 1, 1, 1, 1, 1, 1, 4
##
## Cluster means:
## Price Refreshing Delicious Healthy Bitter Light Crunchy Exotic
## 1 5.2500 3.4167 4.583300 3.9167 4.33330 4.416700 4.6667 3.666700
## 2 3.4444 5.0556 3.777800 3.7647 3.94440 5.388900 5.0556 4.944400
## 3 4.5000 4.0000 4.375000 3.8750 3.25000 3.750000 4.0000 2.375000
## 4 3.8000 5.4000 3.800000 2.4000 5.00000 5.000000 5.0000 4.400000
## 5 3.5000 3.5000 3.875000 4.0000 4.62500 5.250000 5.5000 6.000000
## 6 5.8571 4.4286 4.928600 3.8571 4.07140 5.071400 2.9286 2.090900
## 7 5.1667 4.2500 3.833300 3.8333 2.16670 3.750000 3.2727 2.333300
## 8 4.9787 3.8767 4.385125 3.8571 4.00655 4.177875 3.9321 2.044675
## Sweet Fruity
## 1 4.500000 3.583300
## 2 4.222200 5.277800
## 3 4.625000 4.125000
## 4 4.000000 4.600000
## 5 4.750000 5.375000
## 6 4.571400 3.785700
## 7 3.750000 3.416700
## 8 3.672125 3.803575
##
## Clustering vector:
## [1] 3 7 8 4 2 5 1 6 8 8 8
##
## Within cluster sum of squares by cluster:
## [1] 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 3.205558
## (between_SS / total_SS = 94.5 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
##
## [[9]]
## K-means clustering with 9 clusters of sizes 1, 1, 1, 1, 1, 1, 1, 3, 1
##
## Cluster means:
## Price Refreshing Delicious Healthy Bitter Light Crunchy Exotic
## 1 5.058800 3.823500 4.7647 3.437500 4.235300 4.470600 3.7647 2.7059
## 2 5.272700 3.600000 3.9091 4.090900 4.090900 4.090900 4.5455 1.7273
## 3 5.857100 4.428600 4.9286 3.857100 4.071400 5.071400 2.9286 2.0909
## 4 4.500000 4.000000 4.2000 3.900000 3.700000 3.900000 3.6000 2.2000
## 5 4.500000 4.000000 4.3750 3.875000 3.250000 3.750000 4.0000 2.3750
## 6 5.083300 4.083300 4.6667 4.000000 4.000000 4.250000 3.8182 1.5455
## 7 5.166700 4.250000 3.8333 3.833300 2.166700 3.750000 3.2727 2.3333
## 8 3.581467 4.651867 3.8176 3.388233 4.523133 5.212967 5.1852 5.1148
## 9 5.250000 3.416700 4.5833 3.916700 4.333300 4.416700 4.6667 3.6667
## Sweet Fruity
## 1 3.529400 3.529400
## 2 3.909100 3.818200
## 3 4.571400 3.785700
## 4 3.500000 3.700000
## 5 4.625000 4.125000
## 6 3.750000 4.166700
## 7 3.750000 3.416700
## 8 4.324067 5.084267
## 9 4.500000 3.583300
##
## Clustering vector:
## [1] 5 7 1 8 8 8 9 3 6 2 4
##
## Within cluster sum of squares by cluster:
## [1] 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 6.39791 0.00000
## (between_SS / total_SS = 89.0 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
##
## [[10]]
## K-means clustering with 10 clusters of sizes 2, 1, 1, 1, 1, 1, 1, 1, 1, 1
##
## Cluster means:
## Price Refreshing Delicious Healthy Bitter Light Crunchy Exotic Sweet
## 1 4.79165 4.04165 4.43335 3.9500 3.8500 4.0750 3.7091 1.87275 3.6250
## 2 5.27270 3.60000 3.90910 4.0909 4.0909 4.0909 4.5455 1.72730 3.9091
## 3 5.05880 3.82350 4.76470 3.4375 4.2353 4.4706 3.7647 2.70590 3.5294
## 4 5.16670 4.25000 3.83330 3.8333 2.1667 3.7500 3.2727 2.33330 3.7500
## 5 3.44440 5.05560 3.77780 3.7647 3.9444 5.3889 5.0556 4.94440 4.2222
## 6 3.80000 5.40000 3.80000 2.4000 5.0000 5.0000 5.0000 4.40000 4.0000
## 7 3.50000 3.50000 3.87500 4.0000 4.6250 5.2500 5.5000 6.00000 4.7500
## 8 5.25000 3.41670 4.58330 3.9167 4.3333 4.4167 4.6667 3.66670 4.5000
## 9 5.85710 4.42860 4.92860 3.8571 4.0714 5.0714 2.9286 2.09090 4.5714
## 10 4.50000 4.00000 4.37500 3.8750 3.2500 3.7500 4.0000 2.37500 4.6250
## Fruity
## 1 3.93335
## 2 3.81820
## 3 3.52940
## 4 3.41670
## 5 5.27780
## 6 4.60000
## 7 5.37500
## 8 3.58330
## 9 3.78570
## 10 4.12500
##
## Clustering vector:
## [1] 10 4 3 6 5 7 8 9 1 2 1
##
## Within cluster sum of squares by cluster:
## [1] 0.7718885 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## [8] 0.0000000 0.0000000 0.0000000
## (between_SS / total_SS = 98.7 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
# Share of the total sum of squares explained by the clustering, for each k
betweenss_totss <- lapply(1:10, function(idx) {
  fit <- k[[idx]]
  fit$betweenss / fit$totss
})
# Plot the explained share against the number of clusters
plot(
  1:10, betweenss_totss,
  type = "b",
  ylab = "Between SS / Total SS",
  xlab = "Clusters (k)",
  cex.main = 1,
  cex.lab = 1,
  cex.axis = 1
)
axis(side = 1, at = seq(1, 10, 1))
# Calculation of K-MEANS for different k (here: k = 2), best of 25 random starts
fit_kmeans_K2 <- kmeans(x = input[, -1], centers = 2, nstart = 25)
# Results: Cluster membership and cluster centers according to the k-means method (k = 2)
summary(fit_kmeans_K2)
print(fit_kmeans_K2)
print(fit_kmeans_K2[["centers"]])
## Length Class Mode
## cluster 11 -none- numeric
## centers 20 -none- numeric
## totss 1 -none- numeric
## withinss 2 -none- numeric
## tot.withinss 1 -none- numeric
## betweenss 1 -none- numeric
## size 2 -none- numeric
## iter 1 -none- numeric
## ifault 1 -none- numeric
## K-means clustering with 2 clusters of sizes 3, 8
##
## Cluster means:
## Price Refreshing Delicious Healthy Bitter Light Crunchy Exotic
## 1 3.581467 4.651867 3.817600 3.388233 4.523133 5.212967 5.18520 5.114800
## 2 5.086075 3.950263 4.407587 3.863812 3.730950 4.212450 3.82455 2.330575
## Sweet Fruity
## 1 4.324067 5.084267
## 2 4.016863 3.765625
##
## Clustering vector:
## [1] 2 2 2 1 1 1 2 2 2 2 2
##
## Within cluster sum of squares by cluster:
## [1] 6.39791 15.97533
## (between_SS / total_SS = 61.5 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
## Price Refreshing Delicious Healthy Bitter Light Crunchy Exotic
## 1 3.581467 4.651867 3.817600 3.388233 4.523133 5.212967 5.18520 5.114800
## 2 5.086075 3.950263 4.407587 3.863812 3.730950 4.212450 3.82455 2.330575
## Sweet Fruity
## 1 4.324067 5.084267
## 2 4.016863 3.765625
(-) Figure 8.27 - ANOVA table of the k-means clustering method
# ANOVA
# Cluster centers as a table: one row per criterion, one column per cluster
kmeans_df <- rownames_to_column(
  data.frame(t(fit_kmeans_K2$centers)),
  var = "Criterion"
)
names(kmeans_df)[1:3] <- c("Criterion", "Cluster01", "Cluster02")
print(kmeans_df)
## Criterion Cluster01 Cluster02
## 1 Price 3.581467 5.086075
## 2 Refreshing 4.651867 3.950263
## 3 Delicious 3.817600 4.407587
## 4 Healthy 3.388233 3.863812
## 5 Bitter 4.523133 3.730950
## 6 Light 5.212967 4.212450
## 7 Crunchy 5.185200 3.824550
## 8 Exotic 5.114800 2.330575
## 9 Sweet 4.324067 4.016863
## 10 Fruity 5.084267 3.765625
# Attach each flavor's k-means cluster id to the case-study data
kmeans_df2 <- mutate(mydatc8_case, Cluster = fit_kmeans_K2$cluster)
# Run all single ANOVAs (e.g., Price, Refreshing)
print("Price")
summary(aov(kmeans_df2$Price ~ kmeans_df2$Cluster))
## [1] "Price"
## Df Sum Sq Mean Sq F value Pr(>F)
## kmeans_df2$Cluster 1 4.939 4.939 31.23 0.000339 ***
## Residuals 9 1.424 0.158
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Single ANOVA of Refreshing on the cluster id
print("Refreshing")
summary(aov(kmeans_df2$Refreshing ~ kmeans_df2$Cluster))
## [1] "Refreshing"
## Df Sum Sq Mean Sq F value Pr(>F)
## kmeans_df2$Cluster 1 1.074 1.0740 3.435 0.0968 .
## Residuals 9 2.814 0.3127
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ...
# Run all single ANOVAs at once and print output at once
# Only the rating criteria are used as responses. The Flavor label and the
# Cluster column itself are excluded: a `Cluster ~ Cluster` model is
# meaningless and makes aov() warn that the response appeared on the
# right-hand side and was dropped.
anova_criteria <- setdiff(colnames(kmeans_df2), c("Flavor", "Cluster"))
formulae <- lapply(anova_criteria, function(x) as.formula(paste0(x, " ~ Cluster")))
cluster_aovres <- lapply(formulae, function(x) summary(aov(x, data = kmeans_df2)))
## Warning in model.matrix.default(mt, mf, contrasts): the response appeared on the
## right-hand side and was dropped
## Warning in model.matrix.default(mt, mf, contrasts): problem with term 1 in
## model.matrix: no columns are assigned
# Label every ANOVA summary with the text of its model formula, then print all
cluster_aovres <- setNames(cluster_aovres, format(formulae))
print(cluster_aovres)
## $`Price ~ Cluster`
## Df Sum Sq Mean Sq F value Pr(>F)
## Cluster 1 4.939 4.939 31.23 0.000339 ***
## Residuals 9 1.424 0.158
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $`Refreshing ~ Cluster`
## Df Sum Sq Mean Sq F value Pr(>F)
## Cluster 1 1.074 1.0740 3.435 0.0968 .
## Residuals 9 2.814 0.3127
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $`Delicious ~ Cluster`
## Df Sum Sq Mean Sq F value Pr(>F)
## Cluster 1 0.7595 0.7595 6.078 0.0358 *
## Residuals 9 1.1246 0.1250
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $`Healthy ~ Cluster`
## Df Sum Sq Mean Sq F value Pr(>F)
## Cluster 1 0.4935 0.4935 2.538 0.146
## Residuals 9 1.7497 0.1944
##
## $`Bitter ~ Cluster`
## Df Sum Sq Mean Sq F value Pr(>F)
## Cluster 1 1.369 1.3692 2.943 0.12
## Residuals 9 4.187 0.4652
##
## $`Light ~ Cluster`
## Df Sum Sq Mean Sq F value Pr(>F)
## Cluster 1 2.184 2.1841 13.41 0.00521 **
## Residuals 9 1.465 0.1628
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $`Crunchy ~ Cluster`
## Df Sum Sq Mean Sq F value Pr(>F)
## Cluster 1 4.039 4.039 14.14 0.00448 **
## Residuals 9 2.571 0.286
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $`Exotic ~ Cluster`
## Df Sum Sq Mean Sq F value Pr(>F)
## Cluster 1 16.913 16.913 35.35 0.000217 ***
## Residuals 9 4.306 0.478
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $`Sweet ~ Cluster`
## Df Sum Sq Mean Sq F value Pr(>F)
## Cluster 1 0.2059 0.2059 0.993 0.345
## Residuals 9 1.8664 0.2074
##
## $`Fruity ~ Cluster`
## Df Sum Sq Mean Sq F value Pr(>F)
## Cluster 1 3.794 3.794 39.48 0.000144 ***
## Residuals 9 0.865 0.096
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $`Cluster ~ Cluster`
## Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 10 2.182 0.2182
(-) Figure 8.28 - Mean values and variances of the assessments in the survey population (total) and the two clusters
# Cluster centers as a table: one row per criterion, one column per cluster
kmeans_df <- rownames_to_column(
  data.frame(t(fit_kmeans_K2$centers)),
  var = "Criterion"
)
names(kmeans_df)[1:3] <- c("Criterion", "Cluster01", "Cluster02")
print(kmeans_df)
## Criterion Cluster01 Cluster02
## 1 Price 3.581467 5.086075
## 2 Refreshing 4.651867 3.950263
## 3 Delicious 3.817600 4.407587
## 4 Healthy 3.388233 3.863812
## 5 Bitter 4.523133 3.730950
## 6 Light 5.212967 4.212450
## 7 Crunchy 5.185200 3.824550
## 8 Exotic 5.114800 2.330575
## 9 Sweet 4.324067 4.016863
## 10 Fruity 5.084267 3.765625
# Attach each flavor's k-means cluster id to the case-study data
kmeans_df2 <- mutate(mydatc8_case, Cluster = fit_kmeans_K2$cluster)
# Mean values and variances by group
# Columns 2:11 are the ten rating criteria; in the result the suffix _fn1
# marks the mean and _fn2 the variance of a criterion.
summarise_at(
  group_by(kmeans_df2, Cluster),
  .vars = vars(2:11),
  .funs = c(mean, var)
)
## # A tibble: 2 x 21
## Cluster Price_fn1 Refreshing_fn1 Delicious_fn1 Healthy_fn1 Bitter_fn1
## <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 3.58 4.65 3.82 3.39 4.52
## 2 2 5.09 3.95 4.41 3.86 3.73
## # ... with 15 more variables: Light_fn1 <dbl>, Crunchy_fn1 <dbl>,
## # Exotic_fn1 <dbl>, Sweet_fn1 <dbl>, Fruity_fn1 <dbl>, Price_fn2 <dbl>,
## # Refreshing_fn2 <dbl>, Delicious_fn2 <dbl>, Healthy_fn2 <dbl>,
## # Bitter_fn2 <dbl>, Light_fn2 <dbl>, Crunchy_fn2 <dbl>, Exotic_fn2 <dbl>,
## # Sweet_fn2 <dbl>, Fruity_fn2 <dbl>
# Mean values and variances over all flavors (no grouping by cluster)
summarise_at(kmeans_df2, .vars = vars(2:11), .funs = c(mean, var))
## Price_fn1 Refreshing_fn1 Delicious_fn1 Healthy_fn1 Bitter_fn1 Light_fn1
## 1 4.675727 4.141609 4.246682 3.734109 3.947 4.485318
## Crunchy_fn1 Exotic_fn1 Sweet_fn1 Fruity_fn1 Price_fn2 Refreshing_fn2
## 1 4.195636 3.089909 4.100645 4.125255 0.6362881 0.3888235
## Delicious_fn2 Healthy_fn2 Bitter_fn2 Light_fn2 Crunchy_fn2 Exotic_fn2
## 1 0.1884095 0.2243131 0.5556122 0.3649436 0.6610622 2.121967
## Sweet_fn2 Fruity_fn2
## 1 0.2072298 0.4658554
# Number of flavors per cluster and total
# Count per cluster
tally(group_by(kmeans_df2, Cluster))
## # A tibble: 2 x 2
## Cluster n
## <int> <int>
## 1 1 3
## 2 2 8
# Total number of flavors
tally(kmeans_df2)
## n
## 1 11
(-) Description of the two cluster solution with t- and F-values
(-) Table 8.21 - t- and F-values of the two-cluster solution in the case study
# t-values
# Per-cluster statistics of the ten rating criteria (columns 2:11): the
# criterion means come first (suffix _fn1), then the variances (suffix _fn2).
cluster_stats <- summarise_at(
  group_by(kmeans_df2, Cluster),
  .vars = vars(2:11),
  .funs = c(mean, var)
)
### t- and F-values of the two-cluster solution
# For every rating criterion:
#   t-value = (cluster mean - overall mean) / overall standard deviation
#   F-value = cluster variance / overall variance
# The same two formulas are applied to every criterion and both clusters, so
# they live in one helper instead of being spelled out forty times.

# Build the one-row table of t- and F-values for one cluster.
#   data       - flavor table with a Flavor column, the rating criteria and a
#                Cluster column (kmeans_df2)
#   stats      - per-cluster summary: Cluster, then the criterion means, then
#                the criterion variances, in the same criterion order
#                (cluster_stats)
#   cluster_id - value of Cluster identifying the cluster to describe
#   label      - text stored in the leading `cluster` column
# Returns a data frame with the column `cluster` followed, criterion by
# criterion, by the t-value (<criterion>_fn1) and F-value (<criterion>_fn2).
cluster_t_f_values <- function(data, stats, cluster_id, label) {
  criteria <- setdiff(colnames(data), c("Flavor", "Cluster"))
  n_crit <- length(criteria)
  mean_cols <- 1 + seq_len(n_crit)          # means follow the Cluster column
  var_cols <- 1 + n_crit + seq_len(n_crit)  # variances follow the means
  stats_row <- which(stats$Cluster == cluster_id)

  total_mean <- vapply(data[criteria], mean, numeric(1))
  total_var <- vapply(data[criteria], var, numeric(1))
  cluster_mean <- unlist(stats[stats_row, mean_cols])
  cluster_var <- unlist(stats[stats_row, var_cols])

  t_values <- (cluster_mean - total_mean) / sqrt(total_var)
  f_values <- cluster_var / total_var

  # Interleave so the columns read t, F, t, F, ... criterion by criterion
  values <- as.vector(rbind(t_values, f_values))
  names(values) <- as.vector(rbind(names(cluster_mean), names(cluster_var)))
  data.frame(cluster = label, as.list(values), check.names = FALSE)
}

# k-means numbers its clusters arbitrarily, so the Fruit cluster is identified
# by membership (the cluster containing "Orange") rather than by assuming it
# is cluster 1; the other cluster is the Classic one.
fruit_id <- kmeans_df2$Cluster[kmeans_df2$Flavor == "Orange"]
classic_id <- setdiff(cluster_stats$Cluster, fruit_id)

### Cluster 1 (= Fruit)
C1_F_t_values <- cluster_t_f_values(
  kmeans_df2, cluster_stats, fruit_id,
  "Cluster = Fruit, in the following fn1 = t-value, fn2 = F-value"
)
### Cluster 2 (= Classic)
C2_F_t_values <- cluster_t_f_values(
  kmeans_df2, cluster_stats, classic_id,
  "Cluster = Classic, in the following fn1 = t-value, fn2 = F-value"
)
# Results t-values and F-values by Cluster
print(t(C1_F_t_values))
print(t(C2_F_t_values))
## [,1]
## cluster "Cluster = Fruit, in the following fn1 = t-value, fn2 = F-value"
## Price_fn1 "-1.37181"
## Price_fn2 "0.05750611"
## Refreshing_fn1 "0.8183013"
## Refreshing_fn2 "2.635516"
## Delicious_fn1 "-0.9885273"
## Delicious_fn2 "0.01376937"
## Healthy_fn1 "-0.7302858"
## Healthy_fn2 "3.327026"
## Bitter_fn1 "0.7729246"
## Bitter_fn2 "0.5153873"
## Light_fn1 "1.204505"
## Light_fn2 "0.1064258"
## Crunchy_fn1 "1.21709"
## Crunchy_fn2 "0.1136007"
## Exotic_fn1 "1.390056"
## Exotic_fn2 "0.3118697"
## Sweet_fn1 "0.4907929"
## Sweet_fn2 "0.7161498"
## Fruity_fn1 "1.405072"
## Fruity_fn2 "0.3826243"
## [,1]
## cluster "Cluster = Classic, in the following fn1 = t-value, fn2 = F-value"
## Price_fn1 "0.5144286"
## Price_fn2 "0.3031869"
## Refreshing_fn1 "-0.306863"
## Refreshing_fn2 "0.2809712"
## Delicious_fn1 "0.3706978"
## Delicious_fn2 "0.8487954"
## Healthy_fn1 "0.2738572"
## Healthy_fn2 "0.1637163"
## Bitter_fn1 "-0.2898467"
## Bitter_fn2 "0.9292713"
## Light_fn1 "-0.4516895"
## Light_fn2 "0.5432088"
## Crunchy_fn1 "-0.4564086"
## Crunchy_fn2 "0.5232009"
## Exotic_fn1 "-0.5212711"
## Exotic_fn2 "0.2008149"
## Sweet_fn1 "-0.1840473"
## Sweet_fn2 "1.082011"
## Fruity_fn1 "-0.5269019"
## Fruity_fn2 "0.1558665"