Abstract
Conventional Gaussian kernel c-means clustering algorithms are widely used in applications. However, Gaussian kernel functions have an important parameter, the width hyper-parameter, which needs to be tuned. Usually this parameter is tuned once and for all and it is the same for all variables. Thus, implicitly, all the variables are equally rescaled and therefore, they have equal importance on the clustering task. This paper presents Gaussian kernel c-means hard clustering algorithms with automated computation of the width hyper-parameters. In these kernel-based clustering algorithms, the hyper-parameters change at each iteration of the algorithm, they differ from variable to variable and can differ from cluster to cluster. Because each variable is rescaled differently according to its own hyper-parameter, these algorithms can select the important variables in the clustering process. Experiments using synthetic data sets and using UCI machine learning repository data sets corroborate the usefulness of the proposed algorithms.
BibTeX
@comment{Journal article. Authors are given in the "von Last, First" form, the page range uses an en-dash (--), and the DOI is stored bare, without a resolver prefix.}
@article{DECARVALHO2018370,
  author   = {de Carvalho, Francisco de A. T. and Sim{\~o}es, Eduardo C. and Santana, Lucas V. C. and Ferreira, Marcelo R. P.},
  title    = {{Gaussian} kernel c-means hard clustering algorithms with automated computation of the width hyper-parameters},
  journal  = {Pattern Recognition},
  volume   = {79},
  pages    = {370--386},
  year     = {2018},
  issn     = {0031-3203},
  doi      = {10.1016/j.patcog.2018.02.018},
  url      = {http://www.sciencedirect.com/science/article/pii/S0031320318300712},
  keywords = {Gaussian kernel clustering, Kernelization of the metric, Feature space, Width hyper-parameter},
  abstract = {Conventional Gaussian kernel c-means clustering algorithms are widely used in applications. However, Gaussian kernel functions have an important parameter, the width hyper-parameter, which needs to be tuned. Usually this parameter is tuned once and for all and it is the same for all variables. Thus, implicitly, all the variables are equally rescaled and therefore, they have equal importance on the clustering task. This paper presents Gaussian kernel c-means hard clustering algorithms with automated computation of the width hyper-parameters. In these kernel-based clustering algorithms, the hyper-parameters change at each iteration of the algorithm, they differ from variable to variable and can differ from cluster to cluster. Because each variable is rescaled differently according to its own hyper-parameter, these algorithms can select the important variables in the clustering process. Experiments using synthetic data sets and using UCI machine learning repository data sets corroborate the usefulness of the proposed algorithms.},
}