Documentation>C API
kmeans.h File Reference

K-means (K-means clustering) More...

#include "generic.h"
#include "random.h"
#include "mathop.h"
#include "kdtree.h"

Data Structures

struct  VlKMeans
 K-means quantizer. More...
 

Enumerations

enum  VlKMeansAlgorithm { VlKMeansLloyd, VlKMeansElkan, VlKMeansANN }
 K-means algorithms. More...
 
enum  VlKMeansInitialization { VlKMeansRandomSelection, VlKMeansPlusPlus }
 K-means initialization algorithms. More...
 

Functions

Create and destroy
VlKMeans * vl_kmeans_new (vl_type dataType, VlVectorComparisonType distance)
 Create a new KMeans object. More...
 
VlKMeans * vl_kmeans_new_copy (VlKMeans const *kmeans)
 Create a new KMeans object by copy. More...
 
void vl_kmeans_delete (VlKMeans *self)
 Deletes a KMeans object. More...
 
Basic data processing
void vl_kmeans_reset (VlKMeans *self)
 Reset state. More...
 
double vl_kmeans_cluster (VlKMeans *self, void const *data, vl_size dimension, vl_size numData, vl_size numCenters)
 Cluster data. More...
 
void vl_kmeans_quantize (VlKMeans *self, vl_uint32 *assignments, void *distances, void const *data, vl_size numData)
 Quantize data. More...
 
void vl_kmeans_quantize_ANN (VlKMeans *self, vl_uint32 *assignments, void *distances, void const *data, vl_size numData, vl_size iteration)
 
Advanced data processing
void vl_kmeans_set_centers (VlKMeans *self, void const *centers, vl_size dimension, vl_size numCenters)
 Set centers. More...
 
void vl_kmeans_init_centers_with_rand_data (VlKMeans *self, void const *data, vl_size dimensions, vl_size numData, vl_size numCenters)
 Initialize centers by randomly sampling data. More...
 
void vl_kmeans_init_centers_plus_plus (VlKMeans *self, void const *data, vl_size dimensions, vl_size numData, vl_size numCenters)
 Seed centers by the KMeans++ algorithm. More...
 
double vl_kmeans_refine_centers (VlKMeans *self, void const *data, vl_size numData)
 Refine center locations. More...
 
Retrieve data and parameters
vl_type vl_kmeans_get_data_type (VlKMeans const *self)
 Get data type. More...
 
VlVectorComparisonType vl_kmeans_get_distance (VlKMeans const *self)
 Get distance type. More...
 
VlKMeansAlgorithm vl_kmeans_get_algorithm (VlKMeans const *self)
 Get K-means algorithm. More...
 
VlKMeansInitialization vl_kmeans_get_initialization (VlKMeans const *self)
 Get K-means initialization algorithm. More...
 
vl_size vl_kmeans_get_num_repetitions (VlKMeans const *self)
 Get maximum number of repetitions. More...
 
vl_size vl_kmeans_get_dimension (VlKMeans const *self)
 Get data dimension. More...
 
vl_size vl_kmeans_get_num_centers (VlKMeans const *self)
 Get the number of centers (K) More...
 
int vl_kmeans_get_verbosity (VlKMeans const *self)
 Get verbosity level. More...
 
vl_size vl_kmeans_get_max_num_iterations (VlKMeans const *self)
 Get maximum number of iterations. More...
 
double vl_kmeans_get_min_energy_variation (VlKMeans const *self)
 Get the minimum relative energy variation for convergence. More...
 
vl_size vl_kmeans_get_max_num_comparisons (VlKMeans const *self)
 Get the maximum number of comparisons in the KD-forest ANN algorithm. More...
 
vl_size vl_kmeans_get_num_trees (VlKMeans const *self)
 
double vl_kmeans_get_energy (VlKMeans const *self)
 Get the energy of the current fit. More...
 
void const * vl_kmeans_get_centers (VlKMeans const *self)
 Get centers. More...
 
Set parameters
void vl_kmeans_set_algorithm (VlKMeans *self, VlKMeansAlgorithm algorithm)
 Set K-means algorithm. More...
 
void vl_kmeans_set_initialization (VlKMeans *self, VlKMeansInitialization initialization)
 Set K-means initialization algorithm. More...
 
void vl_kmeans_set_num_repetitions (VlKMeans *self, vl_size numRepetitions)
 Set maximum number of repetitions. More...
 
void vl_kmeans_set_max_num_iterations (VlKMeans *self, vl_size maxNumIterations)
 Set maximum number of iterations. More...
 
void vl_kmeans_set_min_energy_variation (VlKMeans *self, double minEnergyVariation)
 Set the minimum relative energy variation for convergence. More...
 
void vl_kmeans_set_verbosity (VlKMeans *self, int verbosity)
 Set verbosity level. More...
 
void vl_kmeans_set_max_num_comparisons (VlKMeans *self, vl_size maxNumComparisons)
 Set maximum number of comparisons in ANN-KD-Tree. More...
 
void vl_kmeans_set_num_trees (VlKMeans *self, vl_size numTrees)
 Set the number of trees in the KD-forest ANN algorithm. More...
 

Detailed Description

Author
Andrea Vedaldi
David Novotny

Enumeration Type Documentation

◆ VlKMeansAlgorithm

Enumerator
VlKMeansLloyd 

Lloyd algorithm

VlKMeansElkan 

Elkan algorithm

VlKMeansANN 

Approximate nearest neighbors

◆ VlKMeansInitialization

Enumerator
VlKMeansRandomSelection 

Randomized selection

VlKMeansPlusPlus 

Plus plus randomized selection

Function Documentation

◆ vl_kmeans_cluster()

double vl_kmeans_cluster ( VlKMeans *  self,
void const *  data,
vl_size  dimension,
vl_size  numData,
vl_size  numCenters 
)
Parameters
self: KMeans object.
data: data to quantize.
dimension: data dimension.
numData: number of data points.
numCenters: number of clusters.
Returns
K-means energy at the end of optimization.

The function initializes the centers by using the initialization algorithm set by vl_kmeans_set_initialization and refines them by the quantization algorithm set by vl_kmeans_set_algorithm. The process is repeated one or more times (see vl_kmeans_set_num_repetitions) and the result with the smallest energy is retained.

◆ vl_kmeans_delete()

void vl_kmeans_delete ( VlKMeans *  self)
Parameters
self: KMeans object instance.

The function deletes the KMeans object instance created by vl_kmeans_new.

◆ vl_kmeans_get_algorithm()

VlKMeansAlgorithm vl_kmeans_get_algorithm ( VlKMeans const *  self)
inline
Parameters
self: KMeans object.
Returns
algorithm.

◆ vl_kmeans_get_centers()

void const * vl_kmeans_get_centers ( VlKMeans const *  self)
inline
Parameters
self: KMeans object instance.
Returns
cluster centers.

◆ vl_kmeans_get_data_type()

vl_type vl_kmeans_get_data_type ( VlKMeans const *  self)
inline
Parameters
self: KMeans object instance.
Returns
data type.

◆ vl_kmeans_get_dimension()

vl_size vl_kmeans_get_dimension ( VlKMeans const *  self)
inline
Parameters
self: KMeans object instance.
Returns
data dimension.

◆ vl_kmeans_get_distance()

VlVectorComparisonType vl_kmeans_get_distance ( VlKMeans const *  self)
inline
Parameters
self: KMeans object instance.
Returns
distance type.

◆ vl_kmeans_get_energy()

double vl_kmeans_get_energy ( VlKMeans const *  self)
inline
Parameters
self: KMeans object instance.
Returns
energy.

◆ vl_kmeans_get_initialization()

VlKMeansInitialization vl_kmeans_get_initialization ( VlKMeans const *  self)
inline
Parameters
self: KMeans object.
Returns
initialization algorithm.

◆ vl_kmeans_get_max_num_comparisons()

vl_size vl_kmeans_get_max_num_comparisons ( VlKMeans const *  self)
inline
Parameters
self: KMeans object instance.
Returns
maximum number of comparisons.

◆ vl_kmeans_get_max_num_iterations()

vl_size vl_kmeans_get_max_num_iterations ( VlKMeans const *  self)
inline
Parameters
self: KMeans object instance.
Returns
maximum number of iterations.

◆ vl_kmeans_get_min_energy_variation()

double vl_kmeans_get_min_energy_variation ( VlKMeans const *  self)
inline
Parameters
self: KMeans object instance.
Returns
minimum energy variation.

◆ vl_kmeans_get_num_centers()

vl_size vl_kmeans_get_num_centers ( VlKMeans const *  self)
inline
Parameters
self: KMeans object instance.
Returns
number of centers.

◆ vl_kmeans_get_num_repetitions()

vl_size vl_kmeans_get_num_repetitions ( VlKMeans const *  self)
inline
Parameters
self: KMeans object instance.
Returns
current number of repetitions for quantization.

◆ vl_kmeans_get_verbosity()

int vl_kmeans_get_verbosity ( VlKMeans const *  self)
inline
Parameters
self: KMeans object instance.
Returns
verbosity level.

◆ vl_kmeans_init_centers_plus_plus()

void vl_kmeans_init_centers_plus_plus ( VlKMeans *  self,
void const *  data,
vl_size  dimension,
vl_size  numData,
vl_size  numCenters 
)
Parameters
self: KMeans object.
data: data to sample from.
dimension: data dimension.
numData: number of data points.
numCenters: number of centers.

◆ vl_kmeans_init_centers_with_rand_data()

void vl_kmeans_init_centers_with_rand_data ( VlKMeans *  self,
void const *  data,
vl_size  dimension,
vl_size  numData,
vl_size  numCenters 
)
Parameters
self: KMeans object.
data: data to sample from.
dimension: data dimension.
numData: number of data points.
numCenters: number of centers.

The function initializes the KMeans centers by randomly sampling points from data.

◆ vl_kmeans_new()

VlKMeans* vl_kmeans_new ( vl_type  dataType,
VlVectorComparisonType  distance 
)
Parameters
dataType: type of data (VL_TYPE_FLOAT or VL_TYPE_DOUBLE).
distance: distance.
Returns
new KMeans object instance.

◆ vl_kmeans_new_copy()

VlKMeans* vl_kmeans_new_copy ( VlKMeans const *  kmeans)
Parameters
kmeans: KMeans object to copy.
Returns
new copy.

◆ vl_kmeans_quantize()

void vl_kmeans_quantize ( VlKMeans *  self,
vl_uint32 *  assignments,
void *  distances,
void const *  data,
vl_size  numData 
)
Parameters
self: KMeans object.
assignments: data to closest center assignments (output).
distances: data to closest center distance (output).
data: data to quantize.
numData: number of data points to quantize.

◆ vl_kmeans_refine_centers()

double vl_kmeans_refine_centers ( VlKMeans *  self,
void const *  data,
vl_size  numData 
)
Parameters
self: KMeans object.
data: data to quantize.
numData: number of data points.
Returns
K-means energy at the end of optimization.

The function calls the underlying K-means quantization algorithm (VlKMeansAlgorithm) to quantize the specified data. The function assumes that the cluster centers have already been assigned by using one of the seeding functions, or by setting them.

◆ vl_kmeans_reset()

void vl_kmeans_reset ( VlKMeans *  self)

The function resets the state of the KMeans object. It deletes any stored centers, releasing the corresponding memory. This cancels the effect of seeding or setting the centers, but does not change the other configuration parameters.

◆ vl_kmeans_set_algorithm()

void vl_kmeans_set_algorithm ( VlKMeans *  self,
VlKMeansAlgorithm  algorithm 
)
inline
Parameters
self: KMeans object.
algorithm: K-means algorithm.

◆ vl_kmeans_set_centers()

void vl_kmeans_set_centers ( VlKMeans *  self,
void const *  centers,
vl_size  dimension,
vl_size  numCenters 
)
Parameters
self: KMeans object.
centers: centers to copy.
dimension: data dimension.
numCenters: number of centers.

◆ vl_kmeans_set_initialization()

void vl_kmeans_set_initialization ( VlKMeans *  self,
VlKMeansInitialization  initialization 
)
inline
Parameters
self: KMeans object.
initialization: initialization algorithm.

◆ vl_kmeans_set_max_num_comparisons()

void vl_kmeans_set_max_num_comparisons ( VlKMeans *  self,
vl_size  maxNumComparisons 
)
inline
Parameters
self: KMeans object instance.
maxNumComparisons: maximum number of comparisons.

◆ vl_kmeans_set_max_num_iterations()

void vl_kmeans_set_max_num_iterations ( VlKMeans *  self,
vl_size  maxNumIterations 
)
inline
Parameters
self: KMeans object instance.
maxNumIterations: maximum number of iterations.

◆ vl_kmeans_set_min_energy_variation()

void vl_kmeans_set_min_energy_variation ( VlKMeans *  self,
double  minEnergyVariation 
)
inline
Parameters
self: KMeans object instance.
minEnergyVariation: minimum relative energy variation for convergence. The variation cannot be negative.

The relative energy variation is calculated after the \(t\)-th update to the parameters as:

\[ \epsilon_t = \frac{E_{t-1} - E_t}{E_0 - E_t} \]

Note that this quantity is non-negative since \(E_{t+1} \leq E_t\). Hence, \(\epsilon_t\) is the improvement to the energy made in the last iteration compared to the total improvement so far. The algorithm stops if this value is less than or equal to minEnergyVariation.

This test is applied only to the Lloyd and ANN algorithms.

◆ vl_kmeans_set_num_repetitions()

void vl_kmeans_set_num_repetitions ( VlKMeans *  self,
vl_size  numRepetitions 
)
inline
Parameters
self: KMeans object instance.
numRepetitions: maximum number of repetitions. The number of repetitions cannot be smaller than 1.

◆ vl_kmeans_set_num_trees()

void vl_kmeans_set_num_trees ( VlKMeans *  self,
vl_size  numTrees 
)
inline
Parameters
self: KMeans object instance.
numTrees: number of trees to use.

◆ vl_kmeans_set_verbosity()

void vl_kmeans_set_verbosity ( VlKMeans *  self,
int  verbosity 
)
inline
Parameters
self: KMeans object instance.
verbosity: verbosity level.