import numpy as np
from kmeans import kmeans, findClosestCentroids, evaluate_error, init_plot
[docs]class KMeansStream:
"""Helper class for kmeans clustering.
This class provides train and predict functions for using kmeans with
`Stream_Learn`.
Parameters
----------
draw : boolean
Describes whether the data is to be plotted (data must have 2 or less
dimensions).
output : boolean
Describes whether debug info is to be printed. Info includes average
error, average number of iterations, current number of iterations, and
number of changed points over time.
k : int
Describes the number of clusters to train.
incremental : boolean, optional
Describes whether the kmeans algorithm is run incrementally or not (the
default is True). If incremental, then previous clusters are used to
initialize new clusters. Otherwise, clusters are reinitialized randomly
for each window.
figsize : tuple, optional
A tuple containing the width and height of the plot for the map (the
default is (15, 8)).
Attributes
----------
train : function
The train function with signature as required by `Stream_Learn`.
predict : function
The predict function with signature as required by 'Stream_Learn'.
avg_iterations : float
The average number of iterations per window of data trained.
avg_error : float
The average error per window of data trained.
"""
def __init__(self, draw, output, k, incremental=True, figsize=(15, 8)):
self.draw = draw
self.output = output
self.k = k
self.incremental = incremental
self.avg_iterations = 0
self.avg_error = 0
self._init_func()
self.centroids = None
if draw:
init_plot(figsize)
def _init_func(self):
def train_function(x, y, model, window_state):
if not model:
class Model:
centroids = None
k = self.k
sum_iterations = 0
sum_error = 0
i = 0
model = Model()
if model.centroids is not None and self.incremental:
[centroids, index, i] = kmeans(x, model.k, model.centroids,
draw=self.draw,
output=self.output)
else:
[centroids, index, i] = kmeans(x, model.k, draw=self.draw,
output=self.output)
model.centroids = centroids
self.centroids = centroids
error = evaluate_error(x, centroids, index)
if self.output:
print "Error: ", error
model.sum_iterations += i
model.sum_error += error
model.i += 1
return model
def predict_function(x, y, model):
self.avg_iterations = float(model.sum_iterations) / float(model.i)
self.avg_error = float(model.sum_error) / float(model.i)
if self.output:
print "Average number of iterations: ", self.avg_iterations
print "Average error: ", self.avg_error, "\n"
return findClosestCentroids(np.array(x).reshape(1, len(x)),
model.centroids)
self.train = train_function
self.predict = predict_function
[docs] def reset(self):
"""Resets the KMeans functions and average values.
Resets: train, predict, avg_iterations, avg_error
"""
self._init_func()
if self.draw:
init_plot()
self.avg_iterations = 0
self.avg_error = 0