From: doris on
I am new to MATLAB, but my teacher asked me to do a project with MATLAB. The deadline is next Wednesday.

I have tried for a week and have only managed the first question in part 1. So I am asking for help: please give me some ideas on how to do part 1 of the project.



Here are some functions for the project:

---------------------------------------------
% Shuffles the data you provide.
%
% [SHUFFLEDDATA, SHUFFLEDDATALABELS] = SHUFFLEROWS( DATA, DATALABELS )
%
% Arguments: 'data' should be a training data matrix of N examples (N rows),
% and M dimensions for each (M columns).
% 'datalabels' should be a Nx1 column vector of labels.
%
% Returns: Shuffled data and the correspondingly shuffled labels.
%
% HINT: set the random number seed with:
%
% rand('state', seed)
%
% to allow reproducibility of experimental results.
%
function [shuffleddata, shuffleddatalabels] = shufflerows( data, datalabels )


permutation = randperm( size(data,1) );

shuffleddata = data(permutation, :);
shuffleddatalabels = datalabels(permutation, :);
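
A usage sketch (the variable names 'alldata', 'alllabels', 'sdata' and 'slabels' are my own, not from the project), seeding the generator first as the hint above suggests so the shuffle is repeatable:

rand('state', 0);                                     % fix the seed for a repeatable shuffle
[sdata, slabels] = shufflerows(alldata, alllabels);   % rows of sdata still line up with slabels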

---------------------------------------------------------
%
% SHOWDIGIT( digdata )
%
% Arguments: 'digdata' should be a matrix with 256 elements,
% either as 16x16, or 1x256.
%
% Returns: Nothing.
%
% Displays the image of the supplied digit.
%
function showdigit( digdata )

figure
imagesc( reshape(digdata,16,16) );

set( gcf, 'Position', [500 500 256 256]);
colormap gray;
axis off;
axis square;
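
For example, combined with getonedigit (further down in this post) and assuming the USPS matrix has been loaded into a variable called 'data' (that name is my assumption):

showdigit( getonedigit(3, 1, data) );   % display the first instance of the digit 3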
-----------------------------------------------
% Visualizes the supplied 2-d training matrix.
%
% SHOWDATA( traindata, truelabels, [predictions] )
%
% Arguments: 'traindata' should be a 2d matrix of N examples (N rows)
% and M dimensions (M columns). It MUST contain the original
% pixels (i.e. 256 columns in the data matrix).
% 'truelabels' should be an Nx1 column vector of class labels.
%
% Returns: Nothing.
%
% Supply an optional argument 'predictions' to visualise your mistakes
% against the true labels.
%
function showdata( data, labels, guess )

%first sort the digits so they're in order
[labels, sortorder] = sort(labels);
data = data(sortorder,:);

%check to see whether this includes predictions
testing = true;
if exist('guess','var')
guess = guess(sortorder);
else
guess = labels;
testing = false;
end

%find out how many there are and restrict accordingly
numexamples = size(data,1);
if numexamples > 300
error('Too big! showdata can only display up to 300 examples at once.');
end

%only display proper digits
if size(data,2) ~= 256
error('Sorry - can only display digits as the original pixels.');
end



%decide how many digits to put in the square (18x18 = 324 cells covers the 300-example limit)
for side=1:18
if side^2 >= (numexamples)
break;
end
end

%set up the border parameters
border = 3;
framewidth = 16+(2*border);

%and the main matrix to display
m = zeros(side*framewidth,side*framewidth);

n=1;
mistakes = 0;
for row = 1:framewidth:(side*framewidth)-1
    for col = 1:framewidth:(side*framewidth)-1

        %retrieve the digit pixels
        digit = reshape(data(n,:), 16,16);

        %put a black border around it
        frame = zeros(framewidth);
        frame(border:(border+15), border:(border+15)) = digit;
        digit = frame;

        %draw a further white border around the digit, if we've made an error
        if labels(n)~=guess(n)
            digit( border, border:(framewidth-border) ) = 255; %top of white 'mistake' box
            digit( framewidth-border, border:(framewidth-border) ) = 255; %bottom
            digit( border:(framewidth-border), border ) = 255; %left
            digit( border:(framewidth-border), framewidth-border ) = 255; %right
            mistakes = mistakes + 1;
        end

        %put it in the main matrix
        m(row:(row+(framewidth-1)), col:(col+(framewidth-1))) = digit;

        %increment which example we're dealing with
        n=n+1;

        %break if we reached the end
        if n > size(data,1)
            break;
        end
    end

    %break if we reached the end
    if n > size(data,1)
        break;
    end
end

%display it
imagesc( m );
colormap gray;
axis off;
axis square;

%put a title on it
if testing==true
rate = mistakes / numexamples;
rate = ceil(rate*10000)/100;
title([ num2str(mistakes) ' errors from ' num2str(numexamples) ' (' num2str(rate) '%)'], 'FontSize', 16);
end
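
A usage sketch (the variable names 'testdata', 'testlabels' and 'predicted' are my own). Remember it accepts at most 300 examples and only the raw 256-pixel representation:

showdata( testdata(1:100,:), testlabels(1:100) );                      % just look at the digits
showdata( testdata(1:100,:), testlabels(1:100), predicted(1:100) );    % also box the mistakes in white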
-------------------------------------------------------
% K-nearest Neighbour classifier
%
% Y = KNEAREST( k, x, data, truelabels )
%
% Arguments:
% 'data' should be a N rows by M columns matrix of data, composed
% of N training examples, each with M dimensions.
%
% 'truelabels' should be a Nx1 column vector, with class labels.
%
% 'x' is the data vector, size 1xM, where the knn estimate is required.
%
% 'k' is the number of neighbours to take into account.
% Note that even values of k can produce ties, which mode() breaks in favour of the smallest tied label.
%
% Returns:
% 'y' - a predicted class label for your data vector 'x'
%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% YOU SHOULD NOT BE EDITING THIS CODE
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function y = knearest( k, x, data, datalabels )

%get data characteristics
numtrain = size(data,1);
numfeatures = size(data,2);

if size(x,2) ~= numfeatures
error('Test data dimensions do not match train data dimensions.');
end

if k > numtrain
error( ['Not enough training samples to use k = ' num2str(k) ' (you only supplied ' num2str(numtrain) ')'] );
end

%copy the test example 'numtrain' times
protos = repmat(x, numtrain, 1);

%measure the Euclidean distance from this test example to every training example
distances = [ sqrt(sum((data - protos).^2,2)) datalabels ];

%sort them according to distances (find nearest neighbours)
distances = sortrows(distances);

%calculate the most common class in the nearest 'k' neighbours
y = mode( distances(1:k,2)' );
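
For example, assuming 'traindata'/'trainlabels' and 'testdata' already exist (the names are mine), the predicted label for the first test row with k = 3 is:

ypred = knearest( 3, testdata(1,:), traindata, trainlabels );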

-------------------------------------------------
%
% GETONEDIGIT( n, instance, data )
%
% Arguments: 'n' and 'instance' are integers, 'data' should
% be the 3-d matrix loaded up from the USPS .mat files.
%
% Returns: 2d matrix of instance 'instance' for digit 'n',
% pulling it out of the supplied data matrix.
%
function digdata = getonedigit( n, instance, data )

if ~exist('data', 'var')
error('Three arguments required. Format is: getonedigit( n, instance, data )');
end

if n > size(data,3)
error('INCORRECT ARGUMENTS: First argument is the digit (1-9) you want (use 10 to get digit zero), second argument is which instance you want.');
end

if instance > size(data,2)
error( [ 'INCORRECT ARGUMENT: The instance argument must be in the range 1-' num2str(size(data,2)) '.' ] );
end

%reshape it into a square matrix
digdata = reshape( data( : ,instance, n ) , 16 , 16);

%cast to doubles
digdata = double(digdata);
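
For example, assuming the USPS .mat file loads a 3-d matrix into a variable named 'data' (the name is my assumption; the 256 x instances x 10 layout follows from the reshape above):

zero = getonedigit(10, 1, data);   % first instance of the digit zero (stored at n = 10), a 16x16 double matrix
showdigit(zero);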
--------------------------------------------

%
% EXTRACTFEATURES( digdata )
%
% Arguments: 'digdata' is a 2-d matrix, size 16x16.
%
% Process the supplied 2d matrix to generate a lower dimensional
% feature vector, to be used in a learning algorithm.
%
% The returned vector is the sum of pixel values in each of the 16 columns.
% Alternatives might be the sum of values in the 16 rows, or combinations
% of the two, or other statistics of the pixels, like standard deviation.
%
% Note: This MUST return a 1-d array
%
%
function x = extractfeatures( digdata )

%sum the values along matrix dimension 1 (down the rows), giving one sum per column
x = sum(digdata,1);
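
A sketch of one way to build the N-by-M data matrix and Nx1 label vector that the other functions expect, by combining getonedigit and extractfeatures (the variable names, the number of instances per digit and the loop structure are my own; treating n = 10 as the digit zero follows the comment in getonedigit):

numinstances = 100;                   % my choice: how many examples of each digit to use
alldata = [];
alllabels = [];
for n = 1:10                          % n = 10 holds the digit zero
    for instance = 1:numinstances
        digit = getonedigit(n, instance, data);
        alldata = [alldata; extractfeatures(digit)];   % one 1x16 feature row per example
        alllabels = [alllabels; n];                    % use n itself as the class label
    end
end
% To keep the raw pixels instead (as showdata requires), replace the
% extractfeatures call with reshape(digit, 1, 256).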
---------------------------------------
% CROSSFOLD allows you to partition your data into several train/test splits, also known as 'folds'.
%
% [traindata, trainlabels, testdata, testlabels] = CROSSFOLD( fold, numfolds, data, datalabels )
%
% Arguments:
% fold - integer, which fold you want, out of 'numfolds'.
% numfolds - integer, total number of folds you want to make.
% data - 2d matrix N examples (N rows) by M dimensions (M columns).
% datalabels - the data labels, as a Nx1 column vector.
%
% Example:
% To do a 5-fold cross-validation, assuming the variables 'data' and
% 'datalabels' have already been created, type:
%
% [trdata trlabels tedata telabels] = crossfold( 1, 5, data, datalabels );
%
% to get the first train/test split.
%
% [trdata trlabels tedata telabels] = crossfold( 4, 5, data, datalabels );
%
% to get the fourth split.
%
% NB: This function generates training data as the SMALLER fold.
%
%
function [traindata, trainlabels, testdata, testlabels] = crossfold( fold, numfolds, data, datalabels )

%take note of how many features (columns) we have
numfeatures = size(data,2);

%join the data and the labels up to make this easier
data = [data datalabels];

%calculate how big each fold (data partition) will be
foldsize = round( size(data,1) / numfolds );

%calculate the matrix indices for the start/end of the partitions
startindex = (fold-1)*foldsize+1;
endindex = (fold-1)*foldsize+foldsize;

%boundary condition
if fold==numfolds
endindex = size(data,1);
end

%find the training data rows
trainindices = startindex:endindex;
%everything else is testing data
testindices = [ (1:startindex-1) (endindex+1):size(data,1) ];

%split it
traindata = data(trainindices,:);
testdata = data(testindices,:);

%split off the data from the data labels
trainlabels = traindata(:,numfeatures+1);
traindata(:,numfeatures+1) = [];
testlabels = testdata(:,numfeatures+1);
testdata(:,numfeatures+1) = [];
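
A sketch of how the pieces might fit together for one full cross-validation run (the variable names and the values of k and numfolds are my own; 'sdata'/'slabels' are assumed to be the shuffled data and labels from shufflerows, since crossfold cuts consecutive rows):

numfolds = 5;
k = 3;
rates = zeros(numfolds, 1);
for fold = 1:numfolds
    [trdata, trlabels, tedata, telabels] = crossfold(fold, numfolds, sdata, slabels);
    predicted = zeros(size(tedata,1), 1);
    for i = 1:size(tedata,1)
        predicted(i) = knearest(k, tedata(i,:), trdata, trlabels);
    end
    rates(fold) = mean(predicted ~= telabels);   % error rate on this fold
end
meanerror = mean(rates)                          % average error rate over all folds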

------------------------------------------------