0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
classdef soskstest < handle
    % SOSKSTEST  Kolmogorov-Smirnov test attached to an sos object.
    %
    %   The only supported <type> is 'matchUniform': the scores of one
    %   column of a sample are binned, and the binned scores are compared
    %   with a uniform distribution spanning the bin centres using kstest
    %   (Statistics Toolbox).
    %
    %   Objects are created with name/value pairs, e.g.
    %       t = soskstest('sosObj',s,'type','matchUniform', ...
    %                     'sample1',smp,'s1ColName','freq', ...
    %                     'pdSpread','sample');
    %   and evaluated with
    %       [userHypothesis, prob, label] = t.runTest();

    properties
        sosObj                      % sos object this test belongs to
        name                        % test name (auto-generated as 'kstest_<k>' if not given)
        s1                          % sample object whose scores are tested
        s1ColName                   % name of the tested column in s1
        s1Col                       % column index returned by s1.colName2colNum(s1ColName)
        type                        % test type; only 'matchUniform' is accepted
        runSpecificTest             % function handle invoked by runTest
        desiredpvalCondition        % '<=', '=>' or 'N/A'
        desiredpvalConditionHandle  % @le, @ge or @soskstest.returnNaN
        desiredpval                 % p-value threshold (NaN when the condition is 'N/A')
        tail                        % 'both', 'left' or 'right'; stored, but runkstest does not pass it to kstest
        label                       % descriptive label built by constructkstest
        pdSpread                    % 'sample' or 'allItems': where the bin range comes from
        nbin                        % number of bins used to discretise the scores
        lastp                       % p-value from the most recent runTest (NaN before any run)
    end


    properties (Constant)
        smallVal = 0.00000001       % padding added to the bin range limits
    end


    methods

        function obj = soskstest(varargin)
            % SOSKSTEST  Construct a ks-test object from name/value pairs.
            %
            %   Parameters inspected here (all others are forwarded to
            %   constructkstest, which validates them):
            %     'sosObj'  - object of class 'sos'
            %     'type'    - must be 'matchUniform'
            %     'sample1' - object of class 'sample'
            %     'name'    - char name; when omitted the name becomes
            %                 'kstest_<k>' where k is one more than the
            %                 number of entries in sosObj.sosstattests
            %
            %   Errors if <type> is missing or unsupported.

            p = inputParser;
            p.addParamValue('sosObj', 'null', ...
                @(sosObj)strcmp(class(sosObj),'sos'));
            p.addParamValue('type', 'null', ...
                @(type)soskstest.validTestType(type));
            p.addParamValue('sample1', 'null', ...
                @(sample1)strcmp(class(sample1),'sample'));
            p.addParamValue('name', 'noname', @(name)ischar(name));

            % Test-specific parameters are validated later by
            % constructkstest, so tolerate them here.
            p.KeepUnmatched = true;
            p.parse(varargin{:});

            sosObj = p.Results.sosObj;

            if strcmp(p.Results.type,'matchUniform')
                obj.constructkstest(sosObj,varargin{:});
            else
                error(['Specified <type>: ',p.Results.type, ...
                    ' is not supported']);
            end

            obj.lastp = NaN;
            obj.sosObj = p.Results.sosObj;

            if any(strcmp(p.UsingDefaults,'name'))
                % No name supplied: number the test after the tests
                % already registered on the sos object.
                numTests = length(obj.sosObj.sosstattests);
                obj.name = ['kstest_',num2str(numTests+1)];
            else
                obj.name = p.Results.name;
            end
        end


        function [userHypothesis, prob, label] = runTest(obj,varargin)
            % RUNTEST  Run the configured test and remember its p-value.
            %
            %   Any arguments are handed, as a single cell array, to the
            %   handle stored in runSpecificTest. The returned p-value is
            %   also stored in obj.lastp.

            [userHypothesis, prob, label] = obj.runSpecificTest(varargin);
            obj.lastp = prob;
        end


        function [userHypothesis, prob, label] = ...
                runkstest(obj,varargin)
            % RUNKSTEST  Perform the 'matchUniform' ks-test.
            %
            %   Expects a single argument: a cell array of name/value
            %   pairs (this is how runTest calls it). Supported pair:
            %     'reportStyle' - 'short' (default), 'full' or 'none';
            %                     controls what is sent to verbosePrint.
            %
            %   Returns:
            %     userHypothesis - result of desiredpvalConditionHandle
            %                      applied to (prob, desiredpval)
            %     prob           - p-value returned by kstest
            %     label          - '<sample name>{<column>}-{Uniform}'

            % runTest passes its varargin as one cell; unwrap it.
            varargin = varargin{1};

            p = inputParser;
            p.addParamValue('reportStyle','short', ...
                @(reportStyle)any([strcmp(reportStyle,'short') ...
                strcmp(reportStyle,'full') ...
                strcmp(reportStyle,'none')]));
            p.parse(varargin{:});

            reportStyle = p.Results.reportStyle;

            % zdata is used for the test itself; the raw data are only
            % used to report the range in the 'full' report.
            scores = (obj.s1.zdata{obj.s1Col})';
            rawScores = (obj.s1.data{obj.s1Col})';

            % Determine the range over which the bins are laid out.
            if strcmp(obj.pdSpread,'sample')
                minVal = min(scores);
                maxVal = max(scores);
                minRaw = min(rawScores);
                maxRaw = max(rawScores);
            elseif strcmp(obj.pdSpread,'allItems')
                % Start from the items held by the population itself ...
                minVal = min([obj.s1.population.zdata{obj.s1Col}]);
                maxVal = max([obj.s1.population.zdata{obj.s1Col}]);
                minRaw = min([obj.s1.population.data{obj.s1Col}]);
                maxRaw = max([obj.s1.population.data{obj.s1Col}]);

                % ... and widen the range with every sample attached to
                % that population.
                for i=1:length(obj.s1.population.samples)
                    minVal = min([minVal; ...
                        obj.s1.population.samples(i).zdata{obj.s1Col}]);
                    maxVal = max([maxVal; ...
                        obj.s1.population.samples(i).zdata{obj.s1Col}]);
                    minRaw = min([minRaw; ...
                        obj.s1.population.samples(i).data{obj.s1Col}]);
                    maxRaw = max([maxRaw; ...
                        obj.s1.population.samples(i).data{obj.s1Col}]);
                end
            else
                error('Unsupported pdSpread in kstest. pdSpread must be "sample" or "allItems"');
            end

            % Pad the range slightly on both sides.
            minVal = minVal - obj.smallVal;
            maxVal = maxVal + obj.smallVal;

            spread = maxVal - minVal;
            binSize = spread/obj.nbin;

            % Bin centres; smallVal is added to the end point of the
            % colon range.
            bins = (minVal+0.5*binSize):binSize:(maxVal-0.5*binSize)+obj.smallVal;

            % Integer number of scores per bin, and the corresponding
            % proportions.
            counts = hist(scores,bins);
            nScores = length(scores);
            pd = counts/nScores;

            % Replace every score by the centre of its bin. The integer
            % counts are used directly: rebuilding them as pd*nScores can
            % round just below the true count (e.g. (1/49)*49 < 1) and
            % would silently drop observations.
            pdScores = zeros(sum(counts),1);
            next = 1;
            for i=1:length(counts)
                c = counts(i);
                pdScores(next:next+c-1) = bins(i);
                next = next + c;
            end

            % Reference CDF: uniform between the first and last bin centre.
            yScores = cdf('Uniform',bins,min(bins),max(bins));

            % kstest with a two-column [x, cdf(x)] matrix as hypothesised CDF.
            [~,prob,stats] = kstest(pdScores, [bins; yScores]');

            % Entropy-like score of the bin proportions, rescaled so that
            % it is 0 when all bins hold equal proportions and -1 when
            % every score falls into a single bin. Only shown in the
            % 'full' report.
            ent = pd*log((pd+1)')/length(pd);
            ent = -1* (ent - 1/length(pd)*log(1/length(pd)+1)) / ...
                (1*log(2)/length(pd) - 1/length(pd)*log(1/length(pd)+1));

            userHypothesis = obj.desiredpvalConditionHandle(...
                prob,obj.desiredpval);

            label = [obj.s1.name, '{',obj.s1ColName, '}-', ...
                '{Uniform}'];

            if (isnan(userHypothesis))
                printHyp = 'N/A';
            elseif userHypothesis == 1
                printHyp = 'PASS';
            elseif userHypothesis == 0
                printHyp = 'FAIL';
            end

            % Reporting; 'none' prints nothing.
            if (strcmp(reportStyle,'short'))
                verbosePrint(['  UserHyp: ', printHyp, '; ', label, ': ', ...
                    'ks[',obj.type,'](',num2str(length(obj.s1.zdata{obj.s1Col})),') = ', ...
                    num2str(stats), ', p = ', num2str(prob), ...
                    ' p-des: ',num2str(obj.desiredpval)], ...
                    'soskstest_runMatchUniformkstest');
            elseif (strcmp(reportStyle,'full'))
                verbosePrint(['  UserHyp: ', printHyp , ...
                    '; ', label, ': ', ...
                    'ks[',obj.type,'](',num2str(length(obj.s1.zdata{obj.s1Col})),') = ', ...
                    num2str(stats), ', p = ', num2str(prob), ...
                    ' p-des: ',num2str(obj.desiredpval), ...
                    ' ent = ', num2str(ent),', targmin = ',num2str(minRaw), ...
                    ', targmax = ',num2str(maxRaw)], ...
                    'soskstest_runMatchUniformkstest');
            end
        end


        function constructkstest(obj,sosObj,varargin)
            % CONSTRUCTKSTEST  Validate the test parameters and fill in obj.
            %
            %   sosObj   - object of class 'sos' (required)
            %   Name/value pairs:
            %     'type'                 - 'matchUniform'
            %     'sample1'              - object of class 'sample'
            %     's1ColName'            - char, column of sample1 to test
            %     'desiredpvalCondition' - '<=', '=>' or 'N/A' (default)
            %     'desiredpval'          - scalar in (0,1], default 0.05;
            %                              stored as NaN when the
            %                              condition is 'N/A'
            %     'tail'                 - 'both' (default), 'left', 'right'
            %     'pdSpread'             - 'sample' or 'allItems'
            %     'name'                 - char
            %     'nbin'                 - positive integer; defaults to
            %                              sample1.n and may not exceed it
            %
            %   Errors if sample1 is missing, not part of sosObj, empty,
            %   has fewer than 2 items, lacks the named column, or if
            %   fewer than 2 bins would be used.

            p = inputParser;

            p.addRequired('sosObj', ...
                @(sosObj)strcmp(class(sosObj),'sos'));
            p.addParamValue('type','null', ...
                @(type)soskstest.validTestType(type));
            p.addParamValue('sample1','null', ...
                @(sample1)strcmp(class(sample1),'sample'));
            p.addParamValue('s1ColName',NaN, ...
                @(s1ColName)ischar(s1ColName));
            p.addParamValue('desiredpvalCondition','N/A', ...
                @(desiredpvalCondition)any([strcmp(desiredpvalCondition,'<='), ...
                strcmp(desiredpvalCondition,'=>'), ...
                strcmp(desiredpvalCondition,'N/A')]));
            p.addParamValue('desiredpval', 0.05, ...
                @(desiredpval)validateattributes(desiredpval, ...
                {'numeric'}, ...
                {'scalar', 'positive', '>=', 0, '<=', 1}));
            p.addParamValue('tail','both', ...
                @(tail)any([strcmp(tail,'both'), strcmp(tail,'left'),strcmp(tail,'right')]));
            p.addParamValue('pdSpread','null', ...
                @(pdSpread)any([strcmp(pdSpread,'sample'), strcmp(pdSpread,'allItems')]));
            p.addParamValue('name','noname',@(name)ischar(name));
            p.addParamValue('nbin',2,@(nbin)validateattributes(nbin, {'numeric'}, ...
                {'scalar', 'integer', 'positive', '>', 0}));
            p.parse(sosObj,varargin{:});

            sample1 = p.Results.sample1;
            s1ColName = p.Results.s1ColName;
            obj.desiredpvalCondition = p.Results.desiredpvalCondition;
            obj.pdSpread = p.Results.pdSpread;

            % The default for sample1 is a placeholder string that is not
            % validated; fail clearly before sample1.n is dereferenced.
            if ~strcmp(class(sample1),'sample')
                error('<sample1> must be specified and must be a sample object');
            end

            % Number of bins: one per observation unless the caller asks
            % for fewer.
            if any(strcmp(p.UsingDefaults,'nbin'))
                obj.nbin = sample1.n;
            else
                if p.Results.nbin <= sample1.n
                    obj.nbin = p.Results.nbin;
                else
                    error('Number of bins must be <= number of observations in the sample');
                end
            end

            if obj.nbin < 2
                error ('There must be at least 2 bins in the entropy calculation');
            end

            % Map the textual condition onto a comparison function that is
            % later called as handle(prob, desiredpval).
            if strcmp(obj.desiredpvalCondition,'N/A')
                obj.desiredpvalConditionHandle = @soskstest.returnNaN;
            elseif strcmp(obj.desiredpvalCondition,'<=')
                obj.desiredpvalConditionHandle = @le;
            elseif strcmp(obj.desiredpvalCondition,'=>')
                obj.desiredpvalConditionHandle = @ge;
            end

            if strcmp(obj.desiredpvalCondition,'N/A')
                obj.desiredpval = NaN;
            else
                obj.desiredpval = p.Results.desiredpval;
            end

            % Consistency checks between the sos object, the sample and
            % the requested column.
            present1 = sosObj.containsSample(sample1);
            if (present1 == 0 )
                error('sos sosObject does not contain sample1');
            end

            col1 = sample1.colName2colNum(s1ColName);
            if(col1 == -1)
                error('<s1ColName> not a column of data in <sample1>');
            end

            if isempty(sample1.data)
                error('sample 1 does not contain items - did you fill it yet?');
            end

            if sample1.n < 2
                error('The ks requires at least 2 items in the sample');
            end

            obj.s1 = sample1;
            obj.s1ColName = s1ColName;
            obj.s1Col = col1;
            obj.type = p.Results.type;
            obj.tail = p.Results.tail;

            obj.runSpecificTest = @obj.runkstest;

            obj.label = [obj.s1.name, '{',obj.s1ColName, '}-{Uniform}', ...
                ':ks[',obj.type,']'];
        end

    end


    methods (Static)

        function userHypothesis = returnNaN(~,~)
            % RETURNNAN  Ignore both inputs and return NaN.
            %   Used as the comparison handle when desiredpvalCondition
            %   is 'N/A'.

            userHypothesis = NaN;
        end


        function flag = validTestType(str)
            % VALIDTESTTYPE  Return 1 if str is 'matchUniform'.
            %   Any other input (including non-char input) raises an
            %   error rather than returning 0.

            flag = 0;

            if(ischar(str) == false)
                error('<Type> must be "matchUniform"');
            end

            if (strcmp(str,'matchUniform'))
                flag = 1;
            else
                error('<Type> must be "matchUniform"');
            end
        end
    end

end
0473