0001 
0002 
0003 
0004 
0005 
0006 
0007 
0008 
0009 
0010 
0011 
0012 
0013 
0014 
0015 
0016 
0017 
0018 
0019 
0020 
0021 
0022 
classdef soskstest < handle
    % SOSKSTEST  Kolmogorov-Smirnov test of one sample column against a
    % uniform distribution, for use with an sos object.
    %
    % The only supported test type is 'matchUniform'.  The test bins the
    % normalized (zdata) scores of one column of a sample, replaces every
    % observation by the centre of its bin, and runs kstest on those binned
    % scores against a uniform CDF spanning the bin centres.
    %
    % Construction (name/value pairs):
    %   'sosObj'                object of class 'sos' that contains the sample
    %   'type'                  must be 'matchUniform'
    %   'sample1'               object of class 'sample'
    %   's1ColName'             name of the column of sample1 to test
    %   'desiredpvalCondition'  '<=', '=>' or 'N/A' (default 'N/A')
    %   'desiredpval'           scalar in (0,1], default 0.05
    %   'tail'                  'both', 'left' or 'right' (stored only)
    %   'pdSpread'              'sample' or 'allItems'
    %   'nbin'                  number of bins (defaults to sample1.n)
    %   'name'                  label for the test (auto-generated if omitted)

    properties
        sosObj                      % sos object this test belongs to
        name                        % name of this test
        s1                          % sample object being tested
        s1ColName                   % name of the tested column in s1
        s1Col                       % column index returned by s1.colName2colNum
        type                        % test type string ('matchUniform')
        runSpecificTest             % handle to the method that performs the test
        desiredpvalCondition        % '<=', '=>' or 'N/A'
        desiredpvalConditionHandle  % @le, @ge or @soskstest.returnNaN
        desiredpval                 % p-value threshold (NaN when condition is 'N/A')
        tail                        % 'both', 'left' or 'right'; stored, not used by runkstest
        label                       % descriptive label built in constructkstest
        pdSpread                    % 'sample' or 'allItems': source of the bin range
        nbin                        % number of histogram bins
        lastp                       % p-value from the most recent runTest (NaN initially)
    end

    properties (Constant)
        % Small offset used to widen the bin range slightly so that the
        % extreme scores do not sit exactly on the outer edges.
        smallVal = 0.00000001
    end

    methods

        function obj = soskstest(varargin)
            % Construct a soskstest from name/value pairs (see class help).
            %
            % Only 'sosObj', 'type', 'sample1' and 'name' are inspected
            % here; every argument is forwarded to constructkstest, which
            % performs the full validation.
            %
            % Raises an error when 'type' is missing or is not
            % 'matchUniform'.

            p = inputParser;
            p.addParamValue('sosObj', 'null', ...
                @(sosObj)strcmp(class(sosObj),'sos'));
            p.addParamValue('type', 'null', ...
                @(type)soskstest.validTestType(type));
            p.addParamValue('sample1', 'null', ...
                @(sample1)strcmp(class(sample1),'sample'));
            p.addParamValue('name', 'noname', @(name)ischar(name));

            % Remaining options are validated by constructkstest.
            p.KeepUnmatched = true;
            p.parse(varargin{:});

            sosObj = p.Results.sosObj;

            if strcmp(p.Results.type,'matchUniform')
                obj.constructkstest(sosObj,varargin{:});
            else
                % Reached when 'type' was omitted: inputParser does not
                % run the validator on the default value.
                error(['Specified <type>: ',p.Results.type, ...
                        ' is not supported']);
            end

            obj.lastp = NaN;
            obj.sosObj = p.Results.sosObj;

            if any(strcmp(p.UsingDefaults,'name'))
                % No name supplied: number the test after the tests the
                % sos object currently holds.
                numTests = length(obj.sosObj.sosstattests);
                obj.name = ['kstest_',num2str(numTests+1)];
            else
                obj.name = p.Results.name;
            end
        end


        function [userHypothesis, prob, label] = runTest(obj,varargin)
            % Run the configured test and remember its p-value in lastp.
            %
            % varargin is handed on as a single cell array to the handle
            % stored in runSpecificTest (runkstest unpacks it).
            %
            % Returns:
            %   userHypothesis  result of the desired p-value condition
            %   prob            p-value of the test
            %   label           label describing the comparison

            [userHypothesis, prob, label] = obj.runSpecificTest(varargin);
            obj.lastp = prob;
        end


        function [userHypothesis, prob, label] = ...
                runkstest(obj,varargin)
            % Run the KS test of the sample column against a uniform CDF.
            %
            % varargin{1} must be a cell array of name/value pairs (this is
            % how runTest calls it).  Supported option:
            %   'reportStyle'  'short' (default), 'full' or 'none'
            %
            % Returns:
            %   userHypothesis  desiredpvalConditionHandle(prob, desiredpval):
            %                   logical for '<=' / '=>', NaN for 'N/A'
            %   prob            p-value returned by kstest
            %   label           '<sample name>{<column>}-{Uniform}'
            %
            % Raises an error when pdSpread is neither 'sample' nor
            % 'allItems'.

            varargin = varargin{1};

            p = inputParser;
            p.addParamValue('reportStyle','short', ...
                    @(reportStyle)any([strcmp(reportStyle,'short') ...
                                    strcmp(reportStyle,'full') ...
                                    strcmp(reportStyle,'none')]));
            p.parse(varargin{:});
            reportStyle = p.Results.reportStyle;

            % Normalized scores drive the test; raw scores are only used
            % for the 'full' report.
            scores = (obj.s1.zdata{obj.s1Col})';
            rawScores = (obj.s1.data{obj.s1Col})';

            % Determine the range that the bins must span.
            if strcmp(obj.pdSpread,'sample')
                minVal = min(scores);
                maxVal = max(scores);
                minRaw = min(rawScores);
                maxRaw = max(rawScores);
            elseif strcmp(obj.pdSpread,'allItems')
                % Range over the population's own items ...
                minVal = min([obj.s1.population.zdata{obj.s1Col}]);
                maxVal = max([obj.s1.population.zdata{obj.s1Col}]);
                minRaw = min([obj.s1.population.data{obj.s1Col}]);
                maxRaw = max([obj.s1.population.data{obj.s1Col}]);

                % ... extended by the items held in each of its samples.
                for i=1:length(obj.s1.population.samples)
                    minVal = min([minVal; ...
                        obj.s1.population.samples(i).zdata{obj.s1Col}]);
                    maxVal = max([maxVal; ...
                        obj.s1.population.samples(i).zdata{obj.s1Col}]);
                    minRaw = min([minRaw; ...
                        obj.s1.population.samples(i).data{obj.s1Col}]);
                    maxRaw = max([maxRaw; ...
                        obj.s1.population.samples(i).data{obj.s1Col}]);
                end
            else
                error('Unsupported pdSpread in kstest.  pdSpread must be "sample" or "allItems"');
            end

            % Widen the range slightly on both sides.
            minVal = minVal - obj.smallVal;
            maxVal = maxVal + obj.smallVal;

            spread  = maxVal - minVal;
            binSize = spread/obj.nbin;

            % Bin centres; smallVal on the end point guards against the
            % last centre being dropped by round-off in the colon range.
            bins = (minVal+0.5*binSize):binSize:(maxVal-0.5*binSize)+obj.smallVal;

            % Integer number of observations per bin and the matching
            % proportions.
            counts = hist(scores,bins);
            pd = counts/length(scores);

            % Replace each observation by the centre of its bin.  The
            % integer counts are used directly: reconstructing them as
            % pd(i)*length(scores) can round just below the true count and
            % silently drop an observation.
            pdScores = zeros(sum(counts),1);
            next = 1;
            for i=1:length(counts)
                c = counts(i);
                pdScores(next:next+c-1) = bins(i);
                next = next + c;
            end

            % Reference uniform CDF evaluated at the bin centres.
            yScores = cdf('Uniform',bins,min(bins),max(bins));

            [~,prob,stats] = kstest(pdScores, [bins; yScores]');

            % Entropy-style summary of the bin proportions, rescaled using
            % the value obtained for equal proportions (1/length(pd) per
            % bin); only reported with reportStyle 'full'.
            ent = pd*log((pd+1)')/length(pd);
            ent = -1* (ent - 1/length(pd)*log(1/length(pd)+1)) / ...
                (1*log(2)/length(pd) - 1/length(pd)*log(1/length(pd)+1));

            userHypothesis = obj.desiredpvalConditionHandle(...
                                  prob,obj.desiredpval);

            label = [obj.s1.name, '{',obj.s1ColName, '}-', ...
                    '{Uniform}'];

            if (isnan(userHypothesis))
                printHyp = 'N/A';
            elseif userHypothesis == 1
                printHyp = 'PASS';
            elseif userHypothesis == 0
                printHyp = 'FAIL';
            end

            % Both report styles share the same leading text; 'full'
            % appends the entropy and raw-score range.
            if strcmp(reportStyle,'short') || strcmp(reportStyle,'full')
                msg = [' UserHyp: ', printHyp, '; ', label, ': ', ...
                     'ks[',obj.type,'](',num2str(length(obj.s1.zdata{obj.s1Col})),') = ', ...
                     num2str(stats), ', p = ', num2str(prob), ...
                     ' p-des: ',num2str(obj.desiredpval)];

                if strcmp(reportStyle,'full')
                    msg = [msg, ...
                         ' ent = ', num2str(ent),', targmin = ',num2str(minRaw), ...
                         ', targmax = ',num2str(maxRaw)];
                end

                verbosePrint(msg,'soskstest_runMatchUniformkstest');
            end
        end


        function constructkstest(obj,sosObj,varargin)
            % Validate the test options and populate the object.
            %
            % Parameters:
            %   sosObj    object of class 'sos' that must contain sample1
            %   varargin  name/value pairs (see class help)
            %
            % Raises an error when:
            %   - nbin exceeds sample1.n or is smaller than 2
            %   - sosObj does not contain sample1
            %   - s1ColName is not a column of sample1
            %   - sample1 has no data or fewer than 2 items

            p = inputParser;

            p.addRequired('sosObj', ...
                @(sosObj)strcmp(class(sosObj),'sos'));
            p.addParamValue('type','null', ...
                @(type)soskstest.validTestType(type));
            p.addParamValue('sample1','null', ...
                        @(sample1)strcmp(class(sample1),'sample'));
            p.addParamValue('s1ColName',NaN, ...
                @(s1ColName)ischar(s1ColName));
            p.addParamValue('desiredpvalCondition','N/A', ...
                @(desiredpvalCondition)any([strcmp(desiredpvalCondition,'<='), ...
                            strcmp(desiredpvalCondition,'=>'), ...
                            strcmp(desiredpvalCondition,'N/A')]));
            p.addParamValue('desiredpval', 0.05, ...
                @(desiredpval)validateattributes(desiredpval, ...
                    {'numeric'}, ...
                    {'scalar', 'positive', '>=', 0, '<=', 1}));
            p.addParamValue('tail','both', ...
                @(tail)any([strcmp(tail,'both'), strcmp(tail,'left'),strcmp(tail,'right')]));
            p.addParamValue('pdSpread','null', ...
                @(pdSpread)any([strcmp(pdSpread,'sample'), strcmp(pdSpread,'allItems')]));
            p.addParamValue('name','noname',@(name)ischar(name));
            p.addParamValue('nbin',2,@(nbin)validateattributes(nbin, {'numeric'}, ...
                {'scalar', 'integer', 'positive', '>', 0}));
            p.parse(sosObj,varargin{:});

            sample1 = p.Results.sample1;
            s1ColName = p.Results.s1ColName;
            obj.desiredpvalCondition = p.Results.desiredpvalCondition;
            obj.pdSpread = p.Results.pdSpread;

            % Default to one bin per observation; an explicit nbin may not
            % exceed the number of observations.
            if any(strcmp(p.UsingDefaults,'nbin'))
                obj.nbin = p.Results.sample1.n;
            else
                if p.Results.nbin <= p.Results.sample1.n
                    obj.nbin = p.Results.nbin;
                else
                    error('Number of bins must be <= number of observations in the sample');
                end
            end

            if obj.nbin < 2
                error ('There must be at least 2 bins in the entropy calculation');
            end

            % Map the textual condition onto the function that compares
            % the observed p-value with desiredpval.
            if strcmp(obj.desiredpvalCondition,'N/A')
                % Fixed: the class name was misspelled ('sosksest'), which
                % made this handle fail when it was invoked.
                obj.desiredpvalConditionHandle = @soskstest.returnNaN;
            elseif strcmp(obj.desiredpvalCondition,'<=')
                obj.desiredpvalConditionHandle = @le;
            elseif strcmp(obj.desiredpvalCondition,'=>')
                obj.desiredpvalConditionHandle = @ge;
            end

            % No threshold applies when there is no condition.
            if strcmp(obj.desiredpvalCondition,'N/A')
                obj.desiredpval = NaN;
            else
                obj.desiredpval = p.Results.desiredpval;
            end

            present1 = sosObj.containsSample(sample1);
            if (present1 == 0 )
                error('sos sosObject does not contain sample1');
            end

            % colName2colNum signals an unknown column with -1.
            col1 = sample1.colName2colNum(s1ColName);
            if(col1 == -1)
                error('<s1ColName> not a column of data in <sample1>');
            end

            if isempty(sample1.data)
                error('sample 1 does not contain items - did you fill it yet?');
            end

            if sample1.n < 2
                error('The ks requires at least 2 items in the sample');
            end

            obj.s1 = sample1;
            obj.s1ColName = s1ColName;
            obj.s1Col = col1;
            obj.type = p.Results.type;
            obj.tail = p.Results.tail;

            obj.runSpecificTest = @obj.runkstest;

            obj.label = [obj.s1.name, '{',obj.s1ColName, '}-{Uniform}', ...
                    ':ks[',obj.type,']'];
        end

    end


    methods (Static)

        function userHypothesis = returnNaN(~,~)
            % Stand-in comparison used when desiredpvalCondition is 'N/A':
            % ignores both arguments and returns NaN.

            userHypothesis = NaN;
        end


        function flag = validTestType(str)
            % Validate a test type string.
            %
            % Returns 1 when str is 'matchUniform'; raises an error for any
            % other value, including non-character input.

            flag = 0;

            if(ischar(str) == false)
                error('<Type> must be "matchUniform"');
            end

            if (strcmp(str,'matchUniform'))
                flag = 1;
            else
                error('<Type> must be "matchUniform"');
            end
        end
    end

end
0473