Home > src > sample.m

sample

PURPOSE ^

- sample object

SYNOPSIS ^

This is a script file.

DESCRIPTION ^

 - sample object
 
 copyright 2009-2012 Blair Armstrong, Christine Watson, David Plaut

    This file is part of SOS

    SOS is free software: you can redistribute it and/or modify
    it for academic and non-commercial purposes
    under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.  For commercial or for-profit
    uses, please contact the authors (sos@cnbc.cmu.edu).

    SOS is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SUBFUNCTIONS ^

SOURCE CODE ^

0001 % - sample object
0002 %
0003 % copyright 2009-2012 Blair Armstrong, Christine Watson, David Plaut
0004 %
0005 %    This file is part of SOS
0006 %
0007 %    SOS is free software: you can redistribute it and/or modify
0008 %    it for academic and non-commercial purposes
0009 %    under the terms of the GNU General Public License as published by
0010 %    the Free Software Foundation, either version 3 of the License, or
0011 %    (at your option) any later version.  For commercial or for-profit
0012 %    uses, please contact the authors (sos@cnbc.cmu.edu).
0013 %
0014 %    SOS is distributed in the hope that it will be useful,
0015 %    but WITHOUT ANY WARRANTY; without even the implied warranty of
0016 %    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
0017 %    GNU General Public License for more details.
0018 
0019 %    You should have received a copy of the GNU General Public License
0020 %    along with SOS (see COPYING.txt).
0021 %    If not, see <http://www.gnu.org/licenses/>.
0022 
0023 
0024 
0025 classdef sample < dataFrame
0026     %% creates and supports sample objects
0027     %
0028     % Additional functionality is inherited from parent class <dataFrame>
0029     %
0030     %PROPERTIES
0031     %   n - target number of items for the sample (NOT necessarily current number of items)
0032     %   population - population to derive the sample from
0033     %   locks - int array of length <sampleObj>.n indicating if an item can be swapped (0) or not (1)
0034     %   name - string name associated with the object
0035     %
0036     % ** ALSO manages the global property 'sampleCount', which tracks the
0037     % number of samples that have been created.
0038     %
0039     %METHODS
0040     %   sample(n,varargin) - Constructor - Creates a sample object
0041     %   lockAll() - locks all the items in the sample
0042     %   unlockAll() - unlocks all the items in the sample
0043     %   setPop(population) - links a sample with a population
0044     %   item = popItem(itemIndex) - pops the item from sample.data at itemIndex and returns it
0045     %   appendItem(item) - appends the item to the sample (errors if the sample is already full)
0046     %   swapItems(sIndex,df,dfIndex,sosObj) - swaps a sample item with a population item
0047     %
0048     %METHODS (STATIC)
0049     %   p = sampleInputParser() - generates an input parser for the constructor
0050     %
0051     %METHODS (Access = private)
0052     %   createLocks() - creates locks for sample items
0053     %
0054     %METHODS (Static, Access = private)
0055     %   p = parseConstructorArgs(n,varargin) - parses arguments from sample constructor
0056     %
0057  
0058     %% PROPERTIES
0059     properties
0060         n % target number of items for the sample (NOT necessarily current number of items)
0061         population % population to derive the sample from
0062         locks % int array of length <sampleObj>.n indicating if an item can be swapped (0) or not (1)
0063         name % string name associated with the object
0064     end % properties
0065     
0066     
0067     methods
0068         
0069         %% sample CONSTRUCTOR
0070         function obj = sample(n,varargin)
0071         % Constructor - Creates a sample object
0072         %
0073         % CALL:
0074         % sample(n, ['fileName',<string>, 'isHeader',<logical>, 'isFormatting',<logical>, 'outFile',<string>, 'name',<string>])
0075         %
0076         % SYNOPSIS:
0077         % Parses the arguments, initializes the parent dataFrame, stores the
0078         % target size and name, and creates the item locks.  The global
0079         % 'sampleCount' is incremented only once all of that has succeeded.
0080         %
0081         % PARAMETERS:
0082         %  REQUIRED:
0083         %   n - target number of observations for the sample
0084         %
0085         %  OPTIONAL (param/value pairs):
0086         %   fileName/string - src file for the sample, which must
0087         %       follow the SOS dataFrame format specifications.
0088         %   isHeader/logical - indicates if the source file has a
0089         %       header.  Defaults to false.
0090         %   isFormatting/logical - indicates if the source file has
0091         %       formatting.  Defaults to false.
0092         %   outFile/string - name (inc. path, if other than pwd is
0093         %       desired) of the file to save the sample in.  When not
0094         %       supplied, 'sample<count>.out.txt' is used.
0095         %   name/string - string name to associate with the object.
0096         %       When not supplied, 'sample<count>' is used.
0097         %
0098         % EXAMPLE:
0099         %   s1 = sample(5); %creates a sample object targeted to have 5 observations
0100         %   s2 = sample(10, 'fileName', 's1.txt'); % creates a sample
0101         %                % object with target of 10 obs, reading in initial
0102         %                % observations from s1.txt
0103
0104             verbosePrint([char(10) 'Creating and Configuring Sample Object'], ...
0105                 'sample_constructor_startObjCreation');
0106
0107             % parse once and keep short handles on the parser's outputs
0108             p = sample.parseConstructorArgs(n,varargin);
0109             args = p.Results;
0110             defaulted = p.UsingDefaults;
0111
0112             % number this sample will carry if construction succeeds; the
0113             % global counter itself is only updated at the very end
0114             global sampleCount;
0115             curCount = 1;
0116             if ~isempty(sampleCount)
0117                 curCount = sampleCount + 1;
0118             end
0119
0120             % use the caller's outFile unless it was left at its default, in
0121             % which case one is derived from the sample number
0122             outFile = args.outFile;
0123             if any(strcmp(defaulted,'outFile'))
0124                 outFile = ['sample',num2str(curCount),'.out.txt'];
0125             end
0126
0127             % an explicitly passed 'null' (or a non-string) is rejected here;
0128             % whether the file can actually be written is not checked
0129             if ~ischar(outFile) || strcmp(outFile,'null')
0130                 error('Outfile has not been set to a string != null.');
0131             end
0132
0133             % an existing file only triggers a message; nothing is written here
0134             if exist(outFile,'file')
0135                 existsMsg = [char(10) 'WARNING: File {', outFile, '} already exists.  ', ...
0136                     'If you attempt to write to this file, ',...
0137                     'the existing one will be overridden' char(10)];
0138                 verbosePrint(existsMsg,'sample_constructor_fileExists');
0139             end
0140
0141             % similarly, use a default name based on the sample count if
0142             % necessary
0143             objName = args.name;
0144             if any(strcmp(defaulted,'name'))
0145                 objName = ['sample',num2str(curCount)];
0146             end
0147
0148             % hand the file-related options on to the parent dataFrame
0149             % constructor
0150             obj = obj@dataFrame('fileName',args.fileName, ...
0151                 'isHeader',args.isHeader, ...
0152                 'isFormatting',args.isFormatting, ...
0153                 'outFile',outFile);
0154
0155             obj.n = args.n;
0156             obj.name = objName;
0157
0158             % check that the user has not pre-loaded more data than the size
0159             % of the sample.
0160             if ~isempty(obj.data) && length(obj.data{1}) > obj.n
0161                 error('Sample Constructor: Pre-loaded sample size cannot be larger than target sample size {n}');
0162             end
0163
0164             % set up the lock array (private helper)
0165             obj.createLocks();
0166
0167             verbosePrint(['Creation of Sample Object Complete' char(10)], ...
0168                 'sample_constructor_endObjCreation');
0169
0170             % creation of object successful, increment the counter
0171             sampleCount = curCount;
0172         end % constructor
0173         
0174         
0175         %% lockAll() METHOD
0176         function lockAll(obj)
0177             % marks every item currently held in the sample as locked
0178             %
0179             %EXAMPLE:
0180             %    s1.lockAll();
0181
0182             % guard: there has to be data before anything can be locked
0183             if isempty(obj.data)
0184                 error('ERROR: no data in sample to lock');
0185             end
0186
0187             obj.locks(1:length(obj.data{1})) = 1;
0188             verbosePrint(['All items in sample ', obj.name,' are now locked '], ...
0189                     'sample_lockAll_end');
0190         end
0191         
0192         %% unlockAll() METHOD
0193         function unlockAll(obj)
0194             % unlocks all the items in the sample
0195             %
0196             %EXAMPLE:
0197             %    s1.unlockAll();
0198
0199             % clear every lock flag in one assignment
0200             obj.locks(1:length(obj.locks)) = 0;
0201
0202             verbosePrint(['All items in sample ', obj.name,' are now unlocked '], ...
0203                     'sample_unlockAll_end');
0204         end
0205             
0206         %% setPop(population) METHOD
0207         function obj = setPop(obj,population)
0208             % links a sample with a population (and the population with the sample)
0209             %
0210             %PARAMETERS:
0211             %   population - a population object
0212             %
0213             %RETURNS:
0214             %   obj - the sample, with its population property set
0215             %
0216             %EXAMPLE:
0217             %   s1.setPop(p1) % where s1 is a sample, p1 is a population
0218
0219             if ~strcmp(class(population),'population')
0220                 error('{population} must be a population object');
0221             end
0222
0223             % make the sample's column layout compatible with the population's
0224             if isempty(obj.data)
0225                 % no sample data yet: simply copy the population header info
0226                 obj.header = population.header;
0227                 obj.format = population.format;
0228             else
0229                 % the sample already has data: both objects must end up
0230                 % with the same columns
0231                 [obj, population] = dataFrame.mergeHeaders(obj,population);
0232             end
0233
0234             obj.population = population;
0235
0236             % link the population with the sample as well
0237             population.addSample(obj);
0238
0239             verbosePrint(['Sample ', obj.name,'''s population set to: ',obj.population.name], ...
0240                     'sample_setPop_end');
0241         end % setPop
0242         
0243         
0244         
0245         %% popItem(itemIndex) METHOD
0246         function item = popItem(obj,itemIndex)
0247             %pops the item from sample.data at itemIndex and returns it
0248             %
0249             %WARNING!!!:
0250             % Currently, this function pops an item and then shifts up all
0251             % of the subsequent data to fill the empty entries in the data.
0252             % To avoid altering the object locks, this method currently
0253             % will throw an error if the item at itemIndex, or a subsequent
0254             % item, is locked.  This is a known issue, but should not
0255             % interact with the standard swapping procedure or initial
0256             % filling of the items.  If there is demand for this function
0257             % its behavior may be corrected in the future.
0258             %
0259             %CALL:
0260             % item =  <sampleObj>.popItem(itemIndex)
0261             %
0262             %PARAMETERS:
0263             %   itemIndex - the row index of the to-be-popped item
0264             %
0265             %RETURNS:
0266             %   item - the popped item, as returned by dataFrame's popItem
0267
0268             p = inputParser;
0269             p.addRequired('obj');
0270             p.addRequired('itemIndex',@(itemIndex)validateattributes(itemIndex, {'numeric'}, ...
0271                 {'scalar', 'integer', 'positive','>' 0}));
0272             p.parse(obj,itemIndex);
0273
0274             if isempty(obj.data)
0275                 error('No data in sample object - cannot pop item');
0276             elseif itemIndex > length(obj.data{1})
0277                 error('{itemIndex} exceeds row range of data array');
0278             elseif any(obj.locks(itemIndex:end))
0279                 % the pieces below are concatenated as-is, so each one has
0280                 % to carry its own trailing space
0281                 error(['ERROR: Attempted to pop an item, when that item ', ...
0282                     'or a subsequent one in obj.data has its obj.lock ',...
0283                     'active.  See the sample.popItem documentation for more info.']);
0284             end
0285
0286             item = popItem@dataFrame(obj,itemIndex);
0286         end
0287         
0288         
0289         %% appendItem(item) METHOD
0290         function obj = appendItem(obj,item)
0291             % appends {item} to the sample, unless the sample is already full
0292             %
0293             % Warning!!!  Method does not confirm that the column structure
0294             % of {item} (i.e., what columns of data are in what order)
0295             % match that of the sample's data.  This should be the case if
0296             % SOS is manipulating the items since it will only insert items
0297             % into the sample that belong to the corresponding population,
0298             % which should be in sync.  However, manual invocation of this
0299             % method does not have this guarantee.  Use carefully!
0300             %
0301             %CALL:
0302             % <sampleObj>.appendItem(item)
0303             %
0304             %SYNOPSIS:
0305             % raises an error if one more row would exceed the target size
0306             % {n}; otherwise hands {item} to dataFrame's appendItem
0307             %
0308             %PARAMETERS:
0309             %   item - the item to append
0310
0311             %check to make sure haven't exceeded sample size.
0312             if ~isempty(obj.data)
0313                 rowsAfterAppend = length(obj.data{1}) + 1;
0314                 if rowsAfterAppend > obj.n
0315                     error('Sample already full.  Cannot append item.');
0316                 end
0317             end
0318             appendItem@dataFrame(obj,item);
0319         end
0320             
0321         %% swapItems(sIndex,df,dfIndex,sosObj) METHOD
0322         
0323         %% THIS CLASS NEEDS TO BE CORRECTED
0324         
0325         
0326         function obj = swapItems(obj,sIndex,df,dfIndex,sosObj)
0327             % swaps a sample item with an item from another dataframe that shares the (or is the) population associated with that sample
0328             %
0329             %CALL:
0330             %   <sampleObj>.swapItems(sIndex,df,dfIndex,sosObj)
0331             %
0332             %SYNOPSIS:
0333             % Exchanges row {sIndex} of this sample with row {dfIndex} of
0334             % {df}, in both the (raw) data and the (normalized) zdata.
0335             % {df} must either be this sample's population or another
0336             % sample that shares that population.  Both objects must be
0337             % associated with the same sosObj for the swap to occur.  This
0338             % is to ensure that the normalized data that is being swapped
0339             % is normalized relative to the other data in this SOS object,
0340             % and not relative to some other SOS object of which this
0341             % sample/population are also a part.
0342             %
0343             %PARAMETERS:
0344             %   sIndex - row index of the sample item
0345             %   df - the sample's population, or another sample of that population
0346             %   dfIndex - row index of the item in {df}
0347             %   sosObj - sosObj that sample and population are linked to (and usually the object making the function call).
0348             %
0349             %RETURNS:
0350             %   obj - this sample, after the swap
0351             %
0352             %ERRORS:
0353             %   Raised, before anything is modified, when either object has
0354             %   no data or no normalized data, is linked to a different
0355             %   sosObj, does not share the population, when an index is out
0356             %   of range, or when either item is locked.
0357             %
0358             %EXAMPLE:
0359             %   s1.swapItems(2,p1,40,mySos); % sample row 2 <-> row 40 of population p1
0360
0361             %Validate the inputs
0362             p = inputParser;
0363
0364             p.addRequired('obj');
0365             p.addRequired('sIndex',@(itemIndex)validateattributes(itemIndex, {'numeric'}, ...
0366                 {'scalar', 'integer', 'positive','>' 0}));
0367             p.addRequired('df',@(df)any(strcmp(superclasses(df),'dataFrame')));
0368             p.addRequired('dfIndex',@(dfIndex)validateattributes(dfIndex, {'numeric'}, ...
0369                 {'scalar', 'integer', 'positive','>' 0}));
0370             p.addRequired('sosObj',@(sosObj)strcmp(class(sosObj),'sos'));
0371
0372             p.parse(obj,sIndex,df,dfIndex,sosObj);
0373
0374             % {df} gets additional checks below when it is itself a sample
0375             dfIsSample = strcmp(class(df),'sample');
0376
0377             %first make sure both the sample and the dataframe have data
0378             if isempty(obj.data)
0379                 error('Sample data is empty');
0380             end
0381
0382             if isempty(df.data)
0383                 error('Population data is empty');
0384             end
0385
0386             %check that sample and dataframe have been associated with the
0387             %current SOSobject, which should indicate whether any
0388             %normalized data was normalized for this particular SOS object.
0389             if(obj.sosObj ~= sosObj)
0390                 error('Sample is not associated with SOS object executing match');
0391             end
0392
0393             if(df.sosObj ~= sosObj)
0394                 error('Dataframe is not associated with SOS object executing match');
0395             end
0396
0397             %a sample without a population cannot take part in a swap.  Test
0398             %for it explicitly rather than relying on the ~= comparison
0399             %below, which is not meaningful for an empty population.
0400             if isempty(obj.population)
0401                 error('Sample is not associated with a population');
0402             end
0403
0404             %check that the sample and dataframe both share the same
0405             %population
0406             if(obj.population ~= df)
0407                 if ~dfIsSample
0408                     error('If df is not the population for the sample, it should be a sample itself');
0409                 elseif isempty(df.population) || (obj.population ~= df.population)
0410                     error('Sample and Dataframe do not share a population');
0411                 end
0412             end
0413
0414             %check that the sample, {df} and the population contain
0415             %normalized data.  {df} is checked in its own right because,
0416             %when it is another sample, it is df.zdata (not the
0417             %population's zdata) that is read and written below.
0418             if(isempty(obj.zdata) || isempty(df.zdata) || ...
0419                     isempty(obj.population.zdata))
0420                 error('df or sample do not contain normalized data.  Data must be normalized prior to swaps');
0421             end
0422
0423             %validate the indices for the swap:
0424             if(dfIndex > length(df.data{1}))
0425                 error('df index exceeds number of rows in population');
0426             end
0427
0428             if(sIndex > length(obj.data{1}))
0429                 error('sample index exceeds number of rows in sample');
0430             end
0431
0432             %locked items may not be swapped
0433             if(obj.locks(sIndex) == 1)
0434                 error('item at specified sample index is locked');
0435             end
0436
0437             if dfIsSample
0438                 if (df.locks(dfIndex) == 1)
0439                     error('item at specified df index is locked');
0440                 end
0441             end
0442
0443             % Read both items in full before writing anything, so that a
0444             % failure while reading (e.g. a missing zdata column) leaves
0445             % both objects untouched.  Assumes population and sample header
0446             % syncing works as designed, i.e. that column i refers to the
0447             % same variable in obj and in df.
0448             nCols = length(obj.header);
0449             sampleItemData = cell(1,nCols);
0450             sampleItemzData = cell(1,nCols);
0451             dfItemData = cell(1,nCols);
0452             dfItemzData = cell(1,nCols);
0453             for i=1:nCols
0454                 % item leaving the sample
0455                 sampleItemData{i} = obj.data{i}(sIndex);
0456                 sampleItemzData{i} = obj.zdata{i}(sIndex);
0457                 % item leaving the df
0458                 dfItemData{i} = df.data{i}(dfIndex);
0459                 dfItemzData{i} = df.zdata{i}(dfIndex);
0460             end
0461
0462             % Now exchange the two rows.  () indexing is used on both sides
0463             % of every assignment, so the same statements are applied to
0464             % every column regardless of its format.
0465             for i=1:nCols
0466                 % df item -> sample
0467                 obj.data{i}(sIndex) = dfItemData{i};
0468                 obj.zdata{i}(sIndex) = dfItemzData{i};
0469
0470                 % sample item -> df
0471                 df.data{i}(dfIndex) = sampleItemData{i};
0472                 df.zdata{i}(dfIndex) = sampleItemzData{i};
0473             end
0474         end % swapItems()
0475         
0476     end
0477     
0478     methods (Access = private)
0479         
0480         %% createLocks() PRIVATE METHOD
0481         function createLocks(obj)
0482             % creates locks for sample items, using 'isLocked' data from a user-specified file, if applicable
0483             %
0484             % obj.locks always ends up as a row vector (0 = can be swapped,
0485             % 1 = locked).  If the header has an 'isLocked' column, it is
0486             % removed from header, format and data, and its values become the
0487             % locks of the pre-loaded items; all remaining entries are 0.
0488
0489             % default: nothing is locked
0490             obj.locks = zeros(1,obj.n);
0491
0492             % position of the 'isLocked' column (the last one, if repeated)
0493             lockIndex = find(strcmp(obj.header,'isLocked'),1,'last');
0494             if isempty(lockIndex)
0495                 return;
0496             end
0497
0498             % locks were specified: remove the column from the data frame
0499             obj.header(lockIndex) = [];
0500             obj.format(lockIndex) = [];
0501             if ~isempty(obj.data)
0502                 % reshape to a row first: a column-oriented obj.data{lockIndex}
0503                 % could not be concatenated with the 1-by-k zero padding
0504                 fileLocks = reshape(obj.data{lockIndex},1,[]);
0505                 obj.locks = [fileLocks zeros(1,obj.n-length(obj.data{1}))];
0506                 obj.data(lockIndex) = [];
0507             end
0508         end % createLocks
0509         
0510         
0511     end
0512     
0513     methods (Static)
0514         
0515         %% p = sampleInputParser() STATIC METHOD
0516         function p = sampleInputParser()
0517             %generates the inputParser (required n plus param/value pairs, with validators) used by the constructor
0518             %RETURNS: p - inputParser, configured but not yet run on any arguments
0519             p = inputParser;
0520             
0521             %n is required and must be a positive integer scalar.  The
0522             %param/value pairs default to 'null' (fileName, outFile),
0523             %false (isHeader, isFormatting) and 'noname' (name).
0524             p.addRequired('n',@(n)validateattributes(n, {'numeric'}, ...
0525                 {'scalar', 'integer', 'positive', '>', 0}));
0526             p.addParamValue('fileName','null', ...
0527                 @(fileName)validFileNameOrNull(fileName));
0528             p.addParamValue('isHeader',false, ...
0529                 @(isHeader)validLogical(isHeader));
0530             p.addParamValue('isFormatting',false, ...
0531                 @(isFormatting)validLogical(isFormatting));
0532             p.addParamValue('name','noname', ...
0533                 @(name)ischar(name));
0534             %Note that outFile is not validated as a writable path at this
0535             %point; this will be done when it comes time to write to the
0536             %file.  Motivation is that no file (and directory structure)
0537             %should be created unless the user ultimately wants to write
0538             %something.
0539             p.addParamValue('outFile','null', ...
0540                 @(outFile)validStringOrNull(outFile));     
0541         end % sampleInputParser
0542     end
0543     
0544     
0545     methods (Static, Access =private)
0546         
0547         %% p = parseConstructorArgs(n,varargin) STATIC PRIVATE METHOD
0548         function p = parseConstructorArgs(n,varargin)
0549             % parses arguments from sample constructor
0550             %
0551             %CALL:
0552             % p = sample.parseConstructorArgs(n,varargin)
0553             %
0554             %SYNOPSIS:
0555             %runs the sample constructor's arguments through the parser built
0556             %by sample.sampleInputParser(), which substitutes default values
0557             %where appropriate.
0558             %
0559             %PARAMETERS:
0560             % n - the constructor's required argument
0561             % varargin - the constructor's varargin cell, passed on as a
0562             %            single argument
0563             %
0564             %RETURNS:
0565             % p - the inputParser after parsing (see p.Results and
0566             %     p.UsingDefaults)
0567
0568             %the constructor's varargin cell arrives wrapped inside this
0569             %function's own varargin, so unwrap one layer before expanding
0570             ctorArgs = varargin{1};
0571
0572             p = sample.sampleInputParser();
0573             p.parse(n,ctorArgs{:});
0574         end % parseConstructorArgs
0575                        
0576     end
0577 end
0578

Generated on Fri 27-Jan-2012 16:18:41 by m2html © 2005