Skip to content

Commit

Permalink
Merge pull request #1 from blazetopher/initial_functions
Browse files Browse the repository at this point in the history
Adds initial set of functions
  • Loading branch information
lukecampbell committed Mar 8, 2013
2 parents 7277975 + 8458845 commit 1cc3b9c
Show file tree
Hide file tree
Showing 14 changed files with 1,352 additions and 0 deletions.
4 changes: 4 additions & 0 deletions ion_functions/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,5 @@
from version import version as __version__

from utils import isnumeric, isreal, isvector, isscalar, isempty
from qc import *
from data import *
1 change: 1 addition & 0 deletions ion_functions/data/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from data_functions import *
8 changes: 8 additions & 0 deletions ion_functions/data/test/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/usr/bin/env python

"""
@package
@file __init__.py
@author Christopher Mueller
@brief
"""
12 changes: 12 additions & 0 deletions ion_functions/data/test/data_functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/usr/bin/env python

"""
@package ion_functions.data.data_functions
@file ion_functions/data/data_functions.py
@author Christopher Mueller
@brief Module containing data-calculation functions. Primarily used for calculating values in Parameter Functions
"""


def data_density(*args):
    """Placeholder for a data-density calculation.

    Accepts any number of positional arguments, ignores them, and
    returns ``None``; no calculation is implemented yet.
    """
    return None
1 change: 1 addition & 0 deletions ion_functions/qc/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from qc_functions import *
43 changes: 43 additions & 0 deletions ion_functions/qc/matlab_scripts/dataqc_globalrangetest.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
% DATAQC_GLOBALRANGETEST Data quality control algorithm testing
% if measurements fall into a user-defined valid range.
% Returns 1 for presumably good data and 0 for data presumed bad.
%
% Time-stamp: <2010-07-28 15:16:00 mlankhorst>
%
% USAGE: out=dataqc_globalrangetest(dat,validrange);
%
% out: Boolean, 0 if value is outside range, else 1.
% dat: Input dataset, any scalar, vector, or matrix.
% Must be numeric and real.
% validrange: Two-element vector with the minimum and
% maximum values considered to be valid
%
% EXAMPLE:
%
% >> x=[17 16 17 18 25 19];
% >> qc=dataqc_globalrangetest(x,[10 20])
%
% qc =
%
% 1 1 1 1 0 1
%
%
function out=dataqc_globalrangetest(dat,datlim)
% Flags every element of DAT with 1 when it lies inside the closed
% interval spanned by the two values in DATLIM, and with 0 otherwise.
% OUT has the same size as DAT.

if ~isnumeric(dat)
  error('DAT must be numeric.')
end
if ~isreal(dat)
  error('DAT must be real.')
end
if ~isnumeric(datlim)
  error('VALIDRANGE must be numeric.')
end
if ~isreal(datlim)
  error('VALIDRANGE must be real.')
end
% NUMEL rather than LENGTH: LENGTH is the size of the largest dimension,
% so a 2-by-2 (or 2-by-1-by-2, ...) array would otherwise slip through.
if numel(datlim)~=2
  error('VALIDRANGE must be two-element vector.')
end
% Accept the limits in either order.
datlim=[min(datlim(:)) max(datlim(:))];
% The terminating semicolon keeps the result from being echoed to the
% command window on every call.
out=(dat>=datlim(1))&(dat<=datlim(2));
86 changes: 86 additions & 0 deletions ion_functions/qc/matlab_scripts/dataqc_polytrendtest.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
% DATAQC_POLYTRENDTEST Data quality control algorithm testing
% if measurements contain a significant portion of a polynomial.
% Returns 1 if this is not the case, else 0.
%
% Time-stamp: <2010-10-29 13:56:46 mlankhorst>
%
% RATIONALE: The purpose of this test is to check if a significant
% fraction of the variability in a time series can be explained
% by a drift, possibly interpreted as a sensor drift. This drift
% is assumed to be a polynomial of order ORD. Use ORD=1 to
% consider a linear drift.
%
% METHODOLOGY: The time series DAT is passed to MatLab's POLYFIT
% routine to obtain a polynomial fit PP to DAT, and the
% difference DAT-PP is compared to the original DAT. If the
% standard deviation of (DAT-PP) is less than that of DAT by a
% factor of NSTD, the time series is assumed to contain a
% significant trend (output will be 0), else not (output will be
% 1).
%
% USAGE: OUT=dataqc_polytrendtest(DAT,ORD,NSTD);
%
% OUT: Boolean scalar, 0 if trend is detected, 1 if not.
%
% DAT: Input dataset, a numeric real vector.
% ORD (optional, defaults to 1): Polynomial order.
% NSTD (optional, defaults to 3): Factor by how much the
% standard deviation must be reduced before OUT
% switches from 1 to 0
%
function out=dataqc_polytrendtest(varargin)
% Fits a polynomial of order ORD to DAT (against the sample index) and
% returns 0 when NSTD times the standard deviation of the residual is
% still below the standard deviation of DAT, i.e. when a trend is
% detected; returns 1 otherwise.
error(nargchk(1,3,nargin,'struct'))
dat=varargin{1};
if ~isnumeric(dat)
  error('DAT must be numeric.')
end
if ~isvector(dat)
  error('DAT must be vector.')
end
if ~isreal(dat)
  error('DAT must be real.')
end

% Optional arguments: an omitted or empty value keeps the default.
ord=1;
nstd=3;
if nargin>=2 && ~isempty(varargin{2})
  ord=varargin{2};
end
if nargin>=3 && ~isempty(varargin{3})
  nstd=varargin{3};
end
if ~isnumeric(ord)
  error('ORD must be numeric.')
end
if ~isscalar(ord)
  error('ORD must be scalar.')
end
if ~isreal(ord)
  error('ORD must be real.')
end
if ~isnumeric(nstd)
  error('NSTD must be numeric.')
end
if ~isscalar(nstd)
  error('NSTD must be scalar.')
end
if ~isreal(nstd)
  error('NSTD must be real.')
end
ord=round(abs(ord));
nstd=abs(nstd);

% Give the abscissa the same orientation as DAT.  POLYVAL returns its
% result in the shape of X, so with a row X and a column DAT the
% residual DAT-DATPP would either raise a dimension error or be
% expanded into an N-by-N matrix, producing a meaningless verdict.
x=reshape(1:length(dat),size(dat));
pp=polyfit(x,dat,ord);
datpp=polyval(pp,x);
if (nstd*std(dat-datpp))<std(dat)
  out=0;
else
  out=1;
end
142 changes: 142 additions & 0 deletions ion_functions/qc/matlab_scripts/dataqc_spiketest.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
% DATAQC_SPIKETEST Data quality control algorithm testing a time
% series for spikes. Returns 1 for presumably
% good data and 0 for data presumed bad.
%
% Time-stamp: <2010-07-28 14:25:42 mlankhorst>
%
% METHODOLOGY: The time series is divided into windows of length L
% (an odd integer number). Then, window by window, each value is
% compared to its (L-1) neighboring values: a range R of these
% (L-1) values is computed (max. minus min.), and replaced with
% the measurement accuracy ACC if ACC>R. A value is presumed to
% be good, i.e. no spike, if it deviates from the mean of the
% (L-1) peers by less than a multiple of the range, N*max(R,ACC).
%
% Further than (L-1)/2 values from the start or end points, the
% peer values are symmetrically before and after the test
% value. Within that range of the start and end, the peers are
% the first/last L values (without the test value itself).
%
% The purpose of ACC is to restrict spike detection to deviations
% exceeding a minimum threshold value (N*ACC) even if the data
% have little variability. Use ACC=0 to disable this behavior.
%
%
% USAGE: out=dataqc_spiketest(dat,acc,N,L);
% OR: out=dataqc_spiketest(dat,acc);
%
% out: Boolean. 0 for detected spike, else 1.
% dat: Input dataset, a real numeric vector.
% acc: Accuracy of any input measurement.
% N (optional, defaults to 5): Range multiplier, cf. above
% L (optional, defaults to 5): Window length, cf. above
%
% EXAMPLE:
%
% >> x=[-4 3 40 -1 1 -6 -6 1];
% >> dataqc_spiketest(x,.1)
%
% ans =
%
% 1 1 0 1 1 1 1 1
%
function out=dataqc_spiketest(varargin)
% Compares every sample of DAT with its L-1 peers and returns a logical
% array of the same size as DAT: 1 where the sample deviates from the
% peer mean by less than N*max(R,ACC), with R the peer range, else 0.

error(nargchk(2,4,nargin,'struct'))
dat=varargin{1};
acc=varargin{2};

% Optional arguments: an omitted or empty value keeps the default.
N=5;
L=5;
if nargin>=3 && ~isempty(varargin{3})
  N=varargin{3};
end
if nargin>=4 && ~isempty(varargin{4})
  L=varargin{4};
end
if ~isnumeric(dat)
  error('DAT must be numeric.')
end
if ~isvector(dat)
  error('DAT must be a vector.')
end
if ~isreal(dat)
  error('DAT must be real.')
end
if ~isnumeric(acc)
  error('ACC must be numeric.')
end
if ~isscalar(acc)
  error('ACC must be scalar.')
end
if ~isreal(acc)
  error('ACC must be real.')
end
if ~isnumeric(N)
  error('N must be numeric.')
end
if ~isscalar(N)
  error('N must be scalar.')
end
if ~isreal(N)
  error('N must be real.')
end
if ~isnumeric(L)
  error('L must be numeric.')
end
if ~isscalar(L)
  error('L must be scalar.')
end
if ~isreal(L)
  error('L must be real.')
end

% Force the window length to an odd integer of at least 3.
L=ceil(abs(L));
if (L/2)==round(L/2)
  L=L+1;
  warning('L was even; setting L:=L+1')
end
if L<3
  L=5;
  warning('L was too small; setting L:=5')
end
ll=length(dat);

% Pre-allocate the result.  Without this, OUT is undefined when DAT is
% shorter than L, is truncated when the trailing samples are spikes,
% and does not follow the orientation of DAT.
out=zeros(size(dat));

L2=(L-1)/2;

if ll>=L
  for ii=1:ll
    % Pick the indices of the L-1 peers of sample ii.
    if ii<=L2
      % Too close to the start: use the first L samples.
      peers=[1:ii-1 ii+1:L];
    elseif ii>ll-L2
      % Too close to the end: use the last L samples.
      peers=[ll-L+1:ii-1 ii+1:ll];
    else
      % Interior: L2 samples on either side.
      peers=ii+[-L2:-1 1:L2];
    end
    tmpdat=dat(peers);
    % Peer range, floored at the measurement accuracy.
    R=max(tmpdat)-min(tmpdat);
    R=max([R acc]);
    if (N*R)>abs(dat(ii)-mean(tmpdat))
      out(ii)=1;
    end
  end
else
  warning('L was greater than length of DAT, returning zeros.')
end
% Return the documented Boolean type.
out=logical(out);
83 changes: 83 additions & 0 deletions ion_functions/qc/matlab_scripts/dataqc_stuckvaluetest.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
% DATAQC_STUCKVALUETEST Data quality control algorithm testing a
% time series for "stuck values", i.e. repeated occurrences of
% one value. Returns 1 for presumably good data and 0 for data
% presumed bad.
%
% Time-stamp: <2011-10-31 11:20:23 mlankhorst>
%
% USAGE: OUT=dataqc_stuckvaluetest(X,RESO,NUM);
%
% OUT: Boolean output: 0 where stuck values are found,
% 1 elsewhere.
% X: Input time series (vector, numeric).
% RESO: Resolution; repeat values less than RESO apart will
% be considered "stuck values".
% NUM: Minimum number of successive values within RESO of
% each other that will trigger the "stuck value". NUM
% is optional and defaults to 10 if omitted or empty.
%
% EXAMPLE:
%
% >> x=[4.83 1.40 3.33 3.33 3.33 3.33 4.09 2.97 2.85 3.67];
%
% >> dataqc_stuckvaluetest(x,.001,4)
%
% ans =
%
% 1 1 0 0 0 0 1 1 1 1
%
function out=dataqc_stuckvaluetest(varargin)
% Slides a window of NUM samples along X and marks the whole window
% with 0 when every sample in it differs from the first sample of the
% window by less than RESO.  All other samples are marked with 1.

error(nargchk(2,3,nargin,'struct'))
x=varargin{1};
reso=varargin{2};

% NUM is optional: an omitted or empty third argument keeps the default.
num=10;
if nargin==3 && ~isempty(varargin{3})
  num=varargin{3};
end

if ~isnumeric(x)
  error('X must be numeric.')
end
if ~isvector(x)
  error('X must be a vector.')
end

if ~isnumeric(reso)
  error('RESO must be numeric.')
end
if ~isscalar(reso)
  error('RESO must be a scalar.')
end
if ~isreal(reso)
  error('RESO must be real.')
end
reso=abs(reso);

if ~isnumeric(num)
  error('NUM must be numeric.')
end
if ~isscalar(num)
  error('NUM must be a scalar.')
end
if ~isreal(num)
  error('NUM must be real.')
end
num=abs(num);

npts=length(x);
if npts<num
  % Not even one full window fits.
  warning('NUM is greater than length(X). Returning zeros.')
  out=false(size(x));
else
  % Start from "all good" and knock out every stuck window.
  out=true(size(x));
  laststart=npts-num+1;
  for start=1:laststart
    window=start:start+num-1;
    spread=abs(x(start)-x(window));
    if all(spread<reso)
      out(window)=false;
    end
  end
end
Loading

0 comments on commit 1cc3b9c

Please sign in to comment.