diff --git a/qctests/CSIRO_gradient.py b/qctests/CSIRO_constant_bottom.py similarity index 50% rename from qctests/CSIRO_gradient.py rename to qctests/CSIRO_constant_bottom.py index a9606c8..64539eb 100644 --- a/qctests/CSIRO_gradient.py +++ b/qctests/CSIRO_constant_bottom.py @@ -1,6 +1,6 @@ """ -Implements the gradient test of DOI: 10.1175/JTECHO539.1 -All questionable features result in a flag, in order to minimize false negatives +Implements CSIRO's constant-bottom check +All questionable features result in a flag, in order to minimize false negatives """ import numpy @@ -14,15 +14,13 @@ def test(p): # Get temperature values from the profile. t = p.t() - # Get depth values (m) from the profile. + # depths d = p.z() # is this an xbt? isXBT = p.probe_type() == 2 - - assert len(t.data) == len(d.data), 'Number of temperature measurements does not equal number of depth measurements.' + latitude = p.latitude() # initialize qc as a bunch of falses; - # implies all measurements pass when a gradient can't be calculated, such as at edges & gaps in data: qc = numpy.zeros(len(t.data), dtype=bool) # check for gaps in data @@ -30,13 +28,9 @@ def test(p): isDepth = (d.mask==False) isData = isTemperature & isDepth - for i in range(0,len(t.data)-1): - if isData[i] & isData[i+1]: - - gradient = (d.data[i+1] - d.data[i]) / (t.data[i+1] - t.data[i]) - - # gradient flag - if gradient > -0.4 and gradient < 12.5: - qc[i] = True + # constant temperature at bottom of profile, for latitude > -40 and bottom two depths at least 30m apart: + if isData[-1] and isData[-2] and isXBT: + if t.data[-1] == t.data[-2] and latitude > -40 and d.data[-1] - d.data[-2] > 30: + qc[-1] = True - return qc + return qc \ No newline at end of file diff --git a/qctests/CSIRO_long_gradient.py b/qctests/CSIRO_long_gradient.py new file mode 100644 index 0000000..ad3308d --- /dev/null +++ b/qctests/CSIRO_long_gradient.py @@ -0,0 +1,52 @@ +""" +Implements CSIRO's long gradient check +All questionable features result in a flag, in order to minimize false negatives +""" + +import numpy + +def test(p): + """ + Runs the quality control check on profile p and returns a numpy array + of quality control decisions with False where the data value has + passed the check and True where it failed. + """ + + # depths + d = p.z() + # temperatures + t = p.t() + + # initialize qc as a bunch of falses; + qc = numpy.zeros(len(t.data), dtype=bool) + + # check for gaps in data + isDepth = (d.mask==False) + isTemperature = (t.mask==False) + isData = isTemperature & isDepth + + on_inv = False # are we currently in an inversion? + + for i in range(0, p.n_levels()-1 ): + if isData[i] and isData[i+1]: + # not interested below 5m: + if d.data[i] < 5: continue + + if t.data[i+1] > t.data[i] and not on_inv: + # entering an inversion + start_inv_temp = t.data[i] + start_inv_depth = d.data[i] + potential_flag = i + on_inv = True + + if t.data[i+1] < t.data[i] and on_inv: + # exiting the inversion + end_inv_temp = t.data[i] + end_inv_depth = d.data[i] + on_inv = False + gradlong = (end_inv_depth - start_inv_depth) / (end_inv_temp - start_inv_depth) + + if abs(gradlong) < 4: + qc[potential_flag] = True + + return qc \ No newline at end of file diff --git a/qctests/CSIRO_short_gradient.py b/qctests/CSIRO_short_gradient.py new file mode 100644 index 0000000..959756a --- /dev/null +++ b/qctests/CSIRO_short_gradient.py @@ -0,0 +1,37 @@ +""" +Implements CSIRO's short gradient check +All questionable features result in a flag, in order to minimize false negatives +""" + +import numpy + +def test(p): + """ + Runs the quality control check on profile p and returns a numpy array + of quality control decisions with False where the data value has + passed the check and True where it failed. + """ + + # depths + d = p.z() + # temperatures + t = p.t() + + # initialize qc as a bunch of falses; + qc = numpy.zeros(len(t.data), dtype=bool) + + # check for gaps in data + isDepth = (d.mask==False) + isTemperature = (t.mask==False) + isData = isTemperature & isDepth + + for i in range(0, p.n_levels()-1 ): + if isData[i] and isData[i+1]: + deltaD = (d.data[i+1] - d.data[i]) + deltaT = (t.data[i+1] - t.data[i]) + gradshort = deltaD / deltaT + if (deltaT > 0.5 and deltaD < 30) or abs(gradshort) < 0.4 or (gradshort > 0 and gradshort < 12.5): + if abs(deltaT) > 0.4: + qc[i] = True + + return qc \ No newline at end of file diff --git a/qctests/CSIRO_surface_spikes.py b/qctests/CSIRO_surface_spikes.py new file mode 100644 index 0000000..f88c601 --- /dev/null +++ b/qctests/CSIRO_surface_spikes.py @@ -0,0 +1,38 @@ +""" +Implements CSIRO's surface spike check +All questionable features result in a flag, in order to minimize false negatives +""" + +import numpy + +def test(p): + """ + Runs the quality control check on profile p and returns a numpy array + of quality control decisions with False where the data value has + passed the check and True where it failed. + """ + + # depths + d = p.z() + # is this an xbt? + isXBT = p.probe_type() == 2 + + # initialize qc as a bunch of falses; + qc = numpy.zeros(len(d.data), dtype=bool) + + # check for gaps in data + isDepth = (d.mask==False) + + if not isXBT: + return qc + + # flag any level that is shallower than 4m and is followed by a level shallower than 8m. + for i in range(p.n_levels()): + if isDepth[i]: + if d.data[i] < 4 and i < p.n_levels()-1: #only interested in depths less than 4m and not at the bottom of the profile. + if d.data[i+1] < 8: + qc[i] = True + else: + break + + return qc \ No newline at end of file diff --git a/qctests/CSIRO_wire_break.py b/qctests/CSIRO_wire_break.py index 0191d15..68fc2e7 100644 --- a/qctests/CSIRO_wire_break.py +++ b/qctests/CSIRO_wire_break.py @@ -24,9 +24,8 @@ def test(p): isTemperature = (t.mask==False) # wire breaks at bottom of profile: - i = len(t.data)-1 - if isTemperature[i] and isXBT: - if t.data[i] < -2.8 or t.data[i] > 36: - qc[i] = True + if isTemperature[-1] and isXBT: + if t.data[-1] < -2.8 or t.data[-1] > 36: + qc[-1] = True return qc \ No newline at end of file diff --git a/tests/CSIRO_constant_bottom_validation.py b/tests/CSIRO_constant_bottom_validation.py new file mode 100644 index 0000000..e142ec5 --- /dev/null +++ b/tests/CSIRO_constant_bottom_validation.py @@ -0,0 +1,43 @@ +import qctests.CSIRO_constant_bottom +import util.testingProfile +import numpy + +##### CSIRO_constant_bottom --------------------------------------------------- + +def test_CSIRO_constant_bottom(): + ''' + Spot-check the nominal behavior of the CSIRO constant bottom test. + ''' + + # nominal + p = util.testingProfile.fakeProfile([0,0,0], [0,100,200], latitude=0, longitude=0, probe_type=2) + qc = qctests.CSIRO_constant_bottom.test(p) + truth = numpy.zeros(3, dtype=bool) + truth[2] = True + + assert numpy.array_equal(qc, truth), 'failed to flag a constant temperature at bottom of profile' + + # inappropriate probe type + p = util.testingProfile.fakeProfile([0,0,0], [0,100,200], latitude=0, longitude=0, probe_type=1) + qc = qctests.CSIRO_constant_bottom.test(p) + truth = numpy.zeros(3, dtype=bool) + + assert numpy.array_equal(qc, truth), 'flagged a constant temperature for an inappropriate probe type' + + # inappropriate latitude + p = util.testingProfile.fakeProfile([0,0,0], [0,100,200], latitude=-41, longitude=0, probe_type=2) + qc = qctests.CSIRO_constant_bottom.test(p) + + assert numpy.array_equal(qc, truth), 'flagged a constant temperature for an inappropriate latitude' + + # inappropriate depth difference + p = util.testingProfile.fakeProfile([0,0,0], [0,100,100], latitude=0, longitude=0, probe_type=2) + qc = qctests.CSIRO_constant_bottom.test(p) + + assert numpy.array_equal(qc, truth), 'flagged a constant temperature for an inappropriate depth distance' + + # not at bottom of profile + p = util.testingProfile.fakeProfile([0,0, -1], [100,200,300], latitude=0, longitude=0, probe_type=2) + qc = qctests.CSIRO_constant_bottom.test(p) + + assert numpy.array_equal(qc, truth), 'flagged a constant temperature not at the bottom of the profile' \ No newline at end of file diff --git a/tests/CSIRO_gradient_validation.py b/tests/CSIRO_gradient_validation.py deleted file mode 100644 index 34090d5..0000000 --- a/tests/CSIRO_gradient_validation.py +++ /dev/null @@ -1,22 +0,0 @@ -import qctests.CSIRO_gradient -import util.testingProfile -import numpy - -##### CSIRO_gradient_test --------------------------------------------------- - -def test_CSIRO_gradient(): - ''' - Spot-check some values in the CSIRO gradient test. - ''' - - p = util.testingProfile.fakeProfile([0,10,0,0.1,-24.9,-24.1], [10,20,30,40,50,60]) - qc = qctests.CSIRO_gradient.test(p) - truth = numpy.zeros(6, dtype=bool) - truth[0] = True; # in range - truth[1] = False; # too low - truth[2] = False; # too high - truth[3] = False; # right on the low margin - truth[4] = False; # right on the high margin - truth[5] = False; # can't caluclate gradient at endpoint - - assert numpy.array_equal(qc, truth), 'nominal beahvior failure in CSIRO_gradient' \ No newline at end of file diff --git a/tests/CSIRO_long_gradient_validation.py b/tests/CSIRO_long_gradient_validation.py new file mode 100644 index 0000000..6e69f68 --- /dev/null +++ b/tests/CSIRO_long_gradient_validation.py @@ -0,0 +1,34 @@ +import qctests.CSIRO_long_gradient +import util.testingProfile +import numpy + +##### CSIRO_long_gradient --------------------------------------------------- + +def test_CSIRO_long_gradient(): + ''' + Spot-check the nominal behavior of the CSIRO long gradient test. + ''' + + # nominal + p = util.testingProfile.fakeProfile([20,10,15,15,15,10], [0,5,10,15,20,25]) + qc = qctests.CSIRO_long_gradient.test(p) + truth = numpy.zeros(6, dtype=bool) + truth[1] = True + + assert numpy.array_equal(qc, truth), 'failed to flag a nominal long inversion' + + # gradlong too large + p = util.testingProfile.fakeProfile([20,10,15,15,15,10], [0,10,20,30,40,50]) + qc = qctests.CSIRO_long_gradient.test(p) + truth = numpy.zeros(6, dtype=bool) + + assert numpy.array_equal(qc, truth), 'should not flag a long inversion with such a large gradient' + + # too shallow + p = util.testingProfile.fakeProfile([20,10,15,15,15,10], [0,1,6,11,16,21]) + qc = qctests.CSIRO_long_gradient.test(p) + truth = numpy.zeros(6, dtype=bool) + + print truth, qc + + assert numpy.array_equal(qc, truth), 'should not flag a long inversion that begins at < 5m' \ No newline at end of file diff --git a/tests/CSIRO_short_gradient_validation.py b/tests/CSIRO_short_gradient_validation.py new file mode 100644 index 0000000..9f6563c --- /dev/null +++ b/tests/CSIRO_short_gradient_validation.py @@ -0,0 +1,30 @@ +import qctests.CSIRO_short_gradient +import util.testingProfile +import numpy + +##### CSIRO_short_gradient --------------------------------------------------- + +def test_CSIRO_short_gradient(): + ''' + Spot-check the nominal behavior of the CSIRO short gradient test. + ''' + + # nominal + p = util.testingProfile.fakeProfile([0,1], [0,10]) + qc = qctests.CSIRO_short_gradient.test(p) + truth = numpy.zeros(2, dtype=bool) + truth[0] = True + + assert numpy.array_equal(qc, truth), 'failed to flag a gradient at small delta-temp and delta-depth' + + p = util.testingProfile.fakeProfile([0,10], [0,100]) + qc = qctests.CSIRO_short_gradient.test(p) + + assert numpy.array_equal(qc, truth), 'failed to flag a gradient outside of delta temp and depth ranges, but inside gradshort ranges' + + # temperature change too small + p = util.testingProfile.fakeProfile([0,0.1], [0,0.2]) + qc = qctests.CSIRO_short_gradient.test(p) + truth = numpy.zeros(2, dtype=bool) + + assert numpy.array_equal(qc, truth), 'flagged a gradient even though its temperature change was too small to consider' \ No newline at end of file diff --git a/tests/CSIRO_surface_spikes.py b/tests/CSIRO_surface_spikes.py new file mode 100644 index 0000000..03c4250 --- /dev/null +++ b/tests/CSIRO_surface_spikes.py @@ -0,0 +1,33 @@ +import qctests.CSIRO_surface_spikes +import util.testingProfile +import numpy + +##### CSIRO_surface_spikes --------------------------------------------------- + +def test_CSIRO_surface_spikes(): + ''' + Spot-check the nominal behavior of the CSIRO CSIRO_surface_spikes test. + ''' + + # nominal + p = util.testingProfile.fakeProfile([0,0,0], [1,2,3], probe_type=2) + qc = qctests.CSIRO_surface_spikes.test(p) + truth = numpy.zeros(3, dtype=bool) + truth[0] = True + truth[1] = True + + assert numpy.array_equal(qc, truth), 'failed to flag a collection of shallow profiles' + + # inappropriate probe type + p = util.testingProfile.fakeProfile([0,0,0], [1,2,3], probe_type=1) + qc = qctests.CSIRO_surface_spikes.test(p) + truth = numpy.zeros(3, dtype=bool) + + assert numpy.array_equal(qc, truth), 'flagged shallow profiles for an inappropriate probe type' + + # no cluster + p = util.testingProfile.fakeProfile([0,0,0], [1,20,30], probe_type=1) + qc = qctests.CSIRO_surface_spikes.test(p) + truth = numpy.zeros(3, dtype=bool) + + assert numpy.array_equal(qc, truth), 'flagged shallow profile without a cluster' \ No newline at end of file diff --git a/util/splitData.py b/util/splitData.py index 9480ba6..52d728e 100644 --- a/util/splitData.py +++ b/util/splitData.py @@ -1,37 +1,43 @@ -# utility to split a raw datafile up into n roughly equal parts. +# utility to split a raw datafile up into n roughly equal parts, keeping all profiles for a given cruise in the same file -import main import math from wodpy import wod -#filename = '../../AutoQC_raw/quota/quota_all.dat' -#filename = '../data/quota_subset.dat' -filename = '../../AutoQC_raw/quota/test/chunk.dat' +filename = '../quota_all.dat' n = 30 fid = open(filename) fid.read() fileSize = fid.tell() -chunkSize = int(math.ceil(fileSize / n)); +chunkSize = int(math.ceil(fileSize / n)); # final files should be about this big -fileNo = 0 -start = 0 -end = 0 - -target = open('split-' + str(fileNo) + '.dat', 'w') +# identify cruise numbers, profile start and profile end positions for all profiles +markers = [] fid.seek(0) -while not (fid.read(1) == ''): - #write next chunk to open target - fid.seek(end) +while True: start = fid.tell() profile = wod.WodProfile(fid) end = fid.tell() - fid.seek(start) - extract = fid.read(end-start) - target.write(extract) + markers.append( (profile.cruise(), start, end) ) + if profile.is_last_profile_in_file(fid) == True: + break +# sort by cruise number +markers = sorted(markers, key=lambda x: x[0]) - #wrap the file and start a new one once we've crossed the max size - if target.tell() > chunkSize: +# write subfiles +fileNo = 0 +currentCruise = None +target = open('split-' + str(fileNo) + '.dat', 'w') +for i in range(len(markers)): + lastCruise = currentCruise + currentCruise = markers[i][0] + # switch out to the next file when we pass chunksize AND are finished the current cruise + if target.tell() > chunkSize and currentCruise != lastCruise and None not in [lastCruise, currentCruise]: target.close() fileNo += 1 target = open('split-' + str(fileNo) + '.dat', 'w') + fid.seek(markers[i][1]) + extract = fid.read(markers[i][2]-markers[i][1]) + target.write(extract) + +target.close() \ No newline at end of file