Added text to demo and made optics diagnostic more versatile (#23)

* Added text to demo and made optics diagnostic more versatile * Fixed comments from Callum --------- Co-authored-by: ChiaraMonforte <[email protected]>
OceanGlidersCommunity · Jul 12, 2024 · 8b7c405 · 8b7c405
1 parent 09514f8
commit 8b7c405
Show file tree

Hide file tree

Showing 3 changed files with 144 additions and 44 deletions.
diff --git a/glidertest/tools.py b/glidertest/tools.py
@@ -195,50 +195,56 @@ def plot_basic_vars(ds, v_res=1, start_prof=0, end_prof=-1):
     [a.grid() for a in ax]
 
 
-def chl_first_check(ds):
+def optics_first_check(ds, var='CHLA'):
     """
-    Function to asses any drift in deep chlorophyll data and the presence of any possible negative data
+    Function to asses any drift in deep optics data and the presence of any possible negative data
     This function returns plots and text
     """
-
+    if var not in ds.variables:
+        print(f"{var} does not exist in the dataset. Make sure the spelling is correct or add this variable to your dataset")
+        return
     # Check how much negative data there is
-    neg_chl = np.round((len(np.where(ds.CHLA < 0)[0]) * 100) / len(ds.CHLA), 1)
+    neg_chl = np.round((len(np.where(ds[var] < 0)[0]) * 100) / len(ds[var]), 1)
     if neg_chl > 0:
-        print(f'{neg_chl}% of scaled chlorophyll data is negative, consider recalibrating data')
+        print(f'{neg_chl}% of scaled {var} data is negative, consider recalibrating data')
         # Check where the negative values occur and if we just see them at specific time of the mission or not
-        start = ds.TIME[np.where(ds.CHLA < 0)][0]
-        end = ds.TIME[np.where(ds.CHLA < 0)][-1]
-        min_z = (np.round(ds.DEPTH[np.where(ds.CHLA < 0)].min().values, 1))
-        max_z = (np.round(ds.DEPTH[np.where(ds.CHLA < 0)].max().values, 1))
+        start = ds.TIME[np.where(ds[var] < 0)][0]
+        end = ds.TIME[np.where(ds[var] < 0)][-1]
+        min_z = ds.DEPTH[np.where(ds[var] < 0)].min().values
+        max_z = ds.DEPTH[np.where(ds[var] < 0)].max().values
         print(f'Negative data in present from {str(start.values)[:16]} to {str(end.values)[:16]}')
-        print(f'Negative data is present between {min_z} and {max_z} ')
+        print(f'Negative data is present between {"%.1f" % np.round(min_z, 1)} and {"%.1f" % np.round(max_z, 1)} ')
     else:
         print('There is no negative scaled chlorophyll data, recalibration and further checks are still recommended ')
     # Check if there is any missing data throughout the mission
-    if len(ds.TIME) != len(ds.CHLA.dropna(dim='N_MEASUREMENTS').TIME):
+    if len(ds.TIME) != len(ds[var].dropna(dim='N_MEASUREMENTS').TIME):
         print('Chlorophyll data is missing for part of the mission')  # Add to specify where the gaps are
     else:
         print('Chlorophyll data is present for the entire mission duration')
     # Check bottom dark count and any drift there
-    bottom_chl_data = ds.CHLA.where(ds.CHLA.DEPTH > ds.DEPTH.max() - (ds.DEPTH.max() * 0.1)).dropna(
+    bottom_opt_data = ds[var].where(ds[var].DEPTH > ds.DEPTH.max() - (ds.DEPTH.max() * 0.1)).dropna(
         dim='N_MEASUREMENTS')
-    slope, intercept, r_value, p_value, std_err = stats.linregress(np.arange(0, len(bottom_chl_data)), bottom_chl_data)
-    ax = sns.regplot(data=ds, x=np.arange(0, len(bottom_chl_data)), y=bottom_chl_data,
+    slope, intercept, r_value, p_value, std_err = stats.linregress(np.arange(0, len(bottom_opt_data)), bottom_opt_data)
+    ax = sns.regplot(data=ds, x=np.arange(0, len(bottom_opt_data)), y=bottom_opt_data,
                      scatter_kws={"color": "grey"},
-                     line_kws={"color": "red", "label": "y={0:.6f}x+{1:.3f}".format(slope, intercept)},
+                     line_kws={"color": "red", "label": "y={0:.8f}x+{1:.5f}".format(slope, intercept)},
                      )
     ax.legend(loc=2)
     ax.grid()
-    ax.set(ylim=(np.nanpercentile(bottom_chl_data, 0.1), np.nanpercentile(bottom_chl_data, 99.9)),
+    ax.set(ylim=(np.nanpercentile(bottom_opt_data, 0.5), np.nanpercentile(bottom_opt_data, 99.5)),
            xlabel='Measurements',
-           ylabel='Chla')
-
-    if slope >= 0.00001:
+           ylabel=var)
+    percentage_change = (((slope*len(bottom_opt_data)+intercept) - intercept) / abs(intercept))* 100
+
+    if abs(percentage_change) >= 1:
         print(
-            'Data from the deepest 10% of data has been analysed and data does not seem stable. An alternative solution for dark counts has to be considered. \nMoreover, it is recommended to check the sensor has this may suggest issues with the sensor (i.e water inside the sensor, temporal drift etc)')
+            'Data from the deepest 10% of data has been analysed and data does not seem perfectly stable. An alternative solution for dark counts has to be considered. \nMoreover, it is recommended to check the sensor has this may suggest issues with the sensor (i.e water inside the sensor, temporal drift etc)')
+        print(f'Data changed (increased or decreased) by {"%.1f" % np.round(percentage_change, 1)}% from the beginning to the end of the mission')
     else:
         print(
-            'Data from the deepest 10% of data has been analysed and data seems stable. These deep values can be used to re-assess the dark count if the no chlorophyll at depth assumption is valid in this site and this depth')
+            f'Data from the deepest 10% of data has been analysed and data seems stable. These deep values can be used to re-assess the dark count if the no {var} at depth assumption is valid in this site and this depth')
+
+
 def sunset_sunrise(time, lat, lon):
     """
     Calculates the local sunrise/sunset of the glider location from GliderTools.
@@ -371,10 +377,10 @@ def day_night_avg(ds, sel_var='CHLA', start_time='2024-04-18', end_time='2024-04
     ----------
     ds: xarray on OG1 format containing at least time, depth, latitude, longitude and the selected variable. 
         Data should not be gridded.
-    start_time: Start date of the data selection. As missions can be long and came make it hard to visualise NPQ effetc, 
-                we reccomend selecting small section of few days to few weeks.
-    end_time: End date of the data selection. As missions can be long and came make it hard to visualise NPQ effetc, 
-                we reccomend selecting small section of few days to few weeks.
+    start_time: Start date of the data selection. As missions can be long and can make it hard to visualise NPQ effetc, 
+                we recommend end selecting small section of few days to few weeks.
+    end_time: End date of the data selection. As missions can be long and can make it hard to visualise NPQ effetc, 
+                we recommend selecting small section of few days to few weeks.
                 
     Returns
     -------

diff --git a/notebooks/demo.ipynb b/notebooks/demo.ipynb
@@ -1,23 +1,34 @@
 {
  "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "97e9729d-4db2-419c-bbc2-a2078fc9b7dc",
+   "metadata": {},
+   "source": [
+    "## Glidertest demo \n",
+    "\n",
+    "The purpose of this notebook is to demostrate the functionality of glidertests functions. \n",
+    "This notebooks can be used to diagnose issues within your glider data. We have added suggested processing in some cases."
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
+   "id": "1dcffb1b-4040-4976-b753-c173e5e1fe78",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys \n",
+    "sys.path.append('C:\\\\Users\\\\u241346\\\\uni_hamburg\\\\glidertest')\n",
+    "import xarray as xr"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
    "id": "7c437da6-c3b3-4c48-b272-ee5b8ac27f69",
    "metadata": {},
-   "outputs": [
-    {
-     "ename": "ModuleNotFoundError",
-     "evalue": "No module named 'glidertest'",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[1;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)",
-      "Cell \u001b[1;32mIn[1], line 3\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mmatplotlib\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpyplot\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mplt\u001b[39;00m\n\u001b[0;32m      2\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mnumpy\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mnp\u001b[39;00m\n\u001b[1;32m----> 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mglidertest\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m fetchers\n\u001b[0;32m      4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mglidertest\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m tools\n",
-      "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'glidertest'"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "import matplotlib.pyplot as plt\n",
     "import numpy as np\n",
@@ -100,12 +111,24 @@
     "ax[0].set_ylabel('Depth [m]')"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "d4553a25-675a-4738-be65-409c0f134ac0",
+   "metadata": {},
+   "source": [
+    "### CTD\n",
+    "\n",
+    "Check for any thermal intertia related issues\n",
+    "\n",
+    "Salinity SOP provides a great summary of the needed processing of salinity data and the vaious difference based on sensor modela nd platform type https://oceangliderscommunity.github.io/Salinity_SOP/sections/salinity_dmqc.html"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "4ae684a3-f9a8-454c-804e-a669cd6d25a5",
    "metadata": {},
    "source": [
-    "### Chlorophyll\n",
+    "### Chlorophyll fluorescence\n",
     "\n",
     "*  Check bottom data and see if we have stable data that can be used for calibration. We also check stability of data to assess whether or not we have suspicious drift over the mission\n",
     "* We then check if data is affected by non photochemical quenching (NPQ). NPQ is a physiological response to high light environments used by plants and algae to protect themselves from damage and causes an evident weakening in fluorescence signal during the day. With the 'day_night_avg' function, we compute day and night averages of chlorophyll. We then plot a selected section of chlorophyll data with 'plot_section_with_srss' to see if any NPQ effect in the top few meters is visible and then we plot a selcted day daily and night average to check again any NPQ effect with 'plot_daynight_avg'.\n",
@@ -120,7 +143,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "tools.chl_first_check(ds)"
+    "tools.optics_first_check(ds, var='CHLA')"
    ]
   },
   {
@@ -168,7 +191,7 @@
    "id": "60e18c38-0911-443d-8543-6099237f4475",
    "metadata": {},
    "source": [
-    "Do we see any difference in chl between day and night? Can this just simply be exaplined by changes in water mass properties (different temp and salinity)?"
+    "Do we see any difference in chl between day and night? Can this just simply be explained by changes in water mass properties (different temp and salinity)?"
    ]
   },
   {
@@ -188,10 +211,81 @@
     "https://doi.org/10.1002/lom3.10234"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "6f23e85e-80a2-45b0-af03-817223db10c0",
+   "metadata": {},
+   "source": [
+    "### Photosyntetically Active Radiation (PAR)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c2c640e7-f3e0-4e7a-b652-c1efbc0c2fc7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fig, ax = plt.subplots(1, 1, figsize=(5, 5))\n",
+    "tools.plot_updown_bias(tools.updown_bias(ds, var='DPAR', v_res=1), ax,  xlabel='Irradiance')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "da91f0b2-99d9-44f9-b593-006f3cd20244",
+   "metadata": {},
+   "source": [
+    "Do we notice any strong up down cast bias?\n",
+    "\n",
+    "Likely we do as the diving angle changes. The pitch for upcast and downcast are very different while the position of the sensor remains the same. This means that the angle at which the sensor is exposed to light is very different and data will not be comparable. Furthermore, navigation patterns have to be considered too when processing PAR data. As the glider sits at surface, the pitch (therefore the sensor angle) can be very different from the rest of the dive. Moreover, as the glider starts to dive or prepares for surfacing during a climb the pitch may be very different as well.\n",
+    "\n",
+    "Discarding and reconstructing algebraically the surface PAR using an exponential equation and selecting data from only up or downcast is therefore recommended. GliderTools provides great examples and functions to address this issues (https://glidertools.readthedocs.io/en/latest/optics.html#par-replacement)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5d15da55-af84-41d0-aedd-d699b651c4de",
+   "metadata": {},
+   "source": [
+    "### Optical Backscatter\n",
+    "\n",
+    "*  Check bottom data and see if we have stable data that can be used for calibration. We also check stability of data to assess whether or not we have suspicious drift over the mission\n",
+    "* In case computation of particle backscattering from the scaled optical data was not done, this can be done following a function from GliderTools. this functions uses uses the coefficients from Zhang et al. (2009) to convert the raw counts into total backscatter (m-1), correcting for temperature and salinity. The $\\chi$ factor and $\\theta$ in this example were taken from Sullivan et al. (2013) and Slade & Boss (2015).\n",
+    "\n",
+    "Slade, W., Boss, E. 2015. Spectral attenuation and backscattering as indicators of average particle size. Applied Optics 54: 7264-7277, doi:10.1364/AO.54.00726. \n",
+    "\n",
+    "Sullivan, J., Twardowski, M., Zaneveld, J.R.V., Moore, C. 2013. Measuring optical backscattering in water. Light Scattering Reviews 7. 189-224. 10.1007/978-3-642-21907-8_6.\n",
+    "\n",
+    "Zhang, X., and L. Hu. 2009. Estimating scattering of pure water from density fluctuation of the \n",
+    "refractive index. Optics Express, 17: 1671-1678. DOI: 10.1364/OE.17.001671 7"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5975e520-b695-4dd9-bbce-58feb91e4164",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tools.optics_first_check(ds, var='BBP700')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f8c7d90a-6482-4f25-b417-ecc6e2e7f65d",
+   "metadata": {},
+   "source": [
+    "### Oxygen\n",
+    "\n",
+    "*  Check for any possible drift in the data that might look suspicious. The great temporal and spatial variability may not allow for such check to be succesful. Evaluation using reference CTD cast data or any other data available in the stufy area is recommended.\n",
+    "\n",
+    "Oxygen SOP provides a great summary of the needed processing of salinity data and the vaious difference based on sensor modela nd platform type https://oceangliderscommunity.github.io/Salinity_SOP/sections/salinity_dmqc.html"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "6c212170-ef3e-43b1-8d6b-fe1a0757c0fb",
+   "id": "d6096ce8-2a9a-4d36-bde2-3148045191aa",
    "metadata": {},
    "outputs": [],
    "source": []

diff --git a/tests/test_tools.py b/tests/test_tools.py
@@ -15,7 +15,7 @@ def test_up_down_bias():
 
 def test_chl():
     ds = fetchers.load_sample_dataset()
-    tools.chl_first_check(ds)
+    tools.optics_first_check(ds)
 
 def test_quench_sequence():
     ds = fetchers.load_sample_dataset()