Data Clean Up (QARTOD)#

Quality Assurance/Quality Control (QA/QC) configuration#

variable_name = "sea_surface_height_above_sea_level_geoid_mhhw"


qc_config = {
    "qartod": {
        "gross_range_test": {"fail_span": [-10, 10], "suspect_span": [-2, 3]},
        "flat_line_test": {
            "tolerance": 0.001,
            "suspect_threshold": 10800,
            "fail_threshold": 21600,
        },
        "spike_test": {
            "suspect_threshold": 0.8,
            "fail_threshold": 3,
        },
    }
}

data from the AOOS ERDDAP server#

Note that the data may change in the future. For reproducibility’s sake we will save the data downloaded into a CSV file.

from pathlib import Path

import pandas as pd
from erddapy import ERDDAP

path = Path().absolute()
fname = path.joinpath("data", "water_level_example.csv")

if fname.is_file():
    data = pd.read_csv(fname, parse_dates=["time (UTC)"])
else:
    e = ERDDAP(server="http://erddap.aoos.org/erddap/", protocol="tabledap")
    e.dataset_id = "kotzebue-alaska-water-level"
    e.constraints = {
        "time>=": "2018-09-05T21:00:00Z",
        "time<=": "2019-07-10T19:00:00Z",
    }
    e.variables = [
        variable_name,
        "time",
        "z",
    ]
    data = e.to_pandas(
        index_col="time (UTC)",
        parse_dates=True,
    )
    data["timestamp"] = data.index.astype("int64") // 1e9
    data.to_csv(fname)

data.head()

Run the QC#

from ioos_qc.config import QcConfig

qc = QcConfig(qc_config)

qc_results = qc.run(
    inp=data["sea_surface_height_above_sea_level_geoid_mhhw (m)"],
    tinp=data["timestamp"],
    zinp=data["z (m)"],
)

qc_results

Function to plot the results#

%matplotlib inline

from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np


def plot_results(data, var_name, results, title, test_name):
    time = data.index
    obs = data[var_name]
    qc_test = results["qartod"][test_name]

    qc_pass = np.ma.masked_where(qc_test != 1, obs)
    qc_suspect = np.ma.masked_where(qc_test != 3, obs)
    qc_fail = np.ma.masked_where(qc_test != 4, obs)
    qc_notrun = np.ma.masked_where(qc_test != 2, obs)

    fig, ax = plt.subplots(figsize=(15, 3.75))
    fig.set_title = f"{test_name}: {title}"

    ax.set_xlabel("Time")
    ax.set_ylabel("Observation Value")

    kw = {"marker": "o", "linestyle": "none"}
    ax.plot(time, obs, label="obs", color="#A6CEE3")
    ax.plot(
        time, qc_notrun, markersize=2, label="qc not run", color="gray", alpha=0.2, **kw
    )
    ax.plot(
        time, qc_pass, markersize=4, label="qc pass", color="green", alpha=0.5, **kw
    )
    ax.plot(
        time,
        qc_suspect,
        markersize=4,
        label="qc suspect",
        color="orange",
        alpha=0.7,
        **kw,
    )
    ax.plot(time, qc_fail, markersize=6, label="qc fail", color="red", alpha=1.0, **kw)
    ax.grid(True)


title = "Water Level [MHHW] [m] : Kotzebue, AK"

Gross Range test#

plot_results(
    data,
    "sea_surface_height_above_sea_level_geoid_mhhw (m)",
    qc_results,
    title,
    "gross_range_test",
)

Spike test#

plot_results(
    data,
    "sea_surface_height_above_sea_level_geoid_mhhw (m)",
    qc_results,
    title,
    "spike_test",
)

Flat Line test#

plot_results(
    data,
    "sea_surface_height_above_sea_level_geoid_mhhw (m)",
    qc_results,
    title,
    "flat_line_test",
)