{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Data Clean Up (QARTOD)\n",
    "\n",
    "![](https://imgs.xkcd.com/comics/artifacts.png)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Quality Assurance/Quality Control (QA/QC) configuration\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# ERDDAP variable that is downloaded and then quality-controlled below.\n",
    "variable_name = \"sea_surface_height_above_sea_level_geoid_mhhw\"\n",
    "\n",
    "\n",
    "# Settings for the three QARTOD tests that are run and plotted in this notebook.\n",
    "qc_config = {\n",
    "    \"qartod\": {\n",
    "        \"gross_range_test\": {\"fail_span\": [-10, 10], \"suspect_span\": [-2, 3]},\n",
    "        # NOTE(review): thresholds are presumably in seconds (3 h / 6 h) - confirm.\n",
    "        \"flat_line_test\": {\n",
    "            \"tolerance\": 0.001,\n",
    "            \"suspect_threshold\": 10800,\n",
    "            \"fail_threshold\": 21600,\n",
    "        },\n",
    "        \"spike_test\": {\n",
    "            \"suspect_threshold\": 0.8,\n",
    "            \"fail_threshold\": 3,\n",
    "        },\n",
    "    }\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# data from the [AOOS ERDDAP server](http://erddap.aoos.org/erddap/)\n",
    "\n",
    "Note that the data may change in the future. For reproducibility’s sake we will\n",
    "save the data downloaded into a CSV file.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "import pandas as pd\n",
    "from erddapy import ERDDAP\n",
    "\n",
    "path = Path().absolute()\n",
    "fname = path.joinpath(\"data\", \"water_level_example.csv\")\n",
    "\n",
    "if fname.is_file():\n",
    "    # Restore the time index so the cached frame has the same layout as a\n",
    "    # fresh download; plot_results uses data.index as its x-axis.\n",
    "    data = pd.read_csv(\n",
    "        fname,\n",
    "        index_col=\"time (UTC)\",\n",
    "        parse_dates=[\"time (UTC)\"],\n",
    "    )\n",
    "else:\n",
    "    e = ERDDAP(server=\"http://erddap.aoos.org/erddap/\", protocol=\"tabledap\")\n",
    "    e.dataset_id = \"kotzebue-alaska-water-level\"\n",
    "    e.constraints = {\n",
    "        \"time>=\": \"2018-09-05T21:00:00Z\",\n",
    "        \"time<=\": \"2019-07-10T19:00:00Z\",\n",
    "    }\n",
    "    e.variables = [\n",
    "        variable_name,\n",
    "        \"time\",\n",
    "        \"z\",\n",
    "    ]\n",
    "    data = e.to_pandas(\n",
    "        index_col=\"time (UTC)\",\n",
    "        parse_dates=True,\n",
    "    )\n",
    "    # Numeric copy of the time index; it is passed to the QC run as `tinp`.\n",
    "    data[\"timestamp\"] = data.index.astype(\"int64\") // 1e9\n",
    "    # The cache directory may not exist yet on a fresh checkout.\n",
    "    fname.parent.mkdir(parents=True, exist_ok=True)\n",
    "    data.to_csv(fname)\n",
    "\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Run the QC\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from ioos_qc.config import QcConfig\n",
    "\n",
    "qc = QcConfig(qc_config)\n",
    "\n",
    "# Run every configured test on the water-level column.\n",
    "qc_results = qc.run(\n",
    "    inp=data[\"sea_surface_height_above_sea_level_geoid_mhhw (m)\"],\n",
    "    tinp=data[\"timestamp\"],\n",
    "    zinp=data[\"z (m)\"],\n",
    ")\n",
    "\n",
    "qc_results"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Function to plot the results\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "\n",
    "from datetime import datetime\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "\n",
    "\n",
    "def plot_results(data, var_name, results, title, test_name):\n",
    "    \"\"\"Plot the observations and overlay the flags of one QARTOD test.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    data : pandas.DataFrame\n",
    "        Observations; the frame's index is used as the x-axis.\n",
    "    var_name : str\n",
    "        Column of ``data`` that holds the observed values.\n",
    "    results : mapping\n",
    "        QC results, read as ``results[\"qartod\"][test_name]``.\n",
    "    title : str\n",
    "        Text shown after the test name in the plot title.\n",
    "    test_name : str\n",
    "        Key of the QARTOD test whose flags are drawn.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    None\n",
    "        The figure is drawn as a side effect.\n",
    "    \"\"\"\n",
    "    time = data.index\n",
    "    obs = data[var_name]\n",
    "    qc_test = results[\"qartod\"][test_name]\n",
    "\n",
    "    # One masked copy of the observations per flag value: every point that\n",
    "    # does not carry that flag is hidden.\n",
    "    qc_pass = np.ma.masked_where(qc_test != 1, obs)\n",
    "    qc_suspect = np.ma.masked_where(qc_test != 3, obs)\n",
    "    qc_fail = np.ma.masked_where(qc_test != 4, obs)\n",
    "    qc_notrun = np.ma.masked_where(qc_test != 2, obs)\n",
    "\n",
    "    fig, ax = plt.subplots(figsize=(15, 3.75))\n",
    "    # The title has to be set through the axes method; assigning to an\n",
    "    # attribute on the figure draws nothing.\n",
    "    ax.set_title(f\"{test_name}: {title}\")\n",
    "\n",
    "    ax.set_xlabel(\"Time\")\n",
    "    ax.set_ylabel(\"Observation Value\")\n",
    "\n",
    "    kw = {\"marker\": \"o\", \"linestyle\": \"none\"}\n",
    "    ax.plot(time, obs, label=\"obs\", color=\"#A6CEE3\")\n",
    "    ax.plot(\n",
    "        time, qc_notrun, markersize=2, label=\"qc not run\", color=\"gray\", alpha=0.2, **kw\n",
    "    )\n",
    "    ax.plot(\n",
    "        time, qc_pass, markersize=4, label=\"qc pass\", color=\"green\", alpha=0.5, **kw\n",
    "    )\n",
    "    ax.plot(\n",
    "        time,\n",
    "        qc_suspect,\n",
    "        markersize=4,\n",
    "        label=\"qc suspect\",\n",
    "        color=\"orange\",\n",
    "        alpha=0.7,\n",
    "        **kw,\n",
    "    )\n",
    "    ax.plot(time, qc_fail, markersize=6, label=\"qc fail\", color=\"red\", alpha=1.0, **kw)\n",
    "    ax.grid(True)\n",
    "    # Every series above carries a label; the legend makes them visible.\n",
    "    # A fixed location avoids the slow \"best\" placement search on long series.\n",
    "    ax.legend(loc=\"upper right\")\n",
    "\n",
    "\n",
    "title = \"Water Level [MHHW] [m] : Kotzebue, AK\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Gross Range test\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plot_results(\n",
    "    data,\n",
    "    \"sea_surface_height_above_sea_level_geoid_mhhw (m)\",\n",
    "    qc_results,\n",
    "    title,\n",
    "    \"gross_range_test\",\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Spike test\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plot_results(\n",
    "    data,\n",
    "    \"sea_surface_height_above_sea_level_geoid_mhhw (m)\",\n",
    "    qc_results,\n",
    "    title,\n",
    "    \"spike_test\",\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Flat Line test\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plot_results(\n",
    "    data,\n",
    "    \"sea_surface_height_above_sea_level_geoid_mhhw (m)\",\n",
    "    qc_results,\n",
    "    title,\n",
    "    \"flat_line_test\",\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}