{ "cells": [ { "cell_type": "code", "execution_count": 34, "id": "54a3c1fc-b51f-4232-9651-eaa1f1ecf5bc", "metadata": {}, "outputs": [], "source": [ "# Procesar Datos de Tablas/Matrices\n", "# Qué son las librerías y para qué se usan\n", "# Seleccionar valores específicos de una matriz\n", "# Leer archivos netcdf\n", "# Realizar operaciones matemáticas/estadísticas en matrices/tablas" ] }, { "cell_type": "code", "execution_count": 73, "id": "8bc4f2a0-8684-41a9-b33e-259c658d6d97", "metadata": {}, "outputs": [], "source": [ "import numpy as np # numerical python\n", "import netCDF4 as nc" ] }, { "cell_type": "markdown", "id": "a232f4d2-a67e-4057-9fd0-b2da19835945", "metadata": {}, "source": [ "# Numpy arrays" ] }, { "cell_type": "code", "execution_count": 36, "id": "e4552b5d-7360-4422-b74a-108f2568e1ea", "metadata": {}, "outputs": [], "source": [ "# Ver presentación" ] }, { "cell_type": "code", "execution_count": 37, "id": "1024fba7-2cc2-406f-847d-998a5c346f22", "metadata": {}, "outputs": [], "source": [ "spy = np.array([1, 4, 5.0, 6])" ] }, { "cell_type": "code", "execution_count": 75, "id": "457aca2d-d6cf-4971-b71d-1b155118cc2c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "numpy.ndarray" ] }, "execution_count": 75, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(spy) # la variable es un numpy array, pero type() no dice qué hay dentro de spy" ] }, { "cell_type": "code", "execution_count": 39, "id": "abb3f673-d338-4e60-bb34-42d8613e01cf", "metadata": {}, "outputs": [], "source": [ "# Podemos utilizar el punto (spy.dtype) ya que dtype es un atributo de los arrays de numpy" ] }, { "cell_type": "code", "execution_count": 76, "id": "7b1a6fcd-c8d6-499f-b4f6-980ca0d36460", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "float64\n" ] } ], "source": [ "print(spy.dtype) # Tipo de datos dentro del array spy" ] }, { "cell_type": "code", "execution_count": 79, "id": "75918627-f92a-40b9-9a66-b970eb5f10a0", "metadata": {}, "outputs": 
[ { "name": "stdout", "output_type": "stream", "text": [ "(4,)\n", "(4,)\n", "6.0\n", "6.0\n" ] } ], "source": [ "print(spy.shape) # tamaño (forma) de la variable spy\n", "print(np.shape(spy))\n", "print(spy[3])\n", "print(spy[-1])" ] }, { "cell_type": "code", "execution_count": 80, "id": "ad1f4a6f-bc93-4089-939f-aa6fbb264287", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "primer valor en los datos: 1.0\n" ] } ], "source": [ "print('primer valor en los datos:', spy[0]) # el índice 0 es el primer elemento en Python\n" ] }, { "cell_type": "markdown", "id": "5cc34d92-d905-4aa3-8a57-2d3b62d33157", "metadata": { "tags": [] }, "source": [ "# Slicing data" ] }, { "cell_type": "markdown", "id": "f9f13f69-dcab-40dd-968d-dc6e311dee6e", "metadata": {}, "source": [ "Podemos seleccionar un índice o una sección del vector" ] }, { "cell_type": "code", "execution_count": 83, "id": "c3944c52-7195-4c69-9f6b-6e5510333a9c", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[1. 4. 5. 6.]\n" ] } ], "source": [ "print(spy)" ] }, { "cell_type": "code", "execution_count": 81, "id": "a4f425d8-efc3-444b-b074-827e070871fd", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[5. 6.]\n", "[4. 5.]\n" ] } ], "source": [ "print(spy[2:]) # empieza en el índice 2 hasta el final\n", "print(spy[1:3]) # empieza en el índice 1 pero no incluye el 3" ] }, { "cell_type": "code", "execution_count": 44, "id": "459741a4-58e9-47a7-93ff-8317d769301d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[1. 
4.]\n" ] } ], "source": [ "# No es obligatorio escribir el límite inferior ni el superior\n", "print(spy[:2]) # significa desde el índice 0 hasta el 2 (sin incluirlo)" ] }, { "cell_type": "code", "execution_count": 84, "id": "3b965e06-17b2-4230-bdc9-5c89cd9e707d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "4.0\n" ] } ], "source": [ "print(np.mean(spy)) # numpy tiene funciones que realizan operaciones en el vector" ] }, { "cell_type": "code", "execution_count": 87, "id": "109b9e64-13a7-4172-bbec-7bd924caf916", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "4.0\n", "1.8708286933869707\n" ] } ], "source": [ "print(spy.mean())\n", "print(spy.std())" ] }, { "cell_type": "markdown", "id": "2bf6c173-602b-465c-90b2-498ac70c0594", "metadata": {}, "source": [ "# Reading netCDF" ] }, { "cell_type": "code", "execution_count": 46, "id": "42b7a19a-cd9f-4180-ba9e-fbd183f15d51", "metadata": {}, "outputs": [], "source": [ "# Ahora utilicemos datos atmosféricos para algo más complicado" ] }, { "cell_type": "code", "execution_count": 47, "id": "b05ab355-9273-48d3-b69a-eab0bafa85f3", "metadata": {}, "outputs": [], "source": [ "fileobj = nc.Dataset('shared/ERA5/ERA5_Coarse.nc') # Abre el archivo netCDF (.nc es la extensión)" ] }, { "cell_type": "code", "execution_count": 67, "id": "f19e2d1a-bb1b-428a-8991-9af1d7e73f59", "metadata": {}, "outputs": [], "source": [ "# netCDF (Network Common Data Form): formato que guarda datos MULTIDIMENSIONALES" ] }, { "cell_type": "code", "execution_count": 88, "id": "82fdb213-e5c4-4e36-ba71-823028868785", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "root group (NETCDF4 data model, file format HDF5):\n", " dimensions(sizes): time(756), latitude(180), longitude(360)\n", " variables(dimensions): int16 u10(time, latitude, longitude), int16 v10(time, latitude, longitude), int16 sst(time, latitude, longitude), int16 sp(time, latitude, longitude), int16 t2m(time, 
latitude, longitude), int16 tp(time, latitude, longitude), float32 t2m_F(time, latitude, longitude), float32 longitude(longitude), float32 latitude(latitude), int32 time(time)\n", " groups: \n" ] } ], "source": [ "print(fileobj)" ] }, { "cell_type": "code", "execution_count": 91, "id": "b8acfe42-9945-485e-87d3-d46aac8b0ef2", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "dict_keys(['u10', 'v10', 'sst', 'sp', 't2m', 'tp', 't2m_F', 'longitude', 'latitude', 'time'])\n" ] } ], "source": [ "print(fileobj.variables.keys())" ] }, { "cell_type": "code", "execution_count": 49, "id": "c221bf15-16df-4b7e-9e68-7e9fd27695bf", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "dict_keys(['u10', 'v10', 'sst', 'sp', 't2m', 'tp', 't2m_F', 'longitude', 'latitude', 'time'])\n" ] } ], "source": [ "print(fileobj.variables.keys()) # siempre revisar qué variables hay en el archivo" ] }, { "cell_type": "code", "execution_count": 92, "id": "81622cb3-2096-434f-8b22-01bbdd90cc95", "metadata": {}, "outputs": [], "source": [ "# Guarda referencias a estas variables del archivo (los datos se leen al indexar, p. ej. con [:])\n", "lon = fileobj['longitude']\n", "lat = fileobj['latitude']\n", "time = fileobj['time']" ] }, { "cell_type": "code", "execution_count": 93, "id": "b98b5d77-2054-45be-bcb1-afdc2e2af5de", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "float32 longitude(longitude)\n", " _FillValue: nan\n", " units: degrees_east\n", " long_name: longitude\n", "unlimited dimensions: \n", "current shape = (360,)\n", "filling on\n" ] } ], "source": [ "print(lon)" ] }, { "cell_type": "code", "execution_count": 51, "id": "febc8090-5ce1-4c07-a223-82677059f92b", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "float32 latitude(latitude)\n", " _FillValue: nan\n", " units: degrees_north\n", " long_name: latitude\n", "unlimited dimensions: \n", "current shape = (180,)\n", "filling on\n" ] } ], 
"source": [ "print(lat) # muestra los metadatos de la variable (unidades, nombre, forma)" ] }, { "cell_type": "code", "execution_count": 94, "id": "53cf2b46-97b0-4857-a6c5-b3c803dbdded", "metadata": {}, "outputs": [], "source": [ "sst = fileobj['sst'] " ] }, { "cell_type": "code", "execution_count": 95, "id": "533e315a-4504-41a1-87be-6655584b3a4f", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "int16 sst(time, latitude, longitude)\n", " _FillValue: -32767\n", " units: K\n", " long_name: Sea surface temperature\n", " add_offset: 289.4649014722902\n", " scale_factor: 0.0006169772945977599\n", " missing_value: -32767\n", "unlimited dimensions: \n", "current shape = (756, 180, 360)\n", "filling on\n" ] } ], "source": [ "print(sst) # ¿cuántas dimensiones tiene esta variable?" ] }, { "cell_type": "code", "execution_count": 97, "id": "c1841f16-53fd-4a92-a078-18da8ed8e897", "metadata": {}, "outputs": [], "source": [ "sst = fileobj['sst'][:] # ¿qué son los dos puntitos? [:] lee todos los valores de la variable como un arreglo" ] }, { "cell_type": "code", "execution_count": 99, "id": "f3020e81-2e84-42e2-966a-31988a0927e3", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(756, 180, 360)\n" ] } ], "source": [ "print(np.shape(sst))" ] }, { "cell_type": "code", "execution_count": 55, "id": "6aa5dfa6-154b-42d6-91f9-2b0fed8441e8", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(756, 180, 360)\n" ] } ], "source": [ "print(sst.shape) # ¿cuántas dimensiones tiene esta variable?" ] }, { "cell_type": "markdown", "id": "0871b331-54b7-4253-afd0-0d637d6dd848", "metadata": {}, "source": [ "¡Numpy tiene funciones útiles para analizar datos!" 
] }, { "cell_type": "code", "execution_count": 100, "id": "92b513d9-e743-4581-8cc2-0202faff02de", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "11.668331290394544\n", "11.668331290394544\n" ] } ], "source": [ "print(sst.std())\n", "print(np.std(sst))" ] }, { "cell_type": "code", "execution_count": 61, "id": "71bb2070-5654-4384-bd11-78547a24ce8b", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "286.63940498273524\n" ] } ], "source": [ "print(sst.mean()) # Si quieres el promedio de todos los datos" ] }, { "cell_type": "code", "execution_count": 101, "id": "0bc119d6-2797-4373-a36a-9b8b200482d4", "metadata": {}, "outputs": [], "source": [ "sst_C = sst - 273.15" ] }, { "cell_type": "code", "execution_count": 63, "id": "bb6d2f43-8616-4b60-a7f2-dad14b58c8a8", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "13.489404982734603\n" ] } ], "source": [ "print(sst_C.mean())" ] }, { "cell_type": "code", "execution_count": 102, "id": "52bab89f-ed66-4a93-8ace-e73d9ef03786", "metadata": {}, "outputs": [], "source": [ "maxval, minval, stdval = np.max(sst_C), np.min(sst_C), np.std(sst_C)\n" ] }, { "cell_type": "code", "execution_count": 103, "id": "69a1dda1-45b8-40d9-8011-4a1a5ad6b1d0", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Max SST: 36.185889199400265\n", "Min SST: -3.7553699209748856\n", "Desviación Standard: 11.668331290394526\n" ] } ], "source": [ "print('Max SST:', maxval)\n", "print('Min SST:', minval)\n", "print('Desviación Standard:', stdval)" ] }, { "cell_type": "markdown", "id": "c07d0282-8d65-4ff3-a18f-c69d9566fff5", "metadata": {}, "source": [ "Cómo saber qué funciones tiene numpy: escribe `np.` y después presiona Tab. ¡Inténtalo!" 
] }, { "cell_type": "markdown", "id": "31fee00d-d204-4160-a030-cd77e536bdad", "metadata": {}, "source": [ "Cuando analizamos datos oceanográficos, generalmente queremos saber el promedio sobre un área o un periodo de tiempo específico." ] }, { "cell_type": "code", "execution_count": 68, "id": "580dc15a-29be-4634-92dd-e8614ab3c38c", "metadata": {}, "outputs": [], "source": [ "# Ir a la presentación" ] }, { "cell_type": "code", "execution_count": 104, "id": "41299cc7-554f-422a-9f02-0b02774aaff4", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[-1.690020472943193 -1.690020472943193 -1.690020472943193 ...\n", " -1.690020472943193 -1.690020472943193 -1.690020472943193]\n", " [-1.6900188407281278 -1.6900196568356602 -1.6900196568356602 ...\n", " -1.6900172085130627 -1.6900180246205951 -1.6900188407281278]\n", " [-1.6900188407281278 -1.6900188407281278 -1.6900188407281278 ...\n", " -1.69001639240553 -1.6900180246205951 -1.6900188407281278]\n", " ...\n", " [-- -- -- ... -- -- --]\n", " [-- -- -- ... -- -- --]\n", " [-- -- -- ... 
-- -- --]]\n" ] } ], "source": [ "print(np.mean(sst_C, axis=0))" ] }, { "cell_type": "code", "execution_count": 57, "id": "f7c084b7-842a-41e9-ab5b-248b1858665e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(180, 360)\n" ] } ], "source": [ "# El promedio de la temperatura en cada lat y lon de todo el tiempo\n", "print(np.mean(sst_C, axis=0).shape) # axis =0 es sobre dimensión 0 (tiempo)" ] }, { "cell_type": "code", "execution_count": 58, "id": "a8ac93d6-1f12-4449-b472-7b48f745b1a1", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(756, 360)\n" ] } ], "source": [ "# El promedio de la temperatura sobre todos los renglones (latitud)\n", "print(np.mean(sst_C, axis=1).shape) # axis = 1 es sobre dimensión renglones (rows latitud)" ] }, { "cell_type": "code", "execution_count": 108, "id": "e92f18e4-9d80-4ee7-9f5c-6ac7133a2a65", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "masked_array(data=[-1.6900181379688635, -1.690006760069681,\n", " -1.6899877243614847, -1.6899144991131279,\n", " -1.6899064513860709, -1.6896772067801817,\n", " -1.6884178440431403, -1.6823770204799693,\n", " -1.6489731771821603, -1.5816772256881508,\n", " -1.4771803220097532, -1.3479930015591848,\n", " -1.182235642307473, -0.9939801886754818,\n", " -0.7373676366667153, -0.5040172492606008,\n", " -0.26965351854671754, 0.09446895101639383,\n", " 0.4336198428509824, 0.8837989185510522,\n", " 1.0174925322963926, 1.1327164454932175,\n", " 1.6139704045547965, 2.1984112278902455,\n", " 2.718583477733101, 3.1846600108358323,\n", " 3.919262133697807, 3.9705070618289255,\n", " 4.317240345330061, 4.673977128405407,\n", " 5.0296964536867925, 5.5145073541889404,\n", " 5.818161982028872, 6.122005792976584,\n", " 6.357286307900196, 6.698262968414926, 6.76358540180476,\n", " 7.240890160611007, 7.514439800294637,\n", " 7.8130902083881795, 8.263033293152104,\n", " 8.439452499326954, 8.803072657945508,\n", " 9.594178302820403, 
10.389353000293163,\n", " 11.268988810893404, 12.26357344536613,\n", " 13.071534492997367, 14.157506472518037,\n", " 15.330075151798367, 16.389892732217113,\n", " 17.189522311583517, 18.033297459564388,\n", " 18.752127476796122, 19.35107592315887,\n", " 19.835805361674115, 20.462167822792946,\n", " 20.982455859116513, 21.350112474809325,\n", " 21.701947761863433, 22.439357099006177,\n", " 22.923373974508387, 23.523208511302965,\n", " 24.014823522225225, 24.516835423586233,\n", " 24.84081779589453, 25.1246568797575,\n", " 25.413527500737956, 25.70519033320649,\n", " 25.982744715951547, 26.295153343538253,\n", " 26.524745738710948, 26.760850942338067,\n", " 26.984901176322214, 27.178847064601282,\n", " 27.32670109538924, 27.473530177548927,\n", " 27.58818219454823, 27.667606347619078,\n", " 27.781885987319857, 27.942765864681146,\n", " 28.048944040893407, 28.12912726984836,\n", " 28.201063104132917, 28.174411518957804,\n", " 28.134804308368945, 28.097507673164746,\n", " 27.977594994083063, 27.75894354336433,\n", " 27.46125471288095, 27.30234341526245,\n", " 27.30329100157845, 27.387579888318072,\n", " 27.41628785299799, 27.47991015299138,\n", " 27.45452788623761, 27.426351393397297,\n", " 27.337179301888796, 27.24485500024314,\n", " 27.159752158915904, 27.023895177748543,\n", " 26.891526902095325, 26.715642144077616,\n", " 26.49928726905775, 26.230276529555447,\n", " 25.90408461700788, 25.558853295052106,\n", " 25.342357772571688, 25.025825834625735,\n", " 24.72872830643171, 24.438028442756085,\n", " 24.067554498440263, 23.7388313220877,\n", " 23.412171190917682, 23.08220995753288,\n", " 22.74097156354641, 22.347648892592737,\n", " 21.953197579539374, 21.490999318822144,\n", " 20.986984581661304, 20.481335038062543,\n", " 19.897564444390103, 19.328976374695134,\n", " 18.74759216969734, 18.16499190510353,\n", " 17.58717599476669, 16.96613230351236,\n", " 16.32711002686899, 15.638052721841344,\n", " 14.935252847709545, 14.140982409567343,\n", " 13.288524069476008, 
12.346274338783608,\n", " 11.408118783501955, 10.489186734472739,\n", " 9.613138560329782, 8.851594976837715,\n", " 8.077582037974743, 7.340193578053533,\n", " 6.576840517722451, 5.912081311025313,\n", " 5.336482145540234, 4.7913230128964654,\n", " 4.2465665007530715, 3.7307410571518314,\n", " 3.2377557176479717, 2.663734184084183,\n", " 2.11479494703165, 1.6088955414237651,\n", " 1.1168191135688328, 0.6531749857640199,\n", " 0.21425257322596983, -0.1769660734681487,\n", " -0.5405649503733324, -0.8542330989373138,\n", " -1.113105142560139, -1.2886685512452507,\n", " -1.3715486419977305, -1.4611978621455668,\n", " -1.5462684829986444, -1.6011792725711507,\n", " -1.630589715665575, -1.6430794478170205,\n", " -1.641895355189927, -1.6401971418052792,\n", " -1.6323259918765443, -1.6139779899952689,\n", " -1.6103907780922724, -1.6106058650595783, --, --, --,\n", " --, --, --, --, --, --, --, --],\n", " mask=[False, False, False, False, False, False, False, False,\n", " False, False, False, False, False, False, False, False,\n", " False, False, False, False, False, False, False, False,\n", " False, False, False, False, False, False, False, False,\n", " False, False, False, False, False, False, False, False,\n", " False, False, False, False, False, False, False, False,\n", " False, False, False, False, False, False, False, False,\n", " False, False, False, False, False, False, False, False,\n", " False, False, False, False, False, False, False, False,\n", " False, False, False, False, False, False, False, False,\n", " False, False, False, False, False, False, False, False,\n", " False, False, False, False, False, False, False, False,\n", " False, False, False, False, False, False, False, False,\n", " False, False, False, False, False, False, False, False,\n", " False, False, False, False, False, False, False, False,\n", " False, False, False, False, False, False, False, False,\n", " False, False, False, False, False, False, False, False,\n", " False, False, False, False, 
False, False, False, False,\n", " False, False, False, False, False, False, False, False,\n", " False, False, False, False, False, False, False, False,\n", " False, False, False, False, False, False, False, False,\n", " False, True, True, True, True, True, True, True,\n", " True, True, True, True],\n", " fill_value=1e+20)" ] }, "execution_count": 108, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.mean(sst_C, axis=0).mean(axis=1)" ] }, { "cell_type": "markdown", "id": "ececda84-4acc-4294-9ba4-723662d49377", "metadata": {}, "source": [ "¿Y qué es axis=2?" ] }, { "cell_type": "code", "execution_count": 70, "id": "58c24d12-6b28-4e67-a481-69220149243f", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Matrix A\n", "[[1. 2.]\n", " [3. 4.]]\n", "Matrix B\n", "[[5. 6.]\n", " [7. 8.]]\n" ] } ], "source": [ "a = np.array([[1, 2], [3, 4]], float) # crear una matriz de dos dimensiones tipo float\n", "b = np.array([[5, 6], [7,8]], float)\n", "\n", "print('Matrix A')\n", "print(a)\n", "print('Matrix B')\n", "print(b)" ] }, { "cell_type": "code", "execution_count": 71, "id": "643ca8f6-c2a0-4dd6-9b70-bd46c9203430", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[1., 2.],\n", " [3., 4.],\n", " [5., 6.],\n", " [7., 8.]])" ] }, "execution_count": 71, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.concatenate((a,b)) # unir las dos matrices por renglones (axis=0 por defecto)" ] }, { "cell_type": "code", "execution_count": 72, "id": "68e732b6-df8a-4f34-830b-3f902493eb6f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[1., 2., 5., 6.],\n", " [3., 4., 7., 8.]])" ] }, "execution_count": 72, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.concatenate((a,b), axis=1) # unir las dos matrices por columnas (axis=1)\n" ] }, { "cell_type": "code", "execution_count": 109, "id": "3da47bc3-8a2d-4508-8814-d75b432274df", "metadata": {}, "outputs": [], "source": [ "import xarray as xr # Librería para procesar datos de gran 
tamaño" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.13" } }, "nbformat": 4, "nbformat_minor": 5 }