61. Reading earthcare level1b data#
This notebook does a quick inspection of one of the earthcare cases, with plots of the radar reflectivity and doppler velocity. The earthcare data is stored in hdf5 format, which is also the underlying data format for netcdf. Unlike netcdf, the files don’t supply dimensions or coordinates, so we have to figure out those ourselves.
61.1. Installation#
Download a case folder from the satdata/earthcare folder on our gdrive folder
fetch and rebase from upstream/main to get this notebook in week11
from pathlib import Path
import xarray as xr
import numpy as np
import pyproj
from matplotlib import pyplot as plt
import datetime
import pytz
import pyproj
61.2. Find the case files#
Organize them in a dictionary keyed by the case name, which is at the start of the relative file path. pathlib has a function that finds the relative path, and also can break the folder path into its various parts
data_dir = Path().home() / 'repos/a301/satdata/earthcare'
radar_filepaths = list(data_dir.glob("**/*.h5"))
filepaths=dict()
for filepath in radar_filepaths:
relpath = filepath.relative_to(data_dir)
casename = relpath.parts[0]
filepaths[casename]=filepath
relpath
PosixPath('case4/ECA_JXCA_CPR_NOM_1B_20241213T224328Z_20250213T185124Z_03096D.h5')
filepaths
{'case13': PosixPath('/Users/phil/repos/a301/satdata/earthcare/case13/ECA_JXCA_CPR_NOM_1B_20250213T183051Z_20250213T214347Z_04058D.h5'),
'case25': PosixPath('/Users/phil/repos/a301/satdata/earthcare/case25/ECA_JXCA_CPR_NOM_1B_20240905T201951Z_20250207T032255Z_01554D.h5'),
'case22': PosixPath('/Users/phil/repos/a301/satdata/earthcare/case22/ECA_JXCA_CPR_NOM_1B_20241103T201606Z_20250210T071959Z_02472D.h5'),
'case12': PosixPath('/Users/phil/repos/a301/satdata/earthcare/case12/ECA_JXCA_CPR_NOM_1B_20250130T225341Z_20250131T003943Z_03843D.h5'),
'case15': PosixPath('/Users/phil/repos/a301/satdata/earthcare/case15/ECA_JXCA_CPR_NOM_1B_20250225T185602Z_20250225T220905Z_04245D.h5'),
'case6': PosixPath('/Users/phil/repos/a301/satdata/earthcare/case6/ECA_JXCA_CPR_NOM_1B_20241225T200406Z_20250223T144933Z_03281D.h5'),
'case30': PosixPath('/Users/phil/repos/a301/satdata/earthcare/case30/ECA_JXCA_CPR_NOM_1B_20240831T200203Z_20250205T175426Z_01476D.h5'),
'case9': PosixPath('/Users/phil/repos/a301/satdata/earthcare/case9/ECA_JXCA_CPR_NOM_1B_20250117T201449Z_20250117T215611Z_03639D.h5'),
'case7': PosixPath('/Users/phil/repos/a301/satdata/earthcare/case7/ECA_JXCA_CPR_NOM_1B_20241227T195312Z_20250223T164600Z_03312D.h5'),
'case31': PosixPath('/Users/phil/repos/a301/satdata/earthcare/case31/ECA_JXCA_CPR_NOM_1B_20240727T190018Z_20250225T155058Z_00931D.h5'),
'case17': PosixPath('/Users/phil/repos/a301/satdata/earthcare/case17/ECA_JXCA_CPR_NOM_1B_20250216T190020Z_20250216T222300Z_04105D.h5'),
'case28': PosixPath('/Users/phil/repos/a301/satdata/earthcare/case28/ECA_JXCA_CPR_NOM_1B_20240903T203112Z_20250207T021445Z_01523D.h5'),
'case21': PosixPath('/Users/phil/repos/a301/satdata/earthcare/case21/ECA_JXCA_CPR_NOM_1B_20241225T200406Z_20250223T144933Z_03281D.h5'),
'case26': PosixPath('/Users/phil/repos/a301/satdata/earthcare/case26/ECA_JXCA_CPR_NOM_1B_20240911T194535Z_20250207T064304Z_01647D.h5'),
'case19': PosixPath('/Users/phil/repos/a301/satdata/earthcare/case19/ECA_JXCA_CPR_NOM_1B_20250118T192256Z_20250118T223604Z_03654D.h5'),
'case27': PosixPath('/Users/phil/repos/a301/satdata/earthcare/case27/ECA_JXCA_CPR_NOM_1B_20240908T191638Z_20250207T045809Z_01600D.h5'),
'case20': PosixPath('/Users/phil/repos/a301/satdata/earthcare/case20/ECA_JXCA_CPR_NOM_1B_20241227T195312Z_20250223T164600Z_03312D.h5'),
'case29': PosixPath('/Users/phil/repos/a301/satdata/earthcare/case29/ECA_JXCA_CPR_NOM_1B_20240901T191008Z_20250207T012629Z_01491D.h5'),
'case11': PosixPath('/Users/phil/repos/a301/satdata/earthcare/case11/ECA_JXCA_CPR_NOM_1B_20250119T183101Z_20250119T214712Z_03669D.h5'),
'case4': PosixPath('/Users/phil/repos/a301/satdata/earthcare/case4/ECA_JXCA_CPR_NOM_1B_20241213T224328Z_20250213T185124Z_03096D.h5')}
def sort_keys(the_case):
number = the_case[4:]
return int(number)
sorted_keys = list(filepaths.keys())
sorted_keys.sort(key=sort_keys)
casenum = 'case4'
#sorted_keys
61.3. Read the data and metadata#
These are netcdf files, so no dimensions or coordinates, we’ll need to create these. Start by naming the time and height dimensions
cpr_data = xr.open_dataset(filepaths[casenum], engine='h5netcdf', group='/ScienceData/Data',
decode_times=True, phony_dims='access')
cpr_data = cpr_data.rename_dims({'phony_dim_0':'time','phony_dim_1': 'height'})
cpr_meta = xr.open_dataset(filepaths[casenum], engine='h5netcdf', group='/ScienceData/Geo',
decode_times=True, phony_dims='access')
cpr_meta = cpr_meta.rename_dims({'phony_dim_0':'time','phony_dim_1': 'height'})
cpr_meta
<xarray.Dataset> Size: 10MB
Dimensions: (time: 9946, height: 218, phony_dim_2: 1)
Dimensions without coordinates: time, height, phony_dim_2
Data variables: (12/25)
binHeight (time, height) float32 9MB ...
latitude (time) float64 80kB ...
longitude (time) float64 80kB ...
navigationLandWaterFlg (time) float32 40kB ...
pitchAngle (time) float32 40kB ...
processingFrameNo (time) float32 40kB ...
... ...
surfaceElevation (time) float32 40kB ...
timeFlag (time) float32 40kB ...
xPosition (time) float64 80kB ...
yPosition (time) float64 80kB ...
yawAngle (time) float32 40kB ...
zPosition (time) float64 80kB ...61.4. construct height, time, distance coords#
61.4.1. get times#
def find_times(filepath):
"""
times are stored as seconds after Jan 1, 2000
use the datetime.timedelta function to increment from
that start time
Also set the timezone as utc
"""
cpr_meta = xr.open_dataset(filepath, engine='h5netcdf', group='/ScienceData/Geo',
decode_times=False, phony_dims='access')
times = cpr_meta['profileTime'].data
start_time = datetime.datetime(2000,1,1,0,0,0)
the_times =[start_time + datetime.timedelta(seconds=item) for item in times]
the_times = [item.replace(tzinfo = pytz.utc) for item in the_times]
return the_times
the_times = find_times(filepaths[casenum])
print(len(the_times))
the_times[:4]
9946
[datetime.datetime(2024, 12, 13, 22, 43, 31, 511866, tzinfo=<UTC>),
datetime.datetime(2024, 12, 13, 22, 43, 31, 580652, tzinfo=<UTC>),
datetime.datetime(2024, 12, 13, 22, 43, 31, 649437, tzinfo=<UTC>),
datetime.datetime(2024, 12, 13, 22, 43, 31, 718223, tzinfo=<UTC>)]
61.4.2. print the case times#
for casename in sorted_keys:
filepath = filepaths[casename]
check_times = find_times(filepath)
print(casename, check_times[0], check_times[-1])
case4 2024-12-13 22:43:31.511866+00:00 2024-12-13 22:55:21.904667+00:00
case6 2024-12-25 20:04:09.702314+00:00 2024-12-25 20:16:00.055151+00:00
case7 2024-12-27 19:53:15.007300+00:00 2024-12-27 20:05:05.358234+00:00
case9 2025-01-17 20:14:51.729728+00:00 2025-01-17 20:26:42.075556+00:00
case11 2025-01-19 18:31:04.489242+00:00 2025-01-19 18:42:54.702278+00:00
case12 2025-01-30 22:53:44.522380+00:00 2025-01-30 23:05:34.735525+00:00
case13 2025-02-13 18:30:54.491970+00:00 2025-02-13 18:42:44.837739+00:00
case15 2025-02-25 18:56:05.332980+00:00 2025-02-25 19:07:55.542302+00:00
case17 2025-02-16 19:00:23.101428+00:00 2025-02-16 19:12:13.312624+00:00
case19 2025-01-18 19:22:59.248577+00:00 2025-01-18 19:34:49.594296+00:00
case20 2024-12-27 19:53:15.007300+00:00 2024-12-27 20:05:05.358234+00:00
case21 2024-12-25 20:04:09.702314+00:00 2024-12-25 20:16:00.055151+00:00
case22 2024-11-03 20:16:08.522453+00:00 2024-11-03 20:27:58.909370+00:00
case25 2024-09-05 20:19:53.853945+00:00 2024-09-05 20:31:43.653490+00:00
case26 2024-09-11 19:45:38.173358+00:00 2024-09-11 19:57:27.924280+00:00
case27 2024-09-08 19:16:40.803839+00:00 2024-09-08 19:28:30.554429+00:00
case28 2024-09-03 20:31:15.664716+00:00 2024-09-03 20:43:05.463916+00:00
case29 2024-09-01 19:10:11.047918+00:00 2024-09-01 19:22:00.842677+00:00
case30 2024-08-31 20:02:05.833955+00:00 2024-08-31 20:13:55.765801+00:00
case31 2024-07-27 19:00:21.082480+00:00 2024-07-27 19:12:12.720094+00:00
61.4.3. get binheights#
def get_binheights(cpr_meta):
"""
the radar bin heights are stored for every radar pulse. If the terrain is
roughly level, these should be the same pulse to pulse, so just use the
first one
Some radar height bins are missing, so fill those in by finding the average
distance between bins and using that as an increment
Questions -- what does [...] accomplish here? Why all the float() calls?
"""
the_bins = cpr_meta.binHeight[...]
the_bins = the_bins[0,:].data
diff_bins = np.diff(the_bins)
del_y = float(np.nanmean(diff_bins))
new_y = [float(the_bins[0])]
for count,old_y in enumerate(the_bins[1:]):
if np.isnan(old_y):
new_y.append(float(new_y[count]) + float(del_y))
else:
new_y.append(float(old_y))
return np.array(new_y)
heights = get_binheights(cpr_meta)
61.4.4. get the along-track distance#
We want to turn time into distance, so use pyproj to find the great circle distance between each set of pulses, using their lon, lat coords
great_circle=pyproj.Geod(ellps='WGS84')
meters2km = 1.e-3
def calc_distance(lonvec,latvec):
distance=[0]
startlon,startlat = lonvec[0],latvec[0]
for lon,lat in zip(lonvec[1:],latvec[1:]):
azi12,azi21,step= great_circle.inv(startlon,startlat,lon,lat)
distance.append(distance[-1] + step)
startlon,startlat = lon, lat
distance=np.array(distance)*meters2km
return distance
lonvec = cpr_meta['longitude'].data
latvec = cpr_meta['latitude'].data
distance = calc_distance(lonvec, latvec)
distance[:4]
array([0. , 0.50020804, 1.00041453, 1.50069449])
61.5. add time, height coords#
coords = dict(time=("time", the_times),
height=("height",heights),
distance = ("time",distance))
cpr_meta = cpr_meta.assign_coords(coords=coords)
cpr_data = cpr_data.assign_coords(coords=coords)
cpr_meta.coords
Coordinates:
* time (time) object 80kB 2024-12-13T22:43:31.511866+00:00 ... 2024-12...
* height (height) float64 2kB 1.618e+04 1.608e+04 ... -5.401e+03 -5.501e+03
distance (time) float64 80kB 0.0 0.5002 1.0 ... 5.172e+03 5.173e+03
61.6. plot the radar reflectivity#
radar = cpr_data['radarReflectivityFactor'].T
radar.shape
(218, 9946)
dbZ = 10*np.log10(radar)
/Users/phil/mini310/envs/a301/lib/python3.13/site-packages/xarray/computation/apply_ufunc.py:821: RuntimeWarning: divide by zero encountered in log10
result_data = func(*input_data)
/Users/phil/mini310/envs/a301/lib/python3.13/site-packages/xarray/computation/apply_ufunc.py:821: RuntimeWarning: invalid value encountered in log10
result_data = func(*input_data)
len(distance),dbZ.shape,len(heights)
(9946, (218, 9946), 218)
dbZ.coords
Coordinates:
* time (time) object 80kB 2024-12-13T22:43:31.511866+00:00 ... 2024-12...
* height (height) float64 2kB 1.618e+04 1.608e+04 ... -5.401e+03 -5.501e+03
distance (time) float64 80kB 0.0 0.5002 1.0 ... 5.172e+03 5.173e+03
fig, ax = plt.subplots(1, 1, figsize=(12, 4))
dbZ.plot.pcolormesh(x="distance",y="height",vmin=-3,vmax=25);
ax.set_title(casenum);
61.7. plot the doppler velocity#
velocity = cpr_data['dopplerVelocity'].T
fig, ax = plt.subplots(1, 1, figsize=(12, 4))
velocity.plot.pcolormesh(ax=ax,x="distance",y="height",vmin=-3,vmax=3)
ax.set_xlabel("distance (km)")
ax.set_ylabel("height (m)")
ax.set_title(casenum);
velocity =velocity.set_xindex("distance")
61.8. Your assignment#
Pick one of the available earthcare cases, and find the closest GOES 16 image to the central time of the earthcare segment. Plot the eathcare ground track on top of the clipped goes false color image