You can run this notebook in a live session Binder or view it on Github.

Calculating Seasonal Averages from Time Series of Monthly Means

Author: Joe Hamman

The data used for this example can be found in the xarray-data repository. You may need to change the path to rasm.nc below.

Suppose we have a netCDF or xarray.Dataset of monthly mean data and we want to calculate the seasonal average. To do this properly, we need to calculate the weighted average considering that each month has a different number of days.

[1]:
%matplotlib inline
import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt

Open the Dataset

[2]:
ds = xr.tutorial.open_dataset("rasm").load()
ds
---------------------------------------------------------------------------
gaierror                                  Traceback (most recent call last)
File /usr/lib/python3/dist-packages/urllib3/connection.py:174, in HTTPConnection._new_conn(self)
    173 try:
--> 174     conn = connection.create_connection(
    175         (self._dns_host, self.port), self.timeout, **extra_kw
    176     )
    178 except SocketTimeout:

File /usr/lib/python3/dist-packages/urllib3/util/connection.py:73, in create_connection(address, timeout, source_address, socket_options)
     69     return six.raise_from(
     70         LocationParseError("'%s', label empty or too long" % host), None
     71     )
---> 73 for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
     74     af, socktype, proto, canonname, sa = res

File /usr/lib/python3.12/socket.py:964, in getaddrinfo(host, port, family, type, proto, flags)
    963 addrlist = []
--> 964 for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
    965     af, socktype, proto, canonname, sa = res

gaierror: [Errno -3] Temporary failure in name resolution

During handling of the above exception, another exception occurred:

NewConnectionError                        Traceback (most recent call last)
File /usr/lib/python3/dist-packages/urllib3/connectionpool.py:716, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    715 # Make the request on the httplib connection object.
--> 716 httplib_response = self._make_request(
    717     conn,
    718     method,
    719     url,
    720     timeout=timeout_obj,
    721     body=body,
    722     headers=headers,
    723     chunked=chunked,
    724 )
    726 # If we're going to release the connection in ``finally:``, then
    727 # the response doesn't need to know about the connection. Otherwise
    728 # it will also try to release it and we'll have a double-release
    729 # mess.

File /usr/lib/python3/dist-packages/urllib3/connectionpool.py:405, in HTTPConnectionPool._make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
    404 try:
--> 405     self._validate_conn(conn)
    406 except (SocketTimeout, BaseSSLError) as e:
    407     # Py2 raises this as a BaseSSLError, Py3 raises it as socket timeout.

File /usr/lib/python3/dist-packages/urllib3/connectionpool.py:1059, in HTTPSConnectionPool._validate_conn(self, conn)
   1058 if not getattr(conn, "sock", None):  # AppEngine might not have  `.sock`
-> 1059     conn.connect()
   1061 if not conn.is_verified:

File /usr/lib/python3/dist-packages/urllib3/connection.py:363, in HTTPSConnection.connect(self)
    361 def connect(self):
    362     # Add certificate verification
--> 363     self.sock = conn = self._new_conn()
    364     hostname = self.host

File /usr/lib/python3/dist-packages/urllib3/connection.py:186, in HTTPConnection._new_conn(self)
    185 except SocketError as e:
--> 186     raise NewConnectionError(
    187         self, "Failed to establish a new connection: %s" % e
    188     )
    190 return conn

NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x7f3fa95feb70>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution

During handling of the above exception, another exception occurred:

MaxRetryError                             Traceback (most recent call last)
File /usr/lib/python3/dist-packages/requests/adapters.py:667, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies)
    666 try:
--> 667     resp = conn.urlopen(
    668         method=request.method,
    669         url=url,
    670         body=request.body,
    671         headers=request.headers,
    672         redirect=False,
    673         assert_same_host=False,
    674         preload_content=False,
    675         decode_content=False,
    676         retries=self.max_retries,
    677         timeout=timeout,
    678         chunked=chunked,
    679     )
    681 except (ProtocolError, OSError) as err:

File /usr/lib/python3/dist-packages/urllib3/connectionpool.py:800, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    798     e = ProtocolError("Connection aborted.", e)
--> 800 retries = retries.increment(
    801     method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2]
    802 )
    803 retries.sleep()

File /usr/lib/python3/dist-packages/urllib3/util/retry.py:592, in Retry.increment(self, method, url, response, error, _pool, _stacktrace)
    591 if new_retry.is_exhausted():
--> 592     raise MaxRetryError(_pool, url, error or ResponseError(cause))
    594 log.debug("Incremented Retry for (url='%s'): %r", url, new_retry)

MaxRetryError: HTTPSConnectionPool(host='github.com', port=443): Max retries exceeded with url: /pydata/xarray-data/raw/master/rasm.nc (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f3fa95feb70>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution'))

During handling of the above exception, another exception occurred:

ConnectionError                           Traceback (most recent call last)
Cell In[2], line 1
----> 1 ds = xr.tutorial.open_dataset("rasm").load()
      2 ds

File /usr/lib/python3/dist-packages/xarray/tutorial.py:161, in open_dataset(name, cache, cache_dir, engine, **kws)
    158     url = f"{base_url}/raw/{version}/{path.name}"
    160 # retrieve the file
--> 161 filepath = pooch.retrieve(url=url, known_hash=None, path=cache_dir)
    162 ds = _open_dataset(filepath, engine=engine, **kws)
    163 if not cache:

File /usr/lib/python3/dist-packages/pooch/core.py:239, in retrieve(url, known_hash, fname, path, processor, downloader, progressbar)
    236 if downloader is None:
    237     downloader = choose_downloader(url, progressbar=progressbar)
--> 239 stream_download(url, full_path, known_hash, downloader, pooch=None)
    241 if known_hash is None:
    242     get_logger().info(
    243         "SHA256 hash of downloaded file: %s\n"
    244         "Use this value as the 'known_hash' argument of 'pooch.retrieve'"
   (...)
    247         file_hash(str(full_path)),
    248     )

File /usr/lib/python3/dist-packages/pooch/core.py:807, in stream_download(url, fname, known_hash, downloader, pooch, retry_if_failed)
    803 try:
    804     # Stream the file to a temporary so that we can safely check its
    805     # hash before overwriting the original.
    806     with temporary_file(path=str(fname.parent)) as tmp:
--> 807         downloader(url, tmp, pooch)
    808         hash_matches(tmp, known_hash, strict=True, source=str(fname.name))
    809         shutil.move(tmp, str(fname))

File /usr/lib/python3/dist-packages/pooch/downloaders.py:208, in HTTPDownloader.__call__(self, url, output_file, pooch, check_only)
    206     output_file = open(output_file, "w+b")
    207 try:
--> 208     response = requests.get(url, **kwargs)
    209     response.raise_for_status()
    210     content = response.iter_content(chunk_size=self.chunk_size)

File /usr/lib/python3/dist-packages/requests/api.py:73, in get(url, params, **kwargs)
     62 def get(url, params=None, **kwargs):
     63     r"""Sends a GET request.
     64
     65     :param url: URL for the new :class:`Request` object.
   (...)
     70     :rtype: requests.Response
     71     """
---> 73     return request("get", url, params=params, **kwargs)

File /usr/lib/python3/dist-packages/requests/api.py:59, in request(method, url, **kwargs)
     55 # By using the 'with' statement we are sure the session is closed, thus we
     56 # avoid leaving sockets open which can trigger a ResourceWarning in some
     57 # cases, and look like a memory leak in others.
     58 with sessions.Session() as session:
---> 59     return session.request(method=method, url=url, **kwargs)

File /usr/lib/python3/dist-packages/requests/sessions.py:589, in Session.request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
    584 send_kwargs = {
    585     "timeout": timeout,
    586     "allow_redirects": allow_redirects,
    587 }
    588 send_kwargs.update(settings)
--> 589 resp = self.send(prep, **send_kwargs)
    591 return resp

File /usr/lib/python3/dist-packages/requests/sessions.py:703, in Session.send(self, request, **kwargs)
    700 start = preferred_clock()
    702 # Send the request
--> 703 r = adapter.send(request, **kwargs)
    705 # Total elapsed time of the request (approximately)
    706 elapsed = preferred_clock() - start

File /usr/lib/python3/dist-packages/requests/adapters.py:700, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies)
    696     if isinstance(e.reason, _SSLError):
    697         # This branch is for urllib3 v1.22 and later.
    698         raise SSLError(e, request=request)
--> 700     raise ConnectionError(e, request=request)
    702 except ClosedPoolError as e:
    703     raise ConnectionError(e, request=request)

ConnectionError: HTTPSConnectionPool(host='github.com', port=443): Max retries exceeded with url: /pydata/xarray-data/raw/master/rasm.nc (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f3fa95feb70>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution'))

Now for the heavy lifting:

We first have to come up with the weights, - calculate the month length for each monthly data record - calculate weights using groupby('time.season')

Finally, we just need to multiply our weights by the Dataset and sum along the time dimension. Creating a DataArray for the month length is as easy as using the days_in_month accessor on the time coordinate. The calendar type, in this case 'noleap', is automatically considered in this operation.

[3]:
month_length = ds.time.dt.days_in_month
month_length
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[3], line 1
----> 1 month_length = ds.time.dt.days_in_month
      2 month_length

NameError: name 'ds' is not defined
[4]:
# Calculate the weights by grouping by 'time.season'.
weights = (
    month_length.groupby("time.season") / month_length.groupby("time.season").sum()
)

# Test that the sum of the weights for each season is 1.0
np.testing.assert_allclose(weights.groupby("time.season").sum().values, np.ones(4))

# Calculate the weighted average
ds_weighted = (ds * weights).groupby("time.season").sum(dim="time")
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[4], line 3
      1 # Calculate the weights by grouping by 'time.season'.
      2 weights = (
----> 3     month_length.groupby("time.season") / month_length.groupby("time.season").sum()
      4 )
      6 # Test that the sum of the weights for each season is 1.0
      7 np.testing.assert_allclose(weights.groupby("time.season").sum().values, np.ones(4))

NameError: name 'month_length' is not defined
[5]:
ds_weighted
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[5], line 1
----> 1 ds_weighted

NameError: name 'ds_weighted' is not defined
[6]:
# only used for comparisons
ds_unweighted = ds.groupby("time.season").mean("time")
ds_diff = ds_weighted - ds_unweighted
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[6], line 2
      1 # only used for comparisons
----> 2 ds_unweighted = ds.groupby("time.season").mean("time")
      3 ds_diff = ds_weighted - ds_unweighted

NameError: name 'ds' is not defined
[7]:
# Quick plot to show the results
notnull = pd.notnull(ds_unweighted["Tair"][0])

fig, axes = plt.subplots(nrows=4, ncols=3, figsize=(14, 12))
for i, season in enumerate(("DJF", "MAM", "JJA", "SON")):
    ds_weighted["Tair"].sel(season=season).where(notnull).plot.pcolormesh(
        ax=axes[i, 0],
        vmin=-30,
        vmax=30,
        cmap="Spectral_r",
        add_colorbar=True,
        extend="both",
    )

    ds_unweighted["Tair"].sel(season=season).where(notnull).plot.pcolormesh(
        ax=axes[i, 1],
        vmin=-30,
        vmax=30,
        cmap="Spectral_r",
        add_colorbar=True,
        extend="both",
    )

    ds_diff["Tair"].sel(season=season).where(notnull).plot.pcolormesh(
        ax=axes[i, 2],
        vmin=-0.1,
        vmax=0.1,
        cmap="RdBu_r",
        add_colorbar=True,
        extend="both",
    )

    axes[i, 0].set_ylabel(season)
    axes[i, 1].set_ylabel("")
    axes[i, 2].set_ylabel("")

for ax in axes.flat:
    ax.axes.get_xaxis().set_ticklabels([])
    ax.axes.get_yaxis().set_ticklabels([])
    ax.axes.axis("tight")
    ax.set_xlabel("")

axes[0, 0].set_title("Weighted by DPM")
axes[0, 1].set_title("Equal Weighting")
axes[0, 2].set_title("Difference")

plt.tight_layout()

fig.suptitle("Seasonal Surface Air Temperature", fontsize=16, y=1.02)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[7], line 2
      1 # Quick plot to show the results
----> 2 notnull = pd.notnull(ds_unweighted["Tair"][0])
      4 fig, axes = plt.subplots(nrows=4, ncols=3, figsize=(14, 12))
      5 for i, season in enumerate(("DJF", "MAM", "JJA", "SON")):

NameError: name 'ds_unweighted' is not defined
[8]:
# Wrap it into a simple function
def season_mean(ds, calendar="standard"):
    # Make a DataArray with the number of days in each month, size = len(time)
    month_length = ds.time.dt.days_in_month

    # Calculate the weights by grouping by 'time.season'
    weights = (
        month_length.groupby("time.season") / month_length.groupby("time.season").sum()
    )

    # Test that the sum of the weights for each season is 1.0
    np.testing.assert_allclose(weights.groupby("time.season").sum().values, np.ones(4))

    # Calculate the weighted average
    return (ds * weights).groupby("time.season").sum(dim="time")