PAT = "your_personal_access_token"

# e.g. PAT="edh_pat_44bbb7e9192a4c6bb47ddf07d07564eee5d17de8dfc48f7118f88e3bc4a4157f8fe2403f5aa0a2d53441b6922ea9a33a"

import xarray as xr

ds = xr.open_dataset(
    f"https://edh:{PAT}@data.earthdatahub.destine.eu/derived-GPWv4-Histsoc/demographics-hybrid-1950-2020-15-min-v0",
    storage_options={"client_kwargs":{"trust_env":True}},
    chunks={},
    engine="zarr",
)
ds

<xarray.Dataset> Size: 4GB
Dimensions:               (age_band_lower_bound: 14, latitude: 720,
                           longitude: 1440, year: 71)
Coordinates:
  * age_band_lower_bound  (age_band_lower_bound) int64 112B 0 5 10 ... 55 60 65
  * latitude              (latitude) float64 6kB 90.0 89.75 ... -89.5 -89.75
  * longitude             (longitude) float64 12kB 0.0 0.25 0.5 ... 359.5 359.8
  * year                  (year) int64 568B 1950 1951 1952 ... 2018 2019 2020
Data variables:
    demographic_totals    (latitude, longitude, age_band_lower_bound, year) float32 4GB dask.array<chunksize=(64, 64, 14, 71), meta=np.ndarray>

array([ 0,  5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65])

array([ 90.  ,  89.75,  89.5 , ..., -89.25, -89.5 , -89.75])

array([0.0000e+00, 2.5000e-01, 5.0000e-01, ..., 3.5925e+02, 3.5950e+02,
       3.5975e+02])

array([1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959, 1960, 1961,
       1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970, 1971, 1972, 1973,
       1974, 1975, 1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985,
       1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997,
       1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009,
       2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020])

PandasIndex(Index([0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65], dtype='int64', name='age_band_lower_bound'))

PandasIndex(Index([  90.0,  89.75,   89.5,  89.25,   89.0,  88.75,   88.5,  88.25,   88.0,
        87.75,
       ...
        -87.5, -87.75,  -88.0, -88.25,  -88.5, -88.75,  -89.0, -89.25,  -89.5,
       -89.75],
      dtype='float64', name='latitude', length=720))

PandasIndex(Index([   0.0,   0.25,    0.5,   0.75,    1.0,   1.25,    1.5,   1.75,    2.0,
         2.25,
       ...
        357.5, 357.75,  358.0, 358.25,  358.5, 358.75,  359.0, 359.25,  359.5,
       359.75],
      dtype='float64', name='longitude', length=1440))

PandasIndex(Index([1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959, 1960, 1961,
       1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970, 1971, 1972, 1973,
       1974, 1975, 1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985,
       1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997,
       1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009,
       2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020],
      dtype='int64', name='year'))

ds.demographic_totals.sel(age_band_lower_bound=0, year=2020).plot(vmax=50_000, cmap="YlGnBu_r")

<matplotlib.collections.QuadMesh at 0x7f745ec685d0>

xr.set_options(keep_attrs=True)

ds = ds.assign_coords(longitude=(((ds.longitude + 180) % 360) - 180))
ds = ds.roll(longitude=int(len(ds.longitude) / 2), roll_coords=True)
ds

<xarray.Dataset> Size: 4GB
Dimensions:               (age_band_lower_bound: 14, latitude: 720,
                           longitude: 1440, year: 71)
Coordinates:
  * age_band_lower_bound  (age_band_lower_bound) int64 112B 0 5 10 ... 55 60 65
  * latitude              (latitude) float64 6kB 90.0 89.75 ... -89.5 -89.75
  * year                  (year) int64 568B 1950 1951 1952 ... 2018 2019 2020
  * longitude             (longitude) float64 12kB -180.0 -179.8 ... 179.5 179.8
Data variables:
    demographic_totals    (latitude, longitude, age_band_lower_bound, year) float32 4GB dask.array<chunksize=(64, 64, 14, 71), meta=np.ndarray>

array([ 0,  5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65])

array([ 90.  ,  89.75,  89.5 , ..., -89.25, -89.5 , -89.75])

array([1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959, 1960, 1961,
       1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970, 1971, 1972, 1973,
       1974, 1975, 1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985,
       1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997,
       1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009,
       2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020])

array([-180.  , -179.75, -179.5 , ...,  179.25,  179.5 ,  179.75])

PandasIndex(Index([0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65], dtype='int64', name='age_band_lower_bound'))

PandasIndex(Index([  90.0,  89.75,   89.5,  89.25,   89.0,  88.75,   88.5,  88.25,   88.0,
        87.75,
       ...
        -87.5, -87.75,  -88.0, -88.25,  -88.5, -88.75,  -89.0, -89.25,  -89.5,
       -89.75],
      dtype='float64', name='latitude', length=720))

PandasIndex(Index([1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959, 1960, 1961,
       1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970, 1971, 1972, 1973,
       1974, 1975, 1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985,
       1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997,
       1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009,
       2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020],
      dtype='int64', name='year'))

PandasIndex(Index([ -180.0, -179.75,  -179.5, -179.25,  -179.0, -178.75,  -178.5, -178.25,
        -178.0, -177.75,
       ...
         177.5,  177.75,   178.0,  178.25,   178.5,  178.75,   179.0,  179.25,
         179.5,  179.75],
      dtype='float64', name='longitude', length=1440))

ds.demographic_totals.sel(age_band_lower_bound=0, year=2020).plot(vmax=50_000, cmap="YlGnBu_r")

<matplotlib.collections.QuadMesh at 0x7f745c1cb350>

europe_population = ds.demographic_totals.sel(latitude=slice(70, 35), longitude=slice(-10,41))

europe_population_2020 = europe_population.sel(year = 2020)
europe_population_2020

<xarray.DataArray 'demographic_totals' (latitude: 141, longitude: 205,
                                        age_band_lower_bound: 14)> Size: 2MB
dask.array<getitem, shape=(141, 205, 14), dtype=float32, chunksize=(64, 64, 14), chunktype=numpy.ndarray>
Coordinates:
  * age_band_lower_bound  (age_band_lower_bound) int64 112B 0 5 10 ... 55 60 65
  * latitude              (latitude) float64 1kB 70.0 69.75 69.5 ... 35.25 35.0
    year                  int64 8B 2020
  * longitude             (longitude) float64 2kB -10.0 -9.75 ... 40.75 41.0

array([ 0,  5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65])

array([70.  , 69.75, 69.5 , 69.25, 69.  , 68.75, 68.5 , 68.25, 68.  , 67.75,
       67.5 , 67.25, 67.  , 66.75, 66.5 , 66.25, 66.  , 65.75, 65.5 , 65.25,
       65.  , 64.75, 64.5 , 64.25, 64.  , 63.75, 63.5 , 63.25, 63.  , 62.75,
       62.5 , 62.25, 62.  , 61.75, 61.5 , 61.25, 61.  , 60.75, 60.5 , 60.25,
       60.  , 59.75, 59.5 , 59.25, 59.  , 58.75, 58.5 , 58.25, 58.  , 57.75,
       57.5 , 57.25, 57.  , 56.75, 56.5 , 56.25, 56.  , 55.75, 55.5 , 55.25,
       55.  , 54.75, 54.5 , 54.25, 54.  , 53.75, 53.5 , 53.25, 53.  , 52.75,
       52.5 , 52.25, 52.  , 51.75, 51.5 , 51.25, 51.  , 50.75, 50.5 , 50.25,
       50.  , 49.75, 49.5 , 49.25, 49.  , 48.75, 48.5 , 48.25, 48.  , 47.75,
       47.5 , 47.25, 47.  , 46.75, 46.5 , 46.25, 46.  , 45.75, 45.5 , 45.25,
       45.  , 44.75, 44.5 , 44.25, 44.  , 43.75, 43.5 , 43.25, 43.  , 42.75,
       42.5 , 42.25, 42.  , 41.75, 41.5 , 41.25, 41.  , 40.75, 40.5 , 40.25,
       40.  , 39.75, 39.5 , 39.25, 39.  , 38.75, 38.5 , 38.25, 38.  , 37.75,
       37.5 , 37.25, 37.  , 36.75, 36.5 , 36.25, 36.  , 35.75, 35.5 , 35.25,
       35.  ])

array(2020)

array([-10.  ,  -9.75,  -9.5 , ...,  40.5 ,  40.75,  41.  ])

PandasIndex(Index([0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65], dtype='int64', name='age_band_lower_bound'))

PandasIndex(Index([ 70.0, 69.75,  69.5, 69.25,  69.0, 68.75,  68.5, 68.25,  68.0, 67.75,
       ...
       37.25,  37.0, 36.75,  36.5, 36.25,  36.0, 35.75,  35.5, 35.25,  35.0],
      dtype='float64', name='latitude', length=141))

PandasIndex(Index([-10.0, -9.75,  -9.5, -9.25,  -9.0, -8.75,  -8.5, -8.25,  -8.0, -7.75,
       ...
       38.75,  39.0, 39.25,  39.5, 39.75,  40.0, 40.25,  40.5, 40.75,  41.0],
      dtype='float64', name='longitude', length=205))

import costing

costing.estimate_download_size(ds.demographic_totals, europe_population_2020)

estimated_needed_chunks: 12
estimated_memory_size: 0.195 GB
estimated_download_size: 0.02 GB

europe_total_population_2020 = europe_population_2020.sum("age_band_lower_bound")
europe_total_population_2020

<xarray.DataArray 'demographic_totals' (latitude: 141, longitude: 205)> Size: 116kB
dask.array<sum-aggregate, shape=(141, 205), dtype=float32, chunksize=(64, 64), chunktype=numpy.ndarray>
Coordinates:
  * latitude   (latitude) float64 1kB 70.0 69.75 69.5 69.25 ... 35.5 35.25 35.0
    year       int64 8B 2020
  * longitude  (longitude) float64 2kB -10.0 -9.75 -9.5 ... 40.5 40.75 41.0

array([70.  , 69.75, 69.5 , 69.25, 69.  , 68.75, 68.5 , 68.25, 68.  , 67.75,
       67.5 , 67.25, 67.  , 66.75, 66.5 , 66.25, 66.  , 65.75, 65.5 , 65.25,
       65.  , 64.75, 64.5 , 64.25, 64.  , 63.75, 63.5 , 63.25, 63.  , 62.75,
       62.5 , 62.25, 62.  , 61.75, 61.5 , 61.25, 61.  , 60.75, 60.5 , 60.25,
       60.  , 59.75, 59.5 , 59.25, 59.  , 58.75, 58.5 , 58.25, 58.  , 57.75,
       57.5 , 57.25, 57.  , 56.75, 56.5 , 56.25, 56.  , 55.75, 55.5 , 55.25,
       55.  , 54.75, 54.5 , 54.25, 54.  , 53.75, 53.5 , 53.25, 53.  , 52.75,
       52.5 , 52.25, 52.  , 51.75, 51.5 , 51.25, 51.  , 50.75, 50.5 , 50.25,
       50.  , 49.75, 49.5 , 49.25, 49.  , 48.75, 48.5 , 48.25, 48.  , 47.75,
       47.5 , 47.25, 47.  , 46.75, 46.5 , 46.25, 46.  , 45.75, 45.5 , 45.25,
       45.  , 44.75, 44.5 , 44.25, 44.  , 43.75, 43.5 , 43.25, 43.  , 42.75,
       42.5 , 42.25, 42.  , 41.75, 41.5 , 41.25, 41.  , 40.75, 40.5 , 40.25,
       40.  , 39.75, 39.5 , 39.25, 39.  , 38.75, 38.5 , 38.25, 38.  , 37.75,
       37.5 , 37.25, 37.  , 36.75, 36.5 , 36.25, 36.  , 35.75, 35.5 , 35.25,
       35.  ])

array(2020)

array([-10.  ,  -9.75,  -9.5 , ...,  40.5 ,  40.75,  41.  ])

PandasIndex(Index([ 70.0, 69.75,  69.5, 69.25,  69.0, 68.75,  68.5, 68.25,  68.0, 67.75,
       ...
       37.25,  37.0, 36.75,  36.5, 36.25,  36.0, 35.75,  35.5, 35.25,  35.0],
      dtype='float64', name='latitude', length=141))

PandasIndex(Index([-10.0, -9.75,  -9.5, -9.25,  -9.0, -8.75,  -8.5, -8.25,  -8.0, -7.75,
       ...
       38.75,  39.0, 39.25,  39.5, 39.75,  40.0, 40.25,  40.5, 40.75,  41.0],
      dtype='float64', name='longitude', length=205))

%%time

europe_total_population_2020_computed = europe_total_population_2020.compute();

CPU times: user 839 ms, sys: 184 ms, total: 1.02 s
Wall time: 875 ms

europe_total_population_2020_computed.plot(vmax=300_000, cmap="YlGnBu_r")

<matplotlib.collections.QuadMesh at 0x7f7447fda2d0>

How to work with Hybrid gridded demographic data for the world, 1950-2020, on Earth Data Hub¶

Goal of this tutorial¶

What you will learn:¶

Working with EDH data¶

1. Data access¶

2. Data selection¶

3. Data reduction¶

4. Data downalod¶

5. Visualization¶