import dask.distributed
import folium
import folium.plugins
import geopandas as gpd
import odc.ui
import shapely.geometry
import yaml
from branca.element import Figure
from IPython.display import HTML, display
from odc.algo import to_rgba
from pystac_client import Client
from odc.stac import configure_rio, stac_load
Access Sentinel 2 Data from AWS
https://registry.opendata.aws/sentinel-2-l2a-cogs/
def convert_bounds(bbox, invert_y=False):
"""
Helper method for changing bounding box representation to leaflet notation
``(lon1, lat1, lon2, lat2) -> ((lat1, lon1), (lat2, lon2))``
"""
= bbox
x1, y1, x2, y2 if invert_y:
= y2, y1
y1, y2 return ((y1, x1), (y2, x2))
= """---
cfg sentinel-s2-l2a-cogs:
assets:
'*':
data_type: uint16
nodata: 0
unit: '1'
SCL:
data_type: uint8
nodata: 0
unit: '1'
visual:
data_type: uint8
nodata: 0
unit: '1'
aliases: # Alias -> Canonical Name
red: B04
green: B03
blue: B02
"*":
warnings: ignore # Disable warnings about duplicate common names
"""
= yaml.load(cfg, Loader=yaml.SafeLoader) cfg
Start Dask Client
This step is optional, but it does improve load speed significantly. You don’t have to use Dask, as you can load data directly into memory of the notebook.
= dask.distributed.Client()
client =True, aws={"aws_unsigned": True}, client=client)
configure_rio(cloud_defaults display(client)
Find STAC Items to Load
= 1.0 / 111
km2deg = (113.887, -25.843) # Center point of a query
x, y = 100 * km2deg
r = (x - r, y - r, x + r, y + r)
bbox
= Client.open("https://earth-search.aws.element84.com/v1")
catalog
= catalog.search(
query =["sentinel-2-l2a"], datetime="2021-09-16", limit=100, bbox=bbox
collections
)
= list(query.items())
items print(f"Found: {len(items):d} datasets")
# Convert STAC items into a GeoJSON FeatureCollection
= query.item_collection_as_dict() stac_json
Review Query Result
We’ll use GeoPandas DataFrame object to make plotting easier.
= gpd.GeoDataFrame.from_features(stac_json, "epsg:4326")
gdf
# Compute granule id from components
"granule"] = (
gdf["mgrs:utm_zone"].apply(lambda x: f"{x:02d}")
gdf[+ gdf["mgrs:latitude_band"]
+ gdf["mgrs:grid_square"]
)
= gdf.plot(
fig "granule",
="black",
edgecolor=True,
categorical="equal",
aspect=0.5,
alpha=(6, 12),
figsize=True,
legend={"loc": "upper left", "frameon": False, "ncol": 1},
legend_kwds
)= fig.set_title("STAC Query Results") _
Plot STAC Items on a Map
# https://github.com/python-visualization/folium/issues/1501
= Figure(width="400px", height="500px")
fig = folium.Map()
map1
fig.add_child(map1)
folium.GeoJson(*bbox),
shapely.geometry.box(=lambda x: dict(fill=False, weight=1, opacity=0.7, color="olive"),
style_function="Query",
name
).add_to(map1)
gdf.explore("granule",
=True,
categorical=[
tooltip"granule",
"datetime",
"s2:nodata_pixel_percentage",
"eo:cloud_cover",
],=True,
popup=dict(fillOpacity=0.1, width=2),
style_kwds="STAC",
name=map1,
m
)
=convert_bounds(gdf.unary_union.bounds))
map1.fit_bounds(bounds display(fig)
Construct Dask Dataset
Note that even though there are 9 STAC Items on input, there is only one timeslice on output. This is because of groupby="solar_day"
. With that setting stac_load
will place all items that occured on the same day (as adjusted for the timezone) into one image plane.
# Since we will plot it on a map we need to use `EPSG:3857` projection
= "epsg:3857"
crs = 2**5 # overview level 5
zoom
= stac_load(
xx
items,=("red", "green", "blue"),
bands=crs,
crs=10 * zoom,
resolution={}, # <-- use Dask
chunks="solar_day",
groupby=cfg,
stac_cfg
) display(xx)
Load data and convert to RGBA
%%time
= to_rgba(xx, clamp=(1, 3000))
rgba = rgba.compute() _rgba
Display Image on a map
= folium.Map()
map2
folium.GeoJson(*bbox),
shapely.geometry.box(=lambda x: dict(fill=False, weight=1, opacity=0.7, color="olive"),
style_function="Query",
name
).add_to(map2)
gdf.explore("granule",
=True,
categorical=[
tooltip"granule",
"datetime",
"s2:nodata_pixel_percentage",
"eo:cloud_cover",
],=True,
popup=dict(fillOpacity=0.1, width=2),
style_kwds="STAC",
name=map2,
m
)
# Image bounds are specified in Lat/Lon order with Lat axis inversed
= convert_bounds(_rgba.geobox.geographic_extent.boundingbox, invert_y=True)
image_bounds = folium.raster_layers.ImageOverlay(
img_ovr =0).data, bounds=image_bounds, name="Image"
_rgba.isel(time
)
img_ovr.add_to(map2)=image_bounds)
map2.fit_bounds(bounds
folium.LayerControl().add_to(map2)
folium.plugins.Fullscreen().add_to(map2) map2
Load with bounding box
As you can see stac_load
returned all the data covered by STAC items returned from the query. This happens by default as stac_load
has no way of knowing what your query was. But it is possible to control what region is loaded. There are several mechanisms available, but probably simplest one is to use bbox=
parameter (compatible with stac_client
).
Let’s load a small region at native resolution to demonstrate.
= 6.5 * km2deg
r = (x - r, y - r, x + r, y + r)
small_bbox
= stac_load(
yy
items,=("red", "green", "blue"),
bands=crs,
crs=10,
resolution={}, # <-- use Dask
chunks="solar_day",
groupby=cfg,
stac_cfg=small_bbox,
bbox
)= to_rgba(yy, clamp=(1, 3000)).compute() im_small
= odc.ui.mk_data_uri(
img_zoomed_in =0).data, quality=80), "image/jpeg"
odc.ui.to_jpeg_data(im_small.isel(time
)print(f"Image url: {img_zoomed_in[:64]}...")
HTML(=f"""
data<style> .img-two-column{{
width: 50%;
float: left;
}}</style>
<img src="{img_zoomed_in}" alt="Sentinel-2 Zoom in" class="img-two-column">
<img src="{img_ovr.url}" alt="Sentinel-2 Mosaic" class="img-two-column">
"""
)
# Test change