Note
This page was generated from examples/notebooks/catalog.ipynb.
Catalog#
[1]:
%matplotlib widget
import starepandas
import pandas
import geopandas
import matplotlib.pyplot as plt
import sqlalchemy
import shapely
import pystare
import numpy
Loading countries#
We load the countries of the world and create STARE index values representing their cover.
[2]:
path = geopandas.datasets.get_path('naturalearth_lowres')
world = geopandas.read_file(path)
n_america = world[world.continent=='North America']
stare = starepandas.sids_from_gdf(n_america, level=8, force_ccw=True)
n_america = starepandas.STAREDataFrame(n_america, sids=stare)
# The trixels are just for visualization
trixels = n_america.make_trixels()
n_america.set_trixels(trixels, inplace=True)
n_america.head()
[2]:
| pop_est | continent | name | iso_a3 | gdp_md_est | geometry | sids | trixels | |
|---|---|---|---|---|---|---|---|---|
| 3 | 37589262.0 | North America | Canada | CAN | 1736425 | MULTIPOLYGON (((-122.84000 49.00000, -122.9742... | [3035426148847714308, 3062447746611937284, 306... | MULTIPOLYGON (((-75.36119 57.63159, -70.81323 ... |
| 4 | 328239523.0 | North America | United States of America | USA | 21433226 | MULTIPOLYGON (((-122.84000 49.00000, -120.0000... | [3332663724254167043, 3071454945866678276, 307... | MULTIPOLYGON (((-90.00000 35.26439, -101.10559... |
| 16 | 11263077.0 | North America | Haiti | HTI | 14332 | POLYGON ((-71.71236 19.71446, -71.62487 19.169... | [3100869080932941831, 3130283215999205383, 310... | MULTIPOLYGON (((-72.49081 19.13394, -72.09835 ... |
| 17 | 10738958.0 | North America | Dominican Rep. | DOM | 88941 | POLYGON ((-71.70830 18.04500, -71.68774 18.316... | [3130072109766672391, 3130107294138761223, 313... | MULTIPOLYGON (((-70.94437 18.60746, -70.17478 ... |
| 19 | 389482.0 | North America | Bahamas | BHS | 13578 | MULTIPOLYGON (((-78.98000 26.79000, -78.51000 ... | [3126113867906678792, 3128875841115652104, 312... | MULTIPOLYGON (((-78.19386 26.86535, -77.77602 ... |
[3]:
fig, ax = plt.subplots(dpi=100, figsize=(14,9))
ax.grid(True)
n_america.plot(ax=ax, trixels=True, boundary=True, column='name', zorder=0, linewidth=0.2)
n_america.plot(ax=ax, trixels=False, facecolor="none", edgecolor='black', zorder=1, linewidth=0.5)
santa_barbara_sid = pystare.from_lonlat([-119.81100397568609], [34.44687326105255], level=5)
santa_barbara = starepandas.STAREDataFrame(sids=santa_barbara_sid).add_trixels().plot(ax=ax, color='red')
fig.tight_layout()
#plt.savefig('n_america.png')
We are creating a catalogue dataframe#
folder2catalogue() scans the path for granules, reads stare cover from the sidecar file as well as the timestamps from the metadata and creatse a catalogue dataframe
[4]:
folder = '../tests/data/catalog/'
catalog = starepandas.folder2catalog(path=folder,
granule_extension='hdf',
add_sf=True)
catalog.add_trixels(inplace=True)
Now we use the catalog to find all granules that intersect our ROI#
[5]:
fig, ax = plt.subplots(dpi=300, figsize=(5,5))
ax.grid(True)
country = n_america[n_america.name=='Greenland']
country.plot(ax=ax)
catalog.plot(ax=ax, color='r')
[5]:
<Axes: >
We can do this with geopandas SF based intersects method#
[6]:
roi_wkt = country.geometry.iloc[0]
cover_intersects = catalog.intersects(roi_wkt)
granule_subset = catalog[cover_intersects].granule_path
granule_subset.head()
[6]:
0 ../tests/data/catalog/MOD05_L2.A2019336.0000.0...
Name: granule_path, dtype: object
We can do this with stare_intersects#
[7]:
roi_stare = n_america[n_america.name=='Greenland']['sids'].iloc[0]
cover_intersects = catalog.stare_intersects(roi_stare)
granule_subset = catalog[cover_intersects].granule_path
msg = 'there are {} granules intersecting the ROI'.format(len(granule_subset))
print(msg)
granule_subset.head()
there are 1 granules intersecting the ROI
[7]:
0 ../tests/data/catalog/MOD05_L2.A2019336.0000.0...
Name: granule_path, dtype: object
Finally, we extract the data from the pre-selected granules that intersects with our ROI#
[8]:
df = pandas.DataFrame()
for granule in granule_subset:
g = starepandas.read_granule(granule, sidecar=True)
intersects = g.stare_intersects(roi_stare)
msg = '{granule} has {n} intersecting points'.format(granule=granule,
n=sum(intersects))
print(msg)
df = pandas.concat([pandas.DataFrame(g)[intersects]])
../tests/data/catalog/MOD05_L2.A2019336.0000.061.2019336211522.hdf has 24686 intersecting points
[9]:
df.head()
[9]:
| sids | Scan_Start_Time | Solar_Zenith | Solar_Azimuth | Sensor_Zenith | Sensor_Azimuth | Water_Vapor_Infrared | |
|---|---|---|---|---|---|---|---|
| 56082 | 3630363253602952779 | 8.493986e+08 | 132.619997 | -54.259999 | 25.859999 | 69.119998 | 0.369 |
| 56083 | 3630363554835632171 | 8.493986e+08 | 132.589997 | -54.389999 | 26.319999 | 69.019998 | 0.358 |
| 56084 | 3630387764322774539 | 8.493986e+08 | 132.559997 | -54.519999 | 26.779999 | 68.919998 | 0.340 |
| 56350 | 3630359803401928043 | 8.493986e+08 | 132.649997 | -53.999999 | 24.939999 | 68.229998 | 0.374 |
| 56351 | 3630364160514931659 | 8.493986e+08 | 132.619997 | -54.129999 | 25.399999 | 68.139998 | 0.357 |
Write to SQL#
We can write the catalogue dataframe to an sql (e.g. sqlite) database. From here, we could use STARELite to query the catalogue, or load it back into a STAREDataframe if we need to
[10]:
catalog
[10]:
| granule_path | sidecar_path | stare_cover | begining | ending | geom | trixels | |
|---|---|---|---|---|---|---|---|
| 0 | ../tests/data/catalog/MOD05_L2.A2019336.0000.0... | ../tests/data/catalog/MOD05_L2.A2019336.0000.0... | [3604075970547417096, 3604089164686950409, 360... | 2019-12-02 00:00:00 | 2019-12-02 00:05:00 | POLYGON ((-15.93400 53.20178, -15.93275 53.292... | MULTIPOLYGON (((-45.00000 61.32495, -37.69296 ... |
| 1 | ../tests/data/catalog/MOD05_L2.A2005349.2125.0... | ../tests/data/catalog/MOD05_L2.A2005349.2125.0... | [864691128455135238, 864867050315579401, 86486... | 2005-12-15 21:25:00 | 2005-12-15 21:30:00 | POLYGON ((-171.02278 37.52311, -171.02644 37.4... | MULTIPOLYGON (((-160.28950 23.13179, -165.1269... |
| 2 | ../tests/data/catalog/MOD05_L2.A2020254.1320.0... | ../tests/data/catalog/MOD05_L2.A2020254.1320.0... | [2461217196357976072, 2461228191474253833, 246... | 2020-09-10 13:20:00 | 2020-09-10 13:25:00 | POLYGON ((-49.78107 29.28507, -49.78662 29.195... | MULTIPOLYGON (((-36.58975 18.83358, -40.88022 ... |
[12]:
db_path = '' # Empty for in-memory
uri = 'sqlite:///{db_path}'.format(db_path=db_path)
engine = sqlalchemy.create_engine(uri)
catalog_ewkb = geopandas.io.sql._convert_to_ewkb(catalog, 'geom', 4326)
catalog_ewkb[['granule_path', 'sidecar_path', 'stare_cover', 'begining', 'ending', 'geom']].to_sql(name='catalog', con=engine, if_exists='replace', index=False)
[12]:
3
Load from SQL#
[13]:
cover_intersects = pandas.read_sql(sql='catalog', con=engine)
# Geometry and STARE are stored in blobs, so we need to deserialize
cover_intersects.stare_cover = cover_intersects.stare_cover.apply(func=numpy.frombuffer, args=('int64',))
cover_intersects.geom = cover_intersects.geom.apply(shapely.wkb.loads, args=(True,))
# We convert the conventional DF to a STAREDataFrame and set the stare column
cover_intersects = starepandas.STAREDataFrame(cover_intersects, sids='stare_cover')
[ ]: