Note

This page was generated from examples/notebooks/catalog.ipynb.

Catalog#

[1]:
%matplotlib widget
import starepandas
import pandas
import geopandas
import matplotlib.pyplot as plt
import sqlalchemy
import shapely
import pystare
import numpy

Loading countries#

We load the countries of the world and create STARE index values representing their cover.

[2]:
path = geopandas.datasets.get_path('naturalearth_lowres')
world = geopandas.read_file(path)
n_america = world[world.continent=='North America']

stare = starepandas.sids_from_gdf(n_america, level=8, force_ccw=True)
n_america = starepandas.STAREDataFrame(n_america, sids=stare)

# The trixels are just for visualization
trixels = n_america.make_trixels()
n_america.set_trixels(trixels, inplace=True)
n_america.head()
[2]:
pop_est continent name iso_a3 gdp_md_est geometry sids trixels
3 37589262.0 North America Canada CAN 1736425 MULTIPOLYGON (((-122.84000 49.00000, -122.9742... [3035426148847714308, 3062447746611937284, 306... MULTIPOLYGON (((-75.36119 57.63159, -70.81323 ...
4 328239523.0 North America United States of America USA 21433226 MULTIPOLYGON (((-122.84000 49.00000, -120.0000... [3332663724254167043, 3071454945866678276, 307... MULTIPOLYGON (((-90.00000 35.26439, -101.10559...
16 11263077.0 North America Haiti HTI 14332 POLYGON ((-71.71236 19.71446, -71.62487 19.169... [3100869080932941831, 3130283215999205383, 310... MULTIPOLYGON (((-72.49081 19.13394, -72.09835 ...
17 10738958.0 North America Dominican Rep. DOM 88941 POLYGON ((-71.70830 18.04500, -71.68774 18.316... [3130072109766672391, 3130107294138761223, 313... MULTIPOLYGON (((-70.94437 18.60746, -70.17478 ...
19 389482.0 North America Bahamas BHS 13578 MULTIPOLYGON (((-78.98000 26.79000, -78.51000 ... [3126113867906678792, 3128875841115652104, 312... MULTIPOLYGON (((-78.19386 26.86535, -77.77602 ...
[3]:
fig, ax = plt.subplots(dpi=100, figsize=(14,9))
ax.grid(True)

n_america.plot(ax=ax, trixels=True, boundary=True, column='name', zorder=0, linewidth=0.2)
n_america.plot(ax=ax, trixels=False, facecolor="none", edgecolor='black', zorder=1, linewidth=0.5)
santa_barbara_sid = pystare.from_lonlat([-119.81100397568609], [34.44687326105255], level=5)
santa_barbara = starepandas.STAREDataFrame(sids=santa_barbara_sid).add_trixels().plot(ax=ax, color='red')

fig.tight_layout()

#plt.savefig('n_america.png')

We are creating a catalogue dataframe#

folder2catalogue() scans the path for granules, reads stare cover from the sidecar file as well as the timestamps from the metadata and creatse a catalogue dataframe

[4]:
folder = '../tests/data/catalog/'
catalog = starepandas.folder2catalog(path=folder,
                                     granule_extension='hdf',
                                     add_sf=True)
catalog.add_trixels(inplace=True)

Now we use the catalog to find all granules that intersect our ROI#

[5]:
fig, ax = plt.subplots(dpi=300, figsize=(5,5))
ax.grid(True)

country = n_america[n_america.name=='Greenland']
country.plot(ax=ax)
catalog.plot(ax=ax, color='r')
[5]:
<Axes: >

We can do this with geopandas SF based intersects method#

[6]:
roi_wkt = country.geometry.iloc[0]

cover_intersects = catalog.intersects(roi_wkt)
granule_subset = catalog[cover_intersects].granule_path
granule_subset.head()
[6]:
0    ../tests/data/catalog/MOD05_L2.A2019336.0000.0...
Name: granule_path, dtype: object

We can do this with stare_intersects#

[7]:
roi_stare = n_america[n_america.name=='Greenland']['sids'].iloc[0]

cover_intersects = catalog.stare_intersects(roi_stare)
granule_subset = catalog[cover_intersects].granule_path

msg = 'there are {} granules intersecting the ROI'.format(len(granule_subset))
print(msg)
granule_subset.head()
there are 1 granules intersecting the ROI
[7]:
0    ../tests/data/catalog/MOD05_L2.A2019336.0000.0...
Name: granule_path, dtype: object

Finally, we extract the data from the pre-selected granules that intersects with our ROI#

[8]:
df = pandas.DataFrame()
for granule in granule_subset:
    g = starepandas.read_granule(granule, sidecar=True)

    intersects = g.stare_intersects(roi_stare)
    msg = '{granule} has {n} intersecting points'.format(granule=granule,
                                                        n=sum(intersects))

    print(msg)
    df = pandas.concat([pandas.DataFrame(g)[intersects]])
../tests/data/catalog/MOD05_L2.A2019336.0000.061.2019336211522.hdf has 24686 intersecting points
[9]:
df.head()
[9]:
sids Scan_Start_Time Solar_Zenith Solar_Azimuth Sensor_Zenith Sensor_Azimuth Water_Vapor_Infrared
56082 3630363253602952779 8.493986e+08 132.619997 -54.259999 25.859999 69.119998 0.369
56083 3630363554835632171 8.493986e+08 132.589997 -54.389999 26.319999 69.019998 0.358
56084 3630387764322774539 8.493986e+08 132.559997 -54.519999 26.779999 68.919998 0.340
56350 3630359803401928043 8.493986e+08 132.649997 -53.999999 24.939999 68.229998 0.374
56351 3630364160514931659 8.493986e+08 132.619997 -54.129999 25.399999 68.139998 0.357

Write to SQL#

We can write the catalogue dataframe to an sql (e.g. sqlite) database. From here, we could use STARELite to query the catalogue, or load it back into a STAREDataframe if we need to

[10]:
catalog
[10]:
granule_path sidecar_path stare_cover begining ending geom trixels
0 ../tests/data/catalog/MOD05_L2.A2019336.0000.0... ../tests/data/catalog/MOD05_L2.A2019336.0000.0... [3604075970547417096, 3604089164686950409, 360... 2019-12-02 00:00:00 2019-12-02 00:05:00 POLYGON ((-15.93400 53.20178, -15.93275 53.292... MULTIPOLYGON (((-45.00000 61.32495, -37.69296 ...
1 ../tests/data/catalog/MOD05_L2.A2005349.2125.0... ../tests/data/catalog/MOD05_L2.A2005349.2125.0... [864691128455135238, 864867050315579401, 86486... 2005-12-15 21:25:00 2005-12-15 21:30:00 POLYGON ((-171.02278 37.52311, -171.02644 37.4... MULTIPOLYGON (((-160.28950 23.13179, -165.1269...
2 ../tests/data/catalog/MOD05_L2.A2020254.1320.0... ../tests/data/catalog/MOD05_L2.A2020254.1320.0... [2461217196357976072, 2461228191474253833, 246... 2020-09-10 13:20:00 2020-09-10 13:25:00 POLYGON ((-49.78107 29.28507, -49.78662 29.195... MULTIPOLYGON (((-36.58975 18.83358, -40.88022 ...
[12]:
db_path = '' # Empty for in-memory
uri = 'sqlite:///{db_path}'.format(db_path=db_path)
engine = sqlalchemy.create_engine(uri)

catalog_ewkb = geopandas.io.sql._convert_to_ewkb(catalog, 'geom', 4326)
catalog_ewkb[['granule_path', 'sidecar_path', 'stare_cover', 'begining', 'ending', 'geom']].to_sql(name='catalog', con=engine, if_exists='replace', index=False)
[12]:
3

Load from SQL#

[13]:
cover_intersects = pandas.read_sql(sql='catalog', con=engine)

# Geometry and STARE are stored in blobs, so we need to deserialize
cover_intersects.stare_cover = cover_intersects.stare_cover.apply(func=numpy.frombuffer, args=('int64',))
cover_intersects.geom = cover_intersects.geom.apply(shapely.wkb.loads, args=(True,))

# We convert the conventional DF to a STAREDataFrame and set the stare column
cover_intersects = starepandas.STAREDataFrame(cover_intersects, sids='stare_cover')

[ ]: