Note

This page was generated from examples/notebooks/manual_join.ipynb.

Manual Join#

[1]:
import starepandas
import geopandas
import pandas
[2]:
cities = ['Buenos Aires', 'Brasilia', 'Santiago',
          'Bogota', 'Caracas', 'Sao Paulo', 'Bridgetown']

latitudes = [-34.58, -15.78, -33.45, 4.60, 10.48, -23.55, 13.1]
longitudes = [-58.66, -47.91, -70.66, -74.08, -66.86, -46.63, -59.62]
data =  {'City': cities,
         'Latitude': latitudes, 'Longitude': longitudes}

cities = starepandas.STAREDataFrame(data)
stare = starepandas.sids_from_xy(cities.Longitude, cities.Latitude, level=27)
geom = geopandas.points_from_xy(cities.Longitude, cities.Latitude, crs='EPSG:4326')
cities.set_sids(stare, inplace=True)
cities.set_geometry(geom, inplace=True)
cities.add_trixels(inplace=True)
[3]:
countries = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))
countries = countries.sort_values(by='name')
countries['geom_simple'] = countries.simplify(0.002)
countries.set_geometry('geom_simple', inplace=True)
samerica = countries[countries.continent=='South America']

sids = starepandas.sids_from_gdf(samerica, level=9, force_ccw=True)
samerica = starepandas.STAREDataFrame(samerica, sids=sids)
trixels = samerica.make_trixels()
samerica.set_trixels(trixels, inplace=True)
[4]:
left_df = cities
right_df = samerica

left_key = []
right_key = []
for i, row in right_df.iterrows():
    k = left_df.index[left_df.intersects(row.geometry)]
    left_key.extend(list(k))
    right_key.extend([i]*len(k))

indices = pandas.DataFrame({'_key_left': left_key,
                            '_key_right':right_key})
indices
[4]:
_key_left _key_right
0 0 9
1 1 29
2 5 29
3 2 10
4 3 32
5 4 40
[5]:
left_key = []
right_key = []

for i, row in right_df.iterrows():
    k = left_df.index[left_df.stare_intersects(row.sids)]
    left_key.extend(list(k))
    right_key.extend([i]*len(k))

indices = pandas.DataFrame({'_key_left': left_key,
                            '_key_right': right_key})

indices
[5]:
_key_left _key_right
0 0 9
1 1 29
2 5 29
3 2 10
4 3 32
5 4 40
[6]:
# Inner join
joined = left_df
joined = joined.merge(indices, left_index=True, right_index=True)
joined = joined.merge(right_df, left_on='_key_right', right_index=True)
joined = joined.set_index('_key_left')
joined = joined.drop(["_key_right"], axis=1)
[7]:
# Left Join
index_left = 'index_left'
left_df.index = left_df.index.rename(index_left)
left_df = left_df.reset_index()

joined = left_df
joined = joined.merge(indices, left_index=True, right_index=True, how="left")
joined = joined.merge(right_df.drop(right_df.geometry.name, axis=1),
                      how="left",
                      left_on="_key_right",
                      right_index=True,
                      suffixes=("_left", "_right"))
joined = joined.set_index(index_left)
joined = joined.drop(["_key_right"], axis=1)
[ ]: