Note
This page was generated from examples/notebooks/manual_join.ipynb.
Manual Join#
[1]:
import starepandas
import geopandas
import pandas
[2]:
cities = ['Buenos Aires', 'Brasilia', 'Santiago',
'Bogota', 'Caracas', 'Sao Paulo', 'Bridgetown']
latitudes = [-34.58, -15.78, -33.45, 4.60, 10.48, -23.55, 13.1]
longitudes = [-58.66, -47.91, -70.66, -74.08, -66.86, -46.63, -59.62]
data = {'City': cities,
'Latitude': latitudes, 'Longitude': longitudes}
cities = starepandas.STAREDataFrame(data)
stare = starepandas.sids_from_xy(cities.Longitude, cities.Latitude, level=27)
geom = geopandas.points_from_xy(cities.Longitude, cities.Latitude, crs='EPSG:4326')
cities.set_sids(stare, inplace=True)
cities.set_geometry(geom, inplace=True)
cities.add_trixels(inplace=True)
[3]:
countries = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))
countries = countries.sort_values(by='name')
countries['geom_simple'] = countries.simplify(0.002)
countries.set_geometry('geom_simple', inplace=True)
samerica = countries[countries.continent=='South America']
sids = starepandas.sids_from_gdf(samerica, level=9, force_ccw=True)
samerica = starepandas.STAREDataFrame(samerica, sids=sids)
trixels = samerica.make_trixels()
samerica.set_trixels(trixels, inplace=True)
[4]:
left_df = cities
right_df = samerica
left_key = []
right_key = []
for i, row in right_df.iterrows():
k = left_df.index[left_df.intersects(row.geometry)]
left_key.extend(list(k))
right_key.extend([i]*len(k))
indices = pandas.DataFrame({'_key_left': left_key,
'_key_right':right_key})
indices
[4]:
| _key_left | _key_right | |
|---|---|---|
| 0 | 0 | 9 |
| 1 | 1 | 29 |
| 2 | 5 | 29 |
| 3 | 2 | 10 |
| 4 | 3 | 32 |
| 5 | 4 | 40 |
[5]:
left_key = []
right_key = []
for i, row in right_df.iterrows():
k = left_df.index[left_df.stare_intersects(row.sids)]
left_key.extend(list(k))
right_key.extend([i]*len(k))
indices = pandas.DataFrame({'_key_left': left_key,
'_key_right': right_key})
indices
[5]:
| _key_left | _key_right | |
|---|---|---|
| 0 | 0 | 9 |
| 1 | 1 | 29 |
| 2 | 5 | 29 |
| 3 | 2 | 10 |
| 4 | 3 | 32 |
| 5 | 4 | 40 |
[6]:
# Inner join
joined = left_df
joined = joined.merge(indices, left_index=True, right_index=True)
joined = joined.merge(right_df, left_on='_key_right', right_index=True)
joined = joined.set_index('_key_left')
joined = joined.drop(["_key_right"], axis=1)
[7]:
# Left Join
index_left = 'index_left'
left_df.index = left_df.index.rename(index_left)
left_df = left_df.reset_index()
joined = left_df
joined = joined.merge(indices, left_index=True, right_index=True, how="left")
joined = joined.merge(right_df.drop(right_df.geometry.name, axis=1),
how="left",
left_on="_key_right",
right_index=True,
suffixes=("_left", "_right"))
joined = joined.set_index(index_left)
joined = joined.drop(["_key_right"], axis=1)
[ ]: