Source code for libadalina_core.graph_extraction.readers.open_street_map

import os.path
from shapely.io import from_wkt

from libadalina_core.exceptions.input_file_exception import InputFileException
from libadalina_core.graph_extraction.readers.reader import MapReader, MandatoryColumns, OneWay, RoadTypes
import geopandas as gpd
import pandas as pd


[docs]
class OpenStreetMapReader(MapReader):
    """
    A class to read OpenStreetMap (OSM) data files and convert them into a GeoDataFrame with standardized columns.
    """

    CRS = 4326 # OSM data are exported in WGS84



[docs]
    def read(self, file_path: str) -> gpd.GeoDataFrame:
        """
        Read an OSM data file and return a GeoDataFrame with the required columns.
        Accepts CSV, Shapefile, and GeoPackage formats.

        Parameters
        ----------
        file_path : str
            The path to the OSM data file. Must contain 'osm_id', 'name', 'oneway', and 'geometry' columns.

        Returns
        -------
        gpd.GeoDataFrame
            A GeoDataFrame containing only the mandatory columns with standardized names.
        """
        if file_path.endswith('.csv'):
            return self.read_csv(file_path)
        elif file_path.endswith('.shp'):
            return self.read_shp(file_path)
        elif file_path.endswith('.gpkg'):
            return self.read_gpkg(file_path)

        raise InputFileException(f'no reader found for file {file_path}')


    def read_csv(self, file_path: str) -> gpd.GeoDataFrame:
        if not os.path.exists(file_path):
            raise FileNotFoundError(f'file {file_path} does not exist')

        df = pd.read_csv(file_path, sep=',')
        return self.from_dataframe(df)

    def read_shp(self, file_path: str) -> gpd.GeoDataFrame:
        if not os.path.exists(file_path):
            raise FileNotFoundError(f'file {file_path} does not exist')

        return self._map_columns(gpd.read_file(file_path))
    
    def read_gpkg(self, file_path: str) -> gpd.GeoDataFrame:
        if not os.path.exists(file_path):
            raise FileNotFoundError(f'file {file_path} does not exist')

        layers = gpd.list_layers(file_path)
        layer_name = layers.loc[0, 'name']

        return self._map_columns(gpd.read_file(file_path, layer=layer_name))


[docs]
    def from_dataframe(self, df: pd.DataFrame) -> gpd.GeoDataFrame:
        """
        Convert a pandas DataFrame of OSM data to a GeoDataFrame with the required columns.

        Parameters
        ----------
        df : pd.DataFrame
            The OSM data as a pandas DataFrame. Must contain 'osm_id', 'name', 'oneway', and 'geometry' columns.

        Returns
        -------
        gpd.GeoDataFrame
            A GeoDataFrame containing only the mandatory columns with standardized names.
        """
        df.loc[:, 'geometry'] = df['geometry'].apply(from_wkt)
        return self._map_columns(gpd.GeoDataFrame(df, geometry='geometry', crs=self.CRS))


    def _map_columns(self, gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
        gdf['name'] = gdf['name'].fillna('')
        gdf = self._filter_roads(gdf)
        gdf = self.map_and_reduce(gdf, {
            MandatoryColumns.id: 'osm_id',
            MandatoryColumns.road_name: 'name',
            MandatoryColumns.oneway: 'oneway'
        })
        
        oneway_mapping = {
            'F': OneWay.Forward.value,
            'T': OneWay.Backward.value,
            'B': OneWay.Both.value
        }
        gdf.loc[:, MandatoryColumns.oneway.value] = gdf[MandatoryColumns.oneway.value].map(oneway_mapping).fillna(
            OneWay.Both.value)
        return gdf

    def _filter_roads(self, gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
        """
        Filter roads based on the specified road type.
        Road types of OSM are defined in https://download.geofabrik.de/osm-data-in-gis-formats-free.pdf
        Parameters
        ----------
        gdf : geopandas.GeoDataFrame
            The input GeoDataFrame containing road data with a 'code' column representing road types.

        Returns
        -------
        geopandas.GeoDataFrame
            A GeoDataFrame containing only the roads that match the specified road type.
        """
        if self._road_types == RoadTypes.CAR_ONLY:
            return gdf[(
                ((gdf['code'] >= 5110) & (gdf['code'] <= 5119)) |
                ((gdf['code'] >= 5130) & (gdf['code'] <= 5139)) |
                ((gdf['code'] >= 5121) & (gdf['code'] <= 5122))
            )]
        elif self._road_types == RoadTypes.MAIN_ROADS:
            return gdf[(
                    ((gdf['code'] >= 5110) & (gdf['code'] <= 5119)) |
                    ((gdf['code'] >= 5130) & (gdf['code'] <= 5139))
            )]
        else:
            return gdf