Source code for libadalina_core.readers.read_dataset

import re
from libadalina_core.sedona_utils import DataFrame
from libadalina_core.readers.geopackage import geopackage_to_dataframe
from libadalina_core.readers.csv import csv_to_dataframe

def read_dataset(dataset_path: str) -> DataFrame:
    """
    Read a dataset from a file, supporting both GeoPackage (.gpkg) and CSV (.csv) formats.

    The path may end with a bracketed suffix, e.g. ``data.gpkg[roads]``. The
    text between the brackets is forwarded to the GeoPackage reader as the
    optional layer specification. Only a bracket group at the very end of the
    path is treated this way; brackets elsewhere in the path (for example in
    a directory name) are considered part of the path itself.

    Parameters
    ----------
    dataset_path : str
        Path to the dataset file, optionally followed by ``[layer]``

    Returns
    -------
    DataFrame
        The loaded dataset as a DataFrame

    Raises
    ------
    ValueError
        If the file extension (case-insensitive) is neither ``gpkg`` nor ``csv``
    """
    # Match only a trailing "[...]" group that contains no nested brackets.
    # A greedy r'\[(.*)\]$' would start at the FIRST '[' in the path and swallow
    # everything up to the final ']' (e.g. "dir[1]/f.gpkg[roads]" would yield
    # the layer "1]/f.gpkg[roads").
    match = re.search(r'\[([^\[\]]*)\]$', dataset_path)
    if match:
        optional_layer = match.group(1)
        # Strip exactly the matched suffix, not everything after the first '['.
        path_without_layers = dataset_path[:match.start()]
    else:
        optional_layer = None
        path_without_layers = dataset_path

    # Text after the last dot, lower-cased so ".GPKG" and ".CSV" are accepted.
    file_extension = path_without_layers.lower().split('.')[-1]

    if file_extension == 'gpkg':
        # NOTE(review): the full path (including any "[layer]" suffix) is passed
        # through unchanged, as before -- confirm whether geopackage_to_dataframe
        # expects the suffix to be stripped.
        return geopackage_to_dataframe(dataset_path, optional_layer)
    if file_extension == 'csv':
        return csv_to_dataframe(dataset_path)
    raise ValueError(
        f"Unsupported file extension: {file_extension}. "
        "Supported extensions are: .gpkg, .csv"
    )