Source code for SuomiGeoData.paituli
import os
import io
import zipfile
import typing
import pandas
import geopandas
import requests
import tempfile
from .core import Core
# from .syke import Syke
[docs]
class Paituli:
'''
Provides functionality for downloading and extracting data from Paituli
(https://paituli.csc.fi/download.html).
'''
@property
def indexmap_dem(
self
) -> geopandas.GeoDataFrame:
'''
Returns a GeoDataFrame containing the DEM index map.
'''
output = geopandas.read_file(
os.path.join(
os.path.dirname(__file__), 'data', 'nls_dem_index.shp'
)
)
return output
@property
def indexmap_tdb(
self
) -> geopandas.GeoDataFrame:
'''
Returns a GeoDataFrame containing the topographic database index map.
'''
output = geopandas.read_file(
os.path.join(
os.path.dirname(__file__), 'data', 'nls_td_index.shp'
)
)
return output
[docs]
def save_indexmap_dem(
self,
file_path: str
) -> str:
'''
Saves the GeoDataFrame of the DEM index map to the specified file path
and return a success message.
Parameters
----------
file_path : str
File path to save the GeoDataFrame.
Returns
-------
str
A confirmation message indicating the output file has been saved.
'''
check_file = Core().is_valid_ogr_driver(file_path)
if check_file is True:
self.indexmap_dem.to_file(file_path)
else:
raise Exception(
'Could not retrieve driver from the file path.'
)
return 'GeoDataFrame saved to the output file.'
[docs]
def save_indexmap_tdb(
self,
file_path: str
) -> str:
'''
Saves the GeoDataFrame of the topographic database
index map to the specified file path and returns a success message.
Parameters
----------
file_path : str
File path to save the GeoDataFrame.
Returns
-------
str
A confirmation message indicating the output file has been saved.
'''
check_file = Core().is_valid_ogr_driver(file_path)
if check_file is True:
self.indexmap_tdb.to_file(file_path)
else:
raise Exception(
'Could not retrieve driver from the file path.'
)
return 'GeoDataFrame saved to the output file.'
@property
def dem_labels(
self
) -> list[str]:
'''
Returns the list of labels from the DEM index map.
'''
output = list(self.indexmap_dem['label'])
return output
@property
def tdb_labels(
self
) -> list[str]:
'''
Returns the list of labels from the topographic database index map.
'''
output = list(self.indexmap_tdb['label'])
return output
[docs]
def is_valid_label_dem(
self,
label: str
) -> bool:
'''
Returns whether the label exists in the DEM index map.
Parameters
----------
label : str
Name of the label.
Returns
-------
bool
True if the label exists, False otherwise.
'''
return label in self.dem_labels
[docs]
def is_valid_label_tdb(
self,
label: str
) -> bool:
'''
Returns whether the label exists in the topographic database index map.
Parameters
----------
label : str
Name of the label.
Returns
-------
bool
True if the label exists, False otherwise.
'''
return label in self.tdb_labels
[docs]
def dem_download_by_labels(
self,
labels: list[str],
folder_path: str,
http_headers: typing.Optional[dict[str, str]] = None
) -> str:
'''
Downloads the DEM raster files for the given labels and
returns a confirmation message.
Parameters
----------
labels : list
List of label names from the DEM index map.
folder_path : str
Path of empty folder to save the downloaded raster files.
http_headers : dict, optional
HTTP headers to be used for the web request. Defaults to
:attr:`SuomiGeoData.core.Core.default_http_headers` attribute if not provided.
Returns
-------
str
A confirmation message indicating that all downloads are complete.
'''
# check empty folder path
if len(os.listdir(folder_path)) > 0:
raise Exception(
'Output folder must be empty.'
)
else:
pass
# check whether the input labels exist
for label in labels:
if self.is_valid_label_dem(label):
pass
else:
raise Exception(
f'The label {label} does not exist in the index map.'
)
# web request headers
headers = Core().default_http_headers if http_headers is None else http_headers
# download topographic database
suffix_urls = self.indexmap_dem[self.indexmap_dem['label'].isin(labels)]['path']
count = 1
for label, url in zip(labels, suffix_urls):
label_url = Core()._url_prefix_paituli_dem_tdb + url
response = requests.get(
url=label_url,
headers=headers
)
downloaded_file = os.path.join(
folder_path, f'{label}.tif'
)
with open(downloaded_file, 'wb') as downloaded_raster:
downloaded_raster.write(response.content)
print(
f'Download of label {label} completed (count {count}/{len(labels)}).'
)
count = count + 1
return 'All downloads are complete.'
[docs]
def tdb_download_by_labels(
self,
labels: list[str],
folder_path: str,
http_headers: typing.Optional[dict[str, str]] = None
) -> str:
'''
Downloads the topographic database folders of shapefiles for the given labels and
returns a confirmation message.
Parameters
----------
labels : list
List of label names from the topographic database index map.
folder_path : str
Path of empty folder to save the downloaded folder of shapefiles.
http_headers : dict, optional
HTTP headers to be used for the web request. Defaults to
:attr:`SuomiGeoData.core.Core.default_http_headers` attribute if not provided.
Returns
-------
str
A confirmation message indicating that all downloads are complete.
'''
# check empty folder path
if len(os.listdir(folder_path)) > 0:
raise Exception(
'Output folder must be empty.'
)
else:
pass
# check whether the input labels exist
for label in labels:
if self.is_valid_label_tdb(label):
pass
else:
raise Exception(
f'The label {label} does not exist in the index map.'
)
# web request headers
headers = Core().default_http_headers if http_headers is None else http_headers
# download topographic database
suffix_urls = self.indexmap_tdb[self.indexmap_tdb['label'].isin(labels)]['path']
count = 1
for label, url in zip(labels, suffix_urls):
label_url = Core()._url_prefix_paituli_dem_tdb + url
response = requests.get(
url=label_url,
headers=headers
)
downloaded_data = io.BytesIO(response.content)
with zipfile.ZipFile(downloaded_data, 'r') as downloaded_zip:
downloaded_zip.extractall(
os.path.join(folder_path, label)
)
print(
f'Download of label {label} completed (count {count}/{len(labels)}).'
)
count = count + 1
return 'All downloads are complete.'
@property
def get_example_area(
self
) -> geopandas.GeoDataFrame:
'''
Returns a GeoDataFrame of example area to test
raster and vector downloads.
'''
output = geopandas.read_file(
os.path.join(
os.path.dirname(__file__), 'data', 'example_area.shp'
)
)
return output
[docs]
def dem_labels_download_by_area(
self,
shape_file: str,
folder_path: str,
http_headers: typing.Optional[dict[str, str]] = None
) -> str:
'''
Downloads the DEM raster files for the given area and
returns a confirmation message.
Parameters
----------
shape_file : str
Shapefile path of the input area.
folder_path : str
Path of empty folder to save the downloaded raster files.
http_headers : dict, optional
HTTP headers to be used for the web request. Defaults to
:attr:`SuomiGeoData.core.Core.default_http_headers` attribute if not provided.
Returns
-------
str
A confirmation message indicating that all downloads are complete.
'''
# input area
area_gdf = geopandas.read_file(shape_file)
# check crs of input area
target_crs = 'EPSG:3067'
if area_gdf.crs is None:
area_gdf = area_gdf.set_crs(target_crs)
elif str(area_gdf.crs) != target_crs:
area_gdf = area_gdf.to_crs(target_crs)
else:
pass
# DEM index map
index_gdf = self.indexmap_dem
# labels
label_gdf = geopandas.sjoin(index_gdf, area_gdf, how='inner').reset_index(drop=True)
label_gdf = label_gdf.drop_duplicates(subset=['label']).reset_index(drop=True)
# download labels
if label_gdf.shape[0] == 0:
raise Exception('The index map does not intersect with the given area.')
else:
message = self.dem_download_by_labels(
labels=list(label_gdf['label']),
folder_path=folder_path,
http_headers=http_headers
)
return message
[docs]
def tdb_labels_download_by_area(
self,
shape_file: str,
folder_path: str,
http_headers: typing.Optional[dict[str, str]] = None
) -> str:
'''
Downloads the topographic database label folders of shapefiles
for the given area and returns a confirmation message.
Parameters
----------
shape_file : str
Shapefile path of the input area.
folder_path : str
Path of empty folder to save the downloaded folders of shapefiles.
http_headers : dict, optional
HTTP headers to be used for the web request. Defaults to
:attr:`SuomiGeoData.core.Core.default_http_headers` attribute if not provided.
Returns
-------
str
A confirmation message indicating that all downloads are complete.
'''
# input area
area_gdf = geopandas.read_file(shape_file)
# check crs of input area
target_crs = 'EPSG:3067'
if area_gdf.crs is None:
area_gdf = area_gdf.set_crs(target_crs)
elif str(area_gdf.crs) != target_crs:
area_gdf = area_gdf.to_crs(target_crs)
else:
pass
# topographic database index map
index_gdf = self.indexmap_tdb
# labels
label_gdf = geopandas.sjoin(index_gdf, area_gdf, how='inner').reset_index(drop=True)
label_gdf = label_gdf.drop_duplicates(subset=['label']).reset_index(drop=True)
# download labels
if label_gdf.shape[0] == 0:
raise Exception('The index map does not intersect with the given area.')
else:
message = self.tdb_download_by_labels(
labels=list(label_gdf['label']),
folder_path=folder_path,
http_headers=http_headers
)
return message
[docs]
def dem_clipped_download_by_area(
self,
shape_file: str,
raster_file: str,
http_headers: typing.Optional[dict[str, str]] = None
) -> str:
'''
Downloads the clipped DEM raster file for the given area and
returns a confirmation message.
Parameters
----------
shape_file : str
Shapefile path of the input area.
raster_file : str
File path to save the output raster.
http_headers : dict, optional
HTTP headers to be used for the web request. Defaults to
:attr:`SuomiGeoData.core.Core.default_http_headers` attribute if not provided.
Returns
-------
str
A confirmation message indicating that the raster clipping is complete.
'''
with tempfile.TemporaryDirectory() as tmp_dir:
# download DEM label rasters
message = self.dem_labels_download_by_area(
shape_file=shape_file,
folder_path=tmp_dir,
http_headers=http_headers
)
print(message)
# merging rasters
message = Core().raster_merging(
folder_path=tmp_dir,
raster_file=os.path.join(tmp_dir, 'merged.tif'),
compress='lzw'
)
print(message)
# clipping rasters
message = Core().raster_clipping_by_mask(
input_file=os.path.join(tmp_dir, 'merged.tif'),
mask_file=shape_file,
output_file=raster_file
)
return message
[docs]
def get_tdb_metadata(
self,
excel_file: str,
http_headers: typing.Optional[dict[str, str]] = None
) -> pandas.DataFrame:
'''
Downloads topographic database metadata,
converts it to a multi-index DataFrame, and saves it to an Excel file.
Parameters
----------
excel_file : str
Path to an Excel file to save the DataFrame.
http_headers : dict, optional
HTTP headers to be used for the web request. Defaults to
:attr:`SuomiGeoData.core.Core.default_http_headers` attribute if not provided.
Returns
-------
DataFrame
A multi-index DataFrame of the topographic database metadata.
'''
# web request headers
headers = Core().default_http_headers if http_headers is None else http_headers
# downloading topographic database metadata
with tempfile.TemporaryDirectory() as tmp_dir:
url = 'https://www.nic.funet.fi/index/geodata/mml/maastotietokanta/2022/maastotietokanta_kohdemalli_eng_2019.xlsx'
response = requests.get(
url=url,
headers=headers
)
download_file = os.path.join(tmp_dir, 'tdb_metadata.xlsx')
with open(download_file, 'wb') as download_write:
download_write.write(response.content)
df = pandas.read_excel(download_file)
# processing of the Dataframe
df = df.dropna(
thresh=3,
ignore_index=True
)
df = df.iloc[:, :-2]
df = df.drop(index=0).reset_index(drop=True)
df = df.dropna(subset=[df.columns[-1]]).reset_index(drop=True)
df.columns = ['Name', 'Category', 'Shape', 'Group', 'Class']
index_columns = ['Category', 'Shape', 'Group']
df = df.set_index(index_columns)
df = df.sort_index(
level=index_columns,
ascending=[True] * len(index_columns)
)
df = df.groupby(level=index_columns, group_keys=False).apply(
lambda x: x.sort_values('Class')
)
df = df.set_index('Name', append=True)
# saving DataFrame to the input Excel file
excel_ext = Core()._excel_file_extension(excel_file)
if excel_ext != '.xlsx':
raise Exception(f'Input file extension "{excel_ext}" does not match the required ".xlsx".')
else:
with pandas.ExcelWriter(excel_file, engine='xlsxwriter') as excel_writer:
df.to_excel(excel_writer)
workbook = excel_writer.book
worksheet = excel_writer.sheets['Sheet1']
# excel sheet column width
worksheet.set_column(len(df.index.names), len(df.index.names) + len(df.columns) - 1, 20)
for idx, i in enumerate(df.index.names):
if i == 'Category':
worksheet.set_column(idx, idx, 30)
elif i == 'Name':
worksheet.set_column(idx, idx, 50)
else:
worksheet.set_column(idx, idx, 20)
# index formatting
for i in range(len(df.index.names)):
if df.index.names[i] != 'Name':
for jdx, j in enumerate(df.index.get_level_values(i)):
worksheet.write(
jdx + 1, i, j, workbook.add_format(
{
'align': 'center', 'valign': 'vcenter', 'bold': True, 'border': 1, 'font_size': 14
}
)
)
else:
for jdx, j in enumerate(df.index.get_level_values(i)):
worksheet.write(
jdx + 1, i, j, workbook.add_format(
{
'align': 'left', 'valign': 'vcenter', 'bold': True, 'border': 1
}
)
)
# column formatting
for i in range(len(df.columns)):
for jdx, j in enumerate(df[df.columns[i]]):
worksheet.write(
jdx + 1, len(df.index.names) + i, j,
workbook.add_format(
{
'align': 'right', 'valign': 'vcenter', 'border': 1
}
)
)
# header formatting
for idx, i in enumerate(list(df.index.names) + list(df.columns)):
worksheet.write(
0, idx, i, workbook.add_format(
{
'align': 'center', 'bold': True, 'border': 1, 'font_size': 18, 'fg_color': 'cyan'
}
)
)
return df
[docs]
def tdb_feature_extraction(
self,
folder_path: str,
class_number: int,
shape_file: str
) -> str:
'''
Extracts feature class geometries from the downloaded topographic database label folders
for the specified class number and saves the output to a shapefile.
Parameters
----------
folder_path : str
Folder path containing the downloaded topographic database label folders.
class_number : int
Feature class number in the topographic database meta data, obtained from the
:meth:`SuomiGeoData.Paituli.get_tdb_metadata` method.
shape_file : str
Shapefile path to save the output GeoDataFrame.
Returns
-------
str
A confirmation message indicating that the feature class geometry extraction is complete.
'''
# check output file
check_file = Core().is_valid_ogr_driver(shape_file)
if check_file is True:
pass
else:
raise Exception('Could not retrieve driver from the file path.')
# DataFrame of topographic database metadat
meta_df = pandas.read_excel(
os.path.join(
os.path.dirname(__file__), 'data', 'tdb_metadata.xlsx'
)
)
# dictionary for mapping between geometry type and shapeffile
shapefile_ends = {
'Point': '_s',
'Text': '_t',
'Line': '_v',
'Area': '_p'
}
# check class
check_class = class_number in list(meta_df.iloc[:, -1])
if check_class is True:
class_df = meta_df[meta_df.iloc[:, -1] == class_number].reset_index(drop=True)
class_shape = class_df.loc[0, 'Shape']
class_sfe = shapefile_ends[class_shape]
else:
raise Exception(f'Input feature class {class_number} does not exist.')
# targeted shapefiles
feature_paths = []
for sub_folder in os.listdir(folder_path):
sf_path = os.path.join(folder_path, sub_folder)
shapefiles = filter(
lambda x: x.endswith('.shp'), os.listdir(sf_path)
)
required_files = filter(
lambda x: x.split('.')[0].endswith(class_sfe), shapefiles
)
required_paths = map(
lambda x: os.path.join(sf_path, x), required_files
)
feature_paths.extend(list(required_paths))
# geometry extraction
total_paths = len(feature_paths)
gdf_columns = ['File', 'geometry']
gdf = geopandas.GeoDataFrame(columns=gdf_columns)
count = 1
for file_path in feature_paths:
file_name = os.path.split(file_path)[-1]
print(f'Searching shapefile ({file_name}): {count}/{total_paths}')
fp_gdf = geopandas.read_file(file_path)
fp_gdf = fp_gdf[fp_gdf['LUOKKA'] == class_number]
fp_gdf = fp_gdf[['geometry']]
fp_gdf['File'] = file_name
gdf = pandas.concat([gdf, fp_gdf], ignore_index=True)
count = count + 1
if len(gdf) == 0:
raise Exception(
f'No geometry is found in the downloaded files for the feature class number {class_number}.'
)
else:
gdf.to_file(shape_file)
return 'Feature class geometries extraction completed.'
[docs]
def tdb_feature_extraction_by_area(
self,
input_file: str,
class_number: int,
output_file: str,
http_headers: typing.Optional[dict[str, str]] = None
) -> str:
'''
Extracts topographic database feature class for the given area
and saves the output to a shapefile.
Parameters
----------
input_file : str
Shapefile path of the input area GeoDataFrame.
class_number : int
Feature class number in the topographic database meta data, obtained from the
:meth:`SuomiGeoData.Paituli.get_tdb_metadata` method.
output_file : str
Shapefile path to save the output GeoDataFrame.
http_headers : dict, optional
HTTP headers to be used for the web request. Defaults to
:attr:`SuomiGeoData.core.Core.default_http_headers` attribute if not provided.
Returns
-------
str
A confirmation message indicating that the feature class geometry extraction is complete.
'''
with tempfile.TemporaryDirectory() as tmp_dir:
# download topographic database label folders
message = self.tdb_labels_download_by_area(
shape_file=input_file,
folder_path=tmp_dir,
http_headers=http_headers
)
print(message)
# extracting feature class geometries by area
extract_file = os.path.join(tmp_dir, 'extract.shp')
message = self.tdb_feature_extraction(
folder_path=tmp_dir,
class_number=class_number,
shape_file=extract_file
)
print(message)
# shapefile clipping
message = Core().shape_clipping_by_mask(
input_file=extract_file,
mask_file=input_file,
output_file=output_file
)
return message
[docs]
def select_subcatchments_of_Syke(
self,
input_file: str,
level: int,
id_subcatchments: list[int],
output_file: str,
percentage_cutoff: float = -1,
) -> geopandas.GeoDataFrame:
'''
Selects subcatchments from the shapefile of
Syke's catachment divisions and returns a GeoDataFrame.
Parameters
----------
input_file : str
Path to the shapefile containing catchment area divisions.
This file can be downloaded from:
https://wwwd3.ymparisto.fi/d3/gis_data/spesific/valumaalueet.zip
level : int
Catchment division level, must be one of 1, 2, 3, 4, or 5.
id_subcatchments : list
List of selected integer values from the 'taso<level>_osai' column in the shapefile.
output_file : str
Shapefile path to save the output GeoDataFrame.
percentage_cutoff : float, optional
Excludes polygon below the specified area percentage, ranging between 0 to 100,
relative to the total area of all polygons. Default is -1 for no exclusion.
Returns
-------
GeoDataFrame
A GeoDataFrame containing the selected subcatchments.
'''
# check output file
check_file = Core().is_valid_ogr_driver(output_file)
if check_file is True:
pass
else:
raise Exception('Could not retrieve driver from the file path.')
# check level
if level in [1, 2, 3, 4, 5]:
pass
else:
raise Exception('Input level must be one of 1, 2, 3, 4, or 5.')
# input GeoDataFrame
gdf = geopandas.read_file(input_file)
# processing of selected subcatchments
id_col = f'taso{level}_osai'
area_gdf = gdf[gdf[id_col].isin(id_subcatchments)].reset_index(drop=True)
if area_gdf.shape[0] == 0:
raise Exception('Selected ID(s) do not exist in the subcatchment divisions map.')
else:
area_gdf = area_gdf.dissolve()[['geometry']]
area_gdf = area_gdf.explode(ignore_index=True)
total_area = area_gdf.geometry.area.sum()
area_gdf['area_%'] = round(100 * area_gdf.geometry.area / total_area).astype('int')
area_gdf = area_gdf[area_gdf['area_%'] > percentage_cutoff].reset_index(drop=True)
area_gdf = area_gdf.drop(columns=['area_%'])
area_gdf['PID'] = list(range(1, area_gdf.shape[0] + 1))
# saving GeoDataFrame
area_gdf.to_file(output_file)
return area_gdf
[docs]
def dem_clipped_download_by_syke_subcatchment(
self,
shape_file: str,
level: int,
id_subcatchments: list[int],
raster_file: str,
percentage_cutoff: float = 0,
http_headers: typing.Optional[dict[str, str]] = None,
) -> str:
'''
Downloads the clipped DEM raster file for the given subcatchment division of Syke and
returns a confirmation message.
Parameters
----------
shape_file : str
Path to the shapefile containing catchment area divisions.
This file can be downloaded from:
https://wwwd3.ymparisto.fi/d3/gis_data/spesific/valumaalueet.zip
level : int
Level of catchment division and must be one of 1, 2, 3, 4 or 5.
id_subcatchments : list
List of selected integer values from the 'taso<level>_osai' column in the shapefile.
raster_file : str
File path to save the output raster.
percentage_cutoff : float, optional
Excludes polygon below the specified area percentage, ranging from 0 to 100,
relative to the total area of all polygons. Default is 0, excluding negligible polygons.
Provide -1 for no exclusion.
http_headers : dict, optional
HTTP headers to be used for the web request. Defaults to
:attr:`SuomiGeoData.core.Core.default_http_headers` attribute if not provided.
Returns
-------
str
A confirmation message indicating that the raster clipping is complete.
'''
with tempfile.TemporaryDirectory() as tmp_dir:
area_file = os.path.join(tmp_dir, 'area.shp')
# select Syke subcatchments
self.select_subcatchments_of_Syke(
input_file=shape_file,
level=level,
id_subcatchments=id_subcatchments,
output_file=area_file,
percentage_cutoff=percentage_cutoff
)
# clipping DEM by area
message = self.dem_clipped_download_by_area(
shape_file=area_file,
raster_file=raster_file,
http_headers=http_headers
)
return message
[docs]
def tdb_feature_extraction_by_syke_subcatchment(
self,
input_file: str,
level: int,
id_subcatchments: list[int],
class_number: int,
output_file: str,
percentage_cutoff: float = 0,
http_headers: typing.Optional[dict[str, str]] = None,
) -> str:
'''
Extracts topographic database feature class for the given
subcatchment division of Syke and returns a confirmation message.
Parameters
----------
input_file : str
Path to the shapefile containing catchment area divisions.
This file can be downloaded from:
https://wwwd3.ymparisto.fi/d3/gis_data/spesific/valumaalueet.zip
level : int
Level of catchment division and must be one of 1, 2, 3, 4 or 5.
id_subcatchments : list
List of selected integer values from the 'taso<level>_osai' column in the shapefile.
class_number : int
Feature class number in the topographic database meta data, obtained from the
:meth:`SuomiGeoData.Paituli.get_tdb_metadata` method.
output_file : str
Shapefile path to save the output GeoDataFrame.
percentage_cutoff : float, optional
Excludes polygon below the specified area percentage, ranging from 0 to 100,
relative to the total area of all polygons. Default is 0, excluding negligible polygons.
Provide -1 for no exclusion.
http_headers : dict, optional
HTTP headers to be used for the web request. Defaults to
:attr:`SuomiGeoData.core.Core.default_http_headers` attribute if not provided.
Returns
-------
str
A confirmation message indicating that the GeoDataFrame clipping is complete.
'''
with tempfile.TemporaryDirectory() as tmp_dir:
area_file = os.path.join(tmp_dir, 'area.shp')
# select Syke subcatchments
self.select_subcatchments_of_Syke(
input_file=input_file,
level=level,
id_subcatchments=id_subcatchments,
output_file=area_file,
percentage_cutoff=percentage_cutoff
)
# clipping topographic database feature by area
message = self.tdb_feature_extraction_by_area(
input_file=area_file,
class_number=class_number,
output_file=output_file,
http_headers=http_headers
)
return message