Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 25 additions & 2 deletions ftw_tools/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -702,6 +702,14 @@ def inference_run(
help="Output filename for the polygonized data. Defaults to the name of the input file with '.parquet' file extension. "
+ SUPPORTED_POLY_FORMATS_TXT,
)
@click.option(
"--algorithm",
"-a",
type=click.Choice(["simple", "watershed"]),
default="simple",
show_default=True,
help="Polygonization algorithm. 'simple' uses connected components, 'watershed' uses hierarchical watershed for better field separation.",
)
@click.option(
"--simplify",
"-s",
Expand Down Expand Up @@ -741,12 +749,27 @@ def inference_run(
show_default=True,
help="Remove the interiors holes in the polygons.",
)
@click.option(
"--t_ext",
type=click.FloatRange(min=0.0, max=1.0),
default=0.5,
show_default=True,
help="Threshold for extent binary mask (watershed algorithm only).",
)
@click.option(
"--t_bound",
type=click.FloatRange(min=0.0, max=1.0),
default=0.2,
show_default=True,
help="Threshold for watershed horizontal cut (watershed algorithm only).",
)
def inference_polygonize(
input, out, simplify, min_size, max_size, overwrite, close_interiors
input, out, algorithm, simplify, min_size, max_size, overwrite, close_interiors, t_ext, t_bound
):
from ftw_tools.postprocess.polygonize import polygonize
polygonize(input, out, simplify, min_size, max_size, overwrite, close_interiors, algorithm, t_ext, t_bound)


polygonize(input, out, simplify, min_size, max_size, overwrite, close_interiors)


@inference.command(
Expand Down
202 changes: 163 additions & 39 deletions ftw_tools/postprocess/polygonize.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os
import re
import time
from typing import Optional
Copy link

Copilot AI Oct 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Optional is imported but not used anywhere in the shown changes; remove the unused import to reduce clutter.

Suggested change
from typing import Optional

Copilot uses AI. Check for mistakes.

import fiona
import fiona.transform
Expand All @@ -10,14 +11,79 @@
import rasterio.features
import shapely.geometry
from affine import Affine
from fiboa_cli.parquet import create_parquet, features_to_dataframe
try:
from fiboa_cli.parquet import create_parquet, features_to_dataframe
FIBOA_AVAILABLE = True
except ImportError:
FIBOA_AVAILABLE = False
print("Warning: fiboa_cli.parquet not available. Parquet output will use alternative implementation.")

Comment on lines +17 to +20
Copy link

Copilot AI Oct 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Avoid printing directly at import time; use warnings.warn (e.g., warnings.warn(..., RuntimeWarning)) so callers can filter or capture the message.

Copilot uses AI. Check for mistakes.
def create_parquet(gdf, columns, collection, out, config, compression="brotli"):
    """Fallback for :func:`fiboa_cli.parquet.create_parquet`.

    Writes ``gdf`` to GeoParquet at ``out`` using GeoPandas.

    NOTE(review): unlike the fiboa implementation, the ``columns``,
    ``collection``, and ``config`` arguments are accepted only for
    signature compatibility and are ignored, so fiboa collection
    metadata and column ordering are NOT applied to the output.

    Args:
        gdf: GeoDataFrame to serialize.
        columns: ignored (kept for fiboa signature compatibility).
        collection: ignored (kept for fiboa signature compatibility).
        out: destination file path for the Parquet output.
        config: ignored (kept for fiboa signature compatibility).
        compression: Parquet compression codec passed to ``to_parquet``.
    """
    gdf.to_parquet(out, compression=compression)

def features_to_dataframe(rows, columns):
    """Fallback for :func:`fiboa_cli.parquet.features_to_dataframe`.

    Converts GeoJSON-like feature mappings into a GeoDataFrame whose
    attribute columns come from each feature's ``properties`` and whose
    geometry column is built from each feature's ``geometry``.

    Args:
        rows: iterable of feature dicts with "properties" and "geometry" keys.
        columns: column names used only when ``rows`` is empty, to shape
            the empty GeoDataFrame that is returned.

    Returns:
        A GeoDataFrame (empty, with ``columns``, when ``rows`` is empty).
    """
    import geopandas as gpd
    import pandas as pd

    if not rows:
        return gpd.GeoDataFrame(columns=columns)

    attributes = [feature["properties"] for feature in rows]
    geometries = [shapely.geometry.shape(feature["geometry"]) for feature in rows]

    return gpd.GeoDataFrame(pd.DataFrame(attributes), geometry=geometries)
from pyproj import CRS, Transformer
from scipy.ndimage import maximum_filter, minimum_filter
from shapely.ops import transform
from tqdm import tqdm

import higra as hg

from ftw_tools.settings import SUPPORTED_POLY_FORMATS_TXT


def InstSegm(extent, boundary, t_ext=0.5, t_bound=0.2):
    """Instance segmentation via hierarchical watershed (higra).

    Args:
        extent: 2D array of per-pixel field-extent scores; compared
            against ``t_ext``, so values are expected in [0, 1].
        boundary: 2D array of per-pixel boundary scores with the same
            shape as ``extent``; expected in [0, 1].
        t_ext: threshold on ``extent`` that defines the binary field
            mask — pixels below it are treated as background.
        t_bound: altitude threshold for the horizontal cut of the
            watershed hierarchy; lower values split fields more finely.

    Returns:
        Float array of the same shape as the inputs holding one label
        value per instance; background pixels (extent below ``t_ext``)
        are set to NaN so callers can mask them with ``np.isnan``.

    Raises:
        ValueError: if ``extent`` and ``boundary`` shapes differ after
            squeezing singleton dimensions.
    """
    extent = np.asarray(extent).squeeze().astype(np.float32)
    boundary = np.asarray(boundary).squeeze().astype(np.float32)

    if extent.shape != boundary.shape:
        raise ValueError(f"extent and boundary must have same shape. Got {extent.shape} vs {boundary.shape}")

    ext_binary = (extent >= t_ext).astype(np.uint8)

    # Force background pixels to maximal boundary strength so the watershed
    # never merges instances across non-field regions.
    input_hws = boundary.copy()
    input_hws[ext_binary == 0] = 1.0

    # Hierarchical watershed on an 8-connected pixel graph; edge weights are
    # the mean boundary strength of the two endpoint pixels.
    size = input_hws.shape[:2]
    graph = hg.get_8_adjacency_graph(size)
    edge_weights = hg.weight_graph(graph, input_hws, hg.WeightFunction.mean)
    tree, altitudes = hg.watershed_hierarchy_by_dynamics(graph, edge_weights)

    # Cut the hierarchy at t_bound to obtain one label per instance.
    instances = hg.labelisation_horizontal_cut_from_threshold(
        tree, altitudes, threshold=t_bound
    ).astype(float)

    instances[ext_binary == 0] = np.nan
    return instances
Comment on lines +51 to +72
Copy link

Copilot AI Oct 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing docstring for a new non-trivial algorithmic function; add a docstring describing expected input ranges, shapes, meaning of t_ext/t_bound, and returned array semantics (NaN masking, instance labeling).

Copilot uses AI. Check for mistakes.


def get_boundary(mask):
    """Derive a binary boundary map from a class mask.

    Pixels with value 3 are zeroed out first (presumably a non-field
    class — TODO confirm against the model's class encoding), then a
    pixel is marked as boundary when a 3x3 neighborhood around it mixes
    different values (morphological gradient) and the pixel itself lies
    inside a field region.

    Args:
        mask: 2D integer class mask.

    Returns:
        float32 array of the same shape with 1.0 on boundary pixels and
        0.0 elsewhere.
    """
    cleaned = mask.copy()
    cleaned[cleaned == 3] = 0
    inside_field = (cleaned > 0).astype(np.uint8)

    # Morphological gradient: a 3x3 window whose max and min differ
    # straddles a class transition.
    dilated = maximum_filter(cleaned, size=3)
    eroded = minimum_filter(cleaned, size=3)

    return ((dilated != eroded) & (inside_field > 0)).astype(np.float32)
Comment on lines +75 to +84
Copy link

Copilot AI Oct 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add a docstring clarifying the assumed class encoding in mask (e.g., why value 3 is zeroed), the rationale for the 3x3 filters, and the expected output value range.

Copilot uses AI. Check for mistakes.


def polygonize(
input,
out,
Expand All @@ -26,6 +92,9 @@ def polygonize(
max_size=None,
overwrite=False,
close_interiors=False,
algorithm="simple",
t_ext=0.5,
t_bound=0.2,
):
"""Polygonize the output from inference."""

Comment on lines 99 to 100
Copy link

Copilot AI Oct 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Docstring not updated to document new parameters algorithm, t_ext, and t_bound; include descriptions and valid value ranges to aid CLI and API users.

Suggested change
"""Polygonize the output from inference."""
"""
Polygonize the output from inference.
Parameters
----------
input : str
Path to the input file (e.g., mask or raster to polygonize).
out : str
Path to the output file (e.g., .parquet, .gpkg, etc.).
simplify : bool, optional
Whether to simplify polygons (default: True).
min_size : int, optional
Minimum polygon area in pixels to keep (default: 500).
max_size : int or None, optional
Maximum polygon area in pixels to keep (default: None, meaning no maximum).
overwrite : bool, optional
Whether to overwrite the output file if it exists (default: False).
close_interiors : bool, optional
Whether to close polygon interiors (default: False).
algorithm : str, optional
Polygonization algorithm to use. Options:
- "simple": Basic polygonization (default)
- "watershed": Use watershed-based segmentation
(default: "simple")
t_ext : float, optional
Threshold for exterior (field) mask. Range: 0.0 to 1.0 (default: 0.5).
Higher values may result in fewer polygons.
t_bound : float, optional
Threshold for boundary mask. Range: 0.0 to 1.0 (default: 0.2).
Lower values may result in more sensitive boundary detection.
Returns
-------
None
Notes
-----
The function reads the input mask, extracts polygons using the specified algorithm and thresholds,
and writes the result to the specified output file.
"""

Copilot uses AI. Check for mistakes.
Expand Down Expand Up @@ -65,11 +134,21 @@ def polygonize(
tags = src.tags()

input_height, input_width = src.shape
mask = (src.read(1) == 1).astype(np.uint8)
polygonization_stride = 2048
total_iterations = math.ceil(input_height / polygonization_stride) * math.ceil(
input_width / polygonization_stride
)
mask = src.read(1)

if algorithm == "watershed":
print("Using watershed instance segmentation...")
extent = (mask == 1).astype(np.float32)
boundary = get_boundary(mask)
instances = InstSegm(extent, boundary, t_ext, t_bound)
unique_instances = np.unique(instances[~np.isnan(instances)])
print(f"Found {len(unique_instances)} instances")
else:
mask = (mask == 1).astype(np.uint8)
polygonization_stride = 2048
total_iterations = math.ceil(input_height / polygonization_stride) * math.ceil(
input_width / polygonization_stride
)

if out.endswith(".gpkg"):
format = "GPKG"
Expand All @@ -94,63 +173,108 @@ def polygonize(
).transform

rows = []
with tqdm(total=total_iterations, desc="Processing mask windows") as pbar:
for y in range(0, input_height, polygonization_stride):
for x in range(0, input_width, polygonization_stride):
new_transform = src.transform * Affine.translation(x, y)
mask_window = mask[
y : y + polygonization_stride, x : x + polygonization_stride
]

if algorithm == "watershed":
with tqdm(total=len(unique_instances), desc="Processing watershed instances") as pbar:
for instance_id in unique_instances:
instance_mask = (instances == instance_id).astype(np.uint8)

for geom_geojson, val in rasterio.features.shapes(
mask_window, transform=new_transform
instance_mask, transform=src.transform
):
if val != 1:
continue

geom = shapely.geometry.shape(geom_geojson)

if close_interiors:
geom = shapely.geometry.Polygon(geom.exterior)
if simplify > 0:
geom = geom.simplify(simplify)

# Calculate the area of the reprojected geometry

if is_meters:
geom_proj_meters = geom
else:
# Reproject the geometry to the equal-area projection
# if the CRS is not in meters
geom_proj_meters = shapely.geometry.shape(
fiona.transform.transform_geom(
original_crs, equal_area_crs, geom_geojson
)
)

area = geom_proj_meters.area
perimeter = geom_proj_meters.length

# Only include geometries that meet the minimum size requirement
if area < min_size or (
max_size is not None and area > max_size
):

if area < min_size or (max_size is not None and area > max_size):
continue

if is_geojson:
geom = transform(affine, geom)

rows.append(
{
"geometry": shapely.geometry.mapping(geom),
"properties": {
"id": str(i),
"area": area * 0.0001, # Add the area in hectares
"perimeter": perimeter, # Add the perimeter in meters
},
}
)

rows.append({
"geometry": shapely.geometry.mapping(geom),
"properties": {
"id": str(i),
"area": area * 0.0001,
"perimeter": perimeter,
},
})
i += 1

pbar.update(1)
else:
with tqdm(total=total_iterations, desc="Processing mask windows") as pbar:
for y in range(0, input_height, polygonization_stride):
for x in range(0, input_width, polygonization_stride):
new_transform = src.transform * Affine.translation(x, y)
mask_window = mask[
y : y + polygonization_stride, x : x + polygonization_stride
]
for geom_geojson, val in rasterio.features.shapes(
mask_window, transform=new_transform
):
if val != 1:
continue

geom = shapely.geometry.shape(geom_geojson)

if close_interiors:
geom = shapely.geometry.Polygon(geom.exterior)
if simplify > 0:
geom = geom.simplify(simplify)

if is_meters:
geom_proj_meters = geom
else:
geom_proj_meters = shapely.geometry.shape(
fiona.transform.transform_geom(
original_crs, equal_area_crs, geom_geojson
)
)

area = geom_proj_meters.area
perimeter = geom_proj_meters.length

if area < min_size or (
max_size is not None and area > max_size
):
continue

if is_geojson:
geom = transform(affine, geom)

rows.append(
{
"geometry": shapely.geometry.mapping(geom),
"properties": {
"id": str(i),
"area": area * 0.0001,
"perimeter": perimeter,
},
}
)
i += 1

pbar.update(1)

if format == "Parquet":
timestamp = tags.get("TIFFTAG_DATETIME", None)
Expand Down
Loading