import os
import re
import unicodedata
from pathlib import Path
import networkx as nx
import numpy as np
import numpy.typing as npt
import pandas as pd
from rich.progress import (
BarColumn,
Progress,
ProgressColumn,
SpinnerColumn,
TaskProgressColumn,
TextColumn,
TimeElapsedColumn,
)
from rich.text import Text
def normalize_pixel_dimensions(
    points: npt.NDArray, image_shape: tuple[int, int]
) -> npt.NDArray:
    """
    Rescale pixel coordinates to fractions of the image size.

    Column 0 of ``points`` is divided by the image width and column 1 by the
    image height. Any further columns are returned unchanged.

    The input array is not modified. The result is always a floating-point
    array, so integer pixel coordinates are not truncated when the quotients
    are stored.

    Args:
        points (NDArray): Numpy array containing a list of points with columns width, height.
        image_shape (tuple[int, int]): Image shape, unpacked as (height, width).

    Returns:
        NDArray: New numpy array containing the rescaled points. Floating-point
        inputs keep their dtype; every other dtype is converted to float64.
    """
    height, width = image_shape
    source = np.asarray(points)
    # Writing quotients back into an integer array would silently truncate
    # them, so integer input is promoted to float64. Existing float dtypes
    # are kept as they are.
    target_dtype = (
        source.dtype if np.issubdtype(source.dtype, np.floating) else np.float64
    )
    # copy=True guarantees the caller's array is never mutated.
    normalized = source.astype(target_dtype, copy=True)
    normalized[:, 0] /= width
    normalized[:, 1] /= height
    return normalized
def boxes_overlap(box1, box2) -> bool:
    """Tell whether two axis-aligned bounding boxes intersect.

    Boxes that merely touch along an edge or at a corner count as
    overlapping, because only a strict gap separates them.

    Args:
        box1 (list[float]): List of xmin, ymin, xmax, ymax coordinates defining first box
        box2 (list[float]): List of xmin, ymin, xmax, ymax coordinates defining second box

    Returns:
        (bool): True if the boxes overlap
    """
    left_a, top_a, right_a, bottom_a = box1
    left_b, top_b, right_b, bottom_b = box2

    # A strict gap along the x axis rules out any intersection.
    if right_a < left_b or right_b < left_a:
        return False
    # Likewise for a strict gap along the y axis.
    if bottom_a < top_b or bottom_b < top_a:
        return False
    return True
def regions_horizontal_overlapping(
    node_df: pd.DataFrame, start_node: int, end_node: int
) -> bool:
    """Check if two regions are horizontally overlapping.

    The region that starts further left (smaller ``bbox_minc``) overlaps the
    other one when its ``bbox_maxc`` is strictly greater than the other
    region's ``bbox_minc``. The result does not depend on the order in which
    the two nodes are passed.

    Args:
        node_df (pd.DataFrame): Table with the columns ``node_id``,
            ``bbox_minc`` and ``bbox_maxc``. The first row matching each node
            id is used.
        start_node (int): Node id of the first region.
        end_node (int): Node id of the second region.

    Returns:
        bool: True if the column ranges of the two regions overlap.

    Raises:
        IndexError: If either node id has no row in ``node_df``.
    """
    start_row = node_df.loc[node_df["node_id"] == start_node].iloc[0]
    end_row = node_df.loc[node_df["node_id"] == end_node].iloc[0]

    start_min = start_row["bbox_minc"]
    end_min = end_row["bbox_minc"]

    if start_min == end_min:
        # Both regions start in the same column, so either could be "left".
        # They overlap as soon as one of them extends past that column.
        # Picking the wider one keeps the answer independent of argument
        # order and never ignores the start region.
        widest_max = max(start_row["bbox_maxc"], end_row["bbox_maxc"])
        return bool(widest_max > start_min)

    if start_min < end_min:
        left_row, right_row = start_row, end_row
    else:
        left_row, right_row = end_row, start_row
    return bool(left_row["bbox_maxc"] > right_row["bbox_minc"])
def rescale_cartesian_coordinates(
    points: npt.NDArray, origin=(0, 0), scale: float = 1.0
) -> npt.NDArray:
    """
    Rescale points around an origin by normalizing their polar radius.

    The points are first expressed in polar form by
    ``convert_to_polar_coordinates`` (which receives ``origin`` and ``scale``),
    and the resulting radius/angle pairs are then turned back into cartesian
    components.

    Args:
        points (NDArray): Numpy array containing a list of points.
        origin (tuple[int, int]): Origin point.
        scale (float): Scaling number.

    Returns:
        NDArray: Numpy array containing the rescaled points, one row per
        point. Column 0 is ``radius * sin(angle)`` and column 1 is
        ``radius * cos(angle)``.
    """
    polar = convert_to_polar_coordinates(points, origin=origin, scale=scale)
    radius = polar[:, 0]
    angle = polar[:, 1]

    # NOTE(review): the sine component is placed in column 0 and the cosine
    # component in column 1, and the origin is not added back, so the result
    # is relative to the origin. Confirm this column order is what callers
    # expect.
    sine_component = radius * np.sin(angle)
    cosine_component = radius * np.cos(angle)
    return np.stack([sine_component, cosine_component], axis=1)
def convert_to_polar_coordinates(
    points: npt.NDArray, origin=(0, 0), scale=1.0
) -> npt.NDArray:
    """
    Express a set of 2D points as (radius, angle) pairs around an origin.

    Args:
        points (NDArray): Numpy array containing a list of points.
        origin (tuple[int, int]): Origin point.
        scale (float): Scaling number.

    Returns:
        NDArray: Numpy array with one row per point. Column 0 is the distance
        to the origin divided by ``scale``. Column 1 is the angle
        ``arctan2(offset[:, 1], offset[:, 0])`` in radians.
    """
    # Offsets of every point from the origin
    offsets = points - origin

    # Euclidean distance to the origin, expressed in units of `scale`
    squared_distance = (offsets * offsets).sum(axis=1)
    scaled_radius = np.sqrt(squared_distance) / scale

    # Angle of each offset vector
    theta = np.arctan2(offsets[:, 1], offsets[:, 0])

    return np.stack([scaled_radius, theta], axis=1)
def generate_graph_from_nodes(node_df: pd.DataFrame) -> nx.Graph:
    """Build an undirected graph from a DataFrame of nodes.

    Every row becomes a node keyed by its ``node_id``; all remaining columns
    are stored as node attributes. An edge is added for every pair of rows
    whose ids satisfy ``start_node < end_node``.

    NOTE(review): the per-pair columns computed below (``weight``,
    ``horizontal_weight``, ``vertical_weight``, ``horizontal_overlap``) are
    not attached to the graph; only the edge endpoints are passed to
    ``add_edges_from``. Confirm whether edge attributes were intended.

    Args:
        node_df (pd.DataFrame): Node table. This function reads the columns
            ``node_id``, ``centroid_0`` and ``centroid_1``;
            ``regions_horizontal_overlapping`` additionally reads
            ``bbox_minc`` and ``bbox_maxc``.

    Returns:
        nx.Graph: Graph containing the nodes and the pairwise edges.
    """
    graph = nx.Graph()
    for _, node_row in node_df.iterrows():
        # Everything except the id travels along as node attributes
        graph.add_node(node_row["node_id"], **node_row.drop("node_id").to_dict())

    centroid_columns = ["node_id", "centroid_1", "centroid_0"]
    # Cross join pairs every node with every node; the overlapping column
    # names receive the default "_x" / "_y" suffixes.
    pairs = (
        node_df[centroid_columns]
        .copy(deep=True)
        .merge(node_df[centroid_columns].copy(deep=True), how="cross")
    )

    # Keep each unordered pair once and drop self-pairs
    ordered = pairs["node_id_x"] < pairs["node_id_y"]
    pairs = pairs.loc[ordered].rename(
        columns={"node_id_x": "start_node", "node_id_y": "end_node"}
    )

    if pairs.empty:
        # No pairs to evaluate (presumably a row-wise apply on an empty frame
        # would not yield a boolean column), so use a constant instead.
        pairs["horizontal_overlap"] = False
    else:
        pairs["horizontal_overlap"] = pairs.apply(
            lambda pair: regions_horizontal_overlapping(
                node_df, pair["start_node"], pair["end_node"]
            ),
            axis=1,
        )

    # Centroid differences along each axis
    delta_1 = pairs["centroid_1_x"] - pairs["centroid_1_y"]
    delta_0 = pairs["centroid_0_x"] - pairs["centroid_0_y"]
    pairs["weight"] = np.sqrt(delta_1**2 + delta_0**2)
    pairs["horizontal_weight"] = np.abs(delta_1)
    pairs["vertical_weight"] = np.abs(delta_0)

    edge_columns = [
        "start_node",
        "end_node",
        "weight",
        "horizontal_weight",
        "vertical_weight",
        "horizontal_overlap",
    ]
    edges = (
        pairs[edge_columns]
        .copy()
        .drop_duplicates(subset=["start_node", "end_node"], keep="first")
        .reset_index(drop=True)
    )

    graph.add_edges_from(edges[["start_node", "end_node"]].to_numpy())
    return graph
def _make_progress(mute: bool, transient: bool) -> Progress:
    """
    Build the rich ``Progress`` instance used for progress reporting.

    If ``mute`` is True a ``Progress`` created with ``disable=True`` is
    returned, so callers always receive a ``Progress`` object. Otherwise the
    returned ``Progress`` has these columns, in order: an optional spinner,
    the bold task description prefixed by the task's ``pad`` field, a bar, a
    percent-or-count cell, and the elapsed time.

    Task fields read by the columns:
        pad: Text placed before the description. The description template
            references it, so every task is expected to supply it.
        show_spinner: Show the spinner when truthy (default True).
        show_percent: Render the percentage when truthy (default False).
        show_total: Otherwise render ``completed/total`` when truthy
            (default True). If both flags are falsy the cell is blank.

    Args:
        mute (bool): Return a disabled progress display.
        transient (bool): Forwarded to ``Progress(transient=...)``;
            determines if the display hides after completion.

    Returns:
        Progress: The configured (or disabled) progress display.
    """
    if mute:
        return Progress(disable=True)

    class PercentOrTotal(ProgressColumn):
        """Render either % or completed/total depending on task flags."""

        _percent = TaskProgressColumn()

        def render(self, task) -> Text:
            if task.fields.get("show_percent", False):  # percentage
                return self._percent.render(task)
            if task.fields.get("show_total", True):  # e.g. 12/37
                # A task without a known total has total=None; int(None)
                # would raise while rendering, so show "?" instead.
                total = "?" if task.total is None else int(task.total)
                return Text(f"{int(task.completed)}/{total}")
            return Text("")  # blank cell

    class MaybeSpinner(SpinnerColumn):
        """Show the spinner only when task.fields['show_spinner'] is truthy."""

        def render(self, task) -> Text:
            if task.fields.get("show_spinner", True):
                return super().render(task)  # type: ignore # noqa: PGH003
            return Text("")

    return Progress(
        MaybeSpinner(),
        TextColumn("[bold]{task.fields[pad]}{task.description}"),
        BarColumn(),
        PercentOrTotal(),
        TimeElapsedColumn(),
        transient=transient,
        refresh_per_second=30,
    )
def normalize_path(path_str: str) -> Path:
    """Turn a loosely formatted path string into a clean ``pathlib.Path``.

    Processing order:
      1. Drop every character whose Unicode category starts with "C"
         (control characters and the like).
      2. Replace curly "smart" quotes with their plain ASCII counterparts.
      3. Strip surrounding whitespace, then one pair of surrounding quotes.
      4. Expand environment variables, then the user home (``~``).
      5. Apply Unicode NFC normalization and turn "/" into ``os.sep``.
      6. Collapse redundant segments with ``os.path.normpath``.

    Args:
        path_str: Raw path string copied from Windows (may contain spaces,
            smart quotes, stray control chars, etc.)

    Returns:
        A pathlib.Path pointing to the normalized path.
    """
    # Steps 1-2: remove "C*" category characters, then straighten quotes
    cleaned = "".join(
        char for char in path_str if not unicodedata.category(char).startswith("C")
    )
    smart_quotes = {"\u201c": '"', "\u201d": '"', "\u2018": "'", "\u2019": "'"}
    for curly, straight in smart_quotes.items():
        cleaned = cleaned.replace(curly, straight)

    # Step 3: trim whitespace, then unwrap one layer of quotes if present
    cleaned = cleaned.strip()
    if (quoted := re.match(r'^[\'"](.*)[\'"]$', cleaned)) is not None:
        cleaned = quoted.group(1)

    # Step 4: environment variables first, then the home directory
    with_vars = os.path.expandvars(cleaned)
    with_home = os.path.expanduser(with_vars)  # noqa: PTH111

    # Step 5: canonical Unicode form and platform-native separators
    composed = unicodedata.normalize("NFC", with_home)
    native = composed.replace("/", os.sep)

    # Step 6: resolve "." / ".." and repeated separators
    return Path(os.path.normpath(native))