diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b865f88350ea..39daf3dd7f88 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -19,7 +19,7 @@ repos: - id: auto-walrus - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.15.15 + rev: v0.15.9 hooks: - id: ruff-check - id: ruff-format @@ -32,7 +32,7 @@ repos: - tomli - repo: https://github.com/tox-dev/pyproject-fmt - rev: v2.23.0 + rev: v2.21.0 hooks: - id: pyproject-fmt diff --git a/DIRECTORY.md b/DIRECTORY.md index daf71bab8162..ca454bd5fd82 100644 --- a/DIRECTORY.md +++ b/DIRECTORY.md @@ -471,8 +471,6 @@ * [Geometry](geometry/geometry.py) * [Graham Scan](geometry/graham_scan.py) * [Jarvis March](geometry/jarvis_march.py) - * [Ramer Douglas Peucker](geometry/ramer_douglas_peucker.py) - * [Segment Intersection](geometry/segment_intersection.py) * Tests * [Test Graham Scan](geometry/tests/test_graham_scan.py) * [Test Jarvis March](geometry/tests/test_jarvis_march.py) @@ -525,7 +523,6 @@ * [Graphs Floyd Warshall](graphs/graphs_floyd_warshall.py) * [Greedy Best First](graphs/greedy_best_first.py) * [Greedy Min Vertex Cover](graphs/greedy_min_vertex_cover.py) - * [Johnson](graphs/johnson.py) * [Kahns Algorithm Long](graphs/kahns_algorithm_long.py) * [Kahns Algorithm Topo](graphs/kahns_algorithm_topo.py) * [Karger](graphs/karger.py) @@ -546,7 +543,6 @@ * [Strongly Connected Components](graphs/strongly_connected_components.py) * [Tarjans Scc](graphs/tarjans_scc.py) * Tests - * [Test Johnson](graphs/tests/test_johnson.py) * [Test Min Spanning Tree Kruskal](graphs/tests/test_min_spanning_tree_kruskal.py) * [Test Min Spanning Tree Prim](graphs/tests/test_min_spanning_tree_prim.py) diff --git a/geometry/ramer_douglas_peucker.py b/geometry/ramer_douglas_peucker.py deleted file mode 100644 index a03bbb2e5086..000000000000 --- a/geometry/ramer_douglas_peucker.py +++ /dev/null @@ -1,184 +0,0 @@ -""" -Ramer-Douglas-Peucker polyline simplification algorithm. - -Given a sequence of 2-D points and a tolerance epsilon, the algorithm -reduces the number of points while preserving the overall shape of the curve. - -Time complexity: O(n log n) average, O(n²) worst case -Space complexity: O(n) - -References: - https://en.wikipedia.org/wiki/Ramer%E2%80%93Douglas%E2%80%93Peucker_algorithm -""" - -from __future__ import annotations - -import math - - -def _euclidean_distance( - point_a: tuple[float, float], - point_b: tuple[float, float], -) -> float: - """Return the Euclidean distance between two 2-D points. - - >>> _euclidean_distance((0.0, 0.0), (3.0, 4.0)) - 5.0 - >>> _euclidean_distance((1.0, 1.0), (1.0, 1.0)) - 0.0 - """ - return math.hypot(point_b[0] - point_a[0], point_b[1] - point_a[1]) - - -def _perpendicular_distance( - point: tuple[float, float], - line_start: tuple[float, float], - line_end: tuple[float, float], -) -> float: - """Return the distance from *point* to the line **segment** between - *line_start* and *line_end*. - - When the perpendicular projection of *point* onto the infinite line falls - within the segment, this equals the perpendicular distance to that line. - When the projection falls outside the segment, the distance to the nearest - endpoint is returned instead (projection clamped to [0, 1]). - - This is the correct distance measure for the Ramer-Douglas-Peucker - algorithm: using the infinite-line distance can incorrectly discard points - whose projection lies beyond a segment endpoint. - - >>> _perpendicular_distance((4.0, 0.0), (0.0, 0.0), (0.0, 3.0)) - 4.0 - >>> # order of line_start and line_end does not affect the result - >>> _perpendicular_distance((4.0, 0.0), (0.0, 3.0), (0.0, 0.0)) - 4.0 - >>> _perpendicular_distance((4.0, 1.0), (0.0, 1.0), (0.0, 4.0)) - 4.0 - >>> _perpendicular_distance((2.0, 1.0), (-2.0, 1.0), (-2.0, 4.0)) - 4.0 - >>> # projection falls outside the segment; distance to nearest endpoint - >>> round(_perpendicular_distance((0.0, 2.0), (1.0, 0.0), (3.0, 0.0)), 6) - 2.236068 - """ - px, py = point - ax, ay = line_start - bx, by = line_end - dx, dy = bx - ax, by - ay - seg_len_sq = dx * dx + dy * dy - if seg_len_sq == 0.0: - # line_start and line_end coincide; fall back to point-to-point distance - return _euclidean_distance(point, line_start) - # Project point onto the segment line, then clamp t to [0, 1] so the - # nearest point is always on the segment rather than the infinite line. - t = max(0.0, min(1.0, ((px - ax) * dx + (py - ay) * dy) / seg_len_sq)) - nearest_x = ax + t * dx - nearest_y = ay + t * dy - return math.hypot(px - nearest_x, py - nearest_y) - - -def ramer_douglas_peucker( - pts: list[tuple[float, float]], - epsilon: float, -) -> list[tuple[float, float]]: - """Simplify a polyline using the Ramer-Douglas-Peucker algorithm. - - Given a sequence of 2-D points and a maximum allowable deviation - *epsilon* (>= 0), returns a simplified list of points such that no - discarded point is farther than *epsilon* from the simplified polyline. - - Parameters - ---------- - pts: - Ordered sequence of ``(x, y)`` points describing the polyline. - epsilon: - Maximum allowable distance of any discarded point from the - simplified polyline. Must be non-negative. - - Returns - ------- - list[tuple[float, float]] - Simplified list of ``(x, y)`` points. The first and last points of - *pts* are always preserved. - - Raises - ------ - ValueError - If *epsilon* is negative. - - References - ---------- - https://en.wikipedia.org/wiki/Ramer%E2%80%93Douglas%E2%80%93Peucker_algorithm - - Examples - -------- - >>> ramer_douglas_peucker([], epsilon=1.0) - [] - >>> ramer_douglas_peucker([(0.0, 0.0)], epsilon=1.0) - [(0.0, 0.0)] - >>> ramer_douglas_peucker([(0.0, 0.0), (1.0, 0.0)], epsilon=1.0) - [(0.0, 0.0), (1.0, 0.0)] - >>> # middle point is within epsilon - it is discarded - >>> ramer_douglas_peucker([(0.0, 0.0), (1.0, 0.1), (2.0, 0.0)], epsilon=0.5) - [(0.0, 0.0), (2.0, 0.0)] - >>> # middle point exceeds epsilon - it is kept - >>> ramer_douglas_peucker([(0.0, 0.0), (1.0, 1.0), (2.0, 0.0)], epsilon=0.5) - [(0.0, 0.0), (1.0, 1.0), (2.0, 0.0)] - >>> ramer_douglas_peucker([(0.0, 0.0), (1.0, 0.5), (2.0, 0.0)], epsilon=-1.0) - Traceback (most recent call last): - ... - ValueError: epsilon must be non-negative, got -1.0 - """ - if epsilon < 0: - msg = f"epsilon must be non-negative, got {epsilon!r}" - raise ValueError(msg) - - if len(pts) < 3: - return list(pts) - - # --------------------------------------------------------------------------- - # Iterative, stack-based implementation. - # - # The naive recursive approach copies sublists at every level via slicing - # (pts[:max_index+1] / pts[max_index:]), which is O(n) per call and makes - # the overall algorithm O(n²) in memory even for well-balanced splits. An - # explicit stack operating on index ranges avoids all copying and also - # eliminates the risk of hitting Python's recursion limit for long polylines. - # --------------------------------------------------------------------------- - n = len(pts) - - # keep[i] is True when pts[i] must appear in the output. - keep: list[bool] = [False] * n - keep[0] = True - keep[-1] = True - - # Stack of (start_index, end_index) pairs still to be examined. - stack: list[tuple[int, int]] = [(0, n - 1)] - - while stack: - start, end = stack.pop() - if end - start < 2: - # Only one interior candidate at most; nothing to split further. - continue - - # Find the interior point with the greatest distance to the segment. - max_dist = 0.0 - max_index = start - for i in range(start + 1, end): - dist = _perpendicular_distance(pts[i], pts[start], pts[end]) - if dist > max_dist: - max_dist = dist - max_index = i - - if max_dist > epsilon: - keep[max_index] = True - stack.append((start, max_index)) - stack.append((max_index, end)) - # else: all interior points are within epsilon; discard them all. - - return [pts[i] for i in range(n) if keep[i]] - - -if __name__ == "__main__": - import doctest - - doctest.testmod() diff --git a/geometry/segment_intersection.py b/geometry/segment_intersection.py deleted file mode 100644 index e2e2e10f1e4d..000000000000 --- a/geometry/segment_intersection.py +++ /dev/null @@ -1,112 +0,0 @@ -""" -Given two line segments, determine whether they intersect. - -This is based on the algorithm described in Introduction to Algorithms -(CLRS), Chapter 33. - -Reference: - - https://en.wikipedia.org/wiki/Line%E2%80%93line_intersection - - https://en.wikipedia.org/wiki/Orientation_(geometry) -""" - -from __future__ import annotations - -from typing import NamedTuple - - -class Point(NamedTuple): - """A point in 2D space. - - >>> Point(0, 0) - Point(x=0, y=0) - >>> Point(1, -3) - Point(x=1, y=-3) - """ - - x: float - y: float - - -def direction(pivot: Point, target: Point, query: Point) -> float: - """Return the cross product of vectors (pivot->query) and (pivot->target). - - The sign of the result encodes the orientation of the ordered triple - (pivot, target, query): - - Negative -> counter-clockwise (left turn) - - Positive -> clockwise (right turn) - - Zero -> collinear - - >>> direction(Point(0, 0), Point(1, 0), Point(0, 1)) - -1 - >>> direction(Point(0, 0), Point(0, 1), Point(1, 0)) - 1 - >>> direction(Point(0, 0), Point(1, 1), Point(2, 2)) - 0 - """ - return (query.x - pivot.x) * (target.y - pivot.y) - (target.x - pivot.x) * ( - query.y - pivot.y - ) - - -def on_segment(seg_start: Point, seg_end: Point, point: Point) -> bool: - """Check whether *point*, known to be collinear with the segment, lies on it. - - >>> on_segment(Point(0, 0), Point(4, 4), Point(2, 2)) - True - >>> on_segment(Point(0, 0), Point(4, 4), Point(5, 5)) - False - >>> on_segment(Point(0, 0), Point(4, 0), Point(2, 0)) - True - """ - return min(seg_start.x, seg_end.x) <= point.x <= max( - seg_start.x, seg_end.x - ) and min(seg_start.y, seg_end.y) <= point.y <= max(seg_start.y, seg_end.y) - - -def segments_intersect(p1: Point, p2: Point, p3: Point, p4: Point) -> bool: - """Return True if line segment p1p2 intersects line segment p3p4. - - Uses the CLRS cross-product / orientation method. Handles both the - general case (proper crossing) and degenerate cases where one endpoint - lies exactly on the other segment. - - >>> segments_intersect(Point(0, 0), Point(2, 2), Point(0, 2), Point(2, 0)) - True - >>> segments_intersect(Point(0, 0), Point(2, 2), Point(1, 1), Point(3, 3)) - True - >>> segments_intersect(Point(0, 0), Point(1, 0), Point(2, 0), Point(3, 0)) - False - >>> segments_intersect(Point(0, 0), Point(1, 1), Point(1, 0), Point(2, 1)) - False - >>> segments_intersect(Point(0, 0), Point(1, 1), Point(0, 1), Point(0, 2)) - False - >>> segments_intersect(Point(0, 0), Point(1, 0), Point(1, 0), Point(2, 0)) - True - """ - d1 = direction(p3, p4, p1) - d2 = direction(p3, p4, p2) - d3 = direction(p1, p2, p3) - d4 = direction(p1, p2, p4) - - if ((d1 < 0 < d2) or (d2 < 0 < d1)) and ((d3 < 0 < d4) or (d4 < 0 < d3)): - return True - - if d1 == 0 and on_segment(p3, p4, p1): - return True - if d2 == 0 and on_segment(p3, p4, p2): - return True - if d3 == 0 and on_segment(p1, p2, p3): - return True - return d4 == 0 and on_segment(p1, p2, p4) - - -if __name__ == "__main__": - import doctest - - doctest.testmod() - - print("Enter four points as 'x y' pairs (one per line):") - points = [Point(*map(float, input().split())) for _ in range(4)] - p1, p2, p3, p4 = points - result = segments_intersect(p1, p2, p3, p4) - print(1 if result else 0) diff --git a/graphs/johnson.py b/graphs/johnson.py deleted file mode 100644 index 6306ab5f8654..000000000000 --- a/graphs/johnson.py +++ /dev/null @@ -1,118 +0,0 @@ -import heapq -from collections.abc import Hashable - -Node = Hashable -edge = tuple[Node, Node, float] -adjacency = dict[Node, list[tuple[Node, float]]] - - -def _collect_nodes_and_edges(graph: adjacency) -> tuple[list[Node], list[edge]]: - nodes = set() - edges: list[edge] = [] - for u, neighbors in graph.items(): - nodes.add(u) - for v, w in neighbors: - nodes.add(v) - edges.append((u, v, w)) - return list(nodes), edges - - -def _bellman_ford(nodes: list[Node], edges: list[edge]) -> dict[Node, float]: - """ - Bellman-Ford relaxation to compute potentials h[v] for all vertices. - Raises ValueError if a negative weight cycle exists. - """ - dist: dict[Node, float] = dict.fromkeys(nodes, 0.0) - n = len(nodes) - - for _ in range(n - 1): - updated = False - for u, v, w in edges: - if dist[u] + w < dist[v]: - dist[v] = dist[u] + w - updated = True - if not updated: - break - else: - for u, v, w in edges: - if dist[u] + w < dist[v]: - raise ValueError("Negative weight cycle detected") - return dist - - -def _dijkstra( - start: Node, - nodes: list[Node], - graph: adjacency, - potentials: dict[Node, float], -) -> dict[Node, float]: - """ - Dijkstra over reweighted graph, using potentials h to make weights non-negative. - Returns distances from start in the reweighted space. - """ - inf = float("inf") - dist: dict[Node, float] = dict.fromkeys(nodes, inf) - dist[start] = 0.0 - heap: list[tuple[float, Node]] = [(0.0, start)] - - while heap: - d_u, u = heapq.heappop(heap) - if d_u > dist[u]: - continue - for v, w in graph.get(u, []): - w_prime = w + potentials[u] - potentials[v] - if w_prime < 0: - raise ValueError( - "Negative edge weight after reweighting: numeric error" - ) - new_dist = d_u + w_prime - if new_dist < dist[v]: - dist[v] = new_dist - heapq.heappush(heap, (new_dist, v)) - return dist - - -def johnson(graph: adjacency) -> dict[Node, dict[Node, float]]: - """ - Compute all-pairs shortest paths using Johnson's algorithm. - - Reference: - https://en.wikipedia.org/wiki/Johnson%27s_algorithm - - Args: - graph: adjacency list {u: [(v, weight), ...], ...} - - Returns: - dict of dicts: dist[u][v] = shortest distance from u to v - - Raises: - ValueError: if a negative weight cycle is detected - - Example: - >>> g = { - ... 0: [(1, 3), (2, 8), (4, -4)], - ... 1: [(3, 1), (4, 7)], - ... 2: [(1, 4)], - ... 3: [(0, 2), (2, -5)], - ... 4: [(3, 6)], - ... } - >>> round(johnson(g)[0][3], 2) - 2.0 - """ - nodes, edges = _collect_nodes_and_edges(graph) - potentials = _bellman_ford(nodes, edges) - - all_pairs: dict[Node, dict[Node, float]] = {} - inf = float("inf") - for s in nodes: - dist_reweighted = _dijkstra(s, nodes, graph, potentials) - dists_orig: dict[Node, float] = {} - for v in nodes: - d_prime = dist_reweighted[v] - if d_prime < inf: - dists_orig[v] = d_prime - potentials[s] + potentials[v] - else: - dists_orig[v] = inf - all_pairs[s] = dists_orig - - return all_pairs diff --git a/graphs/tests/test_johnson.py b/graphs/tests/test_johnson.py deleted file mode 100644 index e149aac85d0f..000000000000 --- a/graphs/tests/test_johnson.py +++ /dev/null @@ -1,24 +0,0 @@ -import math - -import pytest - -from graphs.johnson import johnson - - -def test_johnson_basic(): - g = { - 0: [(1, 3), (2, 8), (4, -4)], - 1: [(3, 1), (4, 7)], - 2: [(1, 4)], - 3: [(0, 2), (2, -5)], - 4: [(3, 6)], - } - dist = johnson(g) - assert math.isclose(dist[0][3], 2.0, abs_tol=1e-9) - assert math.isclose(dist[3][2], -5.0, abs_tol=1e-9) - - -def test_johnson_negative_cycle(): - g2 = {0: [(1, 1)], 1: [(0, -3)]} - with pytest.raises(ValueError): - johnson(g2) diff --git a/pyproject.toml b/pyproject.toml index 0b0c3f5cfda4..34e099a46435 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -169,14 +169,14 @@ skip = """\ python_version = "3.14" [tool.pytest] +ini_options.markers = [ + "mat_ops: mark a test as utilizing matrix operations.", +] ini_options.addopts = [ "--durations=10", "--doctest-modules", "--showlocals", ] -ini_options.markers = [ - "mat_ops: mark a test as utilizing matrix operations.", -] [tool.coverage] report.omit = [ diff --git a/searches/binary_search.py b/searches/binary_search.py index bec87b3c5aec..6907eda477b8 100644 --- a/searches/binary_search.py +++ b/searches/binary_search.py @@ -196,22 +196,28 @@ def binary_search(sorted_collection: list[int], item: int) -> int: 1 >>> binary_search([0, 5, 7, 10, 15], 6) -1 + >>> binary_search([1, 2, 2, 2, 3], 2) + 1 + >>> binary_search([1, 1, 1, 2, 3], 1) + 0 """ if any(a > b for a, b in pairwise(sorted_collection)): raise ValueError("sorted_collection must be sorted in ascending order") left = 0 right = len(sorted_collection) - 1 + result = -1 while left <= right: midpoint = left + (right - left) // 2 current_item = sorted_collection[midpoint] if current_item == item: - return midpoint + result = midpoint + right = midpoint - 1 elif item < current_item: right = midpoint - 1 else: left = midpoint + 1 - return -1 + return result def binary_search_std_lib(sorted_collection: list[int], item: int) -> int: diff --git a/sorts/tim_sort.py b/sorts/tim_sort.py index 2eeed88b7399..41ab4a10a87b 100644 --- a/sorts/tim_sort.py +++ b/sorts/tim_sort.py @@ -1,7 +1,4 @@ -from typing import Any - - -def binary_search(lst: list[Any], item: Any, start: int, end: int) -> int: +def binary_search(lst, item, start, end): if start == end: return start if lst[start] > item else start + 1 if start > end: @@ -16,7 +13,7 @@ def binary_search(lst: list[Any], item: Any, start: int, end: int) -> int: return mid -def insertion_sort(lst: list[Any]) -> list[Any]: +def insertion_sort(lst): length = len(lst) for index in range(1, length): @@ -27,7 +24,7 @@ def insertion_sort(lst: list[Any]) -> list[Any]: return lst -def merge(left: list[Any], right: list[Any]) -> list[Any]: +def merge(left, right): if not left: return right @@ -40,7 +37,7 @@ def merge(left: list[Any], right: list[Any]) -> list[Any]: return [right[0], *merge(left, right[1:])] -def tim_sort(lst: list[Any] | tuple[Any, ...] | str) -> list[Any]: +def tim_sort(lst): """ >>> tim_sort("Python") ['P', 'h', 'n', 'o', 't', 'y'] @@ -56,7 +53,7 @@ def tim_sort(lst: list[Any] | tuple[Any, ...] | str) -> list[Any]: length = len(lst) runs, sorted_runs = [], [] new_run = [lst[0]] - sorted_array: list[Any] = [] + sorted_array = [] i = 1 while i < length: if lst[i] < lst[i - 1]: