"""
tests/test_build_scripts.py
===========================
Unit tests for pure helper functions in build_* scripts.

Only tests functions that:
  - have no I/O side-effects (no file reads, network, DB)
  - are imported directly from the module

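No custom fixtures, no mocks, no integration markers — all tests run by default.
The one exception is the build_securite_lille check at the end of this file,
which relies on pytest's built-in ``tmp_path`` and ``monkeypatch`` fixtures.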
"""

from __future__ import annotations

import sys
import os
import math

import pandas as pd
import pytest

# ---------------------------------------------------------------------------
# build_dvf_historique — CSV conversion helpers
# ---------------------------------------------------------------------------

from build_dvf_historique import (
    _parse_date,
    _parse_float,
    _build_code_commune,
    _build_parcel_id,
    _make_id,
)


class TestParseDateDvf:
    """Checks for ``_parse_date``: DD/MM/YYYY strings in, ISO-style strings out."""

    def test_standard(self):
        """A day/month/year value comes back as year-month-day."""
        converted = _parse_date("15/03/2022")
        assert converted == "2022-03-15"

    def test_leading_zeros(self):
        """Zero-padded day and month keep their padding in the output."""
        converted = _parse_date("01/01/2020")
        assert converted == "2020-01-01"

    def test_invalid_passthrough(self):
        """Text that is not a date is returned as it was given."""
        raw = "not-a-date"
        assert _parse_date(raw) == raw

    def test_empty_passthrough(self):
        """A whitespace-only value comes back as the empty string."""
        converted = _parse_date("  ")
        assert converted == ""


class TestParseFloatDvf:
    """Checks for ``_parse_float``: French-formatted numbers to dot-decimal text."""

    def test_comma_decimal(self):
        """Inner spaces are dropped and the decimal comma becomes a dot."""
        cleaned = _parse_float("1 234,50")
        assert cleaned == "1234.50"

    def test_already_dot(self):
        """A value that already uses a decimal dot is left as is."""
        value = "3500.00"
        assert _parse_float(value) == value

    def test_spaces_stripped(self):
        """Leading and trailing blanks are removed."""
        cleaned = _parse_float("  500 ")
        assert cleaned == "500"


class TestBuildCodeCommune:
    """Checks for ``_build_code_commune``: two code fragments to a 5-char code."""

    def test_standard(self):
        """A 2-char and a 3-char fragment are simply concatenated."""
        code = _build_code_commune("59", "350")
        assert code == "59350"

    def test_zero_padding(self):
        """A short second fragment is left-padded with zeros to three characters."""
        code = _build_code_commune("59", "9")
        assert code == "59009"


class TestBuildParcelId:
    """Checks for ``_build_parcel_id``: five string fragments to a 14-char id.

    The expected values below show the layout: 5 characters built from the
    first two arguments, 3 for the third (``"000"`` when empty), 2 for the
    fourth and 4 for the fifth.
    """

    def test_standard(self):
        """An empty third argument is rendered as ``000``."""
        pid = _build_parcel_id("59", "350", "", "AK", "216")
        assert pid == "59350000AK0216"

    def test_single_char_section(self):
        """A one-letter fourth argument is zero-padded on the left."""
        # zfill pads on the LEFT: "B" → "0B", giving "59350" + "000" + "0B" + "2378"
        pid = _build_parcel_id("59", "350", "", "B", "2378")
        assert pid == "593500000B2378"

    def test_with_prefixe(self):
        """A non-empty third argument replaces the ``000`` filler."""
        pid = _build_parcel_id("59", "350", "001", "AK", "216")
        assert pid == "59350001AK0216"

    def test_length_always_14(self):
        """Every input shape exercised in this class yields exactly 14 characters."""
        # One input cannot back an "always" claim, so check every argument set
        # whose output is pinned by the sibling tests above.
        arg_sets = (
            ("59", "350", "", "AK", "216"),
            ("59", "350", "", "B", "2378"),
            ("59", "350", "001", "AK", "216"),
        )
        for args in arg_sets:
            pid = _build_parcel_id(*args)
            assert len(pid) == 14, f"{args!r} -> {pid!r}"


class TestMakeId:
    """Checks for ``_make_id``: a deterministic identifier from four string fields."""

    def test_basic(self):
        """The id embeds the first argument and the date with its slashes removed."""
        identifier = _make_id("59350", "01/01/2022", "1", "150000")
        for fragment in ("59350", "01012022"):
            assert fragment in identifier

    def test_different_vf_produces_different_ids(self):
        """Changing only the last argument changes the id."""
        shared = ("59350", "01/01/2022", "1")
        cheaper = _make_id(*shared, "100000")
        dearer = _make_id(*shared, "200000")
        assert cheaper != dearer

    def test_same_acte_same_id(self):
        """Identical arguments always give the identical id."""
        fields = ("59350", "15/06/2023", "3", "250000")
        first = _make_id(*fields)
        second = _make_id(*fields)
        assert first == second


# ---------------------------------------------------------------------------
# build_parcel_centroids_59 — centroid computation
# ---------------------------------------------------------------------------

from build_parcel_centroids_59 import _centroid


class TestCentroid:
    """Checks for ``_centroid`` on GeoJSON-like geometry dicts."""

    def test_polygon(self):
        """A unit square yields a point within 0.01 of (0.5, 0.5)."""
        # Open (non-closed) 4-point ring. The expected value assumes the helper
        # averages the listed vertices — confirm against _centroid.
        square = [[0.0, 0.0], [1.0, 0.0], [1.0, 1.0], [0.0, 1.0]]
        geometry = {"type": "Polygon", "coordinates": [square]}
        lat, lon = _centroid(geometry)
        for component in (lat, lon):
            assert abs(component - 0.5) < 0.01

    def test_multipolygon_uses_largest_ring(self):
        """With two polygons, the result comes from the one with the longer ring."""
        # 4 listed vertices near the origin versus 6 listed vertices in [2, 4];
        # presumably the helper keeps the ring with the most vertices.
        tiny = [[0.0, 0.0], [0.1, 0.0], [0.1, 0.1], [0.0, 0.0]]
        big = [[2.0, 2.0], [3.0, 2.0], [4.0, 2.0], [4.0, 4.0], [3.0, 4.0], [2.0, 2.0]]
        geometry = {"type": "MultiPolygon", "coordinates": [[tiny], [big]]}
        lat, _lon = _centroid(geometry)
        # Every coordinate of the big ring is >= 2, every one of the tiny ring <= 0.1.
        assert lat > 1.0

    def test_unknown_type_returns_none(self):
        """Geometry types other than polygons are rejected with ``None``."""
        point = {"type": "Point", "coordinates": [1.0, 2.0]}
        assert _centroid(point) is None

    def test_empty_coords_returns_none(self):
        """A polygon without any ring is rejected with ``None``."""
        hollow = {"type": "Polygon", "coordinates": []}
        assert _centroid(hollow) is None


# ---------------------------------------------------------------------------
# build_flood_zones — tile generator + GML parser
# ---------------------------------------------------------------------------

from build_flood_zones import _tiles, _parse_pos_list


class TestTiles:
    """Checks for ``_tiles``: splitting a bounding box into square tiles."""

    def test_count_for_small_bbox(self):
        """A 1x1 degree box cut with a 0.5 step gives 2 x 2 = 4 tiles."""
        grid = _tiles(0.0, 0.0, 1.0, 1.0, 0.5)
        assert len(grid) == 4

    def test_tile_bounds_within_bbox(self):
        """No tile of a 2x1 box (step 1.0) sticks out of that box."""
        grid = _tiles(0.0, 0.0, 2.0, 1.0, 1.0)
        # Each tile unpacks as (min_lng, min_lat, max_lng, max_lat).
        for tile in grid:
            min_lng, min_lat, max_lng, max_lat = tile
            assert 0.0 <= min_lng
            assert 2.0 >= max_lng
            assert 0.0 <= min_lat
            assert 1.0 >= max_lat

    def test_empty_bbox(self):
        """An inverted box (min > max on both axes) produces no tiles."""
        grid = _tiles(1.0, 1.0, 0.0, 0.0, 0.5)
        assert grid == []


class TestParsePosListFlood:
    """Checks for ``_parse_pos_list``: a space-separated number list to [lon, lat] pairs."""

    def test_lon_lat_order(self):
        """Pairs already in lon-lat order (first value below 10) are kept as is."""
        pairs = _parse_pos_list("2.5 50.5 3.0 51.0")
        expected = [[2.5, 50.5], [3.0, 51.0]]
        assert pairs == expected

    def test_lat_lon_swap(self):
        """Pairs in lat-lon order (first value above 10) are swapped to lon-lat."""
        # WFS-style input starts with the latitude; the output is GeoJSON order.
        pairs = _parse_pos_list("50.5 2.5 51.0 3.0")
        first, second = pairs[0], pairs[1]
        assert first == [2.5, 50.5]
        assert second == [3.0, 51.0]


# ---------------------------------------------------------------------------
# build_poi — OSM element classification
# ---------------------------------------------------------------------------

from build_poi import _classify, _name


class TestClassifyPoi:
    """Checks for ``_classify``: OSM tag dicts to a POI category (or ``None``)."""

    def test_townhall(self):
        """``amenity=townhall`` is categorised as ``mairie``."""
        tags = {"amenity": "townhall"}
        assert _classify(tags) == "mairie"

    def test_tram(self):
        """``railway=tram_stop`` is categorised as ``tram``."""
        tags = {"railway": "tram_stop"}
        assert _classify(tags) == "tram"

    def test_metro(self):
        """A railway station tagged ``station=subway`` is categorised as ``metro``."""
        tags = {"railway": "station", "station": "subway"}
        assert _classify(tags) == "metro"

    def test_gare(self):
        """A railway station tagged ``train=yes`` is categorised as ``gare``."""
        tags = {"railway": "station", "train": "yes"}
        assert _classify(tags) == "gare"

    def test_unknown(self):
        """Tags outside the known categories give ``None``."""
        tags = {"amenity": "cafe"}
        assert _classify(tags) is None


class TestNamePoi:
    """Checks for ``_name``: picking a display name from OSM tags, with a fallback."""

    def test_name_field(self):
        """The ``name`` tag wins when present."""
        label = _name({"name": "Gare de Lille"}, "fallback")
        assert label == "Gare de Lille"

    def test_fallback_when_no_name(self):
        """With no usable tag, the second argument is returned."""
        label = _name({}, "fallback")
        assert label == "fallback"

    def test_name_fr_second_choice(self):
        """``name:fr`` is used when it is the only naming tag."""
        label = _name({"name:fr": "Mairie"}, "x")
        assert label == "Mairie"

    def test_official_name_third_choice(self):
        """``official_name`` is used when it is the only naming tag."""
        label = _name({"official_name": "Hôtel de Ville"}, "x")
        assert label == "Hôtel de Ville"

    def test_strips_whitespace(self):
        """Surrounding blanks are trimmed from the chosen name."""
        label = _name({"name": "  Roubaix  "}, "x")
        assert label == "Roubaix"


# ---------------------------------------------------------------------------
# build_prix_evolution — stats and evolution helpers
# ---------------------------------------------------------------------------

from build_prix_evolution import normaliser, calculer_stats, ajouter_evolution


class TestNormaliserPrix:
    """Checks for ``normaliser``: commune names to an upper-case comparison key."""

    def test_ligatures(self):
        """The ``œ`` ligature expands to ``OE`` and hyphens become spaces."""
        key = normaliser("Marcq-en-Barœul")
        assert key == "MARCQ EN BAROEUL"

    def test_accents_removed(self):
        """Hyphen and apostrophe both turn into single spaces."""
        # NOTE(review): despite the test name, this input has no accented
        # character; it only exercises punctuation handling and upper-casing.
        key = normaliser("Villeneuve-d'Ascq")
        assert key == "VILLENEUVE D ASCQ"

    def test_already_normalised(self):
        """An already upper-case, punctuation-free name is unchanged."""
        name = "LILLE"
        assert normaliser(name) == name


class TestCalculerStats:
    """Checks for ``calculer_stats``: grouping transactions into summary rows."""

    def _make_txs(self, annee, cp, commune, type_local, prix_m2s, surfaces):
        """Build one transaction dict per (price per m², surface) pair.

        ``valeur`` is derived as price per m² times surface; the other fields
        are shared by every generated transaction.
        """
        return [
            {
                "annee": annee,
                "code_postal": cp,
                "commune": commune,
                "type_local": type_local,
                "prix_m2": unit_price,
                "valeur": unit_price * area,
                "surface": area,
            }
            for unit_price, area in zip(prix_m2s, surfaces)
        ]

    def test_single_group(self):
        """Two transactions sharing every grouping field collapse into one row."""
        sales = self._make_txs(
            2024, "59000", "Lille", "Appartement", [3000.0, 4000.0], [50.0, 60.0]
        )
        summary = calculer_stats(sales)
        assert len(summary) == 1
        only_row = summary[0]
        assert only_row["nb_transactions"] == 2
        # Median of 3000 and 4000.
        assert only_row["prix_m2_median"] == 3500.0

    def test_two_groups(self):
        """Transactions differing only by ``type_local`` stay in separate rows."""
        flats = self._make_txs(2024, "59000", "Lille", "Appartement", [3000.0], [50.0])
        houses = self._make_txs(2024, "59000", "Lille", "Maison", [2500.0], [100.0])
        summary = calculer_stats(flats + houses)
        assert len(summary) == 2


class TestAjouterEvolution:
    """Checks for ``ajouter_evolution``: year-over-year change of the median price."""

    def _make_stat(self, cp, type_local, annee, median):
        """Build one stats row; only the four arguments vary between rows."""
        stat_row = {
            "code_postal": cp,
            "type_local": type_local,
            "annee": annee,
            "prix_m2_median": median,
            "nom_commune": "Lille",
            "nb_transactions": 5,
            "prix_m2_moyen": median,
            "prix_moyen": 0.0,
            "prix_median": 0.0,
            "surface_moyenne": 50.0,
        }
        return stat_row

    def test_first_year_is_none(self):
        """A row with no earlier year to compare against gets ``None``."""
        lone_year = [self._make_stat("59000", "Appartement", 2021, 3000.0)]
        enriched = ajouter_evolution(lone_year)
        assert enriched[0]["evolution_m2_pct"] is None

    def test_evolution_computed(self):
        """3000 → 3300 between consecutive years is reported as +10 %."""
        series = [
            self._make_stat("59000", "Appartement", year, median)
            for year, median in ((2021, 3000.0), (2022, 3300.0))
        ]
        enriched = ajouter_evolution(series)
        row_2022 = next(r for r in enriched if r["annee"] == 2022)
        change = row_2022["evolution_m2_pct"]
        assert abs(change - 10.0) < 0.01

    def test_type_not_cross_contaminated(self):
        """A previous-year row of another ``type_local`` is not used as baseline."""
        mixed = [
            self._make_stat("59000", "Appartement", 2021, 3000.0),
            self._make_stat("59000", "Maison", 2022, 2000.0),
        ]
        enriched = ajouter_evolution(mixed)
        # There is no Maison row for 2021, so Maison 2022 has nothing to compare to.
        house_row = next(r for r in enriched if r["type_local"] == "Maison")
        assert house_row["evolution_m2_pct"] is None


# ---------------------------------------------------------------------------
# build_securite_nord — danger score formula
# ---------------------------------------------------------------------------

from build_securite_nord import compute_score, OVERRIDES, DENSITE_BASE, ROLE_BONUS


class TestComputeScore:
    """Checks for ``compute_score`` applied to one commune row.

    The base/bonus/adjustment figures quoted in the comments below come from
    the original test author's notes; they are not visible from this file.
    """

    def _row(self, codgeo, densite, role, zone="ZPN"):
        """Build the four-field ``pd.Series`` handed to ``compute_score``."""
        fields = {
            "CODGEO":        codgeo,
            "typo_densite":  densite,
            "CATEAAV2020":   role,
            "zone_competence": zone,
        }
        return pd.Series(fields)

    def test_override_takes_priority(self):
        """Commune 59512 (Roubaix) scores 10, above the 9 asserted as ceiling below."""
        roubaix = self._row("59512", "Grands centres urbains", "Commune-Centre")
        score = compute_score(roubaix, "typo_densite")
        assert score == 10

    def test_formula_no_override(self):
        """Without an override: base 3 + bonus 2 + adjustment 0 = 5."""
        commune = self._row("00000", "Petites villes", "Commune-Centre", "ZPN")
        score = compute_score(commune, "typo_densite")
        assert score == 5

    def test_zgn_subtracts_one(self):
        """Same commune in zone ZGN: base 3 + bonus 2 - 1 = 4."""
        commune = self._row("00000", "Petites villes", "Commune-Centre", "ZGN")
        score = compute_score(commune, "typo_densite")
        assert score == 4

    def test_clamp_min(self):
        """The lowest-scoring combination never drops below 1."""
        # Expected raw value: base 1 + bonus 0 - 1 = 0, then clamped up to 1.
        # NOTE(review): only the lower bound is asserted, not equality with 1.
        commune = self._row(
            "00000",
            "Rural à habitat très dispersé",
            "Commune hors attraction des villes",
            "ZGN",
        )
        score = compute_score(commune, "typo_densite")
        assert score >= 1

    def test_clamp_max(self):
        """The highest non-overridden combination stays at or below 9."""
        # Expected raw value: base 6 + bonus 2 = 8, already inside [1, 9].
        # NOTE(review): this input therefore does not trigger the upper clamp.
        commune = self._row("00000", "Grands centres urbains", "Commune-Centre", "ZPN")
        score = compute_score(commune, "typo_densite")
        assert score <= 9


# ---------------------------------------------------------------------------
# build_seveso — department extraction + filter
# ---------------------------------------------------------------------------

from build_seveso import _to_dept, _filtrer


class TestToDept:
    """Checks for ``_to_dept``: postal code to department code."""

    def test_mainland_2digits(self):
        """Mainland postal codes map to their first two digits."""
        for postal_code, dept in (("59000", "59"), ("31000", "31")):
            assert _to_dept(postal_code) == dept

    def test_overseas_3digits(self):
        """Postal codes starting with 97 map to their first three digits."""
        for postal_code, dept in (("97100", "971"), ("97600", "976")):
            assert _to_dept(postal_code) == dept

    def test_strips_spaces(self):
        """Surrounding blanks are ignored."""
        dept = _to_dept("  59000 ")
        assert dept == "59"


class TestFiltrerSeveso:
    """Checks for ``_filtrer``: keeping only geolocated Seveso sites."""

    def _site(self, seveso="Seveso seuil haut", lat=50.5, lng=3.0, cp="59000"):
        """Build one site record; defaults describe a valid high-threshold site."""
        record = {
            "raisonSociale": "ACME",
            "commune": "Lille",
            "codePostal": cp,
            "adresse1": "1 rue",
            "statutSeveso": seveso,
            "etatActivite": "En activité",
            "latitude": lat,
            "longitude": lng,
        }
        return record

    def test_seveso_haut_kept(self):
        """A high-threshold site passes the filter."""
        kept = _filtrer([self._site("Seveso seuil haut")])
        assert len(kept) == 1

    def test_seveso_bas_kept(self):
        """A low-threshold site passes the filter."""
        kept = _filtrer([self._site("Seveso seuil bas")])
        assert len(kept) == 1

    def test_non_seveso_filtered(self):
        """A site whose status is ``non seveso`` is dropped."""
        kept = _filtrer([self._site("non seveso")])
        assert len(kept) == 0

    def test_missing_gps_filtered(self):
        """A site without latitude is dropped even if its status qualifies."""
        unlocated = self._site(lat=None)
        kept = _filtrer([unlocated])
        assert len(kept) == 0

    def test_output_tuple_has_9_fields(self):
        """Each kept site is emitted as a 9-element row."""
        kept = _filtrer([self._site()])
        first_row = kept[0]
        assert len(first_row) == 9


# ---------------------------------------------------------------------------
# build_noise_zones — Overpass element parser
# ---------------------------------------------------------------------------

from build_noise_zones import _parse


class TestParseNoise:
    """Checks for ``_parse``: Overpass elements to noise-source rows."""

    def _make_way(self, highway="motorway", coords=None):
        """Build a ``way`` element with a ``highway`` tag and a geometry list.

        When ``coords`` is omitted, a two-point geometry is used.
        """
        if coords is None:
            coords = [{"lon": 3.0, "lat": 50.5}, {"lon": 3.1, "lat": 50.6}]
        return {
            "type": "way",
            "tags": {"highway": highway},
            "geometry": coords,
        }

    def test_motorway_parsed(self):
        """A motorway way yields one ``routier`` row with value 75 in column 1."""
        rows = _parse([self._make_way("motorway")])
        assert len(rows) == 1
        assert rows[0][0] == "routier"
        assert rows[0][1] == 75  # LDEN

    def test_unknown_highway_skipped(self):
        """A ``residential`` highway produces no row."""
        rows = _parse([self._make_way("residential")])
        assert len(rows) == 0

    def test_railway_parsed(self):
        """A ``railway=rail`` way yields one ``ferroviaire`` row."""
        way = {
            "type": "way",
            "tags": {"railway": "rail"},
            "geometry": [{"lon": 3.0, "lat": 50.5}, {"lon": 3.1, "lat": 50.6}],
        }
        rows = _parse([way])
        assert len(rows) == 1
        assert rows[0][0] == "ferroviaire"

    def test_single_coord_skipped(self):
        """A way with a single coordinate produces no row."""
        rows = _parse([self._make_way("motorway", [{"lon": 3.0, "lat": 50.5}])])
        assert len(rows) == 0

    def test_non_way_skipped(self):
        """Elements whose ``type`` is not ``way`` produce no row."""
        bare_node = {"type": "node", "tags": {}, "geometry": []}
        # The bare node would also be rejected for its missing tags and empty
        # geometry. This second element differs from a valid motorway way only
        # by its type, so it isolates the element-type check.
        tagged_node = self._make_way("motorway")
        tagged_node["type"] = "node"
        rows = _parse([bare_node, tagged_node])
        assert len(rows) == 0


# ---------------------------------------------------------------------------
# build_pression_iris — normalisation + safe_merge
# ---------------------------------------------------------------------------

from build_pression_iris import _minmax, _safe_merge


class TestMinmax:
    """Checks for ``_minmax``: rescaling a numeric ``pd.Series``."""

    def test_uniform_returns_half(self):
        """A constant series maps every element to 0.5."""
        s = pd.Series([5.0, 5.0, 5.0])
        result = _minmax(s)
        assert (result == 0.5).all()

    def test_range_zero_to_one(self):
        """The smallest value maps to 0 and the largest to 1."""
        s = pd.Series([0.0, 50.0, 100.0])
        result = _minmax(s)
        assert result.min() == pytest.approx(0.0)
        assert result.max() == pytest.approx(1.0)

    def test_midpoint(self):
        """A value halfway between min and max maps to 0.5."""
        # The earlier two-element version only re-checked the endpoints. The
        # middle sample makes the test match its name; it assumes the usual
        # linear min-max rescaling.
        s = pd.Series([0.0, 5.0, 10.0])
        result = _minmax(s)
        assert result.iloc[0] == pytest.approx(0.0)
        assert result.iloc[1] == pytest.approx(0.5)
        assert result.iloc[2] == pytest.approx(1.0)


class TestSafeMerge:
    """Checks for ``_safe_merge``: a merge that falls back to the left frame."""

    def test_normal_merge(self):
        """Columns of the right frame are attached to matching left rows."""
        base = pd.DataFrame({"code_iris": ["A", "B"], "val": [1, 2]})
        addition = pd.DataFrame({"code_iris": ["A"], "extra": [99]})
        merged = _safe_merge(base, addition, on="code_iris")
        assert "extra" in merged.columns
        is_a = merged["code_iris"] == "A"
        assert merged.loc[is_a, "extra"].iloc[0] == 99

    def test_empty_right_returns_left(self):
        """An empty right frame leaves the left columns untouched."""
        base = pd.DataFrame({"code_iris": ["A"], "val": [1]})
        nothing = pd.DataFrame()
        merged = _safe_merge(base, nothing, on="code_iris")
        assert list(merged.columns) == ["code_iris", "val"]

    def test_missing_key_returns_left(self):
        """A right frame lacking the join key contributes no column."""
        base = pd.DataFrame({"code_iris": ["A"], "val": [1]})
        keyless = pd.DataFrame({"other_col": ["A"]})
        merged = _safe_merge(base, keyless, on="code_iris")
        assert "other_col" not in merged.columns


# ---------------------------------------------------------------------------
# build_mutations_iris — IRIS price evolution
# ---------------------------------------------------------------------------

from build_mutations_iris import calculer_prix_evolution_iris


class TestCalculerPrixEvolutionIris:
    """Checks for ``calculer_prix_evolution_iris``: per-IRIS yearly price rows."""

    def _make_txs(self, code_iris, type_local, annee, prix_m2s):
        """Build one transaction dict per price; the other fields are shared."""
        return [
            {
                "code_iris": code_iris,
                "nom_iris": "Test IRIS",
                "nom_commune": "Lille",
                "type_local": type_local,
                "annee": annee,
                "prix_m2": p,
            }
            for p in prix_m2s
        ]

    def test_below_min_transactions_excluded(self):
        """A group with fewer transactions than the minimum yields no row."""
        # MIN_TRANSACTIONS = 3; only 2 supplied
        txs = self._make_txs("592710101", "Appartement", 2024, [3000.0, 3500.0])
        rows = calculer_prix_evolution_iris(txs)
        assert len(rows) == 0

    def test_enough_transactions(self):
        """Three transactions form one row whose median is the middle price."""
        txs = self._make_txs("592710101", "Appartement", 2024, [3000.0, 3500.0, 4000.0])
        rows = calculer_prix_evolution_iris(txs)
        assert len(rows) == 1
        assert rows[0]["prix_m2_median"] == pytest.approx(3500.0)

    def test_evolution_yoy(self):
        """3000 → 3300 between 2023 and 2024 is reported as +10 %."""
        txs = (
            self._make_txs("592710101", "Appartement", 2023, [3000.0, 3000.0, 3000.0]) +
            self._make_txs("592710101", "Appartement", 2024, [3300.0, 3300.0, 3300.0])
        )
        rows = calculer_prix_evolution_iris(txs)
        row_2024 = next(r for r in rows if r["annee"] == 2024)
        assert row_2024["evolution_m2_pct"] == pytest.approx(10.0)

    def test_no_iris_code_excluded(self):
        """Transactions without an IRIS code yield no row, whatever their count."""
        # Three transactions reach the minimum group size, so the missing
        # IRIS code is the only reason left to exclude them. A single
        # transaction would be dropped by the size threshold alone.
        txs = self._make_txs(None, "Maison", 2024, [2000.0, 2100.0, 2200.0])
        rows = calculer_prix_evolution_iris(txs)
        assert len(rows) == 0


# ---------------------------------------------------------------------------
# build_backcast_iris — backcast computation
# ---------------------------------------------------------------------------

from build_backcast_iris import _compute_backcast


class TestComputeBackcast:
    """Checks for ``_compute_backcast``: filling unobserved IRIS years from commune prices."""

    def test_fills_missing_year(self):
        """Only the year missing at IRIS level gets a backcast row."""
        # IRIS 592710101 is observed for 2022 only; commune 59271 has 2021 and 2022.
        iris_observed = {("592710101", "Appartement", 2022): 3000.0}
        commune_level = {
            ("59271", "Appartement", 2021): 2800.0,
            ("59271", "Appartement", 2022): 3000.0,
        }
        meta = {"592710101": {"nom_iris": "Test", "nom_commune": "Lille"}}

        produced = _compute_backcast(iris_observed, commune_level, meta)
        years = [row["annee"] for row in produced]
        # 2021 is unobserved → backcast; 2022 is observed → left alone.
        assert 2021 in years
        assert 2022 not in years

    def test_ratio_applied_correctly(self):
        """With an IRIS/commune ratio of 1, the backcast equals the commune price."""
        # 2022: IRIS 3000 / commune 3000 = 1.0, so 2021 should be 2500 * 1.0.
        iris_observed = {("592710101", "Appartement", 2022): 3000.0}
        commune_level = {
            ("59271", "Appartement", 2021): 2500.0,
            ("59271", "Appartement", 2022): 3000.0,
        }
        meta = {"592710101": {"nom_iris": "T", "nom_commune": "L"}}
        produced = _compute_backcast(iris_observed, commune_level, meta)
        backcast_2021 = next(row for row in produced if row["annee"] == 2021)
        assert backcast_2021["prix_m2_median"] == pytest.approx(2500.0, rel=0.01)

    def test_no_commune_price_no_output(self):
        """Without any commune price there is nothing to scale, hence no rows."""
        iris_observed = {("592710101", "Appartement", 2022): 3000.0}
        meta = {"592710101": {"nom_iris": "", "nom_commune": ""}}
        produced = _compute_backcast(iris_observed, {}, meta)
        assert len(produced) == 0


# ---------------------------------------------------------------------------
# build_securite_lille — safety: main() exits cleanly when source is absent
# ---------------------------------------------------------------------------

from build_securite_lille import main as securite_lille_main


def test_securite_lille_missing_source_exits(tmp_path, monkeypatch):
    """Check that ``main()`` exits with status 1 when the source Excel is absent.

    The module-level ``_SRC`` attribute is redirected to a path inside
    ``tmp_path`` that is never created.
    """
    import build_securite_lille as mod

    absent_source = tmp_path / "nonexistent.xlsx"
    monkeypatch.setattr(mod, "_SRC", str(absent_source))

    with pytest.raises(SystemExit) as raised:
        securite_lille_main()

    exit_status = raised.value.code
    assert exit_status == 1