Skip to main content

tp_lib_core/path/
viterbi.rs

1//! Log-space Viterbi algorithm for HMM-based map matching
2//!
3//! Implements the Viterbi algorithm (Newson & Krumm, 2009) to decode the most
4//! probable sequence of netelements given a sequence of GNSS observations and
5//! per-position candidate sets with emission probabilities.
6//!
7//! The algorithm operates in log-space to avoid numerical underflow on long
8//! sequences.
9
10use crate::errors::ProjectionError;
11use crate::models::{AssociatedNetElement, Netelement};
12use crate::path::candidate::CandidateNetElement;
13use crate::path::graph::{
14    cached_shortest_path_distance, shortest_path_route, NetelementSide, ShortestPathCache,
15};
16use crate::path::probability::{calculate_transition_probability, is_near_netelement_edge};
17use crate::path::PathConfig;
18use geo::{HaversineDistance, Point};
19use petgraph::graph::{DiGraph, NodeIndex};
20use petgraph::visit::EdgeRef;
21use serde::{Deserialize, Serialize};
22use std::collections::HashMap;
23
24/// Result of the Viterbi decoding.
25///
26/// Contains the decoded path as a single subsequence for continuous GNSS
27/// input (no Viterbi breaks — the algorithm uses penalty carry-forward
28/// to maintain chain continuity).
29#[derive(Debug, Clone)]
30pub struct ViterbiResult {
31    /// Decoded subsequences.  For continuous GNSS input this will always
32    /// contain exactly one entry.
33    pub subsequences: Vec<ViterbiSubsequence>,
34
35    /// All feasible (non-zero) transition probabilities computed during the
36    /// forward pass, as `(from_t, from_candidate_idx, to_t, to_candidate_idx,
37    /// transition_probability_linear)` tuples.
38    pub transition_records: Vec<(usize, usize, usize, usize, f64)>,
39}
40
41/// A single unbroken Viterbi sub-sequence.
42#[derive(Debug, Clone)]
43pub struct ViterbiSubsequence {
44    /// `(position_index, candidate_index)` for each time-step in this
45    /// sub-sequence.  `position_index` is an index into the original
46    /// `working_positions` (and `position_candidates`) slice.
47    pub states: Vec<(usize, usize)>,
48
49    /// Log-probability of this sub-sequence.
50    pub log_probability: f64,
51}
52
53/// Decode the most probable netelement sequence using the Viterbi algorithm.
54///
55/// # Arguments
56///
57/// * `position_candidates` — Per-position candidate netelements (from Phase 1).
58/// * `position_probabilities` — Per-position emission probabilities: `position_probabilities[t][candidate_idx] = emission_prob`.
59///   `candidate_idx` is an index into `position_candidates[t]`.
60/// * `netelements` — Full netelement set (for geometry lookups).
61/// * `netelement_index` — Map from netelement ID → index in `netelements`.
62/// * `graph` — Directed topology graph with distance-weighted edges.
63/// * `node_map` — Map from `NetelementSide` → `NodeIndex` in `graph`.
64/// * `cache` — Mutable shortest-path cache (lazily populated).
65/// * `config` — Path configuration (provides `beta`, `edge_zone_distance`).
66///
67/// # Returns
68///
69/// A `ViterbiResult` with one or more sub-sequences covering all time-steps
70/// that have at least one candidate.
71#[allow(clippy::too_many_arguments)]
72pub fn viterbi_decode(
73    position_candidates: &[Vec<CandidateNetElement>],
74    position_probabilities: &[Vec<f64>],
75    netelements: &[Netelement],
76    netelement_index: &HashMap<String, usize>,
77    graph: &DiGraph<NetelementSide, f64>,
78    node_map: &HashMap<NetelementSide, NodeIndex>,
79    cache: &mut ShortestPathCache,
80    config: &PathConfig,
81) -> Result<ViterbiResult, ProjectionError> {
82    let t_count = position_candidates.len();
83    if t_count == 0 {
84        return Ok(ViterbiResult {
85            subsequences: vec![],
86            transition_records: vec![],
87        });
88    }
89
90    // Pre-compute whether each candidate is near a netelement edge.
91    // near_edge[t][j] = true ⟹ candidate j at time t is in the edge zone
92    let near_edge: Vec<Vec<bool>> = position_candidates
93        .iter()
94        .map(|cands| {
95            cands
96                .iter()
97                .map(|c| {
98                    if let Some(&ne_idx) = netelement_index.get(&c.netelement_id) {
99                        is_near_netelement_edge(
100                            &c.projected_point,
101                            &netelements[ne_idx].geometry,
102                            config.edge_zone_distance,
103                        )
104                    } else {
105                        true // unknown NE → conservative: treat as edge
106                    }
107                })
108                .collect()
109        })
110        .collect();
111
112    // Trellis tables.
113    // log_v[t][j] = best log-probability reaching candidate j at time t.
114    // backptr[t][j] = (prev_time, prev_candidate_index) for back-tracing.
115    let mut log_v: Vec<Vec<f64>> = Vec::with_capacity(t_count);
116    let mut backptr: Vec<Vec<Option<usize>>> = Vec::with_capacity(t_count);
117    // Parallel to backptr: records the actual predecessor time-step for
118    // each (t, j) so the back-trace can skip over empty rows.
119    let mut backptr_time: Vec<Vec<Option<usize>>> = Vec::with_capacity(t_count);
120    let mut transition_records: Vec<(usize, usize, usize, usize, f64)> = Vec::new();
121
122    // ── Initialise t = 0 ────────────────────────────────────────────────
123    {
124        let cands = &position_candidates[0];
125        let probs = &position_probabilities[0];
126        let mut lv = Vec::with_capacity(cands.len());
127        let mut bp = Vec::with_capacity(cands.len());
128        for (j, _) in cands.iter().enumerate() {
129            let emission = probs.get(j).copied().unwrap_or(0.0);
130            lv.push(safe_ln(emission));
131            bp.push(None);
132        }
133        log_v.push(lv);
134        backptr.push(bp);
135        backptr_time.push(vec![None; position_candidates[0].len()]);
136    }
137
138    // Penalty applied when no valid transition exists (ln(1e-10) ≈ -23).
139    // This is large enough to strongly discourage impossible transitions
140    // while keeping the Viterbi chain unbroken for continuous paths.
141    const NO_TRANSITION_PENALTY: f64 = -23.0;
142
143    // Minimum emission probability floor.  A single noisy observation
144    // (e.g. heading beyond cutoff → emission = 0) must not irrecoverably
145    // destroy hundreds of steps of accumulated evidence.  The floor is
146    // tiny enough to heavily penalise the step without producing −∞.
147    const EMISSION_FLOOR: f64 = 1e-10;
148
149    // ── Recurse t = 1 .. T-1 ───────────────────────────────────────────
150    for t in 1..t_count {
151        let curr_cands = &position_candidates[t];
152        let curr_probs = &position_probabilities[t];
153
154        if curr_cands.is_empty() {
155            // No candidates at this time-step — push empty row.
156            log_v.push(vec![]);
157            backptr.push(vec![]);
158            backptr_time.push(vec![]);
159            continue;
160        }
161
162        // Find the most recent non-empty time-step with finite scores to
163        // use as the predecessor.  Normally this is t-1, but if t-1 had
164        // no candidates we search further back.
165        let mut prev_t = None;
166        for pt in (0..t).rev() {
167            if !log_v[pt].is_empty() && log_v[pt].iter().any(|&v| v != f64::NEG_INFINITY) {
168                prev_t = Some(pt);
169                break;
170            }
171        }
172
173        if prev_t.is_none() {
174            // No usable predecessor at all — initialise from emission only.
175            // This only happens when *every* earlier time-step was empty.
176            let mut lv = Vec::with_capacity(curr_cands.len());
177            let mut bp = Vec::with_capacity(curr_cands.len());
178            for (j, _) in curr_cands.iter().enumerate() {
179                let emission = curr_probs.get(j).copied().unwrap_or(0.0);
180                lv.push(safe_ln(emission));
181                bp.push(None);
182            }
183            log_v.push(lv);
184            backptr.push(bp);
185            backptr_time.push(vec![None; curr_cands.len()]);
186            continue;
187        }
188        let prev_t = prev_t.unwrap();
189        let prev_cands = &position_candidates[prev_t];
190        let prev_lv = &log_v[prev_t];
191
192        let mut lv = vec![f64::NEG_INFINITY; curr_cands.len()];
193        let mut bp: Vec<Option<(usize, usize)>> = vec![None; curr_cands.len()];
194
195        for (j, cand_j) in curr_cands.iter().enumerate() {
196            let emission_j = curr_probs.get(j).copied().unwrap_or(0.0);
197            let ln_emission_j = safe_ln(emission_j.max(EMISSION_FLOOR));
198
199            for (i, cand_i) in prev_cands.iter().enumerate() {
200                if prev_lv[i] == f64::NEG_INFINITY {
201                    continue;
202                }
203
204                let ln_trans = compute_log_transition(
205                    cand_i,
206                    cand_j,
207                    i,
208                    j,
209                    prev_t,
210                    t,
211                    &near_edge,
212                    netelements,
213                    netelement_index,
214                    graph,
215                    node_map,
216                    cache,
217                    config,
218                );
219
220                if ln_trans == f64::NEG_INFINITY {
221                    continue;
222                }
223
224                transition_records.push((prev_t, i, t, j, ln_trans.exp()));
225
226                let score = prev_lv[i] + ln_trans + ln_emission_j;
227                if score > lv[j] {
228                    lv[j] = score;
229                    bp[j] = Some((prev_t, i));
230                }
231            }
232        }
233
234        // If all lv[j] are -∞ after the inner loop, no valid transition
235        // was possible.  Instead of creating a Viterbi break, carry
236        // forward the best previous state with a penalty so the chain
237        // remains continuous (the GNSS input is one unbroken drive).
238        if lv.iter().all(|&v| v == f64::NEG_INFINITY) {
239            // Find the best previous candidate.
240            let best_prev_i = prev_lv
241                .iter()
242                .enumerate()
243                .filter(|(_, &v)| v != f64::NEG_INFINITY)
244                .max_by(|a, b| a.1.partial_cmp(b.1).unwrap())
245                .map(|(i, _)| i);
246
247            if let Some(best_i) = best_prev_i {
248                let carry_score = prev_lv[best_i] + NO_TRANSITION_PENALTY;
249                for (j, _) in curr_cands.iter().enumerate() {
250                    let emission = curr_probs.get(j).copied().unwrap_or(0.0);
251                    let ln_em = safe_ln(emission.max(EMISSION_FLOOR));
252                    lv[j] = carry_score + ln_em;
253                    bp[j] = Some((prev_t, best_i));
254                }
255            }
256        }
257
258        log_v.push(lv);
259        let flat_bp: Vec<Option<usize>> = bp.iter().map(|opt| opt.map(|(_, i)| i)).collect();
260        let time_bp: Vec<Option<usize>> = bp.iter().map(|opt| opt.map(|(pt, _)| pt)).collect();
261        backptr.push(flat_bp);
262        backptr_time.push(time_bp);
263    }
264
265    // ── Back-trace ──────────────────────────────────────────────────────
266    // With the no-break approach we always produce a single subsequence.
267    let subsequences = backtrace_continuous(&log_v, &backptr, &backptr_time, t_count);
268
269    Ok(ViterbiResult {
270        subsequences,
271        transition_records,
272    })
273}
274
275// ─── Helpers ────────────────────────────────────────────────────────────────
276
277/// Compute the log-transition probability between candidate `i` at time
278/// `t_prev` and candidate `j` at time `t_curr`.
279#[allow(clippy::too_many_arguments)]
280fn compute_log_transition(
281    cand_i: &CandidateNetElement,
282    cand_j: &CandidateNetElement,
283    i: usize,
284    j: usize,
285    t_prev: usize,
286    t_curr: usize,
287    near_edge: &[Vec<bool>],
288    netelements: &[Netelement],
289    netelement_index: &HashMap<String, usize>,
290    graph: &DiGraph<NetelementSide, f64>,
291    node_map: &HashMap<NetelementSide, NodeIndex>,
292    cache: &mut ShortestPathCache,
293    config: &PathConfig,
294) -> f64 {
295    let same_ne = cand_i.netelement_id == cand_j.netelement_id;
296
297    // Same-netelement shortcut: transition is free.
298    if same_ne {
299        return 0.0; // ln(1.0)
300    }
301
302    // Edge-zone skip: both candidates interior → impossible transition.
303    let i_near = near_edge[t_prev][i];
304    let j_near = near_edge[t_curr][j];
305    if !i_near && !j_near {
306        return f64::NEG_INFINITY; // ln(0.0)
307    }
308
309    // Need Dijkstra route distance.  Try all (from_side → to_side) combos
310    // (there are at most 4: from_0→to_0, from_0→to_1, from_1→to_0, from_1→to_1)
311    // and pick the shortest.
312    let Some(&ne_i_idx) = netelement_index.get(&cand_i.netelement_id) else {
313        return f64::NEG_INFINITY;
314    };
315    let Some(&ne_j_idx) = netelement_index.get(&cand_j.netelement_id) else {
316        return f64::NEG_INFINITY;
317    };
318
319    // Which sides of the from-netelement could the candidate be near?
320    // If near_edge is true we try both sides; shortest wins.
321    let from_sides = candidate_sides(&cand_i.netelement_id);
322    let to_sides = candidate_sides(&cand_j.netelement_id);
323
324    // Great-circle distance between the two projected points.
325    let gc_distance = cand_i
326        .projected_point
327        .haversine_distance(&cand_j.projected_point);
328
329    // Evaluate all (from_side, to_side) combinations and pick the one with
330    // the highest combined transition probability (route distance + turn angle).
331    let ne_i_geom = &netelements[ne_i_idx].geometry;
332    let ne_j_geom = &netelements[ne_j_idx].geometry;
333
334    let mut best_ln_trans = f64::NEG_INFINITY;
335    for from_side in &from_sides {
336        for to_side in &to_sides {
337            if let Some(d) =
338                cached_shortest_path_distance(cache, graph, node_map, from_side, to_side)
339            {
340                let from_partial =
341                    partial_netelement_distance(cand_i, from_side.position, &netelements[ne_i_idx]);
342                let to_partial =
343                    partial_netelement_distance(cand_j, to_side.position, &netelements[ne_j_idx]);
344                let route_distance = from_partial + d + to_partial;
345
346                let base_trans =
347                    calculate_transition_probability(route_distance, gc_distance, config.beta);
348
349                // Turn-angle penalty: compare the exit heading from the
350                // from-NE with the entry heading into the to-NE.
351                let turn_factor = netelement_connection_turn_factor(
352                    ne_i_geom,
353                    from_side.position,
354                    ne_j_geom,
355                    to_side.position,
356                    config.turn_scale,
357                );
358
359                let combined = base_trans * turn_factor;
360                let ln_combined = safe_ln(combined);
361                if ln_combined > best_ln_trans {
362                    best_ln_trans = ln_combined;
363                }
364            }
365        }
366    }
367
368    best_ln_trans
369}
370
371/// Compute the turn-angle penalty factor at a netelement connection.
372///
373/// Compares the exit heading from `from_geom` at `from_side` with the entry
374/// heading into `to_geom` at `to_side`. Returns a factor in (0, 1] where
375/// 1.0 means straight through and values closer to 0 penalise sharper turns.
376fn netelement_connection_turn_factor(
377    from_geom: &geo::LineString<f64>,
378    from_side: u8,
379    to_geom: &geo::LineString<f64>,
380    to_side: u8,
381    turn_scale: f64,
382) -> f64 {
383    use crate::path::candidate::{directional_heading_difference, haversine_bearing};
384
385    let from_pts: Vec<Point<f64>> = from_geom.points().collect();
386    let to_pts: Vec<Point<f64>> = to_geom.points().collect();
387    if from_pts.len() < 2 || to_pts.len() < 2 {
388        return 1.0; // degenerate geometry — no penalty
389    }
390
391    // Exit heading: direction the train is moving as it leaves from_geom.
392    let exit_heading = if from_side == 0 {
393        // Exiting through start → was traveling from end toward start.
394        haversine_bearing(&from_pts[1], &from_pts[0])
395    } else {
396        // Exiting through end → was traveling from start toward end.
397        let n = from_pts.len();
398        haversine_bearing(&from_pts[n - 2], &from_pts[n - 1])
399    };
400
401    // Entry heading: direction the train moves after entering to_geom.
402    let entry_heading = if to_side == 0 {
403        // Entering at start → heading toward end.
404        haversine_bearing(&to_pts[0], &to_pts[1])
405    } else {
406        // Entering at end → heading toward start.
407        let n = to_pts.len();
408        haversine_bearing(&to_pts[n - 1], &to_pts[n - 2])
409    };
410
411    let turn_angle = directional_heading_difference(exit_heading, entry_heading);
412    (-turn_angle / turn_scale).exp()
413}
414
415/// Return both sides of a netelement as `NetelementSide` values.
416fn candidate_sides(netelement_id: &str) -> [NetelementSide; 2] {
417    [
418        NetelementSide {
419            netelement_id: netelement_id.to_string(),
420            position: 0,
421        },
422        NetelementSide {
423            netelement_id: netelement_id.to_string(),
424            position: 1,
425        },
426    ]
427}
428
429/// Approximate distance from a candidate's projected point to a netelement endpoint.
430///
431/// `side` is 0 (start) or 1 (end).  Uses intrinsic coordinate × netelement
432/// length as a quick proxy.
433fn partial_netelement_distance(
434    cand: &CandidateNetElement,
435    side: u8,
436    netelement: &Netelement,
437) -> f64 {
438    use geo::HaversineLength;
439    let length = netelement.geometry.haversine_length();
440    if side == 0 {
441        cand.intrinsic_coordinate * length
442    } else {
443        (1.0 - cand.intrinsic_coordinate) * length
444    }
445}
446
447/// `ln` that maps 0.0 → -∞ and avoids NaN.
448fn safe_ln(x: f64) -> f64 {
449    if x <= 0.0 {
450        f64::NEG_INFINITY
451    } else {
452        x.ln()
453    }
454}
455
456/// Back-trace through the trellis to extract one or more optimal sub-sequences.
457#[allow(dead_code)]
458fn backtrace(
459    log_v: &[Vec<f64>],
460    backptr: &[Vec<Option<usize>>],
461    subseq_starts: &[usize],
462    t_count: usize,
463) -> Vec<ViterbiSubsequence> {
464    let mut result: Vec<ViterbiSubsequence> = Vec::new();
465
466    // For each sub-sequence delimited by `subseq_starts`, find the best
467    // terminal state and trace backwards.
468    for (seg_idx, &start_t) in subseq_starts.iter().enumerate() {
469        let end_t = if seg_idx + 1 < subseq_starts.len() {
470            subseq_starts[seg_idx + 1]
471        } else {
472            t_count
473        };
474
475        // Find the time-step with the last non-empty row in [start_t, end_t).
476        let mut last_valid_t = None;
477        for t in (start_t..end_t).rev() {
478            if !log_v[t].is_empty() && log_v[t].iter().any(|&v| v != f64::NEG_INFINITY) {
479                last_valid_t = Some(t);
480                break;
481            }
482        }
483
484        let Some(term_t) = last_valid_t else {
485            continue; // entirely empty sub-sequence
486        };
487
488        // Best candidate at terminal time-step.
489        let (best_j, best_log) = log_v[term_t]
490            .iter()
491            .enumerate()
492            .filter(|(_, &v)| v != f64::NEG_INFINITY)
493            .max_by(|a, b| a.1.partial_cmp(b.1).unwrap())
494            .unwrap(); // safe: we know at least one is finite
495
496        // Trace back.
497        let mut states = Vec::with_capacity(term_t - start_t + 1);
498        let mut j = best_j;
499        states.push((term_t, j));
500        let mut t = term_t;
501        while t > start_t {
502            if let Some(prev_j) = backptr[t][j] {
503                t -= 1;
504                j = prev_j;
505                states.push((t, j));
506            } else {
507                break;
508            }
509        }
510        states.reverse();
511
512        result.push(ViterbiSubsequence {
513            states,
514            log_probability: *best_log,
515        });
516    }
517
518    result
519}
520
521/// Back-trace through the trellis for the continuous (no-break) Viterbi.
522///
523/// Produces a single `ViterbiSubsequence` covering all time-steps with
524/// candidates.  The `backptr_time` table records, for each (t, j), the
525/// actual predecessor time-step (which may differ from t-1 if intermediate
526/// rows were empty).
527fn backtrace_continuous(
528    log_v: &[Vec<f64>],
529    backptr: &[Vec<Option<usize>>],
530    backptr_time: &[Vec<Option<usize>>],
531    t_count: usize,
532) -> Vec<ViterbiSubsequence> {
533    // Find the last time-step with non-empty, finite scores.
534    let mut last_valid_t = None;
535    for t in (0..t_count).rev() {
536        if !log_v[t].is_empty() && log_v[t].iter().any(|&v| v != f64::NEG_INFINITY) {
537            last_valid_t = Some(t);
538            break;
539        }
540    }
541
542    let Some(term_t) = last_valid_t else {
543        return vec![]; // no valid states at all
544    };
545
546    // Best candidate at terminal time-step.
547    let (best_j, best_log) = log_v[term_t]
548        .iter()
549        .enumerate()
550        .filter(|(_, &v)| v != f64::NEG_INFINITY)
551        .max_by(|a, b| a.1.partial_cmp(b.1).unwrap())
552        .unwrap();
553
554    // Trace back using backptr_time to jump over empty rows.
555    let mut states = Vec::with_capacity(term_t + 1);
556    let mut j = best_j;
557    let mut t = term_t;
558    states.push((t, j));
559
560    loop {
561        let prev_j = backptr[t][j];
562        let prev_t = backptr_time[t][j];
563        match (prev_t, prev_j) {
564            (Some(pt), Some(pj)) => {
565                t = pt;
566                j = pj;
567                states.push((t, j));
568            }
569            _ => break,
570        }
571    }
572    states.reverse();
573
574    vec![ViterbiSubsequence {
575        states,
576        log_probability: *best_log,
577    }]
578}
579
580// ─── Post-processing: build AssociatedNetElement path ───────────────────────
581
582/// Convert Viterbi output into a sequence of `AssociatedNetElement` segments.
583///
584/// 1. Deduplicates consecutive same-netelement entries.
585/// 2. Inserts bridge netelements between non-adjacent observed NEs
586///    (recovered from cached Dijkstra paths if available; otherwise left
587///    as a direct jump, which downstream code can flag).
588/// 3. Computes intrinsic ranges and GNSS index ranges per segment.
589pub fn build_path_from_viterbi(
590    viterbi: &ViterbiResult,
591    position_candidates: &[Vec<CandidateNetElement>],
592    netelements: &[Netelement],
593    netelement_index: &HashMap<String, usize>,
594    graph: &DiGraph<NetelementSide, f64>,
595    node_map: &HashMap<NetelementSide, NodeIndex>,
596    cache: &mut ShortestPathCache,
597) -> Result<Vec<AssociatedNetElement>, ProjectionError> {
598    let mut segments: Vec<AssociatedNetElement> = Vec::new();
599
600    for subseq in &viterbi.subsequences {
601        if subseq.states.is_empty() {
602            continue;
603        }
604
605        // Group consecutive states by netelement.
606        let mut groups: Vec<NetelementGroup> = Vec::new();
607        for &(pos_idx, cand_idx) in &subseq.states {
608            let cand = &position_candidates[pos_idx][cand_idx];
609            if let Some(last) = groups.last_mut() {
610                if last.netelement_id == cand.netelement_id {
611                    // Extend existing group.
612                    last.update(pos_idx, cand);
613                    continue;
614                }
615            }
616            groups.push(NetelementGroup::new(pos_idx, cand));
617        }
618
619        // Emit segments, inserting bridges between non-adjacent groups.
620        for (g_idx, group) in groups.iter().enumerate() {
621            if g_idx > 0 {
622                let prev = &groups[g_idx - 1];
623                insert_bridges(
624                    prev,
625                    group,
626                    &mut segments,
627                    netelements,
628                    netelement_index,
629                    graph,
630                    node_map,
631                    cache,
632                )?;
633            }
634
635            segments.push(group.to_associated_net_element()?);
636        }
637    }
638
639    Ok(segments)
640}
641
642/// Decision record for a single consecutive-segment pair during sanity validation.
643#[derive(Debug, Clone, Serialize, Deserialize)]
644pub struct SanityDecision {
645    /// Index of this pair (0 = first consecutive pair)
646    pub pair_index: usize,
647    /// Netelement ID of the source segment
648    pub from_netelement_id: String,
649    /// Netelement ID of the target segment
650    pub to_netelement_id: String,
651    /// Whether the target was reachable from the source
652    pub reachable: bool,
653    /// Action taken: "kept", "removed", or "rerouted"
654    pub action: String,
655    /// Netelement IDs inserted by Dijkstra re-routing (empty if not rerouted)
656    pub rerouted_via: Vec<String>,
657    /// Warning message (empty if reachable)
658    pub warning: String,
659}
660
661/// Record of a gap-fill action between two consecutive segments after sanity validation.
662#[derive(Debug, Clone, Serialize, Deserialize)]
663pub struct GapFill {
664    /// Index of this consecutive pair (0-based)
665    pub pair_index: usize,
666    /// Netelement ID of the segment before the gap
667    pub from_netelement_id: String,
668    /// Netelement ID of the segment after the gap
669    pub to_netelement_id: String,
670    /// Whether a Dijkstra route was found between the two segments
671    pub route_found: bool,
672    /// Netelement IDs inserted to bridge the gap (empty if no route)
673    pub inserted_netelements: Vec<String>,
674    /// Warning message (empty if directly connected or successfully filled)
675    pub warning: String,
676}
677
678/// Post-Viterbi navigability validation.
679///
680/// Walks the assembled path segments sequentially, checking each consecutive
681/// pair for topological reachability via the directed topology graph.
682/// When a segment is unreachable from its predecessor:
683///   1. The unreachable segment is removed.
684///   2. A Dijkstra re-route is attempted from the last valid segment to the
685///      *next* remaining segment. If a route exists, bridge NEs are inserted.
686///   3. A warning is recorded.
687///
688/// Returns the validated path segments, a list of warnings, and structured
689/// sanity decisions for debug output.
690#[allow(clippy::too_many_arguments)]
691pub fn validate_path_navigability(
692    segments: Vec<AssociatedNetElement>,
693    _netelements: &[Netelement],
694    netelement_index: &HashMap<String, usize>,
695    graph: &DiGraph<NetelementSide, f64>,
696    node_map: &HashMap<NetelementSide, NodeIndex>,
697    cache: &mut ShortestPathCache,
698) -> (Vec<AssociatedNetElement>, Vec<String>, Vec<SanityDecision>) {
699    if segments.len() < 2 {
700        return (segments, vec![], vec![]);
701    }
702
703    let mut validated: Vec<AssociatedNetElement> = Vec::new();
704    let mut warnings: Vec<String> = Vec::new();
705    let mut decisions: Vec<SanityDecision> = Vec::new();
706    let mut pair_index: usize = 0;
707
708    // Start with the first segment (always kept).
709    validated.push(segments[0].clone());
710
711    let mut i = 1;
712    while i < segments.len() {
713        // Clone data from last valid segment to avoid borrowing `validated` across mutations.
714        let last_ne_id = validated.last().unwrap().netelement_id.clone();
715        let last_gnss_end = validated.last().unwrap().gnss_end_index;
716        let candidate = &segments[i];
717
718        // Same netelement → always valid.
719        if last_ne_id == candidate.netelement_id {
720            decisions.push(SanityDecision {
721                pair_index,
722                from_netelement_id: last_ne_id,
723                to_netelement_id: candidate.netelement_id.clone(),
724                reachable: true,
725                action: "kept".to_string(),
726                rerouted_via: vec![],
727                warning: String::new(),
728            });
729            validated.push(candidate.clone());
730            pair_index += 1;
731            i += 1;
732            continue;
733        }
734
735        // Check reachability across all side combinations.
736        let from_sides = candidate_sides(&last_ne_id);
737        let to_sides = candidate_sides(&candidate.netelement_id);
738        let mut reachable = false;
739        for from in &from_sides {
740            for to in &to_sides {
741                if cached_shortest_path_distance(cache, graph, node_map, from, to).is_some() {
742                    reachable = true;
743                    break;
744                }
745            }
746            if reachable {
747                break;
748            }
749        }
750
751        if reachable {
752            decisions.push(SanityDecision {
753                pair_index,
754                from_netelement_id: last_ne_id,
755                to_netelement_id: candidate.netelement_id.clone(),
756                reachable: true,
757                action: "kept".to_string(),
758                rerouted_via: vec![],
759                warning: String::new(),
760            });
761            validated.push(candidate.clone());
762            pair_index += 1;
763            i += 1;
764            continue;
765        }
766
767        // Unreachable — remove this segment and attempt re-route to the next one.
768        let warn_msg = format!(
769            "Removed unreachable segment {} (no navigable path from {})",
770            candidate.netelement_id, last_ne_id
771        );
772        warnings.push(warn_msg.clone());
773
774        // Look ahead: try to find the next segment reachable from last valid.
775        let mut rerouted = false;
776        let mut rerouted_via: Vec<String> = Vec::new();
777
778        if i + 1 < segments.len() {
779            let next = &segments[i + 1];
780            // Try Dijkstra from last valid to next.
781            let fwd_from_sides = candidate_sides(&last_ne_id);
782            let fwd_to_sides = candidate_sides(&next.netelement_id);
783
784            let mut best_route_cost: Option<f64> = None;
785            let mut best_from_side = 0u8;
786            let mut best_to_side = 0u8;
787            for from in &fwd_from_sides {
788                for to in &fwd_to_sides {
789                    if let Some(d) = cached_shortest_path_distance(cache, graph, node_map, from, to)
790                    {
791                        if best_route_cost.is_none() || d < best_route_cost.unwrap() {
792                            best_route_cost = Some(d);
793                            best_from_side = from.position;
794                            best_to_side = to.position;
795                        }
796                    }
797                }
798            }
799
800            if let Some(cost) = best_route_cost {
801                rerouted = true;
802                // Insert bridge NEs if needed (cost > 0 means non-adjacent).
803                if cost > 1e-9 {
804                    let from_side = NetelementSide {
805                        netelement_id: last_ne_id.clone(),
806                        position: best_from_side,
807                    };
808                    let to_side = NetelementSide {
809                        netelement_id: next.netelement_id.clone(),
810                        position: best_to_side,
811                    };
812                    let bridge_ne_ids = trace_intermediate_netelements(
813                        graph,
814                        node_map,
815                        &from_side,
816                        &to_side,
817                        &last_ne_id,
818                        &next.netelement_id,
819                    );
820                    let gnss_idx = last_gnss_end;
821                    for ne_id in &bridge_ne_ids {
822                        if netelement_index.contains_key(ne_id) {
823                            if let Ok(bridge) = AssociatedNetElement::new(
824                                ne_id.clone(),
825                                1.0,
826                                0.0,
827                                1.0,
828                                gnss_idx,
829                                gnss_idx,
830                            ) {
831                                validated.push(bridge);
832                            }
833                        }
834                    }
835                    rerouted_via = bridge_ne_ids;
836                }
837            }
838        }
839
840        let action = if rerouted { "rerouted" } else { "removed" };
841        decisions.push(SanityDecision {
842            pair_index,
843            from_netelement_id: last_ne_id,
844            to_netelement_id: candidate.netelement_id.clone(),
845            reachable: false,
846            action: action.to_string(),
847            rerouted_via,
848            warning: warn_msg,
849        });
850
851        pair_index += 1;
852        i += 1;
853    }
854
855    // Second pass: detect and collapse oscillation patterns (A→…→A).
856    let validated = remove_oscillations(validated, &mut warnings, &mut decisions, &mut pair_index);
857
858    // Third pass: remove segments that violate directional consistency.
859    // A triple (A, B, C) is consistent if the entry side of B from A and the
860    // exit side of B towards C use opposite sides (i.e. the train passes through
861    // B without reversing direction).
862    let validated = remove_direction_violations(
863        validated,
864        graph,
865        node_map,
866        &mut warnings,
867        &mut decisions,
868        &mut pair_index,
869    );
870
871    (validated, warnings, decisions)
872}
873
874/// Detect and collapse oscillation patterns in the path.
875///
876/// An oscillation is when the same netelement appears more than once with a short
877/// intermediate detour—for example `A→B→C→A` where B and C cover fewer GNSS positions
878/// than the first A occurrence.  The collapse merges the two A-segments and removes
879/// the intermediate segments.
880fn remove_oscillations(
881    segments: Vec<AssociatedNetElement>,
882    warnings: &mut Vec<String>,
883    decisions: &mut Vec<SanityDecision>,
884    pair_index: &mut usize,
885) -> Vec<AssociatedNetElement> {
886    if segments.len() < 3 {
887        return segments;
888    }
889
890    let mut result = segments;
891    let mut changed = true;
892
893    while changed {
894        changed = false;
895        let mut i = 0;
896        while i < result.len() {
897            let ne_id = result[i].netelement_id.clone();
898
899            // Look for the next occurrence of the same netelement.
900            let mut j_opt: Option<usize> = None;
901            for (k, step) in result.iter().enumerate().skip(i + 1) {
902                if step.netelement_id == ne_id {
903                    j_opt = Some(k);
904                    break;
905                }
906            }
907
908            let j = match j_opt {
909                Some(v) => v,
910                None => {
911                    i += 1;
912                    continue;
913                }
914            };
915
916            // Check whether the intermediate segments are short enough to be
917            // oscillation noise.  We compare the total GNSS coverage of the
918            // intermediates against the first occurrence's GNSS coverage.
919            let first_gnss_count = result[i]
920                .gnss_end_index
921                .saturating_sub(result[i].gnss_start_index);
922            let intermediate_gnss_count = if j > i + 1 {
923                result[j - 1]
924                    .gnss_end_index
925                    .saturating_sub(result[i + 1].gnss_start_index)
926            } else {
927                0
928            };
929
930            // Collapse when the intermediate is shorter than the first segment
931            // or very short in absolute terms (< 10 GNSS positions), AND the
932            // number of distinct intermediate NEs is small.  Long chains of
933            // distinct NEs represent genuine path segments even when GNSS
934            // coverage is low (e.g. poor satellite reception).
935            let intermediate_ne_count = j - i - 1;
936            let is_oscillation = intermediate_ne_count <= MAX_OSCILLATION_INTERMEDIATE_NES
937                && (intermediate_gnss_count <= first_gnss_count || intermediate_gnss_count < 10);
938
939            if !is_oscillation {
940                i += 1;
941                continue;
942            }
943
944            // Record the collapsed oscillation.
945            let removed_ne_ids: Vec<String> = result[(i + 1)..j]
946                .iter()
947                .map(|s| s.netelement_id.clone())
948                .collect();
949
950            let warn_msg = format!(
951                "Collapsed oscillation: {} revisited after [{}] (intermediate covered {} GNSS positions)",
952                ne_id,
953                removed_ne_ids.join(", "),
954                intermediate_gnss_count,
955            );
956            warnings.push(warn_msg.clone());
957
958            decisions.push(SanityDecision {
959                pair_index: *pair_index,
960                from_netelement_id: ne_id.clone(),
961                to_netelement_id: ne_id.clone(),
962                reachable: true,
963                action: "collapsed-oscillation".to_string(),
964                rerouted_via: removed_ne_ids,
965                warning: warn_msg,
966            });
967            *pair_index += 1;
968
969            // Merge: extend segment[i] to cover segment[j]'s GNSS range.
970            result[i].gnss_end_index = result[j].gnss_end_index;
971
972            // Update end_intrinsic from the second occurrence, unless it is a
973            // bridge segment (gnss_start == gnss_end) whose intrinsic is just a
974            // placeholder.
975            if result[j].gnss_start_index != result[j].gnss_end_index {
976                result[i].end_intrinsic = result[j].end_intrinsic;
977            }
978
979            // Remove segments (i+1)..=j  (the intermediates plus the duplicate).
980            result.drain((i + 1)..=j);
981            changed = true;
982            // Don't increment i — the merged segment may trigger another collapse.
983        }
984    }
985
986    result
987}
988
989/// Check whether a zero-weight (external) edge exists from `from` to `to` in the
990/// topology graph, indicating a direct netrelation connection.
991fn has_direct_connection(
992    graph: &DiGraph<NetelementSide, f64>,
993    node_map: &HashMap<NetelementSide, NodeIndex>,
994    from: &NetelementSide,
995    to: &NetelementSide,
996) -> bool {
997    let Some(&from_idx) = node_map.get(from) else {
998        return false;
999    };
1000    let Some(&to_idx) = node_map.get(to) else {
1001        return false;
1002    };
1003
1004    graph
1005        .edges_directed(from_idx, petgraph::Direction::Outgoing)
1006        .any(|e| e.target() == to_idx && *e.weight() < 1e-9)
1007}
1008
1009/// Check if the triple of consecutive netelements A→B→C is directionally
1010/// consistent.  For at least one way the train can enter B from A, the
1011/// opposite (exit) side of B must have a direct connection to some side of C.
1012fn triple_is_consistent(
1013    ne_a: &str,
1014    ne_b: &str,
1015    ne_c: &str,
1016    graph: &DiGraph<NetelementSide, f64>,
1017    node_map: &HashMap<NetelementSide, NodeIndex>,
1018) -> bool {
1019    // Same-NE transitions are always consistent.
1020    if ne_a == ne_b || ne_b == ne_c {
1021        return true;
1022    }
1023
1024    for a_exit in [0u8, 1] {
1025        let a_side = NetelementSide {
1026            netelement_id: ne_a.to_string(),
1027            position: a_exit,
1028        };
1029        for b_entry in [0u8, 1] {
1030            let b_entry_side = NetelementSide {
1031                netelement_id: ne_b.to_string(),
1032                position: b_entry,
1033            };
1034
1035            if !has_direct_connection(graph, node_map, &a_side, &b_entry_side) {
1036                continue;
1037            }
1038
1039            // Train enters B from b_entry, exits from opposite side.
1040            let b_exit_side = NetelementSide {
1041                netelement_id: ne_b.to_string(),
1042                position: 1 - b_entry,
1043            };
1044
1045            for c_entry in [0u8, 1] {
1046                let c_entry_side = NetelementSide {
1047                    netelement_id: ne_c.to_string(),
1048                    position: c_entry,
1049                };
1050                if has_direct_connection(graph, node_map, &b_exit_side, &c_entry_side) {
1051                    return true;
1052                }
1053            }
1054        }
1055    }
1056
1057    false
1058}
1059
1060/// Minimum number of GNSS positions a real segment must span to be
1061/// protected from automatic removal.  Segments below this threshold are
1062/// considered dead-end artefacts eligible for removal; segments at or above
1063/// it are kept with a warning.
1064const DIRECTION_REMOVAL_GNSS_THRESHOLD: usize = 100;
1065
1066/// Maximum number of distinct intermediate netelements that can be collapsed
1067/// as an oscillation.  Sequences with more intermediates are treated as
1068/// genuine path segments (the train actually traversed them) even when their
1069/// total GNSS coverage is small relative to the repeated netelement.
1070const MAX_OSCILLATION_INTERMEDIATE_NES: usize = 3;
1071
1072/// Maximum number of neighbour removals a single netelement can cause during
1073/// direction-violation processing before it is force-removed as the likely
1074/// source of path corruption (wrong GNSS assignment).
1075const MAX_DIRECTION_CASCADE_REMOVALS: usize = 3;
1076
1077/// Remove segments that violate directional consistency (U-turns).
1078///
1079/// Walks the path checking each triple (A, B, C).  When the triple is
1080/// inconsistent the function determines which segment to remove:
1081///
1082///   1. **A == C (oscillation remnant)**: remove C (the second occurrence).
1083///   2. **A→B connected (wrong exit)**: target B.  If B exceeds the GNSS
1084///      threshold, try C as a fallback.
1085///   3. **A→B disconnected (orphan)**: prefer bridge, then smaller of {A, B}.
1086///
1087/// Segments with fewer than [`DIRECTION_REMOVAL_GNSS_THRESHOLD`] GNSS
1088/// positions are always removable.  Larger real segments are kept with a
1089/// warning when no smaller alternative exists.
1090fn remove_direction_violations(
1091    segments: Vec<AssociatedNetElement>,
1092    graph: &DiGraph<NetelementSide, f64>,
1093    node_map: &HashMap<NetelementSide, NodeIndex>,
1094    warnings: &mut Vec<String>,
1095    decisions: &mut Vec<SanityDecision>,
1096    pair_index: &mut usize,
1097) -> Vec<AssociatedNetElement> {
1098    if segments.len() < 3 {
1099        return segments;
1100    }
1101
1102    let gnss_span = |seg: &AssociatedNetElement| -> usize {
1103        seg.gnss_end_index.saturating_sub(seg.gnss_start_index)
1104    };
1105    let is_bridge =
1106        |seg: &AssociatedNetElement| -> bool { seg.gnss_start_index == seg.gnss_end_index };
1107    let is_removable = |seg: &AssociatedNetElement| -> bool {
1108        is_bridge(seg) || gnss_span(seg) < DIRECTION_REMOVAL_GNSS_THRESHOLD
1109    };
1110
1111    let mut result = segments;
1112    let mut changed = true;
1113    let mut warned_triples: std::collections::HashSet<(String, String, String)> =
1114        std::collections::HashSet::new();
1115    // Track how many neighbour removals each NE has caused during the
1116    // direction-violation cascade. Two separate counters avoid conflating
1117    // unrelated removal patterns:
1118    //
1119    // • *anchor*  — incremented for A when B is removed and A→B was
1120    //   connected (A stays, successive Bs get eaten).
1121    // • *protected* — incremented for B when C is removed as fallback
1122    //   because B was too significant (B stays, successive Cs get eaten).
1123    //
1124    // When either counter for B reaches the threshold **and** the current
1125    // C would be removable (i.e. the cascade would continue), B is force-
1126    // removed as the likely source of path corruption.  The C-removability
1127    // guard prevents force-removal when the cascade would naturally stop
1128    // at a significant C (kept-direction-warning).
1129    let mut cascade_as_anchor: std::collections::HashMap<String, usize> =
1130        std::collections::HashMap::new();
1131    let mut cascade_as_protected: std::collections::HashMap<String, usize> =
1132        std::collections::HashMap::new();
1133
1134    while changed {
1135        changed = false;
1136
1137        let mut i = 1;
1138        while i < result.len().saturating_sub(1) {
1139            let ne_a = result[i - 1].netelement_id.clone();
1140            let ne_b = result[i].netelement_id.clone();
1141            let ne_c = result[i + 1].netelement_id.clone();
1142
1143            if triple_is_consistent(&ne_a, &ne_b, &ne_c, graph, node_map) {
1144                i += 1;
1145                continue;
1146            }
1147
1148            // ── 0. Cascade detection: force-remove B if it caused too
1149            //       many neighbour removals ───────────────────────────────
1150            let b_anchor = cascade_as_anchor.get(&ne_b).copied().unwrap_or(0);
1151            let b_protected = cascade_as_protected.get(&ne_b).copied().unwrap_or(0);
1152            let c_would_be_removable = is_removable(&result[i + 1]);
1153            if (b_anchor >= MAX_DIRECTION_CASCADE_REMOVALS
1154                || b_protected >= MAX_DIRECTION_CASCADE_REMOVALS)
1155                && c_would_be_removable
1156            {
1157                // B has caused too many cascade removals — it is likely a
1158                // wrong GNSS assignment.  Force-remove it regardless of
1159                // GNSS coverage.
1160                if !is_bridge(&result[i]) && i > 0 {
1161                    let end = result[i].gnss_end_index;
1162                    if result[i - 1].gnss_end_index < end {
1163                        result[i - 1].gnss_end_index = end;
1164                    }
1165                }
1166
1167                let warn = format!(
1168                    "Removed {} (cascade: anchor={} protected={} at triple {}/{}/{})",
1169                    ne_b, b_anchor, b_protected, ne_a, ne_b, ne_c,
1170                );
1171                warnings.push(warn.clone());
1172                decisions.push(SanityDecision {
1173                    pair_index: *pair_index,
1174                    from_netelement_id: ne_a,
1175                    to_netelement_id: ne_c,
1176                    reachable: false,
1177                    action: "removed-direction-cascade".to_string(),
1178                    rerouted_via: vec![ne_b],
1179                    warning: warn,
1180                });
1181                *pair_index += 1;
1182
1183                result.remove(i);
1184                changed = true;
1185                break;
1186            }
1187
1188            // ── 1. Oscillation remnant (A == C): always target C ─────────
1189            if ne_a == ne_c {
1190                let target_idx = i + 1;
1191                let target_ne = result[target_idx].netelement_id.clone();
1192
1193                if !is_bridge(&result[target_idx]) && target_idx > 0 {
1194                    let end = result[target_idx].gnss_end_index;
1195                    if result[target_idx - 1].gnss_end_index < end {
1196                        result[target_idx - 1].gnss_end_index = end;
1197                    }
1198                }
1199
1200                let warn = format!(
1201                    "Removed {} (oscillation remnant: triple {}/{}/{})",
1202                    target_ne, ne_a, ne_b, ne_c,
1203                );
1204                warnings.push(warn.clone());
1205                decisions.push(SanityDecision {
1206                    pair_index: *pair_index,
1207                    from_netelement_id: ne_a,
1208                    to_netelement_id: ne_c,
1209                    reachable: false,
1210                    action: "removed-direction-violation".to_string(),
1211                    rerouted_via: vec![target_ne],
1212                    warning: warn,
1213                });
1214                *pair_index += 1;
1215
1216                result.remove(target_idx);
1217                changed = true;
1218                break;
1219            }
1220
1221            // ── 2. Determine primary target ──────────────────────────────
1222            let a_to_b_connected = (0u8..=1).any(|ap| {
1223                (0u8..=1).any(|bp| {
1224                    has_direct_connection(
1225                        graph,
1226                        node_map,
1227                        &NetelementSide {
1228                            netelement_id: ne_a.clone(),
1229                            position: ap,
1230                        },
1231                        &NetelementSide {
1232                            netelement_id: ne_b.clone(),
1233                            position: bp,
1234                        },
1235                    )
1236                })
1237            });
1238
1239            let primary_idx = if a_to_b_connected {
1240                i // B
1241            } else {
1242                match (is_bridge(&result[i - 1]), is_bridge(&result[i])) {
1243                    (_, true) => i,
1244                    (true, false) => i - 1,
1245                    _ => {
1246                        if gnss_span(&result[i - 1]) <= gnss_span(&result[i]) {
1247                            i - 1
1248                        } else {
1249                            i
1250                        }
1251                    }
1252                }
1253            };
1254
1255            // ── 3. If primary is too large, try fallback ─────────────────
1256            //
1257            // Fallback to C is only allowed when A→B is connected (wrong-
1258            // exit): C may be forcing B to exit on the wrong side.
1259            //
1260            // When A→B is disconnected the triple fails because of the A/B
1261            // gap itself; C is an innocent bystander (possibly a valid
1262            // intermediary for B→next) and removing it would make subsequent
1263            // connectivity worse.
1264            let target_idx = if is_removable(&result[primary_idx]) {
1265                primary_idx
1266            } else if a_to_b_connected {
1267                // Wrong-exit case: try C (i+1) as fallback — it may be
1268                // forcing the impossible exit direction on B.
1269                if is_removable(&result[i + 1]) {
1270                    i + 1
1271                } else {
1272                    // Both B and C too large — warn and skip.
1273                    let triple_key = (ne_a.clone(), ne_b.clone(), ne_c.clone());
1274                    if warned_triples.insert(triple_key) {
1275                        let warn = format!(
1276                            "Directional violation at {}/{}/{} \
1277                             (kept: segments too significant to remove automatically)",
1278                            ne_a, ne_b, ne_c,
1279                        );
1280                        warnings.push(warn.clone());
1281                        decisions.push(SanityDecision {
1282                            pair_index: *pair_index,
1283                            from_netelement_id: ne_a,
1284                            to_netelement_id: ne_c,
1285                            reachable: false,
1286                            action: "kept-direction-warning".to_string(),
1287                            rerouted_via: vec![ne_b],
1288                            warning: warn,
1289                        });
1290                        *pair_index += 1;
1291                    }
1292                    i += 1;
1293                    continue;
1294                }
1295            } else {
1296                // Disconnected case: warn and skip — do NOT eat C.
1297                let triple_key = (ne_a.clone(), ne_b.clone(), ne_c.clone());
1298                if warned_triples.insert(triple_key) {
1299                    let warn = format!(
1300                        "Directional violation at {}/{}/{} \
1301                         (kept: segments too significant to remove automatically)",
1302                        ne_a, ne_b, ne_c,
1303                    );
1304                    warnings.push(warn.clone());
1305                    decisions.push(SanityDecision {
1306                        pair_index: *pair_index,
1307                        from_netelement_id: ne_a,
1308                        to_netelement_id: ne_c,
1309                        reachable: false,
1310                        action: "kept-direction-warning".to_string(),
1311                        rerouted_via: vec![ne_b],
1312                        warning: warn,
1313                    });
1314                    *pair_index += 1;
1315                }
1316                i += 1;
1317                continue;
1318            };
1319
1320            let target_ne = result[target_idx].netelement_id.clone();
1321
1322            // ── 4. Remove target and extend neighbour ────────────────────
1323            if !is_bridge(&result[target_idx]) {
1324                if target_idx > 0 {
1325                    let end = result[target_idx].gnss_end_index;
1326                    if result[target_idx - 1].gnss_end_index < end {
1327                        result[target_idx - 1].gnss_end_index = end;
1328                    }
1329                } else if target_idx + 1 < result.len() {
1330                    let start = result[target_idx].gnss_start_index;
1331                    if result[target_idx + 1].gnss_start_index > start {
1332                        result[target_idx + 1].gnss_start_index = start;
1333                    }
1334                }
1335            }
1336
1337            // ── 5. Track cascade causes ──────────────────────────────────
1338            // When B is removed: A is the anchor that caused the removal.
1339            // When C is removed (fallback because B was protected): B is
1340            // the anchor.  A high count for an anchor signals a wrong GNSS
1341            // assignment that poisons subsequent triples.
1342            if target_idx == i {
1343                *cascade_as_anchor.entry(ne_a.clone()).or_insert(0) += 1;
1344            } else if target_idx == i + 1 {
1345                *cascade_as_protected.entry(ne_b.clone()).or_insert(0) += 1;
1346            }
1347
1348            let warn = format!(
1349                "Removed {} (directional violation: triple {}/{}/{})",
1350                target_ne, ne_a, ne_b, ne_c,
1351            );
1352            warnings.push(warn.clone());
1353            decisions.push(SanityDecision {
1354                pair_index: *pair_index,
1355                from_netelement_id: ne_a,
1356                to_netelement_id: ne_c,
1357                reachable: false,
1358                action: "removed-direction-violation".to_string(),
1359                rerouted_via: vec![target_ne],
1360                warning: warn,
1361            });
1362            *pair_index += 1;
1363
1364            result.remove(target_idx);
1365            changed = true;
1366            break;
1367        }
1368    }
1369
1370    result
1371}
1372
1373/// Temporary grouping of consecutive Viterbi states on the same netelement.
1374struct NetelementGroup {
1375    netelement_id: String,
1376    min_intrinsic: f64,
1377    max_intrinsic: f64,
1378    first_pos_idx: usize,
1379    last_pos_idx: usize,
1380    count: usize,
1381}
1382
1383impl NetelementGroup {
1384    fn new(pos_idx: usize, cand: &CandidateNetElement) -> Self {
1385        Self {
1386            netelement_id: cand.netelement_id.clone(),
1387            min_intrinsic: cand.intrinsic_coordinate,
1388            max_intrinsic: cand.intrinsic_coordinate,
1389            first_pos_idx: pos_idx,
1390            last_pos_idx: pos_idx,
1391            count: 1,
1392        }
1393    }
1394
1395    fn update(&mut self, pos_idx: usize, cand: &CandidateNetElement) {
1396        self.min_intrinsic = self.min_intrinsic.min(cand.intrinsic_coordinate);
1397        self.max_intrinsic = self.max_intrinsic.max(cand.intrinsic_coordinate);
1398        self.last_pos_idx = pos_idx;
1399        self.count += 1;
1400    }
1401
1402    fn to_associated_net_element(&self) -> Result<AssociatedNetElement, ProjectionError> {
1403        AssociatedNetElement::new(
1404            self.netelement_id.clone(),
1405            1.0, // Probability will be recomputed downstream if needed.
1406            self.min_intrinsic,
1407            self.max_intrinsic,
1408            self.first_pos_idx,
1409            self.last_pos_idx,
1410        )
1411    }
1412}
1413
1414/// Insert bridge netelements between two consecutive observed groups
1415/// by recovering the Dijkstra shortest path from the cache.
1416#[allow(clippy::too_many_arguments)]
1417fn insert_bridges(
1418    prev: &NetelementGroup,
1419    next: &NetelementGroup,
1420    segments: &mut Vec<AssociatedNetElement>,
1421    _netelements: &[Netelement],
1422    netelement_index: &HashMap<String, usize>,
1423    graph: &DiGraph<NetelementSide, f64>,
1424    node_map: &HashMap<NetelementSide, NodeIndex>,
1425    cache: &mut ShortestPathCache,
1426) -> Result<(), ProjectionError> {
1427    // Check direct adjacency first (any side combination connects with cost 0).
1428    let from_sides = candidate_sides(&prev.netelement_id);
1429    let to_sides = candidate_sides(&next.netelement_id);
1430
1431    // Find shortest route to identify bridge NEs along the path.
1432    let mut best_route: Option<f64> = None;
1433    let mut best_from_side = 0u8;
1434    let mut best_to_side = 0u8;
1435    for from in &from_sides {
1436        for to in &to_sides {
1437            if let Some(d) = cached_shortest_path_distance(cache, graph, node_map, from, to) {
1438                if best_route.is_none() || d < best_route.unwrap() {
1439                    best_route = Some(d);
1440                    best_from_side = from.position;
1441                    best_to_side = to.position;
1442                }
1443            }
1444        }
1445    }
1446
1447    if best_route.is_none() {
1448        // Disconnected — nothing to bridge.
1449        return Ok(());
1450    }
1451
1452    // To recover intermediate netelements we would need the actual Dijkstra
1453    // path (not just the cost).  Since our current `shortest_path_distance`
1454    // only returns the cost, we do a lightweight BFS/Dijkstra trace here.
1455    // For now, if the route cost equals 0 (directly adjacent) we skip.
1456    // Otherwise we attempt to reconstruct intermediate NEs by running Dijkstra
1457    // and walking the predecessor map.
1458    let route_cost = best_route.unwrap();
1459    if route_cost < 1e-9 {
1460        // Directly adjacent — no bridges needed.
1461        return Ok(());
1462    }
1463
1464    // Run Dijkstra from the best from_side and trace predecessors to best to_side.
1465    let from_side = NetelementSide {
1466        netelement_id: prev.netelement_id.clone(),
1467        position: best_from_side,
1468    };
1469    let to_side = NetelementSide {
1470        netelement_id: next.netelement_id.clone(),
1471        position: best_to_side,
1472    };
1473
1474    let bridge_ne_ids = trace_intermediate_netelements(
1475        graph,
1476        node_map,
1477        &from_side,
1478        &to_side,
1479        &prev.netelement_id,
1480        &next.netelement_id,
1481    );
1482
1483    // Build a lookup from NE id to whether it exists in netelements.
1484    let gnss_idx = prev.last_pos_idx; // Bridge GNSS range = gap between groups.
1485    for ne_id in &bridge_ne_ids {
1486        if netelement_index.contains_key(ne_id) {
1487            segments.push(AssociatedNetElement::new(
1488                ne_id.clone(),
1489                1.0, // Bridge probability
1490                0.0, // Full intrinsic range
1491                1.0,
1492                gnss_idx,
1493                gnss_idx,
1494            )?);
1495        }
1496    }
1497
1498    Ok(())
1499}
1500
1501/// Fill gaps left in the path after sanity validation.
1502///
1503/// Walks the final path segments and checks each consecutive pair for direct
1504/// topological connectivity.  When a gap is found (no direct edge), a Dijkstra
1505/// route is attempted and intermediate bridge netelements are inserted.
1506///
1507/// Before inserting bridges the function checks whether the route would create
1508/// a **U-turn** at the target segment (the last bridge NE, the target, and the
1509/// segment after the target form a directionally inconsistent triple).  When a
1510/// U-turn is detected the target segment is skipped entirely — its GNSS range
1511/// is absorbed by the predecessor — and the gap is re-evaluated against the
1512/// next segment in the path.
1513///
1514/// Returns the gap-filled path, any warnings, and structured gap-fill records.
1515pub fn fill_path_gaps(
1516    segments: Vec<AssociatedNetElement>,
1517    netelement_index: &HashMap<String, usize>,
1518    graph: &DiGraph<NetelementSide, f64>,
1519    node_map: &HashMap<NetelementSide, NodeIndex>,
1520    cache: &mut ShortestPathCache,
1521) -> (Vec<AssociatedNetElement>, Vec<String>, Vec<GapFill>) {
1522    if segments.len() < 2 {
1523        return (segments, vec![], vec![]);
1524    }
1525
1526    let mut result: Vec<AssociatedNetElement> = Vec::new();
1527    let mut warnings: Vec<String> = Vec::new();
1528    let mut gap_fills: Vec<GapFill> = Vec::new();
1529
1530    result.push(segments[0].clone());
1531
1532    // Cursor-based loop so we can skip U-turn segments and re-process
1533    // the gap from the same predecessor.
1534    let mut cursor = 1usize;
1535    let mut effective_prev = 0usize; // index in `segments` of the active predecessor
1536
1537    while cursor < segments.len() {
1538        let prev = &segments[effective_prev];
1539        let next = &segments[cursor];
1540
1541        // Same netelement — always connected.
1542        if prev.netelement_id == next.netelement_id {
1543            result.push(next.clone());
1544            effective_prev = cursor;
1545            cursor += 1;
1546            continue;
1547        }
1548
1549        // Check if any side combination gives a direct connection.
1550        let from_sides = candidate_sides(&prev.netelement_id);
1551        let to_sides = candidate_sides(&next.netelement_id);
1552
1553        let mut directly_connected = false;
1554        for from in &from_sides {
1555            for to in &to_sides {
1556                if has_direct_connection(graph, node_map, from, to) {
1557                    directly_connected = true;
1558                    break;
1559                }
1560            }
1561            if directly_connected {
1562                break;
1563            }
1564        }
1565
1566        if directly_connected {
1567            result.push(next.clone());
1568            effective_prev = cursor;
1569            cursor += 1;
1570            continue;
1571        }
1572
1573        // Not directly connected — try Dijkstra to find a route.
1574        let mut best_route_cost: Option<f64> = None;
1575        let mut best_from_side = 0u8;
1576        let mut best_to_side = 0u8;
1577        for from in &from_sides {
1578            for to in &to_sides {
1579                if let Some(d) = cached_shortest_path_distance(cache, graph, node_map, from, to) {
1580                    if best_route_cost.is_none() || d < best_route_cost.unwrap() {
1581                        best_route_cost = Some(d);
1582                        best_from_side = from.position;
1583                        best_to_side = to.position;
1584                    }
1585                }
1586            }
1587        }
1588
1589        if let Some(_cost) = best_route_cost {
1590            let from_side = NetelementSide {
1591                netelement_id: prev.netelement_id.clone(),
1592                position: best_from_side,
1593            };
1594            let to_side = NetelementSide {
1595                netelement_id: next.netelement_id.clone(),
1596                position: best_to_side,
1597            };
1598            let bridge_ne_ids = trace_intermediate_netelements(
1599                graph,
1600                node_map,
1601                &from_side,
1602                &to_side,
1603                &prev.netelement_id,
1604                &next.netelement_id,
1605            );
1606
1607            // ── U-turn detection ─────────────────────────────────────────
1608            // If the last bridge NE, the target, and the segment after the
1609            // target form a directionally inconsistent triple, the gap-fill
1610            // would force the path to enter `next` and immediately reverse.
1611            // Skip `next` and re-evaluate the gap from the same predecessor.
1612            if !bridge_ne_ids.is_empty() && cursor + 1 < segments.len() {
1613                let last_bridge = bridge_ne_ids.last().unwrap();
1614                let after_target = &segments[cursor + 1];
1615                if !triple_is_consistent(
1616                    last_bridge,
1617                    &next.netelement_id,
1618                    &after_target.netelement_id,
1619                    graph,
1620                    node_map,
1621                ) {
1622                    let warn = format!(
1623                        "Gap fill: removed {} (U-turn: bridge route via {} \
1624                         creates direction violation with successor {})",
1625                        next.netelement_id, last_bridge, after_target.netelement_id,
1626                    );
1627                    warnings.push(warn.clone());
1628                    gap_fills.push(GapFill {
1629                        pair_index: cursor - 1,
1630                        from_netelement_id: prev.netelement_id.clone(),
1631                        to_netelement_id: next.netelement_id.clone(),
1632                        route_found: false,
1633                        inserted_netelements: vec![],
1634                        warning: warn,
1635                    });
1636                    // Absorb the skipped segment's GNSS range.
1637                    if let Some(last) = result.last_mut() {
1638                        if last.gnss_end_index < next.gnss_end_index {
1639                            last.gnss_end_index = next.gnss_end_index;
1640                        }
1641                    }
1642                    // Re-process from the same predecessor.
1643                    cursor += 1;
1644                    continue;
1645                }
1646            }
1647
1648            if !bridge_ne_ids.is_empty() {
1649                let gnss_idx = result
1650                    .last()
1651                    .map_or(prev.gnss_end_index, |r| r.gnss_end_index);
1652                let mut inserted = Vec::new();
1653                for ne_id in &bridge_ne_ids {
1654                    if netelement_index.contains_key(ne_id) {
1655                        if let Ok(bridge) = AssociatedNetElement::new(
1656                            ne_id.clone(),
1657                            1.0,
1658                            0.0,
1659                            1.0,
1660                            gnss_idx,
1661                            gnss_idx,
1662                        ) {
1663                            result.push(bridge);
1664                            inserted.push(ne_id.clone());
1665                        }
1666                    }
1667                }
1668                let warn = format!(
1669                    "Gap fill: inserted {} bridge NE(s) between {} and {}: [{}]",
1670                    inserted.len(),
1671                    prev.netelement_id,
1672                    next.netelement_id,
1673                    inserted.join(", "),
1674                );
1675                warnings.push(warn.clone());
1676                gap_fills.push(GapFill {
1677                    pair_index: cursor - 1,
1678                    from_netelement_id: prev.netelement_id.clone(),
1679                    to_netelement_id: next.netelement_id.clone(),
1680                    route_found: true,
1681                    inserted_netelements: inserted,
1682                    warning: warn,
1683                });
1684            } else {
1685                // Dijkstra found a route but no intermediate NEs (adjacent via non-zero-weight edge).
1686                gap_fills.push(GapFill {
1687                    pair_index: cursor - 1,
1688                    from_netelement_id: prev.netelement_id.clone(),
1689                    to_netelement_id: next.netelement_id.clone(),
1690                    route_found: true,
1691                    inserted_netelements: vec![],
1692                    warning: String::new(),
1693                });
1694            }
1695        } else {
1696            let warn = format!(
1697                "Gap fill: no route found between {} and {}",
1698                prev.netelement_id, next.netelement_id,
1699            );
1700            warnings.push(warn.clone());
1701            gap_fills.push(GapFill {
1702                pair_index: cursor - 1,
1703                from_netelement_id: prev.netelement_id.clone(),
1704                to_netelement_id: next.netelement_id.clone(),
1705                route_found: false,
1706                inserted_netelements: vec![],
1707                warning: warn,
1708            });
1709        }
1710
1711        result.push(next.clone());
1712        effective_prev = cursor;
1713        cursor += 1;
1714    }
1715
1716    (result, warnings, gap_fills)
1717}
1718
1719/// Find intermediate netelement IDs along the direction-aware shortest path
1720/// between two netelement sides (excluding `from_ne_id` and `to_ne_id`).
1721fn trace_intermediate_netelements(
1722    graph: &DiGraph<NetelementSide, f64>,
1723    node_map: &HashMap<NetelementSide, NodeIndex>,
1724    from: &NetelementSide,
1725    to: &NetelementSide,
1726    from_ne_id: &str,
1727    to_ne_id: &str,
1728) -> Vec<String> {
1729    let Some(path_nodes) = shortest_path_route(graph, node_map, from, to) else {
1730        return vec![];
1731    };
1732
1733    // Extract unique netelement IDs, excluding from and to NEs.
1734    let mut ne_ids: Vec<String> = Vec::new();
1735    let mut seen = std::collections::HashSet::new();
1736    for nidx in &path_nodes {
1737        let ne_side = &graph[*nidx];
1738        if ne_side.netelement_id != from_ne_id
1739            && ne_side.netelement_id != to_ne_id
1740            && seen.insert(ne_side.netelement_id.clone())
1741        {
1742            ne_ids.push(ne_side.netelement_id.clone());
1743        }
1744    }
1745    ne_ids
1746}
1747
1748#[cfg(test)]
1749mod tests {
1750    use super::*;
1751    use geo::{LineString, Point};
1752
1753    fn make_ne(id: &str, coords: Vec<(f64, f64)>) -> Netelement {
1754        Netelement {
1755            id: id.to_string(),
1756            geometry: LineString::from(coords),
1757            crs: "EPSG:4326".to_string(),
1758        }
1759    }
1760
1761    fn make_cand(ne_id: &str, intrinsic: f64, lon: f64, lat: f64) -> CandidateNetElement {
1762        CandidateNetElement {
1763            netelement_id: ne_id.to_string(),
1764            distance_meters: 5.0,
1765            intrinsic_coordinate: intrinsic,
1766            projected_point: Point::new(lon, lat),
1767        }
1768    }
1769
1770    /// Simple 3-position × 2-candidate trellis where the optimal path is obvious.
1771    #[test]
1772    fn test_viterbi_simple_trellis() {
1773        // Two netelements: A (3.0,50.0)→(3.001,50.0) and B (3.001,50.0)→(3.002,50.0)
1774        let netelements = vec![
1775            make_ne("A", vec![(3.0, 50.0), (3.001, 50.0)]),
1776            make_ne("B", vec![(3.001, 50.0), (3.002, 50.0)]),
1777        ];
1778        let netelement_index: HashMap<String, usize> = [("A".to_string(), 0), ("B".to_string(), 1)]
1779            .into_iter()
1780            .collect();
1781
1782        // Netrelation: A(1) → B(0) forward
1783        use crate::models::NetRelation;
1784        let netrelations = vec![NetRelation::new(
1785            "NR1".to_string(),
1786            "A".to_string(),
1787            "B".to_string(),
1788            1,
1789            0,
1790            true,
1791            true,
1792        )
1793        .unwrap()];
1794
1795        let (graph, node_map) =
1796            crate::path::graph::build_topology_graph(&netelements, &netrelations).unwrap();
1797        let mut cache = ShortestPathCache::new();
1798
1799        let config = PathConfig::default();
1800
1801        // 3 positions: t0 on A, t1 on A, t2 on B
1802        let position_candidates = vec![
1803            vec![make_cand("A", 0.2, 3.0002, 50.0)],
1804            vec![make_cand("A", 0.8, 3.0008, 50.0)],
1805            vec![
1806                make_cand("A", 0.99, 3.00099, 50.0),
1807                make_cand("B", 0.1, 3.0011, 50.0),
1808            ],
1809        ];
1810
1811        // Emission probabilities (higher for the "correct" candidate)
1812        let position_probabilities = vec![
1813            vec![0.9],
1814            vec![0.85],
1815            vec![0.3, 0.9], // B is much more likely at t2
1816        ];
1817
1818        let result = viterbi_decode(
1819            &position_candidates,
1820            &position_probabilities,
1821            &netelements,
1822            &netelement_index,
1823            &graph,
1824            &node_map,
1825            &mut cache,
1826            &config,
1827        )
1828        .unwrap();
1829
1830        assert_eq!(result.subsequences.len(), 1);
1831        let seq = &result.subsequences[0];
1832        assert_eq!(seq.states.len(), 3);
1833        // t0: candidate 0 (A)
1834        assert_eq!(seq.states[0], (0, 0));
1835        // t1: candidate 0 (A)
1836        assert_eq!(seq.states[1], (1, 0));
1837        // t2: candidate 1 (B) — higher emission probability
1838        assert_eq!(seq.states[2], (2, 1));
1839    }
1840
1841    /// When no transitions are possible between disconnected netelements,
1842    /// the algorithm carries forward with a penalty instead of breaking,
1843    /// producing a single continuous subsequence.
1844    #[test]
1845    fn test_viterbi_no_break_on_disconnected() {
1846        // Two disconnected netelements
1847        let netelements = vec![
1848            make_ne("A", vec![(3.0, 50.0), (3.001, 50.0)]),
1849            make_ne("B", vec![(4.0, 51.0), (4.001, 51.0)]),
1850        ];
1851        let netelement_index: HashMap<String, usize> = [("A".to_string(), 0), ("B".to_string(), 1)]
1852            .into_iter()
1853            .collect();
1854
1855        let (graph, node_map) =
1856            crate::path::graph::build_topology_graph(&netelements, &[]).unwrap();
1857        let mut cache = ShortestPathCache::new();
1858        let config = PathConfig::default();
1859
1860        // t0 on A, t1 on B (disconnected → penalty carry-forward, no break)
1861        let position_candidates = vec![
1862            vec![make_cand("A", 0.5, 3.0005, 50.0)],
1863            vec![make_cand("B", 0.5, 4.0005, 51.0)],
1864        ];
1865        let position_probabilities = vec![vec![0.9], vec![0.9]];
1866
1867        let result = viterbi_decode(
1868            &position_candidates,
1869            &position_probabilities,
1870            &netelements,
1871            &netelement_index,
1872            &graph,
1873            &node_map,
1874            &mut cache,
1875            &config,
1876        )
1877        .unwrap();
1878
1879        // Should produce one continuous subsequence (no break)
1880        assert_eq!(result.subsequences.len(), 1);
1881        assert_eq!(result.subsequences[0].states.len(), 2);
1882        // t0: A, t1: B (carried forward with penalty)
1883        assert_eq!(
1884            position_candidates[result.subsequences[0].states[0].0]
1885                [result.subsequences[0].states[0].1]
1886                .netelement_id,
1887            "A"
1888        );
1889        assert_eq!(
1890            position_candidates[result.subsequences[0].states[1].0]
1891                [result.subsequences[0].states[1].1]
1892                .netelement_id,
1893            "B"
1894        );
1895    }
1896
1897    #[test]
1898    fn test_viterbi_empty_input() {
1899        let netelements: Vec<Netelement> = vec![];
1900        let netelement_index = HashMap::new();
1901        let graph = DiGraph::new();
1902        let node_map = HashMap::new();
1903        let mut cache = ShortestPathCache::new();
1904        let config = PathConfig::default();
1905
1906        let result = viterbi_decode(
1907            &[],
1908            &[],
1909            &netelements,
1910            &netelement_index,
1911            &graph,
1912            &node_map,
1913            &mut cache,
1914            &config,
1915        )
1916        .unwrap();
1917
1918        assert!(result.subsequences.is_empty());
1919    }
1920
1921    #[test]
1922    fn test_safe_ln() {
1923        assert_eq!(safe_ln(0.0), f64::NEG_INFINITY);
1924        assert_eq!(safe_ln(-1.0), f64::NEG_INFINITY);
1925        assert!((safe_ln(1.0) - 0.0).abs() < 1e-12);
1926        assert!((safe_ln(std::f64::consts::E) - 1.0).abs() < 1e-12);
1927    }
1928}