ocr cover fix

2026-01-19 12:58:05 -04:00
parent eb251b5eac
commit 6cb5b7a61f
1 changed files with 28 additions and 7 deletions
--- a/src-tauri/src/ocr.rs
+++ b/src-tauri/src/ocr.rs
@@ -96,7 +96,7 @@ pub fn run_ocr_detection(
    let map_h = shape[2] as u32;
    
    // Create binary map (threshold 0.3)
-    let threshold = 0.2; // Lower threshold to catch more text
+    let threshold = 0.3; 
    let mut binary_map = vec![false; (map_w * map_h) as usize];
    
    for i in 0..binary_map.len() {
@@ -149,16 +149,37 @@ pub fn run_ocr_detection(
                // Filter small noise
                if pixel_count < 10 { continue; }
                
-                // Scale back to original
+                // Calculate Scale Factors
                let scale_x = orig_w as f64 / resize_w as f64;
                let scale_y = orig_h as f64 / resize_h as f64;
                
-                // Removed brightness check to allow detection of any text detected by DBNet
+                // Unpadded box size (width, height) in map space
+                let raw_w = (max_x - min_x + 1) as f64;
+                let raw_h = (max_y - min_y + 1) as f64;
+                
+                // --- ASPECT RATIO FILTERING ---
+                // Watermarks are typically horizontal text lines. 
+                // A cross or vertical pillar will have a small width/height ratio.
+                let aspect_ratio = raw_w / raw_h;
+                if aspect_ratio < 1.5 {
+                    continue; // Skip vertical or square-ish non-text objects
+                }
+
+                // --- PADDING / DILATION ---
+                let pad_x = raw_w * 0.15; // 15% horizontal is usually enough
+                let pad_y = raw_h * 1.00; // Increased to 100% for aggressive vertical coverage
+                
+                let box_x = (min_x as f64 - pad_x).max(0.0);
+                let box_y = (min_y as f64 - pad_y).max(0.0);
+                let box_w = raw_w + 2.0 * pad_x;
+                let box_h = raw_h + 2.0 * pad_y;
+
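+                // Convert to normalized image coordinates (fractions of the original size).
+                // NOTE(review): the padded box is not clamped on the far edges here, so
+                // x + width or y + height may exceed 1.0 — confirm the consumer clamps.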
                boxes.push(DetectedBox {
-                    x: min_x as f64 * scale_x / orig_w as f64,
-                    y: min_y as f64 * scale_y / orig_h as f64,
-                    width: (max_x - min_x + 1) as f64 * scale_x / orig_w as f64,
-                    height: (max_y - min_y + 1) as f64 * scale_y / orig_h as f64,
+                    x: (box_x * scale_x) / orig_w as f64,
+                    y: (box_y * scale_y) / orig_h as f64,
+                    width: (box_w * scale_x) / orig_w as f64,
+                    height: (box_h * scale_y) / orig_h as f64,
                });
            }
        }