diff --git a/src-tauri/src/ocr.rs b/src-tauri/src/ocr.rs index ed703ef..d6cf159 100644 --- a/src-tauri/src/ocr.rs +++ b/src-tauri/src/ocr.rs @@ -96,7 +96,7 @@ pub fn run_ocr_detection( let map_h = shape[2] as u32; // Create binary map (threshold 0.3) - let threshold = 0.2; // Lower threshold to catch more text + let threshold = 0.3; let mut binary_map = vec![false; (map_w * map_h) as usize]; for i in 0..binary_map.len() { @@ -149,16 +149,37 @@ pub fn run_ocr_detection( // Filter small noise if pixel_count < 10 { continue; } - // Scale back to original + // Calculate Scale Factors let scale_x = orig_w as f64 / resize_w as f64; let scale_y = orig_h as f64 / resize_h as f64; - // Removed brightness check to allow detection of any text detected by DBNet + // Map to raw coordinates in map space + let raw_w = (max_x - min_x + 1) as f64; + let raw_h = (max_y - min_y + 1) as f64; + + // --- ASPECT RATIO FILTERING --- + // Watermarks are typically horizontal text lines. + // A cross or vertical pillar will have a small width/height ratio. + let aspect_ratio = raw_w / raw_h; + if aspect_ratio < 1.5 { + continue; // Skip vertical or square-ish non-text objects + } + + // --- PADDING / DILATION --- + let pad_x = raw_w * 0.15; // 15% horizontal is usually enough + let pad_y = raw_h * 1.00; // Increased to 100% for aggressive vertical coverage + + let box_x = (min_x as f64 - pad_x).max(0.0); + let box_y = (min_y as f64 - pad_y).max(0.0); + let box_w = raw_w + 2.0 * pad_x; + let box_h = raw_h + 2.0 * pad_y; + + // Convert to Normalized Image Coordinates [0, 1] boxes.push(DetectedBox { - x: min_x as f64 * scale_x / orig_w as f64, - y: min_y as f64 * scale_y / orig_h as f64, - width: (max_x - min_x + 1) as f64 * scale_x / orig_w as f64, - height: (max_y - min_y + 1) as f64 * scale_y / orig_h as f64, + x: (box_x * scale_x) / orig_w as f64, + y: (box_y * scale_y) / orig_h as f64, + width: (box_w * scale_x) / orig_w as f64, + height: (box_h * scale_y) / orig_h as f64, }); } }