ocr cover fix

This commit is contained in:
Julian Freeman
2026-01-19 12:58:05 -04:00
parent eb251b5eac
commit 6cb5b7a61f

View File

@@ -96,7 +96,7 @@ pub fn run_ocr_detection(
let map_h = shape[2] as u32;
// Create binary map (threshold 0.3)
let threshold = 0.2; // Lower threshold to catch more text
let threshold = 0.3;
let mut binary_map = vec![false; (map_w * map_h) as usize];
for i in 0..binary_map.len() {
@@ -149,16 +149,37 @@ pub fn run_ocr_detection(
// Filter small noise
if pixel_count < 10 { continue; }
// Scale back to original
// Calculate Scale Factors
let scale_x = orig_w as f64 / resize_w as f64;
let scale_y = orig_h as f64 / resize_h as f64;
// Removed brightness check to allow detection of any text detected by DBNet
// Map to raw coordinates in map space
let raw_w = (max_x - min_x + 1) as f64;
let raw_h = (max_y - min_y + 1) as f64;
// --- ASPECT RATIO FILTERING ---
// Watermarks are typically horizontal text lines.
// A cross or vertical pillar will have a small width/height ratio.
let aspect_ratio = raw_w / raw_h;
if aspect_ratio < 1.5 {
continue; // Skip vertical or square-ish non-text objects
}
// --- PADDING / DILATION ---
let pad_x = raw_w * 0.15; // 15% horizontal is usually enough
let pad_y = raw_h * 1.00; // Increased to 100% for aggressive vertical coverage
let box_x = (min_x as f64 - pad_x).max(0.0);
let box_y = (min_y as f64 - pad_y).max(0.0);
let box_w = raw_w + 2.0 * pad_x;
let box_h = raw_h + 2.0 * pad_y;
// Convert to Normalized Image Coordinates [0, 1]
boxes.push(DetectedBox {
x: min_x as f64 * scale_x / orig_w as f64,
y: min_y as f64 * scale_y / orig_h as f64,
width: (max_x - min_x + 1) as f64 * scale_x / orig_w as f64,
height: (max_y - min_y + 1) as f64 * scale_y / orig_h as f64,
x: (box_x * scale_x) / orig_w as f64,
y: (box_y * scale_y) / orig_h as f64,
width: (box_w * scale_x) / orig_w as f64,
height: (box_h * scale_y) / orig_h as f64,
});
}
}