anti-prestige-tool/reforger_queue/ocr.py

import json
import os
import sys

from .config import AUTO_FONT_CANDIDATES
from .magick import image_size, magick_fonts, ppm_from_magick, try_ppm_from_magick


def orange_digit_mask(width, height, raw):
    mask = [[False] * width for _ in range(height)]
    for y in range(height):
        row_offset = y * width * 3
        for x in range(width):
            offset = row_offset + x * 3
            r, g, b = raw[offset], raw[offset + 1], raw[offset + 2]
            mask[y][x] = r >= 115 and 55 <= g <= 205 and b <= 100 and r - g >= 20
    return mask


def white_mask(width, height, raw):
    mask = [[False] * width for _ in range(height)]
    for y in range(height):
        row_offset = y * width * 3
        for x in range(width):
            offset = row_offset + x * 3
            r, g, b = raw[offset], raw[offset + 1], raw[offset + 2]
            mask[y][x] = r >= 120 and g >= 120 and b >= 120
    return mask


def connected_components(mask):
    height = len(mask)
    width = len(mask[0]) if height else 0
    seen = [[False] * width for _ in range(height)]
    components = []

    for y in range(height):
        for x in range(width):
            if seen[y][x] or not mask[y][x]:
                continue

            stack = [(x, y)]
            seen[y][x] = True
            points = set()

            while stack:
                cx, cy = stack.pop()
                points.add((cx, cy))
                for ny in range(cy - 1, cy + 2):
                    for nx in range(cx - 1, cx + 2):
                        if nx < 0 or ny < 0 or nx >= width or ny >= height:
                            continue
                        if seen[ny][nx] or not mask[ny][nx]:
                            continue
                        seen[ny][nx] = True
                        stack.append((nx, ny))

            xs = [point[0] for point in points]
            ys = [point[1] for point in points]
            bbox = (min(xs), min(ys), max(xs), max(ys))
            box_width = bbox[2] - bbox[0] + 1
            box_height = bbox[3] - bbox[1] + 1

            if len(points) >= 40 and box_width >= 5 and box_height >= 15:
                components.append({"points": points, "bbox": bbox, "area": len(points)})

    components.sort(key=lambda component: component["bbox"][0])
    return components


def normalize(component, width=24, height=36):
    x1, y1, x2, y2 = component["bbox"]
    source_width = x2 - x1 + 1
    source_height = y2 - y1 + 1
    points = component["points"]
    output = []

    for y in range(height):
        source_y = y1 + int((y + 0.5) * source_height / height)
        row = []
        for x in range(width):
            source_x = x1 + int((x + 0.5) * source_width / width)
            row.append((source_x, source_y) in points)
        output.append(row)

    return output


def template_distance(left, right):
    total = len(left) * len(left[0])
    differences = 0
    intersection = 0
    union = 0

    for left_row, right_row in zip(left, right):
        for left_value, right_value in zip(left_row, right_row):
            differences += left_value != right_value
            intersection += left_value and right_value
            union += left_value or right_value

    hamming = differences / total
    iou = intersection / union if union else 0.0
    return hamming, iou


def template_font_candidates(font):
    if font != "auto":
        return [font]

    available = magick_fonts()
    candidates = [candidate for candidate in AUTO_FONT_CANDIDATES if candidate in available]
    return [*candidates, None]


def build_template_args(font, pointsize):
    args = ["-background", "black", "-fill", "white"]
    if font:
        args.extend(["-font", font])
    args.extend(["-pointsize", str(pointsize), "label:0123456789"])
    return args


def build_templates(font, pointsize):
    errors = []
    for candidate in template_font_candidates(font):
        ppm, error = try_ppm_from_magick(build_template_args(candidate, pointsize))
        if ppm is None:
            errors.append(f"{candidate or 'ImageMagick default'}: {error}")
            continue

        width, height, raw = ppm
        components = connected_components(white_mask(width, height, raw))
        if len(components) < 10:
            errors.append(f"{candidate or 'ImageMagick default'}: rendered only {len(components)} digit components")
            continue
        return {str(index): [normalize(component)] for index, component in enumerate(components[:10])}

    if font == "auto":
        sys.exit("could not render digit templates with any available ImageMagick font:\n  " + "\n  ".join(errors))
    sys.exit(
        f"could not render digit templates with font {font!r}. "
        "Install that font, use --font auto, or pass another ImageMagick font name."
    )


def validate_template_grid(grid, digit, sample_index, path):
    if not isinstance(grid, list) or len(grid) != 36:
        sys.exit(f"invalid template grid for digit {digit} sample {sample_index} in {path}: expected 36 rows")
    for row in grid:
        if not isinstance(row, list) or len(row) != 24 or any(not isinstance(value, bool) for value in row):
            sys.exit(
                f"invalid template grid for digit {digit} sample {sample_index} in {path}: "
                "expected 24 boolean columns per row"
            )


def load_template_set(path):
    try:
        with open(path, "r", encoding="utf-8") as handle:
            payload = json.load(handle)
    except FileNotFoundError:
        sys.exit(f"missing digit template set {path}; regenerate it from the regression dataset")
    except json.JSONDecodeError as exc:
        sys.exit(f"failed to parse digit template set {path}: {exc}")

    digits = payload.get("digits") if isinstance(payload, dict) else None
    if not isinstance(digits, dict):
        sys.exit(f"invalid digit template set {path}: missing digits object")

    templates = {}
    for digit in "0123456789":
        samples = digits.get(digit)
        if not isinstance(samples, list) or not samples:
            sys.exit(f"invalid digit template set {path}: missing samples for digit {digit}")
        templates[digit] = []
        for index, grid in enumerate(samples):
            validate_template_grid(grid, digit, index, path)
            templates[digit].append(grid)

    return templates


def templates_from_dataset(paths, reference_crop, reference_size, scale_mode, cropped):
    templates = {str(digit): [] for digit in range(10)}
    for path in paths:
        expected = os.path.splitext(os.path.basename(path))[0]
        if not expected.isdigit():
            sys.exit(f"template source filename must be numeric: {path}")

        crop = resolve_crop(path, None, reference_crop, reference_size, scale_mode, cropped)
        width, height, raw = load_queue_image(path, crop, cropped)
        components = connected_components(orange_digit_mask(width, height, raw))
        if len(components) != len(expected):
            sys.exit(
                f"template source {path} produced {len(components)} digit components, "
                f"but filename expects {len(expected)}"
            )

        for digit, component in zip(expected, components):
            templates[digit].append(normalize(component))

    missing = [digit for digit, samples in templates.items() if not samples]
    if missing:
        sys.exit(f"template dataset has no samples for digits: {', '.join(missing)}")
    return templates


def write_template_set(output, templates, source_paths):
    payload = {
        "version": 1,
        "normalize_size": [24, 36],
        "source": "regression digit crops",
        "source_files": [os.path.basename(path) for path in source_paths],
        "digits": templates,
    }
    directory = os.path.dirname(output)
    if directory:
        os.makedirs(directory, exist_ok=True)
    temp_path = f"{output}.tmp"
    with open(temp_path, "w", encoding="utf-8") as handle:
        json.dump(payload, handle, separators=(",", ":"))
        handle.write("\n")
    os.replace(temp_path, output)


def resolve_templates(template_set, font, pointsize):
    if font:
        return build_templates(font, pointsize)
    return load_template_set(template_set)


def classify(component, templates):
    sample = normalize(component)
    ranked = []
    for digit, digit_templates in templates.items():
        for template in digit_templates:
            hamming, iou = template_distance(sample, template)
            ranked.append((hamming, -iou, digit, iou))
    ranked.sort()
    hamming, negative_iou, digit, iou = ranked[0]
    return digit, hamming, iou


def load_queue_image(image_path, crop, already_cropped=False):
    if already_cropped:
        return ppm_from_magick([image_path, "+repage"])

    x, y, width, height = crop
    return ppm_from_magick([image_path, "+repage", "-crop", f"{width}x{height}+{x}+{y}", "+repage"])


def read_queue_number(image_path, crop, template_set, font, pointsize, already_cropped=False):
    width, height, raw = load_queue_image(image_path, crop, already_cropped)
    components = connected_components(orange_digit_mask(width, height, raw))
    templates = resolve_templates(template_set, font, pointsize)

    digits = []
    details = []
    for component in components:
        digit, hamming, iou = classify(component, templates)
        digits.append(digit)
        details.append((digit, component["bbox"], component["area"], hamming, iou))

    return "".join(digits), details


def scale_crop(reference_crop, reference_size, target_size, scale_mode):
    reference_width, reference_height = reference_size
    target_width, target_height = target_size
    x, y, width, height = reference_crop

    if scale_mode == "width":
        x_scale = target_width / reference_width
        y_scale = x_scale
    elif scale_mode == "independent":
        x_scale = target_width / reference_width
        y_scale = target_height / reference_height
    else:
        raise ValueError(f"unsupported scale mode: {scale_mode}")

    return (
        round(x * x_scale),
        round(y * y_scale),
        max(1, round(width * x_scale)),
        max(1, round(height * y_scale)),
    )


def resolve_crop(image_path, explicit_crop, reference_crop, reference_size, scale_mode, already_cropped):
    if already_cropped:
        return None
    if explicit_crop:
        return explicit_crop
    return scale_crop(reference_crop, reference_size, image_size(image_path), scale_mode)