From 9e1c5fbccdea38f6b59aced5655a86765b18f260 Mon Sep 17 00:00:00 2001
From: odrling
Date: Wed, 28 Dec 2022 18:29:13 +0100
Subject: [PATCH] initial commit

---
 .gitignore       |   6 ++
 ocr.py           | 285 +++++++++++++++++++++++++++++++++++++++++++++++
 requirements.txt |   3 +
 3 files changed, 294 insertions(+)
 create mode 100644 .gitignore
 create mode 100755 ocr.py
 create mode 100644 requirements.txt

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..75b6656
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+*.lwi
+*.mkv
+*.mp4
+.envrc
+.venv/
+__pycache__/
diff --git a/ocr.py b/ocr.py
new file mode 100755
index 0000000..47d06c0
--- /dev/null
+++ b/ocr.py
@@ -0,0 +1,285 @@
+#!/usr/bin/env python3
+"""OCR hardsubbed subtitles by comparing a hardsubbed video with its raw source.
+
+Builds VapourSynth masks isolating the subtitles, runs the ``ocr`` plugin on
+them and can append the per-frame results to ``ocr_dump.json``.
+"""
+
+import functools
+import json
+import subprocess
+import sys
+from pathlib import Path
+
+import lvsfunc as lvf
+import typer
+import vapoursynth as vs
+import vsutil
+
+core = vs.core
+
+
+# Luma thresholds used to binarize frames into dark and bright masks.
+blackthr = 20
+whitethr = 200
+
+# Characters passed to the OCR plugin as "tessedit_char_whitelist".
+alphabet = (r" -!\"#%&'()*+,./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]_"
+            "abcdefghijklmnopqrstuvwxyz{|}©«®°»ÀÂÆÇÈÉÊËÎÏÔÙÛÜàâäæçèéêëîïñôùûüÿŒœŸ‘’“”$£€fifl—…")
+
+
+def invert(clip):
+    """Return ``clip`` with every sample ``x`` replaced by ``abs(x - 255)``."""
+    return core.std.Expr([clip], "x 255 - abs")
+
+
+def diff_mask(hardsub, softsub):
+    """Return a binary mask of the areas where ``hardsub`` and ``softsub`` differ.
+
+    Both clips are point-resized to a quarter of ``hardsub``'s dimensions as
+    YUV444P8; the absolute per-plane differences are summed, binarized at 40
+    and point-resized back to ``hardsub``'s dimensions.
+    """
+    diff_size = hardsub.width // 4, hardsub.height // 4
+    hy, hcb, hcr = vsutil.split(hardsub.resize.Point(*diff_size, format=vs.YUV444P8))
+    sy, scb, scr = vsutil.split(softsub.resize.Point(*diff_size, format=vs.YUV444P8))
+    diff_y = core.std.Expr([hy, sy], "x y - abs")
+    diff_cb = core.std.Expr([hcb, scb], "x y - abs")
+    diff_cr = core.std.Expr([hcr, scr], "x y - abs")
+
+    diff = core.std.Expr([diff_y, diff_cb, diff_cr], "x y z + +")
+    diff = diff.std.Binarize(40)
+
+    diff = diff.resize.Point(hardsub.width, hardsub.height)
+    return diff
+
+
+# mask subs
+def brightness_mask(hardsub):
+    """Return ``(matching_edges, hyst_matching)`` masks derived from luma.
+
+    ``matching_edges`` marks pixels darker than ``blackthr`` that fall inside
+    the bright mask (luma >= ``whitethr``) grown by 6 ``Maximum`` passes.
+    ``hyst_matching`` is the hysteresis of those edges through the bright mask
+    grown by 2 passes, kept only where the bright mask itself is set.
+    """
+    white_sub = core.std.Binarize(vsutil.get_y(hardsub), whitethr)
+    black_sub = core.std.Binarize(vsutil.get_y(hardsub), blackthr)
+    black_sub = invert(black_sub)
+    white_expanded = vsutil.iterate(white_sub, core.std.Maximum, 6)
+    white_lightly_expanded = vsutil.iterate(white_sub, core.std.Maximum, 2)
+
+    matching_edges = core.std.Expr([white_expanded, black_sub], "x y and 255 *")
+    matching_edges = matching_edges.resize.Point(format=vs.GRAY8)
+
+    hyst_matching = core.misc.Hysteresis(matching_edges, white_lightly_expanded)
+    hyst_matching = core.std.MaskedMerge(core.std.BlankClip(hyst_matching),
+                                         hyst_matching, white_sub)
+    return matching_edges, hyst_matching
+
+
+def filter_sub(hardsub, mask):
+    """Return the inverted bright pixels of ``hardsub`` that lie inside ``mask``.
+
+    Luma is binarized at ``whitethr``, merged over a blank clip through
+    ``mask`` and inverted with :func:`invert`.
+    """
+    white_sub = core.std.Binarize(vsutil.get_y(hardsub), whitethr)
+    black = core.std.BlankClip(white_sub)
+    hardsub_expr = core.std.MaskedMerge(black, white_sub, mask, first_plane=True)
+    hardsub_expr = invert(hardsub_expr)
+    return hardsub_expr
+
+
+def run_ocr(n, f, clip, ocr):
+    """Return ``ocr`` if ``f``'s ``HardsubDiffAverage`` exceeds 0.0001, else ``clip``."""
+    if f.props['HardsubDiffAverage'] > 0.0001:
+        # if f.props['_SceneChangePrev'] == 1 or f.props['_SceneChangeNext'] == 1 or n % 3 == 0:
+        return ocr
+
+    return clip
+
+
+def set_sub_diff(n, f):
+    """Return a copy of ``f[0]`` carrying every ``HardsubDiff*`` prop of ``f[1]``."""
+    fout = f[0].copy()
+    for k, v in f[1].props.items():
+        if "HardsubDiff" in k:
+            fout.props[k] = v
+
+    return fout
+
+
+def _fill_frame(n, f, fill_val=255):
+    """Return a copy of frame ``f`` whose first plane is filled row by row.
+
+    Scanning each row, the first sample of every run of a new non-zero value
+    toggles the fill state; every other sample is overwritten with
+    ``fill_val`` while filling and with 0 otherwise.
+    """
+    fout = f.copy()
+    # assumes fout[0] is a writable 2-D (rows, columns) view -- TODO confirm
+    plane = fout[0]
+    rows, cols = plane.shape
+
+    for row in range(rows):
+        fill_next = False
+        last_v = 0
+        for col in range(cols):
+            v = plane[row, col]
+            if v and v != last_v:
+                fill_next = not fill_next
+            else:
+                plane[row, col] = fill_val if fill_next else 0
+
+            last_v = v
+
+    return fout
+
+
+def fill_text(clip):
+    """Return ``clip`` with :func:`_fill_frame` applied lazily to every frame."""
+    return core.std.ModifyFrame(clip, clip, _fill_frame)
+
+
+def dump(n, f, clip=None):
+    """Append the props of frame ``n`` to ``ocr_dump.json`` and return ``clip``.
+
+    The frame is written (one JSON object per line) only when it has a
+    non-empty ``OCRString`` or is flagged as a scene change. ``clip`` is the
+    clip handed back to ``std.FrameEval``; bind it with ``functools.partial``.
+
+    Raises:
+        ValueError: if ``clip`` was not provided.
+    """
+    if clip is None:
+        raise ValueError("dump() needs the clip to return, bind it with functools.partial")
+
+    if (f.props.get('OCRString') or f.props['_SceneChangePrev'] == 1
+            or f.props['_SceneChangeNext'] == 1):
+        # props may hold bytes (e.g. OCRString), which json cannot serialize
+        props = {k: v.decode() if isinstance(v, bytes) else v
+                 for k, v in f.props.items()}
+        props["n"] = n
+        with open("ocr_dump.json", mode="a", encoding="utf-8") as j:
+            json.dump(props, j)
+            j.write("\n")
+
+    return clip
+
+
+def read_dump():
+    """Return the sorted frame numbers of ``ocr_dump.json`` with poor OCR results.
+
+    An entry is kept when its lowest ``OCRConfidence`` value is below 20.
+    Read and parse errors are printed to stderr and yield an empty list.
+    """
+    data = []
+
+    try:
+        with open("ocr_dump.json", encoding="utf-8") as stream:
+            data = [json.loads(line) for line in stream if line.strip()]
+    except (OSError, ValueError) as e:
+        print(e, file=sys.stderr)
+
+    bad_ocr = []
+    for entry in data:
+        confidence = entry.get("OCRConfidence", [])
+        # OCRConfidence may be dumped as a bare int or as a list of ints
+        if isinstance(confidence, int):
+            confidence = [confidence]
+        if confidence and min(confidence) < 20:
+            bad_ocr.append(entry["n"])
+
+    return sorted(bad_ocr)
+
+
+def apply_ocr(clip):
+    """Run ``ocr.Recognize`` on ``clip`` and overlay the frame props as text."""
+    ocr_clip = core.ocr.Recognize(clip, language="eng",
+                                  options=["enable_noise_removal", "false",
+                                           "tessedit_char_whitelist", alphabet,
+                                           "user_defined_dpi", "70"])
+    # ocr_clip = core.std.FrameEval(clip, functools.partial(run_ocr, clip=clip, ocr=ocr_clip), clip)
+    ocr_clip = ocr_clip.text.FrameProps()
+    return ocr_clip
+
+
+def main(hardsub_file, raw_file, comparison=False):
+    """Build the OCR filter chain for a hardsubbed file and its raw counterpart.
+
+    Returns a stacked debug view of the intermediate clips when ``comparison``
+    is true, otherwise the OCR clip whose frames are dumped by :func:`dump`
+    as they are requested.
+    """
+    src1 = lvf.src(hardsub_file)
+    src1 = src1.resize.Spline36(width=1920, height=1080)
+    # src1 = vsutil.depth(src1, 16)
+    src1 = src1.std.AssumeFPS(fpsnum=24000, fpsden=1001)
+    src2 = lvf.src(raw_file)
+    # src2 = vsutil.depth(src2, 16)
+    src2 = src2.std.AssumeFPS(fpsnum=24000, fpsden=1001)
+
+    sub_diff = diff_mask(vsutil.get_y(src1), vsutil.get_y(src2))
+    sub_diff = sub_diff.std.PlaneStats(prop="HardsubDiff")
+
+    sub_border_mask, sub_brightness = brightness_mask(src1)
+
+    hardsub_expr = filter_sub(src1, sub_diff)
+    hardsub_expr = hardsub_expr.std.PlaneStats()
+    hardsub_expr = core.std.ModifyFrame(hardsub_expr, [hardsub_expr, sub_diff], set_sub_diff)
+    hardsub_expr = core.misc.SCDetect(hardsub_expr, 0.0005)
+
+    ocr = apply_ocr(hardsub_expr)
+    brightness_ocr = apply_ocr(sub_brightness)
+
+    # dump() has to return a clip, so bind the OCR clip to it explicitly
+    dumped_ocr = core.std.FrameEval(ocr, functools.partial(dump, clip=ocr), ocr)
+
+    src1 = src1.std.PlaneStats()
+    src2 = src2.std.PlaneStats()
+
+    if comparison:
+        final = lvf.comparison.stack_vertical(
+            lvf.comparison.stack_horizontal(sub_diff.resize.Point(format=src1.format),
+                                            brightness_ocr.resize.Point(format=src1.format),
+                                            src2.text.FrameProps()),
+            lvf.comparison.stack_horizontal(ocr.resize.Point(format=src1.format),
+                                            vsutil.get_y(src1).std.Binarize(whitethr).resize.Point(format=src1.format),
+                                            src1.text.FrameProps())
+        )
+        final = final.text.FrameNum(9)
+        final = final.text.FrameProps()
+
+        return final
+    else:
+        return dumped_ocr
+
+
+app = typer.Typer()
+
+
+@app.command()
+def pipe(hardsub_file: Path, raw_file: Path):
+    """Write the comparison view to stdout as a Y4M stream."""
+    final = main(hardsub_file, raw_file, comparison=True)
+    final.output(sys.stdout, y4m=True)
+
+
+@app.command()
+def mpv(hardsub_file: Path, raw_file: Path):
+    """Play the comparison view by piping it as Y4M into mpv."""
+    final = main(hardsub_file, raw_file, comparison=True)
+    mpv_proc = subprocess.Popen(["mpv", "-"], stdin=subprocess.PIPE)
+    final.output(mpv_proc.stdin, y4m=True)
+    mpv_proc.communicate()
+
+
+@app.command(name="dump")
+def dump_ocr():
+    # Placeholder: the dump command is not implemented yet.
+    pass
+
+
+if __name__ == "__main__":
+    app()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..38c4fc0
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+lvsfunc
+typer
+vsutil