#!/usr/bin/env python3
"""Extract hardsubbed text from a video by diffing it against a clean raw.

The hardsubbed source is compared against the raw source to build a mask of
the subtitle area, the bright text inside that mask is isolated, and the
result is passed through the VapourSynth OCR plugin.  Exposed as a small
Typer CLI (``pipe``, ``mpv`` and ``dump`` commands).
"""
import functools
import json
import subprocess
import sys
from pathlib import Path

import lvsfunc as lvf
import typer
import vapoursynth as vs
import vsutil

core = vs.core

# Luma thresholds (8-bit scale) used when binarizing for dark / bright pixels.
blackthr = 20
whitethr = 200

# Character whitelist passed to the OCR plugin as "tessedit_char_whitelist".
alphabet = (r" -!\"#%&'()*+,./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]_"
            "abcdefghijklmnopqrstuvwxyz{|}©«®°»ÀÂÆÇÈÉÊËÎÏÔÙÛÜàâäæçèéêëîïñôùûüÿŒœŸ‘’“”$£€fifl—…")


def invert(clip: vs.VideoNode) -> vs.VideoNode:
    """Return *clip* with every sample replaced by ``abs(x - 255)``."""
    return core.std.Expr([clip], "x 255 - abs")


def diff_mask(hardsub: vs.VideoNode, softsub: vs.VideoNode) -> vs.VideoNode:
    """Build a binary mask of the areas where *hardsub* differs from *softsub*.

    Both clips are point-resized to a quarter of the hardsub dimensions and
    converted to YUV444P8.  The absolute per-plane differences are summed,
    binarized at 40 and point-resized back to the hardsub dimensions.
    """
    diff_size = hardsub.width // 4, hardsub.height // 4
    hy, hcb, hcr = vsutil.split(
        hardsub.resize.Point(*diff_size, format=vs.YUV444P8))
    sy, scb, scr = vsutil.split(
        softsub.resize.Point(*diff_size, format=vs.YUV444P8))

    diff_y = core.std.Expr([hy, sy], "x y - abs")
    diff_cb = core.std.Expr([hcb, scb], "x y - abs")
    diff_cr = core.std.Expr([hcr, scr], "x y - abs")

    diff = core.std.Expr([diff_y, diff_cb, diff_cr], "x y z + +")
    diff = diff.std.Binarize(40)
    diff = diff.resize.Point(hardsub.width, hardsub.height)
    return diff


def brightness_mask(hardsub: vs.VideoNode):
    """Mask subtitles using only the brightness of the hardsubbed clip.

    Returns a ``(matching_edges, hyst_matching)`` tuple:

    * ``matching_edges`` is set where the bright mask (luma binarized at
      ``whitethr``, expanded 6 times) overlaps the dark mask (inverted luma
      binarized at ``blackthr``).
    * ``hyst_matching`` is the hysteresis of ``matching_edges`` into the
      bright mask expanded twice, then restricted to the unexpanded bright
      mask via a masked merge over a blank clip.
    """
    luma = vsutil.get_y(hardsub)
    white_sub = core.std.Binarize(luma, whitethr)
    # Binarize marks pixels at/above the threshold; inverting flips that so
    # the dark pixels are the ones that end up set.
    black_sub = invert(core.std.Binarize(luma, blackthr))

    white_expanded = vsutil.iterate(white_sub, core.std.Maximum, 6)
    white_lightly_expanded = vsutil.iterate(white_sub, core.std.Maximum, 2)

    matching_edges = core.std.Expr([white_expanded, black_sub],
                                   "x y and 255 *")
    matching_edges = matching_edges.resize.Point(format=vs.GRAY8)

    hyst_matching = core.misc.Hysteresis(matching_edges,
                                         white_lightly_expanded)
    hyst_matching = core.std.MaskedMerge(core.std.BlankClip(hyst_matching),
                                         hyst_matching, white_sub)
    return matching_edges, hyst_matching


def filter_sub(hardsub: vs.VideoNode, mask: vs.VideoNode) -> vs.VideoNode:
    """Keep the bright pixels of *hardsub* that fall inside *mask*, inverted.

    The luma is binarized at ``whitethr``, merged over a blank clip through
    *mask*, and the result is passed through :func:`invert`.
    """
    white_sub = core.std.Binarize(vsutil.get_y(hardsub), whitethr)
    black = core.std.BlankClip(white_sub)
    hardsub_expr = core.std.MaskedMerge(black, white_sub, mask,
                                        first_plane=True)
    return invert(hardsub_expr)


def run_ocr(n, f, clip, ocr):
    """FrameEval callback: pick *ocr* when the frame has a subtitle diff.

    Returns *ocr* if the frame's ``HardsubDiffAverage`` property exceeds
    0.0001, otherwise *clip*.  ``n`` is the frame number (unused).
    """
    if f.props['HardsubDiffAverage'] > 0.0001:
        return ocr
    return clip


def set_sub_diff(n, f):
    """ModifyFrame callback: copy ``HardsubDiff*`` props from f[1] onto f[0].

    Returns a copy of ``f[0]`` carrying every property of ``f[1]`` whose key
    contains ``"HardsubDiff"``.  ``n`` is the frame number (unused).
    """
    fout = f[0].copy()
    for key, value in f[1].props.items():
        if "HardsubDiff" in key:
            fout.props[key] = value
    return fout


def dump(n, f, ocr):
    """FrameEval callback: append the frame's props to ``ocr_dump.json``.

    A line of JSON (the frame properties plus the frame number under ``"n"``)
    is appended when the frame has a non-empty ``OCRString`` or is flagged as
    a scene change on either side.  Always returns *ocr* unchanged.
    """
    should_dump = (f.props.get('OCRString')
                   or f.props['_SceneChangePrev'] == 1
                   or f.props['_SceneChangeNext'] == 1)
    if should_dump:
        with open("ocr_dump.json", mode="a") as j:
            props = {**f.props, "n": n}
            if 'OCRString' in props:
                # The property is stored as bytes, which json cannot encode.
                props['OCRString'] = props['OCRString'].decode()
            json.dump(props, j)
            j.write("\n")
    return ocr


def read_dump():
    """Return the sorted frame numbers with low OCR confidence in the dump.

    Reads ``ocr_dump.json`` (one JSON object per line).  An entry counts as
    low-confidence when its ``OCRConfidence`` (a single int or a list) has a
    minimum below 20.  If the file cannot be read or parsed, the error is
    printed to stderr and an empty list is returned.
    """
    data = []
    try:
        with open("ocr_dump.json") as stream:
            data = [json.loads(line) for line in stream]
    except (OSError, ValueError) as e:
        # Best effort: a missing or malformed dump just yields no entries.
        # json.JSONDecodeError is a ValueError subclass.
        print(e, file=sys.stderr)

    def to_list(value):
        """Wrap a lone int so single and multi-value entries look alike."""
        return [value] if isinstance(value, int) else value

    bad_ocr = sorted(
        entry["n"] for entry in data
        if "OCRConfidence" in entry
        and min(to_list(entry["OCRConfidence"])) < 20
    )
    return bad_ocr


def apply_ocr(clip: vs.VideoNode) -> vs.VideoNode:
    """Run the OCR plugin on *clip* and overlay the resulting frame props."""
    ocr_clip = core.ocr.Recognize(
        clip,
        language="eng",
        options=[
            "enable_noise_removal", "false",
            "tessedit_char_whitelist", alphabet,
            "user_defined_dpi", "70",
        ],
    )
    # NOTE: gating OCR per frame through run_ocr is currently disabled:
    # ocr_clip = core.std.FrameEval(clip, functools.partial(run_ocr, clip=clip, ocr=ocr_clip), clip)  # noqa
    ocr_clip = ocr_clip.text.FrameProps()
    return ocr_clip


def main(hardsub_file, raw_file, comparison=False):
    """Build the OCR filter chain for a hardsubbed file and its raw.

    Args:
        hardsub_file: Path of the hardsubbed video.
        raw_file: Path of the clean video to diff against.
        comparison: When true, return a 3x2 stacked debug view instead of the
            dumping OCR clip.

    Returns:
        The comparison clip if *comparison* is true, otherwise the OCR clip
        whose frames are appended to ``ocr_dump.json`` as they are evaluated.
    """
    src1 = lvf.src(hardsub_file)
    src1 = src1.resize.Spline36(width=1920, height=1080)
    src1 = src1.std.AssumeFPS(fpsnum=24000, fpsden=1001)

    src2 = lvf.src(raw_file)
    src2 = src2.std.AssumeFPS(fpsnum=24000, fpsden=1001)

    sub_diff = diff_mask(vsutil.get_y(src1), vsutil.get_y(src2))
    sub_diff = sub_diff.std.PlaneStats(prop="HardsubDiff")

    sub_border_mask, sub_brightness = brightness_mask(src1)

    hardsub_expr = filter_sub(src1, sub_diff)
    hardsub_expr = hardsub_expr.std.PlaneStats()
    hardsub_expr = core.std.ModifyFrame(
        hardsub_expr, [hardsub_expr, sub_diff], set_sub_diff)
    hardsub_expr = core.misc.SCDetect(hardsub_expr, 0.0005)

    ocr = apply_ocr(hardsub_expr)
    brightness_ocr = apply_ocr(sub_brightness)

    # FrameEval only supplies ``n`` and ``f`` to its callback, so the clip
    # that dump() must return has to be bound here.
    dumped_ocr = core.std.FrameEval(
        ocr, functools.partial(dump, ocr=ocr), ocr)

    src1 = src1.std.PlaneStats()
    src2 = src2.std.PlaneStats()

    if not comparison:
        return dumped_ocr

    final = lvf.comparison.stack_vertical(
        lvf.comparison.stack_horizontal(
            sub_diff.resize.Point(format=src1.format),
            brightness_ocr.resize.Point(format=src1.format),
            src2.text.FrameProps()
        ),
        lvf.comparison.stack_horizontal(
            ocr.resize.Point(format=src1.format),
            vsutil.get_y(src1).std.Binarize(whitethr).resize.Point(
                format=src1.format),
            src1.text.FrameProps()
        )
    )
    final = final.text.FrameNum(9)
    final = final.text.FrameProps()
    return final


app = typer.Typer()


@app.command()
def pipe(hardsub_file: Path, raw_file: Path):
    """Write the comparison clip to stdout as Y4M."""
    final = main(hardsub_file, raw_file, comparison=True)
    final.output(sys.stdout, y4m=True)


@app.command()
def mpv(hardsub_file: Path, raw_file: Path):
    """Play the comparison clip by piping it as Y4M into an mpv process."""
    final = main(hardsub_file, raw_file, comparison=True)
    mpv_proc = subprocess.Popen(["mpv", "-"], stdin=subprocess.PIPE)
    final.output(mpv_proc.stdin, y4m=True)
    mpv_proc.communicate()


@app.command(name="dump")
def dump_ocr():
    """Placeholder for the ``dump`` command; currently does nothing."""
    pass


if __name__ == "__main__":
    app()