initial commit

This commit is contained in:
odrling 2022-12-28 18:29:13 +01:00
commit 9e1c5fbccd
No known key found for this signature in database
GPG Key ID: A0145F975F9F8B75
3 changed files with 225 additions and 0 deletions

6
.gitignore vendored Normal file
View File

@ -0,0 +1,6 @@
*.lwi
*.mkv
*.mp4
.envrc
.venv/
__pycache__/

216
ocr.py Executable file
View File

@ -0,0 +1,216 @@
#!/usr/bin/env python3
import json
import subprocess
import sys
from pathlib import Path
import lvsfunc as lvf
import typer
import vapoursynth as vs
import vsutil
# Shorthand for the VapourSynth core singleton; every filter call below goes through it.
core = vs.core
# Luma thresholds handed to std.Binarize in brightness_mask() and filter_sub():
# blackthr is used to find dark pixels, whitethr to find bright ones.
blackthr = 20
whitethr = 200
# Characters passed to the OCR plugin as "tessedit_char_whitelist" in apply_ocr().
# The first literal is a raw string, so the backslashes in it are kept as literal
# characters of the whitelist.
# NOTE(review): the "fifl" run near the end may originally have been ligature
# characters that were normalised somewhere along the way -- confirm.
alphabet = (r" -!\"#%&'()*+,./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]_"
"abcdefghijklmnopqrstuvwxyz{|}©«®°»ÀÂÆÇÈÉÊËÎÏÔÙÛÜàâäæçèéêëîïñôùûüÿŒœŸ‘’“”$£€fifl—…")
def invert(clip):
    """Return *clip* with every sample replaced by ``abs(x - 255)``.

    For the 0/255 masks produced by ``std.Binarize`` in this file this swaps
    black and white.
    """
    expression = "x 255 - abs"
    return core.std.Expr([clip], expression)
def diff_mask(hardsub, softsub):
    """Build a binary mask of the areas where *hardsub* and *softsub* differ.

    Both clips are point-resized to a quarter of *hardsub*'s width and height
    and converted to YUV444P8. The per-plane absolute differences are summed,
    binarized at 40, and the result is point-resized back to *hardsub*'s
    dimensions.
    """
    small_w = hardsub.width // 4
    small_h = hardsub.height // 4

    def _small_planes(clip):
        # Downscale first so the comparison works on a coarse version of the picture.
        return vsutil.split(clip.resize.Point(small_w, small_h, format=vs.YUV444P8))

    hard_y, hard_cb, hard_cr = _small_planes(hardsub)
    soft_y, soft_cb, soft_cr = _small_planes(softsub)

    plane_diffs = [
        core.std.Expr([hard_plane, soft_plane], "x y - abs")
        for hard_plane, soft_plane in (
            (hard_y, soft_y),
            (hard_cb, soft_cb),
            (hard_cr, soft_cr),
        )
    ]

    mask = core.std.Expr(plane_diffs, "x y z + +")
    mask = mask.std.Binarize(40)
    return mask.resize.Point(hardsub.width, hardsub.height)
# mask subs
def brightness_mask(hardsub):
    """Derive two subtitle masks from the luma of *hardsub*.

    Returns a ``(matching_edges, hyst_matching)`` tuple:

    * ``matching_edges`` marks pixels that are both inside the bright
      (>= ``whitethr``) area grown by six ``std.Maximum`` passes and in the
      inverted ``blackthr`` binarization, i.e. dark pixels near bright ones.
    * ``hyst_matching`` is ``misc.Hysteresis`` of those edges against the
      bright area grown by two passes, kept only where the un-grown bright
      mask is set.
    """
    luma = vsutil.get_y(hardsub)
    bright = core.std.Binarize(luma, whitethr)
    dark = invert(core.std.Binarize(luma, blackthr))

    bright_wide = vsutil.iterate(bright, core.std.Maximum, 6)
    bright_narrow = vsutil.iterate(bright, core.std.Maximum, 2)

    edges = core.std.Expr([bright_wide, dark], "x y and 255 *")
    edges = edges.resize.Point(format=vs.GRAY8)

    grown = core.misc.Hysteresis(edges, bright_narrow)
    blank = core.std.BlankClip(grown)
    restricted = core.std.MaskedMerge(blank, grown, bright)
    return edges, restricted
def filter_sub(hardsub, mask):
    """Isolate the bright pixels of *hardsub* that lie inside *mask*.

    The luma is binarized at ``whitethr``, everything outside *mask* is
    replaced with a blank clip, and the result is inverted before being
    returned.
    """
    bright = core.std.Binarize(vsutil.get_y(hardsub), whitethr)
    background = core.std.BlankClip(bright)
    masked = core.std.MaskedMerge(background, bright, mask, first_plane=True)
    return invert(masked)
def run_ocr(n, f, clip, ocr):
    """Pick *ocr* or *clip* for frame *n* based on the frame's properties.

    *ocr* is returned when the ``HardsubDiffAverage`` property of *f* is
    greater than 0.0001; otherwise *clip* is returned. *n* is accepted but
    not used.
    """
    # An alternative trigger (scene changes or every third frame) was tried
    # here previously and left disabled.
    has_difference = f.props['HardsubDiffAverage'] > 0.0001
    return ocr if has_difference else clip
def set_sub_diff(n, f):
    """Copy every ``HardsubDiff*`` property from ``f[1]`` onto a copy of ``f[0]``.

    *f* is a sequence of two frames. Any property of the second frame whose
    name contains ``"HardsubDiff"`` is written to the returned copy of the
    first frame. *n* is accepted but not used.
    """
    target = f[0].copy()
    wanted = [
        (key, value)
        for key, value in f[1].props.items()
        if "HardsubDiff" in key
    ]
    for key, value in wanted:
        target.props[key] = value
    return target
def fill_text(clip):
    """Return a clip whose first plane is a scanline-filled version of *clip*'s.

    Every row of plane 0 is walked from its first to its last sample while a
    toggle is kept: a non-zero sample that differs from the previous sample
    flips the toggle and is kept as is; every other sample is written as 255
    while the toggle is on and 0 while it is off.

    The previous implementation tried to assign the processed plane into a
    VideoNode (``out_clip[n][0] = plane``), which clips do not support, and
    printed ``dir()`` of the plane for every frame. The work is now done
    lazily per frame through ``std.ModifyFrame`` on top of a blank clip.

    NOTE(review): this loops over every pixel in Python and will be very slow
    on full-size frames. The function is not called anywhere in this file.
    """
    fill_val = 255
    blank = core.std.BlankClip(clip)

    def _fill_frame(n, f):
        # f[0] is the blank frame to write into, f[1] the frame to read from.
        out = f[0].copy()
        src = f[1][0]
        dst = out[0]
        rows, cols = src.shape
        for row in range(rows):
            fill_next = False
            last_v = 0
            for col in range(cols):
                v = src[row, col]
                if v and v != last_v:
                    # Value changed to something non-zero: flip the toggle
                    # and keep the sample itself.
                    fill_next = not fill_next
                    dst[row, col] = v
                else:
                    dst[row, col] = fill_val if fill_next else 0
                last_v = v
        return out

    return core.std.ModifyFrame(blank, [blank, clip], _fill_frame)
def dump(n, f, clip=None, path="ocr_dump.json"):
    """Append the properties of an interesting frame to a JSON-lines file.

    A frame is recorded when it has a non-empty ``OCRString`` property or
    when ``_SceneChangePrev`` / ``_SceneChangeNext`` equals 1. The record
    holds every frame property plus the frame number under ``"n"``.

    Args:
        n: Frame number.
        f: Frame whose ``props`` mapping is inspected.
        clip: Value handed back to the caller. When this function is used as
            a ``std.FrameEval`` callback the caller must bind the clip to
            pass through (see ``main``); previously the function returned an
            undefined global and raised ``NameError`` on the first frame.
        path: File the JSON lines are appended to.

    Returns:
        *clip*, unchanged.
    """
    props = f.props
    interesting = (props.get('OCRString')
                   or props.get('_SceneChangePrev') == 1
                   or props.get('_SceneChangeNext') == 1)
    if interesting:
        record = {**props, "n": n}
        text = record.get('OCRString')
        # The OCR text may arrive as bytes; JSON needs str.
        if isinstance(text, bytes):
            record['OCRString'] = text.decode()
        # Serialise before touching the file so a failure cannot leave a
        # half-written line behind.
        line = json.dumps(record)
        with open(path, mode="a", encoding="utf-8") as j:
            j.write(line + "\n")
    return clip


def read_dump(path="ocr_dump.json", min_confidence=20):
    """Return the sorted frame numbers whose OCR confidence is too low.

    Reads the JSON-lines file written by ``dump``. A record counts as bad
    when it has an ``OCRConfidence`` entry (a single number or a list of
    numbers) whose smallest value is below *min_confidence*.

    If the file cannot be read or parsed, the error is printed to stderr and
    an empty list is returned.
    """
    records = []
    try:
        with open(path, encoding="utf-8") as stream:
            records = [json.loads(line) for line in stream if line.strip()]
    except (OSError, ValueError) as e:
        # Best effort: a missing or corrupt dump simply yields no results.
        print(e, file=sys.stderr)

    bad_ocr = []
    for record in records:
        if "OCRConfidence" not in record:
            continue
        confidence = record["OCRConfidence"]
        values = confidence if isinstance(confidence, list) else [confidence]
        lowest = min(values, default=None)
        if lowest is not None and lowest < min_confidence:
            bad_ocr.append(record["n"])
    return sorted(bad_ocr)


def apply_ocr(clip):
    """Run the OCR plugin on *clip* and overlay the frame properties.

    Recognition uses the ``eng`` language, disables noise removal, restricts
    the output to the characters in ``alphabet`` and sets the DPI to 70.
    """
    ocr_clip = core.ocr.Recognize(clip, language="eng",
                                  options=["enable_noise_removal", "false",
                                           "tessedit_char_whitelist", alphabet,
                                           "user_defined_dpi", "70"])
    # NOTE: every frame is recognised; per-frame gating through run_ocr and
    # std.FrameEval is not wired in.
    ocr_clip = ocr_clip.text.FrameProps()
    return ocr_clip


def main(hardsub_file, raw_file, comparison=False):
    """Build the OCR filter graph for a hardsubbed video and its raw counterpart.

    Args:
        hardsub_file: Path of the video with burnt-in subtitles.
        raw_file: Path of the same video without subtitles.
        comparison: When true, return a 2x3 debug mosaic (difference mask,
            OCR of the brightness mask, raw source on top; OCR input,
            binarized luma, hardsub source below) with frame numbers and
            properties drawn on it. Otherwise return the OCR clip wrapped so
            that every requested frame is passed through ``dump``.

    Returns:
        The clip described above.
    """
    # The hardsubbed source is scaled to 1080p; both sources are set to
    # 24000/1001 fps.
    src1 = lvf.src(hardsub_file)
    src1 = src1.resize.Spline36(width=1920, height=1080)
    src1 = src1.std.AssumeFPS(fpsnum=24000, fpsden=1001)
    src2 = lvf.src(raw_file)
    src2 = src2.std.AssumeFPS(fpsnum=24000, fpsden=1001)

    # Where do the two sources differ? The statistics of that mask are stored
    # under the "HardsubDiff" property prefix.
    sub_diff = diff_mask(vsutil.get_y(src1), vsutil.get_y(src2))
    sub_diff = sub_diff.std.PlaneStats(prop="HardsubDiff")

    # Bright pixels inside the difference mask are what gets fed to OCR.
    hardsub_expr = filter_sub(src1, sub_diff)
    hardsub_expr = hardsub_expr.std.PlaneStats()
    hardsub_expr = core.std.ModifyFrame(hardsub_expr, [hardsub_expr, sub_diff], set_sub_diff)
    hardsub_expr = core.misc.SCDetect(hardsub_expr, 0.0005)
    ocr = apply_ocr(hardsub_expr)

    if not comparison:
        def _dump_frame(n, f):
            # dump() needs to hand a clip back to FrameEval; bind it here.
            return dump(n, f, clip=ocr)

        return core.std.FrameEval(ocr, _dump_frame, ocr)

    # Only the second mask is shown in the mosaic.
    _, sub_brightness = brightness_mask(src1)
    brightness_ocr = apply_ocr(sub_brightness)
    src1 = src1.std.PlaneStats()
    src2 = src2.std.PlaneStats()

    top_row = lvf.comparison.stack_horizontal(
        sub_diff.resize.Point(format=src1.format),
        brightness_ocr.resize.Point(format=src1.format),
        src2.text.FrameProps(),
    )
    bottom_row = lvf.comparison.stack_horizontal(
        ocr.resize.Point(format=src1.format),
        vsutil.get_y(src1).std.Binarize(whitethr).resize.Point(format=src1.format),
        src1.text.FrameProps(),
    )
    final = lvf.comparison.stack_vertical(top_row, bottom_row)
    final = final.text.FrameNum(9)
    final = final.text.FrameProps()
    return final
# Command-line application; the functions decorated below are its subcommands.
app = typer.Typer()


@app.command()
def pipe(hardsub_file: Path, raw_file: Path):
    # Render the comparison mosaic and write it to stdout as a Y4M stream.
    comparison_clip = main(hardsub_file, raw_file, comparison=True)
    comparison_clip.output(sys.stdout, y4m=True)
@app.command()
def mpv(hardsub_file: Path, raw_file: Path):
    # Render the comparison mosaic and feed it as Y4M to an mpv process
    # reading from its standard input.
    comparison_clip = main(hardsub_file, raw_file, comparison=True)
    player_cmd = ["mpv", "-"]
    player = subprocess.Popen(player_cmd, stdin=subprocess.PIPE)
    comparison_clip.output(player.stdin, y4m=True)
    # Close the pipe and wait for the player to exit.
    player.communicate()
@app.command(name="dump")
def dump_ocr():
    # Placeholder: the "dump" subcommand is registered but does nothing yet.
    # NOTE(review): main(..., comparison=False) builds the clip that writes
    # the dump, but nothing in this file requests its frames.
    return None


if __name__ == "__main__":
    # Dispatch to the subcommand named on the command line.
    app()

3
requirements.txt Normal file
View File

@ -0,0 +1,3 @@
lvsfunc
typer
vsutil