-- dotfiles/.aegisub/automation/include/myaa/ASSParser.moon
--
-- 388 lines
-- 14 KiB
-- Plaintext

DependencyControl = require "l0.DependencyControl"
-- DependencyControl record describing this module: its display name, version,
-- description, author, project URL, module name and update feed.
-- The trailing nested table lists the modules this file requires
-- (aegisub.re, aegisub.util and l0.Functional); they are loaded right after
-- this record through version\requireModules!.
-- NOTE(review): the `version: "0.6.0"` on l0.Functional is presumably a minimum
-- required version -- confirm against the DependencyControl documentation.
version = DependencyControl {
name: "ASSParser",
version: "0.0.4",
description: "Utility function for parsing ASS files",
author: "Myaamori",
url: "http://github.com/TypesettingTools/Myaamori-Aegisub-Scripts",
moduleName: "myaa.ASSParser",
feed: "https://raw.githubusercontent.com/TypesettingTools/Myaamori-Aegisub-Scripts/master/DependencyControl.json",
{
"aegisub.re", "aegisub.util",
{"l0.Functional", version: "0.6.0", url: "https://github.com/TypesettingTools/Functional",
feed: "https://raw.githubusercontent.com/TypesettingTools/Functional/master/DependencyControl.json"}
}
}
re, util, F = version\requireModules!
import lshift, rshift, band, bor from bit
-- Module table; every public function of this file is attached to it.
parser = {}

-- Default column order of "Style:" lines, used when a section has no
-- "Format:" line of its own and when writing the styles section.
STYLE_FORMAT_STRING = table.concat {
    "Name", "Fontname", "Fontsize", "PrimaryColour", "SecondaryColour",
    "OutlineColour", "BackColour", "Bold", "Italic", "Underline", "StrikeOut",
    "ScaleX", "ScaleY", "Spacing", "Angle", "BorderStyle", "Outline", "Shadow",
    "Alignment", "MarginL", "MarginR", "MarginV", "Encoding"
}, ", "

-- Default column order of "Dialogue:" / "Comment:" lines.
EVENT_FORMAT_STRING = table.concat {
    "Layer", "Start", "End", "Style", "Name",
    "MarginL", "MarginR", "MarginV", "Effect", "Text"
}, ", "

-- Column order of "Data:" lines.
DATA_FORMAT_STRING = table.concat {"Id", "Key", "Value"}, ", "

-- Field values a dialogue line starts with before caller overrides are applied.
DIALOGUE_DEFAULTS = {
    class: "dialogue", section: "[Events]"
    comment: false, layer: 0
    start_time: 0, end_time: 0
    style: "Default", actor: "", effect: ""
    margin_l: 0, margin_r: 0, margin_t: 0
    text: "", extra: nil
}

-- Field values a style line starts with before caller overrides are applied.
STYLE_DEFAULTS = {
    class: "style", section: "[V4+ Styles]"
    name: "Default", fontname: "Arial", fontsize: 45
    color1: "&H00FFFFFF", color2: "&H000000FF"
    color3: "&H00000000", color4: "&H00000000"
    bold: false, italic: false, underline: false, strikeout: false
    scale_x: 100, scale_y: 100, spacing: 0, angle: 0
    borderstyle: 1, outline: 4.5, shadow: 4.5, align: 2
    margin_l: 23, margin_r: 23, margin_t: 23, encoding: 1
}
-- Returns a new line table: a copy of `line` (made with util.copy) with every
-- key/value pair of `fields` written over it. `fields` may be nil, in which
-- case the copy is returned unchanged.
create_line_from = (line, fields)->
    result = util.copy line
    for name, override in pairs fields or {}
        result[name] = override
    result
-- Creates a dialogue line from DIALOGUE_DEFAULTS overridden by `fields`.
-- The returned line always carries an `extra` table (empty when the caller
-- supplied none).
parser.create_dialogue_line = (fields)->
    dialogue = create_line_from DIALOGUE_DEFAULTS, fields
    dialogue.extra = {} unless dialogue.extra
    dialogue
-- Creates a style line from STYLE_DEFAULTS overridden by `fields`.
parser.create_style_line = (fields)->
    create_line_from STYLE_DEFAULTS, fields
-- Decodes the value column of a "Data:" line.
-- The first character selects the encoding: "e" for inline string encoding,
-- "u" for uuencoding; the rest of the string is the payload.
-- Returns the decoded string, or nil when `value` is not a string or does not
-- start with a known encoding marker (previously this case raised an error
-- because a nil payload was handed to the uudecoder).
parser.decode_extradata_value = (value)->
    return nil unless type(value) == "string"
    enc, data = value\match "^([eu])(.*)$"
    if enc == "e"
        return parser.inline_string_decode data
    elseif enc == "u"
        return parser.uudecode data
    return nil
-- Splits a format description such as "Layer, Start, End" into an array of
-- column names. Commas and spaces both act as separators; empty entries are
-- dropped.
parse_format_line = (format_string)->
    names = {}
    for name in format_string\gmatch "([^, ]+)"
        names[#names + 1] = name
    names
-- Parses one raw text row of an ASS file into a line table.
--
-- raw:       the row, e.g. "Dialogue: 0,0:00:00.00,...".
-- extradata: optional table mapping extradata id -> parsed "Data:" line; used
--            to resolve a leading "{=1=2}" block in dialogue text.
-- format:    optional array of column names (from a "Format:" row); when nil
--            the default format for the row type is used.
--
-- Returns, depending on the row type:
--   * {class: "format", format: {...}}        for "Format:" rows
--   * {class: "info", key: ..., value: ...}   for any unrecognised "Key: value"
--   * a dialogue line / style line            for Dialogue, Comment, Style
--   * {class: "data", id:, key:, value:}      for "Data:" rows
--   * nil when the row has no "key: value" shape, when the number of columns
--     does not match the format, or when a "Data:" row has a non-numeric id,
--     a missing column, or a value without an "e"/"u" encoding marker.
parser.raw_to_line = (raw, extradata=nil, format=nil)->
    line_type, value = raw\match "^([^:]+):%s*(.*)$"
    if not value
        return nil

    default_format = {Dialogue: EVENT_FORMAT_STRING,
                      Comment: EVENT_FORMAT_STRING,
                      Style: STYLE_FORMAT_STRING,
                      Data: DATA_FORMAT_STRING}

    if line_type == "Format"
        return {class: "format", format: parse_format_line value}
    elseif not default_format[line_type]
        return {class: "info", key: line_type, value: value}

    format = format or parse_format_line default_format[line_type]
    -- the split is limited by the column count so that commas inside the
    -- final column (e.g. the dialogue text) stay in that column
    elements = F.string.split value, ",", 1, true, #format - 1
    return nil if #elements != #format

    -- column name -> raw column text
    fields = {format[i], elements[i] for i=1,#elements}

    if line_type == "Dialogue" or line_type == "Comment"
        line = parser.create_dialogue_line
            actor: fields.Name, comment: line_type == "Comment"
            effect: fields.Effect, start_time: F.util.assTimecode2ms(fields.Start)
            end_time: F.util.assTimecode2ms(fields.End), layer: tonumber(fields.Layer)
            margin_l: tonumber(fields.MarginL), margin_r: tonumber(fields.MarginR)
            margin_t: tonumber(fields.MarginV), style: fields.Style
            text: fields.Text

        -- handle extradata (e.g. '{=32=33}Line text')
        extramatch = re.match line.text, "^\\{((?:=\\d+)+)\\}(.*)$"
        if extramatch
            -- the id block is always stripped from the visible text
            line.text = extramatch[3].str
            if extradata
                for id in extramatch[2].str\gmatch "=(%d+)"
                    id = tonumber id
                    -- ids without a matching Data line are silently skipped
                    if extradata[id]
                        eline = extradata[id]
                        line.extra[eline.key] = eline.value
            else
                -- ids were found but the caller supplied no extradata table
                aegisub.log 2, "WARNING: Found extradata ID, but no extradata mapping provided: #{raw}\n"

        return line
    elseif line_type == "Style"
        -- any other flag text maps to nil, which leaves the default in place
        boolean_map = {["-1"]: true, ["0"]: false}
        line = parser.create_style_line
            name: fields.Name, fontname: fields.Fontname
            fontsize: tonumber(fields.Fontsize), color1: fields.PrimaryColour
            color2: fields.SecondaryColour, color3: fields.OutlineColour
            color4: fields.BackColour, bold: boolean_map[fields.Bold]
            italic: boolean_map[fields.Italic], underline: boolean_map[fields.Underline]
            strikeout: boolean_map[fields.StrikeOut], scale_x: tonumber(fields.ScaleX)
            scale_y: tonumber(fields.ScaleY), spacing: tonumber(fields.Spacing)
            angle: tonumber(fields.Angle), borderstyle: tonumber(fields.BorderStyle)
            outline: tonumber(fields.Outline), shadow: tonumber(fields.Shadow)
            align: tonumber(fields.Alignment), margin_l: tonumber(fields.MarginL)
            margin_r: tonumber(fields.MarginR), margin_t: tonumber(fields.MarginV)
            encoding: tonumber(fields.Encoding)
        return line
    elseif line_type == "Data"
        id = tonumber fields.Id
        encoded = fields.Value
        -- A Data row needs a numeric id, a key and a value that starts with a
        -- known encoding marker; anything else is reported as malformed (nil)
        -- instead of raising inside the decoder or when used as a table key.
        return nil unless id and fields.Key and encoded
        return nil unless encoded\match "^[eu]"
        return {class: "data", id: id,
                key: fields.Key, value: parser.decode_extradata_value encoded}
-- Serialises a line table back into one row of ASS text (without a trailing
-- newline). Handles the classes "dialogue", "style" and "info"; any other
-- class yields nil.
parser.line_to_raw = (line)->
    switch line.class
        when "dialogue"
            kind = line.comment and "Comment" or "Dialogue"
            timing = "#{F.util.ms2AssTimecode line.start_time},#{F.util.ms2AssTimecode line.end_time}"
            margins = "#{line.margin_l},#{line.margin_r},#{line.margin_t}"
            "#{kind}: #{line.layer},#{timing},#{line.style},#{line.actor},#{margins},#{line.effect},#{line.text}"
        when "style"
            -- ASS stores boolean style flags as -1 (on) and 0 (off)
            flags = {[true]: "-1", [false]: "0"}
            style_color = (color)-> util.ass_style_color util.extract_color color
            font = "#{line.name},#{line.fontname},#{line.fontsize}"
            colours = "#{style_color line.color1},#{style_color line.color2}," ..
                "#{style_color line.color3},#{style_color line.color4}"
            toggles = "#{flags[line.bold]},#{flags[line.italic]}," ..
                "#{flags[line.underline]},#{flags[line.strikeout]}"
            geometry = "#{line.scale_x},#{line.scale_y},#{line.spacing},#{line.angle}"
            border = "#{line.borderstyle},#{line.outline},#{line.shadow},#{line.align}"
            margins = "#{line.margin_l},#{line.margin_r},#{line.margin_t}"
            "Style: #{font},#{colours},#{toggles},#{geometry},#{border},#{margins},#{line.encoding}"
        when "info"
            "#{line.key}: #{line.value}"
-- Applies inline string encoding: every byte that is a control character
-- (<= 0x1F), non-ASCII (>= 0x80), or one of "#" "," ":" "|" is replaced by
-- "#" followed by its two-digit uppercase hex code; all other bytes are
-- copied through unchanged.
parser.inline_string_encode = (input)->
    -- "#", ",", ":" and "|"
    reserved = {[0x23]: true, [0x2C]: true, [0x3A]: true, [0x7C]: true}
    escape = (ch)->
        code = ch\byte!
        if code <= 0x1F or code >= 0x80 or reserved[code]
            string.format "#%02X", code
        else
            ch
    -- assigning to a single name drops gsub's second result (the match count)
    escaped = input\gsub ".", escape
    escaped
-- Reverses inline string encoding: "#" followed by hex digits becomes the
-- byte with that code; everything else is copied through.
--
-- As before, a "#" that is the very last character is kept literally, and a
-- "#" followed by a single hex digit at the end of the input decodes that one
-- digit. A "#" followed by anything that is not purely hexadecimal is now
-- kept literally as well; previously tonumber returned nil for it and
-- string.char raised an error.
parser.inline_string_decode = (input)->
    output = {}
    i = 1
    while i <= #input
        ch = input\sub i, i
        code = nil
        if ch == "#" and i + 1 <= #input
            digits = input\sub i + 1, i + 2
            code = tonumber digits, 16 if digits\match "^%x+$"
        if code
            table.insert output, string.char code
            -- skip the "#" and the two positions the escape occupies
            i += 3
        else
            table.insert output, ch
            i += 1
    return table.concat output
-- Encodes a byte string three input bytes at a time into four 6-bit values,
-- each emitted as the character with code value + 33. A trailing group of one
-- or two bytes is zero-padded for the bit arithmetic and emits only two or
-- three characters respectively.
parser.uuencode = (input)->
    encoded = {}
    total = #input
    for offset = 1, total, 3
        b1, b2, b3 = input\byte offset, offset + 2
        -- pad a short final group with zero bytes
        b2 = b2 or 0
        b3 = b3 or 0
        s1 = rshift b1, 2
        s2 = bor (lshift (band b1, 0x3), 4), (rshift (band b2, 0xF0), 4)
        s3 = bor (lshift (band b2, 0xF), 2), (rshift (band b3, 0xC0), 6)
        s4 = band b3, 0x3F
        sextets = {s1, s2, s3, s4}
        -- number of output characters carrying real data for this group
        emit = math.min total - offset + 2, 4
        for k = 1, emit
            encoded[#encoded + 1] = string.char sextets[k] + 33
    table.concat encoded
-- Decodes a string produced by parser.uuencode: every group of four
-- characters (code - 33 each) yields three bytes; a final group of two or
-- three characters yields one or two bytes, and a lone trailing character
-- yields nothing.
--
-- Every intermediate and output value is reduced to a single byte with
-- `band ..., 0xFF`. For well-formed input this changes nothing; for input
-- containing characters outside the 33..96 alphabet it avoids the error
-- string.char raises for values outside 0..255.
parser.uudecode = (input)->
    ret = {}
    pos = 1
    while pos <= #input
        chunk = input\sub pos, pos+3
        src = [band (string.byte(c) - 33), 0xFF for c in chunk\gmatch "."]
        if #src > 1
            table.insert ret, band (bor (lshift src[1], 2), (rshift src[2], 4)), 0xFF
        if #src > 2
            table.insert ret, band (bor (lshift (band src[2], 0xF), 4), (rshift src[3], 2)), 0xFF
        if #src > 3
            table.insert ret, band (bor (lshift (band src[3], 0x3), 6), src[4]), 0xFF
        pos += #src
    return table.concat [string.char i for i in *ret]
-- Parsed representation of an ASS file.
--
-- Fields filled in by the constructor:
--   sections                 section name -> array of raw rows
--   script_info              array of info lines from [Script Info]
--   script_info_mapping      info key -> value
--   aegisub_garbage          array of info lines from [Aegisub Project Garbage]
--   aegisub_garbage_mapping  info key -> value
--   styles                   array of style lines from [V4+ Styles]
--   events                   array of dialogue lines from [Events]
--   extradata                extradata id -> parsed Data line
--   extradata_mapping        extradata key -> (decoded value -> id)
class ASSFile
    -- file: an object providing a `lines` iterator method (e.g. an open file
    -- handle). The whole file is read and parsed immediately.
    new: (file)=>
        @sections = {}
        @styles = {}
        @events = {}
        @script_info = {}
        @script_info_mapping = {}
        @aegisub_garbage = {}
        @aegisub_garbage_mapping = {}
        @extradata = {}
        @extradata_mapping = {}
        @parse file

    -- Reads all rows, then parses each known section. Extradata is parsed
    -- first so that dialogue lines can resolve their "{=id}" references.
    parse: (file)=>
        @read_sections file
        @parse_extradata!
        @script_info = @parse_section "Script Info", {"info": true}
        @aegisub_garbage = @parse_section "Aegisub Project Garbage", {"info": true}
        @styles = @parse_section "V4+ Styles", {"style": true}
        @events = @parse_section "Events", {"dialogue": true}

        for info in *@script_info
            @script_info_mapping[info.key] = info.value

        for garbage in *@aegisub_garbage
            @aegisub_garbage_mapping[garbage.key] = garbage.value

    -- Sorts the rows of `file` into @sections by their "[Section]" header.
    -- Empty rows and rows starting with ";" are dropped. Rows that appear
    -- before the first header are logged and ignored (they used to raise an
    -- error because there was no section table to insert into).
    read_sections: (file)=>
        current_section = nil
        -- read lines from file, sort into sections
        for row in file\lines!
            -- remove BOM if present, remove newlines, and trim leading spaces
            row = F.string.trimLeft (row\gsub "^\xEF\xBB\xBF", "")\gsub "[\r\n]*$", ""
            if row == "" or row\match "^;"
                continue

            section = row\match "^%[(.*)%]$"
            if section
                current_section = section
                @sections[current_section] = {}
                continue

            if not current_section
                aegisub.log 2, "WARNING: Ignoring line outside of any section: #{row}\n"
                continue

            table.insert @sections[current_section], row

    -- Parses [Aegisub Extradata] into @extradata (by id) and
    -- @extradata_mapping (key -> value -> id). Rows that do not parse as Data
    -- lines are logged and skipped.
    parse_extradata: =>
        if @sections["Aegisub Extradata"]
            for row in *@sections["Aegisub Extradata"]
                line = parser.raw_to_line row
                if not line or line.class != "data"
                    aegisub.log 2, "WARNING: Malformed data line: #{row}\n"
                    continue

                @extradata[line.id] = line
                @extradata_mapping[line.key] = @extradata_mapping[line.key] or {}
                @extradata_mapping[line.key][line.value] = line.id

    -- Parses the rows of one section and returns the resulting line tables.
    -- section:          section name without brackets.
    -- expected_classes: set (class name -> true) of line classes to keep.
    -- A "Format:" row switches the column layout for the rows that follow it.
    -- Unparseable rows and rows of an unexpected class are logged and skipped.
    -- Returns an empty table when the section does not exist.
    parse_section: (section, expected_classes)=>
        lines = {}
        return lines if not @sections[section]

        format = nil
        for row in *@sections[section]
            line = parser.raw_to_line row, @extradata, format
            if not line
                -- log the offending row; `line` is nil on this branch
                aegisub.log 2, "WARNING: Malformed line: #{row}\n"
            elseif line.class == "format"
                format = line.format
            elseif expected_classes[line.class]
                table.insert lines, line
            else
                aegisub.log 2, "WARNING: Unexpected type #{line.class} in section #{section}\n"

        return lines
-- Parses `file` (passed straight to the ASSFile constructor) and returns the
-- resulting ASSFile instance.
parser.parse_file = (file)-> ASSFile file
-- Writes the [V4+ Styles] section: header, default format row, then one row
-- per entry of `styles`. Each piece of text is passed to `callback`.
parser.generate_styles_section = (styles, callback)->
    for header in *{"[V4+ Styles]\n", "Format: #{STYLE_FORMAT_STRING}\n"}
        callback header
    for idx = 1, #styles
        raw = parser.line_to_raw styles[idx]
        callback raw .. "\n"
-- Writes the [Events] section and, when any line references extradata, a
-- following [Aegisub Extradata] section. Each piece of text is passed to
-- `callback`.
--
-- events:            array of dialogue lines.
-- extradata_mapping: optional table key -> (value -> id). When nil, the
--                    `extra` tables of the lines are ignored. Otherwise known
--                    key/value pairs reuse their id and new pairs get fresh
--                    ids, which are also stored back into this table.
--
-- The "{=id...}" prefix is only added to the written row: each line's `text`
-- is restored afterwards, so writing the same events more than once no longer
-- stacks prefixes on the caller's lines.
parser.generate_events_section = (events, extradata_mapping, callback)->
    callback "[Events]\n"
    callback "Format: #{EVENT_FORMAT_STRING}\n"

    -- find the largest extradata ID seen so far
    last_eid = 0
    if extradata_mapping
        for _, ids_by_value in pairs extradata_mapping
            for _, eid in pairs ids_by_value
                last_eid = math.max last_eid, eid

    -- id -> {key, value} for every extradata entry referenced by a line
    extradata_to_write = {}
    for line in *events
        prefix = nil
        -- handle extradata
        if line.extra and extradata_mapping
            lineindices = {}
            for key, value in pairs line.extra
                -- look for data in the original file's extradata
                cached_id = extradata_mapping[key] and extradata_mapping[key][value]
                if not cached_id
                    -- if new extradata, generate new ID and cache it
                    last_eid += 1
                    cached_id = last_eid
                    extradata_mapping[key] = extradata_mapping[key] or {}
                    extradata_mapping[key][value] = cached_id

                table.insert lineindices, cached_id
                extradata_to_write[cached_id] = {key, value}

            -- build the index block (e.g. {=32=33}) placed before the text
            if #lineindices > 0
                table.sort lineindices
                indexstring = table.concat ["=#{ind}" for ind in *lineindices]
                prefix = "{#{indexstring}}"

        if prefix
            -- serialise with the prefix, then put the original text back
            original_text = line.text
            line.text = prefix .. original_text
            raw = parser.line_to_raw line
            line.text = original_text
            callback raw .. "\n"
        else
            callback parser.line_to_raw(line) .. "\n"

    out_indices = [ind for ind, _ in pairs extradata_to_write]
    if #out_indices > 0
        callback "\n[Aegisub Extradata]\n"
        table.sort out_indices
        for ind in *out_indices
            {key, value} = extradata_to_write[ind]

            encoded_data = parser.inline_string_encode value
            -- a mystical incantation passed down from subtitle_format_ass.cpp:
            -- use uuencoding when the inline encoding is more than 4/3 the
            -- size of the raw value
            if 4*#value < 3*#encoded_data
                value = "u" .. parser.uuencode value
            else
                value = "e" .. encoded_data

            callback "Data: #{ind},#{key},#{value}\n"
-- Writes the [Script Info] section through `callback`: an optional UTF-8 byte
-- order mark (on by default), the section header, then one row per entry of
-- `lines`.
parser.generate_script_info_section = (lines, callback, bom=true)->
    callback "\xEF\xBB\xBF" if bom
    callback "[Script Info]\n"
    for idx = 1, #lines
        raw = parser.line_to_raw lines[idx]
        callback raw .. "\n"
-- Writes the [Aegisub Project Garbage] section through `callback`: the
-- section header followed by one row per entry of `lines`.
parser.generate_aegisub_garbage_section = (lines, callback)->
    callback "[Aegisub Project Garbage]\n"
    for idx = 1, #lines
        raw = parser.line_to_raw lines[idx]
        callback raw .. "\n"
-- Writes a complete ASS file through `callback`. Sections are emitted in the
-- order Script Info, Aegisub Project Garbage, V4+ Styles, Events; any section
-- whose argument is nil/false is left out. A blank line is written between
-- consecutive sections, but not before the first one.
parser.generate_file = (script_info, aegisub_garbage, styles, events, extradata_mapping, callback)->
    wrote_any = false
    emit_section = (present, writer)->
        return unless present
        -- separator goes before every section except the first one written
        callback "\n" if wrote_any
        wrote_any = true
        writer!

    emit_section script_info, -> parser.generate_script_info_section script_info, callback
    emit_section aegisub_garbage, -> parser.generate_aegisub_garbage_section aegisub_garbage, callback
    emit_section styles, -> parser.generate_styles_section styles, callback
    emit_section events, -> parser.generate_events_section events, extradata_mapping, callback
-- Expose the DependencyControl record on the module table, then pass the
-- module table to the record's `register` method and return its result as
-- the value of this file.
parser.version = version
return version\register parser