388 lines
14 KiB
Plaintext
388 lines
14 KiB
Plaintext
DependencyControl = require "l0.DependencyControl"
|
|
|
|
version = DependencyControl {
|
|
name: "ASSParser",
|
|
version: "0.0.4",
|
|
description: "Utility function for parsing ASS files",
|
|
author: "Myaamori",
|
|
url: "http://github.com/TypesettingTools/Myaamori-Aegisub-Scripts",
|
|
moduleName: "myaa.ASSParser",
|
|
feed: "https://raw.githubusercontent.com/TypesettingTools/Myaamori-Aegisub-Scripts/master/DependencyControl.json",
|
|
{
|
|
"aegisub.re", "aegisub.util",
|
|
{"l0.Functional", version: "0.6.0", url: "https://github.com/TypesettingTools/Functional",
|
|
feed: "https://raw.githubusercontent.com/TypesettingTools/Functional/master/DependencyControl.json"}
|
|
}
|
|
}
|
|
|
|
re, util, F = version\requireModules!
|
|
|
|
import lshift, rshift, band, bor from bit
|
|
|
|
-- Module table; every public function of this file is attached to it.
parser = {}

-- Column order assumed for "Style:" lines when no explicit "Format:" line
-- has been seen. Grouped here by meaning: identity, colours, font flags,
-- geometry, border, margins/encoding.
STYLE_FORMAT_STRING = "Name, Fontname, Fontsize, " ..
  "PrimaryColour, SecondaryColour, OutlineColour, BackColour, " ..
  "Bold, Italic, Underline, StrikeOut, " ..
  "ScaleX, ScaleY, Spacing, Angle, " ..
  "BorderStyle, Outline, Shadow, Alignment, " ..
  "MarginL, MarginR, MarginV, Encoding"

-- Column order assumed for "Dialogue:" and "Comment:" lines.
EVENT_FORMAT_STRING = "Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text"

-- Column order assumed for "Data:" lines.
DATA_FORMAT_STRING = "Id, Key, Value"
|
|
|
|
-- Template for dialogue/comment events. parser.create_dialogue_line copies
-- this table and overlays caller-supplied fields on top of it.
-- `extra` is deliberately left unset here; create_dialogue_line gives each
-- new line its own table.
DIALOGUE_DEFAULTS = {
  class: "dialogue",
  section: "[Events]",
  comment: false,
  layer: 0,
  start_time: 0,
  end_time: 0,
  style: "Default",
  actor: "",
  margin_l: 0,
  margin_r: 0,
  margin_t: 0,
  effect: "",
  text: "",
  extra: nil
}
|
|
|
|
-- Template for style lines. parser.create_style_line copies this table and
-- overlays caller-supplied fields on top of it.
STYLE_DEFAULTS = {
  class: "style",
  section: "[V4+ Styles]",
  name: "Default",
  fontname: "Arial",
  fontsize: 45,
  color1: "&H00FFFFFF",
  color2: "&H000000FF",
  color3: "&H00000000",
  color4: "&H00000000",
  bold: false,
  italic: false,
  underline: false,
  strikeout: false,
  scale_x: 100,
  scale_y: 100,
  spacing: 0,
  angle: 0,
  borderstyle: 1,
  outline: 4.5,
  shadow: 4.5,
  align: 2,
  margin_l: 23,
  margin_r: 23,
  margin_t: 23,
  encoding: 1
}
|
|
|
|
-- Builds a new line table from a template.
-- line:   template table; it is copied with util.copy and never modified
-- fields: optional table of overrides written on top of the copy
-- Returns the new table.
create_line_from = (line, fields)->
  result = util.copy line
  for name, override in pairs fields or {}
    result[name] = override
  result
|
|
|
|
-- Creates a dialogue line from DIALOGUE_DEFAULTS overlaid with `fields`.
-- The returned line always carries an `extra` table: the one supplied in
-- `fields`, or a fresh empty table otherwise.
parser.create_dialogue_line = (fields)->
  dialogue = create_line_from DIALOGUE_DEFAULTS, fields
  unless dialogue.extra
    dialogue.extra = {}
  dialogue
|
|
|
|
-- Creates a style line from STYLE_DEFAULTS overlaid with `fields`.
parser.create_style_line = (fields)->
  create_line_from STYLE_DEFAULTS, fields
|
|
|
|
-- Decodes the value column of an extradata "Data:" line.
-- The first character selects the encoding: "e" means inline-string
-- encoding; anything else is handed to the uudecoder.
parser.decode_extradata_value = (value)->
  enc, data = value\match "^([eu])(.*)$"
  decode = if enc == 'e' then parser.inline_string_decode else parser.uudecode
  decode data
|
|
|
|
-- Splits a format description such as "Layer, Start, End" into a list of
-- column names. Commas and spaces both act as separators.
parse_format_line = (format_string)->
  columns = {}
  for column in format_string\gmatch "([^, ]+)"
    columns[#columns + 1] = column
  columns
|
|
|
|
-- Column layouts used when the caller does not pass an explicit format.
-- Line types missing from this table are parsed as generic "Key: Value"
-- info lines.
DEFAULT_LINE_FORMATS = {
  Dialogue: EVENT_FORMAT_STRING,
  Comment: EVENT_FORMAT_STRING,
  Style: STYLE_FORMAT_STRING,
  Data: DATA_FORMAT_STRING
}

-- Converts a numeric style flag column (Bold, Italic, ...) to a boolean.
-- Any non-zero number counts as enabled, so "1" or a padded " -1" is no
-- longer silently read as disabled. Returns nil for non-numeric input, in
-- which case the style default is kept.
parse_style_flag = (raw_flag)->
  number = tonumber raw_flag
  if number == nil
    return nil
  number != 0

-- Builds a dialogue line table from the parsed columns.
build_dialogue_line = (fields, is_comment)->
  parser.create_dialogue_line {
    actor: fields.Name,
    comment: is_comment,
    effect: fields.Effect,
    start_time: F.util.assTimecode2ms(fields.Start),
    end_time: F.util.assTimecode2ms(fields.End),
    layer: tonumber(fields.Layer),
    margin_l: tonumber(fields.MarginL),
    margin_r: tonumber(fields.MarginR),
    margin_t: tonumber(fields.MarginV),
    style: fields.Style,
    text: fields.Text
  }

-- Builds a style line table from the parsed columns.
build_style_line = (fields)->
  parser.create_style_line {
    name: fields.Name,
    fontname: fields.Fontname,
    fontsize: tonumber(fields.Fontsize),
    color1: fields.PrimaryColour,
    color2: fields.SecondaryColour,
    color3: fields.OutlineColour,
    color4: fields.BackColour,
    bold: parse_style_flag(fields.Bold),
    italic: parse_style_flag(fields.Italic),
    underline: parse_style_flag(fields.Underline),
    strikeout: parse_style_flag(fields.StrikeOut),
    scale_x: tonumber(fields.ScaleX),
    scale_y: tonumber(fields.ScaleY),
    spacing: tonumber(fields.Spacing),
    angle: tonumber(fields.Angle),
    borderstyle: tonumber(fields.BorderStyle),
    outline: tonumber(fields.Outline),
    shadow: tonumber(fields.Shadow),
    align: tonumber(fields.Alignment),
    margin_l: tonumber(fields.MarginL),
    margin_r: tonumber(fields.MarginR),
    margin_t: tonumber(fields.MarginV),
    encoding: tonumber(fields.Encoding)
  }

-- Strips a leading extradata reference block (e.g. '{=32=33}Line text')
-- from line.text and, when an ID -> data-line table is supplied, copies the
-- referenced key/value pairs into line.extra. IDs missing from the table
-- are reported through aegisub.log.
attach_extradata = (line, raw, extradata)->
  extramatch = re.match line.text, "^\\{((?:=\\d+)+)\\}(.*)$"
  if not extramatch
    return nil

  -- index 2 is the run of "=<id>" references, index 3 the remaining text
  line.text = extramatch[3].str
  if not extradata
    return nil

  for id in extramatch[2].str\gmatch "=(%d+)"
    eline = extradata[tonumber id]
    if eline
      line.extra[eline.key] = eline.value
    else
      aegisub.log 2, "WARNING: Found extradata ID, but no extradata mapping provided: #{raw}\n"

-- Parses one raw ASS line into a line table.
-- raw:       the line text, e.g. "Dialogue: 0,0:00:00.00,..."
-- extradata: optional table mapping extradata IDs to parsed data lines
-- format:    optional list of column names; defaults to the standard
--            layout for the line type
-- Returns nil when the line has no "Type: value" shape or its column count
-- does not match the format. Otherwise returns a table whose `class` is
-- "format", "info", "dialogue", "style" or "data".
parser.raw_to_line = (raw, extradata=nil, format=nil)->
  line_type, value = raw\match "^([^:]+):%s*(.*)$"
  if not value
    return nil

  if line_type == "Format"
    return {class: "format", format: parse_format_line value}

  default_format = DEFAULT_LINE_FORMATS[line_type]
  if not default_format
    return {class: "info", key: line_type, value: value}

  format = format or parse_format_line default_format
  -- limit the number of splits so commas inside the last column survive
  elements = F.string.split value, ",", 1, true, #format - 1
  return nil if #elements != #format

  fields = {format[i], elements[i] for i = 1, #elements}

  if line_type == "Dialogue" or line_type == "Comment"
    line = build_dialogue_line fields, line_type == "Comment"
    attach_extradata line, raw, extradata
    return line
  elseif line_type == "Style"
    return build_style_line fields
  elseif line_type == "Data"
    return {
      class: "data",
      id: tonumber(fields.Id),
      key: fields.Key,
      value: parser.decode_extradata_value(fields.Value)
    }
|
|
|
|
-- Serialises a line table back to its raw ASS text (without a trailing
-- newline). Handles the classes "dialogue", "style" and "info"; any other
-- class yields no value.
parser.line_to_raw = (line)->
  switch line.class
    when "dialogue"
      kind = line.comment and "Comment" or "Dialogue"
      columns = {
        tostring(line.layer),
        tostring(F.util.ms2AssTimecode line.start_time),
        tostring(F.util.ms2AssTimecode line.end_time),
        tostring(line.style),
        tostring(line.actor),
        tostring(line.margin_l),
        tostring(line.margin_r),
        tostring(line.margin_t),
        tostring(line.effect),
        tostring(line.text)
      }
      return kind .. ": " .. table.concat(columns, ",")
    when "style"
      -- only real booleans are mapped; anything else is printed as "nil"
      flag_text = {[true]: "-1", [false]: "0"}
      to_style_color = (color)-> util.ass_style_color util.extract_color color
      columns = {
        tostring(line.name),
        tostring(line.fontname),
        tostring(line.fontsize),
        tostring(to_style_color line.color1),
        tostring(to_style_color line.color2),
        tostring(to_style_color line.color3),
        tostring(to_style_color line.color4),
        tostring(flag_text[line.bold]),
        tostring(flag_text[line.italic]),
        tostring(flag_text[line.underline]),
        tostring(flag_text[line.strikeout]),
        tostring(line.scale_x),
        tostring(line.scale_y),
        tostring(line.spacing),
        tostring(line.angle),
        tostring(line.borderstyle),
        tostring(line.outline),
        tostring(line.shadow),
        tostring(line.align),
        tostring(line.margin_l),
        tostring(line.margin_r),
        tostring(line.margin_t),
        tostring(line.encoding)
      }
      return "Style: " .. table.concat(columns, ",")
    when "info"
      return tostring(line.key) .. ": " .. tostring(line.value)
|
|
|
|
-- Encodes a string with the inline-string scheme: control bytes
-- (<= 0x1F), bytes >= 0x80 and the characters '#', ',', ':' and '|' are
-- written as '#' followed by two uppercase hex digits; every other byte is
-- copied unchanged.
parser.inline_string_encode = (input)->
  reserved = {[0x23]: true, [0x2C]: true, [0x3A]: true, [0x7C]: true}
  pieces = {}
  for idx = 1, #input
    byte = input\byte idx
    escape = byte <= 0x1F or byte >= 0x80 or reserved[byte]
    pieces[idx] = if escape then string.format("#%02X", byte) else string.char(byte)
  table.concat pieces
|
|
|
|
-- Decodes a string produced by parser.inline_string_encode: each '#'
-- followed by hex digits is replaced by the byte with that value.
-- A '#' that is the last character, or that is not followed by a valid
-- hex byte, is kept literally instead of raising an error, so malformed
-- data cannot abort parsing.
parser.inline_string_decode = (input)->
  output = {}
  i = 1
  while i <= #input
    char = input\sub i, i
    code = nil
    if char == "#" and i < #input
      code = tonumber(input\sub(i + 1, i + 2), 16)
    if code and code >= 0 and code <= 255
      table.insert output, string.char code
      -- skip the two characters consumed as hex digits
      i += 2
    else
      table.insert output, char
    i += 1
  return table.concat output
|
|
|
|
-- Encodes a byte string with the uuencode-style scheme used for extradata:
-- every group of up to three input bytes becomes up to four 6-bit values,
-- each stored as the character with code value + 33. A trailing partial
-- group of n bytes produces n + 1 output characters.
parser.uuencode = (input)->
  encoded = {}
  total = #input
  for pos = 1, total, 3
    b1, b2, b3 = input\byte pos, pos + 2
    -- pad a short final group with zero bytes
    b2 = b2 or 0
    b3 = b3 or 0

    sextets = {
      rshift(b1, 2),
      bor(lshift(band(b1, 0x3), 4), rshift(band(b2, 0xF0), 4)),
      bor(lshift(band(b2, 0xF), 2), rshift(band(b3, 0xC0), 6)),
      band(b3, 0x3F)
    }

    for k = 1, math.min(total - pos + 2, 4)
      encoded[#encoded + 1] = string.char(sextets[k] + 33)

  table.concat encoded
|
|
|
|
-- Decodes a string produced by parser.uuencode. Input is consumed in
-- groups of up to four characters; a group of n characters yields n - 1
-- bytes. Each decoded value is masked to one byte so that characters
-- outside the expected alphabet cannot make string.char raise; well-formed
-- input decodes exactly as before.
parser.uudecode = (input)->
  decoded = {}
  pos = 1

  while pos <= #input
    chunk = input\sub pos, pos + 3
    src = [(string.byte c) - 33 for c in chunk\gmatch "."]
    if #src > 1
      table.insert decoded, band(bor(lshift(src[1], 2), rshift(src[2], 4)), 0xFF)
    if #src > 2
      table.insert decoded, band(bor(lshift(band(src[2], 0xF), 4), rshift(src[3], 2)), 0xFF)
    if #src > 3
      table.insert decoded, band(bor(lshift(band(src[3], 0x3), 6), src[4]), 0xFF)

    pos += #src

  return table.concat [string.char value for value in *decoded]
|
|
|
|
-- Parsed representation of an ASS subtitle file.
-- After construction the instance exposes:
--   sections                 raw rows grouped by section name
--   script_info, aegisub_garbage, styles, events
--                            lists of parsed line tables
--   script_info_mapping, aegisub_garbage_mapping
--                            key -> value lookups for the info sections
--   extradata                extradata ID -> parsed data line
--   extradata_mapping        key -> value -> extradata ID
class ASSFile
  -- file: an object providing a `lines` iterator method
  new: (file)=>
    @sections = {}
    @styles = {}
    @events = {}
    @script_info = {}
    @script_info_mapping = {}
    @aegisub_garbage = {}
    @aegisub_garbage_mapping = {}
    @extradata = {}
    @extradata_mapping = {}

    @parse file

  -- Reads the file and fills every field of the instance.
  parse: (file)=>
    @read_sections file

    -- extradata is parsed first because event lines reference it by ID
    @parse_extradata!
    @script_info = @parse_section "Script Info", {"info": true}
    @aegisub_garbage = @parse_section "Aegisub Project Garbage", {"info": true}
    @styles = @parse_section "V4+ Styles", {"style": true}
    @events = @parse_section "Events", {"dialogue": true}

    for info in *@script_info
      @script_info_mapping[info.key] = info.value
    for garbage in *@aegisub_garbage
      @aegisub_garbage_mapping[garbage.key] = garbage.value

  -- Sorts the rows of the file into @sections, keyed by the section name
  -- without its brackets. Blank rows and rows starting with ';' are
  -- dropped.
  read_sections: (file)=>
    current_section = nil

    for row in file\lines!
      -- remove BOM if present, remove newlines, and trim leading spaces
      row = F.string.trimLeft (row\gsub "^\xEF\xBB\xBF", "")\gsub "[\r\n]*$", ""

      if row == "" or row\match "^;"
        continue

      section = row\match "^%[(.*)%]$"
      if section
        current_section = section
        @sections[current_section] = {}
        continue

      -- content before the first [Section] header has nowhere to go;
      -- report it instead of failing on a nil section table
      if not current_section
        aegisub.log 2, "WARNING: Line outside of any section: #{row}\n"
        continue

      table.insert @sections[current_section], row

  -- Parses the "Aegisub Extradata" section, if present, into @extradata
  -- and @extradata_mapping. Rows that do not parse as data lines are
  -- reported and skipped.
  parse_extradata: =>
    if @sections["Aegisub Extradata"]
      for row in *@sections["Aegisub Extradata"]
        line = parser.raw_to_line row
        if not line or line.class != "data"
          aegisub.log 2, "WARNING: Malformed data line: #{row}\n"
          continue

        @extradata[line.id] = line
        @extradata_mapping[line.key] = @extradata_mapping[line.key] or {}
        @extradata_mapping[line.key][line.value] = line.id

  -- Parses every row of one section.
  -- section:          section name without brackets
  -- expected_classes: set of line classes to keep
  -- Returns the list of parsed lines; an absent section gives an empty
  -- list. A "Format:" row changes the column layout for the rows after it.
  parse_section: (section, expected_classes)=>
    lines = {}
    return lines if not @sections[section]

    format = nil
    for row in *@sections[section]
      line = parser.raw_to_line row, @extradata, format

      if not line
        -- report the offending row; `line` is nil in this branch
        aegisub.log 2, "WARNING: Malformed line: #{row}\n"
      elseif line.class == "format"
        format = line.format
      elseif expected_classes[line.class]
        table.insert lines, line
      else
        aegisub.log 2, "WARNING: Unexpected type #{line.class} in section #{section}\n"

    return lines
|
|
|
|
-- Parses `file` (an object providing a `lines` iterator method) and
-- returns the resulting ASSFile instance.
parser.parse_file = (file)->
  ASSFile file
|
|
|
|
-- Writes the "[V4+ Styles]" section.
-- styles:   list of style line tables
-- callback: function called once per output fragment (header, format row,
--           then one call per style, each ending in a newline)
parser.generate_styles_section = (styles, callback)->
  callback "[V4+ Styles]\n"
  callback "Format: " .. tostring(STYLE_FORMAT_STRING) .. "\n"
  for idx = 1, #styles
    callback parser.line_to_raw(styles[idx]) .. "\n"
|
|
|
|
-- Writes the "[Events]" section and, when any event carries extradata, a
-- following "[Aegisub Extradata]" section.
-- events:            list of dialogue line tables
-- extradata_mapping: key -> value -> ID table; when nil, extradata on the
--                    events is not written. New key/value pairs are added
--                    to this table with freshly allocated IDs.
-- callback:          function called once per output fragment
-- The event tables themselves are not modified: the "{=id...}" prefix is
-- applied to a copy, so the same events can be generated repeatedly.
parser.generate_events_section = (events, extradata_mapping, callback)->
  callback "[Events]\n"
  callback "Format: #{EVENT_FORMAT_STRING}\n"

  -- find the largest extradata ID seen so far
  last_eid = 0
  if extradata_mapping
    for _, ids_by_value in pairs extradata_mapping
      for _, eid in pairs ids_by_value
        last_eid = math.max last_eid, eid

  extradata_to_write = {}

  for line in *events
    out_line = line

    -- handle extradata
    if line.extra and extradata_mapping
      lineindices = {}
      for key, value in pairs line.extra
        -- look for data in the original file's extradata
        cached_id = extradata_mapping[key] and extradata_mapping[key][value]
        if not cached_id
          -- if new extradata, generate new ID and cache it
          last_eid += 1
          cached_id = last_eid
          extradata_mapping[key] = extradata_mapping[key] or {}
          extradata_mapping[key][value] = cached_id

        table.insert lineindices, cached_id
        extradata_to_write[cached_id] = {key, value}

      -- add indices to line text (e.g. {=32=33}Text)
      if #lineindices > 0
        table.sort lineindices
        indexstring = table.concat ["=#{ind}" for ind in *lineindices]
        -- write the prefix into a shallow copy, not the caller's table
        out_line = util.copy line
        out_line.text = "{#{indexstring}}" .. line.text

    callback parser.line_to_raw(out_line) .. "\n"

  out_indices = [ind for ind, _ in pairs extradata_to_write]
  if #out_indices > 0
    callback "\n[Aegisub Extradata]\n"

    table.sort out_indices
    for ind in *out_indices
      {key, value} = extradata_to_write[ind]
      encoded_data = parser.inline_string_encode value
      -- a mystical incantation passed down from subtitle_format_ass.cpp
      if 4*#value < 3*#encoded_data
        value = "u" .. parser.uuencode value
      else
        value = "e" .. encoded_data
      callback "Data: #{ind},#{key},#{value}\n"
|
|
|
|
-- Writes the "[Script Info]" section.
-- lines:    list of info line tables
-- callback: function called once per output fragment
-- bom:      when truthy (the default), a UTF-8 byte order mark is emitted
--           before the section header
parser.generate_script_info_section = (lines, callback, bom=true)->
  if bom
    callback "\xEF\xBB\xBF"
  callback "[Script Info]\n"
  for idx = 1, #lines
    callback parser.line_to_raw(lines[idx]) .. "\n"
|
|
|
|
-- Writes the "[Aegisub Project Garbage]" section.
-- lines:    list of info line tables
-- callback: function called once per output fragment
parser.generate_aegisub_garbage_section = (lines, callback)->
  callback "[Aegisub Project Garbage]\n"
  for idx = 1, #lines
    callback parser.line_to_raw(lines[idx]) .. "\n"
|
|
|
|
-- Writes a complete ASS file through `callback`.
-- Each of script_info, aegisub_garbage, styles and events is optional; a
-- section is emitted only when its argument is truthy. Sections appear in
-- that fixed order and are separated by one blank line.
-- extradata_mapping is passed on to parser.generate_events_section.
parser.generate_file = (script_info, aegisub_garbage, styles, events, extradata_mapping, callback)->
  wrote_any = false

  -- Runs `write data` when data is present, preceded by a separator line
  -- for every section after the first.
  emit = (data, write)->
    if not data
      return nil
    if wrote_any
      callback "\n"
    wrote_any = true
    write data

  emit script_info, (lines)-> parser.generate_script_info_section lines, callback
  emit aegisub_garbage, (lines)-> parser.generate_aegisub_garbage_section lines, callback
  emit styles, (lines)-> parser.generate_styles_section lines, callback
  emit events, (lines)-> parser.generate_events_section lines, extradata_mapping, callback
|
|
|
|
-- Expose the DependencyControl record created at the top of the file as
-- parser.version.
parser.version = version
|
|
-- The module's value is whatever version\register returns when handed the
-- parser table.
return version\register parser
|