--[[ version = 1.0, 2026-05-19 author = Hans Hagen, PRAGMA-ADE, Hasselt NL, Udi Fogiel copyright = PRAGMA ADE / ConTeXt Development Team license = GPL v2.0 comment = Unicode bidi (sort of) This is a follow up on typo-uda which itself is a follow up on t-bidi by Khaled Hosny which in turn is based on minibidi.c from Arabeyes. This is a further optimizations, as well as an update on some recent unicode bidi developments. There is (and will) also be more control added. As a consequence this module is somewhat slower than its precursor which itself is slower than the one-pass bidi handler. This is also a playground and I might add some plugin support. However, in the meantime performance got a bit better and this third variant is again some 10% faster than the second variant. todo (cf html): * normal The element does not offer a additional level of embedding with respect to the bidirectional algorithm. For inline elements implicit reordering works across element boundaries. * embed If the element is inline, this value opens an additional level of embedding with respect to the bidirectional algorithm. The direction of this embedding level is given by the direction property. * bidi-override For inline elements this creates an override. For block container elements this creates an override for inline-level descendants not within another block container element. This means that inside the element, reordering is strictly in sequence according to the direction property; the implicit part of the bidirectional algorithm is ignored. * isolate This keyword indicates that the element's container directionality should be calculated without considering the content of this element. The element is therefore isolated from its siblings. When applying its bidirectional-resolution algorithm, its container element treats it as one or several U+FFFC Object Replacement Character, i.e. like an image. * isolate-override This keyword applies the isolation behavior of the isolate keyword to the surrounding content and the override behavior o f the bidi-override keyword to the inner content. * plaintext This keyword makes the elements directionality calculated without considering its parent bidirectional state or the value of the direction property. The directionality is calculated using the P2 and P3 rules of the Unicode Bidirectional Algorithm. This value allows to display data which has already formatted using a tool following the Unicode Bidirectional Algorithm. todo: check for introduced errors todo: reuse list, we have size, so we can just change values (and auto allocate when not there) todo: reuse the stack todo: no need for a max check todo: collapse bound similar ranges (not ok yet) todo: combine some sweeps todo: removing is not needed when we inject at the same spot (only change the dir property) todo: isolated runs (isolating runs are similar to bidi=local in the basic analyzer) todo: check unicode addenda (from the draft): Added support for canonical equivalents in BD16. Changed logic in N0 to not check forwards for context in the case of enclosed text opposite the embedding direction. Major extension of the algorithm to allow for the implementation of directional isolates and the introduction of new isolate-related values to the Bidi_Class property. Adds BD8, BD9, BD10, BD11, BD12, BD13, BD14, BD15, and BD16, Sections 2.4 and 2.5, and Rules X5a, X5b, X5c and X6a. Extensively revises Section 3.3.2, Explicit Levels and Directions and its existing X rules to formalize the algorithm for matching a PDF with the embedding or override initiator whose scope it terminates. Moves Rules X9 and X10 into a separate new Section 3.3.3, Preparations for Implicit Processing. Modifies Rule X10 to make the isolating run sequence the unit to which subsequent rules are applied. Modifies Rule W1 to change an NSM preceded by an isolate initiator or PDI into ON. Adds Rule N0 and makes other changes to Section 3.3.5, Resolving Neutral and Isolate Formatting Types to resolve bracket pairs to the same level. This file is a derivative of typo-duc.lua from the ConTeXt project. ]]-- local setmetatable = setmetatable local utfcodes = utf8.codes local utfchar = utf8.char local concat = table.concat local data = require("unibidi-lua-data") local directiondata = data.directions local mirrordata = data.mirrors local brackettypedata = data.brackets local lefttoright_code = 0 local righttoleft_code = 1 local maximum_stack = 125 local max_bracket_stack = 63 local analyze_fences = true local remove_mode = 2 local removelevels = { none = 0, controls = 1, full = 2, } local apply_mirror = true local apply_nsm = true -- strong (old): -- -- l : left to right -- r : right to left -- lro : left to right override -- rlo : left to left override -- lre : left to right embedding -- rle : left to left embedding -- al : right to legt arabic (esp punctuation issues) -- -- weak: -- -- en : english number -- es : english number separator -- et : english number terminator -- an : arabic number -- cs : common number separator -- nsm : nonspacing mark -- bn : boundary neutral -- -- neutral: -- -- b : paragraph separator -- s : segment separator -- ws : whitespace -- on : other neutrals -- -- interesting: this is indeed better (and more what we expect i.e. we already use this split -- in the old original (also these isolates) -- -- strong (new): -- -- l : left to right -- r : right to left -- al : right to left arabic (esp punctuation issues) -- -- explicit: (new) -- -- lro : left to right override -- rlo : left to left override -- lre : left to right embedding -- rle : left to left embedding -- pdf : pop dir format -- lri : left to right isolate -- rli : left to left isolate -- fsi : first string isolate -- pdi : pop directional isolate local whitespace = { lre = true, rle = true, lro = true, rlo = true, pdf = true, bn = true, ws = true, lri = true, rli = true, fsi = true, pdi = true, } local b_s_ws_on = { b = true, s = true, ws = true, on = true } local isolate_initiator = { lri = true, rli = true, fsi = true, } -- keeping the list and overwriting doesn't save much runtime, only a few percent -- char is only used for mirror, so in fact we can as well only store it for -- glyphs only -- -- tracking what direction is used and skipping tests is not faster (extra kind of -- compensates gain) local mt_space = { __index = { char = 0x0020, direction = "ws", original = "ws", level = 0, skip = 0 } } local mt_lre = { __index = { char = 0x202A, direction = "lre", original = "lre", level = 0, skip = 0 } } local mt_rle = { __index = { char = 0x202B, direction = "rle", original = "rle", level = 0, skip = 0 } } local mt_pdf = { __index = { char = 0x202C, direction = "pdf", original = "pdf", level = 0, skip = 0 } } local mt_object = { __index = { char = 0xFFFC, direction = "on", original = "on", level = 0, skip = 0 } } local stack = setmetatable({}, { __index = function(t, k) local v = {} t[k] = v return v end }) -- shared local list = { } -- shared local function build_list_str(str) local size = 0 for _, chr in utfcodes(str) do size = size + 1 local dir = directiondata[chr] list[size] = { char = chr, direction = dir, original = dir, level = 0, original_index = size } end return list, size end local function build_list_codepoints(codepoints) local size = #codepoints for i = 1, size do local chr = codepoints[i] local dir = directiondata[chr] list[i] = { char = chr, direction = dir, original = dir, level = 0, original_index = i } end return list, size end -- new -- we could support ( ] and [ ) and such ... -- ש ) ל ( א 0-0 -- ש ( ל ] א 0-0 -- ש ( ל ) א 2-4 -- ש ( ל [ א ) כ ] 2-6 -- ש ( ל ] א ) כ 2-6 -- ש ( ל ) א ) כ 2-4 -- ש ( ל ( א ) כ 4-6 -- ש ( ל ( א ) כ ) 2-8,4-6 -- ש ( ל [ א ] כ ) 2-8,4-6 local fencestack = setmetatable({}, { __index = function(t, k) local v = {} t[k] = v return v end }) local function closing_equivalent(a, b) return a == b or (a == 0x3009 and b == 0x232A) or (a == 0x232A and b == 0x3009) end local function resolve_fences(list,size,start,limit) local seq_level = list[start].level local nofstack = 0 local overflow = false local nofpaired = 0 -- pass 1: find bracket pairs (BD16), only at seq_level for i=start,limit do local entry = list[i] if entry.level == seq_level and entry.direction == "on" then local char = entry.char local mirror = mirrordata[char] if mirror then local class = brackettypedata[char] entry.class = class if class == "o" then if nofstack < max_bracket_stack then nofstack = nofstack + 1 local stacktop = fencestack[nofstack] stacktop[1] = mirror stacktop[2] = i else overflow = true end elseif class == "c" and nofstack > 0 and not overflow then -- search stack for matching open bracket for k=nofstack,1,-1 do if closing_equivalent(fencestack[k][1], char) then -- found match at position k list[fencestack[k][2]].paired = i list[i].paired = fencestack[k][2] nofstack = k - 1 -- pop down to just below the match nofpaired = nofpaired + 1 break end end end end end end if nofpaired == 0 then return end -- pass 2: N0 resolution local embedding_dir = seq_level % 2 == 1 and "r" or "l" for i=start,limit do local entry = list[i] if entry.level == seq_level and entry.class == "o" and entry.paired then local open = i local close = entry.paired -- scan inside for strong type, skipping different-level entries local found_embedding = false local found_opposite = false for j=open+1,close-1 do local e = list[j] if e.level == seq_level then local d = e.direction local is_r = (d == "r" or d == "an" or d == "en" or d == "al") local is_l = (d == "l") if is_r or is_l then local this_dir = is_r and "r" or "l" if this_dir == embedding_dir then found_embedding = true break else found_opposite = true end end end end local strong_inside = nil if found_embedding then strong_inside = embedding_dir elseif found_opposite then strong_inside = (embedding_dir == "r") and "l" or "r" end if strong_inside == nil then -- no strong type inside: leave for N1/N2 elseif strong_inside == embedding_dir then -- N0b list[open].direction = embedding_dir list[close].direction = embedding_dir else -- scan before opening bracket for strong context, -- skipping different-level entries local strong_before = nil for j=open-1,1,-1 do local e = list[j] if e.level == seq_level then local d = e.direction if d == "l" then strong_before = "l" break elseif d == "r" or d == "an" or d == "en" then strong_before = "r" break end elseif e.direction == "bn" then -- skip end end if strong_before == nil then strong_before = embedding_dir -- use sos end if strong_before == strong_inside then -- N0c list[open].direction = strong_inside list[close].direction = strong_inside else -- N0d list[open].direction = embedding_dir list[close].direction = embedding_dir end end end end -- post-N0: NSMs immediately following a resolved bracket inherit its direction for i=start,limit do local entry = list[i] if entry.level == seq_level and entry.class == "c" and entry.paired then local resolved = entry.direction if resolved == "l" or resolved == "r" then for j=i+1,limit do local next = list[j] if next.level == seq_level and next.original == "nsm" then next.direction = resolved elseif next.level ~= seq_level or next.direction == "bn" then -- skip different-level or bn else break end end end end end end -- the action local function skip_isolate(list, i, size) local depth = 1 i = i + 1 while i <= size do local dir = list[i].direction if isolate_initiator[dir] then depth = depth + 1 elseif dir == "pdi" then depth = depth - 1 if depth == 0 then return i end end i = i + 1 end return size + 1 -- no matching PDI; treat rest of paragraph as inside isolate end local function get_baselevel(direction,list,size,where) if direction == lefttoright_code or direction == righttoleft_code then return direction end -- P2, P3 -- P2: find the first strong character that is not inside an isolate run. local i = 1 while i <= size do local entry = list[i] local direction = entry.direction -- P2: skip characters enclosed in an isolate sequence if isolate_initiator[direction] then i = skip_isolate(list, i, size) elseif direction == "pdi" then -- unmatched PDI at the paragraph level; treat as neutral, keep scanning elseif direction == "r" or direction == "al" then return righttoleft_code elseif direction == "l" then return lefttoright_code end i = i + 1 end return lefttoright_code end local function resolve_explicit(list,size,baselevel) -- X1 local level = baselevel local override = "on" local nofstack = 0 local valid_isolate_count = 0 local overflow_isolate_count = 0 local overflow_embedding_count = 0 local overflow_isolate_oebc_stack = {} for i=1,size do local entry = list[i] local direction = entry.direction -- X2 if direction == "rle" then local new_level = level + (level % 2 == 1 and 2 or 1) if overflow_isolate_count == 0 and overflow_embedding_count == 0 and nofstack < maximum_stack and new_level <= maximum_stack then nofstack = nofstack + 1 local stacktop = stack[nofstack] stacktop[1] = level stacktop[2] = override stacktop[3] = false level = new_level override = "on" else overflow_embedding_count = overflow_embedding_count + 1 end entry.level = level entry.direction = "bn" entry.remove = true -- X3 elseif direction == "lre" then local new_level = level + (level % 2 == 1 and 1 or 2) if overflow_isolate_count == 0 and overflow_embedding_count == 0 and nofstack < maximum_stack and new_level <= maximum_stack then nofstack = nofstack + 1 local stacktop = stack[nofstack] stacktop[1] = level stacktop[2] = override stacktop[3] = false level = new_level override = "on" else overflow_embedding_count = overflow_embedding_count + 1 end entry.level = level entry.direction = "bn" entry.remove = true -- X4 elseif direction == "rlo" then local new_level = level + (level % 2 == 1 and 2 or 1) if overflow_isolate_count == 0 and overflow_embedding_count == 0 and nofstack < maximum_stack and new_level <= maximum_stack then nofstack = nofstack + 1 local stacktop = stack[nofstack] stacktop[1] = level stacktop[2] = override stacktop[3] = false level = new_level override = "r" else overflow_embedding_count = overflow_embedding_count + 1 end entry.level = level entry.direction = "bn" entry.remove = true -- X5 elseif direction == "lro" then local new_level = level + (level % 2 == 1 and 1 or 2) if overflow_isolate_count == 0 and overflow_embedding_count == 0 and nofstack < maximum_stack and new_level <= maximum_stack then nofstack = nofstack + 1 local stacktop = stack[nofstack] stacktop[1] = level stacktop[2] = override stacktop[3] = false level = new_level override = "l" else overflow_embedding_count = overflow_embedding_count + 1 end entry.level = level entry.direction = "bn" entry.remove = true -- X5a: RLI elseif direction == "rli" then entry.level = level if override ~= "on" then entry.direction = override else entry.direction = "on" end local new_level = level + (level % 2 == 1 and 2 or 1) if overflow_isolate_count == 0 and nofstack < maximum_stack and new_level <= maximum_stack then nofstack = nofstack + 1 valid_isolate_count = valid_isolate_count + 1 local stacktop = stack[nofstack] stacktop[1] = level stacktop[2] = override stacktop[3] = true stacktop[4] = i level = new_level override = "on" entry.matched_isolate = true else overflow_isolate_count = overflow_isolate_count + 1 overflow_isolate_oebc_stack[overflow_isolate_count] = overflow_embedding_count end -- X5b: LRI elseif direction == "lri" then entry.level = level if override ~= "on" then entry.direction = override else entry.direction = "on" end local new_level = level + (level % 2 == 1 and 1 or 2) if overflow_isolate_count == 0 and nofstack < maximum_stack and new_level <= maximum_stack then nofstack = nofstack + 1 valid_isolate_count = valid_isolate_count + 1 local stacktop = stack[nofstack] stacktop[1] = level stacktop[2] = override stacktop[3] = true stacktop[4] = i level = new_level override = "on" entry.matched_isolate = true else overflow_isolate_count = overflow_isolate_count + 1 overflow_isolate_oebc_stack[overflow_isolate_count] = overflow_embedding_count end -- X5c: FSI elseif direction == "fsi" then entry.level = level if override ~= "on" then entry.direction = override else entry.direction = "on" end local fsi_dir = lefttoright_code local depth = 1 local j = i + 1 while j <= size do local jdir = list[j].direction if isolate_initiator[jdir] then depth = depth + 1 elseif jdir == "pdi" then depth = depth - 1 if depth == 0 then break end elseif depth == 1 then if jdir == "r" or jdir == "al" then fsi_dir = righttoleft_code break elseif jdir == "l" then fsi_dir = lefttoright_code break end end j = j + 1 end local new_level if fsi_dir == righttoleft_code then new_level = level + (level % 2 == 1 and 2 or 1) else new_level = level + (level % 2 == 1 and 1 or 2) end if overflow_isolate_count == 0 and nofstack < maximum_stack and new_level <= maximum_stack then nofstack = nofstack + 1 valid_isolate_count = valid_isolate_count + 1 local stacktop = stack[nofstack] stacktop[1] = level stacktop[2] = override stacktop[3] = true stacktop[4] = i level = new_level override = "on" entry.matched_isolate = true else overflow_isolate_count = overflow_isolate_count + 1 overflow_isolate_oebc_stack[overflow_isolate_count] = overflow_embedding_count end -- X7 elseif direction == "pdf" then if overflow_isolate_count > 0 then -- do nothing: stray PDF inside overflow isolate elseif overflow_embedding_count > 0 then overflow_embedding_count = overflow_embedding_count - 1 elseif nofstack > 0 and not stack[nofstack][3] then local stacktop = stack[nofstack] level = stacktop[1] override = stacktop[2] nofstack = nofstack - 1 end entry.level = level entry.direction = "bn" entry.remove = true -- X6a: PDI elseif direction == "pdi" then if overflow_isolate_count > 0 then overflow_embedding_count = overflow_isolate_oebc_stack[overflow_isolate_count] overflow_isolate_count = overflow_isolate_count - 1 elseif valid_isolate_count > 0 then overflow_embedding_count = 0 valid_isolate_count = valid_isolate_count - 1 while nofstack > 0 and not stack[nofstack][3] do nofstack = nofstack - 1 end if nofstack > 0 then local stacktop = stack[nofstack] level = stacktop[1] override = stacktop[2] if stacktop[4] then list[stacktop[4]].paired_index = i entry.paired_index = stacktop[4] end nofstack = nofstack - 1 end entry.matched_isolate = true end entry.level = level entry.direction = "on" -- X6 else entry.level = level if override ~= "on" and entry.direction ~= "bn" then entry.direction = override end end end -- X8 (reset states and overrides after paragraph) end local function resolve_weak(list,size,start,limit,orderbefore,orderafter) local seq_level = list[start].level local function is_isolate(entry) return isolate_initiator[entry.original] or entry.original == "pdi" end -- W1 for i=start,limit do local entry = list[i] if entry.level ~= seq_level then -- skip elseif entry.direction == "nsm" then if i == start then entry.direction = orderbefore else local prev_dir = orderbefore for j=i-1,start,-1 do local prev = list[j] if prev.level ~= seq_level then -- skip elseif is_isolate(prev) then prev_dir = "on" break elseif prev.direction ~= "bn" then prev_dir = prev.direction break end end entry.direction = prev_dir end end end -- W2 for i=start,limit do local entry = list[i] if entry.level == seq_level and entry.direction == "en" then for j=i-1,start,-1 do local prev = list[j] if prev.level ~= seq_level then -- skip elseif not is_isolate(prev) then local direction = prev.direction if direction == "al" then entry.direction = "an" break elseif direction == "r" or direction == "l" then break end end end end end -- W3 for i=start,limit do local entry = list[i] if entry.level == seq_level and entry.direction == "al" then entry.direction = "r" end end -- W4 local runner = start + 1 while runner <= limit - 1 do local current = list[runner] if current.level ~= seq_level then runner = runner + 1 else local direction = current.direction if direction == "es" or direction == "cs" then local next_dir = nil for j=runner+1,limit do if list[j].level == seq_level and list[j].direction ~= "bn" then next_dir = list[j].direction break end end local prev_dir = nil for j=runner-1,start,-1 do if list[j].level == seq_level and list[j].direction ~= "bn" then prev_dir = list[j].direction break end end if direction == "es" then if prev_dir == "en" and next_dir == "en" then current.direction = "en" end elseif direction == "cs" then if prev_dir == "en" and next_dir == "en" then current.direction = "en" elseif prev_dir == "an" and next_dir == "an" then current.direction = "an" end end end runner = runner + 1 end end -- W5 local i = start while i <= limit do if list[i].level == seq_level and list[i].direction == "et" then local runstart = i local runlimit = i while runlimit <= limit and list[runlimit].level == seq_level and (list[runlimit].direction == "et" or list[runlimit].direction == "bn") do runlimit = runlimit + 1 end runlimit = runlimit - 1 local has_en_before = false for j=runstart-1,start,-1 do if list[j].level ~= seq_level then -- skip elseif list[j].direction == "en" then has_en_before = true break elseif list[j].direction ~= "bn" then break end end local has_en_after = false for j=runlimit+1,limit do if list[j].level ~= seq_level then -- skip elseif list[j].direction == "en" then has_en_after = true break elseif list[j].direction ~= "bn" then break end end if has_en_before or has_en_after then for j=runstart,runlimit do if list[j].level == seq_level then list[j].direction = "en" end end end i = runlimit end i = i + 1 end -- W6 for i=start,limit do local entry = list[i] if entry.level ~= seq_level then -- skip else local direction = entry.direction if direction == "es" or direction == "et" or direction == "cs" then entry.direction = "on" end end end -- W7 for i=start,limit do local entry = list[i] if entry.level ~= seq_level then -- skip elseif entry.direction == "en" then local prev_strong = orderbefore for j=i-1,start,-1 do local d = list[j].direction if list[j].level ~= seq_level then -- skip elseif d == "l" or d == "r" then prev_strong = d break end end if prev_strong == "l" then entry.direction = "l" end end end end local function resolve_neutral(list,size,start,limit,orderbefore,orderafter) local seq_level = list[start].level local i = start while i <= limit do local entry = list[i] if entry.level ~= seq_level then -- skip elseif b_s_ws_on[entry.direction] then local leading_direction, trailing_direction, resolved_direction local runstart = i local runlimit = runstart for j=runstart+1,limit do local jentry = list[j] if jentry.level ~= seq_level then -- skip: inside isolate elseif (isolate_initiator[jentry.original] and jentry.matched_isolate) or (jentry.original == "pdi" and jentry.matched_isolate) then break elseif b_s_ws_on[jentry.direction] then runlimit = j else break end end if runstart == start then leading_direction = orderbefore else leading_direction = orderbefore for j=runstart-1,1,-1 do local prev = list[j] if prev.level ~= seq_level then -- skip: inside isolate elseif (isolate_initiator[prev.original] and prev.matched_isolate) or (prev.original == "pdi" and prev.matched_isolate) then -- transparent: keep scanning elseif prev.direction ~= "bn" then leading_direction = prev.direction if leading_direction == "en" or leading_direction == "an" then leading_direction = "r" end break end end end if runlimit == limit then trailing_direction = orderafter else trailing_direction = orderafter for j=runlimit+1,limit do local next = list[j] if next.level ~= seq_level then -- skip: inside isolate elseif (isolate_initiator[next.original] and next.matched_isolate) or (next.original == "pdi" and next.matched_isolate) then -- isolate boundary, keep scanning elseif next.direction == "bn" then -- skip elseif next.direction == "r" or next.direction == "l" or next.direction == "en" or next.direction == "an" then trailing_direction = next.direction if trailing_direction == "en" or trailing_direction == "an" then trailing_direction = "r" end break end end end if leading_direction == trailing_direction then resolved_direction = leading_direction else resolved_direction = entry.level % 2 == 1 and "r" or "l" end for j=runstart,runlimit do if list[j].level == seq_level then list[j].direction = resolved_direction end end i = runlimit end i = i + 1 end end local function resolve_implicit(list,size,start,limit,orderbefore,orderafter,baselevel) local seq_level = list[start].level for i=start,limit do local entry = list[i] if entry.level ~= seq_level then -- skip: inside isolate else local level = entry.level local direction = entry.direction if level % 2 ~= 1 then if direction == "r" then entry.level = level + 1 elseif direction == "an" or direction == "en" then entry.level = level + 2 end else if direction == "l" or direction == "en" or direction == "an" then entry.level = level + 1 end end end end end local function resolve_levels(list,size,baselevel) local original_levels = {} for i=1,size do original_levels[i] = list[i].level end -- Step 1: collect all level runs local runs = {} local pos = 1 while pos <= size do while pos <= size and list[pos].direction == "bn" do pos = pos + 1 end if pos > size then break end local level = list[pos].level local rstart = pos local rlimit = pos local probe = pos + 1 while probe <= size do if list[probe].direction == "bn" then probe = probe + 1 elseif list[probe].level == level then rlimit = probe probe = probe + 1 else break end end runs[#runs+1] = { start = rstart, limit = rlimit, level = level } pos = rlimit + 1 end -- Step 2: index runs by starting position local run_starting_at = {} for i,run in ipairs(runs) do run_starting_at[run.start] = i end -- Step 3: group into isolating run sequences per BD13 local sequences = {} local run_in_seq = {} for i,run in ipairs(runs) do if not run_in_seq[i] then local first_char = list[run.start] if not (first_char.original == "pdi" and first_char.matched_isolate) then local seq = { i } run_in_seq[i] = true while true do local last_run = runs[seq[#seq]] local last_char = list[last_run.limit] if isolate_initiator[last_char.original] and last_char.matched_isolate and last_char.paired_index then local pdi_pos = last_char.paired_index local next_run_i = run_starting_at[pdi_pos] if next_run_i and not run_in_seq[next_run_i] then seq[#seq+1] = next_run_i run_in_seq[next_run_i] = true else break end else break end end sequences[#sequences+1] = seq end end end -- Step 4: process each run with its own sor/eor (same as before) for _,seq in ipairs(sequences) do local first_run = runs[seq[1]] local last_run = runs[seq[#seq]] local level = first_run.level local seq_start = first_run.start local seq_limit = last_run.limit -- sor: based on first run local sor_level local first_char = list[seq_start] if first_char.original == "pdi" and first_char.matched_isolate then local initiator_level = baselevel if first_char.paired_index then initiator_level = original_levels[first_char.paired_index] end sor_level = level > initiator_level and level or initiator_level else local prev_level = baselevel for j=seq_start-1,1,-1 do if list[j].direction ~= "bn" then prev_level = original_levels[j] break end end sor_level = level > prev_level and level or prev_level end -- eor: based on last run local eor_level local last_char = list[seq_limit] if isolate_initiator[last_char.original] and last_char.matched_isolate then eor_level = level > baselevel and level or baselevel elseif isolate_initiator[last_char.original] then eor_level = baselevel else local next_level = baselevel for j=seq_limit+1,size do if list[j].direction ~= "bn" then next_level = original_levels[j] break end end local next_non_bn = nil for j=seq_limit+1,size do if list[j].direction ~= "bn" then next_non_bn = list[j] break end end if next_non_bn and next_non_bn.original == "b" then eor_level = baselevel else eor_level = level > next_level and level or next_level end end local orderbefore = sor_level % 2 == 1 and "r" or "l" local orderafter = eor_level % 2 == 1 and "r" or "l" -- W and I rules per run, with adjusted orderbefore tracking last strong resolve_weak(list,size,seq_start,seq_limit,orderbefore,orderafter) if analyze_fences then resolve_fences(list,size,seq_start,seq_limit) end resolve_neutral(list,size,seq_start,seq_limit,orderbefore,orderafter) resolve_implicit(list,size,seq_start,seq_limit,orderbefore,orderafter,baselevel) end -- L1 for i=1,size do local entry = list[i] local direction = entry.original if direction == "s" or direction == "b" then entry.level = baselevel for j=i-1,1,-1 do local entry = list[j] if whitespace[entry.original] then entry.level = baselevel else break end end end end for i=size,1,-1 do local entry = list[i] if whitespace[entry.original] then entry.level = baselevel else break end end -- L4 if apply_mirror then for i=1,size do local entry = list[i] if entry.level % 2 == 1 then local mirror = mirrordata[entry.char] if mirror then entry.mirror = mirror end end end end end local function is_removed(entry) return entry.remove_always or (remove_mode > 0 and entry.remove) or (remove_mode > 1 and entry.original == "bn") end local function reorder_str(list,size) -- L2 actual local maxlevel = 0 for i = 1, size do if not is_removed(list[i]) then local l = list[i].level if l > maxlevel then maxlevel = l end end end for level = maxlevel, 1, -1 do local i = 1 while i <= size do if not is_removed(list[i]) and list[i].level >= level then local start = i -- find end of run, skipping removed entries local stop = i i = i + 1 while i <= size do if is_removed(list[i]) then i = i + 1 -- skip removed, keep looking elseif list[i].level >= level then stop = i i = i + 1 else break end end -- reverse between start and stop, skipping removed entries local lo, hi = start, stop while lo < hi do -- advance lo past removed while lo < hi and is_removed(list[lo]) do lo = lo + 1 end -- retreat hi past removed while lo < hi and is_removed(list[hi]) do hi = hi - 1 end if lo < hi then list[lo], list[hi] = list[hi], list[lo] lo = lo + 1 hi = hi - 1 end end else i = i + 1 end end end end local function reorder_nsm(list,size,remove_mode) -- L3 local i = 1 while i <= size do -- skip removed entries while i <= size and is_removed(list[i]) do i = i + 1 end if i <= size and list[i].original == "nsm" then local run_start = i local run_level = list[i].level while i <= size and list[i].original == "nsm" and list[i].level == run_level do i = i + 1 -- skip removed entries within nsm run while i <= size and is_removed(list[i]) do i = i + 1 end end -- i is now at the base (skipping removed) while i <= size and is_removed(list[i]) do i = i + 1 end if i <= size and run_level % 2 == 1 then local base = list[i] for j = i, run_start + 1, -1 do list[j] = list[j-1] end list[run_start] = base i = i + 1 else i = i + 1 end else i = i + 1 end end end local function process_str(str,direction,where) local list, size = build_list_str(str) local baselevel = get_baselevel(direction,list,size,where) resolve_explicit(list,size,baselevel) resolve_levels(list,size,baselevel) reorder_str(list,size) if apply_nsm then reorder_nsm(list,size) end local t = {} local n = 0 for i = 1, size do local entry = list[i] if not is_removed(entry) then -- X9 n = n + 1 local cp = entry.mirror or entry.char t[n] = utfchar(cp) end end return concat(t) end local function process_codepoints(codepoints,direction,where) local list, size = build_list_codepoints(codepoints) local baselevel = get_baselevel(direction,list,size,where) resolve_explicit(list, size, baselevel) resolve_levels(list, size, baselevel) reorder_str(list, size) if apply_nsm then reorder_nsm(list, size) end local result = {} local n = 0 for i = 1, size do local entry = list[i] if not is_removed(entry) then n = n + 1 result[n] = entry.mirror or entry.char end end return result end local function levels_str(str,direction,where) local list, size = build_list_str(str) local baselevel = get_baselevel(direction,list,size,where) resolve_explicit(list,size,baselevel) resolve_levels(list,size,baselevel) local levels = {} for i = 1, size do local entry = list[i] if is_removed(entry) then levels[i] = false else levels[i] = entry.level end end reorder_str(list,size) if apply_nsm then reorder_nsm(list,size) end local reorder = {} local n = 0 for i = 1, size do local entry = list[i] if not is_removed(entry) then n = n + 1 reorder[n] = entry.original_index end end return levels, reorder end local function levels_codepoints(codepoints,direction,where) local list, size = build_list_codepoints(codepoints) local baselevel = get_baselevel(direction,list,size,where) resolve_explicit(list,size,baselevel) resolve_levels(list,size,baselevel) local levels = {} for i = 1, size do local entry = list[i] if is_removed(entry) then levels[i] = false else levels[i] = entry.level end end reorder_str(list, size) if apply_nsm then reorder_nsm(list, size) end local reorder = {} local n = 0 for i = 1, size do local entry = list[i] if not is_removed(entry) then n = n + 1 reorder[n] = entry.original_index end end return levels, reorder end if not( type(status) == "table" and type(status.luatex_version) == "number" and not ( type(tex) == "table" and type(tex.initialize) == "function" )) then return { directions = directiondata, mirrors = mirrordata, brackets = brackettypedata, string = { reorder = process_str, levels = levels_str, }, codepoints = { reorder = process_codepoints, levels = levels_codepoints, }, set = function(k, v) if k == "fences" then analyze_fences = v elseif k == "nsm" then apply_nsm = v elseif k == "mirror" then apply_mirror = v elseif k == "remove" then remove_mode = removelevels[v] or v elseif k == "baselevel" then get_baselevel = v else error("unibidi-lua: unknown option: " .. tostring(k)) end end, get = function(k) if k == "fences" then return analyze_fences elseif k == "nsm" then return apply_nsm elseif k == "mirror" then return apply_mirror elseif k == "remove" then return remove_mode elseif k == "baselevel" then return get_baselevel else error("unibidi-lua: unknown option: " .. tostring(k)) end end, } end local node = node local direct = node.direct local getnext = direct.getnext local getprev = direct.getprev local getid = direct.getid local getsubtype = direct.getsubtype local getchar = direct.getchar local getdirection = direct.getdirection local setchar = direct.setchar local setdirection = direct.setdirection local properties = direct.get_properties_table() local new_node = direct.new local remove_node = direct.remove local insertnodeafter = direct.insert_after local insertnodebefore = direct.insert_before local getfont = direct.getfont local get_font = font.getfont local todirect = direct.todirect local tonode = direct.tonode local glyph_code = node.id("glyph") local glue_code = node.id("glue") local math_code = node.id("math") local dir_code = node.id("dir") local par_code = node.id("local_par") local penalty_code = node.id("penalty") local startofpar if tex.luatexversion > 124 then startofpar = function(n) local s = getsubtype(n) return s == 0 or s == 4 end else startofpar = function(n) local s = getsubtype(n) return s == 0 or s == 3 end end local function new_direction(dir,swap) local t = new_node(dir_code) setdirection(t,dir,swap) return t end local parfillskip_code = 15 local function build_list(head) -- P1 local current = head local size = 0 while current do size = size + 1 local id = getid(current) local p = properties[current] if p and p.directions then -- tricky as dirs can be injected in between local skip = 0 local last = id current = getnext(current) while current do local id = getid(current) local p = properties[current] if p and p.directions then skip = skip + 1 last = id current = getnext(current) else break end end if id == last then -- the start id list[size] = setmetatable({ skip = skip, id = id },mt_object) else list[size] = setmetatable({ skip = skip, id = id, last = last },mt_object) end elseif id == glyph_code then local chr = getchar(current) local dir = directiondata[chr] -- could also be a metatable list[size] = { char = chr, direction = dir, original = dir, level = 0 } current = getnext(current) -- if not list[dir] then list[dir] = true end -- not faster when we check for usage elseif id == glue_code then -- and how about kern list[size] = setmetatable({ },mt_space) current = getnext(current) elseif id == dir_code then local dir, pop = getdirection(current) if dir == lefttoright_code then list[size] = setmetatable({ remove_always = true },pop and mt_pdf or mt_lre) elseif dir == righttoleft_code then list[size] = setmetatable({ remove_always = true },pop and mt_pdf or mt_rle) else list[size] = setmetatable({ id = id },mt_object) end current = getnext(current) elseif id == math_code then local skip = 0 current = getnext(current) while getid(current) ~= math_code do skip = skip + 1 current = getnext(current) end skip = skip + 1 current = getnext(current) list[size] = setmetatable({ id = id, skip = skip },mt_object) else -- disc_code: we assume that these are the same as the surrounding local skip = 0 local last = id current = getnext(current) while current do local id = getid(current) if id ~= glyph_code and id ~= glue_code and id ~= dir_code and id ~= math_code then skip = skip + 1 last = id current = getnext(current) else break end end if id == last then -- the start id list[size] = setmetatable({ id = id, skip = skip },mt_object) else list[size] = setmetatable({ id = id, skip = skip, last = last },mt_object) end end end return list, size end local function get_baselevel(direction,list,size,where,head) if getid(head) == par_code and startofpar(head) then direction = getdirection(head) if direction == lefttoright_code or direction == righttoleft_code then return direction end elseif direction == lefttoright_code or direction == righttoleft_code then return direction end -- P2, P3 -- P2: find the first strong character that is not inside an isolate run. local i = 1 while i <= size do local entry = list[i] local direction = entry.direction -- P2: skip characters enclosed in an isolate sequence if isolate_initiator[direction] then i = skip_isolate(list, i, size) elseif direction == "pdi" then -- unmatched PDI at the paragraph level; treat as neutral, keep scanning elseif direction == "r" or direction == "al" then return righttoleft_code elseif direction == "l" then return lefttoright_code end i = i + 1 end return lefttoright_code end local function insert_from_level(baselevel) return baselevel end local function insert_dir_points(list,size,baselevel) -- L2, but no actual reversion is done, we simply annotate where -- begindir/endddir node will be inserted. local maxlevel = 0 for i=1,size do local level = list[i].level if level > maxlevel then maxlevel = level end end for level=insert_from_level(baselevel),maxlevel do local started = false local runstart = nil local runlast = nil local dircode = (level % 2 == 1) and righttoleft_code or lefttoright_code for i=1,size do local entry = list[i] if entry.level >= level then if not started then local b = entry.begindirs if b then b[#b+1] = dircode else entry.begindirs = { dircode } end runstart = i started = true end runlast = i else if started and runlast then local l = list[runlast] local e = l.enddirs if e then e[#e+1] = dircode else l.enddirs = { dircode } end started = false runstart = nil runlast = nil end end end -- Close at end if still open if started and runlast then local l = list[runlast] local e = l.enddirs if e then e[#e+1] = dircode else l.enddirs = { dircode } end end end end -- We flag nodes that can be skipped when we see them again but because whatever -- mechanism can inject dir nodes that then are not flagged, we don't flag dir -- nodes that we inject here. local function mirror_char(current,mirror) local curr_font = get_font(getfont(current)) local props = curr_font and curr_font.properties local mode = props and props.mode if mode ~= 'harf' and mode ~= 'plug' then setchar(current,mirror) end end local function apply_to_list(list,size,head) local index = 1 local current = head while current do if index > size then break end local id = getid(current) local entry = list[index] local p = properties[current] if p then p.directions = true else properties[current] = { directions = true } end -- Handle begindirs local b = entry.begindirs if b then if id == par_code and startofpar(current) then -- par should always be the 1st node, insert begindirs AFTER it for i=1,#b do head, current = insertnodeafter(head,current,new_direction(b[i])) end else -- Insert begindirs BEFORE current node for i=1,#b do head = insertnodebefore(head,current,new_direction(b[i])) end end entry.begindirs = nil end if id == glyph_code and entry.mirror then mirror_char(current,entry.mirror) elseif id == glue_code then if getsubtype(current) == parfillskip_code then local e = entry.enddirs if e then -- insert the enddirs before \parfillskip glue local c = current local p = getprev(c) if p and getid(p) == penalty_code then -- linepenalty c = p end for i = #e, 1, -1 do head = insertnodebefore(head,c,new_direction(e[i],true)) end entry.enddirs = nil end end end local skip = entry.skip if skip and skip > 0 then for i=1,skip do current = getnext(current) local p = properties[current] if p then p.directions = true else properties[current] = { directions = true } end end end -- Insert all enddirs AFTER current node (in reverse order for proper nesting) local e = entry.enddirs if is_removed(entry) then local to_remove = current current = getnext(current) if e then for i = #e, 1, -1 do head = insertnodebefore(head, current, new_direction(e[i], true)) end end head = remove_node(head, to_remove, true) else if e then for i = #e, 1, -1 do head, current = insertnodeafter(head, current, new_direction(e[i], true)) end end current = getnext(current) end index = index + 1 end return head end -- If needed we can optimize for only_one. There is no need to do anything -- when it's not a glyph. Otherwise we only need to check mirror and apply -- directions when it's different from the surrounding. Paragraphs always -- have more than one node. Actually, we only enter this function when we -- do have a glyph! local function process(head,direction,where) local list, size = build_list(head) local baselevel = get_baselevel(direction,list,size,where,head) resolve_explicit(list,size,baselevel) resolve_levels(list,size,baselevel) insert_dir_points(list,size,baselevel) return apply_to_list(list,size,head) end local function levels_node(head,direction,where) local list, size = build_list(head) local baselevel = get_baselevel(direction,list,size,where,head) resolve_explicit(list,size,baselevel) resolve_levels(list,size,baselevel) local levels = {} for i = 1, size do local entry = list[i] if is_removed(entry) then levels[i] = false else levels[i] = entry.level end end return levels end return { directions = directiondata, mirrors = mirrordata, brackets = brackettypedata, string = { reorder = process_str, levels = levels_str, }, direct = { reorder = process, levels = levels_node, }, node = { reorder = function(head, ...) return tonode(process(todirect(head), ...)) end, levels = function(head, ...) return levels_node(todirect(head), ...) end, }, codepoints = { reorder = process_codepoints, levels = levels_codepoints, }, set = function(k, v) if k == "fences" then analyze_fences = v elseif k == "nsm" then apply_nsm = v elseif k == "mirror" then apply_mirror = v elseif k == "remove" then remove_mode = removelevels[v] or v elseif k == "baselevel" then get_baselevel = v elseif k == "startlevel" then insert_from_level = v elseif k == "mirrorchar" then mirror_char = v else error("unibidi-lua: unknown option: " .. tostring(k)) end end, get = function(k) if k == "fences" then return analyze_fences elseif k == "nsm" then return apply_nsm elseif k == "mirror" then return apply_mirror elseif k == "remove" then return remove_mode elseif k == "baselevel" then return get_baselevel elseif k == "startlevel" then return insert_from_level elseif k == "mirrorchar" then return mirror_char else error("unibidi-lua: unknown option: " .. tostring(k)) end end, }