From 45df4d0d9b577fecee798d672695fe24ff57fb1b Mon Sep 17 00:00:00 2001
From: mo khan
Date: Tue, 15 Jul 2025 16:37:08 -0600
Subject: feat: migrate from Cedar to SpiceDB authorization system

This is a major architectural change that replaces the Cedar policy-based
authorization system with SpiceDB's relation-based authorization.

Key changes:
- Migrate the implementation from Rust to Go
- Replace Cedar policies with SpiceDB schema and relationships
- Switch the Envoy `ext_authz` backend from Cedar to SpiceDB permission checks
- Update the build system and dependencies for the Go ecosystem
- Maintain the Envoy integration for external authorization

This change enables more flexible permission modeling through SpiceDB's
Google Zanzibar-inspired relation-based system, supporting complex
hierarchical permissions that were difficult to express in Cedar.

Breaking change: existing Cedar policies and Rust-based configuration
will no longer work and need to be migrated to SpiceDB schema.
---
 vendor/unicode-width/scripts/unicode.py | 2156 ------------------------------
 1 file changed, 2156 deletions(-)
 delete mode 100755 vendor/unicode-width/scripts/unicode.py

diff --git a/vendor/unicode-width/scripts/unicode.py b/vendor/unicode-width/scripts/unicode.py
deleted file mode 100755
index 320da14e..00000000
--- a/vendor/unicode-width/scripts/unicode.py
+++ /dev/null
@@ -1,2156 +0,0 @@
-#!/usr/bin/env python3
-#
-# Copyright 2011-2022 The Rust Project Developers. See the COPYRIGHT
-# file at the top-level directory of this distribution and at
-# http://rust-lang.org/COPYRIGHT.
-#
-# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-# option. This file may not be copied, modified, or distributed
-# except according to those terms.
-
-# This script uses the following Unicode tables:
-#
-# - DerivedCoreProperties.txt
-# - EastAsianWidth.txt
-# - HangulSyllableType.txt
-# - NormalizationTest.txt (for tests only)
-# - PropList.txt
-# - ReadMe.txt
-# - UnicodeData.txt
-# - auxiliary/GraphemeBreakProperty.txt
-# - emoji/emoji-data.txt
-# - emoji/emoji-variation-sequences.txt
-# - extracted/DerivedGeneralCategory.txt
-#
-# Since this should not require frequent updates, we just store this
-# out-of-line and check the generated module into git.
-
-import enum
-import math
-import operator
-import os
-import re
-import sys
-import urllib.request
-from collections import defaultdict
-from itertools import batched
-from typing import Callable, Iterable
-
-UNICODE_VERSION = "15.1.0"
-"""The version of the Unicode data files to download."""
-
-NUM_CODEPOINTS = 0x110000
-"""An upper bound for which `range(0, NUM_CODEPOINTS)` contains Unicode's codespace."""
-
-MAX_CODEPOINT_BITS = math.ceil(math.log2(NUM_CODEPOINTS - 1))
-"""The maximum number of bits required to represent a Unicode codepoint."""
-
-
-class OffsetType(enum.IntEnum):
-    """Represents the data type of a lookup table's offsets. Each variant's value is the
-    number of bits required to represent that variant's type."""
-
-    U2 = 2
-    """Offsets are 2-bit unsigned integers, packed four-per-byte."""
-    U4 = 4
-    """Offsets are 4-bit unsigned integers, packed two-per-byte."""
-    U8 = 8
-    """Each offset is a single byte (u8)."""
-
-
-MODULE_PATH = "../src/tables.rs"
-"""The path of the emitted Rust module (relative to the working directory)"""
-
-TABLE_SPLITS = [7, 13]
-"""The splits between the bits of the codepoint used to index each subtable.
-Adjust these values to change the sizes of the subtables"""
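-
-# A small illustrative helper (not part of the original script) showing how
-# TABLE_SPLITS = [7, 13] partitions a codepoint into the three indices used by
-# the generated Rust `lookup_width`: bits 13 and up select a root entry,
-# bits 7..13 a middle entry, and bits 0..7 a leaf entry.
-def _split_codepoint(cp: int) -> tuple[int, int, int]:
-    root_idx = cp >> TABLE_SPLITS[1]
-    middle_idx = (cp >> TABLE_SPLITS[0]) & (2 ** (TABLE_SPLITS[1] - TABLE_SPLITS[0]) - 1)
-    leaf_idx = cp & (2 ** TABLE_SPLITS[0] - 1)
-    return (root_idx, middle_idx, leaf_idx)
-
-
-# For example, U+1F600 GRINNING FACE splits into (0xF, 0x2C, 0x00).
-assert _split_codepoint(0x1F600) == (0xF, 0x2C, 0x00)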
-
-Codepoint = int
-BitPos = int
-
-
-def fetch_open(filename: str, local_prefix: str = "", emoji: bool = False):
-    """Opens `filename` and returns its corresponding file object. If `filename` isn't on disk,
-    fetches it from `https://www.unicode.org/Public/`. Exits with code 1 on failure.
-    """
-    basename = os.path.basename(filename)
-    localname = os.path.join(local_prefix, basename)
-    if not os.path.exists(localname):
-        if emoji:
-            prefix = f"emoji/{UNICODE_VERSION[:-2]}"
-        else:
-            prefix = f"{UNICODE_VERSION}/ucd"
-        urllib.request.urlretrieve(
-            f"https://www.unicode.org/Public/{prefix}/{filename}",
-            localname,
-        )
-    try:
-        return open(localname, encoding="utf-8")
-    except OSError:
-        sys.stderr.write(f"cannot load {localname}")
-        sys.exit(1)
-
-
-def load_unicode_version() -> tuple[int, int, int]:
-    """Returns the current Unicode version by fetching and processing `ReadMe.txt`."""
-    with fetch_open("ReadMe.txt") as readme:
-        pattern = r"for Version (\d+)\.(\d+)\.(\d+) of the Unicode"
-        return tuple(map(int, re.search(pattern, readme.read()).groups()))  # type: ignore
-
-
-def load_property(filename: str, pattern: str, action: Callable[[int], None]):
-    with fetch_open(filename) as properties:
-        single = re.compile(rf"^([0-9A-F]+)\s*;\s*{pattern}\s+")
-        multiple = re.compile(rf"^([0-9A-F]+)\.\.([0-9A-F]+)\s*;\s*{pattern}\s+")
-
-        for line in properties.readlines():
-            raw_data = None  # (low, high)
-            if match := single.match(line):
-                raw_data = (match.group(1), match.group(1))
-            elif match := multiple.match(line):
-                raw_data = (match.group(1), match.group(2))
-            else:
-                continue
-            low = int(raw_data[0], 16)
-            high = int(raw_data[1], 16)
-            for cp in range(low, high + 1):
-                action(cp)
-
-
-def to_sorted_ranges(iter: Iterable[Codepoint]) -> list[tuple[Codepoint, Codepoint]]:
-    "Creates a sorted list of ranges from an iterable of codepoints"
-    lst = [c for c in iter]
-    lst.sort()
-    ret = []
-    for cp in lst:
-        if len(ret) > 0 and ret[-1][1] == cp - 1:
-            ret[-1] = (ret[-1][0], cp)
-        else:
-            ret.append((cp, cp))
-    return ret
-
-
-class EastAsianWidth(enum.IntEnum):
-    """Represents the width of a Unicode character according to UAX #11.
-    All East Asian Width classes resolve into either
-    `EffectiveWidth.NARROW`, `EffectiveWidth.WIDE`, or `EffectiveWidth.AMBIGUOUS`.
-    """
-
-    NARROW = 1
-    """ One column wide. """
-    WIDE = 2
-    """ Two columns wide. """
-    AMBIGUOUS = 3
-    """ Two columns wide in a CJK context. One column wide in all other contexts. """
-
-
-class CharWidthInTable(enum.IntEnum):
-    """Represents the width of a Unicode character as stored in the tables."""
-
-    ZERO = 0
-    ONE = 1
-    TWO = 2
-    SPECIAL = 3
-
-
-class WidthState(enum.IntEnum):
-    """
-    Width calculation proceeds according to a state machine.
-    We iterate over the characters of the string from back to front;
-    the next character encountered determines the transition to take.
-
-    The integer values of these variants have special meaning:
-    - Top bit: whether this is Vs16
-    - 2nd from top: whether this is Vs15
-    - 3rd bit from top: whether this is transparent to emoji/text presentation
-      (if set, should also set 4th)
-    - 4th bit: whether to set top bit on emoji presentation.
- If this is set but 3rd is not, the width mode is related to zwj sequences - - 5th from top: whether this is unaffected by ligature-transparent - - 6th bit: if 4th is set but this one is not, then this is a ZWJ ligature state - where no ZWJ has been encountered yet; encountering one flips this on""" - - # BASIC WIDTHS - - ZERO = 0x1_0000 - "Zero columns wide." - - NARROW = 0x1_0001 - "One column wide." - - WIDE = 0x1_0002 - "Two columns wide." - - THREE = 0x1_0003 - "Three columns wide." - - # \r\n - LINE_FEED = 0b0000_0000_0000_0001 - "\\n (CRLF has width 1)" - - # EMOJI - - # Emoji skintone modifiers - EMOJI_MODIFIER = 0b0000_0000_0000_0010 - "`Emoji_Modifier`" - - # Emoji ZWJ sequences - - REGIONAL_INDICATOR = 0b0000_0000_0000_0011 - "`Regional_Indicator`" - - SEVERAL_REGIONAL_INDICATOR = 0b0000_0000_0000_0100 - "At least two `Regional_Indicator`in sequence" - - EMOJI_PRESENTATION = 0b0000_0000_0000_0101 - "`Emoji_Presentation`" - - ZWJ_EMOJI_PRESENTATION = 0b0001_0000_0000_0110 - "\\u200D `Emoji_Presentation`" - - VS16_ZWJ_EMOJI_PRESENTATION = 0b1001_0000_0000_0110 - "\\uFE0F \\u200D `Emoji_Presentation`" - - KEYCAP_ZWJ_EMOJI_PRESENTATION = 0b0001_0000_0000_0111 - "\\u20E3 \\u200D `Emoji_Presentation`" - - VS16_KEYCAP_ZWJ_EMOJI_PRESENTATION = 0b1001_0000_0000_0111 - "\\uFE0F \\u20E3 \\u200D `Emoji_Presentation`" - - REGIONAL_INDICATOR_ZWJ_PRESENTATION = 0b0000_0000_0000_1001 - "`Regional_Indicator` \\u200D `Emoji_Presentation`" - - EVEN_REGIONAL_INDICATOR_ZWJ_PRESENTATION = 0b0000_0000_0000_1010 - "(`Regional_Indicator` `Regional_Indicator`)+ \\u200D `Emoji_Presentation`" - - ODD_REGIONAL_INDICATOR_ZWJ_PRESENTATION = 0b0000_0000_0000_1011 - "(`Regional_Indicator` `Regional_Indicator`)+ `Regional_Indicator` \\u200D `Emoji_Presentation`" - - TAG_END_ZWJ_EMOJI_PRESENTATION = 0b0000_0000_0001_0000 - "\\uE007F \\u200D `Emoji_Presentation`" - - TAG_D1_END_ZWJ_EMOJI_PRESENTATION = 0b0000_0000_0001_0001 - "\\uE0030..=\\uE0039 \\uE007F \\u200D `Emoji_Presentation`" - - TAG_D2_END_ZWJ_EMOJI_PRESENTATION = 0b0000_0000_0001_0010 - "(\\uE0030..=\\uE0039){2} \\uE007F \\u200D `Emoji_Presentation`" - - TAG_D3_END_ZWJ_EMOJI_PRESENTATION = 0b0000_0000_0001_0011 - "(\\uE0030..=\\uE0039){3} \\uE007F \\u200D `Emoji_Presentation`" - - TAG_A1_END_ZWJ_EMOJI_PRESENTATION = 0b0000_0000_0001_1001 - "\\uE0061..=\\uE007A \\uE007F \\u200D `Emoji_Presentation`" - - TAG_A2_END_ZWJ_EMOJI_PRESENTATION = 0b0000_0000_0001_1010 - "(\\uE0061..=\\uE007A){2} \\uE007F \\u200D `Emoji_Presentation`" - - TAG_A3_END_ZWJ_EMOJI_PRESENTATION = 0b0000_0000_0001_1011 - "(\\uE0061..=\\uE007A){3} \\uE007F \\u200D `Emoji_Presentation`" - - TAG_A4_END_ZWJ_EMOJI_PRESENTATION = 0b0000_0000_0001_1100 - "(\\uE0061..=\\uE007A){4} \\uE007F \\u200D `Emoji_Presentation`" - - TAG_A5_END_ZWJ_EMOJI_PRESENTATION = 0b0000_0000_0001_1101 - "(\\uE0061..=\\uE007A){35} \\uE007F \\u200D `Emoji_Presentation`" - - TAG_A6_END_ZWJ_EMOJI_PRESENTATION = 0b0000_0000_0001_1110 - "(\\uE0061..=\\uE007A){6} \\uE007F \\u200D `Emoji_Presentation`" - - # VARIATION SELECTORS - - # Text presentation sequences (not CJK) - VARIATION_SELECTOR_15 = 0b0100_0000_0000_0000 - "\\uFE0E (text presentation sequences)" - - # Emoji presentation sequences - VARIATION_SELECTOR_16 = 0b1000_0000_0000_0000 - "\\uFE0F (emoji presentation sequences)" - - # ARABIC LAM ALEF - - JOINING_GROUP_ALEF = 0b0011_0000_1111_1111 - "Joining_Group=Alef (Arabic Lam-Alef ligature)" - - # COMBINING SOLIDUS (CJK only) - - COMBINING_LONG_SOLIDUS_OVERLAY = 0b0011_1100_1111_1111 - "\\u0338 (CJK only, makes <, 
=, > width 2)" - - # SOLIDUS + ALEF (solidus is Joining_Type=Transparent) - SOLIDUS_OVERLAY_ALEF = 0b0011_1000_1111_1111 - "\\u0338 followed by Joining_Group=Alef" - - # SCRIPT ZWJ LIGATURES - - # Hebrew alef lamed - - HEBREW_LETTER_LAMED = 0b0011_1000_0000_0000 - "\\u05DC (Alef-ZWJ-Lamed ligature)" - - ZWJ_HEBREW_LETTER_LAMED = 0b0011_1100_0000_0000 - "\\u200D\\u05DC (Alef-ZWJ-Lamed ligature)" - - # Buginese ya - - BUGINESE_LETTER_YA = 0b0011_1000_0000_0001 - "\\u1A10 ( + ya ligature)" - - ZWJ_BUGINESE_LETTER_YA = 0b0011_1100_0000_0001 - "\\u200D\\u1A10 ( + ya ligature)" - - BUGINESE_VOWEL_SIGN_I_ZWJ_LETTER_YA = 0b0011_1100_0000_0010 - "\\u1A17\\u200D\\u1A10 ( + ya ligature)" - - # Tifinagh bi-consonants - - TIFINAGH_CONSONANT = 0b0011_1000_0000_0011 - "\\u2D31..=\\u2D65 or \\u2D6F (joined by ZWJ or \\u2D7F TIFINAGH CONSONANT JOINER)" - - ZWJ_TIFINAGH_CONSONANT = 0b0011_1100_0000_0011 - "ZWJ then \\u2D31..=\\u2D65 or \\u2D6F" - - TIFINAGH_JOINER_CONSONANT = 0b0011_1100_0000_0100 - "\\u2D7F then \\u2D31..=\\u2D65 or \\u2D6F" - - # Lisu tone letters - LISU_TONE_LETTER_MYA_NA_JEU = 0b0011_1100_0000_0101 - "\\uA4FC or \\uA4FD (https://www.unicode.org/versions/Unicode15.0.0/ch18.pdf#G42078)" - - # Old Turkic orkhon ec - orkhon i - - OLD_TURKIC_LETTER_ORKHON_I = 0b0011_1000_0000_0110 - "\\u10C03 (ORKHON EC-ZWJ-ORKHON I ligature)" - - ZWJ_OLD_TURKIC_LETTER_ORKHON_I = 0b0011_1100_0000_0110 - "\\u10C03 (ORKHON EC-ZWJ-ORKHON I ligature)" - - # Khmer coeng signs - - KHMER_COENG_ELIGIBLE_LETTER = 0b0011_1100_0000_0111 - "\\u1780..=\\u17A2 | \\u17A7 | \\u17AB | \\u17AC | \\u17AF" - - def table_width(self) -> CharWidthInTable: - "The width of a character as stored in the lookup tables." - match self: - case WidthState.ZERO: - return CharWidthInTable.ZERO - case WidthState.NARROW: - return CharWidthInTable.ONE - case WidthState.WIDE: - return CharWidthInTable.TWO - case _: - return CharWidthInTable.SPECIAL - - def is_carried(self) -> bool: - "Whether this corresponds to a non-default `WidthInfo`." - return int(self) <= 0xFFFF - - def width_alone(self) -> int: - "The width of a character with this type when it appears alone." - match self: - case ( - WidthState.ZERO - | WidthState.COMBINING_LONG_SOLIDUS_OVERLAY - | WidthState.VARIATION_SELECTOR_15 - | WidthState.VARIATION_SELECTOR_16 - ): - return 0 - case ( - WidthState.WIDE - | WidthState.EMOJI_MODIFIER - | WidthState.EMOJI_PRESENTATION - ): - return 2 - case WidthState.THREE: - return 3 - case _: - return 1 - - def is_cjk_only(self) -> bool: - return self in [ - WidthState.COMBINING_LONG_SOLIDUS_OVERLAY, - WidthState.SOLIDUS_OVERLAY_ALEF, - ] - - def is_non_cjk_only(self) -> bool: - return self == WidthState.VARIATION_SELECTOR_15 - - -assert len(set([v.value for v in WidthState])) == len([v.value for v in WidthState]) - - -def load_east_asian_widths() -> list[EastAsianWidth]: - """Return a list of effective widths, indexed by codepoint. - Widths are determined by fetching and parsing `EastAsianWidth.txt`. - - `Neutral`, `Narrow`, and `Halfwidth` characters are assigned `EffectiveWidth.NARROW`. - - `Wide` and `Fullwidth` characters are assigned `EffectiveWidth.WIDE`. - - `Ambiguous` characters are assigned `EffectiveWidth.AMBIGUOUS`.""" - - with fetch_open("EastAsianWidth.txt") as eaw: - # matches a width assignment for a single codepoint, i.e. "1F336;N # ..." - single = re.compile(r"^([0-9A-F]+)\s*;\s*(\w+) +# (\w+)") - # matches a width assignment for a range of codepoints, i.e. "3001..3003;W # ..." 
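-        # e.g. parsing "3001..3003;W # ..." yields the groups ("3001", "3003", "W")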
-        multiple = re.compile(r"^([0-9A-F]+)\.\.([0-9A-F]+)\s*;\s*(\w+) +# (\w+)")
-        # map between width category code and condensed width
-        width_codes = {
-            **{c: EastAsianWidth.NARROW for c in ["N", "Na", "H"]},
-            **{c: EastAsianWidth.WIDE for c in ["W", "F"]},
-            "A": EastAsianWidth.AMBIGUOUS,
-        }
-
-        width_map = []
-        current = 0
-        for line in eaw.readlines():
-            raw_data = None  # (low, high, width)
-            if match := single.match(line):
-                raw_data = (match.group(1), match.group(1), match.group(2))
-            elif match := multiple.match(line):
-                raw_data = (match.group(1), match.group(2), match.group(3))
-            else:
-                continue
-            low = int(raw_data[0], 16)
-            high = int(raw_data[1], 16)
-            width = width_codes[raw_data[2]]
-
-            assert current <= high
-            while current <= high:
-                # Some codepoints don't fall into any of the ranges in EastAsianWidth.txt.
-                # All such codepoints are implicitly given Neutral width (resolves to narrow)
-                width_map.append(EastAsianWidth.NARROW if current < low else width)
-                current += 1
-
-    while len(width_map) < NUM_CODEPOINTS:
-        # Catch any leftover codepoints and assign them implicit Neutral/narrow width.
-        width_map.append(EastAsianWidth.NARROW)
-
-    # Ambiguous `Letter`s and `Modifier_Symbol`s are narrow
-    load_property(
-        "extracted/DerivedGeneralCategory.txt",
-        r"(?:Lu|Ll|Lt|Lm|Lo|Sk)",
-        lambda cp: (
-            operator.setitem(width_map, cp, EastAsianWidth.NARROW)
-            if width_map[cp] == EastAsianWidth.AMBIGUOUS
-            else None
-        ),
-    )
-
-    # GREEK ANO TELEIA: NFC decomposes to U+00B7 MIDDLE DOT
-    width_map[0x0387] = EastAsianWidth.AMBIGUOUS
-
-    # Canonical equivalence for symbols with stroke
-    with fetch_open("UnicodeData.txt") as udata:
-        single = re.compile(r"([0-9A-Z]+);.*?;.*?;.*?;.*?;([0-9A-Z]+) 0338;")
-        for line in udata.readlines():
-            if match := single.match(line):
-                composed = int(match.group(1), 16)
-                decomposed = int(match.group(2), 16)
-                if width_map[decomposed] == EastAsianWidth.AMBIGUOUS:
-                    width_map[composed] = EastAsianWidth.AMBIGUOUS
-
-    return width_map
-
-
-def load_zero_widths() -> list[bool]:
-    """Returns a list `l` where `l[c]` is true if codepoint `c` is considered a zero-width
-    character. `c` is considered a zero-width character if
-
-    - it has the `Default_Ignorable_Code_Point` property (determined from `DerivedCoreProperties.txt`),
-    - or if it has the `Grapheme_Extend` property (determined from `DerivedCoreProperties.txt`),
-    - or if it is one of eight characters that should be `Grapheme_Extend` but aren't due to a Unicode spec bug,
-    - or if it has a `Hangul_Syllable_Type` of `Vowel_Jamo` or `Trailing_Jamo` (determined from `HangulSyllableType.txt`).
-    """
-
-    zw_map = [False] * NUM_CODEPOINTS
-
-    # `Default_Ignorable_Code_Point`s also have 0 width:
-    # https://www.unicode.org/faq/unsup_char.html#3
-    # https://www.unicode.org/versions/Unicode15.1.0/ch05.pdf#G40095
-    #
-    # `Grapheme_Extend` includes characters with general category `Mn` or `Me`,
-    # as well as a few `Mc` characters that need to be included so that
-    # canonically equivalent sequences have the same width.
-    load_property(
-        "DerivedCoreProperties.txt",
-        r"(?:Default_Ignorable_Code_Point|Grapheme_Extend)",
-        lambda cp: operator.setitem(zw_map, cp, True),
-    )
-
-    # Unicode spec bug: these should be `Grapheme_Cluster_Break=Extend`,
-    # as they canonically decompose to two characters with this property,
-    # but they aren't.
-    for c in [0x0CC0, 0x0CC7, 0x0CC8, 0x0CCA, 0x0CCB, 0x1B3B, 0x1B3D, 0x1B43]:
-        zw_map[c] = True
-
-    # Treat `Hangul_Syllable_Type`s of `Vowel_Jamo` and `Trailing_Jamo`
-    # as zero-width.
This matches the behavior of glibc `wcwidth`. - # - # Decomposed Hangul characters consist of 3 parts: a `Leading_Jamo`, - # a `Vowel_Jamo`, and an optional `Trailing_Jamo`. Together these combine - # into a single wide grapheme. So we treat vowel and trailing jamo as - # 0-width, such that only the width of the leading jamo is counted - # and the resulting grapheme has width 2. - # - # (See the Unicode Standard sections 3.12 and 18.6 for more on Hangul) - load_property( - "HangulSyllableType.txt", - r"(?:V|T)", - lambda cp: operator.setitem(zw_map, cp, True), - ) - - # Syriac abbreviation mark: - # Zero-width `Prepended_Concatenation_Mark` - zw_map[0x070F] = True - - # Some Arabic Prepended_Concatenation_Mark`s - # https://www.unicode.org/versions/Unicode15.0.0/ch09.pdf#G27820 - zw_map[0x0605] = True - zw_map[0x0890] = True - zw_map[0x0891] = True - zw_map[0x08E2] = True - - # `[:Grapheme_Cluster_Break=Prepend:]-[:Prepended_Concatenation_Mark:]` - gcb_prepend = set() - load_property( - "auxiliary/GraphemeBreakProperty.txt", - "Prepend", - lambda cp: gcb_prepend.add(cp), - ) - load_property( - "PropList.txt", - "Prepended_Concatenation_Mark", - lambda cp: gcb_prepend.remove(cp), - ) - for cp in gcb_prepend: - zw_map[cp] = True - - # HANGUL CHOSEONG FILLER - # U+115F is a `Default_Ignorable_Code_Point`, and therefore would normally have - # zero width. However, the expected usage is to combine it with vowel or trailing jamo - # (which are considered 0-width on their own) to form a composed Hangul syllable with - # width 2. Therefore, we treat it as having width 2. - zw_map[0x115F] = False - - # TIFINAGH CONSONANT JOINER - # (invisible only when used to join two Tifinagh consonants - zw_map[0x2D7F] = False - - # DEVANAGARI CARET - # https://www.unicode.org/versions/Unicode15.0.0/ch12.pdf#G667447 - zw_map[0xA8FA] = True - - return zw_map - - -def load_width_maps() -> tuple[list[WidthState], list[WidthState]]: - """Load complete width table, including characters needing special handling. 
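-    Ambiguous-width characters resolve to wide in the East Asian table and to narrow in the other.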
- (Returns 2 tables, one for East Asian and one for not.)""" - - eaws = load_east_asian_widths() - zws = load_zero_widths() - - not_ea = [] - ea = [] - - for eaw, zw in zip(eaws, zws): - if zw: - not_ea.append(WidthState.ZERO) - ea.append(WidthState.ZERO) - else: - if eaw == EastAsianWidth.WIDE: - not_ea.append(WidthState.WIDE) - else: - not_ea.append(WidthState.NARROW) - - if eaw == EastAsianWidth.NARROW: - ea.append(WidthState.NARROW) - else: - ea.append(WidthState.WIDE) - - # Joining_Group=Alef (Arabic Lam-Alef ligature) - alef_joining = [] - load_property( - "extracted/DerivedJoiningGroup.txt", - "Alef", - lambda cp: alef_joining.append(cp), - ) - - # Regional indicators - regional_indicators = [] - load_property( - "PropList.txt", - "Regional_Indicator", - lambda cp: regional_indicators.append(cp), - ) - - # Emoji modifiers - emoji_modifiers = [] - load_property( - "emoji/emoji-data.txt", - "Emoji_Modifier", - lambda cp: emoji_modifiers.append(cp), - ) - - # Default emoji presentation (for ZWJ sequences) - emoji_presentation = [] - load_property( - "emoji/emoji-data.txt", - "Emoji_Presentation", - lambda cp: emoji_presentation.append(cp), - ) - - for cps, width in [ - ([0x0A], WidthState.LINE_FEED), - ([0x05DC], WidthState.HEBREW_LETTER_LAMED), - (alef_joining, WidthState.JOINING_GROUP_ALEF), - (range(0x1780, 0x1783), WidthState.KHMER_COENG_ELIGIBLE_LETTER), - (range(0x1784, 0x1788), WidthState.KHMER_COENG_ELIGIBLE_LETTER), - (range(0x1789, 0x178D), WidthState.KHMER_COENG_ELIGIBLE_LETTER), - (range(0x178E, 0x1794), WidthState.KHMER_COENG_ELIGIBLE_LETTER), - (range(0x1795, 0x1799), WidthState.KHMER_COENG_ELIGIBLE_LETTER), - (range(0x179B, 0x179E), WidthState.KHMER_COENG_ELIGIBLE_LETTER), - ( - [0x17A0, 0x17A2, 0x17A7, 0x17AB, 0x17AC, 0x17AF], - WidthState.KHMER_COENG_ELIGIBLE_LETTER, - ), - ([0x17A4], WidthState.WIDE), - ([0x17D8], WidthState.THREE), - ([0x1A10], WidthState.BUGINESE_LETTER_YA), - (range(0x2D31, 0x2D66), WidthState.TIFINAGH_CONSONANT), - ([0x2D6F], WidthState.TIFINAGH_CONSONANT), - ([0xA4FC], WidthState.LISU_TONE_LETTER_MYA_NA_JEU), - ([0xA4FD], WidthState.LISU_TONE_LETTER_MYA_NA_JEU), - ([0xFE0F], WidthState.VARIATION_SELECTOR_16), - ([0x10C03], WidthState.OLD_TURKIC_LETTER_ORKHON_I), - (emoji_presentation, WidthState.EMOJI_PRESENTATION), - (emoji_modifiers, WidthState.EMOJI_MODIFIER), - (regional_indicators, WidthState.REGIONAL_INDICATOR), - ]: - for cp in cps: - not_ea[cp] = width - ea[cp] = width - - # East-Asian only - ea[0x0338] = WidthState.COMBINING_LONG_SOLIDUS_OVERLAY - - # Not East Asian only - not_ea[0xFE0E] = WidthState.VARIATION_SELECTOR_15 - - return (not_ea, ea) - - -def load_joining_group_lam() -> list[tuple[Codepoint, Codepoint]]: - "Returns a list of character ranges with Joining_Group=Lam" - lam_joining = [] - load_property( - "extracted/DerivedJoiningGroup.txt", - "Lam", - lambda cp: lam_joining.append(cp), - ) - - return to_sorted_ranges(lam_joining) - - -def load_non_transparent_zero_widths( - width_map: list[WidthState], -) -> list[tuple[Codepoint, Codepoint]]: - "Returns a list of characters with zero width but not 'Joining_Type=Transparent'" - - zero_widths = set() - for cp, width in enumerate(width_map): - if width.width_alone() == 0: - zero_widths.add(cp) - transparent = set() - load_property( - "extracted/DerivedJoiningType.txt", - "T", - lambda cp: transparent.add(cp), - ) - - return to_sorted_ranges(zero_widths - transparent) - - -def load_ligature_transparent() -> list[tuple[Codepoint, Codepoint]]: - """Returns a list of character ranges 
corresponding to all combining marks that are also - `Default_Ignorable_Code_Point`s, plus ZWJ. This is the set of characters that won't interrupt - a ligature.""" - default_ignorables = set() - load_property( - "DerivedCoreProperties.txt", - "Default_Ignorable_Code_Point", - lambda cp: default_ignorables.add(cp), - ) - - combining_marks = set() - load_property( - "extracted/DerivedGeneralCategory.txt", - "(?:Mc|Mn|Me)", - lambda cp: combining_marks.add(cp), - ) - - default_ignorable_combinings = default_ignorables.intersection(combining_marks) - default_ignorable_combinings.add(0x200D) # ZWJ - - return to_sorted_ranges(default_ignorable_combinings) - - -def load_solidus_transparent( - ligature_transparents: list[tuple[Codepoint, Codepoint]], - cjk_width_map: list[WidthState], -) -> list[tuple[Codepoint, Codepoint]]: - """Characters expanding to a canonical combining class above 1, plus `ligature_transparent`s from above. - Ranges matching ones in `ligature_transparent` exactly are excluded (for compression), so it needs to bechecked also. - """ - - ccc_above_1 = set() - load_property( - "extracted/DerivedCombiningClass.txt", - "(?:[2-9]|(?:[1-9][0-9]+))", - lambda cp: ccc_above_1.add(cp), - ) - - for lo, hi in ligature_transparents: - for cp in range(lo, hi + 1): - ccc_above_1.add(cp) - - num_chars = len(ccc_above_1) - - # Recursive decompositions - while True: - with fetch_open("UnicodeData.txt") as udata: - single = re.compile(r"([0-9A-Z]+);.*?;.*?;.*?;.*?;([0-9A-F ]+);") - for line in udata.readlines(): - if match := single.match(line): - composed = int(match.group(1), 16) - decomposed = [int(c, 16) for c in match.group(2).split(" ")] - if all([c in ccc_above_1 for c in decomposed]): - ccc_above_1.add(composed) - if len(ccc_above_1) == num_chars: - break - else: - num_chars = len(ccc_above_1) - - for cp in ccc_above_1: - if cp != 0xFE0F: - assert ( - cjk_width_map[cp].table_width() != CharWidthInTable.SPECIAL - ), f"U+{cp:X}" - - sorted = to_sorted_ranges(ccc_above_1) - return list(filter(lambda range: range not in ligature_transparents, sorted)) - - -def load_normalization_tests() -> list[tuple[str, str, str, str, str]]: - def parse_codepoints(cps: str) -> str: - return "".join(map(lambda cp: chr(int(cp, 16)), cps.split(" "))) - - with fetch_open("NormalizationTest.txt") as normtests: - ret = [] - single = re.compile( - r"^([0-9A-F ]+);([0-9A-F ]+);([0-9A-F ]+);([0-9A-F ]+);([0-9A-F ]+);" - ) - for line in normtests.readlines(): - if match := single.match(line): - ret.append( - ( - parse_codepoints(match.group(1)), - parse_codepoints(match.group(2)), - parse_codepoints(match.group(3)), - parse_codepoints(match.group(4)), - parse_codepoints(match.group(5)), - ) - ) - return ret - - -def make_special_ranges( - width_map: list[WidthState], -) -> list[tuple[tuple[Codepoint, Codepoint], WidthState]]: - "Assign ranges of characters to their special behavior (used in match)" - ret = [] - can_merge_with_prev = False - for cp, width in enumerate(width_map): - if width == WidthState.EMOJI_PRESENTATION: - can_merge_with_prev = False - elif width.table_width() == CharWidthInTable.SPECIAL: - if can_merge_with_prev and ret[-1][1] == width: - ret[-1] = ((ret[-1][0][0], cp), width) - else: - ret.append(((cp, cp), width)) - can_merge_with_prev = True - return ret - - -class Bucket: - """A bucket contains a group of codepoints and an ordered width list. If one bucket's width - list overlaps with another's width list, those buckets can be merged via `try_extend`. 
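-    For example, a bucket with widths [1, 2] and one with widths [1, 2, 0] can merge,
-    because one width list is a prefix of the other.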
- """ - - def __init__(self): - """Creates an empty bucket.""" - self.entry_set = set() - self.widths = [] - - def append(self, codepoint: Codepoint, width: CharWidthInTable): - """Adds a codepoint/width pair to the bucket, and appends `width` to the width list.""" - self.entry_set.add((codepoint, width)) - self.widths.append(width) - - def try_extend(self, attempt: "Bucket") -> bool: - """If either `self` or `attempt`'s width list starts with the other bucket's width list, - set `self`'s width list to the longer of the two, add all of `attempt`'s codepoints - into `self`, and return `True`. Otherwise, return `False`.""" - (less, more) = (self.widths, attempt.widths) - if len(self.widths) > len(attempt.widths): - (less, more) = (attempt.widths, self.widths) - if less != more[: len(less)]: - return False - self.entry_set |= attempt.entry_set - self.widths = more - return True - - def entries(self) -> list[tuple[Codepoint, CharWidthInTable]]: - """Return a list of the codepoint/width pairs in this bucket, sorted by codepoint.""" - result = list(self.entry_set) - result.sort() - return result - - def width(self) -> CharWidthInTable | None: - """If all codepoints in this bucket have the same width, return that width; otherwise, - return `None`.""" - if len(self.widths) == 0: - return None - potential_width = self.widths[0] - for width in self.widths[1:]: - if potential_width != width: - return None - return potential_width - - -def make_buckets( - entries: Iterable[tuple[int, CharWidthInTable]], low_bit: BitPos, cap_bit: BitPos -) -> list[Bucket]: - """Partitions the `(Codepoint, EffectiveWidth)` tuples in `entries` into `Bucket`s. All - codepoints with identical bits from `low_bit` to `cap_bit` (exclusive) are placed in the - same bucket. Returns a list of the buckets in increasing order of those bits.""" - num_bits = cap_bit - low_bit - assert num_bits > 0 - buckets = [Bucket() for _ in range(0, 2**num_bits)] - mask = (1 << num_bits) - 1 - for codepoint, width in entries: - buckets[(codepoint >> low_bit) & mask].append(codepoint, width) - return buckets - - -class Table: - """Represents a lookup table. Each table contains a certain number of subtables; each - subtable is indexed by a contiguous bit range of the codepoint and contains a list - of `2**(number of bits in bit range)` entries. (The bit range is the same for all subtables.) - - Typically, tables contain a list of buckets of codepoints. Bucket `i`'s codepoints should - be indexed by sub-table `i` in the next-level lookup table. The entries of this table are - indexes into the bucket list (~= indexes into the sub-tables of the next-level table.) The - key to compression is that two different buckets in two different sub-tables may have the - same width list, which means that they can be merged into the same bucket. - - If no bucket contains two codepoints with different widths, calling `indices_to_widths` will - discard the buckets and convert the entries into `EffectiveWidth` values.""" - - def __init__( - self, - name: str, - entry_groups: Iterable[Iterable[tuple[int, CharWidthInTable]]], - secondary_entry_groups: Iterable[Iterable[tuple[int, CharWidthInTable]]], - low_bit: BitPos, - cap_bit: BitPos, - offset_type: OffsetType, - align: int, - bytes_per_row: int | None = None, - starting_indexed: list[Bucket] = [], - cfged: bool = False, - ): - """Create a lookup table with a sub-table for each `(Codepoint, EffectiveWidth)` iterator - in `entry_groups`. 
Each sub-table is indexed by codepoint bits in `low_bit..cap_bit`, - and each table entry is represented in the format specified by `offset_type`. Asserts - that this table is actually representable with `offset_type`.""" - starting_indexed_len = len(starting_indexed) - self.name = name - self.low_bit = low_bit - self.cap_bit = cap_bit - self.offset_type = offset_type - self.entries: list[int] = [] - self.indexed: list[Bucket] = list(starting_indexed) - self.align = align - self.bytes_per_row = bytes_per_row - self.cfged = cfged - - buckets: list[Bucket] = [] - for entries in entry_groups: - buckets.extend(make_buckets(entries, self.low_bit, self.cap_bit)) - - for bucket in buckets: - for i, existing in enumerate(self.indexed): - if existing.try_extend(bucket): - self.entries.append(i) - break - else: - self.entries.append(len(self.indexed)) - self.indexed.append(bucket) - - self.primary_len = len(self.entries) - self.primary_bucket_len = len(self.indexed) - - buckets = [] - for entries in secondary_entry_groups: - buckets.extend(make_buckets(entries, self.low_bit, self.cap_bit)) - - for bucket in buckets: - for i, existing in enumerate(self.indexed): - if existing.try_extend(bucket): - self.entries.append(i) - break - else: - self.entries.append(len(self.indexed)) - self.indexed.append(bucket) - - # Validate offset type - max_index = 1 << int(self.offset_type) - for index in self.entries: - assert index < max_index, f"{index} <= {max_index}" - - self.indexed = self.indexed[starting_indexed_len:] - - def indices_to_widths(self): - """Destructively converts the indices in this table to the `EffectiveWidth` values of - their buckets. Assumes that no bucket contains codepoints with different widths. - """ - self.entries = list(map(lambda i: int(self.indexed[i].width()), self.entries)) # type: ignore - del self.indexed - - def buckets(self): - """Returns an iterator over this table's buckets.""" - return self.indexed - - def to_bytes(self) -> list[int]: - """Returns this table's entries as a list of bytes. The bytes are formatted according to - the `OffsetType` which the table was created with, converting any `EffectiveWidth` entries - to their enum variant's integer value. For example, with `OffsetType.U2`, each byte will - contain four packed 2-bit entries.""" - entries_per_byte = 8 // int(self.offset_type) - byte_array = [] - for i in range(0, len(self.entries), entries_per_byte): - byte = 0 - for j in range(0, entries_per_byte): - byte |= self.entries[i + j] << (j * int(self.offset_type)) - byte_array.append(byte) - return byte_array - - -def make_tables( - width_map: list[WidthState], - cjk_width_map: list[WidthState], -) -> list[Table]: - """Creates a table for each configuration in `table_cfgs`, with the first config corresponding - to the top-level lookup table, the second config corresponding to the second-level lookup - table, and so forth. 
`entries` is an iterator over the `(Codepoint, EffectiveWidth)` pairs - to include in the top-level table.""" - - entries = enumerate([w.table_width() for w in width_map]) - cjk_entries = enumerate([w.table_width() for w in cjk_width_map]) - - root_table = Table( - "WIDTH_ROOT", - [entries], - [], - TABLE_SPLITS[1], - MAX_CODEPOINT_BITS, - OffsetType.U8, - 128, - ) - - cjk_root_table = Table( - "WIDTH_ROOT_CJK", - [cjk_entries], - [], - TABLE_SPLITS[1], - MAX_CODEPOINT_BITS, - OffsetType.U8, - 128, - starting_indexed=root_table.indexed, - cfged=True, - ) - - middle_table = Table( - "WIDTH_MIDDLE", - map(lambda bucket: bucket.entries(), root_table.buckets()), - map(lambda bucket: bucket.entries(), cjk_root_table.buckets()), - TABLE_SPLITS[0], - TABLE_SPLITS[1], - OffsetType.U8, - 2 ** (TABLE_SPLITS[1] - TABLE_SPLITS[0]), - bytes_per_row=2 ** (TABLE_SPLITS[1] - TABLE_SPLITS[0]), - ) - - leaves_table = Table( - "WIDTH_LEAVES", - map( - lambda bucket: bucket.entries(), - middle_table.buckets()[: middle_table.primary_bucket_len], - ), - map( - lambda bucket: bucket.entries(), - middle_table.buckets()[middle_table.primary_bucket_len :], - ), - 0, - TABLE_SPLITS[0], - OffsetType.U2, - 2 ** (TABLE_SPLITS[0] - 2), - bytes_per_row=2 ** (TABLE_SPLITS[0] - 2), - ) - - return [root_table, cjk_root_table, middle_table, leaves_table] - - -def load_emoji_presentation_sequences() -> list[Codepoint]: - """Outputs a list of cpodepoints, corresponding to all the valid characters for starting - an emoji presentation sequence.""" - - with fetch_open("emoji/emoji-variation-sequences.txt") as sequences: - # Match all emoji presentation sequences - # (one codepoint followed by U+FE0F, and labeled "emoji style") - sequence = re.compile(r"^([0-9A-F]+)\s+FE0F\s*;\s*emoji style") - codepoints = [] - for line in sequences.readlines(): - if match := sequence.match(line): - cp = int(match.group(1), 16) - codepoints.append(cp) - return codepoints - - -def load_text_presentation_sequences() -> list[Codepoint]: - """Outputs a list of codepoints, corresponding to all the valid characters - whose widths change with a text presentation sequence.""" - - text_presentation_seq_codepoints = set() - with fetch_open("emoji/emoji-variation-sequences.txt") as sequences: - # Match all text presentation sequences - # (one codepoint followed by U+FE0E, and labeled "text style") - sequence = re.compile(r"^([0-9A-F]+)\s+FE0E\s*;\s*text style") - for line in sequences.readlines(): - if match := sequence.match(line): - cp = int(match.group(1), 16) - text_presentation_seq_codepoints.add(cp) - - default_emoji_codepoints = set() - - load_property( - "emoji/emoji-data.txt", - "Emoji_Presentation", - lambda cp: default_emoji_codepoints.add(cp), - ) - - codepoints = [] - for cp in text_presentation_seq_codepoints.intersection(default_emoji_codepoints): - # "Enclosed Ideographic Supplement" block; - # wide even in text presentation - if not cp in range(0x1F200, 0x1F300): - codepoints.append(cp) - - codepoints.sort() - return codepoints - - -def load_emoji_modifier_bases() -> list[Codepoint]: - """Outputs a list of codepoints, corresponding to all the valid characters - whose widths change with a text presentation sequence.""" - - ret = [] - load_property( - "emoji/emoji-data.txt", - "Emoji_Modifier_Base", - lambda cp: ret.append(cp), - ) - ret.sort() - return ret - - -def make_presentation_sequence_table( - seqs: list[Codepoint], - lsb: int = 10, -) -> tuple[list[tuple[int, int]], list[list[int]]]: - """Generates 2-level lookup table for whether a 
codepoint might start an emoji variation sequence. - The first level is a match on all but the 10 LSB, the second level is a 1024-bit bitmap for those 10 LSB. - """ - - prefixes_dict = defaultdict(set) - for cp in seqs: - prefixes_dict[cp >> lsb].add(cp & (2**lsb - 1)) - - msbs: list[int] = list(prefixes_dict.keys()) - - leaves: list[list[int]] = [] - for cps in prefixes_dict.values(): - leaf = [0] * (2 ** (lsb - 3)) - for cp in cps: - idx_in_leaf, bit_shift = divmod(cp, 8) - leaf[idx_in_leaf] |= 1 << bit_shift - leaves.append(leaf) - - indexes = [(msb, index) for (index, msb) in enumerate(msbs)] - - # Cull duplicate leaves - i = 0 - while i < len(leaves): - first_idx = leaves.index(leaves[i]) - if first_idx == i: - i += 1 - else: - for j in range(0, len(indexes)): - if indexes[j][1] == i: - indexes[j] = (indexes[j][0], first_idx) - elif indexes[j][1] > i: - indexes[j] = (indexes[j][0], indexes[j][1] - 1) - - leaves.pop(i) - - return (indexes, leaves) - - -def make_ranges_table( - seqs: list[Codepoint], -) -> tuple[list[tuple[int, int]], list[list[tuple[int, int]]]]: - """Generates 2-level lookup table for a binary property of a codepoint. - First level is all but the last byte, second level is ranges for last byte - """ - - prefixes_dict = defaultdict(list) - for cp in seqs: - prefixes_dict[cp >> 8].append(cp & 0xFF) - - msbs: list[int] = list(prefixes_dict.keys()) - - leaves: list[list[tuple[int, int]]] = [] - for cps in prefixes_dict.values(): - leaf = [] - for cp in cps: - if len(leaf) > 0 and leaf[-1][1] == cp - 1: - leaf[-1] = (leaf[-1][0], cp) - else: - leaf.append((cp, cp)) - leaves.append(leaf) - - indexes = [(msb, index) for (index, msb) in enumerate(msbs)] - - # Cull duplicate leaves - i = 0 - while i < len(leaves): - first_idx = leaves.index(leaves[i]) - if first_idx == i: - i += 1 - else: - for j in range(0, len(indexes)): - if indexes[j][1] == i: - indexes[j] = (indexes[j][0], first_idx) - elif indexes[j][1] > i: - indexes[j] = (indexes[j][0], indexes[j][1] - 1) - - leaves.pop(i) - - return (indexes, leaves) - - -def lookup_fns( - is_cjk: bool, - special_ranges: list[tuple[tuple[Codepoint, Codepoint], WidthState]], - joining_group_lam: list[tuple[Codepoint, Codepoint]], -) -> str: - if is_cjk: - cfg = '#[cfg(feature = "cjk")]\n' - cjk_lo = "_cjk" - cjk_cap = "_CJK" - ambig = "wide" - else: - cfg = "" - cjk_lo = "" - cjk_cap = "" - ambig = "narrow" - s = f""" -/// Returns the [UAX #11](https://www.unicode.org/reports/tr11/) based width of `c` by -/// consulting a multi-level lookup table. -/// -/// # Maintenance -/// The tables themselves are autogenerated but this function is hardcoded. You should have -/// nothing to worry about if you re-run `unicode.py` (for example, when updating Unicode.) -/// However, if you change the *actual structure* of the lookup tables (perhaps by editing the -/// `make_tables` function in `unicode.py`) you must ensure that this code reflects those changes. -{cfg}#[inline] -fn lookup_width{cjk_lo}(c: char) -> (u8, WidthInfo) {{ - let cp = c as usize; - - let t1_offset = WIDTH_ROOT{cjk_cap}.0[cp >> {TABLE_SPLITS[1]}]; - - // Each sub-table in WIDTH_MIDDLE is 7 bits, and each stored entry is a byte, - // so each sub-table is 128 bytes in size. - // (Sub-tables are selected using the computed offset from the previous table.) - let t2_offset = WIDTH_MIDDLE.0[usize::from(t1_offset)][cp >> {TABLE_SPLITS[0]} & 0x{(2 ** (TABLE_SPLITS[1] - TABLE_SPLITS[0]) - 1):X}]; - - // Each sub-table in WIDTH_LEAVES is 6 bits, but each stored entry is 2 bits. 
- // This is accomplished by packing four stored entries into one byte. - // So each sub-table is 2**(7-2) == 32 bytes in size. - // Since this is the last table, each entry represents an encoded width. - let packed_widths = WIDTH_LEAVES.0[usize::from(t2_offset)][cp >> 2 & 0x{(2 ** (TABLE_SPLITS[0] - 2) - 1):X}]; - - // Extract the packed width - let width = packed_widths >> (2 * (cp & 0b11)) & 0b11; - - if width < 3 {{ - (width, WidthInfo::DEFAULT) - }} else {{ - match c {{ -""" - - for (lo, hi), width in special_ranges: - s += f" '\\u{{{lo:X}}}'" - if hi != lo: - s += f"..='\\u{{{hi:X}}}'" - if width.is_carried(): - width_info = width.name - else: - width_info = "DEFAULT" - s += f" => ({width.width_alone()}, WidthInfo::{width_info}),\n" - - s += f""" _ => (2, WidthInfo::EMOJI_PRESENTATION), - }} - }} -}} - -/// Returns the [UAX #11](https://www.unicode.org/reports/tr11/) based width of `c`, or -/// `None` if `c` is a control character. -/// Ambiguous width characters are treated as {ambig}. -{cfg}#[inline] -pub fn single_char_width{cjk_lo}(c: char) -> Option {{ - if c < '\\u{{7F}}' {{ - if c >= '\\u{{20}}' {{ - // U+0020 to U+007F (exclusive) are single-width ASCII codepoints - Some(1) - }} else {{ - // U+0000 to U+0020 (exclusive) are control codes - None - }} - }} else if c >= '\\u{{A0}}' {{ - // No characters >= U+00A0 are control codes, so we can consult the lookup tables - Some(lookup_width{cjk_lo}(c).0.into()) - }} else {{ - // U+007F to U+00A0 (exclusive) are control codes - None - }} -}} - -/// Returns the [UAX #11](https://www.unicode.org/reports/tr11/) based width of `c`. -/// Ambiguous width characters are treated as {ambig}. -{cfg}#[inline] -fn width_in_str{cjk_lo}(c: char, mut next_info: WidthInfo) -> (i8, WidthInfo) {{ - if next_info.is_emoji_presentation() {{ - if starts_emoji_presentation_seq(c) {{ - let width = if next_info.is_zwj_emoji_presentation() {{ - 0 - }} else {{ - 2 - }}; - return (width, WidthInfo::EMOJI_PRESENTATION); - }} else {{ - next_info = next_info.unset_emoji_presentation(); - }} - }}""" - - if is_cjk: - s += """ - if (matches!( - next_info, - WidthInfo::COMBINING_LONG_SOLIDUS_OVERLAY | WidthInfo::SOLIDUS_OVERLAY_ALEF - ) && matches!(c, '<' | '=' | '>')) - { - return (2, WidthInfo::DEFAULT); - }""" - - s += """ - if c <= '\\u{A0}' { - match c { - // According to the spec, LF should be width 1, which is how it is often rendered when it is forced to have a single-line rendering - // However, this makes it harder to use this crate to calculate line breaks, and breaks assumptions of downstream crates. 
- // https://github.com/unicode-rs/unicode-width/issues/60 - '\\n' => (0, WidthInfo::LINE_FEED), - '\\r' if next_info == WidthInfo::LINE_FEED => (0, WidthInfo::DEFAULT), - _ => (1, WidthInfo::DEFAULT), - } - } else { - // Fast path - if next_info != WidthInfo::DEFAULT { - if c == '\\u{FE0F}' { - return (0, next_info.set_emoji_presentation()); - }""" - - if not is_cjk: - s += """ - if c == '\\u{FE0E}' { - return (0, next_info.set_text_presentation()); - } - if next_info.is_text_presentation() { - if starts_non_ideographic_text_presentation_seq(c) { - return (1, WidthInfo::DEFAULT); - } else { - next_info = next_info.unset_text_presentation(); - } - }""" - - s += """ - if next_info.is_ligature_transparent() { - if c == '\\u{200D}' { - return (0, next_info.set_zwj_bit()); - } else if is_ligature_transparent(c) { - return (0, next_info); - } - } - - match (next_info, c) {""" - if is_cjk: - s += """ - (WidthInfo::COMBINING_LONG_SOLIDUS_OVERLAY, _) if is_solidus_transparent(c) => { - return ( - lookup_width_cjk(c).0 as i8, - WidthInfo::COMBINING_LONG_SOLIDUS_OVERLAY, - ); - } - (WidthInfo::JOINING_GROUP_ALEF, '\\u{0338}') => { - return (0, WidthInfo::SOLIDUS_OVERLAY_ALEF); - } - // Arabic Lam-Alef ligature - ( - WidthInfo::JOINING_GROUP_ALEF | WidthInfo::SOLIDUS_OVERLAY_ALEF, - """ - else: - s += """ - // Arabic Lam-Alef ligature - ( - WidthInfo::JOINING_GROUP_ALEF, - """ - - tail = False - for lo, hi in joining_group_lam: - if tail: - s += " | " - tail = True - s += f"'\\u{{{lo:X}}}'" - if hi != lo: - s += f"..='\\u{{{hi:X}}}'" - s += """, - ) => return (0, WidthInfo::DEFAULT), - (WidthInfo::JOINING_GROUP_ALEF, _) if is_transparent_zero_width(c) => { - return (0, WidthInfo::JOINING_GROUP_ALEF); - } - - // Hebrew Alef-ZWJ-Lamed ligature - (WidthInfo::ZWJ_HEBREW_LETTER_LAMED, '\\u{05D0}') => { - return (0, WidthInfo::DEFAULT); - } - - // Khmer coeng signs - (WidthInfo::KHMER_COENG_ELIGIBLE_LETTER, '\\u{17D2}') => { - return (-1, WidthInfo::DEFAULT); - } - - // Buginese ZWJ ya ligature - (WidthInfo::ZWJ_BUGINESE_LETTER_YA, '\\u{1A17}') => { - return (0, WidthInfo::BUGINESE_VOWEL_SIGN_I_ZWJ_LETTER_YA) - } - (WidthInfo::BUGINESE_VOWEL_SIGN_I_ZWJ_LETTER_YA, '\\u{1A15}') => { - return (0, WidthInfo::DEFAULT) - } - - // Tifinagh bi-consonants - (WidthInfo::TIFINAGH_CONSONANT | WidthInfo::ZWJ_TIFINAGH_CONSONANT, '\\u{2D7F}') => { - return (1, WidthInfo::TIFINAGH_JOINER_CONSONANT); - } - (WidthInfo::ZWJ_TIFINAGH_CONSONANT, '\\u{2D31}'..='\\u{2D65}' | '\\u{2D6F}') => { - return (0, WidthInfo::DEFAULT); - } - (WidthInfo::TIFINAGH_JOINER_CONSONANT, '\\u{2D31}'..='\\u{2D65}' | '\\u{2D6F}') => { - return (-1, WidthInfo::DEFAULT); - } - - // Lisu tone letter combinations - (WidthInfo::LISU_TONE_LETTER_MYA_NA_JEU, '\\u{A4F8}'..='\\u{A4FB}') => { - return (0, WidthInfo::DEFAULT); - } - - // Old Turkic ligature - (WidthInfo::ZWJ_OLD_TURKIC_LETTER_ORKHON_I, '\\u{10C32}') => { - return (0, WidthInfo::DEFAULT); - }""" - - s += f""" - // Emoji modifier - (WidthInfo::EMOJI_MODIFIER, _) if is_emoji_modifier_base(c) => {{ - return (0, WidthInfo::EMOJI_PRESENTATION); - }} - - // Regional indicator - ( - WidthInfo::REGIONAL_INDICATOR | WidthInfo::SEVERAL_REGIONAL_INDICATOR, - '\\u{{1F1E6}}'..='\\u{{1F1FF}}', - ) => return (1, WidthInfo::SEVERAL_REGIONAL_INDICATOR), - - // ZWJ emoji - ( - WidthInfo::EMOJI_PRESENTATION - | WidthInfo::SEVERAL_REGIONAL_INDICATOR - | WidthInfo::EVEN_REGIONAL_INDICATOR_ZWJ_PRESENTATION - | WidthInfo::ODD_REGIONAL_INDICATOR_ZWJ_PRESENTATION - | WidthInfo::EMOJI_MODIFIER, - '\\u{{200D}}', - ) => 
return (0, WidthInfo::ZWJ_EMOJI_PRESENTATION), - (WidthInfo::ZWJ_EMOJI_PRESENTATION, '\\u{{20E3}}') => {{ - return (0, WidthInfo::KEYCAP_ZWJ_EMOJI_PRESENTATION); - }} - (WidthInfo::VS16_ZWJ_EMOJI_PRESENTATION, _) if starts_emoji_presentation_seq(c) => {{ - return (0, WidthInfo::EMOJI_PRESENTATION) - }} - (WidthInfo::VS16_KEYCAP_ZWJ_EMOJI_PRESENTATION, '0'..='9' | '#' | '*') => {{ - return (0, WidthInfo::EMOJI_PRESENTATION) - }} - (WidthInfo::ZWJ_EMOJI_PRESENTATION, '\\u{{1F1E6}}'..='\\u{{1F1FF}}') => {{ - return (1, WidthInfo::REGIONAL_INDICATOR_ZWJ_PRESENTATION); - }} - ( - WidthInfo::REGIONAL_INDICATOR_ZWJ_PRESENTATION - | WidthInfo::ODD_REGIONAL_INDICATOR_ZWJ_PRESENTATION, - '\\u{{1F1E6}}'..='\\u{{1F1FF}}', - ) => return (-1, WidthInfo::EVEN_REGIONAL_INDICATOR_ZWJ_PRESENTATION), - ( - WidthInfo::EVEN_REGIONAL_INDICATOR_ZWJ_PRESENTATION, - '\\u{{1F1E6}}'..='\\u{{1F1FF}}', - ) => return (3, WidthInfo::ODD_REGIONAL_INDICATOR_ZWJ_PRESENTATION), - (WidthInfo::ZWJ_EMOJI_PRESENTATION, '\\u{{1F3FB}}'..='\\u{{1F3FF}}') => {{ - return (0, WidthInfo::EMOJI_MODIFIER); - }} - (WidthInfo::ZWJ_EMOJI_PRESENTATION, '\\u{{E007F}}') => {{ - return (0, WidthInfo::TAG_END_ZWJ_EMOJI_PRESENTATION); - }} - (WidthInfo::TAG_END_ZWJ_EMOJI_PRESENTATION, '\\u{{E0061}}'..='\\u{{E007A}}') => {{ - return (0, WidthInfo::TAG_A1_END_ZWJ_EMOJI_PRESENTATION); - }} - (WidthInfo::TAG_A1_END_ZWJ_EMOJI_PRESENTATION, '\\u{{E0061}}'..='\\u{{E007A}}') => {{ - return (0, WidthInfo::TAG_A2_END_ZWJ_EMOJI_PRESENTATION) - }} - (WidthInfo::TAG_A2_END_ZWJ_EMOJI_PRESENTATION, '\\u{{E0061}}'..='\\u{{E007A}}') => {{ - return (0, WidthInfo::TAG_A3_END_ZWJ_EMOJI_PRESENTATION) - }} - (WidthInfo::TAG_A3_END_ZWJ_EMOJI_PRESENTATION, '\\u{{E0061}}'..='\\u{{E007A}}') => {{ - return (0, WidthInfo::TAG_A4_END_ZWJ_EMOJI_PRESENTATION) - }} - (WidthInfo::TAG_A4_END_ZWJ_EMOJI_PRESENTATION, '\\u{{E0061}}'..='\\u{{E007A}}') => {{ - return (0, WidthInfo::TAG_A5_END_ZWJ_EMOJI_PRESENTATION) - }} - (WidthInfo::TAG_A5_END_ZWJ_EMOJI_PRESENTATION, '\\u{{E0061}}'..='\\u{{E007A}}') => {{ - return (0, WidthInfo::TAG_A6_END_ZWJ_EMOJI_PRESENTATION) - }} - ( - WidthInfo::TAG_END_ZWJ_EMOJI_PRESENTATION - | WidthInfo::TAG_A1_END_ZWJ_EMOJI_PRESENTATION - | WidthInfo::TAG_A2_END_ZWJ_EMOJI_PRESENTATION - | WidthInfo::TAG_A3_END_ZWJ_EMOJI_PRESENTATION - | WidthInfo::TAG_A4_END_ZWJ_EMOJI_PRESENTATION, - '\\u{{E0030}}'..='\\u{{E0039}}', - ) => return (0, WidthInfo::TAG_D1_END_ZWJ_EMOJI_PRESENTATION), - (WidthInfo::TAG_D1_END_ZWJ_EMOJI_PRESENTATION, '\\u{{E0030}}'..='\\u{{E0039}}') => {{ - return (0, WidthInfo::TAG_D2_END_ZWJ_EMOJI_PRESENTATION); - }} - (WidthInfo::TAG_D2_END_ZWJ_EMOJI_PRESENTATION, '\\u{{E0030}}'..='\\u{{E0039}}') => {{ - return (0, WidthInfo::TAG_D3_END_ZWJ_EMOJI_PRESENTATION); - }} - ( - WidthInfo::TAG_A3_END_ZWJ_EMOJI_PRESENTATION - | WidthInfo::TAG_A4_END_ZWJ_EMOJI_PRESENTATION - | WidthInfo::TAG_A5_END_ZWJ_EMOJI_PRESENTATION - | WidthInfo::TAG_A6_END_ZWJ_EMOJI_PRESENTATION - | WidthInfo::TAG_D3_END_ZWJ_EMOJI_PRESENTATION, - '\\u{{1F3F4}}', - ) => return (0, WidthInfo::EMOJI_PRESENTATION), - (WidthInfo::ZWJ_EMOJI_PRESENTATION, _) - if lookup_width{cjk_lo}(c).1 == WidthInfo::EMOJI_PRESENTATION => - {{ - return (0, WidthInfo::EMOJI_PRESENTATION) - }} - - // Fallback - _ => {{}} - }} - }} - - let ret = lookup_width{cjk_lo}(c); - (ret.0 as i8, ret.1) - }} -}} - -{cfg}#[inline] -pub fn str_width{cjk_lo}(s: &str) -> usize {{ - s.chars() - .rfold( - (0, WidthInfo::DEFAULT), - |(sum, next_info), c| -> (usize, WidthInfo) {{ - let (add, info) = 
width_in_str{cjk_lo}(c, next_info); - (sum.wrapping_add_signed(isize::from(add)), info) - }}, - ) - .0 -}} -""" - - return s - - -def emit_module( - out_name: str, - unicode_version: tuple[int, int, int], - tables: list[Table], - special_ranges: list[tuple[tuple[Codepoint, Codepoint], WidthState]], - special_ranges_cjk: list[tuple[tuple[Codepoint, Codepoint], WidthState]], - emoji_presentation_table: tuple[list[tuple[int, int]], list[list[int]]], - text_presentation_table: tuple[list[tuple[int, int]], list[list[tuple[int, int]]]], - emoji_modifier_table: tuple[list[tuple[int, int]], list[list[tuple[int, int]]]], - joining_group_lam: list[tuple[Codepoint, Codepoint]], - non_transparent_zero_widths: list[tuple[Codepoint, Codepoint]], - ligature_transparent: list[tuple[Codepoint, Codepoint]], - solidus_transparent: list[tuple[Codepoint, Codepoint]], - normalization_tests: list[tuple[str, str, str, str, str]], -): - """Outputs a Rust module to `out_name` using table data from `tables`. - If `TABLE_CFGS` is edited, you may need to edit the included code for `lookup_width`. - """ - if os.path.exists(out_name): - os.remove(out_name) - with open(out_name, "w", newline="\n", encoding="utf-8") as module: - module.write( - """// Copyright 2012-2022 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -// NOTE: The following code was generated by "scripts/unicode.py", do not edit directly - -use core::cmp::Ordering; - -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -struct WidthInfo(u16); - -impl WidthInfo { - /// No special handling necessary - const DEFAULT: Self = Self(0); -""" - ) - - for variant in WidthState: - if variant.is_carried(): - if variant.is_cjk_only(): - module.write(' #[cfg(feature = "cjk")]\n') - module.write( - f" const {variant.name}: Self = Self(0b{variant.value:016b});\n" - ) - - module.write( - f""" - /// Whether this width mode is ligature_transparent - /// (has 5th MSB set.) - fn is_ligature_transparent(self) -> bool {{ - (self.0 & 0b0000_1000_0000_0000) == 0b0000_1000_0000_0000 - }} - - /// Sets 6th MSB. 
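-    /// (Set when a ZWJ is encountered in a ligature-transparent sequence.)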
- fn set_zwj_bit(self) -> Self {{ - Self(self.0 | 0b0000_0100_0000_0000) - }} - - /// Has top bit set - fn is_emoji_presentation(self) -> bool {{ - (self.0 & 0b1000_0000_0000_0000) == 0b1000_0000_0000_0000 - }} - - /// Has top bit set - fn is_zwj_emoji_presentation(self) -> bool {{ - (self.0 & 0b1011_0000_0000_0000) == 0b1001_0000_0000_0000 - }} - - /// Set top bit - fn set_emoji_presentation(self) -> Self {{ - if (self.0 & 0b0010_0000_0000_0000) == 0b0010_0000_0000_0000 - || (self.0 & 0b1001_0000_0000_0000) == 0b0001_0000_0000_0000 - {{ - Self(self.0 | 0b1000_0000_0000_0000) - }} else {{ - Self::VARIATION_SELECTOR_16 - }} - }} - - /// Clear top bit - fn unset_emoji_presentation(self) -> Self {{ - if (self.0 & 0b0010_0000_0000_0000) == 0b0010_0000_0000_0000 {{ - Self(self.0 & 0b0111_1111_1111_1111) - }} else {{ - Self::DEFAULT - }} - }} - - /// Has 2nd bit set - fn is_text_presentation(self) -> bool {{ - (self.0 & 0b0100_0000_0000_0000) == 0b0100_0000_0000_0000 - }} - - /// Set 2nd bit - fn set_text_presentation(self) -> Self {{ - if (self.0 & 0b0010_0000_0000_0000) == 0b0010_0000_0000_0000 {{ - Self(self.0 | 0b0100_0000_0000_0000) - }} else {{ - Self(0b0100_0000_0000_0000) - }} - }} - - /// Clear 2nd bit - fn unset_text_presentation(self) -> Self {{ - Self(self.0 & 0b1011_1111_1111_1111) - }} -}} - -/// The version of [Unicode](http://www.unicode.org/) -/// that this version of unicode-width is based on. -pub const UNICODE_VERSION: (u8, u8, u8) = {unicode_version}; -""" - ) - - module.write(lookup_fns(False, special_ranges, joining_group_lam)) - module.write(lookup_fns(True, special_ranges_cjk, joining_group_lam)) - - emoji_presentation_idx, emoji_presentation_leaves = emoji_presentation_table - text_presentation_idx, text_presentation_leaves = text_presentation_table - emoji_modifier_idx, emoji_modifier_leaves = emoji_modifier_table - - module.write( - """ -/// Whether this character is a zero-width character with -/// `Joining_Type=Transparent`. Used by the Alef-Lamed ligatures. -/// See also [`is_ligature_transparent`], a near-subset of this (only ZWJ is excepted) -/// which is transparent for non-Arabic ligatures. -fn is_transparent_zero_width(c: char) -> bool { - if lookup_width(c).0 != 0 { - // Not zero-width - false - } else { - let cp: u32 = c.into(); - NON_TRANSPARENT_ZERO_WIDTHS - .binary_search_by(|&(lo, hi)| { - let lo = u32::from_le_bytes([lo[0], lo[1], lo[2], 0]); - let hi = u32::from_le_bytes([hi[0], hi[1], hi[2], 0]); - if cp < lo { - Ordering::Greater - } else if cp > hi { - Ordering::Less - } else { - Ordering::Equal - } - }) - .is_err() - } -} - -/// Whether this character is a default-ignorable combining mark -/// or ZWJ. These characters won't interrupt non-Arabic ligatures. -fn is_ligature_transparent(c: char) -> bool { - matches!(c, """ - ) - - tail = False - for lo, hi in ligature_transparent: - if tail: - module.write(" | ") - tail = True - module.write(f"'\\u{{{lo:X}}}'") - if hi != lo: - module.write(f"..='\\u{{{hi:X}}}'") - - module.write( - """) -} - -/// Whether this character is transparent wrt the effect of -/// U+0338 COMBINING LONG SOLIDUS OVERLAY -/// on its base character. 
-#[cfg(feature = "cjk")] -fn is_solidus_transparent(c: char) -> bool { - let cp: u32 = c.into(); - is_ligature_transparent(c) - || SOLIDUS_TRANSPARENT - .binary_search_by(|&(lo, hi)| { - let lo = u32::from_le_bytes([lo[0], lo[1], lo[2], 0]); - let hi = u32::from_le_bytes([hi[0], hi[1], hi[2], 0]); - if cp < lo { - Ordering::Greater - } else if cp > hi { - Ordering::Less - } else { - Ordering::Equal - } - }) - .is_ok() -} - -/// Whether this character forms an [emoji presentation sequence] -/// (https://www.unicode.org/reports/tr51/#def_emoji_presentation_sequence) -/// when followed by `'\\u{FEOF}'`. -/// Emoji presentation sequences are considered to have width 2. -#[inline] -pub fn starts_emoji_presentation_seq(c: char) -> bool { - let cp: u32 = c.into(); - // First level of lookup uses all but 10 LSB - let top_bits = cp >> 10; - let idx_of_leaf: usize = match top_bits { -""" - ) - - for msbs, i in emoji_presentation_idx: - module.write(f" 0x{msbs:X} => {i},\n") - - module.write( - """ _ => return false, - }; - // Extract the 3-9th (0-indexed) least significant bits of `cp`, - // and use them to index into `leaf_row`. - let idx_within_leaf = usize::try_from((cp >> 3) & 0x7F).unwrap(); - let leaf_byte = EMOJI_PRESENTATION_LEAVES.0[idx_of_leaf][idx_within_leaf]; - // Use the 3 LSB of `cp` to index into `leaf_byte`. - ((leaf_byte >> (cp & 7)) & 1) == 1 -} - -/// Returns `true` if `c` has default emoji presentation, but forms a [text presentation sequence] -/// (https://www.unicode.org/reports/tr51/#def_text_presentation_sequence) -/// when followed by `'\\u{FEOE}'`, and is not ideographic. -/// Such sequences are considered to have width 1. -#[inline] -pub fn starts_non_ideographic_text_presentation_seq(c: char) -> bool { - let cp: u32 = c.into(); - // First level of lookup uses all but 8 LSB - let top_bits = cp >> 8; - let leaf: &[(u8, u8)] = match top_bits { -""" - ) - - for msbs, i in text_presentation_idx: - module.write(f" 0x{msbs:X} => &TEXT_PRESENTATION_LEAF_{i},\n") - - module.write( - """ _ => return false, - }; - - let bottom_bits = (cp & 0xFF) as u8; - leaf.binary_search_by(|&(lo, hi)| { - if bottom_bits < lo { - Ordering::Greater - } else if bottom_bits > hi { - Ordering::Less - } else { - Ordering::Equal - } - }) - .is_ok() -} - -/// Returns `true` if `c` is an `Emoji_Modifier_Base`. -#[inline] -pub fn is_emoji_modifier_base(c: char) -> bool { - let cp: u32 = c.into(); - // First level of lookup uses all but 8 LSB - let top_bits = cp >> 8; - let leaf: &[(u8, u8)] = match top_bits { -""" - ) - - for msbs, i in emoji_modifier_idx: - module.write(f" 0x{msbs:X} => &EMOJI_MODIFIER_LEAF_{i},\n") - - module.write( - """ _ => return false, - }; - - let bottom_bits = (cp & 0xFF) as u8; - leaf.binary_search_by(|&(lo, hi)| { - if bottom_bits < lo { - Ordering::Greater - } else if bottom_bits > hi { - Ordering::Less - } else { - Ordering::Equal - } - }) - .is_ok() -} - -#[repr(align(32))] -struct Align32(T); - -#[repr(align(64))] -struct Align64(T); - -#[repr(align(128))] -struct Align128(T); -""" - ) - - subtable_count = 1 - for i, table in enumerate(tables): - new_subtable_count = len(table.buckets()) - if i == len(tables) - 1: - table.indices_to_widths() # for the last table, indices == widths - byte_array = table.to_bytes() - - if table.bytes_per_row is None: - module.write( - f"/// Autogenerated. {subtable_count} sub-table(s). 
Consult [`lookup_width`] for layout info.\n"
- )
- if table.cfged:
- module.write('#[cfg(feature = "cjk")]\n')
- module.write(
- f"static {table.name}: Align{table.align}<[u8; {len(byte_array)}]> = Align{table.align}(["
- )
- for j, byte in enumerate(byte_array):
- # Add line breaks for every 16th entry (chosen to match what rustfmt does)
- if j % 16 == 0:
- module.write("\n ")
- module.write(f" 0x{byte:02X},")
- module.write("\n")
- else:
- num_rows = len(byte_array) // table.bytes_per_row
- num_primary_rows = (
- table.primary_len
- // (8 // int(table.offset_type))
- // table.bytes_per_row
- )
- module.write(
- f"""
-#[cfg(feature = "cjk")]
-const {table.name}_LEN: usize = {num_rows};
-#[cfg(not(feature = "cjk"))]
-const {table.name}_LEN: usize = {num_primary_rows};
-/// Autogenerated. {subtable_count} sub-table(s). Consult [`lookup_width`] for layout info.
-static {table.name}: Align{table.align}<[[u8; {table.bytes_per_row}]; {table.name}_LEN]> = Align{table.align}([\n"""
- )
- for row_num in range(0, num_rows):
- if row_num >= num_primary_rows:
- module.write(' #[cfg(feature = "cjk")]\n')
- module.write(" [\n")
- row = byte_array[
- row_num
- * table.bytes_per_row : (row_num + 1)
- * table.bytes_per_row
- ]
- for subrow in batched(row, 15):
- module.write(" ")
- for entry in subrow:
- module.write(f" 0x{entry:02X},")
- module.write("\n")
- module.write(" ],\n")
- module.write("]);\n")
- subtable_count = new_subtable_count
-
- # non transparent zero width table
-
- module.write(
- f"""
-/// Sorted list of codepoint ranges (inclusive)
-/// that are zero-width but not `Joining_Type=Transparent`
-/// FIXME: can we get better compression?
-static NON_TRANSPARENT_ZERO_WIDTHS: [([u8; 3], [u8; 3]); {len(non_transparent_zero_widths)}] = [
-"""
- )
-
- for lo, hi in non_transparent_zero_widths:
- module.write(
- f" ([0x{lo & 0xFF:02X}, 0x{lo >> 8 & 0xFF:02X}, 0x{lo >> 16:02X}], [0x{hi & 0xFF:02X}, 0x{hi >> 8 & 0xFF:02X}, 0x{hi >> 16:02X}]),\n"
- )
-
- # solidus transparent table
-
- module.write(
- f"""];
-
-/// Sorted list of codepoint ranges (inclusive)
-/// that don't affect how the combining solidus applies
-/// (mostly ccc > 1).
-/// FIXME: can we get better compression?
-#[cfg(feature = "cjk")]
-static SOLIDUS_TRANSPARENT: [([u8; 3], [u8; 3]); {len(solidus_transparent)}] = [
-"""
- )
-
- for lo, hi in solidus_transparent:
- module.write(
- f" ([0x{lo & 0xFF:02X}, 0x{lo >> 8 & 0xFF:02X}, 0x{lo >> 16:02X}], [0x{hi & 0xFF:02X}, 0x{hi >> 8 & 0xFF:02X}, 0x{hi >> 16:02X}]),\n"
- )
-
- # emoji table
-
- module.write(
- f"""];
-
-/// Array of 1024-bit bitmaps. Index into the correct bitmap with the 10 LSB of your codepoint
-/// to get whether it can start an emoji presentation sequence.
-static EMOJI_PRESENTATION_LEAVES: Align128<[[u8; 128]; {len(emoji_presentation_leaves)}]> = Align128([
-"""
- )
- for leaf in emoji_presentation_leaves:
- module.write(" [\n")
- for row in batched(leaf, 15):
- module.write(" ")
- for entry in row:
- module.write(f" 0x{entry:02X},")
- module.write("\n")
- module.write(" ],\n")
-
- module.write("]);\n")
-
- # text table
-
- for leaf_idx, leaf in enumerate(text_presentation_leaves):
- module.write(
- f"""
-#[rustfmt::skip]
-static TEXT_PRESENTATION_LEAF_{leaf_idx}: [(u8, u8); {len(leaf)}] = [
-"""
- )
- for lo, hi in leaf:
- module.write(f" (0x{lo:02X}, 0x{hi:02X}),\n")
- module.write("];\n")
-
- # emoji modifier table
-
- for leaf_idx, leaf in enumerate(emoji_modifier_leaves):
- module.write(
- f"""
-#[rustfmt::skip]
-static EMOJI_MODIFIER_LEAF_{leaf_idx}: [(u8, u8); {len(leaf)}] = [
-"""
- )
- for lo, hi in leaf:
- module.write(f" (0x{lo:02X}, 0x{hi:02X}),\n")
- module.write("];\n")
-
- test_width_variants = []
- test_width_variants_cjk = []
- for variant in WidthState:
- if variant.is_carried():
- if not variant.is_cjk_only():
- test_width_variants.append(variant)
- if not variant.is_non_cjk_only():
- test_width_variants_cjk.append(variant)
-
- module.write(
- f"""
-#[cfg(test)]
-mod tests {{
- use super::*;
-
- fn str_width_test(s: &str, init: WidthInfo) -> isize {{
- s.chars()
- .rfold((0, init), |(sum, next_info), c| -> (isize, WidthInfo) {{
- let (add, info) = width_in_str(c, next_info);
- (sum.checked_add(isize::from(add)).unwrap(), info)
- }})
- .0
- }}
-
- #[cfg(feature = "cjk")]
- fn str_width_test_cjk(s: &str, init: WidthInfo) -> isize {{
- s.chars()
- .rfold((0, init), |(sum, next_info), c| -> (isize, WidthInfo) {{
- let (add, info) = width_in_str_cjk(c, next_info);
- (sum.checked_add(isize::from(add)).unwrap(), info)
- }})
- .0
- }}
-
- #[test]
- fn test_normalization() {{
- for &(orig, nfc, nfd, nfkc, nfkd) in &NORMALIZATION_TEST {{
- for init in NORMALIZATION_TEST_WIDTHS {{
- assert_eq!(
- str_width_test(orig, init),
- str_width_test(nfc, init),
- "width of X = {{orig:?}} differs from toNFC(X) = {{nfc:?}} with mode {{init:X?}}",
- );
- assert_eq!(
- str_width_test(orig, init),
- str_width_test(nfd, init),
- "width of X = {{orig:?}} differs from toNFD(X) = {{nfd:?}} with mode {{init:X?}}",
- );
- assert_eq!(
- str_width_test(nfkc, init),
- str_width_test(nfkd, init),
- "width of toNFKC(X) = {{nfkc:?}} differs from toNFKD(X) = {{nfkd:?}} with mode {{init:X?}}",
- );
- }}
-
- #[cfg(feature = "cjk")]
- for init in NORMALIZATION_TEST_WIDTHS_CJK {{
- assert_eq!(
- str_width_test_cjk(orig, init),
- str_width_test_cjk(nfc, init),
- "CJK width of X = {{orig:?}} differs from toNFC(X) = {{nfc:?}} with mode {{init:X?}}",
- );
- assert_eq!(
- str_width_test_cjk(orig, init),
- str_width_test_cjk(nfd, init),
- "CJK width of X = {{orig:?}} differs from toNFD(X) = {{nfd:?}} with mode {{init:X?}}",
- );
- assert_eq!(
- str_width_test_cjk(nfkc, init),
- str_width_test_cjk(nfkd, init),
- "CJK width of toNFKC(X) = {{nfkc:?}} differs from toNFKD(X) = {{nfkd:?}} with mode {{init:X?}}",
- );
- }}
- }}
- }}
-
- static NORMALIZATION_TEST_WIDTHS: [WidthInfo; {len(test_width_variants) + 1}] = [
- WidthInfo::DEFAULT,\n"""
- )
-
- for variant in WidthState:
- if variant.is_carried() and not variant.is_cjk_only():
- module.write(f" WidthInfo::{variant.name},\n")
-
- module.write(
- f""" ];
-
- #[cfg(feature = "cjk")]
- static NORMALIZATION_TEST_WIDTHS_CJK: [WidthInfo; {len(test_width_variants_cjk) + 1}] = [
- WidthInfo::DEFAULT,\n"""
- )
-
- for variant 
in WidthState:
- if variant.is_carried() and not variant.is_non_cjk_only():
- module.write(f" WidthInfo::{variant.name},\n")
-
- module.write(
- f""" ];
-
- #[rustfmt::skip]
- static NORMALIZATION_TEST: [(&str, &str, &str, &str, &str); {len(normalization_tests)}] = [\n"""
- )
- for orig, nfc, nfd, nfkc, nfkd in normalization_tests:
- module.write(
- f' (r#"{orig}"#, r#"{nfc}"#, r#"{nfd}"#, r#"{nfkc}"#, r#"{nfkd}"#),\n'
- )
-
- module.write(" ];\n}\n")
-
-
-def main(module_path: str):
- """Obtain character data from the latest version of Unicode, transform it into a multi-level
- lookup table for character width, and write a Rust module utilizing that table to
- `module_path`.
-
- See `lib.rs` for documentation of the exact width rules.
- """
- version = load_unicode_version()
- print(f"Generating module for Unicode {version[0]}.{version[1]}.{version[2]}")
-
- (width_map, cjk_width_map) = load_width_maps()
-
- tables = make_tables(width_map, cjk_width_map)
-
- special_ranges = make_special_ranges(width_map)
- cjk_special_ranges = make_special_ranges(cjk_width_map)
-
- emoji_presentations = load_emoji_presentation_sequences()
- emoji_presentation_table = make_presentation_sequence_table(emoji_presentations)
-
- text_presentations = load_text_presentation_sequences()
- text_presentation_table = make_ranges_table(text_presentations)
-
- emoji_modifier_bases = load_emoji_modifier_bases()
- emoji_modifier_table = make_ranges_table(emoji_modifier_bases)
-
- joining_group_lam = load_joining_group_lam()
- non_transparent_zero_widths = load_non_transparent_zero_widths(width_map)
- ligature_transparent = load_ligature_transparent()
- solidus_transparent = load_solidus_transparent(ligature_transparent, cjk_width_map)
-
- normalization_tests = load_normalization_tests()
-
- fetch_open("emoji-test.txt", "../tests", emoji=True)
-
- print("------------------------")
- total_size = 0
- for i, table in enumerate(tables):
- size_bytes = len(table.to_bytes())
- print(f"Table {i} size: {size_bytes} bytes")
- total_size += size_bytes
-
- for s, table in [
- ("Emoji presentation", emoji_presentation_table),
- ]:
- index_size = len(table[0]) * (math.ceil(math.log(table[0][-1][0], 256)) + 8)
- print(f"{s} index size: {index_size} bytes")
- total_size += index_size
- leaves_size = len(table[1]) * len(table[1][0])
- print(f"{s} leaves size: {leaves_size} bytes")
- total_size += leaves_size
-
- for s, table in [
- ("Text presentation", text_presentation_table),
- ("Emoji modifier", emoji_modifier_table),
- ]:
- index_size = len(table[0]) * (math.ceil(math.log(table[0][-1][0], 256)) + 16)
- print(f"{s} index size: {index_size} bytes")
- total_size += index_size
- leaves_size = 2 * sum(map(len, table[1]))
- print(f"{s} leaves size: {leaves_size} bytes")
- total_size += leaves_size
-
- for s, table in [
- ("Non transparent zero width", non_transparent_zero_widths),
- ("Solidus transparent", solidus_transparent),
- ]:
- table_size = 6 * len(table)
- print(f"{s} table size: {table_size} bytes")
- total_size += table_size
- print("------------------------")
- print(f" Total size: {total_size} bytes")
-
- emit_module(
- out_name=module_path,
- unicode_version=version,
- tables=tables,
- special_ranges=special_ranges,
- special_ranges_cjk=cjk_special_ranges,
- emoji_presentation_table=emoji_presentation_table,
- text_presentation_table=text_presentation_table,
- emoji_modifier_table=emoji_modifier_table,
- joining_group_lam=joining_group_lam,
- non_transparent_zero_widths=non_transparent_zero_widths,
- 
ligature_transparent=ligature_transparent, - solidus_transparent=solidus_transparent, - normalization_tests=normalization_tests, - ) - print(f'Wrote to "{module_path}"') - - -if __name__ == "__main__": - main(MODULE_PATH) -- cgit v1.2.3
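
For reference, the table layout the deleted script emits for `starts_emoji_presentation_seq` can be sketched in a few lines of Python: a match on the codepoint's high bits selects a 128-byte leaf, bits 3..=9 select a byte within it, and the 3 LSB select a bit. This is a minimal sketch, not part of the patch or the script itself; the index and leaf contents are hypothetical stand-ins for the data generated from emoji/emoji-variation-sequences.txt, with a single entry marked for U+1F600.

# Minimal sketch of the emitted two-level bitmap lookup (hypothetical data).
EMOJI_PRESENTATION_INDEX = {0x1F600 >> 10: 0}  # high bits (cp >> 10) -> leaf number
EMOJI_PRESENTATION_LEAVES = [bytearray(128)]   # each leaf is a 1024-bit bitmap

# Hypothetically mark U+1F600 as starting an emoji presentation sequence.
cp = 0x1F600
EMOJI_PRESENTATION_LEAVES[0][(cp >> 3) & 0x7F] |= 1 << (cp & 0x7)

def starts_emoji_presentation_seq(cp: int) -> bool:
    leaf_idx = EMOJI_PRESENTATION_INDEX.get(cp >> 10)  # first level: all but 10 LSB
    if leaf_idx is None:
        return False
    leaf = EMOJI_PRESENTATION_LEAVES[leaf_idx]
    leaf_byte = leaf[(cp >> 3) & 0x7F]                 # bits 3..=9 pick a byte in the leaf
    return (leaf_byte >> (cp & 0x7)) & 1 == 1          # 3 LSB pick a bit in that byte

assert starts_emoji_presentation_seq(0x1F600)
assert not starts_emoji_presentation_seq(0x0041)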
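
The text presentation and `Emoji_Modifier_Base` tables use range-based leaves instead of bitmaps: a match on the codepoint's high bits (cp >> 8) selects a sorted list of inclusive (lo, hi) ranges over the low byte, which is then binary-searched. A comparable sketch follows, again with a hypothetical index and a single-leaf subset (U+1F466..U+1F469 really are Emoji_Modifier_Base characters).

from bisect import bisect

# Minimal sketch of the emitted range-leaf lookup (hypothetical subset of the data).
EMOJI_MODIFIER_INDEX = {0x1F4: 0}         # high bits (cp >> 8) -> leaf number
EMOJI_MODIFIER_LEAVES = [[(0x66, 0x69)]]  # sorted inclusive ranges of the low byte

def is_emoji_modifier_base(cp: int) -> bool:
    leaf_idx = EMOJI_MODIFIER_INDEX.get(cp >> 8)  # first level: all but the 8 LSB
    if leaf_idx is None:
        return False
    leaf = EMOJI_MODIFIER_LEAVES[leaf_idx]
    lo_byte = cp & 0xFF
    i = bisect(leaf, (lo_byte, 0xFF))             # binary search over (lo, hi) pairs
    return i > 0 and leaf[i - 1][0] <= lo_byte <= leaf[i - 1][1]

assert is_emoji_modifier_base(0x1F466)      # U+1F466 BOY is a modifier base
assert not is_emoji_modifier_base(0x1F4A9)  # U+1F4A9 PILE OF POO is not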