diff options
Diffstat (limited to 'vendor/unicode-security/scripts/unicode.py')
| -rwxr-xr-x | vendor/unicode-security/scripts/unicode.py | 694 |
1 files changed, 0 insertions, 694 deletions
#!/usr/bin/env python3
#
# Copyright 2011-2015 The Rust Project Developers. See the COPYRIGHT
# file at the top-level directory of this distribution and at
# http://rust-lang.org/COPYRIGHT.
#
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
# option. This file may not be copied, modified, or distributed
# except according to those terms.

# This script uses the following Unicode security tables:
# - IdentifierStatus.txt
# - IdentifierType.txt
# - PropertyValueAliases.txt
# - confusables.txt
# - ReadMe.txt
# This script also uses the following Unicode UCD data:
# - Scripts.txt
#
# Since this should not require frequent updates, we just store this
# out-of-line and check the tables.rs file into git.

import fileinput
import operator
import os
import re
import subprocess
import sys

preamble = '''// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

// NOTE: The following code was generated by "scripts/unicode.py", do not edit directly

#![allow(missing_docs, non_upper_case_globals, non_snake_case)]
'''

UNICODE_VERSION = (16, 0, 0)

UNICODE_VERSION_NUMBER = "%s.%s.%s" % UNICODE_VERSION


def _download(url, f):
    """Make sure the data file `f` exists in the current directory.

    If `os.path.basename(f)` is not present yet, `curl -O url` is run to
    download it. curl is invoked with an argument list (no shell), so the
    file name is never interpreted by a shell. If the file is still missing
    afterwards, an error is written to stderr and the script exits with
    status 1.
    """
    local_name = os.path.basename(f)
    if not os.path.exists(local_name):
        try:
            subprocess.run(["curl", "-O", url], check=False)
        except OSError:
            # curl could not be started; the existence check below reports it.
            pass

    if not os.path.exists(local_name):
        sys.stderr.write("cannot load %s\n" % f)
        sys.exit(1)


# Download a Unicode security table file
def fetch(f):
    """Fetch `f` from the Unicode security data directory if not cached locally."""
    _download("https://www.unicode.org/Public/security/%s/%s"
              % (UNICODE_VERSION_NUMBER, f), f)


# Download a UCD table file
def fetch_unidata(f):
    """Fetch `f` from the Unicode UCD data directory if not cached locally."""
    _download("https://www.unicode.org/Public/%s/ucd/%s"
              % (UNICODE_VERSION_NUMBER, f), f)


def _load_ranges(path, single_re, range_re, interestingprops):
    """Parse `code ; prop` / `lo..hi ; prop` lines of a Unicode data file.

    `single_re` must capture (code point, property) and `range_re` must
    capture (low, high, property). Lines matching neither pattern are
    skipped. When `interestingprops` is non-empty, only those properties
    are kept.

    Returns a dict mapping each property name to a list of inclusive
    `(lo, hi)` integer ranges, in file order.
    """
    props = {}
    with open(path, encoding="utf-8") as data:
        for line in data:
            m = single_re.match(line)
            if m:
                d_lo = d_hi = m.group(1)
                prop = m.group(2).strip()
            else:
                m = range_re.match(line)
                if not m:
                    continue
                d_lo, d_hi = m.group(1), m.group(2)
                prop = m.group(3).strip()
            if interestingprops and prop not in interestingprops:
                continue
            props.setdefault(prop, []).append((int(d_lo, 16), int(d_hi, 16)))
    return props


# Loads code point data from IdentifierStatus.txt and
# IdentifierType.txt
# Implementation from unicode-segmentation
def load_properties(f, interestingprops = None):
    """Load `{property: [(lo, hi), ...]}` from a Unicode security table.

    The file is downloaded with `fetch` if it is not already present.
    `interestingprops`, when given and non-empty, restricts the result to
    the listed property names.
    """
    fetch(f)
    re1 = re.compile(r"^ *([0-9A-F]+) *; *(\w+)")
    re2 = re.compile(r"^ *([0-9A-F]+)\.\.([0-9A-F]+) *; *(\w+)")
    return _load_ranges(os.path.basename(f), re1, re2, interestingprops)


# Loads script data from Scripts.txt
def load_script_properties(f, interestingprops):
    """Load `{script name: [(lo, hi), ...]}` from a UCD file such as Scripts.txt.

    The file is downloaded with `fetch_unidata` if it is not already
    present. `interestingprops`, when non-empty, restricts the result to
    the listed names.
    """
    fetch_unidata(f)
    # Note: these regexes are different from those in unicode-segmentation,
    # because we need to handle spaces here
    re1 = re.compile(r"^ *([0-9A-F]+) *; *([^#]+) *#")
    re2 = re.compile(r"^ *([0-9A-F]+)\.\.([0-9A-F]+) *; *([^#]+) *#")
    return _load_ranges(os.path.basename(f), re1, re2, interestingprops)


# Loads confusables data from confusables.txt
def load_confusables(f):
    """Load the confusable mappings from `f` (confusables.txt).

    Returns a list of `(source, prototype)` pairs in file order, where
    `source` is a single code point (int) and `prototype` is the list of
    code points (ints) it maps to.

    Raises `Exception` if the first column of a data line holds more than
    one code point.
    """
    fetch(f)
    confusables = []
    re1 = re.compile(r"^((?:[0-9A-F]+ )+);\t((?:[0-9A-F]+ )+);\t\w*")

    with open(os.path.basename(f), encoding="utf-8") as data:
        for line in data:
            m = re1.match(line)
            if not m:
                continue
            d_inputs = m.group(1).split()
            if len(d_inputs) != 1:
                raise Exception('More than one code point in first column')
            d_input = int(d_inputs[0].strip(), 16)
            d_outputs = [int(d_output, 16) for d_output in m.group(2).split()]
            confusables.append((d_input, d_outputs))

    return confusables


# Loads Unicode script name correspondence from PropertyValueAliases.txt
def aliases():
    """Load the script-name aliases (`sc` lines) of PropertyValueAliases.txt.

    Returns `(longforms, shortforms)`: `longforms` maps the first alias on
    each `sc` line to the second one, and `shortforms` is the inverse map.

    Raises `ValueError` if an alias appears twice.
    """
    # This function is taken from the `unicode-script` crate. If significant
    # changes are introduced, update accordingly.

    # Note that this file is in UCD directly, not security directory.
    # we use `fetch_unidata` function to download it.
    fetch_unidata("PropertyValueAliases.txt")
    longforms = {}
    shortforms = {}
    re1 = re.compile(r"^ *sc *; *(\w+) *; *(\w+)")
    with open(os.path.basename("PropertyValueAliases.txt"), encoding="utf-8") as data:
        for line in data:
            m = re1.match(line)
            if not m:
                continue
            l = m.group(2).strip()
            s = m.group(1).strip()
            # Explicit raises instead of `assert` so the checks survive `python -O`.
            if s in longforms:
                raise ValueError("duplicate script alias: %s" % s)
            if l in shortforms:
                raise ValueError("duplicate script alias: %s" % l)
            longforms[s] = l
            shortforms[l] = s

    return (longforms, shortforms)


# Loads Unicode script name list and correspondence mapping
def load_scripts(f):
    """Load the script table from `f` (Scripts.txt).

    Returns `(longforms, script_table)`. `longforms` is the first map
    returned by `aliases()`; `script_table` is a list of
    `(lo, hi, short script name)` tuples sorted by `lo`, covering every
    script found in the file.
    """
    # This function is taken from the `unicode-script` crate. If significant
    # changes are introduced, update accordingly.

    (longforms, shortforms) = aliases()
    scripts = load_script_properties(f, [])

    script_table = []
    for script in scripts:
        script_table.extend([(x, y, shortforms[script]) for (x, y) in scripts[script]])
    script_table.sort(key=lambda w: w[0])
    return (longforms, script_table)


def is_script_ignored_in_mixedscript(source):
    """Return True for the scripts Zinh, Zyyy and Zzzz, which are ignored in mixed-script checks."""
    return source in ('Zinh', 'Zyyy', 'Zzzz')

# When a codepoint's prototype consists of multiple codepoints,
# the situation is more complex. Here we make up a few rules
# to cover all the cases in confusables.txt.
# The principle is that when replacing the original codepoint with its prototype,
# a "non-ignored script" neither appears nor disappears.
#
# We make up several rules to cover the cases that occur within confusables.txt.
# Return True, True when we want to consider it confusable,
# and return True, False when we want to consider it non-confusable,
# and return False, _ when new not-yet-processed cases are added in future Unicode versions.
def process_mixedscript_single_to_multi(item_i, script_i, proto_lst, scripts):
    """Decide whether a code point is confusable with its multi-code-point prototype.

    `script_i` is the script of the source code point `item_i`; `proto_lst`
    is the prototype's code points and `scripts` the script range table.

    Returns `(processed, should_add)`:
    - `(True, True)`   a rule matched and the code point counts as confusable,
    - `(True, False)`  a rule matched and it does not,
    - `(False, False)` no rule matched (new, not-yet-handled data).
    """
    script_lst = script_list(proto_lst, scripts)
    script_lst.sort()
    # These rules were written against the confusables.txt data available when
    # the script was authored; newer data may need additional rules.
    script_lst_len = len(script_lst)
    assert(script_lst_len > 0)
    ignored = is_script_ignored_in_mixedscript
    source_ignored = ignored(script_i)

    if script_lst_len == 1:
        only = script_lst[0]
        # Rule: A - A -> Processed, DontAdd
        if only == script_i:
            return True, False
        # Rule: A(not in (Zinh, Zyyy, Zzzz)) - B(not in (Zinh, Zyyy, Zzzz)) -> Processed, Add
        if not ignored(only) and not source_ignored:
            return True, True
        # Rule: (Zinh | Zyyy | Zzzz) - A(not in (Zinh, Zyyy, Zzzz)) -> Processed, Add
        if ignored(only) and not source_ignored:
            return True, True
        return False, False

    # Rule: A ... - A -> Processed, DontAdd
    if script_i in script_lst:
        return True, False

    if script_lst_len == 2:
        first, second = script_lst
        # Rule: (Zinh | Zyyy | Zzzz) A(not in (Zinh, Zyyy, Zzzz)) - B(not in (Zinh, Zyyy, Zzzz)) -> Processed, Add
        if (ignored(first) and not ignored(second)
                and not source_ignored and second != script_i):
            return True, True
        if (ignored(second) and not ignored(first)
                and not source_ignored and first != script_i):
            return True, True
        # Rule: (Zinh | Zyyy | Zzzz) (Zinh | Zyyy | Zzzz) - A(not in (Zinh, Zyyy, Zzzz)) -> Processed, Add
        if ignored(first) and ignored(second) and not source_ignored:
            return True, True

    # NotProcessed, DontAdd
    return False, False


def is_codepoint_identifier_allowed(c, identifier_allowed):
    """Return True if `c` lies inside any inclusive `(lo, hi)` range of `identifier_allowed`."""
    return any(data[0] <= c <= data[1] for data in identifier_allowed)


def _confusable_entry_item(confusable, script, item_text, item):
    """Return the target list for `item` under `script`, creating the entries on demand."""
    script_entry = confusable.setdefault(script, {})
    if item_text not in script_entry:
        script_entry[item_text] = (item, [])
    return script_entry[item_text][1]


def _record_cross_script_pairs(mixedscript_confusable, source, scripts):
    """Record every pair of code points in `source` that belong to different scripts.

    `source` is one equivalence class of mutually confusable code points.
    For each pair in different scripts, each member whose script is not
    ignored gets the other member appended to its target list.
    """
    source_len = len(source)
    # Examine each pair in the equivalence class
    for i in range(0, source_len - 1):
        for j in range(i + 1, source_len):
            item_i, item_j = source[i], source[j]
            script_i, script_j = codepoint_script(item_i, scripts), codepoint_script(item_j, scripts)
            # If they're in the same script, just skip this pair.
            if script_i == script_j:
                continue
            # If `item_i` is in a non-ignored script and `item_j` is in a different one
            # (maybe ignored), using `item_i` can be suspicious in a document written in
            # `script_j`, so `item_i` is recorded as a mixed-script confusable code point.
            if not is_script_ignored_in_mixedscript(script_i):
                # store as much information as possible, for later inspection of the results.
                _confusable_entry_item(mixedscript_confusable, script_i, escape_char(item_i), item_i).append(item_j)
            # Do the same in reverse from `item_j` to `item_i`
            if not is_script_ignored_in_mixedscript(script_j):
                _confusable_entry_item(mixedscript_confusable, script_j, escape_char(item_j), item_j).append(item_i)


# This function loads and generates a table of all the confusable characters.
# It returns a pair consisting of a `mixedscript_confusable` table and a
# `mixedscript_confusable_unresolved` table.
# The `mixedscript_confusable` is a dict, its keys are Unicode script names, and each
# entry has a value of an inner dict. The inner dict's keys are confusable code points
# converted to string with the `escape_char` function, and its values are pairs.
# pair[0] keeps a copy of the confusable code point itself but as integer.
# pair[1] keeps a list of all the code points that are mixed script confusable with it,
# which is only used for debugging purposes.
# Note that the string 'multi' will occur in the list when pair[0] is considered
# confusable with its multiple code point prototype.
# Usually the `mixedscript_confusable_unresolved` table is empty, but it's possible
# that a future Unicode version update may cause that table to become nonempty, in which
# case more rules need to be added to the `process_mixedscript_single_to_multi` function
# above to cover those new cases.
def load_potential_mixedscript_confusables(f, identifier_allowed, scripts):
    """Build the mixed-script confusable tables from `f` (confusables.txt).

    `identifier_allowed` is a list of inclusive `(lo, hi)` ranges of code
    points allowed in identifiers; `scripts` is a list of
    `(lo, hi, script)` ranges.

    Returns `(mixedscript_confusable, mixedscript_confusable_unresolved)`
    as described in the comment above this function.
    """
    # First, load all confusables data from confusables.txt
    confusables = load_confusables(f)

    # The confusables.txt is reductive, meaning that it is intended to be used in
    # on the fly substitutions. The code points that don't occur in the file can be
    # seen as substitutes for themselves. So if the confusables.txt says A -> C, B -> C,
    # and implicitly C -> C, it means A <-> B, A <-> C, B <-> C are confusable.

    # Here we divide all confusable lhs and rhs (prototype) operands of the substitution
    # into equivalence classes, using the rhs operand as the representative element.
    # Some rhs operands are a single code point, while others are not; they are
    # collected separately into `codepoint_map` and `multicodepoint_map`.
    codepoint_map = {}
    multicodepoint_map = {}
    for d_source, d_proto_list in confusables:
        # According to the RFC, we'll skip those code points that are restricted from identifier usage.
        if not is_codepoint_identifier_allowed(d_source, identifier_allowed):
            continue
        if len(d_proto_list) == 1:
            # the escaped representation of the rhs is the key of the equivalence class.
            d_proto = escape_char(d_proto_list[0])
            if d_proto not in codepoint_map:
                codepoint_map[d_proto] = []
                # when creating a new equivalence class, the representative element itself is
                # collected only if it is not restricted from identifier usage.
                if is_codepoint_identifier_allowed(d_proto_list[0], identifier_allowed):
                    codepoint_map[d_proto].append(d_proto_list[0])
            # collect the original code point to be substituted into this list.
            codepoint_map[d_proto].append(d_source)
        else:
            d_protos = escape_char_list(d_proto_list)
            # difference in the multi code point case: the rhs part is not directly usable, so it is
            # stored alongside the sources for the special examination further below.
            if d_protos not in multicodepoint_map:
                multicodepoint_map[d_protos] = (d_proto_list, [])
            multicodepoint_map[d_protos][1].append(d_source)

    mixedscript_confusable = {}

    # First examine the code points that have a single code point prototype.
    for source in codepoint_map.values():
        _record_cross_script_pairs(mixedscript_confusable, source, scripts)

    # Then examine the code points that share the same multiple code point prototype.
    for _, source in multicodepoint_map.values():
        _record_cross_script_pairs(mixedscript_confusable, source, scripts)

    mixedscript_confusable_unresolved = {}
    # We'll also check between each code point and its multiple code point prototype
    for proto_lst_text, (proto_lst, source) in multicodepoint_map.items():
        # If the prototype contains one or more restricted code points, then we skip it.
        if not all(is_codepoint_identifier_allowed(c, identifier_allowed) for c in proto_lst):
            continue
        for item_i in source:
            # So here we're just checking whether the single code point should be considered confusable.
            script_i = codepoint_script(item_i, scripts)
            # If it's in an ignored script, we don't need to do anything here.
            if is_script_ignored_in_mixedscript(script_i):
                continue
            # The principle of the rules is that, when substitution happens, no new
            # non-ignored script is introduced, and its own script is not lost.
            processed, should_add = process_mixedscript_single_to_multi(item_i, script_i, proto_lst, scripts)
            if should_add:
                assert(processed)
                # Mark the single code point as confusable.
                _confusable_entry_item(mixedscript_confusable, script_i, escape_char(item_i), item_i).append('multi')
            if processed:
                # Finished dealing with this code point.
                continue
            # If it's not processed we must be dealing with newer Unicode data that introduced
            # significant changes. We don't throw an exception here; instead it is collected
            # into a table for debugging purposes.
            if proto_lst_text not in mixedscript_confusable_unresolved:
                mixedscript_confusable_unresolved[proto_lst_text] = (proto_lst, [])
            mixedscript_confusable_unresolved[proto_lst_text][1].append(item_i)
    return (mixedscript_confusable, mixedscript_confusable_unresolved)


def codepoint_script(c, scripts):
    """Return the script of code point `c` from the `(lo, hi, script)` table `scripts`.

    Raises `Exception` if no range contains `c`.
    """
    for x, y, script in scripts:
        if x <= c <= y:
            return script
    raise Exception("Not in scripts: " + escape_char(c))

# Emit some useful information for debugging when further update happens.
def debug_emit_mixedscript_confusable(f, mixedscript_confusable, text, scripts):
    """Write the `mixedscript_confusable` table to `f` inside a `/* ... */` comment.

    For each script, one line per confusable code point (sorted by code
    point) lists its targets and the scripts of those targets.
    """
    f.write("/* " + text + "\n")
    for script, lst in mixedscript_confusable.items():
        f.write("/// Script - " + script + "\n")
        source_lst = sorted(v[0] for v in lst.values())
        for source in source_lst:
            source_text = escape_char(source)
            target_lst = lst[source_text][1]
            f.write(source_text + " => " + escape_char_list(target_lst) + " // " + escape_script_list(target_lst, scripts)+ "\n")
    f.write("*/\n")


def script_list(char_lst, scripts):
    """Return the distinct scripts of `char_lst` in first-seen order.

    The marker string 'multi' is reported as the pseudo script 'Z~multi';
    every other item is looked up with `codepoint_script`.
    """
    script_lst = []
    for c in char_lst:
        if c == 'multi':
            script = 'Z~multi'
        else:
            script = codepoint_script(c, scripts)
        if script not in script_lst:
            script_lst.append(script)
    return script_lst


def escape_script_list(char_lst, scripts):
    """Return `str()` of the sorted list of distinct scripts of `char_lst`."""
    return str(sorted(script_list(char_lst, scripts)))


def debug_emit_mixedscript_confusable_unresolved(f, map, text, scripts):
    """Print the unresolved table to stdout and raise if it is non-empty.

    Does nothing when `map` is empty. Otherwise every entry is printed
    (`f` is not written to) and an `Exception` is raised asking for new
    rules to be added.
    """
    if not map:
        return
    print("// " + text + "\n")
    for prototype_text, pair in map.items():
        prototype = pair[0]
        source = pair[1]
        print(prototype_text + " => " + escape_char_list(source) + " // " + escape_script_list(prototype, scripts) + " => " + escape_script_list(source, scripts) + "\n")
    raise Exception("update the python script to add new rules for new data")


def format_table_content(f, content, indent):
    """Write the comma-separated `content` to `f`, wrapped and indented.

    `content` is split on ","; chunks are re-joined with ", " on lines
    that start with `indent` spaces, and a new line is started once adding
    a chunk would make the line 98 characters or longer.
    """
    line = " "*indent
    first = True
    for chunk in content.split(","):
        if len(line) + len(chunk) < 98:
            if first:
                line += chunk
            else:
                line += ", " + chunk
            first = False
        else:
            f.write(line + ",\n")
            line = " "*indent + chunk
    f.write(line)


def escape_char(c):
    """Return `c` as a Rust char literal (`'\\u{..}'`), or a placeholder string for 'multi'."""
    if c == 'multi':
        return "\"<multiple code points>\""
    return "'\\u{%x}'" % c


def escape_char_list(l):
    """Return the items of `l` escaped with `escape_char`, formatted as `[a, b, ...]`."""
    return "[" + ", ".join(escape_char(c) for c in l) + "]"


def emit_table(f, name, t_data, t_type = "&'static [(char, char)]", is_pub=True,
               pfun=lambda x: "(%s,%s)" % (escape_char(x[0]), escape_char(x[1])), is_const=True):
    """Write a Rust table definition named `name` to `f`.

    `pfun` formats each element of `t_data`; `t_type` is the Rust type.
    The declaration uses `const` (or `let` when `is_const` is false) and is
    prefixed with `pub` when `is_pub` is true.
    """
    # NOTE(review): the leading spaces in the two literals below were
    # reconstructed (4-space Rust indent) -- confirm against the upstream script.
    pub_string = "const" if is_const else "let"
    if is_pub:
        pub_string = "pub " + pub_string
    f.write("    %s %s: %s = &[\n" % (pub_string, name, t_type))
    data = ",".join(pfun(dat) for dat in t_data)
    format_table_content(f, data, 8)
    f.write("\n    ];\n\n")


def emit_identifier_module(f):
    """Write the Rust `identifier` module (status and type tables) to `f`."""
    f.write("pub mod identifier {")
    f.write("""

    #[derive(Copy, Clone, Hash, Eq, PartialEq, Ord, PartialOrd, Debug)]
    #[allow(non_camel_case_types)]
    /// https://www.unicode.org/reports/tr39/#Identifier_Status_and_Type
    pub enum IdentifierType {
        // Restricted
        Not_Character,
        Deprecated,
        Default_Ignorable,
        Not_NFKC,
        Not_XID,
        Exclusion,
        Obsolete,
        Technical,
        Uncommon_Use,
        Limited_Use,

        // Allowed
        Inclusion,
        Recommended
    }
    #[inline]
    pub fn identifier_status_allowed(c: char) -> bool {
        // FIXME: do we want to special case ASCII here?
        match c as usize {
            _ => super::util::bsearch_range_table(c, IDENTIFIER_STATUS)
        }
    }

    #[inline]
    pub fn identifier_type(c: char) -> Option<IdentifierType> {
        // FIXME: do we want to special case ASCII here?
        match c as usize {
            _ => super::util::bsearch_range_value_table(c, IDENTIFIER_TYPE)
        }
    }
""")

    f.write("    // Identifier status table:\n")
    identifier_status_table = load_properties("IdentifierStatus.txt")
    emit_table(f, "IDENTIFIER_STATUS", identifier_status_table['Allowed'], "&'static [(char, char)]", is_pub=False,
            pfun=lambda x: "(%s,%s)" % (escape_char(x[0]), escape_char(x[1])))
    identifier_type = load_properties("IdentifierType.txt")
    type_table = []
    for ty in identifier_type:
        type_table.extend([(x, y, ty) for (x, y) in identifier_type[ty]])

    type_table.sort(key=lambda w: w[0])

    emit_table(f, "IDENTIFIER_TYPE", type_table, "&'static [(char, char, IdentifierType)]", is_pub=False,
            pfun=lambda x: "(%s,%s, IdentifierType::%s)" % (escape_char(x[0]), escape_char(x[1]), x[2]))
    f.write("}\n\n")


def emit_confusable_detection_module(f):
    """Write the Rust `confusable_detection` module (prototype table) to `f`.

    Raises `Exception` if confusables.txt maps the same code point twice.
    """
    f.write("pub mod confusable_detection {")
    f.write("""

    #[inline]
    pub fn char_confusable_prototype(c: char) -> Option<&'static [char]> {
        // FIXME: do we want to special case ASCII here?
        match c as usize {
            _ => super::util::bsearch_value_table(c, CONFUSABLES)
        }
    }

""")

    f.write("    // Confusable table:\n")
    confusable_table = load_confusables("confusables.txt")
    confusable_table.sort(key=lambda w: w[0])

    # The generated table is binary-searched by key, so keys must be unique.
    last_key = None
    for (k, _) in confusable_table:
        if k == last_key:
            raise Exception("duplicate keys in confusables table: %s" % k)
        last_key = k

    emit_table(f, "CONFUSABLES", confusable_table, "&'static [(char, &'static [char])]", is_pub=False,
            pfun=lambda x: "(%s, &%s)" % (escape_char(x[0]), escape_char_list(x[1])))
    f.write("}\n\n")


def escape_script_constant(name, longforms):
    """Return the Rust constant path `Script::<long name>` for the script `name`."""
    return "Script::" + longforms[name].strip()


def emit_potiential_mixed_script_confusable(f):
    """Write the Rust `potential_mixed_script_confusable` module to `f`."""
    f.write("pub mod potential_mixed_script_confusable {")
    f.write("""
    #[inline]
    pub fn potential_mixed_script_confusable(c: char) -> bool {
        match c as usize {
            _ => super::util::bsearch_table(c, CONFUSABLES)
        }
    }
""")
    identifier_status_table = load_properties("IdentifierStatus.txt")
    _, scripts = load_scripts("Scripts.txt")
    identifier_allowed = identifier_status_table['Allowed']
    (mixedscript_confusable, mixedscript_confusable_unresolved) = load_potential_mixedscript_confusables("confusables.txt", identifier_allowed, scripts)
    debug = False
    if debug:
        debug_emit_mixedscript_confusable(f, mixedscript_confusable, "mixedscript_confusable", scripts)
        # NOTE(review): the flattened source does not show whether this call sits
        # inside or after the `if debug` block -- confirm against the upstream script.
        debug_emit_mixedscript_confusable_unresolved(f, mixedscript_confusable_unresolved, "mixedscript_confusable_unresolved", scripts)
    confusable_table = []
    for script, lst in mixedscript_confusable.items():
        for pair in lst.values():
            confusable_table.append((pair[0], script))
    confusable_table.sort(key=lambda w: w[0])
    emit_table(f, "CONFUSABLES", confusable_table, "&'static [char]", is_pub=False,
            pfun=lambda x: "%s" % escape_char(x[0]))
    f.write("}\n\n")


def emit_util_mod(f):
    """Write the Rust `util` module (binary-search helpers) to `f`."""
    f.write("""
pub mod util {
    use core::result::Result::{Ok, Err};

    #[inline]
    pub fn bsearch_table(c: char, r: &'static [char]) -> bool {
        r.binary_search(&c).is_ok()
    }

    #[inline]
    pub fn bsearch_value_table<T: Copy>(c: char, r: &'static [(char, T)]) -> Option<T> {
        match r.binary_search_by_key(&c, |&(k, _)| k) {
            Ok(idx) => {
                let (_, v) = r[idx];
                Some(v)
            }
            Err(_) => None
        }
    }

    #[inline]
    pub fn bsearch_range_table(c: char, r: &'static [(char,char)]) -> bool {
        use core::cmp::Ordering::{Equal, Less, Greater};
        r.binary_search_by(|&(lo,hi)| {
            if lo <= c && c <= hi { Equal }
            else if hi < c { Less }
            else { Greater }
        }).is_ok()
    }

    pub fn bsearch_range_value_table<T: Copy>(c: char, r: &'static [(char, char, T)]) -> Option<T> {
        use core::cmp::Ordering::{Equal, Less, Greater};
        match r.binary_search_by(|&(lo, hi, _)| {
            if lo <= c && c <= hi { Equal }
            else if hi < c { Less }
            else { Greater }
        }) {
            Ok(idx) => {
                let (_, _, cat) = r[idx];
                Some(cat)
            }
            Err(_) => None
        }
    }

}

""")


if __name__ == "__main__":
    r = "tables.rs"
    if os.path.exists(r):
        os.remove(r)
    # Explicit encoding so the generated file does not depend on the locale.
    with open(r, "w", encoding="utf-8") as rf:
        # write the file's preamble
        rf.write(preamble)

        rf.write("""
/// The version of [Unicode](http://www.unicode.org/)
/// that this version of unicode-security is based on.
pub const UNICODE_VERSION: (u64, u64, u64) = (%s, %s, %s);

""" % UNICODE_VERSION)

        emit_util_mod(rf)
        ### identifier module
        emit_identifier_module(rf)
        ### confusable_detection module
        emit_confusable_detection_module(rf)
        ### mixed_script_confusable_detection module
        emit_potiential_mixed_script_confusable(rf)
