From 8cdfa445d6629ffef4cb84967ff7017654045bc2 Mon Sep 17 00:00:00 2001
From: mo khan
Date: Wed, 2 Jul 2025 18:36:06 -0600
Subject: chore: add vendor directory

---
 vendor/unicode-width/benches/benches.rs | 114 ++++++++++++++++++++++++++++++++
 1 file changed, 114 insertions(+)
 create mode 100644 vendor/unicode-width/benches/benches.rs

(limited to 'vendor/unicode-width/benches')

diff --git a/vendor/unicode-width/benches/benches.rs b/vendor/unicode-width/benches/benches.rs
new file mode 100644
index 00000000..44aaee6a
--- /dev/null
+++ b/vendor/unicode-width/benches/benches.rs
@@ -0,0 +1,114 @@
+// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+#![feature(test)]
+
+extern crate test;
+
+use std::iter;
+
+use test::Bencher;
+
+use unicode_width::{UnicodeWidthChar, UnicodeWidthStr};
+
+#[bench]
+fn cargo(b: &mut Bencher) {
+    let string = iter::repeat('a').take(4096).collect::<String>();
+
+    b.iter(|| {
+        for c in string.chars() {
+            test::black_box(UnicodeWidthChar::width(c));
+        }
+    });
+}
+
+#[bench]
+#[allow(deprecated)]
+fn stdlib(b: &mut Bencher) {
+    let string = iter::repeat('a').take(4096).collect::<String>();
+
+    b.iter(|| {
+        for c in string.chars() {
+            test::black_box(c.width());
+        }
+    });
+}
+
+#[bench]
+fn simple_if(b: &mut Bencher) {
+    let string = iter::repeat('a').take(4096).collect::<String>();
+
+    b.iter(|| {
+        for c in string.chars() {
+            test::black_box(simple_width_if(c));
+        }
+    });
+}
+
+#[bench]
+fn simple_match(b: &mut Bencher) {
+    let string = iter::repeat('a').take(4096).collect::<String>();
+
+    b.iter(|| {
+        for c in string.chars() {
+            test::black_box(simple_width_match(c));
+        }
+    });
+}
+
+#[inline]
+fn simple_width_if(c: char) -> Option<usize> {
+    let cu = c as u32;
+    if cu < 127 {
+        if cu > 31 {
+            Some(1)
+        } else if cu == 0 {
+            Some(0)
+        } else {
+            None
+        }
+    } else {
+        UnicodeWidthChar::width(c)
+    }
+}
+
+#[inline]
+fn simple_width_match(c: char) -> Option<usize> {
+    match c as u32 {
+        cu if cu == 0 => Some(0),
+        cu if cu < 0x20 => None,
+        cu if cu < 0x7f => Some(1),
+        _ => UnicodeWidthChar::width(c),
+    }
+}
+
+#[bench]
+fn enwik8(b: &mut Bencher) {
+    // To benchmark, download & unzip `enwik8` from https://data.deepai.org/enwik8.zip
+    let data_path = "bench_data/enwik8";
+    let string = std::fs::read_to_string(data_path).unwrap_or_default();
+    b.iter(|| test::black_box(UnicodeWidthStr::width(string.as_str())));
+}
+
+#[bench]
+fn jawiki(b: &mut Bencher) {
+    // To benchmark, download & extract `jawiki-20220501-pages-articles-multistream-index.txt` from
+    // https://dumps.wikimedia.org/jawiki/20220501/jawiki-20220501-pages-articles-multistream-index.txt.bz2
+    let data_path = "bench_data/jawiki-20220501-pages-articles-multistream-index.txt";
+    let string = std::fs::read_to_string(data_path).unwrap_or_default();
+    b.iter(|| test::black_box(UnicodeWidthStr::width(string.as_str())));
+}
+
+#[bench]
+fn emoji(b: &mut Bencher) {
+    // To benchmark, download emoji-style.txt from https://www.unicode.org/emoji/charts/emoji-style.txt
+    let data_path = "bench_data/emoji-style.txt";
+    let string = std::fs::read_to_string(data_path).unwrap_or_default();
+    b.iter(|| test::black_box(UnicodeWidthStr::width(string.as_str())));
+}
-- 
cgit v1.2.3