diff options
| author | mo khan <mo@mokhan.ca> | 2025-07-02 18:36:06 -0600 |
|---|---|---|
| committer | mo khan <mo@mokhan.ca> | 2025-07-02 18:36:06 -0600 |
| commit | 8cdfa445d6629ffef4cb84967ff7017654045bc2 (patch) | |
| tree | 22f0b0907c024c78d26a731e2e1f5219407d8102 /vendor/unicode-width/benches | |
| parent | 4351c74c7c5f97156bc94d3a8549b9940ac80e3f (diff) | |
chore: add vendor directory
Diffstat (limited to 'vendor/unicode-width/benches')
| -rw-r--r-- | vendor/unicode-width/benches/benches.rs | 114 |
1 files changed, 114 insertions, 0 deletions
```diff
diff --git a/vendor/unicode-width/benches/benches.rs b/vendor/unicode-width/benches/benches.rs
new file mode 100644
index 00000000..44aaee6a
--- /dev/null
+++ b/vendor/unicode-width/benches/benches.rs
@@ -0,0 +1,114 @@
+// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+#![feature(test)]
+
+extern crate test;
+
+use std::iter;
+
+use test::Bencher;
+
+use unicode_width::{UnicodeWidthChar, UnicodeWidthStr};
+
+#[bench]
+fn cargo(b: &mut Bencher) {
+    let string = iter::repeat('a').take(4096).collect::<String>();
+
+    b.iter(|| {
+        for c in string.chars() {
+            test::black_box(UnicodeWidthChar::width(c));
+        }
+    });
+}
+
+#[bench]
+#[allow(deprecated)]
+fn stdlib(b: &mut Bencher) {
+    let string = iter::repeat('a').take(4096).collect::<String>();
+
+    b.iter(|| {
+        for c in string.chars() {
+            test::black_box(c.width());
+        }
+    });
+}
+
+#[bench]
+fn simple_if(b: &mut Bencher) {
+    let string = iter::repeat('a').take(4096).collect::<String>();
+
+    b.iter(|| {
+        for c in string.chars() {
+            test::black_box(simple_width_if(c));
+        }
+    });
+}
+
+#[bench]
+fn simple_match(b: &mut Bencher) {
+    let string = iter::repeat('a').take(4096).collect::<String>();
+
+    b.iter(|| {
+        for c in string.chars() {
+            test::black_box(simple_width_match(c));
+        }
+    });
+}
+
+#[inline]
+fn simple_width_if(c: char) -> Option<usize> {
+    let cu = c as u32;
+    if cu < 127 {
+        if cu > 31 {
+            Some(1)
+        } else if cu == 0 {
+            Some(0)
+        } else {
+            None
+        }
+    } else {
+        UnicodeWidthChar::width(c)
+    }
+}
+
+#[inline]
+fn simple_width_match(c: char) -> Option<usize> {
+    match c as u32 {
+        cu if cu == 0 => Some(0),
+        cu if cu < 0x20 => None,
+        cu if cu < 0x7f => Some(1),
+        _ => UnicodeWidthChar::width(c),
+    }
+}
+
+#[bench]
+fn enwik8(b: &mut Bencher) {
+    // To benchmark, download & unzip `enwik8` from https://data.deepai.org/enwik8.zip
+    let data_path = "bench_data/enwik8";
+    let string = std::fs::read_to_string(data_path).unwrap_or_default();
+    b.iter(|| test::black_box(UnicodeWidthStr::width(string.as_str())));
+}
+
+#[bench]
+fn jawiki(b: &mut Bencher) {
+    // To benchmark, download & extract `jawiki-20220501-pages-articles-multistream-index.txt` from
+    // https://dumps.wikimedia.org/jawiki/20220501/jawiki-20220501-pages-articles-multistream-index.txt.bz2
+    let data_path = "bench_data/jawiki-20220501-pages-articles-multistream-index.txt";
+    let string = std::fs::read_to_string(data_path).unwrap_or_default();
+    b.iter(|| test::black_box(UnicodeWidthStr::width(string.as_str())));
+}
+
+#[bench]
+fn emoji(b: &mut Bencher) {
+    // To benchmark, download emoji-style.txt from https://www.unicode.org/emoji/charts/emoji-style.txt
+    let data_path = "bench_data/emoji-style.txt";
+    let string = std::fs::read_to_string(data_path).unwrap_or_default();
+    b.iter(|| test::black_box(UnicodeWidthStr::width(string.as_str())));
+}
```
