为什么我的程序在Windows上打印UTF-8 BOM?

kyvafyod  于 2022-11-18  发布在  Windows
关注(0)|答案(1)|浏览(149)

我有下面的程序,它在Linux上运行良好:
Cargo.toml

[package]
name = "ansi-color-codec"
authors = ["Richard Neumann <mail@richard-neumann.de>"]
description = "Encode bytes as ANSI background colors"
license-file = "LICENSE"
homepage = "https://github.com/conqp/ansi-color-codec/"
repository = "https://github.com/conqp/ansi-color-codec/"
readme = "README.md"
documentation = "https://docs.rs/ansi-color-codec"
keywords = [ "ANSI", "color", "encoding"]
categories = ["command-line-utilities", "encoding"]
version = "0.3.8"
edition = "2021"
exclude = [
    ".gitignore",
    "input.txt",
]

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
clap = { version = "4.0.23", features = ["derive"] }
ctrlc = "3.2.3"

[profile.release]
strip = true
lto = true
codegen-units = 1
panic = "abort"

src/lib.rs

use std::iter::FlatMap;

/// Bit mask selecting the low four bits (low nibble) of a byte.
const MASK_LOW: u8 = 0b00001111;
/// Bit mask selecting the high four bits (high nibble) of a byte.
const MASK_HIGH: u8 = 0b11110000;
/// Number of bits the high nibble is shifted by when splitting or joining a byte.
const MASK_BITS: u8 = 4;
/// Bit mask selecting the low three bits (`0b0111`), derived from `MASK_LOW`.
const MASK_TRIPLET: u8 = MASK_LOW >> 1;
/// Offset added to nibble values `0..=7` to obtain their color code number.
const COLOR_OFFSET_LOW: u8 = 40;
/// Offset added to the low three bits of nibble values `8..=15` to obtain their color code number.
const COLOR_OFFSET_HIGH: u8 = 100;
/// Largest nibble value that is still mapped through `COLOR_OFFSET_LOW`.
const COLOR_CODE_LOW_MAX: u8 = MASK_TRIPLET;
/// Largest nibble value that can be turned into a color code at all.
const COLOR_CODE_MAX: u8 = MASK_LOW;
/// Bit re-added when a code at or above `COLOR_OFFSET_HIGH` is converted back into a nibble.
const COLOR_CODE_HIGH_BIT: u8 = 0b1000;
/// Maximum number of decimal digits accepted for a color code number while decoding.
const MAX_DIGITS: u8 = 3;
/// Byte that must start every encoded color code (ASCII ESC).
const CODE_START: u8 = 0x1b;
/// Character expected directly after `CODE_START`, before the digits.
const NUMBER_PREFIX: char = '[';
/// Character that terminates the digits of a color code.
const NUMBER_SUFFIX: char = 'm';
/// Error message used whenever the input ends in the middle of a color code.
const UNEXPECTED_TERMINATION_MSG: &str = "Byte stream terminated unexpectedly";

/// Iterator type returned by `ColorCodec::ansi_color_encode`: every input byte
/// is flat-mapped into its two color codes.
type ColorCodes<T> = FlatMap<T, [ColorCode; 2], fn(u8) -> [ColorCode; 2]>;

/// Extension trait that adds ANSI color encoding and decoding to byte iterators.
///
/// `T` is the underlying iterator over `u8` that the adaptors wrap.
pub trait ColorCodec<T>
where
    T: Iterator<Item = u8>,
{
    /// Turns the byte stream into a stream of color codes.
    fn ansi_color_encode(self) -> ColorCodes<T>;
    /// Interprets the byte stream as encoded color codes and yields the decoded
    /// bytes, or an error message for input that cannot be decoded.
    fn ansi_color_decode(self) -> ColorCodesToBytes<ColorCodesFromBytes<T>>;
}

impl<T> ColorCodec<T> for T
where
    T: Iterator<Item = u8>,
{
    fn ansi_color_encode(self) -> ColorCodes<T> {
        self.flat_map(|byte| byte.to_color_codes())
    }

    fn ansi_color_decode(self) -> ColorCodesToBytes<ColorCodesFromBytes<T>> {
        ColorCodesToBytes::from(ColorCodesFromBytes::from(self))
    }
}

/// A single color code number that carries one nibble (four bits) of data.
///
/// Valid numbers are `COLOR_OFFSET_LOW..=COLOR_OFFSET_LOW + COLOR_CODE_LOW_MAX`
/// (40..=47) and `COLOR_OFFSET_HIGH..=COLOR_OFFSET_HIGH + COLOR_CODE_LOW_MAX`
/// (100..=107).
#[derive(Debug, Eq, PartialEq)]
pub struct ColorCode {
    number: u8,
}

impl ColorCode {
    /// Creates a color code from its number.
    ///
    /// # Errors
    ///
    /// Returns `Invalid color code: <number>` when `number` lies outside both
    /// valid ranges. Numbers below `COLOR_OFFSET_LOW` are rejected as well:
    /// they do not map to a nibble, and accepting them would make
    /// [`ColorCode::normalized`] underflow.
    pub fn new(number: u8) -> Result<Self, String> {
        let low_range = COLOR_OFFSET_LOW..=COLOR_OFFSET_LOW + COLOR_CODE_LOW_MAX;
        let high_range = COLOR_OFFSET_HIGH..=COLOR_OFFSET_HIGH + COLOR_CODE_LOW_MAX;

        if low_range.contains(&number) || high_range.contains(&number) {
            Ok(Self { number })
        } else {
            Err(format!("Invalid color code: {}", number))
        }
    }

    /// Returns the nibble value (`0..=15`) represented by this color code.
    pub fn normalized(&self) -> u8 {
        if self.number < COLOR_OFFSET_HIGH {
            // Cannot underflow: `new` guarantees `number >= COLOR_OFFSET_LOW`.
            self.number - COLOR_OFFSET_LOW
        } else {
            self.number - COLOR_OFFSET_HIGH + COLOR_CODE_HIGH_BIT
        }
    }
}

/// Converts a nibble value (`0..=15`) into the color code that represents it.
impl TryFrom<u8> for ColorCode {
    type Error = String;

    /// Maps `0..=COLOR_CODE_LOW_MAX` onto the codes starting at
    /// `COLOR_OFFSET_LOW` and the remaining values up to `COLOR_CODE_MAX` onto
    /// the codes starting at `COLOR_OFFSET_HIGH`.
    ///
    /// # Errors
    ///
    /// Returns an error message when `value` exceeds `COLOR_CODE_MAX`.
    fn try_from(value: u8) -> Result<Self, Self::Error> {
        if value > COLOR_CODE_MAX {
            return Err(format!("Value out of bounds for color code: {}", value));
        }

        let number = if value > COLOR_CODE_LOW_MAX {
            // Drop the high bit; it is implied by the high offset.
            (value & MASK_TRIPLET) + COLOR_OFFSET_HIGH
        } else {
            value + COLOR_OFFSET_LOW
        };

        Self::new(number)
    }
}

/// Renders the color code as the escape sequence `ESC [ <number> m` followed by
/// a single space.
///
/// `Display` is implemented instead of `ToString`: `to_string()` remains
/// available through the standard blanket implementation, and the code can now
/// also be used directly in `format!`/`write!`.
impl std::fmt::Display for ColorCode {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "\x1b[{}m ", self.number)
    }
}

/// Conversion between a value and the pair of color codes that represents it.
trait ColorEncodable {
    /// Splits the value into two color codes.
    fn to_color_codes(&self) -> [ColorCode; 2];
    /// Rebuilds the value from two color codes.
    fn from_color_codes(color_codes: [ColorCode; 2]) -> Self;
}

/// A byte is encoded as two color codes: one per nibble, high nibble first.
impl ColorEncodable for u8 {
    /// Splits the byte into its high and low nibble and converts each into a
    /// color code.
    fn to_color_codes(&self) -> [ColorCode; 2] {
        let high_nibble = (self & MASK_HIGH) >> MASK_BITS;
        let low_nibble = self & MASK_LOW;

        // Both values are at most 15, so the conversion cannot fail.
        [high_nibble, low_nibble].map(|nibble| ColorCode::try_from(nibble).unwrap())
    }

    /// Joins the nibbles of the two color codes back into one byte.
    fn from_color_codes(color_codes: [ColorCode; 2]) -> Self {
        let [high, low] = color_codes;
        (high.normalized() << MASK_BITS) + low.normalized()
    }
}

/// Iterator adaptor that parses color codes out of a stream of raw bytes.
#[derive(Debug, Eq, PartialEq)]
pub struct ColorCodesFromBytes<T>
where
    T: Iterator<Item = u8>,
{
    // The wrapped byte source; consumed incrementally while parsing.
    bytes: T,
}

/// Parsing helpers used by the `Iterator` implementation.
impl<T> ColorCodesFromBytes<T>
where
    T: Iterator<Item = u8>,
{
    /// Consumes the two-byte introducer of a color code (`CODE_START` followed
    /// by `NUMBER_PREFIX`).
    ///
    /// Returns `None` when the byte source is already exhausted,
    /// `Some(Ok(()))` when the introducer was read, and `Some(Err(_))` when a
    /// byte does not match or the source ends after the first byte.
    fn next_header(&mut self) -> Option<Result<(), String>> {
        let start = self.bytes.next()?;
        if start != CODE_START {
            return Some(Err(format!("Invalid start byte: {}", start)));
        }

        let outcome = match self.bytes.next() {
            None => Err(UNEXPECTED_TERMINATION_MSG.to_string()),
            Some(prefix) if char::from(prefix) == NUMBER_PREFIX => Ok(()),
            Some(prefix) => Err(format!("Invalid number prefix: {}", prefix)),
        };
        Some(outcome)
    }

    /// Reads between one and `MAX_DIGITS` ASCII digits terminated by
    /// `NUMBER_SUFFIX` and returns the digits as a string.
    ///
    /// The terminating suffix is consumed as well. Errors are reported for too
    /// many digits, no digits at all, any other byte, or a premature end of
    /// the byte source.
    fn read_digits(&mut self) -> Result<String, String> {
        let mut digits = String::new();

        loop {
            let byte = match self.bytes.next() {
                Some(byte) => byte,
                None => return Err(UNEXPECTED_TERMINATION_MSG.to_string()),
            };

            if byte.is_ascii_digit() {
                if digits.len() >= usize::from(MAX_DIGITS) {
                    return Err(format!("Expected at most {} digits", MAX_DIGITS));
                }
                digits.push(char::from(byte));
                continue;
            }

            if char::from(byte) != NUMBER_SUFFIX {
                return Err(format!("Encountered Unexpected byte \"{}\"", byte));
            }

            if digits.is_empty() {
                return Err("Expected at least one digit".to_string());
            }
            return Ok(digits);
        }
    }

    /// Reads the digits of a color code, skips the single byte that follows the
    /// suffix, and parses the digits as a `u8`.
    fn parse_color_code(&mut self) -> Result<u8, String> {
        let digits = self.read_digits()?;
        // Discard the character that carries the background color; a missing
        // byte is deliberately not treated as an error here.
        self.bytes.next();
        digits
            .parse::<u8>()
            .map_err(|_| format!("Could not parse u8 from {}", digits))
    }
}

impl<T> From<T> for ColorCodesFromBytes<T>
where
    T: Iterator<Item = u8>,
{
    fn from(bytes: T) -> Self {
        Self { bytes }
    }
}

/// Yields one parsed color code (or an error message) per encoded code in the
/// byte source.
impl<T> Iterator for ColorCodesFromBytes<T>
where
    T: Iterator<Item = u8>,
{
    type Item = Result<ColorCode, String>;

    /// Parses the next color code.
    ///
    /// Returns `None` when the byte source is exhausted before a new code
    /// starts, or when the parsed code number is `0`.
    fn next(&mut self) -> Option<Self::Item> {
        match self.next_header()? {
            Err(msg) => Some(Err(msg)),
            Ok(()) => match self.parse_color_code() {
                Err(msg) => Some(Err(format!("{} while parsing color code", msg))),
                // Code number 0 ends the iteration instead of producing an item.
                Ok(0) => None,
                Ok(number) => Some(ColorCode::new(number)),
            },
        }
    }
}

/// Iterator adaptor that combines pairs of color codes back into bytes.
#[derive(Debug, Eq, PartialEq)]
pub struct ColorCodesToBytes<T>
where
    T: Iterator<Item = Result<ColorCode, String>>,
{
    // The wrapped source of parsed color codes (or parse errors).
    codes: T,
}

impl<T> From<T> for ColorCodesToBytes<T>
where
    T: Iterator<Item = Result<ColorCode, String>>,
{
    fn from(codes: T) -> Self {
        Self { codes }
    }
}

/// Yields one decoded byte per pair of color codes.
impl<T> Iterator for ColorCodesToBytes<T>
where
    T: Iterator<Item = Result<ColorCode, String>>,
{
    type Item = Result<u8, String>;

    /// Takes two color codes from the source and joins them into a byte.
    ///
    /// Returns `None` when the source is exhausted at a pair boundary. An
    /// error from either code is passed through; a missing second code is
    /// reported as an error.
    fn next(&mut self) -> Option<Self::Item> {
        let high = match self.codes.next()? {
            Ok(code) => code,
            // On an error in the first code the second one is not consumed.
            Err(msg) => return Some(Err(msg)),
        };

        let decoded = match self.codes.next() {
            Some(Ok(low)) => Ok(u8::from_color_codes([high, low])),
            Some(Err(msg)) => Err(msg),
            None => Err("Missing second color code block".to_string()),
        };
        Some(decoded)
    }
}

src/main.rs

use ansi_color_codec::ColorCodec;
use clap::Parser;
use ctrlc::set_handler;
use std::io::{stdin, stdout, Read, Write};
use std::process::exit;
use std::sync::{
    atomic::{AtomicBool, Ordering},
    Arc,
};

// Panic message used when writing to standard output fails.
const STDOUT_WRITE_ERR: &str = "Could not write bytes to STDOUT";

// Command line arguments, parsed by clap's derive API.
// NOTE: plain `//` comments are used on purpose; `///` doc comments on a clap
// derive struct become part of the generated help text.
#[derive(Parser)]
#[clap(about, author, version)]
struct Args {
    // When set, the program decodes its input instead of encoding it
    // (passed as `-d` in the usage example).
    #[clap(short, long, name = "decode")]
    pub decode: bool,

    // When set, no reset sequence is printed after encoding; `main` passes the
    // negated value to `encode` as its `clear` flag.
    #[clap(short, long, name = "no-clear")]
    pub no_clear: bool,
}

fn main() {
    let args = Args::parse();
    let running = Arc::new(AtomicBool::new(true));
    let bytes = stream_stdin(running.clone());

    set_handler(move || {
        running.store(false, Ordering::SeqCst);
    })
    .expect("Error setting Ctrl-C handler");

    if args.decode {
        decode(bytes)
    } else {
        encode(bytes, !args.no_clear)
    }
}

/// Decodes color codes read from `bytes` and writes the resulting bytes to
/// standard output.
///
/// On the first decoding error the message is printed to standard error and
/// the process exits with status 1.
///
/// # Panics
///
/// Panics when writing to or flushing standard output fails.
fn decode(bytes: impl Iterator<Item = u8>) {
    let mut out = stdout();

    for result in bytes.ansi_color_decode() {
        let byte = result.unwrap_or_else(|msg| {
            eprintln!("{}", msg);
            exit(1)
        });
        out.write_all(&[byte]).expect(STDOUT_WRITE_ERR);
    }

    out.flush().expect("Could not flush STDOUT")
}

/// Encodes `bytes` as color codes and writes them to standard output.
///
/// When `clear` is true, a reset sequence (`ESC[0m`, a space and a newline) is
/// printed after the last code.
///
/// # Panics
///
/// Panics when writing to standard output fails.
fn encode(bytes: impl Iterator<Item = u8>, clear: bool) {
    let mut out = stdout();

    bytes.ansi_color_encode().for_each(|code| {
        let rendered = code.to_string();
        out.write_all(rendered.as_bytes()).expect(STDOUT_WRITE_ERR);
    });

    if clear {
        println!("\x1b[0m ");
    }
}

/// Returns a lazy iterator over the bytes of standard input.
///
/// The iterator ends at the first read error, at end of input, or as soon as
/// `running` is found to be `false` after a byte has been read.
fn stream_stdin(running: Arc<AtomicBool>) -> impl Iterator<Item = u8> {
    stdin().bytes().map_while(move |read| {
        // A read error ends the stream; the flag is only consulted for
        // successfully read bytes.
        read.ok().filter(|_| running.load(Ordering::SeqCst))
    })
}

然而,当我运行

> echo "Windows doing Windows stuff" | ansi-color-codec | ansi-color-codec -d

在Windows上,程序失败,并显示

Invalid start byte: 239

当我检查最前面的(三个)字节时,我可以看到 ansi-color-codec -d 从 ansi-color-codec 接收到了 UTF-8 BOM。

nhhxz33t

nhhxz33t1#

Reddit上找到答案:
编辑:[已解决]
原因:Windows powershell(至少)使用与外部程序(在这种情况下是rust程序)不同的代码页,导致进程间通信不一致。
解决方案:设置以下环境变量,以便与rust程序进行一致的通信。

$OutputEncoding = [console]::InputEncoding = [console]::OutputEncoding = New-Object System.Text.UTF8Encoding

相关问题