From 5edfa9ead62d9c0bc37674c3afd0b7768957b571 Mon Sep 17 00:00:00 2001 From: Miikka Salminen Date: Wed, 18 Jun 2025 20:41:31 +0300 Subject: [PATCH 1/3] Keep radix for integer literals in generated bindings --- .../tests/different_radix_literals.rs | 41 +++ .../headers/different_radix_literals.hpp | 70 +++++ bindgen/clang.rs | 42 +++ bindgen/codegen/helpers.rs | 225 ++++++++++++++- bindgen/codegen/mod.rs | 17 +- bindgen/ir/enum_ty.rs | 13 + bindgen/ir/var.rs | 267 ++++++++++++++++-- 7 files changed, 637 insertions(+), 38 deletions(-) create mode 100644 bindgen-tests/tests/expectations/tests/different_radix_literals.rs create mode 100644 bindgen-tests/tests/headers/different_radix_literals.hpp diff --git a/bindgen-tests/tests/expectations/tests/different_radix_literals.rs b/bindgen-tests/tests/expectations/tests/different_radix_literals.rs new file mode 100644 index 0000000000..60e419fafa --- /dev/null +++ b/bindgen-tests/tests/expectations/tests/different_radix_literals.rs @@ -0,0 +1,41 @@ +#![allow(dead_code, non_snake_case, non_camel_case_types, non_upper_case_globals)] +pub const DEFINE_BIN_LITERAL: u32 = 0b10; +pub const DEFINE_NEG_BIN_LITERAL: i32 = -0b10; +pub const DEFINE_OCT_LITERAL: u32 = 0o10; +pub const DEFINE_NEG_OCT_LITERAL: i32 = -0o10; +pub const DEFINE_HEX_LITERAL: u32 = 0x10; +pub const DEFINE_NEG_HEX_LITERAL: i32 = -0x10; +pub const DEFINE_DEC_LITERAL: u32 = 10; +pub const DEFINE_NEG_DEC_LITERAL: i32 = -10; +pub const CONST_INT_BIN_LITERAL: ::std::os::raw::c_int = 0b10; +pub const CONST_INT_NEG_BIN_LITERAL: ::std::os::raw::c_int = -0b10; +pub const CONST_INT_OCT_LITERAL: ::std::os::raw::c_int = 0o10; +pub const CONST_INT_NEG_OCT_LITERAL: ::std::os::raw::c_int = -0o10; +pub const CONST_INT_HEX_LITERAL: ::std::os::raw::c_int = 0x10; +pub const CONST_INT_NEG_HEX_LITERAL: ::std::os::raw::c_int = -0x10; +pub const CONST_INT_DEC_LITERAL: ::std::os::raw::c_int = 10; +pub const CONST_INT_NEG_DEC_LITERAL: ::std::os::raw::c_int = -10; +pub const 
MultiRadixLiteral_ENUM_BIN_LITERAL: MultiRadixLiteral = 0b10; +pub const MultiRadixLiteral_ENUM_NEG_BIN_LITERAL: MultiRadixLiteral = -0b10; +pub const MultiRadixLiteral_ENUM_OCT_LITERAL: MultiRadixLiteral = 0o10; +pub const MultiRadixLiteral_ENUM_NEG_OCT_LITERAL: MultiRadixLiteral = -0o10; +pub const MultiRadixLiteral_ENUM_HEX_LITERAL: MultiRadixLiteral = 0x10; +pub const MultiRadixLiteral_ENUM_NEG_HEX_LITERAL: MultiRadixLiteral = -0x10; +pub const MultiRadixLiteral_ENUM_DEC_LITERAL: MultiRadixLiteral = 10; +pub const MultiRadixLiteral_ENUM_NEG_DEC_LITERAL: MultiRadixLiteral = -10; +pub type MultiRadixLiteral = ::std::os::raw::c_int; +pub const MIN_I64_BIN: ::std::os::raw::c_longlong = -0b1000000000000000000000000000000000000000000000000000000000000000; +pub const MIN_I64_OCT: ::std::os::raw::c_longlong = -0o1000000000000000000000; +pub const MIN_I64_DEC: ::std::os::raw::c_longlong = -9223372036854775808; +pub const MIN_I64_HEX: ::std::os::raw::c_longlong = -0x8000000000000000; +pub const BIG_B_BIN: ::std::os::raw::c_int = 0b1; +pub const BIG_X_HEX: ::std::os::raw::c_int = 0xf; +pub const AGENT: ::std::os::raw::c_char = 0o7; +pub const SEP_BIN: ::std::os::raw::c_ulonglong = 0b1111111100000000; +pub const SEP_OCT: ::std::os::raw::c_ulonglong = 0o777777777777; +pub const SEP_DEC: ::std::os::raw::c_ulonglong = 299792458; +pub const SEP_HEX: ::std::os::raw::c_ulonglong = 0x1111bbbbccccdddd; +pub const BIN_1ST: ::std::os::raw::c_long = 0b10101010; +pub const OCT_2ND: ::std::os::raw::c_long = 0o777; +pub const DEC_3RD: ::std::os::raw::c_long = 1234; +pub const HEX_4TH: ::std::os::raw::c_long = 0xffff; diff --git a/bindgen-tests/tests/headers/different_radix_literals.hpp b/bindgen-tests/tests/headers/different_radix_literals.hpp new file mode 100644 index 0000000000..0c155d73df --- /dev/null +++ b/bindgen-tests/tests/headers/different_radix_literals.hpp @@ -0,0 +1,70 @@ +// bindgen-flags: -- -std=c++14 +// (C23 is not available in clang 9.0, but C++14 supports the same 
literals) + +// Binary integer literals (C23) - 0b10 is 2 in decimal + +#define DEFINE_BIN_LITERAL 0b10 +#define DEFINE_NEG_BIN_LITERAL -0b10 +const int CONST_INT_BIN_LITERAL = 0b10; +const int CONST_INT_NEG_BIN_LITERAL = -0b10; + +// Octal integer literals - 010 is 8 in decimal + +#define DEFINE_OCT_LITERAL 010 +#define DEFINE_NEG_OCT_LITERAL -010 +const int CONST_INT_OCT_LITERAL = 010; +const int CONST_INT_NEG_OCT_LITERAL = -010; + +// Hexadecimal integer literals - 0x10 is 16 in decimal + +#define DEFINE_HEX_LITERAL 0x10 +#define DEFINE_NEG_HEX_LITERAL -0x10 +const int CONST_INT_HEX_LITERAL = 0x10; +const int CONST_INT_NEG_HEX_LITERAL = -0x10; + +// Default decimal integer literals - 10 is 10 in decimal + +#define DEFINE_DEC_LITERAL 10 +#define DEFINE_NEG_DEC_LITERAL -10 +const int CONST_INT_DEC_LITERAL = 10; +const int CONST_INT_NEG_DEC_LITERAL = -10; + +// Enums with binary, octal, and hexadecimal integer literals + +enum MultiRadixLiteral { + ENUM_BIN_LITERAL = 0b10, + ENUM_NEG_BIN_LITERAL = -0b10, + ENUM_OCT_LITERAL = 010, + ENUM_NEG_OCT_LITERAL = -010, + ENUM_HEX_LITERAL = 0x10, + ENUM_NEG_HEX_LITERAL = -0x10, + ENUM_DEC_LITERAL = 10, + ENUM_NEG_DEC_LITERAL = -10, +}; + +// Edge cases: minimum i64s + +const long long MIN_I64_BIN = -0b1000000000000000000000000000000000000000000000000000000000000000; +const long long MIN_I64_OCT = -01000000000000000000000; +const long long MIN_I64_DEC = -9223372036854775808; +const long long MIN_I64_HEX = -0x8000000000000000; + +// Big B or big X + +const int BIG_B_BIN = 0B1; +const int BIG_X_HEX = 0XF; + +// Octal with extra leading zero + +const char AGENT = 007; + +// C23 and C++14 thousands'/digit separator ' + +const unsigned long long SEP_BIN = 0b11111111'00000000; +const unsigned long long SEP_OCT = 07777'7777'7777; +const unsigned long long SEP_DEC = 299'792'458; +const unsigned long long SEP_HEX = 0x1111'bbbb'cccc'dddd; + +// Multiple declarations + +const long BIN_1ST = 0b10101010, OCT_2ND = 0777, DEC_3RD = 1234, 
HEX_4TH = 0xffff; diff --git a/bindgen/clang.rs b/bindgen/clang.rs index e52fed0d4a..860ec7fa55 100644 --- a/bindgen/clang.rs +++ b/bindgen/clang.rs @@ -5,6 +5,7 @@ #![deny(clippy::missing_docs_in_private_items)] use crate::ir::context::BindgenContext; +use crate::ir::var::LiteralRadix; use clang_sys::*; use std::cmp; @@ -973,6 +974,47 @@ impl Cursor { pub(crate) fn is_inline_namespace(&self) -> bool { unsafe { clang_Cursor_isInlineNamespace(self.x) != 0 } } + + /// Obtain the number base (radix) of an integer literal definition corresponding to the cursor. + /// + /// Returns `None` if unable to infer a base. + pub(crate) fn get_literal_radix(&self) -> Option<LiteralRadix> { + self.tokens().iter().find_map(|token| { + if token.kind == CXToken_Literal { + LiteralRadix::from_integer_literal_token(token.spelling()) + } else { + None + } + }) + } + + /// Obtain the number base (radix) of an integer literal definition corresponding to the cursor, + /// ensuring that the radix is from the literal following a given identifier in the list of + /// tokens. + /// + /// Returns `None` if unable to infer a base. + pub(crate) fn get_literal_radix_of_identifier( + &self, + identifier: &str, + ) -> Option<LiteralRadix> { + self.tokens() + .iter() + .scan(false, |identifier_found, token| { + if token.kind == CXToken_Identifier && + token.spelling() == identifier.as_bytes() + { + *identifier_found = true; + } + Some((*identifier_found, token)) + }) + .find_map(|(identifier_found, token)| { + if identifier_found && token.kind == CXToken_Literal { + LiteralRadix::from_integer_literal_token(token.spelling()) + } else { + None + } + }) + } } /// A struct that owns the tokenizer result from a given cursor. 
diff --git a/bindgen/codegen/helpers.rs b/bindgen/codegen/helpers.rs index 82172f3488..c1f1668156 100644 --- a/bindgen/codegen/helpers.rs +++ b/bindgen/codegen/helpers.rs @@ -139,6 +139,7 @@ pub(crate) mod ast_ty { use crate::ir::function::FunctionSig; use crate::ir::layout::Layout; use crate::ir::ty::{FloatKind, IntKind}; + use crate::ir::var::LiteralRadix; use crate::RustTarget; use proc_macro2::TokenStream; use std::str::FromStr; @@ -291,16 +292,50 @@ pub(crate) mod ast_ty { } } - pub(crate) fn int_expr(val: i64) -> TokenStream { + fn integer_with_radix( + val: u64, + is_negative: bool, + radix: &LiteralRadix, + ) -> TokenStream { + let sign = if is_negative { "-" } else { "" }; + let val = match radix { + LiteralRadix::Binary => format!("{sign}0b{val:b}"), + LiteralRadix::Octal => format!("{sign}0o{val:o}"), + LiteralRadix::Hexadecimal => format!("{sign}0x{val:x}"), + LiteralRadix::Decimal => format!("{sign}{val}"), + }; + TokenStream::from_str(val.as_str()) + .expect("val was infallibly constructed") + } + + pub(crate) fn int_expr( + val: i64, + radix: Option<&LiteralRadix>, + ) -> TokenStream { // Don't use quote! { #val } because that adds the type suffix. - let val = proc_macro2::Literal::i64_unsuffixed(val); - quote!(#val) + match radix { + None | Some(LiteralRadix::Decimal) => { + let val = proc_macro2::Literal::i64_unsuffixed(val); + quote!(#val) + } + Some(radix) => { + integer_with_radix(val.unsigned_abs(), val.is_negative(), radix) + } + } } - pub(crate) fn uint_expr(val: u64) -> TokenStream { + pub(crate) fn uint_expr( + val: u64, + radix: Option<&LiteralRadix>, + ) -> TokenStream { // Don't use quote! { #val } because that adds the type suffix. 
- let val = proc_macro2::Literal::u64_unsuffixed(val); - quote!(#val) + match radix { + None | Some(LiteralRadix::Decimal) => { + let val = proc_macro2::Literal::u64_unsuffixed(val); + quote!(#val) + } + Some(radix) => integer_with_radix(val, false, radix), + } } pub(crate) fn cstr_expr(mut string: String) -> TokenStream { @@ -392,4 +427,182 @@ pub(crate) mod ast_ty { }) .collect() } + + #[cfg(test)] + mod test { + use super::*; + + #[test] + fn integer_with_radix_outputs_correct_tokens() { + use super::LiteralRadix as R; + struct Ar { + v: u64, + n: bool, + r: R, + } + let inputs_and_expected_results = &[ + (Ar { v: 0b0, n: false, r: R::Binary }, quote! { 0b0 }), + (Ar { v: 0o0, n: false, r: R::Octal }, quote! { 0o0 }), + (Ar { v: 0, n: false, r: R::Decimal }, quote! { 0 }), + (Ar { v: 0x0, n: false, r: R::Hexadecimal }, quote! { 0x0 }), + + (Ar { v: 0b1, n: false, r: R::Binary }, quote! { 0b1 }), + (Ar { v: 0o1, n: false, r: R::Octal }, quote! { 0o1 }), + (Ar { v: 1, n: false, r: R::Decimal }, quote! { 1 }), + (Ar { v: 0x1, n: false, r: R::Hexadecimal }, quote! { 0x1 }), + + (Ar { v: 0b1, n: true, r: R::Binary }, quote! { -0b1 }), + (Ar { v: 0o1, n: true, r: R::Octal }, quote! { -0o1 }), + (Ar { v: 1, n: true, r: R::Decimal }, quote! { -1 }), + (Ar { v: 0x1, n: true, r: R::Hexadecimal }, quote! { -0x1 }), + + (Ar { v: 0b1000000000000000000000000000000000000000000000000000000000000000, n: false, r: R::Binary }, quote! { 0b1000000000000000000000000000000000000000000000000000000000000000 }), + (Ar { v: 0o1000000000000000000000, n: false, r: R::Octal }, quote! { 0o1000000000000000000000 }), + (Ar { v: 9223372036854775808, n: false, r: R::Decimal }, quote! { 9223372036854775808 }), + (Ar { v: 0x8000000000000000, n: false, r: R::Hexadecimal }, quote! { 0x8000000000000000 }), + + (Ar { v: 0b1000000000000000000000000000000000000000000000000000000000000000, n: true, r: R::Binary }, quote! 
{ -0b1000000000000000000000000000000000000000000000000000000000000000 }), + (Ar { v: 0o1000000000000000000000, n: true, r: R::Octal }, quote! { -0o1000000000000000000000 }), + (Ar { v: 9223372036854775808, n: true, r: R::Decimal }, quote! { -9223372036854775808 }), + (Ar { v: 0x8000000000000000, n: true, r: R::Hexadecimal }, quote! { -0x8000000000000000 }), + + (Ar { v: u64::MAX, n: false, r: R::Binary }, quote! { 0b1111111111111111111111111111111111111111111111111111111111111111 }), + (Ar { v: u64::MAX, n: false, r: R::Octal }, quote! { 0o1777777777777777777777 }), + (Ar { v: u64::MAX, n: false, r: R::Decimal }, quote! { 18446744073709551615 }), + (Ar { v: u64::MAX, n: false, r: R::Hexadecimal }, quote! { 0xffffffffffffffff }), + ]; + for (i, e) in inputs_and_expected_results { + assert_eq!( + integer_with_radix(i.v, i.n, &i.r).to_string(), + e.to_string() + ); + } + } + + #[test] + fn int_expr_outputs_correct_tokens() { + use super::LiteralRadix as R; + let values_and_expected_results = &[ + ( + 0, + ( + quote! { 0b0 }, + quote! { 0o0 }, + quote! { 0 }, + quote! { 0x0 }, + ), + ), + ( + 1, + ( + quote! { 0b1 }, + quote! { 0o1 }, + quote! { 1 }, + quote! { 0x1 }, + ), + ), + ( + -1, + ( + quote! { -0b1 }, + quote! { -0o1 }, + quote! { -1 }, + quote! { -0x1 }, + ), + ), + ( + i64::MIN, + ( + quote! { -0b1000000000000000000000000000000000000000000000000000000000000000 }, + quote! { -0o1000000000000000000000 }, + quote! { -9223372036854775808 }, + quote! { -0x8000000000000000 }, + ), + ), + ( + i64::MAX, + ( + quote! { 0b111111111111111111111111111111111111111111111111111111111111111 }, + quote! { 0o777777777777777777777 }, + quote! { 9223372036854775807 }, + quote! 
{ 0x7fffffffffffffff }, + ), + ), + ]; + + for (val, e) in values_and_expected_results { + assert_eq!( + int_expr(*val, Some(&R::Binary)).to_string(), + e.0.to_string() + ); + assert_eq!( + int_expr(*val, Some(&R::Octal)).to_string(), + e.1.to_string() + ); + assert_eq!(int_expr(*val, None).to_string(), e.2.to_string()); + assert_eq!( + int_expr(*val, Some(&R::Decimal)).to_string(), + e.2.to_string() + ); + assert_eq!( + int_expr(*val, Some(&R::Hexadecimal)).to_string(), + e.3.to_string() + ); + } + } + + #[test] + fn uint_expr_outputs_correct_tokens() { + use super::LiteralRadix as R; + let values_and_expected_results = &[ + ( + 0, + ( + quote! { 0b0 }, + quote! { 0o0 }, + quote! { 0 }, + quote! { 0x0 }, + ), + ), + ( + 1, + ( + quote! { 0b1 }, + quote! { 0o1 }, + quote! { 1 }, + quote! { 0x1 }, + ), + ), + ( + u64::MAX, + ( + quote! { 0b1111111111111111111111111111111111111111111111111111111111111111 }, + quote! { 0o1777777777777777777777 }, + quote! { 18446744073709551615 }, + quote! 
{ 0xffffffffffffffff }, + ), + ), + ]; + + for (val, e) in values_and_expected_results { + assert_eq!( + uint_expr(*val, Some(&R::Binary)).to_string(), + e.0.to_string() + ); + assert_eq!( + uint_expr(*val, Some(&R::Octal)).to_string(), + e.1.to_string() + ); + assert_eq!(uint_expr(*val, None).to_string(), e.2.to_string()); + assert_eq!( + uint_expr(*val, Some(&R::Decimal)).to_string(), + e.2.to_string() + ); + assert_eq!( + uint_expr(*val, Some(&R::Hexadecimal)).to_string(), + e.3.to_string() + ); + } + } + } } diff --git a/bindgen/codegen/mod.rs b/bindgen/codegen/mod.rs index 5425962bac..e969484500 100644 --- a/bindgen/codegen/mod.rs +++ b/bindgen/codegen/mod.rs @@ -691,6 +691,7 @@ impl CodeGenerator for Var { }); } VarType::Int(val) => { + let radix = self.radix(); let int_kind = var_ty .into_resolver() .through_type_aliases() @@ -700,9 +701,9 @@ impl CodeGenerator for Var { .as_integer() .unwrap(); let val = if int_kind.is_signed() { - helpers::ast_ty::int_expr(val) + helpers::ast_ty::int_expr(val, radix) } else { - helpers::ast_ty::uint_expr(val as _) + helpers::ast_ty::uint_expr(val as _, radix) }; result.push(quote! { #(#attrs)* @@ -2430,7 +2431,7 @@ impl CodeGenerator for CompInfo { }; fields.insert(0, align_field); } else { - let explicit = helpers::ast_ty::int_expr(explicit as i64); + let explicit = helpers::ast_ty::int_expr(explicit as i64, None); attributes.push(quote! 
{ #[repr(align(#explicit))] }); @@ -3368,11 +3369,15 @@ impl EnumBuilder { let is_rust_enum = self.is_rust_enum(); let expr = match variant.val() { EnumVariantValue::Boolean(v) if is_rust_enum => { - helpers::ast_ty::uint_expr(u64::from(v)) + helpers::ast_ty::uint_expr(u64::from(v), None) } EnumVariantValue::Boolean(v) => quote!(#v), - EnumVariantValue::Signed(v) => helpers::ast_ty::int_expr(v), - EnumVariantValue::Unsigned(v) => helpers::ast_ty::uint_expr(v), + EnumVariantValue::Signed(v) => { + helpers::ast_ty::int_expr(v, variant.radix()) + } + EnumVariantValue::Unsigned(v) => { + helpers::ast_ty::uint_expr(v, variant.radix()) + } }; match self.kind { diff --git a/bindgen/ir/enum_ty.rs b/bindgen/ir/enum_ty.rs index 9b08da3bce..9bf85764ff 100644 --- a/bindgen/ir/enum_ty.rs +++ b/bindgen/ir/enum_ty.rs @@ -6,6 +6,7 @@ use super::item::Item; use super::ty::{Type, TypeKind}; use crate::clang; use crate::ir::annotations::Annotations; +use crate::ir::var::LiteralRadix; use crate::parse::ParseError; use crate::regex_set::RegexSet; @@ -103,6 +104,7 @@ impl Enum { }; if let Some(val) = value { let name = cursor.spelling(); + let radix = cursor.get_literal_radix(); let annotations = Annotations::new(&cursor); let custom_behavior = ctx .options() @@ -142,6 +144,7 @@ impl Enum { comment, val, custom_behavior, + radix, )); } } @@ -254,6 +257,9 @@ pub(crate) struct EnumVariant { /// The custom behavior this variant may have, if any. custom_behavior: Option, + + /// The radix of the literal value of the variant. + radix: Option, } /// A constant value assigned to an enumeration variant. @@ -277,6 +283,7 @@ impl EnumVariant { comment: Option, val: EnumVariantValue, custom_behavior: Option, + radix: Option, ) -> Self { EnumVariant { name, @@ -284,6 +291,7 @@ impl EnumVariant { comment, val, custom_behavior, + radix, } } @@ -302,6 +310,11 @@ impl EnumVariant { self.val } + /// Get this variant's radix. 
+ pub(crate) fn radix(&self) -> Option<&LiteralRadix> { + self.radix.as_ref() + } + /// Get this variant's documentation. pub(crate) fn comment(&self) -> Option<&str> { self.comment.as_deref() diff --git a/bindgen/ir/var.rs b/bindgen/ir/var.rs index 45f4ba1ba0..fd8bc680de 100644 --- a/bindgen/ir/var.rs +++ b/bindgen/ir/var.rs @@ -30,6 +30,107 @@ pub(crate) enum VarType { String(Vec), } +/// Numeric literal's radix. +#[derive(Debug)] +pub(crate) enum LiteralRadix { + /// Binary (base 2). + Binary, + /// Octal (base 8). + Octal, + /// Decimal (base 10). + Decimal, + /// Hexadecimal (base 16). + Hexadecimal, +} + +/// Possible integer literal suffixes, all cases (U|u)?(L|l){0,2}, from longest to +/// shortest in number of characters. +#[rustfmt::skip] // hand-formatted for clarity +const INTEGER_SUFFIXES: [&str; 21] = [ + "ULL", "ULl", "UlL", "Ull", "uLL", "uLl", "ulL", "ull", + "UL", "Ul", "uL", "ul", + "LL", "Ll", "lL", "ll", + "U", "u", + "L", "l", + "", +]; + +impl LiteralRadix { + /// Obtain the number base of a bytestring corresponding to an existing integer + /// literal definition. + /// + /// Returns `None` if unable to infer a base. + pub(crate) fn from_integer_literal_token( + tok: impl AsRef<[u8]>, + ) -> Option { + let tok = tok.as_ref(); + + // Strip integer suffix (e.g. ULL) if exists + let tok = INTEGER_SUFFIXES + .iter() + .find_map(|suffix| tok.strip_suffix::<[u8]>(suffix.as_ref())) + .expect("last suffix is empty bytestring"); + + if tok.is_empty() { + return None; + } else if tok.len() == 1 { + // single digit numbers from 0 to 9 are always decimal + return tok[0].is_ascii_digit().then_some(Self::Decimal); + } + + match tok[0] { + b'0' => match tok[1] { + b'x' | b'X' => { + if tok.len() < 3 { + None // "0x" without actual value + } else { + if tok[2] == b'\'' { + return None; + } + // hexadecimal value: 0-9, a-f, A-F + tok[2..] 
+ .iter() + .all(|chr| chr.is_ascii_hexdigit() || chr == &b'\'') + .then_some(Self::Hexadecimal) + } + } + b'b' | b'B' => { + if tok.len() < 3 { + None // "0b" without actual value + } else { + if tok[2] == b'\'' { + return None; + } + // binary value: zeros and ones + tok[2..] + .iter() + .all(|chr| { + (b'0'..=b'1').contains(chr) || chr == &b'\'' + }) + .then_some(Self::Binary) + } + } + b'0'..=b'7' => { + // octal value: digits 0 to 7 (incl.) + tok[2..] + .iter() + .all(|chr| (b'0'..=b'7').contains(chr) || chr == &b'\'') + .then_some(Self::Octal) + } + _ => None, + }, + b'1'..=b'9' => { + // decimal value: digits 0 to 9 (incl.) + tok[1..] + .iter() + .all(|chr| chr.is_ascii_digit() || chr == &b'\'') + .then_some(Self::Decimal) + } + _ => None, + } + } +} + /// A `Var` is our intermediate representation of a variable. #[derive(Debug)] pub(crate) struct Var { @@ -45,6 +146,8 @@ pub(crate) struct Var { val: Option, /// Whether this variable is const. is_const: bool, + /// The radix of the variable, if integer. + radix: Option, } impl Var { @@ -56,6 +159,7 @@ impl Var { ty: TypeId, val: Option, is_const: bool, + radix: Option, ) -> Var { assert!(!name.is_empty()); Var { @@ -65,9 +169,15 @@ impl Var { ty, val, is_const, + radix, } } + /// The radix of this integer variable, if any. + pub(crate) fn radix(&self) -> Option<&LiteralRadix> { + self.radix.as_ref() + } + /// Is this variable `const` qualified? pub(crate) fn is_const(&self) -> bool { self.is_const @@ -223,11 +333,13 @@ impl ClangSubItemParser for Var { // enforce utf8 there, so we should have already panicked at // this point. 
let name = String::from_utf8(id).unwrap(); - let (type_kind, val) = match value { + let (type_kind, val, radix) = match value { EvalResult::Invalid => return Err(ParseError::Continue), - EvalResult::Float(f) => { - (TypeKind::Float(FloatKind::Double), VarType::Float(f)) - } + EvalResult::Float(f) => ( + TypeKind::Float(FloatKind::Double), + VarType::Float(f), + None, + ), EvalResult::Char(c) => { let c = match c { CChar::Char(c) => { @@ -237,7 +349,7 @@ impl ClangSubItemParser for Var { CChar::Raw(c) => u8::try_from(c).unwrap(), }; - (TypeKind::Int(IntKind::U8), VarType::Char(c)) + (TypeKind::Int(IntKind::U8), VarType::Char(c), None) } EvalResult::Str(val) => { let char_ty = Item::builtin_type( @@ -248,7 +360,7 @@ impl ClangSubItemParser for Var { for callbacks in &ctx.options().parse_callbacks { callbacks.str_macro(&name, &val); } - (TypeKind::Pointer(char_ty), VarType::String(val)) + (TypeKind::Pointer(char_ty), VarType::String(val), None) } EvalResult::Int(Wrapping(value)) => { let kind = ctx @@ -258,14 +370,16 @@ impl ClangSubItemParser for Var { default_macro_constant_type(ctx, value) }); - (TypeKind::Int(kind), VarType::Int(value)) + let radix = cursor.get_literal_radix(); + + (TypeKind::Int(kind), VarType::Int(value), radix) } }; let ty = Item::builtin_type(type_kind, true, ctx); Ok(ParseResult::New( - Var::new(name, None, None, ty, Some(val), true), + Var::new(name, None, None, ty, Some(val), true, radix), Some(cursor), )) } @@ -334,39 +448,51 @@ impl ClangSubItemParser for Var { // TODO: Strings, though the lookup is a bit more hard (we need // to look at the canonical type of the pointee too, and check // is char, u8, or i8 I guess). 
- let value = if is_integer { + let (value, radix) = if is_integer { let TypeKind::Int(kind) = *canonical_ty.unwrap().kind() else { unreachable!() }; let mut val = cursor.evaluate().and_then(|v| v.as_int()); + let radix = cursor.get_literal_radix_of_identifier(&name); + if val.is_none() || !kind.signedness_matches(val.unwrap()) { val = get_integer_literal_from_cursor(&cursor); } - val.map(|val| { - if kind == IntKind::Bool { - VarType::Bool(val != 0) - } else { - VarType::Int(val) - } - }) + ( + val.map(|val| { + if kind == IntKind::Bool { + VarType::Bool(val != 0) + } else { + VarType::Int(val) + } + }), + radix, + ) } else if is_float { - cursor - .evaluate() - .and_then(|v| v.as_double()) - .map(VarType::Float) + ( + cursor + .evaluate() + .and_then(|v| v.as_double()) + .map(VarType::Float), + None, + ) } else { - cursor - .evaluate() - .and_then(|v| v.as_literal_string()) - .map(VarType::String) + ( + cursor + .evaluate() + .and_then(|v| v.as_literal_string()) + .map(VarType::String), + None, + ) }; let mangling = cursor_mangling(ctx, &cursor); - let var = - Var::new(name, mangling, link_name, ty, value, is_const); + let var = Var::new( + name, mangling, link_name, ty, value, is_const, radix, + ); Ok(ParseResult::New(var, Some(cursor))) } @@ -521,3 +647,92 @@ fn duplicated_macro_diagnostic( .display(); } } + +#[cfg(test)] +mod test { + use super::*; + + impl PartialEq for LiteralRadix { + fn eq(&self, other: &Self) -> bool { + core::mem::discriminant(self) == core::mem::discriminant(other) + } + } + + #[test] + fn parses_correct_radix_from_valid_raw_token() { + let raw_tok_radix_pairs: &[(&[u8], Option<LiteralRadix>)] = &[ + (b"0", Some(LiteralRadix::Decimal)), + (b"1", Some(LiteralRadix::Decimal)), + (b"18446744073709551615", Some(LiteralRadix::Decimal)), // u64::MAX + (b"9223372036854775808", Some(LiteralRadix::Decimal)), // non-prefixed i64::MIN + (b"0b0", Some(LiteralRadix::Binary)), + (b"0b1", Some(LiteralRadix::Binary)), + (b"0B1", Some(LiteralRadix::Binary)), + ( + 
b"0b10000000'00000000'00000000'00000000", + Some(LiteralRadix::Binary), + ), + (b"00", Some(LiteralRadix::Octal)), + (b"01", Some(LiteralRadix::Octal)), + (b"0x0", Some(LiteralRadix::Hexadecimal)), + (b"0x1", Some(LiteralRadix::Hexadecimal)), + (b"0X1", Some(LiteralRadix::Hexadecimal)), + (b"0ULL", Some(LiteralRadix::Decimal)), + (b"10UL", Some(LiteralRadix::Decimal)), + (b"100L", Some(LiteralRadix::Decimal)), + (b"0b0ULL", Some(LiteralRadix::Binary)), + (b"0b10UL", Some(LiteralRadix::Binary)), + (b"0b100L", Some(LiteralRadix::Binary)), + (b"00ULL", Some(LiteralRadix::Octal)), + (b"010UL", Some(LiteralRadix::Octal)), + (b"0100L", Some(LiteralRadix::Octal)), + (b"0x0ULL", Some(LiteralRadix::Hexadecimal)), + (b"0x10UL", Some(LiteralRadix::Hexadecimal)), + (b"0x100L", Some(LiteralRadix::Hexadecimal)), + ]; + + for (tok, radix) in raw_tok_radix_pairs { + assert_eq!( + LiteralRadix::from_integer_literal_token(tok), + *radix, + "tok = b\"{}\"", + std::str::from_utf8(tok).unwrap() + ); + } + } + + #[test] + fn parses_correct_radix_from_valid_str() { + assert_eq!( + LiteralRadix::from_integer_literal_token("0xf00d"), + Some(LiteralRadix::Hexadecimal), + "tok = \"0xf00d\"" + ); + } + + #[test] + fn parses_none_radix_from_invalid_raw_token() { + let raw_toks: &[&[u8]] = + &[b"", b"0b", b"0b2", b"0x", b"A", b"f", b"0x'", b"0b'"]; + + for tok in raw_toks { + assert_eq!( + LiteralRadix::from_integer_literal_token(tok), + None, + "tok = b\"{}\"", + std::str::from_utf8(tok).unwrap() + ); + } + } + + #[test] + fn parses_none_radix_from_lone_integer_suffixes() { + for suffix in INTEGER_SUFFIXES { + assert_eq!( + LiteralRadix::from_integer_literal_token(suffix), + None, + "tok = \"{suffix}\"" + ); + } + } +} From 65f3b5dbdc53fbe2b8ded6a84cd4ae28b5681b62 Mon Sep 17 00:00:00 2001 From: Miikka Salminen Date: Sat, 21 Jun 2025 21:18:58 +0300 Subject: [PATCH 2/3] To Be Squashed Use the different radix literals for integers that were used in the original C and C++ header files, now that it 
is supported. The new values were confirmed by hand to be the same as the original ones - they are just represented using the Rust equivalent of the original form. --- .../expectations/tests/class_static_const.rs | 4 +-- .../expectations/tests/constant-evaluate.rs | 2 +- .../default-macro-constant-type-signed.rs | 8 ++--- .../default-macro-constant-type-unsigned.rs | 8 ++--- .../tests/default-macro-constant-type.rs | 8 ++--- .../tests/fit-macro-constant-types-signed.rs | 8 ++--- .../tests/fit-macro-constant-types.rs | 8 ++--- .../expectations/tests/jsval_layout_opaque.rs | 30 +++++++++---------- .../expectations/tests/layout_eth_conf.rs | 6 ++-- .../expectations/tests/overflowed_enum.rs | 6 ++-- .../tests/prepend-enum-constified-variant.rs | 2 +- .../tests/expectations/tests/short-enums.rs | 6 ++-- .../expectations/tests/wrap-static-fns.rs | 2 +- 13 files changed, 49 insertions(+), 49 deletions(-) diff --git a/bindgen-tests/tests/expectations/tests/class_static_const.rs b/bindgen-tests/tests/expectations/tests/class_static_const.rs index d628239c4c..c09d181d0f 100644 --- a/bindgen-tests/tests/expectations/tests/class_static_const.rs +++ b/bindgen-tests/tests/expectations/tests/class_static_const.rs @@ -5,8 +5,8 @@ pub struct A { pub _address: u8, } pub const A_a: ::std::os::raw::c_int = 0; -pub const A_b: i32 = 63; -pub const A_c: u32 = 255; +pub const A_b: i32 = 0o77; +pub const A_c: u32 = 0xff; #[allow(clippy::unnecessary_operation, clippy::identity_op)] const _: () = { ["Size of A"][::std::mem::size_of::() - 1usize]; diff --git a/bindgen-tests/tests/expectations/tests/constant-evaluate.rs b/bindgen-tests/tests/expectations/tests/constant-evaluate.rs index bbcf6d5450..8506729427 100644 --- a/bindgen-tests/tests/expectations/tests/constant-evaluate.rs +++ b/bindgen-tests/tests/expectations/tests/constant-evaluate.rs @@ -8,7 +8,7 @@ pub enum _bindgen_ty_1 { bar = 8, } pub type EasyToOverflow = ::std::os::raw::c_ulonglong; -pub const k: EasyToOverflow = 2147483648; 
+pub const k: EasyToOverflow = 0x80000000; pub const k_expr: EasyToOverflow = 1152921504606846976; pub const wow: EasyToOverflow = 2147483648; pub const BAZ: ::std::os::raw::c_longlong = 24; diff --git a/bindgen-tests/tests/expectations/tests/default-macro-constant-type-signed.rs b/bindgen-tests/tests/expectations/tests/default-macro-constant-type-signed.rs index 7fca57b6b9..ec0032fc2c 100644 --- a/bindgen-tests/tests/expectations/tests/default-macro-constant-type-signed.rs +++ b/bindgen-tests/tests/expectations/tests/default-macro-constant-type-signed.rs @@ -4,8 +4,8 @@ pub const N1: i32 = 1; pub const N2: i32 = 2; pub const N_1: i32 = -1; pub const N_2: i32 = -2; -pub const MAX_U16: i32 = 65535; -pub const MAX_I16: i32 = 32767; +pub const MAX_U16: i32 = 0xffff; +pub const MAX_I16: i32 = 0x7fff; pub const MAX_I16_Plus1: i32 = 32768; pub const MAX_U16_Plus1: i32 = 65536; pub const MAX_I16_Minus1: i32 = 32766; @@ -16,8 +16,8 @@ pub const MIN_U16_Plus1: i32 = 1; pub const MIN_I16_Plus1: i32 = -32767; pub const MIN_U16_Minus1: i32 = -1; pub const MIN_I16_Minus1: i32 = -32769; -pub const MAX_U32: i64 = 4294967295; -pub const MAX_I32: i32 = 2147483647; +pub const MAX_U32: i64 = 0xffffffff; +pub const MAX_I32: i32 = 0x7fffffff; pub const MAX_I32_Plus1: i64 = 2147483648; pub const MAX_U32_Plus1: i64 = 4294967296; pub const MAX_I32_Minus1: i32 = 2147483646; diff --git a/bindgen-tests/tests/expectations/tests/default-macro-constant-type-unsigned.rs b/bindgen-tests/tests/expectations/tests/default-macro-constant-type-unsigned.rs index d34d050a1a..f492b6bfc4 100644 --- a/bindgen-tests/tests/expectations/tests/default-macro-constant-type-unsigned.rs +++ b/bindgen-tests/tests/expectations/tests/default-macro-constant-type-unsigned.rs @@ -4,8 +4,8 @@ pub const N1: u32 = 1; pub const N2: u32 = 2; pub const N_1: i32 = -1; pub const N_2: i32 = -2; -pub const MAX_U16: u32 = 65535; -pub const MAX_I16: u32 = 32767; +pub const MAX_U16: u32 = 0xffff; +pub const MAX_I16: u32 = 0x7fff; 
pub const MAX_I16_Plus1: u32 = 32768; pub const MAX_U16_Plus1: u32 = 65536; pub const MAX_I16_Minus1: u32 = 32766; @@ -16,8 +16,8 @@ pub const MIN_U16_Plus1: u32 = 1; pub const MIN_I16_Plus1: i32 = -32767; pub const MIN_U16_Minus1: i32 = -1; pub const MIN_I16_Minus1: i32 = -32769; -pub const MAX_U32: u32 = 4294967295; -pub const MAX_I32: u32 = 2147483647; +pub const MAX_U32: u32 = 0xffffffff; +pub const MAX_I32: u32 = 0x7fffffff; pub const MAX_I32_Plus1: u32 = 2147483648; pub const MAX_U32_Plus1: u64 = 4294967296; pub const MAX_I32_Minus1: u32 = 2147483646; diff --git a/bindgen-tests/tests/expectations/tests/default-macro-constant-type.rs b/bindgen-tests/tests/expectations/tests/default-macro-constant-type.rs index d34d050a1a..f492b6bfc4 100644 --- a/bindgen-tests/tests/expectations/tests/default-macro-constant-type.rs +++ b/bindgen-tests/tests/expectations/tests/default-macro-constant-type.rs @@ -4,8 +4,8 @@ pub const N1: u32 = 1; pub const N2: u32 = 2; pub const N_1: i32 = -1; pub const N_2: i32 = -2; -pub const MAX_U16: u32 = 65535; -pub const MAX_I16: u32 = 32767; +pub const MAX_U16: u32 = 0xffff; +pub const MAX_I16: u32 = 0x7fff; pub const MAX_I16_Plus1: u32 = 32768; pub const MAX_U16_Plus1: u32 = 65536; pub const MAX_I16_Minus1: u32 = 32766; @@ -16,8 +16,8 @@ pub const MIN_U16_Plus1: u32 = 1; pub const MIN_I16_Plus1: i32 = -32767; pub const MIN_U16_Minus1: i32 = -1; pub const MIN_I16_Minus1: i32 = -32769; -pub const MAX_U32: u32 = 4294967295; -pub const MAX_I32: u32 = 2147483647; +pub const MAX_U32: u32 = 0xffffffff; +pub const MAX_I32: u32 = 0x7fffffff; pub const MAX_I32_Plus1: u32 = 2147483648; pub const MAX_U32_Plus1: u64 = 4294967296; pub const MAX_I32_Minus1: u32 = 2147483646; diff --git a/bindgen-tests/tests/expectations/tests/fit-macro-constant-types-signed.rs b/bindgen-tests/tests/expectations/tests/fit-macro-constant-types-signed.rs index d4ad5e0fcc..9d3588473b 100644 --- a/bindgen-tests/tests/expectations/tests/fit-macro-constant-types-signed.rs +++ 
b/bindgen-tests/tests/expectations/tests/fit-macro-constant-types-signed.rs @@ -4,8 +4,8 @@ pub const N1: i8 = 1; pub const N2: i8 = 2; pub const N_1: i8 = -1; pub const N_2: i8 = -2; -pub const MAX_U16: i32 = 65535; -pub const MAX_I16: i16 = 32767; +pub const MAX_U16: i32 = 0xffff; +pub const MAX_I16: i16 = 0x7fff; pub const MAX_I16_Plus1: i32 = 32768; pub const MAX_U16_Plus1: i32 = 65536; pub const MAX_I16_Minus1: i16 = 32766; @@ -16,8 +16,8 @@ pub const MIN_U16_Plus1: i8 = 1; pub const MIN_I16_Plus1: i16 = -32767; pub const MIN_U16_Minus1: i8 = -1; pub const MIN_I16_Minus1: i32 = -32769; -pub const MAX_U32: i64 = 4294967295; -pub const MAX_I32: i32 = 2147483647; +pub const MAX_U32: i64 = 0xffffffff; +pub const MAX_I32: i32 = 0x7fffffff; pub const MAX_I32_Plus1: i64 = 2147483648; pub const MAX_U32_Plus1: i64 = 4294967296; pub const MAX_I32_Minus1: i32 = 2147483646; diff --git a/bindgen-tests/tests/expectations/tests/fit-macro-constant-types.rs b/bindgen-tests/tests/expectations/tests/fit-macro-constant-types.rs index 5542a645da..0c8d33c493 100644 --- a/bindgen-tests/tests/expectations/tests/fit-macro-constant-types.rs +++ b/bindgen-tests/tests/expectations/tests/fit-macro-constant-types.rs @@ -4,8 +4,8 @@ pub const N1: u8 = 1; pub const N2: u8 = 2; pub const N_1: i8 = -1; pub const N_2: i8 = -2; -pub const MAX_U16: u16 = 65535; -pub const MAX_I16: u16 = 32767; +pub const MAX_U16: u16 = 0xffff; +pub const MAX_I16: u16 = 0x7fff; pub const MAX_I16_Plus1: u16 = 32768; pub const MAX_U16_Plus1: u32 = 65536; pub const MAX_I16_Minus1: u16 = 32766; @@ -16,8 +16,8 @@ pub const MIN_U16_Plus1: u8 = 1; pub const MIN_I16_Plus1: i16 = -32767; pub const MIN_U16_Minus1: i8 = -1; pub const MIN_I16_Minus1: i32 = -32769; -pub const MAX_U32: u32 = 4294967295; -pub const MAX_I32: u32 = 2147483647; +pub const MAX_U32: u32 = 0xffffffff; +pub const MAX_I32: u32 = 0x7fffffff; pub const MAX_I32_Plus1: u32 = 2147483648; pub const MAX_U32_Plus1: u64 = 4294967296; pub const MAX_I32_Minus1: 
u32 = 2147483646; diff --git a/bindgen-tests/tests/expectations/tests/jsval_layout_opaque.rs b/bindgen-tests/tests/expectations/tests/jsval_layout_opaque.rs index dc0ef8ed7f..6be6a4d2f7 100644 --- a/bindgen-tests/tests/expectations/tests/jsval_layout_opaque.rs +++ b/bindgen-tests/tests/expectations/tests/jsval_layout_opaque.rs @@ -146,27 +146,27 @@ where } } pub const JSVAL_TAG_SHIFT: u32 = 47; -pub const JSVAL_PAYLOAD_MASK: u64 = 140737488355327; -pub const JSVAL_TAG_MASK: i64 = -140737488355328; +pub const JSVAL_PAYLOAD_MASK: u64 = 0x7fffffffffff; +pub const JSVAL_TAG_MASK: i64 = -0x800000000000; #[repr(u8)] #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] pub enum JSValueType { - JSVAL_TYPE_DOUBLE = 0, - JSVAL_TYPE_INT32 = 1, - JSVAL_TYPE_UNDEFINED = 2, - JSVAL_TYPE_BOOLEAN = 3, - JSVAL_TYPE_MAGIC = 4, - JSVAL_TYPE_STRING = 5, - JSVAL_TYPE_SYMBOL = 6, - JSVAL_TYPE_NULL = 7, - JSVAL_TYPE_OBJECT = 8, - JSVAL_TYPE_UNKNOWN = 32, - JSVAL_TYPE_MISSING = 33, + JSVAL_TYPE_DOUBLE = 0x0, + JSVAL_TYPE_INT32 = 0x1, + JSVAL_TYPE_UNDEFINED = 0x2, + JSVAL_TYPE_BOOLEAN = 0x3, + JSVAL_TYPE_MAGIC = 0x4, + JSVAL_TYPE_STRING = 0x5, + JSVAL_TYPE_SYMBOL = 0x6, + JSVAL_TYPE_NULL = 0x7, + JSVAL_TYPE_OBJECT = 0x8, + JSVAL_TYPE_UNKNOWN = 0x20, + JSVAL_TYPE_MISSING = 0x21, } #[repr(u32)] #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] pub enum JSValueTag { - JSVAL_TAG_MAX_DOUBLE = 131056, + JSVAL_TAG_MAX_DOUBLE = 0x1fff0, JSVAL_TAG_INT32 = 131057, JSVAL_TAG_UNDEFINED = 131058, JSVAL_TAG_STRING = 131061, @@ -179,7 +179,7 @@ pub enum JSValueTag { #[repr(u64)] #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] pub enum JSValueShiftedTag { - JSVAL_SHIFTED_TAG_MAX_DOUBLE = 18444492278190833663, + JSVAL_SHIFTED_TAG_MAX_DOUBLE = 0xfff80000ffffffff, JSVAL_SHIFTED_TAG_INT32 = 18444633011384221696, JSVAL_SHIFTED_TAG_UNDEFINED = 18444773748872577024, JSVAL_SHIFTED_TAG_STRING = 18445195961337643008, diff --git a/bindgen-tests/tests/expectations/tests/layout_eth_conf.rs 
b/bindgen-tests/tests/expectations/tests/layout_eth_conf.rs index 2686d8f5fa..b3dee3da4a 100644 --- a/bindgen-tests/tests/expectations/tests/layout_eth_conf.rs +++ b/bindgen-tests/tests/expectations/tests/layout_eth_conf.rs @@ -87,9 +87,9 @@ where } } } -pub const ETH_MQ_RX_RSS_FLAG: u32 = 1; -pub const ETH_MQ_RX_DCB_FLAG: u32 = 2; -pub const ETH_MQ_RX_VMDQ_FLAG: u32 = 4; +pub const ETH_MQ_RX_RSS_FLAG: u32 = 0x1; +pub const ETH_MQ_RX_DCB_FLAG: u32 = 0x2; +pub const ETH_MQ_RX_VMDQ_FLAG: u32 = 0x4; pub const ETH_VMDQ_MAX_VLAN_FILTERS: u32 = 64; pub const ETH_DCB_NUM_USER_PRIORITIES: u32 = 8; pub const ETH_VMDQ_DCB_NUM_QUEUES: u32 = 128; diff --git a/bindgen-tests/tests/expectations/tests/overflowed_enum.rs b/bindgen-tests/tests/expectations/tests/overflowed_enum.rs index 2c67ba6903..24f3057257 100644 --- a/bindgen-tests/tests/expectations/tests/overflowed_enum.rs +++ b/bindgen-tests/tests/expectations/tests/overflowed_enum.rs @@ -2,9 +2,9 @@ #[repr(u32)] #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] pub enum Foo { - BAP_ARM = 9698489, - BAP_X86 = 11960045, - BAP_X86_64 = 3128633167, + BAP_ARM = 0x93fcb9, + BAP_X86 = 0xb67eed, + BAP_X86_64 = 0xba7b274f, } #[repr(u16)] #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] diff --git a/bindgen-tests/tests/expectations/tests/prepend-enum-constified-variant.rs b/bindgen-tests/tests/expectations/tests/prepend-enum-constified-variant.rs index ff49d684f1..4250e1e1f8 100644 --- a/bindgen-tests/tests/expectations/tests/prepend-enum-constified-variant.rs +++ b/bindgen-tests/tests/expectations/tests/prepend-enum-constified-variant.rs @@ -5,5 +5,5 @@ impl AVCodecID { #[repr(u32)] #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] pub enum AVCodecID { - AV_CODEC_ID_FIRST_UNKNOWN = 98304, + AV_CODEC_ID_FIRST_UNKNOWN = 0x18000, } diff --git a/bindgen-tests/tests/expectations/tests/short-enums.rs b/bindgen-tests/tests/expectations/tests/short-enums.rs index 493bb5b419..9295f5b715 100644 --- 
a/bindgen-tests/tests/expectations/tests/short-enums.rs +++ b/bindgen-tests/tests/expectations/tests/short-enums.rs @@ -2,15 +2,15 @@ #[repr(u8)] #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] pub enum one_byte_t { - SOME_VALUE = 1, + SOME_VALUE = 0x1, } #[repr(u16)] #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] pub enum two_byte_t { - SOME_OTHER_VALUE = 256, + SOME_OTHER_VALUE = 0x100, } #[repr(u32)] #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] pub enum four_byte_t { - SOME_BIGGER_VALUE = 16777216, + SOME_BIGGER_VALUE = 0x1000000, } diff --git a/bindgen-tests/tests/expectations/tests/wrap-static-fns.rs b/bindgen-tests/tests/expectations/tests/wrap-static-fns.rs index bafcad8a7e..4e054d21aa 100644 --- a/bindgen-tests/tests/expectations/tests/wrap-static-fns.rs +++ b/bindgen-tests/tests/expectations/tests/wrap-static-fns.rs @@ -40,7 +40,7 @@ unsafe extern "C" { arg: *const *const ::std::os::raw::c_int, ) -> ::std::os::raw::c_int; } -pub const foo_BAR: foo = 0; +pub const foo_BAR: foo = 0x0; pub type foo = ::std::os::raw::c_uint; unsafe extern "C" { #[link_name = "takes_enum__extern"] From 83c2034b8e4518b9fe41e59a69d2ad66a9bad69f Mon Sep 17 00:00:00 2001 From: Miikka Salminen Date: Tue, 15 Jul 2025 20:18:28 +0300 Subject: [PATCH 3/3] To Be Squashed - Gate the integer literal radix retaining functionality behind option `keep_integer_radices`, defaulting to `false` for now. - Add the new `--keep-integer-radices` flag to test headers that are affected by it. 
--- .../tests/headers/class_static_const.hpp | 2 +- bindgen-tests/tests/headers/constant-evaluate.h | 2 +- .../headers/default-macro-constant-type-signed.h | 2 +- .../default-macro-constant-type-unsigned.h | 2 +- .../tests/headers/default-macro-constant-type.h | 1 + .../tests/headers/different_radix_literals.hpp | 2 +- .../headers/fit-macro-constant-types-signed.h | 2 +- .../tests/headers/fit-macro-constant-types.h | 2 +- .../tests/headers/jsval_layout_opaque.hpp | 2 +- bindgen-tests/tests/headers/layout_eth_conf.h | 2 +- bindgen-tests/tests/headers/overflowed_enum.hpp | 2 +- .../headers/prepend-enum-constified-variant.h | 2 +- bindgen-tests/tests/headers/short-enums.hpp | 2 +- bindgen-tests/tests/headers/wrap-static-fns.h | 2 +- bindgen/ir/enum_ty.rs | 6 +++++- bindgen/ir/var.rs | 12 ++++++++++-- bindgen/options/cli.rs | 5 +++++ bindgen/options/mod.rs | 16 ++++++++++++++++ 18 files changed, 50 insertions(+), 16 deletions(-) diff --git a/bindgen-tests/tests/headers/class_static_const.hpp b/bindgen-tests/tests/headers/class_static_const.hpp index 3e320edcbe..d870f7e290 100644 --- a/bindgen-tests/tests/headers/class_static_const.hpp +++ b/bindgen-tests/tests/headers/class_static_const.hpp @@ -1,4 +1,4 @@ -// bindgen-flags: --with-derive-hash --with-derive-partialeq --with-derive-eq +// bindgen-flags: --with-derive-hash --with-derive-partialeq --with-derive-eq --keep-integer-radices using int32_t = int; typedef unsigned int uint32_t; diff --git a/bindgen-tests/tests/headers/constant-evaluate.h b/bindgen-tests/tests/headers/constant-evaluate.h index 812553ed3e..846fe0b072 100644 --- a/bindgen-tests/tests/headers/constant-evaluate.h +++ b/bindgen-tests/tests/headers/constant-evaluate.h @@ -1,5 +1,5 @@ // bindgen-unstable -// bindgen-flags: --rustified-enum ".*" +// bindgen-flags: --rustified-enum ".*" --keep-integer-radices enum { foo = 4, diff --git a/bindgen-tests/tests/headers/default-macro-constant-type-signed.h 
b/bindgen-tests/tests/headers/default-macro-constant-type-signed.h index da3f134467..769be60e08 100644 --- a/bindgen-tests/tests/headers/default-macro-constant-type-signed.h +++ b/bindgen-tests/tests/headers/default-macro-constant-type-signed.h @@ -1,3 +1,3 @@ -// bindgen-flags: --default-macro-constant-type signed +// bindgen-flags: --default-macro-constant-type signed --keep-integer-radices // All values are i32 if they fit; otherwise i64. #include "default-macro-constant-type.h" diff --git a/bindgen-tests/tests/headers/default-macro-constant-type-unsigned.h b/bindgen-tests/tests/headers/default-macro-constant-type-unsigned.h index 1078e852ee..e22abb2ae3 100644 --- a/bindgen-tests/tests/headers/default-macro-constant-type-unsigned.h +++ b/bindgen-tests/tests/headers/default-macro-constant-type-unsigned.h @@ -1,3 +1,3 @@ -// bindgen-flags: --default-macro-constant-type unsigned +// bindgen-flags: --default-macro-constant-type unsigned --keep-integer-radices // Negative values are i32 or i64; others are u32 or u64. #include "default-macro-constant-type.h" diff --git a/bindgen-tests/tests/headers/default-macro-constant-type.h b/bindgen-tests/tests/headers/default-macro-constant-type.h index a863362c98..36b7042dd6 100644 --- a/bindgen-tests/tests/headers/default-macro-constant-type.h +++ b/bindgen-tests/tests/headers/default-macro-constant-type.h @@ -1,3 +1,4 @@ +// bindgen-flags: --keep-integer-radices // Test default of --default-macro-constant-type // Negative values are i32 or i64; others are u32 or u64. 
diff --git a/bindgen-tests/tests/headers/different_radix_literals.hpp b/bindgen-tests/tests/headers/different_radix_literals.hpp index 0c155d73df..c4b830b026 100644 --- a/bindgen-tests/tests/headers/different_radix_literals.hpp +++ b/bindgen-tests/tests/headers/different_radix_literals.hpp @@ -1,4 +1,4 @@ -// bindgen-flags: -- -std=c++14 +// bindgen-flags: --keep-integer-radices -- -std=c++14 // (C23 is not available in clang 9.0, but C++14 supports the same literals) // Binary integer literals (C23) - 0b10 is 2 in decimal diff --git a/bindgen-tests/tests/headers/fit-macro-constant-types-signed.h b/bindgen-tests/tests/headers/fit-macro-constant-types-signed.h index dba20937df..fbd89365a2 100644 --- a/bindgen-tests/tests/headers/fit-macro-constant-types-signed.h +++ b/bindgen-tests/tests/headers/fit-macro-constant-types-signed.h @@ -1,2 +1,2 @@ -// bindgen-flags: --default-macro-constant-type=signed --fit-macro-constant-types +// bindgen-flags: --default-macro-constant-type=signed --fit-macro-constant-types --keep-integer-radices #include "default-macro-constant-type.h" diff --git a/bindgen-tests/tests/headers/fit-macro-constant-types.h b/bindgen-tests/tests/headers/fit-macro-constant-types.h index b995bfc0d4..9d87c2cd69 100644 --- a/bindgen-tests/tests/headers/fit-macro-constant-types.h +++ b/bindgen-tests/tests/headers/fit-macro-constant-types.h @@ -1,4 +1,4 @@ -// bindgen-flags: --fit-macro-constant-types +// bindgen-flags: --fit-macro-constant-types --keep-integer-radices // Test fitting macro constants into smaller integer types // Negative values are i8, i16, i32 or i64; others are u8, u16, u32 or u64. 
#include "default-macro-constant-type.h" \ No newline at end of file diff --git a/bindgen-tests/tests/headers/jsval_layout_opaque.hpp b/bindgen-tests/tests/headers/jsval_layout_opaque.hpp index ef13b85b25..380f91072f 100644 --- a/bindgen-tests/tests/headers/jsval_layout_opaque.hpp +++ b/bindgen-tests/tests/headers/jsval_layout_opaque.hpp @@ -1,4 +1,4 @@ -// bindgen-flags: --with-derive-hash --with-derive-partialeq --with-derive-eq --rustified-enum ".*" +// bindgen-flags: --with-derive-hash --with-derive-partialeq --with-derive-eq --rustified-enum ".*" --keep-integer-radices // bindgen-flags: -- -std=c++11 /** diff --git a/bindgen-tests/tests/headers/layout_eth_conf.h b/bindgen-tests/tests/headers/layout_eth_conf.h index 1c821c9769..d0d806cda6 100644 --- a/bindgen-tests/tests/headers/layout_eth_conf.h +++ b/bindgen-tests/tests/headers/layout_eth_conf.h @@ -1,4 +1,4 @@ -// bindgen-flags: --with-derive-hash --with-derive-partialeq --with-derive-eq --rustified-enum ".*" --rust-target 1.40 +// bindgen-flags: --with-derive-hash --with-derive-partialeq --with-derive-eq --rustified-enum ".*" --rust-target 1.40 --keep-integer-radices typedef unsigned char uint8_t; typedef unsigned short uint16_t; typedef unsigned int uint32_t; diff --git a/bindgen-tests/tests/headers/overflowed_enum.hpp b/bindgen-tests/tests/headers/overflowed_enum.hpp index 18d3fe4173..42b25022f7 100644 --- a/bindgen-tests/tests/headers/overflowed_enum.hpp +++ b/bindgen-tests/tests/headers/overflowed_enum.hpp @@ -1,4 +1,4 @@ -// bindgen-flags: --rustified-enum ".*" -- -std=c++11 -Wno-narrowing +// bindgen-flags: --rustified-enum ".*" --keep-integer-radices -- -std=c++11 -Wno-narrowing enum Foo { BAP_ARM = 0x93fcb9, diff --git a/bindgen-tests/tests/headers/prepend-enum-constified-variant.h b/bindgen-tests/tests/headers/prepend-enum-constified-variant.h index e9ba0e732b..6c6bc78a24 100644 --- a/bindgen-tests/tests/headers/prepend-enum-constified-variant.h +++ 
b/bindgen-tests/tests/headers/prepend-enum-constified-variant.h @@ -1,4 +1,4 @@ -// bindgen-flags: --no-prepend-enum-name --rustified-enum ".*" +// bindgen-flags: --no-prepend-enum-name --rustified-enum ".*" --keep-integer-radices enum AVCodecID { AV_CODEC_ID_FIRST_UNKNOWN = 0x18000, diff --git a/bindgen-tests/tests/headers/short-enums.hpp b/bindgen-tests/tests/headers/short-enums.hpp index 14f833de64..0af05b9718 100644 --- a/bindgen-tests/tests/headers/short-enums.hpp +++ b/bindgen-tests/tests/headers/short-enums.hpp @@ -1,4 +1,4 @@ -// bindgen-flags: --rustified-enum ".*" -- -std=c++11 -fshort-enums +// bindgen-flags: --rustified-enum ".*" --keep-integer-radices -- -std=c++11 -fshort-enums typedef enum { SOME_VALUE = 0x1, diff --git a/bindgen-tests/tests/headers/wrap-static-fns.h b/bindgen-tests/tests/headers/wrap-static-fns.h index a35e713f2b..38c506858e 100644 --- a/bindgen-tests/tests/headers/wrap-static-fns.h +++ b/bindgen-tests/tests/headers/wrap-static-fns.h @@ -1,4 +1,4 @@ -// bindgen-flags: --wrap-static-fns +// bindgen-flags: --wrap-static-fns --keep-integer-radices // bindgen-parse-callbacks: wrap-as-variadic-fn // to avoid polluting the expectation tests we put the stdarg.h behind a conditional diff --git a/bindgen/ir/enum_ty.rs b/bindgen/ir/enum_ty.rs index 9bf85764ff..025f54d6f7 100644 --- a/bindgen/ir/enum_ty.rs +++ b/bindgen/ir/enum_ty.rs @@ -104,7 +104,11 @@ impl Enum { }; if let Some(val) = value { let name = cursor.spelling(); - let radix = cursor.get_literal_radix(); + let radix = if ctx.options().keep_integer_radices { + cursor.get_literal_radix() + } else { + None + }; let annotations = Annotations::new(&cursor); let custom_behavior = ctx .options() diff --git a/bindgen/ir/var.rs b/bindgen/ir/var.rs index fd8bc680de..37e1ea08bc 100644 --- a/bindgen/ir/var.rs +++ b/bindgen/ir/var.rs @@ -370,7 +370,11 @@ impl ClangSubItemParser for Var { default_macro_constant_type(ctx, value) }); - let radix = cursor.get_literal_radix(); + let radix = if 
ctx.options().keep_integer_radices { + cursor.get_literal_radix() + } else { + None + }; (TypeKind::Int(kind), VarType::Int(value), radix) } @@ -455,7 +459,11 @@ impl ClangSubItemParser for Var { }; let mut val = cursor.evaluate().and_then(|v| v.as_int()); - let radix = cursor.get_literal_radix_of_identifier(&name); + let radix = if ctx.options().keep_integer_radices { + cursor.get_literal_radix_of_identifier(&name) + } else { + None + }; if val.is_none() || !kind.signedness_matches(val.unwrap()) { val = get_integer_literal_from_cursor(&cursor); diff --git a/bindgen/options/cli.rs b/bindgen/options/cli.rs index bce7faed35..0514d6f84d 100644 --- a/bindgen/options/cli.rs +++ b/bindgen/options/cli.rs @@ -525,6 +525,9 @@ struct BindgenCommand { /// be called. #[arg(long)] generate_private_functions: bool, + /// Whether to retain integer literal radices in generated Rust code. + #[arg(long)] + keep_integer_radices: bool, /// Whether to emit diagnostics or not. #[cfg(feature = "experimental")] #[arg(long, requires = "experimental")] @@ -676,6 +679,7 @@ where generate_deleted_functions, generate_pure_virtual_functions, generate_private_functions, + keep_integer_radices, #[cfg(feature = "experimental")] emit_diagnostics, generate_shell_completions, @@ -971,6 +975,7 @@ where generate_deleted_functions, generate_pure_virtual_functions, generate_private_functions, + keep_integer_radices, } ); diff --git a/bindgen/options/mod.rs b/bindgen/options/mod.rs index b876b4d5b3..80fa432d77 100644 --- a/bindgen/options/mod.rs +++ b/bindgen/options/mod.rs @@ -2283,4 +2283,20 @@ options! { }, as_args: "--generate-private-functions", }, + /// Whether to retain integer literal radices in generated Rust code. + keep_integer_radices: bool { + default: false, + methods: { + /// Set whether to retain number bases of C/C++ integer literals. 
+ /// + /// Integer literals defined as binary `0b…`, octal `0…`, and hexadecimal `0x…` have the + /// equivalent notation in the generated Rust code, i.e., `0b…`, `0o…`, and `0x…` + /// respectively. + pub fn keep_integer_radices(mut self, doit: bool) -> Self { + self.options.keep_integer_radices = doit; + self + } + }, + as_args: "--keep-integer-radices", + }, }