From 3dea5590039fa87ee17b8fe3ca1a4b223fcbd103 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Thu, 10 Jul 2025 12:08:27 -0600 Subject: [PATCH 1/6] Add ability to define a simple implementation in embed.fnc This can replace most of mathoms.c and lots of essentially boiler plate scattered throughout the code. Quite a few of our functions and macros are specializations of more general ones, taking fewer parameters than the generalized ones. Often we will have a foo_flags(a, flags) and a foo(a) which just calls foo_flags with a flag of 0. This commit allows for a single extra line to be added to embed.fnc to indicate the implementation of plain foo, replacing the extra boiler plate lines that were needed to do this prior to this commit. Another advantage is this lowers the likelihood of the implementation of long-name functions getting out-of-sync with supposedly-equivalent short name macros that are generally used to bypass them. This commit causees the long name form to be automatically generated, so there is no possibility of getting out-of-sync. It uses the current infrastructure in the embed regeneration to automatically create any needed long names, inserting thread context if necessary without the programmer needing to worry about this. The results are placed in embed.h and proto.h I'm not thrilled about the syntax of this. You add an extra continuation line to the declaration. This line begins with an '=', followed by the implementation details. For example Adp |U8 * |uv_to_utf8 |NN U8 *d \ |UV uv \ = uv_to_utf8_flags(d,uv,0) But embed.fnc is the file that already generates embed.h and proto.h, and no extra information beyond what embed.fnc already contains needs to be added. The bottom line is that many few-line functions in our code can be replaced by adding a single line to embed.fnc, which will make those into macros instead of functions. This requires fewer resources; the generated macros require no resources unless actually called; and it's simply more convenient to do this here in one place, than do the extra boiler plate that has to be in multiple places otherwise. embed.h will now contain long name definitions that formerly it didn't, so autodoc.pl has to change to look for them This commit changes nothing; just adds the capability. The next few commits will start to use it. --- autodoc.pl | 21 ++++++++++----- embed.fnc | 31 ++++++++++++++++----- makedef.pl | 4 ++- regen/HeaderParser.pm | 38 +++++++++++++++++++++++--- regen/embed.pl | 63 ++++++++++++++++++++++++++++++++++++++++--- 5 files changed, 137 insertions(+), 20 deletions(-) diff --git a/autodoc.pl b/autodoc.pl index 3540893211eb..6b4f0a3a8588 100644 --- a/autodoc.pl +++ b/autodoc.pl @@ -2570,14 +2570,23 @@ ($destpod) } elsif ($file eq "embed.h") { - # embed.h won't have any apidoc lines in it. Instead look for lines - # that define the obsolete 'perl_' lines. Then we can check later - # that such a definition actually exists when we encounter input that - # claims there is + # embed.h won't have any apidoc lines in it. Instead look for: + # 1) lines that define Perl_foo, the long name for 'foo'; and + # 2) lines that define the obsolete 'perl_' lines. + + # Then we can check later that such a definition actually exists when + # we encounter input that claims there is open my $fh, '<', $file or die "Cannot open $file for docs: $!\n"; while (defined (my $input = <$fh>)) { - $protos{$1} = $2 - if $input =~ / ^\# \s* define \s+ ( perl_\w+ ) ( [^)]* \) ) /x; + if ($input =~ s/ ^ \# \s* define \s+ ( [Pp] erl_\w+ ) \s* //x) { + my $full_name = $1; + + # The full name might be followed by an argument list, but not + # necessarily + $input =~ s/ \( [^)]* \) \s* (\w+) //x; + + $protos{$full_name} = $1; + } } close $fh or die "Error closing $file: $!\n"; } diff --git a/embed.fnc b/embed.fnc index 5ddab55acec8..09431cf09237 100644 --- a/embed.fnc +++ b/embed.fnc @@ -35,14 +35,33 @@ : Supported at least since perl-5.23.8, with or without ppport.h. : : Lines in this file are of the form: -: flags|return_type|name|arg1|arg2|...|argN +: flags|return_type|name|arg1|arg2|...|argN [ = implementation ] : -: 'flags' is a string of single letters. Most of the flags are meaningful only -: to embed.pl; some only to autodoc.pl, and others only to makedef.pl. The -: comments here mostly don't include how Devel::PPPort or diag.t use them: -: All the possible flags and their meanings are given below. +: 'name' is the name of the entity being declared; usually its a function +: +: 'flags' is a string of single letters. Most of the flags are meaningful +: only to embed.pl; some only to autodoc.pl, and others only to +: makedef.pl. The comments here mostly don't include how +: Devel::PPPort or diag.t use them: All the possible flags and their +: meanings are given a ways below. +: +: 'return_type' is the type of value that 'name' returns, or 'void' if there is +: no returned value. +: +: 'arg1' .. argN' are the arguments to 'name'. These are omitted for an entity +: taking no parameters. +: +: '= implementation is optional; if present it defines the implementation of +: 'name'. Its presence indicates that 'name' is a macro, so the 'm' +: flag is implied and isn't required to be present. This is intended +: for macros that are simple, typically a variation on another entity +: defined in this file, like calling such an entity with an extra, +: fixed, parameter. For example the implementation +: = bar(a,b,c,0) +: means that 'name(a,b,c)' is defined to be 'bar(a,b,c,0)' +: This facility automatically will generate any required long names +: with any needed thread context parameters. : -: A function taking no parameters will have no 'arg' elements. : A line may be continued onto the next by ending it with a backslash. : Leading and trailing whitespace will be ignored in each component. : diff --git a/makedef.pl b/makedef.pl index 538595086373..05e17491605f 100644 --- a/makedef.pl +++ b/makedef.pl @@ -796,8 +796,10 @@ sub readvar { foreach (@$embed_array) { my $embed= $_->{embed} or next; - my ($flags, $retval, $func, $args) = @{$embed}{qw(flags return_type name args)}; + my ($flags, $retval, $func, $args, $implementation) = + @{$embed}{qw(flags return_type name args implementation)}; next unless $func; + next if $implementation; # Having this implies it is a macro if (($flags =~ /[AXC]/ && $flags !~ $excludedre) || (!$define{'NO_MATHOMS'} && $flags =~ /b/)) { diff --git a/regen/HeaderParser.pm b/regen/HeaderParser.pm index 8b166570fea7..be6c2e71c9aa 100644 --- a/regen/HeaderParser.pm +++ b/regen/HeaderParser.pm @@ -744,6 +744,8 @@ sub tidy_embed_fnc_entry { $line =~ s/\s*\\\n/ /g; $line =~ s/\s+\z//; ($line)= expand($line); + $line =~ s/ \s* = \s* (.*?) \s* \z //x; + my $implementation = $1; my ($flags, $ret, $name, @args)= split /\s*\|\s*/, $line; my %flag_seen; $flags= join "", grep !$flag_seen{$_}++, sort split //, $flags; @@ -759,28 +761,55 @@ sub tidy_embed_fnc_entry { . " in 'embed.fnc' at line $line_data->{start_line_num}\n" . "Did you a forget a line continuation on the previous line?\n"; } - for ($ret, @args) { + for ($ret, @args, $implementation) { + next unless defined $_; s/(\w)\*/$1 */g; s/\*\s+(\w)/*$1/g; s/\*const/* const/g; } my $head= sprintf "%-8s|%-7s", $flags, $ret; $head .= sprintf "|%*s", -(31 - length($head)), $name; - if (@args and length($head) > 32) { + if ((@args || $implementation) and length($head) > 32) { $head .= "\\\n"; $head .= " " x 32; } foreach my $ix (0 .. $#args) { my $arg= $args[$ix]; $head .= "|$arg"; - $head .= "\\\n" . (" " x 32) if $ix < $#args; + + # Append continuation marker for all but final line + $head .= "\\\n" if $ix < $#args || $implementation; + + # indent next argument line; $implementation line indented + # separately below + $head .= (" " x 32) if $ix < $#args; + } + + if ($implementation) { + + # Get rid of spaces around punctuation + $implementation =~ s/ \s* ( [[:punct:]] ) \s* /$1/xg; + + # Use 14 spaces so as to generally line up with $name + $head .= (" " x 14) . "= $implementation"; } + $line= $head . "\n"; if ($line =~ /\\\n/) { + + # Create continuation line markers so as to all be in the same column, + # and at least in column 72 my @lines= split /\s*\\\n/, $line; my $len= length($lines[0]); - $len < length($_) and $len= length($_) for @lines; + + # Any implementation line doesn't cause the marker to be output + # further right than the argument lines. Otherwise, could move them + # far to the right, giving more of a bad display. XXX We could fold + # the implementation line. + my $upper_bound = ($implementation) ? $#lines - 1 : $#lines; + + $len < length($_) and $len= length($_) for @lines[ 0 .. $upper_bound ]; $len= int(($len + 7) / 8) * 8; $len= 72 if $len < 72; $line= join("\\\n", @@ -794,6 +823,7 @@ sub tidy_embed_fnc_entry { return_type => $ret, name => $name, args => \@args, + implementation => $implementation, ); $line =~ s/\s+\z/\n/; $line_data->{line}= $line; diff --git a/regen/embed.pl b/regen/embed.pl index abbee02401fe..f9f0620cf168 100755 --- a/regen/embed.pl +++ b/regen/embed.pl @@ -121,7 +121,10 @@ sub generate_proto_h { $ind .= " " x ($level-1) if $level>1; my $inner_ind= $ind ? " " : " "; - my ($flags,$retval,$plain_func,$args) = @{$embed}{qw(flags return_type name args)}; + my ($flags,$retval,$plain_func,$args,$implementation) = @{$embed}{qw(flags return_type name args implementation)}; + my $has_implementation = defined $implementation; + $flags .= 'M' if $has_implementation && $flags !~ /M/; + if ($flags =~ / ( [^AabCDdEefFGhIiMmNnOoPpRrSsTUuvWXx;] ) /x) { die_at_end "flag $1 is not legal (for function $plain_func)"; } @@ -221,6 +224,7 @@ sub generate_proto_h { if $flags =~ /b/ && $flags !~ /M/ && $flags !~ /D/; die_at_end "For '$plain_func', I and i flags are mutually exclusive" if $flags =~ /I/ && $flags =~ /i/; + next if $has_implementation; $ret = ""; $ret .= "$retval\n"; @@ -505,12 +509,65 @@ sub embed_h { } my $level= $_->{level}; my $embed= $_->{embed} or next; - my ($flags,$retval,$func,$args) = @{$embed}{qw(flags return_type name args)}; + my ($flags,$retval,$func,$args,$implementation) = @{$embed}{qw(flags return_type name args implementation)}; + my $ret = ""; my $ind= $level ? " " : ""; $ind .= " " x ($level-1) if $level>1; my $inner_ind= $ind ? " " : " "; - if ($flags !~ /[omM]/ or ($flags =~ /m/ && $flags =~ /p/)) { + + if ($implementation) { + + # Currently, uses the simplistic assumption that the basic + # argument is comprised of the the final \w+ chars, so gets rid of + # any NULLOK, and pointers '*' + $_ =~ s/ ^ .* \W //x for $args->@*; + + # Use the furnished implementation as the base definition + my $arglist = join ",", $args->@*; + $ret = "#${ind}define $func($arglist)"; + add_indent($ret); + $ret .= $implementation . "\n"; + + # And add a full name definition if it differs from the base + my $caller_full_name = full_name($func, $flags); + if ($caller_full_name ne $func) { + my $no_thread_full_define = + indent_define($caller_full_name, $func, $ind); + if ($flags =~ /[T]/) { + + # Without threads, the full name call has nothing extra + $ret .= $no_thread_full_define; + } + else { + # XXX This assumes that if the caller has a pTHX, so does + # the callee. + my ($callee_name, $callee_args_plus_r_paren) = + $implementation =~ m/ ^ + ( .+? ) + \( + ( .*? ) + \s* + \z + /xx; + die "The implementation must be of the form: '" + . "foo(a,b, ...)'" unless $callee_name + and $callee_args_plus_r_paren; + my $callee_full_name = full_name($callee_name, $flags); + + # mTHX in both caller and callee in the threaded case will + # match aTHX + $ret .= "#${ind}ifdef USE_THREADS\n" + . "#${ind} define $caller_full_name(mTHX," + . "$arglist) $callee_full_name(mTHX," + . "$callee_args_plus_r_paren\n" + . "#${ind}else\n" + . "$ind $no_thread_full_define" + . "#${ind}endif\n"; + } + } + } + elsif ($flags !~ /[omM]/ or ($flags =~ /m/ && $flags =~ /p/)) { my $argc = scalar @$args; if ($flags =~ /[T]/) { my $full_name = full_name($func, $flags); From 483dc7feecea9779e1427bf3c1e2d7d96f9eaaf8 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Fri, 11 Jul 2025 06:40:54 -0600 Subject: [PATCH 2/6] Use new embed.fnc implementation capability for sv_utf8_downgrade This shows how taking a #define for sv_utf8_downgrade() from sv.h and moving it to embed.fnc creates the equivalent entry in embed.h, plus automatically generating macros that implement the long form Perl_sv_utf8_downgrade(). This means the manual implementation of Perl_sv_utf8_downgrade() in mathoms.c can be and should be deleted, which this commit also does. --- embed.fnc | 3 ++- embed.h | 4 ++++ mathoms.c | 8 -------- proto.h | 5 ----- sv.h | 1 - 5 files changed, 6 insertions(+), 15 deletions(-) diff --git a/embed.fnc b/embed.fnc index 09431cf09237..76656a86ea3c 100644 --- a/embed.fnc +++ b/embed.fnc @@ -3540,7 +3540,8 @@ AMbdp |void |sv_usepvn_mg |NN SV *sv \ Adp |bool |sv_utf8_decode |NN SV * const sv AMbdp |bool |sv_utf8_downgrade \ |NN SV * const sv \ - |const bool fail_ok + |const bool fail_ok \ + = sv_utf8_downgrade_flags(sv,fail_ok,SV_GMAGIC) Adp |bool |sv_utf8_downgrade_flags \ |NN SV * const sv \ |const bool fail_ok \ diff --git a/embed.h b/embed.h index 9c5c2a8acf04..10e566e95f4d 100644 --- a/embed.h +++ b/embed.h @@ -776,6 +776,7 @@ # define sv_upgrade(a,b) Perl_sv_upgrade(aTHX_ a,b) # define sv_usepvn_flags(a,b,c,d) Perl_sv_usepvn_flags(aTHX_ a,b,c,d) # define sv_utf8_decode(a) Perl_sv_utf8_decode(aTHX_ a) +# define sv_utf8_downgrade(sv,fail_ok) sv_utf8_downgrade_flags(sv,fail_ok,SV_GMAGIC) # define sv_utf8_downgrade_flags(a,b,c) Perl_sv_utf8_downgrade_flags(aTHX_ a,b,c) # define sv_utf8_encode(a) Perl_sv_utf8_encode(aTHX_ a) # define sv_utf8_upgrade_flags_grow(a,b,c) Perl_sv_utf8_upgrade_flags_grow(aTHX_ a,b,c) @@ -2259,8 +2260,11 @@ # define PerlIO_write(a,b,c) Perl_PerlIO_write(aTHX_ a,b,c) # endif /* defined(USE_PERLIO) */ # if defined(USE_THREADS) +# define Perl_sv_utf8_downgrade(mTHX,sv,fail_ok) Perl_sv_utf8_downgrade_flags(mTHX,sv,fail_ok,SV_GMAGIC) # define thread_locale_init() Perl_thread_locale_init(aTHX) # define thread_locale_term() Perl_thread_locale_term(aTHX) +# else +# define Perl_sv_utf8_downgrade sv_utf8_downgrade # endif # if defined(VMS) || defined(WIN32) # define do_aspawn(a,b,c) Perl_do_aspawn(aTHX_ a,b,c) diff --git a/mathoms.c b/mathoms.c index fc7db080934e..2352ce38aece 100644 --- a/mathoms.c +++ b/mathoms.c @@ -808,14 +808,6 @@ Perl_newSVsv(pTHX_ SV *const old) return newSVsv(old); } -bool -Perl_sv_utf8_downgrade(pTHX_ SV *const sv, const bool fail_ok) -{ - PERL_ARGS_ASSERT_SV_UTF8_DOWNGRADE; - - return sv_utf8_downgrade(sv, fail_ok); -} - char * Perl_sv_2pvutf8(pTHX_ SV *sv, STRLEN *const lp) { diff --git a/proto.h b/proto.h index a7e81e069149..c97eb0ab7894 100644 --- a/proto.h +++ b/proto.h @@ -6107,11 +6107,6 @@ Perl_sv_usepvn_mg(pTHX_ SV *sv, char *ptr, STRLEN len); # define PERL_ARGS_ASSERT_SV_USEPVN_MG \ assert(sv) -PERL_CALLCONV bool -Perl_sv_utf8_downgrade(pTHX_ SV * const sv, const bool fail_ok); -# define PERL_ARGS_ASSERT_SV_UTF8_DOWNGRADE \ - assert(sv) - PERL_CALLCONV STRLEN Perl_sv_utf8_upgrade(pTHX_ SV *sv); # define PERL_ARGS_ASSERT_SV_UTF8_UPGRADE \ diff --git a/sv.h b/sv.h index 449091e85375..17eb37daab92 100644 --- a/sv.h +++ b/sv.h @@ -2218,7 +2218,6 @@ immediately written again. #define sv_pvn_force_nomg(sv, lp) sv_pvn_force_flags(sv, lp, 0) #define sv_utf8_upgrade_flags(sv, flags) sv_utf8_upgrade_flags_grow(sv, flags, 0) #define sv_utf8_upgrade_nomg(sv) sv_utf8_upgrade_flags(sv, 0) -#define sv_utf8_downgrade(sv, fail_ok) sv_utf8_downgrade_flags(sv, fail_ok, SV_GMAGIC) #define sv_utf8_downgrade_nomg(sv, fail_ok) sv_utf8_downgrade_flags(sv, fail_ok, 0) /* =for apidoc_defn Am|void|sv_catpvn_nomg|NN SV * const dsv \ From dbfd36de974144a51dc7b63ffaa8d7eabb35a4a9 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Thu, 10 Jul 2025 12:34:07 -0600 Subject: [PATCH 3/6] Use new embed.fnc implementation for extended_utf8_to_uv This shows how two synonymous macros can be conveniently defined in a single place --- embed.fnc | 5 +++-- embed.h | 3 ++- proto.h | 3 --- utf8.h | 2 -- 4 files changed, 5 insertions(+), 8 deletions(-) diff --git a/embed.fnc b/embed.fnc index 76656a86ea3c..bf06b098ba08 100644 --- a/embed.fnc +++ b/embed.fnc @@ -1177,11 +1177,12 @@ AOdp |SV * |eval_pv |NN const char *p \ |I32 croak_on_error AOdp |SSize_t|eval_sv |NN SV *sv \ |I32 flags -ATdmp |bool |extended_utf8_to_uv \ +ATdp |bool |extended_utf8_to_uv \ |NN const U8 * const s \ |NN const U8 * const e \ |NN UV *cp_p \ - |NULLOK Size_t *advance_p + |NULLOK Size_t *advance_p \ + = utf8_to_uv(s,e,cp_p,advance_p) Adfpv |void |fatal_warner |U32 err \ |NN const char *pat \ |... diff --git a/embed.h b/embed.h index 10e566e95f4d..b35efea66912 100644 --- a/embed.h +++ b/embed.h @@ -225,7 +225,8 @@ # define dump_vindent(a,b,c,d) Perl_dump_vindent(aTHX_ a,b,c,d) # define eval_pv(a,b) Perl_eval_pv(aTHX_ a,b) # define eval_sv(a,b) Perl_eval_sv(aTHX_ a,b) -# define extended_utf8_to_uv Perl_extended_utf8_to_uv +# define extended_utf8_to_uv(s,e,cp_p,advance_p) utf8_to_uv(s,e,cp_p,advance_p) +# define Perl_extended_utf8_to_uv extended_utf8_to_uv # define fbm_compile(a,b) Perl_fbm_compile(aTHX_ a,b) # define fbm_instr(a,b,c,d) Perl_fbm_instr(aTHX_ a,b,c,d) # define filter_add(a,b) Perl_filter_add(aTHX_ a,b) diff --git a/proto.h b/proto.h index c97eb0ab7894..cc9595d89307 100644 --- a/proto.h +++ b/proto.h @@ -1101,9 +1101,6 @@ Perl_eval_sv(pTHX_ SV *sv, I32 flags); #define PERL_ARGS_ASSERT_EVAL_SV \ assert(sv) -/* PERL_CALLCONV bool -Perl_extended_utf8_to_uv(const U8 * const s, const U8 * const e, UV *cp_p, Size_t *advance_p); */ - PERL_CALLCONV void Perl_fatal_warner(pTHX_ U32 err, const char *pat, ...) __attribute__format__(__printf__,pTHX_2,pTHX_3); diff --git a/utf8.h b/utf8.h index 7e9fa216a0f4..3566c0bb4a32 100644 --- a/utf8.h +++ b/utf8.h @@ -165,8 +165,6 @@ typedef enum { Perl_utf8_to_uv_errors( s, e, cp_p, advance_p, flags, 0) #define Perl_utf8_to_uv_errors( s, e, cp_p, advance_p, flags, errors) \ Perl_utf8_to_uv_msgs( s, e, cp_p, advance_p, flags, errors, 0) -#define Perl_extended_utf8_to_uv(s, e, cp_p, advance_p) \ - Perl_utf8_to_uv(s, e, cp_p, advance_p) #define Perl_strict_utf8_to_uv( s, e, cp_p, advance_p) \ Perl_utf8_to_uv_flags( s, e, cp_p, advance_p, \ UTF8_DISALLOW_ILLEGAL_INTERCHANGE) From 2924401569ae6257774b15b07167cab7e34a1e2b Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Thu, 10 Jul 2025 12:52:26 -0600 Subject: [PATCH 4/6] Use new embed.fnc implementation for strict_utf8_to_uv This shows how the new capability works for elements without a thread context parameter --- embed.fnc | 5 +++-- embed.h | 3 ++- proto.h | 3 --- utf8.h | 3 --- 4 files changed, 5 insertions(+), 9 deletions(-) diff --git a/embed.fnc b/embed.fnc index bf06b098ba08..5de2b69b51df 100644 --- a/embed.fnc +++ b/embed.fnc @@ -3086,11 +3086,12 @@ dopx |PerlIO *|start_glob |NN SV *tmpglob \ |NN IO *io Adp |I32 |start_subparse |I32 is_format \ |U32 flags -ATdmp |bool |strict_utf8_to_uv \ +ATdp |bool |strict_utf8_to_uv \ |NN const U8 * const s \ |NN const U8 * const e \ |NN UV *cp_p \ - |NULLOK Size_t *advance_p + |NULLOK Size_t *advance_p \ + = utf8_to_uv_flags(s,e,cp_p,advance_p,UTF8_DISALLOW_ILLEGAL_INTERCHANGE) CRp |NV |str_to_version |NN SV *sv : Used in pp_ctl.c p |void |sub_crush_depth|NN CV *cv diff --git a/embed.h b/embed.h index b35efea66912..ad78e297daa4 100644 --- a/embed.h +++ b/embed.h @@ -652,7 +652,8 @@ # define stack_grow(a,b,c) Perl_stack_grow(aTHX_ a,b,c) # define start_subparse(a,b) Perl_start_subparse(aTHX_ a,b) # define str_to_version(a) Perl_str_to_version(aTHX_ a) -# define strict_utf8_to_uv Perl_strict_utf8_to_uv +# define strict_utf8_to_uv(s,e,cp_p,advance_p) utf8_to_uv_flags(s,e,cp_p,advance_p,UTF8_DISALLOW_ILLEGAL_INTERCHANGE) +# define Perl_strict_utf8_to_uv strict_utf8_to_uv # define suspend_compcv(a) Perl_suspend_compcv(aTHX_ a) # define sv_2bool_flags(a,b) Perl_sv_2bool_flags(aTHX_ a,b) # define sv_2cv(a,b,c,d) Perl_sv_2cv(aTHX_ a,b,c,d) diff --git a/proto.h b/proto.h index cc9595d89307..aea7e0734bd8 100644 --- a/proto.h +++ b/proto.h @@ -4309,9 +4309,6 @@ Perl_str_to_version(pTHX_ SV *sv) #define PERL_ARGS_ASSERT_STR_TO_VERSION \ assert(sv) -/* PERL_CALLCONV bool -Perl_strict_utf8_to_uv(const U8 * const s, const U8 * const e, UV *cp_p, Size_t *advance_p); */ - PERL_CALLCONV void Perl_sub_crush_depth(pTHX_ CV *cv) __attribute__visibility__("hidden"); diff --git a/utf8.h b/utf8.h index 3566c0bb4a32..a185efb0f443 100644 --- a/utf8.h +++ b/utf8.h @@ -165,9 +165,6 @@ typedef enum { Perl_utf8_to_uv_errors( s, e, cp_p, advance_p, flags, 0) #define Perl_utf8_to_uv_errors( s, e, cp_p, advance_p, flags, errors) \ Perl_utf8_to_uv_msgs( s, e, cp_p, advance_p, flags, errors, 0) -#define Perl_strict_utf8_to_uv( s, e, cp_p, advance_p) \ - Perl_utf8_to_uv_flags( s, e, cp_p, advance_p, \ - UTF8_DISALLOW_ILLEGAL_INTERCHANGE) #define Perl_c9strict_utf8_to_uv(s, e, cp_p, advance_p) \ Perl_utf8_to_uv_flags( s, e, cp_p, advance_p, \ UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE) From 805ecde5892025b7cc40f52c43557addca086e08 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Thu, 10 Jul 2025 12:58:01 -0600 Subject: [PATCH 5/6] Use new embed.fnc implementation for uv_to_utf8 This shows how the new capability works for replacing elements with a thread context parameter that have an inline function --- embed.fnc | 5 +++-- embed.h | 4 +++- inline.h | 7 ------- proto.h | 5 ----- 4 files changed, 6 insertions(+), 15 deletions(-) diff --git a/embed.fnc b/embed.fnc index 5de2b69b51df..27e2811435a0 100644 --- a/embed.fnc +++ b/embed.fnc @@ -3857,8 +3857,9 @@ Cp |U8 * |uvoffuni_to_utf8_flags_msgs \ |const UV flags \ |NULLOK HV **msgs -Adip |U8 * |uv_to_utf8 |NN U8 *d \ - |UV uv +Adp |U8 * |uv_to_utf8 |NN U8 *d \ + |UV uv \ + = uv_to_utf8_flags(d,uv,0) Adip |U8 * |uv_to_utf8_flags \ |NN U8 *d \ |UV uv \ diff --git a/embed.h b/embed.h index ad78e297daa4..8262348a9a52 100644 --- a/embed.h +++ b/embed.h @@ -826,7 +826,7 @@ # define utf8n_to_uvchr Perl_utf8n_to_uvchr # define utf8n_to_uvchr_error Perl_utf8n_to_uvchr_error # define utf8n_to_uvchr_msgs Perl_utf8n_to_uvchr_msgs -# define uv_to_utf8(a,b) Perl_uv_to_utf8(aTHX_ a,b) +# define uv_to_utf8(d,uv) uv_to_utf8_flags(d,uv,0) # define uv_to_utf8_flags(a,b,c) Perl_uv_to_utf8_flags(aTHX_ a,b,c) # define uv_to_utf8_msgs(a,b,c,d) Perl_uv_to_utf8_msgs(aTHX_ a,b,c,d) # define uvoffuni_to_utf8_flags_msgs(a,b,c,d) Perl_uvoffuni_to_utf8_flags_msgs(aTHX_ a,b,c,d) @@ -2263,10 +2263,12 @@ # endif /* defined(USE_PERLIO) */ # if defined(USE_THREADS) # define Perl_sv_utf8_downgrade(mTHX,sv,fail_ok) Perl_sv_utf8_downgrade_flags(mTHX,sv,fail_ok,SV_GMAGIC) +# define Perl_uv_to_utf8(mTHX,d,uv) Perl_uv_to_utf8_flags(mTHX,d,uv,0) # define thread_locale_init() Perl_thread_locale_init(aTHX) # define thread_locale_term() Perl_thread_locale_term(aTHX) # else # define Perl_sv_utf8_downgrade sv_utf8_downgrade +# define Perl_uv_to_utf8 uv_to_utf8 # endif # if defined(VMS) || defined(WIN32) # define do_aspawn(a,b,c) Perl_do_aspawn(aTHX_ a,b,c) diff --git a/inline.h b/inline.h index 44128c624d07..1a14187e696e 100644 --- a/inline.h +++ b/inline.h @@ -3272,18 +3272,11 @@ Perl_utf8_to_uvchr_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen) return 0; } -PERL_STATIC_INLINE U8 * -Perl_uv_to_utf8(pTHX_ U8 *d, UV uv) -{ - return uv_to_utf8_msgs(d, uv, 0, 0); -} - PERL_STATIC_INLINE U8 * Perl_uv_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags) { return uv_to_utf8_msgs(d, uv, flags, 0); } - PERL_STATIC_INLINE U8 * Perl_uv_to_utf8_msgs(pTHX_ U8 *d, UV uv, UV flags , HV **msgs) { diff --git a/proto.h b/proto.h index aea7e0734bd8..42b1df9dae1f 100644 --- a/proto.h +++ b/proto.h @@ -10275,11 +10275,6 @@ Perl_utf8n_to_uvchr_msgs(const U8 * const s0, STRLEN curlen, STRLEN *retlen, con # define PERL_ARGS_ASSERT_UTF8N_TO_UVCHR_MSGS \ assert(s0) -PERL_STATIC_INLINE U8 * -Perl_uv_to_utf8(pTHX_ U8 *d, UV uv); -# define PERL_ARGS_ASSERT_UV_TO_UTF8 \ - assert(d) - PERL_STATIC_INLINE U8 * Perl_uv_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags); # define PERL_ARGS_ASSERT_UV_TO_UTF8_FLAGS \ From 23a0ae3c554dd40a21ca1462ab18a069071121a2 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Thu, 10 Jul 2025 13:01:17 -0600 Subject: [PATCH 6/6] Use new embed.fnc implementation for uv_to_utf8_flags This shows how the new capability works for chaining elements, as uv_to_utf8 calls this, which now calls uv_to_utf8_msgs --- embed.fnc | 5 +++-- embed.h | 4 +++- inline.h | 5 ----- proto.h | 5 ----- 4 files changed, 6 insertions(+), 13 deletions(-) diff --git a/embed.fnc b/embed.fnc index 27e2811435a0..0116f8d96afd 100644 --- a/embed.fnc +++ b/embed.fnc @@ -3860,10 +3860,11 @@ Cp |U8 * |uvoffuni_to_utf8_flags_msgs \ Adp |U8 * |uv_to_utf8 |NN U8 *d \ |UV uv \ = uv_to_utf8_flags(d,uv,0) -Adip |U8 * |uv_to_utf8_flags \ +Adp |U8 * |uv_to_utf8_flags \ |NN U8 *d \ |UV uv \ - |UV flags + |UV flags \ + = uv_to_utf8_msgs(d,uv,flags,0) Adip |U8 * |uv_to_utf8_msgs|NN U8 *d \ |UV uv \ |UV flags \ diff --git a/embed.h b/embed.h index 8262348a9a52..31015c16ae1c 100644 --- a/embed.h +++ b/embed.h @@ -827,7 +827,7 @@ # define utf8n_to_uvchr_error Perl_utf8n_to_uvchr_error # define utf8n_to_uvchr_msgs Perl_utf8n_to_uvchr_msgs # define uv_to_utf8(d,uv) uv_to_utf8_flags(d,uv,0) -# define uv_to_utf8_flags(a,b,c) Perl_uv_to_utf8_flags(aTHX_ a,b,c) +# define uv_to_utf8_flags(d,uv,flags) uv_to_utf8_msgs(d,uv,flags,0) # define uv_to_utf8_msgs(a,b,c,d) Perl_uv_to_utf8_msgs(aTHX_ a,b,c,d) # define uvoffuni_to_utf8_flags_msgs(a,b,c,d) Perl_uvoffuni_to_utf8_flags_msgs(aTHX_ a,b,c,d) # define valid_identifier_pve(a,b,c) Perl_valid_identifier_pve(aTHX_ a,b,c) @@ -2264,11 +2264,13 @@ # if defined(USE_THREADS) # define Perl_sv_utf8_downgrade(mTHX,sv,fail_ok) Perl_sv_utf8_downgrade_flags(mTHX,sv,fail_ok,SV_GMAGIC) # define Perl_uv_to_utf8(mTHX,d,uv) Perl_uv_to_utf8_flags(mTHX,d,uv,0) +# define Perl_uv_to_utf8_flags(mTHX,d,uv,flags) Perl_uv_to_utf8_msgs(mTHX,d,uv,flags,0) # define thread_locale_init() Perl_thread_locale_init(aTHX) # define thread_locale_term() Perl_thread_locale_term(aTHX) # else # define Perl_sv_utf8_downgrade sv_utf8_downgrade # define Perl_uv_to_utf8 uv_to_utf8 +# define Perl_uv_to_utf8_flags uv_to_utf8_flags # endif # if defined(VMS) || defined(WIN32) # define do_aspawn(a,b,c) Perl_do_aspawn(aTHX_ a,b,c) diff --git a/inline.h b/inline.h index 1a14187e696e..6196f60e1617 100644 --- a/inline.h +++ b/inline.h @@ -3272,11 +3272,6 @@ Perl_utf8_to_uvchr_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen) return 0; } -PERL_STATIC_INLINE U8 * -Perl_uv_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags) -{ - return uv_to_utf8_msgs(d, uv, flags, 0); -} PERL_STATIC_INLINE U8 * Perl_uv_to_utf8_msgs(pTHX_ U8 *d, UV uv, UV flags , HV **msgs) { diff --git a/proto.h b/proto.h index 42b1df9dae1f..1ef6e423b6a7 100644 --- a/proto.h +++ b/proto.h @@ -10275,11 +10275,6 @@ Perl_utf8n_to_uvchr_msgs(const U8 * const s0, STRLEN curlen, STRLEN *retlen, con # define PERL_ARGS_ASSERT_UTF8N_TO_UVCHR_MSGS \ assert(s0) -PERL_STATIC_INLINE U8 * -Perl_uv_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags); -# define PERL_ARGS_ASSERT_UV_TO_UTF8_FLAGS \ - assert(d) - PERL_STATIC_INLINE U8 * Perl_uv_to_utf8_msgs(pTHX_ U8 *d, UV uv, UV flags, HV **msgs); # define PERL_ARGS_ASSERT_UV_TO_UTF8_MSGS \