From a4a255687844b03133d436daf3c0c93e6a8f3ea2 Mon Sep 17 00:00:00 2001 From: Philippe Benard Date: Thu, 5 Jun 2025 16:00:08 +0200 Subject: [PATCH 1/3] Fixing ${ list; } ----------------- MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The documentation documents 2 behaviors that are contradictory, and obviously the implementation implements only one of those, and unfortunately this is the worst one that is implemented. The documnetation says Command Substitution. The standard output from a command list enclosed in parentheses pre‐ ceded by a dollar sign ( $(list) ), or in a brace group preceded by a dollar sign ( ${ list;} ), or in a pair of grave accents (``) may be used as part or all of a word; trailing new-lines are removed. In the second case, the { and } are treated as a reserved words so that { must be followed by a blank and } must appear at the beginning of the line or follow a ;. ... Except for the second form, the command list is run in a subshell so that no side effects are possible. For the second form, the final } will be recognized as a reserved word after any token. So the former part say that ${ list ;} is closed either by ;} or \n} This is not true the current implementation miss this, ;} or \n} is never recognised as a closing for ${ list. The later part say that ${ list ;} is closed either by a } following any token, meaning the first } alone (as a word) is considered a close. Bottom line alias echo1=echo alias echo2=echo $ echo1 ${ echo2 word } } } word } } This is because the implementation close ${ list with the first } then echo2 receive word as uniq arg, then echo1 receive arg } } This is a 'lazy' } lookup for ${... If we want to get the ${ list [;\n] } semantic we got to implement a 'greedy' } lookup, i.e all the } that are not a reserved word, i.e } following [;\n] are then regular chars. Because this is not compatible with the current implementation, and because we want to keep the historical strange implementation, we got to introduce a ksh93 option, when it is active we want the greedy lookup. The option flag is called comsub_brace_greedy and is off by default, i.e the test suite use the lazy implementation, and the greedy implementation require specific tests. Ksh93 option definition. ----------------------- * cmd/ksh93/include/shell.h : Define SH_COMSUB_BRACE_GREEDY * cmd/ksh93/data/options.c : Enter "comsub_brace_greedy" into shtab_options[] Greedy implementation --------------------- * cmd/ksh93/include/shlex.h: Add one more member in struct _shlex_pvt_lexstate_ { ... char inbracecomsub; /* 1 inside ${ ... [;\n]} bug-691: Phi: */ ... } When ${ is scanned and SH_COMSUB_BRACE_GREEDY is on then lp->lex.inbracecomsub=1; Skipping over } in greedy mode ============================== * cmd/ksh93/sh/lex.c:sh_lex() ======== while(1) { /* skip over characters in the current state */ state = sh_lexstates[mode]; do { n = STATE(state,c); .... switch(n) case S_REG: case S_BRACE: For state ST_BEGIN and ST_NORM, when STATE return c=='}' we got n==S_BRACE, we implement a gross hack here, if the '}' is not on the first reserved word, we pretend we got a S_REG and accept the } as a char, not a closing ${...} this is done with n = STATE(state,c); /* * Bug-691: Phi gross hack * Greedy ${list} will treat all '}' not on a reserved * word (i.e after [;\n]) as regular char. * Non greedy close ${ on first '}' (ATT code) */ if( sh_isoption(SH_COMSUB_BRACE_GREEDY) && lp->lex.inbracecomsub && c=='}' && ( state==sh_lexstates[ ST_BEGIN] || state==sh_lexstates[ST_NORM] ) ) { n=S_REG ; /* treat '}' as reg char */ } Setting inbracecomsub=1 ======================= * cmd/ksh93/sh/lex.c:comsub() ======== save = lp->lex; ... /* bug-691: Phi: */ if( sh_isoption(SH_COMSUB_BRACE_GREEDY) && *cp=='{' ) lp->lex.inbracecomsub = 1; Note inbracecomsub is set to 1 here, and reset at return time lp->lex = save; (See Resetting inbracecomsub) * cmd/ksh93/sh/parse.c:sh_dolparen() ============= case LBRACE: lp->lex.inbracecomsub=1; /* bug-691: Phi: */ Resetting inbracecomsub=0 ========================= * cmd/ksh93/sh/lex.c:comsub() ======== A seen above the comsub() return time do reset inbracecomsub=0 * cmd/ksh93/sh/lex.c:sh_lex() ======== case S_NL: /* bug-691: Phi: */ if( sh_isoption(SH_COMSUB_BRACE_GREEDY) ) lp->lex.inbracecomsub=0; case S_OP: /* bug-691: Phi: */ if( sh_isoption(SH_COMSUB_BRACE_GREEDY) ) lp->lex.inbracecomsub=0; After a new line or ';' we are not in greedy mode any more next } will close ${ New test ======== src/cmd/ksh93/tests/shtests: ======= Added greedy_comsub.sh for testing greedy ${...{...}...[;\n]} Test accouting fixed for greedy_comsub.sh greedy_comsub.sh) grep -c '^T ' $i ;; # bug-691: Phi: --- src/cmd/ksh93/tests/greedy_comsub.sh | 79 ++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100755 src/cmd/ksh93/tests/greedy_comsub.sh diff --git a/src/cmd/ksh93/tests/greedy_comsub.sh b/src/cmd/ksh93/tests/greedy_comsub.sh new file mode 100755 index 000000000000..fee7cc04c9dc --- /dev/null +++ b/src/cmd/ksh93/tests/greedy_comsub.sh @@ -0,0 +1,79 @@ +######################################################################## +# # +# This file is part of the ksh 93u+m package # +# Copyright (c) 2022-2025 Contributors to ksh 93u+m # +# and is licensed under the # +# Eclipse Public License, Version 2.0 # +# # +# A copy of the License is available at # +# https://www.eclipse.org/org/documents/epl-2.0/EPL-2.0.html # +# (with md5 checksum 84283fa8859daf213bdda5a9f8d1be1d) # +# # +# Phi # +# Martijn Dekker # +# # +######################################################################## + + +. "${SHTESTS_COMMON:-${0%/*}/_common}" + + +alias T='do_test "$LINENO"' + +# ====== +# Tests for greedy comsub, i.e ${ list } [;\n]} +# | | +# | +-> Reserved word (keyword) +# +--------> ! Reserved word (keyword) +# https://github.com/ksh93/ksh/issues/691 + +function do_test # $LINENO "${ list ;}" "expect" +{ [ "$2" = "$3" ] || + \err_exit "$1" "expected '$3', got '$2'" +} + + +# ====== + +set -o | grep -q comsub_brace_greedy || +{ warning \ + "This SHELL doesn't support 'set -o comsub_brace_greedy '; skipping tests" + alias T=: +} +set -o comsub_brace_greedy + +T "${ echo A ; }" "A" +T "${ echo A ;}" "A" +T "${ echo A;}" "A" +# ====== + +T "${ echo A ${ echo B ;} ;}" "A B" # err_exit +T "${ echo A ${ echo B ;};}" "A B" +T "${ echo A ${ echo B;};}" "A B" +T "${ echo A { ${ echo B ;} C } ;}" "A { B C }" +T "${ echo A { ${ echo B;} C } ;}" "A { B C }" +T "${ echo A { ${ echo B;} C};}" "A { B C}" +T "${ echo A ;} ${ echo B ;}" "A B" +T "${ echo A ;} ${ echo B;}" "A B" +T "${ echo A ;}${ echo B;}" "AB" +T "${ echo A;}${ echo B;}" "AB" + +# @stephane-chazelas test cases +# https://github.com/ksh93/ksh/issues/691 +T "${ echo {a.b}; }" "{a.b}" +T "${ echo '{a,b}c' ;}" "{a,b}c" +T "${ echo ${ echo {fd[0]}< /dev/null; } ;}" "" + +# Variation on @stephane-chazelas +a=1 a.c=1 +T "${ echo ${a.c}; }" "1" +T "${ echo {a..c}; }" "a b c" +a=x b=z +T "${ echo {$a..$b}; }" "x y z" +T "${ echo {${a}..${b}}; }" "x y z" +a='x;' b='z;' +T "${ echo {${a//;}..${b//;}}; }" "x y z" + + +# ====== +exit $((Errors<125?Errors:125)) From 1196e29e70261cea5f5d3182abb8cbc4597a0e9c Mon Sep 17 00:00:00 2001 From: Philippe Benard Date: Thu, 5 Jun 2025 17:07:47 +0200 Subject: [PATCH 2/3] Fixing ${ list; } ----------------- MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The documentation documents 2 behaviors that are contradictory, and obviously the implementation implements only one of those, and unfortunately this is the worst one that is implemented. The documnetation says Command Substitution. The standard output from a command list enclosed in parentheses pre‐ ceded by a dollar sign ( $(list) ), or in a brace group preceded by a dollar sign ( ${ list;} ), or in a pair of grave accents (``) may be used as part or all of a word; trailing new-lines are removed. In the second case, the { and } are treated as a reserved words so that { must be followed by a blank and } must appear at the beginning of the line or follow a ;. ... Except for the second form, the command list is run in a subshell so that no side effects are possible. For the second form, the final } will be recognized as a reserved word after any token. So the former part say that ${ list ;} is closed either by ;} or \n} This is not true the current implementation miss this, ;} or \n} is never recognised as a closing for ${ list. The later part say that ${ list ;} is closed either by a } following any token, meaning the first } alone (as a word) is considered a close. Bottom line alias echo1=echo alias echo2=echo $ echo1 ${ echo2 word } } } word } } This is because the implementation close ${ list with the first } then echo2 receive word as uniq arg, then echo1 receive arg } } This is a 'lazy' } lookup for ${... If we want to get the ${ list [;\n] } semantic we got to implement a 'greedy' } lookup, i.e all the } that are not a reserved word, i.e } following [;\n] are then regular chars. Because this is not compatible with the current implementation, and because we want to keep the historical strange implementation, we got to introduce a ksh93 option, when it is active we want the greedy lookup. The option flag is called comsub_brace_greedy and is off by default, i.e the test suite use the lazy implementation, and the greedy implementation require specific tests. Ksh93 option definition. ----------------------- * cmd/ksh93/include/shell.h : Define SH_COMSUB_BRACE_GREEDY * cmd/ksh93/data/options.c : Enter "comsub_brace_greedy" into shtab_options[] Greedy implementation --------------------- * cmd/ksh93/include/shlex.h: Add one more member in struct _shlex_pvt_lexstate_ { ... char inbracecomsub; /* 1 inside ${ ... [;\n]} bug-691: Phi: */ ... } When ${ is scanned and SH_COMSUB_BRACE_GREEDY is on then lp->lex.inbracecomsub=1; Skipping over } in greedy mode ============================== * cmd/ksh93/sh/lex.c:sh_lex() ======== while(1) { /* skip over characters in the current state */ state = sh_lexstates[mode]; do { n = STATE(state,c); .... switch(n) case S_REG: case S_BRACE: For state ST_BEGIN and ST_NORM, when STATE return c=='}' we got n==S_BRACE, we implement a gross hack here, if the '}' is not on the first reserved word, we pretend we got a S_REG and accept the } as a char, not a closing ${...} this is done with n = STATE(state,c); /* * Bug-691: Phi gross hack * Greedy ${list} will treat all '}' not on a reserved * word (i.e after [;\n]) as regular char. * Non greedy close ${ on first '}' (ATT code) */ if( sh_isoption(SH_COMSUB_BRACE_GREEDY) && lp->lex.inbracecomsub && c=='}' && ( state==sh_lexstates[ ST_BEGIN] || state==sh_lexstates[ST_NORM] ) ) { n=S_REG ; /* treat '}' as reg char */ } Setting inbracecomsub=1 ======================= * cmd/ksh93/sh/lex.c:comsub() ======== save = lp->lex; ... /* bug-691: Phi: */ if( sh_isoption(SH_COMSUB_BRACE_GREEDY) && *cp=='{' ) lp->lex.inbracecomsub = 1; Note inbracecomsub is set to 1 here, and reset at return time lp->lex = save; (See Resetting inbracecomsub) * cmd/ksh93/sh/parse.c:sh_dolparen() ============= case LBRACE: lp->lex.inbracecomsub=1; /* bug-691: Phi: */ Resetting inbracecomsub=0 ========================= * cmd/ksh93/sh/lex.c:comsub() ======== A seen above the comsub() return time do reset inbracecomsub=0 * cmd/ksh93/sh/lex.c:sh_lex() ======== case S_NL: /* bug-691: Phi: */ if( sh_isoption(SH_COMSUB_BRACE_GREEDY) ) lp->lex.inbracecomsub=0; case S_OP: /* bug-691: Phi: */ if( sh_isoption(SH_COMSUB_BRACE_GREEDY) ) lp->lex.inbracecomsub=0; After a new line or ';' we are not in greedy mode any more next } will close ${ New test ======== src/cmd/ksh93/tests/shtests: ======= Added greedy_comsub.sh for testing greedy ${...{...}...[;\n]} Test accouting fixed for greedy_comsub.sh greedy_comsub.sh) grep -c '^T ' $i ;; # bug-691: Phi: --- src/cmd/ksh93/data/options.c | 1 + src/cmd/ksh93/include/shell.h | 2 ++ src/cmd/ksh93/include/shlex.h | 1 + src/cmd/ksh93/sh/lex.c | 39 +++++++++++++++++++++++++++++++++-- src/cmd/ksh93/sh/parse.c | 1 + src/cmd/ksh93/tests/shtests | 1 + 6 files changed, 43 insertions(+), 2 deletions(-) diff --git a/src/cmd/ksh93/data/options.c b/src/cmd/ksh93/data/options.c index fee746561d50..4bec11bd9712 100644 --- a/src/cmd/ksh93/data/options.c +++ b/src/cmd/ksh93/data/options.c @@ -39,6 +39,7 @@ const Shtable_t shtab_options[] = "braceexpand", SH_BRACEEXPAND, #endif "noclobber", SH_NOCLOBBER, + "comsub_brace_greedy", SH_COMSUB_BRACE_GREEDY, /* bug-691: phi: */ #if SHOPT_ESH "emacs", SH_EMACS, #endif diff --git a/src/cmd/ksh93/include/shell.h b/src/cmd/ksh93/include/shell.h index b9c88bd75816..68f6a00492e1 100644 --- a/src/cmd/ksh93/include/shell.h +++ b/src/cmd/ksh93/include/shell.h @@ -152,6 +152,8 @@ typedef union Shnode_u Shnode_t; #define SH_MULTILINE 47 #define SH_NOBACKSLCTRL 48 #endif +/* bug-691: Phi: ${ list } } [;\n] } eat '}' until we get one followin [;\n] */ +#define SH_COMSUB_BRACE_GREEDY 49 /* Bug-691: */ #define SH_LOGIN_SHELL 67 #if _BLD_ksh diff --git a/src/cmd/ksh93/include/shlex.h b/src/cmd/ksh93/include/shlex.h index 0894a732f88f..ea334656dc01 100644 --- a/src/cmd/ksh93/include/shlex.h +++ b/src/cmd/ksh93/include/shlex.h @@ -34,6 +34,7 @@ struct _shlex_pvt_lexstate_ { char incase; /* 1 for case pattern, 2 after case */ char intest; /* 1 inside [[ ... ]] */ + char inbracecomsub; /* 1 inside ${ ... [;\n]} bug-691: Phi:*/ char testop1; /* 1 when unary test op legal */ char testop2; /* 1 when binary test op legal */ char reservok; /* >0 for reserved word legal */ diff --git a/src/cmd/ksh93/sh/lex.c b/src/cmd/ksh93/sh/lex.c index 643fda58c444..a99dba8f5efa 100644 --- a/src/cmd/ksh93/sh/lex.c +++ b/src/cmd/ksh93/sh/lex.c @@ -312,6 +312,20 @@ int sh_lex(Lex_t* lp) state = sh_lexstates[mode]; do { n = STATE(state,c); + /* + * Bug-691: Phi gross hack + * Greedy ${list} will treat all '}' not on a reserved + * word (i.e after [;\n]) as regular char. + * Non greedy close ${ on first '}' (ATT code) + */ + if( sh_isoption(SH_COMSUB_BRACE_GREEDY) && + lp->lex.inbracecomsub && c=='}' && + ( state==sh_lexstates[ ST_BEGIN] || + state==sh_lexstates[ST_NORM] + ) + ) + { n=S_REG ; /* treat '}' as reg char */ + } if (varnametry) varnamecount += LEN; } while (n == 0); @@ -430,6 +444,10 @@ int sh_lex(Lex_t* lp) lp->lex.skipword = 0; /* FALLTHROUGH */ case S_NL: + /* bug-691: Phi: */ + if( sh_isoption(SH_COMSUB_BRACE_GREEDY) ) + lp->lex.inbracecomsub=0; + /* skip over new-lines */ lp->lex.last_quote = 0; while(sh.inlineno++,fcget()=='\n'); @@ -453,6 +471,10 @@ int sh_lex(Lex_t* lp) } continue; case S_OP: + /* bug-691: Phi: */ + if( sh_isoption(SH_COMSUB_BRACE_GREEDY) ) + lp->lex.inbracecomsub=0; + /* return operator token */ if(c=='<' || c=='>') { @@ -1014,7 +1036,7 @@ int sh_lex(Lex_t* lp) continue; if((n=sh_lexstates[ST_BEGIN][c])==0 || n==S_OP || n==S_NLTOK) { - c = LBRACE; + c = LBRACE; goto do_comsub; } } @@ -1304,7 +1326,12 @@ int sh_lex(Lex_t* lp) if(n==LBRACT) c = 0; else if(n==RBRACE && lp->comsub) - return lp->token=n; + { /* Bug-691: Phi: AT&T original non greedy path */ + if( ! (sh_isoption(SH_COMSUB_BRACE_GREEDY) && + lp->lex.inbracecomsub)) + return lp->token=n; + /* Bug-691: Phi: Greedy continue... */ + } else if(n=='~') c = ARG_MAC; else @@ -1575,6 +1602,11 @@ static int comsub(Lex_t *lp, int endtok) int off, messages=0, assignok=lp->assignok, csub; struct _shlex_pvt_lexstate_ save = lp->lex; csub = lp->comsub; + + /* bug-691: Phi: */ + if( sh_isoption(SH_COMSUB_BRACE_GREEDY) && *cp=='{' ) + lp->lex.inbracecomsub = 1; + sh_lexopen(lp,1); lp->lexd.dolparen++; lp->lexd.dolparen_arithexp = endtok==LPAREN && fcpeek(1)==LPAREN; /* $(( */ @@ -1645,6 +1677,8 @@ static int comsub(Lex_t *lp, int endtok) if(endtok==LBRACE && !lp->lex.incase) { lp->comsub = 0; + /* bug-691: Phi: */ + if(! sh_isoption(SH_COMSUB_BRACE_GREEDY)) count++; } break; @@ -1652,6 +1686,7 @@ static int comsub(Lex_t *lp, int endtok) rbrace: if(endtok==LBRACE && --count<=0) goto done; + if(count==1) lp->comsub = endtok==LBRACE; break; diff --git a/src/cmd/ksh93/sh/parse.c b/src/cmd/ksh93/sh/parse.c index a78992d53fcb..148756ed216b 100644 --- a/src/cmd/ksh93/sh/parse.c +++ b/src/cmd/ksh93/sh/parse.c @@ -492,6 +492,7 @@ Shnode_t *sh_dolparen(Lex_t* lp) t = sh_cmd(lp,RPAREN,SH_NL|SH_EMPTY); break; case LBRACE: + lp->lex.inbracecomsub=1; /* bug-691: Phi: */ t = sh_cmd(lp,RBRACE,SH_NL|SH_EMPTY); break; } diff --git a/src/cmd/ksh93/tests/shtests b/src/cmd/ksh93/tests/shtests index e8b38a3d7a3f..a7e8fe38ab86 100755 --- a/src/cmd/ksh93/tests/shtests +++ b/src/cmd/ksh93/tests/shtests @@ -376,6 +376,7 @@ do [[ $i == *.sh ]] || i+='.sh' glob.sh) grep -c '^[[:blank:]]*test_[a-z]\{3,\}' $i ;; leaks.sh) grep -c ^TEST $i ;; printf.sh) grep -c -E '([[:blank:]]err_exit|^[[:blank:]]*T)[[:blank:]]' $i ;; + greedy_comsub.sh) grep -c '^T ' $i ;; # bug-691: Phi: pty.sh) grep -c 'tst ' $i ;; *) grep -c err_exit $i ;; esac ) From 10f2f7f859dafc29e5d511e492bd2ef5e4f4a2b4 Mon Sep 17 00:00:00 2001 From: Philippe Benard Date: Thu, 5 Jun 2025 22:47:21 +0200 Subject: [PATCH 3/3] bug-691: Make comsub_brace_greedy available for shcomp as well. In lex.c, for the shcomp context only add a set -o comsub_brace_greedy catch and set sh.option accordingly so shcomp can proceed. --- src/cmd/ksh93/sh/lex.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/src/cmd/ksh93/sh/lex.c b/src/cmd/ksh93/sh/lex.c index a99dba8f5efa..dc55cae98981 100644 --- a/src/cmd/ksh93/sh/lex.c +++ b/src/cmd/ksh93/sh/lex.c @@ -1304,6 +1304,42 @@ int sh_lex(Lex_t* lp) if(!(state=lp->lexd.first)) state = fcfirst(); n = fcseek(0)-(char*)state; + /* + * bug-691: Phi: + * In the perf path (ksh interpreter) we don't enter the following if() + * For shcomp we catch "set [+-]o comsub_brace_greedy" and set sh_option + * accordingly, allowing compile to proceed in greedy mode if needed. + */ + if(sh.shcomp && lp->lex.reservok && n==3 && + state[0]=='s' && state[1]=='e'&& state[2]=='t' ) + { + int o=0; + const char *p=state+3; + while(*p && *p!=';' && *p!='\n') + { + if(p[0]=='+' && p[1]=='o') + { + o=1; + } + if(p[0]=='-' && p[1]=='o') + { + o=2; + } + if( o && strncmp(p,"comsub_brace_greedy",19)==0) + { + if(o==1) + { + sh_offoption(SH_COMSUB_BRACE_GREEDY); + } + if(o==2) + { + sh_onoption(SH_COMSUB_BRACE_GREEDY); + } + break; + } + p++; + } + } if(!lp->arg) lp->arg = stkseek(sh.stk,ARGVAL); if(n>0)