Skip to content

Commit 231a606

Browse files
committed
main: using regex for choosing a parser for the given file name
This change extends the --map-<LANG> option to support regular expression matching against the full file name. The original --map-<LANG> option supports glob-based matching and extension comparison against the file basename. However, these two methods are not enough when the file names are too generic. See #3287. The regular expression passed to --map-<LANG> must be surrounded by % characters, like: --map-RpmMacros='%(.*/)?macros\.d/macros\.([^/]+)$%'. If you want to match in a case-insensitive way, append `i' after the second %, like: --map-RpmMacros='%(.*/)?macros\.d/macros\.([^/]+)$%i'. If you want to use % as part of an expression, put \ before the % to escape it. TODO: - [ ] update ctags.1 - [ ] add Tmain test cases - [ ] add pcre backend - [ ] update NEWS Signed-off-by: Masatake YAMATO <[email protected]>
1 parent 44da7ec commit 231a606

File tree

17 files changed

+503
-31
lines changed

17 files changed

+503
-31
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
0

Tmain/list-map-rexprs.d/run.sh

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Copyright: 2025 Masatake YAMATO
2+
# License: GPL-2
3+
4+
CTAGS=$1

# Define a throwaway language and exercise each form of the regular
# expression map syntax on it: an expression containing escaped '%'
# characters, a case-insensitive expression, and two expressions that are
# added and then removed again (with and without the `i' flag), so they are
# not expected in the listing. Then list the regular expression maps of
# RpmMacros alone.
${CTAGS} --quiet --options=NONE \
	--langdef=Something \
	--map-Something='%\%ESCAPING\%%' \
	--map-Something=+'%ICASE%i' \
	--map-Something=+'%TEMP%' \
	--map-Something=-'%TEMP%' \
	--map-Something=+'%TEMPi%i' \
	--map-Something=-'%TEMPi%i' \
	--list-map-rexprs=all \
&& echo '## RpmMacros' \
&& ${CTAGS} --quiet --options=NONE --list-map-rexprs=RpmMacros

Tmain/list-map-rexprs.d/stderr-expected.txt

Whitespace-only changes.
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#LANGUAGE EXPRESSION CASE
2+
RpmMacros (.*/)?macros\.d/macros\.([^/]+)$ sensitive
3+
Something %ESCAPING% sensitive
4+
Something ICASE insensitive
5+
## RpmMacros
6+
#EXPRESSION CASE
7+
(.*/)?macros\.d/macros\.([^/]+)$ sensitive

Tmain/versioning.d/stdout-expected.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@ About TEST language
1616
enabled: yes
1717
version: 10.9
1818

19+
Mappings/rexprs
20+
-------------------------------------------------------
21+
22+
1923
Mappings/patterns
2024
-------------------------------------------------------
2125
MYTEST

main/options.c

Lines changed: 74 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
#include "interactive_p.h"
4040
#include "writer_p.h"
4141
#include "trace.h"
42+
#include "flags_p.h"
4243

4344
#ifdef HAVE_JANSSON
4445
#include <jansson.h>
@@ -59,6 +60,8 @@
5960
/* The following separators are permitted for list options.
6061
*/
6162
#define EXTENSION_SEPARATOR '.'
63+
#define REXPR_START '%'
64+
#define REXPR_STOP '%'
6265
#define PATTERN_START '('
6366
#define PATTERN_STOP ')'
6467
#define IGNORE_SEPARATORS ", \t\n"
@@ -303,10 +306,10 @@ static optionDescription LongOptionDescription [] = {
303306
{1,0," --langmap=<map>[,<map>[...]]"},
304307
{1,0," Override default mapping of language to input file extension."},
305308
{1,0," e.g. --langmap=c:.c.x,java:+.j,make:([Mm]akefile).mak"},
306-
{1,0," --map-<LANG>=[+|-]<extension>|<pattern>"},
309+
{1,0," --map-<LANG>=[+|-]<extension>|<pattern>|<rexpr>"},
307310
{1,0," Set, add(+) or remove(-) the map for <LANG>."},
308-
{1,0," Unlike --langmap, this doesn't take a list; only one file name <pattern>"},
309-
{1,0," or one file <extension> can be specified at once."},
311+
{1,0," Unlike --langmap, this doesn't take a list; only one file name <pattern>,"},
312+
{1,0," one file name <extension>, or one file <rexpr> can be specified at once."},
310313
{1,0," Unlike --langmap the change with this option affects mapping of <LANG> only."},
311314
{1,0,""},
312315
{1,0,"Tags File Contents Options"},
@@ -436,6 +439,8 @@ static optionDescription LongOptionDescription [] = {
436439
{1,0," Output list of language extensions in mapping."},
437440
{1,0," --list-map-patterns[=(<language>|all)]"},
438441
{1,0," Output list of language patterns in mapping."},
442+
{1,0," --list-map-rexprs[=(<language>|all)]"},
443+
{1,0," Output list of language regular expressions in mapping."},
439444
{1,0," --list-maps[=(<language>|all)]"},
440445
{1,0," Output list of language mappings (both extensions and patterns)."},
441446
{1,0," --list-mline-regex-flags"},
@@ -1793,6 +1798,7 @@ static char* extractMapFromParameter (const langType language,
17931798
++parameter;
17941799
for (p = parameter ; *p != PATTERN_STOP && *p != '\0' ; ++p)
17951800
{
1801+
/* TODO: Can this handle a pattern including ')' ? */
17961802
if (*p == '\\' && *(p + 1) == PATTERN_STOP)
17971803
++p;
17981804
}
@@ -1808,9 +1814,45 @@ static char* extractMapFromParameter (const langType language,
18081814
return result;
18091815
}
18101816

1817+
if (first == REXPR_START)
1818+
{
1819+
*mapType = LMAP_REXPR;
1820+
1821+
++parameter;
1822+
vString *rexpr = vStringNew ();
1823+
for (p = parameter ; *p != REXPR_STOP && *p != '\0' ; ++p)
1824+
{
1825+
if (*p == '\\' && *(p + 1) == REXPR_STOP)
1826+
p++;
1827+
vStringPut (rexpr, *p);
1828+
}
1829+
if (*p == '\0')
1830+
error (FATAL, "Unterminated file name regular expression for %s language: %s",
1831+
getLanguageName (language), parameter);
1832+
1833+
*tail = p + 1;
1834+
return vStringDeleteUnwrap (rexpr);
1835+
}
1836+
18111837
return NULL;
18121838
}
18131839

1840+
/*
 * Handler for the short flag `i' of a file name regular expression map.
 *
 * The flag character itself is ignored. `data' is expected to point to a
 * bool (the callers in this file pass the address of their local `icase'
 * to flagsEval); that bool is set to true.
 */
static void langmap_rexpr_icase_short (char c CTAGS_ATTR_UNUSED, void* data)
{
	*(bool *) data = true;
}
1845+
1846+
/*
 * Handler for the long flag "icase" of a file name regular expression map.
 *
 * Both string arguments are ignored; the work is delegated to the short
 * flag handler so that the two spellings behave the same way.
 */
static void langmap_rexpr_icase_long (const char* s CTAGS_ATTR_UNUSED,
									  const char* const unused CTAGS_ATTR_UNUSED,
									  void* data)
{
	langmap_rexpr_icase_short ('i', data);
}
1850+
1851+
static flagDefinition langmapRexprFlagDef[] = {
1852+
{ 'i', "icase", langmap_rexpr_icase_short, langmap_rexpr_icase_long,
1853+
NULL, "applied in a case-insensitive manner"},
1854+
};
1855+
18141856
static char* addLanguageMap (const langType language, char* map_parameter,
18151857
bool exclusiveInAllLanguages)
18161858
{
@@ -1823,6 +1865,13 @@ static char* addLanguageMap (const langType language, char* map_parameter,
18231865
addLanguageExtensionMap (language, map, exclusiveInAllLanguages);
18241866
else if (map && map_type == LMAP_PATTERN)
18251867
addLanguagePatternMap (language, map, exclusiveInAllLanguages);
1868+
else if (map && map_type == LMAP_REXPR)
1869+
{
1870+
bool icase = false;
1871+
1872+
flagsEval (p, langmapRexprFlagDef, ARRAY_SIZE(langmapRexprFlagDef), &icase);
1873+
addLanguageRexprMap (language, map, icase, exclusiveInAllLanguages);
1874+
}
18261875
else
18271876
error (FATAL, "Badly formed language map for %s language",
18281877
getLanguageName (language));
@@ -1843,6 +1892,13 @@ static char* removeLanguageMap (const langType language, char* map_parameter)
18431892
removeLanguageExtensionMap (language, map);
18441893
else if (map && map_type == LMAP_PATTERN)
18451894
removeLanguagePatternMap (language, map);
1895+
else if (map && map_type == LMAP_REXPR)
1896+
{
1897+
bool icase = false;
1898+
1899+
flagsEval (p, langmapRexprFlagDef, ARRAY_SIZE(langmapRexprFlagDef), &icase);
1900+
removeLanguageRexprMap (language, map, icase);
1901+
}
18461902
else
18471903
error (FATAL, "Badly formed language map for %s language",
18481904
getLanguageName (language));
@@ -2164,6 +2220,13 @@ static void processListMapPatternsOption (const char *const option,
21642220
processListMapsOptionForType (option, parameter, LMAP_PATTERN|LMAP_TABLE_OUTPUT);
21652221
}
21662222

2223+
static void processListMapRexprsOption (const char *const option,
2224+
const char *const parameter)
2225+
{
2226+
processListMapsOptionForType (option, parameter, LMAP_REXPR|LMAP_TABLE_OUTPUT);
2227+
}
2228+
2229+
21672230
static void processListMapsOption (
21682231
const char *const option CTAGS_ATTR_UNUSED,
21692232
const char *const parameter CTAGS_ATTR_UNUSED)
@@ -2327,6 +2390,13 @@ static void processDescribeLanguage(const char *const option,
23272390
getLanguageVersionCurrent (language),
23282391
getLanguageVersionAge (language));
23292392

2393+
puts("");
2394+
puts("Mappings/rexprs");
2395+
puts("-------------------------------------------------------");
2396+
printLanguageMaps (language, LMAP_REXPR|LMAP_NO_LANG_PREFIX,
2397+
localOption.withListHeader, localOption.machinable,
2398+
stdout);
2399+
23302400
puts("");
23312401
puts("Mappings/patterns");
23322402
puts("-------------------------------------------------------");
@@ -2999,6 +3069,7 @@ static parametricOption ParametricOptions [] = {
29993069
{ "list-maps", processListMapsOption, true, STAGE_ANY },
30003070
{ "list-map-extensions", processListMapExtensionsOption, true, STAGE_ANY },
30013071
{ "list-map-patterns", processListMapPatternsOption, true, STAGE_ANY },
3072+
{ "list-map-rexprs", processListMapRexprsOption, true, STAGE_ANY },
30023073
{ "list-mline-regex-flags", processListMultilineRegexFlagsOption, true, STAGE_ANY },
30033074
{ "list-output-formats", processListOutputFormatsOption, true, STAGE_ANY },
30043075
{ "list-params", processListParametersOption, true, STAGE_ANY },

0 commit comments

Comments
 (0)