Skip to content

Commit 625a545

Browse files
committed
main: using regex for choosing a parser for the given file name
This change extends the --map-<LANG> option to support regular-expression matching against the full file name. The original --map-<LANG> option supports glob-based matching and extension comparison against the file basename. However, these two methods are not enough when file names are too generic. See #3287. A regular expression passed to --map-<LANG> must be surrounded by % characters, like --map-RpmMacros='%(.*/)?macros\.d/macros\.([^/]+)$%'. To match in a case-insensitive way, append `i' after the second %, like --map-RpmMacros='%(.*/)?macros\.d/macros\.([^/]+)$%i'. To use % as part of an expression, put \ before the % to escape it. TODO: - [ ] update ctags.1 - [ ] add Tmain test cases - [ ] add pcre backend - [ ] update NEWS Signed-off-by: Masatake YAMATO <[email protected]>
1 parent 44da7ec commit 625a545

File tree

17 files changed

+503
-31
lines changed

17 files changed

+503
-31
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
0

Tmain/list-map-rexprs.d/run.sh

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Copyright: 2025 Masatake YAMATO
2+
# License: GPL-2
3+
4+
# The ctags executable to run is given as the first argument.
CTAGS=$1
5+
6+
# First run: define a language named "Something" and exercise --map-Something
# with regular-expression maps: one containing escaped '%' characters, one
# with the 'i' (case-insensitive) suffix, and two maps (with and without the
# 'i' suffix) that are added and then removed again. Finally list the rexpr
# maps of all languages. Second run (only if the first succeeds): print a
# separator line and list the rexpr maps of RpmMacros alone.
$CTAGS --quiet --options=NONE \
7+
--langdef=Something \
8+
--map-Something='%\%ESCAPING\%%' \
9+
--map-Something=+'%ICASE%i' \
10+
--map-Something=+'%TEMP%' \
11+
--map-Something=-'%TEMP%' \
12+
--map-Something=+'%TEMPi%i' \
13+
--map-Something=-'%TEMPi%i' \
14+
--list-map-rexprs=all && \
15+
echo '## RpmMacros' && \
16+
$CTAGS --quiet --options=NONE --list-map-rexprs=RpmMacros

Tmain/list-map-rexprs.d/stderr-expected.txt

Whitespace-only changes.
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#LANGUAGE EXPRESSION CASE
2+
RpmMacros (.*/)?macros\.d/macros\.([^/]+)$ sensitive
3+
Something %ESCAPING% sensitive
4+
Something ICASE insensitive
5+
## RpmMacros
6+
#EXPRESSION CASE
7+
(.*/)?macros\.d/macros\.([^/]+)$ sensitive

Tmain/versioning.d/stdout-expected.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@ About TEST language
1616
enabled: yes
1717
version: 10.9
1818

19+
Mappings/rexprs
20+
-------------------------------------------------------
21+
22+
1923
Mappings/patterns
2024
-------------------------------------------------------
2125
MYTEST

main/options.c

Lines changed: 76 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
#include "interactive_p.h"
4040
#include "writer_p.h"
4141
#include "trace.h"
42+
#include "flags_p.h"
4243

4344
#ifdef HAVE_JANSSON
4445
#include <jansson.h>
@@ -59,6 +60,9 @@
5960
/* The following separators are permitted for list options.
6061
*/
6162
#define EXTENSION_SEPARATOR '.'
63+
#define REXPR_START '%'
64+
#define REXPR_STOP '%'
65+
#define REXPR_ICASE 'i'
6266
#define PATTERN_START '('
6367
#define PATTERN_STOP ')'
6468
#define IGNORE_SEPARATORS ", \t\n"
@@ -303,10 +307,10 @@ static optionDescription LongOptionDescription [] = {
303307
{1,0," --langmap=<map>[,<map>[...]]"},
304308
{1,0," Override default mapping of language to input file extension."},
305309
{1,0," e.g. --langmap=c:.c.x,java:+.j,make:([Mm]akefile).mak"},
306-
{1,0," --map-<LANG>=[+|-]<extension>|<pattern>"},
310+
{1,0," --map-<LANG>=[+|-]<extension>|<pattern>|<rexpr>"},
307311
{1,0," Set, add(+) or remove(-) the map for <LANG>."},
308-
{1,0," Unlike --langmap, this doesn't take a list; only one file name <pattern>"},
309-
{1,0," or one file <extension> can be specified at once."},
312+
{1,0," Unlike --langmap, this doesn't take a list; only one file name <pattern>,"},
313+
{1,0," one file name <extension>, or one file <rexpr> can be specified at once."},
310314
{1,0," Unlike --langmap the change with this option affects mapping of <LANG> only."},
311315
{1,0,""},
312316
{1,0,"Tags File Contents Options"},
@@ -436,6 +440,8 @@ static optionDescription LongOptionDescription [] = {
436440
{1,0," Output list of language extensions in mapping."},
437441
{1,0," --list-map-patterns[=(<language>|all)]"},
438442
{1,0," Output list of language patterns in mapping."},
443+
{1,0," --list-map-rexprs[=(<language>|all)]"},
444+
{1,0," Output list of language regular expressions in mapping."},
439445
{1,0," --list-maps[=(<language>|all)]"},
440446
{1,0," Output list of language mappings (both extensions and patterns)."},
441447
{1,0," --list-mline-regex-flags"},
@@ -1793,6 +1799,7 @@ static char* extractMapFromParameter (const langType language,
17931799
++parameter;
17941800
for (p = parameter ; *p != PATTERN_STOP && *p != '\0' ; ++p)
17951801
{
1802+
/* TODO: Can this handle a pattern including ')' ? */
17961803
if (*p == '\\' && *(p + 1) == PATTERN_STOP)
17971804
++p;
17981805
}
@@ -1808,9 +1815,46 @@ static char* extractMapFromParameter (const langType language,
18081815
return result;
18091816
}
18101817

1818+
if (first == REXPR_START)
1819+
{
1820+
*mapType = LMAP_REXPR;
1821+
1822+
++parameter;
1823+
const char* parameter_orig = parameter;
1824+
vString *rexpr = vStringNew ();
1825+
for (p = parameter ; *p != REXPR_STOP && *p != '\0' ; ++p)
1826+
{
1827+
if (*p == '\\' && *(p + 1) == REXPR_STOP)
1828+
p++;
1829+
vStringPut (rexpr, *p);
1830+
}
1831+
if (*p == '\0')
1832+
error (FATAL, "Unterminated file name regular expression for %s language: %s",
1833+
getLanguageName (language), parameter_orig);
1834+
1835+
*tail = p + 1;
1836+
return vStringDeleteUnwrap (rexpr);
1837+
}
1838+
18111839
return NULL;
18121840
}
18131841

1842+
/* Short-flag handler for the 'i' flag of a file name regular expression map.
 *
 * c:    the flag character; ignored.
 * data: treated as a pointer to a bool, which is set to true.
 *       addLanguageMap() passes the address of its local `icase' here
 *       via flagsEval().
 */
static void langmap_rexpr_icase_short (char c CTAGS_ATTR_UNUSED, void* data)
1843+
{
1844+
bool *icase = data;
1845+
*icase = true;
1846+
}
1847+
1848+
/* Long-flag handler for the "icase" flag of a file name regular expression map.
 *
 * Both string arguments are ignored; the work is delegated to
 * langmap_rexpr_icase_short() so that the long and short forms of the
 * flag have exactly the same effect on the bool behind `data'.
 */
static void langmap_rexpr_icase_long (const char* s CTAGS_ATTR_UNUSED, const char* const unused CTAGS_ATTR_UNUSED, void* data)
1849+
{
1850+
langmap_rexpr_icase_short ('i', data);
1851+
}
1852+
1853+
/* Flags accepted for a regular expression map; addLanguageMap() evaluates
 * the text following the map with flagsEval() against this table.
 * The only entry is the case-insensitivity flag: short form 'i', long form
 * "icase", each bound to its handler above.
 * NOTE(review): the NULL field is presumably the parameter-name slot of
 * flagDefinition (the flag takes no value) -- confirm against flags_p.h.
 */
static flagDefinition langmapRexprFlagDef[] = {
1854+
{ 'i', "icase", langmap_rexpr_icase_short, langmap_rexpr_icase_long,
1855+
NULL, "applied in a case-insensitive manner"},
1856+
};
1857+
18141858
static char* addLanguageMap (const langType language, char* map_parameter,
18151859
bool exclusiveInAllLanguages)
18161860
{
@@ -1823,6 +1867,13 @@ static char* addLanguageMap (const langType language, char* map_parameter,
18231867
addLanguageExtensionMap (language, map, exclusiveInAllLanguages);
18241868
else if (map && map_type == LMAP_PATTERN)
18251869
addLanguagePatternMap (language, map, exclusiveInAllLanguages);
1870+
else if (map && map_type == LMAP_REXPR)
1871+
{
1872+
bool icase = false;
1873+
1874+
flagsEval (p, langmapRexprFlagDef, ARRAY_SIZE(langmapRexprFlagDef), &icase);
1875+
addLanguageRexprMap (language, map, icase, exclusiveInAllLanguages);
1876+
}
18261877
else
18271878
error (FATAL, "Badly formed language map for %s language",
18281879
getLanguageName (language));
@@ -1843,6 +1894,13 @@ static char* removeLanguageMap (const langType language, char* map_parameter)
18431894
removeLanguageExtensionMap (language, map);
18441895
else if (map && map_type == LMAP_PATTERN)
18451896
removeLanguagePatternMap (language, map);
1897+
else if (map && map_type == LMAP_REXPR)
1898+
{
1899+
bool icase = (*p == REXPR_ICASE);
1900+
removeLanguageRexprMap (language, map, icase);
1901+
if (icase)
1902+
p++;
1903+
}
18461904
else
18471905
error (FATAL, "Badly formed language map for %s language",
18481906
getLanguageName (language));
@@ -2164,6 +2222,13 @@ static void processListMapPatternsOption (const char *const option,
21642222
processListMapsOptionForType (option, parameter, LMAP_PATTERN|LMAP_TABLE_OUTPUT);
21652223
}
21662224

2225+
/* Handler registered for the "list-map-rexprs" option in ParametricOptions.
 *
 * Forwards `option' and `parameter' unchanged to
 * processListMapsOptionForType(), selecting the regular expression maps
 * (LMAP_REXPR) and table-style output (LMAP_TABLE_OUTPUT).
 */
static void processListMapRexprsOption (const char *const option,
2226+
const char *const parameter)
2227+
{
2228+
processListMapsOptionForType (option, parameter, LMAP_REXPR|LMAP_TABLE_OUTPUT);
2229+
}
2230+
2231+
21672232
static void processListMapsOption (
21682233
const char *const option CTAGS_ATTR_UNUSED,
21692234
const char *const parameter CTAGS_ATTR_UNUSED)
@@ -2327,6 +2392,13 @@ static void processDescribeLanguage(const char *const option,
23272392
getLanguageVersionCurrent (language),
23282393
getLanguageVersionAge (language));
23292394

2395+
puts("");
2396+
puts("Mappings/rexprs");
2397+
puts("-------------------------------------------------------");
2398+
printLanguageMaps (language, LMAP_REXPR|LMAP_NO_LANG_PREFIX,
2399+
localOption.withListHeader, localOption.machinable,
2400+
stdout);
2401+
23302402
puts("");
23312403
puts("Mappings/patterns");
23322404
puts("-------------------------------------------------------");
@@ -2999,6 +3071,7 @@ static parametricOption ParametricOptions [] = {
29993071
{ "list-maps", processListMapsOption, true, STAGE_ANY },
30003072
{ "list-map-extensions", processListMapExtensionsOption, true, STAGE_ANY },
30013073
{ "list-map-patterns", processListMapPatternsOption, true, STAGE_ANY },
3074+
{ "list-map-rexprs", processListMapRexprsOption, true, STAGE_ANY },
30023075
{ "list-mline-regex-flags", processListMultilineRegexFlagsOption, true, STAGE_ANY },
30033076
{ "list-output-formats", processListOutputFormatsOption, true, STAGE_ANY },
30043077
{ "list-params", processListParametersOption, true, STAGE_ANY },

0 commit comments

Comments
 (0)