Skip to content

Commit b339647

Browse files
committed
Store additional information in the Token's UserData member (of type TokenUserDataType)
Also: CommonTree<ImplTraits>::toString() calls the Token's toString() method instead of getText(). Test a007 shows how to use a Token's UserData to pass information from a Tree node to a Tree leaf. Test a007 also shows how to override the Token's toString() method.
1 parent 8557bc6 commit b339647

File tree

8 files changed

+258
-4
lines changed

8 files changed

+258
-4
lines changed

runtime/Cpp/include/antlr3commontoken.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ class CommonToken : public ImplTraits::AllocPolicyType
8585
typedef typename ImplTraits::StringType StringType;
8686
typedef typename ImplTraits::InputStreamType InputStreamType;
8787
typedef typename ImplTraits::StreamDataType StreamDataType;
88+
typedef typename ImplTraits::TokenUserDataType UserDataType;
8889

8990
private:
9091
/** The actual type of this token
@@ -243,6 +244,7 @@ class CommonToken : public ImplTraits::AllocPolicyType
243244
*/
244245
StringType toString() const;
245246

247+
UserDataType UserData;
246248
};
247249

248250
}

runtime/Cpp/include/antlr3commontoken.inl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ CommonToken<ImplTraits>::CommonToken(TOKEN_TYPE type)
4545
template<class ImplTraits>
4646
CommonToken<ImplTraits>::CommonToken( const CommonToken& ctoken )
4747
:m_tokText( ctoken.m_tokText )
48+
,UserData(ctoken.UserData)
4849
{
4950
m_type = ctoken.m_type;
5051
m_channel = ctoken.m_channel;
@@ -60,6 +61,7 @@ CommonToken<ImplTraits>::CommonToken( const CommonToken& ctoken )
6061
template<class ImplTraits>
6162
CommonToken<ImplTraits>& CommonToken<ImplTraits>::operator=( const CommonToken& ctoken )
6263
{
64+
UserData = ctoken.UserData;
6365
m_type = ctoken.m_type;
6466
m_channel = ctoken.m_channel;
6567
m_lineStart = ctoken.m_lineStart;

runtime/Cpp/include/antlr3commontree.inl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -441,7 +441,7 @@ typename CommonTree<ImplTraits>::StringType CommonTree<ImplTraits>::toString()
441441
{
442442
if( this->isNilNode())
443443
return StringType("nil");
444-
return m_token->getText();
444+
return m_token->toString();
445445
}
446446

447447
template<class ImplTraits>

runtime/Cpp/include/antlr3traits.hpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ class CustomTraitsBase
3434
typedef Empty LexStateType;
3535

3636
typedef Empty CommonTokenType;
37+
typedef Empty TokenUserDataType;
3738

3839
typedef Empty TokenIntStreamType;
3940
typedef Empty TokenStreamType;
@@ -60,7 +61,6 @@ class CustomTraitsBase
6061
typedef Empty TreeType;
6162
typedef Empty TreeAdaptorType;
6263
typedef Empty TreeStoreType;
63-
6464

6565
template<class StreamType>
6666
class ExceptionBaseType : public Empty
@@ -84,7 +84,7 @@ class CustomTraitsBase
8484
};
8585

8686
typedef Empty RuleReturnValueType;
87-
87+
8888
//If we want to change the way tokens are stored
8989
static const bool TOKENS_ACCESSED_FROM_OWNING_RULE = false;
9090
static const unsigned TOKEN_FILL_BUFFER_INCREMENT = 100; //used only if the above val is true
@@ -218,6 +218,10 @@ class Traits : public TraitsBase<UserTraits>
218218
typedef typename TraitsSelector< typename UserTraits<TraitsType>::CommonTokenType,
219219
CommonToken<TraitsType> >::selected CommonTokenType;
220220

221+
// TokenUserDataType
222+
typedef typename TraitsSelector< typename UserTraits<TraitsType>::TokenUserDataType,
223+
Empty >::selected TokenUserDataType;
224+
221225
// TokenListType
222226
typedef typename BaseTraitsType::AllocPolicyType::template ListType<const CommonTokenType*> TokenListType;
223227

@@ -272,7 +276,6 @@ class Traits : public TraitsBase<UserTraits>
272276
// TreeType
273277
typedef typename TraitsSelector< typename UserTraits<TraitsType>::TreeType,
274278
CommonTree<TraitsType> >::selected TreeType;
275-
276279
// TreeAdaptorType
277280
typedef typename TraitsSelector< typename UserTraits<TraitsType>::TreeAdaptorType,
278281
CommonTreeAdaptor<TraitsType> >::selected TreeAdaptorType;
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
#ifndef _A007_TEST_TRAITS_H
2+
#define _A007_TEST_TRAITS_H
3+
4+
#include <antlr3.hpp>
5+
6+
#include <fstream>
7+
#include <iostream>
8+
#include <sstream>
9+
10+
// Forward declaration for Lexer&Parser class(es)
11+
namespace Antlr3Test {
12+
//code for overriding
13+
template<class ImplTraits>
14+
class UserTraits : public antlr3::CustomTraitsBase<ImplTraits>
15+
{
16+
public:
17+
struct A007TokenUserDataType
18+
{
19+
A007TokenUserDataType() : identifierClass(-1), usageType(-1) {};
20+
int identifierClass, usageType;
21+
};
22+
23+
class A007Token : public antlr3::CommonToken<ImplTraits>
24+
{
25+
typedef antlr3::CommonToken<ImplTraits> super;
26+
typedef typename antlr3::CommonToken<ImplTraits>::TOKEN_TYPE TOKEN_TYPE;
27+
typedef typename super::StringType StringType;
28+
public:
29+
// Override all possible constructors
30+
A007Token() : super() {};
31+
A007Token( ANTLR_UINT32 type) : super(type) {};
32+
A007Token( TOKEN_TYPE type) : super(type) {};
33+
A007Token( const A007Token& ctoken ) : super(ctoken) {};
34+
A007Token& operator=( const A007Token& other ) { super::operator=(other); return *this; };
35+
36+
// Override toString method
37+
StringType toString() const
38+
{
39+
StringType m_txt;
40+
m_txt = super::getText();
41+
if (super::UserData.identifierClass > 0)
42+
m_txt += "[" + std::to_string(super::UserData.identifierClass) + "]";
43+
return m_txt;
44+
}
45+
};
46+
47+
// Override default trait's types
48+
typedef A007Token CommonTokenType;
49+
typedef A007TokenUserDataType TokenUserDataType;
50+
};
51+
52+
class a007Lexer; class a007Parser;
53+
54+
// Instantiate the Traits class (will be used for Lexer/Parser template instantiations)
55+
typedef antlr3::Traits<a007Lexer, a007Parser, UserTraits> a007LexerTraits;
56+
typedef a007LexerTraits a007ParserTraits;
57+
};
58+
59+
#endif

runtime/Cpp/tests/Makefile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,9 @@ a005: a005.cpp a005.tokens ATestTraits.hpp Makefile $(ANTLR) $(RUNTIME_HEADERS)
5252
a006: a006.cpp a006.tokens ATestTraits.hpp Makefile $(ANTLR) $(RUNTIME_HEADERS)
5353
$(CXX) $(CFLAGS) -DUSESTL $(INCLUDES) $< $(wildcard $@?*.cpp) -o $@
5454

55+
a007: a007.cpp a007.tokens A007TestTraits.hpp Makefile $(ANTLR) $(RUNTIME_HEADERS)
56+
$(CXX) $(CFLAGS) -DUSESTL $(INCLUDES) $< $(wildcard $@?*.cpp) -o $@
57+
5558
s001: s001.cpp s001.tokens UserTestTraits.hpp Makefile $(ANTLR) $(RUNTIME_HEADERS)
5659
$(CXX) $(CFLAGS) $(INCLUDES) $< $(wildcard $@?*.cpp) utils.cpp -o $@
5760

runtime/Cpp/tests/a007.cpp

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
#include "utils.hpp"
2+
#include "A007TestTraits.hpp"
3+
#include "a007Lexer.hpp"
4+
#include "a007Parser.hpp"
5+
6+
#include <iostream>
7+
#include <sstream>
8+
#include <fstream>
9+
10+
using namespace Antlr3Test;
11+
using namespace std;
12+
13+
void test1(const char* input);
14+
void test2(const char* input);
15+
void test3(const char* input);
16+
17+
int
18+
main (int argc, char *argv[])
19+
{
20+
test1("ABC");
21+
test1("(ABC)");
22+
test1("ABC AS 5");
23+
test1("ABC AS(1,2,3,4,5,6)");
24+
test1("(ABC,ABD,ABE,ABF)AS(1,2,3,4,5,6)");
25+
26+
test2("ABC");
27+
test2("(ABC)");
28+
test2("ABC AS 5");
29+
test2("ABC AS(1,2,3,4,5,6)");
30+
test2("(ABC,ABD,ABE,ABF)AS(1,2,3,4,5,6)");
31+
32+
test3("SAMPLE (4)");
33+
test3("SAMPLE BLOCK(4,5)");
34+
35+
printf("finished parsing OK\n"); // Finnish parking is pretty good - I think it is all the snow
36+
37+
return 0;
38+
}
39+
40+
void test1(const char* input)
41+
{
42+
a007LexerTraits::InputStreamType* istream = new a007LexerTraits::InputStreamType((const ANTLR_UINT8 *)input
43+
, antlr3::ENC_8BIT
44+
, strlen(input)
45+
, (ANTLR_UINT8*)"test1");
46+
istream->setUcaseLA(true);
47+
48+
a007Lexer* lxr = new a007Lexer(istream);
49+
a007LexerTraits::TokenStreamType* tstream = new a007LexerTraits::TokenStreamType(ANTLR_SIZE_HINT, lxr->get_tokSource());
50+
a007Parser* psr = new a007Parser(tstream);
51+
{
52+
auto r1 = psr->test1();
53+
std::cout << r1.tree->toStringTree() << std::endl;
54+
}
55+
56+
delete psr;
57+
delete tstream;
58+
delete lxr;
59+
delete istream;
60+
}
61+
62+
void test2(const char* input)
63+
{
64+
a007LexerTraits::InputStreamType* istream = new a007LexerTraits::InputStreamType((const ANTLR_UINT8 *)input
65+
, antlr3::ENC_8BIT
66+
, strlen(input)
67+
, (ANTLR_UINT8*)"test2");
68+
istream->setUcaseLA(true);
69+
70+
a007Lexer* lxr = new a007Lexer(istream);
71+
a007LexerTraits::TokenStreamType* tstream = new a007LexerTraits::TokenStreamType(ANTLR_SIZE_HINT, lxr->get_tokSource());
72+
a007Parser* psr = new a007Parser(tstream);
73+
{
74+
auto r1 = psr->test2();
75+
std::cout << r1.tree->toStringTree() << std::endl;
76+
}
77+
78+
delete psr;
79+
delete tstream;
80+
delete lxr;
81+
delete istream;
82+
}
83+
84+
void test3(const char* input)
85+
{
86+
a007LexerTraits::InputStreamType* istream = new a007LexerTraits::InputStreamType((const ANTLR_UINT8 *)input
87+
, antlr3::ENC_8BIT
88+
, strlen(input)
89+
, (ANTLR_UINT8*)"test3");
90+
istream->setUcaseLA(true);
91+
92+
a007Lexer* lxr = new a007Lexer(istream);
93+
a007LexerTraits::TokenStreamType* tstream = new a007LexerTraits::TokenStreamType(ANTLR_SIZE_HINT, lxr->get_tokSource());
94+
a007Parser* psr = new a007Parser(tstream);
95+
{
96+
auto r1 = psr->test3();
97+
std::cout << r1.tree->toStringTree() << std::endl;
98+
}
99+
100+
delete psr;
101+
delete tstream;
102+
delete lxr;
103+
delete istream;
104+
}

runtime/Cpp/tests/a007.g

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
grammar a007;
2+
3+
options {
4+
language=Cpp;
5+
output=AST;
6+
}
7+
8+
tokens {
9+
T_A = 'token A';
10+
P_R = 'token pivot root';
11+
S_R = 'token sample root';
12+
E_R = 'token expression root';
13+
14+
T_ADHOC_ENUM_VAL = 'ad-hoc generated enum constant';
15+
}
16+
17+
@lexer::includes
18+
{
19+
#include "A007TestTraits.hpp"
20+
}
21+
@lexer::namespace
22+
{ Antlr3Test }
23+
24+
@parser::includes {
25+
#include "A007TestTraits.hpp"
26+
#include "a007Lexer.hpp"
27+
}
28+
@parser::namespace
29+
{ Antlr3Test }
30+
31+
test1 // see unpivot_in_elements
32+
: ( column_name[T_ADHOC_ENUM_VAL]
33+
| '(' column_name[0] (',' column_name[0])* ')'
34+
)
35+
( 'AS'
36+
( constant
37+
| ('(')=> '(' constant (',' constant)* ')'
38+
)
39+
)?
40+
-> column_name+ ^(P_R constant*)
41+
;
42+
43+
test2 // see unpivot_in_elements
44+
: ( column_name[T_ADHOC_ENUM_VAL]
45+
| '(' column_name[0] (',' column_name[0])* ')'
46+
)
47+
( 'AS'
48+
( constant
49+
| ('(')=> '(' constant (',' constant)* ')'
50+
)
51+
)?
52+
-> column_name+ ^(P_R constant)*
53+
;
54+
55+
56+
test3 //sample_clause
57+
: s='SAMPLE' 'BLOCK'?
58+
'(' c1=constant (',' c2=constant)? ')'
59+
-> ^(S_R[$s] 'BLOCK'? ^(E_R $c1) ^(E_R $c2)?)
60+
;
61+
62+
column_name[int identifierClass]
63+
: t=T_COLUMN_NAME
64+
{
65+
// We have to get over const correctness here:
66+
// const_cast<CommonTokenType*>($t)->set_type(ID);
67+
const_cast<CommonTokenType*>($t)->UserData.identifierClass = identifierClass;
68+
}
69+
;
70+
71+
constant
72+
: T_CONSTANT;
73+
74+
T_COLUMN_NAME
75+
: ('A'..'Z')+;
76+
77+
T_CONSTANT
78+
: ('0'..'9')+;
79+
80+
WS
81+
: ' '+ { $channel = HIDDEN; };

0 commit comments

Comments
 (0)