Store additional information in Tokens member TokenUserDataType

ibre5041 · ibre5041 · commit b33964725f07 · 2015-09-17T21:14:02.000+02:00
Also:
	CommonTree<ImplTraits>::toString() calls Token's method toString() instead of getText()
	Test a007 shows how to use Token's UserData to pass information from Tree node to Tree leaf
	Test a007 shows how to override Token's toString() method
diff --git a/runtime/Cpp/include/antlr3commontoken.hpp b/runtime/Cpp/include/antlr3commontoken.hpp
@@ -85,6 +85,7 @@ class CommonToken : public ImplTraits::AllocPolicyType
 	typedef typename ImplTraits::StringType StringType;
 	typedef typename ImplTraits::InputStreamType InputStreamType;
 	typedef typename ImplTraits::StreamDataType StreamDataType;
+	typedef typename ImplTraits::TokenUserDataType UserDataType;
 
 private:
     /** The actual type of this token
@@ -243,6 +244,7 @@ class CommonToken : public ImplTraits::AllocPolicyType
      */
     StringType  toString() const;
 
+	UserDataType UserData;	
 };
 
 }
diff --git a/runtime/Cpp/include/antlr3commontoken.inl b/runtime/Cpp/include/antlr3commontoken.inl
@@ -45,6 +45,7 @@ CommonToken<ImplTraits>::CommonToken(TOKEN_TYPE type)
 template<class ImplTraits>
 CommonToken<ImplTraits>::CommonToken( const CommonToken& ctoken )
 	:m_tokText( ctoken.m_tokText )
+	,UserData(ctoken.UserData)	 
 {
 	m_type = ctoken.m_type;
 	m_channel = ctoken.m_channel;
@@ -60,6 +61,7 @@ CommonToken<ImplTraits>::CommonToken( const CommonToken& ctoken )
 template<class ImplTraits>
 CommonToken<ImplTraits>& CommonToken<ImplTraits>::operator=( const CommonToken& ctoken )
 {
+	UserData = ctoken.UserData;
 	m_type = ctoken.m_type;
 	m_channel = ctoken.m_channel;
 	m_lineStart = ctoken.m_lineStart;
diff --git a/runtime/Cpp/include/antlr3commontree.inl b/runtime/Cpp/include/antlr3commontree.inl
@@ -441,7 +441,7 @@ typename CommonTree<ImplTraits>::StringType	CommonTree<ImplTraits>::toString()
 {
 	if( this->isNilNode())
 		return StringType("nil");
-	return	m_token->getText();
+	return	m_token->toString();
 }
 
 template<class ImplTraits>
diff --git a/runtime/Cpp/include/antlr3traits.hpp b/runtime/Cpp/include/antlr3traits.hpp
@@ -34,6 +34,7 @@ class CustomTraitsBase
 	typedef Empty LexStateType;
 
 	typedef Empty CommonTokenType;
+	typedef Empty TokenUserDataType;
 
 	typedef Empty TokenIntStreamType;
 	typedef Empty TokenStreamType;
@@ -60,7 +61,6 @@ class CustomTraitsBase
 	typedef Empty TreeType;
 	typedef Empty TreeAdaptorType;
 	typedef Empty TreeStoreType;
-  
 	
 	template<class StreamType>
 	class ExceptionBaseType : public Empty
@@ -84,7 +84,7 @@ class CustomTraitsBase
 	};
 
 	typedef Empty  RuleReturnValueType;
-	
+
 	//If we want to change the way tokens are stored
 	static const bool TOKENS_ACCESSED_FROM_OWNING_RULE = false;
 	static const unsigned TOKEN_FILL_BUFFER_INCREMENT = 100; //used only if the above val is true
@@ -218,6 +218,10 @@ class Traits : public TraitsBase<UserTraits>
 	typedef typename TraitsSelector< typename UserTraits<TraitsType>::CommonTokenType, 
 					 CommonToken<TraitsType> >::selected CommonTokenType;
 
+	// TokenUserDataType
+	typedef typename TraitsSelector< typename UserTraits<TraitsType>::TokenUserDataType,
+					 Empty >::selected TokenUserDataType;
+
 	// TokenListType
 	typedef typename BaseTraitsType::AllocPolicyType::template ListType<const CommonTokenType*> TokenListType;
 
@@ -272,7 +276,6 @@ class Traits : public TraitsBase<UserTraits>
 	// TreeType
 	typedef typename TraitsSelector< typename UserTraits<TraitsType>::TreeType, 
 					 CommonTree<TraitsType> >::selected TreeType;
-
 	// TreeAdaptorType
 	typedef typename TraitsSelector< typename UserTraits<TraitsType>::TreeAdaptorType, 
 					 CommonTreeAdaptor<TraitsType> >::selected TreeAdaptorType;
diff --git a/runtime/Cpp/tests/A007TestTraits.hpp b/runtime/Cpp/tests/A007TestTraits.hpp
@@ -0,0 +1,59 @@
+#ifndef	_A007_TEST_TRAITS_H
+#define	_A007_TEST_TRAITS_H
+
+#include <antlr3.hpp>
+
+#include <fstream>
+#include <iostream>
+#include <sstream>
+
+// Forward declaration for Lexer&Parser class(es)
+namespace Antlr3Test {
+	//code for overriding
+	template<class ImplTraits>
+	class UserTraits : public antlr3::CustomTraitsBase<ImplTraits>
+	{
+	public:
+		struct A007TokenUserDataType
+		{
+			A007TokenUserDataType() : identifierClass(-1), usageType(-1) {};
+			int identifierClass, usageType;
+		};
+
+		class A007Token : public antlr3::CommonToken<ImplTraits>
+		{
+			typedef antlr3::CommonToken<ImplTraits> super;
+			typedef typename antlr3::CommonToken<ImplTraits>::TOKEN_TYPE TOKEN_TYPE;
+			typedef typename super::StringType StringType;
+		public:
+			// Override all possible constructors
+			A007Token() : super() {};
+			A007Token( ANTLR_UINT32 type) : super(type) {};
+			A007Token( TOKEN_TYPE type) : super(type) {};
+			A007Token( const A007Token& ctoken ) : super(ctoken) {};
+			A007Token& operator=( const A007Token& other ) { super::operator=(other); return *this; };
+
+			// Override toString method
+			StringType toString() const
+			{
+				StringType m_txt;
+				m_txt = super::getText();
+				if (super::UserData.identifierClass > 0)
+					m_txt += "[" + std::to_string(super::UserData.identifierClass) + "]";
+				return m_txt;
+			}
+		};
+
+		// Override default trait's types
+		typedef A007Token CommonTokenType;
+		typedef A007TokenUserDataType TokenUserDataType;
+	};
+
+  	class a007Lexer;	class a007Parser;
+
+	// Instantiate the Traits class(will be used for Lexer/Parser template instantiations)
+	typedef antlr3::Traits<a007Lexer, a007Parser, UserTraits> a007LexerTraits;
+	typedef a007LexerTraits a007ParserTraits;
+};
+
+#endif
diff --git a/runtime/Cpp/tests/Makefile b/runtime/Cpp/tests/Makefile
@@ -52,6 +52,9 @@ a005:	a005.cpp a005.tokens ATestTraits.hpp Makefile $(ANTLR) $(RUNTIME_HEADERS)
 a006:	a006.cpp a006.tokens ATestTraits.hpp Makefile $(ANTLR) $(RUNTIME_HEADERS)
 	$(CXX) $(CFLAGS) -DUSESTL $(INCLUDES) $< $(wildcard $@?*.cpp) -o $@
 
+a007:	a007.cpp a007.tokens A007TestTraits.hpp Makefile $(ANTLR) $(RUNTIME_HEADERS)
+	$(CXX) $(CFLAGS) -DUSESTL $(INCLUDES) $< $(wildcard $@?*.cpp) -o $@
+
 s001:	s001.cpp s001.tokens UserTestTraits.hpp Makefile $(ANTLR) $(RUNTIME_HEADERS)
 	$(CXX) $(CFLAGS) $(INCLUDES) $< $(wildcard $@?*.cpp) utils.cpp -o $@
 
diff --git a/runtime/Cpp/tests/a007.cpp b/runtime/Cpp/tests/a007.cpp
@@ -0,0 +1,104 @@
+#include "utils.hpp"
+#include "A007TestTraits.hpp"
+#include "a007Lexer.hpp"
+#include "a007Parser.hpp"
+
+#include <iostream>
+#include <sstream>
+#include <fstream>
+
+using namespace Antlr3Test;
+using namespace std;
+
+void test1(const char* input);
+void test2(const char* input);
+void test3(const char* input);
+
+int 
+main	(int argc, char *argv[])
+{
+	test1("ABC");
+ 	test1("(ABC)");
+	test1("ABC AS 5");
+	test1("ABC AS(1,2,3,4,5,6)");
+	test1("(ABC,ABD,ABE,ABF)AS(1,2,3,4,5,6)");
+
+	test2("ABC");
+	test2("(ABC)");
+	test2("ABC AS 5");
+	test2("ABC AS(1,2,3,4,5,6)");
+	test2("(ABC,ABD,ABE,ABF)AS(1,2,3,4,5,6)");
+
+	test3("SAMPLE (4)");
+	test3("SAMPLE BLOCK(4,5)");
+	
+	printf("finished parsing OK\n");	// Finnish parking is pretty good - I think it is all the snow
+
+	return 0;
+}
+
+void test1(const char* input)
+{
+	a007LexerTraits::InputStreamType* istream = new a007LexerTraits::InputStreamType((const ANTLR_UINT8 *)input
+											 , antlr3::ENC_8BIT
+											 , strlen(input)
+											 , (ANTLR_UINT8*)"test1");
+	istream->setUcaseLA(true);
+	
+	a007Lexer* lxr = new a007Lexer(istream);
+	a007LexerTraits::TokenStreamType* tstream = new a007LexerTraits::TokenStreamType(ANTLR_SIZE_HINT, lxr->get_tokSource());
+	a007Parser* psr = new a007Parser(tstream);	
+	{
+		auto r1 = psr->test1();
+		std::cout << r1.tree->toStringTree() << std::endl;
+	}
+	
+	delete psr;
+	delete tstream; 
+	delete lxr;
+	delete istream;
+}
+
+void test2(const char* input)
+{
+	a007LexerTraits::InputStreamType* istream = new a007LexerTraits::InputStreamType((const ANTLR_UINT8 *)input
+											 , antlr3::ENC_8BIT
+											 , strlen(input)
+											 , (ANTLR_UINT8*)"test2");
+	istream->setUcaseLA(true);
+	
+	a007Lexer* lxr = new a007Lexer(istream);
+	a007LexerTraits::TokenStreamType* tstream = new a007LexerTraits::TokenStreamType(ANTLR_SIZE_HINT, lxr->get_tokSource());
+	a007Parser* psr = new a007Parser(tstream);	
+	{
+		auto r1 = psr->test2();
+		std::cout << r1.tree->toStringTree() << std::endl;
+	}
+	
+	delete psr;
+	delete tstream; 
+	delete lxr;
+	delete istream;
+}
+
+void test3(const char* input)
+{
+	a007LexerTraits::InputStreamType* istream = new a007LexerTraits::InputStreamType((const ANTLR_UINT8 *)input
+											 , antlr3::ENC_8BIT
+											 , strlen(input)
+											 , (ANTLR_UINT8*)"test3");
+	istream->setUcaseLA(true);
+	
+	a007Lexer* lxr = new a007Lexer(istream);
+	a007LexerTraits::TokenStreamType* tstream = new a007LexerTraits::TokenStreamType(ANTLR_SIZE_HINT, lxr->get_tokSource());
+	a007Parser* psr = new a007Parser(tstream);	
+	{
+		auto r1 = psr->test3();
+		std::cout << r1.tree->toStringTree() << std::endl;
+	}
+	
+	delete psr;
+	delete tstream; 
+	delete lxr;
+	delete istream;
+}
diff --git a/runtime/Cpp/tests/a007.g b/runtime/Cpp/tests/a007.g
@@ -0,0 +1,81 @@
+grammar a007;
+
+options {
+	language=Cpp;
+	output=AST;
+}
+
+tokens {
+    T_A = 'token A';
+    P_R = 'token pivot root';
+    S_R = 'token sample root';
+    E_R = 'token expression root';
+
+    T_ADHOC_ENUM_VAL = 'ad-hoc generated enum constant';
+}
+
+@lexer::includes 
+{
+#include "A007TestTraits.hpp"
+}
+@lexer::namespace 
+{ Antlr3Test }
+
+@parser::includes {
+#include "A007TestTraits.hpp"
+#include "a007Lexer.hpp"
+}
+@parser::namespace 
+{ Antlr3Test }
+
+test1 // see unpivot_in_elements
+    :   (    column_name[T_ADHOC_ENUM_VAL]
+        |    '(' column_name[0] (',' column_name[0])* ')'
+        )
+        (     'AS'
+            (    constant
+            |    ('(')=> '(' constant (',' constant)* ')'
+            )
+        )?
+        -> column_name+ ^(P_R constant*)
+    ;
+
+test2 // see unpivot_in_elements
+    :   (    column_name[T_ADHOC_ENUM_VAL]
+        |    '(' column_name[0] (',' column_name[0])* ')'
+        )
+        (     'AS'
+            (    constant
+            |    ('(')=> '(' constant (',' constant)* ')'
+            )
+        )?
+        -> column_name+ ^(P_R constant)*
+    ;
+
+
+test3 //sample_clause
+    :	s='SAMPLE' 'BLOCK'? 
+        '(' c1=constant (',' c2=constant)? ')'
+        -> ^(S_R[$s] 'BLOCK'? ^(E_R $c1) ^(E_R $c2)?)
+    ;
+
+column_name[int identifierClass]
+	:	t=T_COLUMN_NAME
+        {
+            // We have to get over const correctness here:
+            // const_cast<CommonTokenType*>($t)->set_type(ID);
+            const_cast<CommonTokenType*>($t)->UserData.identifierClass = identifierClass;            
+        }
+    ;
+
+constant
+	:	T_CONSTANT;
+
+T_COLUMN_NAME
+	:	('A'..'Z')+;
+
+T_CONSTANT
+	:	('0'..'9')+;
+
+WS
+    : ' '+  { $channel = HIDDEN; };

Original file line number	Diff line number	Diff line change
`@@ -441,7 +441,7 @@ typename CommonTree<ImplTraits>::StringType CommonTree<ImplTraits>::toString()`
`441`	`441`	`{`
`442`	`442`	`if( this->isNilNode())`
`443`	`443`	`return StringType("nil");`
`444`		`- return m_token->getText();`
	`444`	`+ return m_token->toString();`
`445`	`445`	`}`
`446`	`446`
`447`	`447`	`template<class ImplTraits>`