use std::collections::HashMap;
use std::iter::Peekable;
use std::str::Chars;
use crate::token::{KeywordTable, Token, TokenKind};

/// Lexer that splits source text into tokens.
pub struct Lexer {
    current_line: u16,
}

impl Lexer {
    /// Creates a new lexer instance. Token line numbering starts at 1.
    pub fn new() -> Self { Lexer { current_line: 1 } }

    /// Lexes the given source text. Returns a vector of tokens containing
    /// both plain text and preprocessor instruction tokens.
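    ///
    /// A minimal usage sketch (marked `ignore` so it is not run as a doc test;
    /// the input string is illustrative only):
    ///
    /// ```ignore
    /// let mut lexer = Lexer::new();
    /// let tokens = lexer.lex("#!/bin/sh\nplain text");
    /// assert!(!tokens.is_empty());
    /// ```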
    pub fn lex(&mut self, source: &str) -> Vec<Token> {
        let mut chars = source.chars().peekable();
        let mut tokens: Vec<Token> = Vec::new();
        let keywords_table = KeywordTable::new();

        while let Some(char) = chars.peek() {
            match char {
                '#' => {
                    chars.next();

                    if match_char('!', &mut chars) {
                        // "#!" starts a shebang line; the rest of the line becomes a text token
                        let token = Token::new(TokenKind::Shebang, "#!".to_string(), self.current_line, self.current_line);
                        tokens.push(token);
                        self.shebang_text(&mut tokens, &mut chars);
                    } else {
                        // A lone "#" starts a preprocessor instruction line
                        let token = Token::new(TokenKind::Hash, "#".to_string(), self.current_line, self.current_line);
                        tokens.push(token);
                        self.preprocessor_line(&mut tokens, &mut chars, &keywords_table);
                    }
                }
                _ => {
                    // Everything else is collected into a plain text token
                    self.text(&mut tokens, &mut chars);
                }
            }
        }

        tokens
    }

    /// Collects the remainder of a shebang line (everything up to, but not
    /// including, the newline) into a single text token.
    fn shebang_text(&mut self, tokens: &mut Vec<Token>, chars: &mut Peekable<Chars>) {
        let mut text_chars: Vec<char> = Vec::new();
        let start_line = self.current_line;
        let end_line = self.current_line;

        while let Some(char) = chars.peek() {
            match char {
                '\n' => break,
                _ => {
                    text_chars.push(*char);
                    chars.next();
                }
            }
        }

        if !text_chars.is_empty() {
            let token = Token::new(TokenKind::Text, text_chars.iter().collect(), start_line, end_line);
            tokens.push(token);
        }
    }

    /// Lexes a preprocessor instruction line: identifiers/keywords and quoted
    /// paths become tokens, other characters are skipped. Stops at the newline
    /// without consuming it.
    fn preprocessor_line(&mut self, tokens: &mut Vec<Token>, chars: &mut Peekable<Chars>, keywords: &KeywordTable) {
        while let Some(char) = chars.peek() {
            match char {
                '\n' => break,
                char if char.is_alphabetic() || *char == '_' => {
                    let token = self.identifier(chars, &keywords.table);
                    tokens.push(token);
                }
                '"' => {
                    // A quoted string inside a preprocessor line is a file path
                    let mut token = self.string(chars);
                    token.token_kind = TokenKind::Path;
                    tokens.push(token);
                }
                _ => {
                    chars.next();
                }
            }
        }
    }

    /// Reads a run of alphabetic characters and underscores and classifies it
    /// as a keyword if its uppercased form is present in the keyword table.
    fn identifier(&mut self, chars: &mut Peekable<Chars>, keywords: &HashMap<String, TokenKind>) -> Token {
        let mut text_chars: Vec<char> = Vec::new();

        while let Some(char) = chars.peek() {
            if char.is_alphabetic() || *char == '_' {
                text_chars.push(*char);
                chars.next();
            } else {
                break;
            }
        }

        let identifier: String = String::from_iter(text_chars);
        if let Some(token_kind) = keywords.get(identifier.to_uppercase().as_str()) {
            return Token::new(*token_kind, identifier, self.current_line, self.current_line);
        }

        Token::new(TokenKind::Identifier, identifier, self.current_line, self.current_line)
    }

    /// Collects plain text up to the next '#' that starts a preprocessor line.
    /// A '#' inside a string or date literal (delimited by '"' or '\'') is kept
    /// as part of the text. Line counters are advanced across newlines.
    fn text(&mut self, tokens: &mut Vec<Token>, chars: &mut Peekable<Chars>) {
        let mut text_chars: Vec<char> = Vec::new();
        let mut string_or_date = false;
        let start_line = self.current_line;
        let mut end_line = self.current_line;

        while let Some(char) = chars.peek() {
            match char {
                '#' => {
                    if !string_or_date {
                        break;
                    }
                    text_chars.push(*char);
                    chars.next();
                }
                '"' | '\'' => {
                    // Toggle the "inside a string or date literal" flag
                    string_or_date = !string_or_date;
                    text_chars.push(*char);
                    chars.next();
                }
                '\n' => {
                    self.current_line += 1;
                    end_line += 1;
                    text_chars.push(*char);
                    chars.next();
                }
                _ => {
                    text_chars.push(*char);
                    chars.next();
                }
            }
        }

        let token = Token::new(TokenKind::Text, text_chars.into_iter().collect(), start_line, end_line);
        tokens.push(token);
    }

    /// Reads a double-quoted string literal, including both quotes, advancing
    /// line counters across embedded newlines.
    fn string(&mut self, chars: &mut Peekable<Chars>) -> Token {
        let mut text_chars: Vec<char> = Vec::new();
        // Add the opening quote
        text_chars.push(chars.next().unwrap());
        let start_line = self.current_line;
        let mut end_line = self.current_line;

        while let Some(char) = chars.next() {
            match char {
                '"' => {
                    text_chars.push(char);
                    break;
                }
                '\n' => {
                    self.current_line += 1;
                    end_line += 1;
                    text_chars.push(char);
                }
                _ => text_chars.push(char),
            }
        }

        Token::new(TokenKind::Text, text_chars.into_iter().collect(), start_line, end_line)
    }
}

/// Consumes the next character and returns true if it equals `expected`;
/// otherwise leaves the iterator untouched and returns false.
fn match_char(expected: char, chars: &mut Peekable<Chars>) -> bool {
    match chars.peek() {
        Some(char) if *char == expected => {
            // Consume the matched character
            chars.next();
            true
        }
        _ => false,
    }
}
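
// A minimal test sketch of the public lexing flow. It relies only on what this
// module itself uses from `crate::token`: the `TokenKind` unit variants and the
// `token_kind` field on `Token`; everything else about the `Token` and
// `KeywordTable` API is an assumption, so treat these as illustrative rather
// than exhaustive.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn lexes_shebang_line_and_plain_text() {
        let mut lexer = Lexer::new();
        let tokens = lexer.lex("#!hello\nworld");

        // Expected: Shebang "#!", Text "hello", Text "\nworld"
        assert_eq!(tokens.len(), 3);
        assert!(matches!(tokens[0].token_kind, TokenKind::Shebang));
        assert!(matches!(tokens[1].token_kind, TokenKind::Text));
        assert!(matches!(tokens[2].token_kind, TokenKind::Text));
    }

    #[test]
    fn lexes_preprocessor_line_with_quoted_path() {
        let mut lexer = Lexer::new();
        // The middle token is either a keyword or an identifier depending on
        // the contents of KeywordTable, so only the ends are checked here.
        let tokens = lexer.lex("#include \"lib.cnf\"");

        assert_eq!(tokens.len(), 3);
        assert!(matches!(tokens[0].token_kind, TokenKind::Hash));
        assert!(matches!(tokens[2].token_kind, TokenKind::Path));
    }
}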