1
1
"""A reconstructor for HCL2 implemented using Lark's experimental reconstruction functionality"""
2
2
3
3
import re
4
- import json
5
4
from typing import List , Dict , Callable , Optional , Union , Any , Tuple
6
5
7
6
from lark import Lark , Tree
@@ -137,7 +136,7 @@ def _is_equals_sign(self, terminal) -> bool:
137
136
)
138
137
139
138
# pylint: disable=too-many-branches, too-many-return-statements
140
- def _should_add_space (self , rule , current_terminal ):
139
+ def _should_add_space (self , rule , current_terminal , is_block_label : bool = False ):
141
140
"""
142
141
This method documents the situations in which we add space around
143
142
certain tokens while reconstructing the generated HCL.
@@ -155,6 +154,7 @@ def _should_add_space(self, rule, current_terminal):
155
154
156
155
This should be sufficient to make a spacing decision.
157
156
"""
157
+
158
158
# we don't need to add multiple spaces
159
159
if self ._last_char_space :
160
160
return False
@@ -166,6 +166,14 @@ def _should_add_space(self, rule, current_terminal):
166
166
if self ._is_equals_sign (current_terminal ):
167
167
return True
168
168
169
+ if is_block_label and isinstance (rule , Token ) and rule .value == "string" :
170
+ if (
171
+ current_terminal == self ._last_terminal == Terminal ("DBLQUOTE" )
172
+ or current_terminal == Terminal ("DBLQUOTE" )
173
+ and self ._last_terminal == Terminal ("NAME" )
174
+ ):
175
+ return True
176
+
169
177
# if we're in a ternary or binary operator, add space around the operator
170
178
if (
171
179
isinstance (rule , Token )
@@ -235,7 +243,7 @@ def _should_add_space(self, rule, current_terminal):
235
243
return True
236
244
237
245
# always add space between string literals
238
- if current_terminal == Terminal ("STRING_LIT " ):
246
+ if current_terminal == Terminal ("STRING_CHARS " ):
239
247
return True
240
248
241
249
# if we just opened a block, add a space, unless the block is empty
@@ -257,7 +265,7 @@ def _should_add_space(self, rule, current_terminal):
257
265
# preceded by a space if they're following a comma in a tuple or
258
266
# function arg
259
267
if current_terminal in [
260
- Terminal ("STRING_LIT " ),
268
+ Terminal ("DBLQUOTE " ),
261
269
Terminal ("DECIMAL" ),
262
270
Terminal ("NAME" ),
263
271
Terminal ("NEGATIVE_DECIMAL" ),
@@ -267,13 +275,15 @@ def _should_add_space(self, rule, current_terminal):
267
275
# the catch-all case, we're not sure, so don't add a space
268
276
return False
269
277
270
- def _reconstruct (self , tree ):
278
+ def _reconstruct (self , tree , is_block_label = False ):
271
279
unreduced_tree = self .match_tree (tree , tree .data )
272
280
res = self .write_tokens .transform (unreduced_tree )
273
281
for item in res :
274
282
# any time we encounter a child tree, we recurse
275
283
if isinstance (item , Tree ):
276
- yield from self ._reconstruct (item )
284
+ yield from self ._reconstruct (
285
+ item , (unreduced_tree .data == "block" and item .data != "body" )
286
+ )
277
287
278
288
# every leaf should be a tuple, which contains information about
279
289
# which terminal the leaf represents
@@ -309,7 +319,7 @@ def _reconstruct(self, tree):
309
319
self ._deferred_item = None
310
320
311
321
# potentially add a space before the next token
312
- if self ._should_add_space (rule , terminal ):
322
+ if self ._should_add_space (rule , terminal , is_block_label ):
313
323
yield " "
314
324
self ._last_char_space = True
315
325
@@ -353,21 +363,21 @@ def _name_to_identifier(name: str) -> Tree:
353
363
354
364
@staticmethod
def _escape_interpolated_str(interp_s: str) -> str:
    """
    Escape a raw string value so it can be emitted inside an HCL string.

    Heredocs (values that start with ``<<`` once surrounding whitespace
    is stripped, which also covers ``<<-``) are not escaped at all; they
    are only passed through ``reverse_quotes_within_interpolation``.

    For every other value, backslashes, double quotes and the control
    characters newline, carriage return, tab, backspace and form feed
    are replaced with their backslash escape sequences, and the escaped
    text is then passed through ``reverse_quotes_within_interpolation``.

    :param interp_s: the raw string value
    :return: whatever ``reverse_quotes_within_interpolation`` returns
        for the (possibly escaped) text
    """
    # "<<-" itself begins with "<<", so one prefix test covers both forms
    if interp_s.strip().startswith("<<"):
        return reverse_quotes_within_interpolation(interp_s)

    # One translation pass maps each source character to its escape.
    # Because every character is mapped independently, the backslashes
    # introduced by one escape are never escaped a second time.
    escape_table = str.maketrans(
        {
            "\\": "\\\\",
            '"': '\\"',
            "\n": "\\n",
            "\r": "\\r",
            "\t": "\\t",
            "\b": "\\b",
            "\f": "\\f",
        }
    )

    # NOTE(review): per the original comment, the helper below finds each
    # interpolation within the string and removes the backslashes there
    return reverse_quotes_within_interpolation(interp_s.translate(escape_table))
372
382
373
383
@staticmethod
@@ -420,6 +430,48 @@ def _newline(self, level: int, count: int = 1) -> Tree:
420
430
[Token ("NL_OR_COMMENT" , f"\n { ' ' * level } " ) for _ in range (count )],
421
431
)
422
432
433
def _build_string_rule(self, string: str, level: int = 0) -> Tree:
    """
    Build a ``string`` rule tree out of an arbitrary string.

    The grammar in hcl2.lark defines a string as any number of string
    parts, each of which is either an interpolation expression, an
    escaped interpolation or a run of regular characters. This method
    splits ``string`` into such parts and wraps each one in a
    ``string_part`` rule:

    * ``$${...}`` becomes an ``ESCAPED_INTERPOLATION`` token,
    * ``${...}`` becomes an ``interpolation`` rule whose child is an
      ``expr_term``; a double-quoted payload is unquoted and handed to
      ``_transform_value_to_expr_term``, anything else is wrapped as an
      ``identifier``,
    * everything else becomes a ``STRING_CHARS`` token.

    :param string: the string content to convert
    :param level: forwarded to ``_transform_value_to_expr_term`` for
        quoted interpolation payloads
    :return: a ``string`` rule tree; it has no children when ``string``
        is empty
    """
    # matches ${...} and $${...}, allowing one level of nested braces
    pattern = re.compile(r"(\${1,2}\{(?:[^{}]|\{[^{}]*})*})")

    # The capturing group makes re.split keep the matched interpolations.
    # It also yields an empty string wherever a match sits at the start or
    # end of the input or directly follows another match; dropping every
    # empty segment avoids emitting zero-length STRING_CHARS tokens.
    # e.g. 'aaa$${bbb}ccc${"ddd-${eee}"}' -> ['aaa', '$${bbb}', 'ccc', '${"ddd-${eee}"}']
    parts = [part for part in re.split(pattern, string) if part]

    elements = []
    for part in parts:
        if part.startswith("$${") and part.endswith("}"):
            elements.append(Token("ESCAPED_INTERPOLATION", part))

        # unwrap interpolation expression and recurse into it
        elif part.startswith("${") and part.endswith("}"):
            inner = part[2:-1]
            if inner.startswith('"') and inner.endswith('"'):
                expr = self._transform_value_to_expr_term(inner[1:-1], level)
            else:
                expr = Tree(
                    Token("RULE", "expr_term"),
                    [Tree(Token("RULE", "identifier"), [Token("NAME", inner)])],
                )
            elements.append(Tree(Token("RULE", "interpolation"), [expr]))

        else:
            elements.append(Token("STRING_CHARS", part))

    return Tree(
        Token("RULE", "string"),
        [Tree(Token("RULE", "string_part"), [element]) for element in elements],
    )
474
+
423
475
def _is_block (self , value : Any ) -> bool :
424
476
if isinstance (value , dict ):
425
477
block_body = value
@@ -485,8 +537,8 @@ def _transform_dict_to_body(self, hcl_dict: dict, level: int) -> Tree:
485
537
block_labels , block_body_dict = self ._calculate_block_labels (
486
538
block_v
487
539
)
488
- block_label_tokens = [
489
- Token ( "STRING_LIT" , f'" { block_label } "' )
540
+ block_label_trees = [
541
+ self . _build_string_rule ( block_label , level )
490
542
for block_label in block_labels
491
543
]
492
544
block_body = self ._transform_dict_to_body (
@@ -496,7 +548,7 @@ def _transform_dict_to_body(self, hcl_dict: dict, level: int) -> Tree:
496
548
# create our actual block to add to our own body
497
549
block = Tree (
498
550
Token ("RULE" , "block" ),
499
- [identifier_name ] + block_label_tokens + [block_body ],
551
+ [identifier_name ] + block_label_trees + [block_body ],
500
552
)
501
553
children .append (block )
502
554
# add empty line after block
@@ -675,10 +727,10 @@ def _transform_value_to_expr_term(self, value, level) -> Union[Token, Tree]:
675
727
parsed_value = attribute .children [2 ]
676
728
return parsed_value
677
729
678
- # otherwise it's just a string.
730
+ # otherwise it's a string
679
731
return Tree (
680
732
Token ("RULE" , "expr_term" ),
681
- [Token ( "STRING_LIT" , self ._escape_interpolated_str (value ))],
733
+ [self . _build_string_rule ( self ._escape_interpolated_str (value ), level )],
682
734
)
683
735
684
736
# otherwise, we don't know the type
0 commit comments