@@ -19,7 +19,10 @@ use std::mem::MaybeUninit;
1919use std:: ptr:: NonNull ;
2020
2121pub use self :: bindings:: * ;
22- use ruby_prism_sys:: { pm_comment_t, pm_constant_id_list_t, pm_constant_id_t, pm_diagnostic_t, pm_integer_t, pm_location_t, pm_magic_comment_t, pm_node_destroy, pm_node_list, pm_node_t, pm_parse, pm_parser_free, pm_parser_init, pm_parser_t} ;
22+ use ruby_prism_sys:: {
23+ pm_buffer_free, pm_buffer_init, pm_buffer_length, pm_buffer_t, pm_buffer_value, pm_comment_t, pm_constant_id_list_t, pm_constant_id_t, pm_diagnostic_t, pm_integer_t, pm_location_t, pm_magic_comment_t, pm_node_destroy, pm_node_list, pm_node_t, pm_options_read, pm_options_t, pm_options_version_t,
24+ pm_parse, pm_parser_free, pm_parser_init, pm_parser_t, pm_serialize, pm_serialize_parse,
25+ } ;
2326
2427/// A range in the source file.
2528pub struct Location < ' pr > {
@@ -428,6 +431,8 @@ pub struct ParseResult<'pr> {
428431 source : & ' pr [ u8 ] ,
429432 parser : NonNull < pm_parser_t > ,
430433 node : NonNull < pm_node_t > ,
434+ options_string : Vec < u8 > ,
435+ options : NonNull < pm_options_t > ,
431436}
432437
433438impl < ' pr > ParseResult < ' pr > {
@@ -529,6 +534,16 @@ impl<'pr> ParseResult<'pr> {
529534 pub fn node ( & self ) -> Node < ' _ > {
530535 Node :: new ( self . parser , self . node . as_ptr ( ) )
531536 }
537+
538+ /// Returns the serialized representation of the parse result.
539+ #[ must_use]
540+ pub fn serialize ( & self ) -> Vec < u8 > {
541+ let mut buffer = Buffer :: default ( ) ;
542+ unsafe {
543+ pm_serialize ( self . parser . as_ptr ( ) , self . node . as_ptr ( ) , & mut buffer. buffer ) ;
544+ }
545+ buffer. value ( ) . into ( )
546+ }
532547}
533548
534549impl < ' pr > Drop for ParseResult < ' pr > {
@@ -537,10 +552,177 @@ impl<'pr> Drop for ParseResult<'pr> {
537552 pm_node_destroy ( self . parser . as_ptr ( ) , self . node . as_ptr ( ) ) ;
538553 pm_parser_free ( self . parser . as_ptr ( ) ) ;
539554 drop ( Box :: from_raw ( self . parser . as_ptr ( ) ) ) ;
555+
556+ _ = self . options ;
557+ _ = self . options_string ;
558+
559+ // pm_options_free(self.options.as_ptr());
560+ // drop(Box::from_raw(self.options.as_ptr()));
561+ }
562+ }
563+ }
564+
/// A scope of locals surrounding the code that is being parsed.
#[derive(Clone, Debug, Default)]
pub struct OptionsScope {
    /// Flags for the set of forwarding parameters in this scope.
    pub forwarding_flags: u8,
    /// The names of the locals in the scope.
    pub locals: Vec<String>,
}
575+
576+ /// The options that can be passed to the parser.
577+ #[ derive( Debug , Clone ) ]
578+ pub struct Options {
579+ /** The name of the file that is currently being parsed. */
580+ pub filepath : String ,
581+ /**
582+ * The line within the file that the parse starts on. This value is
583+ * 1-indexed.
584+ */
585+ pub line : i32 ,
586+ /**
587+ * The name of the encoding that the source file is in. Note that this must
588+ * correspond to a name that can be found with Encoding.find in Ruby.
589+ */
590+ pub encoding : String ,
591+ /**
592+ * Whether or not the frozen string literal option has been set.
593+ * May be:
594+ * - PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
595+ * - PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
596+ * - PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
597+ */
598+ pub frozen_string_literal : Option < bool > ,
599+ /** A bitset of the various options that were set on the command line. */
600+ pub command_line : u8 ,
601+ /**
602+ * The version of prism that we should be parsing with. This is used to
603+ * allow consumers to specify which behavior they want in case they need to
604+ * parse exactly as a specific version of CRuby.
605+ */
606+ pub version : pm_options_version_t ,
607+ /**
608+ * Whether or not the encoding magic comments should be respected. This is a
609+ * niche use-case where you want to parse a file with a specific encoding
610+ * but ignore any encoding magic comments at the top of the file.
611+ */
612+ pub encoding_locked : bool ,
613+ /**
614+ * When the file being parsed is the main script, the shebang will be
615+ * considered for command-line flags (or for implicit -x). The caller needs
616+ * to pass this information to the parser so that it can behave correctly.
617+ */
618+ pub main_script : bool ,
619+ /**
620+ * When the file being parsed is considered a "partial" script, jumps will
621+ * not be marked as errors if they are not contained within loops/blocks.
622+ * This is used in the case that you're parsing a script that you know will
623+ * be embedded inside another script later, but you do not have that context
624+ * yet. For example, when parsing an ERB template that will be evaluated
625+ * inside another script.
626+ */
627+ pub partial_script : bool ,
628+ /**
629+ * Whether or not the parser should freeze the nodes that it creates. This
630+ * makes it possible to have a deeply frozen AST that is safe to share
631+ * between concurrency primitives.
632+ */
633+ pub freeze : bool ,
634+ /**
635+ * The scopes surrounding the code that is being parsed. For most parses
636+ * this will be empty, but for evals it will be the locals that are in scope
637+ * surrounding the eval. Scopes are ordered from the outermost scope to the
638+ * innermost one.
639+ */
640+ pub scopes : Vec < OptionsScope > ,
641+ }
642+
643+ impl Default for Options {
644+ fn default ( ) -> Self {
645+ Self {
646+ filepath : String :: new ( ) ,
647+ line : 1 ,
648+ encoding : String :: new ( ) ,
649+ frozen_string_literal : None ,
650+ command_line : 0 ,
651+ version : pm_options_version_t:: PM_OPTIONS_VERSION_LATEST ,
652+ encoding_locked : false ,
653+ main_script : true ,
654+ partial_script : false ,
655+ freeze : false ,
656+ scopes : Vec :: new ( ) ,
540657 }
541658 }
542659}
543660
661+ impl Options {
662+ fn to_binary_string ( & self ) -> Vec < u8 > {
663+ let mut output = Vec :: new ( ) ;
664+
665+ output. extend ( ( self . filepath . len ( ) as u32 ) . to_ne_bytes ( ) ) ;
666+ output. extend ( self . filepath . as_bytes ( ) ) ;
667+ output. extend ( self . line . to_ne_bytes ( ) ) ;
668+ output. extend ( ( self . encoding . len ( ) as u32 ) . to_ne_bytes ( ) ) ;
669+ output. extend ( self . encoding . as_bytes ( ) ) ;
670+ output. extend ( self . frozen_string_literal . map_or_else ( || 0i8 , |frozen| if frozen { 1 } else { -1 } ) . to_ne_bytes ( ) ) ;
671+ output. push ( self . command_line ) ;
672+ output. extend ( ( self . version as u8 ) . to_ne_bytes ( ) ) ;
673+ output. push ( self . encoding_locked . into ( ) ) ;
674+ output. push ( self . main_script . into ( ) ) ;
675+ output. push ( self . partial_script . into ( ) ) ;
676+ output. push ( self . freeze . into ( ) ) ;
677+ output. extend ( ( self . scopes . len ( ) as u32 ) . to_ne_bytes ( ) ) ;
678+ for scope in & self . scopes {
679+ output. extend ( ( scope. locals . len ( ) as u32 ) . to_ne_bytes ( ) ) ;
680+ output. extend ( scope. forwarding_flags . to_ne_bytes ( ) ) ;
681+ for local in & scope. locals {
682+ output. extend ( ( local. len ( ) as u32 ) . to_ne_bytes ( ) ) ;
683+ output. extend ( local. as_bytes ( ) ) ;
684+ }
685+ }
686+ output
687+ }
688+ }
689+
690+ struct Buffer {
691+ buffer : pm_buffer_t ,
692+ }
693+
694+ impl Default for Buffer {
695+ fn default ( ) -> Self {
696+ let buffer = unsafe {
697+ let mut uninit = MaybeUninit :: < pm_buffer_t > :: uninit ( ) ;
698+ let initialized = pm_buffer_init ( uninit. as_mut_ptr ( ) ) ;
699+ assert ! ( initialized) ;
700+ uninit. assume_init ( )
701+ } ;
702+ Self { buffer }
703+ }
704+ }
705+
706+ impl Buffer {
707+ fn length ( & self ) -> usize {
708+ unsafe { pm_buffer_length ( & self . buffer ) }
709+ }
710+
711+ fn value ( & self ) -> & [ u8 ] {
712+ unsafe {
713+ let value = pm_buffer_value ( & self . buffer ) ;
714+ let value = value. cast :: < u8 > ( ) . cast_const ( ) ;
715+ std:: slice:: from_raw_parts ( value, self . length ( ) )
716+ }
717+ }
718+ }
719+
720+ impl Drop for Buffer {
721+ fn drop ( & mut self ) {
722+ unsafe { pm_buffer_free ( & mut self . buffer ) }
723+ }
724+ }
725+
544726/// Parses the given source string and returns a parse result.
545727///
546728/// # Panics
@@ -549,25 +731,56 @@ impl<'pr> Drop for ParseResult<'pr> {
549731///
550732#[ must_use]
551733pub fn parse ( source : & [ u8 ] ) -> ParseResult < ' _ > {
734+ parse_with_options ( source, & Options :: default ( ) )
735+ }
736+
737+ /// Parses the given source string and returns a parse result.
738+ ///
739+ /// # Panics
740+ ///
741+ /// Panics if the parser fails to initialize.
742+ ///
743+ #[ must_use]
744+ pub fn parse_with_options < ' pr > ( source : & ' pr [ u8 ] , options : & Options ) -> ParseResult < ' pr > {
745+ let options_string = options. to_binary_string ( ) ;
552746 unsafe {
553747 let uninit = Box :: new ( MaybeUninit :: < pm_parser_t > :: uninit ( ) ) ;
554748 let uninit = Box :: into_raw ( uninit) ;
555749
556- pm_parser_init ( ( * uninit) . as_mut_ptr ( ) , source. as_ptr ( ) , source. len ( ) , std:: ptr:: null ( ) ) ;
750+ let options = Box :: into_raw ( Box :: new ( MaybeUninit :: < pm_options_t > :: zeroed ( ) ) ) ;
751+ pm_options_read ( ( * options) . as_mut_ptr ( ) , options_string. as_ptr ( ) . cast ( ) ) ;
752+ let options = NonNull :: new ( ( * options) . assume_init_mut ( ) ) . unwrap ( ) ;
753+
754+ pm_parser_init ( ( * uninit) . as_mut_ptr ( ) , source. as_ptr ( ) , source. len ( ) , options. as_ptr ( ) ) ;
557755
558756 let parser = ( * uninit) . assume_init_mut ( ) ;
559757 let parser = NonNull :: new_unchecked ( parser) ;
560758
561759 let node = pm_parse ( parser. as_ptr ( ) ) ;
562760 let node = NonNull :: new_unchecked ( node) ;
563761
564- ParseResult { source, parser, node }
762+ ParseResult { source, parser, node, options_string, options }
763+ }
764+ }
765+
766+ /// Serializes the given source string and returns a parse result.
767+ ///
768+ /// # Panics
769+ ///
770+ /// Panics if the parser fails to initialize.
771+ #[ must_use]
772+ pub fn serialize_parse ( source : & [ u8 ] , options : & Options ) -> Vec < u8 > {
773+ let mut buffer = Buffer :: default ( ) ;
774+ let opts = options. to_binary_string ( ) ;
775+ unsafe {
776+ pm_serialize_parse ( & mut buffer. buffer , source. as_ptr ( ) , source. len ( ) , opts. as_ptr ( ) . cast ( ) ) ;
565777 }
778+ buffer. value ( ) . into ( )
566779}
567780
568781#[ cfg( test) ]
569782mod tests {
570- use super :: parse;
783+ use super :: { parse, parse_with_options , serialize_parse } ;
571784
572785 #[ test]
573786 fn comments_test ( ) {
@@ -1157,6 +1370,29 @@ end
11571370 assert ! ( ( value - 1.0 ) . abs( ) < f64 :: EPSILON ) ;
11581371 }
11591372
1373+ #[ test]
1374+ fn serialize_parse_test ( ) {
1375+ let source = r#"__FILE__"# ;
1376+ let mut options = crate :: Options :: default ( ) ;
1377+ options. filepath = "test.rb" . to_string ( ) ;
1378+ let bytes = serialize_parse ( source. as_ref ( ) , & options) ;
1379+
1380+ let result = parse_with_options ( source. as_bytes ( ) , & options) ;
1381+
1382+ assert_eq ! ( bytes, result. serialize( ) ) ;
1383+
1384+ let expected = r#"@ ProgramNode (location: (1,0)-(1,8))
1385+ +-- locals: []
1386+ +-- statements:
1387+ @ StatementsNode (location: (1,0)-(1,8))
1388+ +-- body: (length: 1)
1389+ +-- @ SourceFileNode (location: (1,0)-(1,8))
1390+ +-- StringFlags: nil
1391+ +-- filepath: "test.rb"
1392+ "# ;
1393+ assert_eq ! ( expected, result. node( ) . pretty_print( ) . as_str( ) )
1394+ }
1395+
11601396 #[ test]
11611397 fn node_field_lifetime_test ( ) {
11621398 // The code below wouldn't typecheck prior to https://github.com/ruby/prism/pull/2519,
0 commit comments