2222#include " llvm/ADT/StringRef.h"
2323#include " llvm/BinaryFormat/Magic.h"
2424#include " llvm/BinaryFormat/Wasm.h"
25+ #include " llvm/Support/CheckedArithmetic.h"
2526#include " llvm/Support/Endian.h"
2627#include " llvm/Support/Format.h"
2728#include < optional>
@@ -50,7 +51,8 @@ static bool ValidateModuleHeader(const DataBufferSP &data_sp) {
5051 return version == llvm::wasm::WasmVersion;
5152}
5253
53- static std::optional<ConstString>
54+ // FIXME: Use lldb::DataExtractor instead of llvm::DataExtractor.
55+ static std::optional<std::string>
5456GetWasmString (llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {
5557 // A Wasm string is encoded as a vector of UTF-8 codes.
5658 // Vectors are encoded with their u32 length followed by the element
@@ -61,7 +63,7 @@ GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {
6163 return std::nullopt ;
6264 }
6365
64- if (len >= ( uint64_t ( 1 ) << 32 )) {
66+ if (len > std::numeric_limits< uint32_t >:: max ( )) {
6567 return std::nullopt ;
6668 }
6769
@@ -72,8 +74,7 @@ GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {
7274 return std::nullopt ;
7375 }
7476
75- llvm::StringRef str = toStringRef (llvm::ArrayRef (str_storage));
76- return ConstString (str);
77+ return std::string (toStringRef (llvm::ArrayRef (str_storage)));
7778}
7879
7980char ObjectFileWasm::ID;
@@ -174,15 +175,15 @@ bool ObjectFileWasm::DecodeNextSection(lldb::offset_t *offset_ptr) {
174175 if (!c)
175176 return !llvm::errorToBool (c.takeError ());
176177
177- if (payload_len >= ( uint64_t ( 1 ) << 32 ))
178+ if (payload_len > std::numeric_limits< uint32_t >:: max ( ))
178179 return false ;
179180
180181 if (section_id == llvm::wasm::WASM_SEC_CUSTOM) {
181182 // Custom sections have the id 0. Their contents consist of a name
182183 // identifying the custom section, followed by an uninterpreted sequence
183184 // of bytes.
184185 lldb::offset_t prev_offset = c.tell ();
185- std::optional<ConstString > sect_name = GetWasmString (data, c);
186+ std::optional<std::string > sect_name = GetWasmString (data, c);
186187 if (!sect_name)
187188 return false ;
188189
@@ -191,7 +192,7 @@ bool ObjectFileWasm::DecodeNextSection(lldb::offset_t *offset_ptr) {
191192
192193 uint32_t section_length = payload_len - (c.tell () - prev_offset);
193194 m_sect_infos.push_back (section_info{*offset_ptr + c.tell (), section_length,
194- section_id, *sect_name});
195+ section_id, ConstString ( *sect_name) });
195196 *offset_ptr += (c.tell () + section_length);
196197 } else if (section_id <= llvm::wasm::WASM_SEC_LAST_KNOWN) {
197198 m_sect_infos.push_back (section_info{*offset_ptr + c.tell (),
@@ -248,12 +249,203 @@ bool ObjectFileWasm::ParseHeader() {
248249 return true ;
249250}
250251
251- void ObjectFileWasm::ParseSymtab (Symtab &symtab) {}
252+ static llvm::Expected<std::vector<AddressRange>>
253+ ParseFunctions (SectionSP code_section_sp) {
254+ DataExtractor data;
255+ code_section_sp->GetSectionData (data);
256+ lldb::offset_t offset = 0 ;
257+
258+ const uint64_t function_count = data.GetULEB128 (&offset);
259+ if (function_count > std::numeric_limits<uint32_t >::max ())
260+ return llvm::createStringError (" function count overflows uint32_t" );
261+
262+ std::vector<AddressRange> functions;
263+ functions.reserve (function_count);
264+
265+ for (uint32_t i = 0 ; i < function_count; ++i) {
266+ const uint64_t function_size = data.GetULEB128 (&offset);
267+ if (function_size > std::numeric_limits<uint32_t >::max ())
268+ return llvm::createStringError (" function size overflows uint32_t" );
269+ // llvm-objdump considers the ULEB with the function size to be part of the
270+ // function. We can't do that here because that would break symbolic
271+ // breakpoints, as that address is never executed.
272+ functions.emplace_back (code_section_sp, offset, function_size);
273+
274+ std::optional<lldb::offset_t > next_offset =
275+ llvm::checkedAddUnsigned (offset, function_size);
276+ if (!next_offset)
277+ return llvm::createStringError (" function offset overflows uint64_t" );
278+ offset = *next_offset;
279+ }
280+
281+ return functions;
282+ }
283+
284+ static llvm::Expected<std::vector<AddressRange>>
285+ ParseData (SectionSP data_section_sp) {
286+ DataExtractor data;
287+ data_section_sp->GetSectionData (data);
288+
289+ lldb::offset_t offset = 0 ;
290+
291+ const uint64_t segment_count = data.GetULEB128 (&offset);
292+ if (segment_count > std::numeric_limits<uint32_t >::max ())
293+ return llvm::createStringError (" segment count overflows uint32_t" );
294+
295+ std::vector<AddressRange> segments;
296+ segments.reserve (segment_count);
297+
298+ for (uint32_t i = 0 ; i < segment_count; ++i) {
299+ const uint64_t flags = data.GetULEB128 (&offset);
300+ if (flags > std::numeric_limits<uint32_t >::max ())
301+ return llvm::createStringError (" segment flags overflows uint32_t" );
302+
303+ const uint64_t segment_size = data.GetULEB128 (&offset);
304+ if (flags > std::numeric_limits<uint32_t >::max ())
305+ return llvm::createStringError (" segment size overflows uint32_t" );
306+
307+ segments.emplace_back (data_section_sp, offset, segment_size);
308+
309+ std::optional<lldb::offset_t > next_offset =
310+ llvm::checkedAddUnsigned (offset, segment_size);
311+ if (!next_offset)
312+ return llvm::createStringError (" segment offset overflows uint64_t" );
313+ offset = *next_offset;
314+ }
315+
316+ return segments;
317+ }
318+
319+ static llvm::Expected<std::vector<Symbol>>
320+ ParseNames (SectionSP name_section_sp,
321+ const std::vector<AddressRange> &function_ranges,
322+ const std::vector<AddressRange> &segment_ranges) {
323+ DataExtractor name_section_data;
324+ name_section_sp->GetSectionData (name_section_data);
325+
326+ llvm::DataExtractor data = name_section_data.GetAsLLVM ();
327+ llvm::DataExtractor::Cursor c (0 );
328+ std::vector<Symbol> symbols;
329+ while (c && c.tell () < data.size ()) {
330+ const uint8_t type = data.getU8 (c);
331+ const uint64_t size = data.getULEB128 (c);
332+ if (size > std::numeric_limits<uint32_t >::max ())
333+ return llvm::createStringError (" size overflows uint32_t" );
334+
335+ switch (type) {
336+ case llvm::wasm::WASM_NAMES_FUNCTION: {
337+ const uint64_t count = data.getULEB128 (c);
338+ if (count > std::numeric_limits<uint32_t >::max ())
339+ return llvm::createStringError (" function count overflows uint32_t" );
340+
341+ for (uint64_t i = 0 ; c && i < count; ++i) {
342+ const uint64_t idx = data.getULEB128 (c);
343+ const std::optional<std::string> name = GetWasmString (data, c);
344+ if (!name || idx >= function_ranges.size ())
345+ continue ;
346+ symbols.emplace_back (
347+ symbols.size (), Mangled (*name), lldb::eSymbolTypeCode,
348+ /* external=*/ false , /* is_debug=*/ false , /* is_trampoline=*/ false ,
349+ /* is_artificial=*/ false , function_ranges[idx],
350+ /* size_is_valid=*/ true , /* contains_linker_annotations=*/ false ,
351+ /* flags=*/ 0 );
352+ }
353+ } break ;
354+ case llvm::wasm::WASM_NAMES_DATA_SEGMENT: {
355+ const uint64_t count = data.getULEB128 (c);
356+ if (count > std::numeric_limits<uint32_t >::max ())
357+ return llvm::createStringError (" data count overflows uint32_t" );
358+ for (uint64_t i = 0 ; c && i < count; ++i) {
359+ const uint64_t idx = data.getULEB128 (c);
360+ const std::optional<std::string> name = GetWasmString (data, c);
361+ if (!name || idx >= segment_ranges.size ())
362+ continue ;
363+ symbols.emplace_back (
364+ symbols.size (), Mangled (*name), lldb::eSymbolTypeData,
365+ /* external=*/ false , /* is_debug=*/ false , /* is_trampoline=*/ false ,
366+ /* is_artificial=*/ false , segment_ranges[idx],
367+ /* size_is_valid=*/ true , /* contains_linker_annotations=*/ false ,
368+ /* flags=*/ 0 );
369+ }
370+
371+ } break ;
372+ case llvm::wasm::WASM_NAMES_GLOBAL:
373+ case llvm::wasm::WASM_NAMES_LOCAL:
374+ default :
375+ std::optional<uint64_t > offset = llvm::checkedAddUnsigned (c.tell (), size);
376+ if (!offset)
377+ return llvm::createStringError (" offset overflows uint64_t" );
378+ c.seek (*offset);
379+ }
380+ }
381+
382+ if (!c)
383+ return c.takeError ();
384+
385+ return symbols;
386+ }
387+
388+ void ObjectFileWasm::ParseSymtab (Symtab &symtab) {
389+ assert (m_sections_up && " sections must be parsed" );
390+ Log *log = GetLog (LLDBLog::Object);
391+
392+ // The name section contains names and indexes. First parse the data from the
393+ // relevant sections so we can access it by its index.
394+ std::vector<AddressRange> function_ranges;
395+ std::vector<AddressRange> segment_ranges;
396+
397+ // Parse the code section.
398+ if (SectionSP code_section_sp =
399+ m_sections_up->FindSectionByType (lldb::eSectionTypeCode, false )) {
400+ llvm::Expected<std::vector<AddressRange>> functions =
401+ ParseFunctions (code_section_sp);
402+ if (!functions) {
403+ LLDB_LOG_ERROR (log, functions.takeError (),
404+ " Failed to parse Wasm code section: {0}" );
405+ return ;
406+ }
407+ function_ranges = *functions;
408+ }
409+
410+ // Parse the data section.
411+ if (SectionSP data_section_sp =
412+ m_sections_up->FindSectionByType (lldb::eSectionTypeData, false )) {
413+ llvm::Expected<std::vector<AddressRange>> segments =
414+ ParseData (data_section_sp);
415+ if (!segments) {
416+ LLDB_LOG_ERROR (log, segments.takeError (),
417+ " Failed to parse Wasm data section: {0}" );
418+ return ;
419+ }
420+ segment_ranges = *segments;
421+ }
422+
423+ // Parse the name section.
424+ SectionSP name_section_sp =
425+ m_sections_up->FindSectionByType (lldb::eSectionTypeWasmName, false );
426+ if (!name_section_sp) {
427+ LLDB_LOG (log, " Failed to parse Wasm symbol table: no names section" );
428+ return ;
429+ }
430+
431+ llvm::Expected<std::vector<Symbol>> symbols =
432+ ParseNames (name_section_sp, function_ranges, segment_ranges);
433+ if (!symbols) {
434+ LLDB_LOG_ERROR (log, symbols.takeError (), " Failed to parse Wasm names: {0}" );
435+ return ;
436+ }
437+
438+ for (const Symbol &symbol : *symbols)
439+ symtab.AddSymbol (symbol);
440+
441+ symtab.Finalize ();
442+ }
252443
// Maps a section name to an lldb SectionType. In the added ("+") lines, a
// name equal to the "name" literal yields eSectionTypeWasmName; a name that
// starts with the ".debug_" or ".zdebug_" prefix has that prefix consumed and
// the remainder is handed to ObjectFile::GetDWARFSectionTypeFromName; any
// other name yields eSectionTypeOther.
// NOTE(review): this span interleaves removed ("-") and added ("+") diff
// lines; the description above follows the added lines.
253444static SectionType GetSectionTypeFromName (llvm::StringRef Name) {
254- if (Name.consume_front (" .debug_" ) || Name.consume_front (" .zdebug_" )) {
445+ if (Name == " name" )
446+ return lldb::eSectionTypeWasmName;
447+ if (Name.consume_front (" .debug_" ) || Name.consume_front (" .zdebug_" ))
255448 return ObjectFile::GetDWARFSectionTypeFromName (Name);
256- }
257449 return eSectionTypeOther;
258450}
259451
@@ -283,6 +475,9 @@ void ObjectFileWasm::CreateSections(SectionList &unified_section_list) {
283475 // For this reason Section::GetFileAddress() must return zero for the
284476 // Code section.
285477 vm_addr = 0 ;
478+ } else if (llvm::wasm::WASM_SEC_DATA == sect_info.id ) {
479+ section_type = eSectionTypeData;
480+ section_name = ConstString (" data" );
286481 } else {
287482 section_type = GetSectionTypeFromName (sect_info.name .GetStringRef ());
288483 if (section_type == eSectionTypeOther)
@@ -397,9 +592,9 @@ std::optional<FileSpec> ObjectFileWasm::GetExternalDebugInfoFileSpec() {
397592 ReadImageData (sect_info.offset , kBufferSize );
398593 llvm::DataExtractor data = section_header_data.GetAsLLVM ();
399594 llvm::DataExtractor::Cursor c (0 );
400- std::optional<ConstString > symbols_url = GetWasmString (data, c);
595+ std::optional<std::string > symbols_url = GetWasmString (data, c);
401596 if (symbols_url)
402- return FileSpec (symbols_url-> GetStringRef () );
597+ return FileSpec (* symbols_url);
403598 }
404599 }
405600 return std::nullopt ;
0 commit comments