Skip to content

Commit 1aceea3

Browse files
committed
Refine codegen and ELF handling
1 parent 197ac23 commit 1aceea3

File tree

5 files changed

+99
-67
lines changed

5 files changed

+99
-67
lines changed

src/arm-codegen.c

Lines changed: 34 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -138,11 +138,13 @@ void cfg_flatten(void)
138138
func_t *func;
139139

140140
if (dynlink)
141-
elf_offset = 112; /* offset of start + branch + exit in codegen */
141+
elf_offset =
142+
88; /* offset of dynamic linking setup + global init call */
142143
else {
143144
func = find_func("__syscall");
144145
func->bbs->elf_offset = 48; /* offset of start + exit in codegen */
145-
elf_offset = 84; /* offset of start + branch + exit + syscall in codegen */
146+
elf_offset =
147+
84; /* offset of start + branch + exit + syscall in codegen */
146148
}
147149

148150
GLOBAL_FUNC->bbs->elf_offset = elf_offset;
@@ -154,7 +156,7 @@ void cfg_flatten(void)
154156

155157
/* prepare 'argc' and 'argv', then proceed to 'main' function */
156158
if (dynlink)
157-
elf_offset += 20;
159+
elf_offset += 20; /* 5 insns: restore r0/r1 from r9/r10, bl to main */
158160
else
159161
elf_offset += 32; /* 6 insns for main call + 2 for exit */
160162

@@ -481,43 +483,56 @@ void code_generate(void)
481483
emit(__push_reg(__AL, __r0));
482484
emit(__mov_i(__AL, __r12, 0));
483485
emit(__push_reg(__AL, __r12));
484-
emit(__movw(__AL, __r0, elf_code_start + 56));
485-
emit(__movt(__AL, __r0, elf_code_start + 56));
486+
/* Pass the address of our main wrapper function.
487+
* After these two movw/movt instructions, we have:
488+
* - mov r3, #0
489+
* - bl to __libc_start_main@plt
490+
* - mov r0, #127
491+
* - bl +28
492+
* - (main wrapper starts here)
493+
*
494+
* Total offset = current + 8 + 16 = current + 24
495+
*
496+
* That is, the current code size + 24 is the starting address
497+
* of main wrapper.
498+
* */
499+
int main_wrapper_offset = elf_code->size + 24;
500+
emit(__movw(__AL, __r0, elf_code_start + main_wrapper_offset));
501+
emit(__movt(__AL, __r0, elf_code_start + main_wrapper_offset));
486502
emit(__mov_i(__AL, __r3, 0));
487503
emit(__bl(__AL, (elf_plt_start + PLT_FIXUP_SIZE) -
488504
(elf_code_start + elf_code->size)));
489505
/* Goto the 'exit' code snippet if __libc_start_main returns */
490506
emit(__mov_i(__AL, __r0, 127));
491507
emit(__bl(__AL, 28));
492508

493-
/* If the compiled program is dynamic linking, the starting
494-
* point of 'start' is located here.
495-
*
496-
* Preserve the 'argc' and 'argv' for the 'main' function.
509+
/* If the compiled program is dynamic linking, it needs to
510+
* preserve the 'argc' and 'argv' for the 'main' function.
497511
* */
498512
emit(__mov_r(__AL, __r9, __r0));
499513
emit(__mov_r(__AL, __r10, __r1));
500514
}
501-
/* If the compiled program is static linking, the starting point
502-
* of 'start' is here.
515+
/* For both static and dynamic linking, we need to set up the stack
516+
* and call the main function.
503517
* */
504518
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
505519
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size));
506520
emit(__sub_r(__AL, __sp, __sp, __r8));
507521
emit(__mov_r(__AL, __r12, __sp));
522+
/* Calculate the branch offset to the global initialization code */
508523
emit(__bl(__AL, GLOBAL_FUNC->bbs->elf_offset - elf_code->size));
509524
/* After global init, jump to main preparation */
510525
emit(__b(__AL, 56)); /* PC+8: skip exit (24) + syscall (36) + ret (4) - 8 */
511526

512-
/* exit */
513-
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
514-
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size));
515-
emit(__add_r(__AL, __sp, __sp, __r8));
516-
emit(__mov_r(__AL, __r0, __r0));
517-
emit(__mov_i(__AL, __r7, 1));
518-
emit(__svc());
519-
520527
if (!dynlink) {
528+
/* exit - only for static linking */
529+
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
530+
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size));
531+
emit(__add_r(__AL, __sp, __sp, __r8));
532+
emit(__mov_r(__AL, __r0, __r0));
533+
emit(__mov_i(__AL, __r7, 1));
534+
emit(__svc());
535+
521536
/* syscall */
522537
emit(__mov_r(__AL, __r7, __r0));
523538
emit(__mov_r(__AL, __r0, __r1));

src/elf.c

Lines changed: 51 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -193,28 +193,29 @@ void elf_generate_program_headers(void)
193193
* 54 | | |
194194
*/
195195
/* program header - code and data combined */
196-
phdr.p_type = 1; /* PT_LOAD */
197-
phdr.p_offset = elf_header_len; /* offset of segment */
198-
phdr.p_vaddr = elf_code_start; /* virtual address */
199-
phdr.p_paddr = elf_code_start; /* physical address */
196+
phdr.p_type = 1; /* PT_LOAD */
197+
phdr.p_offset = elf_header_len; /* offset of segment */
198+
phdr.p_vaddr = elf_code_start; /* virtual address */
199+
phdr.p_paddr = elf_code_start; /* physical address */
200+
/* Don't include interp in the first LOAD segment for dynlink */
200201
phdr.p_filesz = elf_code->size + elf_data->size; /* size in file */
201202
phdr.p_memsz = elf_code->size + elf_data->size; /* size in memory */
202203
phdr.p_flags = 7; /* flags */
203204
phdr.p_align = 4; /* alignment */
204205
elf_write_blk(elf_program_header, &phdr, sizeof(elf32_phdr_t));
205206
if (dynlink) {
206-
/* program header - .rel.plt .plt .got .dynstr .dynsym and .dynamic
207-
* sections combined */
207+
/* program header - interp + dynamic sections combined in second LOAD */
208208
phdr.p_type = 1; /* PT_LOAD */
209-
phdr.p_offset = elf_header_len + elf_code->size + elf_data->size +
210-
elf_interp->size; /* offset of segment */
211-
phdr.p_vaddr = elf_relplt_start; /* virtual address */
212-
phdr.p_paddr = elf_relplt_start; /* physical address */
213-
phdr.p_filesz = elf_relplt->size + elf_plt->size + elf_got->size +
214-
elf_dynstr->size + elf_dynsym->size +
209+
phdr.p_offset = elf_header_len + elf_code->size +
210+
elf_data->size; /* offset of segment */
211+
/* Virtual address must map correctly: VA = ELF_START + file_offset */
212+
phdr.p_vaddr = ELF_START + phdr.p_offset; /* virtual address */
213+
phdr.p_paddr = ELF_START + phdr.p_offset; /* physical address */
214+
phdr.p_filesz = elf_interp->size + elf_relplt->size + elf_plt->size +
215+
elf_got->size + elf_dynstr->size + elf_dynsym->size +
215216
elf_dynamic->size; /* size in file */
216-
phdr.p_memsz = elf_relplt->size + elf_plt->size + elf_got->size +
217-
elf_dynstr->size + elf_dynsym->size +
217+
phdr.p_memsz = elf_interp->size + elf_relplt->size + elf_plt->size +
218+
elf_got->size + elf_dynstr->size + elf_dynsym->size +
218219
elf_dynamic->size; /* size in memory */
219220
phdr.p_flags = 7; /* flags */
220221
phdr.p_align = 4; /* alignment */
@@ -223,29 +224,28 @@ void elf_generate_program_headers(void)
223224
/* program header - program interpreter (.interp section) */
224225
phdr.p_type = 3; /* PT_INTERP */
225226
phdr.p_offset = elf_header_len + elf_code->size +
226-
elf_data->size; /* offset of segment */
227-
phdr.p_vaddr = elf_data_start + elf_data->size; /* virtual address */
228-
phdr.p_paddr = elf_data_start + elf_data->size; /* physical address */
229-
phdr.p_filesz = strlen(DYN_LINKER) + 1; /* size in file */
230-
phdr.p_memsz = strlen(DYN_LINKER) + 1; /* size in memory */
231-
phdr.p_flags = 4; /* flags */
232-
phdr.p_align = 1; /* alignment */
227+
elf_data->size; /* offset of segment */
228+
/* Virtual address must map correctly: VA = ELF_START + file_offset */
229+
phdr.p_vaddr = ELF_START + phdr.p_offset; /* virtual address */
230+
phdr.p_paddr = ELF_START + phdr.p_offset; /* physical address */
231+
phdr.p_filesz = elf_interp->size; /* size in file */
232+
phdr.p_memsz = elf_interp->size; /* size in memory */
233+
phdr.p_flags = 4; /* flags */
234+
phdr.p_align = 1; /* alignment */
233235
elf_write_blk(elf_program_header, &phdr, sizeof(elf32_phdr_t));
234236

235237
/* program header - .dynamic section */
236238
phdr.p_type = 2; /* PT_DYNAMIC */
237239
phdr.p_offset = elf_header_len + elf_code->size + elf_data->size +
238240
elf_interp->size + elf_relplt->size + elf_plt->size +
239241
elf_got->size + elf_dynstr->size +
240-
elf_dynsym->size; /* offset of segment */
241-
phdr.p_vaddr = elf_got_start + elf_got->size + elf_dynstr->size +
242-
elf_dynsym->size; /* virtual address */
243-
phdr.p_paddr = elf_got_start + elf_got->size + elf_dynstr->size +
244-
elf_dynsym->size; /* physical address */
245-
phdr.p_filesz = elf_dynamic->size; /* size in file */
246-
phdr.p_memsz = elf_dynamic->size; /* size in memory */
247-
phdr.p_flags = 6; /* flags */
248-
phdr.p_align = 4; /* alignment */
242+
elf_dynsym->size; /* offset of segment */
243+
phdr.p_vaddr = ELF_START + phdr.p_offset; /* virtual address */
244+
phdr.p_paddr = ELF_START + phdr.p_offset; /* physical address */
245+
phdr.p_filesz = elf_dynamic->size; /* size in file */
246+
phdr.p_memsz = elf_dynamic->size; /* size in memory */
247+
phdr.p_flags = 6; /* flags */
248+
phdr.p_align = 4; /* alignment */
249249
elf_write_blk(elf_program_header, &phdr, sizeof(elf32_phdr_t));
250250
}
251251
}
@@ -332,7 +332,7 @@ void elf_generate_section_headers(void)
332332
shdr.sh_name = sh_name;
333333
shdr.sh_type = 1;
334334
shdr.sh_flags = 0x2;
335-
shdr.sh_addr = elf_data_start + elf_data->size;
335+
shdr.sh_addr = ELF_START + ofs; /* Use consistent VA calculation */
336336
shdr.sh_offset = ofs;
337337
shdr.sh_size = strlen(DYN_LINKER) + 1;
338338
shdr.sh_link = 0;
@@ -345,9 +345,9 @@ void elf_generate_section_headers(void)
345345

346346
/* .rel.plt */
347347
shdr.sh_name = sh_name;
348-
shdr.sh_type = 9; /* SHT_REL */
349-
shdr.sh_flags = 0x42; /* 0x40 | SHF_ALLOC */
350-
shdr.sh_addr = elf_relplt_start;
348+
shdr.sh_type = 9; /* SHT_REL */
349+
shdr.sh_flags = 0x42; /* 0x40 | SHF_ALLOC */
350+
shdr.sh_addr = ELF_START + ofs; /* Use consistent VA calculation */
351351
shdr.sh_offset = ofs;
352352
shdr.sh_size = elf_relplt->size;
353353
shdr.sh_link = 8; /* The section header index of .dynsym. */
@@ -362,7 +362,7 @@ void elf_generate_section_headers(void)
362362
shdr.sh_name = sh_name;
363363
shdr.sh_type = 1;
364364
shdr.sh_flags = 0x6;
365-
shdr.sh_addr = elf_plt_start;
365+
shdr.sh_addr = ELF_START + ofs; /* Use consistent VA calculation */
366366
shdr.sh_offset = ofs;
367367
shdr.sh_size = elf_plt->size;
368368
shdr.sh_link = 0;
@@ -377,7 +377,7 @@ void elf_generate_section_headers(void)
377377
shdr.sh_name = sh_name;
378378
shdr.sh_type = 1;
379379
shdr.sh_flags = 0x3;
380-
shdr.sh_addr = elf_got_start;
380+
shdr.sh_addr = ELF_START + ofs; /* Use consistent VA calculation */
381381
shdr.sh_offset = ofs;
382382
shdr.sh_size = elf_got->size;
383383
shdr.sh_link = 0;
@@ -392,7 +392,7 @@ void elf_generate_section_headers(void)
392392
shdr.sh_name = sh_name;
393393
shdr.sh_type = 3;
394394
shdr.sh_flags = 0x2;
395-
shdr.sh_addr = elf_got_start + elf_got->size;
395+
shdr.sh_addr = ELF_START + ofs; /* Use consistent VA calculation */
396396
shdr.sh_offset = ofs;
397397
shdr.sh_size = elf_dynstr->size;
398398
shdr.sh_link = 0;
@@ -407,7 +407,7 @@ void elf_generate_section_headers(void)
407407
shdr.sh_name = sh_name;
408408
shdr.sh_type = 11;
409409
shdr.sh_flags = 0x2;
410-
shdr.sh_addr = elf_got_start + elf_got->size + elf_dynstr->size;
410+
shdr.sh_addr = ELF_START + ofs; /* Use consistent VA calculation */
411411
shdr.sh_offset = ofs;
412412
shdr.sh_size = elf_dynsym->size;
413413
shdr.sh_link = 7;
@@ -422,8 +422,7 @@ void elf_generate_section_headers(void)
422422
shdr.sh_name = sh_name;
423423
shdr.sh_type = 6;
424424
shdr.sh_flags = 0x3;
425-
shdr.sh_addr =
426-
elf_got_start + elf_got->size + elf_dynstr->size + elf_dynsym->size;
425+
shdr.sh_addr = ELF_START + ofs; /* Use consistent VA calculation */
427426
shdr.sh_offset = ofs;
428427
shdr.sh_size = elf_dynamic->size;
429428
shdr.sh_link = 7; /* The section header index of .dynstr. */
@@ -531,7 +530,14 @@ void elf_generate_sections(void)
531530
got_sz += PTR_SIZE;
532531

533532
/* Get the starting points of the sections. */
534-
elf_relplt_start = elf_data_start + elf_data->size + elf_interp->size;
533+
int code_size_estimate = elf_offset;
534+
int data_size_adjusted = elf_data->size;
535+
536+
/* Now calculate the virtual addresses */
537+
int file_offset_after_data =
538+
elf_header_len + code_size_estimate + data_size_adjusted;
539+
elf_interp_start = ELF_START + file_offset_after_data;
540+
elf_relplt_start = elf_interp_start + elf_interp->size;
535541
elf_plt_start = elf_relplt_start + relplt_sz;
536542
elf_got_start = elf_plt_start + plt_sz;
537543

@@ -701,8 +707,12 @@ void elf_preprocess(void)
701707
elf_header_len += (sizeof(elf32_phdr_t) * 3);
702708
elf_code_start = ELF_START + elf_header_len;
703709
elf_data_start = elf_code_start + elf_offset;
710+
/* Align elf_data BEFORE generate_sections so the size is correct */
704711
elf_align(elf_data);
712+
713+
/* Now generate sections with the correct aligned sizes */
705714
elf_generate_sections();
715+
706716
elf_align(elf_symtab);
707717
elf_align(elf_strtab);
708718
}

src/globals.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ strbuf_t *elf_got;
109109
int elf_header_len;
110110
int elf_code_start;
111111
int elf_data_start;
112+
int elf_interp_start;
112113
int elf_relplt_start;
113114
int elf_plt_start;
114115
int elf_got_start;

src/parser.c

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4719,13 +4719,17 @@ void parse_internal(void)
47194719
/* shecc run-time defines */
47204720
add_alias("__SHECC__", "1");
47214721

4722+
/* Linux syscall */
4723+
func_t *func = add_func("__syscall", true);
4724+
func->return_def.type = TY_int;
4725+
func->num_params = 0;
4726+
func->va_args = 1;
47224727
if (!dynlink) {
4723-
/* Linux syscall */
4724-
func_t *func = add_func("__syscall", true);
4725-
func->return_def.type = TY_int;
4726-
func->num_params = 0;
4727-
func->va_args = 1;
47284728
func->bbs = arena_alloc(BB_ARENA, sizeof(basic_block_t));
4729+
} else {
4730+
/* In dynlink mode, __syscall won't be implemented but needs to exist
4731+
* for parsing the built-in libc. It will be treated as external */
4732+
func->bbs = NULL;
47294733
}
47304734

47314735
/* lexer initialization */

src/reg-alloc.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -553,7 +553,8 @@ void reg_alloc(void)
553553
strcpy(ir->func_name, insn->rs2->var_name);
554554
if (dynlink) {
555555
func_t *target_func = find_func(ir->func_name);
556-
target_func->is_used = true;
556+
if (target_func)
557+
target_func->is_used = true;
557558
}
558559
} else {
559560
/* FIXME: Avoid outdated content in register after
@@ -605,7 +606,8 @@ void reg_alloc(void)
605606
strcpy(ir->func_name, insn->str);
606607
if (dynlink) {
607608
func_t *target_func = find_func(ir->func_name);
608-
target_func->is_used = true;
609+
if (target_func)
610+
target_func->is_used = true;
609611
}
610612

611613
is_pushing_args = 0;

0 commit comments

Comments
 (0)