Files
openwrt_mitrastar/toolchain/musl/patches/100-tools-Rework-adding-of-CFI-annotations.patch
Felix Fietkau 0b1e1943d5 toolchain/musl: fix build regression on x86_64
Fix whitespace mangling which broke matching opcodes in the CFI patch

Signed-off-by: Felix Fietkau <nbd@nbd.name>
2025-05-06 14:29:09 +02:00

1569 lines
47 KiB
Diff
Raw Permalink Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
From: =?UTF-8?q?Ignacy=20Gaw=C4=99dzki?=
<ignacy.gawedzki@green-communications.fr>
Date: Thu, 20 Mar 2025 12:07:21 +0100
Subject: [PATCH] tools: Rework adding of CFI annotations.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Rework awk scripts used to add CFI annotations to i386 and x86_64
assembly, in order to properly maintain CFA offset across in-function
jumps.
Add arm and aarch64 versions of these scripts.
Signed-off-by: Ignacy Gawędzki <ignacy.gawedzki@green-communications.fr>
---
create mode 100644 tools/add-cfi.aarch64.awk
create mode 100644 tools/add-cfi.arm.awk
--- /dev/null
+++ b/tools/add-cfi.aarch64.awk
@@ -0,0 +1,287 @@
+# Insert GAS CFI directives ("call frame information") into AArch64 asm input.
+#
+# CFI directives tell the assembler how to generate "stack frame" debug info.
+# This information can tell a debugger (like gdb) how to find the current stack
+# frame at any point in the program code, and how to find the values which
+# various registers had at higher points in the call stack.
+# With this information, the debugger can show a backtrace, and you can move up
+# and down the call stack and examine the values of local variables.
+
+BEGIN {
+	# Keep CFI data out of the .eh_frame ELF section (which we don't keep).
+	print ".cfi_sections .debug_frame"
+
+	# Name of the function being processed; "" while outside any function.
+	in_function = ""
+
+	# Emit .loc directives with line numbers from original source.
+	printf ".file 1 \"%s\"\n", ARGV[1]
+	line_number = 0
+
+	re_label = "([0-9]+|[a-zA-Z_][a-zA-Z0-9_]*)" # numeric local label or symbol
+
+	# Build an associative array of canonical register names.
+	for (i = 0; i < 30; ++i)
+		regname["x" i] = regname["w" i] = "x" i
+	regname["x30"] = regname["w30"] = regname["lr"] = "x30"
+	regname["xzr"] = regname["wzr"] = "xzr"
+	regname["sp"] = regname["wsp"] = "sp"
+}
+
+{
+ ++line_number
+
+ # Clean the input up before doing anything else.
+ # Delete comments.
+ gsub(/^#.*|\/\/.*|\/\*.*\*\//, "")
+
+ # Canonicalize whitespace.
+ gsub(/[ \t]+/, " ") # Mawk doesn't understand \s.
+ gsub(/ *, */, ",")
+ gsub(/ *: */, ": ")
+ gsub(/ $/, "")
+ gsub(/^ /, "")
+}
+
+# Check for assembler directives which we care about.
+/^\.(section|data|text)/ {
+ # A .cfi_startproc/.cfi_endproc pair should be within the same section
+ # otherwise, clang will choke when generating ELF output.
+ if (in_function) {
+ print ".cfi_endproc"
+ in_function = ""
+ }
+}
+
+# Record each function name: the symbol is whatever precedes the first comma of $2 (all of $2 for the "STT_FUNCTION" form).
+/^\.type [a-zA-Z0-9_]+( STT_FUNCTION|,[#@%"]function)/ {
+	functions[substr($2, 1, index($2 ",", ",") - 1)] = 1
+}
+
+# Not interested in assembler directives beyond this, just pass them through.
+/^\./ {
+ print
+ next
+}
+
+# Emit a CFA adjustment of `delta` and add it to the running offset kept for the current function (no-op outside a function).
+function adjust_sp_offset(delta) {
+	if (!in_function)
+		return
+	cfa_offset[in_function] += delta
+	printf ".cfi_adjust_cfa_offset %d\n", delta
+}
+
+# Emit .cfi_undefined for a register that is neither saved nor already dirty (inside a function only), then mark it dirty.
+function trashed(reg) {
+	if (in_function && !((reg in saved) || (reg in dirty)))
+		printf ".cfi_undefined %s\n", reg
+	dirty[reg] = 1
+}
+
+# Remember the current CFA offset under the target of a jump (forward numeric label "Nf" or plain symbol).
+function jump_to_label(label) {
+	if (!in_function)
+		return
+	if (match(label, /^[0-9]+f$/)) # "forward" label: key is the number without the "f"
+		cfa_offset[substr(label, 1, RLENGTH - 1)] = cfa_offset[in_function]
+	else if (match(label, /^[a-zA-Z_][a-zA-Z0-9_]*$/))
+		cfa_offset[label] = cfa_offset[in_function]
+}
+
+# Helper to set relative offset of registers pushed on the stack. NOTE(review): assumes 4-byte slots and has no caller visible in this script — confirm before use on 64-bit X registers.
+function push_regs(regs, numregs, i) {
+ adjust_sp_offset(numregs * 4)
+ for (i = 1; i <= numregs; ++i) {
+ reg = regname[regs[i]]
+ if (!(reg in saved) && !(reg in dirty)) {
+ printf ".cfi_rel_offset %s,%i\n", reg, ((i - 1) * 4)
+ saved[reg] = 1
+ }
+ }
+}
+
+# Helper to invalidate unsaved registers popped from the stack. NOTE(review): assumes 4-byte slots and has no caller visible in this script — confirm before use.
+function pop_regs(regs, numregs, i) {
+ adjust_sp_offset(numregs * -4)
+ for (i = 1; i <= numregs; ++i) {
+ reg = regname[regs[i]]
+ trashed(reg)
+ }
+}
+
+# Emit .cfi_rel_offset for `reg` (canonicalized through regname) at SP-relative `offset`, once, unless the register is dirty.
+function save_reg(reg, offset) {
+	reg = regname[reg]
+	if ((reg in saved) || (reg in dirty))
+		return
+	saved[reg] = 1
+	printf ".cfi_rel_offset %s,%d\n", reg, offset
+}
+
+# Process labels: open/close CFI procedures at function labels and restore CFA state at jump/call targets.
+$0 ~ "^" re_label ":" {
+	# Parse each leading label.
+	while (match($0, "^" re_label ":")) {
+
+		# Extract label name.
+		label = substr($1, 1, RLENGTH - 1)
+
+		# Remove label from current line.
+		sub("^" re_label ": ?", "")
+
+		if (label in functions) {
+			if (in_function) {
+				print ".cfi_endproc"
+				for (l in called)
+					delete called[l]
+			}
+
+			in_function = label
+			print ".cfi_startproc"
+
+			for (reg in saved)
+				delete saved[reg]
+			for (reg in dirty)
+				delete dirty[reg]
+		}
+
+		printf "%s:\n", label
+
+		# If this label has been jumped to, define the CFA offset to its
+		# value at the location of the jump.
+		if (!(label in functions) && in_function && label in cfa_offset) {
+			if (cfa_offset[in_function] != cfa_offset[label]) {
+				printf ".cfi_def_cfa_offset %d\n", cfa_offset[label]
+				cfa_offset[in_function] = cfa_offset[label]
+			}
+			delete cfa_offset[label]
+		}
+
+		# If this label has been called, possibly invalidate LR (canonical name, so a saved x30 is respected).
+		if (label in called && !(label in functions)) {
+			trashed(regname["lr"])
+			delete called[label]
+		}
+	}
+	# An instruction may follow on the same line, so continue processing.
+}
+
+# Skip empty line.
+/^$/ { next }
+
+# Issue source line number.
+{
+ printf ".loc 1 %d\n", line_number
+ print
+}
+
+# Process jumps to label (using B*).
+/^b[^xrl]/ {
+ jump_to_label($2)
+}
+
+# Process jumps to label (using [CT]BN?Z).
+/^[ct]bn?z / {
+ if (match($2, /,.+$/))
+ jump_to_label(substr($2, RSTART + 1, RLENGTH - 1))
+}
+
+# Issue relative offsets of registers stored in SP-relative locations (only X registers are recorded).
+/^st(n?p|r[bh]?|l[lu]?r|tr|ur) .+,\[(sp|x30)[,\]]/ {
+	if (in_function) {
+		if (match($2, /(,#?[+-]?(0x[0-9a-fA-F]+|[0-9]+))?\]$/)) {
+			# Offset with no write-back.
+			if (RLENGTH == 1)
+				offset = 0
+			else
+				offset = parse_const(substr($2, RSTART + 2, RLENGTH - 3))
+			split($2, operands, ",")
+			if (match($1, /^stn?p$/)) {
+				if (match(operands[1], /^x/)) {
+					save_reg(operands[1], offset)
+					save_reg(operands[2], offset + 8)
+				}
+			} else if (match(operands[1], /^x/)) # anchor must precede the "x"; /x^/ can never match
+				save_reg(operands[1], offset)
+		} else if (match($2, /,#?[+-]?(0x[0-9a-fA-F]+|[0-9]+)\]!$/)) {
+			# Pre-index with write-back.
+			offset = parse_const(substr($2, RSTART + 2, RLENGTH - 4))
+			adjust_sp_offset(-offset)
+			split($2, operands, ",")
+			if ($1 == "stp") {
+				if (match(operands[1], /^x/)) {
+					save_reg(operands[1], 0)
+					save_reg(operands[2], 8)
+				}
+			} else if (match(operands[1], /^x/))
+				save_reg(operands[1], 0)
+		} else if (match($2, /,#?[+-]?(0x[0-9a-fA-F]+|[0-9]+)$/)) {
+			# Post-index
+			offset = parse_const(substr($2, RSTART + 2, RLENGTH - 2))
+			split($2, operands, ",")
+			if ($1 == "stp") {
+				if (match(operands[1], /^x/)) {
+					save_reg(operands[1], 0)
+					save_reg(operands[2], 8)
+				}
+			} else if (match(operands[1], /^x/))
+				save_reg(operands[1], 0)
+			adjust_sp_offset(-offset)
+		}
+	}
+}
+
+# Adjust CFA offset when decreasing SP.
+/subs?(\.[nw])? sp,sp,/ {
+ if (in_function && match($2, /,#[+-]?(0x[0-9a-fA-F]+|[0-9]+)$/))
+ adjust_sp_offset(parse_const(substr($2, RSTART + 2, RLENGTH - 2)))
+}
+
+# Adjust CFA offset when increasing SP.
+/adds?(\.[nw])? sp,sp,/ {
+ if (in_function && match($2, /,#[+-]?(0x[0-9a-fA-F]+|[0-9]+)$/))
+ adjust_sp_offset(-parse_const(substr($2, RSTART + 2, RLENGTH - 2)))
+}
+
+# Process calls to labels.
+/bl[a-z]* / {
+ if (match($2, /^[0-9]+f$/)) # "forward" label
+ called[substr($2, 1, RLENGTH - 1)] = 1
+ else if (match($2, /^[a-zA-Z_][0-9a-zA-Z_]*$/))
+ called[$2] = 1
+}
+
+# Invalidate unsaved registers being written to.
+/^(adcs?|adds?|adrp?|ands?|asrv?|bfc|bfi|bfm|bfxil|bics?|cin[cv]|cl[sz]|cneg|crc32[a-z]+|csel|csetm?|csin[cv]|csneg|eo[nr]|extr|ldap(r[bh]?|ur(s?[bhw]?))|ldar[bh]?|ldax[pr][bh]?|ldlar[bh]?|ldr((aa)?|s?[bhw])|ldtrs?[bhw]?|ldurs?[bhw]?|ldxr[bh]?|ls[lr]v?|madd|mneg|mov[knz]?|mrs|msub|mul|mvn|negs?|ngcs?|orn|orr|pac[a-z0-9]+|rbit|rev(16|32)?|rorv?|sbcs?|sbfiz|sbfm|sbfx|sdiv|smaddl|smnegl|smsubl|smul[hl]|subs?|sxt[bhw]|sysl|ubfiz|ubfm|ubfx|udiv|umaddl|umnegl|umsubl|umul[hl]|uxt[bhw]) ([xw]([0-9]|[12][0-9]|30)|sp),/ {
+ split($2, args, ",")
+ reg = args[1]
+ if (reg != "sp")
+ trashed(regname[reg])
+}
+
+# Invalidate unsaved registers being written to by atomic operations in memory.
+/^ld(add|clr|eor|set|[su](max|min))/ {
+ split($2, args, ",")
+ trashed(regname[args[2]])
+}
+
+# Invalidate unsaved registers being written to by pair loading.
+/^ld[nx]p(sw)? / {
+ split($2, args, ",")
+ trashed(regname[args[1]])
+ trashed(regname[args[2]])
+}
+
+# Invalidate unsaved registers being written to by long instructions.
+/^(smlals?|smlal(bb|bt|tb|tt)|smlaldx?|smlsldx?|smull|umaal|umlal|umulls?) / {
+ split($2, args, ",")
+ trashed(regname[args[1]])
+ trashed(regname[args[2]])
+}
+
+END {
+ # Issue end of function if still inside one.
+ if (in_function)
+ print ".cfi_endproc"
+}
--- /dev/null
+++ b/tools/add-cfi.arm.awk
@@ -0,0 +1,367 @@
+# Insert GAS CFI directives ("call frame information") into ARM asm input.
+#
+# CFI directives tell the assembler how to generate "stack frame" debug info.
+# This information can tell a debugger (like gdb) how to find the current stack
+# frame at any point in the program code, and how to find the values which
+# various registers had at higher points in the call stack.
+# With this information, the debugger can show a backtrace, and you can move up
+# and down the call stack and examine the values of local variables.
+
+BEGIN {
+	# Keep CFI data out of the .eh_frame ELF section (which we don't keep).
+	print ".cfi_sections .debug_frame"
+
+	# Name of the function being processed; "" while outside any function.
+	in_function = ""
+
+	# Emit .loc directives with line numbers from original source.
+	printf ".file 1 \"%s\"\n", ARGV[1]
+	line_number = 0
+
+	re_label = "([0-9]+|[a-zA-Z_][a-zA-Z0-9_]*)" # numeric local label or symbol
+
+	# Build associative arrays of canonical register names and numbers.
+	for (i = 0; i < 10; ++i) {
+		regname["r" i] = "r" i
+		regnum["r" i] = i
+	}
+	regname["r10"] = regname["sl"] = "r10"
+	regnum["r10"] = regnum["sl"] = 10
+	regname["r11"] = regname["fp"] = "r11"
+	regnum["r11"] = regnum["fp"] = 11
+	regname["r12"] = regname["ip"] = "r12"
+	regnum["r12"] = regnum["ip"] = 12
+	regname["r13"] = regname["sp"] = "r13"
+	regnum["r13"] = regnum["sp"] = 13
+	regname["r14"] = regname["lr"] = "r14"
+	regnum["r14"] = regnum["lr"] = 14
+	regname["r15"] = regname["pc"] = "r15"
+	regnum["r15"] = regnum["pc"] = 15
+}
+
+{
+ ++line_number
+
+ # Clean the input up before doing anything else.
+ # Delete comments.
+ gsub(/(^#|@|\/\/).*|\/\*.*\*\//, "")
+
+ # Canonicalize whitespace.
+ gsub(/[ \t]+/, " ") # Mawk doesn't understand \s.
+ gsub(/ *, */, ",")
+ gsub(/ *: */, ": ")
+ gsub(/ $/, "")
+ gsub(/^ /, "")
+}
+
+# Check for assembler directives which we care about.
+/^\.(section|data|text)/ {
+ # A .cfi_startproc/.cfi_endproc pair should be within the same section
+ # otherwise, clang will choke when generating ELF output.
+ if (in_function) {
+ print ".cfi_endproc"
+ in_function = ""
+ }
+}
+
+# Record each function name: the symbol is whatever precedes the first comma of $2 (all of $2 for the "STT_FUNCTION" form).
+/^\.type [a-zA-Z0-9_]+( STT_FUNCTION|,[#@%"]function)/ {
+	functions[substr($2, 1, index($2 ",", ",") - 1)] = 1
+}
+
+# Not interested in assembler directives beyond this, just pass them through.
+/^\./ {
+ print
+ next
+}
+
+# Helper to adjust CFA offset.
+function adjust_sp_offset(delta) {
+ if (in_function) {
+ printf ".cfi_adjust_cfa_offset %d\n", delta
+ cfa_offset[in_function] += delta
+ }
+}
+
+# Helper to invalidate unsaved register.
+function trashed(reg) {
+ if (in_function && !(reg in saved) && !(reg in dirty))
+ printf ".cfi_undefined %s\n", reg
+ dirty[reg] = 1
+}
+
+# Helper to process jumps to labels by saving the current CFA offset.
+function jump_to_label(label) {
+ if (in_function) {
+ if (match(label, /^[0-9]+f$/)) # "forward" label
+ cfa_offset[substr(label, 1, RLENGTH - 1)] = cfa_offset[in_function]
+ else if (match(label, /^[a-zA-Z_][a-zA-Z0-9_]*$/))
+ cfa_offset[label] = cfa_offset[in_function]
+ }
+}
+
+# Helper to save a single register saved in SP-relative locations.
+function save_reg(reg, offset) {
+ reg = regname[reg]
+ if (!(reg in saved) && !(reg in dirty)) {
+ printf ".cfi_rel_offset %s,%d\n", reg, offset
+ saved[reg] = 1
+ }
+}
+
+# Helper to save registers stored at SP without write-back: the i-th register of an ascending store sits at SP + 4 * (i - 1).
+function save_regs(regs, numregs, i) {
+	for (i = 1; i <= numregs; ++i)
+		save_reg(regname[regs[i]], (i - 1) * 4)
+}
+
+# Helper to set relative offset of registers pushed on the stack.
+function push_regs(regs, numregs, i) {
+ adjust_sp_offset(numregs * 4)
+ for (i = 1; i <= numregs; ++i)
+ save_reg(regname[regs[i]], (i - 1) * 4)
+}
+
+# Helper to invalidate unsaved registers popped from the stack.
+function pop_regs(regs, numregs, i) {
+ adjust_sp_offset(numregs * -4)
+ for (i = 1; i <= numregs; ++i) {
+ reg = regname[regs[i]]
+ trashed(reg)
+ }
+}
+
+# Parse register lists such as "{r4-r6,lr}" in `arg`; fill regs[1..n] with canonical names in ascending register order and return n.
+function split_reglist(arg, regs, num, toks, tmp, dash, i, j, first, last, rest) {
+	while (match(arg, /^{[^}]+}/)) {
+		num = split(substr(arg, RSTART + 1, RLENGTH - 2), toks, ",")
+		# Capture the remainder now: the match() calls below overwrite RSTART/RLENGTH.
+		rest = substr(arg, RSTART + RLENGTH)
+		for (i = 1; i <= num; ++i)
+			if (match(toks[i], /^r([0-9]|1[0-5])-r([0-9]|1[0-5])$/)) {
+				dash = index(toks[i], "-")
+				first = 0 + substr(toks[i], 2, dash - 2)
+				last = 0 + substr(toks[i], dash + 2)
+				for (j = first; j <= last; ++j)
+					tmp[j]
+			} else
+				tmp[regnum[toks[i]]]
+		arg = rest
+		if (!match(arg, /^[\t ]*[+|][\t ]*/))
+			break
+		arg = substr(arg, RLENGTH + 1)
+	}
+	num = 0
+	for (i = 0; i < 16; ++i)
+		if (i in tmp)
+			regs[++num] = regname["r" i]
+	return num
+}
+
+# Process labels: open/close CFI procedures at function labels and restore CFA state at jump/call targets.
+$0 ~ "^" re_label ":" {
+	# Parse each leading label.
+	while (match($0, "^" re_label ":")) {
+
+		# Extract label name.
+		label = substr($1, 1, RLENGTH - 1)
+
+		# Remove label from current line.
+		sub("^" re_label ": ?", "")
+
+		if (label in functions) {
+			if (in_function) {
+				print ".cfi_endproc"
+				for (l in called)
+					delete called[l]
+			}
+
+			in_function = label
+			print ".cfi_startproc"
+
+			for (reg in saved)
+				delete saved[reg]
+			for (reg in dirty)
+				delete dirty[reg]
+		}
+
+		printf "%s:\n", label
+
+		# If this label has been jumped to, define the CFA offset to its
+		# value at the location of the jump.
+		if (!(label in functions) && in_function && label in cfa_offset) {
+			if (cfa_offset[in_function] != cfa_offset[label]) {
+				printf ".cfi_def_cfa_offset %d\n", cfa_offset[label]
+				cfa_offset[in_function] = cfa_offset[label]
+			}
+			delete cfa_offset[label]
+		}
+
+		# If this label has been called, possibly invalidate LR (canonical name, so a saved r14 is respected).
+		if (label in called && !(label in functions)) {
+			trashed(regname["lr"])
+			delete called[label]
+		}
+	}
+	# An instruction may follow on the same line, so continue processing.
+}
+
+# Skip empty line.
+/^$/ { next }
+
+# Issue source line number.
+{
+ printf ".loc 1 %d\n", line_number
+ print
+}
+
+# Process jumps to label: B with an optional condition code (so BLT/BLE/BLS/BLO count); BL, BLX and BX are excluded.
+/^b(eq|ne|cs|hs|cc|lo|mi|pl|vs|vc|hi|ls|ge|lt|gt|le|al)?(\.[nw])? / {
+	jump_to_label($2)
+}
+
+# Process jumps to label (using CBNZ?).
+/^cbnz? / {
+ if (match($2, /,.*$/))
+ jump_to_label(substr($2, RSTART + 1, RLENGTH - 1))
+}
+
+# Adjust CFA offset and issue relative offsets of pushed registers using PUSH.
+/^push / {
+ if (in_function) {
+ numregs = split_reglist($2, regs)
+ push_regs(regs, numregs);
+ }
+}
+
+# Adjust CFA offset and issue relative offsets of registers pushed with STMFD/STMDB on SP with write-back.
+/^stm(fd|db)(al)?(\.[nw])? (sp|r13)!,/ {
+ if (in_function) {
+ numregs = split_reglist(substr($2, index($2, ",") + 1), regs)
+ push_regs(regs, numregs);
+ }
+}
+
+/^stm(ia|ea)?(al)?(\.[nw])? (sp|r13),/ {
+ if (in_function) {
+ numregs = split_reglist(substr($2, index($2, ",") + 1), regs)
+ save_regs(regs, numregs);
+ }
+}
+
+# Adjust CFA offset and invalidate unsaved registers popped using POP.
+/^pop / {
+ if (in_function) {
+ numregs = split_reglist($2, regs)
+ pop_regs(regs, numregs)
+ }
+}
+
+# Adjust CFA offset and invalidate unsaved registers popped using LDMFD.
+/^ldm(fd|ia)(al)?(\.[nw])? (sp|r13)!,/ {
+ if (in_function) {
+ numregs = split_reglist(substr($2, index($2, ",") + 1), regs)
+ pop_regs(regs, numregs)
+ }
+}
+
+# Issue relative offsets of registers stored in SP-relative locations.
+/^str[a-z.]* .*,\[(sp|r13)[,\]]/ {
+ if (in_function && !match($1, /^str(ex)?[bh]/)) {
+ if (match($2, /(,#[+-]?(0x[0-9a-fA-F]+|[0-9]+))?\]$/)) {
+ # Offset with no write-back.
+ if (RLENGTH == 1)
+ offset = 0
+ else
+ offset = parse_const(substr($2, RSTART + 2, RLENGTH - 3))
+ split($2, operands, ",")
+ if (match($1, /^str(ex)?d/)) {
+ save_reg(operands[1], offset)
+ save_reg(operands[2], offset + 4)
+ } else
+ save_reg(operands[1], offset)
+ } else if (match($2, /,#[+-]?(0x[0-9a-fA-F]+|[0-9]+)\]!$/)) {
+ # Pre-index with write-back.
+ offset = parse_const(substr($2, RSTART + 2, RLENGTH - 4))
+ adjust_sp_offset(-offset)
+ split($2, operands, ",")
+ if (match($1, /^str(ex)?d/)) {
+ save_reg(operands[1], 0)
+ save_reg(operands[2], 4)
+ } else
+ save_reg(operands[1], 0)
+ } else if (match($2, /,#[+-]?(0x[0-9a-fA-F]+|[0-9]+)$/)) {
+ # Post-index
+ offset = parse_const(substr($2, RSTART + 2, RLENGTH - 2))
+ split($2, operands, ",")
+ if (match($1, /^str(ex)?d/)) {
+ save_reg(operands[1], 0)
+ save_reg(operands[2], 4)
+ } else
+ save_reg(operands[1], 0)
+ adjust_sp_offset(-offset)
+ }
+ }
+}
+
+# Adjust CFA offset when decreasing SP.
+/subs?(al)?(\.[nw])? (sp|r13),(sp|r13),/ {
+ if (in_function && match($2, /,#[+-]?(0x[0-9a-fA-F]+|[0-9]+)$/))
+ adjust_sp_offset(parse_const(substr($2, RSTART + 2, RLENGTH - 2)))
+}
+
+# Adjust CFA offset when increasing SP.
+/adds?(al)?(\.[nw])? (sp|r13),(sp|r13),/ {
+ if (in_function && match($2, /,#[+-]?(0x[0-9a-fA-F]+|[0-9]+)$/))
+ adjust_sp_offset(-parse_const(substr($2, RSTART + 2, RLENGTH - 2)))
+}
+
+# Process calls to labels: BL/BLX with an optional condition code. BLT/BLE/BLS/BLO are conditional branches, not calls.
+/^blx?(eq|ne|cs|hs|cc|lo|mi|pl|vs|vc|hi|ls|ge|lt|gt|le|al)?(\.[nw])? / {
+	if (match($2, /^[0-9]+f$/)) # "forward" label
+		called[substr($2, 1, RLENGTH - 1)] = 1
+	else if (match($2, /^[a-zA-Z_][0-9a-zA-Z_]*$/))
+		called[$2] = 1
+}
+
+# Invalidate unsaved registers being written to (first operand of the listed data-processing and load instructions, except SP).
+/^((adc|add|and|asr|adr|bic|eor|lsl|lsr|mla|mov|mul|mvn|orn|orr|ror|rrx|rsb|rsc|sbc|sub)s?|bfc|bfi|clz|cpy|ldr[a-z]*|mls|movt|mrs|neg|pkh(bt|tb)|qadd(8|16)?|qasx|qdadd|qdsub|qsax|qsub(8|16)?|rbit|rev(16)?|revsh|sadd(16|8)|sasx|sbfx|sdiv|sel|shadd(16|8)|shasx|shsax|shsub(16|8)|smla(bb|bt|tb|tt)|smladx?|smlaw[tb]|smlsdx?|smmlar?|smmlsr?|smmulr?|smuadx?|smul(bb|bt|tb|tt)|smulw[bt]|smusdx?|ssat(16)?|ssax|ssub(16|8)|swpb?|sxtab(16)?|sxtah|sxtb(16)?|sxth|uadd(16|8)|uasx|ubfx|udiv|uhadd(16|8)|uhasx|uhsax|uhsub(16|8)|uqadd(16|8)|uqasx|uqsax|uqsub(16|8)|usada?8|usat(16)?|usax|usub(16|8)|uxtab(16)?|uxtah|uxtb(16)?|uxth)(eq|ne|cs|cc|mi|pl|vs|vc|hi|ls|ge|lt|gt|le|al)? (r([0-9]|1[0-5])|ip|sp|lr|pc),/ {
+	split($2, args, ",")
+	reg = args[1]
+	if (reg != "sp")
+		trashed(regname[reg])
+}
+
+# Invalidate unsaved registers being written to by long instructions.
+/^(smlals?|smlal(bb|bt|tb|tt)|smlaldx?|smlsldx?|smull|umaal|umlal|umulls?)/ {
+ split($2, args, ",")
+ trashed(regname[args[1]])
+ trashed(regname[args[2]])
+}
+
+# Invalidate unsaved register being modified by write-back on store multiple.
+/^stm[a-z.]* [^,]+!,/ {
+ first_arg = substr($2, 1, index($2, ",") - 1)
+ if (!match(first_arg, /^(sp|r13)/))
+ trashed(regname[substr(first_arg, 1, length(first_arg) - 1)])
+}
+
+# Invalidate unsaved registers being modified by load multiple.
+/^ldm[a-z.]* [^,]+,{.*}$/ {
+ comma = index($2, ",")
+ first_arg = substr($2, 1, comma - 1)
+ other_args = substr($2, comma + 1)
+ if (!match(first_arg, /^(sp|r13)/)) {
+ if (match(first_arg, /!$/))
+ trashed(regname[substr(first_arg, 1, RSTART - 1)])
+ numregs = split_reglist(other_args, regs)
+ for (i = 1; i <= numregs; ++i)
+ trashed(regname[regs[i]])
+ }
+}
+
+END {
+ # Issue end of function if still inside one.
+ if (in_function)
+ print ".cfi_endproc"
+}
--- a/tools/add-cfi.common.awk
+++ b/tools/add-cfi.common.awk
@@ -1,26 +1,46 @@
-function hex2int(str, i) {
+function hex2int(str, i) {
str = tolower(str)
for (i = 1; i <= 16; i++) {
char = substr("0123456789abcdef", i, 1)
- lookup[char] = i-1
+ lookup[char] = i - 1
}
result = 0
for (i = 1; i <= length(str); i++) {
- result = result * 16
- char = substr(str, i, 1)
- result = result + lookup[char]
+ result *= 16
+ char = substr(str, i, 1)
+ result += lookup[char]
+ }
+ return result
+}
+
+function oct2int(str, i) {
+ str = tolower(str)
+
+ for (i = 1; i <= 8; ++i) {
+ char = substr("01234567", i, 1)
+ lookup[char] = i - 1
+ }
+
+ result = 0
+ for (i = 1; i <= length(str); ++i) {
+ result *= 8
+ char = substr(str, i, 1)
+ result += lookup[char]
}
return result
}
function parse_const(str) {
- sign = sub(/^-/, "", str)
- hex = sub(/^0x/, "", str)
+ neg = sub(/^-/, "", str)
+ oct = match(str, /^0[0-7]/)
+ hex = sub(/^0x/, "", str)
if (hex)
n = hex2int(str)
+ else if (oct)
+ n = oct2int(str)
else
n = str+0
- return sign ? -n : n
+ return neg? -n: n
}
--- a/tools/add-cfi.i386.awk
+++ b/tools/add-cfi.i386.awk
@@ -1,123 +1,179 @@
-# Insert GAS CFI directives ("control frame information") into x86-32 asm input
+# Insert GAS CFI directives ("call frame information") into x86-32 asm input.
#
-# CFI directives tell the assembler how to generate "stack frame" debug info
+# CFI directives tell the assembler how to generate "stack frame" debug info.
# This information can tell a debugger (like gdb) how to find the current stack
# frame at any point in the program code, and how to find the values which
-# various registers had at higher points in the call stack
+# various registers had at higher points in the call stack.
# With this information, the debugger can show a backtrace, and you can move up
-# and down the call stack and examine the values of local variables
+# and down the call stack and examine the values of local variables.
BEGIN {
- # don't put CFI data in the .eh_frame ELF section (which we don't keep)
+ # Don't put CFI data in the .eh_frame ELF section (which we don't keep).
print ".cfi_sections .debug_frame"
- # only emit CFI directives inside a function
- in_function = 0
+ # Only emit CFI directives inside a function.
+ in_function = ""
- # emit .loc directives with line numbers from original source
+ # Emit .loc directives with line numbers from original source.
printf ".file 1 \"%s\"\n", ARGV[1]
line_number = 0
- # used to detect "call label; label:" trick
- called = ""
+ re_label = "([0-9]+|[a-zA-Z_][a-zA-Z0-9_]*)"
+
+ for (i = 1; i <= 4; ++i) {
+ letter = substr("abcd", i, 1)
+ regname[letter "l"] = regname[letter "h"] = regname[letter "x"] = \
+ regname["e" letter "x"] = "e" letter "x"
+ }
+
+ regname["si"] = regname["esi"] = "esi"
+ regname["di"] = regname["edi"] = "edi"
+ regname["bp"] = regname["ebp"] = "ebp"
+ regname["sp"] = regname["esp"] = "esp"
}
+# For instructions with 2 operands, get 1st operand (assuming it is constant).
function get_const1() {
- # for instructions with 2 operands, get 1st operand (assuming it is constant)
- match($0, /-?(0x[0-9a-fA-F]+|[0-9]+),/)
- return parse_const(substr($0, RSTART, RLENGTH-1))
+ match($2, /^\$[+-]?(0x[0-9a-fA-F]+|[0-9]+),/)
+ return parse_const(substr($2, 2, RLENGTH - 2))
}
-function canonicalize_reg(register) {
- if (match(register, /^e/))
- return register
- else if (match(register, /[hl]$/)) # AH, AL, BH, BL, etc
- return "e" substr(register, 1, 1) "x"
- else # AX, BX, CX, etc
- return "e" register
-}
+# Only use if you already know there is 1 and only 1 register.
function get_reg() {
- # only use if you already know there is 1 and only 1 register
- match($0, /%e?([abcd][hlx]|si|di|bp)/)
- return canonicalize_reg(substr($0, RSTART+1, RLENGTH-1))
+ return regname[substr($2, 2, length($2) - 1)]
}
+
+# For instructions with 2 operands, get 1st operand (assuming it is register).
function get_reg1() {
- # for instructions with 2 operands, get 1st operand (assuming it is register)
- match($0, /%e?([abcd][hlx]|si|di|bp),/)
- return canonicalize_reg(substr($0, RSTART+1, RLENGTH-2))
+ match($2, /^%e?([abcd][hlx]|si|di|bp),/)
+ return regname[substr($2, 2, RLENGTH - 2)]
}
+
+# For instructions with 2 operands, get 2nd operand (assuming it is register).
function get_reg2() {
- # for instructions with 2 operands, get 2nd operand (assuming it is register)
- match($0, /,%e?([abcd][hlx]|si|di|bp)/)
- return canonicalize_reg(substr($0, RSTART+2, RLENGTH-2))
+ match($2, /,%e?([abcd][hlx]|si|di|bp)$/)
+ return regname[substr($2, RSTART + 2, RLENGTH - 2)]
}
+# Helper to adjust CFA offset.
function adjust_sp_offset(delta) {
- if (in_function)
+ if (in_function) {
printf ".cfi_adjust_cfa_offset %d\n", delta
+ cfa_offset[in_function] += delta
+ }
+}
+
+function save_reg(reg, offset) {
+ if (!(reg in saved) && !(reg in dirty)) {
+ printf ".cfi_rel_offset %s,%d\n", reg, offset
+ saved[reg] = 1
+ }
+}
+
+# Helper to process jumps to labels by saving the current CFA offset.
+function jump_to_label(label) {
+ if (in_function) {
+ if (match(label, /^[0-9]+f$/)) # "forward" label
+ cfa_offset[substr(label, 1, RLENGTH - 1)] = cfa_offset[in_function]
+ else if (match(label, /^[a-zA-Z_][a-zA-Z0-9_]*$/))
+ cfa_offset[label] = cfa_offset[in_function]
+ }
}
{
- line_number = line_number + 1
+ ++line_number
- # clean the input up before doing anything else
- # delete comments
- gsub(/(#|\/\/).*/, "")
+ # Clean the input up before doing anything else.
+ # Delete comments.
+ gsub(/#.*|\/\*.*\*\//, "")
- # canonicalize whitespace
- gsub(/[ \t]+/, " ") # mawk doesn't understand \s
+ # Canonicalize whitespace.
+ gsub(/[ \t]+/, " ") # Mawk doesn't understand \s.
gsub(/ *, */, ",")
gsub(/ *: */, ": ")
+ gsub(/%cs: */, "%cs:")
+ gsub(/%ds: */, "%ds:")
+ gsub(/%ss: */, "%ss:")
+ gsub(/%es: */, "%es:")
+ gsub(/%fs: */, "%fs:")
+ gsub(/%gs: */, "%gs:")
gsub(/ $/, "")
gsub(/^ /, "")
}
-# check for assembler directives which we care about
+# Check for assembler directives which we care about.
/^\.(section|data|text)/ {
- # a .cfi_startproc/.cfi_endproc pair should be within the same section
- # otherwise, clang will choke when generating ELF output
+ # A .cfi_startproc/.cfi_endproc pair should be within the same section.
+ # Otherwise, clang will choke when generating ELF output.
if (in_function) {
print ".cfi_endproc"
- in_function = 0
+ in_function = ""
}
}
-/^\.type [a-zA-Z0-9_]+,@function/ {
- functions[substr($2, 1, length($2)-10)] = 1
+
+# Record each function name.
+/^\.type [a-zA-Z0-9_]+( STT_FUNCTION|,[#@%"]function)/ {
+ functions[substr($2, 1, length($2) - 10)] = 1
}
-# not interested in assembler directives beyond this, just pass them through
+
+# Not interested in assembler directives beyond this, just pass them through.
/^\./ {
print
next
}
-/^[a-zA-Z0-9_]+:/ {
- label = substr($1, 1, length($1)-1) # drop trailing :
-
- if (called == label) {
- # note adjustment of stack pointer from "call label; label:"
- adjust_sp_offset(4)
- }
+$0 ~ "^" re_label ":" {
+ # Parse each leading label.
+ while (match($0, "^" re_label ":")) {
+
+ # Extract label name.
+ label = substr($1, 1, RLENGTH - 1)
+
+ # Remove label from current line.
+ sub("^" re_label ": ?", "")
+
+ if (label in functions) {
+ if (in_function) {
+ print ".cfi_endproc"
+ for (l in called)
+ delete called[l]
+ }
+
+ in_function = label
+ print ".cfi_startproc"
+
+ for (reg in saved)
+ delete saved[reg]
+ for (reg in dirty)
+ delete dirty[reg]
+ }
- if (functions[label]) {
- if (in_function)
- print ".cfi_endproc"
+ printf "%s:\n", label
- in_function = 1
- print ".cfi_startproc"
+ # If this label has been jumped to, define the CFA offset to its
+ # value at the location of the jump.
+ if (!(label in functions) && in_function && label in cfa_offset) {
+ if (cfa_offset[in_function] != cfa_offset[label]) {
+ printf ".cfi_def_cfa_offset %d\n", cfa_offset[label]
+ cfa_offset[in_function] = cfa_offset[label]
+ }
+ delete cfa_offset[label]
+ }
- for (register in saved)
- delete saved[register]
- for (register in dirty)
- delete dirty[register]
+ # If this label has been called, adjust CFA offset.
+ if (label in called && !(label in functions)) {
+ adjust_sp_offset(4);
+ delete called[label]
+ }
}
-
- # an instruction may follow on the same line, so continue processing
+ # An instruction may follow on the same line, so continue processing.
}
+# Skip empty line.
/^$/ { next }
+# Issue source line number.
{
- called = ""
printf ".loc 1 %d\n", line_number
print
}
@@ -126,82 +182,145 @@ function adjust_sp_offset(delta) {
# We do NOT attempt to understand foolish and ridiculous tricks like stashing
# the stack pointer and then using %esp as a scratch register, or bitshifting
# it or taking its square root or anything stupid like that.
-# %esp should only be adjusted by pushing/popping or adding/subtracting constants
+# %esp should only be adjusted by pushing/popping or adding/subtracting
+# constants.
#
-/pushl?/ {
- if (match($0, / %(ax|bx|cx|dx|di|si|bp|sp)/))
+/^push[wl]? / {
+ if ($1 == "pushw" || match($2, /^%([abcd]x|di|si|bp|sp)$/))
adjust_sp_offset(2)
else
adjust_sp_offset(4)
}
-/popl?/ {
- if (match($0, / %(ax|bx|cx|dx|di|si|bp|sp)/))
+
+/^pop[wl]? / {
+ if ($1 == "popw" || match($2, /^%([abcd]x|di|si|bp|sp)$/))
adjust_sp_offset(-2)
else
adjust_sp_offset(-4)
}
-/addl? \$-?(0x[0-9a-fA-F]+|[0-9]+),%esp/ { adjust_sp_offset(-get_const1()) }
-/subl? \$-?(0x[0-9a-fA-F]+|[0-9]+),%esp/ { adjust_sp_offset(get_const1()) }
-/call/ {
- if (match($0, /call [0-9]+f/)) # "forward" label
- called = substr($0, RSTART+5, RLENGTH-6)
- else if (match($0, /call [0-9a-zA-Z_]+/))
- called = substr($0, RSTART+5, RLENGTH-5)
+/^pushal?$/ {
+ adjust_sp_offset(32)
+ if (in_function) {
+ save_reg("eax", 28)
+ save_reg("ecx", 24)
+ save_reg("edx", 20)
+ save_reg("ebx", 16)
+ save_reg("esp", 12)
+ save_reg("ebp", 8)
+ save_reg("esi", 4)
+ save_reg("edi", 0)
+ }
+}
+
+/^pushaw$/ {
+ adjust_sp_offset(16)
+}
+
+/^popal?$/ {
+ adjust_sp_offset(-32)
+}
+
+/^popaw$/ {
+ adjust_sp_offset(-16)
+}
+
+/^pushfl?$/ {
+ adjust_sp_offset(4)
+}
+
+/^pushfw$/ {
+ adjust_sp_offset(2)
+}
+
+/^popfl?$/ {
+ adjust_sp_offset(-4)
+}
+
+/^popfw$/ {
+ adjust_sp_offset(-2)
+}
+
+/^addl? \$[+-]?(0x[0-9a-fA-F]+|[0-9]+),%esp/ {
+ adjust_sp_offset(-get_const1())
+}
+
+/^subl? \$[+-]?(0x[0-9a-fA-F]+|[0-9]+),%esp/ {
+ adjust_sp_offset(get_const1())
+}
+
+/^call / {
+ if (match($2, /^[0-9]+f$/)) # "forward" label
+ called[substr($2, 1, RLENGTH - 1)] = 1
+ else if (match($2, /^[a-zA-Z_][0-9a-zA-Z_]*$/))
+ called[$2] = 1
+}
+
+/^j/ {
+ jump_to_label($2)
}
# TRACKING REGISTER VALUES FROM THE PREVIOUS STACK FRAME
#
-/pushl? %e(ax|bx|cx|dx|si|di|bp)/ { # don't match "push (%reg)"
- # if a register is being pushed, and its value has not changed since the
+/^pushl? %e([abcd]x|si|di|bp)$/ {
+ # Don't match "push (%reg)"
+ # If a register is being pushed, and its value has not changed since the
# beginning of this function, the pushed value can be used when printing
- # local variables at the next level up the stack
- # emit '.cfi_rel_offset' for that
+ # local variables at the next level up the stack.
+ # Emit '.cfi_rel_offset' for that.
- if (in_function) {
- register = get_reg()
- if (!saved[register] && !dirty[register]) {
- printf ".cfi_rel_offset %s,0\n", register
- saved[register] = 1
- }
- }
+ if (in_function)
+ save_reg(get_reg(), 0)
}
-/movl? %e(ax|bx|cx|dx|si|di|bp),-?(0x[0-9a-fA-F]+|[0-9]+)?\(%esp\)/ {
+/^movl? %e(ax|bx|cx|dx|si|di|bp),[+-]?(0x[0-9a-fA-F]+|[0-9]+)?\(%esp\)$/ {
if (in_function) {
- register = get_reg()
- if (match($0, /-?(0x[0-9a-fA-F]+|[0-9]+)\(%esp\)/)) {
- offset = parse_const(substr($0, RSTART, RLENGTH-6))
+ if (match($2, /,[+-]?(0x[0-9a-fA-F]+|[0-9]+)\(%esp\)$/)) {
+ offset = parse_const(substr($2, RSTART + 1, RLENGTH - 7))
} else {
offset = 0
}
- if (!saved[register] && !dirty[register]) {
- printf ".cfi_rel_offset %s,%d\n", register, offset
- saved[register] = 1
- }
+ save_reg(get_reg1(), offset)
}
}
# IF REGISTER VALUES ARE UNCEREMONIOUSLY TRASHED
# ...then we want to know about it.
#
-function trashed(register) {
- if (in_function && !saved[register] && !dirty[register]) {
- printf ".cfi_undefined %s\n", register
- }
- dirty[register] = 1
-}
-# this does NOT exhaustively check for all possible instructions which could
-# overwrite a register value inherited from the caller (just the common ones)
-/mov.*,%e?([abcd][hlx]|si|di|bp)$/ { trashed(get_reg2()) }
-/(add|addl|sub|subl|and|or|xor|lea|sal|sar|shl|shr).*,%e?([abcd][hlx]|si|di|bp)$/ {
+function trashed(reg) {
+ if (in_function && !(reg in saved) && !(reg in dirty)) {
+ printf ".cfi_undefined %s\n", reg
+ dirty[reg] = 1
+ }
+}
+# This does NOT exhaustively check for all possible instructions which could
+# overwrite a register value inherited from the caller (just the common ones).
+/^mov.*,%e?([abcd][hlx]|si|di|bp)$/ {
+ trashed(get_reg2())
+}
+/^(add|sub|and|x?or|lea|s[ah][lr])[bwl]? [^,]+,%e?([abcd][hlx]|si|di|bp)$/ {
trashed(get_reg2())
}
-/^i?mul [^,]*$/ { trashed("eax"); trashed("edx") }
-/^i?mul.*,%e?([abcd][hlx]|si|di|bp)$/ { trashed(get_reg2()) }
-/^i?div/ { trashed("eax"); trashed("edx") }
-/(dec|inc|not|neg|pop) %e?([abcd][hlx]|si|di|bp)/ { trashed(get_reg()) }
-/cpuid/ { trashed("eax"); trashed("ebx"); trashed("ecx"); trashed("edx") }
+/^i?mul[bwl]? [^,]+$/ { # one-operand MUL/IMUL, with or without size suffix: invalidate EAX and EDX
+	trashed("eax")
+	trashed("edx")
+}
+/^i?mul[bwl]? [^,]+,%e?([abcd][hlx]|si|di|bp)$/ {
+ trashed(get_reg2())
+}
+/^i?div[bwl]? / { # DIV/IDIV, with or without size suffix: invalidate EAX and EDX
+	trashed("eax")
+	trashed("edx")
+}
+/^(dec|inc|not|neg|pop)[bwl]? %e?([abcd][hlx]|si|di|bp)$/ {
+ trashed(get_reg())
+}
+/^cpuid/ {
+ trashed("eax")
+ trashed("ebx")
+ trashed("ecx")
+ trashed("edx")
+}
END {
if (in_function)
--- a/tools/add-cfi.x86_64.awk
+++ b/tools/add-cfi.x86_64.awk
@@ -1,169 +1,247 @@
-# Insert GAS CFI directives ("control frame information") into x86-64 asm input
+# Insert GAS CFI directives ("control frame information") into x86-64 asm input.
BEGIN {
- # don't put CFI data in the .eh_frame ELF section (which we don't keep)
+ # Don't put CFI data in the .eh_frame ELF section (which we don't keep).
print ".cfi_sections .debug_frame"
- # only emit CFI directives inside a function
- in_function = 0
+ # Only emit CFI directives inside a function.
+ in_function = ""
- # emit .loc directives with line numbers from original source
+ # Emit .loc directives with line numbers from original source.
printf ".file 1 \"%s\"\n", ARGV[1]
line_number = 0
- # used to detect "call label; label:" trick
- called = ""
+ re_label = "([0-9]+|[a-zA-Z_][a-zA-Z0-9_]*)"
+
+ for (i = 1; i <= 4; ++i) {
+ letter = substr("abcd", i, 1)
+ regname[letter "l"] = regname[letter "h"] = regname[letter "x"] = \
+ regname["e" letter "x"] = regname["r" letter "x"] = "r" letter "x"
+ }
+
+ regname["si"] = regname["esi"] = regname["rsi"] = "rsi"
+ regname["di"] = regname["edi"] = regname["rdi"] = "rdi"
+ regname["bp"] = regname["ebp"] = regname["rbp"] = "rbp"
+ regname["sp"] = regname["esp"] = regname["rsp"] = "rsp"
+
+ for (i = 8; i <= 15; ++i)
+ regname["r" i] = "r" i
}
+# For instructions with 2 operands, get 1st operand (assuming it is constant).
function get_const1() {
- # for instructions with 2 operands, get 1st operand (assuming it is constant)
- match($0, /-?(0x[0-9a-fA-F]+|[0-9]+),/)
- return parse_const(substr($0, RSTART, RLENGTH-1))
+ match($2, /^\$[+-]?(0x[0-9a-fA-F]+|[0-9]+),/)
+ return parse_const(substr($2, 2, RLENGTH - 2))
}
-function canonicalize_reg(register) {
- if (match(register, /^r/))
- return register
- else if (match(register, /^e/))
- return "r" substr(register, 2, length(register)-1)
- else if (match(register, /[hl]$/)) # AH, AL, BH, BL, etc
- return "r" substr(register, 1, 1) "x"
- else # AX, BX, CX, etc
- return "r" register
-}
+# Only use if you already know there is 1 and only 1 register.
function get_reg() {
- # only use if you already know there is 1 and only 1 register
- match($0, /%[er]?([abcd][xlh]|si|di|bp|8|9|10|11|12|13|14|15)/)
- return canonicalize_reg(substr($0, RSTART+1, RLENGTH-1))
+ return regname[substr($2, 2, length($2) - 1)]
}
+
+# For instructions with 2 operands, get 1st operand (assuming it is register).
function get_reg1() {
- # for instructions with 2 operands, get 1st operand (assuming it is register)
- match($0, /%[er]?([abcd][xlh]|si|di|bp|8|9|10|11|12|13|14|15),/)
- return canonicalize_reg(substr($0, RSTART+1, RLENGTH-2))
+ match($2, /^%[er]?([abcd][xlh]|si|di|bp|[89]|1[0-5]),/)
+ return regname[substr($2, 2, RLENGTH - 2)]
}
+
+# For instructions with 2 operands, get 2nd operand (assuming it is register).
function get_reg2() {
- # for instructions with 2 operands, get 2nd operand (assuming it is register)
- match($0, /,%[er]?([abcd][xlh]|si|di|bp|8|9|10|11|12|13|14|15)/)
- return canonicalize_reg(substr($0, RSTART+2, RLENGTH-2))
+ match($2, /,%[er]?([abcd][xlh]|si|di|bp|[89]|1[0-5])$/)
+ return regname[substr($2, RSTART + 2, RLENGTH - 2)]
}
+# Helper to adjust CFA offset.
function adjust_sp_offset(delta) {
- if (in_function)
+ if (in_function) {
printf ".cfi_adjust_cfa_offset %d\n", delta
+ cfa_offset[in_function] += delta
+ }
+}
+
+# Helper to process jumps to labels by saving the current CFA offset under the target label's name.
+function jump_to_label(label) {
+ if (in_function) { # Nothing is recorded outside a function.
+ if (match(label, /^[0-9]+f$/)) # "forward" label
+ cfa_offset[substr(label, 1, RLENGTH - 1)] = cfa_offset[in_function] # Key is the number without the trailing "f".
+ else if (match(label, /^[a-zA-Z_][a-zA-Z0-9_]*$/)) # Symbolic label.
+ cfa_offset[label] = cfa_offset[in_function]
+ } # Any other operand (e.g. a backward "Nb" label) is ignored.
}
{
- line_number = line_number + 1
+ ++line_number
- # clean the input up before doing anything else
- # delete comments
- gsub(/(#|\/\/).*/, "")
+ # Clean the input up before doing anything else.
+ # Delete comments.
+ gsub(/#.*|\/\*.*\*\//, "")
- # canonicalize whitespace
- gsub(/[ \t]+/, " ") # mawk doesn't understand \s
+ # Canonicalize whitespace.
+ gsub(/[ \t]+/, " ") # Mawk doesn't understand \s.
gsub(/ *, */, ",")
- gsub(/ *: */, ": ")
+ if ($1 ~ /:/) # Only when the first field carries a label colon; leaves operands such as %fs:0 alone.
+ sub(/ *: */, ": ")
gsub(/ $/, "")
gsub(/^ /, "")
}
-# check for assembler directives which we care about
+# Check for assembler directives which we care about.
/^\.(section|data|text)/ {
- # a .cfi_startproc/.cfi_endproc pair should be within the same section
- # otherwise, clang will choke when generating ELF output
+ # A .cfi_startproc/.cfi_endproc pair should be within the same section.
+ # Otherwise, clang will choke when generating ELF output.
if (in_function) {
print ".cfi_endproc"
- in_function = 0
+ in_function = ""
}
}
-/^\.type [a-zA-Z0-9_]+,@function/ {
- functions[substr($2, 1, length($2)-10)] = 1
+
+# Record each function name: $2 up to (not including) any comma, for every accepted .type form.
+/^\.type [a-zA-Z0-9_]+( STT_FUNCTION|,[#@%"]function)/ {
+ functions[substr($2, 1, index($2 ",", ",") - 1)] = 1
+}
-# not interested in assembler directives beyond this, just pass them through
+# Not interested in assembler directives beyond this, just pass them through.
/^\./ {
print
next
}
-/^[a-zA-Z0-9_]+:/ {
- label = substr($1, 1, length($1)-1) # drop trailing :
-
- if (called == label) {
- # note adjustment of stack pointer from "call label; label:"
- adjust_sp_offset(8)
- }
+$0 ~ "^" re_label ":" {
+ # Parse each leading label.
+ while (match($0, "^" re_label ":")) {
+
+ # Extract label name.
+ label = substr($1, 1, RLENGTH - 1)
+
+ # Remove label from current line.
+ sub("^" re_label ": ?", "")
+
+ if (label in functions) {
+ if (in_function) {
+ print ".cfi_endproc"
+ for (l in called)
+ delete called[l]
+ }
+
+ in_function = label
+ print ".cfi_startproc"
+
+ for (reg in saved)
+ delete saved[reg]
+ for (reg in dirty)
+ delete dirty[reg]
+ }
- if (functions[label]) {
- if (in_function)
- print ".cfi_endproc"
+ printf "%s:\n", label
- in_function = 1
- print ".cfi_startproc"
+ # If this label has been jumped to, define the CFA offset to its
+ # value at the location of the jump.
+ if (!(label in functions) && in_function && label in cfa_offset) {
+ if (cfa_offset[in_function] != cfa_offset[label]) {
+ printf ".cfi_def_cfa_offset %d\n", cfa_offset[label]
+ cfa_offset[in_function] = cfa_offset[label]
+ }
+ delete cfa_offset[label]
+ }
- for (register in saved)
- delete saved[register]
- for (register in dirty)
- delete dirty[register]
+ # If this label has been called, adjust CFA offset.
+ if (label in called && !(label in functions)) {
+ adjust_sp_offset(8);
+ delete called[label]
+ }
}
-
- # an instruction may follow on the same line, so continue processing
+ # An instruction may follow on the same line, so continue processing.
}
+# Skip empty line.
/^$/ { next }
+# Issue source line number.
{
- called = ""
printf ".loc 1 %d\n", line_number
print
}
# KEEPING UP WITH THE STACK POINTER
-# %rsp should only be adjusted by pushing/popping or adding/subtracting constants
+# %rsp should only be adjusted by pushing/popping or adding/subtracting
+# constants.
#
-/pushl?/ {
+/^push[wq]? / { # Track the stack growth caused by a push.
+ if ($1 == "pushw" || match($2, /^%([abcd]x|di|si|bp|sp)$/)) # Explicit pushw or a 16-bit register operand.
+ adjust_sp_offset(2)
+ else # Every other push is counted as 8 bytes.
+ adjust_sp_offset(8)
+}
+
+/^pop[wq]? / { # Mirror of the push rule: track the stack shrinking.
+ if ($1 == "popw" || match($2, /^%([abcd]x|di|si|bp|sp)$/)) # Explicit popw or a 16-bit register operand.
+ adjust_sp_offset(-2)
+ else # Every other pop is counted as 8 bytes.
+ adjust_sp_offset(-8)
+}
+
+/^pushfq?$/ {
adjust_sp_offset(8)
}
-/popl?/ {
+
+/^pushfw$/ {
+ adjust_sp_offset(2)
+}
+
+/^popfq?$/ {
adjust_sp_offset(-8)
}
-/addl? \$-?(0x[0-9a-fA-F]+|[0-9]+),%rsp/ { adjust_sp_offset(-get_const1()) }
-/subl? \$-?(0x[0-9a-fA-F]+|[0-9]+),%rsp/ { adjust_sp_offset(get_const1()) }
-/call/ {
- if (match($0, /call [0-9]+f/)) # "forward" label
- called = substr($0, RSTART+5, RLENGTH-6)
- else if (match($0, /call [0-9a-zA-Z_]+/))
- called = substr($0, RSTART+5, RLENGTH-5)
+/^popfw$/ {
+ adjust_sp_offset(-2)
+}
+
+/^addq? \$[+-]?(0x[0-9a-fA-F]+|[0-9]+),%rsp$/ {
+ adjust_sp_offset(-get_const1())
+}
+/^subq? \$[+-]?(0x[0-9a-fA-F]+|[0-9]+),%rsp$/ {
+ adjust_sp_offset(get_const1())
+}
+
+/^call / { # Remember call targets so the label rule can detect the "call label; label:" trick.
+ if (match($2, /^[0-9]+f$/)) # "forward" label
+ called[substr($2, 1, RLENGTH - 1)] = 1 # Key is the number without the trailing "f".
+ else if (match($2, /^[a-zA-Z_][0-9a-zA-Z_]*$/)) # Symbolic label; other operands are ignored.
+ called[$2] = 1
+}
+
+/^j/ {
+ jump_to_label($2)
}
# TRACKING REGISTER VALUES FROM THE PREVIOUS STACK FRAME
#
-/pushl? %r(ax|bx|cx|dx|si|di|bp|8|9|10|11|12|13|14|15)/ { # don't match "push (%reg)"
- # if a register is being pushed, and its value has not changed since the
+/^pushq? %r([abcd]x|si|di|bp|[89]|1[0-5])$/ {
+ # Don't match "push (%reg)".
+ # If a register is being pushed, and its value has not changed since the
# beginning of this function, the pushed value can be used when printing
- # local variables at the next level up the stack
- # emit '.cfi_rel_offset' for that
+ # local variables at the next level up the stack.
+ # Emit '.cfi_rel_offset' for that.
if (in_function) {
- register = get_reg()
- if (!saved[register] && !dirty[register]) {
- printf ".cfi_rel_offset %s,0\n", register
- saved[register] = 1
+ reg = get_reg()
+ if (!(reg in saved) && !(reg in dirty)) {
+ printf ".cfi_rel_offset %s,0\n", reg
+ saved[reg] = 1
}
}
}
-/movl? %r(ax|bx|cx|dx|si|di|bp|8|9|10|11|12|13|14|15),-?(0x[0-9a-fA-F]+|[0-9]+)?\(%rsp\)/ {
+/^movq? %r([abcd]x|si|di|bp|[89]|1[0-5]),[+-]?(0x[0-9a-fA-F]+|[0-9]+)?\(%rsp\)$/ { # 64-bit register stored at [offset](%rsp).
if (in_function) {
- register = get_reg()
- if (match($0, /-?(0x[0-9a-fA-F]+|[0-9]+)\(%rsp\)/)) {
- offset = parse_const(substr($0, RSTART, RLENGTH-6))
+ if (match($2, /,[+-]?(0x[0-9a-fA-F]+|[0-9]+)\(%rsp\)$/)) {
+ offset = parse_const(substr($2, RSTART + 1, RLENGTH - 7)) # The match starts at the comma: skip it, drop "," and "(%rsp)" from the length.
} else {
offset = 0
}
- if (!saved[register] && !dirty[register]) {
- printf ".cfi_rel_offset %s,%d\n", register, offset
- saved[register] = 1
+ reg = get_reg1()
+ if (!(reg in saved) && !(reg in dirty)) {
+ printf ".cfi_rel_offset %s,%d\n", reg, offset
+ saved[reg] = 1
}
}
}
@@ -171,24 +249,41 @@ function adjust_sp_offset(delta) {
# IF REGISTER VALUES ARE UNCEREMONIOUSLY TRASHED
# ...then we want to know about it.
#
-function trashed(register) {
- if (in_function && !saved[register] && !dirty[register]) {
- printf ".cfi_undefined %s\n", register
- }
- dirty[register] = 1
-}
-# this does NOT exhaustively check for all possible instructions which could
-# overwrite a register value inherited from the caller (just the common ones)
-/mov.*,%[er]?([abcd][xlh]|si|di|bp|8|9|10|11|12|13|14|15)$/ { trashed(get_reg2()) }
-/(add|addl|sub|subl|and|or|xor|lea|sal|sar|shl|shr).*,%[er]?([abcd][xlh]|si|di|bp|8|9|10|11|12|13|14|15)$/ {
+function trashed(reg) { # Note that the value reg inherited from the caller has been overwritten.
+ if (in_function && !(reg in saved) && !(reg in dirty)) { # Report only inside a function, and only for registers not yet in saved[] or dirty[].
+ printf ".cfi_undefined %s\n", reg
+ }
+ dirty[reg] = 1 # Marked unconditionally, even when nothing was printed.
+}
+# This does NOT exhaustively check for all possible instructions which could
+# overwrite a register value inherited from the caller (just the common ones).
+/^mov.*,%[er]?([abcd][xlh]|si|di|bp|[89]|1[0-5])$/ {
+ trashed(get_reg2())
+}
+/^(add|sub|and|x?or|lea|s[ah][lr])[bwlq]? [^,]+,%[er]?([abcd][xlh]|si|di|bp|[89]|1[0-5])$/ {
trashed(get_reg2())
}
-/^i?mul [^,]*$/ { trashed("rax"); trashed("rdx") }
-/^i?mul.*,%[er]?([abcd][xlh]|si|di|bp|8|9|10|11|12|13|14|15)$/ { trashed(get_reg2()) }
-/^i?div/ { trashed("rax"); trashed("rdx") }
+/^i?mul[bwlq]? [^,]+$/ {
+ trashed("rax")
+ trashed("rdx")
+}
+/^i?mul[bwlq]? [^,]+,%[er]?([abcd][xlh]|si|di|bp|[89]|1[0-5])$/ { # Two-operand form; the size suffix is optional.
+ trashed(get_reg2())
+}
+/^i?div[bwlq]? / {
+ trashed("rax")
+ trashed("rdx")
+}
-/(dec|inc|not|neg|pop) %[er]?([abcd][xlh]|si|di|bp|8|9|10|11|12|13|14|15)/ { trashed(get_reg()) }
-/cpuid/ { trashed("rax"); trashed("rbx"); trashed("rcx"); trashed("rdx") }
+/^(dec|inc|not|neg|pop)[bwlq]? %[er]?([abcd][xlh]|si|di|bp|[89]|1[0-5])$/ {
+ trashed(get_reg())
+}
+/^cpuid$/ {
+ trashed("rax")
+ trashed("rbx")
+ trashed("rcx")
+ trashed("rdx")
+}
END {
if (in_function)