6

Backgrounds:

I need to read a register's value from C code, and I found a GCC feature for this. I get the ebp value using the following code.

   register int ebp asm("ebp");
   printf("currently ebp is %08x\n", ebp);
   // then some code use the value

Everything seemed okay until I switched my program's compiler to clang. With gcc, it usually prints something like 0x7f1284978, definitely a pointer-like value.

But with clang, the output became weird: it prints a value like 0x9. Ebp can't possibly hold a value like this.

Qs:

  • Does clang not support this register variable usage?
  • Why doesn't it emit a warning or an error if it doesn't support this feature (when compiling the following code)?

    #include <stdio.h>
    
    static size_t vfp = 0x233;
    
    int main(void) {
        register int ebp asm("ebp");
        vfp = (size_t) ebp;
        printf("vfp value is 0x%lx\n", vfp);
        return 0;
    }
    
occia
  • 129
  • 1
  • 10

3 Answers3

4

TL;DR:

Clang does not support explicit register variables as of now.

Details:

See clang documentation

clang only supports global register variables when the register specified is non-allocatable (e.g. the stack pointer). Support for general global register variables is unlikely to be implemented soon because it requires additional LLVM backend support.

On my machine (x86_64 ubuntu 16.04), if I compile with Clang-5.0, the assembly I get is:

 08048410 <main>:
 8048410:       55                      push   %ebp
 8048411:       89 e5                   mov    %esp,%ebp
 8048413:       83 ec 18                sub    $0x18,%esp
 8048416:       8d 05 c0 84 04 08       lea    0x80484c0,%eax
 804841c:       8b 4d fc                mov    -0x4(%ebp),%ecx ;this line is wrong, the behavior is meaningless
 804841f:       89 0d 1c a0 04 08       mov    %ecx,0x804a01c
 8048425:       8b 0d 1c a0 04 08       mov    0x804a01c,%ecx
 804842b:       89 04 24                mov    %eax,(%esp)
 804842e:       89 4c 24 04             mov    %ecx,0x4(%esp)
 8048432:       e8 89 fe ff ff          call   80482c0 <printf@plt>
 8048437:       89 45 f8                mov    %eax,-0x8(%ebp)
 804843a:       83 c4 18                add    $0x18,%esp
 804843d:       5d                      pop    %ebp
 804843e:       c3                      ret
 804843f:       90                      nop

If I compile with GCC-5.5.0, this is the assembly I got:

0000051d <main>:


 51d:   8d 4c 24 04             lea    0x4(%esp),%ecx
 521:   83 e4 f0                and    $0xfffffff0,%esp
 524:   ff 71 fc                pushl  -0x4(%ecx)
 527:   55                      push   %ebp
 528:   89 e5                   mov    %esp,%ebp
 52a:   53                      push   %ebx
 52b:   51                      push   %ecx
 52c:   e8 33 00 00 00          call   564 <__x86.get_pc_thunk.ax>
 531:   05 a7 1a 00 00          add    $0x1aa7,%eax
 536:   89 ea                   mov    %ebp,%edx ; this is the correct location to get the value of ebp
 538:   89 90 30 00 00 00       mov    %edx,0x30(%eax)
 53e:   8b 90 30 00 00 00       mov    0x30(%eax),%edx
 544:   83 ec 08                sub    $0x8,%esp
 547:   52                      push   %edx
 548:   8d 90 18 e6 ff ff       lea    -0x19e8(%eax),%edx
 54e:   52                      push   %edx
 54f:   89 c3                   mov    %eax,%ebx
 551:   e8 5a fe ff ff          call   3b0 <printf@plt>
 556:   83 c4 10                add    $0x10,%esp
 559:   90                      nop
 55a:   8d 65 f8                lea    -0x8(%ebp),%esp
 55d:   59                      pop    %ecx
 55e:   5b                      pop    %ebx
 55f:   5d                      pop    %ebp
 560:   8d 61 fc                lea    -0x4(%ecx),%esp
 563:   c3                      ret

We can see that GCC generally supports explicit register value access while Clang does not.

Solution:

If you wish to use Clang to access ebp value, you can use inline assembly, like this: asm("\t movl %%ebp,%0" : "=r"(vfp));

ThePatrickStar
  • 106
  • 1
  • 5
  • 4
    "*We can see that GCC generally supports explicit register value access*" - Except the documentation says that this usage is explicitly not supported. If it appears to work, it is by accident. – melpomene Apr 12 '19 at 09:35
  • It seems that using a local register variable is not a safe choice, as both the compiler and its version matter; I'll change to inline assembly – occia Apr 12 '19 at 10:00
  • 1
    The documention you quote for clang is talking about *global* register-asm variables, like if you tried to do that at global scope. The code you compiled is using a *local* register-asm variable, which is different (even for GCC). IIRC, clang *does* support register-asm local variables just as much as gcc *officially* supports them, i.e. only for operands to GNU C Extended Asm statements. It's the only way to specify a register operand like `r8` or `r10` for `asm("syscall" : "+a"(result) : ... :);` on x86-64, or any register on ARM or AArch64 where there are no specific-register constraints – Peter Cordes Apr 12 '19 at 10:28
  • 3
    @occia: **GCC has a builtin for this**: [`void * __builtin_frame_address(0)` gets the current function's frame pointer](https://gcc.gnu.org/onlinedocs/gcc/Return-Address.html). Using that is always(?) safe, and doesn't require compiling a whole file with `-fno-omit-frame-pointer`. – Peter Cordes Apr 12 '19 at 10:29
  • You don't need the mov in the asm. Just empty asm with output constraint in a "register asm" var will do. – R.. GitHub STOP HELPING ICE Apr 12 '19 at 11:13
  • Using inline assembly to move a value into `ebp` doesn't guarantee that the compiler will keep it there. – Sapphire_Brick Jun 15 '20 at 22:06
3

In Gcc the register keyword does the following (as explained here: Using Gcc - Local Register Variables):

If you use the variable in inline assembly, gcc will try to put it into the register you specified. In any other context, the register keyword does not have an effect, and as noted at the bottom of the first link, it is no alternative to specifying the variable as an input to the inline assembly.

What that keyword does when used with clang I do not know; most probably it is just ignored (see Is the register keyword still used?).

Boden_Units
  • 83
  • 2
  • 7
  • 1
    That's not *just* the `register` keyword, it's using `register` and `asm("%eax")` to *specify* the "asm name" of the variable. That's very different from just plain ISO C `register`. And BTW, it will have *guaranteed* / *supported* effect, but in practice current GCC *will* at least try to use that register for your variable even if you don't use it with an `asm` statement. That means it's easy to write code that happens to work but is not supported or future-proof. :/ The "only supported for `asm`" wording was added to the GCC docs somewhat recently. – Peter Cordes Apr 12 '19 at 10:27
2

As a complement to the answers by @ThePatrickStar and @Boden_Units: the explicit register initialization is erased by the Clang driver during LLVM IR generation. Here is the content of inline_asm.ll when running clang -emit-llvm -S inline_asm.c -o inline_asm.ll (clang-7).

; ModuleID = 'inline_asm.c'
source_filename = "inline_asm.c"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"

@vfp = internal global i64 563, align 8
@.str = private unnamed_addr constant [20 x i8] c"vfp value is 0x%lx\0A\00", align 1

; Function Attrs: noinline nounwind optnone uwtable
define dso_local i32 @main() #0 {
  %1 = alloca i32, align 4
  %2 = alloca i32, align 4
  store i32 0, i32* %1, align 4
  %3 = load i32, i32* %2, align 4
  %4 = sext i32 %3 to i64
  store i64 %4, i64* @vfp, align 8
  %5 = load i64, i64* @vfp, align 8
  %6 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str, i32 0, i32 0), i64 %5)
  ret i32 0
}

declare dso_local i32 @printf(i8*, ...) #1

attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }

!llvm.module.flags = !{!0}
!llvm.ident = !{!1}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 7.0.1-svn348686-1~exp1~20190113235231.54 (branches/release_70)"}

In fact, the generated IR for register int ebp asm("ebp"); is no different from register int ebp;, as if ebp is never initialized or bound to the ebp register.

Hongxu Chen
  • 5,240
  • 2
  • 45
  • 85