I'm learning x86-64 and I'm working with some compiler generated assembly code which I mostly understand. Its a recursive factorial program which calls itself till a base is reached wherein 1 is placed in rax which in turn is multiplied with each previously decremented count value. I understand alignment in the context of variable access wherein there is a massive cost to accessing unaligned data and I suppose the text segment being aligned is much the same.
In the program, there are two marked points I find confusing the first makes use of one of the three stack-allocated local variable spaces in the decrementing of the rdi register which holds the user provided number to calculate the factorial for. Why not just use rax directly in replacing:
mov qword [rbp + - 16]
with
mov rdi, rax?.
The second is the use of the other two stack local variables in performing each factorial multiplication and subsequently doing what seems to be a redundant operation where the result of the multiplication is moved into a local variable from rax and then back into rax before the function returns.
mov qword [rbp + -24], rax
mov rax, rdi
imul rax, qword [rbp + -24]
mov qword [rbp + -8], rax
mov rax, qword [rbp + -8]
Would these calculations not be much faster utilizing any of the untouched general purpose registers and omitting these stack locals or are these operations a part of the 16-byte alignment?
rec:
push rbp
mov rbp, rsp
sub rsp, 24
push rbx
push r12
push r13
push r14
push r15
.sec0:
mov qword [rbp + -8], 1
test rdi, rdi
je .sec1
.sec2:
mov rax, rdi
sub rax, 1
mov qword [rbp + -16], rax ;; point 1.0
push rcx
push rdx
push rsi
push rdi
push r8
push r9
push r10
push r11
mov rdi, qword [rbp + -16] ;; point 1.1
call rec
pop r11
pop r10
pop r9
pop r8
pop rdi
pop rsi
pop rdx
pop rcx
mov qword [rbp + -24], rax ;; point 2.0
mov rax, rdi
imul rax, qword [rbp + -24] ;; point 2.1
mov qword [rbp + -8], rax ;; point 2.2
mov rax, qword [rbp + -8] ;; point 2.3
pop r15
pop r14
pop r13
pop r12
pop rbx
leave
ret
.sec1:
mov rax, qword [rbp + -8]
pop r15
pop r14
pop r13
pop r12
pop rbx
leave
ret