Вы находитесь на странице: 1из 11

Linux kernel & Embedded Developer

Machine Programming basics


The format and behavior of a machine-level program are defined by the instruction set
architecture, or ISA, which specifies the processor state, the format of the instructions, and the
effect each of these instructions will have on the state. Most ISAs (including IA32, x86-64, and
ARM) describe the behavior of each machine instruction.

The compiler does most of the work in the overall compilation sequence, transforming
programs written in high-level programming syntax into machine instructions that
the processor executes. The assembly-code representation is very close to machine code. Its
main feature is that it is in a more readable textual format, as compared to the binary
format of machine code. Being able to understand assembly code and how it relates to the
original C code is a key step in understanding how programs run.

IA32 Registers

An IA32 central processing unit (CPU) contains a set of eight registers storing 32-bit values.
These registers are used to store integer data as well as pointers. Their names all begin
with %e, but otherwise, they have peculiar names. With the original 8086, the registers
were 16 bits and each had a specific purpose. The names were chosen to reflect these
different purposes. With flat addressing, the need for specialized registers is greatly
reduced. For the most part, the first six registers can be considered general-purpose
registers with no restrictions placed on their use. We said for the most part, because
some instructions use fixed registers as sources and/or destinations. In addition, within
procedures there are different conventions for saving and restoring the first three registers
%eax, %ecx, and %edx than for the next three %ebx, %edi, and %esi. The final two

1|Page
www.techveda.org Course companion Kit
Linux kernel & Embedded Developer

registers %ebp and %esp contain pointers to important places in the program stack. They
should only be altered according to the set of standard conventions for stack management.

The low-order 2 bytes of the first four registers (%eax, %ecx, %edx, and %ebx) can each be
independently read or written by the byte operation instructions, e.g. as %al and %ah for
%eax. This feature was provided in the 8086 to allow backward
compatibility to the 8008 and 8080, two 8-bit microprocessors that date back to
1974. When a byte instruction updates one of these single-byte register elements, the
remaining 3 bytes of the register do not change. Similarly, the low-order 16 bits of each
register can be read or written by word operation instructions.

C source to Assembly (IA32)

In this document, we show assembly code in ATT format (named after AT&T, the company
that operated Bell Laboratories for many years), the default format for gcc on Linux
platforms.

Important note:

In our presentation, we will show the code generated by a particular version of gcc. If you
compile code on your own machine, chances are you will be using a different compiler or a
different version of gcc and hence will generate different code. The open source community
supporting gcc keeps changing the code generator, attempting to generate more efficient
code according to changing code guidelines provided by the microprocessor
manufacturers.

Our goal in studying the examples is to demonstrate how to examine assembly code and
map it back to the constructs found in high-level programming languages. You will need to
adapt these techniques to the style of code generated by your particular compiler.

2|Page
www.techveda.org Course companion Kit
Linux kernel & Embedded Developer

Case 1:

/* Case 1: two int locals are added and the sum is stored into a short local. */
int main()
{
int a, b;
short c;
a = 100;
b = 20;
c = a + b; /* the int sum is converted to short on assignment */
return 0;
}

Symbol table:

name type composition offset


a int 4 bytes -10(%ebp)
b int 4 bytes -6(%ebp)
c short 2 bytes -2(%ebp)

Assembly:

# main for Case 1 (IA32, AT&T syntax).
# Locals: a at -10(%ebp), b at -6(%ebp), c at -2(%ebp). Returns 0 in %eax.
main:
pushl %ebp # save the caller's frame pointer
movl %esp, %ebp # establish this function's frame pointer
subl $10, %esp # reserve 10 bytes for the locals (4 + 4 + 2)
movl $100, -10(%ebp) # a = 100
movl $20, -6(%ebp) # b = 20
movl -6(%ebp), %eax # %eax = b
addl -10(%ebp), %eax # %eax = b + a
movw %ax, -2(%ebp) # c = low 16 bits of the sum
movl $0, %eax # return value 0
leave # restore %esp and %ebp
ret

Case 2:

/* Case 2: a one-armed if; b is incremented only when a > b. */
int main()
{
    int a, b;
    a = 100;
    b = 20;
    if (a > b)
        b++;            /* taken here, since 100 > 20 */
    return 0;
}

Symbol table:

name type composition offset


a int 4 bytes -8(%ebp)
b int 4 bytes -4(%ebp)

Assembly:

# main for Case 2: if (a > b) b++;   (IA32, AT&T syntax)
# Locals: a at -8(%ebp), b at -4(%ebp). Returns 0 in %eax.
main:
        pushl   %ebp                    # save the caller's frame pointer
        movl    %esp, %ebp              # establish this function's frame pointer
        subl    $8, %esp                # room for a and b
        movl    $100, -8(%ebp)          # a = 100
        movl    $20, -4(%ebp)           # b = 20
        movl    -8(%ebp), %eax          # %eax = a
        cmpl    -4(%ebp), %eax          # set flags from a - b
        jle     .L2                     # a <= b: skip the if-body
        addl    $1, -4(%ebp)            # b++
.L2:
        movl    $0, %eax                # return value 0
        leave                           # restore %esp and %ebp
        ret

Case 3:

/* Case 3: if/else; exactly one of b or a is incremented depending on b < a. */
int main()
{
int a = 100, b = 50;
if( b < a)
b++; /* taken here, since 50 < 100 */
else
a++;

return 0;
}

Symbol table:

name type composition offset


a int 4 bytes -8(%ebp)

4|Page
www.techveda.org Course companion Kit
Linux kernel & Embedded Developer

b int 4 bytes -4(%ebp)

Assembly:

# main for Case 3: if (b < a) b++; else a++;   (IA32, AT&T syntax)
# Locals: a at -8(%ebp), b at -4(%ebp). Returns 0 in %eax.
main:
        pushl   %ebp                    # save the caller's frame pointer
        movl    %esp, %ebp              # establish this function's frame pointer
        subl    $8, %esp                # room for a and b
        movl    $100, -8(%ebp)          # a = 100
        movl    $50, -4(%ebp)           # b = 50
        movl    -4(%ebp), %eax          # %eax = b
        cmpl    -8(%ebp), %eax          # set flags from b - a
        jge     .L2                     # b >= a: go to the else branch
        addl    $1, -4(%ebp)            # then-branch: b++
        jmp     .L3                     # skip the else branch
.L2:
        addl    $1, -8(%ebp)            # else-branch: a++
.L3:
        movl    $0, %eax                # return value 0
        leave                           # restore %esp and %ebp
        ret

Case 4:

/* Case 4: a three-way switch on a (two explicit cases plus default). */
int main()
{
int a = 300;
switch (a) {
case 100:
a = 300;
break;
case 300: /* the arm selected here, since a starts at 300 */
a = 100;
break;
default:
a++;
break;
}
return 0;
}

Symbol table:

name type composition offset


a int 4 bytes -4(%ebp)

5|Page
www.techveda.org Course companion Kit
Linux kernel & Embedded Developer

Assembly:

# main for Case 4: switch (a) with cases 100, 300 and default.   (IA32, AT&T syntax)
# Local: a at -4(%ebp). Returns 0 in %eax.
# The switch is lowered to a chain of compare-and-branch tests on %eax.
main:
        pushl   %ebp                    # save the caller's frame pointer
        movl    %esp, %ebp              # establish this function's frame pointer
        subl    $4, %esp                # room for a
        movl    $300, -4(%ebp)          # a = 300
        movl    -4(%ebp), %eax          # %eax = a (the switch operand)
        cmpl    $100, %eax
        je      .CASE1                  # a == 100
        cmpl    $300, %eax
        je      .CASE2                  # a == 300
        jmp     .CASE3                  # anything else: default
.CASE1:
        movl    $300, -4(%ebp)          # case 100: a = 300
        jmp     .END                    # break
.CASE2:
        movl    $100, -4(%ebp)          # case 300: a = 100
        jmp     .END                    # break
.CASE3:
        addl    $1, -4(%ebp)            # default: a++ (falls through to .END)
.END:
        movl    $0, %eax                # return value 0
        leave                           # restore %esp and %ebp
        ret

Case 5:

/* Case 5: a counted for loop; the body runs three times (l = 0, 1, 2). */
int main()
{
int l;
int i = 0;
for(l = 0; l < 3; l++)
i++;
return 0;
}

Symbol table:

name type composition offset

6|Page
www.techveda.org Course companion Kit
Linux kernel & Embedded Developer

l int 4 bytes -8(%ebp)


i int 4 bytes -4(%ebp)

Assembly:

# main for Case 5: for (l = 0; l < 3; l++) i++;   (IA32, AT&T syntax)
# Locals: l at -8(%ebp), i at -4(%ebp). Returns 0 in %eax.
main:
        pushl   %ebp                    # save the caller's frame pointer
        movl    %esp, %ebp              # establish this function's frame pointer
        subl    $8, %esp                # room for l and i
        movl    $0, -4(%ebp)            # i = 0
        movl    $0, -8(%ebp)            # l = 0
.LOOP:
        cmpl    $2, -8(%ebp)            # set flags from l - 2
        jg      .OUT                    # l > 2, i.e. !(l < 3): leave the loop
                                        # (jge would exit at l == 2 and run the body only twice)
        addl    $1, -4(%ebp)            # i++
        addl    $1, -8(%ebp)            # l++
        jmp     .LOOP                   # re-test the condition
.OUT:
        movl    $0, %eax                # return value 0
        leave                           # restore %esp and %ebp
        ret

Case 6:

/* Case 6: a while loop; the body runs ten times (l = 0 .. 9). */
int main()
{
int l = 0;
int i = 0;
while( l < 10)
{
i++;
l++;
}
return 0;
}

Symbol table:

name type composition offset


l int 4 bytes -8(%ebp)
i int 4 bytes -4(%ebp)

7|Page
www.techveda.org Course companion Kit
Linux kernel & Embedded Developer

Assembly:

# main for Case 6: while (l < 10) { i++; l++; }   (IA32, AT&T syntax)
# Locals: l at -8(%ebp), i at -4(%ebp). Returns 0 in %eax.
main:
        pushl   %ebp                    # save the caller's frame pointer
        movl    %esp, %ebp              # establish this function's frame pointer
        subl    $8, %esp                # room for l and i
        movl    $0, -8(%ebp)            # l = 0
        movl    $0, -4(%ebp)            # i = 0
.LOOP:
        cmpl    $9, -8(%ebp)            # set flags from l - 9
        jg      .OUT                    # l > 9, i.e. !(l < 10): leave the loop
                                        # (jge would exit at l == 9 and run the body only nine times)
        addl    $1, -4(%ebp)            # i++
        addl    $1, -8(%ebp)            # l++
        jmp     .LOOP                   # re-test the condition
.OUT:
        movl    $0, %eax                # return value 0
        leave                           # restore %esp and %ebp
        ret

Case 7:

/*
 * Case 7: the same while loop as a ten-iteration counter, but l is declared
 * `register`, a hint that it should live in a CPU register instead of a
 * stack slot.
 */
int main()
{
    register int l = 0;
    int i = 0;
    while (l < 10)
    {
        i++;
        l++;
    }
    return 0;
}

Symbol table:

name type composition offset


i int 4 bytes -4(%ebp)

Assembly:

# main for Case 7: while (l < 10) { i++; l++; } with l held in %ebx.   (IA32, AT&T syntax)
# Local: i at -4(%ebp). l lives in %ebx; the caller's %ebx is saved at -8(%ebp).
# Returns 0 in %eax.
main:
        pushl   %ebp                    # save the caller's frame pointer
        movl    %esp, %ebp              # establish this function's frame pointer
        subl    $4, %esp                # room for i
        pushl   %ebx                    # %ebx is callee-saved on IA32: preserve it for the caller
        movl    $0, %ebx                # l = 0
        movl    $0, -4(%ebp)            # i = 0
.LOOP:
        cmpl    $9, %ebx                # set flags from l - 9
        jg      .OUT                    # l > 9, i.e. !(l < 10): leave the loop
                                        # (jge would exit at l == 9 and run the body only nine times)
        addl    $1, -4(%ebp)            # i++
        addl    $1, %ebx                # l++
        jmp     .LOOP                   # re-test the condition
.OUT:
        movl    $0, %eax                # return value 0
        popl    %ebx                    # restore the caller's %ebx (%esp still points at the saved copy)
        leave                           # restore %esp and %ebp
        ret

Case 8:

/* Case 8: take the address of a local and store to it through the pointer. */
int main()
{
int i = 100;
int *p;
p = &i;
*p = 500; /* overwrites i with 500 via p */
return 0;
}

Symbol table:

9|Page
www.techveda.org Course companion Kit
Linux kernel & Embedded Developer

name type composition offset


i int 4 bytes -8(%ebp)
p int * 4 bytes -4(%ebp)

Assembly:

# main for Case 8: p = &i; *p = 500;   (IA32, AT&T syntax)
# Locals: i at -8(%ebp), p at -4(%ebp). Returns 0 in %eax.
main:
        pushl   %ebp                    # save the caller's frame pointer
        movl    %esp, %ebp              # establish this function's frame pointer
        subl    $8, %esp                # room for i and p
        movl    $100, -8(%ebp)          # i = 100
        leal    -8(%ebp), %eax          # %eax = &i (address computed, no memory access)
        movl    %eax, -4(%ebp)          # p = &i
        movl    -4(%ebp), %eax          # %eax = p
        movl    $500, (%eax)            # *p = 500
        movl    $0, %eax                # return value 0
        leave                           # restore %esp and %ebp
        ret

Case 9:

/*
 * Case 9: walk an int array through a pointer, storing 100 into each element.
 * l is a register-hinted loop counter.
 */
int main()
{
    int a[4] = {10, 20, 30, 40 };
    int *p;
    register int l = 0;
    p = a;
    /* a has four elements, so the bound is 4; a fifth pass would store
       through p one element past a[3]. */
    while (l < 4)
    {
        *p = 100;
        *p++;           /* parses as *(p++): only the pointer increment has an effect */
        l++;
    }
    return 0;
}

Symbol table:

name type composition offset

10 | P a g e
www.techveda.org Course companion Kit
Linux kernel & Embedded Developer

a[0] int 4 bytes -20(%ebp)


a[1] int 4 bytes -16(%ebp)
a[2] int 4 bytes -12(%ebp)
a[3] int 4 bytes -8(%ebp)
p int * 4 bytes -4(%ebp)

Assembly:

# main for Case 9: store 100 into each element of a[] through pointer p.   (IA32, AT&T syntax)
# Locals: a[0..3] at -20/-16/-12/-8(%ebp), p at -4(%ebp).
# l lives in %ebx; the caller's %ebx is saved at -24(%ebp). Returns 0 in %eax.
main:
        pushl   %ebp                    # save the caller's frame pointer
        movl    %esp, %ebp              # establish this function's frame pointer
        subl    $20, %esp               # room for a[4] and p
        pushl   %ebx                    # %ebx is callee-saved on IA32: preserve it for the caller
        movl    $10, -20(%ebp)          # a[0] = 10
        movl    $20, -16(%ebp)          # a[1] = 20
        movl    $30, -12(%ebp)          # a[2] = 30
        movl    $40, -8(%ebp)           # a[3] = 40
        movl    $0, %ebx                # l = 0
        leal    -20(%ebp), %eax         # %eax = &a[0]
        movl    %eax, -4(%ebp)          # p = a

.LOOP:
        cmpl    $4, %ebx                # set flags from l - 4
        jge     .OUT                    # l >= 4: done, one pass per element of a[]
                                        # (a fifth pass would store into p's own slot at -4(%ebp))
        movl    -4(%ebp), %eax          # %eax = p
        movl    $100, (%eax)            # *p = 100
        addl    $4, -4(%ebp)            # p++ (advance by sizeof(int))
        addl    $1, %ebx                # l++
        jmp     .LOOP                   # re-test the condition
.OUT:
        movl    $0, %eax                # return value 0
        popl    %ebx                    # restore the caller's %ebx (%esp still points at the saved copy)
        leave                           # restore %esp and %ebp
        ret

11 | P a g e
www.techveda.org Course companion Kit

Вам также может понравиться