用php读取elf结构
前提知识
- UNIX系统的可执行文件都采用ELF格式,类型分为目标文件、可执行文件和共享库
- ELF格式探析之三:sections
- The ELF file format
- 本例基于64位的小端序linux机器
- php的unpack函数第一个参数的说明
- v:格式化为unsigned short(固定16位,小端序)
- V:格式化为unsigned long(固定32位,小端序;注意不同于64位Linux上C语言8字节的unsigned long)
- P:格式化为unsigned long long(固定64位,小端序)
生成目标文件和可执行文件
#include <stdio.h>
/*
 * Print the greeting "hello, <who>!" followed by a newline to stdout.
 *
 * who: NUL-terminated name to greet. It is only read, never written, so it
 *      is const-qualified and callers may pass string literals safely.
 */
void say_hello(const char *who) {
    printf("hello, %s!\n", who);
}
/* Name passed to say_hello(); it points at a string literal, so the
 * characters it refers to must not be modified. */
char *my_name = "wb";

/*
 * Program entry point: greets my_name and exits successfully.
 * Declared with (void) so that it is a real prototype; an empty parameter
 * list in C11 leaves the parameters unspecified.
 */
int main(void) {
    say_hello(my_name);
    return 0;
}
// 生成目标文件hello.o:gcc -c hello.c
// 生成可执行文件hello:gcc -no-pie -o hello hello.c
目标文件elf的结构
- ELF header,位于文件开头的64字节(偏移0~63),存储文件的描述信息,以及Program header table和Section header table的起始位置
- N个Section,Section的种类有:
- SHT_STRTAB有3种:.shstrtab表示section name;.strtab表示symbol table条目的符号名称;.dynstr表示dynamic symbol table条目的符号名称
- SHT_SYMTAB只有.symtab,标识所有在elf文件内的符号
- SHT_DYNSYM只有.dynsym,标识所有imported或者exported的符号
- Section header table,目标文件需要,每个条目64字节,对应一个Section的信息。没有strip过的可执行文件都含有此信息
- Program header table,可执行文件需要
- 举例分析目标文件hello.o:
- ELF header占用64字节
- N个Section占用6488-64-1472=4952字节(6488为hello.o的文件总大小,即Section header table的起始位置5016加上其自身的1472字节)
- Section header table占用23*64=1472字节
ELF header、Section header、Program header、Symbols
// ELF header: the fixed-size record at the very start of the file (64 bytes for ELF64).
// It describes the file and tells a reader where the program header table and the
// section header table start, how large each of their entries is, and how many
// entries they hold.
typedef struct {
unsigned char e_ident[EI_NIDENT]; /* Magic number and other info */
Elf64_Half e_type; /* Object file type */
Elf64_Half e_machine; /* Architecture */
Elf64_Word e_version; /* Object file version */
Elf64_Addr e_entry; /* Entry point virtual address */
Elf64_Off e_phoff; /* Program header table file offset (0 when the file has none, e.g. hello.o) */
Elf64_Off e_shoff; /* Section header table file offset */
Elf64_Word e_flags; /* Processor-specific flags */
Elf64_Half e_ehsize; /* ELF header size in bytes */
Elf64_Half e_phentsize; /* Program header table entry size */
Elf64_Half e_phnum; /* Program header table entry count */
Elf64_Half e_shentsize; /* Section header table entry size */
Elf64_Half e_shnum; /* Section header table entry count */
Elf64_Half e_shstrndx; /* Index of the section header entry that describes .shstrtab (section names) */
} Elf64_Ehdr;
// Section header: ONE entry of the section header table (64 bytes for ELF64).
// Each entry describes a single section: its name, type, and where its bytes
// live in the file.
typedef struct {
Elf64_Word sh_name; /* Section name: byte offset of the name inside .shstrtab */
Elf64_Word sh_type; /* Section type */
Elf64_Xword sh_flags; /* Section flags */
Elf64_Addr sh_addr; /* Section virtual addr at execution */
Elf64_Off sh_offset; /* Section file offset */
Elf64_Xword sh_size; /* Section size in bytes */
Elf64_Word sh_link; /* Link to another section (e.g. .symtab links to its .strtab) */
Elf64_Word sh_info; /* Additional section information */
Elf64_Xword sh_addralign; /* Section alignment */
Elf64_Xword sh_entsize; /* Entry size if section holds table */
} Elf64_Shdr;
// Program header: ONE entry of the program header table (56 bytes for ELF64:
// two 4-byte words followed by six 8-byte fields). Each entry describes a
// segment. Executables need this table; a relocatable object such as hello.o
// has none.
typedef struct {
Elf64_Word p_type; /* Segment type */
Elf64_Word p_flags; /* Segment flags */
Elf64_Off p_offset; /* Segment file offset */
Elf64_Addr p_vaddr; /* Segment virtual address */
Elf64_Addr p_paddr; /* Segment physical address */
Elf64_Xword p_filesz; /* Segment size in file */
Elf64_Xword p_memsz; /* Segment size in memory */
Elf64_Xword p_align; /* Segment alignment */
} Elf64_Phdr;
// Symbol table entry (24 bytes for ELF64). Both .symtab and .dynsym are arrays
// of this record; names are looked up in .strtab and .dynstr respectively.
typedef struct {
Elf64_Word st_name; /* Symbol name: byte offset of the name inside the string table */
unsigned char st_info; /* Symbol type and binding */
unsigned char st_other; /* Symbol visibility */
Elf64_Section st_shndx; /* Section header table index, except the special values UNDEF (0), ABS (0xfff1) and COMMON (0xfff2) */
Elf64_Addr st_value; /* Symbol address: in an object file, the offset from the start of the section named by st_shndx (not a file offset: main is 0x27 inside .text, which itself starts at file offset 0x40); in an executable, the absolute run-time address */
Elf64_Xword st_size; /* Symbol size in bytes */
} Elf64_Sym;
$ readelf -h hello.o
ELF Header:
Class: ELF64
Data: 2's complement, little endian
OS/ABI: UNIX - System V
Type: REL (Relocatable file)
Machine: Advanced Micro Devices X86-64
Start of program headers: 0 (bytes into file)
Start of section headers: 5016 (bytes into file) //Section header table的起始位置
Size of this header: 64 (bytes) //ELF header的占用大小
Size of program headers: 0 (bytes) //hello.o没有program header table
Number of program headers: 0 //hello.o没有program header table
Size of section headers: 64 (bytes) //Section header table每个条目占用大小
Number of section headers: 23 //Section header table条目个数
Section header string table index: 22 //.shstrtab Section位于Section header table中索引为22的条目(索引从0开始,即第23个条目)
$ readelf -S -W hello.o
Section Headers:
[Nr] Name Type Address Off Size ES Flg Lk Inf Al
[ 0] NULL 0000000000000000 000000 000000 00 0 0 0
[ 1] .text PROGBITS 0000000000000000 000040 000041 00 AX 0 0 1
[ 2] .rela.text RELA 0000000000000000 0002c8 000060 18 I 12 1 8
[ 3] .data PROGBITS 0000000000000000 000081 000000 00 WA 0 0 1
[ 4] .bss NOBITS 0000000000000000 000081 000000 00 WA 0 0 1
[ 5] .rodata PROGBITS 0000000000000000 000081 00000f 00 A 0 0 1
[ 6] .data.rel.local PROGBITS 0000000000000000 000090 000008 00 WA 0 0 8
[ 7] .rela.data.rel.local RELA 0000000000000000 000328 000018 18 I 12 6 8
[ 8] .comment PROGBITS 0000000000000000 000098 00002a 01 MS 0 0 1
[ 9] .note.GNU-stack PROGBITS 0000000000000000 0000c2 000000 00 0 0 1
[10] .eh_frame PROGBITS 0000000000000000 0000c8 000058 00 A 0 0 8
[11] .rela.eh_frame RELA 0000000000000000 000340 000030 18 I 12 10 8
[12] .symtab SYMTAB 0000000000000000 000120 000168 18 13 10 8
[13] .strtab STRTAB 0000000000000000 000288 00003d 00 0 0 1
[14] .shstrtab STRTAB 0000000000000000 000370 000076 00 0 0 1
$ readelf -l -W hello 或者 objdump -p hello
Program Headers:
Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align
PHDR 0x000040 0x0000000000000040 0x0000000000000040 0x0001f8 0x0001f8 R 0x8
INTERP 0x000238 0x0000000000000238 0x0000000000000238 0x00001c 0x00001c R 0x1
[Requesting program interpreter: /lib64/ld-linux-x86-64.so.2]
LOAD 0x000000 0x0000000000000000 0x0000000000000000 0x0008a0 0x0008a0 R E 0x200000
LOAD 0x000db8 0x0000000000200db8 0x0000000000200db8 0x000260 0x000268 RW 0x200000
DYNAMIC 0x000dc8 0x0000000000200dc8 0x0000000000200dc8 0x0001f0 0x0001f0 RW 0x8
NOTE 0x000254 0x0000000000000254 0x0000000000000254 0x000044 0x000044 R 0x4
GNU_EH_FRAME 0x000734 0x0000000000000734 0x0000000000000734 0x000044 0x000044 R 0x4
GNU_STACK 0x000000 0x0000000000000000 0x0000000000000000 0x000000 0x000000 RW 0x10
GNU_RELRO 0x000db8 0x0000000000200db8 0x0000000000200db8 0x000248 0x000248 R 0x1
$ readelf --symbols -W hello.o
Symbol table '.symtab' contains 15 entries:
Num: Value Size Type Bind Vis Ndx Name
0: 0000000000000000 0 NOTYPE LOCAL DEFAULT UND
1: 0000000000000000 0 FILE LOCAL DEFAULT ABS hello.c
2: 0000000000000000 0 SECTION LOCAL DEFAULT 1
3: 0000000000000000 0 SECTION LOCAL DEFAULT 3
4: 0000000000000000 0 SECTION LOCAL DEFAULT 4
5: 0000000000000000 0 SECTION LOCAL DEFAULT 5
6: 0000000000000000 0 SECTION LOCAL DEFAULT 6
7: 0000000000000000 0 SECTION LOCAL DEFAULT 9
8: 0000000000000000 0 SECTION LOCAL DEFAULT 10
9: 0000000000000000 0 SECTION LOCAL DEFAULT 8
10: 0000000000000000 39 FUNC GLOBAL DEFAULT 1 say_hello
11: 0000000000000000 0 NOTYPE GLOBAL DEFAULT UND _GLOBAL_OFFSET_TABLE_
12: 0000000000000000 0 NOTYPE GLOBAL DEFAULT UND printf
13: 0000000000000000 8 OBJECT GLOBAL DEFAULT 6 my_name
14: 0000000000000027 26 FUNC GLOBAL DEFAULT 1 main
用php读取elf结构
<?php
/**
 * Dump the main structures of a 64-bit little-endian ELF file:
 *   - the ELF header fields that locate the program/section header tables,
 *   - every section header entry (name, file offset, size),
 *   - every program header entry,
 *   - the .symtab and .dynsym symbol tables (when present).
 *
 * Usage: php <this script> [path]      (path defaults to "hello.o")
 */

/**
 * Read the raw bytes of one section.
 *
 * @param resource $fp      Open ELF file.
 * @param array    $section [file offset, size in bytes] pair.
 * @return string Section contents; "" for an empty section or a failed read
 *                (fread() must not be called with a zero length).
 */
function elf_read_section($fp, $section)
{
    list($offset, $size) = $section;
    if ($size <= 0) {
        return "";
    }
    fseek($fp, $offset, SEEK_SET);
    $bytes = fread($fp, $size);
    return $bytes === false ? "" : $bytes;
}

/**
 * Print every entry of one symbol table section; does nothing when the
 * section is absent (e.g. .symtab in a stripped file, .dynsym in hello.o).
 *
 * @param resource $fp         Open ELF file.
 * @param array    $sh_table   Section name => [file offset, size].
 * @param string   $symSection Symbol table section name (.symtab / .dynsym).
 * @param string   $strSection Matching string table name (.strtab / .dynstr).
 * @param string   $label      Prefix printed at the start of each line.
 */
function elf_dump_symbols($fp, $sh_table, $symSection, $strSection, $label)
{
    if (!isset($sh_table[$symSection])) {
        return;
    }
    $entSize = 24; // sizeof(Elf64_Sym)
    $names = isset($sh_table[$strSection]) ? elf_read_section($fp, $sh_table[$strSection]) : "";
    list($tableOff, $tableSize) = $sh_table[$symSection];
    // Only complete entries are decoded; a trailing partial entry is ignored.
    $count = (int) ($tableSize / $entSize);
    // Reserved section indexes that do not refer to a real section.
    $ndxArr = [0 => 'UND', 65521 => 'ABS', 65522 => 'COM'];
    for ($i = 0; $i < $count; $i++) {
        fseek($fp, $tableOff + $i * $entSize, SEEK_SET);
        $st_name = uint($fp, 4);
        fseek($fp, 2, SEEK_CUR); // skip st_info(1) + st_other(1)
        $ndx = uint($fp, 2);
        $value = uint($fp, 8);
        $size = uint($fp, 8);
        $ndx = isset($ndxArr[$ndx]) ? $ndxArr[$ndx] : $ndx;
        $name = get_string_name($names, $st_name);
        printf("%s: %2s value: %12s size: %12s ndx: %3s name: %-24s\n", $label, $i, $value, $size, $ndx, $name);
    }
}

$path = isset($argv[1]) ? $argv[1] : "hello.o";
$fp = fopen($path, "rb");
if ($fp === false) {
    print_ln("cannot open file: " . $path);
    exit(1);
}

// Every offset below assumes the ELF64 little-endian layout, so verify
// e_ident first: magic "\x7fELF", EI_CLASS == 2 (ELFCLASS64), EI_DATA == 1 (LSB).
$ident = fread($fp, 6);
if ($ident === false || strlen($ident) < 6 || substr($ident, 0, 4) !== "\x7fELF") {
    print_ln("not an ELF file: " . $path);
    fclose($fp);
    exit(1);
}
if (ord($ident[4]) !== 2 || ord($ident[5]) !== 1) {
    print_ln("only 64-bit little-endian ELF files are supported: " . $path);
    fclose($fp);
    exit(1);
}

// e_phoff sits at offset 32: e_ident(16) + e_type(2) + e_machine(2) + e_version(4) + e_entry(8)
fseek($fp, 32, SEEK_SET);
$ph_off = uint($fp, 8);
print_ln("program header offset in file: " . $ph_off);
$sh_off = uint($fp, 8);
print_ln("section header offset in file: " . $sh_off);
fseek($fp, 6, SEEK_CUR); // skip e_flags(4) + e_ehsize(2)
$ph_ent_size = uint($fp, 2);
print_ln("program header entry size: " . $ph_ent_size);
$ph_num = uint($fp, 2);
print_ln("program header number: " . $ph_num);
$sh_ent_size = uint($fp, 2);
print_ln("section header entry size: " . $sh_ent_size);
$sh_num = uint($fp, 2);
print_ln("section header number: " . $sh_num);
$sh_strtab_index = uint($fp, 2);
print_ln("section header string table index: " . $sh_strtab_index);

// Locate and load .shstrtab (the section-name string table), if the file
// actually has a section header table that contains it.
$shstrtab = "";
if ($sh_strtab_index < $sh_num) {
    fseek($fp, $sh_off + $sh_strtab_index * $sh_ent_size, SEEK_SET);
    fseek($fp, 24, SEEK_CUR); //sh_name(4) + sh_type(4) + sh_flags(8) + sh_addr(8) = 24
    $str_table_off = uint($fp, 8);
    print_ln("section name string table offset: " . $str_table_off);
    $str_table_size = uint($fp, 8);
    print_ln("section name string table size: " . $str_table_size);
    $shstrtab = elf_read_section($fp, [$str_table_off, $str_table_size]);
}

// Walk the section header table and remember offset/size per section name.
$sh_table = [];
for ($i = 0; $i < $sh_num; $i++) {
    fseek($fp, $sh_off + $i * $sh_ent_size, SEEK_SET);
    $sh_name = uint($fp, 4);
    fseek($fp, 20, SEEK_CUR); //sh_type(4) + sh_flags(8) + sh_addr(8) = 20
    $sh_offset = uint($fp, 8);
    $sh_size = uint($fp, 8);
    $name = get_string_name($shstrtab, $sh_name);
    $sh_table[$name] = [$sh_offset, $sh_size];
    printf("section: %2s name: %-24s offset: %12s size: %12s\n", $i, $name, $sh_offset, $sh_size);
}

// Walk the program header table (empty for relocatable objects).
for ($i = 0; $i < $ph_num; $i++) {
    fseek($fp, $ph_off + $i * $ph_ent_size, SEEK_SET);
    fseek($fp, 16, SEEK_CUR); // skip p_type(4) + p_flags(4) + p_offset(8)
    $vAddr = uint($fp, 8);
    $pAddr = uint($fp, 8);
    $fileSize = uint($fp, 8);
    $memSize = uint($fp, 8);
    $align = uint($fp, 8);
    printf("program: %2s vAddr: %12s pAddr: %12s fileSize: %12s memSize: %12s align: %12s\n",
        $i, $vAddr, $pAddr, $fileSize, $memSize, $align);
}

// Static symbols (names in .strtab) and dynamic symbols (names in .dynstr).
elf_dump_symbols($fp, $sh_table, '.symtab', '.strtab', 'symtab');
elf_dump_symbols($fp, $sh_table, '.dynsym', '.dynstr', 'dynsym');

fclose($fp);
/**
 * Read an unsigned little-endian integer from the current position of $fp
 * and advance the position past it.
 *
 * @param resource $fp   Readable stream.
 * @param int      $size Width in bytes: 1, 2, 4 or 8.
 * @return int The decoded value. PHP integers are signed, so on 64-bit PHP
 *             an 8-byte value above PHP_INT_MAX comes back negative.
 * @throws InvalidArgumentException If $size is not a supported width.
 * @throws RuntimeException         If fewer than $size bytes could be read
 *                                  (truncated file or seek past EOF).
 */
function uint($fp, $size)
{
    // unpack() codes with a fixed width and little-endian byte order.
    $sizeFlags = [1 => 'C', 2 => 'v', 4 => 'V', 8 => 'P'];
    $size = (int) $size;
    if (!isset($sizeFlags[$size])) {
        throw new InvalidArgumentException("unsupported integer width: " . $size);
    }
    $bytes = fread($fp, $size);
    // A short read would make unpack() fail with only a warning; fail loudly.
    if ($bytes === false || strlen($bytes) !== $size) {
        throw new RuntimeException("unexpected end of file while reading " . $size . " byte(s)");
    }
    $value = unpack($sizeFlags[$size], $bytes);
    return $value[1];
}
/**
 * Extract the NUL-terminated string that starts at byte offset $start of an
 * ELF string table.
 *
 * @param string $str   Raw string table bytes (.shstrtab, .strtab, .dynstr).
 * @param int    $start Byte offset of the first character of the name.
 * @return string The name without its terminator. Returns "" when $start is
 *                outside the table, and the remaining bytes when the table
 *                lacks a terminating NUL, so callers always get a string.
 */
function get_string_name($str, $start)
{
    $str = (string) $str;
    $start = (int) $start;
    if ($start < 0 || $start >= strlen($str)) {
        return "";
    }
    // Search in place from $start instead of copying the tail of the table.
    $end = strpos($str, "\x00", $start);
    if ($end === false) {
        return substr($str, $start);
    }
    return substr($str, $start, $end - $start);
}
/**
 * Write $msg to standard output followed by a single "\n".
 *
 * @param mixed $msg Value to print; it is converted to a string first.
 */
function print_ln($msg)
{
    $line = $msg . "\n";
    echo $line;
}
本作品采用《CC 协议》,转载必须注明作者和本文链接