# -------------------------------------------------
# byter.bytes -- a text-to-binary converter
# -------------------------------------------------
#
# Reads text from stdin and writes the bytes it describes to stdout.
#
# Input grammar, as enforced by the state machine in the code below:
#   - a byte is exactly two hex digits (0-9, a-f, A-F)
#   - bytes are separated by at least one space or newline
#   - '#' is accepted only as the first character of a line; the rest
#     of that line is then ignored
#   - newline is accepted at the start of a line, inside a comment, or
#     right after a complete byte (so not after a trailing space)
#   - any other character, or a character in the wrong state, ends the
#     program with exit status 1
#   - end of input is accepted in every state except the middle of a
#     byte; in that state it also ends the program with exit status 1
#     instead of silently dropping the half-read byte
#
# Exit status: 0 at end of input, 1 on malformed input or when a read
# or write syscall returns a negative value other than -4 (eintr).
#
# Parser states (one bit each, kept in bl):
#   01 start of line
#   02 inside comment
#   04 waiting for number
#   08 middle of number (first digit seen, high nibble kept in bh)
#   10 end of number
#
# Layout notes for whoever edits the bytes by hand:
#   - every jump is a rel8 jump; [rip+xx] / [rip-xx] is relative to the
#     end of the jump instruction
#   - the file offset of each jump target is given next to its label;
#     memory address = file offset + 400000
#   - "jz read input" at offset 135 sits at displacement -7f, one byte
#     short of the rel8 limit, so nothing can be inserted between
#     offsets b8 and 135 without re-encoding that jump; this is why
#     the end-of-input block lives after the write loop

# -------------------------------------------------
# ELF header
# -------------------------------------------------
# magic number "ELF"
7f 45 4c 46
# 64-bit architecture
02
# little-endian
01
# ELF version, the one and only
01
# no ELF extensions
00
# reserved
00 00 00 00 00 00 00 00
# executable file
02 00
# x86-64 architecture
3e 00
# file version, the one and only
01 00 00 00
# address of first instruction to execute
b0 00 40 00 00 00 00 00
# file offset of the program header table
40 00 00 00 00 00 00 00
# file offset of the section header table (absent)
00 00 00 00 00 00 00 00
# architecture-specific flags
00 00 00 00
# size of this ELF header
40 00
# size of a program header
38 00
# count of program headers
02 00
# size of a section header
40 00
# count of section headers
00 00
# index of the section name section header (absent)
00 00

# -------------------------------------------------
# 1st program header (code)
# -------------------------------------------------
# loadable segment
01 00 00 00
# permissions: read+execute
05 00 00 00
# file offset
00 00 00 00 00 00 00 00
# memory address
00 00 40 00 00 00 00 00
# reserved
00 00 00 00 00 00 00 00
# size in file (whole file: headers + code, must match the end offset
# noted at the bottom of this file)
61 01 00 00 00 00 00 00
# size in memory
61 01 00 00 00 00 00 00
# 4KB alignment
00 10 00 00 00 00 00 00

# -------------------------------------------------
# 2nd program header (read/write buffer)
# -------------------------------------------------
# The code reads two fields of this header at run time: the memory
# address (file offset 88) and the size in memory (file offset a0).
# loadable segment
01 00 00 00
# permissions: read+write
06 00 00 00
# file offset
00 00 00 00 00 00 00 00
# memory address
00 00 41 00 00 00 00 00
# reserved
00 00 00 00 00 00 00 00
# size in file
00 00 00 00 00 00 00 00
# size in memory
00 10 00 00 00 00 00 00
# 4KB alignment
00 10 00 00 00 00 00 00

# -------------------------------------------------
# code (file offset b0)
# -------------------------------------------------
# Register roles:
#   rbp  address of the start of the code (4000b0), used as a base to
#        reach the fields of the 2nd program header
#   bl   parser state, bh high nibble of the byte being parsed; both
#        survive across reads, so a byte may straddle two reads
#   rsi  input cursor, rdi output cursor; both walk the same buffer,
#        output never overtakes input because a byte of output needs
#        at least two bytes of input
#   ecx  input bytes left in the current chunk
# lodsb/stosb move forward; this assumes the direction flag is clear
# at process entry.

# lea ebp, [rip-06] (start of code, end of headers)
# (no REX.W prefix: a 32-bit lea, the result zero-extends into rbp)
8d 2d fa ff ff ff
# mov bl, 01 (state: start of line)
b3 01

## read input (offset b8)
# xor edi, edi (stdin)
33 ff
# mov esi, [rbp-28] (start of buffer)
8b 75 d8
# mov edx, [rbp-10] (size of buffer)
8b 55 f0
# do: mov eax, edi (read syscall) (offset c0)
8b c7
# syscall
0f 05
# cmp eax, -4 (eintr)
3d fc ff ff ff
# je [rip-0b] (retry)
74 f5
# test eax, eax
85 c0
# js [rip+46] (panic)
78 46
# jz [rip+7b] (end of input)
74 7b
# mov ecx, eax (size of input)
8b c8
# mov edi, esi (start of buffer)
8b fe

## finite state machine (offset d5)
# For each candidate transition, dl is the set of states in which the
# character is acceptable and dh is the state to move to.
# lodsb
ac
# cmp al, 0a (newline)
3c 0a
# mov dl, 13 (states which accept newline)
b2 13
# mov dh, 01 (state: start of line)
b6 01
# je [rip+43] (check transition)
74 43
# test bl, 02 (state: inside comment)
f6 c3 02
# jnz [rip+44] (next iteration)
75 44
# cmp al, 20 (space)
3c 20
# mov dl, 15 (states which accept space)
b2 15
# mov dh, 04 (state: waiting for number)
b6 04
# je [rip+36] (check transition)
74 36
# cmp al, 23 ('#')
3c 23
# mov dl, 01 (states which accept '#')
b2 01
# mov dh, 02 (state: inside comment)
b6 02
# je [rip+2e] (check transition)
74 2e
# cmp al, 30 ('0')
3c 30
# jb [rip+1e] (panic)
72 1e
# cmp al, 39 ('9')
3c 39
# jbe [rip+0c] (found digit)
76 0c
# or al, 20 (smash case)
0c 20
# cmp al, 61 ('a')
3c 61
# jb [rip+14] (panic)
72 14
# cmp al, 66 ('f')
3c 66
# ja [rip+10] (panic)
77 10
# add al, 09 (low nibble of 'a'..'f' becomes a..f)
04 09

## found digit (offset 107)
# Only the low nibble of al is meaningful from here on.
# test bl, 05 (states which accept a first digit)
f6 c3 05
# jz [rip+47] (second digit)
74 47
# mov bl, 08 (state: middle of number)
b3 08
# shl al, 4
c0 e0 04
# mov bh, al
8a f8
# jmp [rip+12] (next iteration)
eb 12

## panic (offset 115)
# mov edi, 1 (nonzero exit status)
bf 01 00 00 00

## exit (offset 11a)
# Entered with edi already holding the exit status.
# mov eax, 3c (exit syscall)
b8 3c 00 00 00
# syscall
0f 05

## check transition (offset 121)
# test bl, dl (acceptable states)
84 da
# jz [rip-10] (panic)
74 f0
# mov bl, dh
8a de

## next iteration (offset 127)
# loop [rip-54] (finite state machine)
e2 ac

## write output (offset 129)
# mov edx, edi (end of output buffer)
8b d7
# mov edi, 1 (stdout)
bf 01 00 00 00
# mov esi, [rbp-28] (start of buffer)
8b 75 d8
# sub edx, esi (get size of output buffer)
2b d6
# maybe write more (offset 135): edx is the count still to write and
# the zero flag reflects it, both on first entry and after the
# "sub edx, eax" of a partial write
# jz [rip-7f] (read input)
74 81
# do: mov eax, edi (write syscall) (offset 137)
8b c7
# syscall
0f 05
# cmp eax, -4 (eintr)
3d fc ff ff ff
# je [rip-0b] (retry)
74 f5
# test eax, eax
85 c0
# js [rip-31] (panic)
78 cf
# add esi, eax
03 f0
# sub edx, eax
2b d0
# jmp [rip-17] (maybe write more)
eb e9

## end of input (offset 14c)
# Reached when read returns 0; edi is still 0 from "read input", so
# jumping to exit reports success. A number cut off after its first
# digit is rejected, like newline or space would be in that state.
# test bl, 08 (state: middle of number)
f6 c3 08
# jnz [rip-3c] (panic)
75 c4
# jmp [rip-39] (exit)
eb c7

## second digit (offset 153)
# test bl, 08 (states which accept a second digit)
f6 c3 08
# jz [rip-43] (panic)
74 bd
# mov bl, 10 (state: end of number)
b3 10
# and al, 0f
24 0f
# or al, bh
0a c7
# stosb
aa
# jmp [rip-3a] (next iteration)
eb c6
# end of file (offset 161)