Linux Nasm Detect Eof

Linux NASM detect EOF

You are comparing the address of the buffer to EOF (-1) instead of the character stored in the buffer.

Having said that, the read system call does not return the value of EOF when end of file is reached, but it returns zero and doesn't stick anything in the buffer (see man 2 read). To identify end of file, just check the value of eax after the call to read:

; Echo stdin to stdout one byte at a time (32-bit Linux, int 80h ABI:
; eax = system call number, ebx/ecx/edx = arguments, result in eax).
; Stops when read() returns 0 (end of file) or a negative -errno value.

SYS_EXIT  equ 1
SYS_READ  equ 3
SYS_WRITE equ 4
STDIN     equ 0
STDOUT    equ 1

section .bss
buf: resb 1                     ; one-byte transfer buffer

section .text
global _start

_exit:
    mov eax, SYS_EXIT
    mov ebx, 0                  ; exit status
    int 80h

_start:
    mov eax, SYS_READ
    mov ebx, STDIN
    mov ecx, buf                ; destination buffer
    mov edx, 1                  ; read byte count
    int 80h                     ; eax = bytes read, 0 at EOF, negative on error

    test eax, eax
    jle _exit                   ; EOF (0) or error (<0): stop. Checking only for
                                ; zero would keep re-writing a stale byte forever
                                ; when read() fails.

    mov eax, SYS_WRITE
    mov ebx, STDOUT
    mov ecx, buf                ; the byte just read
    mov edx, 1                  ; write byte count
    int 80h

    jmp _start                  ; fetch the next byte

If you did want to properly compare the character to some value, use:

cmp byte [buf], VALUE ; compares the byte stored at buf (not the address of buf) with VALUE

Also, I renamed char to buf. char is a basic C data type and a bad choice for a variable name.

EOF in assembly using fscanf

Why are you not checking the return value of fscanf??? It returns EOF when the end of file is reached. EOF is generally -1, so (Since you didn't tag your Assembler, I will use NASM format for the example.)....

; Copy loop: read one token with fscanf from the stream in ESI into the buffer
; in EBX, then write it with fprintf to the stream in EBP.
; Arguments are pushed right to left; the caller removes them with add esp.
.Read:  
push ebx ; 3rd argument: buffer that receives the token
push fmtstr ; 2nd argument: format string
push esi ; 1st argument: stream to read from
call fscanf
add esp, 4 * 3 ; drop the three arguments
test eax, eax
js .NoMore ; negative return value (EOF): leave the loop

push ebx ; 3rd argument: the token just read
push fmtstr ; 2nd argument: same format string as for fscanf
push ebp ; 1st argument: stream to write to
call fprintf
add esp, 4 * 3 ; drop the three arguments
jmp .Read ; try the next token
.NoMore:

...

; C library functions used by the file-copy examples.
extern fopen, fscanf, fprintf, fclose, fread
extern fseek, fwrite, ftell, rewind, malloc
extern free, exit, stdout

%define SEEK_END 2              ; fseek "whence" argument: seek relative to end of file

global main

; None of these strings is written at run time, so they are placed in
; read-only data instead of .data.
section .rodata
read_mode db "r", 0             ; fopen mode for the input file
write_mode db "w", 0            ; fopen mode for the output file
textFile_name db "in.txt",0     ; input file name
dictFile_name db "out.txt", 0   ; output file name
fmtstr db "%s",10 , 0           ; "%s\n", shared by fscanf and fprintf

section .text
;-----------------------------------------------------------------------
; main: copy textFile_name to dictFile_name token by token.
;   Each whitespace-delimited token read with fscanf(fmtstr) is written
;   back with fprintf(fmtstr), i.e. one token per output line.
; 32-bit C calling convention: arguments pushed right to left, caller
; pops them, result in eax.
; Register roles:
;   esi = input FILE*      ebp = output FILE*
;   ebx = token buffer (file size + 1 bytes)
; Return values of the library calls are not checked here.
; Never returns: finishes by calling exit(0).
;-----------------------------------------------------------------------
main:

    push read_mode
    push textFile_name
    call fopen                  ; eax = fopen(textFile_name, read_mode)
    add esp, 4 * 2
    mov esi, eax                ; esi = input stream

    push SEEK_END
    push 0
    push eax
    call fseek                  ; fseek(input, 0, SEEK_END)
    add esp, 4 * 3

    push esi
    call ftell                  ; eax = offset of end of file = file size
    add esp, 4 * 1

    ; A single token can be as long as the whole file, and "%s" stores a
    ; terminating NUL after it, so the buffer needs file size + 1 bytes.
    inc eax
    push eax
    call malloc
    add esp, 4 * 1
    mov ebx, eax                ; ebx = token buffer

    push esi
    call rewind                 ; back to the start of the input
    add esp, 4 * 1

    push write_mode
    push dictFile_name
    call fopen                  ; eax = fopen(dictFile_name, write_mode)
    add esp, 4 * 2
    mov ebp, eax                ; ebp = output stream

.Read:
    push ebx
    push fmtstr
    push esi
    call fscanf                 ; fscanf(input, fmtstr, buffer)
    add esp, 4 * 3
    test eax, eax
    js .NoMore                  ; negative return value (EOF): done

    push ebx
    push fmtstr
    push ebp
    call fprintf                ; fprintf(output, fmtstr, buffer)
    add esp, 4 * 3
    jmp .Read

.NoMore:
    push esi
    call fclose                 ; close input
    add esp, 4 * 1

    push ebp
    call fclose                 ; close output
    add esp, 4 * 1

    push ebx
    call free                   ; release the token buffer
    add esp, 4 * 1

    push 0
    call exit                   ; exit(0) does not return, no stack cleanup needed

Wouldn't it be easier to just use fread and fwrite?

section .text
;-----------------------------------------------------------------------
; main: copy textFile_name to dictFile_name with one fread and one fwrite.
; 32-bit C calling convention: arguments pushed right to left, caller
; pops them, result in eax.
; Register roles:
;   esi = current FILE* (input first, then output)
;   edi = byte count (file size, then the count fread actually delivered)
;   ebx = heap buffer holding the file contents
; Return values of fopen/fseek/ftell/malloc are not checked here.
; Never returns: finishes by calling exit(0).
;-----------------------------------------------------------------------
main:

    push read_mode
    push textFile_name
    call fopen                  ; eax = fopen(textFile_name, read_mode)
    add esp, 4 * 2
    mov esi, eax                ; esi = input stream

    push SEEK_END
    push 0
    push eax
    call fseek                  ; fseek(input, 0, SEEK_END)
    add esp, 4 * 3

    push esi
    call ftell                  ; eax = offset of end of file = file size
    add esp, 4 * 1
    mov edi, eax                ; edi = file size

    push eax
    call malloc                 ; buffer for the whole file
    add esp, 4 * 1
    mov ebx, eax                ; ebx = buffer

    push esi
    call rewind                 ; back to the start of the input
    add esp, 4 * 1

    push esi
    push edi
    push 1
    push ebx
    call fread                  ; eax = fread(buffer, 1, size, input)
    add esp, 4 * 4
    mov edi, eax                ; keep the number of bytes actually read, so a
                                ; short read never writes uninitialised bytes

    push esi
    call fclose                 ; close input
    add esp, 4 * 1

    push write_mode
    push dictFile_name
    call fopen                  ; eax = fopen(dictFile_name, write_mode)
    add esp, 4 * 2
    mov esi, eax                ; esi = output stream

    push eax
    push edi
    push 1
    push ebx
    call fwrite                 ; fwrite(buffer, 1, bytes_read, output)
    add esp, 4 * 4

    push esi
    call fclose                 ; close output
    add esp, 4 * 1

    push ebx
    call free                   ; release the buffer
    add esp, 4 * 1

    push 0
    call exit                   ; exit(0) does not return, no stack cleanup needed

Error checking is left out on purpose, you of course should/would check the return values of those calls...

Nasm Linux x64-86 | Add bits at the end of file for correct base 64 encoding

In my previous answer.. that code was supposed to eat 3 bytes always, padded by zeroes, and to fix/patch the result afterwards!

I.e. for single input byte 0x44 the Bytes needs to be set to 44 00 00 (the first 44 is set by sys_read, other two need to be cleared by code). You will get the wrong conversion result RAAA, and then you need to patch it to the correct RA==.

I.e.

SECTION .bss
BYTESLEN equ 3 ; 3 bytes of real buffer are needed
Bytes: resb BYTESLEN + 5; real buffer +5 padding (total 8B)
B64output: resb 4+4 ; 4 bytes are real output buffer
; +4 bytes are padding (total 8B)

SECTION .text

;...
; Read loop: fetch up to BYTESLEN input bytes, zero-pad them, convert them to
; four Base64 characters, then overwrite the tail of the output with '='.
Read:
mov eax,3 ; Specify sys_read call
xor ebx,ebx ; Specify File Descriptor 0: Standard Input
mov ecx,Bytes ; Pass offset of the buffer to read to
mov edx,BYTESLEN ; Pass number of bytes to read at one pass
int 80h ; Call sys_read to fill the buffer
test eax,eax ; set flags from the return value
jl ReadingError ; negative return value: the system call failed (-errno in EAX)
mov ebp,eax ; Save # of bytes read from file for later
; (mov does not change flags, so jz below still tests EAX)
jz Done ; 0 bytes read, no more input
; eax = 1, 2, 3
mov [ecx + eax],ebx ; clear padding bytes: 4-byte store of zero right after
; the bytes read; it ends at Bytes+6 at most, inside the 8-byte buffer
; ^^ this is a bit nasty EBX reuse, works only for STDIN (0)
; for any file handle use fixed zero: mov word [ecx+eax],0
call ConvertBytesToB64Output ; convert to Base64 output
; NOTE(review): EBP is read after this call, so the routine is assumed to
; preserve it - confirm
; overwrite last two/one/none characters based on how many input
; bytes were read (B64output+3+1 = B64output+4 => beyond 4 chars)
; ebp = 1 -> output bytes 2..3, ebp = 2 -> bytes 3..4, ebp = 3 -> bytes 4..5 (padding only)
mov word [B64output + ebp + 1], '=='
;TODO store B64output where you wish
cmp ebp,3
je Read ; if 3 bytes were read, loop again
; 1 or 2 bytes will continue with "Done:"
Done:
; ...

ReadingError:
; ...

; Placeholder: expected to turn the 3 bytes at Bytes into 4 characters at B64output.
ConvertBytesToB64Output:
; ...
ret

Written again to be short and simple, not caring about performance much.

The trick to make instructions simple is to have enough padding at end of buffers, so you don't need to worry about overwriting memory beyond buffers, then you can write two '==' after each output, and just position it at desired place (either overwriting two last characters, or one last character, or writing it completely outside of output, into the padding area).

Without that, a lot of if (length == 1/2/3) {...} else {...} checks would probably creep into the code to guard the memory writes, so that they overwrite only the output buffer and nothing more.

So make sure you understand what I did and how it works, and add enough padding to your own buffers.

Also... !disclaimer!: I actually don't know how many = should be at end of base64 output, and when... that's up to OP to study the base64 definition. I'm just showing how to fix wrong output of 3B->4B conversion, which takes shorter input padded by zeroes. Hmm, according to online BASE64 generator it actually works as my code... (input%3) => 0 has no =, 1 has two =, and 2 has single =.

Reading from non-blocking stdin in NASM

Linux system calls return -errno values on error. read is returning -EAGAIN as documented for non-blocking I/O on a device that has no bytes "ready", thus EAX = -11 which has 4 non-zero bytes in its bit-pattern.

You're not actually printing 4 bytes, you're passing a huge value to write. It writes until the end of the page. Instead of returning -EFAULT, it returns a count of how many bytes it actually copied from the buffer to the file descriptor, even though it stopped because it hit an unmapped page.

Writing output to the terminal makes that less visible, unless you look at the strace output. ASCII NUL (\0, a zero byte) prints as zero-width on a standard VT100-style terminal, but in other contexts that's very much not the same as not writing anything. Run

./nonblocking | hexdump -C --no-squeezing to see the 4kB of zero bytes you write to stdout.

BTW, there's no point in storing EAX to memory just to reload it again. Just mov edx, eax.


You only get EOF on a TTY when the user types control-D. (Assuming "cooked" mode and default stty settings).

Non-blocking doesn't turn no-data-ready into EOF; that would make it impossible to distinguish actual end of file! (Non-blocking I/O on a regular file will give you EOF at the end of the file, or -EAGAIN if the file hasn't been fetched from disk yet so you'd have to block on I/O.)

The situations where read returns 0 (meaning EOF) are the same for blocking and non-blocking read.

If blocking read would sit there waiting for the user to type something (and "submit" it in line-buffered cooked mode with return or control-D), non-blocking read will return -EAGAIN.

From the Linux read(2) man page:

EAGAIN
The file descriptor fd refers to a file other than a socket
and has been marked nonblocking (O_NONBLOCK), and the read
would block. See open(2) for further details on the
O_NONBLOCK flag.

Finding the number of bytes of entered string at runtime

You actually want movzx eax, byte [myvariable+(number_of_digits-1)] to only load 1 byte, not a dword. Or just directly test memory with test byte [...], 1. You can skip the sub because '0' is an even number; subtracting to convert from ASCII code to integer digit doesn't change the low bit.

But yes, you need least significant digit, the last (highest address) in printing / reading order.

A read system call returns the number of bytes read in EAX. (Or negative error code). This will include a newline if the user hit return, but not if the user redirected from a file that didn't end with a newline. (Or if they submitted input on a terminal using control-d after typing some digits). The most simple and robust way would be to simply loop looking for the first non-digit in the buffer.

But the "clever" / fun way would be to check if [mybuffer + eax - 1] is a digit, and if so use it. Otherwise check the previous byte. (Or just assume there's a newline and always check [mybuffer + eax - 2], the 2nd-last byte of what was read, or off the start of the buffer if the user just pressed return.)

(To efficiently check for an ASCII digit; sub al, '0' / cmp al, 9 / ja non_digit. See double condition checking in assembly / What is the idea behind ^= 32, that converts lowercase letters to upper and vice versa?)


Just for fun, here's a more compact version that always just checks the 2nd-last byte of the read() input. (It doesn't check for being a digit, and it reads outside the buffer for input lengths of 0 or 1, e.g. pressing control-D or return.) Also for read errors, e.g. redirect with strace ./oddeven <&- to close its stdin.

Note the interesting part:

  ; check if the low digit is even or odd
  ; (EAX still holds the byte count returned by the preceding read)
mov ecx, msg_even
mov edx, msg_odd ; these don't set flags and actually could be done after TEST
test byte [mybuf + eax - 2], 1 ; check the low bit of 2nd-last byte of the read input
cmovnz ecx, edx ; low bit set (odd digit): ECX = msg_odd, otherwise it stays msg_even

;Display selected message
mov eax, 4 ; sys_write
mov ebx, 1 ; file descriptor: stdout
mov edx, msg_odd.len ; one length serves both messages (the odd one is space-padded)
int 80h ; write(1, digit&1 ? msg_odd : msg_even, msg_odd.len)

I used cmov, but a simple branch over a mov ecx, msg_odd would work. You don't need to duplicate the whole setup for the system call, just run it with the right pointer and length. (ECX and EDX values, and I padded the odd message with a space so I could use the same length for both.)

And this is a homebrewed static_assert(msg_odd.len == msg_even.len), using NASM's conditional directives (https://nasm.us/doc/nasmdoc4.html). It's not just a separate preprocessor like C has, it can use NASM numeric equ expressions.

%if msg_odd.len != msg_even.len
; homebrew assert with NASM preprocessor, since I chose to skip doing a 2nd cmov for the length
; (the directive is spelled %warning; %warn is not a NASM directive)
%warning we assume both messages have the same length
%endif

The full thing. Outside of the part shown above, I just tweaked comments to sometimes simplify when I thought it was too redundant, and used meaningful label names.

Also, I put .rodata and .bss at the top because NASM complained about referencing msg_odd.len before it was defined. (You previously had your strings in .data, but read-only data should generally go in .rodata, so the OS can share those pages between runs of the same program because they stay clean.)

Other fixes:

  • Linux/Unix uses 0xa line endings, \n not \n\r.
  • stdin is fd 0. 2 is stderr. (2 happens to work because terminal emulators normally run the shell with all 3 file descriptors referring to the same read+write open file description for the tty).
; Ask the user to enter a number from the keyboard
; Check if this number is odd or even and display a message to say this
; 32-bit Linux, int 80h system calls: EAX = call number, EBX/ECX/EDX = arguments.

section .rodata
msg_prompt db "Please enter a number: ", 0xA
.len equ $- msg_prompt ; length in bytes, newline included

msg_odd db "The entered number is odd ", 0xA ; padded with a space for same length as even
.len equ $- msg_odd

msg_even db "The entered number is even", 0xA
.len equ $- msg_even

section .bss
mybuf resb 128 ; input buffer for the line read from stdin
.len equ $ - mybuf

section .text
global _start
_start: ; ld defaults to starting at the top of the .text section, but exporting a symbol silences the warning and can make GDB work more easily.

; Display prompt
mov eax, 4 ; sys_write
mov ebx, 1 ; file descriptor: stdout
mov ecx, msg_prompt
mov edx, msg_prompt.len
int 80h ; perform system call

mov eax, 3 ; sys_read
xor ebx, ebx ; file descriptor: stdin
mov ecx, mybuf
mov edx, mybuf.len
int 80h ; read(0, mybuf, len)

; return value in EAX: negative for error, 0 for EOF, or positive byte count
; for this toy program, lets assume valid input ending with digit\n

; the newline will be at [mybuf + eax - 1]. The digit before that, at [mybuf + eax - 2].
; If the user just presses return (EAX = 1), we'll access one byte before the start of mybuf,
; and may segfault if mybuf is at the start of a page. The same goes for EOF or a read error.

; check if the low digit is even or odd
mov ecx, msg_even
mov edx, msg_odd ; these don't set flags and actually could be done after TEST
test byte [mybuf + eax - 2], 1 ; check the low bit of 2nd-last byte of the read input
cmovnz ecx, edx ; low bit set (odd digit): ECX = msg_odd, otherwise it stays msg_even

;Display selected message
mov eax, 4 ; sys_write
mov ebx, 1 ; file descriptor: stdout
mov edx, msg_odd.len ; one length serves both messages, see the check below
int 80h ; write(1, digit&1 ? msg_odd : msg_even, msg_odd.len)

%if msg_odd.len != msg_even.len
; homebrew assert with NASM preprocessor, since I chose to skip doing a 2nd cmov for the length
%warning we assume both messages have the same length
%endif

mov eax, 1 ;system call number (sys_exit)
xor ebx, ebx ; exit status 0
int 0x80 ; _exit(0)

assemble + link with nasm -felf32 oddeven.asm && ld -melf_i386 -o oddeven oddeven.o



Related Topics



Leave a reply



Submit