Linux NASM detect EOF
You are comparing the address of the buffer to EOF (-1) instead of the character stored in the buffer.
Having said that, the read
system call does not return the value of EOF when end of file is reached, but it returns zero and doesn't stick anything in the buffer (see man 2 read
). To identify end of file, just check the value of eax
after the call to read
:
section .bss
buf:    resb 1                          ; one-byte I/O buffer

section .text
global _start

; Terminate the process with status 0 (reached when read reports end of file).
_exit:
        mov     eax, 1                  ; sys_exit
        mov     ebx, 0                  ; exit status
        int     80h

; Copy stdin to stdout one byte at a time (32-bit int 80h ABI).
; sys_read returns the byte count in eax: 0 at end of file, -errno on error.
_start:
        mov     eax, 3                  ; sys_read
        mov     ebx, 0                  ; stdin
        mov     ecx, buf                ; buffer
        mov     edx, 1                  ; read byte count
        int     80h
        test    eax, eax
        jz      _exit                   ; 0 bytes read: end of file
        js      .read_failed            ; negative: error, nothing was stored in buf
        mov     eax, 4                  ; sys_write
        mov     ebx, 1                  ; stdout
        mov     ecx, buf                ; buffer
        mov     edx, 1                  ; write byte count
        int     80h
        jmp     _start

; A failed read would otherwise echo the stale byte in buf forever.
.read_failed:
        mov     eax, 1                  ; sys_exit
        mov     ebx, 1                  ; non-zero status reports the failure
        int     80h
If you did want to properly compare the character to some value, use:
cmp byte [buf], VALUE
Also, I renamed char
to buf
. char
is a basic C data type and a bad choice for a variable name.
EOF in assembly using fscanf
Why are you not checking the return value of fscanf
??? It returns EOF
when the end of file is reached. EOF
is generally -1, so testing the sign of the return value is enough to detect it. (Since you didn't tag your assembler, I will use NASM format for the example.)
; Word-copy loop: fscanf one token from the input, fprintf it to the output.
; Uses esi and ebp as the two stream arguments and ebx as the buffer argument;
; all three are set up by code outside this fragment.
.Read:
        push    ebx                     ; arg 3: destination buffer
        push    fmtstr                  ; arg 2: format string
        push    esi                     ; arg 1: stream to read from
        call    fscanf
        add     esp, 12                 ; caller pops the 3 dword arguments
        test    eax, eax
        js      .NoMore                 ; negative return (EOF): stop copying

        push    ebx                     ; arg 3: the token just read
        push    fmtstr                  ; arg 2: same format string
        push    ebp                     ; arg 1: stream to write to
        call    fprintf
        add     esp, 12                 ; caller pops the 3 dword arguments
        jmp     .Read

.NoMore:
...
; Copy in.txt to out.txt one whitespace-delimited token per line using libc stdio.
; 32-bit cdecl: arguments are pushed right-to-left and popped by the caller.
; Register roles: esi = input stream, ebp = output stream, ebx = token buffer,
;                 edi = input file size in bytes (from ftell).
; Return values of fopen/fseek/ftell/malloc are deliberately not checked.
extern fopen, fscanf, fprintf, fclose, fread
extern fseek, fwrite, ftell, rewind, malloc
extern free, exit, stdout

%define SEEK_END 2

global main

section .data
read_mode       db "r", 0
write_mode      db "w", 0
textFile_name   db "in.txt",0
dictFile_name   db "out.txt", 0
fmtstr          db "%s",10 , 0

section .text
main:
        ; esi = fopen(textFile_name, read_mode)
        push    read_mode
        push    textFile_name
        call    fopen
        add     esp, 4 * 2
        mov     esi, eax

        ; fseek(esi, 0, SEEK_END) so that ftell reports the file size
        push    SEEK_END
        push    0
        push    eax
        call    fseek
        add     esp, 4 * 3

        ; edi = ftell(esi)
        push    esi
        call    ftell
        add     esp, 4 * 1
        mov     edi, eax

        ; ebx = malloc(size + 1)
        ; "%s" stores a terminating NUL after the token, so a file consisting of
        ; a single token needs one byte more than the file size.
        inc     eax
        push    eax
        call    malloc
        add     esp, 4 * 1
        mov     ebx, eax

        ; rewind(esi): back to the start of the input for the copy loop
        push    esi
        call    rewind
        add     esp, 4 * 1

        ; ebp = fopen(dictFile_name, write_mode)
        push    write_mode
        push    dictFile_name
        call    fopen
        add     esp, 4 * 2
        mov     ebp, eax

.Read:
        ; fscanf(esi, fmtstr, ebx)
        push    ebx
        push    fmtstr
        push    esi
        call    fscanf
        add     esp, 4 * 3
        test    eax, eax
        js      .NoMore                 ; negative return (EOF): no more tokens

        ; fprintf(ebp, fmtstr, ebx)
        push    ebx
        push    fmtstr
        push    ebp
        call    fprintf
        add     esp, 4 * 3
        jmp     .Read

.NoMore:
        push    esi
        call    fclose
        add     esp, 4 * 1

        push    ebp
        call    fclose
        add     esp, 4 * 1

        push    ebx
        call    free
        add     esp, 4 * 1

        push    0
        call    exit                    ; does not return, so no stack cleanup follows
Wouldn't it be easier to just use fread
and fwrite
?
section .text
; Copy in.txt to out.txt in one fread/fwrite pair (32-bit cdecl, libc stdio).
; Register roles: esi = current stream, ebx = heap buffer,
;                 edi = file size from ftell, then the byte count fread delivered.
; Return values of fopen/fseek/ftell/malloc are deliberately not checked.
main:
        ; esi = fopen(textFile_name, read_mode)
        push    read_mode
        push    textFile_name
        call    fopen
        add     esp, 4 * 2
        mov     esi, eax

        ; fseek(esi, 0, SEEK_END) so that ftell reports the file size
        push    SEEK_END
        push    0
        push    eax
        call    fseek
        add     esp, 4 * 3

        ; edi = ftell(esi)
        push    esi
        call    ftell
        add     esp, 4 * 1
        mov     edi, eax

        ; ebx = malloc(edi)
        push    eax
        call    malloc
        add     esp, 4 * 1
        mov     ebx, eax

        ; rewind(esi): back to the start of the input
        push    esi
        call    rewind
        add     esp, 4 * 1

        ; edi = fread(ebx, 1, edi, esi)
        push    esi
        push    edi
        push    1
        push    ebx
        call    fread
        add     esp, 4 * 4
        mov     edi, eax                ; keep the count actually read, so a short
                                        ; read never writes uninitialised bytes

        push    esi
        call    fclose
        add     esp, 4 * 1

        ; esi = fopen(dictFile_name, write_mode)
        push    write_mode
        push    dictFile_name
        call    fopen
        add     esp, 4 * 2
        mov     esi, eax

        ; fwrite(ebx, 1, edi, esi)
        push    eax
        push    edi
        push    1
        push    ebx
        call    fwrite
        add     esp, 4 * 4

        push    esi
        call    fclose
        add     esp, 4 * 1

        push    ebx
        call    free
        add     esp, 4 * 1

        push    0
        call    exit                    ; does not return, so no stack cleanup follows
Error checking is left out on purpose, you of course should/would check the return values of those calls...
Nasm Linux x64-86 | Add bits at the end of file for correct base 64 encoding
In my previous answer.. that code was supposed to eat 3 bytes always, padded by zeroes, and to fix/patch the result afterwards!
I.e. for single input byte 0x44
the Bytes
needs to be set to 44 00 00
(the first 44
is set by sys_read
, other two need to be cleared by code). You will get the wrong conversion result RAAA
, and then you need to patch it to the correct RA==
.
I.e.
; Skeleton of a Base64 encoder input loop: read up to 3 bytes from stdin,
; zero-pad the input, convert to 4 output characters, then overwrite the
; tail of the output with '=' according to how many bytes were really read.
; Both buffers are over-allocated to 8 bytes so the unconditional padding
; stores below can never land outside them.
SECTION .bss
BYTESLEN equ 3 ; 3 bytes of real buffer are needed
Bytes: resb BYTESLEN + 5; real buffer +5 padding (total 8B)
B64output: resb 4+4 ; 4 bytes are real output buffer
; +4 bytes are padding (total 8B)
SECTION .text
;...
Read:
mov eax,3 ; Specify sys_read call
xor ebx,ebx ; Specify File Descriptor 0: Standard Input
mov ecx,Bytes ; Pass offset of the buffer to read to
mov edx,BYTESLEN ; Pass number of bytes to read at one pass
int 80h ; Call sys_read to fill the buffer
; eax = number of bytes read, 0 at end of input, or a negative -errno value
test eax,eax
jl ReadingError ; negative return: the system call failed, eax holds -errno
mov ebp,eax ; Save # of bytes read from file for later
; mov does not modify flags, so jz below still tests the result of "test eax,eax"
jz Done ; 0 bytes read, no more input
; eax = 1, 2, 3
mov [ecx + eax],ebx ; clear padding bytes
; stores the whole 32-bit EBX (zero), i.e. 4 bytes at Bytes+eax .. Bytes+eax+3,
; which stays inside the 8-byte Bytes buffer for eax <= 3
; ^^ this is a bit nasty EBX reuse, works only for STDIN (0)
; for any file handle use fixed zero: mov word [ecx+eax],0
call ConvertBytesToB64Output ; convert to Base64 output
; overwrite last two/one/none characters based on how many input
; bytes were read (B64output+3+1 = B64output+4 => beyond 4 chars)
; ebp=1 -> output chars 2,3; ebp=2 -> char 3 and first padding byte; ebp=3 -> padding only
mov word [B64output + ebp + 1], '=='
;TODO store B64output where you wish
cmp ebp,3
je Read ; if 3 bytes were read, loop again
; 1 or 2 bytes will continue with "Done:"
Done:
; ...
ReadingError:
; ...
; Converts the 3 input bytes to 4 Base64 characters (body elided here).
; NOTE(review): presumably reads Bytes and writes B64output - confirm against the full source.
ConvertBytesToB64Output:
; ...
ret
Written again to be short and simple, not caring about performance much.
The trick to make instructions simple is to have enough padding at end of buffers, so you don't need to worry about overwriting memory beyond buffers, then you can write two '=='
after each output, and just position it at desired place (either overwriting two last characters, or one last character, or writing it completely outside of output, into the padding area).
Without that, probably a lot of if (length == 1/2/3) {...} else {...}
would creep into code, to guard the memory writes, and overwrite only the output buffer and nothing more.
So make sure you understand what I did and how it works, and add enough padding to your own buffers.
Also... !disclaimer!: I actually don't know how many =
should be at end of base64 output, and when... that's up to OP to study the base64 definition. I'm just showing how to fix wrong output of 3B->4B conversion, which takes shorter input padded by zeroes. Hmm, according to online BASE64 generator it actually works as my code... (input%3) => 0 has no =
, 1 has two =
, and 2 has single =
.
Reading from non-blocking stdin in NASM
Linux system calls return -errno
values on error. read
is returning -EAGAIN
as documented for non-blocking I/O on a device that has no bytes "ready", thus EAX = -11
which has 4 non-zero bytes in its bit-pattern.
You're not actually printing 4 bytes, you're passing a huge value to write
. It writes until the end of the page. Instead of returning -EFAULT
, it returns a count of how many bytes it actually copied from the buffer to the file descriptor, even though it stopped because it hit an unmapped page.
Writing output to the terminal makes that less visible, unless you look at the strace output. ASCII NUL (\0
, a zero byte) prints as zero-width on a standard VT100-style terminal, but in other contexts that's very much not the same as not writing anything. Run ./nonblocking | hexdump -C --no-squeezing
to see the 4kB of zero bytes you write to stdout.
BTW, there's no point in storing EAX to memory just to reload it again. Just mov edx, eax
.
You only get EOF on a TTY when the user types control-D. (Assuming "cooked" mode and default stty
settings).
Non-blocking doesn't turn no-data-ready into EOF; that would make it impossible to distinguish actual end of file! (Non-blocking I/O on a regular file will give you EOF at the end of the file, or -EAGAIN
if the file hasn't been fetched from disk yet so you'd have to block on I/O.)
The situations where read
returns 0
(meaning EOF) are the same for blocking and non-blocking read
.
If blocking read
would sit there waiting for the user to type something (and "submit" it in line-buffered cooked mode with return or control-D), non-blocking read
will return -EAGAIN
.
From the Linux read(2)
man page:
EAGAIN
The file descriptor fd refers to a file other than a socket
and has been marked nonblocking (O_NONBLOCK), and the read
would block. See open(2) for further details on the
O_NONBLOCK flag.
Finding the number of bytes of entered string at runtime
You actually want movzx eax, byte [myvariable+(number_of_digits-1)]
to only load 1 byte, not a dword. Or just directly test memory with test byte [...], 1
. You can skip the sub because '0'
is an even number; subtracting to convert from ASCII code to integer digit doesn't change the low bit.
But yes, you need least significant digit, the last (highest address) in printing / reading order.
A read
system call returns the number of bytes read in EAX. (Or negative error code). This will include a newline if the user hit return, but not if the user redirected from a file that didn't end with a newline. (Or if they submitted input on a terminal using control-d after typing some digits). The most simple and robust way would be to simply loop looking for the first non-digit in the buffer.
But the "clever" / fun way would be to check if [mybuffer + eax - 1]
is a digit, and if so use it. Otherwise check the previous byte. (Or just assume there's a newline and always check [mybuffer + eax - 2]
, the 2nd-last byte of what was read, or off the start of the buffer if the user just pressed return.)
(To efficiently check for an ASCII digit; sub al, '0'
/ cmp al, 9
/ ja non_digit
. See double condition checking in assembly / What is the idea behind ^= 32, that converts lowercase letters to upper and vice versa?)
Just for fun, here's a more compact version that always just checks the 2nd-last byte of the read()
input. (It doesn't check for being a digit, and it reads outside the buffer for input lengths of 0 or 1, e.g. pressing control-D or return.) It also reads outside the buffer on read errors; to provoke one, run strace ./oddeven <&-
to close its stdin.
Note the interesting part:
; Excerpt: pick one of two equal-length messages from the parity of a digit
; and print it with a single sys_write.
; Expects eax to still hold the byte count returned by the preceding sys_read.
; check if the low digit is even or odd
mov ecx, msg_even
mov edx, msg_odd ; these don't set flags and actually could be done after TEST
test byte [mybuf + eax - 2], 1 ; check the low bit of 2nd-last byte of the read input
; ZF is clear when the low bit is 1 (odd digit), so cmovnz switches ecx to msg_odd
cmovnz ecx, edx
;Display selected message
mov eax, 4 ; sys_write
mov ebx, 1 ; file descriptor: stdout
; one length serves both messages; ecx already holds the selected pointer
mov edx, msg_odd.len
int 80h ; write(1, digit&1 ? msg_odd : msg_even, msg_odd.len)
I used cmov
, but a simple branch over a mov ecx, msg_odd
would work. You don't need to duplicate the whole setup for the system call, just run it with the right pointer and length. (ECX and EDX values, and I padded the odd message with a space so I could use the same length for both.)
And this is a homebrewed static_assert(msg_odd.len == msg_even.len)
, using NASM's conditional directives (https://nasm.us/doc/nasmdoc4.html). It's not just a separate preprocessor like C has, it can use NASM numeric equ expressions.
%if msg_odd.len != msg_even.len
; homebrew assert with NASM preprocessor, since I chose to skip doing a 2nd cmov for the length
; (%warning is the NASM directive that emits a user-defined warning; %warn is not one)
%warning we assume both messages have the same length
%endif
The full thing. Outside of the part shown above, I just tweaked comments to sometimes simplify when I thought it was too redundant, and used meaningful label names.
Also, I put .rodata
and .bss
at the top because NASM complained about referencing msg_odd.len
before it was defined. (You previously had your strings in .data
, but read-only data should generally go in .rodata, so the OS can share those pages between runs of the same program because they stay clean.)
Other fixes:
- Linux/Unix uses 0xa line endings: \n, not \n\r.
- stdin is fd 0. 2 is stderr. (2 happens to work because terminal emulators normally run the shell with all 3 file descriptors referring to the same read+write open file description for the tty).
; Prompt for a number on stdin, then report whether it is odd or even.
; 32-bit Linux, int 0x80 ABI: eax = call number, ebx/ecx/edx = arguments.
; Toy program: input is assumed to be digits followed by a newline.

SYS_EXIT        equ 1
SYS_READ        equ 3
SYS_WRITE       equ 4
STDOUT          equ 1

section .rodata
msg_prompt      db "Please enter a number: ", 0xA
.len            equ $- msg_prompt
msg_odd         db "The entered number is odd ", 0xA    ; trailing space keeps both result messages the same length
.len            equ $- msg_odd
msg_even        db "The entered number is even", 0xA
.len            equ $- msg_even

; Assemble-time check: only one length is passed to the final write, so the
; two result messages must be equally long.
%if msg_odd.len != msg_even.len
%warning we assume both messages have the same length
%endif

section .bss
mybuf           resb 128
.len            equ $ - mybuf

section .text
global _start
_start:                                 ; exported so ld does not warn about a missing entry symbol
        ; write(1, msg_prompt, msg_prompt.len)
        mov     edx, msg_prompt.len
        mov     ecx, msg_prompt
        mov     ebx, STDOUT
        mov     eax, SYS_WRITE
        int     0x80

        ; read(0, mybuf, mybuf.len)
        mov     edx, mybuf.len
        mov     ecx, mybuf
        xor     ebx, ebx                ; fd 0 = stdin
        mov     eax, SYS_READ
        int     0x80
        ; eax: negative on error, 0 at EOF, otherwise the number of bytes read.
        ; With "digits\n" input the newline sits at [mybuf + eax - 1] and the
        ; last digit at [mybuf + eax - 2].
        ; If the user only presses return, this reads before the start of mybuf
        ; and may segfault if mybuf begins a page.

        ; Select the message from the low bit of the last digit.
        mov     edx, msg_odd            ; neither mov touches flags
        mov     ecx, msg_even
        test    byte [mybuf + eax - 2], 1
        cmovnz  ecx, edx                ; low bit set -> odd

        ; write(1, selected message, msg_odd.len)
        mov     edx, msg_odd.len
        mov     ebx, STDOUT
        mov     eax, SYS_WRITE
        int     0x80

        ; _exit(0)
        xor     ebx, ebx
        mov     eax, SYS_EXIT
        int     0x80
assemble + link with nasm -felf32 oddeven.asm && ld -melf_i386 -o oddeven oddeven.o
Related Topics
Run 'Perf Stat' on The Output of 'Perf Record'
Linux Shell Kill Signal Sigkill && Kill
Why Isn't Git Bash Transforming The Path to *Nix Notation for My Python Installation
Ansible: Copying One Unique File to Each Server in a Group
Is It Secure to Rely on "X-Forwarded-For" to Restrict Access by Ip in Apache While Using Cloudflare
Xfs - How to Not Modify Mtime When Writing to File
Installing New Version of Python on Debian Linux Server
Install Ssh Server on Embedded Device
Linux Telnet Vt100 Return Key Sends ^M
Ssh Command Output to Save in a Text File in Shell Script
Problem of Understanding Clock_Gettime
User Time Larger Than Real Time
Cmakelist File to Generate Llvm Bitcode File from C Source File
How to Change Port Gitlab on Centos 6
Hbase Does Not Run After ./Start-Hbase.Sh - Permission Denied
Tk Initialization Failed: No Display Name and No $Display Environment Variable