Linux Nasm Detect Eof

Linux NASM detect EOF

You are comparing the address of the buffer to EOF (-1) instead of the character stored in the buffer.

Having said that, the read system call does not return the value of EOF when end of file is reached, but it returns zero and doesn't stick anything in the buffer (see man 2 read). To identify end of file, just check the value of eax after the call to read:

section .bss
    buf:   resb    1       ; one-byte I/O buffer: sys_read fills it, sys_write echoes it

section .text
    global  _start          ; export the entry point symbol

; Terminate the process with exit status 0 through the int 80h interface.
_exit:
    xor     ebx,    ebx     ; ebx = 0: exit status
    mov     eax,    1       ; eax = 1: exit
    int     80h             ; does not return

; Echo stdin to stdout one byte at a time until end of input.
; 32-bit int 80h interface: eax = call number, ebx/ecx/edx = arguments,
; and the result comes back in eax.
_start:
    mov     eax,    3       ; sys_read
    mov     ebx,    0       ; stdin
    mov     ecx,    buf     ; buffer
    mov     edx,    1       ; read byte count
    int     80h

    ; eax = number of bytes read: 0 means end of file, negative is -errno.
    ; Stop in both cases. Testing only for 0 would echo a stale byte and
    ; spin forever when read keeps failing (e.g. stdin is closed).
    test    eax,    eax
    jle     _exit

    mov     eax,    4       ; sys_write
    mov     ebx,    1       ; stdout
    mov     ecx,    buf     ; buffer
    mov     edx,    1       ; write byte count
    int     80h

    jmp     _start          ; next byte

If you did want to properly compare the character to some value, use:

cmp byte [buf], VALUE

Also, I renamed char to buf. char is a basic C data type and a bad choice for a variable name.

EOF in assembly using fscanf

Why are you not checking the return value of fscanf??? It returns EOF when the end of file is reached. EOF is generally -1, so (Since you didn't tag your Assembler, I will use NASM format for the example.)....

; Excerpt of the copy loop from the full listing further down:
; esi = input FILE*, ebp = output FILE*, ebx = word buffer (all set up there).
.Read:  
    push    ebx                     ; arg 3: destination buffer
    push    fmtstr                  ; arg 2: format string
    push    esi                     ; arg 1: input stream
    call    fscanf
    add     esp, 4 * 3              ; caller removes the three 4-byte arguments
    test    eax, eax                ; set SF from fscanf's return value
    js      .NoMore                 ; negative return (EOF) -> leave the loop

    push    ebx                     ; arg 3: the word just read
    push    fmtstr                  ; arg 2: same format string
    push    ebp                     ; arg 1: output stream
    call    fprintf
    add     esp, 4 * 3
    jmp     .Read                   ; try the next word
.NoMore:

...

extern fopen, fscanf, fprintf, fclose, fread
extern fseek, fwrite, ftell, rewind, malloc
extern free, exit, stdout

%define SEEK_END 2

global main

section .data
read_mode       db  "r", 0          ; fopen mode used for the input file
write_mode      db  "w", 0          ; fopen mode used for the output file
textFile_name   db  "in.txt",0      ; input path passed to fopen
dictFile_name   db  "out.txt", 0    ; output path passed to fopen
fmtstr          db  "%s",10 , 0     ; "%s" + newline, shared by fscanf and fprintf

section .text
; main -- copy in.txt to out.txt, one whitespace-delimited word per line.
; 32-bit cdecl: arguments are pushed right to left, the caller pops them.
; Registers kept live across the libc calls:
;   esi = input FILE*    ebp = output FILE*
;   ebx = word buffer    edi = input size in bytes
; Return values of fopen/fseek/ftell/malloc are deliberately not checked.
main:

    push    read_mode
    push    textFile_name
    call    fopen                   ; fopen("in.txt", "r")
    add     esp, 4 * 2
    mov     esi, eax                ; esi = input stream

    push    SEEK_END
    push    0
    push    eax
    call    fseek                   ; fseek(in, 0, SEEK_END)
    add     esp, 4 * 3

    push    esi
    call    ftell                   ; position at the end = file size
    add     esp, 4 * 1
    mov     edi, eax                ; edi = file size

    ; fscanf's %s stores the word plus a terminating NUL, so a file that is
    ; one single word needs size + 1 bytes; size alone overflows by a byte.
    inc     eax
    push    eax
    call    malloc                  ; malloc(size + 1)
    add     esp, 4 * 1
    mov     ebx, eax                ; ebx = word buffer

    push    esi
    call    rewind                  ; back to the start for reading
    add     esp, 4 * 1

    push    write_mode
    push    dictFile_name
    call    fopen                   ; fopen("out.txt", "w")
    add     esp, 4 * 2
    mov     ebp, eax                ; ebp = output stream

.Read:
    push    ebx
    push    fmtstr
    push    esi
    call    fscanf                  ; fscanf(in, fmtstr, buffer)
    add     esp, 4 * 3
    test    eax, eax
    js      .NoMore                 ; negative return (EOF) -> done

    push    ebx
    push    fmtstr
    push    ebp
    call    fprintf                 ; fprintf(out, fmtstr, buffer)
    add     esp, 4 * 3
    jmp     .Read

.NoMore:
    push    esi
    call    fclose                  ; close the input
    add     esp, 4 * 1

    push    ebp
    call    fclose                  ; close the output
    add     esp, 4 * 1

    push    ebx
    call    free                    ; release the word buffer
    add     esp, 4 * 1

    push    0
    call    exit                    ; exit(0)
    add     esp, 4 * 1              ; not reached: exit does not return

Wouldn't it be easier to just use fread and fwrite?

section .text
; main -- copy in.txt to out.txt with a single fread and a single fwrite.
; 32-bit cdecl: arguments are pushed right to left, the caller pops them.
; Registers kept live across the libc calls:
;   esi = current FILE* (input first, then output)
;   ebx = buffer
;   edi = input size, then the number of bytes actually read
; Return values of fopen/fseek/ftell/malloc are deliberately not checked.
main:

    push    read_mode
    push    textFile_name
    call    fopen                   ; fopen("in.txt", "r")
    add     esp, 4 * 2
    mov     esi, eax                ; esi = input stream

    push    SEEK_END
    push    0
    push    eax
    call    fseek                   ; fseek(in, 0, SEEK_END)
    add     esp, 4 * 3

    push    esi
    call    ftell                   ; position at the end = file size
    add     esp, 4 * 1
    mov     edi, eax                ; edi = file size

    push    eax
    call    malloc                  ; malloc(size)
    add     esp, 4 * 1
    mov     ebx, eax                ; ebx = buffer

    push    esi
    call    rewind                  ; back to the start for reading
    add     esp, 4 * 1

    push    esi
    push    edi
    push    1
    push    ebx
    call    fread                   ; fread(buffer, 1, size, in)
    add     esp, 4 * 4
    ; With an element size of 1 the return value is the byte count read.
    ; Use it for the write so a short read never copies uninitialised
    ; buffer contents into the output file.
    mov     edi, eax

    push    esi
    call    fclose                  ; done with the input
    add     esp, 4 * 1

    push    write_mode
    push    dictFile_name
    call    fopen                   ; fopen("out.txt", "w")
    add     esp, 4 * 2
    mov     esi, eax                ; esi = output stream

    push    eax                     ; eax still holds the output FILE*
    push    edi
    push    1
    push    ebx
    call    fwrite                  ; fwrite(buffer, 1, bytes_read, out)
    add     esp, 4 * 4

    push    esi
    call    fclose                  ; close the output
    add     esp, 4 * 1

    push    ebx
    call    free                    ; release the buffer
    add     esp, 4 * 1

    push    0
    call    exit                    ; exit(0)
    add     esp, 4 * 1              ; not reached: exit does not return

Error checking is left out on purpose, you of course should/would check the return values of those calls...

Nasm Linux x64-86 | Add bits at the end of file for correct base 64 encoding

In my previous answer.. that code was supposed to eat 3 bytes always, padded by zeroes, and to fix/patch the result afterwards!

I.e. for a single input byte 0x44, the Bytes buffer needs to be set to 44 00 00 (the first byte, 44, is set by sys_read; the other two need to be cleared by your code). The conversion will then give you the wrong result RAAA, which you need to patch to the correct RA==.

I.e.

; Skeleton of a Base64 encoder loop: read up to 3 input bytes, zero-pad them,
; convert to 4 output characters, then patch '=' padding over the tail.
; Both buffers are over-allocated so the padding stores below never need
; length checks.
SECTION .bss
BYTESLEN    equ     3           ; 3 bytes of real buffer are needed
Bytes:      resb    BYTESLEN + 5; real buffer +5 padding (total 8B)
B64output:  resb    4+4         ; 4 bytes are real output buffer
                                ; +4 bytes are padding (total 8B)

SECTION .text

        ;...
Read:
        mov     eax,3           ; Specify sys_read call
        xor     ebx,ebx         ; Specify File Descriptor 0: Standard Input
        mov     ecx,Bytes       ; Pass offset of the buffer to read to
        mov     edx,BYTESLEN    ; Pass number of bytes to read at one pass
        int     80h             ; Call sys_read to fill the buffer
        test    eax,eax         ; set SF/ZF from the return value
        jl      ReadingError    ; negative return = -errno from the kernel
        mov     ebp,eax         ; Save # of bytes read from file for later
                                ; (mov leaves the flags from TEST untouched)
        jz      Done            ; 0 bytes read, no more input
        ; eax = 1, 2, 3
        mov     [ecx + eax],ebx ; clear padding bytes: 4-byte store of zero at
                                ; Bytes+eax, ends at Bytes+6 at most (inside 8B)
            ; ^^ this is a bit nasty EBX reuse, works only for STDIN (0)
            ; for any file handle use fixed zero: mov word [ecx+eax],0
        call    ConvertBytesToB64Output     ; convert to Base64 output
        ; overwrite last two/one/none characters based on how many input
        ; bytes were read (B64output+3+1 = B64output+4 => beyond 4 chars)
        ; ebp=1 -> chars 2,3   ebp=2 -> char 3 (+1 in padding)   ebp=3 -> padding only
        mov     word [B64output + ebp + 1], '=='
        ;TODO store B64output where you wish
        cmp     ebp,3
        je      Read            ; if 3 bytes were read, loop again
        ; 1 or 2 bytes will continue with "Done:"
Done:
        ; ...

ReadingError:
        ; ...

ConvertBytesToB64Output:
        ; ...
        ret

Written again to be short and simple, not caring about performance much.

The trick to make instructions simple is to have enough padding at end of buffers, so you don't need to worry about overwriting memory beyond buffers, then you can write two '==' after each output, and just position it at desired place (either overwriting two last characters, or one last character, or writing it completely outside of output, into the padding area).

Without that probably lot of if (length == 1/2/3) {...} else {...} would creep into code, to guard the memory writes, and overwrite only the output buffer and nothing more.

So make sure you understand what I did and how it works, and add enough padding to your own buffers.

Also... !disclaimer!: I actually don't know how many = should be at end of base64 output, and when... that's up to OP to study the base64 definition. I'm just showing how to fix wrong output of 3B->4B conversion, which takes shorter input padded by zeroes. Hmm, according to online BASE64 generator it actually works as my code... (input%3) => 0 has no =, 1 has two =, and 2 has single =.

Reading from non-blocking stdin in NASM

Linux system calls return -errno values on error. read is returning -EAGAIN as documented for non-blocking I/O on a device that has no bytes "ready", thus EAX = -11 which has 4 non-zero bytes in its bit-pattern.

You're not actually printing 4 bytes, you're passing a huge value to write. It writes until the end of the page. Instead of returning -EFAULT, it returns a count of how many bytes it actually copied from the buffer to the file descriptor, even though it stopped because it hit an unmapped page.

Writing output to the terminal makes that less visible, unless you look at the strace output. ASCII NUL (\0, a zero byte) prints as zero-width on a standard VT100-style terminal, but in other contexts that's very much not the same as not writing anything. Run

./nonblocking | hexdump -C --no-squeezing to see the 4kB of zero bytes you write to stdout.

BTW, there's no point in storing EAX to memory just to reload it again. Just mov edx, eax.

You only get EOF on a TTY when the user types control-D. (Assuming "cooked" mode and default stty settings).

Non-blocking doesn't turn no-data-ready into EOF; that would make it impossible to distinguish actual end of file! (Non-blocking I/O on a regular file will give you EOF at the end of the file, or -EAGAIN if the file hasn't been fetched from disk yet so you'd have to block on I/O.)

The situations where read returns 0 (meaning EOF) are the same for blocking and non-blocking read.

If blocking read would sit there waiting for the user to type something (and "submit" it in line-buffered cooked mode with return or control-D), non-blocking read will return -EAGAIN.

From the Linux read(2) man page:

EAGAIN
The file descriptor fd refers to a file other than a socket
and has been marked nonblocking (O_NONBLOCK), and the read
would block. See open(2) for further details on the
O_NONBLOCK flag.

Finding the number of bytes of entered string at runtime

You actually want movzx eax, byte [myvariable+(number_of_digits-1)] to only load 1 byte, not a dword. Or just directly test memory with test byte [...], 1. You can skip the sub because '0' is an even number; subtracting to convert from ASCII code to integer digit doesn't change the low bit.

But yes, you need least significant digit, the last (highest address) in printing / reading order.

A read system call returns the number of bytes read in EAX. (Or negative error code). This will include a newline if the user hit return, but not if the user redirected from a file that didn't end with a newline. (Or if they submitted input on a terminal using control-d after typing some digits). The most simple and robust way would be to simply loop looking for the first non-digit in the buffer.

But the "clever" / fun way would be to check if [mybuffer + eax - 1] is a digit, and if so use it. Otherwise check the previous byte. (Or just assume there's a newline and always check [mybuffer + eax - 2], the 2nd-last byte of what was read. That address is before the start of the buffer if the user just pressed return.)

(To efficiently check for an ASCII digit; sub al, '0' / cmp al, 9 / ja non_digit. See double condition checking in assembly / What is the idea behind ^= 32, that converts lowercase letters to upper and vice versa?)

Just for fun, here's a more compact version that always just checks the 2nd-last byte of the read() input. (It doesn't check for being a digit, and it reads outside the buffer for input lengths of 0 or 1, e.g. pressing control-D or return. The same happens on read errors, where EAX holds a negative error code; e.g. run strace ./oddeven <&- to close its stdin.)

Note the interesting part:

  ; check if the low digit is even or odd
  ; (eax still holds the byte count returned by the preceding read)
  mov    ecx, msg_even                ; default choice: the "even" message
  mov    edx, msg_odd                 ; these don't set flags and actually could be done after TEST
  test   byte [mybuf + eax - 2], 1    ; check the low bit of 2nd-last byte of the read input
  cmovnz ecx, edx                     ; low bit set -> switch to the "odd" message

  ;Display selected message
  mov  eax, 4             ; sys_write
  mov  ebx, 1             ; file descriptor: stdout
  mov  edx, msg_odd.len   ; one length for both messages; the pointer in edx is no longer needed
  int  80h                ; write(1, digit&1 ? msg_odd : msg_even, msg_odd.len)

I used cmov, but a simple branch over a mov ecx, msg_odd would work. You don't need to duplicate the whole setup for the system call, just run it with the right pointer and length. (ECX and EDX values, and I padded the odd message with a space so I could use the same length for both.)

And this is a homebrewed static_assert(msg_odd.len == msg_even.len), using NASM's conditional directives (https://nasm.us/doc/nasmdoc4.html). It's not just a separate preprocessor like C has, it can use NASM numeric equ expressions.

; The single write uses msg_odd.len for whichever message was selected,
; so complain at assembly time if the two lengths ever diverge.
%if msg_odd.len != msg_even.len
  ; homebrew assert with NASM preprocessor, since I chose to skip doing a 2nd cmov for the length
  %warning we assume both messages have the same length
%endif

The full thing. Outside of the part shown above, I just tweaked comments to simplify them where I thought they were too redundant, and used meaningful label names.

Also, I put .rodata and .bss at the top because NASM complained about referencing msg_odd.len before it was defined. (You previously had your strings in .data, but read-only data should generally go in .rodata, so the OS can share those pages between runs of the same program because they stay clean.)

Other fixes:

Linux/Unix uses 0xa line endings, \n not \n\r.
stdin is fd 0. 2 is stderr. (2 happens to work because terminal emulators normally run the shell with all 3 file descriptors referring to the same read+write open file description for the tty).

; Prompt for a number on stdin, then report whether it is odd or even.
; Only the last digit matters, so just the 2nd-last byte of the input
; (the one before the newline) is inspected.
; 32-bit Linux, int 80h: eax = call number, ebx/ecx/edx = arguments.

%define SYS_EXIT    1
%define SYS_READ    3
%define SYS_WRITE   4
%define FD_STDOUT   1

section .rodata
  msg_prompt db "Please enter a number: ", 0xA
  .len equ $- msg_prompt

  msg_odd db  "The entered number is odd ", 0xA    ; trailing space keeps it as long as msg_even
  .len equ $- msg_odd

  msg_even db "The entered number is even", 0xA
  .len equ $- msg_even

section .bss
  mybuf resb 128
  .len equ $ - mybuf

section .text
   global _start
_start:                  ; exported so ld does not warn about a missing entry symbol

  ; write(1, msg_prompt, msg_prompt.len)
  mov  ecx, msg_prompt
  mov  edx, msg_prompt.len
  mov  ebx, FD_STDOUT
  mov  eax, SYS_WRITE
  int  80h

  ; read(0, mybuf, mybuf.len)
  mov  ecx, mybuf
  mov  edx, mybuf.len
  xor  ebx, ebx          ; fd 0 = stdin
  mov  eax, SYS_READ
  int  80h

; EAX now holds the result: negative = error, 0 = EOF, positive = byte count.
; This toy program assumes well-formed input of the form digits + '\n':
; the newline sits at [mybuf + eax - 1] and the last digit at [mybuf + eax - 2].
; With a bare return (eax = 1) that address is one byte before the start of
; mybuf, which may segfault if mybuf begins a page.

  ; pick the message from the low bit of the last digit
  mov    ecx, msg_even                ; assume even
  mov    edx, msg_odd                 ; candidate for the odd case (mov leaves flags alone)
  test   byte [mybuf + eax - 2], 1    ; ASCII digits keep the parity of their value
  cmovnz ecx, edx                     ; bit set -> odd

  ; write(1, selected message, msg_odd.len)
  mov  edx, msg_odd.len   ; both messages share this length
  mov  ebx, FD_STDOUT
  mov  eax, SYS_WRITE
  int  80h

%if msg_odd.len != msg_even.len
  ; poor man's static assert: only one length is passed to write above
  %warning  we assume both messages have the same length
%endif

  ; _exit(0)
  xor   ebx, ebx
  mov   eax, SYS_EXIT
  int   0x80

assemble + link with nasm -felf32 oddeven.asm && ld -melf_i386 -o oddeven oddeven.o

Linux Nasm Detect Eof