0

I started to write a BrainF*ck interpreter for my OS in 32-bit x86 assembly. I have already written one in C that just works and tried to implement it in assembly but the one written in assembly doesn't print any output.

I'm still new to assembly so I guess I just made some beginner mistakes. Only thing I can think of is that I messed up the addressing somewhere. I'd be really happy if someone could point out what I'm doing wrong.

I tested the C and assembly programs with the same input:

-[--->+<]>---.+[----->+++<]>.[--->+<]>+.[--->++<]>-.++++.

It should print RexOS

I created a pastebin of my C code if that helps understand what I'm trying to accomplish: pastebin

My assembly code is the following:

.intel_syntax noprefix

.section .data
TAPE:
    .zero 30000

.section .text
.global interpret
interpret:
    push ebp                // prologue
    mov ebp, esp

    mov edx, [ebp+8]        // getting the input string
    mov edi, offset TAPE    // a pointer to the tape
    xor ecx, ecx            // stores current char
                            // inner loop counter

loop:
    mov cl, byte ptr [edx]  // getting the current char

    inc edx                 // increase the index to the next char

    cmp cl, 0               // if we reached the end of the string
    je exit                 // return the length

    cmp cl, '>'
    je pinc                 // increment the pointer

    cmp cl, '<'
    je pdec                 // decrement the pointer

    cmp cl, '+'
    je vinc                 // increment value at index

    cmp cl, '-'
    je vdec                 // decrement value at index

    cmp cl, '.'
    je prnt                 // print the value at index

    cmp cl, ','
    je read                 // read character from stdin

    cmp cl, ']'
    je bend                 // end of bracket

    jmp loop

pinc:
    inc edi
    jmp loop

pdec:
    dec edi
    jmp loop

vinc:
    inc byte ptr [edi]
    jmp loop

vdec:
    dec byte ptr [edi]
    jmp loop

prnt:
    push edx

    push dword ptr [edi]
    call putchar
    add esp, 4

    pop edx

    jmp loop

read:
    call getchar
    mov byte ptr [edi], al

    jmp loop

bend:
    cmp byte ptr [edi], 0
    je loop

    mov ch, 1

ilst:
    cmp ch, 0
    jle loop

    dec edx                 // jump to the previous index
    mov cl, byte ptr [edx]  // getting the current char

    cmp cl, '['
    je dclp                 // decrease internal loop counter

    cmp cl, ']'
    je inlp                 // increase internal loop counter

    jmp ilst

inlp:
    inc ch
    jmp ilst

dclp:
    dec ch
    jmp ilst

exit:
    mov esp, ebp            // epilogue
    pop ebp
    ret
7
  • Intel-syntax doesn't use size suffixes like movb or incb; GAS might accept them, but it's better to be consistent. (e.g. inc byte ptr [eax]). And If you want to zero both CL and CH, use xor ecx,ecx to zero them both at once with one equally-compact instruction. Commented May 7, 2021 at 9:41
  • 2
    Have you used a debugger to single-step your code, or set breakpoints to see if any printing code is even reached? Commented May 7, 2021 at 9:42
  • push word ptr [eax] is mismatched with pop eax, so you're messing up your stack every putchar by subtracting 2 from ESP then adding 4 with pop. Push a whole dword; putchar will still only look at the low byte. (or movzx load into another reg and push that.) Commented May 7, 2021 at 9:46
  • 4
    Debugging with a debugger should always be your first step, before asking other people to spend their time looking at your code. It's essential for assembly language, where there aren't any guard-rails to keep your code on track or detect problems at build-time, so most problems just lead to a crash or a obscure / non-obvious strange behaviour. Commented May 7, 2021 at 9:56
  • 1
    IMHO, a much more interesting challenge would be to write a BF compiler in BF :) Commented May 7, 2021 at 10:06

1 Answer 1

1

I finally solved this. All I needed to do was move the pointer incrementation to the end of the loop and save additional registers in the prologue (ebx) then restore the registers in the epilogue. Also needed to save and restore registers in the prnt and scan part.

My final working code looks like this:

.intel_syntax noprefix

.section .data
TAPE:
    .zero 30000

.section .text
.global interpret
interpret:
    push ebp                // prologue
    mov ebp, esp
    push ebx

    xor eax, eax            // loop counter
    mov ebx, offset TAPE    // a pointer to the tape
    mov edx, [ebp+8]        // getting the input string
    
clrt:                       // clear TAPE
    cmp eax, 30000
    je loop

    mov byte ptr [ebx+eax], 0

    inc eax

    jmp clrt

loop:
    mov cl, byte ptr [edx]  // getting the current char

    cmp cl, 0               // if we reached the end of the string
    je exit                 // return the length

    cmp cl, '>'
    je pinc                 // increment the pointer

    cmp cl, '<'
    je pdec                 // decrement the pointer

    cmp cl, '+'
    je vinc                 // increment value at index

    cmp cl, '-'
    je vdec                 // decrement value at index

    cmp cl, '.'
    je prnt                 // print the value at index

    cmp cl, ','
    je read                 // read character from stdin

    cmp cl, ']'
    je bend                 // end of bracket

ptlp:                       // postloop
    inc edx                 // increment the input pointer
    jmp loop                // continue the loop

pinc:
    inc ebx
    jmp ptlp

pdec:
    dec ebx
    jmp ptlp

vinc:
    inc byte ptr [ebx]
    jmp ptlp

vdec:
    dec byte ptr [ebx]
    jmp ptlp

prnt:
    push edx

    push dword ptr [ebx]
    call putchar
    add esp, 4

    pop edx

    jmp ptlp

read:
    push edx

    call getchar
    mov byte ptr [ebx], al

    pop edx

    jmp ptlp

bend:
    cmp byte ptr [ebx], 0
    je ptlp

    mov ch, 1

ilst:
    cmp ch, 0
    jle ptlp

    dec edx                 // jump to the previous index
    mov cl, byte ptr [edx]  // getting the current char

    cmp cl, '['
    je dclp                 // decrease internal loop counter

    cmp cl, ']'
    je inlp                 // increase internal loop counter

    jmp ilst

inlp:
    inc ch
    jmp ilst

dclp:
    dec ch
    jmp ilst

exit:
    pop ebx                 // epilogue
    mov esp, ebp
    pop ebp
    ret
Sign up to request clarification or add additional context in comments.

9 Comments

I suspect the failure to save and restore registers was the real problem. Other than that, there's no reason why iterating with the pointer instead of an index shouldn't work. You could go back to that approach if you want.
@NateEldredge I changed it back to incrementing and decrementing edx instead of doing it with eax and now it doesn't print the output, so I think I found a new bug. I guess I'm gonna fix it tomorrow then I will update my answer.
push dword ptr [ebx] looks sketchy, since it will push a full 32-bit word whereas only the low byte is what you want to print. Since putchar takes an int argument, I am not sure that it is guaranteed to mask off the high part. What about doing instead movzx eax, byte ptr [ebx] ; push eax?
It's also alarming that you have no protection against running off the end of the tape in either direction. This is an exploitable security vulnerability.
@NateEldredge: It's safe to pass high garbage in an int arg to putchar(3): writes the character c, cast to an unsigned char. And on normal 2's complement machines including x86, int->unsigned char just truncates, taking the low byte. So the only danger would be if you had arranged for your tape array to end at the end of a page, followed by an unmapped page. (So you have HW memory protection, unless the BF code seeks the cursor past any guard page into your stack memory or something, before touching memory.)
|

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.