2

In JavaScript, for some research I'm trying to simulate the process of following some x86-64 Assembly instructions from scratch. The first step is to properly initiate and be able to perform basic math with any two registers as operators. Since each smaller register in the General Purpose Registers is a piece of a larger register, I initiated the 16 GPR registers as ArrayBuffers and then used a Register class to create the correct views on the 16 buffers.

But my math operations must be able to handle 64-bit and greater register sizes, so my getOperand method tried to create a BigUint64Array, with any parts of the ArrayBuffer that shouldn't be included in the operation zeroed out. The BigUInt64Array is initializing as a much larger value than it should be.

You'll see when you run the example. I'm not even sure I'm going about this right. Could someone explain the best way to improve this, or what's wrong with what I'm doing?

Note: The typed arrays and buffers being logged in this snippet are much easier to read if you F12 the Dev Console rather than the logs rendered by SO.

const registerContainers = {
    GPR: [],
}
for (let i = 0; i < 16; i++) {
    registerContainers.GPR.push(new ArrayBuffer(8))
}

const registers = {}

/**
 * Creates a new Uint8Array based on two different ArrayBuffers
 *
 * @private
 * @param {ArrayBuffers} buffer1 The first buffer.
 * @param {ArrayBuffers} buffer2 The second buffer.
 * @return {ArrayBuffers} The new ArrayBuffer created out of the two.
 */
function joinArrayBuffers(buffer1, buffer2) {
    var tmp = new Uint8Array(buffer1.byteLength + buffer2.byteLength);
    tmp.set(new Uint8Array(buffer1), 0);
    tmp.set(new Uint8Array(buffer2), buffer1.byteLength);
    console.log('joining array buffers:', buffer1, buffer2, ":", tmp.buffer)
    return tmp.buffer;
  };

function padArrayBufferTo64(arrBuffer){
    console.log('padding arrayBuffer to mult of 64 (8, 16, ...):', arrBuffer)
    if (arrBuffer.byteLength === 8) {
        console.log('arrBuffer.byteLength', arrBuffer.byteLength)
        return arrBuffer
    } else {
        const remainder = arrBuffer.byteLength % 8
        const arrBufferPad = new ArrayBuffer(remainder)
        console.log('joinArrayBuffers(arrBufferPad, arrBuffer).byteLength', joinArrayBuffers(arrBufferPad, arrBuffer).byteLength)
        return joinArrayBuffers(arrBufferPad, arrBuffer)
    }
}

class ArithmeticLogicUnit {
    constructor(){}
    add(args) {
        const operand1 = args.operands[0].getOperand()
        const operand2 = args.operands[1].getOperand()
        const target = args.target
        console.log(operand1, '+', operand2, '=', operand1 + operand2)
        target.set(operand1 + operand2)
    }
}

class Register {
    constructor(name, container, offset, bytes){
        registers[name] = this
        this.container = container
        this.bytes = bytes
        this.offset = offset
    }
    byteArray(){
        return new Uint8Array(this.container)
    }
    getOperand(){
        if (this.bytes === this.container.byteLength) return new BigUint64Array(padArrayBufferTo64(this.container))
        else return new BigUint64Array(padArrayBufferTo64(this.container.slice(this.offset, this.container.length)))
    }
    read(){
        if (this.bytes === this.container.byteLength) return this.byteArray()
        else return this.byteArray().subarray(this.offset, this.container.length)
    }
    readHexStr(){
        return Array.from(this.read()).map(x => {
            return x.toString(16).padStart(2, '0')
        }).join(' ');
    }
    set(value){
        console.log('register', this.read())
        console.log('value to set', value)
        return this.read().set(value)
    }
}

// # General Purpose Registers

/*

Note: Usage during syscall/function call:

    - First six arguments are in rdi, rsi, rdx, rcx, r8d, r9d; remaining arguments are on 
      the stack.
    - For syscalls, the syscall number is in rax.
    - Return value is in rax.
    - The called routine is expected to preserve rsp, rbp, rbx, r12, r13, r14, and r15 but
      may trample any other registers.

*/

/*

Note: Other usage of pointers:

    - AX multiply/divide, string load & store
    - BX index register for MOVE
    - CX count for string operations & shifts
    - DX port address for IN and OUT
    - SP points to top of the stack
    - BP points to base of the stack frame
    - SI points to a source in stream operations
    - DI points to a destination in stream operations

*/

// ## Data Registers

new Register('rax', registerContainers.GPR[0], 0, 8) // Register A Extended
new Register('eax', registerContainers.GPR[0], 4, 4)
new Register('ax',  registerContainers.GPR[0], 6, 2) // multiply/divide, string load & store
new Register('ah',  registerContainers.GPR[0], 6, 1)
new Register('al',  registerContainers.GPR[0], 7, 1)

new Register('rbx', registerContainers.GPR[1], 0, 8) // Register B Extended
new Register('ebx', registerContainers.GPR[1], 4, 4) 
new Register('bx',  registerContainers.GPR[1], 6, 2) // index register for MOVE
new Register('bh',  registerContainers.GPR[1], 6, 1)
new Register('bl',  registerContainers.GPR[1], 7, 1)

new Register('rcx', registerContainers.GPR[2], 0, 8) // Register C Extended
new Register('ecx', registerContainers.GPR[2], 4, 4)
new Register('cx',  registerContainers.GPR[2], 6, 2) // count for string operations & shifts
new Register('ch',  registerContainers.GPR[2], 6, 1)
new Register('cl',  registerContainers.GPR[2], 7, 1)

new Register('rdx', registerContainers.GPR[3], 0, 8) // Register D Extended
new Register('edx', registerContainers.GPR[3], 4, 4)
new Register('dx',  registerContainers.GPR[3], 6, 2) // port address for IN and OUT
new Register('dh',  registerContainers.GPR[3], 6, 1)
new Register('dl',  registerContainers.GPR[3], 7, 1)

// ## Pointer Registers

new Register('rsp', registerContainers.GPR[4], 0, 8)
new Register('esp', registerContainers.GPR[4], 4, 4)
new Register('sp',  registerContainers.GPR[4], 6, 2) // points to top of the stack
new Register('spl', registerContainers.GPR[4], 7, 1)

new Register('rbp', registerContainers.GPR[4], 0, 8)
new Register('ebp', registerContainers.GPR[4], 4, 4)
new Register('bp',  registerContainers.GPR[4], 6, 2) // points to base of the stack frame
new Register('bpl', registerContainers.GPR[4], 7, 1)

new Register('rsi', registerContainers.GPR[4], 0, 8)
new Register('esi', registerContainers.GPR[4], 4, 4)
new Register('si',  registerContainers.GPR[4], 6, 2) // points to a source in stream operations
new Register('sil', registerContainers.GPR[4], 7, 1)

new Register('rdi', registerContainers.GPR[4], 0, 8)
new Register('edi', registerContainers.GPR[4], 4, 4)
new Register('di',  registerContainers.GPR[4], 6, 2) // points to a destination in stream operations
new Register('dil', registerContainers.GPR[4], 7, 1)

new Register('r8',  registerContainers.GPR[4], 0, 8)
new Register('r8d', registerContainers.GPR[4], 4, 4)
new Register('r8w', registerContainers.GPR[4], 6, 2)
new Register('r8b', registerContainers.GPR[4], 7, 1)

new Register('r9',  registerContainers.GPR[4], 0, 8)
new Register('r9d', registerContainers.GPR[4], 4, 4)
new Register('r9w', registerContainers.GPR[4], 6, 2)
new Register('r9b', registerContainers.GPR[4], 7, 1)

new Register('r10',     registerContainers.GPR[4], 0, 8)
new Register('r10d',    registerContainers.GPR[4], 4, 4)
new Register('r10w',    registerContainers.GPR[4], 6, 2)
new Register('r10b',    registerContainers.GPR[4], 7, 1)

new Register('r11',     registerContainers.GPR[4], 0, 8)
new Register('r11d',    registerContainers.GPR[4], 4, 4)
new Register('r11w',    registerContainers.GPR[4], 6, 2)
new Register('r11b',    registerContainers.GPR[4], 7, 1)

new Register('r12',     registerContainers.GPR[4], 0, 8)
new Register('r12d',    registerContainers.GPR[4], 4, 4)
new Register('r12w',    registerContainers.GPR[4], 6, 2)
new Register('r12b',    registerContainers.GPR[4], 7, 1)

new Register('r13',     registerContainers.GPR[4], 0, 8)
new Register('r13d',    registerContainers.GPR[4], 4, 4)
new Register('r13w',    registerContainers.GPR[4], 6, 2)
new Register('r13b',    registerContainers.GPR[4], 7, 1)

new Register('r14',     registerContainers.GPR[4], 0, 8)
new Register('r14d',    registerContainers.GPR[4], 4, 4)
new Register('r14w',    registerContainers.GPR[4], 6, 2)
new Register('r14b',    registerContainers.GPR[4], 7, 1)

new Register('r15',     registerContainers.GPR[4], 0, 8)
new Register('r15d',    registerContainers.GPR[4], 4, 4)
new Register('r15w',    registerContainers.GPR[4], 6, 2)
new Register('r15b',    registerContainers.GPR[4], 7, 1)

const ALU = new ArithmeticLogicUnit()
registers.eax.set(Uint8Array.from([0x1f, 0x1f, 0x1f, 0x1f]))
console.log('updated eax', registers.eax.read())
console.log('updated eax', registers.eax.readHexStr())
registers.ebx.set(Uint8Array.from([0x1f, 0x1f, 0x1f, 0x1f]))
console.log('updated ebx', registers.ebx.read())
console.log('updated ebx', registers.ebx.readHexStr())
registers.ecx.set(Uint8Array.from([0x0, 0x0, 0x0, 0x0]))
console.log('updated ecx', registers.ecx.read())
console.log('updated ecx', registers.ecx.readHexStr())
ALU.add({
    operands: [registers.eax, registers.ebx],
    target: registers.ecx
})
J.Todd
  • 707
  • 1
  • 12
  • 34

1 Answers1

2

Don't make it so complicated. joinArrayBuffers and padArrayBufferTo64 are very inefficient, notice that buffers and typed arrays have quite some overhead in JS - they are designed to hold large binary data, not individual values, and you should try to create them once and only read/write to them afterwards.

Instead of trying to use BigUint64Array for all your operands, and moving around buffers, I would recommend to use the appropriately sized typed arrays for your smaller registers, and just cast the number to a bigint after accessing the array (if you need bigints for all your ALU operations at all - a 32 bit ALU is probably much more efficient to implement).

const gprBuffer = new ArrayBuffer(16 * 8);

class Register {
    constructor(type, offset) {
        this.container = new type(gprBuffer, offset, 1);
        this.byteArray = new Uint8Array(gprBuffer, offset, type.BYTES_PER_ELEMENT);
    }
}
class NumberRegister extends Register {
    constructor(type, offset) {
        super(type, offset);
        this.mod = 2n ** BigInt(8 * type.BYTES_PER_ELEMENT);
    }
    read() {
        return BigInt(this.container[0]);
    }
    write(val) {
        this.container[0] = Number(val % this.mod);
    }
}
class BigIntRegister extends Register {
    constructor(type, offset) {
        console.assert(type == BigUint64Array);
        super(type, offset);
    }
    read() {
        return this.container[0];
    }
    write(val) {
        this.container[0] = val;
    }
}

function makeRegister(base, bitsize, byteoffset) {
    const arrayType = {8: Uint8Array, 16: Uint16Array, 32: Uint32Array, 64: BigUint64Array}[bitsize];
    const registerType = bitsize > 53 ? BigIntRegister : NumberRegister
    return new registerType(arrayType, base * 8 + byteoffset);
}
const registers = {
    rax: makeRegister(0, 64, 0),
    eax: makeRegister(0, 32, 4),
    ax: makeRegister(0, 16, 6),
    ah: makeRegister(0, 8, 6),
    al: makeRegister(0, 8, 7),
    rbx: makeRegister(1, 64, 0),
    ebx: makeRegister(1, 32, 4),
    bx: makeRegister(1, 16, 6),
    bh: makeRegister(1, 8, 6),
    bl: makeRegister(1, 8, 7),
    // …
};
console.log(registers)

However, notice that typed arrays have arbitrary endianness, which you probably don't want for an emulator. Instead, consider using a (single) DataView where you control endianness, and can use different methods to write single values into your gprBuffer at arbitrary offsets.

Bergi
  • 630,263
  • 148
  • 957
  • 1,375
  • "cast the number to a bigint" - could you elaborate or name the method / feature that does this? – J.Todd May 22 '21 at 19:45
  • I assume you mean convert the TypedArray to a Bigint, but I'm not sure how to do that and I dont see it as a feature on the TypedArray docs. – J.Todd May 22 '21 at 20:06
  • 1
    No, I meant casting the number that you get from accessing a `Uint32/16/8Array` - just `BigInt(typedArray[i])` – Bergi May 22 '21 at 20:14
  • Oh now I understand what you meant, one big buffer for less overhead. Was `bitsize > 53` intentional or typo? I imagine you meant 63, to trigger at 64 – J.Todd May 22 '21 at 22:53
  • @J.Todd Nah, I meant [53](https://stackoverflow.com/questions/1848700/biggest-integer-that-can-be-stored-in-a-double). – Bergi May 22 '21 at 23:18
  • 1
    Btw I guess it doesn't matter whether there's one big buffer or one buffer for each overlapping register set, and whether there's one big typed array that each register indexes into or whether each register has its own typed array (as in my snippet), but the real overhead that one needs to avoid is creating new typed arrays in every `read`/`write` operation. – Bergi May 22 '21 at 23:20