Crimsontide
|
|
« Reply #9 on: June 19, 2018, 11:57:41 AM » |
|
I started work on a small assembler of sorts years ago in C++. The idea was a little different than yours. Rather than output to a file, it would output the assembly to a chunk of memory, and wrap it like a standard function object. The idea being I could dynamically generate code 'on the fly'. I got it working for a few instructions before I got bored (or busy, I'm not sure I was in university at the time), but I can post a few code snippets to give a general idea of the structure. This is all old code while I was still learning C++ (so while it did work, and the idea was sound IMHO, its still rather nooby-ish), but maybe it'll give you some ideas. The class looked like this:

// ----- FunctorX64 -----
//
// Function-object wrapper around x86-64 machine code that is generated at run
// time. F is the function signature being wrapped; its arity selects the
// Core::FunctorTemplate base. Derived classes emit code through the protected
// mnemonic helpers (NOP, RET, MOV, LOD, SAV, ...), which are built on the
// private encoding helpers (prefixes, ModRM, OpRR, OpI, OpRM, ...).
template<class F>
class FunctorX64 : public Core::FunctorTemplate<F,boost::function_traits<F>::arity>
{
protected:
    // Register names, one enumeration per operand width so that overload
    // resolution picks the matching encoder.
    // NOTE(review): enumerators are in declaration order (al, bl, cl, dl, ...),
    // which is not the hardware encoding order; presumably RegisterValue()
    // performs the mapping - confirm against its definition.
    enum Reg8
    {
        al, bl, cl, dl,
        ah, bh, ch, dh,
        sil, dil, bpl, spl,
        r8b, r9b, r10b, r11b, r12b, r13b, r14b, r15b
    };
    enum Reg16
    {
        ax, bx, cx, dx, si, di, bp, sp,
        r8w, r9w, r10w, r11w, r12w, r13w, r14w, r15w
    };
    enum Reg32
    {
        eax, ebx, ecx, edx, esi, edi, ebp, esp,
        r8d, r9d, r10d, r11d, r12d, r13d, r14d, r15d
    };
    enum Reg64
    {
        rax, rbx, rcx, rdx, rsi, rdi, rbp, rsp,
        r8, r9, r10, r11, r12, r13, r14, r15
    };
    enum RegXMM
    {
        xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
        xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15
    };

private:
    // Register type queries. Written as plain overloads per enumeration: a
    // little tedious, but easier than specialising function templates on
    // enums nested inside a class template.
    bool IsReg (Reg8) const;
    bool IsReg (Reg16) const;
    bool IsReg (Reg32) const;
    bool IsReg (Reg64) const;
    bool IsReg (RegXMM) const;

    bool IsReg8 (Reg8) const;
    bool IsReg8 (Reg16) const;
    bool IsReg8 (Reg32) const;
    bool IsReg8 (Reg64) const;
    bool IsReg8 (RegXMM) const;

    bool IsReg16 (Reg8) const;
    bool IsReg16 (Reg16) const;
    bool IsReg16 (Reg32) const;
    bool IsReg16 (Reg64) const;
    bool IsReg16 (RegXMM) const;

    bool IsReg32 (Reg8) const;
    bool IsReg32 (Reg16) const;
    bool IsReg32 (Reg32) const;
    bool IsReg32 (Reg64) const;
    bool IsReg32 (RegXMM) const;

    bool IsReg64 (Reg8) const;
    bool IsReg64 (Reg16) const;
    bool IsReg64 (Reg32) const;
    bool IsReg64 (Reg64) const;
    bool IsReg64 (RegXMM) const;

    bool IsRegXMM (Reg8) const;
    bool IsRegXMM (Reg16) const;
    bool IsRegXMM (Reg32) const;
    bool IsRegXMM (Reg64) const;
    bool IsRegXMM (RegXMM) const;

    // Register classification.

    // True for extended registers, i.e. any register that requires the Rex.R
    // or Rex.B bit to be set.
    bool IsExtendedRegister (Reg8) const;
    bool IsExtendedRegister (Reg16) const;
    bool IsExtendedRegister (Reg32) const;
    bool IsExtendedRegister (Reg64) const;
    bool IsExtendedRegister (RegXMM) const;

    // True for registers that need a Rex prefix, possibly without Rex.B or
    // Rex.R set (sil, dil, bpl, spl, r8b - r15b). For every width other than
    // 8 bit this is the same as IsExtendedRegister.
    bool IsRexRegister (Reg8) const;
    bool IsRexRegister (Reg16) const;
    bool IsRexRegister (Reg32) const;
    bool IsRexRegister (Reg64) const;
    bool IsRexRegister (RegXMM) const;

    // True only for ah, bh, ch, dh; every non-8-bit overload returns false.
    bool IsHighRegister (Reg8) const;
    bool IsHighRegister (Reg16) const;
    bool IsHighRegister (Reg32) const;
    bool IsHighRegister (Reg64) const;
    bool IsHighRegister (RegXMM) const;

    bool IsLowRegister (Reg8) const;    // true for al, bl, cl, dl
    bool IsOldRegister (Reg8) const;    // true for al, bl, cl, dl, ah, bh, ch, dh
    bool IsNewRegister (Reg8) const;    // true for al, bl, cl, dl, sil, dil, bpl, spl, r8b, ..., r15b

    // Helper functions.
    byte RegisterValue (Reg8) const;
    byte RegisterValue (Reg16) const;
    byte RegisterValue (Reg32) const;
    byte RegisterValue (Reg64) const;
    byte RegisterValue (RegXMM) const;
    byte GetScale (int) const;

    // Prefixes.
    void OperandSizePrefix ();                          // switches the default operand size from 32 to 16 bit; must be emitted before RexPrefix
    void AddressSizePrefix ();                          // switches the default address size from 64 to 32 bit; must be emitted before RexPrefix
    void RexPrefix (bool W, bool R, bool X, bool B);    // emits a Rex prefix with the given W/R/X/B bits set

    // Emit the ModRM byte (plus SIB / displacement as needed). If the D bit
    // of the opcode is set then rm = src and reg = dest; otherwise reg = src
    // and rm = dest.
    void ModRM (Reg8 reg, Reg8 rm);
    void ModRM (Reg16 reg, Reg16 rm);
    void ModRM (Reg32 reg, Reg32 rm);
    void ModRM (Reg64 reg, Reg64 rm);

    // Emit an op with register / register operands. Operand roles follow the
    // same D-bit convention as ModRM.
    void OpRR (byte op, Reg8 reg, Reg8 rm);
    void OpRR (byte op, Reg16 reg, Reg16 rm);
    void OpRR (byte op, Reg32 reg, Reg32 rm);
    void OpRR (byte op, Reg64 reg, Reg64 rm);

    // Emit an op with an immediate operand. The register is encoded in the
    // opcode itself; pass rexb when that register needs Rex.B. For the 8-bit
    // form, rex must be set to use sil, dil, bpl, spl, and both rex and rexb
    // must be set for r8b - r15b.
    void OpI (byte op, uint8, bool rex, bool rexb);
    void OpI (byte op, uint16, bool rexb);
    void OpI (byte op, uint32, bool rexb);
    void OpI (byte op, uint64, bool rexb);

    // Emit an op with register / memory operands.
    template<class TR> void OpRM (byte op, TR reg, int32 disp);     // absolute displacement
    template<class TR> void OpRP (byte op, TR reg, int32 disp);     // displacement from RIP

    template<class TR, class TA> void OpRM (byte op, TR reg, TA base, int32 disp);
    template<class TR, class TA> void OpRM (byte op, TR reg, int scale, TA index, int32 disp);
    template<class TR, class TA> void OpRM (byte op, TR reg, TA base, int scale, TA index, int32 disp);

protected:
    // Basic operations.
    void NOP ();
    void RET ();        // near (same segment) return
    void FARRET ();     // far (different segment) return

    // Move register to register.
    void MOV (Reg8 dest, Reg8 src);
    void MOV (Reg16 dest, Reg16 src);
    void MOV (Reg32 dest, Reg32 src);
    void MOV (Reg64 dest, Reg64 src);

    // Move immediate to register.
    void MOV (Reg8 dest, uint8 i);
    void MOV (Reg16 dest, uint16 i);
    void MOV (Reg32 dest, uint32 i);
    void MOV (Reg64 dest, uint64 i);

    // Load from / save to an absolute address (global / absolute data).
    void LOD (Reg8 dest, const void* src);
    void LOD (Reg16 dest, const void* src);
    void LOD (Reg32 dest, const void* src);
    void LOD (Reg64 dest, const void* src);

    void SAV (Reg8 src, void* dest);
    void SAV (Reg16 src, void* dest);
    void SAV (Reg32 src, void* dest);
    void SAV (Reg64 src, void* dest);

    // Load from memory at base + scale * index + disp.
    void LOD (Reg8 dest, Reg32 base, int scale, Reg32 index, int32 disp = 0);

    // Save to memory at base + scale * index + disp.
    void SAV (Reg8 src, Reg32 base, int scale, Reg32 index, int32 disp = 0);

    // Load from an indirect address held in a register (no displacement).
    void LOD (Reg8 dest, Reg32 srcAddr);
    void LOD (Reg16 dest, Reg32 srcAddr);
    void LOD (Reg32 dest, Reg32 srcAddr);
    void LOD (Reg64 dest, Reg32 srcAddr);

    void LOD (Reg8 dest, Reg64 srcAddr);
    void LOD (Reg16 dest, Reg64 srcAddr);
    void LOD (Reg32 dest, Reg64 srcAddr);
    void LOD (Reg64 dest, Reg64 srcAddr);

    // function prolog/epilog code

    // debug

public:
    // constructor
    FunctorX64 ();
};
The registers were all enums, with a bit of function overloading you can make the assembly both easy to use and easier to code. Most of the ops follow a few simple templates, for example 'RR' ops (operations that access two registers) might look like:

// ----- op reg / reg -----
//
// Emits opcode `op` followed by a ModRM byte for two 8-bit register operands.
//
//   op  - the opcode byte to emit
//   reg - register placed in the ModRM reg field (extended via Rex.R)
//   rm  - register placed in the ModRM rm field (extended via Rex.B)
//
// Throws InvalidRegisterCombination when the two registers cannot share one
// encoding, i.e. a high 8-bit register (ah, bh, ch, dh) is paired with a
// register that is only reachable with a Rex prefix.
template<class F>
void FunctorX64<F>::OpRR (byte op, Reg8 reg, Reg8 rm)
{
    // attempt to encode without REX: both operands are legacy 8-bit registers
    if (IsOldRegister(reg) && IsOldRegister(rm))
    {
        AddOp(op);
        ModRM(reg, rm);
        return;
    }

    // encode with REX: neither operand is a high-byte register. Rex.R carries
    // the extension bit of the reg field and Rex.B that of the rm field, so
    // the prefix is built from the same operands that are handed to ModRM.
    if (IsNewRegister(reg) && IsNewRegister(rm))
    {
        RexPrefix(false, IsExtendedRegister(reg), false, IsExtendedRegister(rm));
        AddOp(op);
        ModRM(reg, rm);
        return;
    }

    // thrown by using the old high 8-bit registers and the new registers in the same op (bh -> r9b for example)
    throw InvalidRegisterCombination("template <typename F> void FunctorX64<F>::OpRR (byte op, Reg8 reg, Reg8 rm) - invalid register combination");
}
Another example, an op with an immediate operand encoded, shows how function overloading can make things quite clean: // -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- // op with an immediate operand // --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
// ----- op immediate ----- template<class F> void FunctorX64<F>::OpI (byte op, uint8 i, bool rex, bool rexb) { if (rex | rexb) RexPrefix(false, false, false, rexb); AddOp(op); AddImmediate(i); }
// 16-bit immediate form: operand-size prefix, optional Rex prefix, opcode,
// then the immediate value.
//
//   op   - the opcode byte to emit
//   i    - the 16-bit immediate value
//   rexb - emit a Rex prefix with the B bit set (extended register)
template<class F>
void FunctorX64<F>::OpI (byte op, uint16 i, bool rexb)
{
    // The operand-size prefix always comes first and selects 16-bit operands.
    OperandSizePrefix();

    // Only an extended register needs a Rex prefix here.
    if (rexb)
    {
        RexPrefix(false, false, false, rexb);
    }

    AddOp(op);
    AddImmediate(i);
}
// 32-bit immediate form: optional Rex prefix, opcode, then the immediate
// value. No size prefix is emitted.
//
//   op   - the opcode byte to emit
//   i    - the 32-bit immediate value
//   rexb - emit a Rex prefix with the B bit set (extended register)
template<class F>
void FunctorX64<F>::OpI (byte op, uint32 i, bool rexb)
{
    if (rexb)
    {
        RexPrefix(false, false, false, rexb);
    }

    AddOp(op);
    AddImmediate(i);
}
// 64-bit immediate form: a Rex prefix with W set is always emitted, followed
// by the opcode and the immediate value.
//
//   op   - the opcode byte to emit
//   i    - the 64-bit immediate value
//   rexb - additionally set the B bit of the Rex prefix (extended register)
template<class F>
void FunctorX64<F>::OpI (byte op, uint64 i, bool rexb)
{
    const bool rexW = true;     // unconditional for this overload

    RexPrefix(rexW, false, false, rexb);
    AddOp(op);
    AddImmediate(i);
}
An op code might be encoded like:

// Loads an 8-bit register from the address held in a 32-bit register, with
// no displacement. Delegates to the register / memory encoder.
//
//   dest    - 8-bit destination register
//   srcAddr - 32-bit register holding the source address
template<class F>
void FunctorX64<F>::LOD(Reg8 dest, Reg32 srcAddr)
{
    const byte opcode = 0x8a;       // MOV r8, r/m8
    const int32 displacement = 0;   // plain indirect access

    OpRM(opcode, dest, srcAddr, displacement);
}
As for all the details of how Intel assembly works at the hex/machine-code level, well, you're just going to have to download and read the official docs, which you can download straight from Intel, last I checked. x86/x64 are very complex instruction sets with a ton of 'gotchas'. Good luck.
|
|
|
Logged
|
|
|
|