//==- WebAssemblyDisassembler.cpp - Disassembler for WebAssembly -*- C++ -*-==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// /// \file /// This file is part of the WebAssembly Disassembler. /// /// It contains code to translate the data produced by the decoder into /// MCInsts. /// //===----------------------------------------------------------------------===// #include "TargetInfo/WebAssemblyTargetInfo.h" #include "Utils/WebAssemblyTypeUtilities.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbolWasm.h" #include "llvm/Support/Endian.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; #define DEBUG_TYPE "wasm-disassembler" using DecodeStatus = MCDisassembler::DecodeStatus; #include "WebAssemblyGenDisassemblerTables.inc" namespace { static constexpr int WebAssemblyInstructionTableSize = 256; class WebAssemblyDisassembler final : public MCDisassembler { std::unique_ptr MCII; DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef Bytes, uint64_t Address, raw_ostream &CStream) const override; Optional onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef Bytes, uint64_t Address, raw_ostream &CStream) const override; public: WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, std::unique_ptr MCII) : MCDisassembler(STI, Ctx), MCII(std::move(MCII)) {} }; } // end anonymous namespace static MCDisassembler *createWebAssemblyDisassembler(const Target &T, const MCSubtargetInfo &STI, MCContext &Ctx) { std::unique_ptr MCII(T.createMCInstrInfo()); return new WebAssemblyDisassembler(STI, Ctx, std::move(MCII)); } extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeWebAssemblyDisassembler() { // Register the disassembler for each target. TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget32(), createWebAssemblyDisassembler); TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget64(), createWebAssemblyDisassembler); } static int nextByte(ArrayRef Bytes, uint64_t &Size) { if (Size >= Bytes.size()) return -1; auto V = Bytes[Size]; Size++; return V; } static bool nextLEB(int64_t &Val, ArrayRef Bytes, uint64_t &Size, bool Signed) { unsigned N = 0; const char *Error = nullptr; Val = Signed ? decodeSLEB128(Bytes.data() + Size, &N, Bytes.data() + Bytes.size(), &Error) : static_cast(decodeULEB128(Bytes.data() + Size, &N, Bytes.data() + Bytes.size(), &Error)); if (Error) return false; Size += N; return true; } static bool parseLEBImmediate(MCInst &MI, uint64_t &Size, ArrayRef Bytes, bool Signed) { int64_t Val; if (!nextLEB(Val, Bytes, Size, Signed)) return false; MI.addOperand(MCOperand::createImm(Val)); return true; } template bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef Bytes) { if (Size + sizeof(T) > Bytes.size()) return false; T Val = support::endian::read( Bytes.data() + Size); Size += sizeof(T); if (std::is_floating_point::value) { MI.addOperand( MCOperand::createDFPImm(bit_cast(static_cast(Val)))); } else { MI.addOperand(MCOperand::createImm(static_cast(Val))); } return true; } Optional WebAssemblyDisassembler::onSymbolStart( SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef Bytes, uint64_t Address, raw_ostream &CStream) const { Size = 0; if (Address == 0) { // Start of a code section: we're parsing only the function count. int64_t FunctionCount; if (!nextLEB(FunctionCount, Bytes, Size, false)) return None; outs() << " # " << FunctionCount << " functions in section."; } else { // Parse the start of a single function. int64_t BodySize, LocalEntryCount; if (!nextLEB(BodySize, Bytes, Size, false) || !nextLEB(LocalEntryCount, Bytes, Size, false)) return None; if (LocalEntryCount) { outs() << " .local "; for (int64_t I = 0; I < LocalEntryCount; I++) { int64_t Count, Type; if (!nextLEB(Count, Bytes, Size, false) || !nextLEB(Type, Bytes, Size, false)) return None; for (int64_t J = 0; J < Count; J++) { if (I || J) outs() << ", "; outs() << WebAssembly::anyTypeToString(Type); } } } } outs() << "\n"; return MCDisassembler::Success; } MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction( MCInst &MI, uint64_t &Size, ArrayRef Bytes, uint64_t /*Address*/, raw_ostream &CS) const { CommentStream = &CS; Size = 0; int Opc = nextByte(Bytes, Size); if (Opc < 0) return MCDisassembler::Fail; const auto *WasmInst = &InstructionTable0[Opc]; // If this is a prefix byte, indirect to another table. if (WasmInst->ET == ET_Prefix) { WasmInst = nullptr; // Linear search, so far only 2 entries. for (auto PT = PrefixTable; PT->Table; PT++) { if (PT->Prefix == Opc) { WasmInst = PT->Table; break; } } if (!WasmInst) return MCDisassembler::Fail; int64_t PrefixedOpc; if (!nextLEB(PrefixedOpc, Bytes, Size, false)) return MCDisassembler::Fail; if (PrefixedOpc < 0 || PrefixedOpc >= WebAssemblyInstructionTableSize) return MCDisassembler::Fail; WasmInst += PrefixedOpc; } if (WasmInst->ET == ET_Unused) return MCDisassembler::Fail; // At this point we must have a valid instruction to decode. assert(WasmInst->ET == ET_Instruction); MI.setOpcode(WasmInst->Opcode); // Parse any operands. for (uint8_t OPI = 0; OPI < WasmInst->NumOperands; OPI++) { auto OT = OperandTable[WasmInst->OperandStart + OPI]; switch (OT) { // ULEB operands: case WebAssembly::OPERAND_BASIC_BLOCK: case WebAssembly::OPERAND_LOCAL: case WebAssembly::OPERAND_GLOBAL: case WebAssembly::OPERAND_FUNCTION32: case WebAssembly::OPERAND_TABLE: case WebAssembly::OPERAND_OFFSET32: case WebAssembly::OPERAND_OFFSET64: case WebAssembly::OPERAND_P2ALIGN: case WebAssembly::OPERAND_TYPEINDEX: case WebAssembly::OPERAND_TAG: case MCOI::OPERAND_IMMEDIATE: { if (!parseLEBImmediate(MI, Size, Bytes, false)) return MCDisassembler::Fail; break; } // SLEB operands: case WebAssembly::OPERAND_I32IMM: case WebAssembly::OPERAND_I64IMM: { if (!parseLEBImmediate(MI, Size, Bytes, true)) return MCDisassembler::Fail; break; } // block_type operands: case WebAssembly::OPERAND_SIGNATURE: { int64_t Val; uint64_t PrevSize = Size; if (!nextLEB(Val, Bytes, Size, true)) return MCDisassembler::Fail; if (Val < 0) { // Negative values are single septet value types or empty types if (Size != PrevSize + 1) { MI.addOperand( MCOperand::createImm(int64_t(WebAssembly::BlockType::Invalid))); } else { MI.addOperand(MCOperand::createImm(Val & 0x7f)); } } else { // We don't have access to the signature, so create a symbol without one MCSymbol *Sym = getContext().createTempSymbol("typeindex", true); auto *WasmSym = cast(Sym); WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); const MCExpr *Expr = MCSymbolRefExpr::create( WasmSym, MCSymbolRefExpr::VK_WASM_TYPEINDEX, getContext()); MI.addOperand(MCOperand::createExpr(Expr)); } break; } // heap_type operands, for e.g. ref.null: case WebAssembly::OPERAND_HEAPTYPE: { int64_t Val; uint64_t PrevSize = Size; if (!nextLEB(Val, Bytes, Size, true)) return MCDisassembler::Fail; if (Val < 0 && Size == PrevSize + 1) { // The HeapType encoding is like BlockType, in that encodings that // decode as negative values indicate ValTypes. In practice we expect // either wasm::ValType::EXTERNREF or wasm::ValType::FUNCREF here. // // The positive SLEB values are reserved for future expansion and are // expected to be type indices in the typed function references // proposal, and should disassemble as MCSymbolRefExpr as in BlockType // above. MI.addOperand(MCOperand::createImm(Val & 0x7f)); } else { MI.addOperand( MCOperand::createImm(int64_t(WebAssembly::HeapType::Invalid))); } break; } // FP operands. case WebAssembly::OPERAND_F32IMM: { if (!parseImmediate(MI, Size, Bytes)) return MCDisassembler::Fail; break; } case WebAssembly::OPERAND_F64IMM: { if (!parseImmediate(MI, Size, Bytes)) return MCDisassembler::Fail; break; } // Vector lane operands (not LEB encoded). case WebAssembly::OPERAND_VEC_I8IMM: { if (!parseImmediate(MI, Size, Bytes)) return MCDisassembler::Fail; break; } case WebAssembly::OPERAND_VEC_I16IMM: { if (!parseImmediate(MI, Size, Bytes)) return MCDisassembler::Fail; break; } case WebAssembly::OPERAND_VEC_I32IMM: { if (!parseImmediate(MI, Size, Bytes)) return MCDisassembler::Fail; break; } case WebAssembly::OPERAND_VEC_I64IMM: { if (!parseImmediate(MI, Size, Bytes)) return MCDisassembler::Fail; break; } case WebAssembly::OPERAND_BRLIST: { int64_t TargetTableLen; if (!nextLEB(TargetTableLen, Bytes, Size, false)) return MCDisassembler::Fail; for (int64_t I = 0; I < TargetTableLen; I++) { if (!parseLEBImmediate(MI, Size, Bytes, false)) return MCDisassembler::Fail; } // Default case. if (!parseLEBImmediate(MI, Size, Bytes, false)) return MCDisassembler::Fail; break; } case MCOI::OPERAND_REGISTER: // The tablegen header currently does not have any register operands since // we use only the stack (_S) instructions. // If you hit this that probably means a bad instruction definition in // tablegen. llvm_unreachable("Register operand in WebAssemblyDisassembler"); default: llvm_unreachable("Unknown operand type in WebAssemblyDisassembler"); } } return MCDisassembler::Success; }