diff --git a/CMakeLists.txt b/CMakeLists.txt index da0f4f957..355c4649b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -80,6 +80,7 @@ set(REMILL_BUILD_SEMANTICS_DIR_AARCH64 "${CMAKE_CURRENT_BINARY_DIR}/lib/Arch/AAr set(REMILL_BUILD_SEMANTICS_DIR_SPARC32 "${CMAKE_CURRENT_BINARY_DIR}/lib/Arch/SPARC32/Runtime") set(REMILL_BUILD_SEMANTICS_DIR_SPARC64 "${CMAKE_CURRENT_BINARY_DIR}/lib/Arch/SPARC64/Runtime") set(REMILL_BUILD_SEMANTICS_DIR_PPC64_32ADDR "${CMAKE_CURRENT_BINARY_DIR}/lib/Arch/PPC/Runtime") +set(REMILL_BUILD_SEMANTICS_DIR_MIPS64_32ADDR "${CMAKE_CURRENT_BINARY_DIR}/lib/Arch/MIPS/Runtime") set(REMILL_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include") set(REMILL_LIB_DIR "${CMAKE_CURRENT_SOURCE_DIR}/lib") @@ -220,6 +221,7 @@ target_compile_definitions(remill_settings INTERFACE "REMILL_BUILD_SEMANTICS_DIR_SPARC32=\"${REMILL_BUILD_SEMANTICS_DIR_SPARC32}\"" "REMILL_BUILD_SEMANTICS_DIR_SPARC64=\"${REMILL_BUILD_SEMANTICS_DIR_SPARC64}\"" "REMILL_BUILD_SEMANTICS_DIR_PPC64_32ADDR=\"${REMILL_BUILD_SEMANTICS_DIR_PPC64_32ADDR}\"" + "REMILL_BUILD_SEMANTICS_DIR_MIPS64_32ADDR=\"${REMILL_BUILD_SEMANTICS_DIR_MIPS64_32ADDR}\"" ) set(ghidra_patch_user "github-actions[bot]") @@ -317,6 +319,7 @@ if(REMILL_ENABLE_INSTALL_TARGET) ) install(FILES "${sleigh_BINARY_DIR}/specfiles/Ghidra/Processors/PowerPC/data/languages/ppc_32_e200_be.sla" DESTINATION "${CMAKE_INSTALL_DATADIR}/sleigh/specfiles/Ghidra/Processors/PowerPC/data/languages/") + install(FILES "${sleigh_BINARY_DIR}/specfiles/Ghidra/Processors/MIPS/data/languages/mips64be.sla" DESTINATION "${CMAKE_INSTALL_DATADIR}/sleigh/specfiles/Ghidra/Processors/MIPS/data/languages/") configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/cmake/remillConfig.cmake.in" diff --git a/bin/lift/Lift.cpp b/bin/lift/Lift.cpp index 3cc0afbca..d8ffed574 100644 --- a/bin/lift/Lift.cpp +++ b/bin/lift/Lift.cpp @@ -125,7 +125,10 @@ class SimpleTraceManager : public remill::TraceManager { public: virtual ~SimpleTraceManager(void) = default; - explicit 
SimpleTraceManager(Memory &memory_) : memory(memory_) {} + explicit SimpleTraceManager(Memory &memory_, + const remill::IntrinsicTable &intrinsics_) + : memory(memory_), + intrinsics(intrinsics_) {} protected: // Called when we have lifted, i.e. defined the contents, of a new trace. @@ -147,7 +150,13 @@ class SimpleTraceManager : public remill::TraceManager { if (trace_it != traces.end()) { return trace_it->second; } else { - return nullptr; + // Fall back to the `function_call` intrinsic when the target address is not present in `memory` + auto byte_it = memory.find(addr); + if (byte_it == memory.end()) { + return intrinsics.function_call; + } else { + return nullptr; + } } } @@ -174,6 +183,7 @@ class SimpleTraceManager : public remill::TraceManager { public: Memory &memory; std::unordered_map traces; + const remill::IntrinsicTable &intrinsics; }; // Looks for calls to a function like `__remill_function_return`, and @@ -231,6 +241,18 @@ int main(int argc, char *argv[]) { return EXIT_FAILURE; } + if (FLAGS_bytes[0] == '@') { + std::ifstream file(FLAGS_bytes.substr(1)); + if (!file.is_open()) { + std::cerr << "Could not open file " << FLAGS_bytes.substr(1) + << " specified to --bytes." << std::endl; + return EXIT_FAILURE; + } + std::stringstream buffer; + buffer << file.rdbuf(); + FLAGS_bytes = buffer.str(); + } + if (FLAGS_bytes.size() % 2) { std::cerr << "Please specify an even number of nibbles to --bytes."
<< std::endl; @@ -266,8 +288,8 @@ int main(int argc, char *argv[]) { const auto mem_ptr_type = arch->MemoryPointerType(); Memory memory = UnhexlifyInputBytes(addr_mask); - SimpleTraceManager manager(memory); remill::IntrinsicTable intrinsics(module.get()); + SimpleTraceManager manager(memory, intrinsics); auto inst_lifter = arch->DefaultLifter(intrinsics); diff --git a/include/remill/Arch/Arch.h b/include/remill/Arch/Arch.h index 4fc5ff8ef..9504a356e 100644 --- a/include/remill/Arch/Arch.h +++ b/include/remill/Arch/Arch.h @@ -357,6 +357,7 @@ class Arch { bool IsSPARC32(void) const; bool IsSPARC64(void) const; bool IsPPC(void) const; + bool IsMIPS(void) const; bool IsWindows(void) const; bool IsLinux(void) const; @@ -437,6 +438,10 @@ class Arch { static ArchPtr GetSleighPPC(llvm::LLVMContext *context, OSName os, ArchName arch_name); + // Defined in `lib/Arch/Sleigh/MIPSArch.cpp` + static ArchPtr GetSleighMIPS(llvm::LLVMContext *context, OSName os, + ArchName arch_name); + // Defined in `lib/Arch/SPARC32/Arch.cpp`. static ArchPtr GetSPARC32(llvm::LLVMContext *context, OSName os, ArchName arch_name); diff --git a/include/remill/Arch/MIPS/Runtime/State.h b/include/remill/Arch/MIPS/Runtime/State.h new file mode 100644 index 000000000..e2270f5a1 --- /dev/null +++ b/include/remill/Arch/MIPS/Runtime/State.h @@ -0,0 +1,292 @@ +/* + * Copyright (c) 2022-present Trail of Bits, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#pragma clang diagnostic push +#pragma clang diagnostic fatal "-Wpadded" + +#include "remill/Arch/Runtime/State.h" + +#if !defined(INCLUDED_FROM_REMILL) +# include "remill/Arch/Runtime/Types.h" +#endif + +struct Reg final { + union { + alignas(8) uint64_t qword; + alignas(4) uint32_t dword; + } __attribute__((packed)); +} __attribute__((packed)); + +static_assert(sizeof(uint64_t) == sizeof(Reg), "Invalid packing of `Reg`."); +static_assert(0 == __builtin_offsetof(Reg, dword), + "Invalid packing of `Reg::dword`."); + +static_assert(0 == __builtin_offsetof(Reg, qword), + "Invalid packing of `Reg::qword`."); + +// General Purpose Registers +struct alignas(8) GPR final { + volatile uint64_t _0; + Reg zero; + volatile uint64_t _1; + Reg at; + volatile uint64_t _2; + Reg v0; + volatile uint64_t _3; + Reg v1; + volatile uint64_t _4; + Reg a0; + volatile uint64_t _5; + Reg a1; + volatile uint64_t _6; + Reg a2; + volatile uint64_t _7; + Reg a3; + volatile uint64_t _8; + Reg t0; + volatile uint64_t _9; + Reg t1; + volatile uint64_t _10; + Reg t2; + volatile uint64_t _11; + Reg t3; + volatile uint64_t _12; + Reg t4; + volatile uint64_t _13; + Reg t5; + volatile uint64_t _14; + Reg t6; + volatile uint64_t _15; + Reg t7; + volatile uint64_t _16; + Reg s0; + volatile uint64_t _17; + Reg s1; + volatile uint64_t _18; + Reg s2; + volatile uint64_t _19; + Reg s3; + volatile uint64_t _20; + Reg s4; + volatile uint64_t _21; + Reg s5; + volatile uint64_t _22; + Reg s6; + volatile uint64_t _23; + Reg s7; + volatile uint64_t _24; + Reg t8; + volatile uint64_t _25; + Reg t9; + volatile uint64_t _26; + Reg k0; + volatile uint64_t _27; + Reg k1; + volatile uint64_t _28; + Reg gp; + volatile uint64_t _29; + Reg sp; + volatile uint64_t _30; + Reg s8; + volatile uint64_t _31; + Reg ra; + volatile uint64_t _32; + Reg pc; + +} __attribute__((packed)); + +static_assert(528 == sizeof(GPR), "Invalid structure packing of `GPR`."); + +// Floating Point Registers +struct
alignas(8) FPR final { + volatile uint64_t _0; + Reg f0; + volatile uint64_t _1; + Reg f1; + volatile uint64_t _2; + Reg f2; + volatile uint64_t _3; + Reg f3; + volatile uint64_t _4; + Reg f4; + volatile uint64_t _5; + Reg f5; + volatile uint64_t _6; + Reg f6; + volatile uint64_t _7; + Reg f7; + volatile uint64_t _8; + Reg f8; + volatile uint64_t _9; + Reg f9; + volatile uint64_t _10; + Reg f10; + volatile uint64_t _11; + Reg f11; + volatile uint64_t _12; + Reg f12; + volatile uint64_t _13; + Reg f13; + volatile uint64_t _14; + Reg f14; + volatile uint64_t _15; + Reg f15; + volatile uint64_t _16; + Reg f16; + volatile uint64_t _17; + Reg f17; + volatile uint64_t _18; + Reg f18; + volatile uint64_t _19; + Reg f19; + volatile uint64_t _20; + Reg f20; + volatile uint64_t _21; + Reg f21; + volatile uint64_t _22; + Reg f22; + volatile uint64_t _23; + Reg f23; + volatile uint64_t _24; + Reg f24; + volatile uint64_t _25; + Reg f25; + volatile uint64_t _26; + Reg f26; + volatile uint64_t _27; + Reg f27; + volatile uint64_t _28; + Reg f28; + volatile uint64_t _29; + Reg f29; + volatile uint64_t _30; + Reg f30; + volatile uint64_t _31; + Reg f31; + +} __attribute__((packed)); + +static_assert(512 == sizeof(FPR), "Invalid structure packing of `FPR`."); + +struct alignas(8) FlagRegisters final { + volatile uint64_t _0; + Reg ISAModeSwitch; + // TODO: Move them elsewhere + volatile uint64_t _1; + Reg HI; + volatile uint64_t _2; + Reg LO; +} __attribute__((packed)); + +struct alignas(8) COP0Registers final { + volatile uint64_t _0; + Reg Index; + volatile uint64_t _1; + Reg Random; + volatile uint64_t _2; + Reg EntryLo0; + volatile uint64_t _3; + Reg EntryLo1; + volatile uint64_t _4; + Reg Context; + volatile uint64_t _5; + Reg PageMask; + volatile uint64_t _6; + Reg Wired; + volatile uint64_t _7; + Reg HWREna; + volatile uint64_t _8; + Reg BadVAddr; + volatile uint64_t _9; + Reg Count; + volatile uint64_t _10; + Reg EntryHi; + volatile uint64_t _11; + Reg Compare; + volatile 
uint64_t _12; + Reg Status; + volatile uint64_t _13; + Reg Cause; + volatile uint64_t _14; + Reg EPC; + volatile uint64_t _15; + Reg PRId; + volatile uint64_t _16; + Reg Config; + volatile uint64_t _17; + Reg LLAddr; + volatile uint64_t _18; + Reg WatchLo; + volatile uint64_t _19; + Reg WatchHi; + volatile uint64_t _20; + Reg XContext; + volatile uint64_t _21; + Reg cop0_reg21; + volatile uint64_t _22; + Reg cop0_reg22; + volatile uint64_t _23; + Reg Debug; + volatile uint64_t _24; + Reg DEPC; + volatile uint64_t _25; + Reg PerfCnt; + volatile uint64_t _26; + Reg ErrCtl; + volatile uint64_t _27; + Reg CacheErr; + volatile uint64_t _28; + Reg TagLo; + volatile uint64_t _29; + Reg TagHi; + volatile uint64_t _30; + Reg ErrorEPC; + volatile uint64_t _31; + Reg DESAVE; +} __attribute__((packed)); + +struct alignas(8) COP1Registers final { + volatile uint64_t _0; + Reg FCSR; +} __attribute__((packed)); + +struct alignas(8) MIPSState : public ArchState { + GPR gpr; // 528 bytes. + + uint64_t _0; + + FPR fpr; + + uint64_t _1; + + FlagRegisters flags; + + uint64_t _2; + + COP0Registers cop0; + + uint64_t _3; + + COP1Registers cop1; + + uint64_t _4; +} __attribute__((packed)); + +struct State : public MIPSState {}; + +#pragma clang diagnostic pop diff --git a/include/remill/Arch/Name.h b/include/remill/Arch/Name.h index 4c030d3db..01477a251 100644 --- a/include/remill/Arch/Name.h +++ b/include/remill/Arch/Name.h @@ -121,6 +121,7 @@ enum ArchName : uint32_t { kArchThumb2LittleEndian, kArchPPC, + kArchMIPS, }; ArchName GetArchName(const llvm::Triple &triple); diff --git a/include/remill/Arch/Runtime/HyperCall.h b/include/remill/Arch/Runtime/HyperCall.h index 1a32eeafd..a50163e0a 100644 --- a/include/remill/Arch/Runtime/HyperCall.h +++ b/include/remill/Arch/Runtime/HyperCall.h @@ -96,6 +96,9 @@ class SyncHyperCall { kPPCEmulateInstruction, kPPCSysCall, + + kMIPSEmulateInstruction, + kMIPSSysCall, }; } __attribute__((packed)); diff --git 
a/include/remill/Arch/Runtime/Intrinsics.h b/include/remill/Arch/Runtime/Intrinsics.h index c06343149..ddfa987f7 100644 --- a/include/remill/Arch/Runtime/Intrinsics.h +++ b/include/remill/Arch/Runtime/Intrinsics.h @@ -137,6 +137,10 @@ __remill_flag_computation_carry(bool result, ...); [[gnu::used]] extern Memory *__remill_async_hyper_call(State &, addr_t ret_addr, Memory *); +[[gnu::used]] extern void +__remill_set_coprocessor_reg(State &, uint8_t cop_num, uint64_t reg_num, + uint64_t value, uint8_t sel); + // This intrinsic must be tagged with the `always_inline` function attribute // since it has an implementation we want to use in Anvill's lifted IR. // @@ -425,4 +429,8 @@ __remill_ppc_emulate_instruction(Memory *); [[gnu::used, gnu::const]] extern Memory *__remill_ppc_syscall(Memory *); +[[gnu::used, gnu::const]] extern Memory *__remill_mips_emulate_instruction(Memory *); + +[[gnu::used, gnu::const]] extern Memory *__remill_mips_syscall(Memory *); + } // extern C diff --git a/include/remill/BC/IntrinsicTable.h b/include/remill/BC/IntrinsicTable.h index 227b60f6f..1998deebf 100644 --- a/include/remill/BC/IntrinsicTable.h +++ b/include/remill/BC/IntrinsicTable.h @@ -44,6 +44,7 @@ class IntrinsicTable { // OS interaction. llvm::Function *const sync_hyper_call; llvm::Function *const async_hyper_call; + llvm::Function *const set_coprocessor_reg; // Memory read intrinsics. 
llvm::Function *const read_memory_8; diff --git a/include/remill/BC/PCodeCFG.h b/include/remill/BC/PCodeCFG.h index 84e7d47eb..fb545a276 100644 --- a/include/remill/BC/PCodeCFG.h +++ b/include/remill/BC/PCodeCFG.h @@ -67,11 +67,13 @@ class PcodeCFG { PcodeCFG(std::map blocks); }; -PcodeCFG CreateCFG(const std::vector &linear_ops); +PcodeCFG CreateCFG(const std::vector &linear_ops, + const remill::Arch &arch); class PcodeCFGBuilder { public: - explicit PcodeCFGBuilder(const std::vector &linear_ops); + explicit PcodeCFGBuilder(const std::vector &linear_ops, + const remill::Arch &arch); PcodeCFG Build() const; private: @@ -82,6 +84,7 @@ class PcodeCFGBuilder { std::vector GetBlockStarts() const; const std::vector &linear_ops; + const remill::Arch &arch; }; diff --git a/include/remill/BC/SleighLifter.h b/include/remill/BC/SleighLifter.h index 27e22cef9..bc1a27151 100644 --- a/include/remill/BC/SleighLifter.h +++ b/include/remill/BC/SleighLifter.h @@ -82,6 +82,7 @@ class SleighLifter : public InstructionLifter { const ContextValues &context_values); ::Sleigh &GetEngine(void) const; + const remill::Arch &arch; }; diff --git a/lib/Arch/Arch.cpp b/lib/Arch/Arch.cpp index e9edc59f0..62161583d 100644 --- a/lib/Arch/Arch.cpp +++ b/lib/Arch/Arch.cpp @@ -58,6 +58,7 @@ static unsigned AddressSize(ArchName arch_name) { case kArchThumb2LittleEndian: case kArchSparc32: case kArchSparc32_SLEIGH: + case kArchMIPS: return 32; // Actually MIPS64 but on 32bit Address bus for vr4300 case kArchPPC: return 32; case kArchAMD64: case kArchAMD64_AVX: @@ -117,6 +118,7 @@ ArchLocker Arch::Lock(ArchName arch_name_) { case ArchName::kArchX86_SLEIGH: case ArchName::kArchSparc32_SLEIGH: case ArchName::kArchPPC: return &gSleighArchLock; + case ArchName::kArchMIPS: return &gSleighArchLock; default: return ArchLocker(); } } @@ -247,6 +249,10 @@ auto Arch::GetArchByName(llvm::LLVMContext *context_, OSName os_name_, return GetSleighPPC(context_, os_name_, arch_name_); } + case kArchMIPS: { + DLOG(INFO) 
<< "Using architecture: MIPS"; + return GetSleighMIPS(context_, os_name_, arch_name_); + } default: { return nullptr; } @@ -428,6 +434,10 @@ bool Arch::IsPPC(void) const { return remill::kArchPPC == arch_name; } +bool Arch::IsMIPS(void) const { + return remill::kArchMIPS == arch_name; +} + bool Arch::IsWindows(void) const { return remill::kOSWindows == os_name; } @@ -720,8 +730,14 @@ llvm::Function *Arch::DeclareLiftedFunction(std::string_view name_, auto func_type = llvm::dyn_cast( RecontextualizeType(LiftedFunctionType(), context)); llvm::StringRef name(name_.data(), name_.size()); - auto func = llvm::Function::Create( + auto func = module->getFunction(name.str()); + + if (!func || func->getFunctionType() != func_type) { + func = llvm::Function::Create( func_type, llvm::GlobalValue::ExternalLinkage, 0u, name, module); + } else if (func->isDeclaration()) { + func->setLinkage(llvm::GlobalValue::WeakAnyLinkage); + } auto memory = remill::NthArgument(func, kMemoryPointerArgNum); auto state = remill::NthArgument(func, kStatePointerArgNum); diff --git a/lib/Arch/CMakeLists.txt b/lib/Arch/CMakeLists.txt index 3f6310875..edbbdb7a3 100644 --- a/lib/Arch/CMakeLists.txt +++ b/lib/Arch/CMakeLists.txt @@ -29,6 +29,7 @@ add_library(remill_arch STATIC add_subdirectory(AArch32) add_subdirectory(AArch64) add_subdirectory(PPC) +add_subdirectory(MIPS) add_subdirectory(SPARC32) add_subdirectory(SPARC64) add_subdirectory(Sleigh) diff --git a/lib/Arch/Instruction.cpp b/lib/Arch/Instruction.cpp index ba6a12fcb..72b43134b 100644 --- a/lib/Arch/Instruction.cpp +++ b/lib/Arch/Instruction.cpp @@ -673,6 +673,7 @@ std::string Instruction::Serialize(void) const { case kArchSparc32: ss << "SPARC32"; break; case kArchSparc64: ss << "SPARC64"; break; case kArchPPC: ss << "PowerPC"; break; + case kArchMIPS: ss << "MIPS"; break; } }; diff --git a/lib/Arch/MIPS/CMakeLists.txt b/lib/Arch/MIPS/CMakeLists.txt new file mode 100644 index 000000000..588d9c739 --- /dev/null +++ 
b/lib/Arch/MIPS/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(Runtime) diff --git a/lib/Arch/MIPS/Runtime/CMakeLists.txt b/lib/Arch/MIPS/Runtime/CMakeLists.txt new file mode 100644 index 000000000..694cfd4ff --- /dev/null +++ b/lib/Arch/MIPS/Runtime/CMakeLists.txt @@ -0,0 +1,44 @@ +# Copyright (c) 2022-present Trail of Bits, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +cmake_minimum_required(VERSION 3.6) +project(mips_runtime) + +set(MIPSRUNTIME_SOURCEFILES + Instructions.cpp + + "${REMILL_LIB_DIR}/Arch/Runtime/Intrinsics.cpp" +) + +set_source_files_properties(BasicBlock.cpp PROPERTIES COMPILE_FLAGS "-O3 -g0") + +# Visual C++ requires C++14 +if(WIN32) + set(required_cpp_standard "c++14") +else() + set(required_cpp_standard "c++17") +endif() + +add_runtime(mips + SOURCES ${MIPSRUNTIME_SOURCEFILES} + ADDRESS_SIZE 32 + DEFINITIONS "LITTLE_ENDIAN=${little_endian}" "REMILL_DISABLE_INT128=1" + BCFLAGS "-std=${required_cpp_standard}" + INCLUDEDIRECTORIES "${REMILL_INCLUDE_DIR}" "${REMILL_SOURCE_DIR}" + INSTALLDESTINATION "${REMILL_INSTALL_SEMANTICS_DIR}" + ARCH mips64 + + DEPENDENCIES + "${REMILL_INCLUDE_DIR}/remill/Arch/MIPS/Runtime/State.h" +) diff --git a/lib/Arch/MIPS/Runtime/Instructions.cpp b/lib/Arch/MIPS/Runtime/Instructions.cpp new file mode 100644 index 000000000..36ab6d3f6 --- /dev/null +++ b/lib/Arch/MIPS/Runtime/Instructions.cpp @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2022-present Trail of Bits, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "remill/Arch/MIPS/Runtime/State.h" +#include "remill/Arch/Runtime/Float.h" +#include "remill/Arch/Runtime/Intrinsics.h" +#include "remill/Arch/Runtime/Operators.h" + +// A definition is required to ensure that LLVM doesn't optimize the `State` type out of the bytecode +// See https://github.com/lifting-bits/remill/pull/631#issuecomment-1279989004f +State __remill_state; + +#define HYPER_CALL state.hyper_call + +namespace { + +DEF_SEM(HandleUnsupported) { + return __remill_sync_hyper_call(state, memory, + SyncHyperCall::kMIPSEmulateInstruction); +} + +DEF_SEM(HandleInvalidInstruction) { + HYPER_CALL = AsyncHyperCall::kInvalidInstruction; + return memory; +} + +} // namespace + +DEF_ISEL(UNSUPPORTED_INSTRUCTION) = HandleUnsupported; +DEF_ISEL(INVALID_INSTRUCTION) = HandleInvalidInstruction; diff --git a/lib/Arch/Name.cpp b/lib/Arch/Name.cpp index d8180f956..fc1554d41 100644 --- a/lib/Arch/Name.cpp +++ b/lib/Arch/Name.cpp @@ -30,6 +30,7 @@ ArchName GetArchName(const llvm::Triple &triple) { case llvm::Triple::sparc: return kArchSparc32; case llvm::Triple::sparcv9: return kArchSparc64; case llvm::Triple::ppc: return kArchPPC; + case llvm::Triple::mips64: return kArchMIPS; default: return kArchInvalid; } } @@ -75,10 +76,10 @@ ArchName GetArchName(std::string_view arch_name) { } else if (arch_name == "sparc32_sleigh") { return kArchSparc32_SLEIGH; - + } else if (arch_name == "mips") { + return 
kArchMIPS; } else if (arch_name == "ppc") { return kArchPPC; - } else if (arch_name == "aarch64_sleigh") { return kArchAArch64LittleEndian_SLEIGH; } else { @@ -106,6 +107,7 @@ static const std::string_view kArchNames[] = { [kArchSparc32_SLEIGH] = "sparc32_sleigh", [kArchThumb2LittleEndian] = "thumb2", [kArchPPC] = "ppc", + [kArchMIPS] = "mips", }; } // namespace diff --git a/lib/Arch/Runtime/HyperCall.cpp b/lib/Arch/Runtime/HyperCall.cpp index 4633d858e..6ea5d98ff 100644 --- a/lib/Arch/Runtime/HyperCall.cpp +++ b/lib/Arch/Runtime/HyperCall.cpp @@ -26,6 +26,9 @@ #elif defined(__aarch64__) # include "remill/Arch/AArch64/Runtime/State.h" # define REMILL_HYPERCALL_AARCH64 1 +#elif defined(__mips__) +# include "remill/Arch/MIPS/Runtime/State.h" +# define REMILL_HYPERCALL_MIPS 1 #elif defined(__sparc__) # if ADDRESS_SIZE_BITS == 32 # include "remill/Arch/SPARC32/Runtime/State.h" @@ -379,9 +382,15 @@ Memory *__remill_sync_hyper_call(State &state, Memory *mem, break; # endif +#elif defined(REMILL_HYPERCALL_MIPS) + case SyncHyperCall::kMIPSEmulateInstruction: + mem = __remill_mips_emulate_instruction(mem); + break; + case SyncHyperCall::kMIPSSysCall: + mem = __remill_mips_syscall(mem); + break; #elif defined(REMILL_HYPERCALL_PPC) - case SyncHyperCall::kPPCEmulateInstruction: mem = __remill_ppc_emulate_instruction(mem); break; diff --git a/lib/Arch/Runtime/Intrinsics.cpp b/lib/Arch/Runtime/Intrinsics.cpp index b83e973ca..368daf9e9 100644 --- a/lib/Arch/Runtime/Intrinsics.cpp +++ b/lib/Arch/Runtime/Intrinsics.cpp @@ -120,6 +120,7 @@ extern "C" [[gnu::used]] void __remill_intrinsics(void) { USED(__remill_async_hyper_call); USED(__remill_sync_hyper_call); + USED(__remill_set_coprocessor_reg); USED(__remill_undefined_8); USED(__remill_undefined_16); diff --git a/lib/Arch/Sleigh/CMakeLists.txt b/lib/Arch/Sleigh/CMakeLists.txt index 8e480ede2..361c99d27 100644 --- a/lib/Arch/Sleigh/CMakeLists.txt +++ b/lib/Arch/Sleigh/CMakeLists.txt @@ -39,14 +39,17 @@ 
add_library(remill_arch_sleigh STATIC "${REMILL_INCLUDE_DIR}/remill/Arch/SPARC32/SPARC32Base.h" "${REMILL_INCLUDE_DIR}/remill/Arch/PPC/Runtime/State.h" + "${REMILL_INCLUDE_DIR}/remill/Arch/MIPS/Runtime/State.h" Arch.h Thumb.h PPC.h + MIPS.h Arch.cpp X86Arch.cpp Thumb2Arch.cpp PPCArch.cpp + MIPSArch.cpp ControlFlowStructuring.cpp ControlFlowStructuring.h
diff --git a/lib/Arch/Sleigh/MIPS.h b/lib/Arch/Sleigh/MIPS.h
new file mode 100644
index 000000000..95863d235
--- /dev/null
+++ b/lib/Arch/Sleigh/MIPS.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2022-present Trail of Bits, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "Arch.h"
+
+namespace remill::sleighmips {
+
+// Sleigh-backed decoder for MIPS. Its constructor (MIPSArch.cpp) passes the
+// `mips64be.sla` and `mips64.pspec` spec names to `SleighDecoder`.
+class SleighMIPSDecoder final : public remill::sleigh::SleighDecoder {
+ public:
+  SleighMIPSDecoder(const remill::Arch &);
+
+  // Builds the IR value for the lifted PC from the current PC. The MIPS
+  // definition adds a fixed 4 and does not use the size argument.
+  llvm::Value *LiftPcFromCurrPc(llvm::IRBuilder<> &, llvm::Value *, size_t,
+                                const DecodingContext &) const override;
+
+  // Per-instruction Sleigh context setup. The MIPS definition currently does
+  // nothing; its only statement is commented out.
+  void InitializeSleighContext(uint64_t addr,
+                               remill::sleigh::SingleInstructionSleighContext &,
+                               const ContextValues &) const override;
+};
+
+}  // namespace remill::sleighmips
diff --git a/lib/Arch/Sleigh/MIPSArch.cpp b/lib/Arch/Sleigh/MIPSArch.cpp
new file mode 100644
index 000000000..06c2ed971
--- /dev/null
+++ b/lib/Arch/Sleigh/MIPSArch.cpp
@@ -0,0 +1,376 @@
+/*
+ * Copyright (c) 2022-present Trail of Bits, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Arch.h"
+#include "MIPS.h"
+
+#define INCLUDED_FROM_REMILL
+#include <remill/Arch/MIPS/Runtime/State.h>
+
+namespace remill {
+
+namespace sleighmips {
+// Forwards the MIPS Sleigh spec names to the base decoder, with empty
+// context-register mappings.
+SleighMIPSDecoder::SleighMIPSDecoder(const remill::Arch &arch)
+    : SleighDecoder(arch, "mips64be.sla", "mips64.pspec",
+                    sleigh::ContextRegMappings({}, {}), {}) {}
+
+// Emits `curr_pc + 4` in the type of `curr_pc`.
+// NOTE(review): `curr_insn_size` is ignored; presumably deliberate because up
+// to 8 bytes may be handed to sleigh for delay slots -- confirm.
+llvm::Value *
+SleighMIPSDecoder::LiftPcFromCurrPc(llvm::IRBuilder<> &bldr,
+                                    llvm::Value *curr_pc, size_t curr_insn_size,
+                                    const DecodingContext &) const {
+  return bldr.CreateAdd(curr_pc, llvm::ConstantInt::get(curr_pc->getType(), 4));
+}
+
+void SleighMIPSDecoder::InitializeSleighContext(
+    uint64_t addr, remill::sleigh::SingleInstructionSleighContext &ctxt,
+    const ContextValues &values) const {
+  // Intentionally empty for now: the ZERO context setup below is disabled.
+  //sleigh::SetContextRegisterValueInSleigh(
+  //    addr, std::string("ZERO").c_str(), "zero", 0, ctxt, values);
+}
+
+// `ArchBase` implementation for MIPS that decodes through `SleighMIPSDecoder`
+// and registers the fields of `MIPSState`.
+class SleighMIPSArch : public ArchBase {
+ public:
+  SleighMIPSArch(llvm::LLVMContext *context_, OSName os_name_,
+                 ArchName arch_name_)
+      : ArchBase(context_, os_name_, arch_name_),
+        decoder(*this) {}
+  virtual ~SleighMIPSArch() = default;
+
+  DecodingContext CreateInitialContext(void) const override {
+    return DecodingContext();
+  }
+
+  std::string_view StackPointerRegisterName(void) const override {
+    return "SP";
+  }
+
+  std::string_view ProgramCounterRegisterName(void) const override {
+    return "PC";
+  }
+
+  // `intrinsics` is unused; the operand lifter comes from the decoder.
+  OperandLifter::OpLifterPtr
+  DefaultLifter(const remill::IntrinsicTable &intrinsics) const override {
+    return decoder.GetOpLifter();
+  }
+
+  // Resets `inst` to an invalid instruction that falls through to
+  // `address + instr_bytes.size()`, then returns the Sleigh decoder's result.
+  bool DecodeInstruction(uint64_t address, std::string_view instr_bytes,
+                         Instruction &inst,
+                         DecodingContext context) const override {
+    inst.pc = address;
+    inst.next_pc = address + instr_bytes.size();  // Default fall-through.
+    inst.branch_taken_pc = 0;
+    inst.branch_not_taken_pc = 0;
+    inst.has_branch_taken_delay_slot = false;
+    inst.has_branch_not_taken_delay_slot = false;
+    inst.arch_name = arch_name;
+    inst.sub_arch_name = arch_name;
+    inst.branch_taken_arch_name = arch_name;
+    inst.arch = this;
+    inst.category = Instruction::kCategoryInvalid;
+    inst.operands.clear();
+    inst.flows = Instruction::InvalidInsn();
+
+    context.UpdateContextReg(std::string("ZERO"), 0);  // What to do here?
+
+    return this->decoder.DecodeInstruction(address, instr_bytes, inst, context);
+  }
+
+  uint64_t MinInstructionAlign(const DecodingContext &) const override {
+    return 4;
+  }
+
+  uint64_t MinInstructionSize(const DecodingContext &) const override {
+    return 4;
+  }
+
+  uint64_t MaxInstructionSize(const DecodingContext &,
+                              bool permit_fuse_idioms) const override {
+    return 8;  // Technically 4, but delay slots mean we pass 8 bytes to sleigh
+  }
+
+  llvm::CallingConv::ID DefaultCallingConv(void) const override {
+    return llvm::CallingConv::C;
+  }
+
+  llvm::Triple Triple(void) const override {
+    auto triple = BasicTriple();
+    triple.setArch(llvm::Triple::mips64);
+    return triple;
+  }
+
+  llvm::DataLayout DataLayout(void) const override {
+    // M4xw: TODO: Confirm this is correct (32-bit pointers, mips64 triple)
+    return llvm::DataLayout("E-m:e-p:32:32-i64:64-f128:64-n32-S64");
+  }
+
+  // Registers each `MIPSState` field under its upper-case name, with its LLVM
+  // type and byte offset. `*_LO` entries use the `dword` member and name the
+  // 64-bit register as their parent.
+  void PopulateRegisterTable(void) const override {
+    CHECK_NOTNULL(context);
+
+    reg_by_offset.resize(sizeof(MIPSState));
+
+    auto u8 = llvm::Type::getInt8Ty(*context);
+    auto u32 = llvm::Type::getInt32Ty(*context);
+    auto u64 = llvm::Type::getInt64Ty(*context);
+
+    auto f32 = llvm::Type::getFloatTy(*context);
+    auto f64 = llvm::Type::getDoubleTy(*context);
+
+    // Byte offset of `access` inside `type`, taken from a null base pointer.
+#define OFFSET_OF(type, access) \
+  (reinterpret_cast<uintptr_t>(&reinterpret_cast<const volatile char &>( \
+      static_cast<type *>(nullptr)->access)))
+
+#define REG(name, access, type) \
+  AddRegister(#name, type, OFFSET_OF(MIPSState, access), nullptr)
+
+#define SUB_REG(name, access, type, parent_reg_name) \
+  AddRegister(#name, type, OFFSET_OF(MIPSState, access), #parent_reg_name)
+
+    REG(ZERO, gpr.zero.qword, u64);
+    SUB_REG(ZERO_LO, gpr.zero.dword, u32, ZERO);
+    REG(AT, gpr.at.qword, u64);
+    SUB_REG(AT_LO, gpr.at.dword, u32, AT);
+    REG(V0, gpr.v0.qword, u64);
+    SUB_REG(V0_LO, gpr.v0.dword, u32, V0);
+    REG(V1, gpr.v1.qword, u64);
+    SUB_REG(V1_LO, gpr.v1.dword, u32, V1);
+    REG(A0, gpr.a0.qword, u64);
+    SUB_REG(A0_LO, gpr.a0.dword, u32, A0);
+    REG(A1, gpr.a1.qword, u64);
+    SUB_REG(A1_LO, gpr.a1.dword, u32, A1);
+    REG(A2, gpr.a2.qword, u64);
+    SUB_REG(A2_LO, gpr.a2.dword, u32, A2);
+    REG(A3, gpr.a3.qword, u64);
+    SUB_REG(A3_LO, gpr.a3.dword, u32, A3);
+    REG(T0, gpr.t0.qword, u64);
+    SUB_REG(T0_LO, gpr.t0.dword, u32, T0);
+    REG(T1, gpr.t1.qword, u64);
+    SUB_REG(T1_LO, gpr.t1.dword, u32, T1);
+    REG(T2, gpr.t2.qword, u64);
+    SUB_REG(T2_LO, gpr.t2.dword, u32, T2);
+    REG(T3, gpr.t3.qword, u64);
+    SUB_REG(T3_LO, gpr.t3.dword, u32, T3);
+    REG(T4, gpr.t4.qword, u64);
+    SUB_REG(T4_LO, gpr.t4.dword, u32, T4);
+    REG(T5, gpr.t5.qword, u64);
+    SUB_REG(T5_LO, gpr.t5.dword, u32, T5);
+    REG(T6, gpr.t6.qword, u64);
+    SUB_REG(T6_LO, gpr.t6.dword, u32, T6);
+    REG(T7, gpr.t7.qword, u64);
+    SUB_REG(T7_LO, gpr.t7.dword, u32, T7);
+    REG(S0, gpr.s0.qword, u64);
+    SUB_REG(S0_LO, gpr.s0.dword, u32, S0);
+    REG(S1, gpr.s1.qword, u64);
+    SUB_REG(S1_LO, gpr.s1.dword, u32, S1);
+    REG(S2, gpr.s2.qword, u64);
+    SUB_REG(S2_LO, gpr.s2.dword, u32, S2);
+    REG(S3, gpr.s3.qword, u64);
+    SUB_REG(S3_LO, gpr.s3.dword, u32, S3);
+    REG(S4, gpr.s4.qword, u64);
+    SUB_REG(S4_LO, gpr.s4.dword, u32, S4);
+    REG(S5, gpr.s5.qword, u64);
+    SUB_REG(S5_LO, gpr.s5.dword, u32, S5);
+    REG(S6, gpr.s6.qword, u64);
+    SUB_REG(S6_LO, gpr.s6.dword, u32, S6);
+    REG(S7, gpr.s7.qword, u64);
+    SUB_REG(S7_LO, gpr.s7.dword, u32, S7);
+    REG(T8, gpr.t8.qword, u64);
+    SUB_REG(T8_LO, gpr.t8.dword, u32, T8);
+    REG(T9, gpr.t9.qword, u64);
+    SUB_REG(T9_LO, gpr.t9.dword, u32, T9);
+    REG(K0, gpr.k0.qword, u64);
+    SUB_REG(K0_LO, gpr.k0.dword, u32, K0);
+    REG(K1, gpr.k1.qword, u64);
+    SUB_REG(K1_LO, gpr.k1.dword, u32, K1);
+    REG(GP, gpr.gp.qword, u64);
+    SUB_REG(GP_LO, gpr.gp.dword, u32, GP);
+    REG(SP, gpr.sp.qword, u64);
+    SUB_REG(SP_LO, gpr.sp.dword, u32, SP);
+    REG(S8, gpr.s8.qword, u64);
+    SUB_REG(S8_LO, gpr.s8.dword, u32, S8);
+    REG(RA, gpr.ra.qword, u64);
+    SUB_REG(RA_LO, gpr.ra.dword, u32, RA);
+    REG(PC, gpr.pc.qword, u64);
+    SUB_REG(PC_LO, gpr.pc.dword, u32, PC);
+
+    // Flags
+    // NOTE(review): ISAMODESWITCH is typed u8 but placed at the qword member.
+    REG(ISAMODESWITCH, flags.ISAModeSwitch.qword, u8);
+    REG(HI, flags.HI.qword, u64);
+    REG(LO, flags.LO.qword, u64);
+
+    // FPR
+    REG(F0, fpr.f0.qword, f64);
+    SUB_REG(F0_LO, fpr.f0.dword, f32, F0);
+    REG(F1, fpr.f1.qword, f64);
+    SUB_REG(F1_LO, fpr.f1.dword, f32, F1);
+    REG(F2, fpr.f2.qword, f64);
+    SUB_REG(F2_LO, fpr.f2.dword, f32, F2);
+    REG(F3, fpr.f3.qword, f64);
+    SUB_REG(F3_LO, fpr.f3.dword, f32, F3);
+    REG(F4, fpr.f4.qword, f64);
+    SUB_REG(F4_LO, fpr.f4.dword, f32, F4);
+    REG(F5, fpr.f5.qword, f64);
+    SUB_REG(F5_LO, fpr.f5.dword, f32, F5);
+    REG(F6, fpr.f6.qword, f64);
+    SUB_REG(F6_LO, fpr.f6.dword, f32, F6);
+    REG(F7, fpr.f7.qword, f64);
+    SUB_REG(F7_LO, fpr.f7.dword, f32, F7);
+    REG(F8, fpr.f8.qword, f64);
+    SUB_REG(F8_LO, fpr.f8.dword, f32, F8);
+    REG(F9, fpr.f9.qword, f64);
+    SUB_REG(F9_LO, fpr.f9.dword, f32, F9);
+    REG(F10, fpr.f10.qword, f64);
+    SUB_REG(F10_LO, fpr.f10.dword, f32, F10);
+    REG(F11, fpr.f11.qword, f64);
+    SUB_REG(F11_LO, fpr.f11.dword, f32, F11);
+    REG(F12, fpr.f12.qword, f64);
+    SUB_REG(F12_LO, fpr.f12.dword, f32, F12);
+    REG(F13, fpr.f13.qword, f64);
+    SUB_REG(F13_LO, fpr.f13.dword, f32, F13);
+    REG(F14, fpr.f14.qword, f64);
+    SUB_REG(F14_LO, fpr.f14.dword, f32, F14);
+    REG(F15, fpr.f15.qword, f64);
+    SUB_REG(F15_LO, fpr.f15.dword, f32, F15);
+    REG(F16, fpr.f16.qword, f64);
+    SUB_REG(F16_LO, fpr.f16.dword, f32, F16);
+    REG(F17, fpr.f17.qword, f64);
+    SUB_REG(F17_LO, fpr.f17.dword, f32, F17);
+    REG(F18, fpr.f18.qword, f64);
+    SUB_REG(F18_LO, fpr.f18.dword, f32, F18);
+    REG(F19, fpr.f19.qword, f64);
+    SUB_REG(F19_LO, fpr.f19.dword, f32, F19);
+    REG(F20, fpr.f20.qword, f64);
+    SUB_REG(F20_LO, fpr.f20.dword, f32, F20);
+    REG(F21, fpr.f21.qword, f64);
+    SUB_REG(F21_LO, fpr.f21.dword, f32, F21);
+    REG(F22, fpr.f22.qword, f64);
+    SUB_REG(F22_LO, fpr.f22.dword, f32, F22);
+    REG(F23, fpr.f23.qword, f64);
+    SUB_REG(F23_LO, fpr.f23.dword, f32, F23);
+    REG(F24, fpr.f24.qword, f64);
+    SUB_REG(F24_LO, fpr.f24.dword, f32, F24);
+    REG(F25, fpr.f25.qword, f64);
+    SUB_REG(F25_LO, fpr.f25.dword, f32, F25);
+    REG(F26, fpr.f26.qword, f64);
+    SUB_REG(F26_LO, fpr.f26.dword, f32, F26);
+    REG(F27, fpr.f27.qword, f64);
+    SUB_REG(F27_LO, fpr.f27.dword, f32, F27);
+    REG(F28, fpr.f28.qword, f64);
+    SUB_REG(F28_LO, fpr.f28.dword, f32, F28);
+    REG(F29, fpr.f29.qword, f64);
+    SUB_REG(F29_LO, fpr.f29.dword, f32, F29);
+    REG(F30, fpr.f30.qword, f64);
+    SUB_REG(F30_LO, fpr.f30.dword, f32, F30);
+    REG(F31, fpr.f31.qword, f64);
+    SUB_REG(F31_LO, fpr.f31.dword, f32, F31);
+
+    // COP0
+    REG(INDEX, cop0.Index.qword, u64);
+    REG(RANDOM, cop0.Random.qword, u64);
+    REG(ENTRYLO0, cop0.EntryLo0.qword, u64);
+    REG(ENTRYLO1, cop0.EntryLo1.qword, u64);
+    REG(CONTEXT, cop0.Context.qword, u64);
+    REG(PAGEMASK, cop0.PageMask.qword, u64);
+    REG(WIRED, cop0.Wired.qword, u64);
+    REG(HWRENA, cop0.HWREna.qword, u64);
+    REG(BADVADDR, cop0.BadVAddr.qword, u64);
+    REG(COUNT, cop0.Count.qword, u64);
+    REG(ENTRYHI, cop0.EntryHi.qword, u64);
+    REG(COMPARE, cop0.Compare.qword, u64);
+    REG(STATUS, cop0.Status.qword, u64);
+    REG(CAUSE, cop0.Cause.qword, u64);
+    REG(EPC, cop0.EPC.qword, u64);
+    REG(PRID, cop0.PRId.qword, u64);
+    REG(CONFIG, cop0.Config.qword, u64);
+    REG(LLADDR, cop0.LLAddr.qword, u64);
+    REG(WATCHLO, cop0.WatchLo.qword, u64);
+    REG(WATCHHI, cop0.WatchHi.qword, u64);
+    REG(XCONTEXT, cop0.XContext.qword, u64);
+    REG(COP0_REG21, cop0.cop0_reg21.qword, u64);
+    REG(COP0_REG22, cop0.cop0_reg22.qword, u64);
+    REG(DEBUG, cop0.Debug.qword, u64);
+    REG(DEPC, cop0.DEPC.qword, u64);
+    REG(PERFCNT, cop0.PerfCnt.qword, u64);
+    REG(ERRCTL, cop0.ErrCtl.qword, u64);
+    REG(CACHEERR, cop0.CacheErr.qword, u64);
+    REG(TAGLO, cop0.TagLo.qword, u64);
+    REG(TAGHI, cop0.TagHi.qword, u64);
+    REG(ERROREPC, cop0.ErrorEPC.qword, u64);
+    REG(DESAVE, cop0.DESAVE.qword, u64);
+
+    // COP1
+    // TODO: Maybe move fpr here?
+    REG(FCSR, cop1.FCSR.dword, u32);
+  }
+
+  // Creates the next-PC and ignore-next-PC allocas in the entry block, stores
+  // the PC argument into both, then requests the address of the PC register
+  // in the state (the result is discarded).
+  void
+  FinishLiftedFunctionInitialization(llvm::Module *module,
+                                     llvm::Function *bb_func) const override {
+    auto &context = module->getContext();
+    const auto addr = llvm::Type::getInt64Ty(context);
+
+    auto &entry_block = bb_func->getEntryBlock();
+    llvm::IRBuilder<> ir(&entry_block);
+
+    const auto pc_arg = NthArgument(bb_func, kPCArgNum);
+    const auto state_ptr_arg = NthArgument(bb_func, kStatePointerArgNum);
+
+    auto mk_alloca = [&](auto &from) {
+      return ir.CreateAlloca(addr, nullptr, from.data());
+    };
+    ir.CreateStore(pc_arg, mk_alloca(kNextPCVariableName));
+    ir.CreateStore(pc_arg, mk_alloca(kIgnoreNextPCVariableName));
+
+    /*auto u8 = llvm::Type::getInt8Ty(context);
+    auto zero_c = ir.CreateAlloca(u8, nullptr, "ZERO");
+    ir.CreateStore(llvm::Constant::getNullValue(u8), zero_c);*/
+
+    std::ignore = RegisterByName(kPCVariableName)->AddressOf(state_ptr_arg, ir);
+  }
+
+ private:
+  SleighMIPSDecoder decoder;
+};
+
+}  // namespace sleighmips
+
+// Factory for the Sleigh-based MIPS architecture.
+Arch::ArchPtr Arch::GetSleighMIPS(llvm::LLVMContext *context_,
+                                  remill::OSName os_name_,
+                                  remill::ArchName arch_name_) {
+  return std::make_unique<sleighmips::SleighMIPSArch>(context_, os_name_,
+                                                      arch_name_);
+}
+
+}  // namespace remill
diff --git a/lib/BC/IntrinsicTable.cpp b/lib/BC/IntrinsicTable.cpp
index 215ed7a02..0b383003e 100644
--- a/lib/BC/IntrinsicTable.cpp
+++ b/lib/BC/IntrinsicTable.cpp
@@ -87,6 +87,7 @@
IntrinsicTable::IntrinsicTable(llvm::Module *module)
       // OS interaction.
       sync_hyper_call(FindIntrinsic(module, "__remill_sync_hyper_call")),
       async_hyper_call(FindIntrinsic(module, "__remill_async_hyper_call")),
+      set_coprocessor_reg(FindIntrinsic(module, "__remill_set_coprocessor_reg")),
 
       // Memory access.
       read_memory_8(SetMemoryReadNone(FindPureIntrinsic(module, "__remill_read_memory_8"))),
diff --git a/lib/BC/PcodeCFG.cpp b/lib/BC/PcodeCFG.cpp
index 5265d3374..64c38a27b 100644
--- a/lib/BC/PcodeCFG.cpp
+++ b/lib/BC/PcodeCFG.cpp
@@ -1,3 +1,5 @@
+#include
+#include
 #include
 #include
 
@@ -15,8 +17,9 @@
 namespace remill {
 namespace sleigh {
 
-PcodeCFG CreateCFG(const std::vector &linear_ops) {
-  return PcodeCFGBuilder(linear_ops).Build();
+PcodeCFG CreateCFG(const std::vector &linear_ops,
+                   const remill::Arch &arch) {
+  return PcodeCFGBuilder(linear_ops, arch).Build();
 }
 
 
@@ -171,8 +174,10 @@ PcodeCFG PcodeCFGBuilder::Build() const {
 }
 
 
-PcodeCFGBuilder::PcodeCFGBuilder(const std::vector &linear_ops)
-    : linear_ops(linear_ops) {}
+PcodeCFGBuilder::PcodeCFGBuilder(const std::vector &linear_ops,
+                                 const remill::Arch &arch)
+    : linear_ops(linear_ops),
+      arch(arch) {}
 
 PcodeCFG::PcodeCFG(std::map blocks)
     : blocks(std::move(blocks)) {}
diff --git a/lib/BC/SleighLifter.cpp b/lib/BC/SleighLifter.cpp
index 443512dee..fad0968e5 100644
--- a/lib/BC/SleighLifter.cpp
+++ b/lib/BC/SleighLifter.cpp
@@ -84,6 +84,7 @@ static size_t kNextPcArgNum = 3;
 
 static const std::string kEqualityClaimName = "claim_eq";
 static const std::string kSysCallName = "syscall";
+static const std::string kSetCopRegName = "setCopReg";
 
 static bool isVarnodeInConstantSpace(VarnodeData vnode) {
   auto spc = vnode.getAddr().getSpace();
@@ -1352,6 +1353,37 @@ class SleighLifter::PcodeToLLVMEmitIntoBlock {
                                args);
 
           return kLiftedInstruction;
+        } else if (other_func_name == kSetCopRegName &&
+                   insn.arch_name == ArchName::kArchMIPS) {
+          DLOG(INFO) << "Invoking setCopReg";
+
+          if (isize == 5) {  // operands are read from vars[1..4]
+            VarnodeData &cop_num = vars[1];
+            VarnodeData &reg_num = vars[2];
+            VarnodeData &value = vars[3];
+            VarnodeData &sel = vars[4];
+
+            auto inval_cop_num = this->LiftIntegerInParam(bldr, cop_num);
+            auto inval_reg_num = ConstantValue::CreatConstant(
+                this->replacement_cont.LiftOffsetOrReplace(
+                    bldr, reg_num,
+                    llvm::IntegerType::get(this->context, reg_num.size * 8)));
+            auto inval_value = LiftIntegerInParam(bldr, value);
+            auto inval_sel = this->LiftIntegerInParam(bldr, sel);
+
+            std::array<llvm::Value *, 5> args = {
+                state_pointer, inval_cop_num.value(),
+                inval_reg_num.get()
+                    ->LiftAsInParam(bldr, llvm::IntegerType::get(
+                                              this->context, reg_num.size * 8))
+                    .value(),
+                inval_value.value(), inval_sel.value()};  // NOTE(review): unchecked .value()
+
+            bldr.CreateCall(
+                insn_lifter_parent.GetIntrinsicTable()->set_coprocessor_reg,
+                args);
+          }
+          return kLiftedInstruction;  // NOTE(review): also reached when isize != 5
         }
         DLOG(ERROR) << "Unsupported pcode intrinsic: " << *other_func_name;
       }
@@ -1382,6 +1414,13 @@ class SleighLifter::PcodeToLLVMEmitIntoBlock {
       return LiftStatus::kLiftedLifterError;
     }
 
+    if (btaken_var.invert) {
+      // Inverted: branch when the value is zero, compared in its own type.
+      *maybe_should_branch = bldr.CreateICmpEQ(
+          *maybe_should_branch,
+          llvm::Constant::getNullValue((*maybe_should_branch)->getType()));
+    }
+
     auto should_branch = bldr.CreateZExtOrTrunc(
         *maybe_should_branch, llvm::IntegerType::get(this->context, 8));
     auto branch_taken_ref = this->GetBranchTakenRef();
@@ -1606,7 +1645,8 @@ SleighLifter::SleighLifter(const remill::Arch &arch_,
     : InstructionLifter(&arch_, intrinsics_),
       sleigh_context(new sleigh::SingleInstructionSleighContext(
           dec_.GetSLAName(), dec_.GetPSpec())),
-      decoder(dec_) {}
+      decoder(dec_),
+      arch(arch_) {}
 
 
 const std::string_view SleighLifter::kInstructionFunctionPrefix =
@@ -1633,8 +1673,14 @@ SleighLifter::DefineInstructionFunction(Instruction &inst,
                                            ptr_ty};
   auto ty =
       llvm::FunctionType::get(inst.arch->MemoryPointerType(), params, false);
-  auto func = llvm::Function::Create(ty, llvm::GlobalValue::ExternalLinkage, 0,
-                                     nm.str(), target_mod);
+  auto func = target_mod->getFunction(nm.str());  // reuse a same-named function
+
+  if (!func || func->getFunctionType() != ty) {
+    func = llvm::Function::Create(ty, llvm::GlobalValue::ExternalLinkage, 0,
+                                  nm.str(), target_mod);
+  } else if (func->isDeclaration()) {
+    func->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
+  }
 
   auto memory = remill::NthArgument(func, 1);
   auto state = remill::NthArgument(func, 0);
@@ -1688,7 +1734,7 @@ SleighLifter::LiftIntoInternalBlockWithSleighState(
 
   //TODO(Ian): make a safe to use sleighinstruction context that wraps a context with an arch to preform reset reinits
 
-  auto cfg = sleigh::CreateCFG(pcode_record.ops);
+  auto cfg = sleigh::CreateCFG(pcode_record.ops, this->arch);
 
 
   SleighLifter::PcodeToLLVMEmitIntoBlock::DecodingContextConstants
@@ -1767,6 +1813,27 @@ LiftStatus SleighLifter::LiftIntoBlockWithSleighState(
           llvm::ConstantInt::get(this->GetWordType(), inst.bytes.size())),
       next_pc_ref);
 
+  ///////////////////////////////////////////////////////////////////////////////////////////
+  // MIPS only: approximate the COUNT register by adding a fixed amount per lifted instruction.
+  // May be preferred here over patches to sleigh definitions for now
+  // TODO(M4xw): Implement exact cycle count per opcode according to the optimization manual
+  if (inst.arch->IsMIPS()) {
+    const auto [count_ref, count_ref_type] =
+        LoadRegAddress(block, state_ptr, "COUNT");
+
+    const auto count =
+        intoblock_builer.CreateLoad(this->GetWordType(), count_ref);
+
+    intoblock_builer.CreateStore(
+        intoblock_builer.CreateAdd(
+            count, llvm::ConstantInt::get(
+                       this->GetWordType(),
+                       4)),  // Historically approximated Count per Opcode
+        count_ref);
+  }
+  DLOG(INFO) << inst.Serialize();
+  //////////////////////////////////////////////////////////////////////////////////////////
+
   // TODO(Ian): THIS IS AN UNSOUND ASSUMPTION THAT RETURNS ALWAYS RETURN TO THE FALLTHROUGH, this is just to make things work
   intoblock_builer.CreateStore(
       intoblock_builer.CreateLoad(this->GetWordType(), next_pc_ref),
diff --git a/lib/BC/TraceLifter.cpp b/lib/BC/TraceLifter.cpp
index
c53c5a2f6..9a662294c 100644 --- a/lib/BC/TraceLifter.cpp +++ b/lib/BC/TraceLifter.cpp @@ -477,6 +477,9 @@ bool TraceLifter::Impl::Lift( case Instruction::kCategoryDirectFunctionCall: { direct_func_call: try_add_delay_slot(true, block); + // M4xw: How to handle this appropriately? + StoreProgramCounter(block, inst.branch_taken_pc, *intrinsics); + if (inst.branch_not_taken_pc != inst.branch_taken_pc) { trace_work_list.insert(inst.branch_taken_pc); auto target_trace = get_trace_decl(inst.branch_taken_pc);