Skip to content
This repository was archived by the owner on Aug 23, 2022. It is now read-only.
Open
Show file tree
Hide file tree
Changes from 17 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .remill_commit_id
Original file line number Diff line number Diff line change
@@ -1 +1 @@
eae68217c43f2e99a657c75ae36d40af740cc20e
cb3f49bf3e52c465f194cd01a3a3894bfecf7b2a
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ add_executable(${MCSEMA_LIFT}
mcsema/BC/External.cpp
mcsema/BC/Function.cpp
mcsema/BC/Instruction.cpp
mcsema/BC/Info.cpp
mcsema/BC/Legacy.cpp
mcsema/BC/Lift.cpp
mcsema/BC/Optimize.cpp
Expand Down
33 changes: 18 additions & 15 deletions mcsema/BC/Function.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
#include "mcsema/BC/Callback.h"
#include "mcsema/BC/Function.h"
#include "mcsema/BC/Instruction.h"
#include "mcsema/BC/Info.h"
#include "mcsema/BC/Legacy.h"
#include "mcsema/BC/Lift.h"
#include "mcsema/BC/Optimize.h"
Expand Down Expand Up @@ -990,6 +991,22 @@ static llvm::Function *LiftFunction(

} // namespace

// Make sure a declaration for the lifted counterpart of `cfg_func` exists in
// `gModule`. If a function named `cfg_func->lifted_name` is already present,
// nothing is changed. Otherwise the function is declared through remill and
// tagged with the original name and address taken from `cfg_func`.
void DeclareLiftedFunction(const NativeObject *cfg_func) {
  const auto &lifted_name = cfg_func->lifted_name;

  // Guard: a function with this name is already in the module.
  if (gModule->getFunction(lifted_name)) {
    LOG(INFO) << "Already inserted function: " << lifted_name << ", skipping.";
    return;
  }

  auto declared = remill::DeclareLiftedFunction(gModule.get(), lifted_name);
  LOG(INFO) << "Inserted function: " << lifted_name;

  // Remember where this function came from in the original binary.
  info::Set({cfg_func->name, cfg_func->ea}, *declared);
}

// Declare the lifted functions. This is a separate step from defining
// functions because it's important that all possible code- and data-cross
// references are resolved before any data or instructions can use
Expand All @@ -1000,21 +1017,7 @@ void DeclareLiftedFunctions(const NativeModule *cfg_module) {
if (cfg_func->is_external) {
continue;
}

const auto &func_name = cfg_func->lifted_name;
auto lifted_func = gModule->getFunction(func_name);

if (!lifted_func) {
lifted_func = remill::DeclareLiftedFunction(gModule.get(), func_name);

// make local functions 'static'
LOG(INFO)
<< "Inserted function: " << func_name;

} else {
LOG(INFO)
<< "Already inserted function: " << func_name << ", skipping.";
}
DeclareLiftedFunction(cfg_func);
}
}

Expand Down
52 changes: 52 additions & 0 deletions mcsema/BC/Info.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
* Copyright (c) 2020 Trail of Bits, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <mcsema/BC/Info.h>
#include <mcsema/BC/Util.h>

#include <remill/BC/Util.h>

// TODO(lukas): Nested declaration once C++17 is available
namespace mcsema {
namespace info {

// Attach the fields of `meta` that hold a value to `func` as metadata.
// The address is stored as its decimal string form under `Kinds::ea_kind`,
// the name is stored verbatim under `Kinds::name_kind`. Absent fields are
// left untouched.
void Set(const Info &meta, llvm::Function &func) {
  if (meta.ea.has_value()) {
    const std::string ea_text = std::to_string(*meta.ea);
    SetMetadata(func, Kinds::ea_kind, ea_text);
  }
  if (meta.name.has_value()) {
    SetMetadata(func, Kinds::name_kind, *meta.name);
  }
}

// Collect everything recorded on `func` into a single `Info`. Each field is
// empty when the corresponding metadata is not present.
Info Get(llvm::Function &func) {
  Info collected;
  collected.name = Name(func);
  collected.ea = EA(func);
  return collected;
}

// Look up the name stored on `func` under `Kinds::name_kind`; empty when no
// such metadata is attached.
std::optional<std::string> Name(llvm::Function &func) {
  auto stored_name = GetMetadata(func, Kinds::name_kind);
  return stored_name;
}

// Look up the address stored on `func` under `Kinds::ea_kind`.
//
// Returns an empty optional when no such metadata is attached. The stored
// value is the decimal string written by `Set`; as with any `std::sto*`
// parse, a non-numeric string raises `std::invalid_argument`.
std::optional<uint64_t> EA(llvm::Function &func) {
  auto as_str = GetMetadata(func, Kinds::ea_kind);
  if (!as_str) {
    return {};
  }
  // `std::stoul` yields `unsigned long`, which is only 32 bits wide on some
  // ABIs; a 64-bit address would then throw `std::out_of_range`. Parse as
  // `unsigned long long`, which is guaranteed to hold at least 64 bits.
  return static_cast<uint64_t>(std::stoull(*as_str));
}

} // namespace info
} // namespace mcsema


60 changes: 60 additions & 0 deletions mcsema/BC/Info.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
* Copyright (c) 2020 Trail of Bits, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <iostream>
#include <optional>
#include <string>

namespace llvm {
class Function;
} // namespace llvm

namespace mcsema::info {

// Names of the metadata kinds under which `Set` stores, and `Name` / `EA`
// read back, the information attached to a function.
struct Kinds {
  // TODO(lukas): std::string_view once c++17 is available
  // String literals have type `const char[N]`. `constexpr` only makes the
  // pointer itself const, so the pointee must be declared `const` explicitly;
  // binding a literal to a plain `char *` is ill-formed since C++11.
  static constexpr const char *ea_kind = "bin.ea";
  static constexpr const char *name_kind = "bin.name";
};

// Optional name and address describing a function. Either field may be
// absent.
struct Info {
  std::optional<std::string> name;
  std::optional<uint64_t> ea;

  // Writes "<address>: <name>" followed by `std::endl`. The address is
  // printed in hexadecimal with a "0x" prefix (the stream is switched back
  // to decimal afterwards); a missing field is printed as "(unknown)".
  template <typename Stream>
  friend Stream &operator<<(Stream &os, const Info &info) {
    if (!info.ea) {
      os << "(unknown)";
    } else {
      os << "0x" << std::hex << *info.ea << std::dec;
    }

    const std::string shown_name = info.name ? *info.name : "(unknown)";
    os << ": " << shown_name << std::endl;
    return os;
  }
};

// Attach the fields of `meta` that hold a value to `func` as metadata
// (address as a decimal string under `Kinds::ea_kind`, name under
// `Kinds::name_kind`).
void Set(const Info &meta, llvm::Function &func);
// Read back both the name and the address recorded on `func`.
Info Get(llvm::Function &func);

// Name recorded on `func`, or an empty optional if none is attached.
std::optional<std::string> Name(llvm::Function &func);
// Address recorded on `func`, or an empty optional if none is attached.
std::optional<uint64_t> EA(llvm::Function &func);

} // namespace mcsema::info


24 changes: 24 additions & 0 deletions mcsema/BC/Util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,4 +73,28 @@ llvm::Constant *LiftEA(const NativeSegment *cfg_seg, uint64_t ea) {
llvm::ConstantInt::get(gWordType, offset));
}

// Attach `val` to `go` as a single-string metadata node of kind `kind`.
// If `go` already carries metadata of that kind a warning is logged, and the
// new node is set regardless.
void SetMetadata(llvm::GlobalObject &go,
                 const std::string &kind, const std::string &val) {
  const bool already_tagged = go.getMetadata(kind) != nullptr;
  if (already_tagged) {
    LOG(WARNING) << remill::LLVMThingToString(&go)
                 << " already has metadata of kind: " << kind;
  }

  auto &context = go.getContext();
  go.setMetadata(
      kind, llvm::MDNode::get(context, llvm::MDString::get(context, val)));
}

// Read the string stored on `go` under metadata kind `kind`.
//
// Returns an empty `MetaValue` when `go` has no metadata of that kind.
// Only the node shape produced by `SetMetadata` is supported: exactly one
// operand, and that operand is an `llvm::MDString`. Any other shape aborts
// through `CHECK`.
MetaValue GetMetadata(llvm::GlobalObject &go, const std::string &kind) {
  auto node = go.getMetadata(kind);
  if (!node) {
    return {};
  }

  CHECK(node->getNumOperands() == 1)
      << "GetMetadata only supports nodes with exactly one operand";

  // `llvm::cast` only asserts, so in release builds a null or non-string
  // operand would be undefined behaviour. Check explicitly instead.
  auto str = llvm::dyn_cast_or_null<llvm::MDString>(node->getOperand(0));
  CHECK(str != nullptr)
      << "GetMetadata only supports nodes whose operand is an MDString";

  return { str->getString().str() };
}


} // namespace mcsema
73 changes: 73 additions & 0 deletions mcsema/BC/Util.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,16 @@

#include <cstdint>
#include <list>
#include <optional>
#include <vector>

#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/Module.h>

#include "mcsema/CFG/CFG.h"

#include "remill/BC/Annotate.h"

namespace llvm {

class BasicBlock;
Expand All @@ -44,6 +48,62 @@ extern std::shared_ptr<llvm::LLVMContext> gContext;
extern llvm::IntegerType *gWordType;
extern std::unique_ptr<llvm::Module> gModule;

// CRTP mixin with shorthands for commonly used LLVM types and constants.
//
// `Self` must derive from `LLVMConstants<Self>` and expose a member named
// `context` that can be passed where the `llvm::Type::get*Ty` factories
// expect an LLVM context.
template <typename Self>
struct LLVMConstants {

  // 32-bit integer constant holding `value`.
  llvm::ConstantInt *i32(int32_t value) {
    return GetConstantInt(value, 32);
  }

  // 64-bit integer constant holding `value`.
  llvm::ConstantInt *i64(int64_t value) {
    return GetConstantInt(value, 64);
  }

  // Integer constant of `size` bits holding `value`.
  llvm::ConstantInt *GetConstantInt(int64_t value, int64_t size) {
    // `value` is signed, so ask LLVM to treat it as such. With the default
    // (unsigned) interpretation a negative value would be zero-extended
    // rather than sign-extended for widths above 64 bits.
    return llvm::ConstantInt::get(
        llvm::Type::getIntNTy(static_cast<Self &>(*this).context, size),
        static_cast<uint64_t>(value), /*isSigned=*/true);
  }

  // The `i64` type.
  llvm::Type *i64_t() {
    return llvm::Type::getInt64Ty(static_cast<Self &>(*this).context);
  }

  // Pointer to `i64`.
  llvm::Type *i64_ptr_t() {
    return llvm::Type::getInt64PtrTy(static_cast<Self &>(*this).context);
  }

  // Pointer to an integer of `size` bits.
  llvm::Type *i_n_ptr_t(uint64_t size) {
    return llvm::Type::getIntNPtrTy(static_cast<Self &>(*this).context, size);
  }

  // The `i8` type.
  llvm::Type *i8_t() {
    return llvm::Type::getInt8Ty(static_cast<Self &>(*this).context);
  }

  // Pointer to `i8`.
  llvm::Type *i8_ptr_t() {
    return llvm::Type::getInt8PtrTy(static_cast<Self &>(*this).context);
  }

  // Integer type of `size` bits.
  llvm::Type *i_n_ty(uint64_t size) {
    return llvm::Type::getIntNTy(static_cast<Self &>(*this).context, size);
  }

  // Undefined value of the given type.
  llvm::Value *undef(llvm::Type *type) {
    return llvm::UndefValue::get(type);
  }

  // Pointer to `type` in address space `addr_space`.
  llvm::Type *ptr(llvm::Type *type, unsigned addr_space=0) {
    return llvm::PointerType::get(type, addr_space);
  }

};

// CRTP mixin for looking up functions by name. `Self` must derive from
// `ModuleUtil<Self>` and expose a member named `module` providing
// `getFunction(name)`.
template <typename Self>
struct ModuleUtil {
  // Returns a reference to the function called `name`. The lookup result is
  // dereferenced without a null check, so `name` must exist in the module.
  llvm::Function &function(const std::string &name) {
    auto &self = static_cast<Self &>(*this);
    auto found = self.module.getFunction(name);
    return *found;
  }
};

llvm::Value *GetConstantInt(unsigned size, uint64_t value);

Expand All @@ -54,6 +114,19 @@ llvm::FunctionType *LiftedFunctionType(void);
// lifted segment associated with `seg`.
llvm::Constant *LiftEA(const NativeSegment *seg, uint64_t ea);

// Invoke `yield` once for every function in `_module` that remill reports as
// having the `remill::LiftedFunction` origin. Each call receives the
// function as an `llvm::Function *`.
template <typename Yield>
void ForEachLifted(llvm::Module &_module, Yield yield) {
  using FunctionList = std::vector<llvm::Function *>;
  auto &&lifted =
      remill::GetFunctionsByOrigin<FunctionList, remill::LiftedFunction>(
          _module);
  for (auto func : lifted) {
    yield(func);
  }
}

// Result of a metadata lookup: the stored string, or empty when the
// requested kind is not attached.
using MetaValue = std::optional<std::string>;

// Attach `val` to `go` as a single-string metadata node of kind `kind`.
// Logs a warning if metadata of that kind is already present, then sets the
// new node anyway.
void SetMetadata(llvm::GlobalObject &go, const std::string &kind, const std::string &val);
// Read the string stored on `go` under `kind`; empty if there is none.
// Only nodes with exactly one operand are supported.
MetaValue GetMetadata(llvm::GlobalObject &go, const std::string &kind);

} // namespace mcsema

#endif // MCSEMA_BC_UTIL_H_