// Copyright 2023 xensik. All rights reserved. // // Use of this source code is governed by a GNU GPLv3 license // that can be found in the LICENSE file. #include "xsk/stdinc.hpp" #include "xsk/utils/string.hpp" #include "xsk/arc/disassembler.hpp" #include "xsk/arc/context.hpp" namespace xsk::arc { disassembler::disassembler(context const* ctx) : ctx_{ ctx } { } auto disassembler::disassemble(buffer const& data) -> assembly::ptr { return disassemble(data.data, data.size); } auto disassembler::disassemble(std::vector const& data) -> assembly::ptr { return disassemble(data.data(), data.size()); } auto disassembler::disassemble(u8 const* data, usize data_size) -> assembly::ptr { script_ = utils::reader{ data, static_cast(data_size), ctx_->endian() == endian::big }; assembly_ = assembly::make(); import_refs_.clear(); string_refs_.clear(); anim_refs_.clear(); auto header_ = header{}; header_.magic = script_.read(); if (header_.magic != ctx_->magic()) throw disasm_error("invalid script file or unsupported game version"); header_.source_crc = script_.read(); header_.include_offset = script_.read(); header_.animtree_offset = script_.read(); header_.cseg_offset = script_.read(); header_.stringtablefixup_offset = script_.read(); if (ctx_->props() & props::devstr) header_.devblock_stringtablefixup_offset = script_.read(); header_.exports_offset = script_.read(); header_.imports_offset = script_.read(); header_.fixup_offset = script_.read(); if (ctx_->props() & props::globals) header_.globalvar_offset = script_.read(); header_.profile_offset = script_.read(); header_.cseg_size = script_.read(); if (ctx_->props() & props::size64) header_.name = script_.read(); else header_.name = script_.read(); header_.stringtablefixup_count = script_.read(); header_.exports_count = script_.read(); header_.imports_count = script_.read(); header_.fixup_count = script_.read(); if (ctx_->props() & props::globals) header_.globalvar_count = script_.read(); header_.profile_count = script_.read(); if (ctx_->props() & props::devstr) header_.devblock_stringtablefixup_count = script_.read(); header_.include_count = script_.read(); header_.animtree_count = script_.read(); header_.flags = script_.read(); auto string_pool = std::map{}; script_.pos((ctx_->props() & props::headerxx) ? header_size_v3 : (ctx_->props() & props::header72) ? header_size_v2 : header_size_v1); while (script_.pos() < header_.include_offset) { auto pos = script_.pos(); string_pool.insert({ pos, script_.read_cstr() }); } script_.pos(header_.include_offset); for (auto i = 0u; i < header_.include_count; i++) { assembly_->includes.push_back(string_pool.at(script_.read())); } script_.pos(header_.animtree_offset); for (auto i = 0u; i < header_.animtree_count; i++) { auto entry = std::make_shared(); auto ref_count = 0u; auto anim_count = 0u; if (ctx_->props() & props::size64) { entry->name = string_pool.at(script_.read()); ref_count = script_.read(); anim_count = script_.read(); } else { entry->name = string_pool.at(script_.read()); ref_count = script_.read(); anim_count = script_.read(); script_.seek(2); } for (auto j = 0u; j < ref_count; j++) { auto ref = script_.read(); entry->refs.push_back(ref); anim_refs_.insert({ ref, entry }); } for (auto j = 0u; j < anim_count; j++) { if (ctx_->props() & props::size64) { auto name = string_pool.at(static_cast(script_.read())); auto ref = static_cast(script_.read()); entry->anims.push_back({ name, ref }); anim_refs_.insert({ ref, entry }); } else { auto name = string_pool.at(script_.read()); auto ref = script_.read(); entry->anims.push_back({ name, ref }); anim_refs_.insert({ ref, entry }); } } } script_.pos(header_.stringtablefixup_offset); for (auto i = 0u; i < header_.stringtablefixup_count; i++) { auto entry = std::make_shared(); entry->name = string_pool.at((ctx_->props() & props::size64) ? script_.read() : script_.read()); auto count = script_.read(); entry->type = script_.read(); if (ctx_->props() & props::size64) script_.seek(2); for (auto j = 0u; j < count; j++) { auto ref = script_.read(); string_refs_.insert({ ref, entry }); } } if (ctx_->props() & props::devstr) { script_.pos(header_.devblock_stringtablefixup_offset); for (auto i = 0u; i < header_.devblock_stringtablefixup_count; i++) { auto entry = std::make_shared(); entry->name = "__devstr__"; script_.seek(4); auto count = script_.read(); entry->type = script_.read(); script_.seek(2); for (auto j = 0; j < count; j++) { auto ref = script_.read(); string_refs_.insert({ ref, entry }); } } } if (ctx_->props() & props::globals) { script_.pos(header_.globalvar_offset); for (auto i = 0u; i < header_.globalvar_count; i++) { auto name = ctx_->hash_name(script_.read()); auto refs = script_.read(); for (auto j = 0u; j < refs; j++) { // todo t8 vars } } } script_.pos(header_.imports_offset); for (auto i = 0u; i < header_.imports_count; i++) { auto entry = std::make_shared(); if (ctx_->props() & props::hashids) { entry->name = ctx_->hash_name(script_.read()); entry->space = ctx_->hash_name(script_.read()); } else { entry->name = string_pool.at(script_.read()); entry->space = string_pool.at(script_.read()); } auto count = script_.read(); entry->params = script_.read(); entry->flags = script_.read(); for (auto j = 0; j < count; j++) { import_refs_.insert({ script_.read(), entry }); } } auto exports_ = std::vector{}; script_.pos(header_.exports_offset); for (auto i = 0u; i < header_.exports_count; i++) { auto entry = std::make_shared(); entry->checksum = script_.read(); entry->offset = script_.read(); if (ctx_->props() & props::hashids) { entry->name = ctx_->hash_name(script_.read()); entry->space = ctx_->hash_name(script_.read()); } else { entry->name = string_pool.at(script_.read()); entry->space = ""; } entry->params = script_.read(); entry->flags = script_.read(); if (ctx_->props() & props::hashids) script_.seek(2); exports_.push_back(entry); } for (auto i = 0u; i < exports_.size(); i++) { auto& entry = exports_[i]; if (i < exports_.size() - 1) { entry->size = (exports_[i + 1]->offset - entry->offset); auto pad_size = (ctx_->props() & props::size64) ? 8 : 4; auto end_pos = entry->offset + entry->size - pad_size; script_.pos(end_pos); if ((ctx_->props() & props::size64) && script_.read() == 0) { entry->size -= pad_size; script_.pos(end_pos - 2); script_.align(2); for (auto j = 0; j < 4; j++) { auto op = ctx_->opcode_enum(script_.read()); if (op == opcode::OP_End || op == opcode::OP_Return) break; script_.seek_neg(4); } entry->size -= end_pos - script_.pos(); } else if (script_.read() == 0) { entry->size -= pad_size; for (auto j = 1; j < 4; j++) { script_.pos(end_pos - j); if (script_.read() <= 0x01) break; entry->size--; } } } else { entry->size = (header_.cseg_offset + header_.cseg_size) - entry->offset; } script_.pos(entry->offset); func_ = function::make(); func_->index = entry->offset; func_->size = entry->size; func_->params = entry->params; func_->flags = entry->flags; func_->name = entry->name; func_->space = entry->space; disassemble_function(*func_); assembly_->functions.push_back(std::move(func_)); } return std::move(assembly_); } auto disassembler::disassemble_function(function& func) -> void { auto size = static_cast(func.size); while (size > 0) { auto inst = instruction::make(); inst->index = script_.pos(); if (ctx_->props() & props::size64) { auto index = script_.read(); if (size < 8 && (index >= 0x4000 || ctx_->opcode_enum(index) == opcode::OP_Invalid)) break; if ((index & 0x4000) == 0) inst->opcode = ctx_->opcode_enum(index); else throw disasm_error(utils::string::va("invalid opcode index 0x%X at pos '%04X'!", index, inst->index)); } else { auto index = script_.read(); if (size < 4 && ctx_->opcode_enum(index) == opcode::OP_Invalid) break; inst->opcode = ctx_->opcode_enum(index); } inst->size = ctx_->opcode_size(inst->opcode); disassemble_instruction(*inst); if (ctx_->props() & props::size64) inst->size += script_.align(2); size -= inst->size; func.instructions.push_back(std::move(inst)); } // remove padding garbage ops auto last_idx = 0; for (auto i = 1; i <= 4; i++) { if (func.instructions.size() - i <= 0) break; auto& inst = func.instructions.at(func.instructions.size() - i); if (inst->opcode == opcode::OP_End || inst->opcode == opcode::OP_Return) last_idx = i; if (func.labels.contains(inst->index)) break; } while (last_idx-- > 1) func.instructions.pop_back(); } auto disassembler::disassemble_instruction(instruction& inst) -> void { switch (inst.opcode) { case opcode::OP_End: case opcode::OP_Return: case opcode::OP_GetUndefined: case opcode::OP_GetZero: case opcode::OP_GetLevelObject: case opcode::OP_GetAnimObject: case opcode::OP_GetSelf: case opcode::OP_GetLevel: case opcode::OP_GetGame: case opcode::OP_GetAnim: case opcode::OP_GetGameRef: case opcode::OP_CreateLocalVariable: case opcode::OP_EvalArray: case opcode::OP_EvalArrayRef: case opcode::OP_ClearArray: case opcode::OP_EmptyArray: case opcode::OP_GetSelfObject: case opcode::OP_SafeSetVariableFieldCached: case opcode::OP_ClearParams: case opcode::OP_CheckClearParams: case opcode::OP_SetVariableField: case opcode::OP_Wait: case opcode::OP_WaitTillFrameEnd: case opcode::OP_PreScriptCall: case opcode::OP_DecTop: case opcode::OP_CastFieldObject: case opcode::OP_CastBool: case opcode::OP_BoolNot: case opcode::OP_BoolComplement: case opcode::OP_Inc: case opcode::OP_Dec: case opcode::OP_Bit_Or: case opcode::OP_Bit_Xor: case opcode::OP_Bit_And: case opcode::OP_Equal: case opcode::OP_NotEqual: case opcode::OP_LessThan: case opcode::OP_GreaterThan: case opcode::OP_LessThanOrEqualTo: case opcode::OP_GreaterThanOrEqualTo: case opcode::OP_ShiftLeft: case opcode::OP_ShiftRight: case opcode::OP_Plus: case opcode::OP_Minus: case opcode::OP_Multiply: case opcode::OP_Divide: case opcode::OP_Modulus: case opcode::OP_SizeOf: case opcode::OP_WaitTill: case opcode::OP_Notify: case opcode::OP_EndOn: case opcode::OP_VoidCodePos: case opcode::OP_Vector: case opcode::OP_RealWait: case opcode::OP_IsDefined: case opcode::OP_VectorScale: case opcode::OP_AnglesToUp: case opcode::OP_AnglesToRight: case opcode::OP_AnglesToForward: case opcode::OP_AngleClamp180: case opcode::OP_VectorToAngles: case opcode::OP_Abs: case opcode::OP_GetTime: case opcode::OP_GetDvar: case opcode::OP_GetDvarInt: case opcode::OP_GetDvarFloat: case opcode::OP_GetDvarVector: case opcode::OP_GetDvarColorRed: case opcode::OP_GetDvarColorGreen: case opcode::OP_GetDvarColorBlue: case opcode::OP_GetDvarColorAlpha: case opcode::OP_FirstArrayKey: case opcode::OP_NextArrayKey: //case opcode::OP_ProfileStart: case opcode::OP_ProfileStop: case opcode::OP_SafeDecTop: case opcode::OP_Nop: case opcode::OP_Abort: case opcode::OP_Object: case opcode::OP_ThreadObject: case opcode::OP_EvalLocalVariable: case opcode::OP_EvalLocalVariableRef: case opcode::OP_GetClassesObject: case opcode::OP_GetClasses: case opcode::OP_GetWorldObject: case opcode::OP_GetWorld: case opcode::OP_SuperEqual: case opcode::OP_SuperNotEqual: break; case opcode::OP_GetByte: case opcode::OP_GetNegByte: inst.data.push_back(fmt::format("{}", script_.read())); break; case opcode::OP_GetUnsignedShort: case opcode::OP_GetNegUnsignedShort: inst.size += script_.align(2); inst.data.push_back(fmt::format("{}", script_.read())); break; case opcode::OP_GetInteger: inst.size += script_.align(4); disassemble_animtree(inst); inst.data.push_back(fmt::format("{}", script_.read())); break; case opcode::OP_GetFloat: inst.size += script_.align(4); inst.data.push_back(utils::string::float_string(script_.read())); break; case opcode::OP_GetUintptr: //case opcode::OP_ProfileStart: case opcode::OP_GetAPIFunction: inst.size += script_.align(8); inst.data.push_back(fmt::format("0x{:016X}", script_.read())); break; case opcode::OP_GetVector: inst.size += script_.align(4); inst.data.push_back(utils::string::float_string(script_.read())); inst.data.push_back(utils::string::float_string(script_.read())); inst.data.push_back(utils::string::float_string(script_.read())); break; case opcode::OP_GetString: case opcode::OP_GetIString: disassemble_string(inst); break; case opcode::OP_GetAnimation: disassemble_animation(inst); break; case opcode::OP_WaitTillMatch: inst.data.push_back(fmt::format("{}", script_.read())); break; case opcode::OP_VectorConstant: inst.data.push_back(fmt::format("{}", script_.read())); break; case opcode::OP_GetHash: inst.size += script_.align(4); inst.data.push_back(ctx_->hash_name(script_.read())); break; case opcode::OP_ScriptFunctionCallClass: case opcode::OP_ScriptThreadCallClass: script_.seek(1); inst.size += script_.align(4); inst.data.push_back(ctx_->hash_name(script_.read())); break; case opcode::OP_SafeCreateLocalVariables: disassemble_params(inst); break; case opcode::OP_RemoveLocalVariables: case opcode::OP_EvalLocalVariableCached: case opcode::OP_EvalLocalArrayRefCached: case opcode::OP_SafeSetWaittillVariableFieldCached: case opcode::OP_EvalLocalVariableRefCached: inst.data.push_back(fmt::format("{}", script_.read())); break; case opcode::OP_EvalFieldVariable: case opcode::OP_EvalFieldVariableRef: case opcode::OP_ClearFieldVariable: case opcode::OP_EvalLocalVariableCachedDebug: case opcode::OP_EvalLocalVariableRefCachedDebug: case opcode::OP_LevelEvalFieldVariableRef: case opcode::OP_LevelEvalFieldVariable: case opcode::OP_SelfEvalFieldVariableRef: case opcode::OP_SelfEvalFieldVariable: case opcode::OP_New: disassemble_name(inst); break; case opcode::OP_ScriptFunctionCallPointer: case opcode::OP_ScriptMethodCallPointer: case opcode::OP_ScriptThreadCallPointer: case opcode::OP_ScriptMethodThreadCallPointer: inst.data.push_back(fmt::format("{}", script_.read())); break; case opcode::OP_GetFunction: disassemble_import(inst); break; case opcode::OP_CallBuiltin: case opcode::OP_CallBuiltinMethod: case opcode::OP_ScriptFunctionCall: case opcode::OP_ScriptMethodCall: case opcode::OP_ScriptThreadCall: case opcode::OP_ScriptMethodThreadCall: script_.seek(1); disassemble_import(inst); break; case opcode::OP_JumpOnFalse: case opcode::OP_JumpOnTrue: case opcode::OP_JumpOnFalseExpr: case opcode::OP_JumpOnTrueExpr: case opcode::OP_Jump: case opcode::OP_JumpBack: case opcode::OP_DevblockBegin: disassemble_jump(inst); break; case opcode::OP_Switch: disassemble_switch(inst); break; case opcode::OP_EndSwitch: disassemble_end_switch(inst); break; default: throw disasm_error(fmt::format("unhandled opcode {} at index {:04X}", ctx_->opcode_name(inst.opcode), inst.index)); } } auto disassembler::disassemble_name(instruction& inst) -> void { inst.size += script_.align((ctx_->props() & props::hashids) ? 4 : 2); if (ctx_->props() & props::hashids) { inst.data.push_back(ctx_->hash_name(script_.read())); } else { auto const itr = string_refs_.find(script_.pos()); if (itr != string_refs_.end()) { inst.data.push_back(itr->second->name); script_.seek(2); return; } throw disasm_error(fmt::format("string reference not found at index {:04X}", inst.index)); } } auto disassembler::disassemble_params(instruction& inst) -> void { auto const count = script_.read(); for (auto i = 0u; i < count; i++) { if (ctx_->props() & props::hashids) { inst.size += script_.align(4) + 5; inst.data.push_back(ctx_->hash_name(script_.read())); inst.data.push_back(fmt::format("{}", script_.read())); } else { disassemble_string(inst); inst.size += 2; } } } auto disassembler::disassemble_import(instruction& inst) -> void { inst.size += script_.align((ctx_->props() & props::size64) ? 8 : 4); script_.seek((ctx_->props() & props::size64) ? 8 : 4); auto const itr = import_refs_.find(inst.index); if (itr != import_refs_.end()) { inst.data.push_back(itr->second->space); inst.data.push_back(itr->second->name); return; } throw disasm_error(fmt::format("import reference not found at index {:04X}", inst.index)); } auto disassembler::disassemble_string(instruction& inst) -> void { inst.size += script_.align((ctx_->props() & props::size64) ? 4 : 2); auto const itr = string_refs_.find(script_.pos()); if (itr != string_refs_.end()) { inst.data.push_back(itr->second->name); script_.seek((ctx_->props() & props::size64) ? 4 : 2); return; } throw disasm_error(fmt::format("string reference not found at index {:04X}", inst.index)); } auto disassembler::disassemble_animtree(instruction& inst) -> void { auto const itr = anim_refs_.find(script_.pos()); if (itr != anim_refs_.end()) { inst.data.push_back(itr->second->name); } } auto disassembler::disassemble_animation(instruction& inst) -> void { inst.size += script_.align((ctx_->props() & props::size64) ? 8 : 4); auto const ref = script_.pos(); auto const itr = anim_refs_.find(ref); if (itr != anim_refs_.end()) { inst.data.push_back(itr->second->name); for (auto const& anim : itr->second->anims) { if (anim.ref == ref) { inst.data.push_back(anim.name); script_.seek((ctx_->props() & props::size64) ? 8 : 4); return; } } } throw disasm_error(fmt::format("animation reference not found at index {:04X}", inst.index)); } auto disassembler::disassemble_jump(instruction& inst) -> void { inst.size += script_.align(2); auto addr = u32{}; if (ctx_->props() & props::size64) addr = ((script_.read() + 1) & ~(1)) + script_.pos(); else addr = script_.read() + script_.pos(); auto const label = fmt::format("loc_{:X}", addr); inst.data.push_back(label); func_->labels.insert({ addr, label }); } auto disassembler::disassemble_switch(instruction& inst) -> void { inst.size += script_.align(4); auto const addr = script_.read() + script_.pos(); auto const label = fmt::format("loc_{:X}", addr); inst.data.push_back(label); func_->labels.insert({ addr, label }); } auto disassembler::disassemble_end_switch(instruction& inst) -> void { inst.size += script_.align(4); auto const itr = func_->labels.find(script_.pos()); if (itr != func_->labels.end()) { for (auto const& entry : func_->instructions) { if (entry->opcode == opcode::OP_Switch && entry->data[0] == itr->second) { auto const label = fmt::format("loc_{:X}", inst.index); entry->data[0] = label; func_->labels.erase(script_.pos()); if (!func_->labels.contains(inst.index)) { func_->labels.insert({ inst.index, label }); } break; } } } auto type = switch_type::none; auto const count = script_.read(); inst.data.push_back(fmt::format("{}", count)); for (auto i = 0u; i < count; i++) { if (ctx_->props() & props::size64) { const auto value = script_.read(); const auto str = string_refs_.find(script_.pos() - 4); if (str != string_refs_.end()) { type = switch_type::string; inst.data.push_back("case"); inst.data.push_back(str->second->name); } else if (value == 0 && i == count - 1) { inst.data.push_back("default"); } else { type = switch_type::integer; inst.data.push_back("case"); inst.data.push_back(fmt::format("{}", value)); } } else { auto const value = script_.read(); if (value == 0) { inst.data.push_back("default"); } else if (value < 0x40000) { type = switch_type::string; inst.data.push_back("case"); inst.data.push_back(string_refs_.at(script_.pos() - 2)->name); } else { type = switch_type::integer; inst.data.push_back("case"); inst.data.push_back(fmt::format("{}", (value - 0x800000) & 0xFFFFFF)); } } auto const addr = script_.read() + script_.pos(); auto const label = fmt::format("loc_{:X}", addr); inst.data.push_back(label); func_->labels.insert({ addr, label }); inst.size += 8; } inst.data.push_back(fmt::format("{}", static_cast>(type))); } } // namespace xsk::arc