2023-12-06 17:43:39 -05:00
|
|
|
// This file is part of AsmJit project <https://asmjit.com>
|
|
|
|
//
|
|
|
|
// See asmjit.h or LICENSE.md for license and copyright information
|
|
|
|
// SPDX-License-Identifier: Zlib
|
|
|
|
|
|
|
|
#include <asmjit/core.h>
|
|
|
|
|
|
|
|
#if !defined(ASMJIT_NO_AARCH64)
|
|
|
|
#include <asmjit/a64.h>
|
|
|
|
|
|
|
|
#include <limits>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
#include "asmjit_test_perf.h"
|
|
|
|
|
|
|
|
using namespace asmjit;
|
|
|
|
|
|
|
|
// Generates a long sequence of GP instructions.
|
|
|
|
template<typename Emitter>
|
|
|
|
static void generateGpSequenceInternal(
|
|
|
|
Emitter& cc,
|
|
|
|
const a64::Gp& a, const a64::Gp& b, const a64::Gp& c, const a64::Gp& d) {
|
|
|
|
|
|
|
|
using namespace asmjit::a64;
|
|
|
|
|
|
|
|
Gp wA = a.w();
|
|
|
|
Gp wB = b.w();
|
|
|
|
Gp wC = c.w();
|
|
|
|
Gp wD = d.w();
|
|
|
|
|
|
|
|
Gp xA = a.x();
|
|
|
|
Gp xB = b.x();
|
|
|
|
Gp xC = c.x();
|
|
|
|
Gp xD = d.x();
|
|
|
|
|
|
|
|
Mem m = ptr(xD);
|
|
|
|
|
|
|
|
cc.mov(wA, 0);
|
|
|
|
cc.mov(wB, 1);
|
|
|
|
cc.mov(wC, 2);
|
|
|
|
cc.mov(wD, 3);
|
|
|
|
|
|
|
|
cc.adc(wA, wB, wC);
|
|
|
|
cc.adc(xA, xB, xC);
|
|
|
|
cc.adc(wA, wzr, wC);
|
|
|
|
cc.adc(xA, xzr, xC);
|
|
|
|
cc.adc(wzr, wB, wC);
|
|
|
|
cc.adc(xzr, xB, xC);
|
|
|
|
cc.adcs(wA, wB, wC);
|
|
|
|
cc.adcs(xA, xB, xC);
|
|
|
|
cc.add(wA, wB, wC);
|
|
|
|
cc.add(xA, xB, xC);
|
|
|
|
cc.add(wA, wB, wC, lsl(3));
|
|
|
|
cc.add(xA, xB, xC, lsl(3));
|
|
|
|
cc.add(wA, wzr, wC);
|
|
|
|
cc.add(xA, xzr, xC);
|
|
|
|
cc.add(wzr, wB, wC);
|
|
|
|
cc.add(xzr, xB, xC);
|
|
|
|
cc.add(wC, wD, 0, lsl(12));
|
|
|
|
cc.add(xC, xD, 0, lsl(12));
|
|
|
|
cc.add(wC, wD, 1024, lsl(12));
|
|
|
|
cc.add(xC, xD, 1024, lsl(12));
|
|
|
|
cc.add(wC, wD, 1024, lsl(12));
|
|
|
|
cc.add(xC, xD, 1024, lsl(12));
|
|
|
|
cc.adds(wA, wB, wC);
|
|
|
|
cc.adds(xA, xB, xC);
|
|
|
|
cc.adr(xA, 0);
|
|
|
|
cc.adr(xA, 256);
|
|
|
|
cc.adrp(xA, 4096);
|
|
|
|
cc.and_(wA, wB, wC);
|
|
|
|
cc.and_(xA, xB, xC);
|
|
|
|
cc.and_(wA, wB, 1);
|
|
|
|
cc.and_(xA, xB, 1);
|
|
|
|
cc.and_(wA, wB, 15);
|
|
|
|
cc.and_(xA, xB, 15);
|
|
|
|
cc.and_(wA, wzr, wC);
|
|
|
|
cc.and_(xA, xzr, xC);
|
|
|
|
cc.and_(wzr, wB, wC);
|
|
|
|
cc.and_(xzr, xB, xC);
|
|
|
|
cc.and_(wA, wB, 0x1);
|
|
|
|
cc.and_(xA, xB, 0x1);
|
|
|
|
cc.and_(wA, wB, 0xf);
|
|
|
|
cc.and_(xA, xB, 0xf);
|
|
|
|
cc.ands(wA, wB, wC);
|
|
|
|
cc.ands(xA, xB, xC);
|
|
|
|
cc.ands(wA, wzr, wC);
|
|
|
|
cc.ands(xA, xzr, xC);
|
|
|
|
cc.ands(wzr, wB, wC);
|
|
|
|
cc.ands(xzr, xB, xC);
|
|
|
|
cc.ands(wA, wB, 0x1);
|
|
|
|
cc.ands(xA, xB, 0x1);
|
|
|
|
cc.ands(wA, wB, 0xf);
|
|
|
|
cc.ands(xA, xB, 0xf);
|
|
|
|
cc.asr(wA, wB, 15);
|
|
|
|
cc.asr(xA, xB, 15);
|
|
|
|
cc.asrv(wA, wB, wC);
|
|
|
|
cc.asrv(xA, xB, xC);
|
|
|
|
cc.bfc(wA, 8, 16);
|
|
|
|
cc.bfc(xA, 8, 16);
|
|
|
|
cc.bfi(wA, wB, 8, 16);
|
|
|
|
cc.bfi(xA, xB, 8, 16);
|
|
|
|
cc.bfm(wA, wB, 8, 16);
|
|
|
|
cc.bfm(xA, xB, 8, 16);
|
|
|
|
cc.bfxil(wA, wB, 8, 16);
|
|
|
|
cc.bfxil(xA, xB, 8, 16);
|
|
|
|
cc.bic(wA, wB, wC, lsl(4));
|
|
|
|
cc.bic(xA, xB, xC, lsl(4));
|
|
|
|
cc.bic(wA, wzr, wC);
|
|
|
|
cc.bic(xA, xzr, xC);
|
|
|
|
cc.bics(wA, wB, wC, lsl(4));
|
|
|
|
cc.bics(xA, xB, xC, lsl(4));
|
|
|
|
cc.bics(wA, wzr, wC);
|
|
|
|
cc.bics(xA, xzr, xC);
|
|
|
|
cc.cas(wA, wB, m);
|
|
|
|
cc.cas(xA, xB, m);
|
|
|
|
cc.casa(wA, wB, m);
|
|
|
|
cc.casa(xA, xB, m);
|
|
|
|
cc.casab(wA, wB, m);
|
|
|
|
cc.casah(wA, wB, m);
|
|
|
|
cc.casal(wA, wB, m);
|
|
|
|
cc.casal(xA, xB, m);
|
|
|
|
cc.casalb(wA, wB, m);
|
|
|
|
cc.casalh(wA, wB, m);
|
|
|
|
cc.casb(wA, wB, m);
|
|
|
|
cc.cash(wA, wB, m);
|
|
|
|
cc.casl(wA, wB, m);
|
|
|
|
cc.casl(xA, xB, m);
|
|
|
|
cc.caslb(wA, wB, m);
|
|
|
|
cc.caslh(wA, wB, m);
|
|
|
|
cc.casp(wA, wB, wC, wD, m);
|
|
|
|
cc.casp(xA, xB, xC, xD, m);
|
|
|
|
cc.caspa(wA, wB, wC, wD, m);
|
|
|
|
cc.caspa(xA, xB, xC, xD, m);
|
|
|
|
cc.caspal(wA, wB, wC, wD, m);
|
|
|
|
cc.caspal(xA, xB, xC, xD, m);
|
|
|
|
cc.caspl(wA, wB, wC, wD, m);
|
|
|
|
cc.caspl(xA, xB, xC, xD, m);
|
|
|
|
cc.ccmn(wA, wB, 3, CondCode::kEQ);
|
|
|
|
cc.ccmn(xA, xB, 3, CondCode::kEQ);
|
|
|
|
cc.ccmn(wA, 2, 3, CondCode::kEQ);
|
|
|
|
cc.ccmn(xA, 2, 3, CondCode::kEQ);
|
|
|
|
cc.ccmn(wA, wzr, 3, CondCode::kEQ);
|
|
|
|
cc.ccmn(xA, xzr, 3, CondCode::kEQ);
|
|
|
|
cc.ccmp(wA, wB, 3, CondCode::kEQ);
|
|
|
|
cc.ccmp(xA, xB, 3, CondCode::kEQ);
|
|
|
|
cc.ccmp(wA, 2, 3, CondCode::kEQ);
|
|
|
|
cc.ccmp(xA, 2, 3, CondCode::kEQ);
|
|
|
|
cc.ccmp(wA, wzr, 3, CondCode::kEQ);
|
|
|
|
cc.ccmp(xA, xzr, 3, CondCode::kEQ);
|
|
|
|
cc.cinc(wA, wB, CondCode::kEQ);
|
|
|
|
cc.cinc(xA, xB, CondCode::kEQ);
|
|
|
|
cc.cinc(wzr, wB, CondCode::kEQ);
|
|
|
|
cc.cinc(wA, wzr, CondCode::kEQ);
|
|
|
|
cc.cinc(xzr, xB, CondCode::kEQ);
|
|
|
|
cc.cinc(xA, xzr, CondCode::kEQ);
|
|
|
|
cc.cinv(wA, wB, CondCode::kEQ);
|
|
|
|
cc.cinv(xA, xB, CondCode::kEQ);
|
|
|
|
cc.cinv(wzr, wB, CondCode::kEQ);
|
|
|
|
cc.cinv(wA, wzr, CondCode::kEQ);
|
|
|
|
cc.cinv(xzr, xB, CondCode::kEQ);
|
|
|
|
cc.cinv(xA, xzr, CondCode::kEQ);
|
|
|
|
cc.cls(wA, wB);
|
|
|
|
cc.cls(xA, xB);
|
|
|
|
cc.cls(wA, wzr);
|
|
|
|
cc.cls(xA, xzr);
|
|
|
|
cc.cls(wzr, wB);
|
|
|
|
cc.cls(xzr, xB);
|
|
|
|
cc.clz(wA, wB);
|
|
|
|
cc.clz(xA, xB);
|
|
|
|
cc.clz(wA, wzr);
|
|
|
|
cc.clz(xA, xzr);
|
|
|
|
cc.clz(wzr, wB);
|
|
|
|
cc.clz(xzr, xB);
|
|
|
|
cc.cmn(wA, 33);
|
|
|
|
cc.cmn(xA, 33);
|
|
|
|
cc.cmn(wA, wB);
|
|
|
|
cc.cmn(xA, xB);
|
|
|
|
cc.cmn(wA, wB, uxtb(2));
|
|
|
|
cc.cmn(xA, xB, uxtb(2));
|
|
|
|
cc.cmp(wA, 33);
|
|
|
|
cc.cmp(xA, 33);
|
|
|
|
cc.cmp(wA, wB);
|
|
|
|
cc.cmp(xA, xB);
|
|
|
|
cc.cmp(wA, wB, uxtb(2));
|
|
|
|
cc.cmp(xA, xB, uxtb(2));
|
|
|
|
cc.crc32b(wA, wB, wC);
|
|
|
|
cc.crc32b(wzr, wB, wC);
|
|
|
|
cc.crc32b(wA, wzr, wC);
|
|
|
|
cc.crc32b(wA, wB, wzr);
|
|
|
|
cc.crc32cb(wA, wB, wC);
|
|
|
|
cc.crc32cb(wzr, wB, wC);
|
|
|
|
cc.crc32cb(wA, wzr, wC);
|
|
|
|
cc.crc32cb(wA, wB, wzr);
|
|
|
|
cc.crc32ch(wA, wB, wC);
|
|
|
|
cc.crc32ch(wzr, wB, wC);
|
|
|
|
cc.crc32ch(wA, wzr, wC);
|
|
|
|
cc.crc32ch(wA, wB, wzr);
|
|
|
|
cc.crc32cw(wA, wB, wC);
|
|
|
|
cc.crc32cw(wzr, wB, wC);
|
|
|
|
cc.crc32cw(wA, wzr, wC);
|
|
|
|
cc.crc32cw(wA, wB, wzr);
|
|
|
|
cc.crc32cx(wA, wB, xC);
|
|
|
|
cc.crc32cx(wzr, wB, xC);
|
|
|
|
cc.crc32cx(wA, wzr, xC);
|
|
|
|
cc.crc32cx(wA, wB, xzr);
|
|
|
|
cc.crc32h(wA, wB, wC);
|
|
|
|
cc.crc32h(wzr, wB, wC);
|
|
|
|
cc.crc32h(wA, wzr, wC);
|
|
|
|
cc.crc32h(wA, wB, wzr);
|
|
|
|
cc.crc32w(wA, wB, wC);
|
|
|
|
cc.crc32w(wzr, wB, wC);
|
|
|
|
cc.crc32w(wA, wzr, wC);
|
|
|
|
cc.crc32w(wA, wB, wzr);
|
|
|
|
cc.crc32x(wA, wB, xC);
|
|
|
|
cc.crc32x(wzr, wB, xC);
|
|
|
|
cc.crc32x(wA, wzr, xC);
|
|
|
|
cc.crc32x(wA, wB, xzr);
|
|
|
|
cc.csel(wA, wB, wC, CondCode::kEQ);
|
|
|
|
cc.csel(xA, xB, xC, CondCode::kEQ);
|
|
|
|
cc.cset(wA, CondCode::kEQ);
|
|
|
|
cc.cset(xA, CondCode::kEQ);
|
|
|
|
cc.cset(wA, CondCode::kEQ);
|
|
|
|
cc.cset(xA, CondCode::kEQ);
|
|
|
|
cc.csetm(wA, CondCode::kEQ);
|
|
|
|
cc.csetm(xA, CondCode::kEQ);
|
|
|
|
cc.csinc(wA, wB, wC, CondCode::kEQ);
|
|
|
|
cc.csinc(xA, xB, xC, CondCode::kEQ);
|
|
|
|
cc.csinv(wA, wB, wC, CondCode::kEQ);
|
|
|
|
cc.csinv(xA, xB, xC, CondCode::kEQ);
|
|
|
|
cc.csneg(wA, wB, wC, CondCode::kEQ);
|
|
|
|
cc.csneg(xA, xB, xC, CondCode::kEQ);
|
|
|
|
cc.eon(wA, wB, wC);
|
|
|
|
cc.eon(wzr, wB, wC);
|
|
|
|
cc.eon(wA, wzr, wC);
|
|
|
|
cc.eon(wA, wB, wzr);
|
|
|
|
cc.eon(wA, wB, wC, lsl(4));
|
|
|
|
cc.eon(xA, xB, xC);
|
|
|
|
cc.eon(xzr, xB, xC);
|
|
|
|
cc.eon(xA, xzr, xC);
|
|
|
|
cc.eon(xA, xB, xzr);
|
|
|
|
cc.eon(xA, xB, xC, lsl(4));
|
|
|
|
cc.eor(wA, wB, wC);
|
|
|
|
cc.eor(wzr, wB, wC);
|
|
|
|
cc.eor(wA, wzr, wC);
|
|
|
|
cc.eor(wA, wB, wzr);
|
|
|
|
cc.eor(xA, xB, xC);
|
|
|
|
cc.eor(xzr, xB, xC);
|
|
|
|
cc.eor(xA, xzr, xC);
|
|
|
|
cc.eor(xA, xB, xzr);
|
|
|
|
cc.eor(wA, wB, wC, lsl(4));
|
|
|
|
cc.eor(xA, xB, xC, lsl(4));
|
|
|
|
cc.eor(wA, wB, 0x4000);
|
|
|
|
cc.eor(xA, xB, 0x8000);
|
|
|
|
cc.extr(wA, wB, wC, 15);
|
|
|
|
cc.extr(wzr, wB, wC, 15);
|
|
|
|
cc.extr(wA, wzr, wC, 15);
|
|
|
|
cc.extr(wA, wB, wzr, 15);
|
|
|
|
cc.extr(xA, xB, xC, 15);
|
|
|
|
cc.extr(xzr, xB, xC, 15);
|
|
|
|
cc.extr(xA, xzr, xC, 15);
|
|
|
|
cc.extr(xA, xB, xzr, 15);
|
|
|
|
cc.ldadd(wA, wB, m);
|
|
|
|
cc.ldadd(xA, xB, m);
|
|
|
|
cc.ldadda(wA, wB, m);
|
|
|
|
cc.ldadda(xA, xB, m);
|
|
|
|
cc.ldaddab(wA, wB, m);
|
|
|
|
cc.ldaddah(wA, wB, m);
|
|
|
|
cc.ldaddal(wA, wB, m);
|
|
|
|
cc.ldaddal(xA, xB, m);
|
|
|
|
cc.ldaddalb(wA, wB, m);
|
|
|
|
cc.ldaddalh(wA, wB, m);
|
|
|
|
cc.ldaddb(wA, wB, m);
|
|
|
|
cc.ldaddh(wA, wB, m);
|
|
|
|
cc.ldaddl(wA, wB, m);
|
|
|
|
cc.ldaddl(xA, xB, m);
|
|
|
|
cc.ldaddlb(wA, wB, m);
|
|
|
|
cc.ldaddlh(wA, wB, m);
|
|
|
|
cc.ldclr(wA, wB, m);
|
|
|
|
cc.ldclr(xA, xB, m);
|
|
|
|
cc.ldclra(wA, wB, m);
|
|
|
|
cc.ldclra(xA, xB, m);
|
|
|
|
cc.ldclrab(wA, wB, m);
|
|
|
|
cc.ldclrah(wA, wB, m);
|
|
|
|
cc.ldclral(wA, wB, m);
|
|
|
|
cc.ldclral(xA, xB, m);
|
|
|
|
cc.ldclralb(wA, wB, m);
|
|
|
|
cc.ldclralh(wA, wB, m);
|
|
|
|
cc.ldclrb(wA, wB, m);
|
|
|
|
cc.ldclrh(wA, wB, m);
|
|
|
|
cc.ldclrl(wA, wB, m);
|
|
|
|
cc.ldclrl(xA, xB, m);
|
|
|
|
cc.ldclrlb(wA, wB, m);
|
|
|
|
cc.ldclrlh(wA, wB, m);
|
|
|
|
cc.ldeor(wA, wB, m);
|
|
|
|
cc.ldeor(xA, xB, m);
|
|
|
|
cc.ldeora(wA, wB, m);
|
|
|
|
cc.ldeora(xA, xB, m);
|
|
|
|
cc.ldeorab(wA, wB, m);
|
|
|
|
cc.ldeorah(wA, wB, m);
|
|
|
|
cc.ldeoral(wA, wB, m);
|
|
|
|
cc.ldeoral(xA, xB, m);
|
|
|
|
cc.ldeoralb(wA, wB, m);
|
|
|
|
cc.ldeoralh(wA, wB, m);
|
|
|
|
cc.ldeorb(wA, wB, m);
|
|
|
|
cc.ldeorh(wA, wB, m);
|
|
|
|
cc.ldeorl(wA, wB, m);
|
|
|
|
cc.ldeorl(xA, xB, m);
|
|
|
|
cc.ldeorlb(wA, wB, m);
|
|
|
|
cc.ldeorlh(wA, wB, m);
|
|
|
|
cc.ldlar(wA, m);
|
|
|
|
cc.ldlar(xA, m);
|
|
|
|
cc.ldlarb(wA, m);
|
|
|
|
cc.ldlarh(wA, m);
|
|
|
|
cc.ldnp(wA, wB, m);
|
|
|
|
cc.ldnp(xA, xB, m);
|
|
|
|
cc.ldp(wA, wB, m);
|
|
|
|
cc.ldp(xA, xB, m);
|
|
|
|
cc.ldpsw(xA, xB, m);
|
|
|
|
cc.ldr(wA, m);
|
|
|
|
cc.ldr(xA, m);
|
|
|
|
cc.ldrb(wA, m);
|
|
|
|
cc.ldrh(wA, m);
|
|
|
|
cc.ldrsw(xA, m);
|
|
|
|
cc.ldraa(xA, m);
|
|
|
|
cc.ldrab(xA, m);
|
|
|
|
cc.ldset(wA, wB, m);
|
|
|
|
cc.ldset(xA, xB, m);
|
|
|
|
cc.ldseta(wA, wB, m);
|
|
|
|
cc.ldseta(xA, xB, m);
|
|
|
|
cc.ldsetab(wA, wB, m);
|
|
|
|
cc.ldsetah(wA, wB, m);
|
|
|
|
cc.ldsetal(wA, wB, m);
|
|
|
|
cc.ldsetal(xA, xB, m);
|
|
|
|
cc.ldsetalh(wA, wB, m);
|
|
|
|
cc.ldsetalb(wA, wB, m);
|
|
|
|
cc.ldsetb(wA, wB, m);
|
|
|
|
cc.ldseth(wA, wB, m);
|
|
|
|
cc.ldsetl(wA, wB, m);
|
|
|
|
cc.ldsetl(xA, xB, m);
|
|
|
|
cc.ldsetlb(wA, wB, m);
|
|
|
|
cc.ldsetlh(wA, wB, m);
|
|
|
|
cc.ldsmax(wA, wB, m);
|
|
|
|
cc.ldsmax(xA, xB, m);
|
|
|
|
cc.ldsmaxa(wA, wB, m);
|
|
|
|
cc.ldsmaxa(xA, xB, m);
|
|
|
|
cc.ldsmaxab(wA, wB, m);
|
|
|
|
cc.ldsmaxah(wA, wB, m);
|
|
|
|
cc.ldsmaxal(wA, wB, m);
|
|
|
|
cc.ldsmaxal(xA, xB, m);
|
|
|
|
cc.ldsmaxalb(wA, wB, m);
|
|
|
|
cc.ldsmaxalh(wA, wB, m);
|
|
|
|
cc.ldsmaxb(wA, wB, m);
|
|
|
|
cc.ldsmaxh(wA, wB, m);
|
|
|
|
cc.ldsmaxl(wA, wB, m);
|
|
|
|
cc.ldsmaxl(xA, xB, m);
|
|
|
|
cc.ldsmaxlb(wA, wB, m);
|
|
|
|
cc.ldsmaxlh(wA, wB, m);
|
|
|
|
cc.ldsmin(wA, wB, m);
|
|
|
|
cc.ldsmin(xA, xB, m);
|
|
|
|
cc.ldsmina(wA, wB, m);
|
|
|
|
cc.ldsmina(xA, xB, m);
|
|
|
|
cc.ldsminab(wA, wB, m);
|
|
|
|
cc.ldsminah(wA, wB, m);
|
|
|
|
cc.ldsminal(wA, wB, m);
|
|
|
|
cc.ldsminal(xA, xB, m);
|
|
|
|
cc.ldsminalb(wA, wB, m);
|
|
|
|
cc.ldsminalh(wA, wB, m);
|
|
|
|
cc.ldsminb(wA, wB, m);
|
|
|
|
cc.ldsminh(wA, wB, m);
|
|
|
|
cc.ldsminl(wA, wB, m);
|
|
|
|
cc.ldsminl(xA, xB, m);
|
|
|
|
cc.ldsminlb(wA, wB, m);
|
|
|
|
cc.ldsminlh(wA, wB, m);
|
|
|
|
cc.ldtr(wA, m);
|
|
|
|
cc.ldtr(xA, m);
|
|
|
|
cc.ldtrb(wA, m);
|
|
|
|
cc.ldtrh(wA, m);
|
|
|
|
cc.ldtrsb(wA, m);
|
|
|
|
cc.ldtrsh(wA, m);
|
|
|
|
cc.ldtrsw(xA, m);
|
|
|
|
cc.ldumax(wA, wB, m);
|
|
|
|
cc.ldumax(xA, xB, m);
|
|
|
|
cc.ldumaxa(wA, wB, m);
|
|
|
|
cc.ldumaxa(xA, xB, m);
|
|
|
|
cc.ldumaxab(wA, wB, m);
|
|
|
|
cc.ldumaxah(wA, wB, m);
|
|
|
|
cc.ldumaxal(wA, wB, m);
|
|
|
|
cc.ldumaxal(xA, xB, m);
|
|
|
|
cc.ldumaxalb(wA, wB, m);
|
|
|
|
cc.ldumaxalh(wA, wB, m);
|
|
|
|
cc.ldumaxb(wA, wB, m);
|
|
|
|
cc.ldumaxh(wA, wB, m);
|
|
|
|
cc.ldumaxl(wA, wB, m);
|
|
|
|
cc.ldumaxl(xA, xB, m);
|
|
|
|
cc.ldumaxlb(wA, wB, m);
|
|
|
|
cc.ldumaxlh(wA, wB, m);
|
|
|
|
cc.ldumin(wA, wB, m);
|
|
|
|
cc.ldumin(xA, xB, m);
|
|
|
|
cc.ldumina(wA, wB, m);
|
|
|
|
cc.ldumina(xA, xB, m);
|
|
|
|
cc.lduminab(wA, wB, m);
|
|
|
|
cc.lduminah(wA, wB, m);
|
|
|
|
cc.lduminal(wA, wB, m);
|
|
|
|
cc.lduminal(xA, xB, m);
|
|
|
|
cc.lduminalb(wA, wB, m);
|
|
|
|
cc.lduminalh(wA, wB, m);
|
|
|
|
cc.lduminb(wA, wB, m);
|
|
|
|
cc.lduminh(wA, wB, m);
|
|
|
|
cc.lduminl(wA, wB, m);
|
|
|
|
cc.lduminl(xA, xB, m);
|
|
|
|
cc.lduminlb(wA, wB, m);
|
|
|
|
cc.lduminlh(wA, wB, m);
|
|
|
|
cc.ldur(wA, m);
|
|
|
|
cc.ldur(xA, m);
|
|
|
|
cc.ldurb(wA, m);
|
|
|
|
cc.ldurh(wA, m);
|
|
|
|
cc.ldursb(wA, m);
|
|
|
|
cc.ldursh(wA, m);
|
|
|
|
cc.ldursw(xA, m);
|
|
|
|
cc.ldxp(wA, wB, m);
|
|
|
|
cc.ldxp(xA, xB, m);
|
|
|
|
cc.ldxr(wA, m);
|
|
|
|
cc.ldxr(xA, m);
|
|
|
|
cc.ldxrb(wA, m);
|
|
|
|
cc.ldxrh(wA, m);
|
|
|
|
cc.lsl(wA, wB, wC);
|
|
|
|
cc.lsl(xA, xB, xC);
|
|
|
|
cc.lsl(wA, wB, 15);
|
|
|
|
cc.lsl(xA, xB, 15);
|
|
|
|
cc.lslv(wA, wB, wC);
|
|
|
|
cc.lslv(xA, xB, xC);
|
|
|
|
cc.lsr(wA, wB, wC);
|
|
|
|
cc.lsr(xA, xB, xC);
|
|
|
|
cc.lsr(wA, wB, 15);
|
|
|
|
cc.lsr(xA, xB, 15);
|
|
|
|
cc.lsrv(wA, wB, wC);
|
|
|
|
cc.lsrv(xA, xB, xC);
|
|
|
|
cc.madd(wA, wB, wC, wD);
|
|
|
|
cc.madd(xA, xB, xC, xD);
|
|
|
|
cc.mneg(wA, wB, wC);
|
|
|
|
cc.mneg(xA, xB, xC);
|
|
|
|
cc.mov(wA, wB);
|
|
|
|
cc.mov(xA, xB);
|
|
|
|
cc.mov(wA, 0);
|
|
|
|
cc.mov(wA, 1);
|
|
|
|
cc.mov(wA, 2);
|
|
|
|
cc.mov(wA, 3);
|
|
|
|
cc.mov(wA, 4);
|
|
|
|
cc.mov(wA, 5);
|
|
|
|
cc.mov(wA, 6);
|
|
|
|
cc.mov(wA, 7);
|
|
|
|
cc.mov(wA, 8);
|
|
|
|
cc.mov(wA, 9);
|
|
|
|
cc.mov(wA, 10);
|
|
|
|
cc.mov(wA, 0xA234);
|
|
|
|
cc.mov(xA, 0xA23400000000);
|
|
|
|
cc.msub(wA, wB, wC, wD);
|
|
|
|
cc.msub(xA, xB, xC, xD);
|
|
|
|
cc.mul(wA, wB, wC);
|
|
|
|
cc.mul(xA, xB, xC);
|
|
|
|
cc.mvn(wA, wB);
|
|
|
|
cc.mvn(xA, xB);
|
|
|
|
cc.mvn(wA, wB, lsl(4));
|
|
|
|
cc.mvn(xA, xB, lsl(4));
|
|
|
|
cc.neg(wA, wB);
|
|
|
|
cc.neg(xA, xB);
|
|
|
|
cc.neg(wA, wB, lsl(4));
|
|
|
|
cc.neg(xA, xB, lsl(4));
|
|
|
|
cc.negs(wA, wB);
|
|
|
|
cc.negs(xA, xB);
|
|
|
|
cc.negs(wA, wB, lsl(4));
|
|
|
|
cc.negs(xA, xB, lsl(4));
|
|
|
|
cc.ngc(wA, wB);
|
|
|
|
cc.ngc(xA, xB);
|
|
|
|
cc.ngcs(wA, wB);
|
|
|
|
cc.ngcs(xA, xB);
|
|
|
|
cc.orn(wA, wB, wC);
|
|
|
|
cc.orn(xA, xB, xC);
|
|
|
|
cc.orn(wA, wB, wC, lsl(4));
|
|
|
|
cc.orn(xA, xB, xC, lsl(4));
|
|
|
|
cc.orr(wA, wB, wC);
|
|
|
|
cc.orr(xA, xB, xC);
|
|
|
|
cc.orr(wA, wB, wC, lsl(4));
|
|
|
|
cc.orr(xA, xB, xC, lsl(4));
|
|
|
|
cc.orr(wA, wB, 0x4000);
|
|
|
|
cc.orr(xA, xB, 0x8000);
|
|
|
|
cc.rbit(wA, wB);
|
|
|
|
cc.rbit(xA, xB);
|
|
|
|
cc.rev(wA, wB);
|
|
|
|
cc.rev(xA, xB);
|
|
|
|
cc.rev16(wA, wB);
|
|
|
|
cc.rev16(xA, xB);
|
|
|
|
cc.rev32(xA, xB);
|
|
|
|
cc.rev64(xA, xB);
|
|
|
|
cc.ror(wA, wB, wC);
|
|
|
|
cc.ror(xA, xB, xC);
|
|
|
|
cc.ror(wA, wB, 15);
|
|
|
|
cc.ror(xA, xB, 15);
|
|
|
|
cc.rorv(wA, wB, wC);
|
|
|
|
cc.rorv(xA, xB, xC);
|
|
|
|
cc.sbc(wA, wB, wC);
|
|
|
|
cc.sbc(xA, xB, xC);
|
|
|
|
cc.sbcs(wA, wB, wC);
|
|
|
|
cc.sbcs(xA, xB, xC);
|
|
|
|
cc.sbfiz(wA, wB, 5, 10);
|
|
|
|
cc.sbfiz(xA, xB, 5, 10);
|
|
|
|
cc.sbfm(wA, wB, 5, 10);
|
|
|
|
cc.sbfm(xA, xB, 5, 10);
|
|
|
|
cc.sbfx(wA, wB, 5, 10);
|
|
|
|
cc.sbfx(xA, xB, 5, 10);
|
|
|
|
cc.sdiv(wA, wB, wC);
|
|
|
|
cc.sdiv(xA, xB, xC);
|
|
|
|
cc.smaddl(xA, wB, wC, xD);
|
|
|
|
cc.smnegl(xA, wB, wC);
|
|
|
|
cc.smsubl(xA, wB, wC, xD);
|
|
|
|
cc.smulh(xA, xB, xC);
|
|
|
|
cc.smull(xA, wB, wC);
|
|
|
|
cc.stp(wA, wB, m);
|
|
|
|
cc.stp(xA, xB, m);
|
|
|
|
cc.sttr(wA, m);
|
|
|
|
cc.sttr(xA, m);
|
|
|
|
cc.sttrb(wA, m);
|
|
|
|
cc.sttrh(wA, m);
|
|
|
|
cc.stur(wA, m);
|
|
|
|
cc.stur(xA, m);
|
|
|
|
cc.sturb(wA, m);
|
|
|
|
cc.sturh(wA, m);
|
|
|
|
cc.stxp(wA, wB, wC, m);
|
|
|
|
cc.stxp(wA, xB, xC, m);
|
|
|
|
cc.stxr(wA, wB, m);
|
|
|
|
cc.stxr(wA, xB, m);
|
|
|
|
cc.stxrb(wA, wB, m);
|
|
|
|
cc.stxrh(wA, wB, m);
|
|
|
|
cc.sub(wA, wB, wC);
|
|
|
|
cc.sub(xA, xB, xC);
|
|
|
|
cc.sub(wA, wB, wC, lsl(3));
|
|
|
|
cc.sub(xA, xB, xC, lsl(3));
|
|
|
|
cc.subg(xA, xB, 32, 11);
|
|
|
|
cc.subp(xA, xB, xC);
|
|
|
|
cc.subps(xA, xB, xC);
|
|
|
|
cc.subs(wA, wB, wC);
|
|
|
|
cc.subs(xA, xB, xC);
|
|
|
|
cc.subs(wA, wB, wC, lsl(3));
|
|
|
|
cc.subs(xA, xB, xC, lsl(3));
|
|
|
|
cc.sxtb(wA, wB);
|
|
|
|
cc.sxtb(xA, wB);
|
|
|
|
cc.sxth(wA, wB);
|
|
|
|
cc.sxth(xA, wB);
|
|
|
|
cc.sxtw(xA, wB);
|
|
|
|
cc.tst(wA, 1);
|
|
|
|
cc.tst(xA, 1);
|
|
|
|
cc.tst(wA, wB);
|
|
|
|
cc.tst(xA, xB);
|
|
|
|
cc.tst(wA, wB, lsl(4));
|
|
|
|
cc.tst(xA, xB, lsl(4));
|
|
|
|
cc.udiv(wA, wB, wC);
|
|
|
|
cc.udiv(xA, xB, xC);
|
|
|
|
cc.ubfiz(wA, wB, 5, 10);
|
|
|
|
cc.ubfiz(xA, xB, 5, 10);
|
|
|
|
cc.ubfm(wA, wB, 5, 10);
|
|
|
|
cc.ubfm(xA, xB, 5, 10);
|
|
|
|
cc.ubfx(wA, wB, 5, 10);
|
|
|
|
cc.ubfx(xA, xB, 5, 10);
|
|
|
|
cc.umaddl(xA, wB, wC, xD);
|
|
|
|
cc.umnegl(xA, wB, wC);
|
|
|
|
cc.umsubl(xA, wB, wC, xD);
|
|
|
|
cc.umulh(xA, xB, xC);
|
|
|
|
cc.umull(xA, wB, wC);
|
|
|
|
cc.uxtb(wA, wB);
|
|
|
|
cc.uxth(wA, wB);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void generateGpSequence(BaseEmitter& emitter, bool emitPrologEpilog) {
|
|
|
|
if (emitter.isAssembler()) {
|
|
|
|
a64::Assembler& cc = *emitter.as<a64::Assembler>();
|
|
|
|
|
|
|
|
a64::Gp a = a64::x0;
|
|
|
|
a64::Gp b = a64::x1;
|
|
|
|
a64::Gp c = a64::x2;
|
|
|
|
a64::Gp d = a64::x3;
|
|
|
|
|
|
|
|
if (emitPrologEpilog) {
|
|
|
|
FuncDetail func;
|
2024-06-17 19:04:36 -04:00
|
|
|
func.init(FuncSignature::build<void, void*, const void*, size_t>(), cc.environment());
|
2023-12-06 17:43:39 -05:00
|
|
|
|
|
|
|
FuncFrame frame;
|
|
|
|
frame.init(func);
|
|
|
|
frame.addDirtyRegs(a, b, c, d);
|
|
|
|
frame.finalize();
|
|
|
|
|
|
|
|
cc.emitProlog(frame);
|
|
|
|
generateGpSequenceInternal(cc, a, b, c, d);
|
|
|
|
cc.emitEpilog(frame);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
generateGpSequenceInternal(cc, a, b, c, d);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#ifndef ASMJIT_NO_BUILDER
|
|
|
|
else if (emitter.isBuilder()) {
|
|
|
|
a64::Builder& cc = *emitter.as<a64::Builder>();
|
|
|
|
|
|
|
|
a64::Gp a = a64::x0;
|
|
|
|
a64::Gp b = a64::x1;
|
|
|
|
a64::Gp c = a64::x2;
|
|
|
|
a64::Gp d = a64::x3;
|
|
|
|
|
|
|
|
if (emitPrologEpilog) {
|
|
|
|
FuncDetail func;
|
2024-06-17 19:04:36 -04:00
|
|
|
func.init(FuncSignature::build<void, void*, const void*, size_t>(), cc.environment());
|
2023-12-06 17:43:39 -05:00
|
|
|
|
|
|
|
FuncFrame frame;
|
|
|
|
frame.init(func);
|
|
|
|
frame.addDirtyRegs(a, b, c, d);
|
|
|
|
frame.finalize();
|
|
|
|
|
|
|
|
cc.emitProlog(frame);
|
|
|
|
generateGpSequenceInternal(cc, a, b, c, d);
|
|
|
|
cc.emitEpilog(frame);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
generateGpSequenceInternal(cc, a, b, c, d);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
#ifndef ASMJIT_NO_COMPILER
|
|
|
|
else if (emitter.isCompiler()) {
|
|
|
|
a64::Compiler& cc = *emitter.as<a64::Compiler>();
|
|
|
|
|
|
|
|
a64::Gp a = cc.newIntPtr("a");
|
|
|
|
a64::Gp b = cc.newIntPtr("b");
|
|
|
|
a64::Gp c = cc.newIntPtr("c");
|
|
|
|
a64::Gp d = cc.newIntPtr("d");
|
|
|
|
|
2024-06-17 19:04:36 -04:00
|
|
|
cc.addFunc(FuncSignature::build<void>());
|
2023-12-06 17:43:39 -05:00
|
|
|
generateGpSequenceInternal(cc, a, b, c, d);
|
|
|
|
cc.endFunc();
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
template<typename EmitterFn>
|
|
|
|
static void benchmarkA64Function(Arch arch, uint32_t numIterations, const char* description, const EmitterFn& emitterFn) noexcept {
|
|
|
|
CodeHolder code;
|
|
|
|
printf("%s:\n", description);
|
|
|
|
|
|
|
|
uint32_t instCount = 0;
|
|
|
|
|
|
|
|
#ifndef ASMJIT_NO_BUILDER
|
|
|
|
instCount = asmjit_perf_utils::calculateInstructionCount<a64::Builder>(code, arch, [&](a64::Builder& cc) {
|
|
|
|
emitterFn(cc, false);
|
|
|
|
});
|
|
|
|
#endif
|
|
|
|
|
|
|
|
asmjit_perf_utils::bench<a64::Assembler>(code, arch, numIterations, "[raw]", instCount, [&](a64::Assembler& cc) {
|
|
|
|
emitterFn(cc, false);
|
|
|
|
});
|
|
|
|
|
|
|
|
asmjit_perf_utils::bench<a64::Assembler>(code, arch, numIterations, "[validated]", instCount, [&](a64::Assembler& cc) {
|
|
|
|
cc.addDiagnosticOptions(DiagnosticOptions::kValidateAssembler);
|
|
|
|
emitterFn(cc, false);
|
|
|
|
});
|
|
|
|
|
|
|
|
asmjit_perf_utils::bench<a64::Assembler>(code, arch, numIterations, "[prolog/epilog]", instCount, [&](a64::Assembler& cc) {
|
|
|
|
cc.addDiagnosticOptions(DiagnosticOptions::kValidateAssembler);
|
|
|
|
emitterFn(cc, true);
|
|
|
|
});
|
|
|
|
|
|
|
|
#ifndef ASMJIT_NO_BUILDER
|
|
|
|
asmjit_perf_utils::bench<a64::Builder>(code, arch, numIterations, "[no-asm]", instCount, [&](a64::Builder& cc) {
|
|
|
|
emitterFn(cc, false);
|
|
|
|
});
|
|
|
|
|
|
|
|
asmjit_perf_utils::bench<a64::Builder>(code, arch, numIterations, "[finalized]", instCount, [&](a64::Builder& cc) {
|
|
|
|
emitterFn(cc, false);
|
|
|
|
cc.finalize();
|
|
|
|
});
|
|
|
|
|
|
|
|
asmjit_perf_utils::bench<a64::Builder>(code, arch, numIterations, "[prolog/epilog]", instCount, [&](a64::Builder& cc) {
|
|
|
|
emitterFn(cc, true);
|
|
|
|
cc.finalize();
|
|
|
|
});
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef ASMJIT_NO_COMPILER
|
|
|
|
asmjit_perf_utils::bench<a64::Compiler>(code, arch, numIterations, "[no-asm]", instCount, [&](a64::Compiler& cc) {
|
|
|
|
emitterFn(cc, true);
|
|
|
|
});
|
|
|
|
|
|
|
|
asmjit_perf_utils::bench<a64::Compiler>(code, arch, numIterations, "[finalized]", instCount, [&](a64::Compiler& cc) {
|
|
|
|
emitterFn(cc, true);
|
|
|
|
cc.finalize();
|
|
|
|
});
|
|
|
|
#endif
|
|
|
|
|
|
|
|
printf("\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
void benchmarkA64Emitters(uint32_t numIterations) {
|
|
|
|
static const char description[] = "GpSequence (Sequence of GP instructions - reg/mem)";
|
|
|
|
benchmarkA64Function(Arch::kAArch64, numIterations, description, [](BaseEmitter& emitter, bool emitPrologEpilog) {
|
|
|
|
generateGpSequence(emitter, emitPrologEpilog);
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif // !ASMJIT_NO_AARCH64
|