www.llvm.org/doxygen/AArch64LegalizerInfo_8cpp_source.html

//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

/// \file

/// This file implements the targeting of the Machinelegalizer class for

/// AArch64.

/// \todo This should be generated by TableGen.

//===----------------------------------------------------------------------===//


#include "AArch64LegalizerInfo.h"

#include "AArch64Subtarget.h"

#include "llvm/ADT/STLExtras.h"

#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"

#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"

#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"

#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"

#include "llvm/CodeGen/GlobalISel/Utils.h"

#include "llvm/CodeGen/MachineInstr.h"

#include "llvm/CodeGen/MachineInstrBuilder.h"

#include "llvm/CodeGen/MachineRegisterInfo.h"

#include "llvm/CodeGen/TargetOpcodes.h"

#include "llvm/IR/DerivedTypes.h"

#include "llvm/IR/Intrinsics.h"

#include "llvm/IR/IntrinsicsAArch64.h"

#include "llvm/IR/Type.h"

#include "llvm/Support/MathExtras.h"

#include <initializer_list>


#define DEBUG_TYPE "aarch64-legalinfo"


using namespace llvm;

using namespace LegalizeActions;

using namespace LegalizeMutations;

using namespace LegalityPredicates;

using namespace MIPatternMatch;


AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)

    : ST(&ST) {

  using namespace TargetOpcode;

  const LLT p0 = LLT::pointer(0, 64);

  const LLT s8 = LLT::scalar(8);

  const LLT s16 = LLT::scalar(16);

  const LLT s32 = LLT::scalar(32);

  const LLT s64 = LLT::scalar(64);

  const LLT s128 = LLT::scalar(128);

  const LLT v16s8 = LLT::fixed_vector(16, 8);

  const LLT v8s8 = LLT::fixed_vector(8, 8);

  const LLT v4s8 = LLT::fixed_vector(4, 8);

  const LLT v2s8 = LLT::fixed_vector(2, 8);

  const LLT v8s16 = LLT::fixed_vector(8, 16);

  const LLT v4s16 = LLT::fixed_vector(4, 16);

  const LLT v2s16 = LLT::fixed_vector(2, 16);

  const LLT v2s32 = LLT::fixed_vector(2, 32);

  const LLT v4s32 = LLT::fixed_vector(4, 32);

  const LLT v2s64 = LLT::fixed_vector(2, 64);

  const LLT v2p0 = LLT::fixed_vector(2, p0);


  const LLT nxv16s8 = LLT::scalable_vector(16, s8);

  const LLT nxv8s16 = LLT::scalable_vector(8, s16);

  const LLT nxv4s32 = LLT::scalable_vector(4, s32);

  const LLT nxv2s64 = LLT::scalable_vector(2, s64);


  std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */

                                                        v16s8, v8s16, v4s32,

                                                        v2s64, v2p0,

                                                        /* End 128bit types */

                                                        /* Begin 64bit types */

                                                        v8s8, v4s16, v2s32};

  std::initializer_list<LLT> ScalarAndPtrTypesList = {s8, s16, s32, s64, p0};

  SmallVector<LLT, 8> PackedVectorAllTypesVec(PackedVectorAllTypeList);

  SmallVector<LLT, 8> ScalarAndPtrTypesVec(ScalarAndPtrTypesList);


  const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();


  // FIXME: support subtargets which have neon/fp-armv8 disabled.

  if (!ST.hasNEON() || !ST.hasFPARMv8()) {

    getLegacyLegalizerInfo().computeTables();

    return;

  }


  // Some instructions only support s16 if the subtarget has full 16-bit FP

  // support.

  const bool HasFP16 = ST.hasFullFP16();

  const LLT &MinFPScalar = HasFP16 ? s16 : s32;


  const bool HasCSSC = ST.hasCSSC();

  const bool HasRCPC3 = ST.hasRCPC3();

  const bool HasSVE = ST.hasSVE();


  getActionDefinitionsBuilder(

      {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})

      .legalFor({p0, s8, s16, s32, s64})

      .legalFor({v2s8, v4s8, v8s8, v16s8, v2s16, v4s16, v8s16, v2s32, v4s32,

                 v2s64, v2p0})

      .widenScalarToNextPow2(0)

      .clampScalar(0, s8, s64)

      .moreElementsToNextPow2(0)

      .widenVectorEltsToVectorMinSize(0, 64)

      .clampNumElements(0, v8s8, v16s8)

      .clampNumElements(0, v4s16, v8s16)

      .clampNumElements(0, v2s32, v4s32)

      .clampMaxNumElements(0, s64, 2)

      .clampMaxNumElements(0, p0, 2)

      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);


  getActionDefinitionsBuilder(G_PHI)

      .legalFor({p0, s16, s32, s64})

      .legalFor(PackedVectorAllTypeList)

      .widenScalarToNextPow2(0)

      .moreElementsToNextPow2(0)

      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)

      .clampScalar(0, s16, s64)

      .clampNumElements(0, v8s8, v16s8)

      .clampNumElements(0, v4s16, v8s16)

      .clampNumElements(0, v2s32, v4s32)

      .clampMaxNumElements(0, s64, 2)

      .clampMaxNumElements(0, p0, 2);


  getActionDefinitionsBuilder(G_INSERT)

      .legalIf(all(typeInSet(0, {s32, s64, p0}), typeInSet(1, {s8, s16, s32}),

                   smallerThan(1, 0)))

      .widenScalarToNextPow2(0)

      .clampScalar(0, s32, s64)

      .widenScalarToNextPow2(1)

      .minScalar(1, s8)

      .maxScalarIf(typeInSet(0, {s32}), 1, s16)

      .maxScalarIf(typeInSet(0, {s64, p0}), 1, s32);


  getActionDefinitionsBuilder(G_EXTRACT)

      .legalIf(all(typeInSet(0, {s16, s32, s64, p0}),

                   typeInSet(1, {s32, s64, s128, p0}), smallerThan(0, 1)))

      .widenScalarToNextPow2(1)

      .clampScalar(1, s32, s128)

      .widenScalarToNextPow2(0)

      .minScalar(0, s16)

      .maxScalarIf(typeInSet(1, {s32}), 0, s16)

      .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)

      .maxScalarIf(typeInSet(1, {s128}), 0, s64);


  getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR})

      .legalFor({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})

      .legalFor(HasSVE, {nxv16s8, nxv8s16, nxv4s32, nxv2s64})

      .widenScalarToNextPow2(0)

      .clampScalar(0, s32, s64)

      .clampMaxNumElements(0, s8, 16)

      .clampMaxNumElements(0, s16, 8)

      .clampNumElements(0, v2s32, v4s32)

      .clampNumElements(0, v2s64, v2s64)

      .minScalarOrEltIf(

          [=](const LegalityQuery &Query) {

            return Query.Types[0].getNumElements() <= 2;

          },

          0, s32)

      .minScalarOrEltIf(

          [=](const LegalityQuery &Query) {

            return Query.Types[0].getNumElements() <= 4;

          },

          0, s16)

      .minScalarOrEltIf(

          [=](const LegalityQuery &Query) {

            return Query.Types[0].getNumElements() <= 16;

          },

          0, s8)

      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)

      .moreElementsToNextPow2(0);


  getActionDefinitionsBuilder(G_MUL)

      .legalFor({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})

      .widenScalarToNextPow2(0)

      .clampScalar(0, s32, s64)

      .clampMaxNumElements(0, s8, 16)

      .clampMaxNumElements(0, s16, 8)

      .clampNumElements(0, v2s32, v4s32)

      .clampNumElements(0, v2s64, v2s64)

      .minScalarOrEltIf(

          [=](const LegalityQuery &Query) {

            return Query.Types[0].getNumElements() <= 2;

          },

          0, s32)

      .minScalarOrEltIf(

          [=](const LegalityQuery &Query) {

            return Query.Types[0].getNumElements() <= 4;

          },

          0, s16)

      .minScalarOrEltIf(

          [=](const LegalityQuery &Query) {

            return Query.Types[0].getNumElements() <= 16;

          },

          0, s8)

      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)

      .moreElementsToNextPow2(0);


  getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})

      .customIf([=](const LegalityQuery &Query) {

        const auto &SrcTy = Query.Types[0];

        const auto &AmtTy = Query.Types[1];

        return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&

               AmtTy.getSizeInBits() == 32;

      })

      .legalFor({

          {s32, s32},

          {s32, s64},

          {s64, s64},

          {v8s8, v8s8},

          {v16s8, v16s8},

          {v4s16, v4s16},

          {v8s16, v8s16},

          {v2s32, v2s32},

          {v4s32, v4s32},

          {v2s64, v2s64},

      })

      .widenScalarToNextPow2(0)

      .clampScalar(1, s32, s64)

      .clampScalar(0, s32, s64)

      .clampNumElements(0, v8s8, v16s8)

      .clampNumElements(0, v4s16, v8s16)

      .clampNumElements(0, v2s32, v4s32)

      .clampNumElements(0, v2s64, v2s64)

      .moreElementsToNextPow2(0)

      .minScalarSameAs(1, 0)

      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)

      .minScalarEltSameAsIf(isVector(0), 1, 0)

      .maxScalarEltSameAsIf(isVector(0), 1, 0);


  getActionDefinitionsBuilder(G_PTR_ADD)

      .legalFor({{p0, s64}, {v2p0, v2s64}})

      .clampScalarOrElt(1, s64, s64)

      .clampNumElements(0, v2p0, v2p0);


  getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});


  getActionDefinitionsBuilder({G_SDIV, G_UDIV})

      .legalFor({s32, s64})

      .libcallFor({s128})

      .clampScalar(0, s32, s64)

      .widenScalarToNextPow2(0)

      .scalarize(0);


  getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})

      .lowerFor({s8, s16, s32, s64, v2s32, v4s32, v2s64})

      .libcallFor({s128})

      .widenScalarOrEltToNextPow2(0)

      .minScalarOrElt(0, s32)

      .clampNumElements(0, v2s32, v4s32)

      .clampNumElements(0, v2s64, v2s64)

      .scalarize(0);


  getActionDefinitionsBuilder({G_SMULO, G_UMULO})

      .widenScalarToNextPow2(0, /*Min = */ 32)

      .clampScalar(0, s32, s64)

      .lower();


  getActionDefinitionsBuilder({G_SMULH, G_UMULH})

      .legalFor({s64, v16s8, v8s16, v4s32})

      .lower();


  getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})

      .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})

      .legalFor(HasCSSC, {s32, s64})

      .minScalar(HasCSSC, 0, s32)

      .clampNumElements(0, v8s8, v16s8)

      .clampNumElements(0, v4s16, v8s16)

      .clampNumElements(0, v2s32, v4s32)

      .lower();


  // FIXME: Legal vector types are only legal with NEON.

  getActionDefinitionsBuilder(G_ABS)

      .legalFor(HasCSSC, {s32, s64})

      .legalFor(PackedVectorAllTypeList)

      .customIf([=](const LegalityQuery &Q) {

        // TODO: Fix suboptimal codegen for 128+ bit types.

        LLT SrcTy = Q.Types[0];

        return SrcTy.isScalar() && SrcTy.getSizeInBits() < 128;

      })

      .widenScalarIf(

          [=](const LegalityQuery &Query) { return Query.Types[0] == v4s8; },

          [=](const LegalityQuery &Query) { return std::make_pair(0, v4s16); })

      .widenScalarIf(

          [=](const LegalityQuery &Query) { return Query.Types[0] == v2s16; },

          [=](const LegalityQuery &Query) { return std::make_pair(0, v2s32); })

      .clampNumElements(0, v8s8, v16s8)

      .clampNumElements(0, v4s16, v8s16)

      .clampNumElements(0, v2s32, v4s32)

      .clampNumElements(0, v2s64, v2s64)

      .moreElementsToNextPow2(0)

      .lower();


  getActionDefinitionsBuilder(

      {G_ABDS, G_ABDU, G_UAVGFLOOR, G_UAVGCEIL, G_SAVGFLOOR, G_SAVGCEIL})

      .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})

      .lower();


  getActionDefinitionsBuilder(

      {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})

      .legalFor({{s32, s32}, {s64, s32}})

      .clampScalar(0, s32, s64)

      .clampScalar(1, s32, s64)

      .widenScalarToNextPow2(0);


  getActionDefinitionsBuilder({G_FSHL, G_FSHR})

      .customFor({{s32, s32}, {s32, s64}, {s64, s64}})

      .lower();


  getActionDefinitionsBuilder(G_ROTR)

      .legalFor({{s32, s64}, {s64, s64}})

      .customIf([=](const LegalityQuery &Q) {

        return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;

      })

      .lower();

  getActionDefinitionsBuilder(G_ROTL).lower();


  getActionDefinitionsBuilder({G_SBFX, G_UBFX})

      .customFor({{s32, s32}, {s64, s64}});


  auto always = [=](const LegalityQuery &Q) { return true; };

  getActionDefinitionsBuilder(G_CTPOP)

      .legalFor(HasCSSC, {{s32, s32}, {s64, s64}})

      .legalFor({{v8s8, v8s8}, {v16s8, v16s8}})

      .customFor(!HasCSSC, {{s32, s32}, {s64, s64}})

      .customFor({{s128, s128},

                  {v4s16, v4s16},

                  {v8s16, v8s16},

                  {v2s32, v2s32},

                  {v4s32, v4s32},

                  {v2s64, v2s64}})

      .clampScalar(0, s32, s128)

      .widenScalarToNextPow2(0)

      .minScalarEltSameAsIf(always, 1, 0)

      .maxScalarEltSameAsIf(always, 1, 0)

      .clampNumElements(0, v8s8, v16s8)

      .clampNumElements(0, v4s16, v8s16)

      .clampNumElements(0, v2s32, v4s32)

      .clampNumElements(0, v2s64, v2s64)

      .moreElementsToNextPow2(0)

      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);


  getActionDefinitionsBuilder({G_CTLZ, G_CTLS})

      .legalFor({{s32, s32},

                 {s64, s64},

                 {v8s8, v8s8},

                 {v16s8, v16s8},

                 {v4s16, v4s16},

                 {v8s16, v8s16},

                 {v2s32, v2s32},

                 {v4s32, v4s32}})

      .widenScalarToNextPow2(1, /*Min=*/32)

      .clampScalar(1, s32, s64)

      .clampNumElements(0, v8s8, v16s8)

      .clampNumElements(0, v4s16, v8s16)

      .clampNumElements(0, v2s32, v4s32)

      .moreElementsToNextPow2(0)

      .scalarizeIf(scalarOrEltWiderThan(0, 32), 0)

      .scalarSameSizeAs(0, 1);


  getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).lower();


  getActionDefinitionsBuilder(G_CTTZ)

      .lowerIf(isVector(0))

      .widenScalarToNextPow2(1, /*Min=*/32)

      .clampScalar(1, s32, s64)

      .scalarSameSizeAs(0, 1)

      .legalFor(HasCSSC, {s32, s64})

      .customFor(!HasCSSC, {s32, s64});


  getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).lower();


  getActionDefinitionsBuilder(G_BITREVERSE)

      .legalFor({s32, s64, v8s8, v16s8})

      .widenScalarToNextPow2(0, /*Min = */ 32)

      .widenScalarOrEltToNextPow2OrMinSize(0, 8)

      .clampScalar(0, s32, s64)

      .clampNumElements(0, v8s8, v16s8)

      .clampNumElements(0, v4s16, v8s16)

      .clampNumElements(0, v2s32, v4s32)

      .clampNumElements(0, v2s64, v2s64)

      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)

      .moreElementsToNextPow2(0)

      .lower();


  getActionDefinitionsBuilder(G_BSWAP)

      .legalFor({s32, s64, v4s16, v8s16, v2s32, v4s32, v2s64})

      .widenScalarOrEltToNextPow2(0, 16)

      .clampScalar(0, s32, s64)

      .clampNumElements(0, v4s16, v8s16)

      .clampNumElements(0, v2s32, v4s32)

      .clampNumElements(0, v2s64, v2s64)

      .moreElementsToNextPow2(0);


  getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})

      .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})

      .legalFor(HasSVE, {nxv16s8, nxv8s16, nxv4s32, nxv2s64})

      .clampNumElements(0, v8s8, v16s8)

      .clampNumElements(0, v4s16, v8s16)

      .clampNumElements(0, v2s32, v4s32)

      .clampMaxNumElements(0, s64, 2)

      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)

      .moreElementsToNextPow2(0)

      .lower();


  getActionDefinitionsBuilder(

      {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM,

       G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT,

       G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})

      .legalFor({s32, s64, v2s32, v4s32, v2s64})

      .legalFor(HasFP16, {s16, v4s16, v8s16})

      .libcallFor({s128})

      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)

      .minScalarOrElt(0, MinFPScalar)

      .clampNumElements(0, v4s16, v8s16)

      .clampNumElements(0, v2s32, v4s32)

      .clampNumElements(0, v2s64, v2s64)

      .moreElementsToNextPow2(0);


  getActionDefinitionsBuilder({G_FABS, G_FNEG})

      .legalFor({s32, s64, v2s32, v4s32, v2s64})

      .legalFor(HasFP16, {s16, v4s16, v8s16})

      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)

      .lowerIf(scalarOrEltWiderThan(0, 64))

      .clampNumElements(0, v4s16, v8s16)

      .clampNumElements(0, v2s32, v4s32)

      .clampNumElements(0, v2s64, v2s64)

      .moreElementsToNextPow2(0)

      .lowerFor({s16, v4s16, v8s16});


  getActionDefinitionsBuilder(G_FREM)

      .libcallFor({s32, s64, s128})

      .minScalar(0, s32)

      .scalarize(0);


  getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2,

                               G_FLOG10, G_FTAN, G_FEXP, G_FEXP2, G_FEXP10,

                               G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH,

                               G_FSINH, G_FTANH, G_FMODF})

      // We need a call for these, so we always need to scalarize.

      .scalarize(0)

      // Regardless of FP16 support, widen 16-bit elements to 32-bits.

      .minScalar(0, s32)

      .libcallFor({s32, s64, s128});

  getActionDefinitionsBuilder({G_FPOWI, G_FLDEXP})

      .scalarize(0)

      .minScalar(0, s32)

      .libcallFor({{s32, s32}, {s64, s32}, {s128, s32}});


  getActionDefinitionsBuilder({G_LROUND, G_INTRINSIC_LRINT})

      .legalFor({{s32, s32}, {s32, s64}, {s64, s32}, {s64, s64}})

      .legalFor(HasFP16, {{s32, s16}, {s64, s16}})

      .minScalar(1, s32)

      .libcallFor({{s64, s128}})

      .lower();

  getActionDefinitionsBuilder({G_LLROUND, G_INTRINSIC_LLRINT})

      .legalFor({{s64, s32}, {s64, s64}})

      .legalFor(HasFP16, {{s64, s16}})

      .minScalar(0, s64)

      .minScalar(1, s32)

      .libcallFor({{s64, s128}})

      .lower();


  // TODO: Custom legalization for mismatched types.

  getActionDefinitionsBuilder(G_FCOPYSIGN)

      .moreElementsIf(

          [](const LegalityQuery &Query) { return Query.Types[0].isScalar(); },

          [=](const LegalityQuery &Query) {

            const LLT Ty = Query.Types[0];

            return std::pair(0, LLT::fixed_vector(Ty == s16 ? 4 : 2, Ty));

          })

      .lower();


  getActionDefinitionsBuilder(G_FMAD).lower();


  for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {

    auto &Actions =  getActionDefinitionsBuilder(Op);


    if (Op == G_SEXTLOAD)

      Actions.lowerIf(atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered));


    // Atomics have zero extending behavior.

    Actions

      .legalForTypesWithMemDesc({{s32, p0, s8, 8},

                                 {s32, p0, s16, 8},

                                 {s32, p0, s32, 8},

                                 {s64, p0, s8, 2},

                                 {s64, p0, s16, 2},

                                 {s64, p0, s32, 4},

                                 {s64, p0, s64, 8},

                                 {p0, p0, s64, 8},

                                 {v2s32, p0, s64, 8}})

      .widenScalarToNextPow2(0)

      .clampScalar(0, s32, s64)

      // TODO: We could support sum-of-pow2's but the lowering code doesn't know

      //       how to do that yet.

      .unsupportedIfMemSizeNotPow2()

      // Lower anything left over into G_*EXT and G_LOAD

      .lower();

  }


  auto IsPtrVecPred = [=](const LegalityQuery &Query) {

    const LLT &ValTy = Query.Types[0];

    return ValTy.isPointerVector() && ValTy.getAddressSpace() == 0;

  };


  getActionDefinitionsBuilder(G_LOAD)

      .customIf([=](const LegalityQuery &Query) {

        return HasRCPC3 && Query.Types[0] == s128 &&

               Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire;

      })

      .customIf([=](const LegalityQuery &Query) {

        return Query.Types[0] == s128 &&

               Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;

      })

      .legalForTypesWithMemDesc({{s8, p0, s8, 8},

                                 {s16, p0, s16, 8},

                                 {s32, p0, s32, 8},

                                 {s64, p0, s64, 8},

                                 {p0, p0, s64, 8},

                                 {s128, p0, s128, 8},

                                 {v8s8, p0, s64, 8},

                                 {v16s8, p0, s128, 8},

                                 {v4s16, p0, s64, 8},

                                 {v8s16, p0, s128, 8},

                                 {v2s32, p0, s64, 8},

                                 {v4s32, p0, s128, 8},

                                 {v2s64, p0, s128, 8}})

      // These extends are also legal

      .legalForTypesWithMemDesc(

          {{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s64, p0, s32, 8}})

      .legalForTypesWithMemDesc({

          // SVE vscale x 128 bit base sizes

          {nxv16s8, p0, nxv16s8, 8},

          {nxv8s16, p0, nxv8s16, 8},

          {nxv4s32, p0, nxv4s32, 8},

          {nxv2s64, p0, nxv2s64, 8},

      })

      .widenScalarToNextPow2(0, /* MinSize = */ 8)

      .clampMaxNumElements(0, s8, 16)

      .clampMaxNumElements(0, s16, 8)

      .clampMaxNumElements(0, s32, 4)

      .clampMaxNumElements(0, s64, 2)

      .clampMaxNumElements(0, p0, 2)

      .lowerIfMemSizeNotByteSizePow2()

      .clampScalar(0, s8, s64)

      .narrowScalarIf(

          [=](const LegalityQuery &Query) {

            // Clamp extending load results to 32-bits.

            return Query.Types[0].isScalar() &&

                   Query.Types[0] != Query.MMODescrs[0].MemoryTy &&

                   Query.Types[0].getSizeInBits() > 32;

          },

          changeTo(0, s32))

      // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out

      .bitcastIf(typeInSet(0, {v4s8}),

                 [=](const LegalityQuery &Query) {

                   const LLT VecTy = Query.Types[0];

                   return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));

                 })

      .customIf(IsPtrVecPred)

      .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)

      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);


  getActionDefinitionsBuilder(G_STORE)

      .customIf([=](const LegalityQuery &Query) {

        return HasRCPC3 && Query.Types[0] == s128 &&

               Query.MMODescrs[0].Ordering == AtomicOrdering::Release;

      })

      .customIf([=](const LegalityQuery &Query) {

        return Query.Types[0] == s128 &&

               Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;

      })

      .widenScalarIf(

          all(scalarNarrowerThan(0, 32),

              atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Release)),

          changeTo(0, s32))

      .legalForTypesWithMemDesc(

          {{s8, p0, s8, 8},     {s16, p0, s8, 8},  // truncstorei8 from s16

           {s32, p0, s8, 8},                       // truncstorei8 from s32

           {s64, p0, s8, 8},                       // truncstorei8 from s64

           {s16, p0, s16, 8},   {s32, p0, s16, 8}, // truncstorei16 from s32

           {s64, p0, s16, 8},                      // truncstorei16 from s64

           {s32, p0, s8, 8},    {s32, p0, s16, 8},    {s32, p0, s32, 8},

           {s64, p0, s64, 8},   {s64, p0, s32, 8}, // truncstorei32 from s64

           {p0, p0, s64, 8},    {s128, p0, s128, 8},  {v16s8, p0, s128, 8},

           {v8s8, p0, s64, 8},  {v4s16, p0, s64, 8},  {v8s16, p0, s128, 8},

           {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})

      .legalForTypesWithMemDesc({

          // SVE vscale x 128 bit base sizes

          // TODO: Add nxv2p0. Consider bitcastIf.

          //       See #92130

          // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461

          {nxv16s8, p0, nxv16s8, 8},

          {nxv8s16, p0, nxv8s16, 8},

          {nxv4s32, p0, nxv4s32, 8},

          {nxv2s64, p0, nxv2s64, 8},

      })

      .clampScalar(0, s8, s64)

      .minScalarOrElt(0, s8)

      .lowerIf([=](const LegalityQuery &Query) {

        return Query.Types[0].isScalar() &&

               Query.Types[0] != Query.MMODescrs[0].MemoryTy;

      })

      // Maximum: sN * k = 128

      .clampMaxNumElements(0, s8, 16)

      .clampMaxNumElements(0, s16, 8)

      .clampMaxNumElements(0, s32, 4)

      .clampMaxNumElements(0, s64, 2)

      .clampMaxNumElements(0, p0, 2)

      .lowerIfMemSizeNotPow2()

      // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out

      .bitcastIf(all(typeInSet(0, {v4s8}),

                     LegalityPredicate([=](const LegalityQuery &Query) {

                       return Query.Types[0].getSizeInBits() ==

                              Query.MMODescrs[0].MemoryTy.getSizeInBits();

                     })),

                 [=](const LegalityQuery &Query) {

                   const LLT VecTy = Query.Types[0];

                   return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));

                 })

      .customIf(IsPtrVecPred)

      .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)

      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)

      .lower();


  getActionDefinitionsBuilder(G_INDEXED_STORE)

      // Idx 0 == Ptr, Idx 1 == Val

      // TODO: we can implement legalizations but as of now these are

      // generated in a very specific way.

      .legalForTypesWithMemDesc({

          {p0, s8, s8, 8},

          {p0, s16, s16, 8},

          {p0, s32, s8, 8},

          {p0, s32, s16, 8},

          {p0, s32, s32, 8},

          {p0, s64, s64, 8},

          {p0, p0, p0, 8},

          {p0, v8s8, v8s8, 8},

          {p0, v16s8, v16s8, 8},

          {p0, v4s16, v4s16, 8},

          {p0, v8s16, v8s16, 8},

          {p0, v2s32, v2s32, 8},

          {p0, v4s32, v4s32, 8},

          {p0, v2s64, v2s64, 8},

          {p0, v2p0, v2p0, 8},

          {p0, s128, s128, 8},

      })

      .unsupported();


  auto IndexedLoadBasicPred = [=](const LegalityQuery &Query) {

    LLT LdTy = Query.Types[0];

    LLT PtrTy = Query.Types[1];

    if (!llvm::is_contained(PackedVectorAllTypesVec, LdTy) &&

        !llvm::is_contained(ScalarAndPtrTypesVec, LdTy) && LdTy != s128)

      return false;

    if (PtrTy != p0)

      return false;

    return true;

  };

  getActionDefinitionsBuilder(G_INDEXED_LOAD)

      .unsupportedIf(

          atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered))

      .legalIf(IndexedLoadBasicPred)

      .unsupported();

  getActionDefinitionsBuilder({G_INDEXED_SEXTLOAD, G_INDEXED_ZEXTLOAD})

      .unsupportedIf(

          atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered))

      .legalIf(all(typeInSet(0, {s16, s32, s64}),

                   LegalityPredicate([=](const LegalityQuery &Q) {

                     LLT LdTy = Q.Types[0];

                     LLT PtrTy = Q.Types[1];

                     LLT MemTy = Q.MMODescrs[0].MemoryTy;

                     if (PtrTy != p0)

                       return false;

                     if (LdTy == s16)

                       return MemTy == s8;

                     if (LdTy == s32)

                       return MemTy == s8 || MemTy == s16;

                     if (LdTy == s64)

                       return MemTy == s8 || MemTy == s16 || MemTy == s32;

                     return false;

                   })))

      .unsupported();


  // Constants

  getActionDefinitionsBuilder(G_CONSTANT)

      .legalFor({p0, s8, s16, s32, s64})

      .widenScalarToNextPow2(0)

      .clampScalar(0, s8, s64);

  getActionDefinitionsBuilder(G_FCONSTANT)

      // Always legalize s16 to prevent G_FCONSTANT being widened to G_CONSTANT

      .legalFor({s16, s32, s64, s128})

      .clampScalar(0, MinFPScalar, s128);


  // FIXME: fix moreElementsToNextPow2

  getActionDefinitionsBuilder(G_ICMP)

      .legalFor({{s32, s32}, {s32, s64}, {s32, p0}})

      .widenScalarOrEltToNextPow2(1)

      .clampScalar(1, s32, s64)

      .clampScalar(0, s32, s32)

      .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)

      .minScalarEltSameAsIf(

          [=](const LegalityQuery &Query) {

            const LLT &Ty = Query.Types[0];

            const LLT &SrcTy = Query.Types[1];

            return Ty.isVector() && !SrcTy.isPointerVector() &&

                   Ty.getElementType() != SrcTy.getElementType();

          },

          0, 1)

      .minScalarOrEltIf(

          [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },

          1, s32)

      .minScalarOrEltIf(

          [=](const LegalityQuery &Query) {

            return Query.Types[1].isPointerVector();

          },

          0, s64)

      .moreElementsToNextPow2(1)

      .clampNumElements(1, v8s8, v16s8)

      .clampNumElements(1, v4s16, v8s16)

      .clampNumElements(1, v2s32, v4s32)

      .clampNumElements(1, v2s64, v2s64)

      .clampNumElements(1, v2p0, v2p0)

      .customIf(isVector(0));


  getActionDefinitionsBuilder(G_FCMP)

      .legalFor({{s32, s32},

                 {s32, s64},

                 {v4s32, v4s32},

                 {v2s32, v2s32},

                 {v2s64, v2s64}})

      .legalFor(HasFP16, {{s32, s16}, {v4s16, v4s16}, {v8s16, v8s16}})

      .widenScalarOrEltToNextPow2(1)

      .clampScalar(0, s32, s32)

      .minScalarOrElt(1, MinFPScalar)

      .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)

      .minScalarEltSameAsIf(

          [=](const LegalityQuery &Query) {

            const LLT &Ty = Query.Types[0];

            const LLT &SrcTy = Query.Types[1];

            return Ty.isVector() && !SrcTy.isPointerVector() &&

                   Ty.getElementType() != SrcTy.getElementType();

          },

          0, 1)

      .clampNumElements(1, v4s16, v8s16)

      .clampNumElements(1, v2s32, v4s32)

      .clampMaxNumElements(1, s64, 2)

      .moreElementsToNextPow2(1)

      .libcallFor({{s32, s128}});


  // Extensions

  auto ExtLegalFunc = [=](const LegalityQuery &Query) {

    unsigned DstSize = Query.Types[0].getSizeInBits();


    // Handle legal vectors using legalFor

    if (Query.Types[0].isVector())

      return false;


    if (DstSize < 8 || DstSize >= 128 || !isPowerOf2_32(DstSize))

      return false; // Extending to a scalar s128 needs narrowing.


    const LLT &SrcTy = Query.Types[1];


    // Make sure we fit in a register otherwise. Don't bother checking that

    // the source type is below 128 bits. We shouldn't be allowing anything

    // through which is wider than the destination in the first place.

    unsigned SrcSize = SrcTy.getSizeInBits();

    if (SrcSize < 8 || !isPowerOf2_32(SrcSize))

      return false;


    return true;

  };

  getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})

      .legalIf(ExtLegalFunc)

      .legalFor({{v8s16, v8s8}, {v4s32, v4s16}, {v2s64, v2s32}})

      .clampScalar(0, s64, s64) // Just for s128, others are handled above.

      .moreElementsToNextPow2(0)

      .clampMaxNumElements(1, s8, 8)

      .clampMaxNumElements(1, s16, 4)

      .clampMaxNumElements(1, s32, 2)

      // Tries to convert a large EXTEND into two smaller EXTENDs

      .lowerIf([=](const LegalityQuery &Query) {

        return (Query.Types[0].getScalarSizeInBits() >

                Query.Types[1].getScalarSizeInBits() * 2) &&

               Query.Types[0].isVector() &&

               (Query.Types[1].getScalarSizeInBits() == 8 ||

                Query.Types[1].getScalarSizeInBits() == 16);

      })

      .clampMinNumElements(1, s8, 8)

      .clampMinNumElements(1, s16, 4)

      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);


  getActionDefinitionsBuilder(G_TRUNC)

      .legalFor({{v8s8, v8s16}, {v4s16, v4s32}, {v2s32, v2s64}})

      .moreElementsToNextPow2(0)

      .clampMaxNumElements(0, s8, 8)

      .clampMaxNumElements(0, s16, 4)

      .clampMaxNumElements(0, s32, 2)

      .minScalarOrEltIf(

          [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },

          0, s8)

      .lowerIf([=](const LegalityQuery &Query) {

        LLT DstTy = Query.Types[0];

        LLT SrcTy = Query.Types[1];

        return DstTy.isVector() && SrcTy.getSizeInBits() > 128 &&

               DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits();

      })

      .clampMinNumElements(0, s8, 8)

      .clampMinNumElements(0, s16, 4)

      .alwaysLegal();


  getActionDefinitionsBuilder({G_TRUNC_SSAT_S, G_TRUNC_SSAT_U, G_TRUNC_USAT_U})

      .legalFor({{v8s8, v8s16}, {v4s16, v4s32}, {v2s32, v2s64}})

      .clampNumElements(0, v2s32, v2s32);


  getActionDefinitionsBuilder(G_SEXT_INREG)

      .legalFor({s32, s64})

      .legalFor(PackedVectorAllTypeList)

      .maxScalar(0, s64)

      .clampNumElements(0, v8s8, v16s8)

      .clampNumElements(0, v4s16, v8s16)

      .clampNumElements(0, v2s32, v4s32)

      .clampMaxNumElements(0, s64, 2)

      .lower();


  // FP conversions

  getActionDefinitionsBuilder(G_FPTRUNC)

      .legalFor(

          {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})

      .libcallFor({{s16, s128}, {s32, s128}, {s64, s128}})

      .moreElementsToNextPow2(1)

      .customIf([](const LegalityQuery &Q) {

        LLT DstTy = Q.Types[0];

        LLT SrcTy = Q.Types[1];

        return SrcTy.isFixedVector() && DstTy.isFixedVector() &&

               SrcTy.getScalarSizeInBits() == 64 &&

               DstTy.getScalarSizeInBits() == 16;

      })

      // Clamp based on input

      .clampNumElements(1, v4s32, v4s32)

      .clampNumElements(1, v2s64, v2s64)

      .scalarize(0);


  getActionDefinitionsBuilder(G_FPEXT)

      .legalFor(

          {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})

      .libcallFor({{s128, s64}, {s128, s32}, {s128, s16}})

      .moreElementsToNextPow2(0)

      .widenScalarIf(

          [](const LegalityQuery &Q) {

            LLT DstTy = Q.Types[0];

            LLT SrcTy = Q.Types[1];

            return SrcTy.isVector() && DstTy.isVector() &&

                   SrcTy.getScalarSizeInBits() == 16 &&

                   DstTy.getScalarSizeInBits() == 64;

          },

          changeElementTo(1, s32))

      .clampNumElements(0, v4s32, v4s32)

      .clampNumElements(0, v2s64, v2s64)

      .scalarize(0);


  // Conversions

  getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})

      .legalFor({{s32, s32},

                 {s64, s32},

                 {s32, s64},

                 {s64, s64},

                 {v2s32, v2s32},

                 {v4s32, v4s32},

                 {v2s64, v2s64}})

      .legalFor(HasFP16,

                {{s32, s16}, {s64, s16}, {v4s16, v4s16}, {v8s16, v8s16}})

      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)

      .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)

      // The range of a fp16 value fits into an i17, so we can lower the width

      // to i64.

      .narrowScalarIf(

          [=](const LegalityQuery &Query) {

            return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64;

          },

          changeTo(0, s64))

      .moreElementsToNextPow2(0)

      .widenScalarOrEltToNextPow2OrMinSize(0)

      .minScalar(0, s32)

      .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)

      .widenScalarIf(

          [=](const LegalityQuery &Query) {

            return Query.Types[0].getScalarSizeInBits() <= 64 &&

                   Query.Types[0].getScalarSizeInBits() >

                       Query.Types[1].getScalarSizeInBits();

          },

          LegalizeMutations::changeElementSizeTo(1, 0))

      .widenScalarIf(

          [=](const LegalityQuery &Query) {

            return Query.Types[1].getScalarSizeInBits() <= 64 &&

                   Query.Types[0].getScalarSizeInBits() <

                       Query.Types[1].getScalarSizeInBits();

          },

          LegalizeMutations::changeElementSizeTo(0, 1))

      .clampNumElements(0, v4s16, v8s16)

      .clampNumElements(0, v2s32, v4s32)

      .clampMaxNumElements(0, s64, 2)

      .libcallFor(

          {{s32, s128}, {s64, s128}, {s128, s128}, {s128, s32}, {s128, s64}});


  getActionDefinitionsBuilder({G_FPTOSI_SAT, G_FPTOUI_SAT})

      .legalFor({{s32, s32},

                 {s64, s32},

                 {s32, s64},

                 {s64, s64},

                 {v2s32, v2s32},

                 {v4s32, v4s32},

                 {v2s64, v2s64}})

      .legalFor(

          HasFP16,

          {{s16, s16}, {s32, s16}, {s64, s16}, {v4s16, v4s16}, {v8s16, v8s16}})

      // Handle types larger than i64 by scalarizing/lowering.

      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)

      .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)

      // The range of a fp16 value fits into an i17, so we can lower the width

      // to i64.

      .narrowScalarIf(

          [=](const LegalityQuery &Query) {

            return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64;

          },

          changeTo(0, s64))

      .lowerIf(::any(scalarWiderThan(0, 64), scalarWiderThan(1, 64)), 0)

      .moreElementsToNextPow2(0)

      .widenScalarToNextPow2(0, /*MinSize=*/32)

      .minScalar(0, s32)

      .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)

      .widenScalarIf(

          [=](const LegalityQuery &Query) {

            unsigned ITySize = Query.Types[0].getScalarSizeInBits();

            return (ITySize == 16 || ITySize == 32 || ITySize == 64) &&

                   ITySize > Query.Types[1].getScalarSizeInBits();

          },

          LegalizeMutations::changeElementSizeTo(1, 0))

      .widenScalarIf(

          [=](const LegalityQuery &Query) {

            unsigned FTySize = Query.Types[1].getScalarSizeInBits();

            return (FTySize == 16 || FTySize == 32 || FTySize == 64) &&

                   Query.Types[0].getScalarSizeInBits() < FTySize;

          },

          LegalizeMutations::changeElementSizeTo(0, 1))

      .widenScalarOrEltToNextPow2(0)

      .clampNumElements(0, v4s16, v8s16)

      .clampNumElements(0, v2s32, v4s32)

      .clampMaxNumElements(0, s64, 2);


  getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})

      .legalFor({{s32, s32},

                 {s64, s32},

                 {s32, s64},

                 {s64, s64},

                 {v2s32, v2s32},

                 {v4s32, v4s32},

                 {v2s64, v2s64}})

      .legalFor(HasFP16,

                {{s16, s32}, {s16, s64}, {v4s16, v4s16}, {v8s16, v8s16}})

      .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)

      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)

      .moreElementsToNextPow2(1)

      .widenScalarOrEltToNextPow2OrMinSize(1)

      .minScalar(1, s32)

      .lowerIf([](const LegalityQuery &Query) {

        return Query.Types[1].isVector() &&

               Query.Types[1].getScalarSizeInBits() == 64 &&

               Query.Types[0].getScalarSizeInBits() == 16;

      })

      .widenScalarOrEltToNextPow2OrMinSize(0, /*MinSize=*/HasFP16 ? 16 : 32)

      .scalarizeIf(

          // v2i64->v2f32 needs to scalarize to avoid double-rounding issues.

          [](const LegalityQuery &Query) {

            return Query.Types[0].getScalarSizeInBits() == 32 &&

                   Query.Types[1].getScalarSizeInBits() == 64;

          },

          0)

      .widenScalarIf(

          [](const LegalityQuery &Query) {

            return Query.Types[1].getScalarSizeInBits() <= 64 &&

                   Query.Types[0].getScalarSizeInBits() <

                       Query.Types[1].getScalarSizeInBits();

          },

          LegalizeMutations::changeElementSizeTo(0, 1))

      .widenScalarIf(

          [](const LegalityQuery &Query) {

            return Query.Types[0].getScalarSizeInBits() <= 64 &&

                   Query.Types[0].getScalarSizeInBits() >

                       Query.Types[1].getScalarSizeInBits();

          },

          LegalizeMutations::changeElementSizeTo(1, 0))

      .clampNumElements(0, v4s16, v8s16)

      .clampNumElements(0, v2s32, v4s32)

      .clampMaxNumElements(0, s64, 2)

      .libcallFor({{s16, s128},

                   {s32, s128},

                   {s64, s128},

                   {s128, s128},

                   {s128, s32},

                   {s128, s64}});


  // Control-flow

  getActionDefinitionsBuilder(G_BR).alwaysLegal();

  getActionDefinitionsBuilder(G_BRCOND)

    .legalFor({s32})

    .clampScalar(0, s32, s32);

  getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});


  getActionDefinitionsBuilder(G_SELECT)

      .legalFor({{s32, s32}, {s64, s32}, {p0, s32}})

      .widenScalarToNextPow2(0)

      .clampScalar(0, s32, s64)

      .clampScalar(1, s32, s32)

      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)

      .minScalarEltSameAsIf(all(isVector(0), isVector(1)), 1, 0)

      .lowerIf(isVector(0));


  // Pointer-handling

  getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});


  if (TM.getCodeModel() == CodeModel::Small)

    getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();

  else

    getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});


  getActionDefinitionsBuilder(G_PTRAUTH_GLOBAL_VALUE)

      .legalIf(all(typeIs(0, p0), typeIs(1, p0)));


  getActionDefinitionsBuilder(G_PTRTOINT)

      .legalFor({{s64, p0}, {v2s64, v2p0}})

      .widenScalarToNextPow2(0, 64)

      .clampScalar(0, s64, s64)

      .clampMaxNumElements(0, s64, 2);


  getActionDefinitionsBuilder(G_INTTOPTR)

      .unsupportedIf([&](const LegalityQuery &Query) {

        return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();

      })

      .legalFor({{p0, s64}, {v2p0, v2s64}})

      .clampMaxNumElements(1, s64, 2);


  // Casts for 32 and 64-bit width type are just copies.

  // Same for 128-bit width type, except they are on the FPR bank.

  getActionDefinitionsBuilder(G_BITCAST)

      // Keeping 32-bit instructions legal to prevent regression in some tests

      .legalForCartesianProduct({s32, v2s16, v4s8})

      .legalForCartesianProduct({s64, v8s8, v4s16, v2s32})

      .legalForCartesianProduct({s128, v16s8, v8s16, v4s32, v2s64, v2p0})

      .customIf([=](const LegalityQuery &Query) {

        // Handle casts from i1 vectors to scalars.

        LLT DstTy = Query.Types[0];

        LLT SrcTy = Query.Types[1];

        return DstTy.isScalar() && SrcTy.isVector() &&

               SrcTy.getScalarSizeInBits() == 1;

      })

      .lowerIf([=](const LegalityQuery &Query) {

        return Query.Types[0].isVector() != Query.Types[1].isVector();

      })

      .moreElementsToNextPow2(0)

      .clampNumElements(0, v8s8, v16s8)

      .clampNumElements(0, v4s16, v8s16)

      .clampNumElements(0, v2s32, v4s32)

      .lower();


  getActionDefinitionsBuilder(G_VASTART).legalFor({p0});


  // va_list must be a pointer, but most sized types are pretty easy to handle

  // as the destination.

  getActionDefinitionsBuilder(G_VAARG)

      .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})

      .clampScalar(0, s8, s64)

      .widenScalarToNextPow2(0, /*Min*/ 8);


  getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)

      .lowerIf(

          all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));


  bool UseOutlineAtomics = ST.outlineAtomics() && !ST.hasLSE();


  getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)

      .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})

      .customFor(!UseOutlineAtomics, {{s128, p0}})

      .libcallFor(UseOutlineAtomics,

                  {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}, {s128, p0}})

      .clampScalar(0, s32, s64);


  getActionDefinitionsBuilder({G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD,

                               G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_OR,

                               G_ATOMICRMW_XOR})

      .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})

      .libcallFor(UseOutlineAtomics,

                  {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})

      .clampScalar(0, s32, s64);


  // Do not outline these atomics operations, as per comment in

  // AArch64ISelLowering.cpp's shouldExpandAtomicRMWInIR().

  getActionDefinitionsBuilder(

      {G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})

      .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)))

      .clampScalar(0, s32, s64);


  getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});


  // Merge/Unmerge

  for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {

    unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;

    unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;

    getActionDefinitionsBuilder(Op)

        .widenScalarToNextPow2(LitTyIdx, 8)

        .widenScalarToNextPow2(BigTyIdx, 32)

        .clampScalar(LitTyIdx, s8, s64)

        .clampScalar(BigTyIdx, s32, s128)

        .legalIf([=](const LegalityQuery &Q) {

          switch (Q.Types[BigTyIdx].getSizeInBits()) {

          case 32:

          case 64:

          case 128:

            break;

          default:

            return false;

          }

          switch (Q.Types[LitTyIdx].getSizeInBits()) {

          case 8:

          case 16:

          case 32:

          case 64:

            return true;

          default:

            return false;

          }

        });

  }


  // TODO : nxv4s16, nxv2s16, nxv2s32

  getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)

      .legalFor(HasSVE, {{s16, nxv16s8, s64},

                         {s16, nxv8s16, s64},

                         {s32, nxv4s32, s64},

                         {s64, nxv2s64, s64}})

      .unsupportedIf([=](const LegalityQuery &Query) {

        const LLT &EltTy = Query.Types[1].getElementType();

        if (Query.Types[1].isScalableVector())

          return false;

        return Query.Types[0] != EltTy;

      })

      .minScalar(2, s64)

      .customIf([=](const LegalityQuery &Query) {

        const LLT &VecTy = Query.Types[1];

        return VecTy == v8s8 || VecTy == v16s8 || VecTy == v2s16 ||

               VecTy == v4s16 || VecTy == v8s16 || VecTy == v2s32 ||

               VecTy == v4s32 || VecTy == v2s64 || VecTy == v2p0;

      })

      .minScalarOrEltIf(

          [=](const LegalityQuery &Query) {

            // We want to promote to <M x s1> to <M x s64> if that wouldn't

            // cause the total vec size to be > 128b.

            return Query.Types[1].isFixedVector() &&

                   Query.Types[1].getNumElements() <= 2;

          },

          0, s64)

      .minScalarOrEltIf(

          [=](const LegalityQuery &Query) {

            return Query.Types[1].isFixedVector() &&

                   Query.Types[1].getNumElements() <= 4;

          },

          0, s32)

      .minScalarOrEltIf(

          [=](const LegalityQuery &Query) {

            return Query.Types[1].isFixedVector() &&

                   Query.Types[1].getNumElements() <= 8;

          },

          0, s16)

      .minScalarOrEltIf(

          [=](const LegalityQuery &Query) {

            return Query.Types[1].isFixedVector() &&

                   Query.Types[1].getNumElements() <= 16;

          },

          0, s8)

      .minScalarOrElt(0, s8) // Worst case, we need at least s8.

      .moreElementsToNextPow2(1)

      .clampMaxNumElements(1, s64, 2)

      .clampMaxNumElements(1, s32, 4)

      .clampMaxNumElements(1, s16, 8)

      .clampMaxNumElements(1, s8, 16)

      .clampMaxNumElements(1, p0, 2)

      .scalarizeIf(scalarOrEltWiderThan(1, 64), 1);


  getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)

      .legalIf(

          typeInSet(0, {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64, v2p0}))

      .legalFor(HasSVE, {{nxv16s8, s32, s64},

                         {nxv8s16, s32, s64},

                         {nxv4s32, s32, s64},

                         {nxv2s64, s64, s64}})

      .moreElementsToNextPow2(0)

      .widenVectorEltsToVectorMinSize(0, 64)

      .clampNumElements(0, v8s8, v16s8)

      .clampNumElements(0, v4s16, v8s16)

      .clampNumElements(0, v2s32, v4s32)

      .clampMaxNumElements(0, s64, 2)

      .clampMaxNumElements(0, p0, 2)

      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);


  getActionDefinitionsBuilder(G_BUILD_VECTOR)

      .legalFor({{v8s8, s8},

                 {v16s8, s8},

                 {v4s16, s16},

                 {v8s16, s16},

                 {v2s32, s32},

                 {v4s32, s32},

                 {v2s64, s64},

                 {v2p0, p0}})

      .clampNumElements(0, v4s32, v4s32)

      .clampNumElements(0, v2s64, v2s64)

      .minScalarOrElt(0, s8)

      .widenVectorEltsToVectorMinSize(0, 64)

      .widenScalarOrEltToNextPow2(0)

      .minScalarSameAs(1, 0);


  getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();


  getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)

      .legalIf([=](const LegalityQuery &Query) {

        const LLT &DstTy = Query.Types[0];

        const LLT &SrcTy = Query.Types[1];

        // For now just support the TBL2 variant which needs the source vectors

        // to be the same size as the dest.

        if (DstTy != SrcTy)

          return false;

        return llvm::is_contained(

            {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64}, DstTy);

      })

      .moreElementsIf(

          [](const LegalityQuery &Query) {

            return Query.Types[0].getNumElements() >

                   Query.Types[1].getNumElements();

          },

          changeTo(1, 0))

      .moreElementsToNextPow2(0)

      .moreElementsIf(

          [](const LegalityQuery &Query) {

            return Query.Types[0].getNumElements() <

                   Query.Types[1].getNumElements();

          },

          changeTo(0, 1))

      .widenScalarOrEltToNextPow2OrMinSize(0, 8)

      .clampNumElements(0, v8s8, v16s8)

      .clampNumElements(0, v4s16, v8s16)

      .clampNumElements(0, v4s32, v4s32)

      .clampNumElements(0, v2s64, v2s64)

      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)

      .bitcastIf(isPointerVector(0), [=](const LegalityQuery &Query) {

        // Bitcast pointers vector to i64.

        const LLT DstTy = Query.Types[0];

        return std::pair(0, LLT::vector(DstTy.getElementCount(), 64));

      });


  getActionDefinitionsBuilder(G_CONCAT_VECTORS)

      .legalFor({{v16s8, v8s8}, {v8s16, v4s16}, {v4s32, v2s32}})

      .bitcastIf(

          [=](const LegalityQuery &Query) {

            return Query.Types[0].isFixedVector() &&

                   Query.Types[1].isFixedVector() &&

                   Query.Types[0].getSizeInBits() <= 128 &&

                   Query.Types[1].getSizeInBits() <= 64;

          },

          [=](const LegalityQuery &Query) {

            const LLT DstTy = Query.Types[0];

            const LLT SrcTy = Query.Types[1];

            return std::pair(

                0, DstTy.changeElementSize(SrcTy.getSizeInBits())

                       .changeElementCount(

                           DstTy.getElementCount().divideCoefficientBy(

                               SrcTy.getNumElements())));

          });


  getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)

      .legalFor({{v8s8, v16s8}, {v4s16, v8s16}, {v2s32, v4s32}})

      .widenScalarOrEltToNextPow2(0)

      .immIdx(0); // Inform verifier imm idx 0 is handled.


  // TODO: {nxv16s8, s8}, {nxv8s16, s16}

  getActionDefinitionsBuilder(G_SPLAT_VECTOR)

      .legalFor(HasSVE, {{nxv4s32, s32}, {nxv2s64, s64}});


  getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({p0});


  getActionDefinitionsBuilder(G_BRJT).legalFor({{p0, s64}});


  getActionDefinitionsBuilder({G_TRAP, G_DEBUGTRAP, G_UBSANTRAP}).alwaysLegal();


  getActionDefinitionsBuilder(G_DYN_STACKALLOC).custom();


  getActionDefinitionsBuilder({G_STACKSAVE, G_STACKRESTORE}).lower();


  if (ST.hasMOPS()) {

    // G_BZERO is not supported. Currently it is only emitted by

    // PreLegalizerCombiner for G_MEMSET with zero constant.

    getActionDefinitionsBuilder(G_BZERO).unsupported();


    getActionDefinitionsBuilder(G_MEMSET)

        .legalForCartesianProduct({p0}, {s64}, {s64})

        .customForCartesianProduct({p0}, {s8}, {s64})

        .immIdx(0); // Inform verifier imm idx 0 is handled.


    getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE})

        .legalForCartesianProduct({p0}, {p0}, {s64})

        .immIdx(0); // Inform verifier imm idx 0 is handled.


    // G_MEMCPY_INLINE does not have a tailcall immediate

    getActionDefinitionsBuilder(G_MEMCPY_INLINE)

        .legalForCartesianProduct({p0}, {p0}, {s64});


  } else {

    getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})

        .libcall();

  }


  // For fadd reductions we have pairwise operations available. We treat the

  // usual legal types as legal and handle the lowering to pairwise instructions

  // later.

  getActionDefinitionsBuilder(G_VECREDUCE_FADD)

      .legalFor({{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})

      .legalFor(HasFP16, {{s16, v4s16}, {s16, v8s16}})

      .minScalarOrElt(0, MinFPScalar)

      .clampMaxNumElements(1, s64, 2)

      .clampMaxNumElements(1, s32, 4)

      .clampMaxNumElements(1, s16, 8)

      .moreElementsToNextPow2(1)

      .scalarize(1)

      .lower();


  // For fmul reductions we need to split up into individual operations. We

  // clamp to 128 bit vectors then to 64bit vectors to produce a cascade of

  // smaller types, followed by scalarizing what remains.

  getActionDefinitionsBuilder(G_VECREDUCE_FMUL)

      .minScalarOrElt(0, MinFPScalar)

      .clampMaxNumElements(1, s64, 2)

      .clampMaxNumElements(1, s32, 4)

      .clampMaxNumElements(1, s16, 8)

      .clampMaxNumElements(1, s32, 2)

      .clampMaxNumElements(1, s16, 4)

      .scalarize(1)

      .lower();


  getActionDefinitionsBuilder({G_VECREDUCE_SEQ_FADD, G_VECREDUCE_SEQ_FMUL})

      .scalarize(2)

      .lower();


  getActionDefinitionsBuilder(G_VECREDUCE_ADD)

      .legalFor({{s8, v8s8},

                 {s8, v16s8},

                 {s16, v4s16},

                 {s16, v8s16},

                 {s32, v2s32},

                 {s32, v4s32},

                 {s64, v2s64}})

      .moreElementsToNextPow2(1)

      .clampMaxNumElements(1, s64, 2)

      .clampMaxNumElements(1, s32, 4)

      .clampMaxNumElements(1, s16, 8)

      .clampMaxNumElements(1, s8, 16)

      .widenVectorEltsToVectorMinSize(1, 64)

      .scalarize(1);


  getActionDefinitionsBuilder({G_VECREDUCE_FMIN, G_VECREDUCE_FMAX,

                               G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM})

      .legalFor({{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})

      .legalFor(HasFP16, {{s16, v4s16}, {s16, v8s16}})

      .minScalarOrElt(0, MinFPScalar)

      .clampMaxNumElements(1, s64, 2)

      .clampMaxNumElements(1, s32, 4)

      .clampMaxNumElements(1, s16, 8)

      .scalarize(1)

      .lower();


  getActionDefinitionsBuilder(G_VECREDUCE_MUL)

      .clampMaxNumElements(1, s32, 2)

      .clampMaxNumElements(1, s16, 4)

      .clampMaxNumElements(1, s8, 8)

      .scalarize(1)

      .lower();


  getActionDefinitionsBuilder(

      {G_VECREDUCE_SMIN, G_VECREDUCE_SMAX, G_VECREDUCE_UMIN, G_VECREDUCE_UMAX})

      .legalFor({{s8, v8s8},

                 {s8, v16s8},

                 {s16, v4s16},

                 {s16, v8s16},

                 {s32, v2s32},

                 {s32, v4s32}})

      .moreElementsIf(

          [=](const LegalityQuery &Query) {

            return Query.Types[1].isVector() &&

                   Query.Types[1].getElementType() != s8 &&

                   Query.Types[1].getNumElements() & 1;

          },

          LegalizeMutations::moreElementsToNextPow2(1))

      .clampMaxNumElements(1, s64, 2)

      .clampMaxNumElements(1, s32, 4)

      .clampMaxNumElements(1, s16, 8)

      .clampMaxNumElements(1, s8, 16)

      .scalarize(1)

      .lower();


  getActionDefinitionsBuilder(

      {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})

      // Try to break down into smaller vectors as long as they're at least 64

      // bits. This lets us use vector operations for some parts of the

      // reduction.

      .fewerElementsIf(

          [=](const LegalityQuery &Q) {

            LLT SrcTy = Q.Types[1];

            if (SrcTy.isScalar())

              return false;

            if (!isPowerOf2_32(SrcTy.getNumElements()))

              return false;

            // We can usually perform 64b vector operations.

            return SrcTy.getSizeInBits() > 64;

          },

          [=](const LegalityQuery &Q) {

            LLT SrcTy = Q.Types[1];

            return std::make_pair(1, SrcTy.divide(2));

          })

      .scalarize(1)

      .lower();


  // TODO: Update this to correct handling when adding AArch64/SVE support.

  getActionDefinitionsBuilder(G_VECTOR_COMPRESS).lower();


  // Access to floating-point environment.

  getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV,

                               G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})

      .libcall();


  getActionDefinitionsBuilder(G_IS_FPCLASS).lower();


  getActionDefinitionsBuilder(G_PREFETCH).custom();


  getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower();


  getLegacyLegalizerInfo().computeTables();

  verify(*ST.getInstrInfo());

}


bool AArch64LegalizerInfo::legalizeCustom(

    LegalizerHelper &Helper, MachineInstr &MI,

    LostDebugLocObserver &LocObserver) const {

  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;

  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();

  GISelChangeObserver &Observer = Helper.Observer;

  switch (MI.getOpcode()) {

  default:

    // No idea what to do.

    return false;

  case TargetOpcode::G_VAARG:

    return legalizeVaArg(MI, MRI, MIRBuilder);

  case TargetOpcode::G_LOAD:

  case TargetOpcode::G_STORE:

    return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);

  case TargetOpcode::G_SHL:

  case TargetOpcode::G_ASHR:

  case TargetOpcode::G_LSHR:

    return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);

  case TargetOpcode::G_GLOBAL_VALUE:

    return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);

  case TargetOpcode::G_SBFX:

  case TargetOpcode::G_UBFX:

    return legalizeBitfieldExtract(MI, MRI, Helper);

  case TargetOpcode::G_FSHL:

  case TargetOpcode::G_FSHR:

    return legalizeFunnelShift(MI, MRI, MIRBuilder, Observer, Helper);

  case TargetOpcode::G_ROTR:

    return legalizeRotate(MI, MRI, Helper);

  case TargetOpcode::G_CTPOP:

    return legalizeCTPOP(MI, MRI, Helper);

  case TargetOpcode::G_ATOMIC_CMPXCHG:

    return legalizeAtomicCmpxchg128(MI, MRI, Helper);

  case TargetOpcode::G_CTTZ:

    return legalizeCTTZ(MI, Helper);

  case TargetOpcode::G_BZERO:

  case TargetOpcode::G_MEMCPY:

  case TargetOpcode::G_MEMMOVE:

  case TargetOpcode::G_MEMSET:

    return legalizeMemOps(MI, Helper);

  case TargetOpcode::G_EXTRACT_VECTOR_ELT:

    return legalizeExtractVectorElt(MI, MRI, Helper);

  case TargetOpcode::G_DYN_STACKALLOC:

    return legalizeDynStackAlloc(MI, Helper);

  case TargetOpcode::G_PREFETCH:

    return legalizePrefetch(MI, Helper);

  case TargetOpcode::G_ABS:

    return Helper.lowerAbsToCNeg(MI);

  case TargetOpcode::G_ICMP:

    return legalizeICMP(MI, MRI, MIRBuilder);

  case TargetOpcode::G_BITCAST:

    return legalizeBitcast(MI, Helper);

  case TargetOpcode::G_FPTRUNC:

    // In order to lower f16 to f64 properly, we need to use f32 as an

    // intermediary

    return legalizeFptrunc(MI, MIRBuilder, MRI);

  }


  llvm_unreachable("expected switch to return");

}


bool AArch64LegalizerInfo::legalizeBitcast(MachineInstr &MI,

                                           LegalizerHelper &Helper) const {

  assert(MI.getOpcode() == TargetOpcode::G_BITCAST && "Unexpected opcode");

  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();

  // We're trying to handle casts from i1 vectors to scalars but reloading from

  // stack.

  if (!DstTy.isScalar() || !SrcTy.isVector() ||

      SrcTy.getElementType() != LLT::scalar(1))

    return false;


  Helper.createStackStoreLoad(DstReg, SrcReg);

  MI.eraseFromParent();

  return true;

}


bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,

                                               MachineRegisterInfo &MRI,

                                               MachineIRBuilder &MIRBuilder,

                                               GISelChangeObserver &Observer,

                                               LegalizerHelper &Helper) const {

  assert(MI.getOpcode() == TargetOpcode::G_FSHL ||

         MI.getOpcode() == TargetOpcode::G_FSHR);


  // Keep as G_FSHR if shift amount is a G_CONSTANT, else use generic

  // lowering

  Register ShiftNo = MI.getOperand(3).getReg();

  LLT ShiftTy = MRI.getType(ShiftNo);

  auto VRegAndVal = getIConstantVRegValWithLookThrough(ShiftNo, MRI);


  // Adjust shift amount according to Opcode (FSHL/FSHR)

  // Convert FSHL to FSHR

  LLT OperationTy = MRI.getType(MI.getOperand(0).getReg());

  APInt BitWidth(ShiftTy.getSizeInBits(), OperationTy.getSizeInBits(), false);


  // Lower non-constant shifts and leave zero shifts to the optimizer.

  if (!VRegAndVal || VRegAndVal->Value.urem(BitWidth) == 0)

    return (Helper.lowerFunnelShiftAsShifts(MI) ==

            LegalizerHelper::LegalizeResult::Legalized);


  APInt Amount = VRegAndVal->Value.urem(BitWidth);


  Amount = MI.getOpcode() == TargetOpcode::G_FSHL ? BitWidth - Amount : Amount;


  // If the instruction is G_FSHR, has a 64-bit G_CONSTANT for shift amount

  // in the range of 0 <-> BitWidth, it is legal

  if (ShiftTy.getSizeInBits() == 64 && MI.getOpcode() == TargetOpcode::G_FSHR &&

      VRegAndVal->Value.ult(BitWidth))

    return true;


  // Cast the ShiftNumber to a 64-bit type

  auto Cast64 = MIRBuilder.buildConstant(LLT::scalar(64), Amount.zext(64));


  if (MI.getOpcode() == TargetOpcode::G_FSHR) {

    Observer.changingInstr(MI);

    MI.getOperand(3).setReg(Cast64.getReg(0));

    Observer.changedInstr(MI);

  }

  // If Opcode is FSHL, remove the FSHL instruction and create a FSHR

  // instruction

  else if (MI.getOpcode() == TargetOpcode::G_FSHL) {

    MIRBuilder.buildInstr(TargetOpcode::G_FSHR, {MI.getOperand(0).getReg()},

                          {MI.getOperand(1).getReg(), MI.getOperand(2).getReg(),

                           Cast64.getReg(0)});

    MI.eraseFromParent();

  }

  return true;

}


bool AArch64LegalizerInfo::legalizeICMP(MachineInstr &MI,

                                        MachineRegisterInfo &MRI,

                                        MachineIRBuilder &MIRBuilder) const {

  Register DstReg = MI.getOperand(0).getReg();

  Register SrcReg1 = MI.getOperand(2).getReg();

  Register SrcReg2 = MI.getOperand(3).getReg();

  LLT DstTy = MRI.getType(DstReg);

  LLT SrcTy = MRI.getType(SrcReg1);


  // Check the vector types are legal

  if (DstTy.getScalarSizeInBits() != SrcTy.getScalarSizeInBits() ||

      DstTy.getNumElements() != SrcTy.getNumElements() ||

      (DstTy.getSizeInBits() != 64 && DstTy.getSizeInBits() != 128))

    return false;


  // Lowers G_ICMP NE => G_ICMP EQ to allow better pattern matching for

  // following passes

  CmpInst::Predicate Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();

  if (Pred != CmpInst::ICMP_NE)

    return true;

  Register CmpReg =

      MIRBuilder

          .buildICmp(CmpInst::ICMP_EQ, MRI.getType(DstReg), SrcReg1, SrcReg2)

          .getReg(0);

  MIRBuilder.buildNot(DstReg, CmpReg);


  MI.eraseFromParent();

  return true;

}


bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,

                                          MachineRegisterInfo &MRI,

                                          LegalizerHelper &Helper) const {

  // To allow for imported patterns to match, we ensure that the rotate amount

  // is 64b with an extension.

  Register AmtReg = MI.getOperand(2).getReg();

  LLT AmtTy = MRI.getType(AmtReg);

  (void)AmtTy;

  assert(AmtTy.isScalar() && "Expected a scalar rotate");

  assert(AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal");

  auto NewAmt = Helper.MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);

  Helper.Observer.changingInstr(MI);

  MI.getOperand(2).setReg(NewAmt.getReg(0));

  Helper.Observer.changedInstr(MI);

  return true;

}


bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(

    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,

    GISelChangeObserver &Observer) const {

  assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);

  // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +

  // G_ADD_LOW instructions.

  // By splitting this here, we can optimize accesses in the small code model by

  // folding in the G_ADD_LOW into the load/store offset.

  auto &GlobalOp = MI.getOperand(1);

  // Don't modify an intrinsic call.

  if (GlobalOp.isSymbol())

    return true;

  const auto* GV = GlobalOp.getGlobal();

  if (GV->isThreadLocal())

    return true; // Don't want to modify TLS vars.


  auto &TM = ST->getTargetLowering()->getTargetMachine();

  unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);


  if (OpFlags & AArch64II::MO_GOT)

    return true;


  auto Offset = GlobalOp.getOffset();

  Register DstReg = MI.getOperand(0).getReg();

  auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})

                  .addGlobalAddress(GV, Offset, OpFlags | AArch64II::MO_PAGE);

  // Set the regclass on the dest reg too.

  MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);


  // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so

  // by creating a MOVK that sets bits 48-63 of the register to (global address

  // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to

  // prevent an incorrect tag being generated during relocation when the

  // global appears before the code section. Without the offset, a global at

  // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced

  // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =

  // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`

  // instead of `0xf`.

  // This assumes that we're in the small code model so we can assume a binary

  // size of <= 4GB, which makes the untagged PC relative offset positive. The

  // binary must also be loaded into address range [0, 2^48). Both of these

  // properties need to be ensured at runtime when using tagged addresses.

  if (OpFlags & AArch64II::MO_TAGGED) {

    assert(!Offset &&

           "Should not have folded in an offset for a tagged global!");

    ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP})

               .addGlobalAddress(GV, 0x100000000,

                                 AArch64II::MO_PREL | AArch64II::MO_G3)

               .addImm(48);

    MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);

  }


  MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})

      .addGlobalAddress(GV, Offset,

                        OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);

  MI.eraseFromParent();

  return true;

}


bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,

                                             MachineInstr &MI) const {

  MachineIRBuilder &MIB = Helper.MIRBuilder;

  MachineRegisterInfo &MRI = *MIB.getMRI();


  auto LowerUnaryOp = [&MI, &MIB](unsigned Opcode) {

    MIB.buildInstr(Opcode, {MI.getOperand(0)}, {MI.getOperand(2)});

    MI.eraseFromParent();

    return true;

  };

  auto LowerBinOp = [&MI, &MIB](unsigned Opcode) {

    MIB.buildInstr(Opcode, {MI.getOperand(0)},

                   {MI.getOperand(2), MI.getOperand(3)});

    MI.eraseFromParent();

    return true;

  };

  auto LowerTriOp = [&MI, &MIB](unsigned Opcode) {

    MIB.buildInstr(Opcode, {MI.getOperand(0)},

                   {MI.getOperand(2), MI.getOperand(3), MI.getOperand(4)});

    MI.eraseFromParent();

    return true;

  };


  Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();

  switch (IntrinsicID) {

  case Intrinsic::vacopy: {

    unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;

    unsigned VaListSize =

      (ST->isTargetDarwin() || ST->isTargetWindows())

          ? PtrSize

          : ST->isTargetILP32() ? 20 : 32;


    MachineFunction &MF = *MI.getMF();

    auto Val = MF.getRegInfo().createGenericVirtualRegister(

        LLT::scalar(VaListSize * 8));

    MIB.buildLoad(Val, MI.getOperand(2),

                  *MF.getMachineMemOperand(MachinePointerInfo(),

                                           MachineMemOperand::MOLoad,

                                           VaListSize, Align(PtrSize)));

    MIB.buildStore(Val, MI.getOperand(1),

                   *MF.getMachineMemOperand(MachinePointerInfo(),

                                            MachineMemOperand::MOStore,

                                            VaListSize, Align(PtrSize)));

    MI.eraseFromParent();

    return true;

  }

  case Intrinsic::get_dynamic_area_offset: {

    MIB.buildConstant(MI.getOperand(0).getReg(), 0);

    MI.eraseFromParent();

    return true;

  }

  case Intrinsic::aarch64_mops_memset_tag: {

    assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);

    // Anyext the value being set to 64 bit (only the bottom 8 bits are read by

    // the instruction).

    auto &Value = MI.getOperand(3);

    Register ExtValueReg = MIB.buildAnyExt(LLT::scalar(64), Value).getReg(0);

    Value.setReg(ExtValueReg);

    return true;

  }

  case Intrinsic::aarch64_prefetch: {

    auto &AddrVal = MI.getOperand(1);


    int64_t IsWrite = MI.getOperand(2).getImm();

    int64_t Target = MI.getOperand(3).getImm();

    int64_t IsStream = MI.getOperand(4).getImm();

    int64_t IsData = MI.getOperand(5).getImm();


    unsigned PrfOp = (IsWrite << 4) |    // Load/Store bit

                     (!IsData << 3) |    // IsDataCache bit

                     (Target << 1) |     // Cache level bits

                     (unsigned)IsStream; // Stream bit


    MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);

    MI.eraseFromParent();

    return true;

  }

  case Intrinsic::aarch64_range_prefetch: {

    auto &AddrVal = MI.getOperand(1);


    int64_t IsWrite = MI.getOperand(2).getImm();

    int64_t IsStream = MI.getOperand(3).getImm();

    unsigned PrfOp = (IsStream << 2) | IsWrite;


    MIB.buildInstr(AArch64::G_AARCH64_RANGE_PREFETCH)

        .addImm(PrfOp)

        .add(AddrVal)

        .addUse(MI.getOperand(4).getReg()); // Metadata

    MI.eraseFromParent();

    return true;

  }

  case Intrinsic::aarch64_prefetch_ir: {

    auto &AddrVal = MI.getOperand(1);

    MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(24).add(AddrVal);

    MI.eraseFromParent();

    return true;

  }

  case Intrinsic::aarch64_neon_uaddv:

  case Intrinsic::aarch64_neon_saddv:

  case Intrinsic::aarch64_neon_umaxv:

  case Intrinsic::aarch64_neon_smaxv:

  case Intrinsic::aarch64_neon_uminv:

  case Intrinsic::aarch64_neon_sminv: {

    bool IsSigned = IntrinsicID == Intrinsic::aarch64_neon_saddv ||

                    IntrinsicID == Intrinsic::aarch64_neon_smaxv ||

                    IntrinsicID == Intrinsic::aarch64_neon_sminv;


    auto OldDst = MI.getOperand(0).getReg();

    auto OldDstTy = MRI.getType(OldDst);

    LLT NewDstTy = MRI.getType(MI.getOperand(2).getReg()).getElementType();

    if (OldDstTy == NewDstTy)

      return true;


    auto NewDst = MRI.createGenericVirtualRegister(NewDstTy);


    Helper.Observer.changingInstr(MI);

    MI.getOperand(0).setReg(NewDst);

    Helper.Observer.changedInstr(MI);


    MIB.setInsertPt(MIB.getMBB(), ++MIB.getInsertPt());

    MIB.buildExtOrTrunc(IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT,

                        OldDst, NewDst);


    return true;

  }

  case Intrinsic::aarch64_neon_uaddlp:

  case Intrinsic::aarch64_neon_saddlp: {

    unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlp

                       ? AArch64::G_UADDLP

                       : AArch64::G_SADDLP;

    MIB.buildInstr(Opc, {MI.getOperand(0)}, {MI.getOperand(2)});

    MI.eraseFromParent();


    return true;

  }

  case Intrinsic::aarch64_neon_uaddlv:

  case Intrinsic::aarch64_neon_saddlv: {

    unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlv

                       ? AArch64::G_UADDLV

                       : AArch64::G_SADDLV;

    Register DstReg = MI.getOperand(0).getReg();

    Register SrcReg = MI.getOperand(2).getReg();

    LLT DstTy = MRI.getType(DstReg);


    LLT MidTy, ExtTy;

    if (DstTy.isScalar() && DstTy.getScalarSizeInBits() <= 32) {

      MidTy = LLT::fixed_vector(4, 32);

      ExtTy = LLT::scalar(32);

    } else {

      MidTy = LLT::fixed_vector(2, 64);

      ExtTy = LLT::scalar(64);

    }


    Register MidReg =

        MIB.buildInstr(Opc, {MidTy}, {SrcReg})->getOperand(0).getReg();

    Register ZeroReg =

        MIB.buildConstant(LLT::scalar(64), 0)->getOperand(0).getReg();

    Register ExtReg = MIB.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT, {ExtTy},

                                     {MidReg, ZeroReg})

                          .getReg(0);


    if (DstTy.getScalarSizeInBits() < 32)

      MIB.buildTrunc(DstReg, ExtReg);

    else

      MIB.buildCopy(DstReg, ExtReg);


    MI.eraseFromParent();


    return true;

  }

  case Intrinsic::aarch64_neon_smax:

    return LowerBinOp(TargetOpcode::G_SMAX);

  case Intrinsic::aarch64_neon_smin:

    return LowerBinOp(TargetOpcode::G_SMIN);

  case Intrinsic::aarch64_neon_umax:

    return LowerBinOp(TargetOpcode::G_UMAX);

  case Intrinsic::aarch64_neon_umin:

    return LowerBinOp(TargetOpcode::G_UMIN);

  case Intrinsic::aarch64_neon_fmax:

    return LowerBinOp(TargetOpcode::G_FMAXIMUM);

  case Intrinsic::aarch64_neon_fmin:

    return LowerBinOp(TargetOpcode::G_FMINIMUM);

  case Intrinsic::aarch64_neon_fmaxnm:

    return LowerBinOp(TargetOpcode::G_FMAXNUM);

  case Intrinsic::aarch64_neon_fminnm:

    return LowerBinOp(TargetOpcode::G_FMINNUM);

  case Intrinsic::aarch64_neon_pmull:

  case Intrinsic::aarch64_neon_pmull64:

    return LowerBinOp(AArch64::G_PMULL);

  case Intrinsic::aarch64_neon_smull:

    return LowerBinOp(AArch64::G_SMULL);

  case Intrinsic::aarch64_neon_umull:

    return LowerBinOp(AArch64::G_UMULL);

  case Intrinsic::aarch64_neon_sabd:

    return LowerBinOp(TargetOpcode::G_ABDS);

  case Intrinsic::aarch64_neon_uabd:

    return LowerBinOp(TargetOpcode::G_ABDU);

  case Intrinsic::aarch64_neon_uhadd:

    return LowerBinOp(TargetOpcode::G_UAVGFLOOR);

  case Intrinsic::aarch64_neon_urhadd:

    return LowerBinOp(TargetOpcode::G_UAVGCEIL);

  case Intrinsic::aarch64_neon_shadd:

    return LowerBinOp(TargetOpcode::G_SAVGFLOOR);

  case Intrinsic::aarch64_neon_srhadd:

    return LowerBinOp(TargetOpcode::G_SAVGCEIL);

  case Intrinsic::aarch64_neon_sqshrn: {

    if (!MRI.getType(MI.getOperand(0).getReg()).isVector())

      return true;

    // Create right shift instruction. Store the output register in Shr.

    auto Shr = MIB.buildInstr(AArch64::G_VASHR,

                              {MRI.getType(MI.getOperand(2).getReg())},

                              {MI.getOperand(2), MI.getOperand(3).getImm()});

    // Build the narrow intrinsic, taking in Shr.

    MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_S, {MI.getOperand(0)}, {Shr});

    MI.eraseFromParent();

    return true;

  }

  case Intrinsic::aarch64_neon_sqshrun: {

    if (!MRI.getType(MI.getOperand(0).getReg()).isVector())

      return true;

    // Create right shift instruction. Store the output register in Shr.

    auto Shr = MIB.buildInstr(AArch64::G_VASHR,

                              {MRI.getType(MI.getOperand(2).getReg())},

                              {MI.getOperand(2), MI.getOperand(3).getImm()});

    // Build the narrow intrinsic, taking in Shr.

    MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_U, {MI.getOperand(0)}, {Shr});

    MI.eraseFromParent();

    return true;

  }

  case Intrinsic::aarch64_neon_sqrshrn: {

    if (!MRI.getType(MI.getOperand(0).getReg()).isVector())

      return true;

    // Create right shift instruction. Store the output register in Shr.

    auto Shr = MIB.buildInstr(AArch64::G_SRSHR_I,

                              {MRI.getType(MI.getOperand(2).getReg())},

                              {MI.getOperand(2), MI.getOperand(3).getImm()});

    // Build the narrow intrinsic, taking in Shr.

    MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_S, {MI.getOperand(0)}, {Shr});

    MI.eraseFromParent();

    return true;

  }

  case Intrinsic::aarch64_neon_sqrshrun: {

    if (!MRI.getType(MI.getOperand(0).getReg()).isVector())

      return true;

    // Create right shift instruction. Store the output register in Shr.

    auto Shr = MIB.buildInstr(AArch64::G_SRSHR_I,

                              {MRI.getType(MI.getOperand(2).getReg())},

                              {MI.getOperand(2), MI.getOperand(3).getImm()});

    // Build the narrow intrinsic, taking in Shr.

    MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_U, {MI.getOperand(0)}, {Shr});

    MI.eraseFromParent();

    return true;

  }

  case Intrinsic::aarch64_neon_uqrshrn: {

    if (!MRI.getType(MI.getOperand(0).getReg()).isVector())

      return true;

    // Create right shift instruction. Store the output register in Shr.

    auto Shr = MIB.buildInstr(AArch64::G_URSHR_I,

                              {MRI.getType(MI.getOperand(2).getReg())},

                              {MI.getOperand(2), MI.getOperand(3).getImm()});

    // Build the narrow intrinsic, taking in Shr.

    MIB.buildInstr(TargetOpcode::G_TRUNC_USAT_U, {MI.getOperand(0)}, {Shr});

    MI.eraseFromParent();

    return true;

  }

  case Intrinsic::aarch64_neon_uqshrn: {

    if (!MRI.getType(MI.getOperand(0).getReg()).isVector())

      return true;

    // Create right shift instruction. Store the output register in Shr.

    auto Shr = MIB.buildInstr(AArch64::G_VLSHR,

                              {MRI.getType(MI.getOperand(2).getReg())},

                              {MI.getOperand(2), MI.getOperand(3).getImm()});

    // Build the narrow intrinsic, taking in Shr.

    MIB.buildInstr(TargetOpcode::G_TRUNC_USAT_U, {MI.getOperand(0)}, {Shr});

    MI.eraseFromParent();

    return true;

  }

  case Intrinsic::aarch64_neon_sqshlu: {

    // Check if last operand is constant vector dup

    auto ShiftAmount = isConstantOrConstantSplatVector(

        *MRI.getVRegDef(MI.getOperand(3).getReg()), MRI);

    if (ShiftAmount) {

      // If so, create a new intrinsic with the correct shift amount

      MIB.buildInstr(AArch64::G_SQSHLU_I, {MI.getOperand(0)},

                     {MI.getOperand(2)})

          .addImm(ShiftAmount->getSExtValue());

      MI.eraseFromParent();

      return true;

    }

    return false;

  }

  case Intrinsic::aarch64_neon_vsli: {

    MIB.buildInstr(

        AArch64::G_SLI, {MI.getOperand(0)},

        {MI.getOperand(2), MI.getOperand(3), MI.getOperand(4).getImm()});

    MI.eraseFromParent();

    break;

  }

  case Intrinsic::aarch64_neon_vsri: {

    MIB.buildInstr(

        AArch64::G_SRI, {MI.getOperand(0)},

        {MI.getOperand(2), MI.getOperand(3), MI.getOperand(4).getImm()});

    MI.eraseFromParent();

    break;

  }

  case Intrinsic::aarch64_neon_abs: {

    // Lower the intrinsic to G_ABS.

    MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});

    MI.eraseFromParent();

    return true;

  }

  case Intrinsic::aarch64_neon_sqadd: {

    if (MRI.getType(MI.getOperand(0).getReg()).isVector())

      return LowerBinOp(TargetOpcode::G_SADDSAT);

    break;

  }

  case Intrinsic::aarch64_neon_sqsub: {

    if (MRI.getType(MI.getOperand(0).getReg()).isVector())

      return LowerBinOp(TargetOpcode::G_SSUBSAT);

    break;

  }

  case Intrinsic::aarch64_neon_uqadd: {

    if (MRI.getType(MI.getOperand(0).getReg()).isVector())

      return LowerBinOp(TargetOpcode::G_UADDSAT);

    break;

  }

  case Intrinsic::aarch64_neon_uqsub: {

    if (MRI.getType(MI.getOperand(0).getReg()).isVector())

      return LowerBinOp(TargetOpcode::G_USUBSAT);

    break;

  }

  case Intrinsic::aarch64_neon_udot:

    return LowerTriOp(AArch64::G_UDOT);

  case Intrinsic::aarch64_neon_sdot:

    return LowerTriOp(AArch64::G_SDOT);

  case Intrinsic::aarch64_neon_usdot:

    return LowerTriOp(AArch64::G_USDOT);

  case Intrinsic::aarch64_neon_sqxtn:

    return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_S);

  case Intrinsic::aarch64_neon_sqxtun:

    return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_U);

  case Intrinsic::aarch64_neon_uqxtn:

    return LowerUnaryOp(TargetOpcode::G_TRUNC_USAT_U);

  case Intrinsic::aarch64_neon_fcvtzu:

    return LowerUnaryOp(TargetOpcode::G_FPTOUI_SAT);

  case Intrinsic::aarch64_neon_fcvtzs:

    return LowerUnaryOp(TargetOpcode::G_FPTOSI_SAT);


  case Intrinsic::vector_reverse:

    // TODO: Add support for vector_reverse

    return false;

  }


  return true;

}


bool AArch64LegalizerInfo::legalizeShlAshrLshr(

    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,

    GISelChangeObserver &Observer) const {

  assert(MI.getOpcode() == TargetOpcode::G_ASHR ||

         MI.getOpcode() == TargetOpcode::G_LSHR ||

         MI.getOpcode() == TargetOpcode::G_SHL);

  // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the

  // imported patterns can select it later. Either way, it will be legal.

  Register AmtReg = MI.getOperand(2).getReg();

  auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI);

  if (!VRegAndVal)

    return true;

  // Check the shift amount is in range for an immediate form.

  int64_t Amount = VRegAndVal->Value.getSExtValue();

  if (Amount > 31)

    return true; // This will have to remain a register variant.

  auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount);

  Observer.changingInstr(MI);

  MI.getOperand(2).setReg(ExtCst.getReg(0));

  Observer.changedInstr(MI);

  return true;

}


static void matchLDPSTPAddrMode(Register Root, Register &Base, int &Offset,

                                MachineRegisterInfo &MRI) {

  Base = Root;

  Offset = 0;


  Register NewBase;

  int64_t NewOffset;

  if (mi_match(Root, MRI, m_GPtrAdd(m_Reg(NewBase), m_ICst(NewOffset))) &&

      isShiftedInt<7, 3>(NewOffset)) {

    Base = NewBase;

    Offset = NewOffset;

  }

}


// FIXME: This should be removed and replaced with the generic bitcast legalize

// action.

bool AArch64LegalizerInfo::legalizeLoadStore(

    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,

    GISelChangeObserver &Observer) const {

  assert(MI.getOpcode() == TargetOpcode::G_STORE ||

         MI.getOpcode() == TargetOpcode::G_LOAD);

  // Here we just try to handle vector loads/stores where our value type might

  // have pointer elements, which the SelectionDAG importer can't handle. To

  // allow the existing patterns for s64 to fire for p0, we just try to bitcast

  // the value to use s64 types.


  // Custom legalization requires the instruction, if not deleted, must be fully

  // legalized. In order to allow further legalization of the inst, we create

  // a new instruction and erase the existing one.


  Register ValReg = MI.getOperand(0).getReg();

  const LLT ValTy = MRI.getType(ValReg);


  if (ValTy == LLT::scalar(128)) {


    AtomicOrdering Ordering = (*MI.memoperands_begin())->getSuccessOrdering();

    bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;

    bool IsLoadAcquire = IsLoad && Ordering == AtomicOrdering::Acquire;

    bool IsStoreRelease = !IsLoad && Ordering == AtomicOrdering::Release;

    bool IsRcpC3 =

        ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease);


    LLT s64 = LLT::scalar(64);


    unsigned Opcode;

    if (IsRcpC3) {

      Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX;

    } else {

      // For LSE2, loads/stores should have been converted to monotonic and had

      // a fence inserted after them.

      assert(Ordering == AtomicOrdering::Monotonic ||

             Ordering == AtomicOrdering::Unordered);

      assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");


      Opcode = IsLoad ? AArch64::LDPXi : AArch64::STPXi;

    }


    MachineInstrBuilder NewI;

    if (IsLoad) {

      NewI = MIRBuilder.buildInstr(Opcode, {s64, s64}, {});

      MIRBuilder.buildMergeLikeInstr(

          ValReg, {NewI->getOperand(0), NewI->getOperand(1)});

    } else {

      auto Split = MIRBuilder.buildUnmerge(s64, MI.getOperand(0));

      NewI = MIRBuilder.buildInstr(

          Opcode, {}, {Split->getOperand(0), Split->getOperand(1)});

    }


    if (IsRcpC3) {

      NewI.addUse(MI.getOperand(1).getReg());

    } else {

      Register Base;

      int Offset;

      matchLDPSTPAddrMode(MI.getOperand(1).getReg(), Base, Offset, MRI);

      NewI.addUse(Base);

      NewI.addImm(Offset / 8);

    }


    NewI.cloneMemRefs(MI);

    constrainSelectedInstRegOperands(*NewI, *ST->getInstrInfo(),

                                     *MRI.getTargetRegisterInfo(),

                                     *ST->getRegBankInfo());

    MI.eraseFromParent();

    return true;

  }


  if (!ValTy.isPointerVector() ||

      ValTy.getElementType().getAddressSpace() != 0) {

    LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");

    return false;

  }


  unsigned PtrSize = ValTy.getElementType().getSizeInBits();

  const LLT NewTy = LLT::vector(ValTy.getElementCount(), PtrSize);

  auto &MMO = **MI.memoperands_begin();

  MMO.setType(NewTy);


  if (MI.getOpcode() == TargetOpcode::G_STORE) {

    auto Bitcast = MIRBuilder.buildBitcast(NewTy, ValReg);

    MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1), MMO);

  } else {

    auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO);

    MIRBuilder.buildBitcast(ValReg, NewLoad);

  }

  MI.eraseFromParent();

  return true;

}


bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,

                                         MachineRegisterInfo &MRI,

                                         MachineIRBuilder &MIRBuilder) const {

  MachineFunction &MF = MIRBuilder.getMF();

  Align Alignment(MI.getOperand(2).getImm());

  Register Dst = MI.getOperand(0).getReg();

  Register ListPtr = MI.getOperand(1).getReg();


  LLT PtrTy = MRI.getType(ListPtr);

  LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());


  const unsigned PtrSize = PtrTy.getSizeInBits() / 8;

  const Align PtrAlign = Align(PtrSize);

  auto List = MIRBuilder.buildLoad(

      PtrTy, ListPtr,

      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,

                               PtrTy, PtrAlign));


  MachineInstrBuilder DstPtr;

  if (Alignment > PtrAlign) {

    // Realign the list to the actual required alignment.

    auto AlignMinus1 =

        MIRBuilder.buildConstant(IntPtrTy, Alignment.value() - 1);

    auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));

    DstPtr = MIRBuilder.buildMaskLowPtrBits(PtrTy, ListTmp, Log2(Alignment));

  } else

    DstPtr = List;


  LLT ValTy = MRI.getType(Dst);

  uint64_t ValSize = ValTy.getSizeInBits() / 8;

  MIRBuilder.buildLoad(

      Dst, DstPtr,

      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,

                               ValTy, std::max(Alignment, PtrAlign)));


  auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrAlign));


  auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));


  MIRBuilder.buildStore(NewList, ListPtr,

                        *MF.getMachineMemOperand(MachinePointerInfo(),

                                                 MachineMemOperand::MOStore,

                                                 PtrTy, PtrAlign));


  MI.eraseFromParent();

  return true;

}


bool AArch64LegalizerInfo::legalizeBitfieldExtract(

    MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {

  // Only legal if we can select immediate forms.

  // TODO: Lower this otherwise.

  return getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI) &&

         getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);

}


bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,

                                         MachineRegisterInfo &MRI,

                                         LegalizerHelper &Helper) const {

  // When there is no integer popcount instruction (FEAT_CSSC isn't available),

  // it can be more efficiently lowered to the following sequence that uses

  // AdvSIMD registers/instructions as long as the copies to/from the AdvSIMD

  // registers are cheap.

  //  FMOV    D0, X0        // copy 64-bit int to vector, high bits zero'd

  //  CNT     V0.8B, V0.8B  // 8xbyte pop-counts

  //  ADDV    B0, V0.8B     // sum 8xbyte pop-counts

  //  UMOV    X0, V0.B[0]   // copy byte result back to integer reg

  //

  // For 128 bit vector popcounts, we lower to the following sequence:

  //  cnt.16b   v0, v0  // v8s16, v4s32, v2s64

  //  uaddlp.8h v0, v0  // v8s16, v4s32, v2s64

  //  uaddlp.4s v0, v0  //        v4s32, v2s64

  //  uaddlp.2d v0, v0  //               v2s64

  //

  // For 64 bit vector popcounts, we lower to the following sequence:

  //  cnt.8b    v0, v0  // v4s16, v2s32

  //  uaddlp.4h v0, v0  // v4s16, v2s32

  //  uaddlp.2s v0, v0  //        v2s32


  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;

  Register Dst = MI.getOperand(0).getReg();

  Register Val = MI.getOperand(1).getReg();

  LLT Ty = MRI.getType(Val);

  unsigned Size = Ty.getSizeInBits();


  assert(Ty == MRI.getType(Dst) &&

         "Expected src and dst to have the same type!");


  if (ST->hasCSSC() && Ty.isScalar() && Size == 128) {

    LLT s64 = LLT::scalar(64);


    auto Split = MIRBuilder.buildUnmerge(s64, Val);

    auto CTPOP1 = MIRBuilder.buildCTPOP(s64, Split->getOperand(0));

    auto CTPOP2 = MIRBuilder.buildCTPOP(s64, Split->getOperand(1));

    auto Add = MIRBuilder.buildAdd(s64, CTPOP1, CTPOP2);


    MIRBuilder.buildZExt(Dst, Add);

    MI.eraseFromParent();

    return true;

  }


  if (!ST->hasNEON() ||

      MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) {

    // Use generic lowering when custom lowering is not possible.

    return Ty.isScalar() && (Size == 32 || Size == 64) &&

           Helper.lowerBitCount(MI) ==

               LegalizerHelper::LegalizeResult::Legalized;

  }


  // Pre-conditioning: widen Val up to the nearest vector type.

  // s32,s64,v4s16,v2s32 -> v8i8

  // v8s16,v4s32,v2s64 -> v16i8

  LLT VTy = Size == 128 ? LLT::fixed_vector(16, 8) : LLT::fixed_vector(8, 8);

  if (Ty.isScalar()) {

    assert((Size == 32 || Size == 64 || Size == 128) && "Expected only 32, 64, or 128 bit scalars!");

    if (Size == 32) {

      Val = MIRBuilder.buildZExt(LLT::scalar(64), Val).getReg(0);

    }

  }

  Val = MIRBuilder.buildBitcast(VTy, Val).getReg(0);


  // Count bits in each byte-sized lane.

  auto CTPOP = MIRBuilder.buildCTPOP(VTy, Val);


  // Sum across lanes.


  if (ST->hasDotProd() && Ty.isVector() && Ty.getNumElements() >= 2 &&

      Ty.getScalarSizeInBits() != 16) {

    LLT Dt = Ty == LLT::fixed_vector(2, 64) ? LLT::fixed_vector(4, 32) : Ty;

    auto Zeros = MIRBuilder.buildConstant(Dt, 0);

    auto Ones = MIRBuilder.buildConstant(VTy, 1);

    MachineInstrBuilder Sum;


    if (Ty == LLT::fixed_vector(2, 64)) {

      auto UDOT =

          MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});

      Sum = MIRBuilder.buildInstr(AArch64::G_UADDLP, {Ty}, {UDOT});

    } else if (Ty == LLT::fixed_vector(4, 32)) {

      Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});

    } else if (Ty == LLT::fixed_vector(2, 32)) {

      Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});

    } else {

      llvm_unreachable("unexpected vector shape");

    }


    Sum->getOperand(0).setReg(Dst);

    MI.eraseFromParent();

    return true;

  }


  Register HSum = CTPOP.getReg(0);

  unsigned Opc;

  SmallVector<LLT> HAddTys;

  if (Ty.isScalar()) {

    Opc = Intrinsic::aarch64_neon_uaddlv;

    HAddTys.push_back(LLT::scalar(32));

  } else if (Ty == LLT::fixed_vector(8, 16)) {

    Opc = Intrinsic::aarch64_neon_uaddlp;

    HAddTys.push_back(LLT::fixed_vector(8, 16));

  } else if (Ty == LLT::fixed_vector(4, 32)) {

    Opc = Intrinsic::aarch64_neon_uaddlp;

    HAddTys.push_back(LLT::fixed_vector(8, 16));

    HAddTys.push_back(LLT::fixed_vector(4, 32));

  } else if (Ty == LLT::fixed_vector(2, 64)) {

    Opc = Intrinsic::aarch64_neon_uaddlp;

    HAddTys.push_back(LLT::fixed_vector(8, 16));

    HAddTys.push_back(LLT::fixed_vector(4, 32));

    HAddTys.push_back(LLT::fixed_vector(2, 64));

  } else if (Ty == LLT::fixed_vector(4, 16)) {

    Opc = Intrinsic::aarch64_neon_uaddlp;

    HAddTys.push_back(LLT::fixed_vector(4, 16));

  } else if (Ty == LLT::fixed_vector(2, 32)) {

    Opc = Intrinsic::aarch64_neon_uaddlp;

    HAddTys.push_back(LLT::fixed_vector(4, 16));

    HAddTys.push_back(LLT::fixed_vector(2, 32));

  } else

    llvm_unreachable("unexpected vector shape");

  MachineInstrBuilder UADD;

  for (LLT HTy : HAddTys) {

    UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}).addUse(HSum);

    HSum = UADD.getReg(0);

  }


  // Post-conditioning.

  if (Ty.isScalar() && (Size == 64 || Size == 128))

    MIRBuilder.buildZExt(Dst, UADD);

  else

    UADD->getOperand(0).setReg(Dst);

  MI.eraseFromParent();

  return true;

}


bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(

    MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {

  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;

  LLT s64 = LLT::scalar(64);

  auto Addr = MI.getOperand(1).getReg();

  auto DesiredI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(2));

  auto NewI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(3));

  auto DstLo = MRI.createGenericVirtualRegister(s64);

  auto DstHi = MRI.createGenericVirtualRegister(s64);


  MachineInstrBuilder CAS;

  if (ST->hasLSE()) {

    // We have 128-bit CASP instructions taking XSeqPair registers, which are

    // s128. We need the merge/unmerge to bracket the expansion and pair up with

    // the rest of the MIR so we must reassemble the extracted registers into a

    // 128-bit known-regclass one with code like this:

    //

    //     %in1 = REG_SEQUENCE Lo, Hi    ; One for each input

    //     %out = CASP %in1, ...

    //     %OldLo = G_EXTRACT %out, 0

    //     %OldHi = G_EXTRACT %out, 64

    auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();

    unsigned Opcode;

    switch (Ordering) {

    case AtomicOrdering::Acquire:

      Opcode = AArch64::CASPAX;

      break;

    case AtomicOrdering::Release:

      Opcode = AArch64::CASPLX;

      break;

    case AtomicOrdering::AcquireRelease:

    case AtomicOrdering::SequentiallyConsistent:

      Opcode = AArch64::CASPALX;

      break;

    default:

      Opcode = AArch64::CASPX;

      break;

    }


    LLT s128 = LLT::scalar(128);

    auto CASDst = MRI.createGenericVirtualRegister(s128);

    auto CASDesired = MRI.createGenericVirtualRegister(s128);

    auto CASNew = MRI.createGenericVirtualRegister(s128);

    MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASDesired}, {})

        .addUse(DesiredI->getOperand(0).getReg())

        .addImm(AArch64::sube64)

        .addUse(DesiredI->getOperand(1).getReg())

        .addImm(AArch64::subo64);

    MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASNew}, {})

        .addUse(NewI->getOperand(0).getReg())

        .addImm(AArch64::sube64)

        .addUse(NewI->getOperand(1).getReg())

        .addImm(AArch64::subo64);


    CAS = MIRBuilder.buildInstr(Opcode, {CASDst}, {CASDesired, CASNew, Addr});


    MIRBuilder.buildExtract({DstLo}, {CASDst}, 0);

    MIRBuilder.buildExtract({DstHi}, {CASDst}, 64);

  } else {

    // The -O0 CMP_SWAP_128 is friendlier to generate code for because LDXP/STXP

    // can take arbitrary registers so it just has the normal GPR64 operands the

    // rest of AArch64 is expecting.

    auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();

    unsigned Opcode;

    switch (Ordering) {

    case AtomicOrdering::Acquire:

      Opcode = AArch64::CMP_SWAP_128_ACQUIRE;

      break;

    case AtomicOrdering::Release:

      Opcode = AArch64::CMP_SWAP_128_RELEASE;

      break;

    case AtomicOrdering::AcquireRelease:

    case AtomicOrdering::SequentiallyConsistent:

      Opcode = AArch64::CMP_SWAP_128;

      break;

    default:

      Opcode = AArch64::CMP_SWAP_128_MONOTONIC;

      break;

    }


    auto Scratch = MRI.createVirtualRegister(&AArch64::GPR64RegClass);

    CAS = MIRBuilder.buildInstr(Opcode, {DstLo, DstHi, Scratch},

                                {Addr, DesiredI->getOperand(0),

                                 DesiredI->getOperand(1), NewI->getOperand(0),

                                 NewI->getOperand(1)});

  }


  CAS.cloneMemRefs(MI);

  constrainSelectedInstRegOperands(*CAS, *ST->getInstrInfo(),

                                   *MRI.getTargetRegisterInfo(),

                                   *ST->getRegBankInfo());


  MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {DstLo, DstHi});

  MI.eraseFromParent();

  return true;

}


bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,

                                        LegalizerHelper &Helper) const {

  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;

  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();

  LLT Ty = MRI.getType(MI.getOperand(1).getReg());

  auto BitReverse = MIRBuilder.buildBitReverse(Ty, MI.getOperand(1));

  MIRBuilder.buildCTLZ(MI.getOperand(0).getReg(), BitReverse);

  MI.eraseFromParent();

  return true;

}


bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,

                                          LegalizerHelper &Helper) const {

  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;


  // Tagged version MOPSMemorySetTagged is legalised in legalizeIntrinsic

  if (MI.getOpcode() == TargetOpcode::G_MEMSET) {

    // Anyext the value being set to 64 bit (only the bottom 8 bits are read by

    // the instruction).

    auto &Value = MI.getOperand(1);

    Register ExtValueReg =

        MIRBuilder.buildAnyExt(LLT::scalar(64), Value).getReg(0);

    Value.setReg(ExtValueReg);

    return true;

  }


  return false;

}


bool AArch64LegalizerInfo::legalizeExtractVectorElt(

    MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {

  const GExtractVectorElement *Element = cast<GExtractVectorElement>(&MI);

  auto VRegAndVal =

      getIConstantVRegValWithLookThrough(Element->getIndexReg(), MRI);

  if (VRegAndVal)

    return true;

  LLT VecTy = MRI.getType(Element->getVectorReg());

  if (VecTy.isScalableVector())

    return true;

  return Helper.lowerExtractInsertVectorElt(MI) !=

         LegalizerHelper::LegalizeResult::UnableToLegalize;

}


bool AArch64LegalizerInfo::legalizeDynStackAlloc(

    MachineInstr &MI, LegalizerHelper &Helper) const {

  MachineFunction &MF = *MI.getParent()->getParent();

  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;

  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();


  // If stack probing is not enabled for this function, use the default

  // lowering.

  if (!MF.getFunction().hasFnAttribute("probe-stack") ||

      MF.getFunction().getFnAttribute("probe-stack").getValueAsString() !=

          "inline-asm") {

    Helper.lowerDynStackAlloc(MI);

    return true;

  }


  Register Dst = MI.getOperand(0).getReg();

  Register AllocSize = MI.getOperand(1).getReg();

  Align Alignment = assumeAligned(MI.getOperand(2).getImm());


  assert(MRI.getType(Dst) == LLT::pointer(0, 64) &&

         "Unexpected type for dynamic alloca");

  assert(MRI.getType(AllocSize) == LLT::scalar(64) &&

         "Unexpected type for dynamic alloca");


  LLT PtrTy = MRI.getType(Dst);

  Register SPReg =

      Helper.getTargetLowering().getStackPointerRegisterToSaveRestore();

  Register SPTmp =

      Helper.getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);

  auto NewMI =

      MIRBuilder.buildInstr(AArch64::PROBED_STACKALLOC_DYN, {}, {SPTmp});

  MRI.setRegClass(NewMI.getReg(0), &AArch64::GPR64commonRegClass);

  MIRBuilder.setInsertPt(*NewMI->getParent(), NewMI);

  MIRBuilder.buildCopy(Dst, SPTmp);


  MI.eraseFromParent();

  return true;

}


bool AArch64LegalizerInfo::legalizePrefetch(MachineInstr &MI,

                                            LegalizerHelper &Helper) const {

  MachineIRBuilder &MIB = Helper.MIRBuilder;

  auto &AddrVal = MI.getOperand(0);


  int64_t IsWrite = MI.getOperand(1).getImm();

  int64_t Locality = MI.getOperand(2).getImm();

  int64_t IsData = MI.getOperand(3).getImm();


  bool IsStream = Locality == 0;

  if (Locality != 0) {

    assert(Locality <= 3 && "Prefetch locality out-of-range");

    // The locality degree is the opposite of the cache speed.

    // Put the number the other way around.

    // The encoding starts at 0 for level 1

    Locality = 3 - Locality;

  }


  unsigned PrfOp = (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | IsStream;


  MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);

  MI.eraseFromParent();

  return true;

}


bool AArch64LegalizerInfo::legalizeFptrunc(MachineInstr &MI,

                                           MachineIRBuilder &MIRBuilder,

                                           MachineRegisterInfo &MRI) const {

  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();

  assert(SrcTy.isFixedVector() && isPowerOf2_32(SrcTy.getNumElements()) &&

         "Expected a power of 2 elements");


  LLT s16 = LLT::scalar(16);

  LLT s32 = LLT::scalar(32);

  LLT s64 = LLT::scalar(64);

  LLT v2s16 = LLT::fixed_vector(2, s16);

  LLT v4s16 = LLT::fixed_vector(4, s16);

  LLT v2s32 = LLT::fixed_vector(2, s32);

  LLT v4s32 = LLT::fixed_vector(4, s32);

  LLT v2s64 = LLT::fixed_vector(2, s64);


  SmallVector<Register> RegsToUnmergeTo;

  SmallVector<Register> TruncOddDstRegs;

  SmallVector<Register> RegsToMerge;


  unsigned ElemCount = SrcTy.getNumElements();


  // Find the biggest size chunks we can work with

  int StepSize = ElemCount % 4 ? 2 : 4;


  // If we have a power of 2 greater than 2, we need to first unmerge into

  // enough pieces

  if (ElemCount <= 2)

    RegsToUnmergeTo.push_back(Src);

  else {

    for (unsigned i = 0; i < ElemCount / 2; ++i)

      RegsToUnmergeTo.push_back(MRI.createGenericVirtualRegister(v2s64));


    MIRBuilder.buildUnmerge(RegsToUnmergeTo, Src);

  }


  // Create all of the round-to-odd instructions and store them

  for (auto SrcReg : RegsToUnmergeTo) {

    Register Mid =

        MIRBuilder.buildInstr(AArch64::G_FPTRUNC_ODD, {v2s32}, {SrcReg})

            .getReg(0);

    TruncOddDstRegs.push_back(Mid);

  }


  // Truncate 4s32 to 4s16 if we can to reduce instruction count, otherwise

  // truncate 2s32 to 2s16.

  unsigned Index = 0;

  for (unsigned LoopIter = 0; LoopIter < ElemCount / StepSize; ++LoopIter) {

    if (StepSize == 4) {

      Register ConcatDst =

          MIRBuilder

              .buildMergeLikeInstr(

                  {v4s32}, {TruncOddDstRegs[Index++], TruncOddDstRegs[Index++]})

              .getReg(0);


      RegsToMerge.push_back(

          MIRBuilder.buildFPTrunc(v4s16, ConcatDst).getReg(0));

    } else {

      RegsToMerge.push_back(

          MIRBuilder.buildFPTrunc(v2s16, TruncOddDstRegs[Index++]).getReg(0));

    }

  }


  // If there is only one register, replace the destination

  if (RegsToMerge.size() == 1) {

    MRI.replaceRegWith(Dst, RegsToMerge.pop_back_val());

    MI.eraseFromParent();

    return true;

  }


  // Merge the rest of the instructions & replace the register

  Register Fin = MIRBuilder.buildMergeLikeInstr(DstTy, RegsToMerge).getReg(0);

  MRI.replaceRegWith(Dst, Fin);

  MI.eraseFromParent();

  return true;

}

MRI
unsigned const MachineRegisterInfo * MRI
Definition AArch64AdvSIMDScalarPass.cpp:103

matchLDPSTPAddrMode
static void matchLDPSTPAddrMode(Register Root, Register &Base, int &Offset, MachineRegisterInfo &MRI)
Definition AArch64LegalizerInfo.cpp:2061

AArch64LegalizerInfo.h
This file declares the targeting of the Machinelegalizer class for AArch64.

assert
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

AArch64Subtarget.h

unsupported
static Error unsupported(const char *Str, const Triple &T)
Definition MachO.cpp:71

Utils.h

DerivedTypes.h

GenericMachineInstrs.h
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...

libcall
@ libcall
Definition HWAddressSanitizer.cpp:244

MI
IRTranslator LLVM IR MI
Definition IRTranslator.cpp:110

Type.h

InlinePriorityMode::Size
@ Size
Definition InlineOrder.cpp:25

Intrinsics.h

LegalizerHelper.h

LegalizerInfo.h
Interface for Targets to specify which operations they can successfully select and how the others sho...

MIPatternMatch.h
Contains matchers for matching SSA Machine Instructions.

MachineIRBuilder.h
This file declares the MachineIRBuilder class.

MachineInstrBuilder.h

MachineInstr.h

MachineRegisterInfo.h

MathExtras.h

Register
Promote Memory to Register
Definition Mem2Reg.cpp:110

getReg
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
Definition MipsDisassembler.cpp:106

verify
ppc ctr loops verify
Definition PPCCTRLoopsVerify.cpp:71

if
if(PassOpts->AAPipeline)
Definition PassBuilderBindings.cpp:64

SPReg
static constexpr MCPhysReg SPReg
Definition RISCVFrameLowering.cpp:54

Opc
auto Opc
Definition RISCVRedundantCopyElimination.cpp:77

STLExtras.h
This file contains some templates that are useful if you are working with the STL at all.

LLVM_DEBUG
#define LLVM_DEBUG(...)
Definition Debug.h:114

TargetOpcodes.h

llvm::AArch64LegalizerInfo::legalizeCustom
bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, LostDebugLocObserver &LocObserver) const override
Called for instructions with the Custom LegalizationAction.
Definition AArch64LegalizerInfo.cpp:1447

llvm::AArch64LegalizerInfo::legalizeIntrinsic
bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const override
Definition AArch64LegalizerInfo.cpp:1682

llvm::AArch64LegalizerInfo::AArch64LegalizerInfo
AArch64LegalizerInfo(const AArch64Subtarget &ST)
Definition AArch64LegalizerInfo.cpp:42

llvm::AArch64Subtarget
Definition AArch64Subtarget.h:38

llvm::APInt
Class for arbitrary precision integers.
Definition APInt.h:78

llvm::APInt::zext
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1023

llvm::APInt::urem
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1677

llvm::APInt::getSExtValue
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1577

llvm::Attribute::getValueAsString
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
Definition Attributes.cpp:405

llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676

llvm::CmpInst::ICMP_EQ
@ ICMP_EQ
equal
Definition InstrTypes.h:697

llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition InstrTypes.h:698

llvm::Function::getFnAttribute
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:764

llvm::Function::hasFnAttribute
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:729

llvm::GExtractVectorElement::getVectorReg
Register getVectorReg() const
Definition GenericMachineInstrs.h:803

llvm::GExtractVectorElement::getIndexReg
Register getIndexReg() const
Definition GenericMachineInstrs.h:804

llvm::GISelChangeObserver
Abstract class that contains various methods for clients to notify about changes.
Definition GISelChangeObserver.h:30

llvm::GISelChangeObserver::changingInstr
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.

llvm::GISelChangeObserver::changedInstr
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.

llvm::LLT
Definition LowLevelType.h:40

llvm::LLT::isScalableVector
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
Definition LowLevelType.h:182

llvm::LLT::getScalarSizeInBits
constexpr unsigned getScalarSizeInBits() const
Definition LowLevelType.h:279

llvm::LLT::isScalar
constexpr bool isScalar() const
Definition LowLevelType.h:147

llvm::LLT::scalable_vector
static constexpr LLT scalable_vector(unsigned MinNumElements, unsigned ScalarSizeInBits)
Get a low-level scalable vector of some number of elements and element width.
Definition LowLevelType.h:114

llvm::LLT::vector
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
Definition LowLevelType.h:65

llvm::LLT::isPointerVector
constexpr bool isPointerVector() const
Definition LowLevelType.h:153

llvm::LLT::scalar
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition LowLevelType.h:43

llvm::LLT::getNumElements
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
Definition LowLevelType.h:160

llvm::LLT::isVector
constexpr bool isVector() const
Definition LowLevelType.h:149

llvm::LLT::pointer
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
Definition LowLevelType.h:58

llvm::LLT::getSizeInBits
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition LowLevelType.h:191

llvm::LLT::getElementType
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
Definition LowLevelType.h:292

llvm::LLT::getElementCount
constexpr ElementCount getElementCount() const
Definition LowLevelType.h:184

llvm::LLT::changeElementSize
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
Definition LowLevelType.h:225

llvm::LLT::getAddressSpace
constexpr unsigned getAddressSpace() const
Definition LowLevelType.h:285

llvm::LLT::fixed_vector
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
Definition LowLevelType.h:101

llvm::LLT::isFixedVector
constexpr bool isFixedVector() const
Returns true if the LLT is a fixed vector.
Definition LowLevelType.h:178

llvm::LLT::changeElementCount
constexpr LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
Definition LowLevelType.h:242

llvm::LegacyLegalizerInfo::computeTables
LLVM_ABI void computeTables()
Compute any ancillary tables needed to quickly decide how an operation should be handled.
Definition LegacyLegalizerInfo.cpp:105

llvm::LegalizeRuleSet::minScalar
LegalizeRuleSet & minScalar(unsigned TypeIdx, const LLT Ty)
Ensure the scalar is at least as wide as Ty.
Definition LegalizerInfo.h:1079

llvm::LegalizeRuleSet::widenScalarOrEltToNextPow2OrMinSize
LegalizeRuleSet & widenScalarOrEltToNextPow2OrMinSize(unsigned TypeIdx, unsigned MinSize=0)
Widen the scalar or vector element type to the next power of two that is at least MinSize.
Definition LegalizerInfo.h:1008

llvm::LegalizeRuleSet::legalFor
LegalizeRuleSet & legalFor(std::initializer_list< LLT > Types)
The instruction is legal when type index 0 is any type in the given list.
Definition LegalizerInfo.h:636

llvm::LegalizeRuleSet::maxScalarEltSameAsIf
LegalizeRuleSet & maxScalarEltSameAsIf(LegalityPredicate Predicate, unsigned TypeIdx, unsigned SmallTypeIdx)
Conditionally narrow the scalar or elt to match the size of another.
Definition LegalizerInfo.h:1214

llvm::LegalizeRuleSet::unsupported
LegalizeRuleSet & unsupported()
The instruction is unsupported.
Definition LegalizerInfo.h:880

llvm::LegalizeRuleSet::scalarSameSizeAs
LegalizeRuleSet & scalarSameSizeAs(unsigned TypeIdx, unsigned SameSizeIdx)
Change the type TypeIdx to have the same scalar size as type SameSizeIdx.
Definition LegalizerInfo.h:1190

llvm::LegalizeRuleSet::bitcastIf
LegalizeRuleSet & bitcastIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
The specified type index is coerced if predicate is true.
Definition LegalizerInfo.h:710

llvm::LegalizeRuleSet::libcallFor
LegalizeRuleSet & libcallFor(std::initializer_list< LLT > Types)
Definition LegalizerInfo.h:806

llvm::LegalizeRuleSet::maxScalar
LegalizeRuleSet & maxScalar(unsigned TypeIdx, const LLT Ty)
Ensure the scalar is at most as wide as Ty.
Definition LegalizerInfo.h:1118

llvm::LegalizeRuleSet::minScalarOrElt
LegalizeRuleSet & minScalarOrElt(unsigned TypeIdx, const LLT Ty)
Ensure the scalar or element is at least as wide as Ty.
Definition LegalizerInfo.h:1038

llvm::LegalizeRuleSet::clampMaxNumElements
LegalizeRuleSet & clampMaxNumElements(unsigned TypeIdx, const LLT EltTy, unsigned MaxElements)
Limit the number of elements in EltTy vectors to at most MaxElements.
Definition LegalizerInfo.h:1279

llvm::LegalizeRuleSet::clampMinNumElements
LegalizeRuleSet & clampMinNumElements(unsigned TypeIdx, const LLT EltTy, unsigned MinElements)
Limit the number of elements in EltTy vectors to at least MinElements.
Definition LegalizerInfo.h:1241

llvm::LegalizeRuleSet::widenVectorEltsToVectorMinSize
LegalizeRuleSet & widenVectorEltsToVectorMinSize(unsigned TypeIdx, unsigned VectorSize)
Ensure the vector size is at least as wide as VectorSize by promoting the element.
Definition LegalizerInfo.h:1059

llvm::LegalizeRuleSet::lowerIfMemSizeNotPow2
LegalizeRuleSet & lowerIfMemSizeNotPow2()
Lower a memory operation if the memory size, rounded to bytes, is not a power of 2.
Definition LegalizerInfo.h:900

llvm::LegalizeRuleSet::minScalarEltSameAsIf
LegalizeRuleSet & minScalarEltSameAsIf(LegalityPredicate Predicate, unsigned TypeIdx, unsigned LargeTypeIdx)
Conditionally widen the scalar or elt to match the size of another.
Definition LegalizerInfo.h:1196

llvm::LegalizeRuleSet::customForCartesianProduct
LegalizeRuleSet & customForCartesianProduct(std::initializer_list< LLT > Types)
Definition LegalizerInfo.h:940

llvm::LegalizeRuleSet::lowerIfMemSizeNotByteSizePow2
LegalizeRuleSet & lowerIfMemSizeNotByteSizePow2()
Lower a memory operation if the memory access size is not a round power of 2 byte size.
Definition LegalizerInfo.h:908

llvm::LegalizeRuleSet::moreElementsToNextPow2
LegalizeRuleSet & moreElementsToNextPow2(unsigned TypeIdx)
Add more elements to the vector to reach the next power of two.
Definition LegalizerInfo.h:1233

llvm::LegalizeRuleSet::narrowScalarIf
LegalizeRuleSet & narrowScalarIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
Narrow the scalar to the one selected by the mutation if the predicate is true.
Definition LegalizerInfo.h:845

llvm::LegalizeRuleSet::lower
LegalizeRuleSet & lower()
The instruction is lowered.
Definition LegalizerInfo.h:719

llvm::LegalizeRuleSet::moreElementsIf
LegalizeRuleSet & moreElementsIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
Add more elements to reach the type selected by the mutation if the predicate is true.
Definition LegalizerInfo.h:862

llvm::LegalizeRuleSet::lowerFor
LegalizeRuleSet & lowerFor(std::initializer_list< LLT > Types)
The instruction is lowered when type index 0 is any type in the given list.
Definition LegalizerInfo.h:745

llvm::LegalizeRuleSet::scalarizeIf
LegalizeRuleSet & scalarizeIf(LegalityPredicate Predicate, unsigned TypeIdx)
Definition LegalizerInfo.h:1030

llvm::LegalizeRuleSet::lowerIf
LegalizeRuleSet & lowerIf(LegalityPredicate Predicate)
The instruction is lowered if predicate is true.
Definition LegalizerInfo.h:728

llvm::LegalizeRuleSet::clampScalar
LegalizeRuleSet & clampScalar(unsigned TypeIdx, const LLT MinTy, const LLT MaxTy)
Limit the range of scalar sizes to MinTy and MaxTy.
Definition LegalizerInfo.h:1145

llvm::LegalizeRuleSet::custom
LegalizeRuleSet & custom()
Unconditionally custom lower.
Definition LegalizerInfo.h:971

llvm::LegalizeRuleSet::minScalarSameAs
LegalizeRuleSet & minScalarSameAs(unsigned TypeIdx, unsigned LargeTypeIdx)
Widen the scalar to match the size of another.
Definition LegalizerInfo.h:1165

llvm::LegalizeRuleSet::unsupportedIf
LegalizeRuleSet & unsupportedIf(LegalityPredicate Predicate)
Definition LegalizerInfo.h:884

llvm::LegalizeRuleSet::minScalarOrEltIf
LegalizeRuleSet & minScalarOrEltIf(LegalityPredicate Predicate, unsigned TypeIdx, const LLT Ty)
Ensure the scalar or element is at least as wide as Ty.
Definition LegalizerInfo.h:1047

llvm::LegalizeRuleSet::widenScalarIf
LegalizeRuleSet & widenScalarIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
Widen the scalar to the one selected by the mutation if the predicate is true.
Definition LegalizerInfo.h:836

llvm::LegalizeRuleSet::alwaysLegal
LegalizeRuleSet & alwaysLegal()
Definition LegalizerInfo.h:703

llvm::LegalizeRuleSet::clampNumElements
LegalizeRuleSet & clampNumElements(unsigned TypeIdx, const LLT MinTy, const LLT MaxTy)
Limit the number of elements for the given vectors to at least MinTy's number of elements and at most...
Definition LegalizerInfo.h:1303

llvm::LegalizeRuleSet::maxScalarIf
LegalizeRuleSet & maxScalarIf(LegalityPredicate Predicate, unsigned TypeIdx, const LLT Ty)
Conditionally limit the maximum size of the scalar.
Definition LegalizerInfo.h:1129

llvm::LegalizeRuleSet::customIf
LegalizeRuleSet & customIf(LegalityPredicate Predicate)
Definition LegalizerInfo.h:913

llvm::LegalizeRuleSet::widenScalarToNextPow2
LegalizeRuleSet & widenScalarToNextPow2(unsigned TypeIdx, unsigned MinSize=0)
Widen the scalar to the next power of two that is at least MinSize.
Definition LegalizerInfo.h:978

llvm::LegalizeRuleSet::scalarize
LegalizeRuleSet & scalarize(unsigned TypeIdx)
Definition LegalizerInfo.h:1024

llvm::LegalizeRuleSet::legalForCartesianProduct
LegalizeRuleSet & legalForCartesianProduct(std::initializer_list< LLT > Types)
The instruction is legal when type indexes 0 and 1 are both in the given list.
Definition LegalizerInfo.h:685

llvm::LegalizeRuleSet::legalForTypesWithMemDesc
LegalizeRuleSet & legalForTypesWithMemDesc(std::initializer_list< LegalityPredicates::TypePairAndMemDesc > TypesAndMemDesc)
The instruction is legal when type indexes 0 and 1 along with the memory size and minimum alignment i...
Definition LegalizerInfo.h:676

llvm::LegalizeRuleSet::immIdx
unsigned immIdx(unsigned ImmIdx)
Definition LegalizerInfo.h:618

llvm::LegalizeRuleSet::widenScalarOrEltToNextPow2
LegalizeRuleSet & widenScalarOrEltToNextPow2(unsigned TypeIdx, unsigned MinSize=0)
Widen the scalar or vector element type to the next power of two that is at least MinSize.
Definition LegalizerInfo.h:998

llvm::LegalizeRuleSet::legalIf
LegalizeRuleSet & legalIf(LegalityPredicate Predicate)
The instruction is legal if predicate is true.
Definition LegalizerInfo.h:629

llvm::LegalizerHelper
Definition LegalizerHelper.h:49

llvm::LegalizerHelper::lowerDynStackAlloc
LLVM_ABI LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
Definition LegalizerHelper.cpp:9377

llvm::LegalizerHelper::lowerBitCount
LLVM_ABI LegalizeResult lowerBitCount(MachineInstr &MI)
Definition LegalizerHelper.cpp:7660

llvm::LegalizerHelper::lowerExtractInsertVectorElt
LLVM_ABI LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
Definition LegalizerHelper.cpp:9154

llvm::LegalizerHelper::lowerAbsToCNeg
LLVM_ABI LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
Definition LegalizerHelper.cpp:10270

llvm::LegalizerHelper::getTargetLowering
const TargetLowering & getTargetLowering() const
Definition LegalizerHelper.h:82

llvm::LegalizerHelper::lowerFunnelShiftAsShifts
LLVM_ABI LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
Definition LegalizerHelper.cpp:7918

llvm::LegalizerHelper::createStackStoreLoad
LLVM_ABI MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val)
Create a store of Val to a stack temporary and return a load as the same type as Res.
Definition LegalizerHelper.cpp:4932

llvm::LegalizerHelper::Legalized
@ Legalized
Instruction has been legalized and the MachineFunction changed.
Definition LegalizerHelper.h:73

llvm::LegalizerHelper::UnableToLegalize
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
Definition LegalizerHelper.h:77

llvm::LegalizerHelper::Observer
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
Definition LegalizerHelper.h:56

llvm::LegalizerHelper::getDynStackAllocTargetPtr
LLVM_ABI Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
Definition LegalizerHelper.cpp:9353

llvm::LegalizerHelper::MIRBuilder
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
Definition LegalizerHelper.h:53

llvm::LegalizerInfo::getActionDefinitionsBuilder
LegalizeRuleSet & getActionDefinitionsBuilder(unsigned Opcode)
Get the action definition builder for the given opcode.
Definition LegalizerInfo.cpp:314

llvm::LegalizerInfo::getLegacyLegalizerInfo
const LegacyLegalizerInfo & getLegacyLegalizerInfo() const
Definition LegalizerInfo.h:1353

llvm::LostDebugLocObserver
Definition LostDebugLocObserver.h:20

llvm::MachineFunction
Definition MachineFunction.h:295

llvm::MachineFunction::getMachineMemOperand
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Definition MachineFunction.cpp:544

llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition MachineFunction.h:801

llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition MachineFunction.h:752

llvm::MachineIRBuilder
Helper class to build MachineInstr.
Definition MachineIRBuilder.h:237

llvm::MachineIRBuilder::setInsertPt
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
Definition MachineIRBuilder.h:340

llvm::MachineIRBuilder::buildAdd
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
Definition MachineIRBuilder.h:1853

llvm::MachineIRBuilder::buildNot
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not, NegOne = G_CONSTANT -1 Res = G_OR Op0, NegOne.
Definition MachineIRBuilder.h:2028

llvm::MachineIRBuilder::buildUnmerge
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
Definition MachineIRBuilder.cpp:708

llvm::MachineIRBuilder::buildExtract
MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index)
Build and insert Res0, ... = G_EXTRACT Src, Idx0.
Definition MachineIRBuilder.cpp:640

llvm::MachineIRBuilder::buildICmp
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
Definition MachineIRBuilder.cpp:945

llvm::MachineIRBuilder::getInsertPt
MachineBasicBlock::iterator getInsertPt()
Current insertion point for new instructions.
Definition MachineIRBuilder.h:335

llvm::MachineIRBuilder::buildZExt
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
Definition MachineIRBuilder.cpp:529

llvm::MachineIRBuilder::buildIntrinsic
MachineInstrBuilder buildIntrinsic(Intrinsic::ID ID, ArrayRef< Register > Res, bool HasSideEffects, bool isConvergent)
Build and insert a G_INTRINSIC instruction.
Definition MachineIRBuilder.cpp:895

llvm::MachineIRBuilder::buildCTLZ
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Op0, Src0.
Definition MachineIRBuilder.h:2047

llvm::MachineIRBuilder::buildMergeLikeInstr
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
Definition MachineIRBuilder.cpp:680

llvm::MachineIRBuilder::buildLoad
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
Definition MachineIRBuilder.h:1041

llvm::MachineIRBuilder::buildPtrAdd
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
Definition MachineIRBuilder.cpp:206

llvm::MachineIRBuilder::buildBitReverse
MachineInstrBuilder buildBitReverse(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITREVERSE Src.
Definition MachineIRBuilder.h:2451

llvm::MachineIRBuilder::buildStore
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
Definition MachineIRBuilder.cpp:492

llvm::MachineIRBuilder::buildInstr
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
Definition MachineIRBuilder.h:423

llvm::MachineIRBuilder::buildCTPOP
MachineInstrBuilder buildCTPOP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTPOP Op0, Src0.
Definition MachineIRBuilder.h:2042

llvm::MachineIRBuilder::getMF
MachineFunction & getMF()
Getter for the function we currently build.
Definition MachineIRBuilder.h:289

llvm::MachineIRBuilder::buildExtOrTrunc
MachineInstrBuilder buildExtOrTrunc(unsigned ExtOpc, const DstOp &Res, const SrcOp &Op)
Build and insert Res = ExtOpc, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes of...
Definition MachineIRBuilder.cpp:571

llvm::MachineIRBuilder::buildTrunc
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
Definition MachineIRBuilder.cpp:934

llvm::MachineIRBuilder::getMBB
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
Definition MachineIRBuilder.h:321

llvm::MachineIRBuilder::buildAnyExt
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
Definition MachineIRBuilder.cpp:519

llvm::MachineIRBuilder::buildBitcast
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
Definition MachineIRBuilder.h:763

llvm::MachineIRBuilder::getMRI
MachineRegisterInfo * getMRI()
Getter for MRI.
Definition MachineIRBuilder.h:311

llvm::MachineIRBuilder::buildFPTrunc
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
Definition MachineIRBuilder.cpp:940

llvm::MachineIRBuilder::buildCopy
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
Definition MachineIRBuilder.cpp:332

llvm::MachineIRBuilder::buildMaskLowPtrBits
MachineInstrBuilder buildMaskLowPtrBits(const DstOp &Res, const SrcOp &Op0, uint32_t NumBits)
Build and insert Res = G_PTRMASK Op0, G_CONSTANT (1 << NumBits) - 1.
Definition MachineIRBuilder.cpp:247

llvm::MachineIRBuilder::buildConstant
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Definition MachineIRBuilder.cpp:337

llvm::MachineInstrBuilder
Definition MachineInstrBuilder.h:171

llvm::MachineInstrBuilder::getReg
Register getReg(unsigned Idx) const
Get the register for the operand index.
Definition MachineInstrBuilder.h:196

llvm::MachineInstrBuilder::addUse
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
Definition MachineInstrBuilder.h:225

llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition MachineInstrBuilder.h:233

llvm::MachineInstrBuilder::add
const MachineInstrBuilder & add(const MachineOperand &MO) const
Definition MachineInstrBuilder.h:326

llvm::MachineInstrBuilder::cloneMemRefs
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
Definition MachineInstrBuilder.h:315

llvm::MachineInstr
Representation of each machine instruction.
Definition MachineInstr.h:72

llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition MachineInstr.h:607

llvm::MachineMemOperand::MOLoad
@ MOLoad
The memory access reads data.
Definition MachineMemOperand.h:137

llvm::MachineMemOperand::MOStore
@ MOStore
The memory access writes data.
Definition MachineMemOperand.h:139

llvm::MachineOperand::setReg
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
Definition MachineOperand.cpp:60

llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition MachineOperand.h:372

llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition MachineRegisterInfo.h:53

llvm::MachineRegisterInfo::createGenericVirtualRegister
LLVM_ABI Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
Definition MachineRegisterInfo.cpp:191

llvm::Register
Wrapper class representing virtual and physical registers.
Definition Register.h:20

llvm::SmallVectorImpl::pop_back_val
T pop_back_val()
Definition SmallVector.h:676

llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition SmallVector.h:419

llvm::SmallVectorTemplateCommon::size
size_t size() const
Definition SmallVector.h:80

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition SmallVector.h:1205

llvm::TargetLoweringBase::getStackPointerRegisterToSaveRestore
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
Definition TargetLowering.h:2076

llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition TargetMachine.h:83

llvm::TargetMachine::getCodeModel
CodeModel::Model getCodeModel() const
Returns the code model.
Definition TargetMachine.h:264

llvm::Target
Target - Wrapper for Target specific information.
Definition TargetRegistry.h:147

llvm::Value
LLVM Value Representation.
Definition Value.h:75

llvm::details::FixedOrScalableQuantity::divideCoefficientBy
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition ErrorHandling.h:164

llvm::AArch64II::MO_NC
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
Definition AArch64BaseInfo.h:960

llvm::AArch64II::MO_PAGEOFF
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
Definition AArch64BaseInfo.h:924

llvm::AArch64II::MO_GOT
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
Definition AArch64BaseInfo.h:955

llvm::AArch64II::MO_PREL
@ MO_PREL
MO_PREL - Indicates that the bits of the symbol operand represented by MO_G0 etc are PC relative.
Definition AArch64BaseInfo.h:979

llvm::AArch64II::MO_PAGE
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
Definition AArch64BaseInfo.h:919

llvm::AArch64II::MO_TAGGED
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
Definition AArch64BaseInfo.h:987

llvm::AArch64II::MO_G3
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
Definition AArch64BaseInfo.h:928

llvm::AMDGPU::HSAMD::Kernel::Arg::Key::Align
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
Definition AMDGPUMetadata.h:183

llvm::ARM_MB::ST
@ ST
Definition ARMBaseInfo.h:73

llvm::CodeModel::Small
@ Small
Definition CodeGen.h:31

llvm::ISD::CTPOP
@ CTPOP
Definition ISDOpcodes.h:782

llvm::Intrinsic::ID
unsigned ID
Definition GenericSSAContext.h:28

llvm::LegalityPredicates
Definition LegalizerInfo.h:206

llvm::LegalityPredicates::scalarOrEltWiderThan
LLVM_ABI LegalityPredicate scalarOrEltWiderThan(unsigned TypeIdx, unsigned Size)
True iff the specified type index is a scalar or a vector with an element type that's wider than the ...
Definition LegalityPredicates.cpp:178

llvm::LegalityPredicates::isPointerVector
LLVM_ABI LegalityPredicate isPointerVector(unsigned TypeIdx)
True iff the specified type index is a vector of pointers (with any address space).
Definition LegalityPredicates.cpp:104

llvm::LegalityPredicates::typeInSet
LLVM_ABI LegalityPredicate typeInSet(unsigned TypeIdx, std::initializer_list< LLT > TypesInit)
True iff the given type index is one of the specified types.
Definition LegalityPredicates.cpp:34

llvm::LegalityPredicates::smallerThan
LLVM_ABI LegalityPredicate smallerThan(unsigned TypeIdx0, unsigned TypeIdx1)
True iff the first type index has a smaller total bit size than second type index.
Definition LegalityPredicates.cpp:134

llvm::LegalityPredicates::atomicOrderingAtLeastOrStrongerThan
LLVM_ABI LegalityPredicate atomicOrderingAtLeastOrStrongerThan(unsigned MMOIdx, AtomicOrdering Ordering)
True iff the specified MMO index has at an atomic ordering of at Ordering or stronger.
Definition LegalityPredicates.cpp:246

llvm::LegalityPredicates::any
Predicate any(Predicate P0, Predicate P1)
True iff P0 or P1 are true.
Definition LegalizerInfo.h:249

llvm::LegalityPredicates::isVector
LLVM_ABI LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
Definition LegalityPredicates.cpp:84

llvm::LegalityPredicates::all
Predicate all(Predicate P0, Predicate P1)
True iff P0 and P1 are true.
Definition LegalizerInfo.h:236

llvm::LegalityPredicates::typeIs
LLVM_ABI LegalityPredicate typeIs(unsigned TypeIdx, LLT TypesInit)
True iff the given type index is the specified type.
Definition LegalityPredicates.cpp:28

llvm::LegalityPredicates::scalarWiderThan
LLVM_ABI LegalityPredicate scalarWiderThan(unsigned TypeIdx, unsigned Size)
True iff the specified type index is a scalar that's wider than the given size.
Definition LegalityPredicates.cpp:126

llvm::LegalityPredicates::scalarNarrowerThan
LLVM_ABI LegalityPredicate scalarNarrowerThan(unsigned TypeIdx, unsigned Size)
True iff the specified type index is a scalar that's narrower than the given size.
Definition LegalityPredicates.cpp:118

llvm::LegalizeActions
Definition LegalizerInfo.h:44

llvm::LegalizeActions::Bitcast
@ Bitcast
Perform the operation on a different, but equivalently sized type.
Definition LegalizerInfo.h:75

llvm::LegalizeMutations
Definition LegalizerInfo.h:375

llvm::LegalizeMutations::moreElementsToNextPow2
LLVM_ABI LegalizeMutation moreElementsToNextPow2(unsigned TypeIdx, unsigned Min=0)
Add more elements to the type for the given type index to the next power of.
Definition LegalizeMutations.cpp:95

llvm::LegalizeMutations::scalarize
LLVM_ABI LegalizeMutation scalarize(unsigned TypeIdx)
Break up the vector type for the given type index into the element type.
Definition LegalizeMutations.cpp:106

llvm::LegalizeMutations::changeElementTo
LLVM_ABI LegalizeMutation changeElementTo(unsigned TypeIdx, unsigned FromTypeIdx)
Keep the same scalar or element type as the given type index.
Definition LegalizeMutations.cpp:29

llvm::LegalizeMutations::widenScalarOrEltToNextPow2
LLVM_ABI LegalizeMutation widenScalarOrEltToNextPow2(unsigned TypeIdx, unsigned Min=0)
Widen the scalar type or vector element type for the given type index to the next power of 2.
Definition LegalizeMutations.cpp:75

llvm::LegalizeMutations::changeTo
LLVM_ABI LegalizeMutation changeTo(unsigned TypeIdx, LLT Ty)
Select this specific type for the given type index.
Definition LegalizeMutations.cpp:17

llvm::LegalizeMutations::changeElementSizeTo
LLVM_ABI LegalizeMutation changeElementSizeTo(unsigned TypeIdx, unsigned FromTypeIdx)
Change the scalar size or element size to have the same scalar size as type index FromIndex.
Definition LegalizeMutations.cpp:65

llvm::MIPatternMatch
Definition MIPatternMatch.h:25

llvm::MIPatternMatch::m_Reg
operand_type_match m_Reg()
Definition MIPatternMatch.h:311

llvm::MIPatternMatch::m_ICst
ConstantMatch< APInt > m_ICst(APInt &Cst)
Definition MIPatternMatch.h:102

llvm::MIPatternMatch::mi_match
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
Definition MIPatternMatch.h:28

llvm::MIPatternMatch::m_GPtrAdd
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
Definition MIPatternMatch.h:570

llvm::NVPTX::Ordering
Ordering
Definition NVPTX.h:166

llvm::TargetOpcode
Invariant opcodes: All instruction sets have these as their low opcodes.
Definition TargetOpcodes.h:20

llvm::codeview::ExportFlags::IsData
@ IsData
Definition CodeView.h:466

llvm::dwarf::Index
Index
Definition Dwarf.h:903

llvm::logicalview::LVOutputKind::Split
@ Split
Definition LVOptions.h:145

llvm::sampleprof::Base
@ Base
Definition Discriminator.h:58

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26

llvm::Offset
@ Offset
Definition DWP.cpp:532

llvm::Value
FunctionAddr VTableAddr Value
Definition InstrProf.h:137

llvm::constrainSelectedInstRegOperands
LLVM_ABI void constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
Definition Utils.cpp:155

llvm::isConstantOrConstantSplatVector
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition Utils.cpp:1566

llvm::LegalityPredicate
std::function< bool(const LegalityQuery &)> LegalityPredicate
Definition LegalizerInfo.h:202

llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279

llvm::dbgs
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207

llvm::SmallVector
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
Definition SmallVector.h:1131

llvm::AtomicOrdering
AtomicOrdering
Atomic ordering for LLVM's memory model.
Definition AtomicOrdering.h:56

llvm::AtomicOrdering::Monotonic
@ Monotonic
Definition AtomicOrdering.h:59

llvm::AtomicOrdering::Unordered
@ Unordered
Definition AtomicOrdering.h:58

llvm::AtomicOrdering::NotAtomic
@ NotAtomic
Definition AtomicOrdering.h:57

llvm::AtomicOrdering::AcquireRelease
@ AcquireRelease
Definition AtomicOrdering.h:63

llvm::AtomicOrdering::Acquire
@ Acquire
Definition AtomicOrdering.h:61

llvm::AtomicOrdering::Release
@ Release
Definition AtomicOrdering.h:62

llvm::AtomicOrdering::SequentiallyConsistent
@ SequentiallyConsistent
Definition AtomicOrdering.h:64

llvm::RecurKind::Add
@ Add
Sum of integers.
Definition IVDescriptors.h:37

llvm::alignTo
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144

llvm::Op
DWARFExpression::Operation Op
Definition DWARFExpressionPrinter.cpp:22

llvm::isShiftedInt
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:182

llvm::BitWidth
constexpr unsigned BitWidth
Definition BitmaskEnum.h:219

llvm::cast
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559

llvm::getIConstantVRegValWithLookThrough
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:433

llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947

llvm::BasicBlockSection::List
@ List
Definition TargetOptions.h:59

llvm::assumeAligned
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
Definition Alignment.h:100

llvm::Log2
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197

llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39

llvm::LegalityQuery
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
Definition LegalizerInfo.h:111

llvm::LegalityQuery::MMODescrs
ArrayRef< MemDesc > MMODescrs
Operations which require memory can use this to place requirements on the memory type for each MMO.
Definition LegalizerInfo.h:133

llvm::LegalityQuery::Types
ArrayRef< LLT > Types
Definition LegalizerInfo.h:113

llvm::MachinePointerInfo
This class contains a discriminated union of information about pointers in memory operands,...
Definition MachineMemOperand.h:42