diff --git a/src/compiler/nir/nir_lower_double_ops.c b/src/compiler/nir/nir_lower_double_ops.c index 73226fd..f9c93a9 100644 --- a/src/compiler/nir/nir_lower_double_ops.c +++ b/src/compiler/nir/nir_lower_double_ops.c @@ -49,9 +49,7 @@ set_exponent(nir_builder *b, nir_ssa_def *src, nir_ssa_def *exp) /* The exponent is bits 52-62, or 20-30 of the high word, so set the exponent * to 1023 */ - nir_ssa_def *new_hi = nir_bitfield_insert(b, hi, exp, - nir_imm_int(b, 20), - nir_imm_int(b, 11)); + nir_ssa_def *new_hi = nir_bfi(b, nir_imm_int(b, 0x7ff00000), exp, hi); /* recombine */ return nir_pack_64_2x32_split(b, lo, new_hi); } diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources index 9de8168..6c36099 100644 --- a/src/gallium/drivers/nouveau/Makefile.sources +++ b/src/gallium/drivers/nouveau/Makefile.sources @@ -151,14 +151,6 @@ NVC0_CODEGEN_SOURCES := \ codegen/nv50_ir_target_nvc0.h NVC0_C_SOURCES := \ - nvc0/cla0c0qmd.h \ - nvc0/clc0c0qmd.h \ - nvc0/clc3c0qmd.h \ - nvc0/drf.h \ - nvc0/qmd.h \ - nvc0/qmda0c0.c \ - nvc0/qmdc0c0.c \ - nvc0/qmdc3c0.c \ nvc0/gm107_texture.xml.h \ nvc0/nvc0_3d.xml.h \ nvc0/nvc0_compute.c \ diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h index d58c0d2..42ee969 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h @@ -67,10 +67,8 @@ enum operation OP_AND, OP_OR, OP_XOR, - OP_LOP3_LUT, OP_SHL, OP_SHR, - OP_SHF, OP_MAX, OP_MIN, OP_SAT, // CLAMP(f32, 0.0, 1.0) @@ -118,7 +116,6 @@ enum operation OP_PINTERP, OP_EMIT, // emit vertex OP_RESTART, // restart primitive - OP_FINAL, // finish emitting primitives OP_TEX, OP_TXB, // texture bias OP_TXL, // texure lod @@ -154,10 +151,7 @@ enum operation OP_INSBF, // insert first src1[8:15] bits of src0 into src2 at src1[0:7] OP_EXTBF, // place bits [K,K+N) of src0 into dst, src1 = 0xNNKK OP_BFIND, // find highest/lowest set bit - OP_BREV, // bitfield reverse - OP_BMSK, // bitfield mask OP_PERMT, // dst = bytes from src2,src0 selected by src1 (nvc0's src order) - OP_SGXT, OP_ATOM, OP_BAR, // execution barrier, sources = { id, thread count, predicate } OP_VADD, // byte/word vector operations @@ -173,7 +167,6 @@ enum operation OP_SHFL, // warp shuffle OP_VOTE, OP_BUFQ, // buffer query - OP_WARPSYNC, OP_LAST }; @@ -261,29 +254,11 @@ enum operation #define NV50_IR_SUBOP_VOTE_ALL 0 #define NV50_IR_SUBOP_VOTE_ANY 1 #define NV50_IR_SUBOP_VOTE_UNI 2 -#define NV50_IR_SUBOP_LOP3_LUT_SRC0 0xf0 -#define NV50_IR_SUBOP_LOP3_LUT_SRC1 0xcc -#define NV50_IR_SUBOP_LOP3_LUT_SRC2 0xaa -#define NV50_IR_SUBOP_LOP3_LUT(exp) ({ \ - uint8_t a = NV50_IR_SUBOP_LOP3_LUT_SRC0; \ - uint8_t b = NV50_IR_SUBOP_LOP3_LUT_SRC1; \ - uint8_t c = NV50_IR_SUBOP_LOP3_LUT_SRC2; \ - (uint8_t)(exp); \ -}) -#define NV50_IR_SUBOP_BMSK_C (0 << 0) -#define NV50_IR_SUBOP_BMSK_W (1 << 0) #define NV50_IR_SUBOP_MINMAX_LOW 1 #define NV50_IR_SUBOP_MINMAX_MED 2 #define NV50_IR_SUBOP_MINMAX_HIGH 3 -#define NV50_IR_SUBOP_SHF_L (0 << 0) -#define NV50_IR_SUBOP_SHF_R (1 << 0) -#define NV50_IR_SUBOP_SHF_LO (0 << 1) -#define NV50_IR_SUBOP_SHF_HI (1 << 1) -#define NV50_IR_SUBOP_SHF_C (0 << 2) -#define NV50_IR_SUBOP_SHF_W (1 << 2) - // xmad(src0, src1, 0) << 16 + src2 #define NV50_IR_SUBOP_XMAD_PSL (1 << 0) // (xmad(src0, src1, src2) & 0xffff) | (src1 << 16) @@ -925,7 +900,7 @@ public: uint16_t subOp; // quadop, 1 for mul-high, etc. - unsigned encSize : 5; // encoding size in bytes + unsigned encSize : 4; // encoding size in bytes unsigned saturate : 1; // to [0.0f, 1.0f] unsigned join : 1; // converge control flow (use OP_JOIN until end) unsigned fixed : 1; // prevent dead code elimination diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h index 63ea7f5..5dc0e24 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h @@ -29,8 +29,6 @@ #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_scan.h" -struct nir_shader_compiler_options; - /* * This struct constitutes linkage information in TGSI terminology. * @@ -72,12 +70,10 @@ struct nv50_ir_prog_symbol uint32_t offset; }; -#define NVISA_GF100_CHIPSET 0xc0 #define NVISA_GK104_CHIPSET 0xe0 #define NVISA_GK20A_CHIPSET 0xea #define NVISA_GM107_CHIPSET 0x110 #define NVISA_GM200_CHIPSET 0x120 -#define NVISA_GV100_CHIPSET 0x140 struct nv50_ir_prog_info { @@ -204,9 +200,6 @@ struct nv50_ir_prog_info extern "C" { #endif -const struct nir_shader_compiler_options * -nv50_ir_nir_shader_compiler_options(int chipset); - extern int nv50_ir_generate_code(struct nv50_ir_prog_info *); extern void nv50_ir_relocate_code(void *relocData, uint32_t *code, diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp index dd8e1ab..e244bd0 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp @@ -23,7 +23,6 @@ */ #include "codegen/nv50_ir_target_gm107.h" -#include "codegen/nv50_ir_sched_gm107.h" //#define GM107_DEBUG_SCHED_DATA @@ -171,7 +170,6 @@ private: void emitBFI(); void emitBFE(); void emitFLO(); - void emitPRMT(); void emitLDSTs(int, DataType); void emitLDSTc(int); @@ -2373,33 +2371,6 @@ CodeEmitterGM107::emitFLO() emitGPR (0x00, insn->def(0)); } -void -CodeEmitterGM107::emitPRMT() -{ - switch (insn->src(1).getFile()) { - case FILE_GPR: - emitInsn(0x5bc00000); - emitGPR (0x14, insn->src(1)); - break; - case FILE_MEMORY_CONST: - emitInsn(0x4bc00000); - emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); - break; - case FILE_IMMEDIATE: - emitInsn(0x36c00000); - emitIMMD(0x14, 19, insn->src(1)); - break; - default: - assert(!"bad src1 file"); - break; - } - - emitField(0x30, 3, insn->subOp); - emitGPR (0x27, insn->src(2)); - emitGPR (0x08, insn->src(0)); - emitGPR (0x00, insn->def(0)); -} - /******************************************************************************* * memory ******************************************************************************/ @@ -3566,9 +3537,6 @@ CodeEmitterGM107::emitInstruction(Instruction *i) case OP_BFIND: emitFLO(); break; - case OP_PERMT: - emitPRMT(); - break; case OP_SLCT: if (isFloatType(insn->dType)) emitFCMP(); @@ -3774,6 +3742,156 @@ CodeEmitterGM107::getMinEncodingSize(const Instruction *i) const * sched data calculator ******************************************************************************/ +class SchedDataCalculatorGM107 : public Pass +{ +public: + SchedDataCalculatorGM107(const TargetGM107 *targ) : targ(targ) {} + +private: + struct RegScores + { + struct ScoreData { + int r[256]; + int p[8]; + int c; + } rd, wr; + int base; + + void rebase(const int base) + { + const int delta = this->base - base; + if (!delta) + return; + this->base = 0; + + for (int i = 0; i < 256; ++i) { + rd.r[i] += delta; + wr.r[i] += delta; + } + for (int i = 0; i < 8; ++i) { + rd.p[i] += delta; + wr.p[i] += delta; + } + rd.c += delta; + wr.c += delta; + } + void wipe() + { + memset(&rd, 0, sizeof(rd)); + memset(&wr, 0, sizeof(wr)); + } + int getLatest(const ScoreData& d) const + { + int max = 0; + for (int i = 0; i < 256; ++i) + if (d.r[i] > max) + max = d.r[i]; + for (int i = 0; i < 8; ++i) + if (d.p[i] > max) + max = d.p[i]; + if (d.c > max) + max = d.c; + return max; + } + inline int getLatestRd() const + { + return getLatest(rd); + } + inline int getLatestWr() const + { + return getLatest(wr); + } + inline int getLatest() const + { + return MAX2(getLatestRd(), getLatestWr()); + } + void setMax(const RegScores *that) + { + for (int i = 0; i < 256; ++i) { + rd.r[i] = MAX2(rd.r[i], that->rd.r[i]); + wr.r[i] = MAX2(wr.r[i], that->wr.r[i]); + } + for (int i = 0; i < 8; ++i) { + rd.p[i] = MAX2(rd.p[i], that->rd.p[i]); + wr.p[i] = MAX2(wr.p[i], that->wr.p[i]); + } + rd.c = MAX2(rd.c, that->rd.c); + wr.c = MAX2(wr.c, that->wr.c); + } + void print(int cycle) + { + for (int i = 0; i < 256; ++i) { + if (rd.r[i] > cycle) + INFO("rd $r%i @ %i\n", i, rd.r[i]); + if (wr.r[i] > cycle) + INFO("wr $r%i @ %i\n", i, wr.r[i]); + } + for (int i = 0; i < 8; ++i) { + if (rd.p[i] > cycle) + INFO("rd $p%i @ %i\n", i, rd.p[i]); + if (wr.p[i] > cycle) + INFO("wr $p%i @ %i\n", i, wr.p[i]); + } + if (rd.c > cycle) + INFO("rd $c @ %i\n", rd.c); + if (wr.c > cycle) + INFO("wr $c @ %i\n", wr.c); + } + }; + + RegScores *score; // for current BB + std::vector scoreBoards; + + const TargetGM107 *targ; + bool visit(Function *); + bool visit(BasicBlock *); + + void commitInsn(const Instruction *, int); + int calcDelay(const Instruction *, int) const; + void setDelay(Instruction *, int, const Instruction *); + void recordWr(const Value *, int, int); + void checkRd(const Value *, int, int&) const; + + inline void emitYield(Instruction *); + inline void emitStall(Instruction *, uint8_t); + inline void emitReuse(Instruction *, uint8_t); + inline void emitWrDepBar(Instruction *, uint8_t); + inline void emitRdDepBar(Instruction *, uint8_t); + inline void emitWtDepBar(Instruction *, uint8_t); + + inline int getStall(const Instruction *) const; + inline int getWrDepBar(const Instruction *) const; + inline int getRdDepBar(const Instruction *) const; + inline int getWtDepBar(const Instruction *) const; + + void setReuseFlag(Instruction *); + + inline void printSchedInfo(int, const Instruction *) const; + + struct LiveBarUse { + LiveBarUse(Instruction *insn, Instruction *usei) + : insn(insn), usei(usei) { } + Instruction *insn; + Instruction *usei; + }; + + struct LiveBarDef { + LiveBarDef(Instruction *insn, Instruction *defi) + : insn(insn), defi(defi) { } + Instruction *insn; + Instruction *defi; + }; + + bool insertBarriers(BasicBlock *); + + bool doesInsnWriteTo(const Instruction *insn, const Value *val) const; + Instruction *findFirstUse(const Instruction *) const; + Instruction *findFirstDef(const Instruction *) const; + + bool needRdDepBar(const Instruction *) const; + bool needWrDepBar(const Instruction *) const; +}; + inline void SchedDataCalculatorGM107::emitStall(Instruction *insn, uint8_t cnt) { diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.cpp deleted file mode 100644 index ef33743..0000000 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.cpp +++ /dev/null @@ -1,2052 +0,0 @@ -/* - * Copyright 2020 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ -#include "codegen/nv50_ir_emit_gv100.h" -#include "codegen/nv50_ir_sched_gm107.h" - -namespace nv50_ir { - -/******************************************************************************* - * instruction format helpers - ******************************************************************************/ - -#define FA_NODEF (1 << 0) -#define FA_RRR (1 << 1) -#define FA_RRI (1 << 2) -#define FA_RRC (1 << 3) -#define FA_RIR (1 << 4) -#define FA_RCR (1 << 5) - -#define FA_SRC_MASK 0x0ff -#define FA_SRC_NEG 0x100 -#define FA_SRC_ABS 0x200 - -#define EMPTY -1 -#define __(a) (a) // no source modifiers -#define _A(a) ((a) | FA_SRC_ABS) -#define N_(a) ((a) | FA_SRC_NEG) -#define NA(a) ((a) | FA_SRC_NEG | FA_SRC_ABS) - -void -CodeEmitterGV100::emitFormA_I32(int src) -{ - emitIMMD(32, 32, insn->src(src)); - if (insn->src(src).mod.abs()) - code[1] &= 0x7fffffff; - if (insn->src(src).mod.neg()) - code[1] ^= 0x80000000; -} - -void -CodeEmitterGV100::emitFormA_RRC(uint16_t op, int src1, int src2) -{ - emitInsn(op); - if (src1 >= 0) { - emitNEG (75, (src1 & FA_SRC_MASK), (src1 & FA_SRC_NEG)); - emitABS (74, (src1 & FA_SRC_MASK), (src1 & FA_SRC_ABS)); - emitGPR (64, insn->src(src1 & FA_SRC_MASK)); - } - if (src2 >= 0) { - emitNEG (63, (src2 & FA_SRC_MASK), (src2 & FA_SRC_NEG)); - emitABS (62, (src2 & FA_SRC_MASK), (src2 & FA_SRC_ABS)); - emitCBUF(54, -1, 38, 0, 2, insn->src(src2 & FA_SRC_MASK)); - } -} - -void -CodeEmitterGV100::emitFormA_RRI(uint16_t op, int src1, int src2) -{ - emitInsn(op); - if (src1 >= 0) { - emitNEG (75, (src1 & FA_SRC_MASK), (src1 & FA_SRC_NEG)); - emitABS (74, (src1 & FA_SRC_MASK), (src1 & FA_SRC_ABS)); - emitGPR (64, insn->src(src1 & FA_SRC_MASK)); - } - if (src2 >= 0) - emitFormA_I32(src2 & FA_SRC_MASK); -} - -void -CodeEmitterGV100::emitFormA_RRR(uint16_t op, int src1, int src2) -{ - emitInsn(op); - if (src2 >= 0) { - emitNEG (75, (src2 & FA_SRC_MASK), (src2 & FA_SRC_NEG)); - emitABS (74, (src2 & FA_SRC_MASK), (src2 & FA_SRC_ABS)); - emitGPR (64, insn->src(src2 & FA_SRC_MASK)); - } - - if (src1 >= 0) { - emitNEG (63, (src1 & FA_SRC_MASK), (src1 & FA_SRC_NEG)); - emitABS (62, (src1 & FA_SRC_MASK), (src1 & FA_SRC_ABS)); - emitGPR (32, insn->src(src1 & FA_SRC_MASK)); - } -} - -void -CodeEmitterGV100::emitFormA(uint16_t op, uint8_t forms, - int src0, int src1, int src2) -{ - switch ((src1 < 0) ? FILE_GPR : insn->src(src1 & FA_SRC_MASK).getFile()) { - case FILE_GPR: - switch ((src2 < 0) ? FILE_GPR : insn->src(src2 & FA_SRC_MASK).getFile()) { - case FILE_GPR: - assert(forms & FA_RRR); - emitFormA_RRR((1 << 9) | op, src1, src2); - break; - case FILE_IMMEDIATE: - assert(forms & FA_RRI); - emitFormA_RRI((2 << 9) | op, src1, src2); - break; - case FILE_MEMORY_CONST: - assert(forms & FA_RRC); - emitFormA_RRC((3 << 9) | op, src1, src2); - break; - default: - assert(!"bad src2 file"); - break; - } - break; - case FILE_IMMEDIATE: - assert((src2 < 0) || insn->src(src2 & FA_SRC_MASK).getFile() == FILE_GPR); - assert(forms & FA_RIR); - emitFormA_RRI((4 << 9) | op, src2, src1); - break; - case FILE_MEMORY_CONST: - assert((src2 < 0) || insn->src(src2 & FA_SRC_MASK).getFile() == FILE_GPR); - assert(forms & FA_RCR); - emitFormA_RRC((5 << 9) | op, src2, src1); - break; - default: - assert(!"bad src1 file"); - break; - } - - if (src0 >= 0) { - assert(insn->src(src0 & FA_SRC_MASK).getFile() == FILE_GPR); - emitABS(73, (src0 & FA_SRC_MASK), (src0 & FA_SRC_ABS)); - emitNEG(72, (src0 & FA_SRC_MASK), (src0 & FA_SRC_NEG)); - emitGPR(24, insn->src(src0 & FA_SRC_MASK)); - } - - if (!(forms & FA_NODEF)) - emitGPR(16, insn->def(0)); -} - -/******************************************************************************* - * control - ******************************************************************************/ - -void -CodeEmitterGV100::emitBRA() -{ - const FlowInstruction *insn = this->insn->asFlow(); - int64_t target = ((int64_t)insn->target.bb->binPos - (codeSize + 0x10)) / 4; - - assert(!insn->indirect && !insn->absolute); - - emitInsn (0x947); - emitField(34, 48, target); - emitPRED (87); - emitField(86, 2, 0); // ./.INC/.DEC -} - -void -CodeEmitterGV100::emitEXIT() -{ - emitInsn (0x94d); - emitNOT (90); - emitPRED (87); - emitField(85, 1, 0); // .NO_ATEXIT - emitField(84, 2, 0); // ./.KEEPREFCOUNT/.PREEMPTED/.INVALID3 -} - -void -CodeEmitterGV100::emitKILL() -{ - emitInsn(0x95b); - emitPRED(87); -} - -void -CodeEmitterGV100::emitNOP() -{ - emitInsn(0x918); -} - -void -CodeEmitterGV100::emitWARPSYNC() -{ - emitFormA(0x148, FA_NODEF | FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY); - emitNOT (90); - emitPRED (87); -} - -/******************************************************************************* - * movement / conversion - ******************************************************************************/ - -void -CodeEmitterGV100::emitCS2R() -{ - emitInsn(0x805); - emitSYS (72, insn->src(0)); - emitGPR (16, insn->def(0)); -} - -void -CodeEmitterGV100::emitF2F() -{ - if (typeSizeof(insn->sType) != 8 && typeSizeof(insn->dType) != 8) - emitFormA(0x104, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY); - else - emitFormA(0x110, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY); - emitField(84, 2, util_logbase2(typeSizeof(insn->sType))); - emitFMZ (80, 1); - emitRND (78); - emitField(75, 2, util_logbase2(typeSizeof(insn->dType))); - emitField(60, 2, insn->subOp); // ./.H1/.INVALID2/.INVALID3 -} - -void -CodeEmitterGV100::emitF2I() -{ - if (typeSizeof(insn->sType) != 8 && typeSizeof(insn->dType) != 8) - emitFormA(0x105, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY); - else - emitFormA(0x111, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY); - emitField(84, 2, util_logbase2(typeSizeof(insn->sType))); - emitFMZ (80, 1); - emitRND (78); - emitField(77, 1, 0); // .NTZ - emitField(75, 2, util_logbase2(typeSizeof(insn->dType))); - emitField(72, 1, isSignedType(insn->dType)); -} - -void -CodeEmitterGV100::emitFRND() -{ - int subop = 0; - - switch (insn->op) { - case OP_CVT: - switch (insn->rnd) { - case ROUND_NI: subop = 0; break; - case ROUND_MI: subop = 1; break; - case ROUND_PI: subop = 2; break; - case ROUND_ZI: subop = 3; break; - default: - assert(!"invalid FRND mode"); - break; - } - break; - case OP_FLOOR: subop = 1; break; - case OP_CEIL : subop = 2; break; - case OP_TRUNC: subop = 3; break; - default: - assert(!"invalid FRND opcode"); - break; - } - - if (typeSizeof(insn->sType) != 8 && typeSizeof(insn->dType) != 8) - emitFormA(0x107, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY); - else - emitFormA(0x113, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY); - emitField(84, 2, util_logbase2(typeSizeof(insn->sType))); - emitFMZ (80, 1); - emitField(78, 2, subop); - emitField(75, 2, util_logbase2(typeSizeof(insn->dType))); -} - -void -CodeEmitterGV100::emitI2F() -{ - if (typeSizeof(insn->sType) != 8 && typeSizeof(insn->dType) != 8) - emitFormA(0x106, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY); - else - emitFormA(0x112, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY); - emitField(84, 2, util_logbase2(typeSizeof(insn->sType))); - emitRND (78); - emitField(75, 2, util_logbase2(typeSizeof(insn->dType))); - emitField(74, 1, isSignedType(insn->sType)); - if (typeSizeof(insn->sType) == 2) - emitField(60, 2, insn->subOp >> 1); - else - emitField(60, 2, insn->subOp); // ./.B1/.B2/.B3 -} - -void -CodeEmitterGV100::emitMOV() -{ - switch (insn->def(0).getFile()) { - case FILE_GPR: - switch (insn->src(0).getFile()) { - case FILE_GPR: - case FILE_MEMORY_CONST: - case FILE_IMMEDIATE: - emitFormA(0x002, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY); - emitField(72, 4, insn->lanes); - break; - case FILE_PREDICATE: - emitInsn (0x807); - emitGPR (16, insn->def(0)); - emitGPR (24); - emitField(32, 32, 0xffffffff); - emitField(90, 1, 1); - emitPRED (87, insn->src(0)); - break; - default: - assert(!"bad src file"); - break; - } - break; - case FILE_PREDICATE: - emitInsn (0x20c); - emitPRED (87); - emitPRED (84); - emitNOT (71); - emitPRED (68); - emitPRED (81, insn->def(0)); - emitCond3(76, CC_NE); - emitGPR (24, insn->src(0)); - emitGPR (32); - break; - default: - assert(!"bad dst file"); - break; - } -} - -void -CodeEmitterGV100::emitPRMT() -{ - emitFormA(0x016, FA_RRR | FA_RRI | FA_RRC | FA_RIR | FA_RCR, __(0), __(1), __(2)); - emitField(72, 3, insn->subOp); -} - -void -CodeEmitterGV100::emitS2R() -{ - emitInsn(0x919); - emitSYS (72, insn->src(0)); - emitGPR (16, insn->def(0)); -} - -static void -selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data) -{ - int loc = entry->loc; - if (data.force_persample_interp) - code[loc + 2] |= 1 << 26; - else - code[loc + 2] &= ~(1 << 26); -} - -void -CodeEmitterGV100::emitSEL() -{ - emitFormA(0x007, FA_RRR | FA_RIR | FA_RCR, __(0), __(1), EMPTY); - emitNOT (90, insn->src(2)); - emitPRED (87, insn->src(2)); - if (insn->subOp == 1) - addInterp(0, 0, selpFlip); -} - -void -CodeEmitterGV100::emitSHFL() -{ - switch (insn->src(1).getFile()) { - case FILE_GPR: - switch (insn->src(2).getFile()) { - case FILE_GPR: - emitInsn(0x389); - emitGPR (64, insn->src(2)); - break; - case FILE_IMMEDIATE: - emitInsn(0x589); - emitIMMD(40, 13, insn->src(2)); - break; - default: - assert(!"bad src2 file"); - break; - } - emitGPR(32, insn->src(1)); - break; - case FILE_IMMEDIATE: - switch (insn->src(2).getFile()) { - case FILE_GPR: - emitInsn(0x989); - emitGPR (64, insn->src(2)); - break; - case FILE_IMMEDIATE: - emitInsn(0xf89); - emitIMMD(40, 13, insn->src(2)); - break; - default: - assert(!"bad src2 file"); - break; - } - emitIMMD(53, 5, insn->src(1)); - break; - default: - assert(!"bad src1 file"); - break; - } - - if (insn->defExists(1)) - emitPRED(81, insn->def(1)); - else - emitPRED(81); - - emitField(58, 2, insn->subOp); - emitGPR (24, insn->src(0)); - emitGPR (16, insn->def(0)); -} - -/******************************************************************************* - * fp32 - ******************************************************************************/ - -void -CodeEmitterGV100::emitFADD() -{ - if (insn->src(1).getFile() == FILE_GPR) - emitFormA(0x021, FA_RRR , NA(0), NA(1), EMPTY); - else - emitFormA(0x021, FA_RRI | FA_RRC, NA(0), EMPTY, NA(1)); - emitFMZ (80, 1); - emitRND (78); - emitSAT (77); -} - -void -CodeEmitterGV100::emitFFMA() -{ - emitFormA(0x023, FA_RRR | FA_RRI | FA_RRC | FA_RIR | FA_RCR, NA(0), NA(1), NA(2)); - emitField(80, 1, insn->ftz); - emitRND (78); - emitSAT (77); - emitField(76, 1, insn->dnz); -} - -void -CodeEmitterGV100::emitFMNMX() -{ - emitFormA(0x009, FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY); - emitField(90, 1, insn->op == OP_MAX); - emitPRED (87); - emitFMZ (80, 1); -} - -void -CodeEmitterGV100::emitFMUL() -{ - emitFormA(0x020, FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY); - emitField(80, 1, insn->ftz); - emitPDIV (84); - emitRND (78); - emitSAT (77); - emitField(76, 1, insn->dnz); -} - -void -CodeEmitterGV100::emitFSET_BF() -{ - const CmpInstruction *insn = this->insn->asCmp(); - - emitFormA(0x00a, FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY); - emitFMZ (80, 1); - emitCond4(76, insn->setCond); - - if (insn->op != OP_SET) { - switch (insn->op) { - case OP_SET_AND: emitField(74, 2, 0); break; - case OP_SET_OR : emitField(74, 2, 1); break; - case OP_SET_XOR: emitField(74, 2, 2); break; - default: - assert(!"invalid set op"); - break; - } - emitNOT (90, insn->src(2)); - emitPRED(87, insn->src(2)); - } else { - emitPRED(87); - } -} - -void -CodeEmitterGV100::emitFSETP() -{ - const CmpInstruction *insn = this->insn->asCmp(); - - emitFormA(0x00b, FA_NODEF | FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY); - emitFMZ (80, 1); - emitCond4(76, insn->setCond); - - if (insn->op != OP_SET) { - switch (insn->op) { - case OP_SET_AND: emitField(74, 2, 0); break; - case OP_SET_OR : emitField(74, 2, 1); break; - case OP_SET_XOR: emitField(74, 2, 2); break; - default: - assert(!"invalid set op"); - break; - } - emitNOT (90, insn->src(2)); - emitPRED(87, insn->src(2)); - } else { - emitPRED(87); - } - - if (insn->defExists(1)) - emitPRED(84, insn->def(1)); - else - emitPRED(84); - emitPRED(81, insn->def(0)); -} - -void -CodeEmitterGV100::emitFSWZADD() -{ - uint8_t subOp = 0; - - // NP/PN swapped vs SM60 - for (int i = 0; i < 4; i++) { - uint8_t p = ((insn->subOp >> (i * 2)) & 3); - if (p == 1 || p == 2) - p ^= 3; - subOp |= p << (i * 2); - } - - emitInsn (0x822); - emitFMZ (80, 1); - emitRND (78); - emitField(77, 1, insn->lanes); /* abused for .ndv */ - emitGPR (64, insn->src(1)); - emitField(32, 8, subOp); - emitGPR (24, insn->src(0)); - emitGPR (16, insn->def(0)); -} - -void -CodeEmitterGV100::emitMUFU() -{ - int mufu = 0; - - switch (insn->op) { - case OP_COS : mufu = 0; break; - case OP_SIN : mufu = 1; break; - case OP_EX2 : mufu = 2; break; - case OP_LG2 : mufu = 3; break; - case OP_RCP : mufu = 4 + 2 * insn->subOp; break; - case OP_RSQ : mufu = 5 + 2 * insn->subOp; break; - case OP_SQRT: mufu = 8; break; - default: - assert(!"invalid mufu"); - break; - } - - emitFormA(0x108, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY); - emitField(74, 4, mufu); -} - -/******************************************************************************* - * fp64 - ******************************************************************************/ - -void -CodeEmitterGV100::emitDADD() -{ - emitFormA(0x029, FA_RRR | FA_RRI | FA_RRC, NA(0), EMPTY, NA(1)); - emitRND(78); -} - -void -CodeEmitterGV100::emitDFMA() -{ - emitFormA(0x02b, FA_RRR | FA_RRI | FA_RRC | FA_RIR | FA_RCR, NA(0), NA(1), NA(2)); - emitRND(78); -} - -void -CodeEmitterGV100::emitDMUL() -{ - emitFormA(0x028, FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY); - emitRND(78); -} - -void -CodeEmitterGV100::emitDSETP() -{ - const CmpInstruction *insn = this->insn->asCmp(); - - if (insn->src(1).getFile() == FILE_GPR) - emitFormA(0x02a, FA_NODEF | FA_RRR , NA(0), NA(1), EMPTY); - else - emitFormA(0x02a, FA_NODEF | FA_RRI | FA_RRC, NA(0), EMPTY, NA(1)); - - if (insn->op != OP_SET) { - switch (insn->op) { - case OP_SET_AND: emitField(74, 2, 0); break; - case OP_SET_OR : emitField(74, 2, 1); break; - case OP_SET_XOR: emitField(74, 2, 2); break; - default: - assert(!"invalid set op"); - break; - } - emitNOT (90, insn->src(2)); - emitPRED(87, insn->src(2)); - } else { - emitPRED(87); - } - - if (insn->defExists(1)) - emitPRED(84, insn->def(1)); - else - emitPRED(84); - emitPRED (81, insn->def(0)); - emitCond4(76, insn->setCond); -} - -/******************************************************************************* - * integer - ******************************************************************************/ - -void -CodeEmitterGV100::emitBMSK() -{ - emitFormA(0x01b, FA_RRR | FA_RIR | FA_RCR, __(0), __(1), EMPTY); - emitField(75, 1, insn->subOp); // .C/.W -} - -void -CodeEmitterGV100::emitBREV() -{ - emitFormA(0x101, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY); -} - -void -CodeEmitterGV100::emitFLO() -{ - emitFormA(0x100, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY); - emitPRED (81); - emitField(74, 1, insn->subOp == NV50_IR_SUBOP_BFIND_SAMT); - emitField(73, 1, isSignedType(insn->dType)); - emitNOT (63, insn->src(0)); -} - -void -CodeEmitterGV100::emitIABS() -{ - emitFormA(0x013, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY); -} - -void -CodeEmitterGV100::emitIADD3() -{ -// emitFormA(0x010, FA_RRR | FA_RIR | FA_RCR, N_(0), N_(1), N_(2)); - emitFormA(0x010, FA_RRR | FA_RIR | FA_RCR, N_(0), N_(1), EMPTY); - emitGPR (64); //XXX: fix when switching back to N_(2) - emitPRED (84, NULL); // .CC1 - emitPRED (81, insn->flagsDef >= 0 ? insn->getDef(insn->flagsDef) : NULL); - if (insn->flagsSrc >= 0) { - emitField(74, 1, 1); // .X - emitPRED (87, insn->getSrc(insn->flagsSrc)); - emitField(77, 4, 0xf); // .X1 - } -} - -void -CodeEmitterGV100::emitIMAD() -{ - emitFormA(0x024, FA_RRR | FA_RRI | FA_RRC | FA_RIR | FA_RCR, __(0), __(1), N_(2)); - emitField(73, 1, isSignedType(insn->sType)); -} - -void -CodeEmitterGV100::emitIMAD_WIDE() -{ - emitFormA(0x025, FA_RRR | FA_RRC | FA_RIR | FA_RCR, __(0), __(1), N_(2)); - emitPRED (81); - emitField(73, 1, isSignedType(insn->sType)); -} - -void -CodeEmitterGV100::emitISETP() -{ - const CmpInstruction *insn = this->insn->asCmp(); - - emitFormA(0x00c, FA_NODEF | FA_RRR | FA_RIR | FA_RCR, __(0), __(1), EMPTY); - - if (insn->op != OP_SET) { - switch (insn->op) { - case OP_SET_AND: emitField(74, 2, 0); break; - case OP_SET_OR : emitField(74, 2, 1); break; - case OP_SET_XOR: emitField(74, 2, 2); break; - default: - assert(!"invalid set op"); - break; - } - emitNOT (90, insn->src(2)); - emitPRED(87, insn->src(2)); - } else { - emitPRED(87); - } - - //XXX: CC->pred - if (insn->flagsSrc >= 0) { - assert(0); - emitField(68, 4, 6); - } else { - emitNOT (71); - if (!insn->subOp) - emitPRED(68); - } - - if (insn->defExists(1)) - emitPRED(84, insn->def(1)); - else - emitPRED(84); - emitPRED (81, insn->def(0)); - emitCond3(76, insn->setCond); - emitField(73, 1, isSignedType(insn->sType)); - - if (insn->subOp) { // .EX - assert(0); - emitField(72, 1, 1); - emitPRED (68, insn->srcExists(3) ? insn->src(3) : insn->src(2)); - } -} - -void -CodeEmitterGV100::emitLEA() -{ - assert(insn->src(1).get()->asImm()); - - emitFormA(0x011, FA_RRR | FA_RIR | FA_RCR, N_(0), N_(2), EMPTY); - emitPRED (81); - emitIMMD (75, 5, insn->src(1)); - emitGPR (64); -} - -void -CodeEmitterGV100::emitLOP3_LUT() -{ - emitFormA(0x012, FA_RRR | FA_RIR | FA_RCR, __(0), __(1), __(2)); - emitField(90, 1, 1); - emitPRED (87); - emitPRED (81); - emitField(80, 1, 0); // .PAND - emitField(72, 8, insn->subOp); -} - -void -CodeEmitterGV100::emitPOPC() -{ - emitFormA(0x109, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY); - emitNOT (63, insn->src(0)); -} - -void -CodeEmitterGV100::emitSGXT() -{ - emitFormA(0x01a, FA_RRR | FA_RIR | FA_RCR, __(0), __(1), EMPTY); - emitField(75, 1, 0); // .W - emitField(73, 1, 1); // /.U32 -} - -void -CodeEmitterGV100::emitSHF() -{ - emitFormA(0x019, FA_RRR | FA_RRI | FA_RRC | FA_RIR | FA_RCR, __(0), __(1), __(2)); - emitField(80, 1, !!(insn->subOp & NV50_IR_SUBOP_SHF_HI)); - emitField(76, 1, !!(insn->subOp & NV50_IR_SUBOP_SHF_R)); - emitField(75, 1, !!(insn->subOp & NV50_IR_SUBOP_SHF_W)); - - switch (insn->sType) { - case TYPE_S64: emitField(73, 2, 0); break; - case TYPE_U64: emitField(73, 2, 1); break; - case TYPE_S32: emitField(73, 2, 2); break; - case TYPE_U32: - default: - emitField(73, 2, 3); - break; - } -} - -/******************************************************************************* - * load/stores - ******************************************************************************/ - -void -CodeEmitterGV100::emitALD() -{ - emitInsn (0x321); - emitField(74, 2, (insn->getDef(0)->reg.size / 4) - 1); - emitGPR (32, insn->src(0).getIndirect(1)); - emitO (79); - emitP (76); - emitADDR (24, 40, 10, 0, insn->src(0)); - emitGPR (16, insn->def(0)); -} - -void -CodeEmitterGV100::emitAST() -{ - emitInsn (0x322); - emitField(74, 2, (typeSizeof(insn->dType) / 4) - 1); - emitGPR (64, insn->src(0).getIndirect(1)); - emitP (76); - emitADDR (24, 40, 10, 0, insn->src(0)); - emitGPR (32, insn->src(1)); -} - -void -CodeEmitterGV100::emitATOM() -{ - unsigned subOp, dType; - - if (insn->subOp != NV50_IR_SUBOP_ATOM_CAS) { - emitInsn(0x38a); - - if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) - subOp = 8; - else - subOp = insn->subOp; - emitField(87, 4, subOp); - - switch (insn->dType) { - case TYPE_U32 : dType = 0; break; - case TYPE_S32 : dType = 1; break; - case TYPE_U64 : dType = 2; break; - case TYPE_F32 : dType = 3; break; - case TYPE_B128: dType = 4; break; - case TYPE_S64 : dType = 5; break; - default: - assert(!"unexpected dType"); - dType = 0; - break; - } - emitField(73, 3, dType); - } else { - emitInsn(0x38b); - - switch (insn->dType) { - case TYPE_U32: dType = 0; break; - case TYPE_U64: dType = 2; break; - default: - assert(!"unexpected dType"); - dType = 0; - break; - } - emitField(73, 3, dType); - emitGPR (64, insn->src(2)); - } - - emitPRED (81); - emitField(79, 2, 1); - emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8); - emitGPR (32, insn->src(1)); - emitADDR (24, 40, 24, 0, insn->src(0)); - emitGPR (16, insn->def(0)); -} - -void -CodeEmitterGV100::emitATOMS() -{ - unsigned dType, subOp; - - if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) { - switch (insn->dType) { - case TYPE_U32: dType = 0; break; - case TYPE_S32: dType = 1; break; - case TYPE_U64: dType = 2; break; - default: assert(!"unexpected dType"); dType = 0; break; - } - - emitInsn (0x38d); - emitField(87, 1, 0); // ATOMS.CAS/ATOMS.CAST - emitField(73, 2, dType); - emitGPR (64, insn->src(2)); - } else { - emitInsn(0x38c); - - if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) - subOp = 8; - else - subOp = insn->subOp; - emitField(87, 4, subOp); - - switch (insn->dType) { - case TYPE_U32: dType = 0; break; - case TYPE_S32: dType = 1; break; - case TYPE_U64: dType = 2; break; - default: assert(!"unexpected dType"); dType = 0; break; - } - - emitField(73, 2, dType); - } - - emitGPR (32, insn->src(1)); - emitADDR (24, 40, 24, 0, insn->src(0)); - emitGPR (16, insn->def(0)); -} - -static void -interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data) -{ - int ipa = entry->ipa; - int loc = entry->loc; - - if (data.force_persample_interp && - (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT && - (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) { - ipa |= NV50_IR_INTERP_CENTROID; - } - - int sample; - switch (ipa & NV50_IR_INTERP_SAMPLE_MASK) { - case NV50_IR_INTERP_DEFAULT : sample = 0; break; - case NV50_IR_INTERP_CENTROID: sample = 1; break; - case NV50_IR_INTERP_OFFSET : sample = 2; break; - default: assert(!"invalid sample mode"); - } - - int interp; - switch (ipa & NV50_IR_INTERP_MODE_MASK) { - case NV50_IR_INTERP_LINEAR : - case NV50_IR_INTERP_PERSPECTIVE: interp = 0; break; - case NV50_IR_INTERP_FLAT : interp = 1; break; - case NV50_IR_INTERP_SC : interp = 2; break; - default: assert(!"invalid ipa mode"); - } - - code[loc + 2] &= ~(0xf << 12); - code[loc + 2] |= sample << 12; - code[loc + 2] |= interp << 14; -} - -void -CodeEmitterGV100::emitIPA() -{ - emitInsn (0x326); - emitPRED (81, insn->defExists(1) ? insn->def(1) : NULL); - - switch (insn->getInterpMode()) { - case NV50_IR_INTERP_LINEAR : - case NV50_IR_INTERP_PERSPECTIVE: emitField(78, 2, 0); break; - case NV50_IR_INTERP_FLAT : emitField(78, 2, 1); break; - case NV50_IR_INTERP_SC : emitField(78, 2, 2); break; - default: - assert(!"invalid ipa mode"); - break; - } - - switch (insn->getSampleMode()) { - case NV50_IR_INTERP_DEFAULT : emitField(76, 2, 0); break; - case NV50_IR_INTERP_CENTROID: emitField(76, 2, 1); break; - case NV50_IR_INTERP_OFFSET : emitField(76, 2, 2); break; - default: - assert(!"invalid sample mode"); - break; - } - - if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET) { - emitGPR (32); - addInterp(insn->ipa, 0xff, interpApply); - } else { - emitGPR (32, insn->src(1)); - addInterp(insn->ipa, insn->getSrc(1)->reg.data.id, interpApply); - } - - assert(!insn->src(0).isIndirect(0)); - emitADDR (-1, 64, 8, 2, insn->src(0)); - emitGPR (16, insn->def(0)); -} - -void -CodeEmitterGV100::emitISBERD() -{ - emitInsn(0x923); - emitGPR (24, insn->src(0)); - emitGPR (16, insn->def(0)); -} - -void -CodeEmitterGV100::emitLDSTc(int posm, int poso) -{ - int mode = 0; - int order = 1; - - switch (insn->cache) { - case CACHE_CA: mode = 0; order = 1; break; - case CACHE_CG: mode = 2; order = 2; break; - case CACHE_CV: mode = 3; order = 2; break; - default: - assert(!"invalid caching mode"); - break; - } - - emitField(poso, 2, order); - emitField(posm, 2, mode); -} - -void -CodeEmitterGV100::emitLDSTs(int pos, DataType type) -{ - int data = 0; - - switch (typeSizeof(type)) { - case 1: data = isSignedType(type) ? 1 : 0; break; - case 2: data = isSignedType(type) ? 3 : 2; break; - case 4: data = 4; break; - case 8: data = 5; break; - case 16: data = 6; break; - default: - assert(!"bad type"); - break; - } - - emitField(pos, 3, data); -} - -void -CodeEmitterGV100::emitLD() -{ - emitInsn (0x980); - emitField(79, 2, 2); // .CONSTANT/./.STRONG/.MMIO - emitField(77, 2, 2); // .CTA/.SM/.GPU/.SYS - emitLDSTs(73, insn->dType); - emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8); - emitADDR (24, 32, 32, 0, insn->src(0)); - emitGPR (16, insn->def(0)); -} - -void -CodeEmitterGV100::emitLDC() -{ - emitFormA(0x182, FA_RCR, EMPTY, __(0), EMPTY); - emitField(78, 2, insn->subOp); - emitLDSTs(73, insn->dType); - emitGPR (24, insn->src(0).getIndirect(0)); -} - -void -CodeEmitterGV100::emitLDL() -{ - emitInsn (0x983); - emitField(84, 3, 1); // .EF/./.EL/.LU/.EU/.NA/.INVALID6/.INVALID7 - emitLDSTs(73, insn->dType); - emitADDR (24, 40, 24, 0, insn->src(0)); - emitGPR (16, insn->def(0)); -} - -void -CodeEmitterGV100::emitLDS() -{ - emitInsn (0x984); - emitLDSTs(73, insn->dType); - emitADDR (24, 40, 24, 0, insn->src(0)); - emitGPR (16, insn->def(0)); -} - -void -CodeEmitterGV100::emitOUT() -{ - const int cut = insn->op == OP_RESTART || insn->subOp; - const int emit = insn->op == OP_EMIT; - - if (insn->op != OP_FINAL) - emitFormA(0x124, FA_RRR | FA_RIR, __(0), __(1), EMPTY); - else - emitFormA(0x124, FA_RRR | FA_RIR, __(0), EMPTY, EMPTY); - emitField(78, 2, (cut << 1) | emit); -} - -void -CodeEmitterGV100::emitRED() -{ - unsigned dType; - - switch (insn->dType) { - case TYPE_U32: dType = 0; break; - case TYPE_S32: dType = 1; break; - case TYPE_U64: dType = 2; break; - case TYPE_F32: dType = 3; break; - case TYPE_B128: dType = 4; break; - case TYPE_S64: dType = 5; break; - default: assert(!"unexpected dType"); dType = 0; break; - } - - emitInsn (0x98e); - emitField(87, 3, insn->subOp); - emitField(84, 3, 1); // 0=.EF, 1=, 2=.EL, 3=.LU, 4=.EU, 5=.NA - emitField(79, 2, 2); // .INVALID0/./.STRONG/.INVALID3 - emitField(77, 2, 2); // .CTA/.SM/.GPU/.SYS - emitField(73, 3, dType); - emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8); - emitGPR (32, insn->src(1)); - emitADDR (24, 40, 24, 0, insn->src(0)); -} - -void -CodeEmitterGV100::emitST() -{ - emitInsn (0x385); - emitField(79, 2, 2); // .INVALID0/./.STRONG/.MMIO - emitField(77, 2, 2); // .CTA/.SM/.GPU/.SYS - emitLDSTs(73, insn->dType); - emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8); - emitGPR (64, insn->src(1)); - emitADDR (24, 32, 32, 0, insn->src(0)); -} - -void -CodeEmitterGV100::emitSTL() -{ - emitInsn (0x387); - emitField(84, 3, 1); // .EF/./.EL/.LU/.EU/.NA/.INVALID6/.INVALID7 - emitLDSTs(73, insn->dType); - emitADDR (24, 40, 24, 0, insn->src(0)); - emitGPR (32, insn->src(1)); -} - -void -CodeEmitterGV100::emitSTS() -{ - emitInsn (0x388); - emitLDSTs(73, insn->dType); - emitADDR (24, 40, 24, 0, insn->src(0)); - emitGPR (32, insn->src(1)); -} - -/******************************************************************************* - * texture - ******************************************************************************/ - -void -CodeEmitterGV100::emitTEXs(int pos) -{ - int src1 = insn->predSrc == 1 ? 2 : 1; - if (insn->srcExists(src1)) - emitGPR(pos, insn->src(src1)); - else - emitGPR(pos); -} - -void -CodeEmitterGV100::emitTEX() -{ - const TexInstruction *insn = this->insn->asTex(); - int lodm = 0; - - if (!insn->tex.levelZero) { - switch (insn->op) { - case OP_TEX: lodm = 0; break; - case OP_TXB: lodm = 2; break; - case OP_TXL: lodm = 3; break; - default: - assert(!"invalid tex op"); - break; - } - } else { - lodm = 1; - } - - if (insn->tex.rIndirectSrc < 0) { - emitInsn (0xb60); - emitField(54, 5, prog->driver->io.auxCBSlot); - emitField(40, 14, insn->tex.r); - } else { - emitInsn (0x361); - emitField(59, 1, 1); // .B - } - emitField(90, 1, insn->tex.liveOnly); // .NODEP - emitField(87, 3, lodm); - emitField(84, 3, 1); // 0=.EF, 1=, 2=.EL, 3=.LU, 4=.EU, 5=.NA - emitField(78, 1, insn->tex.target.isShadow()); // .DC - emitField(77, 1, insn->tex.derivAll); // .NDV - emitField(76, 1, insn->tex.useOffsets == 1); // .AOFFI - emitPRED (81); - emitGPR (64, insn->def(1)); - emitGPR (16, insn->def(0)); - emitGPR (24, insn->src(0)); - emitTEXs (32); - emitField(63, 1, insn->tex.target.isArray()); - emitField(61, 2, insn->tex.target.isCube() ? 3 : - insn->tex.target.getDim() - 1); - emitField(72, 4, insn->tex.mask); -} - -void -CodeEmitterGV100::emitTLD() -{ - const TexInstruction *insn = this->insn->asTex(); - - if (insn->tex.rIndirectSrc < 0) { - emitInsn (0xb66); - emitField(54, 5, prog->driver->io.auxCBSlot); - emitField(40, 14, insn->tex.r); - } else { - emitInsn (0x367); - emitField(59, 1, 1); // .B - } - emitField(90, 1, insn->tex.liveOnly); - emitField(87, 3, insn->tex.levelZero ? 1 /* .LZ */ : 3 /* .LL */); - emitPRED (81); - emitField(78, 1, insn->tex.target.isMS()); - emitField(76, 1, insn->tex.useOffsets == 1); - emitField(72, 4, insn->tex.mask); - emitGPR (64, insn->def(1)); - emitField(63, 1, insn->tex.target.isArray()); - emitField(61, 2, insn->tex.target.isCube() ? 3 : - insn->tex.target.getDim() - 1); - emitTEXs (32); - emitGPR (24, insn->src(0)); - emitGPR (16, insn->def(0)); -} - -void -CodeEmitterGV100::emitTLD4() -{ - const TexInstruction *insn = this->insn->asTex(); - - int offsets = 0; - switch (insn->tex.useOffsets) { - case 4: offsets = 2; break; - case 1: offsets = 1; break; - case 0: offsets = 0; break; - default: assert(!"invalid offsets count"); break; - } - - if (insn->tex.rIndirectSrc < 0) { - emitInsn (0xb63); - emitField(54, 5, prog->driver->io.auxCBSlot); - emitField(40, 14, insn->tex.r); - } else { - emitInsn (0x364); - emitField(59, 1, 1); // .B - } - emitField(90, 1, insn->tex.liveOnly); - emitField(87, 2, insn->tex.gatherComp); - emitField(84, 1, 1); // !.EF - emitPRED (81); - emitField(78, 1, insn->tex.target.isShadow()); - emitField(76, 2, offsets); - emitField(72, 4, insn->tex.mask); - emitGPR (64, insn->def(1)); - emitField(63, 1, insn->tex.target.isArray()); - emitField(61, 2, insn->tex.target.isCube() ? 3 : - insn->tex.target.getDim() - 1); - emitTEXs (32); - emitGPR (24, insn->src(0)); - emitGPR (16, insn->def(0)); -} - -void -CodeEmitterGV100::emitTMML() -{ - const TexInstruction *insn = this->insn->asTex(); - - if (insn->tex.rIndirectSrc < 0) { - emitInsn (0xb69); - emitField(54, 5, prog->driver->io.auxCBSlot); - emitField(40, 14, insn->tex.r); - } else { - emitInsn (0x36a); - emitField(59, 1, 1); // .B - } - emitField(90, 1, insn->tex.liveOnly); - emitField(77, 1, insn->tex.derivAll); - emitField(72, 4, insn->tex.mask); - emitGPR (64, insn->def(1)); - emitField(63, 1, insn->tex.target.isArray()); - emitField(61, 2, insn->tex.target.isCube() ? 3 : - insn->tex.target.getDim() - 1); - emitTEXs (32); - emitGPR (24, insn->src(0)); - emitGPR (16, insn->def(0)); -} - -void -CodeEmitterGV100::emitTXD() -{ - const TexInstruction *insn = this->insn->asTex(); - - if (insn->tex.rIndirectSrc < 0) { - emitInsn (0xb6c); - emitField(54, 5, prog->driver->io.auxCBSlot); - emitField(40, 14, insn->tex.r); - } else { - emitInsn (0x36d); - emitField(59, 1, 1); // .B - } - emitField(90, 1, insn->tex.liveOnly); - emitPRED (81); - emitField(76, 1, insn->tex.useOffsets == 1); - emitField(72, 4, insn->tex.mask); - emitGPR (64, insn->def(1)); - emitField(63, 1, insn->tex.target.isArray()); - emitField(61, 2, insn->tex.target.isCube() ? 3 : - insn->tex.target.getDim() - 1); - emitTEXs (32); - emitGPR (24, insn->src(0)); - emitGPR (16, insn->def(0)); -} - -void -CodeEmitterGV100::emitTXQ() -{ - const TexInstruction *insn = this->insn->asTex(); - int type = 0; - - switch (insn->tex.query) { - case TXQ_DIMS : type = 0x00; break; - case TXQ_TYPE : type = 0x01; break; - case TXQ_SAMPLE_POSITION: type = 0x02; break; - default: - assert(!"invalid txq query"); - break; - } - - if (insn->tex.rIndirectSrc < 0) { - emitInsn (0xb6f); - emitField(54, 5, prog->driver->io.auxCBSlot); - emitField(40, 14, insn->tex.r); - } else { - emitInsn (0x370); - emitField(59, 1, 1); // .B - } - emitField(90, 1, insn->tex.liveOnly); - emitField(72, 4, insn->tex.mask); - emitGPR (64, insn->def(1)); - emitField(62, 2, type); - emitGPR (24, insn->src(0)); - emitGPR (16, insn->def(0)); -} - -/******************************************************************************* - * surface - ******************************************************************************/ - -void -CodeEmitterGV100::emitSUHandle(const int s) -{ - const TexInstruction *insn = this->insn->asTex(); - - assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP); - - if (insn->src(s).getFile() == FILE_GPR) { - emitGPR(64, insn->src(s)); - } else { - assert(0); - //XXX: not done - ImmediateValue *imm = insn->getSrc(s)->asImm(); - assert(imm); - emitField(0x33, 1, 1); - emitField(0x24, 13, imm->reg.data.u32); - } -} - -void -CodeEmitterGV100::emitSUTarget() -{ - const TexInstruction *insn = this->insn->asTex(); - int target = 0; - - assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP); - - if (insn->tex.target == TEX_TARGET_BUFFER) { - target = 1; - } else if (insn->tex.target == TEX_TARGET_1D_ARRAY) { - target = 2; - } else if (insn->tex.target == TEX_TARGET_2D || - insn->tex.target == TEX_TARGET_RECT) { - target = 3; - } else if (insn->tex.target == TEX_TARGET_2D_ARRAY || - insn->tex.target == TEX_TARGET_CUBE || - insn->tex.target == TEX_TARGET_CUBE_ARRAY) { - target = 4; - } else if (insn->tex.target == TEX_TARGET_3D) { - target = 5; - } else { - assert(insn->tex.target == TEX_TARGET_1D); - } - emitField(61, 3, target); -} - -void -CodeEmitterGV100::emitSUATOM() -{ - const TexInstruction *insn = this->insn->asTex(); - uint8_t type = 0, subOp; - - if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) - emitInsn(0x396); // SUATOM.D.CAS - else - emitInsn(0x394); // SUATOM.D - - emitSUTarget(); - - // destination type - switch (insn->dType) { - case TYPE_S32: type = 1; break; - case TYPE_U64: type = 2; break; - case TYPE_F32: type = 3; break; - case TYPE_S64: type = 5; break; - default: - assert(insn->dType == TYPE_U32); - break; - } - - // atomic operation - if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) { - subOp = 0; - } else if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) { - subOp = 8; - } else { - subOp = insn->subOp; - } - - emitField(87, 4, subOp); - emitPRED (81); - emitField(79, 2, 1); - emitField(73, 3, type); - emitField(72, 1, 0); // .BA - emitGPR (32, insn->src(1)); - emitGPR (24, insn->src(0)); - emitGPR (16, insn->def(0)); - - emitSUHandle(2); -} - -void -CodeEmitterGV100::emitSULD() -{ - const TexInstruction *insn = this->insn->asTex(); - int type = 0; - - if (insn->op == OP_SULDB) { - emitInsn(0x99a); - emitSUTarget(); - - switch (insn->dType) { - case TYPE_U8: type = 0; break; - case TYPE_S8: type = 1; break; - case TYPE_U16: type = 2; break; - case TYPE_S16: type = 3; break; - case TYPE_U32: type = 4; break; - case TYPE_U64: type = 5; break; - case TYPE_B128: type = 6; break; - default: - assert(0); - break; - } - emitField(73, 3, type); - } else { - emitInsn(0x998); - emitSUTarget(); - emitField(72, 4, 0xf); // rgba - } - - emitPRED (81); - emitLDSTc(77, 79); - - emitGPR (16, insn->def(0)); - emitGPR (24, insn->src(0)); - - emitSUHandle(1); -} - -void -CodeEmitterGV100::emitSUST() -{ - const TexInstruction *insn = this->insn->asTex(); - - emitInsn(0x99c); // SUST.P -#if 0 - if (insn->op == OP_SUSTB) - emitField(0x34, 1, 1); -#endif - emitSUTarget(); - - emitLDSTc(77, 79); - emitField(72, 4, 0xf); // rgba - emitGPR(32, insn->src(1)); - emitGPR(24, insn->src(0)); - emitSUHandle(2); -} - -/******************************************************************************* - * misc - ******************************************************************************/ - -void -CodeEmitterGV100::emitAL2P() -{ - emitInsn (0x920); - emitO (79); - emitField(74, 2, (insn->getDef(0)->reg.size / 4) - 1); - emitField(40, 11, insn->src(0).get()->reg.data.offset); - emitGPR (24, insn->src(0).getIndirect(0)); - emitGPR (16, insn->def(0)); -} - -void -CodeEmitterGV100::emitBAR() -{ - uint8_t subop, redop = 0x00; - - // 80 - // 01: DEFER_BLOCKING - // 78:77 - // 00: SYNC - // 01: ARV - // 02: RED - // 03: SCAN - // 75:74 - // 00: RED.POPC - // 01: RED.AND - // 02: RED.OR - - switch (insn->subOp) { - case NV50_IR_SUBOP_BAR_RED_POPC: subop = 0x02; redop = 0x00; break; - case NV50_IR_SUBOP_BAR_RED_AND : subop = 0x02; redop = 0x01; break; - case NV50_IR_SUBOP_BAR_RED_OR : subop = 0x02; redop = 0x02; break; - case NV50_IR_SUBOP_BAR_ARRIVE : subop = 0x01; break; - default: - subop = 0x00; - assert(insn->subOp == NV50_IR_SUBOP_BAR_SYNC); - break; - } - - if (insn->src(0).getFile() == FILE_GPR) { - emitInsn ((1 << 9) | 0x11d); - emitGPR (32, insn->src(0)); //XXX: nvdisasm shows src0==src1 - } else { - ImmediateValue *imm = insn->getSrc(0)->asImm(); - assert(imm); - if (insn->src(1).getFile() == FILE_GPR) { - emitInsn ((4 << 9) | 0x11d); - emitGPR (32, insn->src(1)); - } else { - emitInsn ((5 << 9) | 0x11d); - } - emitField(54, 4, imm->reg.data.u32); - } - - emitField(77, 2, subop); - emitField(74, 2, redop); - - if (insn->srcExists(2) && (insn->predSrc != 2)) { - emitField(90, 1, insn->src(2).mod == Modifier(NV50_IR_MOD_NOT)); - emitPRED (87, insn->src(2)); - } else { - emitField(87, 3, 7); - } -} - -void -CodeEmitterGV100::emitCCTL() -{ - if (insn->src(0).getFile() == FILE_MEMORY_GLOBAL) - emitInsn(0x98f); - else - emitInsn(0x990); - emitField(87, 4, insn->subOp); - emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8); - emitADDR (24, 32, 32, 0, insn->src(0)); -} - -void -CodeEmitterGV100::emitMEMBAR() -{ - emitInsn (0x992); - switch (NV50_IR_SUBOP_MEMBAR_SCOPE(insn->subOp)) { - case NV50_IR_SUBOP_MEMBAR_CTA: emitField(76, 3, 0); break; - case NV50_IR_SUBOP_MEMBAR_GL : emitField(76, 3, 2); break; - case NV50_IR_SUBOP_MEMBAR_SYS: emitField(76, 3, 3); break; - default: - assert(!"invalid scope"); - break; - } -} - -void -CodeEmitterGV100::emitPIXLD() -{ - emitInsn (0x925); - switch (insn->subOp) { - case NV50_IR_SUBOP_PIXLD_COVMASK : emitField(78, 3, 1); break; // .COVMASK - case NV50_IR_SUBOP_PIXLD_SAMPLEID: emitField(78, 3, 3); break; // .MY_INDEX - default: - assert(0); - break; - } - emitPRED (71); - emitGPR (16, insn->def(0)); -} - -void -CodeEmitterGV100::emitPLOP3_LUT() -{ - uint8_t op[2] = {}; - - switch (insn->op) { - case OP_AND: op[0] = 0xf0 & 0xcc; break; - case OP_OR : op[0] = 0xf0 | 0xcc; break; - case OP_XOR: op[0] = 0xf0 ^ 0xcc; break; - default: - assert(!"invalid PLOP3"); - break; - } - - emitInsn(0x81c); - emitNOT (90, insn->src(0)); - emitPRED(87, insn->src(0)); - emitPRED(84); // def(1) - emitPRED(81, insn->def(0)); - emitNOT (80, insn->src(1)); - emitPRED(77, insn->src(1)); - emitField(72, 5, op[0] >> 3); - emitNOT (71); // src(2) - emitPRED(68); // src(2) - emitField(64, 3, op[0] & 7); - emitField(16, 8, op[1]); -} - -void -CodeEmitterGV100::emitVOTE() -{ - const ImmediateValue *imm; - uint32_t u32; - - int r = -1, p = -1; - for (int i = 0; insn->defExists(i); i++) { - if (insn->def(i).getFile() == FILE_GPR) - r = i; - else if (insn->def(i).getFile() == FILE_PREDICATE) - p = i; - } - - emitInsn (0x806); - emitField(72, 2, insn->subOp); - if (r >= 0) - emitGPR (16, insn->def(r)); - else - emitGPR (16); - if (p >= 0) - emitPRED (81, insn->def(p)); - else - emitPRED (81); - - switch (insn->src(0).getFile()) { - case FILE_PREDICATE: - emitField(90, 1, insn->src(0).mod == Modifier(NV50_IR_MOD_NOT)); - emitPRED (87, insn->src(0)); - break; - case FILE_IMMEDIATE: - imm = insn->getSrc(0)->asImm(); - assert(imm); - u32 = imm->reg.data.u32; - assert(u32 == 0 || u32 == 1); - emitField(90, 1, u32 == 0); - emitPRED (87); - break; - default: - assert(!"Unhandled src"); - break; - } -} - -bool -CodeEmitterGV100::emitInstruction(Instruction *i) -{ - insn = i; - - switch (insn->op) { - case OP_ABS: - assert(!isFloatType(insn->dType)); - emitIABS(); - break; - case OP_ADD: - if (isFloatType(insn->dType)) { - if (insn->dType == TYPE_F32) - emitFADD(); - else - emitDADD(); - } else { - emitIADD3(); - } - break; - case OP_AFETCH: - emitAL2P(); - break; - case OP_AND: - case OP_OR: - case OP_XOR: - if (insn->def(0).getFile() == FILE_PREDICATE) { - emitPLOP3_LUT(); - } else { - assert(!"invalid logop"); - emitNOP(); - } - break; - case OP_ATOM: - if (insn->src(0).getFile() == FILE_MEMORY_SHARED) - emitATOMS(); - else - if (!insn->defExists(0) && insn->subOp < NV50_IR_SUBOP_ATOM_CAS) - emitRED(); - else - emitATOM(); - break; - case OP_BAR: - emitBAR(); - break; - case OP_BFIND: - emitFLO(); - break; - case OP_BMSK: - emitBMSK(); - break; - case OP_BREV: - emitBREV(); - break; - case OP_BRA: - case OP_JOIN: //XXX - emitBRA(); - break; - case OP_CCTL: - emitCCTL(); - break; - case OP_CEIL: - case OP_CVT: - case OP_FLOOR: - case OP_TRUNC: - if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE || - insn->src(0).getFile() == FILE_PREDICATE)) { - emitMOV(); - } else if (isFloatType(insn->dType)) { - if (isFloatType(insn->sType)) { - if (insn->sType == insn->dType) - emitFRND(); - else - emitF2F(); - } else { - emitI2F(); - } - } else { - if (isFloatType(insn->sType)) { - emitF2I(); - } else { - assert(!"I2I"); - emitNOP(); - } - } - break; - case OP_COS: - case OP_EX2: - case OP_LG2: - case OP_RCP: - case OP_RSQ: - case OP_SIN: - case OP_SQRT: - emitMUFU(); - break; - case OP_DISCARD: - emitKILL(); - break; - case OP_EMIT: - case OP_FINAL: - case OP_RESTART: - emitOUT(); - break; - case OP_EXIT: - emitEXIT(); - break; - case OP_EXPORT: - emitAST(); - break; - case OP_FMA: - case OP_MAD: - if (isFloatType(insn->dType)) { - if (insn->dType == TYPE_F32) - emitFFMA(); - else - emitDFMA(); - } else { - if (typeSizeof(insn->dType) != 8) - emitIMAD(); - else - emitIMAD_WIDE(); - } - break; - case OP_JOINAT: //XXX - emitNOP(); - break; - case OP_LINTERP: - emitIPA(); - break; - case OP_LOAD: - switch (insn->src(0).getFile()) { - case FILE_MEMORY_CONST : emitLDC(); break; - case FILE_MEMORY_LOCAL : emitLDL(); break; - case FILE_MEMORY_SHARED: emitLDS(); break; - case FILE_MEMORY_GLOBAL: emitLD(); break; - default: - assert(!"invalid load"); - emitNOP(); - break; - } - break; - case OP_LOP3_LUT: - emitLOP3_LUT(); - break; - case OP_MAX: - case OP_MIN: - if (isFloatType(insn->dType)) { - if (insn->dType == TYPE_F32) { - emitFMNMX(); - } else { - assert(!"invalid FMNMX"); - emitNOP(); - } - } else { - assert(!"invalid MNMX"); - emitNOP(); - } - break; - case OP_MEMBAR: - emitMEMBAR(); - break; - case OP_MOV: - emitMOV(); - break; - case OP_MUL: - if (isFloatType(insn->dType)) { - if (insn->dType == TYPE_F32) - emitFMUL(); - else - emitDMUL(); - } else { - assert(!"invalid IMUL"); - emitNOP(); - } - break; - case OP_PERMT: - emitPRMT(); - break; - case OP_PFETCH: - emitISBERD(); - break; - case OP_PIXLD: - emitPIXLD(); - break; - case OP_POPCNT: - emitPOPC(); - break; - case OP_QUADOP: - emitFSWZADD(); - break; - case OP_RDSV: - if (targ->isCS2RSV(insn->getSrc(0)->reg.data.sv.sv)) - emitCS2R(); - else - emitS2R(); - break; - case OP_SELP: - emitSEL(); - break; - case OP_SET: - case OP_SET_AND: - case OP_SET_OR: - case OP_SET_XOR: - if (insn->def(0).getFile() != FILE_PREDICATE) { - if (isFloatType(insn->dType)) { - if (insn->dType == TYPE_F32) { - emitFSET_BF(); - } else { - assert(!"invalid FSET"); - emitNOP(); - } - } else { - assert(!"invalid SET"); - emitNOP(); - } - } else { - if (isFloatType(insn->sType)) - if (insn->sType == TYPE_F64) - emitDSETP(); - else - emitFSETP(); - else - emitISETP(); - } - break; - case OP_SGXT: - emitSGXT(); - break; - case OP_SHF: - emitSHF(); - break; - case OP_SHFL: - emitSHFL(); - break; - case OP_SHLADD: - emitLEA(); - break; - case OP_STORE: - switch (insn->src(0).getFile()) { - case FILE_MEMORY_LOCAL : emitSTL(); break; - case FILE_MEMORY_SHARED: emitSTS(); break; - case FILE_MEMORY_GLOBAL: emitST(); break; - default: - assert(!"invalid store"); - emitNOP(); - break; - } - break; - case OP_SULDB: - case OP_SULDP: - emitSULD(); - break; - case OP_SUREDB: - case OP_SUREDP: - emitSUATOM(); - break; - case OP_SUSTB: - case OP_SUSTP: - emitSUST(); - break; - case OP_TEX: - case OP_TXB: - case OP_TXL: - emitTEX(); - break; - case OP_TXD: - emitTXD(); - break; - case OP_TXF: - emitTLD(); - break; - case OP_TXG: - emitTLD4(); - break; - case OP_TXLQ: - emitTMML(); - break; - case OP_TXQ: - emitTXQ(); - break; - case OP_VFETCH: - emitALD(); - break; - case OP_VOTE: - emitVOTE(); - break; - case OP_WARPSYNC: - emitWARPSYNC(); - break; - default: - assert(!"invalid opcode"); - emitNOP(); - break; - } - - code[3] &= 0x000001ff; - code[3] |= insn->sched << 9; - code += 4; - codeSize += 16; - return true; -} - -void -CodeEmitterGV100::prepareEmission(BasicBlock *bb) -{ - Function *func = bb->getFunction(); - Instruction *i; - int j; - - for (j = func->bbCount - 1; j >= 0 && !func->bbArray[j]->binSize; --j); - - for (; j >= 0; --j) { - BasicBlock *in = func->bbArray[j]; - Instruction *exit = in->getExit(); - - if (exit && exit->op == OP_BRA && exit->asFlow()->target.bb == bb) { - in->binSize -= 16; - func->binSize -= 16; - - for (++j; j < func->bbCount; ++j) - func->bbArray[j]->binPos -= 16; - - in->remove(exit); - } - bb->binPos = in->binPos + in->binSize; - if (in->binSize) // no more no-op branches to bb - break; - } - func->bbArray[func->bbCount++] = bb; - - if (!bb->getExit()) - return; - - for (i = bb->getEntry(); i; i = i->next) { - i->encSize = getMinEncodingSize(i); - bb->binSize += i->encSize; - } - - assert(!bb->getEntry() || (bb->getExit() && bb->getExit()->encSize == 16)); - - func->binSize += bb->binSize; -} - -void -CodeEmitterGV100::prepareEmission(Function *func) -{ - SchedDataCalculatorGM107 sched(targ); - CodeEmitter::prepareEmission(func); - sched.run(func, true, true); -} - -void -CodeEmitterGV100::prepareEmission(Program *prog) -{ - for (ArrayList::Iterator fi = prog->allFuncs.iterator(); - !fi.end(); fi.next()) { - Function *func = reinterpret_cast(fi.get()); - func->binPos = prog->binSize; - prepareEmission(func); - prog->binSize += func->binSize; - } - - this->prog = prog; -} - -CodeEmitterGV100::CodeEmitterGV100(TargetGV100 *target) - : CodeEmitter(target), targ(target) -{ - code = NULL; - codeSize = codeSizeLimit = 0; - relocInfo = NULL; -} -}; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.h deleted file mode 100644 index 15ab717..0000000 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.h +++ /dev/null @@ -1,403 +0,0 @@ -/* - * Copyright 2020 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ -#ifndef __NV50_IR_EMIT_GV100_H__ -#define __NV50_IR_EMIT_GV100_H__ -#include "codegen/nv50_ir_target_gv100.h" - -namespace nv50_ir { - -class CodeEmitterGV100 : public CodeEmitter { -public: - CodeEmitterGV100(TargetGV100 *target); - - virtual bool emitInstruction(Instruction *); - virtual uint32_t getMinEncodingSize(const Instruction *) const { return 16; } - -private: - const Program *prog; - const TargetGV100 *targ; - const Instruction *insn; - - virtual void prepareEmission(Program *); - virtual void prepareEmission(Function *); - virtual void prepareEmission(BasicBlock *); - - inline void emitInsn(uint32_t op) { - code[0] = op; - code[1] = 0; - code[2] = 0; - code[3] = 0; - if (insn->predSrc >= 0) { - emitField(12, 3, insn->getSrc(insn->predSrc)->rep()->reg.data.id); - emitField(15, 1, insn->cc == CC_NOT_P); - } else { - emitField(12, 3, 7); - } - }; - - inline void emitField(int b, int s, uint64_t v) { - if (b >= 0) { - uint64_t m = ~0ULL >> (64 - s); - uint64_t d = v & m; - assert(!(v & ~m) || (v & ~m) == ~m); - if (b < 64 && b + s > 64) { - *(uint64_t *)&code[0] |= d << b; - *(uint64_t *)&code[2] |= d >> (64 - b); - } else { - *(uint64_t *)&code[(b/64*2)] |= d << (b & 0x3f); - } - } - }; - - inline void emitABS(int pos, int src, bool supported) - { - if (insn->src(src).mod.abs()) { - assert(supported); - emitField(pos, 1, 1); - } - } - - inline void emitABS(int pos, int src) - { - emitABS(pos, src, true); - } - - inline void emitNEG(int pos, int src, bool supported) { - if (insn->src(src).mod.neg()) { - assert(supported); - emitField(pos, 1, 1); - } - } - - inline void emitNEG(int pos, int src) { - emitNEG(pos, src, true); - } - - inline void emitNOT(int pos) { - emitField(pos, 1, 0); - }; - - inline void emitNOT(int pos, const ValueRef &ref) { - emitField(pos, 1, !!(ref.mod & Modifier(NV50_IR_MOD_NOT))); - } - - inline void emitSAT(int pos) { - emitField(pos, 1, insn->saturate); - } - - inline void emitRND(int rmp, RoundMode rnd, int rip) { - int rm = 0, ri = 0; - switch (rnd) { - case ROUND_NI: ri = 1; - case ROUND_N : rm = 0; break; - case ROUND_MI: ri = 1; - case ROUND_M : rm = 1; break; - case ROUND_PI: ri = 1; - case ROUND_P : rm = 2; break; - case ROUND_ZI: ri = 1; - case ROUND_Z : rm = 3; break; - default: - assert(!"invalid round mode"); - break; - } - emitField(rip, 1, ri); - emitField(rmp, 2, rm); - } - - inline void emitRND(int pos) { - emitRND(pos, insn->rnd, -1); - } - - inline void emitFMZ(int pos, int len) { - emitField(pos, len, insn->dnz << 1 | insn->ftz); - } - - inline void emitPDIV(int pos) { - emitField(pos, 3, insn->postFactor + 4); - } - - inline void emitO(int pos) { - emitField(pos, 1, insn->getSrc(0)->reg.file == FILE_SHADER_OUTPUT); - } - - inline void emitP(int pos) { - emitField(pos, 1, insn->perPatch); - } - - inline void emitCond3(int pos, CondCode code) { - int data = 0; - - switch (code) { - case CC_FL : data = 0x00; break; - case CC_LTU: - case CC_LT : data = 0x01; break; - case CC_EQU: - case CC_EQ : data = 0x02; break; - case CC_LEU: - case CC_LE : data = 0x03; break; - case CC_GTU: - case CC_GT : data = 0x04; break; - case CC_NEU: - case CC_NE : data = 0x05; break; - case CC_GEU: - case CC_GE : data = 0x06; break; - case CC_TR : data = 0x07; break; - default: - assert(!"invalid cond3"); - break; - } - - emitField(pos, 3, data); - } - - inline void emitCond4(int pos, CondCode code) { - int data = 0; - - switch (code) { - case CC_FL: data = 0x00; break; - case CC_LT: data = 0x01; break; - case CC_EQ: data = 0x02; break; - case CC_LE: data = 0x03; break; - case CC_GT: data = 0x04; break; - case CC_NE: data = 0x05; break; - case CC_GE: data = 0x06; break; - // case CC_NUM: data = 0x07; break; - // case CC_NAN: data = 0x08; break; - case CC_LTU: data = 0x09; break; - case CC_EQU: data = 0x0a; break; - case CC_LEU: data = 0x0b; break; - case CC_GTU: data = 0x0c; break; - case CC_NEU: data = 0x0d; break; - case CC_GEU: data = 0x0e; break; - case CC_TR: data = 0x0f; break; - default: - assert(!"invalid cond4"); - break; - } - - emitField(pos, 4, data); - } - - inline void emitSYS(int pos, const Value *val) { - int id = val ? val->reg.data.id : -1; - - switch (id) { - case SV_LANEID : id = 0x00; break; - case SV_VERTEX_COUNT : id = 0x10; break; - case SV_INVOCATION_ID : id = 0x11; break; - case SV_THREAD_KILL : id = 0x13; break; - case SV_INVOCATION_INFO: id = 0x1d; break; - case SV_COMBINED_TID : id = 0x20; break; - case SV_TID : id = 0x21 + val->reg.data.sv.index; break; - case SV_CTAID : id = 0x25 + val->reg.data.sv.index; break; - case SV_LANEMASK_EQ : id = 0x38; break; - case SV_LANEMASK_LT : id = 0x39; break; - case SV_LANEMASK_LE : id = 0x3a; break; - case SV_LANEMASK_GT : id = 0x3b; break; - case SV_LANEMASK_GE : id = 0x3c; break; - case SV_CLOCK : id = 0x50 + val->reg.data.sv.index; break; - default: - assert(!"invalid system value"); - id = 0; - break; - } - - emitField(pos, 8, id); - } - - inline void emitSYS(int pos, const ValueRef &ref) { - emitSYS(pos, ref.get() ? ref.rep() : (const Value *)NULL); - } - - inline void emitGPR(int pos, const Value *val, int off) { - emitField(pos, 8, val && !val->inFile(FILE_FLAGS) ? - val->reg.data.id + off: 255); - } - - inline void emitGPR(int pos, const Value *v) { - emitGPR(pos, v, 0); - } - - inline void emitGPR(int pos) { - emitGPR(pos, (const Value *)NULL); - } - - inline void emitGPR(int pos, const ValueRef &ref) { - emitGPR(pos, ref.get() ? ref.rep() : (const Value *)NULL); - } - - inline void emitGPR(int pos, const ValueRef *ref) { - emitGPR(pos, ref ? ref->rep() : (const Value *)NULL); - } - - inline void emitGPR(int pos, const ValueDef &def) { - emitGPR(pos, def.get() ? def.rep() : (const Value *)NULL); - } - - inline void emitGPR(int pos, const ValueDef &def, int off) { - emitGPR(pos, def.get() ? def.rep() : (const Value *)NULL, off); - } - - inline void emitPRED(int pos, const Value *val) { - emitField(pos, 3, val ? val->reg.data.id : 7); - }; - - inline void emitPRED(int pos) { - emitPRED(pos, (const Value *)NULL); - } - - inline void emitPRED(int pos, const ValueRef &ref) { - emitPRED(pos, ref.get() ? ref.rep() : (const Value *)NULL); - } - - inline void emitPRED(int pos, const ValueDef &def) { - emitPRED(pos, def.get() ? def.rep() : (const Value *)NULL); - } - - inline void emitCBUF(int buf, int gpr, int off, int len, int align, - const ValueRef &ref) { - const Value *v = ref.get(); - const Symbol *s = v->asSym(); - - assert(!(s->reg.data.offset & ((1 << align) - 1))); - - emitField(buf, 5, v->reg.fileIndex); - if (gpr >= 0) - emitGPR(gpr, ref.getIndirect(0)); - emitField(off, 16, s->reg.data.offset); - } - - inline void emitIMMD(int pos, int len, const ValueRef &ref) { - const ImmediateValue *imm = ref.get()->asImm(); - uint32_t val = imm->reg.data.u32; - - if (insn->sType == TYPE_F64) { - assert(!(imm->reg.data.u64 & 0x00000000ffffffffULL)); - val = imm->reg.data.u64 >> 32; - } - - emitField(pos, len, val); - } - - inline void emitADDR(int gpr, int off, int len, int shr, - const ValueRef &ref) { - const Value *v = ref.get(); - assert(!(v->reg.data.offset & ((1 << shr) - 1))); - if (gpr >= 0) - emitGPR(gpr, ref.getIndirect(0)); - emitField(off, len, v->reg.data.offset >> shr); - } - - inline void emitFormA(uint16_t op, uint8_t forms, int src0, int src1, int src2); - inline void emitFormA_RRR(uint16_t op, int src1, int src2); - inline void emitFormA_RRI(uint16_t op, int src1, int src2); - inline void emitFormA_RRC(uint16_t op, int src1, int src2); - inline void emitFormA_I32(int src); - - void emitBRA(); - void emitEXIT(); - void emitKILL(); - void emitNOP(); - void emitWARPSYNC(); - - void emitCS2R(); - void emitF2F(); - void emitF2I(); - void emitFRND(); - void emitI2F(); - void emitMOV(); - void emitPRMT(); - void emitS2R(); - void emitSEL(); - void emitSHFL(); - - void emitFADD(); - void emitFFMA(); - void emitFMNMX(); - void emitFMUL(); - void emitFSET_BF(); - void emitFSETP(); - void emitFSWZADD(); - void emitMUFU(); - - void emitDADD(); - void emitDFMA(); - void emitDMUL(); - void emitDSETP(); - - void emitBMSK(); - void emitBREV(); - void emitFLO(); - void emitIABS(); - void emitIADD3(); - void emitIMAD(); - void emitIMAD_WIDE(); - void emitISETP(); - void emitLEA(); - void emitLOP3_LUT(); - void emitPOPC(); - void emitSGXT(); - void emitSHF(); - - void emitALD(); - void emitAST(); - void emitATOM(); - void emitATOMS(); - void emitIPA(); - void emitISBERD(); - void emitLDSTc(int, int); - void emitLDSTs(int, DataType); - void emitLD(); - void emitLDC(); - void emitLDL(); - void emitLDS(); - void emitOUT(); - void emitRED(); - void emitST(); - void emitSTL(); - void emitSTS(); - - void emitTEXs(int); - void emitTEX(); - void emitTLD(); - void emitTLD4(); - void emitTMML(); - void emitTXD(); - void emitTXQ(); - - void emitSUHandle(const int); - void emitSUTarget(); - void emitSUATOM(); - void emitSULD(); - void emitSUST(); - - void emitAL2P(); - void emitBAR(); - void emitCCTL(); - void emitMEMBAR(); - void emitPIXLD(); - void emitPLOP3_LUT(); - void emitVOTE(); -}; - -}; -#endif diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index eee9aa6..bd78b76 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -170,7 +170,6 @@ private: NirArrayLMemOffsets regToLmemOffset; NirBlockMap blocks; unsigned int curLoopDepth; - unsigned int curIfDepth; BasicBlock *exit; Value *zero; @@ -189,7 +188,6 @@ Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info) : ConverterCommon(prog, info), nir(nir), curLoopDepth(0), - curIfDepth(0), clipVertexOutput(-1) { zero = mkImm((uint32_t)0); @@ -573,10 +571,6 @@ Converter::getSubOp(nir_op op) case nir_op_imul_high: case nir_op_umul_high: return NV50_IR_SUBOP_MUL_HIGH; - case nir_op_ishl: - case nir_op_ishr: - case nir_op_ushr: - return NV50_IR_SUBOP_SHIFT_WRAP; default: return 0; } @@ -915,7 +909,7 @@ calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info, uint16_t slots; switch (stage) { case Program::TYPE_GEOMETRY: - slots = type->count_attribute_slots(false); + slots = type->uniform_locations(); if (input) slots /= info.gs.vertices_in; break; @@ -923,9 +917,9 @@ calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info, case Program::TYPE_TESSELLATION_EVAL: // remove first dimension if (var->data.patch || (!input && stage == Program::TYPE_TESSELLATION_EVAL)) - slots = type->count_attribute_slots(false); + slots = type->uniform_locations(); else - slots = type->fields.array->count_attribute_slots(false); + slots = type->fields.array->uniform_locations(); break; default: slots = type->count_attribute_slots(false); @@ -935,24 +929,6 @@ calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info, return slots; } -static uint8_t -getMaskForType(const glsl_type *type, uint8_t slot) { - uint16_t comp = type->without_array()->components(); - comp = comp ? comp : 4; - - if (glsl_base_type_is_64bit(type->without_array()->base_type)) { - comp *= 2; - if (comp > 4) { - if (slot % 2) - comp -= 4; - else - comp = 4; - } - } - - return (1 << comp) - 1; -} - bool Converter::assignSlots() { unsigned name; unsigned index; @@ -1005,8 +981,16 @@ bool Converter::assignSlots() { const glsl_type *type = var->type; int slot = var->data.location; uint16_t slots = calcSlots(type, prog->getType(), nir->info, true, var); + uint32_t comp = type->is_array() ? type->without_array()->component_slots() + : type->component_slots(); + uint32_t frac = var->data.location_frac; uint32_t vary = var->data.driver_location; + if (glsl_base_type_is_64bit(type->without_array()->base_type)) { + if (comp > 2) + slots *= 2; + } + assert(vary + slots <= PIPE_MAX_SHADER_INPUTS); switch(prog->getType()) { @@ -1030,8 +1014,6 @@ bool Converter::assignSlots() { info->numPatchConstants = MAX2(info->numPatchConstants, index + slots); break; case Program::TYPE_VERTEX: - if (slot >= VERT_ATTRIB_GENERIC0) - slot = VERT_ATTRIB_GENERIC0 + vary; vert_attrib_to_tgsi_semantic((gl_vert_attrib)slot, &name, &index); switch (name) { case TGSI_SEMANTIC_EDGEFLAG: @@ -1047,12 +1029,17 @@ bool Converter::assignSlots() { } for (uint16_t i = 0u; i < slots; ++i, ++vary) { - nv50_ir_varying *v = &info->in[vary]; - - v->patch = var->data.patch; - v->sn = name; - v->si = index + i; - v->mask |= getMaskForType(type, i) << var->data.location_frac; + info->in[vary].id = vary; + info->in[vary].patch = var->data.patch; + info->in[vary].sn = name; + info->in[vary].si = index + i; + if (glsl_base_type_is_64bit(type->without_array()->base_type)) + if (i & 0x1) + info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4); + else + info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf); + else + info->in[vary].mask |= ((1 << comp) - 1) << frac; } info->numInputs = std::max(info->numInputs, vary); } @@ -1061,8 +1048,16 @@ bool Converter::assignSlots() { const glsl_type *type = var->type; int slot = var->data.location; uint16_t slots = calcSlots(type, prog->getType(), nir->info, false, var); + uint32_t comp = type->is_array() ? type->without_array()->component_slots() + : type->component_slots(); + uint32_t frac = var->data.location_frac; uint32_t vary = var->data.driver_location; + if (glsl_base_type_is_64bit(type->without_array()->base_type)) { + if (comp > 2) + slots *= 2; + } + assert(vary < PIPE_MAX_SHADER_OUTPUTS); switch(prog->getType()) { @@ -1072,11 +1067,7 @@ bool Converter::assignSlots() { case TGSI_SEMANTIC_COLOR: if (!var->data.fb_fetch_output) info->prop.fp.numColourResults++; - - if (var->data.location == FRAG_RESULT_COLOR && - nir->info.outputs_written & BITFIELD64_BIT(var->data.location)) - info->prop.fp.separateFragData = true; - + info->prop.fp.separateFragData = true; // sometimes we get FRAG_RESULT_DATAX with data.index 0 // sometimes we get FRAG_RESULT_DATA0 with data.index X index = index == 0 ? var->data.index : index; @@ -1127,14 +1118,20 @@ bool Converter::assignSlots() { } for (uint16_t i = 0u; i < slots; ++i, ++vary) { - nv50_ir_varying *v = &info->out[vary]; - v->patch = var->data.patch; - v->sn = name; - v->si = index + i; - v->mask |= getMaskForType(type, i) << var->data.location_frac; + info->out[vary].id = vary; + info->out[vary].patch = var->data.patch; + info->out[vary].sn = name; + info->out[vary].si = index + i; + if (glsl_base_type_is_64bit(type->without_array()->base_type)) + if (i & 0x1) + info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4); + else + info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf); + else + info->out[vary].mask |= ((1 << comp) - 1) << frac; if (nir->info.outputs_read & 1ull << slot) - v->oread = 1; + info->out[vary].oread = 1; } info->numOutputs = std::max(info->numOutputs, vary); } @@ -1278,7 +1275,6 @@ Converter::parseNIR() info->bin.tlsSpace = 0; info->io.clipDistances = nir->info.clip_distance_array_size; info->io.cullDistances = nir->info.cull_distance_array_size; - info->io.layer_viewport_relative = nir->info.layer_viewport_relative; switch(prog->getType()) { case Program::TYPE_COMPUTE: @@ -1295,7 +1291,7 @@ Converter::parseNIR() info->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage; info->prop.fp.readsSampleLocations = (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS); - info->prop.fp.usesDiscard = nir->info.fs.uses_discard || nir->info.fs.uses_demote; + info->prop.fp.usesDiscard = nir->info.fs.uses_discard; info->prop.fp.usesSampleMaskIn = !!(nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN); break; @@ -1430,69 +1426,64 @@ Converter::visit(nir_block *block) bool Converter::visit(nir_if *nif) { - curIfDepth++; - DataType sType = getSType(nif->condition, false, false); Value *src = getSrc(&nif->condition, 0); nir_block *lastThen = nir_if_last_then_block(nif); nir_block *lastElse = nir_if_last_else_block(nif); - BasicBlock *headBB = bb; + assert(!lastThen->successors[1]); + assert(!lastElse->successors[1]); + BasicBlock *ifBB = convert(nir_if_first_then_block(nif)); BasicBlock *elseBB = convert(nir_if_first_else_block(nif)); bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE); bb->cfg.attach(&elseBB->cfg, Graph::Edge::TREE); - bool insertJoins = lastThen->successors[0] == lastElse->successors[0]; + // we only insert joinats, if both nodes end up at the end of the if again. + // the reason for this to not happens are breaks/continues/ret/... which + // have their own handling + if (lastThen->successors[0] == lastElse->successors[0]) + bb->joinAt = mkFlow(OP_JOINAT, convert(lastThen->successors[0]), + CC_ALWAYS, NULL); + mkFlow(OP_BRA, elseBB, CC_EQ, src)->setType(sType); foreach_list_typed(nir_cf_node, node, node, &nif->then_list) { if (!visit(node)) return false; } - setPosition(convert(lastThen), true); - if (!bb->isTerminated()) { + if (!bb->getExit() || + !bb->getExit()->asFlow() || + bb->getExit()->asFlow()->op == OP_JOIN) { BasicBlock *tailBB = convert(lastThen->successors[0]); mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL); bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD); - } else { - insertJoins = insertJoins && bb->getExit()->op == OP_BRA; } foreach_list_typed(nir_cf_node, node, node, &nif->else_list) { if (!visit(node)) return false; } - setPosition(convert(lastElse), true); - if (!bb->isTerminated()) { + if (!bb->getExit() || + !bb->getExit()->asFlow() || + bb->getExit()->asFlow()->op == OP_JOIN) { BasicBlock *tailBB = convert(lastElse->successors[0]); mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL); bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD); - } else { - insertJoins = insertJoins && bb->getExit()->op == OP_BRA; } - /* only insert joins for the most outer if */ - if (--curIfDepth) - insertJoins = false; - - /* we made sure that all threads would converge at the same block */ - if (insertJoins) { - BasicBlock *conv = convert(lastThen->successors[0]); - setPosition(headBB->getExit(), false); - headBB->joinAt = mkFlow(OP_JOINAT, conv, CC_ALWAYS, NULL); - setPosition(conv, false); + if (lastThen->successors[0] == lastElse->successors[0]) { + setPosition(convert(lastThen->successors[0]), true); mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1; } return true; } -// TODO: add convergency bool Converter::visit(nir_loop *loop) { @@ -1500,8 +1491,8 @@ Converter::visit(nir_loop *loop) func->loopNestingBound = std::max(func->loopNestingBound, curLoopDepth); BasicBlock *loopBB = convert(nir_loop_first_block(loop)); - BasicBlock *tailBB = convert(nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node))); - + BasicBlock *tailBB = + convert(nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node))); bb->cfg.attach(&loopBB->cfg, Graph::Edge::TREE); mkFlow(OP_PREBREAK, tailBB, CC_ALWAYS, NULL); @@ -1512,15 +1503,19 @@ Converter::visit(nir_loop *loop) if (!visit(node)) return false; } - - if (!bb->isTerminated()) { - mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL); - bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK); + Instruction *insn = bb->getExit(); + if (bb->cfg.incidentCount() != 0) { + if (!insn || !insn->asFlow()) { + mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL); + bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK); + } else if (insn && insn->op == OP_BRA && !insn->getPredicate() && + tailBB->cfg.incidentCount() == 0) { + // RA doesn't like having blocks around with no incident edge, + // so we create a fake one to make it happy + bb->cfg.attach(&tailBB->cfg, Graph::Edge::TREE); + } } - if (tailBB->cfg.incidentCount() == 0) - loopBB->cfg.attach(&tailBB->cfg, Graph::Edge::TREE); - curLoopDepth -= 1; return true; @@ -1565,7 +1560,6 @@ Converter::convert(nir_intrinsic_op intr) return SV_DRAWID; case nir_intrinsic_load_front_face: return SV_FACE; - case nir_intrinsic_is_helper_invocation: case nir_intrinsic_load_helper_invocation: return SV_THREAD_KILL; case nir_intrinsic_load_instance_id: @@ -1623,7 +1617,6 @@ Converter::visit(nir_intrinsic_instr *insn) { nir_intrinsic_op op = insn->intrinsic; const nir_intrinsic_info &opInfo = nir_intrinsic_infos[op]; - unsigned dest_components = nir_intrinsic_dest_components(insn); switch (op) { case nir_intrinsic_load_uniform: { @@ -1631,7 +1624,7 @@ Converter::visit(nir_intrinsic_instr *insn) const DataType dType = getDType(insn); Value *indirect; uint32_t coffset = getIndirect(insn, 0, 0, indirect); - for (uint8_t i = 0; i < dest_components; ++i) { + for (uint8_t i = 0; i < insn->num_components; ++i) { loadFrom(FILE_MEMORY_CONST, 0, dType, newDefs[i], 16 * coffset, i, indirect); } break; @@ -1642,7 +1635,7 @@ Converter::visit(nir_intrinsic_instr *insn) DataType dType = getSType(insn->src[0], false, false); uint32_t idx = getIndirect(insn, op == nir_intrinsic_store_output ? 1 : 2, 0, indirect); - for (uint8_t i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) { + for (uint8_t i = 0u; i < insn->num_components; ++i) { if (!((1u << i) & nir_intrinsic_write_mask(insn))) continue; @@ -1659,7 +1652,6 @@ Converter::visit(nir_intrinsic_instr *insn) break; } case Program::TYPE_GEOMETRY: - case Program::TYPE_TESSELLATION_EVAL: case Program::TYPE_VERTEX: { if (info->io.genUserClip > 0 && idx == (uint32_t)clipVertexOutput) { mkMov(clipVtx[i], src); @@ -1696,7 +1688,7 @@ Converter::visit(nir_intrinsic_instr *insn) srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LAYER, 0))); srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_SAMPLE_INDEX, 0))); - for (uint8_t i = 0u; i < dest_components; ++i) { + for (uint8_t i = 0u; i < insn->num_components; ++i) { defs.push_back(newDefs[i]); mask |= 1 << i; } @@ -1723,25 +1715,15 @@ Converter::visit(nir_intrinsic_instr *insn) // see load_barycentric_* handling if (prog->getType() == Program::TYPE_FRAGMENT) { + mode = translateInterpMode(&vary, nvirOp); if (op == nir_intrinsic_load_interpolated_input) { ImmediateValue immMode; if (getSrc(&insn->src[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode)) - mode = immMode.reg.data.u32; - } - if (mode == NV50_IR_INTERP_DEFAULT) - mode |= translateInterpMode(&vary, nvirOp); - else { - if (vary.linear) { - nvirOp = OP_LINTERP; - mode |= NV50_IR_INTERP_LINEAR; - } else { - nvirOp = OP_PINTERP; - mode |= NV50_IR_INTERP_PERSPECTIVE; - } + mode |= immMode.reg.data.u32; } } - for (uint8_t i = 0u; i < dest_components; ++i) { + for (uint8_t i = 0u; i < insn->num_components; ++i) { uint32_t address = getSlotAddress(insn, idx, i); Symbol *sym = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address); if (prog->getType() == Program::TYPE_FRAGMENT) { @@ -1832,11 +1814,9 @@ Converter::visit(nir_intrinsic_instr *insn) loadImm(newDefs[1], mode); break; } - case nir_intrinsic_demote: case nir_intrinsic_discard: mkOp(OP_DISCARD, TYPE_NONE, NULL); break; - case nir_intrinsic_demote_if: case nir_intrinsic_discard_if: { Value *pred = getSSA(1, FILE_PREDICATE); if (insn->num_components > 1) { @@ -1852,7 +1832,6 @@ Converter::visit(nir_intrinsic_instr *insn) case nir_intrinsic_load_base_instance: case nir_intrinsic_load_draw_id: case nir_intrinsic_load_front_face: - case nir_intrinsic_is_helper_invocation: case nir_intrinsic_load_helper_invocation: case nir_intrinsic_load_instance_id: case nir_intrinsic_load_invocation_id: @@ -1879,7 +1858,7 @@ Converter::visit(nir_intrinsic_instr *insn) SVSemantic sv = convert(op); LValues &newDefs = convert(&insn->dest); - for (uint8_t i = 0u; i < nir_intrinsic_dest_components(insn); ++i) { + for (uint8_t i = 0u; i < insn->num_components; ++i) { Value *def; if (typeSizeof(dType) == 8) def = getSSA(); @@ -1931,12 +1910,12 @@ Converter::visit(nir_intrinsic_instr *insn) if (op == nir_intrinsic_read_first_invocation) { mkOp1(OP_VOTE, TYPE_U32, tmp, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY; - mkOp1(OP_BREV, TYPE_U32, tmp, tmp); + mkOp2(OP_EXTBF, TYPE_U32, tmp, tmp, mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV; mkOp1(OP_BFIND, TYPE_U32, tmp, tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT; } else tmp = getSrc(&insn->src[1], 0); - for (uint8_t i = 0; i < dest_components; ++i) { + for (uint8_t i = 0; i < insn->num_components; ++i) { mkOp3(OP_SHFL, dType, newDefs[i], getSrc(&insn->src[0], i), tmp, mkImm(0x1f)) ->subOp = NV50_IR_SUBOP_SHFL_IDX; } @@ -1952,7 +1931,7 @@ Converter::visit(nir_intrinsic_instr *insn) Value *vtxBase = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS), mkImm(baseVertex), indirectVertex); - for (uint8_t i = 0u; i < dest_components; ++i) { + for (uint8_t i = 0u; i < insn->num_components; ++i) { uint32_t address = getSlotAddress(insn, idx, i); loadFrom(FILE_SHADER_INPUT, 0, dType, newDefs[i], address, 0, indirectOffset, vtxBase, info->in[idx].patch); @@ -1975,24 +1954,19 @@ Converter::visit(nir_intrinsic_instr *insn) vtxBase = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, FILE_ADDRESS), outBase, vtxBase); - for (uint8_t i = 0u; i < dest_components; ++i) { + for (uint8_t i = 0u; i < insn->num_components; ++i) { uint32_t address = getSlotAddress(insn, idx, i); loadFrom(FILE_SHADER_OUTPUT, 0, dType, newDefs[i], address, 0, indirectOffset, vtxBase, info->in[idx].patch); } break; } - case nir_intrinsic_emit_vertex: { + case nir_intrinsic_emit_vertex: if (info->io.genUserClip > 0) handleUserClipPlanes(); - uint32_t idx = nir_intrinsic_stream_id(insn); - mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1; - break; - } + // fallthrough case nir_intrinsic_end_primitive: { uint32_t idx = nir_intrinsic_stream_id(insn); - if (idx) - break; mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1; break; } @@ -2004,7 +1978,7 @@ Converter::visit(nir_intrinsic_instr *insn) uint32_t index = getIndirect(&insn->src[0], 0, indirectIndex) + 1; uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset); - for (uint8_t i = 0u; i < dest_components; ++i) { + for (uint8_t i = 0u; i < insn->num_components; ++i) { loadFrom(FILE_MEMORY_CONST, index, dType, newDefs[i], offset, i, indirectOffset, indirectIndex); } @@ -2027,7 +2001,7 @@ Converter::visit(nir_intrinsic_instr *insn) uint32_t buffer = getIndirect(&insn->src[1], 0, indirectBuffer); uint32_t offset = getIndirect(&insn->src[2], 0, indirectOffset); - for (uint8_t i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) { + for (uint8_t i = 0u; i < insn->num_components; ++i) { if (!((1u << i) & nir_intrinsic_write_mask(insn))) continue; Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, sType, @@ -2046,7 +2020,7 @@ Converter::visit(nir_intrinsic_instr *insn) uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer); uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset); - for (uint8_t i = 0u; i < dest_components; ++i) + for (uint8_t i = 0u; i < insn->num_components; ++i) loadFrom(FILE_MEMORY_BUFFER, buffer, dType, newDefs[i], offset, i, indirectOffset, indirectBuffer); @@ -2340,7 +2314,7 @@ Converter::visit(nir_intrinsic_instr *insn) Value *indirectOffset; uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset); - for (uint8_t i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) { + for (uint8_t i = 0u; i < insn->num_components; ++i) { if (!((1u << i) & nir_intrinsic_write_mask(insn))) continue; Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, sType, offset + i * typeSizeof(sType)); @@ -2354,7 +2328,7 @@ Converter::visit(nir_intrinsic_instr *insn) Value *indirectOffset; uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset); - for (uint8_t i = 0u; i < dest_components; ++i) + for (uint8_t i = 0u; i < insn->num_components; ++i) loadFrom(FILE_MEMORY_SHARED, 0, dType, newDefs[i], offset, i, indirectOffset); break; @@ -2393,7 +2367,7 @@ Converter::visit(nir_intrinsic_instr *insn) Value *indirectOffset; uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset); - for (auto i = 0u; i < dest_components; ++i) + for (auto i = 0u; i < insn->num_components; ++i) loadFrom(FILE_MEMORY_GLOBAL, 0, dType, newDefs[i], offset, i, indirectOffset); info->io.globalAccess |= 0x1; @@ -2402,7 +2376,7 @@ Converter::visit(nir_intrinsic_instr *insn) case nir_intrinsic_store_global: { DataType sType = getSType(insn->src[0], false, false); - for (auto i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) { + for (auto i = 0u; i < insn->num_components; ++i) { if (!((1u << i) & nir_intrinsic_write_mask(insn))) continue; if (typeSizeof(sType) == 8) { @@ -2444,6 +2418,7 @@ Converter::visit(nir_jump_instr *insn) case nir_jump_continue: { bool isBreak = insn->type == nir_jump_break; nir_block *block = insn->instr.block; + assert(!block->successors[1]); BasicBlock *target = convert(block->successors[0]); mkFlow(isBreak ? OP_BREAK : OP_CONT, target, CC_ALWAYS, NULL); bb->cfg.attach(&target->cfg, isBreak ? Graph::Edge::CROSS : Graph::Edge::BACK); @@ -2799,7 +2774,7 @@ Converter::visit(nir_alu_instr *insn) case nir_op_bfm: { DEFAULT_CHECKS; LValues &newDefs = convert(&insn->dest); - mkOp2(OP_BMSK, dType, newDefs[0], getSrc(&insn->src[1]), getSrc(&insn->src[0]))->subOp = NV50_IR_SUBOP_BMSK_W; + mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 0x808), getSrc(&insn->src[1])); break; } case nir_op_bitfield_insert: { @@ -2819,69 +2794,17 @@ Converter::visit(nir_alu_instr *insn) case nir_op_bitfield_reverse: { DEFAULT_CHECKS; LValues &newDefs = convert(&insn->dest); - mkOp1(OP_BREV, TYPE_U32, newDefs[0], getSrc(&insn->src[0])); + mkOp2(OP_EXTBF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV; break; } case nir_op_find_lsb: { DEFAULT_CHECKS; LValues &newDefs = convert(&insn->dest); Value *tmp = getSSA(); - mkOp1(OP_BREV, TYPE_U32, tmp, getSrc(&insn->src[0])); + mkOp2(OP_EXTBF, TYPE_U32, tmp, getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV; mkOp1(OP_BFIND, TYPE_U32, newDefs[0], tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT; break; } - case nir_op_extract_u8: { - DEFAULT_CHECKS; - LValues &newDefs = convert(&insn->dest); - Value *prmt = getSSA(); - mkOp2(OP_OR, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x4440)); - mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0)); - break; - } - case nir_op_extract_i8: { - DEFAULT_CHECKS; - LValues &newDefs = convert(&insn->dest); - Value *prmt = getSSA(); - mkOp3(OP_MAD, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x1111), loadImm(NULL, 0x8880)); - mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0)); - break; - } - case nir_op_extract_u16: { - DEFAULT_CHECKS; - LValues &newDefs = convert(&insn->dest); - Value *prmt = getSSA(); - mkOp3(OP_MAD, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x22), loadImm(NULL, 0x4410)); - mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0)); - break; - } - case nir_op_extract_i16: { - DEFAULT_CHECKS; - LValues &newDefs = convert(&insn->dest); - Value *prmt = getSSA(); - mkOp3(OP_MAD, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x2222), loadImm(NULL, 0x9910)); - mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0)); - break; - } - case nir_op_urol: { - DEFAULT_CHECKS; - LValues &newDefs = convert(&insn->dest); - mkOp3(OP_SHF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), - getSrc(&insn->src[1]), getSrc(&insn->src[0])) - ->subOp = NV50_IR_SUBOP_SHF_L | - NV50_IR_SUBOP_SHF_W | - NV50_IR_SUBOP_SHF_HI; - break; - } - case nir_op_uror: { - DEFAULT_CHECKS; - LValues &newDefs = convert(&insn->dest); - mkOp3(OP_SHF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), - getSrc(&insn->src[1]), getSrc(&insn->src[0])) - ->subOp = NV50_IR_SUBOP_SHF_R | - NV50_IR_SUBOP_SHF_W | - NV50_IR_SUBOP_SHF_LO; - break; - } // boolean conversions case nir_op_b2f32: { DEFAULT_CHECKS; @@ -3067,11 +2990,14 @@ Converter::handleDeref(nir_deref_instr *deref, Value * &indirect, const nir_vari CacheMode Converter::convert(enum gl_access_qualifier access) { - if (access & ACCESS_VOLATILE) + switch (access) { + case ACCESS_VOLATILE: return CACHE_CV; - if (access & ACCESS_COHERENT) + case ACCESS_COHERENT: return CACHE_CG; - return CACHE_CA; + default: + return CACHE_CA; + } } CacheMode @@ -3298,11 +3224,6 @@ Converter::run() NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL); NIR_PASS_V(nir, nir_lower_phis_to_scalar); - /*TODO: improve this lowering/optimisation loop so that we can use - * nir_opt_idiv_const effectively before this. - */ - NIR_PASS(progress, nir, nir_lower_idiv, nir_lower_idiv_precise); - do { progress = false; NIR_PASS(progress, nir, nir_copy_prop); @@ -3364,125 +3285,3 @@ Program::makeFromNIR(struct nv50_ir_prog_info *info) } } // namespace nv50_ir - -static nir_shader_compiler_options -nvir_nir_shader_compiler_options(int chipset) -{ - nir_shader_compiler_options op = {}; - op.lower_fdiv = (chipset >= NVISA_GV100_CHIPSET); - op.lower_ffma = false; - op.fuse_ffma = false; /* nir doesn't track mad vs fma */ - op.lower_flrp16 = (chipset >= NVISA_GV100_CHIPSET); - op.lower_flrp32 = true; - op.lower_flrp64 = true; - op.lower_fpow = false; // TODO: nir's lowering is broken, or we could use it - op.lower_fsat = false; - op.lower_fsqrt = false; // TODO: only before gm200 - op.lower_sincos = false; - op.lower_fmod = true; - op.lower_bitfield_extract = false; - op.lower_bitfield_extract_to_shifts = (chipset >= NVISA_GV100_CHIPSET); - op.lower_bitfield_insert = false; - op.lower_bitfield_insert_to_shifts = (chipset >= NVISA_GV100_CHIPSET); - op.lower_bitfield_insert_to_bitfield_select = false; - op.lower_bitfield_reverse = false; - op.lower_bit_count = false; - op.lower_ifind_msb = false; - op.lower_find_lsb = false; - op.lower_uadd_carry = true; // TODO - op.lower_usub_borrow = true; // TODO - op.lower_mul_high = false; - op.lower_negate = false; - op.lower_sub = true; - op.lower_scmp = true; // TODO: not implemented yet - op.lower_vector_cmp = false; - op.lower_idiv = true; - op.lower_bitops = false; - op.lower_isign = (chipset >= NVISA_GV100_CHIPSET); - op.lower_fsign = (chipset >= NVISA_GV100_CHIPSET); - op.lower_fdph = false; - op.lower_fdot = false; - op.fdot_replicates = false; // TODO - op.lower_ffloor = false; // TODO - op.lower_ffract = true; - op.lower_fceil = false; // TODO - op.lower_ftrunc = false; - op.lower_ldexp = true; - op.lower_pack_half_2x16 = true; - op.lower_pack_unorm_2x16 = true; - op.lower_pack_snorm_2x16 = true; - op.lower_pack_unorm_4x8 = true; - op.lower_pack_snorm_4x8 = true; - op.lower_unpack_half_2x16 = true; - op.lower_unpack_unorm_2x16 = true; - op.lower_unpack_snorm_2x16 = true; - op.lower_unpack_unorm_4x8 = true; - op.lower_unpack_snorm_4x8 = true; - op.lower_pack_split = false; - op.lower_extract_byte = (chipset < NVISA_GM107_CHIPSET); - op.lower_extract_word = (chipset < NVISA_GM107_CHIPSET); - op.lower_all_io_to_temps = false; - op.lower_all_io_to_elements = false; - op.vertex_id_zero_based = false; - op.lower_base_vertex = false; - op.lower_helper_invocation = false; - op.optimize_sample_mask_in = false; - op.lower_cs_local_index_from_id = true; - op.lower_cs_local_id_from_index = false; - op.lower_device_index_to_zero = false; // TODO - op.lower_wpos_pntc = false; // TODO - op.lower_hadd = true; // TODO - op.lower_add_sat = true; // TODO - op.vectorize_io = false; - op.lower_to_scalar = false; - op.unify_interfaces = false; - op.use_interpolated_input_intrinsics = true; - op.lower_mul_2x32_64 = true; // TODO - op.lower_rotate = (chipset < NVISA_GV100_CHIPSET); - op.has_imul24 = false; - op.intel_vec4 = false; - op.max_unroll_iterations = 32; - op.lower_int64_options = (nir_lower_int64_options) ( - ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul64 : 0) | - ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_isign64 : 0) | - nir_lower_divmod64 | - ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul_high64 : 0) | - ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_mov64 : 0) | - ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_icmp64 : 0) | - ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_iabs64 : 0) | - ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_ineg64 : 0) | - ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_logic64 : 0) | - ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_minmax64 : 0) | - ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_shift64 : 0) | - ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul_2x32_64 : 0) | - ((chipset >= NVISA_GM107_CHIPSET) ? nir_lower_extract64 : 0) | - nir_lower_ufind_msb64 - ); - op.lower_doubles_options = (nir_lower_doubles_options) ( - ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_drcp : 0) | - ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dsqrt : 0) | - ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_drsq : 0) | - ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dfract : 0) | - nir_lower_dmod | - ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dsub : 0) | - ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_ddiv : 0) - ); - return op; -} - -static const nir_shader_compiler_options gf100_nir_shader_compiler_options = -nvir_nir_shader_compiler_options(NVISA_GF100_CHIPSET); -static const nir_shader_compiler_options gm107_nir_shader_compiler_options = -nvir_nir_shader_compiler_options(NVISA_GM107_CHIPSET); -static const nir_shader_compiler_options gv100_nir_shader_compiler_options = -nvir_nir_shader_compiler_options(NVISA_GV100_CHIPSET); - -const nir_shader_compiler_options * -nv50_ir_nir_shader_compiler_options(int chipset) -{ - if (chipset >= NVISA_GV100_CHIPSET) - return &gv100_nir_shader_compiler_options; - if (chipset >= NVISA_GM107_CHIPSET) - return &gm107_nir_shader_compiler_options; - return &gf100_nir_shader_compiler_options; -} diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index 3fd76f6..60f3d58 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -3401,7 +3401,8 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) // ReadInvocationARB(src, findLSB(ballot(true))) val0 = getScratch(); mkOp1(OP_VOTE, TYPE_U32, val0, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY; - mkOp1(OP_BREV, TYPE_U32, val0, val0); + mkOp2(OP_EXTBF, TYPE_U32, val0, val0, mkImm(0x2000)) + ->subOp = NV50_IR_SUBOP_EXTBF_REV; mkOp1(OP_BFIND, TYPE_U32, val0, val0)->subOp = NV50_IR_SUBOP_BFIND_SAMT; src1 = val0; /* fallthrough */ @@ -3819,7 +3820,8 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { src0 = fetchSrc(0, c); val0 = getScratch(); - mkOp1(OP_BREV, TYPE_U32, val0, src0); + geni = mkOp2(OP_EXTBF, TYPE_U32, val0, src0, mkImm(0x2000)); + geni->subOp = NV50_IR_SUBOP_EXTBF_REV; geni = mkOp1(OP_BFIND, TYPE_U32, dst0[c], val0); geni->subOp = NV50_IR_SUBOP_BFIND_SAMT; } @@ -3834,7 +3836,8 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) case TGSI_OPCODE_BREV: FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { src0 = fetchSrc(0, c); - mkOp1(OP_BREV, TYPE_U32, dst0[c], src0); + geni = mkOp2(OP_EXTBF, TYPE_U32, dst0[c], src0, mkImm(0x2000)); + geni->subOp = NV50_IR_SUBOP_EXTBF_REV; } break; case TGSI_OPCODE_POPC: diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp index 9fad1dc..49a5f3b 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp @@ -239,8 +239,9 @@ GM107LoweringPass::handlePFETCH(Instruction *i) Value *tmp1 = bld.getScratch(); Value *tmp2 = bld.getScratch(); bld.mkOp1(OP_RDSV, TYPE_U32, tmp0, bld.mkSysVal(SV_INVOCATION_INFO, 0)); - bld.mkOp3(OP_PERMT, TYPE_U32, tmp1, tmp0, bld.mkImm(0x4442), bld.mkImm(0)); - bld.mkOp3(OP_PERMT, TYPE_U32, tmp0, tmp0, bld.mkImm(0x4440), bld.mkImm(0)); + bld.mkOp2(OP_SHR , TYPE_U32, tmp1, tmp0, bld.mkImm(16)); + bld.mkOp2(OP_AND , TYPE_U32, tmp0, tmp0, bld.mkImm(0xff)); + bld.mkOp2(OP_AND , TYPE_U32, tmp1, tmp1, bld.mkImm(0xff)); if (i->getSrc(1)) bld.mkOp2(OP_ADD , TYPE_U32, tmp2, i->getSrc(0), i->getSrc(1)); else diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.h index dfa1d03..71e5ea6 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.h @@ -21,7 +21,6 @@ class GM107LegalizeSSA : public NVC0LegalizeSSA private: virtual bool visit(Instruction *); -protected: void handlePFETCH(Instruction *); void handleLOAD(Instruction *); }; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.cpp deleted file mode 100644 index 644d492..0000000 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.cpp +++ /dev/null @@ -1,481 +0,0 @@ -/* - * Copyright 2020 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ -#include "codegen/nv50_ir.h" -#include "codegen/nv50_ir_build_util.h" - -#include "codegen/nv50_ir_target_nvc0.h" -#include "codegen/nv50_ir_lowering_gv100.h" - -#include - -namespace nv50_ir { - -bool -GV100LegalizeSSA::handleCMP(Instruction *i) -{ - Value *pred = bld.getSSA(1, FILE_PREDICATE); - - bld.mkCmp(OP_SET, reverseCondCode(i->asCmp()->setCond), TYPE_U8, pred, - i->sType, bld.mkImm(0), i->getSrc(2))->ftz = i->ftz; - bld.mkOp3(OP_SELP, TYPE_U32, i->getDef(0), i->getSrc(0), i->getSrc(1), pred); - return true; -} - -// NIR deals with most of these for us, but codegen generates more in pointer -// calculations from other lowering passes. -bool -GV100LegalizeSSA::handleIADD64(Instruction *i) -{ - Value *carry = bld.getSSA(1, FILE_PREDICATE); - Value *def[2] = { bld.getSSA(), bld.getSSA() }; - Value *src[2][2]; - - for (int s = 0; s < 2; s++) { - if (i->getSrc(s)->reg.size == 8) { - bld.mkSplit(src[s], 4, i->getSrc(s)); - } else { - src[s][0] = i->getSrc(s); - src[s][1] = bld.mkImm(0); - } - } - - bld.mkOp2(OP_ADD, TYPE_U32, def[0], src[0][0], src[1][0])-> - setFlagsDef(1, carry); - bld.mkOp2(OP_ADD, TYPE_U32, def[1], src[0][1], src[1][1])-> - setFlagsSrc(2, carry); - bld.mkOp2(OP_MERGE, i->dType, i->getDef(0), def[0], def[1]); - return true; -} - -bool -GV100LegalizeSSA::handleIMAD_HIGH(Instruction *i) -{ - Value *def = bld.getSSA(8), *defs[2]; - Value *src2; - - if (i->srcExists(2) && - (!i->getSrc(2)->asImm() || i->getSrc(2)->asImm()->reg.data.u32)) { - Value *src2s[2] = { bld.getSSA(), bld.getSSA() }; - bld.mkMov(src2s[0], bld.mkImm(0)); - bld.mkMov(src2s[1], i->getSrc(2)); - src2 = bld.mkOp2(OP_MERGE, TYPE_U64, bld.getSSA(8), src2s[0], src2s[1])->getDef(0); - } else { - src2 = bld.mkImm(0); - } - - bld.mkOp3(OP_MAD, isSignedType(i->sType) ? TYPE_S64 : TYPE_U64, def, - i->getSrc(0), i->getSrc(1), src2); - - bld.mkSplit(defs, 4, def); - i->def(0).replace(defs[1], false); - return true; -} - -// XXX: We should be able to do this in GV100LoweringPass, but codegen messes -// up somehow and swaps the condcode without swapping the sources. -// - tests/spec/glsl-1.50/execution/geometry/primitive-id-in.shader_test -bool -GV100LegalizeSSA::handleIMNMX(Instruction *i) -{ - Value *pred = bld.getSSA(1, FILE_PREDICATE); - - bld.mkCmp(OP_SET, (i->op == OP_MIN) ? CC_LT : CC_GT, i->dType, pred, - i->sType, i->getSrc(0), i->getSrc(1)); - bld.mkOp3(OP_SELP, i->dType, i->getDef(0), i->getSrc(0), i->getSrc(1), pred); - return true; -} - -bool -GV100LegalizeSSA::handleIMUL(Instruction *i) -{ - if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) - return handleIMAD_HIGH(i); - - bld.mkOp3(OP_MAD, i->dType, i->getDef(0), i->getSrc(0), i->getSrc(1), - bld.mkImm(0)); - return true; -} - -bool -GV100LegalizeSSA::handleLOP2(Instruction *i) -{ - uint8_t src0 = NV50_IR_SUBOP_LOP3_LUT_SRC0; - uint8_t src1 = NV50_IR_SUBOP_LOP3_LUT_SRC1; - uint8_t subOp; - - if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) - src0 = ~src0; - if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) - src1 = ~src1; - - switch (i->op) { - case OP_AND: subOp = src0 & src1; break; - case OP_OR : subOp = src0 | src1; break; - case OP_XOR: subOp = src0 ^ src1; break; - default: - assert(!"invalid LOP2 opcode"); - break; - } - - bld.mkOp3(OP_LOP3_LUT, TYPE_U32, i->getDef(0), i->getSrc(0), i->getSrc(1), - bld.mkImm(0))->subOp = subOp; - return true; -} - -bool -GV100LegalizeSSA::handleNOT(Instruction *i) -{ - bld.mkOp3(OP_LOP3_LUT, TYPE_U32, i->getDef(0), bld.mkImm(0), i->getSrc(0), - bld.mkImm(0))->subOp = (uint8_t)~NV50_IR_SUBOP_LOP3_LUT_SRC1; - return true; -} - -bool -GV100LegalizeSSA::handlePREEX2(Instruction *i) -{ - i->def(0).replace(i->src(0), false); - return true; -} - -bool -GV100LegalizeSSA::handleQUADON(Instruction *i) -{ - handleSHFL(i); // Inserts OP_WARPSYNC - return true; -} - -bool -GV100LegalizeSSA::handleQUADPOP(Instruction *i) -{ - return true; -} - -bool -GV100LegalizeSSA::handleSET(Instruction *i) -{ - Value *src2 = i->srcExists(2) ? i->getSrc(2) : NULL; - Value *pred = bld.getSSA(1, FILE_PREDICATE), *met; - Instruction *xsetp; - - if (isFloatType(i->dType)) { - if (i->sType == TYPE_F32) - return false; // HW has FSET.BF - met = bld.mkImm(0x3f800000); - } else { - met = bld.mkImm(0xffffffff); - } - - xsetp = bld.mkCmp(i->op, i->asCmp()->setCond, TYPE_U8, pred, i->sType, - i->getSrc(0), i->getSrc(1)); - xsetp->src(0).mod = i->src(0).mod; - xsetp->src(1).mod = i->src(1).mod; - xsetp->setSrc(2, src2); - xsetp->ftz = i->ftz; - - i = bld.mkOp3(OP_SELP, TYPE_U32, i->getDef(0), bld.mkImm(0), met, pred); - i->src(2).mod = Modifier(NV50_IR_MOD_NOT); - return true; -} - -bool -GV100LegalizeSSA::handleSHFL(Instruction *i) -{ - Instruction *sync = new_Instruction(func, OP_WARPSYNC, TYPE_NONE); - sync->fixed = 1; - sync->setSrc(0, bld.mkImm(0xffffffff)); - i->bb->insertBefore(i, sync); - return false; -} - -bool -GV100LegalizeSSA::handleShift(Instruction *i) -{ - Value *zero = bld.mkImm(0); - Value *src1 = i->getSrc(1); - Value *src0, *src2; - uint8_t subOp = i->op == OP_SHL ? NV50_IR_SUBOP_SHF_L : NV50_IR_SUBOP_SHF_R; - - if (i->op == OP_SHL && i->src(0).getFile() == FILE_GPR) { - src0 = i->getSrc(0); - src2 = zero; - } else { - src0 = zero; - src2 = i->getSrc(0); - subOp |= NV50_IR_SUBOP_SHF_HI; - } - if (i->subOp & NV50_IR_SUBOP_SHIFT_WRAP) - subOp |= NV50_IR_SUBOP_SHF_W; - - bld.mkOp3(OP_SHF, i->dType, i->getDef(0), src0, src1, src2)->subOp = subOp; - return true; -} - -bool -GV100LegalizeSSA::handleSUB(Instruction *i) -{ - Instruction *xadd = - bld.mkOp2(OP_ADD, i->dType, i->getDef(0), i->getSrc(0), i->getSrc(1)); - xadd->src(0).mod = i->src(0).mod; - xadd->src(1).mod = i->src(1).mod ^ Modifier(NV50_IR_MOD_NEG); - xadd->ftz = i->ftz; - return true; -} - -bool -GV100LegalizeSSA::visit(Instruction *i) -{ - bool lowered = false; - - bld.setPosition(i, false); - if (i->sType == TYPE_F32 && i->dType != TYPE_F16 && - prog->getType() != Program::TYPE_COMPUTE) - handleFTZ(i); - - switch (i->op) { - case OP_AND: - case OP_OR: - case OP_XOR: - if (i->def(0).getFile() != FILE_PREDICATE) - lowered = handleLOP2(i); - break; - case OP_NOT: - lowered = handleNOT(i); - break; - case OP_SHL: - case OP_SHR: - lowered = handleShift(i); - break; - case OP_SET: - case OP_SET_AND: - case OP_SET_OR: - case OP_SET_XOR: - if (i->def(0).getFile() != FILE_PREDICATE) - lowered = handleSET(i); - break; - case OP_SLCT: - lowered = handleCMP(i); - break; - case OP_PREEX2: - lowered = handlePREEX2(i); - break; - case OP_MUL: - if (!isFloatType(i->dType)) - lowered = handleIMUL(i); - break; - case OP_MAD: - if (!isFloatType(i->dType) && i->subOp == NV50_IR_SUBOP_MUL_HIGH) - lowered = handleIMAD_HIGH(i); - break; - case OP_SHFL: - lowered = handleSHFL(i); - break; - case OP_QUADON: - lowered = handleQUADON(i); - break; - case OP_QUADPOP: - lowered = handleQUADPOP(i); - break; - case OP_SUB: - lowered = handleSUB(i); - break; - case OP_MAX: - case OP_MIN: - if (!isFloatType(i->dType)) - lowered = handleIMNMX(i); - break; - case OP_ADD: - if (!isFloatType(i->dType) && typeSizeof(i->dType) == 8) - lowered = handleIADD64(i); - break; - case OP_PFETCH: - handlePFETCH(i); - break; - case OP_LOAD: - handleLOAD(i); - break; - default: - break; - } - - if (lowered) - delete_Instruction(prog, i); - - return true; -} - -bool -GV100LoweringPass::handleDMNMX(Instruction *i) -{ - Value *pred = bld.getSSA(1, FILE_PREDICATE); - Value *src0[2], *src1[2], *dest[2]; - - bld.mkCmp(OP_SET, (i->op == OP_MIN) ? CC_LT : CC_GT, TYPE_U32, pred, - i->sType, i->getSrc(0), i->getSrc(1)); - bld.mkSplit(src0, 4, i->getSrc(0)); - bld.mkSplit(src1, 4, i->getSrc(1)); - bld.mkSplit(dest, 4, i->getDef(0)); - bld.mkOp3(OP_SELP, TYPE_U32, dest[0], src0[0], src1[0], pred); - bld.mkOp3(OP_SELP, TYPE_U32, dest[1], src0[1], src1[1], pred); - bld.mkOp2(OP_MERGE, TYPE_U64, i->getDef(0), dest[0], dest[1]); - return true; -} - -bool -GV100LoweringPass::handleEXTBF(Instruction *i) -{ - Value *bit = bld.getScratch(); - Value *cnt = bld.getScratch(); - Value *mask = bld.getScratch(); - Value *zero = bld.mkImm(0); - - bld.mkOp3(OP_PERMT, TYPE_U32, bit, i->getSrc(1), bld.mkImm(0x4440), zero); - bld.mkOp3(OP_PERMT, TYPE_U32, cnt, i->getSrc(1), bld.mkImm(0x4441), zero); - bld.mkOp2(OP_BMSK, TYPE_U32, mask, bit, cnt); - bld.mkOp2(OP_AND, TYPE_U32, mask, i->getSrc(0), mask); - bld.mkOp2(OP_SHR, TYPE_U32, i->getDef(0), mask, bit); - if (isSignedType(i->dType)) - bld.mkOp2(OP_SGXT, TYPE_S32, i->getDef(0), i->getDef(0), cnt); - - return true; -} - -bool -GV100LoweringPass::handleFLOW(Instruction *i) -{ - i->op = OP_BRA; - return false; -} - -bool -GV100LoweringPass::handleI2I(Instruction *i) -{ - bld.mkCvt(OP_CVT, TYPE_F32, i->getDef(0), i->sType, i->getSrc(0))-> - subOp = i->subOp; - bld.mkCvt(OP_CVT, i->dType, i->getDef(0), TYPE_F32, i->getDef(0)); - return true; -} - -bool -GV100LoweringPass::handleINSBF(Instruction *i) -{ - Value *bit = bld.getScratch(); - Value *cnt = bld.getScratch(); - Value *mask = bld.getScratch(); - Value *src0 = bld.getScratch(); - Value *zero = bld.mkImm(0); - - bld.mkOp3(OP_PERMT, TYPE_U32, bit, i->getSrc(1), bld.mkImm(0x4440), zero); - bld.mkOp3(OP_PERMT, TYPE_U32, cnt, i->getSrc(1), bld.mkImm(0x4441), zero); - bld.mkOp2(OP_BMSK, TYPE_U32, mask, zero, cnt); - - bld.mkOp2(OP_AND, TYPE_U32, src0, i->getSrc(0), mask); - bld.mkOp2(OP_SHL, TYPE_U32, src0, src0, bit); - - bld.mkOp2(OP_SHL, TYPE_U32, mask, mask, bit); - bld.mkOp3(OP_LOP3_LUT, TYPE_U32, i->getDef(0), src0, i->getSrc(2), mask)-> - subOp = NV50_IR_SUBOP_LOP3_LUT(a | (b & ~c)); - - return true; -} - -bool -GV100LoweringPass::handlePINTERP(Instruction *i) -{ - Value *src2 = i->srcExists(2) ? i->getSrc(2) : NULL; - Instruction *ipa, *mul; - - ipa = bld.mkOp2(OP_LINTERP, TYPE_F32, i->getDef(0), i->getSrc(0), src2); - ipa->ipa = i->ipa; - mul = bld.mkOp2(OP_MUL, TYPE_F32, i->getDef(0), i->getDef(0), i->getSrc(1)); - - if (i->getInterpMode() == NV50_IR_INTERP_SC) { - ipa->setDef(1, bld.getSSA(1, FILE_PREDICATE)); - mul->setPredicate(CC_NOT_P, ipa->getDef(1)); - } - - return true; -} - -bool -GV100LoweringPass::handlePREFLOW(Instruction *i) -{ - return true; -} - -bool -GV100LoweringPass::handlePRESIN(Instruction *i) -{ - const float f = 1.0 / (2.0 * 3.14159265); - bld.mkOp2(OP_MUL, i->dType, i->getDef(0), i->getSrc(0), bld.mkImm(f)); - return true; -} - -bool -GV100LoweringPass::visit(Instruction *i) -{ - bool lowered = false; - - bld.setPosition(i, false); - - switch (i->op) { - case OP_BREAK: - case OP_CONT: - lowered = handleFLOW(i); - break; - case OP_PREBREAK: - case OP_PRECONT: - lowered = handlePREFLOW(i); - break; - case OP_CVT: - if (i->src(0).getFile() != FILE_PREDICATE && - i->def(0).getFile() != FILE_PREDICATE && - !isFloatType(i->dType) && !isFloatType(i->sType)) - lowered = handleI2I(i); - break; - case OP_EXTBF: - lowered = handleEXTBF(i); - break; - case OP_INSBF: - lowered = handleINSBF(i); - break; - case OP_MAX: - case OP_MIN: - if (i->dType == TYPE_F64) - lowered = handleDMNMX(i); - break; - case OP_PINTERP: - lowered = handlePINTERP(i); - break; - case OP_PRESIN: - lowered = handlePRESIN(i); - break; - default: - break; - } - - if (lowered) - delete_Instruction(prog, i); - - return true; -} - -} // namespace nv50_ir diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.h deleted file mode 100644 index d918c6e..0000000 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright 2020 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ -#ifndef __NV50_IR_LOWERING_GV100_H__ -#define __NV50_IR_LOWERING_GV100_H__ -#include "codegen/nv50_ir_lowering_gm107.h" - -namespace nv50_ir { - -class GV100LoweringPass : public Pass -{ -public: - GV100LoweringPass(Program *p) { - bld.setProgram(p); - } - -private: - BuildUtil bld; - - virtual bool visit(Instruction *); - - bool handleDMNMX(Instruction *); - bool handleEXTBF(Instruction *); - bool handleFLOW(Instruction *); - bool handleI2I(Instruction *); - bool handleINSBF(Instruction *); - bool handlePINTERP(Instruction *); - bool handlePREFLOW(Instruction *); - bool handlePRESIN(Instruction *); -}; - -class GV100LegalizeSSA : public GM107LegalizeSSA -{ -public: - GV100LegalizeSSA(Program *p) { - bld.setProgram(p); - } - -private: - virtual bool visit(Function *) { return true; } - virtual bool visit(BasicBlock *) { return true; } - virtual bool visit(Instruction *); - - bool handleCMP(Instruction *); - bool handleIADD64(Instruction *); - bool handleIMAD_HIGH(Instruction *); - bool handleIMNMX(Instruction *); - bool handleIMUL(Instruction *); - bool handleLOP2(Instruction *); - bool handleNOT(Instruction *); - bool handlePREEX2(Instruction *); - bool handleQUADON(Instruction *); - bool handleQUADPOP(Instruction *); - bool handleSET(Instruction *); - bool handleSHFL(Instruction *); - bool handleShift(Instruction *); - bool handleSUB(Instruction *); -}; -} -#endif diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 067f9ab..a608810 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -310,14 +310,6 @@ NVC0LegalizeSSA::handleSET(CmpInstruction *cmp) cmp->sType = hTy; } -void -NVC0LegalizeSSA::handleBREV(Instruction *i) -{ - i->op = OP_EXTBF; - i->subOp = NV50_IR_SUBOP_EXTBF_REV; - i->setSrc(1, bld.mkImm(0x2000)); -} - bool NVC0LegalizeSSA::visit(Function *fn) { @@ -362,9 +354,6 @@ NVC0LegalizeSSA::visit(BasicBlock *bb) if (typeSizeof(i->sType) == 8 && i->sType != TYPE_F64) handleSET(i->asCmp()); break; - case OP_BREV: - handleBREV(i); - break; default: break; } @@ -867,11 +856,11 @@ NVC0LegalizePostRA::visit(BasicBlock *bb) next = hi; } - if (i->op != OP_MOV && i->op != OP_PFETCH) - replaceZero(i); - if (i->op == OP_SAT || i->op == OP_NEG || i->op == OP_ABS) replaceCvt(i); + + if (i->op != OP_MOV && i->op != OP_PFETCH) + replaceZero(i); } } if (!bb->getEntry()) @@ -898,8 +887,6 @@ NVC0LoweringPass::visit(Function *fn) gpEmitAddress = bld.loadImm(NULL, 0)->asLValue(); if (fn->cfgExit) { bld.setPosition(BasicBlock::get(fn->cfgExit)->getExit(), false); - if (prog->getTarget()->getChipset() >= NVISA_GV100_CHIPSET) - bld.mkOp1(OP_FINAL, TYPE_NONE, NULL, gpEmitAddress)->fixed = 1; bld.mkMovToReg(0, gpEmitAddress); } } @@ -1727,8 +1714,7 @@ NVC0LoweringPass::handleCasExch(Instruction *cas, bool needCctl) cctl->setPredicate(cas->cc, cas->getPredicate()); } - if (cas->subOp == NV50_IR_SUBOP_ATOM_CAS && - targ->getChipset() < NVISA_GV100_CHIPSET) { + if (cas->subOp == NV50_IR_SUBOP_ATOM_CAS) { // CAS is crazy. It's 2nd source is a double reg, and the 3rd source // should be set to the high part of the double reg or bad things will // happen elsewhere in the universe. diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h index 8c99427..b4c405a 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h @@ -64,14 +64,12 @@ private: void handleDIV(Instruction *); // integer division, modulus void handleRCPRSQLib(Instruction *, Value *[]); void handleRCPRSQ(Instruction *); // double precision float recip/rsqrt + void handleFTZ(Instruction *); void handleSET(CmpInstruction *); void handleTEXLOD(TexInstruction *); void handleShift(Instruction *); - void handleBREV(Instruction *); protected: - void handleFTZ(Instruction *); - BuildUtil bld; }; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 3a4ec3c..2f46b0e 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -558,19 +558,6 @@ ConstantFolding::expr(Instruction *i, memset(&res.data, 0, sizeof(res.data)); switch (i->op) { - case OP_SGXT: { - int bits = b->data.u32; - if (bits) { - uint32_t data = a->data.u32 & (0xffffffff >> (32 - bits)); - if (bits < 32 && (data & (1 << (bits - 1)))) - data = data - (1 << bits); - res.data.u32 = data; - } - break; - } - case OP_BMSK: - res.data.u32 = ((1 << b->data.u32) - 1) << a->data.u32; - break; case OP_MAD: case OP_FMA: case OP_MUL: @@ -793,23 +780,6 @@ ConstantFolding::expr(Instruction *i, memset(&res.data, 0, sizeof(res.data)); switch (i->op) { - case OP_LOP3_LUT: - for (int n = 0; n < 32; n++) { - uint8_t lut = ((a->data.u32 >> n) & 1) << 2 | - ((b->data.u32 >> n) & 1) << 1 | - ((c->data.u32 >> n) & 1); - res.data.u32 |= !!(i->subOp & (1 << lut)) << n; - } - break; - case OP_PERMT: - if (!i->subOp) { - uint64_t input = (uint64_t)c->data.u32 << 32 | a->data.u32; - uint16_t permt = b->data.u32; - for (int n = 0 ; n < 4; n++, permt >>= 4) - res.data.u32 |= ((input >> ((permt & 0xf) * 8)) & 0xff) << n * 8; - } else - return; - break; case OP_INSBF: { int offset = b->data.u32 & 0xff; int width = (b->data.u32 >> 8) & 0xff; @@ -1556,12 +1526,6 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) i->subOp = 0; break; } - case OP_BREV: { - uint32_t res = util_bitreverse(imm0.reg.data.u32); - i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res)); - i->op = OP_MOV; - break; - } case OP_POPCNT: { // Only deal with 1-arg POPCNT here if (i->srcExists(1)) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp index ce0d250..5dcbf3c 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp @@ -93,10 +93,8 @@ const char *operationStr[OP_LAST + 1] = "and", "or", "xor", - "lop3 lut", "shl", "shr", - "shf", "max", "min", "sat", @@ -144,7 +142,6 @@ const char *operationStr[OP_LAST + 1] = "pinterp", "emit", "restart", - "final", "tex", "texbias", "texlod", @@ -180,10 +177,7 @@ const char *operationStr[OP_LAST + 1] = "insbf", "extbf", "bfind", - "brev", - "bmsk", "permt", - "sgxt", "atom", "bar", "vadd", @@ -199,7 +193,6 @@ const char *operationStr[OP_LAST + 1] = "shfl", "vote", "bufq", - "warpsync", "(invalid)" }; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp index 4e5b21d..6df2664 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp @@ -988,8 +988,6 @@ GCRA::coalesce(ArrayList& insns) case 0x110: case 0x120: case 0x130: - case 0x140: - case 0x160: ret = doCoalesce(insns, JOIN_MASK_UNION); break; default: @@ -2299,26 +2297,14 @@ RegAlloc::InsertConstraintsPass::texConstraintGM107(TexInstruction *tex) if (isTextureOp(tex->op)) textureMask(tex); - if (targ->getChipset() < NVISA_GV100_CHIPSET) { - if (isScalarTexGM107(tex)) { - handleScalarTexGM107(tex); - return; - } - - assert(!tex->tex.scalar); - condenseDefs(tex); - } else { - if (isTextureOp(tex->op)) { - int defCount = tex->defCount(0xff); - if (defCount > 3) - condenseDefs(tex, 2, 3); - if (defCount > 1) - condenseDefs(tex, 0, 1); - } else { - condenseDefs(tex); - } + if (isScalarTexGM107(tex)) { + handleScalarTexGM107(tex); + return; } + assert(!tex->tex.scalar); + condenseDefs(tex); + if (isSurfaceOp(tex->op)) { int s = tex->tex.target.getDim() + (tex->tex.target.isArray() || tex->tex.target.isCube()); @@ -2499,8 +2485,6 @@ RegAlloc::InsertConstraintsPass::visit(BasicBlock *bb) case 0x110: case 0x120: case 0x130: - case 0x140: - case 0x160: texConstraintGM107(tex); break; default: diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_sched_gm107.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_sched_gm107.h deleted file mode 100644 index 54443ae..0000000 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_sched_gm107.h +++ /dev/null @@ -1,156 +0,0 @@ -#ifndef __NV50_IR_SCHED_GM107_H__ -#define __NV50_IR_SCHED_GM107_H__ -namespace nv50_ir { - -class SchedDataCalculatorGM107 : public Pass -{ -public: - SchedDataCalculatorGM107(const TargetGM107 *targ) : targ(targ) {} - -private: - struct RegScores - { - struct ScoreData { - int r[256]; - int p[8]; - int c; - } rd, wr; - int base; - - void rebase(const int base) - { - const int delta = this->base - base; - if (!delta) - return; - this->base = 0; - - for (int i = 0; i < 256; ++i) { - rd.r[i] += delta; - wr.r[i] += delta; - } - for (int i = 0; i < 8; ++i) { - rd.p[i] += delta; - wr.p[i] += delta; - } - rd.c += delta; - wr.c += delta; - } - void wipe() - { - memset(&rd, 0, sizeof(rd)); - memset(&wr, 0, sizeof(wr)); - } - int getLatest(const ScoreData& d) const - { - int max = 0; - for (int i = 0; i < 256; ++i) - if (d.r[i] > max) - max = d.r[i]; - for (int i = 0; i < 8; ++i) - if (d.p[i] > max) - max = d.p[i]; - if (d.c > max) - max = d.c; - return max; - } - inline int getLatestRd() const - { - return getLatest(rd); - } - inline int getLatestWr() const - { - return getLatest(wr); - } - inline int getLatest() const - { - return MAX2(getLatestRd(), getLatestWr()); - } - void setMax(const RegScores *that) - { - for (int i = 0; i < 256; ++i) { - rd.r[i] = MAX2(rd.r[i], that->rd.r[i]); - wr.r[i] = MAX2(wr.r[i], that->wr.r[i]); - } - for (int i = 0; i < 8; ++i) { - rd.p[i] = MAX2(rd.p[i], that->rd.p[i]); - wr.p[i] = MAX2(wr.p[i], that->wr.p[i]); - } - rd.c = MAX2(rd.c, that->rd.c); - wr.c = MAX2(wr.c, that->wr.c); - } - void print(int cycle) - { - for (int i = 0; i < 256; ++i) { - if (rd.r[i] > cycle) - INFO("rd $r%i @ %i\n", i, rd.r[i]); - if (wr.r[i] > cycle) - INFO("wr $r%i @ %i\n", i, wr.r[i]); - } - for (int i = 0; i < 8; ++i) { - if (rd.p[i] > cycle) - INFO("rd $p%i @ %i\n", i, rd.p[i]); - if (wr.p[i] > cycle) - INFO("wr $p%i @ %i\n", i, wr.p[i]); - } - if (rd.c > cycle) - INFO("rd $c @ %i\n", rd.c); - if (wr.c > cycle) - INFO("wr $c @ %i\n", wr.c); - } - }; - - RegScores *score; // for current BB - std::vector scoreBoards; - - const TargetGM107 *targ; - bool visit(Function *); - bool visit(BasicBlock *); - - void commitInsn(const Instruction *, int); - int calcDelay(const Instruction *, int) const; - void setDelay(Instruction *, int, const Instruction *); - void recordWr(const Value *, int, int); - void checkRd(const Value *, int, int&) const; - - inline void emitYield(Instruction *); - inline void emitStall(Instruction *, uint8_t); - inline void emitReuse(Instruction *, uint8_t); - inline void emitWrDepBar(Instruction *, uint8_t); - inline void emitRdDepBar(Instruction *, uint8_t); - inline void emitWtDepBar(Instruction *, uint8_t); - - inline int getStall(const Instruction *) const; - inline int getWrDepBar(const Instruction *) const; - inline int getRdDepBar(const Instruction *) const; - inline int getWtDepBar(const Instruction *) const; - - void setReuseFlag(Instruction *); - - inline void printSchedInfo(int, const Instruction *) const; - - struct LiveBarUse { - LiveBarUse(Instruction *insn, Instruction *usei) - : insn(insn), usei(usei) { } - Instruction *insn; - Instruction *usei; - }; - - struct LiveBarDef { - LiveBarDef(Instruction *insn, Instruction *defi) - : insn(insn), defi(defi) { } - Instruction *insn; - Instruction *defi; - }; - - bool insertBarriers(BasicBlock *); - - bool doesInsnWriteTo(const Instruction *insn, const Value *val) const; - Instruction *findFirstUse(const Instruction *) const; - Instruction *findFirstDef(const Instruction *) const; - - bool needRdDepBar(const Instruction *) const; - bool needWrDepBar(const Instruction *) const; -}; - -}; // namespace nv50_ir -#endif diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp index 765375a..5c6d057 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp @@ -33,7 +33,7 @@ const uint8_t Target::operationSrcNr[] = 2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD 3, 3, // SHLADD, XMAD 1, 1, 1, // ABS, NEG, NOT - 2, 2, 2, 3, 2, 2, 3, // AND, OR, XOR, LOP3_LUT, SHL, SHR, SHF + 2, 2, 2, 2, 2, // AND, OR, XOR, SHL, SHR 2, 2, 1, // MAX, MIN, SAT 1, 1, 1, 1, // CEIL, FLOOR, TRUNC, CVT 3, 3, 3, 2, 3, 3, // SET_AND,OR,XOR, SET, SELP, SLCT @@ -43,7 +43,7 @@ const uint8_t Target::operationSrcNr[] = 0, 0, 0, // PRERET,CONT,BREAK 0, 0, 0, 0, 0, 0, // BRKPT, JOINAT, JOIN, DISCARD, EXIT, MEMBAR 1, 1, 1, 2, 1, 2, // VFETCH, PFETCH, AFETCH, EXPORT, LINTERP, PINTERP - 1, 1, 1, // EMIT, RESTART, FINAL + 1, 1, // EMIT, RESTART 1, 1, 1, // TEX, TXB, TXL, 1, 1, 1, 1, 1, 1, 2, // TXF, TXQ, TXD, TXG, TXLQ, TEXCSAA, TEXPREP 1, 1, 2, 2, 2, 2, 2, // SULDB, SULDP, SUSTB, SUSTP, SUREDB, SUREDP, SULEA @@ -51,15 +51,13 @@ const uint8_t Target::operationSrcNr[] = 0, // TEXBAR 1, 1, // DFDX, DFDY 1, 2, 1, 2, 0, 0, // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP - 2, 3, 2, 1, 1, 2, 3, // POPCNT, INSBF, EXTBF, BFIND, BREV, BMSK, PERMT - 2, // SGXT + 2, 3, 2, 1, 3, // POPCNT, INSBF, EXTBF, BFIND, PERMT 2, 2, // ATOM, BAR 2, 2, 2, 2, 3, 2, // VADD, VAVG, VMIN, VMAX, VSAD, VSET, 2, 2, 2, 1, // VSHR, VSHL, VSEL, CCTL 3, // SHFL 1, // VOTE 1, // BUFQ - 1, // WARPSYNC 0 }; @@ -77,10 +75,10 @@ const OpClass Target::operationClass[] = OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, - // ABS, NEG; NOT, AND, OR, XOR, LOP3_LUT; SHL, SHR, SHF + // ABS, NEG; NOT, AND, OR, XOR; SHL, SHR OPCLASS_CONVERT, OPCLASS_CONVERT, - OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, - OPCLASS_SHIFT, OPCLASS_SHIFT, OPCLASS_SHIFT, + OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, + OPCLASS_SHIFT, OPCLASS_SHIFT, // MAX, MIN OPCLASS_COMPARE, OPCLASS_COMPARE, // SAT, CEIL, FLOOR, TRUNC; CVT @@ -105,8 +103,8 @@ const OpClass Target::operationClass[] = OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_STORE, // LINTERP, PINTERP OPCLASS_SFU, OPCLASS_SFU, - // EMIT, RESTART, FINAL - OPCLASS_CONTROL, OPCLASS_CONTROL, OPCLASS_CONTROL, + // EMIT, RESTART + OPCLASS_CONTROL, OPCLASS_CONTROL, // TEX, TXB, TXL, TXF; TXQ, TXD, TXG, TXLQ; TEXCSAA, TEXPREP OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, @@ -121,9 +119,9 @@ const OpClass Target::operationClass[] = // DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL, - // POPCNT, INSBF, EXTBF, BFIND, BREV, BMSK; PERMT, SGXT - OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, + // POPCNT, INSBF, EXTBF, BFIND; PERMT OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, + OPCLASS_BITFIELD, // ATOM, BAR OPCLASS_ATOMIC, OPCLASS_CONTROL, // VADD, VAVG, VMIN, VMAX @@ -138,13 +136,10 @@ const OpClass Target::operationClass[] = OPCLASS_OTHER, // BUFQ OPCLASS_OTHER, - // WARPSYNC - OPCLASS_OTHER, OPCLASS_PSEUDO // LAST }; -extern Target *getTargetGV100(unsigned int chipset); extern Target *getTargetGM107(unsigned int chipset); extern Target *getTargetNVC0(unsigned int chipset); extern Target *getTargetNV50(unsigned int chipset); @@ -154,9 +149,6 @@ Target *Target::create(unsigned int chipset) STATIC_ASSERT(ARRAY_SIZE(operationSrcNr) == OP_LAST + 1); STATIC_ASSERT(ARRAY_SIZE(operationClass) == OP_LAST + 1); switch (chipset & ~0xf) { - case 0x160: - case 0x140: - return getTargetGV100(chipset); case 0x110: case 0x120: case 0x130: diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h index 0f7db11..afeca14 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h @@ -200,7 +200,7 @@ public: uint8_t dstMods; uint16_t srcFiles[3]; uint16_t dstFiles; - unsigned int minEncSize : 5; + unsigned int minEncSize : 4; unsigned int vector : 1; unsigned int predicate : 1; unsigned int commutative : 1; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gv100.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gv100.cpp deleted file mode 100644 index fd969e1..0000000 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gv100.cpp +++ /dev/null @@ -1,594 +0,0 @@ -/* - * Copyright 2020 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ -#include "codegen/nv50_ir_target_gv100.h" -#include "codegen/nv50_ir_lowering_gv100.h" -#include "codegen/nv50_ir_emit_gv100.h" - -namespace nv50_ir { - -void -TargetGV100::initOpInfo() -{ - unsigned int i, j; - - static const operation commutative[] = - { - OP_ADD, OP_MUL, OP_MAD, OP_FMA, OP_MAX, OP_MIN, - OP_SET_AND, OP_SET_OR, OP_SET_XOR, OP_SET, OP_SELP, OP_SLCT - }; - - static const operation noDest[] = - { - OP_EXIT - }; - - static const operation noPred[] = - { - }; - - for (i = 0; i < DATA_FILE_COUNT; ++i) - nativeFileMap[i] = (DataFile)i; - nativeFileMap[FILE_ADDRESS] = FILE_GPR; - nativeFileMap[FILE_FLAGS] = FILE_PREDICATE; - - for (i = 0; i < OP_LAST; ++i) { - opInfo[i].variants = NULL; - opInfo[i].op = (operation)i; - opInfo[i].srcTypes = 1 << (int)TYPE_F32; - opInfo[i].dstTypes = 1 << (int)TYPE_F32; - opInfo[i].immdBits = 0; - opInfo[i].srcNr = operationSrcNr[i]; - - for (j = 0; j < opInfo[i].srcNr; ++j) { - opInfo[i].srcMods[j] = 0; - opInfo[i].srcFiles[j] = 1 << (int)FILE_GPR; - } - opInfo[i].dstMods = 0; - opInfo[i].dstFiles = 1 << (int)FILE_GPR; - - opInfo[i].hasDest = 1; - opInfo[i].vector = (i >= OP_TEX && i <= OP_TEXCSAA); - opInfo[i].commutative = false; /* set below */ - opInfo[i].pseudo = (i < OP_MOV); - opInfo[i].predicate = !opInfo[i].pseudo; - opInfo[i].flow = (i >= OP_BRA && i <= OP_JOIN); - opInfo[i].minEncSize = 16; - } - for (i = 0; i < ARRAY_SIZE(commutative); ++i) - opInfo[commutative[i]].commutative = true; - for (i = 0; i < ARRAY_SIZE(noDest); ++i) - opInfo[noDest[i]].hasDest = 0; - for (i = 0; i < ARRAY_SIZE(noPred); ++i) - opInfo[noPred[i]].predicate = 0; -} - -struct opInfo { - struct { - uint8_t files; - uint8_t mods; - } src[3]; -}; - -#define SRC_NONE 0 -#define SRC_R (1 << FILE_GPR) -#define SRC_I (1 << FILE_MEMORY_CONST) -#define SRC_C (1 << FILE_IMMEDIATE) -#define SRC_RC (SRC_R | SRC_C) -#define SRC_RI (SRC_R | SRC_I ) -#define SRC_RIC (SRC_R | SRC_I | SRC_C) - -#define MOD_NONE 0 -#define MOD_NEG NV50_IR_MOD_NEG -#define MOD_ABS NV50_IR_MOD_ABS -#define MOD_NOT NV50_IR_MOD_NOT -#define MOD_NA (MOD_NEG | MOD_ABS) - -#define OPINFO(O,SA,MA,SB,MB,SC,MC) \ -static struct opInfo \ -opInfo_##O = { \ - .src = { { SRC_##SA, MOD_##MA }, \ - { SRC_##SB, MOD_##MB }, \ - { SRC_##SC, MOD_##MC }}, \ -}; - - -/* Handled by GV100LegalizeSSA. */ -OPINFO(FABS , RIC , NA , NONE, NONE, NONE, NONE); -OPINFO(FCMP , R , NONE, RIC , NONE, RIC , NONE); //XXX: use FSEL for mods -OPINFO(FNEG , RIC , NA , NONE, NONE, NONE, NONE); -OPINFO(FSET , R , NA , RIC , NA , NONE, NONE); -OPINFO(ICMP , R , NONE, RIC , NONE, RIC , NONE); -OPINFO(IMUL , R , NONE, RIC , NONE, NONE, NONE); -OPINFO(INEG , RIC , NEG , NONE, NONE, NONE, NONE); -OPINFO(ISET , R , NONE, RIC , NONE, NONE, NONE); -OPINFO(LOP2 , R , NOT , RIC , NOT , NONE, NONE); -OPINFO(NOT , RIC , NONE, NONE, NONE, NONE, NONE); -OPINFO(SAT , RIC , NA , NONE, NONE, NONE, NONE); -OPINFO(SHL , RIC , NONE, RIC , NONE, NONE, NONE); -OPINFO(SHR , RIC , NONE, RIC , NONE, NONE, NONE); -OPINFO(SUB , R , NONE, RIC , NEG , NONE, NONE); -OPINFO(IMNMX , R , NONE, RIC , NONE, NONE, NONE); - -/* Handled by CodeEmitterGV100. */ -OPINFO(AL2P , NONE, NONE, NONE, NONE, NONE, NONE); -OPINFO(ALD , NONE, NONE, NONE, NONE, NONE, NONE); -OPINFO(AST , NONE, NONE, NONE, NONE, NONE, NONE); -OPINFO(ATOM , NONE, NONE, NONE, NONE, NONE, NONE); -OPINFO(ATOMS , NONE, NONE, NONE, NONE, NONE, NONE); -OPINFO(BAR , NONE, NONE, NONE, NONE, NONE, NONE); -OPINFO(BRA , NONE, NONE, NONE, NONE, NONE, NONE); -OPINFO(BMSK , R , NONE, RIC , NONE, NONE, NONE); -OPINFO(BREV , RIC , NONE, NONE, NONE, NONE, NONE); -OPINFO(CCTL , NONE, NONE, NONE, NONE, NONE, NONE); -//OPINFO(CS2R , NONE, NONE, NONE, NONE, NONE, NONE); -OPINFO(DADD , R , NA , RIC , NA , NONE, NONE); -OPINFO(DFMA , R , NA , RIC , NA , RIC , NA ); -OPINFO(DMUL , R , NA , RIC , NA , NONE, NONE); -OPINFO(DSETP , R , NA , RIC , NA , NONE, NONE); -OPINFO(EXIT , NONE, NONE, NONE, NONE, NONE, NONE); -OPINFO(F2F , RIC , NA , NONE, NONE, NONE, NONE); -OPINFO(F2I , RIC , NA , NONE, NONE, NONE, NONE); -OPINFO(FADD , R , NA , RIC , NA , NONE, NONE); -OPINFO(FFMA , R , NA , RIC , NA , RIC , NA ); -OPINFO(FLO , RIC , NOT , NONE, NONE, NONE, NONE); -OPINFO(FMNMX , R , NA , RIC , NA , NONE, NONE); -OPINFO(FMUL , R , NA , RIC , NA , NONE, NONE); -OPINFO(FRND , RIC , NA , NONE, NONE, NONE, NONE); -OPINFO(FSET_BF , R , NA , RIC , NA , NONE, NONE); -OPINFO(FSETP , R , NA , RIC , NA , NONE, NONE); -OPINFO(FSWZADD , R , NONE, R , NONE, NONE, NONE); -OPINFO(I2F , RIC , NONE, NONE, NONE, NONE, NONE); -OPINFO(IABS , RIC , NONE, NONE, NONE, NONE, NONE); -OPINFO(IADD3 , R , NEG , RIC , NEG , R , NEG ); -OPINFO(IMAD , R , NONE, RIC , NONE, RIC , NEG ); -OPINFO(IMAD_WIDE, R , NONE, RIC , NONE, RC , NEG ); -OPINFO(IPA , NONE, NONE, NONE, NONE, NONE, NONE); -OPINFO(ISBERD , NONE, NONE, NONE, NONE, NONE, NONE); -OPINFO(ISETP , R , NONE, RIC , NONE, NONE, NONE); -OPINFO(KILL , NONE, NONE, NONE, NONE, NONE, NONE); -OPINFO(LD , NONE, NONE, NONE, NONE, NONE, NONE); -OPINFO(LDC , NONE, NONE, NONE, NONE, NONE, NONE); -OPINFO(LDL , NONE, NONE, NONE, NONE, NONE, NONE); -OPINFO(LDS , NONE, NONE, NONE, NONE, NONE, NONE); -OPINFO(LEA , R , NEG , I , NONE, RIC , NEG ); -OPINFO(LOP3_LUT , R , NONE, RIC , NONE, R , NONE); -OPINFO(MEMBAR , NONE, NONE, NONE, NONE, NONE, NONE); -OPINFO(MOV , RIC , NONE, NONE, NONE, NONE, NONE); -OPINFO(MUFU , RIC , NA , NONE, NONE, NONE, NONE); -OPINFO(NOP , NONE, NONE, NONE, NONE, NONE, NONE); -OPINFO(OUT , R , NONE, RI , NONE, NONE, NONE); -OPINFO(PIXLD , NONE, NONE, NONE, NONE, NONE, NONE); -OPINFO(PLOP3_LUT, NONE, NONE, NONE, NONE, NONE, NONE); -OPINFO(POPC , RIC , NOT , NONE, NONE, NONE, NONE); -OPINFO(PRMT , R , NONE, RIC , NONE, RIC , NONE); -OPINFO(RED , NONE, NONE, NONE, NONE, NONE, NONE); -OPINFO(SGXT , R , NONE, RIC , NONE, NONE, NONE); -OPINFO(S2R , NONE, NONE, NONE, NONE, NONE, NONE); -OPINFO(SEL , R , NONE, RIC , NONE, NONE, NONE); -OPINFO(SHF , R , NONE, RIC , NONE, RIC , NONE); -OPINFO(SHFL , R , NONE, R , NONE, R , NONE); -OPINFO(ST , NONE, NONE, NONE, NONE, NONE, NONE); -OPINFO(STL , NONE, NONE, NONE, NONE, NONE, NONE); -OPINFO(STS , NONE, NONE, NONE, NONE, NONE, NONE); -OPINFO(SUATOM , NONE, NONE, NONE, NONE, NONE, NONE); -OPINFO(SULD , NONE, NONE, NONE, NONE, NONE, NONE); -OPINFO(SUST , NONE, NONE, NONE, NONE, NONE, NONE); -OPINFO(TEX , NONE, NONE, NONE, NONE, NONE, NONE); -OPINFO(TLD , NONE, NONE, NONE, NONE, NONE, NONE); -OPINFO(TLD4 , NONE, NONE, NONE, NONE, NONE, NONE); -OPINFO(TMML , NONE, NONE, NONE, NONE, NONE, NONE); -OPINFO(TXD , NONE, NONE, NONE, NONE, NONE, NONE); -OPINFO(TXQ , NONE, NONE, NONE, NONE, NONE, NONE); -OPINFO(VOTE , NONE, NONE, NONE, NONE, NONE, NONE); -OPINFO(WARPSYNC , R , NONE, NONE, NONE, NONE, NONE); - -static const struct opInfo * -getOpInfo(const Instruction *i) -{ - switch (i->op) { - case OP_ABS: - if (isFloatType(i->dType)) - return &opInfo_FABS; - return &opInfo_IABS; - case OP_ADD: - if (isFloatType(i->dType)) { - if (i->dType == TYPE_F32) - return &opInfo_FADD; - else - return &opInfo_DADD; - } else { - return &opInfo_IADD3; - } - break; - case OP_AFETCH: return &opInfo_AL2P; - case OP_AND: - case OP_OR: - case OP_XOR: - if (i->def(0).getFile() == FILE_PREDICATE) - return &opInfo_PLOP3_LUT; - return &opInfo_LOP2; - case OP_ATOM: - if (i->src(0).getFile() == FILE_MEMORY_SHARED) - return &opInfo_ATOMS; - else - if (!i->defExists(0) && i->subOp < NV50_IR_SUBOP_ATOM_CAS) - return &opInfo_RED; - else - return &opInfo_ATOM; - break; - case OP_BAR: return &opInfo_BAR; - case OP_BFIND: return &opInfo_FLO; - case OP_BMSK: return &opInfo_BMSK; - case OP_BREV: return &opInfo_BREV; - case OP_BRA: - case OP_JOIN: return &opInfo_BRA; //XXX - case OP_CCTL: return &opInfo_CCTL; - case OP_CEIL: - case OP_CVT: - case OP_FLOOR: - case OP_TRUNC: - if (i->op == OP_CVT && (i->def(0).getFile() == FILE_PREDICATE || - i->src(0).getFile() == FILE_PREDICATE)) { - return &opInfo_MOV; - } else if (isFloatType(i->dType)) { - if (isFloatType(i->sType)) { - if (i->sType == i->dType) - return &opInfo_FRND; - else - return &opInfo_F2F; - } else { - return &opInfo_I2F; - } - } else { - if (isFloatType(i->sType)) - return &opInfo_F2I; - } - break; - case OP_COS: - case OP_EX2: - case OP_LG2: - case OP_RCP: - case OP_RSQ: - case OP_SIN: - case OP_SQRT: return &opInfo_MUFU; - case OP_DISCARD: return &opInfo_KILL; - case OP_EMIT: - case OP_FINAL: - case OP_RESTART: return &opInfo_OUT; - case OP_EXIT: return &opInfo_EXIT; - case OP_EXPORT: return &opInfo_AST; - case OP_FMA: - case OP_MAD: - if (isFloatType(i->dType)) { - if (i->dType == TYPE_F32) - return &opInfo_FFMA; - else - return &opInfo_DFMA; - } else { - if (typeSizeof(i->dType) != 8) - return &opInfo_IMAD; - else - return &opInfo_IMAD_WIDE; - } - break; - case OP_JOINAT: return &opInfo_NOP; //XXX - case OP_LINTERP: return &opInfo_IPA; - case OP_LOAD: - switch (i->src(0).getFile()) { - case FILE_MEMORY_CONST : return &opInfo_LDC; - case FILE_MEMORY_LOCAL : return &opInfo_LDL; - case FILE_MEMORY_SHARED: return &opInfo_LDS; - case FILE_MEMORY_GLOBAL: return &opInfo_LD; - default: - break; - } - break; - case OP_LOP3_LUT: return &opInfo_LOP3_LUT; - case OP_MAX: - case OP_MIN: - if (isFloatType(i->dType)) { - if (i->dType == TYPE_F32) - return &opInfo_FMNMX; - } else { - return &opInfo_IMNMX; - } - break; - case OP_MEMBAR: return &opInfo_MEMBAR; - case OP_MOV: return &opInfo_MOV; - case OP_MUL: - if (isFloatType(i->dType)) { - if (i->dType == TYPE_F32) - return &opInfo_FMUL; - else - return &opInfo_DMUL; - } - return &opInfo_IMUL; - case OP_NEG: - if (isFloatType(i->dType)) - return &opInfo_FNEG; - return &opInfo_INEG; - case OP_NOT: return &opInfo_NOT; - case OP_PERMT: return &opInfo_PRMT; - case OP_PFETCH: return &opInfo_ISBERD; - case OP_PIXLD: return &opInfo_PIXLD; - case OP_POPCNT: return &opInfo_POPC; - case OP_QUADOP: return &opInfo_FSWZADD; - case OP_RDSV: -#if 0 - if (targ->isCS2RSV(i->getSrc(0)->reg.data.sv.sv)) - return &opInfo_CS2R; -#endif - return &opInfo_S2R; - case OP_SAT: return &opInfo_SAT; - case OP_SELP: return &opInfo_SEL; - case OP_SET: - case OP_SET_AND: - case OP_SET_OR: - case OP_SET_XOR: - if (i->def(0).getFile() != FILE_PREDICATE) { - if (isFloatType(i->dType)) { - if (i->dType == TYPE_F32) - return &opInfo_FSET_BF; - } else { - if (isFloatType(i->sType)) - return &opInfo_FSET; - return &opInfo_ISET; - } - } else { - if (isFloatType(i->sType)) - if (i->sType == TYPE_F64) - return &opInfo_DSETP; - else - return &opInfo_FSETP; - else - return &opInfo_ISETP; - } - break; - case OP_SGXT: return &opInfo_SGXT; - case OP_SHF: return &opInfo_SHF; - case OP_SHFL: return &opInfo_SHFL; - case OP_SHL: return &opInfo_SHL; - case OP_SHLADD: return &opInfo_LEA; - case OP_SHR: return &opInfo_SHR; - case OP_SLCT: - if (isFloatType(i->sType)) - return &opInfo_FCMP; - return &opInfo_ICMP; - case OP_STORE: - switch (i->src(0).getFile()) { - case FILE_MEMORY_LOCAL : return &opInfo_STL; - case FILE_MEMORY_SHARED: return &opInfo_STS; - case FILE_MEMORY_GLOBAL: return &opInfo_ST; - default: - break; - } - break; - case OP_SUB: return &opInfo_SUB; - case OP_SULDB: - case OP_SULDP: return &opInfo_SULD; - case OP_SUREDB: - case OP_SUREDP: return &opInfo_SUATOM; - case OP_SUSTB: - case OP_SUSTP: return &opInfo_SUST; - case OP_TEX: - case OP_TXB: - case OP_TXL: return &opInfo_TEX; - case OP_TXD: return &opInfo_TXD; - case OP_TXF: return &opInfo_TLD; - case OP_TXG: return &opInfo_TLD4; - case OP_TXLQ: return &opInfo_TMML; - case OP_TXQ: return &opInfo_TXQ; - case OP_VFETCH: return &opInfo_ALD; - case OP_VOTE: return &opInfo_VOTE; - case OP_WARPSYNC: return &opInfo_WARPSYNC; - default: - break; - } - return NULL; -} - -bool -TargetGV100::isSatSupported(const Instruction *i) const -{ - switch (i->dType) { - case TYPE_F32: - switch (i->op) { - case OP_ADD: - case OP_FMA: - case OP_MAD: - case OP_MUL: return true; - default: - break; - } - break; - default: - break; - } - return false; -} - -bool -TargetGV100::isModSupported(const Instruction *i, int s, Modifier mod) const -{ - const struct opInfo *info = nv50_ir::getOpInfo(i); - uint8_t mods = 0; - if (info && s < (int)ARRAY_SIZE(info->src)) - mods = info->src[s].mods; - return (mod & Modifier(mods)) == mod; -} - -bool -TargetGV100::isOpSupported(operation op, DataType ty) const -{ - if (op == OP_MAD || op == OP_FMA) - return true; - if (ty == TYPE_F32) { - if (op == OP_MAX) - return true; - } - if (op == OP_RSQ) - return true; - if (op == OP_SET || - op == OP_SET_AND || - op == OP_SET_OR || - op == OP_SET_XOR) - return true; - if (op == OP_SHLADD) - return true; - return false; -} - -bool -TargetGV100::isBarrierRequired(const Instruction *i) const -{ - switch (i->op) { - case OP_BREV: - return true; - default: - break; - } - - return TargetGM107::isBarrierRequired(i); -} - -bool -TargetGV100::insnCanLoad(const Instruction *i, int s, - const Instruction *ld) const -{ - const struct opInfo *info = nv50_ir::getOpInfo(i); - uint16_t files = 0; - - if (ld->src(0).getFile() == FILE_IMMEDIATE && ld->getSrc(0)->reg.data.u64 == 0) - return (!i->isPseudo() && - !i->asTex() && - i->op != OP_EXPORT && i->op != OP_STORE); - - if (ld->src(0).isIndirect(0)) - return false; - - if (info && s < (int)ARRAY_SIZE(info->src)) { - files = info->src[s].files; - if ((s == 1 && i->srcExists(2) && i->src(2).getFile() != FILE_GPR) || - (s == 2 && i->srcExists(1) && i->src(1).getFile() != FILE_GPR)) { - files &= ~(1 << FILE_MEMORY_CONST); - files &= ~(1 << FILE_IMMEDIATE); - } else - if ((i->op == OP_SHL || i->op == OP_SHR) && - ((s == 0 && i->srcExists(1) && i->src(1).getFile() != FILE_GPR) || - (s == 1 && i->srcExists(0) && i->src(0).getFile() != FILE_GPR))) { - files &= ~(1 << FILE_MEMORY_CONST); - files &= ~(1 << FILE_IMMEDIATE); - } - } - - if (ld->src(0).getFile() == FILE_IMMEDIATE) { - if (i->sType == TYPE_F64) { - if (ld->getSrc(0)->asImm()->reg.data.u64 & 0x00000000ffffffff) - return false; - } - } - - return (files & (1 << ld->src(0).getFile())); -} - -void -TargetGV100::getBuiltinCode(const uint32_t **code, uint32_t *size) const -{ - //XXX: find out why gv100 (tu1xx is fine) hangs without this - static uint32_t builtin[] = { - 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, - 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, - 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, - 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, - 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, - 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, - 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, - 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, - 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, - 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, - 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, - 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, - 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, - 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, - 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, - 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, - 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, - 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, - 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, - 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, - 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, - 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, - 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, - 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, - 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, - 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, - 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, - }; - *code = builtin; - *size = sizeof(builtin); -} - -uint32_t -TargetGV100::getBuiltinOffset(int builtin) const -{ - return 0; -} - -bool -TargetGV100::runLegalizePass(Program *prog, CGStage stage) const -{ - if (stage == CG_STAGE_PRE_SSA) { - GM107LoweringPass pass1(prog); - GV100LoweringPass pass2(prog); - pass1.run(prog, false, true); - pass2.run(prog, false, true); - return true; - } else - if (stage == CG_STAGE_SSA) { - GV100LegalizeSSA pass(prog); - return pass.run(prog, false, true); - } else - if (stage == CG_STAGE_POST_RA) { - NVC0LegalizePostRA pass(prog); - return pass.run(prog, false, true); - } - return false; -} - -CodeEmitter * -TargetGV100::getCodeEmitter(Program::Type type) -{ - return new CodeEmitterGV100(this); -} - -TargetGV100::TargetGV100(unsigned int chipset) - : TargetGM107(chipset) -{ - initOpInfo(); -}; - -Target *getTargetGV100(unsigned int chipset) -{ - return new TargetGV100(chipset); -} - -}; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gv100.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gv100.h deleted file mode 100644 index 897e6a2..0000000 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gv100.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright 2020 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ -#ifndef __NV50_IR_TARGET_GV100_H__ -#define __NV50_IR_TARGET_GV100_H__ -#include "codegen/nv50_ir_target_gm107.h" - -namespace nv50_ir { - -class TargetGV100 : public TargetGM107 { -public: - TargetGV100(unsigned int chipset); - - virtual CodeEmitter *getCodeEmitter(Program::Type); - - virtual bool runLegalizePass(Program *, CGStage stage) const; - - virtual void getBuiltinCode(const uint32_t **code, uint32_t *size) const; - virtual uint32_t getBuiltinOffset(int builtin) const; - - virtual bool insnCanLoad(const Instruction *, int, const Instruction *) const; - virtual bool isOpSupported(operation, DataType) const; - virtual bool isModSupported(const Instruction *, int s, Modifier) const; - virtual bool isSatSupported(const Instruction *) const; - - virtual bool isBarrierRequired(const Instruction *) const; - -private: - void initOpInfo(); - void initProps(const struct opProperties *, int); -}; - -}; -#endif diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp index ed5b343..60134b4 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp @@ -30,7 +30,7 @@ Target *getTargetNVC0(unsigned int chipset) } TargetNVC0::TargetNVC0(unsigned int card) : - Target(card < 0x110, false, card >= 0xe4 && card < 0x140) + Target(card < 0x110, false, card >= 0xe4) { chipset = card; initOpInfo(); diff --git a/src/gallium/drivers/nouveau/meson.build b/src/gallium/drivers/nouveau/meson.build index 68cfebd..7a1d18a 100644 --- a/src/gallium/drivers/nouveau/meson.build +++ b/src/gallium/drivers/nouveau/meson.build @@ -150,31 +150,17 @@ files_libnouveau = files( 'codegen/nv50_ir_util.cpp', 'codegen/nv50_ir_util.h', 'codegen/unordered_set.h', - 'codegen/nv50_ir_emit_gv100.cpp', - 'codegen/nv50_ir_emit_gv100.h', 'codegen/nv50_ir_emit_gk110.cpp', 'codegen/nv50_ir_emit_gm107.cpp', 'codegen/nv50_ir_emit_nvc0.cpp', - 'codegen/nv50_ir_lowering_gv100.cpp', - 'codegen/nv50_ir_lowering_gv100.h', 'codegen/nv50_ir_lowering_gm107.cpp', 'codegen/nv50_ir_lowering_gm107.h', 'codegen/nv50_ir_lowering_nvc0.cpp', 'codegen/nv50_ir_lowering_nvc0.h', - 'codegen/nv50_ir_target_gv100.cpp', - 'codegen/nv50_ir_target_gv100.h', 'codegen/nv50_ir_target_gm107.cpp', 'codegen/nv50_ir_target_gm107.h', 'codegen/nv50_ir_target_nvc0.cpp', 'codegen/nv50_ir_target_nvc0.h', - 'nvc0/cla0c0qmd.h', - 'nvc0/clc0c0qmd.h', - 'nvc0/clc3c0qmd.h', - 'nvc0/drf.h', - 'nvc0/qmd.h', - 'nvc0/qmda0c0.c', - 'nvc0/qmdc0c0.c', - 'nvc0/qmdc3c0.c', 'nvc0/gm107_texture.xml.h', 'nvc0/nvc0_3d.xml.h', 'nvc0/nvc0_compute.c', diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c index 8606ba4..de9cce3 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.c +++ b/src/gallium/drivers/nouveau/nouveau_screen.c @@ -188,11 +188,7 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev) if (nv_dbg) nouveau_mesa_debug = atoi(nv_dbg); - if (dev->chipset < 0x140) - screen->prefer_nir = debug_get_bool_option("NV50_PROG_USE_NIR", false); - else - screen->prefer_nir = true; - + screen->prefer_nir = debug_get_bool_option("NV50_PROG_USE_NIR", false); screen->force_enable_cl = debug_get_bool_option("NOUVEAU_ENABLE_CL", false); if (screen->force_enable_cl) glsl_type_singleton_init_or_ref(); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h index 31e7cf8..899d73d 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h @@ -218,7 +218,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV50_2D_PATTERN_SELECT_BITMAP_1X64 0x00000002 #define NV50_2D_PATTERN_SELECT_COLOR 0x00000003 -#define NVC0_2D_SET_DST_COLOR_RENDER_TO_ZETA_SURFACE 0x000002b8 +#define NVC0_2D_UNK02B8(i0) (0x000002b8 + 0x4*(i0)) +#define NVC0_2D_UNK02B8__ESIZE 0x00000004 +#define NVC0_2D_UNK02B8__LEN 0x00000009 #define NVC0_2D_UNK2DC 0x000002dc diff --git a/src/gallium/drivers/nouveau/nv_object.xml.h b/src/gallium/drivers/nouveau/nv_object.xml.h index fac195d..664bfae 100644 --- a/src/gallium/drivers/nouveau/nv_object.xml.h +++ b/src/gallium/drivers/nouveau/nv_object.xml.h @@ -195,8 +195,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define GM200_3D_CLASS 0x0000b197 #define GP100_3D_CLASS 0x0000c097 #define GP102_3D_CLASS 0x0000c197 -#define GV100_3D_CLASS 0x0000c397 -#define TU102_3D_CLASS 0x0000c597 #define NV50_2D_CLASS 0x0000502d #define NVC0_2D_CLASS 0x0000902d #define NV50_COMPUTE_CLASS 0x000050c0 @@ -209,8 +207,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define GM200_COMPUTE_CLASS 0x0000b1c0 #define GP100_COMPUTE_CLASS 0x0000c0c0 #define GP104_COMPUTE_CLASS 0x0000c1c0 -#define GV100_COMPUTE_CLASS 0x0000c3c0 -#define TU102_COMPUTE_CLASS 0x0000c5c0 #define NV84_CRYPT_CLASS 0x000074c1 #define BLOB_NVC0_PCOPY1_CLASS 0x000090b8 #define BLOB_NVC0_PCOPY0_CLASS 0x000090b5 diff --git a/src/gallium/drivers/nouveau/nvc0/cla0c0qmd.h b/src/gallium/drivers/nouveau/nvc0/cla0c0qmd.h deleted file mode 100644 index c0829f1..0000000 --- a/src/gallium/drivers/nouveau/nvc0/cla0c0qmd.h +++ /dev/null @@ -1,660 +0,0 @@ -/******************************************************************************* - Copyright (c) 2016 NVIDIA Corporation - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to - deal in the Software without restriction, including without limitation the - rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - sell copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - DEALINGS IN THE SOFTWARE. - -*******************************************************************************/ - -/* AUTO GENERATED FILE -- DO NOT EDIT */ - -#ifndef __CLA0C0QMD_H__ -#define __CLA0C0QMD_H__ - -/* -** Queue Meta Data, Version 00_06 - */ - -// The below C preprocessor definitions describe "multi-word" structures, where -// fields may have bit numbers beyond 32. For example, MW(127:96) means -// the field is in bits 0-31 of word number 3 of the structure. The "MW(X:Y)" -// syntax is to distinguish from similar "X:Y" single-word definitions: the -// macros historically used for single-word definitions would fail with -// multi-word definitions. -// -// See nvmisc.h:DRF_VAL_MW() in the source code of the kernel -// interface layer of nvidia.ko for an example of how to manipulate -// these MW(X:Y) definitions. - -#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_A MW(30:0) -#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_B MW(31:31) -#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_C MW(62:32) -#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_D MW(63:63) -#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_E MW(94:64) -#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_F MW(95:95) -#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_G MW(126:96) -#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_H MW(127:127) -#define NVA0C0_QMDV00_06_QMD_RESERVED_A_A MW(159:128) -#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_I MW(191:160) -#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_J MW(196:192) -#define NVA0C0_QMDV00_06_QMD_RESERVED_A MW(199:197) -#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_K MW(200:200) -#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_K_FALSE 0x00000000 -#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_K_TRUE 0x00000001 -#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_L MW(201:201) -#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_L_FALSE 0x00000000 -#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_L_TRUE 0x00000001 -#define NVA0C0_QMDV00_06_SEMAPHORE_RELEASE_ENABLE0 MW(202:202) -#define NVA0C0_QMDV00_06_SEMAPHORE_RELEASE_ENABLE0_FALSE 0x00000000 -#define NVA0C0_QMDV00_06_SEMAPHORE_RELEASE_ENABLE0_TRUE 0x00000001 -#define NVA0C0_QMDV00_06_SEMAPHORE_RELEASE_ENABLE1 MW(203:203) -#define NVA0C0_QMDV00_06_SEMAPHORE_RELEASE_ENABLE1_FALSE 0x00000000 -#define NVA0C0_QMDV00_06_SEMAPHORE_RELEASE_ENABLE1_TRUE 0x00000001 -#define NVA0C0_QMDV00_06_QMD_RESERVED_B MW(207:204) -#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_M MW(222:208) -#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_N MW(223:223) -#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_N_FALSE 0x00000000 -#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_N_TRUE 0x00000001 -#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_O MW(248:224) -#define NVA0C0_QMDV00_06_QMD_RESERVED_C MW(249:249) -#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_HEADER_CACHE MW(250:250) -#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_HEADER_CACHE_FALSE 0x00000000 -#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_HEADER_CACHE_TRUE 0x00000001 -#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_SAMPLER_CACHE MW(251:251) -#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_SAMPLER_CACHE_FALSE 0x00000000 -#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_SAMPLER_CACHE_TRUE 0x00000001 -#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_DATA_CACHE MW(252:252) -#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_DATA_CACHE_FALSE 0x00000000 -#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_DATA_CACHE_TRUE 0x00000001 -#define NVA0C0_QMDV00_06_INVALIDATE_SHADER_DATA_CACHE MW(253:253) -#define NVA0C0_QMDV00_06_INVALIDATE_SHADER_DATA_CACHE_FALSE 0x00000000 -#define NVA0C0_QMDV00_06_INVALIDATE_SHADER_DATA_CACHE_TRUE 0x00000001 -#define NVA0C0_QMDV00_06_INVALIDATE_INSTRUCTION_CACHE MW(254:254) -#define NVA0C0_QMDV00_06_INVALIDATE_INSTRUCTION_CACHE_FALSE 0x00000000 -#define NVA0C0_QMDV00_06_INVALIDATE_INSTRUCTION_CACHE_TRUE 0x00000001 -#define NVA0C0_QMDV00_06_INVALIDATE_SHADER_CONSTANT_CACHE MW(255:255) -#define NVA0C0_QMDV00_06_INVALIDATE_SHADER_CONSTANT_CACHE_FALSE 0x00000000 -#define NVA0C0_QMDV00_06_INVALIDATE_SHADER_CONSTANT_CACHE_TRUE 0x00000001 -#define NVA0C0_QMDV00_06_PROGRAM_OFFSET MW(287:256) -#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_P MW(319:288) -#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_Q MW(327:320) -#define NVA0C0_QMDV00_06_QMD_RESERVED_D MW(335:328) -#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_R MW(351:336) -#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_S MW(357:352) -#define NVA0C0_QMDV00_06_QMD_RESERVED_E MW(365:358) -#define NVA0C0_QMDV00_06_RELEASE_MEMBAR_TYPE MW(366:366) -#define NVA0C0_QMDV00_06_RELEASE_MEMBAR_TYPE_FE_NONE 0x00000000 -#define NVA0C0_QMDV00_06_RELEASE_MEMBAR_TYPE_FE_SYSMEMBAR 0x00000001 -#define NVA0C0_QMDV00_06_CWD_MEMBAR_TYPE MW(369:368) -#define NVA0C0_QMDV00_06_CWD_MEMBAR_TYPE_L1_NONE 0x00000000 -#define NVA0C0_QMDV00_06_CWD_MEMBAR_TYPE_L1_SYSMEMBAR 0x00000001 -#define NVA0C0_QMDV00_06_CWD_MEMBAR_TYPE_L1_MEMBAR 0x00000003 -#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_T MW(370:370) -#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_T_FALSE 0x00000000 -#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_T_TRUE 0x00000001 -#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_U MW(371:371) -#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_U_FALSE 0x00000000 -#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_U_TRUE 0x00000001 -#define NVA0C0_QMDV00_06_THROTTLED MW(372:372) -#define NVA0C0_QMDV00_06_THROTTLED_FALSE 0x00000000 -#define NVA0C0_QMDV00_06_THROTTLED_TRUE 0x00000001 -#define NVA0C0_QMDV00_06_QMD_RESERVED_E2_A MW(376:376) -#define NVA0C0_QMDV00_06_QMD_RESERVED_E2_B MW(377:377) -#define NVA0C0_QMDV00_06_API_VISIBLE_CALL_LIMIT MW(378:378) -#define NVA0C0_QMDV00_06_API_VISIBLE_CALL_LIMIT__32 0x00000000 -#define NVA0C0_QMDV00_06_API_VISIBLE_CALL_LIMIT_NO_CHECK 0x00000001 -#define NVA0C0_QMDV00_06_SHARED_MEMORY_BANK_MAPPING MW(379:379) -#define NVA0C0_QMDV00_06_SHARED_MEMORY_BANK_MAPPING_FOUR_BYTES_PER_BANK 0x00000000 -#define NVA0C0_QMDV00_06_SHARED_MEMORY_BANK_MAPPING_EIGHT_BYTES_PER_BANK 0x00000001 -#define NVA0C0_QMDV00_06_SAMPLER_INDEX MW(382:382) -#define NVA0C0_QMDV00_06_SAMPLER_INDEX_INDEPENDENTLY 0x00000000 -#define NVA0C0_QMDV00_06_SAMPLER_INDEX_VIA_HEADER_INDEX 0x00000001 -#define NVA0C0_QMDV00_06_QMD_RESERVED_E3_A MW(383:383) -#define NVA0C0_QMDV00_06_CTA_RASTER_WIDTH MW(415:384) -#define NVA0C0_QMDV00_06_CTA_RASTER_HEIGHT MW(431:416) -#define NVA0C0_QMDV00_06_CTA_RASTER_DEPTH MW(447:432) -#define NVA0C0_QMDV00_06_CTA_RASTER_WIDTH_RESUME MW(479:448) -#define NVA0C0_QMDV00_06_CTA_RASTER_HEIGHT_RESUME MW(495:480) -#define NVA0C0_QMDV00_06_CTA_RASTER_DEPTH_RESUME MW(511:496) -#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_V MW(535:512) -#define NVA0C0_QMDV00_06_QMD_RESERVED_F MW(542:536) -#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_W MW(543:543) -#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_W_FALSE 0x00000000 -#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_W_TRUE 0x00000001 -#define NVA0C0_QMDV00_06_SHARED_MEMORY_SIZE MW(561:544) -#define NVA0C0_QMDV00_06_QMD_RESERVED_G MW(575:562) -#define NVA0C0_QMDV00_06_QMD_VERSION MW(579:576) -#define NVA0C0_QMDV00_06_QMD_MAJOR_VERSION MW(583:580) -#define NVA0C0_QMDV00_06_QMD_RESERVED_H MW(591:584) -#define NVA0C0_QMDV00_06_CTA_THREAD_DIMENSION0 MW(607:592) -#define NVA0C0_QMDV00_06_CTA_THREAD_DIMENSION1 MW(623:608) -#define NVA0C0_QMDV00_06_CTA_THREAD_DIMENSION2 MW(639:624) -#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_VALID(i) MW((640+(i)*1):(640+(i)*1)) -#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_VALID_FALSE 0x00000000 -#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_VALID_TRUE 0x00000001 -#define NVA0C0_QMDV00_06_QMD_RESERVED_I MW(668:648) -#define NVA0C0_QMDV00_06_L1_CONFIGURATION MW(671:669) -#define NVA0C0_QMDV00_06_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB 0x00000001 -#define NVA0C0_QMDV00_06_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB 0x00000002 -#define NVA0C0_QMDV00_06_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB 0x00000003 -#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_X MW(703:672) -#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_Y MW(735:704) -#define NVA0C0_QMDV00_06_RELEASE0_ADDRESS_LOWER MW(767:736) -#define NVA0C0_QMDV00_06_RELEASE0_ADDRESS_UPPER MW(775:768) -#define NVA0C0_QMDV00_06_QMD_RESERVED_J MW(783:776) -#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP MW(790:788) -#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP_RED_ADD 0x00000000 -#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP_RED_MIN 0x00000001 -#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP_RED_MAX 0x00000002 -#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP_RED_INC 0x00000003 -#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP_RED_DEC 0x00000004 -#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP_RED_AND 0x00000005 -#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP_RED_OR 0x00000006 -#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP_RED_XOR 0x00000007 -#define NVA0C0_QMDV00_06_QMD_RESERVED_K MW(791:791) -#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_FORMAT MW(793:792) -#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_FORMAT_UNSIGNED_32 0x00000000 -#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_FORMAT_SIGNED_32 0x00000001 -#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_ENABLE MW(794:794) -#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_ENABLE_FALSE 0x00000000 -#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_ENABLE_TRUE 0x00000001 -#define NVA0C0_QMDV00_06_RELEASE0_STRUCTURE_SIZE MW(799:799) -#define NVA0C0_QMDV00_06_RELEASE0_STRUCTURE_SIZE_FOUR_WORDS 0x00000000 -#define NVA0C0_QMDV00_06_RELEASE0_STRUCTURE_SIZE_ONE_WORD 0x00000001 -#define NVA0C0_QMDV00_06_RELEASE0_PAYLOAD MW(831:800) -#define NVA0C0_QMDV00_06_RELEASE1_ADDRESS_LOWER MW(863:832) -#define NVA0C0_QMDV00_06_RELEASE1_ADDRESS_UPPER MW(871:864) -#define NVA0C0_QMDV00_06_QMD_RESERVED_L MW(879:872) -#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP MW(886:884) -#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP_RED_ADD 0x00000000 -#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP_RED_MIN 0x00000001 -#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP_RED_MAX 0x00000002 -#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP_RED_INC 0x00000003 -#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP_RED_DEC 0x00000004 -#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP_RED_AND 0x00000005 -#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP_RED_OR 0x00000006 -#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP_RED_XOR 0x00000007 -#define NVA0C0_QMDV00_06_QMD_RESERVED_M MW(887:887) -#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_FORMAT MW(889:888) -#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_FORMAT_UNSIGNED_32 0x00000000 -#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_FORMAT_SIGNED_32 0x00000001 -#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_ENABLE MW(890:890) -#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_ENABLE_FALSE 0x00000000 -#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_ENABLE_TRUE 0x00000001 -#define NVA0C0_QMDV00_06_RELEASE1_STRUCTURE_SIZE MW(895:895) -#define NVA0C0_QMDV00_06_RELEASE1_STRUCTURE_SIZE_FOUR_WORDS 0x00000000 -#define NVA0C0_QMDV00_06_RELEASE1_STRUCTURE_SIZE_ONE_WORD 0x00000001 -#define NVA0C0_QMDV00_06_RELEASE1_PAYLOAD MW(927:896) -#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_ADDR_LOWER(i) MW((959+(i)*64):(928+(i)*64)) -#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_ADDR_UPPER(i) MW((967+(i)*64):(960+(i)*64)) -#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_RESERVED_ADDR(i) MW((973+(i)*64):(968+(i)*64)) -#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_INVALIDATE(i) MW((974+(i)*64):(974+(i)*64)) -#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_INVALIDATE_FALSE 0x00000000 -#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_INVALIDATE_TRUE 0x00000001 -#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_SIZE(i) MW((991+(i)*64):(975+(i)*64)) -#define NVA0C0_QMDV00_06_SHADER_LOCAL_MEMORY_LOW_SIZE MW(1463:1440) -#define NVA0C0_QMDV00_06_QMD_RESERVED_N MW(1466:1464) -#define NVA0C0_QMDV00_06_BARRIER_COUNT MW(1471:1467) -#define NVA0C0_QMDV00_06_SHADER_LOCAL_MEMORY_HIGH_SIZE MW(1495:1472) -#define NVA0C0_QMDV00_06_REGISTER_COUNT MW(1503:1496) -#define NVA0C0_QMDV00_06_SHADER_LOCAL_MEMORY_CRS_SIZE MW(1527:1504) -#define NVA0C0_QMDV00_06_SASS_VERSION MW(1535:1528) -#define NVA0C0_QMDV00_06_QMD_SPARE_A MW(1567:1536) -#define NVA0C0_QMDV00_06_QMD_SPARE_B MW(1599:1568) -#define NVA0C0_QMDV00_06_QMD_SPARE_C MW(1631:1600) -#define NVA0C0_QMDV00_06_QMD_SPARE_D MW(1663:1632) -#define NVA0C0_QMDV00_06_QMD_SPARE_E MW(1695:1664) -#define NVA0C0_QMDV00_06_QMD_SPARE_F MW(1727:1696) -#define NVA0C0_QMDV00_06_QMD_SPARE_G MW(1759:1728) -#define NVA0C0_QMDV00_06_QMD_SPARE_H MW(1791:1760) -#define NVA0C0_QMDV00_06_QMD_SPARE_I MW(1823:1792) -#define NVA0C0_QMDV00_06_QMD_SPARE_J MW(1855:1824) -#define NVA0C0_QMDV00_06_QMD_SPARE_K MW(1887:1856) -#define NVA0C0_QMDV00_06_QMD_SPARE_L MW(1919:1888) -#define NVA0C0_QMDV00_06_QMD_SPARE_M MW(1951:1920) -#define NVA0C0_QMDV00_06_QMD_SPARE_N MW(1983:1952) -#define NVA0C0_QMDV00_06_DEBUG_ID_UPPER MW(2015:1984) -#define NVA0C0_QMDV00_06_DEBUG_ID_LOWER MW(2047:2016) - - -/* -** Queue Meta Data, Version 01_06 - */ - -#define NVA0C0_QMDV01_06_OUTER_PUT MW(30:0) -#define NVA0C0_QMDV01_06_OUTER_OVERFLOW MW(31:31) -#define NVA0C0_QMDV01_06_OUTER_GET MW(62:32) -#define NVA0C0_QMDV01_06_OUTER_STICKY_OVERFLOW MW(63:63) -#define NVA0C0_QMDV01_06_INNER_GET MW(94:64) -#define NVA0C0_QMDV01_06_INNER_OVERFLOW MW(95:95) -#define NVA0C0_QMDV01_06_INNER_PUT MW(126:96) -#define NVA0C0_QMDV01_06_INNER_STICKY_OVERFLOW MW(127:127) -#define NVA0C0_QMDV01_06_QMD_RESERVED_A_A MW(159:128) -#define NVA0C0_QMDV01_06_SCHEDULER_NEXT_QMD_POINTER MW(191:160) -#define NVA0C0_QMDV01_06_QMD_GROUP_ID MW(197:192) -#define NVA0C0_QMDV01_06_QMD_RESERVED_A MW(199:198) -#define NVA0C0_QMDV01_06_SCHEDULE_ON_PUT_UPDATE_ENABLE MW(200:200) -#define NVA0C0_QMDV01_06_SCHEDULE_ON_PUT_UPDATE_ENABLE_FALSE 0x00000000 -#define NVA0C0_QMDV01_06_SCHEDULE_ON_PUT_UPDATE_ENABLE_TRUE 0x00000001 -#define NVA0C0_QMDV01_06_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST MW(201:201) -#define NVA0C0_QMDV01_06_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_FALSE 0x00000000 -#define NVA0C0_QMDV01_06_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_TRUE 0x00000001 -#define NVA0C0_QMDV01_06_SEMAPHORE_RELEASE_ENABLE0 MW(202:202) -#define NVA0C0_QMDV01_06_SEMAPHORE_RELEASE_ENABLE0_FALSE 0x00000000 -#define NVA0C0_QMDV01_06_SEMAPHORE_RELEASE_ENABLE0_TRUE 0x00000001 -#define NVA0C0_QMDV01_06_SEMAPHORE_RELEASE_ENABLE1 MW(203:203) -#define NVA0C0_QMDV01_06_SEMAPHORE_RELEASE_ENABLE1_FALSE 0x00000000 -#define NVA0C0_QMDV01_06_SEMAPHORE_RELEASE_ENABLE1_TRUE 0x00000001 -#define NVA0C0_QMDV01_06_REQUIRE_SCHEDULING_PCAS MW(204:204) -#define NVA0C0_QMDV01_06_REQUIRE_SCHEDULING_PCAS_FALSE 0x00000000 -#define NVA0C0_QMDV01_06_REQUIRE_SCHEDULING_PCAS_TRUE 0x00000001 -#define NVA0C0_QMDV01_06_QMD_RESERVED_B MW(207:205) -#define NVA0C0_QMDV01_06_SKED_PRIVATE_LIST_ADDR MW(222:208) -#define NVA0C0_QMDV01_06_SKED_PRIVATE_LIST_VALID MW(223:223) -#define NVA0C0_QMDV01_06_SKED_PRIVATE_LIST_VALID_FALSE 0x00000000 -#define NVA0C0_QMDV01_06_SKED_PRIVATE_LIST_VALID_TRUE 0x00000001 -#define NVA0C0_QMDV01_06_CIRCULAR_QUEUE_SIZE MW(248:224) -#define NVA0C0_QMDV01_06_QMD_RESERVED_C MW(249:249) -#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_HEADER_CACHE MW(250:250) -#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_HEADER_CACHE_FALSE 0x00000000 -#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_HEADER_CACHE_TRUE 0x00000001 -#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_SAMPLER_CACHE MW(251:251) -#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_SAMPLER_CACHE_FALSE 0x00000000 -#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_SAMPLER_CACHE_TRUE 0x00000001 -#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_DATA_CACHE MW(252:252) -#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_DATA_CACHE_FALSE 0x00000000 -#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_DATA_CACHE_TRUE 0x00000001 -#define NVA0C0_QMDV01_06_INVALIDATE_SHADER_DATA_CACHE MW(253:253) -#define NVA0C0_QMDV01_06_INVALIDATE_SHADER_DATA_CACHE_FALSE 0x00000000 -#define NVA0C0_QMDV01_06_INVALIDATE_SHADER_DATA_CACHE_TRUE 0x00000001 -#define NVA0C0_QMDV01_06_INVALIDATE_INSTRUCTION_CACHE MW(254:254) -#define NVA0C0_QMDV01_06_INVALIDATE_INSTRUCTION_CACHE_FALSE 0x00000000 -#define NVA0C0_QMDV01_06_INVALIDATE_INSTRUCTION_CACHE_TRUE 0x00000001 -#define NVA0C0_QMDV01_06_INVALIDATE_SHADER_CONSTANT_CACHE MW(255:255) -#define NVA0C0_QMDV01_06_INVALIDATE_SHADER_CONSTANT_CACHE_FALSE 0x00000000 -#define NVA0C0_QMDV01_06_INVALIDATE_SHADER_CONSTANT_CACHE_TRUE 0x00000001 -#define NVA0C0_QMDV01_06_PROGRAM_OFFSET MW(287:256) -#define NVA0C0_QMDV01_06_CIRCULAR_QUEUE_ADDR_LOWER MW(319:288) -#define NVA0C0_QMDV01_06_CIRCULAR_QUEUE_ADDR_UPPER MW(327:320) -#define NVA0C0_QMDV01_06_QMD_RESERVED_D MW(335:328) -#define NVA0C0_QMDV01_06_CIRCULAR_QUEUE_ENTRY_SIZE MW(351:336) -#define NVA0C0_QMDV01_06_CWD_REFERENCE_COUNT_ID MW(357:352) -#define NVA0C0_QMDV01_06_CWD_REFERENCE_COUNT_DELTA_MINUS_ONE MW(365:358) -#define NVA0C0_QMDV01_06_RELEASE_MEMBAR_TYPE MW(366:366) -#define NVA0C0_QMDV01_06_RELEASE_MEMBAR_TYPE_FE_NONE 0x00000000 -#define NVA0C0_QMDV01_06_RELEASE_MEMBAR_TYPE_FE_SYSMEMBAR 0x00000001 -#define NVA0C0_QMDV01_06_CWD_REFERENCE_COUNT_INCR_ENABLE MW(367:367) -#define NVA0C0_QMDV01_06_CWD_REFERENCE_COUNT_INCR_ENABLE_FALSE 0x00000000 -#define NVA0C0_QMDV01_06_CWD_REFERENCE_COUNT_INCR_ENABLE_TRUE 0x00000001 -#define NVA0C0_QMDV01_06_CWD_MEMBAR_TYPE MW(369:368) -#define NVA0C0_QMDV01_06_CWD_MEMBAR_TYPE_L1_NONE 0x00000000 -#define NVA0C0_QMDV01_06_CWD_MEMBAR_TYPE_L1_SYSMEMBAR 0x00000001 -#define NVA0C0_QMDV01_06_CWD_MEMBAR_TYPE_L1_MEMBAR 0x00000003 -#define NVA0C0_QMDV01_06_SEQUENTIALLY_RUN_CTAS MW(370:370) -#define NVA0C0_QMDV01_06_SEQUENTIALLY_RUN_CTAS_FALSE 0x00000000 -#define NVA0C0_QMDV01_06_SEQUENTIALLY_RUN_CTAS_TRUE 0x00000001 -#define NVA0C0_QMDV01_06_CWD_REFERENCE_COUNT_DECR_ENABLE MW(371:371) -#define NVA0C0_QMDV01_06_CWD_REFERENCE_COUNT_DECR_ENABLE_FALSE 0x00000000 -#define NVA0C0_QMDV01_06_CWD_REFERENCE_COUNT_DECR_ENABLE_TRUE 0x00000001 -#define NVA0C0_QMDV01_06_THROTTLED MW(372:372) -#define NVA0C0_QMDV01_06_THROTTLED_FALSE 0x00000000 -#define NVA0C0_QMDV01_06_THROTTLED_TRUE 0x00000001 -#define NVA0C0_QMDV01_06_FP32_NAN_BEHAVIOR MW(376:376) -#define NVA0C0_QMDV01_06_FP32_NAN_BEHAVIOR_LEGACY 0x00000000 -#define NVA0C0_QMDV01_06_FP32_NAN_BEHAVIOR_FP64_COMPATIBLE 0x00000001 -#define NVA0C0_QMDV01_06_FP32_F2I_NAN_BEHAVIOR MW(377:377) -#define NVA0C0_QMDV01_06_FP32_F2I_NAN_BEHAVIOR_PASS_ZERO 0x00000000 -#define NVA0C0_QMDV01_06_FP32_F2I_NAN_BEHAVIOR_PASS_INDEFINITE 0x00000001 -#define NVA0C0_QMDV01_06_API_VISIBLE_CALL_LIMIT MW(378:378) -#define NVA0C0_QMDV01_06_API_VISIBLE_CALL_LIMIT__32 0x00000000 -#define NVA0C0_QMDV01_06_API_VISIBLE_CALL_LIMIT_NO_CHECK 0x00000001 -#define NVA0C0_QMDV01_06_SHARED_MEMORY_BANK_MAPPING MW(379:379) -#define NVA0C0_QMDV01_06_SHARED_MEMORY_BANK_MAPPING_FOUR_BYTES_PER_BANK 0x00000000 -#define NVA0C0_QMDV01_06_SHARED_MEMORY_BANK_MAPPING_EIGHT_BYTES_PER_BANK 0x00000001 -#define NVA0C0_QMDV01_06_SAMPLER_INDEX MW(382:382) -#define NVA0C0_QMDV01_06_SAMPLER_INDEX_INDEPENDENTLY 0x00000000 -#define NVA0C0_QMDV01_06_SAMPLER_INDEX_VIA_HEADER_INDEX 0x00000001 -#define NVA0C0_QMDV01_06_FP32_NARROW_INSTRUCTION MW(383:383) -#define NVA0C0_QMDV01_06_FP32_NARROW_INSTRUCTION_KEEP_DENORMS 0x00000000 -#define NVA0C0_QMDV01_06_FP32_NARROW_INSTRUCTION_FLUSH_DENORMS 0x00000001 -#define NVA0C0_QMDV01_06_CTA_RASTER_WIDTH MW(415:384) -#define NVA0C0_QMDV01_06_CTA_RASTER_HEIGHT MW(431:416) -#define NVA0C0_QMDV01_06_CTA_RASTER_DEPTH MW(447:432) -#define NVA0C0_QMDV01_06_CTA_RASTER_WIDTH_RESUME MW(479:448) -#define NVA0C0_QMDV01_06_CTA_RASTER_HEIGHT_RESUME MW(495:480) -#define NVA0C0_QMDV01_06_CTA_RASTER_DEPTH_RESUME MW(511:496) -#define NVA0C0_QMDV01_06_LAUNCH_QUOTA MW(535:512) -#define NVA0C0_QMDV01_06_QMD_RESERVED_F MW(542:536) -#define NVA0C0_QMDV01_06_LAUNCH_QUOTA_ENABLE MW(543:543) -#define NVA0C0_QMDV01_06_LAUNCH_QUOTA_ENABLE_FALSE 0x00000000 -#define NVA0C0_QMDV01_06_LAUNCH_QUOTA_ENABLE_TRUE 0x00000001 -#define NVA0C0_QMDV01_06_SHARED_MEMORY_SIZE MW(561:544) -#define NVA0C0_QMDV01_06_QMD_RESERVED_G MW(575:562) -#define NVA0C0_QMDV01_06_QMD_VERSION MW(579:576) -#define NVA0C0_QMDV01_06_QMD_MAJOR_VERSION MW(583:580) -#define NVA0C0_QMDV01_06_QMD_RESERVED_H MW(591:584) -#define NVA0C0_QMDV01_06_CTA_THREAD_DIMENSION0 MW(607:592) -#define NVA0C0_QMDV01_06_CTA_THREAD_DIMENSION1 MW(623:608) -#define NVA0C0_QMDV01_06_CTA_THREAD_DIMENSION2 MW(639:624) -#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_VALID(i) MW((640+(i)*1):(640+(i)*1)) -#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_VALID_FALSE 0x00000000 -#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_VALID_TRUE 0x00000001 -#define NVA0C0_QMDV01_06_QMD_RESERVED_I MW(668:648) -#define NVA0C0_QMDV01_06_L1_CONFIGURATION MW(671:669) -#define NVA0C0_QMDV01_06_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB 0x00000001 -#define NVA0C0_QMDV01_06_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB 0x00000002 -#define NVA0C0_QMDV01_06_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB 0x00000003 -#define NVA0C0_QMDV01_06_SM_DISABLE_MASK_LOWER MW(703:672) -#define NVA0C0_QMDV01_06_SM_DISABLE_MASK_UPPER MW(735:704) -#define NVA0C0_QMDV01_06_RELEASE0_ADDRESS_LOWER MW(767:736) -#define NVA0C0_QMDV01_06_RELEASE0_ADDRESS_UPPER MW(775:768) -#define NVA0C0_QMDV01_06_QMD_RESERVED_J MW(783:776) -#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP MW(790:788) -#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP_RED_ADD 0x00000000 -#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP_RED_MIN 0x00000001 -#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP_RED_MAX 0x00000002 -#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP_RED_INC 0x00000003 -#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP_RED_DEC 0x00000004 -#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP_RED_AND 0x00000005 -#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP_RED_OR 0x00000006 -#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP_RED_XOR 0x00000007 -#define NVA0C0_QMDV01_06_QMD_RESERVED_K MW(791:791) -#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_FORMAT MW(793:792) -#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_FORMAT_UNSIGNED_32 0x00000000 -#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_FORMAT_SIGNED_32 0x00000001 -#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_ENABLE MW(794:794) -#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_ENABLE_FALSE 0x00000000 -#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_ENABLE_TRUE 0x00000001 -#define NVA0C0_QMDV01_06_RELEASE0_STRUCTURE_SIZE MW(799:799) -#define NVA0C0_QMDV01_06_RELEASE0_STRUCTURE_SIZE_FOUR_WORDS 0x00000000 -#define NVA0C0_QMDV01_06_RELEASE0_STRUCTURE_SIZE_ONE_WORD 0x00000001 -#define NVA0C0_QMDV01_06_RELEASE0_PAYLOAD MW(831:800) -#define NVA0C0_QMDV01_06_RELEASE1_ADDRESS_LOWER MW(863:832) -#define NVA0C0_QMDV01_06_RELEASE1_ADDRESS_UPPER MW(871:864) -#define NVA0C0_QMDV01_06_QMD_RESERVED_L MW(879:872) -#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP MW(886:884) -#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP_RED_ADD 0x00000000 -#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP_RED_MIN 0x00000001 -#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP_RED_MAX 0x00000002 -#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP_RED_INC 0x00000003 -#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP_RED_DEC 0x00000004 -#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP_RED_AND 0x00000005 -#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP_RED_OR 0x00000006 -#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP_RED_XOR 0x00000007 -#define NVA0C0_QMDV01_06_QMD_RESERVED_M MW(887:887) -#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_FORMAT MW(889:888) -#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_FORMAT_UNSIGNED_32 0x00000000 -#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_FORMAT_SIGNED_32 0x00000001 -#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_ENABLE MW(890:890) -#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_ENABLE_FALSE 0x00000000 -#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_ENABLE_TRUE 0x00000001 -#define NVA0C0_QMDV01_06_RELEASE1_STRUCTURE_SIZE MW(895:895) -#define NVA0C0_QMDV01_06_RELEASE1_STRUCTURE_SIZE_FOUR_WORDS 0x00000000 -#define NVA0C0_QMDV01_06_RELEASE1_STRUCTURE_SIZE_ONE_WORD 0x00000001 -#define NVA0C0_QMDV01_06_RELEASE1_PAYLOAD MW(927:896) -#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_ADDR_LOWER(i) MW((959+(i)*64):(928+(i)*64)) -#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_ADDR_UPPER(i) MW((967+(i)*64):(960+(i)*64)) -#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_RESERVED_ADDR(i) MW((973+(i)*64):(968+(i)*64)) -#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_INVALIDATE(i) MW((974+(i)*64):(974+(i)*64)) -#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_INVALIDATE_FALSE 0x00000000 -#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_INVALIDATE_TRUE 0x00000001 -#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_SIZE(i) MW((991+(i)*64):(975+(i)*64)) -#define NVA0C0_QMDV01_06_SHADER_LOCAL_MEMORY_LOW_SIZE MW(1463:1440) -#define NVA0C0_QMDV01_06_QMD_RESERVED_N MW(1466:1464) -#define NVA0C0_QMDV01_06_BARRIER_COUNT MW(1471:1467) -#define NVA0C0_QMDV01_06_SHADER_LOCAL_MEMORY_HIGH_SIZE MW(1495:1472) -#define NVA0C0_QMDV01_06_REGISTER_COUNT MW(1503:1496) -#define NVA0C0_QMDV01_06_SHADER_LOCAL_MEMORY_CRS_SIZE MW(1527:1504) -#define NVA0C0_QMDV01_06_SASS_VERSION MW(1535:1528) -#define NVA0C0_QMDV01_06_HW_ONLY_INNER_GET MW(1566:1536) -#define NVA0C0_QMDV01_06_HW_ONLY_REQUIRE_SCHEDULING_PCAS MW(1567:1567) -#define NVA0C0_QMDV01_06_HW_ONLY_INNER_PUT MW(1598:1568) -#define NVA0C0_QMDV01_06_HW_ONLY_SCHEDULE_ON_PUT_UPDATE_ENABLE MW(1599:1599) -#define NVA0C0_QMDV01_06_QUEUE_ENTRIES_PER_CTA_MINUS_ONE MW(1606:1600) -#define NVA0C0_QMDV01_06_QMD_RESERVED_Q MW(1609:1607) -#define NVA0C0_QMDV01_06_COALESCE_WAITING_PERIOD MW(1617:1610) -#define NVA0C0_QMDV01_06_QMD_RESERVED_R MW(1631:1618) -#define NVA0C0_QMDV01_06_QMD_SPARE_D MW(1663:1632) -#define NVA0C0_QMDV01_06_QMD_SPARE_E MW(1695:1664) -#define NVA0C0_QMDV01_06_QMD_SPARE_F MW(1727:1696) -#define NVA0C0_QMDV01_06_QMD_SPARE_G MW(1759:1728) -#define NVA0C0_QMDV01_06_QMD_SPARE_H MW(1791:1760) -#define NVA0C0_QMDV01_06_QMD_SPARE_I MW(1823:1792) -#define NVA0C0_QMDV01_06_QMD_SPARE_J MW(1855:1824) -#define NVA0C0_QMDV01_06_QMD_SPARE_K MW(1887:1856) -#define NVA0C0_QMDV01_06_QMD_SPARE_L MW(1919:1888) -#define NVA0C0_QMDV01_06_QMD_SPARE_M MW(1951:1920) -#define NVA0C0_QMDV01_06_QMD_SPARE_N MW(1983:1952) -#define NVA0C0_QMDV01_06_DEBUG_ID_UPPER MW(2015:1984) -#define NVA0C0_QMDV01_06_DEBUG_ID_LOWER MW(2047:2016) - - -/* -** Queue Meta Data, Version 01_07 - */ - -#define NVA0C0_QMDV01_07_OUTER_PUT MW(30:0) -#define NVA0C0_QMDV01_07_OUTER_OVERFLOW MW(31:31) -#define NVA0C0_QMDV01_07_OUTER_GET MW(62:32) -#define NVA0C0_QMDV01_07_OUTER_STICKY_OVERFLOW MW(63:63) -#define NVA0C0_QMDV01_07_INNER_GET MW(94:64) -#define NVA0C0_QMDV01_07_INNER_OVERFLOW MW(95:95) -#define NVA0C0_QMDV01_07_INNER_PUT MW(126:96) -#define NVA0C0_QMDV01_07_INNER_STICKY_OVERFLOW MW(127:127) -#define NVA0C0_QMDV01_07_QMD_RESERVED_A_A MW(159:128) -#define NVA0C0_QMDV01_07_DEPENDENT_QMD_POINTER MW(191:160) -#define NVA0C0_QMDV01_07_QMD_GROUP_ID MW(197:192) -#define NVA0C0_QMDV01_07_QMD_RESERVED_A MW(200:198) -#define NVA0C0_QMDV01_07_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST MW(201:201) -#define NVA0C0_QMDV01_07_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_FALSE 0x00000000 -#define NVA0C0_QMDV01_07_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_TRUE 0x00000001 -#define NVA0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE0 MW(202:202) -#define NVA0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE0_FALSE 0x00000000 -#define NVA0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE0_TRUE 0x00000001 -#define NVA0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE1 MW(203:203) -#define NVA0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE1_FALSE 0x00000000 -#define NVA0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE1_TRUE 0x00000001 -#define NVA0C0_QMDV01_07_REQUIRE_SCHEDULING_PCAS MW(204:204) -#define NVA0C0_QMDV01_07_REQUIRE_SCHEDULING_PCAS_FALSE 0x00000000 -#define NVA0C0_QMDV01_07_REQUIRE_SCHEDULING_PCAS_TRUE 0x00000001 -#define NVA0C0_QMDV01_07_DEPENDENT_QMD_SCHEDULE_ENABLE MW(205:205) -#define NVA0C0_QMDV01_07_DEPENDENT_QMD_SCHEDULE_ENABLE_FALSE 0x00000000 -#define NVA0C0_QMDV01_07_DEPENDENT_QMD_SCHEDULE_ENABLE_TRUE 0x00000001 -#define NVA0C0_QMDV01_07_DEPENDENT_QMD_TYPE MW(206:206) -#define NVA0C0_QMDV01_07_DEPENDENT_QMD_TYPE_QUEUE 0x00000000 -#define NVA0C0_QMDV01_07_DEPENDENT_QMD_TYPE_GRID 0x00000001 -#define NVA0C0_QMDV01_07_DEPENDENT_QMD_FIELD_COPY MW(207:207) -#define NVA0C0_QMDV01_07_DEPENDENT_QMD_FIELD_COPY_FALSE 0x00000000 -#define NVA0C0_QMDV01_07_DEPENDENT_QMD_FIELD_COPY_TRUE 0x00000001 -#define NVA0C0_QMDV01_07_QMD_RESERVED_B MW(223:208) -#define NVA0C0_QMDV01_07_CIRCULAR_QUEUE_SIZE MW(248:224) -#define NVA0C0_QMDV01_07_QMD_RESERVED_C MW(249:249) -#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_HEADER_CACHE MW(250:250) -#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_HEADER_CACHE_FALSE 0x00000000 -#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_HEADER_CACHE_TRUE 0x00000001 -#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_SAMPLER_CACHE MW(251:251) -#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_SAMPLER_CACHE_FALSE 0x00000000 -#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_SAMPLER_CACHE_TRUE 0x00000001 -#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_DATA_CACHE MW(252:252) -#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_DATA_CACHE_FALSE 0x00000000 -#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_DATA_CACHE_TRUE 0x00000001 -#define NVA0C0_QMDV01_07_INVALIDATE_SHADER_DATA_CACHE MW(253:253) -#define NVA0C0_QMDV01_07_INVALIDATE_SHADER_DATA_CACHE_FALSE 0x00000000 -#define NVA0C0_QMDV01_07_INVALIDATE_SHADER_DATA_CACHE_TRUE 0x00000001 -#define NVA0C0_QMDV01_07_INVALIDATE_INSTRUCTION_CACHE MW(254:254) -#define NVA0C0_QMDV01_07_INVALIDATE_INSTRUCTION_CACHE_FALSE 0x00000000 -#define NVA0C0_QMDV01_07_INVALIDATE_INSTRUCTION_CACHE_TRUE 0x00000001 -#define NVA0C0_QMDV01_07_INVALIDATE_SHADER_CONSTANT_CACHE MW(255:255) -#define NVA0C0_QMDV01_07_INVALIDATE_SHADER_CONSTANT_CACHE_FALSE 0x00000000 -#define NVA0C0_QMDV01_07_INVALIDATE_SHADER_CONSTANT_CACHE_TRUE 0x00000001 -#define NVA0C0_QMDV01_07_PROGRAM_OFFSET MW(287:256) -#define NVA0C0_QMDV01_07_CIRCULAR_QUEUE_ADDR_LOWER MW(319:288) -#define NVA0C0_QMDV01_07_CIRCULAR_QUEUE_ADDR_UPPER MW(327:320) -#define NVA0C0_QMDV01_07_QMD_RESERVED_D MW(335:328) -#define NVA0C0_QMDV01_07_CIRCULAR_QUEUE_ENTRY_SIZE MW(351:336) -#define NVA0C0_QMDV01_07_CWD_REFERENCE_COUNT_ID MW(357:352) -#define NVA0C0_QMDV01_07_CWD_REFERENCE_COUNT_DELTA_MINUS_ONE MW(365:358) -#define NVA0C0_QMDV01_07_RELEASE_MEMBAR_TYPE MW(366:366) -#define NVA0C0_QMDV01_07_RELEASE_MEMBAR_TYPE_FE_NONE 0x00000000 -#define NVA0C0_QMDV01_07_RELEASE_MEMBAR_TYPE_FE_SYSMEMBAR 0x00000001 -#define NVA0C0_QMDV01_07_CWD_REFERENCE_COUNT_INCR_ENABLE MW(367:367) -#define NVA0C0_QMDV01_07_CWD_REFERENCE_COUNT_INCR_ENABLE_FALSE 0x00000000 -#define NVA0C0_QMDV01_07_CWD_REFERENCE_COUNT_INCR_ENABLE_TRUE 0x00000001 -#define NVA0C0_QMDV01_07_CWD_MEMBAR_TYPE MW(369:368) -#define NVA0C0_QMDV01_07_CWD_MEMBAR_TYPE_L1_NONE 0x00000000 -#define NVA0C0_QMDV01_07_CWD_MEMBAR_TYPE_L1_SYSMEMBAR 0x00000001 -#define NVA0C0_QMDV01_07_CWD_MEMBAR_TYPE_L1_MEMBAR 0x00000003 -#define NVA0C0_QMDV01_07_SEQUENTIALLY_RUN_CTAS MW(370:370) -#define NVA0C0_QMDV01_07_SEQUENTIALLY_RUN_CTAS_FALSE 0x00000000 -#define NVA0C0_QMDV01_07_SEQUENTIALLY_RUN_CTAS_TRUE 0x00000001 -#define NVA0C0_QMDV01_07_CWD_REFERENCE_COUNT_DECR_ENABLE MW(371:371) -#define NVA0C0_QMDV01_07_CWD_REFERENCE_COUNT_DECR_ENABLE_FALSE 0x00000000 -#define NVA0C0_QMDV01_07_CWD_REFERENCE_COUNT_DECR_ENABLE_TRUE 0x00000001 -#define NVA0C0_QMDV01_07_THROTTLED MW(372:372) -#define NVA0C0_QMDV01_07_THROTTLED_FALSE 0x00000000 -#define NVA0C0_QMDV01_07_THROTTLED_TRUE 0x00000001 -#define NVA0C0_QMDV01_07_FP32_NAN_BEHAVIOR MW(376:376) -#define NVA0C0_QMDV01_07_FP32_NAN_BEHAVIOR_LEGACY 0x00000000 -#define NVA0C0_QMDV01_07_FP32_NAN_BEHAVIOR_FP64_COMPATIBLE 0x00000001 -#define NVA0C0_QMDV01_07_FP32_F2I_NAN_BEHAVIOR MW(377:377) -#define NVA0C0_QMDV01_07_FP32_F2I_NAN_BEHAVIOR_PASS_ZERO 0x00000000 -#define NVA0C0_QMDV01_07_FP32_F2I_NAN_BEHAVIOR_PASS_INDEFINITE 0x00000001 -#define NVA0C0_QMDV01_07_API_VISIBLE_CALL_LIMIT MW(378:378) -#define NVA0C0_QMDV01_07_API_VISIBLE_CALL_LIMIT__32 0x00000000 -#define NVA0C0_QMDV01_07_API_VISIBLE_CALL_LIMIT_NO_CHECK 0x00000001 -#define NVA0C0_QMDV01_07_SHARED_MEMORY_BANK_MAPPING MW(379:379) -#define NVA0C0_QMDV01_07_SHARED_MEMORY_BANK_MAPPING_FOUR_BYTES_PER_BANK 0x00000000 -#define NVA0C0_QMDV01_07_SHARED_MEMORY_BANK_MAPPING_EIGHT_BYTES_PER_BANK 0x00000001 -#define NVA0C0_QMDV01_07_SAMPLER_INDEX MW(382:382) -#define NVA0C0_QMDV01_07_SAMPLER_INDEX_INDEPENDENTLY 0x00000000 -#define NVA0C0_QMDV01_07_SAMPLER_INDEX_VIA_HEADER_INDEX 0x00000001 -#define NVA0C0_QMDV01_07_FP32_NARROW_INSTRUCTION MW(383:383) -#define NVA0C0_QMDV01_07_FP32_NARROW_INSTRUCTION_KEEP_DENORMS 0x00000000 -#define NVA0C0_QMDV01_07_FP32_NARROW_INSTRUCTION_FLUSH_DENORMS 0x00000001 -#define NVA0C0_QMDV01_07_CTA_RASTER_WIDTH MW(415:384) -#define NVA0C0_QMDV01_07_CTA_RASTER_HEIGHT MW(431:416) -#define NVA0C0_QMDV01_07_CTA_RASTER_DEPTH MW(447:432) -#define NVA0C0_QMDV01_07_CTA_RASTER_WIDTH_RESUME MW(479:448) -#define NVA0C0_QMDV01_07_CTA_RASTER_HEIGHT_RESUME MW(495:480) -#define NVA0C0_QMDV01_07_CTA_RASTER_DEPTH_RESUME MW(511:496) -#define NVA0C0_QMDV01_07_QUEUE_ENTRIES_PER_CTA_MINUS_ONE MW(518:512) -#define NVA0C0_QMDV01_07_COALESCE_WAITING_PERIOD MW(529:522) -#define NVA0C0_QMDV01_07_SHARED_MEMORY_SIZE MW(561:544) -#define NVA0C0_QMDV01_07_QMD_RESERVED_G MW(575:562) -#define NVA0C0_QMDV01_07_QMD_VERSION MW(579:576) -#define NVA0C0_QMDV01_07_QMD_MAJOR_VERSION MW(583:580) -#define NVA0C0_QMDV01_07_QMD_RESERVED_H MW(591:584) -#define NVA0C0_QMDV01_07_CTA_THREAD_DIMENSION0 MW(607:592) -#define NVA0C0_QMDV01_07_CTA_THREAD_DIMENSION1 MW(623:608) -#define NVA0C0_QMDV01_07_CTA_THREAD_DIMENSION2 MW(639:624) -#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_VALID(i) MW((640+(i)*1):(640+(i)*1)) -#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_VALID_FALSE 0x00000000 -#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_VALID_TRUE 0x00000001 -#define NVA0C0_QMDV01_07_QMD_RESERVED_I MW(668:648) -#define NVA0C0_QMDV01_07_L1_CONFIGURATION MW(671:669) -#define NVA0C0_QMDV01_07_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB 0x00000001 -#define NVA0C0_QMDV01_07_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB 0x00000002 -#define NVA0C0_QMDV01_07_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB 0x00000003 -#define NVA0C0_QMDV01_07_SM_DISABLE_MASK_LOWER MW(703:672) -#define NVA0C0_QMDV01_07_SM_DISABLE_MASK_UPPER MW(735:704) -#define NVA0C0_QMDV01_07_RELEASE0_ADDRESS_LOWER MW(767:736) -#define NVA0C0_QMDV01_07_RELEASE0_ADDRESS_UPPER MW(775:768) -#define NVA0C0_QMDV01_07_QMD_RESERVED_J MW(783:776) -#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP MW(790:788) -#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_ADD 0x00000000 -#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_MIN 0x00000001 -#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_MAX 0x00000002 -#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_INC 0x00000003 -#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_DEC 0x00000004 -#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_AND 0x00000005 -#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_OR 0x00000006 -#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_XOR 0x00000007 -#define NVA0C0_QMDV01_07_QMD_RESERVED_K MW(791:791) -#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_FORMAT MW(793:792) -#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_FORMAT_UNSIGNED_32 0x00000000 -#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_FORMAT_SIGNED_32 0x00000001 -#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_ENABLE MW(794:794) -#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_ENABLE_FALSE 0x00000000 -#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_ENABLE_TRUE 0x00000001 -#define NVA0C0_QMDV01_07_RELEASE0_STRUCTURE_SIZE MW(799:799) -#define NVA0C0_QMDV01_07_RELEASE0_STRUCTURE_SIZE_FOUR_WORDS 0x00000000 -#define NVA0C0_QMDV01_07_RELEASE0_STRUCTURE_SIZE_ONE_WORD 0x00000001 -#define NVA0C0_QMDV01_07_RELEASE0_PAYLOAD MW(831:800) -#define NVA0C0_QMDV01_07_RELEASE1_ADDRESS_LOWER MW(863:832) -#define NVA0C0_QMDV01_07_RELEASE1_ADDRESS_UPPER MW(871:864) -#define NVA0C0_QMDV01_07_QMD_RESERVED_L MW(879:872) -#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP MW(886:884) -#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_ADD 0x00000000 -#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_MIN 0x00000001 -#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_MAX 0x00000002 -#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_INC 0x00000003 -#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_DEC 0x00000004 -#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_AND 0x00000005 -#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_OR 0x00000006 -#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_XOR 0x00000007 -#define NVA0C0_QMDV01_07_QMD_RESERVED_M MW(887:887) -#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_FORMAT MW(889:888) -#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_FORMAT_UNSIGNED_32 0x00000000 -#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_FORMAT_SIGNED_32 0x00000001 -#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_ENABLE MW(890:890) -#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_ENABLE_FALSE 0x00000000 -#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_ENABLE_TRUE 0x00000001 -#define NVA0C0_QMDV01_07_RELEASE1_STRUCTURE_SIZE MW(895:895) -#define NVA0C0_QMDV01_07_RELEASE1_STRUCTURE_SIZE_FOUR_WORDS 0x00000000 -#define NVA0C0_QMDV01_07_RELEASE1_STRUCTURE_SIZE_ONE_WORD 0x00000001 -#define NVA0C0_QMDV01_07_RELEASE1_PAYLOAD MW(927:896) -#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_ADDR_LOWER(i) MW((959+(i)*64):(928+(i)*64)) -#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_ADDR_UPPER(i) MW((967+(i)*64):(960+(i)*64)) -#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_RESERVED_ADDR(i) MW((973+(i)*64):(968+(i)*64)) -#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_INVALIDATE(i) MW((974+(i)*64):(974+(i)*64)) -#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_INVALIDATE_FALSE 0x00000000 -#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_INVALIDATE_TRUE 0x00000001 -#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_SIZE(i) MW((991+(i)*64):(975+(i)*64)) -#define NVA0C0_QMDV01_07_SHADER_LOCAL_MEMORY_LOW_SIZE MW(1463:1440) -#define NVA0C0_QMDV01_07_QMD_RESERVED_N MW(1466:1464) -#define NVA0C0_QMDV01_07_BARRIER_COUNT MW(1471:1467) -#define NVA0C0_QMDV01_07_SHADER_LOCAL_MEMORY_HIGH_SIZE MW(1495:1472) -#define NVA0C0_QMDV01_07_REGISTER_COUNT MW(1503:1496) -#define NVA0C0_QMDV01_07_SHADER_LOCAL_MEMORY_CRS_SIZE MW(1527:1504) -#define NVA0C0_QMDV01_07_SASS_VERSION MW(1535:1528) -#define NVA0C0_QMDV01_07_HW_ONLY_INNER_GET MW(1566:1536) -#define NVA0C0_QMDV01_07_HW_ONLY_REQUIRE_SCHEDULING_PCAS MW(1567:1567) -#define NVA0C0_QMDV01_07_HW_ONLY_INNER_PUT MW(1598:1568) -#define NVA0C0_QMDV01_07_QMD_RESERVED_P MW(1599:1599) -#define NVA0C0_QMDV01_07_HW_ONLY_SPAN_LIST_HEAD_INDEX MW(1629:1600) -#define NVA0C0_QMDV01_07_QMD_RESERVED_Q MW(1630:1630) -#define NVA0C0_QMDV01_07_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID MW(1631:1631) -#define NVA0C0_QMDV01_07_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_FALSE 0x00000000 -#define NVA0C0_QMDV01_07_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_TRUE 0x00000001 -#define NVA0C0_QMDV01_07_HW_ONLY_SKED_NEXT_QMD_POINTER MW(1663:1632) -#define NVA0C0_QMDV01_07_QMD_SPARE_E MW(1695:1664) -#define NVA0C0_QMDV01_07_QMD_SPARE_F MW(1727:1696) -#define NVA0C0_QMDV01_07_QMD_SPARE_G MW(1759:1728) -#define NVA0C0_QMDV01_07_QMD_SPARE_H MW(1791:1760) -#define NVA0C0_QMDV01_07_QMD_SPARE_I MW(1823:1792) -#define NVA0C0_QMDV01_07_QMD_SPARE_J MW(1855:1824) -#define NVA0C0_QMDV01_07_QMD_SPARE_K MW(1887:1856) -#define NVA0C0_QMDV01_07_QMD_SPARE_L MW(1919:1888) -#define NVA0C0_QMDV01_07_QMD_SPARE_M MW(1951:1920) -#define NVA0C0_QMDV01_07_QMD_SPARE_N MW(1983:1952) -#define NVA0C0_QMDV01_07_DEBUG_ID_UPPER MW(2015:1984) -#define NVA0C0_QMDV01_07_DEBUG_ID_LOWER MW(2047:2016) - - - -#endif // #ifndef __CLA0C0QMD_H__ diff --git a/src/gallium/drivers/nouveau/nvc0/clc0c0qmd.h b/src/gallium/drivers/nouveau/nvc0/clc0c0qmd.h deleted file mode 100644 index 040bdcd..0000000 --- a/src/gallium/drivers/nouveau/nvc0/clc0c0qmd.h +++ /dev/null @@ -1,665 +0,0 @@ -/******************************************************************************* - Copyright (c) 2016 NVIDIA Corporation - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to - deal in the Software without restriction, including without limitation the - rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - sell copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - DEALINGS IN THE SOFTWARE. - -*******************************************************************************/ - -/* AUTO GENERATED FILE -- DO NOT EDIT */ - -#ifndef __CLC0C0QMD_H__ -#define __CLC0C0QMD_H__ - -/* -** Queue Meta Data, Version 01_07 - */ - -// The below C preprocessor definitions describe "multi-word" structures, where -// fields may have bit numbers beyond 32. For example, MW(127:96) means -// the field is in bits 0-31 of word number 3 of the structure. The "MW(X:Y)" -// syntax is to distinguish from similar "X:Y" single-word definitions: the -// macros historically used for single-word definitions would fail with -// multi-word definitions. -// -// See nvmisc.h:DRF_VAL_MW() in the source code of the kernel -// interface layer of nvidia.ko for an example of how to manipulate -// these MW(X:Y) definitions. - -#define NVC0C0_QMDV01_07_OUTER_PUT MW(30:0) -#define NVC0C0_QMDV01_07_OUTER_OVERFLOW MW(31:31) -#define NVC0C0_QMDV01_07_OUTER_GET MW(62:32) -#define NVC0C0_QMDV01_07_OUTER_STICKY_OVERFLOW MW(63:63) -#define NVC0C0_QMDV01_07_INNER_GET MW(94:64) -#define NVC0C0_QMDV01_07_INNER_OVERFLOW MW(95:95) -#define NVC0C0_QMDV01_07_INNER_PUT MW(126:96) -#define NVC0C0_QMDV01_07_INNER_STICKY_OVERFLOW MW(127:127) -#define NVC0C0_QMDV01_07_QMD_RESERVED_A_A MW(159:128) -#define NVC0C0_QMDV01_07_DEPENDENT_QMD_POINTER MW(191:160) -#define NVC0C0_QMDV01_07_QMD_GROUP_ID MW(197:192) -#define NVC0C0_QMDV01_07_SM_GLOBAL_CACHING_ENABLE MW(198:198) -#define NVC0C0_QMDV01_07_RUN_CTA_IN_ONE_SM_PARTITION MW(199:199) -#define NVC0C0_QMDV01_07_RUN_CTA_IN_ONE_SM_PARTITION_FALSE 0x00000000 -#define NVC0C0_QMDV01_07_RUN_CTA_IN_ONE_SM_PARTITION_TRUE 0x00000001 -#define NVC0C0_QMDV01_07_IS_QUEUE MW(200:200) -#define NVC0C0_QMDV01_07_IS_QUEUE_FALSE 0x00000000 -#define NVC0C0_QMDV01_07_IS_QUEUE_TRUE 0x00000001 -#define NVC0C0_QMDV01_07_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST MW(201:201) -#define NVC0C0_QMDV01_07_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_FALSE 0x00000000 -#define NVC0C0_QMDV01_07_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_TRUE 0x00000001 -#define NVC0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE0 MW(202:202) -#define NVC0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE0_FALSE 0x00000000 -#define NVC0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE0_TRUE 0x00000001 -#define NVC0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE1 MW(203:203) -#define NVC0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE1_FALSE 0x00000000 -#define NVC0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE1_TRUE 0x00000001 -#define NVC0C0_QMDV01_07_REQUIRE_SCHEDULING_PCAS MW(204:204) -#define NVC0C0_QMDV01_07_REQUIRE_SCHEDULING_PCAS_FALSE 0x00000000 -#define NVC0C0_QMDV01_07_REQUIRE_SCHEDULING_PCAS_TRUE 0x00000001 -#define NVC0C0_QMDV01_07_DEPENDENT_QMD_SCHEDULE_ENABLE MW(205:205) -#define NVC0C0_QMDV01_07_DEPENDENT_QMD_SCHEDULE_ENABLE_FALSE 0x00000000 -#define NVC0C0_QMDV01_07_DEPENDENT_QMD_SCHEDULE_ENABLE_TRUE 0x00000001 -#define NVC0C0_QMDV01_07_DEPENDENT_QMD_TYPE MW(206:206) -#define NVC0C0_QMDV01_07_DEPENDENT_QMD_TYPE_QUEUE 0x00000000 -#define NVC0C0_QMDV01_07_DEPENDENT_QMD_TYPE_GRID 0x00000001 -#define NVC0C0_QMDV01_07_DEPENDENT_QMD_FIELD_COPY MW(207:207) -#define NVC0C0_QMDV01_07_DEPENDENT_QMD_FIELD_COPY_FALSE 0x00000000 -#define NVC0C0_QMDV01_07_DEPENDENT_QMD_FIELD_COPY_TRUE 0x00000001 -#define NVC0C0_QMDV01_07_QMD_RESERVED_B MW(223:208) -#define NVC0C0_QMDV01_07_CIRCULAR_QUEUE_SIZE MW(248:224) -#define NVC0C0_QMDV01_07_QMD_RESERVED_C MW(249:249) -#define NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_HEADER_CACHE MW(250:250) -#define NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_HEADER_CACHE_FALSE 0x00000000 -#define NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_HEADER_CACHE_TRUE 0x00000001 -#define NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_SAMPLER_CACHE MW(251:251) -#define NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_SAMPLER_CACHE_FALSE 0x00000000 -#define NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_SAMPLER_CACHE_TRUE 0x00000001 -#define NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_DATA_CACHE MW(252:252) -#define NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_DATA_CACHE_FALSE 0x00000000 -#define NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_DATA_CACHE_TRUE 0x00000001 -#define NVC0C0_QMDV01_07_INVALIDATE_SHADER_DATA_CACHE MW(253:253) -#define NVC0C0_QMDV01_07_INVALIDATE_SHADER_DATA_CACHE_FALSE 0x00000000 -#define NVC0C0_QMDV01_07_INVALIDATE_SHADER_DATA_CACHE_TRUE 0x00000001 -#define NVC0C0_QMDV01_07_INVALIDATE_INSTRUCTION_CACHE MW(254:254) -#define NVC0C0_QMDV01_07_INVALIDATE_INSTRUCTION_CACHE_FALSE 0x00000000 -#define NVC0C0_QMDV01_07_INVALIDATE_INSTRUCTION_CACHE_TRUE 0x00000001 -#define NVC0C0_QMDV01_07_INVALIDATE_SHADER_CONSTANT_CACHE MW(255:255) -#define NVC0C0_QMDV01_07_INVALIDATE_SHADER_CONSTANT_CACHE_FALSE 0x00000000 -#define NVC0C0_QMDV01_07_INVALIDATE_SHADER_CONSTANT_CACHE_TRUE 0x00000001 -#define NVC0C0_QMDV01_07_PROGRAM_OFFSET MW(287:256) -#define NVC0C0_QMDV01_07_CIRCULAR_QUEUE_ADDR_LOWER MW(319:288) -#define NVC0C0_QMDV01_07_CIRCULAR_QUEUE_ADDR_UPPER MW(327:320) -#define NVC0C0_QMDV01_07_QMD_RESERVED_D MW(335:328) -#define NVC0C0_QMDV01_07_CIRCULAR_QUEUE_ENTRY_SIZE MW(351:336) -#define NVC0C0_QMDV01_07_CWD_REFERENCE_COUNT_ID MW(357:352) -#define NVC0C0_QMDV01_07_CWD_REFERENCE_COUNT_DELTA_MINUS_ONE MW(365:358) -#define NVC0C0_QMDV01_07_RELEASE_MEMBAR_TYPE MW(366:366) -#define NVC0C0_QMDV01_07_RELEASE_MEMBAR_TYPE_FE_NONE 0x00000000 -#define NVC0C0_QMDV01_07_RELEASE_MEMBAR_TYPE_FE_SYSMEMBAR 0x00000001 -#define NVC0C0_QMDV01_07_CWD_REFERENCE_COUNT_INCR_ENABLE MW(367:367) -#define NVC0C0_QMDV01_07_CWD_REFERENCE_COUNT_INCR_ENABLE_FALSE 0x00000000 -#define NVC0C0_QMDV01_07_CWD_REFERENCE_COUNT_INCR_ENABLE_TRUE 0x00000001 -#define NVC0C0_QMDV01_07_CWD_MEMBAR_TYPE MW(369:368) -#define NVC0C0_QMDV01_07_CWD_MEMBAR_TYPE_L1_NONE 0x00000000 -#define NVC0C0_QMDV01_07_CWD_MEMBAR_TYPE_L1_SYSMEMBAR 0x00000001 -#define NVC0C0_QMDV01_07_CWD_MEMBAR_TYPE_L1_MEMBAR 0x00000003 -#define NVC0C0_QMDV01_07_SEQUENTIALLY_RUN_CTAS MW(370:370) -#define NVC0C0_QMDV01_07_SEQUENTIALLY_RUN_CTAS_FALSE 0x00000000 -#define NVC0C0_QMDV01_07_SEQUENTIALLY_RUN_CTAS_TRUE 0x00000001 -#define NVC0C0_QMDV01_07_CWD_REFERENCE_COUNT_DECR_ENABLE MW(371:371) -#define NVC0C0_QMDV01_07_CWD_REFERENCE_COUNT_DECR_ENABLE_FALSE 0x00000000 -#define NVC0C0_QMDV01_07_CWD_REFERENCE_COUNT_DECR_ENABLE_TRUE 0x00000001 -#define NVC0C0_QMDV01_07_THROTTLED MW(372:372) -#define NVC0C0_QMDV01_07_THROTTLED_FALSE 0x00000000 -#define NVC0C0_QMDV01_07_THROTTLED_TRUE 0x00000001 -#define NVC0C0_QMDV01_07_FP32_NAN_BEHAVIOR MW(376:376) -#define NVC0C0_QMDV01_07_FP32_NAN_BEHAVIOR_LEGACY 0x00000000 -#define NVC0C0_QMDV01_07_FP32_NAN_BEHAVIOR_FP64_COMPATIBLE 0x00000001 -#define NVC0C0_QMDV01_07_FP32_F2I_NAN_BEHAVIOR MW(377:377) -#define NVC0C0_QMDV01_07_FP32_F2I_NAN_BEHAVIOR_PASS_ZERO 0x00000000 -#define NVC0C0_QMDV01_07_FP32_F2I_NAN_BEHAVIOR_PASS_INDEFINITE 0x00000001 -#define NVC0C0_QMDV01_07_API_VISIBLE_CALL_LIMIT MW(378:378) -#define NVC0C0_QMDV01_07_API_VISIBLE_CALL_LIMIT__32 0x00000000 -#define NVC0C0_QMDV01_07_API_VISIBLE_CALL_LIMIT_NO_CHECK 0x00000001 -#define NVC0C0_QMDV01_07_SHARED_MEMORY_BANK_MAPPING MW(379:379) -#define NVC0C0_QMDV01_07_SHARED_MEMORY_BANK_MAPPING_FOUR_BYTES_PER_BANK 0x00000000 -#define NVC0C0_QMDV01_07_SHARED_MEMORY_BANK_MAPPING_EIGHT_BYTES_PER_BANK 0x00000001 -#define NVC0C0_QMDV01_07_SAMPLER_INDEX MW(382:382) -#define NVC0C0_QMDV01_07_SAMPLER_INDEX_INDEPENDENTLY 0x00000000 -#define NVC0C0_QMDV01_07_SAMPLER_INDEX_VIA_HEADER_INDEX 0x00000001 -#define NVC0C0_QMDV01_07_FP32_NARROW_INSTRUCTION MW(383:383) -#define NVC0C0_QMDV01_07_FP32_NARROW_INSTRUCTION_KEEP_DENORMS 0x00000000 -#define NVC0C0_QMDV01_07_FP32_NARROW_INSTRUCTION_FLUSH_DENORMS 0x00000001 -#define NVC0C0_QMDV01_07_CTA_RASTER_WIDTH MW(415:384) -#define NVC0C0_QMDV01_07_CTA_RASTER_HEIGHT MW(431:416) -#define NVC0C0_QMDV01_07_CTA_RASTER_DEPTH MW(447:432) -#define NVC0C0_QMDV01_07_CTA_RASTER_WIDTH_RESUME MW(479:448) -#define NVC0C0_QMDV01_07_CTA_RASTER_HEIGHT_RESUME MW(495:480) -#define NVC0C0_QMDV01_07_CTA_RASTER_DEPTH_RESUME MW(511:496) -#define NVC0C0_QMDV01_07_QUEUE_ENTRIES_PER_CTA_MINUS_ONE MW(518:512) -#define NVC0C0_QMDV01_07_COALESCE_WAITING_PERIOD MW(529:522) -#define NVC0C0_QMDV01_07_SHARED_MEMORY_SIZE MW(561:544) -#define NVC0C0_QMDV01_07_QMD_RESERVED_G MW(575:562) -#define NVC0C0_QMDV01_07_QMD_VERSION MW(579:576) -#define NVC0C0_QMDV01_07_QMD_MAJOR_VERSION MW(583:580) -#define NVC0C0_QMDV01_07_QMD_RESERVED_H MW(591:584) -#define NVC0C0_QMDV01_07_CTA_THREAD_DIMENSION0 MW(607:592) -#define NVC0C0_QMDV01_07_CTA_THREAD_DIMENSION1 MW(623:608) -#define NVC0C0_QMDV01_07_CTA_THREAD_DIMENSION2 MW(639:624) -#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_VALID(i) MW((640+(i)*1):(640+(i)*1)) -#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_VALID_FALSE 0x00000000 -#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_VALID_TRUE 0x00000001 -#define NVC0C0_QMDV01_07_QMD_RESERVED_I MW(668:648) -#define NVC0C0_QMDV01_07_L1_CONFIGURATION MW(671:669) -#define NVC0C0_QMDV01_07_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB 0x00000001 -#define NVC0C0_QMDV01_07_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB 0x00000002 -#define NVC0C0_QMDV01_07_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB 0x00000003 -#define NVC0C0_QMDV01_07_SM_DISABLE_MASK_LOWER MW(703:672) -#define NVC0C0_QMDV01_07_SM_DISABLE_MASK_UPPER MW(735:704) -#define NVC0C0_QMDV01_07_RELEASE0_ADDRESS_LOWER MW(767:736) -#define NVC0C0_QMDV01_07_RELEASE0_ADDRESS_UPPER MW(775:768) -#define NVC0C0_QMDV01_07_QMD_RESERVED_J MW(783:776) -#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP MW(790:788) -#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_ADD 0x00000000 -#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_MIN 0x00000001 -#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_MAX 0x00000002 -#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_INC 0x00000003 -#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_DEC 0x00000004 -#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_AND 0x00000005 -#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_OR 0x00000006 -#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_XOR 0x00000007 -#define NVC0C0_QMDV01_07_QMD_RESERVED_K MW(791:791) -#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_FORMAT MW(793:792) -#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_FORMAT_UNSIGNED_32 0x00000000 -#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_FORMAT_SIGNED_32 0x00000001 -#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_ENABLE MW(794:794) -#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_ENABLE_FALSE 0x00000000 -#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_ENABLE_TRUE 0x00000001 -#define NVC0C0_QMDV01_07_RELEASE0_STRUCTURE_SIZE MW(799:799) -#define NVC0C0_QMDV01_07_RELEASE0_STRUCTURE_SIZE_FOUR_WORDS 0x00000000 -#define NVC0C0_QMDV01_07_RELEASE0_STRUCTURE_SIZE_ONE_WORD 0x00000001 -#define NVC0C0_QMDV01_07_RELEASE0_PAYLOAD MW(831:800) -#define NVC0C0_QMDV01_07_RELEASE1_ADDRESS_LOWER MW(863:832) -#define NVC0C0_QMDV01_07_RELEASE1_ADDRESS_UPPER MW(871:864) -#define NVC0C0_QMDV01_07_QMD_RESERVED_L MW(879:872) -#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP MW(886:884) -#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_ADD 0x00000000 -#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_MIN 0x00000001 -#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_MAX 0x00000002 -#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_INC 0x00000003 -#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_DEC 0x00000004 -#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_AND 0x00000005 -#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_OR 0x00000006 -#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_XOR 0x00000007 -#define NVC0C0_QMDV01_07_QMD_RESERVED_M MW(887:887) -#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_FORMAT MW(889:888) -#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_FORMAT_UNSIGNED_32 0x00000000 -#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_FORMAT_SIGNED_32 0x00000001 -#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_ENABLE MW(890:890) -#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_ENABLE_FALSE 0x00000000 -#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_ENABLE_TRUE 0x00000001 -#define NVC0C0_QMDV01_07_RELEASE1_STRUCTURE_SIZE MW(895:895) -#define NVC0C0_QMDV01_07_RELEASE1_STRUCTURE_SIZE_FOUR_WORDS 0x00000000 -#define NVC0C0_QMDV01_07_RELEASE1_STRUCTURE_SIZE_ONE_WORD 0x00000001 -#define NVC0C0_QMDV01_07_RELEASE1_PAYLOAD MW(927:896) -#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_ADDR_LOWER(i) MW((959+(i)*64):(928+(i)*64)) -#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_ADDR_UPPER(i) MW((967+(i)*64):(960+(i)*64)) -#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_RESERVED_ADDR(i) MW((973+(i)*64):(968+(i)*64)) -#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_INVALIDATE(i) MW((974+(i)*64):(974+(i)*64)) -#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_INVALIDATE_FALSE 0x00000000 -#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_INVALIDATE_TRUE 0x00000001 -#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_SIZE(i) MW((991+(i)*64):(975+(i)*64)) -#define NVC0C0_QMDV01_07_SHADER_LOCAL_MEMORY_LOW_SIZE MW(1463:1440) -#define NVC0C0_QMDV01_07_QMD_RESERVED_N MW(1466:1464) -#define NVC0C0_QMDV01_07_BARRIER_COUNT MW(1471:1467) -#define NVC0C0_QMDV01_07_SHADER_LOCAL_MEMORY_HIGH_SIZE MW(1495:1472) -#define NVC0C0_QMDV01_07_REGISTER_COUNT MW(1503:1496) -#define NVC0C0_QMDV01_07_SHADER_LOCAL_MEMORY_CRS_SIZE MW(1527:1504) -#define NVC0C0_QMDV01_07_SASS_VERSION MW(1535:1528) -#define NVC0C0_QMDV01_07_HW_ONLY_INNER_GET MW(1566:1536) -#define NVC0C0_QMDV01_07_HW_ONLY_REQUIRE_SCHEDULING_PCAS MW(1567:1567) -#define NVC0C0_QMDV01_07_HW_ONLY_INNER_PUT MW(1598:1568) -#define NVC0C0_QMDV01_07_HW_ONLY_SCG_TYPE MW(1599:1599) -#define NVC0C0_QMDV01_07_HW_ONLY_SPAN_LIST_HEAD_INDEX MW(1629:1600) -#define NVC0C0_QMDV01_07_QMD_RESERVED_Q MW(1630:1630) -#define NVC0C0_QMDV01_07_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID MW(1631:1631) -#define NVC0C0_QMDV01_07_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_FALSE 0x00000000 -#define NVC0C0_QMDV01_07_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_TRUE 0x00000001 -#define NVC0C0_QMDV01_07_HW_ONLY_SKED_NEXT_QMD_POINTER MW(1663:1632) -#define NVC0C0_QMDV01_07_QMD_SPARE_E MW(1695:1664) -#define NVC0C0_QMDV01_07_QMD_SPARE_F MW(1727:1696) -#define NVC0C0_QMDV01_07_QMD_SPARE_G MW(1759:1728) -#define NVC0C0_QMDV01_07_QMD_SPARE_H MW(1791:1760) -#define NVC0C0_QMDV01_07_QMD_SPARE_I MW(1823:1792) -#define NVC0C0_QMDV01_07_QMD_SPARE_J MW(1855:1824) -#define NVC0C0_QMDV01_07_QMD_SPARE_K MW(1887:1856) -#define NVC0C0_QMDV01_07_QMD_SPARE_L MW(1919:1888) -#define NVC0C0_QMDV01_07_QMD_SPARE_M MW(1951:1920) -#define NVC0C0_QMDV01_07_QMD_SPARE_N MW(1983:1952) -#define NVC0C0_QMDV01_07_DEBUG_ID_UPPER MW(2015:1984) -#define NVC0C0_QMDV01_07_DEBUG_ID_LOWER MW(2047:2016) - - -/* -** Queue Meta Data, Version 02_00 - */ - -#define NVC0C0_QMDV02_00_OUTER_PUT MW(30:0) -#define NVC0C0_QMDV02_00_OUTER_OVERFLOW MW(31:31) -#define NVC0C0_QMDV02_00_OUTER_GET MW(62:32) -#define NVC0C0_QMDV02_00_OUTER_STICKY_OVERFLOW MW(63:63) -#define NVC0C0_QMDV02_00_INNER_GET MW(94:64) -#define NVC0C0_QMDV02_00_INNER_OVERFLOW MW(95:95) -#define NVC0C0_QMDV02_00_INNER_PUT MW(126:96) -#define NVC0C0_QMDV02_00_INNER_STICKY_OVERFLOW MW(127:127) -#define NVC0C0_QMDV02_00_QMD_RESERVED_A_A MW(159:128) -#define NVC0C0_QMDV02_00_DEPENDENT_QMD_POINTER MW(191:160) -#define NVC0C0_QMDV02_00_QMD_GROUP_ID MW(197:192) -#define NVC0C0_QMDV02_00_SM_GLOBAL_CACHING_ENABLE MW(198:198) -#define NVC0C0_QMDV02_00_RUN_CTA_IN_ONE_SM_PARTITION MW(199:199) -#define NVC0C0_QMDV02_00_RUN_CTA_IN_ONE_SM_PARTITION_FALSE 0x00000000 -#define NVC0C0_QMDV02_00_RUN_CTA_IN_ONE_SM_PARTITION_TRUE 0x00000001 -#define NVC0C0_QMDV02_00_IS_QUEUE MW(200:200) -#define NVC0C0_QMDV02_00_IS_QUEUE_FALSE 0x00000000 -#define NVC0C0_QMDV02_00_IS_QUEUE_TRUE 0x00000001 -#define NVC0C0_QMDV02_00_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST MW(201:201) -#define NVC0C0_QMDV02_00_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_FALSE 0x00000000 -#define NVC0C0_QMDV02_00_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_TRUE 0x00000001 -#define NVC0C0_QMDV02_00_SEMAPHORE_RELEASE_ENABLE0 MW(202:202) -#define NVC0C0_QMDV02_00_SEMAPHORE_RELEASE_ENABLE0_FALSE 0x00000000 -#define NVC0C0_QMDV02_00_SEMAPHORE_RELEASE_ENABLE0_TRUE 0x00000001 -#define NVC0C0_QMDV02_00_SEMAPHORE_RELEASE_ENABLE1 MW(203:203) -#define NVC0C0_QMDV02_00_SEMAPHORE_RELEASE_ENABLE1_FALSE 0x00000000 -#define NVC0C0_QMDV02_00_SEMAPHORE_RELEASE_ENABLE1_TRUE 0x00000001 -#define NVC0C0_QMDV02_00_REQUIRE_SCHEDULING_PCAS MW(204:204) -#define NVC0C0_QMDV02_00_REQUIRE_SCHEDULING_PCAS_FALSE 0x00000000 -#define NVC0C0_QMDV02_00_REQUIRE_SCHEDULING_PCAS_TRUE 0x00000001 -#define NVC0C0_QMDV02_00_DEPENDENT_QMD_SCHEDULE_ENABLE MW(205:205) -#define NVC0C0_QMDV02_00_DEPENDENT_QMD_SCHEDULE_ENABLE_FALSE 0x00000000 -#define NVC0C0_QMDV02_00_DEPENDENT_QMD_SCHEDULE_ENABLE_TRUE 0x00000001 -#define NVC0C0_QMDV02_00_DEPENDENT_QMD_TYPE MW(206:206) -#define NVC0C0_QMDV02_00_DEPENDENT_QMD_TYPE_QUEUE 0x00000000 -#define NVC0C0_QMDV02_00_DEPENDENT_QMD_TYPE_GRID 0x00000001 -#define NVC0C0_QMDV02_00_DEPENDENT_QMD_FIELD_COPY MW(207:207) -#define NVC0C0_QMDV02_00_DEPENDENT_QMD_FIELD_COPY_FALSE 0x00000000 -#define NVC0C0_QMDV02_00_DEPENDENT_QMD_FIELD_COPY_TRUE 0x00000001 -#define NVC0C0_QMDV02_00_QMD_RESERVED_B MW(223:208) -#define NVC0C0_QMDV02_00_CIRCULAR_QUEUE_SIZE MW(248:224) -#define NVC0C0_QMDV02_00_QMD_RESERVED_C MW(249:249) -#define NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_HEADER_CACHE MW(250:250) -#define NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_HEADER_CACHE_FALSE 0x00000000 -#define NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_HEADER_CACHE_TRUE 0x00000001 -#define NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_SAMPLER_CACHE MW(251:251) -#define NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_SAMPLER_CACHE_FALSE 0x00000000 -#define NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_SAMPLER_CACHE_TRUE 0x00000001 -#define NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_DATA_CACHE MW(252:252) -#define NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_DATA_CACHE_FALSE 0x00000000 -#define NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_DATA_CACHE_TRUE 0x00000001 -#define NVC0C0_QMDV02_00_INVALIDATE_SHADER_DATA_CACHE MW(253:253) -#define NVC0C0_QMDV02_00_INVALIDATE_SHADER_DATA_CACHE_FALSE 0x00000000 -#define NVC0C0_QMDV02_00_INVALIDATE_SHADER_DATA_CACHE_TRUE 0x00000001 -#define NVC0C0_QMDV02_00_INVALIDATE_INSTRUCTION_CACHE MW(254:254) -#define NVC0C0_QMDV02_00_INVALIDATE_INSTRUCTION_CACHE_FALSE 0x00000000 -#define NVC0C0_QMDV02_00_INVALIDATE_INSTRUCTION_CACHE_TRUE 0x00000001 -#define NVC0C0_QMDV02_00_INVALIDATE_SHADER_CONSTANT_CACHE MW(255:255) -#define NVC0C0_QMDV02_00_INVALIDATE_SHADER_CONSTANT_CACHE_FALSE 0x00000000 -#define NVC0C0_QMDV02_00_INVALIDATE_SHADER_CONSTANT_CACHE_TRUE 0x00000001 -#define NVC0C0_QMDV02_00_PROGRAM_OFFSET MW(287:256) -#define NVC0C0_QMDV02_00_CIRCULAR_QUEUE_ADDR_LOWER MW(319:288) -#define NVC0C0_QMDV02_00_CIRCULAR_QUEUE_ADDR_UPPER MW(327:320) -#define NVC0C0_QMDV02_00_QMD_RESERVED_D MW(335:328) -#define NVC0C0_QMDV02_00_CIRCULAR_QUEUE_ENTRY_SIZE MW(351:336) -#define NVC0C0_QMDV02_00_CWD_REFERENCE_COUNT_ID MW(357:352) -#define NVC0C0_QMDV02_00_CWD_REFERENCE_COUNT_DELTA_MINUS_ONE MW(365:358) -#define NVC0C0_QMDV02_00_RELEASE_MEMBAR_TYPE MW(366:366) -#define NVC0C0_QMDV02_00_RELEASE_MEMBAR_TYPE_FE_NONE 0x00000000 -#define NVC0C0_QMDV02_00_RELEASE_MEMBAR_TYPE_FE_SYSMEMBAR 0x00000001 -#define NVC0C0_QMDV02_00_CWD_REFERENCE_COUNT_INCR_ENABLE MW(367:367) -#define NVC0C0_QMDV02_00_CWD_REFERENCE_COUNT_INCR_ENABLE_FALSE 0x00000000 -#define NVC0C0_QMDV02_00_CWD_REFERENCE_COUNT_INCR_ENABLE_TRUE 0x00000001 -#define NVC0C0_QMDV02_00_CWD_MEMBAR_TYPE MW(369:368) -#define NVC0C0_QMDV02_00_CWD_MEMBAR_TYPE_L1_NONE 0x00000000 -#define NVC0C0_QMDV02_00_CWD_MEMBAR_TYPE_L1_SYSMEMBAR 0x00000001 -#define NVC0C0_QMDV02_00_CWD_MEMBAR_TYPE_L1_MEMBAR 0x00000003 -#define NVC0C0_QMDV02_00_SEQUENTIALLY_RUN_CTAS MW(370:370) -#define NVC0C0_QMDV02_00_SEQUENTIALLY_RUN_CTAS_FALSE 0x00000000 -#define NVC0C0_QMDV02_00_SEQUENTIALLY_RUN_CTAS_TRUE 0x00000001 -#define NVC0C0_QMDV02_00_CWD_REFERENCE_COUNT_DECR_ENABLE MW(371:371) -#define NVC0C0_QMDV02_00_CWD_REFERENCE_COUNT_DECR_ENABLE_FALSE 0x00000000 -#define NVC0C0_QMDV02_00_CWD_REFERENCE_COUNT_DECR_ENABLE_TRUE 0x00000001 -#define NVC0C0_QMDV02_00_THROTTLED MW(372:372) -#define NVC0C0_QMDV02_00_THROTTLED_FALSE 0x00000000 -#define NVC0C0_QMDV02_00_THROTTLED_TRUE 0x00000001 -#define NVC0C0_QMDV02_00_API_VISIBLE_CALL_LIMIT MW(378:378) -#define NVC0C0_QMDV02_00_API_VISIBLE_CALL_LIMIT__32 0x00000000 -#define NVC0C0_QMDV02_00_API_VISIBLE_CALL_LIMIT_NO_CHECK 0x00000001 -#define NVC0C0_QMDV02_00_SAMPLER_INDEX MW(382:382) -#define NVC0C0_QMDV02_00_SAMPLER_INDEX_INDEPENDENTLY 0x00000000 -#define NVC0C0_QMDV02_00_SAMPLER_INDEX_VIA_HEADER_INDEX 0x00000001 -#define NVC0C0_QMDV02_00_CTA_RASTER_WIDTH MW(415:384) -#define NVC0C0_QMDV02_00_CTA_RASTER_HEIGHT MW(431:416) -#define NVC0C0_QMDV02_00_QMD_RESERVED13A MW(447:432) -#define NVC0C0_QMDV02_00_CTA_RASTER_DEPTH MW(463:448) -#define NVC0C0_QMDV02_00_QMD_RESERVED14A MW(479:464) -#define NVC0C0_QMDV02_00_QMD_RESERVED15A MW(511:480) -#define NVC0C0_QMDV02_00_QUEUE_ENTRIES_PER_CTA_MINUS_ONE MW(518:512) -#define NVC0C0_QMDV02_00_COALESCE_WAITING_PERIOD MW(529:522) -#define NVC0C0_QMDV02_00_SHARED_MEMORY_SIZE MW(561:544) -#define NVC0C0_QMDV02_00_QMD_RESERVED_G MW(575:562) -#define NVC0C0_QMDV02_00_QMD_VERSION MW(579:576) -#define NVC0C0_QMDV02_00_QMD_MAJOR_VERSION MW(583:580) -#define NVC0C0_QMDV02_00_QMD_RESERVED_H MW(591:584) -#define NVC0C0_QMDV02_00_CTA_THREAD_DIMENSION0 MW(607:592) -#define NVC0C0_QMDV02_00_CTA_THREAD_DIMENSION1 MW(623:608) -#define NVC0C0_QMDV02_00_CTA_THREAD_DIMENSION2 MW(639:624) -#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_VALID(i) MW((640+(i)*1):(640+(i)*1)) -#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_VALID_FALSE 0x00000000 -#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_VALID_TRUE 0x00000001 -#define NVC0C0_QMDV02_00_QMD_RESERVED_I MW(671:648) -#define NVC0C0_QMDV02_00_SM_DISABLE_MASK_LOWER MW(703:672) -#define NVC0C0_QMDV02_00_SM_DISABLE_MASK_UPPER MW(735:704) -#define NVC0C0_QMDV02_00_RELEASE0_ADDRESS_LOWER MW(767:736) -#define NVC0C0_QMDV02_00_RELEASE0_ADDRESS_UPPER MW(775:768) -#define NVC0C0_QMDV02_00_QMD_RESERVED_J MW(783:776) -#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP MW(790:788) -#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP_RED_ADD 0x00000000 -#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP_RED_MIN 0x00000001 -#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP_RED_MAX 0x00000002 -#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP_RED_INC 0x00000003 -#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP_RED_DEC 0x00000004 -#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP_RED_AND 0x00000005 -#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP_RED_OR 0x00000006 -#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP_RED_XOR 0x00000007 -#define NVC0C0_QMDV02_00_QMD_RESERVED_K MW(791:791) -#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_FORMAT MW(793:792) -#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_FORMAT_UNSIGNED_32 0x00000000 -#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_FORMAT_SIGNED_32 0x00000001 -#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_ENABLE MW(794:794) -#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_ENABLE_FALSE 0x00000000 -#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_ENABLE_TRUE 0x00000001 -#define NVC0C0_QMDV02_00_RELEASE0_STRUCTURE_SIZE MW(799:799) -#define NVC0C0_QMDV02_00_RELEASE0_STRUCTURE_SIZE_FOUR_WORDS 0x00000000 -#define NVC0C0_QMDV02_00_RELEASE0_STRUCTURE_SIZE_ONE_WORD 0x00000001 -#define NVC0C0_QMDV02_00_RELEASE0_PAYLOAD MW(831:800) -#define NVC0C0_QMDV02_00_RELEASE1_ADDRESS_LOWER MW(863:832) -#define NVC0C0_QMDV02_00_RELEASE1_ADDRESS_UPPER MW(871:864) -#define NVC0C0_QMDV02_00_QMD_RESERVED_L MW(879:872) -#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP MW(886:884) -#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP_RED_ADD 0x00000000 -#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP_RED_MIN 0x00000001 -#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP_RED_MAX 0x00000002 -#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP_RED_INC 0x00000003 -#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP_RED_DEC 0x00000004 -#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP_RED_AND 0x00000005 -#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP_RED_OR 0x00000006 -#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP_RED_XOR 0x00000007 -#define NVC0C0_QMDV02_00_QMD_RESERVED_M MW(887:887) -#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_FORMAT MW(889:888) -#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_FORMAT_UNSIGNED_32 0x00000000 -#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_FORMAT_SIGNED_32 0x00000001 -#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_ENABLE MW(890:890) -#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_ENABLE_FALSE 0x00000000 -#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_ENABLE_TRUE 0x00000001 -#define NVC0C0_QMDV02_00_RELEASE1_STRUCTURE_SIZE MW(895:895) -#define NVC0C0_QMDV02_00_RELEASE1_STRUCTURE_SIZE_FOUR_WORDS 0x00000000 -#define NVC0C0_QMDV02_00_RELEASE1_STRUCTURE_SIZE_ONE_WORD 0x00000001 -#define NVC0C0_QMDV02_00_RELEASE1_PAYLOAD MW(927:896) -#define NVC0C0_QMDV02_00_SHADER_LOCAL_MEMORY_LOW_SIZE MW(951:928) -#define NVC0C0_QMDV02_00_QMD_RESERVED_N MW(954:952) -#define NVC0C0_QMDV02_00_BARRIER_COUNT MW(959:955) -#define NVC0C0_QMDV02_00_SHADER_LOCAL_MEMORY_HIGH_SIZE MW(983:960) -#define NVC0C0_QMDV02_00_REGISTER_COUNT MW(991:984) -#define NVC0C0_QMDV02_00_SHADER_LOCAL_MEMORY_CRS_SIZE MW(1015:992) -#define NVC0C0_QMDV02_00_SASS_VERSION MW(1023:1016) -#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_ADDR_LOWER(i) MW((1055+(i)*64):(1024+(i)*64)) -#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_ADDR_UPPER(i) MW((1072+(i)*64):(1056+(i)*64)) -#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_RESERVED_ADDR(i) MW((1073+(i)*64):(1073+(i)*64)) -#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_INVALIDATE(i) MW((1074+(i)*64):(1074+(i)*64)) -#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_INVALIDATE_FALSE 0x00000000 -#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_INVALIDATE_TRUE 0x00000001 -#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_SIZE_SHIFTED4(i) MW((1087+(i)*64):(1075+(i)*64)) -#define NVC0C0_QMDV02_00_HW_ONLY_INNER_GET MW(1566:1536) -#define NVC0C0_QMDV02_00_HW_ONLY_REQUIRE_SCHEDULING_PCAS MW(1567:1567) -#define NVC0C0_QMDV02_00_HW_ONLY_INNER_PUT MW(1598:1568) -#define NVC0C0_QMDV02_00_HW_ONLY_SCG_TYPE MW(1599:1599) -#define NVC0C0_QMDV02_00_HW_ONLY_SPAN_LIST_HEAD_INDEX MW(1629:1600) -#define NVC0C0_QMDV02_00_QMD_RESERVED_Q MW(1630:1630) -#define NVC0C0_QMDV02_00_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID MW(1631:1631) -#define NVC0C0_QMDV02_00_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_FALSE 0x00000000 -#define NVC0C0_QMDV02_00_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_TRUE 0x00000001 -#define NVC0C0_QMDV02_00_HW_ONLY_SKED_NEXT_QMD_POINTER MW(1663:1632) -#define NVC0C0_QMDV02_00_CTA_RASTER_WIDTH_RESUME MW(1695:1664) -#define NVC0C0_QMDV02_00_CTA_RASTER_HEIGHT_RESUME MW(1711:1696) -#define NVC0C0_QMDV02_00_CTA_RASTER_DEPTH_RESUME MW(1727:1712) -#define NVC0C0_QMDV02_00_QMD_SPARE_G MW(1759:1728) -#define NVC0C0_QMDV02_00_QMD_SPARE_H MW(1791:1760) -#define NVC0C0_QMDV02_00_QMD_SPARE_I MW(1823:1792) -#define NVC0C0_QMDV02_00_QMD_SPARE_J MW(1855:1824) -#define NVC0C0_QMDV02_00_QMD_SPARE_K MW(1887:1856) -#define NVC0C0_QMDV02_00_QMD_SPARE_L MW(1919:1888) -#define NVC0C0_QMDV02_00_QMD_SPARE_M MW(1951:1920) -#define NVC0C0_QMDV02_00_QMD_SPARE_N MW(1983:1952) -#define NVC0C0_QMDV02_00_DEBUG_ID_UPPER MW(2015:1984) -#define NVC0C0_QMDV02_00_DEBUG_ID_LOWER MW(2047:2016) - - -/* -** Queue Meta Data, Version 02_01 - */ - -#define NVC0C0_QMDV02_01_OUTER_PUT MW(30:0) -#define NVC0C0_QMDV02_01_OUTER_OVERFLOW MW(31:31) -#define NVC0C0_QMDV02_01_OUTER_GET MW(62:32) -#define NVC0C0_QMDV02_01_OUTER_STICKY_OVERFLOW MW(63:63) -#define NVC0C0_QMDV02_01_INNER_GET MW(94:64) -#define NVC0C0_QMDV02_01_INNER_OVERFLOW MW(95:95) -#define NVC0C0_QMDV02_01_INNER_PUT MW(126:96) -#define NVC0C0_QMDV02_01_INNER_STICKY_OVERFLOW MW(127:127) -#define NVC0C0_QMDV02_01_QMD_GROUP_ID MW(133:128) -#define NVC0C0_QMDV02_01_SM_GLOBAL_CACHING_ENABLE MW(134:134) -#define NVC0C0_QMDV02_01_RUN_CTA_IN_ONE_SM_PARTITION MW(135:135) -#define NVC0C0_QMDV02_01_RUN_CTA_IN_ONE_SM_PARTITION_FALSE 0x00000000 -#define NVC0C0_QMDV02_01_RUN_CTA_IN_ONE_SM_PARTITION_TRUE 0x00000001 -#define NVC0C0_QMDV02_01_IS_QUEUE MW(136:136) -#define NVC0C0_QMDV02_01_IS_QUEUE_FALSE 0x00000000 -#define NVC0C0_QMDV02_01_IS_QUEUE_TRUE 0x00000001 -#define NVC0C0_QMDV02_01_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST MW(137:137) -#define NVC0C0_QMDV02_01_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_FALSE 0x00000000 -#define NVC0C0_QMDV02_01_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_TRUE 0x00000001 -#define NVC0C0_QMDV02_01_SEMAPHORE_RELEASE_ENABLE0 MW(138:138) -#define NVC0C0_QMDV02_01_SEMAPHORE_RELEASE_ENABLE0_FALSE 0x00000000 -#define NVC0C0_QMDV02_01_SEMAPHORE_RELEASE_ENABLE0_TRUE 0x00000001 -#define NVC0C0_QMDV02_01_SEMAPHORE_RELEASE_ENABLE1 MW(139:139) -#define NVC0C0_QMDV02_01_SEMAPHORE_RELEASE_ENABLE1_FALSE 0x00000000 -#define NVC0C0_QMDV02_01_SEMAPHORE_RELEASE_ENABLE1_TRUE 0x00000001 -#define NVC0C0_QMDV02_01_REQUIRE_SCHEDULING_PCAS MW(140:140) -#define NVC0C0_QMDV02_01_REQUIRE_SCHEDULING_PCAS_FALSE 0x00000000 -#define NVC0C0_QMDV02_01_REQUIRE_SCHEDULING_PCAS_TRUE 0x00000001 -#define NVC0C0_QMDV02_01_DEPENDENT_QMD_SCHEDULE_ENABLE MW(141:141) -#define NVC0C0_QMDV02_01_DEPENDENT_QMD_SCHEDULE_ENABLE_FALSE 0x00000000 -#define NVC0C0_QMDV02_01_DEPENDENT_QMD_SCHEDULE_ENABLE_TRUE 0x00000001 -#define NVC0C0_QMDV02_01_DEPENDENT_QMD_TYPE MW(142:142) -#define NVC0C0_QMDV02_01_DEPENDENT_QMD_TYPE_QUEUE 0x00000000 -#define NVC0C0_QMDV02_01_DEPENDENT_QMD_TYPE_GRID 0x00000001 -#define NVC0C0_QMDV02_01_DEPENDENT_QMD_FIELD_COPY MW(143:143) -#define NVC0C0_QMDV02_01_DEPENDENT_QMD_FIELD_COPY_FALSE 0x00000000 -#define NVC0C0_QMDV02_01_DEPENDENT_QMD_FIELD_COPY_TRUE 0x00000001 -#define NVC0C0_QMDV02_01_QMD_RESERVED_B MW(159:144) -#define NVC0C0_QMDV02_01_CIRCULAR_QUEUE_SIZE MW(184:160) -#define NVC0C0_QMDV02_01_QMD_RESERVED_C MW(185:185) -#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_HEADER_CACHE MW(186:186) -#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_HEADER_CACHE_FALSE 0x00000000 -#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_HEADER_CACHE_TRUE 0x00000001 -#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_SAMPLER_CACHE MW(187:187) -#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_SAMPLER_CACHE_FALSE 0x00000000 -#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_SAMPLER_CACHE_TRUE 0x00000001 -#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_DATA_CACHE MW(188:188) -#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_DATA_CACHE_FALSE 0x00000000 -#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_DATA_CACHE_TRUE 0x00000001 -#define NVC0C0_QMDV02_01_INVALIDATE_SHADER_DATA_CACHE MW(189:189) -#define NVC0C0_QMDV02_01_INVALIDATE_SHADER_DATA_CACHE_FALSE 0x00000000 -#define NVC0C0_QMDV02_01_INVALIDATE_SHADER_DATA_CACHE_TRUE 0x00000001 -#define NVC0C0_QMDV02_01_INVALIDATE_INSTRUCTION_CACHE MW(190:190) -#define NVC0C0_QMDV02_01_INVALIDATE_INSTRUCTION_CACHE_FALSE 0x00000000 -#define NVC0C0_QMDV02_01_INVALIDATE_INSTRUCTION_CACHE_TRUE 0x00000001 -#define NVC0C0_QMDV02_01_INVALIDATE_SHADER_CONSTANT_CACHE MW(191:191) -#define NVC0C0_QMDV02_01_INVALIDATE_SHADER_CONSTANT_CACHE_FALSE 0x00000000 -#define NVC0C0_QMDV02_01_INVALIDATE_SHADER_CONSTANT_CACHE_TRUE 0x00000001 -#define NVC0C0_QMDV02_01_CTA_RASTER_WIDTH_RESUME MW(223:192) -#define NVC0C0_QMDV02_01_CTA_RASTER_HEIGHT_RESUME MW(239:224) -#define NVC0C0_QMDV02_01_CTA_RASTER_DEPTH_RESUME MW(255:240) -#define NVC0C0_QMDV02_01_PROGRAM_OFFSET MW(287:256) -#define NVC0C0_QMDV02_01_CIRCULAR_QUEUE_ADDR_LOWER MW(319:288) -#define NVC0C0_QMDV02_01_CIRCULAR_QUEUE_ADDR_UPPER MW(327:320) -#define NVC0C0_QMDV02_01_QMD_RESERVED_D MW(335:328) -#define NVC0C0_QMDV02_01_CIRCULAR_QUEUE_ENTRY_SIZE MW(351:336) -#define NVC0C0_QMDV02_01_CWD_REFERENCE_COUNT_ID MW(357:352) -#define NVC0C0_QMDV02_01_CWD_REFERENCE_COUNT_DELTA_MINUS_ONE MW(365:358) -#define NVC0C0_QMDV02_01_RELEASE_MEMBAR_TYPE MW(366:366) -#define NVC0C0_QMDV02_01_RELEASE_MEMBAR_TYPE_FE_NONE 0x00000000 -#define NVC0C0_QMDV02_01_RELEASE_MEMBAR_TYPE_FE_SYSMEMBAR 0x00000001 -#define NVC0C0_QMDV02_01_CWD_REFERENCE_COUNT_INCR_ENABLE MW(367:367) -#define NVC0C0_QMDV02_01_CWD_REFERENCE_COUNT_INCR_ENABLE_FALSE 0x00000000 -#define NVC0C0_QMDV02_01_CWD_REFERENCE_COUNT_INCR_ENABLE_TRUE 0x00000001 -#define NVC0C0_QMDV02_01_CWD_MEMBAR_TYPE MW(369:368) -#define NVC0C0_QMDV02_01_CWD_MEMBAR_TYPE_L1_NONE 0x00000000 -#define NVC0C0_QMDV02_01_CWD_MEMBAR_TYPE_L1_SYSMEMBAR 0x00000001 -#define NVC0C0_QMDV02_01_CWD_MEMBAR_TYPE_L1_MEMBAR 0x00000003 -#define NVC0C0_QMDV02_01_SEQUENTIALLY_RUN_CTAS MW(370:370) -#define NVC0C0_QMDV02_01_SEQUENTIALLY_RUN_CTAS_FALSE 0x00000000 -#define NVC0C0_QMDV02_01_SEQUENTIALLY_RUN_CTAS_TRUE 0x00000001 -#define NVC0C0_QMDV02_01_CWD_REFERENCE_COUNT_DECR_ENABLE MW(371:371) -#define NVC0C0_QMDV02_01_CWD_REFERENCE_COUNT_DECR_ENABLE_FALSE 0x00000000 -#define NVC0C0_QMDV02_01_CWD_REFERENCE_COUNT_DECR_ENABLE_TRUE 0x00000001 -#define NVC0C0_QMDV02_01_THROTTLED MW(372:372) -#define NVC0C0_QMDV02_01_THROTTLED_FALSE 0x00000000 -#define NVC0C0_QMDV02_01_THROTTLED_TRUE 0x00000001 -#define NVC0C0_QMDV02_01_API_VISIBLE_CALL_LIMIT MW(378:378) -#define NVC0C0_QMDV02_01_API_VISIBLE_CALL_LIMIT__32 0x00000000 -#define NVC0C0_QMDV02_01_API_VISIBLE_CALL_LIMIT_NO_CHECK 0x00000001 -#define NVC0C0_QMDV02_01_SAMPLER_INDEX MW(382:382) -#define NVC0C0_QMDV02_01_SAMPLER_INDEX_INDEPENDENTLY 0x00000000 -#define NVC0C0_QMDV02_01_SAMPLER_INDEX_VIA_HEADER_INDEX 0x00000001 -#define NVC0C0_QMDV02_01_CTA_RASTER_WIDTH MW(415:384) -#define NVC0C0_QMDV02_01_CTA_RASTER_HEIGHT MW(431:416) -#define NVC0C0_QMDV02_01_QMD_RESERVED13A MW(447:432) -#define NVC0C0_QMDV02_01_CTA_RASTER_DEPTH MW(463:448) -#define NVC0C0_QMDV02_01_QMD_RESERVED14A MW(479:464) -#define NVC0C0_QMDV02_01_DEPENDENT_QMD_POINTER MW(511:480) -#define NVC0C0_QMDV02_01_QUEUE_ENTRIES_PER_CTA_MINUS_ONE MW(518:512) -#define NVC0C0_QMDV02_01_COALESCE_WAITING_PERIOD MW(529:522) -#define NVC0C0_QMDV02_01_SHARED_MEMORY_SIZE MW(561:544) -#define NVC0C0_QMDV02_01_QMD_RESERVED_G MW(575:562) -#define NVC0C0_QMDV02_01_QMD_VERSION MW(579:576) -#define NVC0C0_QMDV02_01_QMD_MAJOR_VERSION MW(583:580) -#define NVC0C0_QMDV02_01_QMD_RESERVED_H MW(591:584) -#define NVC0C0_QMDV02_01_CTA_THREAD_DIMENSION0 MW(607:592) -#define NVC0C0_QMDV02_01_CTA_THREAD_DIMENSION1 MW(623:608) -#define NVC0C0_QMDV02_01_CTA_THREAD_DIMENSION2 MW(639:624) -#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_VALID(i) MW((640+(i)*1):(640+(i)*1)) -#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_VALID_FALSE 0x00000000 -#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_VALID_TRUE 0x00000001 -#define NVC0C0_QMDV02_01_QMD_RESERVED_I MW(671:648) -#define NVC0C0_QMDV02_01_SM_DISABLE_MASK_LOWER MW(703:672) -#define NVC0C0_QMDV02_01_SM_DISABLE_MASK_UPPER MW(735:704) -#define NVC0C0_QMDV02_01_RELEASE0_ADDRESS_LOWER MW(767:736) -#define NVC0C0_QMDV02_01_RELEASE0_ADDRESS_UPPER MW(775:768) -#define NVC0C0_QMDV02_01_QMD_RESERVED_J MW(783:776) -#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP MW(790:788) -#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP_RED_ADD 0x00000000 -#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP_RED_MIN 0x00000001 -#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP_RED_MAX 0x00000002 -#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP_RED_INC 0x00000003 -#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP_RED_DEC 0x00000004 -#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP_RED_AND 0x00000005 -#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP_RED_OR 0x00000006 -#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP_RED_XOR 0x00000007 -#define NVC0C0_QMDV02_01_QMD_RESERVED_K MW(791:791) -#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_FORMAT MW(793:792) -#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_FORMAT_UNSIGNED_32 0x00000000 -#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_FORMAT_SIGNED_32 0x00000001 -#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_ENABLE MW(794:794) -#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_ENABLE_FALSE 0x00000000 -#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_ENABLE_TRUE 0x00000001 -#define NVC0C0_QMDV02_01_RELEASE0_STRUCTURE_SIZE MW(799:799) -#define NVC0C0_QMDV02_01_RELEASE0_STRUCTURE_SIZE_FOUR_WORDS 0x00000000 -#define NVC0C0_QMDV02_01_RELEASE0_STRUCTURE_SIZE_ONE_WORD 0x00000001 -#define NVC0C0_QMDV02_01_RELEASE0_PAYLOAD MW(831:800) -#define NVC0C0_QMDV02_01_RELEASE1_ADDRESS_LOWER MW(863:832) -#define NVC0C0_QMDV02_01_RELEASE1_ADDRESS_UPPER MW(871:864) -#define NVC0C0_QMDV02_01_QMD_RESERVED_L MW(879:872) -#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP MW(886:884) -#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP_RED_ADD 0x00000000 -#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP_RED_MIN 0x00000001 -#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP_RED_MAX 0x00000002 -#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP_RED_INC 0x00000003 -#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP_RED_DEC 0x00000004 -#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP_RED_AND 0x00000005 -#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP_RED_OR 0x00000006 -#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP_RED_XOR 0x00000007 -#define NVC0C0_QMDV02_01_QMD_RESERVED_M MW(887:887) -#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_FORMAT MW(889:888) -#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_FORMAT_UNSIGNED_32 0x00000000 -#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_FORMAT_SIGNED_32 0x00000001 -#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_ENABLE MW(890:890) -#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_ENABLE_FALSE 0x00000000 -#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_ENABLE_TRUE 0x00000001 -#define NVC0C0_QMDV02_01_RELEASE1_STRUCTURE_SIZE MW(895:895) -#define NVC0C0_QMDV02_01_RELEASE1_STRUCTURE_SIZE_FOUR_WORDS 0x00000000 -#define NVC0C0_QMDV02_01_RELEASE1_STRUCTURE_SIZE_ONE_WORD 0x00000001 -#define NVC0C0_QMDV02_01_RELEASE1_PAYLOAD MW(927:896) -#define NVC0C0_QMDV02_01_SHADER_LOCAL_MEMORY_LOW_SIZE MW(951:928) -#define NVC0C0_QMDV02_01_QMD_RESERVED_N MW(954:952) -#define NVC0C0_QMDV02_01_BARRIER_COUNT MW(959:955) -#define NVC0C0_QMDV02_01_SHADER_LOCAL_MEMORY_HIGH_SIZE MW(983:960) -#define NVC0C0_QMDV02_01_REGISTER_COUNT MW(991:984) -#define NVC0C0_QMDV02_01_SHADER_LOCAL_MEMORY_CRS_SIZE MW(1015:992) -#define NVC0C0_QMDV02_01_SASS_VERSION MW(1023:1016) -#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_ADDR_LOWER(i) MW((1055+(i)*64):(1024+(i)*64)) -#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_ADDR_UPPER(i) MW((1072+(i)*64):(1056+(i)*64)) -#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_RESERVED_ADDR(i) MW((1073+(i)*64):(1073+(i)*64)) -#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_INVALIDATE(i) MW((1074+(i)*64):(1074+(i)*64)) -#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_INVALIDATE_FALSE 0x00000000 -#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_INVALIDATE_TRUE 0x00000001 -#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_SIZE_SHIFTED4(i) MW((1087+(i)*64):(1075+(i)*64)) -#define NVC0C0_QMDV02_01_QMD_RESERVED_R MW(1567:1536) -#define NVC0C0_QMDV02_01_QMD_RESERVED_S MW(1599:1568) -#define NVC0C0_QMDV02_01_HW_ONLY_INNER_GET MW(1630:1600) -#define NVC0C0_QMDV02_01_HW_ONLY_REQUIRE_SCHEDULING_PCAS MW(1631:1631) -#define NVC0C0_QMDV02_01_HW_ONLY_INNER_PUT MW(1662:1632) -#define NVC0C0_QMDV02_01_HW_ONLY_SCG_TYPE MW(1663:1663) -#define NVC0C0_QMDV02_01_HW_ONLY_SPAN_LIST_HEAD_INDEX MW(1693:1664) -#define NVC0C0_QMDV02_01_QMD_RESERVED_Q MW(1694:1694) -#define NVC0C0_QMDV02_01_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID MW(1695:1695) -#define NVC0C0_QMDV02_01_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_FALSE 0x00000000 -#define NVC0C0_QMDV02_01_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_TRUE 0x00000001 -#define NVC0C0_QMDV02_01_HW_ONLY_SKED_NEXT_QMD_POINTER MW(1727:1696) -#define NVC0C0_QMDV02_01_QMD_SPARE_G MW(1759:1728) -#define NVC0C0_QMDV02_01_QMD_SPARE_H MW(1791:1760) -#define NVC0C0_QMDV02_01_QMD_SPARE_I MW(1823:1792) -#define NVC0C0_QMDV02_01_QMD_SPARE_J MW(1855:1824) -#define NVC0C0_QMDV02_01_QMD_SPARE_K MW(1887:1856) -#define NVC0C0_QMDV02_01_QMD_SPARE_L MW(1919:1888) -#define NVC0C0_QMDV02_01_QMD_SPARE_M MW(1951:1920) -#define NVC0C0_QMDV02_01_QMD_SPARE_N MW(1983:1952) -#define NVC0C0_QMDV02_01_DEBUG_ID_UPPER MW(2015:1984) -#define NVC0C0_QMDV02_01_DEBUG_ID_LOWER MW(2047:2016) - - - -#endif // #ifndef __CLC0C0QMD_H__ diff --git a/src/gallium/drivers/nouveau/nvc0/clc3c0qmd.h b/src/gallium/drivers/nouveau/nvc0/clc3c0qmd.h deleted file mode 100644 index 588cc63..0000000 --- a/src/gallium/drivers/nouveau/nvc0/clc3c0qmd.h +++ /dev/null @@ -1,245 +0,0 @@ -/******************************************************************************* - Copyright (c) 2001-2010 NVIDIA Corporation - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to - deal in the Software without restriction, including without limitation the - rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - sell copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - DEALINGS IN THE SOFTWARE. - -*******************************************************************************/ - -/* AUTO GENERATED FILE -- DO NOT EDIT */ - -#ifndef __CLC3C0QMD_H__ -#define __CLC3C0QMD_H__ - -/* -** Queue Meta Data, Version 02_02 - */ - -// The below C preprocessor definitions describe "multi-word" structures, where -// fields may have bit numbers beyond 32. For example, MW(127:96) means -// the field is in bits 0-31 of word number 3 of the structure. The "MW(X:Y)" -// syntax is to distinguish from similar "X:Y" single-word definitions: the -// macros historically used for single-word definitions would fail with -// multi-word definitions. -// -// See nvmisc.h:DRF_VAL_MW() in the source code of the kernel -// interface layer of nvidia.ko for an example of how to manipulate -// these MW(X:Y) definitions. - -#define NVC3C0_QMDV02_02_OUTER_PUT MW(30:0) -#define NVC3C0_QMDV02_02_OUTER_OVERFLOW MW(31:31) -#define NVC3C0_QMDV02_02_OUTER_GET MW(62:32) -#define NVC3C0_QMDV02_02_OUTER_STICKY_OVERFLOW MW(63:63) -#define NVC3C0_QMDV02_02_INNER_GET MW(94:64) -#define NVC3C0_QMDV02_02_INNER_OVERFLOW MW(95:95) -#define NVC3C0_QMDV02_02_INNER_PUT MW(126:96) -#define NVC3C0_QMDV02_02_INNER_STICKY_OVERFLOW MW(127:127) -#define NVC3C0_QMDV02_02_QMD_GROUP_ID MW(133:128) -#define NVC3C0_QMDV02_02_SM_GLOBAL_CACHING_ENABLE MW(134:134) -#define NVC3C0_QMDV02_02_RUN_CTA_IN_ONE_SM_PARTITION MW(135:135) -#define NVC3C0_QMDV02_02_RUN_CTA_IN_ONE_SM_PARTITION_FALSE 0x00000000 -#define NVC3C0_QMDV02_02_RUN_CTA_IN_ONE_SM_PARTITION_TRUE 0x00000001 -#define NVC3C0_QMDV02_02_IS_QUEUE MW(136:136) -#define NVC3C0_QMDV02_02_IS_QUEUE_FALSE 0x00000000 -#define NVC3C0_QMDV02_02_IS_QUEUE_TRUE 0x00000001 -#define NVC3C0_QMDV02_02_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST MW(137:137) -#define NVC3C0_QMDV02_02_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_FALSE 0x00000000 -#define NVC3C0_QMDV02_02_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_TRUE 0x00000001 -#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE0 MW(138:138) -#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE0_FALSE 0x00000000 -#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE0_TRUE 0x00000001 -#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE1 MW(139:139) -#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE1_FALSE 0x00000000 -#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE1_TRUE 0x00000001 -#define NVC3C0_QMDV02_02_REQUIRE_SCHEDULING_PCAS MW(140:140) -#define NVC3C0_QMDV02_02_REQUIRE_SCHEDULING_PCAS_FALSE 0x00000000 -#define NVC3C0_QMDV02_02_REQUIRE_SCHEDULING_PCAS_TRUE 0x00000001 -#define NVC3C0_QMDV02_02_DEPENDENT_QMD_SCHEDULE_ENABLE MW(141:141) -#define NVC3C0_QMDV02_02_DEPENDENT_QMD_SCHEDULE_ENABLE_FALSE 0x00000000 -#define NVC3C0_QMDV02_02_DEPENDENT_QMD_SCHEDULE_ENABLE_TRUE 0x00000001 -#define NVC3C0_QMDV02_02_DEPENDENT_QMD_TYPE MW(142:142) -#define NVC3C0_QMDV02_02_DEPENDENT_QMD_TYPE_QUEUE 0x00000000 -#define NVC3C0_QMDV02_02_DEPENDENT_QMD_TYPE_GRID 0x00000001 -#define NVC3C0_QMDV02_02_DEPENDENT_QMD_FIELD_COPY MW(143:143) -#define NVC3C0_QMDV02_02_DEPENDENT_QMD_FIELD_COPY_FALSE 0x00000000 -#define NVC3C0_QMDV02_02_DEPENDENT_QMD_FIELD_COPY_TRUE 0x00000001 -#define NVC3C0_QMDV02_02_QMD_RESERVED_B MW(159:144) -#define NVC3C0_QMDV02_02_CIRCULAR_QUEUE_SIZE MW(184:160) -#define NVC3C0_QMDV02_02_QMD_RESERVED_C MW(185:185) -#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_HEADER_CACHE MW(186:186) -#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_HEADER_CACHE_FALSE 0x00000000 -#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_HEADER_CACHE_TRUE 0x00000001 -#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_SAMPLER_CACHE MW(187:187) -#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_SAMPLER_CACHE_FALSE 0x00000000 -#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_SAMPLER_CACHE_TRUE 0x00000001 -#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_DATA_CACHE MW(188:188) -#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_DATA_CACHE_FALSE 0x00000000 -#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_DATA_CACHE_TRUE 0x00000001 -#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_DATA_CACHE MW(189:189) -#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_DATA_CACHE_FALSE 0x00000000 -#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_DATA_CACHE_TRUE 0x00000001 -#define NVC3C0_QMDV02_02_INVALIDATE_INSTRUCTION_CACHE MW(190:190) -#define NVC3C0_QMDV02_02_INVALIDATE_INSTRUCTION_CACHE_FALSE 0x00000000 -#define NVC3C0_QMDV02_02_INVALIDATE_INSTRUCTION_CACHE_TRUE 0x00000001 -#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_CONSTANT_CACHE MW(191:191) -#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_CONSTANT_CACHE_FALSE 0x00000000 -#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_CONSTANT_CACHE_TRUE 0x00000001 -#define NVC3C0_QMDV02_02_CTA_RASTER_WIDTH_RESUME MW(223:192) -#define NVC3C0_QMDV02_02_CTA_RASTER_HEIGHT_RESUME MW(239:224) -#define NVC3C0_QMDV02_02_CTA_RASTER_DEPTH_RESUME MW(255:240) -#define NVC3C0_QMDV02_02_PROGRAM_OFFSET MW(287:256) -#define NVC3C0_QMDV02_02_CIRCULAR_QUEUE_ADDR_LOWER MW(319:288) -#define NVC3C0_QMDV02_02_CIRCULAR_QUEUE_ADDR_UPPER MW(327:320) -#define NVC3C0_QMDV02_02_QMD_RESERVED_D MW(335:328) -#define NVC3C0_QMDV02_02_CIRCULAR_QUEUE_ENTRY_SIZE MW(351:336) -#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_ID MW(357:352) -#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_DELTA_MINUS_ONE MW(365:358) -#define NVC3C0_QMDV02_02_RELEASE_MEMBAR_TYPE MW(366:366) -#define NVC3C0_QMDV02_02_RELEASE_MEMBAR_TYPE_FE_NONE 0x00000000 -#define NVC3C0_QMDV02_02_RELEASE_MEMBAR_TYPE_FE_SYSMEMBAR 0x00000001 -#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_INCR_ENABLE MW(367:367) -#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_INCR_ENABLE_FALSE 0x00000000 -#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_INCR_ENABLE_TRUE 0x00000001 -#define NVC3C0_QMDV02_02_CWD_MEMBAR_TYPE MW(369:368) -#define NVC3C0_QMDV02_02_CWD_MEMBAR_TYPE_L1_NONE 0x00000000 -#define NVC3C0_QMDV02_02_CWD_MEMBAR_TYPE_L1_SYSMEMBAR 0x00000001 -#define NVC3C0_QMDV02_02_CWD_MEMBAR_TYPE_L1_MEMBAR 0x00000003 -#define NVC3C0_QMDV02_02_SEQUENTIALLY_RUN_CTAS MW(370:370) -#define NVC3C0_QMDV02_02_SEQUENTIALLY_RUN_CTAS_FALSE 0x00000000 -#define NVC3C0_QMDV02_02_SEQUENTIALLY_RUN_CTAS_TRUE 0x00000001 -#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_DECR_ENABLE MW(371:371) -#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_DECR_ENABLE_FALSE 0x00000000 -#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_DECR_ENABLE_TRUE 0x00000001 -#define NVC3C0_QMDV02_02_API_VISIBLE_CALL_LIMIT MW(378:378) -#define NVC3C0_QMDV02_02_API_VISIBLE_CALL_LIMIT__32 0x00000000 -#define NVC3C0_QMDV02_02_API_VISIBLE_CALL_LIMIT_NO_CHECK 0x00000001 -#define NVC3C0_QMDV02_02_SAMPLER_INDEX MW(382:382) -#define NVC3C0_QMDV02_02_SAMPLER_INDEX_INDEPENDENTLY 0x00000000 -#define NVC3C0_QMDV02_02_SAMPLER_INDEX_VIA_HEADER_INDEX 0x00000001 -#define NVC3C0_QMDV02_02_CTA_RASTER_WIDTH MW(415:384) -#define NVC3C0_QMDV02_02_CTA_RASTER_HEIGHT MW(431:416) -#define NVC3C0_QMDV02_02_QMD_RESERVED13A MW(447:432) -#define NVC3C0_QMDV02_02_CTA_RASTER_DEPTH MW(463:448) -#define NVC3C0_QMDV02_02_QMD_RESERVED14A MW(479:464) -#define NVC3C0_QMDV02_02_DEPENDENT_QMD_POINTER MW(511:480) -#define NVC3C0_QMDV02_02_QUEUE_ENTRIES_PER_CTA_MINUS_ONE MW(518:512) -#define NVC3C0_QMDV02_02_COALESCE_WAITING_PERIOD MW(529:522) -#define NVC3C0_QMDV02_02_SHARED_MEMORY_SIZE MW(561:544) -#define NVC3C0_QMDV02_02_MIN_SM_CONFIG_SHARED_MEM_SIZE MW(568:562) -#define NVC3C0_QMDV02_02_MAX_SM_CONFIG_SHARED_MEM_SIZE MW(575:569) -#define NVC3C0_QMDV02_02_QMD_VERSION MW(579:576) -#define NVC3C0_QMDV02_02_QMD_MAJOR_VERSION MW(583:580) -#define NVC3C0_QMDV02_02_QMD_RESERVED_H MW(591:584) -#define NVC3C0_QMDV02_02_CTA_THREAD_DIMENSION0 MW(607:592) -#define NVC3C0_QMDV02_02_CTA_THREAD_DIMENSION1 MW(623:608) -#define NVC3C0_QMDV02_02_CTA_THREAD_DIMENSION2 MW(639:624) -#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_VALID(i) MW((640+(i)*1):(640+(i)*1)) -#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_VALID_FALSE 0x00000000 -#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_VALID_TRUE 0x00000001 -#define NVC3C0_QMDV02_02_REGISTER_COUNT_V MW(656:648) -#define NVC3C0_QMDV02_02_TARGET_SM_CONFIG_SHARED_MEM_SIZE MW(663:657) -#define NVC3C0_QMDV02_02_FREE_CTA_SLOTS_EMPTY_SM MW(671:664) -#define NVC3C0_QMDV02_02_SM_DISABLE_MASK_LOWER MW(703:672) -#define NVC3C0_QMDV02_02_SM_DISABLE_MASK_UPPER MW(735:704) -#define NVC3C0_QMDV02_02_RELEASE0_ADDRESS_LOWER MW(767:736) -#define NVC3C0_QMDV02_02_RELEASE0_ADDRESS_UPPER MW(775:768) -#define NVC3C0_QMDV02_02_QMD_RESERVED_J MW(783:776) -#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP MW(790:788) -#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_ADD 0x00000000 -#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_MIN 0x00000001 -#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_MAX 0x00000002 -#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_INC 0x00000003 -#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_DEC 0x00000004 -#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_AND 0x00000005 -#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_OR 0x00000006 -#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_XOR 0x00000007 -#define NVC3C0_QMDV02_02_QMD_RESERVED_K MW(791:791) -#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_FORMAT MW(793:792) -#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_FORMAT_UNSIGNED_32 0x00000000 -#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_FORMAT_SIGNED_32 0x00000001 -#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_ENABLE MW(794:794) -#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_ENABLE_FALSE 0x00000000 -#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_ENABLE_TRUE 0x00000001 -#define NVC3C0_QMDV02_02_RELEASE0_STRUCTURE_SIZE MW(799:799) -#define NVC3C0_QMDV02_02_RELEASE0_STRUCTURE_SIZE_FOUR_WORDS 0x00000000 -#define NVC3C0_QMDV02_02_RELEASE0_STRUCTURE_SIZE_ONE_WORD 0x00000001 -#define NVC3C0_QMDV02_02_RELEASE0_PAYLOAD MW(831:800) -#define NVC3C0_QMDV02_02_RELEASE1_ADDRESS_LOWER MW(863:832) -#define NVC3C0_QMDV02_02_RELEASE1_ADDRESS_UPPER MW(871:864) -#define NVC3C0_QMDV02_02_QMD_RESERVED_L MW(879:872) -#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP MW(886:884) -#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_ADD 0x00000000 -#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_MIN 0x00000001 -#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_MAX 0x00000002 -#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_INC 0x00000003 -#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_DEC 0x00000004 -#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_AND 0x00000005 -#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_OR 0x00000006 -#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_XOR 0x00000007 -#define NVC3C0_QMDV02_02_QMD_RESERVED_M MW(887:887) -#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_FORMAT MW(889:888) -#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_FORMAT_UNSIGNED_32 0x00000000 -#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_FORMAT_SIGNED_32 0x00000001 -#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_ENABLE MW(890:890) -#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_ENABLE_FALSE 0x00000000 -#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_ENABLE_TRUE 0x00000001 -#define NVC3C0_QMDV02_02_RELEASE1_STRUCTURE_SIZE MW(895:895) -#define NVC3C0_QMDV02_02_RELEASE1_STRUCTURE_SIZE_FOUR_WORDS 0x00000000 -#define NVC3C0_QMDV02_02_RELEASE1_STRUCTURE_SIZE_ONE_WORD 0x00000001 -#define NVC3C0_QMDV02_02_RELEASE1_PAYLOAD MW(927:896) -#define NVC3C0_QMDV02_02_SHADER_LOCAL_MEMORY_LOW_SIZE MW(951:928) -#define NVC3C0_QMDV02_02_QMD_RESERVED_N MW(954:952) -#define NVC3C0_QMDV02_02_BARRIER_COUNT MW(959:955) -#define NVC3C0_QMDV02_02_SHADER_LOCAL_MEMORY_HIGH_SIZE MW(983:960) -#define NVC3C0_QMDV02_02_REGISTER_COUNT MW(991:984) -#define NVC3C0_QMDV02_02_SHADER_LOCAL_MEMORY_CRS_SIZE MW(1015:992) -#define NVC3C0_QMDV02_02_SASS_VERSION MW(1023:1016) -#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_ADDR_LOWER(i) MW((1055+(i)*64):(1024+(i)*64)) -#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_ADDR_UPPER(i) MW((1072+(i)*64):(1056+(i)*64)) -#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_RESERVED_ADDR(i) MW((1073+(i)*64):(1073+(i)*64)) -#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_INVALIDATE(i) MW((1074+(i)*64):(1074+(i)*64)) -#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_INVALIDATE_FALSE 0x00000000 -#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_INVALIDATE_TRUE 0x00000001 -#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_SIZE_SHIFTED4(i) MW((1087+(i)*64):(1075+(i)*64)) -#define NVC3C0_QMDV02_02_PROGRAM_ADDRESS_LOWER MW(1567:1536) -#define NVC3C0_QMDV02_02_PROGRAM_ADDRESS_UPPER MW(1584:1568) -#define NVC3C0_QMDV02_02_QMD_RESERVED_S MW(1599:1585) -#define NVC3C0_QMDV02_02_HW_ONLY_INNER_GET MW(1630:1600) -#define NVC3C0_QMDV02_02_HW_ONLY_REQUIRE_SCHEDULING_PCAS MW(1631:1631) -#define NVC3C0_QMDV02_02_HW_ONLY_INNER_PUT MW(1662:1632) -#define NVC3C0_QMDV02_02_HW_ONLY_SCG_TYPE MW(1663:1663) -#define NVC3C0_QMDV02_02_HW_ONLY_SPAN_LIST_HEAD_INDEX MW(1693:1664) -#define NVC3C0_QMDV02_02_QMD_RESERVED_Q MW(1694:1694) -#define NVC3C0_QMDV02_02_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID MW(1695:1695) -#define NVC3C0_QMDV02_02_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_FALSE 0x00000000 -#define NVC3C0_QMDV02_02_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_TRUE 0x00000001 -#define NVC3C0_QMDV02_02_HW_ONLY_SKED_NEXT_QMD_POINTER MW(1727:1696) -#define NVC3C0_QMDV02_02_QMD_SPARE_G MW(1759:1728) -#define NVC3C0_QMDV02_02_QMD_SPARE_H MW(1791:1760) -#define NVC3C0_QMDV02_02_QMD_SPARE_I MW(1823:1792) -#define NVC3C0_QMDV02_02_QMD_SPARE_J MW(1855:1824) -#define NVC3C0_QMDV02_02_QMD_SPARE_K MW(1887:1856) -#define NVC3C0_QMDV02_02_QMD_SPARE_L MW(1919:1888) -#define NVC3C0_QMDV02_02_QMD_SPARE_M MW(1951:1920) -#define NVC3C0_QMDV02_02_QMD_SPARE_N MW(1983:1952) -#define NVC3C0_QMDV02_02_DEBUG_ID_UPPER MW(2015:1984) -#define NVC3C0_QMDV02_02_DEBUG_ID_LOWER MW(2047:2016) - - - -#endif // #ifndef __CLC3C0QMD_H__ diff --git a/src/gallium/drivers/nouveau/nvc0/drf.h b/src/gallium/drivers/nouveau/nvc0/drf.h deleted file mode 100644 index bf95c8c..0000000 --- a/src/gallium/drivers/nouveau/nvc0/drf.h +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright 2019 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ -#ifndef __NVHW_DRF_H__ -#define __NVHW_DRF_H__ - -/* Helpers common to all DRF accessors. */ -#define DRF_LO(drf) (0 ? drf) -#define DRF_HI(drf) (1 ? drf) -#define DRF_BITS(drf) (DRF_HI(drf) - DRF_LO(drf) + 1) -#define DRF_MASK(drf) (~0ULL >> (64 - DRF_BITS(drf))) -#define DRF_SMASK(drf) (DRF_MASK(drf) << DRF_LO(drf)) - -/* Helpers for DRF-MW accessors. */ -#define DRF_MX_MW(drf) drf -#define DRF_MX(drf) DRF_MX_##drf -#define DRF_MW(drf) DRF_MX(drf) -#define DRF_MW_SPANS(o,drf) (DRF_LW_IDX((o),drf) != DRF_HW_IDX((o),drf)) -#define DRF_MW_SIZE(o) (sizeof((o)[0]) * 8) - -#define DRF_LW_IDX(o,drf) (DRF_LO(DRF_MW(drf)) / DRF_MW_SIZE(o)) -#define DRF_LW_LO(o,drf) (DRF_LO(DRF_MW(drf)) % DRF_MW_SIZE(o)) -#define DRF_LW_HI(o,drf) (DRF_MW_SPANS((o),drf) ? (DRF_MW_SIZE(o) - 1) : DRF_HW_HI((o),drf)) -#define DRF_LW_BITS(o,drf) (DRF_LW_HI((o),drf) - DRF_LW_LO((o),drf) + 1) -#define DRF_LW_MASK(o,drf) (~0ULL >> (64 - DRF_LW_BITS((o),drf))) -#define DRF_LW_SMASK(o,drf) (DRF_LW_MASK((o),drf) << DRF_LW_LO((o),drf)) -#define DRF_LW_GET(o,drf) (((o)[DRF_LW_IDX((o),drf)] >> DRF_LW_LO((o),drf)) & DRF_LW_MASK((o),drf)) -#define DRF_LW_VAL(o,drf,v) (((v) & DRF_LW_MASK((o),drf)) << DRF_LW_LO((o),drf)) -#define DRF_LW_CLR(o,drf) ((o)[DRF_LW_IDX((o),drf)] & ~DRF_LW_SMASK((o),drf)) -#define DRF_LW_SET(o,drf,v) (DRF_LW_CLR((o),drf) | DRF_LW_VAL((o),drf,(v))) - -#define DRF_HW_IDX(o,drf) (DRF_HI(DRF_MW(drf)) / DRF_MW_SIZE(o)) -#define DRF_HW_LO(o,drf) 0 -#define DRF_HW_HI(o,drf) (DRF_HI(DRF_MW(drf)) % DRF_MW_SIZE(o)) -#define DRF_HW_BITS(o,drf) (DRF_HW_HI((o),drf) - DRF_HW_LO((o),drf) + 1) -#define DRF_HW_MASK(o,drf) (~0ULL >> (64 - DRF_HW_BITS((o),drf))) -#define DRF_HW_SMASK(o,drf) (DRF_HW_MASK((o),drf) << DRF_HW_LO((o),drf)) -#define DRF_HW_GET(o,drf) ((o)[DRF_HW_IDX(o,drf)] & DRF_HW_SMASK((o),drf)) -#define DRF_HW_VAL(o,drf,v) (((long long)(v) >> DRF_LW_BITS((o),drf)) & DRF_HW_SMASK((o),drf)) -#define DRF_HW_CLR(o,drf) ((o)[DRF_HW_IDX((o),drf)] & ~DRF_HW_SMASK((o),drf)) -#define DRF_HW_SET(o,drf,v) (DRF_HW_CLR((o),drf) | DRF_HW_VAL((o),drf,(v))) - -/* DRF accessors. */ -#define NVVAL_X(drf,v) (((v) & DRF_MASK(drf)) << DRF_LO(drf)) -#define NVVAL_N(X,d,r,f, v) NVVAL_X(d##_##r##_##f, (v)) -#define NVVAL_I(X,d,r,f,i,v) NVVAL_X(d##_##r##_##f(i), (v)) -#define NVVAL_(X,_1,_2,_3,_4,_5,IMPL,...) IMPL -#define NVVAL(A...) NVVAL_(X, ##A, NVVAL_I, NVVAL_N)(X, ##A) - -#define NVDEF_N(X,d,r,f, v) NVVAL_X(d##_##r##_##f, d##_##r##_##f##_##v) -#define NVDEF_I(X,d,r,f,i,v) NVVAL_X(d##_##r##_##f(i), d##_##r##_##f##_##v) -#define NVDEF_(X,_1,_2,_3,_4,_5,IMPL,...) IMPL -#define NVDEF(A...) NVDEF_(X, ##A, NVDEF_I, NVDEF_N)(X, ##A) - -#define NVVAL_GET_X(o,drf) (((o) >> DRF_LO(drf)) & DRF_MASK(drf)) -#define NVVAL_GET_N(X,o,d,r,f ) NVVAL_GET_X(o, d##_##r##_##f) -#define NVVAL_GET_I(X,o,d,r,f,i) NVVAL_GET_X(o, d##_##r##_##f(i)) -#define NVVAL_GET_(X,_1,_2,_3,_4,_5,IMPL,...) IMPL -#define NVVAL_GET(A...) NVVAL_GET_(X, ##A, NVVAL_GET_I, NVVAL_GET_N)(X, ##A) - -#define NVVAL_SET_X(o,drf,v) (((o) & ~DRF_SMASK(drf)) | NVVAL_X(drf, (v))) -#define NVVAL_SET_N(X,o,d,r,f, v) NVVAL_SET_X(o, d##_##r##_##f, (v)) -#define NVVAL_SET_I(X,o,d,r,f,i,v) NVVAL_SET_X(o, d##_##r##_##f(i), (v)) -#define NVVAL_SET_(X,_1,_2,_3,_4,_5,_6,IMPL,...) IMPL -#define NVVAL_SET(A...) NVVAL_SET_(X, ##A, NVVAL_SET_I, NVVAL_SET_N)(X, ##A) - -#define NVDEF_SET_N(X,o,d,r,f, v) \ - NVVAL_SET_X(o, d##_##r##_##f, d##_##r##_##f##_##v) -#define NVDEF_SET_I(X,o,d,r,f,i,v) \ - NVVAL_SET_X(o, d##_##r##_##f(i), d##_##r##_##f##_##v) -#define NVDEF_SET_(X,_1,_2,_3,_4,_5,_6,IMPL,...) IMPL -#define NVDEF_SET(A...) NVDEF_SET_(X, ##A, NVDEF_SET_I, NVDEF_SET_N)(X, ##A) - -/* DRF-MW accessors. */ -#define NVVAL_MW_GET_X(o,drf) \ - ((DRF_MW_SPANS((o),drf) ? \ - (DRF_HW_GET((o),drf) << DRF_LW_BITS((o),drf)) : 0) | DRF_LW_GET((o),drf)) -#define NVVAL_MW_GET_N(X,o,d,r,f ) NVVAL_MW_GET_X((o), d##_##r##_##f) -#define NVVAL_MW_GET_I(X,o,d,r,f,i) NVVAL_MW_GET_X((o), d##_##r##_##f(i)) -#define NVVAL_MW_GET_(X,_1,_2,_3,_4,_5,IMPL,...) IMPL -#define NVVAL_MW_GET(A...) NVVAL_MW_GET_(X, ##A, NVVAL_MW_GET_I, NVVAL_MW_GET_N)(X, ##A) - -#define NVVAL_MW_SET_X(o,drf,v) do { \ - (o)[DRF_LW_IDX((o),drf)] = DRF_LW_SET((o),drf,(v)); \ - if (DRF_MW_SPANS((o),drf)) \ - (o)[DRF_HW_IDX((o),drf)] = DRF_HW_SET((o),drf,(v)); \ -} while(0) -#define NVVAL_MW_SET_N(X,o,d,r,f, v) NVVAL_MW_SET_X((o), d##_##r##_##f, (v)) -#define NVVAL_MW_SET_I(X,o,d,r,f,i,v) NVVAL_MW_SET_X((o), d##_##r##_##f(i), (v)) -#define NVVAL_MW_SET_(X,_1,_2,_3,_4,_5,_6,IMPL,...) IMPL -#define NVVAL_MW_SET(A...) \ - NVVAL_MW_SET_(X, ##A, NVVAL_MW_SET_I, NVVAL_MW_SET_N)(X, ##A) - -#define NVDEF_MW_SET_N(X,o,d,r,f, v) \ - NVVAL_MW_SET_X(o, d##_##r##_##f, d##_##r##_##f##_##v) -#define NVDEF_MW_SET_I(X,o,d,r,f,i,v) \ - NVVAL_MW_SET_X(o, d##_##r##_##f(i), d##_##r##_##f##_##v) -#define NVDEF_MW_SET_(X,_1,_2,_3,_4,_5,_6,IMPL,...) IMPL -#define NVDEF_MW_SET(A...) \ - NVDEF_MW_SET_(X, ##A, NVDEF_MW_SET_I, NVDEF_MW_SET_N)(X, ##A) -#endif diff --git a/src/gallium/drivers/nouveau/nvc0/mme/comc597.mme.h b/src/gallium/drivers/nouveau/nvc0/mme/comc597.mme.h deleted file mode 100644 index 390741c..0000000 --- a/src/gallium/drivers/nouveau/nvc0/mme/comc597.mme.h +++ /dev/null @@ -1,904 +0,0 @@ -#define NV_MME_PRED_MODE_UUUU 0 -#define NV_MME_PRED_MODE_TTTT 1 -#define NV_MME_PRED_MODE_FFFF 2 -#define NV_MME_PRED_MODE_TTUU 3 -#define NV_MME_PRED_MODE_FFUU 4 -#define NV_MME_PRED_MODE_TFUU 5 -#define NV_MME_PRED_MODE_TUUU 6 -#define NV_MME_PRED_MODE_FUUU 7 -#define NV_MME_PRED_MODE_UUTT 8 -#define NV_MME_PRED_MODE_UUTF 9 -#define NV_MME_PRED_MODE_UUTU 10 -#define NV_MME_PRED_MODE_UUFT 11 -#define NV_MME_PRED_MODE_UUFF 12 -#define NV_MME_PRED_MODE_UUFU 13 -#define NV_MME_PRED_MODE_UUUT 14 -#define NV_MME_PRED_MODE_UUUF 15 - -#define NV_MME_REG_R0 0 -#define NV_MME_REG_R1 1 -#define NV_MME_REG_R2 2 -#define NV_MME_REG_R3 3 -#define NV_MME_REG_R4 4 -#define NV_MME_REG_R5 5 -#define NV_MME_REG_R6 6 -#define NV_MME_REG_R7 7 -#define NV_MME_REG_R8 8 -#define NV_MME_REG_R9 9 -#define NV_MME_REG_R10 10 -#define NV_MME_REG_R11 11 -#define NV_MME_REG_R12 12 -#define NV_MME_REG_R13 13 -#define NV_MME_REG_R14 14 -#define NV_MME_REG_R15 15 -#define NV_MME_REG_R16 16 -#define NV_MME_REG_R17 17 -#define NV_MME_REG_R18 18 -#define NV_MME_REG_R19 19 -#define NV_MME_REG_R20 20 -#define NV_MME_REG_R21 21 -#define NV_MME_REG_R22 22 -#define NV_MME_REG_R23 23 -#define NV_MME_REG_ZERO 24 -#define NV_MME_REG_IMMED 25 -#define NV_MME_REG_IMMEDPAIR 26 -#define NV_MME_REG_IMMED32 27 -#define NV_MME_REG_LOAD0 28 -#define NV_MME_REG_LOAD1 29 - -#define NV_MME_ALU_ADD 0 -#define NV_MME_ALU_ADDC 1 -#define NV_MME_ALU_SUB 2 -#define NV_MME_ALU_SUBB 3 -#define NV_MME_ALU_MUL 4 -#define NV_MME_ALU_MULH 5 -#define NV_MME_ALU_MULU 6 -#define NV_MME_ALU_EXTENDED 7 -#define NV_MME_ALU_CLZ 8 -#define NV_MME_ALU_SLL 9 -#define NV_MME_ALU_SRL 10 -#define NV_MME_ALU_SRA 11 -#define NV_MME_ALU_AND 12 -#define NV_MME_ALU_NAND 13 -#define NV_MME_ALU_OR 14 -#define NV_MME_ALU_XOR 15 -#define NV_MME_ALU_MERGE 16 -#define NV_MME_ALU_SLT 17 -#define NV_MME_ALU_SLTU 18 -#define NV_MME_ALU_SLE 19 -#define NV_MME_ALU_SLEU 20 -#define NV_MME_ALU_SEQ 21 -#define NV_MME_ALU_STATE 22 -#define NV_MME_ALU_LOOP 23 -#define NV_MME_ALU_JAL 24 -#define NV_MME_ALU_BLT 25 -#define NV_MME_ALU_BLTU 26 -#define NV_MME_ALU_BLE 27 -#define NV_MME_ALU_BLEU 28 -#define NV_MME_ALU_BEQ 29 -#define NV_MME_ALU_DREAD 30 -#define NV_MME_ALU_DWRITE 31 - -#define NV_MME_OUT_NONE 0 -#define NV_MME_OUT_ALU0 1 -#define NV_MME_OUT_ALU1 2 -#define NV_MME_OUT_LOAD0 3 -#define NV_MME_OUT_LOAD1 4 -#define NV_MME_OUT_IMMED0 5 -#define NV_MME_OUT_IMMED1 6 -#define NV_MME_OUT_RESERVED 7 -#define NV_MME_OUT_IMMEDHIGH0 8 -#define NV_MME_OUT_IMMEDHIGH1 9 -#define NV_MME_OUT_IMMED32_0 10 - -#define MME_BITS(en,pm,pr,o0,d0,a0,b0,i0,o1,d1,a1,b1,i1,m0,e0,m1,e1) \ - ((e1) << (92 - 64) | (m1) << (89 - 64) | \ - (e0) << (85 - 64) | (m0) << (82 - 64) | \ - (i1) << (66 - 64) | (b1) >> (64 - 61)), \ - (((b1) & 7) << (61 - 32) | (a1) << (56 - 32) | \ - (d1) << (51 - 32) | (o1) << (46 - 32) | \ - (i0) >> (32 - 30)), \ - (((i0) & 3) << 30 | (b0) << 25 | (a0) << 20 | (d0) << 15 | (o0) << 10 | \ - (pr) << 5 | (pm) << 1 | (en)) - -#define MME_INSN(en,o0,d0,a0,b0,i0,m0,e0,o1,d1,a1,b1,i1,m1,e1) \ - MME_BITS((en), NV_MME_PRED_MODE_UUUU, NV_MME_REG_ZERO, \ - NV_MME_ALU_##o0, NV_MME_REG_##d0, \ - NV_MME_REG_##a0, NV_MME_REG_##b0, (i0), \ - NV_MME_ALU_##o1, NV_MME_REG_##d1, \ - NV_MME_REG_##a1, NV_MME_REG_##b1, (i1), \ - NV_MME_OUT_##m0, NV_MME_OUT_##e0, \ - NV_MME_OUT_##m1, NV_MME_OUT_##e1) - -uint32_t mmec597_per_instance_bf[] = { -// r1 = load(); // count -// r3 = load(); // mask -// mthd(0x1880, 1); // VERTEX_ARRAY_PER_INSTANCE[0] - MME_INSN(0, ADD, R1, LOAD0, ZERO, (1<<12)|0x1880/4, IMMED0, NONE, - ADD, R3, LOAD1, ZERO, 0, NONE, NONE), -// while (HW_LOOP_COUNT < r1) { -// send(r3 & 1); -// r3 >>= 1; -// } - MME_INSN(0, LOOP, ZERO, R1, ZERO, 0x0003, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), - MME_INSN(0, AND, ZERO, R3, IMMED, 1, NONE, ALU0, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), - MME_INSN(0, SRL, R3, R3, IMMED, 1, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), - MME_INSN(1, ADD, ZERO, ZERO, ZERO, 0, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), - MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -}; - -uint32_t mmec597_vertex_array_select[] = { -// r1 = load(); // array -// r2 = load(); // limit hi -// r3 = load(); // limit lo -// r4 = load(); // start hi -// r5 = load(); // start lo -// r6 = (r1 & 0x1f) << 2; -// r7 = (r1 & 0x1f) << 1; -// mthd(0x1c04 + r6, 1); // VERTEX_ARRAY_START_HIGH[] -// send(r4); -// send(r5); -// mthd(0x0600 + r7, 1); // VERTEX_ARRAY_LIMIT_HIGH[] -// send(r2); -// send(r3); - MME_INSN(0, ADD, R1, LOAD0, ZERO, 0, NONE, NONE, - ADD, R2, LOAD1, ZERO, 0, NONE, NONE), - MME_INSN(0, ADD, R3, LOAD0, ZERO, 0, NONE, NONE, - ADD, R4, LOAD1, ZERO, 0, NONE, NONE), - MME_INSN(0, ADD, R5, LOAD0, ZERO, 0, NONE, NONE, - MERGE, R6, ZERO, R1, (2<<10)|(5<<5)|0, NONE, NONE), - MME_INSN(0, MERGE, R7, ZERO, R1, (1<<10)|(5<<5)|0, ALU1, NONE, - ADD, ZERO, R6, IMMED, (1<<12)|0x1c04/4, NONE, NONE), - MME_INSN(0, ADD, ZERO, R4, ZERO, 0, NONE, ALU0, - ADD, ZERO, R5, ZERO, 0, NONE, ALU1), - MME_INSN(1, ADD, ZERO, R7, IMMED, (1<<12)|0x0600/4, ALU0, ALU1, - ADD, ZERO, R2, ZERO, 0, NONE, NONE), - MME_INSN(0, ADD, ZERO, R3, ZERO, 0, NONE, ALU0, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -}; - -uint32_t mmec597_blend_enables[] = { -// r1 = load(); // enable mask -// mthd(0x1360, 1); // NVC0_3D_BLEND_ENABLE[] -// send((r1 >> 0) & 1); -// send((r1 >> 1) & 1); -// send((r1 >> 2) & 1); -// send((r1 >> 3) & 1); -// send((r1 >> 4) & 1); -// send((r1 >> 5) & 1); -// send((r1 >> 6) & 1); -// send((r1 >> 7) & 1); - MME_INSN(0, ADD, R1, LOAD0, ZERO, 0, IMMED1, NONE, - ADD, ZERO, ZERO, ZERO, (1<<12)|0x1360/4, NONE, NONE), - MME_INSN(0, MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|0, NONE, ALU0, - MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|1, NONE, ALU1), - MME_INSN(0, MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|2, NONE, ALU0, - MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|3, NONE, ALU1), - MME_INSN(1, MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|4, NONE, ALU0, - MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|5, NONE, ALU1), - MME_INSN(0, MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|6, NONE, ALU0, - MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|7, NONE, ALU1), -}; - -uint32_t mmec597_poly_mode_front[] = { -// r1 = load(); -// mthd(0x0dac,0); // POLYGON_MODE_FRONT -// send(r1); -// r2 = read(0x0db0); // POLYGON_MODE_BACK -// r3 = read(0x20c0); // SP_SELECT[3] -// r7 = r1 | r2; -// r4 = read(0x2100); // SP_SELECT[4] -// r6 = 0x60; -// r7 = r7 & 1; -// if (r7 != 0) - MME_INSN(0, ADD, R1, LOAD0, ZERO, (0<<12)|0x0dac/4, IMMED0, ALU0, - STATE, R2, IMMED, ZERO, 0x0db0/4, NONE, NONE), - MME_INSN(0, STATE, R3, IMMED, ZERO, 0x20c0/4, NONE, NONE, - OR, R7, R1, R2, 0, NONE, NONE), - MME_INSN(0, STATE, R4, IMMED, ZERO, 0x2100/4, NONE, NONE, - ADD, R6, IMMED, ZERO, 0x60, NONE, NONE), - MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), - MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -// r6 = 0x200; - MME_INSN(0, ADD, R6, IMMED, ZERO, 0x200, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -// r7 = r3 | r4; -// r7 = r7 & 1; -// if (r7 != 0) - MME_INSN(0, OR, R7, R3, R4, 0, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), - MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), - MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -// r6 = 0; - MME_INSN(0, ADD, R6, ZERO, ZERO, 0, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -// mthd(0x02ec, 0); -// send(r6); - MME_INSN(1, ADD, ZERO, ZERO, ZERO, (0<<12)|0x02ec/4, IMMED0, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), - MME_INSN(0, ADD, ZERO, R6, ZERO, 0, NONE, ALU0, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -}; - -uint32_t mmec597_poly_mode_back[] = { -// r1 = load(); -// mthd(0x0db0,0); // POLYGON_MODE_BACK -// send(r1); -// r2 = read(0x0dac); // POLYGON_MODE_FRONT -// r3 = read(0x20c0); // SP_SELECT[3] -// r7 = r1 | r2; -// r4 = read(0x2100); // SP_SELECT[4] -// r6 = 0x60; -// r7 = r7 & 1; -// if (r7 != 0) - MME_INSN(0, ADD, R1, LOAD0, ZERO, (0<<12)|0x0db0/4, IMMED0, ALU0, - STATE, R2, IMMED, ZERO, 0x0dac/4, NONE, NONE), - MME_INSN(0, STATE, R3, IMMED, ZERO, 0x20c0/4, NONE, NONE, - OR, R7, R1, R2, 0, NONE, NONE), - MME_INSN(0, STATE, R4, IMMED, ZERO, 0x2100/4, NONE, NONE, - ADD, R6, IMMED, ZERO, 0x60, NONE, NONE), - MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), - MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -// r6 = 0x200; - MME_INSN(0, ADD, R6, IMMED, ZERO, 0x200, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -// r7 = r3 | r4; -// r7 = r7 & 1; -// if (r7 != 0) - MME_INSN(0, OR, R7, R3, R4, 0, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), - MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), - MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -// r6 = 0; - MME_INSN(0, ADD, R6, ZERO, ZERO, 0, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -// mthd(0x02ec, 0); -// send(r6); - MME_INSN(1, ADD, ZERO, ZERO, ZERO, (0<<12)|0x02ec/4, IMMED0, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), - MME_INSN(0, ADD, ZERO, R6, ZERO, 0, NONE, ALU0, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -}; - -uint32_t mmec597_gp_select[] = { -// r1 = load(); -// mthd(0x2100,0); // SP_SELECT[4] -// send(r1); -// r2 = read(0x0dac); // POLYGON_MODE_FRONT -// r3 = read(0x0db0); // POLYGON_MODE_BACK -// r7 = r2 | r3; -// r4 = read(0x20c0); // SP_SELECT[3] -// r6 = 0x60; -// r7 = r7 & 1; -// if (r7 != 0) - MME_INSN(0, ADD, R1, LOAD0, ZERO, (0<<12)|0x2100/4, IMMED0, ALU0, - STATE, R2, IMMED, ZERO, 0x0dac/4, NONE, NONE), - MME_INSN(0, STATE, R3, IMMED, ZERO, 0x0db0/4, NONE, NONE, - OR, R7, R2, R3, 0, NONE, NONE), - MME_INSN(0, STATE, R4, IMMED, ZERO, 0x20c0/4, NONE, NONE, - ADD, R6, IMMED, ZERO, 0x60, NONE, NONE), - MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), - MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -// r6 = 0x200; - MME_INSN(0, ADD, R6, IMMED, ZERO, 0x200, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -// r7 = r1 | r4; -// r7 = r7 & 1; -// if (r7 != 0) - MME_INSN(0, OR, R7, R1, R4, 0, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), - MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), - MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -// r6 = 0; - MME_INSN(0, ADD, R6, ZERO, ZERO, 0, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -// mthd(0x02ec, 0); -// send(r6); - MME_INSN(1, ADD, ZERO, ZERO, ZERO, (0<<12)|0x02ec/4, IMMED0, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), - MME_INSN(0, ADD, ZERO, R6, ZERO, 0, NONE, ALU0, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -}; - -uint32_t mmec597_tep_select[] = { -// r1 = load(); -// mthd(0x20c0,0); // SP_SELECT[3] -// send(r1); -// r2 = read(0x0dac); // POLYGON_MODE_FRONT -// r3 = read(0x0db0); // POLYGON_MODE_BACK -// r7 = r2 | r3; -// r4 = read(0x2100); // SP_SELECT[4] -// r6 = 0x60; -// r7 = r7 & 1; -// if (r7 != 0) - MME_INSN(0, ADD, R1, LOAD0, ZERO, (0<<12)|0x20c0/4, IMMED0, ALU0, - STATE, R2, IMMED, ZERO, 0x0dac/4, NONE, NONE), - MME_INSN(0, STATE, R3, IMMED, ZERO, 0x0db0/4, NONE, NONE, - OR, R7, R2, R3, 0, NONE, NONE), - MME_INSN(0, STATE, R4, IMMED, ZERO, 0x2100/4, NONE, NONE, - ADD, R6, IMMED, ZERO, 0x60, NONE, NONE), - MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), - MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -// r6 = 0x200; - MME_INSN(0, ADD, R6, IMMED, ZERO, 0x200, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -// r7 = r1 | r4; -// r7 = r7 & 1; -// if (r7 != 0) - MME_INSN(0, OR, R7, R1, R4, 0, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), - MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), - MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -// r6 = 0; - MME_INSN(0, ADD, R6, ZERO, ZERO, 0, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -// mthd(0x02ec, 0); -// send(r6); - MME_INSN(1, ADD, ZERO, ZERO, ZERO, (0<<12)|0x02ec/4, IMMED0, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), - MME_INSN(0, ADD, ZERO, R6, ZERO, 0, NONE, ALU0, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -}; - -uint32_t mmec597_draw_arrays_indirect[] = { -// r1 = load(); // mode -// r5 = read(0x1438); // VB_INSTANCE_BASE -// r6 = load(); // start_drawid -// r7 = load(); // numparams - MME_INSN(0, ADD, R1, LOAD0, ZERO, 0, NONE, NONE, - ADD, R6, LOAD1, ZERO, 0, NONE, NONE), - MME_INSN(0, ADD, R7, LOAD0, ZERO, 0, NONE, NONE, - STATE, R5, IMMED, ZERO, 0x1438/4, NONE, NONE), -// while (HW_LOOP_COUNT < r7) { -// r2 = load(); // count -// r3 = load(); // instance_count -// mthd(0x0d74, 0); // VERTEX_BUFFER_FIRST -// send(load()); // start -// r4 = load(); // start_instance -// if (r3) { - MME_INSN(0, LOOP, ZERO, R7, ZERO, 0x000c, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), - MME_INSN(0, ADD, R2, LOAD0, ZERO, 0x0d74/4, IMMED0, NONE, - ADD, R3, LOAD1, ZERO, 0, NONE, NONE), - MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, ALU0, - ADD, R4, LOAD1, ZERO, 0, NONE, NONE), - MME_INSN(0, BEQ, ZERO, R3, ZERO, (2<<14)|0x0008, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -// mthd(0x238c, 1); // CB_POS -// send(256 + 160); -// send(0); // base_vertex -// send(r4); // start_instance -// send(r6); // draw id -// mthd(0x1438, 0); // VB_INSTANCE_BASE -// send(r4); -// r1 = r1 & ~(1<<26); // clear INSTANCE_NEXT - MME_INSN(0, ADD, ZERO, ZERO, ZERO, (1<<12)|0x238c/4, IMMED0, IMMED1, - ADD, ZERO, ZERO, ZERO, 256 + 160, NONE, ALU0), - MME_INSN(0, ADD, ZERO, R4, ZERO, 0, NONE, ALU0, - ADD, ZERO, R6, ZERO, 0, NONE, ALU1), - MME_INSN(0, ADD, ZERO, R4, ZERO, 0x1438/4, IMMED0, ALU0, - MERGE, R1, R1, ZERO, (26<<10)|(1<<5)|0, NONE, NONE), -// do { -// mthd(0x1618, 0); // VERTEX_BEGIN_GL -// send(r1); // mode -// mthd(0x0d78, 0); // VERTEX_BUFFER_COUNT -// send(r2); // count -// mthd(0x1614, 0); // VERTEX_END_GL -// send(0); -// r1 |= (1<<26); // set INSTANCE_NEXT -// } while(--r3); -// } - MME_INSN(0, ADD, ZERO, R1, ZERO, 0x1618/4, IMMED0, ALU0, - ADD, ZERO, R2, ZERO, 0x0d78/4, IMMED1, ALU1), - MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1614/4, IMMED0, ALU0, - ADD, R4, IMMED, ZERO, 1, NONE, NONE), - MME_INSN(0, MERGE, R1, R1, R4, (26<<10)|(1<<5)|0, NONE, NONE, - SUB, R3, R3, IMMED, 1, NONE, NONE), - MME_INSN(0, BEQ, ZERO, R3, ZERO, (1<<14)|0x3ffd, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -// r6 = r6 + 1; -// }; - MME_INSN(0, ADD, R6, R6, IMMED, 1, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -// mthd(0x1438, 0); // restore VB_INSTANCE_BASE -// send(r5); - MME_INSN(1, ADD, ZERO, ZERO, ZERO, 0x1438/4, IMMED0, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), - MME_INSN(0, ADD, ZERO, R5, ZERO, 0, NONE, ALU0, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -}; - -uint32_t mmec597_draw_elts_indirect[] = { -// r1 = load(); // mode -// r8 = read(0x1434); // VB_ELEMENT_BASE -// r9 = read(0x1438); // VB_INSTANCE_BASE -// r6 = load(); // start_drawid -// r7 = load(); // numparams - MME_INSN(0, ADD, R1, LOAD0, ZERO, 0, NONE, NONE, - STATE, R8, IMMED, ZERO, 0x1434/4, NONE, NONE), - MME_INSN(0, STATE, R9, IMMED, ZERO, 0x1438/4, NONE, NONE, - ADD, R6, LOAD0, ZERO, 0, NONE, NONE), - MME_INSN(0, ADD, R7, LOAD0, ZERO, 0, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -// while (HW_LOOP_COUNT < r7) { -// r3 = load(); // count -// r2 = load(); // instance_count -// mthd(0x17dc, 0); // INDEX_BATCH_FIRST -// send(load()); // start -// r4 = load(); // index_bias -// mthd(0x238c, 1); // CB_POS -// send(256 + 160); -// send(r4); // index_bias -// r5 = load(); // start_instance -// if (r2) { - MME_INSN(0, LOOP, ZERO, R7, ZERO, 0x000d, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), - MME_INSN(0, ADD, R3, LOAD0, ZERO, 0x17dc/4, IMMED0, NONE, - ADD, R2, LOAD1, ZERO, 0, NONE, NONE), - MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, ALU0, - ADD, R4, LOAD1, ZERO, 0, NONE, NONE), - MME_INSN(0, ADD, ZERO, ZERO, ZERO, (1<<12)|0x238c/4, IMMED0, IMMED1, - ADD, ZERO, R4, ZERO, 256 + 160, NONE, ALU1), - MME_INSN(0, BEQ, ZERO, R2, ZERO, (2<<14)|0x0008, NONE, NONE, - ADD, R5, LOAD0, ZERO, 0, NONE, NONE), -// send(r5); // start_instance -// send(r6); // draw_id -// mthd(0x1434, 1); // VB_ELEMENT_BASE -// send(r4); // index_bias -// send(r5); // start_instance -// mthd(0x1118, 0); // VERTEX_ID_BASE -// send(r4); // index_bias -// r1 &= ~(1 << 26); // clear INSTANCE_NEXT - MME_INSN(0, ADD, ZERO, R5, ZERO, 0, NONE, ALU0, - ADD, ZERO, R6, ZERO, 0, NONE, ALU1), - MME_INSN(0, ADD, ZERO, R4, ZERO, (1<<12)|0x1434/4, IMMED0, ALU0, - ADD, ZERO, R5, ZERO, 0, NONE, ALU1), - MME_INSN(0, ADD, ZERO, R4, ZERO, 0x1118/4, IMMED0, ALU0, - MERGE, R1, R1, ZERO, (26<<10)|(1<<5)|0, NONE, NONE), -// do { -// mthd(0x1618, 0); // VERTEX_BEGIN_GL -// send(r1); // mode -// mthd(0x17e0, 0); // INDEX_BATCH_COUNT -// send(r3); // count -// mthd(0x1614, 0); // VERTEX_END_GL -// send(0); -// r1 |= (1 << 26); // set INSTANCE_NEXT -// } while (--r2); -// } - MME_INSN(0, ADD, ZERO, R1, ZERO, 0x1618/4, IMMED0, ALU0, - ADD, ZERO, R3, ZERO, 0x17e0/4, IMMED1, ALU1), - MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1614/4, IMMED0, ALU0, - ADD, R4, IMMED, ZERO, 1, NONE, NONE), - MME_INSN(0, MERGE, R1, R1, R4, (26<<10)|(1<<5)|0, NONE, NONE, - SUB, R2, R2, IMMED, 1, NONE, NONE), - MME_INSN(0, BEQ, ZERO, R2, ZERO, (1<<14)|0x3ffd, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -// r6 = r6 + 1; -// }; - MME_INSN(0, ADD, R6, R6, IMMED, 1, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -// mthd(0x1434, 1); -// send(r8); // restore VB_ELEMENT_BASE -// send(r9); // restore VB_INSTANCE_BASE -// mthd(0x1118, 0); -// send(r8); // restore VERTEX_ID_BASE - MME_INSN(1, ADD, ZERO, R8, ZERO, (1<<12)|0x1434/4, IMMED0, ALU0, - ADD, ZERO, R9, ZERO, 0, NONE, ALU1), - MME_INSN(0, ADD, ZERO, R8, ZERO, 0x1118/4, IMMED0, ALU0, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -}; - -uint32_t mmec597_draw_arrays_indirect_count[] = { -// r1 = load(); // mode -// r6 = load(); // start_drawid -// r7 = load(); // numparams -// r5 = load(); // totaldraws -// r8 = read(0x1438); // VB_INSTANCE_BASE -// r5 = r5 - r6; // remaining draws -// if (r5 > r7) - MME_INSN(0, ADD, R1, LOAD0, ZERO, 0, NONE, NONE, - ADD, R6, LOAD1, ZERO, 0, NONE, NONE), - MME_INSN(0, ADD, R7, LOAD0, ZERO, 0, NONE, NONE, - ADD, R5, LOAD1, ZERO, 0, NONE, NONE), - MME_INSN(0, STATE, R8, IMMED, ZERO, 0x1438/4, NONE, NONE, - SUB, R5, R5, R6, 0, NONE, NONE), - MME_INSN(0, BLE, ZERO, R5, R7, (2<<14)|0x0002, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -// r5 = r7; - MME_INSN(0, ADD, R5, R7, ZERO, 0, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -// if (r5 >= 0) { - MME_INSN(0, BLT, ZERO, R5, ZERO, (2<<14)|0x000e, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -// while (HW_LOOP_COUNT < r5) { -// r2 = load(); // count -// r3 = load(); // instance_count -// mthd(0x0d74, 0); // VERTEX_BUFFER_FIRST -// send(load()); // start -// r4 = load(); // start_instance -// if (r3) { - MME_INSN(0, LOOP, ZERO, R5, ZERO, 0x000c, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), - MME_INSN(0, ADD, R2, LOAD0, ZERO, 0x0d74/4, IMMED0, NONE, - ADD, R3, LOAD1, ZERO, 0, NONE, NONE), - MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, ALU0, - ADD, R4, LOAD1, ZERO, 0, NONE, NONE), - MME_INSN(0, BEQ, ZERO, R3, ZERO, (2<<14)|0x0008, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -// mthd(0x238c, 1); // CB_POS -// send(256 + 160); -// send(0); // base_vertex -// send(r4); // start_instance -// send(r6); // draw_id -// mthd(0x1438, 0); // VB_INSTANCE_BASE -// send(r4); -// r1 &= ~(1 << 26); // clear INSTANCE_NEXT - MME_INSN(0, ADD, ZERO, ZERO, ZERO, (1<<12)|0x238c/4, IMMED0, IMMED1, - ADD, ZERO, ZERO, ZERO, 256+160, NONE, ALU0), - MME_INSN(0, ADD, ZERO, R4, ZERO, 0, NONE, ALU0, - ADD, ZERO, R6, ZERO, 0, NONE, ALU1), - MME_INSN(0, ADD, ZERO, R4, ZERO, 0x1438/4, IMMED0, ALU0, - MERGE, R1, R1, ZERO, (26<<10)|(1<<5)|0, NONE, NONE), -// do { -// mthd(0x1618, 0); // VERTEX_BEGIN_GL -// send(r1); // mode -// mthd(0x0d78, 0); // VERTEX_BUFFER_COUNT -// send(r2); -// mthd(0x1614, 0); // VERTEX_END_GL -// send(0); -// r1 |= (1 << 26); // set INSTANCE_NEXT -// } while (--r3); -// } - MME_INSN(0, ADD, ZERO, R1, ZERO, 0x1618/4, IMMED0, ALU0, - ADD, ZERO, R2, ZERO, 0x0d78/4, IMMED1, ALU1), - MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1614/4, IMMED0, ALU0, - ADD, R4, IMMED, ZERO, 1, NONE, NONE), - MME_INSN(0, MERGE, R1, R1, R4, (26<<10)|(1<<5)|0, NONE, NONE, - SUB, R3, R3, IMMED, 1, NONE, NONE), - MME_INSN(0, BEQ, ZERO, R3, ZERO, (1<<14)|0x3ffd, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -// r6 = r6 + 1; // draw_id++ -// } - MME_INSN(0, ADD, R6, R6, IMMED, 1, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -// r7 = r7 - r5; // unneeded params -// } - MME_INSN(0, SUB, R7, R7, R5, 0, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -// while (HW_LOOP_COUNT < r7) { -// load(); -// load(); -// load(); -// load(); -// } - MME_INSN(0, LOOP, ZERO, R7, ZERO, 0x0003, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), - MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, NONE, - ADD, ZERO, LOAD1, ZERO, 0, NONE, NONE), - MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, NONE, - ADD, ZERO, LOAD1, ZERO, 0, NONE, NONE), -// exit mthd(0x1438, 0); // VB_INSTANCE_BASE -// send(r8); - MME_INSN(1, ADD, ZERO, ZERO, ZERO, 0x1438/4, IMMED0, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), - MME_INSN(0, ADD, ZERO, R8, ZERO, 0, NONE, ALU0, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -}; - -uint32_t mmec597_draw_elts_indirect_count[] = { -// r8 = read(0x1434); -// r1 = load(); -// r9 = read(0x1438); -// r6 = load(); -// r7 = load(); -// r5 = load(); -// r5 = r5 - r6; -// if (r5 > r7) - MME_INSN(0, STATE, R8, IMMED, ZERO, 0x1434/4, NONE, NONE, - ADD, R1, LOAD0, ZERO, 0, NONE, NONE), - MME_INSN(0, STATE, R9, IMMED, ZERO, 0x1438/4, NONE, NONE, - ADD, R6, LOAD0, ZERO, 0, NONE, NONE), - MME_INSN(0, ADD, R7, LOAD0, ZERO, 0, NONE, NONE, - ADD, R5, LOAD1, ZERO, 0, NONE, NONE), - MME_INSN(0, SUB, R5, R5, R6, 0, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), - MME_INSN(0, BLE, ZERO, R5, R7, (2<<14)|0x0002, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -// r5 = r7; - MME_INSN(0, ADD, R5, R7, ZERO, 0, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -// if (r5 >= 0) { - MME_INSN(0, BLT, ZERO, R5, ZERO, (2<<14)|0x000f, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -// while (HW_LOOP_COUNT < r5) { -// r3 = load(); -// r2 = load(); -// mthd(0x17dc, 0); -// send(load()); -// r4 = load(); -// mthd(0x238c, 1); -// send(256 + 160); -// send(r4); -// r10 = load(); -// if (r2) { - MME_INSN(0, LOOP, ZERO, R5, ZERO, 0x000d, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), - MME_INSN(0, ADD, R3, LOAD0, ZERO, (0<<12)|0x17dc/4, IMMED0, NONE, - ADD, R2, LOAD1, ZERO, 0, NONE, NONE), - MME_INSN(0, ADD, ZERO, LOAD0, ZERO, (1<<12)|0x238c/4, NONE, ALU0, - ADD, R4, LOAD1, ZERO, 256 + 160, IMMED0, IMMED1), - MME_INSN(0, ADD, ZERO, R4, ZERO, 0, NONE, ALU0, - ADD, R10, LOAD0, ZERO, 0, NONE, NONE), - MME_INSN(0, BEQ, ZERO, R2, ZERO, (2<<14)|0x0008, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -// send(r10); -// send(r6); -// mthd(0x1434, 1); -// send(r4); -// send(r10); -// mthd(0x1118, 0); -// send(r4); -// r1 &= ~(1 << 26); - MME_INSN(0, ADD, ZERO, R10, ZERO, 0, NONE, ALU0, - ADD, ZERO, R6, ZERO, 0, NONE, ALU1), - MME_INSN(0, ADD, ZERO, R4, ZERO, (1<<12)|0x1434/4, IMMED0, ALU0, - ADD, ZERO, R10, ZERO, 0, NONE, ALU1), - MME_INSN(0, ADD, ZERO, R4, ZERO, (0<<12)|0x1118/4, IMMED0, ALU0, - MERGE, R1, R1, ZERO, (26<<10)|(1<<5)|0, NONE, NONE), -// do { -// mthd(0x1618, 0); -// send(r1); -// mthd(0x17e0, 0); -// send(r3); -// mthd(0x1614, 0); -// send(0); -// r1 |= (1 << 26); -// } while (--r2); -// } - MME_INSN(0, ADD, ZERO, R1, ZERO, 0x1618/4, IMMED0, ALU0, - ADD, ZERO, R3, ZERO, 0x17e0/4, IMMED1, ALU1), - MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1614/4, IMMED0, ALU0, - ADD, R4, IMMED, ZERO, 1, NONE, NONE), - MME_INSN(0, MERGE, R1, R1, R4, (26<<10)|(1<<5)|0, NONE, NONE, - SUB, R2, R2, IMMED, 1, NONE, NONE), - MME_INSN(0, BEQ, ZERO, R2, ZERO, (1<<14)|0x3ffd, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -// r6 = r6 + 1; -// } - MME_INSN(0, ADD, R6, R6, IMMED, 1, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -// r7 = r7 - r5; // unneeded params -// } - MME_INSN(0, SUB, R7, R7, R5, 0, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -// while (HW_LOOP_COUNT < r7) { -// r2 = load(); -// r2 = load(); -// r2 = load(); -// r2 = load(); -// r2 = load(); -// } - MME_INSN(0, LOOP, ZERO, R7, ZERO, 0x0004, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), - MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, NONE, - ADD, ZERO, LOAD1, ZERO, 0, NONE, NONE), - MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, NONE, - ADD, ZERO, LOAD1, ZERO, 0, NONE, NONE), - MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -// mthd(0x1434, 1); -// send(r8); -// send(r9); -// exit mthd(0x1118, 0); -// send(r8); - MME_INSN(1, ADD, ZERO, R8, ZERO, (1<<12)|0x1434/4, IMMED0, ALU0, - ADD, ZERO, R9, ZERO, 0, NONE, ALU1), - MME_INSN(0, ADD, ZERO, R8, ZERO, (0<<12)|0x1118/4, IMMED0, ALU0, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -}; - -uint32_t mmec597_query_buffer_write[] = { -// r1 = load(); // clamp value -// r2 = load(); // end value (lo) -// r3 = load(); // end value (hi) -// r4 = load(); // start value (lo) -// r5 = load(); // start value (hi) -// r8 = load(); // desired sequence -// r9 = load(); // actual sequence -// r7 = load(); // query address (hi) -// r6 = load(); // query address (lo) -// if (r9 >= r8) { - MME_INSN(0, ADD, R1, LOAD0, ZERO, 0, NONE, NONE, - ADD, R2, LOAD1, ZERO, 0, NONE, NONE), - MME_INSN(0, ADD, R3, LOAD0, ZERO, 0, NONE, NONE, - ADD, R4, LOAD1, ZERO, 0, NONE, NONE), - MME_INSN(0, ADD, R5, LOAD0, ZERO, 0, NONE, NONE, - ADD, R8, LOAD1, ZERO, 0, NONE, NONE), - MME_INSN(0, ADD, R9, LOAD0, ZERO, 0, NONE, NONE, - ADD, R7, LOAD1, ZERO, 0, NONE, NONE), - MME_INSN(0, ADD, R6, LOAD0, ZERO, 0, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), - MME_INSN(0, BLT, ZERO, R9, R8, (2<<14)|0x000e, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -// [r3,r2] = [r3,r2] - [r5,r4]; -// if (r1) { - MME_INSN(0, SUB, R2, R2, R4, 0, NONE, NONE, - SUBB, R3, R3, R5, 0, NONE, NONE), - MME_INSN(0, BEQ, ZERO, R1, ZERO, (2<<14)|0x0004, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -// if (r3 != 0 || r1 < r2) -// r2 = r1; -// } - MME_INSN(0, BEQ, ZERO, R3, ZERO, (1<<14)|0x0002, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), - MME_INSN(0, BLTU, ZERO, R1, R2, (1<<14)|0x0002, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), - MME_INSN(0, ADD, R2, R1, ZERO, 0, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -// mthd(0x1b00, 1); -// send(r7); -// send(r6); -// send(r2) -// send(0x10000000); -// if (!r1) { - MME_INSN(0, ADD, ZERO, R7, ZERO, (1<<12)|0x1b00/4, IMMED0, ALU0, - ADD, ZERO, R6, ZERO, 0, NONE, ALU1), - MME_INSN(0, ADD, ZERO, R2, ZERO, 0, NONE, ALU0, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), - MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1000, NONE, IMMED32_0, - ADD, ZERO, ZERO, ZERO, 0x0000, NONE, NONE), - MME_INSN(0, BEQ, ZERO, R1, ZERO, (1<<14)|0x0004, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -// [r7,r6] = [r7,r6] + 4; -// mthd(0x1b00, 1); -// send(r7); -// send(r6); -// send(r3); -// send(0x10000000); -// } - MME_INSN(0, ADD, ZERO, R6, IMMED, 4, IMMED1, ALU1, - ADDC, ZERO, R7, ZERO, (1<<12)|0x1b00/4, NONE, ALU0), - MME_INSN(0, ADD, ZERO, R3, ZERO, 0, NONE, ALU0, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), - MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1000, NONE, IMMED32_0, - ADD, ZERO, ZERO, ZERO, 0x0000, NONE, NONE), -// mthd(0x0110, 0); -// send(0); - MME_INSN(0, ADD, ZERO, ZERO, ZERO, (0<<12)|0x0110/4, IMMED0, ALU0, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -// } - MME_INSN(1, ADD, ZERO, ZERO, ZERO, 0, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), - MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -}; - -uint32_t mmec597_conservative_raster_state[] = { -// r1 = load(); -// mthd(0x3400, 1); -// send(0); -// send(((r1 >> 8) & 7) << 23); -// send(0x03800000); -// mthd(0x2310, 1); -// send(0x00418800); -// r2 = r1 & 0xf; -// r3 = 16; -// r2 = r2 | (((r1 >> 4) & 0xf) << 8); -// mthd(0x0a1c, 8); - MME_INSN(0, ADD, R1, LOAD0, ZERO, (1<<12)|0x3400/4, IMMED0, IMMED1, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), - MME_INSN(0, MERGE, ZERO, ZERO, R1, (23<<10)|(3<<5)|8, NONE, ALU0, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), - MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x0380, NONE, IMMED32_0, - ADD, ZERO, ZERO, ZERO, 0x0000, NONE, NONE), - MME_INSN(0, ADD, ZERO, ZERO, ZERO, (1<<12)|0x2310/4, IMMED0, NONE, - ADD, ZERO, ZERO, ZERO, 0x0000, NONE, NONE), - MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x0041, NONE, IMMED32_0, - ADD, ZERO, ZERO, ZERO, 0x8800, NONE, NONE), - MME_INSN(0, AND, R2, R1, IMMED, 0xf, NONE, NONE, - ADD, R3, ZERO, IMMED, 16, NONE, NONE), - MME_INSN(0, MERGE, R2, R2, R1, (8<<10)|(4<<5)|4, IMMED1, NONE, - ADD, ZERO, ZERO, ZERO, (8<<12)|0x0a1c/4, NONE, NONE), -// while (HW_LOOP_COUNT < r3) -// send(r2); - MME_INSN(0, LOOP, ZERO, R3, ZERO, 0x0002, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), - MME_INSN(0, ADD, ZERO, R2, ZERO, 0, NONE, ALU0, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -// mthd(0x1148, 0); -// send(1); - MME_INSN(1, ADD, ZERO, ZERO, ZERO, (0<<12)|0x1148/4, IMMED0, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), - MME_INSN(0, ADD, ZERO, ZERO, ZERO, 1, NONE, IMMED1, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), -}; - -uint32_t mmec597_compute_counter[] = { -// r0 = load(); -// r1 = 1; -// r2 = 0; -// while (HW_LOOP_COUNT < r2) { - MME_INSN(0, ADD, R0, LOAD0, ZERO, 0, NONE, NONE, - ADD, R1, IMMED, ZERO, 1, NONE, NONE), - MME_INSN(0, LOOP, ZERO, R0, ZERO, 0x0003, NONE, NONE, - ADD, R2, ZERO, ZERO, 0, NONE, NONE), -// r3 = load(); -// [r1,r0] *= r3; -// } - MME_INSN(0, ADD, R3, LOAD0, ZERO, 0, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), - MME_INSN(0, MULU, R1, R1, R3, 0, NONE, NONE, - MULH, R2, ZERO, ZERO, 0, NONE, NONE), -// r3 = read(0x3410); -// r4 = read(0x3414); -// [r4,r3] += [r2,r1]; -// mthd(0x3410, 1); -// send(r3); -// send(r4); - MME_INSN(0, STATE, ZERO, ZERO, ZERO, 0x3410/4, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), - MME_INSN(1, STATE, ZERO, ZERO, ZERO, 0x3414/4, NONE, NONE, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), - MME_INSN(0, ADD, R3, R3, R1, (1<<12)|0x3410/4, IMMED0, ALU0, - ADDC, R4, R4, R2, 0, NONE, ALU1), -}; - -uint32_t mmec597_compute_counter_to_query[] = { -// r1 = load(); -// r3 = read(0x3410); -// r2 = load(); -// r4 = read(0x3414); -// [r2,r1] = [r2,r1] + [r4,r3]; -// mthd(0x1b00, 1); -// r3 = load(); -// send(r3); -// r4 = load(); -// send(r4); -// send(r1); -// send(0x10000000); - MME_INSN(0, ADD, R1, LOAD0, ZERO, 0, NONE, NONE, - STATE, R3, IMMED, ZERO, 0x3410/4, NONE, NONE), - MME_INSN(0, ADD, R2, LOAD0, ZERO, 0, NONE, NONE, - STATE, R4, IMMED, ZERO, 0x3414/4, NONE, NONE), - MME_INSN(0, ADD, R1, R1, R3, (1<<12)|0x1b00/4, IMMED0, NONE, - ADDC, R2, R2, R4, 0, NONE, NONE), - MME_INSN(0, ADD, R3, LOAD0, ZERO, 0, NONE, ALU0, - ADD, R4, LOAD1, ZERO, 0, NONE, ALU1), - MME_INSN(0, ADD, ZERO, R1, ZERO, 0, NONE, ALU0, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), - MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1000, NONE, IMMED32_0, - ADD, ZERO, ZERO, ZERO, 0x0000, NONE, NONE), -// [r3,r4] = [r3,r4] + 4; -// mthd(0x1b00, 1); -// send(r3); -// send(r4); -// send(r2); -// send(0x10000000); - MME_INSN(0, ADD, ZERO, R4, IMMED, 4, IMMED1, ALU1, - ADDC, ZERO, R3, ZERO, (1<<12)|0x1b00/4, NONE, ALU0), - MME_INSN(1, ADD, ZERO, R2, ZERO, 0, NONE, ALU0, - ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), - MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1000, NONE, IMMED32_0, - ADD, ZERO, ZERO, ZERO, 0x0000, NONE, NONE), -}; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h index 539bdc7..221bab3 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h @@ -157,12 +157,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_UNK0220__ESIZE 0x00000004 #define NVC0_3D_UNK0220__LEN 0x00000028 -#define TU102_3D_INDEX_ARRAY_LIMIT_HIGH 0x00000238 - -#define TU102_3D_INDEX_ARRAY_LIMIT_LOW 0x0000023c - -#define TU102_3D_SET_COLOR_RENDER_TO_ZETA_SURFACE 0x000002b8 - #define NVC0_3D_UNK02C0 0x000002c0 #define NVC0_3D_UNK02C4 0x000002c4 @@ -284,9 +278,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_UNK0400__ESIZE 0x00000004 #define NVC0_3D_UNK0400__LEN 0x000000c0 -#define TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(i0) (0x00000600 + 0x8*(i0)) -#define TU102_3D_VERTEX_ARRAY_LIMIT_LOW(i0) (0x00000604 + 0x8*(i0)) - #define NVC0_3D_TFB_STREAM(i0) (0x00000700 + 0x10*(i0)) #define NVC0_3D_TFB_STREAM__ESIZE 0x00000010 #define NVC0_3D_TFB_STREAM__LEN 0x00000004 @@ -1796,9 +1787,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_SP_UNK14__ESIZE 0x00000004 #define NVC0_3D_SP_UNK14__LEN 0x00000004 -#define GV100_3D_SP_ADDRESS_HIGH(i0) (0x00002014 + 0x40*(i0)) -#define GV100_3D_SP_ADDRESS_LOW(i0) (0x00002018 + 0x40*(i0)) - #define NVC0_3D_TEX_LIMITS(i0) (0x00002200 + 0x10*(i0)) #define NVC0_3D_TEX_LIMITS__ESIZE 0x00000010 #define NVC0_3D_TEX_LIMITS__LEN 0x00000005 diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c index 69131fa..c897e4e 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c @@ -38,55 +38,6 @@ nvc0_tex_choose_tile_dims(unsigned nx, unsigned ny, unsigned nz, bool is_3d) } static uint32_t -tu102_mt_choose_storage_type(struct nv50_miptree *mt, bool compressed) -{ - uint32_t kind; - - if (unlikely(mt->base.base.bind & PIPE_BIND_CURSOR)) - return 0; - if (unlikely(mt->base.base.flags & NOUVEAU_RESOURCE_FLAG_LINEAR)) - return 0; - - switch (mt->base.base.format) { - case PIPE_FORMAT_Z16_UNORM: - if (compressed) - kind = 0x0b; // NV_MMU_PTE_KIND_Z16_COMPRESSIBLE_DISABLE_PLC - else - kind = 0x01; // NV_MMU_PTE_KIND_Z16 - break; - case PIPE_FORMAT_X8Z24_UNORM: - case PIPE_FORMAT_S8X24_UINT: - case PIPE_FORMAT_S8_UINT_Z24_UNORM: - if (compressed) - kind = 0x0e; // NV_MMU_PTE_KIND_Z24S8_COMPRESSIBLE_DISABLE_PLC - else - kind = 0x05; // NV_MMU_PTE_KIND_Z24S8 - break; - case PIPE_FORMAT_X24S8_UINT: - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z24_UNORM_S8_UINT: - if (compressed) - kind = 0x0c; // NV_MMU_PTE_KIND_S8Z24_COMPRESSIBLE_DISABLE_PLC - else - kind = 0x03; // NV_MMU_PTE_KIND_S8Z24 - break; - case PIPE_FORMAT_X32_S8X24_UINT: - case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: - if (compressed) - kind = 0x0d; // NV_MMU_PTE_KIND_ZF32_X24S8_COMPRESSIBLE_DISABLE_PLC - else - kind = 0x04; // NV_MMU_PTE_KIND_ZF32_X24S8 - break; - case PIPE_FORMAT_Z32_FLOAT: - default: - kind = 0x06; - break; - } - - return kind; -} - -static uint32_t nvc0_mt_choose_storage_type(struct nv50_miptree *mt, bool compressed) { const unsigned ms = util_logbase2(mt->base.base.nr_samples); @@ -406,10 +357,7 @@ nvc0_miptree_create(struct pipe_screen *pscreen, if (pt->bind & PIPE_BIND_LINEAR) pt->flags |= NOUVEAU_RESOURCE_FLAG_LINEAR; - if (dev->chipset < 0x160) - bo_config.nvc0.memtype = nvc0_mt_choose_storage_type(mt, compressed); - else - bo_config.nvc0.memtype = tu102_mt_choose_storage_type(mt, compressed); + bo_config.nvc0.memtype = nvc0_mt_choose_storage_type(mt, compressed); if (!nvc0_miptree_init_ms_mode(mt)) { FREE(mt); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c index d2b2de4..32aa82d 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c @@ -645,10 +645,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset, prog->code_size = info->bin.codeSize; prog->relocs = info->bin.relocData; prog->fixups = info->bin.fixupData; - if (info->target >= NVISA_GV100_CHIPSET) - prog->num_gprs = MIN2(info->bin.maxGPR + 5, 256); //XXX: why? - else - prog->num_gprs = MAX2(4, (info->bin.maxGPR + 1)); + prog->num_gprs = MAX2(4, (info->bin.maxGPR + 1)); prog->cp.smem_size = info->bin.smemSize; prog->num_barriers = info->numBarriers; @@ -737,14 +734,7 @@ nvc0_program_alloc_code(struct nvc0_context *nvc0, struct nvc0_program *prog) struct nvc0_screen *screen = nvc0->screen; const bool is_cp = prog->type == PIPE_SHADER_COMPUTE; int ret; - uint32_t size = prog->code_size; - - if (!is_cp) { - if (screen->eng3d->oclass < TU102_3D_CLASS) - size += GF100_SHADER_HEADER_SIZE; - else - size += TU102_SHADER_HEADER_SIZE; - } + uint32_t size = prog->code_size + (is_cp ? 0 : NVC0_SHADER_HEADER_SIZE); /* On Fermi, SP_START_ID must be aligned to 0x40. * On Kepler, the first instruction must be aligned to 0x80 because @@ -760,8 +750,7 @@ nvc0_program_alloc_code(struct nvc0_context *nvc0, struct nvc0_program *prog) prog->code_base = prog->mem->start; if (!is_cp) { - if (screen->base.class_3d >= NVE4_3D_CLASS && - screen->base.class_3d < TU102_3D_CLASS) { + if (screen->base.class_3d >= NVE4_3D_CLASS) { switch (prog->mem->start & 0xff) { case 0x40: prog->code_base += 0x70; break; case 0x80: prog->code_base += 0x30; break; @@ -788,16 +777,7 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog) { struct nvc0_screen *screen = nvc0->screen; const bool is_cp = prog->type == PIPE_SHADER_COMPUTE; - uint32_t code_pos = prog->code_base; - uint32_t size_sph = 0; - - if (!is_cp) { - if (screen->eng3d->oclass < TU102_3D_CLASS) - size_sph = GF100_SHADER_HEADER_SIZE; - else - size_sph = TU102_SHADER_HEADER_SIZE; - } - code_pos += size_sph; + uint32_t code_pos = prog->code_base + (is_cp ? 0 : NVC0_SHADER_HEADER_SIZE); if (prog->relocs) nv50_ir_relocate_code(prog->relocs, prog->code, code_pos, @@ -823,7 +803,8 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog) if (!is_cp) nvc0->base.push_data(&nvc0->base, screen->text, prog->code_base, - NV_VRAM_DOMAIN(&screen->base), size_sph, prog->hdr); + NV_VRAM_DOMAIN(&screen->base), + NVC0_SHADER_HEADER_SIZE, prog->hdr); nvc0->base.push_data(&nvc0->base, screen->text, code_pos, NV_VRAM_DOMAIN(&screen->base), prog->code_size, @@ -836,14 +817,7 @@ nvc0_program_upload(struct nvc0_context *nvc0, struct nvc0_program *prog) struct nvc0_screen *screen = nvc0->screen; const bool is_cp = prog->type == PIPE_SHADER_COMPUTE; int ret; - uint32_t size = prog->code_size; - - if (!is_cp) { - if (screen->eng3d->oclass < TU102_3D_CLASS) - size += GF100_SHADER_HEADER_SIZE; - else - size += TU102_SHADER_HEADER_SIZE; - } + uint32_t size = prog->code_size + (is_cp ? 0 : NVC0_SHADER_HEADER_SIZE); ret = nvc0_program_alloc_code(nvc0, prog); if (ret) { @@ -900,7 +874,8 @@ nvc0_program_upload(struct nvc0_context *nvc0, struct nvc0_program *prog) BEGIN_NVC0(nvc0->base.pushbuf, NVC0_CP(FLUSH), 1); PUSH_DATA (nvc0->base.pushbuf, NVC0_COMPUTE_FLUSH_CODE); } else { - nvc0_program_sp_start_id(nvc0, i, progs[i]); + BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(SP_START_ID(i)), 1); + PUSH_DATA (nvc0->base.pushbuf, progs[i]->code_base); } } } @@ -978,7 +953,7 @@ nvc0_program_symbol_offset(const struct nvc0_program *prog, uint32_t label) unsigned base = 0; unsigned i; if (prog->type != PIPE_SHADER_COMPUTE) - base = GF100_SHADER_HEADER_SIZE; + base = NVC0_SHADER_HEADER_SIZE; for (i = 0; i < prog->cp.num_syms; ++i) if (syms[i].label == label) return prog->code_base + base + syms[i].offset; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h index 2c465b3..5684207 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h @@ -15,9 +15,7 @@ struct nvc0_transform_feedback_state { }; -#define GF100_SHADER_HEADER_SIZE (20 * 4) -#define TU102_SHADER_HEADER_SIZE (32 * 4) -#define NVC0_MAX_SHADER_HEADER_SIZE TU102_SHADER_HEADER_SIZE +#define NVC0_SHADER_HEADER_SIZE (20 * 4) struct nvc0_program { struct pipe_shader_state pipe; @@ -32,7 +30,7 @@ struct nvc0_program { unsigned code_size; unsigned parm_size; /* size of non-bindable uniforms (c0[]) */ - uint32_t hdr[NVC0_MAX_SHADER_HEADER_SIZE/4]; + uint32_t hdr[20]; uint32_t flags[2]; struct { @@ -74,6 +72,4 @@ struct nvc0_program { struct nouveau_heap *mem; }; -void -nvc0_program_sp_start_id(struct nvc0_context *, int, struct nvc0_program *); #endif diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 07d74dd..7abbf76 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -27,17 +27,15 @@ #include "util/format/u_format_s3tc.h" #include "util/u_screen.h" #include "pipe/p_screen.h" +#include "compiler/nir/nir.h" #include "nouveau_vp3_video.h" -#include "codegen/nv50_ir_driver.h" - #include "nvc0/nvc0_context.h" #include "nvc0/nvc0_screen.h" #include "nvc0/mme/com9097.mme.h" #include "nvc0/mme/com90c0.mme.h" -#include "nvc0/mme/comc597.mme.h" #include "nv50/g80_texture.xml.h" @@ -445,8 +443,8 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, case PIPE_SHADER_CAP_PREFERRED_IR: return screen->prefer_nir ? PIPE_SHADER_IR_NIR : PIPE_SHADER_IR_TGSI; case PIPE_SHADER_CAP_SUPPORTED_IRS: { - uint32_t irs = 1 << PIPE_SHADER_IR_NIR | - ((class_3d >= GV100_3D_CLASS) ? 0 : 1 << PIPE_SHADER_IR_TGSI); + uint32_t irs = 1 << PIPE_SHADER_IR_TGSI | + 1 << PIPE_SHADER_IR_NIR; if (screen->force_enable_cl) irs |= 1 << PIPE_SHADER_IR_NIR_SERIALIZED; return irs; @@ -469,14 +467,6 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: return shader != PIPE_SHADER_FRAGMENT; case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: - /* HW doesn't support indirect addressing of fragment program inputs - * on Volta. The binary driver generates a function to handle every - * possible indirection, and indirectly calls the function to handle - * this instead. - */ - if (class_3d >= GV100_3D_CLASS) - return shader != PIPE_SHADER_FRAGMENT; - return 1; case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: return 1; @@ -727,26 +717,6 @@ nvc0_graph_set_macro(struct nvc0_screen *screen, uint32_t m, unsigned pos, return pos + size; } -static int -tu102_graph_set_macro(struct nvc0_screen *screen, uint32_t m, unsigned pos, - unsigned size, const uint32_t *data) -{ - struct nouveau_pushbuf *push = screen->base.pushbuf; - - size /= 4; - - assert((pos + size) <= 0x800); - - BEGIN_NVC0(push, SUBC_3D(NVC0_GRAPH_MACRO_ID), 2); - PUSH_DATA (push, (m - 0x3800) / 8); - PUSH_DATA (push, pos); - BEGIN_1IC0(push, SUBC_3D(NVC0_GRAPH_MACRO_UPLOAD_POS), size + 1); - PUSH_DATA (push, pos); - PUSH_DATAp(push, data, size); - - return pos + (size / 3); -} - static void nvc0_magic_3d_init(struct nouveau_pushbuf *push, uint16_t obj_class) { @@ -758,10 +728,8 @@ nvc0_magic_3d_init(struct nouveau_pushbuf *push, uint16_t obj_class) BEGIN_NVC0(push, SUBC_3D(0x10ec), 2); PUSH_DATA (push, 0xff); PUSH_DATA (push, 0xff); - if (obj_class < GV100_3D_CLASS) { - BEGIN_NVC0(push, SUBC_3D(0x074c), 1); - PUSH_DATA (push, 0x3f); - } + BEGIN_NVC0(push, SUBC_3D(0x074c), 1); + PUSH_DATA (push, 0x3f); BEGIN_NVC0(push, SUBC_3D(0x16a8), 1); PUSH_DATA (push, (3 << 16) | 3); @@ -793,10 +761,8 @@ nvc0_magic_3d_init(struct nouveau_pushbuf *push, uint16_t obj_class) BEGIN_NVC0(push, SUBC_3D(0x0300), 1); PUSH_DATA (push, 3); - if (obj_class < GV100_3D_CLASS) { - BEGIN_NVC0(push, SUBC_3D(0x02d0), 1); - PUSH_DATA (push, 0x3fffff); - } + BEGIN_NVC0(push, SUBC_3D(0x02d0), 1); + PUSH_DATA (push, 0x3fffff); BEGIN_NVC0(push, SUBC_3D(0x0fdc), 1); PUSH_DATA (push, 1); BEGIN_NVC0(push, SUBC_3D(0x19c0), 1); @@ -856,8 +822,6 @@ nvc0_screen_init_compute(struct nvc0_screen *screen) case 0x110: case 0x120: case 0x130: - case 0x140: - case 0x160: return nve4_screen_compute_setup(screen, screen->base.pushbuf); default: return -1; @@ -929,15 +893,13 @@ nvc0_screen_resize_text_area(struct nvc0_screen *screen, uint64_t size) nouveau_heap_init(&screen->text_heap, 0, size - 0x100); /* update the code segment setup */ - if (screen->eng3d->oclass < GV100_3D_CLASS) { - BEGIN_NVC0(push, NVC0_3D(CODE_ADDRESS_HIGH), 2); + BEGIN_NVC0(push, NVC0_3D(CODE_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->text->offset); + PUSH_DATA (push, screen->text->offset); + if (screen->compute) { + BEGIN_NVC0(push, NVC0_CP(CODE_ADDRESS_HIGH), 2); PUSH_DATAh(push, screen->text->offset); PUSH_DATA (push, screen->text->offset); - if (screen->compute) { - BEGIN_NVC0(push, NVC0_CP(CODE_ADDRESS_HIGH), 2); - PUSH_DATAh(push, screen->text->offset); - PUSH_DATA (push, screen->text->offset); - } } return 0; @@ -977,14 +939,74 @@ nvc0_screen_bind_cb_3d(struct nvc0_screen *screen, bool *can_serialize, IMMED_NVC0(push, NVC0_3D(CB_BIND(stage)), (index << 4) | (size >= 0)); } +static const nir_shader_compiler_options nir_options = { + .lower_fdiv = false, + .lower_ffma = false, + .fuse_ffma = false, /* nir doesn't track mad vs fma */ + .lower_flrp32 = true, + .lower_flrp64 = true, + .lower_fpow = false, + .lower_fsat = false, + .lower_fsqrt = false, // TODO: only before gm200 + .lower_fmod = true, + .lower_bitfield_extract = false, + .lower_bitfield_extract_to_shifts = false, + .lower_bitfield_insert = false, + .lower_bitfield_insert_to_shifts = false, + .lower_bitfield_reverse = false, + .lower_bit_count = false, + .lower_ifind_msb = false, + .lower_find_lsb = false, + .lower_uadd_carry = true, // TODO + .lower_usub_borrow = true, // TODO + .lower_mul_high = false, + .lower_negate = false, + .lower_sub = true, + .lower_scmp = true, // TODO: not implemented yet + .lower_idiv = true, + .lower_isign = false, // TODO + .fdot_replicates = false, // TODO + .lower_ffloor = false, // TODO + .lower_ffract = true, + .lower_fceil = false, // TODO + .lower_ldexp = true, + .lower_pack_half_2x16 = true, + .lower_pack_unorm_2x16 = true, + .lower_pack_snorm_2x16 = true, + .lower_pack_unorm_4x8 = true, + .lower_pack_snorm_4x8 = true, + .lower_unpack_half_2x16 = true, + .lower_unpack_unorm_2x16 = true, + .lower_unpack_snorm_2x16 = true, + .lower_unpack_unorm_4x8 = true, + .lower_unpack_snorm_4x8 = true, + .lower_extract_byte = true, + .lower_extract_word = true, + .lower_all_io_to_temps = false, + .vertex_id_zero_based = false, + .lower_base_vertex = false, + .lower_helper_invocation = false, + .lower_cs_local_index_from_id = true, + .lower_cs_local_id_from_index = false, + .lower_device_index_to_zero = false, // TODO + .lower_wpos_pntc = false, // TODO + .lower_hadd = true, // TODO + .lower_add_sat = true, // TODO + .use_interpolated_input_intrinsics = true, + .lower_mul_2x32_64 = true, // TODO + .max_unroll_iterations = 32, + .lower_int64_options = nir_lower_ufind_msb64|nir_lower_divmod64, // TODO + .lower_doubles_options = nir_lower_dmod, // TODO + .lower_to_scalar = true, +}; + static const void * nvc0_screen_get_compiler_options(struct pipe_screen *pscreen, enum pipe_shader_ir ir, enum pipe_shader_type shader) { - struct nvc0_screen *screen = nvc0_screen(pscreen); if (ir == PIPE_SHADER_IR_NIR) - return nv50_ir_nir_shader_compiler_options(screen->base.device->chipset); + return &nir_options; return NULL; } @@ -1016,8 +1038,6 @@ nvc0_screen_create(struct nouveau_device *dev) case 0x110: case 0x120: case 0x130: - case 0x140: - case 0x160: break; default: return NULL; @@ -1084,19 +1104,16 @@ nvc0_screen_create(struct nouveau_device *dev) screen->base.fence.emit = nvc0_screen_fence_emit; screen->base.fence.update = nvc0_screen_fence_update; - if (dev->chipset < 0x140) { - ret = nouveau_object_new(chan, (dev->chipset < 0xe0) ? 0x1f906e : 0x906e, - NVIF_CLASS_SW_GF100, NULL, 0, &screen->nvsw); - if (ret) - FAIL_SCREEN_INIT("Error creating SW object: %d\n", ret); - BEGIN_NVC0(push, SUBC_SW(NV01_SUBCHAN_OBJECT), 1); - PUSH_DATA (push, screen->nvsw->handle); - } + ret = nouveau_object_new(chan, (dev->chipset < 0xe0) ? 0x1f906e : 0x906e, + NVIF_CLASS_SW_GF100, NULL, 0, &screen->nvsw); + if (ret) + FAIL_SCREEN_INIT("Error creating SW object: %d\n", ret); + + BEGIN_NVC0(push, SUBC_SW(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push, screen->nvsw->handle); switch (dev->chipset & ~0xf) { - case 0x160: - case 0x140: case 0x130: case 0x120: case 0x110: @@ -1150,12 +1167,6 @@ nvc0_screen_create(struct nouveau_device *dev) PUSH_DATA (push, screen->fence.bo->offset + 16); switch (dev->chipset & ~0xf) { - case 0x160: - obj_class = TU102_3D_CLASS; - break; - case 0x140: - obj_class = GV100_3D_CLASS; - break; case 0x130: switch (dev->chipset) { case 0x130: @@ -1403,47 +1414,25 @@ nvc0_screen_create(struct nouveau_device *dev) PUSH_DATA (push, 16384 << 16); } - if (screen->eng3d->oclass < TU102_3D_CLASS) { #define MK_MACRO(m, n) i = nvc0_graph_set_macro(screen, m, i, sizeof(n), n); - i = 0; - MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_PER_INSTANCE, mme9097_per_instance_bf); - MK_MACRO(NVC0_3D_MACRO_BLEND_ENABLES, mme9097_blend_enables); - MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_SELECT, mme9097_vertex_array_select); - MK_MACRO(NVC0_3D_MACRO_TEP_SELECT, mme9097_tep_select); - MK_MACRO(NVC0_3D_MACRO_GP_SELECT, mme9097_gp_select); - MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_FRONT, mme9097_poly_mode_front); - MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_BACK, mme9097_poly_mode_back); - MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT, mme9097_draw_arrays_indirect); - MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT, mme9097_draw_elts_indirect); - MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mme9097_draw_arrays_indirect_count); - MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mme9097_draw_elts_indirect_count); - MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mme9097_query_buffer_write); - MK_MACRO(NVC0_3D_MACRO_CONSERVATIVE_RASTER_STATE, mme9097_conservative_raster_state); - MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER, mme9097_compute_counter); - MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER_TO_QUERY, mme9097_compute_counter_to_query); - MK_MACRO(NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT, mme90c0_launch_grid_indirect); - } else { -#undef MK_MACRO -#define MK_MACRO(m, n) i = tu102_graph_set_macro(screen, m, i, sizeof(n), n); - - i = 0; - MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_PER_INSTANCE, mmec597_per_instance_bf); - MK_MACRO(NVC0_3D_MACRO_BLEND_ENABLES, mmec597_blend_enables); - MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_SELECT, mmec597_vertex_array_select); - MK_MACRO(NVC0_3D_MACRO_TEP_SELECT, mmec597_tep_select); - MK_MACRO(NVC0_3D_MACRO_GP_SELECT, mmec597_gp_select); - MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_FRONT, mmec597_poly_mode_front); - MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_BACK, mmec597_poly_mode_back); - MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT, mmec597_draw_arrays_indirect); - MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT, mmec597_draw_elts_indirect); - MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mmec597_draw_arrays_indirect_count); - MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mmec597_draw_elts_indirect_count); - MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mmec597_query_buffer_write); - MK_MACRO(NVC0_3D_MACRO_CONSERVATIVE_RASTER_STATE, mmec597_conservative_raster_state); - MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER, mmec597_compute_counter); - MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER_TO_QUERY, mmec597_compute_counter_to_query); - } + i = 0; + MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_PER_INSTANCE, mme9097_per_instance_bf); + MK_MACRO(NVC0_3D_MACRO_BLEND_ENABLES, mme9097_blend_enables); + MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_SELECT, mme9097_vertex_array_select); + MK_MACRO(NVC0_3D_MACRO_TEP_SELECT, mme9097_tep_select); + MK_MACRO(NVC0_3D_MACRO_GP_SELECT, mme9097_gp_select); + MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_FRONT, mme9097_poly_mode_front); + MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_BACK, mme9097_poly_mode_back); + MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT, mme9097_draw_arrays_indirect); + MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT, mme9097_draw_elts_indirect); + MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mme9097_draw_arrays_indirect_count); + MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mme9097_draw_elts_indirect_count); + MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mme9097_query_buffer_write); + MK_MACRO(NVC0_3D_MACRO_CONSERVATIVE_RASTER_STATE, mme9097_conservative_raster_state); + MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER, mme9097_compute_counter); + MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER_TO_QUERY, mme9097_compute_counter_to_query); + MK_MACRO(NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT, mme90c0_launch_grid_indirect); BEGIN_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), 1); PUSH_DATA (push, 1); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c index 490026b..b7e0c8a 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c @@ -65,22 +65,6 @@ nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog) } void -nvc0_program_sp_start_id(struct nvc0_context *nvc0, int stage, - struct nvc0_program *prog) -{ - struct nouveau_pushbuf *push = nvc0->base.pushbuf; - - if (nvc0->screen->eng3d->oclass < GV100_3D_CLASS) { - BEGIN_NVC0(push, NVC0_3D(SP_START_ID(stage)), 1); - PUSH_DATA (push, prog->code_base); - } else { - BEGIN_NVC0(push, SUBC_3D(GV100_3D_SP_ADDRESS_HIGH(stage)), 2); - PUSH_DATAh(push, nvc0->screen->text->offset + prog->code_base); - PUSH_DATA (push, nvc0->screen->text->offset + prog->code_base); - } -} - -void nvc0_vertprog_validate(struct nvc0_context *nvc0) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; @@ -90,9 +74,9 @@ nvc0_vertprog_validate(struct nvc0_context *nvc0) return; nvc0_program_update_context_state(nvc0, vp, 0); - BEGIN_NVC0(push, NVC0_3D(SP_SELECT(1)), 1); + BEGIN_NVC0(push, NVC0_3D(SP_SELECT(1)), 2); PUSH_DATA (push, 0x11); - nvc0_program_sp_start_id(nvc0, 1, vp); + PUSH_DATA (push, vp->code_base); BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(1)), 1); PUSH_DATA (push, vp->num_gprs); @@ -168,9 +152,9 @@ nvc0_fragprog_validate(struct nvc0_context *nvc0) fp->fp.post_depth_coverage); } - BEGIN_NVC0(push, NVC0_3D(SP_SELECT(5)), 1); + BEGIN_NVC0(push, NVC0_3D(SP_SELECT(5)), 2); PUSH_DATA (push, 0x51); - nvc0_program_sp_start_id(nvc0, 5, fp); + PUSH_DATA (push, fp->code_base); BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(5)), 1); PUSH_DATA (push, fp->num_gprs); @@ -192,9 +176,9 @@ nvc0_tctlprog_validate(struct nvc0_context *nvc0) BEGIN_NVC0(push, NVC0_3D(TESS_MODE), 1); PUSH_DATA (push, tp->tp.tess_mode); } - BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 1); + BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 2); PUSH_DATA (push, 0x21); - nvc0_program_sp_start_id(nvc0, 2, tp); + PUSH_DATA (push, tp->code_base); BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(2)), 1); PUSH_DATA (push, tp->num_gprs); } else { @@ -202,9 +186,9 @@ nvc0_tctlprog_validate(struct nvc0_context *nvc0) /* not a whole lot we can do to handle this failure */ if (!nvc0_program_validate(nvc0, tp)) assert(!"unable to validate empty tcp"); - BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 1); + BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 2); PUSH_DATA (push, 0x20); - nvc0_program_sp_start_id(nvc0, 2, tp); + PUSH_DATA (push, tp->code_base); } nvc0_program_update_context_state(nvc0, tp, 1); } @@ -222,7 +206,8 @@ nvc0_tevlprog_validate(struct nvc0_context *nvc0) } BEGIN_NVC0(push, NVC0_3D(MACRO_TEP_SELECT), 1); PUSH_DATA (push, 0x31); - nvc0_program_sp_start_id(nvc0, 3, tp); + BEGIN_NVC0(push, NVC0_3D(SP_START_ID(3)), 1); + PUSH_DATA (push, tp->code_base); BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(3)), 1); PUSH_DATA (push, tp->num_gprs); } else { @@ -242,7 +227,8 @@ nvc0_gmtyprog_validate(struct nvc0_context *nvc0) if (gp && nvc0_program_validate(nvc0, gp) && gp->code_size) { BEGIN_NVC0(push, NVC0_3D(MACRO_GP_SELECT), 1); PUSH_DATA (push, 0x41); - nvc0_program_sp_start_id(nvc0, 4, gp); + BEGIN_NVC0(push, NVC0_3D(SP_START_ID(4)), 1); + PUSH_DATA (push, gp->code_base); BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(4)), 1); PUSH_DATA (push, gp->num_gprs); } else { diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c index 731b0b5..538effd 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c @@ -29,8 +29,6 @@ #include "util/format/u_format.h" #include "util/u_surface.h" -#include "tgsi/tgsi_ureg.h" - #include "os/os_thread.h" #include "nvc0/nvc0_context.h" @@ -140,11 +138,6 @@ nvc0_2d_texture_set(struct nouveau_pushbuf *push, bool dst, PUSH_DATA (push, bo->offset + offset); } - if (dst) { - IMMED_NVC0(push, SUBC_2D(NVC0_2D_SET_DST_COLOR_RENDER_TO_ZETA_SURFACE), - util_format_is_depth_or_stencil(pformat)); - } - #if 0 if (dst) { BEGIN_NVC0(push, SUBC_2D(NVC0_2D_CLIP_X), 4); @@ -779,7 +772,7 @@ gm200_evaluate_depth_buffer(struct pipe_context *pipe) struct nvc0_blitter { struct nvc0_program *fp[NV50_BLIT_MAX_TEXTURE_TYPES][NV50_BLIT_MODES]; - struct nvc0_program *vp; + struct nvc0_program vp; struct nv50_tsc_entry sampler[2]; /* nearest, bilinear */ @@ -792,7 +785,6 @@ struct nvc0_blitctx { struct nvc0_context *nvc0; struct nvc0_program *fp; - struct nvc0_program *vp; uint8_t mode; uint16_t color_mask; uint8_t filter; @@ -817,27 +809,78 @@ struct nvc0_blitctx struct nvc0_rasterizer_stateobj rast; }; -static void * -nvc0_blitter_make_vp(struct pipe_context *pipe) +static void +nvc0_blitter_make_vp(struct nvc0_blitter *blit) { - struct ureg_program *ureg; - struct ureg_src ipos, itex; - struct ureg_dst opos, otex; - - ureg = ureg_create(PIPE_SHADER_VERTEX); - if (!ureg) - return NULL; - - opos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0); - ipos = ureg_DECL_vs_input(ureg, 0); - otex = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 0); - itex = ureg_DECL_vs_input(ureg, 1); - - ureg_MOV(ureg, ureg_writemask(opos, TGSI_WRITEMASK_XY ), ipos); - ureg_MOV(ureg, ureg_writemask(otex, TGSI_WRITEMASK_XYZ), itex); - ureg_END(ureg); + static const uint32_t code_nvc0[] = + { + 0xfff11c26, 0x06000080, /* vfetch b64 $r4:$r5 a[0x80] */ + 0xfff01c46, 0x06000090, /* vfetch b96 $r0:$r1:$r2 a[0x90] */ + 0x13f01c26, 0x0a7e0070, /* export b64 o[0x70] $r4:$r5 */ + 0x03f01c46, 0x0a7e0080, /* export b96 o[0x80] $r0:$r1:$r2 */ + 0x00001de7, 0x80000000, /* exit */ + }; + static const uint32_t code_nve4[] = + { + 0x00000007, 0x20000000, /* sched */ + 0xfff11c26, 0x06000080, /* vfetch b64 $r4:$r5 a[0x80] */ + 0xfff01c46, 0x06000090, /* vfetch b96 $r0:$r1:$r2 a[0x90] */ + 0x13f01c26, 0x0a7e0070, /* export b64 o[0x70] $r4:$r5 */ + 0x03f01c46, 0x0a7e0080, /* export b96 o[0x80] $r0:$r1:$r2 */ + 0x00001de7, 0x80000000, /* exit */ + }; + static const uint32_t code_gk110[] = + { + 0x00000000, 0x08000000, /* sched */ + 0x401ffc12, 0x7ec7fc00, /* ld b64 $r4d a[0x80] 0x0 0x0 */ + 0x481ffc02, 0x7ecbfc00, /* ld b96 $r0t a[0x90] 0x0 0x0 */ + 0x381ffc12, 0x7f07fc00, /* st b64 a[0x70] $r4d 0x0 0x0 */ + 0x401ffc02, 0x7f0bfc00, /* st b96 a[0x80] $r0t 0x0 0x0 */ + 0x001c003c, 0x18000000, /* exit */ + }; + static const uint32_t code_gm107[] = + { + 0xe4200701, 0x001d0400, /* sched (st 0x1 wr 0x0) (st 0x1 wr 0x1) (st 0x1 wr 0x2) */ + 0x0807ff00, 0xefd87f80, /* ld b32 $r0 a[0x80] 0x0 */ + 0x0847ff01, 0xefd87f80, /* ld b32 $r1 a[0x84] 0x0 */ + 0x0907ff02, 0xefd87f80, /* ld b32 $r2 a[0x90] 0x0 */ + 0xf0200761, 0x003f8400, /* sched (st 0x1 wr 0x3) (st 0x1 wr 0x4) (st 0x1 wt 0x1) */ + 0x0947ff03, 0xefd87f80, /* ld b32 $r3 a[0x94] 0x0 */ + 0x0987ff04, 0xefd87f80, /* ld b32 $r4 a[0x98] 0x0 */ + 0x0707ff00, 0xeff07f80, /* st b32 a[0x70] $r0 0x0 */ + 0xfc2017e1, 0x011f8404, /* sched (st 0x1 wt 0x2) (st 0x1 wt 0x4) (st 0x1 wt 0x8) */ + 0x0747ff01, 0xeff07f80, /* st b32 a[0x74] $r1 0x0 */ + 0x0807ff02, 0xeff07f80, /* st b32 a[0x80] $r2 0x0 */ + 0x0847ff03, 0xeff07f80, /* st b32 a[0x84] $r3 0x0 */ + 0xfde087e1, 0x001f8000, /* sched (st 0x1 wt 0x10) (st 0xf) (st 0x0) */ + 0x0887ff04, 0xeff07f80, /* st b32 a[0x88] $r4 0x0 */ + 0x0007000f, 0xe3000000, /* exit */ + }; + + blit->vp.type = PIPE_SHADER_VERTEX; + blit->vp.translated = true; + if (blit->screen->base.class_3d >= GM107_3D_CLASS) { + blit->vp.code = (uint32_t *)code_gm107; /* const_cast */ + blit->vp.code_size = sizeof(code_gm107); + } else + if (blit->screen->base.class_3d >= NVF0_3D_CLASS) { + blit->vp.code = (uint32_t *)code_gk110; /* const_cast */ + blit->vp.code_size = sizeof(code_gk110); + } else + if (blit->screen->base.class_3d >= NVE4_3D_CLASS) { + blit->vp.code = (uint32_t *)code_nve4; /* const_cast */ + blit->vp.code_size = sizeof(code_nve4); + } else { + blit->vp.code = (uint32_t *)code_nvc0; /* const_cast */ + blit->vp.code_size = sizeof(code_nvc0); + } + blit->vp.num_gprs = 6; + blit->vp.vp.edgeflag = PIPE_MAX_ATTRIBS; - return ureg_create_shader_and_destroy(ureg, pipe); + blit->vp.hdr[0] = 0x00020461; /* vertprog magic */ + blit->vp.hdr[4] = 0x000ff000; /* no outputs read */ + blit->vp.hdr[6] = 0x00000073; /* a[0x80].xy, a[0x90].xyz */ + blit->vp.hdr[13] = 0x00073000; /* o[0x70].xy, o[0x80].xyz */ } static void @@ -868,20 +911,6 @@ nvc0_blitter_make_sampler(struct nvc0_blitter *blit) } static void -nvc0_blit_select_vp(struct nvc0_blitctx *ctx) -{ - struct nvc0_blitter *blitter = ctx->nvc0->screen->blitter; - - if (!blitter->vp) { - mtx_lock(&blitter->mutex); - if (!blitter->vp) - blitter->vp = nvc0_blitter_make_vp(&ctx->nvc0->base.pipe); - mtx_unlock(&blitter->mutex); - } - ctx->vp = blitter->vp; -} - -static void nvc0_blit_select_fp(struct nvc0_blitctx *ctx, const struct pipe_blit_info *info) { struct nvc0_blitter *blitter = ctx->nvc0->screen->blitter; @@ -1053,7 +1082,7 @@ nvc0_blitctx_pre_blit(struct nvc0_blitctx *ctx, nvc0->rast = &ctx->rast; - nvc0->vertprog = ctx->vp; + nvc0->vertprog = &blitter->vp; nvc0->tctlprog = NULL; nvc0->tevlprog = NULL; nvc0->gmtyprog = NULL; @@ -1192,7 +1221,6 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info) blit->filter = nv50_blit_get_filter(info); blit->render_condition_enable = info->render_condition_enable; - nvc0_blit_select_vp(blit); nvc0_blit_select_fp(blit, info); nvc0_blitctx_pre_blit(blit, info); @@ -1238,11 +1266,6 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info) } } - if (screen->eng3d->oclass >= TU102_3D_CLASS) { - IMMED_NVC0(push, SUBC_3D(TU102_3D_SET_COLOR_RENDER_TO_ZETA_SURFACE), - util_format_is_depth_or_stencil(info->dst.format)); - } - IMMED_NVC0(push, NVC0_3D(VIEWPORT_TRANSFORM_EN), 0); IMMED_NVC0(push, NVC0_3D(VIEW_VOLUME_CLIP_CTRL), 0x2 | NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_RANGE_0_1); @@ -1303,10 +1326,7 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info) PUSH_DATAh(push, vtxbuf); PUSH_DATA (push, vtxbuf); PUSH_DATA (push, 0); - if (screen->eng3d->oclass < TU102_3D_CLASS) - BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(0)), 2); - else - BEGIN_NVC0(push, SUBC_3D(TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(0)), 2); + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(0)), 2); PUSH_DATAh(push, vtxbuf + length - 1); PUSH_DATA (push, vtxbuf + length - 1); @@ -1383,8 +1403,6 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info) /* restore viewport transform */ IMMED_NVC0(push, NVC0_3D(VIEWPORT_TRANSFORM_EN), 1); - if (screen->eng3d->oclass >= TU102_3D_CLASS) - IMMED_NVC0(push, SUBC_3D(TU102_3D_SET_COLOR_RENDER_TO_ZETA_SURFACE), 0); } static void @@ -1679,6 +1697,7 @@ nvc0_blitter_create(struct nvc0_screen *screen) (void) mtx_init(&screen->blitter->mutex, mtx_plain); + nvc0_blitter_make_vp(screen->blitter); nvc0_blitter_make_sampler(screen->blitter); return true; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c index 8287d84..92bd7eb 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c @@ -360,11 +360,7 @@ nvc0_validate_vertex_buffers(struct nvc0_context *nvc0) PUSH_DATAh(push, res->address + offset); PUSH_DATA (push, res->address + offset); } - - if (nvc0->screen->eng3d->oclass < TU102_3D_CLASS) - BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2); - else - BEGIN_NVC0(push, SUBC_3D(TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(i)), 2); + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2); PUSH_DATAh(push, res->address + limit); PUSH_DATA (push, res->address + limit); @@ -410,11 +406,7 @@ nvc0_validate_vertex_buffers_shared(struct nvc0_context *nvc0) PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride); PUSH_DATAh(push, buf->address + offset); PUSH_DATA (push, buf->address + offset); - - if (nvc0->screen->eng3d->oclass < TU102_3D_CLASS) - BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(b)), 2); - else - BEGIN_NVC0(push, SUBC_3D(TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(b)), 2); + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(b)), 2); PUSH_DATAh(push, buf->address + limit); PUSH_DATA (push, buf->address + limit); @@ -969,23 +961,12 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) assert(nouveau_resource_mapped_by_gpu(&buf->base)); PUSH_SPACE(push, 6); - if (nvc0->screen->eng3d->oclass < TU102_3D_CLASS) { - BEGIN_NVC0(push, NVC0_3D(INDEX_ARRAY_START_HIGH), 5); - PUSH_DATAh(push, buf->address); - PUSH_DATA (push, buf->address); - PUSH_DATAh(push, buf->address + buf->base.width0 - 1); - PUSH_DATA (push, buf->address + buf->base.width0 - 1); - PUSH_DATA (push, info->index_size >> 1); - } else { - BEGIN_NVC0(push, NVC0_3D(INDEX_ARRAY_START_HIGH), 2); - PUSH_DATAh(push, buf->address); - PUSH_DATA (push, buf->address); - BEGIN_NVC0(push, SUBC_3D(TU102_3D_INDEX_ARRAY_LIMIT_HIGH), 2); - PUSH_DATAh(push, buf->address + buf->base.width0 - 1); - PUSH_DATA (push, buf->address + buf->base.width0 - 1); - BEGIN_NVC0(push, NVC0_3D(INDEX_FORMAT), 1); - PUSH_DATA (push, info->index_size >> 1); - } + BEGIN_NVC0(push, NVC0_3D(INDEX_ARRAY_START_HIGH), 5); + PUSH_DATAh(push, buf->address); + PUSH_DATA (push, buf->address); + PUSH_DATAh(push, buf->address + buf->base.width0 - 1); + PUSH_DATA (push, buf->address + buf->base.width0 - 1); + PUSH_DATA (push, info->index_size >> 1); BCTX_REFN(nvc0->bufctx_3d, 3D_IDX, buf, RD); } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c index d49a5df..8aa7088 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c @@ -228,11 +228,7 @@ nvc0_push_setup_vertex_array(struct nvc0_context *nvc0, const unsigned count) BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_START_HIGH(0)), 2); PUSH_DATAh(push, va); PUSH_DATA (push, va); - - if (nvc0->screen->eng3d->oclass < TU102_3D_CLASS) - BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(0)), 2); - else - BEGIN_NVC0(push, SUBC_3D(TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(0)), 2); + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(0)), 2); PUSH_DATAh(push, va + size - 1); PUSH_DATA (push, va + size - 1); @@ -775,11 +771,7 @@ nvc0_push_upload_vertex_ids(struct push_context *ctx, PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | index_size); PUSH_DATAh(push, va); PUSH_DATA (push, va); - - if (nvc0->screen->eng3d->oclass < TU102_3D_CLASS) - BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(1)), 2); - else - BEGIN_NVC0(push, SUBC_3D(TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(1)), 2); + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(1)), 2); PUSH_DATAh(push, va + info->count * index_size - 1); PUSH_DATA (push, va + info->count * index_size - 1); diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c index ebbc410..146eeb3 100644 --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c @@ -27,18 +27,11 @@ #include "codegen/nv50_ir_driver.h" -#include "drf.h" -#include "qmd.h" -#include "cla0c0qmd.h" -#include "clc0c0qmd.h" -#include "clc3c0qmd.h" - -#define NVA0C0_QMDV00_06_VAL_SET(p,a...) NVVAL_MW_SET((p), NVA0C0, QMDV00_06, ##a) -#define NVA0C0_QMDV00_06_DEF_SET(p,a...) NVDEF_MW_SET((p), NVA0C0, QMDV00_06, ##a) -#define NVC0C0_QMDV02_01_VAL_SET(p,a...) NVVAL_MW_SET((p), NVC0C0, QMDV02_01, ##a) -#define NVC0C0_QMDV02_01_DEF_SET(p,a...) NVDEF_MW_SET((p), NVC0C0, QMDV02_01, ##a) -#define NVC3C0_QMDV02_02_VAL_SET(p,a...) NVVAL_MW_SET((p), NVC3C0, QMDV02_02, ##a) -#define NVC3C0_QMDV02_02_DEF_SET(p,a...) NVDEF_MW_SET((p), NVC3C0, QMDV02_02, ##a) +#ifndef NDEBUG +static void nve4_compute_dump_launch_desc(const struct nve4_cp_launch_desc *); +static void gp100_compute_dump_launch_desc(const struct gp100_cp_launch_desc *); +#endif + int nve4_screen_compute_setup(struct nvc0_screen *screen, @@ -52,12 +45,6 @@ nve4_screen_compute_setup(struct nvc0_screen *screen, uint64_t address; switch (dev->chipset & ~0xf) { - case 0x160: - obj_class = TU102_COMPUTE_CLASS; - break; - case 0x140: - obj_class = GV100_COMPUTE_CLASS; - break; case 0x100: case 0xf0: obj_class = NVF0_COMPUTE_CLASS; /* GK110 */ @@ -101,35 +88,24 @@ nve4_screen_compute_setup(struct nvc0_screen *screen, PUSH_DATAh(push, screen->tls->size / screen->mp_count); PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff); PUSH_DATA (push, 0xff); - if (obj_class < GV100_COMPUTE_CLASS) { - BEGIN_NVC0(push, NVE4_CP(MP_TEMP_SIZE_HIGH(1)), 3); - PUSH_DATAh(push, screen->tls->size / screen->mp_count); - PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff); - PUSH_DATA (push, 0xff); - } + BEGIN_NVC0(push, NVE4_CP(MP_TEMP_SIZE_HIGH(1)), 3); + PUSH_DATAh(push, screen->tls->size / screen->mp_count); + PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff); + PUSH_DATA (push, 0xff); /* Unified address space ? Who needs that ? Certainly not OpenCL. * * FATAL: Buffers with addresses inside [0x1000000, 0x3000000] will NOT be * accessible. We cannot prevent that at the moment, so expect failure. */ - if (obj_class < GV100_COMPUTE_CLASS) { - BEGIN_NVC0(push, NVE4_CP(LOCAL_BASE), 1); - PUSH_DATA (push, 0xff << 24); - BEGIN_NVC0(push, NVE4_CP(SHARED_BASE), 1); - PUSH_DATA (push, 0xfe << 24); - - BEGIN_NVC0(push, NVE4_CP(CODE_ADDRESS_HIGH), 2); - PUSH_DATAh(push, screen->text->offset); - PUSH_DATA (push, screen->text->offset); - } else { - BEGIN_NVC0(push, SUBC_CP(0x2a0), 2); - PUSH_DATAh(push, 0xfeULL << 24); - PUSH_DATA (push, 0xfeULL << 24); - BEGIN_NVC0(push, SUBC_CP(0x7b0), 2); - PUSH_DATAh(push, 0xffULL << 24); - PUSH_DATA (push, 0xffULL << 24); - } + BEGIN_NVC0(push, NVE4_CP(LOCAL_BASE), 1); + PUSH_DATA (push, 0xff << 24); + BEGIN_NVC0(push, NVE4_CP(SHARED_BASE), 1); + PUSH_DATA (push, 0xfe << 24); + + BEGIN_NVC0(push, NVE4_CP(CODE_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->text->offset); + PUSH_DATA (push, screen->text->offset); BEGIN_NVC0(push, SUBC_CP(0x0310), 1); PUSH_DATA (push, (obj_class >= NVF0_COMPUTE_CLASS) ? 0x400 : 0x300); @@ -566,35 +542,14 @@ nve4_compute_upload_input(struct nvc0_context *nvc0, PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB); } -static inline void -gp100_cp_launch_desc_set_cb(uint32_t *qmd, unsigned index, - struct nouveau_bo *bo, uint32_t base, uint32_t size) -{ - uint64_t address = bo->offset + base; - - assert(index < 8); - assert(!(base & 0xff)); - - NVC0C0_QMDV02_01_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_LOWER, index, address); - NVC0C0_QMDV02_01_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_UPPER, index, address >> 32); - NVC0C0_QMDV02_01_VAL_SET(qmd, CONSTANT_BUFFER_SIZE_SHIFTED4, index, - DIV_ROUND_UP(size, 16)); - NVC0C0_QMDV02_01_DEF_SET(qmd, CONSTANT_BUFFER_VALID, index, TRUE); -} - -static inline void -nve4_cp_launch_desc_set_cb(uint32_t *qmd, unsigned index, struct nouveau_bo *bo, - uint32_t base, uint32_t size) +static inline uint8_t +nve4_compute_derive_cache_split(struct nvc0_context *nvc0, uint32_t shared_size) { - uint64_t address = bo->offset + base; - - assert(index < 8); - assert(!(base & 0xff)); - - NVA0C0_QMDV00_06_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_LOWER, index, address); - NVA0C0_QMDV00_06_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_UPPER, index, address >> 32); - NVA0C0_QMDV00_06_VAL_SET(qmd, CONSTANT_BUFFER_SIZE, index, size); - NVA0C0_QMDV00_06_DEF_SET(qmd, CONSTANT_BUFFER_VALID, index, TRUE); + if (shared_size > (32 << 10)) + return NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1; + if (shared_size > (16 << 10)) + return NVE4_3D_CACHE_SPLIT_32K_SHARED_32K_L1; + return NVC1_3D_CACHE_SPLIT_16K_SHARED_48K_L1; } static void @@ -622,186 +577,92 @@ nve4_compute_setup_buf_cb(struct nvc0_context *nvc0, bool gp100, void *desc) } static void -nve4_compute_setup_launch_desc(struct nvc0_context *nvc0, uint32_t *qmd, +nve4_compute_setup_launch_desc(struct nvc0_context *nvc0, + struct nve4_cp_launch_desc *desc, const struct pipe_grid_info *info) { const struct nvc0_screen *screen = nvc0->screen; const struct nvc0_program *cp = nvc0->compprog; - NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_TEXTURE_HEADER_CACHE, TRUE); - NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_TEXTURE_SAMPLER_CACHE, TRUE); - NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_TEXTURE_DATA_CACHE, TRUE); - NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_SHADER_DATA_CACHE, TRUE); - NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_SHADER_CONSTANT_CACHE, TRUE); - NVA0C0_QMDV00_06_DEF_SET(qmd, RELEASE_MEMBAR_TYPE, FE_SYSMEMBAR); - NVA0C0_QMDV00_06_DEF_SET(qmd, CWD_MEMBAR_TYPE, L1_SYSMEMBAR); - NVA0C0_QMDV00_06_DEF_SET(qmd, API_VISIBLE_CALL_LIMIT, NO_CHECK); - NVA0C0_QMDV00_06_VAL_SET(qmd, SASS_VERSION, 0x30); - - NVA0C0_QMDV00_06_VAL_SET(qmd, PROGRAM_OFFSET, - nvc0_program_symbol_offset(cp, info->pc)); - - NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_RASTER_WIDTH, info->grid[0]); - NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_RASTER_HEIGHT, info->grid[1]); - NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_RASTER_DEPTH, info->grid[2]); - NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_THREAD_DIMENSION0, info->block[0]); - NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_THREAD_DIMENSION1, info->block[1]); - NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_THREAD_DIMENSION2, info->block[2]); - - NVA0C0_QMDV00_06_VAL_SET(qmd, SHARED_MEMORY_SIZE, - align(cp->cp.smem_size, 0x100)); - NVA0C0_QMDV00_06_VAL_SET(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE, - (cp->hdr[1] & 0xfffff0) + - align(cp->cp.lmem_size, 0x10)); - NVA0C0_QMDV00_06_VAL_SET(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, 0); - NVA0C0_QMDV00_06_VAL_SET(qmd, SHADER_LOCAL_MEMORY_CRS_SIZE, 0x800); - - if (cp->cp.smem_size > (32 << 10)) - NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION, - DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB); - else - if (cp->cp.smem_size > (16 << 10)) - NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION, - DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB); - else - NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION, - DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB); + nve4_cp_launch_desc_init_default(desc); - NVA0C0_QMDV00_06_VAL_SET(qmd, REGISTER_COUNT, cp->num_gprs); - NVA0C0_QMDV00_06_VAL_SET(qmd, BARRIER_COUNT, cp->num_barriers); + desc->entry = nvc0_program_symbol_offset(cp, info->pc); + + desc->griddim_x = info->grid[0]; + desc->griddim_y = info->grid[1]; + desc->griddim_z = info->grid[2]; + desc->blockdim_x = info->block[0]; + desc->blockdim_y = info->block[1]; + desc->blockdim_z = info->block[2]; + + desc->shared_size = align(cp->cp.smem_size, 0x100); + desc->local_size_p = (cp->hdr[1] & 0xfffff0) + align(cp->cp.lmem_size, 0x10); + desc->local_size_n = 0; + desc->cstack_size = 0x800; + desc->cache_split = nve4_compute_derive_cache_split(nvc0, cp->cp.smem_size); + + desc->gpr_alloc = cp->num_gprs; + desc->bar_alloc = cp->num_barriers; // Only bind user uniforms and the driver constant buffer through the // launch descriptor because UBOs are sticked to the driver cb to avoid the // limitation of 8 CBs. if (nvc0->constbuf[5][0].user || cp->parm_size) { - nve4_cp_launch_desc_set_cb(qmd, 0, screen->uniform_bo, + nve4_cp_launch_desc_set_cb(desc, 0, screen->uniform_bo, NVC0_CB_USR_INFO(5), 1 << 16); // Later logic will attempt to bind a real buffer at position 0. That // should not happen if we've bound a user buffer. assert(nvc0->constbuf[5][0].user || !nvc0->constbuf[5][0].u.buf); } - nve4_cp_launch_desc_set_cb(qmd, 7, screen->uniform_bo, + nve4_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo, NVC0_CB_AUX_INFO(5), 1 << 11); - nve4_compute_setup_buf_cb(nvc0, false, qmd); + nve4_compute_setup_buf_cb(nvc0, false, desc); } static void -gp100_compute_setup_launch_desc(struct nvc0_context *nvc0, uint32_t *qmd, +gp100_compute_setup_launch_desc(struct nvc0_context *nvc0, + struct gp100_cp_launch_desc *desc, const struct pipe_grid_info *info) { const struct nvc0_screen *screen = nvc0->screen; const struct nvc0_program *cp = nvc0->compprog; - NVC0C0_QMDV02_01_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1); - NVC0C0_QMDV02_01_DEF_SET(qmd, RELEASE_MEMBAR_TYPE, FE_SYSMEMBAR); - NVC0C0_QMDV02_01_DEF_SET(qmd, CWD_MEMBAR_TYPE, L1_SYSMEMBAR); - NVC0C0_QMDV02_01_DEF_SET(qmd, API_VISIBLE_CALL_LIMIT, NO_CHECK); - - NVC0C0_QMDV02_01_VAL_SET(qmd, PROGRAM_OFFSET, - nvc0_program_symbol_offset(cp, info->pc)); - - NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_RASTER_WIDTH, info->grid[0]); - NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_RASTER_HEIGHT, info->grid[1]); - NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_RASTER_DEPTH, info->grid[2]); - NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_THREAD_DIMENSION0, info->block[0]); - NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_THREAD_DIMENSION1, info->block[1]); - NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_THREAD_DIMENSION2, info->block[2]); - - NVC0C0_QMDV02_01_VAL_SET(qmd, SHARED_MEMORY_SIZE, - align(cp->cp.smem_size, 0x100)); - NVC0C0_QMDV02_01_VAL_SET(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE, - (cp->hdr[1] & 0xfffff0) + - align(cp->cp.lmem_size, 0x10)); - NVC0C0_QMDV02_01_VAL_SET(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, 0); - NVC0C0_QMDV02_01_VAL_SET(qmd, SHADER_LOCAL_MEMORY_CRS_SIZE, 0x800); + gp100_cp_launch_desc_init_default(desc); - NVC0C0_QMDV02_01_VAL_SET(qmd, REGISTER_COUNT, cp->num_gprs); - NVC0C0_QMDV02_01_VAL_SET(qmd, BARRIER_COUNT, cp->num_barriers); + desc->entry = nvc0_program_symbol_offset(cp, info->pc); - // Only bind user uniforms and the driver constant buffer through the - // launch descriptor because UBOs are sticked to the driver cb to avoid the - // limitation of 8 CBs. - if (nvc0->constbuf[5][0].user || cp->parm_size) { - gp100_cp_launch_desc_set_cb(qmd, 0, screen->uniform_bo, - NVC0_CB_USR_INFO(5), 1 << 16); + desc->griddim_x = info->grid[0]; + desc->griddim_y = info->grid[1]; + desc->griddim_z = info->grid[2]; + desc->blockdim_x = info->block[0]; + desc->blockdim_y = info->block[1]; + desc->blockdim_z = info->block[2]; - // Later logic will attempt to bind a real buffer at position 0. That - // should not happen if we've bound a user buffer. - assert(nvc0->constbuf[5][0].user || !nvc0->constbuf[5][0].u.buf); - } - gp100_cp_launch_desc_set_cb(qmd, 7, screen->uniform_bo, - NVC0_CB_AUX_INFO(5), 1 << 11); - - nve4_compute_setup_buf_cb(nvc0, true, qmd); -} - -static int -gv100_sm_config_smem_size(u32 size) -{ - if (size > 64 * 1024) size = 96 * 1024; - else if (size > 32 * 1024) size = 64 * 1024; - else if (size > 16 * 1024) size = 32 * 1024; - else if (size > 8 * 1024) size = 16 * 1024; - else size = 8 * 1024; - return (size / 4096) + 1; -} + desc->shared_size = align(cp->cp.smem_size, 0x100); + desc->local_size_p = (cp->hdr[1] & 0xfffff0) + align(cp->cp.lmem_size, 0x10); + desc->local_size_n = 0; + desc->cstack_size = 0x800; -static void -gv100_compute_setup_launch_desc(struct nvc0_context *nvc0, u32 *qmd, - const struct pipe_grid_info *info) -{ - struct nvc0_program *cp = nvc0->compprog; - struct nvc0_screen *screen = nvc0->screen; - uint64_t entry = - screen->text->offset + nvc0_program_symbol_offset(cp, info->pc); - - NVC3C0_QMDV02_02_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1); - NVC3C0_QMDV02_02_DEF_SET(qmd, API_VISIBLE_CALL_LIMIT, NO_CHECK); - NVC3C0_QMDV02_02_DEF_SET(qmd, SAMPLER_INDEX, INDEPENDENTLY); - NVC3C0_QMDV02_02_VAL_SET(qmd, SHARED_MEMORY_SIZE, - align(cp->cp.smem_size, 0x100)); - NVC3C0_QMDV02_02_VAL_SET(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE, - (cp->hdr[1] & 0xfffff0) + - align(cp->cp.lmem_size, 0x10)); - NVC3C0_QMDV02_02_VAL_SET(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, 0); - NVC3C0_QMDV02_02_VAL_SET(qmd, MIN_SM_CONFIG_SHARED_MEM_SIZE, - gv100_sm_config_smem_size(8 * 1024)); - NVC3C0_QMDV02_02_VAL_SET(qmd, MAX_SM_CONFIG_SHARED_MEM_SIZE, - gv100_sm_config_smem_size(96 * 1024)); - NVC3C0_QMDV02_02_VAL_SET(qmd, QMD_VERSION, 2); - NVC3C0_QMDV02_02_VAL_SET(qmd, QMD_MAJOR_VERSION, 2); - NVC3C0_QMDV02_02_VAL_SET(qmd, TARGET_SM_CONFIG_SHARED_MEM_SIZE, - gv100_sm_config_smem_size(cp->cp.smem_size)); - - NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_RASTER_WIDTH, info->grid[0]); - NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_RASTER_HEIGHT, info->grid[1]); - NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_RASTER_DEPTH, info->grid[2]); - NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_THREAD_DIMENSION0, info->block[0]); - NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_THREAD_DIMENSION1, info->block[1]); - NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_THREAD_DIMENSION2, info->block[2]); - NVC3C0_QMDV02_02_VAL_SET(qmd, REGISTER_COUNT_V, cp->num_gprs); - NVC3C0_QMDV02_02_VAL_SET(qmd, BARRIER_COUNT, cp->num_barriers); + desc->gpr_alloc = cp->num_gprs; + desc->bar_alloc = cp->num_barriers; // Only bind user uniforms and the driver constant buffer through the // launch descriptor because UBOs are sticked to the driver cb to avoid the // limitation of 8 CBs. if (nvc0->constbuf[5][0].user || cp->parm_size) { - gp100_cp_launch_desc_set_cb(qmd, 0, screen->uniform_bo, + gp100_cp_launch_desc_set_cb(desc, 0, screen->uniform_bo, NVC0_CB_USR_INFO(5), 1 << 16); // Later logic will attempt to bind a real buffer at position 0. That // should not happen if we've bound a user buffer. assert(nvc0->constbuf[5][0].user || !nvc0->constbuf[5][0].u.buf); } - gp100_cp_launch_desc_set_cb(qmd, 7, screen->uniform_bo, + gp100_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo, NVC0_CB_AUX_INFO(5), 1 << 11); - nve4_compute_setup_buf_cb(nvc0, true, qmd); - - NVC3C0_QMDV02_02_VAL_SET(qmd, PROGRAM_ADDRESS_LOWER, entry & 0xffffffff); - NVC3C0_QMDV02_02_VAL_SET(qmd, PROGRAM_ADDRESS_UPPER, entry >> 32); + nve4_compute_setup_buf_cb(nvc0, true, desc); } static inline void * @@ -816,7 +677,6 @@ nve4_compute_alloc_launch_desc(struct nouveau_context *nv, ptr += adj; *pgpuaddr += adj; } - memset(ptr, 0x00, 256); return ptr; } @@ -874,9 +734,6 @@ nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) if (ret) goto out; - if (nvc0->screen->compute->oclass >= GV100_COMPUTE_CLASS) - gv100_compute_setup_launch_desc(nvc0, desc, info); - else if (nvc0->screen->compute->oclass >= GP100_COMPUTE_CLASS) gp100_compute_setup_launch_desc(nvc0, desc, info); else @@ -886,14 +743,10 @@ nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) #ifndef NDEBUG if (debug_get_num_option("NV50_PROG_DEBUG", 0)) { - debug_printf("Queue Meta Data:\n"); - if (nvc0->screen->compute->oclass >= GV100_COMPUTE_CLASS) - NVC3C0QmdDump_V02_02(desc); - else if (nvc0->screen->compute->oclass >= GP100_COMPUTE_CLASS) - NVC0C0QmdDump_V02_01(desc); + gp100_compute_dump_launch_desc(desc); else - NVA0C0QmdDump_V00_06(desc); + nve4_compute_dump_launch_desc(desc); } #endif @@ -1024,6 +877,115 @@ nve4_compute_validate_textures(struct nvc0_context *nvc0) nvc0->dirty_3d |= NVC0_NEW_3D_TEXTURES; } + +#ifndef NDEBUG +static const char *nve4_cache_split_name(unsigned value) +{ + switch (value) { + case NVC1_3D_CACHE_SPLIT_16K_SHARED_48K_L1: return "16K_SHARED_48K_L1"; + case NVE4_3D_CACHE_SPLIT_32K_SHARED_32K_L1: return "32K_SHARED_32K_L1"; + case NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1: return "48K_SHARED_16K_L1"; + default: + return "(invalid)"; + } +} + +static void +nve4_compute_dump_launch_desc(const struct nve4_cp_launch_desc *desc) +{ + const uint32_t *data = (const uint32_t *)desc; + unsigned i; + bool zero = false; + + debug_printf("COMPUTE LAUNCH DESCRIPTOR:\n"); + + for (i = 0; i < sizeof(*desc); i += 4) { + if (data[i / 4]) { + debug_printf("[%x]: 0x%08x\n", i, data[i / 4]); + zero = false; + } else + if (!zero) { + debug_printf("...\n"); + zero = true; + } + } + + debug_printf("entry = 0x%x\n", desc->entry); + debug_printf("grid dimensions = %ux%ux%u\n", + desc->griddim_x, desc->griddim_y, desc->griddim_z); + debug_printf("block dimensions = %ux%ux%u\n", + desc->blockdim_x, desc->blockdim_y, desc->blockdim_z); + debug_printf("s[] size: 0x%x\n", desc->shared_size); + debug_printf("l[] size: -0x%x / +0x%x\n", + desc->local_size_n, desc->local_size_p); + debug_printf("stack size: 0x%x\n", desc->cstack_size); + debug_printf("barrier count: %u\n", desc->bar_alloc); + debug_printf("$r count: %u\n", desc->gpr_alloc); + debug_printf("cache split: %s\n", nve4_cache_split_name(desc->cache_split)); + debug_printf("linked tsc: %d\n", desc->linked_tsc); + + for (i = 0; i < 8; ++i) { + uint64_t address; + uint32_t size = desc->cb[i].size; + bool valid = !!(desc->cb_mask & (1 << i)); + + address = ((uint64_t)desc->cb[i].address_h << 32) | desc->cb[i].address_l; + + if (!valid && !address && !size) + continue; + debug_printf("CB[%u]: address = 0x%"PRIx64", size 0x%x%s\n", + i, address, size, valid ? "" : " (invalid)"); + } +} + +static void +gp100_compute_dump_launch_desc(const struct gp100_cp_launch_desc *desc) +{ + const uint32_t *data = (const uint32_t *)desc; + unsigned i; + bool zero = false; + + debug_printf("COMPUTE LAUNCH DESCRIPTOR:\n"); + + for (i = 0; i < sizeof(*desc); i += 4) { + if (data[i / 4]) { + debug_printf("[%x]: 0x%08x\n", i, data[i / 4]); + zero = false; + } else + if (!zero) { + debug_printf("...\n"); + zero = true; + } + } + + debug_printf("entry = 0x%x\n", desc->entry); + debug_printf("grid dimensions = %ux%ux%u\n", + desc->griddim_x, desc->griddim_y, desc->griddim_z); + debug_printf("block dimensions = %ux%ux%u\n", + desc->blockdim_x, desc->blockdim_y, desc->blockdim_z); + debug_printf("s[] size: 0x%x\n", desc->shared_size); + debug_printf("l[] size: -0x%x / +0x%x\n", + desc->local_size_n, desc->local_size_p); + debug_printf("stack size: 0x%x\n", desc->cstack_size); + debug_printf("barrier count: %u\n", desc->bar_alloc); + debug_printf("$r count: %u\n", desc->gpr_alloc); + debug_printf("linked tsc: %d\n", desc->linked_tsc); + + for (i = 0; i < 8; ++i) { + uint64_t address; + uint32_t size = desc->cb[i].size_sh4 << 4; + bool valid = !!(desc->cb_mask & (1 << i)); + + address = ((uint64_t)desc->cb[i].address_h << 32) | desc->cb[i].address_l; + + if (!valid && !address && !size) + continue; + debug_printf("CB[%u]: address = 0x%"PRIx64", size 0x%x%s\n", + i, address, size, valid ? "" : " (invalid)"); + } +} +#endif + #ifdef NOUVEAU_NVE4_MP_TRAP_HANDLER static void nve4_compute_trap_info(struct nvc0_context *nvc0) diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.h b/src/gallium/drivers/nouveau/nvc0/nve4_compute.h index d2599f7..7ff6935 100644 --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.h +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.h @@ -4,6 +4,142 @@ #include "nvc0/nve4_compute.xml.h" +struct nve4_cp_launch_desc +{ + u32 unk0[8]; + u32 entry; + u32 unk9[2]; + u32 unk11_0 : 30; + u32 linked_tsc : 1; + u32 unk11_31 : 1; + u32 griddim_x : 31; + u32 unk12 : 1; + u16 griddim_y; + u16 griddim_z; + u32 unk14[3]; + u16 shared_size; /* must be aligned to 0x100 */ + u16 unk17; + u16 unk18; + u16 blockdim_x; + u16 blockdim_y; + u16 blockdim_z; + u32 cb_mask : 8; + u32 unk20_8 : 21; + u32 cache_split : 2; + u32 unk20_31 : 1; + u32 unk21[8]; + struct { + u32 address_l; + u32 address_h : 8; + u32 reserved : 7; + u32 size : 17; + } cb[8]; + u32 local_size_p : 20; + u32 unk45_20 : 7; + u32 bar_alloc : 5; + u32 local_size_n : 20; + u32 unk46_20 : 4; + u32 gpr_alloc : 8; + u32 cstack_size : 20; + u32 unk47_20 : 12; + u32 unk48[16]; +}; + +struct gp100_cp_launch_desc +{ + u32 unk0[8]; + u32 entry; + u32 unk9[2]; + u32 unk11_0 : 30; + u32 linked_tsc : 1; + u32 unk11_31 : 1; + u32 griddim_x : 31; + u32 unk12 : 1; + u16 griddim_y; + u16 unk13; + u16 griddim_z; + u16 unk14; + u32 unk15[2]; + u32 shared_size : 18; + u32 unk17 : 14; + u16 unk18; + u16 blockdim_x; + u16 blockdim_y; + u16 blockdim_z; + u32 cb_mask : 8; + u32 unk20 : 24; + u32 unk21[8]; + u32 local_size_p : 24; + u32 unk29 : 3; + u32 bar_alloc : 5; + u32 local_size_n : 24; + u32 gpr_alloc : 8; + u32 cstack_size : 24; + u32 unk31 : 8; + struct { + u32 address_l; + u32 address_h : 17; + u32 reserved : 2; + u32 size_sh4 : 13; + } cb[8]; + u32 unk48[16]; +}; + +static inline void +nve4_cp_launch_desc_init_default(struct nve4_cp_launch_desc *desc) +{ + memset(desc, 0, sizeof(*desc)); + + desc->unk0[7] = 0xbc000000; + desc->unk11_0 = 0x04014000; + desc->unk47_20 = 0x300; +} + +static inline void +nve4_cp_launch_desc_set_cb(struct nve4_cp_launch_desc *desc, + unsigned index, + struct nouveau_bo *bo, + uint32_t base, uint32_t size) +{ + uint64_t address = bo->offset + base; + + assert(index < 8); + assert(!(base & 0xff)); + + desc->cb[index].address_l = address; + desc->cb[index].address_h = address >> 32; + desc->cb[index].size = size; + + desc->cb_mask |= 1 << index; +} + +static inline void +gp100_cp_launch_desc_init_default(struct gp100_cp_launch_desc *desc) +{ + memset(desc, 0, sizeof(*desc)); + + desc->unk0[4] = 0x40; + desc->unk11_0 = 0x04014000; +} + +static inline void +gp100_cp_launch_desc_set_cb(struct gp100_cp_launch_desc *desc, + unsigned index, + struct nouveau_bo *bo, + uint32_t base, uint32_t size) +{ + uint64_t address = bo->offset + base; + + assert(index < 8); + assert(!(base & 0xff)); + + desc->cb[index].address_l = address; + desc->cb[index].address_h = address >> 32; + desc->cb[index].size_sh4 = DIV_ROUND_UP(size, 16); + + desc->cb_mask |= 1 << index; +} + struct nve4_mp_trap_info { u32 lock; u32 pc; diff --git a/src/gallium/drivers/nouveau/nvc0/qmd.h b/src/gallium/drivers/nouveau/nvc0/qmd.h deleted file mode 100644 index 86c290f..0000000 --- a/src/gallium/drivers/nouveau/nvc0/qmd.h +++ /dev/null @@ -1,68 +0,0 @@ -#ifndef __NVHW_QMD_H__ -#define __NVHW_QMD_H__ -#include -#include -#include "util/u_debug.h" -#include "drf.h" - -#define NVQMD_ENUM_1(X,drf,v0) \ - [drf##_##v0] = #v0 -#define NVQMD_ENUM_2(X,drf,v0,v1) \ - [drf##_##v0] = #v0, \ - [drf##_##v1] = #v1 -#define NVQMD_ENUM_3(X,drf,v0,v1,v2) \ - [drf##_##v0] = #v0, \ - [drf##_##v1] = #v1, \ - [drf##_##v2] = #v2 -#define NVQMD_ENUM_8(X,drf,v0,v1,v2,v3,v4,v5,v6,v7) \ - [drf##_##v0] = #v0, \ - [drf##_##v1] = #v1, \ - [drf##_##v2] = #v2, \ - [drf##_##v3] = #v3, \ - [drf##_##v4] = #v4, \ - [drf##_##v5] = #v5, \ - [drf##_##v6] = #v6, \ - [drf##_##v7] = #v7 - -#define NVQMD_ENUM_(X,_1,_2,_3,_4,_5,_6,_7,_8,_9,IMPL,...) IMPL -#define NVQMD_ENUM(A...) NVQMD_ENUM_(X, ##A, NVQMD_ENUM_8, NVQMD_ENUM_7, \ - NVQMD_ENUM_6, NVQMD_ENUM_5, \ - NVQMD_ENUM_4, NVQMD_ENUM_3, \ - NVQMD_ENUM_2, NVQMD_ENUM_1)(X, ##A) - -#define NVQMD_VAL_N(X,d,r,p,f,o) do { \ - uint32_t val = NVVAL_MW_GET_X((p), d##_##r##_##f); \ - debug_printf(" %-36s: "o"\n", #f, val); \ -} while(0) -#define NVQMD_VAL_I(X,d,r,p,f,i,o) do { \ - uint32_t val = NVVAL_MW_GET_X((p), d##_##r##_##f(i)); \ - char name[80]; \ - snprintf(name, sizeof(name), "%s(%d)", #f, i); \ - debug_printf(" %-36s: "o"\n", name, val); \ -} while(0) -#define NVQMD_VAL_(X,_1,_2,_3,_4,_5,_6,IMPL,...) IMPL -#define NVQMD_VAL(A...) NVQMD_VAL_(X, ##A, NVQMD_VAL_I, NVQMD_VAL_N)(X, ##A) - -#define NVQMD_DEF(d,r,p,f,e...) do { \ - static const char *ev[] = { NVQMD_ENUM(d##_##r##_##f,##e) }; \ - uint32_t val = NVVAL_MW_GET((p), d, r, f); \ - if (val < ARRAY_SIZE(ev) && ev[val]) \ - debug_printf(" %-36s: %s\n", #f, ev[val]); \ - else \ - debug_printf(" %-36s: UNKNOWN 0x%x\n", #f, val); \ -} while(0) -#define NVQMD_IDX(d,r,p,f,i,e...) do { \ - static const char *ev[] = { NVQMD_ENUM(d##_##r##_##f,##e) }; \ - char name[80]; \ - snprintf(name, sizeof(name), "%s(%d)", #f, i); \ - uint32_t val = NVVAL_MW_GET((p), d, r, f, i); \ - if (val < ARRAY_SIZE(ev) && ev[val]) \ - debug_printf(" %-36s: %s\n", name, ev[val]); \ - else \ - debug_printf(" %-36s: UNKNOWN 0x%x\n", name, val); \ -} while(0) - -void NVA0C0QmdDump_V00_06(uint32_t *); -void NVC0C0QmdDump_V02_01(uint32_t *); -void NVC3C0QmdDump_V02_02(uint32_t *); -#endif diff --git a/src/gallium/drivers/nouveau/nvc0/qmda0c0.c b/src/gallium/drivers/nouveau/nvc0/qmda0c0.c deleted file mode 100644 index 7103a89..0000000 --- a/src/gallium/drivers/nouveau/nvc0/qmda0c0.c +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Copyright 2020 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ -#include "qmd.h" -#include "cla0c0qmd.h" - -#define NVA0C0_QMDV00_06_VAL(a...) NVQMD_VAL(NVA0C0, QMDV00_06, ##a) -#define NVA0C0_QMDV00_06_DEF(a...) NVQMD_DEF(NVA0C0, QMDV00_06, ##a) -#define NVA0C0_QMDV00_06_IDX(a...) NVQMD_IDX(NVA0C0, QMDV00_06, ##a) - -void -NVA0C0QmdDump_V00_06(uint32_t *qmd) -{ - NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_A, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_B, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_C, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_D, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_E, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_F, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_G, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_H, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_A_A, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_I, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_J, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_A, "0x%x"); - NVA0C0_QMDV00_06_DEF(qmd, QMD_RESERVED_V1_K, FALSE, TRUE); - NVA0C0_QMDV00_06_DEF(qmd, QMD_RESERVED_V1_L, FALSE, TRUE); - NVA0C0_QMDV00_06_DEF(qmd, SEMAPHORE_RELEASE_ENABLE0, FALSE, TRUE); - NVA0C0_QMDV00_06_DEF(qmd, SEMAPHORE_RELEASE_ENABLE1, FALSE, TRUE); - NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_B, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_M, "0x%x"); - NVA0C0_QMDV00_06_DEF(qmd, QMD_RESERVED_V1_N, FALSE, TRUE); - NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_O, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_C, "0x%x"); - NVA0C0_QMDV00_06_DEF(qmd, INVALIDATE_TEXTURE_HEADER_CACHE, FALSE, TRUE); - NVA0C0_QMDV00_06_DEF(qmd, INVALIDATE_TEXTURE_SAMPLER_CACHE, FALSE, TRUE); - NVA0C0_QMDV00_06_DEF(qmd, INVALIDATE_TEXTURE_DATA_CACHE, FALSE, TRUE); - NVA0C0_QMDV00_06_DEF(qmd, INVALIDATE_SHADER_DATA_CACHE, FALSE, TRUE); - NVA0C0_QMDV00_06_DEF(qmd, INVALIDATE_INSTRUCTION_CACHE, FALSE, TRUE); - NVA0C0_QMDV00_06_DEF(qmd, INVALIDATE_SHADER_CONSTANT_CACHE, FALSE, TRUE); - NVA0C0_QMDV00_06_VAL(qmd, PROGRAM_OFFSET, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_P, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_Q, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_D, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_R, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_S, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_E, "0x%x"); - NVA0C0_QMDV00_06_DEF(qmd, RELEASE_MEMBAR_TYPE, FE_NONE, FE_SYSMEMBAR); - NVA0C0_QMDV00_06_DEF(qmd, CWD_MEMBAR_TYPE, L1_NONE, L1_SYSMEMBAR, L1_MEMBAR); - NVA0C0_QMDV00_06_DEF(qmd, QMD_RESERVED_V1_T, FALSE, TRUE); - NVA0C0_QMDV00_06_DEF(qmd, QMD_RESERVED_V1_U, FALSE, TRUE); - NVA0C0_QMDV00_06_DEF(qmd, THROTTLED, FALSE, TRUE); - NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_E2_A, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_E2_B, "0x%x"); - NVA0C0_QMDV00_06_DEF(qmd, API_VISIBLE_CALL_LIMIT, _32, NO_CHECK); - NVA0C0_QMDV00_06_DEF(qmd, SHARED_MEMORY_BANK_MAPPING, FOUR_BYTES_PER_BANK, - EIGHT_BYTES_PER_BANK); - NVA0C0_QMDV00_06_DEF(qmd, SAMPLER_INDEX, INDEPENDENTLY, VIA_HEADER_INDEX); - NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_E3_A, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, CTA_RASTER_WIDTH, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, CTA_RASTER_HEIGHT, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, CTA_RASTER_DEPTH, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, CTA_RASTER_WIDTH_RESUME, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, CTA_RASTER_HEIGHT_RESUME, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, CTA_RASTER_DEPTH_RESUME, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_V, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_F, "0x%x"); - NVA0C0_QMDV00_06_DEF(qmd, QMD_RESERVED_V1_W, FALSE, TRUE); - NVA0C0_QMDV00_06_VAL(qmd, SHARED_MEMORY_SIZE, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_G, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, QMD_VERSION, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, QMD_MAJOR_VERSION, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_H, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, CTA_THREAD_DIMENSION0, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, CTA_THREAD_DIMENSION1, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, CTA_THREAD_DIMENSION2, "0x%x"); - for (int i = 0; i < 8; i++) - NVA0C0_QMDV00_06_IDX(qmd, CONSTANT_BUFFER_VALID, i, FALSE, TRUE); - NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_I, "0x%x"); - NVA0C0_QMDV00_06_DEF(qmd, L1_CONFIGURATION, - DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB, - DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB, - DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB); - NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_X, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_Y, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, RELEASE0_ADDRESS_LOWER, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, RELEASE0_ADDRESS_UPPER, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_J, "0x%x"); - NVA0C0_QMDV00_06_DEF(qmd, RELEASE0_REDUCTION_OP, RED_ADD, - RED_MIN, - RED_MAX, - RED_INC, - RED_DEC, - RED_AND, - RED_OR, - RED_XOR); - NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_K, "0x%x"); - NVA0C0_QMDV00_06_DEF(qmd, RELEASE0_REDUCTION_FORMAT, UNSIGNED_32, SIGNED_32); - NVA0C0_QMDV00_06_DEF(qmd, RELEASE0_REDUCTION_ENABLE, FALSE, TRUE); - NVA0C0_QMDV00_06_DEF(qmd, RELEASE0_STRUCTURE_SIZE, FOUR_WORDS, ONE_WORD); - NVA0C0_QMDV00_06_VAL(qmd, RELEASE0_PAYLOAD, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, RELEASE1_ADDRESS_LOWER, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, RELEASE1_ADDRESS_UPPER, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_L, "0x%x"); - NVA0C0_QMDV00_06_DEF(qmd, RELEASE1_REDUCTION_OP, RED_ADD, - RED_MIN, - RED_MAX, - RED_INC, - RED_DEC, - RED_AND, - RED_OR, - RED_XOR); - NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_M, "0x%x"); - NVA0C0_QMDV00_06_DEF(qmd, RELEASE1_REDUCTION_FORMAT, UNSIGNED_32, SIGNED_32); - NVA0C0_QMDV00_06_DEF(qmd, RELEASE1_REDUCTION_ENABLE, FALSE, TRUE); - NVA0C0_QMDV00_06_DEF(qmd, RELEASE1_STRUCTURE_SIZE, FOUR_WORDS, ONE_WORD); - NVA0C0_QMDV00_06_VAL(qmd, RELEASE1_PAYLOAD, "0x%x"); - for (int i = 0; i < 8; i++) { - NVA0C0_QMDV00_06_VAL(qmd, CONSTANT_BUFFER_ADDR_LOWER, i, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, CONSTANT_BUFFER_ADDR_UPPER, i, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, CONSTANT_BUFFER_RESERVED_ADDR, i, "0x%x"); - NVA0C0_QMDV00_06_IDX(qmd, CONSTANT_BUFFER_INVALIDATE, i, FALSE, TRUE); - NVA0C0_QMDV00_06_VAL(qmd, CONSTANT_BUFFER_SIZE, i, "0x%x"); - } - NVA0C0_QMDV00_06_VAL(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_N, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, BARRIER_COUNT, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, REGISTER_COUNT, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, SHADER_LOCAL_MEMORY_CRS_SIZE, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, SASS_VERSION, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_A, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_B, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_C, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_D, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_E, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_F, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_G, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_H, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_I, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_J, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_K, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_L, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_M, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_N, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, DEBUG_ID_UPPER, "0x%x"); - NVA0C0_QMDV00_06_VAL(qmd, DEBUG_ID_LOWER, "0x%x"); -} diff --git a/src/gallium/drivers/nouveau/nvc0/qmdc0c0.c b/src/gallium/drivers/nouveau/nvc0/qmdc0c0.c deleted file mode 100644 index 945439e..0000000 --- a/src/gallium/drivers/nouveau/nvc0/qmdc0c0.c +++ /dev/null @@ -1,165 +0,0 @@ -/* - * Copyright 2020 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ -#include "qmd.h" -#include "clc0c0qmd.h" - -#define NVC0C0_QMDV02_01_VAL(a...) NVQMD_VAL(NVC0C0, QMDV02_01, ##a) -#define NVC0C0_QMDV02_01_DEF(a...) NVQMD_DEF(NVC0C0, QMDV02_01, ##a) -#define NVC0C0_QMDV02_01_IDX(a...) NVQMD_IDX(NVC0C0, QMDV02_01, ##a) - -void -NVC0C0QmdDump_V02_01(uint32_t *qmd) -{ - NVC0C0_QMDV02_01_VAL(qmd, OUTER_PUT, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, OUTER_OVERFLOW, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, OUTER_GET, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, OUTER_STICKY_OVERFLOW, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, INNER_GET, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, INNER_OVERFLOW, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, INNER_PUT, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, INNER_STICKY_OVERFLOW, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, QMD_GROUP_ID, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, SM_GLOBAL_CACHING_ENABLE, "0x%x"); - NVC0C0_QMDV02_01_DEF(qmd, RUN_CTA_IN_ONE_SM_PARTITION, FALSE, TRUE); - NVC0C0_QMDV02_01_DEF(qmd, IS_QUEUE, FALSE, TRUE); - NVC0C0_QMDV02_01_DEF(qmd, ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST, FALSE, TRUE); - NVC0C0_QMDV02_01_DEF(qmd, SEMAPHORE_RELEASE_ENABLE0, FALSE, TRUE); - NVC0C0_QMDV02_01_DEF(qmd, SEMAPHORE_RELEASE_ENABLE1, FALSE, TRUE); - NVC0C0_QMDV02_01_DEF(qmd, REQUIRE_SCHEDULING_PCAS, FALSE, TRUE); - NVC0C0_QMDV02_01_DEF(qmd, DEPENDENT_QMD_SCHEDULE_ENABLE, FALSE, TRUE); - NVC0C0_QMDV02_01_DEF(qmd, DEPENDENT_QMD_TYPE, QUEUE, GRID); - NVC0C0_QMDV02_01_DEF(qmd, DEPENDENT_QMD_FIELD_COPY, FALSE, TRUE); - NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_B, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, CIRCULAR_QUEUE_SIZE, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_C, "0x%x"); - NVC0C0_QMDV02_01_DEF(qmd, INVALIDATE_TEXTURE_HEADER_CACHE, FALSE, TRUE); - NVC0C0_QMDV02_01_DEF(qmd, INVALIDATE_TEXTURE_SAMPLER_CACHE, FALSE, TRUE); - NVC0C0_QMDV02_01_DEF(qmd, INVALIDATE_TEXTURE_DATA_CACHE, FALSE, TRUE); - NVC0C0_QMDV02_01_DEF(qmd, INVALIDATE_SHADER_DATA_CACHE, FALSE, TRUE); - NVC0C0_QMDV02_01_DEF(qmd, INVALIDATE_INSTRUCTION_CACHE, FALSE, TRUE); - NVC0C0_QMDV02_01_DEF(qmd, INVALIDATE_SHADER_CONSTANT_CACHE, FALSE, TRUE); - NVC0C0_QMDV02_01_VAL(qmd, CTA_RASTER_WIDTH_RESUME, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, CTA_RASTER_HEIGHT_RESUME, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, CTA_RASTER_DEPTH_RESUME, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, PROGRAM_OFFSET, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, CIRCULAR_QUEUE_ADDR_LOWER, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, CIRCULAR_QUEUE_ADDR_UPPER, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_D, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, CIRCULAR_QUEUE_ENTRY_SIZE, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, CWD_REFERENCE_COUNT_ID, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, CWD_REFERENCE_COUNT_DELTA_MINUS_ONE, "0x%x"); - NVC0C0_QMDV02_01_DEF(qmd, RELEASE_MEMBAR_TYPE, FE_NONE, FE_SYSMEMBAR); - NVC0C0_QMDV02_01_DEF(qmd, CWD_REFERENCE_COUNT_INCR_ENABLE, FALSE, TRUE); - NVC0C0_QMDV02_01_DEF(qmd, CWD_MEMBAR_TYPE, L1_NONE, L1_SYSMEMBAR, L1_MEMBAR); - NVC0C0_QMDV02_01_DEF(qmd, SEQUENTIALLY_RUN_CTAS, FALSE, TRUE); - NVC0C0_QMDV02_01_DEF(qmd, CWD_REFERENCE_COUNT_DECR_ENABLE, FALSE, TRUE); - NVC0C0_QMDV02_01_DEF(qmd, THROTTLED, FALSE, TRUE); - NVC0C0_QMDV02_01_DEF(qmd, API_VISIBLE_CALL_LIMIT, _32, NO_CHECK); - NVC0C0_QMDV02_01_DEF(qmd, SAMPLER_INDEX, INDEPENDENTLY, VIA_HEADER_INDEX); - NVC0C0_QMDV02_01_VAL(qmd, CTA_RASTER_WIDTH, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, CTA_RASTER_HEIGHT, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED13A, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, CTA_RASTER_DEPTH, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED14A, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, DEPENDENT_QMD_POINTER, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, QUEUE_ENTRIES_PER_CTA_MINUS_ONE, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, COALESCE_WAITING_PERIOD, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, SHARED_MEMORY_SIZE, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_G, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, QMD_VERSION, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, QMD_MAJOR_VERSION, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_H, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, CTA_THREAD_DIMENSION0, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, CTA_THREAD_DIMENSION1, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, CTA_THREAD_DIMENSION2, "0x%x"); - for (int i = 0; i < 8; i++) - NVC0C0_QMDV02_01_IDX(qmd, CONSTANT_BUFFER_VALID, i, FALSE, TRUE); - NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_I, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, SM_DISABLE_MASK_LOWER, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, SM_DISABLE_MASK_UPPER, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, RELEASE0_ADDRESS_LOWER, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, RELEASE0_ADDRESS_UPPER, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_J, "0x%x"); - NVC0C0_QMDV02_01_DEF(qmd, RELEASE0_REDUCTION_OP, RED_ADD, - RED_MIN, - RED_MAX, - RED_INC, - RED_DEC, - RED_AND, - RED_OR, - RED_XOR); - NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_K, "0x%x"); - NVC0C0_QMDV02_01_DEF(qmd, RELEASE0_REDUCTION_FORMAT, UNSIGNED_32, SIGNED_32); - NVC0C0_QMDV02_01_DEF(qmd, RELEASE0_REDUCTION_ENABLE, FALSE, TRUE); - NVC0C0_QMDV02_01_DEF(qmd, RELEASE0_STRUCTURE_SIZE, FOUR_WORDS, ONE_WORD); - NVC0C0_QMDV02_01_VAL(qmd, RELEASE0_PAYLOAD, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, RELEASE1_ADDRESS_LOWER, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, RELEASE1_ADDRESS_UPPER, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_L, "0x%x"); - NVC0C0_QMDV02_01_DEF(qmd, RELEASE1_REDUCTION_OP, RED_ADD, - RED_MIN, - RED_MAX, - RED_INC, - RED_DEC, - RED_AND, - RED_OR, - RED_XOR); - NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_M, "0x%x"); - NVC0C0_QMDV02_01_DEF(qmd, RELEASE1_REDUCTION_FORMAT, UNSIGNED_32, SIGNED_32); - NVC0C0_QMDV02_01_DEF(qmd, RELEASE1_REDUCTION_ENABLE, FALSE, TRUE); - NVC0C0_QMDV02_01_DEF(qmd, RELEASE1_STRUCTURE_SIZE, FOUR_WORDS, ONE_WORD); - NVC0C0_QMDV02_01_VAL(qmd, RELEASE1_PAYLOAD, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_N, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, BARRIER_COUNT, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, REGISTER_COUNT, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, SHADER_LOCAL_MEMORY_CRS_SIZE, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, SASS_VERSION, "0x%x"); - for (int i = 0; i < 8; i++) { - NVC0C0_QMDV02_01_VAL(qmd, CONSTANT_BUFFER_ADDR_LOWER, i, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, CONSTANT_BUFFER_ADDR_UPPER, i, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, CONSTANT_BUFFER_RESERVED_ADDR, i, "0x%x"); - NVC0C0_QMDV02_01_IDX(qmd, CONSTANT_BUFFER_INVALIDATE, i, FALSE, TRUE); - NVC0C0_QMDV02_01_VAL(qmd, CONSTANT_BUFFER_SIZE_SHIFTED4, i, "0x%x"); - } - NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_R, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_S, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, HW_ONLY_INNER_GET, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, HW_ONLY_REQUIRE_SCHEDULING_PCAS, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, HW_ONLY_INNER_PUT, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, HW_ONLY_SCG_TYPE, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, HW_ONLY_SPAN_LIST_HEAD_INDEX, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_Q, "0x%x"); - NVC0C0_QMDV02_01_DEF(qmd, HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID, FALSE, TRUE); - NVC0C0_QMDV02_01_VAL(qmd, HW_ONLY_SKED_NEXT_QMD_POINTER, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, QMD_SPARE_G, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, QMD_SPARE_H, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, QMD_SPARE_I, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, QMD_SPARE_J, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, QMD_SPARE_K, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, QMD_SPARE_L, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, QMD_SPARE_M, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, QMD_SPARE_N, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, DEBUG_ID_UPPER, "0x%x"); - NVC0C0_QMDV02_01_VAL(qmd, DEBUG_ID_LOWER, "0x%x"); -} diff --git a/src/gallium/drivers/nouveau/nvc0/qmdc3c0.c b/src/gallium/drivers/nouveau/nvc0/qmdc3c0.c deleted file mode 100644 index c9bd896..0000000 --- a/src/gallium/drivers/nouveau/nvc0/qmdc3c0.c +++ /dev/null @@ -1,168 +0,0 @@ -/* - * Copyright 2020 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ -#include "qmd.h" -#include "clc3c0qmd.h" - -#define NVC3C0_QMDV02_02_VAL(a...) NVQMD_VAL(NVC3C0, QMDV02_02, ##a) -#define NVC3C0_QMDV02_02_DEF(a...) NVQMD_DEF(NVC3C0, QMDV02_02, ##a) -#define NVC3C0_QMDV02_02_IDX(a...) NVQMD_IDX(NVC3C0, QMDV02_02, ##a) - -void -NVC3C0QmdDump_V02_02(uint32_t *qmd) -{ - NVC3C0_QMDV02_02_VAL(qmd, OUTER_PUT, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, OUTER_OVERFLOW, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, OUTER_GET, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, OUTER_STICKY_OVERFLOW, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, INNER_GET, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, INNER_OVERFLOW, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, INNER_PUT, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, INNER_STICKY_OVERFLOW, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, QMD_GROUP_ID, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, SM_GLOBAL_CACHING_ENABLE, "0x%x"); - NVC3C0_QMDV02_02_DEF(qmd, RUN_CTA_IN_ONE_SM_PARTITION, FALSE, TRUE); - NVC3C0_QMDV02_02_DEF(qmd, IS_QUEUE, FALSE, TRUE); - NVC3C0_QMDV02_02_DEF(qmd, ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST, FALSE, TRUE); - NVC3C0_QMDV02_02_DEF(qmd, SEMAPHORE_RELEASE_ENABLE0, FALSE, TRUE); - NVC3C0_QMDV02_02_DEF(qmd, SEMAPHORE_RELEASE_ENABLE1, FALSE, TRUE); - NVC3C0_QMDV02_02_DEF(qmd, REQUIRE_SCHEDULING_PCAS, FALSE, TRUE); - NVC3C0_QMDV02_02_DEF(qmd, DEPENDENT_QMD_SCHEDULE_ENABLE, FALSE, TRUE); - NVC3C0_QMDV02_02_DEF(qmd, DEPENDENT_QMD_TYPE, QUEUE, GRID); - NVC3C0_QMDV02_02_DEF(qmd, DEPENDENT_QMD_FIELD_COPY, FALSE, TRUE); - NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_B, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, CIRCULAR_QUEUE_SIZE, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_C, "0x%x"); - NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_TEXTURE_HEADER_CACHE, FALSE, TRUE); - NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_TEXTURE_SAMPLER_CACHE, FALSE, TRUE); - NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_TEXTURE_DATA_CACHE, FALSE, TRUE); - NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_SHADER_DATA_CACHE, FALSE, TRUE); - NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_INSTRUCTION_CACHE, FALSE, TRUE); - NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_SHADER_CONSTANT_CACHE, FALSE, TRUE); - NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_WIDTH_RESUME, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_HEIGHT_RESUME, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_DEPTH_RESUME, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, PROGRAM_OFFSET, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, CIRCULAR_QUEUE_ADDR_LOWER, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, CIRCULAR_QUEUE_ADDR_UPPER, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_D, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, CIRCULAR_QUEUE_ENTRY_SIZE, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, CWD_REFERENCE_COUNT_ID, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, CWD_REFERENCE_COUNT_DELTA_MINUS_ONE, "0x%x"); - NVC3C0_QMDV02_02_DEF(qmd, RELEASE_MEMBAR_TYPE, FE_NONE, FE_SYSMEMBAR); - NVC3C0_QMDV02_02_DEF(qmd, CWD_REFERENCE_COUNT_INCR_ENABLE, FALSE, TRUE); - NVC3C0_QMDV02_02_DEF(qmd, CWD_MEMBAR_TYPE, L1_NONE, L1_SYSMEMBAR, L1_MEMBAR); - NVC3C0_QMDV02_02_DEF(qmd, SEQUENTIALLY_RUN_CTAS, FALSE, TRUE); - NVC3C0_QMDV02_02_DEF(qmd, CWD_REFERENCE_COUNT_DECR_ENABLE, FALSE, TRUE); - NVC3C0_QMDV02_02_DEF(qmd, API_VISIBLE_CALL_LIMIT, _32, NO_CHECK); - NVC3C0_QMDV02_02_DEF(qmd, SAMPLER_INDEX, INDEPENDENTLY, VIA_HEADER_INDEX); - NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_WIDTH, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_HEIGHT, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED13A, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_DEPTH, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED14A, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, DEPENDENT_QMD_POINTER, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, QUEUE_ENTRIES_PER_CTA_MINUS_ONE, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, COALESCE_WAITING_PERIOD, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, SHARED_MEMORY_SIZE, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, MIN_SM_CONFIG_SHARED_MEM_SIZE, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, MAX_SM_CONFIG_SHARED_MEM_SIZE, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, QMD_VERSION, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, QMD_MAJOR_VERSION, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_H, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, CTA_THREAD_DIMENSION0, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, CTA_THREAD_DIMENSION1, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, CTA_THREAD_DIMENSION2, "0x%x"); - for (int i = 0; i < 8; i++) - NVC3C0_QMDV02_02_IDX(qmd, CONSTANT_BUFFER_VALID, i, FALSE, TRUE); - NVC3C0_QMDV02_02_VAL(qmd, REGISTER_COUNT_V, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, TARGET_SM_CONFIG_SHARED_MEM_SIZE, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, FREE_CTA_SLOTS_EMPTY_SM, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, SM_DISABLE_MASK_LOWER, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, SM_DISABLE_MASK_UPPER, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, RELEASE0_ADDRESS_LOWER, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, RELEASE0_ADDRESS_UPPER, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_J, "0x%x"); - NVC3C0_QMDV02_02_DEF(qmd, RELEASE0_REDUCTION_OP, RED_ADD, - RED_MIN, - RED_MAX, - RED_INC, - RED_DEC, - RED_AND, - RED_OR, - RED_XOR); - NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_K, "0x%x"); - NVC3C0_QMDV02_02_DEF(qmd, RELEASE0_REDUCTION_FORMAT, UNSIGNED_32, SIGNED_32); - NVC3C0_QMDV02_02_DEF(qmd, RELEASE0_REDUCTION_ENABLE, FALSE, TRUE); - NVC3C0_QMDV02_02_DEF(qmd, RELEASE0_STRUCTURE_SIZE, FOUR_WORDS, ONE_WORD); - NVC3C0_QMDV02_02_VAL(qmd, RELEASE0_PAYLOAD, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, RELEASE1_ADDRESS_LOWER, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, RELEASE1_ADDRESS_UPPER, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_L, "0x%x"); - NVC3C0_QMDV02_02_DEF(qmd, RELEASE1_REDUCTION_OP, RED_ADD, - RED_MIN, - RED_MAX, - RED_INC, - RED_DEC, - RED_AND, - RED_OR, - RED_XOR); - NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_M, "0x%x"); - NVC3C0_QMDV02_02_DEF(qmd, RELEASE1_REDUCTION_FORMAT, UNSIGNED_32, SIGNED_32); - NVC3C0_QMDV02_02_DEF(qmd, RELEASE1_REDUCTION_ENABLE, FALSE, TRUE); - NVC3C0_QMDV02_02_DEF(qmd, RELEASE1_STRUCTURE_SIZE, FOUR_WORDS, ONE_WORD); - NVC3C0_QMDV02_02_VAL(qmd, RELEASE1_PAYLOAD, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_N, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, BARRIER_COUNT, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, REGISTER_COUNT, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, SHADER_LOCAL_MEMORY_CRS_SIZE, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, SASS_VERSION, "0x%x"); - for (int i = 0; i < 8; i++) { - NVC3C0_QMDV02_02_VAL(qmd, CONSTANT_BUFFER_ADDR_LOWER, i, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, CONSTANT_BUFFER_ADDR_UPPER, i, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, CONSTANT_BUFFER_RESERVED_ADDR, i, "0x%x"); - NVC3C0_QMDV02_02_IDX(qmd, CONSTANT_BUFFER_INVALIDATE, i, FALSE, TRUE); - NVC3C0_QMDV02_02_VAL(qmd, CONSTANT_BUFFER_SIZE_SHIFTED4, i, "0x%x"); - } - NVC3C0_QMDV02_02_VAL(qmd, PROGRAM_ADDRESS_LOWER, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, PROGRAM_ADDRESS_UPPER, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_S, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_INNER_GET, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_REQUIRE_SCHEDULING_PCAS, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_INNER_PUT, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_SCG_TYPE, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_SPAN_LIST_HEAD_INDEX, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_Q, "0x%x"); - NVC3C0_QMDV02_02_DEF(qmd, HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID, FALSE, TRUE); - NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_SKED_NEXT_QMD_POINTER, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_G, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_H, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_I, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_J, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_K, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_L, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_M, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_N, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, DEBUG_ID_UPPER, "0x%x"); - NVC3C0_QMDV02_02_VAL(qmd, DEBUG_ID_LOWER, "0x%x"); -} diff --git a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c index d123c8a..5c43518 100644 --- a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c +++ b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c @@ -104,8 +104,6 @@ nouveau_drm_screen_create(int fd) case 0x110: case 0x120: case 0x130: - case 0x140: - case 0x160: init = nvc0_screen_create; break; default: