diff --git a/lib/Dialect/TritonNvidiaGPU/Transforms/OptimizeTMemLayouts.cpp b/lib/Dialect/TritonNvidiaGPU/Transforms/OptimizeTMemLayouts.cpp index 449a3ad3f5d8..2131daa8ce0e 100644 --- a/lib/Dialect/TritonNvidiaGPU/Transforms/OptimizeTMemLayouts.cpp +++ b/lib/Dialect/TritonNvidiaGPU/Transforms/OptimizeTMemLayouts.cpp @@ -363,8 +363,11 @@ class TMemToSharedMemPattern : public OpRewritePattern { SmallVector> uses; uses.push_back({tmemLoadOp.getResult(), newEncoding}); bool foundImprovedStore = false; + llvm::DenseSet> visited; while (!uses.empty()) { auto [v, encoding] = uses.pop_back_val(); + if (!visited.insert({v, encoding}).second) + continue; for (auto user : v.getUsers()) { if (auto localStore = dyn_cast(user)) { // Check if the store benefits from the new layout.