1#ifndef FREE_TENSOR_MAKE_PARLLEL_REDUCTION_H
2#define FREE_TENSOR_MAKE_PARLLEL_REDUCTION_H
5#include <unordered_map>
6#include <unordered_set>
26 std::unordered_map<ID, ParallelInfo> results_;
28 std::vector<ID> loopStack_;
31 const std::unordered_map<ID, ParallelInfo> &
results()
const {
40 std::unordered_map<ID, std::vector<For>>
42 std::vector<For> loopStack_;
45 const std::unordered_map<ID, std::vector<For>> &
results()
const {
64 struct ReductionItemFactors {
67 std::vector<std::vector<Ref<CompUniqueBounds::Bound>>>
72 const std::unordered_map<ID, std::unordered_set<ID>>
79 std::unordered_set<ID> toUseSync_;
81 std::unordered_map<ID, ParallelScope> paraScopes_;
82 std::unordered_map<ID, std::vector<ReductionItemFactors>> forReductions_;
83 std::unordered_map<ID, std::unordered_set<std::string>>
86 std::vector<ID> paraLoopStack_;
/// Predicate over a reduction statement and a loop ID (declaration only; the
/// definition is not part of this header excerpt).
///
/// NOTE(review): judging by the name and the neighbouring `toUseSync_` set,
/// this presumably decides whether the reduction `op` has to fall back to a
/// synchronized implementation with respect to the loop `loopId` -- confirm
/// against the definition in the .cc file.
///
/// @param op     the ReduceTo statement being examined
/// @param loopId ID of the loop the question is asked about (TODO confirm)
/// @return a boolean verdict; exact criteria are defined out of line
89 bool needSync(
const ReduceTo &op,
const ID &loopId);
93 const std::unordered_map<
ID, std::unordered_set<ID>> &toAlter,
95 : toAlter_(toAlter), variantMap_(variantMap) {}
97 const auto &
toUseSync()
const {
return toUseSync_; }
112 const std::unordered_set<ID> &toUseSync_;
113 const std::unordered_map<ID, std::vector<For>>
117#if defined(__GNUC__) && !defined(__clang__)
124 struct SyncCacheInfo {
126 std::vector<Expr> newShape_, newTargetIndices_;
127 std::vector<bool> preserveDim_;
129 std::unordered_map<
ID,
130 std::vector<SyncCacheInfo>>
133 int64_t gpuThreadDim_ = 1;
/// Const predicate taking an element data type and a shape given as a vector
/// of `Expr` (declaration only; the definition is not part of this header
/// excerpt).
///
/// NOTE(review): by its name this presumably answers whether a buffer of
/// type `dtype` with dimensions `shape` can be placed in GPU local memory --
/// confirm the exact criteria against the definition in the .cc file.
///
/// @param dtype element data type of the buffer in question
/// @param shape per-dimension sizes as expressions (TODO confirm meaning)
/// @return a boolean verdict; does not modify the object (const-qualified)
164 bool canResideInGPULocal(
DataType dtype,
165 const std::vector<Expr> &shape)
const;
168 const std::vector<Expr> &shape)
const;
172 const std::unordered_set<ID> &toUseSync,
173 const std::unordered_map<
ID, std::vector<For>> &serialOverRed,
175 : toUseSync_(toUseSync), serialOverRed_(serialOverRed),
176 variantMap_(variantMap), target_(target) {}
Definition: comp_transient_bounds.h:50
BaseClass::StmtRetType visit(const For &op) override
Definition: comp_transient_bounds.h:128
Definition: data_type.h:106
Definition: make_parallel_reduction.h:24
const std::unordered_map< ID, ParallelInfo > & results() const
Definition: make_parallel_reduction.h:31
void visit(const For &op) override
Definition: make_parallel_reduction.cc:29
Definition: make_parallel_reduction.h:39
const std::unordered_map< ID, std::vector< For > > & results() const
Definition: make_parallel_reduction.h:45
void visit(const For &op) override
Definition: make_parallel_reduction.cc:39
Definition: make_parallel_reduction.h:61
const auto & toUseSync() const
Definition: make_parallel_reduction.h:97
MakeLoopCarriedReduction(const std::unordered_map< ID, std::unordered_set< ID > > &toAlter, const LoopVariExprMap &variantMap)
Definition: make_parallel_reduction.h:92
Stmt visit(const ReduceTo &op) override
Definition: make_parallel_reduction.cc:71
Definition: make_parallel_reduction.h:109
Stmt visit(const ReduceTo &op) override
Definition: make_parallel_reduction.cc:229
MakeSyncReduction(const std::unordered_set< ID > &toUseSync, const std::unordered_map< ID, std::vector< For > > &serialOverRed, const LoopVariExprMap &variantMap, const Ref< Target > &target)
Definition: make_parallel_reduction.h:171
Definition: symbol_table.h:122
BaseClass::StmtRetType visit(const VarDef &op) override
Definition: symbol_table.h:167
#define DEFINE_PASS_FOR_FUNC(pass)
Definition: func.h:88
Definition: allocator.h:9
std::unordered_map< StmtOrExprID, std::unordered_map< ID, LoopVariability > > LoopVariExprMap
Definition: find_loop_variance.h:26
std::variant< SerialScope, OpenMPScope, CUDAStreamScope, CUDAScope > ParallelScope
Definition: parallel_scope.h:73
Ref< StmtNode > Stmt
Definition: ast.h:152
ReduceOp
Definition: reduce_op.h:30
Stmt makeParallelReduction(const Stmt &op, const Ref< Target > &target)
Definition: make_parallel_reduction.cc:376
MemType
Definition: mem_type.h:14
Definition: make_parallel_reduction.h:19
std::vector< ID > outerLoops_
Definition: make_parallel_reduction.h:21
ParallelScope type_
Definition: make_parallel_reduction.h:20