doxygen/html/lower_8h_source.html

#ifndef FREE_TENSOR_LOWER_H

#define FREE_TENSOR_LOWER_H


#include <unordered_set>


#include <autograd/clear_mark_version.h>

#include <config.h>

#include <driver/target.h>

#include <pass/cpu/lower_parallel_reduction.h>

#include <pass/float_simplify.h>

#include <pass/gpu/lower_parallel_reduction.h>

#include <pass/gpu/lower_vector.h>

#include <pass/gpu/make_sync.h>

#include <pass/gpu/multiplex_buffers.h>

#include <pass/gpu/normalize_threads.h>

#include <pass/gpu/normalize_var_in_kernel.h>

#include <pass/gpu/simplex_buffers.h>

#include <pass/make_heap_alloc.h>

#include <pass/make_parallel_reduction.h>

#include <pass/make_reduction.h>

#include <pass/merge_and_hoist_if.h>

#include <pass/move_out_first_or_last_iter.h>

#include <pass/prop_one_time_use.h>

#include <pass/remove_cyclic_assign.h>

#include <pass/remove_dead_var.h>

#include <pass/remove_writes.h>

#include <pass/scalar_prop_const.h>

#include <pass/shrink_for.h>

#include <pass/shrink_var.h>

#include <pass/simplify.h>

#include <pass/sink_var.h>

#include <pass/tensor_prop_const.h>

#include <pass/use_builtin_div.h>

#include <pass/z3_simplify.h>


namespace freetensor {


template <class T>

T lower(const T &_ast, const Ref<Target> &_target = nullptr,

        const std::unordered_set<std::string> &skipPasses = {},

        int verbose = 0) {


    auto target = _target.isValid() ? _target : Config::defaultTarget();


    auto maybePrint = [&](const std::string &name, const T &ast) -> T {

        if (verbose >= 2) {

            logger() << "AST after " << name << " is:" << std::endl

                     << ast << std::endl;

        }

        return ast;

    };


#define FIRST_OF(x, ...) (x)

#define APPLY(name, pass, ...)                                                 \

    skipPasses.count(name) ? FIRST_OF(__VA_ARGS__)                             \

                           : maybePrint(name, pass(__VA_ARGS__))


    // NOTE: The following passes enables each other: some optimizations can be

    // done in pass A only after we do pass B first. Thus the order of the

    // passes matters. If you found some program that cannot be optimized by the

    // current order, add it to `test/20.pass/test_lower.py` and adjust the

    // order.

    //

    // We only focus on programs having a real use, because there is no one

    // order that fits all. A seemingly possible solution is to run all the

    // passes iteratively until convergence, but the passes are slow and it may

    // require a number of iterations proportional to the program size to

    // converge. Such a progam can be

    //

    // ```

    // if (1 == 1) {

    //   a = 1

    // }

    // if (a == 1) {

    //   b = 1

    // }

    // if (b == 1) {

    //   c = 1

    // }

    // ```

    //

    // where it needs `simplify` to remove the `if`s, and `prop_const` to fill

    // the varaible into the `if`s' conditions. We consider it more important to

    // compile a program than to make it optimal, so we are not going to fully

    // optimize it.


    T ast = _ast;

    ast = clearMarkVersion(ast);

    ast = APPLY("make_reduction", makeReduction, ast);

    ast = APPLY("scalar_prop_const", scalarPropConst, ast);

    ast = APPLY("remove_dead_var", removeDeadVar, ast);

    ast = APPLY("simplify", simplify,

                ast); // first time before propagations for indices

    ast = APPLY("remove_writes", removeWrites, ast);

    ast = APPLY("prop_one_time_use", propOneTimeUse, ast);

    ast = APPLY("float_simplify", floatSimplify, ast); // After propOneTimeUse

    ast = APPLY("z3_simplify", z3Simplify, ast);

    ast = APPLY("simplify", simplify,

                ast); // next time after propagations for propagated values

    ast = APPLY("move_out_first_or_last_iter", moveOutFirstOrLastIter, ast);

    ast = APPLY("sink_var", sinkVar, ast);

    ast = APPLY("shrink_var", shrinkVar, ast);

    ast = APPLY("merge_and_hoist_if", mergeAndHoistIf, ast);

    ast = APPLY("tensor_prop_const", tensorPropConst, ast);

    ast = APPLY("remove_dead_var", removeDeadVar,

                ast); // After remove_writes and prop_const

    ast = APPLY("remove_cyclic_assign", removeCyclicAssign,

                ast); // After remove_writes and remove_dead_var

    ast = APPLY("make_parallel_reduction", makeParallelReduction, ast, target);

    ast = APPLY("shrink_for", shrinkFor,

                ast); // After remove_writes and make_parallel_reduction


    switch (target->type()) {

#ifdef FT_WITH_CUDA

    case TargetType::GPU: {

        auto t = target.as<GPUTarget>();

        ast = APPLY("gpu_lower_parallel_reduction", gpu::lowerParallelReduction,

                    ast); // Before gpu_nromalize_threads

        ast = APPLY("gpu_multiplex_buffers", gpu::multiplexBuffers, ast, t);

        ast = APPLY("gpu_simplex_buffers", gpu::simplexBuffers, ast);

        ast = APPLY("gpu_normalize_threads", gpu::normalizeThreads,

                    ast); // After gpu_multiplex_buffers

        ast = APPLY("gpu_normalize_var_in_kernel", gpu::normalizeVarInKernel,

                    ast);

        ast = APPLY("make_heap_alloc", makeHeapAlloc, ast);

        ast = APPLY("gpu_make_sync", gpu::makeSync, ast,

                    t); // After gpu_normalize_threads

        ast = APPLY("gpu_lower_vector", gpu::lowerVector, ast);

        ast = APPLY("use_builtin_div", useBuiltinDiv, ast);

        break;

    }

#endif // FT_WITH_CUDA


    case TargetType::CPU:

        ast = APPLY("cpu_lower_parallel_reduction", cpu::lowerParallelReduction,

                    ast);

        ast = APPLY("make_heap_alloc", makeHeapAlloc, ast);

        ast = APPLY("use_builtin_div", useBuiltinDiv, ast);

        break;


    default:

        ASSERT(false);

    }


#undef FIRST_OF

#undef APPLY


    if (verbose >= 1) {

        logger() << "The lowered AST is:" << std::endl << ast << std::endl;

    }


    return ast;

}


} // namespace freetensor


#endif // FREE_TENSOR_LOWER_H

freetensor::Config::defaultTarget
static Ref< Target > defaultTarget()
Definition: config.h:146

freetensor::Ref
Definition: ref.h:24

clear_mark_version.h

config.h

lower_parallel_reduction.h

ASSERT
#define ASSERT(expr)
Definition: except.h:152

float_simplify.h

lower_parallel_reduction.h

APPLY
#define APPLY(name, pass,...)

lower_vector.h

make_heap_alloc.h

make_parallel_reduction.h

make_reduction.h

make_sync.h

merge_and_hoist_if.h

move_out_first_or_last_iter.h

multiplex_buffers.h

freetensor::cpu::lowerParallelReduction
Stmt lowerParallelReduction(const Stmt &op)
Definition: lower_parallel_reduction.cc:206

freetensor::gpu::normalizeVarInKernel
Stmt normalizeVarInKernel(const Stmt &s)
Definition: normalize_var_in_kernel.cc:121

freetensor
Definition: allocator.h:9

freetensor::TargetType::CPU
@ CPU

freetensor::TargetType::GPU
@ GPU

freetensor::removeDeadVar
Stmt removeDeadVar(const Stmt &op)
Definition: remove_dead_var.cc:124

freetensor::useBuiltinDiv
Stmt useBuiltinDiv(const Stmt &op)
Definition: use_builtin_div.cc:95

freetensor::lower
T lower(const T &_ast, const Ref< Target > &_target=nullptr, const std::unordered_set< std::string > &skipPasses={}, int verbose=0)
Definition: lower.h:53

freetensor::shrinkFor
Stmt shrinkFor(const Stmt &op, const ID &subAST=ID(), bool doSimplify=true, bool unordered=false)
Definition: shrink_for.cc:396

freetensor::simplify
Stmt simplify(const Stmt &op)
Definition: simplify.cc:1036

freetensor::tensorPropConst
Stmt tensorPropConst(const Stmt &op, const ID &bothInSubAST=ID(), const ID &eitherInSubAST=ID())
Definition: tensor_prop_const.cc:25

freetensor::sinkVar
Stmt sinkVar(const Stmt &op, const std::optional< std::unordered_set< ID > > &toSink=std::nullopt, const std::function< bool(const Stmt &)> &scopeFilter=nullptr)
Definition: sink_var.cc:182

freetensor::z3Simplify
Stmt z3Simplify(const Stmt &op)
Definition: z3_simplify.cc:602

freetensor::logger
Logger logger()
Definition: logger.h:60

freetensor::mergeAndHoistIf
Stmt mergeAndHoistIf(const Stmt &op)
Definition: merge_and_hoist_if.cc:104

freetensor::shrinkVar
Stmt shrinkVar(const Stmt &op)
Definition: shrink_var.cc:102

freetensor::makeHeapAlloc
Stmt makeHeapAlloc(const Stmt &op)
Definition: make_heap_alloc.cc:127

freetensor::clearMarkVersion
Stmt clearMarkVersion(const Stmt &op)
Definition: clear_mark_version.h:15

freetensor::removeWrites
Stmt removeWrites(const Stmt &op, const ID &singleDefId={})
Definition: remove_writes.cc:185

freetensor::moveOutFirstOrLastIter
Stmt moveOutFirstOrLastIter(const Stmt &op)
Definition: move_out_first_or_last_iter.h:35

freetensor::scalarPropConst
Stmt scalarPropConst(const Stmt &op)
Definition: scalar_prop_const.cc:241

freetensor::propOneTimeUse
Stmt propOneTimeUse(const Stmt &op, const ID &subAST=ID())
Definition: prop_one_time_use.cc:57

freetensor::floatSimplify
Stmt floatSimplify(const Stmt &op)
Definition: float_simplify.cc:492

freetensor::removeCyclicAssign
Stmt removeCyclicAssign(const Stmt &op)
Definition: remove_cyclic_assign.cc:6

freetensor::makeReduction
Stmt makeReduction(const Stmt &op, const std::unordered_set< ReduceOp > &types, bool canonicalOnly=false)
Definition: make_reduction.h:38

freetensor::makeParallelReduction
Stmt makeParallelReduction(const Stmt &op, const Ref< Target > &target)
Definition: make_parallel_reduction.cc:376

normalize_threads.h

normalize_var_in_kernel.h

prop_one_time_use.h

remove_cyclic_assign.h

remove_dead_var.h

remove_writes.h

scalar_prop_const.h

shrink_for.h

shrink_var.h

simplex_buffers.h

simplify.h

sink_var.h

target.h

tensor_prop_const.h

use_builtin_div.h

z3_simplify.h