Skip to content

Finish #9

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
.PHONY : build clean format install-python test-cpp test-onnx

TYPE ?= Release


TYPE ?= Debug
TEST ?= ON

CMAKE_OPT = -DCMAKE_BUILD_TYPE=$(TYPE)
Expand Down
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
# TinyInfiniTensor



一个简化版的 ai compiler,用于初学者快速上手学习,保留了计算图和 kernel 层的概念,能够基于 c++ 搭建计算图进行推理计算,目前只支持 cpu 平台。

[环境部署文档](docs/项目部署.md)

[训练营作业介绍文档](docs/训练营作业介绍.md)
3 changes: 2 additions & 1 deletion include/core/allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ namespace infini {
// TODO:可能需要设计一个数据结构来存储free block,以便于管理和合并
// HINT: 可以使用一个 map 来存储 free block,key 为 block 的起始/结尾地址,value 为 block 的大小
// =================================== 作业 ===================================

// Free list: maps a free block's start offset -> its size (bytes).
// Kept in an ordered map so adjacent blocks can be found and coalesced
// in free(); alloc() scans it first-fit in address order.
std::map<size_t, size_t> freeBlocks;

public:
Allocator(Runtime runtime);

Expand Down
10 changes: 10 additions & 0 deletions report.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<testsuites tests="1" failures="1" disabled="0" errors="0" time="0." timestamp="2025-08-08T12:42:56.640" name="AllTests">
<testsuite name="Concat" tests="1" failures="1" disabled="0" skipped="0" errors="0" time="0." timestamp="2025-08-08T12:42:56.640">
<testcase name="NativeCpu" file="/amax/2020/hx2024/Cpp/TinyInfiniTensor/test/kernels/nativecpu/test_nativecpu_concat.cc" line="9" status="run" result="completed" time="0." timestamp="2025-08-08T12:42:56.640" classname="Concat">
<failure message="unknown file&#x0A;C++ exception with description &quot;&quot; thrown in the test body.&#x0A;" type=""><![CDATA[unknown file
C++ exception with description "" thrown in the test body.
]]></failure>
</testcase>
</testsuite>
</testsuites>
55 changes: 52 additions & 3 deletions src/core/allocator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,18 +32,67 @@ namespace infini
// =================================== 作业 ===================================
// TODO: 设计一个算法来分配内存,返回起始地址偏移量
// =================================== 作业 ===================================

if (freeBlocks.empty())
{
freeBlocks[0] = 4096; // Initially, all memory is free
}
for(auto it = freeBlocks.begin(); it != freeBlocks.end(); it ++){
auto [addr, blockSize] = *it;
if(blockSize >= size){ //blockSize 是可用空间
if(blockSize > size){
// Split the block if it's larger than requested size
freeBlocks[addr + size] = blockSize - size;
}
freeBlocks.erase(it);
used += size;
peak = std::max(peak, used);
return it->first;
}
}

return 0;



// if (this->freeBlocks.empty())
// this->freeBlocks[0] = 1024;
// for (auto it = this->freeBlocks.begin(); it != this->freeBlocks.end(); ++it)
// {
// if (it->second >= size)
// {
// if (it->second > size)
// this->freeBlocks[it->first + size] = it->second - size;
// auto ans = it->first;
// this->freeBlocks.erase(it);
// this->used += size;
// this->peak = (this->peak >= this->used) ? this->peak : this->used;
// return ans;
// }
// }
}

void Allocator::free(size_t addr, size_t size)
{
    // Offsets may only be recycled before the backing buffer has been
    // materialized (same precondition as the original code asserts).
    IT_ASSERT(this->ptr == nullptr);
    size = getAlignedSize(size);

    // Record the freed block, then coalesce with both neighbours so the
    // free list stays maximally merged.
    freeBlocks[addr] = size;
    auto it = freeBlocks.find(addr);

    // Merge with the following block when it is immediately adjacent.
    auto nextIt = std::next(it);
    if (nextIt != freeBlocks.end() && it->first + it->second == nextIt->first)
    {
        it->second += nextIt->second;
        freeBlocks.erase(nextIt);
    }

    // Merge with the preceding block when one exists and is adjacent.
    // Fixed: std::prev(it) was previously computed unconditionally, which
    // is undefined behavior when it == freeBlocks.begin(); guard first.
    if (it != freeBlocks.begin())
    {
        auto prevIt = std::prev(it);
        if (prevIt->first + prevIt->second == it->first)
        {
            prevIt->second += it->second;
            freeBlocks.erase(it);
        }
    }
    used = used - size;
}

void *Allocator::getPtr()
Expand Down
193 changes: 192 additions & 1 deletion src/core/graph.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
#include <algorithm>
#include <numeric>
#include <queue>

#include "operators/matmul.h"
#include "operators/transpose.h"
namespace infini
{

Expand Down Expand Up @@ -106,6 +107,163 @@ namespace infini
// 1. 去除冗余的算子(例如,两个相邻的算子都是 transpose 算子,且做的是相反的操作,可以将其全部删除)
// 2. 合并算子(例如,矩阵乘算子中含有属性transA、transB,如果其输入存在transpose,且对最后两个维度做交换,就可以将transpose融入到矩阵乘算子的属性中去)
// =================================== 作业 ===================================

// Rule 1: remove a pair of adjacent transpose ops whose permutations are
// inverses of each other — together they are a no-op on the data.
// NOTE(review): removeOperator() below mutates `ops` while this index loop
// runs; erasing shifts later elements so some ops may be skipped — confirm
// the intended traversal, or iterate over a snapshot.
for (size_t i = 0; i < ops.size(); ++i)
{
    Operator op = ops[i];
    if (op->getOpType() == OpType::Transpose)
    {
        Tensor tensor = op->getOutput();
        if (!tensor)
            continue;
        auto targets = tensor->getTargets();
        if (targets.empty())
            continue;
        // Only the first consumer is inspected; assumes the transpose
        // output has a single target — TODO confirm for multi-consumer graphs.
        Operator op_next = targets[0];
        if (op_next->getOpType() == OpType::Transpose)
        {
            TransposeObj *op1 = as<TransposeObj>(op).get();
            TransposeObj *op2 = as<TransposeObj>(op_next).get();
            auto op1_permute = op1->getPermute();
            auto op2_permute = op2->getPermute();
            if (op1_permute.size() != op2_permute.size())
                continue;
            // The pair cancels iff composing the permutations yields the
            // identity: perm1[perm2[j]] == j for every axis j.
            bool flag = true;
            for (int j = 0; j < (int)op1_permute.size(); j++)
            {
                if (op1_permute[op2_permute[j]] != j)
                {
                    flag = false;
                    continue;
                }
            }
            if (!flag) // flag == false: the pair cannot be eliminated
                continue;
            // Input tensor of the first transpose (the original data).
            Tensor originalInput = op->getInputs()[0];

            // Output of the first transpose (intermediate result).
            Tensor firstTransposeOutput = op->getOutput();

            // Output of the second transpose (final result).
            Tensor secondTransposeOutput = op_next->getOutput();

            // Consumer of the final result (e.g. a matmul).
            Operator consumerOp = secondTransposeOutput->getTargets()[0];

            // Handle to the consumer's other input (e.g. matmul's rhs).
            Tensor consumerOtherInput = consumerOp->getInputs()[1];

            // Rewire the consumer to read the original input directly,
            // skipping both transposes.
            consumerOp->replaceInput(consumerOp->getInputs()[0], originalInput);

            // Update the original input's connectivity:
            originalInput->removeTarget(op); // drop the edge to the first transpose
            originalInput->addTarget(consumerOp); // add the edge to the consumer
            // NOTE(review): this discards a real producer if originalInput
            // was produced by another op, and makes the getSource() branch
            // below always false — confirm this is intended.
            originalInput->setSource(nullptr);

            // Release the now-redundant ops and tensors.
            removeOperator(op); // first transpose
            removeOperator(op_next); // second transpose
            removeTensor(firstTransposeOutput); // intermediate tensor
            removeTensor(secondTransposeOutput); // final tensor

            // Fix up the operator-level topology.
            consumerOp->removePredecessors(op_next); // unlink from the second transpose

            // If the original input has a producer, link it to the consumer.
            if (originalInput->getSource()) {
                consumerOp->addPredecessors(originalInput->getSource());
                originalInput->getSource()->addSuccessors(consumerOp);
            }
        }
    }
}

// Rule 2: fold a transpose that swaps only the last two axes of a matmul
// input into the matmul itself via its transA/transB attributes.
for (size_t opIndex = 0; opIndex < ops.size(); ++opIndex) {
    Operator currentOp = ops[opIndex];

    // Only matmul operators are candidates.
    if (currentOp->getOpType() == OpType::MatMul) {
        // Inputs of the matmul (left and right matrices).
        TensorVec matmulInputs = currentOp->getInputs();
        int inputIndex = 0; // becomes 1 for lhs, 2 for rhs (incremented below)

        // Inspect each input tensor.
        for (Tensor inputTensor : matmulInputs) {
            inputIndex++;

            // The input must be produced by some operator.
            if (inputTensor->getSource()) {
                Operator producerOp = inputTensor->getSource();

                // ... and that producer must be a transpose.
                if (producerOp->getOpType() == OpType::Transpose) {
                    TransposeObj *transposeOp = as<TransposeObj>(producerOp).get();
                    Shape transposePerm = transposeOp->getPermute();
                    bool isLastTwoDimsSwap = true;

                    /* The transpose is foldable only if it swaps exactly the
                     * last two axes:
                     * 1. the first n-2 axes keep their order (perm[j] == j), and
                     * 2. the last two axes are exchanged
                     *    (perm[n-2] == n-1 and perm[n-1] == n-2).
                     */
                    for (int dim = 0; dim < (int)transposePerm.size() - 2; dim++) {
                        if (transposePerm[dim] != dim) {
                            isLastTwoDimsSwap = false;
                            break;
                        }
                    }
                    // NOTE(review): transposePerm.size() - 2 underflows
                    // (size_t) when the permutation has fewer than two axes —
                    // confirm rank >= 2 is guaranteed here.
                    if (transposePerm[transposePerm.size() - 2] != (int)transposePerm.size() - 1 ||
                        transposePerm[transposePerm.size() - 1] != (int)transposePerm.size() - 2) {
                        isLastTwoDimsSwap = false;
                    }

                    // Not a pure last-two-axes swap: try the next input.
                    if (!isLastTwoDimsSwap) continue;

                    // The matmul object, so its transpose flags can be set.
                    MatmulObj *matmulOp = as<MatmulObj>(currentOp).get();
                    Tensor transposedTensor;

                    // Set the transpose flag matching the input position.
                    if (inputIndex == 1) { // left input
                        matmulOp->setTransA(true); // fold into transA
                        transposedTensor = matmulOp->getInputs(0);
                    } else { // right input
                        matmulOp->setTransB(true); // fold into transB
                        transposedTensor = matmulOp->getInputs(1);
                    }

                    // The transpose's own input (the untransposed tensor).
                    Operator transposeOperator = transposedTensor->getSource();
                    Tensor originalTensor = transposeOperator->getInputs()[0];

                    // Rewire the matmul to read the original tensor directly.
                    matmulOp->replaceInput(transposedTensor, originalTensor);

                    // Update tensor connectivity.
                    originalTensor->removeTarget(transposeOperator);
                    originalTensor->addTarget(currentOp);

                    // Release the transpose op and its output tensor.
                    // NOTE(review): removeOperator() mutates `ops` while the
                    // outer index loop runs — confirm traversal stays correct.
                    removeOperator(transposeOperator);
                    removeTensor(transposedTensor);

                    // Fix up the operator-level topology.
                    currentOp->removePredecessors(transposeOperator);

                    // If the original tensor has a producer, link it to the matmul.
                    if (originalTensor->getSource()) {
                        currentOp->addPredecessors(originalTensor->getSource());
                        originalTensor->getSource()->addSuccessors(currentOp);
                    }
                }
            }
        }
    }
}
}

Tensor GraphObj::getTensor(int fuid) const
Expand Down Expand Up @@ -152,7 +310,40 @@ namespace infini
// TODO:利用 allocator 给计算图分配内存
// HINT: 获取分配好的内存指针后,可以调用 tensor 的 setDataBlob 函数给 tensor 绑定内存
// =================================== 作业 ===================================
// allocator.info();
// void* allocatorPtr = allocator.getPtr();
// for(auto it = tensors.begin(); it != tensors.end(); it++){
// auto tensor = *it;
// size_t size = tensor->getBytes();
// size_t addr = allocator.alloc(size);
// char * tmpPtr = reinterpret_cast<char*>(allocatorPtr) + addr;
// Blob blob = make_ref<BlobObj>(runtime, (void *)tmpPtr);
// tensor->setDataBlob(blob);
// }
// topological sorting first
IT_ASSERT(topo_sort() == true);

// =================================== 作业 ===================================
// TODO:利用 allocator 给计算图分配内存
// HINT: 获取分配好的内存指针后,可以调用 tensor 的 setDataBlob 函数给 tensor 绑定内存
// =================================== 作业 ===================================
vector<size_t> offsets;
for (auto tensor : tensors)
{
size_t size = tensor->getBytes();
size_t offset = allocator.alloc(size);
offsets.push_back(offset);
}
auto it = offsets.begin();
void *basePtr = allocator.getPtr();
for (auto tensor : tensors)
{
char *charPtr = reinterpret_cast<char *>(basePtr) + *it;
void *ptr = charPtr;
Blob blob = make_ref<BlobObj>(runtime, ptr);
tensor->setDataBlob(blob);
it++;
}
allocator.info();
}

Expand Down
23 changes: 19 additions & 4 deletions src/operators/concat.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,30 @@ ConcatObj::ConcatObj(GraphObj *graph, TensorVec inputs, Tensor output, int _dim)
}

optional<vector<Shape>> ConcatObj::inferShape(const TensorVec &inputs) {
    // Concat keeps every axis except `dim`, whose size becomes the sum of
    // the inputs' sizes along that axis.
    // REF: https://onnx.ai/onnx/operators/onnx__Concat.html#concat-13
    //
    // Fixed: the emptiness check now precedes the inputs[0] access (the
    // original dereferenced inputs[0] before testing inputs.size() == 0).
    if (inputs.empty()) {
        return std::nullopt;
    }

    Shape dims = inputs[0]->getDims(); // shape of the first input
    auto rank = inputs[0]->getRank();

    // Validate: every input must have the same rank, and must match the
    // first input on every axis except the concat axis.
    for (const auto &input : inputs) {
        if (input->getRank() != rank) {
            return std::nullopt;
        }
        const auto &inDims = input->getDims();
        for (size_t i = 0; i < rank; i++) {
            if (i != size_t(dim) && inDims[i] != dims[i]) {
                return std::nullopt;
            }
        }
    }

    // Accumulate the concat axis; the other axes keep the shared size.
    dims[dim] = 0;
    for (const auto &input : inputs) {
        dims[dim] += input->getDims()[dim];
    }
    return {{dims}};
}

std::string ConcatObj::toString() const {
Expand Down
Loading