Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion 3rd-party/googletest
Submodule googletest updated 67 files
+15 −4 BUILD.bazel
+2 −2 CMakeLists.txt
+27 −18 MODULE.bazel
+4 −12 README.md
+8 −9 WORKSPACE
+94 −59 ci/linux-presubmit.sh
+15 −5 ci/macos-presubmit.sh
+36 −14 ci/windows-presubmit.bat
+7 −7 docs/_layouts/default.html
+103 −67 docs/advanced.md
+0 −13 docs/faq.md
+53 −20 docs/gmock_cook_book.md
+2 −2 docs/primer.md
+8 −8 docs/quickstart-bazel.md
+3 −3 docs/quickstart-cmake.md
+4 −3 docs/reference/actions.md
+9 −2 docs/reference/assertions.md
+34 −4 docs/reference/matchers.md
+98 −1 docs/reference/testing.md
+32 −4 fake_fuchsia_sdk.bzl
+53 −9 googlemock/include/gmock/gmock-actions.h
+353 −73 googlemock/include/gmock/gmock-matchers.h
+4 −5 googlemock/include/gmock/gmock-more-actions.h
+0 −5 googlemock/include/gmock/internal/gmock-internal-utils.h
+1 −1 googlemock/src/gmock-spec-builders.cc
+20 −4 googlemock/test/gmock-actions_test.cc
+3 −3 googlemock/test/gmock-function-mocker_test.cc
+201 −20 googlemock/test/gmock-matchers-arithmetic_test.cc
+159 −9 googlemock/test/gmock-matchers-comparisons_test.cc
+315 −17 googlemock/test/gmock-matchers-containers_test.cc
+80 −21 googlemock/test/gmock-matchers-misc_test.cc
+39 −10 googlemock/test/gmock-more-actions_test.cc
+1 −1 googlemock/test/gmock-pp_test.cc
+4 −5 googlemock/test/gmock-spec-builders_test.cc
+4 −4 googlemock/test/gmock_output_test_golden.txt
+0 −3 googletest/CMakeLists.txt
+6 −6 googletest/README.md
+1 −1 googletest/cmake/internal_utils.cmake
+7 −0 googletest/include/gtest/gtest-assertion-result.h
+32 −3 googletest/include/gtest/gtest-matchers.h
+95 −39 googletest/include/gtest/gtest-param-test.h
+94 −49 googletest/include/gtest/gtest-printers.h
+61 −65 googletest/include/gtest/gtest-typed-test.h
+3 −3 googletest/include/gtest/gtest.h
+40 −44 googletest/include/gtest/internal/gtest-internal.h
+48 −14 googletest/include/gtest/internal/gtest-param-util.h
+15 −177 googletest/include/gtest/internal/gtest-port.h
+4 −0 googletest/src/gtest-internal-inl.h
+10 −10 googletest/src/gtest-printers.cc
+190 −46 googletest/src/gtest.cc
+51 −17 googletest/test/BUILD.bazel
+1 −0 googletest/test/googletest-color-test.py
+1 −1 googletest/test/googletest-death-test-test.cc
+38 −0 googletest/test/googletest-fail-if-no-test-linked-test-with-disabled-test_.cc
+38 −0 googletest/test/googletest-fail-if-no-test-linked-test-with-enabled-test_.cc
+165 −0 googletest/test/googletest-fail-if-no-test-linked-test.py
+91 −0 googletest/test/googletest-fail-if-no-test-selected-test.py
+19 −0 googletest/test/googletest-filter-unittest.py
+95 −16 googletest/test/googletest-json-output-unittest.py
+1 −1 googletest/test/googletest-output-test-golden-lin.txt
+71 −1 googletest/test/googletest-param-test-test.cc
+97 −19 googletest/test/googletest-printers-test.cc
+2 −2 googletest/test/googletest-setuptestsuite-test_.cc
+60 −9 googletest/test/gtest_unittest.cc
+67 −26 googletest/test/gtest_xml_output_unittest.py
+21 −1 googletest/test/gtest_xml_output_unittest_.cc
+3 −3 googletest_deps.bzl
25 changes: 25 additions & 0 deletions include/core/allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,31 @@ namespace infini {

// =================================== 作业 ===================================
// TODO: a data structure may be needed to store the free blocks, so they can be managed and coalesced

// from InfiniTensor
// Descriptor of a single free region in the arena: start offset and length.
struct freeBlockInfo {
    size_t addr;
    size_t blockSize;
};

// Strict weak ordering for free blocks: primarily by ascending size (so a
// best-fit candidate can be found with set::lower_bound), ties broken by
// ascending start address.
struct cmpFreeBlockInfo {
    bool operator()(const freeBlockInfo &a, const freeBlockInfo &b) const {
        if (a.blockSize != b.blockSize) {
            return a.blockSize < b.blockSize;
        }
        return a.addr < b.addr;
    }
};

// Balanced tree of all free blocks, ordered (blockSize, addr) by
// cmpFreeBlockInfo so that a best-fit block can be located via lower_bound.
std::set<freeBlockInfo, cmpFreeBlockInfo> freeBlocks;

// key: head address offset of the free memory block
// value: blockSize of the block
// Used to find the free block that starts right after a freed region
// (its right-hand neighbour) when coalescing.
std::unordered_map<size_t, size_t> headAddrToBlockSize;

// key: tail address offset of the free memory block
// value: blockSize of the block
// Used to find the free block that ends right before a freed region
// (its left-hand neighbour) when coalescing.
std::unordered_map<size_t, size_t> tailAddrToBlockSize;

// HINT: maps can be used to store the free blocks, keyed by each block's start/end address, with the block size as the value
// =================================== 作业 ===================================

Expand Down
4 changes: 3 additions & 1 deletion include/core/graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,16 @@ namespace infini
void dataMalloc();

/**
 * @brief Add an operator and create its outputs. Output tensor arguments
 * should be empty Refs (e.g., nullptr).
 */
template <typename T, typename... Args>
Ref<T> addOp(Args &&...args)
{
    // Construct the operator bound to this graph, then register it;
    // addOperatorAndConnect wires it to its input/output tensors.
    Ref<T> op = infini::make_ref<T>(this, std::forward<Args>(args)...);
    addOperatorAndConnect(op);
    return op;
}

Expand Down
67 changes: 66 additions & 1 deletion src/core/allocator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,45 @@ namespace infini

// =================================== 作业 ===================================
// TODO: 设计一个算法来分配内存,返回起始地址偏移量
auto it = this->freeBlocks.lower_bound(freeBlockInfo{(size_t)0, size});
size_t retAddr = this->peak;
if(it != freeBlocks.end()) //find free block
{
size_t blockSize = it->blockSize;
retAddr = it->addr;
size_t tailAddr = retAddr + size;
this->headAddrToBlockSize.erase(retAddr);
this->tailAddrToBlockSize.erase(tailAddr);
if(blockSize > size)
{
freeBlockInfo newBlock = {tailAddr, blockSize - size};
this->headAddrToBlockSize[tailAddr] = newBlock.blockSize;
this->tailAddrToBlockSize[tailAddr + newBlock.blockSize] = newBlock.blockSize;
this->freeBlocks.insert(newBlock);
}
this->freeBlocks.erase(it);
}
else
{
auto blockTailWithPeak = this->tailAddrToBlockSize.find(this->peak);
if(blockTailWithPeak != this->tailAddrToBlockSize.end())
{
retAddr = this->peak - blockTailWithPeak->second;
this->peak += (size - blockTailWithPeak->second);
freeBlockInfo endBlock = {retAddr, blockTailWithPeak->second};
this->freeBlocks.erase(endBlock);
this->headAddrToBlockSize.erase(endBlock.addr);
this->tailAddrToBlockSize.erase(endBlock.addr + endBlock.blockSize);
}
else
{
this->peak += size;
}
}
this->used += size;
// =================================== 作业 ===================================

return 0;
return retAddr;
}

void Allocator::free(size_t addr, size_t size)
Expand All @@ -43,6 +79,35 @@ namespace infini

// =================================== 作业 ===================================
// TODO: 设计一个算法来回收内存
auto tailAddr = addr + size;
freeBlockInfo block = {addr, size};
this->headAddrToBlockSize[block.addr] = block.blockSize;
this->tailAddrToBlockSize[tailAddr] = block.blockSize;
auto preFreeBlockIter = this->tailAddrToBlockSize.find(addr);
auto subFreeBlockIter = this->headAddrToBlockSize.find(tailAddr);
if(preFreeBlockIter != this->tailAddrToBlockSize.end())
{
size_t preBlockSize = preFreeBlockIter->second;
this->headAddrToBlockSize.erase(block.addr);
this->headAddrToBlockSize[block.addr - preBlockSize] += block.blockSize;
this->tailAddrToBlockSize.erase(block.addr);
this->tailAddrToBlockSize[tailAddr] += preBlockSize;
block.addr -= preBlockSize;
block.blockSize += preBlockSize;
this->freeBlocks.erase(freeBlockInfo({block.addr, preBlockSize}));
}
if(subFreeBlockIter != this->headAddrToBlockSize.end())
{
size_t subBlockSize = preFreeBlockIter->second;
this->headAddrToBlockSize.erase(tailAddr);
this->headAddrToBlockSize[block.addr] += subBlockSize;
this->tailAddrToBlockSize.erase(tailAddr);
this->tailAddrToBlockSize[tailAddr + subBlockSize] += block.blockSize;
block.blockSize += subBlockSize;
this->freeBlocks.erase(freeBlockInfo({tailAddr, subBlockSize}));
}
this->freeBlocks.insert(block);
this->used -= size;
// =================================== 作业 ===================================
}

Expand Down
142 changes: 142 additions & 0 deletions src/core/graph.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "core/graph.h"
#include "operators/matmul.h"
#include <algorithm>
#include <numeric>
#include <queue>
Expand Down Expand Up @@ -105,6 +106,130 @@ namespace infini
// 图优化规则如下:
// 1. 去除冗余的算子(例如,两个相邻的算子都是 transpose 算子,且做的是相反的操作,可以将其全部删除)
// 2. 合并算子(例如,矩阵乘算子中含有属性transA、transB,如果其输入存在transpose,且对最后两个维度做交换,就可以将transpose融入到矩阵乘算子的属性中去)
Operator last_opt;
// OpVec delop;
// TensorVec save_tensors;
for(size_t i = 0; i < ops.size();)
{
auto op = ops[i];
if(op->getOpType().underlying() == 10)
{
auto predecessors = op->getPredecessors();
for(auto predecessor : predecessors)
{
if(predecessor->getOpType().underlying() == 10)
{
TensorVec Outputs = op->getOutputs();
TensorVec Inputs = predecessor->getInputs();
Tensor input = Inputs[0];
Tensor output = Outputs[0];
if(output->getDims() == input->getDims())
{
std::cout << "transpose delete" << std::endl;
this->removeOperator(op);
this->removeOperator(predecessor);
this->removeTensor(op->getInputs()[0]);
this->removeTensor(op->getOutputs()[0]);

if (auto pred = input->getSource())
{
pred->removeSuccessors(predecessor);
for (auto &succ : output->getTargets())
{
pred->addSuccessors(succ);
}
}
input->removeTarget(predecessor);
for (auto &succ : output->getTargets())
{
input->addTarget(succ);
succ->replaceInput(output, input);
succ->removePredecessors(op);
for (auto &predop : predecessor->getPredecessors())
{
succ->addPredecessors(predop);
}
}
i--;
continue;
}
}
}
}
else if(op->getOpType().underlying() == 7)
{
Tensor tensorA = op->getInputs()[0];
Tensor tensorB = op->getInputs()[1];
if(const auto &source = tensorA->getSource())
{
if(source->getOpType().underlying() == 10)
{
Tensor input = source->getInputs()[0];
Tensor output = source->getOutputs()[0];
auto input_dim = input->getDims();
auto output_dim = output->getDims();
if(input_dim[input_dim.size()-1] == output_dim[output_dim.size()-2])
{
std::cout << "transpose merge A" << std::endl;
Tensor input = source->getInputs()[0];
Tensor output = source->getOutputs()[0];

// update op info
for (auto &predop : source->getPredecessors())
{
predop->removeSuccessors(source);
predop->addSuccessors(op);
op->removePredecessors(source);
op->addPredecessors(predop);
}
input->removeTarget(source);
input->addTarget(op);
op->replaceInput(output, input);
auto* matmulOp = dynamic_cast<MatmulObj*>(source.get());
matmulOp->setTransA(true);
continue;
}
}
}
if(const auto &source = tensorB->getSource())
{
if(source->getOpType().underlying() == 10)
{
Tensor input = source->getInputs()[0];
Tensor output = source->getOutputs()[0];
auto input_dim = input->getDims();
auto output_dim = output->getDims();
if(input_dim[input_dim.size()-1] == output_dim[output_dim.size()-2])
{
std::cout << "transpose merge B" << std::endl;
// std::cout << input << std::endl;
Tensor input = source->getInputs()[0];
Tensor output = source->getOutputs()[0];
// update op info
op->removePredecessors(source);
for (auto &predop : source->getPredecessors())
{
predop->removeSuccessors(source);
predop->addSuccessors(op);
op->addPredecessors(predop);
}
input->removeTarget(source);
input->addTarget(op);
op->replaceInput(output, input);
auto* matmulOp = dynamic_cast<MatmulObj*>(op.get());
matmulOp->setTransB(true);
this->removeOperator(source);
this->removeTensor(output);
// this->print();
continue;
}
}
}
}
i++;
}
std::cout << "Optimize complete!" << std::endl << std::endl;

// =================================== 作业 ===================================
}

Expand Down Expand Up @@ -151,6 +276,23 @@ namespace infini
// =================================== 作业 ===================================
// TODO:利用 allocator 给计算图分配内存
// HINT: 获取分配好的内存指针后,可以调用 tensor 的 setDataBlob 函数给 tensor 绑定内存
// Naive Version
std::unordered_map<std::shared_ptr<infini::TensorObj>, size_t> tensorToOffset;
for(auto tensor : tensors)
{
tensorToOffset[tensor] = allocator.alloc(tensor->getBytes());
// std::cout << "loop1end" << std::endl;
}
for(auto tensor : tensors)
{
tensor->setDataBlob(make_ref<BlobObj>
(
tensor->runtime,
static_cast<uint8_t *>(allocator.getPtr()) +
tensorToOffset[tensor]
)
);
}
// =================================== 作业 ===================================

allocator.info();
Expand Down
19 changes: 17 additions & 2 deletions src/operators/concat.cc
Original file line number Diff line number Diff line change
@@ -1,24 +1,39 @@
#include "operators/concat.h"
#include "utils/operator_utils.h"
#include "core/graph.h"

namespace infini {
ConcatObj::ConcatObj(GraphObj *graph, TensorVec inputs, Tensor output, int _dim)
    : OperatorObj(OpType::Concat, inputs, {output}) {
    // Normalise a possibly-negative axis into [0, rank), based on input 0.
    int rank = inputs[0]->getRank();
    dim = get_real_axis(_dim, rank);
    // checkValid runs shape inference and materialises the output tensor.
    // (The pasted diff left this assertion duplicated; one call suffices.)
    IT_ASSERT(checkValid(graph));
}

optional<vector<Shape>> ConcatObj::inferShape(const TensorVec &inputs) {
    // Output shape equals input 0's shape except along the concat axis `dim`,
    // where the sizes of all inputs are summed.
    // REF: https://onnx.ai/onnx/operators/onnx__Concat.html#concat-13
    Shape ans = inputs[0]->getDims();
    // BUG FIX: the previous code accumulated only axes whose sizes differed
    // from input 0, so concatenating equal-sized tensors (e.g. two [2,3]
    // along axis 0) produced the wrong shape. Sum along `dim` unconditionally.
    for (size_t j = 1; j < inputs.size(); j++) {
        ans[dim] += inputs[j]->getDims()[dim];
    }
    return {{ans}};
}

std::string ConcatObj::toString() const {
Expand Down
31 changes: 30 additions & 1 deletion src/operators/matmul.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,37 @@ namespace infini
// =================================== 作业 ===================================
// TODO:返回经过 matmul 操作后的 shape
// REF: https://github.com/onnx/onnx/blob/main/docs/Operators.md#gemm
const auto A = inputs[0];
const auto B = inputs[1];
Shape dimA = A->getDims();
Shape dimB = B->getDims();
Shape out_shape = dimA;
int tempA = dimA.size() - 1;
int tempB = dimA.size() - 1;
if(dimA[dimA.size()-1] == dimB[dimB.size()-1])
{
tempA = dimA.size()-2;
tempB = dimB.size()-2;
}
else if(dimA[dimA.size()-1] == dimB[dimB.size()-2])
{
tempA = dimA.size()-2;
tempB = dimB.size()-1;
}
else if(dimA[dimA.size()-2] == dimB[dimB.size()-1])
{
tempA = dimA.size()-1;
tempB = dimB.size()-2;
}
else if(dimA[dimA.size()-2] == dimB[dimB.size()-2])
{
tempA = dimA.size()-1;
tempB = dimB.size()-1;
}
out_shape[dimA.size()-2] = dimA[tempA];
out_shape[dimA.size()-1] = dimB[tempB];
// =================================== 作业 ===================================
return std::nullopt;
return {{out_shape}};
}

} // namespace infini
10 changes: 9 additions & 1 deletion src/operators/transpose.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,17 @@ namespace infini
// =================================== 作业 ===================================
// TODO:修改 output_dim,返回正确的 transpose 后的 shape
// REF: https://onnx.ai/onnx/operators/onnx__Transpose.html#transpose-21
if(rank > 1)
{
for(int i=0;i<rank;i++)
{
output_dim[i] = input_dim[transposePermute[i]];
}
}
// =================================== 作业 ===================================

return std::nullopt;
//return std::nullopt;
return {{output_dim}};
}

std::string TransposeObj::toString() const
Expand Down
Loading