Skip to content

all #11

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open

all #11

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions include/core/allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ namespace infini {
// TODO:可能需要设计一个数据结构来存储free block,以便于管理和合并
// HINT: 可以使用一个 map 来存储 free block,key 为 block 的起始/结尾地址,value 为 block 的大小
// =================================== 作业 ===================================
// Free blocks indexed by start offset (offset -> block size). The ordering
// lets free() find the neighbours of a released block and coalesce with them.
std::map<size_t, size_t> freeByAddr; // addr -> size
// The same free blocks indexed by size (block size -> start offset). alloc()
// queries it with lower_bound() to pick the smallest block that fits.
std::multimap<size_t, size_t> freeBySize; // size -> addr
// Blocks currently handed out by alloc() (start offset -> aligned size).
// free() validates its arguments against this table.
std::unordered_map<size_t, size_t> live; // addr -> size

public:
Allocator(Runtime runtime);
Expand All @@ -51,6 +54,17 @@ namespace infini {

void info();

// Registers the free block [addr, addr + size) in both free-block indexes
// (freeByAddr and freeBySize). The implementation asserts size > 0.
void insertFreeBlock(size_t addr, size_t size);

// Removes the free block referenced by itAddr from freeByAddr, together with
// its matching <size, addr> entry in freeBySize.
void eraseFreeBlockByAddr(std::map<size_t,size_t>::iterator itAddr);

// Removes the free block referenced by itSize from freeBySize, together with
// its entry in freeByAddr (if present).
void eraseFreeBlockBySize(std::multimap<size_t,size_t>::iterator itSize);

// Overflow-checked addition for size_t.
// Stores a + b in `out` and returns true when the sum is representable;
// returns false and leaves `out` untouched when it would wrap around.
static inline bool checked_add(size_t a, size_t b, size_t &out) {
    const size_t headroom = std::numeric_limits<size_t>::max() - a;
    if (b <= headroom) {
        out = a + b;
        return true;
    }
    return false;
}
private:
// function: memory alignment, rounded up
// return: size of the aligned memory block
Expand Down
23 changes: 10 additions & 13 deletions include/core/graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,19 +25,8 @@ namespace infini
Tensor addTensor(Shape dim, DataType dtype = DataType::Float32);
Tensor addTensor(const Tensor &tensor);
TensorVec addTensor(const TensorVec &tensors);
// Removes the first occurrence of `op` from the operator list.
// Does nothing when `op` is not in the list.
void removeOperator(Operator op)
{
    const auto pos = std::find(ops.begin(), ops.end(), op);
    if (pos == ops.end())
        return;
    ops.erase(pos);
}

// Removes the first occurrence of `tensor` from the tensor list.
// Does nothing when `tensor` is not in the list.
void removeTensor(Tensor tensor)
{
    const auto pos = std::find(tensors.begin(), tensors.end(), tensor);
    if (pos == tensors.end())
        return;
    tensors.erase(pos);
}
void removeOperator(Operator op);
void removeTensor(Tensor tensor);

const TensorVec &getTensors() const { return tensors; }
const OpVec &getOperators() const { return ops; }
Expand Down Expand Up @@ -112,6 +101,14 @@ namespace infini
*/
void addOperatorAndConnect(const Operator &op);

// Optimization helpers (implemented in src/core/graph.cc)
bool fuseConsecutiveTransposes();
bool foldTransposeIntoMatmul();
bool deadCodeEliminate();
void replaceInputTensor(const Operator &op, const Tensor &oldT, const Tensor &newT);
void rewirePredToSucc(const Operator &pred, const Operator &succ);
bool hasSingleUse(const Tensor &t) const;

/**
* @brief If the nodes is sorted in topological order.
*/
Expand Down
7 changes: 5 additions & 2 deletions include/core/ref.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,11 @@ std::vector<WRef<T>> refs_to_wrefs(const std::vector<Ref<T>> &refs) {
/**
 * @brief Convert a vector of weak references into strong references.
 *
 * Every weak reference is locked exactly once. Entries for which lock()
 * yields an empty Ref are skipped, so the result can be shorter than the
 * input; the relative order of the surviving entries is preserved.
 *
 * @param wrefs weak references to convert
 * @return strong references to the targets that could still be locked
 */
template <typename T>
std::vector<Ref<T>> wrefs_to_refs(const std::vector<WRef<T>> &wrefs) {
    std::vector<Ref<T>> refs;
    // At most one output element per input element.
    refs.reserve(wrefs.size());
    for (const auto &wref : wrefs) {
        if (auto p = wref.lock())
            refs.emplace_back(std::move(p));
    }
    return refs;
}

Expand Down
1 change: 1 addition & 0 deletions include/operators/transpose.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ namespace infini
int numInputs() const override { return 1; }
int numOutputs() const override { return 1; }
// Returns a copy of the permutation stored in this operator.
std::vector<int> getPermute() const { return transposePermute; }
// Replaces the stored permutation with p. No validation is performed here.
void setPermute(const std::vector<int> &p) { transposePermute = p; }

private:
vector<int> transposePermute;
Expand Down
136 changes: 130 additions & 6 deletions src/core/allocator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,35 @@

namespace infini
{
/**
 * Register the free block [addr, addr + size) in both free-block indexes.
 *
 * @param addr start offset of the block
 * @param size length of the block; must be non-zero
 */
void Allocator::insertFreeBlock(size_t addr, size_t size) {
    IT_ASSERT(size > 0);
    // std::map::emplace does nothing when the key already exists. Without this
    // check a duplicate address would be recorded in freeBySize only, leaving
    // the two indexes out of sync.
    [[maybe_unused]] const bool inserted = freeByAddr.emplace(addr, size).second;
    IT_ASSERT(inserted);
    freeBySize.emplace(size, addr);
}

/**
 * Remove the free block referenced by itAddr from both indexes.
 *
 * Several free blocks may share the same size, so the entry in the size index
 * is identified by its <size, addr> pair.
 */
void Allocator::eraseFreeBlockByAddr(std::map<size_t,size_t>::iterator itAddr) {
    const size_t blockAddr = itAddr->first;
    const size_t blockSize = itAddr->second;

    // Walk the run of equally sized blocks until the one at blockAddr is found.
    auto bounds = freeBySize.equal_range(blockSize);
    auto cur = bounds.first;
    while (cur != bounds.second && cur->second != blockAddr)
        ++cur;
    if (cur != bounds.second)
        freeBySize.erase(cur);

    freeByAddr.erase(itAddr);
}

/**
 * Remove the free block referenced by itSize from both indexes.
 *
 * When the address index holds an entry for the block, its recorded size must
 * agree with the size index (asserted).
 */
void Allocator::eraseFreeBlockBySize(std::multimap<size_t,size_t>::iterator itSize) {
    const size_t blockSize = itSize->first;
    const size_t blockAddr = itSize->second;

    // Drop the mirror entry from the address index first.
    auto addrPos = freeByAddr.find(blockAddr);
    const bool indexed = (addrPos != freeByAddr.end());
    if (indexed) {
        IT_ASSERT(addrPos->second == blockSize);
        freeByAddr.erase(addrPos);
    }

    freeBySize.erase(itSize);
}


Allocator::Allocator(Runtime runtime) : runtime(runtime)
{
used = 0;
Expand All @@ -25,33 +54,128 @@ namespace infini

size_t Allocator::alloc(size_t size)
{
IT_ASSERT(this->ptr == nullptr);
// pad the size to the multiple of alignment
size = this->getAlignedSize(size);

// =================================== 作业 ===================================
// TODO: 设计一个算法来分配内存,返回起始地址偏移量
// =================================== 作业 ===================================

return 0;
}
auto it = freeBySize.lower_bound(size);
if (it != freeBySize.end()) {
size_t blkSize = it->first;
size_t addr = it->second;

// 同步删除旧空闲块(两个索引)
eraseFreeBlockBySize(it);

// 2) 分裂:保留尾侧残块 [addr+size, addr+blkSize)
if (blkSize > size) {
size_t tailAddr;
[[maybe_unused]] bool ok = checked_add(addr, size, tailAddr);
IT_ASSERT(ok);
insertFreeBlock(tailAddr, blkSize - size);
}

// 3) 记账:live / used / peak
live.emplace(addr, size);
used += size;

size_t high;
[[maybe_unused]] bool ok2 = checked_add(addr, size, high);
IT_ASSERT(ok2);
if (high > peak) peak = high; // 注意:peak 是地址高水位,不是 used 峰值

return addr;
}
size_t addr = peak;
size_t newPeak;
bool ok = checked_add(peak, size, newPeak);
IT_ASSERT(ok);
peak = newPeak;

live.emplace(addr, size);
used += size;
return addr;
}

void Allocator::free(size_t addr, size_t size)
{
IT_ASSERT(this->ptr == nullptr);
size = getAlignedSize(size);

// =================================== 作业 ===================================
// TODO: 设计一个算法来回收内存
// =================================== 作业 ===================================
auto itLive = live.find(addr);
IT_ASSERT(itLive != live.end()); // 非法地址
IT_ASSERT(itLive->second == size); // 尺寸必须一致(你也可以选择“以 live 为准”)
live.erase(itLive);
used -= size;

// 2) 合并:先将 [addr,size] 投入 freeByAddr,再和前/后邻接块合并
auto itInsert = freeByAddr.lower_bound(addr);
size_t newAddr = addr, newSize = size;

// 与前块合并(prev)
if (itInsert != freeByAddr.begin()) {
auto itPrev = std::prev(itInsert);
size_t prevAddr = itPrev->first, prevSize = itPrev->second;
size_t prevEnd;
bool ok = checked_add(prevAddr, prevSize, prevEnd);
IT_ASSERT(ok);
if (prevEnd == addr) {
// 删前块(同步 size 索引)
eraseFreeBlockByAddr(itPrev);
newAddr = prevAddr;
newSize += prevSize;
}
}

// 与后块合并(next = itInsert)
if (itInsert != freeByAddr.end()) {
size_t nextAddr = itInsert->first, nextSize = itInsert->second;
size_t thisEnd;
bool ok = checked_add(newAddr, newSize, thisEnd);
IT_ASSERT(ok);
if (thisEnd == nextAddr) {
// 删后块(同步 size 索引)
eraseFreeBlockByAddr(itInsert);
newSize += nextSize;
}
}

// 3) 写回合并后的最终空闲块到两个索引
insertFreeBlock(newAddr, newSize);

// // 4) 回缩 peak:仅当“末端连续空闲覆盖到高水位”
// // 需要在 freeByAddr 中查看最高地址块是否接触 peak
auto itLast = freeByAddr.empty() ? freeByAddr.end() : std::prev(freeByAddr.end());
if (!freeByAddr.empty()) {
size_t lastAddr = itLast->first, lastSize = itLast->second;
size_t lastEnd;
bool ok = checked_add(lastAddr, lastSize, lastEnd);
IT_ASSERT(ok);
if (lastEnd == peak) {
// 从末端收缩:不断回收可贴到峰值的尾块
// 移除并更新两索引,直到不再贴合
while (!freeByAddr.empty()) {
auto itTail = std::prev(freeByAddr.end());
size_t a = itTail->first, s = itTail->second, e;
bool ok2 = checked_add(a, s, e);
IT_ASSERT(ok2);
if (e != peak) break;
eraseFreeBlockByAddr(itTail);
peak = a;
if (freeByAddr.empty()) break;
}
}
}
}

/**
 * Return the base pointer of the memory pool.
 *
 * On the first call the pool is obtained from the runtime with a size of
 * `peak` bytes and the pointer is cached; later calls return the cached
 * pointer.
 */
void *Allocator::getPtr()
{
    if (this->ptr == nullptr)
    {
        this->ptr = runtime->alloc(this->peak);
        // `peak` is a size_t, so %zu is the matching conversion; %lu is only
        // correct on platforms where size_t happens to be unsigned long.
        printf("Allocator really alloc: %p %zu bytes\n", this->ptr, peak);
    }
    return this->ptr;
}
Expand Down
Loading