Skip to content

Commit a5e20d2

Browse files
committed
Refactor legacy assemble to also use RAII instruction location construction
1 parent 287b66a commit a5e20d2

File tree

1 file changed

+57
-101
lines changed

1 file changed

+57
-101
lines changed

libevmasm/Assembly.cpp

Lines changed: 57 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -1329,104 +1329,72 @@ LinkerObject const& Assembly::assembleLegacy() const
13291329
uint8_t dataRefPush = static_cast<uint8_t>(pushInstruction(bytesPerDataRef));
13301330

13311331
LinkerObject::CodeSectionLocation codeSectionLocation;
1332+
codeSectionLocation.instructionLocations.reserve(items.size());
13321333
codeSectionLocation.start = 0;
1333-
size_t assemblyItemIndex = 0;
1334-
auto assembleInstruction = [&](auto&& _addInstruction) {
1335-
size_t start = ret.bytecode.size();
1336-
_addInstruction();
1337-
size_t end = ret.bytecode.size();
1338-
codeSectionLocation.instructionLocations.emplace_back(
1339-
LinkerObject::InstructionLocation{
1340-
.start = start,
1341-
.end = end,
1342-
.assemblyItemIndex = assemblyItemIndex
1343-
}
1344-
);
1345-
};
1346-
for (AssemblyItem const& item: items)
1334+
for (auto const& [assemblyItemIndex, item]: items | ranges::views::enumerate)
13471335
{
1336+
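// Collect instruction locations as a side effect: the emitter records the location of this item's (last) instruction when it goes out of scope at the end of the iteration.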
1337+
InstructionLocationEmitter instructionLocationEmitter(codeSectionLocation.instructionLocations, ret.bytecode, assemblyItemIndex);
13481338
// store position of the invalid jump destination
13491339
if (item.type() != Tag && m_tagPositionsInBytecode[0] == std::numeric_limits<size_t>::max())
13501340
m_tagPositionsInBytecode[0] = ret.bytecode.size();
13511341

13521342
switch (item.type())
13531343
{
13541344
case Operation:
1355-
assembleInstruction([&](){
1356-
ret.bytecode += assembleOperation(item);
1357-
});
1345+
ret.bytecode += assembleOperation(item);
13581346
break;
13591347
case Push:
1360-
assembleInstruction([&](){
1361-
ret.bytecode += assemblePush(item);
1362-
});
1348+
ret.bytecode += assemblePush(item);
13631349
break;
13641350
case PushTag:
1365-
{
1366-
assembleInstruction([&](){
1367-
ret.bytecode.push_back(tagPush);
1368-
tagRefs[ret.bytecode.size()] = item.splitForeignPushTag();
1369-
ret.bytecode.resize(ret.bytecode.size() + bytesPerTag);
1370-
});
1351+
ret.bytecode.push_back(tagPush);
1352+
tagRefs[ret.bytecode.size()] = item.splitForeignPushTag();
1353+
ret.bytecode.resize(ret.bytecode.size() + bytesPerTag);
13711354
break;
1372-
}
13731355
case PushData:
1374-
assembleInstruction([&]() {
1375-
ret.bytecode.push_back(dataRefPush);
1376-
dataRefs.insert(std::make_pair(h256(item.data()), ret.bytecode.size()));
1377-
ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
1378-
});
1356+
ret.bytecode.push_back(dataRefPush);
1357+
dataRefs.insert(std::make_pair(h256(item.data()), ret.bytecode.size()));
1358+
ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
13791359
break;
13801360
case PushSub:
1381-
assembleInstruction([&]() {
1382-
assertThrow(item.data() <= std::numeric_limits<size_t>::max(), AssemblyException, "");
1383-
ret.bytecode.push_back(dataRefPush);
1384-
subRefs.insert(std::make_pair(static_cast<size_t>(item.data()), ret.bytecode.size()));
1385-
ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
1386-
});
1361+
assertThrow(item.data() <= std::numeric_limits<size_t>::max(), AssemblyException, "");
1362+
ret.bytecode.push_back(dataRefPush);
1363+
subRefs.insert(std::make_pair(static_cast<size_t>(item.data()), ret.bytecode.size()));
1364+
ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
13871365
break;
13881366
case PushSubSize:
13891367
{
1390-
assembleInstruction([&](){
1391-
assertThrow(item.data() <= std::numeric_limits<size_t>::max(), AssemblyException, "");
1392-
auto s = subAssemblyById(static_cast<size_t>(item.data()))->assemble().bytecode.size();
1393-
item.setPushedValue(u256(s));
1394-
unsigned b = std::max<unsigned>(1, numberEncodingSize(s));
1395-
ret.bytecode.push_back(static_cast<uint8_t>(pushInstruction(b)));
1396-
ret.bytecode.resize(ret.bytecode.size() + b);
1397-
bytesRef byr(&ret.bytecode.back() + 1 - b, b);
1398-
toBigEndian(s, byr);
1399-
});
1368+
assertThrow(item.data() <= std::numeric_limits<size_t>::max(), AssemblyException, "");
1369+
auto s = subAssemblyById(static_cast<size_t>(item.data()))->assemble().bytecode.size();
1370+
item.setPushedValue(u256(s));
1371+
unsigned b = std::max<unsigned>(1, numberEncodingSize(s));
1372+
ret.bytecode.push_back(static_cast<uint8_t>(pushInstruction(b)));
1373+
ret.bytecode.resize(ret.bytecode.size() + b);
1374+
bytesRef byr(&ret.bytecode.back() + 1 - b, b);
1375+
toBigEndian(s, byr);
14001376
break;
14011377
}
14021378
case PushProgramSize:
1403-
{
1404-
assembleInstruction([&](){
1405-
ret.bytecode.push_back(dataRefPush);
1406-
sizeRefs.push_back(static_cast<unsigned>(ret.bytecode.size()));
1407-
ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
1408-
});
1379+
ret.bytecode.push_back(dataRefPush);
1380+
sizeRefs.push_back(static_cast<unsigned>(ret.bytecode.size()));
1381+
ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
14091382
break;
1410-
}
14111383
case PushLibraryAddress:
14121384
{
1413-
assembleInstruction([&]() {
1414-
auto const [bytecode, linkRef] = assemblePushLibraryAddress(item, ret.bytecode.size());
1415-
ret.bytecode += bytecode;
1416-
ret.linkReferences.insert(linkRef);
1417-
});
1385+
auto const [bytecode, linkRef] = assemblePushLibraryAddress(item, ret.bytecode.size());
1386+
ret.bytecode += bytecode;
1387+
ret.linkReferences.insert(linkRef);
14181388
break;
14191389
}
14201390
case PushImmutable:
1421-
assembleInstruction([&]() {
1422-
ret.bytecode.push_back(static_cast<uint8_t>(Instruction::PUSH32));
1423-
// Maps keccak back to the "identifier" std::string of that immutable.
1424-
ret.immutableReferences[item.data()].first = m_immutables.at(item.data());
1425-
// Record the bytecode offset of the PUSH32 argument.
1426-
ret.immutableReferences[item.data()].second.emplace_back(ret.bytecode.size());
1427-
// Advance bytecode by 32 bytes (default initialized).
1428-
ret.bytecode.resize(ret.bytecode.size() + 32);
1429-
});
1391+
ret.bytecode.push_back(static_cast<uint8_t>(Instruction::PUSH32));
1392+
// Maps keccak back to the "identifier" std::string of that immutable.
1393+
ret.immutableReferences[item.data()].first = m_immutables.at(item.data());
1394+
// Record the bytecode offset of the PUSH32 argument.
1395+
ret.immutableReferences[item.data()].second.emplace_back(ret.bytecode.size());
1396+
// Advance bytecode by 32 bytes (default initialized).
1397+
ret.bytecode.resize(ret.bytecode.size() + 32);
14301398
break;
14311399
case VerbatimBytecode:
14321400
ret.bytecode += assembleVerbatimBytecode(item);
@@ -1439,53 +1407,41 @@ LinkerObject const& Assembly::assembleLegacy() const
14391407
{
14401408
if (i != offsets.size() - 1)
14411409
{
1442-
assembleInstruction([&]() {
1443-
ret.bytecode.push_back(uint8_t(Instruction::DUP2));
1444-
});
1445-
assembleInstruction([&]() {
1446-
ret.bytecode.push_back(uint8_t(Instruction::DUP2));
1447-
});
1410+
ret.bytecode.push_back(static_cast<uint8_t>(Instruction::DUP2));
1411+
// This item type decomposes into multiple EVM instructions, so we manually call emit() after each one except the last.
1412+
instructionLocationEmitter.emit();
1413+
ret.bytecode.push_back(static_cast<uint8_t>(Instruction::DUP2));
1414+
instructionLocationEmitter.emit();
14481415
}
1449-
assembleInstruction([&]() {
1450-
// TODO: should we make use of the constant optimizer methods for pushing the offsets?
1451-
bytes offsetBytes = toCompactBigEndian(u256(offsets[i]));
1452-
ret.bytecode.push_back(static_cast<uint8_t>(pushInstruction(static_cast<unsigned>(offsetBytes.size()))));
1453-
ret.bytecode += offsetBytes;
1454-
});
1455-
assembleInstruction([&]() {
1456-
ret.bytecode.push_back(uint8_t(Instruction::ADD));
1457-
});
1458-
assembleInstruction([&]() {
1459-
ret.bytecode.push_back(uint8_t(Instruction::MSTORE));
1460-
});
1416+
// TODO: should we make use of the constant optimizer methods for pushing the offsets?
1417+
bytes offsetBytes = toCompactBigEndian(u256(offsets[i]));
1418+
ret.bytecode.push_back(static_cast<uint8_t>(pushInstruction(static_cast<unsigned>(offsetBytes.size()))));
1419+
ret.bytecode += offsetBytes;
1420+
instructionLocationEmitter.emit();
1421+
ret.bytecode.push_back(static_cast<uint8_t>(Instruction::ADD));
1422+
instructionLocationEmitter.emit();
1423+
ret.bytecode.push_back(static_cast<uint8_t>(Instruction::MSTORE));
1424+
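// No emit needed after the final MSTORE, it's taken care of by the destructor of instructionLocationEmitter.
// NOTE(review): the emitter lives for the whole assembly item, not for one loop iteration, so on non-final iterations this MSTORE appears to be recorded together with the next iteration's first DUP2 (the previous code recorded a location after every MSTORE) - confirm against emit().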
14611425
}
14621426
if (offsets.empty())
14631427
{
1464-
assembleInstruction([&]() {
1465-
ret.bytecode.push_back(uint8_t(Instruction::POP));
1466-
});
1467-
assembleInstruction([&]() {
1468-
ret.bytecode.push_back(uint8_t(Instruction::POP));
1469-
});
1428+
ret.bytecode.push_back(static_cast<uint8_t>(Instruction::POP));
1429+
instructionLocationEmitter.emit();
1430+
ret.bytecode.push_back(static_cast<uint8_t>(Instruction::POP));
1431+
// No emit needed here, it's taken care of by the destructor of instructionLocationEmitter.
14701432
}
14711433
immutableReferencesBySub.erase(item.data());
14721434
break;
14731435
}
14741436
case PushDeployTimeAddress:
1475-
assembleInstruction([&]() {
1476-
ret.bytecode += assemblePushDeployTimeAddress();
1477-
});
1437+
ret.bytecode += assemblePushDeployTimeAddress();
14781438
break;
14791439
case Tag:
1480-
assembleInstruction([&](){
1481-
ret.bytecode += assembleTag(item, ret.bytecode.size(), true);
1482-
});
1440+
ret.bytecode += assembleTag(item, ret.bytecode.size(), true);
14831441
break;
14841442
default:
14851443
solAssert(false, "Unexpected opcode while assembling.");
14861444
}
1487-
1488-
++assemblyItemIndex;
14891445
}
14901446

14911447
codeSectionLocation.end = ret.bytecode.size();

0 commit comments

Comments
 (0)