diff --git a/build b/build
new file mode 100755
index 0000000..cbb4236
--- /dev/null
+++ b/build
@@ -0,0 +1,219 @@
+#!/bin/bash
+
+################################################################################
+#                                    Config                                    #
+################################################################################
+
+# Whether to produce optimized, name-mangled, debugging-unfriendly JS (1) or unoptimized JS with assertions (0).
+OPTIMIZED=0
+
+# Which functions to export to the final Module object (passed to Emscripten as EXPORTED_FUNCTIONS).
+EXPORTS='["_main","_PyRun_SimpleStringFlags"]'
+
+# Directory where the LLVM binaries (llvm-link, opt) can be found.
+LLVM=/home/max/emscripten-workspace/llvm-build/Release/bin/
+
+# Which modules to build dynamically.
+# Maps each module name to the space-separated C files implementing it, relative to cpython/Modules.
+declare -A MODULES
+MODULES[array]="arraymodule.c"
+MODULES[cmath]="cmathmodule.c _math.c"
+MODULES[math]="mathmodule.c _math.c"
+MODULES[strop]="stropmodule.c"
+MODULES[time]="timemodule.c"
+MODULES[datetime]="datetimemodule.c timemodule.c"
+MODULES[itertools]="itertoolsmodule.c"
+MODULES[future_builtins]="future_builtins.c"
+MODULES[_random]="_randommodule.c"
+MODULES[_collections]="_collectionsmodule.c"
+MODULES[_bisect]="_bisectmodule.c"
+MODULES[_heapq]="_heapqmodule.c"
+MODULES[operator]="operator.c"
+MODULES[_functools]="_functoolsmodule.c"
+MODULES[_io]="_io/bufferedio.c _io/bytesio.c _io/fileio.c _io/iobase.c _io/_iomodule.c _io/stringio.c _io/textio.c"
+MODULES[_json]="_json.c"
+MODULES[_testcapi]="_testcapimodule.c"
+MODULES[_hotshot]="_hotshot.c"
+MODULES[_lsprof]="_lsprof.c rotatingtree.c"
+MODULES[unicodedata]="unicodedata.c"
+MODULES[_locale]="_localemodule.c"
+MODULES[fcntl]="fcntlmodule.c"
+MODULES[grp]="grpmodule.c"
+MODULES[spwd]="spwdmodule.c"
+MODULES[cStringIO]="cStringIO.c"
+MODULES[cPickle]="cPickle.c"
+MODULES[mmap]="mmapmodule.c"
+MODULES[syslog]="syslogmodule.c"
+MODULES[audioop]="audioop.c"
+MODULES[imageop]="imageop.c"
+MODULES[crypt]="cryptmodule.c"
+MODULES[_csv]="_csv.c"
+MODULES[_socket]="socketmodule.c"
+MODULES[_sha]="shamodule.c"
+MODULES[_md5]="md5module.c md5.c"
+MODULES[_sha256]="sha256module.c"
+MODULES[_sha512]="sha512module.c"
+MODULES[gdbm]="gdbmmodule.c"
+MODULES[termios]="termios.c"
+MODULES[resource]="resource.c"
+MODULES[nis]="nismodule.c"
+MODULES[_curses]="_cursesmodule.c"
+MODULES[_curses_panel]="_curses_panel.c"
+MODULES[zlib]="zlibmodule.c"
+MODULES[binascii]="binascii.c"
+MODULES[bz2]="bz2module.c"
+MODULES[_elementtree]="_elementtree.c"
+MODULES[_multibytecodec]="cjkcodecs/multibytecodec.c"
+MODULES[_codecs_kr]="cjkcodecs/_codecs_kr.c"
+MODULES[_codecs_jp]="cjkcodecs/_codecs_jp.c"
+MODULES[_codecs_cn]="cjkcodecs/_codecs_cn.c"
+MODULES[_codecs_tw]="cjkcodecs/_codecs_tw.c"
+MODULES[_codecs_hk]="cjkcodecs/_codecs_hk.c"
+MODULES[_codecs_iso2022]="cjkcodecs/_codecs_iso2022.c"
+MODULES[dl]="dlmodule.c"
+MODULES[pyexpat]="pyexpat.c expat/xmlparse.c expat/xmlrole.c expat/xmltok.c"
+MODULES[parser]="parsermodule.c"
+MODULES[_struct]="_struct.c"
+
+# These are baked into the main executable but can also be built separately.
+# MODULES[_symtable]="symtablemodule.c"
+# MODULES[zipimport]="zipimport.c"
+
+# These are disabled because they contain inline assembly.
+# MODULES[fpectl]="fpectlmodule.c"
+# MODULES[select]="selectmodule.c"
+# MODULES[linuxaudiodev]="linuxaudiodev.c"
+# MODULES[ossaudiodev]="ossaudiodev.c"
+
+################################################################################
+#                                    Script                                    #
+################################################################################
+
+# TODO: Make it possible to build only the main python.js, only modules or only
+#       a specific module.
+# TODO: Build the native version too, for comparison during debugging.
+
+# Exit on first error (any command returning a non-zero status aborts the build).
+set -e
+
+# Set up optimization flags for all future emscripting.
+if [ "$OPTIMIZED" -eq 1 ]; then
+  OPTIMIZATION_ARGS="-s OPTIMIZE=1 -s RELOOP=1 -s ASSERTIONS=0"
+else
+  OPTIMIZATION_ARGS="-s OPTIMIZE=0 -s RELOOP=0 -s ASSERTIONS=1"
+fi
+
+# Create two folders for intermediate (obj) and final (dist) files, respectively.
+mkdir -p obj
+mkdir -p dist
+
+# Remove old build.
+rm -rf obj/*
+rm -rf dist/*
+
+# Start building in the obj folder; every relative path below is relative to it.
+cd obj
+
+# Create the Makefile and configurations using a filtering proxy for llvm-gcc.
+CC=../ccproxy.py ../cpython/configure --without-threads --without-pymalloc
+
+# Adjust configuration by appending overrides to the generated pyconfig.h.
+# Remove the closing endif so we can insert new options.
+sed -i 's~#endif /\*Py_PYCONFIG_H\*/~~' pyconfig.h
+# Emscripten doesn't support CPU-specific assembly code.
+echo '#undef HAVE_GCC_ASM_FOR_X87' >> pyconfig.h
+# Emscripten does not support interrupt signals.
+echo '#undef HAVE_SIGACTION' >> pyconfig.h
+echo '#undef HAVE_SIGINTERRUPT' >> pyconfig.h
+echo '#undef HAVE_SIGRELSE' >> pyconfig.h
+# Put the closing endif back.
+echo '#endif /*Py_PYCONFIG_H*/' >> pyconfig.h
+
+# Compile CPython.
+echo 'Compiling...'
+make
+
+# Link the resulting libraries: unpack libpython2.7.a and link its objects plus python.o into python.bc.
+echo 'Relinking...'
+mkdir -p relinked
+cd relinked
+ar x ../libpython2.7.a
+cp ../Modules/python.o .
+"${LLVM}/llvm-link" -o=python.bc *.o
+cp python.bc ..
+cd ..
+
+# Run LLVM optimizations (or just copy the bitcode through when not optimizing).
+if [ "$OPTIMIZED" -eq 1 ]; then
+  echo 'Running LLVM optimizations...'
+  "${LLVM}/opt" -O3 -o python.opt.bc python.bc
+else
+  cp python.bc python.opt.bc
+fi
+
+# Compile the assembly into JS using Emscripten. $EXPORTS is quoted so its [...] is never glob-expanded;
+echo 'Emscripting...'
+# $OPTIMIZATION_ARGS stays unquoted so it splits into separate flags. TODO: Add -m/--dlmalloc once it's debugged.
+python2 ../emscripten/emscripten.py python.opt.bc -o python.js \
+  -s INCLUDE_FULL_LIBRARY=1 -s EXPORTED_FUNCTIONS="$EXPORTS" \
+  $OPTIMIZATION_ARGS
+
+# Copy the main Python JS executable to dist.
+cp python.js ../dist/python.js
+
+# Copy the pure Python modules to dist.
+# TODO: Remove unnecessary files and folders.
+cp -r ../cpython/Lib/* ../dist
+
+# Create a dummy modules list.
+mkdir -p ../dist/Modules
+touch ../dist/Modules/Setup
+
+# Build dynamically loaded modules.
+echo 'Building dynamic modules...'
+function build_module {
+  # Usage: build_module NAME SOURCE.c...  (sources are relative to cpython/Modules; emits ../dist/NAME.so.js)
+  echo "Building $1..."
+  # Compile each source to a bitcode object. Locals keep FLAGS/FILES/i out of the caller's scope (the caller loops on i).
+  local FLAGS="-c -emit-llvm -fPIC -fno-strict-aliasing -I. -IInclude -I../cpython/Include -Wstrict-prototypes"
+  local i FILES=""
+  for ((i=2; i<=$#; i++)); do
+    ../ccproxy.py $FLAGS "../cpython/Modules/${!i}" -o "$(basename "${!i}").o"
+    FILES="$FILES $(basename "${!i}").o"
+  done
+
+  # Link. ($FILES, like $FLAGS above, is deliberately unquoted so it splits into separate arguments.)
+  ../ccproxy.py $FILES -o "$1.so.bc"
+
+  # Optimize (or just copy the bitcode through when not optimizing).
+  if [ "$OPTIMIZED" -eq 1 ]; then
+    "${LLVM}/opt" -O3 -o "$1.so.opt.bc" "$1.so.bc"
+  else
+    cp "$1.so.bc" "$1.so.opt.bc"
+  fi
+
+  # Emscript as a shared library exporting the module's _init<NAME> function.
+  python2 ../emscripten/emscripten.py "$1.so.opt.bc" -o "$1.so.js" \
+    -s BUILD_AS_SHARED_LIB=1 -s EXPORTED_FUNCTIONS="[\"_init$1\"]" \
+    $OPTIMIZATION_ARGS
+
+  # Copy the new module to dist.
+  cp "$1.so.js" "../dist/$1.so.js"
+}
+for i in "${!MODULES[@]}"; do
+  build_module "$i" ${MODULES[$i]}
+done
+
+# Create virtual file system entries.
+echo 'Mapping virtual filesystem.'
+python2 ../map_filesystem.py ../dist ../dist/python.js
+
+# TODO: Optimize and compress the resulting JS using Closure.
+# Below is an old version that doesn't work right now.
+# if [ $OPTIMIZED -eq 1 ]; then
+#   echo 'Running closure...'
+#   closure --compilation_level ADVANCED_OPTIMIZATIONS --variable_map_output_file python.js.vars --js python.js --js_output_file python.opt.js
+# else
+#   cp ../dist/python.js ../dist/python.opt.js
+#   touch ../dist/python.js.vars
+# fi
diff --git a/ccproxy.py b/ccproxy.py
new file mode 100755
index 0000000..f2d8407
--- /dev/null
+++ b/ccproxy.py
@@ -0,0 +1,101 @@
+#!/usr/bin/env python2
+
+from __future__ import with_statement
+
+import os
+import subprocess
+import shutil
+import sys
+
+# LLVM commands.
+# TODO: Factor out to be configurable from ./build.
+CC = '/home/max/emscripten-workspace/llvm-gcc-install/bin/llvm-gcc'
+LINK = '/home/max/emscripten-workspace/llvm-build/Release/bin/llvm-link'
+LLC = '/home/max/emscripten-workspace/llvm-build/Release/bin/llc'
+
+# Argument filters applied to our own command line before it is forwarded.
+SKIPPED_CC_ARGS = [
+    # Don't run optimizations prematurely.
+    '-O1',
+    '-O2',
+    '-O3',
+]
+ADDITIONAL_CC_ARGS = [
+    # Compile for the most basic possible x86 machine.
+    '-m32',
+    # Compile with debugging info, so emscripten can easily access struct members.
+    '-g',
+    # Avoid architecture-specific optimizations.
+    '-U__i386__',
+    '-U__x86_64__',
+    '-U__SSE__',
+    # Our doubles are funky. Don't rely on accuracy.
+    '-UX87_DOUBLE_ROUNDING',
+    # We can't run inline assembly.
+    '-UHAVE_GCC_ASM_FOR_X87',
+    # Use plain old utime() instead of utimes().
+    '-UHAVE_UTIMES',
+    # Tell expat we actually have MEMMOVE, despite its belief in the opposite.
+    '-DHAVE_MEMMOVE',
+    # Tell elementtree that we have expat available.
+    '-DUSE_PYEXPAT_CAPI',
+]
+ALLOWED_LINK_ARGS = [
+    # Only these options are forwarded to the linker; in particular, don't allow linking to external libraries.
+    '-f',
+    '-help',
+    '-o',
+    '-print-after',
+    '-print-after-all',
+    '-print-before',
+    '-print-before-all',
+    '-time-passes',
+    '-v',
+    '-verify-dom-info',
+    '-version',
+]
+
+# Determine whether we want to call the linker (the default) or the compiler (on --version or any .c argument).
+call = LINK
+for arg in sys.argv[1:]:
+    if arg == '--version' or (not arg.startswith('-') and arg.endswith('.c')):
+        call = CC
+        break
+
+# Filter the args: the compiler loses SKIPPED_CC_ARGS and gains ADDITIONAL_CC_ARGS; the linker keeps only
+if call == CC:
+    newargs = [arg for arg in sys.argv[1:] if arg not in SKIPPED_CC_ARGS]
+    newargs += ADDITIONAL_CC_ARGS
+    if 'conftest.c' not in newargs:
+        # For real files (rather than tests, i.e. conftest.c), we want to compile to LLVM bytecode.
+        newargs.append('-emit-llvm')
+        newargs.append('-c')
+else:
+    def isArgAllowed(arg):
+        return not arg.startswith('-') or arg.split('=')[0] in ALLOWED_LINK_ARGS
+    newargs = [arg for arg in sys.argv[1:] if isArgAllowed(arg)]
+
+# (linker filter, continued) non-option arguments and options whose name before '=' is whitelisted. Then run the linker or compiler.
+with open('ccproxy.log', 'a') as log:
+    log.write('## Called with %s\n' % ' '.join(sys.argv))
+    if any(i.startswith('-print-prog-name') for i in sys.argv):
+        # Redirect any program name queries to us.
+        print sys.argv[0]
+        ret = 0
+    elif call == LINK and 'libpython2.7.a' in sys.argv:
+        # We don't care about the final Python binary. Create a fake.
+        with open('python', 'w') as fake_python_file:
+            fake_python_file.write('#!/bin/bash\n')
+        # Make the fake executable; chmod's status becomes our exit status.
+        ret = subprocess.call(['chmod', '+x', 'python'])
+    else:
+        log.write('** Calling %s %s\n\n' % (call, ' '.join(newargs)))
+        ret = subprocess.call([call] + newargs)
+        if ret == 0 and call == LINK and 'Parser/pgen' in sys.argv:
+            # We want to compile the parser generator to native code; a failing llc/cc step now fails the call.
+            target = 'Parser/pgen'
+            ret = (subprocess.call([LLC, '--filetype=obj', target, '-o', target + '.tmp.o']) or
+                   subprocess.call([CC, '-m32', target + '.tmp.o', '-o', target]))
+            os.unlink(target + '.tmp.o')
+# Pass the subprocess result to the caller.
+sys.exit(ret)
diff --git a/map_filesystem.py b/map_filesystem.py
new file mode 100755
index 0000000..b56edac
--- /dev/null
+++ b/map_filesystem.py
@@ -0,0 +1,41 @@
+#!/usr/bin/env python2
+
+from __future__ import with_statement
+
+import os
+import sys
+
+def main(root, main_filename):
+    """Embed virtual-filesystem setup commands for root into main_filename.
+
+    Walks root and emits one FS.createFolder(...) call per directory and one
+    FS.createLazyFile(...) call per file (a trailing '.js' is dropped from the
+    virtual file name). The joined commands replace the
+    '// {{PRE_RUN_ADDITIONS}}' marker in main_filename, which is rewritten in
+    place. As a side effect the process working directory becomes root.
+    """
+    # Resolve the target before chdir; a relative main_filename is meant to be
+    # relative to the caller's directory, not to root.
+    main_filename = os.path.abspath(main_filename)
+    os.chdir(root)
+    commands = []
+    for (dirpath, dirnames, filenames) in os.walk('.'):
+        for folder in dirnames:
+            commands.append('FS.createFolder("%s", "%s", true, true);' %
+                            (dirpath, folder))
+        for filename in filenames:
+            jsless_filename = filename[:-3] if filename.endswith('.js') else filename
+            commands.append('FS.createLazyFile("%s", "%s", "%s", true, false);' %
+                            (dirpath, jsless_filename, filename))
+
+    with open(main_filename, 'r') as infile:
+        src = infile.read().replace('// {{PRE_RUN_ADDITIONS}}', '\n'.join(commands))
+
+    with open(main_filename, 'w') as outfile:
+        outfile.write(src)
+
+if __name__ == '__main__':
+    if len(sys.argv) != 3:
+        # Exit non-zero (usage goes to stderr) so calling scripts notice misuse.
+        sys.exit('Usage: %s root outfile' % sys.argv[0])
+    main(sys.argv[1], sys.argv[2])