% 20110816-Bebop.tex
% Forked from jedbrown/talks.
% ---------------------------------------------------------------------------
% Preamble: Beamer class, presentation theme, fonts, and packages.
% ---------------------------------------------------------------------------
% Use the first line instead of the second to build in Beamer's handout mode.
%\documentclass[handout]{beamer}
\documentclass{beamer}

% Theme choices; the braces restrict them to presentation mode.
\mode<presentation>
{
  \usetheme{default}
  \usefonttheme[onlymath]{serif}    % serif glyphs for math only
  % Alternative themes, kept for quick switching:
  %\usetheme{Singapore}
  %\usetheme{Warsaw}
  %\usetheme{Malmoe}
  % \useinnertheme{circles}
  % \useoutertheme{infolines}
  % \useinnertheme{rounded}
  \setbeamercovered{transparent=5}  % covered overlay items stay faintly visible
}

\usepackage[english]{babel}
\usepackage[latin1]{inputenc}
\usepackage{textpos,alltt,listings,multirow,ulem,siunitx}

% \hmmax and \bmmax must be defined before bm is loaded: setting both to 0
% stops bm from allocating extra math alphabets for heavy/bold fonts.
\newcommand\hmmax{0}
\newcommand\bmmax{0}
\usepackage{bm}

% font definitions, try \usepackage{ae} instead of the following
% three lines if you don't like this look
\usepackage{mathptmx}
\usepackage[scaled=.90]{helvet}
%\usepackage{courier}
\usepackage[T1]{fontenc}

\usepackage{tikz}
% LaTeX syntax takes the library list in braces (brackets are the ConTeXt form).
\usetikzlibrary{shapes,shapes.arrows,arrows,shapes.misc,fit,positioning}

% Uncomment to print four slides per A4 landscape page:
% \usepackage{pgfpages}
% \pgfpagesuselayout{4 on 1}[a4paper,landscape,border shrink=5mm]

% Project-local macros (defined outside this file).
\input{JedMacros.tex}
% ---------------------------------------------------------------------------
% Title-page metadata, consumed by \titlepage.
% ---------------------------------------------------------------------------
\title{Writing Assembler in Python}
\subtitle{Making up for in-order execution on PowerPC 450}
% The optional argument is the short author name. One author is set in bold
% (presumably the speaker -- confirm).
\author[Jed Brown]{Aron Ahmadia\inst{1}, \textbf{Jed Brown}\inst{2}, Nathan Collier\inst{1}, Tareq Malas\inst{1}, John Gunnels\inst{3}}
% - Use the \inst command only if there are several affiliations.
% - Keep it simple, no one is interested in your street address.
% The numbers match the \inst markers attached to the author names above.
\institute[ANL]
{
  \inst{1}{King Abdullah University of Science and Technology}\and
  \inst{2}{Argonne National Laboratory} / {ETH Z\"urich}\and
  \inst{3}{IBM Watson}
}
% NOTE(review): the file name carries 20110816 while the date below reads
% 2011-07-13 -- confirm which one is intended.
\date{BEBOP 2011-07-13}
% This is only inserted into the PDF information catalog. Can be left
% out.
\subject{Talks}
% If you have a file called "university-logo-filename.xxx", where xxx
% is a graphic format that can be processed by latex or pdflatex,
% resp., then you can add a logo as follows:
% \pgfdeclareimage[height=0.5cm]{university-logo}{university-logo-filename}
% \logo{\pgfuseimage{university-logo}}
% Delete this, if you do not want the table of contents to pop up at
% the beginning of each subsection:
% \AtBeginSubsection[]
% {
% \begin{frame}<beamer>
% \frametitle{Outline}
% \tableofcontents[currentsection,currentsubsection]
% \end{frame}
% }
% If you wish to uncover everything in a step-wise fashion, uncomment
% the following command:
%\beamerdefaultoverlayspecification{<+->}
% ---------------------------------------------------------------------------
% Document body.
% ---------------------------------------------------------------------------
\begin{document}
% Default language for code typeset with the listings package.
\lstset{language=C}
% ulem is loaded in the preamble; \normalem keeps \emph as ordinary emphasis
% rather than underlining.
\normalem
% Title slide, built from the \title/\author/\institute/\date metadata.
\begin{frame}
\titlepage
\end{frame}
% Slides kept in separate files; the order here is the presentation order.
\input{slides/BlueGeneP.tex}
\input{slides/SimASM/Purpose.tex}
\input{slides/SimASM/Stencil.tex}
\input{slides/SimASM/StencilResults.tex}
\input{slides/SimASM/StencilImplementation.tex}
\input{slides/IGA/Intro.tex}
% Frame: why the Blue Gene/P work matters elsewhere, and the alternatives.
\begin{frame}{Perspective on SimASM}
  % Architectural traits listed as representative of future machines.
  \begin{block}{Blue Gene/P is representative of future architectures}
    \begin{itemize}
      \item In-order execution
      \item Longer FP registers
      \item More cores
      \item Less memory bandwidth
    \end{itemize}
  \end{block}
  % Routes to near-peak performance on other platforms.
  \begin{block}{Need some way to get close to peak performance}
    \begin{itemize}
      \item SSE intrinsics are pretty good on Intel/AMD
        \begin{itemize}
          \item Better designed intrinsic API
          \item Out of order execution more tolerant
          \item Fewer registers
          \item Lightweight templating (\eg Mako) might be good enough
        \end{itemize}
      \item Interesting alternatives
        \begin{itemize}
          \item OpenCL (wide vectorization, different memory model)
          \item Intel SPMD Program Compiler (\url{ispc.github.com})
        \end{itemize}
    \end{itemize}
  \end{block}
\end{frame}
% Frame: outlook for stencils, IGA/FEM, and SimASM itself.
\begin{frame}{Outlook}
  \begin{block}{Stencils}
    \begin{itemize}
      \item SIMD instructions are less snazzy with longer registers (\texttt{fxcxma})
      \item ``Pure'' stencils are so absurdly limited
        \begin{itemize}
          \item linear constant coefficient, diffusive, Cartesian mesh
          % Manual line break keeps the parenthetical on its own line.
          \item storing variable coefficients completely changes the problem \\
            (at least an order of magnitude more memory traffic per flop)
        \end{itemize}
    \end{itemize}
  \end{block}
  \begin{block}{Lots more to do with IGA/FEM}
    \begin{itemize}
      \item Library interface for vectorized physics/assembly
      \item Connecting structured blocks (T-splines)
      \item Algorithmic (analytic Jacobians, preconditioning)
    \end{itemize}
  \end{block}
  % Items carry no trailing period, matching the other lists in the deck.
  \begin{block}{SimASM}
    \begin{itemize}
      \item Better optimization framework
      \item Different target architectures (\eg Blue Gene/Q, Knights Corner)
      \item Interface improvements/visualization
      \item Code generation from high-level/symbolic description?
      \item \url{bitbucket.org/ahmadia/simasm}
    \end{itemize}
  \end{block}
\end{frame}
% Two further slide files, included after the Outlook frame.
\input{slides/Dohp/RepresentationOfJacobians.tex}
\input{slides/Dohp/TensorVsAssembly.tex}
\end{document}