forked from hiraditya/profile-guided-opt
-
Notifications
You must be signed in to change notification settings - Fork 0
/
slides.tex
146 lines (128 loc) · 4.72 KB
/
slides.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
\documentclass{beamer}
\usepackage{hyperref}
\begin{document}
\title{Profile guided optimization using open source tools}
\author{Sebastian Pop and Aditya Kumar}
\institute{SARC: Samsung Austin R\&D Center}
%\date{May 5, 2015}
\date{\today}
\frame{\titlepage}
\frame{\frametitle{Profile guided optimization (Introduction)}
\begin{itemize}
\item Iterative approach to program optimization.
\begin{itemize}
\item Compile $\rightarrow$ Run + Collect Profile $\rightarrow$ Process Profile data $\rightarrow$ Compile with profile info.\ $\rightarrow$ Run
\end{itemize}
\item Intrusive approach (PGO)
\item Non-intrusive approach (AutoFDO)
\end{itemize}
}
\frame{\frametitle{Open source tools}
\begin{itemize}
\item Compiler: gcc, llvm
\item Gprof
\item Linux Perf
\item Create\_gcov
\item Gooda
\end{itemize}
}
\frame{\frametitle{PGO: Compiler instrumentation}
\begin{itemize}
\item Compiler inserts probes in the code.
\item Instrumented program collects profile as it executes.
\item Gives very precise profile information.
\item Good for benchmarking, analyzing small programs.
\item gprof/gcov are used to process and display the sampled output.
\end{itemize}
}
\frame{\frametitle{PGO: Compiler instrumentation (Useful Commands)}
gprof:
\begin{itemize}
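% gprof reads gmon.out, which gcc-built programs emit only when compiled with -pg
% (-fprofile-generate writes .gcda files for -fprofile-use instead).
\item gcc -pg test.cpp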
\item gprof [exe] [gmon.out] $>$ [outfile]
\end{itemize}
gcov:
\begin{itemize}
\item gcc -fprofile-arcs -ftest-coverage test.cpp
\item gcov [options] test.cpp
\end{itemize}
}
\frame{\frametitle{AutoFDO: Non Intrusive}
\begin{itemize}
\item Linux perf collects profiles (Sample based)
\item No compiler instrumentation, debug-info required while running.
\item Negligible overhead (1--2\%) [Google]
\item Run on different machines and at different time intervals.
\item Good for system wide profiling.
\end{itemize}
}
\frame{\frametitle{AutoFDO: Non Intrusive (Useful Commands)}
\begin{itemize}
\item perf record/report/stat
\item collecting profile: perf record -b [-o perf.data] [exe]
\item report: perf report [-i perf.data]
\end{itemize}
}
\frame{\frametitle{Results [Baptiste Wicht et al.]}
\begin{tabular}{| l | p {2.5cm} | p {3cm} | }
\hline
PGO Kind & Avg. Run Time Overhead (\%) & Performance Improvement (\%) \\ \hline
Instrumented & 16 (highest 53) & 7 to 14 \\ \hline
AutoFDO & 1 to 2 & 5 to 10 \\ \hline
AutoFDO+LTO & 1 to 2 & 10 to 30 \\ \hline
\end{tabular}
}
\frame{\frametitle{Challenges for non-intrusive AutoFDO}
\begin{itemize}
\item Value-based profiling not available.
\item Only works on recent Intel machines.
\item Tradeoff between accuracy of profile and overhead.
\item Goal is to be as close as possible to the instrumented profile.
\end{itemize}
}
\frame{\frametitle{Challenges for non-intrusive AutoFDO\dots}
\begin{itemize}
\item Calculating missing edge/basic-block frequencies based on equivalence classes.
\item Calculating an accurate profile of execution paths sharing a single line of code based on DWARF discriminators [Baptiste Wicht et al.].
\end{itemize}
}
\frame{\frametitle{create\_gcov}
\begin{itemize}
\item Creates coverage file from perf.data to be used by the compiler.
\item optimize with profile information: gcc -fauto-profile=file.gcov -O2 test.cpp
\item Source: \url{https://github.com/google/autofdo}
\end{itemize}
}
\frame{\frametitle{gooda}
\begin{itemize}
\item gooda-analyzer
\item gooda-visualizer
\item Source: \url{https://github.com/David-Levinthal/gooda}
\item Requires patching linux perf to link against libpfm4 [See: gooda-analyzer/README]
\item Requires LBR (Available in Intel Sandy Bridge, Ivy Bridge, Westmere)
\end{itemize}
}
\frame{\frametitle{Interesting Areas}
Tools related:
\begin{itemize}
\item Extend PGO infrastructure in gcc/llvm:
currently PGO benefits the inliner, code layout, and register allocation.
\item Compiler cost model based on profile to enable demand-driven optimization.
\item Online optimizations driven by hardware performance monitoring [Schneider et al.].
\end{itemize}
Architecture specific:
\begin{itemize}
\item No AutoFDO tool for generating value profiles.
\item No LBR for ARM.
\end{itemize}
}
\frame{\frametitle{References}
\begin{itemize}
\item \url{http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/36575.pdf}
\item Baptiste Wicht et al. ``Hardware Counted Profile-Guided Optimization.''
\item \url{http://www.burningcutlery.com/derek/docs/instant-profiling-CGO13.pdf}
\item \url{https://github.com/David-Levinthal/gooda}
\item \url{https://github.com/google/autofdo}
\end{itemize}
}
\end{document}