%% FlashR.tex
%% For double-blind review submission
\documentclass[sigplan,10pt]{acmart}\settopmatter{printfolios=true,printccs=false,printacmref=false}
%% For single-blind review submission
%\documentclass[sigplan,10pt,review]{acmart}\settopmatter{printfolios=true}
%% For final camera-ready submission
%\documentclass[sigplan,10pt]{acmart}\settopmatter{}
%% Note: Authors migrating a paper from traditional SIGPLAN
%% proceedings format to PACMPL format should change 'sigplan' to
%% 'acmsmall'.
%% Some recommended packages.
\usepackage{booktabs} %% For formal tables:
%% http://ctan.org/pkg/booktabs
\usepackage{subcaption} %% For complex figures with subfigures/subcaptions
%% http://ctan.org/pkg/subcaption
\usepackage{minted}
\usepackage{color,soul}
%% Some recommended packages.
\usepackage{booktabs} %% For formal tables:
%% http://ctan.org/pkg/booktabs
\usepackage{subcaption} %% For complex figures with subfigures/subcaptions
%% http://ctan.org/pkg/subcaption
\textheight=8.75in
\newcommand{\dz}[1]{{\color{blue}{\it DZ: #1}}}
%% Conference information
%% Supplied to authors by publisher for camera-ready submission;
%% use defaults for review submission.
\startPage{1}
\copyrightyear{2018}
\acmYear{2018}
\setcopyright{acmcopyright}
\acmConference[PPoPP '18]{PPoPP '18: 23rd ACM SIGPLAN Symposium on
Principles and Practice of Parallel Programming}{February 24--28,
2018}{Vienna, Austria}
%\acmBooktitle{PPoPP '18: 23rd ACM SIGPLAN Symposium on
% Principles and Practice of Parallel Programming, February 24--28, 2018,
%Vienna, Austria}
\acmPrice{15.00}
\acmDOI{10.1145/3178487.3178501}
\acmISBN{978-1-4503-4982-6/18/02}
%% Copyright information
%% Supplied to authors (based on authors' rights management selection;
%% see authors.acm.org) by publisher for camera-ready submission
%\setcopyright{none} %% For review submission
\setcopyright{acmcopyright}
%\setcopyright{acmlicensed}
%\setcopyright{rightsretained}
%\copyrightyear{2017} %% If different from \acmYear
%% Bibliography style
\bibliographystyle{ACM-Reference-Format}
%% Citation style
%% Note: author/year citations are required for papers published as an
%% issue of PACMPL.
%\citestyle{acmauthoryear} %% For author/year citations
%\citestyle{acmnumeric} %% For numeric citations
%\setcitestyle{nosort} %% With 'acmnumeric', to disable automatic
%% sorting of references within a single citation;
%% e.g., \cite{Smith99,Carpenter05,Baker12}
%% rendered as [14,5,2] rather than [2,5,14].
%\setcitestyle{nocompress} %% With 'acmnumeric', to disable automatic
%% compression of sequential references within a
%% single citation;
%% e.g., \cite{Baker12,Baker14,Baker16}
%% rendered as [2,3,4] rather than [2-4].
\pagenumbering{gobble}
\begin{document}
\title{FlashR: Parallelize and Scale R for Machine Learning using SSDs}
%% Author information
%% Contents and number of authors suppressed with 'anonymous'.
%% Each author should be introduced by \author, followed by
%% \authornote (optional), \orcid (optional), \affiliation, and
%% \email.
%% An author may have multiple affiliations and/or emails; repeat the
%% appropriate command.
%% Many elements are not rendered, but should be provided for metadata
%% extraction tools.
%% Author with single affiliation.
%\author[1]{\rm Da Zheng}
%\author[1]{\rm Disa Mhembere}
%\author[3]{\rm Joshua T. Vogelstein}
%\author[2]{\rm Carey E. Priebe}
%\author[1]{\rm Randal Burns}
%\affil[1]{Department of Computer Science, Johns Hopkins University}
%\affil[2]{Department of Applied Mathematics and Statistics, Johns Hopkins University}
%\affil[3]{Department of Biomedical Engineering, Johns Hopkins University}
\author{Da Zheng}
\authornote{This work was done while the author was at Johns Hopkins University.} %% \authornote is optional;
\affiliation{\footnotesize
\institution{Amazon} %% \institution is required
}
\author{Disa Mhembere}
\affiliation{
\institution{\footnotesize Dept. of Computer Science, \\
Johns Hopkins University}
}
\author{Joshua T. Vogelstein}
\affiliation{
\institution{\footnotesize Institute for Computational Medicine, \\
Dept. of Biomedical Engineering, \\
Johns Hopkins University}
}
\author{Carey E. Priebe}
\affiliation{
\institution{\footnotesize Dept. of Applied Math and Statistics, \\
Johns Hopkins University}
}
\author{Randal Burns}
\affiliation{
\institution{\footnotesize Dept. of Computer Science, \\
Johns Hopkins University}
}
\renewcommand{\shortauthors}{D. Zheng, D. Mhembere, J.T. Vogelstein, C.E. Priebe
and R. Burns}
%% Paper note
%% The \thanks command may be used to create a "paper note" ---
%% similar to a title note or an author note, but not explicitly
%% associated with a particular element. It will appear immediately
%% above the permission/copyright statement.
%\thanks{with paper note} %% \thanks is optional
%% can be repeated if necessary
%% contents suppressed with 'anonymous'
%% Abstract
%% Note: \begin{abstract}...\end{abstract} environment must come
%% before \maketitle command
\begin{abstract}
R is one of the most popular programming languages for statistics and machine
learning, but it is slow and unable to scale to large
datasets. The general approach for having an efficient algorithm in R is to
implement it in C or FORTRAN and provide an R wrapper. FlashR accelerates
and scales existing R code by parallelizing
a large number of matrix functions in the R \textit{base} package and
scaling them beyond memory capacity with solid-state drives (SSDs).
FlashR performs memory hierarchy aware execution to speed up parallelized
R code by
\textit{(i)} evaluating matrix operations lazily,
\textit{(ii)} performing all operations in a DAG in a single execution
and with only one pass over data to increase the ratio of computation to I/O,
and \textit{(iii)} performing two levels of matrix partitioning and reordering
computation on matrix partitions to reduce data movement in the memory hierarchy.
We evaluate FlashR on various machine learning and statistics algorithms
on inputs of up to four billion data points. Despite the huge performance
gap between SSDs and RAM, FlashR on SSDs closely tracks the performance
of FlashR in memory for many algorithms. The R implementations
in FlashR outperform H$_2$O and Spark MLlib by a factor of $3$--$20$.
\end{abstract}
%% 2012 ACM Computing Classification System (CCS) concepts
%% Generate at 'http://dl.acm.org/ccs/ccs.cfm'.
\begin{CCSXML}
<ccs2012>
<concept>
<concept_id>10011007.10011006.10011008</concept_id>
<concept_desc>Software and its engineering~General programming languages</concept_desc>
<concept_significance>500</concept_significance>
</concept>
<concept>
<concept_id>10003456.10003457.10003521.10003525</concept_id>
<concept_desc>Social and professional topics~History of programming languages</concept_desc>
<concept_significance>300</concept_significance>
</concept>
</ccs2012>
\end{CCSXML}
%\ccsdesc[500]{Software and its engineering~General programming languages}
%\ccsdesc[300]{Social and professional topics~History of programming languages}
%% End of generated code
%% Keywords
%% comma separated list
\keywords{R, parallel, machine learning, solid-state drives} %% \keywords is optional
%% \maketitle
%% Note: \maketitle command must come after title commands, author
%% commands, abstract environment, Computing Classification System
%% environment and commands, and keywords command.
\maketitle
\section{Introduction}
\input{intro}
\vspace{-10pt}
\section{Related Work}
\input{relwork}
\input{design}
\input{eval}
\section{Conclusions}
\input{conclusion}
\section{Acknowledgements}
We would like to thank the PPoPP reviewers for their insightful comments.
This work is supported by NSF Grant \# 1649880.
%% Bibliography
{\footnotesize \bibliographystyle{acm}
\bibliography{kdd17}}
%\bibliography{bibfile}
\end{document}