-
Notifications
You must be signed in to change notification settings - Fork 1
/
index.html
339 lines (260 loc) · 59.4 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
<!DOCTYPE html><html lang="zh-CN" data-theme="light"><head><meta charset="UTF-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width,initial-scale=1"><title>ZeroRains Blog - No matter what happens, I will do my best.</title><meta name="author" content="zerorains,[email protected]"><meta name="copyright" content="zerorains"><meta name="format-detection" content="telephone=no"><meta name="theme-color" content="#ffffff"><meta name="description" content="No matter what happens, I will do my best.">
<meta property="og:type" content="website">
<meta property="og:title" content="ZeroRains Blog">
<meta property="og:url" content="http://blog.zerorains.top/index.html">
<meta property="og:site_name" content="ZeroRains Blog">
<meta property="og:description" content="No matter what happens, I will do my best.">
<meta property="og:locale" content="zh_CN">
<meta property="og:image" content="http://blog.zerorains.top/assets/apple-touch-icon.png">
<meta property="article:author" content="zerorains">
<meta name="twitter:card" content="summary">
<meta name="twitter:image" content="http://blog.zerorains.top/assets/apple-touch-icon.png"><link rel="shortcut icon" href="/assets/favicon.ico"><link rel="canonical" href="http://blog.zerorains.top/"><link rel="preconnect" href="//cdn.jsdelivr.net"/><link rel="preconnect" href="//busuanzi.ibruce.info"/><link rel="stylesheet" href="/css/index.css"><link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@fortawesome/fontawesome-free/css/all.min.css" media="print" onload="this.media='all'"><script>const GLOBAL_CONFIG = {
// Site root path used to resolve page/asset URLs.
root: '/',
// Algolia search disabled — local search (below) is used instead.
algolia: undefined,
// Local search: index file path plus the zh-CN "no results" template string.
localSearch: {"path":"search.xml","languages":{"hits_empty":"找不到您查询的内容:${query}"}},
translate: undefined,
// "This post may be outdated" notice is disabled.
noticeOutdate: undefined,
// NOTE(review): "highlighjs" looks like a typo of "highlightjs", but this
// value is matched by theme JS not visible here — confirm before changing.
highlight: {"plugin":"highlighjs","highlightCopy":true,"highlightLang":true,"highlightHeightLimit":false},
// Clipboard-copy feedback messages (success / error / unsupported browser).
copy: {
success: '复制成功',
error: '复制错误',
noSupport: '浏览器不支持'
},
// Whether homepage / post dates render as relative ("3 天前") instead of absolute.
relativeDate: {
homepage: false,
post: false
},
// Unit suffix ("days") for the site-uptime counter.
runtime: '天',
// Relative-date suffixes: just now / minutes / hours / days / months ago.
date_suffix: {
just: '刚刚',
min: '分钟前',
hour: '小时前',
day: '天前',
month: '个月前'
},
copyright: undefined,
// Image lightbox implementation; its assets come from `source` below.
lightbox: 'fancybox',
Snackbar: undefined,
// CDN URLs for lazily-loaded third-party libraries.
source: {
jQuery: 'https://cdn.jsdelivr.net/npm/jquery@latest/dist/jquery.min.js',
justifiedGallery: {
js: 'https://cdn.jsdelivr.net/npm/justifiedGallery/dist/js/jquery.justifiedGallery.min.js',
css: 'https://cdn.jsdelivr.net/npm/justifiedGallery/dist/css/justifiedGallery.min.css'
},
fancybox: {
js: 'https://cdn.jsdelivr.net/npm/@fancyapps/fancybox@latest/dist/jquery.fancybox.min.js',
css: 'https://cdn.jsdelivr.net/npm/@fancyapps/fancybox@latest/dist/jquery.fancybox.min.css'
}
},
isPhotoFigcaption: false,
islazyload: false,
isanchor: false
}</script><script id="config-diff">var GLOBAL_CONFIG_SITE = {
// Per-page flags for the current render (home page, not a post).
isPost: false,
isHome: true,
isHighlightShrink: false,
isToc: false, // no table of contents on the home page
// Timestamp of the most recent post update, used by theme widgets.
postUpdate: '2023-05-01 15:47:18'
}</script><noscript><style type="text/css">
/* No-JS fallback: these elements are normally faded in by script,
   so force them visible when JavaScript is disabled. */
#nav {
opacity: 1
}
.justified-gallery img {
opacity: 1
}
/* Post timestamps are normally rewritten by JS; show the static <time>. */
#recent-posts time,
#post-meta time {
display: inline !important
}
</style></noscript><script>(win=>{
// Small localStorage wrapper that stores values together with an expiry time.
win.saveToLocal = {
  // Persist `value` under `key` for `ttl` days.
  // A ttl of 0 means "do not persist" (caller treats the value as one-off).
  set: function setWithExpiry(key, value, ttl) {
    if (ttl === 0) return
    const now = new Date()
    const expiryDay = ttl * 86400000 // days -> milliseconds
    const item = {
      value: value,
      expiry: now.getTime() + expiryDay,
    }
    localStorage.setItem(key, JSON.stringify(item))
  },
  // Return the stored value, or undefined when the key is missing,
  // expired, or holds corrupt (non-JSON) data.
  get: function getWithExpiry(key) {
    const itemStr = localStorage.getItem(key)
    if (!itemStr) {
      return undefined
    }
    let item
    try {
      item = JSON.parse(itemStr)
    } catch (e) {
      // Corrupt entry (e.g. written by other code under the same key):
      // drop it instead of throwing and aborting this inline theme script,
      // which would leave the page without its saved theme/aside state.
      localStorage.removeItem(key)
      return undefined
    }
    const now = new Date()
    if (now.getTime() > item.expiry) {
      localStorage.removeItem(key)
      return undefined
    }
    return item.value
  }
}
// Dynamically load an external script and resolve once it has executed
// (rejects on network/parse failure). Returned Promise lets callers chain
// setup that depends on the library being present.
win.getScript = url => new Promise((resolve, reject) => {
  const script = document.createElement('script')
  script.src = url
  script.async = true
  script.onerror = reject
  // The old `onreadystatechange` dance was only ever needed for legacy IE,
  // which cannot parse this file's arrow functions in the first place —
  // `onload` alone is sufficient in every browser that runs this code.
  script.onload = () => {
    script.onload = null
    resolve()
  }
  document.head.appendChild(script)
})
// Switch the document to the dark palette and keep the browser chrome
// (<meta name="theme-color">) in sync with it.
win.activateDarkMode = function () {
  document.documentElement.setAttribute('data-theme', 'dark')
  const themeColorMeta = document.querySelector('meta[name="theme-color"]')
  if (themeColorMeta !== null) {
    themeColorMeta.setAttribute('content', '#0d0d0d')
  }
}
// Switch the document to the light palette and keep the browser chrome
// (<meta name="theme-color">) in sync with it.
win.activateLightMode = function () {
  document.documentElement.setAttribute('data-theme', 'light')
  const themeColorMeta = document.querySelector('meta[name="theme-color"]')
  if (themeColorMeta !== null) {
    themeColorMeta.setAttribute('content', '#ffffff')
  }
}
// Apply the visitor's persisted preferences inline, before first paint,
// so the page does not flash the wrong theme or aside state.
const savedTheme = saveToLocal.get('theme')
if (savedTheme === 'dark') {
  activateDarkMode()
} else if (savedTheme === 'light') {
  activateLightMode()
}
const asideStatus = saveToLocal.get('aside-status')
if (asideStatus !== undefined) {
  // 'hide' adds the class, anything else removes it — same effect as the
  // original if/else, expressed as a method dispatch.
  const op = asideStatus === 'hide' ? 'add' : 'remove'
  document.documentElement.classList[op]('hide-aside')
}
})(window)</script><meta name="generator" content="Hexo 5.4.0"></head><body><div id="web_bg"></div><div id="sidebar"><div id="menu-mask"></div><div id="sidebar-menus"><div class="author-avatar"><img class="avatar-img" src="/assets/apple-touch-icon.png" onerror="onerror=null;src='/img/friend_404.gif'" alt="avatar"/></div><div class="site-data"><div class="data-item is-center"><div class="data-item-link"><a href="/archives/"><div class="headline">文章</div><div class="length-num">90</div></a></div></div><div class="data-item is-center"><div class="data-item-link"><a href="/tags/"><div class="headline">标签</div><div class="length-num">101</div></a></div></div><div class="data-item is-center"><div class="data-item-link"><a href="/categories/"><div class="headline">分类</div><div class="length-num">13</div></a></div></div></div><hr/><div class="menus_items"><div class="menus_item"><a class="site-page" href="/"><i class="fa-fw fas fa-home"></i><span> 主页</span></a></div><div class="menus_item"><a class="site-page" href="/archives/"><i class="fa-fw fas fa-archive"></i><span> 时间轴</span></a></div><div class="menus_item"><a class="site-page" href="/tags/"><i class="fa-fw fas fa-tags"></i><span> 标签</span></a></div><div class="menus_item"><a class="site-page" href="/categories/"><i class="fa-fw fas fa-folder-open"></i><span> 分类</span></a></div><div class="menus_item"><a class="site-page" href="/drink/"><i class="fa-fw fas fa-mug-hot"></i><span> 请我喝茶</span></a></div><div class="menus_item"><a class="site-page" target="_blank" rel="noopener" href="https://ml.akasaki.space/"><i class="fa-fw fas fa-link"></i><span> DL笔记</span></a></div><div class="menus_item"><a class="site-page" target="_blank" rel="noopener" href="https://notebook.therainisme.com/"><i class="fa-fw fas fa-link"></i><span> 急救箱</span></a></div></div></div></div><div class="page" id="body-wrap"><header class="full_page" id="page-header" style="background-image: url('/img/head_background.png')"><nav id="nav"><span 
id="blog_name"><a id="site-name" href="/">ZeroRains Blog</a></span><div id="menus"><div id="search-button"><a class="site-page social-icon search"><i class="fas fa-search fa-fw"></i><span> 搜索</span></a></div><div class="menus_items"><div class="menus_item"><a class="site-page" href="/"><i class="fa-fw fas fa-home"></i><span> 主页</span></a></div><div class="menus_item"><a class="site-page" href="/archives/"><i class="fa-fw fas fa-archive"></i><span> 时间轴</span></a></div><div class="menus_item"><a class="site-page" href="/tags/"><i class="fa-fw fas fa-tags"></i><span> 标签</span></a></div><div class="menus_item"><a class="site-page" href="/categories/"><i class="fa-fw fas fa-folder-open"></i><span> 分类</span></a></div><div class="menus_item"><a class="site-page" href="/drink/"><i class="fa-fw fas fa-mug-hot"></i><span> 请我喝茶</span></a></div><div class="menus_item"><a class="site-page" target="_blank" rel="noopener" href="https://ml.akasaki.space/"><i class="fa-fw fas fa-link"></i><span> DL笔记</span></a></div><div class="menus_item"><a class="site-page" target="_blank" rel="noopener" href="https://notebook.therainisme.com/"><i class="fa-fw fas fa-link"></i><span> 急救箱</span></a></div></div><div id="toggle-menu"><a class="site-page"><i class="fas fa-bars fa-fw"></i></a></div></div></nav><div id="site-info"><h1 id="site-title">ZeroRains Blog</h1><div id="site-subtitle"><span id="subtitle"></span></div><div id="site_social_icons"><a class="social-icon" href="tencent://message?uin=2274033547" target="_blank" title="qq"><i class="fab fa-qq"></i></a><a class="social-icon" href="https://github.com/zeroRains" target="_blank" title="Github"><i class="fab fa-github"></i></a><a class="social-icon" href="mailto:[email protected]" target="_blank" title="Email"><i class="fas fa-envelope"></i></a><a class="social-icon" href="https://blog.csdn.net/kiminoamae?spm=1000.2115.3001.5343" target="_blank" title="csdn"><i class="fab fa-cuttlefish"></i></a></div></div><div id="scroll-down"><i 
class="fas fa-angle-down scroll-down-effects"></i></div></header><main class="layout" id="content-inner"><div class="recent-posts" id="recent-posts"><div class="recent-post-item"><div class="post_cover left_radius"><a href="/2023/04/26/%E3%80%8C%E8%AE%BA%E6%96%87%E7%AC%94%E8%AE%B0%E3%80%8DPiPAD-Pipelined-and-Parallel-Dynamic-GNN-Training-on-GPUs/" title="「论文笔记」PiPAD: Pipelined and Parallel Dynamic GNN Training on GPUs"> <img class="post_bg" src="/img/16.jpg" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="「论文笔记」PiPAD: Pipelined and Parallel Dynamic GNN Training on GPUs"></a></div><div class="recent-post-info"><a class="article-title" href="/2023/04/26/%E3%80%8C%E8%AE%BA%E6%96%87%E7%AC%94%E8%AE%B0%E3%80%8DPiPAD-Pipelined-and-Parallel-Dynamic-GNN-Training-on-GPUs/" title="「论文笔记」PiPAD: Pipelined and Parallel Dynamic GNN Training on GPUs">「论文笔记」PiPAD: Pipelined and Parallel Dynamic GNN Training on GPUs</a><div class="article-meta-wrap"><span class="post-meta-date"><i class="far fa-calendar-alt"></i><span class="article-meta-label">发表于</span><time datetime="2023-04-26T05:34:14.000Z" title="发表于 2023-04-26 13:34:14">2023-04-26</time></span><span class="article-meta"><span class="article-meta__separator">|</span><i class="fas fa-inbox"></i><a class="article-meta__categories" href="/categories/GPU%E5%8A%A0%E9%80%9F/">GPU加速</a></span><span class="article-meta tags"><span class="article-meta__separator">|</span><i class="fas fa-tag"></i><a class="article-meta__tags" href="/tags/%E6%B5%81%E6%B0%B4%E5%B9%B6%E8%A1%8C/">流水并行</a><span class="article-meta__link">•</span><a class="article-meta__tags" href="/tags/DGNN%E8%AE%AD%E7%BB%83%E5%8A%A0%E9%80%9F/">DGNN训练加速</a></span></div><div class="content">简要
背景:
图神经网络被广泛应用处理图结构的数据并用于提取多种图相关应用的依赖关系
GCN是在图(聚合)和神经(更新)操作中普遍使用的方法。
图的拓扑结构和节点特征可能随着时间的推移而动态发展,由此衍生出动态图。使用GNN的方法可以从动态图中捕获时间和结构信息。根据分区原则和粒度,可以将动态图分为
连续时间动态图(CTDG)
离散时间动态图(DTDG):对图进行定期采样的快照序列。一般处理方法是使用静态GNN在所有时间步长上的单个快照进行空间图学习。同时部署RNN来获取不同快照之间的时间特征(本文主要关注DTDG-base的DGNN)
滑动窗机制可以将多个连续快照同时输入到模型中,这种方式被广泛用于捕获更好的时间依赖并提高准确率。(本文将每一个滑动窗称为帧)
问题:
检索稀疏邻接矩阵通常伴随着不规则的内存访问和稀疏计算,SpMM-like的聚合操作通常被认为是GNN的主要瓶颈。
DGNN引入时间序列组件处理大量的快照,导致了两个主要的性能问题:
DGNN训练时要沿着时间轴不断更新图快照,数据传输时间占据了总体训练时间的大部分,进一步加剧了内存密集型的聚合操作产生的GP ...</div></div></div><div class="recent-post-item"><div class="post_cover right_radius"><a href="/2023/04/04/%E3%80%8C%E8%AE%BA%E6%96%87%E7%AC%94%E8%AE%B0%E3%80%8DEkko-A-Large-Scale-Deep-Learning-Recommender-System-with-Low-Latency-Model-Update/" title="「论文笔记」Ekko: A Large-Scale Deep Learning Recommender System with Low-Latency Model Update"> <img class="post_bg" src="/img/23.jpg" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="「论文笔记」Ekko: A Large-Scale Deep Learning Recommender System with Low-Latency Model Update"></a></div><div class="recent-post-info"><a class="article-title" href="/2023/04/04/%E3%80%8C%E8%AE%BA%E6%96%87%E7%AC%94%E8%AE%B0%E3%80%8DEkko-A-Large-Scale-Deep-Learning-Recommender-System-with-Low-Latency-Model-Update/" title="「论文笔记」Ekko: A Large-Scale Deep Learning Recommender System with Low-Latency Model Update">「论文笔记」Ekko: A Large-Scale Deep Learning Recommender System with Low-Latency Model Update</a><div class="article-meta-wrap"><span class="post-meta-date"><i class="far fa-calendar-alt"></i><span class="article-meta-label">发表于</span><time datetime="2023-04-04T03:24:12.000Z" title="发表于 2023-04-04 11:24:12">2023-04-04</time></span><span class="article-meta"><span class="article-meta__separator">|</span><i class="fas fa-inbox"></i><a class="article-meta__categories" href="/categories/%E5%88%86%E5%B8%83%E5%BC%8F/">分布式</a></span><span class="article-meta tags"><span class="article-meta__separator">|</span><i class="fas fa-tag"></i><a class="article-meta__tags" href="/tags/%E5%88%86%E5%B8%83%E5%BC%8F/">分布式</a><span class="article-meta__link">•</span><a class="article-meta__tags" href="/tags/%E6%8E%A8%E8%8D%90%E7%B3%BB%E7%BB%9F%E5%B5%8C%E5%85%A5%E6%A8%A1%E5%9E%8B%E5%8A%A0%E9%80%9F/">推荐系统嵌入模型加速</a><span class="article-meta__link">•</span><a class="article-meta__tags" 
href="/tags/%E6%8E%A8%E8%8D%90%E6%A8%A1%E5%9E%8B%E6%9B%B4%E6%96%B0%E8%B0%83%E5%BA%A6%E5%99%A8/">推荐模型更新调度器</a></span></div><div class="content">简要
问题:
DLRS(深度学习推荐系统)通常采用参数服务器的方式对模型参数进行保存(与客户端容错以及低延迟的通信),这些参数服务器通常保存在不同地理位置的数据中心中。DLRS离线训练/验证模型,并将这个模型传输到全局的推理集群中,因此,他们会导致显著的模型更新延迟(例如几十分钟),这对服务级别对象(SLO)会产生不利的影响。
现有的系统(Merlin,TFRA,Check-N-Run,BigGraph)基本都使用了局部采集数据和懒同步复制的方式更新复制的模型。然而,懒同步复制引入了非常关键的异步操作,这个异步操作通常会对SLO的性能有不好的影响。
Idea:允许服务器在线更新模型,并立即将模型更新传播到所有推理集群中。通过绕过长延迟的更新步骤(离线训练,模型检查,验证和广播)减少模型更新的延迟。其需要解决下面三个问题:
如何在带宽有限,网络路径异构的广域网上有效地传播海量模型进行更新
如何保护SLO免受网络拥塞的影响,从而延后重要的更新
如何保护SLO免受有损于模型精度的有偏差的模型更新
方案:Ekko——新颖的大规模DLRS,其可以低延迟地全局地更新复制模型,其从下面两个角 ...</div></div></div><div class="recent-post-item"><div class="post_cover left_radius"><a href="/2023/03/27/%E3%80%8C%E8%AE%BA%E6%96%87%E7%AC%94%E8%AE%B0%E3%80%8DDeepRecSys-A-System-for-Optimizing-End-To-End-At-Scale-Neural-Recommendation-Inference/" title="「论文笔记」DeepRecSys: A System for Optimizing End-To-End At-Scale Neural Recommendation Inference"> <img class="post_bg" src="/img/4.png" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="「论文笔记」DeepRecSys: A System for Optimizing End-To-End At-Scale Neural Recommendation Inference"></a></div><div class="recent-post-info"><a class="article-title" href="/2023/03/27/%E3%80%8C%E8%AE%BA%E6%96%87%E7%AC%94%E8%AE%B0%E3%80%8DDeepRecSys-A-System-for-Optimizing-End-To-End-At-Scale-Neural-Recommendation-Inference/" title="「论文笔记」DeepRecSys: A System for Optimizing End-To-End At-Scale Neural Recommendation Inference">「论文笔记」DeepRecSys: A System for Optimizing End-To-End At-Scale Neural Recommendation Inference</a><div class="article-meta-wrap"><span class="post-meta-date"><i class="far fa-calendar-alt"></i><span class="article-meta-label">发表于</span><time datetime="2023-03-27T02:30:28.000Z" title="发表于 2023-03-27 10:30:28">2023-03-27</time></span><span class="article-meta"><span class="article-meta__separator">|</span><i class="fas fa-inbox"></i><a class="article-meta__categories" href="/categories/%E5%88%86%E5%B8%83%E5%BC%8F/">分布式</a></span><span class="article-meta tags"><span class="article-meta__separator">|</span><i class="fas fa-tag"></i><a class="article-meta__tags" href="/tags/%E5%88%86%E5%B8%83%E5%BC%8F/">分布式</a><span class="article-meta__link">•</span><a class="article-meta__tags" href="/tags/%E6%8E%A8%E8%8D%90%E7%B3%BB%E7%BB%9F%E5%B5%8C%E5%85%A5%E6%A8%A1%E5%9E%8B%E5%8A%A0%E9%80%9F/">推荐系统嵌入模型加速</a></span></div><div class="content">简要
现象or问题:在Facebook的数据中心中,超过百分之80的机器学习推理周期都用于推荐和排名推理。
不同的模型表现出对计算量和内存访问量不同的特性,图中左边黄色部分的模型更多倾向于计算密集型。
从图中右边展示了不同推荐模型,密集特征处理过程通常伴随着规则的内存访问,而稀疏特征则是不规则的内存访问。
推荐系统的查询工作集大小遵循一个明显的分布具有较重的尾部效应,这种查询大小分布的差异,导致了不同的规模推理优化策略。
方案:基于hill-climbing的调度器——DeepRecSched——根据查询大小和到达的模式,推荐模型和硬件底层平台将查询分成小批次。DeepRecSched在严格的尾部延迟目标下,通过权衡请求和批处理级并行性,最大化系统负载。
同时设计了DeepRecInfra(端到端的基础设施,用于研究查询大小和到达模式的大规模影响)。
贡献:
设计了一个端到端的基础设施——DeepRecInfra,能够通过多种推荐模型进行系统设计和优化。其结合了在产品数据中心上观测到的查询到达模式以及大小分布。
提出DeepRecSched——首个批处理调度器,考虑 ...</div></div></div><div class="recent-post-item"><div class="post_cover right_radius"><a href="/2023/03/25/%E3%80%8C%E8%AE%BA%E6%96%87%E7%AC%94%E8%AE%B0%E3%80%8DFleche-An-Efficient-GPU-Embedding-Cache-for-Personalized-Recommendations/" title="「论文笔记」Fleche: An Efficient GPU Embedding Cache for Personalized Recommendations"> <img class="post_bg" src="/img/27.jpg" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="「论文笔记」Fleche: An Efficient GPU Embedding Cache for Personalized Recommendations"></a></div><div class="recent-post-info"><a class="article-title" href="/2023/03/25/%E3%80%8C%E8%AE%BA%E6%96%87%E7%AC%94%E8%AE%B0%E3%80%8DFleche-An-Efficient-GPU-Embedding-Cache-for-Personalized-Recommendations/" title="「论文笔记」Fleche: An Efficient GPU Embedding Cache for Personalized Recommendations">「论文笔记」Fleche: An Efficient GPU Embedding Cache for Personalized Recommendations</a><div class="article-meta-wrap"><span class="post-meta-date"><i class="far fa-calendar-alt"></i><span class="article-meta-label">发表于</span><time datetime="2023-03-25T04:31:59.000Z" title="发表于 2023-03-25 12:31:59">2023-03-25</time></span><span class="article-meta"><span class="article-meta__separator">|</span><i class="fas fa-inbox"></i><a class="article-meta__categories" href="/categories/%E5%88%86%E5%B8%83%E5%BC%8F/">分布式</a></span><span class="article-meta tags"><span class="article-meta__separator">|</span><i class="fas fa-tag"></i><a class="article-meta__tags" href="/tags/%E5%88%86%E5%B8%83%E5%BC%8F/">分布式</a><span class="article-meta__link">•</span><a class="article-meta__tags" href="/tags/%E6%8E%A8%E8%8D%90%E7%B3%BB%E7%BB%9F%E5%B5%8C%E5%85%A5%E6%A8%A1%E5%9E%8B%E5%8A%A0%E9%80%9F/">推荐系统嵌入模型加速</a><span class="article-meta__link">•</span><a class="article-meta__tags" href="/tags/GPU%E7%BC%93%E5%AD%98%E6%9C%BA%E5%88%B6/">GPU缓存机制</a></span></div><div class="content">简要
问题:
CPU端的DRAM数据访问与GPU处理之间的差距成为深度embedding模型推理性能的阻碍,用GPU-resident cache缓存embedding表可以解决这个差距,但是这个方案并没有被完全探索。
现有的缓存方案为每个embedding表维护一个固定大小的cache表,防止在表重分区期间发生大量的数据移动。这样的设计有两个重要的影响因素:缓存利用率低(静态单表缓存的结构缺陷)和内核维护开销(过多的小缓存查询内核)
方案:FLECHE——用于有效GPU-resident embedding caching的新颖缓存机制
主要思想:联合设计缓存结构和工作流,从而促进缓存利用率并减少缓存查询时间。
结构:GPU-HBM层用于缓存hot embedding,CPU-DRAM层存储所有的embedding(这里只考虑了模型能完全放入内存中的情况,大型模型会另外讨论)
为了解决缓存利用率低的问题,提出了flat cache(FC)。其通过将所有的embedding表输入id重新编码为统一格式的flat key,让所有embedding table共享一个全局缓存后端。此 ...</div></div></div><div class="recent-post-item"><div class="post_cover left_radius"><a href="/2023/03/21/%E3%80%8C%E8%AE%BA%E6%96%87%E7%AC%94%E8%AE%B0%E3%80%8DJiZhi-A-Fast-and-Cost-Eective-Model-As-A-Service-System-for-Web-Scale-Online-Inference-at-Baidu/" title="「论文笔记」JiZhi: A Fast and Cost-Eective Model-As-A-Service System for Web-Scale Online Inference at Baidu"> <img class="post_bg" src="/img/26.jpg" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="「论文笔记」JiZhi: A Fast and Cost-Eective Model-As-A-Service System for Web-Scale Online Inference at Baidu"></a></div><div class="recent-post-info"><a class="article-title" href="/2023/03/21/%E3%80%8C%E8%AE%BA%E6%96%87%E7%AC%94%E8%AE%B0%E3%80%8DJiZhi-A-Fast-and-Cost-Eective-Model-As-A-Service-System-for-Web-Scale-Online-Inference-at-Baidu/" title="「论文笔记」JiZhi: A Fast and Cost-Eective Model-As-A-Service System for Web-Scale Online Inference at Baidu">「论文笔记」JiZhi: A Fast and Cost-Eective Model-As-A-Service System for Web-Scale Online Inference at Baidu</a><div class="article-meta-wrap"><span class="post-meta-date"><i class="far fa-calendar-alt"></i><span class="article-meta-label">发表于</span><time datetime="2023-03-21T05:11:19.000Z" title="发表于 2023-03-21 13:11:19">2023-03-21</time></span><span class="article-meta"><span class="article-meta__separator">|</span><i class="fas fa-inbox"></i><a class="article-meta__categories" href="/categories/%E5%88%86%E5%B8%83%E5%BC%8F/">分布式</a></span><span class="article-meta tags"><span class="article-meta__separator">|</span><i class="fas fa-tag"></i><a class="article-meta__tags" href="/tags/%E5%88%86%E5%B8%83%E5%BC%8F/">分布式</a><span class="article-meta__link">•</span><a class="article-meta__tags" href="/tags/%E6%8E%A8%E8%8D%90%E7%B3%BB%E7%BB%9F%E5%B5%8C%E5%85%A5%E6%A8%A1%E5%9E%8B%E5%8A%A0%E9%80%9F/">推荐系统嵌入模型加速</a></span></div><div class="content">简要
问题:面对数十亿用户的时变网络规模数据,以经济有效的方式将训练有素的深度模型用于在线实时推理服务仍然具有挑战性。
使用DNN进行网页规模的实时在线推理的成本成为大多数公司采用该技术的主要负担
在线服务的时间消耗对用户体验至关重要,会影响长期留存率
支持DNN的资源消耗将要求大量的服务基础设施投资,并且有时会使系统设计,实现和运行超出预算。
使用GPU,TensorRT等方法可以有效降低DNN的计算延迟,但是这些技术主要针对单个DNN的训练和推理进行优化,忽略了时变网络传输下的在线推理的负载数据和计算依赖关系。因此,实时网页规模的DNN推理服务主要有下面的挑战:
巨大且稀疏的DNN模型:需要巨大的空间对他们进行存储,并且检索转换和计算也是比较大的麻烦
时变网络尺度传输:推荐系统的推荐请求并发量通常是非常大的
多样化的推荐场景:推荐输入可以是图片,文字以及搜索引擎中的用户喜好。
方案:JIZHI——用于网络规模在线推理服务的Model-as-a-Service系统。
Staged Event-Driven Pipeline(SEDP):基于深度学习的推荐推理工作流都会转化成 ...</div></div></div><div class="recent-post-item"><div class="post_cover right_radius"><a href="/2023/03/17/%E3%80%8C%E8%AE%BA%E6%96%87%E7%AC%94%E8%AE%B0%E3%80%8DPetPS-Supporting-Huge-Embedding-Models-with-Persistent-Memory/" title="「论文笔记」PetPS: Supporting Huge Embedding Models with Persistent Memory"> <img class="post_bg" src="/img/17.jpg" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="「论文笔记」PetPS: Supporting Huge Embedding Models with Persistent Memory"></a></div><div class="recent-post-info"><a class="article-title" href="/2023/03/17/%E3%80%8C%E8%AE%BA%E6%96%87%E7%AC%94%E8%AE%B0%E3%80%8DPetPS-Supporting-Huge-Embedding-Models-with-Persistent-Memory/" title="「论文笔记」PetPS: Supporting Huge Embedding Models with Persistent Memory">「论文笔记」PetPS: Supporting Huge Embedding Models with Persistent Memory</a><div class="article-meta-wrap"><span class="post-meta-date"><i class="far fa-calendar-alt"></i><span class="article-meta-label">发表于</span><time datetime="2023-03-17T12:51:58.000Z" title="发表于 2023-03-17 20:51:58">2023-03-17</time></span><span class="article-meta"><span class="article-meta__separator">|</span><i class="fas fa-inbox"></i><a class="article-meta__categories" href="/categories/%E5%88%86%E5%B8%83%E5%BC%8F/">分布式</a></span><span class="article-meta tags"><span class="article-meta__separator">|</span><i class="fas fa-tag"></i><a class="article-meta__tags" href="/tags/%E5%88%86%E5%B8%83%E5%BC%8F/">分布式</a><span class="article-meta__link">•</span><a class="article-meta__tags" href="/tags/%E6%8E%A8%E8%8D%90%E7%B3%BB%E7%BB%9F%E5%B5%8C%E5%85%A5%E6%A8%A1%E5%9E%8B%E5%8A%A0%E9%80%9F/">推荐系统嵌入模型加速</a></span></div><div class="content">简要
问题:随着embedding模型容量的不断增大,使用传统的参数服务器的存储成本高,在遇到宕机后重启的回复时间也很长。于是考虑使用持久性内存来拓展内存容量,但是持久性内存也面临两个挑战:高读取延迟和较重的CPU负担。
使用DRAM的参数服务器存储大型模型有两方面的缺点
造成了比较高的存储开销:DRAM是昂贵的媒介,同时DRAM几乎占整个系统功耗的一半
在参数服务器宕机后,将参数重新加载到DRAM中需要相当长的恢复时间,违反了在线推理的服务级协议(SLA)
持久化内存(也称非易失内存,PM)也支持像DRAM字节地址,但多提供了8倍的容量,同时提供了数据持久化的功能,并且可以提供更快的恢复和更少的停机时间。但仍然存在两个挑战
较高的持久化内存读取延迟(PM的延迟比DRAM高3倍)
较高的CPU负担(在和DRAM相同的CPU下,但是要处理8倍的数据空间)
方案:PetPS(Persistent Embedding Table Parameter Server)
PetHash:最大程度优化PM哈希索引,以便于更好地减少PM的访问
使用单级结构来定位只读取一个PM的存储桶。
对 ...</div></div></div><div class="recent-post-item"><div class="post_cover left_radius"><a href="/2023/03/15/%E3%80%8C%E8%AE%BA%E6%96%87%E7%AC%94%E8%AE%B0%E3%80%8DHercules-Heterogeneity-Aware-Inference-Serving-for-At-Scale-Personalized-Recommendation/" title="「论文笔记」Hercules: Heterogeneity-Aware Inference Serving for At-Scale Personalized Recommendation"> <img class="post_bg" src="/img/21.jpg" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="「论文笔记」Hercules: Heterogeneity-Aware Inference Serving for At-Scale Personalized Recommendation"></a></div><div class="recent-post-info"><a class="article-title" href="/2023/03/15/%E3%80%8C%E8%AE%BA%E6%96%87%E7%AC%94%E8%AE%B0%E3%80%8DHercules-Heterogeneity-Aware-Inference-Serving-for-At-Scale-Personalized-Recommendation/" title="「论文笔记」Hercules: Heterogeneity-Aware Inference Serving for At-Scale Personalized Recommendation">「论文笔记」Hercules: Heterogeneity-Aware Inference Serving for At-Scale Personalized Recommendation</a><div class="article-meta-wrap"><span class="post-meta-date"><i class="far fa-calendar-alt"></i><span class="article-meta-label">发表于</span><time datetime="2023-03-15T01:46:43.000Z" title="发表于 2023-03-15 09:46:43">2023-03-15</time></span><span class="article-meta"><span class="article-meta__separator">|</span><i class="fas fa-inbox"></i><a class="article-meta__categories" href="/categories/%E5%88%86%E5%B8%83%E5%BC%8F/">分布式</a></span><span class="article-meta tags"><span class="article-meta__separator">|</span><i class="fas fa-tag"></i><a class="article-meta__tags" href="/tags/%E5%88%86%E5%B8%83%E5%BC%8F/">分布式</a><span class="article-meta__link">•</span><a class="article-meta__tags" href="/tags/%E6%8E%A8%E8%8D%90%E7%B3%BB%E7%BB%9F%E5%B5%8C%E5%85%A5%E6%A8%A1%E5%9E%8B%E5%8A%A0%E9%80%9F/">推荐系统嵌入模型加速</a><span class="article-meta__link">•</span><a class="article-meta__tags" href="/tags/%E5%BC%82%E6%9E%84%E8%B5%84%E6%BA%90%E8%B0%83%E5%BA%A6%E5%99%A8/">异构资源调度器</a></span></div><div class="content">简要
现象:数据中心的深度学习推荐系统具有许多独特的工作负载特征和系统需求——模型多样性、云规模的系统异构性和时变负载模式。这些都需要特定于应用程序的解决方案来提高执行效率。
问题:
模型多样性:推荐模型的迅速发展,以支持新的用例,并实现更高的预测精度。这种不同算法结构导致了不同的性能瓶颈。最先进的推荐模型的计算和内存强度可以变化1~2个数量级。
云规模的系统异构:各种各样的系统架构可以在数据中心中共存,其原因如下:
系统升级会周期性的发生,不同微架构的服务器一代又一代地出现
特定领域的加速器越来越多地部署在数据中心,以最大化执行效率
时变负载模式:查询到达服从泊松分布,查询大小呈明显的重尾分布。动态变化的条件要求调度程序在不同级别运行,以快速适应和响应负载变化。
最优调度决策高度依赖于模型和硬件,并且需要一个有效的搜索机制来充分探索所有SLA(服务级协议)目标在模型并行、操作员并行和数据并行维度上的大调度空间。但现有的任务调度器设计缺乏遍历整个并行空间的能力。
方案:
Herules——heterogeneity-aware recommendation u ...</div></div></div><div class="recent-post-item"><div class="post_cover right_radius"><a href="/2023/02/18/%E3%80%8C%E8%AE%BA%E6%96%87%E7%AC%94%E8%AE%B0%E3%80%8DSingle-shot-Embedding-Dimension-Search-in-Recommender-System/" title="「论文笔记」Single-shot Embedding Dimension Search in Recommender System"> <img class="post_bg" src="/img/20.jpg" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="「论文笔记」Single-shot Embedding Dimension Search in Recommender System"></a></div><div class="recent-post-info"><a class="article-title" href="/2023/02/18/%E3%80%8C%E8%AE%BA%E6%96%87%E7%AC%94%E8%AE%B0%E3%80%8DSingle-shot-Embedding-Dimension-Search-in-Recommender-System/" title="「论文笔记」Single-shot Embedding Dimension Search in Recommender System">「论文笔记」Single-shot Embedding Dimension Search in Recommender System</a><div class="article-meta-wrap"><span class="post-meta-date"><i class="far fa-calendar-alt"></i><span class="article-meta-label">发表于</span><time datetime="2023-02-18T08:13:58.000Z" title="发表于 2023-02-18 16:13:58">2023-02-18</time></span><span class="article-meta"><span class="article-meta__separator">|</span><i class="fas fa-inbox"></i><a class="article-meta__categories" href="/categories/%E5%88%86%E5%B8%83%E5%BC%8F/">分布式</a></span><span class="article-meta tags"><span class="article-meta__separator">|</span><i class="fas fa-tag"></i><a class="article-meta__tags" href="/tags/%E5%88%86%E5%B8%83%E5%BC%8F/">分布式</a><span class="article-meta__link">•</span><a class="article-meta__tags" href="/tags/%E6%8E%A8%E8%8D%90%E7%B3%BB%E7%BB%9F%E5%B5%8C%E5%85%A5%E6%A8%A1%E5%9E%8B%E5%8A%A0%E9%80%9F/">推荐系统嵌入模型加速</a></span></div><div class="content">简要
在推荐系统中大部分的方法为所有的特征固定了embedding的维度,这可能会受到以下影响:
embedding可能包含数百亿个参数,导致高内存使用和计算成本
过度参数化低频特征可能导致过拟合,甚至产生意外噪声
此外,高频特征需要更多的参数来传递有效的信息。
提出单次嵌入维度搜索方法(SSEDS),能够通过使用单次embedding剪枝操作有效的分配每个特征域对应的维度同时保持模型的推荐准确率。
如何自动地为不同的特征分配embedding维度是很重要的问题,本文称为embedding维度搜索(EDS)问题。
这个问题需要解决两个挑战
如何识别各个特征域的embedding维度
识别embedding每个维度的重要性,然后去除不重要的相关维度,以实现自动获取混合维度embedding
标记每个特征域的embedding维度,同时保持其他的不变。然后通过评估他对损失函数的影响,计算每个维度的显著分数,以此表达维度的重要性。
如何通过一种有效的方式搜索embedding维度
通过观察显著分数,可以对所有特征域的embedding维度进行降序排列,并根据给定的参数预算保留 ...
针对的问题:
训练推荐系统中的GPU设备没有得到充分的利用,他们不能像CV和NLP领域那样实现预期的吞吐量提高。这个问题主要由两个特征引起
推荐模型包含了多达1000多个输入特征域,这引入了碎片化和内存密集型的操作
多组成特征交互子模块引入了大量的小型计算核
虽然定制的硬件能够对特定的WDL(Wide-and-Deep Learning)工作模式进行优化,但是需要考虑下面两个问题
有多种WDL设计,需要不同的工作模式(比如,特征域的数量,特征交互层的子模块),新的WDL模块每月都在出现。
对于公共云使用,处于预算和弹性考虑,首选商用硬件
通过详细分析大量的WDL工作流,得到下面的影响
由于有大量的特征域,WDL模型训练过程中有碎片化的操作,这通常会导致在加载操作中有比较大的开销
embedding层由内存密集型和通信密集型的操作构成,而特征交互和MLP需要计算密集型的操作。在处理大量的嵌入参数时,计算资源将得不到充分利用,并导致像脉冲一样的GPU使用。
系统框架:PICASSO
创建细粒度embedding特征组,同一组内的操作被打包,以减少零碎操作的数量 ...</div></div></div><div class="recent-post-item"><div class="post_cover right_radius"><a href="/2023/02/10/%E3%80%8C%E8%AE%BA%E6%96%87%E7%AC%94%E8%AE%B0%E3%80%8DAutoShard-Automated-Embedding-Table-Sharding-for-Recommender-Systems/" title="「论文笔记」AutoShard: Automated Embedding Table Sharding for Recommender Systems"> <img class="post_bg" src="/img/2.png" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="「论文笔记」AutoShard: Automated Embedding Table Sharding for Recommender Systems"></a></div><div class="recent-post-info"><a class="article-title" href="/2023/02/10/%E3%80%8C%E8%AE%BA%E6%96%87%E7%AC%94%E8%AE%B0%E3%80%8DAutoShard-Automated-Embedding-Table-Sharding-for-Recommender-Systems/" title="「论文笔记」AutoShard: Automated Embedding Table Sharding for Recommender Systems">「论文笔记」AutoShard: Automated Embedding Table Sharding for Recommender Systems</a><div class="article-meta-wrap"><span class="post-meta-date"><i class="far fa-calendar-alt"></i><span class="article-meta-label">发表于</span><time datetime="2023-02-10T07:11:51.000Z" title="发表于 2023-02-10 15:11:51">2023-02-10</time></span><span class="article-meta"><span class="article-meta__separator">|</span><i class="fas fa-inbox"></i><a class="article-meta__categories" href="/categories/%E5%88%86%E5%B8%83%E5%BC%8F/">分布式</a></span><span class="article-meta tags"><span class="article-meta__separator">|</span><i class="fas fa-tag"></i><a class="article-meta__tags" href="/tags/%E5%88%86%E5%B8%83%E5%BC%8F/">分布式</a><span class="article-meta__link">•</span><a class="article-meta__tags" href="/tags/%E6%8E%A8%E8%8D%90%E7%B3%BB%E7%BB%9F%E5%B5%8C%E5%85%A5%E6%A8%A1%E5%9E%8B%E5%8A%A0%E9%80%9F/">推荐系统嵌入模型加速</a></span></div><div class="content">简要
problem:大规模的embedding表参数会影响到推荐系统embedding模型的训练过程,这些大规模的embedding表如果使用不平衡的分布式存储方式,会严重影响模型的执行效率。这就涉及到一个挑战:embedding表分片问题(embedding table sharding)
如上图所示,蓝色的框表示一个embedding表,然后框内的数字表示执行操作执行时间的成本。紫色的框表示一个分片(sharding)旁边的数字表示这个分片的执行时间成本。
如果在分片过程中没有考虑到分片的平衡(图左边),就可能会导致GPU之间的不平衡,其中所有的GPU必须强制等待最慢的GPU(图中的bottleneck)。相反,如果分片是平衡的(图右边),就可以通过减小等待时间来显著加速embedding的操作。
因为sharding内部是并行执行的,所以sharding内部的执行时间是显著小于所有embedding表的执行时间之和的,比如 5 &lt; 1+2+3,10 &lt; 4+5+6
在这个问题中有两个重 ...</div></div></div><nav id="pagination"><div class="pagination"><span class="page-number current">1</span><a class="page-number" href="/page/2/#content-inner">2</a><span class="space">…</span><a class="page-number" href="/page/9/#content-inner">9</a><a class="extend next" rel="next" href="/page/2/#content-inner"><i class="fas fa-chevron-right fa-fw"></i></a></div></nav></div><div class="aside-content" id="aside-content"><div class="card-widget card-info"><div class="card-info-avatar is-center"><img class="avatar-img" src="/assets/apple-touch-icon.png" onerror="this.onerror=null;this.src='/img/friend_404.gif'" alt="avatar"/><div class="author-info__name">zerorains</div><div class="author-info__description">No matter what happens, I will do my best.</div></div><div class="card-info-data"><div class="card-info-data-item is-center"><a href="/archives/"><div class="headline">文章</div><div class="length-num">90</div></a></div><div class="card-info-data-item is-center"><a href="/tags/"><div class="headline">标签</div><div class="length-num">101</div></a></div><div class="card-info-data-item is-center"><a href="/categories/"><div class="headline">分类</div><div class="length-num">13</div></a></div></div><a class="button--animated" id="card-info-btn" target="_blank" rel="noopener" href="https://github.com/zeroRains"><i class="fab fa-github"></i><span>Follow Me</span></a><div class="card-info-social-icons is-center"><a class="social-icon" href="tencent://message?uin=2274033547" target="_blank" title="qq"><i class="fab fa-qq"></i></a><a class="social-icon" href="https://github.com/zeroRains" target="_blank" title="Github"><i class="fab fa-github"></i></a><a class="social-icon" href="mailto:[email protected]" target="_blank" title="Email"><i class="fas fa-envelope"></i></a><a class="social-icon" href="https://blog.csdn.net/kiminoamae?spm=1000.2115.3001.5343" target="_blank" title="csdn"><i class="fab fa-cuttlefish"></i></a></div></div><div class="card-widget 
card-announcement"><div class="item-headline"><i class="fas fa-bullhorn card-announcement-animation"></i><span>公告</span></div><div class="announcement_content">未来主要研究方向。。。算了还是先学好基础。</div></div><div class="sticky_layout"><div class="card-widget card-recent-post"><div class="item-headline"><i class="fas fa-history"></i><span>最新文章</span></div><div class="aside-list"><div class="aside-list-item"><a class="thumbnail" href="/2023/04/26/%E3%80%8C%E8%AE%BA%E6%96%87%E7%AC%94%E8%AE%B0%E3%80%8DPiPAD-Pipelined-and-Parallel-Dynamic-GNN-Training-on-GPUs/" title="「论文笔记」PiPAD: Pipelined and Parallel Dynamic GNN Training on GPUs"><img src="/img/16.jpg" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="「论文笔记」PiPAD: Pipelined and Parallel Dynamic GNN Training on GPUs"/></a><div class="content"><a class="title" href="/2023/04/26/%E3%80%8C%E8%AE%BA%E6%96%87%E7%AC%94%E8%AE%B0%E3%80%8DPiPAD-Pipelined-and-Parallel-Dynamic-GNN-Training-on-GPUs/" title="「论文笔记」PiPAD: Pipelined and Parallel Dynamic GNN Training on GPUs">「论文笔记」PiPAD: Pipelined and Parallel Dynamic GNN Training on GPUs</a><time datetime="2023-04-26T05:34:14.000Z" title="发表于 2023-04-26 13:34:14">2023-04-26</time></div></div><div class="aside-list-item"><a class="thumbnail" href="/2023/04/04/%E3%80%8C%E8%AE%BA%E6%96%87%E7%AC%94%E8%AE%B0%E3%80%8DEkko-A-Large-Scale-Deep-Learning-Recommender-System-with-Low-Latency-Model-Update/" title="「论文笔记」Ekko: A Large-Scale Deep Learning Recommender System with Low-Latency Model Update"><img src="/img/23.jpg" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="「论文笔记」Ekko: A Large-Scale Deep Learning Recommender System with Low-Latency Model Update"/></a><div class="content"><a class="title" href="/2023/04/04/%E3%80%8C%E8%AE%BA%E6%96%87%E7%AC%94%E8%AE%B0%E3%80%8DEkko-A-Large-Scale-Deep-Learning-Recommender-System-with-Low-Latency-Model-Update/" title="「论文笔记」Ekko: A Large-Scale Deep Learning Recommender System with Low-Latency Model Update">「论文笔记」Ekko: A Large-Scale Deep Learning 
Recommender System with Low-Latency Model Update</a><time datetime="2023-04-04T03:24:12.000Z" title="发表于 2023-04-04 11:24:12">2023-04-04</time></div></div><div class="aside-list-item"><a class="thumbnail" href="/2023/03/27/%E3%80%8C%E8%AE%BA%E6%96%87%E7%AC%94%E8%AE%B0%E3%80%8DDeepRecSys-A-System-for-Optimizing-End-To-End-At-Scale-Neural-Recommendation-Inference/" title="「论文笔记」DeepRecSys: A System for Optimizing End-To-End At-Scale Neural Recommendation Inference"><img src="/img/4.png" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="「论文笔记」DeepRecSys: A System for Optimizing End-To-End At-Scale Neural Recommendation Inference"/></a><div class="content"><a class="title" href="/2023/03/27/%E3%80%8C%E8%AE%BA%E6%96%87%E7%AC%94%E8%AE%B0%E3%80%8DDeepRecSys-A-System-for-Optimizing-End-To-End-At-Scale-Neural-Recommendation-Inference/" title="「论文笔记」DeepRecSys: A System for Optimizing End-To-End At-Scale Neural Recommendation Inference">「论文笔记」DeepRecSys: A System for Optimizing End-To-End At-Scale Neural Recommendation Inference</a><time datetime="2023-03-27T02:30:28.000Z" title="发表于 2023-03-27 10:30:28">2023-03-27</time></div></div><div class="aside-list-item"><a class="thumbnail" href="/2023/03/25/%E3%80%8C%E8%AE%BA%E6%96%87%E7%AC%94%E8%AE%B0%E3%80%8DFleche-An-Efficient-GPU-Embedding-Cache-for-Personalized-Recommendations/" title="「论文笔记」Fleche: An Efficient GPU Embedding Cache for Personalized Recommendations"><img src="/img/27.jpg" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="「论文笔记」Fleche: An Efficient GPU Embedding Cache for Personalized Recommendations"/></a><div class="content"><a class="title" href="/2023/03/25/%E3%80%8C%E8%AE%BA%E6%96%87%E7%AC%94%E8%AE%B0%E3%80%8DFleche-An-Efficient-GPU-Embedding-Cache-for-Personalized-Recommendations/" title="「论文笔记」Fleche: An Efficient GPU Embedding Cache for Personalized Recommendations">「论文笔记」Fleche: An Efficient GPU Embedding Cache for Personalized Recommendations</a><time datetime="2023-03-25T04:31:59.000Z" 
title="发表于 2023-03-25 12:31:59">2023-03-25</time></div></div><div class="aside-list-item"><a class="thumbnail" href="/2023/03/21/%E3%80%8C%E8%AE%BA%E6%96%87%E7%AC%94%E8%AE%B0%E3%80%8DJiZhi-A-Fast-and-Cost-Eective-Model-As-A-Service-System-for-Web-Scale-Online-Inference-at-Baidu/" title="「论文笔记」JiZhi: A Fast and Cost-Effective Model-As-A-Service System for Web-Scale Online Inference at Baidu"><img src="/img/26.jpg" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="「论文笔记」JiZhi: A Fast and Cost-Effective Model-As-A-Service System for Web-Scale Online Inference at Baidu"/></a><div class="content"><a class="title" href="/2023/03/21/%E3%80%8C%E8%AE%BA%E6%96%87%E7%AC%94%E8%AE%B0%E3%80%8DJiZhi-A-Fast-and-Cost-Eective-Model-As-A-Service-System-for-Web-Scale-Online-Inference-at-Baidu/" title="「论文笔记」JiZhi: A Fast and Cost-Effective Model-As-A-Service System for Web-Scale Online Inference at Baidu">「论文笔记」JiZhi: A Fast and Cost-Effective Model-As-A-Service System for Web-Scale Online Inference at Baidu</a><time datetime="2023-03-21T05:11:19.000Z" title="发表于 2023-03-21 13:11:19">2023-03-21</time></div></div></div></div><div class="card-widget card-categories"><div class="item-headline">
<i class="fas fa-folder-open"></i>
<span>分类</span>
<a class="card-more-btn" href="/categories/" title="查看更多">
<i class="fas fa-angle-right"></i></a>
</div>
<ul class="card-category-list" id="aside-cat-list">
<li class="card-category-list-item "><a class="card-category-list-link" href="/categories/GPU%E5%8A%A0%E9%80%9F/"><span class="card-category-list-name">GPU加速</span><span class="card-category-list-count">1</span></a></li><li class="card-category-list-item "><a class="card-category-list-link" href="/categories/%E5%88%86%E5%B8%83%E5%BC%8F/"><span class="card-category-list-name">分布式</span><span class="card-category-list-count">12</span></a></li><li class="card-category-list-item "><a class="card-category-list-link" href="/categories/%E5%A4%A7%E6%95%B0%E6%8D%AE/"><span class="card-category-list-name">大数据</span><span class="card-category-list-count">1</span></a></li><li class="card-category-list-item "><a class="card-category-list-link" href="/categories/%E6%93%8D%E4%BD%9C%E7%B3%BB%E7%BB%9F/"><span class="card-category-list-name">操作系统</span><span class="card-category-list-count">1</span></a></li><li class="card-category-list-item "><a class="card-category-list-link" href="/categories/%E6%95%B0%E5%AD%A6%E5%BB%BA%E6%A8%A1/"><span class="card-category-list-name">数学建模</span><span class="card-category-list-count">1</span></a></li><li class="card-category-list-item "><a class="card-category-list-link" href="/categories/%E6%95%B0%E6%8D%AE%E5%BA%93/"><span class="card-category-list-name">数据库</span><span class="card-category-list-count">7</span></a></li><li class="card-category-list-item "><a class="card-category-list-link" href="/categories/%E6%95%B0%E6%8D%AE%E5%BA%93%E7%B3%BB%E7%BB%9F/"><span class="card-category-list-name">数据库系统</span><span class="card-category-list-count">2</span></a></li><li class="card-category-list-item "><a class="card-category-list-link" href="/categories/%E6%95%B0%E6%8D%AE%E7%BB%93%E6%9E%84/"><span class="card-category-list-name">数据结构</span><span class="card-category-list-count">1</span></a></li>
</ul></div><div class="card-widget card-tags"><div class="item-headline"><i class="fas fa-tags"></i><span>标签</span></div><div class="card-tag-cloud"><a href="/tags/C/" style="font-size: 1.26em; color: #999fa8">C++</a> <a href="/tags/DGNN%E8%AE%AD%E7%BB%83%E5%8A%A0%E9%80%9F/" style="font-size: 1.1em; color: #999">DGNN训练加速</a> <a href="/tags/ER%E5%9B%BE/" style="font-size: 1.1em; color: #999">ER图</a> <a href="/tags/ER%E5%9B%BE%E8%BD%AC%E5%85%B3%E7%B3%BB%E6%A8%A1%E5%BC%8F/" style="font-size: 1.1em; color: #999">ER图转关系模式</a> <a href="/tags/GPU%E7%BC%93%E5%AD%98%E6%9C%BA%E5%88%B6/" style="font-size: 1.1em; color: #999">GPU缓存机制</a> <a href="/tags/Nonlocal/" style="font-size: 1.1em; color: #999">Nonlocal</a> <a href="/tags/PAT/" style="font-size: 1.1em; color: #999">PAT</a> <a href="/tags/SOTA/" style="font-size: 1.18em; color: #999ca1">SOTA</a> <a href="/tags/STL/" style="font-size: 1.1em; color: #999">STL</a> <a href="/tags/bfs/" style="font-size: 1.1em; color: #999">bfs</a> <a href="/tags/c/" style="font-size: 1.42em; color: #99a6b7">c++</a> <a href="/tags/dfs/" style="font-size: 1.18em; color: #999ca1">dfs</a> <a href="/tags/%E4%B8%8A%E4%B8%8B%E6%96%87/" style="font-size: 1.1em; color: #999">上下文</a> <a href="/tags/%E4%B8%8D%E5%AE%8C%E6%95%B4%E7%9A%84%E5%A4%9A%E8%A7%86%E5%9B%BE%E8%81%9A%E7%B1%BB/" style="font-size: 1.1em; color: #999">不完整的多视图聚类</a> <a href="/tags/%E4%B9%A0%E9%A2%98/" style="font-size: 1.1em; color: #999">习题</a> <a href="/tags/%E4%BA%8C%E5%88%86%E6%B3%95/" style="font-size: 1.1em; color: #999">二分法</a> <a href="/tags/%E4%BA%8C%E8%BF%9B%E5%88%B6/" style="font-size: 1.1em; color: #999">二进制</a> <a href="/tags/%E4%BA%A4%E6%9B%BF%E8%AE%AD%E7%BB%83%E6%9C%BA%E5%88%B6/" style="font-size: 1.1em; color: #999">交替训练机制</a> <a href="/tags/%E4%BA%A7%E4%B8%9A%E8%90%BD%E5%9C%B0/" style="font-size: 1.1em; color: #999">产业落地</a> <a href="/tags/%E4%BA%BA%E8%84%B8%E8%AF%86%E5%88%AB/" style="font-size: 1.1em; color: #999">人脸识别</a> <a href="/tags/%E4%BD%8D%E8%BF%90%E7%AE%97/" 
style="font-size: 1.1em; color: #999">位运算</a> <a href="/tags/%E5%85%A8%E5%B1%80%E4%B8%8E%E5%B1%80%E9%83%A8/" style="font-size: 1.1em; color: #999">全局与局部</a> <a href="/tags/%E5%85%B3%E7%B3%BB%E4%BB%A3%E6%95%B0/" style="font-size: 1.1em; color: #999">关系代数</a> <a href="/tags/%E5%88%86%E5%89%B2%E7%BB%BC%E8%BF%B0/" style="font-size: 1.18em; color: #999ca1">分割综述</a> <a href="/tags/%E5%88%86%E5%B8%83%E5%BC%8F/" style="font-size: 1.5em; color: #99a9bf">分布式</a> <a href="/tags/%E5%88%86%E7%A6%BB%E8%A7%86%E8%A7%89%E7%89%B9%E5%BE%81/" style="font-size: 1.1em; color: #999">分离视觉特征</a> <a href="/tags/%E5%88%B7%E9%A2%98/" style="font-size: 1.1em; color: #999">刷题</a> <a href="/tags/%E5%89%8D%E7%BC%80%E5%92%8C/" style="font-size: 1.1em; color: #999">前缀和</a> <a href="/tags/%E5%8A%A8%E6%80%81%E8%A7%84%E5%88%92/" style="font-size: 1.1em; color: #999">动态规划</a> <a href="/tags/%E5%8D%8A%E7%9B%91%E7%9D%A3/" style="font-size: 1.1em; color: #999">半监督</a> <a href="/tags/%E5%9B%BE%E5%83%8F%E6%94%BB%E5%87%BB/" style="font-size: 1.1em; color: #999">图像攻击</a> <a href="/tags/%E5%9B%BE%E5%8E%BB%E5%99%AA/" style="font-size: 1.1em; color: #999">图去噪</a> <a href="/tags/%E5%9B%BE%E5%BC%BA%E5%8C%96%E5%AD%A6%E4%B9%A0/" style="font-size: 1.1em; color: #999">图强化学习</a> <a href="/tags/%E5%9B%BE%E6%B3%A8%E6%84%8F%E5%8A%9B/" style="font-size: 1.1em; color: #999">图注意力</a> <a href="/tags/%E5%9B%BE%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/" style="font-size: 1.42em; color: #99a6b7">图神经网络</a> <a href="/tags/%E5%9B%BE%E8%AE%BA/" style="font-size: 1.1em; color: #999">图论</a> <a href="/tags/%E5%9F%BA%E6%9C%AC%E6%A6%82%E5%BF%B5/" style="font-size: 1.18em; color: #999ca1">基本概念</a> <a href="/tags/%E5%A4%96%E9%83%A8%E6%A0%B7%E6%9C%AC%E9%97%AE%E9%A2%98/" style="font-size: 1.18em; color: #999ca1">外部样本问题</a> <a href="/tags/%E5%A4%9A%E8%A7%86%E5%9B%BE%E8%81%9A%E7%B1%BB/" style="font-size: 1.34em; color: #99a3b0">多视图聚类</a> <a href="/tags/%E5%A4%A7%E6%95%B0%E6%8D%AE/" style="font-size: 1.1em; color: #999">大数据</a></div></div><div 
class="card-widget card-archives"><div class="item-headline"><i class="fas fa-archive"></i><span>归档</span><a class="card-more-btn" href="/archives/" title="查看更多">
<i class="fas fa-angle-right"></i></a></div><ul class="card-archive-list"><li class="card-archive-list-item"><a class="card-archive-list-link" href="/archives/2023/04/"><span class="card-archive-list-date">四月 2023</span><span class="card-archive-list-count">2</span></a></li><li class="card-archive-list-item"><a class="card-archive-list-link" href="/archives/2023/03/"><span class="card-archive-list-date">三月 2023</span><span class="card-archive-list-count">5</span></a></li><li class="card-archive-list-item"><a class="card-archive-list-link" href="/archives/2023/02/"><span class="card-archive-list-date">二月 2023</span><span class="card-archive-list-count">3</span></a></li><li class="card-archive-list-item"><a class="card-archive-list-link" href="/archives/2023/01/"><span class="card-archive-list-date">一月 2023</span><span class="card-archive-list-count">1</span></a></li><li class="card-archive-list-item"><a class="card-archive-list-link" href="/archives/2022/10/"><span class="card-archive-list-date">十月 2022</span><span class="card-archive-list-count">5</span></a></li><li class="card-archive-list-item"><a class="card-archive-list-link" href="/archives/2022/09/"><span class="card-archive-list-date">九月 2022</span><span class="card-archive-list-count">6</span></a></li><li class="card-archive-list-item"><a class="card-archive-list-link" href="/archives/2022/08/"><span class="card-archive-list-date">八月 2022</span><span class="card-archive-list-count">8</span></a></li><li class="card-archive-list-item"><a class="card-archive-list-link" href="/archives/2022/07/"><span class="card-archive-list-date">七月 2022</span><span class="card-archive-list-count">4</span></a></li></ul></div><div class="card-widget card-webinfo"><div class="item-headline"><i class="fas fa-chart-line"></i><span>网站资讯</span></div><div class="webinfo"><div class="webinfo-item"><div class="item-name">文章数目 :</div><div class="item-count">90</div></div><div class="webinfo-item"><div class="item-name">已运行时间 
:</div><div class="item-count" id="runtimeshow" data-publishDate="2021-04-04T16:00:00.000Z"></div></div><div class="webinfo-item"><div class="item-name">本站访客数 :</div><div class="item-count" id="busuanzi_value_site_uv"></div></div><div class="webinfo-item"><div class="item-name">本站总访问量 :</div><div class="item-count" id="busuanzi_value_site_pv"></div></div><div class="webinfo-item"><div class="item-name">最后更新时间 :</div><div class="item-count" id="last-push-date" data-lastPushDate="2023-05-01T07:47:16.556Z"></div></div></div></div></div></div></main><footer id="footer"><div id="footer-wrap"><div class="copyright">©2020 - 2023 By zerorains</div><div class="framework-info"><span>框架 </span><a target="_blank" rel="noopener" href="https://hexo.io">Hexo</a><span class="footer-separator">|</span><span>主题 </span><a target="_blank" rel="noopener" href="https://github.com/jerryc127/hexo-theme-butterfly">Butterfly</a></div></div></footer></div><div id="rightside"><div id="rightside-config-hide"><button id="darkmode" type="button" title="浅色和深色模式转换"><i class="fas fa-adjust"></i></button><button id="hide-aside-btn" type="button" title="单栏和双栏切换"><i class="fas fa-arrows-alt-h"></i></button></div><div id="rightside-config-show"><button id="rightside_config" type="button" title="设置"><i class="fas fa-cog fa-spin"></i></button><button id="go-up" type="button" title="回到顶部"><i class="fas fa-arrow-up"></i></button></div></div><div id="local-search"><div class="search-dialog"><div class="search-dialog__title" id="local-search-title">本地搜索</div><div id="local-input-panel"><div id="local-search-input"><div class="local-search-box"><input class="local-search-box--input" placeholder="搜索文章" type="text"/></div></div></div><hr/><div id="local-search-results"></div><span class="search-close-button"><i class="fas fa-times"></i></span></div><div id="search-mask"></div></div><div><script src="/js/utils.js"></script><script src="/js/main.js"></script><script src="/js/search/local-search.js"></script><div 
class="js-pjax"><script>
// Animated typing effect for the site subtitle, rendered with Typed.js.
// NOTE(review): the theme's template compiled a constant `if (true)` into
// this function, leaving an unreachable else-branch (a plain innerHTML
// fallback). The dead branch has been removed; runtime behavior is unchanged.
function subtitleType () {
  var typed = new Typed("#subtitle", {
    // Splitting on "," makes each comma-separated piece a separate string
    // in the typing loop.
    strings: "No matter what happens, I will do my best.".split(","),
    startDelay: 300, // ms to wait before typing starts
    typeSpeed: 150,  // ms per character while typing
    loop: true,      // retype the strings forever
    backSpeed: 50    // ms per character while deleting
  })
}
// Start the subtitle animation, lazy-loading Typed.js from the CDN first
// when it is not already present.
// NOTE(review): the template wrapped this in a constant `if (true)` whose
// else-branch was unreachable; the dead wrapper has been removed.
if (typeof Typed === 'function') {
  subtitleType()
} else {
  getScript('https://cdn.jsdelivr.net/npm/typed.js/lib/typed.min.js').then(subtitleType)
}</script></div><script src="https://cdn.jsdelivr.net/npm/butterfly-extsrc@1/dist/activate-power-mode.min.js"></script><script>
// "Power mode" typing effect: colored particles and screen shake on every
// input event; explicitly disabled on mobile devices.
POWERMODE.colorful = true;
POWERMODE.shake = true;
POWERMODE.mobile = false;
document.body.addEventListener('input', POWERMODE);
</script><script id="click-heart" src="https://cdn.jsdelivr.net/npm/butterfly-extsrc@1/dist/click-heart.min.js" async mobile="false"></script><script async data-pjax src="https://busuanzi.ibruce.info/busuanzi/2.3/busuanzi.pure.mini.js"></script></div></body></html>