This repository has been archived by the owner on Mar 31, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
/
DataSourceManagementGuide.html
605 lines (465 loc) · 34.3 KB
/
DataSourceManagementGuide.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
<!doctype html>
<html>
<head>
<meta charset="utf-8">
<!-- Always force latest IE rendering engine or request Chrome Frame -->
<meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible">
<!-- Use title if it's in the page YAML frontmatter -->
<title>Data Source Management Guide | Pivotal CF Docs</title>
<link href="../stylesheets/master.css" media="screen,print" rel="stylesheet" type="text/css" />
<link href="../stylesheets/breadcrumbs.css" media="screen,print" rel="stylesheet" type="text/css" />
<link href="../stylesheets/search.css" media="screen,print" rel="stylesheet" type="text/css" />
<link href="../stylesheets/portal-style.css" media="screen,print" rel="stylesheet" type="text/css" />
<link href="../stylesheets/printable.css" media="print" rel="stylesheet" type="text/css" />
<script src="../javascripts/all.js" type="text/javascript"></script>
<link href='/images/favicon.ico' rel='shortcut icon'>
</head>
<body class="pivotalax pivotalax_DataSourceManagementGuide">
<div class='wrap'>
<script src="//use.typekit.net/clb0qji.js" type="text/javascript"></script>
<script type="text/javascript">
// Start Typekit font loading. Any exception thrown by the call (for example
// if the Typekit global is not defined) is caught and ignored.
// NOTE(review): Typekit is presumably defined by the use.typekit.net script
// included just before this block — confirm.
try {
Typekit.load();
} catch (e) {
}
</script>
<script type="text/javascript">
// Set this document's domain to "gopivotal.com".
// NOTE(review): presumably done so pages on gopivotal.com subdomains can
// script each other — confirm this is still required.
document.domain = "gopivotal.com";
</script>
<header class="navbar desktop-only" id="nav">
<div class="navbar-inner">
<div class="container-fluid">
<div class="pivotal-logo--container">
<a class="pivotal-logo" href="http://gopivotal.com"><span></span></a>
</div>
<ul class="nav pull-right">
<li class="navbar-link">
<a href="http://www.gopivotal.com/paas" id="paas-nav-link">PaaS</a>
</li>
<li class="navbar-link">
<a href="http://www.gopivotal.com/big-data" id="big-data-nav-link">BIG DATA</a>
</li>
<li class="navbar-link">
<a href="http://www.gopivotal.com/agile" id="agile-nav-link">AGILE</a>
</li>
<li class="navbar-link">
<a href="http://www.gopivotal.com/oss" id="oss-nav-link">OSS</a>
</li>
<li class="nav-search">
<a href="http://www.gopivotal.com/search-pivotal"><span></span></a>
</li>
</ul>
</div>
<a href="http://www.gopivotal.com/contact">
<img id="get-started" src="http://www.gopivotal.com/sites/all/themes/gopo13/images/get-started.png">
</a>
</div>
</header>
<div class="main-wrap">
<div class="container-fluid">
<div class="container">
<div id="sub-nav" class="nav-container desktop-only">
<ul class="menu">
<li class="first expanded js-maintopic"><a id='home-nav-link' href="/pivotalax/index.html">Home</a></li>
<li class="first expanded js-maintopic"><a href="/pivotalax/PivotalAXVersion1_0.html">Overview of Pivotal Analytics</a></li>
<li class="first expanded js-maintopic"><a href="/pivotalax/InstallationGuide.html">Installation Guide</a></li>
<li class="first expanded js-maintopic"><a href="/pivotalax/SystemRequirementsandSizingGuidelines.html">System Requirements and Sizing</a></li>
<li class="first expanded js-maintopic"><a href="/pivotalax/AdministrationGuide.html">Administration Guide</a></li>
<li class="first expanded js-maintopic"><a href="/pivotalax/DataSourceManagementGuide.html">Data Source Management Guide</a></li>
<li class="first expanded js-maintopic"><a href="/pivotalax/DataVisualizationGuide.html">Data Visualization Guide</a></li>
<li class="first expanded js-maintopic"><a href="/pivotalax/DataIngestionAPIs.html">Data Ingestion APIs</a></li>
</ul>
</div><!--end of sub-nav-->
<div class="body-container content">
<!-- Google CSE Search Box -->
<script>
// Inject the Google Custom Search Engine loader script in front of the first
// <script> element of the document. The script is fetched over https: when
// the page itself is https:, and over http: for every other page protocol.
(function() {
  var engineId = '005804290371689664731:n1ix15a_ysy';
  var loader = document.createElement('script');
  loader.type = 'text/javascript';
  loader.async = true;

  var scheme = 'http:';
  if (document.location.protocol == 'https:') {
    scheme = 'https:';
  }
  loader.src = scheme + '//www.google.com/cse/cse.js?cx=' + engineId;

  var firstScript = document.getElementsByTagName('script')[0];
  firstScript.parentNode.insertBefore(loader, firstScript);
})();
</script>
<gcse:search></gcse:search>
<ul class="breadcrumbs"><li><a href="./">Pivotal AX Version 1.0 Documentation</a></li> <li class="active"><span>Data Source Management Guide</span></li></ul>
<h3 class="title-container">Data Source Management Guide</h3>
<ul>
<li><p><a href="#about-data-sources">About Data Sources</a></p>
<ul>
<li> <a href="#data-sources-page">Data Sources Page</a></li>
<li> <a href="#data-upload-formats">Data Upload Formats</a>
<ul>
<li> <a href="#single-line">Treating file contents as single-line events</a></li>
<li> <a href="#compressed">About Compressed Files</a></li>
</ul></li>
<li> <a href="#extraction-data-quality">Extraction and Data Quality Rules</a></li>
<li> <a href="#update-rules">Update Rules</a></li>
<li> <a href="#scheduling-options">Scheduling Options</a></li>
</ul></li>
<li><p><a href="#adding-data-sources">Adding Data Sources</a></p>
<ul>
<li> <a href="#upload-files">Upload Files</a></li>
<li> <a href="#live-feeds">Live Feeds</a></li>
<li> <a href="#amazon">Amazon S3</a></li>
<li> <a href="#hdfs">HDFS</a></li>
<li> <a href="#ftp-sftp">FTP/SFTP</a></li>
<li> <a href="#gemfire">GemFire</a></li>
<li> <a href="#application-feeds">Application Feeds </a></li>
</ul></li>
<li><p><a href="#resetting-all-data-sources">Resetting All Data Sources</a></p></li>
<li><p><a href="#data-ingestion-apis">Data Ingestion APIs</a></p></li>
</ul>
<h1><a id='about-data-sources'></a>About Data Sources</h1>
<p>Pivotal AX <em>Data Sources</em> define the set of data you want to analyze. You define one or more data sources for each Pivotal AX project. (For more information about Pivotal AX projects, see <a href="AdministrationGuide.html#managing-projects">Managing Projects</a>.) Data sources can consist of static files from a variety of sources and can also include live data feeds from applications, log data, and other messages. The procedures for loading these various types of data are discussed in this section. Although you can likely work directly with existing data sources, your business requirements may require that the data be manipulated by filtering, normalizing or eliminating unneeded data dimensions before being submitted to the Pivotal AX system for analysis.</p>
<p>When you submit a data source for processing, Pivotal AX analyzes and indexes the data. Once this process is complete, you can begin to use Pivotal AX to visualize and analyze your data.</p>
<h2><a id='data-sources-page'></a>Data Sources Page</h2>
<p>The <strong>Data Sources</strong> page is where you add and manage the data sources for your Pivotal AX project.</p>
<p>To view the <strong>Data Sources</strong> page:</p>
<ol>
<li><p>Log in to Pivotal AX.</p>
<p>See <a href="LoggingintoPivotalAX.html">Logging in to Pivotal AX</a>.</p></li>
<li><p>Select <strong>Data Sources</strong> from the left menu.</p>
<p><img src="images/select_data_sources.png" /></p>
<p>The <strong>Data Sources</strong> page displays an overview of the data sources configured for the current project. You can see at a glance the number of events processed in the last day and detailed information on each data source you have configured. Click the <strong>Application Feeds</strong> tab to see information about your application feeds. </p>
<p><img src="images/data_sources_page.png" /></p></li>
</ol>
<h2><a id='data-upload-formats'></a>Data Upload Formats</h2>
<p>Depending on which type of data source you are processing, you can use one of the following data formats to add data sources:</p>
<table border="1"><tbody><tr><th width="20%" class="confluenceTh">Data Format</th><th class="confluenceTh">Description</th></tr><tr><td >Autodetect</td><td >Automatically discovers the format of your data among all currently supported formats, or combinations of formats (including many not explicitly shown in the list).</td></tr><tr><td >Apache CLF</td><td >Select this format for data compliant with the Apache Common Log Format, or the Apache Combined Log Format.</td></tr><tr><td colspan="1" ><p>Avro</p></td><td colspan="1" >Select this format for data generated by the Avro framework. Avro is a serialized data communication format used by Hadoop.</td></tr><tr><td >CSV <span class="confluence-anchor-link" id="DataSourceManagementGuide-csv"></span></td><td ><p>Select this format for data formatted as Comma Separated Values (CSV) or, more generally, Delimiter Separated Values, where the “delimiter” can be any character frequently used as a separator (e.g., a comma or tab). When you select CSV, the following additional options display:</p><ul><li><strong>Separator</strong>—select <strong>Autodetect</strong>, or you can select one of several options for field separators</li><li><strong>Quotes</strong>—select the type of quotes used to identify strings in your data or select <strong>none</strong>.</li><li><strong>Headers</strong>—You can use headers to define the field names in your data. 
Select the way headers are defined in your data from the following options: <ul><li><strong>First row of file is header</strong>—select this option if your CSV file includes field headers on the first line</li><li><strong>System assigned header</strong>—select this option to allow Pivotal AX to automatically name the fields</li><li><strong>User defined</strong>—the system prompts you for field names</li><li><strong>External header file</strong>—the system prompts you for a CSV-formatted header file that defines the field names.</li></ul></li></ul><p>See <a href="#single-line">Treating file contents as single-line events</a>.</p></td></tr><tr><td colspan="1" >Excel</td><td colspan="1" ><p>Select this format to upload files in Microsoft Excel format. The files should be basic spreadsheets that do not contain graphics and have a well-formed row and column structure that can be exported to the CSV format. The first row of the worksheet should contain field names for the columns.</p><p>You can upload an Excel workbook file that contains multiple spreadsheets. 
Each spreadsheet is processed as a separate file.</p></td></tr><tr><td colspan="1" >OpenTSDB</td><td colspan="1" >Select this format for time-series data that uses the OpenTSDB format of Hadoop HBase.</td></tr><tr><td >Syslog</td><td >Select this format for data compliant with the Syslog format, as specified in RFC 3164 or RFC 5424.</td></tr><tr><td >Log4j</td><td ><p>Select this format for data compliant with the Apache Log4j format (using any of the many predefined patterns supported).</p></td></tr><tr><td >JSON</td><td ><p>Select this format for data compliant with the JavaScript Object Notation (JSON) format.</p><p>See <a href="#single-line">Treating file contents as single-line events</a>.</p></td></tr><tr><td colspan="1" >TEXT</td><td colspan="1" ><p>Select this format for unstructured textual data.</p><p>See <a href="#single-line">Treating file contents as single-line events</a> .</p></td></tr><tr><td colspan="1" >vShield</td><td colspan="1" >Select this format to upload log files from VMware vShield servers.</td></tr><tr><td colspan="1" >XML</td><td colspan="1" >Select this format for XML-formatted data.</td></tr></tbody></table>
<p>The data processing applied to your data source is determined by the data format you select. If you specify a data format for a file that is not compliant with that format, the data is still processed, but with much less accuracy, depending on your actual selection.</p>
<h3><a id='single-line'></a>Treating file contents as single-line events</h3>
<p>For the CSV, JSON, and TEXT data sources, de-select the <strong>Treat file contents as single-line events</strong> option if your data spans multiple lines. </p>
<p>When checked, input files processed using MapReduce inside Hadoop are initially split into multiple sub-files and each sub-file is processed independently for parallelization. The splits occur using a newline as the event/record boundary. Although this is correct in most formats such as CSV and single-line JSON, this will not be correct in cases where events/records span across multiple lines.</p>
<h3><a id='compressed'></a>About Compressed Files</h3>
<p>You can upload your files as uncompressed plain text, or as compressed data in any of the following formats (or their common combinations):</p>
<ul>
<li> <code>BZIP2</code> (<code>bz2</code>)</li>
<li> <code>GZIP</code> (<code>gz</code>)</li>
<li> <code>JAR</code></li>
<li> <code>LZO</code></li>
<li> <code>ZIP</code></li>
<li> <code>TAR</code></li>
<li> GZipped <code>TAR</code> (<code>.tar.gz</code>)</li>
</ul>
<p>Note the following regarding compression:</p>
<ul>
<li> To upload compressed archives containing files that have different formats (any of the supported formats, as listed above), use the <strong>Autodetect</strong> option.</li>
<li> If you select a data format (any available option other than <strong>Autodetect</strong>), all the files in the compressed archive are processed as compliant with that format.</li>
<li> The compression format of each file is automatically detected according to the file’s contents; therefore, you can use any file name or extension.</li>
</ul>
<h2><a id='extraction-data-quality'></a>Extraction and Data Quality Rules</h2>
<p>When prompted, choose one of the following <strong>Extraction and Data Quality Rules</strong>:</p>
<ul>
<li> <strong>Retain raw event</strong>—In addition to normal processing, raw events are also retained “as is” (This is only useful if you plan to do searches on the original events).</li>
<li> <strong>Enable Data Quality Control</strong>—You can include or exclude events using a regular expression. When you select this option, two text boxes display where you can enter text to either drop or accept events that match the expression. Select the radio button to <strong>Accept</strong> or <strong>Drop</strong> events. </li>
<li> <strong>Field Extraction Rules</strong>—You can extract data from a data set into a new file using a regular expression. Enter values for the name of the field and the expression and click <strong>Add Rule</strong>. You can add multiple rules.</li>
</ul>
<h2><a id='update-rules'></a>Update Rules</h2>
<p>When prompted, select one of the following <strong>Update Rules</strong>:</p>
<ul>
<li> <strong>Regular</strong>—New data is added to the end of the existing data and does not update or replace previously loaded data. If the data source name exactly matches the name of a previously loaded data source, all data is replaced with the new data. </li>
<li> <strong>Update</strong>—If the keys match, the old data is updated with the new data in the upload. A text box displays where you can define one or more key fields. Click <strong>Add another key</strong> to enter additional keys. Click the minus sign to remove a key. </li>
</ul>
<h2><a id='scheduling-options'></a>Scheduling Options</h2>
<p>The following options are available when scheduling data sources:</p>
<ul>
<li> <strong>Run-immediately</strong>—Upload the file now, and do not repeat the upload.</li>
<li> <strong>Hourly</strong>—Upload the file every hour or at an interval of hours that you specify. A set of fields displays to schedule when and how often the upload occurs. You can also specify a start and end date. </li>
<li> <strong>Daily</strong>—Upload the file every day or at an interval of days that you specify. A set of fields displays to schedule when and how often the upload occurs. You can also specify a start and end date. </li>
<li> <strong>Weekly</strong>—Upload the file every week or specify an interval of weeks. You can also specify the day of the week when the upload occurs. A set of fields displays to schedule when and how often the upload occurs. You can also specify a start and end date. </li>
<li> <strong>Monthly</strong>—Upload the file every month or at an interval of months that you specify. A set of fields displays to schedule when and how often the upload occurs. You can also specify a start and end date. </li>
</ul>
<h1><a id='adding-data-sources'></a>Adding Data Sources</h1>
<p>To add a data source:</p>
<ol>
<li><p>Log in to Pivotal AX and click <strong>Data Sources</strong> in the left menu.</p>
<p>The <strong>Add Data Sources</strong> menu displays:</p>
<p><img src="images/add_data_source_menu.png" /></p></li>
<li><p>Follow the procedures below to add any of the following data sources:</p>
<ul>
<li> <a href="#upload-files">Upload Files</a></li>
<li> <a href="#live-feeds">Live Feeds</a></li>
<li> <a href="#amazon">Amazon S3</a></li>
<li> <a href="#hdfs">HDFS</a></li>
<li> <a href="#ftp-sftp">FTP/SFTP</a></li>
<li> <a href="#gemfire">GemFire</a></li>
<li> <a href="#application-feeds">Application Feed</a></li>
</ul></li>
</ol>
<h2><a id='upload-files'></a>Upload Files</h2>
<p>The <strong>Upload Files</strong> data source allows you to directly upload files from your local computer. </p>
<p>To upload files:</p>
<ol>
<li> Log in to Pivotal AX.</li>
<li> Select <strong>Data Sources</strong> from the left menu.</li>
<li><p>Click <strong>Upload File</strong> from the <strong>Add Data Sources</strong> menu.</p>
<p>The <strong>Add Source</strong> tab displays.</p></li>
<li><p>Drag and drop one or more files into the grey box or click <strong>Choose Files</strong> to open a file browser.</p>
<p>The file uploads to Pivotal AX and displays a progress meter. When the upload is complete you will see a green checkmark icon and the file uploaded message.</p></li>
<li><p>After the files are uploaded, click <strong>Next</strong>.</p>
<p>The <strong>Data Format and Type</strong> options display. You can click the <strong>Preview</strong> button to view the raw data. </p></li>
<li><p>Select a data format and type, or select <strong>Autodetect</strong>. You can choose any of the data formats listed in the <a href="#data-upload-formats">Data Upload Formats</a> table. </p></li>
<li><p>Click <strong>Next.</strong></p></li>
<li><p>(Optional) Select an <strong>Extraction and Data Quality Rules</strong> option.</p>
<p>See <a href="#extraction-data-quality">Extraction and Data Quality Rules</a>.</p></li>
<li><p>Click <strong>Next</strong>.</p></li>
<li><p>Select an <strong>Update Rule</strong>.</p>
<p>See <a href="#update-rules">Update Rules</a>.</p></li>
<li><p>(Optional) Click <strong>Preview</strong> to view the data set you are loading.</p></li>
<li><p>Click <strong>Done</strong>. </p>
<p>The data uploads and processing begins. You can view the status of your uploads on the <strong>Data Feeds tab</strong>.</p>
<p><img src="images/upload_page.png" /></p></li>
</ol>
<p>From the <strong>Upload Files</strong> page you can also perform the following operations: </p>
<ul>
<li><p>Click <strong>Details</strong> for additional status information. </p></li>
<li><p>Click the <strong>Trash</strong> icon to delete the data source.</p></li>
<li><p>Click the red <strong>Stop</strong> icon to stop processing the data.</p></li>
</ul>
<p>When the processing of the uploaded file is complete, a notification displays. </p>
<h2><a id='live-feeds'></a>Live Feeds</h2>
<p>The <strong>Live Feeds</strong> data source option allows you to stream data from log files in formats such as log4j or JSON.</p>
<p>To add a <strong>Live Feed</strong> data source:</p>
<ol>
<li> Log in to Pivotal AX.</li>
<li> Select <strong>Data Sources</strong> from the left menu.</li>
<li> Select <strong>Live Feeds</strong> from the <strong>Add Data Sources</strong> menu.</li>
<li> Enter a name for the data source in the <strong>Source Name</strong> field.</li>
<li> Click <strong>Next</strong>.</li>
<li><p>Select a <strong>Data Format and Type</strong>.</p>
<p>See <a href="#data-upload-formats">Data Upload Formats</a>.</p></li>
<li><p>(Optional) Select an <strong>Extraction and Data Quality Rules</strong> option.</p>
<p>See <a href="#extraction-data-quality">Extraction and Data Quality Rules</a>.</p></li>
<li><p>Click <strong>Next</strong>.</p></li>
<li><p>Select an <strong>Update Rule</strong>.</p>
<p>See <a href="#update-rules">Update Rules</a>. </p></li>
<li><p>(Optional) Click <strong>Preview</strong> to view the data set you are loading.</p></li>
<li><p>Click <strong>Done</strong>.</p></li>
</ol>
<h2><a id='amazon'></a>Amazon S3</h2>
<p>You can add data sources that are stored in Amazon S3 cloud storage service. Before configuring an S3 data source, you must obtain the following information from your S3 environment:</p>
<ul>
<li> Access Key</li>
<li> Secret Key</li>
<li> S3 Root URL</li>
</ul>
<p>To add an Amazon S3 data source:</p>
<ol>
<li> Log in to Pivotal AX.</li>
<li> Select <strong>Data Sources</strong> from the left menu.</li>
<li> Select <strong>Amazon S3</strong> from the <strong>Add Data Sources</strong> menu.</li>
<li> Enter your <strong>Access Key</strong> and <strong>Secret Key</strong>.</li>
<li> In the <strong>S3 Root</strong> field, enter the URL to your Amazon S3 root.</li>
<li><p>Click <strong>Browse</strong> to locate the file in the S3 filesystem.</p>
<p><img src="images/s3.png" /> </p></li>
<li><p>Click <strong>Next</strong>.</p></li>
<li><p>Select a data format from the drop-down list or select <strong>Autodetect</strong>. </p>
<p>See <a href="#data-upload-formats">Data Upload Formats</a>.</p>
<p>You can click the <strong>Preview</strong> button to view the raw data you are uploading.</p></li>
<li><p>(Optional) Select an <strong>Extraction and Data Quality Rules</strong> option.</p>
<p>See <a href="#extraction-data-quality">Extraction and Data Quality Rules</a>.</p></li>
<li><p>Click <strong>Next</strong>.</p></li>
<li><p>Select an <strong>Update Rule</strong>.</p>
<p>See <a href="#update-rules">Update Rules</a>. </p></li>
<li><p>(Optional) Click <strong>Preview</strong> to view the data set you are loading.</p></li>
<li><p>Click <strong>Next.</strong></p></li>
<li><p>In the <strong>Repeats</strong> drop-down list, select a schedule option. Your file will upload automatically using the schedule you specify. </p>
<p>See <a href="#scheduling-options">Scheduling Options</a>.</p></li>
<li><p>If you selected a scheduling option other than <strong>Run-immediately</strong>, select <strong>Incremental Processing</strong> to load only new data. </p></li>
<li><p>Click <strong>Done</strong>. </p>
<p>The data uploads and processing begins. You can view the status of your uploads on the <strong>Data Feeds tab</strong>. </p></li>
</ol>
<h2><a id='hdfs'></a> HDFS</h2>
<p>To add data stored in a Hadoop file system:</p>
<ol>
<li> Log in to Pivotal AX.</li>
<li> Select <strong>Data Sources</strong> from the left menu.</li>
<li> Select <strong>HDFS</strong> from the <strong>Add Data Sources</strong> menu.</li>
<li><p>Select the type of Hadoop distribution where your data is stored. </p>
<p>Choose one of the following options:</p>
<ul>
<li> Pivotal Hadoop Distribution 1.0</li>
<li> Cloudera Distribution for Hadoop 3</li>
<li> Cloudera Distribution for Hadoop 4</li>
</ul></li>
<li><p>Enter the <strong>HDFS Root</strong>. </p></li>
<li><p>Click the <strong>Browse</strong> button to select a file for uploading.</p></li>
<li><p>Select the <strong>Hadoop Processing</strong> option to process the data on the Hadoop cluster where the data source resides. </p>
<p>If you do not select this option, processing occurs only on Pivotal Analytic nodes, which may reduce performance.</p></li>
<li><p>Select the <strong>Auto Summarize</strong> option to automatically summarize events:</p>
<ul>
<li> When this option is selected, individual events are not indexed, but the resulting metadata size is much smaller.</li>
<li> When this option is not selected, individual events are indexed and will require provisioning of sufficient storage for metadata.</li>
</ul></li>
<li><p>Click <strong>Next</strong>.</p></li>
<li><p>Select a <strong>Data Format and Type</strong> from the drop-down list or select <strong>Autodetect</strong> . </p>
<p>See <a href="#data-upload-formats">Data Upload Formats</a> .</p></li>
<li><p>The <strong>Treat file contents as single-line events</strong> option is available. </p>
<p>See <a href="#single-line">Treating file contents as single-line events</a>. </p></li>
<li><p>(Optional) Select an <strong>Extraction and Data Quality Rules</strong> option.</p>
<p>See <a href="#extraction-data-quality">Extraction and Data Quality Rules</a>.</p></li>
<li><p>Click <strong>Next</strong> .</p></li>
<li><p>Select <strong>Update Rules</strong> .</p>
<p>See <a href="#update-rules">Update Rules</a> .</p></li>
<li><p>In the <strong>Repeats</strong> drop-down list, select a schedule option. Your file will upload automatically using the schedule you specify. </p>
<p>See <a href="#scheduling-options">Scheduling Options</a>.</p></li>
<li><p>Click <strong>Done</strong> . </p></li>
</ol>
<h2><a id='ftp-sftp'></a>FTP/SFTP</h2>
<p>To upload data from files stored on FTP or SFTP sources:</p>
<ol>
<li> Log in to Pivotal AX.</li>
<li> Select <strong>Data Sources</strong> from the left menu.</li>
<li> Select <strong>FTP/SFTP</strong> from the <strong>Add Data Sources</strong> menu.</li>
<li> Select the type of file transport from the <strong>FTP/SFTP</strong> drop-down list, either FTP or SFTP.</li>
<li> Enter the <strong>Username</strong> for access to the FTP or SFTP account.</li>
<li> Enter the <strong>Password</strong> for access to the FTP or SFTP account.</li>
<li> Enter the FTP root directory URL.</li>
<li> Click the <strong>Browse</strong> button to select the file you want to upload. </li>
<li><p>Select a <strong>Data Format and Type</strong> from the drop-down list or select <strong>Autodetect</strong>. </p>
<p>See <a href="#data-upload-formats">Data Upload Formats</a>.</p></li>
<li><p>Click <strong>Next</strong>. </p>
<p>You can click <strong>Preview</strong> to view the raw data you are uploading.</p></li>
<li><p>(Optional) Select an <strong>Extraction and Data Quality Rules</strong> option.</p>
<p>See <a href="#extraction-data-quality">Extraction and Data Quality Rules</a>.</p></li>
<li><p>Click <strong>Next</strong>.</p></li>
<li><p>Select <strong>Update Rules</strong>.</p>
<p>See <a href="#update-rules">Update Rules</a>.</p></li>
<li><p>Click <strong>Next</strong>. </p></li>
<li><p>In the <strong>Repeats</strong> drop-down list, select a schedule option. Your file will upload automatically using the schedule you specify. </p>
<p>See <a href="#scheduling-options">Scheduling Options</a> .</p></li>
<li><p>Click <strong>Done</strong>.</p></li>
</ol>
<h2><a id='gemfire'></a>GemFire</h2>
<p>You can add a data source that retrieves data from a Pivotal GemFire region. Consult the GemFire documentation for more information about data access in GemFire.</p>
<ol>
<li> Log in to Pivotal AX.</li>
<li> Select <strong>Data Sources</strong> from the left menu.</li>
<li> Select <strong>GemFire Source</strong> from the <strong>Add Data Sources</strong> menu.</li>
<li> Enter a name for the source (for internal reference only) in the <strong>Source Name</strong> field. </li>
<li> Enter the host name of the GemFire installation in the <strong>GemFire Server Host</strong> field.</li>
<li> Enter the port number of the GemFire installation in the <strong>GemFire Server Port</strong> field.</li>
<li><p>Select the type of query from the following options:</p>
<ul>
<li> <strong>Continuous Query</strong>—Enter a query string for a GemFire continuous query.</li>
<li> <strong>One Time Query</strong>—Enter a query string for a query that is performed immediately. </li>
<li> <strong>Register for Events by regular expression</strong>—Enter a <strong>Region Name</strong> and a <strong>Regular Expression</strong> </li>
</ul></li>
<li><p>In the <strong>Repeats</strong> drop-down list, select a schedule option. </p>
<p>Your file will upload automatically using the schedule you specify. </p>
<p>See <a href="#scheduling-options">Scheduling Options</a> .</p></li>
<li><p>Click <strong>Done</strong>.</p></li>
</ol>
<h2><a id='application-feeds'></a>Application Feeds</h2>
<p>You can add data from running applications by coding those applications to send event messages to Pivotal AX. You define a data source in Pivotal AX and the system provides you with snippets of application code you can embed in your applications to send the event messages. Code is available for the following environments:</p>
<ul>
<li> <strong>JavaScript API</strong>—Use the Pivotal AX JavaScript API to write code that tracks the data you want to analyze.</li>
<li> <strong>JavaScript Page Visit Event</strong>—Use this option to track page views. Event data includes URLs visited, Page Title, Session Analysis, screen resolution, browser, and operating system information. </li>
<li> <strong>JavaScript Page DOM Click Event</strong>—Use this option to track mouse click events. </li>
<li> <strong>JavaScript eCommerce Event</strong>—Use this option to track ecommerce activities. Event data includes shopping carts, products, and custom variables. </li>
<li> <strong>JavaScript Game Tracking Event</strong>—Use this option to track user activities while playing a game, for example, advancing levels, invitations to friends, and purchasing of virtual goods.</li>
<li> <strong>JavaScript User Action Event</strong>—Use this option to track user actions on a site. </li>
<li> <strong>JavaScript Custom Event</strong>—Use this option to track custom events that you define. </li>
<li> <strong>REST API</strong>—You can write custom code using a REST API to send application events to Pivotal AX. </li>
</ul>
<p>To configure an <strong>Application Feed</strong>:</p>
<ol>
<li> Log in to Pivotal AX.</li>
<li> Select <strong>Data Sources</strong> from the left menu.</li>
<li> Select <strong>Application Feed</strong> from the <strong>Add Data Sources</strong> menu.</li>
<li> Enter a name for the application feed in the <strong>Application Name</strong> field.</li>
<li> Enter the URL of the application in the <strong>URL</strong> field.</li>
<li> (Optional) Enter a description of the application in the <strong>Description</strong> field.</li>
<li><p>Select the type of application. (The selection is for reference purposes only.)</p>
<p>You can choose from:</p>
<ul>
<li> WEB</li>
<li> MOBILE</li>
<li> FACEBOOK</li>
<li> DESKTOP</li>
<li> UNKNOWN</li>
</ul></li>
<li><p>Click <strong>Next</strong>.</p></li>
<li><p>Select an <strong>Update Rule</strong>.</p>
<p>See <a href="#update-rules">Update Rules</a>. </p></li>
<li><p>Click <strong>Done</strong>.</p>
<p>The <strong>Application Details</strong> screen displays. </p></li>
<li><p>Select the <strong>JavaScript</strong> tab and event type to copy application tracking code for any of the listed events. </p>
<p>You can paste this code into your application.</p>
<p>You can also select the <strong>REST API</strong> tab for information about adding Web services code using the REST API. </p>
<p>You can also access the API key from this page. </p></li>
<li><p>(Optional) Click <strong>Preview</strong> to preview the data from the application feed.</p></li>
<li><p>Click <strong>Close</strong>.</p></li>
</ol>
<h1><a id='resetting-all-data-sources'></a>Resetting All Data Sources</h1>
<p>You can reset all data sources so that the indexes are cleaned up. </p>
<p>To reset data sources:</p>
<ol>
<li> Log in to Pivotal AX.</li>
<li> Select <strong>Data Sources</strong> from the left menu.</li>
<li> Click the <strong>Reset All Data Sources</strong> link.</li>
</ol>
<h1><a id='data-ingestion-apis'></a>Data Ingestion APIs</h1>
<p>The Data Ingestion APIs are provided in the form of either a set of SDKs for various languages or RESTful APIs for integration of any client that can follow the RESTful API model. A summary of the APIs that can be used for ingestion is listed below (click on the corresponding link for more information):</p>
<ul>
<li> <a href="JavaScriptDataIngestSDK.html">Data Ingestion JavaScript SDK</a></li>
<li> <a href="DataIngestionRESTfulAPIs.html">Data Ingestion RESTful APIs</a></li>
<li> <a href="DataIngestionSDKJava.html">Data Ingestion Java SDK</a></li>
</ul>
</div><!-- end of body-container content-->
</div><!-- end of container -->
</div><!--end of container-fluid-->
</div><!--end of main-wrap-->
<div class="site-footer desktop-only">
<div class="container-fluid">
<div class="site-footer-links">
<span class="version"><a href='/'>Pivotal Documentation</a></span>
<span>©
<script>
// Write the current four-digit year into the page at this position, right
// after the copyright sign in the footer.
var d = new Date();
document.write(d.getFullYear());
</script>
<a href='http://gopivotal.com'>Pivotal Software</a> Inc. All Rights Reserved.
</span>
</div>
</div>
</div>
<script type="text/javascript">
// Asynchronously load the Marketo Munchkin script and initialise it exactly
// once after the script has finished loading.
(function() {
  var didInit = false;

  // Guarded initialiser: both onload and onreadystatechange may fire for the
  // same script element, so only the first call reaches Munchkin.init.
  function initMunchkin() {
    if (didInit === false) {
      didInit = true;
      Munchkin.init('625-IUJ-009');
    }
  }

  var s = document.createElement('script');
  s.type = 'text/javascript';
  s.async = true;
  // Use https: on https pages and http: for every other page protocol.
  // Prepending the raw page protocol produced an unusable URL such as
  // file://munchkin.marketo.net/... when the page was not served over
  // http(s). This is the same fallback the Google CSE loader on this page uses.
  s.src = (document.location.protocol == 'https:' ? 'https:' : 'http:') +
    '//munchkin.marketo.net/munchkin.js';
  // Fallback for browsers that report load completion through readyState
  // changes on the script element.
  s.onreadystatechange = function() {
    if (this.readyState == 'complete' || this.readyState == 'loaded') {
      initMunchkin();
    }
  };
  s.onload = initMunchkin;
  document.getElementsByTagName('head')[0].appendChild(s);
})();
</script>
<script type="text/javascript">
// Google Analytics setup. Reuse an existing _gaq if one is already defined,
// otherwise start from an empty array, then push the account, domain and
// page-view commands onto it.
var _gaq = _gaq || [];
_gaq.push(['_setAccount', 'UA-39702075-1']);
_gaq.push(['_setDomainName', 'gopivotal.com']);
_gaq.push(['_trackPageview']);

// Insert the asynchronous ga.js script in front of the first <script> element,
// using the ssl host on https pages and the www host otherwise.
(function() {
  var tracker = document.createElement('script');
  tracker.type = 'text/javascript';
  tracker.async = true;

  var hostPrefix = 'http://www';
  if ('https:' == document.location.protocol) {
    hostPrefix = 'https://ssl';
  }
  tracker.src = hostPrefix + '.google-analytics.com/ga.js';

  var firstScript = document.getElementsByTagName('script')[0];
  firstScript.parentNode.insertBefore(tracker, firstScript);
})();
</script>
</body>
</html>