-
Notifications
You must be signed in to change notification settings - Fork 299
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Support `elasticsearch.byte_size` (#1852)
* Modified code for the `byte_size` parameter:
  1. Send the byte size limit parameter to the bulk processor.
  2. Add a condition that checks whether the total byte size of the bulk request has reached the byte size limit, and send the bulk request accordingly.
* Refactor the code of the bulk framework.
  The bulk request is now aware of its own limits with `FsCrawlerBulkRequest#isOverTheLimit()`. The bulk request itself computes its current size when a new entity is added (`FsCrawlerBulkRequest#add(T request)`). We are adding a lot of unit tests for both `FsCrawlerBulkRequest` and `FsCrawlerBulkProcessor`.

Closes #1835.

---------

Co-authored-by: kamal-sharma <[email protected]>
- Loading branch information
1 parent
c35bc18
commit 43dc79a
Showing
12 changed files
with
394 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
158 changes: 158 additions & 0 deletions
158
...st/java/fr/pilato/elasticsearch/crawler/fs/framework/bulk/FsCrawlerBulkProcessorTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,158 @@ | ||
/* | ||
* Licensed to David Pilato under one or more contributor | ||
* license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright | ||
* ownership. Elasticsearch licenses this file to you under | ||
* the Apache License, Version 2.0 (the "License"); you may | ||
* not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
package fr.pilato.elasticsearch.crawler.fs.framework.bulk; | ||
|
||
import fr.pilato.elasticsearch.crawler.fs.framework.ByteSizeUnit; | ||
import fr.pilato.elasticsearch.crawler.fs.framework.ByteSizeValue; | ||
import fr.pilato.elasticsearch.crawler.fs.framework.TimeValue; | ||
import fr.pilato.elasticsearch.crawler.fs.test.framework.AbstractFSCrawlerTestCase; | ||
import org.junit.Test; | ||
|
||
import java.io.IOException; | ||
|
||
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomIntBetween; | ||
import static fr.pilato.elasticsearch.crawler.fs.framework.JsonUtil.serialize; | ||
import static org.junit.Assert.*; | ||
|
||
public class FsCrawlerBulkProcessorTest extends AbstractFSCrawlerTestCase { | ||
private static final TestBean PAYLOAD = new TestBean("bar"); | ||
private static final int PAYLOAD_SIZE = serialize(PAYLOAD).getBytes().length + 12 /* for the json payload field overhead */; | ||
|
||
@Test | ||
public void testBulkProcessorMaxActions() throws IOException { | ||
int maxActions = randomIntBetween(1, 1000); | ||
TestBulkListener listener = new TestBulkListener(); | ||
FsCrawlerBulkProcessor<TestOperation, TestBulkRequest, TestBulkResponse> bulkProcessor = | ||
new FsCrawlerBulkProcessor<>( | ||
new TestEngine(), | ||
listener, | ||
maxActions, | ||
null, | ||
new ByteSizeValue(1, ByteSizeUnit.MB), | ||
TestBulkRequest::new); | ||
|
||
generatePayload(bulkProcessor, 1, maxActions - 1); | ||
assertEquals(0, listener.nbSuccessfulExecutions); | ||
generatePayload(bulkProcessor, maxActions, 1); | ||
assertEquals(1, listener.nbSuccessfulExecutions); | ||
generatePayload(bulkProcessor, maxActions + 1, 1); | ||
bulkProcessor.close(); | ||
assertEquals(2, listener.nbSuccessfulExecutions); | ||
} | ||
|
||
@Test | ||
public void testBulkProcessorNullSize() throws IOException { | ||
int maxActions = randomIntBetween(1, 1000); | ||
TestBulkListener listener = new TestBulkListener(); | ||
FsCrawlerBulkProcessor<TestOperation, TestBulkRequest, TestBulkResponse> bulkProcessor = | ||
new FsCrawlerBulkProcessor<>( | ||
new TestEngine(), | ||
listener, | ||
maxActions, | ||
null, | ||
null, | ||
TestBulkRequest::new); | ||
|
||
generatePayload(bulkProcessor, 1, maxActions - 1); | ||
assertEquals(0, listener.nbSuccessfulExecutions); | ||
generatePayload(bulkProcessor, maxActions, 1); | ||
assertEquals(1, listener.nbSuccessfulExecutions); | ||
generatePayload(bulkProcessor, maxActions + 1, 1); | ||
bulkProcessor.close(); | ||
assertEquals(2, listener.nbSuccessfulExecutions); | ||
} | ||
|
||
@Test | ||
public void testBulkProcessorZeroSize() throws IOException { | ||
int maxActions = randomIntBetween(1, 1000); | ||
TestBulkListener listener = new TestBulkListener(); | ||
FsCrawlerBulkProcessor<TestOperation, TestBulkRequest, TestBulkResponse> bulkProcessor = | ||
new FsCrawlerBulkProcessor<>( | ||
new TestEngine(), | ||
listener, | ||
maxActions, | ||
null, | ||
new ByteSizeValue(0, ByteSizeUnit.MB), | ||
TestBulkRequest::new); | ||
|
||
generatePayload(bulkProcessor, 1, maxActions - 1); | ||
assertEquals(0, listener.nbSuccessfulExecutions); | ||
generatePayload(bulkProcessor, maxActions, 1); | ||
assertEquals(1, listener.nbSuccessfulExecutions); | ||
generatePayload(bulkProcessor, maxActions + 1, 1); | ||
bulkProcessor.close(); | ||
assertEquals(2, listener.nbSuccessfulExecutions); | ||
} | ||
|
||
@Test | ||
public void testBulkProcessorMaxSize() throws IOException { | ||
int maxActions = randomIntBetween(1, 1000); | ||
TestBulkListener listener = new TestBulkListener(); | ||
FsCrawlerBulkProcessor<TestOperation, TestBulkRequest, TestBulkResponse> bulkProcessor = | ||
new FsCrawlerBulkProcessor<>( | ||
new TestEngine(), | ||
listener, | ||
0, | ||
null, | ||
new ByteSizeValue((long) maxActions * PAYLOAD_SIZE, ByteSizeUnit.BYTES), | ||
TestBulkRequest::new); | ||
|
||
generatePayload(bulkProcessor, 1, maxActions - 1); | ||
assertEquals(0, listener.nbSuccessfulExecutions); | ||
generatePayload(bulkProcessor, maxActions, 1); | ||
assertEquals(1, listener.nbSuccessfulExecutions); | ||
generatePayload(bulkProcessor, maxActions + 1, maxActions - 1); | ||
assertEquals(1, listener.nbSuccessfulExecutions); | ||
generatePayload(bulkProcessor, 2 * maxActions, 1); | ||
assertEquals(2, listener.nbSuccessfulExecutions); | ||
generatePayload(bulkProcessor, 2 * maxActions + 1, 1); | ||
bulkProcessor.close(); | ||
assertEquals(3, listener.nbSuccessfulExecutions); | ||
} | ||
|
||
@Test | ||
public void testBulkProcessorFlushInterval() throws IOException, InterruptedException { | ||
int maxActions = randomIntBetween(1, 1000); | ||
TimeValue flushInterval = TimeValue.timeValueMillis(randomIntBetween(500, 2000)); | ||
TestBulkListener listener = new TestBulkListener(); | ||
FsCrawlerBulkProcessor<TestOperation, TestBulkRequest, TestBulkResponse> bulkProcessor = | ||
new FsCrawlerBulkProcessor<>(new TestEngine(), listener, 0, flushInterval, null, TestBulkRequest::new); | ||
|
||
// We don't load immediately the bulk processor | ||
Thread.sleep(100); | ||
|
||
generatePayload(bulkProcessor, 1, maxActions); | ||
assertEquals(0, listener.nbSuccessfulExecutions); | ||
|
||
Thread.sleep(flushInterval.millis()); | ||
|
||
assertEquals(1, listener.nbSuccessfulExecutions); | ||
bulkProcessor.close(); | ||
assertEquals(1, listener.nbSuccessfulExecutions); | ||
} | ||
|
||
private void generatePayload(FsCrawlerBulkProcessor<TestOperation, TestBulkRequest, TestBulkResponse> bulkProcessor, int start, int size) { | ||
for (int i = start; i < start + size; i++) { | ||
logger.trace("Adding a new operation [{}]", i); | ||
bulkProcessor.add(new TestOperation(PAYLOAD)); | ||
} | ||
} | ||
|
||
} |
Oops, something went wrong.