-
Notifications
You must be signed in to change notification settings - Fork 124
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Initial commit adding support for reading avro files. Fixes #100. * Updated introduction. * Added testcases and added method to start the processing of a while when you don't open in the context of the InputFile. * Added check to test case to call out when a resource file is not found. * Added testing data for Avro files. * Code cleanup. * Updated documentation about installation. * Updated to new parent pom to correct jdk 11 issues. Fixes #170.
- Loading branch information
1 parent
10dcc69
commit 011238a
Showing
13 changed files
with
7,308 additions
and
67 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# | ||
# Copyright © 2016 Jeremy Custenborder ([email protected]) | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# | ||
|
||
name=AvroSpoolDir | ||
tasks.max=1 | ||
connector.class=com.github.jcustenborder.kafka.connect.spooldir.SpoolDirAvroSourceConnector | ||
input.file.pattern=^.*\.avro$ | ||
|
||
halt.on.error=false | ||
topic=testing | ||
|
||
input.path=/Users/jeremy/data/stackoverflow | ||
finished.path=/tmp/spooldir/finished | ||
error.path=/tmp/spooldir/error | ||
batch.size = 5000 | ||
cleanup.policy = NONE |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
47 changes: 47 additions & 0 deletions
47
...ain/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirAvroSourceConnector.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
/** | ||
* Copyright © 2016 Jeremy Custenborder ([email protected]) | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package com.github.jcustenborder.kafka.connect.spooldir; | ||
|
||
import com.github.jcustenborder.kafka.connect.utils.config.Description; | ||
import com.github.jcustenborder.kafka.connect.utils.config.DocumentationImportant; | ||
import com.github.jcustenborder.kafka.connect.utils.config.Title; | ||
import org.apache.kafka.common.config.ConfigDef; | ||
import org.apache.kafka.connect.connector.Task; | ||
|
||
import java.util.Map; | ||
|
||
@Title("Avro Source Connector") | ||
@Description("This connector is used to read avro data files from the file system and write their contents " + | ||
"to Kafka. The schema of the file is used to read the data and produce it to Kafka") | ||
@DocumentationImportant("This connector has a dependency on the Confluent Schema Registry specifically kafka-connect-avro-converter. " + | ||
"This dependency is not shipped along with the connector to ensure that there are not potential version mismatch issues. " + | ||
"The easiest way to ensure this component is available is to use one of the Confluent packages or containers for deployment.") | ||
public class SpoolDirAvroSourceConnector extends AbstractSourceConnector<SpoolDirAvroSourceConnectorConfig> { | ||
@Override | ||
protected SpoolDirAvroSourceConnectorConfig config(Map<String, ?> settings) { | ||
return new SpoolDirAvroSourceConnectorConfig(settings); | ||
} | ||
|
||
@Override | ||
public Class<? extends Task> taskClass() { | ||
return SpoolDirAvroSourceTask.class; | ||
} | ||
|
||
@Override | ||
public ConfigDef config() { | ||
return SpoolDirAvroSourceConnectorConfig.config(); | ||
} | ||
} |
32 changes: 32 additions & 0 deletions
32
...va/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirAvroSourceConnectorConfig.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
/** | ||
* Copyright © 2016 Jeremy Custenborder ([email protected]) | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package com.github.jcustenborder.kafka.connect.spooldir; | ||
|
||
import org.apache.kafka.common.config.ConfigDef; | ||
|
||
import java.util.Map; | ||
|
||
public class SpoolDirAvroSourceConnectorConfig extends AbstractSourceConnectorConfig { | ||
|
||
|
||
public SpoolDirAvroSourceConnectorConfig(Map<?, ?> originals) { | ||
super(config(), originals, true); | ||
} | ||
|
||
public static ConfigDef config() { | ||
return AbstractSourceConnectorConfig.config(true); | ||
} | ||
} |
84 changes: 84 additions & 0 deletions
84
src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirAvroSourceTask.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
/** | ||
* Copyright © 2016 Jeremy Custenborder ([email protected]) | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package com.github.jcustenborder.kafka.connect.spooldir; | ||
|
||
import io.confluent.connect.avro.AvroData; | ||
import org.apache.avro.file.DataFileReader; | ||
import org.apache.avro.generic.GenericContainer; | ||
import org.apache.avro.generic.GenericDatumReader; | ||
import org.apache.avro.io.DatumReader; | ||
import org.apache.kafka.connect.data.SchemaAndValue; | ||
import org.apache.kafka.connect.source.SourceRecord; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
import java.io.IOException; | ||
import java.util.ArrayList; | ||
import java.util.List; | ||
import java.util.Map; | ||
|
||
public class SpoolDirAvroSourceTask extends AbstractSourceTask<SpoolDirAvroSourceConnectorConfig> { | ||
private static final Logger log = LoggerFactory.getLogger(SpoolDirAvroSourceTask.class); | ||
long recordOffset; | ||
AvroData avroData = new AvroData(1024); | ||
DataFileReader<GenericContainer> dataFileReader; | ||
DatumReader<GenericContainer> datumReader = new GenericDatumReader<>(); | ||
|
||
|
||
@Override | ||
protected SpoolDirAvroSourceConnectorConfig config(Map<String, ?> settings) { | ||
return new SpoolDirAvroSourceConnectorConfig(settings); | ||
} | ||
|
||
@Override | ||
protected void configure(InputFile inputFile, Long lastOffset) throws IOException { | ||
if (null != this.dataFileReader) { | ||
this.dataFileReader.close(); | ||
} | ||
inputFile.startProcessing(); | ||
this.dataFileReader = new DataFileReader<>(inputFile.file(), datumReader); | ||
this.recordOffset = 0; | ||
|
||
if (null != lastOffset) { | ||
while (recordOffset < lastOffset && this.dataFileReader.hasNext()) { | ||
this.dataFileReader.next(); | ||
recordOffset++; | ||
} | ||
} | ||
|
||
} | ||
|
||
@Override | ||
protected List<SourceRecord> process() throws IOException { | ||
int recordCount = 0; | ||
List<SourceRecord> records = new ArrayList<>(this.config.batchSize); | ||
GenericContainer container = null; | ||
while (recordCount <= this.config.batchSize && dataFileReader.hasNext()) { | ||
container = dataFileReader.next(container); | ||
SchemaAndValue value = avroData.toConnectData(this.dataFileReader.getSchema(), container); | ||
SourceRecord sourceRecord = record(null, value, null); | ||
records.add(sourceRecord); | ||
recordCount++; | ||
recordOffset++; | ||
} | ||
return records; | ||
} | ||
|
||
@Override | ||
protected long recordOffset() { | ||
return recordOffset; | ||
} | ||
} |
Oops, something went wrong.