Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/transform fields #294

Merged
merged 4 commits into from
Aug 29, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions src/main/java/com/datastax/cdm/feature/ExtractJson.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ public class ExtractJson extends AbstractFeature {

private String targetColumnName = "";
private Integer targetColumnIndex = -1;
private boolean overwriteTarget = false;

@Override
public boolean loadProperties(IPropertyHelper helper) {
Expand All @@ -49,6 +50,8 @@ public boolean loadProperties(IPropertyHelper helper) {

originColumnName = getColumnName(helper, KnownProperties.EXTRACT_JSON_ORIGIN_COLUMN_NAME);
targetColumnName = getColumnName(helper, KnownProperties.EXTRACT_JSON_TARGET_COLUMN_MAPPING);
overwriteTarget = helper.getBoolean(KnownProperties.EXTRACT_JSON_TARGET_OVERWRITE);

// Convert columnToFieldMapping to targetColumnName and originJsonFieldName
if (!targetColumnName.isBlank()) {
String[] parts = targetColumnName.split("\\:");
Expand Down Expand Up @@ -146,6 +149,10 @@ public String getTargetColumnName() {
return isEnabled ? targetColumnName : "";
}

public boolean overwriteTarget() {
return overwriteTarget;
}

private String getColumnName(IPropertyHelper helper, String colName) {
String columnName = CqlTable.unFormatName(helper.getString(colName));
return (null == columnName) ? "" : columnName;
Expand Down
11 changes: 9 additions & 2 deletions src/main/java/com/datastax/cdm/job/DiffJobSession.java
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ public class DiffJobSession extends CopyJobSession {
boolean logDebug = logger.isDebugEnabled();
boolean logTrace = logger.isTraceEnabled();
private ExtractJson extractJsonFeature;
private boolean overwriteTarget;

public DiffJobSession(CqlSession originSession, CqlSession targetSession, SparkConf sc) {
super(originSession, targetSession, sc);
Expand Down Expand Up @@ -111,6 +112,7 @@ public DiffJobSession(CqlSession originSession, CqlSession targetSession, SparkC
}

extractJsonFeature = (ExtractJson) this.targetSession.getCqlTable().getFeature(Featureset.EXTRACT_JSON);
overwriteTarget = extractJsonFeature.isEnabled() && extractJsonFeature.overwriteTarget();

logger.info("CQL -- origin select: {}", this.originSession.getOriginSelectByPartitionRangeStatement().getCQL());
logger.info("CQL -- target select: {}", this.targetSession.getTargetSelectByPKStatement().getCQL());
Expand Down Expand Up @@ -270,7 +272,13 @@ private String isDifferent(Record record) {
logger.trace("PK {}, targetIndex {} skipping constant column {}", pk, targetIndex,
targetColumnNames.get(targetIndex));
return; // nothing to compare in origin
} else if (targetIndex == extractJsonFeature.getTargetColumnIndex()) {
}

targetAsOriginType = targetSession.getCqlTable().getAndConvertData(targetIndex, targetRow);
if (targetIndex == extractJsonFeature.getTargetColumnIndex()) {
if (!overwriteTarget && null != targetAsOriginType) {
return; // skip validation when target has data
}
originIndex = extractJsonFeature.getOriginColumnIndex();
origin = extractJsonFeature.extract(originRow.getString(originIndex));
} else {
Expand Down Expand Up @@ -301,7 +309,6 @@ private String isDifferent(Record record) {
+ explodeMapKeyIndex + ", valueIndex:" + explodeMapValueIndex + ")");
}
}
targetAsOriginType = targetSession.getCqlTable().getAndConvertData(targetIndex, targetRow);

if (logDebug)
logger.debug(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -243,13 +243,15 @@ public enum PropertyType {
public static final String EXTRACT_JSON_EXCLUSIVE = "spark.cdm.feature.extractJson.exclusive";
public static final String EXTRACT_JSON_ORIGIN_COLUMN_NAME = "spark.cdm.feature.extractJson.originColumn";
public static final String EXTRACT_JSON_TARGET_COLUMN_MAPPING = "spark.cdm.feature.extractJson.propertyMapping";
public static final String EXTRACT_JSON_TARGET_OVERWRITE = "spark.cdm.feature.extractJson.overwrite";

static {
types.put(EXTRACT_JSON_EXCLUSIVE, PropertyType.BOOLEAN);
defaults.put(EXTRACT_JSON_EXCLUSIVE, "false");
types.put(EXTRACT_JSON_ORIGIN_COLUMN_NAME, PropertyType.STRING);
types.put(EXTRACT_JSON_TARGET_COLUMN_MAPPING, PropertyType.STRING);
}
types.put(EXTRACT_JSON_TARGET_OVERWRITE, PropertyType.BOOLEAN);
defaults.put(EXTRACT_JSON_TARGET_OVERWRITE, "false"); }

// ==========================================================================
// Guardrail Feature
Expand Down
6 changes: 6 additions & 0 deletions src/resources/cdm-detailed.properties
Original file line number Diff line number Diff line change
Expand Up @@ -388,10 +388,16 @@ spark.cdm.perfops.ratelimit.target 20000
# - If the specified JSON property does not exist in the JSON content, the Target column
# will be set to null.
# Note: This feature currently supports extraction of only one JSON property.
#
# .overwrite Default is false. This property only applies to Validation run (NA for Migration)
# When set to true, the extracted JSON value will overwrite any existing value in the
# Target column during Validation. False will skip validation if the Target column has
# any non-null value.
#-----------------------------------------------------------------------------------------------------------
#spark.cdm.feature.extractJson.exclusive false
#spark.cdm.feature.extractJson.originColumn origin_columnname_with_json_content
#spark.cdm.feature.extractJson.propertyMapping origin_json_propertyname:target_columnname
#spark.cdm.feature.extractJson.overwrite false

#===========================================================================================================
# Guardrail feature manages records that exceed guardrail checks. The Guardrail job will generate a
Expand Down
14 changes: 14 additions & 0 deletions src/test/java/com/datastax/cdm/feature/ExtractJsonTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ public class ExtractJsonTest {

String standardOriginName = "content";
String standardTargetName = "age";
String mappedTargetName = "personAge:person_age";

@BeforeEach
public void setup() {
Expand Down Expand Up @@ -98,9 +99,22 @@ public void loadProperties() {
assertAll(
() -> assertTrue(loaded, "properties are loaded and valid"),
() -> assertTrue(feature.isEnabled()),
() -> assertFalse(feature.overwriteTarget()),
() -> assertEquals(standardTargetName, feature.getTargetColumnName())
);
}

@Test
public void loadPropertiesWithMapping() {
when(propertyHelper.getString(KnownProperties.EXTRACT_JSON_TARGET_COLUMN_MAPPING)).thenReturn(mappedTargetName);
boolean loaded = feature.loadProperties(propertyHelper);

assertAll(
() -> assertTrue(loaded, "properties are loaded and valid"),
() -> assertTrue(feature.isEnabled()),
() -> assertEquals("person_age", feature.getTargetColumnName())
);
}

@Test
public void loadPropertiesException() {
Expand Down
18 changes: 18 additions & 0 deletions src/test/java/com/datastax/cdm/feature/TrackRunTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package com.datastax.cdm.feature;
msmygit marked this conversation as resolved.
Show resolved Hide resolved

import static org.junit.jupiter.api.Assertions.assertEquals;

import org.junit.jupiter.api.Test;

class TrackRunTest {

@Test
void test() {
assertEquals("MIGRATE", TrackRun.RUN_TYPE.MIGRATE.name());
assertEquals("DIFF_DATA", TrackRun.RUN_TYPE.DIFF_DATA.name());

assertEquals(2, TrackRun.RUN_TYPE.values().length);
assertEquals(5, TrackRun.RUN_STATUS.values().length);
}

}