diff --git a/pandas_schema/column.py b/pandas_schema/column.py index 199b883..5416cc0 100644 --- a/pandas_schema/column.py +++ b/pandas_schema/column.py @@ -5,7 +5,9 @@ from .validation_warning import ValidationWarning class Column: - def __init__(self, name: str, validations: typing.Iterable['validation._BaseValidation'] = [], allow_empty=False): + def __init__(self, name: str, validations: typing.Iterable['validation._BaseValidation'] = [], + allow_empty=False, + optional=False): """ Creates a new Column object @@ -16,6 +18,7 @@ def __init__(self, name: str, validations: typing.Iterable['validation._BaseVali self.name = name self.validations = list(validations) self.allow_empty = allow_empty + self.optional = optional def validate(self, series: pd.Series) -> typing.List[ValidationWarning]: """ diff --git a/pandas_schema/schema.py b/pandas_schema/schema.py index 5c0442e..f8a4c26 100644 --- a/pandas_schema/schema.py +++ b/pandas_schema/schema.py @@ -73,7 +73,7 @@ def validate(self, df: pd.DataFrame, columns: typing.List[str] = None) -> typing for column in columns_to_pair: # Throw an error if the schema column isn't in the data frame - if column.name not in df: + if column.name not in df and not column.optional: errors.append(ValidationWarning( 'The column {} exists in the schema but not in the data frame'.format(column.name))) return errors diff --git a/pandas_schema/validation.py b/pandas_schema/validation.py index 5f7c763..6b8ba14 100644 --- a/pandas_schema/validation.py +++ b/pandas_schema/validation.py @@ -90,7 +90,8 @@ def get_errors(self, series: pd.Series, column: 'column.Column'): validated = ~series.isnull() & simple_validation else: validated = (series.str.len() > 0) & simple_validation - + elif column.optional and (bool(series.isnull().all()) or list(series.unique()) == ['']): + validated = [] else: validated = simple_validation diff --git a/test/test_validation.py b/test/test_validation.py index fc40100..b4f2851 100644 --- a/test/test_validation.py +++ b/test/test_validation.py @@ -660,6 +660,18 @@ def test_in_range_allow_empty_false_with_error(self): errors = validator.get_errors(pd.Series(self.vals), Column('', allow_empty=False)) self.assertEqual(len(errors), len(self.vals)) + def test_in_range_optional_missing(self): + validator = InRangeValidation(min=0) + errors = validator.get_errors(pd.Series(), Column('', optional=True)) + + self.assertEqual(len(errors), 0) + + def test_in_range_optional_with_error(self): + validator = InRangeValidation(min=4) + errors = validator.get_errors(pd.Series(self.vals), Column('', optional=False)) + + self.assertEqual(len(errors), len(self.vals)) + class PandasDtypeTests(ValidationTestBase): """