diff --git a/src/datatrove/pipeline/filters/language_filter.py b/src/datatrove/pipeline/filters/language_filter.py index f43e5153..e57dce07 100644 --- a/src/datatrove/pipeline/filters/language_filter.py +++ b/src/datatrove/pipeline/filters/language_filter.py @@ -33,7 +33,7 @@ def __init__( super().__init__(exclusion_writer) self.language_threshold = language_threshold if isinstance(languages, str): - languages = list(languages) + languages = [languages] self.languages = languages self.backend = backend self.model = FT176LID(languages) if backend == "ft176" else GlotLID(languages)