Skip to content

Commit

Permalink
Text edits
Browse files Browse the repository at this point in the history
  • Loading branch information
JaniceManwiller authored Nov 22, 2024
1 parent cc8c3c2 commit 2dec9d3
Showing 1 changed file with 26 additions and 26 deletions.
52 changes: 26 additions & 26 deletions tonic_textual/classes/parse_api_responses/file_parse_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@


class FileParseResult(object):
"""A class representing the result of a parsed file.
"""A class that represents the result of a parsed file.
Parameters
----------
Expand Down Expand Up @@ -101,7 +101,7 @@ def get_json(self) -> Dict:
Returns
-------
Dict
The raw JSON generated by Textual when parsing the file, as a dictionary.
The raw JSON that Textual generates when it parses the file, in the form of a dictionary.
"""

self.__get_content()
Expand All @@ -113,28 +113,28 @@ def get_markdown(
generator_default: PiiState = PiiState.Off,
random_seed: Optional[int] = None
) -> str:
"""Returns file in markdown format, redacted or synthesized based on config.
"""Returns the file in Markdown format. In the file, the entities are redacted or synthesized based on the specified configuration.
Parameters
----------
generator_config: Dict[str, PiiState]
A dictionary of sensitive data entities. For each entity, indicates whether
to redact, synthesize, or ignore it.
A dictionary of sensitive data entity types. For each entity type, indicates whether
to redact, synthesize, or ignore the detected entities.
Values must be one of "Redaction", "Synthesis", or "Off".
generator_default: PiiState = PiiState.Off
The default redaction used for all types not specified in generator_config.
Values must be one of "Redaction", "Synthesis", or "Off".
The default redaction to use for all entity types that are not specified in generator_config.
Value must be one of "Redaction", "Synthesis", or "Off".
random_seed: Optional[int] = None
An optional value to use to override Textual's default random number
seeding. Can be used to ensure that different API calls use the same or
seeding. Can be used to ensure that different API calls use the same or
different random seeds.
Returns
-------
str
The file in markdown format, redacted or synthesized based on
The file in Markdown format. In the file, the entities are redacted or synthesized based on
generator_config and generator_default.
"""
self.__get_content()
Expand Down Expand Up @@ -172,7 +172,7 @@ def describe(self) -> str:
return f"{self.parsed_file_path}"

def get_all_entities(self) -> List[SingleDetectionResult]:
"""Returns a list of all the detected entities in the file.
"""Returns a list of all of the detected entities in the file.
Returns
-------
Expand All @@ -194,12 +194,12 @@ def get_entities(
Parameters
----------
generator_default: PiiState
The default redaction used for all types not specified in generator_config.
Values must be one of "Redaction", "Synthesis", or "Off".
The default redaction to use for all entity types that are not specified in generator_config.
Value must be one of "Redaction", "Synthesis", or "Off".
generator_config: Dict[str, PiiState]
A dictionary of sensitive data entities. For each entity, indicates whether
to redact, synthesize, or ignore it.
A dictionary of sensitive data entity types. For each entity type, indicates whether
to redact, synthesize, or ignore the detected entities.
Values must be one of "Redaction", "Synthesis", or "Off".
Returns
Expand Down Expand Up @@ -233,7 +233,7 @@ def get_entities(
def is_sensitive(
self, sensitive_entity_types: List[str], start: int = 0, end: int = -1
) -> bool:
"""Returns True if the element contains sensitive data, False otherwise.
"""Returns True if the element contains sensitive data. Otherwise returns False.
Parameters
----------
Expand All @@ -249,7 +249,7 @@ def is_sensitive(
Returns
-------
bool
True if the element contains sensitive data, False otherwise.
Returns True if the element contains sensitive data. Otherwise returns False.
"""
all_entities = self.get_entities()
entities = [
Expand All @@ -259,7 +259,7 @@ def is_sensitive(
return len(filtered_entities) > 0

def get_tables(self) -> List[Table]:
"""Returns a list of tables found in document. This is applicable to CSV, XLSX, PDF, and images
"""Returns a list of tables found in the document. Applies to CSV, XLSX, PDF, and image files.
Parameters
----------
Expand All @@ -275,7 +275,7 @@ def get_tables(self) -> List[Table]:
Returns
-------
List[Table]
True if the element contains sensitive data, False otherwise.
A list of the tables found in the document.
"""
self.__get_content()

Expand Down Expand Up @@ -315,11 +315,11 @@ def _find_intersecting_entites(
The start index to check for intersecting entities.
end: int
The end index to check for intersecting entities. If -1, returns all
entities that occur after start
entities that occur after start.
Returns
-------
List[Dict]
A list of entities that intersect with the given start and end indices
A list of entities that intersect with the given start and end indices.
"""
intersecting_entities = []
for entity in entities:
Expand All @@ -337,7 +337,7 @@ def get_chunks(
metadata_entities: List[str] = [],
include_metadata=True,
) -> List:
"""Returns a list of chunks of text from the document. The chunks are filtered
"""Returns a list of chunks of text from the document. The chunks are filtered
by the generator_default configuration.
Parameters
Expand All @@ -346,21 +346,21 @@ def get_chunks(
The maximum number of characters in each chunk.
generator_config: Dict[str, PiiState]
A dictionary of sensitive data entities. For each entity, indicates
whether to redact, synthesize, or ignore it.
A dictionary of sensitive data entity types. For each entity type, indicates
whether to redact, synthesize, or ignore the detected entities.
Values must be one of "Redaction", "Synthesis", or "Off".
generator_default: PiiState = PiiState.Redaction
The default redaction used for all types not specified in
The default redaction to use for all entity types that are not specified in
generator_config.
Values must be one of "Redaction", "Synthesis", or "Off".
Value must be one of "Redaction", "Synthesis", or "Off".
include_metadata: bool = True
If True, the metadata is included in the chunk.
Returns:
List[str]
A list of strings containing the chunks of text.
A list of strings that contain the chunks of text.
"""
text = self.get_markdown(generator_config, generator_default)
all_entities = self.get_all_entities()
Expand Down

0 comments on commit 2dec9d3

Please sign in to comment.