Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for allow_list, allow_list_match, regex_flags in REST API #1478

Merged
merged 10 commits into from
Nov 11, 2024
92 changes: 92 additions & 0 deletions e2e-tests/tests/test_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,3 +480,95 @@ def test_given_ad_hoc_deny_list_recognizer_the_right_entities_are_returned():
assert equal_json_strings(
expected_response, response_content, ignore_keys=["recognition_metadata"]
)


@pytest.mark.api
def test_given_allow_list_then_no_entity_is_returned():
    """Check that a value listed in ``allow_list`` yields no findings.

    The request names the email address found in the text in its
    ``allow_list`` and leaves ``allow_list_match`` unset, so the API is
    expected to answer with HTTP 200 and an empty JSON array.
    """
    # An empty array: the only candidate entity is allow-listed.
    expected_response = """
[]
"""
    request_body = """
{
"text": "email: [email protected]",
"language": "en",
"allow_list": ["[email protected]"]
}
"""

    status_code, payload = analyze(request_body)

    assert status_code == 200
    assert equal_json_strings(expected_response, payload)


@pytest.mark.api
def test_given_allow_list_with_regex_match_then_no_entity_is_returned():
    """Check that a regex ``allow_list`` entry yields no findings.

    The request sets ``allow_list_match`` to ``"regex"`` and supplies the
    pattern ``.*@github.com``; the API is expected to answer with HTTP 200
    and an empty JSON array.
    """
    # An empty array: the email is expected to be filtered by the pattern.
    expected_response = """
[]
"""
    request_body = """
{
"text": "email: [email protected]",
"language": "en",
"allow_list": [".*@github.com"],
"allow_list_match": "regex"
}
"""

    status_code, payload = analyze(request_body)

    assert status_code == 200
    assert equal_json_strings(expected_response, payload)


@pytest.mark.api
def test_given_allow_list_without_setting_allow_list_match_then_normal_entity_is_returned():
    """Check that a regex-looking entry is not applied as a regex by default.

    The request passes ``.*@github.com`` in ``allow_list`` but does not set
    ``allow_list_match``, and the test expects the EMAIL_ADDRESS entity to
    still be reported with HTTP 200.
    """
    # The email is expected to be reported despite the allow_list entry.
    expected_response = """
[
{"entity_type": "EMAIL_ADDRESS", "start": 7, "end": 23, "score": 0.85, "analysis_explanation":null}
]
"""
    request_body = """
{
"text": "email: [email protected]",
"language": "en",
"allow_list": [".*@github.com"]
}
"""

    status_code, payload = analyze(request_body)

    assert status_code == 200
    # recognition_metadata is excluded from the comparison.
    keys_to_skip = ["recognition_metadata"]
    assert equal_json_strings(expected_response, payload, ignore_keys=keys_to_skip)


@pytest.mark.api
def test_given_regex_flags_and_normal_entities_are_returned():
    """Check that ``regex_flags`` of 0 stops the allow_list from matching.

    The request uses a regex ``allow_list`` entry (``.*@github.com``) and
    sets ``regex_flags`` to 0 instead of relying on the request default.
    The test expects the EMAIL_ADDRESS entity to be reported with HTTP 200.
    """
    # With regex_flags set to 0 the allow_list pattern is matched case
    # sensitively, so "GitHub" in the text does not match "github" in the
    # pattern and the entity is not filtered out.
    request_body = """
{
"text": "email: [email protected]",
"language": "en",
"allow_list": [".*@github.com"],
"allow_list_match": "regex",
"regex_flags": 0
}
"""

    response_status, response_content = analyze(request_body)

    # The literal must be valid JSON: stray review-UI text that had been
    # pasted between the brackets was removed.
    expected_response = """
[
{"entity_type": "EMAIL_ADDRESS", "start": 7, "end": 23, "score": 0.85, "analysis_explanation":null}
]
"""
    assert response_status == 200
    # recognition_metadata is excluded from the comparison.
    assert equal_json_strings(
        expected_response, response_content, ignore_keys=["recognition_metadata"]
    )
3 changes: 3 additions & 0 deletions presidio-analyzer/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,9 @@ def analyze() -> Tuple[str, int]:
return_decision_process=req_data.return_decision_process,
ad_hoc_recognizers=req_data.ad_hoc_recognizers,
context=req_data.context,
allow_list=req_data.allow_list,
allow_list_match=req_data.allow_list_match,
regex_flags=req_data.regex_flags
)

return Response(
Expand Down
5 changes: 5 additions & 0 deletions presidio-analyzer/presidio_analyzer/analyzer_request.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import re
from typing import Dict

from presidio_analyzer import PatternRecognizer
Expand Down Expand Up @@ -34,3 +35,7 @@ def __init__(self, req_data: Dict):
PatternRecognizer.from_dict(rec) for rec in ad_hoc_recognizers
]
self.context = req_data.get("context")
self.allow_list = req_data.get("allow_list")
self.allow_list_match = req_data.get("allow_list_match", "exact")
self.regex_flags = req_data.get("regex_flags",
re.DOTALL | re.MULTILINE | re.IGNORECASE)
Loading