diff --git a/PII DATA/ReadME.md b/PII DATA/ReadME.md new file mode 100644 index 00000000..9e7522c6 --- /dev/null +++ b/PII DATA/ReadME.md @@ -0,0 +1,36 @@ +PII Data Detector + +Overview + +The PII Data Detector is a simple Python script designed to identify Personally Identifiable Information (PII) elements within a text file. PII elements include sensitive data such as email addresses, phone numbers, and Social Security Numbers (SSNs). This tool can be useful for data security, compliance checks, and data anonymization. + +Purpose + +The main purpose of this program is to help users easily scan through text files and identify any potential PII data. It can be particularly useful for: + +Ensuring that documents do not contain sensitive information before sharing. + +Auditing data files for compliance with data protection regulations. +Assisting in data anonymization processes. + +Technologies Used + +The program is written in Python and utilizes the following technologies: + +Python Standard Library: + +The script is built using Python's built-in libraries, primarily `re` for regular expressions, which allows pattern matching in text. + +Regular Expressions (Regex): + +Regex patterns are used to identify PII elements such as emails, phone numbers, and SSNs in the text file. + +Running the Script + +Open your terminal or command prompt. + +Navigate to the directory where the p2data.py file is located.
"""PII Data Detector (PII DATA/p2data.py).

Scans a text file line by line and reports e-mail addresses, phone numbers,
and Social Security Numbers (SSNs) found by regular-expression matching.

Run the script by entering the following command:

    python p2data.py
"""

import re

# (label, compiled pattern) pairs, compiled once at import time instead of
# being rebuilt for every scan. The tuple order is the order in which matches
# are reported for each line: e-mails, then phone numbers, then SSNs.
_PII_PATTERNS = (
    ("email", re.compile(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}")),
    ("phone number", re.compile(r"\b\d{3}[-.]?\d{3}[-.]?\d{4}\b")),
    ("SSN", re.compile(r"\b\d{3}-\d{2}-\d{4}\b")),
)


def load_text(file_path):
    """Read the file at *file_path* and return its content as a list of lines.

    The file is decoded as UTF-8 with undecodable bytes replaced, so a stray
    non-UTF-8 byte cannot abort the scan; the PII patterns only match ASCII
    characters, so the replacement never hides a match.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The lines of the file (line endings preserved), or an empty list when
        the file is missing or cannot be read. In the failure case an error
        message is printed instead of raising.
    """
    try:
        with open(file_path, "r", encoding="utf-8", errors="replace") as file:
            return file.readlines()
    except FileNotFoundError:
        print(f"Error: The file '{file_path}' was not found.")
    except OSError as err:
        # Covers directories, permission problems and other I/O failures that
        # would otherwise terminate the script with a traceback.
        print(f"Error: Could not read '{file_path}': {err}")
    return []


def find_pii(text_lines):
    """Scan each line for e-mail addresses, phone numbers, and SSNs.

    Every match is printed via ``search_and_print`` and also collected.

    Args:
        text_lines: Iterable of strings to scan.

    Returns:
        A list of ``(data_type, match)`` tuples in the order they were
        reported: line by line, and within a line e-mails first, then phone
        numbers, then SSNs.
    """
    found = []
    for line in text_lines:
        for data_type, pattern in _PII_PATTERNS:
            hits = search_and_print(data_type, pattern, line)
            found.extend((data_type, hit) for hit in hits)
    return found


def search_and_print(data_type, pattern, text):
    """Search *text* for *pattern* and print every match found.

    Args:
        data_type: Human-readable label used in the printed message.
        pattern: Regular expression, either a pattern string or a compiled
            pattern object.
        text: The string to search.

    Returns:
        The list of matches, as returned by ``re.findall``.
    """
    matches = re.findall(pattern, text)
    for match in matches:
        print(f"Found {data_type}: {match}")
    return matches


def main():
    """Prompt for a file path, then scan that file and report any PII found."""
    print("Welcome to PII Data Detector!")
    file_path = input("Enter the file path to scan for PII data: ").strip()

    text_lines = load_text(file_path)
    if text_lines:
        find_pii(text_lines)
    else:
        print("No content to scan.")


if __name__ == "__main__":
    main()