Skip to content

Commit

Permalink
Updated config.ini with POIs and data_fetch script to load enron data…
Browse files Browse the repository at this point in the history
…frame conversion and poi load
  • Loading branch information
advaithsrao committed Oct 17, 2023
1 parent 6dc3cf4 commit b4b9253
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 144 deletions.
147 changes: 6 additions & 141 deletions config.ini
Original file line number Diff line number Diff line change
@@ -1,144 +1,9 @@
[data]
; path to the local enron dataset
; enron = <>
; Replace the value of `enron` on the line below with the local path of the Enron dataset (enron = <local_path_of_enron_dataset>) so that LoadEnronData() in utils/data_fetch.py can load the Enron data as a DataFrame.
enron = https://www.cs.cmu.edu/~enron/enron_mail_20150507.tar.gz

[person_of_interest_email]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = joe'.'[email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = kevin'.'[email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = ken'.'[email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = '[email protected]
email = [email protected]
email = 'david.delainey'@enron.com
email = [email protected]
email = delainey'.'[email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = ben'.'[email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = lawyer'.'[email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = '[email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
email = [email protected]
[person_of_interest.emails]
emails = [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & joe'.'[email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & kevin'.'[email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & ken'.'[email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & '[email protected] & [email protected] & 'david.delainey'@enron.com & [email protected] & delainey'.'[email protected] & [email protected] & [email protected] & [email protected] & ben'.'[email protected] & [email protected] & [email protected] & [email protected] & lawyer'.'[email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & '[email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email protected] & [email 
protected] & [email protected] & [email protected] & [email protected] & [email protected]

[person_of_interest_name]
[Names]
name = Lay, Kenneth
name = Skilling, Jeffrey
name = Howard, Kevin
name = Krautz, Michael
name = Yeager, Scott
name = Hirko, Joseph
name = Shelby, Rex
name = Bermingham, David
name = Darby, Giles
name = Mulgrew, Gary
name = Bayley, Daniel
name = Brown, James
name = Furst, Robert
name = Fuhs, William
name = Causey, Richard
name = Calger, Christopher
name = DeSpain, Timothy
name = Hannon, Kevin
name = Koenig, Mark
name = Forney, John
name = Rice, Kenneth
name = Rieker, Paula
name = Fastow, Lea
name = Fastow, Andrew
name = Delainey, David
name = Glisan, Ben
name = Richter, Jeffrey
name = Lawyer, Larry
name = Belden, Timothy
name = Kopper, Michael
name = Duncan, David
name = Bowen, Raymond
name = Colwell, Wesley
name = Boyle, Dan
name = Loehr, Christopher
[person_of_interest.names]
names = Lay, Kenneth & Skilling, Jeffrey & Howard, Kevin & Krautz, Michael & Yeager, Scott & Hirko, Joseph & Shelby, Rex & Bermingham, David & Darby, Giles & Mulgrew, Gary & Bayley, Daniel & Brown, James & Furst, Robert & Fuhs, William & Causey, Richard & Calger, Christopher & DeSpain, Timothy & Hannon, Kevin & Koenig, Mark & Forney, John & Rice, Kenneth & Rieker, Paula & Fastow, Lea & Fastow, Andrew & Delainey, David & Glisan, Ben & Richter, Jeffrey & Lawyer, Larry & Belden, Timothy & Kopper, Michael & Duncan, David & Bowen, Raymond & Colwell, Wesley & Boyle, Dan & Loehr, Christopher
56 changes: 53 additions & 3 deletions utils/data_fetch.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import pandas as pd
import glob
import email
Expand All @@ -6,7 +7,57 @@
# Load config.ini into a module-level ConfigParser shared by this module.
import configparser

config = configparser.ConfigParser()

# Lookup relative to the current working directory.
config.read('../config.ini')

# Lookup anchored at this file's own directory, so it does not depend on
# where the process was started from.
_module_dir = os.path.dirname(os.path.abspath(__file__))
config.read(os.path.join(_module_dir, '../config.ini'))


class PersonOfInterest:
def __init__(
self,
name_list: list[str] | None = None,
email_list: list[str] | None = None,
):
"""Class to operate with the person of interest data from config.ini file
"""
self.poi = {}

#read [person_of_interest_name] and [person_of_interest_email] section from config.ini file if not given explicitly
if name_list is None:
self.poi['names'] = config['person_of_interest.names']['names']
else:
self.poi['names'] = name_list

if email_list is None:
self.poi['emails'] = config['person_of_interest.emails']['emails']
else:
self.poi['emails'] = email_list

#convert the values to lists
self.poi['names'] = [name.strip() for name in self.poi['names'].split('&')]
self.poi['emails'] = [email.strip() for email in self.poi['emails'].split('&')]

def check_person_of_interest_name(
self,
name: str
):
if name in self.poi['names']:
return True

def check_person_of_interest_email(
self,
email: str
):
if email in self.poi['emails']:
return True

def return_person_of_interest(
self,
):
return self.poi


class LoadEnronData:
Expand All @@ -17,8 +68,7 @@ def __call__(
"""Load the Enron email data
Note:
To run this, please specify the local path to enron dataset in config.ini.
Download path for enron dataset: https://www.cs.cmu.edu/~enron/enron_mail_20150507.tar.gz
To run this locally
Args:
datapath (str, optional): Path to the Enron email data. Defaults to None.
Expand Down

0 comments on commit b4b9253

Please sign in to comment.