Skip to content

Commit

Permalink
Merge pull request #905 from chakaponi/bug/incorrect-element-selectors
Browse files (browse the repository at this point in the history)
fix: 🐛 Hotfix. Change selectors due to new html structure

Co-authored-by: chakaponi <[email protected]>
  • Loading branch information
surapuramakhil and surapuramakhil authored Nov 25, 2024
2 parents b624ffd + 22f2c3b commit 65f8ac9
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 73 deletions.
67 changes: 25 additions & 42 deletions src/ai_hawk/job_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,6 @@ def start_applying(self):
def get_jobs_from_page(self, scroll=False):

try:

no_jobs_element = self.driver.find_element(By.CLASS_NAME, 'jobs-search-two-pane__no-results-banner--expand')
if 'No matching jobs found' in no_jobs_element.text or 'unfortunately, things aren' in self.driver.page_source.lower():
logger.debug("No matching jobs found on this page, skipping.")
Expand All @@ -255,22 +254,23 @@ def get_jobs_from_page(self, scroll=False):

try:
# XPath query to find the ul tag with class scaffold-layout__list-container
job_results_xpath_query = "//ul[contains(@class, 'scaffold-layout__list-container')]"
job_results = self.driver.find_element(By.XPATH, job_results_xpath_query)
jobs_xpath_query = "//ul[contains(@class, 'scaffold-layout__list-container')]"
jobs_container = self.driver.find_element(By.XPATH, jobs_xpath_query)

if scroll:
job_results_scrolableElament = job_results.find_element(By.XPATH,"..")
logger.warning(f'is scrollable: {browser_utils.is_scrollable(job_results_scrolableElament)}')
jobs_container_scrolableElement = jobs_container.find_element(By.XPATH,"..")
logger.warning(f'is scrollable: {browser_utils.is_scrollable(jobs_container_scrolableElement)}')

browser_utils.scroll_slow(self.driver, jobs_container_scrolableElement)
browser_utils.scroll_slow(self.driver, jobs_container_scrolableElement, step=300, reverse=True)

browser_utils.scroll_slow(self.driver, job_results_scrolableElament)
browser_utils.scroll_slow(self.driver, job_results_scrolableElament, step=300, reverse=True)
job_element_list = jobs_container.find_elements(By.XPATH, ".//li[contains(@class, 'jobs-search-results__list-item') and contains(@class, 'ember-view')]")

job_list_elements = job_results.find_elements(By.XPATH, ".//li[contains(@class, 'jobs-search-results__list-item') and contains(@class, 'ember-view')]")
if not job_list_elements:
if not job_element_list:
logger.debug("No job class elements found on page, skipping.")
return []

return job_list_elements
return job_element_list

except NoSuchElementException as e:
logger.warning(f'No job results found on the page. \n expection: {traceback.format_exc()}')
Expand All @@ -281,20 +281,9 @@ def get_jobs_from_page(self, scroll=False):
return []

def read_jobs(self):
try:
no_jobs_element = self.driver.find_element(By.CLASS_NAME, 'jobs-search-two-pane__no-results-banner--expand')
if 'No matching jobs found' in no_jobs_element.text or 'unfortunately, things aren' in self.driver.page_source.lower():
raise Exception("No more jobs on this page")
except NoSuchElementException:
pass

job_results = self.driver.find_element(By.CLASS_NAME, "jobs-search-results-list")
browser_utils.scroll_slow(self.driver, job_results)
browser_utils.scroll_slow(self.driver, job_results, step=300, reverse=True)
job_list_elements = self.driver.find_elements(By.CLASS_NAME, 'scaffold-layout__list-container')[0].find_elements(By.CLASS_NAME, 'jobs-search-results__list-item')
if not job_list_elements:
raise Exception("No job class elements found on page")
job_list = [self.job_tile_to_job(job_element) for job_element in job_list_elements]

job_element_list = self.get_jobs_from_page()
job_list = [self.job_tile_to_job(job_element) for job_element in job_element_list]
for job in job_list:
if self.is_blacklisted(job.title, job.company, job.link, job.location):
logger.info(f"Blacklisted {job.title} at {job.company} in {job.location}, skipping...")
Expand All @@ -307,21 +296,9 @@ def read_jobs(self):
continue

def apply_jobs(self):
try:
no_jobs_element = self.driver.find_element(By.CLASS_NAME, 'jobs-search-two-pane__no-results-banner--expand')
if 'No matching jobs found' in no_jobs_element.text or 'unfortunately, things aren' in self.driver.page_source.lower():
logger.debug("No matching jobs found on this page, skipping")
return
except NoSuchElementException:
pass

job_list_elements = self.get_jobs_from_page()
job_element_list = self.get_jobs_from_page()

if not job_list_elements:
logger.debug("No job class elements found on page, skipping")
return

job_list = [self.job_tile_to_job(job_element) for job_element in job_list_elements]
job_list = [self.job_tile_to_job(job_element) for job_element in job_element_list]

for job in job_list:

Expand Down Expand Up @@ -494,7 +471,7 @@ def job_tile_to_job(self, job_tile) -> Job:
logger.debug(f"Job link extracted: {job.link}")
except NoSuchElementException:
logger.warning("Job link is missing.")

try:
job.company = job_tile.find_element(By.XPATH, ".//div[contains(@class, 'artdeco-entity-lockup__subtitle')]//span").text
logger.debug(f"Job company extracted: {job.company}")
Expand All @@ -517,11 +494,17 @@ def job_tile_to_job(self, job_tile) -> Job:
except NoSuchElementException:
logger.warning("Job location is missing.")


try:
job.apply_method = job_tile.find_element(By.CLASS_NAME, 'job-card-container__apply-method').text
job_state = job_tile.find_element(By.XPATH, ".//ul[contains(@class, 'job-card-list__footer-wrapper')]//li[contains(@class, 'job-card-container__apply-method')]").text
except NoSuchElementException as e:
job.apply_method = "Applied"
logger.warning(f'Apply method not found, assuming \'Applied\'. {e} {traceback.format_exc()}')
try:
# Fetching state when apply method is not found
job_state = job_tile.find_element(By.XPATH, ".//ul[contains(@class, 'job-card-list__footer-wrapper')]//li[contains(@class, 'job-card-container__footer-job-state')]").text
job.apply_method = "Applied"
logger.warning(f'Apply method not found, state {job_state}. {e} {traceback.format_exc()}')
except NoSuchElementException as e:
logger.warning(f'Apply method and state not found. {e} {traceback.format_exc()}')

return job

Expand Down
4 changes: 2 additions & 2 deletions src/ai_hawk/linkedIn_easy_applier.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,8 +376,8 @@ def fill_up(self, job_context : JobContext) -> None:
EC.presence_of_element_located((By.CLASS_NAME, 'jobs-easy-apply-content'))
)

pb4_elements = easy_apply_content.find_elements(By.CLASS_NAME, 'pb4')
for element in pb4_elements:
input_elements = easy_apply_content.find_elements(By.CLASS_NAME, 'jobs-easy-apply-form-section__grouping')
for element in input_elements:
self._process_form_element(element, job_context)
except Exception as e:
logger.error(f"Failed to find form elements: {e}")
Expand Down
54 changes: 25 additions & 29 deletions tests/test_aihawk_job_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,21 +71,33 @@ def test_get_jobs_from_page_no_jobs(mocker, job_manager):

def test_get_jobs_from_page_with_jobs(mocker, job_manager):
"""Test get_jobs_from_page when job elements are found."""
# Mock the no_jobs_element to behave correctly
mock_no_jobs_element = mocker.Mock()
mock_no_jobs_element.text = "No matching jobs found"
# Mock no_jobs_element to simulate the absence of "No matching jobs found" banner
no_jobs_element_mock = mocker.Mock()
no_jobs_element_mock.text = "" # Empty text means "No matching jobs found" is not present

# Mocking the find_element to return the mock no_jobs_element
mocker.patch.object(job_manager.driver, 'find_element',
return_value=mock_no_jobs_element)
# Mock the driver to simulate the page source
mocker.patch.object(job_manager.driver, 'page_source', return_value="")

# Mock the page_source
mocker.patch.object(job_manager.driver, 'page_source',
return_value="some page content")
# Mock the outer find_element
container_mock = mocker.Mock()

# Ensure jobs are returned as empty list due to "No matching jobs found"
jobs = job_manager.get_jobs_from_page()
assert jobs == [] # No jobs expected due to "No matching jobs found"
# Mock the inner find_elements to return job list items
job_element_mock = mocker.Mock()
# Simulating two job items
job_elements_list = [job_element_mock, job_element_mock]

# Return the container mock, which itself returns the job elements list
container_mock.find_elements.return_value = job_elements_list
mocker.patch.object(job_manager.driver, 'find_element', side_effect=[
no_jobs_element_mock,
container_mock
])

job_manager.get_jobs_from_page()

assert job_manager.driver.find_element.call_count == 2
assert container_mock.find_elements.call_count == 1



def test_apply_jobs_with_no_jobs(mocker, job_manager):
Expand All @@ -94,9 +106,6 @@ def test_apply_jobs_with_no_jobs(mocker, job_manager):
mock_element = mocker.Mock()
mock_element.text = "No matching jobs found"

# Mock the driver to simulate the page source
mocker.patch.object(job_manager.driver, 'page_source', return_value="")

# Mock the driver to return the mock element when find_element is called
mocker.patch.object(job_manager.driver, 'find_element',
return_value=mock_element)
Expand All @@ -111,26 +120,13 @@ def test_apply_jobs_with_no_jobs(mocker, job_manager):
def test_apply_jobs_with_jobs(mocker, job_manager):
"""Test apply_jobs when jobs are present."""

# Mock no_jobs_element to simulate the absence of "No matching jobs found" banner
no_jobs_element = mocker.Mock()
no_jobs_element.text = "" # Empty text means "No matching jobs found" is not present
mocker.patch.object(job_manager.driver, 'find_element',
return_value=no_jobs_element)

# Mock the page_source to simulate what the page looks like when jobs are present
mocker.patch.object(job_manager.driver, 'page_source',
return_value="some job content")

# Mock the outer find_elements (scaffold-layout__list-container)
container_mock = mocker.Mock()

# Mock the inner find_elements to return job list items
# Simulating two job elements
job_element_mock = mocker.Mock()
# Simulating two job items
job_elements_list = [job_element_mock, job_element_mock]

mocker.patch.object(job_manager.driver, 'find_elements',
return_value=[container_mock])

mocker.patch.object(job_manager, 'get_jobs_from_page', return_value=job_elements_list)

Expand Down

0 comments on commit 65f8ac9

Please sign in to comment.