Get full Indeed description (#70)

Bunsly · Nov 27, 2023 · eed7fca
1 parent dfb8c18
commit eed7fca
Show file tree

Hide file tree

Showing 3 changed files with 4 additions and 19 deletions.
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "python-jobspy"
-version = "1.1.28"
+version = "1.1.29"
 description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter"
 authors = ["Zachary Hampton <[email protected]>", "Cullen Watson <[email protected]>"]
 homepage = "https://github.com/Bunsly/JobSpy"

diff --git a/src/jobspy/jobs/__init__.py b/src/jobspy/jobs/__init__.py
@@ -121,7 +121,7 @@ class Country(Enum):
     # internal for ziprecruiter
     US_CANADA = ("usa/ca", "www")
 
-    # internal for linkeind
+    # internal for linkedin
     WORLDWIDE = ("worldwide", "www")
 
     @property

diff --git a/src/jobspy/scrapers/indeed/__init__.py b/src/jobspy/scrapers/indeed/__init__.py
@@ -235,24 +235,9 @@ def get_description(self, job_page_url: str) -> str | None:
         if response.status_code not in range(200, 400):
             return None
 
-        soup = BeautifulSoup(response.text, "html.parser")
-        script_tag = soup.find(
-            "script", text=lambda x: x and "window._initialData" in x
-        )
-
-        if not script_tag:
-            return None
-
-        script_code = script_tag.string
-        match = re.search(r"window\._initialData\s*=\s*({.*?})\s*;", script_code, re.S)
-
-        if not match:
-            return None
-
-        json_string = match.group(1)
-        data = json.loads(json_string)
         try:
-            job_description = data["jobInfoWrapperModel"]["jobInfoModel"][
+            data = json.loads(response.text)
+            job_description = data["body"]["jobInfoWrapperModel"]["jobInfoModel"][
                 "sanitizedJobDescription"
             ]
         except (KeyError, TypeError, IndexError):