Skip to content

Commit

Permalink
include SUPP files in selection
Browse files: browse the repository at this point in the history
  • Loading branch information
kevingreenman committed Jun 6, 2023
1 parent e1404df commit bee7deb
Showing 1 changed file with 16 additions and 2 deletions.
18 changes: 16 additions & 2 deletions select_chem.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -71,15 +71,29 @@ def patent_directory(patent_year, patent_link, data_dir):
subdirectory = [
s for s in subdirectory if ".txt" not in s
] # don't consider any present .txt files
# subdirectory_zip = [s for s in subdirectory if ".zip" in s]

# TODO: also add ZIP files from *-SUPP directories from late 2010 onwards
for item in subdirectory:
subdirectory_zip = os.listdir(os.path.join(current_path, item))
for element_zip in subdirectory_zip:
if element_zip.lower().endswith(".zip"):
list_zip.append(os.path.join(current_path, item, element_zip))

# There are also ZIP files in *-SUPP directories from late 2010 onwards
if int(patent_year) >= 2010:
if os.path.isdir(os.path.join(current_path + "-SUPP")):
current_path = os.path.join(current_path + "-SUPP")
list_path.append(current_path)
subdirectory = os.listdir(current_path)
subdirectory = [
s for s in subdirectory if ".txt" not in s
] # don't consider any present .txt files

for item in subdirectory:
subdirectory_zip = os.listdir(os.path.join(current_path, item))
for element_zip in subdirectory_zip:
if element_zip.lower().endswith(".zip"):
list_zip.append(os.path.join(current_path, item, element_zip))

print("Step 1 Complete")

return list_path, list_zip
Expand Down

0 comments on commit bee7deb

Please sign in to comment.