diff --git a/.github/workflows/update-ad-publications.yaml b/.github/workflows/update-publications.yaml
similarity index 78%
rename from .github/workflows/update-ad-publications.yaml
rename to .github/workflows/update-publications.yaml
index a008e3a..e0b5c8c 100644
--- a/.github/workflows/update-ad-publications.yaml
+++ b/.github/workflows/update-publications.yaml
@@ -1,8 +1,9 @@
-name: "Update AD Publications"
+name: "Update Publications"
 
 on:
   schedule:
     - cron: "0 0 1 * *"
+  workflow_dispatch:
 
 env:
   RETICULATE_AUTOCONFIGURE: 'FALSE'
@@ -10,18 +11,19 @@ env:
   GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
 
 jobs:
-  update-ad-publications:
+  update-publications:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v2
 
-      - uses: r-lib/actions/setup-r@master
+      - uses: r-lib/actions/setup-r@v2
         with:
-          r-version: '4.0'
+          r-version: '4.1.3'
 
       - name: Query dependencies
         run: |
           install.packages('remotes')
+          install.packages("synapser")
           saveRDS(remotes::dev_package_deps(dependencies = TRUE), "depends.Rds", version = 2)
         shell: Rscript {0}
 
@@ -48,11 +50,11 @@ jobs:
         run: |
           Rscript -e "reticulate::py_discover_config()"
           Rscript -e "reticulate::py_install(c('pandas', 'numpy', 'boto3', 'synapseclient'), pip = TRUE)"
-      
+
       - name: Install porTools
         run: |
-          Rscript -e "remotes::install_github('Sage-Bionetworks/porTools')"
-      
+          Rscript -e "remotes::install_github('eliteportal/publication_scraper')"
+
       - name: Query PubMed and upload results
         run: |
-          Rscript ./inst/scripts/update-publications-ad.R --grant_table syn17024229 --parent syn20463015 --pub_table syn20448807 --auth_token ${{ secrets.SYNAPSE_PAT }}
+          Rscript ./inst/scripts/query-pubmed-grants.R --grant_table syn51209786 --parent syn51400816 --pub_table syn51407023 --auth_token ${{ secrets.SYNAPSE_PAT }}
diff --git a/.gitignore b/.gitignore
index 5b6a065..b3879be 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,6 @@
 .Rhistory
 .RData
 .Ruserdata
+.synapseConfig
+.DS_Store
+publications_pmid_list.txt
diff --git a/R/global-hard-coded-variables.R b/R/global-hard-coded-variables.R
new file mode 100644
index 0000000..45e13fe
--- /dev/null
+++ b/R/global-hard-coded-variables.R
@@ -0,0 +1,12 @@
+# IDs of syanpse folders and tables that are used throughout the package for gather grant IDs and uploading annotations and entities.
+# Edit the following for the relevant project
+
+root_dir <- "~/Documents/Projects/ELITE/ELITE-porTools"
+sid_project <- "syn27229419"
+sid_studies_table <- "syn51210771"
+sid_studies_fv <- "syn51523775"
+sid_projects_table <- "syn51209786" # ELITE Portal Projects Table
+sid_pub_table <- "syn51407023"
+sid_pub_folder <- "syn51317180"
+sid_people_table <- "syn51209684"
+sid_pmid_file <- "syn52227331"
diff --git a/R/md-converter.R b/R/md-converter.R
new file mode 100644
index 0000000..cd19d62
--- /dev/null
+++ b/R/md-converter.R
@@ -0,0 +1,2 @@
+# Convert vignette to R script
+knitr::purl("~/Documents/Projects/ELITE/ELITE-porTools/vignettes/query-pubmed-grants.Rmd")
diff --git a/R/pubmed.R b/R/pubmed.R
index dd0ed55..42cca3e 100644
--- a/R/pubmed.R
+++ b/R/pubmed.R
@@ -106,7 +106,7 @@ pub_query <- function(pub_pmids_list) {
   names(pub_summary_list) <- names(pub_pmids_list)
 
   # collapse list of dataframes into a single df
-  dplyr::bind_rows(pub_summary_list, .id = "grantSerialNumber")
+  dplyr::bind_rows(pub_summary_list, .id = "result")
 }
 
 #' Parse Summary Obj
@@ -214,7 +214,7 @@ make_entity_name <- function(dat){
   # Need to leave space for year and pubmed ID
   # Arbitrarily set to 200 characters
   short_name <- stringr::str_trunc(
-    glue::glue("{first_author} {dat$fulljournalname}"),
+    glue::glue("{first_author} {dat$journal}"),
     width = 200
   )
 
diff --git a/R/setup_env.R b/R/setup_env.R
new file mode 100644
index 0000000..3fd4550
--- /dev/null
+++ b/R/setup_env.R
@@ -0,0 +1,24 @@
+# setup env
+# Package names
+packages <- c("librarian", "knitr")
+
+# Install packages not yet installed
+installed_packages <- packages %in% rownames(installed.packages())
+if (any(installed_packages == FALSE)) {
+  install.packages(packages[!installed_packages])
+}
+
+# install.packages("synapser", repos=c("http://ran.synapse.org", "http://cran.fhcrc.org"))
+
+librarian::shelf(
+  optparse,
+  rentrez,
+  rmarkdown,
+  reticulate,
+  janitor,
+  dplyr,
+  readr,
+  stringr,
+  reticulate,
+  easyPubMed
+)
diff --git a/R/synapseLogin.R b/R/synapseLogin.R
new file mode 100644
index 0000000..a4857df
--- /dev/null
+++ b/R/synapseLogin.R
@@ -0,0 +1,25 @@
+library("optparse")
+
+# nolint start
+option_list <- list(
+  make_option(
+    "--auth_token",
+    action = "store",
+    default = NA,
+    type = "character",
+    help = "Synapse Personal Access Token. If not given, assumes local .synapseConfig."
+  )
+)
+
+opts <- parse_args(OptionParser(option_list = option_list))
+# nolint end
+
+## Synapse client and logging in
+synapseclient <- reticulate::import("synapseclient")
+syntab <- reticulate::import("synapseclient.table")
+syn <- synapseclient$Synapse()
+if(!is.na(opts$auth_token)) {
+  syn$login(authToken = opts$auth_token)
+} else {
+  syn$login()
+}
diff --git a/R/text-cleaning.R b/R/text-cleaning.R
index 7be5e93..1c8cc9f 100644
--- a/R/text-cleaning.R
+++ b/R/text-cleaning.R
@@ -53,6 +53,7 @@ remove_unacceptable_characters <- function(text) {
   conv <- gsub(",", "", conv)
   conv <- gsub("\\]", "", conv)
   conv <- gsub("\\[", "", conv)
+  conv <- gsub("=", "-", conv)
   return(conv)
 }
 #' Clean up funky text
diff --git a/README.md b/README.md
index 92adb03..63dc912 100644
--- a/README.md
+++ b/README.md
@@ -6,3 +6,9 @@ Sage portals require content management of publications, people, data, studies a
 [[[[work in-progress]]]]
 
 `devtools::install_github('Sage-Bionetworks/porTools')`
+
+
+## Updates
+**2023-10-10**
+- If the grant serial number overlaps with annother for example `UH2AG064706` and `UH3AG064706` then a different call to get the search results must be made and the previously developed functions do not work 
+- Found the NIH library for R is much faster than python 
diff --git a/inst/scripts/curate-portal-people-table-pec.R b/inst/scripts/curate-portal-people-table-pec.R
index 3ef5eea..b8fcb5a 100644
--- a/inst/scripts/curate-portal-people-table-pec.R
+++ b/inst/scripts/curate-portal-people-table-pec.R
@@ -1,9 +1,11 @@
 library(tidyverse)
 library(purrr)
-synapseclient <- reticulate::import("synapseclient")
-syntab <- reticulate::import("synapseclient.table")
-syn <- synapseclient$Synapse()
-syn$login()
+
+# Login to synapse
+source("~/Projects/ELITE/porTools/R/synapseLogin.R")
+
+### Hard coded variables
+source("~/Projects/ELITE/porTools/R/globalHardCodedVariables.R")
 
 ## functions
 update_synapse_table <- function(table_id, update_df, syn, syntab) {
@@ -14,6 +16,7 @@ update_synapse_table <- function(table_id, update_df, syn, syntab) {
   update_rows <- syntab$Table(table_id, tmpfile)
   syn$store(update_rows)
 }
+
 make_df <- function(list, column_name) {
   df <- tibble::enframe(list) %>%
     tidyr::unnest(cols = c(value), keep_empty = TRUE)
@@ -22,7 +25,7 @@ make_df <- function(list, column_name) {
   df
 }
 ###
-people <- read_csv(syn$tableQuery("Select * from syn22096112")$filepath)
+people <- read_csv(syn$tableQuery(glue::glue("SELECT * from {sid_people_table}")$filepath) # table to portal - people
 team <- syn$getTeamMembers("3323356")
 list <- reticulate::iterate(team)
 member <- map(list, ~.$get("member"))
@@ -57,4 +60,4 @@ update <- update %>% mutate_all(function(x) ifelse(is.na(x),"",x))
 
 update$ROW_ID <- ""
 
-update_synapse_table("syn22096112", update, syn, syntab)
+update_synapse_table(sid_people_table, update, syn, syntab)
diff --git a/inst/scripts/curate-portal-studies-table-pec.R b/inst/scripts/curate-portal-studies-table-pec.R
index 2038c97..64116fe 100644
--- a/inst/scripts/curate-portal-studies-table-pec.R
+++ b/inst/scripts/curate-portal-studies-table-pec.R
@@ -1,40 +1,44 @@
 library(tidyverse)
-synapseclient <- reticulate::import("synapseclient")
-syntab <- reticulate::import("synapseclient.table")
-syn <- synapseclient$Synapse()
-syn$login()
+
+# Login to synapse
+source("~/Projects/ELITE/porTools/R/synapseLogin.R")
 
 # Once study folders are annotated, this script will find those annotations and merge them
 # into the studies table that creates the study cards in the portal.
 
+### Hard coded variables
+source("~/Projects/ELITE/porTools/R/globalHardCodedVariables.R")
+
 ### functions
-coalesceJoin <- function(x, y,
-                         by = NULL, suffix = c(".x", ".y"),
-                         join = dplyr::left_join, ...) {
+coalesceJoin <- function(x,
+                         y,
+                         by = NULL,
+                         suffix = c(".x", ".y"),
+                         join = dplyr::left_join,
+                         ...) {
   joined <- join(x, y, by = by, suffix = suffix, ...)
   # names of desired output
   cols <- union(names(x), names(y))
 
   to_coalesce <- names(joined)[!names(joined) %in% cols]
-  suffix_used <- suffix[ifelse(endsWith(to_coalesce, suffix[1]), 1, 2)]
+  suffix_used <-
+    suffix[ifelse(endsWith(to_coalesce, suffix[1]), 1, 2)]
   # remove suffixes and de-duplicate
-  to_coalesce <- unique(substr(
-    to_coalesce,
-    1,
-    nchar(to_coalesce) - nchar(suffix_used)
-  ))
-
-  coalesced <- purrr::map_dfc(to_coalesce, ~dplyr::coalesce(
-    joined[[paste0(.x, suffix[1])]],
-    joined[[paste0(.x, suffix[2])]]
-  ))
+  to_coalesce <- unique(substr(to_coalesce,
+                               1,
+                               nchar(to_coalesce) - nchar(suffix_used)))
+
+  coalesced <- purrr::map_dfc(to_coalesce, ~ dplyr::coalesce(joined[[paste0(.x, suffix[1])]],
+                                                             joined[[paste0(.x, suffix[2])]]))
+
   names(coalesced) <- to_coalesce
 
   dplyr::bind_cols(joined, coalesced)[cols]
 }
 
 update_synapse_table <- function(table_id, update_df, syn, syntab) {
-  current_rows <- syn$tableQuery(glue::glue("SELECT * FROM {table_id}"))
+  current_rows <-
+    syn$tableQuery(glue::glue("SELECT * FROM {table_id}"))
   syn$delete(current_rows)
   tmpfile <- fs::file_temp("rows.csv")
   write_csv(update_df, tmpfile)
@@ -45,32 +49,57 @@ update_synapse_table <- function(table_id, update_df, syn, syntab) {
 
 # update studies table
 # force view to rebuild
-trigger <- syn$tableQuery("select * from syn21990011")
+trigger <- syn$tableQuery(glue::glue("SELECT * FROM {sid_studies}", ))
 
-table <- dccvalidator::get_synapse_table("syn21783965", syn)
-fv <- dccvalidator::get_synapse_table("syn21990011", syn)
+table <- dccvalidator::get_synapse_table(sid_studies, syn)
+
+# Why do we need a file view?
+fv <- dccvalidator::get_synapse_table(sid_studies_fv, syn) # studies view for portal
 
 # Parse rows from file view that contain annotations to be captured in the
 # PEC studies table
-to_update <- fv[!is.na(fv$studyDescription),]
+to_update <- fv[!is.na(fv$studyDescription), ]
 
 to_update <- rename(to_update, key = id,
                     studyName = name)
+
 table <- rename(table, key = study)
 
 # join on synId
-updated <- coalesceJoin(to_update, table, by = "key", join = full_join)
+updated <-
+  coalesceJoin(to_update, table, by = "key", join = full_join)
 
 # change to required schema
 updated <- rename(updated, study = key)
 
 # NAs must be changed to empty strings
-dat <- updated %>% mutate_all(function(x) ifelse(is.na(x),"",x))
+dat <- updated %>% mutate_all(function(x)
+  ifelse(is.na(x), "", x))
 
 # order cards alphabetically
-dat <- dat[order(dat$studyName),]
+dat <- dat[order(dat$studyName), ]
 
 #order schema
-dat <- dplyr::select(dat, studyType, isModelSystem, numberOfIndividuals, species, study, studyDescription, studyName, nucleicAcidSource, contributingInstitution, dataTypes, diagnosis, grants, phase, methods, relatedStudies, tissue)
-
-update_synapse_table("syn21783965", dat, syn, syntab)
+dat <-
+  dplyr::select(
+    dat,
+    studyType,
+    isModelSystem,
+    numberOfIndividuals,
+    species,
+    study,
+    studyDescription,
+    studyName,
+    nucleicAcidSource,
+    contributingInstitution,
+    dataTypes,
+    diagnosis,
+    grants,
+    phase,
+    methods,
+    relatedStudies,
+    tissue
+  )
+
+# update the portal studies table
+update_synapse_table(sid_studies_table, dat, syn, syntab)
diff --git a/inst/scripts/query-pubmed-grants.R b/inst/scripts/query-pubmed-grants.R
new file mode 100755
index 0000000..45c920f
--- /dev/null
+++ b/inst/scripts/query-pubmed-grants.R
@@ -0,0 +1,324 @@
+## ----setup, include = FALSE-----------------------------------------------------------------------------------------------------------------------------------------------------------
+install.packages('librarian')
+install.packages("synapser", repos=c("http://ran.synapse.org", "http://cran.fhcrc.org"))
+
+librarian::shelf(
+  optparse,
+  rmarkdown,
+  reticulate,
+  janitor,
+  dplyr,
+  readr,
+  stringr,
+  reticulate,
+  synapser,
+  easyPubMed,
+  comprehenr,
+  easyPubMed,
+  httr,
+  tidyr,
+  dplyr
+)
+
+library('synapser')
+
+# nolint start
+option_list <- list(
+  make_option(
+    "--auth_token",
+    action = "store",
+    default = NA,
+    type = "character",
+    help = "Synapse Personal Access Token. If not given, assumes local .synapseConfig."
+  ),
+  make_option(
+    "--grant_table",
+    action = "store",
+    default = NA,
+    type = "character",
+    help = "Synapse synID for table with grants to query for. Requires columns `grant`, `grantSerialNumber`, `Program`. grants are queried by serial number."
+  ),
+  make_option(
+    "--parent",
+    action = "store",
+    default = NA,
+    type = "character",
+    help = "Synapse synID of parent folder to store publication entities to."
+  ),
+  make_option(
+    "--pub_table",
+    action = "store",
+    default = NA,
+    type = "character",
+    help = "Synapse synID of file view scoped to publication folder (`parent`)."
+  )
+)
+opts <- parse_args(OptionParser(option_list = option_list))
+
+# get the base working directory to make it work on others systems
+base_dir <- gsub('vignettes', '', getwd())
+source(glue::glue("{base_dir}/R/pubmed.R"))
+source(glue::glue("{base_dir}/R/text-cleaning.R"))
+source(glue::glue("{base_dir}/R/annotation.R"))
+source(glue::glue("{base_dir}/R/global-hard-coded-variables.R"))
+
+# Login to synapse
+## Synapse client and logging in
+synapseclient <- reticulate::import("synapseclient")
+syntab <- reticulate::import("synapseclient.table")
+syn <- synapseclient$Synapse()
+if (!is.na(opts$auth_token)) {
+  syn$login(authToken = opts$auth_token)
+} else {
+  syn$login()
+}
+
+## ----functions------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+hacky_cleaning <- function(text) {
+  conv <- convert_to_ascii(text = text)
+  conv <- remove_hmtl_formatting(text = conv)
+  conv <- gsub("&amp;|amp;", "and", conv)
+  conv <- gsub("&#39;|&quot;", "'", conv)
+  conv <- gsub("&gt;", "Greater Than ", conv)
+  conv <- gsub("[[:punct:]]+", "", conv)
+  conv <- gsub("\\s+", " ", conv)
+  conv <- str_trunc(text, width = 500)
+  return(conv)
+}
+
+
+## ----vars, echo=FALSE-----------------------------------------------------------------------------------------------------------------------------------------------------------------
+# table_id <- "syn51209786" # ELITE Portal Projects Table
+
+# Gather list of grants from synapse
+grants <-
+  syn$tableQuery(glue::glue("SELECT grant, program, name FROM {sid_projects_table}"))$asDataFrame()
+
+# expand rows with multiple grants
+grants <- tidyr::unnest(grants, cols = grant)
+
+grant_list <- grants$grant
+
+## ----scrape pubmed ids from grant numbers---------------------------------------------------------------------------------------------------------------------------------------------
+get_pub_details <- function(request_body) {
+  # Make the POST request
+  response <-
+    POST(
+      url = API_URL,
+      headers = headers,
+      body = request_body,
+      encode = "json"
+    )
+  return (response)
+}
+
+process_response <- function(response) {
+  if (response$status_code == 200) {
+    # Success!
+    data <- content(response, "parsed")
+
+    df <- as.data.frame(do.call(rbind, data$results))
+
+    return (df)
+  }
+}
+
+# works for project Numbers instead of project serial numbers
+# Set the API URL
+API_URL <- "https://api.reporter.nih.gov/v2/publications/search"
+
+# Set the headers
+headers <- list(accept = "application/json",
+                "Content-Type" = "application/json")
+
+# Set the request body
+request_body <- list(
+  criteria = list(core_project_nums = grant_list),
+  offset = 0,
+  limit = 50,
+  sort_field = "core_project_nums",
+  sort_order = "desc"
+)
+
+# Make the POST request
+response <-
+  POST(
+    url = API_URL,
+    headers = headers,
+    body = request_body,
+    encode = "json"
+  )
+
+# Check the response status code
+if (response$status_code == 200) {
+  # Success!
+  pmids <- list()
+
+  # get results as dataframe
+  pmids_temp <- process_response(response)
+
+  data <- content(response, "parsed")
+
+  total <- data$meta$total
+
+  results <- process_response(response)
+
+  pmids[[length(pmids) + 1]] <- results
+
+  request_body$offset <- request_body$offset + request_body$limit
+
+  while (nrow(results) > 0) {
+    response <- get_pub_details(request_body)
+
+    results <- process_response(response)
+
+    # extend pmids list
+    pmids[[length(pmids) + 1]] <- results
+
+    # update offset in request
+    request_body$offset <-
+      request_body$offset + request_body$limit
+  }
+} else {
+  # Something went wrong
+  print("Error:", response$status_code)
+}
+
+# create dataframe with pmids
+pmids_df <- do.call(rbind, pmids)
+
+pmids_df <- pmids_df %>% rename('grant' = 'coreproject')
+
+# for joining
+pmids_df$grant <- as.character(pmids_df$grant)
+
+# remove exisiting entities in portal
+pubs_exisiting <-
+  syn$tableQuery(
+    glue::glue(
+      "SELECT id, Name, PubmedId, PMID, Title, grant, Program FROM {sid_pub_table}"
+    )
+  )$asDataFrame()
+
+# remove duplicate no name entities
+# for (i in as.list(pubs_exisiting[grep("NA NA NA", pubs_exisiting$Name), "id"])) {
+#   print(i)
+#   tryCatch({
+#     syn$delete(i)
+#   },
+#   error = function(e) {
+#     print(glue::glue('error deleting {i}'))
+#   })
+# }
+
+# collapse rows by grouping by pmids since some publications can be assoicated with multiple grants
+pmids_df <- pmids_df %>% group_by(pmid) %>% reframe(
+  grant = paste0(grant, collapse = ","),
+  applid = paste0(unique(applid), collapse = ",")
+)
+
+# Take only pmids not in the portal already
+pmids_df <-
+  pmids_df[!(pmids_df$pmid %in% pubs_exisiting$PubmedId),]
+
+## -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+# one eternity later....
+pmid_metadata <- pub_query(pmids_df$pmid)
+
+## ----query----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+# create complete dataset
+dat <- dplyr::right_join(grants, pmids_df, by = "grant")
+
+dat$pmid <- as.character(dat$pmid)
+
+dat <- dplyr::inner_join(dat, pmid_metadata, by = "pmid")
+
+# clean column names
+dat <- janitor::clean_names(dat, "lower_camel")
+
+## ----hacky----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+# Included in hacky_cleaning is conversion to ascii and removing html formatting
+dat$year <- stringr::str_extract(dat$pubdate, "\\d{4}")
+dat$year <- as.integer(dat$year)
+dat$title <- hacky_cleaning(dat$title)
+dat$authors <- hacky_cleaning(dat$authors)
+dat$journal <- remove_unacceptable_characters(dat$fulljournalname)
+
+# dat$abstract <- hacky_cleaning(dat$abstract)
+
+# drop unnecessary columns
+dat <- dat %>% select(-c('applid', 'result', 'pubdate'))
+
+cat(
+  'Total rows: ',
+  nrow(dat),
+  '\n',
+  'Duplicates: ',
+  sum(dat %>% duplicated()),
+  '\n',
+  'Rows after duplicate remove: ',
+  nrow(dat) - sum(dat %>% duplicated())
+)
+
+# Need to remove duplicates, but keep all grants and consortium
+# Includes some renaming and dropping of columns
+dat <- dat %>%
+  group_by(pmid) %>%
+  mutate(grant = glue::glue_collapse(unique(.data$`grant`), ", ")) %>%
+  mutate(consortium = glue::glue_collapse(unique(.data$program), ", ")) %>%
+  mutate(name = glue::glue_collapse(unique(.data$name), ", ")) %>%
+  select(!c(grant, program)) %>%
+  rename(
+    pubmed_id = pmid,
+    DOI = doi,
+    program = consortium,
+    study = name
+  ) %>%
+  distinct()
+
+dat <- dat %>% rename('pmid' = 'pubmed_id')
+dat$entity_name <- make_entity_name(dat)
+dat$Name <- make_entity_name(dat)
+
+
+#Using rename()
+dat <- dat %>% rename(
+  "Authors" = "authors",
+  "Journal" = "journal",
+  "PubmedId" = "pmid",
+  "Title" = "title",
+  "Year" = "year",
+  "Program" = "program",
+)
+
+# Remove common, unallowed characters from entity name; includes hacky_cleaning
+dat$entity_name <- remove_unacceptable_characters(dat$entity_name)
+
+## ----columns--------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+dat <- set_up_multiannotations(dat, "grant")
+dat <- set_up_multiannotations(dat, "Program")
+dat <- set_up_multiannotations(dat, "Authors")
+
+
+## -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+store_as_annotations <- function(parent, list) {
+  entity <- purrr::map(
+    list,
+    ~ synapseclient$File(
+      path = glue::glue("http://doi.org/{.$DOI}"),
+      name = .$entity_name,
+      parent = parent,
+      synapseStore = FALSE,
+      annotations = .
+    )
+  )
+  # entity
+  purrr::map(entity, ~ syn$store(., forceVersion = FALSE))
+}
+
+## ----store, message=FALSE, echo=FALSE-------------------------------------------------------------------------------------------------------------------------------------------------
+# parent = "syn51317180" # ELITE publications folder
+dat_list <- purrr::transpose(dat)
+
+# another eternity
+store_as_annotations(parent = sid_pub_folder, dat_list)
diff --git a/inst/scripts/set-folder-annotations-pec.R b/inst/scripts/set-folder-annotations-pec.R
index 3f80335..6446798 100644
--- a/inst/scripts/set-folder-annotations-pec.R
+++ b/inst/scripts/set-folder-annotations-pec.R
@@ -4,9 +4,10 @@ library(easyPubMed)
 library(readr)
 library(reticulate)
 library(porTools)
-synapseclient <- reticulate::import("synapseclient")
-syn <- synapseclient$Synapse()
-syn$login()
+
+# Login to synapse
+source("~/Projects/ELITE/porTools/R/synapseLogin.R")
+
 # The study folders get annotated with phase, grants, tissue, species, diagnosis, study type, study description, nucleic acid source and contributing institution.
 #
 # - The keys *grants*, *tissue*, *species*, *diagnosis* and *nucleicAcidSource* follow the constrained vocabulary of the [synapseAnnotations repo](https://github.com/sage-bionetworks/synapseannotations).
diff --git a/inst/scripts/update-publications.R b/inst/scripts/update-publications.R
index 03904b3..7632c8e 100644
--- a/inst/scripts/update-publications.R
+++ b/inst/scripts/update-publications.R
@@ -1,12 +1,12 @@
 #!/usr/bin/Rscript
 
 #######################################################
-##      Update AD Knowledge Portal Publications      ##
+##      Update ELITE Portal Publications             ##
 ##                                                   ##
 ## Description:                                      ##
 ##   Query PubMed for publications and upload        ##
 ##   results to Synapse in the format required by    ##
-##   the AD Knowledge Portal                         ##
+##   the ELITE Portal                                ##
 ##                                                   ##
 ## Usage:                                            ##
 ##   Rscript update-publications-ad.R \              ##
@@ -19,16 +19,21 @@
 
 ## Libraries -------------------------------------------------------------------
 
-library("dplyr")
-library("optparse")
-library("porTools")
-library("rentrez")
-library("purrr")
-library("stringr")
-## Required, but not fully loaded
-## readr, reticulate, glue, easyPubMed, dccvalidator
-
-## Setup -----------------------------------------------------------------------
+librarian::shelf(
+  optparse,
+  rmarkdown,
+  reticulate,
+  janitor,
+  dplyr,
+  readr,
+  stringr,
+  reticulate,
+  easyPubMed,
+  synapser,
+  httr,
+  tidyr,
+  dplyr
+)
 
 # nolint start
 option_list <- list(
@@ -62,98 +67,263 @@ option_list <- list(
   )
 )
 opts <- parse_args(OptionParser(option_list = option_list))
-# nolint end
 
+# get the base working directory to make it work on others systems
+base_dir <- gsub('vignettes', '', getwd())
+source(glue::glue("{base_dir}/R/pubmed.R"))
+source(glue::glue("{base_dir}/R/text-cleaning.R"))
+source(glue::glue("{base_dir}/R/annotation.R"))
+source(glue::glue("{base_dir}/R/global-hard-coded-variables.R"))
+
+# Login to synapse
 ## Synapse client and logging in
 synapseclient <- reticulate::import("synapseclient")
 syntab <- reticulate::import("synapseclient.table")
 syn <- synapseclient$Synapse()
-if(!is.na(opts$auth_token)) {
+if (!is.na(opts$auth_token)) {
   syn$login(authToken = opts$auth_token)
 } else {
   syn$login()
 }
 
-## Grab grants ------------------------------------------------------
+## ----functions------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+hacky_cleaning <- function(text) {
+  conv <- convert_to_ascii(text = text)
+  conv <- remove_hmtl_formatting(text = conv)
+  conv <- gsub("&amp;|amp;", "and", conv)
+  conv <- gsub("&#39;|&quot;", "'", conv)
+  conv <- gsub("&gt;", "Greater Than ", conv)
+  conv <- gsub("[[:punct:]]+", "", conv)
+  conv <- gsub("\\s+", " ", conv)
+  conv <- str_trunc(text, width = 500)
+  return(conv)
+}
+
+
+## ----vars, echo=FALSE-----------------------------------------------------------------------------------------------------------------------------------------------------------------
+# table_id <- "syn51209786" # ELITE Portal Projects Table
+
+# Gather list of grants from synapse
+grants <-
+  syn$tableQuery(glue::glue("SELECT grantNumber, program, name FROM {sid_projects_table}"))$asDataFrame()
+
+# convert grant numbers into string
+library(comprehenr)
+grantNumbers <-
+  to_list(for (g in grants$grantNumber)
+    for (y in g)
+      y)
+
+# expand rows with multiple grantNumbers
+grants$grantNumber <-
+  purrr::map(grants$grantNumber, function(x) {
+    paste(unlist(x), collapse = ",")
+  })
+
+grants <- grants %>%
+  separate_rows(grantNumber)
+
+## ----scrape pubmed ids from grant numbers---------------------------------------------------------------------------------------------------------------------------------------------
+get_pub_details <- function(request_body) {
+  # Make the POST request
+  response <-
+    POST(
+      url = API_URL,
+      headers = headers,
+      body = request_body,
+      encode = "json"
+    )
+  return (response)
+}
+
+process_response <- function(response) {
+  if (response$status_code == 200) {
+    # Success!
+    data <- content(response, "parsed")
+
+    df <- as.data.frame(do.call(rbind, data$results))
 
-# qury synapse
-grants <- syn$tableQuery(
-  glue::glue(
-    "SELECT \"Grant Number\", grantSerialNumber, Program ",
-    "FROM {opts$grant_table}"
+    return (df)
+  }
+}
+
+# works for project Numbers instead of project serial numbers
+# Set the API URL
+API_URL <- "https://api.reporter.nih.gov/v2/publications/search"
+
+# Set the headers
+headers <- list(accept = "application/json",
+                "Content-Type" = "application/json")
+
+# Set the request body
+request_body <- list(
+  criteria = list(core_project_nums = grantNumbers),
+  offset = 0,
+  limit = 50,
+  sort_field = "core_project_nums",
+  sort_order = "desc"
+)
+
+# Make the POST request
+response <-
+  POST(
+    url = API_URL,
+    headers = headers,
+    body = request_body,
+    encode = "json"
   )
-)$asDataFrame()
 
-# remove rows that have NaN or NA or empty string for the serial number
-grants <- grants[!(grants$grantSerialNumber %in% c(NaN, NA, "")), ]
+# Check the response status code
+if (response$status_code == 200) {
+  # Success!
+  pmids <- list()
+
+  # get results as dataframe
+  pmids_temp <- process_response(response)
+
+  data <- content(response, "parsed")
+
+  total <- data$meta$total
+
+  results <- process_response(response)
+
+  pmids[[length(pmids) + 1]] <- results
+
+  request_body$offset <- request_body$offset + request_body$limit
+
+  while (nrow(results) > 0) {
+    response <- get_pub_details(request_body)
 
-## Query PubMed -----------------------------------------------------
+    results <- process_response(response)
 
-# unlist list of grant serial numbers into a vector
-grant_serial_nums <- unlist(grants$grantSerialNumber)
+    # extend pmids list
+    pmids[[length(pmids) + 1]] <- results
 
-# run all grant serial numbers through query pubmed
-# returns a tibble
-  # each row is a publication
-  # columns include grantserialnumber, pubmed id, publication date, title, full journal name, doi, authors
-dat <- query_pubmed(grant_serial_nums)
+    # update offset in request
+    request_body$offset <-
+      request_body$offset + request_body$limit
+  }
+} else {
+  # Something went wrong
+  print("Error:", response$status_code)
+}
+
+# create dataframe with pmids
+pmids_df <- do.call(rbind, pmids)
+
+pmids_df <- pmids_df %>% rename('grantNumber' = 'coreproject')
+
+# for joining
+pmids_df$grantNumber <- as.character(pmids_df$grantNumber)
+
+
+## -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+# one eternity later....
+pmid_metadata <- pub_query(pmids_df$pmid)
 
-## Clean up ---------------------------------------------------------
 
-# munge pubmed query results
-# this function pulls out the year from pubdate and adds entity_name column
-dat <- munge_pubmed(dat)
+## ----query----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+# create complete dataset
+dat <- dplyr::right_join(grants, pmids_df, by = "grantNumber")
 
-# For some reason, grantSerialNumber isn't always a character
-grants$grantSerialNumber <- as.character(grants$grantSerialNumber)
+dat$pmid <- as.character(dat$pmid)
+
+dat <- dplyr::left_join(dat, pmid_metadata, by = "pmid")
+
+# clean column names
+dat <- janitor::clean_names(dat, "lower_camel")
+
+
+
+## ----hacky----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+# Included in hacky_cleaning is conversion to ascii and removing html formatting
+dat$year <- stringr::str_extract(dat$pubdate, "\\d{4}")
+dat$year <- as.integer(dat$year)
+dat$title <- hacky_cleaning(dat$title)
+dat$authors <- hacky_cleaning(dat$authors)
+dat$journal <- remove_unacceptable_characters(dat$fulljournalname)
 
-# Join dat and grants table by grantSerialNumber
-dat <- dplyr::right_join(grants, dat, by = "grantSerialNumber")
+# dat$abstract <- hacky_cleaning(dat$abstract)
 
-# Some pubmedIDs show up multiple times under different grants
-# Need to capture this information in a single row of information so it isn't duplicated
+# drop unnecessary columns
+dat <- dat %>% select(-c('applid', 'result', 'pubdate'))
 
+cat(
+  'Total rows: ',
+  nrow(dat),
+  '\n',
+  'Duplicates: ',
+  sum(dat %>% duplicated()),
+  '\n',
+  'Rows after duplicate remove: ',
+  nrow(dat) - sum(dat %>% duplicated())
+)
+
+# Need to remove duplicates, but keep all grants and consortium
+# Includes some renaming and dropping of columns
 dat <- dat %>%
-  # for each pubmedID
   group_by(pmid) %>%
-  mutate(
-    # Create a new column that captures all grants that duplicate pmid is associated with
-    grant = glue::glue_collapse(unique(.data$`Grant Number`), ", ")
+  mutate(grant = glue::glue_collapse(unique(.data$`grantNumber`), ", ")) %>%
+  mutate(consortium = glue::glue_collapse(unique(.data$program), ", ")) %>%
+  select(!c(grantNumber, program)) %>%
+  rename(
+    pubmed_id = pmid,
+    DOI = doi,
+    program = consortium,
+    study = name
   ) %>%
-  # Create a new column that captures all programs that duplicate pmid is associated with
-  mutate(consortium = glue::glue_collapse(unique(.data$Program), ", ")) %>%
-  # drop Grant Number, Program, and GrantSerialNumber cols
-  select(!c(`Grant Number`, Program, grantSerialNumber)) %>%
-  # rename some columns
-  rename(pubmed_id = pmid, DOI = doi, Program = consortium, journal = fulljournalname) %>%
-  # keep only distinct rows
   distinct()
 
-# Hacky cleaning
-## Included in hacky_cleaning is conversion to ascii and removing html formatting
-dat$title <- hacky_cleaning(dat$title)
-dat$authors <- hacky_cleaning(dat$authors)
-dat$journal <- hacky_cleaning(dat$journal)
+dat <- dat %>% rename('pmid' = 'pubmed_id')
+dat$entity_name <- make_entity_name(dat)
+dat$Name <- make_entity_name(dat)
+
+
+#Using rename()
+dat <- dat %>% rename(
+  "Authors" = "authors",
+  "Journal" = "journal",
+  "PubmedId" = "pmid",
+  "Title" = "title",
+  "Year" = "year",
+  "Grant" = "grant",
+  "Program" = "program",
+)
 
 # Remove common, unallowed characters from entity name; includes hacky_cleaning
 dat$entity_name <- remove_unacceptable_characters(dat$entity_name)
 
-## Remove row of NA
-# See this (https://github.com/Sage-Bionetworks/porTools/issues/10#issuecomment-1083441995) issue comment for more info
-# TODO: Figure out why this is happening
-# capture cases where pubmed ID is NA
-idx <- is.na(dat$pubmed_id)
-# only keep cases where pubmed ID is not NA
-dat <- dat[!idx,]
 
-# Set up multi-annotation columns correctly
-dat <- set_up_multiannotations(dat, "grant")
+
+## ----columns--------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+dat <- set_up_multiannotations(dat, "Grant")
 dat <- set_up_multiannotations(dat, "Program")
+dat <- set_up_multiannotations(dat, "Authors")
+
+
+## -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+store_as_annotations <- function(parent, list) {
+  entity <- purrr::map(
+    list,
+    ~ synapseclient$File(
+      path = glue::glue("http://doi.org/{.$DOI}"),
+      name = .$entity_name,
+      parent = parent,
+      synapseStore = FALSE,
+      annotations = .
+    )
+  )
+  # entity
+  purrr::map(entity, ~ syn$store(., forceVersion = FALSE))
+}
 
-## Store publications --------------------------------------------
 
+## ----store, message=FALSE, echo=FALSE-------------------------------------------------------------------------------------------------------------------------------------------------
+# parent = "syn51317180" # ELITE publications folder
 dat_list <- purrr::transpose(dat)
-store_as_annotations(parent = opts$parent, dat_list)
+
+# another eternity
+store_as_annotations(parent = sid_pub_folder, dat_list)
 
 ## Force file view update
 if (!is.na(opts$pub_table)) {
diff --git a/vignettes/query-pubmed-grants.Rmd b/vignettes/query-pubmed-grants.Rmd
index 12ac1b9..deb3409 100644
--- a/vignettes/query-pubmed-grants.Rmd
+++ b/vignettes/query-pubmed-grants.Rmd
@@ -1,44 +1,93 @@
 ---
 title: "Query Pubmed by Grant"
-author: "Nicole Kauer", "Kelsey Montgomery"
+author: "Nicole Kauer", "Kelsey Montgomery", "Nicholas Lee"
 date: "3/24/2020"
+edited: "10/10/2023"
 output: html_document
 ---
 
 ```{r setup, include = FALSE}
+rm(list=ls()); gc()
+
 knitr::opts_chunk$set(echo = TRUE)
-library(dccvalidator)
-library(dplyr)
-library(easyPubMed)
-library(readr)
-library(reticulate)
-library(porTools)
-synapseclient <- reticulate::import("synapseclient")
-syntab <- reticulate::import("synapseclient.table")
-syn <- synapseclient$Synapse()
-syn$login()
+# library(dccvalidator)
+# library(janitor)
+# library(dplyr)
+# library(readr)
+# library(stringr)
+# library(reticulate)
+# 
+# library(easyPubMed)
+# library(synapser)
+# library(porTools)
+
+librarian::shelf(
+  optparse,
+  rmarkdown,
+  reticulate,
+  janitor,
+  dplyr,
+  readr,
+  stringr,
+  reticulate,
+  easyPubMed,
+  synapser,
+  httr,
+  tidyr,
+  dplyr
+)
+
+# get the base working directory to make it work on others systems
+base_dir <- gsub('vignettes', '', getwd())
+source(glue::glue("{base_dir}R/pubmed.R"))
+source(glue::glue("{base_dir}R/text-cleaning.R"))
+source(glue::glue("{base_dir}R/annotation.R"))
+source(glue::glue("{base_dir}R/global-hard-coded-variables.R"))
+
+# Login to synapse
+source(glue::glue("{base_dir}R/synapseLogin.R"))
+```
+
+```{r functions}
+hacky_cleaning <- function(text) {
+  conv <- convert_to_ascii(text = text)
+  conv <- remove_hmtl_formatting(text = conv)
+  conv <- gsub("&amp;|amp;", "and", conv)
+  conv <- gsub("&#39;|&quot;", "'", conv)
+  conv <- gsub("&gt;", "Greater Than ", conv)
+  conv <- gsub("[[:punct:]]+", "", conv)
+  conv <- gsub("\\s+", " ", conv)
+  conv <- str_trunc(text, width = 500)
+  return(conv)
+}
 ```
 
 ## Query Pubmed and store data as file annotations
 
 The data needed for these steps are *Grant Number*, *grantSerialNumber* and *Program*. Theses functions take a list of grant serial numbers and queries Pubmed to download title, abstract, authors, journal name, year and DOI. Theses annotations are visible in the [AD Knowledge Portal - Publications View](https://www.synapse.org/#!Synapse:syn20448807/tables/). See the [Explore Publications module](https://adknowledgeportal.synapse.org/Explore/Publications) for a visual of how this data is surfaced on the portal.
 
-```{r ex_format, echo = FALSE}
-tribble(~`Grant Number`, ~grantSerialNumber, ~Program,
-        "U01AG046139", "AG046139", "AMP-AD"
-        )
-```
-
 Import the grants with their respective programs and serial numbers.
-
+Serial number is the characters after the first three characters till the end e.g. "U19AG063893" -> "AG063893" is the serial number.
 ```{r vars, echo=FALSE}
-table_id <- "syn17024229"
+table_id <- "syn51209786" # ELITE Portal Projects Table
+
+# Gather list of grants from synapse
 grants <- syn$tableQuery(
-  glue::glue("SELECT \"Grant Number\", grantSerialNumber, Program FROM {table_id}")
+  glue::glue("SELECT grantNumber, program, name FROM {table_id}")
 )$asDataFrame()
-# Remove rows that have NaN or NA or empty string for the serial number
-grants <- grants[!(grants$grantSerialNumber %in% c(NaN, NA, "")), ]
+
+# convert grant numbers into string
+library(comprehenr)
+grantNumbers <- to_list(for (g in grants$grantNumber) for (y in g) y)
+
+# expand rows with multiple grantNumbers
+grants$grantNumber <- purrr::map(grants$grantNumber, function(x){paste(unlist(x),collapse=",")})
+
+grants <- grants %>% 
+     separate_rows(grantNumber)
+
 ```
+
 ## Run the code
 
 Any character vector can be passed to `query_list_general`. This function wraps several functions:
@@ -50,59 +99,197 @@ Any character vector can be passed to `query_list_general`. This function wraps
 - leaves out grants that were not associated with a PubmedId
 - creates a *query* column to associate the PubmedId with a specific query
 
-```{r query, message=FALSE, warning=FALSE}
-dat <- query_list_general(grants$grantSerialNumber)
+```{r scrape pubmed ids from grant numbers}
+get_pub_details <- function(request_body) {
+  # Make the POST request
+  response <-
+    POST(
+      url = API_URL,
+      headers = headers,
+      body = request_body,
+      encode = "json"
+    )
+  return (response)
+}
+
+process_response <- function(response){
+  if (response$status_code == 200) {
+    # Success!
+    data <- content(response, "parsed")
+    
+    df <- as.data.frame(do.call(rbind, data$results))
+    
+    return (df)
+  }
+}
+
+# works for project Numbers instead of project serial numbers
+# Set the API URL
+API_URL <- "https://api.reporter.nih.gov/v2/publications/search"
+
+# Set the headers
+headers <- list(
+    accept = "application/json",
+    "Content-Type" = "application/json"
+)
+
+# Set the request body
+request_body <- list(
+    criteria = list(
+        core_project_nums = grantNumbers
+    ),
+    offset = 0,
+    limit = 50,
+    sort_field = "core_project_nums",
+    sort_order = "desc"
+)
+
+# Make the POST request
+response <- POST(url = API_URL, headers = headers, body = request_body, encode = "json")
+
+# Check the response status code
+if (response$status_code == 200) {
+    # Success!
+    pmids <- list()
+    
+    # get results as dataframe
+    pmids_temp <- process_response(response)
+    
+    data <- content(response, "parsed")
+    
+    total <- data$meta$total
+    
+    results <- process_response(response)
+    
+    pmids[[length(pmids) + 1]] <- results
+    
+    request_body$offset <- request_body$offset + request_body$limit
+    
+    while (nrow(results) > 0){
+      
+      response <- get_pub_details(request_body)
+      
+      results <- process_response(response)
+      
+      # extend pmids list
+      pmids[[length(pmids) + 1]] <- results
+      
+      # update offset in request
+      request_body$offset <- request_body$offset + request_body$limit
+    }
+} else {
+    # Something went wrong
+    print("Error:", response$status_code)
+}
+
+# create dataframe with pmids
+pmids_df <- do.call(rbind, pmids)
+
+pmids_df <- pmids_df %>% rename(
+  'grantNumber' = 'coreproject'
+)
+
+# for joining
+pmids_df$grantNumber <- as.character(pmids_df$grantNumber)
 ```
 
-Join the grants to the Pubmed queries and clean up.
 
+Gathers pmids from pubmed
+```{r}
+# one eternity later....
+pmid_metadata <- pub_query(pmids_df$pmid)
+```
+
+Join the grants to the Pubmed queries and clean up.
 ```{r query}
-dat <- dat %>%
-  rename(grantSerialNumber = query)
-# For some reason, grantSerialNumber isn't always a character
-grants$grantSerialNumber <- as.character(grants$grantSerialNumber)
-dat <- dplyr::right_join(grants, dat, by = "grantSerialNumber")
-# Need to remove duplicates, but keep all grants and consortium
-# Includes some renaming and dropping of columns
-dat <- dat %>%
-  group_by(pmid) %>%
-  mutate(grant = glue::glue_collapse(unique(.data$`Grant Number`), ", ")) %>%
-  mutate(consortium = glue::glue_collapse(unique(.data$Program), ", ")) %>%
-  select(!c(`Grant Number`, Program, grantSerialNumber)) %>%
-  rename(pubmed_id = pmid, DOI = doi, Program = consortium) %>%
-  distinct()
+# create complete dataset
+dat <- dplyr::right_join(grants, pmids_df, by = "grantNumber")
+
+dat$pmid <- as.character(dat$pmid)
+
+dat <- dplyr::left_join(dat, pmid_metadata, by = "pmid")
+
+# clean column names
+dat <- janitor::clean_names(dat, "lower_camel")
+
 ```
 
 The following has fixes for some of the formatting issues found. It also updates the entity name to remove common, unallowed characters.
 
 ```{r hacky}
 # Included in hacky_cleaning is conversion to ascii and removing html formatting
+dat$year <- stringr::str_extract(dat$pubdate, "\\d{4}")
+dat$year <- as.integer(dat$year)
 dat$title <- hacky_cleaning(dat$title)
 dat$authors <- hacky_cleaning(dat$authors)
-dat$journal <- hacky_cleaning(dat$journal)
-dat$abstract <- hacky_cleaning(dat$abstract)
+dat$journal <- remove_unacceptable_characters(dat$fulljournalname)
+
+# dat$abstract <- hacky_cleaning(dat$abstract)
+
+# drop unnecessary columns
+dat <- dat %>% select(-c('applid', 'result', 'pubdate'))
+
+cat('Total rows: ', nrow(dat), '\n', 'Duplicates: ', sum(dat %>% duplicated()), '\n', 'Rows after duplicate remove: ', nrow(dat)-sum(dat %>% duplicated()))
+
+# Need to remove duplicates, but keep all grants and consortium
+# Includes some renaming and dropping of columns
+dat <- dat %>%
+  group_by(pmid) %>%
+  mutate(grant = glue::glue_collapse(unique(.data$`grantNumber`), ", ")) %>%
+  mutate(consortium = glue::glue_collapse(unique(.data$program), ", ")) %>%
+  select(!c(grantNumber, program)) %>%
+  rename(pubmed_id = pmid, DOI = doi, program = consortium, study = name) %>%
+  distinct()
+
+dat <- dat %>% rename('pmid' = 'pubmed_id')
+dat$entity_name <- make_entity_name(dat)
+dat$Name <- dat$title
+
+
+#Using rename()
+dat <- dat %>% rename(
+        "Authors" = "authors",
+        "Journal"= "journal",
+        "PubmedId" = "pmid",
+        "Title"= "title",
+        "Year"="year",
+        "Grant"="grant",
+        "Program"="program",
+)
+
 # Remove common, unallowed characters from entity name; includes hacky_cleaning
 dat$entity_name <- remove_unacceptable_characters(dat$entity_name)
+
 ```
 
+
 `set_up_multiannotations` parses comma-separated lists to be stored correctly in Synapse as multi-value annotations. Before setting up the multiannotations, add extra columns that are needed for working with the Portal. The additional, redundant columns will be removed in the future. Should keep `grant` and `Program`, and remove `long_amp_ad_grants`, `doi`, and `consortium`.
 
 ```{r columns}
-dat <- set_up_multiannotations(dat, "grant")
+dat <- set_up_multiannotations(dat, "Grant")
 dat <- set_up_multiannotations(dat, "Program")
+dat <- set_up_multiannotations(dat, "Authors")
 ```
 
 The final data is transposed so that it can be iterated over by `purrr` and stored in Synapse under the `parent` folder.
+```{r}
+store_as_annotations <- function(parent, list) {
+  entity <- purrr::map(list, ~ synapseclient$File(
+    path = glue::glue("http://doi.org/{.$DOI}"),
+    name = .$entity_name,
+    parent = parent,
+    synapseStore = FALSE,
+    annotations = .
+  ))
+  # entity
+  purrr::map(entity, ~ syn$store(., forceVersion = FALSE))
+}
+```
 
 ```{r store, message=FALSE, echo=FALSE}
-parent = "syn20463015"
+parent = "syn51317180" # ELITE publications folder
 dat_list <- purrr::transpose(dat)
-store_as_annotations(parent = parent, dat_list)
-```
 
-Query the publications table to force an update.
-
-```{r query, message=FALSE, echo=FALSE}
-pub_table <- "syn20448807"
-syn$tableQuery(glue::glue("SELECT * FROM {pub_table} LIMIT 1"))
-```
+# another eternity
+store_as_annotations(parent = parent, dat_list)
+``` 
diff --git a/vignettes/query-pubmed.Rmd b/vignettes/query-pubmed.Rmd
index 70fc89b..a74490f 100644
--- a/vignettes/query-pubmed.Rmd
+++ b/vignettes/query-pubmed.Rmd
@@ -1,39 +1,52 @@
 ---
 title: "Query Pubmed"
-author: "Kelsey Montgomery"
+author: "Kelsey Montgomery, Nicholas Lee"
 date: "9/18/2020"
+edited: "04/28/2023"
+notes: "Adapted for the ELITE portal"
 output: html_document
 ---
 
 ```{r setup, include=FALSE}
 knitr::opts_chunk$set(echo = TRUE)
-library(dccvalidator)
+# library(dccvalidator)
 library(dplyr)
 library(easyPubMed)
 library(readr)
 library(reticulate)
-synapseclient <- reticulate::import("synapseclient")
-syntab <- reticulate::import("synapseclient.table")
-syn <- synapseclient$Synapse()
-syn$login()
+library(synapser)
+
+source("~/Documents/Projects/ELITE/ELITE-porTools/R/globalHardCodedVariables.R")
+source("~/Documents/Projects/ELITE/ELITE-porTools/R/pubmed.R")
+
+synLogin()
 ```
 
 ## Query Pubmed and store data as file annotations
 
 The schema for these steps is *pubmedId*, *grants* and *study*. Theses functions take
-a list of PubmedIds and queries the site to pull down title, abstract, authors, journal name, year and DOI. Theses annotations are visible in the [PEC Portal - Publications View](https://www.synapse.org/#!Synapse:syn22095937/tables/). See the [Explore Publications module](https://psychencode.synapse.org/Explore/Publications) for a visual of how this data is surfaced on the portal.
+a list of PubmedIds and queries the site to pull down title, abstract, authors, journal name, year and DOI. Theses annotations are visible in the [ELITE Portal - Publications View](https://www.synapse.org/#!Synapse:syn51209321/tables/). See the [Explore Publications module](https://psychencode.synapse.org/Explore/Publications) for a visual of how this data is surfaced on the portal.
+
+```{r query synapse for grants}
+grant_list <-
+  readr::read_csv(syn$tableQuery(paste(
+    "SELECT * FROM ", sid_studies_table, sep = ""
+  ))$filepath,
+  col_types = readr::cols(.default = "c"))
 
-```{r ex_format, echo = FALSE}
-tribble(~pubmedId, ~grants, ~study,
-        "24057217", "R21MH103877", c("study1,study2")
-        )
+# cleanup grant list
+grant_list$grantNumber <- gsub('\\[|\\]|\\"',"", as.character(grant_list$grantNumber))
+
+# take only grant number as the grants
+grants <- grant_list$grantNumber
 ```
 
 Import the list of Pubmed Ids and define the Synapse parentId where the file entities will be stored with the Pubmed-relevant annotations.
-
 ```{r vars, echo=FALSE}
-parent <- "syn22235314"
-pmids <- readr::read_tsv(syn$get("syn22080024")$path, 
+parent <- "syn51317180" # ELITE Portal backend folder for publications
+
+# read pmids file
+pmids <- readr::read_tsv(syn$get(sid_pmid_file)$path, 
                          col_types = readr::cols(.default = "c"))
 ```
 
@@ -46,7 +59,7 @@ Any character vector can be passed to `query_list_pmids`. This function wraps se
 - creates one row per PubmedId
 
 ```{r query}
-dat <- query_list_pmids(pmids$pubmedId)
+dat <- query_list_pmids(pmid_map$pubmed_Id)
 ```
 
 I am keeping an eye out for weird edge cases. These (hacky) steps clean some missing values and remove extraneous characters.
diff --git a/vignettes/update-static-table.Rmd b/vignettes/update-static-table.Rmd
new file mode 100644
index 0000000..3a830c8
--- /dev/null
+++ b/vignettes/update-static-table.Rmd
@@ -0,0 +1,47 @@
+---
+title: "extra"
+output: html_document
+date: "2023-10-10"
+---
+
+
+Create the pmid to grant file map. Drop the duplicates found and store in synapse as reference for later updates. 
+Write out pmid dataframe out to csv for loading later
+```{r}
+pmid_map <- dat %>% select("pubmed_id", "grant", "program", "study")
+
+write.table(pmid_map, file.path(root_dir, 'publications_pmid_list.txt'), row.names = FALSE, sep = "\t")
+
+file <- File(path = file.path(root_dir, 'publications_pmid_list.txt'), parent = 'syn52227310')
+
+file <- synStore(file)
+```
+
+```{r}
+## Delete old publication report table rows
+pub_report_table <- "syn51209321"
+current_table <- syn$tableQuery(glue::glue("SELECT * FROM {pub_report_table}"))
+```
+
+```{r}
+syn$delete(current_table) # delete current rows
+
+## Update table rows
+temp_table <- tempfile()
+write_csv(pubs, temp_table, na = "")
+
+```
+
+```{r}
+new_table <- synapse$Table(pub_report_table, temp_table)
+syn$store(new_table)
+
+## Query to force table index to rebuild
+syn$tableQuery(glue("SELECT ROW_ID FROM {pub_report_table}"))
+```
+
+Query the publications table to force an update.
+```{r query, message=FALSE, echo=FALSE}
+pub_table <- "syn51407023"
+syn$tableQuery(glue::glue("SELECT * FROM {pub_table} LIMIT 1"))
+```