From ac983fc8bb6066f17c4e4ebfad9266e2612c5b65 Mon Sep 17 00:00:00 2001
From: Daniel O'Connell <github@ahiru.pl>
Date: Fri, 4 Aug 2023 12:19:21 +0200
Subject: [PATCH] Rename nonarxiv_papers source to xmls, drop qualiacomputing, update README

---
 .github/workflows/fetch-dataset.yml           |  3 +--
 .github/workflows/fetch-weekly.yml            |  3 +--
 .github/workflows/push-dataset.yml            |  3 +--
 .github/workflows/upload-to-huggingface.yml   |  3 +--
 README.md                                     | 22 +++++++++++++------
 align_data/sources/articles/__init__.py       |  2 +-
 .../sources/arxiv_papers/arxiv_papers.py      |  2 +-
 align_data/sources/blogs/__init__.py          |  1 -
 tests/align_data/test_arxiv.py                |  3 +--
 9 files changed, 22 insertions(+), 20 deletions(-)

diff --git a/.github/workflows/fetch-dataset.yml b/.github/workflows/fetch-dataset.yml
index d9b17bae..788e24fc 100644
--- a/.github/workflows/fetch-dataset.yml
+++ b/.github/workflows/fetch-dataset.yml
@@ -58,13 +58,12 @@ on:
           - markdown
           - miri
           - ml_safety_newsletter
-          - nonarxiv_papers
-          - qualiacomputing
           - openai.research
           - pdfs
           - rob_miles_ai_safety
           - vkrakovna_blog
           - yudkowsky_blog
+          - xmls
 
 jobs:
   build-dataset:
diff --git a/.github/workflows/fetch-weekly.yml b/.github/workflows/fetch-weekly.yml
index 49fe7597..4af1fa26 100644
--- a/.github/workflows/fetch-weekly.yml
+++ b/.github/workflows/fetch-weekly.yml
@@ -37,13 +37,12 @@ jobs:
           - markdown
           - miri
           - ml_safety_newsletter
-          - nonarxiv_papers
-          - qualiacomputing
           - openai.research
           - pdfs
           - rob_miles_ai_safety
           - vkrakovna_blog
           - yudkowsky_blog
+          - xmls
 
     uses: ./.github/workflows/fetch-dataset.yml
     with:
diff --git a/.github/workflows/push-dataset.yml b/.github/workflows/push-dataset.yml
index 7108df65..768cf2a6 100644
--- a/.github/workflows/push-dataset.yml
+++ b/.github/workflows/push-dataset.yml
@@ -54,13 +54,12 @@ on:
           - markdown
           - miri
           - ml_safety_newsletter
-          - nonarxiv_papers
-          - qualiacomputing
           - openai.research
           - pdfs
           - rob_miles_ai_safety
           - vkrakovna_blog
           - yudkowsky_blog
+          - xmls
 
 jobs:
   generate-dataset:
diff --git a/.github/workflows/upload-to-huggingface.yml b/.github/workflows/upload-to-huggingface.yml
index 958353b4..eaac2ceb 100644
--- a/.github/workflows/upload-to-huggingface.yml
+++ b/.github/workflows/upload-to-huggingface.yml
@@ -38,13 +38,12 @@ jobs:
           - markdown
           - miri
           - ml_safety_newsletter
-          - nonarxiv_papers
-          - qualiacomputing
           - openai.research
           - pdfs
           - rob_miles_ai_safety
           - vkrakovna_blog
           - yudkowsky_blog
+          - xmls
 
     uses: ./.github/workflows/push-dataset.yml
     with:
diff --git a/README.md b/README.md
index c8166946..3e820519 100644
--- a/README.md
+++ b/README.md
@@ -10,32 +10,40 @@ The following list of sources may change and items may be renamed:
 - [aiimpacts](https://aiimpacts.org/)
 - [aisafety.camp](https://aisafety.camp/)
 - [aisafety.info](https://aisafety.info/)
+- ai_alignment_playlist
+- [ai_explained](https://www.youtube.com/@ai-explained-)
+- [ai_safety_talks](https://www.youtube.com/@aisafetytalks)
+- [ai_safety_reading_group](https://www.youtube.com/@aisafetyreadinggroup/videos)
+- [ai_tech_tu_delft](https://www.youtube.com/@AiTechTUDelft/)
 - [alignmentforum](https://www.alignmentforum.org)
 - [alignment_newsletter](https://rohinshah.com/alignment-newsletter/)
 - [arbital](https://arbital.com/)
 - arxiv - alignment research papers from [arxiv](https://arxiv.org/)
-- audio_transcripts - transcripts from interviews with various researchers and other audio recordings
 - [carado.moe](https://carado.moe/)
 - [cold_takes](https://www.cold-takes.com/)
 - [deepmind_blog](https://deepmindsafetyresearch.medium.com/)
+- [deepmind_technical_blog](https://www.deepmind.com/blog-categories/technical-blogs)
 - [distill](https://distill.pub/)
 - [eaforum](https://forum.effectivealtruism.org/) - selected posts
-- ebooks - books include [Superintelligence](https://www.goodreads.com/book/show/20527133-superintelligence), [Human Compatible](https://www.goodreads.com/book/show/44767248-human-compatible), [Life 3.0](https://www.goodreads.com/book/show/34272565-life-3-0), [The Precipice](https://www.goodreads.com/book/show/50485582-the-precipice), and others
-- gdocs
+- [eleuther.ai](https://blog.eleuther.ai/)
 - [generative.ink](https://generative.ink/posts/)
 - [gwern_blog](https://gwern.net/)
+- gdocs - various doc files stored on Google Drive
+- html_articles - various articles on websites
 - [import.ai](https://importai.substack.com)
 - [jsteinhardt_blog](https://jsteinhardt.wordpress.com/)
 - [lesswrong](https://www.lesswrong.com/) - selected posts
-- markdown.ebooks
+- markdown
 - [miri](https://intelligence.org/) - MIRI
 - [ml_safety_newsletter](https://newsletter.mlsafety.org)
-- nonarxiv_papers - other alignment research papers
-- [qualiacomputing](https://qualiacomputing.com/)
-- reports
+- [openai.research](https://openai.com/research)
+- pdfs - various PDFs from different places
+- [rob_miles_ai_safety](https://www.youtube.com/@RobertMilesAI)
 - [vkrakovna_blog](https://vkrakovna.wordpress.com)
 - [waitbutwhy](https://waitbutwhy.com/)
 - [yudkowsky_blog](https://www.yudkowsky.net/)
+- xmls - various articles stored as XML files
+
 
 ## Keys
 
diff --git a/align_data/sources/articles/__init__.py b/align_data/sources/articles/__init__.py
index a6fff663..6775e496 100644
--- a/align_data/sources/articles/__init__.py
+++ b/align_data/sources/articles/__init__.py
@@ -19,7 +19,7 @@
         sheet_id='1800487220'
     ),
     XMLArticles(
-        name='nonarxiv_papers',
+        name='xmls',
         spreadsheet_id='1l3azVJVukGAvZPgg0GyeqiaQe8bEMZvycBJaA8cRXf4',
         sheet_id='823056509'
     ),
diff --git a/align_data/sources/arxiv_papers/arxiv_papers.py b/align_data/sources/arxiv_papers/arxiv_papers.py
index d4eef69f..ae9b7cb9 100644
--- a/align_data/sources/arxiv_papers/arxiv_papers.py
+++ b/align_data/sources/arxiv_papers/arxiv_papers.py
@@ -62,7 +62,7 @@ def process_entry(self, item) -> None:
             "authors": authors,
             "date_published": self._get_published_date(self.is_val(item.date_published) or paper.get('date_published')),
             "data_last_modified": str(metadata.updated),
-            "abstract": metadata.summary.replace("\n", " "),
+            "summary": metadata.summary.replace("\n", " "),
             "author_comment": metadata.comment,
             "journal_ref": metadata.journal_ref,
             "doi": metadata.doi,
diff --git a/align_data/sources/blogs/__init__.py b/align_data/sources/blogs/__init__.py
index 8f1d5fc1..7021c994 100644
--- a/align_data/sources/blogs/__init__.py
+++ b/align_data/sources/blogs/__init__.py
@@ -12,7 +12,6 @@
     WordpressBlog(name="aisafety.camp", url="https://aisafety.camp"),
     WordpressBlog(name="miri", url="https://intelligence.org"),
     WordpressBlog(name="jsteinhardt_blog", url="https://jsteinhardt.wordpress.com"),
-    WordpressBlog(name="qualiacomputing", url="https://qualiacomputing.com"),
     WordpressBlog(name="vkrakovna_blog", url="https://vkrakovna.wordpress.com"),
     WordpressBlog(name="yudkowsky_blog", url="https://yudkowsky.net"),
     MediumBlog(name="deepmind_blog", url="https://deepmindsafetyresearch.medium.com/", authors=["DeepMind Safety Research"]),
diff --git a/tests/align_data/test_arxiv.py b/tests/align_data/test_arxiv.py
index 00b07969..30717d9e 100644
--- a/tests/align_data/test_arxiv.py
+++ b/tests/align_data/test_arxiv.py
@@ -44,7 +44,6 @@ def test_process_entry():
     with patch('align_data.arxiv_papers.arxiv_papers.parse_vanity', return_value=contents):
         with patch('align_data.arxiv_papers.arxiv_papers.arxiv', arxiv):
             assert dataset.process_entry(item).to_dict() == {
-                'abstract': 'abstract bla bla',
                 'author_comment': 'no comment',
                 'authors': ['mr blobby'],
                 'categories': 'wut',
@@ -56,7 +55,7 @@ def test_process_entry():
                 'primary_category': 'cat',
                 'source': 'asd',
                 'source_type': 'html',
-                'summaries': [],
+                'summaries': ['abstract bla bla'],
                 'text': 'this is the text',
                 'title': 'this is the title',
                 'url': 'https://arxiv.org/abs/2001.11038',