From 402fa57addf697259abf643589dc75ee26ed8ef3 Mon Sep 17 00:00:00 2001 From: alifeee Date: Sat, 20 Jan 2024 19:02:08 +0000 Subject: [PATCH] add script to grab all URLs --- _data/redirects.yaml | 173 ++++++++++++++++++++++++++++++++++++++++ scripts/get_all_urls.sh | 17 ++++ 2 files changed, 190 insertions(+) create mode 100644 scripts/get_all_urls.sh diff --git a/_data/redirects.yaml b/_data/redirects.yaml index 649a0e7..84742a0 100644 --- a/_data/redirects.yaml +++ b/_data/redirects.yaml @@ -2,3 +2,176 @@ - /lipu-tenpo-nanpa-mun/ - /lipu-tenpo-nanpa-ala/ - /lipu-tenpo-nanpa-kasi/ +- /poll/ +- /lipu-tenpo-nanpa-suno/ +- /lipu-tenpo-nanpa-akesi/ +- /j0001/ +- /lipu-tenpo-nanpa-kule/ +- /o-kama-sona-e-toki-pona/ +- /lipu-tenpo-nanpa-toki/ +- /lipu-tenpo-nanpa-moli/ +- /lipu-tenpo-nanpa-walo/ +- /lipu-tenpo-nanpa-lete/ +- /lipu-tenpo-nanpa-nimi/ +- /nanpa-kijetesantakalu/ +- /lipu-tenpo-nanpa-pipi/ +- /lipu-tenpo-nanpa-seli/ +- /lipu-tenpo-nanpa-pan/ +- /lipu-tenpo-nanpa-moku/ +- /lipu-tenpo-nanpa-tu/ +- /nimi-open-pi-toki-pona/ +- /ma-ilo-wijasa/ +- /tenpo-sike-tu/ +- /luka-juke-ante-pona-sin-li-lon-anu-powe/ +- /toki-li-seme/ +- /ale-li-nanpa/ +- /tawa-pi-poki-monsi-pi-maasija-nanpa-luka-luka-wanma-kanpusi/ +- /monsuta-li-lon/ +- /jan-pi-lon-ala/ +- /mi-ale/ +- /tenpo-pi-mi-jan-lili-la/ +- /kijetesantakalu-o/ +- /kati-tiki-tu-lili/ +- /nasin-nanpa-pona/ +- /tawa-pi-poki-monsi-pi-ma-asija-nanpa-luka-luka-tu-ma-wije/ +- /jan-kule-lon-tenpo-pini-la-jan-sona-alan-tuwin/ +- /tenpo-suno-mama-meli/ +- /suno-mi/ +- /o-lipu-e-sona/ +- /moku-pi-tenpo-pini/ +- /o-pali-musi-e-moku/ +- /kijetesantakalu-o-2/ +- /toki-musi-lili/ +- /o-moku-pona/ +- /tenpo-seli/ +- /alasa-nimi-ma-osejanija/ +- /alasa-nimi-ma-elopa-nanpa-wan/ +- /toki-tu/ +- /kama/ +- /lipu-tenpo-nanpa-mama/ +- /lipu-tenpo-nanpa-kulupu/ +- /lipu-tenpo-nanpa-musi/ +- /lipu-tenpo-nanpa-nasin/ +- /lipu-tenpo-nanpa-ma/ +- /jan-eli-li-tawa/ +- /utala-musi-pi-sitelen-toki/ +- /sitelen-pi-lipu-tenpo-lon-nasin-juniko/ +- 
/seme-li-mi-2/ +- /seme-li-mi-1/ +- /nasin-telo-suli-nile/ +- /jan-pali-li-ken-ala-ken-kamalawa-e-nasin-pali/ +- /sitelen-pi-nasin-jan/ +- /tawa-pi-poki-monsi-pi-ma-asija-nanpa-luka-luka-tu-ma-sonko/ +- /nasin-tawa-intawe/ +- /tan-ma-tawa-mun/ +- /kijetesantakalu-o-3/ +- /mun-suli/ +- /kalama-ali/ +- /seme-li-mi-3/ +- /lete-kama-seli/ +- /alasa-nimima-elopa-nanpa-tu/ +- /jan-pi-toki-pona-li-kulupu-lon-ma-tomo-mase/ +- /jan-pi-toki-sonko-li-toki-pona-la-seme-li-sin-lon-kulupu-ona%e4%ba%ba%e4%b9%8b%e8%a8%80sonko%e5%85%ae%e8%a8%80%e5%96%84%e6%97%b6%e4%bd%95%e5%85%ae%e6%96%b0%e4%ba%8e%e5%9b%a2%e4%bc%8a/ +- /sitelen-anku/ +- /sona-nasa-pi-kulupu-lili-kulupu-pali-pi-jan-insa-sin-tupa/ +- /sona-musi-lili-pi-ma-antateka/ +- /tawa-pi-poki-monsipi-ma-asija-nanpa-luka-luka-tu-tu-ma-tawan-en-ma-anku/ +- /toki-loje-tu/ +- /toki-pali-tu/ +- /toki-pilin-tu/ +- /musi-pi-nasin-sitelen-non/ +- /suno-li-pona-tawa-mi/ +- /kijetesantakalu-o-4/ +- /musi-linja/ +- /seme-li-mi-4/ +- /alasa-nimi-ma-asija/ +- /lipu-tenpo-nanpa-sewi/ +- /lipu-tenpo-nanpa-sin/ +- /lipu-tenpo-li-lon-selo-sin/ +- /ijo-sin-pi-lipu-wikipesija/ +- /tomo-pi-kiwen-suli/ +- /enko-li-seme/ +- /toki-ni-li-tan-ala-tan-jan-ape-antan/ +- /ma-nijon/ +- /toki-poki-tu/ +- /ijo-mi/ +- /tuki-tiki-li-seme/ +- /weka-li-ante-e-suli/ +- /kama-nanpa-tu/ +- /sin/ +- /toki-pona-tawa-sewi/ +- /seme-li-mi/ +- /taki/ +- /kijetesantakalu-o-5/ +- /toki-pona-li-ken-ala-ken-suwi-e-pilin-sina/ +- /sitelen-pona-pi-nimi-nalanja/ +- /toki-ma-la-seme-li-lon/ +- /ma-anpa-pi-ma-tomo-paki/ +- /kasi-kawa-en-telo-kawa/ +- /jan-li-toki-e-wile-ona-lon-ma-netelan/ +- /nasin-seme-la-ko-lete-walo-li-kama-tan-sewi/ +- /musi-palisa/ +- /sike-kon/ +- /toki-musi-tan-jan-sapo%c2%b9-tan-jan-katulo%c2%b2-musi/ +- /jan-makupetu/ +- /sina-pini-kin/ +- /toki-mun-tu/ +- /sitelen-pi-nasin-toki-tu/ +- /sitelen-musi/ +- /seme-li-mi-5/ +- /kijetesantakalu-o-6/ +- /toki-olapi-li-seme/ +- /category/ante/ +- /category/ijo-pona/ +- /category/jan/ +- /category/toki/kulupu/ 
+- /category/lipu/
+- /category/toki/ma/
+- /category/toki/moku-toki/
+- /category/toki/musi/
+- /category/toki/pilin/
+- /category/toki/sona/
+- /category/toki/toki-toki/
+- /category/toki/
+- /category/toki/toki-musi/
+- /category/toki/toki-pona/
+- /tag/akesi/
+- /tag/ala/
+- /tag/anniversary/
+- /tag/april-fools/
+- /tag/lipu/
+- /tag/lipu-tenpo/
+- /tag/lipu-tenpo-nanpa-sewi/
+- /tag/lipu-tenpo-nanpa-sin/
+- /tag/lipu-tenpo-nanpa-tu/
+- /tag/mama/
+- /tag/mun/
+- /tag/musi/
+- /tag/nanpa/
+- /tag/nanpa-akesi/
+- /tag/nanpa-ala/
+- /tag/nanpa-kulupu/
+- /tag/nanpa-ma/: /lipu/nanpa-ma/
+- /tag/nanpa-mama/
+- /tag/nanpa-moku/
+- /tag/nanpa-mun/
+- /tag/nanpa-musi/
+- /tag/nanpa-nasin/
+- /tag/nanpa-seli/
+- /tag/nanpa-sewi/
+- /tag/nanpa-sin/
+- /tag/nanpa-tu/
+- /tag/nasin/
+- /tag/seli/
+- /tag/sewi/
+- /tag/sike-tu/
+- /tag/sin/
+- /tag/tenpo/
+- /tag/tenpo-sike-tu/
+- /tag/tuki-tiki/
+- /author/jan-ke-tami/: /sona/
+- /author/alifeee/: /sona/
+- /author/janpolo/: /sona/
+- /author/jan-sonatan/: /sona/
+- /author/kulupu-pi-lipu-tenpo/: /sona/
+- /author/jan-alonola/: /sona/
diff --git a/scripts/get_all_urls.sh b/scripts/get_all_urls.sh
new file mode 100644
index 0000000..7918548
--- /dev/null
+++ b/scripts/get_all_urls.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+# Print every URL listed in the given sitemap files as a YAML list item ("- <path>"),
+# with the base URL (first argument) stripped from the front of each one.
+# e.g.,
+# ./get_all_urls.sh https://liputenpo.org https://liputenpo.org/post-sitemap.xml https://liputenpo.org/page-sitemap.xml https://liputenpo.org/category-sitemap.xml https://liputenpo.org/post_tag-sitemap.xml https://liputenpo.org/author-sitemap.xml
+
+# first argument is the base URL; every remaining argument is a sitemap URL
+base_url=$1
+shift 1
+
+for url in "$@"
+do
+  # a sitemap entry holds its URL between <loc> and </loc>; -o prints only that text
+  curl -s "$url" | grep -oP '(?<=<loc>).*?(?=</loc>)' |
+    awk -v base_url="$base_url" 'BEGIN {baselen = length(base_url)} { printf "- %s\n", substr($0, baselen + 1)}'
+done