Skip to content

Commit

Permalink
add script to grab all URLs
Browse files Browse the repository at this point in the history
  • Loading branch information
alifeee committed Jan 20, 2024
1 parent 883e2e0 commit 402fa57
Show file tree
Hide file tree
Showing 2 changed files with 190 additions and 0 deletions.
173 changes: 173 additions & 0 deletions _data/redirects.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,176 @@
- /lipu-tenpo-nanpa-mun/
- /lipu-tenpo-nanpa-ala/
- /lipu-tenpo-nanpa-kasi/
- /poll/
- /lipu-tenpo-nanpa-suno/
- /lipu-tenpo-nanpa-akesi/
- /j0001/
- /lipu-tenpo-nanpa-kule/
- /o-kama-sona-e-toki-pona/
- /lipu-tenpo-nanpa-toki/
- /lipu-tenpo-nanpa-moli/
- /lipu-tenpo-nanpa-walo/
- /lipu-tenpo-nanpa-lete/
- /lipu-tenpo-nanpa-nimi/
- /nanpa-kijetesantakalu/
- /lipu-tenpo-nanpa-pipi/
- /lipu-tenpo-nanpa-seli/
- /lipu-tenpo-nanpa-pan/
- /lipu-tenpo-nanpa-moku/
- /lipu-tenpo-nanpa-tu/
- /nimi-open-pi-toki-pona/
- /ma-ilo-wijasa/
- /tenpo-sike-tu/
- /luka-juke-ante-pona-sin-li-lon-anu-powe/
- /toki-li-seme/
- /ale-li-nanpa/
- /tawa-pi-poki-monsi-pi-maasija-nanpa-luka-luka-wanma-kanpusi/
- /monsuta-li-lon/
- /jan-pi-lon-ala/
- /mi-ale/
- /tenpo-pi-mi-jan-lili-la/
- /kijetesantakalu-o/
- /kati-tiki-tu-lili/
- /nasin-nanpa-pona/
- /tawa-pi-poki-monsi-pi-ma-asija-nanpa-luka-luka-tu-ma-wije/
- /jan-kule-lon-tenpo-pini-la-jan-sona-alan-tuwin/
- /tenpo-suno-mama-meli/
- /suno-mi/
- /o-lipu-e-sona/
- /moku-pi-tenpo-pini/
- /o-pali-musi-e-moku/
- /kijetesantakalu-o-2/
- /toki-musi-lili/
- /o-moku-pona/
- /tenpo-seli/
- /alasa-nimi-ma-osejanija/
- /alasa-nimi-ma-elopa-nanpa-wan/
- /toki-tu/
- /kama/
- /lipu-tenpo-nanpa-mama/
- /lipu-tenpo-nanpa-kulupu/
- /lipu-tenpo-nanpa-musi/
- /lipu-tenpo-nanpa-nasin/
- /lipu-tenpo-nanpa-ma/
- /jan-eli-li-tawa/
- /utala-musi-pi-sitelen-toki/
- /sitelen-pi-lipu-tenpo-lon-nasin-juniko/
- /seme-li-mi-2/
- /seme-li-mi-1/
- /nasin-telo-suli-nile/
- /jan-pali-li-ken-ala-ken-kamalawa-e-nasin-pali/
- /sitelen-pi-nasin-jan/
- /tawa-pi-poki-monsi-pi-ma-asija-nanpa-luka-luka-tu-ma-sonko/
- /nasin-tawa-intawe/
- /tan-ma-tawa-mun/
- /kijetesantakalu-o-3/
- /mun-suli/
- /kalama-ali/
- /seme-li-mi-3/
- /lete-kama-seli/
- /alasa-nimima-elopa-nanpa-tu/
- /jan-pi-toki-pona-li-kulupu-lon-ma-tomo-mase/
- /jan-pi-toki-sonko-li-toki-pona-la-seme-li-sin-lon-kulupu-ona%e4%ba%ba%e4%b9%8b%e8%a8%80sonko%e5%85%ae%e8%a8%80%e5%96%84%e6%97%b6%e4%bd%95%e5%85%ae%e6%96%b0%e4%ba%8e%e5%9b%a2%e4%bc%8a/
- /sitelen-anku/
- /sona-nasa-pi-kulupu-lili-kulupu-pali-pi-jan-insa-sin-tupa/
- /sona-musi-lili-pi-ma-antateka/
- /tawa-pi-poki-monsipi-ma-asija-nanpa-luka-luka-tu-tu-ma-tawan-en-ma-anku/
- /toki-loje-tu/
- /toki-pali-tu/
- /toki-pilin-tu/
- /musi-pi-nasin-sitelen-non/
- /suno-li-pona-tawa-mi/
- /kijetesantakalu-o-4/
- /musi-linja/
- /seme-li-mi-4/
- /alasa-nimi-ma-asija/
- /lipu-tenpo-nanpa-sewi/
- /lipu-tenpo-nanpa-sin/
- /lipu-tenpo-li-lon-selo-sin/
- /ijo-sin-pi-lipu-wikipesija/
- /tomo-pi-kiwen-suli/
- /enko-li-seme/
- /toki-ni-li-tan-ala-tan-jan-ape-antan/
- /ma-nijon/
- /toki-poki-tu/
- /ijo-mi/
- /tuki-tiki-li-seme/
- /weka-li-ante-e-suli/
- /kama-nanpa-tu/
- /sin/
- /toki-pona-tawa-sewi/
- /seme-li-mi/
- /taki/
- /kijetesantakalu-o-5/
- /toki-pona-li-ken-ala-ken-suwi-e-pilin-sina/
- /sitelen-pona-pi-nimi-nalanja/
- /toki-ma-la-seme-li-lon/
- /ma-anpa-pi-ma-tomo-paki/
- /kasi-kawa-en-telo-kawa/
- /jan-li-toki-e-wile-ona-lon-ma-netelan/
- /nasin-seme-la-ko-lete-walo-li-kama-tan-sewi/
- /musi-palisa/
- /sike-kon/
- /toki-musi-tan-jan-sapo%c2%b9-tan-jan-katulo%c2%b2-musi/
- /jan-makupetu/
- /sina-pini-kin/
- /toki-mun-tu/
- /sitelen-pi-nasin-toki-tu/
- /sitelen-musi/
- /seme-li-mi-5/
- /kijetesantakalu-o-6/
- /toki-olapi-li-seme/
- /category/ante/
- /category/ijo-pona/
- /category/jan/
- /category/toki/kulupu/
- /category/lipu/
- /category/toki/ma/
- /category/toki/moku-toki/
- /category/toki/musi/
- /category/toki/pilin/
- /category/toki/sona/
- /category/toki/toki-toki/
- /category/toki/
- /category/toki/toki-musi/
- /category/toki/toki-pona/
- /tag/akesi/
- /tag/ala/
- /tag/anniversary/
- /tag/april-fools/
- /tag/lipu/
- /tag/lipu-tenpo/
- /tag/lipu-tenpo-nanpa-sewi/
- /tag/lipu-tenpo-nanpa-sin/
- /tag/lipu-tenpo-nanpa-tu/
- /tag/mama/
- /tag/mun/
- /tag/musi/
- /tag/nanpa/
- /tag/nanpa-akesi/
- /tag/nanpa-ala/
- /tag/nanpa-kulupu/
- /tag/nanpa-ma/: /lipu/nanpa-ma/
- /tag/nanpa-mama/
- /tag/nanpa-moku/
- /tag/nanpa-mun/
- /tag/nanpa-musi/
- /tag/nanpa-nasin/
- /tag/nanpa-seli/
- /tag/nanpa-sewi/
- /tag/nanpa-sin/
- /tag/nanpa-tu/
- /tag/nasin/
- /tag/seli/
- /tag/sewi/
- /tag/sike-tu/
- /tag/sin/
- /tag/tenpo/
- /tag/tenpo-sike-tu/
- /tag/tuki-tiki/
- /author/jan-ke-tami/: /sona/
- /author/alifeee/: /sona/
- /author/janpolo/: /sona/
- /author/jan-sonatan/: /sona/
- /author/kulupu-pi-lipu-tenpo/: /sona/
- /author/jan-alonola/: /sona/
17 changes: 17 additions & 0 deletions scripts/get_all_urls.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/bin/bash
#
# Print every URL listed in one or more sitemap files as a YAML list item,
# with the site's base URL (first argument) removed from the front.
#
# Usage:
#   ./get_all_urls.sh BASE_URL SITEMAP_URL [SITEMAP_URL...]
#
# Example:
#   ./get_all_urls.sh https://liputenpo.org https://liputenpo.org/post-sitemap.xml https://liputenpo.org/page-sitemap.xml https://liputenpo.org/category-sitemap.xml https://liputenpo.org/post_tag-sitemap.xml https://liputenpo.org/author-sitemap.xml
#
# Output (stdout): one "- /path/" line per <loc> entry found.
# Exit status: 0 on success, 1 if any sitemap could not be fetched,
#              2 on a usage error.
# Requires: curl, awk, and a grep with PCRE support (-P, e.g. GNU grep).

# Print a short usage message to stderr.
usage() {
  printf 'Usage: %s BASE_URL SITEMAP_URL [SITEMAP_URL...]\n' "${0##*/}" >&2
}

# Read sitemap XML on stdin and write "- <path>" for every <loc> element.
# Arguments: $1 - base URL to strip from the start of each location
# A location that does not start with the base URL is printed unchanged
# rather than having an unrelated prefix of the same length chopped off.
extract_paths() {
  local base_url=$1

  # The lazy .*? keeps several <loc> elements on one line separate.
  grep -oP '(?<=<loc>).*?(?=</loc>)' \
    | awk -v base_url="$base_url" '
        BEGIN { base_len = length(base_url) }
        {
          if (index($0, base_url) == 1) {
            printf "- %s\n", substr($0, base_len + 1)
          } else {
            printf "- %s\n", $0
          }
        }'
}

# Fetch each sitemap in turn and print its stripped URLs.
# Arguments: $1 - base URL, $2... - sitemap URLs
main() {
  if (( $# < 2 )); then
    usage
    return 2
  fi

  local base_url=$1
  shift

  local status=0
  local sitemap_url xml
  # Iterate "$@" directly so each URL stays one word (no splitting/globbing).
  for sitemap_url in "$@"; do
    # -f: treat HTTP errors as failures instead of parsing an error page;
    # -sS: no progress meter, but still report errors on stderr.
    if ! xml=$(curl -fsS "$sitemap_url"); then
      printf '%s: failed to fetch %s\n' "${0##*/}" "$sitemap_url" >&2
      status=1
      continue
    fi
    extract_paths "$base_url" <<<"$xml"
  done

  return "$status"
}

main "$@"

0 comments on commit 402fa57

Please sign in to comment.