From 0154576f170b8de7ea7c9f35d9b74d4573ff200b Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Tue, 17 Oct 2023 11:49:19 -0500 Subject: [PATCH] Proxito: normalize code languages and redirect to them (#10750) Language codes are being normalized from the root; the old versions of the language codes are still used when building with Sphinx. Projects affected by this change: ``` In [1]: old_language_codes = [ ...: 'nb_NO', ...: 'pt_BR', ...: 'es_MX', ...: 'uk_UA', ...: 'zh_CN', ...: 'zh_TW', ...: ] In [2]: Project.objects.filter(language__in=old_language_codes).count() Out[2]: 3544 ``` We probably want to publish a small blog post communicating this change. We will be using the new codes everywhere, even in the API responses. Old paths using an old code will redirect to the new language code. ### How to deploy this change - Deploy as usual - After the webs are out, run the migrations - This change has zero downtime for doc serving; the only downtime will be for downloads (until the migration is run). 
Closes https://github.com/readthedocs/readthedocs.org/issues/2763 --------- Co-authored-by: Manuel Kaufmann --- readthedocs/core/unresolver.py | 12 +- readthedocs/doc_builder/backends/sphinx.py | 12 +- readthedocs/projects/constants.py | 36 +- .../migrations/0107_alter_project_language.py | 372 ++++++++++++++++++ .../migrations/0108_migrate_language_code.py | 31 ++ .../projects/tests/test_build_tasks.py | 40 ++ readthedocs/projects/views/public.py | 13 +- readthedocs/proxito/tests/test_full.py | 57 +++ readthedocs/proxito/views/mixins.py | 15 + readthedocs/proxito/views/serve.py | 41 +- 10 files changed, 607 insertions(+), 22 deletions(-) create mode 100644 readthedocs/projects/migrations/0107_alter_project_language.py create mode 100644 readthedocs/projects/migrations/0108_migrate_language_code.py diff --git a/readthedocs/core/unresolver.py b/readthedocs/core/unresolver.py index 82a469aca29..463a67524de 100644 --- a/readthedocs/core/unresolver.py +++ b/readthedocs/core/unresolver.py @@ -286,13 +286,21 @@ def _match_multiversion_project( return None language = match.group("language") + # Normalize old language codes to lowercase with dashes. + normalized_language = language.lower().replace("_", "-") + + # TODO: remove after deploy. + # This is so we can temporarily support old language codes + # while we migrate existing projects. 
+ languages = [language, normalized_language] + version_slug = match.group("version") filename = self._normalize_filename(match.group("filename")) - if parent_project.language == language: + if parent_project.language in languages: project = parent_project else: - project = parent_project.translations.filter(language=language).first() + project = parent_project.translations.filter(language__in=languages).first() if not project: raise TranslationNotFoundError( project=parent_project, diff --git a/readthedocs/doc_builder/backends/sphinx.py b/readthedocs/doc_builder/backends/sphinx.py index 63b988fb963..35c7d7503fd 100644 --- a/readthedocs/doc_builder/backends/sphinx.py +++ b/readthedocs/doc_builder/backends/sphinx.py @@ -19,7 +19,7 @@ from readthedocs.builds.models import APIVersion from readthedocs.core.utils.filesystem import safe_open from readthedocs.doc_builder.exceptions import PDFNotFound -from readthedocs.projects.constants import PUBLIC +from readthedocs.projects.constants import OLD_LANGUAGES_CODE_MAPPING, PUBLIC from readthedocs.projects.exceptions import ProjectConfigurationError, UserFileNotFound from readthedocs.projects.models import Feature from readthedocs.projects.templatetags.projects_tags import sort_version_aware @@ -111,6 +111,10 @@ def __init__(self, *args, **kwargs): # because Read the Docs will automatically create one for it. 
pass + def get_language(self, project): + """Get a Sphinx compatible language code.""" + language = project.language + return OLD_LANGUAGES_CODE_MAPPING.get(language, language) def get_config_params(self): """Get configuration parameters to be rendered into the conf file.""" @@ -293,6 +297,7 @@ def build(self): ] if self.config.sphinx.fail_on_warning: build_command.extend(["-W", "--keep-going"]) + language = self.get_language(project) build_command.extend( [ "-b", @@ -300,7 +305,7 @@ def build(self): "-d", self.sphinx_doctrees_dir, "-D", - f"language={project.language}", + f"language={language}", # Sphinx's source directory (SOURCEDIR). # We are executing this command at the location of the `conf.py` file (CWD). # TODO: ideally we should execute it from where the repository was clonned, @@ -472,6 +477,7 @@ class PdfBuilder(BaseSphinx): pdf_file_name = None def build(self): + language = self.get_language(self.project) self.run( *self.get_sphinx_cmd(), "-T", @@ -481,7 +487,7 @@ def build(self): "-d", self.sphinx_doctrees_dir, "-D", - f"language={self.project.language}", + f"language={language}", # Sphinx's source directory (SOURCEDIR). # We are executing this command at the location of the `conf.py` file (CWD). 
# TODO: ideally we should execute it from where the repository was clonned, diff --git a/readthedocs/projects/constants.py b/readthedocs/projects/constants.py index 4cae14dd265..7fcb1cb1f90 100644 --- a/readthedocs/projects/constants.py +++ b/readthedocs/projects/constants.py @@ -279,15 +279,37 @@ ('zh', 'Chinese'), ('zu', 'Zulu'), # Try these to test our non-2 letter language support - ('nb_NO', 'Norwegian Bokmal'), - ('pt_BR', 'Brazilian Portuguese'), - ('es_MX', 'Mexican Spanish'), - ('uk_UA', 'Ukrainian'), - ('zh_CN', 'Simplified Chinese'), - ('zh_TW', 'Traditional Chinese'), + ("nb-no", "Norwegian Bokmal"), + ("pt-br", "Brazilian Portuguese"), + ("es-mx", "Mexican Spanish"), + ("uk-ua", "Ukrainian"), + ("zh-cn", "Simplified Chinese"), + ("zh-tw", "Traditional Chinese"), ) +LANGUAGE_CODES = [code for code, *_ in LANGUAGES] -LANGUAGES_REGEX = '|'.join([re.escape(code[0]) for code in LANGUAGES]) +# Normalize the language codes to lowercase with dashes; +# we use them to match the language codes in the URL. +# The old language codes used underscores and an uppercase region (e.g. pt_BR), +# and are deprecated, but we still need to support them. 
+old_language_codes = [ + "nb_NO", + "pt_BR", + "es_MX", + "uk_UA", + "zh_CN", + "zh_TW", +] +OLD_LANGUAGES_CODE_MAPPING = { + code.lower().replace("_", "-"): code for code in old_language_codes +} + +LANGUAGES_REGEX = "|".join( + [ + re.escape(code) + for code in LANGUAGE_CODES + list(OLD_LANGUAGES_CODE_MAPPING.values()) + ] +) PROGRAMMING_LANGUAGES = ( ('words', 'Only Words'), diff --git a/readthedocs/projects/migrations/0107_alter_project_language.py b/readthedocs/projects/migrations/0107_alter_project_language.py new file mode 100644 index 00000000000..1eda885aac9 --- /dev/null +++ b/readthedocs/projects/migrations/0107_alter_project_language.py @@ -0,0 +1,372 @@ +# Generated by Django 4.2.5 on 2023-10-11 23:18 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("projects", "0106_add_addons_config"), + ] + + operations = [ + migrations.AlterField( + model_name="historicalproject", + name="language", + field=models.CharField( + choices=[ + ("aa", "Afar"), + ("ab", "Abkhaz"), + ("acr", "Achi"), + ("af", "Afrikaans"), + ("agu", "Awakateko"), + ("am", "Amharic"), + ("ar", "Arabic"), + ("as", "Assamese"), + ("ay", "Aymara"), + ("az", "Azerbaijani"), + ("ba", "Bashkir"), + ("be", "Belarusian"), + ("bg", "Bulgarian"), + ("bh", "Bihari"), + ("bi", "Bislama"), + ("bn", "Bengali"), + ("bo", "Tibetan"), + ("br", "Breton"), + ("ca", "Catalan"), + ("caa", "Ch'orti'"), + ("cac", "Chuj"), + ("cab", "Garífuna"), + ("cak", "Kaqchikel"), + ("co", "Corsican"), + ("cs", "Czech"), + ("cy", "Welsh"), + ("da", "Danish"), + ("de", "German"), + ("dz", "Dzongkha"), + ("el", "Greek"), + ("en", "English"), + ("eo", "Esperanto"), + ("es", "Spanish"), + ("et", "Estonian"), + ("eu", "Basque"), + ("fa", "Iranian"), + ("fi", "Finnish"), + ("fj", "Fijian"), + ("fo", "Faroese"), + ("fr", "French"), + ("fy", "Western Frisian"), + ("ga", "Irish"), + ("gd", "Scottish Gaelic"), + ("gl", "Galician"), + ("gn", "Guarani"), + ("gu", "Gujarati"), 
+ ("ha", "Hausa"), + ("hi", "Hindi"), + ("he", "Hebrew"), + ("hr", "Croatian"), + ("hu", "Hungarian"), + ("hy", "Armenian"), + ("ia", "Interlingua"), + ("id", "Indonesian"), + ("ie", "Interlingue"), + ("ik", "Inupiaq"), + ("is", "Icelandic"), + ("it", "Italian"), + ("itz", "Itza'"), + ("iu", "Inuktitut"), + ("ixl", "Ixil"), + ("ja", "Japanese"), + ("jac", "Popti'"), + ("jv", "Javanese"), + ("ka", "Georgian"), + ("kjb", "Q'anjob'al"), + ("kek", "Q'eqchi'"), + ("kk", "Kazakh"), + ("kl", "Kalaallisut"), + ("km", "Khmer"), + ("kn", "Kannada"), + ("knj", "Akateko"), + ("ko", "Korean"), + ("ks", "Kashmiri"), + ("ku", "Kurdish"), + ("ky", "Kyrgyz"), + ("la", "Latin"), + ("ln", "Lingala"), + ("lo", "Lao"), + ("lt", "Lithuanian"), + ("lv", "Latvian"), + ("mam", "Mam"), + ("mg", "Malagasy"), + ("mi", "Maori"), + ("mk", "Macedonian"), + ("ml", "Malayalam"), + ("mn", "Mongolian"), + ("mop", "Mopan"), + ("mr", "Marathi"), + ("ms", "Malay"), + ("mt", "Maltese"), + ("my", "Burmese"), + ("na", "Nauru"), + ("ne", "Nepali"), + ("nl", "Dutch"), + ("no", "Norwegian"), + ("oc", "Occitan"), + ("om", "Oromo"), + ("or", "Oriya"), + ("pa", "Panjabi"), + ("pl", "Polish"), + ("pnb", "Western Punjabi"), + ("poc", "Poqomam"), + ("poh", "Poqomchi"), + ("ps", "Pashto"), + ("pt", "Portuguese"), + ("qu", "Quechua"), + ("quc", "K'iche'"), + ("qum", "Sipakapense"), + ("quv", "Sakapulteko"), + ("rm", "Romansh"), + ("rn", "Kirundi"), + ("ro", "Romanian"), + ("ru", "Russian"), + ("rw", "Kinyarwanda"), + ("sa", "Sanskrit"), + ("sd", "Sindhi"), + ("sg", "Sango"), + ("si", "Sinhala"), + ("sk", "Slovak"), + ("skr", "Saraiki"), + ("sl", "Slovenian"), + ("sm", "Samoan"), + ("sn", "Shona"), + ("so", "Somali"), + ("sq", "Albanian"), + ("sr", "Serbian"), + ("ss", "Swati"), + ("st", "Southern Sotho"), + ("su", "Sudanese"), + ("sv", "Swedish"), + ("sw", "Swahili"), + ("ta", "Tamil"), + ("te", "Telugu"), + ("tg", "Tajik"), + ("th", "Thai"), + ("ti", "Tigrinya"), + ("tk", "Turkmen"), + ("tl", "Tagalog"), + ("tn", 
"Tswana"), + ("to", "Tonga"), + ("tr", "Turkish"), + ("ts", "Tsonga"), + ("tt", "Tatar"), + ("ttc", "Tektiteko"), + ("tzj", "Tz'utujil"), + ("tw", "Twi"), + ("ug", "Uyghur"), + ("uk", "Ukrainian"), + ("ur", "Urdu"), + ("usp", "Uspanteko"), + ("uz", "Uzbek"), + ("vi", "Vietnamese"), + ("vo", "Volapuk"), + ("wo", "Wolof"), + ("xh", "Xhosa"), + ("xin", "Xinka"), + ("yi", "Yiddish"), + ("yo", "Yoruba"), + ("za", "Zhuang"), + ("zh", "Chinese"), + ("zu", "Zulu"), + ("nb-no", "Norwegian Bokmal"), + ("pt-br", "Brazilian Portuguese"), + ("es-mx", "Mexican Spanish"), + ("uk-ua", "Ukrainian"), + ("zh-cn", "Simplified Chinese"), + ("zh-tw", "Traditional Chinese"), + ], + default="en", + help_text="The language the project documentation is rendered in. Note: this affects your project's URL.", + max_length=20, + verbose_name="Language", + ), + ), + migrations.AlterField( + model_name="project", + name="language", + field=models.CharField( + choices=[ + ("aa", "Afar"), + ("ab", "Abkhaz"), + ("acr", "Achi"), + ("af", "Afrikaans"), + ("agu", "Awakateko"), + ("am", "Amharic"), + ("ar", "Arabic"), + ("as", "Assamese"), + ("ay", "Aymara"), + ("az", "Azerbaijani"), + ("ba", "Bashkir"), + ("be", "Belarusian"), + ("bg", "Bulgarian"), + ("bh", "Bihari"), + ("bi", "Bislama"), + ("bn", "Bengali"), + ("bo", "Tibetan"), + ("br", "Breton"), + ("ca", "Catalan"), + ("caa", "Ch'orti'"), + ("cac", "Chuj"), + ("cab", "Garífuna"), + ("cak", "Kaqchikel"), + ("co", "Corsican"), + ("cs", "Czech"), + ("cy", "Welsh"), + ("da", "Danish"), + ("de", "German"), + ("dz", "Dzongkha"), + ("el", "Greek"), + ("en", "English"), + ("eo", "Esperanto"), + ("es", "Spanish"), + ("et", "Estonian"), + ("eu", "Basque"), + ("fa", "Iranian"), + ("fi", "Finnish"), + ("fj", "Fijian"), + ("fo", "Faroese"), + ("fr", "French"), + ("fy", "Western Frisian"), + ("ga", "Irish"), + ("gd", "Scottish Gaelic"), + ("gl", "Galician"), + ("gn", "Guarani"), + ("gu", "Gujarati"), + ("ha", "Hausa"), + ("hi", "Hindi"), + ("he", "Hebrew"), + 
("hr", "Croatian"), + ("hu", "Hungarian"), + ("hy", "Armenian"), + ("ia", "Interlingua"), + ("id", "Indonesian"), + ("ie", "Interlingue"), + ("ik", "Inupiaq"), + ("is", "Icelandic"), + ("it", "Italian"), + ("itz", "Itza'"), + ("iu", "Inuktitut"), + ("ixl", "Ixil"), + ("ja", "Japanese"), + ("jac", "Popti'"), + ("jv", "Javanese"), + ("ka", "Georgian"), + ("kjb", "Q'anjob'al"), + ("kek", "Q'eqchi'"), + ("kk", "Kazakh"), + ("kl", "Kalaallisut"), + ("km", "Khmer"), + ("kn", "Kannada"), + ("knj", "Akateko"), + ("ko", "Korean"), + ("ks", "Kashmiri"), + ("ku", "Kurdish"), + ("ky", "Kyrgyz"), + ("la", "Latin"), + ("ln", "Lingala"), + ("lo", "Lao"), + ("lt", "Lithuanian"), + ("lv", "Latvian"), + ("mam", "Mam"), + ("mg", "Malagasy"), + ("mi", "Maori"), + ("mk", "Macedonian"), + ("ml", "Malayalam"), + ("mn", "Mongolian"), + ("mop", "Mopan"), + ("mr", "Marathi"), + ("ms", "Malay"), + ("mt", "Maltese"), + ("my", "Burmese"), + ("na", "Nauru"), + ("ne", "Nepali"), + ("nl", "Dutch"), + ("no", "Norwegian"), + ("oc", "Occitan"), + ("om", "Oromo"), + ("or", "Oriya"), + ("pa", "Panjabi"), + ("pl", "Polish"), + ("pnb", "Western Punjabi"), + ("poc", "Poqomam"), + ("poh", "Poqomchi"), + ("ps", "Pashto"), + ("pt", "Portuguese"), + ("qu", "Quechua"), + ("quc", "K'iche'"), + ("qum", "Sipakapense"), + ("quv", "Sakapulteko"), + ("rm", "Romansh"), + ("rn", "Kirundi"), + ("ro", "Romanian"), + ("ru", "Russian"), + ("rw", "Kinyarwanda"), + ("sa", "Sanskrit"), + ("sd", "Sindhi"), + ("sg", "Sango"), + ("si", "Sinhala"), + ("sk", "Slovak"), + ("skr", "Saraiki"), + ("sl", "Slovenian"), + ("sm", "Samoan"), + ("sn", "Shona"), + ("so", "Somali"), + ("sq", "Albanian"), + ("sr", "Serbian"), + ("ss", "Swati"), + ("st", "Southern Sotho"), + ("su", "Sudanese"), + ("sv", "Swedish"), + ("sw", "Swahili"), + ("ta", "Tamil"), + ("te", "Telugu"), + ("tg", "Tajik"), + ("th", "Thai"), + ("ti", "Tigrinya"), + ("tk", "Turkmen"), + ("tl", "Tagalog"), + ("tn", "Tswana"), + ("to", "Tonga"), + ("tr", "Turkish"), + ("ts", 
"Tsonga"), + ("tt", "Tatar"), + ("ttc", "Tektiteko"), + ("tzj", "Tz'utujil"), + ("tw", "Twi"), + ("ug", "Uyghur"), + ("uk", "Ukrainian"), + ("ur", "Urdu"), + ("usp", "Uspanteko"), + ("uz", "Uzbek"), + ("vi", "Vietnamese"), + ("vo", "Volapuk"), + ("wo", "Wolof"), + ("xh", "Xhosa"), + ("xin", "Xinka"), + ("yi", "Yiddish"), + ("yo", "Yoruba"), + ("za", "Zhuang"), + ("zh", "Chinese"), + ("zu", "Zulu"), + ("nb-no", "Norwegian Bokmal"), + ("pt-br", "Brazilian Portuguese"), + ("es-mx", "Mexican Spanish"), + ("uk-ua", "Ukrainian"), + ("zh-cn", "Simplified Chinese"), + ("zh-tw", "Traditional Chinese"), + ], + default="en", + help_text="The language the project documentation is rendered in. Note: this affects your project's URL.", + max_length=20, + verbose_name="Language", + ), + ), + ] diff --git a/readthedocs/projects/migrations/0108_migrate_language_code.py b/readthedocs/projects/migrations/0108_migrate_language_code.py new file mode 100644 index 00000000000..3bf7f214b50 --- /dev/null +++ b/readthedocs/projects/migrations/0108_migrate_language_code.py @@ -0,0 +1,31 @@ +# Generated by Django 4.2.5 on 2023-10-11 23:46 + +from django.db import migrations + + +def forwards_func(apps, schema_editor): + """Migrate language code to new format.""" + Project = apps.get_model("projects", "Project") + old_language_codes = [ + "nb_NO", + "pt_BR", + "es_MX", + "uk_UA", + "zh_CN", + "zh_TW", + ] + for old_language_code in old_language_codes: + new_language_code = old_language_code.lower().replace("_", "-") + Project.objects.filter(language=old_language_code).update( + language=new_language_code + ) + + +class Migration(migrations.Migration): + dependencies = [ + ("projects", "0107_alter_project_language"), + ] + + operations = [ + migrations.RunPython(forwards_func), + ] diff --git a/readthedocs/projects/tests/test_build_tasks.py b/readthedocs/projects/tests/test_build_tasks.py index a52f3b0e5e7..9ebadd56271 100644 --- a/readthedocs/projects/tests/test_build_tasks.py +++ 
b/readthedocs/projects/tests/test_build_tasks.py @@ -1358,6 +1358,46 @@ def test_python_mamba_commands(self, load_yaml_config): ] ) + @mock.patch("readthedocs.doc_builder.director.load_yaml_config") + def test_sphinx_normalized_language(self, load_yaml_config): + load_yaml_config.return_value = self._config_file( + { + "version": 2, + "sphinx": { + "configuration": "docs/conf.py", + "fail_on_warning": True, + }, + }, + ) + self.project.language = "es-mx" + self.project.save() + + self._trigger_update_docs_task() + + self.mocker.mocks["environment.run"].assert_has_calls( + [ + mock.call( + mock.ANY, + "-m", + "sphinx", + "-T", + "-E", + "-W", # fail on warning flag + "--keep-going", # fail on warning flag + "-b", + "html", + "-d", + "_build/doctrees", + "-D", + "language=es_MX", + ".", + "$READTHEDOCS_OUTPUT/html", + cwd=mock.ANY, + bin_path=mock.ANY, + ), + ] + ) + @mock.patch("readthedocs.doc_builder.director.load_yaml_config") def test_sphinx_fail_on_warning(self, load_yaml_config): load_yaml_config.return_value = self._config_file( diff --git a/readthedocs/projects/views/public.py b/readthedocs/projects/views/public.py index a2482b9ad05..77c5472b79b 100644 --- a/readthedocs/projects/views/public.py +++ b/readthedocs/projects/views/public.py @@ -9,7 +9,7 @@ from django.conf import settings from django.contrib import messages from django.db.models import prefetch_related_objects -from django.http import Http404, HttpResponse +from django.http import Http404, HttpResponse, HttpResponseRedirect from django.shortcuts import get_object_or_404, redirect, render from django.urls import reverse from django.utils.crypto import constant_time_compare @@ -362,6 +362,17 @@ def get( project.subprojects, alias=subproject_slug ).child + # Redirect old language codes with underscores to new ones with dashes and lowercase. 
+ normalized_language_code = lang_slug.lower().replace("_", "-") + if normalized_language_code != lang_slug: + if project.language != normalized_language_code: + project = get_object_or_404( + project.translations, language=normalized_language_code + ) + return HttpResponseRedirect( + project.get_production_media_url(type_, version_slug=version_slug) + ) + if project.language != lang_slug: project = get_object_or_404(project.translations, language=lang_slug) diff --git a/readthedocs/proxito/tests/test_full.py b/readthedocs/proxito/tests/test_full.py index 9425dbcd16a..a47ea1f98d8 100644 --- a/readthedocs/proxito/tests/test_full.py +++ b/readthedocs/proxito/tests/test_full.py @@ -395,6 +395,36 @@ def test_single_version_serving_language_like_dir(self): "/proxito/media/html/project/latest/en/awesome.html", ) + def test_old_language_code(self): + self.project.language = "pt-br" + self.project.save() + host = "project.dev.readthedocs.io" + + url = "/pt_BR/latest/index.html" + resp = self.client.get(url, headers={"host": host}) + self.assertEqual(resp.status_code, 302) + self.assertEqual( + resp["location"], + "http://project.dev.readthedocs.io/pt-br/latest/index.html", + ) + + url = "/pt-br/latest/index.html" + resp = self.client.get(url, headers={"host": host}) + self.assertEqual(resp.status_code, 200) + self.assertEqual( + resp["x-accel-redirect"], + "/proxito/media/html/project/latest/index.html", + ) + + # Ambiguous path. 
+ url = "/pt-br/latest/bt_BR/index.html" + resp = self.client.get(url, headers={"host": host}) + self.assertEqual(resp.status_code, 200) + self.assertEqual( + resp["x-accel-redirect"], + "/proxito/media/html/project/latest/bt_BR/index.html", + ) + @override_settings( PUBLIC_DOMAIN="dev.readthedocs.io", @@ -465,6 +495,33 @@ def test_download_files_public_version(self): ) self.assertEqual(resp["CDN-Cache-Control"], "public") + @override_settings(PYTHON_MEDIA=False) + def test_download_project_with_old_language_code(self): + self.project.language = "pt-br" + self.project.save() + for type_ in DOWNLOADABLE_MEDIA_TYPES: + resp = self.client.get( + f"/_/downloads/pt_BR/latest/{type_}/", + headers={"host": "project.dev.readthedocs.io"}, + ) + self.assertEqual(resp.status_code, 302) + self.assertEqual( + resp["Location"], + f"//project.dev.readthedocs.io/_/downloads/pt-br/latest/{type_}/", + ) + + resp = self.client.get( + f"/_/downloads/pt-br/latest/{type_}/", + headers={"host": "project.dev.readthedocs.io"}, + ) + self.assertEqual(resp.status_code, 200) + extension = "zip" if type_ == MEDIA_TYPE_HTMLZIP else type_ + self.assertEqual( + resp["X-Accel-Redirect"], + f"/proxito/media/{type_}/project/latest/project.{extension}", + ) + self.assertEqual(resp["CDN-Cache-Control"], "public") + @override_settings(PYTHON_MEDIA=False, ALLOW_PRIVATE_REPOS=True) def test_download_files_private_version(self): self.version.privacy_level = PRIVATE diff --git a/readthedocs/proxito/views/mixins.py b/readthedocs/proxito/views/mixins.py index a0a145198f0..9d1bf5fc001 100644 --- a/readthedocs/proxito/views/mixins.py +++ b/readthedocs/proxito/views/mixins.py @@ -318,6 +318,21 @@ def system_redirect( log.debug( "System Redirect.", host=request.get_host(), from_url=filename, to_url=to ) + + new_path_parsed = urlparse(to) + old_path_parsed = urlparse(request.build_absolute_uri()) + # Check explicitly only the path and hostname, since a different + # protocol or query parameters could lead to a 
infinite redirect. + if ( + new_path_parsed.hostname == old_path_parsed.hostname + and new_path_parsed.path == old_path_parsed.path + ): + log.debug( + "Infinite Redirect: FROM URL is the same than TO URL.", + url=to, + ) + raise InfiniteRedirectException() + # All system redirects can be cached, since the final URL will check for authz. self.cache_response = True resp = HttpResponseRedirect(to) diff --git a/readthedocs/proxito/views/serve.py b/readthedocs/proxito/views/serve.py index 313740a5aa0..bceb62f8c1e 100644 --- a/readthedocs/proxito/views/serve.py +++ b/readthedocs/proxito/views/serve.py @@ -23,7 +23,7 @@ unresolver, ) from readthedocs.core.utils.extend import SettingsOverrideObject -from readthedocs.projects import constants +from readthedocs.projects.constants import OLD_LANGUAGES_CODE_MAPPING, PRIVATE from readthedocs.projects.models import Domain, Feature, HTMLFile from readthedocs.projects.templatetags.projects_tags import sort_version_aware from readthedocs.proxito.constants import RedirectType @@ -264,6 +264,27 @@ def serve_path(self, request, path): version = unresolved.version filename = unresolved.filename + # Check if the old language code format was used, and redirect to the new one. + # NOTE: we may have some false positives here, for example for an URL like: + # /pt-br/latest/pt_BR/index.html, but our protection for infinite redirects + # will prevent a redirect loop. + if ( + not project.single_version + and project.language in OLD_LANGUAGES_CODE_MAPPING + and OLD_LANGUAGES_CODE_MAPPING[project.language] in path + ): + try: + return self.system_redirect( + request=request, + final_project=project, + version_slug=version.slug, + filename=filename, + is_external_version=unresolved_domain.is_from_external_domain, + ) + except InfiniteRedirectException: + # A false positive was detected, continue with our normal serve. 
+ pass + log.bind( project_slug=project.slug, version_slug=version.slug, @@ -707,14 +728,16 @@ def get(self, request): version_slug = project.get_default_version() version = project.versions.get(slug=version_slug) - no_serve_robots_txt = any([ - # If the default version is private or, - version.privacy_level == constants.PRIVATE, - # default version is not active or, - not version.active, - # default version is not built - not version.built, - ]) + no_serve_robots_txt = any( + [ + # If the default version is private or, + version.privacy_level == PRIVATE, + # default version is not active or, + not version.active, + # default version is not built + not version.built, + ] + ) if no_serve_robots_txt: # ... we do return a 404