-
Notifications
You must be signed in to change notification settings - Fork 35
/
xliff1_serializer.py
212 lines (185 loc) · 7.55 KB
/
xliff1_serializer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
from __future__ import annotations
import logging
from typing import TYPE_CHECKING
from urllib.parse import urlparse
from django.conf import settings
from django.core.serializers import base
from ..cms.models import Page, PageTranslation
from . import base_serializer
if TYPE_CHECKING:
from xml.dom.minidom import Element
from django.db.models.fields import CharField, TextField
# Module-level logger, named after this module via ``__name__``
logger = logging.getLogger(__name__)
class Serializer(base_serializer.Serializer):
    """
    Serializer which writes page translations as XLIFF version 1.2.

    Every serialized object becomes one ``<file>`` element which contains a ``<header>`` and a ``<body>``,
    and every handled field becomes one ``<trans-unit>`` with a ``<source>`` and a ``<target>`` child.

    This was inspired by `django-xliff <https://github.com/callowayproject/django-xliff>`__.
    """

    def start_serialization(self) -> None:
        """
        Begin the serialization: run the parent implementation and open the ``<xliff>`` root element.
        """
        super().start_serialization()
        if TYPE_CHECKING:
            assert self.xml
        root_attributes = {
            "version": "1.2",
            "xmlns": "urn:oasis:names:tc:xliff:document:1.2",
        }
        self.xml.startElement("xliff", root_attributes)

    def start_object(self, obj: PageTranslation) -> None:
        """
        Called when the serialization of an object begins. Opens the ``<file>`` element carrying the page id and
        the source/target language tags, writes the ``<header>`` and opens the ``<body>`` element.

        :param obj: The page translation object which is started
        :raises ~django.core.serializers.base.SerializationError: If the region provides no source language for
                                                                  the language of the translation
        """
        if TYPE_CHECKING:
            assert self.xml
        source_language = obj.page.region.get_source_language(obj.language.slug)
        if not source_language:
            raise base.SerializationError(
                "The page translation is in the region's default language."
            )
        file_attributes = {
            "original": str(obj.page.id),
            "datatype": "plaintext",
            "source-language": source_language.bcp47_tag,
            "target-language": obj.language.bcp47_tag,
        }
        self.xml.startElement("file", file_attributes)
        # The header below is needed so that MemoQ's WPML filter can segment the XLIFF file and yields the same
        # translation memory as the legacy export via WordPress/WPML. See also:
        # https://docs.memoq.com/current/en/Places/wpml-xliff-filter.html
        phases = (
            ("shortcodes", "Shortcodes identification"),
            ("post_type", "Post type"),
        )
        self.xml.startElement("header", {})
        self.xml.startElement("phase-group", {})
        for phase_name, process_name in phases:
            self.xml.addQuickElement(
                "phase",
                attrs={"phase-name": phase_name, "process-name": process_name},
            )
        self.xml.endElement("phase-group")
        self.xml.endElement("header")
        self.xml.startElement("body", {})

    def handle_field(self, obj: PageTranslation, field: CharField | TextField) -> None:
        """
        Called for each field of an object (except for ForeignKeys and ManyToManyFields). Writes one
        ``<trans-unit>`` whose ``<source>`` holds the field value of the source translation and whose
        ``<target>`` holds the field value of the given translation.

        :param obj: The page translation object which is handled
        :param field: The model field
        """
        if TYPE_CHECKING:
            assert self.xml
        # The setting maps legacy names to field names, so invert it to look up the legacy name of a field
        legacy_names: dict[str, str] = {
            current_name: legacy_name
            for legacy_name, current_name in settings.XLIFF_LEGACY_FIELDS.items()
        }
        # Fall back to the real field name if there is no legacy name
        unit_name = legacy_names.get(field.name, field.name)
        self.xml.startElement(
            "trans-unit",
            {
                "id": unit_name,
                "resname": unit_name,
                "restype": "string",
                "datatype": "html",
            },
        )

        self.xml.startElement("source", {})
        # Depending on the export mode, only public versions of the source translation are considered
        if self.only_public:
            source_translation = obj.public_source_translation
        else:
            source_translation = obj.public_or_draft_source_translation
        self.xml.cdata(field.value_to_string(source_translation))
        self.xml.endElement("source")

        self.xml.startElement("target", {})
        self.xml.cdata(field.value_to_string(obj))
        self.xml.endElement("target")

        self.xml.endElement("trans-unit")

    def end_object(self, obj: PageTranslation) -> None:
        """
        Called after all fields of an object have been handled.
        Closes the ``<body>`` element and the surrounding ``<file>`` element.

        :param obj: The page translation object which is finished
        """
        if TYPE_CHECKING:
            assert self.xml
        for element_name in ("body", "file"):
            self.xml.endElement(element_name)
class Deserializer(base_serializer.Deserializer):
    """
    Deserializer which reads page translations from XLIFF version 1.2
    """

    #: The node name of serialized fields
    unit_node = "trans-unit"

    def get_object(self, node: Element) -> PageTranslation:
        """
        Determine the page translation which belongs to the given xml node.

        The page is looked up via the ``original`` attribute. If this lookup fails, the ``href`` of an
        ``<external-file>`` child node is used to find the page by region, language and translation slug.
        An already existing translation in the target language is returned as is, otherwise a new
        translation object is instantiated (inheriting the status of the source translation, if there is one).

        :param node: The current xml node of the object
        :raises ~django.core.serializers.base.DeserializationError: If the link of the ``<external-file>``
                                                                    reference has less than 3 segments
        :raises ValueError: If the ``original`` attribute is no valid id and the page cannot be found via an
                            ``<external-file>`` reference either
        :raises ~integreat_cms.cms.models.pages.page.Page.DoesNotExist: If no page with the given id exists and the
                                                                         page cannot be found via an
                                                                         ``<external-file>`` reference either
        :return: The original page translation
        """
        # Look up the original page by its id
        original_id = self.require_attribute(node, "original")
        try:
            page = Page.objects.get(id=original_id)
        except (ValueError, Page.DoesNotExist) as lookup_error:
            # The id is either not a number or does not belong to any page, so fall back to the external file reference
            external_files = node.getElementsByTagName("external-file")
            if not external_files:
                # Without such a reference, the initial error is all we can report
                raise lookup_error
            # Split the path of the referenced url into its segments
            href = self.require_attribute(external_files[0], "href")
            page_link = urlparse(href).path.strip("/").split("/")
            logger.debug(
                "<external-file>-node found, parsed page link: %r",
                page_link,
            )
            # The link is expected to look like /<region_slug>/<language_slug>/[<parent_page_slug>]/<page_slug>/
            if len(page_link) < 3:
                raise base.DeserializationError(
                    "The page link of the <external-file> reference needs at least 3 segments"
                ) from lookup_error
            # The first two segments identify region and language, the last one is the slug of the translation
            region_slug, language_slug = page_link[0], page_link[1]
            page_translation_slug = page_link[-1]
            page = Page.objects.filter(
                region__slug=region_slug,
                translations__slug=page_translation_slug,
                translations__language__slug=language_slug,
            ).first()
            if not page:
                # The link does not match any page either, so report the initial error
                raise lookup_error

        logger.debug(
            "Referenced original page: %r",
            page,
        )

        # Determine the target language of this file
        target_language_tag = self.require_attribute(node, "target-language")
        target_language = self.get_language(target_language_tag)

        # Reuse the target translation if there already is one
        existing_translation = page.get_translation(target_language.slug)
        if existing_translation:
            return existing_translation

        # Otherwise build a new translation, starting with these constructor arguments
        attrs = {
            "page": page,
            "language": target_language,
        }
        # The new translation inherits the status of the source translation
        source_language_tag = self.require_attribute(node, "source-language")
        source_language = self.get_language(source_language_tag)
        source_translation = page.get_translation(source_language.slug)
        if source_translation:
            attrs["status"] = source_translation.status
        return PageTranslation(**attrs)