-
Notifications
You must be signed in to change notification settings - Fork 18
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Added script for updating messages and basic usage help for other message scripts #1000
base: clarin-dev
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,47 +1,41 @@ | ||
#!/usr/bin/python | ||
# -*- coding: utf-8 -*- | ||
|
||
## USAGE EXAMPLE: python check_message_translations.sh cs | ||
|
||
import sys | ||
import argparse | ||
import os | ||
import codecs | ||
import re | ||
|
||
from check_message_lib import find_language_file_name | ||
from check_message_lib import find_language_file_name, get_js_keys, get_xml_keys | ||
|
||
arg_parser = argparse.ArgumentParser(description='Compare the XML and JS message keys for two languages.') | ||
arg_parser.add_argument('-lang1', required=True, help='First language as a 2-letter code') | ||
arg_parser.add_argument('-lang2', default='en', help='Second language as a 2-letter code (defaults to "en")') | ||
arguments = arg_parser.parse_args() | ||
language1 = arguments.lang1 | ||
language2 = arguments.lang2 | ||
|
||
script_directory = os.path.dirname(os.path.realpath(__file__)) | ||
os.chdir(script_directory) | ||
|
||
language1 = sys.argv[1] | ||
language2 = sys.argv[2] if len(sys.argv) > 2 else 'en' | ||
|
||
dspace_script = 'dspace-l10n-check.py' | ||
def compare_keys(file_name1, file_name2, keys_function):
    """Report, in both directions, the keys that one file has and the other lacks.

    file_name1, file_name2 -- names of the two files to compare.
    keys_function -- callable applied to each file name; its results are
        subtracted from each other with ``-``, so it must return sets (or
        objects supporting set difference).

    Prints a header line, then delegates each direction of the comparison
    to report_delta. Returns nothing.
    """
    print('\n\nComparing {} and {}:'.format(file_name1, file_name2))
    first_keys, second_keys = (
        keys_function(name) for name in (file_name1, file_name2)
    )
    absent_from_first = second_keys - first_keys
    absent_from_second = first_keys - second_keys
    report_delta(file_name1, file_name2, absent_from_first)
    report_delta(file_name2, file_name1, absent_from_second)
|
||
def report_delta(file_name1, file_name2, keys):
    """Print the keys that are present in file_name2 but missing in file_name1.

    file_name1 -- name of the file that lacks the keys.
    file_name2 -- name of the file that contains the keys.
    keys -- collection of key strings found only in file_name2; when it is
        empty a single "every key is also in" line is printed instead.

    Returns nothing; the report goes to standard output.
    """
    if not keys:
        print('\n Every key in {} is also in {}.'.format(file_name2, file_name1))
        return
    print('\n Present in {} but missing in {}:'.format(file_name2, file_name1))
    # Sets iterate in arbitrary order; sorting keeps the report identical
    # between runs so two reports can be diffed against each other.
    for key in sorted(keys):
        print(' ' + key)
|
||
xml_file_name1 = find_language_file_name(language1, 'xml') | ||
xml_file_name2 = find_language_file_name(language2, 'xml') | ||
os.system('python ' + dspace_script + ' ' + xml_file_name1 + ' ' + xml_file_name2) | ||
|
||
# A quoted key (word characters and hyphens) at the start of a line,
# followed by a colon.
js_key_regexp = r'^\s*["\']([\w-]+?)["\']\s*:'


def find_js_keys(js_file_name):
    """Collect the quoted keys that start a line in a JS messages file.

    js_file_name -- path of the file to scan; it is read as UTF-8.

    Returns a set with the text of every key matched by js_key_regexp,
    i.e. a single- or double-quoted name at the beginning of a line
    (ignoring surrounding whitespace) that is followed by a colon.
    """
    # Compile once instead of handing the raw pattern to re.search per line.
    key_pattern = re.compile(js_key_regexp, re.U)
    keys = set()
    # The context manager closes the file even when decoding or matching
    # raises; previously the handle was never closed.
    with codecs.open(js_file_name, 'r', 'UTF-8') as js_file:
        for line in js_file:
            match = key_pattern.search(line.strip())
            if match:
                keys.add(match.group(1))
    return keys
compare_keys(xml_file_name1, xml_file_name2, get_xml_keys) | ||
|
||
js_file_name1 = find_language_file_name(language1, 'js') | ||
js_keys1 = find_js_keys(js_file_name1) | ||
js_file_name2 = find_language_file_name(language2, 'js') | ||
js_keys2 = find_js_keys(js_file_name2) | ||
|
||
print '\nPresent in ' + js_file_name2 + ' but missing in ' + js_file_name1 + ':' | ||
for key in (js_keys2 - js_keys1): | ||
print key | ||
|
||
print '\nPresent in ' + js_file_name1 + ' but missing in ' + js_file_name2 + ':' | ||
for key in (js_keys1 - js_keys2): | ||
print key | ||
|
||
compare_keys(js_file_name1, js_file_name2, get_js_keys) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
#!/usr/bin/python | ||
# -*- coding: utf-8 -*- | ||
|
||
import argparse | ||
import os | ||
import lxml.etree as lxml | ||
|
||
from check_message_lib import find_language_file_name, get_xml_keys | ||
|
||
arg_parser = argparse.ArgumentParser(description="Add English XML messages missing in the language's messages, marked with @TODO=TRANSLATE.") | ||
arg_parser.add_argument('-lang', required=True, help='Language (as a 2-letter code) of the messages file') | ||
arguments = arg_parser.parse_args() | ||
language = arguments.lang | ||
|
||
script_directory = os.path.dirname(os.path.realpath(__file__)) | ||
os.chdir(script_directory) | ||
|
||
english_file_name = find_language_file_name('en', 'xml') | ||
english_keys = get_xml_keys(english_file_name) | ||
other_file_name = find_language_file_name(language, 'xml') | ||
other_keys = get_xml_keys(other_file_name) | ||
|
||
if (other_keys == english_keys): | ||
print('\nThe sets of message keys in {} and {} are already the same.'.format(english_file_name, other_file_name)) | ||
else: | ||
current_map = {} | ||
parser = lxml.XMLParser(remove_blank_text=True) | ||
other_tree = lxml.parse(other_file_name, parser) | ||
other_root = other_tree.getroot() | ||
for message in other_root: | ||
if message.tag is lxml.Comment: | ||
other_root.remove(message) | ||
else: | ||
key = message.get('key') | ||
current_map[key] = message | ||
other_root.remove(message) | ||
english_tree = lxml.parse(english_file_name) | ||
english_root = english_tree.getroot() | ||
for message in english_root: | ||
if (message.tag != lxml.Comment): | ||
for element in message.xpath('descendant-or-self::*'): | ||
element.tag = element.tag[element.tag.index('}')+1:] | ||
Comment on lines
+41
to
+42
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What do these lines do? Seems that you are looking for moreover
But I must say There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, this is hacky, and, as your test runs indicate, not robust. Curly braces are used to wrap a namespace in tag names (e.g., " Ok, I can fix the joint xml construction, and maybe it's best to make sure it does not have a namespace (since messages_cs.xml and messages_sl.xml don't). And then these lines can and must be removed. |
||
key = message.get('key') | ||
if (key in other_keys): | ||
other_root.append(current_map[key]) | ||
else: | ||
message.tail = None | ||
message.set('TODO', 'translate') | ||
other_root.append(message) | ||
other_tree.write(other_file_name, encoding='UTF-8', pretty_print=True) | ||
print('\n{} has been updated to contain all and only the keys of {}.'.format(other_file_name, english_file_name)) | ||
print('') |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'll be a bit nitpicky here, but do we need `lxml`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Aha, probably not. I'll try without.