Skip to content

Commit

Permalink
Add DataFiles
Browse files Browse the repository at this point in the history
Provides a view into the data that is filtered by draft level
  • Loading branch information
movermeyer committed Apr 20, 2022
1 parent 690e496 commit ffa101b
Show file tree
Hide file tree
Showing 16 changed files with 335 additions and 37 deletions.
19 changes: 15 additions & 4 deletions lib/cldr/export.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,11 @@

module Cldr
module Export
# Exporter classes are loaded lazily, on first reference to the constant.
# Each constant is registered exactly once.
autoload :Code,     "cldr/export/code"
autoload :Data,     "cldr/export/data"
autoload :DataFile, "cldr/export/data_file"
autoload :Ruby,     "cldr/export/ruby"
autoload :Yaml,     "cldr/export/yaml"

SHARED_COMPONENTS = [
:Aliases, :CountryCodes, :CurrencyDigitsAndRounding, :LikelySubtags,
Expand All @@ -35,9 +36,19 @@ def base_path=(base_path)
@@base_path = File.expand_path(base_path)
end

# Returns the configured minimum draft status.
#
# Raises StandardError when no value has been assigned yet (see
# #minimum_draft_status=), so callers never silently receive nil.
def minimum_draft_status
  return @@minimum_draft_status if defined?(@@minimum_draft_status)

  raise StandardError, "minimum_draft_status is not yet set."
end

# Stores +draft_status+ in the @@minimum_draft_status class variable, which
# #minimum_draft_status reads back. #export assigns it from
# options[:minimum_draft_status] when that option is given.
# NOTE(review): presumably a Cldr::DraftStatus value (DataFile compares it with `<`) — confirm.
def minimum_draft_status=(draft_status)
@@minimum_draft_status = draft_status
end

def export(options = {}, &block)
locales = options[:locales] || Data.locales
components = options[:components] || Data.components
self.minimum_draft_status = options[:minimum_draft_status] if options[:minimum_draft_status]
self.base_path = options[:target] if options[:target]

shared_components, locale_components = components.partition do |component|
Expand Down
27 changes: 7 additions & 20 deletions lib/cldr/export/data/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -68,26 +68,13 @@ def paths
private

# Parses each file in +paths_to_merge+ and folds them, in order, into a single
# Cldr::Export::DataFile using DataFile#merge.
#
# Some parts of CLDR data require merging all files that share a root element
# before doing lookups.
# Ref: https://www.unicode.org/reports/tr35/tr35.html#XML_Format
#
# Returns a DataFile wrapping an empty document when no paths are given.
def merge_paths(paths_to_merge)
  return Cldr::Export::DataFile.new(Nokogiri::XML("")) if paths_to_merge.empty?

  first_path, *remaining_paths = paths_to_merge
  merged = Cldr::Export::DataFile.parse(File.read(first_path))

  # Files are parsed one at a time so only the accumulated result stays in memory.
  remaining_paths.reduce(merged) do |result, path|
    result.merge(Cldr::Export::DataFile.parse(File.read(path)))
  end
end
end
Expand Down
2 changes: 1 addition & 1 deletion lib/cldr/export/data/currency_digits_and_rounding.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def initialize
super

path = "#{Cldr::Export::Data.dir}/supplemental/supplementalData.xml"
doc = File.open(path) { |file| Nokogiri::XML(file) }
doc = Cldr::Export::DataFile.parse(File.read(path))

doc.xpath("//currencyData/fractions/info").each do |node|
code = node.attr("iso4217")
Expand Down
2 changes: 1 addition & 1 deletion lib/cldr/export/data/metazones.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def initialize
super

path = "#{Cldr::Export::Data.dir}/supplemental/metaZones.xml"
doc = File.open(path) { |file| Nokogiri::XML(file) }
doc = Cldr::Export::DataFile.parse(File.read(path))
self[:timezones] = doc.xpath("//metaZones/metazoneInfo/timezone").each_with_object({}) do |node, result|
timezone = node.attr("type").to_sym
result[timezone] = metazone(node.xpath("usesMetazone"))
Expand Down
2 changes: 1 addition & 1 deletion lib/cldr/export/data/parent_locales.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def initialize
super

path = File.join(Cldr::Export::Data.dir, "supplemental", "supplementalData.xml")
doc = File.open(path) { |file| Nokogiri::XML(file) }
doc = Cldr::Export::DataFile.parse(File.read(path))

doc.xpath("//parentLocales/parentLocale").each do |node|
parent = Cldr::Export.to_i18n(node.attr("parent"))
Expand Down
4 changes: 1 addition & 3 deletions lib/cldr/export/data/plural_rules.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,7 @@ def initialize(locale)

# Memoized hash of the supplemental plural-rule files, keyed by source name
# ("plurals" and "ordinals"). Each file is read once and parsed through
# Cldr::Export::DataFile.
def sources
  @sources ||= ["plurals", "ordinals"].to_h do |source_name|
    path = "#{Cldr::Export::Data.dir}/supplemental/#{source_name}.xml"
    [source_name, Cldr::Export::DataFile.parse(File.read(path))]
  end
end

Expand Down
6 changes: 1 addition & 5 deletions lib/cldr/export/data/plurals.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,7 @@ class Plurals < Hash

class << self
  # Plural rules parsed from the supplemental plurals.xml, memoized in @@rules
  # so the file is read and parsed only on first use.
  def rules
    @@rules ||= Rules.parse(source)
  end

  # Raw XML text of the supplemental plurals.xml file.
  def source
    File.read("#{Cldr::Export::Data.dir}/supplemental/plurals.xml")
  end
end

Expand Down
2 changes: 1 addition & 1 deletion lib/cldr/export/data/plurals/rules.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ class Plurals
class Rules < Array
class << self
def parse(xml)
doc = Nokogiri.XML(xml)
doc = Cldr::Export::DataFile.parse(xml)

rules = new
doc.xpath("//pluralRules").each do |node|
Expand Down
2 changes: 1 addition & 1 deletion lib/cldr/export/data/windows_zones.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def initialize
super

path = "#{Cldr::Export::Data.dir}/supplemental/windowsZones.xml"
doc = File.open(path) { |file| Nokogiri::XML(file) }
doc = Cldr::Export::DataFile.parse(File.read(path))
doc.xpath("//windowsZones/mapTimezones/mapZone").each_with_object(self) do |node, result|
zone = node.attr("other").to_s
territory = node.attr("territory")
Expand Down
83 changes: 83 additions & 0 deletions lib/cldr/export/data_file.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# frozen_string_literal: true

require "nokogiri"

module Cldr
module Export
# Wraps a parsed CLDR XML document and exposes only the data that meets a
# minimum draft status.
#
# The document is filtered once, when the DataFile is constructed; the query
# helpers (#xpath, #/, #traverse) delegate to the already-filtered document.
class DataFile
  class << self
    # Parses an XML string and wraps the result in a DataFile.
    #
    # @param string [String] XML source text
    # @param minimum_draft_status [Object, nil] forwarded unchanged to DataFile.new
    # @return [DataFile]
    def parse(string, minimum_draft_status: nil)
      # strict: malformed XML raises rather than being recovered.
      # noblanks: whitespace-only text nodes are dropped, so they are never seen by the draft filter.
      doc = Nokogiri::XML(string) { |config| config.strict.noblanks }
      DataFile.new(doc, minimum_draft_status: minimum_draft_status)
    end

    # Removes, in place, every text node whose parent element has a draft
    # status below +minimum_draft_status+, then removes any ancestor that is
    # left without children. A parent with no `draft` attribute is treated as
    # Cldr::DraftStatus::APPROVED.
    #
    # @return the same +doc+ that was passed in (mutated)
    def filter_by_draft(doc, minimum_draft_status)
      doc.traverse do |node|
        next unless node.text?

        draft_attribute = node.parent.attribute("draft")
        draft_status = draft_attribute.nil? ? Cldr::DraftStatus::APPROVED : Cldr::DraftStatus.fetch(draft_attribute)
        next unless draft_status < minimum_draft_status

        # Capture the ancestor chain first: once the node is detached it is no longer reachable from it.
        ancestors = node.ancestors
        node.remove
        # Remove the ancestors that are now empty
        ancestors.each do |ancestor|
          ancestor.remove if ancestor.children.empty?
        end
      end
      doc
    end
  end

  attr_reader :doc, :minimum_draft_status

  # @param doc the parsed document to wrap; it is filtered in place
  # @param minimum_draft_status [Object, nil] falls back to
  #   Cldr::Export.minimum_draft_status (which raises if it was never set)
  def initialize(doc, minimum_draft_status: nil)
    @minimum_draft_status = minimum_draft_status || Cldr::Export.minimum_draft_status
    @doc = DataFile.filter_by_draft(doc, @minimum_draft_status)
  end

  # Delegates to the filtered document's #traverse.
  def traverse(&block)
    @doc.traverse(&block)
  end

  # Delegates to the filtered document's #xpath.
  def xpath(path)
    @doc.xpath(path)
  end

  # Delegates to the filtered document's #/.
  def /(*args)
    @doc./(*args)
  end

  # Locale declared by the document's <identity> element, built from the
  # `type` attributes of <language> and <territory> joined with "-".
  #
  # @return [Symbol, nil] nil when neither element is present
  def locale
    language = @doc.xpath("//ldml/identity/language").first&.attribute("type")&.value
    territory = @doc.xpath("//ldml/identity/territory").first&.attribute("type")&.value
    elements = [language, territory].compact
    elements.empty? ? nil : elements.join("-").to_sym
  end

  # Returns a new DataFile holding this document's content followed by copies
  # of the children of +other+'s root. Neither receiver nor +other+ is mutated.
  #
  # Some parts (`ldml`, `ldmlBCP47` and `supplementalData`) of CLDR data require that you merge all the
  # files with the same root element before doing lookups.
  # Ref: https://www.unicode.org/reports/tr35/tr35.html#XML_Format
  #
  # Note that it technically is no longer compliant with the CLDR `ldml.dtd`, since:
  # * it has repeated elements
  # * the <identity> elements no longer refer to the filename
  #
  # However, this is not an issue, since #xpath will find all of the matches from each of the repeated elements,
  # and the <identity> elements are not important to us / make no sense when combined together.
  #
  # @raise [StandardError] if +other+ was filtered with a lower minimum draft
  #   status than this file, or if the two files report different locales
  def merge(other)
    raise StandardError, "Cannot merge data file with more permissive draft status" if other.minimum_draft_status < minimum_draft_status
    raise StandardError, "Cannot merge data file from different locales" if other.locale != locale

    # An empty document has no root element to append to or read from, so the
    # merge degenerates to a copy of whichever side has content.
    result =
      if @doc.root.nil?
        other.doc.dup
      else
        @doc.dup.tap do |copy|
          other_root = other.doc.root
          other_root.children.each { |child| copy.root.add_child(child.dup) } if other_root
        end
      end

    DataFile.new(result, minimum_draft_status: minimum_draft_status)
  end
end
end
end
Loading

0 comments on commit ffa101b

Please sign in to comment.