Integrate timezone data into the CLDR-via-QLocaleXml pipeline
Future work shall need the timezone alias data to be synchronized between the (expanded) locale-independent timezone data and the (coming) locale-dependent timezone data. The latter shall need to come via QLocaleXml, hence the former now needs to, too. This makes no change to the generated data, aside from changing the regeneration instructions for qtimezoneprivate_data_p.h, to use the same scripts as locale data, instead of cldr2qtimezone.py, which is now removed.

Task-number: QTBUG-115158
Change-Id: I47ddd95f6af1855cbb1f601e9074c13f213cd61c
Reviewed-by: Mate Barany <mate.barany@qt.io>
bb10
parent
4e23dbb742
commit
9534341654
|
|
@ -97,7 +97,8 @@ struct UtcData
|
|||
|
||||
http://www.unicode.org/cldr/
|
||||
|
||||
Do not edit this code: run cldr2qtimezone.py on updated (or
|
||||
Do not edit this section: instead regenerate it using
|
||||
cldr2qlocalexml.py and qlocalexml2cpp.py on updated (or
|
||||
edited) CLDR data; see qtbase/util/locale_database/.
|
||||
*/
|
||||
|
||||
|
|
|
|||
|
|
@ -5,10 +5,8 @@ data (like date formats, country names etc). It is provided by the
|
|||
Unicode consortium.
|
||||
|
||||
See cldr2qlocalexml.py for how to run it and qlocalexml2cpp.py to
|
||||
update the locale data tables (principally text/qlocale_data_p.h and
|
||||
time/q*calendar_data_p.h under src/corelib/). See enumdata.py for when
|
||||
and how to update the data it provides. You shall definitely need to
|
||||
pass --no-verify or -n to git commit for these changes.
|
||||
|
||||
See cldr2qtimezone.py on how to update tables of Windows-specific
|
||||
names for zones and UTC-offset zone names.
|
||||
update the locale data tables (principally text/qlocale_data_p.h,
|
||||
time/q*calendar_data_p.h and time/qtimezone*_data_p.h under
|
||||
src/corelib/). See enumdata.py and zonedata.py for when and how to
|
||||
update the data they provide. You shall definitely need to pass
|
||||
--no-verify or -n to git commit for these changes.
|
||||
|
|
|
|||
|
|
@ -74,6 +74,77 @@ class CldrReader (object):
|
|||
# more out.
|
||||
pass # self.__wrapped(self.whitter, 'Skipping likelySubtags (for unknown codes): ', skips)
|
||||
|
||||
def zoneData(self):
    """Locale-independent timezone data.

    Returns a triple (alias, defaults, winIds) in which:
      * alias is a mapping from aliases for IANA zone IDs, that
        have the form of IANA IDs, to actual current IANA IDs; in
        particular, this maps each CLDR zone ID to its
        corresponding IANA ID.
      * defaults maps each Windows name for a zone to the IANA ID
        to use for it by default (when no territory is specified,
        or when no entry in winIds matches the given Windows name
        and territory).
      * winIds is a mapping {(winId, land): ianaList} from Windows
        name and territory code to the space-joined list of IANA
        IDs associated with the Windows name in the given
        territory.

    Reports, via self.whitter, any Windows IDs given in
    zonedata.windowsIdList that are no longer covered by the CLDR
    data and any (Windows ID, territory) pair that CLDR lists more
    than once; the IANA lists of such repeated entries are merged,
    omitting later duplicates of earlier IDs. Reports, via
    self.grumble, any territory codes found in the CLDR timezone
    data that are not mentioned in enumdata.territory_map; entries
    for those territories are left out of winIds."""
    alias, ignored = self.root.bcp47Aliases()
    defaults, winIds = self.root.readWindowsTimeZones(alias)

    from zonedata import windowsIdList
    winUnused = set(n for n, o in windowsIdList).difference(
        set(defaults).union(w for w, t, ids in winIds))
    if winUnused:
        # Sorted, so that the report is stable from run to run.
        joined = "\n\t".join(sorted(winUnused))
        self.whitter.write(
            f'No Windows ID in\n\t{joined}\nis still in use.\n'
            'They could be removed at the next major version.\n')

    # Check for duplicate entries in winIds: gather the IANA lists
    # of every (Windows ID, territory) pair, including the first
    # seen for each pair, so that no list is lost when merging.
    grouped = {}
    for w, t, ids in sorted(winIds):
        grouped.setdefault((w, t), []).append(ids)
    winDup = {key: seq for key, seq in grouped.items() if len(seq) > 1}
    if winDup:
        joined = '\n\t'.join(f'{t}, {w}: {", ".join(ids)}'
                             for (w, t), ids in winDup.items())
        self.whitter.write(
            f'Duplicated (territory, Windows ID) entries:\n\t{joined}\n')
        # Replace each group of duplicates with one merged entry:
        winIds = [trip for trip in winIds if trip[:2] not in winDup]
        for (w, t), seq in winDup.items():
            ianaList = []
            for ids in seq:
                for iana in ids.split():
                    if iana not in ianaList:
                        ianaList.append(iana)
            winIds.append((w, t, ' '.join(ianaList)))

    from enumdata import territory_map
    unLand = set(t for w, t, ids in winIds).difference(
        v[1] for k, v in territory_map.items())
    if unLand:
        self.grumble.write(
            'Unknown territory codes in timezone data: '
            f'{", ".join(sorted(unLand))}\n'
            'Skipping Windows zone mappings for these territories\n')
        winIds = [(w, t, ids) for w, t, ids in winIds if t not in unLand]

    # Convert list of triples to mapping:
    winIds = {(w, t): ids for w, t, ids in winIds}
    return alias, defaults, winIds
|
||||
|
||||
def readLocales(self, calendars = ('gregorian',)):
    """Map each locale's identifying key to its locale object.

    The key of each locale is the tuple of its language_id,
    script_id, territory_id and variant_code attributes. The
    locales are those produced by self.__allLocales(calendars)."""
    found = {}
    for locale in self.__allLocales(calendars):
        key = (locale.language_id, locale.script_id,
               locale.territory_id, locale.variant_code)
        found[key] = locale
    return found
|
||||
|
|
@ -458,9 +529,13 @@ enumdata.py (keeping the old name as an alias):
|
|||
|
||||
return alias, naming
|
||||
|
||||
def readWindowsTimeZones(self, alias):
    """Digest CLDR's MS-Win time-zone name mapping.

    Single argument, alias, should be the first part of the pair
    returned by a call to bcp47Aliases(); it shall be used to
    transform CLDR IDs into IANA IDs.

    MS-Win have their own eccentric names for time-zones. CLDR
    helpfully provides a translation to more orthodox names,
    albeit these are CLDR IDs - see bcp47Aliases() - rather than
    IANA IDs. This reads, from the windowsZones.xml supplement, the
    supplementalData/windowsZones/mapTimezones/mapZone nodes with
    attributes

      territory -- ISO code
      type -- space-joined sequence of CLDR IDs of zones
      other -- Windows name of these zones in the given territory

    When 'territory' is '001', type is always just a single CLDR
    zone ID. This is the default zone for the given Windows name.

    For each mapZone node, its type is split on spacing and
    cleaned up as follows. Those entries that are keys of alias
    are mapped thereby to their canonical IANA IDs; all others are
    presumed to be canonical IANA IDs and left unchanged. Any
    later duplicates of earlier entries are omitted. The result
    list of IANA IDs is joined with single spaces between to give
    a string s.

    Returns a twople (defaults, windows) in which defaults is a
    mapping, from Windows ID to IANA ID (derived from the mapZone
    nodes with territory='001'), and windows is a list of triples
    (Windows ID, territory code, IANA ID list) in which the first
    two entries are the 'other' and 'territory' fields of a
    mapZone element and the last is s, its cleaned-up list of IANA
    IDs."""

    defaults, windows = {}, []
    zones = self.supplement('windowsZones.xml')
    for name, attrs in zones.find('windowsZones/mapTimezones'):
        if name != 'mapZone':
            continue

        wid, code, ianas = attrs['other'], attrs['territory'], []
        for cldr in attrs['type'].split():
            # IDs that alias doesn't know are taken to be IANA IDs already.
            iana = alias.get(cldr, cldr)
            if iana not in ianas:
                ianas.append(iana)

        if code == '001':
            # The world-wide entry is the default, so must be unique:
            assert len(ianas) == 1, (wid, *ianas)
            defaults[wid] = ianas[0]
        else:
            windows.append((wid, code, ' '.join(ianas)))

    return defaults, windows
|
||||
|
||||
@property
|
||||
def cldrVersion(self):
|
||||
|
|
|
|||
|
|
@ -27,10 +27,8 @@ append new entries to enumdata.py's lists and update documentation in
|
|||
src/corelib/text/qlocale.qdoc, adding the new entries in alphabetic
|
||||
order.
|
||||
|
||||
While updating the locale data, check also for updates to MS-Win's
|
||||
time zone names; see cldr2qtimezone.py for details.
|
||||
|
||||
All the scripts mentioned support --help to tell you how to use them.
|
||||
Both of the scripts mentioned support --help to tell you how to use
|
||||
them.
|
||||
|
||||
.. _CLDR: https://unicode.org/Public/cldr/
|
||||
.. _github: https://github.com/unicode-org/cldr
|
||||
|
|
@ -92,6 +90,7 @@ def main(argv, out, err):
|
|||
writer.version(reader.root.cldrVersion)
|
||||
writer.enumData(reader.root.englishNaming)
|
||||
writer.likelySubTags(reader.likelySubTags())
|
||||
writer.zoneData(*reader.zoneData()) # Locale-independent zone data.
|
||||
writer.locales(reader.readLocales(args.calendars), args.calendars)
|
||||
|
||||
writer.close(err.write)
|
||||
|
|
|
|||
|
|
@ -1,226 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
# Copyright (C) 2021 The Qt Company Ltd.
|
||||
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
|
||||
"""Parse CLDR data for QTimeZone use with MS-Windows
|
||||
|
||||
Script to parse the CLDR common/supplemental/windowsZones.xml file and
|
||||
prepare its data for use in QTimeZone. See ``./cldr2qlocalexml.py`` for
|
||||
where to get the CLDR data. Pass its root directory as first parameter
|
||||
to this script. You can optionally pass the qtbase root directory as
|
||||
second parameter; it defaults to the root of the checkout containing
|
||||
this script. This script updates qtbase's
|
||||
src/corelib/time/qtimezoneprivate_data_p.h with the new data.
|
||||
"""
|
||||
|
||||
import datetime
|
||||
from pathlib import Path
|
||||
import textwrap
|
||||
import argparse
|
||||
|
||||
from localetools import unicode2hex, wrap_list, Error, SourceFileEditor, qtbase_root
|
||||
from cldr import CldrAccess
|
||||
# This script shall report any updates zonedata may need.
|
||||
from zonedata import windowsIdList, utcIdList
|
||||
|
||||
class ByteArrayData:
|
||||
def __init__(self):
|
||||
self.data = []
|
||||
self.hash = {}
|
||||
|
||||
def append(self, s):
|
||||
s = s + '\0'
|
||||
if s in self.hash:
|
||||
return self.hash[s]
|
||||
|
||||
lst = unicode2hex(s)
|
||||
index = len(self.data)
|
||||
if index > 0xffff:
|
||||
raise Error(f'Index ({index}) outside the uint16 range !')
|
||||
self.hash[s] = index
|
||||
self.data += lst
|
||||
return index
|
||||
|
||||
def write(self, out, name):
|
||||
out(f'\nstatic constexpr char {name}[] = {{\n')
|
||||
out(wrap_list(self.data, 16)) # 16 == 100 // len('0xhh, ')
|
||||
# Will over-spill 100-col if some 4-digit hex show up, but none do (yet).
|
||||
out('\n};\n')
|
||||
|
||||
class ZoneIdWriter (SourceFileEditor):
|
||||
# All the output goes into namespace QtTimeZoneCldr.
|
||||
def write(self, version, alias, defaults, windowsIds):
|
||||
self.__writeWarning(version)
|
||||
windows, iana, aliased = self.__writeTables(self.writer.write, alias, defaults, windowsIds)
|
||||
windows.write(self.writer.write, 'windowsIdData')
|
||||
iana.write(self.writer.write, 'ianaIdData')
|
||||
aliased.write(self.writer.write, 'aliasIdData')
|
||||
|
||||
def __writeWarning(self, version):
|
||||
self.writer.write(f"""
|
||||
/*
|
||||
This part of the file was generated on {datetime.date.today()} from the
|
||||
Common Locale Data Repository v{version}
|
||||
|
||||
http://www.unicode.org/cldr/
|
||||
|
||||
Do not edit this code: run cldr2qtimezone.py on updated (or
|
||||
edited) CLDR data; see qtbase/util/locale_database/.
|
||||
*/
|
||||
|
||||
""")
|
||||
|
||||
@staticmethod
|
||||
def __writeTables(out, alias, defaults, windowsIds):
|
||||
aliasIdData = ByteArrayData()
|
||||
ianaIdData, windowsIdData = ByteArrayData(), ByteArrayData()
|
||||
|
||||
# Write IANA alias table
|
||||
out('// Alias ID Index, Alias ID Index\n')
|
||||
out('static constexpr AliasData aliasMappingTable[] = {\n')
|
||||
for name, iana in sorted(alias.items()):
|
||||
if name != iana:
|
||||
out(' {{ {:6d},{:6d} }}, // {} -> {}\n'.format(
|
||||
aliasIdData.append(name),
|
||||
aliasIdData.append(iana), name, iana))
|
||||
out('};\n\n')
|
||||
|
||||
# Write Windows/IANA table
|
||||
out('// Windows ID Key, Territory Enum, IANA ID Index\n')
|
||||
out('static constexpr ZoneData zoneDataTable[] = {\n')
|
||||
# Sorted by (Windows ID Key, territory enum)
|
||||
for index, data in sorted(windowsIds.items()):
|
||||
out(' {{ {:6d},{:6d},{:6d} }}, // {} / {}\n'.format(
|
||||
data['windowsKey'], data['territoryId'],
|
||||
ianaIdData.append(data['ianaList']),
|
||||
data['windowsId'], data['territory']))
|
||||
out('};\n\n')
|
||||
|
||||
# Write Windows ID key table
|
||||
out('// Windows ID Key, Windows ID Index, IANA ID Index, UTC Offset\n')
|
||||
out('static constexpr WindowsData windowsDataTable[] = {\n')
|
||||
# Sorted by Windows ID key; sorting case-insensitively by
|
||||
# Windows ID must give the same order.
|
||||
winIdNames = [x.lower() for x, y in windowsIdList]
|
||||
assert all(x == y for x, y in zip(winIdNames, sorted(winIdNames))), \
|
||||
[(x, y) for x, y in zip(winIdNames, sorted(winIdNames)) if x != y]
|
||||
for index, pair in enumerate(windowsIdList, 1):
|
||||
out(' {{ {:6d},{:6d},{:6d},{:6d} }}, // {}\n'.format(
|
||||
index,
|
||||
windowsIdData.append(pair[0]),
|
||||
ianaIdData.append(defaults[index]),
|
||||
pair[1], pair[0]))
|
||||
out('};\n\n')
|
||||
|
||||
def offsetOf(utcName):
|
||||
"Maps a UTC±HH:mm name to its offset in seconds"
|
||||
assert utcName.startswith('UTC')
|
||||
if len(utcName) == 3:
|
||||
return 0
|
||||
assert utcName[3] in '+-', utcName
|
||||
sign = -1 if utcName[3] == '-' else 1
|
||||
assert len(utcName) == 9 and utcName[6] == ':', utcName
|
||||
hour, mins = int(utcName[4:6]), int(utcName[-2:])
|
||||
return sign * (hour * 60 + mins) * 60
|
||||
|
||||
offsetMap = {}
|
||||
for name in utcIdList:
|
||||
offset = offsetOf(name)
|
||||
offsetMap[offset] = offsetMap.get(offset, ()) + (name,)
|
||||
# Write UTC ID key table
|
||||
out('// IANA ID Index, UTC Offset\n')
|
||||
out('static constexpr UtcData utcDataTable[] = {\n')
|
||||
for offset in sorted(offsetMap.keys()): # Sort so C++ can binary-chop.
|
||||
names = offsetMap[offset];
|
||||
out(' {{ {:6d},{:6d} }}, // {}\n'.format(
|
||||
ianaIdData.append(' '.join(names)), offset, names[0]))
|
||||
out('};\n')
|
||||
|
||||
return windowsIdData, ianaIdData, aliasIdData
|
||||
|
||||
|
||||
def main(out, err):
|
||||
"""Parses CLDR's data and updates Qt's representation of it.
|
||||
|
||||
Takes sys.stdout, sys.stderr (or equivalents) as
|
||||
arguments. Expects two command-line options: the root of the
|
||||
unpacked CLDR data-file tree and the root of the qtbase module's
|
||||
checkout. Updates QTimeZone's private data about Windows time-zone
|
||||
IDs."""
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Update Qt's CLDR-derived timezone data.")
|
||||
parser.add_argument('cldr_path', help='path to the root of the CLDR tree')
|
||||
parser.add_argument('qtbase_path',
|
||||
help='path to the root of the qtbase source tree',
|
||||
nargs='?', default=qtbase_root)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
cldrPath = Path(args.cldr_path)
|
||||
qtPath = Path(args.qtbase_path)
|
||||
|
||||
if not qtPath.is_dir():
|
||||
parser.error(f"No such Qt directory: {qtPath}")
|
||||
|
||||
if not cldrPath.is_dir():
|
||||
parser.error(f"No such CLDR directory: {cldrPath}")
|
||||
|
||||
dataFilePath = qtPath.joinpath('src/corelib/time/qtimezoneprivate_data_p.h')
|
||||
|
||||
if not dataFilePath.is_file():
|
||||
parser.error(f'No such file: {dataFilePath}')
|
||||
|
||||
access = CldrAccess(cldrPath)
|
||||
try:
|
||||
alias, ignored = access.bcp47Aliases()
|
||||
# TODO: ignored maps IANA IDs to an extra-long name of the zone
|
||||
except IOError as e:
|
||||
parser.error(
|
||||
f'Failed to open common/bcp47/timezone.xml: {e}')
|
||||
return 1
|
||||
except Error as e:
|
||||
err.write('\n'.join(textwrap.wrap(
|
||||
f'Failed to read bcp47/timezone.xml: {e}',
|
||||
subsequent_indent=' ', width=80)) + '\n')
|
||||
return 1
|
||||
|
||||
try:
|
||||
version, defaults, winIds = access.readWindowsTimeZones(
|
||||
{name: ind for ind, name in enumerate((k for k, v in windowsIdList), 1)},
|
||||
alias)
|
||||
except IOError as e:
|
||||
parser.error(
|
||||
f'Failed to open common/supplemental/windowsZones.xml: {e}')
|
||||
return 1
|
||||
except Error as e:
|
||||
err.write('\n'.join(textwrap.wrap(
|
||||
f'Failed to read windowsZones.xml: {e}',
|
||||
subsequent_indent=' ', width=80)) + '\n')
|
||||
return 1
|
||||
|
||||
# Offsets of the windows tables, that are whole numbers of minutes, in minutes:
|
||||
winOff = set(m for m, s in (divmod(v, 60) for k, v in windowsIdList) if s == 0)
|
||||
winUtc = set(f'UTC-{h:02}:{m:02}'
|
||||
for h, m in (divmod(-o, 60) for o in winOff if o < 0)).union(
|
||||
f'UTC+{h:02}:{m:02}'
|
||||
for h, m in (divmod(o, 60) for o in winOff if o > 0))
|
||||
# All such offsets should be represented by entries in utcIdList:
|
||||
newUtc = winUtc.difference(utcIdList)
|
||||
if newUtc:
|
||||
err.write(f'Please add {", ".join(newUtc)} to zonedata.utcIdList\n')
|
||||
return 1
|
||||
|
||||
out.write('Input files parsed, now writing data\n')
|
||||
|
||||
try:
|
||||
with ZoneIdWriter(dataFilePath, qtPath) as writer:
|
||||
writer.write(version, alias, defaults, winIds)
|
||||
except Exception as e:
|
||||
err.write(f'\nError while updating timezone data: {e}\n')
|
||||
return 1
|
||||
|
||||
out.write(f'Data generation completed, please check the new file at {dataFilePath}\n')
|
||||
return 0
|
||||
|
||||
if __name__ == '__main__':
|
||||
import sys
|
||||
sys.exit(main(sys.stdout, sys.stderr))
|
||||
|
|
@ -97,6 +97,21 @@ class QLocaleXmlReader (object):
|
|||
|
||||
yield (language, script, territory), locale
|
||||
|
||||
def aliasToIana(self):
    """Yield an (alias, IANA ID) pair for each zoneAlias element.

    The elements are those found for the 'zoneAliases' group of
    self.root; each pair holds the text of the element's 'alias'
    and 'iana' children."""
    text = self.__firstChildText
    yield from ((text(node, 'alias'), text(node, 'iana'))
                for node in self.__eachEltInGroup(
                        self.root, 'zoneAliases', 'zoneAlias'))
|
||||
|
||||
def msToIana(self):
    """Yield a (Windows ID, IANA ID) pair for each msZoneIana element.

    The elements are those found for the 'windowsZone' group of
    self.root; each pair holds the text of the element's 'msid'
    and 'iana' children."""
    text = self.__firstChildText
    yield from ((text(node, 'msid'), text(node, 'iana'))
                for node in self.__eachEltInGroup(
                        self.root, 'windowsZone', 'msZoneIana'))
|
||||
|
||||
def msLandIanas(self):
    """Yield a triple for each msLandZones element.

    The elements are those found for the 'windowsZone' group of
    self.root; each triple holds the text of the element's 'msid',
    'territorycode' and 'ianaids' children, in that order."""
    text = self.__firstChildText
    fields = ('msid', 'territorycode', 'ianaids')
    for node in self.__eachEltInGroup(self.root, 'windowsZone', 'msLandZones'):
        yield tuple(text(node, field) for field in fields)
|
||||
|
||||
def languageIndices(self, locales):
|
||||
index = 0
|
||||
for key, value in self.languages.items():
|
||||
|
|
@ -327,6 +342,33 @@ class QLocaleXmlWriter (object):
|
|||
self.__closeTag('likelySubtag')
|
||||
self.__closeTag('likelySubtags')
|
||||
|
||||
def zoneData(self, alias, defaults, windowsIds):
    """Write the locale-independent timezone data.

    First a zoneAliases group is written, holding one zoneAlias
    element per entry of the mapping alias, sorted; each has an
    alias child and an iana child. Then a windowsZone group is
    written, holding one msLandZones element per entry of the
    mapping windowsIds, {(Windows ID, territory code): IANA IDs},
    followed by one msZoneIana element per entry of the mapping
    defaults, {Windows ID: IANA ID}."""
    def element(tag, *children):
        # Write one element whose children each hold only text.
        self.__openTag(tag)
        for child, text in children:
            self.inTag(child, text)
        self.__closeTag(tag)

    self.__openTag('zoneAliases')
    # Each value is a single IANA ID; each key has the same form,
    # but has been made redundant.
    for old, current in sorted(alias.items()):
        element('zoneAlias', ('alias', old), ('iana', current))
    self.__closeTag('zoneAliases')

    self.__openTag('windowsZone')
    for (winName, land), ianaIds in windowsIds.items():
        # ianaIds is a space-joined sequence of IANA IDs
        element('msLandZones', ('msid', winName),
                ('territorycode', land), ('ianaids', ianaIds))

    for winName, ianaId in defaults.items():
        element('msZoneIana', ('msid', winName), ('iana', ianaId))
    self.__closeTag('windowsZone')
|
||||
|
||||
def locales(self, locales, calendars):
|
||||
self.__openTag('localeList')
|
||||
self.__openTag('locale')
|
||||
|
|
|
|||
|
|
@ -16,6 +16,8 @@ start = element localeDatabase {
|
|||
element scriptList { Script+ },
|
||||
element territoryList { Territory+ },
|
||||
element likelySubtags { LikelySubtag+ },
|
||||
element zoneAliases { ZoneAlias+ },
|
||||
element windowsZone { MsLandZones+, MsZoneIana+ },
|
||||
element localeList { Locale+ }
|
||||
}
|
||||
|
||||
|
|
@ -39,6 +41,23 @@ LocaleTriplet = (
|
|||
element territory { text }
|
||||
)
|
||||
|
||||
# TODO: xsd patterns for IANA IDs and space-joined lists of them
|
||||
ZoneAlias = element zoneAlias {
|
||||
element alias { text },
|
||||
element iana { text }
|
||||
}
|
||||
|
||||
MsLandZones = element msLandZones {
|
||||
element msid { text },
|
||||
element territorycode { text },
|
||||
element ianaids { text }
|
||||
}
|
||||
|
||||
MsZoneIana = element msZoneIana {
|
||||
element msid { text },
|
||||
element iana { text }
|
||||
}
|
||||
|
||||
WeekDay = ("sun" | "mon" | "tue" | "wed" | "thu" | "fri" | "sat")
|
||||
Digit = xsd:string { pattern = "\d" }
|
||||
Punctuation = xsd:string { pattern = "\p{P}" }
|
||||
|
|
|
|||
|
|
@ -22,6 +22,23 @@ from typing import Optional
|
|||
from qlocalexml import QLocaleXmlReader
|
||||
from localetools import *
|
||||
from iso639_3 import LanguageCodeData
|
||||
from zonedata import utcIdList, windowsIdList
|
||||
|
||||
|
||||
# Sanity check the zone data:

# Offsets of the windows tables, in minutes, where whole numbers
# (entries whose offset is not a multiple of 60 are left out):
winOff = set(m for m, s in (divmod(v, 60) for k, v in windowsIdList) if s == 0)
# The UTC±HH:mm forms of the non-zero offsets:
winUtc = set(f'UTC-{h:02}:{m:02}'
             for h, m in (divmod(-o, 60) for o in winOff if o < 0)
             ).union(f'UTC+{h:02}:{m:02}'
                     for h, m in (divmod(o, 60) for o in winOff if o > 0))
# All such offsets should be represented by entries in utcIdList:
newUtc = winUtc.difference(utcIdList)
# Name the missing zones in a stable order, so the message can be acted on.
assert not newUtc, (
    'Please add missing UTC-offset zones to zonedata.utcIdList: '
    + ', '.join(sorted(newUtc)))
|
||||
|
||||
|
||||
class LocaleKeySorter:
|
||||
"""Sort-ordering representation of a locale key.
|
||||
|
|
@ -47,6 +64,28 @@ class LocaleKeySorter:
|
|||
# TODO: should we compare territory before or after script ?
|
||||
return (key[0], self.foreign(key)) + key[1:]
|
||||
|
||||
class ByteArrayData:
    """Accumulates '\\0'-terminated strings for output as one C char array.

    Each distinct string is stored once; append() returns the index
    in the array at which the string's data starts."""
    def __init__(self):
        self.data = [] # The array's entries, as produced by unicode2hex()
        self.hash = {} # Maps each terminated string to its start index

    def append(self, s):
        """Store s, if not already stored, and return its start index.

        Raises Error if a new string would start at an index beyond
        the range of a uint16."""
        text = s + '\0'
        try:
            return self.hash[text]
        except KeyError:
            pass # Not seen before: add it below.

        start = len(self.data)
        if start > 0xffff:
            raise Error(f'Index ({start}) outside the uint16 range !')
        self.hash[text] = start
        self.data.extend(unicode2hex(text))
        return start

    def write(self, out, name):
        """Pass to out() the C++ definition of the array, called name."""
        out(f'\nstatic constexpr char {name}[] = {{\n')
        # 16 == 100 // len('0xhh, ')
        body = wrap_list(self.data, 16)
        out(body)
        # Will over-spill 100-col if some 4-digit hex show up, but none do (yet).
        out('\n};\n')
|
||||
|
||||
class StringDataToken:
|
||||
def __init__(self, index, length, bits):
|
||||
if index > 0xffff:
|
||||
|
|
@ -136,6 +175,92 @@ class LocaleSourceEditor (SourceFileEditor):
|
|||
|
||||
""")
|
||||
|
||||
class TimeZoneDataWriter (LocaleSourceEditor):
    """Writes the locale-independent timezone tables.

    The methods utcTable(), aliasToIana(), msToIana() and
    msLandIanas() each write one table, recording the strings it
    refers to; writeTables() then writes the arrays holding those
    strings, so must be called after them."""
    def __init__(self, path: Path, temp: Path, version: str):
        super().__init__(path, temp, version)
        self.__ianaTable = ByteArrayData() # Single IANA IDs
        self.__ianaListTable = ByteArrayData() # Space-joined lists of IDs
        self.__windowsTable = ByteArrayData() # Windows names for zones
        # Sorted case-insensitively by Windows ID; a Windows ID's
        # key is its one-based index in this order.
        self.__windowsList = sorted(windowsIdList,
                                    key=lambda p: p[0].lower())
        self.windowsKey = {name: (key, off) for key, (name, off)
                           in enumerate(self.__windowsList, 1)}

    def utcTable(self):
        """Write the table mapping UTC offsets to the utcIdList names."""
        offsetMap, out = {}, self.writer.write
        for name in utcIdList:
            offset = self.__offsetOf(name)
            offsetMap[offset] = offsetMap.get(offset, ()) + (name,)

        # Write UTC ID key table
        out('// IANA ID Index, UTC Offset\n')
        out('static constexpr UtcData utcDataTable[] = {\n')
        for offset in sorted(offsetMap.keys()): # Sort so C++ can binary-chop.
            names = offsetMap[offset]
            joined = self.__ianaListTable.append(' '.join(names))
            out(f' {{ {joined:6d},{offset:6d} }}, // {names[0]}\n')
        out('};\n')

    def aliasToIana(self, pairs):
        """Write the table of aliases for IANA IDs.

        Takes an iterable of (alias, IANA ID) pairs, presumed
        already sorted; any whose two members are equal are
        skipped."""
        out, store = self.writer.write, self.__ianaTable.append

        out('// Alias ID Index, Alias ID Index\n')
        out('static constexpr AliasData aliasMappingTable[] = {\n')
        for name, iana in pairs: # They're ready-sorted
            if name != iana:
                out(f' {{ {store(name):6d},{store(iana):6d} }},'
                    f' // {name} -> {iana}\n')
        out('};\n\n')

    def msToIana(self, pairs):
        """Write the table of Windows IDs with their default IANA IDs.

        Takes an iterable of (Windows ID, IANA ID) pairs. Raises
        Error if any entry in zonedata.windowsIdList has no pair
        here, as its table row could not be completed."""
        out, winStore = self.writer.write, self.__windowsTable.append
        ianaStore = self.__ianaListTable.append # TODO: Should be __ianaTable
        alias = dict(pairs) # {MS name: IANA ID}
        missing = [name for name, offset in self.__windowsList
                   if name not in alias]
        if missing:
            raise Error('No default IANA ID for Windows IDs: '
                        + ', '.join(missing))

        out('// Windows ID Key, Windows ID Index, IANA ID Index, UTC Offset\n')
        out('static constexpr WindowsData windowsDataTable[] = {\n')
        # Sorted by Windows ID key:

        for index, (name, offset) in enumerate(self.__windowsList, 1):
            out(f' {{ {index:6d},{winStore(name):6d},'
                f'{ianaStore(alias[name]):6d},{offset:6d} }}, // {name}\n')
        out('};\n\n')

    def msLandIanas(self, triples): # (MS name, territory code, IANA list)
        """Write the table of IANA IDs for each Windows ID and territory.

        Takes an iterable of (Windows ID, territory code,
        space-joined IANA IDs) triples. Raises Error, naming the
        culprits, if any Windows ID is missing from
        zonedata.windowsIdList or any territory code is missing from
        enumdata.territory_map."""
        out, store = self.writer.write, self.__ianaListTable.append
        from enumdata import territory_map
        landKey = {code: (i, name) for i, (name, code) in territory_map.items()}
        triples = list(triples) # May be an iterator; it is scanned repeatedly.

        badZones = set(name for name, land, ianas in triples
                       if name not in self.windowsKey)
        if badZones:
            raise Error('Unknown Windows IDs, please add to zonedata.windowsIdList: '
                        + ', '.join(sorted(badZones)))
        unLands = set(land for name, land, ianas in triples
                      if land not in landKey)
        if unLands:
            raise Error('Unknown territory codes, please add to enumdata.py: '
                        + ', '.join(sorted(unLands)))

        seq = sorted((self.windowsKey[name][0], landKey[land][0], name, landKey[land][1], ianas)
                     for name, land, ianas in triples)

        out('// Windows ID Key, Territory Enum, IANA ID Index\n')
        out('static constexpr ZoneData zoneDataTable[] = {\n')
        # Sorted by (Windows ID Key, territory enum)
        for winId, landId, name, land, ianas in seq:
            out(f' {{ {winId:6d},{landId:6d},{store(ianas):6d} }},'
                f' // {name} / {land}\n')
        out('};\n\n')

    def writeTables(self):
        """Write the arrays of strings that the tables index into."""
        self.__windowsTable.write(self.writer.write, 'windowsIdData')
        # TODO: these are misnamed, entries in the first are lists,
        # those in the next are single IANA IDs
        self.__ianaListTable.write(self.writer.write, 'ianaIdData')
        self.__ianaTable.write(self.writer.write, 'aliasIdData')

    # Implementation details:
    @staticmethod
    def __offsetOf(utcName):
        "Maps a UTC±HH:mm name to its offset in seconds"
        assert utcName.startswith('UTC')
        if len(utcName) == 3:
            return 0
        assert utcName[3] in '+-', utcName
        sign = -1 if utcName[3] == '-' else 1
        assert len(utcName) == 9 and utcName[6] == ':', utcName
        hour, mins = int(utcName[4:6]), int(utcName[-2:])
        return sign * (hour * 60 + mins) * 60
|
||||
|
||||
class LocaleDataWriter (LocaleSourceEditor):
|
||||
def likelySubtags(self, likely):
|
||||
# First sort likely, so that we can use binary search in C++
|
||||
|
|
@ -623,6 +748,20 @@ def main(argv, out, err):
|
|||
err.write(f'\nError updating qlocale.h: {e}\n')
|
||||
return 1
|
||||
|
||||
# Locale-independent timezone data
|
||||
try:
|
||||
with TimeZoneDataWriter(qtsrcdir.joinpath(
|
||||
'src/corelib/time/qtimezoneprivate_data_p.h'),
|
||||
qtsrcdir, reader.cldrVersion) as writer:
|
||||
writer.aliasToIana(reader.aliasToIana())
|
||||
writer.msLandIanas(reader.msLandIanas())
|
||||
writer.msToIana(reader.msToIana())
|
||||
writer.utcTable()
|
||||
writer.writeTables()
|
||||
except Exception as e:
|
||||
err.write(f'\nError updating qtimezoneprivate_data_p.h: {e}\n')
|
||||
return 1
|
||||
|
||||
# ./testlocales/localemodel.cpp
|
||||
try:
|
||||
path = 'util/locale_database/testlocales/localemodel.cpp'
|
||||
|
|
|
|||
Loading…
Reference in New Issue