Integrate timezone data into the CLDR-via-QLocaleXml pipeline

Future work shall need the timezone alias data to be synchronized
between the (expanded) locale-independent timezone data and the
(coming) locale-dependent timezone data. The latter shall need to come
via QLocaleXml, hence the former now needs to, too.

This makes no change to the generated data, aside from changing the
regeneration instructions for qtimezoneprivate_data_p.h, to use the
same scripts as locale data, instead of cldr2qtimezone.py, which is
now removed.

Task-number: QTBUG-115158
Change-Id: I47ddd95f6af1855cbb1f601e9074c13f213cd61c
Reviewed-by: Mate Barany <mate.barany@qt.io>
bb10
Edward Welbourne 2024-03-22 13:57:28 +01:00
parent 4e23dbb742
commit 9534341654
8 changed files with 314 additions and 297 deletions

View File

@ -97,7 +97,8 @@ struct UtcData
http://www.unicode.org/cldr/
Do not edit this code: run cldr2qtimezone.py on updated (or
Do not edit this section: instead regenerate it using
cldr2qlocalexml.py and qlocalexml2cpp.py on updated (or
edited) CLDR data; see qtbase/util/locale_database/.
*/

View File

@ -5,10 +5,8 @@ data (like date formats, country names etc). It is provided by the
Unicode consortium.
See cldr2qlocalexml.py for how to run it and qlocalexml2cpp.py to
update the locale data tables (principally text/qlocale_data_p.h and
time/q*calendar_data_p.h under src/corelib/). See enumdata.py for when
and how to update the data it provides. You shall definitely need to
pass --no-verify or -n to git commit for these changes.
See cldr2qtimezone.py on how to update tables of Windows-specific
names for zones and UTC-offset zone names.
update the locale data tables (principally text/qlocale_data_p.h,
time/q*calendar_data_p.h and time/qtimezone*_data_p.h under
src/corelib/). See enumdata.py and zonedata.py for when and how to
update the data they provide. You shall definitely need to pass
--no-verify or -n to git commit for these changes.

View File

@ -74,6 +74,77 @@ class CldrReader (object):
# more out.
pass # self.__wrapped(self.whitter, 'Skipping likelySubtags (for unknown codes): ', skips)
def zoneData(self):
    """Locale-independent timezone data.

    Returns a triple (alias, defaults, winIds) in which:
      * alias is a mapping from aliases for IANA zone IDs, that
        have the form of IANA IDs, to actual current IANA IDs; in
        particular, this maps each CLDR zone ID to its
        corresponding IANA ID.
      * defaults maps each Windows name for a zone to the IANA ID
        to use for it by default (when no territory is specified,
        or when no entry in winIds matches the given Windows name
        and territory).
      * winIds is a mapping {(winId, land): ianaList} from Windows
        name and territory code to the space-joined list of IANA
        IDs associated with the Windows name in the given
        territory.

    Along the way, this reports:
      * via self.whitter, any Windows IDs given in
        zonedata.windowsIdList that are no longer covered by the
        CLDR data;
      * via self.whitter, any (Windows ID, territory) pair that
        the CLDR data lists more than once; the IANA lists of
        such entries are merged, omitting repeats;
      * via self.grumble, any territories found in CLDR timezone
        data that are not mentioned in enumdata.territory_map;
        the entries for such territories are skipped."""
    alias, ignored = self.root.bcp47Aliases()
    defaults, winIds = self.root.readWindowsTimeZones(alias)

    from zonedata import windowsIdList
    winUnused = set(n for n, o in windowsIdList).difference(
        set(defaults).union(w for w, t, ids in winIds))
    if winUnused:
        # Sorted, so that the report is the same from one run to the next.
        joined = "\n\t".join(sorted(winUnused))
        self.whitter.write(
            f'No Windows ID in\n\t{joined}\nis still in use.\n'
            'They could be removed at the next major version.\n')

    # Check for duplicate entries in winIds. Collect every IANA list
    # seen for each (Windows ID, territory) pair, in the order given,
    # so that the first entry of a duplicated pair is not lost.
    grouped = {}
    for w, t, ids in winIds:
        grouped.setdefault((w, t), []).append(ids)
    winDup = {key: seq for key, seq in grouped.items() if len(seq) > 1}

    if winDup:
        joined = '\n\t'.join(f'{t}, {w}: ' + ', '.join(seq)
                             for (w, t), seq in sorted(winDup.items()))
        self.whitter.write(
            f'Duplicated (territory, Windows ID) entries:\n\t{joined}\n')
        # Replace each set of duplicates with a single merged entry:
        winIds = [trip for trip in winIds if trip[:2] not in winDup]
        for (w, t), seq in winDup.items():
            ianaList = []
            for ids in seq:
                for iana in ids.split():
                    if iana not in ianaList:
                        ianaList.append(iana)
            winIds.append((w, t, ' '.join(ianaList)))

    from enumdata import territory_map
    unLand = set(t for w, t, ids in winIds).difference(
        v[1] for v in territory_map.values())
    if unLand:
        self.grumble.write(
            'Unknown territory codes in timezone data: '
            f'{", ".join(sorted(unLand))}\n'
            'Skipping Windows zone mappings for these territories\n')
        winIds = [(w, t, ids) for w, t, ids in winIds if t not in unLand]

    # Convert list of triples to mapping:
    winIds = {(w, t): ids for w, t, ids in winIds}
    return alias, defaults, winIds
def readLocales(self, calendars = ('gregorian',)):
    """Map each locale's (language, script, territory, variant) key to it.

    The locales are those produced by self.__allLocales(calendars);
    each is stored under the tuple of its language_id, script_id,
    territory_id and variant_code attributes."""
    found = {}
    for locale in self.__allLocales(calendars):
        key = (locale.language_id, locale.script_id,
               locale.territory_id, locale.variant_code)
        found[key] = locale
    return found
@ -458,9 +529,13 @@ enumdata.py (keeping the old name as an alias):
return alias, naming
def readWindowsTimeZones(self, lookup, alias): # For use by cldr2qtimezone.py
def readWindowsTimeZones(self, alias):
"""Digest CLDR's MS-Win time-zone name mapping.
Single argument, alias, should be the first part of the pair
returned by a call to bcp47Aliases(); it shall be used to
transform CLDR IDs into IANA IDs.
MS-Win have their own eccentric names for time-zones. CLDR
helpfully provides a translation to more orthodox names,
albeit these are CLDR IDs - see bcp47Aliases() - rather than
@ -468,78 +543,48 @@ enumdata.py (keeping the old name as an alias):
supplementalData/windowsZones/mapTimezones/mapZone nodes with
attributes
territory -- using 001 (World) for 'default'
territory -- ISO code
type -- space-joined sequence of CLDR IDs of zones
other -- Windows name of these zones in the given territory
First argument, lookup, is a mapping from known MS-Win names
for timezones to a unique integer index (starting at 1). Second
argument, alias, should be the first part of the pair returned
by a call to bcp47Aliases(); it shall be used to transform
CLDR IDs into IANA IDs.
When 'territory' is '001', type is always just a single CLDR
zone ID. This is the default zone for the given Windows name.
For each mapZone node, its territory is mapped to a
QLocale::Territory enum with numeric value code e, its other
is mapped through lookup to obtain an MS-Win name index k and
its type is split on spacing and cleaned up as follows. Each
entry in type is mapped, via alias (if present in it) to get a
list of IANA IDs, omitting any later duplicates from earlier
entries; the result list of IANA IDs is joined with spaces
between to give a string s.
For each mapZone node, its type is split on spacing and
cleaned up as follows. Those entries that are keys of alias
are mapped thereby to their canonical IANA IDs; all others are
presumed to be canonical IANA IDs and left unchanged. Any
later duplicates of earlier entries are omitted. The result
list of IANA IDs is joined with single spaces between to give
a string s.
Returns a triple (version, defaults, windows) in which version
is the version of CLDR in use, defaults is a mapping {k: s}
and windows is a mapping {(k, e): b} in which b maps
'windowsId' to the Windows name of the zone (the node's other
attribute), 'territoryCode' to e and 'ianaList' to s."""
Returns a twople (defaults, windows) in which defaults is a
mapping, from Windows ID to IANA ID (derived from the mapZone
nodes with territory='001'), and windows is a list of triples
(Windows ID, territory code, IANA ID list) in which the first
two entries are the 'other' and 'territory' fields of a
mapZone element and the last is s, its cleaned-up list of IANA
IDs."""
defaults, windows = {}, []
zones = self.supplement('windowsZones.xml')
enum = self.__enumMap('territory')
badZones, unLands, defaults, windows = set(), set(), {}, {}
for name, attrs in zones.find('windowsZones/mapTimezones'):
if name != 'mapZone':
continue
wid, code = attrs['other'], attrs['territory']
cldrs, ianas = attrs['type'].split(), []
for cldr in cldrs:
if cldr in alias:
iana = alias[cldr]
if iana not in ianas:
ianas.append(iana)
else:
ianas.append(cldr)
data = dict(windowsId = wid,
territoryCode = code,
ianaList = ' '.join(ianas))
try:
key = lookup[wid]
except KeyError:
badZones.add(wid)
key = 0
data['windowsKey'] = key
wid, code, ianas = attrs['other'], attrs['territory'], []
for cldr in attrs['type'].split():
iana = alias.get(cldr, cldr)
if iana not in ianas:
ianas.append(iana)
if code == '001':
defaults[key] = data['ianaList']
assert len(ianas) == 1, (wid, *ianas)
defaults[wid] = ianas[0]
else:
try:
land, name = enum[code]
except KeyError:
unLands.append(code)
continue
data.update(territoryId = land, territory = name)
windows[key, land] = data
windows.append((wid, code, ' '.join(ianas)))
if unLands:
raise Error('Unknown territory codes, please add to enumdata.py: '
+ ', '.join(sorted(unLands)))
if badZones:
raise Error('Unknown Windows IDs, please add to cldr2qtimezone.py: '
+ ', '.join(sorted(badZones)))
return self.cldrVersion, defaults, windows
return defaults, windows
@property
def cldrVersion(self):

View File

@ -27,10 +27,8 @@ append new entries to enumdata.py's lists and update documentation in
src/corelib/text/qlocale.qdoc, adding the new entries in alphabetic
order.
While updating the locale data, check also for updates to MS-Win's
time zone names; see cldr2qtimezone.py for details.
All the scripts mentioned support --help to tell you how to use them.
Both of the scripts mentioned support --help to tell you how to use
them.
.. _CLDR: https://unicode.org/Public/cldr/
.. _github: https://github.com/unicode-org/cldr
@ -92,6 +90,7 @@ def main(argv, out, err):
writer.version(reader.root.cldrVersion)
writer.enumData(reader.root.englishNaming)
writer.likelySubTags(reader.likelySubTags())
writer.zoneData(*reader.zoneData()) # Locale-independent zone data.
writer.locales(reader.readLocales(args.calendars), args.calendars)
writer.close(err.write)

View File

@ -1,226 +0,0 @@
#!/usr/bin/env python3
# Copyright (C) 2021 The Qt Company Ltd.
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
"""Parse CLDR data for QTimeZone use with MS-Windows
Script to parse the CLDR common/supplemental/windowsZones.xml file and
prepare its data for use in QTimeZone. See ``./cldr2qlocalexml.py`` for
where to get the CLDR data. Pass its root directory as first parameter
to this script. You can optionally pass the qtbase root directory as
second parameter; it defaults to the root of the checkout containing
this script. This script updates qtbase's
src/corelib/time/qtimezoneprivate_data_p.h with the new data.
"""
import datetime
from pathlib import Path
import textwrap
import argparse
from localetools import unicode2hex, wrap_list, Error, SourceFileEditor, qtbase_root
from cldr import CldrAccess
# This script shall report any updates zonedata may need.
from zonedata import windowsIdList, utcIdList
class ByteArrayData:
    """Flat table of NUL-terminated strings, addressed by start offset.

    Each distinct string is stored once; self.hash remembers where
    each (terminated) string starts within self.data."""
    def __init__(self):
        self.data, self.hash = [], {}

    def append(self, s):
        """Return the offset of s in the table, adding it if it is new.

        Raises Error if a new string would start beyond the uint16 range."""
        s = s + '\0'
        try:
            return self.hash[s]
        except KeyError:
            pass # Not seen before: add it below.

        encoded = unicode2hex(s)
        index = len(self.data)
        if index > 0xffff:
            raise Error(f'Index ({index}) outside the uint16 range !')
        self.hash[s] = index
        self.data.extend(encoded)
        return index

    def write(self, out, name):
        """Pass a C++ char array definition, called name, to out()."""
        out(f'\nstatic constexpr char {name}[] = {{\n')
        rows = wrap_list(self.data, 16) # 16 == 100 // len('0xhh, ')
        # Will over-spill 100-col if some 4-digit hex show up, but none do (yet).
        out(rows)
        out('\n};\n')
class ZoneIdWriter (SourceFileEditor):
    """Writes the CLDR-derived timezone tables through self.writer.

    NOTE(review): self.writer is presumably provided by SourceFileEditor,
    which is defined elsewhere - confirm."""
    # All the output goes into namespace QtTimeZoneCldr.
    def write(self, version, alias, defaults, windowsIds):
        """Write the warning comment, the tables, then the ID data arrays."""
        self.__writeWarning(version)
        emit = self.writer.write
        windows, iana, aliased = self.__writeTables(emit, alias, defaults, windowsIds)
        for table, label in ((windows, 'windowsIdData'),
                             (iana, 'ianaIdData'),
                             (aliased, 'aliasIdData')):
            table.write(emit, label)

    def __writeWarning(self, version):
        """Write the comment saying when and from what the data was generated."""
        self.writer.write(f"""
/*
This part of the file was generated on {datetime.date.today()} from the
Common Locale Data Repository v{version}
http://www.unicode.org/cldr/
Do not edit this code: run cldr2qtimezone.py on updated (or
edited) CLDR data; see qtbase/util/locale_database/.
*/
""")

    @staticmethod
    def __writeTables(out, alias, defaults, windowsIds):
        """Pass the four lookup tables to out(), a string at a time.

        Returns a triple of ByteArrayData objects holding, in order,
        the Windows IDs, the IANA ID lists and the alias IDs that
        the tables' index fields refer to."""
        aliasBytes, ianaBytes, windowsBytes = (
            ByteArrayData(), ByteArrayData(), ByteArrayData())

        # Write IANA alias table
        aliasRow = ' {{ {:6d},{:6d} }}, // {} -> {}\n'
        out('// Alias ID Index, Alias ID Index\n')
        out('static constexpr AliasData aliasMappingTable[] = {\n')
        for name in sorted(alias):
            iana = alias[name]
            if name == iana:
                continue # An ID is not an alias for itself.
            out(aliasRow.format(aliasBytes.append(name),
                                aliasBytes.append(iana), name, iana))
        out('};\n\n')

        # Write Windows/IANA table
        zoneRow = ' {{ {:6d},{:6d},{:6d} }}, // {} / {}\n'
        out('// Windows ID Key, Territory Enum, IANA ID Index\n')
        out('static constexpr ZoneData zoneDataTable[] = {\n')
        # Sorted by (Windows ID Key, territory enum)
        for key in sorted(windowsIds):
            data = windowsIds[key]
            out(zoneRow.format(data['windowsKey'], data['territoryId'],
                               ianaBytes.append(data['ianaList']),
                               data['windowsId'], data['territory']))
        out('};\n\n')

        # Write Windows ID key table
        winRow = ' {{ {:6d},{:6d},{:6d},{:6d} }}, // {}\n'
        out('// Windows ID Key, Windows ID Index, IANA ID Index, UTC Offset\n')
        out('static constexpr WindowsData windowsDataTable[] = {\n')
        # Sorted by Windows ID key; sorting case-insensitively by
        # Windows ID must give the same order.
        lowered = [winId.lower() for winId, offset in windowsIdList]
        inOrder = sorted(lowered)
        assert lowered == inOrder, \
            [(x, y) for x, y in zip(lowered, inOrder) if x != y]
        for index, (winId, offset) in enumerate(windowsIdList, 1):
            out(winRow.format(index,
                              windowsBytes.append(winId),
                              ianaBytes.append(defaults[index]),
                              offset, winId))
        out('};\n\n')

        def offsetOf(utcName):
            "Maps a UTC±HH:mm name to its offset in seconds"
            assert utcName.startswith('UTC')
            tail = utcName[3:]
            if not tail:
                return 0
            assert tail[0] in '+-', utcName
            assert len(tail) == 6 and tail[3] == ':', utcName
            minutes = int(tail[1:3]) * 60 + int(tail[-2:])
            return (-minutes if tail[0] == '-' else minutes) * 60

        # Group the UTC-offset names by the offset each denotes:
        offsetMap = {}
        for name in utcIdList:
            offsetMap.setdefault(offsetOf(name), []).append(name)

        # Write UTC ID key table
        utcRow = ' {{ {:6d},{:6d} }}, // {}\n'
        out('// IANA ID Index, UTC Offset\n')
        out('static constexpr UtcData utcDataTable[] = {\n')
        for offset, names in sorted(offsetMap.items()): # Sort so C++ can binary-chop.
            out(utcRow.format(ianaBytes.append(' '.join(names)), offset, names[0]))
        out('};\n')

        return windowsBytes, ianaBytes, aliasBytes
def main(out, err):
    """Parses CLDR's data and updates Qt's representation of it.

    Takes sys.stdout, sys.stderr (or equivalents) as
    arguments. Expects two command-line options: the root of the
    unpacked CLDR data-file tree and the root of the qtbase module's
    checkout. Updates QTimeZone's private data about Windows time-zone
    IDs. Returns 0 on success, 1 on any failure it reports itself."""
    parser = argparse.ArgumentParser(
        description="Update Qt's CLDR-derived timezone data.")
    parser.add_argument('cldr_path', help='path to the root of the CLDR tree')
    parser.add_argument('qtbase_path',
                        help='path to the root of the qtbase source tree',
                        nargs='?', default=qtbase_root)
    args = parser.parse_args()
    cldrPath, qtPath = Path(args.cldr_path), Path(args.qtbase_path)

    if not qtPath.is_dir():
        parser.error(f"No such Qt directory: {qtPath}")
    if not cldrPath.is_dir():
        parser.error(f"No such CLDR directory: {cldrPath}")

    dataFilePath = qtPath / 'src/corelib/time/qtimezoneprivate_data_p.h'
    if not dataFilePath.is_file():
        parser.error(f'No such file: {dataFilePath}')

    def report(text):
        # Send text to err, wrapped to 80 columns.
        err.write('\n'.join(textwrap.wrap(
            text, subsequent_indent=' ', width=80)) + '\n')

    access = CldrAccess(cldrPath)
    try:
        alias, ignored = access.bcp47Aliases()
        # TODO: ignored maps IANA IDs to an extra-long name of the zone
    except IOError as e:
        parser.error(
            f'Failed to open common/bcp47/timezone.xml: {e}')
        return 1
    except Error as e:
        report(f'Failed to read bcp47/timezone.xml: {e}')
        return 1

    try:
        # Number the known Windows IDs, starting at 1, in list order:
        winKeys = {}
        for ind, (name, offset) in enumerate(windowsIdList, 1):
            winKeys[name] = ind
        version, defaults, winIds = access.readWindowsTimeZones(winKeys, alias)
    except IOError as e:
        parser.error(
            f'Failed to open common/supplemental/windowsZones.xml: {e}')
        return 1
    except Error as e:
        report(f'Failed to read windowsZones.xml: {e}')
        return 1

    # Offsets of the windows tables, that are whole numbers of minutes, in minutes:
    winOff = {m for m, s in (divmod(v, 60) for k, v in windowsIdList) if s == 0}
    behind = (divmod(-o, 60) for o in winOff if o < 0)
    ahead = (divmod(o, 60) for o in winOff if o > 0)
    winUtc = {f'UTC-{h:02}:{m:02}' for h, m in behind}.union(
        f'UTC+{h:02}:{m:02}' for h, m in ahead)
    # All such offsets should be represented by entries in utcIdList:
    newUtc = winUtc.difference(utcIdList)
    if newUtc:
        err.write(f'Please add {", ".join(newUtc)} to zonedata.utcIdList\n')
        return 1

    out.write('Input files parsed, now writing data\n')

    try:
        with ZoneIdWriter(dataFilePath, qtPath) as writer:
            writer.write(version, alias, defaults, winIds)
    except Exception as e:
        err.write(f'\nError while updating timezone data: {e}\n')
        return 1

    out.write(f'Data generation completed, please check the new file at {dataFilePath}\n')
    return 0
if __name__ == '__main__':
    # Run as a script: the exit status is whatever main() returns.
    import sys
    status = main(sys.stdout, sys.stderr)
    sys.exit(status)

View File

@ -97,6 +97,21 @@ class QLocaleXmlReader (object):
yield (language, script, territory), locale
def aliasToIana(self):
    """Yield an (alias, iana) pair of texts for each zoneAlias element.

    The elements are those found in the zoneAliases group of self.root."""
    text = self.__firstChildText
    entries = self.__eachEltInGroup(self.root, 'zoneAliases', 'zoneAlias')
    for entry in entries:
        alias = text(entry, 'alias')
        iana = text(entry, 'iana')
        yield alias, iana
def msToIana(self):
    """Yield an (msid, iana) pair of texts for each msZoneIana element.

    The elements are those found in the windowsZone group of self.root."""
    text = self.__firstChildText
    entries = self.__eachEltInGroup(self.root, 'windowsZone', 'msZoneIana')
    for entry in entries:
        msid = text(entry, 'msid')
        iana = text(entry, 'iana')
        yield msid, iana
def msLandIanas(self):
    """Yield (msid, territorycode, ianaids) texts for each msLandZones element.

    The elements are those found in the windowsZone group of self.root."""
    text = self.__firstChildText
    entries = self.__eachEltInGroup(self.root, 'windowsZone', 'msLandZones')
    for entry in entries:
        msid = text(entry, 'msid')
        land = text(entry, 'territorycode')
        ianas = text(entry, 'ianaids')
        yield (msid, land, ianas)
def languageIndices(self, locales):
index = 0
for key, value in self.languages.items():
@ -327,6 +342,33 @@ class QLocaleXmlWriter (object):
self.__closeTag('likelySubtag')
self.__closeTag('likelySubtags')
def zoneData(self, alias, defaults, windowsIds):
    """Write the zoneAliases and windowsZone groups of elements.

    alias maps each alias to a single IANA ID; windowsIds maps each
    (Windows ID, territory code) pair to a space-joined sequence of
    IANA IDs; defaults maps each Windows ID to a single IANA ID."""
    def record(tag, *fields):
        # One element named tag, with a text child for each (name, text) given.
        self.__openTag(tag)
        for field, text in fields:
            self.inTag(field, text)
        self.__closeTag(tag)

    self.__openTag('zoneAliases')
    # iana is a single IANA ID
    # name has the same form, but has been made redundant
    for name in sorted(alias):
        record('zoneAlias', ('alias', name), ('iana', alias[name]))
    self.__closeTag('zoneAliases')

    self.__openTag('windowsZone')
    for (msid, code), ids in windowsIds.items():
        # ianaids is a space-joined sequence of IANA IDs
        record('msLandZones',
               ('msid', msid), ('territorycode', code), ('ianaids', ids))
    for winid, iana in defaults.items():
        record('msZoneIana', ('msid', winid), ('iana', iana))
    self.__closeTag('windowsZone')
def locales(self, locales, calendars):
self.__openTag('localeList')
self.__openTag('locale')

View File

@ -16,6 +16,8 @@ start = element localeDatabase {
element scriptList { Script+ },
element territoryList { Territory+ },
element likelySubtags { LikelySubtag+ },
element zoneAliases { ZoneAlias+ },
element windowsZone { MsLandZones+, MsZoneIana+ },
element localeList { Locale+ }
}
@ -39,6 +41,23 @@ LocaleTriplet = (
element territory { text }
)
# TODO: xsd patterns for IANA IDs and space-joined lists of them
# One alias for a zone: alias has the form of an IANA ID and iana is the
# single IANA ID that it stands for.
ZoneAlias = element zoneAlias {
element alias { text },
element iana { text }
}
# The zones a Windows ID stands for in one territory: msid is the Windows
# ID, territorycode the territory's code and ianaids a space-joined
# sequence of IANA IDs.
MsLandZones = element msLandZones {
element msid { text },
element territorycode { text },
element ianaids { text }
}
# The default zone for a Windows ID: msid is the Windows ID and iana the
# single IANA ID to use for it when no territory-specific entry applies.
MsZoneIana = element msZoneIana {
element msid { text },
element iana { text }
}
WeekDay = ("sun" | "mon" | "tue" | "wed" | "thu" | "fri" | "sat")
Digit = xsd:string { pattern = "\d" }
Punctuation = xsd:string { pattern = "\p{P}" }

View File

@ -22,6 +22,23 @@ from typing import Optional
from qlocalexml import QLocaleXmlReader
from localetools import *
from iso639_3 import LanguageCodeData
from zonedata import utcIdList, windowsIdList
# Sanity check the zone data:
# Offsets of the windows tables, in minutes, where whole numbers
# (any offset with a left-over part, after dividing by 60, is left out):
winOff = {m for m, s in (divmod(v, 60) for k, v in windowsIdList) if s == 0}
# The UTC±HH:mm forms of the non-zero offsets:
winUtc = {f'UTC-{h:02}:{m:02}'
          for h, m in (divmod(-o, 60) for o in winOff if o < 0)
          }.union(f'UTC+{h:02}:{m:02}'
                  for h, m in (divmod(o, 60) for o in winOff if o > 0))
# All such offsets should be represented by entries in utcIdList:
newUtc = winUtc.difference(utcIdList)
# Report the missing names sorted, so the message is the same on every run.
assert not newUtc, (
    'Please add missing UTC-offset zones to zonedata.utcIdList', sorted(newUtc))
class LocaleKeySorter:
"""Sort-ordering representation of a locale key.
@ -47,6 +64,28 @@ class LocaleKeySorter:
# TODO: should we compare territory before or after script ?
return (key[0], self.foreign(key)) + key[1:]
class ByteArrayData:
    """Flat table of NUL-terminated strings, addressed by start offset.

    Each distinct string is stored once; self.hash remembers where
    each (terminated) string starts within self.data."""
    def __init__(self):
        self.data = []
        self.hash = {}

    def append(self, s):
        """Return the offset of s in the table, adding it if it is new.

        Raises Error if a new string would start beyond the uint16 range."""
        s = s + '\0'
        known = self.hash.get(s)
        if known is not None:
            return known

        index = len(self.data)
        if index > 0xffff:
            raise Error(f'Index ({index}) outside the uint16 range !')
        # Record the offset before encoding, as the string starts here.
        self.hash[s] = index
        self.data.extend(unicode2hex(s))
        return index

    def write(self, out, name):
        """Pass a C++ char array definition, called name, to out()."""
        out(f'\nstatic constexpr char {name}[] = {{\n')
        rows = wrap_list(self.data, 16) # 16 == 100 // len('0xhh, ')
        # Will over-spill 100-col if some 4-digit hex show up, but none do (yet).
        out(rows)
        out('\n};\n')
class StringDataToken:
def __init__(self, index, length, bits):
if index > 0xffff:
@ -136,6 +175,92 @@ class LocaleSourceEditor (SourceFileEditor):
""")
class TimeZoneDataWriter (LocaleSourceEditor):
    """Writes the locale-independent timezone tables through self.writer.

    Each table method writes one C++ table, storing the strings its
    rows refer to in one of three ByteArrayData stores; writeTables()
    then writes those stores out."""
    def __init__(self, path: Path, temp: Path, version: str):
        super().__init__(path, temp, version)
        self.__ianaTable = ByteArrayData() # Single IANA IDs
        self.__ianaListTable = ByteArrayData() # Space-joined lists of IDs
        self.__windowsTable = ByteArrayData() # Windows names for zones
        # Windows IDs in case-insensitive order; each ID's key is its
        # (1-based) position in this order.
        self.__windowsList = sorted(windowsIdList,
                                    key=lambda p: p[0].lower())
        self.windowsKey = {}
        for key, (name, off) in enumerate(self.__windowsList, 1):
            self.windowsKey[name] = (key, off)

    def utcTable(self):
        """Write the table of UTC-offset zone names, sorted by offset."""
        out = self.writer.write
        # Group the names by the offset each denotes:
        offsetMap = {}
        for name in utcIdList:
            offsetMap.setdefault(self.__offsetOf(name), []).append(name)

        # Write UTC ID key table
        out('// IANA ID Index, UTC Offset\n')
        out('static constexpr UtcData utcDataTable[] = {\n')
        for offset, names in sorted(offsetMap.items()): # Sort so C++ can binary-chop.
            joined = self.__ianaListTable.append(' '.join(names))
            out(f' {{ {joined:6d},{offset:6d} }}, // {names[0]}\n')
        out('};\n')

    def aliasToIana(self, pairs):
        """Write the table mapping each alias to its IANA ID.

        Takes an iterable of (alias, IANA ID) pairs, presumed already
        sorted; any alias equal to its IANA ID is left out."""
        out, store = self.writer.write, self.__ianaTable.append

        out('// Alias ID Index, Alias ID Index\n')
        out('static constexpr AliasData aliasMappingTable[] = {\n')
        for name, iana in pairs: # They're ready-sorted
            if name == iana:
                continue
            out(f' {{ {store(name):6d},{store(iana):6d} }},'
                f' // {name} -> {iana}\n')
        out('};\n\n')

    def msToIana(self, pairs):
        """Write the table of Windows IDs, with default IANA ID and offset.

        Takes an iterable of (Windows ID, IANA ID) pairs, which must
        include every Windows ID in zonedata.windowsIdList."""
        out = self.writer.write
        winStore = self.__windowsTable.append
        ianaStore = self.__ianaListTable.append # TODO: Should be __ianaTable
        alias = dict(pairs) # {MS name: IANA ID}

        out('// Windows ID Key, Windows ID Index, IANA ID Index, UTC Offset\n')
        out('static constexpr WindowsData windowsDataTable[] = {\n')
        # Sorted by Windows ID key:
        for index, entry in enumerate(self.__windowsList, 1):
            name, offset = entry
            out(f' {{ {index:6d},{winStore(name):6d},'
                f'{ianaStore(alias[name]):6d},{offset:6d} }}, // {name}\n')
        out('};\n\n')

    def msLandIanas(self, triples): # (MS name, territory code, IANA list)
        """Write the table of IANA ID lists by Windows ID and territory."""
        out, store = self.writer.write, self.__ianaListTable.append
        from enumdata import territory_map
        # Map each territory code to its enum value and name:
        landKey = {}
        for i, (name, code) in territory_map.items():
            landKey[code] = (i, name)
        seq = []
        for name, land, ianas in triples:
            seq.append((self.windowsKey[name][0], landKey[land][0],
                        name, landKey[land][1], ianas))
        seq.sort()

        out('// Windows ID Key, Territory Enum, IANA ID Index\n')
        out('static constexpr ZoneData zoneDataTable[] = {\n')
        # Sorted by (Windows ID Key, territory enum)
        for winId, landId, name, land, ianas in seq:
            out(f' {{ {winId:6d},{landId:6d},{store(ianas):6d} }},'
                f' // {name} / {land}\n')
        out('};\n\n')

    def writeTables(self):
        """Write out the string stores that the tables' indices refer to."""
        out = self.writer.write
        self.__windowsTable.write(out, 'windowsIdData')
        # TODO: these are misnamed, entries in the first are lists,
        # those in the next are single IANA IDs
        self.__ianaListTable.write(out, 'ianaIdData')
        self.__ianaTable.write(out, 'aliasIdData')

    # Implementation details:
    @staticmethod
    def __offsetOf(utcName):
        "Maps a UTC±HH:mm name to its offset in seconds"
        assert utcName.startswith('UTC')
        tail = utcName[3:]
        if not tail:
            return 0
        assert tail[0] in '+-', utcName
        assert len(tail) == 6 and tail[3] == ':', utcName
        minutes = int(tail[1:3]) * 60 + int(tail[-2:])
        return (-minutes if tail[0] == '-' else minutes) * 60
class LocaleDataWriter (LocaleSourceEditor):
def likelySubtags(self, likely):
# First sort likely, so that we can use binary search in C++
@ -623,6 +748,20 @@ def main(argv, out, err):
err.write(f'\nError updating qlocale.h: {e}\n')
return 1
# Locale-independent timezone data
try:
with TimeZoneDataWriter(qtsrcdir.joinpath(
'src/corelib/time/qtimezoneprivate_data_p.h'),
qtsrcdir, reader.cldrVersion) as writer:
writer.aliasToIana(reader.aliasToIana())
writer.msLandIanas(reader.msLandIanas())
writer.msToIana(reader.msToIana())
writer.utcTable()
writer.writeTables()
except Exception as e:
err.write(f'\nError updating qtimezoneprivate_data_p.h: {e}\n')
return 1
# ./testlocales/localemodel.cpp
try:
path = 'util/locale_database/testlocales/localemodel.cpp'