# Method of class CldrReader (util/locale_database/cldr.py).
def zoneData(self):
    """Locale-independent timezone data.

    Returns a triple (alias, defaults, winIds) in which:
      * alias is a mapping from aliases for IANA zone IDs, that
        have the form of IANA IDs, to actual current IANA IDs; in
        particular, this maps each CLDR zone ID to its
        corresponding IANA ID.
      * defaults maps each Windows name for a zone to the IANA ID
        to use for it by default (when no territory is specified,
        or when no entry in winIds matches the given Windows name
        and territory).
      * winIds is a mapping {(winId, land): ianaList} from Windows
        name and territory code to the space-joined list of IANA
        IDs associated with the Windows name in the given
        territory.

    Reports, via self.whitter, on any Windows IDs given in
    zonedata.windowsIdList that are no longer covered by the CLDR
    data, and on any (Windows ID, territory) pair that the CLDR
    data lists more than once. The IANA lists of such repeated
    entries are merged, in order of first appearance, omitting
    repeats. Reports, via self.grumble, on any territories found in
    CLDR timezone data that are not mentioned in
    enumdata.territory_map; entries for these are left out of
    winIds."""
    alias, ignored = self.root.bcp47Aliases()
    defaults, winIds = self.root.readWindowsTimeZones(alias)

    from zonedata import windowsIdList
    winUnused = set(n for n, o in windowsIdList).difference(
        set(defaults).union(w for w, t, ids in winIds))
    if winUnused:
        # Sorted, so that the report is the same from run to run.
        joined = "\n\t".join(sorted(winUnused))
        self.whitter.write(
            f'No Windows ID in\n\t{joined}\nis still in use.\n'
            'They could be removed at the next major version.\n')

    # Check for duplicate entries in winIds. Group every IANA list
    # by its (Windows ID, territory) key, so that the first entry
    # for a repeated key is kept along with the later ones:
    grouped = {}
    for w, t, ids in winIds:
        grouped.setdefault((w, t), []).append(ids)
    winDup = {key: seq for key, seq in grouped.items() if len(seq) > 1}
    if winDup:
        joined = '\n\t'.join(f'{t}, {w}: {", ".join(seq)}'
                             for (w, t), seq in sorted(winDup.items()))
        self.whitter.write(
            f'Duplicated (territory, Windows ID) entries:\n\t{joined}\n')
        # Replace each set of duplicates with one merged entry:
        winIds = [trip for trip in winIds if trip[:2] not in winDup]
        for (w, t), seq in winDup.items():
            ianaList = []
            for ids in seq:
                for iana in ids.split():
                    if iana not in ianaList:
                        ianaList.append(iana)
            winIds.append((w, t, ' '.join(ianaList)))

    from enumdata import territory_map
    unLand = set(t for w, t, ids in winIds).difference(
        v[1] for k, v in territory_map.items())
    if unLand:
        self.grumble.write(
            'Unknown territory codes in timezone data: '
            f'{", ".join(sorted(unLand))}\n'
            'Skipping Windows zone mappings for these territories\n')
        winIds = [(w, t, ids) for w, t, ids in winIds if t not in unLand]

    # Convert list of triples to mapping:
    winIds = {(w, t): ids for w, t, ids in winIds}
    return alias, defaults, winIds
# Called as self.root.readWindowsTimeZones(alias) by CldrReader.zoneData().
def readWindowsTimeZones(self, alias):
    """Digest CLDR's MS-Win time-zone name mapping.

    Single argument, alias, should be the first part of the pair
    returned by a call to bcp47Aliases(); it shall be used to
    transform CLDR IDs into IANA IDs.

    MS-Win have their own eccentric names for time-zones. CLDR
    helpfully provides a translation to more orthodox names,
    albeit these are CLDR IDs - see bcp47Aliases() - rather than
    IANA IDs. The data is read from windowsZones.xml, as
    supplementalData/windowsZones/mapTimezones/mapZone nodes with
    attributes

      territory -- ISO code
      type -- space-joined sequence of CLDR IDs of zones
      other -- Windows name of these zones in the given territory

    When 'territory' is '001', type is always just a single CLDR
    zone ID. This is the default zone for the given Windows name.

    For each mapZone node, its type is split on spacing and
    cleaned up as follows. Those entries that are keys of alias
    are mapped thereby to their canonical IANA IDs; all others are
    presumed to be canonical IANA IDs and left unchanged. Any
    later duplicates of earlier entries are omitted. The result
    list of IANA IDs is joined with single spaces between to give
    a string s.

    Returns a pair (defaults, windows) in which defaults is a
    mapping, from Windows ID to IANA ID (derived from the mapZone
    nodes with territory='001'), and windows is a list of triples
    (Windows ID, territory code, IANA ID list) in which the first
    two entries are the 'other' and 'territory' fields of a
    mapZone element and the last is s, its cleaned-up list of IANA
    IDs.

    Raises Error if a territory='001' node does not yield exactly
    one IANA ID."""

    defaults, windows = {}, []
    zones = self.supplement('windowsZones.xml')
    for name, attrs in zones.find('windowsZones/mapTimezones'):
        if name != 'mapZone':
            continue

        wid, code, ianas = attrs['other'], attrs['territory'], []
        for cldr in attrs['type'].split():
            iana = alias.get(cldr, cldr)
            if iana not in ianas:
                ianas.append(iana)

        if code == '001':
            # This checks data read from CLDR, so it must not be
            # an assert: those are skipped when python runs with -O.
            if len(ianas) != 1:
                raise Error(
                    f'Expected exactly one default zone for "{wid}", '
                    f'got: {" ".join(ianas) or "none"}')
            defaults[wid] = ianas[0]
        else:
            windows.append((wid, code, ' '.join(ianas)))

    return defaults, windows
_github: https://github.com/unicode-org/cldr @@ -92,6 +90,7 @@ def main(argv, out, err): writer.version(reader.root.cldrVersion) writer.enumData(reader.root.englishNaming) writer.likelySubTags(reader.likelySubTags()) + writer.zoneData(*reader.zoneData()) # Locale-independent zone data. writer.locales(reader.readLocales(args.calendars), args.calendars) writer.close(err.write) diff --git a/util/locale_database/cldr2qtimezone.py b/util/locale_database/cldr2qtimezone.py deleted file mode 100755 index 2a26f8e136..0000000000 --- a/util/locale_database/cldr2qtimezone.py +++ /dev/null @@ -1,226 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (C) 2021 The Qt Company Ltd. -# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0 -"""Parse CLDR data for QTimeZone use with MS-Windows - -Script to parse the CLDR common/supplemental/windowsZones.xml file and -prepare its data for use in QTimeZone. See ``./cldr2qlocalexml.py`` for -where to get the CLDR data. Pass its root directory as first parameter -to this script. You can optionally pass the qtbase root directory as -second parameter; it defaults to the root of the checkout containing -this script. This script updates qtbase's -src/corelib/time/qtimezoneprivate_data_p.h with the new data. -""" - -import datetime -from pathlib import Path -import textwrap -import argparse - -from localetools import unicode2hex, wrap_list, Error, SourceFileEditor, qtbase_root -from cldr import CldrAccess -# This script shall report any updates zonedata may need. 
-from zonedata import windowsIdList, utcIdList - -class ByteArrayData: - def __init__(self): - self.data = [] - self.hash = {} - - def append(self, s): - s = s + '\0' - if s in self.hash: - return self.hash[s] - - lst = unicode2hex(s) - index = len(self.data) - if index > 0xffff: - raise Error(f'Index ({index}) outside the uint16 range !') - self.hash[s] = index - self.data += lst - return index - - def write(self, out, name): - out(f'\nstatic constexpr char {name}[] = {{\n') - out(wrap_list(self.data, 16)) # 16 == 100 // len('0xhh, ') - # Will over-spill 100-col if some 4-digit hex show up, but none do (yet). - out('\n};\n') - -class ZoneIdWriter (SourceFileEditor): - # All the output goes into namespace QtTimeZoneCldr. - def write(self, version, alias, defaults, windowsIds): - self.__writeWarning(version) - windows, iana, aliased = self.__writeTables(self.writer.write, alias, defaults, windowsIds) - windows.write(self.writer.write, 'windowsIdData') - iana.write(self.writer.write, 'ianaIdData') - aliased.write(self.writer.write, 'aliasIdData') - - def __writeWarning(self, version): - self.writer.write(f""" -/* - This part of the file was generated on {datetime.date.today()} from the - Common Locale Data Repository v{version} - - http://www.unicode.org/cldr/ - - Do not edit this code: run cldr2qtimezone.py on updated (or - edited) CLDR data; see qtbase/util/locale_database/. 
-*/ - -""") - - @staticmethod - def __writeTables(out, alias, defaults, windowsIds): - aliasIdData = ByteArrayData() - ianaIdData, windowsIdData = ByteArrayData(), ByteArrayData() - - # Write IANA alias table - out('// Alias ID Index, Alias ID Index\n') - out('static constexpr AliasData aliasMappingTable[] = {\n') - for name, iana in sorted(alias.items()): - if name != iana: - out(' {{ {:6d},{:6d} }}, // {} -> {}\n'.format( - aliasIdData.append(name), - aliasIdData.append(iana), name, iana)) - out('};\n\n') - - # Write Windows/IANA table - out('// Windows ID Key, Territory Enum, IANA ID Index\n') - out('static constexpr ZoneData zoneDataTable[] = {\n') - # Sorted by (Windows ID Key, territory enum) - for index, data in sorted(windowsIds.items()): - out(' {{ {:6d},{:6d},{:6d} }}, // {} / {}\n'.format( - data['windowsKey'], data['territoryId'], - ianaIdData.append(data['ianaList']), - data['windowsId'], data['territory'])) - out('};\n\n') - - # Write Windows ID key table - out('// Windows ID Key, Windows ID Index, IANA ID Index, UTC Offset\n') - out('static constexpr WindowsData windowsDataTable[] = {\n') - # Sorted by Windows ID key; sorting case-insensitively by - # Windows ID must give the same order. 
- winIdNames = [x.lower() for x, y in windowsIdList] - assert all(x == y for x, y in zip(winIdNames, sorted(winIdNames))), \ - [(x, y) for x, y in zip(winIdNames, sorted(winIdNames)) if x != y] - for index, pair in enumerate(windowsIdList, 1): - out(' {{ {:6d},{:6d},{:6d},{:6d} }}, // {}\n'.format( - index, - windowsIdData.append(pair[0]), - ianaIdData.append(defaults[index]), - pair[1], pair[0])) - out('};\n\n') - - def offsetOf(utcName): - "Maps a UTC±HH:mm name to its offset in seconds" - assert utcName.startswith('UTC') - if len(utcName) == 3: - return 0 - assert utcName[3] in '+-', utcName - sign = -1 if utcName[3] == '-' else 1 - assert len(utcName) == 9 and utcName[6] == ':', utcName - hour, mins = int(utcName[4:6]), int(utcName[-2:]) - return sign * (hour * 60 + mins) * 60 - - offsetMap = {} - for name in utcIdList: - offset = offsetOf(name) - offsetMap[offset] = offsetMap.get(offset, ()) + (name,) - # Write UTC ID key table - out('// IANA ID Index, UTC Offset\n') - out('static constexpr UtcData utcDataTable[] = {\n') - for offset in sorted(offsetMap.keys()): # Sort so C++ can binary-chop. - names = offsetMap[offset]; - out(' {{ {:6d},{:6d} }}, // {}\n'.format( - ianaIdData.append(' '.join(names)), offset, names[0])) - out('};\n') - - return windowsIdData, ianaIdData, aliasIdData - - -def main(out, err): - """Parses CLDR's data and updates Qt's representation of it. - - Takes sys.stdout, sys.stderr (or equivalents) as - arguments. Expects two command-line options: the root of the - unpacked CLDR data-file tree and the root of the qtbase module's - checkout. 
Updates QTimeZone's private data about Windows time-zone - IDs.""" - parser = argparse.ArgumentParser( - description="Update Qt's CLDR-derived timezone data.") - parser.add_argument('cldr_path', help='path to the root of the CLDR tree') - parser.add_argument('qtbase_path', - help='path to the root of the qtbase source tree', - nargs='?', default=qtbase_root) - - args = parser.parse_args() - - cldrPath = Path(args.cldr_path) - qtPath = Path(args.qtbase_path) - - if not qtPath.is_dir(): - parser.error(f"No such Qt directory: {qtPath}") - - if not cldrPath.is_dir(): - parser.error(f"No such CLDR directory: {cldrPath}") - - dataFilePath = qtPath.joinpath('src/corelib/time/qtimezoneprivate_data_p.h') - - if not dataFilePath.is_file(): - parser.error(f'No such file: {dataFilePath}') - - access = CldrAccess(cldrPath) - try: - alias, ignored = access.bcp47Aliases() - # TODO: ignored maps IANA IDs to an extra-long name of the zone - except IOError as e: - parser.error( - f'Failed to open common/bcp47/timezone.xml: {e}') - return 1 - except Error as e: - err.write('\n'.join(textwrap.wrap( - f'Failed to read bcp47/timezone.xml: {e}', - subsequent_indent=' ', width=80)) + '\n') - return 1 - - try: - version, defaults, winIds = access.readWindowsTimeZones( - {name: ind for ind, name in enumerate((k for k, v in windowsIdList), 1)}, - alias) - except IOError as e: - parser.error( - f'Failed to open common/supplemental/windowsZones.xml: {e}') - return 1 - except Error as e: - err.write('\n'.join(textwrap.wrap( - f'Failed to read windowsZones.xml: {e}', - subsequent_indent=' ', width=80)) + '\n') - return 1 - - # Offsets of the windows tables, that are whole numbers of minutes, in minutes: - winOff = set(m for m, s in (divmod(v, 60) for k, v in windowsIdList) if s == 0) - winUtc = set(f'UTC-{h:02}:{m:02}' - for h, m in (divmod(-o, 60) for o in winOff if o < 0)).union( - f'UTC+{h:02}:{m:02}' - for h, m in (divmod(o, 60) for o in winOff if o > 0)) - # All such offsets should be 
# Methods of class QLocaleXmlReader (util/locale_database/qlocalexml.py).
def aliasToIana(self):
    """Yield an (alias, IANA ID) pair for each zoneAlias element.

    The elements are those of the zoneAliases group under self.root;
    each pair holds the text of the element's alias and iana children."""
    text = self.__firstChildText
    entries = self.__eachEltInGroup(self.root, 'zoneAliases', 'zoneAlias')
    for entry in entries:
        yield text(entry, 'alias'), text(entry, 'iana')

def msToIana(self):
    """Yield a (Windows ID, IANA ID) pair for each msZoneIana element.

    The elements are those of the windowsZone group under self.root;
    each pair holds the text of the element's msid and iana children."""
    text = self.__firstChildText
    entries = self.__eachEltInGroup(self.root, 'windowsZone', 'msZoneIana')
    for entry in entries:
        yield text(entry, 'msid'), text(entry, 'iana')

def msLandIanas(self):
    """Yield a triple for each msLandZones element.

    The elements are those of the windowsZone group under self.root;
    each triple holds the text of the element's msid, territorycode
    and ianaids children, in that order."""
    text = self.__firstChildText
    fields = ('msid', 'territorycode', 'ianaids')
    entries = self.__eachEltInGroup(self.root, 'windowsZone', 'msLandZones')
    for entry in entries:
        yield tuple(text(entry, field) for field in fields)
# Method of class QLocaleXmlWriter (util/locale_database/qlocalexml.py).
def zoneData(self, alias, defaults, windowsIds):
    """Write the locale-independent timezone data.

    Emits a zoneAliases group, with one zoneAlias element per entry
    of the mapping alias, in sorted order. Then emits a windowsZone
    group holding one msLandZones element per entry of the mapping
    windowsIds, keyed on (Windows ID, territory code), followed by
    one msZoneIana element per entry of the mapping defaults."""
    def record(tag, *fields):
        # One element, with a simple text child for each field.
        self.__openTag(tag)
        for child, text in fields:
            self.inTag(child, text)
        self.__closeTag(tag)

    self.__openTag('zoneAliases')
    # Each value is a single IANA ID; each key has the same form,
    # but has been made redundant.
    for old, current in sorted(alias.items()):
        record('zoneAlias', ('alias', old), ('iana', current))
    self.__closeTag('zoneAliases')

    self.__openTag('windowsZone')
    # Here each value is a space-joined sequence of IANA IDs:
    for (winId, land), ianaIds in windowsIds.items():
        record('msLandZones', ('msid', winId),
               ('territorycode', land), ('ianaids', ianaIds))

    for winId, ianaId in defaults.items():
        record('msZoneIana', ('msid', winId), ('iana', ianaId))
    self.__closeTag('windowsZone')
# Sanity check the zone data:

# Offsets of the windows tables, in minutes, where whole numbers:
winOff = set(m for m, s in (divmod(v, 60) for k, v in windowsIdList) if s == 0)
# The UTC±HH:mm forms of the non-zero offsets:
winUtc = set(f'UTC-{h:02}:{m:02}'
             for h, m in (divmod(-o, 60) for o in winOff if o < 0)
             ).union(f'UTC+{h:02}:{m:02}'
                     for h, m in (divmod(o, 60) for o in winOff if o > 0))
# All such offsets should be represented by entries in utcIdList:
newUtc = winUtc.difference(utcIdList)
assert not newUtc, (
    'Please add missing UTC-offset zones to zonedata.utcIdList', newUtc)


class ByteArrayData:
    """Accumulates '\\0'-terminated strings into one table of char values.

    Each distinct string is stored once; append() returns the index
    in the table at which the given string starts. The index has to
    fit in 16 bits."""
    def __init__(self):
        # data: the table's entries, as produced by unicode2hex().
        # hash: maps each terminated string to its start index in data.
        self.data, self.hash = [], {}

    def append(self, s):
        """Returns the index of s in the table, adding it if needed.

        Raises Error if s would start at an index above 0xffff."""
        s += '\0'
        if s in self.hash:
            return self.hash[s]

        index = len(self.data)
        if index > 0xffff:
            raise Error(f'Index ({index}) outside the uint16 range !')
        self.hash[s] = index
        self.data += unicode2hex(s)
        return index

    def write(self, out, name):
        """Writes the table as a static constexpr char array.

        First argument, out, is a callable that accepts each piece
        of text to be written; second, name, is the name to give
        to the array."""
        out(f'\nstatic constexpr char {name}[] = {{\n')
        out(wrap_list(self.data, 16)) # 16 == 100 // len('0xhh, ')
        # Will over-spill 100-col if some 4-digit hex show up, but none do (yet).
        out('\n};\n')
class TimeZoneDataWriter (LocaleSourceEditor):
    """Writes the locale-independent timezone data tables.

    Each of utcTable(), aliasToIana(), msToIana() and msLandIanas()
    writes one table of indices, adding the strings these refer to
    to the byte-array tables held by this object. Call writeTables()
    after all of them, to write out those byte-array tables.

    NOTE(review): leading spacing inside the table-row templates is
    reproduced as seen in review; confirm it against the generated
    header's layout."""
    def __init__(self, path: Path, temp: Path, version: str):
        super().__init__(path, temp, version)
        self.__ianaTable = ByteArrayData() # Single IANA IDs
        self.__ianaListTable = ByteArrayData() # Space-joined lists of IDs
        self.__windowsTable = ByteArrayData() # Windows names for zones
        # Sorting case-insensitively by name fixes each Windows ID's key:
        self.__windowsList = sorted(windowsIdList,
                                    key=lambda p: p[0].lower())
        # Maps each Windows ID to its (key, UTC offset) pair:
        self.windowsKey = {name: (key, off) for key, (name, off)
                           in enumerate(self.__windowsList, 1)}

    def utcTable(self):
        """Writes utcDataTable, derived from zonedata.utcIdList.

        Names that share an offset are combined, space-joined, in
        one entry of the table."""
        offsetMap, out = {}, self.writer.write
        for name in utcIdList:
            offset = self.__offsetOf(name)
            offsetMap[offset] = offsetMap.get(offset, ()) + (name,)

        # Write UTC ID key table
        out('// IANA ID Index, UTC Offset\n')
        out('static constexpr UtcData utcDataTable[] = {\n')
        for offset in sorted(offsetMap.keys()): # Sort so C++ can binary-chop.
            names = offsetMap[offset]
            joined = self.__ianaListTable.append(' '.join(names))
            out(f' {{ {joined:6d},{offset:6d} }}, // {names[0]}\n')
        out('};\n')

    def aliasToIana(self, pairs):
        """Writes aliasMappingTable.

        Single argument, pairs, is an iterable of (alias, IANA ID)
        pairs, expected to be already sorted. Any pair whose two
        members are equal is skipped."""
        out, store = self.writer.write, self.__ianaTable.append

        out('// Alias ID Index, Alias ID Index\n')
        out('static constexpr AliasData aliasMappingTable[] = {\n')
        for name, iana in pairs: # They're ready-sorted
            if name != iana:
                out(f' {{ {store(name):6d},{store(iana):6d} }},'
                    f' // {name} -> {iana}\n')
        out('};\n\n')

    def msToIana(self, pairs):
        """Writes windowsDataTable.

        Single argument, pairs, is an iterable of (Windows ID,
        default IANA ID) pairs. The table has one entry for each
        member of zonedata.windowsIdList, in order of its key.

        Raises Error, before writing anything, if pairs gives no
        IANA ID for some member of zonedata.windowsIdList."""
        out, winStore = self.writer.write, self.__windowsTable.append
        ianaStore = self.__ianaListTable.append # TODO: Should be __ianaTable
        alias = dict(pairs) # {MS name: IANA ID}

        # Report every missing name at once, rather than letting
        # the first surface as a bare KeyError part-way through:
        missing = [name for name, offset in self.__windowsList
                   if name not in alias]
        if missing:
            raise Error('No default IANA ID known for Windows IDs: '
                        + ', '.join(missing))

        out('// Windows ID Key, Windows ID Index, IANA ID Index, UTC Offset\n')
        out('static constexpr WindowsData windowsDataTable[] = {\n')
        # Sorted by Windows ID key:

        for index, (name, offset) in enumerate(self.__windowsList, 1):
            out(f' {{ {index:6d},{winStore(name):6d},'
                f'{ianaStore(alias[name]):6d},{offset:6d} }}, // {name}\n')
        out('};\n\n')

    def msLandIanas(self, triples): # (MS name, territory code, IANA list)
        """Writes zoneDataTable.

        Single argument, triples, is an iterable of (Windows ID,
        territory code, space-joined IANA ID list) triples.

        Raises Error, before writing anything, if any Windows ID is
        missing from zonedata.windowsIdList or any territory code is
        missing from enumdata.territory_map."""
        out, store = self.writer.write, self.__ianaListTable.append
        from enumdata import territory_map
        landKey = {code: (i, name) for i, (name, code) in territory_map.items()}

        triples = list(triples) # May be an iterator; we traverse it repeatedly.
        unknown = sorted(set(name for name, land, ianas in triples
                             if name not in self.windowsKey))
        if unknown:
            raise Error('Unknown Windows IDs, please add to zonedata.windowsIdList: '
                        + ', '.join(unknown))
        unLands = sorted(set(land for name, land, ianas in triples
                             if land not in landKey))
        if unLands:
            raise Error('Unknown territory codes, please add to enumdata.py: '
                        + ', '.join(unLands))

        seq = sorted((self.windowsKey[name][0], landKey[land][0], name, landKey[land][1], ianas)
                     for name, land, ianas in triples)

        out('// Windows ID Key, Territory Enum, IANA ID Index\n')
        out('static constexpr ZoneData zoneDataTable[] = {\n')
        # Sorted by (Windows ID Key, territory enum)
        for winId, landId, name, land, ianas in seq:
            out(f' {{ {winId:6d},{landId:6d},{store(ianas):6d} }},'
                f' // {name} / {land}\n')
        out('};\n\n')

    def writeTables(self):
        """Writes the byte-array tables that the other tables index.

        Call this after the methods that write those other tables,
        as it is they that populate the byte-array tables."""
        self.__windowsTable.write(self.writer.write, 'windowsIdData')
        # TODO: these are misnamed, entries in the first are lists,
        # those in the next are single IANA IDs
        self.__ianaListTable.write(self.writer.write, 'ianaIdData')
        self.__ianaTable.write(self.writer.write, 'aliasIdData')

    # Implementation details:
    @staticmethod
    def __offsetOf(utcName):
        "Maps a UTC±HH:mm name to its offset in seconds"
        assert utcName.startswith('UTC')
        if len(utcName) == 3:
            return 0
        assert utcName[3] in '+-', utcName
        sign = -1 if utcName[3] == '-' else 1
        assert len(utcName) == 9 and utcName[6] == ':', utcName
        hour, mins = int(utcName[4:6]), int(utcName[-2:])
        return sign * (hour * 60 + mins) * 60