Get string operations working on 3ds

This commit is contained in:
Andrew Glaze
2025-12-07 12:45:22 -05:00
parent d943c2d074
commit 0acb7c74db
21 changed files with 30432 additions and 23 deletions

View File

@@ -0,0 +1,251 @@
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2021 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
#include "UnicodeData.h"
#include <stdint.h>
// Computes the mixing value K for one 4-byte word that is about to be folded
// into the hash (here only scalars and level sizes, all uint32): multiply by
// the first constant, rotate left by 15 bits, multiply by the second constant.
// The caller XORs the returned K directly into the running hash.
static inline __swift_uint32_t scramble(__swift_uint32_t scalar) {
  const __swift_uint32_t mixed = scalar * 0xCC9E2D51;
  const __swift_uint32_t rotated = (mixed << 15) | (mixed >> 17);
  return rotated * 0x1B873593;
}
// A reimplementation of the MurMur3 hash over exactly two 4-byte words
// (`scalar`, then `level`), reduced modulo `level` at the end.
//
// - scalar: the value being looked up.
// - level:  size of the current level; it is hashed as the second word and is
//           also the modulus, so the result is always in [0, level). A level
//           of 0 would divide by zero.
// - seed:   initial hash state.
static __swift_uint32_t hash(__swift_uint32_t scalar, __swift_uint32_t level,
                             __swift_uint32_t seed) {
  __swift_uint32_t state = seed;

  // Body: fold each word in with the same xor / rotate-left-13 / affine step.
  const __swift_uint32_t words[2] = {scalar, level};
  for (__swift_uint32_t word : words) {
    state ^= scramble(word);
    state = (state << 13) | (state >> 19);
    state = state * 5 + 0xE6546B64;
  }

  // Finalization. The 8 is presumably the input length in bytes (two uint32
  // words), as in the reference MurMur3 algorithm.
  state ^= 8;
  state ^= state >> 16;
  state *= 0x85EBCA6B;
  state ^= state >> 13;
  state *= 0xC2B2AE35;
  state ^= state >> 16;

  return state % level;
}
// Looks `scalar` up in a minimal perfect hash table. The implementation is
// based on the minimal perfect hashing strategy found here:
// https://arxiv.org/pdf/1702.03154.pdf
//
// - scalar: the value to look up.
// - levels: the number of bit arrays (levels) that make up the table.
// - keys:   keys[i] is the bit array of level i.
// - ranks:  ranks[i][k] is the rank supplied for the k-th run of 512 bits
//           (8 uint64 words) of level i.
// - sizes:  sizes[i] is the modulus passed to hash() for level i.
//
// Returns the rank of the scalar's bit within the first level whose bit for
// this scalar is set. If no level has the bit set the result is 0, which is
// indistinguishable from a genuine index of 0 at this layer.
__swift_intptr_t _swift_stdlib_getMphIdx(__swift_uint32_t scalar,
                                         __swift_intptr_t levels,
                                         const __swift_uint64_t * const *keys,
                                         const __swift_uint16_t * const *ranks,
                                         const __swift_uint16_t * const sizes) {
  for (int level = 0; level != levels; level += 1) {
    auto levelBits = keys[level];

    // The bit this scalar hashes to in this level; the level number doubles as
    // the hash seed.
    auto bit = (__swift_uint64_t) hash(scalar, sizes[level], level);
    auto wordIdx = bit / 64;
    auto bitInWord = bit % 64;
    auto word = levelBits[wordIdx];

    // Bit not set: the scalar does not live in this level, try the next one.
    if ((word & ((__swift_uint64_t) 1 << bitInWord)) == 0) {
      continue;
    }

    // Start from the rank supplied for the 512-bit run that contains our bit.
    // E.g. a level of 16 uint64s (1024 bits) carries two such ranks.
    auto rank = ranks[level][bit / 512];

    // Add the set bits of the whole words that precede ours inside the same
    // 8-word run. For bit 576 that is exactly one word (word 8 of the level).
    for (int j = wordIdx & ~7; j != wordIdx; j += 1) {
      rank += __builtin_popcountll(levelBits[j]);
    }

    // Finally add the set bits below ours inside our own word. The guard avoids
    // a shift by 64 when we are bit 0 of the word.
    if (bitInWord > 0) {
      rank += __builtin_popcountll(word << (64 - bitInWord));
    }

    return rank;
  }

  // No level claimed this scalar.
  return 0;
}
// A scalar bit array is represented using a combination of quick look bit
// arrays and specific bit arrays expanding these quick look arrays. There's
// usually a few data structures accompanying these bit arrays like ranks, data
// indices, and an actual data array.
//
// The bit arrays are constructed to look somewhat like the following:
//
// [quickLookSize, {uint64 * quickLookSize}, {5 * uint64}, {5 * uint64},
// {5 * uint64}...]
//
// where the number of {5 * uint64} (a specific bit array) is equal to the
// number of bits turned on within the {uint64 * quickLookSize}. This can be
// easily calculated using the passed in ranks arrays who looks like the
// following:
//
// [{uint16 * quickLookSize}, {5 * uint16}, {5 * uint16}, {5 * uint16}...]
//
// which is the same exact scheme as the bit arrays. Ranks contain the number of
// previously turned on bits within their respective {}. For instance, each
// chunk, {5 * uint16}, begins with 0x0 and continuously grows as the number of
// bits within the chunk turn on. An example sequence of this looks like:
// [0x0, 0x0, 0x30, 0x70, 0xB0] where the first uint64 obviously doesn't have a
// previous uint64 to look at, so its rank is 0. The second uint64's rank will
// be the number of bits turned on in the first uint64, which in this case is
// also 0. The third uint64's rank is 0x30 meaning there were 48 bits turned on
// from the first uint64 through the second uint64.
// Looks `scalar` up in a two-tier scalar bit array.
//
// - scalar:    the value to look up.
// - bitArrays: [quickLookSize, quick look words..., 5-word chunk arrays...].
// - ranks:     [one rank per quick look word..., 5 ranks per chunk array...].
//
// Returns INTPTR_MAX when the scalar's bit is not set at either tier.
// Otherwise returns the value stored in the spare bits of the chunk's last
// word plus the scalar's rank within its chunk.
__swift_intptr_t _swift_stdlib_getScalarBitArrayIdx(__swift_uint32_t scalar,
                                              const __swift_uint64_t *bitArrays,
                                              const __swift_uint16_t *ranks) {
  // Chunk size is the number of scalars represented by a single bit in the
  // quick look arrays: 0x110000 (the maximum Unicode scalar 0x10FFFF, plus 1)
  // spread over 64 quick look words of 64 bits each, i.e. 272 scalars per bit.
  auto chunkSize = 0x110000 / 64 / 64;

  // Base is the quick look bit, out of 64 * 64 = 4096, that covers our scalar.
  auto base = scalar / chunkSize;

  // Index of the quick look word holding that bit.
  auto idx = base / 64;

  // Position of that bit within the quick look word.
  auto chunkBit = base % 64;

  // The first element is the number of quick look words actually stored.
  // Trailing all-zero quick look words are omitted to save space.
  auto quickLookSize = bitArrays[0];

  // If our word index is past the stored quick look words, our scalar falls in
  // the omitted (all zero) tail. Written as `>=` rather than
  // `> quickLookSize - 1` so that a quickLookSize of 0 cannot wrap around to
  // UINT64_MAX and let every index through.
  if ((__swift_uint64_t) idx >= quickLookSize) {
    return INTPTR_MAX;
  }

  // Our scalar is covered by a quick look word that is actually stored.
  auto quickLook = bitArrays[idx + 1];

  // If the quick look word has our chunk bit clear, none of the 272
  // (chunkSize) scalars of this chunk have the property, ours included.
  if ((quickLook & ((__swift_uint64_t) 1 << chunkBit)) == 0) {
    return INTPTR_MAX;
  }

  // Our scalar passed the quick look check, so look it up in the chunk
  // specific bit array. Ranks are cumulative counts of the set bits in the
  // words before a given word.
  //
  // For example, for words [1, 3, 10] the ranks are [0, 1, 3]: the first word
  // has nothing before it, the second sees the single bit of 1, and the third
  // sees the bits of both 1 and 3 (1 + 2 = 3).
  auto chunkRank = ranks[idx];

  // Add the set bits that precede our chunk bit inside the quick look word.
  // The guard avoids a shift by 64 when ours is bit 0.
  if (chunkBit != 0) {
    chunkRank += __builtin_popcountll(quickLook << (64 - chunkBit));
  }

  // Every set quick look bit owns a bit array of 5 uint64s (5 * 64 = 320 bits,
  // enough for the 272 scalars of a chunk). Our chunk's array is located at:
  //   1               (the quick look count)
  //   + quickLookSize (the stored quick look words)
  //   + chunkRank * 5 (the chunk arrays of all set bits before ours)
  auto chunkBA = bitArrays + 1 + quickLookSize + (chunkRank * 5);

  // The bit within 0 - 271 (chunkSize) of the chunk that represents our scalar.
  auto scalarOverallBit = scalar - (base * chunkSize);

  // The position of that bit inside its uint64.
  auto scalarSpecificBit = scalarOverallBit % 64;

  // The index of that uint64 inside the chunk's bit array.
  auto scalarWord = scalarOverallBit / 64;

  auto chunkWord = chunkBA[scalarWord];

  // If our scalar's own bit is clear in the chunk's bit array, then we know for
  // sure that our scalar does not have this property.
  if ((chunkWord & ((__swift_uint64_t) 1 << scalarSpecificBit)) == 0) {
    return INTPTR_MAX;
  }

  // Otherwise, this scalar does have whatever property this bit array is
  // representing. Ranks also covers the chunk arrays: each chunk is given
  // 5 uint16s, laid out after the quick look ranks.
  auto scalarRank = ranks[quickLookSize + (chunkRank * 5) + scalarWord];

  // Again, add the set bits that precede our bit inside its uint64.
  if (scalarSpecificBit != 0) {
    scalarRank += __builtin_popcountll(chunkWord << (64 - scalarSpecificBit));
  }

  // The last uint64 of a chunk only needs 16 bits for scalars
  // (4 * 64 + 16 = 272), which leaves 48 spare bits. Those spare bits store
  // the chunk's starting index into the data index array.
  auto chunkDataIdx = chunkBA[4] >> 16;

  // The result is the chunk's starting index (or rather whatever value is
  // stored in the spare bits) plus the rank of our scalar within the chunk.
  return chunkDataIdx + scalarRank;
}

View File

@@ -0,0 +1,44 @@
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2021 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
#ifndef SWIFT_STDLIB_UNICODEDATA_H
#define SWIFT_STDLIB_UNICODEDATA_H
#include "../include/shims/SwiftStdbool.h"
#include "../include/shims/SwiftStdint.h"
#include "../include/shims/Visibility.h"
#ifdef __cplusplus
extern "C" {
#endif
//===----------------------------------------------------------------------===//
// Utilities
//===----------------------------------------------------------------------===//
// Looks `scalar` up in a minimal perfect hash table made of `levels` bit
// arrays and returns the resulting index.
//
// - keys:  keys[i] is the bit array of level i.
// - ranks: ranks[i] holds the ranks supplied for every 512 bits of level i.
// - sizes: sizes[i] is the modulus used when hashing into level i.
//
// Returns 0 when no level has the scalar's bit set.
SWIFT_RUNTIME_STDLIB_INTERNAL
__swift_intptr_t _swift_stdlib_getMphIdx(__swift_uint32_t scalar,
__swift_intptr_t levels,
const __swift_uint64_t * const *keys,
const __swift_uint16_t * const *ranks,
const __swift_uint16_t * const sizes);
// Looks `scalar` up in a scalar bit array (`bitArrays`) with its accompanying
// `ranks` array. Returns INTPTR_MAX when the scalar's bit is not set,
// otherwise an index derived from the chunk's stored start index and the
// scalar's rank within its chunk.
SWIFT_RUNTIME_STDLIB_INTERNAL
__swift_intptr_t _swift_stdlib_getScalarBitArrayIdx(__swift_uint32_t scalar,
const __swift_uint64_t *bitArrays,
const __swift_uint16_t *ranks);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // SWIFT_STDLIB_UNICODEDATA_H

View File

@@ -0,0 +1,65 @@
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2021 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
#include "../include/GraphemeData.h"
#include "UnicodeData.h"
#include <stdint.h>
// Returns the grapheme break property stored for `scalar`, or 0xFF when the
// scalar is not covered by any range in the table (which indicates .any).
//
// The table is walked as a binary search tree stored in an array: element 0 is
// a dummy, the root is element 1, and the children of node n are at 2n (left)
// and 2n + 1 (right).
SWIFT_RUNTIME_STDLIB_INTERNAL
__swift_uint8_t _swift_stdlib_getGraphemeBreakProperty(__swift_uint32_t scalar) {
  for (auto node = 1; node < GRAPHEME_BREAK_DATA_COUNT;) {
    auto packed = _swift_stdlib_graphemeBreakProperties[node];

    // Top 3 bits: the property enum value.
    auto property = (__swift_uint8_t)(packed >> 29);

    // Low 21 bits: the first scalar of the range.
    auto first = (packed << 11) >> 11;

    // The 8 bits in between: how far the range extends past `first`.
    auto last = first + ((packed << 3) >> 24);

    // Special case: extendedPictographic uses one extra bit for the range.
    if (property == 5) {
      last = first + ((packed << 2) >> 23);
    }

    if (scalar < first) {
      // Descend into the left child.
      node = 2 * node;
    } else if (scalar > last) {
      // Descend into the right child.
      node = 2 * node + 1;
    } else {
      return property;
    }
  }

  // Walked off the tree without finding a covering range.
  return 0xFF;
}
// Returns true when `scalar` has its bit set in the InCB_Consonant scalar bit
// array, i.e. when the bit array lookup yields anything but INTPTR_MAX.
SWIFT_RUNTIME_STDLIB_INTERNAL
__swift_bool _swift_stdlib_isInCB_Consonant(__swift_uint32_t scalar) {
  return _swift_stdlib_getScalarBitArrayIdx(
             scalar, _swift_stdlib_InCB_Consonant,
             _swift_stdlib_InCB_Consonant_ranks) != INTPTR_MAX;
}

View File

@@ -0,0 +1,113 @@
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2021 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
#include "../include/NormalizationData.h"
#include "UnicodeData.h"
#include <stdint.h>
// Returns the packed normalization data stored for `scalar`, or 0 when the
// scalar has no normalization information.
SWIFT_RUNTIME_STDLIB_INTERNAL
__swift_uint16_t _swift_stdlib_getNormData(__swift_uint32_t scalar) {
  // Everything below U+00C0 (ASCII and some latiny scalars) takes the fast
  // path: these scalars have no normalization properties.
  if (scalar >= 0xC0) {
    auto slot = _swift_stdlib_getScalarBitArrayIdx(scalar,
                                                   _swift_stdlib_normData,
                                                   _swift_stdlib_normData_ranks);

    // INTPTR_MAX means the scalar is absent from the bit array.
    if (slot != INTPTR_MAX) {
      auto dataSlot = _swift_stdlib_normData_data_indices[slot];
      return _swift_stdlib_normData_data[dataSlot];
    }
  }

  return 0;
}
// Pointer alias for the `_swift_stdlib_nfd_decomp` table (presumably declared
// in NormalizationData.h -- confirm), published under a symbol carrying the
// SWIFT_RUNTIME_STDLIB_INTERNAL annotation.
SWIFT_RUNTIME_STDLIB_INTERNAL
const __swift_uint8_t * const _swift_stdlib_nfd_decompositions = _swift_stdlib_nfd_decomp;
// Returns the NFD decomposition index entry that `scalar` hashes to. No check
// is made here that the entry actually belongs to `scalar`.
SWIFT_RUNTIME_STDLIB_INTERNAL
__swift_uint32_t _swift_stdlib_getDecompositionEntry(__swift_uint32_t scalar) {
  const __swift_intptr_t slot =
      _swift_stdlib_getMphIdx(scalar, NFD_DECOMP_LEVEL_COUNT,
                              _swift_stdlib_nfd_decomp_keys,
                              _swift_stdlib_nfd_decomp_ranks,
                              _swift_stdlib_nfd_decomp_sizes);

  return _swift_stdlib_nfd_decomp_indices[slot];
}
// Returns the scalar that `x` followed by `y` composes to, or UINT32_MAX when
// the pair has no composition.
//
// `y` selects a per-scalar array through the minimal perfect hash. Element 0
// of that array is a header (low 21 bits: the scalar the array belongs to,
// high bits: the element count); the remaining elements are searched for `x`.
SWIFT_RUNTIME_STDLIB_INTERNAL
__swift_uint32_t _swift_stdlib_getComposition(__swift_uint32_t x,
                                              __swift_uint32_t y) {
  __swift_intptr_t slot = _swift_stdlib_getMphIdx(y, NFC_COMP_LEVEL_COUNT,
                                                  _swift_stdlib_nfc_comp_keys,
                                                  _swift_stdlib_nfc_comp_ranks,
                                                  _swift_stdlib_nfc_comp_sizes);
  auto table = _swift_stdlib_nfc_comp_indices[slot];
  auto header = table[0];

  // The hash maps every input somewhere, so make sure this array really is the
  // one for our y scalar.
  if (y != ((header << 11) >> 11)) {
    return UINT32_MAX;
  }

  // Binary search over elements 1 ..< count.
  __swift_uint32_t first = 1;
  __swift_uint32_t last = (header >> 21) - 1;

  while (last >= first) {
    auto mid = first + (last - first) / 2;
    auto packed = table[mid];

    // Low 17 bits: the x scalar this element is keyed on.
    auto candidate = (packed << 15) >> 15;

    if (x > candidate) {
      first = mid + 1;
    } else if (x < candidate) {
      last = mid - 1;
    } else {
      // Bits 17 - 30 hold the distance from x to the composed scalar and the
      // top bit says whether that distance is negative.
      auto distance = (packed << 1) >> 18;

      if (packed >> 31) {
        distance = -distance;
      }

      return candidate + distance;
    }
  }

  // x was not found in y's composition array.
  return UINT32_MAX;
}

View File

@@ -0,0 +1,514 @@
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2021 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
#include "../include/ScalarPropsData.h"
#include "../include/CaseData.h"
#include "../include/ScriptData.h"
#include "UnicodeData.h"
#include <stdint.h>
// Returns the binary properties bit set stored for `scalar`, found by binary
// search over a table of ranges sorted by their first scalar. Each entry
// packs the range's first scalar in its low 21 bits and an index into the
// properties data in the bits above; a range ends where the next one begins.
SWIFT_RUNTIME_STDLIB_INTERNAL
__swift_uint64_t _swift_stdlib_getBinaryProperties(__swift_uint32_t scalar) {
  auto count = BIN_PROPS_COUNT;
  auto lo = 0;
  auto hi = count - 1;

  while (hi >= lo) {
    auto mid = lo + (hi - lo) / 2;
    auto packed = _swift_stdlib_scalar_binProps[mid];

    // Shift the data index out to get the first scalar of the range.
    auto first = (packed << 11) >> 11;

    // The last range runs to 0x10FFFF; every other range runs up to the
    // scalar just before the next entry's first scalar.
    __swift_uint32_t last = 0x10FFFF;

    if (mid != count - 1) {
      auto following = _swift_stdlib_scalar_binProps[mid + 1];
      last = ((following << 11) >> 11) - 1;
    }

    if (scalar > last) {
      lo = mid + 1;
    } else if (scalar < first) {
      hi = mid - 1;
    } else {
      // Shift the scalar out to get the data index.
      return _swift_stdlib_scalar_binProps_data[packed >> 21];
    }
  }

  // Reaching this point means the scalar was not found at all. That should
  // never happen because the table represents all scalars from 0x0 to
  // 0x10FFFF, but if it somehow does, return 0 to indicate no properties.
  return 0;
}
// Returns the numeric type enum value stored for `scalar`, or UINT8_MAX when
// the scalar is not covered by any range in the numeric type table.
//
// Each entry packs: low 21 bits = first scalar of the range, next 8 bits =
// how far the range extends past it, top 3 bits = the numeric type.
SWIFT_RUNTIME_STDLIB_INTERNAL
__swift_uint8_t _swift_stdlib_getNumericType(__swift_uint32_t scalar) {
  auto lo = 0;
  auto hi = NUMERIC_TYPE_COUNT - 1;

  while (hi >= lo) {
    auto mid = lo + (hi - lo) / 2;
    auto packed = _swift_stdlib_numeric_type[mid];
    auto first = (packed << 11) >> 11;
    auto last = first + ((packed << 3) >> 24);

    if (scalar > last) {
      lo = mid + 1;
    } else if (scalar < first) {
      hi = mid - 1;
    } else {
      return (__swift_uint8_t)(packed >> 29);
    }
  }

  // The scalar was not found in the numeric type table; return the max to
  // indicate that there is no numeric type for it.
  return UINT8_MAX;
}
// Returns the numeric value that `scalar` hashes to. No check is made here
// that the scalar actually has a numeric value.
SWIFT_RUNTIME_STDLIB_INTERNAL
double _swift_stdlib_getNumericValue(__swift_uint32_t scalar) {
  const __swift_intptr_t slot =
      _swift_stdlib_getMphIdx(scalar, NUMERIC_VALUES_LEVEL_COUNT,
                              _swift_stdlib_numeric_values_keys,
                              _swift_stdlib_numeric_values_ranks,
                              _swift_stdlib_numeric_values_sizes);

  // The hash slot yields an index into the table of distinct values.
  return _swift_stdlib_numeric_values[_swift_stdlib_numeric_values_indices[slot]];
}
// Returns the name alias string stored for `scalar`, or nullptr when the
// scalar is absent from the name alias bit array.
SWIFT_RUNTIME_STDLIB_INTERNAL
const char *_swift_stdlib_getNameAlias(__swift_uint32_t scalar) {
  auto slot = _swift_stdlib_getScalarBitArrayIdx(scalar,
                                                 _swift_stdlib_nameAlias,
                                                 _swift_stdlib_nameAlias_ranks);

  if (slot != INTPTR_MAX) {
    return _swift_stdlib_nameAlias_data[slot];
  }

  return nullptr;
}
// Returns the simple case mapping value stored for `scalar`.
//
// - mapping: 0 = uppercase, 1 = lowercase, 2 = titlecase.
//
// Returns 0 when the scalar has no entry, when `mapping` is not one of the
// values above, or when the entry has no mapping of the requested kind.
SWIFT_RUNTIME_STDLIB_INTERNAL
__swift_int32_t _swift_stdlib_getMapping(__swift_uint32_t scalar,
                                         __swift_uint8_t mapping) {
  auto slot = _swift_stdlib_getScalarBitArrayIdx(scalar,
                                                 _swift_stdlib_mappings,
                                                 _swift_stdlib_mappings_ranks);

  if (slot == INTPTR_MAX) {
    return 0;
  }

  auto packed = _swift_stdlib_mappings_data_indices[slot];

  // Unknown mapping.
  if (mapping > 2) {
    return 0;
  }

  // One byte per mapping kind: byte 0 is uppercase, byte 1 is lowercase and
  // byte 2 is titlecase.
  auto dataSlot = (__swift_uint8_t)((packed >> (8 * mapping)) & 0xFF);

  // 0xFF marks "no mapping of this kind".
  if (dataSlot == 0xFF) {
    return 0;
  }

  return _swift_stdlib_mappings_data[dataSlot];
}
// Returns a pointer to the bytes of the special case mapping stored for
// `scalar` and writes their count to `*length`.
//
// - mapping: 0 = uppercase, 1 = lowercase, 2 = titlecase.
//
// Returns nullptr, leaving `*length` untouched, when the scalar has no special
// mapping or when `mapping` is not one of the values above.
SWIFT_RUNTIME_STDLIB_INTERNAL
const __swift_uint8_t *_swift_stdlib_getSpecialMapping(__swift_uint32_t scalar,
                                                       __swift_uint8_t mapping,
                                                       __swift_intptr_t *length) {
  auto slot = _swift_stdlib_getScalarBitArrayIdx(scalar,
                                          _swift_stdlib_special_mappings,
                                          _swift_stdlib_special_mappings_ranks);

  if (slot == INTPTR_MAX) {
    return nullptr;
  }

  // Unknown mapping.
  if (mapping > 2) {
    return nullptr;
  }

  // A scalar's data is three length-prefixed records stored back to back:
  // uppercase, lowercase, titlecase. Start at the uppercase record and hop
  // over as many records as needed to land on the requested one.
  auto record = _swift_stdlib_special_mappings_data +
                _swift_stdlib_special_mappings_data_indices[slot];

  for (__swift_uint8_t skipped = 0; skipped < mapping; skipped += 1) {
    record += 1 + *record;
  }

  // The first byte of a record is its length; the payload follows.
  *length = *record;
  return record + 1;
}
// Writes the name of `scalar` into `buffer`, with the words of the name
// separated by single spaces, and returns the number of initialized bytes.
//
// - buffer:   destination for the name bytes.
// - capacity: maximum number of bytes that may be written to `buffer`.
//
// Returns 0 when the scalar has no name. When `capacity` is exhausted the
// name is cut short and the number of bytes written so far is returned.
SWIFT_RUNTIME_STDLIB_INTERNAL
__swift_intptr_t _swift_stdlib_getScalarName(__swift_uint32_t scalar,
                                             __swift_uint8_t *buffer,
                                             __swift_intptr_t capacity) {
  // Scalars are grouped in sets of 128; a set without any names is marked with
  // UINT16_MAX.
  auto setOffset = _swift_stdlib_names_scalar_sets[scalar >> 7];

  if (setOffset == UINT16_MAX) {
    return 0;
  }

  auto scalarIndex = (setOffset << 7) + (scalar & ((1 << 7) - 1));
  auto nameStart = _swift_stdlib_names_scalars[scalarIndex];

  // U+20 is the first scalar that Unicode defines a name for, so it is the
  // only scalar for which an offset of 0 is valid.
  if (nameStart == 0 && scalar != 0x20) {
    return 0;
  }

  // Our name ends where the next named scalar's name begins.
  __swift_uint32_t nameEnd = 0;

  if (scalarIndex == NAMES_SCALARS_MAX_INDEX) {
    // This is the last element in the array, which represents the last scalar
    // name that Unicode defines (excluding variation selectors).
    nameEnd = NAMES_LAST_SCALAR_OFFSET;
  } else {
    // Skip over following scalars without a name (offset 0).
    for (int step = 1; nameEnd == 0; step += 1) {
      nameEnd = _swift_stdlib_names_scalars[scalarIndex + step];
    }
  }

  auto nameSize = nameEnd - nameStart;

  // The total number of initialized bytes in the name string.
  int written = 0;

  // Appends one byte unless the buffer is already full.
  auto put = [&](__swift_uint8_t byte) -> bool {
    if (written >= capacity) {
      return false;
    }

    buffer[written++] = byte;
    return true;
  };

  for (__swift_uint32_t pos = 0; pos < nameSize; pos += 1) {
    __swift_uint16_t wordIndex = (__swift_uint16_t) _swift_stdlib_names[
      nameStart + pos
    ];

    // A word index of 0xFF is an escape: the index is larger than a byte, and
    // the next two bytes compose the 16 bit index, low byte first.
    if (wordIndex == 0xFF) {
      pos += 1;
      auto lowByte = _swift_stdlib_names[nameStart + pos];
      wordIndex = lowByte;

      pos += 1;
      auto highByte = _swift_stdlib_names[nameStart + pos];
      wordIndex |= highByte << 8;
    }

    auto letter = _swift_stdlib_words + _swift_stdlib_word_indices[wordIndex];

    // The last character in a word has the 7th bit set; copy everything
    // before it as is.
    for (; *letter < 0x80; ++letter) {
      if (!put(*letter)) {
        return written;
      }
    }

    // Copy the last character with its marker bit cleared.
    if (!put(*letter & 0x7F)) {
      return written;
    }

    if (!put(' ')) {
      return written;
    }
  }

  // Drop the trailing space from the count of initialized bytes.
  return written - 1;
}
// Returns the age value stored for `scalar`, or UINT16_MAX when the scalar is
// not covered by any range in the age table.
//
// Each entry packs: low 21 bits = first scalar of the range, upper 32 bits =
// how far the range extends past it, and above the scalar in the lower half
// an index into the age data.
SWIFT_RUNTIME_STDLIB_INTERNAL
__swift_uint16_t _swift_stdlib_getAge(__swift_uint32_t scalar) {
  auto lo = 0;
  auto hi = AGE_COUNT - 1;

  while (hi >= lo) {
    auto mid = lo + (hi - lo) / 2;
    auto packed = _swift_stdlib_ages[mid];
    auto first = (packed << 43) >> 43;
    auto last = first + (packed >> 32);

    if (scalar > last) {
      lo = mid + 1;
    } else if (scalar < first) {
      hi = mid - 1;
    } else {
      auto dataSlot = (__swift_uint8_t)((packed << 32) >> 32 >> 21);
      return _swift_stdlib_ages_data[dataSlot];
    }
  }

  // The scalar was not found in the age table; return the max to indicate
  // that there is no age for it.
  return UINT16_MAX;
}
// Returns the general category enum value stored for `scalar`, or UINT8_MAX
// when the scalar is not covered by any range in the general category table.
//
// Each entry packs: low 21 bits = first scalar of the range, upper 32 bits =
// how far the range extends past it, and above the scalar in the lower half
// the general category.
SWIFT_RUNTIME_STDLIB_INTERNAL
__swift_uint8_t _swift_stdlib_getGeneralCategory(__swift_uint32_t scalar) {
  auto lo = 0;
  auto hi = GENERAL_CATEGORY_COUNT - 1;

  while (hi >= lo) {
    auto mid = lo + (hi - lo) / 2;
    auto packed = _swift_stdlib_generalCategory[mid];
    auto first = (packed << 43) >> 43;
    auto last = first + (packed >> 32);

    if (scalar > last) {
      lo = mid + 1;
    } else if (scalar < first) {
      hi = mid - 1;
    } else {
      return (__swift_uint8_t)((packed << 32) >> 32 >> 21);
    }
  }

  // The scalar was not found in the general category table; return the max to
  // indicate that there is no category for it.
  return UINT8_MAX;
}
// Returns the script enum value stored for `scalar`, found by binary search
// over a table of ranges sorted by their first scalar. Each entry packs the
// range's first scalar in its low 21 bits and the script in the bits above; a
// range ends where the next one begins.
SWIFT_RUNTIME_STDLIB_INTERNAL
__swift_uint8_t _swift_stdlib_getScript(__swift_uint32_t scalar) {
  auto count = SCRIPTS_COUNT;
  auto lo = 0;
  auto hi = count - 1;

  while (hi >= lo) {
    auto mid = lo + (hi - lo) / 2;
    auto packed = _swift_stdlib_scripts[mid];

    // Shift the enum value out to get the first scalar of the range.
    auto first = (packed << 11) >> 11;

    // The last range runs to 0x10FFFF; every other range runs up to the
    // scalar just before the next entry's first scalar.
    __swift_uint32_t last = 0x10FFFF;

    if (mid != count - 1) {
      auto following = _swift_stdlib_scripts[mid + 1];
      last = ((following << 11) >> 11) - 1;
    }

    if (scalar > last) {
      lo = mid + 1;
    } else if (scalar < first) {
      hi = mid - 1;
    } else {
      // Shift the scalar out to get the enum value.
      return packed >> 21;
    }
  }

  // Reaching this point means the scalar was not found at all. That should
  // never happen because the table represents all scalars from 0x0 to
  // 0x10FFFF, but if it somehow does, return 255 to indicate a failure.
  return UINT8_MAX;
}
// Returns a pointer to the script extensions stored for `scalar` and writes
// how many there are to `*count`.
//
// Returns nullptr, leaving `*count` untouched, when the scalar has no script
// extensions.
SWIFT_RUNTIME_STDLIB_INTERNAL
const __swift_uint8_t *_swift_stdlib_getScriptExtensions(__swift_uint32_t scalar,
                                                         __swift_uint8_t *count) {
  auto dataIdx = _swift_stdlib_getScalarBitArrayIdx(scalar,
                                          _swift_stdlib_script_extensions,
                                          _swift_stdlib_script_extensions_ranks);

  // If we don't have an index into the data indices, then this scalar has no
  // script extensions.
  if (dataIdx == INTPTR_MAX) {
    return nullptr;
  }

  // The entry packs the number of extensions above bit 11 and the offset of
  // the first extension in the low 11 bits.
  auto scalarDataIdx = _swift_stdlib_script_extensions_data_indices[dataIdx];
  *count = scalarDataIdx >> 11;

  return _swift_stdlib_script_extensions_data + (scalarDataIdx & 0x7FF);
}
// Writes the scalar(s) that `scalar` case-maps to into `buffer`.
//
// One scalar is written when the scalar has no mapping (it maps to itself) or
// a simple mapping; a full mapping writes as many scalars as its stored count
// (2 or 3). `buffer` must have room for them.
SWIFT_RUNTIME_STDLIB_INTERNAL
void _swift_stdlib_getCaseMapping(__swift_uint32_t scalar,
                                  __swift_uint32_t *buffer) {
  auto slot = _swift_stdlib_getMphIdx(scalar, CASE_FOLD_LEVEL_COUNT,
                                      _swift_stdlib_case_keys,
                                      _swift_stdlib_case_ranks,
                                      _swift_stdlib_case_sizes);
  auto packed = _swift_stdlib_case[slot];

  // The low 21 bits record which scalar this entry was built for. The hash maps
  // every input somewhere, so a mismatch means this scalar has no case
  // mapping: it maps to itself.
  __swift_uint32_t storedScalar = (packed << 43) >> 43;

  if (scalar != storedScalar) {
    buffer[0] = scalar;
    return;
  }

  // The top bit tells the two kinds of mapping apart.
  const bool mapsToMany = (packed & ((__swift_uint64_t)(0x1) << 63)) != 0;

  if (!mapsToMany) {
    // Top bit NOT set: the scalar simply maps to another scalar, and the entry
    // stores the distance to it.
    auto distance = (__swift_int32_t)((packed << 1) >> 22);
    buffer[0] = (__swift_uint32_t)((__swift_int32_t)(scalar) - distance);
    return;
  }

  // Top bit set: the scalar maps to multiple scalars. Look the mapping up in
  // the full mph.
  auto fullSlot = _swift_stdlib_getMphIdx(scalar, CASE_FULL_FOLD_LEVEL_COUNT,
                                          _swift_stdlib_case_full_keys,
                                          _swift_stdlib_case_full_ranks,
                                          _swift_stdlib_case_full_sizes);
  auto remaining = _swift_stdlib_case_full[fullSlot];

  // Count is either 2 or 3.
  auto count = remaining >> 62;

  // Each mapped scalar takes 17 bits: a 16 bit magnitude followed by a sign
  // bit. Consume them from the bottom up.
  for (__swift_uint64_t i = 0; i != count; i += 1) {
    auto magnitude = (__swift_int32_t)(remaining & 0xFFFF);
    auto distance = (remaining & 0x10000) != 0 ? -magnitude : magnitude;

    remaining >>= 17;

    buffer[i] = (__swift_uint32_t)((__swift_int32_t)(scalar) - distance);
  }
}

View File

@@ -0,0 +1,43 @@
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2022 - 2023 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
#include "../include/WordData.h"
#include "UnicodeData.h"
#include <stdint.h>
// Returns the word break property stored for `scalar`, or UINT8_MAX when the
// scalar is not covered by any range in the table (which indicates .any).
//
// The table is walked as a binary search tree stored in an array: element 0 is
// a dummy, the root is element 1, and the children of node n are at 2n (left)
// and 2n + 1 (right). The property of a node lives at the same index in the
// data array.
SWIFT_RUNTIME_STDLIB_INTERNAL
__swift_uint8_t _swift_stdlib_getWordBreakProperty(__swift_uint32_t scalar) {
  for (auto node = 1; node < WORD_BREAK_DATA_COUNT;) {
    auto packed = _swift_stdlib_words[node];

    // Low 21 bits: the first scalar of the range.
    auto first = (packed << 11) >> 11;

    if (scalar < first) {
      // Descend into the left child.
      node = 2 * node;
      continue;
    }

    // The bits above the scalar hold the number of scalars in the range.
    auto last = first + (packed >> 21) - 1;

    if (scalar <= last) {
      return _swift_stdlib_words_data[node];
    }

    // Descend into the right child.
    node = 2 * node + 1;
  }

  // Walked off the tree without finding a covering range.
  return UINT8_MAX;
}

18
Shared/source/shims.c Normal file
View File

@@ -0,0 +1,18 @@
#include <stdlib.h>
#include <malloc.h>
#include <errno.h>
int posix_memalign(void **res, size_t align, size_t len) {
if (align < sizeof(void *)) return 22;
void *mem = memalign(align, len);
if (!mem) return errno;
*res = mem;
return 0;
}
/*
 * Fills exactly `length` bytes of `buffer` with pseudo-random data and
 * returns 0.
 *
 * NOTE(review): the bytes come from rand(), which is not a cryptographically
 * secure source; callers that need real entropy should not rely on this shim.
 */
int getentropy(void *buffer, size_t length) {
  unsigned char *bytes = (unsigned char *)buffer;

  /* `length` counts bytes, so write one byte per iteration. Storing a whole
     int per iteration would run sizeof(int) times past the end of the
     buffer. */
  for (size_t i = 0; i < length; i++) {
    bytes[i] = (unsigned char)(rand() & 0xFF);
  }

  return 0;
}