commit 0ea9cff075677315e44a096b6e4d61a1252b95e9
parent 9ed87fdf96badfee0e8b2c7ec441254c4cb9c990
Author: Laslo Hunhold <dev@frign.de>
Date: Sat, 8 Jan 2022 15:45:39 +0100
gen/util: Add properties-handling and clean up old range-list-functions
As already announced, we will generate separate data-tables for separate
properties. To make it all strict-aliasing-compliant, we have one
properties-struct (which currently only contains one entry,
"break_property") that will, however, contain more entries if needed.
Separate definitions for each property-type would have to be passed
around as void-pointers, and playing around with void-pointers quickly
turns into undefined behaviour, which is why it makes no sense to have
them.
Each "user" only uses a certain subset of the fields in the struct.
Given that the unused ones are always zero, they make no difference
to the compression.
To avoid code-duplication, the generation of break-property-tables
is handled by a single function, which is then called in the respective
generation tool.
Signed-off-by: Laslo Hunhold <dev@frign.de>
Diffstat:
M | gen/properties.c | | | 310 | +------------------------------------------------------------------------------ |
M | gen/util.c | | | 386 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------- |
M | gen/util.h | | | 28 | +++++++++++----------------- |
M | src/character.c | | | 3 | ++- |
4 files changed, 323 insertions(+), 404 deletions(-)
diff --git a/gen/properties.c b/gen/properties.c
@@ -12,22 +12,6 @@
#define FILE_EMOJI "data/emoji-data.txt"
#define FILE_GRAPHEME "data/GraphemeBreakProperty.txt"
-struct properties {
- uint_least8_t char_break_property;
-};
-
-struct property_spec {
- const char *enumname;
- const char *file;
- const char *ucdname;
-};
-
-struct property_payload {
- struct properties *prop;
- const struct property_spec *spec;
- uint_least8_t speclen;
-};
-
static const struct property_spec char_break_property[] = {
{
.enumname = "OTHER",
@@ -106,302 +90,14 @@ static const struct property_spec char_break_property[] = {
},
};
-static int
-break_property_callback(char *file, char **field, size_t nfields,
- char *comment, void *payload)
-{
- /* prop always has the length 0x110000 */
- struct property_payload *p = (struct property_payload *)payload;
- struct range r;
- uint_least8_t i;
- uint_least32_t cp;
-
- (void)comment;
-
- if (nfields < 2) {
- return 1;
- }
-
- for (i = 0; i < p->speclen; i++) {
- /* identify fitting file and identifier */
- if (p->spec[i].file &&
- !strcmp(p->spec[i].file, file) &&
- !strcmp(p->spec[i].ucdname, field[1])) {
- /* parse range in first field */
- if (range_parse(field[0], &r)) {
- return 1;
- }
-
- /* apply to all codepoints in the range */
- for (cp = r.lower; cp <= r.upper; cp++) {
- if (p->spec == char_break_property) {
- if (p->prop[cp].char_break_property != 0) {
- fprintf(stderr, "break_property_callback: "
- "Character break property overlap.\n");
- exit(1);
- }
- p->prop[cp].char_break_property = i;
- } else {
- fprintf(stderr, "break_property_callback: "
- "Unknown specification.\n");
- exit(1);
- }
- }
-
- break;
- }
- }
-
- return 0;
-}
-
-struct compressed_properties {
- size_t *offset;
- struct properties *data;
- size_t datalen;
-};
-
-static void
-compress_properties(const struct properties *prop,
- struct compressed_properties *comp)
-{
- uint_least32_t cp, i;
-
- /* initialization */
- if (!(comp->offset = malloc((size_t)0x110000 * sizeof(*(comp->offset))))) {
- fprintf(stderr, "malloc: %s\n", strerror(errno));
- exit(1);
- }
- comp->data = NULL;
- comp->datalen = 0;
-
- for (cp = 0; cp < 0x110000; cp++) {
- for (i = 0; i < comp->datalen; i++) {
- if (!memcmp(&(prop[cp]), &(comp->data[i]), sizeof(*prop))) {
- /* found a match! */
- comp->offset[cp] = i;
- break;
- }
- }
- if (i == comp->datalen) {
- /*
- * found no matching properties-struct, so
- * add current properties to data and add the
- * offset in the offset-table
- */
- if (!(comp->data = reallocarray(comp->data,
- ++(comp->datalen),
- sizeof(*(comp->data))))) {
- fprintf(stderr, "reallocarray: %s\n",
- strerror(errno));
- exit(1);
- }
- memcpy(&(comp->data[comp->datalen - 1]), &(prop[cp]),
- sizeof(*prop));
- comp->offset[cp] = comp->datalen - 1;
- }
- }
-}
-
-struct major_minor_properties {
- size_t *major;
- size_t *minor;
- size_t minorlen;
-};
-
-static double
-get_major_minor_properties(const struct compressed_properties *comp,
- struct major_minor_properties *mm)
-{
- size_t i, j, compression_count = 0;
-
- /*
- * we currently have an array comp->offset which maps the
- * codepoints 0..0x110000 to offsets into comp->data.
- * To improve cache-locality instead and allow a bit of
- * compressing, instead of directly mapping a codepoint
- * 0xAAAABB with comp->offset, we generate two arrays major
- * and minor such that
- * comp->offset(0xAAAABB) == minor[major[0xAAAA] + 0xBB]
- * This yields a major-array of length 2^16 and a minor array
- * of variable length depending on how many common subsequences
- * can be filtered out.
- */
-
- /* initialize */
- if (!(mm->major = malloc((size_t)0x1100 * sizeof(*(mm->major))))) {
- fprintf(stderr, "malloc: %s\n", strerror(errno));
- exit(1);
- }
- mm->minor = NULL;
- mm->minorlen = 0;
-
- printf("#include <stdint.h>\n\n");
-
- for (i = 0; i < (size_t)0x1100; i++) {
- /*
- * we now look at the cp-range (i << 8)..(i << 8 + 0xFF)
- * and check if its corresponding offset-data already
- * exists in minor (because then we just point there
- * and need less storage)
- */
- for (j = 0; j + 0xFF < mm->minorlen; j++) {
- if (!memcmp(&(comp->offset[i << 8]),
- &(mm->minor[j]),
- sizeof(*(comp->offset)) * 0x100)) {
- break;
- }
- }
- if (j + 0xFF < mm->minorlen) {
- /* found an index */
- compression_count++;
- mm->major[i] = j;
- } else {
- /*
- * add "new" sequence to minor and point to it
- * in major
- */
- mm->minorlen += 0x100;
- if (!(mm->minor = reallocarray(mm->minor,
- mm->minorlen,
- sizeof(*(mm->minor))))) {
- fprintf(stderr, "reallocarray: %s\n",
- strerror(errno));
- exit(1);
- }
- memcpy(&(mm->minor[mm->minorlen - 0x100]),
- &(comp->offset[i << 8]),
- sizeof(*(mm->minor)) * 0x100);
- mm->major[i] = mm->minorlen - 0x100;
- }
- }
-
- /* return compression ratio */
- return (double)compression_count / 0x1100 * 100;
-}
-
-static void
-print_lookup_table(char *name, size_t *data, size_t datalen)
-{
- char *type;
- size_t i, maxval;
-
- for (i = 0, maxval = 0; i < datalen; i++) {
- if (data[i] > maxval) {
- maxval = data[i];
- }
- }
-
- type = (maxval <= UINT_LEAST8_MAX) ? "uint_least8_t" :
- (maxval <= UINT_LEAST16_MAX) ? "uint_least16_t" :
- (maxval <= UINT_LEAST32_MAX) ? "uint_least32_t" :
- "uint_least64_t";
-
- printf("static const %s %s[] = {\n\t", type, name);
- for (i = 0; i < datalen; i++) {
- printf("%zu", data[i]);
- if (i + 1 == datalen) {
- printf("\n");
- } else if ((i + 1) % 8 != 0) {
- printf(", ");
- } else {
- printf(",\n\t");
- }
-
- }
- printf("};\n");
-}
-
-static uint_least8_t
-get_value(const void *payload, size_t offset)
-{
- return ((const struct properties *)payload)[offset].char_break_property;
-}
-
-static void
-print_derived_lookup_table(char *name, size_t *offset, size_t offsetlen,
- uint_least8_t (*get_value)(const void *, size_t),
- const void *payload)
-{
- size_t i;
-
- printf("static const uint_least8_t %s[] = {\n\t", name);
- for (i = 0; i < offsetlen; i++) {
- printf("%"PRIuLEAST8, get_value(payload, offset[i]));
- if (i + 1 == offsetlen) {
- printf("\n");
- } else if ((i + 1) % 8 != 0) {
- printf(", ");
- } else {
- printf(",\n\t");
- }
-
- }
- printf("};\n");
-}
-
-static void
-print_enum(const struct property_spec *spec, size_t speclen,
- const char *enumname, const char *enumprefix)
-{
- size_t i;
-
- printf("enum %s {\n", enumname);
- for (i = 0; i < speclen; i++) {
- printf("\t%s_%s,\n", enumprefix, spec[i].enumname);
- }
- printf("\tNUM_%sS,\n};\n\n", enumprefix);
-}
-
int
main(int argc, char *argv[])
{
- struct compressed_properties comp;
- struct major_minor_properties mm;
- struct property_payload payload;
- struct properties *prop;
-
(void)argc;
- /* allocate property buffer for all codepoints */
- if (!(prop = calloc(0x110000, sizeof(*prop)))) {
- fprintf(stderr, "calloc: %s\n", strerror(errno));
- exit(1);
- }
-
- /* extract properties */
- payload.prop = prop;
- payload.spec = char_break_property;
- payload.speclen = LEN(char_break_property);
-
- parse_file_with_callback(FILE_EMOJI, break_property_callback, &payload);
- parse_file_with_callback(FILE_GRAPHEME, break_property_callback, &payload);
-
- /*
- * deduplicate by generating an array of offsets into prop where
- * common data points at the same offset
- */
- compress_properties(prop, &comp);
-
- /* generate major-minor-offset-tables */
- fprintf(stderr, "%s: compression-ratio: %.2f%%\n", argv[0],
- get_major_minor_properties(&comp, &mm));
-
- /* print data */
- print_enum(char_break_property, LEN(char_break_property),
- "char_break_property", "CHAR_BREAK_PROP");
-
- print_lookup_table("major", mm.major, 0x1100);
- printf("\n");
- print_derived_lookup_table("minor", mm.minor, mm.minorlen, get_value,
- comp.data);
-
- /* free data */
- free(prop);
- free(comp.data);
- free(comp.offset);
- free(mm.major);
- free(mm.minor);
+ /*
+ * parsing, compression and printing are all done by the shared
+ * helper; "char" is the prefix used for the generated identifiers
+ * and argv[0] only labels the compression-ratio message
+ */
+ properties_generate_break_property(char_break_property,
+ LEN(char_break_property),
+ "char", argv[0]);
return 0;
}
diff --git a/gen/util.c b/gen/util.c
@@ -1,5 +1,7 @@
/* See LICENSE file for copyright and license details. */
+#include <ctype.h>
#include <errno.h>
+#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
@@ -8,10 +10,28 @@
#include "util.h"
-struct property_list_payload
-{
- struct property *prop;
- size_t numprops;
+/* codepoint range as filled in by range_parse(); both bounds are inclusive */
+struct range {
+ uint_least32_t lower; /* first codepoint of the range */
+ uint_least32_t upper; /* last codepoint of the range */
+};
+
+/* state handed to properties_callback() via parse_file_with_callback() */
+struct properties_payload {
+ struct properties *prop; /* one entry per codepoint, indexed by codepoint */
+ const struct property_spec *spec; /* specifications to match lines against */
+ uint_least8_t speclen; /* number of entries in spec */
+ /*
+ * called for every codepoint of a matching range with the index of
+ * the matching spec-entry; a non-zero return value aborts the program
+ */
+ int (*set_value)(struct properties_payload *, uint_least32_t, uint_least8_t);
+};
+
+/* deduplicated form of the per-codepoint properties-array */
+struct properties_compressed {
+ size_t *offset; /* 0x110000 entries mapping a codepoint to an index into data */
+ struct properties *data; /* the distinct properties-structs */
+ size_t datalen; /* number of entries in data */
+};
+
+/* two-level lookup: offset[cp] == minor[major[cp >> 8] + (cp & 0xFF)] */
+struct properties_major_minor {
+ size_t *major; /* 0x1100 entries, each the start of a 0x100-entry run in minor */
+ size_t *minor; /* runs of offsets copied from properties_compressed.offset */
+ size_t minorlen; /* number of entries in minor */
};
struct segment_test_payload
@@ -63,7 +83,7 @@ hextocp(const char *str, size_t len, uint_least32_t *cp)
return 0;
}
-int
+static int
range_parse(const char *str, struct range *range)
{
char *p;
@@ -85,28 +105,9 @@ range_parse(const char *str, struct range *range)
return 0;
}
-static void
-range_list_append(struct range **range, size_t *nranges, const struct range *new)
-{
- if (*nranges > 0 && (*range)[*nranges - 1].upper == new->lower) {
- /* we can merge with previous entry */
- (*range)[*nranges - 1].upper = new->upper;
- } else {
- /* need to append new entry */
- if ((*range = realloc(*range, (++(*nranges)) *
- sizeof(**range))) == NULL) {
- fprintf(stderr, "range_list_append: realloc: %s.\n",
- strerror(errno));
- exit(1);
- }
- (*range)[*nranges - 1].lower = new->lower;
- (*range)[*nranges - 1].upper = new->upper;
- }
-}
-
void
-parse_file_with_callback(char *fname, int (*callback)(char *, char **,
- size_t, char *, void *), void *payload)
+parse_file_with_callback(const char *fname, int (*callback)(const char *,
+ char **, size_t, char *, void *), void *payload)
{
FILE *fp;
char *line = NULL, **field = NULL, *comment;
@@ -197,12 +198,14 @@ parse_file_with_callback(char *fname, int (*callback)(char *, char **,
}
static int
-property_list_callback(char *fname, char **field, size_t nfields,
- char *comment, void *payload)
+properties_callback(const char *file, char **field, size_t nfields,
+ char *comment, void *payload)
{
- struct property *prop = ((struct property_list_payload *)payload)->prop;
+ /* prop always has the length 0x110000 */
+ struct properties_payload *p = (struct properties_payload *)payload;
struct range r;
- size_t i, numprops = ((struct property_list_payload *)payload)->numprops;
+ uint_least8_t i;
+ uint_least32_t cp;
(void)comment;
@@ -210,14 +213,22 @@ property_list_callback(char *fname, char **field, size_t nfields,
return 1;
}
- for (i = 0; i < numprops; i++) {
- if (!strcmp(field[1], prop[i].identifier) &&
- !strcmp(fname, prop[i].fname)) {
+ for (i = 0; i < p->speclen; i++) {
+ /* identify fitting file and identifier */
+ if (p->spec[i].file &&
+ !strcmp(p->spec[i].file, file) &&
+ !strcmp(p->spec[i].ucdname, field[1])) {
+ /* parse range in first field */
if (range_parse(field[0], &r)) {
return 1;
}
- range_list_append(&(prop[i].table),
- &(prop[i].tablelen), &r);
+
+ /* apply to all codepoints in the range */
+ for (cp = r.lower; cp <= r.upper; cp++) {
+ if (p->set_value(payload, cp, i)) {
+ exit(1);
+ }
+ }
break;
}
}
@@ -225,73 +236,290 @@ property_list_callback(char *fname, char **field, size_t nfields,
return 0;
}
-void
-property_list_parse(struct property *prop, size_t numprops)
+/*
+ * Deduplicate the per-codepoint properties in prop (0x110000 entries):
+ * comp->data receives every distinct properties-struct exactly once and
+ * comp->offset[cp] the index into comp->data of the struct belonging to
+ * codepoint cp. Both arrays are allocated here (the program exits if an
+ * allocation fails) and are freed by the caller.
+ */
+static void
+properties_compress(const struct properties *prop,
+ struct properties_compressed *comp)
{
- struct property_list_payload pl = {
- .prop = prop,
- .numprops = numprops
- };
- size_t i;
+ uint_least32_t cp, i;
- /* make sure to parse each file only once */
- for (i = 0; i < numprops; i++) {
- if (prop[i].tablelen > 0) {
- /* property's file was already parsed */
- continue;
+ /* initialization */
+ if (!(comp->offset = malloc((size_t)0x110000 * sizeof(*(comp->offset))))) {
+ fprintf(stderr, "malloc: %s\n", strerror(errno));
+ exit(1);
+ }
+ comp->data = NULL;
+ comp->datalen = 0;
+
+ for (cp = 0; cp < 0x110000; cp++) {
+ /* linear search through the structs collected so far (bytewise comparison) */
+ for (i = 0; i < comp->datalen; i++) {
+ if (!memcmp(&(prop[cp]), &(comp->data[i]), sizeof(*prop))) {
+ /* found a match! */
+ comp->offset[cp] = i;
+ break;
+ }
+ }
+ if (i == comp->datalen) {
+ /*
+ * found no matching properties-struct, so
+ * add current properties to data and add the
+ * offset in the offset-table
+ */
+ if (!(comp->data = reallocarray(comp->data,
+ ++(comp->datalen),
+ sizeof(*(comp->data))))) {
+ fprintf(stderr, "reallocarray: %s\n",
+ strerror(errno));
+ exit(1);
+ }
+ memcpy(&(comp->data[comp->datalen - 1]), &(prop[cp]),
+ sizeof(*prop));
+ comp->offset[cp] = comp->datalen - 1;
}
+ }
+}
- parse_file_with_callback(prop[i].fname,
- property_list_callback, &pl);
+/*
+ * Build the two-level lookup tables mm->major and mm->minor from
+ * comp->offset and return the percentage of the 0x1100 blocks of 0x100
+ * codepoints whose data was already present in minor. Both arrays are
+ * allocated here (the program exits if an allocation fails) and are
+ * freed by the caller. As a side effect, the "#include <stdint.h>" line
+ * of the generated output is printed to stdout.
+ */
+static double
+properties_get_major_minor(const struct properties_compressed *comp,
+ struct properties_major_minor *mm)
+{
+ size_t i, j, compression_count = 0;
+
+ /*
+ * we currently have an array comp->offset which maps the
+ * codepoints 0..0x10FFFF to offsets into comp->data.
+ * To improve cache-locality and allow a bit of
+ * compressing, instead of directly mapping a codepoint
+ * 0xAAAABB with comp->offset, we generate two arrays major
+ * and minor such that
+ * comp->offset[0xAAAABB] == minor[major[0xAAAA] + 0xBB]
+ * This yields a major-array of length 0x1100 (0x110000 >> 8)
+ * and a minor array of variable length depending on how many
+ * common subsequences can be filtered out.
+ */
+
+ /* initialize */
+ if (!(mm->major = malloc((size_t)0x1100 * sizeof(*(mm->major))))) {
+ fprintf(stderr, "malloc: %s\n", strerror(errno));
+ exit(1);
+ }
+ mm->minor = NULL;
+ mm->minorlen = 0;
+
+ /* the generated tables use the stdint-types, so emit the include first */
+ printf("#include <stdint.h>\n\n");
+
+ for (i = 0; i < (size_t)0x1100; i++) {
+ /*
+ * we now look at the cp-range (i << 8)..((i << 8) + 0xFF)
+ * and check if its corresponding offset-data already
+ * exists in minor (because then we just point there
+ * and need less storage); j advances in steps of one,
+ * so a match may start anywhere in minor and not only
+ * at a multiple of 0x100
+ */
+ for (j = 0; j + 0xFF < mm->minorlen; j++) {
+ if (!memcmp(&(comp->offset[i << 8]),
+ &(mm->minor[j]),
+ sizeof(*(comp->offset)) * 0x100)) {
+ break;
+ }
+ }
+ if (j + 0xFF < mm->minorlen) {
+ /* found an index */
+ compression_count++;
+ mm->major[i] = j;
+ } else {
+ /*
+ * add "new" sequence to minor and point to it
+ * in major
+ */
+ mm->minorlen += 0x100;
+ if (!(mm->minor = reallocarray(mm->minor,
+ mm->minorlen,
+ sizeof(*(mm->minor))))) {
+ fprintf(stderr, "reallocarray: %s\n",
+ strerror(errno));
+ exit(1);
+ }
+ memcpy(&(mm->minor[mm->minorlen - 0x100]),
+ &(comp->offset[i << 8]),
+ sizeof(*(mm->minor)) * 0x100);
+ mm->major[i] = mm->minorlen - 0x100;
+ }
}
+
+ /* return compression ratio in percent */
+ return (double)compression_count / 0x1100 * 100;
}
-void
-property_list_print(const struct property *prop, size_t numprops,
- const char *identifier, const char *progname)
+/*
+ * Print the datalen values in data to stdout as a static const C array
+ * called name, eight values per line. The element type is the smallest
+ * uint_leastN_t that can hold the largest value in data.
+ */
+static void
+properties_print_lookup_table(char *name, size_t *data, size_t datalen)
{
- size_t i, j;
+ char *type;
+ size_t i, maxval;
- printf("/* Automatically generated by %s */\n"
- "#include <stdint.h>\n\n#include \"../gen/types.h\"\n\n",
- progname);
+ /* determine the largest value to pick the element type */
+ for (i = 0, maxval = 0; i < datalen; i++) {
+ if (data[i] > maxval) {
+ maxval = data[i];
+ }
+ }
+
+ type = (maxval <= UINT_LEAST8_MAX) ? "uint_least8_t" :
+ (maxval <= UINT_LEAST16_MAX) ? "uint_least16_t" :
+ (maxval <= UINT_LEAST32_MAX) ? "uint_least32_t" :
+ "uint_least64_t";
+
+ printf("static const %s %s[] = {\n\t", type, name);
+ for (i = 0; i < datalen; i++) {
+ printf("%zu", data[i]);
+ /* no separator after the last value, a line break after every eighth */
+ if (i + 1 == datalen) {
+ printf("\n");
+ } else if ((i + 1) % 8 != 0) {
+ printf(", ");
+ } else {
+ printf(",\n\t");
+ }
- /* print enum */
- printf("enum %s {\n", identifier);
- for (i = 0; i < numprops; i++) {
- printf("\t%s,\n", prop[i].enumname);
}
- printf("};\n\n");
-
- /* print table */
- printf("static const struct range_list %s[] = {\n", identifier);
- for (i = 0; i < numprops; i++) {
- printf("\t[%s] = {\n\t\t.data = (struct range[]){\n",
- prop[i].enumname);
- for (j = 0; j < prop[i].tablelen; j++) {
- printf("\t\t\t{ UINT32_C(0x%06X), UINT32_C(0x%06X) },\n",
- prop[i].table[j].lower,
- prop[i].table[j].upper);
+ printf("};\n");
+}
+
+/*
+ * Print a static const uint_least8_t array called name to stdout, eight
+ * values per line. Entry i is not offset[i] itself but the value
+ * get_value() returns for payload and offset[i], i.e. the offsets are
+ * resolved to the data they point at before printing.
+ */
+static void
+properties_print_derived_lookup_table(char *name, size_t *offset, size_t offsetlen,
+ uint_least8_t (*get_value)(const struct properties *,
+ size_t), const void *payload)
+{
+ size_t i;
+
+ printf("static const uint_least8_t %s[] = {\n\t", name);
+ for (i = 0; i < offsetlen; i++) {
+ printf("%"PRIuLEAST8, get_value(payload, offset[i]));
+ /* no separator after the last value, a line break after every eighth */
+ if (i + 1 == offsetlen) {
+ printf("\n");
+ } else if ((i + 1) % 8 != 0) {
+ printf(", ");
+ } else {
+ printf(",\n\t");
}
- printf("\t\t},\n\t\t.len = %zu,\n\t},\n", prop[i].tablelen);
+
}
printf("};\n");
}
-void
-property_list_free(struct property *prop, size_t numprops)
+/*
+ * Print "enum <enumname>" to stdout with one constant
+ * <enumprefix>_<spec[i].enumname> per spec-entry, in the order of spec,
+ * followed by a final constant NUM_<enumprefix>S.
+ */
+static void
+properties_print_enum(const struct property_spec *spec, size_t speclen,
+ const char *enumname, const char *enumprefix)
{
size_t i;
- for (i = 0; i < numprops; i++) {
- free(prop[i].table);
- prop[i].table = NULL;
- prop[i].tablelen = 0;
+ printf("enum %s {\n", enumname);
+ for (i = 0; i < speclen; i++) {
+ printf("\t%s_%s,\n", enumprefix, spec[i].enumname);
+ }
+ printf("\tNUM_%sS,\n};\n\n", enumprefix);
+}
+
+/*
+ * set_value-handler for break properties: store value for codepoint cp,
+ * unless a non-zero break property has already been stored for it.
+ * Returns 0 on success and 1 (after printing a message to stderr) on
+ * such an overlap.
+ */
+static int
+set_value_bp(struct properties_payload *payload, uint_least32_t cp,
+ uint_least8_t value)
+{
+ struct properties *entry = &(payload->prop[cp]);
+
+ if (entry->break_property == 0) {
+ /* still at its initial value, so take over the new one */
+ entry->break_property = value;
+ return 0;
+ }
+
+ fprintf(stderr, "set_value_bp: "
+ "Character break property overlap.\n");
+
+ return 1;
+}
+
+/* get_value-handler returning the break property of the offset-th entry of prop */
+static uint_least8_t
+get_value_bp(const struct properties *prop, size_t offset)
+{
+ const struct properties *entry = prop + offset;
+
+ return entry->break_property;
+}
+
+/*
+ * Generate the break-property tables for one property type and print
+ * them to stdout.
+ *
+ * spec, speclen: the property specifications; every distinct non-NULL
+ * file named in them is parsed exactly once
+ * prefix: lower-case prefix of the generated identifiers
+ * (<prefix>_break_property, <prefix>_break_major,
+ * <prefix>_break_minor and, in upper case,
+ * <PREFIX>_BREAK_PROP for the enum constants)
+ * argv0: program name, only used to label the compression-ratio
+ * message written to stderr
+ *
+ * The program exits with status 1 if an allocation fails, a codepoint is
+ * assigned more than one property or a generated name does not fit its
+ * buffer.
+ */
+void
+properties_generate_break_property(const struct property_spec *spec,
+ uint_least8_t speclen, const char *prefix,
+ const char *argv0)
+{
+ struct properties_compressed comp;
+ struct properties_major_minor mm;
+ struct properties_payload payload;
+ struct properties *prop;
+ size_t i, j, prefixlen = strlen(prefix);
+ char buf1[64], prefix_uc[64], buf2[64], buf3[64], buf4[64];
+
+ /* allocate property buffer for all 0x110000 codepoints */
+ if (!(prop = calloc(0x110000, sizeof(*prop)))) {
+ fprintf(stderr, "calloc: %s\n", strerror(errno));
+ exit(1);
+ }
+
+ /* generate data */
+ payload.prop = prop;
+ payload.spec = spec;
+ payload.speclen = speclen;
+ payload.set_value = set_value_bp;
+
+ /* parse each file exactly once and ignore NULL-fields */
+ for (i = 0; i < speclen; i++) {
+ for (j = 0; j < i; j++) {
+ if (spec[i].file && spec[j].file &&
+ !strcmp(spec[i].file, spec[j].file)) {
+ /* file has already been parsed */
+ break;
+ }
+ }
+ if (i == j && spec[i].file) {
+ /* file has not been processed yet */
+ parse_file_with_callback(spec[i].file,
+ properties_callback,
+ &payload);
+ }
+ }
+
+ /* compress data */
+ properties_compress(prop, &comp);
+
+ fprintf(stderr, "%s: compression-ratio: %.2f%%\n", argv0,
+ properties_get_major_minor(&comp, &mm));
+
+ /* prepare names */
+ if ((size_t)snprintf(buf1, LEN(buf1), "%s_break_property", prefix) >= LEN(buf1)) {
+ fprintf(stderr, "snprintf: String truncated.\n");
+ exit(1);
}
+ /* prefix_uc must hold all prefixlen characters plus the terminating NUL */
+ if (prefixlen + 1 > LEN(prefix_uc)) {
+ fprintf(stderr, "snprintf: Buffer too small.\n");
+ exit(1);
+ }
+ for (i = 0; i < prefixlen; i++) {
+ /* toupper() is only defined for unsigned char values and EOF */
+ prefix_uc[i] = (char)toupper((unsigned char)prefix[i]);
+ }
+ prefix_uc[prefixlen] = '\0';
+ if ((size_t)snprintf(buf2, LEN(buf2), "%s_BREAK_PROP", prefix_uc) >= LEN(buf2) ||
+ (size_t)snprintf(buf3, LEN(buf3), "%s_break_major", prefix) >= LEN(buf3) ||
+ (size_t)snprintf(buf4, LEN(buf4), "%s_break_minor", prefix) >= LEN(buf4)) {
+ fprintf(stderr, "snprintf: String truncated.\n");
+ exit(1);
+ }
+
+ /* print data */
+ properties_print_enum(spec, speclen, buf1, buf2);
+ properties_print_lookup_table(buf3, mm.major, 0x1100);
+ printf("\n");
+ properties_print_derived_lookup_table(buf4, mm.minor, mm.minorlen,
+ get_value_bp, comp.data);
+
+ /* free data */
+ free(prop);
+ free(comp.data);
+ free(comp.offset);
+ free(mm.major);
+ free(mm.minor);
}
static int
-segment_test_callback(char *fname, char **field, size_t nfields,
+segment_test_callback(const char *fname, char **field, size_t nfields,
char *comment, void *payload)
{
struct segment_test *t,
diff --git a/gen/util.h b/gen/util.h
@@ -7,17 +7,14 @@
#define LEN(x) (sizeof (x) / sizeof *(x))
-struct range {
- uint_least32_t lower;
- uint_least32_t upper;
+/* describes one value of a property and where to find its codepoints */
+struct property_spec {
+ const char *enumname; /* suffix of the generated enum-constant */
+ const char *file; /* data file to parse; NULL-entries are never parsed */
+ const char *ucdname; /* name the second field of a line in file is compared with */
};
-struct property {
- char *enumname;
- char *identifier;
- char *fname;
- struct range *table;
- size_t tablelen;
+/* properties of a single codepoint; unused fields stay zero */
+struct properties {
+ uint_least8_t break_property; /* index into the property_spec-array, 0 by default */
};
struct segment_test {
@@ -28,15 +25,12 @@ struct segment_test {
char *descr;
};
-int range_parse(const char *, struct range *);
+void parse_file_with_callback(const char *, int (*callback)(const char *,
+ char **, size_t, char *, void *), void *payload);
-void parse_file_with_callback(char *, int (*callback)(char *, char **,
- size_t, char *, void *), void *payload);
-
-void property_list_parse(struct property *, size_t);
-void property_list_print(const struct property *, size_t, const char *,
- const char *);
-void property_list_free(struct property *, size_t);
+/*
+ * Parse the data files named in the given specifications (array and
+ * length) and print an enum plus a major- and a minor-lookup-table to
+ * stdout. The third argument is the prefix of the generated identifiers,
+ * the fourth the program name used in the compression-ratio message on
+ * stderr.
+ */
+void properties_generate_break_property(const struct property_spec *,
+ uint_least8_t, const char *,
+ const char *);
void segment_test_list_parse(char *, struct segment_test **, size_t *);
void segment_test_list_print(const struct segment_test *, size_t,
diff --git a/src/character.c b/src/character.c
@@ -106,7 +106,8 @@ static enum char_break_property
get_break_prop(uint_least32_t cp)
{
if (likely(cp <= 0x10FFFF)) {
- return (enum char_break_property)minor[major[cp >> 8] + (cp & 0xff)];
+ return (enum char_break_property)
+ char_break_minor[char_break_major[cp >> 8] + (cp & 0xff)];
} else {
return CHAR_BREAK_PROP_OTHER;
}