diff --git a/.gitignore b/.gitignore
index df310cf..31ce623 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,5 +10,8 @@
 *.dSYM
 *.txt
 *.out
-bench
-icu
+bench/bench
+bench/icu
+bench/unistring
+
+
diff --git a/bench/Makefile b/bench/Makefile
index 6fa7085..8b63f2c 100644
--- a/bench/Makefile
+++ b/bench/Makefile
@@ -26,8 +26,14 @@ icu: icu.o util.o
 icu.out: $(DATAFILES) icu
 	./icu $(DATAFILES) > $@
 
+unistring: unistring.o util.o
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ unistring.o util.o -lunistring
+
+unistring.out: $(DATAFILES) unistring
+	./unistring $(DATAFILES) > $@
+
 .c.o:
 	$(CC) $(CPPFLAGS) -I.. $(CFLAGS) -c -o $@ $<
 
 clean:
-	rm -rf *.o *.txt bench *.out icu
+	rm -rf *.o *.txt bench *.out icu unistring
diff --git a/bench/unistring.c b/bench/unistring.c
new file mode 100644
--- /dev/null
+++ b/bench/unistring.c
@@ -0,0 +1,71 @@
+/* comparative benchmark of GNU libunistring */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* libunistring */
+#include <unistr.h>
+#include <uninorm.h>
+
+#include "util.h"
+
+/* Usage: unistring [-nfkc|-nfkd|-nfc|-nfd] FILE...
+ *
+ * A form option applies to every FILE that follows it (default: NFKC).
+ * Each FILE is normalized 100 times and the mean elapsed time per pass
+ * is printed as "FILE: TIME". */
+int main(int argc, char **argv)
+{
+	int i;
+	uninorm_t nf = UNINORM_NFKC;
+
+	for (i = 1; i < argc; ++i) {
+		if (!strcmp(argv[i], "-nfkc")) {
+			nf = UNINORM_NFKC;
+			continue;
+		}
+		if (!strcmp(argv[i], "-nfkd")) {
+			nf = UNINORM_NFKD;
+			continue;
+		}
+		if (!strcmp(argv[i], "-nfc")) {
+			nf = UNINORM_NFC;
+			continue;
+		}
+		if (!strcmp(argv[i], "-nfd")) {
+			nf = UNINORM_NFD;
+			continue;
+		}
+		if (argv[i][0] == '-') {
+			fprintf(stderr, "unrecognized option: %s\n", argv[i]);
+			return EXIT_FAILURE;
+		}
+
+		size_t len;
+		uint8_t *src = readfile(argv[i], &len);
+		if (!src) {
+			fprintf(stderr, "error reading %s\n", argv[i]);
+			return EXIT_FAILURE;
+		}
+
+		size_t destlen;
+		uint8_t *dest;
+		mytime start = gettime();
+		/* "rep", not "i": the outer index must stay visible for argv[i] */
+		for (int rep = 0; rep < 100; ++rep) {
+			/* NULL resultbuf: libunistring allocates the result */
+			dest = u8_normalize(nf, src, len, NULL, &destlen);
+			if (!dest) {
+				fprintf(stderr, "error normalizing %s\n", argv[i]);
+				free(src);
+				return EXIT_FAILURE;
+			}
+			free(dest);
+		}
+		printf("%s: %g\n", argv[i], elapsed(gettime(), start) / 100);
+		free(src);
+	}
+
+	return EXIT_SUCCESS;
+}