Browse Source

add benchmark (issue #12)

pull/16/head
Steven G. Johnson 10 years ago
parent
commit
20cff0757b
  1. 4
      .gitignore
  2. 33
      bench/Makefile
  3. 56
      bench/bench.c
  4. 61
      bench/icu.c
  5. 39
      bench/util.c
  6. 22
      bench/util.h

4
.gitignore

@ -8,3 +8,7 @@
*.dll
*.dylib
*.dSYM
*.txt
*.out
bench
icu

33
bench/Makefile

@ -0,0 +1,33 @@
# Build and run the normalization benchmarks.
#
#   make            build the libmojibake benchmark driver (bench)
#   make bench.out  download the sample texts and time libmojibake (NFKC)
#   make icu.out    same measurement using ICU, for comparison
#   make clean      remove objects, binaries, results and downloaded texts

CURL = curl
CC = cc
CFLAGS = -O2 -std=c99 -pedantic -Wall

LIBMOJIBAKE = ../libmojibake.a

DATAURL = https://raw.githubusercontent.com/duerst/eprun/master/benchmark
DATAFILES = Deutsch_.txt Japanese_.txt Korean_.txt Vietnamese_.txt

# all and clean are commands, not files.
.PHONY: all clean

# Remove a half-written target (truncated download, partial *.out) when its
# recipe fails, so it is not mistaken for an up-to-date result.
.DELETE_ON_ERROR:

all: bench

# Relink when the library itself has been rebuilt.
bench: bench.o util.o $(LIBMOJIBAKE)
	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ bench.o util.o $(LIBMOJIBAKE)

# -f makes curl fail on an HTTP error instead of saving the error page as
# benchmark data; -L follows redirects.
$(DATAFILES):
	$(CURL) -f -L -O $(DATAURL)/$@

bench.out: $(DATAFILES) bench
	./bench -nfkc $(DATAFILES) > $@

# you may need make CPPFLAGS=... LDFLAGS=... to help it find ICU
icu: icu.o util.o
	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ icu.o util.o -licuuc

icu.out: $(DATAFILES) icu
	./icu $(DATAFILES) > $@

# Every object includes util.h.
bench.o icu.o util.o: util.h

.c.o:
	$(CC) $(CPPFLAGS) -I.. $(CFLAGS) -c -o $@ $<

clean:
	rm -rf *.o *.txt bench *.out icu

56
bench/bench.c

@ -0,0 +1,56 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "mojibake.h"
#include "util.h"
/* Benchmark driver for utf8proc_map.
 *
 * Usage: bench [-nfkc|-nfkd|-nfc|-nfd|-casefold]... FILE...
 *
 * Arguments are processed left to right: option flags accumulate into the
 * utf8proc option mask, and every non-option argument is read as a file and
 * mapped 100 times with the options seen so far.  For each file the average
 * time per call, in seconds, is printed as "FILE: SECONDS".
 *
 * Returns EXIT_FAILURE on an unrecognized option, an unreadable file, or a
 * failed utf8proc_map call; EXIT_SUCCESS otherwise.
 */
int main(int argc, char **argv)
{
    int i;
    int options = 0;

    for (i = 1; i < argc; ++i) {
        if (!strcmp(argv[i], "-nfkc")) {
            options |= UTF8PROC_STABLE|UTF8PROC_COMPOSE|UTF8PROC_COMPAT;
            continue;
        }
        if (!strcmp(argv[i], "-nfkd")) {
            options |= UTF8PROC_STABLE|UTF8PROC_DECOMPOSE|UTF8PROC_COMPAT;
            continue;
        }
        if (!strcmp(argv[i], "-nfc")) {
            options |= UTF8PROC_STABLE|UTF8PROC_COMPOSE;
            continue;
        }
        if (!strcmp(argv[i], "-nfd")) {
            options |= UTF8PROC_STABLE|UTF8PROC_DECOMPOSE;
            continue;
        }
        if (!strcmp(argv[i], "-casefold")) {
            options |= UTF8PROC_CASEFOLD;
            continue;
        }
        if (argv[i][0] == '-') {
            fprintf(stderr, "unrecognized option: %s\n", argv[i]);
            return EXIT_FAILURE;
        }

        size_t len;
        uint8_t *src = readfile(argv[i], &len);
        if (!src) {
            fprintf(stderr, "error reading %s\n", argv[i]);
            return EXIT_FAILURE;
        }

        mytime start = gettime();
        for (int rep = 0; rep < 100; ++rep) {
            uint8_t *dest;
            /* A negative result is an error code.  Without this check a
               failed call would be timed and reported like a success. */
            long rc = (long) utf8proc_map(src, len, &dest, options);
            if (rc < 0) {
                /* NOTE(review): dest is not freed here; presumably
                   utf8proc_map leaves nothing allocated on failure --
                   confirm against the library. */
                fprintf(stderr, "error normalizing %s (utf8proc error %ld)\n",
                        argv[i], rc);
                free(src);
                return EXIT_FAILURE;
            }
            free(dest);
        }
        printf("%s: %g\n", argv[i], elapsed(gettime(), start) / 100);

        free(src);
    }
    return EXIT_SUCCESS;
}

61
bench/icu.c

@ -0,0 +1,61 @@
#include <stdio.h>
#include <stdlib.h>
/* ICU4C */
#include <unicode/utypes.h>
#include <unicode/ustring.h>
#include <unicode/ucnv.h>
#include <unicode/unorm2.h>
#include "util.h"
int main(int argc, char **argv)
{
int i;
UErrorCode err;
UConverter *uc = ucnv_open("UTF8", &err);
if (U_FAILURE(err)) return EXIT_FAILURE;
const UNormalizer2 *NFKC = unorm2_getNFKCInstance(&err);
if (U_FAILURE(err)) return EXIT_FAILURE;
for (i = 1; i < argc; ++i) {
if (argv[i][0] == '-') {
fprintf(stderr, "unrecognized option: %s\n", argv[i]);
return EXIT_FAILURE;
}
size_t len;
uint8_t *src = readfile(argv[i], &len);
if (!src) {
fprintf(stderr, "error reading %s\n", argv[i]);
return EXIT_FAILURE;
}
/* convert UTF8 data to ICU's UTF16 */
UChar *usrc = (UChar*) malloc(2*len * sizeof(UChar));
ucnv_toUChars(uc, usrc, 2*len, (char*) src, len, &err);
if (U_FAILURE(err)) return EXIT_FAILURE;
size_t ulen = u_strlen(usrc);
/* ICU's insane normalization API requires you to
know the size of the destination buffer in advance,
or alternatively to repeatly try normalizing and
double the buffer size until it succeeds. Here, I just
allocate a huge destination buffer to avoid the issue. */
UChar *udest = (UChar*) malloc(10*ulen * sizeof(UChar));
mytime start = gettime();
for (int i = 0; i < 100; ++i) {
unorm2_normalize(NFKC, usrc, ulen, udest, 10*ulen, &err);
if (U_FAILURE(err)) return EXIT_FAILURE;
}
printf("%s: %g\n", argv[i], elapsed(gettime(), start) / 100);
free(udest);
free(usrc);
free(src);
}
return EXIT_SUCCESS;
}

39
bench/util.c

@ -0,0 +1,39 @@
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include "util.h"
/* Read the whole file named FILENAME into a newly malloc'ed buffer.

   On success, returns the buffer (to be released with free) and stores the
   number of bytes read in *len.  An empty file yields a valid non-NULL
   buffer with *len == 0.

   On any error (stat/open failure, out of memory, short read), returns
   NULL and sets *len to 0. */
uint8_t *readfile(const char *filename, size_t *len)
{
    *len = 0;

    struct stat st;
    if (0 != stat(filename, &st)) return NULL;
    if (st.st_size < 0) return NULL;
    size_t size = (size_t) st.st_size;

    /* binary mode: the raw bytes are wanted, and the byte count must match
       st_size even on platforms that translate line endings in text mode */
    FILE *f = fopen(filename, "rb");
    if (!f) return NULL;

    /* never request 0 bytes: malloc(0) may legitimately return NULL, which
       would make an empty file look like an allocation failure */
    uint8_t *s = (uint8_t *) malloc(size ? size : 1);
    if (s && fread(s, 1, size, f) != size) {
        free(s);
        s = NULL;
    }

    /* single exit path so the stream is closed on every outcome, including
       allocation failure */
    fclose(f);
    if (s) *len = size;
    return s;
}
/* current time of day, as filled in by gettimeofday */
mytime gettime(void)
{
    mytime now;

    gettimeofday(&now, NULL);
    return now;
}
/* number of seconds from t0 to t1 (negative if t1 precedes t0) */
double elapsed(mytime t1, mytime t0)
{
    double whole_seconds = (double)(t1.tv_sec - t0.tv_sec);
    double microseconds = (double)(t1.tv_usec - t0.tv_usec);

    return whole_seconds + microseconds * 1.0E-6;
}

22
bench/util.h

@ -0,0 +1,22 @@
#ifndef UTIL_H
#define UTIL_H 1

/* Small helpers shared by the benchmark programs: whole-file reading and
   wall-clock timing. */

#include <inttypes.h>
#include <sys/time.h>
#include <time.h>

#ifdef __cplusplus
extern "C" {
#endif

/* timestamp type used by gettime and elapsed */
typedef struct timeval mytime;

/* read the file named FILENAME into a malloc'ed array of *len bytes,
   returning NULL on error */
uint8_t *readfile(const char *filename, size_t *len);

/* current time of day */
mytime gettime(void);

/* time difference t1 - t0, in seconds */
double elapsed(mytime t1, mytime t0);

#ifdef __cplusplus
}
#endif

#endif /* UTIL_H */
Loading…
Cancel
Save