aboutsummaryrefslogtreecommitdiff
path: root/utf8.c
blob: dd10ae790e9e66f9315e586211850cd35d9f9af4 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
#include "utf8.h"
#include <stdio.h>
/*
 * Decode a single UTF-8 encoded character starting at *strRef.
 *
 * On success: stores the decoded value in *codeRef, advances *strRef past
 * every byte that was consumed, and returns 0.
 *
 * On failure: returns one of the ERR_* values OR-ed with the offending byte.
 * For a byte after the first, the byte's offset within the sequence is also
 * OR-ed in, shifted left by 8. Neither *strRef nor *codeRef is modified.
 *
 * No length is passed in; trailing bytes are read one at a time and decoding
 * stops at the first byte that is not a continuation.
 */
int utf8Next(char** strRef, int* codeRef) {
	unsigned char* bytes = (unsigned char*) *strRef;
	unsigned char lead = bytes[0];

	/* Single-byte character: the byte is the code itself. */
	if (isASCII(lead)) {
		*codeRef = lead;
		*strRef += 1;
		return 0;
	}

	/* A sequence may not begin with a continuation byte. */
	if (isContinuation(lead)) {
		return ERR_UNEXPECTED_CONTINUATION | lead;
	}

	/* Bytes 248..255 are rejected as lead bytes. */
	if (lead >= 248) {
		return ERR_INVALID_UTF8_BYTE | lead;
	}

	/*
	 * Work out how many trailing bytes follow and subtract the lead byte's
	 * marker value (192, 224 or 240) to leave only its payload bits.
	 */
	int trailing;
	int value;
	if (is2ByteStarter(lead)) {
		trailing = 1;
		value = lead - 192;
	} else if (is3ByteStarter(lead)) {
		trailing = 2;
		value = lead - 224;
	} else {
		trailing = 3;
		value = lead - 240;
	}

	/* Fold each trailing byte into the accumulated value, 6 bits at a time. */
	for (int pos = 1; pos <= trailing; ++pos) {
		unsigned char next = bytes[pos];
		if (next >= 248) {
			return ERR_INVALID_UTF8_BYTE | (pos << 8) | next;
		}
		if (!isContinuation(next)) {
			return ERR_CONTINUATION_EXPECTED | (pos << 8) | next;
		}
		/*
		 * NOTE(review): the mask is 0x7f rather than 0x3f; presumably
		 * bit 6 is already clear for any byte accepted by
		 * isContinuation, making the two equivalent -- confirm against
		 * the macro in utf8.h.
		 */
		value = (value << 6) | (next & 0x7f);
	}

	*codeRef = value;
	*strRef += 1 + trailing;
	return 0;
}