blob: dd10ae790e9e66f9315e586211850cd35d9f9af4 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
|
#include "utf8.h"
#include <stdio.h>
/*
 * Checks the lead-specific restriction on the second byte of a multi-byte
 * sequence (Unicode "Well-Formed UTF-8 Byte Sequences" table / RFC 3629).
 *
 * The caller must already have established that `second` is a continuation
 * byte; this helper only narrows the range for the four lead bytes whose
 * full continuation range would admit ill-formed values.
 *
 * Returns non-zero when `second` is permitted after `first`, 0 otherwise.
 */
static int utf8SecondByteAllowed(unsigned char first, unsigned char second) {
    switch (first) {
    case 0xE0: return second >= 0xA0; /* lower values are overlong (< U+0800) */
    case 0xED: return second <= 0x9F; /* higher values are surrogates U+D800..U+DFFF */
    case 0xF0: return second >= 0x90; /* lower values are overlong (< U+10000) */
    case 0xF4: return second <= 0x8F; /* higher values exceed U+10FFFF */
    default:   return 1;
    }
}

/*
 * Decodes one UTF-8 encoded code point starting at *strRef.
 *
 * strRef:  in/out cursor into the byte string. Advanced past the decoded
 *          sequence on success; left unchanged on error.
 * codeRef: receives the decoded code point on success; not written on error.
 *
 * Returns 0 on success. On failure returns one of the ERR_* constants
 * (declared outside this block, presumably in utf8.h) OR'd with the
 * offending byte in the low 8 bits and, when the offending byte is not the
 * first one, its index within the sequence shifted left by 8:
 *   - ERR_UNEXPECTED_CONTINUATION: the first byte is a continuation byte.
 *   - ERR_INVALID_UTF8_BYTE: a byte that can never appear in UTF-8
 *     (0xC0, 0xC1, 0xF5..0xFF as lead byte; >= 0xF8 anywhere), or a second
 *     byte that would make the sequence overlong, a surrogate, or larger
 *     than U+10FFFF.
 *   - ERR_CONTINUATION_EXPECTED: a non-continuation byte where a
 *     continuation byte was required.
 *
 * NOTE(review): a NUL byte is not treated specially here; assuming isASCII
 * accepts 0, the cursor is advanced past the terminator, so callers must
 * stop iterating themselves - confirm against callers.
 */
int utf8Next(char** strRef, int* codeRef) {
    unsigned char* str = (unsigned char*) *strRef;
    unsigned char first = *str;

    if (isASCII(first)) {
        *codeRef = first;
        ++*strRef;
        return 0;
    }
    if (isContinuation(first)) {
        return ERR_UNEXPECTED_CONTINUATION | first;
    }
    /*
     * 0xC0 and 0xC1 can only start overlong encodings of ASCII, and
     * 0xF5..0xFF would encode values above U+10FFFF or start sequences
     * longer than four bytes; none of them ever occur in valid UTF-8.
     */
    if (first == 0xC0 || first == 0xC1 || first >= 0xF5) {
        return ERR_INVALID_UTF8_BYTE | first;
    }

    int expectedContinuations =
        is2ByteStarter(first) ? 1 :
        is3ByteStarter(first) ? 2 : 3;

    /* Strip the length-marker bits from the lead byte to get its payload. */
    int code = first - (
        is2ByteStarter(first) ? 192 :
        is3ByteStarter(first) ? 224 : 240);

    for (int i = 1; i <= expectedContinuations; ++i) {
        unsigned char b = str[i];
        if (b >= 248) {
            return ERR_INVALID_UTF8_BYTE | (i << 8) | b;
        }
        /* A NUL terminator also lands here, so the loop never reads past it. */
        if (!isContinuation(b)) {
            return ERR_CONTINUATION_EXPECTED | (i << 8) | b;
        }
        /* Only the second byte carries a lead-dependent range restriction. */
        if (i == 1 && !utf8SecondByteAllowed(first, b)) {
            return ERR_INVALID_UTF8_BYTE | (i << 8) | b;
        }
        /* Each continuation byte contributes its low six bits. */
        code = (code << 6) | (b & 0x3f);
    }

    *codeRef = code;
    *strRef += 1 + expectedContinuations;
    return 0;
}
|