NewsBlur/clients/ios/Other Sources/SBJSON/SBJson4StreamTokeniser.m

396 lines
10 KiB
Mathematica
Raw Normal View History

//
// Created by SuperPappi on 09/01/2013.
//
// To change the template use AppCode | Preferences | File Templates.
//
#import "SBJson4StreamTokeniser.h"
#define SBStringIsIllegalSurrogateHighCharacter(character) (((character) >= 0xD800UL) && ((character) <= 0xDFFFUL))
#define SBStringIsSurrogateLowCharacter(character) ((character >= 0xDC00UL) && (character <= 0xDFFFUL))
#define SBStringIsSurrogateHighCharacter(character) ((character >= 0xD800UL) && (character <= 0xDBFFUL))
@implementation SBJson4StreamTokeniser {
NSMutableData *data;
const char *bytes;
NSUInteger index;
NSUInteger offset;
}
- (void)setError:(NSString *)error {
_error = [NSString stringWithFormat:@"%@ at index %lu", error, (unsigned long)(offset + index)];
}
- (void)appendData:(NSData *)data_ {
if (!data) {
data = [data_ mutableCopy];
} else if (index) {
// Discard data we've already parsed
[data replaceBytesInRange:NSMakeRange(0, index) withBytes:"" length:0];
[data appendData:data_];
// Add to the offset for reporting
offset += index;
// Reset index to point to current position
index = 0u;
}
else {
[data appendData:data_];
}
bytes = [data bytes];
}
- (void)skipWhitespace {
while (index < data.length) {
switch (bytes[index]) {
case ' ':
case '\t':
case '\r':
case '\n':
index++;
break;
default:
return;
}
}
}
- (BOOL)getUnichar:(unichar *)ch {
if ([self haveRemainingCharacters:1]) {
*ch = (unichar) bytes[index];
return YES;
}
return NO;
}
- (BOOL)haveOneMoreCharacter {
return [self haveRemainingCharacters:1];
}
- (BOOL)haveRemainingCharacters:(NSUInteger)length {
return data.length - index >= length;
}
- (sbjson4_token_t)match:(char *)str retval:(sbjson4_token_t)tok token:(char **)token length:(NSUInteger *)length {
NSUInteger len = strlen(str);
if ([self haveRemainingCharacters:len]) {
if (!memcmp(bytes + index, str, len)) {
*token = str;
*length = len;
index += len;
return tok;
}
[self setError: [NSString stringWithFormat:@"Expected '%s' after initial '%.1s'", str, str]];
return sbjson4_token_error;
}
return sbjson4_token_eof;
}
- (BOOL)decodeHexQuad:(unichar*)quad {
unichar tmp = 0;
for (int i = 0; i < 4; i++, index++) {
unichar c = (unichar)bytes[index];
tmp *= 16;
switch (c) {
case '0' ... '9':
tmp += c - '0';
break;
case 'a' ... 'f':
tmp += 10 + c - 'a';
break;
case 'A' ... 'F':
tmp += 10 + c - 'A';
break;
default:
return NO;
}
}
*quad = tmp;
return YES;
}
- (sbjson4_token_t)getStringToken:(char **)token length:(NSUInteger *)length {
// Skip initial "
index++;
NSUInteger string_start = index;
sbjson4_token_t tok = sbjson4_token_string;
for (;;) {
if (![self haveOneMoreCharacter])
return sbjson4_token_eof;
switch (bytes[index]) {
case 0 ... 0x1F:
[self setError:[NSString stringWithFormat:@"Unescaped control character [0x%0.2X] in string", bytes[index]]];
return sbjson4_token_error;
case '"':
*token = (char *)(bytes + string_start);
*length = index - string_start;
index++;
return tok;
case '\\':
tok = sbjson4_token_encoded;
index++;
if (![self haveOneMoreCharacter])
return sbjson4_token_eof;
if (bytes[index] == 'u') {
index++;
if (![self haveRemainingCharacters:4])
return sbjson4_token_eof;
unichar hi;
if (![self decodeHexQuad:&hi]) {
[self setError:@"Invalid hex quad"];
return sbjson4_token_error;
}
if (SBStringIsSurrogateHighCharacter(hi)) {
if (![self haveRemainingCharacters:6])
return sbjson4_token_eof;
unichar lo;
if (bytes[index++] != '\\' || bytes[index++] != 'u' || ![self decodeHexQuad:&lo]) {
[self setError:@"Missing low character in surrogate pair"];
return sbjson4_token_error;
}
if (!SBStringIsSurrogateLowCharacter(lo)) {
[self setError:@"Invalid low character in surrogate pair"];
return sbjson4_token_error;
}
} else if (SBStringIsIllegalSurrogateHighCharacter(hi)) {
[self setError:@"Invalid high character in surrogate pair"];
return sbjson4_token_error;
}
} else {
switch (bytes[index]) {
case '\\':
case '/':
case '"':
case 'b':
case 'n':
case 'r':
case 't':
case 'f':
index++;
break;
default:
[self setError:[NSString stringWithFormat:@"Illegal escape character [%x]", bytes[index]]];
return sbjson4_token_error;
}
}
break;
default:
index++;
break;
}
}
}
- (sbjson4_token_t)getNumberToken:(char **)token length:(NSUInteger *)length {
NSUInteger num_start = index;
if (bytes[index] == '-') {
index++;
if (![self haveOneMoreCharacter])
return sbjson4_token_eof;
}
sbjson4_token_t tok = sbjson4_token_integer;
if (bytes[index] == '0') {
index++;
if (![self haveOneMoreCharacter])
return sbjson4_token_eof;
if (isdigit(bytes[index])) {
[self setError:@"Leading zero is illegal in number"];
return sbjson4_token_error;
}
}
while (isdigit(bytes[index])) {
index++;
if (![self haveOneMoreCharacter])
return sbjson4_token_eof;
}
if (![self haveOneMoreCharacter])
return sbjson4_token_eof;
if (bytes[index] == '.') {
index++;
tok = sbjson4_token_real;
if (![self haveOneMoreCharacter])
return sbjson4_token_eof;
NSUInteger fraction_start = index;
while (isdigit(bytes[index])) {
index++;
if (![self haveOneMoreCharacter])
return sbjson4_token_eof;
}
if (fraction_start == index) {
[self setError:@"No digits after decimal point"];
return sbjson4_token_error;
}
}
if (bytes[index] == 'e' || bytes[index] == 'E') {
index++;
tok = sbjson4_token_real;
if (![self haveOneMoreCharacter])
return sbjson4_token_eof;
if (bytes[index] == '-' || bytes[index] == '+') {
index++;
if (![self haveOneMoreCharacter])
return sbjson4_token_eof;
}
NSUInteger exp_start = index;
while (isdigit(bytes[index])) {
index++;
if (![self haveOneMoreCharacter])
return sbjson4_token_eof;
}
if (exp_start == index) {
[self setError:@"No digits in exponent"];
return sbjson4_token_error;
}
}
if (num_start + 1 == index && bytes[num_start] == '-') {
[self setError:@"No digits after initial minus"];
return sbjson4_token_error;
}
*token = (char *)(bytes + num_start);
*length = index - num_start;
return tok;
}
- (sbjson4_token_t)getToken:(char **)token length:(NSUInteger *)length {
[self skipWhitespace];
NSUInteger copyOfIndex = index;
unichar ch;
if (![self getUnichar:&ch])
return sbjson4_token_eof;
sbjson4_token_t tok;
switch (ch) {
case '{': {
index++;
tok = sbjson4_token_object_open;
break;
}
case '}': {
index++;
tok = sbjson4_token_object_close;
break;
}
case '[': {
index++;
tok = sbjson4_token_array_open;
break;
}
case ']': {
index++;
tok = sbjson4_token_array_close;
break;
}
case 't': {
tok = [self match:"true" retval:sbjson4_token_bool token:token length:length];
break;
}
case 'f': {
tok = [self match:"false" retval:sbjson4_token_bool token:token length:length];
break;
}
case 'n': {
tok = [self match:"null" retval:sbjson4_token_null token:token length:length];
break;
}
case ',': {
index++;
tok = sbjson4_token_value_sep;
break;
}
case ':': {
index++;
tok = sbjson4_token_entry_sep;
break;
}
case '"': {
tok = [self getStringToken:token length:length];
break;
}
case '-':
case '0' ... '9': {
tok = [self getNumberToken:token length:length];
break;
}
case '+': {
self.error = @"Leading + is illegal in number";
tok = sbjson4_token_error;
break;
}
default: {
self.error = [NSString stringWithFormat:@"Illegal start of token [%c]", ch];
tok = sbjson4_token_error;
break;
}
}
if (tok == sbjson4_token_eof) {
// We ran out of bytes before we could finish parsing the current token.
// Back up to the start & wait for more data.
index = copyOfIndex;
}
return tok;
}
@end