NewsBlur/clients/ios/Other Sources/Overshare Kit/OSKTwitterText.m
2014-01-06 17:55:14 -08:00

799 lines
28 KiB
Objective-C
Executable file
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//
// OSKTwitterText.m
//
// Copyright 2012 Twitter, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
#import "OSKTwitterText.h"
//
// These regular expressions are ported from twitter-text-rb on Apr 24 2012.
//
#define TWUControlCharacters @"\\u0009-\\u000D"
#define TWUSpace @"\\u0020"
#define TWUControl85 @"\\u0085"
#define TWUNoBreakSpace @"\\u00A0"
#define TWUOghamBreakSpace @"\\u1680"
#define TWUMongolianVowelSeparator @"\\u180E"
#define TWUWhiteSpaces @"\\u2000-\\u200A"
#define TWULineSeparator @"\\u2028"
#define TWUParagraphSeparator @"\\u2029"
#define TWUNarrowNoBreakSpace @"\\u202F"
#define TWUMediumMathematicalSpace @"\\u205F"
#define TWUIdeographicSpace @"\\u3000"
#define TWUUnicodeSpaces \
TWUControlCharacters \
TWUSpace \
TWUControl85 \
TWUNoBreakSpace \
TWUOghamBreakSpace \
TWUMongolianVowelSeparator \
TWUWhiteSpaces \
TWULineSeparator \
TWUParagraphSeparator \
TWUNarrowNoBreakSpace \
TWUMediumMathematicalSpace \
TWUIdeographicSpace
#define TWUInvalidCharacters @"\\uFFFE\\uFEFF\\uFFFF\\u202A-\\u202E"
#define TWULatinAccents \
@"\\u00C0-\\u00D6\\u00D8-\\u00F6\\u00F8-\\u00FF\\u0100-\\u024F\\u0253-\\u0254\\u0256-\\u0257\\u0259\\u025b\\u0263\\u0268\\u026F\\u0272\\u0289\\u02BB\\u1E00-\\u1EFF"
//
// Hashtag
//
#define TWUCyrillicHashtagChars @"\\u0400-\\u04FF"
#define TWUCyrillicSupplementHashtagChars @"\\u0500-\\u0527"
#define TWUCyrillicExtendedAHashtagChars @"\\u2DE0-\\u2DFF"
#define TWUCyrillicExtendedBHashtagChars @"\\uA640-\\uA69F"
#define TWUHebrewHashtagChars @"\\u0591-\\u05BF\\u05C1-\\u05C2\\u05C4-\\u05C5\\u05C7\\u05D0-\\u05EA\\u05F0-\\u05F4"
#define TWUHebrewPresentationFormsHashtagChars @"\\uFB12-\\uFB28\\uFB2A-\\uFB36\\uFB38-\\uFB3C\\uFB3E\\uFB40-\\uFB41\\uFB43-\\uFB44\\uFB46-\\uFB4F"
#define TWUArabicHashtagChars @"\\u0610-\\u061A\\u0620-\\u065F\\u066E-\\u06D3\\u06D5-\\u06DC\\u06DE-\\u06E8\\u06EA-\\u06EF\\u06FA-\\u06FC\\u06FF"
#define TWUArabicSupplementHashtagChars @"\\u0750-\\u077F"
#define TWUArabicExtendedAHashtagChars @"\\u08A0\\u08A2-\\u08AC\\u08E4-\\u08FE"
#define TWUArabicPresentationFormsAHashtagChars @"\\uFB50-\\uFBB1\\uFBD3-\\uFD3D\\uFD50-\\uFD8F\\uFD92-\\uFDC7\\uFDF0-\\uFDFB"
#define TWUArabicPresentationFormsBHashtagChars @"\\uFE70-\\uFE74\\uFE76-\\uFEFC"
#define TWUZeroWidthNonJoiner @"\\u200C"
#define TWUThaiHashtagChars @"\\u0E01-\\u0E3A"
#define TWUHangulHashtagChars @"\\u0E40-\\u0E4E"
#define TWUHangulJamoHashtagChars @"\\u1100-\\u11FF"
#define TWUHangulCompatibilityJamoHashtagChars @"\\u3130-\\u3185"
#define TWUHangulJamoExtendedAHashtagChars @"\\uA960-\\uA97F"
#define TWUHangulSyllablesHashtagChars @"\\uAC00-\\uD7AF"
#define TWUHangulJamoExtendedBHashtagChars @"\\uD7B0-\\uD7FF"
#define TWUHalfWidthHangulHashtagChars @"\\uFFA1-\\uFFDC"
#define TWUNonLatinHashtagChars \
TWUCyrillicHashtagChars \
TWUCyrillicSupplementHashtagChars \
TWUCyrillicExtendedAHashtagChars \
TWUCyrillicExtendedBHashtagChars \
TWUHebrewHashtagChars \
TWUHebrewPresentationFormsHashtagChars \
TWUArabicHashtagChars \
TWUArabicSupplementHashtagChars \
TWUArabicExtendedAHashtagChars \
TWUArabicPresentationFormsAHashtagChars \
TWUArabicPresentationFormsBHashtagChars \
TWUZeroWidthNonJoiner \
TWUThaiHashtagChars \
TWUHangulHashtagChars \
TWUHangulJamoHashtagChars \
TWUHangulCompatibilityJamoHashtagChars \
TWUHangulJamoExtendedAHashtagChars \
TWUHangulSyllablesHashtagChars \
TWUHangulJamoExtendedBHashtagChars \
TWUHalfWidthHangulHashtagChars
#define TWUKatakanaHashtagChars @"\\u30A1-\\u30FA\\u30FC-\\u30FE"
#define TWUKatakanaHalfWidthHashtagChars @"\\uFF66-\\uFF9F"
#define TWULatinFullWidthHashtagChars @"\\uFF10-\\uFF19\\uFF21-\\uFF3A\\uFF41-\\uFF5A"
#define TWUHiraganaHashtagChars @"\\u3041-\\u3096\\u3099-\\u309E"
#define TWUCJKExtensionAHashtagChars @"\\u3400-\\u4DBF"
#define TWUCJKUnifiedHashtagChars @"\\u4E00-\\u9FFF"
#define TWUCJKExtensionBHashtagChars @"\\U00020000-\\U0002A6DF"
#define TWUCJKExtensionCHashtagChars @"\\U0002A700-\\U0002B73F"
#define TWUCJKExtensionDHashtagChars @"\\U0002B740-\\U0002B81F"
#define TWUCJKSupplementHashtagChars @"\\U0002F800-\\U0002FA1F\\u3003\\u3005\\u303B"
#define TWUCJKHashtagCharacters \
TWUKatakanaHashtagChars \
TWUKatakanaHalfWidthHashtagChars \
TWULatinFullWidthHashtagChars \
TWUHiraganaHashtagChars \
TWUCJKExtensionAHashtagChars \
TWUCJKUnifiedHashtagChars \
TWUCJKExtensionBHashtagChars \
TWUCJKExtensionCHashtagChars \
TWUCJKExtensionDHashtagChars \
TWUCJKSupplementHashtagChars
#define TWUPunctuationChars @"\\-_!\"#$%&'()*+,./:;<=>?@\\[\\]^`{|}~"
#define TWUPunctuationCharsWithoutHyphen @"_!\"#$%&'()*+,./:;<=>?@\\[\\]^`{|}~"
#define TWUPunctuationCharsWithoutHyphenAndUnderscore @"!\"#$%&'()*+,./:;<=>?@\\[\\]^`{|}~"
#define TWUCtrlChars @"\\x00-\\x1F\\x7F"
#define TWHashtagAlpha \
@"[a-z_" \
TWULatinAccents \
TWUNonLatinHashtagChars \
TWUCJKHashtagCharacters \
@"]"
#define TWUHashtagAlphanumeric \
@"[a-z0-9_" \
TWULatinAccents \
TWUNonLatinHashtagChars \
TWUCJKHashtagCharacters \
@"]"
#define TWUHashtagBoundary \
@"^|$|[^&a-z0-9_" \
TWULatinAccents \
TWUNonLatinHashtagChars \
TWUCJKHashtagCharacters \
@"]"
#define TWUValidHashtag \
@"(?:" TWUHashtagBoundary @")([#]" TWUHashtagAlphanumeric @"*" TWHashtagAlpha TWUHashtagAlphanumeric @"*)"
#define TWUEndHashTagMatch @"\\A(?:[#]|://)"
//
// Cashtag
//
#define TWUCashtag @"[a-z]{1,6}(?:[._][a-z]{1,2})?"
#define TWUValidCashtag \
@"(?:^|[" TWUUnicodeSpaces @"])" \
@"(\\$" TWUCashtag @")" \
@"(?=$|\\s|[" TWUPunctuationChars @"])"
//
// Mention and list name
//
#define TWUValidMentionPrecedingChars @"(?:[^a-zA-Z0-9_!#$%&*@]|^|RT:?)"
#define TWUAtSigns @"[@]"
#define TWUValidUsername @"\\A" TWUAtSigns @"[a-zA-Z0-9_]{1,20}\\z"
#define TWUValidList @"\\A" TWUAtSigns @"[a-zA-Z0-9_]{1,20}/[a-zA-Z][a-zA-Z0-9_\\-]{0,24}\\z"
#define TWUValidMentionOrList \
@"(" TWUValidMentionPrecedingChars @")" \
@"(" TWUAtSigns @")" \
@"([a-zA-Z0-9_]{1,20})" \
@"(/[a-zA-Z][a-zA-Z0-9_\\-]{0,24})?"
#define TWUValidReply @"\\A(?:[" TWUUnicodeSpaces @"])*" TWUAtSigns @"([a-zA-Z0-9_]{1,20})"
#define TWUEndMentionMatch @"\\A(?:" TWUAtSigns @"|[" TWULatinAccents @"]|://)"
//
// URL
//
#define TWUValidURLPrecedingChars @"(?:[^a-zA-Z0-9@$#" TWUInvalidCharacters @"]|^)"
#define TWUDomainValidStartEndChars \
@"[^" \
TWUPunctuationChars \
TWUCtrlChars \
TWUInvalidCharacters \
TWUUnicodeSpaces \
@"]"
#define TWUSubdomainValidMiddleChars \
@"[^" \
TWUPunctuationCharsWithoutHyphenAndUnderscore \
TWUCtrlChars \
TWUInvalidCharacters \
TWUUnicodeSpaces \
@"]"
#define TWUDomainValidMiddleChars \
@"[^" \
TWUPunctuationCharsWithoutHyphen \
TWUCtrlChars \
TWUInvalidCharacters \
TWUUnicodeSpaces \
@"]"
#define TWUValidSubdomain \
@"(?:" \
@"(?:" TWUDomainValidStartEndChars TWUSubdomainValidMiddleChars @"*)?" TWUDomainValidStartEndChars @"\\." \
@")"
#define TWUValidDomainName \
@"(?:" \
@"(?:" TWUDomainValidStartEndChars TWUDomainValidMiddleChars @"*)?" TWUDomainValidStartEndChars @"\\." \
@")"
#define TWUValidGTLD @"(?:(?:aero|asia|biz|cat|com|coop|edu|gov|info|int|jobs|mil|mobi|museum|name|net|org|pro|tel|travel|xxx)(?=[^0-9a-z]|$))"
#define TWUValidCCTLD \
@"(?:" \
@"(?:" \
@"ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|" \
@"bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|" \
@"cs|cu|cv|cx|cy|cz|dd|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|" \
@"fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|" \
@"ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|" \
@"ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|" \
@"mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|" \
@"pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|" \
@"si|sj|sk|sl|sm|sn|so|sr|ss|st|su|sv|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|" \
@"tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|za|zm|" \
@"zw" \
@")" \
@"(?=[^0-9a-z]|$)" \
@")"
#define TWUValidPunycode @"(?:xn--[0-9a-z]+)"
#define TWUValidDomain \
@"(?:" \
TWUValidSubdomain @"*" TWUValidDomainName \
@"(?:" TWUValidGTLD @"|" TWUValidCCTLD @"|" TWUValidPunycode @")" \
@")"
#define TWUValidASCIIDomain \
@"(?:[a-zA-Z0-9\\-_" TWULatinAccents @"]+\\.)+" \
@"(?:" TWUValidGTLD @"|" TWUValidCCTLD @"|" TWUValidPunycode @")" \
#define TWUValidTCOURL @"https?://t\\.co/[a-zA-Z0-9]+"
#define TWUInvalidShortDomain @"\\A" TWUValidDomainName TWUValidCCTLD @"\\z"
#define TWUValidPortNumber @"[0-9]+"
#define TWUValidGeneralURLPathChars @"[a-zA-Z0-9!\\*';:=+,.$/%#\\[\\]\\-_~&|@" TWULatinAccents @"]"
#define TWUValidURLBalancedParens @"\\(" TWUValidGeneralURLPathChars @"+\\)"
#define TWUValidURLPathEndingChars @"[a-zA-Z0-9=_#/+\\-" TWULatinAccents @"]|(?:" TWUValidURLBalancedParens @")"
#define TWUValidURLPath \
@"(?:" \
@"(?:" \
TWUValidGeneralURLPathChars @"*" \
@"(?:" TWUValidURLBalancedParens TWUValidGeneralURLPathChars @"*)*" TWUValidURLPathEndingChars \
@")" \
@"|" \
@"(?:" TWUValidGeneralURLPathChars @"+/)" \
@")"
#define TWUValidURLQueryChars @"[a-zA-Z0-9!?*'\\(\\);:&=+$/%#\\[\\]\\-_\\.,~|@]"
#define TWUValidURLQueryEndingChars @"[a-zA-Z0-9_&=#/]"
#define TWUValidURL \
@"(" \
@"(" TWUValidURLPrecedingChars @")" \
@"(" \
@"(https?://)?" \
@"(" TWUValidDomain @")" \
@"(?::(" TWUValidPortNumber @"))?" \
@"(/" TWUValidURLPath @"*)?" \
@"(\\?" TWUValidURLQueryChars @"*" TWUValidURLQueryEndingChars @")?" \
@")" \
@")"
static const NSInteger MaxTweetLength = 140;
static const NSInteger HTTPShortURLLength = 22;
static const NSInteger HTTPSShortURLLength = 23;
@implementation OSKTwitterText
+ (NSArray*)entitiesInText:(NSString*)text
{
if (!text.length) {
return [NSArray array];
}
NSMutableArray *results = [NSMutableArray array];
NSArray *urls = [self URLsInText:text];
[results addObjectsFromArray:urls];
NSArray *hashtags = [self hashtagsInText:text withURLEntities:urls];
[results addObjectsFromArray:hashtags];
NSArray *cashtags = [self symbolsInText:text withURLEntities:urls];
[results addObjectsFromArray:cashtags];
NSArray *mentionsAndLists = [self mentionsOrListsInText:text];
NSMutableArray *addingItems = [NSMutableArray array];
for (OSKTwitterTextEntity *entity in mentionsAndLists) {
NSRange entityRange = entity.range;
BOOL found = NO;
for (OSKTwitterTextEntity *existingEntity in results) {
if (NSIntersectionRange(existingEntity.range, entityRange).length > 0) {
found = YES;
break;
}
}
if (!found) {
[addingItems addObject:entity];
}
}
[results addObjectsFromArray:addingItems];
[results sortUsingSelector:@selector(compare:)];
return results;
}
+ (NSArray*)URLsInText:(NSString*)text
{
if (!text.length) {
return [NSArray array];
}
NSMutableArray *results = [NSMutableArray array];
NSInteger len = text.length;
NSInteger position = 0;
NSRange allRange = NSMakeRange(0, 0);
while (1) {
position = NSMaxRange(allRange);
NSTextCheckingResult *urlResult = [[self validURLRegexp] firstMatchInString:text options:NSMatchingWithoutAnchoringBounds range:NSMakeRange(position, len - position)];
if (!urlResult || urlResult.numberOfRanges < 9) {
break;
}
allRange = urlResult.range;
NSRange precedingRange = [urlResult rangeAtIndex:2];
NSRange urlRange = [urlResult rangeAtIndex:3];
NSRange protocolRange = [urlResult rangeAtIndex:4];
NSRange domainRange = [urlResult rangeAtIndex:5];
// If protocol is missing and domain contains non-ASCII characters,
// extract ASCII-only domains.
if (protocolRange.location == NSNotFound) {
if (precedingRange.location != NSNotFound && precedingRange.length > 0) {
NSString *preceding = [text substringWithRange:precedingRange];
NSRange suffixRange = [preceding rangeOfCharacterFromSet:[self invalidURLWithoutProtocolPrecedingCharSet] options:NSBackwardsSearch | NSAnchoredSearch];
if (suffixRange.location != NSNotFound) {
continue;
}
}
NSInteger domainStart = domainRange.location;
NSInteger domainEnd = NSMaxRange(domainRange);
OSKTwitterTextEntity *lastEntity = nil;
BOOL lastInvalidShortResult = NO;
while (domainStart < domainEnd) {
NSTextCheckingResult *asciiResult = [[self validASCIIDomainRegexp] firstMatchInString:text options:0 range:NSMakeRange(domainStart, domainEnd - domainStart)];
if (!asciiResult) {
break;
}
urlRange = asciiResult.range;
lastEntity = [OSKTwitterTextEntity entityWithType:OSKTwitterTextEntityURL range:urlRange];
NSTextCheckingResult *invalidShortResult = [[self invalidShortDomainRegexp] firstMatchInString:text options:0 range:urlRange];
lastInvalidShortResult = (invalidShortResult != nil);
if (!lastInvalidShortResult) {
[results addObject:lastEntity];
}
domainStart = NSMaxRange(urlRange);
}
if (!lastEntity) {
continue;
}
NSRange pathRange = [urlResult rangeAtIndex:7];
if (pathRange.location != NSNotFound && NSMaxRange(lastEntity.range) == pathRange.location) {
if (lastInvalidShortResult) {
[results addObject:lastEntity];
}
NSRange entityRange = lastEntity.range;
entityRange.length += pathRange.length;
lastEntity.range = entityRange;
}
} else {
// In the case of t.co URLs, don't allow additional path characters
NSRange tcoRange = [[self validTCOURLRegexp] rangeOfFirstMatchInString:text options:0 range:urlRange];
if (tcoRange.location != NSNotFound) {
urlRange.length = tcoRange.length;
}
OSKTwitterTextEntity *entity = [OSKTwitterTextEntity entityWithType:OSKTwitterTextEntityURL range:urlRange];
[results addObject:entity];
}
}
return results;
}
+ (NSArray*)hashtagsInText:(NSString*)text checkingURLOverlap:(BOOL)checkingURLOverlap
{
if (!text.length) {
return [NSArray array];
}
NSArray *urls = nil;
if (checkingURLOverlap) {
urls = [self URLsInText:text];
}
return [self hashtagsInText:text withURLEntities:urls];
}
+ (NSArray*)hashtagsInText:(NSString*)text withURLEntities:(NSArray*)urlEntities
{
if (!text.length) {
return [NSArray array];
}
NSMutableArray *results = [NSMutableArray array];
NSInteger len = text.length;
NSInteger position = 0;
while (1) {
NSTextCheckingResult *matchResult = [[self validHashtagRegexp] firstMatchInString:text options:NSMatchingWithoutAnchoringBounds range:NSMakeRange(position, len - position)];
if (!matchResult || matchResult.numberOfRanges < 2) {
break;
}
NSRange hashtagRange = [matchResult rangeAtIndex:1];
BOOL matchOk = YES;
// Check URL overlap
for (OSKTwitterTextEntity *urlEntity in urlEntities) {
if (NSIntersectionRange(urlEntity.range, hashtagRange).length > 0) {
matchOk = NO;
break;
}
}
if (matchOk) {
NSInteger afterStart = NSMaxRange(hashtagRange);
if (afterStart < len) {
NSRange endMatchRange = [[self endHashtagRegexp] rangeOfFirstMatchInString:text options:0 range:NSMakeRange(afterStart, len - afterStart)];
if (endMatchRange.location != NSNotFound) {
matchOk = NO;
}
}
if (matchOk) {
OSKTwitterTextEntity *entity = [OSKTwitterTextEntity entityWithType:OSKTwitterTextEntityHashtag range:hashtagRange];
[results addObject:entity];
}
}
position = NSMaxRange(matchResult.range);
}
return results;
}
+ (NSArray*)symbolsInText:(NSString*)text checkingURLOverlap:(BOOL)checkingURLOverlap
{
if (!text.length) {
return [NSArray array];
}
NSArray *urls = nil;
if (checkingURLOverlap) {
urls = [self URLsInText:text];
}
return [self symbolsInText:text withURLEntities:urls];
}
+ (NSArray*)symbolsInText:(NSString*)text withURLEntities:(NSArray*)urlEntities
{
if (!text.length) {
return [NSArray array];
}
NSMutableArray *results = [NSMutableArray array];
NSInteger len = text.length;
NSInteger position = 0;
while (1) {
NSTextCheckingResult *matchResult = [[self validCashtagRegexp] firstMatchInString:text options:NSMatchingWithoutAnchoringBounds range:NSMakeRange(position, len - position)];
if (!matchResult || matchResult.numberOfRanges < 2) {
break;
}
NSRange symbolRange = [matchResult rangeAtIndex:1];
BOOL matchOk = YES;
// Check URL overlap
for (OSKTwitterTextEntity *urlEntity in urlEntities) {
if (NSIntersectionRange(urlEntity.range, symbolRange).length > 0) {
matchOk = NO;
break;
}
}
if (matchOk) {
OSKTwitterTextEntity *entity = [OSKTwitterTextEntity entityWithType:OSKTwitterTextEntitySymbol range:symbolRange];
[results addObject:entity];
}
position = NSMaxRange(matchResult.range);
}
return results;
}
+ (NSArray*)mentionedScreenNamesInText:(NSString*)text
{
if (!text.length) {
return [NSArray array];
}
NSArray *mentionsOrLists = [self mentionsOrListsInText:text];
NSMutableArray *results = [NSMutableArray array];
for (OSKTwitterTextEntity *entity in mentionsOrLists) {
if (entity.type == OSKTwitterTextEntityScreenName) {
[results addObject:entity];
}
}
return results;
}
+ (NSArray*)mentionsOrListsInText:(NSString*)text
{
if (!text.length) {
return [NSArray array];
}
NSMutableArray *results = [NSMutableArray array];
NSInteger len = text.length;
NSInteger position = 0;
while (1) {
NSTextCheckingResult *matchResult = [[self validMentionOrListRegexp] firstMatchInString:text options:NSMatchingWithoutAnchoringBounds range:NSMakeRange(position, len - position)];
if (!matchResult || matchResult.numberOfRanges < 5) {
break;
}
NSRange allRange = matchResult.range;
NSInteger end = NSMaxRange(allRange);
NSRange endMentionRange = [[self endMentionRegexp] rangeOfFirstMatchInString:text options:0 range:NSMakeRange(end, len - end)];
if (endMentionRange.location == NSNotFound) {
NSRange atSignRange = [matchResult rangeAtIndex:2];
NSRange screenNameRange = [matchResult rangeAtIndex:3];
NSRange listNameRange = [matchResult rangeAtIndex:4];
if (listNameRange.location == NSNotFound) {
OSKTwitterTextEntity *entity = [OSKTwitterTextEntity entityWithType:OSKTwitterTextEntityScreenName range:NSMakeRange(atSignRange.location, NSMaxRange(screenNameRange) - atSignRange.location)];
[results addObject:entity];
} else {
OSKTwitterTextEntity *entity = [OSKTwitterTextEntity entityWithType:OSKTwitterTextEntityListName range:NSMakeRange(atSignRange.location, NSMaxRange(listNameRange) - atSignRange.location)];
[results addObject:entity];
}
} else {
// Avoid matching the second username in @username@username
end++;
}
position = end;
}
return results;
}
+ (OSKTwitterTextEntity*)repliedScreenNameInText:(NSString*)text
{
if (!text.length) {
return nil;
}
NSInteger len = text.length;
NSTextCheckingResult *matchResult = [[self validReplyRegexp] firstMatchInString:text options:(NSMatchingWithoutAnchoringBounds | NSMatchingAnchored) range:NSMakeRange(0, len)];
if (!matchResult || matchResult.numberOfRanges < 2) {
return nil;
}
NSRange replyRange = [matchResult rangeAtIndex:1];
NSInteger replyEnd = NSMaxRange(replyRange);
NSRange endMentionRange = [[self endMentionRegexp] rangeOfFirstMatchInString:text options:0 range:NSMakeRange(replyEnd, len - replyEnd)];
if (endMentionRange.location != NSNotFound) {
return nil;
}
return [OSKTwitterTextEntity entityWithType:OSKTwitterTextEntityScreenName range:replyRange];
}
+ (NSInteger)tweetLength:(NSString*)text
{
return [self tweetLength:text httpURLLength:HTTPShortURLLength httpsURLLength:HTTPSShortURLLength];
}
+ (NSInteger)tweetLength:(NSString*)text httpURLLength:(NSInteger)httpURLLength httpsURLLength:(NSInteger)httpsURLLength
{
text = [text precomposedStringWithCanonicalMapping];
if (!text.length) {
return 0;
}
// Remove URLs from text and add t.co length
NSMutableString *string = [text mutableCopy];
#if !__has_feature(objc_arc)
[string autorelease];
#endif
int urlLengthOffset = 0;
NSArray *urlEntities = [self URLsInText:text];
for (NSInteger i=urlEntities.count-1; i>=0; i--) {
OSKTwitterTextEntity *entity = [urlEntities objectAtIndex:i];
NSRange urlRange = entity.range;
NSString *url = [string substringWithRange:urlRange];
if ([url rangeOfString:@"https" options:(NSCaseInsensitiveSearch | NSAnchoredSearch)].location == 0) {
urlLengthOffset += httpsURLLength;
} else {
urlLengthOffset += httpURLLength;
}
[string deleteCharactersInRange:urlRange];
}
NSInteger len = string.length;
NSInteger charCount = len + urlLengthOffset;
if (len > 0) {
// Adjust count for non-BMP characters
UniChar buffer[len];
[string getCharacters:buffer range:NSMakeRange(0, len)];
for (int i=0; i<len; i++) {
UniChar c = buffer[i];
if (CFStringIsSurrogateHighCharacter(c)) {
if (i+1 < len) {
UniChar d = buffer[i+1];
if (CFStringIsSurrogateLowCharacter(d)) {
charCount--;
i++;
}
}
}
}
}
return charCount;
}
+ (NSInteger)remainingCharacterCount:(NSString*)text
{
return [self remainingCharacterCount:text httpURLLength:HTTPShortURLLength httpsURLLength:HTTPSShortURLLength];
}
+ (NSInteger)remainingCharacterCount:(NSString*)text httpURLLength:(NSInteger)httpURLLength httpsURLLength:(NSInteger)httpsURLLength
{
return MaxTweetLength - [self tweetLength:text httpURLLength:httpURLLength httpsURLLength:httpsURLLength];
}
#pragma mark - Regular Expressions and CharacterSet
+ (NSRegularExpression*)validURLRegexp
{
static NSRegularExpression *validURLRegexp;
static dispatch_once_t onceToken;
dispatch_once(&onceToken, ^{
validURLRegexp = [[NSRegularExpression alloc] initWithPattern:TWUValidURL options:NSRegularExpressionCaseInsensitive error:NULL];
});
return validURLRegexp;
}
+ (NSRegularExpression*)validASCIIDomainRegexp
{
static NSRegularExpression *validASCIIDomainRegexp;
static dispatch_once_t onceToken;
dispatch_once(&onceToken, ^{
validASCIIDomainRegexp = [[NSRegularExpression alloc] initWithPattern:TWUValidASCIIDomain options:NSRegularExpressionCaseInsensitive error:NULL];
});
return validASCIIDomainRegexp;
}
+ (NSRegularExpression*)invalidShortDomainRegexp
{
static NSRegularExpression *invalidShortDomainRegexp;
static dispatch_once_t onceToken;
dispatch_once(&onceToken, ^{
invalidShortDomainRegexp = [[NSRegularExpression alloc] initWithPattern:TWUInvalidShortDomain options:NSRegularExpressionCaseInsensitive error:NULL];
});
return invalidShortDomainRegexp;
}
+ (NSRegularExpression*)validTCOURLRegexp
{
static NSRegularExpression *validTCOURLRegexp;
static dispatch_once_t onceToken;
dispatch_once(&onceToken, ^{
validTCOURLRegexp = [[NSRegularExpression alloc] initWithPattern:TWUValidTCOURL options:NSRegularExpressionCaseInsensitive error:NULL];
});
return validTCOURLRegexp;
}
+ (NSRegularExpression*)validHashtagRegexp
{
static NSRegularExpression *validHashtagRegexp;
static dispatch_once_t onceToken;
dispatch_once(&onceToken, ^{
validHashtagRegexp = [[NSRegularExpression alloc] initWithPattern:TWUValidHashtag options:NSRegularExpressionCaseInsensitive error:NULL];
});
return validHashtagRegexp;
}
+ (NSRegularExpression*)endHashtagRegexp
{
static NSRegularExpression *endHashtagRegexp;
static dispatch_once_t onceToken;
dispatch_once(&onceToken, ^{
endHashtagRegexp = [[NSRegularExpression alloc] initWithPattern:TWUEndHashTagMatch options:NSRegularExpressionCaseInsensitive error:NULL];
});
return endHashtagRegexp;
}
+ (NSRegularExpression*)validCashtagRegexp
{
static NSRegularExpression *validCashtagRegexp;
static dispatch_once_t onceToken;
dispatch_once(&onceToken, ^{
validCashtagRegexp = [[NSRegularExpression alloc] initWithPattern:TWUValidCashtag options:NSRegularExpressionCaseInsensitive error:NULL];
});
return validCashtagRegexp;
}
+ (NSRegularExpression*)validMentionOrListRegexp
{
static NSRegularExpression *validMentionOrListRegexp;
static dispatch_once_t onceToken;
dispatch_once(&onceToken, ^{
validMentionOrListRegexp = [[NSRegularExpression alloc] initWithPattern:TWUValidMentionOrList options:NSRegularExpressionCaseInsensitive error:NULL];
});
return validMentionOrListRegexp;
}
+ (NSRegularExpression*)validReplyRegexp
{
static NSRegularExpression *validReplyRegexp;
static dispatch_once_t onceToken;
dispatch_once(&onceToken, ^{
validReplyRegexp = [[NSRegularExpression alloc] initWithPattern:TWUValidReply options:NSRegularExpressionCaseInsensitive error:NULL];
});
return validReplyRegexp;
}
+ (NSRegularExpression*)endMentionRegexp
{
static NSRegularExpression *endMentionRegexp;
static dispatch_once_t onceToken;
dispatch_once(&onceToken, ^{
endMentionRegexp = [[NSRegularExpression alloc] initWithPattern:TWUEndMentionMatch options:NSRegularExpressionCaseInsensitive error:NULL];
});
return endMentionRegexp;
}
+ (NSCharacterSet*)invalidURLWithoutProtocolPrecedingCharSet
{
static NSCharacterSet *invalidURLWithoutProtocolPrecedingCharSet;
static dispatch_once_t onceToken;
dispatch_once(&onceToken, ^{
invalidURLWithoutProtocolPrecedingCharSet = [NSCharacterSet characterSetWithCharactersInString:@"-_./"];
#if !__has_feature(objc_arc)
[invalidURLWithoutProtocolPrecedingCharSet retain];
#endif
});
return invalidURLWithoutProtocolPrecedingCharSet;
}
@end