mirror of
https://github.com/samuelclay/NewsBlur.git
synced 2025-08-21 05:45:13 +00:00
799 lines
28 KiB
Objective-C
Executable file
799 lines
28 KiB
Objective-C
Executable file
//
|
||
// OSKTwitterText.m
|
||
//
|
||
// Copyright 2012 Twitter, Inc.
|
||
//
|
||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||
// you may not use this file except in compliance with the License.
|
||
// You may obtain a copy of the License at
|
||
//
|
||
// http://www.apache.org/licenses/LICENSE-2.0
|
||
//
|
||
|
||
#import "OSKTwitterText.h"
|
||
|
||
//
|
||
// These regular expressions are ported from twitter-text-rb on Apr 24 2012.
|
||
//
|
||
|
||
// Whitespace code points (or ranges), each written as an ICU regex escape so
// the fragments can be concatenated inside a character class.
#define TWUControlCharacters        @"\\u0009-\\u000D"
#define TWUSpace                    @"\\u0020"
#define TWUControl85                @"\\u0085"
#define TWUNoBreakSpace             @"\\u00A0"
#define TWUOghamBreakSpace          @"\\u1680"
#define TWUMongolianVowelSeparator  @"\\u180E"
#define TWUWhiteSpaces              @"\\u2000-\\u200A"
#define TWULineSeparator            @"\\u2028"
#define TWUParagraphSeparator       @"\\u2029"
#define TWUNarrowNoBreakSpace       @"\\u202F"
#define TWUMediumMathematicalSpace  @"\\u205F"
#define TWUIdeographicSpace         @"\\u3000"

// All of the above joined by compile-time string-literal concatenation.
// The result has no surrounding brackets; callers wrap it in [...] themselves.
#define TWUUnicodeSpaces \
    TWUControlCharacters \
    TWUSpace \
    TWUControl85 \
    TWUNoBreakSpace \
    TWUOghamBreakSpace \
    TWUMongolianVowelSeparator \
    TWUWhiteSpaces \
    TWULineSeparator \
    TWUParagraphSeparator \
    TWUNarrowNoBreakSpace \
    TWUMediumMathematicalSpace \
    TWUIdeographicSpace
|
||
|
||
// Code points excluded from URL contexts: U+FFFE, U+FEFF, U+FFFF and the
// range U+202A-U+202E. Bracket-less, meant for use inside a character class.
#define TWUInvalidCharacters    @"\\uFFFE\\uFEFF\\uFFFF\\u202A-\\u202E"

// Accented Latin letters accepted in hashtags, domains and URL paths.
// Bracket-less, meant for use inside a character class.
#define TWULatinAccents \
    @"\\u00C0-\\u00D6\\u00D8-\\u00F6\\u00F8-\\u00FF\\u0100-\\u024F\\u0253-\\u0254\\u0256-\\u0257\\u0259\\u025b\\u0263\\u0268\\u026F\\u0272\\u0289\\u02BB\\u1E00-\\u1EFF"
|
||
|
||
//
|
||
// Hashtag
|
||
//
|
||
|
||
// Per-script character ranges allowed inside hashtags. Every fragment is a
// bracket-less list of ICU escapes intended for use inside a character class.

// Cyrillic
#define TWUCyrillicHashtagChars                     @"\\u0400-\\u04FF"
#define TWUCyrillicSupplementHashtagChars           @"\\u0500-\\u0527"
#define TWUCyrillicExtendedAHashtagChars            @"\\u2DE0-\\u2DFF"
#define TWUCyrillicExtendedBHashtagChars            @"\\uA640-\\uA69F"

// Hebrew
#define TWUHebrewHashtagChars                       @"\\u0591-\\u05BF\\u05C1-\\u05C2\\u05C4-\\u05C5\\u05C7\\u05D0-\\u05EA\\u05F0-\\u05F4"
#define TWUHebrewPresentationFormsHashtagChars      @"\\uFB12-\\uFB28\\uFB2A-\\uFB36\\uFB38-\\uFB3C\\uFB3E\\uFB40-\\uFB41\\uFB43-\\uFB44\\uFB46-\\uFB4F"

// Arabic
#define TWUArabicHashtagChars                       @"\\u0610-\\u061A\\u0620-\\u065F\\u066E-\\u06D3\\u06D5-\\u06DC\\u06DE-\\u06E8\\u06EA-\\u06EF\\u06FA-\\u06FC\\u06FF"
#define TWUArabicSupplementHashtagChars             @"\\u0750-\\u077F"
#define TWUArabicExtendedAHashtagChars              @"\\u08A0\\u08A2-\\u08AC\\u08E4-\\u08FE"
#define TWUArabicPresentationFormsAHashtagChars     @"\\uFB50-\\uFBB1\\uFBD3-\\uFD3D\\uFD50-\\uFD8F\\uFD92-\\uFDC7\\uFDF0-\\uFDFB"
#define TWUArabicPresentationFormsBHashtagChars     @"\\uFE70-\\uFE74\\uFE76-\\uFEFC"

#define TWUZeroWidthNonJoiner                       @"\\u200C"

// Thai
#define TWUThaiHashtagChars                         @"\\u0E01-\\u0E3A"
// NOTE(review): U+0E40-U+0E4E lies in the Unicode Thai block, so this macro
// looks misnamed. The name and range are kept exactly as they were.
#define TWUHangulHashtagChars                       @"\\u0E40-\\u0E4E"

// Hangul
#define TWUHangulJamoHashtagChars                   @"\\u1100-\\u11FF"
#define TWUHangulCompatibilityJamoHashtagChars      @"\\u3130-\\u3185"
#define TWUHangulJamoExtendedAHashtagChars          @"\\uA960-\\uA97F"
#define TWUHangulSyllablesHashtagChars              @"\\uAC00-\\uD7AF"
#define TWUHangulJamoExtendedBHashtagChars          @"\\uD7B0-\\uD7FF"
#define TWUHalfWidthHangulHashtagChars              @"\\uFFA1-\\uFFDC"

// Every non-Latin fragment above, concatenated at compile time.
#define TWUNonLatinHashtagChars \
    TWUCyrillicHashtagChars \
    TWUCyrillicSupplementHashtagChars \
    TWUCyrillicExtendedAHashtagChars \
    TWUCyrillicExtendedBHashtagChars \
    TWUHebrewHashtagChars \
    TWUHebrewPresentationFormsHashtagChars \
    TWUArabicHashtagChars \
    TWUArabicSupplementHashtagChars \
    TWUArabicExtendedAHashtagChars \
    TWUArabicPresentationFormsAHashtagChars \
    TWUArabicPresentationFormsBHashtagChars \
    TWUZeroWidthNonJoiner \
    TWUThaiHashtagChars \
    TWUHangulHashtagChars \
    TWUHangulJamoHashtagChars \
    TWUHangulCompatibilityJamoHashtagChars \
    TWUHangulJamoExtendedAHashtagChars \
    TWUHangulSyllablesHashtagChars \
    TWUHangulJamoExtendedBHashtagChars \
    TWUHalfWidthHangulHashtagChars
|
||
|
||
// Japanese / Chinese character ranges allowed inside hashtags. Bracket-less
// ICU escapes; the supplementary-plane ranges use the 8-digit \U form.
#define TWUKatakanaHashtagChars             @"\\u30A1-\\u30FA\\u30FC-\\u30FE"
#define TWUKatakanaHalfWidthHashtagChars    @"\\uFF66-\\uFF9F"
#define TWULatinFullWidthHashtagChars       @"\\uFF10-\\uFF19\\uFF21-\\uFF3A\\uFF41-\\uFF5A"
#define TWUHiraganaHashtagChars             @"\\u3041-\\u3096\\u3099-\\u309E"
#define TWUCJKExtensionAHashtagChars        @"\\u3400-\\u4DBF"
#define TWUCJKUnifiedHashtagChars           @"\\u4E00-\\u9FFF"
#define TWUCJKExtensionBHashtagChars        @"\\U00020000-\\U0002A6DF"
#define TWUCJKExtensionCHashtagChars        @"\\U0002A700-\\U0002B73F"
#define TWUCJKExtensionDHashtagChars        @"\\U0002B740-\\U0002B81F"
#define TWUCJKSupplementHashtagChars        @"\\U0002F800-\\U0002FA1F\\u3003\\u3005\\u303B"

// Every CJK fragment above, concatenated at compile time.
#define TWUCJKHashtagCharacters \
    TWUKatakanaHashtagChars \
    TWUKatakanaHalfWidthHashtagChars \
    TWULatinFullWidthHashtagChars \
    TWUHiraganaHashtagChars \
    TWUCJKExtensionAHashtagChars \
    TWUCJKUnifiedHashtagChars \
    TWUCJKExtensionBHashtagChars \
    TWUCJKExtensionCHashtagChars \
    TWUCJKExtensionDHashtagChars \
    TWUCJKSupplementHashtagChars
|
||
|
||
// ASCII punctuation, written for use inside a regex character class (only
// '[' , ']' and the leading '-' are escaped). The three variants share one
// base literal; adjacent string literals are joined at compile time, so the
// resulting strings are identical to spelling each one out in full.
#define TWUPunctuationCharsWithoutHyphenAndUnderscore   @"!\"#$%&'()*+,./:;<=>?@\\[\\]^`{|}~"
#define TWUPunctuationCharsWithoutHyphen                @"_" TWUPunctuationCharsWithoutHyphenAndUnderscore
#define TWUPunctuationChars                             @"\\-" TWUPunctuationCharsWithoutHyphen

// ASCII control characters: 0x00-0x1F and 0x7F.
#define TWUCtrlChars                                    @"\\x00-\\x1F\\x7F"
|
||
|
||
// Character class of letters that may appear in a hashtag (no digits).
// Patterns built from it are compiled case-insensitively, hence only a-z.
#define TWHashtagAlpha \
    @"[a-z_" \
    TWULatinAccents \
    TWUNonLatinHashtagChars \
    TWUCJKHashtagCharacters \
    @"]"

// Same as TWHashtagAlpha, plus ASCII digits.
#define TWUHashtagAlphanumeric \
    @"[a-z0-9_" \
    TWULatinAccents \
    TWUNonLatinHashtagChars \
    TWUCJKHashtagCharacters \
    @"]"

// What may precede a hashtag: start or end of input, or one character that is
// neither '&' nor a hashtag character. The alternation is not grouped here;
// TWUValidHashtag wraps it in (?:...).
#define TWUHashtagBoundary \
    @"^|$|[^&a-z0-9_" \
    TWULatinAccents \
    TWUNonLatinHashtagChars \
    TWUCJKHashtagCharacters \
    @"]"

// Capture group 1 is the hashtag including its hash sign. The hash sign is
// either ASCII '#' or FULLWIDTH NUMBER SIGN (U+FF03). The fullwidth form is
// written as an escape so it cannot be lost to source re-encoding or
// normalization (the class previously listed '#' twice).
// The body must contain at least one non-digit hashtag character.
#define TWUValidHashtag \
    @"(?:" TWUHashtagBoundary @")([#\\uFF03]" TWUHashtagAlphanumeric @"*" TWHashtagAlpha TWUHashtagAlphanumeric @"*)"

// Matched against the text immediately following a hashtag candidate: another
// hash sign (ASCII or fullwidth) or "://" disqualifies the candidate.
#define TWUEndHashTagMatch      @"\\A(?:[#\\uFF03]|://)"
|
||
|
||
//
|
||
// Cashtag
|
||
//
|
||
|
||
// Ticker symbol body: 1-6 letters, optionally followed by '.' or '_' and 1-2
// more letters.
#define TWUCashtag          @"[a-z]{1,6}(?:[._][a-z]{1,2})?"

// A cashtag must start the input or follow a Unicode space. Capture group 1
// is '$' plus the symbol. It must be followed by end of input, whitespace or
// punctuation (checked with a lookahead, so that character is not consumed).
#define TWUValidCashtag \
    @"(?:^|[" TWUUnicodeSpaces @"])" \
    @"(\\$" TWUCashtag @")" \
    @"(?=$|\\s|[" TWUPunctuationChars @"])"
|
||
|
||
//
|
||
// Mention and list name
|
||
//
|
||
|
||
// The at sign is either ASCII '@' or FULLWIDTH COMMERCIAL AT (U+FF20). The
// fullwidth form is written as an escape so it cannot be lost to source
// re-encoding or normalization (the classes previously listed '@' twice).

// What may precede a mention: a character that is not alphanumeric, '_', one
// of !#$%&* or an at sign; or start of input; or "RT" / "RT:".
#define TWUValidMentionPrecedingChars   @"(?:[^a-zA-Z0-9_!#$%&*@\\uFF20]|^|RT:?)"
#define TWUAtSigns                      @"[@\\uFF20]"

// Whole-string validators for "@name" and "@name/list-slug".
#define TWUValidUsername                @"\\A" TWUAtSigns @"[a-zA-Z0-9_]{1,20}\\z"
#define TWUValidList                    @"\\A" TWUAtSigns @"[a-zA-Z0-9_]{1,20}/[a-zA-Z][a-zA-Z0-9_\\-]{0,24}\\z"

// Capture groups: 1 = preceding text, 2 = at sign, 3 = screen name,
// 4 = optional "/list-slug".
#define TWUValidMentionOrList \
    @"(" TWUValidMentionPrecedingChars @")" \
    @"(" TWUAtSigns @")" \
    @"([a-zA-Z0-9_]{1,20})" \
    @"(/[a-zA-Z][a-zA-Z0-9_\\-]{0,24})?"

// A reply starts with optional Unicode spaces followed by a mention;
// capture group 1 is the screen name without the at sign.
#define TWUValidReply                   @"\\A(?:[" TWUUnicodeSpaces @"])*" TWUAtSigns @"([a-zA-Z0-9_]{1,20})"

// Matched against the text right after a mention candidate: an at sign, an
// accented Latin letter or "://" disqualifies the candidate.
#define TWUEndMentionMatch              @"\\A(?:" TWUAtSigns @"|[" TWULatinAccents @"]|://)"
|
||
|
||
//
|
||
// URL
|
||
//
|
||
|
||
// What may precede a URL: start of input, or one character that is not
// alphanumeric, not '@' / '$' / '#', not their fullwidth forms U+FF20 / U+FF03,
// and not one of TWUInvalidCharacters. The fullwidth signs are written as
// escapes so they cannot be lost to source re-encoding or normalization (the
// class previously listed '@' and '#' twice each).
#define TWUValidURLPrecedingChars   @"(?:[^a-zA-Z0-9@\\uFF20$#\\uFF03" TWUInvalidCharacters @"]|^)"

// First/last character of a domain label: no punctuation, control characters,
// invalid characters or Unicode spaces.
#define TWUDomainValidStartEndChars \
    @"[^" \
    TWUPunctuationChars \
    TWUCtrlChars \
    TWUInvalidCharacters \
    TWUUnicodeSpaces \
    @"]"

// Interior character of a subdomain label: as above, but '-' and '_' are allowed.
#define TWUSubdomainValidMiddleChars \
    @"[^" \
    TWUPunctuationCharsWithoutHyphenAndUnderscore \
    TWUCtrlChars \
    TWUInvalidCharacters \
    TWUUnicodeSpaces \
    @"]"

// Interior character of the registrable domain label: only '-' is additionally allowed.
#define TWUDomainValidMiddleChars \
    @"[^" \
    TWUPunctuationCharsWithoutHyphen \
    TWUCtrlChars \
    TWUInvalidCharacters \
    TWUUnicodeSpaces \
    @"]"
|
||
|
||
// One subdomain label followed by a dot. A label is a single start/end
// character, or a start character, any number of middle characters and an
// end character.
#define TWUValidSubdomain \
    @"(?:" \
        @"(?:" TWUDomainValidStartEndChars TWUSubdomainValidMiddleChars @"*)?" TWUDomainValidStartEndChars @"\\." \
    @")"

// Same shape as TWUValidSubdomain, using the stricter middle-character class
// of the domain name itself.
#define TWUValidDomainName \
    @"(?:" \
        @"(?:" TWUDomainValidStartEndChars TWUDomainValidMiddleChars @"*)?" TWUDomainValidStartEndChars @"\\." \
    @")"
|
||
|
||
// Generic top-level domains. The lookahead requires that the TLD is followed
// by a non-alphanumeric character or the end of input.
#define TWUValidGTLD    @"(?:(?:aero|asia|biz|cat|com|coop|edu|gov|info|int|jobs|mil|mobi|museum|name|net|org|pro|tel|travel|xxx)(?=[^0-9a-z]|$))"

// Country-code top-level domains, with the same trailing lookahead.
#define TWUValidCCTLD \
    @"(?:" \
        @"(?:" \
            @"ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|" \
            @"bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|" \
            @"cs|cu|cv|cx|cy|cz|dd|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|" \
            @"fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|" \
            @"ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|" \
            @"ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|" \
            @"mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|" \
            @"pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|" \
            @"si|sj|sk|sl|sm|sn|so|sr|ss|st|su|sv|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|" \
            @"tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|za|zm|" \
            @"zw" \
        @")" \
        @"(?=[^0-9a-z]|$)" \
    @")"

// Punycode-encoded (internationalized) TLD.
#define TWUValidPunycode    @"(?:xn--[0-9a-z]+)"

// Full domain: any number of subdomains, the domain name, then one TLD form.
#define TWUValidDomain \
    @"(?:" \
        TWUValidSubdomain @"*" TWUValidDomainName \
        @"(?:" TWUValidGTLD @"|" TWUValidCCTLD @"|" TWUValidPunycode @")" \
    @")"
|
||
|
||
// One or more dot-terminated labels made of ASCII alphanumerics, '-', '_' and
// accented Latin letters, followed by one TLD form.
// The final line deliberately has no trailing backslash: a dangling
// continuation would splice whatever line follows into this macro.
#define TWUValidASCIIDomain \
    @"(?:[a-zA-Z0-9\\-_" TWULatinAccents @"]+\\.)+" \
    @"(?:" TWUValidGTLD @"|" TWUValidCCTLD @"|" TWUValidPunycode @")"

// A t.co short link: scheme, host and an alphanumeric slug.
#define TWUValidTCOURL          @"https?://t\\.co/[a-zA-Z0-9]+"

// Whole-range match of a single domain label directly followed by a
// country-code TLD (e.g. "example.co"); URLsInText: only keeps such a
// protocol-less candidate when a path directly follows it.
#define TWUInvalidShortDomain   @"\\A" TWUValidDomainName TWUValidCCTLD @"\\z"
|
||
|
||
#define TWUValidPortNumber              @"[0-9]+"

// Characters allowed anywhere in a URL path.
#define TWUValidGeneralURLPathChars     @"[a-zA-Z0-9!\\*';:=+,.$/%#\\[\\]\\-_~&|@" TWULatinAccents @"]"

// One parenthesised run of path characters, e.g. "(disambiguation)".
#define TWUValidURLBalancedParens       @"\\(" TWUValidGeneralURLPathChars @"+\\)"

// A path may end with one of these characters or with a balanced-parens
// group. Note the alternation is NOT wrapped in a group of its own.
#define TWUValidURLPathEndingChars      @"[a-zA-Z0-9=_#/+\\-" TWULatinAccents @"]|(?:" TWUValidURLBalancedParens @")"

// One path segment; TWUValidURL repeats it with '*'.
#define TWUValidURLPath \
    @"(?:" \
        @"(?:" \
            TWUValidGeneralURLPathChars @"*" \
            @"(?:" TWUValidURLBalancedParens TWUValidGeneralURLPathChars @"*)*" TWUValidURLPathEndingChars \
        @")" \
        @"|" \
        @"(?:" TWUValidGeneralURLPathChars @"+/)" \
    @")"

#define TWUValidURLQueryChars           @"[a-zA-Z0-9!?*'\\(\\);:&=+$/%#\\[\\]\\-_\\.,~|@]"
#define TWUValidURLQueryEndingChars     @"[a-zA-Z0-9_&=#/]"

// Capture groups: 1 = whole match, 2 = preceding character, 3 = URL,
// 4 = protocol, 5 = domain, 6 = port, 7 = path, 8 = query string.
#define TWUValidURL \
    @"(" \
        @"(" TWUValidURLPrecedingChars @")" \
        @"(" \
            @"(https?://)?" \
            @"(" TWUValidDomain @")" \
            @"(?::(" TWUValidPortNumber @"))?" \
            @"(/" TWUValidURLPath @"*)?" \
            @"(\\?" TWUValidURLQueryChars @"*" TWUValidURLQueryEndingChars @")?" \
        @")" \
    @")"
|
||
|
||
// Character budget that remainingCharacterCount: subtracts the tweet length from.
static NSInteger const MaxTweetLength = 140;

// Default number of characters every URL is counted as by tweetLength:,
// depending on whether the URL text starts with "https".
static NSInteger const HTTPShortURLLength = 22;
static NSInteger const HTTPSShortURLLength = 23;
|
||
|
||
@implementation OSKTwitterText
|
||
|
||
/**
 * Extracts every entity (URLs, hashtags, cashtag symbols, mentions and list
 * names) found in the text.
 *
 * Hashtags and symbols are extracted with the URL entities passed along for
 * overlap checks. Mentions/lists are only kept when their range does not
 * intersect any URL, hashtag or symbol already collected.
 *
 * @param text The text to scan; nil or empty yields an empty array.
 * @return OSKTwitterTextEntity objects, sorted with their -compare: method.
 */
+ (NSArray*)entitiesInText:(NSString*)text
{
    if (text.length == 0) {
        return [NSArray array];
    }

    NSArray *urlEntities = [self URLsInText:text];

    NSMutableArray *entities = [NSMutableArray array];
    [entities addObjectsFromArray:urlEntities];
    [entities addObjectsFromArray:[self hashtagsInText:text withURLEntities:urlEntities]];
    [entities addObjectsFromArray:[self symbolsInText:text withURLEntities:urlEntities]];

    // Mentions are collected separately so they are only compared against
    // URLs/hashtags/symbols, never against each other.
    NSMutableArray *acceptedMentions = [NSMutableArray array];
    for (OSKTwitterTextEntity *candidate in [self mentionsOrListsInText:text]) {
        BOOL overlapsExisting = NO;
        for (OSKTwitterTextEntity *taken in entities) {
            if (NSIntersectionRange(taken.range, candidate.range).length > 0) {
                overlapsExisting = YES;
                break;
            }
        }
        if (!overlapsExisting) {
            [acceptedMentions addObject:candidate];
        }
    }

    [entities addObjectsFromArray:acceptedMentions];
    [entities sortUsingSelector:@selector(compare:)];

    return entities;
}
|
||
|
||
/**
 * Finds URLs in the text.
 *
 * The text is scanned repeatedly with the valid-URL pattern, each scan
 * starting where the previous overall match ended.
 *  - A match with a protocol becomes one entity; if the match starts with a
 *    t.co link, the entity is cut to the length of that link.
 *  - A match without a protocol is skipped when the character before it is
 *    one of "-_./". Otherwise its domain part is re-scanned for ASCII-only
 *    domains, each of which becomes an entity unless it is a bare
 *    "label.ccTLD" short domain. The last ASCII domain is extended by the
 *    path when the path starts right after it (and in that case it is kept
 *    even if it is a short domain).
 *
 * @param text The text to scan; nil or empty yields an empty array.
 * @return OSKTwitterTextEntity objects of type OSKTwitterTextEntityURL, in
 *         the order they were found.
 */
+ (NSArray*)URLsInText:(NSString*)text
{
    if (text.length == 0) {
        return [NSArray array];
    }

    NSMutableArray *urlEntities = [NSMutableArray array];
    NSInteger textLength = text.length;
    NSRange wholeMatchRange = NSMakeRange(0, 0);

    for (;;) {
        // Resume right after the previous overall match (0 on the first pass).
        NSInteger scanStart = NSMaxRange(wholeMatchRange);
        NSTextCheckingResult *urlMatch = [[self validURLRegexp] firstMatchInString:text
                                                                           options:NSMatchingWithoutAnchoringBounds
                                                                             range:NSMakeRange(scanStart, textLength - scanStart)];
        if (urlMatch == nil || urlMatch.numberOfRanges < 9) {
            break;
        }

        wholeMatchRange = urlMatch.range;
        NSRange precedingRange = [urlMatch rangeAtIndex:2];
        NSRange urlRange = [urlMatch rangeAtIndex:3];
        NSRange protocolRange = [urlMatch rangeAtIndex:4];
        NSRange domainRange = [urlMatch rangeAtIndex:5];

        if (protocolRange.location != NSNotFound) {
            // In the case of t.co URLs, don't allow additional path characters
            NSRange tcoRange = [[self validTCOURLRegexp] rangeOfFirstMatchInString:text options:0 range:urlRange];
            if (tcoRange.location != NSNotFound) {
                urlRange.length = tcoRange.length;
            }

            [urlEntities addObject:[OSKTwitterTextEntity entityWithType:OSKTwitterTextEntityURL range:urlRange]];
            continue;
        }

        // Protocol is missing: reject candidates glued to a preceding "-_./".
        if (precedingRange.location != NSNotFound && precedingRange.length > 0) {
            NSString *precedingText = [text substringWithRange:precedingRange];
            NSRange rejectedSuffix = [precedingText rangeOfCharacterFromSet:[self invalidURLWithoutProtocolPrecedingCharSet]
                                                                    options:NSBackwardsSearch | NSAnchoredSearch];
            if (rejectedSuffix.location != NSNotFound) {
                continue;
            }
        }

        // The domain may contain non-ASCII characters; extract ASCII-only
        // domains from it one after another.
        NSInteger cursor = domainRange.location;
        NSInteger domainEnd = NSMaxRange(domainRange);
        OSKTwitterTextEntity *lastEntity = nil;
        BOOL lastIsInvalidShort = NO;

        while (cursor < domainEnd) {
            NSTextCheckingResult *asciiMatch = [[self validASCIIDomainRegexp] firstMatchInString:text
                                                                                         options:0
                                                                                           range:NSMakeRange(cursor, domainEnd - cursor)];
            if (asciiMatch == nil) {
                break;
            }

            NSRange asciiRange = asciiMatch.range;
            lastEntity = [OSKTwitterTextEntity entityWithType:OSKTwitterTextEntityURL range:asciiRange];

            NSTextCheckingResult *shortMatch = [[self invalidShortDomainRegexp] firstMatchInString:text options:0 range:asciiRange];
            lastIsInvalidShort = (shortMatch != nil);
            if (!lastIsInvalidShort) {
                [urlEntities addObject:lastEntity];
            }

            cursor = NSMaxRange(asciiRange);
        }

        if (lastEntity == nil) {
            continue;
        }

        NSRange pathRange = [urlMatch rangeAtIndex:7];
        BOOL pathFollowsDirectly = (pathRange.location != NSNotFound
                                    && NSMaxRange(lastEntity.range) == pathRange.location);
        if (pathFollowsDirectly) {
            // A short domain becomes acceptable once a path is attached to it.
            if (lastIsInvalidShort) {
                [urlEntities addObject:lastEntity];
            }
            NSRange extendedRange = lastEntity.range;
            extendedRange.length += pathRange.length;
            lastEntity.range = extendedRange;
        }
    }

    return urlEntities;
}
|
||
|
||
/**
 * Finds hashtags in the text, optionally discarding those that overlap a URL.
 *
 * @param text               The text to scan; nil or empty yields an empty array.
 * @param checkingURLOverlap When YES, URLs are extracted first and handed to
 *                           hashtagsInText:withURLEntities: for the overlap
 *                           check; when NO, nil is passed instead.
 * @return Whatever hashtagsInText:withURLEntities: returns.
 */
+ (NSArray*)hashtagsInText:(NSString*)text checkingURLOverlap:(BOOL)checkingURLOverlap
{
    if (text.length == 0) {
        return [NSArray array];
    }

    NSArray *urlEntities = checkingURLOverlap ? [self URLsInText:text] : nil;
    return [self hashtagsInText:text withURLEntities:urlEntities];
}
|
||
|
||
/**
 * Finds hashtags in the text.
 *
 * A candidate is dropped when its range intersects any of the given URL
 * entities, or when the text right after it matches the end-hashtag pattern.
 * Scanning always resumes after the end of the candidate's whole match,
 * whether or not the candidate was kept.
 *
 * @param text        The text to scan; nil or empty yields an empty array.
 * @param urlEntities Entities whose ranges hashtags must not intersect; may be nil.
 * @return OSKTwitterTextEntity objects of type OSKTwitterTextEntityHashtag,
 *         each covering the hash sign and the tag text.
 */
+ (NSArray*)hashtagsInText:(NSString*)text withURLEntities:(NSArray*)urlEntities
{
    if (text.length == 0) {
        return [NSArray array];
    }

    NSMutableArray *hashtags = [NSMutableArray array];
    NSInteger textLength = text.length;
    NSInteger scanStart = 0;

    for (;;) {
        NSTextCheckingResult *match = [[self validHashtagRegexp] firstMatchInString:text
                                                                            options:NSMatchingWithoutAnchoringBounds
                                                                              range:NSMakeRange(scanStart, textLength - scanStart)];
        if (match == nil || match.numberOfRanges < 2) {
            break;
        }

        NSRange hashtagRange = [match rangeAtIndex:1];
        scanStart = NSMaxRange(match.range);

        // Check URL overlap
        BOOL insideURL = NO;
        for (OSKTwitterTextEntity *urlEntity in urlEntities) {
            if (NSIntersectionRange(urlEntity.range, hashtagRange).length > 0) {
                insideURL = YES;
                break;
            }
        }
        if (insideURL) {
            continue;
        }

        // Reject the candidate when the text right after it disqualifies it.
        NSInteger tailStart = NSMaxRange(hashtagRange);
        if (tailStart < textLength) {
            NSRange disqualifier = [[self endHashtagRegexp] rangeOfFirstMatchInString:text
                                                                              options:0
                                                                                range:NSMakeRange(tailStart, textLength - tailStart)];
            if (disqualifier.location != NSNotFound) {
                continue;
            }
        }

        [hashtags addObject:[OSKTwitterTextEntity entityWithType:OSKTwitterTextEntityHashtag range:hashtagRange]];
    }

    return hashtags;
}
|
||
|
||
/**
 * Finds cashtag symbols in the text, optionally discarding those that overlap
 * a URL.
 *
 * @param text               The text to scan; nil or empty yields an empty array.
 * @param checkingURLOverlap When YES, URLs are extracted first and handed to
 *                           symbolsInText:withURLEntities: for the overlap
 *                           check; when NO, nil is passed instead.
 * @return Whatever symbolsInText:withURLEntities: returns.
 */
+ (NSArray*)symbolsInText:(NSString*)text checkingURLOverlap:(BOOL)checkingURLOverlap
{
    if (text.length == 0) {
        return [NSArray array];
    }

    NSArray *urlEntities = checkingURLOverlap ? [self URLsInText:text] : nil;
    return [self symbolsInText:text withURLEntities:urlEntities];
}
|
||
|
||
/**
 * Finds cashtag symbols (such as "$TWTR") in the text.
 *
 * A candidate is dropped when its range intersects any of the given URL
 * entities. Scanning always resumes after the end of the candidate's whole
 * match, whether or not the candidate was kept.
 *
 * @param text        The text to scan; nil or empty yields an empty array.
 * @param urlEntities Entities whose ranges symbols must not intersect; may be nil.
 * @return OSKTwitterTextEntity objects of type OSKTwitterTextEntitySymbol,
 *         each covering the '$' and the symbol text.
 */
+ (NSArray*)symbolsInText:(NSString*)text withURLEntities:(NSArray*)urlEntities
{
    if (text.length == 0) {
        return [NSArray array];
    }

    NSMutableArray *symbols = [NSMutableArray array];
    NSInteger textLength = text.length;
    NSInteger scanStart = 0;

    for (;;) {
        NSTextCheckingResult *match = [[self validCashtagRegexp] firstMatchInString:text
                                                                            options:NSMatchingWithoutAnchoringBounds
                                                                              range:NSMakeRange(scanStart, textLength - scanStart)];
        if (match == nil || match.numberOfRanges < 2) {
            break;
        }

        NSRange symbolRange = [match rangeAtIndex:1];
        scanStart = NSMaxRange(match.range);

        // Check URL overlap
        BOOL insideURL = NO;
        for (OSKTwitterTextEntity *urlEntity in urlEntities) {
            if (NSIntersectionRange(urlEntity.range, symbolRange).length > 0) {
                insideURL = YES;
                break;
            }
        }

        if (!insideURL) {
            [symbols addObject:[OSKTwitterTextEntity entityWithType:OSKTwitterTextEntitySymbol range:symbolRange]];
        }
    }

    return symbols;
}
|
||
|
||
/**
 * Finds mentioned screen names in the text, leaving out list names.
 *
 * @param text The text to scan; nil or empty yields an empty array.
 * @return The entities from mentionsOrListsInText: whose type is
 *         OSKTwitterTextEntityScreenName, in their original order.
 */
+ (NSArray*)mentionedScreenNamesInText:(NSString*)text
{
    if (text.length == 0) {
        return [NSArray array];
    }

    NSMutableArray *screenNames = [NSMutableArray array];
    for (OSKTwitterTextEntity *candidate in [self mentionsOrListsInText:text]) {
        if (candidate.type != OSKTwitterTextEntityScreenName) {
            continue;
        }
        [screenNames addObject:candidate];
    }

    return screenNames;
}
|
||
|
||
/**
 * Finds @mentions and @user/list references in the text.
 *
 * A candidate is discarded when the text right after it matches the
 * end-mention pattern; scanning then resumes one character further on.
 * Otherwise the entity spans from the at sign to the end of the screen name,
 * or to the end of the list slug when one is present.
 *
 * @param text The text to scan; nil or empty yields an empty array.
 * @return OSKTwitterTextEntity objects of type OSKTwitterTextEntityScreenName
 *         or OSKTwitterTextEntityListName, in the order they were found.
 */
+ (NSArray*)mentionsOrListsInText:(NSString*)text
{
    if (text.length == 0) {
        return [NSArray array];
    }

    NSMutableArray *mentions = [NSMutableArray array];
    NSInteger textLength = text.length;
    NSInteger scanStart = 0;

    for (;;) {
        NSTextCheckingResult *match = [[self validMentionOrListRegexp] firstMatchInString:text
                                                                                  options:NSMatchingWithoutAnchoringBounds
                                                                                    range:NSMakeRange(scanStart, textLength - scanStart)];
        if (match == nil || match.numberOfRanges < 5) {
            break;
        }

        NSInteger matchEnd = NSMaxRange(match.range);
        NSRange disqualifier = [[self endMentionRegexp] rangeOfFirstMatchInString:text
                                                                          options:0
                                                                            range:NSMakeRange(matchEnd, textLength - matchEnd)];
        if (disqualifier.location != NSNotFound) {
            // Avoid matching the second username in @username@username
            scanStart = matchEnd + 1;
            continue;
        }

        NSRange atSignRange = [match rangeAtIndex:2];
        NSRange screenNameRange = [match rangeAtIndex:3];
        NSRange listNameRange = [match rangeAtIndex:4];
        NSUInteger entityStart = atSignRange.location;

        OSKTwitterTextEntity *entity = nil;
        if (listNameRange.location != NSNotFound) {
            entity = [OSKTwitterTextEntity entityWithType:OSKTwitterTextEntityListName
                                                    range:NSMakeRange(entityStart, NSMaxRange(listNameRange) - entityStart)];
        } else {
            entity = [OSKTwitterTextEntity entityWithType:OSKTwitterTextEntityScreenName
                                                    range:NSMakeRange(entityStart, NSMaxRange(screenNameRange) - entityStart)];
        }
        [mentions addObject:entity];

        scanStart = matchEnd;
    }

    return mentions;
}
|
||
|
||
/**
 * Returns the screen name the text replies to, i.e. a mention at the very
 * start of the text (optionally preceded by Unicode spaces).
 *
 * @param text The text to inspect.
 * @return An entity of type OSKTwitterTextEntityScreenName whose range covers
 *         the screen name without its at sign; nil when the text is nil or
 *         empty, does not start with a mention, or the mention is immediately
 *         followed by text matching the end-mention pattern.
 */
+ (OSKTwitterTextEntity*)repliedScreenNameInText:(NSString*)text
{
    if (text.length == 0) {
        return nil;
    }

    NSInteger textLength = text.length;
    NSMatchingOptions anchoredOptions = (NSMatchingWithoutAnchoringBounds | NSMatchingAnchored);
    NSTextCheckingResult *replyMatch = [[self validReplyRegexp] firstMatchInString:text
                                                                           options:anchoredOptions
                                                                             range:NSMakeRange(0, textLength)];
    if (replyMatch == nil || replyMatch.numberOfRanges < 2) {
        return nil;
    }

    NSRange screenNameRange = [replyMatch rangeAtIndex:1];
    NSInteger tailStart = NSMaxRange(screenNameRange);

    NSRange disqualifier = [[self endMentionRegexp] rangeOfFirstMatchInString:text
                                                                      options:0
                                                                        range:NSMakeRange(tailStart, textLength - tailStart)];
    if (disqualifier.location == NSNotFound) {
        return [OSKTwitterTextEntity entityWithType:OSKTwitterTextEntityScreenName range:screenNameRange];
    }

    return nil;
}
|
||
|
||
/**
 * Computes the length of a tweet using the default shortened-URL lengths
 * (HTTPShortURLLength and HTTPSShortURLLength).
 *
 * @param text The tweet text.
 * @return The result of tweetLength:httpURLLength:httpsURLLength:.
 */
+ (NSInteger)tweetLength:(NSString*)text
{
    NSInteger length = [self tweetLength:text
                           httpURLLength:HTTPShortURLLength
                          httpsURLLength:HTTPSShortURLLength];
    return length;
}
|
||
|
||
/**
 * Computes the length of a tweet.
 *
 * The text is first converted to its precomposed canonical form. Every URL
 * found in it is removed and counted as a fixed number of characters instead;
 * in the remaining text each surrogate pair counts as one character.
 *
 * @param text           The tweet text; nil or empty yields 0.
 * @param httpURLLength  Length charged for each URL whose text does not start
 *                       with "https" (this includes protocol-less URLs).
 * @param httpsURLLength Length charged for each URL whose text starts with
 *                       "https" (case-insensitive).
 * @return The counted length.
 */
+ (NSInteger)tweetLength:(NSString*)text httpURLLength:(NSInteger)httpURLLength httpsURLLength:(NSInteger)httpsURLLength
{
    text = [text precomposedStringWithCanonicalMapping];

    if (!text.length) {
        return 0;
    }

    // Remove URLs from text and add t.co length
    NSMutableString *string = [text mutableCopy];
#if !__has_feature(objc_arc)
    [string autorelease];
#endif

    // NSInteger (not int) so the caller-supplied lengths are never narrowed.
    NSInteger urlLengthOffset = 0;
    NSArray *urlEntities = [self URLsInText:text];

    // Walk the entities back to front so that deleting one range does not
    // shift the ranges that are still to be processed.
    for (OSKTwitterTextEntity *entity in [urlEntities reverseObjectEnumerator]) {
        NSRange urlRange = entity.range;
        NSString *url = [string substringWithRange:urlRange];
        if ([url rangeOfString:@"https" options:(NSCaseInsensitiveSearch | NSAnchoredSearch)].location == 0) {
            urlLengthOffset += httpsURLLength;
        } else {
            urlLengthOffset += httpURLLength;
        }
        [string deleteCharactersInRange:urlRange];
    }

    NSInteger len = string.length;
    NSInteger charCount = len + urlLengthOffset;

    // Adjust count for non-BMP characters: a high surrogate directly followed
    // by a low surrogate is a single character. The string is read in place
    // rather than copied into a stack buffer sized by the input, which could
    // overflow the stack for very long text.
    for (NSInteger i = 0; i + 1 < len; i++) {
        if (CFStringIsSurrogateHighCharacter([string characterAtIndex:i])
            && CFStringIsSurrogateLowCharacter([string characterAtIndex:i + 1])) {
            charCount--;
            i++;    // skip the low surrogate that was just consumed
        }
    }

    return charCount;
}
|
||
|
||
/**
 * Computes how many characters are left before the tweet limit, using the
 * default shortened-URL lengths (HTTPShortURLLength and HTTPSShortURLLength).
 *
 * @param text The tweet text.
 * @return The result of remainingCharacterCount:httpURLLength:httpsURLLength:.
 */
+ (NSInteger)remainingCharacterCount:(NSString*)text
{
    NSInteger remaining = [self remainingCharacterCount:text
                                          httpURLLength:HTTPShortURLLength
                                         httpsURLLength:HTTPSShortURLLength];
    return remaining;
}
|
||
|
||
/**
 * Computes how many characters are left before the tweet limit.
 *
 * @param text           The tweet text.
 * @param httpURLLength  Length charged for each non-https URL.
 * @param httpsURLLength Length charged for each https URL.
 * @return MaxTweetLength minus the tweet length; negative when the text is
 *         longer than the limit.
 */
+ (NSInteger)remainingCharacterCount:(NSString*)text httpURLLength:(NSInteger)httpURLLength httpsURLLength:(NSInteger)httpsURLLength
{
    NSInteger usedLength = [self tweetLength:text
                               httpURLLength:httpURLLength
                              httpsURLLength:httpsURLLength];
    return MaxTweetLength - usedLength;
}
|
||
|
||
#pragma mark - Regular Expressions and CharacterSet
|
||
|
||
/**
 * Shared, case-insensitive regular expression for TWUValidURL.
 * It is compiled on first use inside dispatch_once; the compile error is
 * discarded.
 */
+ (NSRegularExpression*)validURLRegexp
{
    static dispatch_once_t compileOnce;
    static NSRegularExpression *sharedRegexp;

    dispatch_once(&compileOnce, ^{
        sharedRegexp = [[NSRegularExpression alloc] initWithPattern:TWUValidURL
                                                            options:NSRegularExpressionCaseInsensitive
                                                              error:NULL];
    });

    return sharedRegexp;
}
|
||
|
||
/**
 * Shared, case-insensitive regular expression for TWUValidASCIIDomain.
 * It is compiled on first use inside dispatch_once; the compile error is
 * discarded.
 */
+ (NSRegularExpression*)validASCIIDomainRegexp
{
    static dispatch_once_t compileOnce;
    static NSRegularExpression *sharedRegexp;

    dispatch_once(&compileOnce, ^{
        sharedRegexp = [[NSRegularExpression alloc] initWithPattern:TWUValidASCIIDomain
                                                            options:NSRegularExpressionCaseInsensitive
                                                              error:NULL];
    });

    return sharedRegexp;
}
|
||
|
||
/**
 * Shared, case-insensitive regular expression for TWUInvalidShortDomain.
 * It is compiled on first use inside dispatch_once; the compile error is
 * discarded.
 */
+ (NSRegularExpression*)invalidShortDomainRegexp
{
    static dispatch_once_t compileOnce;
    static NSRegularExpression *sharedRegexp;

    dispatch_once(&compileOnce, ^{
        sharedRegexp = [[NSRegularExpression alloc] initWithPattern:TWUInvalidShortDomain
                                                            options:NSRegularExpressionCaseInsensitive
                                                              error:NULL];
    });

    return sharedRegexp;
}
|
||
|
||
/**
 * Shared, case-insensitive regular expression for TWUValidTCOURL.
 * It is compiled on first use inside dispatch_once; the compile error is
 * discarded.
 */
+ (NSRegularExpression*)validTCOURLRegexp
{
    static dispatch_once_t compileOnce;
    static NSRegularExpression *sharedRegexp;

    dispatch_once(&compileOnce, ^{
        sharedRegexp = [[NSRegularExpression alloc] initWithPattern:TWUValidTCOURL
                                                            options:NSRegularExpressionCaseInsensitive
                                                              error:NULL];
    });

    return sharedRegexp;
}
|
||
|
||
/**
 * Shared, case-insensitive regular expression for TWUValidHashtag.
 * It is compiled on first use inside dispatch_once; the compile error is
 * discarded.
 */
+ (NSRegularExpression*)validHashtagRegexp
{
    static dispatch_once_t compileOnce;
    static NSRegularExpression *sharedRegexp;

    dispatch_once(&compileOnce, ^{
        sharedRegexp = [[NSRegularExpression alloc] initWithPattern:TWUValidHashtag
                                                            options:NSRegularExpressionCaseInsensitive
                                                              error:NULL];
    });

    return sharedRegexp;
}
|
||
|
||
/**
 * Shared, case-insensitive regular expression for TWUEndHashTagMatch.
 * It is compiled on first use inside dispatch_once; the compile error is
 * discarded.
 */
+ (NSRegularExpression*)endHashtagRegexp
{
    static dispatch_once_t compileOnce;
    static NSRegularExpression *sharedRegexp;

    dispatch_once(&compileOnce, ^{
        sharedRegexp = [[NSRegularExpression alloc] initWithPattern:TWUEndHashTagMatch
                                                            options:NSRegularExpressionCaseInsensitive
                                                              error:NULL];
    });

    return sharedRegexp;
}
|
||
|
||
/**
 * Shared, case-insensitive regular expression for TWUValidCashtag.
 * It is compiled on first use inside dispatch_once; the compile error is
 * discarded.
 */
+ (NSRegularExpression*)validCashtagRegexp
{
    static dispatch_once_t compileOnce;
    static NSRegularExpression *sharedRegexp;

    dispatch_once(&compileOnce, ^{
        sharedRegexp = [[NSRegularExpression alloc] initWithPattern:TWUValidCashtag
                                                            options:NSRegularExpressionCaseInsensitive
                                                              error:NULL];
    });

    return sharedRegexp;
}
|
||
|
||
/**
 * Shared, case-insensitive regular expression for TWUValidMentionOrList.
 * It is compiled on first use inside dispatch_once; the compile error is
 * discarded.
 */
+ (NSRegularExpression*)validMentionOrListRegexp
{
    static dispatch_once_t compileOnce;
    static NSRegularExpression *sharedRegexp;

    dispatch_once(&compileOnce, ^{
        sharedRegexp = [[NSRegularExpression alloc] initWithPattern:TWUValidMentionOrList
                                                            options:NSRegularExpressionCaseInsensitive
                                                              error:NULL];
    });

    return sharedRegexp;
}
|
||
|
||
/**
 * Shared, case-insensitive regular expression for TWUValidReply.
 * It is compiled on first use inside dispatch_once; the compile error is
 * discarded.
 */
+ (NSRegularExpression*)validReplyRegexp
{
    static dispatch_once_t compileOnce;
    static NSRegularExpression *sharedRegexp;

    dispatch_once(&compileOnce, ^{
        sharedRegexp = [[NSRegularExpression alloc] initWithPattern:TWUValidReply
                                                            options:NSRegularExpressionCaseInsensitive
                                                              error:NULL];
    });

    return sharedRegexp;
}
|
||
|
||
/**
 * Shared, case-insensitive regular expression for TWUEndMentionMatch.
 * It is compiled on first use inside dispatch_once; the compile error is
 * discarded.
 */
+ (NSRegularExpression*)endMentionRegexp
{
    static dispatch_once_t compileOnce;
    static NSRegularExpression *sharedRegexp;

    dispatch_once(&compileOnce, ^{
        sharedRegexp = [[NSRegularExpression alloc] initWithPattern:TWUEndMentionMatch
                                                            options:NSRegularExpressionCaseInsensitive
                                                              error:NULL];
    });

    return sharedRegexp;
}
|
||
|
||
/**
 * Shared character set containing '-', '_', '.' and '/'.
 * URLsInText: skips a protocol-less URL candidate when the character before
 * it belongs to this set. The set is created on first use inside
 * dispatch_once.
 */
+ (NSCharacterSet*)invalidURLWithoutProtocolPrecedingCharSet
{
    static dispatch_once_t createOnce;
    static NSCharacterSet *sharedCharSet;

    dispatch_once(&createOnce, ^{
        sharedCharSet = [NSCharacterSet characterSetWithCharactersInString:@"-_./"];
#if !__has_feature(objc_arc)
        // Without ARC the factory result is not owned here; retain it so the
        // static pointer stays valid.
        [sharedCharSet retain];
#endif
    });

    return sharedCharSet;
}
|
||
|
||
@end
|