mirror of
https://github.com/samuelclay/NewsBlur.git
synced 2025-08-31 21:41:33 +00:00
800 lines
28 KiB
Mathematica
800 lines
28 KiB
Mathematica
![]() |
//
|
|||
|
// OSKTwitterText.m
|
|||
|
//
|
|||
|
// Copyright 2012 Twitter, Inc.
|
|||
|
//
|
|||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|||
|
// you may not use this file except in compliance with the License.
|
|||
|
// You may obtain a copy of the License at
|
|||
|
//
|
|||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|||
|
//
|
|||
|
|
|||
|
#import "OSKTwitterText.h"
|
|||
|
|
|||
|
//
|
|||
|
// These regular expressions are ported from twitter-text-rb on Apr 24 2012.
|
|||
|
//
|
|||
|
|
|||
|
#define TWUControlCharacters @"\\u0009-\\u000D"
|
|||
|
#define TWUSpace @"\\u0020"
|
|||
|
#define TWUControl85 @"\\u0085"
|
|||
|
#define TWUNoBreakSpace @"\\u00A0"
|
|||
|
#define TWUOghamBreakSpace @"\\u1680"
|
|||
|
#define TWUMongolianVowelSeparator @"\\u180E"
|
|||
|
#define TWUWhiteSpaces @"\\u2000-\\u200A"
|
|||
|
#define TWULineSeparator @"\\u2028"
|
|||
|
#define TWUParagraphSeparator @"\\u2029"
|
|||
|
#define TWUNarrowNoBreakSpace @"\\u202F"
|
|||
|
#define TWUMediumMathematicalSpace @"\\u205F"
|
|||
|
#define TWUIdeographicSpace @"\\u3000"
|
|||
|
|
|||
|
#define TWUUnicodeSpaces \
|
|||
|
TWUControlCharacters \
|
|||
|
TWUSpace \
|
|||
|
TWUControl85 \
|
|||
|
TWUNoBreakSpace \
|
|||
|
TWUOghamBreakSpace \
|
|||
|
TWUMongolianVowelSeparator \
|
|||
|
TWUWhiteSpaces \
|
|||
|
TWULineSeparator \
|
|||
|
TWUParagraphSeparator \
|
|||
|
TWUNarrowNoBreakSpace \
|
|||
|
TWUMediumMathematicalSpace \
|
|||
|
TWUIdeographicSpace
|
|||
|
|
|||
|
#define TWUInvalidCharacters @"\\uFFFE\\uFEFF\\uFFFF\\u202A-\\u202E"
|
|||
|
|
|||
|
#define TWULatinAccents \
|
|||
|
@"\\u00C0-\\u00D6\\u00D8-\\u00F6\\u00F8-\\u00FF\\u0100-\\u024F\\u0253-\\u0254\\u0256-\\u0257\\u0259\\u025b\\u0263\\u0268\\u026F\\u0272\\u0289\\u02BB\\u1E00-\\u1EFF"
|
|||
|
|
|||
|
//
|
|||
|
// Hashtag
|
|||
|
//
|
|||
|
|
|||
|
#define TWUCyrillicHashtagChars @"\\u0400-\\u04FF"
|
|||
|
#define TWUCyrillicSupplementHashtagChars @"\\u0500-\\u0527"
|
|||
|
#define TWUCyrillicExtendedAHashtagChars @"\\u2DE0-\\u2DFF"
|
|||
|
#define TWUCyrillicExtendedBHashtagChars @"\\uA640-\\uA69F"
|
|||
|
#define TWUHebrewHashtagChars @"\\u0591-\\u05BF\\u05C1-\\u05C2\\u05C4-\\u05C5\\u05C7\\u05D0-\\u05EA\\u05F0-\\u05F4"
|
|||
|
#define TWUHebrewPresentationFormsHashtagChars @"\\uFB12-\\uFB28\\uFB2A-\\uFB36\\uFB38-\\uFB3C\\uFB3E\\uFB40-\\uFB41\\uFB43-\\uFB44\\uFB46-\\uFB4F"
|
|||
|
#define TWUArabicHashtagChars @"\\u0610-\\u061A\\u0620-\\u065F\\u066E-\\u06D3\\u06D5-\\u06DC\\u06DE-\\u06E8\\u06EA-\\u06EF\\u06FA-\\u06FC\\u06FF"
|
|||
|
#define TWUArabicSupplementHashtagChars @"\\u0750-\\u077F"
|
|||
|
#define TWUArabicExtendedAHashtagChars @"\\u08A0\\u08A2-\\u08AC\\u08E4-\\u08FE"
|
|||
|
#define TWUArabicPresentationFormsAHashtagChars @"\\uFB50-\\uFBB1\\uFBD3-\\uFD3D\\uFD50-\\uFD8F\\uFD92-\\uFDC7\\uFDF0-\\uFDFB"
|
|||
|
#define TWUArabicPresentationFormsBHashtagChars @"\\uFE70-\\uFE74\\uFE76-\\uFEFC"
|
|||
|
#define TWUZeroWidthNonJoiner @"\\u200C"
|
|||
|
#define TWUThaiHashtagChars @"\\u0E01-\\u0E3A"
|
|||
|
#define TWUHangulHashtagChars @"\\u0E40-\\u0E4E"
|
|||
|
#define TWUHangulJamoHashtagChars @"\\u1100-\\u11FF"
|
|||
|
#define TWUHangulCompatibilityJamoHashtagChars @"\\u3130-\\u3185"
|
|||
|
#define TWUHangulJamoExtendedAHashtagChars @"\\uA960-\\uA97F"
|
|||
|
#define TWUHangulSyllablesHashtagChars @"\\uAC00-\\uD7AF"
|
|||
|
#define TWUHangulJamoExtendedBHashtagChars @"\\uD7B0-\\uD7FF"
|
|||
|
#define TWUHalfWidthHangulHashtagChars @"\\uFFA1-\\uFFDC"
|
|||
|
|
|||
|
#define TWUNonLatinHashtagChars \
|
|||
|
TWUCyrillicHashtagChars \
|
|||
|
TWUCyrillicSupplementHashtagChars \
|
|||
|
TWUCyrillicExtendedAHashtagChars \
|
|||
|
TWUCyrillicExtendedBHashtagChars \
|
|||
|
TWUHebrewHashtagChars \
|
|||
|
TWUHebrewPresentationFormsHashtagChars \
|
|||
|
TWUArabicHashtagChars \
|
|||
|
TWUArabicSupplementHashtagChars \
|
|||
|
TWUArabicExtendedAHashtagChars \
|
|||
|
TWUArabicPresentationFormsAHashtagChars \
|
|||
|
TWUArabicPresentationFormsBHashtagChars \
|
|||
|
TWUZeroWidthNonJoiner \
|
|||
|
TWUThaiHashtagChars \
|
|||
|
TWUHangulHashtagChars \
|
|||
|
TWUHangulJamoHashtagChars \
|
|||
|
TWUHangulCompatibilityJamoHashtagChars \
|
|||
|
TWUHangulJamoExtendedAHashtagChars \
|
|||
|
TWUHangulSyllablesHashtagChars \
|
|||
|
TWUHangulJamoExtendedBHashtagChars \
|
|||
|
TWUHalfWidthHangulHashtagChars
|
|||
|
|
|||
|
#define TWUKatakanaHashtagChars @"\\u30A1-\\u30FA\\u30FC-\\u30FE"
|
|||
|
#define TWUKatakanaHalfWidthHashtagChars @"\\uFF66-\\uFF9F"
|
|||
|
#define TWULatinFullWidthHashtagChars @"\\uFF10-\\uFF19\\uFF21-\\uFF3A\\uFF41-\\uFF5A"
|
|||
|
#define TWUHiraganaHashtagChars @"\\u3041-\\u3096\\u3099-\\u309E"
|
|||
|
#define TWUCJKExtensionAHashtagChars @"\\u3400-\\u4DBF"
|
|||
|
#define TWUCJKUnifiedHashtagChars @"\\u4E00-\\u9FFF"
|
|||
|
#define TWUCJKExtensionBHashtagChars @"\\U00020000-\\U0002A6DF"
|
|||
|
#define TWUCJKExtensionCHashtagChars @"\\U0002A700-\\U0002B73F"
|
|||
|
#define TWUCJKExtensionDHashtagChars @"\\U0002B740-\\U0002B81F"
|
|||
|
#define TWUCJKSupplementHashtagChars @"\\U0002F800-\\U0002FA1F\\u3003\\u3005\\u303B"
|
|||
|
|
|||
|
#define TWUCJKHashtagCharacters \
|
|||
|
TWUKatakanaHashtagChars \
|
|||
|
TWUKatakanaHalfWidthHashtagChars \
|
|||
|
TWULatinFullWidthHashtagChars \
|
|||
|
TWUHiraganaHashtagChars \
|
|||
|
TWUCJKExtensionAHashtagChars \
|
|||
|
TWUCJKUnifiedHashtagChars \
|
|||
|
TWUCJKExtensionBHashtagChars \
|
|||
|
TWUCJKExtensionCHashtagChars \
|
|||
|
TWUCJKExtensionDHashtagChars \
|
|||
|
TWUCJKSupplementHashtagChars
|
|||
|
|
|||
|
#define TWUPunctuationChars @"\\-_!\"#$%&'()*+,./:;<=>?@\\[\\]^`{|}~"
|
|||
|
#define TWUPunctuationCharsWithoutHyphen @"_!\"#$%&'()*+,./:;<=>?@\\[\\]^`{|}~"
|
|||
|
#define TWUPunctuationCharsWithoutHyphenAndUnderscore @"!\"#$%&'()*+,./:;<=>?@\\[\\]^`{|}~"
|
|||
|
#define TWUCtrlChars @"\\x00-\\x1F\\x7F"
|
|||
|
|
|||
|
#define TWHashtagAlpha \
|
|||
|
@"[a-z_" \
|
|||
|
TWULatinAccents \
|
|||
|
TWUNonLatinHashtagChars \
|
|||
|
TWUCJKHashtagCharacters \
|
|||
|
@"]"
|
|||
|
|
|||
|
#define TWUHashtagAlphanumeric \
|
|||
|
@"[a-z0-9_" \
|
|||
|
TWULatinAccents \
|
|||
|
TWUNonLatinHashtagChars \
|
|||
|
TWUCJKHashtagCharacters \
|
|||
|
@"]"
|
|||
|
|
|||
|
#define TWUHashtagBoundary \
|
|||
|
@"^|$|[^&a-z0-9_" \
|
|||
|
TWULatinAccents \
|
|||
|
TWUNonLatinHashtagChars \
|
|||
|
TWUCJKHashtagCharacters \
|
|||
|
@"]"
|
|||
|
|
|||
|
#define TWUValidHashtag \
|
|||
|
@"(?:" TWUHashtagBoundary @")([##]" TWUHashtagAlphanumeric @"*" TWHashtagAlpha TWUHashtagAlphanumeric @"*)"
|
|||
|
|
|||
|
#define TWUEndHashTagMatch @"\\A(?:[##]|://)"
|
|||
|
|
|||
|
//
|
|||
|
// Cashtag
|
|||
|
//
|
|||
|
|
|||
|
#define TWUCashtag @"[a-z]{1,6}(?:[._][a-z]{1,2})?"
|
|||
|
#define TWUValidCashtag \
|
|||
|
@"(?:^|[" TWUUnicodeSpaces @"])" \
|
|||
|
@"(\\$" TWUCashtag @")" \
|
|||
|
@"(?=$|\\s|[" TWUPunctuationChars @"])"
|
|||
|
|
|||
|
//
|
|||
|
// Mention and list name
|
|||
|
//
|
|||
|
|
|||
|
#define TWUValidMentionPrecedingChars @"(?:[^a-zA-Z0-9_!#$%&*@@]|^|RT:?)"
|
|||
|
#define TWUAtSigns @"[@@]"
|
|||
|
#define TWUValidUsername @"\\A" TWUAtSigns @"[a-zA-Z0-9_]{1,20}\\z"
|
|||
|
#define TWUValidList @"\\A" TWUAtSigns @"[a-zA-Z0-9_]{1,20}/[a-zA-Z][a-zA-Z0-9_\\-]{0,24}\\z"
|
|||
|
|
|||
|
#define TWUValidMentionOrList \
|
|||
|
@"(" TWUValidMentionPrecedingChars @")" \
|
|||
|
@"(" TWUAtSigns @")" \
|
|||
|
@"([a-zA-Z0-9_]{1,20})" \
|
|||
|
@"(/[a-zA-Z][a-zA-Z0-9_\\-]{0,24})?"
|
|||
|
|
|||
|
#define TWUValidReply @"\\A(?:[" TWUUnicodeSpaces @"])*" TWUAtSigns @"([a-zA-Z0-9_]{1,20})"
|
|||
|
#define TWUEndMentionMatch @"\\A(?:" TWUAtSigns @"|[" TWULatinAccents @"]|://)"
|
|||
|
|
|||
|
//
|
|||
|
// URL
|
|||
|
//
|
|||
|
|
|||
|
#define TWUValidURLPrecedingChars @"(?:[^a-zA-Z0-9@@$##" TWUInvalidCharacters @"]|^)"
|
|||
|
|
|||
|
#define TWUDomainValidStartEndChars \
|
|||
|
@"[^" \
|
|||
|
TWUPunctuationChars \
|
|||
|
TWUCtrlChars \
|
|||
|
TWUInvalidCharacters \
|
|||
|
TWUUnicodeSpaces \
|
|||
|
@"]"
|
|||
|
|
|||
|
#define TWUSubdomainValidMiddleChars \
|
|||
|
@"[^" \
|
|||
|
TWUPunctuationCharsWithoutHyphenAndUnderscore \
|
|||
|
TWUCtrlChars \
|
|||
|
TWUInvalidCharacters \
|
|||
|
TWUUnicodeSpaces \
|
|||
|
@"]"
|
|||
|
|
|||
|
#define TWUDomainValidMiddleChars \
|
|||
|
@"[^" \
|
|||
|
TWUPunctuationCharsWithoutHyphen \
|
|||
|
TWUCtrlChars \
|
|||
|
TWUInvalidCharacters \
|
|||
|
TWUUnicodeSpaces \
|
|||
|
@"]"
|
|||
|
|
|||
|
#define TWUValidSubdomain \
|
|||
|
@"(?:" \
|
|||
|
@"(?:" TWUDomainValidStartEndChars TWUSubdomainValidMiddleChars @"*)?" TWUDomainValidStartEndChars @"\\." \
|
|||
|
@")"
|
|||
|
|
|||
|
#define TWUValidDomainName \
|
|||
|
@"(?:" \
|
|||
|
@"(?:" TWUDomainValidStartEndChars TWUDomainValidMiddleChars @"*)?" TWUDomainValidStartEndChars @"\\." \
|
|||
|
@")"
|
|||
|
|
|||
|
#define TWUValidGTLD @"(?:(?:aero|asia|biz|cat|com|coop|edu|gov|info|int|jobs|mil|mobi|museum|name|net|org|pro|tel|travel|xxx)(?=[^0-9a-z]|$))"
|
|||
|
#define TWUValidCCTLD \
|
|||
|
@"(?:" \
|
|||
|
@"(?:" \
|
|||
|
@"ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|" \
|
|||
|
@"bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|" \
|
|||
|
@"cs|cu|cv|cx|cy|cz|dd|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|" \
|
|||
|
@"fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|" \
|
|||
|
@"ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|" \
|
|||
|
@"ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|" \
|
|||
|
@"mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|" \
|
|||
|
@"pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|" \
|
|||
|
@"si|sj|sk|sl|sm|sn|so|sr|ss|st|su|sv|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|" \
|
|||
|
@"tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|za|zm|" \
|
|||
|
@"zw" \
|
|||
|
@")" \
|
|||
|
@"(?=[^0-9a-z]|$)" \
|
|||
|
@")"
|
|||
|
|
|||
|
#define TWUValidPunycode @"(?:xn--[0-9a-z]+)"
|
|||
|
|
|||
|
#define TWUValidDomain \
|
|||
|
@"(?:" \
|
|||
|
TWUValidSubdomain @"*" TWUValidDomainName \
|
|||
|
@"(?:" TWUValidGTLD @"|" TWUValidCCTLD @"|" TWUValidPunycode @")" \
|
|||
|
@")"
|
|||
|
|
|||
|
#define TWUValidASCIIDomain \
|
|||
|
@"(?:[a-zA-Z0-9\\-_" TWULatinAccents @"]+\\.)+" \
|
|||
|
@"(?:" TWUValidGTLD @"|" TWUValidCCTLD @"|" TWUValidPunycode @")" \
|
|||
|
|
|||
|
#define TWUValidTCOURL @"https?://t\\.co/[a-zA-Z0-9]+"
|
|||
|
#define TWUInvalidShortDomain @"\\A" TWUValidDomainName TWUValidCCTLD @"\\z"
|
|||
|
|
|||
|
#define TWUValidPortNumber @"[0-9]+"
|
|||
|
#define TWUValidGeneralURLPathChars @"[a-zA-Z0-9!\\*';:=+,.$/%#\\[\\]\\-_~&|@" TWULatinAccents @"]"
|
|||
|
|
|||
|
#define TWUValidURLBalancedParens @"\\(" TWUValidGeneralURLPathChars @"+\\)"
|
|||
|
#define TWUValidURLPathEndingChars @"[a-zA-Z0-9=_#/+\\-" TWULatinAccents @"]|(?:" TWUValidURLBalancedParens @")"
|
|||
|
|
|||
|
#define TWUValidURLPath \
|
|||
|
@"(?:" \
|
|||
|
@"(?:" \
|
|||
|
TWUValidGeneralURLPathChars @"*" \
|
|||
|
@"(?:" TWUValidURLBalancedParens TWUValidGeneralURLPathChars @"*)*" TWUValidURLPathEndingChars \
|
|||
|
@")" \
|
|||
|
@"|" \
|
|||
|
@"(?:" TWUValidGeneralURLPathChars @"+/)" \
|
|||
|
@")"
|
|||
|
|
|||
|
#define TWUValidURLQueryChars @"[a-zA-Z0-9!?*'\\(\\);:&=+$/%#\\[\\]\\-_\\.,~|@]"
|
|||
|
#define TWUValidURLQueryEndingChars @"[a-zA-Z0-9_&=#/]"
|
|||
|
|
|||
|
#define TWUValidURL \
|
|||
|
@"(" \
|
|||
|
@"(" TWUValidURLPrecedingChars @")" \
|
|||
|
@"(" \
|
|||
|
@"(https?://)?" \
|
|||
|
@"(" TWUValidDomain @")" \
|
|||
|
@"(?::(" TWUValidPortNumber @"))?" \
|
|||
|
@"(/" TWUValidURLPath @"*)?" \
|
|||
|
@"(\\?" TWUValidURLQueryChars @"*" TWUValidURLQueryEndingChars @")?" \
|
|||
|
@")" \
|
|||
|
@")"
|
|||
|
|
|||
|
static const NSInteger MaxTweetLength = 140;
|
|||
|
static const NSInteger HTTPShortURLLength = 22;
|
|||
|
static const NSInteger HTTPSShortURLLength = 23;
|
|||
|
|
|||
|
@implementation OSKTwitterText
|
|||
|
|
|||
|
+ (NSArray*)entitiesInText:(NSString*)text
|
|||
|
{
|
|||
|
if (!text.length) {
|
|||
|
return [NSArray array];
|
|||
|
}
|
|||
|
|
|||
|
NSMutableArray *results = [NSMutableArray array];
|
|||
|
|
|||
|
NSArray *urls = [self URLsInText:text];
|
|||
|
[results addObjectsFromArray:urls];
|
|||
|
|
|||
|
NSArray *hashtags = [self hashtagsInText:text withURLEntities:urls];
|
|||
|
[results addObjectsFromArray:hashtags];
|
|||
|
|
|||
|
NSArray *cashtags = [self symbolsInText:text withURLEntities:urls];
|
|||
|
[results addObjectsFromArray:cashtags];
|
|||
|
|
|||
|
NSArray *mentionsAndLists = [self mentionsOrListsInText:text];
|
|||
|
NSMutableArray *addingItems = [NSMutableArray array];
|
|||
|
|
|||
|
for (OSKTwitterTextEntity *entity in mentionsAndLists) {
|
|||
|
NSRange entityRange = entity.range;
|
|||
|
BOOL found = NO;
|
|||
|
for (OSKTwitterTextEntity *existingEntity in results) {
|
|||
|
if (NSIntersectionRange(existingEntity.range, entityRange).length > 0) {
|
|||
|
found = YES;
|
|||
|
break;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
if (!found) {
|
|||
|
[addingItems addObject:entity];
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
[results addObjectsFromArray:addingItems];
|
|||
|
[results sortUsingSelector:@selector(compare:)];
|
|||
|
|
|||
|
return results;
|
|||
|
}
|
|||
|
|
|||
|
+ (NSArray*)URLsInText:(NSString*)text
|
|||
|
{
|
|||
|
if (!text.length) {
|
|||
|
return [NSArray array];
|
|||
|
}
|
|||
|
|
|||
|
NSMutableArray *results = [NSMutableArray array];
|
|||
|
NSInteger len = text.length;
|
|||
|
NSInteger position = 0;
|
|||
|
NSRange allRange = NSMakeRange(0, 0);
|
|||
|
|
|||
|
while (1) {
|
|||
|
position = NSMaxRange(allRange);
|
|||
|
NSTextCheckingResult *urlResult = [[self validURLRegexp] firstMatchInString:text options:NSMatchingWithoutAnchoringBounds range:NSMakeRange(position, len - position)];
|
|||
|
if (!urlResult || urlResult.numberOfRanges < 9) {
|
|||
|
break;
|
|||
|
}
|
|||
|
|
|||
|
allRange = urlResult.range;
|
|||
|
NSRange precedingRange = [urlResult rangeAtIndex:2];
|
|||
|
NSRange urlRange = [urlResult rangeAtIndex:3];
|
|||
|
NSRange protocolRange = [urlResult rangeAtIndex:4];
|
|||
|
NSRange domainRange = [urlResult rangeAtIndex:5];
|
|||
|
|
|||
|
// If protocol is missing and domain contains non-ASCII characters,
|
|||
|
// extract ASCII-only domains.
|
|||
|
if (protocolRange.location == NSNotFound) {
|
|||
|
if (precedingRange.location != NSNotFound && precedingRange.length > 0) {
|
|||
|
NSString *preceding = [text substringWithRange:precedingRange];
|
|||
|
NSRange suffixRange = [preceding rangeOfCharacterFromSet:[self invalidURLWithoutProtocolPrecedingCharSet] options:NSBackwardsSearch | NSAnchoredSearch];
|
|||
|
if (suffixRange.location != NSNotFound) {
|
|||
|
continue;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
NSInteger domainStart = domainRange.location;
|
|||
|
NSInteger domainEnd = NSMaxRange(domainRange);
|
|||
|
OSKTwitterTextEntity *lastEntity = nil;
|
|||
|
BOOL lastInvalidShortResult = NO;
|
|||
|
|
|||
|
while (domainStart < domainEnd) {
|
|||
|
NSTextCheckingResult *asciiResult = [[self validASCIIDomainRegexp] firstMatchInString:text options:0 range:NSMakeRange(domainStart, domainEnd - domainStart)];
|
|||
|
if (!asciiResult) {
|
|||
|
break;
|
|||
|
}
|
|||
|
|
|||
|
urlRange = asciiResult.range;
|
|||
|
lastEntity = [OSKTwitterTextEntity entityWithType:OSKTwitterTextEntityURL range:urlRange];
|
|||
|
|
|||
|
NSTextCheckingResult *invalidShortResult = [[self invalidShortDomainRegexp] firstMatchInString:text options:0 range:urlRange];
|
|||
|
lastInvalidShortResult = (invalidShortResult != nil);
|
|||
|
if (!lastInvalidShortResult) {
|
|||
|
[results addObject:lastEntity];
|
|||
|
}
|
|||
|
|
|||
|
domainStart = NSMaxRange(urlRange);
|
|||
|
}
|
|||
|
|
|||
|
if (!lastEntity) {
|
|||
|
continue;
|
|||
|
}
|
|||
|
|
|||
|
NSRange pathRange = [urlResult rangeAtIndex:7];
|
|||
|
if (pathRange.location != NSNotFound && NSMaxRange(lastEntity.range) == pathRange.location) {
|
|||
|
if (lastInvalidShortResult) {
|
|||
|
[results addObject:lastEntity];
|
|||
|
}
|
|||
|
NSRange entityRange = lastEntity.range;
|
|||
|
entityRange.length += pathRange.length;
|
|||
|
lastEntity.range = entityRange;
|
|||
|
}
|
|||
|
|
|||
|
} else {
|
|||
|
// In the case of t.co URLs, don't allow additional path characters
|
|||
|
NSRange tcoRange = [[self validTCOURLRegexp] rangeOfFirstMatchInString:text options:0 range:urlRange];
|
|||
|
if (tcoRange.location != NSNotFound) {
|
|||
|
urlRange.length = tcoRange.length;
|
|||
|
}
|
|||
|
|
|||
|
OSKTwitterTextEntity *entity = [OSKTwitterTextEntity entityWithType:OSKTwitterTextEntityURL range:urlRange];
|
|||
|
[results addObject:entity];
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
return results;
|
|||
|
}
|
|||
|
|
|||
|
+ (NSArray*)hashtagsInText:(NSString*)text checkingURLOverlap:(BOOL)checkingURLOverlap
|
|||
|
{
|
|||
|
if (!text.length) {
|
|||
|
return [NSArray array];
|
|||
|
}
|
|||
|
|
|||
|
NSArray *urls = nil;
|
|||
|
if (checkingURLOverlap) {
|
|||
|
urls = [self URLsInText:text];
|
|||
|
}
|
|||
|
return [self hashtagsInText:text withURLEntities:urls];
|
|||
|
}
|
|||
|
|
|||
|
+ (NSArray*)hashtagsInText:(NSString*)text withURLEntities:(NSArray*)urlEntities
|
|||
|
{
|
|||
|
if (!text.length) {
|
|||
|
return [NSArray array];
|
|||
|
}
|
|||
|
|
|||
|
NSMutableArray *results = [NSMutableArray array];
|
|||
|
NSInteger len = text.length;
|
|||
|
NSInteger position = 0;
|
|||
|
|
|||
|
while (1) {
|
|||
|
NSTextCheckingResult *matchResult = [[self validHashtagRegexp] firstMatchInString:text options:NSMatchingWithoutAnchoringBounds range:NSMakeRange(position, len - position)];
|
|||
|
if (!matchResult || matchResult.numberOfRanges < 2) {
|
|||
|
break;
|
|||
|
}
|
|||
|
|
|||
|
NSRange hashtagRange = [matchResult rangeAtIndex:1];
|
|||
|
BOOL matchOk = YES;
|
|||
|
|
|||
|
// Check URL overlap
|
|||
|
for (OSKTwitterTextEntity *urlEntity in urlEntities) {
|
|||
|
if (NSIntersectionRange(urlEntity.range, hashtagRange).length > 0) {
|
|||
|
matchOk = NO;
|
|||
|
break;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
if (matchOk) {
|
|||
|
NSInteger afterStart = NSMaxRange(hashtagRange);
|
|||
|
if (afterStart < len) {
|
|||
|
NSRange endMatchRange = [[self endHashtagRegexp] rangeOfFirstMatchInString:text options:0 range:NSMakeRange(afterStart, len - afterStart)];
|
|||
|
if (endMatchRange.location != NSNotFound) {
|
|||
|
matchOk = NO;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
if (matchOk) {
|
|||
|
OSKTwitterTextEntity *entity = [OSKTwitterTextEntity entityWithType:OSKTwitterTextEntityHashtag range:hashtagRange];
|
|||
|
[results addObject:entity];
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
position = NSMaxRange(matchResult.range);
|
|||
|
}
|
|||
|
|
|||
|
return results;
|
|||
|
}
|
|||
|
|
|||
|
+ (NSArray*)symbolsInText:(NSString*)text checkingURLOverlap:(BOOL)checkingURLOverlap
|
|||
|
{
|
|||
|
if (!text.length) {
|
|||
|
return [NSArray array];
|
|||
|
}
|
|||
|
|
|||
|
NSArray *urls = nil;
|
|||
|
if (checkingURLOverlap) {
|
|||
|
urls = [self URLsInText:text];
|
|||
|
}
|
|||
|
return [self symbolsInText:text withURLEntities:urls];
|
|||
|
}
|
|||
|
|
|||
|
+ (NSArray*)symbolsInText:(NSString*)text withURLEntities:(NSArray*)urlEntities
|
|||
|
{
|
|||
|
if (!text.length) {
|
|||
|
return [NSArray array];
|
|||
|
}
|
|||
|
|
|||
|
NSMutableArray *results = [NSMutableArray array];
|
|||
|
NSInteger len = text.length;
|
|||
|
NSInteger position = 0;
|
|||
|
|
|||
|
while (1) {
|
|||
|
NSTextCheckingResult *matchResult = [[self validCashtagRegexp] firstMatchInString:text options:NSMatchingWithoutAnchoringBounds range:NSMakeRange(position, len - position)];
|
|||
|
if (!matchResult || matchResult.numberOfRanges < 2) {
|
|||
|
break;
|
|||
|
}
|
|||
|
|
|||
|
NSRange symbolRange = [matchResult rangeAtIndex:1];
|
|||
|
BOOL matchOk = YES;
|
|||
|
|
|||
|
// Check URL overlap
|
|||
|
for (OSKTwitterTextEntity *urlEntity in urlEntities) {
|
|||
|
if (NSIntersectionRange(urlEntity.range, symbolRange).length > 0) {
|
|||
|
matchOk = NO;
|
|||
|
break;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
if (matchOk) {
|
|||
|
OSKTwitterTextEntity *entity = [OSKTwitterTextEntity entityWithType:OSKTwitterTextEntitySymbol range:symbolRange];
|
|||
|
[results addObject:entity];
|
|||
|
}
|
|||
|
|
|||
|
position = NSMaxRange(matchResult.range);
|
|||
|
}
|
|||
|
|
|||
|
return results;
|
|||
|
}
|
|||
|
|
|||
|
+ (NSArray*)mentionedScreenNamesInText:(NSString*)text
|
|||
|
{
|
|||
|
if (!text.length) {
|
|||
|
return [NSArray array];
|
|||
|
}
|
|||
|
|
|||
|
NSArray *mentionsOrLists = [self mentionsOrListsInText:text];
|
|||
|
NSMutableArray *results = [NSMutableArray array];
|
|||
|
|
|||
|
for (OSKTwitterTextEntity *entity in mentionsOrLists) {
|
|||
|
if (entity.type == OSKTwitterTextEntityScreenName) {
|
|||
|
[results addObject:entity];
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
return results;
|
|||
|
}
|
|||
|
|
|||
|
+ (NSArray*)mentionsOrListsInText:(NSString*)text
|
|||
|
{
|
|||
|
if (!text.length) {
|
|||
|
return [NSArray array];
|
|||
|
}
|
|||
|
|
|||
|
NSMutableArray *results = [NSMutableArray array];
|
|||
|
NSInteger len = text.length;
|
|||
|
NSInteger position = 0;
|
|||
|
|
|||
|
while (1) {
|
|||
|
NSTextCheckingResult *matchResult = [[self validMentionOrListRegexp] firstMatchInString:text options:NSMatchingWithoutAnchoringBounds range:NSMakeRange(position, len - position)];
|
|||
|
if (!matchResult || matchResult.numberOfRanges < 5) {
|
|||
|
break;
|
|||
|
}
|
|||
|
|
|||
|
NSRange allRange = matchResult.range;
|
|||
|
NSInteger end = NSMaxRange(allRange);
|
|||
|
|
|||
|
NSRange endMentionRange = [[self endMentionRegexp] rangeOfFirstMatchInString:text options:0 range:NSMakeRange(end, len - end)];
|
|||
|
if (endMentionRange.location == NSNotFound) {
|
|||
|
NSRange atSignRange = [matchResult rangeAtIndex:2];
|
|||
|
NSRange screenNameRange = [matchResult rangeAtIndex:3];
|
|||
|
NSRange listNameRange = [matchResult rangeAtIndex:4];
|
|||
|
|
|||
|
if (listNameRange.location == NSNotFound) {
|
|||
|
OSKTwitterTextEntity *entity = [OSKTwitterTextEntity entityWithType:OSKTwitterTextEntityScreenName range:NSMakeRange(atSignRange.location, NSMaxRange(screenNameRange) - atSignRange.location)];
|
|||
|
[results addObject:entity];
|
|||
|
} else {
|
|||
|
OSKTwitterTextEntity *entity = [OSKTwitterTextEntity entityWithType:OSKTwitterTextEntityListName range:NSMakeRange(atSignRange.location, NSMaxRange(listNameRange) - atSignRange.location)];
|
|||
|
[results addObject:entity];
|
|||
|
}
|
|||
|
} else {
|
|||
|
// Avoid matching the second username in @username@username
|
|||
|
end++;
|
|||
|
}
|
|||
|
|
|||
|
position = end;
|
|||
|
}
|
|||
|
|
|||
|
return results;
|
|||
|
}
|
|||
|
|
|||
|
+ (OSKTwitterTextEntity*)repliedScreenNameInText:(NSString*)text
|
|||
|
{
|
|||
|
if (!text.length) {
|
|||
|
return nil;
|
|||
|
}
|
|||
|
|
|||
|
NSInteger len = text.length;
|
|||
|
|
|||
|
NSTextCheckingResult *matchResult = [[self validReplyRegexp] firstMatchInString:text options:(NSMatchingWithoutAnchoringBounds | NSMatchingAnchored) range:NSMakeRange(0, len)];
|
|||
|
if (!matchResult || matchResult.numberOfRanges < 2) {
|
|||
|
return nil;
|
|||
|
}
|
|||
|
|
|||
|
NSRange replyRange = [matchResult rangeAtIndex:1];
|
|||
|
NSInteger replyEnd = NSMaxRange(replyRange);
|
|||
|
|
|||
|
NSRange endMentionRange = [[self endMentionRegexp] rangeOfFirstMatchInString:text options:0 range:NSMakeRange(replyEnd, len - replyEnd)];
|
|||
|
if (endMentionRange.location != NSNotFound) {
|
|||
|
return nil;
|
|||
|
}
|
|||
|
|
|||
|
return [OSKTwitterTextEntity entityWithType:OSKTwitterTextEntityScreenName range:replyRange];
|
|||
|
}
|
|||
|
|
|||
|
+ (NSInteger)tweetLength:(NSString*)text
|
|||
|
{
|
|||
|
return [self tweetLength:text httpURLLength:HTTPShortURLLength httpsURLLength:HTTPSShortURLLength];
|
|||
|
}
|
|||
|
|
|||
|
+ (NSInteger)tweetLength:(NSString*)text httpURLLength:(NSInteger)httpURLLength httpsURLLength:(NSInteger)httpsURLLength
|
|||
|
{
|
|||
|
text = [text precomposedStringWithCanonicalMapping];
|
|||
|
|
|||
|
if (!text.length) {
|
|||
|
return 0;
|
|||
|
}
|
|||
|
|
|||
|
// Remove URLs from text and add t.co length
|
|||
|
NSMutableString *string = [text mutableCopy];
|
|||
|
#if !__has_feature(objc_arc)
|
|||
|
[string autorelease];
|
|||
|
#endif
|
|||
|
|
|||
|
int urlLengthOffset = 0;
|
|||
|
NSArray *urlEntities = [self URLsInText:text];
|
|||
|
for (NSInteger i=urlEntities.count-1; i>=0; i--) {
|
|||
|
OSKTwitterTextEntity *entity = [urlEntities objectAtIndex:i];
|
|||
|
NSRange urlRange = entity.range;
|
|||
|
NSString *url = [string substringWithRange:urlRange];
|
|||
|
if ([url rangeOfString:@"https" options:(NSCaseInsensitiveSearch | NSAnchoredSearch)].location == 0) {
|
|||
|
urlLengthOffset += httpsURLLength;
|
|||
|
} else {
|
|||
|
urlLengthOffset += httpURLLength;
|
|||
|
}
|
|||
|
[string deleteCharactersInRange:urlRange];
|
|||
|
}
|
|||
|
|
|||
|
NSInteger len = string.length;
|
|||
|
NSInteger charCount = len + urlLengthOffset;
|
|||
|
|
|||
|
if (len > 0) {
|
|||
|
// Adjust count for non-BMP characters
|
|||
|
UniChar buffer[len];
|
|||
|
[string getCharacters:buffer range:NSMakeRange(0, len)];
|
|||
|
|
|||
|
for (int i=0; i<len; i++) {
|
|||
|
UniChar c = buffer[i];
|
|||
|
if (CFStringIsSurrogateHighCharacter(c)) {
|
|||
|
if (i+1 < len) {
|
|||
|
UniChar d = buffer[i+1];
|
|||
|
if (CFStringIsSurrogateLowCharacter(d)) {
|
|||
|
charCount--;
|
|||
|
i++;
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
return charCount;
|
|||
|
}
|
|||
|
|
|||
|
+ (NSInteger)remainingCharacterCount:(NSString*)text
|
|||
|
{
|
|||
|
return [self remainingCharacterCount:text httpURLLength:HTTPShortURLLength httpsURLLength:HTTPSShortURLLength];
|
|||
|
}
|
|||
|
|
|||
|
+ (NSInteger)remainingCharacterCount:(NSString*)text httpURLLength:(NSInteger)httpURLLength httpsURLLength:(NSInteger)httpsURLLength
|
|||
|
{
|
|||
|
return MaxTweetLength - [self tweetLength:text httpURLLength:httpURLLength httpsURLLength:httpsURLLength];
|
|||
|
}
|
|||
|
|
|||
|
#pragma mark - Regular Expressions and CharacterSet
|
|||
|
|
|||
|
+ (NSRegularExpression*)validURLRegexp
|
|||
|
{
|
|||
|
static NSRegularExpression *validURLRegexp;
|
|||
|
static dispatch_once_t onceToken;
|
|||
|
dispatch_once(&onceToken, ^{
|
|||
|
validURLRegexp = [[NSRegularExpression alloc] initWithPattern:TWUValidURL options:NSRegularExpressionCaseInsensitive error:NULL];
|
|||
|
});
|
|||
|
return validURLRegexp;
|
|||
|
}
|
|||
|
|
|||
|
+ (NSRegularExpression*)validASCIIDomainRegexp
|
|||
|
{
|
|||
|
static NSRegularExpression *validASCIIDomainRegexp;
|
|||
|
static dispatch_once_t onceToken;
|
|||
|
dispatch_once(&onceToken, ^{
|
|||
|
validASCIIDomainRegexp = [[NSRegularExpression alloc] initWithPattern:TWUValidASCIIDomain options:NSRegularExpressionCaseInsensitive error:NULL];
|
|||
|
});
|
|||
|
return validASCIIDomainRegexp;
|
|||
|
}
|
|||
|
|
|||
|
+ (NSRegularExpression*)invalidShortDomainRegexp
|
|||
|
{
|
|||
|
static NSRegularExpression *invalidShortDomainRegexp;
|
|||
|
static dispatch_once_t onceToken;
|
|||
|
dispatch_once(&onceToken, ^{
|
|||
|
invalidShortDomainRegexp = [[NSRegularExpression alloc] initWithPattern:TWUInvalidShortDomain options:NSRegularExpressionCaseInsensitive error:NULL];
|
|||
|
});
|
|||
|
return invalidShortDomainRegexp;
|
|||
|
}
|
|||
|
|
|||
|
+ (NSRegularExpression*)validTCOURLRegexp
|
|||
|
{
|
|||
|
static NSRegularExpression *validTCOURLRegexp;
|
|||
|
static dispatch_once_t onceToken;
|
|||
|
dispatch_once(&onceToken, ^{
|
|||
|
validTCOURLRegexp = [[NSRegularExpression alloc] initWithPattern:TWUValidTCOURL options:NSRegularExpressionCaseInsensitive error:NULL];
|
|||
|
});
|
|||
|
return validTCOURLRegexp;
|
|||
|
}
|
|||
|
|
|||
|
+ (NSRegularExpression*)validHashtagRegexp
|
|||
|
{
|
|||
|
static NSRegularExpression *validHashtagRegexp;
|
|||
|
static dispatch_once_t onceToken;
|
|||
|
dispatch_once(&onceToken, ^{
|
|||
|
validHashtagRegexp = [[NSRegularExpression alloc] initWithPattern:TWUValidHashtag options:NSRegularExpressionCaseInsensitive error:NULL];
|
|||
|
});
|
|||
|
return validHashtagRegexp;
|
|||
|
}
|
|||
|
|
|||
|
+ (NSRegularExpression*)endHashtagRegexp
|
|||
|
{
|
|||
|
static NSRegularExpression *endHashtagRegexp;
|
|||
|
static dispatch_once_t onceToken;
|
|||
|
dispatch_once(&onceToken, ^{
|
|||
|
endHashtagRegexp = [[NSRegularExpression alloc] initWithPattern:TWUEndHashTagMatch options:NSRegularExpressionCaseInsensitive error:NULL];
|
|||
|
});
|
|||
|
return endHashtagRegexp;
|
|||
|
}
|
|||
|
|
|||
|
+ (NSRegularExpression*)validCashtagRegexp
|
|||
|
{
|
|||
|
static NSRegularExpression *validCashtagRegexp;
|
|||
|
static dispatch_once_t onceToken;
|
|||
|
dispatch_once(&onceToken, ^{
|
|||
|
validCashtagRegexp = [[NSRegularExpression alloc] initWithPattern:TWUValidCashtag options:NSRegularExpressionCaseInsensitive error:NULL];
|
|||
|
});
|
|||
|
return validCashtagRegexp;
|
|||
|
}
|
|||
|
|
|||
|
+ (NSRegularExpression*)validMentionOrListRegexp
|
|||
|
{
|
|||
|
static NSRegularExpression *validMentionOrListRegexp;
|
|||
|
static dispatch_once_t onceToken;
|
|||
|
dispatch_once(&onceToken, ^{
|
|||
|
validMentionOrListRegexp = [[NSRegularExpression alloc] initWithPattern:TWUValidMentionOrList options:NSRegularExpressionCaseInsensitive error:NULL];
|
|||
|
});
|
|||
|
return validMentionOrListRegexp;
|
|||
|
}
|
|||
|
|
|||
|
+ (NSRegularExpression*)validReplyRegexp
|
|||
|
{
|
|||
|
static NSRegularExpression *validReplyRegexp;
|
|||
|
static dispatch_once_t onceToken;
|
|||
|
dispatch_once(&onceToken, ^{
|
|||
|
validReplyRegexp = [[NSRegularExpression alloc] initWithPattern:TWUValidReply options:NSRegularExpressionCaseInsensitive error:NULL];
|
|||
|
});
|
|||
|
return validReplyRegexp;
|
|||
|
}
|
|||
|
|
|||
|
+ (NSRegularExpression*)endMentionRegexp
|
|||
|
{
|
|||
|
static NSRegularExpression *endMentionRegexp;
|
|||
|
static dispatch_once_t onceToken;
|
|||
|
dispatch_once(&onceToken, ^{
|
|||
|
endMentionRegexp = [[NSRegularExpression alloc] initWithPattern:TWUEndMentionMatch options:NSRegularExpressionCaseInsensitive error:NULL];
|
|||
|
});
|
|||
|
return endMentionRegexp;
|
|||
|
}
|
|||
|
|
|||
|
+ (NSCharacterSet*)invalidURLWithoutProtocolPrecedingCharSet
|
|||
|
{
|
|||
|
static NSCharacterSet *invalidURLWithoutProtocolPrecedingCharSet;
|
|||
|
static dispatch_once_t onceToken;
|
|||
|
dispatch_once(&onceToken, ^{
|
|||
|
invalidURLWithoutProtocolPrecedingCharSet = [NSCharacterSet characterSetWithCharactersInString:@"-_./"];
|
|||
|
#if !__has_feature(objc_arc)
|
|||
|
[invalidURLWithoutProtocolPrecedingCharSet retain];
|
|||
|
#endif
|
|||
|
});
|
|||
|
return invalidURLWithoutProtocolPrecedingCharSet;
|
|||
|
}
|
|||
|
|
|||
|
@end
|