diff options
| author | Ronny Fenrich <Fenrich@Gmail.com> | 2013-06-13 13:40:44 -0600 |
|---|---|---|
| committer | Ronny Fenrich <Fenrich@Gmail.com> | 2013-06-13 13:40:44 -0600 |
| commit | bf6c97cdee2264656211126ee01066c3c5d4bd8d (patch) | |
| tree | df4a6a9ed1b76109a57d1bf71c85b5632a6e3a0b /Volta/Additions/NSString+HTML.m | |
| parent | d6d01c9dd86561ad2121f0f85f0a4529142d5093 (diff) | |
added Xcode project and converted to CocoaPods (added a bunch of libraries)
Diffstat (limited to 'Volta/Additions/NSString+HTML.m')
| -rwxr-xr-x | Volta/Additions/NSString+HTML.m | 344 |
1 files changed, 344 insertions, 0 deletions
diff --git a/Volta/Additions/NSString+HTML.m b/Volta/Additions/NSString+HTML.m new file mode 100755 index 0000000..7e59221 --- /dev/null +++ b/Volta/Additions/NSString+HTML.m @@ -0,0 +1,344 @@ +// +// NSString+HTML.m +// MWFeedParser +// +// Copyright (c) 2010 Michael Waterfall +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// 1. The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. +// +// 2. This Software cannot be used to archive or collect data such as (but not +// limited to) that of events, news, experiences and activities, for the +// purpose of any concept relating to diary/journal keeping. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// + +#import "NSString+HTML.h" +#import "GTMNSString+HTML.h" + +@implementation NSString (HTML) + +#pragma mark - Instance Methods + +- (NSString *)stringByConvertingHTMLToPlainText { + + // Pool + NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init]; + + // Character sets + NSCharacterSet *stopCharacters = [NSCharacterSet characterSetWithCharactersInString:[NSString stringWithFormat:@"< \t\n\r%C%C%C%C", (unichar)0x0085, (unichar)0x000C, (unichar)0x2028, (unichar)0x2029]]; + NSCharacterSet *newLineAndWhitespaceCharacters = [NSCharacterSet characterSetWithCharactersInString:[NSString stringWithFormat:@" \t\n\r%C%C%C%C", (unichar)0x0085, (unichar)0x000C, (unichar)0x2028, (unichar)0x2029]]; + NSCharacterSet *tagNameCharacters = [NSCharacterSet characterSetWithCharactersInString:@"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"]; + + // Scan and find all tags + NSMutableString *result = [[NSMutableString alloc] initWithCapacity:self.length]; + NSScanner *scanner = [[NSScanner alloc] initWithString:self]; + [scanner setCharactersToBeSkipped:nil]; + [scanner setCaseSensitive:YES]; + NSString *str = nil, *tagName = nil; + BOOL dontReplaceTagWithSpace = NO; + do { + + // Scan up to the start of a tag or whitespace + if ([scanner scanUpToCharactersFromSet:stopCharacters intoString:&str]) { + [result appendString:str]; + str = nil; // reset + } + + // Check if we've stopped at a tag/comment or whitespace + if ([scanner scanString:@"<" intoString:NULL]) { + + // Stopped at a comment or tag + if ([scanner scanString:@"!--" intoString:NULL]) { + + // Comment + [scanner scanUpToString:@"-->" intoString:NULL]; + [scanner scanString:@"-->" intoString:NULL]; + + } else { + + // Tag - remove and replace with space unless it's + // a closing inline tag then dont replace with a space + if ([scanner scanString:@"/" intoString:NULL]) { + + // Closing tag - replace with space unless it's inline + tagName = nil; dontReplaceTagWithSpace = NO; + if ([scanner scanCharactersFromSet:tagNameCharacters intoString:&tagName]) { + tagName = [tagName lowercaseString]; + dontReplaceTagWithSpace = ([tagName isEqualToString:@"a"] || + [tagName isEqualToString:@"b"] || + [tagName isEqualToString:@"i"] || + [tagName isEqualToString:@"q"] || + [tagName isEqualToString:@"span"] || + [tagName isEqualToString:@"em"] || + [tagName isEqualToString:@"strong"] || + [tagName isEqualToString:@"cite"] || + [tagName isEqualToString:@"abbr"] || + [tagName isEqualToString:@"acronym"] || + [tagName isEqualToString:@"label"]); + } + + // Replace tag with string unless it was an inline + if (!dontReplaceTagWithSpace && result.length > 0 && ![scanner isAtEnd]) [result appendString:@" "]; + + } + + // Scan past tag + [scanner scanUpToString:@">" intoString:NULL]; + [scanner scanString:@">" intoString:NULL]; + + } + + } else { + + // Stopped at whitespace - replace all whitespace and newlines with a space + if ([scanner scanCharactersFromSet:newLineAndWhitespaceCharacters intoString:NULL]) { + if (result.length > 0 && ![scanner isAtEnd]) [result appendString:@" "]; // Dont append space to beginning or end of result + } + + } + + } while (![scanner isAtEnd]); + + // Cleanup + [scanner release]; + + // Decode HTML entities and return + NSString *retString = [[result stringByDecodingHTMLEntities] retain]; + [result release]; + + // Drain + [pool drain]; + + // Return + return [retString autorelease]; + +} + +- (NSString *)stringByDecodingHTMLEntities { + // Can return self so create new string if we're a mutable string + return [NSString stringWithString:[self gtm_stringByUnescapingFromHTML]]; +} + + +- (NSString *)stringByEncodingHTMLEntities { + // Can return self so create new string if we're a mutable string + return [NSString stringWithString:[self gtm_stringByEscapingForAsciiHTML]]; +} + +- (NSString *)stringByEncodingHTMLEntities:(BOOL)isUnicode { + // Can return self so create new string if we're a mutable string + return [NSString stringWithString:(isUnicode ? [self gtm_stringByEscapingForHTML] : [self gtm_stringByEscapingForAsciiHTML])]; +} + +- (NSString *)stringWithNewLinesAsBRs { + + // Pool + NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init]; + + // Strange New lines: + // Next Line, U+0085 + // Form Feed, U+000C + // Line Separator, U+2028 + // Paragraph Separator, U+2029 + + // Scanner + NSScanner *scanner = [[NSScanner alloc] initWithString:self]; + [scanner setCharactersToBeSkipped:nil]; + NSMutableString *result = [[NSMutableString alloc] init]; + NSString *temp; + + NSCharacterSet *newLineCharacters = [NSCharacterSet characterSetWithCharactersInString: + [NSString stringWithFormat:@"\n\r%C%C%C%C", (unichar)0x0085, (unichar)0x000C, (unichar)0x2028, (unichar)0x2029]]; + // Scan + do { + + // Get non new line characters + temp = nil; + [scanner scanUpToCharactersFromSet:newLineCharacters intoString:&temp]; + if (temp) [result appendString:temp]; + temp = nil; + + // Add <br /> s + if ([scanner scanString:@"\r\n" intoString:nil]) { + + // Combine \r\n into just 1 <br /> + [result appendString:@"<br />"]; + + } else if ([scanner scanCharactersFromSet:newLineCharacters intoString:&temp]) { + + // Scan other new line characters and add <br /> s + if (temp) { + for (NSUInteger i = 0; i < temp.length; i++) { + [result appendString:@"<br />"]; + } + } + + } + + } while (![scanner isAtEnd]); + + // Cleanup & return + [scanner release]; + NSString *retString = [[NSString stringWithString:result] retain]; + [result release]; + + // Drain + [pool drain]; + + // Return + return [retString autorelease]; + +} + +- (NSString *)stringByRemovingNewLinesAndWhitespace { + + // Pool + NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init]; + + // Strange New lines: + // Next Line, U+0085 + // Form Feed, U+000C + // Line Separator, U+2028 + // Paragraph Separator, U+2029 + + // Scanner + NSScanner *scanner = [[NSScanner alloc] initWithString:self]; + [scanner setCharactersToBeSkipped:nil]; + NSMutableString *result = [[NSMutableString alloc] init]; + NSString *temp; + NSCharacterSet *newLineAndWhitespaceCharacters = [NSCharacterSet characterSetWithCharactersInString: + [NSString stringWithFormat:@" \t\n\r%C%C%C%C", (unichar)0x0085, (unichar)0x000C, (unichar)0x2028, (unichar)0x2029]]; + + // Scan + while (![scanner isAtEnd]) { + + // Get non new line or whitespace characters + temp = nil; + [scanner scanUpToCharactersFromSet:newLineAndWhitespaceCharacters intoString:&temp]; + if (temp) [result appendString:temp]; + + // Replace with a space + if ([scanner scanCharactersFromSet:newLineAndWhitespaceCharacters intoString:NULL]) { + if (result.length > 0 && ![scanner isAtEnd]) // Dont append space to beginning or end of result + [result appendString:@" "]; + } + + } + + // Cleanup + [scanner release]; + + // Return + NSString *retString = [[NSString stringWithString:result] retain]; + [result release]; + + // Drain + [pool drain]; + + // Return + return [retString autorelease]; + +} + +- (NSString *)stringByLinkifyingURLs { + if (!NSClassFromString(@"NSRegularExpression")) return self; + NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init]; + NSString *pattern = @"(?<!=\")\\b((http|https):\\/\\/[\\w\\-_]+(\\.[\\w\\-_]+)+([\\w\\-\\.,@?^=%%&:/~\\+#]*[\\w\\-\\@?^=%%&/~\\+#])?)"; + NSRegularExpression *regex = [NSRegularExpression regularExpressionWithPattern:pattern options:0 error:nil]; + NSString *modifiedString = [[regex stringByReplacingMatchesInString:self options:0 range:NSMakeRange(0, [self length]) + withTemplate:@"<a href=\"$1\" class=\"linkified\">$1</a>"] retain]; + [pool drain]; + return [modifiedString autorelease]; +} + +- (NSString *)stringByStrippingTags { + + // Pool + NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init]; + + // Find first & and short-cut if we can + NSUInteger ampIndex = [self rangeOfString:@"<" options:NSLiteralSearch].location; + if (ampIndex == NSNotFound) { + return [NSString stringWithString:self]; // return copy of string as no tags found + } + + // Scan and find all tags + NSScanner *scanner = [NSScanner scannerWithString:self]; + [scanner setCharactersToBeSkipped:nil]; + NSMutableSet *tags = [[NSMutableSet alloc] init]; + NSString *tag; + do { + + // Scan up to < + tag = nil; + [scanner scanUpToString:@"<" intoString:NULL]; + [scanner scanUpToString:@">" intoString:&tag]; + + // Add to set + if (tag) { + NSString *t = [[NSString alloc] initWithFormat:@"%@>", tag]; + [tags addObject:t]; + [t release]; + } + + } while (![scanner isAtEnd]); + + // Strings + NSMutableString *result = [[NSMutableString alloc] initWithString:self]; + NSString *finalString; + + // Replace tags + NSString *replacement; + for (NSString *t in tags) { + + // Replace tag with space unless it's an inline element + replacement = @" "; + if ([t isEqualToString:@"<a>"] || + [t isEqualToString:@"</a>"] || + [t isEqualToString:@"<span>"] || + [t isEqualToString:@"</span>"] || + [t isEqualToString:@"<strong>"] || + [t isEqualToString:@"</strong>"] || + [t isEqualToString:@"<em>"] || + [t isEqualToString:@"</em>"]) { + replacement = @""; + } + + // Replace + [result replaceOccurrencesOfString:t + withString:replacement + options:NSLiteralSearch + range:NSMakeRange(0, result.length)]; + } + + // Remove multi-spaces and line breaks + finalString = [[result stringByRemovingNewLinesAndWhitespace] retain]; + + // Cleanup + [result release]; + [tags release]; + + // Drain + [pool drain]; + + // Return + return [finalString autorelease]; + +} + +@end |
