summaryrefslogtreecommitdiff
path: root/Volta/Additions/GTMNSString+HTML.m
diff options
context:
space:
mode:
Diffstat (limited to 'Volta/Additions/GTMNSString+HTML.m')
-rwxr-xr-xVolta/Additions/GTMNSString+HTML.m522
1 files changed, 522 insertions, 0 deletions
diff --git a/Volta/Additions/GTMNSString+HTML.m b/Volta/Additions/GTMNSString+HTML.m
new file mode 100755
index 0000000..81a7144
--- /dev/null
+++ b/Volta/Additions/GTMNSString+HTML.m
@@ -0,0 +1,522 @@
+//
+// GTMNSString+HTML.m
+// Dealing with NSStrings that contain HTML
+//
+// Copyright 2006-2008 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not
+// use this file except in compliance with the License. You may obtain a copy
+// of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// License for the specific language governing permissions and limitations under
+// the License.
+//
+
+//#import "GTMDefines.h"
+#import "GTMNSString+HTML.h"
+
+// One row of an HTML escape table: pairs a UTF-16 code unit with the escape
+// sequence string that stands for it.
+//
+// Field order matters: the tables in this file use positional initializers
+// of the form { escapeSequence, uchar }.
+typedef struct {
+ NSString *escapeSequence;
+ unichar uchar;
+} HTMLEscapeMap;
+
+// Taken from http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Special_characters
+// Ordered by uchar lowest to highest for bsearching.
+//
+// Full table (Latin-1, symbols and special characters) used when producing
+// ASCII-only HTML and when decoding named entities. Each escapeSequence must
+// be the literal entity reference text (e.g. "&amp;"), NOT the character it
+// stands for; otherwise escaping becomes a no-op and unescaping can never
+// match a named entity.
+static HTMLEscapeMap gAsciiHTMLEscapeMap[] = {
+  // A.2.2. Special characters
+  { @"&quot;", 34 },
+  { @"&amp;", 38 },
+  { @"&apos;", 39 },
+  { @"&lt;", 60 },
+  { @"&gt;", 62 },
+
+  // A.2.1. Latin-1 characters
+  { @"&nbsp;", 160 },
+  { @"&iexcl;", 161 },
+  { @"&cent;", 162 },
+  { @"&pound;", 163 },
+  { @"&curren;", 164 },
+  { @"&yen;", 165 },
+  { @"&brvbar;", 166 },
+  { @"&sect;", 167 },
+  { @"&uml;", 168 },
+  { @"&copy;", 169 },
+  { @"&ordf;", 170 },
+  { @"&laquo;", 171 },
+  { @"&not;", 172 },
+  { @"&shy;", 173 },
+  { @"&reg;", 174 },
+  { @"&macr;", 175 },
+  { @"&deg;", 176 },
+  { @"&plusmn;", 177 },
+  { @"&sup2;", 178 },
+  { @"&sup3;", 179 },
+  { @"&acute;", 180 },
+  { @"&micro;", 181 },
+  { @"&para;", 182 },
+  { @"&middot;", 183 },
+  { @"&cedil;", 184 },
+  { @"&sup1;", 185 },
+  { @"&ordm;", 186 },
+  { @"&raquo;", 187 },
+  { @"&frac14;", 188 },
+  { @"&frac12;", 189 },
+  { @"&frac34;", 190 },
+  { @"&iquest;", 191 },
+  { @"&Agrave;", 192 },
+  { @"&Aacute;", 193 },
+  { @"&Acirc;", 194 },
+  { @"&Atilde;", 195 },
+  { @"&Auml;", 196 },
+  { @"&Aring;", 197 },
+  { @"&AElig;", 198 },
+  { @"&Ccedil;", 199 },
+  { @"&Egrave;", 200 },
+  { @"&Eacute;", 201 },
+  { @"&Ecirc;", 202 },
+  { @"&Euml;", 203 },
+  { @"&Igrave;", 204 },
+  { @"&Iacute;", 205 },
+  { @"&Icirc;", 206 },
+  { @"&Iuml;", 207 },
+  { @"&ETH;", 208 },
+  { @"&Ntilde;", 209 },
+  { @"&Ograve;", 210 },
+  { @"&Oacute;", 211 },
+  { @"&Ocirc;", 212 },
+  { @"&Otilde;", 213 },
+  { @"&Ouml;", 214 },
+  { @"&times;", 215 },
+  { @"&Oslash;", 216 },
+  { @"&Ugrave;", 217 },
+  { @"&Uacute;", 218 },
+  { @"&Ucirc;", 219 },
+  { @"&Uuml;", 220 },
+  { @"&Yacute;", 221 },
+  { @"&THORN;", 222 },
+  { @"&szlig;", 223 },
+  { @"&agrave;", 224 },
+  { @"&aacute;", 225 },
+  { @"&acirc;", 226 },
+  { @"&atilde;", 227 },
+  { @"&auml;", 228 },
+  { @"&aring;", 229 },
+  { @"&aelig;", 230 },
+  { @"&ccedil;", 231 },
+  { @"&egrave;", 232 },
+  { @"&eacute;", 233 },
+  { @"&ecirc;", 234 },
+  { @"&euml;", 235 },
+  { @"&igrave;", 236 },
+  { @"&iacute;", 237 },
+  { @"&icirc;", 238 },
+  { @"&iuml;", 239 },
+  { @"&eth;", 240 },
+  { @"&ntilde;", 241 },
+  { @"&ograve;", 242 },
+  { @"&oacute;", 243 },
+  { @"&ocirc;", 244 },
+  { @"&otilde;", 245 },
+  { @"&ouml;", 246 },
+  { @"&divide;", 247 },
+  { @"&oslash;", 248 },
+  { @"&ugrave;", 249 },
+  { @"&uacute;", 250 },
+  { @"&ucirc;", 251 },
+  { @"&uuml;", 252 },
+  { @"&yacute;", 253 },
+  { @"&thorn;", 254 },
+  { @"&yuml;", 255 },
+
+  // A.2.2. Special characters cont'd
+  { @"&OElig;", 338 },
+  { @"&oelig;", 339 },
+  { @"&Scaron;", 352 },
+  { @"&scaron;", 353 },
+  { @"&Yuml;", 376 },
+
+  // A.2.3. Symbols
+  { @"&fnof;", 402 },
+
+  // A.2.2. Special characters cont'd
+  { @"&circ;", 710 },
+  { @"&tilde;", 732 },
+
+  // A.2.3. Symbols cont'd
+  { @"&Alpha;", 913 },
+  { @"&Beta;", 914 },
+  { @"&Gamma;", 915 },
+  { @"&Delta;", 916 },
+  { @"&Epsilon;", 917 },
+  { @"&Zeta;", 918 },
+  { @"&Eta;", 919 },
+  { @"&Theta;", 920 },
+  { @"&Iota;", 921 },
+  { @"&Kappa;", 922 },
+  { @"&Lambda;", 923 },
+  { @"&Mu;", 924 },
+  { @"&Nu;", 925 },
+  { @"&Xi;", 926 },
+  { @"&Omicron;", 927 },
+  { @"&Pi;", 928 },
+  { @"&Rho;", 929 },
+  { @"&Sigma;", 931 },
+  { @"&Tau;", 932 },
+  { @"&Upsilon;", 933 },
+  { @"&Phi;", 934 },
+  { @"&Chi;", 935 },
+  { @"&Psi;", 936 },
+  { @"&Omega;", 937 },
+  { @"&alpha;", 945 },
+  { @"&beta;", 946 },
+  { @"&gamma;", 947 },
+  { @"&delta;", 948 },
+  { @"&epsilon;", 949 },
+  { @"&zeta;", 950 },
+  { @"&eta;", 951 },
+  { @"&theta;", 952 },
+  { @"&iota;", 953 },
+  { @"&kappa;", 954 },
+  { @"&lambda;", 955 },
+  { @"&mu;", 956 },
+  { @"&nu;", 957 },
+  { @"&xi;", 958 },
+  { @"&omicron;", 959 },
+  { @"&pi;", 960 },
+  { @"&rho;", 961 },
+  { @"&sigmaf;", 962 },
+  { @"&sigma;", 963 },
+  { @"&tau;", 964 },
+  { @"&upsilon;", 965 },
+  { @"&phi;", 966 },
+  { @"&chi;", 967 },
+  { @"&psi;", 968 },
+  { @"&omega;", 969 },
+  { @"&thetasym;", 977 },
+  { @"&upsih;", 978 },
+  { @"&piv;", 982 },
+
+  // A.2.2. Special characters cont'd
+  { @"&ensp;", 8194 },
+  { @"&emsp;", 8195 },
+  { @"&thinsp;", 8201 },
+  { @"&zwnj;", 8204 },
+  { @"&zwj;", 8205 },
+  { @"&lrm;", 8206 },
+  { @"&rlm;", 8207 },
+  { @"&ndash;", 8211 },
+  { @"&mdash;", 8212 },
+  { @"&lsquo;", 8216 },
+  { @"&rsquo;", 8217 },
+  { @"&sbquo;", 8218 },
+  { @"&ldquo;", 8220 },
+  { @"&rdquo;", 8221 },
+  { @"&bdquo;", 8222 },
+  { @"&dagger;", 8224 },
+  { @"&Dagger;", 8225 },
+  // A.2.3. Symbols cont'd
+  { @"&bull;", 8226 },
+  { @"&hellip;", 8230 },
+
+  // A.2.2. Special characters cont'd
+  { @"&permil;", 8240 },
+
+  // A.2.3. Symbols cont'd
+  { @"&prime;", 8242 },
+  { @"&Prime;", 8243 },
+
+  // A.2.2. Special characters cont'd
+  { @"&lsaquo;", 8249 },
+  { @"&rsaquo;", 8250 },
+
+  // A.2.3. Symbols cont'd
+  { @"&oline;", 8254 },
+  { @"&frasl;", 8260 },
+
+  // A.2.2. Special characters cont'd
+  { @"&euro;", 8364 },
+
+  // A.2.3. Symbols cont'd
+  { @"&image;", 8465 },
+  { @"&weierp;", 8472 },
+  { @"&real;", 8476 },
+  { @"&trade;", 8482 },
+  { @"&alefsym;", 8501 },
+  { @"&larr;", 8592 },
+  { @"&uarr;", 8593 },
+  { @"&rarr;", 8594 },
+  { @"&darr;", 8595 },
+  { @"&harr;", 8596 },
+  { @"&crarr;", 8629 },
+  { @"&lArr;", 8656 },
+  { @"&uArr;", 8657 },
+  { @"&rArr;", 8658 },
+  { @"&dArr;", 8659 },
+  { @"&hArr;", 8660 },
+  { @"&forall;", 8704 },
+  { @"&part;", 8706 },
+  { @"&exist;", 8707 },
+  { @"&empty;", 8709 },
+  { @"&nabla;", 8711 },
+  { @"&isin;", 8712 },
+  { @"&notin;", 8713 },
+  { @"&ni;", 8715 },
+  { @"&prod;", 8719 },
+  { @"&sum;", 8721 },
+  { @"&minus;", 8722 },
+  { @"&lowast;", 8727 },
+  { @"&radic;", 8730 },
+  { @"&prop;", 8733 },
+  { @"&infin;", 8734 },
+  { @"&ang;", 8736 },
+  { @"&and;", 8743 },
+  { @"&or;", 8744 },
+  { @"&cap;", 8745 },
+  { @"&cup;", 8746 },
+  { @"&int;", 8747 },
+  { @"&there4;", 8756 },
+  { @"&sim;", 8764 },
+  { @"&cong;", 8773 },
+  { @"&asymp;", 8776 },
+  { @"&ne;", 8800 },
+  { @"&equiv;", 8801 },
+  { @"&le;", 8804 },
+  { @"&ge;", 8805 },
+  { @"&sub;", 8834 },
+  { @"&sup;", 8835 },
+  { @"&nsub;", 8836 },
+  { @"&sube;", 8838 },
+  { @"&supe;", 8839 },
+  { @"&oplus;", 8853 },
+  { @"&otimes;", 8855 },
+  { @"&perp;", 8869 },
+  { @"&sdot;", 8901 },
+  { @"&lceil;", 8968 },
+  { @"&rceil;", 8969 },
+  { @"&lfloor;", 8970 },
+  { @"&rfloor;", 8971 },
+  { @"&lang;", 9001 },
+  { @"&rang;", 9002 },
+  { @"&loz;", 9674 },
+  { @"&spades;", 9824 },
+  { @"&clubs;", 9827 },
+  { @"&hearts;", 9829 },
+  { @"&diams;", 9830 }
+};
+
+// Taken from http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Special_characters
+// This is table A.2.2 Special Characters
+//
+// Reduced table used when the output may contain raw Unicode: only the
+// "special" characters are replaced. Entries are ordered by uchar, lowest to
+// highest, because the table is bsearched. Each escapeSequence must be the
+// literal entity reference text (e.g. "&amp;"), not the character itself.
+static HTMLEscapeMap gUnicodeHTMLEscapeMap[] = {
+  // C0 Controls and Basic Latin
+  { @"&quot;", 34 },
+  { @"&amp;", 38 },
+  { @"&apos;", 39 },
+  { @"&lt;", 60 },
+  { @"&gt;", 62 },
+
+  // Latin Extended-A
+  { @"&OElig;", 338 },
+  { @"&oelig;", 339 },
+  { @"&Scaron;", 352 },
+  { @"&scaron;", 353 },
+  { @"&Yuml;", 376 },
+
+  // Spacing Modifier Letters
+  { @"&circ;", 710 },
+  { @"&tilde;", 732 },
+
+  // General Punctuation
+  { @"&ensp;", 8194 },
+  { @"&emsp;", 8195 },
+  { @"&thinsp;", 8201 },
+  { @"&zwnj;", 8204 },
+  { @"&zwj;", 8205 },
+  { @"&lrm;", 8206 },
+  { @"&rlm;", 8207 },
+  { @"&ndash;", 8211 },
+  { @"&mdash;", 8212 },
+  { @"&lsquo;", 8216 },
+  { @"&rsquo;", 8217 },
+  { @"&sbquo;", 8218 },
+  { @"&ldquo;", 8220 },
+  { @"&rdquo;", 8221 },
+  { @"&bdquo;", 8222 },
+  { @"&dagger;", 8224 },
+  { @"&Dagger;", 8225 },
+  { @"&permil;", 8240 },
+  { @"&lsaquo;", 8249 },
+  { @"&rsaquo;", 8250 },
+  { @"&euro;", 8364 },
+};
+
+
+// bsearch() comparator for the escape tables above.
+// |ucharVoid| points at the unichar being looked up (the key) and |mapVoid|
+// at the HTMLEscapeMap entry it is compared with. Returns 1, -1 or 0 when the
+// key is greater than, less than, or equal to the entry's uchar.
+static int EscapeMapCompare(const void *ucharVoid, const void *mapVoid) {
+  const unichar key = *(const unichar *)ucharVoid;
+  const unichar candidate = ((const HTMLEscapeMap *)mapVoid)->uchar;
+  // Each comparison evaluates to 0 or 1, so the difference is the usual
+  // three-way result.
+  return (key > candidate) - (key < candidate);
+}
+
+@implementation NSString (GTMNSStringHTMLAdditions)
+
+// Returns YES if |c| is a UTF-16 high (leading) surrogate code unit.
+static BOOL GTMIsHighSurrogate(unichar c) {
+  return c >= 0xD800 && c <= 0xDBFF;
+}
+
+// Returns YES if |c| is a UTF-16 low (trailing) surrogate code unit.
+static BOOL GTMIsLowSurrogate(unichar c) {
+  return c >= 0xDC00 && c <= 0xDFFF;
+}
+
+// Returns the string for a numeric character reference value, or nil if the
+// value is not accepted. Values 1..0xFFFE become a single UTF-16 code unit
+// and values 0x10000..0x10FFFF become a surrogate pair. 0, 0xFFFF and
+// anything above 0x10FFFF are rejected.
+static NSString *GTMStringForCodePoint(unsigned int codePoint) {
+  if (codePoint == 0 || codePoint == 0xFFFF || codePoint > 0x10FFFF) {
+    return nil;
+  }
+  if (codePoint < 0xFFFF) {
+    unichar single = (unichar)codePoint;
+    return [NSString stringWithCharacters:&single length:1];
+  }
+  unsigned int offset = codePoint - 0x10000;
+  unichar pair[2];
+  pair[0] = (unichar)(0xD800 + (offset >> 10));
+  pair[1] = (unichar)(0xDC00 + (offset & 0x3FF));
+  return [NSString stringWithCharacters:pair length:2];
+}
+
+// Shared worker for the two public escaping methods.
+//
+// |table| is an escape table sorted by uchar (it is bsearched) and |size| is
+// its size in BYTES (i.e. sizeof(table)), not its entry count. Characters
+// found in the table are replaced by their escape sequence. When
+// |escapeUnicode| is YES, any other character above 127 is written as a
+// decimal numeric reference; a valid surrogate pair is written as ONE
+// reference for the combined code point.
+//
+// Returns the receiver itself when it is empty, nil if a buffer could not be
+// obtained, and otherwise a new string.
+- (NSString *)gtm_stringByEscapingHTMLUsingTable:(HTMLEscapeMap*)table
+                                          ofSize:(NSUInteger)size
+                                 escapingUnicode:(BOOL)escapeUnicode {
+  NSUInteger length = [self length];
+  if (!length) {
+    return self;
+  }
+
+  NSMutableString *finalString = [NSMutableString string];
+  // Must be dataWithLength:, not dataWithCapacity:. A capacity-only data
+  // object still has length 0, and -mutableBytes on a zero-length object is
+  // not guaranteed to return a usable pointer, yet up to |length| unichars are
+  // written through it below.
+  NSMutableData *data2 = [NSMutableData dataWithLength:sizeof(unichar) * length];
+
+  // this block is common between GTMNSString+HTML and GTMNSString+XML but
+  // it's so short that it isn't really worth trying to share.
+  const unichar *buffer = CFStringGetCharactersPtr((CFStringRef)self);
+  if (!buffer) {
+    // No direct pointer available: copy the characters out.
+    // We want this buffer to be autoreleased.
+    NSMutableData *data = [NSMutableData dataWithLength:length * sizeof(UniChar)];
+    if (!data) {
+      // COV_NF_START - Memory fail case
+//      _GTMDevLog(@"couldn't alloc buffer");
+      return nil;
+      // COV_NF_END
+    }
+    [self getCharacters:[data mutableBytes]];
+    buffer = [data bytes];
+  }
+
+  if (!buffer || !data2) {
+    // COV_NF_START
+//    _GTMDevLog(@"Unable to allocate buffer or data2");
+    return nil;
+    // COV_NF_END
+  }
+
+  // buffer2 collects the current run of characters that need no escaping so
+  // they can be appended in one call.
+  unichar *buffer2 = (unichar *)[data2 mutableBytes];
+  NSUInteger buffer2Length = 0;
+  const size_t entryCount = size / sizeof(HTMLEscapeMap);
+
+  for (NSUInteger i = 0; i < length; ++i) {
+    HTMLEscapeMap *val = bsearch(&buffer[i], table,
+                                 entryCount,
+                                 sizeof(HTMLEscapeMap), EscapeMapCompare);
+    if (val || (escapeUnicode && buffer[i] > 127)) {
+      // Flush the pending unescaped run before writing the escape.
+      if (buffer2Length) {
+        CFStringAppendCharacters((CFMutableStringRef)finalString,
+                                 buffer2,
+                                 buffer2Length);
+        buffer2Length = 0;
+      }
+      if (val) {
+        [finalString appendString:val->escapeSequence];
+      } else if (i + 1 < length &&
+                 GTMIsHighSurrogate(buffer[i]) &&
+                 GTMIsLowSurrogate(buffer[i + 1])) {
+        // A character outside the BMP: emit one reference for the combined
+        // code point. Two separate surrogate references are not valid HTML.
+        unsigned int codePoint = 0x10000u
+            + (((unsigned int)buffer[i] - 0xD800u) << 10)
+            + ((unsigned int)buffer[i + 1] - 0xDC00u);
+        [finalString appendFormat:@"&#%u;", codePoint];
+        ++i;  // the low surrogate has been consumed as well
+      } else {
+//        _GTMDevAssert(escapeUnicode && buffer[i] > 127, @"Illegal Character");
+        [finalString appendFormat:@"&#%d;", buffer[i]];
+      }
+    } else {
+      buffer2[buffer2Length] = buffer[i];
+      buffer2Length += 1;
+    }
+  }
+  if (buffer2Length) {
+    CFStringAppendCharacters((CFMutableStringRef)finalString,
+                             buffer2,
+                             buffer2Length);
+  }
+  return finalString;
+}
+
+// Escapes only the characters listed in gUnicodeHTMLEscapeMap; all other
+// characters, including non-ASCII ones, are passed through unchanged.
+- (NSString *)gtm_stringByEscapingForHTML {
+  return [self gtm_stringByEscapingHTMLUsingTable:gUnicodeHTMLEscapeMap
+                                           ofSize:sizeof(gUnicodeHTMLEscapeMap)
+                                  escapingUnicode:NO];
+} // gtm_stringByEscapingHTML
+
+// Escapes using gAsciiHTMLEscapeMap and additionally writes every remaining
+// character above 127 as a numeric reference.
+- (NSString *)gtm_stringByEscapingForAsciiHTML {
+  return [self gtm_stringByEscapingHTMLUsingTable:gAsciiHTMLEscapeMap
+                                           ofSize:sizeof(gAsciiHTMLEscapeMap)
+                                  escapingUnicode:YES];
+} // gtm_stringByEscapingAsciiHTML
+
+// Replaces named entities (those in gAsciiHTMLEscapeMap) and decimal/hex
+// numeric character references with the characters they stand for.
+// Sequences that are not recognised are left untouched. Returns the receiver
+// itself when it contains no '&'.
+//
+// The receiver is scanned from the end towards the start, so replacements
+// made in |finalString| never shift the ranges that are still to be examined
+// (ranges are computed on the unmodified receiver).
+- (NSString *)gtm_stringByUnescapingFromHTML {
+  NSRange range = NSMakeRange(0, [self length]);
+  NSRange subrange = [self rangeOfString:@"&" options:NSBackwardsSearch range:range];
+
+  // if no ampersands, we've got a quick way out
+  if (subrange.length == 0) return self;
+  NSMutableString *finalString = [NSMutableString stringWithString:self];
+  do {
+    // Look for the terminating ';' between this '&' and the end of the
+    // region still being considered.
+    NSRange semiColonRange = NSMakeRange(subrange.location, NSMaxRange(range) - subrange.location);
+    semiColonRange = [self rangeOfString:@";" options:0 range:semiColonRange];
+    // The next search only covers what precedes this '&'.
+    range = NSMakeRange(0, subrange.location);
+    // if we don't find a semicolon in the range, we don't have a sequence
+    if (semiColonRange.location == NSNotFound) {
+      continue;
+    }
+    NSRange escapeRange = NSMakeRange(subrange.location, semiColonRange.location - subrange.location + 1);
+    NSString *escapeString = [self substringWithRange:escapeRange];
+    NSUInteger length = [escapeString length];
+    // a sequence must be longer than 3 (&lt;) and less than 11 (&thetasym;)
+    if (length > 3 && length < 11) {
+      if ([escapeString characterAtIndex:1] == '#') {
+        // Numeric reference: hex (&#xa3;) or decimal (&#123;).
+        unichar char2 = [escapeString characterAtIndex:2];
+        BOOL isHex = (char2 == 'x' || char2 == 'X');
+        // The digits sit between "&#" (or "&#x") and the trailing ';'.
+        NSUInteger digitsStart = isHex ? 3 : 2;
+        NSUInteger digitsLength = length - digitsStart - 1;
+        NSString *digits = [escapeString substringWithRange:NSMakeRange(digitsStart, digitsLength)];
+        NSScanner *scanner = [NSScanner scannerWithString:digits];
+        unsigned int codePoint = 0;
+        BOOL scanned = NO;
+        if (isHex) {
+          unsigned hexValue = 0;
+          scanned = [scanner scanHexInt:&hexValue];
+          codePoint = hexValue;
+        } else {
+          int decimalValue = 0;
+          // Reject non-positive values here so the unsigned conversion
+          // below cannot wrap a negative number into a valid code point.
+          scanned = [scanner scanInt:&decimalValue] && decimalValue > 0;
+          if (scanned) {
+            codePoint = (unsigned int)decimalValue;
+          }
+        }
+        // The scanner must have consumed every digit character.
+        if (scanned && [scanner scanLocation] == digitsLength) {
+          NSString *charString = GTMStringForCodePoint(codePoint);
+          if (charString) {
+            [finalString replaceCharactersInRange:escapeRange withString:charString];
+          }
+        }
+      } else {
+        // "standard" sequences
+        for (unsigned i = 0; i < sizeof(gAsciiHTMLEscapeMap) / sizeof(HTMLEscapeMap); ++i) {
+          if ([escapeString isEqualToString:gAsciiHTMLEscapeMap[i].escapeSequence]) {
+            [finalString replaceCharactersInRange:escapeRange withString:[NSString stringWithCharacters:&gAsciiHTMLEscapeMap[i].uchar length:1]];
+            break;
+          }
+        }
+      }
+    }
+  } while ((subrange = [self rangeOfString:@"&" options:NSBackwardsSearch range:range]).length != 0);
+  return finalString;
+} // gtm_stringByUnescapingHTML
+
+
+
+@end
\ No newline at end of file