ios - Detect HTML encoding when NSURLResponse returns nil for textEncodingName -
I'm loading a website's HTML using this call:
// Build a GET request for `url` that asks for an HTML document.
NSMutableURLRequest *request = [NSMutableURLRequest requestWithURL:url];
// Accept-Charset is the header that negotiates the character set of the body.
// Accept-Encoding negotiates content codings (compression such as gzip), so
// sending "utf-8" there does not ask the server for UTF-8 text.
[request setValue:@"utf-8" forHTTPHeaderField:@"Accept-Charset"];
[request setValue:@"text/html" forHTTPHeaderField:@"Accept"];
// The completion handler is scheduled on the queue that is current at call time.
[NSURLConnection sendAsynchronousRequest:request
                                   queue:[NSOperationQueue currentQueue]
                       completionHandler:^(NSURLResponse *response, NSData *data, NSError *error) {
    // ... handle `response`, `data` and `error` here ...
}];
Then, to convert the NSData to an NSString, I need to know the encoding, so I call:
// Name of the text encoding reported by the response; may be nil when the
// server does not declare one, so callers must handle the nil case.
NSString *textEncoding = [response textEncodingName];
from within the completion block, but it returns nil for websites that don't specify the "content-encoding" header field.
If I don't know the encoding, [[NSString alloc] initWithData:data encoding:responseEncoding]
won't give me readable HTML.
How can I detect the right encoding for websites that don't send the "content-encoding" header field?
It is possible to try different encodings and see which one results in readable text:
/**
 * Candidate encodings, tried in order from first to last.
 *
 * The element type is NSStringEncoding rather than int: several of these
 * constants (the UTF-16/UTF-32 variants) have the high bit set and do not
 * survive a round trip through a signed 32-bit int on 64-bit platforms.
 *
 * NOTE(review): single-byte encodings such as ISO Latin 1 can decode nearly
 * any byte sequence, so entries listed after them may rarely be reached.
 * Confirm that this ordering is intended.
 */
static const NSStringEncoding encodingPriority[] = {
    NSUTF8StringEncoding,
    NSASCIIStringEncoding,
    NSISOLatin1StringEncoding,
    NSISOLatin2StringEncoding,
    NSUnicodeStringEncoding,
    NSWindowsCP1251StringEncoding,
    NSWindowsCP1252StringEncoding,
    NSWindowsCP1253StringEncoding,
    NSWindowsCP1254StringEncoding,
    NSWindowsCP1250StringEncoding,
    NSNEXTSTEPStringEncoding,
    NSJapaneseEUCStringEncoding,
    NSNonLossyASCIIStringEncoding,
    NSShiftJISStringEncoding,          /* kCFStringEncodingDOSJapanese */
    NSISO2022JPStringEncoding,         /* ISO 2022 Japanese encoding for e-mail */
    NSMacOSRomanStringEncoding,
    NSUTF16BigEndianStringEncoding,
    NSUTF16LittleEndianStringEncoding,
    NSUTF32StringEncoding,
    NSUTF32BigEndianStringEncoding,
    NSUTF32LittleEndianStringEncoding
};

/// Marker text that must appear (case-insensitively) in a successful decode.
#define REQUIRED_HTML_STRING @"<html"

/**
 * Decodes HTML bytes whose encoding is unknown by trying each entry of
 * `encodingPriority` in turn.
 *
 * A candidate is accepted when the data decodes to a non-nil string AND that
 * string contains REQUIRED_HTML_STRING; a wrong encoding typically produces
 * either nil or text in which the marker cannot be found.
 *
 * @param data             Raw response bytes. nil or empty data yields nil.
 * @param detectedEncoding Out-parameter that receives the accepted encoding.
 *                         May be NULL. Written only on success.
 * @return The decoded HTML string, or nil if no candidate encoding matched.
 */
- (NSString *)htmlStringForUnknownEncodingData:(NSData *)data
                              detectedEncoding:(NSStringEncoding *)detectedEncoding {
    if (data.length == 0) {
        // Nothing to decode; the marker can never be found in empty input.
        return nil;
    }

    // sizeof(array) is a byte count; divide by the element size to get the
    // number of entries, otherwise the loop reads past the end of the table.
    const size_t encodingCount = sizeof(encodingPriority) / sizeof(encodingPriority[0]);

    for (size_t i = 0; i < encodingCount; i++) {
        NSStringEncoding encoding = encodingPriority[i];

        // Try this encoding; initWithData:encoding: returns nil when the
        // bytes are not valid for it.
        NSString *html = [[NSString alloc] initWithData:data encoding:encoding];

        // The nil check is required: messaging nil would return a zeroed
        // range whose location (0) is not NSNotFound.
        if (html &&
            [html rangeOfString:REQUIRED_HTML_STRING
                        options:NSCaseInsensitiveSearch].location != NSNotFound) {
            if (detectedEncoding) {
                *detectedEncoding = encoding;
            }
            return html;
        }
    }

    return nil;
}
Then, to detect the encoding of the HTML in an NSData, call it like this:
// `encoding` is only meaningful when the call below returns a non-nil string.
NSStringEncoding encoding;
html = [self htmlStringForUnknownEncodingData:data detectedEncoding:&encoding];
// NSLog takes an NSString format (@"..."), not a C string literal.
if (html) {
    NSLog(@"encoding detected!");
} else {
    NSLog(@"no encoding detected");
}
Comments
Post a Comment