Check encoding more smartly.

This commit is contained in:
mattn
2012-06-13 18:44:45 +09:00
parent 05688356b3
commit bddc3f9ff5
2 changed files with 7 additions and 14 deletions

View File

@@ -492,10 +492,7 @@ function! zencoding#anchorizeURL(flag)
endif
let mx = '.*<title[^>]*>\s*\zs\([^<]\+\)\ze\s*<\/title[^>]*>.*'
let content = zencoding#util#getContentFromURL(url, 0)
if len(matchstr(content, mx)) == 0
let content = zencoding#util#getContentFromURL(url, 1)
endif
let content = zencoding#util#getContentFromURL(url)
let content = substitute(content, '\r', '', 'g')
let content = substitute(content, '[ \n]\+', ' ', 'g')
let content = substitute(content, '<!--.\{-}-->', '', 'g')

View File

@@ -144,16 +144,12 @@ endfunction
"==============================================================================
" html utils
"==============================================================================
function! zencoding#util#getContentFromURL(url, utf8)
silent! new
if a:utf8
silent! exec '0r ++enc=utf8 !'.g:zencoding_curl_command.' "'.substitute(a:url, '#.*', '', '').'"'
else
silent! exec '0r!'.g:zencoding_curl_command.' "'.substitute(a:url, '#.*', '', '').'"'
endif
let ret = join(getline(1, '$'), "\n")
silent! bw!
return ret
" Fetch the content of a:url by running g:zencoding_curl_command and return it
" as a string, choosing between the raw output and a utf-8 -> &encoding
" converted copy.
"
" a:url  URL to fetch; anything from the first '#' onwards is removed before
"        the URL is handed to the command.
" Returns either the raw command output or the iconv()-converted text.
function! zencoding#util#getContentFromURL(url)
  " Strip the fragment part and quote the URL for the shell.
  let target = substitute(a:url, '#.*', '', '')
  let command = printf("%s %s", g:zencoding_curl_command, shellescape(target))
  let raw = system(command)

  " Convert the output from utf-8 to the current 'encoding'.
  let converted = iconv(raw, 'utf-8', &encoding)

  " Compare how many pieces each text yields when split on '?'.
  " NOTE(review): presumably '?' is used as a marker for characters that
  " iconv() could not convert -- confirm that the direction of the comparison
  " below (many more '?' => prefer the converted text) is the intended one.
  let raw_pieces = len(split(raw, '?'))
  let converted_pieces = len(split(converted, '?'))

  if converted_pieces > raw_pieces * 2
    return converted
  endif
  return raw
endfunction
function! zencoding#util#getTextFromHTML(buf)