Fix analysis of the content summary.

This commit is contained in:
mattn
2011-10-26 21:21:14 +09:00
parent 8f8cd747b2
commit 47a4d1ffa3

View File

@@ -1,7 +1,7 @@
"============================================================================= "=============================================================================
" zencoding.vim " zencoding.vim
" Author: Yasuhiro Matsumoto <mattn.jp@gmail.com> " Author: Yasuhiro Matsumoto <mattn.jp@gmail.com>
" Last Change: 14-Oct-2011. " Last Change: 25-Oct-2011.
let s:save_cpo = &cpo let s:save_cpo = &cpo
set cpo&vim set cpo&vim
@@ -1057,11 +1057,15 @@ function! zencoding#anchorizeURL(flag)
return return
endif endif
let content = s:get_content_from_url(url) let mx = '.*<title[^>]*>\s*\zs\([^<]\+\)\ze\s*<\/title[^>]*>.*'
let content = substitute(content, '\n', '', 'g') let content = s:get_content_from_url(url, 0)
let content = substitute(content, '\n\s*\n', '\n', 'g') if len(matchstr(content, mx)) == 0
let head = strpart(content, 0, stridx(content, '</head>')) let content = s:get_content_from_url(url, 1)
let title = substitute(head, '.*<title[^>]*>\([^<]\+\)<\/title[^>]*>.*', '\1', 'g') endif
let content = substitute(content, '\r', '', 'g')
let content = substitute(content, '[\s\n]\+', ' ', 'g')
let content = substitute(content, '<!--.\{-}-->', '', 'g')
let title = matchstr(content, mx)
if a:flag == 0 if a:flag == 0
let a = s:zen_parseTag('<a>') let a = s:zen_parseTag('<a>')
@@ -1096,9 +1100,13 @@ endfunction
"============================================================================== "==============================================================================
" html utils " html utils
"============================================================================== "==============================================================================
function! s:get_content_from_url(url) function! s:get_content_from_url(url, utf8)
silent! new silent! new
silent! exec '0r!'.g:zencoding_curl_command.' "'.substitute(a:url, '#.*', '', '').'"' if a:utf8
silent! exec '0r ++enc=utf8 !'.g:zencoding_curl_command.' "'.substitute(a:url, '#.*', '', '').'"'
else
silent! exec '0r!'.g:zencoding_curl_command.' "'.substitute(a:url, '#.*', '', '').'"'
endif
let ret = join(getline(1, '$'), "\n") let ret = join(getline(1, '$'), "\n")
silent! bw! silent! bw!
return ret return ret
@@ -1109,7 +1117,6 @@ function! s:get_text_from_html(buf)
let threshold_per = 0.1 let threshold_per = 0.1
let buf = a:buf let buf = a:buf
let buf = substitute(buf, '<!--.\{-}-->', '', 'g')
let buf = strpart(buf, stridx(buf, '</head>')) let buf = strpart(buf, stridx(buf, '</head>'))
let buf = substitute(buf, '<style[^>]*>.\{-}</style>', '', 'g') let buf = substitute(buf, '<style[^>]*>.\{-}</style>', '', 'g')
let buf = substitute(buf, '<script[^>]*>.\{-}</script>', '', 'g') let buf = substitute(buf, '<script[^>]*>.\{-}</script>', '', 'g')
@@ -1131,8 +1138,8 @@ function! s:get_text_from_html(buf)
let str = substitute(str, '\s\+', ' ', 'g') let str = substitute(str, '\s\+', ' ', 'g')
let l = len(str) let l = len(str)
if l > threshold_len if l > threshold_len
let per = len(c) / l let per = (l+0.0) / len(c)
if max < l && per < threshold_per if max < l && per > threshold_per
let max = l let max = l
let res = str let res = str
endif endif