从 Word 复制内容到编辑器时,需要替换其中冗余的 HTML 代码、清理 HTML 格式。这是最常见的问题,以下是解决方法之一。
js函数如下:
/**
 * Ordered [pattern, replacement] rules applied by cWord.
 *
 * Order matters: later rules rely on what earlier ones have already stripped
 * (for example, bare `<span>` tags only appear once their attributes are gone).
 * All patterns are global and case-insensitive; `[\s\S]` is used instead of
 * `.` so that content spanning several lines is matched as well.
 */
const WORD_CLEANUP_RULES = [
  // Office paragraph markers: drop empty ones, turn the rest into a space.
  [/<o:p>\s*<\/o:p>/gi, ''],
  [/<o:p>[\s\S]*?<\/o:p>/gi, ' '],

  // Word-specific (mso-*) style declarations.
  [/\s*mso-[^:]+:[^;"]+;?/gi, ''],

  // Default margin / indent / alignment declarations emitted by Word.
  [/\s*MARGIN: 0cm 0cm 0pt\s*;/gi, ''],
  [/\s*MARGIN: 0cm 0cm 0pt\s*"/gi, '"'],
  [/\s*TEXT-INDENT: 0cm\s*;/gi, ''],
  [/\s*TEXT-INDENT: 0cm\s*"/gi, '"'],
  [/\s*TEXT-ALIGN: [^\s;]+;?"/gi, '"'],
  [/\s*PAGE-BREAK-BEFORE: [^\s;]+;?"/gi, '"'],
  [/\s*FONT-VARIANT: [^\s;]+;?"/gi, '"'],
  [/\s*tab-stops:[^;"]*;?/gi, ''],
  [/\s*tab-stops:[^"]*/gi, ''],

  // Font face attributes and font-family declarations.
  [/\s*face="[^"]*"/gi, ''],
  [/\s*face=[^ >]*/gi, ''],
  [/\s*FONT-FAMILY:[^;"]*;?/gi, ''],

  // Remove class / style attributes. The attributes that follow the removed
  // one are kept ($2) so that e.g. an href next to a class survives.
  [/<(\w[^>]*)\s+class=(?:"[^"]*"|'[^']*'|[^\s>]*)([^>]*)/gi, '<$1$2'],
  [/<(\w[^>]*)\s+style="[^"]*"([^>]*)/gi, '<$1$2'],
  [/\s*style="\s*"/gi, ''],

  // A span holding only a space (or an &nbsp; entity) collapses to that
  // content; a completely empty span disappears.
  [/<SPAN\s*[^>]*>\s*( |&nbsp;)\s*<\/SPAN>/gi, '$1'],
  [/<SPAN\s*[^>]*><\/SPAN>/gi, ''],

  // Remove lang attributes, again keeping the attributes that follow.
  [/<(\w[^>]*)\s+lang=(?:"[^"]*"|'[^']*'|[^\s>]*)([^>]*)/gi, '<$1$2'],

  // Unwrap attribute-less span / font elements.
  [/<SPAN\s*>([\s\S]*?)<\/SPAN>/gi, '$1'],
  [/<FONT\s*>([\s\S]*?)<\/FONT>/gi, '$1'],

  // XML declarations and namespaced tags such as <o:p> or <st1:place>.
  [/<\\?\?xml[^>]*>/gi, ''],
  [/<\/?\w+:[^>]*>/gi, ''],

  // Headings: drop empty ones, convert the rest to <div><b><font size=N>
  // where H1 maps to size 6 down to H6 mapping to size 1.
  [/<H\d>\s*<\/H\d>/gi, ''],
  [
    /<H([1-6])([^>]*)>/gi,
    (match, level, attrs) =>
      `<div${attrs}><b><font size="${7 - Number(level)}">`,
  ],
  // The closing tag must also close the <div> opened above.
  [/<\/H\d>/gi, '</font></b></div>'],

  // Underline / italic / strike elements that contain only a space.
  [/<(U|I|STRIKE)>( |&nbsp;)<\/\1>/gi, '$2'],

  // Remove empty elements. Applied three times so that nesting up to three
  // levels deep collapses. The tag name must be followed by whitespace or
  // '>' so that "<br></b>" is not mistaken for an empty <b> element.
  [/<([^\s>]+)(?:\s[^>]*)?>\s*<\/\1>/gi, ''],
  [/<([^\s>]+)(?:\s[^>]*)?>\s*<\/\1>/gi, ''],
  [/<([^\s>]+)(?:\s[^>]*)?>\s*<\/\1>/gi, ''],

  // Turn <p ...>...</p> into <div ...>...</div>. Only real <p> tags match;
  // <pre>, <param> and similar are left alone.
  [/<P(\s[^>]*)?>([\s\S]*?)<\/P>/gi, '<div$1>$2</div>'],
];

/**
 * Cleans up HTML pasted from Word by stripping redundant markup.
 *
 * The rules in WORD_CLEANUP_RULES are applied in order: Office-specific tags
 * and style declarations are removed, class/style/lang attributes are
 * dropped, headings become bold sized <div> blocks, empty elements are
 * removed and paragraphs are converted to <div> elements.
 *
 * @param {string} html - HTML fragment to clean; other values are coerced
 *   with String().
 * @returns {string} The cleaned HTML, or an empty string when `html` is
 *   null or undefined.
 */
function cWord(html) {
  if (html == null) {
    return '';
  }

  let cleaned = String(html);
  for (const [pattern, replacement] of WORD_CLEANUP_RULES) {
    cleaned = cleaned.replace(pattern, replacement);
  }
  return cleaned;
}
该方法简洁,可根据自己需要进行增删。