admin管理员组文章数量:1130349
I have an HTML string in JavaScript and, using a regex, I want to remove the id, style and class attributes from its HTML tags. For example, I have:
New York City.<div style="padding:20px" id="upp" class="upper"><div style="background:#F2F2F2; color:black; font-size:90%; padding:10px 10px; width:500px;">This message is.</div></div>
I want this String to become:
New York City.<div><div>This message is.</div></div>
I have an HTML string in JavaScript and, using a regex, I want to remove the id, style and class attributes from its HTML tags. For example, I have:
New York City.<div style="padding:20px" id="upp" class="upper"><div style="background:#F2F2F2; color:black; font-size:90%; padding:10px 10px; width:500px;">This message is.</div></div>
I want this String to become:
New York City.<div><div>This message is.</div></div>
Share
Improve this question
edited Sep 10, 2012 at 22:24
Jimmy Page
asked Sep 10, 2012 at 22:22
Jimmy PageJimmy Page
3431 gold badge6 silver badges12 bronze badges
3
|
8 Answers
Reset to default 11Instead of parsing the HTML using regular expressions, which is a bad idea, you could take advantage of the DOM functionality that is available in all browsers. We need to be able to walk the DOM tree first:
/**
 * Visits a node and all of its descendants, parents before children and
 * siblings in document order.
 *
 * @param {Node|null} node - Root of the subtree to visit; a missing node is ignored.
 * @param {function(Node): void} func - Callback invoked once per visited node.
 */
var walk_the_DOM = function walk(node, func) {
  // An empty container has a null firstChild; do nothing instead of throwing.
  if (!node) {
    return;
  }
  func(node);
  for (var child = node.firstChild; child; child = child.nextSibling) {
    walk(child, func);
  }
};
Now parse the string and manipulate the DOM:
// Parse the markup inside a detached wrapper element so the browser builds the DOM.
var wrapper= document.createElement('div');
wrapper.innerHTML= '<!-- your HTML here -->';
// Walk from the wrapper itself: starting at wrapper.firstChild would visit only
// the first top-level node and its descendants, skipping every later sibling
// (for markup that begins with text, that means no element is cleaned at all).
walk_the_DOM(wrapper, function(element) {
  // Text and comment nodes have no removeAttribute method, so skip them.
  if(element.removeAttribute) {
    element.removeAttribute('id');
    element.removeAttribute('style');
    element.removeAttribute('class');
  }
});
// Declared with var so the snippet does not rely on an implicit global.
var result = wrapper.innerHTML;
See also this JSFiddle.
If you are willing to remove everything but the div tag names-
// Drop every attribute from <div> opening tags, keeping the tag name as written
// (so <DIV ...> becomes <DIV>). The lookahead requires whitespace or "/" right
// after "div", so tags that merely start with those letters (e.g. <divider ...>)
// are left alone. Attribute values that contain ">" are not supported.
string=string.replace(/<(div)(?=[\s\/])[^>]*>/ig,'<$1>');
This will return <DIV> if the tag is written in upper case in the HTML.
If you just want to remove the attributes, then regex is the wrong tool. I'd suggest, instead:
/**
 * Removes every attribute from the given element.
 *
 * @param elem - Element to clean; a falsy value is rejected.
 * @returns false when no element was supplied, otherwise nothing.
 */
function stripAttributes(elem){
  // Guard clause: nothing to strip.
  if (!elem) {
    return false;
  }
  // Keep removing the first remaining attribute until the collection is empty.
  for (var remaining = elem.attributes; remaining.length; ) {
    elem.removeAttribute(remaining[0].name);
  }
}
// Look up the element whose id is "test" and remove all of its attributes.
// stripAttributes returns false without doing anything when given a falsy value.
var div = document.getElementById('test');
stripAttributes(div);
JS Fiddle demo.
I used this:
// Sample markup from the question. The literal is joined with "+" because a
// quoted string cannot contain a raw line break.
var html = 'New York City.<div style="padding:20px" id="upp" ' +
  'class="upper"><div style="background:#F2F2F2; color:black; font-size:90%; padding:10px 10px; width:500px;">This message is.</div></div>';
/**
 * Removes attributes from the opening tags found in an HTML string.
 *
 * Handles name="value", name='value' and unquoted name=value attributes,
 * including empty values, hyphenated/namespaced names (data-id, xlink:href)
 * and tags whose attributes are spread over several lines.
 *
 * Limitations: this is a regex pass, not an HTML parser. Valueless attributes
 * (e.g. "disabled") are left in place, and an attribute value that contains
 * ">" ends the tag match early.
 *
 * @param str - HTML source text.
 * @param attrs - Names of the attributes to KEEP (case-sensitive); when
 *   omitted or empty, every attribute is removed.
 * @returns The HTML text with the unwanted attributes stripped.
 */
function clear_attr(str,attrs){
  var keep = attrs || [];
  // "<", optional whitespace, a tag name, then everything up to the next ">".
  // [^>] also matches line breaks, so multi-line tags are covered.
  var tagPattern = /<\s*\w+[^>]*>/g;
  // Leading whitespace is part of the match so removing an attribute also
  // removes the gap in front of it. Group 1 is the attribute name.
  var attrPattern = /\s*([\w:-]+)\s*=\s*(?:"[^"]*"|'[^']*'|[^\s"'=<>`]+)/g;
  return str.replace(tagPattern, function(tag) {
    return tag.replace(attrPattern, function(attr, name) {
      return keep.indexOf(name) >= 0 ? attr : '';
    });
  });
}
// An empty keep-list means no attribute is preserved. The cleaned string is the
// return value; this call discards it, so assign it to a variable to use it.
clear_attr(html,[]);
Use regular expression. That is fast (in production time) and easy (in development time).
// Reduce every opening or closing tag to its bare name, discarding ALL attributes.
// The name must start with a letter (optionally preceded by "/"), so comments and
// doctype declarations are left untouched, and it ends at any whitespace rather
// than only at a space, so tags whose attributes start on a new line are handled.
htmlCode = htmlCode.replace(/<(\/?[a-z][^\s>]*)[^>]*>/ig,'<$1>');
Trying to parse HTML with regexes will cause problems. This answer may be helpful in explaining them. If you are using jQuery, you may be able to do something like this:
// Parse the string inside a detached wrapper <div>: this copes with markup that
// starts with plain text and makes top-level elements reachable by find("*").
// end() steps back from the matched descendants to the wrapper, whose html() is
// the cleaned markup, so no outerHTML plugin is required.
var transformedHtml = $("<div>").html(html).find("*").removeAttr("id").removeAttr("style").removeAttr("class").end().html();
For this to work, you need to be using the outerHTML plugin described here.
If you don't want to use jQuery, it will be trickier. These questions may have some helpful answers as to how to convert the string to a collection of DOM elements: Converting HTML string into DOM elements?, Creating a new DOM element from an HTML string using built-in DOM methods or prototype. You may be able to loop through the elements and remove the attributes using the built-in removeAttribute method. I don't have the time or motivation to figure out all the details for you.
A plain script solution would be something like:
/**
 * Parses a markup string and removes the id, style and class attributes from
 * every element in it.
 *
 * @param markup - HTML source text.
 * @returns The cleaned markup, serialized back to a string.
 */
function removeProperties(markup) {
  // A detached <div> lets the browser parse the string into DOM nodes.
  var div = document.createElement('div');
  div.innerHTML = markup;
  var els = div.getElementsByTagName('*');
  for (var i = 0, iLen = els.length; i < iLen; i++) {
    var el = els[i];
    // removeAttribute drops the attribute itself; assigning '' to the matching
    // property would leave empty id=""/class="" attributes in the output.
    el.removeAttribute('id');
    el.removeAttribute('style');
    el.removeAttribute('class');
  }
  // Hand the result back instead of looping on div.firstChild, which never
  // terminates unless each iteration moves a child out of the wrapper.
  return div.innerHTML;
}
A more general solution would get the property names as extra arguments, or say a space separated string, then iterate over the names to remove them.
I don't know about RegEx, but I sure as hell know about jQuery.
Convert the given HTML string into a DOM element, parse it, and return its contents.
/**
 * Strips the inline style attribute from every element in an HTML string.
 *
 * @param html - HTML source text.
 * @returns The markup with all style attributes removed.
 */
function cleanStyles(html){
  // Scratch container that is never attached to the page.
  var scratchNode = document.createElement('div');
  var $scratch = $(scratchNode);
  $scratch.html(html);
  var $everything = $scratch.find('*');
  $everything.removeAttr('style');
  return $scratch.html();
}
I have an HTML string in JavaScript and, using a regex, I want to remove the id, style and class attributes from its HTML tags. For example, I have:
New York City.<div style="padding:20px" id="upp" class="upper"><div style="background:#F2F2F2; color:black; font-size:90%; padding:10px 10px; width:500px;">This message is.</div></div>
I want this String to become:
New York City.<div><div>This message is.</div></div>
I have an HTML string in JavaScript and, using a regex, I want to remove the id, style and class attributes from its HTML tags. For example, I have:
New York City.<div style="padding:20px" id="upp" class="upper"><div style="background:#F2F2F2; color:black; font-size:90%; padding:10px 10px; width:500px;">This message is.</div></div>
I want this String to become:
New York City.<div><div>This message is.</div></div>
Share
Improve this question
edited Sep 10, 2012 at 22:24
Jimmy Page
asked Sep 10, 2012 at 22:22
Jimmy PageJimmy Page
3431 gold badge6 silver badges12 bronze badges
3
- 3 /me is casting a link to the legendary don't-parse-html-with-regex answer... – zerkms Commented Sep 10, 2012 at 22:26
- How about removeAttribute(), maybe? – David Thomas Commented Sep 10, 2012 at 22:26
- 1 Convert it to a DOM element and use the appropriate tools to manipulate it. That's a far more stable solution. – You Commented Sep 10, 2012 at 22:31
8 Answers
Reset to default 11Instead of parsing the HTML using regular expressions, which is a bad idea, you could take advantage of the DOM functionality that is available in all browsers. We need to be able to walk the DOM tree first:
/**
 * Visits a node and all of its descendants, parents before children and
 * siblings in document order.
 *
 * @param {Node|null} node - Root of the subtree to visit; a missing node is ignored.
 * @param {function(Node): void} func - Callback invoked once per visited node.
 */
var walk_the_DOM = function walk(node, func) {
  // An empty container has a null firstChild; do nothing instead of throwing.
  if (!node) {
    return;
  }
  func(node);
  for (var child = node.firstChild; child; child = child.nextSibling) {
    walk(child, func);
  }
};
Now parse the string and manipulate the DOM:
// Parse the markup inside a detached wrapper element so the browser builds the DOM.
var wrapper= document.createElement('div');
wrapper.innerHTML= '<!-- your HTML here -->';
// Walk from the wrapper itself: starting at wrapper.firstChild would visit only
// the first top-level node and its descendants, skipping every later sibling
// (for markup that begins with text, that means no element is cleaned at all).
walk_the_DOM(wrapper, function(element) {
  // Text and comment nodes have no removeAttribute method, so skip them.
  if(element.removeAttribute) {
    element.removeAttribute('id');
    element.removeAttribute('style');
    element.removeAttribute('class');
  }
});
// Declared with var so the snippet does not rely on an implicit global.
var result = wrapper.innerHTML;
See also this JSFiddle.
If you are willing to remove everything but the div tag names-
// Drop every attribute from <div> opening tags, keeping the tag name as written
// (so <DIV ...> becomes <DIV>). The lookahead requires whitespace or "/" right
// after "div", so tags that merely start with those letters (e.g. <divider ...>)
// are left alone. Attribute values that contain ">" are not supported.
string=string.replace(/<(div)(?=[\s\/])[^>]*>/ig,'<$1>');
This will return <DIV> if the tag is written in upper case in the HTML.
If you just want to remove the attributes, then regex is the wrong tool. I'd suggest, instead:
/**
 * Removes every attribute from the given element.
 *
 * @param elem - Element to clean; a falsy value is rejected.
 * @returns false when no element was supplied, otherwise nothing.
 */
function stripAttributes(elem){
  // Guard clause: nothing to strip.
  if (!elem) {
    return false;
  }
  // Keep removing the first remaining attribute until the collection is empty.
  for (var remaining = elem.attributes; remaining.length; ) {
    elem.removeAttribute(remaining[0].name);
  }
}
// Look up the element whose id is "test" and remove all of its attributes.
// stripAttributes returns false without doing anything when given a falsy value.
var div = document.getElementById('test');
stripAttributes(div);
JS Fiddle demo.
I used this:
// Sample markup from the question. The literal is joined with "+" because a
// quoted string cannot contain a raw line break.
var html = 'New York City.<div style="padding:20px" id="upp" ' +
  'class="upper"><div style="background:#F2F2F2; color:black; font-size:90%; padding:10px 10px; width:500px;">This message is.</div></div>';
/**
 * Removes attributes from the opening tags found in an HTML string.
 *
 * Handles name="value", name='value' and unquoted name=value attributes,
 * including empty values, hyphenated/namespaced names (data-id, xlink:href)
 * and tags whose attributes are spread over several lines.
 *
 * Limitations: this is a regex pass, not an HTML parser. Valueless attributes
 * (e.g. "disabled") are left in place, and an attribute value that contains
 * ">" ends the tag match early.
 *
 * @param str - HTML source text.
 * @param attrs - Names of the attributes to KEEP (case-sensitive); when
 *   omitted or empty, every attribute is removed.
 * @returns The HTML text with the unwanted attributes stripped.
 */
function clear_attr(str,attrs){
  var keep = attrs || [];
  // "<", optional whitespace, a tag name, then everything up to the next ">".
  // [^>] also matches line breaks, so multi-line tags are covered.
  var tagPattern = /<\s*\w+[^>]*>/g;
  // Leading whitespace is part of the match so removing an attribute also
  // removes the gap in front of it. Group 1 is the attribute name.
  var attrPattern = /\s*([\w:-]+)\s*=\s*(?:"[^"]*"|'[^']*'|[^\s"'=<>`]+)/g;
  return str.replace(tagPattern, function(tag) {
    return tag.replace(attrPattern, function(attr, name) {
      return keep.indexOf(name) >= 0 ? attr : '';
    });
  });
}
// An empty keep-list means no attribute is preserved. The cleaned string is the
// return value; this call discards it, so assign it to a variable to use it.
clear_attr(html,[]);
Use regular expression. That is fast (in production time) and easy (in development time).
// Reduce every opening or closing tag to its bare name, discarding ALL attributes.
// The name must start with a letter (optionally preceded by "/"), so comments and
// doctype declarations are left untouched, and it ends at any whitespace rather
// than only at a space, so tags whose attributes start on a new line are handled.
htmlCode = htmlCode.replace(/<(\/?[a-z][^\s>]*)[^>]*>/ig,'<$1>');
Trying to parse HTML with regexes will cause problems. This answer may be helpful in explaining them. If you are using jQuery, you may be able to do something like this:
// Parse the string inside a detached wrapper <div>: this copes with markup that
// starts with plain text and makes top-level elements reachable by find("*").
// end() steps back from the matched descendants to the wrapper, whose html() is
// the cleaned markup, so no outerHTML plugin is required.
var transformedHtml = $("<div>").html(html).find("*").removeAttr("id").removeAttr("style").removeAttr("class").end().html();
For this to work, you need to be using the outerHTML plugin described here.
If you don't want to use jQuery, it will be trickier. These questions may have some helpful answers as to how to convert the string to a collection of DOM elements: Converting HTML string into DOM elements?, Creating a new DOM element from an HTML string using built-in DOM methods or prototype. You may be able to loop through the elements and remove the attributes using the built-in removeAttribute method. I don't have the time or motivation to figure out all the details for you.
A plain script solution would be something like:
/**
 * Parses a markup string and removes the id, style and class attributes from
 * every element in it.
 *
 * @param markup - HTML source text.
 * @returns The cleaned markup, serialized back to a string.
 */
function removeProperties(markup) {
  // A detached <div> lets the browser parse the string into DOM nodes.
  var div = document.createElement('div');
  div.innerHTML = markup;
  var els = div.getElementsByTagName('*');
  for (var i = 0, iLen = els.length; i < iLen; i++) {
    var el = els[i];
    // removeAttribute drops the attribute itself; assigning '' to the matching
    // property would leave empty id=""/class="" attributes in the output.
    el.removeAttribute('id');
    el.removeAttribute('style');
    el.removeAttribute('class');
  }
  // Hand the result back instead of looping on div.firstChild, which never
  // terminates unless each iteration moves a child out of the wrapper.
  return div.innerHTML;
}
A more general solution would get the property names as extra arguments, or say a space separated string, then iterate over the names to remove them.
I don't know about RegEx, but I sure as hell know about jQuery.
Convert the given HTML string into a DOM element, parse it, and return its contents.
/**
 * Strips the inline style attribute from every element in an HTML string.
 *
 * @param html - HTML source text.
 * @returns The markup with all style attributes removed.
 */
function cleanStyles(html){
  // Scratch container that is never attached to the page.
  var scratchNode = document.createElement('div');
  var $scratch = $(scratchNode);
  $scratch.html(html);
  var $everything = $scratch.find('*');
  $everything.removeAttr('style');
  return $scratch.html();
}
本文标签:
版权声明:本文标题:javascript - A regex to remove id, style, class attributes from HTML tags in JS - Stack Overflow 内容由热心网友自发贡献,该文观点仅代表作者本人, 转载请联系作者并注明出处:https://it.en369.cn/questions/1738645352a1590977.html, 本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容,一经查实,本站将立刻删除。


removeAttribute(), maybe? – David Thomas Commented Sep 10, 2012 at 22:26