I want to remove all HTML tags from a string in Excel VBA.
For example:
before_text = "text1 <br> text2 <a href = 'www.data.com' id = 'data'>text3</a> text4"
after_text = RemoveTags(before_text)
Result:
after_text = "text1 text2 text3 text4"
I want to remove all HTML tags from a string in Excel VBA.
For example:
before_text = "text1 <br> text2 <a href = 'www.data.com' id = 'data'>text3</a> text4"
after_text = RemoveTags(before_text)
Result:
after_text = "text1 text2 text3 text4"
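You can do this with a regular expression, using the late-bound VBScript.RegExp object (created with CreateObject("vbscript.regexp")).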
Code:
' Returns a copy of text with HTML tags and HTML comments removed.
'
' text    - the string to clean. Passed ByVal; the caller's value is never modified.
' Returns - text with every <...> tag and every <!-- ... --> comment deleted.
'           Whitespace that surrounded a removed tag is left untouched, so
'           "a <br> b" becomes "a  b" (two spaces).
Function RemoveHTML(ByVal text As String) As String
    ' Kept between calls so that cleaning many cells in a loop does not
    ' create a new RegExp object for every single call.
    Static regexObject As Object

    If regexObject Is Nothing Then
        Set regexObject = CreateObject("vbscript.regexp")
        With regexObject
            ' First alternative: a complete comment, matched lazily up to the first
            ' "-->", so comments that contain ">" or nested tags are removed whole.
            ' Second alternative: any ordinary tag (also covers <!DOCTYPE ...> and
            ' unterminated comments, exactly as the previous pattern did).
            .Pattern = "<!--[\s\S]*?-->|<[^<>]*>"
            .Global = True 'replace every match, not just the first
        End With
    End If

    RemoveHTML = regexObject.Replace(text, "")
End Function
Building on @zhihar's reply: to strip the HTML from every selected cell, iterate through the selection and apply the function to each cell that does not contain a formula.
' Returns a copy of text with HTML tags and HTML comments removed.
'
' text    - the string to clean. Passed ByVal; the caller's value is never modified.
' Returns - text with every <...> tag and every <!-- ... --> comment deleted.
'           Whitespace that surrounded a removed tag is left untouched, so
'           "a <br> b" becomes "a  b" (two spaces).
Function RemoveHTML(ByVal text As String) As String
    ' Kept between calls so that cleaning many cells in a loop does not
    ' create a new RegExp object for every single call.
    Static regexObject As Object

    If regexObject Is Nothing Then
        Set regexObject = CreateObject("vbscript.regexp")
        With regexObject
            ' First alternative: a complete comment, matched lazily up to the first
            ' "-->", so comments that contain ">" or nested tags are removed whole.
            ' Second alternative: any ordinary tag (also covers <!DOCTYPE ...> and
            ' unterminated comments, exactly as the previous pattern did).
            .Pattern = "<!--[\s\S]*?-->|<[^<>]*>"
            .Global = True 'replace every match, not just the first
        End With
    End If

    RemoveHTML = regexObject.Replace(text, "")
End Function
' Strips HTML tags, in place, from every text cell in the current selection.
' Cells holding formulas, numbers, dates, errors or nothing at all are left alone.
Sub StripHtmlSelected()
    Dim cell As Range 'declared explicitly so the macro compiles under Option Explicit

    ' Selection may be a chart, shape, etc.; only a Range can be walked cell by cell.
    If TypeName(Selection) <> "Range" Then Exit Sub

    For Each cell In Selection.Cells
        If Not cell.HasFormula Then
            ' Only string values can contain markup. Skipping everything else avoids
            ' a type-mismatch error on error cells (e.g. #N/A) and avoids round-tripping
            ' numbers and dates through text.
            If VarType(cell.Value) = vbString Then
                cell.Value = RemoveHTML(cell.Value)
            End If
        End If
    Next cell
End Sub