It is better to parse HTML content with the DOM than with regular expressions. For example, suppose we have content like this:
- <p><img class="aligncenter size-full wp-image-3172" src="//wordpress.org/news/files/2014/04/theme1.jpg" alt="theme" width="1003" height="558">
- <p class="wp-caption-text">Wordpress Theme</p><br>
- Looking for a new theme should be easy and fun. Lose yourself in the boundless supply of free WordPress.org themes with the beautiful new theme browser.</p>
We want to strip out the p tag with class="wp-caption-text" together with its content ("Wordpress Theme"). That tag sits right below the image. I wrote a function to accomplish the task. Here is the code.
- <?php
/**
 * Remove every element matching a tag (and an optional attribute test) from an HTML string.
 *
 * The markup is parsed with DOMDocument, the XPath expression "//TAG" or
 * "//TAG[@ATTRIBUTE]" is evaluated, and each matching element is detached
 * from its parent together with everything it contains.
 *
 * $tag and $attribute are concatenated into the XPath expression verbatim,
 * so they must come from trusted code, not from user input.
 *
 * NOTE(review): DOMDocument::loadHTML() decides the input encoding itself;
 * content with non-ASCII characters and no charset declaration may need to be
 * converted by the caller first - confirm against your data.
 *
 * @param string $content   HTML markup to clean.
 * @param string $tag       Element name to match, e.g. "p".
 * @param string $attribute XPath attribute test without the leading "@",
 *                          e.g. "class='wp-caption-text'". An empty string
 *                          matches every $tag element.
 *
 * @return string|false The document as serialised by DOMDocument::saveHTML(),
 *                      or '' when $content is empty.
 */
function remove_content_in_tag($content, $tag, $attribute = '')
{
    $content = (string) $content;
    if ($content === '') {
        // loadHTML() rejects an empty string, and there is nothing to strip anyway.
        return '';
    }

    // Build the expression once instead of querying twice.
    $expression = '//' . $tag;
    if ((string) $attribute !== '') {
        $expression .= '[@' . $attribute . ']';
    }

    // Collect parser errors internally, but remember the caller's setting so
    // this process-wide switch can be put back afterwards.
    $previous = libxml_use_internal_errors(true);

    try {
        $doc = new DOMDocument();
        $doc->loadHTML($content);

        $xpath = new DOMXPath($doc);
        $nodes = $xpath->query($expression);

        // query() returns false for a malformed expression; leave the document untouched then.
        if ($nodes !== false) {
            // Snapshot the list first so removals cannot disturb the iteration.
            foreach (iterator_to_array($nodes) as $node) {
                if ($node->parentNode !== null) {
                    $node->parentNode->removeChild($node);
                }
            }
        }

        return $doc->saveHTML();
    } finally {
        if (!$previous) {
            // The caller was not collecting errors, so drop the ones buffered here.
            libxml_clear_errors();
        }
        libxml_use_internal_errors($previous);
    }
}
- ?>
To call the function and remove the tags, you can use it like this:
- <?php
- echo remove_content_in_tag($content,"p","class='wp-caption-text'");
- ?>
Here is the complete code.
- <?php
- $content = <<<EOF
- <p><img class="aligncenter size-full wp-image-3172" src="//wordpress.org/news/files/2014/04/theme1.jpg" alt="theme" width="1003" height="558"><p class="wp-caption-text">Wordpress Theme</p><br>
- Looking for a new theme should be easy and fun. Lose yourself in the boundless supply of free WordPress.org themes with the beautiful new theme browser.</p>
- EOF;
-
- // Uncomment to print the original, unmodified markup: echo $content;
-
- echo remove_content_in_tag($content,"p","class='wp-caption-text'"); // queries //p[@class='wp-caption-text'] and removes each match with its content
- // Alternative - remove every <p> element: echo remove_content_in_tag($content,"p","");
- // Alternative - remove every <img> element: echo remove_content_in_tag($content,"img","");
-
/**
 * Remove every element matching a tag (and an optional attribute test) from an HTML string.
 *
 * The markup is parsed with DOMDocument, the XPath expression "//TAG" or
 * "//TAG[@ATTRIBUTE]" is evaluated, and each matching element is detached
 * from its parent together with everything it contains.
 *
 * $tag and $attribute are concatenated into the XPath expression verbatim,
 * so they must come from trusted code, not from user input.
 *
 * NOTE(review): DOMDocument::loadHTML() decides the input encoding itself;
 * content with non-ASCII characters and no charset declaration may need to be
 * converted by the caller first - confirm against your data.
 *
 * @param string $content   HTML markup to clean.
 * @param string $tag       Element name to match, e.g. "p".
 * @param string $attribute XPath attribute test without the leading "@",
 *                          e.g. "class='wp-caption-text'". An empty string
 *                          matches every $tag element.
 *
 * @return string|false The document as serialised by DOMDocument::saveHTML(),
 *                      or '' when $content is empty.
 */
function remove_content_in_tag($content, $tag, $attribute = '')
{
    $content = (string) $content;
    if ($content === '') {
        // loadHTML() rejects an empty string, and there is nothing to strip anyway.
        return '';
    }

    // Build the expression once instead of querying twice.
    $expression = '//' . $tag;
    if ((string) $attribute !== '') {
        $expression .= '[@' . $attribute . ']';
    }

    // Collect parser errors internally, but remember the caller's setting so
    // this process-wide switch can be put back afterwards.
    $previous = libxml_use_internal_errors(true);

    try {
        $doc = new DOMDocument();
        $doc->loadHTML($content);

        $xpath = new DOMXPath($doc);
        $nodes = $xpath->query($expression);

        // query() returns false for a malformed expression; leave the document untouched then.
        if ($nodes !== false) {
            // Snapshot the list first so removals cannot disturb the iteration.
            foreach (iterator_to_array($nodes) as $node) {
                if ($node->parentNode !== null) {
                    $node->parentNode->removeChild($node);
                }
            }
        }

        return $doc->saveHTML();
    } finally {
        if (!$previous) {
            // The caller was not collecting errors, so drop the ones buffered here.
            libxml_clear_errors();
        }
        libxml_use_internal_errors($previous);
    }
}
- ?>