June 9, 2011

Minify or Compress HTML Output with CodeIgniter

Recently I worked on improving page load times using Google's Page Speed tool. One of its suggestions is to minify the HTML; Page Speed indicated an improvement of around 10 to 30% from using a minifier.

So I decided to include it in our project, which is built on the CodeIgniter framework.

In CodeIgniter we can use hooks so that the HTML output is minified before it is sent to the browser. Here's how:

1) Enable hooks if not done previously

- Set $config['enable_hooks'] = TRUE; in /system/application/config/config.php.

2) Configure the hook function in /system/application/config/hooks.php as below:


// Replace CodeIgniter's default output step with the HTML minifier hook.
$hook['display_override'] = array(
    // Class declared in the hook file.
    'class'    => 'Minifyhtml',
    // Method CodeIgniter calls on that class.
    'function' => 'minify',
    // File that contains the class.
    'filename' => 'Minifyhtml.php',
    // Directory of the file, relative to the application folder.
    'filepath' => 'hooks',
);

3) Create Minifyhtml.php in the /system/application/hooks/ folder with the following content (the file must start with an opening `<?php` tag). The HTML minifying code is extracted from Minify (http://code.google.com/p/minify/), which is better than the other available minifiers because it won't break your existing HTML.

/**
 * CodeIgniter "display_override" hook that minifies the HTML output buffer.
 *
 * The minifying logic is adapted from the Minify project's HTML minifier.
 * SCRIPT, STYLE, PRE and TEXTAREA elements are swapped for placeholders
 * before whitespace is collapsed, so their content is never reformatted,
 * and are restored at the end.
 */
class Minifyhtml {

    /** @var string Markup being minified (line endings normalised to "\n"). */
    protected $_html = '';

    /** @var bool|null Treat content as XHTML? null = sniff the doctype in process(). */
    protected $_isXhtml = null;

    /** @var string|null Per-request prefix that makes placeholder tokens unlikely to clash with page content. */
    protected $_replacementHash = null;

    /** @var array Map of placeholder token => original markup it stands for. */
    protected $_placeholders = array();

    /** @var callable|null Optional callback applied to the content of STYLE elements. */
    protected $_cssMinifier = null;

    /** @var callable|null Optional callback applied to the content of SCRIPT elements. */
    protected $_jsMinifier = null;

    /**
     * Hook entry point: minify the output CodeIgniter has buffered and display it.
     *
     * Takes no arguments; the markup is read from the CodeIgniter output
     * class. The minified markup is written back and sent by process().
     *
     * @return string the minified markup
     */
    public function minify()
    {
        $CI =& get_instance();
        $buffer = $CI->output->get_output();

        $options = array();
        $min = new self();
        $min->pre_process($buffer, $options);
        return $min->process();
    }

    /**
     * Load the markup and options into the minifier.
     *
     * @param string $html
     *
     * @param array $options
     *
     * 'cssMinifier' : (optional) callback function to process content of STYLE elements.
     *
     * 'jsMinifier' : (optional) callback function to process content of SCRIPT elements.
     *                Note: the type attribute is ignored.
     *
     * 'xhtml' : (optional boolean) should content be treated as XHTML1.0? If unset,
     *           process() will sniff for an XHTML doctype.
     *
     * @return null
     */
    public function pre_process($html, $options = array())
    {
        // Cast first so an empty (null) output buffer is handled as an empty string.
        $this->_html = str_replace("\r\n", "\n", trim((string)$html));
        if (isset($options['xhtml'])) {
            $this->_isXhtml = (bool)$options['xhtml'];
        }
        if (isset($options['cssMinifier'])) {
            $this->_cssMinifier = $options['cssMinifier'];
        }
        if (isset($options['jsMinifier'])) {
            $this->_jsMinifier = $options['jsMinifier'];
        }
    }

    /**
     * Minify the markup, hand it back to CodeIgniter's output class and display it.
     *
     * @return string the minified markup
     */
    public function process()
    {
        if ($this->_isXhtml === null) {
            $this->_isXhtml = (false !== strpos($this->_html, '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML'));
        }

        $requestTime = isset($_SERVER['REQUEST_TIME']) ? $_SERVER['REQUEST_TIME'] : time();
        $this->_replacementHash = 'MINIFYHTML' . md5((string)$requestTime);
        $this->_placeholders = array();

        // replace SCRIPTs (and minify) with placeholders
        $this->_replaceWithCallback(
            '/(\\s*)(<script\\b[^>]*?>)([\\s\\S]*?)<\\/script>(\\s*)/i',
            '_removeScriptCB'
        );

        // replace STYLEs (and minify) with placeholders
        $this->_replaceWithCallback(
            '/\\s*(<style\\b[^>]*?>)([\\s\\S]*?)<\\/style>\\s*/i',
            '_removeStyleCB'
        );

        // remove HTML comments (not containing IE conditional comments).
        $this->_replaceWithCallback('/<!--([\\s\\S]*?)-->/', '_commentCB');

        // replace PREs with placeholders
        $this->_replaceWithCallback(
            '/\\s*(<pre\\b[^>]*?>[\\s\\S]*?<\\/pre>)\\s*/i',
            '_removePreCB'
        );

        // replace TEXTAREAs with placeholders
        $this->_replaceWithCallback(
            '/\\s*(<textarea\\b[^>]*?>[\\s\\S]*?<\\/textarea>)\\s*/i',
            '_removeTextareaCB'
        );

        // trim each line.
        // @todo take into account attribute values that span multiple lines.
        $this->_replace('/^\\s+|\\s+$/m', '');

        // remove ws around block/undisplayed elements
        $this->_replace(
            '/\\s+(<\\/?(?:area|base(?:font)?|blockquote|body'
            . '|caption|center|cite|col(?:group)?|dd|dir|div|dl|dt|fieldset|form'
            . '|frame(?:set)?|h[1-6]|head|hr|html|legend|li|link|map|menu|meta'
            . '|ol|opt(?:group|ion)|p|param|t(?:able|body|head|d|h|r|foot|itle)'
            . '|ul)\\b[^>]*>)/i',
            '$1'
        );

        // remove ws outside of all elements
        $this->_replaceWithCallback('/>([^<]+)</', '_outsideTagCB');

        // use newlines before 1st attribute in open tags (to limit line lengths)
        $this->_replace('/(<[a-z\\-]+)\\s+([^>]+>)/i', "$1\n$2");

        // fill placeholders
        $this->_html = str_replace(
            array_keys($this->_placeholders),
            array_values($this->_placeholders),
            $this->_html
        );

        // A display_override hook is responsible for sending the output itself.
        $CI =& get_instance();
        $CI->output->set_output($this->_html);
        $CI->output->_display();

        return $this->_html;
    }

    /**
     * Run preg_replace_callback() over the markup using one of this object's methods.
     *
     * The markup is left untouched when PCRE reports an error (null result),
     * so a failed pattern never blanks the page.
     *
     * @param string $pattern
     * @param string $method name of the callback method on this object
     * @return null
     */
    protected function _replaceWithCallback($pattern, $method)
    {
        $result = preg_replace_callback($pattern, array($this, $method), $this->_html);
        if ($result !== null) {
            $this->_html = $result;
        }
    }

    /**
     * Run preg_replace() over the markup, keeping the current markup on PCRE error.
     *
     * @param string $pattern
     * @param string $replacement
     * @return null
     */
    protected function _replace($pattern, $replacement)
    {
        $result = preg_replace($pattern, $replacement, $this->_html);
        if ($result !== null) {
            $this->_html = $result;
        }
    }

    /**
     * Drop an HTML comment unless it looks like an IE conditional comment.
     *
     * @param array $m regex match; $m[1] is the comment body
     * @return string the whole comment when kept, otherwise ''
     */
    protected function _commentCB($m)
    {
        return (0 === strpos($m[1], '[') || false !== strpos($m[1], '<!['))
            ? $m[0]
            : '';
    }

    /**
     * Store content and return the placeholder token that stands in for it.
     *
     * @param string $content
     * @return string
     */
    protected function _reservePlace($content)
    {
        $placeholder = '%' . $this->_replacementHash . count($this->_placeholders) . '%';
        $this->_placeholders[$placeholder] = $content;
        return $placeholder;
    }

    /**
     * Collapse leading/trailing whitespace of a text node to a single space.
     *
     * @param array $m regex match; $m[1] is the text between '>' and '<'
     * @return string
     */
    protected function _outsideTagCB($m)
    {
        return '>' . preg_replace('/^\\s+|\\s+$/', ' ', $m[1]) . '<';
    }

    /**
     * Protect a PRE element from whitespace changes.
     *
     * @param array $m regex match; $m[1] is the whole element
     * @return string placeholder token
     */
    protected function _removePreCB($m)
    {
        return $this->_reservePlace($m[1]);
    }

    /**
     * Protect a TEXTAREA element from whitespace changes.
     *
     * @param array $m regex match; $m[1] is the whole element
     * @return string placeholder token
     */
    protected function _removeTextareaCB($m)
    {
        return $this->_reservePlace($m[1]);
    }

    /**
     * Minify the content of a STYLE element and replace it with a placeholder.
     *
     * @param array $m regex match; $m[1] is the open tag, $m[2] the CSS
     * @return string placeholder token
     */
    protected function _removeStyleCB($m)
    {
        $openStyle = $m[1];
        $css = $m[2];

        // remove HTML comments
        $css = preg_replace('/(?:^\\s*<!--|-->\\s*$)/', '', $css);

        // remove CDATA section markers
        $css = $this->_removeCdata($css);

        // minify
        $minifier = $this->_cssMinifier ? $this->_cssMinifier : 'trim';
        $css = call_user_func($minifier, $css);

        return $this->_reservePlace($this->_needsCdata($css)
            ? "{$openStyle}/*<![CDATA[*/{$css}/*]]>*/</style>"
            : "{$openStyle}{$css}</style>"
        );
    }

    /**
     * Minify the content of a SCRIPT element and replace it with a placeholder.
     *
     * @param array $m regex match; $m[1]/$m[4] are the surrounding whitespace,
     *                 $m[2] is the open tag, $m[3] the script content
     * @return string placeholder token
     */
    protected function _removeScriptCB($m)
    {
        $openScript = $m[2];
        $js = $m[3];

        // whitespace surrounding? preserve at least one space
        $ws1 = ($m[1] === '') ? '' : ' ';
        $ws2 = ($m[4] === '') ? '' : ' ';

        // remove HTML comments (and ending "//" if present)
        $js = preg_replace('/(?:^\\s*<!--\\s*|\\s*(?:\\/\\/)?\\s*-->\\s*$)/', '', $js);

        // remove CDATA section markers
        $js = $this->_removeCdata($js);

        // minify
        $minifier = $this->_jsMinifier ? $this->_jsMinifier : 'trim';
        $js = call_user_func($minifier, $js);

        return $this->_reservePlace($this->_needsCdata($js)
            ? "{$ws1}{$openScript}/*<![CDATA[*/{$js}/*]]>*/</script>{$ws2}"
            : "{$ws1}{$openScript}{$js}</script>{$ws2}"
        );
    }

    /**
     * Strip CDATA section markers from a string, if it contains any.
     *
     * @param string $str
     * @return string
     */
    protected function _removeCdata($str)
    {
        return (false !== strpos($str, '<![CDATA['))
            ? str_replace(array('<![CDATA[', ']]>'), '', $str)
            : $str;
    }

    /**
     * Does inline content need a CDATA wrapper? Only for XHTML, and only when
     * the content contains '<', '&', '--' or ']]>'.
     *
     * @param string $str
     * @return bool
     */
    protected function _needsCdata($str)
    {
        return ($this->_isXhtml && preg_match('/(?:[<&]|\\-\\-|\\]\\]>)/', $str));
    }
}