2

My HTML form has a few text input fields that can potentially receive Chinese, Japanese, or European characters, and special characters like £ — in short, Unicode characters.

To process these values on the server side (with PHP), can I assume that all browsers encode these characters as UTF-8 by default at the time of form submission?

Or is there a way to tell the browser to always submit these characters UTF-8 encoded, so that we can use utf8_decode to process these values?

Thanks.

1
  • Always clean your data as much as possible, making it as web-friendly as possible. Don't underestimate this, and you will have total control over your data. Commented Feb 28, 2011 at 22:40

3 Answers 3

2

Set the character encoding for the form page before you output the HTML.

header('Content-Type: text/html; charset=utf-8');
Sign up to request clarification or add additional context in comments.

Comments

0

Excerpt from here

/**
 * Input cleaner: makes incoming values (typically _GET/_POST data) safe to embed in HTML.
 *
 * @usage $var = $this->clean__makesafe_value( $var, array( "urldecode" ), true );
 *     OR $this->clean__makesafe_value( $var, array( "urldecode" ) );
 */
class Input
{
    /**
     * Numeric character references and the named entities they are rewritten to.
     *
     * Kept as a single map (instead of two parallel lists) so that a search string can
     * never drift out of step with its replacement. Alpha is accepted both in the
     * hexadecimal form the list originally carried and in the decimal form used by
     * every other entry.
     *
     * @var array
     */
    private static $_html_entities__numeric_to_named = array(
        # Latin-1
        "&#160;" => "&nbsp;",    "&#161;" => "&iexcl;",   "&#162;" => "&cent;",    "&#163;" => "&pound;",
        "&#164;" => "&curren;",  "&#165;" => "&yen;",     "&#166;" => "&brvbar;",  "&#167;" => "&sect;",
        "&#168;" => "&uml;",     "&#169;" => "&copy;",    "&#170;" => "&ordf;",    "&#171;" => "&laquo;",
        "&#172;" => "&not;",     "&#173;" => "&shy;",     "&#174;" => "&reg;",     "&#175;" => "&macr;",
        "&#176;" => "&deg;",     "&#177;" => "&plusmn;",  "&#178;" => "&sup2;",    "&#179;" => "&sup3;",
        "&#180;" => "&acute;",   "&#181;" => "&micro;",   "&#182;" => "&para;",    "&#183;" => "&middot;",
        "&#184;" => "&cedil;",   "&#185;" => "&sup1;",    "&#186;" => "&ordm;",    "&#187;" => "&raquo;",
        "&#188;" => "&frac14;",  "&#189;" => "&frac12;",  "&#190;" => "&frac34;",  "&#191;" => "&iquest;",
        "&#192;" => "&Agrave;",  "&#193;" => "&Aacute;",  "&#194;" => "&Acirc;",   "&#195;" => "&Atilde;",
        "&#196;" => "&Auml;",    "&#197;" => "&Aring;",   "&#198;" => "&AElig;",   "&#199;" => "&Ccedil;",
        "&#200;" => "&Egrave;",  "&#201;" => "&Eacute;",  "&#202;" => "&Ecirc;",   "&#203;" => "&Euml;",
        "&#204;" => "&Igrave;",  "&#205;" => "&Iacute;",  "&#206;" => "&Icirc;",   "&#207;" => "&Iuml;",
        "&#208;" => "&ETH;",     "&#209;" => "&Ntilde;",  "&#210;" => "&Ograve;",  "&#211;" => "&Oacute;",
        "&#212;" => "&Ocirc;",   "&#213;" => "&Otilde;",  "&#214;" => "&Ouml;",    "&#215;" => "&times;",
        "&#216;" => "&Oslash;",  "&#217;" => "&Ugrave;",  "&#218;" => "&Uacute;",  "&#219;" => "&Ucirc;",
        "&#220;" => "&Uuml;",    "&#221;" => "&Yacute;",  "&#222;" => "&THORN;",   "&#223;" => "&szlig;",
        "&#224;" => "&agrave;",  "&#225;" => "&aacute;",  "&#226;" => "&acirc;",   "&#227;" => "&atilde;",
        "&#228;" => "&auml;",    "&#229;" => "&aring;",   "&#230;" => "&aelig;",   "&#231;" => "&ccedil;",
        "&#232;" => "&egrave;",  "&#233;" => "&eacute;",  "&#234;" => "&ecirc;",   "&#235;" => "&euml;",
        "&#236;" => "&igrave;",  "&#237;" => "&iacute;",  "&#238;" => "&icirc;",   "&#239;" => "&iuml;",
        "&#240;" => "&eth;",     "&#241;" => "&ntilde;",  "&#242;" => "&ograve;",  "&#243;" => "&oacute;",
        "&#244;" => "&ocirc;",   "&#245;" => "&otilde;",  "&#246;" => "&ouml;",    "&#247;" => "&divide;",
        "&#248;" => "&oslash;",  "&#249;" => "&ugrave;",  "&#250;" => "&uacute;",  "&#251;" => "&ucirc;",
        "&#252;" => "&uuml;",    "&#253;" => "&yacute;",  "&#254;" => "&thorn;",   "&#255;" => "&yuml;",

        # Latin Extended-B
        "&#402;" => "&fnof;",

        # Greek capitals
        "&#913;" => "&Alpha;",   "&#914;" => "&Beta;",    "&#915;" => "&Gamma;",   "&#916;" => "&Delta;",
        "&#917;" => "&Epsilon;", "&#918;" => "&Zeta;",    "&#919;" => "&Eta;",     "&#920;" => "&Theta;",
        "&#921;" => "&Iota;",    "&#922;" => "&Kappa;",   "&#923;" => "&Lambda;",  "&#924;" => "&Mu;",
        "&#925;" => "&Nu;",      "&#926;" => "&Xi;",      "&#927;" => "&Omicron;", "&#928;" => "&Pi;",
        "&#929;" => "&Rho;",     "&#931;" => "&Sigma;",   "&#932;" => "&Tau;",     "&#933;" => "&Upsilon;",
        "&#934;" => "&Phi;",     "&#935;" => "&Chi;",     "&#936;" => "&Psi;",     "&#937;" => "&Omega;",

        # Greek small letters and symbols
        "&#x03B1;" => "&alpha;", "&#945;" => "&alpha;",
        "&#946;" => "&beta;",    "&#947;" => "&gamma;",   "&#948;" => "&delta;",   "&#949;" => "&epsilon;",
        "&#950;" => "&zeta;",    "&#951;" => "&eta;",     "&#952;" => "&theta;",   "&#953;" => "&iota;",
        "&#954;" => "&kappa;",   "&#955;" => "&lambda;",  "&#956;" => "&mu;",      "&#957;" => "&nu;",
        "&#958;" => "&xi;",      "&#959;" => "&omicron;", "&#960;" => "&pi;",      "&#961;" => "&rho;",
        "&#962;" => "&sigmaf;",  "&#963;" => "&sigma;",   "&#964;" => "&tau;",     "&#965;" => "&upsilon;",
        "&#966;" => "&phi;",     "&#967;" => "&chi;",     "&#968;" => "&psi;",     "&#969;" => "&omega;",
        "&#977;" => "&thetasym;","&#978;" => "&upsih;",   "&#982;" => "&piv;",

        # General punctuation, letterlike symbols, arrows
        "&#8226;" => "&bull;",   "&#8230;" => "&hellip;", "&#8242;" => "&prime;",  "&#8243;" => "&Prime;",
        "&#8254;" => "&oline;",  "&#8260;" => "&frasl;",  "&#8472;" => "&weierp;", "&#8465;" => "&image;",
        "&#8476;" => "&real;",   "&#8482;" => "&trade;",  "&#8501;" => "&alefsym;",
        "&#8592;" => "&larr;",   "&#8593;" => "&uarr;",   "&#8594;" => "&rarr;",   "&#8595;" => "&darr;",
        "&#8596;" => "&harr;",   "&#8629;" => "&crarr;",  "&#8656;" => "&lArr;",   "&#8657;" => "&uArr;",
        "&#8658;" => "&rArr;",   "&#8659;" => "&dArr;",   "&#8660;" => "&hArr;",

        # Mathematical operators
        "&#8704;" => "&forall;", "&#8706;" => "&part;",   "&#8707;" => "&exist;",  "&#8709;" => "&empty;",
        "&#8711;" => "&nabla;",  "&#8712;" => "&isin;",   "&#8713;" => "&notin;",  "&#8715;" => "&ni;",
        "&#8719;" => "&prod;",   "&#8721;" => "&sum;",    "&#8722;" => "&minus;",  "&#8727;" => "&lowast;",
        "&#8730;" => "&radic;",  "&#8733;" => "&prop;",   "&#8734;" => "&infin;",  "&#8736;" => "&ang;",
        "&#8743;" => "&and;",    "&#8744;" => "&or;",     "&#8745;" => "&cap;",    "&#8746;" => "&cup;",
        "&#8747;" => "&int;",    "&#8756;" => "&there4;", "&#8764;" => "&sim;",    "&#8773;" => "&cong;",
        "&#8776;" => "&asymp;",  "&#8800;" => "&ne;",     "&#8801;" => "&equiv;",  "&#8804;" => "&le;",
        "&#8805;" => "&ge;",     "&#8834;" => "&sub;",    "&#8835;" => "&sup;",    "&#8836;" => "&nsub;",
        "&#8838;" => "&sube;",   "&#8839;" => "&supe;",   "&#8853;" => "&oplus;",  "&#8855;" => "&otimes;",
        "&#8869;" => "&perp;",   "&#8901;" => "&sdot;",

        # Miscellaneous technical, geometric shapes, card suits
        "&#8968;" => "&lceil;",  "&#8969;" => "&rceil;",  "&#8970;" => "&lfloor;", "&#8971;" => "&rfloor;",
        "&#9001;" => "&lang;",   "&#9002;" => "&rang;",   "&#9674;" => "&loz;",
        "&#9824;" => "&spades;", "&#9827;" => "&clubs;",  "&#9829;" => "&hearts;", "&#9830;" => "&diams;",

        # Markup-significant characters
        "&#34;" => "&quot;",     "&#38;" => "&amp;",      "&#60;" => "&lt;",       "&#62;" => "&gt;",

        # Latin Extended-A, spacing modifiers, general punctuation
        "&#338;" => "&OElig;",   "&#339;" => "&oelig;",   "&#352;" => "&Scaron;",  "&#353;" => "&scaron;",
        "&#376;" => "&Yuml;",    "&#710;" => "&circ;",    "&#732;" => "&tilde;",
        "&#8194;" => "&ensp;",   "&#8195;" => "&emsp;",   "&#8201;" => "&thinsp;",
        "&#8204;" => "&zwnj;",   "&#8205;" => "&zwj;",    "&#8206;" => "&lrm;",    "&#8207;" => "&rlm;",
        "&#8211;" => "&ndash;",  "&#8212;" => "&mdash;",  "&#8216;" => "&lsquo;",  "&#8217;" => "&rsquo;",
        "&#8218;" => "&sbquo;",  "&#8220;" => "&ldquo;",  "&#8221;" => "&rdquo;",  "&#8222;" => "&bdquo;",
        "&#8224;" => "&dagger;", "&#8225;" => "&Dagger;", "&#8240;" => "&permil;",
        "&#8249;" => "&lsaquo;", "&#8250;" => "&rsaquo;", "&#8364;" => "&euro;",
    );

    /**
     * Makesafe: cleans a single value in place.
     *
     * The signature matches what array_walk_recursive() passes to its callback
     * (value by reference, key, extra argument), so the same method serves both
     * scalar values and array leaves.
     *
     * @param   mixed     REFERENCE: Data to make safe; overwritten with the cleaned string
     * @param   string    KEY [used as parameter-2 in the callback function of array_walk()]; not read here
     * @param   array     Additional callbacks to filter the value through, prior to cleaning
     * @return  boolean   Always TRUE; the cleaned value is delivered through the reference
     * @throws  Exception If an entry of $filters is not callable
     */
    private function _clean__makesafe ( &$val, $key, $filters = array() )
    {
        if ( $val === '' )                                                                         // Literally empty string, integer 0 excluded
        {
            return true;
        }

        # Let's apply additional functions, if any, to clean further
        if ( is_array( $filters ) )
        {
            foreach ( $filters as $_filter )
            {
                # is_callable() covers function names as well as array( $object, "method" ) pairs,
                # and - unlike function_exists() - accepts a non-string argument.
                if ( ! is_callable( $_filter ) )
                {
                    throw new Exception ("Parameter-2 of Input::_clean__makesafe() must be a valid function/method callback!");
                }

                # call_user_func() avoids the "$obj->$arr[1]()" form, which PHP 7+ parses as
                # "( $obj->$arr )[1]()" rather than as a call to the method named by $arr[1].
                $val = call_user_func( $_filter, $val );
            }
        }

        # Explicit cast: passing NULL to trim() is deprecated as of PHP 8.1
        $val = trim( (string) $val );
        // $val = $this->clean__stripslashes( $val );

        # NOTE(review): as it stands, search and replacement look identical here, which makes
        # this call a no-op. The search string was presumably an entity or a non-breaking
        # space that got lost in transit - confirm against the original source.
        $val = str_replace( " " , " " , $val );

        # NOTE(review): clean__control_characters() is not defined in this excerpt; it has to
        # be provided by the complete class, otherwise this call is a fatal error.
        $val = $this->clean__control_characters( $val );

        # Convert all carriage return combos
        # (double quotes are required: in single quotes "\r" and "\n" are a backslash plus a letter)
        $val = str_replace( array( "\r\n", "\n\r", "\r" ), "\n", $val );

        # Continue with cleaning...

        # Escape bare ampersands. Anything that already looks like a character reference
        # (&#123, &#x1F;, &name;) is left alone so the entity handling further down still sees it.
        # This has to run before the replacements below, which introduce ampersands of their own.
        $val = preg_replace( "/&(?!#(?:[0-9]+|[xX][0-9a-fA-F]+)|[a-zA-Z][a-zA-Z0-9]*;)/", "&amp;", $val );

        $val = str_replace( "<!--"          , "&#060;&#033;--"  , $val );
        $val = str_replace( "-->"           , "--&#062;"        , $val );
        $val = preg_replace( "/<script/i"   , "&#060;script"    , $val );
        $val = str_replace( ">"             , "&gt;"            , $val );
        $val = str_replace( "<"             , "&lt;"            , $val );
        $val = str_replace( '"'             , "&quot;"          , $val );
        $val = str_replace( "\n"            , "<br />"          , $val );                          // Convert literal newlines
        $val = str_replace( '$'             , "&#36;"           , $val );
        $val = str_replace( "!"             , "&#33;"           , $val );
        $val = str_replace( "'"             , "&#39;"           , $val );                          // Not a substitute for parameterised SQL queries

        # Convert HTML entities into friendly versions of them
        $val = str_replace(
            array_keys( self::$_html_entities__numeric_to_named ),
            array_values( self::$_html_entities__numeric_to_named ),
            $val
        );

        # Ensure unicode chars are OK
        $val = preg_replace("/&amp;(#[0-9]+|[a-z]+);/s", "&\\1;", $val );

        # Try and fix up HTML entities with missing ;
        $val = preg_replace( "/&#(\d+?)([^\d;])/i", "&#\\1;\\2", $val );

        return true;
    }

    /**
     * WRAPPER for _clean__makesafe(): Cleans incoming values (usually _GET, _POST)
     *
     * Arrays are cleaned leaf by leaf; anything else is cleaned directly. In both
     * cases $val itself is modified through the reference.
     *
     * @param    mixed    REF: Mixed value to parse
     * @param    array    Additional functions to filter the value through, prior to cleaning
     * @param    boolean  Whether to return the result or not, defaults to FALSE
     * @return   mixed    MIXED Cleaned value if $do_output is set; BOOLEAN TRUE otherwise
     * @throws   Exception If an entry of $filters is not callable
     */
    public function clean__makesafe_value ( &$val, $filters = array(), $do_output = false )
    {
        # If its an array, 'walk-through-it' recursively with Input::_clean__makesafe() ...
        if ( is_array( $val ) )
        {
            array_walk_recursive( $val, array( $this, "_clean__makesafe" ), $filters );
        }
        # ... otherwise, just apply Input::_clean__makesafe() to it.
        else
        {
            $this->_clean__makesafe( $val, null, $filters );
        }

        # If explicit return is requested, comply - otherwise go Boolean.
        if ( $do_output )
        {
            return $val;
        }
        return true;
    }
}

Comments

-1

Forms submit their data in the encoding given by your page's Content-Type.
XMLHttpRequests can send data in the UTF-8 charset.

To submit a form in UTF-8, either send it via an XMLHttpRequest or use the UTF-8 charset on your site.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.