Return to Answer

Commonmark migration

Source Link

edited Jun 10, 2020 at 13:24

Community Bot

Native parser took 26220 ms.

Custom parser took 6471 ms.

Performance gain was 305.19%

Made changes to remove the need for a separate 'fraction' part as suggested.

Source Link

edited Aug 3, 2018 at 12:33

Alain

/// <summary>High performance double parser with rudimentary flexibility.</summary>
/// <returns>Returns true only if we can be certain we parsed the string correctly.</returns>
/// <remarks>Does not support thousand separators or whitespace.</remarks>
/// <remarks>Supports all culture-specific symbols specified by the NumberFormatInfo of the
/// <see cref="CultureInfo.CurrentCulture"/> at the time this static class is instantiated.
/// So long as all culture symbols are a single character in length.
/// TODO: In theory, this class could be made instantiable, take the culture as an argument,
///       and support changing the culture at runtime in case the file the user is uploading
///       was generated on a machine with different culture settings.</remarks>
/// <remarks>Supports leading negative signs and positive signs, scientific notation,
/// as well as Infinity, Negative Infinity, and NaN, string representations.</remarks>
/// <remarks>A string containing only a negative sign (usually "-") intentionally returns a
/// value of zero. This is because it's a common representation of 0 in accounting.</remarks>
public static bool FastTryParseDouble(string input, out double result)
{
    int length = input.Length;
    if (length <= 0)
    {
        result = Double.NaN;
        return false;
    }
    double sign = 1d;
    int currentIndex = 0;
    char nextChar = input[0];

    /**************** Sign (+/-) and Special Case String Representations *****************/
    // Handle all cases where the string does not start with a numeric character
    if (nextChar < '0' || nextChar > '9')
    {
        // Non-numeric 1-character strings must match one of the supported special cases.
        if (length == 1)
            return CheckForSpecialCaseDoubleStrings(input, out result);
        // For anything more than one character, this should be a sign character.
        if (nextChar == CharNegative)
            sign = -1d;
        // The very next character may also be the decimal separator.
        else if (nextChar == CharDecimalSeparator)
        {
            // In this case, we treat the integer part as 0 and skip to the fractional part.
            result = 0d;
            goto SkipIntegerPart;
        }
        // Finally, unless it was a '+' sign, input must match one of a set of special cases.
        else if (nextChar != CharPositive)
            return CheckForSpecialCaseDoubleStrings(input, out result);

        // Once the sign is consumed, advance to the next character for further parsing
        nextChar = input[unchecked(++currentIndex)];
        // We must once more check whether the character is numeric before proceeding.
        if (nextChar < '0' || nextChar > '9')
        {
            // If not numeric, at this point, the character can only be a decimal separator
            // (as in "-.123" or "+.123"), or else it must be part of a special case string
            // (as in "-∞"). So check for those.
            if (nextChar != CharDecimalSeparator)
                return CheckForSpecialCaseDoubleStrings(input, out result);
            result = 0d;
            goto SkipIntegerPart;
        }
    }

    /********************************** "Integer Part" ***********************************/
    // Treat all subsequent numeric characters as the "integer part" of the result.
    // Since we've already checked that the next character is numeric,
    // We can save 2 ops by initializing the result directly.
    unchecked
    {
        // Since we've already checked that the next character is numeric,
        // We can save 2 ops by initializing the result directly.
        result = nextChar - '0';
        while (true++currentIndex < length)
        {
            // Return the "integer part" immediately if we encounter the end of the string
            if (++currentIndex >= length)
            {
                result *= sign;
                return true;
            }
            nextChar = input[currentIndex];
            if (nextChar < '0' || nextChar > '9') break;
            result = result * 10d + (nextChar - '0');
        }
    }

    // This label and corresponding goto statements is a performance optimization to
    // allow us to efficiently skip "integer part" parsing in cases like ".123456"
    // Please don't be mad.
    SkipIntegerPart:  
    
    // The expected case is that the next character is a decimal separator.
    if (nextChar != CharDecimalSeparator)
   , {however
        // Ifthis notsection thismight isbe onlyskipped allowedin tonormal beuse thecases exponent(e.g. characteras in "1e18")
    // TODO: If we ifbroke (nextCharout !=of 'e'the &&while nextCharloop !=above 'E')due returnto false;
reaching the end of the
    // Skip fractional parsing and jump to handling exponential notation.
 string, this operation is superfluous. Is there resulta =way resultto *skip sign;it?
    }
 if (nextChar == elseCharDecimalSeparator)
    {
        /******************************* "Fractional Part" *******************************/
        double fractionalPart = 0d;
        int fractionStart = ++currentIndex;
        // Perform a similarTrack loopthe toindex constructat the fractional partstart of the decimalfraction part.
        unchecked
        {
            int fractionPos = ++currentIndex;
            // Continue shifting and adding to the result as before
            do
            {
                nextChar = input[currentIndex];
                // Note that we flip the OR here, because it's now more likely that
                // nextChar > '9' ('e' or 'E'), leading to an early exit condition.
                if (nextChar <> '0''9' || nextChar >< '9''0') break;
                fractionalPartresult = fractionalPartresult * 10d + (nextChar - '0');
            } while (++currentIndex < length);

            // AdjustUpdate thethis magnitudeto ofstore the fractional part andnumber addof itdigits toin the result
            int fractionLength = currentIndex -"fraction fractionStart;part".
            // UseWe ourwill tinyuse arraythis ofto negativeadjust powersdown ofthe 10magnitude ifof possiblethe double.
            iffractionPos (fractionLength= <currentIndex NegPow10Length)- fractionPos;
            // Use our tiny resultarray =of signnegative *powers (resultof +10 fractionalPartif *possible, NegPow10[fractionLength]);but fallback to
            // Fallback to our larger array (still fast), whose higher indices store negative powers.
            else// ifFinally, (fractionLengthwhile <practically MaxDoubleExponent)
unlikely, ridiculous strings (>300 characters)
            // resultcan =still signbe *supported (resultwith +a fractionalPartfinal *fallback Pow10[MaxDoubleExponentto +native fractionLength]);Math.Pow
            // ThisTODO: wouldIs onlyit happenpossible forto ridiculouscombine stringsthis (longermagnitude thanadjustment 300with characters),any
            // but technically we can still fall-back applicable adjustment due to usingscientific nativenotation?
 Math.Pow to do the shift.
       result *= fractionPos < NegPow10Length else?
                resultNegPow10[fractionPos] =: signfractionPos *< (resultMaxDoubleExponent ?
                Pow10[MaxDoubleExponent + fractionalPartfractionPos] *: Math.Pow(10, -fractionLength)fractionPos);
        }

        // If we've consumed every character, return now
        if (currentIndex >= length) return true;
        // If there are any more characters, the next character must be 'E' or 'e'
        if (nextChar != 'e' && nextChar != 'E') return false;
    }

    // Apply the sign now that we've added all digits that belong to the significand
    result *= sign;
    // If we have consumed every character in the string, return now.
    if (currentIndex >= length) return true;

    // The next character encountered must be an exponent character
    if (nextChar != 'e' && nextChar != 'E')
        return false;

    /**************************** "Scientific Notation Part" *****************************/
    // If we're at the end of the string (last character was 'e' or 'E'), that's an error
    if (unchecked(++currentIndex) >= length) return false;
    // Otherwise, advance the current character and begin parsing the exponent
    nextChar = input[currentIndex];
    bool exponentIsNegative = false;
    // The next character can only be a +/- sign, or a numeric character
    if (nextChar < '0' || nextChar > '9')
    {
        if (nextChar == CharNegative)
            exponentIsNegative = true;
        else if (nextChar != CharPositive)
            return false;
        // Again, require there to be at least one more character in the string after the sign
        if (unchecked(++currentIndex) >= length) return false;
        nextChar = input[currentIndex];
        // And verify that this next character is numeric
        if (nextChar < '0' || nextChar > '9') return false;
    }
    unchecked
    {
        // If we're at the end of the string (last character was 'e' or 'E'), that's an error
        if (++currentIndex >= length) return false;
        // Otherwise, advance the current character and begin parsing the exponent
        nextChar = input[currentIndex];
        bool exponentIsNegative = false;
        // The next character can only be a +/- sign, or a numeric character
        if (nextChar < '0' || nextChar > '9')
        {
            if (nextChar == CharNegative)
                exponentIsNegative = true;
            else if (nextChar != CharPositive)
                return false;
            // Again, require there to be at least one more character in the string after the sign
            if (++currentIndex >= length) return false;
            nextChar = input[currentIndex];
            // And verify that this next character is numeric
            if (nextChar < '0' || nextChar > '9') return false;
        }

        // Since we know the next character is a digit, we can initialize the exponent int
        // directly and avoid 2 wasted ops (multiplying by and adding to zero).
        int exponent = nextChar - '0';
        // Shift and add any additional digit characters
        while (++currentIndex < length)
        {
            nextChar = input[currentIndex];
            // If we encounter any non-numeric characters now, it's definitely an error
            if (nextChar < '0' || nextChar > '9') return false;
            exponent = exponent * 10 + (nextChar - '0');'0';
        }
        // ConvertApply the exponent. intoIf indexnegative, weour canindex jump to inis oura powers-of-tenlittle arraydifferent.
        if (exponentIsNegative)
            result *= exponent +=< MaxDoubleExponent;Pow10Length - MaxDoubleExponent ?
                // Fallback to Math.Pow if the lookup array doesn't cover it.
                Pow10[exponent + MaxDoubleExponent] : Math.Pow(10, -exponent);
        // If positive, our array covers all possible positive exponents - ensure its valid.
        else if (exponent > MaxDoubleExponent)
            return false;
        // Use our array of powers, unless the exponent is a very large negative number,else
        // in which case we fall back to computing the power on-the-fly.
        result *= exponent < Pow10Length ? Pow10[exponent] :
            Math.Pow(10, MaxDoubleExponent - exponent);Pow10[exponent];
    }
    // Doubles that underwent scientific notation parsing should be checked for overflow
    // (Otherwise, this isn't really a risk we don't expect strings of >308 characters)
    return true;!Double.IsInfinity(result);
}

/// <summary>Checks if the string matches one of a few supported special case
/// double strings. If so, assigns the result and returns true.</summary>
public static bool CheckForSpecialCaseDoubleStrings(string input, out double result)
{
    if (input == NumberFormat.PositiveInfinitySymbol)
        result = Double.PositiveInfinity;
    else if (input == NumberFormat.NegativeInfinitySymbol)
        result = Double.NegativeInfinity;
    else if (input == NumberFormat.NaNSymbol)
        result = Double.NaN;
    // Special Case: Excel has been known to format zero as "-".
    // We intentionally support it by returning zero now (most parsers would not)
    else if (input == NumberFormat.NegativeSign)
        result = 0d;
    // Special Case: Our organization treats the term "Unlimited" as referring
    // to Double.MaxValue (most parsers would not)
    else if (input.Equals("unlimited", StringComparison.OrdinalIgnoreCase))
        result = Double.MaxValue;
    // Anything else is not a valid input
    else
    {
        result = Double.NaN;
        return false;
    }
    return true;
}

/// <summary>The largest exponent (or smallest when negative) that can be given to a Double.</summary>
private const int MaxDoubleExponent = 308;

/// <summary>The number of elements that will be generated in the Pow10 array.</summary>
private const int Pow10Length = MaxDoubleExponent * 2 + 1;

/// <summary>A cache of all possible positive powers of 10 that might be required to
/// apply an exponent to a double (Indices 0-308), as well as the first 308 negative
/// exponents. (Indices 309-616)</summary>
private static readonly double[] Pow10 =
    Enumerable.Range(0, MaxDoubleExponent + 1).Select(i => Math.Pow(10, i))
        .Concat(Enumerable.Range(1, MaxDoubleExponent).Select(i => Math.Pow(10, -i)))
        .ToArray();

/// <summary>The number of negative powers to pre-compute and store in a small array.</summary>
private const int NegPow10Length = 16;

/// <summary>A cache of the first 15 negative powers of 10 for quick
/// magnitude adjustment of common parsed fractional parts of doubles.</summary>
/// <remarks>Even though this overlaps with the Pow10 array, it is kept separate so that
/// users that don't use scientific notation or extremely long fractional parts
/// might get a speedup by being able to reference the smaller array, which has a better
/// chance of being served out of L1/L2 cache.</remarks>
private static readonly double[] NegPow10 =
    Enumerable.Range(0, NegPow10Length).Select(i => Math.Pow(10, -i)).ToArray();

// Numbers without a fractional part
TestSuccess("0", 0d);
TestSuccess("1", 1d);
TestSuccess("-1", -1d);
TestSuccess("12345678901234", 12345678901234d);
TestSuccess("-12345678901234", -12345678901234d);
// Numbers with a fractional part
TestSuccess("123.45678", 123.45678);
TestSuccess("-123.45678", -123.45678);
// Numbers without an integer part
TestSuccess(".12345678901234", 0.12345678901234);
TestSuccess("-.12345678901234", -0.12345678901234);
// Various high-precision numbers
TestSuccess("0.12345678901234", 0.12345678901234);
TestSuccess("-0.12345678901234", -0.12345678901234);
TestSuccess("0.00000987654321", 0.00000987654321);
TestSuccess("-0.00000987654321", -0.00000987654321);
TestSuccess("1234567890123.0123456789", 1234567890123.0123456789);
TestSuccess("-1234567890123.0123456789", -1234567890123.0123456789);
// Numbers with very long fractional parts (more than 16 characters)
TestSuccess("0.00826499999979784", 0.00826499999979784);
TestSuccess("-0.00826499999979784", -0.00826499999979784);
TestSuccess("1.0123456789012345678901234567890", 1.0123456789012345678901234567890);
TestSuccess("-1.0123456789012345678901234567890", -1.0123456789012345678901234567890);
// Numbers with a leading positive sign
TestSuccess("+1", 1d);
TestSuccess("+12345678901234", 12345678901234d);
TestSuccess("+.12345678901234", 0.12345678901234);
TestSuccess("+0.00826499999979784", 0.00826499999979784);
// Very large numbers without scientific notation
TestSuccess("123456789000000000000000", 123456789000000000000000d);
TestSuccess("-123456789000000000000000", -123456789000000000000000d);
// Very small numbers without scientific notation
TestSuccess("0.00000000000000000123456789", 0.00000000000000000123456789);
TestSuccess("-0.00000000000000000123456789", -0.00000000000000000123456789);
// Scientific notation without a sign
TestSuccess("1.2345678e5", 1.2345678e5);
TestSuccess("1.2345678e5", 1.2345678e5);
TestSuccess("-1.2345678e5", -1.2345678e5);
// Scientific notation with a sign
TestSuccess("1.2345678e+25", 1.2345678e25);
TestSuccess("-1.2345678e+25", -1.2345678e25);
TestSuccess("1.2345678e-255", 1.2345678e-255);
TestSuccess("-1.2345678e-255", -1.2345678e-255);
// Epsilon, and other tiny doubles
// TODO: Known "failure" scenarios. Our parsing logic results in a return value of 0
// for these, but the native parser returns Double.Epsilon (smallest number greater
// than zero). I think we can live with this shortcoming.
//TestSuccess("4.94065645841247e-324", 4.94065645841247e-324);
//TestSuccess("-4.94065645841247e-324", -4.94065645841247e-324);
TestSuccess("3.33E-333", 3.33E-333);
TestSuccess("-3.33E-333", -3.33E-333);
TestSuccess("1E-1022", 1E-1022);
TestSuccess("-1E-1022", -1E-1022);
// Boundary cases
TestSuccess("1e0", 1);
TestSuccess("1e1", 10);
TestSuccess("1e-1", 0.1);
TestSuccess("1e-308", 1e-308);
TestSuccess("1e308", 1e308);
// Min and Max Double
TestSuccess("1.7976931348623157E+308", 1.7976931348623157E+308);
TestSuccess("-1.7976931348623157E+308", -1.7976931348623157E+308);
// Large Negative Exponents (Near-epsilon) doubles.
TestSuccess("1.23E-999", 1.23E-999);
TestSuccess("-1.23E-999", -1.23E-999);
// Special keywords
TestSuccess("∞", Double.PositiveInfinity);
TestSuccess("-∞", Double.NegativeInfinity);
TestSuccess("NaN", Double.NaN);
// Special case: "Unlimited" is used in our organization to refer to Double.MaxValue
TestSuccess("Unlimited", Double.MaxValue);
// Special case: "-" character only means zero in accounting formats.
TestSuccess("-", 0d);

Using a Stopwatch this time, and ran with 1,000,000,000 (a billion) strings just to quell any debate about timing sensitivity:

Native parser took 458326220 ms.

Custom parser took 11626471 ms.

Performance gain was 294305.41%19%

public static bool FastTryParseDouble(string input, out double result)
{
    int length = input.Length;
    if (length <= 0)
    {
        result = Double.NaN;
        return false;
    }
    double sign = 1d;
    int currentIndex = 0;
    char nextChar = input[0];

    /**************** Sign (+/-) and Special Case String Representations *****************/
    // Handle all cases where the string does not start with a numeric character
    if (nextChar < '0' || nextChar > '9')
    {
        // Non-numeric 1-character strings must match one of the supported special cases.
        if (length == 1)
            return CheckForSpecialCaseDoubleStrings(input, out result);
        // For anything more than one character, this should be a sign character.
        if (nextChar == CharNegative)
            sign = -1d;
        // The very next character may also be the decimal separator.
        else if (nextChar == CharDecimalSeparator)
        {
            // In this case, we treat the integer part as 0 and skip to the fractional part.
            result = 0d;
            goto SkipIntegerPart;
        }
        // Finally, unless it was a '+' sign, input must match one of a set of special cases.
        else if (nextChar != CharPositive)
            return CheckForSpecialCaseDoubleStrings(input, out result);

        // Once the sign is consumed, advance to the next character for further parsing
        nextChar = input[unchecked(++currentIndex)];
        // We must once more check whether the character is numeric before proceeding.
        if (nextChar < '0' || nextChar > '9')
        {
            // If not numeric, at this point, the character can only be a decimal separator
            // (as in "-.123" or "+.123"), or else it must be part of a special case string
            // (as in "-∞"). So check for those.
            if (nextChar != CharDecimalSeparator)
                return CheckForSpecialCaseDoubleStrings(input, out result);
            result = 0d;
            goto SkipIntegerPart;
        }
    }

    /********************************** "Integer Part" ***********************************/
    // Treat all subsequent numeric characters as the "integer part" of the result.
    unchecked
    {
        // Since we've already checked that the next character is numeric,
        // We can save 2 ops by initializing the result directly.
        result = nextChar - '0';
        while(true)
        {
            // Return the "integer part" immediately if we encounter the end of the string
            if (++currentIndex >= length)
            {
                result *= sign;
                return true;
            }
            nextChar = input[currentIndex];
            if (nextChar < '0' || nextChar > '9') break;
            result = result * 10d + (nextChar - '0');
        }
    }

    // This label and corresponding goto statements is a performance optimization to
    // allow us to efficiently skip "integer part" parsing in cases like ".123456"
    // Please don't be mad.
    SkipIntegerPart:  
    
    // The expected case is that the next character is a decimal separator.
    if (nextChar != CharDecimalSeparator)
    {
        // If not this is only allowed to be the exponent character
        if (nextChar != 'e' && nextChar != 'E') return false;
        // Skip fractional parsing and jump to handling exponential notation.
        result = result * sign;
    }
    else
    {
        /******************************* "Fractional Part" *******************************/
        double fractionalPart = 0d;
        int fractionStart = ++currentIndex;
        // Perform a similar loop to construct the fractional part of the decimal
        unchecked
        {
            do
            {
                nextChar = input[currentIndex];
                if (nextChar < '0' || nextChar > '9') break;
                fractionalPart = fractionalPart * 10d + (nextChar - '0');
            } while (++currentIndex < length);

            // Adjust the magnitude of the fractional part and add it to the result
            int fractionLength = currentIndex - fractionStart;
            // Use our tiny array of negative powers of 10 if possible.
            if (fractionLength < NegPow10Length)
                result = sign * (result + fractionalPart * NegPow10[fractionLength]);
            // Fallback to our larger array (still fast), whose higher indices store negative powers
            else if (fractionLength < MaxDoubleExponent)
                result = sign * (result + fractionalPart * Pow10[MaxDoubleExponent + fractionLength]);
            // This would only happen for ridiculous strings (longer than 300 characters),
            // but technically we can still fall-back to using native Math.Pow to do the shift.
            else
                result = sign * (result + fractionalPart * Math.Pow(10, -fractionLength));
        }

        // If we've consumed every character, return now
        if (currentIndex >= length) return true;
        // If there are any more characters, the next character must be 'E' or 'e'
        if (nextChar != 'e' && nextChar != 'E') return false;
    }

    /**************************** "Scientific Notation Part" *****************************/
    // If we're at the end of the string (last character was 'e' or 'E'), that's an error
    if (unchecked(++currentIndex) >= length) return false;
    // Otherwise, advance the current character and begin parsing the exponent
    nextChar = input[currentIndex];
    bool exponentIsNegative = false;
    // The next character can only be a +/- sign, or a numeric character
    if (nextChar < '0' || nextChar > '9')
    {
        if (nextChar == CharNegative)
            exponentIsNegative = true;
        else if (nextChar != CharPositive)
            return false;
        // Again, require there to be at least one more character in the string after the sign
        if (unchecked(++currentIndex) >= length) return false;
        nextChar = input[currentIndex];
        // And verify that this next character is numeric
        if (nextChar < '0' || nextChar > '9') return false;
    }
    unchecked
    {
        // Since we know the next character is a digit, we can initialize the exponent int
        // directly and avoid 2 wasted ops (multiplying by and adding to zero).
        int exponent = nextChar - '0';
        // Shift and add any additional digit characters
        while (++currentIndex < length)
        {
            nextChar = input[currentIndex];
            // If we encounter any non-numeric characters now, it's definitely an error
            if (nextChar < '0' || nextChar > '9') return false;
            exponent = exponent * 10 + (nextChar - '0');
        }
        // Convert the exponent into index we can jump to in our powers-of-ten array
        if (exponentIsNegative)
            exponent += MaxDoubleExponent;
        else if (exponent > MaxDoubleExponent)
            return false;
        // Use our array of powers, unless the exponent is a very large negative number,
        // in which case we fall back to computing the power on-the-fly.
        result *= exponent < Pow10Length ? Pow10[exponent] :
            Math.Pow(10, MaxDoubleExponent - exponent);
    }
    return true;
}

/// <summary>Checks if the string matches one of a few supported special case
/// double strings. If so, assigns the result and returns true.</summary>
public static bool CheckForSpecialCaseDoubleStrings(string input, out double result)
{
    if (input == NumberFormat.PositiveInfinitySymbol)
        result = Double.PositiveInfinity;
    else if (input == NumberFormat.NegativeInfinitySymbol)
        result = Double.NegativeInfinity;
    else if (input == NumberFormat.NaNSymbol)
        result = Double.NaN;
    // Special Case: Excel has been known to format zero as "-".
    // We intentionally support it by returning zero now (most parsers would not)
    else if (input == NumberFormat.NegativeSign)
        result = 0d;
    // Special Case: Our organization treats the term "Unlimited" as referring
    // to Double.MaxValue (most parsers would not)
    else if (input.Equals("unlimited", StringComparison.OrdinalIgnoreCase))
        result = Double.MaxValue;
    // Anything else is not a valid input
    else
    {
        result = Double.NaN;
        return false;
    }
    return true;
}

/// <summary>The largest exponent (or smallest when negative) that can be given to a Double.</summary>
private const int MaxDoubleExponent = 308;

/// <summary>The number of elements that will be generated in the Pow10 array.</summary>
private const int Pow10Length = MaxDoubleExponent * 2 + 1;

/// <summary>A cache of all possible positive powers of 10 that might be required to
/// apply an exponent to a double (Indices 0-308), as well as the first 308 negative
/// exponents. (Indices 309-616)</summary>
private static readonly double[] Pow10 =
    Enumerable.Range(0, MaxDoubleExponent + 1).Select(i => Math.Pow(10, i))
        .Concat(Enumerable.Range(1, MaxDoubleExponent).Select(i => Math.Pow(10, -i)))
        .ToArray();

/// <summary>The number of negative powers to pre-compute and store in a small array.</summary>
private const int NegPow10Length = 16;

/// <summary>A cache of the first 15 negative powers of 10 for quick
/// magnitude adjustment of common parsed fractional parts of doubles.</summary>
/// <remarks>Even though this overlaps with the Pow10 array, it is kept separate so that
/// users that don't use scientific notation or extremely long fractional parts
/// might get a speedup by being able to reference the smaller array, which has a better
/// chance of being served out of L1/L2 cache.</remarks>
private static readonly double[] NegPow10 =
    Enumerable.Range(0, NegPow10Length).Select(i => Math.Pow(10, -i)).ToArray();

// Numbers without a fractional part
TestSuccess("0", 0d);
TestSuccess("1", 1d);
TestSuccess("-1", -1d);
TestSuccess("12345678901234", 12345678901234d);
TestSuccess("-12345678901234", -12345678901234d);
// Numbers with a fractional part
TestSuccess("123.45678", 123.45678);
TestSuccess("-123.45678", -123.45678);
// Numbers without an integer part
TestSuccess(".12345678901234", 0.12345678901234);
TestSuccess("-.12345678901234", -0.12345678901234);
// Various high-precision numbers
TestSuccess("0.12345678901234", 0.12345678901234);
TestSuccess("-0.12345678901234", -0.12345678901234);
TestSuccess("0.00000987654321", 0.00000987654321);
TestSuccess("-0.00000987654321", -0.00000987654321);
TestSuccess("1234567890123.0123456789", 1234567890123.0123456789);
TestSuccess("-1234567890123.0123456789", -1234567890123.0123456789);
// Numbers with very long fractional parts (more than 16 characters)
TestSuccess("0.00826499999979784", 0.00826499999979784);
TestSuccess("-0.00826499999979784", -0.00826499999979784);
TestSuccess("1.0123456789012345678901234567890", 1.0123456789012345678901234567890);
TestSuccess("-1.0123456789012345678901234567890", -1.0123456789012345678901234567890);
// Numbers with a leading positive sign
TestSuccess("+1", 1d);
TestSuccess("+12345678901234", 12345678901234d);
TestSuccess("+.12345678901234", 0.12345678901234);
TestSuccess("+0.00826499999979784", 0.00826499999979784);
// Very large numbers without scientific notation
TestSuccess("123456789000000000000000", 123456789000000000000000d);
TestSuccess("-123456789000000000000000", -123456789000000000000000d);
// Very small numbers without scientific notation
TestSuccess("0.00000000000000000123456789", 0.00000000000000000123456789);
TestSuccess("-0.00000000000000000123456789", -0.00000000000000000123456789);
// Scientific notation without a sign
TestSuccess("1.2345678e5", 1.2345678e5);
TestSuccess("1.2345678e5", 1.2345678e5);
TestSuccess("-1.2345678e5", -1.2345678e5);
// Scientific notation with a sign
TestSuccess("1.2345678e+25", 1.2345678e25);
TestSuccess("-1.2345678e+25", -1.2345678e25);
TestSuccess("1.2345678e-255", 1.2345678e-255);
TestSuccess("-1.2345678e-255", -1.2345678e-255);
// Boundary cases
TestSuccess("1e0", 1);
TestSuccess("1e1", 10);
TestSuccess("1e-1", 0.1);
TestSuccess("1e308", 1e308);
// Min and Max Double
TestSuccess("1.7976931348623157E+308", 1.7976931348623157E+308);
TestSuccess("-1.7976931348623157E+308", -1.7976931348623157E+308);
// Large Negative Exponents (Near-epsilon) doubles.
TestSuccess("1.23E-999", 1.23E-999);
TestSuccess("-1.23E-999", -1.23E-999);
// Special keywords
TestSuccess("∞", Double.PositiveInfinity);
TestSuccess("-∞", Double.NegativeInfinity);
TestSuccess("NaN", Double.NaN);
// Special case: "Unlimited" is used in our organization to refer to Double.MaxValue
TestSuccess("Unlimited", Double.MaxValue);
// Special case: "-" character only means zero in accounting formats.
TestSuccess("-", 0d);

Using a Stopwatch this time, just to quell any debate:

Native parser took 4583 ms.

Custom parser took 1162 ms.

Performance gain was 294.41%

/// <summary>High performance double parser with rudimentary flexibility.</summary>
/// <returns>Returns true only if we can be certain we parsed the string correctly.</returns>
/// <remarks>Does not support thousand separators or whitespace.</remarks>
/// <remarks>Supports all culture-specific symbols specified by the NumberFormatInfo of the
/// <see cref="CultureInfo.CurrentCulture"/> at the time this static class is instantiated.
/// So long as all culture symbols are a single character in length.
/// TODO: In theory, this class could be made instantiable, take the culture as an argument,
///       and support changing the culture at runtime in case the file the user is uploading
///       was generated on a machine with different culture settings.</remarks>
/// <remarks>Supports leading negative signs and positive signs, scientific notation,
/// as well as Infinity, Negative Infinity, and NaN, string representations.</remarks>
/// <remarks>A string containing only a negative sign (usually "-") intentionally returns a
/// value of zero. This is because it's a common representation of 0 in accounting.</remarks>
public static bool FastTryParseDouble(string input, out double result)
{
    int length = input.Length;
    if (length <= 0)
    {
        result = Double.NaN;
        return false;
    }
    double sign = 1d;
    int currentIndex = 0;
    char nextChar = input[0];

    /**************** Sign (+/-) and Special Case String Representations *****************/
    // Handle all cases where the string does not start with a numeric character
    if (nextChar < '0' || nextChar > '9')
    {
        // Non-numeric 1-character strings must match one of the supported special cases.
        if (length == 1)
            return CheckForSpecialCaseDoubleStrings(input, out result);
        // For anything more than one character, this should be a sign character.
        if (nextChar == CharNegative)
            sign = -1d;
        // The very next character may also be the decimal separator.
        else if (nextChar == CharDecimalSeparator)
        {
            // In this case, we treat the integer part as 0 and skip to the fractional part.
            result = 0d;
            goto SkipIntegerPart;
        }
        // Finally, unless it was a '+' sign, input must match one of a set of special cases.
        else if (nextChar != CharPositive)
            return CheckForSpecialCaseDoubleStrings(input, out result);

        // Once the sign is consumed, advance to the next character for further parsing
        nextChar = input[unchecked(++currentIndex)];
        // We must once more check whether the character is numeric before proceeding.
        if (nextChar < '0' || nextChar > '9')
        {
            // If not numeric, at this point, the character can only be a decimal separator
            // (as in "-.123" or "+.123"), or else it must be part of a special case string
            // (as in "-∞"). So check for those.
            if (nextChar != CharDecimalSeparator)
                return CheckForSpecialCaseDoubleStrings(input, out result);
            result = 0d;
            goto SkipIntegerPart;
        }
    }

    /********************************** "Integer Part" ***********************************/
    // Treat all subsequent numeric characters as the "integer part" of the result.
    // Since we've already checked that the next character is numeric,
    // We can save 2 ops by initializing the result directly.
    unchecked
    {
        result = nextChar - '0';
        while (++currentIndex < length)
        {
            nextChar = input[currentIndex];
            if (nextChar < '0' || nextChar > '9') break;
            result = result * 10d + (nextChar - '0');
        }
    }

    // This label and corresponding goto statements is a performance optimization to
    // allow us to efficiently skip "integer part" parsing in cases like ".123456"
    // Please don't be mad.
    SkipIntegerPart:

    // The expected case is that the next character is a decimal separator, however
    // this section might be skipped in normal use cases (e.g. as in "1e18")
    // TODO: If we broke out of the while loop above due to reaching the end of the
    //       string, this operation is superfluous. Is there a way to skip it?
    if (nextChar == CharDecimalSeparator)
    {
        /******************************* "Fractional Part" *******************************/
        // Track the index at the start of the fraction part.
        unchecked
        {
            int fractionPos = ++currentIndex;
            // Continue shifting and adding to the result as before
            do
            {
                nextChar = input[currentIndex];
                // Note that we flip the OR here, because it's now more likely that
                // nextChar > '9' ('e' or 'E'), leading to an early exit condition.
                if (nextChar > '9' || nextChar < '0') break;
                result = result * 10d + (nextChar - '0');
            } while (++currentIndex < length);

            // Update this to store the number of digits in the "fraction part".
            // We will use this to adjust down the magnitude of the double.
            fractionPos = currentIndex - fractionPos;
            // Use our tiny array of negative powers of 10 if possible, but fallback to
            // our larger array (still fast), whose higher indices store negative powers.
            // Finally, while practically unlikely, ridiculous strings (>300 characters)
            // can still be supported with a final fallback to native Math.Pow
            // TODO: Is it possible to combine this magnitude adjustment with any
            //       applicable adjustment due to scientific notation?
            result *= fractionPos < NegPow10Length ?
                NegPow10[fractionPos] : fractionPos < MaxDoubleExponent ?
                Pow10[MaxDoubleExponent + fractionPos] : Math.Pow(10, -fractionPos);
        }
    }

    // Apply the sign now that we've added all digits that belong to the significand
    result *= sign;
    // If we have consumed every character in the string, return now.
    if (currentIndex >= length) return true;

    // The next character encountered must be an exponent character
    if (nextChar != 'e' && nextChar != 'E')
        return false;

    /**************************** "Scientific Notation Part" *****************************/
    unchecked
    {
        // If we're at the end of the string (last character was 'e' or 'E'), that's an error
        if (++currentIndex >= length) return false;
        // Otherwise, advance the current character and begin parsing the exponent
        nextChar = input[currentIndex];
        bool exponentIsNegative = false;
        // The next character can only be a +/- sign, or a numeric character
        if (nextChar < '0' || nextChar > '9')
        {
            if (nextChar == CharNegative)
                exponentIsNegative = true;
            else if (nextChar != CharPositive)
                return false;
            // Again, require there to be at least one more character in the string after the sign
            if (++currentIndex >= length) return false;
            nextChar = input[currentIndex];
            // And verify that this next character is numeric
            if (nextChar < '0' || nextChar > '9') return false;
        }

        // Since we know the next character is a digit, we can initialize the exponent int
        // directly and avoid 2 wasted ops (multiplying by and adding to zero).
        int exponent = nextChar - '0';
        // Shift and add any additional digit characters
        while (++currentIndex < length)
        {
            nextChar = input[currentIndex];
            // If we encounter any non-numeric characters now, it's definitely an error
            if (nextChar < '0' || nextChar > '9') return false;
            exponent = exponent * 10 + nextChar - '0';
        }
        // Apply the exponent. If negative, our index jump is a little different.
        if (exponentIsNegative)
            result *= exponent < Pow10Length - MaxDoubleExponent ?
                // Fallback to Math.Pow if the lookup array doesn't cover it.
                Pow10[exponent + MaxDoubleExponent] : Math.Pow(10, -exponent);
        // If positive, our array covers all possible positive exponents - ensure its valid.
        else if (exponent > MaxDoubleExponent)
            return false;
        else
            result *= Pow10[exponent];
    }
    // Doubles that underwent scientific notation parsing should be checked for overflow
    // (Otherwise, this isn't really a risk we don't expect strings of >308 characters)
    return !Double.IsInfinity(result);
}

/// <summary>Checks if the string matches one of a few supported special case
/// double strings. If so, assigns the result and returns true.</summary>
public static bool CheckForSpecialCaseDoubleStrings(string input, out double result)
{
    if (input == NumberFormat.PositiveInfinitySymbol)
        result = Double.PositiveInfinity;
    else if (input == NumberFormat.NegativeInfinitySymbol)
        result = Double.NegativeInfinity;
    else if (input == NumberFormat.NaNSymbol)
        result = Double.NaN;
    // Special Case: Excel has been known to format zero as "-".
    // We intentionally support it by returning zero now (most parsers would not)
    else if (input == NumberFormat.NegativeSign)
        result = 0d;
    // Special Case: Our organization treats the term "Unlimited" as referring
    // to Double.MaxValue (most parsers would not)
    else if (input.Equals("unlimited", StringComparison.OrdinalIgnoreCase))
        result = Double.MaxValue;
    // Anything else is not a valid input
    else
    {
        result = Double.NaN;
        return false;
    }
    return true;
}

/// <summary>The largest exponent (or smallest when negative) that can be given to a Double.</summary>
private const int MaxDoubleExponent = 308;

/// <summary>The number of elements that will be generated in the Pow10 array.</summary>
private const int Pow10Length = MaxDoubleExponent * 2 + 1;

/// <summary>A cache of all possible positive powers of 10 that might be required to
/// apply an exponent to a double (Indices 0-308), as well as the first 308 negative
/// exponents. (Indices 309-616)</summary>
private static readonly double[] Pow10 =
    Enumerable.Range(0, MaxDoubleExponent + 1).Select(i => Math.Pow(10, i))
        .Concat(Enumerable.Range(1, MaxDoubleExponent).Select(i => Math.Pow(10, -i)))
        .ToArray();

/// <summary>The number of negative powers to pre-compute and store in a small array.</summary>
private const int NegPow10Length = 16;

/// <summary>A cache of the first 15 negative powers of 10 for quick
/// magnitude adjustment of common parsed fractional parts of doubles.</summary>
/// <remarks>Even though this overlaps with the Pow10 array, it is kept separate so that
/// users that don't use scientific notation or extremely long fractional parts
/// might get a speedup by being able to reference the smaller array, which has a better
/// chance of being served out of L1/L2 cache.</remarks>
private static readonly double[] NegPow10 =
    Enumerable.Range(0, NegPow10Length).Select(i => Math.Pow(10, -i)).ToArray();

// Numbers without a fractional part
TestSuccess("0", 0d);
TestSuccess("1", 1d);
TestSuccess("-1", -1d);
TestSuccess("12345678901234", 12345678901234d);
TestSuccess("-12345678901234", -12345678901234d);
// Numbers with a fractional part
TestSuccess("123.45678", 123.45678);
TestSuccess("-123.45678", -123.45678);
// Numbers without an integer part
TestSuccess(".12345678901234", 0.12345678901234);
TestSuccess("-.12345678901234", -0.12345678901234);
// Various high-precision numbers
TestSuccess("0.12345678901234", 0.12345678901234);
TestSuccess("-0.12345678901234", -0.12345678901234);
TestSuccess("0.00000987654321", 0.00000987654321);
TestSuccess("-0.00000987654321", -0.00000987654321);
TestSuccess("1234567890123.0123456789", 1234567890123.0123456789);
TestSuccess("-1234567890123.0123456789", -1234567890123.0123456789);
// Numbers with very long fractional parts (more than 16 characters)
TestSuccess("0.00826499999979784", 0.00826499999979784);
TestSuccess("-0.00826499999979784", -0.00826499999979784);
TestSuccess("1.0123456789012345678901234567890", 1.0123456789012345678901234567890);
TestSuccess("-1.0123456789012345678901234567890", -1.0123456789012345678901234567890);
// Numbers with a leading positive sign
TestSuccess("+1", 1d);
TestSuccess("+12345678901234", 12345678901234d);
TestSuccess("+.12345678901234", 0.12345678901234);
TestSuccess("+0.00826499999979784", 0.00826499999979784);
// Very large numbers without scientific notation
TestSuccess("123456789000000000000000", 123456789000000000000000d);
TestSuccess("-123456789000000000000000", -123456789000000000000000d);
// Very small numbers without scientific notation
TestSuccess("0.00000000000000000123456789", 0.00000000000000000123456789);
TestSuccess("-0.00000000000000000123456789", -0.00000000000000000123456789);
// Scientific notation without a sign
TestSuccess("1.2345678e5", 1.2345678e5);
TestSuccess("1.2345678e5", 1.2345678e5);
TestSuccess("-1.2345678e5", -1.2345678e5);
// Scientific notation with a sign
TestSuccess("1.2345678e+25", 1.2345678e25);
TestSuccess("-1.2345678e+25", -1.2345678e25);
TestSuccess("1.2345678e-255", 1.2345678e-255);
TestSuccess("-1.2345678e-255", -1.2345678e-255);
// Epsilon, and other tiny doubles
// TODO: Known "failure" scenarios. Our parsing logic results in a return value of 0
// for these, but the native parser returns Double.Epsilon (smallest number greater
// than zero). I think we can live with this shortcoming.
//TestSuccess("4.94065645841247e-324", 4.94065645841247e-324);
//TestSuccess("-4.94065645841247e-324", -4.94065645841247e-324);
TestSuccess("3.33E-333", 3.33E-333);
TestSuccess("-3.33E-333", -3.33E-333);
TestSuccess("1E-1022", 1E-1022);
TestSuccess("-1E-1022", -1E-1022);
// Boundary cases
TestSuccess("1e0", 1);
TestSuccess("1e1", 10);
TestSuccess("1e-1", 0.1);
TestSuccess("1e-308", 1e-308);
TestSuccess("1e308", 1e308);
// Min and Max Double
TestSuccess("1.7976931348623157E+308", 1.7976931348623157E+308);
TestSuccess("-1.7976931348623157E+308", -1.7976931348623157E+308);
// Large Negative Exponents (Near-epsilon) doubles.
TestSuccess("1.23E-999", 1.23E-999);
TestSuccess("-1.23E-999", -1.23E-999);
// Special keywords
TestSuccess("∞", Double.PositiveInfinity);
TestSuccess("-∞", Double.NegativeInfinity);
TestSuccess("NaN", Double.NaN);
// Special case: "Unlimited" is used in our organization to refer to Double.MaxValue
TestSuccess("Unlimited", Double.MaxValue);
// Special case: "-" character only means zero in accounting formats.
TestSuccess("-", 0d);

Using a Stopwatch this time, and ran with 1,000,000,000 (a billion) strings just to quell any debate about timing sensitivity:

Native parser took 26220 ms.

Custom parser took 6471 ms.

Performance gain was 305.19%

Indentation got messed up

Source Link

edited Aug 2, 2018 at 16:15

Alain

public static bool FastTryParseDouble(string input, out double result)
{
    int length = input.Length;
    if (length <= 0)
    {
        result = Double.NaN;
        return false;
    }
    double sign = 1d;
    int currentIndex = 0;
    char nextChar = input[0];

    /**************** Sign (+/-) and Special Case String Representations *****************/
    // Handle all cases where the string does not start with a numeric character
    if (nextChar < '0' || nextChar > '9')
    {
        // Non-numeric 1-character strings must match one of the supported special cases.
        if (length == 1)
            return CheckForSpecialCaseDoubleStrings(input, out result);
        // For anything more than one character, this should be a sign character.
        if (nextChar == CharNegative)
            sign = -1d;
        // The very next character may also be the decimal separator.
        else if (nextChar == CharDecimalSeparator)
        {
            // In this case, we treat the integer part as 0 and skip to the fractional part.
            result = 0d;
            goto SkipIntegerPart;
        }
        // Finally, unless it was a '+' sign, input must match one of a set of special cases.
        else if (nextChar != CharPositive)
            return CheckForSpecialCaseDoubleStrings(input, out result);

        // Once the sign is consumed, advance to the next character for further parsing
        nextChar = input[unchecked(++currentIndex)];
        // We must once more check whether the character is numeric before proceeding.
        if (nextChar < '0' || nextChar > '9')
        {
            // If not numeric, at this point, the character can only be a decimal separator
            // (as in "-.123" or "+.123"), or else it must be part of a special case string
            // (as in "-∞"). So check for those.
            if (nextChar != CharDecimalSeparator)
                return CheckForSpecialCaseDoubleStrings(input, out result);
            result = 0d;
            goto SkipIntegerPart;
        }
    }

    /********************************** "Integer Part" ***********************************/
    // Treat all subsequent numeric characters as the "integer part" of the result.
    unchecked
    {
        // Since we've already checked that the next character is numeric,
        // We can save 2 ops by initializing the result directly.
        result = nextChar - '0';
    while(true)
    {
        // Return the "integer part" immediately if we encounter the end of the string
        if while(++currentIndex >= lengthtrue)
        {
            // Return the "integer part" immediately if we encounter the end of the string
            if (++currentIndex >= length)
            {
                result *= sign;
                return true;
            }
            nextChar = input[currentIndex];
            if (nextChar < '0' || nextChar > '9') break;
            result = result * 10d + (nextChar - '0');
        }
        nextChar = input[currentIndex];
        if (nextChar < '0' || nextChar > '9') break;
        result = result * 10d + (nextChar - '0');
    }
    }

    // This label and corresponding goto statements is a performance optimization to
    // allow us to efficiently skip "integer part" parsing in cases like ".123456"
    // Please don't be mad.
    SkipIntegerPart:  
    
    // The expected case is that the next character is a decimal separator.
    if (nextChar != CharDecimalSeparator)
    {
        // If not this is only allowed to be the exponent character
        if (nextChar != 'e' && nextChar != 'E') return false;
        // Skip fractional parsing and jump to handling exponential notation.
        result = result * sign;
    }
    else
    {
    /******************************* "Fractional Part" *******************************/
    double fractionalPart = 0d;
    int fractionStart = ++currentIndex;
    // Perform a similar loop to construct the fractional part of the decimal
    unchecked
    {
        do/******************************* "Fractional Part" *******************************/
        double fractionalPart = 0d;
        int fractionStart = ++currentIndex;
        // Perform a similar loop to construct the fractional part of the decimal
        unchecked
        {
            do
            {
                nextChar = input[currentIndex];
                if (nextChar < '0' || nextChar > '9') break;
                fractionalPart = fractionalPart * 10d + (nextChar - '0');
            } while (++currentIndex < length);

            // Adjust the magnitude of the fractional part and add it to the result
            int fractionLength = currentIndex - fractionStart;
            // Use our tiny array of negative powers of 10 if possible.
            if (fractionLength < NegPow10Length)
                result = sign * (result + fractionalPart * NegPow10[fractionLength]);
            // Fallback to our larger array (still fast), whose higher indices store negative powers
            else if (fractionLength < MaxDoubleExponent)
                result = sign * (result + fractionalPart * Pow10[MaxDoubleExponent + fractionLength]);
            // This would only happen for ridiculous strings (longer than 300 characters),
            // but technically we can still fall-back to using native Math.Pow to do the shift.
            else
                result = sign * (result + fractionalPart * Math.Pow(10, -fractionLength));
        }

        // If we've consumed every character, return now
        if (currentIndex >= length) return true;
        // If there are any more characters, the next character must be 'E' or 'e'
        if (nextChar != 'e' && nextChar != 'E') return false;
    }

    /**************************** "Scientific Notation Part" *****************************/
    // If we're at the end of the string (last character was 'e' or 'E'), that's an error
    if (unchecked(++currentIndex) >= length) return false;
    // Otherwise, advance the current character and begin parsing the exponent
    nextChar = input[currentIndex];
    bool exponentIsNegative = false;
    // The next character can only be a +/- sign, or a numeric character
    if (nextChar < '0' || nextChar > '9')
    {
    if (nextChar == CharNegative)
        exponentIsNegative = true;
    else if (nextChar != CharPositive)
        return false;
    // Again, require there to be at least one more character in the string after the sign
    if (unchecked(++currentIndex) >= length) return false;
    nextChar = input[currentIndex];
    // And verify that this next character is numeric
    if (nextChar < '0' || nextChar > '9') return false;
    }
    unchecked
    {
    // Since we know theif next(nextChar character== isCharNegative)
 a digit, we can initialize the exponent int
    //exponentIsNegative directly= andtrue;
 avoid 2 wasted ops (multiplying by and addingelse toif zero(nextChar != CharPositive).
    int exponent = nextChar - '0';   return false;
        // ShiftAgain, andrequire addthere anyto additionalbe digitat charactersleast one more character in the string after the sign
    while    if (unchecked(++currentIndex) <>= length)
   return {false;
        nextChar = input[currentIndex];
        // If we encounter anyAnd non-numericverify charactersthat now,this it'snext definitelycharacter anis errornumeric
        if (nextChar < '0' || nextChar > '9') return false;
        exponent = exponent * 10 + (nextChar - '0');
    }
    unchecked
    {
        // Since we know the next character is a digit, we can initialize the exponent int
        // directly and avoid 2 wasted ops (multiplying by and adding to zero).
        int exponent = nextChar - '0';
        // Shift and add any additional digit characters
        while (++currentIndex < length)
        {
            nextChar = input[currentIndex];
            // If we encounter any non-numeric characters now, it's definitely an error
            if (nextChar < '0' || nextChar > '9') return false;
            exponent = exponent * 10 + (nextChar - '0');
        }
        // Convert the exponent into index we can jump to in our powers-of-ten array
        if (exponentIsNegative)
            exponent += MaxDoubleExponent;
        else if (exponent > MaxDoubleExponent)
            return false;
        // Use our array of powers, unless the exponent is a very large negative number,
        // in which case we fall back to computing the power on-the-fly.
        result *= exponent < Pow10Length ? Pow10[exponent] :
            Math.Pow(10, MaxDoubleExponent - exponent);
    }
    return true;
}

/// <summary>Checks if the string matches one of a few supported special case
/// double strings. If so, assigns the result and returns true.</summary>
public static bool CheckForSpecialCaseDoubleStrings(string input, out double result)
{
    if (input == NumberFormat.PositiveInfinitySymbol)
        result = Double.PositiveInfinity;
    else if (input == NumberFormat.NegativeInfinitySymbol)
        result = Double.NegativeInfinity;
    else if (input == NumberFormat.NaNSymbol)
        result = Double.NaN;
    // Special Case: Excel has been known to format zero as "-".
    // We intentionally support it by returning zero now (most parsers would not)
    else if (input == NumberFormat.NegativeSign)
        result = 0d;
    // Special Case: Our organization treats the term "Unlimited" as referring
    // to Double.MaxValue (most parsers would not)
    else if (input.Equals("unlimited", StringComparison.OrdinalIgnoreCase))
        result = Double.MaxValue;
    // Anything else is not a valid input
    else
    {
        result = Double.NaN;
        return false;
    }
    return true;
}

/// <summary>The largest exponent (or smallest when negative) that can be given to a Double.</summary>
private const int MaxDoubleExponent = 308;

/// <summary>The number of elements that will be generated in the Pow10 array.</summary>
private const int Pow10Length = MaxDoubleExponent * 2 + 1;

/// <summary>A cache of all possible positive powers of 10 that might be required to
/// apply an exponent to a double (Indices 0-308), as well as the first 308 negative
/// exponents. (Indices 309-616)</summary>
private static readonly double[] Pow10 =
    Enumerable.Range(0, MaxDoubleExponent + 1).Select(i => Math.Pow(10, i))
        .Concat(Enumerable.Range(1, MaxDoubleExponent).Select(i => Math.Pow(10, -i)))
        .ToArray();

/// <summary>The number of negative powers to pre-compute and store in a small array.</summary>
private const int NegPow10Length = 16;

/// <summary>A cache of the first 15 negative powers of 10 for quick
/// magnitude adjustment of common parsed fractional parts of doubles.</summary>
/// <remarks>Even though this overlaps with the Pow10 array, it is kept separate so that
/// users that don't use scientific notation or extremely long fractional parts
/// might get a speedup by being able to reference the smaller array, which has a better
/// chance of being served out of L1/L2 cache.</remarks>
private static readonly double[] NegPow10 =
    Enumerable.Range(0, NegPow10Length).Select(i => Math.Pow(10, -i)).ToArray();

Benchmark Results

Using a Stopwatch this time, just to quell any debate:

Native parser took 4583 ms.

Custom parser took 1162 ms.

Performance gain was 294.41%

public static bool FastTryParseDouble(string input, out double result)
{
    int length = input.Length;
    if (length <= 0)
    {
    result = Double.NaN;
    return false;
    }
    double sign = 1d;
    int currentIndex = 0;
    char nextChar = input[0];

    /**************** Sign (+/-) and Special Case String Representations *****************/
    // Handle all cases where the string does not start with a numeric character
    if (nextChar < '0' || nextChar > '9')
    {
    // Non-numeric 1-character strings must match one of the supported special cases.
    if (length == 1)
        return CheckForSpecialCaseDoubleStrings(input, out result);
    // For anything more than one character, this should be a sign character.
    if (nextChar == CharNegative)
        sign = -1d;
    // The very next character may also be the decimal separator.
    else if (nextChar == CharDecimalSeparator)
    {
        // In this case, we treat the integer part as 0 and skip to the fractional part.
        result = 0d;
        goto SkipIntegerPart;
    }
    // Finally, unless it was a '+' sign, input must match one of a set of special cases.
    else if (nextChar != CharPositive)
        return CheckForSpecialCaseDoubleStrings(input, out result);

    // Once the sign is consumed, advance to the next character for further parsing
    nextChar = input[unchecked(++currentIndex)];
    // We must once more check whether the character is numeric before proceeding.
    if (nextChar < '0' || nextChar > '9')
    {
        // If not numeric, at this point, the character can only be a decimal separator
        // (as in "-.123" or "+.123"), or else it must be part of a special case string
        // (as in "-∞"). So check for those.
        if (nextChar != CharDecimalSeparator)
        return CheckForSpecialCaseDoubleStrings(input, out result);
        result = 0d;
        goto SkipIntegerPart;
    }
    }

    /********************************** "Integer Part" ***********************************/
    // Treat all subsequent numeric characters as the "integer part" of the result.
    unchecked
    {
    // Since we've already checked that the next character is numeric,
    // We can save 2 ops by initializing the result directly.
    result = nextChar - '0';
    while(true)
    {
        // Return the "integer part" immediately if we encounter the end of the string
        if (++currentIndex >= length)
        {
        result *= sign;
        return true;
        }
        nextChar = input[currentIndex];
        if (nextChar < '0' || nextChar > '9') break;
        result = result * 10d + (nextChar - '0');
    }
    }

    // This label and corresponding goto statements is a performance optimization to
    // allow us to efficiently skip "integer part" parsing in cases like ".123456"
    // Please don't be mad.
    SkipIntegerPart:  
    
    // The expected case is that the next character is a decimal separator.
    if (nextChar != CharDecimalSeparator)
    {
    // If not this is only allowed to be the exponent character
    if (nextChar != 'e' && nextChar != 'E') return false;
    // Skip fractional parsing and jump to handling exponential notation.
    result = result * sign;
    }
    else
    {
    /******************************* "Fractional Part" *******************************/
    double fractionalPart = 0d;
    int fractionStart = ++currentIndex;
    // Perform a similar loop to construct the fractional part of the decimal
    unchecked
    {
        do
        {
        nextChar = input[currentIndex];
        if (nextChar < '0' || nextChar > '9') break;
        fractionalPart = fractionalPart * 10d + (nextChar - '0');
        } while (++currentIndex < length);

        // Adjust the magnitude of the fractional part and add it to the result
        int fractionLength = currentIndex - fractionStart;
        // Use our tiny array of negative powers of 10 if possible.
        if (fractionLength < NegPow10Length)
        result = sign * (result + fractionalPart * NegPow10[fractionLength]);
        // Fallback to our larger array (still fast), whose higher indices store negative powers
        else if (fractionLength < MaxDoubleExponent)
        result = sign * (result + fractionalPart * Pow10[MaxDoubleExponent + fractionLength]);
        // This would only happen for ridiculous strings (longer than 300 characters),
        // but technically we can still fall-back to using native Math.Pow to do the shift.
        else
        result = sign * (result + fractionalPart * Math.Pow(10, -fractionLength));
    }

    // If we've consumed every character, return now
    if (currentIndex >= length) return true;
    // If there are any more characters, the next character must be 'E' or 'e'
    if (nextChar != 'e' && nextChar != 'E') return false;
    }

    /**************************** "Scientific Notation Part" *****************************/
    // If we're at the end of the string (last character was 'e' or 'E'), that's an error
    if (unchecked(++currentIndex) >= length) return false;
    // Otherwise, advance the current character and begin parsing the exponent
    nextChar = input[currentIndex];
    bool exponentIsNegative = false;
    // The next character can only be a +/- sign, or a numeric character
    if (nextChar < '0' || nextChar > '9')
    {
    if (nextChar == CharNegative)
        exponentIsNegative = true;
    else if (nextChar != CharPositive)
        return false;
    // Again, require there to be at least one more character in the string after the sign
    if (unchecked(++currentIndex) >= length) return false;
    nextChar = input[currentIndex];
    // And verify that this next character is numeric
    if (nextChar < '0' || nextChar > '9') return false;
    }
    unchecked
    {
    // Since we know the next character is a digit, we can initialize the exponent int
    // directly and avoid 2 wasted ops (multiplying by and adding to zero).
    int exponent = nextChar - '0';
    // Shift and add any additional digit characters
    while (++currentIndex < length)
    {
        nextChar = input[currentIndex];
        // If we encounter any non-numeric characters now, it's definitely an error
        if (nextChar < '0' || nextChar > '9') return false;
        exponent = exponent * 10 + (nextChar - '0');
    }
    // Convert the exponent into index we can jump to in our powers-of-ten array
    if (exponentIsNegative)
        exponent += MaxDoubleExponent;
    else if (exponent > MaxDoubleExponent)
        return false;
    // Use our array of powers, unless the exponent is a very large negative number,
    // in which case we fall back to computing the power on-the-fly.
    result *= exponent < Pow10Length ? Pow10[exponent] :
        Math.Pow(10, MaxDoubleExponent - exponent);
    }
    return true;
}

/// <summary>Checks if the string matches one of a few supported special case
/// double strings. If so, assigns the result and returns true.</summary>
public static bool CheckForSpecialCaseDoubleStrings(string input, out double result)
{
    if (input == NumberFormat.PositiveInfinitySymbol)
    result = Double.PositiveInfinity;
    else if (input == NumberFormat.NegativeInfinitySymbol)
    result = Double.NegativeInfinity;
    else if (input == NumberFormat.NaNSymbol)
    result = Double.NaN;
    // Special Case: Excel has been known to format zero as "-".
    // We intentionally support it by returning zero now (most parsers would not)
    else if (input == NumberFormat.NegativeSign)
    result = 0d;
    // Special Case: Our organization treats the term "Unlimited" as referring
    // to Double.MaxValue (most parsers would not)
    else if (input.Equals("unlimited", StringComparison.OrdinalIgnoreCase))
    result = Double.MaxValue;
    // Anything else is not a valid input
    else
    {
    result = Double.NaN;
    return false;
    }
    return true;
}

/// <summary>The largest exponent (or smallest when negative) that can be given to a Double.</summary>
private const int MaxDoubleExponent = 308;

/// <summary>The number of elements that will be generated in the Pow10 array.</summary>
private const int Pow10Length = MaxDoubleExponent * 2 + 1;

/// <summary>A cache of all possible positive powers of 10 that might be required to
/// apply an exponent to a double (Indices 0-308), as well as the first 308 negative
/// exponents. (Indices 309-616)</summary>
private static readonly double[] Pow10 =
    Enumerable.Range(0, MaxDoubleExponent + 1).Select(i => Math.Pow(10, i))
    .Concat(Enumerable.Range(1, MaxDoubleExponent).Select(i => Math.Pow(10, -i)))
    .ToArray();

/// <summary>The number of negative powers to pre-compute and store in a small array.</summary>
private const int NegPow10Length = 16;

/// <summary>A cache of the first 15 negative powers of 10 for quick
/// magnitude adjustment of common parsed fractional parts of doubles.</summary>
/// <remarks>Even though this overlaps with the Pow10 array, it is kept separate so that
/// users that don't use scientific notation or extremely long fractional parts
/// might get a speedup by being able to reference the smaller array, which has a better
/// chance of being served out of L1/L2 cache.</remarks>
private static readonly double[] NegPow10 =
    Enumerable.Range(0, NegPow10Length).Select(i => Math.Pow(10, -i)).ToArray();

public static bool FastTryParseDouble(string input, out double result)
{
    int length = input.Length;
    if (length <= 0)
    {
        result = Double.NaN;
        return false;
    }
    double sign = 1d;
    int currentIndex = 0;
    char nextChar = input[0];

    /**************** Sign (+/-) and Special Case String Representations *****************/
    // Handle all cases where the string does not start with a numeric character
    if (nextChar < '0' || nextChar > '9')
    {
        // Non-numeric 1-character strings must match one of the supported special cases.
        if (length == 1)
            return CheckForSpecialCaseDoubleStrings(input, out result);
        // For anything more than one character, this should be a sign character.
        if (nextChar == CharNegative)
            sign = -1d;
        // The very next character may also be the decimal separator.
        else if (nextChar == CharDecimalSeparator)
        {
            // In this case, we treat the integer part as 0 and skip to the fractional part.
            result = 0d;
            goto SkipIntegerPart;
        }
        // Finally, unless it was a '+' sign, input must match one of a set of special cases.
        else if (nextChar != CharPositive)
            return CheckForSpecialCaseDoubleStrings(input, out result);

        // Once the sign is consumed, advance to the next character for further parsing
        nextChar = input[unchecked(++currentIndex)];
        // We must once more check whether the character is numeric before proceeding.
        if (nextChar < '0' || nextChar > '9')
        {
            // If not numeric, at this point, the character can only be a decimal separator
            // (as in "-.123" or "+.123"), or else it must be part of a special case string
            // (as in "-∞"). So check for those.
            if (nextChar != CharDecimalSeparator)
                return CheckForSpecialCaseDoubleStrings(input, out result);
            result = 0d;
            goto SkipIntegerPart;
        }
    }

    /********************************** "Integer Part" ***********************************/
    // Treat all subsequent numeric characters as the "integer part" of the result.
    unchecked
    {
        // Since we've already checked that the next character is numeric,
        // We can save 2 ops by initializing the result directly.
        result = nextChar - '0';
        while(true)
        {
            // Return the "integer part" immediately if we encounter the end of the string
            if (++currentIndex >= length)
            {
                result *= sign;
                return true;
            }
            nextChar = input[currentIndex];
            if (nextChar < '0' || nextChar > '9') break;
            result = result * 10d + (nextChar - '0');
        }
    }

    // This label and corresponding goto statements is a performance optimization to
    // allow us to efficiently skip "integer part" parsing in cases like ".123456"
    // Please don't be mad.
    SkipIntegerPart:  
    
    // The expected case is that the next character is a decimal separator.
    if (nextChar != CharDecimalSeparator)
    {
        // If not this is only allowed to be the exponent character
        if (nextChar != 'e' && nextChar != 'E') return false;
        // Skip fractional parsing and jump to handling exponential notation.
        result = result * sign;
    }
    else
    {
        /******************************* "Fractional Part" *******************************/
        double fractionalPart = 0d;
        int fractionStart = ++currentIndex;
        // Perform a similar loop to construct the fractional part of the decimal
        unchecked
        {
            do
            {
                nextChar = input[currentIndex];
                if (nextChar < '0' || nextChar > '9') break;
                fractionalPart = fractionalPart * 10d + (nextChar - '0');
            } while (++currentIndex < length);

            // Adjust the magnitude of the fractional part and add it to the result
            int fractionLength = currentIndex - fractionStart;
            // Use our tiny array of negative powers of 10 if possible.
            if (fractionLength < NegPow10Length)
                result = sign * (result + fractionalPart * NegPow10[fractionLength]);
            // Fallback to our larger array (still fast), whose higher indices store negative powers
            else if (fractionLength < MaxDoubleExponent)
                result = sign * (result + fractionalPart * Pow10[MaxDoubleExponent + fractionLength]);
            // This would only happen for ridiculous strings (longer than 300 characters),
            // but technically we can still fall-back to using native Math.Pow to do the shift.
            else
                result = sign * (result + fractionalPart * Math.Pow(10, -fractionLength));
        }

        // If we've consumed every character, return now
        if (currentIndex >= length) return true;
        // If there are any more characters, the next character must be 'E' or 'e'
        if (nextChar != 'e' && nextChar != 'E') return false;
    }

    /**************************** "Scientific Notation Part" *****************************/
    // If we're at the end of the string (last character was 'e' or 'E'), that's an error
    if (unchecked(++currentIndex) >= length) return false;
    // Otherwise, advance the current character and begin parsing the exponent
    nextChar = input[currentIndex];
    bool exponentIsNegative = false;
    // The next character can only be a +/- sign, or a numeric character
    if (nextChar < '0' || nextChar > '9')
    {
        if (nextChar == CharNegative)
            exponentIsNegative = true;
        else if (nextChar != CharPositive)
            return false;
        // Again, require there to be at least one more character in the string after the sign
        if (unchecked(++currentIndex) >= length) return false;
        nextChar = input[currentIndex];
        // And verify that this next character is numeric
        if (nextChar < '0' || nextChar > '9') return false;
    }
    unchecked
    {
        // Since we know the next character is a digit, we can initialize the exponent int
        // directly and avoid 2 wasted ops (multiplying by and adding to zero).
        int exponent = nextChar - '0';
        // Shift and add any additional digit characters
        while (++currentIndex < length)
        {
            nextChar = input[currentIndex];
            // If we encounter any non-numeric characters now, it's definitely an error
            if (nextChar < '0' || nextChar > '9') return false;
            exponent = exponent * 10 + (nextChar - '0');
        }
        // Convert the exponent into index we can jump to in our powers-of-ten array
        if (exponentIsNegative)
            exponent += MaxDoubleExponent;
        else if (exponent > MaxDoubleExponent)
            return false;
        // Use our array of powers, unless the exponent is a very large negative number,
        // in which case we fall back to computing the power on-the-fly.
        result *= exponent < Pow10Length ? Pow10[exponent] :
            Math.Pow(10, MaxDoubleExponent - exponent);
    }
    return true;
}

/// <summary>Checks if the string matches one of a few supported special case
/// double strings. If so, assigns the result and returns true.</summary>
public static bool CheckForSpecialCaseDoubleStrings(string input, out double result)
{
    if (input == NumberFormat.PositiveInfinitySymbol)
        result = Double.PositiveInfinity;
    else if (input == NumberFormat.NegativeInfinitySymbol)
        result = Double.NegativeInfinity;
    else if (input == NumberFormat.NaNSymbol)
        result = Double.NaN;
    // Special Case: Excel has been known to format zero as "-".
    // We intentionally support it by returning zero now (most parsers would not)
    else if (input == NumberFormat.NegativeSign)
        result = 0d;
    // Special Case: Our organization treats the term "Unlimited" as referring
    // to Double.MaxValue (most parsers would not)
    else if (input.Equals("unlimited", StringComparison.OrdinalIgnoreCase))
        result = Double.MaxValue;
    // Anything else is not a valid input
    else
    {
        result = Double.NaN;
        return false;
    }
    return true;
}

/// <summary>The largest exponent (or smallest when negative) that can be given to a Double.</summary>
private const int MaxDoubleExponent = 308;

/// <summary>The number of elements that will be generated in the Pow10 array.</summary>
private const int Pow10Length = MaxDoubleExponent * 2 + 1;

/// <summary>A cache of all possible positive powers of 10 that might be required to
/// apply an exponent to a double (Indices 0-308), as well as the first 308 negative
/// exponents. (Indices 309-616)</summary>
private static readonly double[] Pow10 =
    Enumerable.Range(0, MaxDoubleExponent + 1).Select(i => Math.Pow(10, i))
        .Concat(Enumerable.Range(1, MaxDoubleExponent).Select(i => Math.Pow(10, -i)))
        .ToArray();

/// <summary>The number of negative powers to pre-compute and store in a small array.</summary>
private const int NegPow10Length = 16;

/// <summary>A cache of the first 15 negative powers of 10 for quick
/// magnitude adjustment of common parsed fractional parts of doubles.</summary>
/// <remarks>Even though this overlaps with the Pow10 array, it is kept separate so that
/// users that don't use scientific notation or extremely long fractional parts
/// might get a speedup by being able to reference the smaller array, which has a better
/// chance of being served out of L1/L2 cache.</remarks>
private static readonly double[] NegPow10 =
    Enumerable.Range(0, NegPow10Length).Select(i => Math.Pow(10, -i)).ToArray();

Benchmark Results

Using a Stopwatch this time, just to quell any debate:

Native parser took 4583 ms.

Custom parser took 1162 ms.

Performance gain was 294.41%

Source Link

answered Aug 2, 2018 at 16:08

Alain