7

We are currently building a greenfield app in C#. We have extensive UI tests which use Selenium WebDriver. These tests (as well as unit tests) are run by our CI server.

Selenium exposes a .PageSource attribute, and it makes sense (to me) to run that source through an HTML5 validator as another part of each UI test.

I want to pick up on the same sorts of things that http://validator.w3.org/ picks up on. As a bonus, I would also like to pick up on Section 508 accessibility issues.

My problem is that I can't find anything that will do this locally and is easy to integrate into my UI tests. The W3C site exposes a SOAP API; however, I don't want to hit their site as part of the CI process. They also don't appear to support getting SOAP responses back. I would like to avoid installing a full W3C server locally.

The closest thing that I see is http://www.totalvalidator.com/; using it would require writing temp files and parsing reports.

I thought I'd see if anyone knows of another way before I go down this track. Preferably a DotNet assembly that I can call.

c

1

3 Answers 3

1

After spending an entire weekend on this problem, the only solution I can see is a commercial library called CSE HTML Validator.

It is located here http://www.htmlvalidator.com/htmldownload.html

I wrote a simple wrapper for it. Here is the code

using Newtonsoft.Json;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;

[assembly: CLSCompliant(true)]
namespace HtmlValidator
{

/// <summary>
/// Validates an HTML string by writing it to a temporary file, running the
/// CSE HTML Validator command-line processor against that file and parsing
/// the JSON report the tool writes to standard output.
/// </summary>
public class Validator
{
    #region Constructors...

    /// <summary>
    /// Creates a validator for the supplied markup. Nothing is executed until
    /// <see cref="ValidateHtmlFile"/> is called.
    /// </summary>
    /// <param name="htmlToValidate">The HTML source to validate.</param>
    public Validator(string htmlToValidate)
    {
        HtmlToValidate = htmlToValidate;
        HasExecuted = false;
        Errors = new List<ValidationResult>();
        Warnings = new List<ValidationResult>();
        OtherMessages = new List<ValidationResult>();
    }

    #endregion



    #region Properties...
    /// <summary>Messages whose type is "ERROR".</summary>
    public IList<ValidationResult> Errors { get; private set; }

    /// <summary>True once <see cref="ValidateHtmlFile"/> has completed successfully.</summary>
    public bool HasExecuted { get; private set; }

    /// <summary>The markup passed to the constructor.</summary>
    public string HtmlToValidate { get; private set; }

    /// <summary>Messages whose type is neither "ERROR" nor "WARNING".</summary>
    public IList<ValidationResult> OtherMessages { get; private set; }

    /// <summary>The raw text the validator process wrote to standard output.</summary>
    public string ResultsString { get; private set; }

    /// <summary>Path of the temporary file used by the most recent run.</summary>
    public string TempFilePath { get; private set; }

    /// <summary>Messages whose type is "WARNING".</summary>
    public IList<ValidationResult> Warnings { get; private set; }
    #endregion



    #region Public methods...
    /// <summary>
    /// Runs the external validator over <see cref="HtmlToValidate"/> and fills
    /// <see cref="Errors"/>, <see cref="Warnings"/> and <see cref="OtherMessages"/>.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// The validator process produced no output to parse.
    /// </exception>
    public void ValidateHtmlFile()
    {
        HasExecuted = false;

        try
        {
            WriteTempFile();
            ExecuteValidator();
        }
        finally
        {
            // Always remove the temp file, even when the validator could not be started.
            DeleteTempFile();
        }

        ParseResults();

        HasExecuted = true;
    }

    #endregion



    #region Private methods...
    /// <summary>
    /// Deletes the temporary file created by <see cref="WriteTempFile"/>.
    /// </summary>
    private void DeleteTempFile()
    {
        // Delete the file that was actually written. Asking for a new temp file
        // name here would create (and delete) an unrelated file and leak the real one.
        if (!String.IsNullOrEmpty(TempFilePath))
        {
            File.Delete(TempFilePath);
        }
    }


    /// <summary>
    /// Starts the command-line validator against <see cref="TempFilePath"/> and
    /// captures everything it writes to standard output in <see cref="ResultsString"/>.
    /// </summary>
    private void ExecuteValidator()
    {
        var psi = new ProcessStartInfo(GetHTMLValidatorPath())
        {
            RedirectStandardInput = false,
            RedirectStandardOutput = true,
            RedirectStandardError = false,
            UseShellExecute = false,
            Arguments = String.Format(@"-e,(stdout),0,16 ""{0}""", TempFilePath)
        };

        using (var p = new Process { StartInfo = psi })
        {
            p.Start();

            // Drain stdout before waiting for exit; waiting first can deadlock
            // when the child fills the redirected pipe.
            ResultsString = p.StandardOutput.ReadToEnd();
            p.WaitForExit();
        }
    }


    /// <summary>
    /// Returns the hard-coded install location of the validator's command-line processor.
    /// </summary>
    private static string GetHTMLValidatorPath()
    {
        return @"C:\Program Files (x86)\HTMLValidator120\cmdlineprocessor.exe";
    }


    /// <summary>
    /// Deserializes <see cref="ResultsString"/> and sorts each reported message
    /// into <see cref="Errors"/>, <see cref="Warnings"/> or <see cref="OtherMessages"/>.
    /// </summary>
    private void ParseResults()
    {
        // Start from a clean slate so a second run does not duplicate earlier results.
        Errors.Clear();
        Warnings.Clear();
        OtherMessages.Clear();

        if (String.IsNullOrWhiteSpace(ResultsString))
        {
            throw new InvalidOperationException("The HTML validator did not produce any output to parse.");
        }

        var results = JsonConvert.DeserializeObject<dynamic>(ResultsString);
        IList<InternalValidationResult> messages = results.messages.ToObject<List<InternalValidationResult>>();

        foreach (InternalValidationResult internalValidationResult in messages)
        {
            ValidationResult result = new ValidationResult()
            {
                Message = internalValidationResult.message,
                LineNumber = internalValidationResult.linenumber,
                MessageCategory = internalValidationResult.messagecategory,
                MessageType = internalValidationResult.messagetype,
                CharLocation = internalValidationResult.charlocation
            };

            switch (internalValidationResult.messagetype)
            {
                case "ERROR":
                    Errors.Add(result);
                    break;

                case "WARNING":
                    Warnings.Add(result);
                    break;

                default:
                    OtherMessages.Add(result);
                    break;
            }
        }
    }


    /// <summary>
    /// Creates a new temporary file, records its path in <see cref="TempFilePath"/>
    /// and writes <see cref="HtmlToValidate"/> into it.
    /// </summary>
    private void WriteTempFile()
    {
        TempFilePath = Path.GetTempFileName();

        // The using block flushes and closes the writer even if the write throws.
        using (StreamWriter streamWriter = File.AppendText(TempFilePath))
        {
            streamWriter.WriteLine(HtmlToValidate);
        }
    }
    #endregion
}
}




/// <summary>
/// One message reported for the validated document: its type, category,
/// text and the line/character position it refers to.
/// </summary>
public class ValidationResult
{
    /// <summary>Type label of the message (for example "ERROR" or "WARNING").</summary>
    public string MessageType { get; set; }

    /// <summary>Category label of the message.</summary>
    public string MessageCategory { get; set; }

    /// <summary>The message text.</summary>
    public string Message { get; set; }

    /// <summary>Line number the message refers to.</summary>
    public int LineNumber { get; set; }

    /// <summary>Character position the message refers to.</summary>
    public int CharLocation { get; set; }


    /// <summary>
    /// Formats the result as "TYPE Line N Char M:: message".
    /// </summary>
    public override string ToString()
    {
        string position = "Line " + this.LineNumber + " Char " + this.CharLocation;
        return this.MessageType + " " + position + ":: " + this.Message;
    }
}


/// <summary>
/// Intermediate holder for a message exactly as it appears in the JSON returned
/// by the underlying validator.
/// </summary>
/// <remarks>
/// The property names have to match the casing of the JSON fields, which is why
/// the naming/spelling code-analysis rules are suppressed on each of them.
/// </remarks>
public class InternalValidationResult
{
    #region Properties...
    [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1709:IdentifiersShouldBeCasedCorrectly", MessageId = "charlocation")]
    [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1704:IdentifiersShouldBeSpelledCorrectly", MessageId = "charlocation")]
    public int charlocation { get; set; }

    [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1709:IdentifiersShouldBeCasedCorrectly", MessageId = "linenumber")]
    [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1704:IdentifiersShouldBeSpelledCorrectly", MessageId = "linenumber")]
    public int linenumber { get; set; }

    [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1709:IdentifiersShouldBeCasedCorrectly", MessageId = "message")]
    [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1704:IdentifiersShouldBeSpelledCorrectly", MessageId = "message")]
    public string message { get; set; }

    [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1704:IdentifiersShouldBeSpelledCorrectly", MessageId = "messagecategory")]
    [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1709:IdentifiersShouldBeCasedCorrectly", MessageId = "messagecategory")]
    public string messagecategory { get; set; }

    [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1709:IdentifiersShouldBeCasedCorrectly", MessageId = "messagetype")]
    [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1704:IdentifiersShouldBeSpelledCorrectly", MessageId = "messagetype")]
    public string messagetype { get; set; }
    #endregion
}

Usage/Testing

   // Complete document: every opened element, including <html>, is closed.
   private const string ValidHtml = "<!DOCType html><html><head></head><body><p>Hello World</p></body></html>";
    // Same document with the closing </html> tag left off.
    private const string BrokenHtml = "<!DOCType html><html><head></head><body><p>Hello World</p></body>";

    // Validating the well-formed sample should run to completion and report no errors.
    [TestMethod]
    public void CanValidHtmlStringReturnNoErrors()
    {
        var validator = new Validator(ValidHtml);

        validator.ValidateHtmlFile();

        bool executed = validator.HasExecuted;
        int errorCount = validator.Errors.Count;
        Assert.IsTrue(executed);
        Assert.IsTrue(errorCount == 0);
    }


    // Validating the broken sample should run to completion and report at least
    // one error whose text representation contains "ERROR".
    [TestMethod]
    public void CanInvalidHtmlStringReturnErrors()
    {
        var validator = new Validator(BrokenHtml);

        validator.ValidateHtmlFile();

        Assert.IsTrue(validator.HasExecuted);

        int errorCount = validator.Errors.Count;
        Assert.IsTrue(errorCount > 0);

        string firstError = validator.Errors[0].ToString();
        Assert.IsTrue(firstError.Contains("ERROR"));
    }
Sign up to request clarification or add additional context in comments.

Comments

1

The best HTML5 validator, the Nu checker, is written in Java and hard to interface with from .NET. But libtidy can be wrapped in a C++/CLI DLL to be called from managed code. The sample program they've posted did a good job for me, with a little adapting.

LibTidy.h:

// Managed (C++/CLI) reference class so the libtidy-based check can be called from .NET code.
public ref class LibTidy
{
public:
    // Takes the markup as a managed string and returns a managed string.
    // The implementation lives in LibTidy.cpp.
    System::String^ __clrcall Test(System::String^ input);
};

LibTidy.cpp:

// Parses `input` with HTML Tidy and returns the text Tidy wrote to its error
// buffer (one diagnostic per line, in Emacs-style format because TidyEmacs is
// enabled). Returns an empty string when Tidy produced no diagnostics.
System::String^ __clrcall LibTidy::Test(System::String^ input)
{
    CStringW cstring(input);

    // Narrow the wide-character text into a char buffer for tidyParseString.
    // NOTE(review): wcstombs_s converts using the current C locale and _TRUNCATE
    // silently cuts the text if it does not fit in 2 bytes per character --
    // confirm this is acceptable for the documents being checked.
    const size_t newsizew = (cstring.GetLength() + 1) * 2;
    char* nstringw = new char[newsizew];
    size_t convertedCharsw = 0;
    wcstombs_s(&convertedCharsw, nstringw, newsizew, cstring, _TRUNCATE);

    TidyBuffer errbuf = { 0 };
    int rc = -1;

    TidyDoc tdoc = tidyCreate();                     // Initialize "document"

    // Apply every option, and only continue if all of them were accepted
    // (previously only the result of the last call was checked).
    Bool okInfo = tidyOptSetBool(tdoc, TidyShowInfo, no);
    Bool okQuiet = tidyOptSetBool(tdoc, TidyQuiet, yes);
    Bool okEmacs = tidyOptSetBool(tdoc, TidyEmacs, yes);

    if (okInfo && okQuiet && okEmacs)
        rc = tidySetErrorBuffer(tdoc, &errbuf);      // Capture diagnostics
    if (rc >= 0)
        rc = tidyParseString(tdoc, nstringw);        // Parse the input
    if (rc >= 0)
        rc = tidyCleanAndRepair(tdoc);               // Tidy it up!
    if (rc >= 0)
        rc = tidyRunDiagnostics(tdoc);               // Kvetch

    // Copy the diagnostics into a managed string BEFORE the buffer is freed;
    // building the string afterwards would read released memory.
    System::String^ diagnostics = System::String::Empty;
    if (errbuf.bp != NULL)
        diagnostics = gcnew System::String((char*)errbuf.bp);

    if (errbuf.allocator != NULL) tidyBufFree(&errbuf);
    tidyRelease(tdoc);
    delete[] nstringw;                               // Release the narrowed input copy

    return diagnostics;
}

Comments

0

It looks like this link may have what you want: Automated W3C Validation

You can download a markup validator in the accepted answer and pass your HTML to that. Sorry they're not .NET assemblies :/, but you could wrap it in a DLL if you really wanted to.

Also, one of the answers on this question suggests that the W3C service actually exposes a RESTful API, but can return a SOAP response: How might I use the W3C Markup Validator API in my .NET application?

1 Comment

Thanks for your reply. None of the binary tools in the first link can deal with HTML5. The W3C service does not appear to allow you to combine SOAP responses with sending arbitrary content to be validated (i.e., you can only give them links to sites to check). This htmlvalidator.com/htmlval/developer.html seems to be the best solution so far. It's command-line driven and can deal with HTML5.

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.