1

I'm using the JsonExtensionData Attribute along with a Dictionary<string, object>, which actually works pretty well for all my "unknown" JSON results.

Unfortunately, the JSON string I get from my web service sometimes contains keys with non-alphanumeric characters, which I want to clean up during deserialization.

For example:

"dc:title":"My Document title"

During deserialization I want to remove all non-alphanumeric characters and I wonder if there is an easy way to do so with a custom converter?

Deserializer Code

/// <summary>
/// Deserializer that turns the body of a RestSharp response into an object
/// using a caller-supplied Json.NET <see cref="Newtonsoft.Json.JsonSerializer"/>.
/// </summary>
public class TikaDeserializer : IDeserializer
{
    private readonly Newtonsoft.Json.JsonSerializer serializer;

    /// <summary>
    /// Creates a deserializer that delegates all JSON work to <paramref name="serializer"/>.
    /// </summary>
    /// <param name="serializer">The Json.NET serializer used by <see cref="Deserialize{T}"/>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="serializer"/> is null.</exception>
    public TikaDeserializer(Newtonsoft.Json.JsonSerializer serializer)
    {
        if (serializer == null)
        {
            throw new ArgumentNullException("serializer");
        }

        this.serializer = serializer;
    }

    /// <summary>
    /// Deserializes the textual content of <paramref name="response"/> into <typeparamref name="T"/>.
    /// </summary>
    /// <param name="response">The response whose <c>Content</c> holds the JSON text.</param>
    /// <returns>
    /// The deserialized value, or <c>default(T)</c> when the response carries no content.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="response"/> is null.</exception>
    public T Deserialize<T>(RestSharp.IRestResponse response)
    {
        if (response == null)
        {
            throw new ArgumentNullException("response");
        }

        var content = response.Content;

        // StringReader rejects null, and there is nothing to parse in an empty body.
        if (string.IsNullOrEmpty(content))
        {
            return default(T);
        }

        using (var stringReader = new StringReader(content))
        using (var jsonTextReader = new JsonTextReader(stringReader))
        {
            return serializer.Deserialize<T>(jsonTextReader);
        }
    }

    // NOTE(review): the three properties below are not read anywhere in this class;
    // presumably they exist to satisfy IDeserializer - confirm against the interface.
    public string DateFormat { get; set; }

    public string Namespace { get; set; }

    public string RootElement { get; set; }

    /// <summary>
    /// Gets a new deserializer backed by a serializer that ignores null values.
    /// A fresh instance is created on every access.
    /// </summary>
    public static TikaDeserializer Default
    {
        get
        {
            return new TikaDeserializer(new Newtonsoft.Json.JsonSerializer()
            {
                NullValueHandling = NullValueHandling.Ignore,
            });
        }
    }
}

Converter Code by Gediminas

/// <summary>
/// Read-only converter for <see cref="Dictionary{TKey,TValue}"/> targets that strips every
/// non-alphanumeric (ASCII) character from the top-level JSON property names before the
/// dictionary is materialized.
/// </summary>
public class InputKeyNameCleanerConverter : JsonConverter
{
    /// <summary>Returns true for any closed <c>Dictionary&lt;,&gt;</c> type.</summary>
    public override bool CanConvert(Type objectType)
    {
        return objectType.IsGenericType && objectType.GetGenericTypeDefinition() == typeof(Dictionary<,>);
    }

    /// <summary>
    /// Loads the current JSON value, renames its direct properties, and converts the result
    /// to <paramref name="objectType"/>.
    /// </summary>
    /// <remarks>
    /// If two keys collapse to the same cleaned name, replacing the property fails with the
    /// exception raised by Json.NET for duplicate property names.
    /// </remarks>
    public override object ReadJson(JsonReader reader, Type objectType, object existingValue, JsonSerializer serializer)
    {
        var root = JToken.ReadFrom(reader);

        // Snapshot the children first: properties are replaced while iterating.
        foreach (JProperty property in root.Children<JProperty>().ToList())
        {
            // Use the property's own name rather than its Path, which may carry
            // bracket/quote decoration. Digits are kept: the goal is to remove
            // non-alphanumeric characters, not non-letters.
            string replacedName = Regex.Replace(property.Name, @"[^A-Za-z0-9]", String.Empty);
            if (replacedName != property.Name)
            {
                property.Replace(new JProperty(replacedName, property.Value));
            }
        }

        // The serializer is intentionally not passed on: it would pick this
        // converter again for the same dictionary type.
        return root.ToObject(objectType);
    }

    /// <summary>Writing is not supported by this converter.</summary>
    /// <exception cref="NotSupportedException">Always thrown.</exception>
    public override void WriteJson(JsonWriter writer, object value, JsonSerializer serializer)
    {
        throw new NotSupportedException();
    }
}

Model

/// <summary>
/// Model for a parsed-document JSON result. Well-known keys are bound to typed
/// properties; every other key ends up in <see cref="MetaData"/>.
/// </summary>
public class ParsedDocument
{
    /// <summary>Value of the "Author" key.</summary>
    [JsonProperty("Author")]
    public string Author { get; set; }

    /// <summary>Value of the "Content-Type" key.</summary>
    [JsonProperty("Content-Type")]
    public string ContentType { get; set; }

    /// <summary>Value of the "Content-Encoding" key.</summary>
    [JsonProperty("Content-Encoding")]
    public string ContentEncoding { get; set; }

    /// <summary>Value of the "Creation-Date" key, if present.</summary>
    [JsonProperty("Creation-Date")]
    public DateTime? DateCreated { get; set; }

    /// <summary>Value of the "Last-Modified" key, if present.</summary>
    [JsonProperty("Last-Modified")]
    public DateTime? DateModified { get; set; }

    /// <summary>Value of the "Last-Save-Date" key, if present.</summary>
    [JsonProperty("Last-Save-Date")]
    public DateTime? DateSaved { get; set; }

    /// <summary>Value of the "Last-Printed" key, if present.</summary>
    [JsonProperty("Last-Printed")]
    public DateTime? DatePrinted { get; set; }

    /// <summary>Value of the "title" key.</summary>
    [JsonProperty("title")]
    public string Title { get; set; }

    /// <summary>Value of the "X-TIKA:content" key.</summary>
    [JsonProperty("X-TIKA:content")]
    public string Content { get; set; }

    /// <summary>Catch-all for JSON keys that have no dedicated property above.</summary>
    [JsonExtensionData]
    public Dictionary<string, object> MetaData { get; set; }
}
1
  • try using dynamic like so (dynamic data = deserialiseJson(JsonString); Commented Mar 9, 2016 at 8:31

2 Answers 2

1

You can do this using a custom contract resolver that replaces the JsonObjectContract.ExtensionDataSetter delegate:

/// <summary>
/// Contract resolver decorator that rewrites extension-data keys with a regular expression
/// before they are handed to the original extension-data setter of an object contract.
/// </summary>
public class ExtensionNameMappingContractResolver : IContractResolver
{
    readonly IContractResolver baseResolver;
    readonly Regex regex;
    readonly string replacement;

    // Contracts already processed by this resolver, keyed by type. Without this cache a
    // base resolver that hands back the same contract instance on every call would get
    // its ExtensionDataSetter wrapped again and again, growing the delegate chain on each
    // ResolveContract call and re-applying the replacement once per layer.
    readonly System.Collections.Concurrent.ConcurrentDictionary<Type, JsonContract> contractCache =
        new System.Collections.Concurrent.ConcurrentDictionary<Type, JsonContract>();
    readonly object contractLock = new object();

    // As of 7.0.1, Json.NET suggests using a static instance for "stateless" contract resolvers, for performance reasons.
    // http://www.newtonsoft.com/json/help/html/ContractResolver.htm
    // http://www.newtonsoft.com/json/help/html/M_Newtonsoft_Json_Serialization_DefaultContractResolver__ctor_1.htm
    // "Use the parameterless constructor and cache instances of the contract resolver within your application for optimal performance."

    static readonly ExtensionNameMappingContractResolver removeNonAlphanumericCharactersInstance;

    static ExtensionNameMappingContractResolver()
    {
        // Regex is from https://stackoverflow.com/questions/8779189/how-do-i-strip-non-alphanumeric-characters-including-spaces-from-a-string
        removeNonAlphanumericCharactersInstance = new ExtensionNameMappingContractResolver(new DefaultContractResolver(), new Regex(@"[^\p{L}\p{N}]+"), "");
    }

    /// <summary>
    /// Shared instance that removes every character that is not a Unicode letter or number
    /// from extension-data keys.
    /// </summary>
    public static ExtensionNameMappingContractResolver RemoveNonAlphanumericCharactersInstance { get { return removeNonAlphanumericCharactersInstance; } }

    /// <summary>
    /// Creates a resolver that applies <paramref name="regex"/>/<paramref name="replacement"/>
    /// to extension-data keys of contracts produced by <paramref name="baseResolver"/>.
    /// </summary>
    /// <param name="baseResolver">Resolver that builds the underlying contracts.</param>
    /// <param name="regex">Pattern whose matches are replaced in each key.</param>
    /// <param name="replacement">Replacement text for every match.</param>
    /// <exception cref="ArgumentNullException">Any argument is null.</exception>
    public ExtensionNameMappingContractResolver(IContractResolver baseResolver, Regex regex, string replacement)
    {
        if (baseResolver == null)
            throw new ArgumentNullException("baseResolver");
        if (regex == null)
            throw new ArgumentNullException("regex");
        if (replacement == null)
            throw new ArgumentNullException("replacement");
        this.regex = regex;
        this.replacement = replacement;
        this.baseResolver = baseResolver;
    }

    #region IContractResolver Members

    /// <summary>
    /// Resolves the contract for <paramref name="type"/> through the base resolver and, for
    /// object contracts that have an extension-data setter, installs a setter that renames
    /// the key first. Each type is processed exactly once; later calls return the cached contract.
    /// </summary>
    /// <remarks>
    /// The contract instance returned by the base resolver is modified in place, so the base
    /// resolver should not be shared with code that expects unmodified contracts.
    /// </remarks>
    public JsonContract ResolveContract(Type type)
    {
        JsonContract contract;
        if (contractCache.TryGetValue(type, out contract))
            return contract;

        // Serialize first-time resolution so a contract can never be wrapped twice by racing threads.
        lock (contractLock)
        {
            if (contractCache.TryGetValue(type, out contract))
                return contract;

            contract = baseResolver.ResolveContract(type);
            var objContract = contract as JsonObjectContract;
            if (objContract != null && objContract.ExtensionDataSetter != null)
            {
                var oldSetter = objContract.ExtensionDataSetter;
                objContract.ExtensionDataSetter = (o, key, value) =>
                {
                    var newKey = regex.Replace(key, replacement);
                    oldSetter(o, newKey, value);
                };
            }

            contractCache[type] = contract;
            return contract;
        }
    }

    #endregion
}

Then use it like:

        // Configure a serializer that skips null values and routes extension-data keys
        // through the shared key-cleaning contract resolver.
        var settings = new JsonSerializerSettings();
        settings.NullValueHandling = NullValueHandling.Ignore;
        settings.ContractResolver = ExtensionNameMappingContractResolver.RemoveNonAlphanumericCharactersInstance;
        var serializer = JsonSerializer.CreateDefault(settings);

Note that the Regex I am using replaces all Unicode non-alphanumeric characters as defined by the table of Unicode character categories. For more options, including removing all non-ASCII, non-alphanumeric characters, see How do I strip non-alphanumeric characters (including spaces) from a string?.

Note that if, by removing non-alphanumeric characters from property names, your contract resolver attempts to add duplicate extension data keys, a JsonSerializationException: Error setting value in extension data ---> System.ArgumentException: An item with the same key has already been added will get thrown.

Sign up to request clarification or add additional context in comments.

1 Comment

You are awesome ! Thank you very much !
0

Sure! You can use regex. You can use [^a-zA-Z0-9] to match any non-alphanumeric character, and replace it with an empty string. In your JsonConverter, you can create a JObject and iterate over its properties, replacing them with ones with a corrected name.

I came up with a generic solution which accepts a Regex object and a replacement string as constructor parameters:

Implementation:

/// <summary>
/// Read-only converter for <see cref="Dictionary{TKey,TValue}"/> targets that rewrites the
/// top-level JSON property names with a configurable regular expression before the
/// dictionary is materialized.
/// </summary>
class DictionaryRegexReplaceJsonConverter : JsonConverter
{
    /// <summary>Pattern whose matches are replaced in every property name.</summary>
    public Regex ReplacingRegex { get; set; }

    /// <summary>Text substituted for each match of <see cref="ReplacingRegex"/>.</summary>
    public string Replacement { get; set; }

    /// <summary>Creates a converter with the given pattern and replacement (empty by default).</summary>
    public DictionaryRegexReplaceJsonConverter(Regex replacingRegex, string replacement = "")
    {
        ReplacingRegex = replacingRegex;
        Replacement = replacement;
    }

    /// <summary>Returns true for any closed <c>Dictionary&lt;,&gt;</c> type.</summary>
    public override bool CanConvert(Type objectType)
    {
        return objectType.IsGenericType && objectType.GetGenericTypeDefinition() == typeof(Dictionary<,>);
    }

    /// <summary>
    /// Loads the current JSON value, renames its direct properties, and converts the result
    /// to <paramref name="objectType"/>.
    /// </summary>
    /// <remarks>
    /// If two keys map to the same new name, replacing the property fails with the
    /// exception raised by Json.NET for duplicate property names.
    /// </remarks>
    public override object ReadJson(JsonReader reader, Type objectType, object existingValue, JsonSerializer serializer)
    {
        var root = JToken.ReadFrom(reader);

        // Snapshot the children first: properties are replaced while iterating.
        foreach (JProperty property in root.Children<JProperty>().ToList())
        {
            // Match against the raw property name. JToken.Path may wrap names that
            // contain special characters in ['...'], and that decoration would leak
            // into the key whenever the caller's regex does not happen to strip it.
            string replacedName = ReplacingRegex.Replace(property.Name, Replacement);
            if (replacedName != property.Name)
            {
                property.Replace(new JProperty(replacedName, property.Value));
            }
        }

        // The serializer is intentionally not passed on: it would pick this
        // converter again for the same dictionary type.
        return root.ToObject(objectType);
    }

    /// <summary>Writing is not supported by this converter.</summary>
    /// <exception cref="NotSupportedException">Always thrown.</exception>
    public override void WriteJson(JsonWriter writer, object value, JsonSerializer serializer)
    {
        throw new NotSupportedException();
    }
}

Usage:

// Strip everything except ASCII letters and digits from the dictionary keys.
Regex regex = new Regex("[^a-zA-Z0-9]");
var converter = new DictionaryRegexReplaceJsonConverter(regex);
string json = "{\"dc:title\":\"My Document title\", \"Another Example!\": \"foo\"}";

// {"dctitle": "My Document title", "AnotherExample": "foo"}
var obj = JsonConvert.DeserializeObject<Dictionary<string, object>>(json, converter);

If performance is of concern, this could be more optimized by manually reading from the JsonReader and forming your object, rather than using JObject and replacing properties.

1 Comment

Thank you. That works pretty well if I follow your code. Is there a way to combine it with the ExtensionData attribute? [JsonExtensionData] public Dictionary<string, object> Metadata { get; set; } I tried to add a [JsonConverter(typeof(MyConverter))] but that does not seem to be recognized :-(

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.