0

I am using the BinaryFormatter to serialize an object graph, and when there are multiple references to the same object, I want this to be preserved after deserialization, i.e. the references in the deserialized data should still refer to the same object.

This seems to work for all objects that are part of the same Deserialize() call, but not for separate calls on the same BinaryFormatter instance. Is there a way to configure it differently, so that it will preserve references correctly over multiple calls?

See the following example, where ReferenceEquals() with objects from different Deserialize() calls returns false.

using System;
using System.Diagnostics;
using System.IO;
using System.Runtime.Serialization.Formatters.Binary;

namespace SerializeTest
{
  /// <summary>
  /// Minimal serializable payload used to check whether object identity
  /// survives a BinaryFormatter round trip.
  /// </summary>
  [Serializable]
  class Data
  {
    /// <summary>Arbitrary payload value carried by the instance.</summary>
    public int Value { get; set; }
  }

  /// <summary>
  /// Repro: writes the same <c>Data</c> instance with two separate Serialize calls,
  /// reads it back with two separate Deserialize calls, and asserts on reference identity.
  /// </summary>
  class Program
  {
    // Scratch file shared by the write and the read phase.
    private const string FileName = "serialized.dat";

    static void Main(string[] args)
    {
      WriteSample(new Data { Value = 1234 });
      ReadAndCheck();
    }

    /// <summary>
    /// Serializes <paramref name="shared"/> twice into the scratch file: first as both
    /// halves of a tuple, then on its own with a second Serialize call.
    /// </summary>
    private static void WriteSample(Data shared)
    {
      using (var output = new FileStream(FileName, FileMode.Create, FileAccess.Write))
      {
        var writer = new BinaryFormatter();
        writer.Serialize(output, (shared, shared));
        writer.Serialize(output, shared);
      }
    }

    /// <summary>
    /// Deserializes both payloads with one formatter and asserts that all three
    /// references point at the same object.
    /// </summary>
    private static void ReadAndCheck()
    {
      using (var input = new FileStream(FileName, FileMode.Open, FileAccess.Read))
      {
        var reader = new BinaryFormatter();

        var pair = ((Data, Data))reader.Deserialize(input);
        var single = (Data)reader.Deserialize(input);

        // Identity is preserved inside one Deserialize call ...
        Debug.Assert(ReferenceEquals(pair.Item1, pair.Item2), "objects from same call");
        // ... but this is the assertion the question is about: the object from the
        // second Deserialize call is reported not to be the same reference.
        Debug.Assert(ReferenceEquals(pair.Item1, single), "objects from different calls");
      }
    }
  }
}

1 Answer 1

2

On the face of it, this isn't possible, because object references are only tracked within a single Deserialize() call. Internally, each call creates a new binary object reader which holds the references, so nothing is carried over to the next call.

If you only need to know whether the objects are equivalent, you could implement value equality.

/// <summary>
/// Serializable payload with value equality: two instances are equal when their
/// <see cref="Value"/> matches, regardless of reference identity.
/// </summary>
[Serializable]
class Data
{
    /// <summary>Payload value; also the only input to equality and hashing.</summary>
    public int Value { get; set; }

    /// <summary>
    /// Returns true when <paramref name="obj"/> is a <see cref="Data"/> carrying the same
    /// <see cref="Value"/>; false for null or any other type.
    /// </summary>
    public override bool Equals(object obj)
    {
        if (obj is Data other)
        {
            return other.Value == Value;
        }

        return false;
    }

    /// <summary>
    /// Hash consistent with <see cref="Equals(object)"/>. It follows the mutable
    /// <see cref="Value"/>, so it changes whenever Value is reassigned.
    /// </summary>
    public override int GetHashCode()
    {
        return Value.GetHashCode();
    }
}

and this passes:

// Value equality (object.Equals dispatches to the overridden Data.Equals), not reference identity.
Debug.Assert(object.Equals(data1, data2), "objects from same call");
Debug.Assert(object.Equals(data1, data3), "objects from different calls");

If you absolutely need reference equality, you could try to share state between multiple calls by implementing an ISerializationSurrogate, and memoizing the results.

Update:

Well, I'd feel bad sending you off on a wild goose chase for a solution that may not even be feasible. So I tried it myself:

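Let's try adding a type which wraps reference equality by mapping each instance to an id. GetHashCode is the quickest way to get one, but hash codes are not guaranteed to be unique (and a type may override GetHashCode with value semantics), so a real implementation needs a collision-free id.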

/// <summary>
/// Wraps a value together with an integer id that stands in for the value's reference
/// identity, so that a surrogate can hand back one shared <see cref="Ref{T}"/> for every
/// occurrence of the same id, even across separate Deserialize calls.
/// </summary>
/// <typeparam name="T">
/// Type of the wrapped value. Intended for reference types; a value type is boxed on every
/// construction and therefore receives a fresh id each time.
/// </typeparam>
[Serializable]
public class Ref<T>
{
    // Identity map: object -> boxed id. Keys are compared by reference, so an overridden
    // Equals/GetHashCode on T cannot make two distinct objects share an id (which a plain
    // value.GetHashCode() id would). Static, hence not part of the serialized state.
    private static readonly System.Runtime.CompilerServices.ConditionalWeakTable<object, object> s_ids =
        new System.Runtime.CompilerServices.ConditionalWeakTable<object, object>();

    // Last id handed out for this closed generic type; incremented atomically.
    private static int s_lastId;

    /// <summary>The wrapped value.</summary>
    public readonly T Value;

    /// <summary>
    /// Id of <see cref="Value"/>. Wrappers created in this process around the same object
    /// share the id; wrappers around different objects get different ids.
    /// </summary>
    public readonly int Id;

    /// <summary>Wraps <paramref name="value"/> and assigns its identity id.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="value"/> is null.</exception>
    public Ref(T value)
    {
        if (value == null)
        {
            throw new ArgumentNullException(nameof(value));
        }

        Value = value;
        Id = (int)s_ids.GetValue(
            value,
            key => (object)System.Threading.Interlocked.Increment(ref s_lastId));
    }

    /// <summary>
    /// Surrogate that memoizes <see cref="Ref{T}"/> instances by id for as long as the
    /// surrogate itself lives.
    /// </summary>
    /// <remarks>
    /// NOTE(review): ids are only meaningful relative to the process that wrote them, and the
    /// same table is used for writing and reading. Use one surrogate per logical stream/session;
    /// feeding it data written by unrelated runs can map different objects to the same id.
    /// The table also keeps every instance it has seen alive.
    /// </remarks>
    protected class RefSurrogate : ISerializationSurrogate
    {
        private readonly Dictionary<int, Ref<T>> _instances = new Dictionary<int, Ref<T>>();

        /// <summary>
        /// Always writes the id; writes the wrapped value only the first time a given id
        /// passes through this surrogate.
        /// </summary>
        public void GetObjectData(object obj, SerializationInfo info, StreamingContext context)
        {
            var wrapper = (Ref<T>)obj;
            info.AddValue(nameof(Id), wrapper.Id);

            if (!_instances.ContainsKey(wrapper.Id))
            {
                _instances.Add(wrapper.Id, wrapper);
                info.AddValue(nameof(Value), wrapper.Value);
            }
        }

        /// <summary>
        /// Returns the instance already registered for the serialized id, or builds one from
        /// the serialized value and registers it under that id.
        /// </summary>
        /// <returns>
        /// The shared <see cref="Ref{T}"/> for the id; <paramref name="obj"/> itself is not used.
        /// </returns>
        public object SetObjectData(object obj, SerializationInfo info, StreamingContext context, ISurrogateSelector selector)
        {
            int id = info.GetInt32(nameof(Id));

            if (_instances.TryGetValue(id, out var known))
            {
                return known;
            }

            // The new wrapper receives an id that is valid in *this* process; the serialized
            // id is only used as the lookup key for later occurrences in the stream.
            var created = new Ref<T>((T)info.GetValue(nameof(Value), typeof(T)));
            _instances[id] = created;
            return created;
        }
    }

    /// <summary>
    /// Registers a fresh <see cref="RefSurrogate"/> for <see cref="Ref{T}"/> on
    /// <paramref name="ss"/> (for all streaming contexts) and returns the same selector.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="ss"/> is null.</exception>
    public static SurrogateSelector AddTo(SurrogateSelector ss)
    {
        if (ss == null)
        {
            throw new ArgumentNullException(nameof(ss));
        }

        ss.AddSurrogate(typeof(Ref<T>), new StreamingContext(StreamingContextStates.All), new RefSurrogate());
        return ss;
    }
}

And the test program:

/// <summary>
/// Minimal serializable payload used as the wrapped value in the round-trip test below.
/// </summary>
[Serializable]
class Data
{
    /// <summary>Arbitrary payload value carried by the instance.</summary>
    public int Value { get; set; }
}

/// <summary>
/// Round-trip check for <c>Ref&lt;Data&gt;</c>: the same wrapper is written by two separate
/// Serialize calls and read back by two separate Deserialize calls, then the wrapped values
/// are compared by reference.
/// </summary>
class Program
{
    // Scratch file shared by the write and the read phase.
    private const string FileName = "serialized.dat";

    static void Main(string[] args)
    {
        var shared = new Ref<Data>(new Data { Value = 1234 });
        var formatter = new BinaryFormatter();

        Write(formatter, shared);
        ReadBackAndCheck(formatter);
    }

    /// <summary>
    /// Writes <paramref name="shared"/> twice: as both halves of a tuple, then alone.
    /// No surrogate is installed on the formatter at this point.
    /// </summary>
    private static void Write(BinaryFormatter formatter, Ref<Data> shared)
    {
        using (var output = new FileStream(FileName, FileMode.Create, FileAccess.Write))
        {
            formatter.Serialize(output, (shared, shared));
            formatter.Serialize(output, shared);
        }
    }

    /// <summary>
    /// Installs the Ref surrogate on <paramref name="formatter"/>, reads both payloads and
    /// asserts that all wrapped values are the same object.
    /// </summary>
    private static void ReadBackAndCheck(BinaryFormatter formatter)
    {
        using (var input = new FileStream(FileName, FileMode.Open, FileAccess.Read))
        {
            // One selector (and thus one surrogate) serves both Deserialize calls, which is
            // what lets the second call find the instance created by the first.
            formatter.SurrogateSelector = Ref<Data>.AddTo(new SurrogateSelector());

            var pair = ((Ref<Data>, Ref<Data>))formatter.Deserialize(input);
            var single = (Ref<Data>)formatter.Deserialize(input);

            Debug.Assert(ReferenceEquals(pair.Item1.Value, pair.Item2.Value), "objects from same call");
            Debug.Assert(ReferenceEquals(pair.Item1.Value, single.Value), "objects from different calls");
        }
    }
}

And that works.

Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.