1
typedef unsigned long Count;
typedef float Weight;
typedef std::map<std::string, Count> StringToCountMap;
typedef std::map<std::string, Weight> StringToWeightMap;
typedef std::map<unsigned long, StringToCountMap> UnsignedToStringToCountMap;
typedef std::map<unsigned long, StringToWeightMap> UnsignedToStringToWeightMap;

typedef std::map<unsigned long, std::size_t> ClustersMap;


class DefaultClusteringAlgorithm
{
public:
    // minumum number of documents changing clusters for algorithm to end
    static const unsigned DocumentChangeThreshold = 0;

    DefaultClusteringAlgorithm(unsigned numClusters, const UnsignedToStringToWeightMap &documentVectors)
        : numClusters_(numClusters)
        , documentVectors_(documentVectors)
    {
    }

~DefaultClusteringAlgorithm() {}

const ClustersMap &DoClustering();

private:
    void ChooseInitialCentroids();
    unsigned ClusterOnCentroids();
    void RecalculateCentroids();
    float DocumentDotProduct(const StringToWeightMap &left, const StringToWeightMap &right);
    float DocumentLength(const StringToWeightMap &document);

    unsigned numClusters_;

    // stores cluster_id => centroid
    std::vector<StringToWeightMap> centroids_;

    // maps question id => cluster id
    ClustersMap clusters_;

    // document vector
    const UnsignedToStringToWeightMap &documentVectors_;
};

void DefaultClusteringAlgorithm::RecalculateCentroids()
{
    std::vector<unsigned> newCentroidsSizes(centroids_.size());
    std::vector<StringToWeightMap> newCentroids(centroids_.size());

    ClustersMap::const_iterator clusterMapping = clusters_.begin();

    for (; clusterMapping != clusters_.end(); ++clusterMapping)
    {
        std::size_t clusterId = clusterMapping->second;

        ++newCentroidsSizes[clusterId];
        const StringToWeightMap &document = documentVectors_.at(clusterMapping->first);

        StringToWeightMap::const_iterator termWeight = document.cbegin();

        for (; termWeight != document.end(); ++termWeight);
        {
            newCentroids[clusterId][termWeight->first] += termWeight->second;
        }
    }

    std::vector<unsigned>::iterator centroidSize = newCentroidsSizes.begin();

    for (; centroidSize != newCentroidsSizes.end(); ++centroidSize)
    {
        std::size_t clusterId = centroidSize - newCentroidsSizes.begin();

        StringToWeightMap::iterator centroidTermWeight = newCentroids[clusterId].begin();

        for (; centroidTermWeight != newCentroids[clusterId].end(); ++centroidTermWeight)
        {
            centroidTermWeight->second /= *centroidSize;
        }
    }
}

debugger watch

The problem occurs in creating the const_iterator termWeight:

StringToWeightMap::const_iterator termWeight = document.begin();

As you can see in the image above the termWeight const_iterator has invalid data. However, the const std::map document is a perfectly valid std::map. I cannot think of any reason why this is happening.

I recently learned that std::map::cbegin() exists. Should I be using that method instead?

EDIT: Included more context

8
  • Which line of code is about to be executed when it shows the above memory dump? The data displayed for termWeight is invalid until after the line where it's first assigned. Commented Nov 29, 2013 at 3:55
  • cbegin won't help. Are there any const_casts in your code base? One way to get really bad behavior out of a std::map is to reorder the contents of the map while they are in the map. Second, what line are you on? Has any iteration occurred? Are there any other threads? What is the type of documentVectors_? Commented Nov 29, 2013 at 3:55
  • The next line to be executed is the single line in the inner-most for loop. So termWeight has already been assigned. Commented Nov 29, 2013 at 3:57
  • There are no other threads. I'll edit the question to show the type of documentVectors_ Commented Nov 29, 2013 at 3:58
  • If you're going to use cbegin(), you should be using cend() as well. Commented Nov 29, 2013 at 4:48

2 Answers 2

2

Hah! I found the error! A silly little semi-colon at the end of my for loop!

Sign up to request clarification or add additional context in comments.

Comments

0

The std::map begin() method could be returning an iterator that is pointing to the end of map since there may not be any elements in the map at all.

3 Comments

He's checking for that case in his for loop, although he should be using cend() instead of just end().
Notice that document has a size of 24 in the debugging window.
@JohnSaxton thanks for pointing that out. I recently changed it to cbegin() and forgot to make the appropriate change for cend()

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.