I am using the code from Study 2A in this repository (https://github.com/yilangpeng/image-clustering/tree/main/study2A). In step D (k-means clustering) I ran into a problem: I need to assign the k-means label to every picture, but k-means returns only 9 labels while my image list contains 200 pictures, so the lengths do not match. My code and the error message are below.
import joblib, os
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
import ypoften as of
# Cluster PCA-reduced image features with k-means and write one label file
# per (model, K) combination.
#
# For every model in `cvmodels` the script loads the saved feature matrix,
# keeps the first `nd` columns, fits KMeans for each K, appends the resulting
# cluster label to the image list and saves it as a tab-separated text file.

# Project root. Please change this to the folder that has all the images.
cwd = os.path.join('/content/drive/MyDrive/study2A/for lab teaching', "")

# Feature sets to cluster on; choose the model(s) you used.
# Other options: "vgg16 places fc1", "vggface fc6".
cvmodels = ["vgg16 fc1"]
clmethod = "KMeans"

# Number of leading feature columns (PCA components) used for clustering.
nd = 200

# Image list that the labels are attached to. It does not depend on the
# model or on K, so it is read once instead of on every iteration.
# header=0: the first line of the file is taken as column names and is
# therefore NOT counted as an image row.
imgnamefile = os.path.join(cwd, "0321_imageselected_A.txt")
df = pd.read_csv(imgnamefile, sep='\t', header=0)

for cvmodel in cvmodels:
    features_savepath = os.path.join(cwd, 'img exfeature1', 'features PCA', cvmodel + '.dat')
    features_array = joblib.load(features_savepath)
    X = pd.DataFrame(features_array)
    print(X)
    # Slicing past the last column is harmless: iloc just returns what exists.
    X = X.iloc[:, 0:nd]
    savefolder = cvmodel + ' ' + clmethod + ' PCA' + str(nd)

    # KMeans yields exactly one label per row of X, so X must have exactly
    # one row per entry of the image list. Fail early with an actionable
    # message instead of pandas' "Length of values (..) does not match
    # length of index (..)" raised by the column assignment further down.
    # NOTE(review): equal row counts do not prove equal ordering -- the
    # feature file and the image list presumably have to come from the same
    # extraction run, in the same order; confirm against the extraction step.
    if len(X) != len(df):
        raise ValueError(
            "Feature file {!r} contains {} rows (images) but image list {!r} "
            "contains {} rows. Re-run feature extraction / PCA on the same "
            "images that are listed in the image list (same order), or point "
            "'imgnamefile' to the list that was used during extraction."
            .format(features_savepath, len(X), imgnamefile, len(df))
        )

    for K in [2]:
        print('number of cluster', K)
        cl = KMeans(K, random_state=0)
        cl.fit(X)
        labels = cl.labels_
        print(df)
        # Build a fresh frame per K so the shared image list stays unmodified.
        labelled = df.assign(label=labels)
        filepath = os.path.join(cwd, 'img cluster', savefolder, str(K), 'label.txt')
        of.create_path(filepath)
        labelled.to_csv(filepath, index=None, header=None, sep='\t')

    print("DONE"*20)
ValueError: Length of values (9) does not match length of index (200)
I want to produce a txt file that includes the cluster label for each image so that I can proceed with the following steps.