i am calling a function vision to process images, i want to process a huge amount of images and write the data to file. i am using multithreading and running 4 threads and passing a different list of imagename to each thread but when i write the data to file it is overwriting and showing result of only one thread. how can process these images faster and write the data of coming from function in a file. this is my vision function.
def vision(filelist):
from google.cloud import vision
from google.cloud.vision import types
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = r'Logo Detection-ecc10ae26b70.json'
global lck
global df
global dff
lck.acquire()
for i in range(len(onlyfiles)):
imagepath=batch_folder+"\\"+onlyfiles[i]
with io.open(onlyfiles[i], 'rb') as image_file:
content = image_file.read()
logolist=[]
labellist=[]
objectlist=[]
image = vision.types.Image(content=content)
response = client.logo_detection(image=image)
logos = response.logo_annotations
response = client.label_detection(image=image)
labels = response.label_annotations
objects = client.object_localization(image=image).localized_object_annotations
for logo in logos:
logolist.append(logo.description)
for label in labels:
labellist.append(label.description)
for obj in objects:
objectlist.append(obj.name)
logolist2.append(logolist)
labellist2.append(labellist)
objectlist2.append(objectlist)
dff=[]
lck.acquire()
dff = pd.DataFrame(list(zip(onlyfiles,logolist2,labellist2,objectlist2)),columns =['filename', 'logo','label','object'])
df.append(dff,ignore_index=True)
lck.release()
df.to_csv(csvadd, index=False)
############image reading and divding data for threads#################
batch_folder='D:\\Projects\\VAT\\fakedetection\\keyframe\\ffmpeg-4.2.1-win64-static\\video'
onlyfiles = fnmatch.filter(os.listdir(batch_folder), '*.jpg')
countofimage=len(onlyfiles)
division=int(countofimage/4)
index1=[0,division]
index2=[division+1,division+division]
index3=[division+division+1,division+division+division]
index4=[division+division+division+1,division+division+division+division]
threadfile1=onlyfiles[index1[0]:index1[1]]
threadfile2=onlyfiles[index2[0]:index2[1]]
threadfile3=onlyfiles[index3[0]:index3[1]]
threadfile4=onlyfiles[index4[0]:index4[1]]
t1 = threading.Thread(target=vision, args=(threadfile1,))
t2 = threading.Thread(target=vision, args=(threadfile2,))
t3 = threading.Thread(target=vision, args=(threadfile3,))
t4 = threading.Thread(target=vision, args=(threadfile4,))
# starting thread 1
t1.start()
# starting thread 2
t2.start()
t3.start()
t4.start()
# wait until thread 1 is completely executed
t1.join()
# wait until thread 2 is completely executed
t2.join()
t3.join()
t4.join()
# both threads completely executed
print("Done!")
i tired lock and global df but the output is not coming from all threads in the file.if there is any improvement needed for this code which can process faster please suggest and help to write data coming from threads to a file the code is working fine no error. i am just not just getting proper output