I'm experiencing a CPU memory leak while running a Python script that processes text using various NLP models in an infinite loop. The script includes language translation, sentiment analysis, and topic classification. Here's a simplified version of the problematic code:
import ctranslate2
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelForSequenceClassification , DistilBertForSequenceClassification, DistilBertTokenizer
import spacy
spacy.require_gpu()
ner_model = spacy.load('ner_model_path', disable=["tagger", "parser", "attribute_ruler", "lemmatizer"])
OTHER_LANG_DICT = {'hi': 'nllb_hi'}
LANGUAGE_MODEL = ctranslate2.Translator('nllb-200-3.3B-int8', device="cuda")
def convertToEng(input_data):
try:
for k,v in input_data.items():
text = v['text']
lang = v['lang']
if lang and lang == 'en':
translated_text = text
if lang and lang in OTHER_LANG_DICT:
tokenizer = AutoTokenizer.from_pretrained(LANGUAGE_MODEL, src_lang=OTHER_LANG_DICT[lang])
tokens = tokenizer.encode(text, return_tensors="pt")
tokens_list = tokenizer.convert_ids_to_tokens(tokens[0])
results = LANGUAGE_MODEL.translate_batch([tokens_list], target_prefix=[["eng_Latn"]])
target = results[0].hypotheses[0][1:]
translated_text = tokenizer.decode(tokenizer.convert_tokens_to_ids(target), skip_special_tokens=True)
return translated_text
except Exception as e:
print(str(e))
MC_TOKENIZER = AutoTokenizer.from_pretrained('bert_model_path')
MC_MODEL = AutoModelForSequenceClassification.from_pretrained('bert_model_path').to("cuda")
MC_MODEL.eval()
THRESHOLD = 0.3
MODEL_3_MODEL = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased_model_path').to(torch.device("cuda"))
MODEL_3_TOKENIZER = DistilBertTokenizer.from_pretrained('distilbert-base-uncased_model_path')
def model2_prediction(input_text):
try:
tokens = MC_TOKENIZER(input_text, add_special_tokens=True, return_tensors="pt", padding=True)
tokens = {key: value.to('cuda') for key, value in tokens.items()}
with torch.no_grad():
logits = MC_MODEL(**tokens)[0].to('cuda')
pred = F.softmax(logits, dim=1)
filtered_classes = [[i for i, class_prob in enumerate(prob) if class_prob >= THRESHOLD] for prob in pred]
return filtered_classes
except Exception as e:
print(str(e))
def model2_labelling(input_data: str):
try:
classes = []
selected_classes = model2_prediction(input_data)
for class_id in selected_classes[0]:
class_name = MC_MODEL.config.id2label[class_id]
classes.append(class_name)
return classes
except Exception as e:
print(str(e))
def Model3(input_data: dict):
try:
id_val = input_data["id"]
text_val = str(input_data["text"]).lower()
tokens = MODEL_3_TOKENIZER(text_val, padding=True, truncation=True, return_tensors="pt")
tokens = {k: v.to(torch.device("cuda")) for k, v in tokens.items()}
with torch.no_grad():
outputs = MODEL_3_MODEL(**tokens)
pred = F.softmax(outputs.logits, dim=1).tolist()[0]
pred.insert(1, pred.pop(2))
result = {"id": id_val, "sentiment": pred}
return result
except Exception as e:
print(str(e))
def text_preprocessing(text):
pass
def ner_pred(text):
text = text_preprocessing(text)
doc = ner_model(text)
entity = []
for ent in doc.ents:
if ent.label_ == "PERSON":
entity.append(ent.text)
return entity
def ner_result(text):
pass # post processing of the result
def get_result(queue_name):
try:
data = queue_name
data = convertToEng(input_data = data)
data_text = data.get('text')
if data_text:
id = data['id']
# sentiment = SentimentAnalysis(input_data = {"id": id, "text": ' '.join(data_text.split()[:20])})
sentiment = Model3(input_data = {"id": id, "text": ' '.join(data_text.split()[:20])})
topics = model2_labelling(input_data = data_text)
final_result = [id,{'sentiment':sentiment['sentiment'],'topic':topics}]
return final_result
except Exception as e:
print(e)
data = {"doc1":{'text': 'ram is a good boy.', 'lang': 'en'}}
if __name__ == "__main__":
while True:
status = get_result(queue_name=data)
I an using following library along with python version 3.10 and i have NVIDIA GeForce RTX 3070, Driver Version: 535.183.01, CUDA Version: 12.2:
nvidia-cublas-cu12 12.1.3.1
nvidia-cuda-cupti-cu12 12.1.105
nvidia-cuda-nvrtc-cu12 12.1.105
nvidia-cuda-runtime-cu12 12.1.105
nvidia-cudnn-cu12 8.9.2.26
nvidia-cufft-cu12 11.0.2.54
nvidia-curand-cu12 10.3.2.106
nvidia-cusolver-cu12 11.4.5.107
nvidia-cusparse-cu12 12.1.0.106
nvidia-nccl-cu12 2.18.1
nvidia-nvjitlink-cu12 12.5.40
nvidia-nvtx-cu12 12.1.105
torch 2.1.0
spacy 3.7.4
spacy-alignments 0.9.1
spacy-curated-transformers 0.2.2
spacy-legacy 3.0.12
spacy-loggers 1.0.5
spacy-transformers 1.3.5
accelerate 0.29.3
transformers 4.36.2
I have tried to clear the cache using gc and also i have set the environment to os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128,garbage_collection_threshold:0.8' and
os.environ['ONEDNN_PRIMITIVE_CACHE_CAPACITY'] = '0'
Also i have deleted all the variable used in the function after they return the result in try block. i deleted in finallyblock. BUT NO IMPROVEMENT.