def store_embeddings_in_astradb(embeddings,text_chunks, metadata):
vstore = AstraDBVectorStore(
collection_name="test",
embedding=embedding_model,
token=os.getenv("ASTRA_DB_APPLICATION_TOKEN"),
api_endpoint=os.getenv("ASTRA_DB_API_ENDPOINT"),
)
print("after Vstore")
# Create documents with page content, embeddings, and metadata
documents = [
{
"page_content": chunk,
"metadata": metadata
}
for chunk in text_chunks
]
for doc in documents:
print(f"Document structure: {doc}")
print("after documents")
# Add documents to AstraDB vector store
inserted_ids = vstore.add_documents(documents)
return inserted_ids
# List of PDF files to process
pdf_files = ["WhatYouNeedToKnowAboutWOMENSHEALTH.pdf", "Womens-Health-Book.pdf"]
# Initialize embedding model
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# Process each PDF file
for pdf_file in pdf_files:
if not os.path.isfile(pdf_file):
raise ValueError(f"PDF file '{pdf_file}' not found.")
print(f"Processing file: {pdf_file}")
# Extract text from PDF
text = extract_text_from_pdf(pdf_file)
# Split text into chunks
text_chunks = split_text_into_chunks(text)
# Embed text chunks
embeddings = embed_text_chunks(text_chunks, embedding_model)
# Extract metadata
metadata = extract_metadata(pdf_file)
# Store embeddings in AstraDB
try:
inserted_ids = store_embeddings_in_astradb(embeddings,text_chunks, metadata)
print(f"Inserted {len(inserted_ids)} embeddings from '{pdf_file}' into AstraDB.")
except Exception as e:
print(f"Failed to insert embeddings for '{pdf_file}': {e}")
This is the code iam using to convert text chunks into embeddings and then store them in the AstraDB. At the time of insertion iam getting error 'dict' object has no attribute 'page_content'. How to resolve it?