0

I'm building a RAG chatbot that takes the content stored in my MongoDB collection and sends it to Pinecone as vectors.

That way, users can ask the chatbot questions about their own reflections.

Here is my code:

import os
from pymongo import MongoClient
from pinecone import Pinecone, ServerlessSpec
from pymongo.errors import OperationFailure
from sentence_transformers import SentenceTransformer, util
from certifi import where  # Import certifi library

# MongoDB connection: TLS is enabled and server certificates are verified
# against the CA bundle whose path certifi's where() returns.
client = MongoClient(
    "my-mongodb-uri",
    tls=True,  # Enable TLS encryption
    tlsAllowInvalidCertificates=False,  # Don't allow invalid certificates
    tlsCAFile=where()  # Use certifi library for CA bundle
)
db = client['test']
# Collection whose change events are watched further down.
collection = db['reflections']

# Pinecone initialization: obtain a handle to the "langchain-demo" index
# (the index is not created anywhere in this script).
pc = Pinecone(api_key='my-api-key')
index = pc.Index("langchain-demo")

# Sentence-transformer model used below to encode each document's 'content'.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Watch for changes: iterate over the collection's change stream and mirror
# each insert / update / delete event into the Pinecone index.
try:
  cursor = collection.watch()
  for change in cursor:
    print("Change detected:", change)
    if change['operationType'] == 'insert':
      document = change['fullDocument']
      vector = model.encode(document['content'])  # Assuming 'content' is the field
      print("Extracted Vector:", vector)

      # Extract document ID from ObjectId
      # NOTE(review): this is the line that fails. str() of the _id presumably
      # yields the bare hex string with no quote characters, so split("'")
      # returns a single element and [1] raises "list index out of range".
      # The update/delete branches below use str(...) without the split.
      document_id = str(document['_id']).split("'")[1]

      # Wrap upsert call with empty vector check
      # NOTE(review): `vector` is printed above as a multi-element array;
      # truth-testing such an array may raise rather than mean "non-empty" —
      # confirm.
      if vector:  # Check if vector is not empty
        # NOTE(review): this passes a dict, while the update branch passes
        # vectors=[id] plus data=...; verify both against the Pinecone
        # client's upsert signature.
        index.upsert(vectors={document_id: vector})

    elif change['operationType'] == 'update':
      document_id = str(change['documentKey']['_id'])
      updated_fields = change['updateDescription']['updatedFields']
      # Re-embed only when the 'content' field itself was modified.
      if 'content' in updated_fields:
        vector = model.encode(updated_fields['content'])
        index.upsert(vectors=[document_id], data=vector.tolist())

    elif change['operationType'] == 'delete':
      # Remove the vector stored under the deleted document's id.
      document_id = str(change['documentKey']['_id'])
      index.delete(ids=[document_id])

except OperationFailure as e:
  print("Error watching collection:", e)
# Any other exception raised inside the loop ends the watcher and is only
# printed as its message (no traceback).
except Exception as e:
  print("An error occurred:", e)

This is the log I received on my terminal:

Change detected: {'_id': {'_data': '82665622AE0000000B2B042C0100296E5A1004F1A0DC5D2C0C4EC2843048538C6B36F3463C6F7065726174696F6E54797065003C696E736572740046646F63756D656E744B65790046645F69640064665622AE13B3C25B81FE46C1000004'}, 'operationType': 'insert', 'clusterTime': Timestamp(1716921006, 11), 'wallTime': datetime.datetime(2024, 5, 28, 18, 30, 6, 916000), 'fullDocument': {'_id': ObjectId('665622ae13b3c25b81fe46c1'), 'user': ObjectId('65d8937f6408bf2c0ca8d264'), 'content': 'teste mongodb', 'createdAt': datetime.datetime(2024, 5, 28, 18, 30, 6, 908000), '__v': 0}, 'ns': {'db': 'test', 'coll': 'reflections'}, 'documentKey': {'_id': ObjectId('665622ae13b3c25b81fe46c1')}}
Extracted Vector: [ 2.64226589e-02  5.51917292e-02 -8.01229179e-02  6.16759956e-02
  2.97571346e-03 -5.25409095e-02 -2.06136722e-02  2.41196547e-02
  1.70215759e-02  4.23866622e-02  6.73603592e-03 -5.09259291e-02
 -1.48372846e-02  7.09723681e-03 -1.48236733e-02 -1.65749993e-02
  9.42820311e-03 -3.47889923e-02  4.76156734e-02 -1.14416014e-02
 -2.76810937e-02 -7.33586177e-02  2.79922988e-02  4.48221937e-02
 -3.42520475e-02 -7.56083280e-02 -1.88546516e-02  3.71571630e-02
 -3.63041870e-02 -5.30020148e-02  4.92156222e-02  3.24101970e-02
  1.43917967e-02  2.31850450e-03 -3.07541038e-03  1.03986263e-02
  6.79664016e-02 -5.86303510e-02 -1.68009251e-02 -3.78069915e-02
  2.32911427e-02 -4.27663438e-02 -2.12721266e-02 -5.84340282e-02
  1.03256971e-01 -7.78031126e-02 -4.44727167e-02  1.10542767e-01
  6.30531460e-02 -3.19500417e-02  2.60527879e-02 -1.16486132e-01
 -5.51996529e-02  4.62782234e-02  3.89385074e-02  1.58163980e-01
 -8.12400039e-03 -3.00704502e-02 -3.35364193e-02  3.37796435e-02
  5.67190908e-02 -3.78245488e-02 -3.72845195e-02  3.34226415e-02
 -2.56197937e-02 -1.38711361e-02  3.36623588e-03  3.23332138e-02
 -4.64090845e-03 -2.81529520e-02  7.84241222e-03  1.87840331e-02
 -4.04786393e-02 -9.18242242e-03  1.42984195e-02  9.59344432e-02
 -8.56031012e-03 -9.00166705e-02  6.34619594e-02  3.46942805e-02
 -1.21315375e-01 -1.27947167e-01  2.92107705e-02 -5.98839074e-02
 -6.66733552e-03  2.20386945e-02  1.06475495e-01 -5.25924191e-02
 -4.81234193e-02 -6.64262474e-03  2.43848264e-02  1.28781358e-02
 -4.63195667e-02  7.55516142e-02  1.91857126e-02  5.11478595e-02
  7.73477629e-02  5.94875030e-02  7.93703869e-02  2.19271239e-02
 -5.33815532e-04  1.04968296e-02  7.78110474e-02 -3.95663939e-02
 -7.16580264e-03  3.37898545e-02  2.74467710e-02 -8.29642192e-02
 -4.68915589e-02 -2.53224969e-02 -1.62706897e-02  2.37261020e-02
 -3.05816010e-02  6.37660455e-03 -6.75126612e-02 -4.52077389e-03
 -4.86059487e-02 -5.44997081e-02 -1.06597044e-01  9.05475393e-02
  5.58611341e-02  7.52945840e-02 -3.28133292e-02 -2.91952137e-02
  3.31597738e-02  3.51161021e-03  8.75394344e-02  1.03704995e-33
  1.38022542e-01 -9.83591303e-02  4.43550274e-02  1.05274946e-03
  2.88495906e-02  7.61957541e-02 -2.07854919e-02  7.20968395e-02
 -8.30703005e-02  1.15298852e-03 -3.55196968e-02  1.29330147e-03
  2.64357477e-02 -5.18404879e-02  6.31415769e-02  3.08009889e-02
  3.76578197e-02  3.31700668e-02  1.30407363e-02  2.17529833e-02
  2.64088046e-02 -2.77639963e-02 -5.22936359e-02 -1.95870139e-02
  6.81351684e-03  6.55588508e-02 -3.70829068e-02 -2.03726869e-02
 -1.98107120e-02  1.93433892e-02 -6.25248849e-02 -4.19677747e-03
 -8.86835158e-02  9.57719833e-02  9.21144336e-03  2.34254729e-02
 -4.18317653e-02 -1.78317651e-02 -9.96567160e-02  2.77951220e-03
  5.78196160e-02  2.66690087e-02  6.71238592e-03 -1.26469489e-02
 -5.32274581e-02  4.53201607e-02  4.15935442e-02 -7.02985674e-02
  8.65548104e-02  1.93077344e-02 -8.29852968e-02  1.86279765e-03
 -9.70464796e-02  3.69346216e-02  4.38100286e-02  1.50465965e-02
  1.20123737e-02 -1.99827086e-02  4.49663401e-02 -2.27664579e-02
 -5.99026829e-02  2.14360859e-02  5.63477119e-03  7.70357698e-02
  4.07660700e-04 -1.44859506e-02 -6.10246100e-02 -5.85204959e-02
  1.64570604e-02  6.53662756e-02  3.03732231e-02  3.93993221e-02
 -2.78256908e-02  1.81106180e-02 -9.54285823e-03 -4.35498394e-02
  1.26534468e-02  1.56740248e-02 -8.21447670e-02  6.25986466e-03
  6.70449436e-02 -8.75824168e-02 -8.16964507e-02  1.55098401e-02
  7.45111937e-03  1.05148785e-01 -7.09625939e-03  2.56238016e-03
  2.65282597e-02 -1.08919352e-01  3.68081091e-04  1.03041202e-01
 -1.69032291e-02 -9.65850055e-02  3.27670053e-02 -1.52392722e-33
  2.88561676e-02 -6.08335771e-02  2.32155789e-02  4.65114824e-02
  1.07367739e-01 -3.87591906e-02  3.08643673e-02  7.41644343e-03
 -5.42402901e-02 -1.43773090e-02  3.89164947e-02 -1.10371888e-01
  2.37809680e-03 -2.96618696e-02 -5.97673617e-02 -3.35118175e-02
 -5.04749045e-02 -1.19375162e-01 -8.40588752e-03 -8.33129417e-03
 -1.01422250e-01  1.81846786e-02  4.10847627e-02 -2.07867264e-03
 -1.45480633e-02 -9.40343514e-02 -3.80858555e-02 -9.28523913e-02
 -3.49474549e-02  3.57780121e-02  2.82644555e-02  5.27115576e-02
 -4.71878871e-02  7.05714822e-02  2.55910270e-02  9.02293995e-03
  8.85344148e-02  3.68806347e-02  7.09631816e-02  4.70345989e-02
 -1.22014368e-02  9.92123038e-02 -5.31965233e-02 -5.14485613e-02
  6.69255704e-02  4.21657562e-02  1.32231619e-02 -7.31633278e-03
  2.26458535e-02 -2.64296532e-02 -3.49785648e-02 -2.58285161e-02
  5.24073280e-02 -1.41270570e-02  3.76646109e-02 -5.85196391e-02
 -2.59447079e-02 -4.46911417e-02  5.75564057e-02  3.45758721e-02
  1.68277156e-02  3.87044102e-02 -1.67042874e-02  6.53192848e-02
 -1.53256878e-02 -3.99874747e-02 -1.04426391e-01  2.89602540e-02
  1.76746026e-02  6.27156952e-03 -4.18228246e-02 -1.63344350e-02
 -1.45597830e-02  7.30229691e-02  4.04479764e-02 -6.02601655e-02
 -4.42335121e-02 -1.17401704e-02  5.29759973e-02  1.76030397e-02
  1.29814809e-02 -1.15860929e-03  3.80812511e-02  2.16609016e-02
  1.23684702e-03 -7.47688487e-02  3.23086232e-02  1.71934050e-02
 -1.07854068e-01  4.13478501e-02 -1.69676244e-02  5.14116921e-02
 -6.50631189e-02  2.90679317e-02  2.16390658e-02 -1.40003591e-08
 -5.47276549e-02 -2.21079458e-02  1.12641910e-02  6.77396730e-02
  2.63435580e-02 -2.30627018e-03  4.84103598e-02  1.90388169e-02
  6.29420951e-02  4.62095030e-02 -2.75534745e-02  1.38814524e-02
 -1.55894198e-02  3.66799012e-02 -2.41456479e-02  8.84115696e-04
  3.62182893e-02 -5.34663617e-04  2.87991520e-02  7.80000463e-02
  6.44254833e-02 -1.21932197e-02  2.01403350e-02 -7.63562024e-02
  1.93959419e-02  6.84652850e-02  7.04346001e-02  8.58995169e-02
 -5.04256077e-02 -3.08988057e-02  1.17744971e-02  2.72314884e-02
  6.22073896e-02 -3.06474343e-02  1.02516115e-01  6.61610290e-02
  1.60890911e-02  7.22552836e-02 -5.08080684e-02  6.51256591e-02
 -3.40761431e-02 -1.58857908e-02  4.98002209e-02 -5.82708716e-02
 -3.21344063e-02 -1.43419847e-01  3.67835648e-02  4.03264500e-02
  4.75163683e-02 -1.04223825e-01  1.91467311e-02 -5.59284166e-02
  5.88361137e-02 -3.11761834e-02  4.66121845e-02  5.89613020e-02
  5.65763302e-02 -5.29688671e-02 -7.20504746e-02 -1.39309671e-02
  8.39550421e-02 -7.33920708e-02 -1.97879802e-02 -9.86750890e-03]

The problem is that I'm also receiving this error: `list index out of range`. I searched the internet, tried asking Gemini and OpenAI for help, and searched Stack Overflow.

I haven't yet found anyone with this particular error in this context.

2 Answers 2

0

The error is from the doc_id split line.

Your fullDocument dict looks like this:

{
    "_id": ObjectId("665622ae13b3c25b81fe46c1"),
    "user": ObjectId("65d8937f6408bf2c0ca8d264"),
    "content": "teste mongodb",
    "createdAt": datetime.datetime(2024, 5, 28, 18, 30, 6, 908000),
    "__v": 0,
}

So when you do

document = change['fullDocument']
...
document_id = str(document['_id']).split("'")[1]

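str() of an ObjectId is just its 24-character hex string (here 665622ae13b3c25b81fe46c1), so there's no ' in the _id. As a result, str(document['_id']).split("'") returns a list with only one element: that hex string. Since there is no element at index 1, indexing it with [1] raises IndexError (list index out of range).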

Also, why are you splitting _id on ' ?

It should probably be just:

document_id = str(document['_id'])

You have two other lines where this is done correctly:

document_id = str(change['documentKey']['_id'])
Sign up to request clarification or add additional context in comments.

1 Comment

To be honest, I don't know why I was splitting it like that. I was testing different ideas I found online that seemed worth trying. Thanks for your help. I tested Steven Herren's solution and it works.
0
import os
from pymongo import MongoClient
import pinecone
from pymongo.errors import OperationFailure
from sentence_transformers import SentenceTransformer, util
from certifi import where  # Import certifi library

# MongoDB setup: TLS is enabled and server certificates are verified against
# the CA bundle whose path certifi's where() returns.
client = MongoClient(
    "my-mongodb-uri",
    tls=True,  # Enable TLS encryption
    tlsAllowInvalidCertificates=False,  # Don't allow invalid certificates
    tlsCAFile=where()  # Use certifi library for CA bundle
)
db = client['test']
# Collection whose change events are watched further down.
collection = db['reflections']

# Pinecone initialization
# NOTE(review): this uses the module-level pinecone.init()/list_indexes()/
# create_index() calls, whereas the question's code uses the Pinecone class;
# confirm which style the installed client version supports.
pinecone.init(api_key='my-api-key', environment='us-west1-gcp')  # Replace with your Pinecone environment
index_name = 'langchain-demo'
# Create the index only when it is not already listed.
if index_name not in pinecone.list_indexes():
    pinecone.create_index(index_name, dimension=384)  # Adjust dimension according to your embedding model
index = pinecone.Index(index_name)

# Sentence Transformer model used below to encode each document's 'content'.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Watch for changes and mirror each one into the Pinecone index.
def _embed(text):
    """Return the embedding of *text* as a plain Python list."""
    return model.encode(text).tolist()


def _sync_change(change):
    """Apply a single change-stream event to the Pinecone index.

    Inserts and full-document replacements embed ``fullDocument['content']``
    and upsert it under the document's ``_id``; updates re-embed only when
    ``content`` is among the updated fields; deletes remove the vector.
    Every other operation type is ignored.

    Raises:
        KeyError: if the event lacks a field read here (for example a
            document that has no ``content``).
    """
    operation = change['operationType']

    # A 'replace' event carries the new fullDocument just like an insert, so
    # it is handled the same way instead of leaving a stale vector behind.
    if operation in ('insert', 'replace'):
        document = change['fullDocument']
        vector = _embed(document['content'])  # Assuming 'content' is the field
        print("Extracted Vector:", vector)

        # Upsert to Pinecone
        if vector:  # Check if vector is not empty
            index.upsert([(str(document['_id']), vector)])

    elif operation == 'update':
        updated_fields = change['updateDescription']['updatedFields']
        # Only re-embed when the text itself changed.
        if 'content' in updated_fields:
            document_id = str(change['documentKey']['_id'])
            index.upsert([(document_id, _embed(updated_fields['content']))])

    elif operation == 'delete':
        index.delete(ids=[str(change['documentKey']['_id'])])


try:
    # The context manager closes the change stream when the loop ends,
    # whether it finishes normally or an exception propagates.
    with collection.watch() as stream:
        for change in stream:
            print("Change detected:", change)
            try:
                _sync_change(change)
            except Exception as e:
                # One malformed event or failed Pinecone call must not stop
                # the watcher; report it and continue with the next event.
                print("An error occurred:", e)
except OperationFailure as e:
    print("Error watching collection:", e)
except Exception as e:
    print("An error occurred:", e)

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.