
I'm using the llama-index code below to create an index object from a saved text corpus. I then load the saved index object and query it to produce a response. I'm using an OpenAI API key so I can use a ChatGPT model as the LLM. I'm wondering if I could use the same code, or a modified version of it, with an open-source LLM (for example llama-7b-chat) whose model weights I have downloaded to my local machine. Does anyone know if that is possible, and can you suggest how I would need to update the code below to use an open-source LLM hosted locally?

code:

# creating index from corpus

from config import api_key, old_api_key, personal_api_key

import os

os.environ['OPENAI_API_KEY'] = old_api_key


# Load your data into 'Documents', a custom type defined by LlamaIndex
# from typing_extensions import Protocol
from llama_index import SimpleDirectoryReader

documents = SimpleDirectoryReader('./data').load_data()


from llama_index import GPTVectorStoreIndex

index = GPTVectorStoreIndex.from_documents(documents)

# save storage context

storage_context_dict = index.storage_context.to_dict()

import json

# Serialize data into a file:
with open("general_attributes_storage_context_dict.json", "w") as f:
    json.dump(storage_context_dict, f)

# load saved context

from config import old_api_key
import os

os.environ['OPENAI_API_KEY'] = old_api_key


# using previously saved index
import json

with open("general_attributes_storage_context_dict.json") as f:
    saved_context = json.load(f)

from llama_index import StorageContext, load_index_from_storage

# rebuild storage context

storage_context = StorageContext.from_dict(saved_context)

stored_index = load_index_from_storage(storage_context)


query_engine = stored_index.as_query_engine()
response = query_engine.query("some question")
print(response)

1 Answer
from llama_index import SimpleDirectoryReader, ServiceContext, VectorStoreIndex
from llama_index import StorageContext, load_index_from_storage

llm = ...  # placeholder: load your local LLM here and pass the LLM object, not a string

service_context = ServiceContext.from_defaults(llm=llm)


documents = SimpleDirectoryReader('./data').load_data()
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
index.storage_context.persist("data_index")
context = StorageContext.from_defaults(persist_dir='data_index')

stored_index = load_index_from_storage(context, service_context=service_context)

query_engine = stored_index.as_query_engine()
response = query_engine.query("some question")
print(response)

2 Comments

Thank you for getting back to me with this. In `llm = ('load local llm')`, what should be in place of 'load local llm'? I tried putting in the name of the model, or the path to where I downloaded the weights, but I just get a "string has no attribute metadata" error.
from langchain import HuggingFacePipeline
from transformers import AutoTokenizer, pipeline, AutoModelForCausalLM

model_path = 'model_path_from_local_or_huggingface'
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)
text_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
llm = HuggingFacePipeline(pipeline=text_pipeline, model_kwargs={"temperature": 0})
