Initial conditions:
- Python
- LangChain 1.0+ style
- RAG chatbot
- LLM: Azure OpenAI
- Comparison targets
  - Azure AI Search + Azure OpenAI
  - Qdrant + Azure OpenAI
1) Azure AI Search architecture sample
Components:
- Documents → chunks
- Generate Azure OpenAI embeddings
- Store in an Azure AI Search index
- At query time, retrieve from Azure AI Search
- Generate the answer from the retrieved docs with Azure OpenAI
Install packages
pip install -U langchain langchain-core langchain-openai langchain-community langchain-classic langchain-text-splitters azure-search-documents python-dotenv
Environment variable example
AZURE_OPENAI_API_KEY=...
AZURE_OPENAI_ENDPOINT=https://YOUR-OPENAI.openai.azure.com/
AZURE_OPENAI_API_VERSION=2024-02-01
AZURE_OPENAI_CHAT_DEPLOYMENT=gpt-4o-mini
AZURE_OPENAI_EMBEDDING_DEPLOYMENT=text-embedding-3-large
AZURE_SEARCH_ENDPOINT=https://YOUR-SEARCH.search.windows.net
AZURE_SEARCH_KEY=...
AZURE_SEARCH_INDEX_NAME=rag-index
Document ingestion + indexing example
import os
from dotenv import load_dotenv
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import AzureOpenAIEmbeddings
from langchain_community.vectorstores.azuresearch import AzureSearch
load_dotenv()
embeddings = AzureOpenAIEmbeddings(
azure_deployment=os.environ["AZURE_OPENAI_EMBEDDING_DEPLOYMENT"],
openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
api_key=os.environ["AZURE_OPENAI_API_KEY"],
)
docs = [
Document(
page_content="Azure AI Search는 BM25, vector search, hybrid search를 지원합니다.",
metadata={"source": "doc1", "category": "azure"},
),
Document(
page_content="Qdrant는 오픈소스 벡터 데이터베이스이며 dense, sparse, hybrid retrieval을 지원합니다.",
metadata={"source": "doc2", "category": "qdrant"},
),
]
splitter = RecursiveCharacterTextSplitter(
chunk_size=500,
chunk_overlap=50,
)
split_docs = splitter.split_documents(docs)
vector_store = AzureSearch(
azure_search_endpoint=os.environ["AZURE_SEARCH_ENDPOINT"],
azure_search_key=os.environ["AZURE_SEARCH_KEY"],
index_name=os.environ["AZURE_SEARCH_INDEX_NAME"],
embedding_function=embeddings.embed_query,
)
vector_store.add_documents(split_docs)
print("Azure AI Search indexing complete")Retriever + RAG 체인 예시
As of LangChain 1.0+, the create_stuff_documents_chain + create_retrieval_chain combination is a better fit than the legacy RetrievalQA.
import os
from dotenv import load_dotenv
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
from langchain_community.vectorstores.azuresearch import AzureSearch
from langchain_core.prompts import ChatPromptTemplate
from langchain_classic.chains import create_retrieval_chain
from langchain_classic.chains.combine_documents import create_stuff_documents_chain
load_dotenv()
embeddings = AzureOpenAIEmbeddings(
azure_deployment=os.environ["AZURE_OPENAI_EMBEDDING_DEPLOYMENT"],
openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
api_key=os.environ["AZURE_OPENAI_API_KEY"],
)
llm = AzureChatOpenAI(
azure_deployment=os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT"],
openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
api_key=os.environ["AZURE_OPENAI_API_KEY"],
temperature=0,
)
vector_store = AzureSearch(
azure_search_endpoint=os.environ["AZURE_SEARCH_ENDPOINT"],
azure_search_key=os.environ["AZURE_SEARCH_KEY"],
index_name=os.environ["AZURE_SEARCH_INDEX_NAME"],
embedding_function=embeddings.embed_query,
)
retriever = vector_store.as_retriever(
search_type="hybrid",
search_kwargs={"k": 5},
)
prompt = ChatPromptTemplate.from_messages([
(
"system",
"너는 RAG 기반 어시스턴트다. 반드시 검색된 문서 내용에 근거해서만 답해라. "
"근거가 부족하면 '알 수 없다'고 답해라."
),
(
"human",
"질문: {input}\n\n참고 문서:\n{context}"
),
])
document_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, document_chain)
query = "Azure AI Search와 Qdrant의 hybrid search 차이를 설명해줘."
result = rag_chain.invoke({"input": query})
print("ANSWER:\n", result["answer"])
print("\nSOURCES:")
for doc in result["context"]:
print(doc.metadata, doc.page_content[:120])
A simpler LCEL-style example
import os
from dotenv import load_dotenv
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
from langchain_community.vectorstores.azuresearch import AzureSearch
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
load_dotenv()
embeddings = AzureOpenAIEmbeddings(
azure_deployment=os.environ["AZURE_OPENAI_EMBEDDING_DEPLOYMENT"],
openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
api_key=os.environ["AZURE_OPENAI_API_KEY"],
)
llm = AzureChatOpenAI(
azure_deployment=os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT"],
openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
api_key=os.environ["AZURE_OPENAI_API_KEY"],
temperature=0,
)
vector_store = AzureSearch(
azure_search_endpoint=os.environ["AZURE_SEARCH_ENDPOINT"],
azure_search_key=os.environ["AZURE_SEARCH_KEY"],
index_name=os.environ["AZURE_SEARCH_INDEX_NAME"],
embedding_function=embeddings.embed_query,
)
retriever = vector_store.as_retriever(
search_type="hybrid",
search_kwargs={"k": 5},
)
def format_docs(docs):
return "\n\n".join(doc.page_content for doc in docs)
prompt = ChatPromptTemplate.from_template(
"""다음 문맥에 근거해서만 답해라.
근거가 부족하면 "알 수 없다"라고 답해라.
문맥:
{context}
질문:
{question}
"""
)
chain = (
{
"context": retriever | format_docs,
"question": RunnablePassthrough(),
}
| prompt
| llm
| StrOutputParser()
)
answer = chain.invoke("Azure AI Search hybrid search의 장점은?")
print(answer)
2) Qdrant architecture sample
Components:
- Documents → chunks
- Generate Azure OpenAI embeddings
- Store in a Qdrant collection
- At query time, use the Qdrant retriever
- Generate the answer from the retrieved docs with Azure OpenAI
Install packages
pip install -U langchain langchain-core langchain-openai langchain-classic langchain-qdrant qdrant-client langchain-text-splitters python-dotenv
Environment variable example
AZURE_OPENAI_API_KEY=...
AZURE_OPENAI_ENDPOINT=https://YOUR-OPENAI.openai.azure.com/
AZURE_OPENAI_API_VERSION=2024-02-01
AZURE_OPENAI_CHAT_DEPLOYMENT=gpt-4o-mini
AZURE_OPENAI_EMBEDDING_DEPLOYMENT=text-embedding-3-large
QDRANT_URL=http://localhost:6333
QDRANT_API_KEY=
QDRANT_COLLECTION=rag_docs
For Qdrant Cloud:
QDRANT_URL=https://xxxxxx.cloud.qdrant.io
QDRANT_API_KEY=...
Document ingestion + indexing example
import os
from dotenv import load_dotenv
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import AzureOpenAIEmbeddings
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
load_dotenv()
embeddings = AzureOpenAIEmbeddings(
azure_deployment=os.environ["AZURE_OPENAI_EMBEDDING_DEPLOYMENT"],
openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
api_key=os.environ["AZURE_OPENAI_API_KEY"],
)
docs = [
Document(
page_content="Azure AI Search는 BM25와 vector query를 결합한 hybrid search를 지원합니다.",
metadata={"source": "doc1", "category": "azure"},
),
Document(
page_content="Qdrant는 벡터 검색 엔진이며 hybrid search와 reranking 파이프라인 구성이 가능합니다.",
metadata={"source": "doc2", "category": "qdrant"},
),
]
splitter = RecursiveCharacterTextSplitter(
chunk_size=500,
chunk_overlap=50,
)
split_docs = splitter.split_documents(docs)
vector_store = QdrantVectorStore.from_documents(
documents=split_docs,
embedding=embeddings,
url=os.environ["QDRANT_URL"],
api_key=os.getenv("QDRANT_API_KEY"),
collection_name=os.environ["QDRANT_COLLECTION"],
)
print("Qdrant indexing complete")Retriever + RAG 체인 예시
import os
from dotenv import load_dotenv
from qdrant_client import QdrantClient
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
from langchain_qdrant import QdrantVectorStore
from langchain_core.prompts import ChatPromptTemplate
from langchain_classic.chains import create_retrieval_chain
from langchain_classic.chains.combine_documents import create_stuff_documents_chain
load_dotenv()
embeddings = AzureOpenAIEmbeddings(
azure_deployment=os.environ["AZURE_OPENAI_EMBEDDING_DEPLOYMENT"],
openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
api_key=os.environ["AZURE_OPENAI_API_KEY"],
)
llm = AzureChatOpenAI(
azure_deployment=os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT"],
openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
api_key=os.environ["AZURE_OPENAI_API_KEY"],
temperature=0,
)
client = QdrantClient(
url=os.environ["QDRANT_URL"],
api_key=os.getenv("QDRANT_API_KEY"),
)
vector_store = QdrantVectorStore(
client=client,
collection_name=os.environ["QDRANT_COLLECTION"],
embedding=embeddings,
)
retriever = vector_store.as_retriever(
search_kwargs={"k": 5},
)
prompt = ChatPromptTemplate.from_messages([
(
"system",
"너는 RAG 기반 어시스턴트다. 반드시 검색된 문서 내용에 근거해서만 답해라. "
"근거가 부족하면 '알 수 없다'고 답해라."
),
(
"human",
"질문: {input}\n\n참고 문서:\n{context}"
),
])
document_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, document_chain)
query = "Qdrant 검색 구조의 특징은?"
result = rag_chain.invoke({"input": query})
print("ANSWER:\n", result["answer"])
print("\nSOURCES:")
for doc in result["context"]:
print(doc.metadata, doc.page_content[:120])
A simpler LCEL-style example
import os
from dotenv import load_dotenv
from qdrant_client import QdrantClient
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
from langchain_qdrant import QdrantVectorStore
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
load_dotenv()
embeddings = AzureOpenAIEmbeddings(
azure_deployment=os.environ["AZURE_OPENAI_EMBEDDING_DEPLOYMENT"],
openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
api_key=os.environ["AZURE_OPENAI_API_KEY"],
)
llm = AzureChatOpenAI(
azure_deployment=os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT"],
openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
api_key=os.environ["AZURE_OPENAI_API_KEY"],
temperature=0,
)
client = QdrantClient(
url=os.environ["QDRANT_URL"],
api_key=os.getenv("QDRANT_API_KEY"),
)
vector_store = QdrantVectorStore(
client=client,
collection_name=os.environ["QDRANT_COLLECTION"],
embedding=embeddings,
)
retriever = vector_store.as_retriever(search_kwargs={"k": 5})
def format_docs(docs):
return "\n\n".join(doc.page_content for doc in docs)
prompt = ChatPromptTemplate.from_template(
"""다음 문맥에 근거해서만 답해라.
근거가 부족하면 "알 수 없다"라고 답해라.
문맥:
{context}
질문:
{question}
"""
)
chain = (
{
"context": retriever | format_docs,
"question": RunnablePassthrough(),
}
| prompt
| llm
| StrOutputParser()
)
answer = chain.invoke("Qdrant 사용 구조의 장점은?")
print(answer)
3) Differences between the two setups
Azure AI Search version
Pros:
- Managed Azure SaaS
- Hybrid retrieval available out of the box
- Easy integration with the Azure ecosystem
- Simple to operate
Cons:
- Fine-grained search customization can be limited
- Heavy dependency on Azure services
Qdrant version
Pros:
- Flexible control over vector search
- Well suited to advanced retrieval experiments
- Choice of self-hosting or cloud
Cons:
- Hybrid, sparse, and reranker setups require a lot of hand-rolled design
- Operational burden can grow
4) Further considerations
Real services usually need additions such as:
- PDF/HTML loaders
- chunk metadata normalization
- source citation
- conversation memory
- metadata filtering (a minimal sketch follows this list)
- reranking
- evaluation
- fallback handling
- prompt injection defense
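As one example, metadata filtering can often be pushed down into the retriever. A hedged sketch for the Qdrant setup above, assuming langchain-qdrant's default payload layout (document metadata nested under the "metadata" key) and the "category" field from the sample docs:
import os
from qdrant_client import QdrantClient
from qdrant_client.models import FieldCondition, Filter, MatchValue
from langchain_qdrant import QdrantVectorStore

# vector_store built as in the Qdrant example above
# (embeddings is the AzureOpenAIEmbeddings instance from that example)
client = QdrantClient(url=os.environ["QDRANT_URL"], api_key=os.getenv("QDRANT_API_KEY"))
vector_store = QdrantVectorStore(
    client=client,
    collection_name=os.environ["QDRANT_COLLECTION"],
    embedding=embeddings,
)
# Only retrieve chunks whose metadata.category == "qdrant"
qdrant_filter = Filter(
    must=[FieldCondition(key="metadata.category", match=MatchValue(value="qdrant"))]
)
retriever = vector_store.as_retriever(
    search_kwargs={"k": 5, "filter": qdrant_filter},
)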
1) A comparison project structure that abstracts both setups behind one interface
Goals:
- Compare Azure AI Search / Qdrant through the same call path
- Design the application layer to be unaware of the backing store
- Reuse the same interface later from evaluation code
Project structure example
rag_compare/
├─ app/
│ ├─ config.py
│ ├─ models.py
│ ├─ interfaces/
│ │ └─ retriever.py
│ ├─ providers/
│ │ ├─ azure_search_provider.py
│ │ └─ qdrant_provider.py
│ ├─ services/
│ │ └─ rag_service.py
│ └─ main.py
├─ scripts/
│ ├─ ingest_azure.py
│ └─ ingest_qdrant.py
├─ .env
└─ requirements.txt
app/config.py
import os
from dotenv import load_dotenv
load_dotenv()
class Settings:
AZURE_OPENAI_API_KEY = os.environ["AZURE_OPENAI_API_KEY"]
AZURE_OPENAI_ENDPOINT = os.environ["AZURE_OPENAI_ENDPOINT"]
AZURE_OPENAI_API_VERSION = os.environ["AZURE_OPENAI_API_VERSION"]
AZURE_OPENAI_CHAT_DEPLOYMENT = os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT"]
AZURE_OPENAI_EMBEDDING_DEPLOYMENT = os.environ["AZURE_OPENAI_EMBEDDING_DEPLOYMENT"]
AZURE_SEARCH_ENDPOINT = os.getenv("AZURE_SEARCH_ENDPOINT")
AZURE_SEARCH_KEY = os.getenv("AZURE_SEARCH_KEY")
AZURE_SEARCH_INDEX_NAME = os.getenv("AZURE_SEARCH_INDEX_NAME")
QDRANT_URL = os.getenv("QDRANT_URL")
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
QDRANT_COLLECTION = os.getenv("QDRANT_COLLECTION", "rag_docs")
settings = Settings()
app/models.py
from dataclasses import dataclass
from typing import Any
@dataclass
class RetrievedDoc:
content: str
metadata: dict[str, Any]
@dataclass
class RetrievalResult:
answer: str
documents: list[RetrievedDoc]
app/interfaces/retriever.py
from abc import ABC, abstractmethod
from app.models import RetrievalResult
class BaseRAGProvider(ABC):
@abstractmethod
def ask(self, question: str) -> RetrievalResult:
pass
app/providers/azure_search_provider.py
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
from langchain_community.vectorstores.azuresearch import AzureSearch
from langchain_core.prompts import ChatPromptTemplate
from langchain_classic.chains import create_retrieval_chain
from langchain_classic.chains.combine_documents import create_stuff_documents_chain
from app.config import settings
from app.interfaces.retriever import BaseRAGProvider
from app.models import RetrievedDoc, RetrievalResult
class AzureSearchRAGProvider(BaseRAGProvider):
def __init__(self):
self.embeddings = AzureOpenAIEmbeddings(
azure_deployment=settings.AZURE_OPENAI_EMBEDDING_DEPLOYMENT,
openai_api_version=settings.AZURE_OPENAI_API_VERSION,
azure_endpoint=settings.AZURE_OPENAI_ENDPOINT,
api_key=settings.AZURE_OPENAI_API_KEY,
)
self.llm = AzureChatOpenAI(
azure_deployment=settings.AZURE_OPENAI_CHAT_DEPLOYMENT,
openai_api_version=settings.AZURE_OPENAI_API_VERSION,
azure_endpoint=settings.AZURE_OPENAI_ENDPOINT,
api_key=settings.AZURE_OPENAI_API_KEY,
temperature=0,
)
self.vector_store = AzureSearch(
azure_search_endpoint=settings.AZURE_SEARCH_ENDPOINT,
azure_search_key=settings.AZURE_SEARCH_KEY,
index_name=settings.AZURE_SEARCH_INDEX_NAME,
embedding_function=self.embeddings.embed_query,
)
self.retriever = self.vector_store.as_retriever(
search_type="hybrid",
search_kwargs={"k": 5},
)
prompt = ChatPromptTemplate.from_messages([
(
"system",
"너는 RAG 어시스턴트다. 검색 문서에 근거해서만 답해라. "
"근거가 부족하면 '알 수 없다'고 답해라."
),
(
"human",
"질문: {input}\n\n문서:\n{context}"
)
])
document_chain = create_stuff_documents_chain(self.llm, prompt)
self.chain = create_retrieval_chain(self.retriever, document_chain)
def ask(self, question: str) -> RetrievalResult:
result = self.chain.invoke({"input": question})
docs = [
RetrievedDoc(content=doc.page_content, metadata=doc.metadata)
for doc in result["context"]
]
return RetrievalResult(answer=result["answer"], documents=docs)
app/providers/qdrant_provider.py
from qdrant_client import QdrantClient
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
from langchain_qdrant import QdrantVectorStore
from langchain_core.prompts import ChatPromptTemplate
from langchain_classic.chains import create_retrieval_chain
from langchain_classic.chains.combine_documents import create_stuff_documents_chain
from app.config import settings
from app.interfaces.retriever import BaseRAGProvider
from app.models import RetrievedDoc, RetrievalResult
class QdrantRAGProvider(BaseRAGProvider):
def __init__(self):
self.embeddings = AzureOpenAIEmbeddings(
azure_deployment=settings.AZURE_OPENAI_EMBEDDING_DEPLOYMENT,
openai_api_version=settings.AZURE_OPENAI_API_VERSION,
azure_endpoint=settings.AZURE_OPENAI_ENDPOINT,
api_key=settings.AZURE_OPENAI_API_KEY,
)
self.llm = AzureChatOpenAI(
azure_deployment=settings.AZURE_OPENAI_CHAT_DEPLOYMENT,
openai_api_version=settings.AZURE_OPENAI_API_VERSION,
azure_endpoint=settings.AZURE_OPENAI_ENDPOINT,
api_key=settings.AZURE_OPENAI_API_KEY,
temperature=0,
)
client = QdrantClient(
url=settings.QDRANT_URL,
api_key=settings.QDRANT_API_KEY,
)
self.vector_store = QdrantVectorStore(
client=client,
collection_name=settings.QDRANT_COLLECTION,
embedding=self.embeddings,
)
self.retriever = self.vector_store.as_retriever(
search_kwargs={"k": 5},
)
prompt = ChatPromptTemplate.from_messages([
(
"system",
"너는 RAG 어시스턴트다. 검색 문서에 근거해서만 답해라. "
"근거가 부족하면 '알 수 없다'고 답해라."
),
(
"human",
"질문: {input}\n\n문서:\n{context}"
)
])
document_chain = create_stuff_documents_chain(self.llm, prompt)
self.chain = create_retrieval_chain(self.retriever, document_chain)
def ask(self, question: str) -> RetrievalResult:
result = self.chain.invoke({"input": question})
docs = [
RetrievedDoc(content=doc.page_content, metadata=doc.metadata)
for doc in result["context"]
]
return RetrievalResult(answer=result["answer"], documents=docs)
app/services/rag_service.py
from app.interfaces.retriever import BaseRAGProvider
from app.models import RetrievalResult
class RAGService:
def __init__(self, provider: BaseRAGProvider):
self.provider = provider
def ask(self, question: str) -> RetrievalResult:
return self.provider.ask(question)
app/main.py
import os
from app.services.rag_service import RAGService
from app.providers.azure_search_provider import AzureSearchRAGProvider
from app.providers.qdrant_provider import QdrantRAGProvider
BACKEND = os.getenv("RAG_BACKEND", "azure") # azure | qdrant
def build_service() -> RAGService:
if BACKEND == "azure":
return RAGService(AzureSearchRAGProvider())
elif BACKEND == "qdrant":
return RAGService(QdrantRAGProvider())
else:
raise ValueError("Unsupported backend")
if __name__ == "__main__":
service = build_service()
result = service.ask("Explain the search differences between Azure AI Search and Qdrant.")
print("ANSWER:")
print(result.answer)
print("\nDOCS:")
for i, doc in enumerate(result.documents, 1):
print(f"[{i}] {doc.metadata} -> {doc.content[:120]}")장점
The key points of this structure:
- a unified ask(question) interface
- evaluation code can later compare backends by swapping only the provider
- easy to put FastAPI in front (see the sketch after this list)
- rerankers can be added independently per provider
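A minimal FastAPI sketch over RAGService. The module path (app/api.py) and the request shape are assumptions, not part of the structure above:
from fastapi import FastAPI
from pydantic import BaseModel

from app.main import build_service

app = FastAPI()
service = build_service()  # backend picked via the RAG_BACKEND env var

class AskRequest(BaseModel):
    question: str

@app.post("/ask")
def ask(req: AskRequest):
    result = service.ask(req.question)
    return {
        "answer": result.answer,
        "sources": [doc.metadata for doc in result.documents],
    }

# run e.g.: RAG_BACKEND=qdrant uvicorn app.api:app --reload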
2) Azure AI Search hybrid + semantic ranker version
Important:
Whether the LangChain wrapper fully abstracts Azure AI Search's semantic ranker options can depend on the version.
To control it precisely, using the Azure SDK (azure-search-documents) directly is the safer approach,
so this section shows the approach that is more reliable in practice.
Install
pip install -U azure-search-documents langchain-openai python-dotenv
Environment variable example
AZURE_SEARCH_ENDPOINT=https://YOUR-SEARCH.search.windows.net
AZURE_SEARCH_KEY=...
AZURE_SEARCH_INDEX_NAME=rag-index
AZURE_SEARCH_SEMANTIC_CONFIG=my-semantic-config
AZURE_OPENAI_API_KEY=...
AZURE_OPENAI_ENDPOINT=https://YOUR-OPENAI.openai.azure.com/
AZURE_OPENAI_API_VERSION=2024-02-01
AZURE_OPENAI_CHAT_DEPLOYMENT=gpt-4o-mini
AZURE_OPENAI_EMBEDDING_DEPLOYMENT=text-embedding-3-large
Core code
import os
from dotenv import load_dotenv
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from azure.search.documents.models import VectorizedQuery
from langchain_openai import AzureOpenAIEmbeddings, AzureChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
load_dotenv()
search_client = SearchClient(
endpoint=os.environ["AZURE_SEARCH_ENDPOINT"],
index_name=os.environ["AZURE_SEARCH_INDEX_NAME"],
credential=AzureKeyCredential(os.environ["AZURE_SEARCH_KEY"]),
)
embeddings = AzureOpenAIEmbeddings(
azure_deployment=os.environ["AZURE_OPENAI_EMBEDDING_DEPLOYMENT"],
openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
api_key=os.environ["AZURE_OPENAI_API_KEY"],
)
llm = AzureChatOpenAI(
azure_deployment=os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT"],
openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
api_key=os.environ["AZURE_OPENAI_API_KEY"],
temperature=0,
)
def azure_hybrid_semantic_search(query: str, k: int = 5):
query_vector = embeddings.embed_query(query)
results = search_client.search(
search_text=query,
vector_queries=[
VectorizedQuery(
vector=query_vector,
k_nearest_neighbors=10,
fields="content_vector"
)
],
query_type="semantic",
semantic_configuration_name=os.environ["AZURE_SEARCH_SEMANTIC_CONFIG"],
top=k,
select=["id", "content", "source", "category"],
)
docs = []
for r in results:
docs.append({
"content": r.get("content", ""),
"source": r.get("source"),
"category": r.get("category"),
})
return docs
def answer_question(question: str):
docs = azure_hybrid_semantic_search(question, k=5)
context = "\n\n".join(doc["content"] for doc in docs)
prompt = ChatPromptTemplate.from_messages([
(
"system",
"너는 검색 결과에 근거해서만 답하는 RAG 어시스턴트다. "
"근거가 부족하면 '알 수 없다'고 답해라."
),
(
"human",
"질문: {question}\n\n문맥:\n{context}"
)
])
chain = prompt | llm
response = chain.invoke({
"question": question,
"context": context,
})
return response.content, docs
if __name__ == "__main__":
answer, docs = answer_question("Azure AI Search hybrid와 semantic ranker의 장점은?")
print("ANSWER:\n", answer)
print("\nDOCS:")
for d in docs:
print(d)
Key points
In this version:
- search_text=query → BM25/text search
- vector_queries=[...] → vector search
- using both together → hybrid
- query_type="semantic" + semantic_configuration_name=... → applies the semantic ranker
In other words, this is the closest practical code to the Azure AI Search hybrid + semantic ranker setup in question.
Caveats
For this code to work, the Azure Search index usually needs:
- a content text field
- a content_vector vector field
- a semantic configuration
- a vector search configuration
Without this index definition the code will fail; a hedged index-definition sketch follows.
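A minimal index-definition sketch using azure-search-documents. Class names (e.g. SemanticPrioritizedFields, SemanticSearch) changed across SDK versions, and the 3072 dimension is an assumption for text-embedding-3-large at its default size, so verify both in your environment:
import os
from azure.core.credentials import AzureKeyCredential
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
    HnswAlgorithmConfiguration,
    SearchableField,
    SearchField,
    SearchFieldDataType,
    SearchIndex,
    SemanticConfiguration,
    SemanticField,
    SemanticPrioritizedFields,
    SemanticSearch,
    SimpleField,
    VectorSearch,
    VectorSearchProfile,
)

index_client = SearchIndexClient(
    endpoint=os.environ["AZURE_SEARCH_ENDPOINT"],
    credential=AzureKeyCredential(os.environ["AZURE_SEARCH_KEY"]),
)
index = SearchIndex(
    name=os.environ["AZURE_SEARCH_INDEX_NAME"],
    fields=[
        SimpleField(name="id", type=SearchFieldDataType.String, key=True),
        SearchableField(name="content", type=SearchFieldDataType.String),
        SimpleField(name="source", type=SearchFieldDataType.String, filterable=True),
        SimpleField(name="category", type=SearchFieldDataType.String, filterable=True),
        SearchField(
            name="content_vector",
            type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
            searchable=True,
            vector_search_dimensions=3072,  # assumed for text-embedding-3-large
            vector_search_profile_name="vec-profile",
        ),
    ],
    vector_search=VectorSearch(
        algorithms=[HnswAlgorithmConfiguration(name="hnsw")],
        profiles=[
            VectorSearchProfile(name="vec-profile", algorithm_configuration_name="hnsw")
        ],
    ),
    semantic_search=SemanticSearch(
        configurations=[
            SemanticConfiguration(
                name=os.environ["AZURE_SEARCH_SEMANTIC_CONFIG"],
                prioritized_fields=SemanticPrioritizedFields(
                    content_fields=[SemanticField(field_name="content")],
                ),
            )
        ]
    ),
)
index_client.create_or_update_index(index)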
3) Qdrant hybrid search version
Qdrant hybrid search usually means some combination of:
- storing dense + sparse vectors together
- fusing the two result sets at query time
- adding a reranker if needed
The LangChain Qdrant wrapper alone can limit fine-grained hybrid control,
so in practice driving the Qdrant client directly is clearer; that said, recent langchain-qdrant versions do expose a hybrid mode, sketched below.
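For reference, a hedged sketch of the wrapper-level hybrid mode, assuming a langchain-qdrant version that ships RetrievalMode and FastEmbedSparse, and reusing the split_docs and embeddings names from the ingestion example above:
import os
from langchain_qdrant import FastEmbedSparse, QdrantVectorStore, RetrievalMode

# Sparse side via FastEmbed; dense side via the Azure OpenAI embeddings above.
sparse_embeddings = FastEmbedSparse(model_name="Qdrant/bm25")
hybrid_store = QdrantVectorStore.from_documents(
    documents=split_docs,
    embedding=embeddings,
    sparse_embedding=sparse_embeddings,
    retrieval_mode=RetrievalMode.HYBRID,
    url=os.environ["QDRANT_URL"],
    api_key=os.getenv("QDRANT_API_KEY"),
    collection_name=os.environ["QDRANT_COLLECTION"],
)
retriever = hybrid_store.as_retriever(search_kwargs={"k": 5})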
Install
pip install -U qdrant-client langchain-openai fastembed python-dotenv
Environment variable example
QDRANT_URL=http://localhost:6333
QDRANT_API_KEY=
QDRANT_COLLECTION=rag_docs
AZURE_OPENAI_API_KEY=...
AZURE_OPENAI_ENDPOINT=https://YOUR-OPENAI.openai.azure.com/
AZURE_OPENAI_API_VERSION=2024-02-01
AZURE_OPENAI_CHAT_DEPLOYMENT=gpt-4o-mini
AZURE_OPENAI_EMBEDDING_DEPLOYMENT=text-embedding-3-large
Concept
Qdrant hybrid needs both dense and sparse vectors,
and generating sparse vectors requires a separate model.
A practical pairing:
- dense: Azure OpenAI embeddings
- sparse: BM25 or a FastEmbed sparse model
Hybrid search example code
import os
from dotenv import load_dotenv
from qdrant_client import QdrantClient
from qdrant_client.models import (
Prefetch,
FusionQuery,
Fusion,
SparseVector,
)
from langchain_openai import AzureOpenAIEmbeddings, AzureChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
load_dotenv()
client = QdrantClient(
url=os.environ["QDRANT_URL"],
api_key=os.getenv("QDRANT_API_KEY"),
)
embeddings = AzureOpenAIEmbeddings(
azure_deployment=os.environ["AZURE_OPENAI_EMBEDDING_DEPLOYMENT"],
openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
api_key=os.environ["AZURE_OPENAI_API_KEY"],
)
llm = AzureChatOpenAI(
azure_deployment=os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT"],
openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
api_key=os.environ["AZURE_OPENAI_API_KEY"],
temperature=0,
)
def fake_sparse_encoder(text: str):
# Placeholder for illustration only.
# A real setup needs a fastembed sparse model or a BM25-based sparse encoder.
return SparseVector(indices=[1, 5, 9], values=[0.3, 0.8, 0.4])
def qdrant_hybrid_search(query: str, k: int = 5):
dense_vector = embeddings.embed_query(query)
sparse_vector = fake_sparse_encoder(query)
results = client.query_points(
collection_name=os.environ["QDRANT_COLLECTION"],
prefetch=[
Prefetch(
query=dense_vector,
using="dense",
limit=20,
),
Prefetch(
query=sparse_vector,
using="sparse",
limit=20,
),
],
query=FusionQuery(fusion=Fusion.RRF),
limit=k,
with_payload=True,
)
docs = []
for point in results.points:
payload = point.payload or {}
docs.append({
"content": payload.get("content", ""),
"source": payload.get("source"),
"category": payload.get("category"),
})
return docs
def answer_question(question: str):
docs = qdrant_hybrid_search(question, k=5)
context = "\n\n".join(doc["content"] for doc in docs)
prompt = ChatPromptTemplate.from_messages([
(
"system",
"너는 검색 결과에 근거해서만 답하는 RAG 어시스턴트다. "
"근거가 부족하면 '알 수 없다'고 답해라."
),
(
"human",
"질문: {question}\n\n문맥:\n{context}"
)
])
chain = prompt | llm
response = chain.invoke({
"question": question,
"context": context,
})
return response.content, docs
if __name__ == "__main__":
answer, docs = answer_question("Qdrant hybrid search의 장점은?")
print("ANSWER:\n", answer)
print("\nDOCS:")
for d in docs:
print(d)
Key points
In this version:
- Prefetch(query=dense_vector, using="dense")
- Prefetch(query=sparse_vector, using="sparse")
- FusionQuery(fusion=Fusion.RRF)
That is:
- dense search
- sparse search
- RRF fusion
together make up the Qdrant hybrid search.
Very important point
The fake_sparse_encoder() above is a placeholder for illustration.
A real setup needs one of:
- a FastEmbed sparse model
- a BM25-based sparse vector generator
- a SPLADE-family sparse encoder
Unless this part is swapped for a real model, no genuine hybrid quality comparison is possible.
So the version below replaces fake_sparse_encoder() with a real FastEmbed-based sparse encoder.
The key changes up front:
- before: the fake_sparse_encoder(text) placeholder
- after:
  - real sparse vectors generated by a FastEmbed sparse model
  - the Qdrant side unchanged
  - dense: Azure OpenAI embeddings
  - sparse: FastEmbed sparse embeddings
  - fusion: RRF
Install
pip install -U qdrant-client langchain-openai fastembed python-dotenv
Environment variable example
QDRANT_URL=http://localhost:6333
QDRANT_API_KEY=
QDRANT_COLLECTION=rag_docs
AZURE_OPENAI_API_KEY=...
AZURE_OPENAI_ENDPOINT=https://YOUR-OPENAI.openai.azure.com/
AZURE_OPENAI_API_VERSION=2024-02-01
AZURE_OPENAI_CHAT_DEPLOYMENT=gpt-4o-mini
AZURE_OPENAI_EMBEDDING_DEPLOYMENT=text-embedding-3-large
Prerequisites
This code assumes the Qdrant collection stores both dense and sparse vectors.
That is, the collection usually needs the following named vector setup:
- dense vector: e.g. "dense"
- sparse vector: e.g. "sparse"
Without this setup, queries will fail.
1) Code with a real FastEmbed sparse encoder
import os
from dotenv import load_dotenv
from qdrant_client import QdrantClient
from qdrant_client.models import (
Prefetch,
FusionQuery,
Fusion,
SparseVector,
)
from fastembed import SparseTextEmbedding
from langchain_openai import AzureOpenAIEmbeddings, AzureChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
load_dotenv()
client = QdrantClient(
url=os.environ["QDRANT_URL"],
api_key=os.getenv("QDRANT_API_KEY"),
)
dense_embeddings = AzureOpenAIEmbeddings(
azure_deployment=os.environ["AZURE_OPENAI_EMBEDDING_DEPLOYMENT"],
openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
api_key=os.environ["AZURE_OPENAI_API_KEY"],
)
llm = AzureChatOpenAI(
azure_deployment=os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT"],
openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
api_key=os.environ["AZURE_OPENAI_API_KEY"],
temperature=0,
)
# FastEmbed sparse model
# Model name support can vary by environment/fastembed version;
# "Qdrant/bm25" is a commonly used choice
sparse_model = SparseTextEmbedding(model_name="Qdrant/bm25")
def fastembed_sparse_encoder(text: str) -> SparseVector:
sparse_result = list(sparse_model.embed([text]))[0]
return SparseVector(
indices=sparse_result.indices.tolist()
if hasattr(sparse_result.indices, "tolist")
else list(sparse_result.indices),
values=sparse_result.values.tolist()
if hasattr(sparse_result.values, "tolist")
else list(sparse_result.values),
)
def qdrant_hybrid_search(query: str, k: int = 5):
dense_vector = dense_embeddings.embed_query(query)
sparse_vector = fastembed_sparse_encoder(query)
results = client.query_points(
collection_name=os.environ["QDRANT_COLLECTION"],
prefetch=[
Prefetch(
query=dense_vector,
using="dense",
limit=20,
),
Prefetch(
query=sparse_vector,
using="sparse",
limit=20,
),
],
query=FusionQuery(fusion=Fusion.RRF),
limit=k,
with_payload=True,
)
docs = []
for point in results.points:
payload = point.payload or {}
docs.append({
"content": payload.get("content", ""),
"source": payload.get("source"),
"category": payload.get("category"),
})
return docs
def answer_question(question: str):
docs = qdrant_hybrid_search(question, k=5)
context = "\n\n".join(doc["content"] for doc in docs)
prompt = ChatPromptTemplate.from_messages([
(
"system",
"너는 검색 결과에 근거해서만 답하는 RAG 어시스턴트다. "
"근거가 부족하면 '알 수 없다'고 답해라."
),
(
"human",
"질문: {question}\n\n문맥:\n{context}"
)
])
chain = prompt | llm
response = chain.invoke({
"question": question,
"context": context,
})
return response.content, docs
if __name__ == "__main__":
answer, docs = answer_question("Qdrant hybrid search의 장점은?")
print("ANSWER:\n", answer)
print("\nDOCS:")
for d in docs:
print(d)
2) Key changes
Before:
def fake_sparse_encoder(text: str):
return SparseVector(indices=[1, 5, 9], values=[0.3, 0.8, 0.4])
After:
from fastembed import SparseTextEmbedding
sparse_model = SparseTextEmbedding(model_name="Qdrant/bm25")
def fastembed_sparse_encoder(text: str) -> SparseVector:
sparse_result = list(sparse_model.embed([text]))[0]
return SparseVector(
indices=list(sparse_result.indices),
values=list(sparse_result.values),
)
That is, real sparse vectors are now used.
3) But this alone is not enough
This part matters.
The question was about swapping the encoder at query time, but for hybrid search to actually work, sparse vectors must also be stored at ingestion time.
Both sides have to match:
- at document ingestion:
  - store dense vectors
  - store sparse vectors
- at query time:
  - generate a dense query vector
  - generate a sparse query vector
With only one of the two, hybrid is incomplete.
4) The Qdrant ingestion code must change as well
Below is an ingestion example that also stores FastEmbed sparse vectors.
import os
from dotenv import load_dotenv
from qdrant_client import QdrantClient
from qdrant_client.models import (
Distance,
VectorParams,
SparseVectorParams,
PointStruct,
)
from fastembed import SparseTextEmbedding
from langchain_openai import AzureOpenAIEmbeddings
load_dotenv()
client = QdrantClient(
url=os.environ["QDRANT_URL"],
api_key=os.getenv("QDRANT_API_KEY"),
)
dense_embeddings = AzureOpenAIEmbeddings(
azure_deployment=os.environ["AZURE_OPENAI_EMBEDDING_DEPLOYMENT"],
openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
api_key=os.environ["AZURE_OPENAI_API_KEY"],
)
sparse_model = SparseTextEmbedding(model_name="Qdrant/bm25")
collection_name = os.environ["QDRANT_COLLECTION"]
docs = [
{
"id": 1,
"content": "Azure AI Search는 BM25와 vector query를 결합한 hybrid search를 지원합니다.",
"source": "doc1",
"category": "azure",
},
{
"id": 2,
"content": "Qdrant는 dense와 sparse retrieval을 함께 사용해 hybrid search를 구성할 수 있습니다.",
"source": "doc2",
"category": "qdrant",
},
]
# The text-embedding-3-large dimension depends on the deployment settings.
# If unsure, print the length of an actual embedding and match it.
sample_dense = dense_embeddings.embed_query("dimension check")
dense_dim = len(sample_dense)
client.recreate_collection(
collection_name=collection_name,
vectors_config={
"dense": VectorParams(size=dense_dim, distance=Distance.COSINE),
},
sparse_vectors_config={
"sparse": SparseVectorParams(),
},
)
points = []
for doc in docs:
dense_vector = dense_embeddings.embed_query(doc["content"])
sparse_result = list(sparse_model.embed([doc["content"]]))[0]
points.append(
PointStruct(
id=doc["id"],
vector={
"dense": dense_vector,
"sparse": {
"indices": sparse_result.indices.tolist()
if hasattr(sparse_result.indices, "tolist")
else list(sparse_result.indices),
"values": sparse_result.values.tolist()
if hasattr(sparse_result.values, "tolist")
else list(sparse_result.values),
},
},
payload={
"content": doc["content"],
"source": doc["source"],
"category": doc["category"],
},
)
)
client.upsert(
collection_name=collection_name,
points=points,
)
print("Qdrant dense+sparse indexing complete")5) 실무상 주의
5-1. The Qdrant/bm25 model name
This model name can vary by environment and fastembed version,
and an unsupported model will raise an error.
The exact supported model names must be checked in the runtime environment;
if unsure, the honest answer is "I don't know", and it should be verified by actually running it (one way to check follows).
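A quick way to check, assuming a fastembed version that exposes the listing helper:
from fastembed import SparseTextEmbedding

# Print the sparse models this fastembed build actually supports.
for model in SparseTextEmbedding.list_supported_models():
    print(model)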
5-2. recreate_collection
This code recreates the collection,
so any existing data can be wiped.
Be careful in production environments.
5-3. Dense dimension
The Azure OpenAI embedding dimension can vary with the deployment/model options,
so deriving it from an actual embedding, as below, is safer.
dense_dim = len(dense_embeddings.embed_query("dimension check"))5-4. hybrid 품질
Quality is heavily influenced by:
- chunk size
- sparse model quality
- dense model quality
- fusion method
- whether a reranker is added
In other words, attaching a sparse encoder does not by itself yield top performance.
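Finally, the hybrid search wired back into the provider abstraction from part 1, for example as app/providers/qdrant_hybrid_provider.py. Note this assumes two Settings fields that were not defined earlier: FASTEMBED_SPARSE_MODEL (e.g. "Qdrant/bm25") and TOP_K (e.g. 5).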
from fastembed import SparseTextEmbedding
from qdrant_client import QdrantClient
from qdrant_client.models import Prefetch, FusionQuery, Fusion, SparseVector
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from app.config import settings
from app.interfaces.retriever import BaseRAGProvider
from app.models import RetrievedDoc, RetrievalResult
class QdrantHybridRAGProvider(BaseRAGProvider):
def __init__(self):
self.client = QdrantClient(
url=settings.QDRANT_URL,
api_key=settings.QDRANT_API_KEY,
)
self.dense_embeddings = AzureOpenAIEmbeddings(
azure_deployment=settings.AZURE_OPENAI_EMBEDDING_DEPLOYMENT,
openai_api_version=settings.AZURE_OPENAI_API_VERSION,
azure_endpoint=settings.AZURE_OPENAI_ENDPOINT,
api_key=settings.AZURE_OPENAI_API_KEY,
)
self.sparse_model = SparseTextEmbedding(
model_name=settings.FASTEMBED_SPARSE_MODEL
)
self.llm = AzureChatOpenAI(
azure_deployment=settings.AZURE_OPENAI_CHAT_DEPLOYMENT,
openai_api_version=settings.AZURE_OPENAI_API_VERSION,
azure_endpoint=settings.AZURE_OPENAI_ENDPOINT,
api_key=settings.AZURE_OPENAI_API_KEY,
temperature=0,
)
self.prompt = ChatPromptTemplate.from_messages([
(
"system",
"너는 RAG 기반 어시스턴트다. 반드시 검색된 문서 내용에 근거해서만 답해라. "
"근거가 부족하면 '알 수 없다'고 답해라."
),
(
"human",
"질문: {question}\n\n참고 문서:\n{context}"
),
])
def _to_sparse_vector(self, text: str) -> SparseVector:
sparse_result = list(self.sparse_model.embed([text]))[0]
return SparseVector(
indices=sparse_result.indices.tolist()
if hasattr(sparse_result.indices, "tolist")
else list(sparse_result.indices),
values=sparse_result.values.tolist()
if hasattr(sparse_result.values, "tolist")
else list(sparse_result.values),
)
def _hybrid_search(self, question: str, k: int):
dense_vector = self.dense_embeddings.embed_query(question)
sparse_vector = self._to_sparse_vector(question)
results = self.client.query_points(
collection_name=settings.QDRANT_COLLECTION,
prefetch=[
Prefetch(
query=dense_vector,
using="dense",
limit=max(k * 4, 20),
),
Prefetch(
query=sparse_vector,
using="sparse",
limit=max(k * 4, 20),
),
],
query=FusionQuery(fusion=Fusion.RRF),
limit=k,
with_payload=True,
)
docs = []
for point in results.points:
payload = point.payload or {}
docs.append(
RetrievedDoc(
content=payload.get("content", ""),
metadata={
"source": payload.get("source"),
"category": payload.get("category"),
"id": payload.get("doc_id", point.id),
},
)
)
return docs
def ask(self, question: str) -> RetrievalResult:
docs = self._hybrid_search(question, settings.TOP_K)
context = "\n\n".join(doc.content for doc in docs)
chain = self.prompt | self.llm
response = chain.invoke({
"question": question,
"context": context,
})
return RetrievalResult(
answer=response.content,
documents=docs,
)