问题验证
- 我已经在文档和discord上搜索过答案了。
问题
你好,我想知道为什么rel_props没有被保存到我的图索引持久存储中?这可能导致了我的问题:当我从持久存储中加载知识图谱后查询索引时,什么都没有返回。以下是我用来创建并持久化索引的脚本;脚本之后附上了持久化目录storage_graph中生成的index_store.json示例。
脚本:
from flask import Flask, request, jsonify
import os
from llama_index.core import (
VectorStoreIndex,
StorageContext,
Document,
Settings,
PromptTemplate,
KnowledgeGraphIndex
)
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.graph_stores.nebula import NebulaGraphStore
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.indices.vector_store.retrievers import VectorIndexRetriever
from llama_index.core.retrievers import KnowledgeGraphRAGRetriever
from llama_index.core.schema import QueryBundle
from llama_index.core.base.base_retriever import BaseRetriever
from llama_index.core.response_synthesizers import TreeSummarize
from llama_index.core.schema import TextNode
import base64, logging, json
# Configure logging once. logging.basicConfig() is a no-op on every call
# after the first, so the original second call with level=INFO had no
# effect; DEBUG is kept because the script relies on logging.debug() below.
logging.basicConfig(level=logging.DEBUG)

# Global llama_index settings: LLM used for triplet extraction, embedding
# model, and the chunk size used when splitting documents.
Settings.llm = OpenAI(temperature=0, model="gpt-3.5-turbo")
embed_model = OpenAIEmbedding(model="text-embedding-3-large")
Settings.embed_model = embed_model
Settings.chunk_size = 512

# NebulaGraph connection credentials, read from the environment by
# NebulaGraphStore.
os.environ["NEBULA_USER"] = "root"
os.environ["NEBULA_PASSWORD"] = "nebula"
os.environ["NEBULA_ADDRESS"] = "127.0.0.1:9669"

# Graph-space schema: space name plus the edge types / relationship
# property names / tags that must already exist in the Nebula space.
space_name = "test9"
edge_types, rel_prop_names = ["relationship"], ["relationship"]
tags = ["entity"]
def encode_string(s):
    """Encode text *s* as URL-safe base64 and return it as a str."""
    raw = s.encode()
    return base64.urlsafe_b64encode(raw).decode()
def decode_string(s):
    """Inverse of encode_string: decode a URL-safe base64 str to text."""
    raw = s.encode()
    return base64.urlsafe_b64decode(raw).decode()
def sanitize_and_encode(data):
    """Return a copy of *data* with every str value base64-encoded.

    Non-string values (numbers, lists, None, ...) pass through unchanged
    so the metadata structure is preserved.
    """
    return {
        key: encode_string(value) if isinstance(value, str) else value
        for key, value in data.items()
    }
def decode_metadata(metadata):
    """Inverse of sanitize_and_encode: base64-decode every str value."""
    return {
        key: decode_string(value) if isinstance(value, str) else value
        for key, value in metadata.items()
    }
def load_json_nodes(json_directory):
    """Load every *.json file in *json_directory* into TextNode objects.

    Text and string metadata are base64-encoded on the way in; they are
    decoded again when the nodes are converted to Documents for indexing.
    """
    nodes = []
    for filename in os.listdir(json_directory):
        if not filename.endswith('.json'):
            continue
        path = os.path.join(json_directory, filename)
        with open(path, 'r') as file:
            records = json.load(file)
        for node_data in records:
            sanitized_metadata = sanitize_and_encode(node_data['metadata'])
            node = TextNode(
                text=encode_string(node_data['text']),
                id_=node_data['id_'],
                embedding=node_data['embedding'],
                metadata=sanitized_metadata,
            )
            nodes.append(node)
            logging.debug(f"Loaded node ID: {node.id_}, text: {node_data['text']}, metadata: {node_data['metadata']}")
    return nodes
def create_index():
    """Build a KnowledgeGraphIndex and a VectorStoreIndex over the JSON
    nodes and persist both to ./storage_graph_test10.

    Returns (kg_index, vector_index, storage_context).

    NOTE(review): the extracted triplets live in the NebulaGraph space
    itself; index_store.json only holds index metadata, which is
    presumably why rel_map appears empty in the persisted file — confirm
    against the NebulaGraphStore documentation.
    """
    graph_store = NebulaGraphStore(
        space_name=space_name,
        edge_types=edge_types,
        rel_prop_names=rel_prop_names,
        tags=tags
    )
    storage_context = StorageContext.from_defaults(graph_store=graph_store)
    # Nodes were stored base64-encoded on disk; decode them back into
    # plain-text Documents before indexing.
    json_nodes = load_json_nodes("JSON_nodes_999_large_syll")
    documents = [
        Document(
            text=decode_string(node.text),
            id_=node.id_,
            metadata=decode_metadata(node.metadata),
            embedding=node.embedding
        ) for node in json_nodes
    ]
    # LLM-extract up to 10 triplets per chunk and write them into the
    # Nebula graph space; include_embeddings also stores triplet
    # embeddings in the index.
    kg_index = KnowledgeGraphIndex.from_documents(
        documents,
        storage_context=storage_context,
        max_triplets_per_chunk=10,
        space_name=space_name,
        edge_types=edge_types,
        rel_prop_names=rel_prop_names,
        tags=tags,
        max_knowledge_sequence=15,
        include_embeddings=True
    )
    # Set the index_id for KnowledgeGraphIndex
    kg_index.set_index_id("kg_index")
    kg_index.storage_context.persist(persist_dir='./storage_graph_test10')
    logging.debug(f"KG Index created with {len(documents)} documents")
    # Create VectorStoreIndex
    vector_index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
    # Set the index_id for VectorStoreIndex
    vector_index.set_index_id("vector_index")
    # Persist the storage context
    # NOTE(review): this writes the same directory a second time (it was
    # already persisted right after the KG index); the second call
    # rewrites the files with the vector index included.
    storage_context.persist(persist_dir='./storage_graph_test10')
    logging.debug(f"Vector Index created with {len(documents)} documents")
    return kg_index, vector_index, storage_context
# Script entry point: build both indices and persist them to disk.
print("Creating Index...")
kg_index, vector_index, storage_context = create_index()
print("Index Created...")
index_store.json文件:
{"index_store/data": {"kg_index": {"__type__": "kg", "__data__": "{\"index_id\": \"kg_index\", \"summary\": null, \"table\": {\"Mech 3202\": [\"48ae3288-8d75-429f-864d-0ba2054433d9\"], \"Fluid dynamics\": [\"48ae3288-8d75-429f-864d-0ba2054433d9\"], \"York university\": [\"f202ac7e-5343-4def-a94e-976dac45b27d\", \"48ae3288-8d75-429f-864d-0ba2054433d9\"], \"Mech 3202 fluid dynamics\": [\"f202ac7e-5343-4def-a94e-976dac45b27d\"], \"Course_name\": [\"f202ac7e-5343-4def-a94e-976dac45b27d\"], \"University\": [\"f202ac7e-5343-4def-a94e-976dac45b27d\"], \"Text\": [\"f202ac7e-5343-4def-a94e-976dac45b27d\"], \"Content_type\": [\"f202ac7e-5343-4def-a94e-976dac45b27d\"], \"Pse 317\": [\"f202ac7e-5343-4def-a94e-976dac45b27d\"], \"Tutorial location\": [\"f202ac7e-5343-4def-a94e-976dac45b27d\"]}, \"rel_map\": {}, \"embedding_dict\": {\"('Mech 3202', 'Is', 'Fluid dynamics')\": [0. ...
6条答案
按热度按时间lmyy7pcs1#
rel_props
未被保存到持久存储,因为refresh_schema
方法没有被正确调用或执行。此方法负责更新structured_schema
字典,其中包括rel_props
。要解决此问题,请确保在关系或其属性发生更改后调用
refresh_schema
方法。可以通过在修改关系后显式调用self.refresh_schema()
来实现这一点。以下是调用
refresh_schema
的相关代码部分:确保在初始化类时将
refresh_schema
设置为True
,或者在关系发生任何更新后手动调用self.refresh_schema()
。此外,请确保
persist
方法正确实现,以将图形存储到文件中。以下是如何实现persist
方法的示例:此方法确保将图的当前状态(包括
rel_props
)保存到指定的文件中。根据
PropertyGraphStore
类,persist
方法被定义为抽象方法,这意味着它必须在任何子类中实现。请确保您的子类正确实现此方法以处理rel_props
的持久性:通过确保适当调用
refresh_schema
并正确实现persist
方法,您可以解决rel_props
未保存到持久存储 1(https://github.com/run-llama/llama_index/blob/main/llama-index-integrations/graph_stores/llama-index-graph-stores-neo4j/llama_index/graph_stores/neo4j/neo4j_property_graph.py)2(https://github.com/run-llama/llama_index/blob/main/llama-index-core/llama_index/core/graph_stores/simple_labelled.py)3(https://github.com/run-llama/llama_index/blob/main/llama-index-core/llama_index/core/graph_stores/types.py) 的问题。
u1ehiz5o2#
所以有可能将我的rel_props保存到持久图存储rel_maps中,还是我必须连接到我的nebula图空间来检索这些rel_props?
ibrsph3r3#
是的,可以将您的
rel_props
保存到持久图存储rel_maps
中。NebulaPropertyGraphStore
类提供了插入三元组和管理关系的方法,这意味着您可以直接将您的rel_props
存储在图存储中。以下是
NebulaPropertyGraphStore
类中的相关方法:此方法允许您向图中添加一个三元组(主语、关系、宾语),这意味着您可以直接将您的
rel_props
保存到持久图存储中,而无需从 Nebula 图空间 1(https://github.com/run-llama/llama_index/blob/main/llama-index-legacy/llama_index/legacy/graph_stores/nebulagraph.py)2(https://github.com/run-llama/llama_index/blob/main/llama-index-integrations/graph_stores/llama-index-graph-stores-nebula/llama_index/graph_stores/nebula/nebula_graph_store.py)3(https://github.com/run-llama/llama_index/blob/main/llama-index-integrations/graph_stores/llama-index-graph-stores-nebula/llama_index/graph_stores/nebula/nebula_property_graph.py) 检索它们。
anauzrmj4#
How can I add include this to my previous script that creates both vector and graph indices, and saves them to a persistent storage?
@dosu
mec1mxoz5#
so hwo can I update my script to ensure the rel_map is being populated within my index_store.json in my persistent storage?
@dosu
nukf8bse6#
为了确保在持久存储的
index_store.json
中正确填充rel_map
,您需要验证get_rel_map
方法是否正确获取和结构化关系数据。以下是如何更新脚本以包含此功能的方法:确保
get_rel_map
正确实现:验证get_rel_map
方法是否正确获取和结构化关系数据。将
rel_map
持久化到index_store.json
:将rel_map
字典序列化为 JSON 并写入文件。以下是带有这些更改的脚本更新版本:
from flask import Flask, request, jsonify
import os
from llama_index.core import (
VectorStoreIndex,
StorageContext,
Document,
Settings,
PromptTemplate,
KnowledgeGraphIndex
)
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.graph_stores.nebula import NebulaGraphStore
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.indices.vector_store.retrievers import VectorIndexRetriever
from llama_index.core.retrievers import KnowledgeGraphRAGRetriever
from llama_index.core.schema import QueryBundle
from llama_index.core.base.base_retriever import BaseRetriever
from llama_index.core.response_synthesizers import TreeSummarize
from llama_index.core.schema import TextNode
import base64, logging, json
# Configure logging once. logging.basicConfig() ignores every call after
# the first, so the original second call with level=INFO did nothing;
# keep DEBUG because logging.debug() output is used below.
logging.basicConfig(level=logging.DEBUG)

# Global llama_index settings: extraction LLM, embedding model, chunk size.
Settings.llm = OpenAI(temperature=0, model="gpt-3.5-turbo")
embed_model = OpenAIEmbedding(model="text-embedding-3-large")
Settings.embed_model = embed_model
Settings.chunk_size = 512

# NebulaGraph connection credentials, read from the environment by
# NebulaGraphStore.
os.environ["NEBULA_USER"] = "root"
os.environ["NEBULA_PASSWORD"] = "nebula"
os.environ["NEBULA_ADDRESS"] = "127.0.0.1:9669"

# Graph-space schema expected to exist in the Nebula space.
space_name = "test9"
edge_types, rel_prop_names = ["relationship"], ["relationship"]
tags = ["entity"]
def encode_string(s):
    """Return *s* encoded as URL-safe base64 text."""
    encoded_bytes = base64.urlsafe_b64encode(s.encode())
    return encoded_bytes.decode()
def decode_string(s):
    """Decode URL-safe base64 text *s* back to the original string."""
    decoded_bytes = base64.urlsafe_b64decode(s.encode())
    return decoded_bytes.decode()
def sanitize_and_encode(data):
    """Base64-encode all str values of *data*; leave other values alone."""
    return {
        k: encode_string(v) if isinstance(v, str) else v
        for k, v in data.items()
    }
def decode_metadata(metadata):
    """Base64-decode all str values of *metadata*; leave others alone."""
    return {
        k: decode_string(v) if isinstance(v, str) else v
        for k, v in metadata.items()
    }
def load_json_nodes(json_directory):
    """Load every *.json file in *json_directory* into TextNode objects.

    Fixes two defects in the posted version:
    - a stray extra closing parenthesis after encode_string(...) made the
      function a syntax error;
    - the function never returned `nodes`, so create_index() would have
      received None instead of the node list.
    """
    nodes = []
    for filename in os.listdir(json_directory):
        if filename.endswith('.json'):
            with open(os.path.join(json_directory, filename), 'r') as file:
                data = json.load(file)
                for node_data in data:
                    sanitized_metadata = sanitize_and_encode(node_data['metadata'])
                    node = TextNode(
                        text=encode_string(node_data['text']),
                        id_=node_data['id_'],
                        embedding=node_data['embedding'],
                        metadata=sanitized_metadata
                    )
                    nodes.append(node)
                    logging.debug(f"Loaded node ID: {node.id_}, text: {node_data['text']}, metadata: {node_data['metadata']}")
    return nodes
def save_rel_map_to_json(rel_map, file_path):
    """Serialize *rel_map* to *file_path* as pretty-printed (indent=4) JSON."""
    serialized = json.dumps(rel_map, indent=4)
    with open(file_path, 'w') as json_file:
        json_file.write(serialized)
def create_index():
    """Build the KG and vector indices, persist them, and additionally
    write the graph store's rel_map out to a standalone JSON file.

    The posted version stopped after constructing the graph store and
    implicitly returned None, while the caller unpacks three values;
    this completes the function so the script actually runs, and adds
    the rel_map persistence step the answer describes.

    Returns (kg_index, vector_index, storage_context).
    """
    graph_store = NebulaGraphStore(
        space_name=space_name,
        edge_types=edge_types,
        rel_prop_names=rel_prop_names,
        tags=tags
    )
    storage_context = StorageContext.from_defaults(graph_store=graph_store)
    # Decode the base64-encoded nodes back into plain-text Documents.
    json_nodes = load_json_nodes("JSON_nodes_999_large_syll")
    documents = [
        Document(
            text=decode_string(node.text),
            id_=node.id_,
            metadata=decode_metadata(node.metadata),
            embedding=node.embedding
        ) for node in json_nodes
    ]
    kg_index = KnowledgeGraphIndex.from_documents(
        documents,
        storage_context=storage_context,
        max_triplets_per_chunk=10,
        space_name=space_name,
        edge_types=edge_types,
        rel_prop_names=rel_prop_names,
        tags=tags,
        max_knowledge_sequence=15,
        include_embeddings=True
    )
    kg_index.set_index_id("kg_index")
    kg_index.storage_context.persist(persist_dir='./storage_graph_test10')
    logging.debug(f"KG Index created with {len(documents)} documents")
    vector_index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
    vector_index.set_index_id("vector_index")
    storage_context.persist(persist_dir='./storage_graph_test10')
    # Fetch the relationship map directly from the graph store and save it
    # next to the index files so it is available without querying Nebula.
    # NOTE(review): confirm get_rel_map's signature/defaults against the
    # installed NebulaGraphStore version.
    rel_map = graph_store.get_rel_map()
    save_rel_map_to_json(rel_map, './storage_graph_test10/rel_map.json')
    logging.debug(f"Vector Index created with {len(documents)} documents")
    return kg_index, vector_index, storage_context
# Script entry point: build both indices and persist them to disk.
print("Creating Index...")
kg_index, vector_index, storage_context = create_index()
print("Index Created...")
此脚本确保
rel_map
正确填充并保存到持久存储index_store.json
中的rel_map
。