Question 1:
Is there any way to use a Mac with an M1 CPU together with llama_index? I can't get past the assertion below:
AssertionError Traceback (most recent call last)
<ipython-input-1-f2d62b66882b> in <module>
6 from transformers import pipeline
7
----> 8 class customLLM(LLM):
9 model_name = "google/flan-t5-large"
10 pipeline = pipeline("text2text-generation", model=model_name, device=0, model_kwargs={"torch_dtype":torch.bfloat16})
<ipython-input-1-f2d62b66882b> in customLLM()
8 class customLLM(LLM):
9 model_name = "google/flan-t5-large"
---> 10 pipeline = pipeline("text2text-generation", model=model_name, device=0, model_kwargs={"torch_dtype":torch.bfloat16})
11
12 def _call(self, prompt, stop=None):
~/Library/Python/3.9/lib/python/site-packages/transformers/pipelines/__init__.py in pipeline(task, model, config, tokenizer, feature_extractor, framework, revision, use_fast, use_auth_token, device, device_map, torch_dtype, trust_remote_code, model_kwargs, pipeline_class, **kwargs)
868 kwargs["device"] = device
869
--> 870 return pipeline_class(model=model, framework=framework, task=task, **kwargs)
~/Library/Python/3.9/lib/python/site-packages/transformers/pipelines/text2text_generation.py in __init__(self, *args, **kwargs)
63
64 def __init__(self, *args, **kwargs):
---> 65 super().__init__(*args, **kwargs)
66
67 self.check_model_type(
~/Library/Python/3.9/lib/python/site-packages/transformers/pipelines/base.py in __init__(self, model, tokenizer, feature_extractor, modelcard, framework, task, args_parser, device, binary_output, **kwargs)
776 # Special handling
777 if self.framework == "pt" and self.device.type != "cpu":
--> 778 self.model = self.model.to(self.device)
779
780 # Update config with task specific parameters
~/Library/Python/3.9/lib/python/site-packages/transformers/modeling_utils.py in to(self, *args, **kwargs)
1680 )
1681 else:
-> 1682 return super().to(*args, **kwargs)
1683
1684 def half(self, *args):
~/Library/Python/3.9/lib/python/site-packages/torch/nn/modules/module.py in to(self, *args, **kwargs)
1143 return t.to(device, dtype if t.is_floating_point() or t.is_complex() else None, non_blocking)
1144
-> 1145 return self._apply(convert)
1146
1147 def register_full_backward_pre_hook(
~/Library/Python/3.9/lib/python/site-packages/torch/nn/modules/module.py in _apply(self, fn)
795 def _apply(self, fn):
796 for module in self.children():
--> 797 module._apply(fn)
798
799 def compute_should_use_set_data(tensor, tensor_applied):
~/Library/Python/3.9/lib/python/site-packages/torch/nn/modules/module.py in _apply(self, fn)
818 # `with torch.no_grad():`
819 with torch.no_grad():
--> 820 param_applied = fn(param)
821 should_use_set_data = compute_should_use_set_data(param, param_applied)
822 if should_use_set_data:
~/Library/Python/3.9/lib/python/site-packages/torch/nn/modules/module.py in convert(t)
1141 return t.to(device, dtype if t.is_floating_point() or t.is_complex() else None,
1142 non_blocking, memory_format=convert_to_format)
-> 1143 return t.to(device, dtype if t.is_floating_point() or t.is_complex() else None, non_blocking)
1144
1145 return self._apply(convert)
~/Library/Python/3.9/lib/python/site-packages/torch/cuda/__init__.py in _lazy_init()
237 "multiprocessing, you must use the 'spawn' start method")
238 if not hasattr(torch._C, '_cuda_getDeviceCount'):
--> 239 raise AssertionError("Torch not compiled with CUDA enabled")
240 if _cudart is None:
241 raise AssertionError(
AssertionError: Torch not compiled with CUDA enabled
Obviously I don't have an Nvidia card, but I have read that PyTorch now also supports the Mac M1. I tried running the example below:
from llama_index import SimpleDirectoryReader, LangchainEmbedding, GPTListIndex, GPTSimpleVectorIndex, PromptHelper
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index import LLMPredictor, ServiceContext
import torch
from langchain.llms.base import LLM
from transformers import pipeline

class customLLM(LLM):
    model_name = "google/flan-t5-large"
    pipeline = pipeline("text2text-generation", model=model_name, device=0, model_kwargs={"torch_dtype": torch.bfloat16})

    def _call(self, prompt, stop=None):
        return self.pipeline(prompt, max_length=9999)[0]["generated_text"]

    def _identifying_params(self):
        return {"name_of_model": self.model_name}

    def _llm_type(self):
        return "custom"

llm_predictor = LLMPredictor(llm=customLLM())
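(As a side note on the M1 claim above: a minimal way to check whether this PyTorch build ships the Apple-silicon (MPS) backend, assuming PyTorch >= 1.12; this check is separate from the failing example.)

import torch

# Both calls exist on PyTorch >= 1.12 and return False on builds without MPS support.
print(torch.backends.mps.is_built())      # was PyTorch compiled with MPS support?
print(torch.backends.mps.is_available())  # can an MPS device actually be used right now?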
Question 2:
Assuming the answer to the question above is no: I don't mind using Google Colab with a GPU, but once indexing is done, can I download the index and use it on my Mac?
For example, on Google Colab:
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, embed_model=embed_model)
index = GPTSimpleVectorIndex.from_documents(documents, service_context=service_context)
index.save_to_disk('index.json')
...and then later, on my Mac, load it back with load_from_disk.
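A rough sketch of what I have in mind on the Mac side (assuming the same llama_index version, where save_to_disk is paired with GPTSimpleVectorIndex.load_from_disk):

# On the Mac, after downloading index.json from Colab
from llama_index import GPTSimpleVectorIndex

index = GPTSimpleVectorIndex.load_from_disk('index.json')
# (depending on the version, a local CPU-friendly service_context can also be passed to load_from_disk)
response = index.query("some question about the documents")
print(response)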
1 Answer
Why are you passing device=0? When isinstance(device, int) is true, PyTorch treats device as the index of a CUDA device, hence the error. Try device="cpu" (or simply drop the device kwarg) and the problem should go away.