PyTorch: using llama_index on a Mac M1

dfty9e19, posted 2023-04-12 in Mac

Question 1:

Is there any way to use llama_index on a Mac with an M1 CPU?
I can't get past the assertion below:

AssertionError                            Traceback (most recent call last)
<ipython-input-1-f2d62b66882b> in <module>
      6 from transformers import pipeline
      7 
----> 8 class customLLM(LLM):
      9     model_name = "google/flan-t5-large"
     10     pipeline = pipeline("text2text-generation", model=model_name, device=0, model_kwargs={"torch_dtype":torch.bfloat16})

<ipython-input-1-f2d62b66882b> in customLLM()
      8 class customLLM(LLM):
      9     model_name = "google/flan-t5-large"
---> 10     pipeline = pipeline("text2text-generation", model=model_name, device=0, model_kwargs={"torch_dtype":torch.bfloat16})
     11 
     12     def _call(self, prompt, stop=None):

~/Library/Python/3.9/lib/python/site-packages/transformers/pipelines/__init__.py in pipeline(task, model, config, tokenizer, feature_extractor, framework, revision, use_fast, use_auth_token, device, device_map, torch_dtype, trust_remote_code, model_kwargs, pipeline_class, **kwargs)
    868         kwargs["device"] = device
    869 
--> 870     return pipeline_class(model=model, framework=framework, task=task, **kwargs)

~/Library/Python/3.9/lib/python/site-packages/transformers/pipelines/text2text_generation.py in __init__(self, *args, **kwargs)
     63 
     64     def __init__(self, *args, **kwargs):
---> 65         super().__init__(*args, **kwargs)
     66 
     67         self.check_model_type(

~/Library/Python/3.9/lib/python/site-packages/transformers/pipelines/base.py in __init__(self, model, tokenizer, feature_extractor, modelcard, framework, task, args_parser, device, binary_output, **kwargs)
    776         # Special handling
    777         if self.framework == "pt" and self.device.type != "cpu":
--> 778             self.model = self.model.to(self.device)
    779 
    780         # Update config with task specific parameters

~/Library/Python/3.9/lib/python/site-packages/transformers/modeling_utils.py in to(self, *args, **kwargs)
   1680             )
   1681         else:
-> 1682             return super().to(*args, **kwargs)
   1683 
   1684     def half(self, *args):

~/Library/Python/3.9/lib/python/site-packages/torch/nn/modules/module.py in to(self, *args, **kwargs)
   1143             return t.to(device, dtype if t.is_floating_point() or t.is_complex() else None, non_blocking)
   1144 
-> 1145         return self._apply(convert)
   1146 
   1147     def register_full_backward_pre_hook(

~/Library/Python/3.9/lib/python/site-packages/torch/nn/modules/module.py in _apply(self, fn)
    795     def _apply(self, fn):
    796         for module in self.children():
--> 797             module._apply(fn)
    798 
    799         def compute_should_use_set_data(tensor, tensor_applied):

~/Library/Python/3.9/lib/python/site-packages/torch/nn/modules/module.py in _apply(self, fn)
    818             # `with torch.no_grad():`
    819             with torch.no_grad():
--> 820                 param_applied = fn(param)
    821             should_use_set_data = compute_should_use_set_data(param, param_applied)
    822             if should_use_set_data:

~/Library/Python/3.9/lib/python/site-packages/torch/nn/modules/module.py in convert(t)
   1141                 return t.to(device, dtype if t.is_floating_point() or t.is_complex() else None,
   1142                             non_blocking, memory_format=convert_to_format)
-> 1143             return t.to(device, dtype if t.is_floating_point() or t.is_complex() else None, non_blocking)
   1144 
   1145         return self._apply(convert)

~/Library/Python/3.9/lib/python/site-packages/torch/cuda/__init__.py in _lazy_init()
    237                 "multiprocessing, you must use the 'spawn' start method")
    238         if not hasattr(torch._C, '_cuda_getDeviceCount'):
--> 239             raise AssertionError("Torch not compiled with CUDA enabled")
    240         if _cudart is None:
    241             raise AssertionError(

AssertionError: Torch not compiled with CUDA enabled

Obviously I don't have an Nvidia card, but I've read that PyTorch now also supports the Mac M1.
I tried running the example below:

from llama_index import SimpleDirectoryReader, LangchainEmbedding, GPTListIndex,GPTSimpleVectorIndex, PromptHelper
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index import LLMPredictor, ServiceContext
import torch
from langchain.llms.base import LLM
from transformers import pipeline

class customLLM(LLM):
    model_name = "google/flan-t5-large"
    pipeline = pipeline("text2text-generation", model=model_name, device=0, model_kwargs={"torch_dtype":torch.bfloat16})

    def _call(self, prompt, stop=None):
        return self.pipeline(prompt, max_length=9999)[0]["generated_text"]
 
    def _identifying_params(self):
        return {"name_of_model": self.model_name}

    def _llm_type(self):
        return "custom"

llm_predictor = LLMPredictor(llm=customLLM())
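
Side note on the M1 support mentioned above: a minimal sketch for checking whether PyTorch's MPS (Metal) backend is actually usable on Apple Silicon (assumes PyTorch 1.12 or later; torch.backends.mps.is_built / is_available are standard PyTorch, the rest is illustrative):

import torch

# True if this PyTorch build includes MPS support
print(torch.backends.mps.is_built())
# True if the OS and hardware can actually use it (macOS 12.3+ on Apple Silicon)
print(torch.backends.mps.is_available())

# Pick a device accordingly; no CUDA is involved, so the AssertionError above cannot occur
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print(device)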

Question 2:

Assuming the answer to the above is no: I wouldn't mind using Google Colab with a GPU, but once the index has been built, can it be downloaded and used on my Mac?
For example:
Google Colab:

service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, embed_model=embed_model)
index = GPTSimpleVectorIndex.from_documents(documents, service_context=service_context)
index.save_to_disk('index.json')

...and then later load it on my Mac with load_from_file
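
A minimal sketch of the loading side on the Mac, assuming the llama_index API of that era (0.4/0.5), where the counterpart to save_to_disk is the GPTSimpleVectorIndex.load_from_disk classmethod; llm_predictor and embed_model are the same locally constructed objects as in the question:

from llama_index import GPTSimpleVectorIndex, ServiceContext

# Rebuild the same service context locally (CPU-friendly LLM and embeddings),
# then load the index that was built and saved on Colab.
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, embed_model=embed_model)
index = GPTSimpleVectorIndex.load_from_disk('index.json', service_context=service_context)

response = index.query("What is this document about?")
print(response)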


9gm1akwq1#

Why are you passing device=0? If isinstance(device, int), PyTorch treats the device as the index of a CUDA device, hence the error. Try device="cpu" (or simply drop the device kwarg) and this problem should go away.
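
For illustration, the pipeline line from the question could be rewritten like this (a sketch only; the "mps" branch assumes a PyTorch/transformers combination with Metal support, and the bfloat16 kwarg from the question may need to be dropped on some backends):

import torch
from transformers import pipeline

# Use the Metal (MPS) backend when available, otherwise fall back to the CPU;
# passing a string instead of an integer avoids the CUDA-index interpretation.
device = "mps" if torch.backends.mps.is_available() else "cpu"

pipe = pipeline(
    "text2text-generation",
    model="google/flan-t5-large",
    device=device,
    model_kwargs={"torch_dtype": torch.bfloat16},  # as in the question; drop if the backend rejects bfloat16
)
print(pipe("Translate English to German: Hello", max_length=64)[0]["generated_text"])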
