Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

How to use multiple GPUs for inference? #12

Open
waltonfuture opened this issue Aug 23, 2024 · 9 comments
Open

How to use multiple GPUs for inference? #12

waltonfuture opened this issue Aug 23, 2024 · 9 comments

Comments

@waltonfuture
Copy link

I want to use multiple GPUs for inference, so I load the model with device_map='auto'. However, I always hit this error: Expected all tensors to be on the same device, but found at least two devices, cuda:1 and cuda:0!

Can you help me with that? Thanks a lot!

@runninglsy
Copy link
Collaborator

Below is an example of running AIDC-AI/Ovis1.5-Llama3-8B on two GPUs:

import torch
from PIL import Image
from transformers import AutoModelForCausalLM

# Two-GPU placement: the non-layer entries and decoder layers 0-13 are mapped
# to device 0, decoder layers 14-31 to device 1.
device_map = {
    module_name: 0
    for module_name in (
        "visual_tokenizer",
        "vte",
        "llm.model.embed_tokens",
        "llm.model.norm",
        "llm.lm_head",
    )
}
device_map.update(
    (f"llm.model.layers.{layer_idx}", 0 if layer_idx < 14 else 1)
    for layer_idx in range(32)
)

# Load the checkpoint with its modules placed according to `device_map`, then
# fetch the helper objects exposed by the model.
model = AutoModelForCausalLM.from_pretrained(
    "AIDC-AI/Ovis1.5-Llama3-8B",
    torch_dtype=torch.bfloat16,
    multimodal_max_length=8192,
    device_map=device_map,
    trust_remote_code=True,
)
text_tokenizer = model.get_text_tokenizer()
visual_tokenizer = model.get_visual_tokenizer()
conversation_formatter = model.get_conversation_formatter()

# Ask for the image and the prompt, then build the model inputs.
image_path = input("Enter image path: ")
image = Image.open(image_path)
text = input("Enter prompt: ")
query = f'<image>\n{text}'
prompt, input_ids = conversation_formatter.format_query(query)
# Add a batch dimension and move the token ids to the current CUDA device.
input_ids = input_ids.unsqueeze(0).cuda()
attention_mask = input_ids.ne(text_tokenizer.pad_token_id).cuda()
# The image tensor follows the visual tokenizer's dtype and device.
pixel_values = [
    visual_tokenizer.preprocess_image(image).to(
        dtype=visual_tokenizer.dtype, device=visual_tokenizer.device
    )
]

# Sampling is disabled and the sampling parameters are explicitly cleared.
gen_kwargs = {
    "max_new_tokens": 1024,
    "do_sample": False,
    "top_p": None,
    "top_k": None,
    "temperature": None,
    "repetition_penalty": None,
    "eos_token_id": model.generation_config.eos_token_id,
    "pad_token_id": text_tokenizer.pad_token_id,
    "use_cache": True,
}

# Generate and decode the first (only) sequence of the batch.
with torch.inference_mode():
    output_ids = model.generate(
        input_ids,
        pixel_values=pixel_values,
        attention_mask=attention_mask,
        **gen_kwargs,
    )[0]
    output = text_tokenizer.decode(output_ids, skip_special_tokens=True)
print(f'Output: {output}')

@cyj95
Copy link

cyj95 commented Sep 11, 2024

Below is an example of running AIDC-AI/Ovis1.5-Llama3-8B on two GPUs:

import torch
from PIL import Image
from transformers import AutoModelForCausalLM

# Two-GPU placement: the non-layer entries and decoder layers 0-13 are mapped
# to device 0, decoder layers 14-31 to device 1.
device_map = {
    module_name: 0
    for module_name in (
        "visual_tokenizer",
        "vte",
        "llm.model.embed_tokens",
        "llm.model.norm",
        "llm.lm_head",
    )
}
device_map.update(
    (f"llm.model.layers.{layer_idx}", 0 if layer_idx < 14 else 1)
    for layer_idx in range(32)
)

# Load the checkpoint with its modules placed according to `device_map`, then
# fetch the helper objects exposed by the model.
model = AutoModelForCausalLM.from_pretrained(
    "AIDC-AI/Ovis1.5-Llama3-8B",
    torch_dtype=torch.bfloat16,
    multimodal_max_length=8192,
    device_map=device_map,
    trust_remote_code=True,
)
text_tokenizer = model.get_text_tokenizer()
visual_tokenizer = model.get_visual_tokenizer()
conversation_formatter = model.get_conversation_formatter()

# Ask for the image and the prompt, then build the model inputs.
image_path = input("Enter image path: ")
image = Image.open(image_path)
text = input("Enter prompt: ")
query = f'<image>\n{text}'
prompt, input_ids = conversation_formatter.format_query(query)
# Add a batch dimension and move the token ids to the current CUDA device.
input_ids = input_ids.unsqueeze(0).cuda()
attention_mask = input_ids.ne(text_tokenizer.pad_token_id).cuda()
# The image tensor follows the visual tokenizer's dtype and device.
pixel_values = [
    visual_tokenizer.preprocess_image(image).to(
        dtype=visual_tokenizer.dtype, device=visual_tokenizer.device
    )
]

# Sampling is disabled and the sampling parameters are explicitly cleared.
gen_kwargs = {
    "max_new_tokens": 1024,
    "do_sample": False,
    "top_p": None,
    "top_k": None,
    "temperature": None,
    "repetition_penalty": None,
    "eos_token_id": model.generation_config.eos_token_id,
    "pad_token_id": text_tokenizer.pad_token_id,
    "use_cache": True,
}

# Generate and decode the first (only) sequence of the batch.
with torch.inference_mode():
    output_ids = model.generate(
        input_ids,
        pixel_values=pixel_values,
        attention_mask=attention_mask,
        **gen_kwargs,
    )[0]
    output = text_tokenizer.decode(output_ids, skip_special_tokens=True)
print(f'Output: {output}')

What's the script for Ovis1.5-Gemma2-9B?

@AntyRia
Copy link

AntyRia commented Sep 20, 2024

Did you solve it?

@runninglsy
Copy link
Collaborator

Below is an example of running AIDC-AI/Ovis1.5-Gemma2-9B on two GPUs:

import torch
from PIL import Image
from transformers import AutoModelForCausalLM

# Two-GPU placement: the non-layer entries and decoder layers 0-19 are mapped
# to device 0, decoder layers 20-41 to device 1.
device_map = {
    module_name: 0
    for module_name in (
        "visual_tokenizer",
        "vte",
        "llm.model.embed_tokens",
        "llm.model.norm",
        "llm.lm_head",
    )
}
device_map.update(
    (f"llm.model.layers.{layer_idx}", 0 if layer_idx < 20 else 1)
    for layer_idx in range(42)
)

# Load the checkpoint with its modules placed according to `device_map`, then
# fetch the helper objects exposed by the model.
model = AutoModelForCausalLM.from_pretrained(
    "AIDC-AI/Ovis1.5-Gemma2-9B",
    torch_dtype=torch.bfloat16,
    multimodal_max_length=8192,
    device_map=device_map,
    trust_remote_code=True,
)
text_tokenizer = model.get_text_tokenizer()
visual_tokenizer = model.get_visual_tokenizer()
conversation_formatter = model.get_conversation_formatter()

# Ask for the image and the prompt, then build the model inputs.
image_path = input("Enter image path: ")
image = Image.open(image_path)
text = input("Enter prompt: ")
query = f'<image>\n{text}'
prompt, input_ids = conversation_formatter.format_query(query)
# Add a batch dimension and move the token ids to the current CUDA device.
input_ids = input_ids.unsqueeze(0).cuda()
attention_mask = input_ids.ne(text_tokenizer.pad_token_id).cuda()
# The image tensor follows the visual tokenizer's dtype and device.
pixel_values = [
    visual_tokenizer.preprocess_image(image).to(
        dtype=visual_tokenizer.dtype, device=visual_tokenizer.device
    )
]

# Sampling is disabled and the sampling parameters are explicitly cleared.
gen_kwargs = {
    "max_new_tokens": 1024,
    "do_sample": False,
    "top_p": None,
    "top_k": None,
    "temperature": None,
    "repetition_penalty": None,
    "eos_token_id": model.generation_config.eos_token_id,
    "pad_token_id": text_tokenizer.pad_token_id,
    "use_cache": True,
}

# Generate and decode the first (only) sequence of the batch.
with torch.inference_mode():
    output_ids = model.generate(
        input_ids,
        pixel_values=pixel_values,
        attention_mask=attention_mask,
        **gen_kwargs,
    )[0]
    output = text_tokenizer.decode(output_ids, skip_special_tokens=True)
print(f'Output: {output}')

@CVHub520
Copy link

CVHub520 commented Sep 23, 2024

@cyj95, @AntyRia, here’s the code, try running this:

from dataclasses import field, dataclass
from typing import Optional, Union, List

import torch
from PIL import Image

from ovis.model.modeling_ovis import Ovis
from ovis.util.constants import IMAGE_TOKEN


# Two-GPU placement: the non-layer entries and decoder layers 0-19 are mapped
# to device 0, decoder layers 20-41 to device 1.
device_map = {
    module_name: 0
    for module_name in (
        "visual_tokenizer",
        "vte",
        "llm.model.embed_tokens",
        "llm.model.norm",
        "llm.lm_head",
    )
}
device_map.update(
    (f"llm.model.layers.{layer_idx}", 0 if layer_idx < 20 else 1)
    for layer_idx in range(42)
)


@dataclass
class RunnerArguments:
    """Settings for building a runner: checkpoint location plus generation options."""

    # Path or name passed to `from_pretrained`.
    model_path: str
    # Generation options forwarded to `generate`.
    max_new_tokens: int = 512
    do_sample: bool = False
    top_p: Optional[float] = None
    top_k: Optional[int] = None
    temperature: Optional[float] = None
    # Forwarded as `max_partition` when preprocessing inputs.
    max_partition: int = 9


class OvisRunner:
    """Load an Ovis checkpoint with the module-level ``device_map`` and run generation.

    The constructor loads the model through ``Ovis.from_pretrained`` in
    bfloat16 with ``multimodal_max_length=8192``, switches it to eval mode and
    caches the tokenizers, the conversation formatter and the generation
    keyword arguments derived from the given ``RunnerArguments``.
    """

    def __init__(self, args: RunnerArguments):
        """Load the model from ``args.model_path`` and prepare generation settings."""
        self.model_path = args.model_path
        self.dtype = torch.bfloat16
        # Inputs built in `preprocess` are moved to the current CUDA device.
        # NOTE(review): with a sharded device_map this presumably has to be the
        # device holding the embedding/visual modules (0 in the map) - confirm.
        self.device = torch.cuda.current_device()
        self.model = Ovis.from_pretrained(
            self.model_path,
            torch_dtype=self.dtype,
            multimodal_max_length=8192,
            device_map=device_map,
        )
        self.model = self.model.eval()
        self.eos_token_id = self.model.generation_config.eos_token_id
        self.text_tokenizer = self.model.get_text_tokenizer()
        self.pad_token_id = self.text_tokenizer.pad_token_id
        self.visual_tokenizer = self.model.get_visual_tokenizer()
        self.conversation_formatter = self.model.get_conversation_formatter()
        self.image_placeholder = IMAGE_TOKEN
        self.max_partition = args.max_partition
        self.gen_kwargs = dict(
            max_new_tokens=args.max_new_tokens,
            do_sample=args.do_sample,
            top_p=args.top_p,
            top_k=args.top_k,
            temperature=args.temperature,
            repetition_penalty=None,
            eos_token_id=self.eos_token_id,
            pad_token_id=self.pad_token_id,
            use_cache=True
        )

    def preprocess(self, inputs: List[Union[Image.Image, str]]):
        """Turn a list of images and text fragments into model inputs.

        Args:
            inputs: Items that are ``PIL.Image.Image`` or ``str``. ``None``
                entries are skipped. A ``[str, image]`` pair is swapped so the
                image comes first.

        Returns:
            A tuple ``(prompt, input_ids, attention_mask, pixel_values)``.
            ``input_ids`` and ``attention_mask`` get a leading batch dimension
            and are moved to ``self.device``. ``pixel_values`` is a one-element
            list holding the image tensor, or ``[None]`` when the model's
            ``preprocess_inputs`` returned no pixel values.

        Raises:
            RuntimeError: If an item is neither an image, a string nor ``None``.
        """
        # for single image and single text inputs, ensure image ahead
        if len(inputs) == 2 and isinstance(inputs[0], str) and isinstance(inputs[1], Image.Image):
            # Build a real list (not a one-shot `reversed` iterator) so the
            # value keeps the declared list type.
            inputs = [inputs[1], inputs[0]]

        # build query: one placeholder line per image; text has any literal
        # placeholder removed so it cannot be mistaken for an image slot
        query_parts = []
        images = []
        for item in inputs:
            if isinstance(item, Image.Image):
                query_parts.append(self.image_placeholder + '\n')
                images.append(item)
            elif isinstance(item, str):
                query_parts.append(item.replace(self.image_placeholder, ''))
            elif item is not None:
                raise RuntimeError(f'Invalid input type, expected `PIL.Image.Image` or `str`, but got {type(item)}')
        query = ''.join(query_parts)

        # format conversation
        prompt, input_ids, pixel_values = self.model.preprocess_inputs(
            query, images, max_partition=self.max_partition)
        attention_mask = torch.ne(input_ids, self.text_tokenizer.pad_token_id)
        input_ids = input_ids.unsqueeze(0).to(device=self.device)
        attention_mask = attention_mask.unsqueeze(0).to(device=self.device)
        if pixel_values is not None:
            pixel_values = [pixel_values.to(device=self.device, dtype=self.dtype)]
        else:
            pixel_values = [None]

        return prompt, input_ids, attention_mask, pixel_values

    def run(self, inputs: List[Union[Image.Image, str]]):
        """Generate a response for ``inputs`` and report token counts.

        Args:
            inputs: Same format as accepted by ``preprocess``.

        Returns:
            A dict with keys ``prompt``, ``output`` (decoded text of the first
            sequence), ``prompt_tokens`` and ``total_tokens``.
        """
        prompt, input_ids, attention_mask, pixel_values = self.preprocess(inputs)
        # Pure inference: no autograd bookkeeping is needed during generation.
        with torch.inference_mode():
            output_ids = self.model.generate(
                input_ids,
                pixel_values=pixel_values,
                attention_mask=attention_mask,
                **self.gen_kwargs
            )
        output = self.text_tokenizer.decode(output_ids[0], skip_special_tokens=True)
        input_token_len = input_ids.shape[1]
        output_token_len = output_ids.shape[1]
        # NOTE(review): the sum assumes `generate` returns only newly generated
        # tokens; if the prompt is echoed it would be counted twice - confirm.
        response = dict(
            prompt=prompt,
            output=output,
            prompt_tokens=input_token_len,
            total_tokens=input_token_len + output_token_len
        )
        return response


if __name__ == '__main__':
    # Placeholders: replace each value with your own prompt, image file and
    # checkpoint path before running. They are quoted so the file parses.
    text = '<prompt>'
    image_path = '<image_path>'
    model_path = '<model_path>'

    runner_args = RunnerArguments(model_path=model_path)
    runner = OvisRunner(runner_args)
    # Close the image file once generation has finished.
    with Image.open(image_path) as image:
        response = runner.run([image, text])
    print(response['output'])

@LuciusLan
Copy link

Hi, for Ovis1.6-Gemma2-9B the device mapping issue persists with transformers version 4.45.1. The inconsistent device allocation seems to happen in the HybridCache: when the device_map is set manually as in the previous posts, the cache is not created with the same map, so from layer 20 onward all cache tensors are on cuda:0 while the incoming key_states and value_states are on cuda:1. Passing use_cache=False in gen_kwargs still does not disable the cache.
My current workaround is to change lines 596-597 in modeling_ovis.py to:

# Excerpt from inside a model method (`self`, `kwargs` and `inputs_embeds` come
# from the enclosing scope, which is not shown here).
# 42 entries: device 0 for the first 20, device 1 for the remaining 22 -
# presumably one entry per decoder layer, mirroring the 20/22 layer split.
layer_device_map = [0] * 20 + [1] * 22
# NOTE(review): `kwargs` is indexed like a dict below; if it is a plain dict,
# getattr(kwargs, "num_beams", ...) never finds the key and always yields the
# default `inputs_embeds.shape[0]` - confirm whether `kwargs.get` was intended.
kwargs['past_key_values'] = self._get_hybrid_cache_for_llm(
    getattr(kwargs, "num_beams", inputs_embeds.shape[0]), kwargs['max_new_tokens'] + inputs_embeds.shape[-2], 
    layer_device_map=layer_device_map)

(This follows the device map from the previous posts; for a different device map or more GPUs, edit layer_device_map accordingly.)

@momianhua
Copy link

Below is an example of running AIDC-AI/Ovis1.5-Gemma2-9B on two GPUs:

import torch
from PIL import Image
from transformers import AutoModelForCausalLM

# Two-GPU placement: the non-layer entries and decoder layers 0-19 are mapped
# to device 0, decoder layers 20-41 to device 1.
device_map = {
    module_name: 0
    for module_name in (
        "visual_tokenizer",
        "vte",
        "llm.model.embed_tokens",
        "llm.model.norm",
        "llm.lm_head",
    )
}
device_map.update(
    (f"llm.model.layers.{layer_idx}", 0 if layer_idx < 20 else 1)
    for layer_idx in range(42)
)

# Load the checkpoint with its modules placed according to `device_map`, then
# fetch the helper objects exposed by the model.
model = AutoModelForCausalLM.from_pretrained(
    "AIDC-AI/Ovis1.5-Gemma2-9B",
    torch_dtype=torch.bfloat16,
    multimodal_max_length=8192,
    device_map=device_map,
    trust_remote_code=True,
)
text_tokenizer = model.get_text_tokenizer()
visual_tokenizer = model.get_visual_tokenizer()
conversation_formatter = model.get_conversation_formatter()

# Ask for the image and the prompt, then build the model inputs.
image_path = input("Enter image path: ")
image = Image.open(image_path)
text = input("Enter prompt: ")
query = f'<image>\n{text}'
prompt, input_ids = conversation_formatter.format_query(query)
# Add a batch dimension and move the token ids to the current CUDA device.
input_ids = input_ids.unsqueeze(0).cuda()
attention_mask = input_ids.ne(text_tokenizer.pad_token_id).cuda()
# The image tensor follows the visual tokenizer's dtype and device.
pixel_values = [
    visual_tokenizer.preprocess_image(image).to(
        dtype=visual_tokenizer.dtype, device=visual_tokenizer.device
    )
]

# Sampling is disabled and the sampling parameters are explicitly cleared.
gen_kwargs = {
    "max_new_tokens": 1024,
    "do_sample": False,
    "top_p": None,
    "top_k": None,
    "temperature": None,
    "repetition_penalty": None,
    "eos_token_id": model.generation_config.eos_token_id,
    "pad_token_id": text_tokenizer.pad_token_id,
    "use_cache": True,
}

# Generate and decode the first (only) sequence of the batch.
with torch.inference_mode():
    output_ids = model.generate(
        input_ids,
        pixel_values=pixel_values,
        attention_mask=attention_mask,
        **gen_kwargs,
    )[0]
    output = text_tokenizer.decode(output_ids, skip_special_tokens=True)
print(f'Output: {output}')

What's the script for Ovis1.6-Gemma2-9B? How do I deploy it on two cards?

@runninglsy
Copy link
Collaborator

Below is an example of running AIDC-AI/Ovis1.5-Gemma2-9B on two GPUs:

import torch
from PIL import Image
from transformers import AutoModelForCausalLM

# Two-GPU placement: the non-layer entries and decoder layers 0-19 are mapped
# to device 0, decoder layers 20-41 to device 1.
device_map = {
    module_name: 0
    for module_name in (
        "visual_tokenizer",
        "vte",
        "llm.model.embed_tokens",
        "llm.model.norm",
        "llm.lm_head",
    )
}
device_map.update(
    (f"llm.model.layers.{layer_idx}", 0 if layer_idx < 20 else 1)
    for layer_idx in range(42)
)

# Load the checkpoint with its modules placed according to `device_map`, then
# fetch the helper objects exposed by the model.
model = AutoModelForCausalLM.from_pretrained(
    "AIDC-AI/Ovis1.5-Gemma2-9B",
    torch_dtype=torch.bfloat16,
    multimodal_max_length=8192,
    device_map=device_map,
    trust_remote_code=True,
)
text_tokenizer = model.get_text_tokenizer()
visual_tokenizer = model.get_visual_tokenizer()
conversation_formatter = model.get_conversation_formatter()

# Ask for the image and the prompt, then build the model inputs.
image_path = input("Enter image path: ")
image = Image.open(image_path)
text = input("Enter prompt: ")
query = f'<image>\n{text}'
prompt, input_ids = conversation_formatter.format_query(query)
# Add a batch dimension and move the token ids to the current CUDA device.
input_ids = input_ids.unsqueeze(0).cuda()
attention_mask = input_ids.ne(text_tokenizer.pad_token_id).cuda()
# The image tensor follows the visual tokenizer's dtype and device.
pixel_values = [
    visual_tokenizer.preprocess_image(image).to(
        dtype=visual_tokenizer.dtype, device=visual_tokenizer.device
    )
]

# Sampling is disabled and the sampling parameters are explicitly cleared.
gen_kwargs = {
    "max_new_tokens": 1024,
    "do_sample": False,
    "top_p": None,
    "top_k": None,
    "temperature": None,
    "repetition_penalty": None,
    "eos_token_id": model.generation_config.eos_token_id,
    "pad_token_id": text_tokenizer.pad_token_id,
    "use_cache": True,
}

# Generate and decode the first (only) sequence of the batch.
with torch.inference_mode():
    output_ids = model.generate(
        input_ids,
        pixel_values=pixel_values,
        attention_mask=attention_mask,
        **gen_kwargs,
    )[0]
    output = text_tokenizer.decode(output_ids, skip_special_tokens=True)
print(f'Output: {output}')

What's the script for Ovis1.6-Gemma2-9B? How do I deploy it on two cards?

The code is available at: https://huggingface.co/AIDC-AI/Ovis1.6-Gemma2-9B/discussions/6

@dwh2002
Copy link

dwh2002 commented Jan 14, 2025

@runninglsy Hi, how can I split Ovis1.6-Gemma2-27B across two GPUs?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

8 participants