
[Bug]: Phi3 lora module not loading #4915

Open
arunpatala opened this issue May 20, 2024 · 6 comments
Labels
bug Something isn't working

Comments

@arunpatala

arunpatala commented May 20, 2024

ValueError: While loading /data/llm_resume_profiles_phi3_v1, expected target modules in ['q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj', 'embed_tokens', 'lm_head'] but received ['qkv_proj', 'gate_up_proj']. Please verify that the loaded LoRA module is correct.

I am unable to load a LoRA module with the phi3-128k-instruct version. Can support for this be added?
I am using the vLLM Docker image, version 0.4.2.

Thanks

@arunpatala arunpatala added the bug Something isn't working label May 20, 2024
@WeiXiaoSummer

Yes, you can refer to #4715 for a conversion script. However, Phi3 128k also contains a gate_up_proj, so you can modify that script to decompose the gate_up_proj layer as well, like the following:

import torch
import os
import json
import fire
import shutil

from safetensors.torch import load_file, save_file

def replicate_lora_a_qkv(name: str, weight: "torch.Tensor") -> dict[str, "torch.Tensor"]:
    # LoRA A acts on the shared input, so the same A matrix is replicated for q/k/v.
    prefix, suffix = name.split('qkv_proj')
    res = {}
    for t in ['q_proj', 'k_proj', 'v_proj']:
        name = f"{prefix}{t}{suffix}"
        res[name] = weight.clone()
    return res

def replicate_lora_a_gate_up(name: str, weight: "torch.Tensor") -> dict[str, "torch.Tensor"]:
    # Likewise, the LoRA A matrix is replicated for gate_proj and up_proj.
    prefix, suffix = name.split('gate_up_proj')
    res = {}
    for t in ['gate_proj', 'up_proj']:
        name = f"{prefix}{t}{suffix}"
        res[name] = weight.clone()
    return res

def split_lora_b_qkv(name: str, weight: "torch.Tensor") -> dict[str, "torch.Tensor"]:
    # LoRA B produces the fused qkv output, so split it row-wise into three equal
    # chunks (q, k and v have the same output size in Phi3-mini).
    size = weight.shape[0] // 3
    prefix, suffix = name.split('qkv_proj')
    res = {
        f"{prefix}{t}{suffix}": w
        for t, w in zip(['q_proj', 'k_proj', 'v_proj'], weight.split(size))
    }
    return res
 
def split_lora_b_gate_up(name: str, weight: "torch.Tensor") -> dict[str, "torch.Tensor"]:
    # The fused gate_up output is split row-wise into two equal halves.
    size = weight.shape[0] // 2
    prefix, suffix = name.split('gate_up_proj')
    res = {
        f"{prefix}{t}{suffix}": w
        for t, w in zip(['gate_proj', 'up_proj'], weight.split(size))
    }
    return res

def convert_qkv_gate_up_lora_to_splits_vllm(adapter_folder_path: str, output_folder_path: str) -> None:
    """Decompose fused qkv_proj / gate_up_proj LoRA weights and write a vLLM-compatible adapter."""

    adapter_bin_name = 'adapter_model.safetensors'
    adapter_config_name = 'adapter_config.json'


    lora = load_file(f"{adapter_folder_path}/{adapter_bin_name}")
    with open(f"{adapter_folder_path}/{adapter_config_name}", 'r') as f:
        lora_config = json.load(f)

    assert 'qkv_proj' in lora_config['target_modules']
    assert 'gate_up_proj' in lora_config['target_modules']

    # converting weights
    res = {}
    for k, v in lora.items():
        if 'qkv_proj' in k and 'lora_A' in k:
            res.update(replicate_lora_a_qkv(k, v))
        elif 'qkv_proj' in k and 'lora_B' in k:
            res.update(split_lora_b_qkv(k, v))
        elif 'gate_up_proj' in k and 'lora_A' in k:
            res.update(replicate_lora_a_gate_up(k, v))
        elif 'gate_up_proj' in k and 'lora_B' in k:
            res.update(split_lora_b_gate_up(k, v))
        else:
            res[k] = v
    
    # converting config
    temp = ['q_proj', 'k_proj', 'v_proj', 'gate_proj', 'up_proj'] + [t for t in lora_config['target_modules'] if t != 'qkv_proj' and t != 'gate_up_proj']
    lora_config['target_modules'] = temp

    # saving
    os.makedirs(output_folder_path, exist_ok=True)
    save_file(res, f"{output_folder_path}/{adapter_bin_name}", metadata={"format": "pt"})
    with open(f"{output_folder_path}/{adapter_config_name}", 'w') as f:
        json.dump(lora_config, f, indent=4)
    
    for file in os.listdir(adapter_folder_path):
        if file != adapter_bin_name and file != adapter_config_name:
            shutil.copy(f"{adapter_folder_path}/{file}", f"{output_folder_path}/{file}")

if __name__ == "__main__":
    fire.Fire(convert_qkv_gate_up_lora_to_splits_vllm)

vLLM can load the converted adapter without errors, but I'm not sure whether there will be any performance issues.
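For reference, a minimal way to invoke the script (assuming it is saved as convert_phi3_lora.py; the file name and paths are placeholders), either via the fire CLI or directly from Python:

# Command line (fire maps positional CLI args to the function's parameters):
#   python convert_phi3_lora.py /path/to/phi3_lora_adapter /path/to/phi3_lora_adapter_split

# Or from Python:
from convert_phi3_lora import convert_qkv_gate_up_lora_to_splits_vllm

convert_qkv_gate_up_lora_to_splits_vllm(
    adapter_folder_path="/path/to/phi3_lora_adapter",       # folder with adapter_model.safetensors
    output_folder_path="/path/to/phi3_lora_adapter_split",  # vLLM-compatible adapter is written here
)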

@arunpatala
Copy link
Author

Thanks a lot. I will use this, but will vLLM be adding support for this in the future?

@SHIMURA0

(quoting WeiXiaoSummer's comment and conversion script above)

After decomposing the layers of Phi3 like this, will there be any problem merging the LoRA layers back into Phi3? Do we have to merge the decomposed layers back before applying the LoRA to the original Phi3?

@KaranChand

KaranChand commented May 21, 2024

I added the new projection layers using the conversion code above, then loaded the adapter with the standard vLLM LoRA code (using snapshot_download) from their documentation (https://docs.vllm.ai/en/latest/models/lora.html#using-lora-adapters).
It worked without errors, though performance dropped massively compared to the same model (with merged LoRA) outside vLLM. I used the same tokenizer for both models.
I have not yet found out why this is the case; it might just be a bug in my own code somewhere.

Update: There was a bug in my code. The script above works just fine, and the LoRA weights are merged well.
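For anyone following along, this is roughly the loading pattern from the vLLM LoRA docs, pointed at the converted adapter (model name, prompt, and paths are placeholders; snapshot_download is only needed when the adapter lives on the Hub, while the converted folder here is local):

from vllm import LLM, SamplingParams
from vllm.lora.request import LoRARequest

# Converted adapter directory produced by the conversion script above (placeholder path).
lora_path = "/path/to/phi3_lora_adapter_split"

llm = LLM(
    model="microsoft/Phi-3-mini-128k-instruct",
    enable_lora=True,
    # max_lora_rank=64,  # raise this if the adapter's rank exceeds the default of 16
)

outputs = llm.generate(
    ["Extract the candidate's skills from this resume: ..."],
    SamplingParams(temperature=0.0, max_tokens=128),
    # lora_int_id (here 1) just needs to be unique per adapter within this LLM instance.
    lora_request=LoRARequest("phi3_adapter", 1, lora_path),
)
print(outputs[0].outputs[0].text)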

@WeiXiaoSummer

Thanks a lot. I will use this, but will vLLM be adding support for this in the future?

I guess they will; otherwise we will always need to convert the LoRA first, which is not very convenient.

@WeiXiaoSummer

(quoting the conversion script comment above)

After decomposing the layers of Phi3 like this, will there be any problem merging the LoRA layers back into Phi3? Do we have to merge the decomposed layers back before applying the LoRA to the original Phi3?

I think you can directly merge the original (not decomposed) LoRA adapter into Phi3 if you're not trying to load the LoRA via vLLM.
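A minimal sketch of that merge path with PEFT (assuming the adapter was trained with PEFT; model name and paths are placeholders):

from transformers import AutoModelForCausalLM
from peft import PeftModel

# Load the base model and the original (fused qkv_proj / gate_up_proj) adapter.
base = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3-mini-128k-instruct",
    trust_remote_code=True,
)
model = PeftModel.from_pretrained(base, "/path/to/original_phi3_lora_adapter")

# Fold the LoRA deltas into the base weights and save a standalone merged model.
merged = model.merge_and_unload()
merged.save_pretrained("/path/to/merged_phi3_model")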
