
[Bug]: Phi3 lora module not loading #4915

Open
arunpatala opened this issue May 20, 2024 · 6 comments
Labels
bug Something isn't working

Comments

@arunpatala

arunpatala commented May 20, 2024

ValueError: While loading /data/llm_resume_profiles_phi3_v1, expected target modules in ['q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj', 'embed_tokens', 'lm_head'] but received ['qkv_proj', 'gate_up_proj']. Please verify that the loaded LoRA module is correct.

I am unable to load a LoRA module with the phi3-128k-instruct version. Can support for this be added?
I am using the vLLM Docker image, version 0.4.2.

Thanks

@arunpatala arunpatala added the bug Something isn't working label May 20, 2024
@WeiXiaoSummer

Yes, you can refer to #4715 for a conversion script. However, Phi3 128k also contains a gate_up_proj, so you can modify that script to decompose the gate_up_proj layer as well, like the following:

import torch
import os
import json
import fire
import shutil

from safetensors.torch import load_file, save_file

def replicate_lora_a_qkv(name: str, weight: "torch.Tensor") -> dict[str, "torch.Tensor"]:
    # LoRA A acts on the shared input, so the same A matrix is replicated for q/k/v.
    prefix, suffix = name.split('qkv_proj')
    res = {}
    for t in ['q_proj', 'k_proj', 'v_proj']:
        name = f"{prefix}{t}{suffix}"
        res[name] = weight.clone()
    return res

def replicate_lora_a_gate_up(name: str, weight: "torch.Tensor") -> dict[str, "torch.Tensor"]:
    # Likewise, the LoRA A matrix is replicated for gate_proj and up_proj.
    prefix, suffix = name.split('gate_up_proj')
    res = {}
    for t in ['gate_proj', 'up_proj']:
        name = f"{prefix}{t}{suffix}"
        res[name] = weight.clone()
    return res

def split_lora_b_qkv(name: str, weight: "torch.Tensor") -> dict[str, "torch.Tensor"]:
    # LoRA B produces the fused qkv output, so split it row-wise into three equal
    # chunks (q, k and v have the same output size in Phi3-mini).
    size = weight.shape[0] // 3
    prefix, suffix = name.split('qkv_proj')
    res = {
        f"{prefix}{t}{suffix}": w
        for t, w in zip(['q_proj', 'k_proj', 'v_proj'], weight.split(size))
    }
    return res
 
def split_lora_b_gate_up(name: str, weight: "torch.Tensor") -> dict[str, "torch.Tensor"]:
    # The fused gate_up output is split row-wise into two equal halves.
    size = weight.shape[0] // 2
    prefix, suffix = name.split('gate_up_proj')
    res = {
        f"{prefix}{t}{suffix}": w
        for t, w in zip(['gate_proj', 'up_proj'], weight.split(size))
    }
    return res

def convert_qkv_gate_up_lora_to_splits_vllm(adapter_folder_path: str, output_folder_path: str) -> None:
    """Decompose fused qkv_proj / gate_up_proj LoRA weights and write a vLLM-compatible adapter."""

    adapter_bin_name = 'adapter_model.safetensors'
    adapter_config_name = 'adapter_config.json'


    lora = load_file(f"{adapter_folder_path}/{adapter_bin_name}")
    with open(f"{adapter_folder_path}/{adapter_config_name}", 'r') as f:
        lora_config = json.load(f)

    assert 'qkv_proj' in lora_config['target_modules']
    assert 'gate_up_proj' in lora_config['target_modules']

    # converting weights
    res = {}
    for k, v in lora.items():
        if 'qkv_proj' in k and 'lora_A' in k:
            res.update(replicate_lora_a_qkv(k, v))
        elif 'qkv_proj' in k and 'lora_B' in k:
            res.update(split_lora_b_qkv(k, v))
        elif 'gate_up_proj' in k and 'lora_A' in k:
            res.update(replicate_lora_a_gate_up(k, v))
        elif 'gate_up_proj' in k and 'lora_B' in k:
            res.update(split_lora_b_gate_up(k, v))
        else:
            res[k] = v
    
    # converting config
    temp = ['q_proj', 'k_proj', 'v_proj', 'gate_proj', 'up_proj'] + [t for t in lora_config['target_modules'] if t != 'qkv_proj' and t != 'gate_up_proj']
    lora_config['target_modules'] = temp

    # saving
    os.makedirs(output_folder_path, exist_ok=True)
    save_file(res, f"{output_folder_path}/{adapter_bin_name}", metadata={"format": "pt"})
    with open(f"{output_folder_path}/{adapter_config_name}", 'w') as f:
        json.dump(lora_config, f, indent=4)
    
    for file in os.listdir(adapter_folder_path):
        if file != adapter_bin_name and file != adapter_config_name:
            shutil.copy(f"{adapter_folder_path}/{file}", f"{output_folder_path}/{file}")

if __name__ == "__main__":
    fire.Fire(convert_qkv_gate_up_lora_to_splits_vllm)

vLLM can load the converted adapter without errors, but I'm not sure whether there will be any performance issues.
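For reference, a minimal way to invoke the script (assuming it is saved as convert_phi3_lora.py; the file name and paths are placeholders), either via the fire CLI or directly from Python:

# Command line (fire maps positional CLI args to the function's parameters):
#   python convert_phi3_lora.py /path/to/phi3_lora_adapter /path/to/phi3_lora_adapter_split

# Or from Python:
from convert_phi3_lora import convert_qkv_gate_up_lora_to_splits_vllm

convert_qkv_gate_up_lora_to_splits_vllm(
    adapter_folder_path="/path/to/phi3_lora_adapter",       # folder with adapter_model.safetensors
    output_folder_path="/path/to/phi3_lora_adapter_split",  # vLLM-compatible adapter is written here
)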

@arunpatala
Copy link
Author

Thanks a lot. I will use this, but will vLLM be adding support for this in the future?

@SHIMURA0

(quoting WeiXiaoSummer's comment and conversion script above)

After decomposing the layers of Phi3 like this, will there be any problem merging the LoRA layers back into Phi3? Do we have to merge the decomposed layers back before applying the LoRA to the original Phi3?

@KaranChand

KaranChand commented May 21, 2024

I added the new projection layers using the conversion code above, then loaded the adapter with the standard vLLM LoRA code (using snapshot_download) from their documentation (https://docs.vllm.ai/en/latest/models/lora.html#using-lora-adapters).
It worked without errors, though performance dropped massively compared to the same model (with merged LoRA) outside vLLM. I used the same tokenizer for both models.
I have not yet found out why this is the case; it might just be a bug in my own code somewhere.

Update: There was a bug in my code. The script above works just fine, and the LoRA weights are merged well.
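For anyone following along, this is roughly the loading pattern from the vLLM LoRA docs, pointed at the converted adapter (model name, prompt, and paths are placeholders; snapshot_download is only needed when the adapter lives on the Hub, while the converted folder here is local):

from vllm import LLM, SamplingParams
from vllm.lora.request import LoRARequest

# Converted adapter directory produced by the conversion script above (placeholder path).
lora_path = "/path/to/phi3_lora_adapter_split"

llm = LLM(
    model="microsoft/Phi-3-mini-128k-instruct",
    enable_lora=True,
    # max_lora_rank=64,  # raise this if the adapter's rank exceeds the default of 16
)

outputs = llm.generate(
    ["Extract the candidate's skills from this resume: ..."],
    SamplingParams(temperature=0.0, max_tokens=128),
    # lora_int_id (here 1) just needs to be unique per adapter within this LLM instance.
    lora_request=LoRARequest("phi3_adapter", 1, lora_path),
)
print(outputs[0].outputs[0].text)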

@WeiXiaoSummer

Thanks a lot. I will use this, but will vLLM be adding support for this in the future?

I guess they will; otherwise we will always need to convert the LoRA first, which is not very convenient.

@WeiXiaoSummer

(quoting the conversion script comment above)

After decomposing the layers of Phi3 like this, will there be any problem merging the LoRA layers back into Phi3? Do we have to merge the decomposed layers back before applying the LoRA to the original Phi3?

I think you can directly merge the original (not decomposed) LoRA adapter into Phi3 if you're not trying to load the LoRA via vLLM.
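A minimal sketch of that merge path with PEFT (assuming the adapter was trained with PEFT; model name and paths are placeholders):

from transformers import AutoModelForCausalLM
from peft import PeftModel

# Load the base model and the original (fused qkv_proj / gate_up_proj) adapter.
base = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3-mini-128k-instruct",
    trust_remote_code=True,
)
model = PeftModel.from_pretrained(base, "/path/to/original_phi3_lora_adapter")

# Fold the LoRA deltas into the base weights and save a standalone merged model.
merged = model.merge_and_unload()
merged.save_pretrained("/path/to/merged_phi3_model")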
