-
Notifications
You must be signed in to change notification settings - Fork 46
Expand file tree
/
Copy pathMistral_7B_Instruct_v0.2_vitis_ai_config.json
More file actions
83 lines (83 loc) · 2.58 KB
/
Mistral_7B_Instruct_v0.2_vitis_ai_config.json
File metadata and controls
83 lines (83 loc) · 2.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
{
  "input_model": {
    "type": "HFModel",
    "model_path": "mistralai/Mistral-7B-Instruct-v0.2"
  },
  "systems": {
    "local_system": {
      "type": "LocalSystem",
      "accelerators": [
        {
          "device": "cpu",
          "execution_providers": ["CPUExecutionProvider"]
        }
      ]
    }
  },
  "passes": {
    "qq": {
      "type": "QuarkQuantization",
      "quant_scheme": "w_uint4_per_group_asym",
      "quant_algo": "awq",
      "dataset": "pileval_for_awq_benchmark",
      "data_type": "bfloat16",
      "num_calib_data": 128,
      "model_export": ["hf_format"],
      "exclude_layers": [],
      "quant_config": {
        "name": "awq",
        "scaling_layers": [
          {
            "prev_op": "input_layernorm",
            "layers": [
              "self_attn.q_proj",
              "self_attn.k_proj",
              "self_attn.v_proj"
            ],
            "inp": "self_attn.q_proj",
            "module2inspect": "self_attn"
          },
          {
            "prev_op": "self_attn.v_proj",
            "layers": ["self_attn.o_proj"],
            "inp": "self_attn.o_proj"
          },
          {
            "prev_op": "post_attention_layernorm",
            "layers": [
              "mlp.gate_proj",
              "mlp.up_proj"
            ],
            "inp": "mlp.gate_proj",
            "module2inspect": "mlp"
          },
          {
            "prev_op": "mlp.up_proj",
            "layers": ["mlp.down_proj"],
            "inp": "mlp.down_proj"
          }
        ],
        "model_decoder_layers": "model.layers"
      }
    },
    "mg": {
      "type": "VitisGenerateModelLLM",
      "packed_const": false,
      "cpu_only": false
    }
  },
  "target": "local_system",
  "log_severity_level": 1,
  "output_dir": "model/Mistral-7B-Instruct-v0.2-vai",
  "cache_dir": "cache",
  "no_artifacts": true,
  "evaluate_input_model": false
}