Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/standard.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ Engine:
custom_black_list: ["reduce_sum", "c_softmax_with_cross_entropy", "elementwise_div"]
custom_white_list: ["lookup_table", "lookup_table_v2"]
save_load:
save_steps: 1000
save_by_step: 1000
output_dir: ./output
ckpt_dir:
```
Expand All @@ -113,7 +113,7 @@ Engine:
| scale_loss | 使用fp16精度下,loss的放缩比例 |
| custom_black_list | 自定义算子黑名单。这个名单中的算子在支持float16计算时会被认为是数值危险的,它们的影响也可能会在下游操作中观察到。这些算子通常不会转为float16计算。 |
| custom_white_list | 自定义算子白名单。这个名单中的算子在支持float16计算时会被认为是数值安全的,并且对性能至关重要。如果设置了白名单,该名单中的算子会使用float16计算。|
| save_steps | 保存模型间隔 |
| save_by_step | 按照steps保存模型的间隔 |
| output_dir | 指定输出文件 |
| ckpt_dir | checkpoint的加载目录 |

Expand Down
4 changes: 2 additions & 2 deletions examples/gpt/auto_parallel/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ Engine训练设置完成模型训练/验证/推理等过程中的参数设置,
custom_white_list: ["lookup_table", "lookup_table_v2"]
use_recompute: True
save_load:
save_steps: 1000
save_by_step: 1000
output_dir: ./output
ckpt_dir:
```
Expand All @@ -162,7 +162,7 @@ Engine训练设置完成模型训练/验证/推理等过程中的参数设置,
| custom_black_list | 自定义算子黑名单。这个名单中的算子在支持float16计算时会被认为是数值危险的,它们的影响也可能会在下游操作中观察到。这些算子通常不会转为float16计算。 |
| custom_white_list | 自定义算子白名单。这个名单中的算子在支持float16计算时会被认为是数值安全的,并且对性能至关重要。如果设置了白名单,该名单中的算子会使用float16计算。|
| use_recompute | 是否使用recompute训练 |
| save_steps | 保存模型间隔 |
| save_by_step | 按照steps保存模型的间隔 |
| output_dir | 指定输出文件 |
| ckpt_dir | checkpoint的加载目录 |

Expand Down
2 changes: 1 addition & 1 deletion examples/gpt/auto_parallel/configs_345M_dp8.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ Engine:
# weight_bits: 8
# activation_bits: 8
save_load:
save_steps: 1000
save_by_steps: 1000
output_dir: ./output
ckpt_dir:

Expand Down
5 changes: 3 additions & 2 deletions examples/gpt/gpt_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ def __init__(self, yaml_dict, **kw):
'custom_white_list': ["lookup_table", "lookup_table_v2"],
},
'save_load': {
'save_steps': 1000,
"save_by_epoch": -1,
"save_by_step": 1000,
'output_dir': None,
'ckpt_dir': None,
}
Expand Down Expand Up @@ -203,7 +204,7 @@ def __init__(self, yaml_dict, **kw):
},
'use_recompute': True,
'save_load': {
'save_steps': 1000,
'save_by_step': 1000,
'output_dir': None,
'ckpt_dir': None,
}
Expand Down
21 changes: 12 additions & 9 deletions examples/gpt/gpt_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,9 @@ def training_step_end(self, log_dict):
self.configs['Data']['dataset']['max_seq_len']

logger.info(
"[train] global step %d, epoch: %d, batch: %d, loss: %.9f, avg_batch_cost: %.5f sec, speed: %.2f step/s, ips_total: %.0f tokens/s, ips: %.0f tokens/s, learning rate: %.5e"
% (self.global_step, log_dict['epoch'], log_dict['batch'],
log_dict['loss'], 1. / speed, speed,
speed * default_global_tokens_num,
"[train] epoch: %d, batch: %d, loss: %.9f, avg_batch_cost: %.5f sec, speed: %.2f step/s, ips_total: %.0f tokens/s, ips: %.0f tokens/s, learning rate: %.5e"
% (log_dict['epoch'], log_dict['batch'], log_dict['loss'],
1. / speed, speed, speed * default_global_tokens_num,
speed * default_global_tokens_num, self.optimizer.get_lr()))

def configure_optimizers(self):
Expand Down Expand Up @@ -171,11 +170,15 @@ def training_step_end(self, log_dict):
self.configs['Data']['dataset']['max_seq_len']

logger.info(
"[train] global step %d, epoch: %d, batch: %d, loss: %.9f, avg_batch_cost: %.5f sec, speed: %.2f step/s, ips_total: %.0f tokens/s, ips: %.0f tokens/s, learning rate: %.5e"
% (self.global_step, log_dict['epoch'], log_dict['batch'],
log_dict['loss'], 1. / speed, speed, speed *
default_global_tokens_num, speed * default_global_tokens_num /
self.nranks, self.optimizer.get_lr()))
"[train] epoch: %d, batch: %d, loss: %.9f, avg_batch_cost: %.5f sec, speed: %.2f step/s, ips_total: %.0f tokens/s, ips: %.0f tokens/s, learning rate: %.5e"
% (log_dict['epoch'], log_dict['batch'], log_dict['loss'],
1. / speed, speed, speed * default_global_tokens_num,
speed * default_global_tokens_num / self.nranks,
self.optimizer.get_lr()))

def training_epoch_end(self, log_dict):
    """Log a one-line summary when a training epoch finishes.

    Args:
        log_dict: mapping read for its 'epoch' and 'train_cost' entries;
            the latter is printed as the epoch's total time in seconds.
    """
    epoch_idx = log_dict['epoch']
    elapsed = log_dict['train_cost']
    summary = "[Training] epoch: %d, total time: %.5f sec" % (epoch_idx,
                                                              elapsed)
    logger.info(summary)


class GPTGenerationModule(BasicModule):
Expand Down
4 changes: 2 additions & 2 deletions examples/gpt/hybrid_parallel/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ Engine训练设置完成模型训练/验证/推理等过程中的参数设置,
custom_black_list: ["reduce_sum", "c_softmax_with_cross_entropy", "elementwise_div"]
custom_white_list: ["lookup_table", "lookup_table_v2"]
save_load:
save_steps: 1000
save_by_step: 1000
output_dir: ./output
ckpt_dir:
```
Expand All @@ -202,7 +202,7 @@ Engine训练设置完成模型训练/验证/推理等过程中的参数设置,
| scale_loss | 使用fp16精度下,loss的放缩比例 |
| custom_black_list | 自定义算子黑名单。这个名单中的算子在支持float16计算时会被认为是数值危险的,它们的影响也可能会在下游操作中观察到。这些算子通常不会转为float16计算。 |
| custom_white_list | 自定义算子白名单。这个名单中的算子在支持float16计算时会被认为是数值安全的,并且对性能至关重要。如果设置了白名单,该名单中的算子会使用float16计算。|
| save_steps | 保存模型间隔 |
| save_by_step | 按照steps保存模型的间隔 |
| output_dir | 指定输出文件 |
| ckpt_dir | checkpoint的加载目录 |

Expand Down
4 changes: 3 additions & 1 deletion examples/gpt/hybrid_parallel/configs_1.3B_dp8.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ Engine:
custom_black_list: ["reduce_sum", "c_softmax_with_cross_entropy", "elementwise_div"]
custom_white_list: ["lookup_table", "lookup_table_v2"]
save_load:
save_steps: 1000
save_by_epoch: -1
save_by_step: 1000
# save_by_steps: 1000
output_dir: ./output
ckpt_dir:

Expand Down
2 changes: 1 addition & 1 deletion examples/gpt/hybrid_parallel/configs_175B_mp8_pp16.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ Engine:
custom_black_list: ["reduce_sum", "c_softmax_with_cross_entropy", "elementwise_div"]
custom_white_list: ["lookup_table", "lookup_table_v2"]
save_load:
save_steps: 1000
save_by_steps: 1000
output_dir: ./output
ckpt_dir:

Expand Down
2 changes: 1 addition & 1 deletion examples/gpt/hybrid_parallel/configs_345M_dp8.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ Engine:
custom_black_list: ["reduce_sum", "c_softmax_with_cross_entropy", "elementwise_div"]
custom_white_list: ["lookup_table", "lookup_table_v2"]
save_load:
save_steps: 50000
save_by_step: 50000
output_dir: ./output
ckpt_dir:

Expand Down
2 changes: 1 addition & 1 deletion examples/gpt/hybrid_parallel/configs_345M_mp8_qat.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ Engine:
custom_black_list: ["reduce_sum", "c_softmax_with_cross_entropy", "elementwise_div"]
custom_white_list: ["lookup_table", "lookup_table_v2"]
save_load:
save_steps: 1000
save_by_steps: 1000
output_dir: ./output
ckpt_dir:

Expand Down
2 changes: 1 addition & 1 deletion examples/gpt/hybrid_parallel/configs_6.7B_sharding16.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ Engine:
custom_black_list: ["reduce_sum", "c_softmax_with_cross_entropy", "elementwise_div"]
custom_white_list: ["lookup_table", "lookup_table_v2"]
save_load:
save_steps: 1000
save_by_steps: 1000
output_dir: ./output
ckpt_dir:

Expand Down
8 changes: 4 additions & 4 deletions examples/gpt/hybrid_parallel/run_pretrain.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,10 @@ def do_train():
sharding_rank = hcg.get_sharding_parallel_rank()
sharding_size = hcg.get_sharding_parallel_world_size()

data_world_rank = dp_rank * sharding_size + sharding_rank
data_world_size = configs['Distributed']['dp_degree'] * \
configs['Distributed']['sharding']['sharding_degree']
local_rank = int(os.getenv("PADDLE_RANK_IN_NODE", 0))
# data_world_rank = dp_rank * sharding_size + sharding_rank
# data_world_size = configs['Distributed']['dp_degree'] * \
# configs['Distributed']['sharding']['sharding_degree']
# local_rank = int(os.getenv("PADDLE_RANK_IN_NODE", 0))

# seed control in hybrid parallel
set_hyrbid_parallel_seed(seed, data_world_rank, mp_rank, pp_rank)
Expand Down
4 changes: 2 additions & 2 deletions examples/gpt/single/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ Engine训练设置完成模型训练/验证/推理等过程中的参数设置,
custom_black_list: ["reduce_sum", "c_softmax_with_cross_entropy", "elementwise_div"]
custom_white_list: ["lookup_table", "lookup_table_v2"]
save_load:
save_steps: 1000
save_by_step: 1000
output_dir: ./output
ckpt_dir:
```
Expand All @@ -143,7 +143,7 @@ Engine训练设置完成模型训练/验证/推理等过程中的参数设置,
| scale_loss | 使用fp16精度下,loss的放缩比例 |
| custom_black_list | 自定义算子黑名单。这个名单中的算子在支持float16计算时会被认为是数值危险的,它们的影响也可能会在下游操作中观察到。这些算子通常不会转为float16计算。 |
| custom_white_list | 自定义算子白名单。这个名单中的算子在支持float16计算时会被认为是数值安全的,并且对性能至关重要。如果设置了白名单,该名单中的算子会使用float16计算。|
| save_steps | 保存模型间隔 |
| save_by_step | 按照steps保存模型的间隔 |
| output_dir | 指定输出文件 |
| ckpt_dir | checkpoint的加载目录 |

Expand Down
2 changes: 1 addition & 1 deletion examples/gpt/single/configs_1.3B_single_card.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ Engine:
custom_black_list: ["reduce_sum", "c_softmax_with_cross_entropy", "elementwise_div"]
custom_white_list: ["lookup_table", "lookup_table_v2"]
save_load:
save_steps: 1000
save_by_steps: 1000
output_dir: ./output
ckpt_dir:

Expand Down
5 changes: 3 additions & 2 deletions examples/gpt/single/configs_345m_single_card.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ Engine:
custom_black_list: ["reduce_sum", "c_softmax_with_cross_entropy", "elementwise_div"]
custom_white_list: ["lookup_table", "lookup_table_v2"]
save_load:
save_steps: 1000
save_by_epoch: -1
save_by_step: 1000
output_dir: ./output
ckpt_dir:

Expand All @@ -37,7 +38,7 @@ Data:
Model:
vocab_size: 50304
hidden_size: 1024
num_layers: 24
num_layers: 4
num_attention_heads: 16
ffn_hidden_size: 4096
hidden_dropout_prob: 0.1
Expand Down
54 changes: 27 additions & 27 deletions examples/gpt/single/run_pretrain.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,36 +49,36 @@ def do_train():

if configs['Engine']['save_load']['ckpt_dir'] is not None:
engine.load()

if configs['Quantization']['enable']:
module.qat_model()

for epoch in range(configs['Engine']['num_train_epochs']):
files = get_train_data_file(configs['Data']['dataset']['input_dir'])
files.sort()
num_files = len(files)

for f_id in range(num_files):
data_file = files[f_id]
train_data_loader, valid_data_loader, test_data_loader = create_pretrained_dataset(
configs, [data_file],
local_rank=0,
data_world_size=1,
data_world_rank=0,
max_seq_len=configs['Data']['dataset']['max_seq_len'],
eos_id=tokenizer.eos_token_id)
# Bug fix, if not call valid_data_loader, the enumerate will call valid_data_loader
# many times. and start a new random dataloader.
valid_data_loader = valid_data_loader()
test_data_loader = test_data_loader()

engine.fit(train_data_loader=train_data_loader,
valid_data_loader=valid_data_loader,
epoch=epoch)

# engine.evaluate(valid_data_loader=valid_data_loader, epoch=epoch)
# engine.predict(test_data_loader=test_data_loader, epoch=epoch)
engine.save()
# for epoch in range(configs['Engine']['num_train_epochs']):
epoch = configs['Engine']['num_train_epochs']
files = get_train_data_file(configs['Data']['dataset']['input_dir'])
files.sort()
num_files = len(files)

for f_id in range(num_files):
data_file = files[f_id]
train_data_loader, valid_data_loader, test_data_loader = create_pretrained_dataset(
configs, [data_file],
local_rank=0,
data_world_size=1,
data_world_rank=0,
max_seq_len=configs['Data']['dataset']['max_seq_len'],
eos_id=tokenizer.eos_token_id)
# Bug fix, if not call valid_data_loader, the enumerate will call valid_data_loader
# many times. and start a new random dataloader.
valid_data_loader = valid_data_loader()
test_data_loader = test_data_loader()

engine.fit(epoch=epoch,
train_data_loader=train_data_loader,
valid_data_loader=valid_data_loader)

# engine.evaluate(valid_data_loader=valid_data_loader, epoch=epoch)
# engine.predict(test_data_loader=test_data_loader, epoch=epoch)
engine.save()


if __name__ == "__main__":
Expand Down
2 changes: 1 addition & 1 deletion fleetx/core/engine/auto_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def __init__(self, module, configs=None, strategy=None):
self._logging_freq = self._configs['logging_freq']
self._num_train_epochs = self._configs['num_train_epochs']

self._save_steps = self._configs['save_load']['save_steps']
self._save_steps = self._configs['save_load']['save_by_step']
self._output_dir = self._configs['save_load']['output_dir']
self._ckpt_dir = self._configs['save_load']['ckpt_dir']

Expand Down
Loading