JayceAnova committed
Commit 095abb0 · verified · 1 Parent(s): d3f1650

Update finetune.py

Files changed (1)
  1. finetune.py +6 -6
finetune.py CHANGED
@@ -7,7 +7,7 @@ from typing import List
 import torch
 import transformers
 
-from transformers import LlamaForCausalLM, LlamaTokenizer, LlamaConfig
+from transformers import LlamaForCausalLM, LlamaTokenizer, LlamaConfig, AutoTokenizer, AutoModelForCausalLM, AutoConfig
 
 from utils import *
 from collator import Collator
@@ -27,13 +27,13 @@ def train(args):
     if ddp:
         device_map = {"": local_rank}
 
-    config = LlamaConfig.from_pretrained(args.base_model)
-    tokenizer = LlamaTokenizer.from_pretrained(
+    config = AutoConfig.from_pretrained(args.base_model)
+    tokenizer = AutoTokenizer.from_pretrained(
         args.base_model,
         model_max_length = args.model_max_length,
         padding_side="right",
     )
-    tokenizer.pad_token_id = 0
+    tokenizer.pad_token_id = tokenizer.eos_token_id
     gradient_checkpointing = True
 
     train_data, valid_data = load_datasets(args)
@@ -48,7 +48,7 @@ def train(args):
     collator = Collator(args, tokenizer)
 
 
-    model = LlamaForCausalLM.from_pretrained(
+    model = AutoModelForCausalLM.from_pretrained(
         args.base_model,
         # torch_dtype=torch.float16,
         device_map=device_map,
@@ -85,7 +85,7 @@ def train(args):
         eval_steps=args.save_and_eval_steps,
         save_steps=args.save_and_eval_steps,
         output_dir=args.output_dir,
-        save_total_limit=5,
+        save_total_limit=20,
         load_best_model_at_end=True,
         deepspeed=args.deepspeed,
         ddp_find_unused_parameters=False if ddp else None,
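
The substantive change is the switch from the Llama-specific classes to their Auto* equivalents, which dispatch on the checkpoint's config and so let the same loading code serve non-Llama base models, plus a safer pad token: id 0 is Llama's <unk> token, while reusing EOS is the usual fallback for tokenizers that ship without a pad token. A minimal standalone sketch of the pattern the new code follows, with a hypothetical checkpoint id and max length standing in for args.base_model and args.model_max_length:

from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

base_model = "meta-llama/Llama-2-7b-hf"  # hypothetical checkpoint; stands in for args.base_model

# The Auto* classes resolve the concrete implementation from the
# checkpoint's config, so this works for Llama and other causal LMs alike.
config = AutoConfig.from_pretrained(base_model)
tokenizer = AutoTokenizer.from_pretrained(
    base_model,
    model_max_length=2048,  # placeholder for args.model_max_length
    padding_side="right",
)

# Llama tokenizers define no pad token; reusing EOS avoids hard-coding
# id 0, which Llama assigns to <unk>.
tokenizer.pad_token_id = tokenizer.eos_token_id

model = AutoModelForCausalLM.from_pretrained(base_model)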
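The save_total_limit bump from 5 to 20 interacts with the surrounding TrainingArguments: the Trainer rotates out the oldest checkpoints beyond the limit, and with load_best_model_at_end=True it also tracks and reloads the best checkpoint, which requires save_steps and eval_steps to line up as they do here (both set to args.save_and_eval_steps). A sketch with placeholder values for the args.* fields:

from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./ckpts",          # placeholder for args.output_dir
    evaluation_strategy="steps",   # spelled eval_strategy in newer transformers releases
    eval_steps=200,                # placeholder for args.save_and_eval_steps
    save_steps=200,                # must align with eval_steps when load_best_model_at_end is set
    save_total_limit=20,           # keep at most 20 checkpoints; oldest are deleted first
    load_best_model_at_end=True,   # reload the best checkpoint (by eval loss) after training
)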