# Run-wide settings. Every key below belongs to the `Global` mapping, so it
# must be indented under it.
# NOTE(review): most paths are absolute and start with /backup2/..., while
# `infer_img` is relative - confirm they resolve on the target machine.
Global:
  use_gpu: True
  epoch_num: 100
  log_smooth_window: 20
  print_batch_step: 200
  save_model_dir: /backup2/synthtiger/bangla/PaddleOCR/output/rec/svtr/
  save_epoch_step: 1
  # evaluation is run every 5000 iterations after the 0th iteration
  eval_batch_step: [0, 5000]
  cal_metric_during_train: True
  # The next three keys are intentionally left without a value; a bare
  # `key:` loads as null.
  pretrained_model:
  checkpoints:
  save_inference_dir:
  use_visualdl: False
  infer_img: doc/imgs_words_en/41.jpg
  # for data or label process
  character_dict_path: /backup2/synthtiger/bangla/PaddleOCR/ppocr/utils/bn_char_synth.txt
  # NOTE(review): `ch` is paired with a dictionary file named
  # bn_char_synth.txt - confirm the consumer still reads this key.
  character_type: ch
  max_text_length: 25
  infer_mode: False
  use_space_char: True
  save_res_path: /backup2/synthtiger/bangla/PaddleOCR/output/rec/predicts_svtr_tiny.txt
# Optimizer hyper-parameters together with the nested learning-rate schedule.
Optimizer:
  name: AdamW
  beta1: 0.9
  beta2: 0.99
  epsilon: 0.00000008  # 8e-8 written out in plain decimal
  weight_decay: 0.05
  # One plain scalar holding two space-separated names (not a YAML list).
  no_weight_decay_name: norm pos_embed
  one_dim_param_no_weight_decay: true
  # `lr` is its own mapping so that its `name` does not collide with the
  # optimizer's `name` above.
  lr:
    name: Cosine
    learning_rate: 0.0005
    warmup_epoch: 2
# Model definition: a recognition model made of four sub-sections
# (Transform, Backbone, Neck, Head), each selected by its own `name`.
Architecture:
  model_type: rec
  algorithm: SVTR
  Transform:
    name: STN_ON
    tps_inputsize: [32, 64]
    tps_outputsize: [32, 100]
    num_control_points: 20
    tps_margins: [0.05, 0.05]
    # Plain scalar `none` loads as the string "none", not as YAML null.
    stn_activation: none
  Backbone:
    name: SVTRNet
    # Same value as Transform.tps_outputsize above.
    img_size: [32, 100]
    # Same value as Global.max_text_length (25).
    out_char_num: 25
    out_channels: 192
    patch_merging: 'Conv'
    # embed_dim, depth, num_heads and local_mixer each have three entries.
    embed_dim: [64, 128, 256]
    depth: [3, 6, 3]
    num_heads: [2, 4, 8]
    # 12 entries (6 Local, then 6 Global), matching sum(depth) = 3 + 6 + 3.
    # Presumably one mixer type per block - confirm against SVTRNet.
    mixer: ['Local', 'Local', 'Local', 'Local', 'Local', 'Local', 'Global', 'Global', 'Global', 'Global', 'Global', 'Global']
    local_mixer: [[7, 11], [7, 11], [7, 11]]
    last_stage: True
    prenorm: false
  Neck:
    name: SequenceEncoder
    encoder_type: reshape
  Head:
    name: CTCHead
# Training loss, selected by name.
Loss:
  name: CTCLoss
# Post-processing step, selected by name.
PostProcess:
  name: CTCLabelDecode
# Evaluation metric and the indicator reported as the main one.
Metric:
  name: RecMetric
  main_indicator: acc
# Training data: the dataset definition (with its transform pipeline) and
# the loader settings are two sibling mappings under `Train`.
Train:
  dataset:
    name: SimpleDataSet
    data_dir: /backup2/synthtiger/bangla/PaddleOCR/train_data/
    label_file_list: ['/backup2/synthtiger/bangla/PaddleOCR/train_data/gt.txt']
    # Each list item is a single-key mapping: the key names the transform
    # and the nested mapping (if any) holds its arguments.
    transforms:
      - DecodeImage:  # load image
          img_mode: BGR
          channel_first: False
      - CTCLabelEncode:  # Class handling label
      - RecResizeImg:
          # Left empty here (loads as null); Global.character_dict_path
          # holds the actual dictionary path.
          character_dict_path:
          image_shape: [3, 64, 256]
          padding: False
      - KeepKeys:
          keep_keys: ['image', 'label', 'length']  # dataloader will return list in this order
  loader:
    shuffle: True
    batch_size_per_card: 1024
    drop_last: True
    # NOTE(review): 0 workers alongside a batch size of 1024 - confirm this
    # is intended.
    num_workers: 0
# Evaluation data: same layout as the training section, with shuffling and
# drop_last turned off.
Eval:
  dataset:
    name: SimpleDataSet
    data_dir: /backup2/synthtiger/bangla/PaddleOCR/horizontal_valid/
    label_file_list: ['/backup2/synthtiger/bangla/PaddleOCR/horizontal_valid/gt.txt']
    # Each list item is a single-key mapping: the key names the transform
    # and the nested mapping (if any) holds its arguments.
    transforms:
      - DecodeImage:  # load image
          img_mode: BGR
          channel_first: False
      - CTCLabelEncode:  # Class handling label
      - RecResizeImg:
          # Left empty here (loads as null); Global.character_dict_path
          # holds the actual dictionary path.
          character_dict_path:
          image_shape: [3, 64, 256]
          padding: False
      - KeepKeys:
          keep_keys: ['image', 'label', 'length']  # dataloader will return list in this order
  loader:
    shuffle: False
    drop_last: False
    batch_size_per_card: 512
    num_workers: 0