Skip to content
Open
Changes from 1 commit
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
5d19fca
feat(storage): 添加阿里云OSS存储支持
zyfjrx Jan 29, 2026
f0ccfd4
Use cache mount for genai docker (#4954)
Bobholamovic Jan 29, 2026
f34bfc2
Fix HPS order bug (#4955)
Bobholamovic Jan 29, 2026
f53eaf7
Fix HPS and remove scipy from required deps (#4957)
Bobholamovic Jan 29, 2026
839241e
Fix transformers version (#4956)
Bobholamovic Jan 29, 2026
a102e8d
bugfix: unexpected change of the constant IMAGE_LABELS (#4960)
changdazhou Jan 30, 2026
bb4b1c1
[METAX] add ppdoclayoutv3 to METAX_GPU_WHITELIST (#4962)
changdazhou Jan 30, 2026
f7f83b7
vllm 0.10.2 needs transformers 4.x (#4963)
zhang-prog Jan 30, 2026
56ca189
Support setting PDF rendering scale factor (#4967)
Bobholamovic Feb 2, 2026
2e70318
fix: check if cropped image size is zero in table recognition v2 (#4937)
lyn-zzz Feb 3, 2026
06223e3
Fix/doc vlm async cancellation (#4969)
scyyh11 Feb 4, 2026
a20fddc
Fix: Update langchain import to use langchain_core.documents (#4944)
Ihebdhouibi Feb 4, 2026
edeb50e
Fix typo (#4982)
Bobholamovic Feb 6, 2026
fb23d61
Update Docker image for CI workflow (#4975)
plusNew001 Feb 9, 2026
ac930a9
add llama.cpp support (#4983)
zhang-prog Feb 9, 2026
e2b463e
fixing langchain text splitter import (#4981)
norbbrog Feb 10, 2026
b0be02f
修复PNG格式空白图像出现超出索引范围的问题 (#4945)
yang-521 Feb 10, 2026
9cdf48e
Remove PaddleOCR-VL server page limit (#4991)
Bobholamovic Feb 11, 2026
fe7c149
Add Intel GPU config (#4992)
Bobholamovic Feb 11, 2026
d59b2c4
PaddleX Add ROCm 7.0 compatibility patches (#4990)
M4jupitercannon Feb 11, 2026
9a3f4dd
[Feat] Support setting expiration for BOS URLs (#4993)
Bobholamovic Feb 12, 2026
3b04645
add \n for seal rec && bugfix for text in table && delete_pass by mod…
changdazhou Feb 13, 2026
bfda368
Fix auto batch size for PaddleOCR-VL-1.5-0.9B (#5003)
Bobholamovic Feb 13, 2026
e63a51a
Update HPS frozon deps (#5004)
Bobholamovic Feb 13, 2026
5bf095a
update vlm batch_size (#5005)
zhang-prog Feb 13, 2026
50f5932
add P800 document (#4995)
onecatcn Feb 14, 2026
062a782
Update mkdocs.yml to reflect kunlunxin docs changes (#5006)
onecatcn Feb 14, 2026
2054f94
support iluvatar_gpu for ppdet (#5002)
leo-q8 Feb 14, 2026
4511e2f
fix: add langchain compatibility shim for newer versions (0.1.x+) (#4…
Ansarimajid Feb 24, 2026
914f5fb
fix codes (#4984)
liu-jiaxuan Feb 24, 2026
cfba8bc
fix: guard chart_recognition_model init with use_chart_recognition fl…
scyyh11 Feb 24, 2026
3d5e3a0
Use git hash as image version (#5016)
Bobholamovic Feb 26, 2026
2044265
fix typo in error message (#5015)
F-Palmer Feb 26, 2026
1ffc4a6
Feature/hps paddleocr vl 1.5 (#5017)
scyyh11 Feb 28, 2026
09e1ff1
Add independent version.txt for PaddleOCR-VL-1.5 HPS SDK (#5026)
scyyh11 Mar 2, 2026
6b397a6
Fix: Integer overflow in `calculate_overlap_ratio` (`utils.py:248`) (…
albcunha Mar 2, 2026
2909209
[Fix] refine config of RT-DETR-L (#5036)
liu-jiaxuan Mar 5, 2026
01c1d80
Merge branch 'develop' into support-aliyun-oss
Bobholamovic Mar 9, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
[Fix] refine config of RT-DETR-L (#5036)
* fix codes

* refine config

* refine codes

* refine codes

* refine codes
  • Loading branch information
liu-jiaxuan authored Mar 5, 2026
commit 29092094dc8f92c382fe0fcaf8761e1ff2d11139
158 changes: 93 additions & 65 deletions paddlex/inference/models/object_detection/modeling/rt_detr.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,77 +181,107 @@ def __call__(self, head_out, im_shape, scale_factor, pad_shape):
class RTDETRConfig(PretrainedConfig):
    """Configuration for the dynamic-graph RT-DETR detector.

    Accepts HuggingFace-style RT-DETR parameter names (``encoder_hidden_dim``,
    ``decoder_layers``, ...) and maps them onto the internal attribute names
    used by the backbone / HybridEncoder / RTDETRTransformer modules
    (``hidden_dim``, ``el_*``, ``tf_*``).

    Only the ``hgnet_v2`` backbone has a dynamic-graph implementation here;
    any other ``backbone_config["model_type"]`` raises ``RuntimeError``.

    Note: several accepted parameters (e.g. ``decoder_n_points``,
    ``anchor_image_size``, ``focal_loss_alpha``) are currently not stored on
    the config — presumably kept for checkpoint/config compatibility; confirm
    against the consuming modules before removing them.
    """

    def __init__(
        self,
        initializer_range=0.01,
        initializer_bias_prior_prob=None,
        layer_norm_eps=1e-5,
        batch_norm_eps=1e-5,
        # backbone
        backbone_config=None,
        freeze_backbone_batch_norms=True,
        # encoder HybridEncoder
        encoder_hidden_dim=256,
        encoder_in_channels=[512, 1024, 2048],
        feat_strides=[8, 16, 32],
        encoder_layers=1,
        encoder_ffn_dim=1024,
        encoder_attention_heads=8,
        dropout=0.0,
        activation_dropout=0.0,
        encode_proj_layers=[2],
        positional_encoding_temperature=10000,
        encoder_activation_function="gelu",
        activation_function="silu",
        eval_size=None,
        normalize_before=False,
        hidden_expansion=1.0,
        # decoder RTDetrTransformer
        d_model=256,
        num_queries=300,
        decoder_in_channels=[256, 256, 256],
        decoder_ffn_dim=1024,
        num_feature_levels=3,
        decoder_n_points=4,
        decoder_layers=6,
        decoder_attention_heads=8,
        decoder_activation_function="relu",
        attention_dropout=0.0,
        num_denoising=100,
        label_noise_ratio=0.5,
        box_noise_scale=1.0,
        learn_initial_query=False,
        anchor_image_size=None,
        disable_custom_kernels=True,
        with_box_refine=True,
        is_encoder_decoder=True,
        # Loss
        matcher_alpha=0.25,
        matcher_gamma=2.0,
        matcher_class_cost=2.0,
        matcher_bbox_cost=5.0,
        matcher_giou_cost=2.0,
        use_focal_loss=True,
        auxiliary_loss=True,
        focal_loss_alpha=0.75,
        focal_loss_gamma=2.0,
        weight_loss_vfl=1.0,
        weight_loss_bbox=5.0,
        weight_loss_giou=2.0,
        eos_coefficient=1e-4,
        **kwargs,
    ):
        # Only the HGNetV2 backbone is implemented for the dynamic graph.
        # NOTE: reusing double quotes inside a double-quoted f-string
        # (f"...{d["key"]}...") is a SyntaxError before Python 3.12 (PEP 701),
        # so the key is single-quoted and !r replaces the repr() call.
        if backbone_config["model_type"] != "hgnet_v2":
            raise RuntimeError(
                f"There is no dynamic graph implementation for backbone "
                f"{backbone_config['model_type']!r}."
            )
        # Backbone settings are taken verbatim from the nested backbone config.
        self.arch = backbone_config["arch"]
        self.freeze_stem_only = backbone_config["freeze_stem_only"]
        self.freeze_at = backbone_config["freeze_at"]
        self.freeze_norm = backbone_config["freeze_norm"]
        self.lr_mult_list = backbone_config["lr_mult_list"]
        self.return_idx = backbone_config["return_idx"]
        # HybridEncoder ("el_*") settings, renamed from the HF-style arguments.
        self.hidden_dim = encoder_hidden_dim
        self.use_encoder_idx = encode_proj_layers
        self.num_encoder_layers = encoder_layers
        self.el_d_model = d_model
        self.el_nhead = encoder_attention_heads
        self.el_dim_feedforward = encoder_ffn_dim
        self.el_dropout = dropout
        self.el_activation = encoder_activation_function
        self.expansion = hidden_expansion
        # RTDETRTransformer decoder ("tf_*") settings.
        self.tf_num_queries = num_queries
        self.tf_feat_strides = feat_strides
        self.tf_num_levels = num_feature_levels
        self.tf_nhead = decoder_attention_heads
        self.tf_num_decoder_layers = decoder_layers
        self.tf_backbone_feat_channels = decoder_in_channels
        self.tf_dim_feedforward = decoder_ffn_dim
        self.tf_dropout = attention_dropout
        self.tf_activation = decoder_activation_function
        self.tf_num_denoising = num_denoising
        self.tf_label_noise_ratio = label_noise_ratio
        self.tf_box_noise_scale = box_noise_scale
        self.tf_learnt_init_query = learn_initial_query
        # Loss coefficients regrouped into the dict shapes DINOLoss and
        # HungarianMatcher expect.
        self.loss_coeff = {
            "class": weight_loss_vfl,
            "bbox": weight_loss_bbox,
            "giou": weight_loss_giou,
        }
        self.aux_loss = auxiliary_loss
        self.matcher_coeff = {
            "class": matcher_class_cost,
            "bbox": matcher_bbox_cost,
            "giou": matcher_giou_cost,
        }
        self.use_focal_loss = use_focal_loss
        # Tensor parallelism is not supported for this model.
        self.tensor_parallel_degree = 1

Expand Down Expand Up @@ -286,7 +316,6 @@ def __init__(self, config: RTDETRConfig):
)
self.transformer = RTDETRTransformer(
num_queries=self.config.tf_num_queries,
position_embed_type=self.config.tf_position_embed_type,
feat_strides=self.config.tf_feat_strides,
backbone_feat_channels=self.config.tf_backbone_feat_channels,
num_levels=self.config.tf_num_levels,
Expand All @@ -304,14 +333,13 @@ def __init__(self, config: RTDETRConfig):
loss=DINOLoss(
loss_coeff=self.config.loss_coeff,
aux_loss=self.config.aux_loss,
use_vfl=self.config.use_vfl,
matcher=HungarianMatcher(
matcher_coeff=self.config.matcher_coeff,
),
)
)
self.post_process = DETRPostProcess(
num_top_queries=self.config.num_top_queries,
num_top_queries=self.config.tf_num_queries,
use_focal_loss=self.config.use_focal_loss,
)

Expand Down