# Copyright (c) Tencent Inc. All rights reserved. | |
# YOLO Multi-Modal Backbone (Vision Language) | |
# Vision: YOLOv8 CSPDarknet | |
# Language: CLIP Text Encoder (12-layer transformer) | |
from .mm_backbone import ( | |
MultiModalYOLOBackbone, | |
HuggingVisionBackbone, | |
HuggingCLIPLanguageBackbone, | |
PseudoLanguageBackbone) | |
__all__ = [ | |
'MultiModalYOLOBackbone', | |
'HuggingVisionBackbone', | |
'HuggingCLIPLanguageBackbone', | |
'PseudoLanguageBackbone' | |
] | |