% arXiv preprint 2503.16538: the identifier is stored in the eprint fields (not in a journal name) so the bibliography style formats it.
@misc{paetzold25detector,
  author        = {P{\"a}tzold, Bastian and Nogga, Jan and Behnke, Sven},
  title         = {Leveraging Vision-Language Models for Open-Vocabulary Instance Segmentation and Tracking},
  year          = {2025},
  eprint        = {2503.16538},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2503.16538},
}