@book{skobtsov-evolution,
  author        = {Скобцов, Ю. А. and Сперанский, Д. В.},
  title         = {Эволюционные вычисления: Учебное пособие},
  year          = {2012},
  publisher     = {Национальный Открытый Университет «ИНТУИТ»},
  address       = {М.},
  pagetotal     = {331},
  note          = {ил. — Серия «Основы информационных технологий»},
}

% Miscellaneous

@online{yandex-research-calendar,
  author        = {Осиков, Александр and Уласович, Кристина and Лохов, Пётр and
                   Андрианова, Надежда and Сергиенко, Ярослав and Матющенко, Максим},
  title         = {Сколько длятся рабочие встречи},
  year          = {2025},
  url           = {https://yandex.ru/company/researches/2025/calendar},
  urldate       = {2026-01-03},
  media         = {eresource},
  organization  = {Яндекс},
}

@online{speech-rate,
  author        = {Светозарова, Н. Д.},
  title         = {Темп речи},
  year          = {2017},
  url           = {https://old.bigenc.ru/linguistics/text/4186876},
  urldate       = {2026-01-03},
  media         = {eresource},
  organization  = {Большая российская энциклопедия},
  note          = {Электронная версия},
}

@misc{nemo-toolkit,
  title         = {NeMo: a toolkit for building AI applications using Neural Modules},
  author        = {Oleksii Kuchaiev and Jason Li and Huyen Nguyen and Oleksii Hrinchuk and
                   Ryan Leary and Boris Ginsburg and Samuel Kriman and Stanislav Beliaev and
                   Vitaly Lavrukhin and Jack Cook and Patrice Castonguay and Mariya Popova and
                   Jocelyn Huang and Jonathan M. Cohen},
  year          = {2019},
  eprint        = {1909.09577},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
  url           = {https://arxiv.org/abs/1909.09577},
  urldate       = {2026-01-10},
}

% Research papers on end-to-end (e2e) systems

@misc{auto-meet,
  title         = {AutoMeet: a proof-of-concept study of genAI to automate meetings in automotive engineering},
  author        = {Simon Baeuerle and Max Radyschevski and Ulrike Pado},
  year          = {2025},
  eprint        = {2507.16054},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  url           = {https://arxiv.org/abs/2507.16054},
  urldate       = {2026-01-10},
}

@misc{building-real-world-meeting-summarization,
  title         = {Building Real-World Meeting Summarization Systems using Large Language Models: A Practical Perspective},
  author        = {Md Tahmid Rahman Laskar and Xue-Yong Fu and Cheng Chen and Shashi Bhushan TN},
  year          = {2023},
  eprint        = {2310.19233},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  url           = {https://arxiv.org/abs/2310.19233},
  urldate       = {2026-01-10},
}

@misc{end-to-end-speech-summarization,
  title         = {An End-to-End Speech Summarization Using Large Language Model},
  author        = {Hengchao Shang and Zongyao Li and Jiaxin Guo and Shaojun Li and Zhiqiang Rao and
                   Yuanchang Luo and Daimeng Wei and Hao Yang},
  year          = {2024},
  eprint        = {2407.02005},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  url           = {https://arxiv.org/abs/2407.02005},
  urldate       = {2026-01-10},
}

@inproceedings{meetalk,
  title         = {Meetalk: Retrieval-Augmented and Adaptively Personalized Meeting Summarization with Knowledge Learning from User Corrections},
  author        = {Chen, Zheng and Futian, Jiang and Deng, Yue and He, Changyang and Li, Bo},
  editor        = {Zhang, Yuji and Chen, Canyu and Li, Sha and Geva, Mor and Han, Chi and
                   Wang, Xiaozhi and Feng, Shangbin and Gao, Silin and Augenstein, Isabelle and
                   Bansal, Mohit and Li, Manling and Ji, Heng},
  booktitle     = {Proceedings of the 3rd Workshop on Towards Knowledgeable Foundation Models (KnowFM)},
  year          = {2025},
  address       = {Vienna, Austria},
  publisher     = {Association for Computational Linguistics},
  url           = {https://aclanthology.org/2025.knowllm-1.9/},
  urldate       = {2026-01-10},
  doi           = {10.18653/v1/2025.knowllm-1.9},
  pages         = {94--110},
  isbn          = {979-8-89176-283-1},
}

% DIY articles

@online{yoomoney-transcribe-calls,
  author        = {izzyleet},
  title         = {Как мы транскрибируем аудио с внутренних созвонов в текст},
  year          = {2025},
  url           = {https://habr.com/ru/companies/yoomoney/articles/896096/},
  urldate       = {2026-01-03},
  media         = {eresource},
  organization  = {ЮMoney},
}

@online{alfabank-local-transcriber,
  author        = {Луняка, Николай},
  title         = {На входе аудио, на выходе — саммари. Собираем локальный транскрибатор из бесплатного софта},
  year          = {2025},
  url           = {https://habr.com/ru/companies/alfa/articles/909498/},
  urldate       = {2026-01-03},
  media         = {eresource},
  organization  = {Альфа-Банк},
}

@online{bitrix-copilot-videocalls,
  author        = {Соколов, Дмитрий},
  title         = {Будьте добры, помедленнее! Я записываю… или Как мы приручили нейросеть для видеозвонков},
  year          = {2025},
  url           = {https://habr.com/ru/companies/bitrix/articles/904916/},
  urldate       = {2026-01-03},
  media         = {eresource},
  organization  = {Битрикс24},
}

@online{habr-autoprotocoling-start,
  author        = {virus3908},
  title         = {Как я начал писать своё автопротоколирование},
  year          = {2025},
  url           = {https://habr.com/ru/articles/910616/},
  urldate       = {2026-01-03},
  media         = {eresource},
  organization  = {Habr},
}

% Transcription + diarization

@online{whisperlivekit,
  author        = {{QuentinFuxa}},
  title         = {{QuentinFuxa/WhisperLiveKit}: Ultra-low latency, self-hosted real-time speech-to-text with speaker identification},
  urldate       = {2025-12-25},
  url           = {https://github.com/QuentinFuxa/WhisperLiveKit},
  media         = {eresource},
  organization  = {GitHub},
}

@misc{whisperx,
  title         = {WhisperX: Time-Accurate Speech Transcription of Long-Form Audio},
  author        = {Max Bain and Jaesung Huh and Tengda Han and Andrew Zisserman},
  year          = {2023},
  eprint        = {2303.00747},
  archiveprefix = {arXiv},
  primaryclass  = {cs.SD},
  url           = {https://arxiv.org/abs/2303.00747},
  urldate       = {2026-01-10},
}

% Transcription
@misc{e2e_asr_survey,
  title         = {End-to-End Speech Recognition: A Survey},
  author        = {Rohit Prabhavalkar and Takaaki Hori and Tara N. Sainath and Ralf Schlüter and
                   Shinji Watanabe},
  year          = {2023},
  eprint        = {2303.03329},
  archiveprefix = {arXiv},
  primaryclass  = {eess.AS},
  url           = {https://arxiv.org/abs/2303.03329},
  urldate       = {2026-01-10},
}

% The doi field holds the bare DOI (no resolver prefix); page ranges use "--".
@article{asr-deep-learning-survey,
  title         = {Automatic Speech Recognition: A survey of deep learning techniques and approaches},
  journal       = {International Journal of Cognitive Computing in Engineering},
  volume        = {6},
  pages         = {201--237},
  year          = {2025},
  issn          = {2666-3074},
  doi           = {10.1016/j.ijcce.2024.12.007},
  url           = {https://www.sciencedirect.com/science/article/pii/S2666307424000573},
  urldate       = {2026-01-10},
  author        = {Harsh Ahlawat and Naveen Aggarwal and Deepti Gupta},
  keywords      = {Automatic Speech Recognition, Deep Neural Networks, Conformer, Transformer, Datasets, Multilingual, Deep learning},
}

@online{russian-asr-leaderboard,
  author        = {Шмырев, Николай},
  title         = {Открытые модели для распознавания русской речи 2025},
  year          = {2025},
  url           = {https://alphacephei.com/nsh/2025/04/18/russian-models.html},
  urldate       = {2026-01-05},
  media         = {eresource},
  organization  = {Alpha Cephei},
}

@misc{open-asr-leaderboard,
  title         = {Open ASR Leaderboard: Towards Reproducible and Transparent Multilingual and Long-Form Speech Recognition Evaluation},
  author        = {Vaibhav Srivastav and Steven Zheng and Eric Bezzam and Eustache Le Bihan and
                   Nithin Koluguri and Piotr Żelasko and Somshubra Majumdar and Adel Moumen and
                   Sanchit Gandhi},
  year          = {2025},
  eprint        = {2510.06961},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  url           = {https://arxiv.org/abs/2510.06961},
  urldate       = {2026-01-10},
}

% Whisper

@misc{whisper,
  title         = {Robust Speech Recognition via Large-Scale Weak Supervision},
  author        = {Alec Radford and Jong Wook Kim and Tao Xu and Greg Brockman and
                   Christine McLeavey and Ilya Sutskever},
  year          = {2022},
  eprint        = {2212.04356},
  archiveprefix = {arXiv},
  primaryclass  = {eess.AS},
  url           = {https://arxiv.org/abs/2212.04356},
  urldate       = {2026-01-10},
}

@misc{distil-whisper,
  title         = {Distil-Whisper: Robust Knowledge Distillation via Large-Scale Pseudo Labelling},
  author        = {Sanchit Gandhi and Patrick von Platen and Alexander M. Rush},
  year          = {2023},
  eprint        = {2311.00430},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  url           = {https://arxiv.org/abs/2311.00430},
  urldate       = {2026-01-10},
}

@online{ctranslate2,
  author        = {{OpenNMT}},
  title         = {{OpenNMT/CTranslate2}: Fast inference engine for Transformer models},
  urldate       = {2025-12-25},
  url           = {https://github.com/OpenNMT/CTranslate2},
  media         = {eresource},
  organization  = {GitHub},
}

@online{faster-whisper,
  author        = {{SYSTRAN}},
  title         = {{SYSTRAN/faster-whisper}: Faster Whisper transcription with CTranslate2},
  urldate       = {2025-12-25},
  url           = {https://github.com/SYSTRAN/faster-whisper},
  media         = {eresource},
  organization  = {GitHub},
}

@inproceedings{whisper-streaming,
  title         = {Turning Whisper into Real-Time Transcription System},
  author        = {Mach{\'a}{\v{c}}ek, Dominik and Dabre, Raj and Bojar, Ond{\v{r}}ej},
  editor        = {Saha, Sriparna and Sujaini, Herry},
  booktitle     = {Proceedings of the 13th International Joint Conference on Natural Language Processing and the 3rd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics: System Demonstrations},
  month         = nov,
  year          = {2023},
  address       = {Bali, Indonesia},
  publisher     = {Association for Computational Linguistics},
  url           = {https://aclanthology.org/2023.ijcnlp-demo.3},
  urldate       = {2026-01-10},
  pages         = {17--24},
}

% The exported "series" and "collection" fields only repeated the booktitle
% and were dropped; the page range uses the ASCII "--" form.
@inproceedings{simul-whisper,
  title         = {Simul-Whisper: Attention-Guided Streaming Whisper with Truncation Detection},
  author        = {Wang, Haoyu and Hu, Guoqiang and Lin, Guodong and Zhang, Wei-Qiang and Li, Jian},
  booktitle     = {Interspeech 2024},
  publisher     = {ISCA},
  year          = {2024},
  month         = sep,
  pages         = {4483--4487},
  doi           = {10.21437/Interspeech.2024-1814},
  url           = {https://doi.org/10.21437/Interspeech.2024-1814},
  urldate       = {2026-01-10},
}

@misc{simulstreaming,
  title         = {Simultaneous Translation with Offline Speech and LLM Models in CUNI Submission to IWSLT 2025},
  author        = {Dominik Macháček and Peter Polák},
  year          = {2025},
  eprint        = {2506.17077},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  url           = {https://arxiv.org/abs/2506.17077},
  urldate       = {2026-01-10},
}

% Sber

@misc{giga-am,
  title         = {GigaAM: Efficient Self-Supervised Learner for Speech Recognition},
  author        = {Aleksandr Kutsakov and Alexandr Maximenko and Georgii Gospodinov and
                   Pavel Bogomolov and Fyodor Minkin},
  year          = {2025},
  eprint        = {2506.01192},
  archiveprefix = {arXiv},
  primaryclass  = {eess.AS},
  url           = {https://arxiv.org/abs/2506.01192},
  urldate       = {2026-01-10},
}

@online{giga-am-v3,
  author        = {Куцаков, Александр},
  title         = {GigaAM-v3: открытая SOTA-модель распознавания речи на русском},
  year          = {2025},
  url           = {https://habr.com/ru/companies/sberdevices/articles/973160/},
  urldate       = {2026-01-05},
  media         = {eresource},
  organization  = {SberDevices},
}

% Other

@misc{canary-1b-v2-parakeet-tdt-06b-v3,
  title         = {Canary-1B-v2 \& Parakeet-TDT-0.6B-v3: Efficient and High-Performance Models for Multilingual ASR and AST},
  author        = {Monica Sekoyan and Nithin Rao Koluguri and Nune Tadevosyan and Piotr Zelasko and
                   Travis Bartley and Nikolay Karpov and Jagadeesh Balam and Boris Ginsburg},
  year          = {2025},
  eprint        = {2509.14128},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  url           = {https://arxiv.org/abs/2509.14128},
  urldate       = {2026-01-10},
}

@online{vosk,
  title         = {Vosk Speech Recognition Toolkit},
  year          = {2023},
  url           = {https://alphacephei.com/vosk/},
  urldate       = {2026-01-05},
  media         = {eresource},
  organization  = {Alpha Cephei},
}

% Diarization

% The article number is stored in "eid"; the exported "article-number" field
% is not part of the biblatex data model.
@article{speaker-diarization-review,
  author        = {O’Shaughnessy, Douglas},
  title         = {Speaker Diarization: A Review of Objectives and Methods},
  journal       = {Applied Sciences},
  volume        = {15},
  year          = {2025},
  number        = {4},
  eid           = {2002},
  url           = {https://www.mdpi.com/2076-3417/15/4/2002},
  urldate       = {2026-01-10},
  issn          = {2076-3417},
  doi           = {10.3390/app15042002},
}

@misc{benchmarking-diarization-models,
  title         = {Benchmarking Diarization Models},
  author        = {Luca A. Lanzendörfer and Florian Grötschla and Cesare Blaser and Roger Wattenhofer},
  year          = {2025},
  eprint        = {2509.26177},
  archiveprefix = {arXiv},
  primaryclass  = {cs.SD},
  url           = {https://arxiv.org/abs/2509.26177},
  urldate       = {2026-01-10},
}

@misc{sdbench,
  title         = {SDBench: A Comprehensive Benchmark Suite for Speaker Diarization},
  author        = {Eduardo Pacheco and Atila Orhon and Berkin Durmus and Blaise Munyampirwa and
                   Andrey Leonov},
  year          = {2025},
  eprint        = {2507.16136},
  archiveprefix = {arXiv},
  primaryclass  = {cs.SD},
  url           = {https://arxiv.org/abs/2507.16136},
  urldate       = {2026-01-10},
}

@misc{pyannote-audio,
  title         = {pyannote.audio: neural building blocks for speaker diarization},
  author        = {Hervé Bredin and Ruiqing Yin and Juan Manuel Coria and Gregory Gelly and
                   Pavel Korshunov and Marvin Lavechin and Diego Fustes and Hadrien Titeux and
                   Wassim Bouaziz and Marie-Philippe Gill},
  year          = {2019},
  eprint        = {1911.01255},
  archiveprefix = {arXiv},
  primaryclass  = {eess.AS},
  url           = {https://arxiv.org/abs/1911.01255},
  urldate       = {2026-01-10},
}

@online{pyannote-community-1,
  author        = {{pyannote.ai}},
  title         = {Community-1: Unleashing open-source diarization},
  year          = {2025},
  url           = {https://www.pyannote.ai/blog/community-1},
  urldate       = {2026-01-05},
  organization  = {pyannote.ai},
  media         = {eresource},
  note          = {Блог о релизе модели Community-1 для open-source speaker diarization},
}

@misc{sortformer,
  title         = {Sortformer: A Novel Approach for Permutation-Resolved Speaker Supervision in Speech-to-Text Systems},
  author        = {Taejin Park and Ivan Medennikov and Kunal Dhawan and Weiqing Wang and He Huang and
                   Nithin Rao Koluguri and Krishna C. Puvvada and Jagadeesh Balam and Boris Ginsburg},
  year          = {2025},
  eprint        = {2409.06656},
  archiveprefix = {arXiv},
  primaryclass  = {eess.AS},
  url           = {https://arxiv.org/abs/2409.06656},
  urldate       = {2026-01-10},
}

@inproceedings{diarizen,
  title         = {Leveraging self-supervised learning for speaker diarization},
  author        = {Han, Jiangyu and Landini, Federico and Rohdin, Johan and Silnova, Anna and
                   Diez, Mireia and Burget, Luk{\'a}{\v{s}}},
  booktitle     = {Proc. ICASSP},
  year          = {2025},
}

@misc{streaming-sortformer,
  title         = {Streaming Sortformer: Speaker Cache-Based Online Speaker Diarization with Arrival-Time Ordering},
  author        = {Ivan Medennikov and Taejin Park and Weiqing Wang and He Huang and Kunal Dhawan and
                   Jinhan Wang and Jagadeesh Balam and Boris Ginsburg},
  year          = {2025},
  eprint        = {2507.18446},
  archiveprefix = {arXiv},
  primaryclass  = {eess.AS},
  url           = {https://arxiv.org/abs/2507.18446},
  urldate       = {2026-01-10},
}

% VAD

% The team name is double-braced so it is treated as one corporate author
% rather than being split into given name and surname.
@misc{silero-vad,
  author        = {{Silero Team}},
  title         = {Silero VAD: pre-trained enterprise-grade Voice Activity Detector (VAD), Number Detector and Language Classifier},
  year          = {2024},
  urldate       = {2025-12-25},
  url           = {https://github.com/snakers4/silero-vad},
  organization  = {GitHub},
  media         = {eresource},
  email         = {hello@silero.ai},
}

% Summarization

@article{text-summarization-survey,
  title         = {A systematic survey of text summarization: From statistical methods to large language models},
  author        = {Zhang, Haopeng and Yu, Philip S. and Zhang, Jiawei},
  journal       = {ACM Computing Surveys},
  volume        = {57},
  number        = {11},
  pages         = {1--41},
  year          = {2025},
  publisher     = {ACM},
  address       = {New York, NY},
}

% LLM
% Tiny Titans: Can Smaller Large Language Models Punch Above Their
% Weight in the Real World for Meeting Summarization?
@misc{vikhr, title = {Vikhr: The Family of Open-Source Instruction-Tuned Large Language Models for Russian}, author = {Aleksandr Nikolich and Konstantin Korolev and Sergei Bratchikov and Igor Kiselev and Artem Shelmanov}, year = {2025}, eprint = {2405.13929}, archiveprefix = {arXiv}, primaryclass = {cs.CL}, url = {https://arxiv.org/abs/2405.13929}, urldate = {2026-01-10} } @misc{qwen, title = {Qwen2.5 Technical Report}, author = {Qwen and : and An Yang and Baosong Yang and Beichen Zhang and Binyuan Hui and Bo Zheng and Bowen Yu and Chengyuan Li and Dayiheng Liu and Fei Huang and Haoran Wei and Huan Lin and Jian Yang and Jianhong Tu and Jianwei Zhang and Jianxin Yang and Jiaxi Yang and Jingren Zhou and Junyang Lin and Kai Dang and Keming Lu and Keqin Bao and Kexin Yang and Le Yu and Mei Li and Mingfeng Xue and Pei Zhang and Qin Zhu and Rui Men and Runji Lin and Tianhao Li and Tianyi Tang and Tingyu Xia and Xingzhang Ren and Xuancheng Ren and Yang Fan and Yang Su and Yichang Zhang and Yu Wan and Yuqiong Liu and Zeyu Cui and Zhenru Zhang and Zihan Qiu}, year = {2025}, eprint = {2412.15115}, archiveprefix = {arXiv}, primaryclass = {cs.CL}, url = {https://arxiv.org/abs/2412.15115}, urldate = {2026-01-10} } @misc{llama, title = {The Llama 3 Herd of Models}, author = {Aaron Grattafiori and Abhimanyu Dubey and Abhinav Jauhri and Abhinav Pandey and Abhishek Kadian and Ahmad Al-Dahle and Aiesha Letman and Akhil Mathur and Alan Schelten and Alex Vaughan and Amy Yang and Angela Fan and Anirudh Goyal and Anthony Hartshorn and Aobo Yang and Archi Mitra and Archie Sravankumar and Artem Korenev and Arthur Hinsvark and Arun Rao and Aston Zhang and Aurelien Rodriguez and Austen Gregerson and Ava Spataru and Baptiste Roziere and Bethany Biron and Binh Tang and Bobbie Chern and Charlotte Caucheteux and Chaya Nayak and Chloe Bi and Chris Marra and Chris McConnell and Christian Keller and Christophe Touret and Chunyang Wu and Corinne Wong and Cristian Canton Ferrer and Cyrus Nikolaidis 
and Damien Allonsius and Daniel Song and Danielle Pintz and Danny Livshits and Danny Wyatt and David Esiobu and Dhruv Choudhary and Dhruv Mahajan and Diego Garcia-Olano and Diego Perino and Dieuwke Hupkes and Egor Lakomkin and Ehab AlBadawy and Elina Lobanova and Emily Dinan and Eric Michael Smith and Filip Radenovic and Francisco Guzmán and Frank Zhang and Gabriel Synnaeve and Gabrielle Lee and Georgia Lewis Anderson and Govind Thattai and Graeme Nail and Gregoire Mialon and Guan Pang and Guillem Cucurell and Hailey Nguyen and Hannah Korevaar and Hu Xu and Hugo Touvron and Iliyan Zarov and Imanol Arrieta Ibarra and Isabel Kloumann and Ishan Misra and Ivan Evtimov and Jack Zhang and Jade Copet and Jaewon Lee and Jan Geffert and Jana Vranes and Jason Park and Jay Mahadeokar and Jeet Shah and Jelmer van der Linde and Jennifer Billock and Jenny Hong and Jenya Lee and Jeremy Fu and Jianfeng Chi and Jianyu Huang and Jiawen Liu and Jie Wang and Jiecao Yu and Joanna Bitton and Joe Spisak and Jongsoo Park and Joseph Rocca and Joshua Johnstun and Joshua Saxe and Junteng Jia and Kalyan Vasuden Alwala and Karthik Prasad and Kartikeya Upasani and Kate Plawiak and Ke Li and Kenneth Heafield and Kevin Stone and Khalid El-Arini and Krithika Iyer and Kshitiz Malik and Kuenley Chiu and Kunal Bhalla and Kushal Lakhotia and Lauren Rantala-Yeary and Laurens van der Maaten and Lawrence Chen and Liang Tan and Liz Jenkins and Louis Martin and Lovish Madaan and Lubo Malo and Lukas Blecher and Lukas Landzaat and Luke de Oliveira and Madeline Muzzi and Mahesh Pasupuleti and Mannat Singh and Manohar Paluri and Marcin Kardas and Maria Tsimpoukelli and Mathew Oldham and Mathieu Rita and Maya Pavlova and Melanie Kambadur and Mike Lewis and Min Si and Mitesh Kumar Singh and Mona Hassan and Naman Goyal and Narjes Torabi and Nikolay Bashlykov and Nikolay Bogoychev and Niladri Chatterji and Ning Zhang and Olivier Duchenne and Onur Çelebi and Patrick Alrassy and Pengchuan Zhang and Pengwei Li and 
Petar Vasic and Peter Weng and Prajjwal Bhargava and Pratik Dubal and Praveen Krishnan and Punit Singh Koura and Puxin Xu and Qing He and Qingxiao Dong and Ragavan Srinivasan and Raj Ganapathy and Ramon Calderer and Ricardo Silveira Cabral and Robert Stojnic and Roberta Raileanu and Rohan Maheswari and Rohit Girdhar and Rohit Patel and Romain Sauvestre and Ronnie Polidoro and Roshan Sumbaly and Ross Taylor and Ruan Silva and Rui Hou and Rui Wang and Saghar Hosseini and Sahana Chennabasappa and Sanjay Singh and Sean Bell and Seohyun Sonia Kim and Sergey Edunov and Shaoliang Nie and Sharan Narang and Sharath Raparthy and Sheng Shen and Shengye Wan and Shruti Bhosale and Shun Zhang and Simon Vandenhende and Soumya Batra and Spencer Whitman and Sten Sootla and Stephane Collot and Suchin Gururangan and Sydney Borodinsky and Tamar Herman and Tara Fowler and Tarek Sheasha and Thomas Georgiou and Thomas Scialom and Tobias Speckbacher and Todor Mihaylov and Tong Xiao and Ujjwal Karn and Vedanuj Goswami and Vibhor Gupta and Vignesh Ramanathan and Viktor Kerkez and Vincent Gonguet and Virginie Do and Vish Vogeti and Vítor Albiero and Vladan Petrovic and Weiwei Chu and Wenhan Xiong and Wenyin Fu and Whitney Meers and Xavier Martinet and Xiaodong Wang and Xiaofang Wang and Xiaoqing Ellen Tan and Xide Xia and Xinfeng Xie and Xuchao Jia and Xuewei Wang and Yaelle Goldschlag and Yashesh Gaur and Yasmine Babaei and Yi Wen and Yiwen Song and Yuchen Zhang and Yue Li and Yuning Mao and Zacharie Delpierre Coudert and Zheng Yan and Zhengxing Chen and Zoe Papakipos and Aaditya Singh and Aayushi Srivastava and Abha Jain and Adam Kelsey and Adam Shajnfeld and Adithya Gangidi and Adolfo Victoria and Ahuva Goldstand and Ajay Menon and Ajay Sharma and Alex Boesenberg and Alexei Baevski and Allie Feinstein and Amanda Kallet and Amit Sangani and Amos Teo and Anam Yunus and Andrei Lupu and Andres Alvarado and Andrew Caples and Andrew Gu and Andrew Ho and Andrew Poulton and Andrew Ryan and Ankit 
Ramchandani and Annie Dong and Annie Franco and Anuj Goyal and Aparajita Saraf and Arkabandhu Chowdhury and Ashley Gabriel and Ashwin Bharambe and Assaf Eisenman and Azadeh Yazdan and Beau James and Ben Maurer and Benjamin Leonhardi and Bernie Huang and Beth Loyd and Beto De Paola and Bhargavi Paranjape and Bing Liu and Bo Wu and Boyu Ni and Braden Hancock and Bram Wasti and Brandon Spence and Brani Stojkovic and Brian Gamido and Britt Montalvo and Carl Parker and Carly Burton and Catalina Mejia and Ce Liu and Changhan Wang and Changkyu Kim and Chao Zhou and Chester Hu and Ching-Hsiang Chu and Chris Cai and Chris Tindal and Christoph Feichtenhofer and Cynthia Gao and Damon Civin and Dana Beaty and Daniel Kreymer and Daniel Li and David Adkins and David Xu and Davide Testuggine and Delia David and Devi Parikh and Diana Liskovich and Didem Foss and Dingkang Wang and Duc Le and Dustin Holland and Edward Dowling and Eissa Jamil and Elaine Montgomery and Eleonora Presani and Emily Hahn and Emily Wood and Eric-Tuan Le and Erik Brinkman and Esteban Arcaute and Evan Dunbar and Evan Smothers and Fei Sun and Felix Kreuk and Feng Tian and Filippos Kokkinos and Firat Ozgenel and Francesco Caggioni and Frank Kanayet and Frank Seide and Gabriela Medina Florez and Gabriella Schwarz and Gada Badeer and Georgia Swee and Gil Halpern and Grant Herman and Grigory Sizov and Guangyi and Zhang and Guna Lakshminarayanan and Hakan Inan and Hamid Shojanazeri and Han Zou and Hannah Wang and Hanwen Zha and Haroun Habeeb and Harrison Rudolph and Helen Suk and Henry Aspegren and Hunter Goldman and Hongyuan Zhan and Ibrahim Damlaj and Igor Molybog and Igor Tufanov and Ilias Leontiadis and Irina-Elena Veliche and Itai Gat and Jake Weissman and James Geboski and James Kohli and Janice Lam and Japhet Asher and Jean-Baptiste Gaya and Jeff Marcus and Jeff Tang and Jennifer Chan and Jenny Zhen and Jeremy Reizenstein and Jeremy Teboul and Jessica Zhong and Jian Jin and Jingyi Yang and Joe Cummings and 
Jon Carvill and Jon Shepard and Jonathan McPhie and Jonathan Torres and Josh Ginsburg and Junjie Wang and Kai Wu and Kam Hou U and Karan Saxena and Kartikay Khandelwal and Katayoun Zand and Kathy Matosich and Kaushik Veeraraghavan and Kelly Michelena and Keqian Li and Kiran Jagadeesh and Kun Huang and Kunal Chawla and Kyle Huang and Lailin Chen and Lakshya Garg and Lavender A and Leandro Silva and Lee Bell and Lei Zhang and Liangpeng Guo and Licheng Yu and Liron Moshkovich and Luca Wehrstedt and Madian Khabsa and Manav Avalani and Manish Bhatt and Martynas Mankus and Matan Hasson and Matthew Lennie and Matthias Reso and Maxim Groshev and Maxim Naumov and Maya Lathi and Meghan Keneally and Miao Liu and Michael L. Seltzer and Michal Valko and Michelle Restrepo and Mihir Patel and Mik Vyatskov and Mikayel Samvelyan and Mike Clark and Mike Macey and Mike Wang and Miquel Jubert Hermoso and Mo Metanat and Mohammad Rastegari and Munish Bansal and Nandhini Santhanam and Natascha Parks and Natasha White and Navyata Bawa and Nayan Singhal and Nick Egebo and Nicolas Usunier and Nikhil Mehta and Nikolay Pavlovich Laptev and Ning Dong and Norman Cheng and Oleg Chernoguz and Olivia Hart and Omkar Salpekar and Ozlem Kalinli and Parkin Kent and Parth Parekh and Paul Saab and Pavan Balaji and Pedro Rittner and Philip Bontrager and Pierre Roux and Piotr Dollar and Polina Zvyagina and Prashant Ratanchandani and Pritish Yuvraj and Qian Liang and Rachad Alao and Rachel Rodriguez and Rafi Ayub and Raghotham Murthy and Raghu Nayani and Rahul Mitra and Rangaprabhu Parthasarathy and Raymond Li and Rebekkah Hogan and Robin Battey and Rocky Wang and Russ Howes and Ruty Rinott and Sachin Mehta and Sachin Siby and Sai Jayesh Bondu and Samyak Datta and Sara Chugh and Sara Hunt and Sargun Dhillon and Sasha Sidorov and Satadru Pan and Saurabh Mahajan and Saurabh Verma and Seiji Yamamoto and Sharadh Ramaswamy and Shaun Lindsay and Shaun Lindsay and Sheng Feng and Shenghao Lin and Shengxin Cindy 
Zha and Shishir Patil and Shiva Shankar and Shuqiang Zhang and Shuqiang Zhang and Sinong Wang and Sneha Agarwal and Soji Sajuyigbe and Soumith Chintala and Stephanie Max and Stephen Chen and Steve Kehoe and Steve Satterfield and Sudarshan Govindaprasad and Sumit Gupta and Summer Deng and Sungmin Cho and Sunny Virk and Suraj Subramanian and Sy Choudhury and Sydney Goldman and Tal Remez and Tamar Glaser and Tamara Best and Thilo Koehler and Thomas Robinson and Tianhe Li and Tianjun Zhang and Tim Matthews and Timothy Chou and Tzook Shaked and Varun Vontimitta and Victoria Ajayi and Victoria Montanez and Vijai Mohan and Vinay Satish Kumar and Vishal Mangla and Vlad Ionescu and Vlad Poenaru and Vlad Tiberiu Mihailescu and Vladimir Ivanov and Wei Li and Wenchen Wang and Wenwen Jiang and Wes Bouaziz and Will Constable and Xiaocheng Tang and Xiaojian Wu and Xiaolan Wang and Xilun Wu and Xinbo Gao and Yaniv Kleinman and Yanjun Chen and Ye Hu and Ye Jia and Ye Qi and Yenda Li and Yilin Zhang and Ying Zhang and Yossi Adi and Youngjin Nam and Yu and Wang and Yu Zhao and Yuchen Hao and Yundi Qian and Yunlu Li and Yuzi He and Zach Rait and Zachary DeVito and Zef Rosnbrick and Zhaoduo Wen and Zhenyu Yang and Zhiwei Zhao and Zhiyu Ma}, year = {2024}, eprint = {2407.21783}, archiveprefix = {arXiv}, primaryclass = {cs.AI}, url = {https://arxiv.org/abs/2407.21783}, urldate = {2026-01-10} } @misc{gemma, title = {Gemma 2: Improving Open Language Models at a Practical Size}, author = {Gemma Team and Morgane Riviere and Shreya Pathak and Pier Giuseppe Sessa and Cassidy Hardin and Surya Bhupatiraju and Léonard Hussenot and Thomas Mesnard and Bobak Shahriari and Alexandre Ramé and Johan Ferret and Peter Liu and Pouya Tafti and Abe Friesen and Michelle Casbon and Sabela Ramos and Ravin Kumar and Charline Le Lan and Sammy Jerome and Anton Tsitsulin and Nino Vieillard and Piotr Stanczyk and Sertan Girgin and Nikola Momchev and Matt Hoffman and Shantanu Thakoor and Jean-Bastien Grill and 
Behnam Neyshabur and Olivier Bachem and Alanna Walton and Aliaksei Severyn and Alicia Parrish and Aliya Ahmad and Allen Hutchison and Alvin Abdagic and Amanda Carl and Amy Shen and Andy Brock and Andy Coenen and Anthony Laforge and Antonia Paterson and Ben Bastian and Bilal Piot and Bo Wu and Brandon Royal and Charlie Chen and Chintu Kumar and Chris Perry and Chris Welty and Christopher A. Choquette-Choo and Danila Sinopalnikov and David Weinberger and Dimple Vijaykumar and Dominika Rogozińska and Dustin Herbison and Elisa Bandy and Emma Wang and Eric Noland and Erica Moreira and Evan Senter and Evgenii Eltyshev and Francesco Visin and Gabriel Rasskin and Gary Wei and Glenn Cameron and Gus Martins and Hadi Hashemi and Hanna Klimczak-Plucińska and Harleen Batra and Harsh Dhand and Ivan Nardini and Jacinda Mein and Jack Zhou and James Svensson and Jeff Stanway and Jetha Chan and Jin Peng Zhou and Joana Carrasqueira and Joana Iljazi and Jocelyn Becker and Joe Fernandez and Joost van Amersfoort and Josh Gordon and Josh Lipschultz and Josh Newlan and Ju-yeong Ji and Kareem Mohamed and Kartikeya Badola and Kat Black and Katie Millican and Keelin McDonell and Kelvin Nguyen and Kiranbir Sodhia and Kish Greene and Lars Lowe Sjoesund and Lauren Usui and Laurent Sifre and Lena Heuermann and Leticia Lago and Lilly McNealus and Livio Baldini Soares and Logan Kilpatrick and Lucas Dixon and Luciano Martins and Machel Reid and Manvinder Singh and Mark Iverson and Martin Görner and Mat Velloso and Mateo Wirth and Matt Davidow and Matt Miller and Matthew Rahtz and Matthew Watson and Meg Risdal and Mehran Kazemi and Michael Moynihan and Ming Zhang and Minsuk Kahng and Minwoo Park and Mofi Rahman and Mohit Khatwani and Natalie Dao and Nenshad Bardoliwalla and Nesh Devanathan and Neta Dumai and Nilay Chauhan and Oscar Wahltinez and Pankil Botarda and Parker Barnes and Paul Barham and Paul Michel and Pengchong Jin and Petko Georgiev and Phil Culliton and Pradeep Kuppala and Ramona 
Comanescu and Ramona Merhej and Reena Jana and Reza Ardeshir Rokni and Rishabh Agarwal and Ryan Mullins and Samaneh Saadat and Sara Mc Carthy and Sarah Cogan and Sarah Perrin and Sébastien M. R. Arnold and Sebastian Krause and Shengyang Dai and Shruti Garg and Shruti Sheth and Sue Ronstrom and Susan Chan and Timothy Jordan and Ting Yu and Tom Eccles and Tom Hennigan and Tomas Kocisky and Tulsee Doshi and Vihan Jain and Vikas Yadav and Vilobh Meshram and Vishal Dharmadhikari and Warren Barkley and Wei Wei and Wenming Ye and Woohyun Han and Woosuk Kwon and Xiang Xu and Zhe Shen and Zhitao Gong and Zichuan Wei and Victor Cotruta and Phoebe Kirk and Anand Rao and Minh Giang and Ludovic Peran and Tris Warkentin and Eli Collins and Joelle Barral and Zoubin Ghahramani and Raia Hadsell and D. Sculley and Jeanine Banks and Anca Dragan and Slav Petrov and Oriol Vinyals and Jeff Dean and Demis Hassabis and Koray Kavukcuoglu and Clement Farabet and Elena Buchatskaya and Sebastian Borgeaud and Noah Fiedel and Armand Joulin and Kathleen Kenealy and Robert Dadashi and Alek Andreev}, year = {2024}, eprint = {2408.00118}, archiveprefix = {arXiv}, primaryclass = {cs.CL}, url = {https://arxiv.org/abs/2408.00118}, urldate = {2026-01-10} } @misc{phi-3, title = {Phi-3 Technical Report: A Highly Capable Language Model Locally on Your Phone}, author = {Marah Abdin and Jyoti Aneja and Hany Awadalla and Ahmed Awadallah and Ammar Ahmad Awan and Nguyen Bach and Amit Bahree and Arash Bakhtiari and Jianmin Bao and Harkirat Behl and Alon Benhaim and Misha Bilenko and Johan Bjorck and Sébastien Bubeck and Martin Cai and Qin Cai and Vishrav Chaudhary and Dong Chen and Dongdong Chen and Weizhu Chen and Yen-Chun Chen and Yi-Ling Chen and Hao Cheng and Parul Chopra and Xiyang Dai and Matthew Dixon and Ronen Eldan and Victor Fragoso and Jianfeng Gao and Mei Gao and Min Gao and Amit Garg and Allie Del Giorno and Abhishek Goswami and Suriya Gunasekar and Emman Haider and Junheng Hao and Russell J. 
Hewett and Wenxiang Hu and Jamie Huynh and Dan Iter and Sam Ade Jacobs and Mojan Javaheripi and Xin Jin and Nikos Karampatziakis and Piero Kauffmann and Mahoud Khademi and Dongwoo Kim and Young Jin Kim and Lev Kurilenko and James R. Lee and Yin Tat Lee and Yuanzhi Li and Yunsheng Li and Chen Liang and Lars Liden and Xihui Lin and Zeqi Lin and Ce Liu and Liyuan Liu and Mengchen Liu and Weishung Liu and Xiaodong Liu and Chong Luo and Piyush Madan and Ali Mahmoudzadeh and David Majercak and Matt Mazzola and Caio César Teodoro Mendes and Arindam Mitra and Hardik Modi and Anh Nguyen and Brandon Norick and Barun Patra and Daniel Perez-Becker and Thomas Portet and Reid Pryzant and Heyang Qin and Marko Radmilac and Liliang Ren and Gustavo de Rosa and Corby Rosset and Sambudha Roy and Olatunji Ruwase and Olli Saarikivi and Amin Saied and Adil Salim and Michael Santacroce and Shital Shah and Ning Shang and Hiteshi Sharma and Yelong Shen and Swadheen Shukla and Xia Song and Masahiro Tanaka and Andrea Tupini and Praneetha Vaddamanu and Chunyu Wang and Guanhua Wang and Lijuan Wang and Shuohang Wang and Xin Wang and Yu Wang and Rachel Ward and Wen Wen and Philipp Witte and Haiping Wu and Xiaoxia Wu and Michael Wyatt and Bin Xiao and Can Xu and Jiahang Xu and Weijian Xu and Jilong Xue and Sonali Yadav and Fan Yang and Jianwei Yang and Yifan Yang and Ziyi Yang and Donghan Yu and Lu Yuan and Chenruidong Zhang and Cyril Zhang and Jianwen Zhang and Li Lyna Zhang and Yi Zhang and Yue Zhang and Yunan Zhang and Xiren Zhou}, year = {2024}, eprint = {2404.14219}, archiveprefix = {arXiv}, primaryclass = {cs.CL}, url = {https://arxiv.org/abs/2404.14219}, urldate = {2026-01-10} } % Метрики @inproceedings{morris-asr-metrics, title = {From WER and RIL to MER and WIL: improved evaluation measures for connected speech recognition}, author = {Andrew Cameron Morris and Viktoria Maier and Phil Green}, year = {2004}, booktitle = {Interspeech 2004}, pages = {2765--2768}, doi = 
{10.21437/Interspeech.2004-668}, issn = {2958-1796} }

% Acronym restored to its published spelling and brace-protected against style recasing.
@inproceedings{rouge,
  author    = {Lin, Chin-Yew},
  title     = {{ROUGE}: A Package for Automatic Evaluation of Summaries},
  booktitle = {Text Summarization Branches Out},
  year      = {2004},
  pages     = {74--81}
}

% Equal-contribution asterisks removed from the names: a trailing "*" becomes
% part of the parsed surname and leaks into sorting and printed output.
@inproceedings{bert-score,
  author    = {Tianyi Zhang and Varsha Kishore and Felix Wu and Kilian Q. Weinberger and Yoav Artzi},
  title     = {{BERTScore}: Evaluating Text Generation with {BERT}},
  booktitle = {International Conference on Learning Representations},
  year      = {2020},
  url       = {https://openreview.net/forum?id=SkeHuCVFDr},
  urldate   = {2026-01-10}
}

% Publisher reduced to the publisher name; the scraped postal address and
% truncated e-mail fragment from the export were dropped.
@article{qaeval,
  author    = {Deutsch, Daniel and Bedrax-Weiss, Tania and Roth, Dan},
  title     = {Towards Question-Answering as an Automatic Metric for Evaluating the Content Quality of a Summary},
  journal   = {Transactions of the Association for Computational Linguistics},
  volume    = {9},
  pages     = {774--789},
  year      = {2021},
  publisher = {MIT Press}
}

% Title and proceedings name restored to title case (the export had lower-cased them).
@inproceedings{questeval,
  author    = {Scialom, Thomas and Dray, Paul-Alexis and Lamprier, Sylvain and Piwowarski, Benjamin and Staiano, Jacopo and Wang, Alex and Gallinari, Patrick},
  title     = {{QuestEval}: Summarization Asks for Fact-based Evaluation},
  booktitle = {Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing},
  year      = {2021},
  pages     = {6594--6604}
}

% The title value of the following entry continues on the next physical line of the file.
@misc{answers-unite,
  title = {Answers Unite!
Unsupervised Metrics for Reinforced Summarization Models},
  author        = {Thomas Scialom and Sylvain Lamprier and Benjamin Piwowarski and Jacopo Staiano},
  year          = {2019},
  eprint        = {1909.01610},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  url           = {https://arxiv.org/abs/1909.01610},
  urldate       = {2026-01-10}
}

% URL switched from the legacy http://dx.doi.org/ resolver to https://doi.org/;
% the DOI itself is unchanged.
@inbook{qa-legal-summaries,
  author    = {Xu, Huihui and Ashley, Kevin},
  title     = {A Question-Answering Approach to Evaluating Legal Summaries},
  booktitle = {Legal Knowledge and Information Systems},
  publisher = {IOS Press},
  year      = {2023},
  month     = dec,
  isbn      = {9781643684734},
  issn      = {1879-8314},
  doi       = {10.3233/faia230977},
  url       = {https://doi.org/10.3233/FAIA230977},
  urldate   = {2026-01-10}
}

% Title restored to title case; the acronym is brace-protected against style recasing.
@inproceedings{summequal,
  author    = {Liu, Junyuan and Shi, Zhengyan and Lipani, Aldo},
  title     = {{SummEQuAL}: Summarization Evaluation via Question Answering using Large Language Models},
  booktitle = {Proceedings of the 2nd Workshop on Natural Language Reasoning and Structured Explanations (@ ACL 2024)},
  year      = {2024},
  pages     = {46--55}
}

% Datasets (audio -> text -> summary)

@inproceedings{ami,
  author    = {Kraaij, Wessel and Hain, Thomas and Lincoln, Mike and Post, Wilfried},
  title     = {The {AMI} meeting corpus},
  booktitle = {Proc. International Conference on Methods and Techniques in Behavioral Research},
  year      = {2005},
  pages     = {1--4}
}

% The booktitle value of the following entry continues on the next physical line of the file.
@inproceedings{icsi,
  title     = {The {ICSI} meeting corpus},
  author    = {Janin, Adam and Baron, Don and Edwards, Jane and Ellis, Dan and Gelbart, David and Morgan, Nelson and Peskin, Barbara and Pfau, Thilo and Shriberg, Elizabeth and Stolcke, Andreas and others},
  booktitle = {2003 IEEE International Conference on Acoustics, Speech, and Signal Processing, 2003.
Proceedings.(ICASSP'03).},
  volume       = {1},
  pages        = {I--I},
  year         = {2003},
  organization = {IEEE}
}

% month uses the predefined three-letter macro; the bare word July is an
% undefined macro and expands to nothing.
@inproceedings{meetingbank,
  author    = {Yebowen Hu and Tim Ganter and Hanieh Deilamsalehy and Franck Dernoncourt and Hassan Foroosh and Fei Liu},
  title     = {{MeetingBank}: A Benchmark Dataset for Meeting Summarization},
  booktitle = {Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (ACL)},
  year      = {2023},
  month     = jul,
  address   = {Toronto, Canada},
  publisher = {Association for Computational Linguistics}
}

% Datasets (text -> summary)

@inproceedings{elitr-minuting-corpus,
  author    = {Anna Nedoluzhko and Muskaan Singh and Marie Hled{\'{\i}}kov{\'{a}} and Tirthankar Ghosal and Ond{\v{r}}ej Bojar},
  title     = {{ELITR} {M}inuting {C}orpus: {A} Novel Dataset for Automatic Minuting from Multi-Party Meetings in {E}nglish and {C}zech},
  booktitle = {Proceedings of the 13th International Conference on Language Resources and Evaluation (LREC-2022)},
  year      = {2022},
  address   = {Marseille, France},
  publisher = {European Language Resources Association (ELRA)},
  note      = {In print.}
}

% Russian-language datasets

@misc{golos,
  author        = {Nikolay Karpov and Alexander Denisenko and Fedor Minkin},
  title         = {{Golos}: {Russian} Dataset for Speech Research},
  year          = {2021},
  eprint        = {2106.10161},
  archiveprefix = {arXiv},
  primaryclass  = {eess.AS},
  url           = {https://arxiv.org/abs/2106.10161},
  urldate       = {2026-01-14}
}

% Account handles are double-braced so they are treated as one indivisible name.
@online{open_stt,
  author       = {{snakers4}},
  title        = {{snakers4/open\_stt}: Russian Open Speech To Text (STT/ASR) Dataset},
  url          = {https://github.com/snakers4/open_stt},
  urldate      = {2026-01-14},
  media        = {eresource},
  organization = {GitHub}
}

@online{synthetic-speech-diarization-ru,
  author       = {{ivkond}},
  title        = {synthetic-speech-diarization-ru},
  year         = {2025},
  url          = {https://huggingface.co/datasets/ivkond/synthetic-speech-diarization-ru},
  urldate      = {2026-01-03},
  media        = {eresource},
  organization = {Hugging Face}
}