Publications – ALT Website

Our contribution to research

2026

M. B. Kmainasi, M. Kutlu, A. E. Shahroor, A. Hasnat, and F. Alam, “Can thinking models think to detect hateful memes?,” in Proceedings of the fourth international workshop on multimodal content analysis for social good, Dubai, United Arab Emirates, 2026.
[BibTeX] [Download PDF]

@inproceedings{bayan2026can,
  author    = {Kmainasi, Mohamed Bayan and Kutlu, Mucahid and Shahroor, Ali Ezzat and Hasnat, Abul and Alam, Firoj},
  title     = {Can Thinking Models Think to Detect Hateful Memes?},
  booktitle = {Proceedings of the Fourth International Workshop on Multimodal Content Analysis for Social Good},
  address   = {Dubai, United Arab Emirates},
  publisher = {Association for Computing Machinery},
  year      = {2026},
  note      = {Co-located with The Web Conference 2026},
  url       = {https://arxiv.org/pdf/2603.01225},
}

F. Alam, F. Ahmad, A. E. Shahroor, M. B. Kmainasi, E. Sartori, G. Da San Martino, A. Hasnat, and R. Ali, “CritiSense: critical digital literacy and resilience against misinformation,” Arxiv preprint arxiv:2603.16672, 2026.
[BibTeX] [Download PDF]

@article{alam2026critisense,
  author  = {Alam, Firoj and Ahmad, Fatema and Shahroor, Ali Ezzat and Kmainasi, Mohamed Bayan and Sartori, Elisa and Da San Martino, Giovanni and Hasnat, Abul and Ali, Raian},
  title   = {{CritiSense}: Critical Digital Literacy and Resilience Against Misinformation},
  journal = {arXiv preprint arXiv:2603.16672},
  year    = {2026},
  url     = {https://arxiv.org/abs/2603.16672},
}

U. Abbas, M. Ouzzani, M. Y. Eltabakh, O. Sinan, G. Bhatia, H. Mubarak, M. Hawasly, M. Q. Hashim, K. Darwish, and F. Alam, “Fanar-Sadiq: a multi-agent architecture for grounded islamic qa,” in Proceedings of the 64th annual meeting of the association for computational linguistics, San Diego, California, United States, 2026.
[BibTeX] [Download PDF]

@inproceedings{abbas2026fanarsadiq,
  author    = {Abbas, Ummar and Ouzzani, Mourad and Eltabakh, Mohamed Y. and Sinan, Omar and Bhatia, Gagan and Mubarak, Hamdy and Hawasly, Majd and Hashim, Mohammed Qusay and Darwish, Kareem and Alam, Firoj},
  title     = {{Fanar-Sadiq}: A Multi-Agent Architecture for Grounded {Islamic} {QA}},
  booktitle = {Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  year      = {2026},
  url       = {https://arxiv.org/abs/2603.08501},
}

G. Bhatia, H. Mubarak, M. Hawasly, M. Jarrar, G. Mikros, F. Zaraket, M. Alhirthani, M. Al-Khatib, L. Cochrane, K. Darwish, R. Yahiaoui, and F. Alam, “Advances in AI systems on Islamic knowledge capabilities: a critical survey,” TechRxiv preprint, 2026. doi:10.36227/techrxiv.177155997.77147487/v1
[BibTeX] [Download PDF]

% NOTE(review): the journal value is inferred from the "techrxiv" DOI prefix -- confirm.
@article{Bhatia_2026,
  author    = {Bhatia, Gagan and Mubarak, Hamdy and Hawasly, Majd and Jarrar, Mustafa and Mikros, George and Zaraket, Fadi and Alhirthani, Mahmoud and Al-Khatib, Mutaz and Cochrane, Logan and Darwish, Kareem and Yahiaoui, Rashid and Alam, Firoj},
  title     = {Advances in {AI} Systems on {Islamic} Knowledge Capabilities: A Critical Survey},
  journal   = {TechRxiv preprint},
  publisher = {Institute of Electrical and Electronics Engineers (IEEE)},
  year      = {2026},
  doi       = {10.36227/techrxiv.177155997.77147487/v1},
  url       = {https://doi.org/10.36227/techrxiv.177155997.77147487/v1},
}

Z. S. Ali, H. H. Bhatti, R. N. Nandi, S. A. Chowdhury, and F. Alam, “MENASpeechBank: a reference voice bank with persona-conditioned multi-turn conversations for audiollms,” Arxiv preprint arxiv:2602.07036, 2026.
[BibTeX] [Download PDF]

@article{ali2026menaspeechbankreferencevoicebank,
  author  = {Ali, Zien Sheikh and Bhatti, Hunzalah Hassan and Nandi, Rabindra Nath and Chowdhury, Shammur Absar and Alam, Firoj},
  title   = {{MENASpeechBank}: A Reference Voice Bank with Persona-Conditioned Multi-Turn Conversations for {AudioLLMs}},
  journal = {arXiv preprint arXiv:2602.07036},
  year    = {2026},
  url     = {https://arxiv.org/abs/2602.07036},
}

A. E. Shahroor, M. B. Kmainasi, A. Hasnat, D. Dimitrov, G. Da San Martino, P. Nakov, and F. Alam, “MemeLens: multilingual multitask VLMs for memes,” Arxiv preprint arxiv:2601.12539, 2026.
[BibTeX] [Download PDF]

% NOTE(review): recorded as an arXiv preprint because the arXiv URL is the only venue information given -- confirm.
@article{shahroor2026memelens,
  author  = {Shahroor, Ali Ezzat and Kmainasi, Mohamed Bayan and Hasnat, Abul and Dimitrov, Dimitar and Da San Martino, Giovanni and Nakov, Preslav and Alam, Firoj},
  title   = {{MemeLens}: Multilingual Multitask {VLMs} for Memes},
  journal = {arXiv preprint arXiv:2601.12539},
  year    = {2026},
  url     = {https://arxiv.org/abs/2601.12539},
}

H. H. Bhatti, F. Alam, and S. A. Chowdhury, “Multi-task instruction tuning via data scheduling for low-resource arabic audiollms,” Arxiv preprint arxiv:2601.12494, 2026.
[BibTeX] [Download PDF]

@article{bhatti2026harmonizingarabicaudiospace,
  author  = {Bhatti, Hunzalah Hassan and Alam, Firoj and Chowdhury, Shammur Absar},
  title   = {Multi-Task Instruction Tuning via Data Scheduling for Low-Resource {Arabic} {AudioLLMs}},
  journal = {arXiv preprint arXiv:2601.12494},
  year    = {2026},
  url     = {https://arxiv.org/abs/2601.12494},
}

B. Mousi, F. Dalvi, S. Chowdhury, F. Alam, and N. Durrani, “Once correct, still wrong: counterfactual hallucination in multilingual vision-language models,” in Proceedings of the 64th annual meeting of the association for computational linguistics, San Diego, California, United States, 2026.
[BibTeX] [Download PDF]

@inproceedings{mousi2026counterfactualhallucination,
  author    = {Mousi, Basel
               and Dalvi, Fahim
               and Chowdhury, Shammur
               and Alam, Firoj
               and Durrani, Nadir},
  title     = {Once Correct, Still Wrong: Counterfactual Hallucination in Multilingual Vision-Language Models},
  booktitle = {Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics},
  publisher = {Association for Computational Linguistics},
  address   = {San Diego, California, United States},
  year      = {2026},
  url       = {https://arxiv.org/abs/2602.05437},
}

G. Bhatia, H. Mubarak, M. Jarrar, G. Mikros, F. Zaraket, M. Alhirthani, M. Al–Khatib, L. Cochrane, K. Darwish, R. Yahiaoui, and F. Alam, “From RAG to Agentic RAG for faithful islamic question answering,” in Proceedings of the 64th annual meeting of the association for computational linguistics, San Diego, California, United States, 2026.
[BibTeX] [Download PDF]

@inproceedings{bhatia2026agenticrag,
  author    = {Bhatia, Gagan and Mubarak, Hamdy and Jarrar, Mustafa and Mikros, George and Zaraket, Fadi and Alhirthani, Mahmoud and Al-Khatib, Mutaz and Cochrane, Logan and Darwish, Kareem and Yahiaoui, Rashid and Alam, Firoj},
  title     = {From {RAG} to {Agentic RAG} for Faithful {Islamic} Question Answering},
  booktitle = {Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  year      = {2026},
  doi       = {10.48550/arXiv.2601.07528},
  url       = {https://doi.org/10.48550/arXiv.2601.07528},
}

H. H. Bhatti and F. Alam, “Beyond MCQ: an open-ended arabic cultural qa benchmark with dialect variants,” in The 15th edition of the language resources and evaluation conference (lrec 2026), 2026.
[BibTeX]

@inproceedings{bhatti2025beyond,
  author    = {Bhatti, Hunzalah Hassan and Alam, Firoj},
  title     = {{Beyond MCQ}: An Open-Ended {Arabic} Cultural {QA} Benchmark with Dialect Variants},
  booktitle = {The 15th edition of the Language Resources and Evaluation Conference (LREC 2026)},
  year      = {2026},
}

2025

M. Hasanain, M. A. Hasan, M. B. Kmainasi, E. Sartori, A. E. Shahroor, G. Da San Martino, and F. Alam, “PropXplain: can LLMs enable explainable propaganda detection?,” in Findings of the association for computational linguistics: emnlp 2025, Suzhou, China, 2025, p. 23855–23863. doi:10.18653/v1/2025.findings-emnlp.1296
[BibTeX] [Download PDF]

@inproceedings{hasanain-etal-2025-propxplain,
  author    = {Hasanain, Maram and Hasan, Md Arid and Kmainasi, Mohamed Bayan and Sartori, Elisa
               and Shahroor, Ali Ezzat and Da San Martino, Giovanni and Alam, Firoj},
  title     = {{P}rop{X}plain: Can {LLM}s Enable Explainable Propaganda Detection?},
  editor    = {Christodoulopoulos, Christos and Chakraborty, Tanmoy and Rose, Carolyn and Peng, Violet},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2025},
  publisher = {Association for Computational Linguistics},
  address   = {Suzhou, China},
  month     = nov,
  year      = {2025},
  pages     = {23855--23863},
  isbn      = {979-8-89176-335-7},
  doi       = {10.18653/v1/2025.findings-emnlp.1296},
  url       = {https://aclanthology.org/2025.findings-emnlp.1296/}
}

H. H. Bhatti, Y. Ahmed, M. A. Hasan, and F. Alam, “CultranAI at PalmX 2025: data augmentation for cultural knowledge representation,” in Proceedings of the third arabic natural language processing conference: shared tasks, Suzhou, China, 2025, p. 809–817. doi:10.18653/v1/2025.arabicnlp-sharedtasks.111
[BibTeX] [Download PDF]

@inproceedings{hassan-bhatti-etal-2025-cultranai,
  author    = {Bhatti, Hunzalah Hassan and Ahmed, Youssef and Hasan, Md. Arid and Alam, Firoj},
  title     = {{C}ultran{AI} at {P}alm{X} 2025: Data Augmentation for Cultural Knowledge Representation},
  editor    = {Darwish, Kareem and Ali, Ahmed and Abu Farha, Ibrahim and Touileb, Samia
               and Zitouni, Imed and Abdelali, Ahmed and Al-Ghamdi, Sharefah and Alkhereyf, Sakhar
               and Zaghouani, Wajdi and Khalifa, Salam and AlKhamissi, Badr and Almatham, Rawan
               and Hamed, Injy and Alyafeai, Zaid and Alowisheq, Areeb and Inoue, Go
               and Mrini, Khalil and Alshammari, Waad},
  booktitle = {Proceedings of The Third Arabic Natural Language Processing Conference: Shared Tasks},
  publisher = {Association for Computational Linguistics},
  address   = {Suzhou, China},
  month     = nov,
  year      = {2025},
  pages     = {809--817},
  isbn      = {979-8-89176-356-2},
  doi       = {10.18653/v1/2025.arabicnlp-sharedtasks.111},
  url       = {https://aclanthology.org/2025.arabicnlp-sharedtasks.111/}
}

M. R. Biswas, S. Ibrahim, K. Attia, F. Alam, and W. Zaghouani, “MarsadLab at PalmX shared task: an LLM benchmark for Arabic culture and Islamic civilization,” in Proceedings of the third arabic natural language processing conference: shared tasks, Suzhou, China, 2025, p. 818–824. doi:10.18653/v1/2025.arabicnlp-sharedtasks.112
[BibTeX] [Download PDF]

@inproceedings{rafiul-biswas-etal-2025-marsadlab-palmx,
  author    = {Biswas, Md. Rafiul and Ibrahim, Shimaa and Attia, Kais and Alam, Firoj and Zaghouani, Wajdi},
  title     = {{M}arsad{L}ab at {P}alm{X} Shared Task: An {LLM} Benchmark for {A}rabic Culture and {Islamic} Civilization},
  editor    = {Darwish, Kareem and Ali, Ahmed and Abu Farha, Ibrahim and Touileb, Samia
               and Zitouni, Imed and Abdelali, Ahmed and Al-Ghamdi, Sharefah and Alkhereyf, Sakhar
               and Zaghouani, Wajdi and Khalifa, Salam and AlKhamissi, Badr and Almatham, Rawan
               and Hamed, Injy and Alyafeai, Zaid and Alowisheq, Areeb and Inoue, Go
               and Mrini, Khalil and Alshammari, Waad},
  booktitle = {Proceedings of The Third Arabic Natural Language Processing Conference: Shared Tasks},
  publisher = {Association for Computational Linguistics},
  address   = {Suzhou, China},
  month     = nov,
  year      = {2025},
  pages     = {818--824},
  isbn      = {979-8-89176-356-2},
  doi       = {10.18653/v1/2025.arabicnlp-sharedtasks.112},
  url       = {https://aclanthology.org/2025.arabicnlp-sharedtasks.112/}
}

J. Tajrin, B. Ballav Roy, and F. Alam, “AYA at PalmX 2025: modeling cultural and islamic knowledge in LLMs,” in Proceedings of the third arabic natural language processing conference: shared tasks, Suzhou, China, 2025, p. 830–836. doi:10.18653/v1/2025.arabicnlp-sharedtasks.114
[BibTeX] [Download PDF]

@inproceedings{tajrin-etal-2025-aya,
  author    = {Tajrin, Jannatul and Ballav Roy, Bir and Alam, Firoj},
  title     = {{AYA} at {P}alm{X} 2025: Modeling Cultural and Islamic Knowledge in {LLM}s},
  editor    = {Darwish, Kareem and Ali, Ahmed and Abu Farha, Ibrahim and Touileb, Samia
               and Zitouni, Imed and Abdelali, Ahmed and Al-Ghamdi, Sharefah and Alkhereyf, Sakhar
               and Zaghouani, Wajdi and Khalifa, Salam and AlKhamissi, Badr and Almatham, Rawan
               and Hamed, Injy and Alyafeai, Zaid and Alowisheq, Areeb and Inoue, Go
               and Mrini, Khalil and Alshammari, Waad},
  booktitle = {Proceedings of The Third Arabic Natural Language Processing Conference: Shared Tasks},
  publisher = {Association for Computational Linguistics},
  address   = {Suzhou, China},
  month     = nov,
  year      = {2025},
  pages     = {830--836},
  isbn      = {979-8-89176-356-2},
  doi       = {10.18653/v1/2025.arabicnlp-sharedtasks.114},
  url       = {https://aclanthology.org/2025.arabicnlp-sharedtasks.114/}
}

M. R. Biswas, M. Bessghaier, F. Alam, and W. Zaghouani, “MarsadLab at AraGenEval shared task: LLM-based approaches to Arabic authorship style transfer and identification,” in Proceedings of the third arabic natural language processing conference: shared tasks, Suzhou, China, 2025, p. 88–93. doi:10.18653/v1/2025.arabicnlp-sharedtasks.15
[BibTeX] [Download PDF]

@inproceedings{rafiul-biswas-etal-2025-marsadlab,
  author    = {Biswas, Md. Rafiul and Bessghaier, Mabrouka and Alam, Firoj and Zaghouani, Wajdi},
  title     = {{M}arsad{L}ab at {A}ra{G}en{E}val Shared Task: {LLM}-Based Approaches to {A}rabic Authorship Style Transfer and Identification},
  editor    = {Darwish, Kareem and Ali, Ahmed and Abu Farha, Ibrahim and Touileb, Samia
               and Zitouni, Imed and Abdelali, Ahmed and Al-Ghamdi, Sharefah and Alkhereyf, Sakhar
               and Zaghouani, Wajdi and Khalifa, Salam and AlKhamissi, Badr and Almatham, Rawan
               and Hamed, Injy and Alyafeai, Zaid and Alowisheq, Areeb and Inoue, Go
               and Mrini, Khalil and Alshammari, Waad},
  booktitle = {Proceedings of The Third Arabic Natural Language Processing Conference: Shared Tasks},
  publisher = {Association for Computational Linguistics},
  address   = {Suzhou, China},
  month     = nov,
  year      = {2025},
  pages     = {88--93},
  isbn      = {979-8-89176-356-2},
  doi       = {10.18653/v1/2025.arabicnlp-sharedtasks.15},
  url       = {https://aclanthology.org/2025.arabicnlp-sharedtasks.15/}
}

W. Zaghouani, M. R. Biswas, M. Bessghaier, S. Ibrahim, G. Mikros, A. Hasnat, and F. Alam, “MAHED shared task: multimodal detection of hope and hate emotions in Arabic content,” in Proceedings of the third arabic natural language processing conference: shared tasks, Suzhou, China, 2025, p. 560–574. doi:10.18653/v1/2025.arabicnlp-sharedtasks.75
[BibTeX] [Download PDF]

@inproceedings{zaghouani-etal-2025-mahed,
  author    = {Zaghouani, Wajdi and Biswas, Md. Rafiul and Bessghaier, Mabrouka and Ibrahim, Shimaa
               and Mikros, George and Hasnat, Abul and Alam, Firoj},
  title     = {{MAHED} Shared Task: Multimodal Detection of Hope and Hate Emotions in {A}rabic Content},
  editor    = {Darwish, Kareem and Ali, Ahmed and Abu Farha, Ibrahim and Touileb, Samia
               and Zitouni, Imed and Abdelali, Ahmed and Al-Ghamdi, Sharefah and Alkhereyf, Sakhar
               and Zaghouani, Wajdi and Khalifa, Salam and AlKhamissi, Badr and Almatham, Rawan
               and Hamed, Injy and Alyafeai, Zaid and Alowisheq, Areeb and Inoue, Go
               and Mrini, Khalil and Alshammari, Waad},
  booktitle = {Proceedings of The Third Arabic Natural Language Processing Conference: Shared Tasks},
  publisher = {Association for Computational Linguistics},
  address   = {Suzhou, China},
  month     = nov,
  year      = {2025},
  pages     = {560--574},
  isbn      = {979-8-89176-356-2},
  doi       = {10.18653/v1/2025.arabicnlp-sharedtasks.75},
  url       = {https://aclanthology.org/2025.arabicnlp-sharedtasks.75/}
}

R. N. Almatham, K. M. Darwish, R. Al–Rasheed, W. T. Alshammari, M. Alhoshan, A. Almazrua, A. Al Wazrah, M. Alheraki, F. Alam, P. Nakov, N. A. Alzahrani, E. Albilali, N. Habash, A. M. El–Sheikh, M. Elmallah, H. Mubarak, Z. Alyafeai, M. Anwar, H. Li, A. Abdelali, N. Altwairesh, M. Hasanain, A. Al–Thubaity, S. Shehata, B. Alhafni, I. Hamed, G. Inoue, K. N. Elmadani, O. Obeid, F. Haouari, T. Elsayed, E. A. Alghamdi, K. Almubarak, S. Alshahrani, O. Aljareh, S. Alajlan, A. Alshaqarawi, M. Alshihri, S. Alghurabi, A. Alzeghayer, A. Altamimi, A. Alfaifi, and A. M. Alosaimy, “BALSAM: a platform for benchmarking Arabic large language models,” in Proceedings of the third arabic natural language processing conference, Suzhou, China, 2025, p. 258–277. doi:10.18653/v1/2025.arabicnlp-main.21
[BibTeX] [Download PDF]

@inproceedings{almatham-etal-2025-balsam,
  author    = {Almatham, Rawan Nasser and Darwish, Kareem Mohamed and Al-Rasheed, Raghad
               and Alshammari, Waad Thuwaini and Alhoshan, Muneera and Almazrua, Amal
               and Al Wazrah, Asma and Alheraki, Mais and Alam, Firoj and Nakov, Preslav
               and Alzahrani, Norah A. and Albilali, Eman and Habash, Nizar
               and El-Sheikh, Abdelrahman Mustafa and Elmallah, Muhammad and Mubarak, Hamdy
               and Alyafeai, Zaid and Anwar, Mohamed and Li, Haonan and Abdelali, Ahmed
               and Altwairesh, Nora and Hasanain, Maram and Al-Thubaity, Abdulmohsen
               and Shehata, Shady and Alhafni, Bashar and Hamed, Injy and Inoue, Go
               and Elmadani, Khalid N. and Obeid, Ossama and Haouari, Fatima and Elsayed, Tamer
               and Alghamdi, Emad A. and Almubarak, Khalid and Alshahrani, Saied and Aljareh, Ola
               and Alajlan, Safa and Alshaqarawi, Areej and Alshihri, Maryam and Alghurabi, Sultana
               and Alzeghayer, Atikah and Altamimi, Afrah and Alfaifi, Abdullah
               and Alosaimy, Abdulrahman M},
  title     = {{BALSAM}: A Platform for Benchmarking {A}rabic Large Language Models},
  editor    = {Darwish, Kareem and Ali, Ahmed and Abu Farha, Ibrahim and Touileb, Samia
               and Zitouni, Imed and Abdelali, Ahmed and Al-Ghamdi, Sharefah and Alkhereyf, Sakhar
               and Zaghouani, Wajdi and Khalifa, Salam and AlKhamissi, Badr and Almatham, Rawan
               and Hamed, Injy and Alyafeai, Zaid and Alowisheq, Areeb and Inoue, Go
               and Mrini, Khalil and Alshammari, Waad},
  booktitle = {Proceedings of The Third Arabic Natural Language Processing Conference},
  publisher = {Association for Computational Linguistics},
  address   = {Suzhou, China},
  month     = nov,
  year      = {2025},
  pages     = {258--277},
  isbn      = {979-8-89176-352-4},
  doi       = {10.18653/v1/2025.arabicnlp-main.21},
  url       = {https://aclanthology.org/2025.arabicnlp-main.21/}
}

M. A. Hasan, M. Hasanain, F. Ahmad, S. R. Laskar, S. Upadhyay, V. N. Sukhadia, M. Kutlu, S. A. Chowdhury, and F. Alam, “NativQA: multilingual culturally-aligned natural query for LLMs,” in Findings of the association for computational linguistics: acl 2025, Vienna, Austria, 2025, p. 14886–14909. doi:10.18653/v1/2025.findings-acl.770
[BibTeX] [Download PDF]

@inproceedings{hasan-etal-2025-nativqa,
  author    = {Hasan, Md. Arid and Hasanain, Maram and Ahmad, Fatema and Laskar, Sahinur Rahman
               and Upadhyay, Sunaya and Sukhadia, Vrunda N and Kutlu, Mucahid
               and Chowdhury, Shammur Absar and Alam, Firoj},
  title     = {{N}ativ{QA}: Multilingual Culturally-Aligned Natural Query for {LLM}s},
  editor    = {Che, Wanxiang and Nabende, Joyce and Shutova, Ekaterina and Pilehvar, Mohammad Taher},
  booktitle = {Findings of the Association for Computational Linguistics: ACL 2025},
  publisher = {Association for Computational Linguistics},
  address   = {Vienna, Austria},
  month     = jul,
  year      = {2025},
  pages     = {14886--14909},
  isbn      = {979-8-89176-256-5},
  doi       = {10.18653/v1/2025.findings-acl.770},
  url       = {https://aclanthology.org/2025.findings-acl.770/}
}

M. J. I. Basher, M. Kowsher, M. S. Islam, R. N. Nandi, N. J. Prottasha, M. H. Menon, T. A. Muntasir, S. A. Chowdhury, F. Alam, N. Yousefi, and O. Garibay, “BnTTS: few-shot speaker adaptation in low-resource setting,” in Findings of the association for computational linguistics: naacl 2025, Albuquerque, New Mexico, 2025, p. 4956–4968.
[BibTeX] [Download PDF]

@inproceedings{basher-etal-2025-bntts,
  author    = {Basher, Mohammad Jahid Ibna and Kowsher, Md and Islam, Md Saiful
               and Nandi, Rabindra Nath and Prottasha, Nusrat Jahan and Menon, Mehadi Hasan
               and Muntasir, Tareq Al and Chowdhury, Shammur Absar and Alam, Firoj
               and Yousefi, Niloofar and Garibay, Ozlem},
  title     = {{B}n{TTS}: Few-Shot Speaker Adaptation in Low-Resource Setting},
  editor    = {Chiruzzo, Luis and Ritter, Alan and Wang, Lu},
  booktitle = {Findings of the Association for Computational Linguistics: NAACL 2025},
  publisher = {Association for Computational Linguistics},
  address   = {Albuquerque, New Mexico},
  month     = apr,
  year      = {2025},
  pages     = {4956--4968},
  isbn      = {979-8-89176-195-7},
  url       = {https://aclanthology.org/2025.findings-naacl.279/},
}

Proceedings of the 1st workshop on GenAI content detection (GenAIDetect), Abu Dhabi, UAE: International conference on computational linguistics, 2025.
[BibTeX] [Download PDF]

@proceedings{genaidetect-ws-2025-1,
  title     = {Proceedings of the 1st Workshop on {GenAI} Content Detection ({GenAIDetect})},
  editor    = {Alam, Firoj and Nakov, Preslav and Habash, Nizar and Gurevych, Iryna
               and Chowdhury, Shammur and Shelmanov, Artem and Wang, Yuxia
               and Artemova, Ekaterina and Kutlu, Mucahid and Mikros, George},
  publisher = {International Conference on Computational Linguistics},
  address   = {Abu Dhabi, UAE},
  month     = jan,
  year      = {2025},
  url       = {https://aclanthology.org/2025.genaidetect-1.0/}
}

L. Dugan, A. Zhu, F. Alam, P. Nakov, M. Apidianaki, and C. Callison-Burch, “GenAI content detection task 3: cross-domain machine generated text detection challenge,” in Proceedings of the 1st workshop on GenAI content detection (GenAIDetect), Abu Dhabi, UAE, 2025, p. 377–388.
[BibTeX] [Download PDF]

@inproceedings{dugan-etal-2025-genai,
  author    = {Dugan, Liam and Zhu, Andrew and Alam, Firoj and Nakov, Preslav
               and Apidianaki, Marianna and Callison-Burch, Chris},
  title     = {{G}en{AI} Content Detection Task 3: Cross-Domain Machine Generated Text Detection Challenge},
  editor    = {Alam, Firoj and Nakov, Preslav and Habash, Nizar and Gurevych, Iryna
               and Chowdhury, Shammur and Shelmanov, Artem and Wang, Yuxia
               and Artemova, Ekaterina and Kutlu, Mucahid and Mikros, George},
  booktitle = {Proceedings of the 1st Workshop on {GenAI} Content Detection ({GenAIDetect})},
  publisher = {International Conference on Computational Linguistics},
  address   = {Abu Dhabi, UAE},
  month     = jan,
  year      = {2025},
  pages     = {377--388},
  url       = {https://aclanthology.org/2025.genaidetect-1.45/},
}

S. A. Chowdhury, H. Almerekhi, M. Kutlu, K. E. Keleş, F. Ahmad, T. Mohiuddin, G. Mikros, and F. Alam, “GenAI content detection task 2: AI vs. human – academic essay authenticity challenge,” in Proceedings of the 1st workshop on GenAI content detection (GenAIDetect), Abu Dhabi, UAE, 2025, p. 323–333.
[BibTeX] [Download PDF]

@inproceedings{chowdhury-etal-2025-genai,
  author    = {Chowdhury, Shammur Absar and Almerekhi, Hind and Kutlu, Mucahid
               and Kele{\c{s}}, Kaan Efe and Ahmad, Fatema and Mohiuddin, Tasnim
               and Mikros, George and Alam, Firoj},
  title     = {{G}en{AI} Content Detection Task 2: {AI} vs. Human {--} Academic Essay Authenticity Challenge},
  editor    = {Alam, Firoj and Nakov, Preslav and Habash, Nizar and Gurevych, Iryna
               and Chowdhury, Shammur and Shelmanov, Artem and Wang, Yuxia
               and Artemova, Ekaterina and Kutlu, Mucahid and Mikros, George},
  booktitle = {Proceedings of the 1st Workshop on {GenAI} Content Detection ({GenAIDetect})},
  publisher = {International Conference on Computational Linguistics},
  address   = {Abu Dhabi, UAE},
  month     = jan,
  year      = {2025},
  pages     = {323--333},
  url       = {https://aclanthology.org/2025.genaidetect-1.37/},
}

R. Suwaileh, M. Hasanain, F. Hubail, W. Zaghouani, and F. Alam, “ThatiAR: subjectivity detection in arabic news sentences,” in Proceedings of the international aaai conference on web and social media (icwsm 2025), Copenhagen, Denmark, 2025.
[BibTeX]

@inproceedings{ThatiAR2025,
  author    = {Suwaileh, Reem and Hasanain, Maram and Hubail, Fatema and Zaghouani, Wajdi and Alam, Firoj},
  title     = {{ThatiAR}: Subjectivity Detection in {Arabic} News Sentences},
  booktitle = {Proceedings of the International AAAI Conference on Web and Social Media (ICWSM 2025)},
  address   = {Copenhagen, Denmark},
  month     = jun,
  year      = {2025},
}

B. Mousi, N. Durrani, F. Ahmad, M. A. Hasan, M. Hasanain, T. Kabbani, F. Dalvi, S. A. Chowdhury, and F. Alam, “AraDiCE: benchmarks for dialectal and cultural capabilities in LLMs,” in Proceedings of the 31st international conference on computational linguistics, Abu Dhabi, UAE, 2025, p. 4186–4218.
[BibTeX] [Download PDF]

@inproceedings{mousi-etal-2025-aradice,
  author    = {Mousi, Basel and Durrani, Nadir and Ahmad, Fatema and Hasan, Md. Arid
               and Hasanain, Maram and Kabbani, Tameem and Dalvi, Fahim
               and Chowdhury, Shammur Absar and Alam, Firoj},
  title     = {{A}ra{D}i{CE}: Benchmarks for Dialectal and Cultural Capabilities in {LLM}s},
  editor    = {Rambow, Owen and Wanner, Leo and Apidianaki, Marianna and Al-Khalifa, Hend
               and Di Eugenio, Barbara and Schockaert, Steven},
  booktitle = {Proceedings of the 31st International Conference on Computational Linguistics},
  publisher = {Association for Computational Linguistics},
  address   = {Abu Dhabi, UAE},
  month     = jan,
  year      = {2025},
  pages     = {4186--4218},
  url       = {https://aclanthology.org/2025.coling-main.283/},
}

F. Alam, J. M. Struß, T. Chakraborty, S. Dietze, S. Hafid, K. Korre, A. Muti, P. Nakov, F. Ruggeri, S. Schellhammer, V. Setty, M. Sundriyal, K. Todorov, and V. V., “The clef-2025 checkthat! lab: subjectivity, fact-checking, claim normalization, and retrieval,” in Advances in information retrieval, Cham, 2025, p. 467–478.
[BibTeX]

@inproceedings{10.1007/978-3-031-88720-8_68,
  author    = {Alam, Firoj and Stru{\ss}, Julia Maria and Chakraborty, Tanmoy and Dietze, Stefan
               and Hafid, Salim and Korre, Katerina and Muti, Arianna and Nakov, Preslav
               and Ruggeri, Federico and Schellhammer, Sebastian and Setty, Vinay
               and Sundriyal, Megha and Todorov, Konstantin and V., Venktesh},
  title     = {The {CLEF-2025} {CheckThat! Lab}: Subjectivity, Fact-Checking, Claim Normalization, and Retrieval},
  editor    = {Hauff, Claudia and Macdonald, Craig and Jannach, Dietmar and Kazai, Gabriella
               and Nardini, Franco Maria and Pinelli, Fabio and Silvestri, Fabrizio
               and Tonellotto, Nicola},
  booktitle = {Advances in Information Retrieval},
  publisher = {Springer Nature Switzerland},
  address   = {Cham},
  year      = {2025},
  pages     = {467--478},
  isbn      = {978-3-031-88720-8},
  doi       = {10.1007/978-3-031-88720-8_68},
}

M. B. Kmainasi, A. E. Shahroor, M. Hasanain, S. R. Laskar, N. Hassan, and F. Alam, “LlamaLens: specialized multilingual LLM for analyzing news and social media content,” in Findings of the association for computational linguistics: naacl 2025, Albuquerque, New Mexico, 2025, p. 5627–5649.
[BibTeX] [Download PDF]

@inproceedings{kmainasi-etal-2025-llamalens,
  author    = {Kmainasi, Mohamed Bayan and Shahroor, Ali Ezzat and Hasanain, Maram
               and Laskar, Sahinur Rahman and Hassan, Naeemul and Alam, Firoj},
  title     = {{L}lama{L}ens: Specialized Multilingual {LLM} for Analyzing News and Social Media Content},
  editor    = {Chiruzzo, Luis and Ritter, Alan and Wang, Lu},
  booktitle = {Findings of the Association for Computational Linguistics: NAACL 2025},
  publisher = {Association for Computational Linguistics},
  address   = {Albuquerque, New Mexico},
  month     = apr,
  year      = {2025},
  pages     = {5627--5649},
  isbn      = {979-8-89176-195-7},
  url       = {https://aclanthology.org/2025.findings-naacl.313/},
}

F. Alam, A. E. Shahroor, M. A. Hasan, Z. S. Ali, H. H. Bhatti, M. B. Kmainasi, S. A. Chowdhury, B. Mousi, F. Dalvi, N. Durrani, and N. Milic–Frayling, “EverydayMMQA: a multilingual and multimodal framework for culturally grounded spoken visual qa,” Arxiv preprint arxiv:2510.06371, 2025.
[BibTeX]

@article{alam2025everydaymmqa,
  author  = {Alam, Firoj and Shahroor, Ali Ezzat and Hasan, Md. Arid and Ali, Zien Sheikh and Bhatti, Hunzalah Hassan and Kmainasi, Mohamed Bayan and Chowdhury, Shammur Absar and Mousi, Basel and Dalvi, Fahim and Durrani, Nadir and Milic-Frayling, Natasa},
  title   = {{EverydayMMQA}: A Multilingual and Multimodal Framework for Culturally Grounded Spoken Visual {QA}},
  journal = {arXiv preprint arXiv:2510.06371},
  year    = {2025}
}

W. Magdy, H. Mubarak, and J. Salminen, “Who should set the standards? analysing censored arabic content on facebook during the palestine-israel conflict,” in Proceedings of the 2025 chi conference on human factors in computing systems, New York, NY, USA, 2025. doi:10.1145/3706598.3713150
[BibTeX] [Abstract] [Download PDF]

Nascent research on human-computer interaction concerns itself with fairness of content moderation systems. Designing globally applicable content moderation systems requires considering historical, cultural, and socio-technical factors. Inspired by this line of work, we investigate Arab users’ perception of Facebook’s moderation practices. We collect a set of 448 deleted Arabic posts, and we ask Arab annotators to evaluate these posts based on (a) Facebook Community Standards (FBCS) and (b) their personal opinion. Each post was judged by 10 annotators to account for subjectivity. Our analysis shows a clear gap between the Arabs’ understanding of the FBCS and how Facebook implements these standards. The study highlights a need for discussion on the moderation guidelines on social media platforms about who decides the moderation guidelines, how these guidelines are interpreted, and how well they represent the views of marginalised user communities.

@inproceedings{10.1145/3706598.3713150,
  author    = {Magdy, Walid and Mubarak, Hamdy and Salminen, Joni},
  title     = {Who should set the Standards? {Analysing} Censored {Arabic} Content on {Facebook} during the {Palestine-Israel} Conflict},
  booktitle = {Proceedings of the 2025 CHI Conference on Human Factors in Computing Systems},
  series    = {CHI '25},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  year      = {2025},
  isbn      = {9798400713941},
  articleno = {178},
  numpages  = {16},
  doi       = {10.1145/3706598.3713150},
  url       = {https://doi.org/10.1145/3706598.3713150},
  keywords  = {Censorship, Content Moderation, Free Speech, Facebook, Social Media, Palestine Israel Conflict},
  abstract  = {Nascent research on human-computer interaction concerns itself with fairness of content moderation systems. Designing globally applicable content moderation systems requires considering historical, cultural, and socio-technical factors. Inspired by this line of work, we investigate Arab users’ perception of Facebook’s moderation practices. We collect a set of 448 deleted Arabic posts, and we ask Arab annotators to evaluate these posts based on (a) Facebook Community Standards (FBCS) and (b) their personal opinion. Each post was judged by 10 annotators to account for subjectivity. Our analysis shows a clear gap between the Arabs’ understanding of the FBCS and how Facebook implements these standards. The study highlights a need for discussion on the moderation guidelines on social media platforms about who decides the moderation guidelines, how these guidelines are interpreted, and how well they represent the views of marginalised user communities.}
}

F. Alam, M. A. Hasan, and S. A. Chowdhury, “SpokenNativQA: multilingual everyday spoken queries for LLMs,” in Proc. of the 26th Annual Conference of the International Speech Communication Association (INTERSPEECH), 2025.
[BibTeX]

@inproceedings{Alam2025SpokenNativQA,
  author    = {Alam, Firoj and Hasan, Md Arid and Chowdhury, Shammur Absar},
  title     = {{SpokenNativQA}: Multilingual Everyday Spoken Queries for {LLMs}},
  booktitle = {{Proc. of the 26th Annual Conference of the International Speech Communication Association (INTERSPEECH)}},
  year      = {2025}
}

A. Ersoy, B. A. Mousi, S. A. Chowdhury, F. Alam, F. I. Dalvi, and N. Durrani, “From words to waves: analyzing concept formation in speech and text-based foundation models,” in Proc. of the 26th Annual Conference of the International Speech Communication Association (INTERSPEECH), 2025.
[BibTeX]

@inproceedings{Ersoy2025from,
  author    = {Ersoy, Asim
               and Mousi, Basel Ahmad
               and Chowdhury, Shammur Absar
               and Alam, Firoj
               and Dalvi, Fahim I
               and Durrani, Nadir},
  title     = {From Words to Waves: Analyzing Concept Formation in Speech and Text-Based Foundation Models},
  booktitle = {{Proc. of the 26th Annual Conference of the International Speech Communication Association (INTERSPEECH)}},
  year      = {2025}
}

F. Alam, J. M. Struß, T. Chakraborty, S. Dietze, S. Hafid, K. Korre, A. Muti, P. Nakov, F. Ruggeri, S. Schellhammer, and others, “The clef-2025 checkthat! lab: subjectivity, fact-checking, claim normalization, and retrieval,” in European conference on information retrieval, 2025, p. 467–478.
[BibTeX]

@inproceedings{alam2025clef,
  title     = {The {CLEF}-2025 {CheckThat!} Lab: Subjectivity, Fact-Checking, Claim Normalization, and Retrieval},
  author    = {Alam, Firoj and Stru{\ss}, Julia Maria and Chakraborty, Tanmoy and Dietze, Stefan and Hafid, Salim and Korre, Katerina and Muti, Arianna and Nakov, Preslav and Ruggeri, Federico and Schellhammer, Sebastian and others},
  booktitle = {European Conference on Information Retrieval},
  pages     = {467--478},
  year      = {2025},
  publisher = {Springer},
}

A. Abouzied, F. Alam, R. Ali, and P. Papotti, “Combating misinformation in the Arab world: challenges and opportunities,” Communications of the ACM, vol. 68, iss. 10, p. 48–53, 2025.
[BibTeX]

@article{abouzied2025combating,
  title     = {Combating Misinformation in the {Arab} World: Challenges and Opportunities},
  author    = {Abouzied, Azza and Alam, Firoj and Ali, Raian and Papotti, Paolo},
  journal   = {Communications of the {ACM}},
  volume    = {68},
  number    = {10},
  pages     = {48--53},
  year      = {2025},
  publisher = {ACM},
  address   = {New York, NY, USA},
}

D. Bassi, D. I. Dimitrov, B. D’Auria, F. Alam, M. Hasanain, C. Moro, L. Orrù, G. P. Turchi, P. Nakov, and G. Da San Martino, “Annotating the annotators: analysis, insights and modelling from an annotation campaign on persuasion techniques detection,” in Findings of the association for computational linguistics: ACL 2025, Vienna, Austria, 2025, p. 17918–17929. doi:10.18653/v1/2025.findings-acl.922
[BibTeX] [Download PDF]

@inproceedings{bassi-etal-2025-annotating,
  title     = {Annotating the Annotators: Analysis, Insights and Modelling from an Annotation Campaign on Persuasion Techniques Detection},
  author    = {Bassi, Davide and
               Dimitrov, Dimitar Iliyanov and
               D{'}Auria, Bernardo and
               Alam, Firoj and
               Hasanain, Maram and
               Moro, Christian and
               Orr{\`u}, Luisa and
               Turchi, Gian Piero and
               Nakov, Preslav and
               Da San Martino, Giovanni},
  editor    = {Che, Wanxiang and
               Nabende, Joyce and
               Shutova, Ekaterina and
               Pilehvar, Mohammad Taher},
  booktitle = {Findings of the Association for Computational Linguistics: ACL 2025},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.findings-acl.922/},
  doi       = {10.18653/v1/2025.findings-acl.922},
  pages     = {17918--17929},
  isbn      = {979-8-89176-256-5},
}

M. B. Kmainasi, A. Hasnat, M. A. Hasan, A. E. Shahroor, and F. Alam, “MemeIntel: explainable detection of propagandistic and hateful memes,” in Proceedings of the 2025 conference on empirical methods in natural language processing, Suzhou, China, 2025, p. 30251–30267. doi:10.18653/v1/2025.emnlp-main.1539
[BibTeX] [Download PDF]

@inproceedings{kmainasi-etal-2025-memeintel,
  title     = {{M}eme{I}ntel: Explainable Detection of Propagandistic and Hateful Memes},
  author    = {Kmainasi, Mohamed Bayan and
               Hasnat, Abul and
               Hasan, Md Arid and
               Shahroor, Ali Ezzat and
               Alam, Firoj},
  editor    = {Christodoulopoulos, Christos and
               Chakraborty, Tanmoy and
               Rose, Carolyn and
               Peng, Violet},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.emnlp-main.1539/},
  doi       = {10.18653/v1/2025.emnlp-main.1539},
  pages     = {30251--30267},
  isbn      = {979-8-89176-332-6},
}

S. Al-Khalifa, N. Durrani, H. Al-Khalifa, and F. Alam, “The landscape of Arabic large language models,” Communications of the ACM, vol. 68, iss. 10, p. 54–61, 2025.
[BibTeX]

@article{al2025landscape,
  title     = {The Landscape of {Arabic} Large Language Models},
  author    = {Al-Khalifa, Shahad and Durrani, Nadir and Al-Khalifa, Hend and Alam, Firoj},
  journal   = {Communications of the {ACM}},
  volume    = {68},
  number    = {10},
  pages     = {54--61},
  year      = {2025},
  publisher = {ACM},
  address   = {New York, NY, USA},
}

F. Alam, J. M. Struß, T. Chakraborty, S. Dietze, S. Hafid, K. Korre, A. Muti, P. Nakov, F. Ruggeri, S. Schellhammer, and others, “Overview of the CLEF-2025 CheckThat! lab: subjectivity, fact-checking, claim normalization, and retrieval,” in International conference of the cross-language evaluation forum for european languages, 2025, p. 199–223.
[BibTeX]

@inproceedings{alam2025overview,
  title     = {Overview of the {CLEF}-2025 {CheckThat!} Lab: Subjectivity, Fact-Checking, Claim Normalization, and Retrieval},
  author    = {Alam, Firoj and Stru{\ss}, Julia Maria and Chakraborty, Tanmoy and Dietze, Stefan and Hafid, Salim and Korre, Katerina and Muti, Arianna and Nakov, Preslav and Ruggeri, Federico and Schellhammer, Sebastian and others},
  booktitle = {International Conference of the Cross-Language Evaluation Forum for European Languages},
  pages     = {199--223},
  year      = {2025},
  publisher = {Springer},
}

2024

M. SadraeiJavaeri, E. Asgari, A. C. McHardy, and H. R. Rabiee, “SuperPos-Prompt: enhancing soft prompt tuning of language models with superposition of multi token embeddings,” in NeurIPS 2024 workshop on efficient natural language and speech processing, Vancouver, Canada, 2024.
[BibTeX]

@inproceedings{sadraeijavaeri2024superpos,
  title     = {{SuperPos-Prompt}: Enhancing Soft Prompt Tuning of Language Models with Superposition of Multi Token Embeddings},
  author    = {SadraeiJavaeri, MohammadAli and Asgari, Ehsaneddin and McHardy, Alice Carolyn and Rabiee, Hamid Reza},
  booktitle = {{NeurIPS} 2024 Workshop on Efficient Natural Language and Speech Processing},
  series    = {NeurIPS~'24},
  month     = dec,
  year      = {2024},
  address   = {Vancouver, Canada},
}

X. Yu, F. Dalvi, N. Durrani, M. Nouri, and H. Sajjad, “Latent concept-based explanation of NLP models,” in Proceedings of the 2024 conference on empirical methods in natural language processing, Miami, Florida, USA, 2024, p. 12435–12459. doi:10.18653/v1/2024.emnlp-main.692
[BibTeX] [Abstract] [Download PDF]

Interpreting and understanding the predictions made by deep learning models poses a formidable challenge due to their inherently opaque nature. Many previous efforts aimed at explaining these predictions rely on input features, specifically, the words within NLP models. However, such explanations are often less informative due to the discrete nature of these words and their lack of contextual verbosity. To address this limitation, we introduce the Latent Concept Attribution method (LACOAT), which generates explanations for predictions based on latent concepts. Our foundational intuition is that a word can exhibit multiple facets, contingent upon the context in which it is used. Therefore, given a word in context, the latent space derived from our training process reflects a specific facet of that word. LACOAT functions by mapping the representations of salient input words into the training latent space, allowing it to provide latent context-based explanations of the prediction.

@inproceedings{yu-etal-2024-latent,
  title     = {Latent Concept-based Explanation of {NLP} Models},
  author    = {Yu, Xuemin and
               Dalvi, Fahim and
               Durrani, Nadir and
               Nouri, Marzia and
               Sajjad, Hassan},
  editor    = {Al-Onaizan, Yaser and
               Bansal, Mohit and
               Chen, Yun-Nung},
  booktitle = {Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2024},
  address   = {Miami, Florida, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.emnlp-main.692/},
  doi       = {10.18653/v1/2024.emnlp-main.692},
  pages     = {12435--12459},
  abstract  = {Interpreting and understanding the predictions made by deep learning models poses a formidable challenge due to their inherently opaque nature. Many previous efforts aimed at explaining these predictions rely on input features, specifically, the words within NLP models. However, such explanations are often less informative due to the discrete nature of these words and their lack of contextual verbosity. To address this limitation, we introduce the Latent Concept Attribution method (LACOAT), which generates explanations for predictions based on latent concepts. Our foundational intuition is that a word can exhibit multiple facets, contingent upon the context in which it is used. Therefore, given a word in context, the latent space derived from our training process reflects a specific facet of that word. LACOAT functions by mapping the representations of salient input words into the training latent space, allowing it to provide latent context-based explanations of the prediction.},
}

M. Hasanain, F. Ahmad, and F. Alam, “Large language models for propaganda span annotation,” in Findings of the association for computational linguistics: EMNLP 2024, Miami, Florida, USA, 2024, p. 14522–14532. doi:10.18653/v1/2024.findings-emnlp.850
[BibTeX] [Abstract] [Download PDF]

The use of propagandistic techniques in online content has increased in recent years aiming to manipulate online audiences. Fine-grained propaganda detection and extraction of textual spans where propaganda techniques are used, are essential for more informed content consumption. Automatic systems targeting the task over lower resourced languages are limited, usually obstructed by lack of large scale training datasets. Our study investigates whether Large Language Models (LLMs), such as GPT-4, can effectively extract propagandistic spans. We further study the potential of employing the model to collect more cost-effective annotations. Finally, we examine the effectiveness of labels provided by GPT-4 in training smaller language models for the task. The experiments are performed over a large-scale in-house manually annotated dataset. The results suggest that providing more annotation context to GPT-4 within prompts improves its performance compared to human annotators. Moreover, when serving as an expert annotator (consolidator), the model provides labels that have higher agreement with expert annotators, and lead to specialized models that achieve state-of-the-art over an unseen Arabic testing set. Finally, our work is the first to show the potential of utilizing LLMs to develop annotated datasets for propagandistic spans detection task prompting it with annotations from human annotators with limited expertise. All scripts and annotations will be shared with the community.

@inproceedings{hasanain-etal-2024-large,
  title     = {Large Language Models for Propaganda Span Annotation},
  author    = {Hasanain, Maram and
               Ahmad, Fatema and
               Alam, Firoj},
  editor    = {Al-Onaizan, Yaser and
               Bansal, Mohit and
               Chen, Yun-Nung},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2024},
  month     = nov,
  year      = {2024},
  address   = {Miami, Florida, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.findings-emnlp.850/},
  doi       = {10.18653/v1/2024.findings-emnlp.850},
  pages     = {14522--14532},
  abstract  = {The use of propagandistic techniques in online content has increased in recent years aiming to manipulate online audiences. Fine-grained propaganda detection and extraction of textual spans where propaganda techniques are used, are essential for more informed content consumption. Automatic systems targeting the task over lower resourced languages are limited, usually obstructed by lack of large scale training datasets. Our study investigates whether Large Language Models (LLMs), such as GPT-4, can effectively extract propagandistic spans. We further study the potential of employing the model to collect more cost-effective annotations. Finally, we examine the effectiveness of labels provided by GPT-4 in training smaller language models for the task. The experiments are performed over a large-scale in-house manually annotated dataset. The results suggest that providing more annotation context to GPT-4 within prompts improves its performance compared to human annotators. Moreover, when serving as an expert annotator (consolidator), the model provides labels that have higher agreement with expert annotators, and lead to specialized models that achieve state-of-the-art over an unseen Arabic testing set. Finally, our work is the first to show the potential of utilizing LLMs to develop annotated datasets for propagandistic spans detection task prompting it with annotations from human annotators with limited expertise. All scripts and annotations will be shared with the community.},
}

@comment{Duplicate definition of hasanain-etal-2024-large removed. The complete entry, including its abstract, is defined directly above, and a repeated citation key is an error in BibTeX.}

P. S. Zahraei and E. Asgari, “TuringQ: benchmarking AI comprehension in theory of computation,” in The 2024 conference on empirical methods in natural language processing, 2024.
[BibTeX]

@inproceedings{zahraei2024turingq,
  title     = {{TuringQ}: Benchmarking {AI} Comprehension in Theory of Computation},
  author    = {Zahraei, Pardis Sadat and Asgari, Ehsaneddin},
  booktitle = {The 2024 Conference on Empirical Methods in Natural Language Processing},
  series    = {{EMNLP}~'24},
  month     = nov,
  year      = {2024},
  publisher = {Association for Computational Linguistics},
}

F. Alam, A. Hasnat, F. Ahmad, M. A. Hasan, and M. Hasanain, “ArMeme: propagandistic content in Arabic memes,” in Proceedings of the 2024 conference on empirical methods in natural language processing, Miami, Florida, USA, 2024, p. 21071–21090. doi:10.18653/v1/2024.emnlp-main.1173
[BibTeX] [Download PDF]

@inproceedings{alam-etal-2024-armeme,
  title     = {{A}r{M}eme: Propagandistic Content in {A}rabic Memes},
  author    = {Alam, Firoj and
               Hasnat, Abul and
               Ahmad, Fatema and
               Hasan, Md. Arid and
               Hasanain, Maram},
  editor    = {Al-Onaizan, Yaser and
               Bansal, Mohit and
               Chen, Yun-Nung},
  booktitle = {Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2024},
  address   = {Miami, Florida, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.emnlp-main.1173/},
  doi       = {10.18653/v1/2024.emnlp-main.1173},
  pages     = {21071--21090},
}

O. Ghahroodi, M. Nouri, M. V. Sanian, A. Sahebi, D. Dastgheib, E. Asgari, M. S. Baghshah, and M. H. Rohban, “Khayyam challenge (PersianMMLU): is your LLM truly wise to the Persian language?,” in Proceedings of the conference on language modeling (COLM) 2024, Philadelphia, PA, 2024.
[BibTeX]

@inproceedings{ghahroodi2024khayyam,
  title     = {{Khayyam} Challenge ({PersianMMLU}): Is Your {LLM} Truly Wise to The {Persian} Language?},
  author    = {Ghahroodi, Omid and Nouri, Marzia and Sanian, Mohammad Vali and Sahebi, Alireza and Dastgheib, Doratossadat and Asgari, Ehsaneddin and Baghshah, Mahdieh Soleymani and Rohban, Mohammad Hossein},
  booktitle = {Proceedings of the Conference on Language Modeling ({COLM}) 2024},
  series    = {COLM~'24},
  month     = oct,
  year      = {2024},
  publisher = {Conference on Language Modeling},
  address   = {Philadelphia, PA},
}

M. A. M. Khan, S. M. Bari, D. Long, W. Wang, M. R. Parvez, and S. Joty, “XCodeEval: an execution-based large scale multilingual multitask benchmark for code understanding, generation, translation and retrieval,” in Proceedings of the 62nd annual meeting of the association for computational linguistics (volume 1: long papers), Bangkok, Thailand, 2024, p. 6766–6805. doi:10.18653/v1/2024.acl-long.367
[BibTeX] [Abstract] [Download PDF]

Recently, pre-trained large language models (LLMs) have shown impressive abilities in generating codes from natural language descriptions, repairing buggy codes, translating codes between languages, and retrieving relevant code segments. However, the evaluation of these models has often been performed in a scattered way on only one or two specific tasks, in a few languages, at a partial granularity (e.g., function) level, and in many cases without proper training data. Even more concerning is that in most cases the evaluation of generated codes has been done in terms of mere lexical overlap with a reference code rather than actual execution. We introduce *xCodeEval*, the largest executable multilingual multitask benchmark to date consisting of 25 M document-level coding examples (16.5 B tokens) from about 7.5 K unique problems covering up to 11 programming languages with execution-level parallelism. It features a total of 7 tasks involving code understanding, generation, translation and retrieval. *xCodeEval* adopts an execution-based evaluation and offers a multilingual code execution engine, *ExecEval* that supports unit test based execution in all the 11 languages. To address the challenge of balancing the distributions of text-code samples over multiple attributes in validation/test sets, we propose a novel data splitting and a data selection schema based on the geometric mean and graph-theoretic principle. Our experiments with OpenAI’s LLMs (zero-shot) and open-LLMs (zero-shot and fine-tuned) on the tasks and languages demonstrate to be quite challenging as per the current advancements in language models.

@inproceedings{khan-etal-2024-xcodeeval,
  title     = {{XC}ode{E}val: An Execution-based Large Scale Multilingual Multitask Benchmark for Code Understanding, Generation, Translation and Retrieval},
  author    = {Khan, Mohammad Abdullah Matin and
               Bari, M Saiful and
               Long, Do and
               Wang, Weishi and
               Parvez, Md Rizwan and
               Joty, Shafiq},
  editor    = {Ku, Lun-Wei and
               Martins, Andre and
               Srikumar, Vivek},
  booktitle = {Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = aug,
  year      = {2024},
  address   = {Bangkok, Thailand},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.acl-long.367},
  doi       = {10.18653/v1/2024.acl-long.367},
  pages     = {6766--6805},
  abstract  = {Recently, pre-trained large language models (LLMs) have shown impressive abilities in generating codes from natural language descriptions, repairing buggy codes, translating codes between languages, and retrieving relevant code segments. However, the evaluation of these models has often been performed in a scattered way on only one or two specific tasks, in a few languages, at a partial granularity (e.g., function) level, and in many cases without proper training data. Even more concerning is that in most cases the evaluation of generated codes has been done in terms of mere lexical overlap with a reference code rather than actual execution. We introduce *xCodeEval*, the largest executable multilingual multitask benchmark to date consisting of 25 M document-level coding examples (16.5 B tokens) from about 7.5 K unique problems covering up to 11 programming languages with execution-level parallelism. It features a total of 7 tasks involving code understanding, generation, translation and retrieval. *xCodeEval* adopts an execution-based evaluation and offers a multilingual code execution engine, *ExecEval* that supports unit test based execution in all the 11 languages. To address the challenge of balancing the distributions of text-code samples over multiple attributes in validation/test sets, we propose a novel data splitting and a data selection schema based on the geometric mean and graph-theoretic principle. Our experiments with OpenAI{'}s LLMs (zero-shot) and open-LLMs (zero-shot and fine-tuned) on the tasks and languages demonstrate to be quite challenging as per the current advancements in language models.},
}

B. Mousi, N. Durrani, F. Dalvi, M. Hawasly, and A. Abdelali, “Exploring alignment in shared cross-lingual spaces,” in Proceedings of the 62nd annual meeting of the association for computational linguistics (volume 1: long papers), Bangkok, Thailand, 2024, p. 6326–6348.
[BibTeX] [Download PDF]

@inproceedings{mousi-etal-2024-exploring,
  title     = {Exploring Alignment in Shared Cross-lingual Spaces},
  author    = {Mousi, Basel and Durrani, Nadir and Dalvi, Fahim and Hawasly, Majd and Abdelali, Ahmed},
  editor    = {Ku, Lun-Wei and Martins, Andre and Srikumar, Vivek},
  booktitle = {Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = aug,
  year      = {2024},
  address   = {Bangkok, Thailand},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.acl-long.344},
  pages     = {6326--6348},
  area      = {Latent Concepts}
}

E. Fadeeva, A. Rubashevskii, A. Shelmanov, S. Petrakov, H. Li, H. Mubarak, E. Tsymbalov, G. Kuzmin, A. Panchenko, T. Baldwin, P. Nakov, and M. Panov, “Fact-checking the output of large language models via token-level uncertainty quantification,” in Findings of the association for computational linguistics: ACL 2024, Bangkok, Thailand, 2024, p. 9367–9385. doi:10.18653/v1/2024.findings-acl.558
[BibTeX] [Abstract] [Download PDF]

Large language models (LLMs) are notorious for hallucinating, i.e., producing erroneous claims in their output. Such hallucinations can be dangerous, as occasional factual inaccuracies in the generated text might be obscured by the rest of the output being generally factually correct, making it extremely hard for the users to spot them. Current services that leverage LLMs usually do not provide any means for detecting unreliable generations. Here, we aim to bridge this gap. In particular, we propose a novel fact-checking and hallucination detection pipeline based on token-level uncertainty quantification. Uncertainty scores leverage information encapsulated in the output of a neural network or its layers to detect unreliable predictions, and we show that they can be used to fact-check the atomic claims in the LLM output. Moreover, we present a novel token-level uncertainty quantification method that removes the impact of uncertainty about what claim to generate on the current step and what surface form to use. Our method Claim Conditioned Probability (CCP) measures only the uncertainty of a particular claim value expressed by the model. Experiments on the task of biography generation demonstrate strong improvements for CCP compared to the baselines for seven different LLMs and four languages. Human evaluation reveals that the fact-checking pipeline based on uncertainty quantification is competitive with a fact-checking tool that leverages external knowledge.

@inproceedings{fadeeva-etal-2024-fact,
  title     = {Fact-Checking the Output of Large Language Models via Token-Level Uncertainty Quantification},
  author    = {Fadeeva, Ekaterina and
               Rubashevskii, Aleksandr and
               Shelmanov, Artem and
               Petrakov, Sergey and
               Li, Haonan and
               Mubarak, Hamdy and
               Tsymbalov, Evgenii and
               Kuzmin, Gleb and
               Panchenko, Alexander and
               Baldwin, Timothy and
               Nakov, Preslav and
               Panov, Maxim},
  editor    = {Ku, Lun-Wei and
               Martins, Andre and
               Srikumar, Vivek},
  booktitle = {Findings of the Association for Computational Linguistics: ACL 2024},
  month     = aug,
  year      = {2024},
  address   = {Bangkok, Thailand},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.findings-acl.558/},
  doi       = {10.18653/v1/2024.findings-acl.558},
  pages     = {9367--9385},
  abstract  = {Large language models (LLMs) are notorious for hallucinating, i.e., producing erroneous claims in their output. Such hallucinations can be dangerous, as occasional factual inaccuracies in the generated text might be obscured by the rest of the output being generally factually correct, making it extremely hard for the users to spot them. Current services that leverage LLMs usually do not provide any means for detecting unreliable generations. Here, we aim to bridge this gap. In particular, we propose a novel fact-checking and hallucination detection pipeline based on token-level uncertainty quantification. Uncertainty scores leverage information encapsulated in the output of a neural network or its layers to detect unreliable predictions, and we show that they can be used to fact-check the atomic claims in the LLM output. Moreover, we present a novel token-level uncertainty quantification method that removes the impact of uncertainty about what claim to generate on the current step and what surface form to use. Our method Claim Conditioned Probability (CCP) measures only the uncertainty of a particular claim value expressed by the model. Experiments on the task of biography generation demonstrate strong improvements for CCP compared to the baselines for seven different LLMs and four languages. Human evaluation reveals that the fact-checking pipeline based on uncertainty quantification is competitive with a fact-checking tool that leverages external knowledge.},
}

A. Masry, M. Shahmohammadi, M. R. Parvez, E. Hoque, and S. Joty, “ChartInstruct: instruction tuning for chart comprehension and reasoning,” in Findings of the association for computational linguistics ACL 2024, Bangkok, Thailand and virtual meeting, 2024, p. 10387–10409. doi:10.18653/v1/2024.findings-acl.619
[BibTeX] [Abstract] [Download PDF]

Charts provide visual representations of data and are widely used for analyzing information, addressing queries, and conveying insights to others. Various chart-related downstream tasks have emerged recently, such as question-answering and summarization. A common strategy to solve these tasks is to fine-tune various models originally trained on vision tasks language. However, such task-specific models are not capable of solving a wide range of chart-related tasks, constraining their real-world applicability. To overcome these challenges, we introduce ChartInstruct: a novel chart-specific vision-language Instruction-following dataset comprising 191K instructions generated with 71K charts. We then present two distinct systems for instruction tuning on such datasets: (1) an end-to-end model that connects a vision encoder for chart understanding with a LLM; and (2) a pipeline model that employs a two-step approach to extract chart data tables and input them into the LLM. In experiments on four downstream tasks, we first show the effectiveness of our model–achieving a new set of state-of-the-art results. Further evaluation shows that our instruction-tuning approach supports a wide array of real-world chart comprehension and reasoning scenarios, thereby expanding the scope and applicability of our models to new kinds of tasks.

@inproceedings{masry-etal-2024-chartinstruct,
  title     = {{C}hart{I}nstruct: Instruction Tuning for Chart Comprehension and Reasoning},
  author    = {Masry, Ahmed and
               Shahmohammadi, Mehrad and
               Parvez, Md Rizwan and
               Hoque, Enamul and
               Joty, Shafiq},
  editor    = {Ku, Lun-Wei and
               Martins, Andre and
               Srikumar, Vivek},
  booktitle = {Findings of the Association for Computational Linguistics: ACL 2024},
  month     = aug,
  year      = {2024},
  address   = {Bangkok, Thailand},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.findings-acl.619},
  doi       = {10.18653/v1/2024.findings-acl.619},
  pages     = {10387--10409},
  abstract  = {Charts provide visual representations of data and are widely used for analyzing information, addressing queries, and conveying insights to others. Various chart-related downstream tasks have emerged recently, such as question-answering and summarization. A common strategy to solve these tasks is to fine-tune various models originally trained on vision tasks language. However, such task-specific models are not capable of solving a wide range of chart-related tasks, constraining their real-world applicability. To overcome these challenges, we introduce ChartInstruct: a novel chart-specific vision-language Instruction-following dataset comprising 191K instructions generated with 71K charts. We then present two distinct systems for instruction tuning on such datasets: (1) an end-to-end model that connects a vision encoder for chart understanding with a LLM; and (2) a pipeline model that employs a two-step approach to extract chart data tables and input them into the LLM. In experiments on four downstream tasks, we first show the effectiveness of our model{--}achieving a new set of state-of-the-art results. Further evaluation shows that our instruction-tuning approach supports a wide array of real-world chart comprehension and reasoning scenarios, thereby expanding the scope and applicability of our models to new kinds of tasks.},
}

M. A. Islam, M. E. Ali, and M. R. Parvez, “MapCoder: multi-agent code generation for competitive problem solving,” in Proceedings of the 62nd annual meeting of the association for computational linguistics (volume 1: long papers), Bangkok, Thailand, 2024, p. 4912–4944. doi:10.18653/v1/2024.acl-long.269
[BibTeX] [Abstract] [Download PDF]

Code synthesis, which requires a deep understanding of complex natural language (NL) problem descriptions, generation of code instructions for complex algorithms and data structures, and the successful execution of comprehensive unit tests, presents a significant challenge. Thus, while large language models (LLMs) demonstrate impressive proficiency in natural language processing (NLP), their performance in code generation tasks remains limited. In this paper, we introduce a new approach to code generation tasks leveraging the multi-agent prompting that uniquely replicates the full cycle of program synthesis as observed in human developers. Our framework, MapCoder, consists of four LLM agents specifically designed to emulate the stages of this cycle: recalling relevant examples, planning, code generation, and debugging. After conducting thorough experiments, with multiple LLMs ablations and analyses across eight challenging competitive problem-solving and program synthesis benchmarks—MapCoder showcases remarkable code generation capabilities, achieving their new state-of-the-art (pass@1) results—(HumanEval 93.9%, MBPP 83.1%, APPS 22.0%, CodeContests 28.5%, and xCodeEval 45.3%). Moreover, our method consistently delivers superior performance across various programming languages and varying problem difficulties. We open-source our framework at https://github.com/Md-Ashraful-Pramanik/MapCoder.

@inproceedings{islam-etal-2024-mapcoder,
  title     = {{M}ap{C}oder: Multi-Agent Code Generation for Competitive Problem Solving},
  author    = {Islam, Md. Ashraful and
               Ali, Mohammed Eunus and
               Parvez, Md Rizwan},
  editor    = {Ku, Lun-Wei and
               Martins, Andre and
               Srikumar, Vivek},
  booktitle = {Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = aug,
  year      = {2024},
  address   = {Bangkok, Thailand},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.acl-long.269},
  doi       = {10.18653/v1/2024.acl-long.269},
  pages     = {4912--4944},
  abstract  = {Code synthesis, which requires a deep understanding of complex natural language (NL) problem descriptions, generation of code instructions for complex algorithms and data structures, and the successful execution of comprehensive unit tests, presents a significant challenge. Thus, while large language models (LLMs) demonstrate impressive proficiency in natural language processing (NLP), their performance in code generation tasks remains limited. In this paper, we introduce a new approach to code generation tasks leveraging the multi-agent prompting that uniquely replicates the full cycle of program synthesis as observed in human developers. Our framework, MapCoder, consists of four LLM agents specifically designed to emulate the stages of this cycle: recalling relevant examples, planning, code generation, and debugging. After conducting thorough experiments, with multiple LLMs ablations and analyses across eight challenging competitive problem-solving and program synthesis benchmarks{---}MapCoder showcases remarkable code generation capabilities, achieving their new state-of-the-art (pass@1) results{---}(HumanEval 93.9{\%}, MBPP 83.1{\%}, APPS 22.0{\%}, CodeContests 28.5{\%}, and xCodeEval 45.3{\%}). Moreover, our method consistently delivers superior performance across various programming languages and varying problem difficulties. We open-source our framework at https://github.com/Md-Ashraful-Pramanik/MapCoder.},
}

S. Boughorbel, M. R. Parvez, and M. Hawasly, “Improving language models trained on translated data with continual pre-training and dictionary learning analysis,” in Proceedings of the second Arabic natural language processing conference, Bangkok, Thailand, 2024, p. 73–88. doi:10.18653/v1/2024.arabicnlp-1.7
[BibTeX] [Abstract] [Download PDF]

Training LLMs in low resources languages usually utilizes machine translation (MT) data augmentation from English language. However, translation brings a number of challenges: there are large costs attached to translating and curating huge amounts of content with high-end machine translation solutions; the translated content carries over cultural biases; and if the translation is not faithful and accurate, the quality of the data degrades causing issues in the trained model. In this work, we investigate the role of translation and synthetic data in training language models. We translate TinyStories, a dataset of 2.2M short stories for 3-4 year old children, from English to Arabic using the open NLLB-3B MT model. We train a number of story generation models of size 1M-33M parameters using this data. We identify a number of quality and task-specific issues in the resulting models. To rectify these issues, we further pre-train the models with a small dataset of synthesized high-quality stories generated by a capable LLM in Arabic, representing 1% of the original training data. We show, using GPT-4 as a judge and dictionary learning analysis from mechanistic interpretability, that the suggested approach is a practical means to resolve some of the translation pitfalls. We illustrate the improvement through case studies of linguistic and cultural bias issues.

@inproceedings{boughorbel-etal-2024-improving,
  author    = {Boughorbel, Sabri and Parvez, Md Rizwan and Hawasly, Majd},
  title     = {Improving Language Models Trained on Translated Data with Continual Pre-Training and Dictionary Learning Analysis},
  editor    = {Habash, Nizar and Bouamor, Houda and Eskander, Ramy and Tomeh, Nadi and Abu Farha, Ibrahim and Abdelali, Ahmed and Touileb, Samia and Hamed, Injy and Onaizan, Yaser and Alhafni, Bashar and Antoun, Wissam and Khalifa, Salam and Haddad, Hatem and Zitouni, Imed and AlKhamissi, Badr and Almatham, Rawan and Mrini, Khalil},
  booktitle = {Proceedings of The Second Arabic Natural Language Processing Conference},
  address   = {Bangkok, Thailand},
  publisher = {Association for Computational Linguistics},
  month     = aug,
  year      = {2024},
  pages     = {73--88},
  doi       = {10.18653/v1/2024.arabicnlp-1.7},
  url       = {https://aclanthology.org/2024.arabicnlp-1.7},
  abstract  = {Training LLMs in low resources languages usually utilizes machine translation (MT) data augmentation from English language. However, translation brings a number of challenges: there are large costs attached to translating and curating huge amounts of content with high-end machine translation solutions; the translated content carries over cultural biases; and if the translation is not faithful and accurate, the quality of the data degrades causing issues in the trained model. In this work, we investigate the role of translation and synthetic data in training language models. We translate TinyStories, a dataset of 2.2M short stories for 3-4 year old children, from English to Arabic using the open NLLB-3B MT model. We train a number of story generation models of size 1M-33M parameters using this data. We identify a number of quality and task-specific issues in the resulting models. To rectify these issues, we further pre-train the models with a small dataset of synthesized high-quality stories generated by a capable LLM in Arabic, representing 1{\%} of the original training data. We show, using GPT-4 as a judge and dictionary learning analysis from mechanistic interpretability, that the suggested approach is a practical means to resolve some of the translation pitfalls. We illustrate the improvement through case studies of linguistic and cultural bias issues.},
}

M. Hasanain, M. A. Hasan, F. Ahmad, R. Suwaileh, M. R. Biswas, W. Zaghouani, and F. Alam, “ArAIEval shared task: propagandistic techniques detection in unimodal and multimodal Arabic content,” in Proceedings of the second arabic natural language processing conference, Bangkok, Thailand, 2024, p. 456–466.
[BibTeX] [Abstract] [Download PDF]

We present an overview of the second edition of the ArAIEval shared task, organized as part of the ArabicNLP 2024 conference co-located with ACL 2024. In this edition, ArAIEval offers two tasks: (i) detection of propagandistic textual spans with persuasion techniques identification in tweets and news articles, and (ii) distinguishing between propagandistic and non-propagandistic memes. A total of 14 teams participated in the final evaluation phase, with 6 and 9 teams participating in Tasks 1 and 2, respectively. Finally, 11 teams submitted system description papers. Across both tasks, we observed that fine-tuning transformer models such as AraBERT was at the core of the majority of the participating systems. We provide a description of the task setup, including a description of the dataset construction and the evaluation setup. We further provide a brief overview of the participating systems. All datasets and evaluation scripts are released to the research community. We hope this will enable further research on these important tasks in Arabic.

@inproceedings{hasanain-etal-2024-araieval,
  author    = {Hasanain, Maram and Hasan, Md. Arid and Ahmad, Fatema and Suwaileh, Reem and Biswas, Md. Rafiul and Zaghouani, Wajdi and Alam, Firoj},
  title     = {{A}r{AIE}val Shared Task: Propagandistic Techniques Detection in Unimodal and Multimodal {A}rabic Content},
  editor    = {Habash, Nizar and Bouamor, Houda and Eskander, Ramy and Tomeh, Nadi and Abu Farha, Ibrahim and Abdelali, Ahmed and Touileb, Samia and Hamed, Injy and Onaizan, Yaser and Alhafni, Bashar and Antoun, Wissam and Khalifa, Salam and Haddad, Hatem and Zitouni, Imed and AlKhamissi, Badr and Almatham, Rawan and Mrini, Khalil},
  booktitle = {Proceedings of The Second Arabic Natural Language Processing Conference},
  address   = {Bangkok, Thailand},
  publisher = {Association for Computational Linguistics},
  month     = aug,
  year      = {2024},
  pages     = {456--466},
  url       = {https://aclanthology.org/2024.arabicnlp-1.44},
  abstract  = {We present an overview of the second edition of the ArAIEval shared task, organized as part of the ArabicNLP 2024 conference co-located with ACL 2024. In this edition, ArAIEval offers two tasks: (i) detection of propagandistic textual spans with persuasion techniques identification in tweets and news articles, and (ii) distinguishing between propagandistic and non-propagandistic memes. A total of 14 teams participated in the final evaluation phase, with 6 and 9 teams participating in Tasks 1 and 2, respectively. Finally, 11 teams submitted system description papers. Across both tasks, we observed that fine-tuning transformer models such as AraBERT was at the core of the majority of the participating systems. We provide a description of the task setup, including a description of the dataset construction and the evaluation setup. We further provide a brief overview of the participating systems. All datasets and evaluation scripts are released to the research community. We hope this will enable further research on these important tasks in Arabic.},
}

M. M. Abootorabi, N. Ghazizadeh, S. A. Dalili, A. Ghahramani Kure, M. Dehghani, and E. Asgari, “AIMA at SemEval-2024 task 10: history-based emotion recognition in Hindi-English code-mixed conversations,” in Proceedings of the 18th international workshop on semantic evaluation (semeval-2024), Mexico City, Mexico, 2024, p. 1704–1710. doi:10.18653/v1/2024.semeval-1.244
[BibTeX] [Download PDF]

@inproceedings{abootorabi-etal-2024-aima,
  author    = {Abootorabi, Mohammad Mahdi and Ghazizadeh, Nona and Dalili, Seyed Arshan and Ghahramani Kure, Alireza and Dehghani, Mahshid and Asgari, Ehsaneddin},
  title     = {{AIMA} at {S}em{E}val-2024 Task 10: History-Based Emotion Recognition in {H}indi-{E}nglish Code-Mixed Conversations},
  editor    = {Ojha, Atul Kr. and Do{\u{g}}ru{\"o}z, A. Seza and Tayyar Madabushi, Harish and Da San Martino, Giovanni and Rosenthal, Sara and Ros{\'a}, Aiala},
  booktitle = {Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)},
  address   = {Mexico City, Mexico},
  publisher = {Association for Computational Linguistics},
  month     = jun,
  year      = {2024},
  pages     = {1704--1710},
  doi       = {10.18653/v1/2024.semeval-1.244},
  url       = {https://aclanthology.org/2024.semeval-1.244},
}

D. Dimitrov, F. Alam, M. Hasanain, A. Hasnat, F. Silvestri, P. Nakov, and G. Da San Martino, “SemEval-2024 task 4: multilingual detection of persuasion techniques in memes,” in Proceedings of the 18th international workshop on semantic evaluation (semeval-2024), Mexico City, Mexico, 2024, p. 2009–2026. doi:10.18653/v1/2024.semeval-1.275
[BibTeX] [Download PDF]

@inproceedings{dimitrov-etal-2024-semeval,
  author        = {Dimitrov, Dimitar and Alam, Firoj and Hasanain, Maram and Hasnat, Abul and Silvestri, Fabrizio and Nakov, Preslav and Da San Martino, Giovanni},
  title         = {{S}em{E}val-2024 Task 4: Multilingual Detection of Persuasion Techniques in Memes},
  editor        = {Ojha, Atul Kr. and Do{\u{g}}ru{\"o}z, A. Seza and Tayyar Madabushi, Harish and Da San Martino, Giovanni and Rosenthal, Sara and Ros{\'a}, Aiala},
  booktitle     = {Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)},
  address       = {Mexico City, Mexico},
  publisher     = {Association for Computational Linguistics},
  month         = jun,
  year          = {2024},
  pages         = {2009--2026},
  doi           = {10.18653/v1/2024.semeval-1.275},
  url           = {https://aclanthology.org/2024.semeval-1.275},
  date-modified = {2024-08-03 11:44:50 +0300},
}

A. Ghahramani Kure, M. Dehghani, M. M. Abootorabi, N. Ghazizadeh, S. A. Dalili, and E. Asgari, “AIMA at SemEval-2024 task 3: simple yet powerful emotion cause pair analysis,” in Proceedings of the 18th international workshop on semantic evaluation (semeval-2024), Mexico City, Mexico, 2024, p. 1698–1703. doi:10.18653/v1/2024.semeval-1.243
[BibTeX] [Abstract] [Download PDF]

The SemEval-2024 Task 3 presents two subtasks focusing on emotion-cause pair extraction within conversational contexts. Subtask 1 revolves around the extraction of textual emotion-cause pairs, where causes are defined and annotated as textual spans within the conversation. Conversely, Subtask 2 extends the analysis to encompass multimodal cues, including language, audio, and vision, acknowledging instances where causes may not be exclusively represented in the textual data. Our proposed model for emotion-cause analysis is meticulously structured into three core segments: (i) embedding extraction, (ii) cause-pair extraction & emotion classification, and (iii) cause extraction using QA after finding pairs. Leveraging state-of-the-art techniques and fine-tuning on task-specific datasets, our model effectively unravels the intricate web of conversational dynamics and extracts subtle cues signifying causality in emotional expressions. Our team, AIMA, demonstrated strong performance in the SemEval-2024 Task 3 competition. We ranked as the 10th in subtask 1 and the 6th in subtask 2 out of 23 teams.

@inproceedings{ghahramani-kure-etal-2024-aima,
  author    = {Ghahramani Kure, Alireza and Dehghani, Mahshid and Abootorabi, Mohammad Mahdi and Ghazizadeh, Nona and Dalili, Seyed Arshan and Asgari, Ehsaneddin},
  title     = {{AIMA} at {S}em{E}val-2024 Task 3: Simple Yet Powerful Emotion Cause Pair Analysis},
  editor    = {Ojha, Atul Kr. and Do{\u{g}}ru{\"o}z, A. Seza and Tayyar Madabushi, Harish and Da San Martino, Giovanni and Rosenthal, Sara and Ros{\'a}, Aiala},
  booktitle = {Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)},
  address   = {Mexico City, Mexico},
  publisher = {Association for Computational Linguistics},
  month     = jun,
  year      = {2024},
  pages     = {1698--1703},
  doi       = {10.18653/v1/2024.semeval-1.243},
  url       = {https://aclanthology.org/2024.semeval-1.243},
  abstract  = {The SemEval-2024 Task 3 presents two subtasks focusing on emotion-cause pair extraction within conversational contexts. Subtask 1 revolves around the extraction of textual emotion-cause pairs, where causes are defined and annotated as textual spans within the conversation. Conversely, Subtask 2 extends the analysis to encompass multimodal cues, including language, audio, and vision, acknowledging instances where causes may not be exclusively represented in the textual data. Our proposed model for emotion-cause analysis is meticulously structured into three core segments: (i) embedding extraction, (ii) cause-pair extraction {\&} emotion classification, and (iii) cause extraction using QA after finding pairs. Leveraging state-of-the-art techniques and fine-tuning on task-specific datasets, our model effectively unravels the intricate web of conversational dynamics and extracts subtle cues signifying causality in emotional expressions. Our team, AIMA, demonstrated strong performance in the SemEval-2024 Task 3 competition. We ranked as the 10th in subtask 1 and the 6th in subtask 2 out of 23 teams.},
}

O. Ghahroodi and E. Asgari, “HierarchyEverywhere at SemEval-2024 task 4: detection of persuasion techniques in memes using hierarchical text classifier,” in Proceedings of the 18th international workshop on semantic evaluation (semeval-2024), Mexico City, Mexico, 2024, p. 1727–1732. doi:10.18653/v1/2024.semeval-1.247
[BibTeX] [Download PDF]

@inproceedings{ghahroodi-asgari-2024-hierarchyeverywhere,
  author    = {Ghahroodi, Omid and Asgari, Ehsaneddin},
  title     = {{H}ierarchy{E}verywhere at {S}em{E}val-2024 Task 4: Detection of Persuasion Techniques in Memes Using Hierarchical Text Classifier},
  editor    = {Ojha, Atul Kr. and Do{\u{g}}ru{\"o}z, A. Seza and Tayyar Madabushi, Harish and Da San Martino, Giovanni and Rosenthal, Sara and Ros{\'a}, Aiala},
  booktitle = {Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)},
  address   = {Mexico City, Mexico},
  publisher = {Association for Computational Linguistics},
  month     = jun,
  year      = {2024},
  pages     = {1727--1732},
  doi       = {10.18653/v1/2024.semeval-1.247},
  url       = {https://aclanthology.org/2024.semeval-1.247},
}

H. Mubarak, H. Al-Khalifa, and K. S. Alkhalefah, “Halwasa: quantify and analyze hallucinations in large language models: Arabic as a case study,” in Proceedings of the 2024 joint international conference on computational linguistics, language resources and evaluation (lrec-coling 2024), Torino, Italia, 2024, p. 8008–8015.
[BibTeX] [Abstract] [Download PDF]

Large Language Models (LLMs) have shown superb abilities to generate texts that are indistinguishable from human-generated texts in many cases. However, sometimes they generate false, incorrect, or misleading content, which is often described as “hallucinations”. Quantifying and analyzing hallucination in LLMs can increase their reliability and usage. While hallucination is being actively studied for English and other languages, and different benchmarking datasets have been created, this area is not studied at all for Arabic. In our paper, we create the first Arabic dataset that contains 10K of generated sentences by LLMs and annotate it for factuality and correctness. We provide detailed analysis of the dataset to analyze factual and linguistic errors. We found that 25% of the generated sentences are factually incorrect. We share the dataset with the research community.

@inproceedings{mubarak-etal-2024-halwasa,
  author    = {Mubarak, Hamdy and Al-Khalifa, Hend and Alkhalefah, Khaloud Suliman},
  title     = {Halwasa: Quantify and Analyze Hallucinations in Large Language Models: {A}rabic as a Case Study},
  editor    = {Calzolari, Nicoletta and Kan, Min-Yen and Hoste, Veronique and Lenci, Alessandro and Sakti, Sakriani and Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
  address   = {Torino, Italia},
  publisher = {ELRA and ICCL},
  month     = may,
  year      = {2024},
  pages     = {8008--8015},
  url       = {https://aclanthology.org/2024.lrec-main.705},
  abstract  = {Large Language Models (LLMs) have shown superb abilities to generate texts that are indistinguishable from human-generated texts in many cases. However, sometimes they generate false, incorrect, or misleading content, which is often described as {\textquotedblleft}hallucinations{\textquotedblright}. Quantifying and analyzing hallucination in LLMs can increase their reliability and usage. While hallucination is being actively studied for English and other languages, and different benchmarking datasets have been created, this area is not studied at all for Arabic. In our paper, we create the first Arabic dataset that contains 10K of generated sentences by LLMs and annotate it for factuality and correctness. We provide detailed analysis of the dataset to analyze factual and linguistic errors. We found that 25{\%} of the generated sentences are factually incorrect. We share the dataset with the research community.},
}

N. Mirzakhmedova, J. Kiesel, M. Alshomary, M. Heinrich, N. Handke, X. Cai, V. Barriere, D. Dastgheib, O. Ghahroodi, M. SadraeiJavaheri, E. Asgari, L. Kawaletz, H. Wachsmuth, and B. Stein, “The touché23-ValueEval dataset for identifying human values behind arguments,” in Proceedings of the 2024 joint international conference on computational linguistics, language resources and evaluation (lrec-coling 2024), Torino, Italia, 2024, p. 16121–16134.
[BibTeX] [Download PDF]

@inproceedings{mirzakhmedova-etal-2024-touche23,
  author    = {Mirzakhmedova, Nailia and Kiesel, Johannes and Alshomary, Milad and Heinrich, Maximilian and Handke, Nicolas and Cai, Xiaoni and Barriere, Valentin and Dastgheib, Doratossadat and Ghahroodi, Omid and SadraeiJavaheri, MohammadAli and Asgari, Ehsaneddin and Kawaletz, Lea and Wachsmuth, Henning and Stein, Benno},
  title     = {The Touch{\'e}23-{V}alue{E}val Dataset for Identifying Human Values behind Arguments},
  editor    = {Calzolari, Nicoletta and Kan, Min-Yen and Hoste, Veronique and Lenci, Alessandro and Sakti, Sakriani and Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
  address   = {Torino, Italia},
  publisher = {ELRA and ICCL},
  month     = may,
  year      = {2024},
  pages     = {16121--16134},
  url       = {https://aclanthology.org/2024.lrec-main.1402},
}

M. SadraeiJavaheri, E. Asgari, and H. R. Rabiee, “Transformers for bridging Persian dialects: transliteration model for tajiki and Iranian scripts,” in Proceedings of the 2024 joint international conference on computational linguistics, language resources and evaluation (lrec-coling 2024), Torino, Italia, 2024, p. 16770–16775.
[BibTeX] [Download PDF]

@inproceedings{sadraeijavaheri-etal-2024-transformers,
  author    = {SadraeiJavaheri, MohammadAli and Asgari, Ehsaneddin and Rabiee, Hamid Reza},
  title     = {Transformers for Bridging {P}ersian Dialects: Transliteration Model for Tajiki and {I}ranian Scripts},
  editor    = {Calzolari, Nicoletta and Kan, Min-Yen and Hoste, Veronique and Lenci, Alessandro and Sakti, Sakriani and Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
  address   = {Torino, Italia},
  publisher = {ELRA and ICCL},
  month     = may,
  year      = {2024},
  pages     = {16770--16775},
  url       = {https://aclanthology.org/2024.lrec-main.1459},
}

M. A. Hasan, S. Das, A. Anjum, F. Alam, A. Anjum, A. Sarker, and S. R. H. Noori, “Zero- and few-shot prompting with LLMs: a comparative study with fine-tuned models for Bangla sentiment analysis,” in Proceedings of the 2024 joint international conference on computational linguistics, language resources and evaluation (lrec-coling 2024), Torino, Italia, 2024, p. 17808–17818.
[BibTeX] [Download PDF]

@inproceedings{hasan-etal-2024-zero,
  author        = {Hasan, Md. Arid and Das, Shudipta and Anjum, Afiyat and Alam, Firoj and Anjum, Anika and Sarker, Avijit and Noori, Sheak Rashed Haider},
  title         = {Zero- and Few-Shot Prompting with {LLM}s: A Comparative Study with Fine-tuned Models for {B}angla Sentiment Analysis},
  editor        = {Calzolari, Nicoletta and Kan, Min-Yen and Hoste, Veronique and Lenci, Alessandro and Sakti, Sakriani and Xue, Nianwen},
  booktitle     = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
  address       = {Torino, Italia},
  publisher     = {ELRA and ICCL},
  month         = may,
  year          = {2024},
  pages         = {17808--17818},
  url           = {https://aclanthology.org/2024.lrec-main.1549},
  date-modified = {2024-08-03 11:44:50 +0300},
}

M. Hasanain, F. Ahmad, and F. Alam, “Can GPT-4 identify propaganda? annotation and detection of propaganda spans in news articles,” in Proceedings of the 2024 joint international conference on computational linguistics, language resources and evaluation (lrec-coling 2024), Torino, Italia, 2024, p. 2724–2744.
[BibTeX] [Download PDF]

@inproceedings{hasanain-etal-2024-gpt,
  author        = {Hasanain, Maram and Ahmad, Fatema and Alam, Firoj},
  title         = {Can {GPT}-4 Identify Propaganda? Annotation and Detection of Propaganda Spans in News Articles},
  editor        = {Calzolari, Nicoletta and Kan, Min-Yen and Hoste, Veronique and Lenci, Alessandro and Sakti, Sakriani and Xue, Nianwen},
  booktitle     = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
  address       = {Torino, Italia},
  publisher     = {ELRA and ICCL},
  month         = may,
  year          = {2024},
  pages         = {2724--2744},
  url           = {https://aclanthology.org/2024.lrec-main.244},
  date-modified = {2024-08-03 11:44:50 +0300},
}

A. Abdelali, H. Mubarak, S. Chowdhury, M. Hasanain, B. Mousi, S. Boughorbel, S. Abdaljalil, Y. El Kheir, D. Izham, F. Dalvi, M. Hawasly, N. Nazar, Y. Elshahawy, A. Ali, N. Durrani, N. Milic-Frayling, and F. Alam, “LAraBench: benchmarking Arabic AI with large language models,” in Proceedings of the 18th conference of the european chapter of the association for computational linguistics (volume 1: long papers), St. Julian's, Malta, 2024, p. 487–520.
[BibTeX] [Abstract] [Download PDF]

Recent advancements in Large Language Models (LLMs) have significantly influenced the landscape of language and speech research. Despite this progress, these models lack specific benchmarking against state-of-the-art (SOTA) models tailored to particular languages and tasks. LAraBench addresses this gap for Arabic Natural Language Processing (NLP) and Speech Processing tasks, including sequence tagging and content classification across different domains. We utilized models such as GPT-3.5-turbo, GPT-4, BLOOMZ, Jais-13b-chat, Whisper, and USM, employing zero and few-shot learning techniques to tackle 33 distinct tasks across 61 publicly available datasets. This involved 98 experimental setups, encompassing ~296K data points, ~46 hours of speech, and 30 sentences for Text-to-Speech (TTS). This effort resulted in 330+ sets of experiments. Our analysis focused on measuring the performance gap between SOTA models and LLMs. The overarching trend observed was that SOTA models generally outperformed LLMs in zero-shot learning, with a few exceptions. Notably, larger computational models with few-shot learning techniques managed to reduce these performance gaps. Our findings provide valuable insights into the applicability of LLMs for Arabic NLP and speech processing tasks.

@inproceedings{abdelali-etal-2024-larabench,
  author        = {Abdelali, Ahmed and Mubarak, Hamdy and Chowdhury, Shammur and Hasanain, Maram and Mousi, Basel and Boughorbel, Sabri and Abdaljalil, Samir and El Kheir, Yassine and Izham, Daniel and Dalvi, Fahim and Hawasly, Majd and Nazar, Nizi and Elshahawy, Youssef and Ali, Ahmed and Durrani, Nadir and Milic-Frayling, Natasa and Alam, Firoj},
  title         = {{LA}ra{B}ench: Benchmarking {A}rabic {AI} with Large Language Models},
  editor        = {Graham, Yvette and Purver, Matthew},
  booktitle     = {Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)},
  address       = {St. Julian{'}s, Malta},
  publisher     = {Association for Computational Linguistics},
  month         = mar,
  year          = {2024},
  pages         = {487--520},
  url           = {https://aclanthology.org/2024.eacl-long.30},
  abstract      = {Recent advancements in Large Language Models (LLMs) have significantly influenced the landscape of language and speech research. Despite this progress, these models lack specific benchmarking against state-of-the-art (SOTA) models tailored to particular languages and tasks. LAraBench addresses this gap for Arabic Natural Language Processing (NLP) and Speech Processing tasks, including sequence tagging and content classification across different domains. We utilized models such as GPT-3.5-turbo, GPT-4, BLOOMZ, Jais-13b-chat, Whisper, and USM, employing zero and few-shot learning techniques to tackle 33 distinct tasks across 61 publicly available datasets. This involved 98 experimental setups, encompassing {\textasciitilde}296K data points, {\textasciitilde}46 hours of speech, and 30 sentences for Text-to-Speech (TTS). This effort resulted in 330+ sets of experiments. Our analysis focused on measuring the performance gap between SOTA models and LLMs. The overarching trend observed was that SOTA models generally outperformed LLMs in zero-shot learning, with a few exceptions. Notably, larger computational models with few-shot learning techniques managed to reduce these performance gaps. Our findings provide valuable insights into the applicability of LLMs for Arabic NLP and speech processing tasks.},
  date-modified = {2024-08-03 11:44:50 +0300},
}

F. Dalvi, M. Hasanain, S. Boughorbel, B. Mousi, S. Abdaljalil, N. Nazar, A. Abdelali, S. A. Chowdhury, H. Mubarak, A. Ali, M. Hawasly, N. Durrani, and F. Alam, “LLMeBench: a flexible framework for accelerating LLMs benchmarking,” in Proceedings of the 18th conference of the european chapter of the association for computational linguistics: system demonstrations, St. Julian's, Malta, 2024, p. 214–222.
[BibTeX] [Abstract] [Download PDF]

The recent development and success of Large Language Models (LLMs) necessitate an evaluation of their performance across diverse NLP tasks in different languages. Although several frameworks have been developed and made publicly available, their customization capabilities for specific tasks and datasets are often complex for different users. In this study, we introduce the LLMeBench framework, which can be seamlessly customized to evaluate LLMs for any NLP task, regardless of language. The framework features generic dataset loaders, several model providers, and pre-implements most standard evaluation metrics. It supports in-context learning with zero- and few-shot settings. A specific dataset and task can be evaluated for a given LLM in less than 20 lines of code while allowing full flexibility to extend the framework for custom datasets, models, or tasks. The framework has been tested on 31 unique NLP tasks using 53 publicly available datasets within 90 experimental setups, involving approximately 296K data points. We open-sourced LLMeBench for the community (https://github.com/qcri/LLMeBench/) and a video demonstrating the framework is available online (https://youtu.be/9cC2m_abk3A).

@inproceedings{dalvi-etal-2024-llmebench,
  author        = {Dalvi, Fahim and Hasanain, Maram and Boughorbel, Sabri and Mousi, Basel and Abdaljalil, Samir and Nazar, Nizi and Abdelali, Ahmed and Chowdhury, Shammur Absar and Mubarak, Hamdy and Ali, Ahmed and Hawasly, Majd and Durrani, Nadir and Alam, Firoj},
  title         = {{LLM}e{B}ench: A Flexible Framework for Accelerating {LLM}s Benchmarking},
  editor        = {Aletras, Nikolaos and De Clercq, Orphee},
  booktitle     = {Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics: System Demonstrations},
  address       = {St. Julian{'}s, Malta},
  publisher     = {Association for Computational Linguistics},
  month         = mar,
  year          = {2024},
  pages         = {214--222},
  url           = {https://aclanthology.org/2024.eacl-demo.23},
  abstract      = {The recent development and success of Large Language Models (LLMs) necessitate an evaluation of their performance across diverse NLP tasks in different languages. Although several frameworks have been developed and made publicly available, their customization capabilities for specific tasks and datasets are often complex for different users. In this study, we introduce the LLMeBench framework, which can be seamlessly customized to evaluate LLMs for any NLP task, regardless of language. The framework features generic dataset loaders, several model providers, and pre-implements most standard evaluation metrics. It supports in-context learning with zero- and few-shot settings. A specific dataset and task can be evaluated for a given LLM in less than 20 lines of code while allowing full flexibility to extend the framework for custom datasets, models, or tasks. The framework has been tested on 31 unique NLP tasks using 53 publicly available datasets within 90 experimental setups, involving approximately 296K data points. We open-sourced LLMeBench for the community (https://github.com/qcri/LLMeBench/) and a video demonstrating the framework is available online (https://youtu.be/9cC2m{\_}abk3A).},
  date-modified = {2024-08-03 11:44:50 +0300},
}

F. Alam, S. A. Chowdhury, S. Boughorbel, and M. Hasanain, “LLMs for low resource languages in multilingual, multimodal and dialectal settings,” in Proceedings of the 18th conference of the european chapter of the association for computational linguistics: tutorial abstracts, St. Julian's, Malta, 2024, p. 27–33.
[BibTeX] [Download PDF]

@inproceedings{alam-etal-2024-llms,
  author        = {Alam, Firoj and Chowdhury, Shammur Absar and Boughorbel, Sabri and Hasanain, Maram},
  title         = {{LLM}s for Low Resource Languages in Multilingual, Multimodal and Dialectal Settings},
  editor        = {Mesgar, Mohsen and Lo{\'a}iciga, Sharid},
  booktitle     = {Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics: Tutorial Abstracts},
  address       = {St. Julian{'}s, Malta},
  publisher     = {Association for Computational Linguistics},
  month         = mar,
  year          = {2024},
  pages         = {27--33},
  url           = {https://aclanthology.org/2024.eacl-tutorials.5},
  date-modified = {2024-08-03 11:44:50 +0300},
}

A. Barrón-Cedeño, F. Alam, T. Chakraborty, T. Elsayed, P. Nakov, P. Przybyła, J. M. Struß, F. Haouari, M. Hasanain, F. Ruggeri, X. Song, and R. Suwaileh, “The CLEF-2024 CheckThat! Lab: check-worthiness, subjectivity, persuasion, roles, authorities, and adversarial robustness,” in Advances in information retrieval, 2024, p. 449–458.
[BibTeX] [Abstract]

The first five editions of the CheckThat! lab focused on the main tasks of the information verification pipeline: check-worthiness, evidence retrieval and pairing, and verification. Since the 2023 edition, it has been focusing on new problems that can support the research and decision making during the verification process. In this new edition, we focus on new problems and —for the first time— we propose six tasks in fifteen languages (Arabic, Bulgarian, English, Dutch, French, Georgian, German, Greek, Italian, Polish, Portuguese, Russian, Slovene, Spanish, and code-mixed Hindi-English): Task 1 estimation of check-worthiness (the only task that has been present in all CheckThat! editions), Task 2 identification of subjectivity (a follow up of CheckThat! 2023 edition), Task 3 identification of persuasion (a follow up of SemEval 2023), Task 4 detection of hero, villain, and victim from memes (a follow up of CONSTRAINT 2022), Task 5 Rumor Verification using Evidence from Authorities (a first), and Task 6 robustness of credibility assessment with adversarial examples (a first). These tasks represent challenging classification and retrieval problems at the document and at the span level, including multilingual and multimodal settings.

@inproceedings{CheckThat:ECIR2024,
  author    = {Barr{\'o}n-Cede{\~{n}}o, Alberto and Alam, Firoj and Chakraborty, Tanmoy and Elsayed, Tamer and Nakov, Preslav and Przyby{\l}a, Piotr and Stru{\ss}, Julia Maria and Haouari, Fatima and Hasanain, Maram and Ruggeri, Federico and Song, Xingyi and Suwaileh, Reem},
  title     = {The {CLEF}-2024 {C}heck{T}hat! {L}ab: Check-Worthiness, Subjectivity, Persuasion, Roles, Authorities, and Adversarial Robustness},
  editor    = {Goharian, Nazli and Tonellotto, Nicola and He, Yulan and Lipani, Aldo and McDonald, Graham and Macdonald, Craig and Ounis, Iadh},
  booktitle = {Advances in Information Retrieval},
  publisher = {Springer Nature Switzerland},
  NOaddress = {Cham},
  year      = {2024},
  pages     = {449--458},
  isbn      = {978-3-031-56069-9},
  abstract  = {The first five editions of the CheckThat! lab focused on the main tasks of the information verification pipeline: check-worthiness, evidence retrieval and pairing, and verification. Since the 2023 edition, it has been focusing on new problems that can support the research and decision making during the verification process. In this new edition, we focus on new problems and ---for the first time--- we propose six tasks in fifteen languages (Arabic, Bulgarian, English, Dutch, French, Georgian, German, Greek, Italian, Polish, Portuguese, Russian, Slovene, Spanish, and code-mixed Hindi-English): Task 1 estimation of check-worthiness (the only task that has been present in all CheckThat! editions), Task 2 identification of subjectivity (a follow up of CheckThat! 2023 edition), Task 3 identification of persuasion (a follow up of SemEval 2023), Task 4 detection of hero, villain, and victim from memes (a follow up of CONSTRAINT 2022), Task 5 Rumor Verification using Evidence from Authorities (a first), and Task 6 robustness of credibility assessment with adversarial examples (a first). These tasks represent challenging classification and retrieval problems at the document and at the span level, including multilingual and multimodal settings.},
}

A. Barrón-Cedeño, F. Alam, J. M. Struß, P. Nakov, T. Chakraborty, T. Elsayed, P. Przybyła, T. Caselli, G. Da San Martino, F. Haouari, C. Li, J. Piskorski, F. Ruggeri, X. Song, and R. Suwaileh, “Overview of the CLEF-2024 CheckThat! Lab: check-worthiness, subjectivity, persuasion, roles, authorities and adversarial robustness,” in Experimental ir meets multilinguality, multimodality, and interaction. proceedings of the fifteenth international conference of the clef association (clef 2024), 2024.
[BibTeX]

@inproceedings{clef-checkthat:2024-lncs,
  author    = {Barr{\'o}n-Cede{\~{n}}o, Alberto
               and Alam, Firoj
               and Stru{\ss}, Julia Maria
               and Nakov, Preslav
               and Chakraborty, Tanmoy
               and Elsayed, Tamer
               and Przyby{\l}a, Piotr
               and Caselli, Tommaso
               and Da San Martino, Giovanni
               and Haouari, Fatima
               and Li, Chengkai
               and Piskorski, Jakub
               and Ruggeri, Federico
               and Song, Xingyi
               and Suwaileh, Reem},
  title     = {Overview of the {CLEF}-2024 {CheckThat! Lab}: Check-Worthiness, Subjectivity, Persuasion, Roles, Authorities and Adversarial Robustness},
  editor    = {Goeuriot, Lorraine
               and Mulhem, Philippe
               and Qu{\'e}not, Georges
               and Schwab, Didier
               and Soulier, Laure
               and Di Nunzio, Giorgio Maria
               and Galu{\v{s}}{\v{c}}{\'a}kov{\'a}, Petra
               and Garc{\'\i}a Seco de Herrera, Alba
               and Faggioli, Guglielmo
               and Ferro, Nicola},
  booktitle = {Experimental {IR} Meets Multilinguality, Multimodality, and Interaction. Proceedings of the Fifteenth International Conference of the {CLEF} Association ({CLEF} 2024)},
  year      = {2024},
}

S. Abdaljalil and H. Mubarak, “Wikidata as a source of demographic information,” in Proceedings of the second arabic natural language processing conference, Bangkok, Thailand, 2024, p. 1–10. doi:10.18653/v1/2024.arabicnlp-1.1
[BibTeX] [Abstract] [Download PDF]

Names carry important information about our identities and demographics such as gender, nationality, ethnicity, etc. We investigate the use of individual’s name, in both Arabic and English, to predict important attributes, namely country, region, gender, and language. We extract data from Wikidata, and normalize it, to build a comprehensive dataset consisting of more than 1 million entities and their normalized attributes. We experiment with a Linear SVM approach, as well as two Transformers approaches consisting of BERT model fine-tuning and Transformers pipeline. Our results indicate that we can predict the gender, language and region using the name only with a confidence over 0.65. The country attribute can be predicted with less accuracy. The Linear SVM approach outperforms the other approaches for all the attributes. The best performing approach was also evaluated on another dataset that consists of 1,500 names from 15 countries (covering different regions) extracted from Twitter, and yields similar results.

@inproceedings{abdaljalil-mubarak-2024-wikidata,
  author    = {Abdaljalil, Samir and Mubarak, Hamdy},
  title     = {{W}ikidata as a Source of Demographic Information},
  editor    = {Habash, Nizar and Bouamor, Houda and Eskander, Ramy and Tomeh, Nadi and Abu Farha, Ibrahim and Abdelali, Ahmed and Touileb, Samia and Hamed, Injy and Onaizan, Yaser and Alhafni, Bashar and Antoun, Wissam and Khalifa, Salam and Haddad, Hatem and Zitouni, Imed and AlKhamissi, Badr and Almatham, Rawan and Mrini, Khalil},
  booktitle = {Proceedings of the Second Arabic Natural Language Processing Conference},
  month     = aug,
  year      = {2024},
  address   = {Bangkok, Thailand},
  publisher = {Association for Computational Linguistics},
  pages     = {1--10},
  doi       = {10.18653/v1/2024.arabicnlp-1.1},
  url       = {https://aclanthology.org/2024.arabicnlp-1.1/},
  abstract  = {Names carry important information about our identities and demographics such as gender, nationality, ethnicity, etc. We investigate the use of individual{'}s name, in both Arabic and English, to predict important attributes, namely country, region, gender, and language. We extract data from Wikidata, and normalize it, to build a comprehensive dataset consisting of more than 1 million entities and their normalized attributes. We experiment with a Linear SVM approach, as well as two Transformers approaches consisting of BERT model fine-tuning and Transformers pipeline. Our results indicate that we can predict the gender, language and region using the name only with a confidence over 0.65. The country attribute can be predicted with less accuracy. The Linear SVM approach outperforms the other approaches for all the attributes. The best performing approach was also evaluated on another dataset that consists of 1,500 names from 15 countries (covering different regions) extracted from Twitter, and yields similar results.},
}

V. N. Sukhadia and S. A. Chowdhury, “Children’s speech recognition through discrete token enhancement,” in Proc. of the 25th Annual Conference of the International Speech Communication Association (INTERSPEECH), 2024.
[BibTeX]

@inproceedings{vrunda2024,
  author    = {Sukhadia, Vrunda N. and Chowdhury, Shammur Absar},
  title     = {Children's Speech Recognition through Discrete Token Enhancement},
  booktitle = {Proc. of the 25th Annual Conference of the {International Speech Communication Association} ({INTERSPEECH})},
  year      = {2024},
}

W. Zaghouani, H. Mubarak, and M. R. Biswas, “So hateful! building a multi-label hate speech annotated Arabic dataset,” in Proceedings of the 2024 joint international conference on computational linguistics, language resources and evaluation (lrec-coling 2024), Torino, Italia, 2024, p. 15044–15055.
[BibTeX] [Abstract] [Download PDF]

Social media enables widespread propagation of hate speech targeting groups based on ethnicity, religion, or other characteristics. With manual content moderation being infeasible given the volume, automatic hate speech detection is essential. This paper analyzes 70,000 Arabic tweets, from which 15,965 tweets were selected and annotated, to identify hate speech patterns and train classification models. Annotators labeled the Arabic tweets for offensive content, hate speech, emotion intensity and type, effect on readers, humor, factuality, and spam. Key findings reveal 15% of tweets contain offensive language while 6% have hate speech, mostly targeted towards groups with common ideological or political affiliations. Annotations capture diverse emotions, and sarcasm is more prevalent than humor. Additionally, 10% of tweets provide verifiable factual claims, and 7% are deemed important. For hate speech detection, deep learning models like AraBERT outperform classical machine learning approaches. By providing insights into hate speech characteristics, this work enables improved content moderation and reduced exposure to online hate. The annotated dataset advances Arabic natural language processing research and resources.

@inproceedings{zaghouani-etal-2024-hateful,
  author    = {Zaghouani, Wajdi and Mubarak, Hamdy and Biswas, Md. Rafiul},
  title     = {So Hateful! Building a Multi-Label Hate Speech Annotated {A}rabic Dataset},
  editor    = {Calzolari, Nicoletta and Kan, Min-Yen and Hoste, Veronique and Lenci, Alessandro and Sakti, Sakriani and Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
  month     = may,
  year      = {2024},
  address   = {Torino, Italia},
  publisher = {ELRA and ICCL},
  pages     = {15044--15055},
  url       = {https://aclanthology.org/2024.lrec-main.1308/},
  abstract  = {Social media enables widespread propagation of hate speech targeting groups based on ethnicity, religion, or other characteristics. With manual content moderation being infeasible given the volume, automatic hate speech detection is essential. This paper analyzes 70,000 Arabic tweets, from which 15,965 tweets were selected and annotated, to identify hate speech patterns and train classification models. Annotators labeled the Arabic tweets for offensive content, hate speech, emotion intensity and type, effect on readers, humor, factuality, and spam. Key findings reveal 15{\%} of tweets contain offensive language while 6{\%} have hate speech, mostly targeted towards groups with common ideological or political affiliations. Annotations capture diverse emotions, and sarcasm is more prevalent than humor. Additionally, 10{\%} of tweets provide verifiable factual claims, and 7{\%} are deemed important. For hate speech detection, deep learning models like AraBERT outperform classical machine learning approaches. By providing insights into hate speech characteristics, this work enables improved content moderation and reduced exposure to online hate. The annotated dataset advances Arabic natural language processing research and resources.},
}

Y. El Kheir, H. Mubarak, A. Ali, and S. A. Chowdhury, “Beyond orthography: automatic recovery of short vowels and dialectal sounds in Arabic,” in Proc. of the 62nd annual meeting of the association for computational linguistics (ACL), 2024.
[BibTeX]

@inproceedings{ElKheir2024Beyond,
  author    = {El Kheir, Yassine and Mubarak, Hamdy and Ali, Ahmed and Chowdhury, Shammur Absar},
  title     = {Beyond Orthography: Automatic Recovery of Short Vowels and Dialectal Sounds in {Arabic}},
  booktitle = {Proc. of the 62nd Annual Meeting of the Association for Computational Linguistics ({ACL})},
  year      = {2024},
}

The first five editions of the CheckThat! lab focused on the main tasks of the information verification pipeline: check-worthiness, evidence retrieval and pairing, and verification. Since the 2023 edition, it has been focusing on new problems that can support the research and decision making during the verification process. In this new edition, we focus on new problems and —for the first time— we propose six tasks in fifteen languages (Arabic, Bulgarian, English, Dutch, French, Georgian, German, Greek, Italian, Polish, Portuguese, Russian, Slovene, Spanish, and code-mixed Hindi-English): Task 1 estimation of check-worthiness (the only task that has been present in all CheckThat! editions), Task 2 identification of subjectivity (a follow up of CheckThat! 2023 edition), Task 3 identification of persuasion (a follow up of SemEval 2023), Task 4 detection of hero, villain, and victim from memes (a follow up of CONSTRAINT 2022), Task 5 Rumor Verification using Evidence from Authorities (a first), and Task 6 robustness of credibility assessment with adversarial examples (a first). These tasks represent challenging classification and retrieval problems at the document and at the span level, including multilingual and multimodal settings.

@inproceedings{CheckThat:ECIR2024,
  author    = {Barr{\'o}n-Cede{\~{n}}o, Alberto and Alam, Firoj and Chakraborty, Tanmoy and Elsayed, Tamer and Nakov, Preslav and Przyby{\l}a, Piotr and Stru{\ss}, Julia Maria and Haouari, Fatima and Hasanain, Maram and Ruggeri, Federico and Song, Xingyi and Suwaileh, Reem},
  title     = {The {CLEF}-2024 {C}heck{T}hat! {L}ab: Check-Worthiness, Subjectivity, Persuasion, Roles, Authorities, and Adversarial Robustness},
  editor    = {Goharian, Nazli and Tonellotto, Nicola and He, Yulan and Lipani, Aldo and McDonald, Graham and Macdonald, Craig and Ounis, Iadh},
  booktitle = {Advances in Information Retrieval},
  publisher = {Springer Nature Switzerland},
  NOaddress = {Cham},
  year      = {2024},
  pages     = {449--458},
  isbn      = {978-3-031-56069-9},
  abstract  = {The first five editions of the CheckThat! lab focused on the main tasks of the information verification pipeline: check-worthiness, evidence retrieval and pairing, and verification. Since the 2023 edition, it has been focusing on new problems that can support the research and decision making during the verification process. In this new edition, we focus on new problems and ---for the first time--- we propose six tasks in fifteen languages (Arabic, Bulgarian, English, Dutch, French, Georgian, German, Greek, Italian, Polish, Portuguese, Russian, Slovene, Spanish, and code-mixed Hindi-English): Task 1 estimation of check-worthiness (the only task that has been present in all CheckThat! editions), Task 2 identification of subjectivity (a follow up of CheckThat! 2023 edition), Task 3 identification of persuasion (a follow up of SemEval 2023), Task 4 detection of hero, villain, and victim from memes (a follow up of CONSTRAINT 2022), Task 5 Rumor Verification using Evidence from Authorities (a first), and Task 6 robustness of credibility assessment with adversarial examples (a first). These tasks represent challenging classification and retrieval problems at the document and at the span level, including multilingual and multimodal settings.},
}

F. Alam, M. R. Biswas, U. Shah, W. Zaghouani, and G. Mikros, “Propaganda to hate: a multimodal analysis of arabic memes with multi-agent llms,” in Proceedings of the 25th international web information systems engineering conference (wise), Doha, Qatar, 2024.
[BibTeX] [Download PDF]

@inproceedings{alam2024propagandahatemultimodalanalysis,
  author    = {Alam, Firoj and Biswas, Md. Rafiul and Shah, Uzair and Zaghouani, Wajdi and Mikros, Georgios},
  title     = {Propaganda to Hate: A Multimodal Analysis of {Arabic} Memes with Multi-Agent {LLMs}},
  booktitle = {Proceedings of The 25th International Web Information Systems Engineering Conference ({WISE})},
  year      = {2024},
  address   = {Doha, Qatar},
  url       = {https://arxiv.org/abs/2409.07246},
}

M. B. Kmainasi, R. Khan, A. E. Shahroor, B. Bendou, M. Hasanain, and F. Alam, “Native vs non-native language prompting: a comparative analysis,” in Proceedings of the 25th international web information systems engineering conference (WISE), Doha, Qatar, 2024.
[BibTeX] [Download PDF]

@inproceedings{kmainasi2024nativevsnonnativelanguage,
  author    = {Kmainasi, Mohamed Bayan and Khan, Rakif and Shahroor, Ali Ezzat and Bendou, Boushra and Hasanain, Maram and Alam, Firoj},
  title     = {Native vs Non-Native Language Prompting: A Comparative Analysis},
  booktitle = {Proceedings of The 25th International Web Information Systems Engineering Conference ({WISE})},
  year      = {2024},
  address   = {Doha, Qatar},
  url       = {https://arxiv.org/abs/2409.07054},
}

B. Mousi, N. Durrani, F. Ahmad, M. A. Hasan, M. Hasanain, T. Kabbani, F. Dalvi, S. A. Chowdhury, and F. Alam, “AraDiCE: benchmarks for dialectal and cultural capabilities in llms,” Arxiv preprint arxiv:2409.11404, 2024.
[BibTeX] [Download PDF]

@article{mousi2024aradicebenchmarksdialectalcultural,
  author        = {Mousi, Basel and Durrani, Nadir and Ahmad, Fatema and Hasan, Md. Arid and Hasanain, Maram and Kabbani, Tameem and Dalvi, Fahim and Chowdhury, Shammur Absar and Alam, Firoj},
  title         = {{AraDiCE}: Benchmarks for Dialectal and Cultural Capabilities in {LLMs}},
  journal       = {arXiv preprint arXiv:2409.11404},
  year          = {2024},
  archivePrefix = {arXiv},
  eprint        = {2409.11404},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2409.11404},
}

M. Hawasly, F. Dalvi, and N. Durrani, “Scaling up discovery of latent concepts in deep nlp models,” in Proceedings of the 18th conference of the european chapter of the association for computational linguistics (volume 1: long papers), 2024, p. 793–806.
[BibTeX]

@inproceedings{hawasly2024scaling,
  author    = {Hawasly, Majd and Dalvi, Fahim and Durrani, Nadir},
  title     = {Scaling up Discovery of Latent Concepts in Deep {NLP} Models},
  booktitle = {Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)},
  pages     = {793--806},
  year      = {2024},
}

S. B. Islam, M. A. Rahman, K. Hossain, E. Hoque, S. Joty, and M. R. Parvez, “OPEN-RAG: enhanced retrieval-augmented reasoning with open-source large language models,” Arxiv preprint arxiv:2410.01782, 2024.
[BibTeX]

@article{islam2024open,
  author  = {Islam, Shayekh Bin and Rahman, Md Asib and Hossain, KSM and Hoque, Enamul and Joty, Shafiq and Parvez, Md Rizwan},
  title   = {{OPEN}-{RAG}: Enhanced Retrieval-Augmented Reasoning with Open-Source Large Language Models},
  journal = {arXiv preprint arXiv:2410.01782},
  year    = {2024},
}

M. T. R. Laskar, S. Alqahtani, S. M. Bari, M. Rahman, M. A. M. Khan, H. Khan, I. Jahan, A. Bhuiyan, C. W. Tan, M. R. Parvez, and others, “A systematic survey and critical review on evaluating large language models: challenges, limitations, and recommendations,” Arxiv preprint arxiv:2407.04069, 2024.
[BibTeX]

@article{laskar2024systematic,
  author  = {Laskar, Md Tahmid Rahman and Alqahtani, Sawsan and Bari, M Saiful and Rahman, Mizanur and Khan, Mohammad Abdullah Matin and Khan, Haidar and Jahan, Israt and Bhuiyan, Amran and Tan, Chee Wei and Parvez, Md Rizwan and others},
  title   = {A Systematic Survey and Critical Review on Evaluating Large Language Models: Challenges, Limitations, and Recommendations},
  journal = {arXiv preprint arXiv:2407.04069},
  year    = {2024},
}

M. S. Islam, E. Hoque, S. Joty, M. T. R. Laskar, and M. R. Parvez, “DataNarrative: automated data-driven storytelling with visualizations and texts,” Arxiv preprint arxiv:2408.05346, 2024.
[BibTeX]

@article{islam2024datanarrative,
  author  = {Islam, Mohammed Saidul and Hoque, Enamul and Joty, Shafiq and Laskar, Md Tahmid Rahman and Parvez, Md Rizwan},
  title   = {{DataNarrative}: Automated Data-Driven Storytelling with Visualizations and Texts},
  journal = {arXiv preprint arXiv:2408.05346},
  year    = {2024},
}

A. Hussein, D. Zeinali, O. Klejch, M. Wiesner, B. Yan, S. Chowdhury, A. Ali, S. Watanabe, and S. Khudanpur, “Speech collage: code-switched audio generation by collaging monolingual corpora,” in Proc. of the ieee international conference on acoustics, speech and signal processing (icassp), 2024, p. 12006–12010.
[BibTeX]

@inproceedings{hussein2024speech,
  author       = {Hussein, Amir and Zeinali, Dorsa and Klejch, Ond{\v{r}}ej and Wiesner, Matthew and Yan, Brian and Chowdhury, Shammur and Ali, Ahmed and Watanabe, Shinji and Khudanpur, Sanjeev},
  title        = {Speech Collage: Code-Switched Audio Generation by Collaging Monolingual Corpora},
  booktitle    = {Proc. of the {IEEE} International Conference on Acoustics, Speech and Signal Processing ({ICASSP})},
  pages        = {12006--12010},
  year         = {2024},
  organization = {IEEE},
}

Y. El Kheir, A. Ali, and S. A. Chowdhury, “Speech representation analysis based on inter-and intra-model similarities,” in 2024 ieee international conference on acoustics, speech, and signal processing workshops (icasspw), 2024, p. 848–852.
[BibTeX]

@inproceedings{el2024speech,
  author       = {El Kheir, Yassine and Ali, Ahmed and Chowdhury, Shammur Absar},
  title        = {Speech Representation Analysis Based on Inter- and Intra-Model Similarities},
  booktitle    = {2024 {IEEE} International Conference on Acoustics, Speech, and Signal Processing Workshops ({ICASSPW})},
  pages        = {848--852},
  year         = {2024},
  organization = {IEEE},
}

Y. El Kheir, S. A. Chowdhury, and A. Ali, “L1-aware multilingual mispronunciation detection framework,” in Proc. of the ieee international conference on acoustics, speech and signal processing (icassp), 2024, p. 12752–12756.
[BibTeX]

@inproceedings{el2024l1,
  author       = {El Kheir, Yassine and Chowdhury, Shammur Absar and Ali, Ahmed},
  title        = {{L1}-Aware Multilingual Mispronunciation Detection Framework},
  booktitle    = {Proc. of the {IEEE} International Conference on Acoustics, Speech and Signal Processing ({ICASSP})},
  pages        = {12752--12756},
  year         = {2024},
  organization = {IEEE},
}

S. A. Chowdhury, N. Durrani, and A. Ali, “What do end-to-end speech models learn about speaker, language and channel information? a layer-wise and neuron-level analysis,” Computer speech and language, vol. 83, p. 101539, 2024. doi:10.1016/j.csl.2023.101539
[BibTeX] [Download PDF]

@article{chowdhury2024:csl,
  author   = {Chowdhury, Shammur Absar and Durrani, Nadir and Ali, Ahmed},
  title    = {What do end-to-end speech models learn about speaker, language and channel information? A layer-wise and neuron-level analysis},
  journal  = {Computer Speech and Language},
  address  = {London, UK},
  volume   = {83},
  pages    = {101539},
  year     = {2024},
  issn     = {0885-2308},
  doi      = {10.1016/j.csl.2023.101539},
  url      = {https://www.sciencedirect.com/science/article/pii/S088523082300058X},
  keywords = {Speech, Neuron-level analysis, Interpretability, Diagnostic classifier, AI explainability, End-to-end architecture},
  area     = {Representation Analysis},
}

2023

R. N. Nandi, M. Menon, T. Muntasir, S. Sarker, Q. S. Muhtaseem, M. T. Islam, S. Chowdhury, and F. Alam, “Pseudo-labeling for domain-agnostic Bangla automatic speech recognition,” in Proceedings of the first workshop on bangla language processing (blp-2023), Singapore, 2023, p. 152–162. doi:10.18653/v1/2023.banglalp-1.16
[BibTeX] [Abstract] [Download PDF]

One of the major challenges for developing automatic speech recognition (ASR) for low-resource languages is the limited access to labeled data with domain-specific variations. In this study, we propose a pseudo-labeling approach to develop a large-scale domain-agnostic ASR dataset. With the proposed methodology, we developed a 20k+ hours labeled Bangla speech dataset covering diverse topics, speaking styles, dialects, noisy environments, and conversational scenarios. We then exploited the developed corpus to design a conformer-based ASR system. We benchmarked the trained ASR with publicly available datasets and compared it with other available models. To investigate the efficacy, we designed and developed a human-annotated domain-agnostic test set composed of news, telephony, and conversational data among others. Our results demonstrate the efficacy of the model trained on pseudo-label data for the designed test-set along with publicly-available Bangla datasets. The experimental resources will be publicly available. https://github.com/hishab-nlp/Pseudo-Labeling-for-Domain-Agnostic-Bangla-ASR

@inproceedings{nandi-etal-2023-pseudo,
  abstract      = {One of the major challenges for developing automatic speech recognition (ASR) for low-resource languages is the limited access to labeled data with domain-specific variations. In this study, we propose a pseudo-labeling approach to develop a large-scale domain-agnostic ASR dataset. With the proposed methodology, we developed a 20k+ hours labeled Bangla speech dataset covering diverse topics, speaking styles, dialects, noisy environments, and conversational scenarios. We then exploited the developed corpus to design a conformer-based ASR system. We benchmarked the trained ASR with publicly available datasets and compared it with other available models. To investigate the efficacy, we designed and developed a human-annotated domain-agnostic test set composed of news, telephony, and conversational data among others. Our results demonstrate the efficacy of the model trained on pseudo-label data for the designed test-set along with publicly-available Bangla datasets. The experimental resources will be publicly available. https://github.com/hishab-nlp/Pseudo-Labeling-for-Domain-Agnostic-Bangla-ASR},
  address       = {Singapore},
  author        = {Nandi, Rabindra Nath and Menon, Mehadi and Muntasir, Tareq and Sarker, Sagor and Muhtaseem, Quazi Sarwar and Islam, Md. Tariqul and Chowdhury, Shammur and Alam, Firoj},
  booktitle     = {Proceedings of the First Workshop on Bangla Language Processing (BLP-2023)},
  date-modified = {2024-08-03 12:08:38 +0300},
  editor        = {Alam, Firoj and Kar, Sudipta and Chowdhury, Shammur Absar and Sadeque, Farig and Amin, Ruhul},
  month         = dec,
  pages         = {152--162},
  publisher     = {Association for Computational Linguistics},
  title         = {Pseudo-Labeling for Domain-Agnostic {B}angla Automatic Speech Recognition},
  year          = {2023},
  url           = {https://aclanthology.org/2023.banglalp-1.16},
  doi           = {10.18653/v1/2023.banglalp-1.16},
}

B. Mousi, N. Durrani, and F. Dalvi, “Can LLMs facilitate interpretation of pre-trained language models?,” in Proceedings of the 2023 conference on empirical methods in natural language processing, Singapore, 2023, p. 3248–3268. doi:10.18653/v1/2023.emnlp-main.196
[BibTeX] [Download PDF]

@inproceedings{mousi-etal-2023-llms,
  author    = {Mousi, Basel and Durrani, Nadir and Dalvi, Fahim},
  title     = {Can {LLM}s Facilitate Interpretation of Pre-trained Language Models?},
  editor    = {Bouamor, Houda and Pino, Juan and Bali, Kalika},
  booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing},
  month     = dec,
  year      = {2023},
  address   = {Singapore},
  publisher = {Association for Computational Linguistics},
  pages     = {3248--3268},
  doi       = {10.18653/v1/2023.emnlp-main.196},
  url       = {https://aclanthology.org/2023.emnlp-main.196},
  area      = {Large Language Models},
}

F. Dalvi, N. Durrani, and H. Sajjad, “Neurox library for neuron analysis of deep nlp models,” in Proceedings of the 61st annual meeting of the association for computational linguistics: system demonstrations, Toronto, Canada, 2023, p. 75–83.
[BibTeX]

@inproceedings{dalvi-etal-2023-neurox,
  author      = {Dalvi, Fahim and Durrani, Nadir and Sajjad, Hassan},
  title       = {{NeuroX} Library for Neuron Analysis of Deep {NLP} Models},
  booktitle   = {Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics: System Demonstrations},
  month       = jul,
  year        = {2023},
  address     = {Toronto, Canada},
  publisher   = {Association for Computational Linguistics},
  pages       = {75--83},
  area        = {Demos and Tools},
  review-note = {TODO confirm pages against the ACL Anthology record: 75--83 is identical to the page range of dalvi-etal-2023-nxplain and may be a copy-paste},
}

M. Hasanain, A. El-Shangiti, R. N. Nandi, P. Nakov, and F. Alam, “QCRI at SemEval-2023 task 3: news genre, framing and persuasion techniques detection using multilingual models,” in Proceedings of the 17th international workshop on semantic evaluation (SemEval-2023), Toronto, Canada, 2023, p. 1237–1244. doi:10.18653/v1/2023.semeval-1.172
[BibTeX] [Abstract] [Download PDF]

Misinformation spreading in mainstream and social media has been misleading users in different ways. Manual detection and verification efforts by journalists and fact-checkers can no longer cope with the great scale and quick spread of misleading information. This motivated research and industry efforts to develop systems for analyzing and verifying news spreading online. The SemEval-2023 Task 3 is an attempt to address several subtasks under this overarching problem, targeting writing techniques used in news articles to affect readers’ opinions. The task addressed three subtasks with six languages, in addition to three “surprise” test languages, resulting in 27 different test setups. This paper describes our participating system to this task. Our team is one of the 6 teams that successfully submitted runs for all setups. The official results show that our system is ranked among the top 3 systems for 10 out of the 27 setups.

@inproceedings{hasanain-etal-2023-qcri,
  author    = {Hasanain, Maram and El-Shangiti, Ahmed and Nandi, Rabindra Nath and Nakov, Preslav and Alam, Firoj},
  title     = {{QCRI} at {S}em{E}val-2023 Task 3: News Genre, Framing and Persuasion Techniques Detection Using Multilingual Models},
  editor    = {Ojha, Atul Kr. and Do{\u{g}}ru{\"o}z, A. Seza and Da San Martino, Giovanni and Tayyar Madabushi, Harish and Kumar, Ritesh and Sartori, Elisa},
  booktitle = {Proceedings of the 17th International Workshop on Semantic Evaluation (SemEval-2023)},
  month     = jul,
  year      = {2023},
  address   = {Toronto, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {1237--1244},
  doi       = {10.18653/v1/2023.semeval-1.172},
  url       = {https://aclanthology.org/2023.semeval-1.172},
  abstract  = {Misinformation spreading in mainstream and social media has been misleading users in different ways. Manual detection and verification efforts by journalists and fact-checkers can no longer cope with the great scale and quick spread of misleading information. This motivated research and industry efforts to develop systems for analyzing and verifying news spreading online. The SemEval-2023 Task 3 is an attempt to address several subtasks under this overarching problem, targeting writing techniques used in news articles to affect readers{'} opinions. The task addressed three subtasks with six languages, in addition to three {``}surprise{''} test languages, resulting in 27 different test setups. This paper describes our participating system to this task. Our team is one of the 6 teams that successfully submitted runs for all setups. The official results show that our system is ranked among the top 3 systems for 10 out of the 27 setups.},
}

F. Dalvi, N. Durrani, H. Sajjad, T. Jaban, M. Husaini, and U. Abbas, “NxPlain: a web-based tool for discovery of latent concepts,” in Proceedings of the 17th conference of the european chapter of the association for computational linguistics: system demonstrations, Dubrovnik, Croatia, 2023, p. 75–83.
[BibTeX]

@inproceedings{dalvi-etal-2023-nxplain,
  author    = {Dalvi, Fahim and Durrani, Nadir and Sajjad, Hassan and Jaban, Tamim and Husaini, Mus{'}ab and Abbas, Ummar},
  title     = {{NxPlain}: A Web-based Tool for Discovery of Latent Concepts},
  booktitle = {Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics: System Demonstrations},
  month     = may,
  year      = {2023},
  address   = {Dubrovnik, Croatia},
  publisher = {Association for Computational Linguistics},
  pages     = {75--83},
  area      = {Demos and Tools},
}

Y. El Kheir, A. Ali, and S. Chowdhury, “Automatic pronunciation assessment – a review,” in Proc. of the 2023 conference on empirical methods in natural language processing (EMNLP), 2023, p. 8304–8324.
[BibTeX]

@inproceedings{kheir2023automatic,
  author    = {El Kheir, Yassine and Ali, Ahmed and Chowdhury, Shammur},
  title     = {Automatic Pronunciation Assessment -- {A} Review},
  booktitle = {Proc. of the 2023 Conference on Empirical Methods in Natural Language Processing ({EMNLP})},
  pages     = {8304--8324},
  year      = {2023},
}

A. Barrón-Cedeño, F. Alam, T. Caselli, G. Da San Martino, T. Elsayed, A. Galassi, F. Haouari, F. Ruggeri, J. M. Struß, R. N. Nandi, and others, “The CLEF-2023 CheckThat! lab: checkworthiness, subjectivity, political bias, factuality, and authority,” in European conference on information retrieval, 2023, p. 506–517.
[BibTeX]

@inproceedings{barron2023clef,
  author       = {Barr{\'o}n-Cede{\~n}o, Alberto and Alam, Firoj and Caselli, Tommaso and Da San Martino, Giovanni and Elsayed, Tamer and Galassi, Andrea and Haouari, Fatima and Ruggeri, Federico and Stru{\ss}, Julia Maria and Nandi, Rabindra Nath and others},
  title        = {The {CLEF}-2023 {CheckThat}! Lab: Checkworthiness, Subjectivity, Political Bias, Factuality, and Authority},
  booktitle    = {European Conference on Information Retrieval},
  organization = {Springer},
  pages        = {506--517},
  year         = {2023},
}

H. Sajjad, F. Dalvi, N. Durrani, and P. Nakov, “On the effect of dropping layers of pre-trained transformer models,” Computer speech and language, vol. 77, iss. C, p. 101429, 2023. doi:10.1016/j.csl.2022.101429
[BibTeX] [Download PDF]

@article{sajjad2023:csl,
  author     = {Sajjad, Hassan and Dalvi, Fahim and Durrani, Nadir and Nakov, Preslav},
  title      = {On the Effect of Dropping Layers of Pre-trained Transformer Models},
  journal    = {Computer Speech and Language},
  volume     = {77},
  number     = {C},
  pages      = {101429},
  year       = {2023},
  issue_date = {January 2023},
  publisher  = {Academic Press Ltd.},
  address    = {London, UK},
  issn       = {0885-2308},
  doi        = {10.1016/j.csl.2022.101429},
  url        = {https://www.sciencedirect.com/science/article/pii/S0885230822000596},
  area       = {Transfer Learning},
}

Y. Fan, F. Dalvi, N. Durrani, and H. Sajjad, “Evaluating neuron interpretation methods of nlp models,” in Advances in neural information processing systems, 2023, p. 1–13.
[BibTeX]

@inproceedings{fan-et-al-2023-neuron-eval,
  author    = {Fan, Yimin and Dalvi, Fahim and Durrani, Nadir and Sajjad, Hassan},
  title     = {Evaluating Neuron Interpretation Methods of {NLP} Models},
  editor    = {Koyejo, S. and Mohamed, S. and Agarwal, A. and Belgrave, D. and Cho, K. and Oh, A.},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {36},
  pages     = {1--13},
  publisher = {Curran Associates, Inc.},
  year      = {2023},
  area      = {Neuron Analysis},
}

N. Durrani, F. Dalvi, and H. Sajjad, “Discovering salient neurons in deep nlp models,” Journal of machine learning research, vol. 24, iss. 362, p. 1–40, 2023.
[BibTeX]

@article{JMLR:v24:23-0074,
  author  = {Durrani, Nadir and Dalvi, Fahim and Sajjad, Hassan},
  title   = {Discovering Salient Neurons in Deep {NLP} Models},
  journal = {Journal of Machine Learning Research},
  year    = {2023},
  volume  = {24},
  number  = {362},
  pages   = {1--40},
  area    = {Neuron Analysis},
}

@comment{barron2023clef: this second definition repeated the key and the field values of the barron2023clef entry that appears earlier in this file; it was dropped so that BibTeX no longer reports a "repeated entry" for that key.}

G. Da San Martino, F. Alam, M. Hasanain, R. N. Nandi, D. Azizov, and P. Nakov, “Overview of the CLEF-2023 CheckThat! lab task 3 on political bias of news articles and news media,” in Working notes of CLEF 2023 – conference and labs of the evaluation forum, 2023.
[BibTeX]

@inproceedings{clef-checkthat:2023:task3,
  title         = {Overview of the {CLEF}-2023 {CheckThat}! Lab Task 3 on Political Bias of News Articles and News Media},
  author        = {Da San Martino, Giovanni and Alam, Firoj and Hasanain, Maram and Nandi, Rabindra Nath and Azizov, Dilshod and Nakov, Preslav},
  booktitle     = {Working Notes of {CLEF} 2023 - Conference and Labs of the Evaluation Forum},
  year          = {2023},
  crossref      = {clef2023-workingnotes},
  date-modified = {2024-08-03 12:30:07 +0300}
}

Y. E. Kheir, S. A. Chowdhury, and A. Ali, “Speechblender: speech augmentation framework for mispronunciation data generation,” in Proc. of the speech and language technology in education (slate), 2023.
[BibTeX]

@inproceedings{speechBlender2023,
  author    = {Kheir, Yassine El and Chowdhury, Shammur Absar and Ali, Ahmed},
  title     = {{SpeechBlender}: Speech Augmentation Framework for Mispronunciation Data Generation},
  booktitle = {Proc. of the Speech and Language Technology in Education (SLaTE)},
  year      = {2023}
}

A. Galassi, F. Ruggeri, A. Barrón-Cedeño, F. Alam, T. Caselli, M. Kutlu, J. M. Struss, F. Antici, M. Hasanain, J. Köhler, K. Korre, F. Leistra, A. Muti, M. Siegel, M. D. Turkmen, M. Wiegand, and W. Zaghouani, “Overview of the CLEF-2023 CheckThat! lab task 2 on subjectivity in news articles.” 2023.
[BibTeX]

@inproceedings{clef-checkthat:2023:task2,
  author        = {Galassi, Andrea and Ruggeri, Federico and Barr{\'o}n-Cede{\~n}o, Alberto and Alam, Firoj and Caselli, Tommaso and Kutlu, Mucahid and Struss, Julia Maria and Antici, Francesco and Hasanain, Maram and K{\"o}hler, Juliane and Korre, Katerina and Leistra, Folkert and Muti, Arianna and Siegel, Melanie and Turkmen, Mehmet Deniz and Wiegand, Michael and Zaghouani, Wajdi},
  title         = {Overview of the {CLEF}-2023 {CheckThat}! Lab Task 2 on Subjectivity in News Articles},
  year          = {2023},
  crossref      = {clef2023-workingnotes},
  date-modified = {2024-08-03 12:29:30 +0300}
}

@article{sajjad2023:csl,
  author     = {Sajjad, Hassan and Dalvi, Fahim and Durrani, Nadir and Nakov, Preslav},
  title      = {On the Effect of Dropping Layers of Pre-trained Transformer Models},
  journal    = {Computer Speech and Language},
  volume     = {77},
  number     = {C},
  pages      = {101429},
  year       = {2023},
  issue_date = {January 2023},
  publisher  = {Academic Press Ltd.},
  address    = {London, UK},
  issn       = {0885-2308},
  doi        = {10.1016/j.csl.2022.101429},
  url        = {https://www.sciencedirect.com/science/article/pii/S0885230822000596},
  area       = {Transfer Learning}
}

H. Sajjad, N. Durrani, and F. Dalvi, “Neuron-level Interpretation of Deep NLP Models: A Survey,” Transactions of the association for computational linguistics, vol. 11, 2023.
[BibTeX] [Abstract]

The proliferation of deep neural networks in various domains has seen an increased need for interpretability of these models. Preliminary work done along this line and papers that surveyed such, are focused on high-level representation analysis. However, a recent branch of work has concentrated on interpretability at a more granular level of analyzing neurons within these models. In this paper, we survey the work done on neuron analysis including: i) methods to discover and understand neurons in a network, ii) evaluation methods, iii) major findings including cross architectural comparisons that neuron analysis has unraveled, iv) applications of neuron probing such as: controlling the model, domain adaptation etc., and v) a discussion on open issues and future research directions.

@article{sajjad-neuron-survey,
  author    = {Sajjad, Hassan and Durrani, Nadir and Dalvi, Fahim},
  title     = {Neuron-level {I}nterpretation of {D}eep {NLP} {M}odels: {A} {S}urvey},
  journal   = {Transactions of the Association for Computational Linguistics},
  volume    = {11},
  year      = {2023},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  abstract  = {The proliferation of deep neural networks in various domains has seen an increased need for interpretability of these models. Preliminary work done along this line and papers that surveyed such, are focused on high-level representation analysis. However, a recent branch of work has concentrated on interpretability at a more granular level of analyzing neurons within these models. In this paper, we survey the work done on neuron analysis including: i) methods to discover and understand neurons in a network, ii) evaluation methods, iii) major findings including cross architectural comparisons that neuron analysis has unraveled, iv) applications of neuron probing such as: controlling the model, domain adaptation etc., and v) a discussion on open issues and future research directions.}
}

M. Hasanain, F. Alam, H. Mubarak, S. Abdaljalil, W. Zaghouani, P. Nakov, G. Da San Martino, and A. Freihat, “ArAIEval shared task: persuasion techniques and disinformation detection in Arabic text,” in Proceedings of arabicnlp 2023, Singapore (Hybrid), 2023, p. 483–493.
[BibTeX] [Download PDF]

@inproceedings{hasanain-etal-2023-araieval,
  author        = {Hasanain, Maram and Alam, Firoj and Mubarak, Hamdy and Abdaljalil, Samir and Zaghouani, Wajdi and Nakov, Preslav and Da San Martino, Giovanni and Freihat, Abed},
  title         = {{ArAIEval} Shared Task: Persuasion Techniques and Disinformation Detection in {Arabic} Text},
  booktitle     = {Proceedings of ArabicNLP 2023},
  editor        = {Sawaf, Hassan and El-Beltagy, Samhaa and Zaghouani, Wajdi and Magdy, Walid and Abdelali, Ahmed and Tomeh, Nadi and Abu Farha, Ibrahim and Habash, Nizar and Khalifa, Salam and Keleg, Amr and Haddad, Hatem and Zitouni, Imed and Mrini, Khalil and Almatham, Rawan},
  month         = dec,
  year          = {2023},
  address       = {Singapore (Hybrid)},
  publisher     = {Association for Computational Linguistics},
  pages         = {483--493},
  doi           = {10.18653/v1/2023.arabicnlp-1.44},
  url           = {https://aclanthology.org/2023.arabicnlp-1.44},
  bdsk-url-2    = {https://doi.org/10.18653/v1/2023.arabicnlp-1.44},
  date-modified = {2024-08-03 11:44:50 +0300},
  bibtex_show   = {true}
}

S. Boughorbel and M. Hawasly, “Analyzing multilingual competency of llms in multi-turn instruction following: a case study of arabic,” in Proceedings of arabicnlp 2023, 2023, p. 128–139.
[BibTeX]

@inproceedings{boughorbel2023analyzing,
  author    = {Boughorbel, Sabri and Hawasly, Majd},
  title     = {Analyzing Multilingual Competency of {LLMs} in Multi-Turn Instruction Following: A Case Study of {Arabic}},
  booktitle = {Proceedings of ArabicNLP 2023},
  pages     = {128--139},
  year      = {2023}
}

Y. Elshahawy, Y. E. Kheir, S. A. Chowdhury, and A. Ali, “Myvoice: arabic speech resource collaboration platform,” in Interspeech, 2023.
[BibTeX]

@inproceedings{elshahawy2023myvoice,
  author    = {Elshahawy, Yousseif and Kheir, Yassine El and Chowdhury, Shammur Absar and Ali, Ahmed},
  title     = {{MyVoice}: {Arabic} Speech Resource Collaboration Platform},
  booktitle = {INTERSPEECH},
  year      = {2023}
}

Y. E. Kheir, F. Khnaisser, S. A. Chowdhury, H. Mubarak, S. Afzal, and A. Ali, “Qvoice: arabic speech pronunciation learning application,” in Interspeech, 2023.
[BibTeX]

@inproceedings{kheir2023qvoice,
  author    = {Kheir, Yassine El and Khnaisser, Fouad and Chowdhury, Shammur Absar and Mubarak, Hamdy and Afzal, Shazia and Ali, Ahmed},
  title     = {{QVoice}: {Arabic} Speech Pronunciation Learning Application},
  booktitle = {INTERSPEECH},
  year      = {2023}
}

S. A. Chowdhury and A. Ali, “Multilingual word error rate estimation: e-wer3,” in Icassp 2023-2023 ieee international conference on acoustics, speech and signal processing (icassp), 2023.
[BibTeX]

@inproceedings{chowdhury2023multilingual,
  author    = {Chowdhury, Shammur Absar and Ali, Ahmed},
  title     = {Multilingual Word Error Rate Estimation: {E-WER3}},
  booktitle = {ICASSP 2023-2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  year      = {2023}
}

@article{chowdhury2023end,
  author    = {Chowdhury, Shammur Absar and Durrani, Nadir and Ali, Ahmed},
  title     = {What do end-to-end speech models learn about speaker, language and channel information? a layer-wise and neuron-level analysis},
  journal   = {Computer Speech \& Language},
  publisher = {Elsevier},
  volume    = {83},
  pages     = {101539},
  year      = {2023}
}

Y. E. Kheir, S. A. Chowdhury, and A. Ali, “Multi-view multi-task representation learning for mispronunciation detection,” in Proc. of the speech and language technology in education (slate), 2023.
[BibTeX]

@inproceedings{multiview2023,
  title     = {Multi-View Multi-Task Representation Learning for Mispronunciation Detection},
  author    = {Kheir, Yassine El and Chowdhury, Shammur Absar and Ali, Ahmed},
  year      = {2023},
  booktitle = {Proc. of the Speech and Language Technology in Education (SLaTE)}
}

F. Alam, F. Dalvi, N. Durrani, H. Sajjad, A. R. Khan, and J. Xu, “ConceptX: a framework for latent concept analysis,” in Proceedings of the thirty-seventh aaai conference on artificial intelligence (aaai, poster presentation), 2023, p. 16395–16397.
[BibTeX]

@inproceedings{alam:2023:AAAI,
  author    = {Alam, Firoj and Dalvi, Fahim and Durrani, Nadir and Sajjad, Hassan and Khan, Abdul Rafae and Xu, Jia},
  title     = {{ConceptX}: A Framework for Latent Concept Analysis},
  booktitle = {Proceedings of the Thirty-Seventh AAAI Conference on Artificial Intelligence (AAAI, Poster presentation)},
  month     = feb,
  year      = {2023},
  pages     = {16395--16397},
  area      = {Demos and Tools}
}

2022

N. Durrani, H. Sajjad, F. Dalvi, and F. Alam, “On the transformation of latent space in fine-tuned nlp models,” in The 2022 conference on empirical methods in natural language processing, Abu Dhabi, United Arab Emirates, 2022.
[BibTeX]

@inproceedings{nadir:emnlp:2022,
  author    = {Durrani, Nadir and Sajjad, Hassan and Dalvi, Fahim and Alam, Firoj},
  title     = {On the Transformation of Latent Space in Fine-Tuned {NLP} Models},
  booktitle = {The 2022 Conference on Empirical Methods in Natural Language Processing},
  series    = {EMNLP~'22},
  month     = dec,
  year      = {2022},
  address   = {Abu Dhabi, United Arab Emirates},
  publisher = {Association for Computational Linguistics}
}

S. Shaar, N. Georgiev, F. Alam, G. Da San Martino, A. Mohamed, and P. Nakov, “Assisting the human fact-checkers: detecting all previously fact-checked claims in a document,” in The 2022 conference on empirical methods in natural language processing, Abu Dhabi, United Arab Emirates, 2022.
[BibTeX]

@inproceedings{shaar-etal-2022-assisting,
  author    = {Shaar, Shaden and Georgiev, Nikola and Alam, Firoj and Da San Martino, Giovanni and Mohamed, Aisha and Nakov, Preslav},
  title     = {Assisting the Human Fact-Checkers: Detecting All Previously Fact-Checked Claims in a Document},
  booktitle = {The 2022 Conference on Empirical Methods in Natural Language Processing},
  month     = dec,
  year      = {2022},
  address   = {Abu Dhabi, United Arab Emirates},
  publisher = {Association for Computational Linguistics}
}

A. Abdelali, N. Durrani, F. Dalvi, and H. Sajjad, “Post-hoc analysis of arabic transformer models,” in Proceedings of the fifth blackboxnlp workshop on analyzing and interpreting neural networks for nlp, Abu Dhabi, United Arab Emirates, 2022.
[BibTeX] [Abstract]

Arabic is a Semitic language which is widely spoken with many dialects. Given the success of pre-trained language models, many transformer models trained on Arabic and its dialects have surfaced. While there have been an extrinsic evaluation of these models with respect to downstream NLP tasks, no work has been carried out to analyze and compare their internal representations. We probe how linguistic information is encoded in the transformer models, trained on different Arabic dialects. We perform a layer and neuron analysis on the models using morphological tagging tasks for different dialects of Arabic and a dialectal identification task. Our analysis enlightens interesting findings such as: i) word morphology is learned at the lower and middle layers, ii) while syntactic dependencies are predominantly captured at the higher layers, iii) despite a large overlap in their vocabulary, the MSA-based models fail to capture the nuances of Arabic dialects, iv) we found that neurons in embedding layers are polysemous in nature, while the neurons in middle layers are exclusive to specific properties.

@inproceedings{abdelali-2021-arabic-transformers,
  author    = {Abdelali, Ahmed and Durrani, Nadir and Dalvi, Fahim and Sajjad, Hassan},
  title     = {Post-hoc Analysis of {Arabic} Transformer Models},
  booktitle = {Proceedings of the Fifth BlackboxNLP Workshop on Analyzing and Interpreting Neural Networks for NLP},
  month     = dec,
  year      = {2022},
  address   = {Abu Dhabi, United Arab Emirates},
  publisher = {Association for Computational Linguistics},
  abstract  = {Arabic is a Semitic language which is widely spoken with many dialects. Given the success of pre-trained language models, many transformer models trained on Arabic and its dialects have surfaced. While there have been an extrinsic evaluation of these models with respect to downstream NLP tasks, no work has been carried out to analyze and compare their internal representations. We probe how linguistic information is encoded in the transformer models, trained on different Arabic dialects. We perform a layer and neuron analysis on the models using morphological tagging tasks for different dialects of Arabic and a dialectal identification task. Our analysis enlightens interesting findings such as: i) word morphology is learned at the lower and middle layers, ii) while syntactic dependencies are predominantly captured at the higher layers, iii) despite a large overlap in their vocabulary, the MSA-based models fail to capture the nuances of Arabic dialects, iv) we found that neurons in embedding layers are polysemous in nature, while the neurons in middle layers are exclusive to specific properties.}
}

F. Alam, H. Mubarak, W. Zaghouani, G. Da San Martino, and P. Nakov, “Overview of the WANLP 2022 shared task on propaganda detection in Arabic,” in The seventh arabic natural language processing workshop (wanlp 2022) at emnlp 2022, Abu Dhabi, United Arab Emirates, 2022.
[BibTeX]

@inproceedings{alam-etal-2022-wanlp,
  author    = {Alam, Firoj and Mubarak, Hamdy and Zaghouani, Wajdi and Da San Martino, Giovanni and Nakov, Preslav},
  title     = {Overview of the {WANLP} 2022 Shared Task on Propaganda Detection in {Arabic}},
  booktitle = {The Seventh Arabic Natural Language Processing Workshop (WANLP 2022) at EMNLP 2022},
  month     = dec,
  year      = {2022},
  address   = {Abu Dhabi, United Arab Emirates},
  publisher = {Association for Computational Linguistics}
}

H. Mubarak, S. A. Chowdhury, and F. Alam, “ArabGend: gender analysis and inference on Arabic Twitter,” in Proceedings of the eighth workshop on noisy user-generated text (w-nut 2022), Gyeongju, Republic of Korea, 2022, p. 124–135.
[BibTeX] [Download PDF]

@inproceedings{mubarak2022arabgend,
  author    = {Mubarak, Hamdy and Chowdhury, Shammur Absar and Alam, Firoj},
  title     = {{ArabGend}: Gender Analysis and Inference on {Arabic} {Twitter}},
  booktitle = {Proceedings of the Eighth Workshop on Noisy User-generated Text (W-NUT 2022)},
  month     = oct,
  year      = {2022},
  address   = {Gyeongju, Republic of Korea},
  publisher = {Association for Computational Linguistics},
  pages     = {124--135},
  url       = {https://aclanthology.org/2022.wnut-1.14}
}

F. Alam, S. Cresci, T. Chakraborty, F. Silvestri, D. Dimitrov, G. Da San Martino, S. Shaar, H. Firooz, and P. Nakov, “A survey on multimodal disinformation detection,” in Proceedings of the 29th international conference on computational linguistics, Gyeongju, Republic of Korea, 2022, p. 6625–6643.
[BibTeX] [Abstract] [Download PDF]

Recent years have witnessed the proliferation of offensive content online such as fake news, propaganda, misinformation, and disinformation. While initially this was mostly about textual content, over time images and videos gained popularity, as they are much easier to consume, attract more attention, and spread further than text. As a result, researchers started leveraging different modalities and combinations thereof to tackle online multimodal offensive content. In this study, we offer a survey on the state-of-the-art on multimodal disinformation detection covering various combinations of modalities: text, images, speech, video, social media network structure, and temporal information. Moreover, while some studies focused on factuality, others investigated how harmful the content is. While these two components in the definition of disinformation – (i) factuality, and (ii) harmfulness –, are equally important, they are typically studied in isolation. Thus, we argue for the need to tackle disinformation detection by taking into account multiple modalities as well as both factuality and harmfulness, in the same framework. Finally, we discuss current challenges and future research directions.

@inproceedings{alam-etal-2022-survey,
  author    = {Alam, Firoj and Cresci, Stefano and Chakraborty, Tanmoy and Silvestri, Fabrizio and Dimitrov, Dimiter and Da San Martino, Giovanni and Shaar, Shaden and Firooz, Hamed and Nakov, Preslav},
  title     = {A Survey on Multimodal Disinformation Detection},
  booktitle = {Proceedings of the 29th International Conference on Computational Linguistics},
  month     = oct,
  year      = {2022},
  address   = {Gyeongju, Republic of Korea},
  publisher = {International Committee on Computational Linguistics},
  pages     = {6625--6643},
  url       = {https://aclanthology.org/2022.coling-1.576},
  abstract  = {Recent years have witnessed the proliferation of offensive content online such as fake news, propaganda, misinformation, and disinformation. While initially this was mostly about textual content, over time images and videos gained popularity, as they are much easier to consume, attract more attention, and spread further than text. As a result, researchers started leveraging different modalities and combinations thereof to tackle online multimodal offensive content. In this study, we offer a survey on the state-of-the-art on multimodal disinformation detection covering various combinations of modalities: text, images, speech, video, social media network structure, and temporal information. Moreover, while some studies focused on factuality, others investigated how harmful the content is. While these two components in the definition of disinformation {--} (i) factuality, and (ii) harmfulness {--}, are equally important, they are typically studied in isolation. Thus, we argue for the need to tackle disinformation detection by taking into account multiple modalities as well as both factuality and harmfulness, in the same framework. Finally, we discuss current challenges and future research directions.}
}

H. Sajjad, F. Alam, F. Dalvi, and N. Durrani, “Effect of post-processing on contextualized word representations,” in Proceedings of the 29th international conference on computational linguistics, Gyeongju, Republic of Korea, 2022, p. 3127–3142.
[BibTeX] [Download PDF]

@inproceedings{sajjad-etal-2022-effect,
  author    = {Sajjad, Hassan and Alam, Firoj and Dalvi, Fahim and Durrani, Nadir},
  title     = {Effect of Post-processing on Contextualized Word Representations},
  booktitle = {Proceedings of the 29th International Conference on Computational Linguistics},
  month     = oct,
  year      = {2022},
  address   = {Gyeongju, Republic of Korea},
  publisher = {International Committee on Computational Linguistics},
  pages     = {3127--3142},
  url       = {https://aclanthology.org/2022.coling-1.277},
  area      = {Representation Analysis}
}

S. Shaar, F. Alam, G. Da San Martino, and P. Nakov, “The role of context in detecting previously fact-checked claims,” in Findings of the association for computational linguistics: naacl 2022, Seattle, United States, 2022, p. 1619–1631. doi:10.18653/v1/2022.findings-naacl.122
[BibTeX] [Abstract] [Download PDF]

Recent years have seen the proliferation of disinformation and fake news online. Traditional approaches to mitigate these issues is to use manual or automatic fact-checking. Recently, another approach has emerged: checking whether the input claim has previously been fact-checked, which can be done automatically, and thus fast, while also offering credibility and explainability, thanks to the human fact-checking and explanations in the associated fact-checking article. Here, we focus on claims made in a political debate and we study the impact of modeling the context of the claim: both on the source side, i.e., in the debate, as well as on the target side, i.e., in the fact-checking explanation document. We do this by modeling the local context, the global context, as well as by means of co-reference resolution, and multi-hop reasoning over the sentences of the document describing the fact-checked claim. The experimental results show that each of these represents a valuable information source, but that modeling the source-side context is most important, and can yield 10+ points of absolute improvement over a state-of-the-art model.

@inproceedings{shaar-etal-2022-role,
  author    = {Shaar, Shaden and Alam, Firoj and Da San Martino, Giovanni and Nakov, Preslav},
  title     = {The Role of Context in Detecting Previously Fact-Checked Claims},
  booktitle = {Findings of the Association for Computational Linguistics: NAACL 2022},
  month     = jul,
  year      = {2022},
  address   = {Seattle, United States},
  publisher = {Association for Computational Linguistics},
  pages     = {1619--1631},
  doi       = {10.18653/v1/2022.findings-naacl.122},
  url       = {https://aclanthology.org/2022.findings-naacl.122},
  abstract  = {Recent years have seen the proliferation of disinformation and fake news online. Traditional approaches to mitigate these issues is to use manual or automatic fact-checking. Recently, another approach has emerged: checking whether the input claim has previously been fact-checked, which can be done automatically, and thus fast, while also offering credibility and explainability, thanks to the human fact-checking and explanations in the associated fact-checking article. Here, we focus on claims made in a political debate and we study the impact of modeling the context of the claim: both on the source side, i.e., in the debate, as well as on the target side, i.e., in the fact-checking explanation document. We do this by modeling the local context, the global context, as well as by means of co-reference resolution, and multi-hop reasoning over the sentences of the document describing the fact-checked claim. The experimental results show that each of these represents a valuable information source, but that modeling the source-side context is most important, and can yield 10+ points of absolute improvement over a state-of-the-art model.}
}

H. Sajjad, N. Durrani, F. Dalvi, F. Alam, A. R. Khan, and J. Xu, “Analyzing encoded concepts in transformer language models,” in Proceedings of the 2022 conference of the north american chapter of the association for computational linguistics, Seattle, Washington, USA, 2022.
[BibTeX]

@inproceedings{sajjad:naacl:2022,
  author    = {Sajjad, Hassan and Durrani, Nadir and Dalvi, Fahim and Alam, Firoj and Khan, Abdul Rafae and Xu, Jia},
  title     = {Analyzing Encoded Concepts in Transformer Language Models},
  booktitle = {Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics},
  series    = {NAACL~'22},
  month     = jul,
  year      = {2022},
  address   = {Seattle, Washington, USA},
  publisher = {Association for Computational Linguistics}
}

H. Mubarak, S. Hassan, S. A. Chowdhury, and F. Alam, “ArCovidVac: analyzing arabic tweets about COVID-19 vaccination,” in Proceedings of the language resources and evaluation conference, Marseille, France, 2022, p. 3220–3230.
[BibTeX] [Abstract] [Download PDF]

The emergence of the COVID-19 pandemic and the first global infodemic have changed our lives in many different ways. We relied on social media to get the latest information about COVID-19 pandemic and at the same time to disseminate information. The content in social media consisted not only health related advice, plans, and informative news from policymakers, but also contains conspiracies and rumors. It became important to identify such information as soon as they are posted to make an actionable decision (e.g., debunking rumors, or taking certain measures for traveling). To address this challenge, we develop and publicly release the first largest manually annotated Arabic tweet dataset, ArCovidVac, for COVID-19 vaccination campaign, covering many countries in the Arab region. The dataset is enriched with different layers of annotation, including, (i) Informativeness more vs. less importance of the tweets); (ii) fine-grained tweet content types (e.g., advice, rumors, restriction, authenticate news/information); and (iii) stance towards vaccination (pro-vaccination, neutral, anti-vaccination). Further, we performed in-depth analysis of the data, exploring the popularity of different vaccines, trending hashtags, topics, and presence of offensiveness in the tweets. We studied the data for individual types of tweets and temporal changes in stance towards vaccine. We benchmarked the ArCovidVac dataset using transformer architectures for informativeness, content types, and stance detection.

@inproceedings{mubarak-EtAl:2022:LREC,
  author    = {Mubarak, Hamdy and Hassan, Sabit and Chowdhury, Shammur Absar and Alam, Firoj},
  title     = {{ArCovidVac}: Analyzing {Arabic} Tweets About {COVID-19} Vaccination},
  booktitle = {Proceedings of the Language Resources and Evaluation Conference},
  month     = jun,
  year      = {2022},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  pages     = {3220--3230},
  abstract  = {The emergence of the COVID-19 pandemic and the first global infodemic have changed our lives in many different ways. We relied on social media to get the latest information about COVID-19 pandemic and at the same time to disseminate information. The content in social media consisted not only health related advice, plans, and informative news from policymakers, but also contains conspiracies and rumors. It became important to identify such information as soon as they are posted to make an actionable decision (e.g., debunking rumors, or taking certain measures for traveling). To address this challenge, we develop and publicly release the first largest manually annotated Arabic tweet dataset, ArCovidVac, for COVID-19 vaccination campaign, covering many countries in the Arab region. The dataset is enriched with different layers of annotation, including, (i) Informativeness more vs. less importance of the tweets); (ii) fine-grained tweet content types (e.g., advice, rumors, restriction, authenticate news/information); and (iii) stance towards vaccination (pro-vaccination, neutral, anti-vaccination). Further, we performed in-depth analysis of the data, exploring the popularity of different vaccines, trending hashtags, topics, and presence of offensiveness in the tweets. We studied the data for individual types of tweets and temporal changes in stance towards vaccine. We benchmarked the ArCovidVac dataset using transformer architectures for informativeness, content types, and stance detection.},
  url       = {https://aclanthology.org/2022.lrec-1.344}
}

K. Huang, K. McKeown, P. Nakov, Y. Choi, and H. Ji, “Faking fake news for real fake news detection: propaganda-loaded training data generation,” Arxiv preprint arxiv:2203.05386, 2022.
[BibTeX]

@article{huang2022faking,
  author  = {Huang, Kung-Hsiang and McKeown, Kathleen and Nakov, Preslav and Choi, Yejin and Ji, Heng},
  title   = {Faking Fake News for Real Fake News Detection: Propaganda-loaded Training Data Generation},
  journal = {arXiv preprint arXiv:2203.05386},
  year    = {2022},
  url     = {https://arxiv.org/abs/2203.05386}
}

S. Muresan, P. Nakov, and A. Villavicencio, “Findings of the association for computational linguistics: acl 2022,” in Findings of the Association for Computational Linguistics: ACL 2022, 2022.
[BibTeX]

@inproceedings{muresan2022findings,
  title     = {Findings of the Association for Computational Linguistics: ACL 2022},
  author    = {Muresan, Smaranda and Nakov, Preslav and Villavicencio, Aline},
  booktitle = {{Findings of the Association for Computational Linguistics: ACL 2022}},
  year      = {2022}
}

S. Sharma, T. Suresh, A. Kulkarni, H. Mathur, P. Nakov, M. S. Akhtar, and T. Chakraborty, “Findings of the CONSTRAINT 2022 shared task on detecting the hero, the villain, and the victim in memes,” in Proceedings of the workshop on combating online hostile posts in regional languages during emergency situations, 2022, p. 1–11.
[BibTeX]

@inproceedings{sharma2022findings,
  title     = {Findings of the {CONSTRAINT 2022} Shared Task on Detecting the Hero, the Villain, and the Victim in Memes},
  author    = {Sharma, Shivam and Suresh, Tharun and Kulkarni, Atharva and Mathur, Himanshi and Nakov, Preslav and Akhtar, Md Shad and Chakraborty, Tanmoy},
  booktitle = {Proceedings of the Workshop on Combating Online Hostile Posts in Regional Languages during Emergency Situations},
  year      = {2022},
  pages     = {1--11}
}

S. Sharma, T. Suresh, A. Jitendra, H. Mathur, and P. Nakov, “Findings of the constraint 2022 shared task on detecting the hero, the villain, and the victim in memes,” in Proceedings of the workshop on combating online hostile posts in regional languages during emergency situations-CONSTRAINT, 2022.
[BibTeX]

@inproceedings{sharma2022md,
  author    = {Sharma, Shivam and Suresh, Tharun and Jitendra, Atharva and Mathur, Himanshi and Nakov, Preslav},
  title     = {Findings of the {CONSTRAINT 2022} Shared Task on Detecting the Hero, the Villain, and the Victim in Memes},
  booktitle = {Proceedings of the Workshop on Combating Online Hostile Posts in Regional Languages during Emergency Situations-{CONSTRAINT}},
  year      = {2022}
}

R. N. Nandi, F. Alam, and P. Nakov, “Detecting the role of an entity in harmful memes: techniques and their limitations,” in Proceedings of the workshop on combating online hostile posts in regional languages during emergency situations, 2022, p. 43–54.
[BibTeX]

@inproceedings{nandi2022detecting,
  title     = {Detecting the Role of an Entity in Harmful Memes: Techniques and their Limitations},
  author    = {Nandi, Rabindra Nath and Alam, Firoj and Nakov, Preslav},
  booktitle = {Proceedings of the Workshop on Combating Online Hostile Posts in Regional Languages during Emergency Situations},
  year      = {2022},
  pages     = {43--54}
}

S. Masud, P. Pinkesh, A. Das, M. Gupta, P. Nakov, and T. Chakraborty, “Half-day tutorial on combating online hate speech: the role of content, networks, psychology, user behavior, etc.,” in Proceedings of the fifteenth acm international conference on web search and data mining, 2022, p. 1629–1631.
[BibTeX]

@inproceedings{masud2022half,
  title     = {Half-Day Tutorial on Combating Online Hate Speech: The Role of Content, Networks, Psychology, User Behavior, etc.},
  author    = {Masud, Sarah and Pinkesh, Pinkesh and Das, Amitava and Gupta, Manish and Nakov, Preslav and Chakraborty, Tanmoy},
  booktitle = {Proceedings of the Fifteenth ACM International Conference on Web Search and Data Mining},
  year      = {2022},
  pages     = {1629--1631}
}

S. Muresan, P. Nakov, and A. Villavicencio, “Proceedings of the 60th annual meeting of the association for computational linguistics,” in Proceedings of the 60th annual meeting of the association for computational linguistics, 2022.
[BibTeX]

@inproceedings{muresan2022proceedings,
  title     = {Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics},
  author    = {Muresan, Smaranda and Nakov, Preslav and Villavicencio, Aline},
  booktitle = {Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics},
  year      = {2022}
}

T. Chakraborty, M. S. Akhtar, K. Shu, R. H. Bernard, M. Liakata, P. Nakov, S. Sharma, C. Sharma, S. Kumar, Y. K. Atri, and others, “Proceedings of the workshop on combating online hostile posts in regional languages during emergency situations,” in Proceedings of the workshop on combating online hostile posts in regional languages during emergency situations, 2022.
[BibTeX]

@proceedings{chakraborty2022proceedings,
  editor    = {Chakraborty, Tanmoy and Akhtar, Md Shad and Shu, Kai and Bernard, H. Russell and Liakata, Maria and Nakov, Preslav and Sharma, Shivam and Sharma, Chhavi and Kumar, Shivani and Atri, Yash Kumar and others},
  title     = {Proceedings of the Workshop on Combating Online Hostile Posts in Regional Languages during Emergency Situations},
  booktitle = {Proceedings of the Workshop on Combating Online Hostile Posts in Regional Languages during Emergency Situations},
  year      = {2022},
}

P. Nakov, F. Alam, Y. Zhang, A. Prakash, and F. Dalvi, “QCRI’s COVID-19 disinformation detector: a system to fight the COVID-19 infodemic in social media,” Arxiv preprint arxiv:2204.03506, 2022.
[BibTeX]

@article{nakov2022qcri,
  author        = {Nakov, Preslav and Alam, Firoj and Zhang, Yifan and Prakash, Animesh and Dalvi, Fahim},
  title         = {{QCRI's} {COVID-19} Disinformation Detector: A System to Fight the {COVID-19} Infodemic in Social Media},
  journal       = {arXiv preprint arXiv:2204.03506},
  eprint        = {2204.03506},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2204.03506},
  year          = {2022},
}

P. Nakov, G. Da San Martino, and F. Alam, “Fact-checking, fake news, propaganda, media bias, and the COVID-19 infodemic,” in Proceedings of the fifteenth acm international conference on web search and data mining, 2022, p. 1632–1634.
[BibTeX]

@inproceedings{nakov2022fact,
  author    = {Nakov, Preslav and Da San Martino, Giovanni and Alam, Firoj},
  title     = {Fact-Checking, Fake News, Propaganda, Media Bias, and the {COVID-19} Infodemic},
  booktitle = {Proceedings of the Fifteenth {ACM} International Conference on Web Search and Data Mining},
  pages     = {1632--1634},
  year      = {2022},
}

I. Hamed, A. Hussein, O. Chellah, S. Chowdhury, H. Mubarak, S. Sitaram, N. Habash, and A. Ali, “Benchmarking evaluation metrics for code-switching automatic speech recognition,” in Proc. of the 2022 ieee spoken language technology workshop (slt), 2022, p. 999–1005.
[BibTeX]

@inproceedings{hamed2023benchmarking,
  author       = {Hamed, Injy and Hussein, Amir and Chellah, Oumnia and Chowdhury, Shammur and Mubarak, Hamdy and Sitaram, Sunayana and Habash, Nizar and Ali, Ahmed},
  title        = {Benchmarking Evaluation Metrics for Code-Switching Automatic Speech Recognition},
  booktitle    = {Proc. of the 2022 {IEEE} Spoken Language Technology Workshop ({SLT})},
  pages        = {999--1005},
  year         = {2022},
  organization = {IEEE},
}

S. Sharma, F. Alam, M. S. Akhtar, D. Dimitrov, G. Da San Martino, H. Firooz, A. Halevy, F. Silvestri, P. Nakov, and T. Chakraborty, “Detecting and understanding harmful memes: a survey,” in Proceedings of the thirty-first international joint conference on artificial intelligence, Vienna, Austria, 2022, p. 5597–5606. doi:10.24963/ijcai.2022/781
[BibTeX] [Download PDF]

@inproceedings{sharma2022detecting,
  author    = {Sharma, Shivam and Alam, Firoj and Akhtar, Md. Shad and Dimitrov, Dimitar and Da San Martino, Giovanni and Firooz, Hamed and Halevy, Alon and Silvestri, Fabrizio and Nakov, Preslav and Chakraborty, Tanmoy},
  title     = {Detecting and Understanding Harmful Memes: A Survey},
  booktitle = {Proceedings of the Thirty-First International Joint Conference on Artificial Intelligence},
  series    = {IJCAI~'22},
  editor    = {De Raedt, Luc},
  publisher = {International Joint Conferences on Artificial Intelligence Organization},
  address   = {Vienna, Austria},
  pages     = {5597--5606},
  year      = {2022},
  month     = jul,
  note      = {Survey Track},
  doi       = {10.24963/ijcai.2022/781},
  url       = {https://doi.org/10.24963/ijcai.2022/781},
}

P. Nakov, “Computational linguistics for subjectivity,” Creating a more transparent internet: the perspective web, p. 31, 2022.
[BibTeX]

@incollection{nakov2022computational,
  author    = {Nakov, Preslav},
  title     = {Computational Linguistics for Subjectivity},
  booktitle = {Creating a More Transparent Internet: The Perspective Web},
  publisher = {Cambridge University Press},
  pages     = {31},
  year      = {2022},
}

K. Ahmad, F. Alam, J. Qadir, B. Qolomany, I. Khan, T. Khan, M. Suleman, N. Said, S. Z. Hassan, A. Gul, and others, “Global user-level perception of COVID-19 contact tracing applications: data-driven approach using natural language processing,” Jmir formative research, vol. 6, iss. 5, p. e36238, 2022.
[BibTeX]

@article{ahmad2022global,
  author    = {Ahmad, Kashif and Alam, Firoj and Qadir, Junaid and Qolomany, Basheer and Khan, Imran and Khan, Talhat and Suleman, Muhammad and Said, Naina and Hassan, Syed Zohaib and Gul, Asma and others},
  title     = {Global User-Level Perception of {COVID-19} Contact Tracing Applications: Data-Driven Approach Using Natural Language Processing},
  journal   = {{JMIR} Formative Research},
  volume    = {6},
  number    = {5},
  pages     = {e36238},
  year      = {2022},
  publisher = {JMIR Publications Inc.},
  address   = {Toronto, Canada},
}

P. Nakov, A. Barrón–Cedeño, G. Da San Martino, F. Alam, J. M. Struß, T. Mandl, R. Míguez, T. Caselli, M. Kutlu, W. Zaghouani, and others, “The CLEF-2022 CheckThat! lab on fighting the COVID-19 infodemic and fake news detection,” in European conference on information retrieval, 2022, p. 416–428.
[BibTeX]

@inproceedings{nakov2022clef,
  author       = {Nakov, Preslav and Barr{\'o}n-Cede{\~n}o, Alberto and Da San Martino, Giovanni and Alam, Firoj and Stru{\ss}, Julia Maria and Mandl, Thomas and M{\'\i}guez, Rub{\'e}n and Caselli, Tommaso and Kutlu, Mucahid and Zaghouani, Wajdi and others},
  title        = {The {CLEF-2022 CheckThat!} lab on fighting the {COVID-19} infodemic and fake news detection},
  booktitle    = {European Conference on Information Retrieval},
  organization = {Springer},
  year         = {2022},
  pages        = {416--428},
}

A. Hussein, S. A. Chowdhury, A. Abdelali, N. Dehak, A. Ali, and S. Khudanpur, “Textual data augmentation for arabic-english code-switching speech recognition,” in Proc. of the 2022 ieee spoken language technology workshop (slt), 2022, p. 777–784.
[BibTeX]

@inproceedings{hussein2023textual,
  author       = {Hussein, Amir and Chowdhury, Shammur Absar and Abdelali, Ahmed and Dehak, Najim and Ali, Ahmed and Khudanpur, Sanjeev},
  title        = {Textual Data Augmentation for {Arabic-English} Code-Switching Speech Recognition},
  booktitle    = {Proc. of the 2022 {IEEE} Spoken Language Technology Workshop ({SLT})},
  pages        = {777--784},
  year         = {2022},
  organization = {IEEE},
}

@comment{Disabled duplicate: the citation key nakov2022fact is already defined earlier in this file with the same authors, title, booktitle, pages (1632--1634) and year (2022). Keeping a second definition triggers a repeated-entry error.}

F. Dalvi, A. R. Khan, F. Alam, N. Durrani, J. Xu, and H. Sajjad, “Discovering latent concepts learned in BERT,” in International conference on learning representations, 2022.
[BibTeX] [Download PDF]

@inproceedings{dalvi2022discovering,
  author    = {Fahim Dalvi and Abdul Rafae Khan and Firoj Alam and Nadir Durrani and Jia Xu and Hassan Sajjad},
  title     = {Discovering Latent Concepts Learned in {BERT}},
  booktitle = {International Conference on Learning Representations},
  url       = {https://openreview.net/forum?id=POTMtpYI1xH},
  year      = {2022},
}

H. Mubarak, S. Hassan, and S. A. Chowdhury, “Emojis as anchors to detect arabic offensive language and hate speech,” Natural language engineering (nle) journal, 2022.
[BibTeX]

@article{mubarak2022emojis,
  author  = {Mubarak, Hamdy and Hassan, Sabit and Chowdhury, Shammur Absar},
  title   = {Emojis as Anchors to Detect {Arabic} Offensive Language and Hate Speech},
  journal = {Natural Language Engineering ({NLE}) Journal},
  year    = {2022},
}

S. P. Bayerl, G. Roccabruna, S. A. Chowdhury, T. Ciulli, M. Danieli, K. Riedhammer, and G. Riccardi, “What can speech and language tell us about the working alliance in psychotherapy,” in Proc. of the 23rd Annual Conference of the International Speech Communication Association (INTERSPEECH), 2022.
[BibTeX]

@inproceedings{bayerl2022can,
  author    = {Bayerl, Sebastian P. and Roccabruna, Gabriel and Chowdhury, Shammur Absar and Ciulli, Tommaso and Danieli, Morena and Riedhammer, Korbinian and Riccardi, Giuseppe},
  title     = {What can Speech and Language Tell us About the Working Alliance in Psychotherapy},
  booktitle = {Proc. of the 23rd Annual Conference of the {International Speech Communication Association} ({INTERSPEECH})},
  year      = {2022},
}

R. Zamparelli, S. Chowdhury, D. Brunato, C. Chesi, F. Dell’Orletta, M. A. Hasan, and G. Venturi, “Semeval-2022 task 3: pretens-evaluating neural networks on presuppositional semantic knowledge,” in Proc. of the 16th international workshop on semantic evaluation (semeval-2022), 2022, p. 228–238.
[BibTeX]

@inproceedings{zamparelli2022semeval,
  author    = {Zamparelli, Roberto and Chowdhury, Shammur and Brunato, Dominique and Chesi, Cristiano and Dell'Orletta, Felice and Hasan, Md Arid and Venturi, Giulia},
  title     = {{SemEval-2022} Task 3: {PreTENS}-Evaluating Neural Networks on Presuppositional Semantic Knowledge},
  booktitle = {Proc. of the 16th International Workshop on Semantic Evaluation ({SemEval-2022})},
  pages     = {228--238},
  year      = {2022},
}

I. Hamed, A. Hussein, O. Chellah, S. Chowdhury, H. Mubarak, S. Sitaram, N. Habash, and A. Ali, “Benchmarking evaluation metrics for code-switching automatic speech recognition,” in 2022 ieee spoken language technology workshop (slt), 2022.
[BibTeX]

@inproceedings{hamed2022benchmarking,
  author       = {Hamed, Injy and Hussein, Amir and Chellah, Oumnia and Chowdhury, Shammur and Mubarak, Hamdy and Sitaram, Sunayana and Habash, Nizar and Ali, Ahmed},
  title        = {Benchmarking Evaluation Metrics for Code-Switching Automatic Speech Recognition},
  booktitle    = {Proc. of the 2022 {IEEE} Spoken Language Technology Workshop ({SLT})},
  pages        = {999--1005},
  year         = {2022},
  organization = {IEEE},
}

R. N. Nandi, F. Alam, and P. Nakov, “Teamx@ dravidianlangtech-acl2022: a comparative analysis for troll-based meme classification,” in Proceedings of the second workshop on speech and language technologies for dravidian languages, 2022, p. 79–85.
[BibTeX]

@inproceedings{nandi2022teamx,
  author    = {Nandi, Rabindra Nath and Alam, Firoj and Nakov, Preslav},
  title     = {{TeamX@DravidianLangTech-ACL2022}: A Comparative Analysis for Troll-Based Meme Classification},
  booktitle = {Proceedings of the Second Workshop on Speech and Language Technologies for {Dravidian} Languages},
  pages     = {79--85},
  year      = {2022},
}

2021

S. Hassan, H. Mubarak, A. Abdelali, and K. Darwish, “ASAD: Arabic social media analytics and unDerstanding,” in Proceedings of the 16th conference of the european chapter of the association for computational linguistics: system demonstrations, Online, 2021, p. 113–118.
[BibTeX] [Abstract] [Download PDF]

This system demonstration paper describes ASAD: Arabic Social media Analysis and unDerstanding, a suite of seven individual modules that allows users to determine dialects, sentiment, news category, offensiveness, hate speech, adult content, and spam in Arabic tweets. The suite is made available through a web API and a web interface where users can enter text or upload files.

@inproceedings{hassan-etal-2021-asad,
  author    = {Hassan, Sabit and Mubarak, Hamdy and Abdelali, Ahmed and Darwish, Kareem},
  title     = {{ASAD}: {Arabic} Social media Analytics and un{D}erstanding},
  booktitle = {Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: System Demonstrations},
  month     = apr,
  year      = {2021},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://www.aclweb.org/anthology/2021.eacl-demos.14},
  pages     = {113--118},
  abstract  = {This system demonstration paper describes ASAD: Arabic Social media Analysis and unDerstanding, a suite of seven individual modules that allows users to determine dialects, sentiment, news category, offensiveness, hate speech, adult content, and spam in Arabic tweets. The suite is made available through a web API and a web interface where users can enter text or upload files.},
}

B. J. Jansen, S. Jung, S. A. Chowdhury, and J. Salminen, “Persona analytics: analyzing the stability of online segments and content interests over time using non-negative matrix factorization,” Expert systems with applications, vol. 185, p. 115611, 2021.
[BibTeX]

@article{jansen2021persona,
  author    = {Jansen, Bernard J and Jung, Soon-gyo and Chowdhury, Shammur A and Salminen, Joni},
  title     = {Persona analytics: Analyzing the stability of online segments and content interests over time using non-negative matrix factorization},
  journal   = {Expert Systems with Applications},
  publisher = {Pergamon},
  year      = {2021},
  volume    = {185},
  pages     = {115611},
}

P. Nakov, G. Da San Martino, T. Elsayed, A. Barrón–Cedeño, R. Míguez, S. Shaar, F. Alam, F. Haouari, M. Hasanain, N. Babulkov, and others, “The clef-2021 checkthat! lab on detecting check-worthy claims, previously fact-checked claims, and fake news,” in European conference on information retrieval, 2021, p. 639–649.
[BibTeX]

@inproceedings{nakov2021clef,
  author       = {Nakov, Preslav and Da San Martino, Giovanni and Elsayed, Tamer and Barr{\'o}n-Cede{\~n}o, Alberto and M{\'\i}guez, Rub{\'e}n and Shaar, Shaden and Alam, Firoj and Haouari, Fatima and Hasanain, Maram and Babulkov, Nikolay and others},
  title        = {The {CLEF-2021 CheckThat!} lab on detecting check-worthy claims, previously fact-checked claims, and fake news},
  booktitle    = {European Conference on Information Retrieval},
  organization = {Springer},
  pages        = {639--649},
  year         = {2021},
}

S. Shaar, M. Hasanain, B. Hamdan, Z. S. Ali, F. Haouari, A. Nikolov, M. Kutlu, Y. S. Kartal, F. Alam, G. Da San Martino, A. Barrón–Cedeño, R. Míguez, J. Beltrán, T. Elsayed, and P. Nakov, “Overview of the clef-2021 checkthat! lab task 1 on check-worthiness estimation in tweets and political debates,” Working notes of clef, 2021.
[BibTeX]

@article{shaar2021alberto,
  author  = {Shaar, Shaden and Hasanain, Maram and Hamdan, Bayan and Ali, Zien Sheikh and Haouari, Fatima and Nikolov, Alex and Kutlu, Mucahid and Kartal, Yavuz Selim and Alam, Firoj and Da San Martino, Giovanni and Barr{\'o}n-Cede{\~n}o, Alberto and M{\'\i}guez, Rub{\'e}n and Beltr{\'a}n, Javier and Elsayed, Tamer and Nakov, Preslav},
  title   = {Overview of the {CLEF-2021} {CheckThat!} Lab Task 1 on Check-Worthiness Estimation in Tweets and Political Debates},
  journal = {Working Notes of {CLEF}},
  year    = {2021},
}

K. Bozhanova, Y. Dinkov, I. Koychev, M. Castaldo, T. Venturini, and P. Nakov, “Predicting the factuality of reporting of news media using observations about user attention in their youtube channels,” in Proceedings of the international conference on recent advances in natural language processing (ranlp 2021), 2021, p. 182–189.
[BibTeX]

@inproceedings{bozhanova2021predicting,
  author    = {Bozhanova, Krasimira and Dinkov, Yoan and Koychev, Ivan and Castaldo, Maria and Venturini, Tommaso and Nakov, Preslav},
  title     = {Predicting the Factuality of Reporting of News Media Using Observations about User Attention in Their {YouTube} Channels},
  booktitle = {Proceedings of the International Conference on Recent Advances in Natural Language Processing ({RANLP} 2021)},
  pages     = {182--189},
  year      = {2021},
}

B. Babych, O. Kanishcheva, P. Nakov, J. Piskorski, L. Pivovarova, V. Starko, J. Steinberger, R. Yangarber, M. Marcińczuk, S. Pollak, and others, “Proceedings of the 8th workshop on balto-slavic natural language processing,” in Proceedings of the 8th workshop on balto-slavic natural language processing, 2021.
[BibTeX]

@proceedings{babych2021proceedings,
  editor    = {Babych, Bogdan and Kanishcheva, Olga and Nakov, Preslav and Piskorski, Jakub and Pivovarova, Lidia and Starko, Vasyl and Steinberger, Josef and Yangarber, Roman and Marci{\'n}czuk, Micha{\l} and Pollak, Senja and others},
  title     = {Proceedings of the 8th Workshop on {Balto-Slavic} Natural Language Processing},
  booktitle = {Proceedings of the 8th Workshop on {Balto-Slavic} Natural Language Processing},
  year      = {2021},
}

A. Feldman, G. Da San Martino, C. Leberknight, and P. Nakov, “Proceedings of the fourth workshop on nlp for internet freedom: censorship, disinformation, and propaganda,” in Proceedings of the fourth workshop on nlp for internet freedom: censorship, disinformation, and propaganda, 2021.
[BibTeX]

@proceedings{feldman2021proceedings,
  editor    = {Feldman, Anna and Da San Martino, Giovanni and Leberknight, Chris and Nakov, Preslav},
  title     = {Proceedings of the Fourth Workshop on {NLP} for Internet Freedom: Censorship, Disinformation, and Propaganda},
  booktitle = {Proceedings of the Fourth Workshop on {NLP} for Internet Freedom: Censorship, Disinformation, and Propaganda},
  year      = {2021},
}

V. Nastase, S. Szpakowicz, P. Nakov, and D. Ó. Séagdha, “Relations between nominals, relations between concepts,” in Semantic relations between nominals, second edition, Springer, 2021, p. 11–30.
[BibTeX]

@incollection{nastase2021relations,
  author    = {Nastase, Vivi and Szpakowicz, Stan and Nakov, Preslav and {\'O} S{\'e}aghdha, Diarmuid},
  title     = {Relations Between Nominals, Relations Between Concepts},
  booktitle = {Semantic Relations Between Nominals, Second Edition},
  publisher = {Springer},
  pages     = {11--30},
  year      = {2021},
}

V. Nastase, S. Szpakowicz, P. Nakov, and D. Ó. Séagdha, “Semantic relations and deep learning,” in Semantic relations between nominals, second edition, Springer, 2021, p. 107–164.
[BibTeX]

@incollection{nastase2021semantic,
  author    = {Nastase, Vivi and Szpakowicz, Stan and Nakov, Preslav and {\'O} S{\'e}aghdha, Diarmuid},
  title     = {Semantic Relations and Deep Learning},
  booktitle = {Semantic Relations Between Nominals, Second Edition},
  publisher = {Springer},
  pages     = {107--164},
  year      = {2021},
}

V. Nastase, S. Szpakowicz, P. Nakov, and D. Ó. Séagdha, “Semantic relations between nominals,” Synthesis lectures on human language technologies, vol. 14, iss. 1, p. 1–234, 2021.
[BibTeX]

@article{nastase2021semantic2,
  author    = {Nastase, Vivi and Szpakowicz, Stan and Nakov, Preslav and {\'O} S{\'e}aghdha, Diarmuid},
  title     = {Semantic Relations Between Nominals},
  journal   = {Synthesis Lectures on Human Language Technologies},
  volume    = {14},
  number    = {1},
  pages     = {1--234},
  publisher = {Morgan \& Claypool Publishers},
  year      = {2021},
}

D. Dimitrov, B. B. Ali, S. Shaar, F. Alam, F. Silvestri, H. Firooz, P. Nakov, and G. Da San Martino, “Semeval-2021 task 6: detection of persuasion techniques in texts and images,” in Proceedings of the 15th international workshop on semantic evaluation (semeval-2021), 2021, p. 70–98.
[BibTeX]

@inproceedings{dimitrov2021semeval,
  author    = {Dimitrov, Dimitar and Ali, Bishr Bin and Shaar, Shaden and Alam, Firoj and Silvestri, Fabrizio and Firooz, Hamed and Nakov, Preslav and Da San Martino, Giovanni},
  title     = {{SemEval-2021} Task 6: Detection of Persuasion Techniques in Texts and Images},
  booktitle = {Proceedings of the 15th International Workshop on Semantic Evaluation ({SemEval-2021})},
  pages     = {70--98},
  year      = {2021},
}

M. Zampieri and P. Nakov, Similar languages, varieties, and dialects: a computational perspective, Cambridge university press, 2021.
[BibTeX]

@book{zampieri2021similar,
  editor    = {Zampieri, Marcos and Nakov, Preslav},
  title     = {Similar Languages, Varieties, and Dialects: A Computational Perspective},
  publisher = {Cambridge University Press},
  year      = {2021},
}

J. Piskorski, B. Babych, Z. Kancheva, O. Kanishcheva, M. Lebedeva, M. Marcińczuk, P. Nakov, P. Osenova, L. Pivovarova, S. Pollak, and others, “Slav-ner: the 3rd cross-lingual challenge on recognition, normalization, classification, and linking of named entities across slavic languages,” in Proceedings of the 8th workshop on balto-slavic natural language processing, 2021, p. 122–133.
[BibTeX]

@inproceedings{piskorski2021slav,
  author    = {Piskorski, Jakub and Babych, Bogdan and Kancheva, Zara and Kanishcheva, Olga and Lebedeva, Maria and Marci{\'n}czuk, Micha{\l} and Nakov, Preslav and Osenova, Petya and Pivovarova, Lidia and Pollak, Senja and others},
  title     = {{Slav-NER}: the 3rd Cross-lingual Challenge on Recognition, Normalization, Classification, and Linking of Named Entities across {Slavic} Languages},
  booktitle = {Proceedings of the 8th Workshop on {Balto-Slavic} Natural Language Processing},
  pages     = {122--133},
  year      = {2021},
}

S. Rosenthal, P. Atanasova, G. Karadzhov, M. Zampieri, and P. Nakov, “Solid: a large-scale semi-supervised dataset for offensive language identification,” in Findings of the association for computational linguistics: ACL-IJCNLP 2021, 2021, p. 915–928.
[BibTeX]

@inproceedings{rosenthal2021solid,
  author    = {Rosenthal, Sara and Atanasova, Pepa and Karadzhov, Georgi and Zampieri, Marcos and Nakov, Preslav},
  title     = {{SOLID}: A Large-Scale Semi-Supervised Dataset for Offensive Language Identification},
  booktitle = {Findings of the Association for Computational Linguistics: {ACL-IJCNLP} 2021},
  pages     = {915--928},
  year      = {2021},
}

A. Chernyavskiy, D. Ilvovsky, and P. Nakov, “Transformers:“the end of history” for natural language processing?,” in Joint european conference on machine learning and knowledge discovery in databases, 2021, p. 677–693.
[BibTeX]

@inproceedings{chernyavskiy2021transformers,
  author       = {Chernyavskiy, Anton and Ilvovsky, Dmitry and Nakov, Preslav},
  title        = {Transformers: ``The End of History'' for Natural Language Processing?},
  booktitle    = {Joint European Conference on Machine Learning and Knowledge Discovery in Databases},
  organization = {Springer},
  pages        = {677--693},
  year         = {2021},
}

K. Hristakieva, S. Cresci, G. D. S. Martino, M. Conti, and P. Nakov, “The spread of propaganda by coordinated communities on social media,” Arxiv preprint arxiv:2109.13046, 2021.
[BibTeX]

@article{hristakieva2021spread,
  author        = {Hristakieva, Kristina and Cresci, Stefano and Da San Martino, Giovanni and Conti, Mauro and Nakov, Preslav},
  title         = {The Spread of Propaganda by Coordinated Communities on Social Media},
  journal       = {arXiv preprint arXiv:2109.13046},
  eprint        = {2109.13046},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2109.13046},
  year          = {2021},
}

S. Pramanick, S. Sharma, D. Dimitrov, M. S. Akhtar, P. Nakov, and T. Chakraborty, “Momenta: a multimodal framework for detecting harmful memes and their targets,” in Findings of the association for computational linguistics: emnlp 2021, 2021, p. 4439–4455.
[BibTeX]

@inproceedings{pramanick2021momenta,
  author    = {Pramanick, Shraman and Sharma, Shivam and Dimitrov, Dimitar and Akhtar, Md Shad and Nakov, Preslav and Chakraborty, Tanmoy},
  title     = {{MOMENTA}: A Multimodal Framework for Detecting Harmful Memes and Their Targets},
  booktitle = {Findings of the Association for Computational Linguistics: {EMNLP} 2021},
  pages     = {4439--4455},
  year      = {2021},
}

R. Dangovski, M. Shen, D. Byrd, L. Jing, D. Tsvetkova, P. Nakov, and M. Soljačić, “We can explain your research in layman’s terms: towards automating science journalism at scale,” in Proceedings of the aaai conference on artificial intelligence, 2021, p. 12728–12737.
[BibTeX]

@inproceedings{dangovski2021we,
  author    = {Dangovski, Rumen and Shen, Michelle and Byrd, Dawson and Jing, Li and Tsvetkova, Desislava and Nakov, Preslav and Solja{\v{c}}i{\'c}, Marin},
  title     = {We Can Explain Your Research in Layman's Terms: Towards Automating Science Journalism at Scale},
  booktitle = {Proceedings of the {AAAI} Conference on Artificial Intelligence},
  volume    = {35},
  number    = {14},
  pages     = {12728--12737},
  year      = {2021},
}

A. Chernyavskiy, D. Ilvovsky, and P. Nakov, “Whatthewikifact: fact-checking claims against wikipedia,” in Proceedings of the 30th acm international conference on information & knowledge management, 2021, p. 4690–4695.
[BibTeX]

@inproceedings{chernyavskiy2021whatthewikifact,
  author    = {Chernyavskiy, Anton and Ilvovsky, Dmitry and Nakov, Preslav},
  title     = {{WhatTheWikiFact}: Fact-Checking Claims Against {Wikipedia}},
  booktitle = {Proceedings of the 30th {ACM} International Conference on Information \& Knowledge Management},
  pages     = {4690--4695},
  year      = {2021},
}

K. Darwish, A. Abdelali, H. Mubarak, and M. Eldesouki, “Arabic diacritic recovery using a feature-rich bilstm model,” Transactions on asian and low-resource language information processing, vol. 20, iss. 2, p. 1–18, 2021.
[BibTeX]

@article{darwish2021arabic,
  author    = {Darwish, Kareem and Abdelali, Ahmed and Mubarak, Hamdy and Eldesouki, Mohamed},
  title     = {{Arabic} Diacritic Recovery Using a Feature-Rich {BiLSTM} Model},
  journal   = {Transactions on {Asian} and Low-Resource Language Information Processing},
  volume    = {20},
  number    = {2},
  pages     = {1--18},
  year      = {2021},
  publisher = {ACM},
  address   = {New York, NY},
}

K. Darwish, N. Habash, M. Abbas, H. Al–Khalifa, H. T. Al–Natsheh, H. Bouamor, K. Bouzoubaa, V. Cavalli–Sforza, S. R. El–Beltagy, W. El–Hajj, and others, “A panoramic survey of natural language processing in the arab world,” Communications of the acm, vol. 64, iss. 4, p. 72–81, 2021.
[BibTeX]

@article{darwish2021panoramic,
  author    = {Darwish, Kareem and Habash, Nizar and Abbas, Mourad and Al-Khalifa, Hend and Al-Natsheh, Hussein T. and Bouamor, Houda and Bouzoubaa, Karim and Cavalli-Sforza, Violetta and El-Beltagy, Samhaa R. and El-Hajj, Wassim and others},
  title     = {A Panoramic Survey of Natural Language Processing in the {Arab} World},
  journal   = {Communications of the {ACM}},
  volume    = {64},
  number    = {4},
  pages     = {72--81},
  year      = {2021},
  publisher = {ACM},
  address   = {New York, NY, USA},
}

F. Harrag, M. Debbah, K. Darwish, and A. Abdelali, “Bert transformer model for detecting arabic gpt2 auto-generated tweets,” Arxiv preprint arxiv:2101.09345, 2021.
[BibTeX]

@article{harrag2021bert,
  author        = {Harrag, Fouzi and Debbah, Maria and Darwish, Kareem and Abdelali, Ahmed},
  title         = {{BERT} Transformer Model for Detecting {Arabic} {GPT2} Auto-Generated Tweets},
  journal       = {arXiv preprint arXiv:2101.09345},
  eprint        = {2101.09345},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2101.09345},
  year          = {2021},
}

A. Abdelali, S. Hassan, H. Mubarak, K. Darwish, and Y. Samih, “Pre-training bert on arabic tweets: practical considerations,” Arxiv preprint arxiv:2102.10684, 2021.
[BibTeX]

@article{abdelali2021pre,
  author        = {Abdelali, Ahmed and Hassan, Sabit and Mubarak, Hamdy and Darwish, Kareem and Samih, Younes},
  title         = {Pre-Training {BERT} on {Arabic} Tweets: Practical Considerations},
  journal       = {arXiv preprint arXiv:2102.10684},
  eprint        = {2102.10684},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2102.10684},
  year          = {2021},
}

Y. Samih and K. Darwish, “A few topical tweets are enough for effective user stance detection,” in Proceedings of the 16th conference of the european chapter of the association for computational linguistics: main volume, 2021, p. 2637–2646.
[BibTeX]

@inproceedings{samih2021few,
  author    = {Samih, Younes and Darwish, Kareem},
  title     = {A Few Topical Tweets are Enough for Effective User Stance Detection},
  booktitle = {Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume},
  year      = {2021},
  pages     = {2637--2646},
}

A. Abdelali, H. Mubarak, Y. Samih, S. Hassan, and K. Darwish, “Qadi: arabic dialect identification in the wild,” in Proceedings of the sixth arabic natural language processing workshop, 2021, p. 1–10.
[BibTeX]

@inproceedings{abdelali2021qadi,
  author    = {Abdelali, Ahmed and Mubarak, Hamdy and Samih, Younes and Hassan, Sabit and Darwish, Kareem},
  title     = {{QADI}: {Arabic} Dialect Identification in the Wild},
  booktitle = {Proceedings of the Sixth {Arabic} Natural Language Processing Workshop},
  pages     = {1--10},
  year      = {2021},
}

F. Alam, F. Dalvi, S. Shaar, N. Durrani, H. Mubarak, A. Nikolov, G. D. S. Martino, A. Abdelali, H. Sajjad, K. Darwish, and P. Nakov, “Fighting the covid-19 infodemic in social media: a holistic perspective and a call to arms,” in Icwsm, 2021.
[BibTeX]

@inproceedings{alam2021icwsmfighting,
  author    = {Alam, Firoj and Dalvi, Fahim and Shaar, Shaden and Durrani, Nadir and Mubarak, Hamdy and Nikolov, Alex and Da San Martino, Giovanni and Abdelali, Ahmed and Sajjad, Hassan and Darwish, Kareem and Nakov, Preslav},
  title     = {Fighting the {COVID-19} Infodemic in Social Media: A Holistic Perspective and a Call to Arms},
  booktitle = {Proceedings of the International {AAAI} Conference on Web and Social Media ({ICWSM})},
  year      = {2021},
}

F. Alam, U. Qazi, M. Imran, and F. Ofli, “HumAID: human-annotated disaster incidents data from twitter with deep learning benchmarks,” in Icwsm, 2021.
[BibTeX]

@inproceedings{alam2021humaid,
  author    = {Alam, Firoj and Qazi, Umair and Imran, Muhammad and Ofli, Ferda},
  title     = {{HumAID}: Human-Annotated Disaster Incidents Data from {Twitter} with Deep Learning Benchmarks},
  booktitle = {Proceedings of the International {AAAI} Conference on Web and Social Media ({ICWSM})},
  year      = {2021},
}

F. Alam, H. Sajjad, M. Imran, and F. Ofli, “Crisisbench: benchmarking crisis-related social media datasets for humanitarian information processing,” in Icwsm, 2021.
[BibTeX]

@inproceedings{alam2020standardizing,
  author    = {Alam, Firoj and Sajjad, Hassan and Imran, Muhammad and Ofli, Ferda},
  title     = {{CrisisBench}: Benchmarking Crisis-related Social Media Datasets for Humanitarian Information Processing},
  booktitle = {Proceedings of the International {AAAI} Conference on Web and Social Media ({ICWSM})},
  year      = {2021},
}

P. Nakov, G. Da San Martino, T. Elsayed, A. Barrón–Cedeño, R. Míguez, S. Shaar, F. Alam, F. Haouari, M. Hasanain, W. Mansour, and others, “Overview of the clef–2021 checkthat! lab on detecting check-worthy claims, previously fact-checked claims, and fake news,” in International conference of the cross-language evaluation forum for european languages, 2021, p. 264–291.
[BibTeX]

@inproceedings{nakov2021overview,
  author       = {Nakov, Preslav and Da San Martino, Giovanni and Elsayed, Tamer and Barr{\'o}n-Cede{\~n}o, Alberto and M{\'\i}guez, Rub{\'e}n and Shaar, Shaden and Alam, Firoj and Haouari, Fatima and Hasanain, Maram and Mansour, Watheq and others},
  title        = {Overview of the {CLEF--2021 CheckThat!} lab on detecting check-worthy claims, previously fact-checked claims, and fake news},
  booktitle    = {International Conference of the Cross-Language Evaluation Forum for European Languages},
  organization = {Springer},
  pages        = {264--291},
  year         = {2021},
}

P. Nakov, H. T. Sencar, J. An, and H. Kwak, “A survey on predicting the factuality and the bias of news media,” Arxiv preprint arxiv:2103.12506, 2021.
[BibTeX]

@article{nakov2021jisun,
  author        = {Nakov, Preslav and Sencar, Husrev Taha and An, Jisun and Kwak, Haewoon},
  title         = {A Survey on Predicting the Factuality and the Bias of News Media},
  journal       = {arXiv preprint arXiv:2103.12506},
  eprint        = {2103.12506},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2103.12506},
  year          = {2021},
}

A. Ali, S. Chowdhury, M. Afify, W. El–Hajj, H. Hajj, M. Abbas, A. Hussein, N. Ghneim, M. Abushariah, and A. Alqudah, “Connecting Arabs: bridging the gap in dialectal speech recognition,” Communications of the acm, vol. 64, iss. 4, p. 124–129, 2021.
[BibTeX]

@article{ali_connecting_2021,
  author    = {Ali, Ahmed and Chowdhury, Shammur and Afify, Mohamed and El-Hajj, Wassim and Hajj, Hazem and Abbas, Mourad and Hussein, Amir and Ghneim, Nada and Abushariah, Mohammad and Alqudah, Assal},
  title     = {Connecting {Arabs}: Bridging the Gap in Dialectal Speech Recognition},
  journal   = {Communications of the {ACM}},
  volume    = {64},
  number    = {4},
  pages     = {124--129},
  year      = {2021},
  publisher = {ACM},
  address   = {New York, NY, USA},
}

A. Chernyavskiy, D. Ilvovsky, P. Kalinin, and P. Nakov, “Batch-softmax contrastive loss for pairwise sentence scoring tasks,” Arxiv preprint arxiv:2110.15725, 2021.
[BibTeX]

@article{chernyavskiy2021batch,
  author        = {Chernyavskiy, Anton and Ilvovsky, Dmitry and Kalinin, Pavel and Nakov, Preslav},
  title         = {Batch-Softmax Contrastive Loss for Pairwise Sentence Scoring Tasks},
  journal       = {arXiv preprint arXiv:2110.15725},
  eprint        = {2110.15725},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2110.15725},
  year          = {2021},
}

S. A. Chowdhury, A. Hussein, A. Abdelali, and A. Ali, “Towards One Model to Rule All: Multilingual Strategy for Dialectal Code-Switching Arabic ASR,” in Proc. of the 22nd Annual Conference of the International Speech Communication Association (INTERSPEECH), 2021.
[BibTeX]

@inproceedings{chowdhury_towards_2021,
  author    = {Chowdhury, Shammur Absar and Hussein, Amir and Abdelali, Ahmed and Ali, Ahmed},
  title     = {Towards One Model to Rule All: Multilingual Strategy for Dialectal Code-Switching {Arabic} {ASR}},
  booktitle = {Proc. of the 22nd Annual Conference of the {International Speech Communication Association} ({INTERSPEECH})},
  year      = {2021},
}

A. Ali, S. Chowdhury, A. Hussein, and H. Yasser, “Arabic Code-Switching Speech Recognition using Monolingual Data,” in Proc. of the 22nd Annual Conference of the International Speech Communication Association (INTERSPEECH), 2021.
[BibTeX]

@inproceedings{ali_arabic_2021,
  author    = {Ali, Ahmed and Chowdhury, Shammur and Hussein, Amir and Hifny, Yasser},
  title     = {{Arabic} Code-Switching Speech Recognition using Monolingual Data},
  booktitle = {Proc. of the 22nd Annual Conference of the {International Speech Communication Association} ({INTERSPEECH})},
  year      = {2021},
}

H. Mubarak, A. Hussein, S. A. Chowdhury, and A. Ali, “QASR: QCRI Aljazeera Speech Resource. A Large Scale Annotated Arabic Speech Corpus,” in Proc. of the 59th Annual Meeting of the Association for Computational Linguistics (ACL), 2021.
[BibTeX]

@inproceedings{mubarak_qasr_2021,
  author    = {Mubarak, Hamdy and Hussein, Amir and Chowdhury, Shammur Absar and Ali, Ahmed},
  title     = {{QASR}: {QCRI} {Aljazeera} Speech Resource. {A} Large Scale Annotated {Arabic} Speech Corpus},
  booktitle = {Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics ({ACL})},
  year      = {2021},
}

F. Alam, T. Alam, M. Hasan, A. Hasnat, M. Imran, F. Ofli, and others, “Medic: a multi-task learning dataset for disaster image classification,” Arxiv preprint arxiv:2108.12828, 2021.
[BibTeX]

@article{alam2021medic,
  author  = {Alam, Firoj and Alam, Tanvirul and Hasan, Md and Hasnat, Abul and Imran, Muhammad and Ofli, Ferda and others},
  title   = {{MEDIC}: A Multi-Task Learning Dataset for Disaster Image Classification},
  journal = {arXiv preprint arXiv:2108.12828},
  year    = {2021},
}

I. Alsmadi, K. Ahmad, M. Nazzal, F. Alam, A. Al–Fuqaha, A. Khreishah, and A. Algosaibi, “Adversarial attacks and defenses for social network text processing applications: techniques, challenges and future research directions,” Arxiv preprint arxiv:2110.13980, 2021.
[BibTeX]

@article{alsmadi2021adversarial,
  author  = {Alsmadi, Izzat and Ahmad, Kashif and Nazzal, Mahmoud and Alam, Firoj and Al-Fuqaha, Ala and Khreishah, Abdallah and Algosaibi, Abdulelah},
  title   = {Adversarial attacks and defenses for social network text processing applications: Techniques, challenges and future research directions},
  journal = {arXiv preprint arXiv:2110.13980},
  year    = {2021},
}

P. Nakov and J. Tiedemann, “11 machine translation between similar languages,” Similar languages, varieties, and dialects: a computational perspective, p. 219, 2021.
[BibTeX]

@incollection{nakov202111,
  author    = {Nakov, Preslav and Tiedemann, J{\"o}rg},
  title     = {Machine Translation between Similar Languages},
  booktitle = {Similar Languages, Varieties, and Dialects: A Computational Perspective},
  chapter   = {11},
  pages     = {219},
  publisher = {Cambridge University Press},
  year      = {2021},
}

G. Da San Martino, S. Cresci, A. Barrón–Cedeño, S. Yu, R. Di Pietro, and P. Nakov, “A survey on computational propaganda detection,” in Proceedings of the twenty-ninth international conference on international joint conferences on artificial intelligence, 2021, p. 4826–4832.
[BibTeX]

@inproceedings{da2021survey,
  author    = {Da San Martino, Giovanni and Cresci, Stefano and Barr{\'o}n-Cede{\~n}o, Alberto and Yu, Seunghak and Di Pietro, Roberto and Nakov, Preslav},
  title     = {A Survey on Computational Propaganda Detection},
  booktitle = {Proceedings of the Twenty-Ninth International Joint Conference on Artificial Intelligence},
  pages     = {4826--4832},
  year      = {2021},
}

F. Alam, S. Cresci, T. Chakraborty, F. Silvestri, D. Dimitrov, G. Da San Martino, S. Shaar, H. Firooz, and P. Nakov, “A survey on multimodal disinformation detection,” Arxiv preprint arxiv:2103.12541, 2021.
[BibTeX]

@article{alam2021survey,
  author  = {Alam, Firoj and Cresci, Stefano and Chakraborty, Tanmoy and Silvestri, Fabrizio and Dimitrov, Dimiter and Da San Martino, Giovanni and Shaar, Shaden and Firooz, Hamed and Nakov, Preslav},
  title   = {A Survey on Multimodal Disinformation Detection},
  journal = {arXiv preprint arXiv:2103.12541},
  year    = {2021},
}

P. Nakov, H. T. Sencar, J. An, and H. Kwak, “A survey on predicting the factuality and the bias of news media,” Arxiv preprint arxiv:2103.12506, 2021.
[BibTeX]

@article{nakov2021survey,
  author  = {Nakov, Preslav and Sencar, Husrev Taha and An, Jisun and Kwak, Haewoon},
  title   = {A survey on predicting the factuality and the bias of news media},
  journal = {arXiv preprint arXiv:2103.12506},
  year    = {2021},
}

M. Hardalov, A. Arora, P. Nakov, and I. Augenstein, “A survey on stance detection for mis-and disinformation identification,” Arxiv preprint arxiv:2103.00242, 2021.
[BibTeX]

@article{hardalov2021survey,
  author  = {Hardalov, Momchil and Arora, Arnav and Nakov, Preslav and Augenstein, Isabelle},
  title   = {A Survey on Stance Detection for Mis- and Disinformation Identification},
  journal = {arXiv preprint arXiv:2103.00242},
  year    = {2021},
}

T. Alhindi, A. Alabdulkarim, A. Alshehri, M. Abdul–Mageed, and P. Nakov, “Arastance: a multi-country and multi-domain dataset of arabic stance detection for fact checking,” in Proceedings of the fourth workshop on nlp for internet freedom: censorship, disinformation, and propaganda, 2021, p. 57–65.
[BibTeX]

@inproceedings{alhindi2021arastance,
  author    = {Alhindi, Tariq and Alabdulkarim, Amal and Alshehri, Ali and Abdul-Mageed, Muhammad and Nakov, Preslav},
  title     = {{AraStance}: A Multi-Country and Multi-Domain Dataset of {Arabic} Stance Detection for Fact Checking},
  booktitle = {Proceedings of the Fourth Workshop on {NLP} for Internet Freedom: Censorship, Disinformation, and Propaganda},
  pages     = {57--65},
  year      = {2021},
}

P. Nakov, D. Corney, M. Hasanain, F. Alam, T. Elsayed, A. Barrón–Cedeño, P. Papotti, S. Shaar, and G. Da San Martino, “Automated fact-checking for assisting human fact-checkers,” in Proceedings of the 30th international joint conference on artificial intelligence, 2021, p. 4551–4558.
[BibTeX]

@inproceedings{survey:2021:ai:fact-checkers,
  author    = {Nakov, Preslav and Corney, David and Hasanain, Maram and Alam, Firoj and Elsayed, Tamer and Barr{\'o}n-Cede{\~n}o, Alberto and Papotti, Paolo and Shaar, Shaden and Da San Martino, Giovanni},
  title     = {Automated Fact-Checking for Assisting Human Fact-Checkers},
  booktitle = {Proceedings of the 30th International Joint Conference on Artificial Intelligence},
  series    = {IJCAI~'21},
  pages     = {4551--4558},
  year      = {2021},
}

B. Skuczyńska, S. Shaar, J. Spenader, and P. Nakov, “BeaSku at CheckThat! 2021: fine-tuning sentence BERT with triplet loss and limited data,” 2021.
[BibTeX]

@inproceedings{skuczynska2021beasku,
  author    = {Skuczy{\'n}ska, Beata and Shaar, Shaden and Spenader, Jennifer and Nakov, Preslav},
  title     = {{BeaSku} at {CheckThat!} 2021: Fine-Tuning Sentence {BERT} with Triplet Loss and Limited Data},
  booktitle = {Working Notes of {CLEF} 2021---Conference and Labs of the Evaluation Forum},
  year      = {2021},
}

S. Yu, G. Da San Martino, M. Mohtarami, J. Glass, and P. Nakov, “Interpretable propaganda detection in news articles,” in Proceedings of the international conference on recent advances in natural language processing (ranlp 2021), 2021, p. 1597–1605.
[BibTeX]

@inproceedings{yu2021interpretable,
  author    = {Yu, Seunghak and Da San Martino, Giovanni and Mohtarami, Mitra and Glass, James and Nakov, Preslav},
  title     = {Interpretable Propaganda Detection in News Articles},
  booktitle = {Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)},
  pages     = {1597--1605},
  year      = {2021},
}

P. Ganesh, Y. Chen, X. Lou, M. A. Khan, Y. Yang, H. Sajjad, P. Nakov, D. Chen, and M. Winslett, “Compressing large-scale transformer-based models: a case study on bert,” Transactions of the association for computational linguistics, vol. 9, p. 1061–1080, 2021.
[BibTeX]

@article{ganesh2021compressing,
  author  = {Ganesh, Prakhar and Chen, Yao and Lou, Xin and Khan, Mohammad Ali and Yang, Yin and Sajjad, Hassan and Nakov, Preslav and Chen, Deming and Winslett, Marianne},
  title   = {Compressing Large-Scale Transformer-Based Models: A Case Study on {BERT}},
  journal = {Transactions of the Association for Computational Linguistics},
  volume  = {9},
  pages   = {1061--1080},
  year    = {2021},
}

P. Nakov, V. Nayak, K. Dent, A. Bhatawdekar, S. M. Sarwar, M. Hardalov, Y. Dinkov, D. Zlatkova, G. Bouchard, and I. Augenstein, “Detecting abusive language on online platforms: a critical analysis,” Arxiv preprint arxiv:2103.00153, 2021.
[BibTeX]

@article{nakov2021detecting,
  author  = {Nakov, Preslav and Nayak, Vibha and Dent, Kyle and Bhatawdekar, Ameya and Sarwar, Sheikh Muhammad and Hardalov, Momchil and Dinkov, Yoan and Zlatkova, Dimitrina and Bouchard, Guillaume and Augenstein, Isabelle},
  title   = {Detecting abusive language on online platforms: A critical analysis},
  journal = {arXiv preprint arXiv:2103.00153},
  year    = {2021},
}

S. Pramanick, D. Dimitrov, R. Mukherjee, S. Sharma, M. S. Akhtar, P. Nakov, and T. Chakraborty, “Detecting harmful memes and their targets,” in Findings of the association for computational linguistics: acl-ijcnlp 2021, 2021, p. 2783–2796.
[BibTeX]

@inproceedings{pramanick2021detecting,
  author    = {Pramanick, Shraman and Dimitrov, Dimitar and Mukherjee, Rituparna and Sharma, Shivam and Akhtar, Md Shad and Nakov, Preslav and Chakraborty, Tanmoy},
  title     = {Detecting Harmful Memes and Their Targets},
  booktitle = {Findings of the Association for Computational Linguistics: ACL-IJCNLP 2021},
  pages     = {2783--2796},
  year      = {2021},
}

D. Dimitrov, B. B. Ali, S. Shaar, F. Alam, F. Silvestri, H. Firooz, P. Nakov, and G. Da San Martino, “Detecting propaganda techniques in memes,” in Proceedings of the 59th annual meeting of the association for computational linguistics and the 11th international joint conference on natural language processing (volume 1: long papers), 2021, p. 6603–6617.
[BibTeX]

@inproceedings{dimitrov2021detecting,
  author    = {Dimitrov, Dimitar and Ali, Bishr Bin and Shaar, Shaden and Alam, Firoj and Silvestri, Fabrizio and Firooz, Hamed and Nakov, Preslav and Da San Martino, Giovanni},
  title     = {Detecting Propaganda Techniques in Memes},
  booktitle = {Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)},
  pages     = {6603--6617},
  year      = {2021},
}

D. Nguyen, M. Zampieri, P. Nakov, and others, “Dialect variation on social media,” 2021.
[BibTeX]

@incollection{nguyen2021dialect,
  author    = {Nguyen, Dong and Zampieri, Marcos and Nakov, Preslav and others},
  title     = {Dialect Variation on Social Media},
  booktitle = {Similar Languages, Varieties, and Dialects: A Computational Perspective},
  publisher = {Cambridge University Press},
  year      = {2021},
}

S. Mihaylova, I. Borisova, D. Chemishanov, P. Hadzhitsanev, M. Hardalov, and P. Nakov, “DIPS at CheckThat! 2021: verified claim retrieval,” 2021.
[BibTeX]

@inproceedings{mihaylova2021dips,
  author    = {Mihaylova, Simona and Borisova, Iva and Chemishanov, Dzhovani and Hadzhitsanev, Preslav and Hardalov, Momchil and Nakov, Preslav},
  title     = {{DIPS} at {CheckThat!} 2021: Verified Claim Retrieval},
  booktitle = {Working Notes of {CLEF} 2021---Conference and Labs of the Evaluation Forum},
  year      = {2021},
}

V. Nastase, S. Szpakowicz, P. Nakov, and D. Ó Séaghdha, “Extracting semantic relations with little or no supervision,” in Semantic relations between nominals, second edition, Springer, 2021, p. 75–106.
[BibTeX]

@incollection{nastase2021extracting,
  author    = {Nastase, Vivi and Szpakowicz, Stan and Nakov, Preslav and {\'O} S{\'e}aghdha, Diarmuid},
  title     = {Extracting Semantic Relations with Little or No Supervision},
  booktitle = {Semantic Relations Between Nominals, Second Edition},
  pages     = {75--106},
  publisher = {Springer},
  year      = {2021},
}

P. Nakov and G. Da San Martino, “Fake news, disinformation, propaganda, and media bias,” in Proceedings of the 30th acm international conference on information & knowledge management, 2021, p. 4862–4865.
[BibTeX]

@inproceedings{nakov2021fake,
  author    = {Nakov, Preslav and Da San Martino, Giovanni},
  title     = {Fake News, Disinformation, Propaganda, and Media Bias},
  booktitle = {Proceedings of the 30th ACM International Conference on Information \& Knowledge Management},
  pages     = {4862--4865},
  year      = {2021},
}

P. Nakov and G. Da San Martino, “Fake news, disinformation, propaganda, media bias, and flattening the curve of the covid-19 infodemic,” in Proceedings of the 27th acm sigkdd conference on knowledge discovery & data mining, 2021, p. 4054–4055.
[BibTeX]

@inproceedings{nakov2021fake2,
  author    = {Nakov, Preslav and Da San Martino, Giovanni},
  title     = {Fake News, Disinformation, Propaganda, Media Bias, and Flattening the Curve of the {COVID-19} Infodemic},
  booktitle = {Proceedings of the 27th {ACM} {SIGKDD} Conference on Knowledge Discovery \& Data Mining},
  pages     = {4054--4055},
  year      = {2021},
}

F. Alam, F. Dalvi, S. Shaar, N. Durrani, H. Mubarak, A. Nikolov, G. Da San Martino, A. Abdelali, H. Sajjad, K. Darwish, and others, “Fighting the COVID-19 infodemic in social media: a holistic perspective and a call to arms,” in ICWSM, 2021, p. 913–922.
[BibTeX]

@inproceedings{alam2021fightingicwsm,
  author    = {Alam, Firoj and Dalvi, Fahim and Shaar, Shaden and Durrani, Nadir and Mubarak, Hamdy and Nikolov, Alex and Da San Martino, Giovanni and Abdelali, Ahmed and Sajjad, Hassan and Darwish, Kareem and others},
  title     = {Fighting the {COVID-19} Infodemic in Social Media: A Holistic Perspective and a Call to Arms},
  booktitle = {Proceedings of the International {AAAI} Conference on Web and Social Media ({ICWSM})},
  pages     = {913--922},
  year      = {2021},
}

F. Alam, S. Shaar, F. Dalvi, H. Sajjad, A. Nikolov, H. Mubarak, G. Da San Martino, A. Abdelali, N. Durrani, K. Darwish, and others, “Fighting the covid-19 infodemic: modeling the perspective of journalists, fact-checkers, social media platforms, policy makers, and the society,” in Findings of the association for computational linguistics: emnlp 2021, 2021, p. 611–649.
[BibTeX]

@inproceedings{alam2021fighting,
  author    = {Alam, Firoj and Shaar, Shaden and Dalvi, Fahim and Sajjad, Hassan and Nikolov, Alex and Mubarak, Hamdy and Da San Martino, Giovanni and Abdelali, Ahmed and Durrani, Nadir and Darwish, Kareem and others},
  title     = {Fighting the {COVID-19} Infodemic: Modeling the Perspective of Journalists, Fact-Checkers, Social Media Platforms, Policy Makers, and the Society},
  booktitle = {Findings of the Association for Computational Linguistics: {EMNLP} 2021},
  pages     = {611--649},
  year      = {2021},
}

S. Shaar, F. Alam, G. Da San Martino, A. Nikolov, W. Zaghouani, P. Nakov, and A. Feldman, “Findings of the NLP4IF-2021 shared tasks on fighting the covid-19 infodemic and censorship detection,” in Proceedings of the fourth workshop on nlp for internet freedom: censorship, disinformation, and propaganda, 2021, p. 82–92.
[BibTeX]

@inproceedings{shaar2021findings,
  author    = {Shaar, Shaden and Alam, Firoj and Da San Martino, Giovanni and Nikolov, Alex and Zaghouani, Wajdi and Nakov, Preslav and Feldman, Anna},
  title     = {Findings of the {NLP4IF-2021} Shared Tasks on Fighting the {COVID-19} Infodemic and Censorship Detection},
  booktitle = {Proceedings of the Fourth Workshop on {NLP} for Internet Freedom: Censorship, Disinformation, and Propaganda},
  pages     = {82--92},
  year      = {2021},
}

K. Vachev, M. Hardalov, G. Karadzhov, G. Georgiev, I. Koychev, and P. Nakov, “Generating answer candidates for quizzes and answer-aware question generators,” in Proceedings of the student research workshop associated with RANLP, 2021, p. 203–209.
[BibTeX]

@inproceedings{vachev2021generating,
  author    = {Vachev, Kristiyan and Hardalov, Momchil and Karadzhov, Georgi and Georgiev, Georgi and Koychev, Ivan and Nakov, Preslav},
  title     = {Generating Answer Candidates for Quizzes and Answer-Aware Question Generators},
  booktitle = {Proceedings of the Student Research Workshop Associated with {RANLP}},
  pages     = {203--209},
  year      = {2021},
}

A. Rashed, M. Kutlu, K. Darwish, T. Elsayed, and C. Bayrak, “Embeddings-based clustering for target specific stances: the case of a polarized Turkey,” in 15th International AAAI Conference on Web and Social Media (ICWSM-2021), 2021.
[BibTeX]

@inproceedings{rashed2020embeddings,
  author    = {Rashed, Ammar and Kutlu, Mucahid and Darwish, Kareem and Elsayed, Tamer and Bayrak, Cans{\i}n},
  title     = {Embeddings-Based Clustering for Target Specific Stances: The Case of a Polarized {Turkey}},
  booktitle = {15th International {AAAI} Conference on Web and Social Media ({ICWSM}-2021)},
  year      = {2021},
}

2020

M. Zampieri, P. Nakov, S. Rosenthal, P. Atanasova, G. Karadzhov, H. Mubarak, L. Derczynski, Z. Pitenis, and Ç. Çöltekin, “SemEval-2020 task 12: multilingual offensive language identification in social media,” in Proceedings of the international workshop on semantic evaluation, Barcelona, Spain, 2020.
[BibTeX]

@inproceedings{zampieri-etal-2020-semeval,
  author    = {Zampieri, Marcos and Nakov, Preslav and Rosenthal, Sara and Atanasova, Pepa and Karadzhov, Georgi and Mubarak, Hamdy and Derczynski, Leon and Pitenis, Zeses and {\c{C}}{\"o}ltekin, {\c{C}}a{\u{g}}r{\i}},
  title     = {{SemEval-2020} Task 12: Multilingual Offensive Language Identification in Social Media},
  booktitle = {Proceedings of the International Workshop on Semantic Evaluation},
  series    = {SemEval~'20},
  address   = {Barcelona, Spain},
  month     = dec,
  year      = {2020},
}

S. Hassan, Y. Samih, H. Mubarak, and A. Abdelali, “ALT at SemEval-2020 task 12: Arabic and English offensive language identification in social media,” in Proceedings of the fourteenth workshop on semantic evaluation, Barcelona (online), 2020, p. 1891–1897.
[BibTeX] [Abstract] [Download PDF]

This paper describes the systems submitted by the Arabic Language Technology group (ALT) at SemEval-2020 Task 12: Multilingual Offensive Language Identification in Social Media. We focus on sub-task A (Offensive Language Identification) for two languages: Arabic and English. Our efforts for both languages achieved more than 90% macro-averaged F1-score on the official test set. For Arabic, the best results were obtained by a system combination of Support Vector Machine, Deep Neural Network, and fine-tuned Bidirectional Encoder Representations from Transformers (BERT). For English, the best results were obtained by fine-tuning BERT.

@inproceedings{hassan-etal-2020-alt-semeval,
  author    = {Hassan, Sabit and Samih, Younes and Mubarak, Hamdy and Abdelali, Ahmed},
  title     = {{ALT} at {SemEval-2020} Task 12: {Arabic} and {English} Offensive Language Identification in Social Media},
  booktitle = {Proceedings of the Fourteenth Workshop on Semantic Evaluation},
  month     = dec,
  year      = {2020},
  address   = {Barcelona (online)},
  publisher = {International Committee for Computational Linguistics},
  url       = {https://www.aclweb.org/anthology/2020.semeval-1.249},
  pages     = {1891--1897},
  abstract  = {This paper describes the systems submitted by the Arabic Language Technology group (ALT) at SemEval-2020 Task 12: Multilingual Offensive Language Identification in Social Media. We focus on sub-task A (Offensive Language Identification) for two languages: Arabic and English. Our efforts for both languages achieved more than 90{\%} macro-averaged F1-score on the official test set. For Arabic, the best results were obtained by a system combination of Support Vector Machine, Deep Neural Network, and fine-tuned Bidirectional Encoder Representations from Transformers (BERT). For English, the best results were obtained by fine-tuning BERT.},
}

G. Da San Martino, A. Barrón–Cedeño, H. Wachsmuth, R. Petrov, and P. Nakov, “SemEval-2020 task 11: detection of propaganda techniques in news articles,” in Proceedings of the international workshop on semantic evaluation, Barcelona, Spain, 2020.
[BibTeX]

@InProceedings{DaSanMartinoSemeval20task11,
  author    = {Da San Martino, Giovanni and Barr{\'o}n-Cede{\~n}o, Alberto and Wachsmuth, Henning and Petrov, Rostislav and Nakov, Preslav},
  title     = {{SemEval-2020} Task 11: Detection of Propaganda Techniques in News Articles},
  booktitle = {Proceedings of the International Workshop on Semantic Evaluation},
  series    = {SemEval~'20},
  address   = {Barcelona, Spain},
  month     = dec,
  year      = {2020},
}

T. Alam, A. Khan, and F. Alam, “Punctuation restoration using transformer models for high-and low-resource languages,” in Proceedings of the sixth workshop on noisy user-generated text (w-nut 2020), Online, 2020, p. 132–142. doi:10.18653/v1/2020.wnut-1.18
[BibTeX] [Download PDF]

@inproceedings{alam-etal-2020-punctuation,
  author    = {Alam, Tanvirul and Khan, Akib and Alam, Firoj},
  title     = {Punctuation Restoration using Transformer Models for High-and Low-Resource Languages},
  booktitle = {Proceedings of the Sixth Workshop on Noisy User-generated Text (W-NUT 2020)},
  month     = nov,
  year      = {2020},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://www.aclweb.org/anthology/2020.wnut-1.18},
  doi       = {10.18653/v1/2020.wnut-1.18},
  pages     = {132--142},
}

J. M. Wu, Y. Belinkov, H. Sajjad, N. Durrani, F. Dalvi, and J. Glass, “Similarity Analysis of Contextual Word Representation Models,” in Proceedings of the annual conference of the association for computational linguistics (acl), 2020.
[BibTeX]

@InProceedings{wu_similarity:acl20,
  author    = {Wu, John M. and Belinkov, Yonatan and Sajjad, Hassan and Durrani, Nadir and Dalvi, Fahim and Glass, James},
  title     = {Similarity Analysis of Contextual Word Representation Models},
  booktitle = {Proceedings of the Annual Conference of the Association for Computational Linguistics ({ACL})},
  month     = jul,
  year      = {2020},
}

P. Stefanov, K. Darwish, A. Atanasov, and P. Nakov, “Predicting the topical stance and political leaning of media using tweets,” in Proceedings of the 58th annual meeting of the association for computational linguistics, Online, 2020, p. 527–537.
[BibTeX] [Download PDF]

@inproceedings{stefanov-etal-2020-predicting,
  author    = {Stefanov, Peter and Darwish, Kareem and Atanasov, Atanas and Nakov, Preslav},
  title     = {Predicting the Topical Stance and Political Leaning of Media using Tweets},
  booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},
  series    = {ACL~'20},
  month     = jul,
  year      = {2020},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://www.aclweb.org/anthology/2020.acl-main.50},
  pages     = {527--537},
}

R. Baly, G. Karadzhov, J. An, H. Kwak, Y. Dinkov, A. Ali, J. Glass, and P. Nakov, “What was written vs. who read it: news media profiling using text analysis and social media context,” in Proceedings of the 58th annual meeting of the association for computational linguistics, Online, 2020, p. 3364–3374.
[BibTeX] [Download PDF]

@inproceedings{baly-etal-2020-written,
  author    = {Baly, Ramy and Karadzhov, Georgi and An, Jisun and Kwak, Haewoon and Dinkov, Yoan and Ali, Ahmed and Glass, James and Nakov, Preslav},
  title     = {What Was Written vs. Who Read It: News Media Profiling Using Text Analysis and Social Media Context},
  booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},
  series    = {ACL~'20},
  month     = jul,
  year      = {2020},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://www.aclweb.org/anthology/2020.acl-main.308},
  pages     = {3364--3374},
}

S. Shaar, N. Babulkov, G. Da San Martino, and P. Nakov, “That is a known lie: detecting previously fact-checked claims,” in Proceedings of the 58th annual meeting of the association for computational linguistics, Online, 2020, p. 3607–3618.
[BibTeX] [Download PDF]

@inproceedings{shaar-etal-2020-known,
  author    = {Shaar, Shaden and Babulkov, Nikolay and Da San Martino, Giovanni and Nakov, Preslav},
  title     = {That is a Known Lie: Detecting Previously Fact-Checked Claims},
  booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},
  series    = {ACL~'20},
  month     = jul,
  year      = {2020},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://www.aclweb.org/anthology/2020.acl-main.332},
  pages     = {3607--3618},
}

G. Da San Martino, S. Shaar, Y. Zhang, S. Yu, A. Barrón–Cedeño, and P. Nakov, “Prta: a system to support the analysis of propaganda techniques in the news,” in Proceedings of the 58th annual meeting of the association for computational linguistics: system demonstrations, Online, 2020, p. 287–293.
[BibTeX] [Download PDF]

@inproceedings{da-san-martino-etal-2020-prta,
  author    = {Da San Martino, Giovanni and Shaar, Shaden and Zhang, Yifan and Yu, Seunghak and Barr{\'o}n-Cede{\~n}o, Alberto and Nakov, Preslav},
  title     = {{Prta}: A System to Support the Analysis of Propaganda Techniques in the News},
  booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: System Demonstrations},
  series    = {ACL~'20},
  month     = jul,
  year      = {2020},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://www.aclweb.org/anthology/2020.acl-demos.32},
  pages     = {287--293},
}

H. Mubarak, S. Hassan, and A. Abdelali, “Constructing a bilingual corpus of parallel tweets,” in Proceedings of the 13th workshop on building and using comparable corpora, Marseille, France, 2020, p. 14–21.
[BibTeX] [Abstract] [Download PDF]

In a bid to reach a larger and more diverse audience, Twitter users often post parallel tweets—tweets that contain the same content but are written in different languages. Parallel tweets can be an important resource for developing machine translation (MT) systems among other natural language processing (NLP) tasks. In this paper, we introduce a generic method for collecting parallel tweets. Using this method, we collect a bilingual corpus of English-Arabic parallel tweets and a list of Twitter accounts who post English-Arabic tweets regularly. Since our method is generic, it can also be used for collecting parallel tweets that cover less-resourced languages such as Serbian and Urdu. Additionally, we annotate a subset of Twitter accounts with their countries of origin and topic of interest, which provides insights about the population who post parallel tweets. This latter information can also be useful for author profiling tasks.

@inproceedings{mubarak-etal-2020-constructing,
  author    = {Mubarak, Hamdy and Hassan, Sabit and Abdelali, Ahmed},
  title     = {Constructing a Bilingual Corpus of Parallel Tweets},
  booktitle = {Proceedings of the 13th Workshop on Building and Using Comparable Corpora},
  month     = may,
  year      = {2020},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  url       = {https://www.aclweb.org/anthology/2020.bucc-1.3},
  pages     = {14--21},
  abstract  = {In a bid to reach a larger and more diverse audience, Twitter users often post parallel tweets{---}tweets that contain the same content but are written in different languages. Parallel tweets can be an important resource for developing machine translation (MT) systems among other natural language processing (NLP) tasks. In this paper, we introduce a generic method for collecting parallel tweets. Using this method, we collect a bilingual corpus of English-Arabic parallel tweets and a list of Twitter accounts who post English-Arabic tweets regularly. Since our method is generic, it can also be used for collecting parallel tweets that cover less-resourced languages such as Serbian and Urdu. Additionally, we annotate a subset of Twitter accounts with their countries of origin and topic of interest, which provides insights about the population who post parallel tweets. This latter information can also be useful for author profiling tasks.},
  language  = {English},
  isbn      = {979-10-95546-42-9},
}

A. Barrón–Cedeño, T. Elsayed, P. Nakov, G. Da San Martino, M. Hasanain, R. Suwaileh, and F. Haouari, “Checkthat! at CLEF 2020: enabling the automatic identification and verification of claims in social media,” in Proceedings of the 42nd european conference on information retrieval, Lisbon, Portugal, 2020, p. 499–507.
[BibTeX]

@InProceedings{CheckThat:ECIR2020,
  author    = {Barr{\'o}n-Cede{\~n}o, Alberto and Elsayed, Tamer and Nakov, Preslav and Da San Martino, Giovanni and Hasanain, Maram and Suwaileh, Reem and Haouari, Fatima},
  title     = {{CheckThat!} at {CLEF} 2020: Enabling the Automatic Identification and Verification of Claims in Social Media},
  booktitle = {Proceedings of the 42nd European Conference on Information Retrieval},
  series    = {ECIR~'20},
  pages     = {499--507},
  address   = {Lisbon, Portugal},
  month     = apr,
  year      = {2020},
}

S. Shaar, A. Nikolov, N. Babulkov, F. Alam, A. Barrón–Cedeño, T. Elsayed, M. Hasanain, R. Suwaileh, F. Haouari, G. Da San Martino, and P. Nakov, “Overview of CheckThat! 2020 English: automatic identification and verification of claims in social media,” in Working notes of CLEF 2020—conference and labs of the evaluation forum, Thessaloniki, Greece, 2020.
[BibTeX]

@InProceedings{clef-checkthat-en:2020,
  author    = {Shaar, Shaden and Nikolov, Alex and Babulkov, Nikolay and Alam, Firoj and Barr{\'o}n-Cede{\~n}o, Alberto and Elsayed, Tamer and Hasanain, Maram and Suwaileh, Reem and Haouari, Fatima and {Da San Martino}, Giovanni and Nakov, Preslav},
  title     = {Overview of {CheckThat!} 2020 {English}: Automatic Identification and Verification of Claims in Social Media},
  booktitle = {Working Notes of {CLEF} 2020---Conference and Labs of the Evaluation Forum},
  series    = {CLEF~'20},
  address   = {Thessaloniki, Greece},
  year      = {2020},
}

H. Mubarak, S. Amer, A. Abdelali, and K. Darwish, “Arabic curriculum analysis,” in Proceedings of the 28th international conference on computational linguistics: system demonstrations, 2020, p. 80–86.
[BibTeX]

@inproceedings{mubarak2020arabic,
  author    = {Mubarak, Hamdy and Amer, Shimaa and Abdelali, Ahmed and Darwish, Kareem},
  title     = {Arabic Curriculum Analysis},
  booktitle = {Proceedings of the 28th International Conference on Computational Linguistics: System Demonstrations},
  pages     = {80--86},
  year      = {2020},
}

G. Da San Martino, S. Cresci, A. Barrón–Cedeño, S. Yu, R. Di Pietro, and P. Nakov, “A survey on computational propaganda detection,” in Proceedings of the 29th international joint conference on artificial intelligence and the 17th pacific rim international conference on artificial intelligence, Yokohama, Japan, 2020.
[BibTeX]

@InProceedings{IJCAI2020:propaganda:survey,
  author    = {Da San Martino, Giovanni and Cresci, Stefano and Barr{\'o}n-Cede{\~n}o, Alberto and Yu, Seunghak and Di Pietro, Roberto and Nakov, Preslav},
  title     = {A Survey on Computational Propaganda Detection},
  booktitle = {Proceedings of the 29th International Joint Conference on Artificial Intelligence and the 17th Pacific Rim International Conference on Artificial Intelligence},
  series    = {IJCAI-PRICAI~'20},
  pages     = {4826--4832},
  address   = {Yokohama, Japan},
  year      = {2020},
}

A. Barrón–Cedeño, T. Elsayed, P. Nakov, G. Da San Martino, M. Hasanain, R. Suwaileh, F. Haouari, N. Babulkov, B. Hamdan, A. Nikolov, S. Shaar, and Z. S. Ali, “Overview of CheckThat! 2020 — automatic identification and verification of claims in social media,” in Proceedings of the 11th international conference of the CLEF association: experimental IR meets multilinguality, multimodality, and interaction, Thessaloniki, Greece, 2020.
[BibTeX]

@InProceedings{clef-checkthat:2020,
  author    = {Barr{\'o}n-Cede{\~n}o, Alberto and Elsayed, Tamer and Nakov, Preslav and {Da San Martino}, Giovanni and Hasanain, Maram and Suwaileh, Reem and Haouari, Fatima and Babulkov, Nikolay and Hamdan, Bayan and Nikolov, Alex and Shaar, Shaden and Ali, Zien Sheikh},
  title     = {Overview of {CheckThat!} 2020 --- Automatic Identification and Verification of Claims in Social Media},
  booktitle = {Proceedings of the 11th International Conference of the {CLEF} Association: Experimental {IR} Meets Multilinguality, Multimodality, and Interaction},
  series    = {CLEF~'20},
  address   = {Thessaloniki, Greece},
  year      = {2020},
  nopages   = {--},
}

K. Darwish, M. Aupetit, P. Stefanov, and P. Nakov, “Unsupervised user stance detection on twitter,” in Proceedings of the international aaai conference on web and social media, Atlanta, GA, USA, 2020, p. 141–152.
[BibTeX]

@InProceedings{ICWSM2020:Unsupervised:Stance:Twitter,
  author    = {Darwish, Kareem and Aupetit, Michael and Stefanov, Peter and Nakov, Preslav},
  title     = {Unsupervised User Stance Detection on {Twitter}},
  booktitle = {Proceedings of the International {AAAI} Conference on Web and Social Media},
  series    = {ICWSM~'20},
  pages     = {141--152},
  address   = {Atlanta, GA, USA},
  year      = {2020},
}

H. Mubarak, A. Abdelali, S. Hassan, and K. Darwish, “Spam detection on arabic twitter,” in International conference on social informatics, 2020, p. 237–251.
[BibTeX]

@inproceedings{mubarak2020spam,
  author    = {Mubarak, Hamdy and Abdelali, Ahmed and Hassan, Sabit and Darwish, Kareem},
  title     = {Spam Detection on {Arabic} {Twitter}},
  booktitle = {International Conference on Social Informatics},
  pages     = {237--251},
  publisher = {Springer},
  year      = {2020},
}

M. Zampieri, P. Nakov, S. Rosenthal, P. Atanasova, G. Karadzhov, H. Mubarak, L. Derczynski, Z. Pitenis, and Ç. Çöltekin, “SemEval-2020 task 12: multilingual offensive language identification in social media (OffensEval 2020),” Arxiv preprint arxiv:2006.07235, 2020.
[BibTeX]

@article{zampieri2020semeval,
  author  = {Zampieri, Marcos and Nakov, Preslav and Rosenthal, Sara and Atanasova, Pepa and Karadzhov, Georgi and Mubarak, Hamdy and Derczynski, Leon and Pitenis, Zeses and {\c{C}}{\"o}ltekin, {\c{C}}a{\u{g}}r{\i}},
  title   = {{SemEval}-2020 Task 12: Multilingual Offensive Language Identification in Social Media ({OffensEval} 2020)},
  journal = {arXiv preprint arXiv:2006.07235},
  year    = {2020},
}

S. Shon, A. Ali, Y. Samih, H. Mubarak, and J. Glass, “Adi17: a fine-grained arabic dialect identification dataset,” in Icassp 2020-2020 ieee international conference on acoustics, speech and signal processing (icassp), 2020, p. 8244–8248.
[BibTeX]

@inproceedings{shon2020adi17,
  author    = {Shon, Suwon and Ali, Ahmed and Samih, Younes and Mubarak, Hamdy and Glass, James},
  title     = {{ADI17}: A Fine-Grained {Arabic} Dialect Identification Dataset},
  booktitle = {{ICASSP} 2020 -- 2020 {IEEE} International Conference on Acoustics, Speech and Signal Processing ({ICASSP})},
  pages     = {8244--8248},
  year      = {2020},
  publisher = {IEEE},
}

S. Hassan, Y. Samih, H. Mubarak, A. Abdelali, A. Rashed, and S. A. Chowdhury, “Alt submission for osact shared task on offensive language detection,” in Proceedings of the 4th workshop on open-source arabic corpora and processing tools, with a shared task on offensive language detection, 2020, p. 61–65.
[BibTeX]

@inproceedings{hassan2020alt,
  author    = {Hassan, Sabit and Samih, Younes and Mubarak, Hamdy and Abdelali, Ahmed and Rashed, Ammar and Chowdhury, Shammur Absar},
  title     = {{ALT} Submission for {OSACT} Shared Task on Offensive Language Detection},
  booktitle = {Proceedings of the 4th Workshop on Open-Source Arabic Corpora and Processing Tools, with a Shared Task on Offensive Language Detection},
  pages     = {61--65},
  year      = {2020},
}

M. Hasanain, F. Haouari, R. Suwaileh, Z. S. Ali, B. Hamdan, T. Elsayed, A. Barrón–Cedeño, G. Da San Martino, and P. Nakov, “Overview of CheckThat! 2020 Arabic: automatic identification and verification of claims in social media,” in Working notes of CLEF 2020 — conference and labs of the evaluation forum, Thessaloniki, Greece, 2020.
[BibTeX]

@inproceedings{clef-checkthat-ar:2020,
  author    = {Hasanain, Maram and
               Haouari, Fatima and
               Suwaileh, Reem and
               Ali, Zien Sheikh and
               Hamdan, Bayan and
               Elsayed, Tamer and
               Barr{\'o}n-Cede{\~n}o, Alberto and
               Da San Martino, Giovanni and
               Nakov, Preslav},
  title     = {Overview of {CheckThat!} 2020 {Arabic}: Automatic Identification and Verification of Claims in Social Media},
  booktitle = {Working Notes of CLEF 2020---Conference and Labs of the Evaluation Forum},
  series    = {CLEF~'2020},
  address   = {Thessaloniki, Greece},
  year      = {2020},
}

F. Alam, S. Shaar, F. Dalvi, H. Sajjad, A. Nikolov, H. Mubarak, G. Da San Martino, A. Abdelali, N. Durrani, K. Darwish, and P. Nakov, “Fighting the COVID-19 infodemic: modeling the perspective of journalists, fact-checkers, social media platforms, policy makers, and the society,” Arxiv preprint 2005.00033, 2020.
[BibTeX]

@article{alam2020fighting,
  author        = {Alam, Firoj and Shaar, Shaden and Dalvi, Fahim and Sajjad, Hassan and Nikolov, Alex and Mubarak, Hamdy and Da San Martino, Giovanni and Abdelali, Ahmed and Durrani, Nadir and Darwish, Kareem and Nakov, Preslav},
  title         = {Fighting the {COVID}-19 Infodemic: Modeling the Perspective of Journalists, Fact-Checkers, Social Media Platforms, Policy Makers, and the Society},
  journal       = {ArXiv preprint 2005.00033},
  year          = {2020},
  eprint        = {2005.00033},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}

V. Nguyen, K. Sugiyama, P. Nakov, and M. Kan, “FANG: leveraging social context for fake news detection using graph representation,” in Proceedings of the 29th acm international conference on information and knowledge management, 2020.
[BibTeX]

@inproceedings{CIKM2020:FANG,
  title     = {{FANG}: Leveraging Social Context for Fake News Detection Using Graph Representation},
  author    = {Van-Hoang Nguyen and Kazunari Sugiyama and Preslav Nakov and Min-Yen Kan},
  booktitle = {Proceedings of the 29th ACM International Conference on Information and Knowledge Management},
  year      = {2020},
  series    = {CIKM~'20},
}

F. Alam, S. Shaar, A. Nikolov, H. Mubarak, G. Da San Martino, A. Abdelali, F. Dalvi, N. Durrani, H. Sajjad, K. Darwish, and others, “Fighting the COVID-19 infodemic: modeling the perspective of journalists, fact-checkers, social media platforms, policy makers, and the society,” Arxiv preprint arxiv:2005.00033, 2020.
[BibTeX]

@article{alam2020fightingb,
  author  = {Alam, Firoj and Shaar, Shaden and Nikolov, Alex and Mubarak, Hamdy and Da San Martino, Giovanni and Abdelali, Ahmed and Dalvi, Fahim and Durrani, Nadir and Sajjad, Hassan and Darwish, Kareem and others},
  title   = {Fighting the {COVID}-19 Infodemic: Modeling the Perspective of Journalists, Fact-Checkers, Social Media Platforms, Policy Makers, and the Society},
  journal = {arXiv preprint arXiv:2005.00033},
  year    = {2020},
}

F. Alam, F. Ofli, M. Imran, T. Alam, and U. Qazi, “Deep learning benchmarks and datasets for social media image classification for disaster response,” in ASONAM, 2020, pp. 151–158. doi:10.1109/ASONAM49781.2020.9381294
[BibTeX]

@inproceedings{Alam9381294,
  author    = {Alam, Firoj and Ofli, Ferda and Imran, Muhammad and Alam, Tanvirul and Qazi, Umair},
  title     = {Deep Learning Benchmarks and Datasets for Social Media Image Classification for Disaster Response},
  booktitle = {Proceedings of the {IEEE/ACM} International Conference on Advances in Social Networks Analysis and Mining ({ASONAM})},
  pages     = {151--158},
  year      = {2020},
  doi       = {10.1109/ASONAM49781.2020.9381294},
}

S. A. Chowdhury, A. Ali, S. Shon, and J. Glass, “What does an end-to-end dialect identification model learn about non-dialectal information?,” in Proceedings of the 21st annual conference of the international speech communication association (interspeech’20), Shanghai, China, 2020.
[BibTeX]

@InProceedings{INTERSPEECH2020:ADIAnalysis,
  author    = "Shammur Absar Chowdhury and Ahmed Ali and Suwon Shon and James Glass",
  title     = "What does an End-to-End Dialect Identification Model Learn about Non-dialectal Information?",
  booktitle = "Proceedings of the 21st Annual Conference of the International Speech Communication Association (INTERSPEECH'20)",
  series    = "INTERSPEECH~'20",
  year      = "2020",
  address   = "Shanghai, China",
}

S. A. Chowdhury, Y. Samih, M. Eldesouki, and A. Ali, “Effects of dialectal code-switching on speech modules: a study using egyptian arabic broadcast speech,” in Proceedings of the 21st annual conference of the international speech communication association (interspeech’20), Shanghai, China, 2020.
[BibTeX]

@inproceedings{INTERSPEECH2020:DACS,
  author    = {Chowdhury, Shammur Absar and Samih, Younes and Eldesouki, Mohamed and Ali, Ahmed},
  title     = {Effects of Dialectal Code-Switching on Speech Modules: A Study Using {Egyptian} {Arabic} Broadcast Speech},
  booktitle = {Proceedings of the 21st Annual Conference of the International Speech Communication Association (INTERSPEECH'20)},
  series    = {INTERSPEECH~'20},
  address   = {Shanghai, China},
  year      = {2020},
}

H. Mubarak, A. Rashed, K. Darwish, Y. Samih, and A. Abdelali, Arabic offensive language on twitter: analysis and experiments, 2020.
[BibTeX]

@misc{mubarak2020arabic,
  author        = {Mubarak, Hamdy and Rashed, Ammar and Darwish, Kareem and Samih, Younes and Abdelali, Ahmed},
  title         = {{Arabic} Offensive Language on {Twitter}: Analysis and Experiments},
  year          = {2020},
  eprint        = {2004.02192},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}

K. Darwish, A. Abdelali, H. Mubarak, and M. Eldesouki, “Arabic diacritic recovery using a feature-rich bilstm model,” Arxiv preprint arxiv:2002.01207, 2020.
[BibTeX]

@article{darwish2020arabic,
  author  = {Darwish, Kareem and Abdelali, Ahmed and Mubarak, Hamdy and Eldesouki, Mohamed},
  title   = {{Arabic} Diacritic Recovery Using a Feature-Rich {biLSTM} Model},
  journal = {arXiv preprint arXiv:2002.01207},
  year    = {2020},
}

Y. Samih and K. Darwish, “A few topical tweets are enough for effective user-level stance detection,” Arxiv preprint arxiv:2004.03485, 2020.
[BibTeX]

@Article{samih2020few,
  author  = "Samih, Younes and Darwish, Kareem",
  title   = "A Few Topical Tweets are Enough for Effective User-Level Stance Detection",
  year    = "2020",
  journal = "arXiv preprint arXiv:2004.03485",
}

K. Darwish, M. Attia, H. Mubarak, Y. Samih, A. Abdelali, L. Màrquez, M. Eldesouki, and L. Kallmeyer, “Effective multi dialectal arabic pos tagging,” Natural language engineering, vol. 1, iss. 1, p. 18, 2020.
[BibTeX]

@article{darwish2020effective,
  author    = {Darwish, Kareem and Attia, Mohammed and Mubarak, Hamdy and Samih, Younes and Abdelali, Ahmed and M{\`a}rquez, Llu{\'\i}s and Eldesouki, Mohamed and Kallmeyer, Laura},
  title     = {Effective Multi Dialectal {Arabic} {POS} Tagging},
  journal   = {Natural Language Engineering},
  volume    = {1},
  number    = {1},
  pages     = {18},
  year      = {2020},
  publisher = {Cambridge University Press},
}

A. Abdelali, H. Mubarak, Y. Samih, S. Hassan, and K. Darwish, “Arabic dialect identification in the wild,” Arxiv preprint arxiv:2005.06557, 2020.
[BibTeX]

@article{abdelali2020arabic,
  author  = {Abdelali, Ahmed and Mubarak, Hamdy and Samih, Younes and Hassan, Sabit and Darwish, Kareem},
  title   = {{Arabic} Dialect Identification in the Wild},
  journal = {arXiv preprint arXiv:2005.06557},
  year    = {2020},
}

S. A. Chowdhury, A. Abdelali, K. Darwish, S. Jung, J. Salminen, and B. J. Jansen, “Improving Arabic text categorization using transformer training diversification,” in Proceedings of the fifth arabic natural language processing workshop, 2020, p. 226–236.
[BibTeX]

@inproceedings{chowdhury2020improving,
  author    = {Chowdhury, Shammur Absar and Abdelali, Ahmed and Darwish, Kareem and Jung, Soon-gyo and Salminen, Joni and Jansen, Bernard J.},
  title     = {Improving {Arabic} Text Categorization Using Transformer Training Diversification},
  booktitle = {Proceedings of the Fifth Arabic Natural Language Processing Workshop},
  pages     = {226--236},
  year      = {2020},
}

H. Mubarak, K. Darwish, W. Magdy, T. Elsayed, and H. Al–Khalifa, “Overview of osact4 arabic offensive language detection shared task,” in Proceedings of the 4th workshop on open-source arabic corpora and processing tools, with a shared task on offensive language detection, 2020, p. 48–52.
[BibTeX]

@inproceedings{mubarak2020overview,
  author    = {Mubarak, Hamdy and Darwish, Kareem and Magdy, Walid and Elsayed, Tamer and Al-Khalifa, Hend},
  title     = {Overview of {OSACT4} {Arabic} Offensive Language Detection Shared Task},
  booktitle = {Proceedings of the 4th Workshop on Open-Source Arabic Corpora and Processing Tools, with a Shared Task on Offensive Language Detection},
  pages     = {48--52},
  year      = {2020},
}

H. Al–Khalifa, W. Magdy, K. Darwish, T. Elsayed, and H. Mubarak, “Proceedings of the 4th workshop on open-source arabic corpora and processing tools, with a shared task on offensive language detection,” in Proceedings of the 4th workshop on open-source arabic corpora and processing tools, with a shared task on offensive language detection, 2020.
[BibTeX]

@proceedings{al2020proceedings,
  editor = {Al-Khalifa, Hend and Magdy, Walid and Darwish, Kareem and Elsayed, Tamer and Mubarak, Hamdy},
  title  = {Proceedings of the 4th Workshop on Open-Source {Arabic} Corpora and Processing Tools, with a Shared Task on Offensive Language Detection},
  year   = {2020},
}

A. ElAlfy, K. M. Darwish, and O. Weber, “Corporations and sustainable development goals communication on social media: corporate social responsibility or just another buzzword?,” Sustainable development, 2020.
[BibTeX]

@article{elalfy2020corporations,
  author    = {ElAlfy, Amr and Darwish, Kareem M. and Weber, Olaf},
  title     = {Corporations and sustainable development goals communication on social media: Corporate social responsibility or just another buzzword?},
  journal   = {Sustainable Development},
  year      = {2020},
  publisher = {John Wiley \& Sons, Inc.},
  address   = {Chichester, UK},
}

C. Shurafa, K. Darwish, and W. Zaghouani, “Political framing: us covid19 blame game,” International conference on social informatics (socinfo-2020), 2020.
[BibTeX]

@inproceedings{shurafa2020political,
  author    = {Shurafa, Chereen and Darwish, Kareem and Zaghouani, Wajdi},
  title     = {Political Framing: {US} {COVID19} Blame Game},
  booktitle = {International Conference on Social Informatics (SocInfo-2020)},
  year      = {2020},
}

A. Rafae, A. Karim, H. Sajjad, F. Kamiran, and J. Xu, “A clustering framework for lexical normalization of roman Urdu,” Natural language engineering (nle), 2020.
[BibTeX]

@article{rafae2018:CL,
  author   = {Rafae, Abdul and Karim, Asim and Sajjad, Hassan and Kamiran, Faisal and Xu, Jia},
  title    = {A Clustering Framework for Lexical Normalization of {Roman} {Urdu}},
  journal  = {Natural Language Engineering},
  year     = {2020},
  keywords = {journal},
}

Y. Belinkov*, N. Durrani*, H. Sajjad, F. Dalvi, and J. Glass, “On the linguistic representational power of neural machine translation models,” Computational linguistics, vol. 46, iss. 1, 2020.
[BibTeX]

@article{belinkov2020cl,
  author    = {Yonatan Belinkov* and Nadir Durrani* and Hassan Sajjad and Fahim Dalvi and James Glass},
  title     = {On the Linguistic Representational Power of Neural Machine Translation Models},
  journal   = {Computational Linguistics},
  volume    = {46},
  number    = {1},
  year      = {2020},
  publisher = {MIT Press},
  address   = {Cambridge, MA, USA},
}

S. Rosenthal, P. Atanasova, G. Karadzhov, M. Zampieri, and P. Nakov, “A large-scale semi-supervised dataset for offensive language identification,” Arxiv preprint 2004.14454, 2020.
[BibTeX]

@article{SOLID,
  author        = {Rosenthal, Sara and Atanasova, Pepa and Karadzhov, Georgi and Zampieri, Marcos and Nakov, Preslav},
  title         = {A large-scale semi-supervised dataset for offensive language identification},
  journal       = {ArXiv preprint 2004.14454},
  year          = {2020},
  eprint        = {2004.14454},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}

F. Alam, F. Dalvi, S. Shaar, N. Durrani, H. Mubarak, A. Nikolov, G. Da San Martino, A. Abdelali, H. Sajjad, K. Darwish, and P. Nakov, “Fighting the COVID-19 infodemic in social media: a holistic perspective and a call to arms,” Arxiv preprint 2007.07996, 2020.
[BibTeX]

@article{alam2020call2arms,
  author        = {Alam, Firoj and Dalvi, Fahim and Shaar, Shaden and Durrani, Nadir and Mubarak, Hamdy and Nikolov, Alex and Da San Martino, Giovanni and Abdelali, Ahmed and Sajjad, Hassan and Darwish, Kareem and Nakov, Preslav},
  title         = {Fighting the {COVID}-19 Infodemic in Social Media: A Holistic Perspective and a Call to Arms},
  journal       = {ArXiv preprint 2007.07996},
  year          = {2020},
  eprint        = {2007.07996},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}

S. A. Chowdhury, H. Mubarak, A. Abdelali, S. Jung, B. J. Jansen, and J. Salminen, “A multi-platform arabic news comment dataset for offensive language detection,” in Proceedings of the 12th language resources and evaluation conference, 2020, p. 6203–6212.
[BibTeX]

@inproceedings{chowdhury2020multi,
  author    = {Chowdhury, Shammur Absar and Mubarak, Hamdy and Abdelali, Ahmed and Jung, Soon-gyo and Jansen, Bernard J. and Salminen, Joni},
  title     = {A Multi-Platform {Arabic} News Comment Dataset for Offensive Language Detection},
  booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference},
  pages     = {6203--6212},
  year      = {2020},
}

2019

D. Kopev, A. Ali, I. Koychev, and P. Nakov, “Detecting deception in political debates using acoustic and textual features,” in Proceedings of the ieee automatic speech recognition and understanding workshop, Singapore, 2019, p. 652–659.
[BibTeX]

@inproceedings{ASRU2019:deception,
  title     = {Detecting Deception in Political Debates Using Acoustic and Textual Features},
  author    = {Daniel Kopev and Ahmed Ali and Ivan Koychev and Preslav Nakov},
  booktitle = {Proceedings of the IEEE Automatic Speech Recognition and Understanding Workshop},
  address   = {Singapore},
  pages     = {652--659},
  year      = {2019},
  month     = {December},
  series    = {ASRU~'19},
}

S. Yu, G. Da San Martino, and P. Nakov, “Experiments in detecting persuasion techniques in the news,” in Proceedings of the neurips 2019 joint workshop on ai for social good, Vancouver, Canada, 2019.
[BibTeX]

@inproceedings{NeurIPS2019:propaganda,
  title     = {Experiments in Detecting Persuasion Techniques in the News},
  author    = {Yu, Seunghak and Da San Martino, Giovanni and Nakov, Preslav},
  booktitle = {Proceedings of the NeurIPS 2019 Joint Workshop on AI for Social Good},
  address   = {Vancouver, Canada},
  year      = {2019},
  month     = {December},
  series    = {NeurIPS~'19},
}

A. Atanasov, G. De Francisci Morales, and P. Nakov, “Predicting the role of political trolls in social media,” in Proceedings of the 2019 signll conference on computational natural language learning, Hong Kong, China, 2019, p. 1023–1034.
[BibTeX]

@inproceedings{CoNLL2019:troll:roles,
  title     = {Predicting the Role of Political Trolls in Social Media},
  author    = {Atanas Atanasov and De Francisci Morales, Gianmarco and Preslav Nakov},
  booktitle = {Proceedings of the 2019 SIGNLL Conference on Computational Natural Language Learning},
  address   = {Hong Kong, China},
  pages     = {1023--1034},
  year      = {2019},
  month     = {November},
  series    = {CoNLL~'19},
}

M. Mohtarami, J. Glass, and P. Nakov, “Contrastive language adaptation for cross-lingual stance detection,” in Proceedings of the 2019 conference on empirical methods in natural language processing, Hong Kong, China, 2019, p. 4442–4452.
[BibTeX]

@inproceedings{EMNLP2019:Stance:crosslanguage:contrastive,
  title     = {Contrastive Language Adaptation for Cross-Lingual Stance Detection},
  author    = {Mitra Mohtarami and James Glass and Preslav Nakov},
  booktitle = {Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing},
  address   = {Hong Kong, China},
  pages     = {4442--4452},
  year      = {2019},
  month     = {November},
  series    = {EMNLP~'19},
}

G. Da San Martino, S. Yu, A. Barrón–Cedeño, R. Petrov, and P. Nakov, “Fine-grained analysis of propaganda in news articles,” in Proceedings of the 2019 conference on empirical methods in natural language processing, Hong Kong, China, 2019, p. 5636–5646.
[BibTeX]

@inproceedings{EMNLP2019:propaganda:finegrained,
  author    = {Da San Martino, Giovanni and Yu, Seunghak and Barr{\'o}n-Cede{\~n}o, Alberto and Petrov, Rostislav and Nakov, Preslav},
  title     = {Fine-Grained Analysis of Propaganda in News Articles},
  booktitle = {Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing},
  series    = {EMNLP~'19},
  pages     = {5636--5646},
  address   = {Hong Kong, China},
  month     = {November},
  year      = {2019},
}

D. Zlatkova, P. Nakov, and I. Koychev, “Fact-checking meets fauxtography: verifying claims about images,” in Proceedings of the 2019 conference on empirical methods in natural language processing, Hong Kong, China, 2019, p. 2099–2108.
[BibTeX]

@inproceedings{EMNLP2019:fauxtography,
  title     = {Fact-Checking Meets Fauxtography: Verifying Claims About Images},
  author    = {Dimitrina Zlatkova and Preslav Nakov and Ivan Koychev},
  booktitle = {Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing},
  address   = {Hong Kong, China},
  pages     = {2099--2108},
  year      = {2019},
  month     = {November},
  series    = {EMNLP~'19},
}

Y. Zhang, G. Da San Martino, A. Barrón–Cedeño, S. Romeo, J. An, H. Kwak, T. Staykovski, I. Jaradat, G. Karadzhov, R. Baly, K. Darwish, J. Glass, and P. Nakov, “Tanbih: get to know what you are reading,” in Proceedings of the 2019 conference on empirical methods in natural language processing, Hong Kong, China, 2019, p. 223–228.
[BibTeX]

@inproceedings{EMNLP2019:tanbih,
  author    = {Zhang, Yifan and Da San Martino, Giovanni and Barr{\'o}n-Cede{\~n}o, Alberto and Romeo, Salvatore and An, Jisun and Kwak, Haewoon and Staykovski, Todor and Jaradat, Israa and Karadzhov, Georgi and Baly, Ramy and Darwish, Kareem and Glass, James and Nakov, Preslav},
  title     = {Tanbih: Get To Know What You Are Reading},
  booktitle = {Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing},
  series    = {EMNLP~'19},
  pages     = {223--228},
  address   = {Hong Kong, China},
  month     = {November},
  year      = {2019},
}

H. Mubarak, A. Abdelali, K. Darwish, M. Eldesouki, Y. Samih, and H. Sajjad, “A system for diacritizing four varieties of Arabic,” in Proceedings of the empirical methods in natural language processing (emnlp), 2019.
[BibTeX]

@inproceedings{diacritic2019emnlp,
  author    = {Mubarak, Hamdy and Abdelali, Ahmed and Darwish, Kareem and Eldesouki, Mohamed and Samih, Younes and Sajjad, Hassan},
  title     = {A System for Diacritizing Four Varieties of {Arabic}},
  booktitle = {Proceedings of the Empirical Methods in Natural Language Processing (EMNLP)},
  month     = {November},
  year      = {2019},
}

Y. Dinkov, A. Ali, I. Koychev, and P. Nakov, “Predicting the leading political ideology of Youtube channels using acoustic, textual and metadata information,” in Proceedings of the 20th annual conference of the international speech communication association, Graz, Austria, 2019, p. 501–505.
[BibTeX]

@inproceedings{INTERSPEECH2019:youtube,
  author    = {Dinkov, Yoan and Ali, Ahmed and Koychev, Ivan and Nakov, Preslav},
  title     = {Predicting the Leading Political Ideology of {YouTube} Channels Using Acoustic, Textual and Metadata Information},
  booktitle = {Proceedings of the 20th Annual Conference of the International Speech Communication Association},
  series    = {INTERSPEECH~'19},
  pages     = {501--505},
  address   = {Graz, Austria},
  month     = {September},
  year      = {2019},
}

S. Vasileva, P. Atanasova, L. Màrquez, A. Barrón–Cedeño, and P. Nakov, “It takes nine to smell a rat: neural multi-task learning for check-worthiness prediction,” in Proceedings of the international conference on recent advances in natural language processing, Varna, Bulgaria, 2019, p. 1229–1239.
[BibTeX]

@InProceedings{RANLP2019:checkworthiness:multitask,
  author    = "Slavena Vasileva and Pepa Atanasova and Lluís Màrquez and Alberto Barrón-Cedeño and Preslav Nakov",
  title     = "It Takes Nine to Smell a Rat: Neural Multi-Task Learning for Check-Worthiness Prediction",
  booktitle = "Proceedings of the International Conference on Recent Advances in Natural Language Processing",
  series    = "RANLP~'19",
  pages     = "1229--1239",
  address   = "Varna, Bulgaria",
  month     = "September",
  year      = "2019",
}

P. Atanasova, P. Nakov, G. Karadzhov, M. Mohtarami, and G. Da San Martino, “Overview of the CLEF-2019 CheckThat! Lab on Automatic Identification and Verification of Claims. Task 1: Check-Worthiness,” in Clef 2019 working notes. working notes of clef 2019 – conference and labs of the evaluation forum (clef’19), Lugano, Switzerland, 2019.
[BibTeX]

@inproceedings{clef-checkthat-T1:2019,
  author    = {Atanasova, Pepa and Nakov, Preslav and Karadzhov, Georgi and Mohtarami, Mitra and Da San Martino, Giovanni},
  title     = {Overview of the {CLEF-2019 CheckThat! Lab on Automatic Identification and Verification of Claims. Task 1: Check-Worthiness}},
  booktitle = {CLEF 2019 Working Notes. Working Notes of CLEF 2019 - Conference and Labs of the Evaluation Forum (CLEF'19)},
  series    = {{CEUR} Workshop Proceedings},
  editor    = {Cappellato, Linda and Ferro, Nicola and Losada, David E. and M{\"u}ller, Henning},
  publisher = {CEUR-WS.org},
  address   = {Lugano, Switzerland},
  month     = {September},
  year      = {2019},
}

M. Hasanain, R. Suwaileh, T. Elsayed, A. Barrón–Cedeño, and P. Nakov, “Overview of the CLEF-2019 CheckThat! Lab on Automatic Identification and Verification of Claims. Task 2: Evidence and Factuality,” in Clef 2019 working notes. working notes of clef 2019 – conference and labs of the evaluation forum (clef’19), Lugano, Switzerland, 2019.
[BibTeX]

@inproceedings{clef-checkthat-T2:2019,
  author    = {Hasanain, Maram and Suwaileh, Reem and Elsayed, Tamer and Barr{\'o}n-Cede{\~n}o, Alberto and Nakov, Preslav},
  title     = {Overview of the {CLEF-2019 CheckThat! Lab on Automatic Identification and Verification of Claims. Task 2: Evidence and Factuality}},
  booktitle = {CLEF 2019 Working Notes. Working Notes of CLEF 2019 - Conference and Labs of the Evaluation Forum (CLEF'19)},
  series    = {{CEUR} Workshop Proceedings},
  editor    = {Cappellato, Linda and Ferro, Nicola and Losada, David E. and M{\"u}ller, Henning},
  publisher = {CEUR-WS.org},
  address   = {Lugano, Switzerland},
  month     = {September},
  year      = {2019},
}

Y. Dinkov, I. Koychev, and P. Nakov, “Detecting toxicity in news articles: application to Bulgarian,” in Proceedings of the international conference on recent advances in natural language processing, Varna, Bulgaria, 2019, p. 247–258.
[BibTeX]

@InProceedings{RANLP2019:toxicity,
  author    = "Yoan Dinkov and Ivan Koychev and Preslav Nakov",
  title     = "Detecting Toxicity in News Articles: Application to {B}ulgarian",
  booktitle = "Proceedings of the International Conference on Recent Advances in Natural Language Processing",
  series    = "RANLP~'19",
  pages     = "247--258",
  address   = "Varna, Bulgaria",
  month     = "September",
  year      = "2019",
}

T. Elsayed, P. Nakov, A. Barrón–Cedeño, M. Hasanain, R. Suwaileh, G. Da San Martino, and P. Atanasova, “Overview of the CLEF-2019 CheckThat!: automatic identification and verification of claims,” in Experimental ir meets multilinguality, multimodality, and interaction, Lugano, Switzerland, 2019.
[BibTeX]

@inproceedings{clef-checkthat:2019,
  author    = {Elsayed, Tamer and
               Nakov, Preslav and
               Barr{\'o}n-Cede{\~n}o, Alberto and
               Hasanain, Maram and
               Suwaileh, Reem and
               Da San Martino, Giovanni and
               Atanasova, Pepa},
  title     = {Overview of the {CLEF-2019 CheckThat!}: Automatic Identification and Verification of Claims},
  booktitle = {Experimental IR Meets Multilinguality, Multimodality, and Interaction},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer},
  address   = {Lugano, Switzerland},
  month     = {September},
  year      = {2019},
}

H. Mubarak, A. Abdelali, H. Sajjad, Y. Samih, and K. Darwish, “Highly Effective Arabic Diacritization using Sequence to Sequence Modeling,” in Proceedings of the annual conference of the north american chapter of the association for computational linguistics: human language technologies (naacl), 2019.
[BibTeX]

@inproceedings{mubarak:2019:NAACL,
  author    = {Mubarak, Hamdy and Abdelali, Ahmed and Sajjad, Hassan and Samih, Younes and Darwish, Kareem},
  title     = {Highly Effective {Arabic} Diacritization using Sequence to Sequence Modeling},
  booktitle = {Proceedings of the Annual Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (NAACL)},
  month     = {June},
  year      = {2019},
}

N. Durrani, F. Dalvi, H. Sajjad, Y. Belinkov, and P. Nakov, “One Size Does Not Fit All: Comparing NMT Representations of Different Granularities,” in Proceedings of the annual conference of the north american chapter of the association for computational linguistics: human language technologies (naacl), 2019.
[BibTeX]

@inproceedings{durrani:2019:NAACL,
  author    = {Durrani, Nadir and Dalvi, Fahim and Sajjad, Hassan and Belinkov, Yonatan and Nakov, Preslav},
  title     = {One Size Does Not Fit All: Comparing {NMT} Representations of Different Granularities},
  booktitle = {Proceedings of the Annual Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (NAACL)},
  month     = {June},
  year      = {2019},
  location  = {Minneapolis, US},
}

T. Mihaylova, G. Karadzhov, P. Atanasova, R. Baly, M. Mohtarami, and P. Nakov, “SemEval-2019 task 8: fact checking in community question answering forums,” in Proceedings of the 13th international workshop on semantic evaluation (semeval’19), Minneapolis, Minnesota, USA, 2019, p. 860–869.
[BibTeX] [Download PDF]

@InProceedings{mihaylova-etal-2019-semeval,
  author    = {Mihaylova, Tsvetomila and
               Karadzhov, Georgi and
               Atanasova, Pepa and
               Baly, Ramy and
               Mohtarami, Mitra and
               Nakov, Preslav},
  title     = {{S}em{E}val-2019 Task 8: Fact Checking in Community Question Answering Forums},
  booktitle = {Proceedings of the 13th International Workshop on Semantic Evaluation (SemEval'19)},
  series    = {SemEval~'19},
  pages     = {860--869},
  publisher = {Association for Computational Linguistics},
  address   = {Minneapolis, Minnesota, USA},
  month     = {June},
  year      = {2019},
  url       = {https://www.aclweb.org/anthology/S19-2149},
}

A. Saleh, R. Baly, A. Barrón–Cedeño, G. Da San Martino, M. Mohtarami, P. Nakov, and J. Glass, “Team QCRI-MIT at SemEval-2019 task 4: propaganda analysis meets hyperpartisan news detection,” in Proceedings of the 13th international workshop on semantic evaluation (semeval’19), Minneapolis, Minnesota, USA, 2019, p. 1041–1046.
[BibTeX] [Download PDF]

@InProceedings{saleh-etal-2019-team,
  author    = {Saleh, Abdelrhman and
               Baly, Ramy and
               Barr{\'o}n-Cede{\~n}o, Alberto and
               Da San Martino, Giovanni and
               Mohtarami, Mitra and
               Nakov, Preslav and
               Glass, James},
  title     = {Team {QCRI}-{MIT} at {S}em{E}val-2019 Task 4: Propaganda Analysis Meets Hyperpartisan News Detection},
  booktitle = {Proceedings of the 13th International Workshop on Semantic Evaluation (SemEval'19)},
  series    = {SemEval~'19},
  pages     = {1041--1046},
  publisher = {Association for Computational Linguistics},
  address   = {Minneapolis, Minnesota, USA},
  month     = {June},
  year      = {2019},
  url       = {https://www.aclweb.org/anthology/S19-2182},
}

D. Shaprin, G. Da San Martino, A. Barrón–Cedeño, and P. Nakov, “Team jack ryder at SemEval-2019 task 4: using BERT representations for detecting hyperpartisan news,” in Proceedings of the 13th international workshop on semantic evaluation (semeval’19), Minneapolis, Minnesota, USA, 2019, p. 1012–1015.
[BibTeX] [Download PDF]

@inproceedings{shaprin-etal-2019-team,
  author    = {Shaprin, Daniel and
               Da San Martino, Giovanni and
               Barr{\'o}n-Cede{\~n}o, Alberto and
               Nakov, Preslav},
  title     = {Team {Jack Ryder} at {SemEval}-2019 Task 4: Using {BERT} Representations for Detecting Hyperpartisan News},
  booktitle = {Proceedings of the 13th International Workshop on Semantic Evaluation (SemEval'19)},
  series    = {SemEval~'19},
  pages     = {1012--1015},
  publisher = {Association for Computational Linguistics},
  address   = {Minneapolis, Minnesota, USA},
  month     = {June},
  year      = {2019},
  url       = {https://www.aclweb.org/anthology/S19-2176},
}

N. Durrani, F. Dalvi, H. Sajjad, Y. Belinkov, and P. Nakov, “One size does not fit all: comparing NMT representations of different granularities,” in Proceedings of the 2019 conference of the north American chapter of the association for computational linguistics: human language technologies, volume 1 (long and short papers), Minneapolis, Minnesota, 2019, p. 1504–1516.
[BibTeX] [Download PDF]

@InProceedings{durrani-etal-2019-one,
  author    = {Durrani, Nadir and
               Dalvi, Fahim and
               Sajjad, Hassan and
               Belinkov, Yonatan and
               Nakov, Preslav},
  title     = {One Size Does Not Fit All: Comparing {NMT} Representations of Different Granularities},
  booktitle = {Proceedings of the 2019 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)},
  pages     = {1504--1516},
  publisher = {Association for Computational Linguistics},
  address   = {Minneapolis, Minnesota},
  month     = {June},
  year      = {2019},
  url       = {https://www.aclweb.org/anthology/N19-1154},
}

P. Atanasova, P. Nakov, L. Màrquez, A. Barrón–Cedeño, G. Karadzhov, T. Mihaylova, M. Mohtarami, and J. Glass, “Automatic fact-checking using context and discourse information,” J. data and information quality, vol. 11, iss. 3, p. 12:1–12:27, 2019.
[BibTeX]

@article{Atanasova:2019:AFU:3331015.3297722,
  author     = {Atanasova, Pepa and Nakov, Preslav and M{\`a}rquez, Llu{\'\i}s and Barr{\'o}n-Cede{\~n}o, Alberto and Karadzhov, Georgi and Mihaylova, Tsvetomila and Mohtarami, Mitra and Glass, James},
  title      = {Automatic Fact-Checking Using Context and Discourse Information},
  journal    = {Journal of Data and Information Quality},
  issue_date = {June 2019},
  volume     = {11},
  number     = {3},
  month      = {May},
  year       = {2019},
  issn       = {1936-1955},
  pages      = {12:1--12:27},
  articleno  = {12},
  numpages   = {27},
  NOurl      = {http://doi.acm.org/10.1145/3297722},
  NOdoi      = {10.1145/3297722},
  acmid      = {3297722},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  keywords   = {Fact-checking, community question-answering, discourse},
}

A. D. Bau*, Y. Belinkov*, H. Sajjad, F. Dalvi, N. Durrani, and J. Glass, “Identifying and controlling important neurons in neural machine translation,” in International conference on learning representations (iclr), 2019.
[BibTeX]

@inproceedings{individual:iclr19,
  author    = {D. Anthony Bau* and Yonatan Belinkov* and Hassan Sajjad and Fahim Dalvi and Nadir Durrani and James Glass},
  title     = {Identifying and Controlling Important Neurons in Neural Machine Translation},
  booktitle = {International Conference on Learning Representations (ICLR)},
  location  = {New Orleans, US},
  month     = {May},
  year      = {2019},
  keywords  = {conference},
}

T. Elsayed, P. Nakov, A. Barrón–Cedeño, M. Hasanain, R. Suwaileh, P. Atanasova, and G. Da San Martino, “CheckThat! at CLEF 2019: automatic identification and verification of claims,” in Proceedings of the 41st european conference on information retrieval (ecir’19), Cologne, Germany, 2019, p. 309–315.
[BibTeX]

@inproceedings{CheckThat:ECIR2019,
  title     = {{CheckThat}! at {CLEF} 2019: Automatic Identification and Verification of Claims},
  author    = {Elsayed, Tamer and Nakov, Preslav and Barr\'{o}n-Cede\~{n}o, Alberto and Hasanain, Maram and Suwaileh, Reem and Atanasova, Pepa and Da San Martino, Giovanni},
  booktitle = {Proceedings of the 41st European Conference on Information Retrieval (ECIR'19)},
  series    = {ECIR~'19},
  address   = {Cologne, Germany},
  year      = {2019},
  month     = {April},
  pages     = {309--315},
}

R. Dangovski, L. Jing, P. Nakov, M. Tatalović, and M. Soljačić, “Rotational unit of memory: a novel representation unit for RNNs with scalable applications,” Transactions of the association for computational linguistics, vol. 7, p. 121–138, 2019. doi:10.1162/tacl_a_00258
[BibTeX] [Download PDF]

@article{dangovski-etal-2019-rotational,
  author  = {Dangovski, Rumen and Jing, Li and Nakov, Preslav and Tatalovi{\'c}, Mi{\'c}o and Solja{\v{c}}i{\'c}, Marin},
  title   = {Rotational Unit of Memory: A Novel Representation Unit for {RNN}s with Scalable Applications},
  journal = {Transactions of the Association for Computational Linguistics},
  volume  = {7},
  pages   = {121--138},
  year    = {2019},
  month   = {March},
  doi     = {10.1162/tacl_a_00258},
  url     = {https://www.aclweb.org/anthology/Q19-1008},
}

F. Dalvi*, N. Durrani*, H. Sajjad*, Y. Belinkov, A. D. Bau, and J. Glass, “What is one grain of sand in the desert? analyzing individual neurons in deep nlp models,” in Proceedings of the aaai conference on artificial intelligence (aaai), 2019.
[BibTeX]

@inproceedings{grain:aaai19-1,
  author    = {Fahim Dalvi* and Nadir Durrani* and Hassan Sajjad* and Yonatan Belinkov and D. Anthony Bau and James Glass},
  title     = {What is one Grain of Sand in the Desert? Analyzing Individual Neurons in Deep {NLP} Models},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence (AAAI)},
  location  = {Honolulu, US},
  month     = {March},
  year      = {2019},
  keywords  = {conference},
}

F. Dalvi, A. Nortonsmith, A. D. Bau, Y. Belinkov, H. Sajjad, N. Durrani, and J. Glass, “Neurox: a toolkit for analyzing individual neurons in neural networks,” in Aaai conference on artificial intelligence (aaai), 2019.
[BibTeX]

@inproceedings{neurox-aaai19:demo,
  author    = {Fahim Dalvi and Avery Nortonsmith and D. Anthony Bau and Yonatan Belinkov and Hassan Sajjad and Nadir Durrani and James Glass},
  title     = {{NeuroX}: A Toolkit for Analyzing Individual Neurons in Neural Networks},
  booktitle = {AAAI Conference on Artificial Intelligence (AAAI)},
  location  = {Honolulu, USA},
  month     = {January},
  year      = {2019},
}

M. Zampieri, S. Malmasi, P. Nakov, S. Rosenthal, N. Farra, and R. Kumar, “Predicting the type and target of offensive posts in social media,” in Proceedings of the 17th annual conference of the north american chapter of the association for computational linguistics: human language technologies, Minneapolis, MN, USA, 2019, p. 1415–1420.
[BibTeX]

@inproceedings{OffenseEval:NAACL:2019,
  title       = {Predicting the Type and Target of Offensive Posts in Social Media},
  author      = {Marcos Zampieri and Shervin Malmasi and Preslav Nakov and Sara Rosenthal and Noura Farra and Ritesh Kumar},
  booktitle   = {Proceedings of the 17th Annual Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
  series      = {NAACL-HLT~'19},
  address     = {Minneapolis, MN, USA},
  pages       = {1415--1420},
  year        = {2019},
  NOpublisher = {Association for Computational Linguistics},
  NOurl       = {https://www.aclweb.org/anthology/N19-1144.pdf},
}

A. Barrón–Cedeño, G. Da San Martino, I. Jaradat, and P. Nakov, “Proppy: Organizing the news based on their propagandistic content,” Information processing & management, vol. 56, iss. 5, p. 1849–1864, 2019.
[BibTeX]

@article{Barron:19,
  author  = {Barr{\'o}n-Cede{\~n}o, Alberto and Da San Martino, Giovanni and Jaradat, Israa and Nakov, Preslav},
  title   = {{Proppy}: Organizing the News Based on Their Propagandistic Content},
  journal = {Information Processing \& Management},
  volume  = {56},
  number  = {5},
  pages   = {1849--1864},
  year    = {2019},
  issn    = {0306-4573},
  NOdoi   = {https://doi.org/10.1016/j.ipm.2019.03.005},
  NOurl   = {http://www.sciencedirect.com/science/article/pii/S0306457318306058},
}

A. Barrón–Cedeño, G. Da San Martino, I. Jaradat, and P. Nakov, “Proppy: a system to unmask propaganda in online news,” in Proceedings of the thirty-third aaai conference on artificial intelligence, Honolulu, HI, USA, 2019, p. 9847–9848.
[BibTeX]

@inproceedings{AAAI2019:proppy,
  title     = {Proppy: A System to Unmask Propaganda in Online News},
  author    = {Alberto Barr\'on-Cede{\~n}o and Da San Martino, Giovanni and Jaradat, Israa and Nakov, Preslav},
  booktitle = {Proceedings of the Thirty-Third AAAI Conference on Artificial Intelligence},
  series    = {AAAI'19},
  address   = {Honolulu, HI, USA},
  pages     = {9847--9848},
  year      = {2019},
  NOmonth   = {January},
}

A. Ali, S. Shon, Y. Samih, H. Mubarak, A. Abdelali, J. Glass, S. Renals, and K. Choukri, “The mgb-5 challenge: recognition and dialect identification of dialectal arabic speech,” in 2019 ieee automatic speech recognition and understanding workshop (asru), 2019, p. 1026–1033.
[BibTeX]

@inproceedings{ali2019mgb,
  author       = {Ali, Ahmed and Shon, Suwon and Samih, Younes and Mubarak, Hamdy and Abdelali, Ahmed and Glass, James and Renals, Steve and Choukri, Khalid},
  title        = {The {MGB-5} Challenge: Recognition and Dialect Identification of Dialectal {Arabic} Speech},
  booktitle    = {2019 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)},
  organization = {IEEE},
  pages        = {1026--1033},
  year         = {2019},
}

R. Baly, G. Karadzhov, A. Saleh, J. Glass, and P. Nakov, “Multi-task ordinal regression for jointly predicting the trustworthiness and the leading political ideology of news media,” in Proceedings of the 17th annual conference of the north american chapter of the association for computational linguistics: human language technologies, Minneapolis, MN, USA, 2019, p. 2109–2116.
[BibTeX]

@inproceedings{source:multitask:NAACL:2019,
  title       = {Multi-Task Ordinal Regression for Jointly Predicting the Trustworthiness and the Leading Political Ideology of News Media},
  author      = {Ramy Baly and Georgi Karadzhov and Abdelrhman Saleh and James Glass and Preslav Nakov},
  booktitle   = {Proceedings of the 17th Annual Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
  series      = {NAACL-HLT~'19},
  address     = {Minneapolis, MN, USA},
  pages       = {2109--2116},
  year        = {2019},
  NOpublisher = {Association for Computational Linguistics},
  NOurl       = {https://www.aclweb.org/anthology/N19-1216.pdf},
}

R. Baly, G. Karadzhov, A. Saleh, J. Glass, and P. Nakov, “Multi-task ordinal regression for jointly predicting the trustworthiness and the leading political ideology of news media,” in Proceedings of the 17th annual conference of the north american chapter of the association for computational linguistics: human language technologies (naacl’19), Minneapolis, MN, USA, 2019, p. 2109–2116.
[BibTeX] [Download PDF]

@inproceedings{Factuality:ordinal:2019,
  author    = {Baly, Ramy and Karadzhov, Georgi and Saleh, Abdelrhman and Glass, James and Nakov, Preslav},
  title     = {Multi-Task Ordinal Regression for Jointly Predicting the Trustworthiness and the Leading Political Ideology of News Media},
  booktitle = {Proceedings of the 17th Annual Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (NAACL'19)},
  series    = {NAACL-HLT~'19},
  publisher = {Association for Computational Linguistics},
  address   = {Minneapolis, MN, USA},
  pages     = {2109--2116},
  year      = {2019},
  url       = {https://www.aclweb.org/anthology/N19-1216},
}

G. Da San Martino, A. Barrón–Cedeño, and P. Nakov, “Findings of the NLP4IF-2019 shared task on fine-grained propaganda detection,” in Proceedings of the 2nd workshop on nlp for internet freedom (nlp4if): censorship, disinformation, and propaganda, Hong Kong, China, 2019, p. 162–170.
[BibTeX]

@inproceedings{NLP4IF2019:propaganda:task,
  author    = {Da San Martino, Giovanni and Barr\'{o}n-Cede\~{n}o, Alberto and Nakov, Preslav},
  title     = {Findings of the {NLP4IF}-2019 Shared Task on Fine-Grained Propaganda Detection},
  booktitle = {Proceedings of the 2nd Workshop on NLP for Internet Freedom (NLP4IF): Censorship, Disinformation, and Propaganda},
  series    = {NLP4IF@EMNLP~'19},
  address   = {Hong Kong, China},
  pages     = {162--170},
  year      = {2019},
  NOmonth   = {November},
}

P. Stefanov, K. Darwish, A. Atanasov, and P. Nakov, “Predicting the topical stance of media and popular twitter users,” Arxiv preprint arxiv:1907.01260, 2019.
[BibTeX]

@article{stefanov2019predicting,
  author  = {Stefanov, Peter and Darwish, Kareem and Atanasov, Atanas and Nakov, Preslav},
  title   = {Predicting the Topical Stance of Media and Popular {Twitter} Users},
  journal = {arXiv preprint arXiv:1907.01260},
  year    = {2019},
}

P. Nakov, L. Màrquez, A. Barrón–Cedeño, P. Gencheva, G. Karadzhov, T. Mihaylova, M. Mohtarami, and J. Glass, “Automatic fact checking using context and discourse information,” Acm journal of data and information quality (acm jdiq), 2019.
[BibTeX]

@article{JDIQ2019,
  title   = {Automatic Fact Checking Using Context and Discourse Information},
  author  = {Preslav Nakov and M\`{a}rquez, Llu\'{i}s and Barr\'{o}n-Cede\~{n}o, Alberto and Pepa Gencheva and Georgi Karadzhov and Tsvetomila Mihaylova and Mitra Mohtarami and James Glass},
  journal = {ACM Journal of Data and Information Quality (ACM JDIQ)},
  year    = {2019},
}

M. Zampieri, S. Malmasi, P. Nakov, S. Rosenthal, N. Farra, and R. Kumar, “SemEval-2019 task 6: identifying and categorizing offensive language in social media (OffensEval),” in Proceedings of the international workshop on semantic evaluation (semeval’19), Minneapolis, MN, USA, 2019, p. 75–86.
[BibTeX] [Download PDF]

@inproceedings{OffenseEval:SemEval:2019,
  title     = {{SemEval}-2019 Task 6: Identifying and Categorizing Offensive Language in Social Media ({OffensEval})},
  author    = {Marcos Zampieri and Shervin Malmasi and Preslav Nakov and Sara Rosenthal and Noura Farra and Ritesh Kumar},
  booktitle = {Proceedings of the International Workshop on Semantic Evaluation (SemEval'19)},
  series    = {SemEval~'19},
  publisher = {Association for Computational Linguistics},
  address   = {Minneapolis, MN, USA},
  pages     = {75--86},
  year      = {2019},
  url       = {https://www.aclweb.org/anthology/S19-2010},
}

A. Saleh, R. Baly, A. Barrón–Cedeño, G. Da San Martino, M. Mohtarami, P. Nakov, and J. Glass, “Team QCRI-MIT at SemEval-2019 task 4: propaganda analysis meets hyperpartisan news detection,” Arxiv preprint arxiv:1904.03513, 2019.
[BibTeX]

@article{saleh2019team,
  author  = {Saleh, Abdelrhman and Baly, Ramy and Barr{\'o}n-Cede{\~n}o, Alberto and Da San Martino, Giovanni and Mohtarami, Mitra and Nakov, Preslav and Glass, James},
  title   = {Team {QCRI-MIT} at {SemEval}-2019 Task 4: Propaganda Analysis Meets Hyperpartisan News Detection},
  journal = {arXiv preprint arXiv:1904.03513},
  year    = {2019},
}

T. Staykovski, A. Barrón–Cedeño, G. Da San Martino, and P. Nakov, “Dense vs. sparse representations for news stream clustering,” in Proceedings of the second international workshop on narrative extraction from texts (text2story), Cologne, Germany, 2019.
[BibTeX]

@inproceedings{Staykovski:19,
  title     = {Dense vs. Sparse Representations for News Stream Clustering},
  author    = {Staykovski, Todor and Barr\'{o}n-Cede{\~n}o, Alberto and Da San Martino, Giovanni and Nakov, Preslav},
  booktitle = {Proceedings of the Second International Workshop on Narrative Extraction from Texts (Text2story)},
  year      = {2019},
  address   = {Cologne, Germany},
}

S. Romeo, G. Da San Martino, Y. Belinkov, A. Barrón–Cedeño, M. Eldesouki, K. Darwish, H. Mubarak, J. Glass, and A. Moschitti, “Language processing and learning models for community question answering in arabic,” Information processing & management, vol. 56, iss. 2, p. 274–290, 2019.
[BibTeX]

@article{romeo2019language,
  author    = {Romeo, Salvatore and Da San Martino, Giovanni and Belinkov, Yonatan and Barr{\'o}n-Cede{\~n}o, Alberto and Eldesouki, Mohamed and Darwish, Kareem and Mubarak, Hamdy and Glass, James and Moschitti, Alessandro},
  title     = {Language Processing and Learning Models for Community Question Answering in {Arabic}},
  journal   = {Information Processing \& Management},
  volume    = {56},
  number    = {2},
  pages     = {274--290},
  year      = {2019},
  publisher = {Pergamon},
}

H. Mubarak, A. Abdelali, H. Sajjad, Y. Samih, and K. Darwish, “Highly effective arabic diacritization using sequence to sequence modeling,” in Proceedings of the 2019 conference of the north american chapter of the association for computational linguistics: human language technologies, volume 1 (long and short papers), 2019, p. 2390–2395.
[BibTeX]

@inproceedings{mubarak2019highly,
  author    = {Mubarak, Hamdy and Abdelali, Ahmed and Sajjad, Hassan and Samih, Younes and Darwish, Kareem},
  title     = {Highly Effective {Arabic} Diacritization Using Sequence to Sequence Modeling},
  booktitle = {Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)},
  pages     = {2390--2395},
  year      = {2019},
}

Y. Samih, H. Mubarak, A. Abdelali, M. Attia, M. Eldesouki, and K. Darwish, “Qc-go submission for madar shared task: arabic fine-grained dialect identification,” in Proceedings of the fourth arabic natural language processing workshop, 2019, p. 290–294.
[BibTeX]

@inproceedings{samih2019qc,
  author    = {Samih, Younes and Mubarak, Hamdy and Abdelali, Ahmed and Attia, Mohammed and Eldesouki, Mohamed and Darwish, Kareem},
  title     = {{QC-GO} Submission for {MADAR} Shared Task: {Arabic} Fine-Grained Dialect Identification},
  booktitle = {Proceedings of the Fourth Arabic Natural Language Processing Workshop},
  pages     = {290--294},
  year      = {2019},
}

A. Barrón–Cedeño, G. Da San Martino, I. Jaradat, and P. Nakov, “Proppy: organizing news coverage on the basis of their propagandistic content,” Information processing and management, 2019.
[BibTeX]

@article{Barron:19b,
  author  = {Barr{\'o}n-Cede{\~n}o, Alberto and Da San Martino, Giovanni and Jaradat, Israa and Nakov, Preslav},
  title   = {Proppy: Organizing News Coverage on the Basis of Their Propagandistic Content},
  journal = {Information Processing and Management},
  year    = {2019},
}

M. Attia, Y. Samih, A. Elkahky, H. Mubarak, A. Abdelali, and K. Darwish, “Pos tagging for improving code-switching identification in arabic,” in Proceedings of the fourth arabic natural language processing workshop, 2019, p. 18–29.
[BibTeX]

@inproceedings{attia2019pos,
  author    = {Attia, Mohammed and Samih, Younes and Elkahky, Ali and Mubarak, Hamdy and Abdelali, Ahmed and Darwish, Kareem},
  title     = {{POS} Tagging for Improving Code-Switching Identification in {Arabic}},
  booktitle = {Proceedings of the Fourth Arabic Natural Language Processing Workshop},
  pages     = {18--29},
  year      = {2019},
}

M. Eldesouki, N. Gopee, A. Ali, and K. Darwish, “FarSpeech: Arabic natural language processing for live Arabic speech,” in Interspeech, 2019, p. 2372–2373.
[BibTeX]

@inproceedings{eldesouki2019farspeech,
  author    = {Eldesouki, Mohamed and Gopee, Naassih and Ali, Ahmed and Darwish, Kareem},
  title     = {{FarSpeech}: {Arabic} Natural Language Processing for Live {Arabic} Speech},
  booktitle = {INTERSPEECH},
  pages     = {2372--2373},
  year      = {2019},
}

M. Kutlu, K. Darwish, C. Bayrak, A. Rashed, and T. Elsayed, “Embedding-based qualitative analysis of polarization in turkey,” Arxiv preprint arxiv:1909.10213, 2019.
[BibTeX]

@article{kutlu2019embedding,
  author  = {Kutlu, Mucahid and Darwish, Kareem and Bayrak, Cansin and Rashed, Ammar and Elsayed, Tamer},
  title   = {Embedding-based Qualitative Analysis of Polarization in {Turkey}},
  journal = {arXiv preprint arXiv:1909.10213},
  year    = {2019},
}

H. Mubarak, A. Abdelali, K. Darwish, M. Eldesouki, Y. Samih, and H. Sajjad, “A system for diacritizing four varieties of arabic,” in Proceedings of the 2019 conference on empirical methods in natural language processing and the 9th international joint conference on natural language processing (emnlp-ijcnlp): system demonstrations, 2019, p. 217–222.
[BibTeX]

@inproceedings{mubarak2019system,
  author    = {Mubarak, Hamdy and Abdelali, Ahmed and Darwish, Kareem and Eldesouki, Mohamed and Samih, Younes and Sajjad, Hassan},
  title     = {A System for Diacritizing Four Varieties of {Arabic}},
  booktitle = {Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP): System Demonstrations},
  pages     = {217--222},
  year      = {2019},
}

K. Darwish, “Quantifying polarization on twitter: the kavanaugh nomination,” in International conference on social informatics (socinfo-2019), 2019, p. 188–201.
[BibTeX]

@inproceedings{darwish2019quantifying,
  author    = {Darwish, Kareem},
  title     = {Quantifying Polarization on {Twitter}: The {Kavanaugh} Nomination},
  booktitle = {International Conference on Social Informatics (SocInfo-2019)},
  publisher = {Springer},
  address   = {Cham},
  pages     = {188--201},
  year      = {2019},
}

H. Mubarak and K. Darwish, “Arabic offensive language classification on twitter,” in International conference on social informatics, 2019, p. 269–276.
[BibTeX]

@inproceedings{mubarak2019arabic,
  author    = {Mubarak, Hamdy and Darwish, Kareem},
  title     = {{Arabic} Offensive Language Classification on {Twitter}},
  booktitle = {International Conference on Social Informatics},
  publisher = {Springer},
  address   = {Cham},
  pages     = {269--276},
  year      = {2019},
}

I. Weber, K. M. Darwish, C. Wagner, E. Zagheni, L. Nelson, S. Aref, and F. Flöck, Social informatics: 11th international conference, SocInfo 2019, Doha, Qatar, November 18–21, 2019, proceedings, Springer Nature, 2019.
[BibTeX]

@proceedings{weber2019social,
  editor    = {Weber, Ingmar and Darwish, Kareem M. and Wagner, Claudia and Zagheni, Emilio and Nelson, Laura and Aref, Samin and Fl{\"o}ck, Fabian},
  title     = {Social Informatics: 11th International Conference, {SocInfo} 2019, {Doha}, {Qatar}, {November} 18--21, 2019, Proceedings},
  publisher = {Springer Nature},
  year      = {2019},
}

P. Nakov, L. Màrquez, A. Moschitti, and H. Mubarak, “Arabic community question answering,” Natural language engineering, vol. 25, iss. 1, p. 5–41, 2019.
[BibTeX]

@article{nakov2019arabic,
  author    = {Nakov, Preslav and M{\`a}rquez, Llu{\'\i}s and Moschitti, Alessandro and Mubarak, Hamdy},
  title     = {{Arabic} Community Question Answering},
  journal   = {Natural Language Engineering},
  volume    = {25},
  number    = {1},
  pages     = {5--41},
  year      = {2019},
  publisher = {Cambridge University Press},
}

2018

P. Nakov, A. Barrón–Cedeño, T. Elsayed, R. Suwaileh, L. Màrquez, W. Zaghouani, P. Atanasova, S. Kyuchukov, and G. Da San Martino, “Overview of the CLEF-2018 CheckThat! lab on automatic identification and verification of political claims,” in Proceedings of the ninth international conference of the clef association: experimental ir meets multilinguality, multimodality, and interaction (clef’18), Avignon, France, 2018, p. 372–387.
[BibTeX] [Download PDF]

@inproceedings{clef2018checkthat:overall,
  author    = {Nakov, Preslav and Barr\'{o}n-Cede\~{n}o, Alberto and Elsayed, Tamer and Suwaileh, Reem and M\`{a}rquez, Llu\'{i}s and Zaghouani, Wajdi and Atanasova, Pepa and Kyuchukov, Spas and Da San Martino, Giovanni},
  title     = {Overview of the {CLEF}-2018 {CheckThat}! Lab on Automatic Identification and Verification of Political Claims},
  booktitle = {Proceedings of the Ninth International Conference of the CLEF Association: Experimental IR Meets Multilinguality, Multimodality, and Interaction (CLEF'18)},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer},
  editor    = {Bellot, Patrice and Trabelsi, Chiraz and Mothe, Josiane and Murtagh, Fionn and Nie, Jian-Yun and Soulier, Laure and Sanjuan, Eric and Cappellato, Linda and Ferro, Nicola},
  address   = {Avignon, France},
  month     = {September},
  year      = {2018},
  pages     = {372--387},
  url       = {http://link.springer.com/chapter/10.1007/978-3-319-98932-7_32},
}

P. Atanasova, L. Màrquez, A. Barrón–Cedeño, T. Elsayed, R. Suwaileh, W. Zaghouani, S. Kyuchukov, G. Da San Martino, and P. Nakov, “Overview of the CLEF-2018 CheckThat! lab on automatic identification and verification of political claims, task 1: check-worthiness,” in Clef 2018 working notes. working notes of clef 2018 – conference and labs of the evaluation forum (clef’18), Avignon, France, 2018.
[BibTeX] [Download PDF]

@inproceedings{clef2018checkthat:task1,
  title     = {Overview of the {CLEF-2018 CheckThat}! Lab on Automatic Identification and Verification of Political Claims, Task 1: Check-Worthiness},
  author    = {Atanasova, Pepa and M\`{a}rquez, Llu\'{i}s and Barr\'{o}n-Cede\~{n}o, Alberto and Elsayed, Tamer and Suwaileh, Reem and Zaghouani, Wajdi and Kyuchukov, Spas and Da San Martino, Giovanni and Nakov, Preslav},
  editor    = {Cappellato, Linda and Ferro, Nicola and Nie, Jian-Yun and Soulier, Laure},
  booktitle = {CLEF 2018 Working Notes. Working Notes of CLEF 2018 - Conference and Labs of the Evaluation Forum (CLEF'18)},
  series    = {{CEUR} Workshop Proceedings},
  publisher = {CEUR-WS.org},
  address   = {Avignon, France},
  year      = {2018},
  month     = {September},
  url       = {http://ceur-ws.org/Vol-2125/invited_paper_13.pdf},
}

A. Barrón–Cedeño, T. Elsayed, R. Suwaileh, L. Màrquez, P. Atanasova, W. Zaghouani, S. Kyuchukov, G. Da San Martino, and P. Nakov, “Overview of the CLEF-2018 CheckThat! lab on automatic identification and verification of political claims, task 2: factuality,” in Clef 2018 working notes. working notes of clef 2018 – conference and labs of the evaluation forum (clef’18), Avignon, France, 2018.
[BibTeX] [Download PDF]

@inproceedings{clef2018checkthat:task2,
  author    = {Barr\'{o}n-Cede\~{n}o, Alberto and Elsayed, Tamer and Suwaileh, Reem and M\`{a}rquez, Llu\'{i}s and Atanasova, Pepa and Zaghouani, Wajdi and Kyuchukov, Spas and Da San Martino, Giovanni and Nakov, Preslav},
  title     = {Overview of the {CLEF-2018 CheckThat}! Lab on Automatic Identification and Verification of Political Claims, Task 2: Factuality},
  booktitle = {CLEF 2018 Working Notes. Working Notes of CLEF 2018 - Conference and Labs of the Evaluation Forum (CLEF'18)},
  series    = {{CEUR} Workshop Proceedings},
  publisher = {CEUR-WS.org},
  editor    = {Cappellato, Linda and Ferro, Nicola and Nie, Jian-Yun and Soulier, Laure},
  address   = {Avignon, France},
  month     = {September},
  year      = {2018},
  url       = {http://ceur-ws.org/Vol-2125/invited_paper_14.pdf},
}

S. Romeo, G. Da San Martino, A. Barrón–Cedeño, and A. Moschitti, “A flexible, efficient and accurate framework for community question answering pipelines,” in Proceedings of acl 2018, system demonstrations, Melbourne, Australia, 2018, p. 134–139.
[BibTeX] [Download PDF]

@inproceedings{romeo-etal-2018-flexible,
  author    = {Romeo, Salvatore and Da San Martino, Giovanni and Barr{\'o}n-Cede{\~n}o, Alberto and Moschitti, Alessandro},
  title     = {A Flexible, Efficient and Accurate Framework for Community Question Answering Pipelines},
  booktitle = {Proceedings of ACL 2018, System Demonstrations},
  publisher = {Association for Computational Linguistics},
  address   = {Melbourne, Australia},
  pages     = {134--139},
  year      = {2018},
  month     = {July},
  url       = {https://www.aclweb.org/anthology/P18-4023},
}

I. Jaradat, P. Gencheva, A. Barrón–Cedeño, L. Màrquez, and P. Nakov, “Claimrank: detecting check-worthy claims in arabic and english,” in Proceedings of the 2018 conference of the north american chapter of the association for computational linguistics: demonstrations, New Orleans, Louisiana, 2018, p. 26–30. doi:10.18653/v1/N18-5006
[BibTeX] [Download PDF]

@inproceedings{jaradat-etal-2018-claimrank,
  author    = {Jaradat, Israa and Gencheva, Pepa and Barr{\'o}n-Cede{\~n}o, Alberto and M{\`a}rquez, Llu{\'\i}s and Nakov, Preslav},
  title     = {{ClaimRank}: Detecting Check-Worthy Claims in {Arabic} and {English}},
  booktitle = {Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Demonstrations},
  publisher = {Association for Computational Linguistics},
  address   = {New Orleans, Louisiana},
  pages     = {26--30},
  year      = {2018},
  month     = {June},
  doi       = {10.18653/v1/N18-5006},
  url       = {https://www.aclweb.org/anthology/N18-5006},
}

W. Magdy, K. Darwish, and A. Ali, System and method for automatic generation of information-rich content from multiple microblogs, each microblog containing only sparse information, 2018.
[BibTeX]

@misc{walid2018system,
  author = {Magdy, Walid and Darwish, Kareem and Ali, Ahmed},
  title  = {System and method for automatic generation of information-rich content from multiple microblogs, each microblog containing only sparse information},
  year   = {2018},
  month  = {jun},
  note   = {US Patent 9,990,368},
}

F. Dalvi, N. Durrani, H. Sajjad, and S. Vogel, “Incremental decoding and training methods for simultaneous translation in neural machine translation,” in Proceedings of the 16th annual conference of the north american chapter of the association for computational linguistics: human language technologies (naacl), 2018.
[BibTeX]

@inproceedings{dalvi:2018:NAACL,
  author    = {Dalvi, Fahim and Durrani, Nadir and Sajjad, Hassan and Vogel, Stephan},
  title     = {Incremental Decoding and Training Methods for Simultaneous Translation in Neural Machine Translation},
  booktitle = {Proceedings of the 16th Annual Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (NAACL)},
  location  = {New Orleans, US},
  month     = {June},
  year      = {2018},
}

R. Baly, M. Mohtarami, J. Glass, L. Màrquez, A. Moschitti, and P. Nakov, “Integrating stance detection and fact checking in a unified corpus,” in Proceedings of the 2018 conference of the north american chapter of the association for computational linguistics: human language technologies (naacl-hlt’18), New Orleans, LA, 2018, p. 21–27.
[BibTeX] [Download PDF]

@inproceedings{baly-EtAl:2018:N18-2,
  title     = {Integrating Stance Detection and Fact Checking in a Unified Corpus},
  author    = {Baly, Ramy and Mohtarami, Mitra and Glass, James and M\`{a}rquez, Llu\'{i}s and Moschitti, Alessandro and Nakov, Preslav},
  booktitle = {Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (NAACL-HLT'18)},
  series    = {NAACL-HLT'18},
  publisher = {Association for Computational Linguistics},
  address   = {New Orleans, LA},
  pages     = {21--27},
  year      = {2018},
  month     = {June},
  url       = {http://www.aclweb.org/anthology/N18-2004},
}

I. Jaradat, P. Gencheva, A. Barrón–Cedeño, L. Màrquez, and P. Nakov, “ClaimRank: detecting check-worthy claims in Arabic and English,” in Proceedings of the 16th annual conference of the north american chapter of the association for computational linguistics: human language technologies (naacl-hlt’18), New Orleans, LA, 2018, p. 26–30.
[BibTeX] [Download PDF]

@inproceedings{NAACL2018:claimrank,
  author    = {Jaradat, Israa and Gencheva, Pepa and Barr\'on-Cede{\~n}o, Alberto and M\`{a}rquez, Llu\'{i}s and Nakov, Preslav},
  title     = {{ClaimRank}: Detecting Check-Worthy Claims in {A}rabic and {E}nglish},
  booktitle = {Proceedings of the 16th Annual Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (NAACL-HLT'18)},
  series    = {NAACL-HLT'18},
  address   = {New Orleans, LA},
  pages     = {26--30},
  year      = {2018},
  month     = {June},
  url       = {http://aclweb.org/anthology/N18-5006},
}

H. Bouamor and H. Sajjad, “H2@bucc18: parallel sentence extraction from comparable corpora using multilingual sentence embeddings,” in Proceedings of the 11th workshop on building and using comparable corpora (bucc), 2018.
[BibTeX]

@inproceedings{bouamor:bucc18,
  author    = {Houda Bouamor and Hassan Sajjad},
  title     = {{H2@BUCC18}: Parallel Sentence Extraction from Comparable Corpora Using Multilingual Sentence Embeddings},
  booktitle = {Proceedings of the 11th Workshop on Building and Using Comparable Corpora (BUCC)},
  location  = {Miyazaki, Japan},
  month     = {May},
  year      = {2018},
}

R. Alharbi, W. Magdy, K. Darwish, A. AbdelAli, and H. Mubarak, “Part-of-speech tagging for Arabic Gulf dialect using Bi-LSTM,” in Proceedings of the eleventh international conference on language resources and evaluation (LREC 2018), Miyazaki, Japan, 2018.
[BibTeX] [Download PDF]

@inproceedings{alharbi-etal-2018-part,
  author    = {Alharbi, Randah and Magdy, Walid and Darwish, Kareem and AbdelAli, Ahmed and Mubarak, Hamdy},
  title     = {Part-of-Speech Tagging for {A}rabic {G}ulf Dialect Using {B}i-{LSTM}},
  booktitle = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation ({LREC} 2018)},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Miyazaki, Japan},
  year      = {2018},
  month     = {may},
  url       = {https://www.aclweb.org/anthology/L18-1620},
}

T. Mihaylova, P. Nakov, L. Màrquez, A. Barrón–Cedeño, M. Mohtarami, G. Karadjov, and J. Glass, “Fact checking in community forums,” in Proceedings of the thirty-second aaai conference on artificial intelligence (aaai’18), New Orleans, LA, 2018, p. 5309–5316.
[BibTeX] [Download PDF]

@inproceedings{AAAI2018:factchecking,
  title     = {Fact Checking in Community Forums},
  author    = {Tsvetomila Mihaylova and Preslav Nakov and Llu\'{i}s M\`{a}rquez and Alberto Barr\'on-Cede{\~n}o and Mitra Mohtarami and Georgi Karadjov and James Glass},
  booktitle = {Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence (AAAI'18)},
  series    = {AAAI'18},
  address   = {New Orleans, LA},
  pages     = {5309--5316},
  year      = {2018},
  month     = {February},
  url       = {https://www.aaai.org/ocs/index.php/AAAI/AAAI18/paper/viewFile/16780/16082},
}

A. Elmadany, H. Mubarak, and W. Magdy, “Arsas: an arabic speech-act and sentiment corpus of tweets,” Osact, vol. 3, p. 20, 2018.
[BibTeX]

@article{elmadany2018arsas,
  author  = {Elmadany, AbdelRahim and Mubarak, Hamdy and Magdy, Walid},
  title   = {{ArSAS}: An {Arabic} Speech-Act and Sentiment Corpus of Tweets},
  journal = {OSACT},
  volume  = {3},
  pages   = {20},
  year    = {2018},
}

R. Alharbi, W. Magdy, K. Darwish, A. Abdelali, and H. Mubarak, “Part-of-speech tagging for arabic gulf dialect using bi-lstm,” in Proceedings of the eleventh international conference on language resources and evaluation (lrec 2018), 2018.
[BibTeX]

@inproceedings{alharbi2018part,
  author    = {Alharbi, Randah and Magdy, Walid and Darwish, Kareem and Abdelali, Ahmed and Mubarak, Hamdy},
  title     = {Part-of-Speech Tagging for {Arabic} {Gulf} Dialect Using {Bi-LSTM}},
  booktitle = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)},
  year      = {2018},
}

A. Barrón–Cedeño, T. Elsayed, R. Suwaileh, L. Màrquez, P. Atanasova, W. Zaghouani, S. Kyuchukov, G. Da San Martino, and P. Nakov, “Overview of the CLEF-2018 CheckThat! Lab on automatic identification and verification of political claims, Task 2: factuality,” in Clef 2018 working notes. working notes of clef 2018 – conference and labs of the evaluation forum, Avignon, France, 2018.
[BibTeX]

@inproceedings{clef2018checkthat:task2,
  author    = {Barr{\'o}n-Cede{\~n}o, Alberto and Elsayed, Tamer and Suwaileh, Reem and M{\`a}rquez, Llu{\'i}s and Atanasova, Pepa and Zaghouani, Wajdi and Kyuchukov, Spas and Da San Martino, Giovanni and Nakov, Preslav},
  title     = {Overview of the {CLEF-2018 CheckThat! Lab} on Automatic Identification and Verification of Political Claims, {Task} 2: Factuality},
  booktitle = {{CLEF} 2018 Working Notes. Working Notes of {CLEF} 2018 - Conference and Labs of the Evaluation Forum},
  series    = {{CEUR} Workshop Proceedings},
  publisher = {CEUR-WS.org},
  editor    = {Cappellato, Linda and Ferro, Nicola and Nie, Jian-Yun and Soulier, Laure},
  address   = {Avignon, France},
  NOmonth   = {September},
  year      = {2018},
}

P. Atanasova, L. Màrquez, A. Barrón–Cedeño, T. Elsayed, R. Suwaileh, W. Zaghouani, S. Kyuchukov, G. Da San Martino, and P. Nakov, “Overview of the CLEF-2018 CheckThat! Lab on automatic identification and verification of political claims, Task 1: check-worthiness,” in Clef 2018 working notes. working notes of clef 2018 – conference and labs of the evaluation forum, Avignon, France, 2018.
[BibTeX]

@inproceedings{clef2018checkthat:task1,
  author    = {Atanasova, Pepa and M{\`a}rquez, Llu{\'i}s and Barr{\'o}n-Cede{\~n}o, Alberto and Elsayed, Tamer and Suwaileh, Reem and Zaghouani, Wajdi and Kyuchukov, Spas and Da San Martino, Giovanni and Nakov, Preslav},
  title     = {Overview of the {CLEF-2018 CheckThat! Lab} on Automatic Identification and Verification of Political Claims, {Task} 1: Check-Worthiness},
  booktitle = {{CLEF} 2018 Working Notes. Working Notes of {CLEF} 2018 - Conference and Labs of the Evaluation Forum},
  series    = {{CEUR} Workshop Proceedings},
  publisher = {CEUR-WS.org},
  editor    = {Cappellato, Linda and Ferro, Nicola and Nie, Jian-Yun and Soulier, Laure},
  address   = {Avignon, France},
  NOmonth   = {September},
  year      = {2018},
}

P. Nakov, A. Barrón–Cedeño, T. Elsayed, R. Suwaileh, L. Màrquez, W. Zaghouani, P. Atanasova, S. Kyuchukov, and G. Da San Martino, “Overview of the CLEF-2018 CheckThat! Lab on automatic identification and verification of political claims,” in Proceedings of the ninth international conference of the clef association: experimental ir meets multilinguality, multimodality, and interaction, Avignon, France, 2018, p. 372–387.
[BibTeX]

@inproceedings{clef2018checkthat:overall,
  author    = {Nakov, Preslav and Barr{\'o}n-Cede{\~n}o, Alberto and Elsayed, Tamer and Suwaileh, Reem and M{\`a}rquez, Llu{\'i}s and Zaghouani, Wajdi and Atanasova, Pepa and Kyuchukov, Spas and Da San Martino, Giovanni},
  title     = {Overview of the {CLEF-2018 CheckThat! Lab} on Automatic Identification and Verification of Political Claims},
  booktitle = {Proceedings of the Ninth International Conference of the {CLEF} Association: Experimental {IR} Meets Multilinguality, Multimodality, and Interaction},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer},
  address   = {Avignon, France},
  NOmonth   = {September},
  year      = {2018},
  pages     = {372--387}
}

M. Mohtarami, R. Baly, J. Glass, P. Nakov, L. Màrquez, and A. Moschitti, “Automatic stance detection using end-to-end memory networks,” in Proceedings of the 16th annual conference of the north american chapter of the association for computational linguistics: human language technologies, New Orleans, Louisiana, USA, 2018, p. 767–776.
[BibTeX]

@inproceedings{NAACL2018:stance,
  author    = {Mitra Mohtarami and Ramy Baly and James Glass and Preslav Nakov and Llu{\'i}s M{\`a}rquez and Alessandro Moschitti},
  title     = {Automatic Stance Detection Using End-to-End Memory Networks},
  booktitle = {Proceedings of the 16th Annual Conference of the {North American} Chapter of the Association for Computational Linguistics: Human Language Technologies},
  series    = {NAACL-HLT~'18},
  year      = {2018},
  address   = {New Orleans, Louisiana, USA},
  pages     = {767--776},
  NOmonth   = {June},
  noURL     = {http://aclweb.org/anthology/N18-1070}
}

H. Mubarak, “Build fast and accurate lemmatization for arabic,” in Proceedings of the eleventh international conference on language resources and evaluation (lrec 2018), 2018.
[BibTeX]

@inproceedings{mubarak2018build,
  author    = {Mubarak, Hamdy},
  title     = {Build Fast and Accurate Lemmatization for {Arabic}},
  booktitle = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation ({LREC} 2018)},
  year      = {2018}
}

R. Baly, G. Karadzhov, D. Alexandrov, J. Glass, and P. Nakov, “Predicting factuality of reporting and bias of news media sources,” in Proceedings of the 2018 conference on empirical methods in natural language processing, Brussels, Belgium, 2018, p. 3528–3539.
[BibTeX]

@inproceedings{D18-1389,
  author      = {Baly, Ramy and Karadzhov, Georgi and Alexandrov, Dimitar and Glass, James and Nakov, Preslav},
  title       = {Predicting Factuality of Reporting and Bias of News Media Sources},
  booktitle   = {Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing},
  series      = {EMNLP~'18},
  address     = {Brussels, Belgium},
  pages       = {3528--3539},
  year        = {2018},
  NOpublisher = {Association for Computational Linguistics},
  NOurl       = {http://aclweb.org/anthology/D18-1389}
}

I. Abbes, A. Barrón–Cedeño, and M. Jemni, “Towards open-domain cross-language question answering,” in Qatar foundation annual research conference proceedings, 2018, p. ICTPD881.
[BibTeX]

@inproceedings{abbes2018towards,
  author       = {Abbes, Ines and Barr{\'o}n-Cede{\~n}o, Alberto and Jemni, Mohamed},
  title        = {Towards Open-Domain Cross-Language Question Answering},
  booktitle    = {{Qatar} Foundation Annual Research Conference Proceedings},
  volume       = {2018},
  number       = {3},
  pages        = {ICTPD881},
  year         = {2018},
  organization = {HBKU Press Qatar}
}

K. Darwish, “To kavanaugh or not to kavanaugh: that is the polarizing question,” Arxiv preprint arxiv:1810.06687, 2018.
[BibTeX]

@article{darwish2018kavanaugh,
  author  = {Darwish, Kareem},
  title   = {To {Kavanaugh} or not to {Kavanaugh}: That is the polarizing question},
  journal = {{arXiv} preprint {arXiv}:1810.06687},
  year    = {2018}
}

A. Abdelali, M. Attia, Y. Samih, K. Darwish, and H. Mubarak, “Diacritization of maghrebi arabic sub-dialects,” Arxiv preprint arxiv:1810.06619, 2018.
[BibTeX]

@article{abdelali2018diacritization,
  author  = {Abdelali, Ahmed and Attia, Mohammed and Samih, Younes and Darwish, Kareem and Mubarak, Hamdy},
  title   = {Diacritization of {Maghrebi} {Arabic} sub-dialects},
  journal = {{arXiv} preprint {arXiv}:1810.06619},
  year    = {2018}
}

M. Kutlu, K. Darwish, and T. Elsayed, “Devam vs. tamam: 2018 turkish elections,” Arxiv preprint arxiv:1807.06655, 2018.
[BibTeX]

@article{kutlu2018devam,
  author  = {Kutlu, Mucahid and Darwish, Kareem and Elsayed, Tamer},
  title   = {{Devam} vs. {Tamam}: 2018 {Turkish} elections},
  journal = {{arXiv} preprint {arXiv}:1807.06655},
  year    = {2018}
}

K. Darwish, W. Magdy, A. Rahimi, T. Baldwin, and N. Abokhodair, “Predicting online islamophobic behavior after #ParisAttacks,” The journal of web science, vol. 4, 2018.
[BibTeX]

@article{darwish2018predicting,
  author  = {Darwish, Kareem and Magdy, Walid and Rahimi, Afshin and Baldwin, Timothy and Abokhodair, Norah},
  title   = {Predicting online {Islamophobic} behavior after {\#ParisAttacks}},
  journal = {The Journal of Web Science},
  volume  = {4},
  year    = {2018}
}

H. Cherroun, S. Bougrine, and A. Abdelali, “Spoken arabic algerian dialect identification,” in Natural language and speech processing (icnlsp), 2018 2nd international conference on, 2018, p. 1–6.
[BibTeX]

@inproceedings{hadda2018spoken,
  author       = {Cherroun, Hadda and Bougrine, Soumia and Abdelali, Ahmed},
  title        = {Spoken {Arabic} {Algerian} dialect identification},
  booktitle    = {2018 2nd International Conference on Natural Language and Speech Processing ({ICNLSP})},
  pages        = {1--6},
  year         = {2018},
  organization = {IEEE}
}

K. Darwish, H. Mubarak, A. Abdelali, M. Eldesouki, Y. Samih, R. Alharbi, M. Attia, W. Magdy, and L. Kallmeyer, “Multi-dialect arabic pos tagging: a crf approach,” in 11th edition of the language resources and evaluation conference, 2018.
[BibTeX]

@inproceedings{darwish2018multi,
  author    = {Darwish, Kareem and Mubarak, Hamdy and Abdelali, Ahmed and Eldesouki, Mohamed and Samih, Younes and Alharbi, Randah and Attia, Mohammed and Magdy, Walid and Kallmeyer, Laura},
  title     = {Multi-Dialect {Arabic} {POS} Tagging: A {CRF} Approach},
  booktitle = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation ({LREC} 2018)},
  address   = {Miyazaki, Japan},
  year      = {2018}
}

K. Darwish, A. Abdelali, H. Mubarak, Y. Samih, and M. Attia, “Diacritization of moroccan and tunisian arabic dialects: a crf approach,” in Proceedings of the 4th arabic natural language processing workshop (wanlp-2018), the 11th edition of the language resources and evaluation conference, 2018.
[BibTeX]

@inproceedings{darwish2018diacritization,
  author    = {Darwish, Kareem and Abdelali, Ahmed and Mubarak, Hamdy and Samih, Younes and Attia, Mohammed},
  title     = {Diacritization of {Moroccan} and {Tunisian} {Arabic} Dialects: A {CRF} Approach},
  booktitle = {Proceedings of the 4th {Arabic} Natural Language Processing Workshop ({WANLP}-2018), the 11th edition of the Language Resources and Evaluation Conference},
  address   = {Miyazaki, Japan},
  year      = {2018}
}

A. Abdelali, I. Temnikova, S. Hedaya, and S. Vogel, “The waw corpus: the first corpus of interpreted speeches and their translations for english and arabic,” in Language resources and evaluation conference (lrec 2018), 2018, p. 2135–2140.
[BibTeX]

@inproceedings{abdelali2018waw,
  author    = {Abdelali, Ahmed and Temnikova, Irina and Hedaya, Samy and Vogel, Stephan},
  title     = {The {WAW} Corpus: The First Corpus of Interpreted Speeches and Their Translations for {English} and {Arabic}},
  booktitle = {Language Resources and Evaluation Conference ({LREC} 2018)},
  pages     = {2135--2140},
  address   = {Miyazaki, Japan},
  year      = {2018}
}

T. Mihaylov, T. Mihaylova, P. Nakov, L. Màrquez, G. Georgiev, and I. Koychev, “The dark side of news community forums: opinion manipulation trolls,” Internet research, vol. 28, iss. 5, p. 1292–1312, 2018. doi:10.1108/IntR-03-2017-0118
[BibTeX] [Download PDF]

@article{InternetResearchJournal:2018,
  author  = {Mihaylov, Todor and
             Mihaylova, Tsvetomila and
             Nakov, Preslav and
             M{\`a}rquez, Llu{\'i}s and
             Georgiev, Georgi and
             Koychev, Ivan},
  title   = {The Dark Side of News Community Forums: Opinion Manipulation Trolls},
  journal = {Internet Research},
  year    = {2018},
  volume  = {28},
  number  = {5},
  pages   = {1292--1312},
  url     = {http://doi.org/10.1108/IntR-03-2017-0118},
  doi     = {10.1108/IntR-03-2017-0118}
}

A. Barrón–Cedeño, T. Elsayed, R. Suwaileh, L. Màrquez, P. Atanasova, W. Zaghouani, S. Kyuchukov, G. Da San Martino, and P. Nakov, “Overview of the clef-2018 checkthat! lab on automatic identification and verification of political claims. task 2: factuality,” in Working notes of clef 2018 – conference and labs of the evaluation forum, 2018.
[BibTeX] [Download PDF]

@inproceedings{barron2018overview,
  author    = {Barr{\'o}n-Cede{\~n}o, Alberto and
               Elsayed, Tamer and
               Suwaileh, Reem and
               M{\`a}rquez, Llu{\'i}s and
               Atanasova, Pepa and
               Zaghouani, Wajdi and
               Kyuchukov, Spas and
               Da San Martino, Giovanni and
               Nakov, Preslav},
  title     = {Overview of the {CLEF-2018 CheckThat! Lab} on automatic identification and verification of political claims. {Task} 2: Factuality},
  booktitle = {Working Notes of {CLEF} 2018 - Conference and Labs of the Evaluation Forum},
  year      = {2018},
  url       = {http://ceur-ws.org/Vol-2125/invited_paper_14.pdf}
}

A. Barrón–Cedeño, G. Da San Martino, Y. Zhang, A. Ali, and F. Dalvi, “Qlusty: Quick and Dirty Generation of Event Videos from Written Media Coverage,” in Proceedings of the second international workshop on recent trends in news information retrieval, Grenoble, France, 2018, p. 27–32.
[BibTeX] [Download PDF]

@inproceedings{Barron:18,
  author    = {Barr{\'o}n-Cede{\~n}o, Alberto and
               Da San Martino, Giovanni and
               Zhang, Yifan and
               Ali, Ahmed and
               Dalvi, Fahim},
  title     = {{Qlusty}: Quick and Dirty Generation of Event Videos from Written Media Coverage},
  booktitle = {Proceedings of the Second International Workshop on Recent Trends in News Information Retrieval},
  pages     = {27--32},
  url       = {http://ceur-ws.org/Vol-2079/paper7.pdf},
  address   = {Grenoble, France},
  year      = 2018
}

A. A. Freihat, G. Bella, H. Mubarak, and F. Giunchiglia, “A single-model approach for arabic segmentation, pos tagging, and named entity recognition,” in 2018 2nd international conference on natural language and speech processing (icnlsp), 2018, p. 1–8.
[BibTeX]

@inproceedings{freihat2018single,
  author       = {Freihat, Abed Alhakim and Bella, Gabor and Mubarak, Hamdy and Giunchiglia, Fausto},
  title        = {A single-model approach for {Arabic} segmentation, {POS} tagging, and named entity recognition},
  booktitle    = {2018 2nd International Conference on Natural Language and Speech Processing ({ICNLSP})},
  pages        = {1--8},
  year         = {2018},
  organization = {IEEE}
}

2017

H. Sajjad, N. Durrani, F. Dalvi, Y. Belinkov, and S. Vogel, “Neural machine translation training in a multi-domain scenario,” in Proceedings of the 14th international workshop on spoken language translation (iwslt), 2017.
[BibTeX]

@inproceedings{sajjad-etal:iwslt17,
  author    = {Hassan Sajjad and Nadir Durrani and Fahim Dalvi and Yonatan Belinkov and Stephan Vogel},
  title     = {Neural Machine Translation Training in a Multi-Domain Scenario},
  booktitle = {Proceedings of the 14th International Workshop on Spoken Language Translation ({IWSLT})},
  address   = {Tokyo, Japan},
  month     = dec,
  year      = {2017}
}

Y. Belinkov, L. Màrquez, H. Sajjad, N. Durrani, F. Dalvi, and J. Glass, “Evaluating layers of representation in neural machine translation on part-of-speech and semantic tagging tasks,” in Proceedings of the 8th international joint conference on natural language processing (ijcnlp), 2017.
[BibTeX]

@inproceedings{belinkov:ijcnlp2017,
  author    = {Yonatan Belinkov and Llu{\'i}s M{\`a}rquez and Hassan Sajjad and Nadir Durrani and Fahim Dalvi and James Glass},
  title     = {Evaluating Layers of Representation in Neural Machine Translation on Part-of-Speech and Semantic Tagging Tasks},
  booktitle = {Proceedings of the 8th International Joint Conference on Natural Language Processing ({IJCNLP})},
  address   = {Taipei, Taiwan},
  month     = nov,
  year      = 2017,
}

F. Dalvi, N. Durrani, H. Sajjad, Y. Belinkov, and S. Vogel, “Understanding and improving morphological learning in the neural machine translation decoder,” in Proceedings of the 8th international joint conference on natural language processing (ijcnlp), 2017.
[BibTeX]

@inproceedings{dalvi:ijcnlp2017,
  author    = {Fahim Dalvi and Nadir Durrani and Hassan Sajjad and Yonatan Belinkov and Stephan Vogel},
  title     = {Understanding and Improving Morphological Learning in the Neural Machine Translation Decoder},
  booktitle = {Proceedings of the 8th International Joint Conference on Natural Language Processing ({IJCNLP})},
  address   = {Taipei, Taiwan},
  month     = nov,
  year      = 2017,
}

H. Sajjad, F. Dalvi, N. Durrani, A. Abdelali, Y. Belinkov, and S. Vogel, “Challenging Language-Dependent Segmentation for Arabic: An Application to Machine Translation and Part-of-Speech Tagging,” in Proceedings of the 55th conference of the association for computational linguistics (acl), 2017.
[BibTeX]

@inproceedings{sajjad-etal:2017:ACLShort,
  author    = {Hassan Sajjad and Fahim Dalvi and Nadir Durrani and Ahmed Abdelali and Yonatan Belinkov and Stephan Vogel},
  title     = {Challenging Language-Dependent Segmentation for {Arabic}: An Application to Machine Translation and Part-of-Speech Tagging},
  booktitle = {Proceedings of the 55th Conference of the Association for Computational Linguistics ({ACL})},
  address   = {Vancouver, Canada},
  month     = aug,
  year      = {2017},
}

C. España–Bonet and A. Barrón–Cedeño, “Lump at semeval-2017 task 1: towards an interlingua semantic similarity,” in Proceedings of the 11th international workshop on semantic evaluation (semeval-2017), Vancouver, Canada, 2017, p. 144–149. doi:10.18653/v1/S17-2019
[BibTeX] [Download PDF]

@inproceedings{espana-bonet-barron-cedeno-2017-lump,
  author    = {Espa{\~n}a-Bonet, Cristina and
               Barr{\'o}n-Cede{\~n}o, Alberto},
  title     = {{Lump} at {SemEval-2017 Task 1}: Towards an Interlingua Semantic Similarity},
  booktitle = {Proceedings of the 11th International Workshop on Semantic Evaluation ({SemEval-2017})},
  month     = aug,
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  url       = {https://www.aclweb.org/anthology/S17-2019},
  doi       = {10.18653/v1/S17-2019},
  pages     = {144--149},
}

Y. Belinkov, N. Durrani, F. Dalvi, H. Sajjad, and J. Glass, “What do neural machine translation models learn about morphology?,” in Proceedings of the 55th conference of the association for computational linguistics (acl), 2017.
[BibTeX]

@inproceedings{belinkov:2017:ACL,
  author    = {Belinkov, Yonatan and Durrani, Nadir and Dalvi, Fahim and Sajjad, Hassan and Glass, James},
  title     = {What do Neural Machine Translation Models Learn about Morphology?},
  booktitle = {Proceedings of the 55th Conference of the Association for Computational Linguistics ({ACL})},
  address   = {Vancouver, Canada},
  month     = aug,
  year      = {2017},
}

D. T. Nguyen, K. Al–Mannai, S. Joty, H. Sajjad, M. Imran, and P. Mitra, “Robust classification of crisis-related data on social networks using convolutional neural networks,” in Proceedings of the 11th international aaai conference on web and social media (icwsm), 2017.
[BibTeX]

@inproceedings{nguyen:icwsm2017,
  author    = {Dat Tien Nguyen and Kamla Al-Mannai and Shafiq Joty and Hassan Sajjad and Muhammad Imran and Prasenjit Mitra},
  title     = {Robust Classification of Crisis-Related Data on Social Networks using Convolutional Neural Networks},
  booktitle = {Proceedings of the 11th International {AAAI} Conference on Web and Social Media ({ICWSM})},
  address   = {Montreal, Canada},
  month     = may,
  year      = {2017},
}

A. Abdelali, Query expansion system and method using language and language variants, 2017.
[BibTeX]

@misc{abdelali2017query,
  author = {Abdelali, Ahmed},
  title  = {Query expansion system and method using language and language variants},
  year   = {2017},
  month  = may,
  note   = {US Patent App. 15/117,107}
}

F. Dalvi, Y. Zhang, S. Khurana, N. Durrani, H. Sajjad, A. Abdelali, H. Mubarak, A. Ali, and S. Vogel, “Qcri live speech translation system,” in Proceedings of the 15th conference of the european chapter of the association for computational linguistics (eacl), 2017.
[BibTeX]

@inproceedings{dalvi2017qcri,
  author    = {Dalvi, Fahim and Zhang, Yifan and Khurana, Sameer and Durrani, Nadir and Sajjad, Hassan and Abdelali, Ahmed and Mubarak, Hamdy and Ali, Ahmed and Vogel, Stephan},
  title     = {{QCRI} Live Speech Translation System},
  booktitle = {Proceedings of the 15th Conference of the {European} Chapter of the Association for Computational Linguistics ({EACL})},
  year      = {2017},
  month     = apr,
}

R. Liepins, U. Germann, G. Barzdins, A. Birch, S. Renals, S. Weber, P. van der Kreeft, H. Bourlard, J. Prieto, O. Klejch, and others, “The summa platform prototype,” in Proceedings of the 15th conference of the european chapter of the association for computational linguistics (eacl), 2017.
[BibTeX]

@inproceedings{liepins2017summa,
  author    = {Liepins, Renars and Germann, Ulrich and Barzdins, Guntis and Birch, Alexandra and Renals, Steve and Weber, Susanne and van der Kreeft, Peggy and Bourlard, Herv{\'e} and Prieto, Jo{\~a}o and Klejch, Ondrej and others},
  title     = {The {SUMMA} Platform Prototype},
  booktitle = {Proceedings of the 15th Conference of the {European} Chapter of the Association for Computational Linguistics ({EACL})},
  year      = {2017},
  month     = apr,
}

A. Barrón–Cedeño, G. Da San Martino, S. Filice, and A. Moschitti, “On the use of an intermediate class in boolean crowdsourced relevance annotations for learning to rank comments,” in Proceedings of the 40th international acm sigir conference on research and development in information retrieval, 2017, p. 1209–1212.
[BibTeX]

@inproceedings{barron2017use,
  author       = {Barr{\'o}n-Cede{\~n}o, Alberto and
                  Da San Martino, Giovanni and
                  Filice, Simone and
                  Moschitti, Alessandro},
  title        = {On the Use of an Intermediate Class in {Boolean} Crowdsourced Relevance Annotations for Learning to Rank Comments},
  booktitle    = {Proceedings of the 40th International {ACM} {SIGIR} Conference on Research and Development in Information Retrieval},
  pages        = {1209--1212},
  year         = {2017},
  organization = {ACM}
}

K. Darwish, W. Magdy, and T. Zanouda, “Improved stance prediction in a user similarity feature space,” in 2017 ieee/acm international conference on advances in social networks analysis and mining, 2017, p. 145–148.
[BibTeX]

@inproceedings{darwish2017improved,
  author       = {Darwish, Kareem and Magdy, Walid and Zanouda, Tahar},
  title        = {Improved Stance Prediction in a User Similarity Feature Space},
  booktitle    = {2017 {IEEE/ACM} International Conference on Advances in Social Networks Analysis and Mining},
  pages        = {145--148},
  year         = {2017},
  organization = {Association for Computing Machinery}
}

M. Eldesouki, Y. Samih, A. Abdelali, M. Attia, H. Mubarak, K. Darwish, and L. Kallmeyer, “Arabic multi-dialect segmentation: bi-lstm-crf vs. svm,” Arxiv preprint arxiv:1708.05891, 2017.
[BibTeX]

@article{eldesouki2017arabic,
  author  = {Eldesouki, Mohamed and Samih, Younes and Abdelali, Ahmed and Attia, Mohammed and Mubarak, Hamdy and Darwish, Kareem and Kallmeyer, Laura},
  title   = {{Arabic} Multi-Dialect Segmentation: {bi-LSTM-CRF} vs. {SVM}},
  journal = {{arXiv} preprint {arXiv}:1708.05891},
  year    = {2017}
}

S. Joty, N. Durrani, H. Sajjad, and A. Abdelali, “Domain adaptation using neural network joint model,” Computer speech and language, vol. 45, iss. C, 2017.
[BibTeX]

@article{joty2017:csl,
  author     = {Shafiq Joty and Nadir Durrani and Hassan Sajjad and Ahmed Abdelali},
  title      = {Domain Adaptation using Neural Network Joint Model},
  journal    = {Computer Speech and Language},
  volume     = {45},
  year       = {2017},
  issn       = {0885-2308},
  issue_date = {September 2017},
  publisher  = {Academic Press Ltd.},
  address    = {London, UK},
}

N. Habash, M. Diab, K. Darwish, W. El–Hajj, H. Al–Khalifa, H. Bouamor, N. Tomeh, and M. El–Haj, “Proceedings of the third arabic natural language processing workshop,” in Proceedings of the third arabic natural language processing workshop, 2017.
[BibTeX]

@inproceedings{habash2017proceedings,
  author    = {Habash, Nizar and Diab, Mona and Darwish, Kareem and El-Hajj, Wassim and Al-Khalifa, Hend and Bouamor, Houda and Tomeh, Nadi and El-Haj, Mahmoud},
  title     = {Proceedings of the Third {Arabic} Natural Language Processing Workshop},
  booktitle = {Proceedings of the Third {Arabic} Natural Language Processing Workshop},
  year      = {2017}
}

H. Mubarak, K. Darwish, and W. Magdy, “Abusive language detection on arabic social media,” in Proceedings of the first workshop on abusive language online, 2017, p. 52–56.
[BibTeX]

@inproceedings{mubarak2017abusive,
  author    = {Mubarak, Hamdy and Darwish, Kareem and Magdy, Walid},
  title     = {Abusive language detection on {Arabic} social media},
  booktitle = {Proceedings of the First Workshop on Abusive Language Online},
  pages     = {52--56},
  year      = {2017}
}

H. Sajjad, H. Schmid, A. Fraser, and H. Schütze, “Statistical models for unsupervised, semi-supervised and supervised transliteration mining,” Computational linguistics, vol. 43, iss. 2, 2017.
[BibTeX]

@article{sajjad2017statistical,
  author     = {Sajjad, Hassan and Schmid, Helmut and Fraser, Alexander and Sch{\"u}tze, Hinrich},
  title      = {Statistical models for unsupervised, semi-supervised and supervised transliteration mining},
  journal    = {Computational Linguistics},
  volume     = {43},
  number     = {2},
  year       = {2017},
  issue_date = {June 2017},
  publisher  = {MIT Press},
  address    = {Cambridge, MA, USA},
}

Y. Samih, M. Eldesouki, M. Attia, K. Darwish, A. Abdelali, H. Mubarak, and L. Kallmeyer, “Learning from relatives: unified dialectal arabic segmentation,” in Proceedings of the 21st conference on computational natural language learning (conll 2017), 2017, p. 432–441.
[BibTeX]

@inproceedings{samih2017learning,
  author    = {Samih, Younes and Eldesouki, Mohamed and Attia, Mohammed and Darwish, Kareem and Abdelali, Ahmed and Mubarak, Hamdy and Kallmeyer, Laura},
  title     = {Learning from relatives: unified dialectal {Arabic} segmentation},
  booktitle = {Proceedings of the 21st Conference on Computational Natural Language Learning ({CoNLL} 2017)},
  pages     = {432--441},
  year      = {2017}
}

K. Darwish, W. Magdy, and T. Zanouda, “Trump vs. hillary: what went viral during the 2016 us presidential election,” in International conference on social informatics (socinfo-2017), 2017, p. 143–161.
[BibTeX]

@inproceedings{darwish2017trump,
  author    = {Darwish, Kareem and Magdy, Walid and Zanouda, Tahar},
  title     = {{Trump} vs. {Hillary}: What went viral during the 2016 {US} presidential election},
  booktitle = {International Conference on Social Informatics ({SocInfo}-2017)},
  pages     = {143--161},
  year      = {2017},
  publisher = {Springer},
  address   = {Cham}
}

K. Darwish, D. Alexandrov, P. Nakov, and Y. Mejova, “Seminar users in the arabic twitter sphere,” in International conference on social informatics, 2017, p. 91–108.
[BibTeX]

@inproceedings{darwish2017seminar,
  author    = {Darwish, Kareem and Alexandrov, Dimitar and Nakov, Preslav and Mejova, Yelena},
  title     = {Seminar users in the {Arabic} {Twitter} sphere},
  booktitle = {International Conference on Social Informatics},
  pages     = {91--108},
  year      = {2017},
  publisher = {Springer},
  address   = {Cham}
}

G. Karadzhov, P. Gencheva, P. Nakov, and I. Koychev, “We built a fake news & click-bait filter: what happened next will blow your mind!,” in Proceedings of the international conference on recent advances in natural language processing (ranlp’17), Varna, Bulgaria, 2017, p. 334–343.
[BibTeX] [Download PDF]

@inproceedings{RANLP2017:clickbait,
  author    = {Georgi Karadzhov and Pepa Gencheva and Preslav Nakov and Ivan Koychev},
  title     = {We Built a Fake News \& Click-bait Filter: What Happened Next Will Blow Your Mind!},
  booktitle = {Proceedings of the International Conference on Recent Advances in Natural Language Processing ({RANLP}'17)},
  series    = {RANLP~'17},
  address   = {Varna, Bulgaria},
  pages     = {334--343},
  year      = {2017},
  url       = {https://www.aclweb.org/anthology/papers/R/R17/R17-1045/}
}

G. Da San Martino, S. Romeo, A. Barrón–Cedeño, S. Joty, L. Marquez, A. Moschitti, and P. Nakov, “Cross-language question re-ranking,” in Proceedings of the 40th international acm sigir conference on research and development in information retrieval (sigir-2017), 2017.
[BibTeX] [Download PDF]

@inproceedings{martino2017cross,
  author    = {Da San Martino, Giovanni and
               Romeo, Salvatore and
               Barr{\'o}n-Cede{\~n}o, Alberto and
               Joty, Shafiq and
               M{\`a}rquez, Llu{\'i}s and
               Moschitti, Alessandro and
               Nakov, Preslav},
  title     = {Cross-language question re-ranking},
  booktitle = {Proceedings of the 40th International {ACM} {SIGIR} Conference on Research and Development in Information Retrieval ({SIGIR}-2017)},
  year      = {2017},
  url       = {https://raihanjoty.github.io/papers/martino-et-al-sigir-17.pdf}
}

C. España–Bonet, Á. C. Varga, A. Barrón–Cedeño, and J. van Genabith, “An empirical analysis of nmt-derived interlingual embeddings and their use in parallel sentence identification,” Ieee journal of selected topics in signal processing, vol. 11, iss. 8, p. 1340–1350, 2017.
[BibTeX] [Download PDF]

@article{espana2017empirical,
  author    = {Espa{\~n}a-Bonet, Cristina and Varga, {\'A}d{\'a}m Csaba and Barr{\'o}n-Cede{\~n}o, Alberto and van Genabith, Josef},
  title     = {An empirical analysis of {NMT}-derived interlingual embeddings and their use in parallel sentence identification},
  journal   = {{IEEE} Journal of Selected Topics in Signal Processing},
  volume    = {11},
  number    = {8},
  pages     = {1340--1350},
  year      = {2017},
  publisher = {IEEE},
  url       = {https://ieeexplore.ieee.org/document/8070942}
}

P. Gencheva, P. Nakov, L. Màrquez, A. Barrón–Cedeño, and I. Koychev, “A context-aware approach for detecting worth-checking claims in political debates,” in Proceedings of the international conference on recent advances in natural language processing (ranlp’17), Varna, Bulgaria, 2017, p. 267–276.
[BibTeX] [Download PDF]

@inproceedings{RANLP2017:debates,
  author    = {Pepa Gencheva and
               Preslav Nakov and
               Llu{\'i}s M{\`a}rquez and
               Alberto Barr{\'o}n-Cede{\~n}o and
               Ivan Koychev},
  title     = {A Context-Aware Approach for Detecting Worth-Checking Claims in Political Debates},
  booktitle = {Proceedings of the International Conference on Recent Advances in Natural Language Processing ({RANLP}'17)},
  series    = {RANLP'17},
  address   = {Varna, Bulgaria},
  pages     = {267--276},
  year      = {2017},
  url       = {http://doi.org/10.26615/978-954-452-049-6_037},
}

G. Karadzhov, P. Nakov, L. Màrquez, A. Barrón–Cedeño, and I. Koychev, “Fully automated fact checking using external sources,” in Proceedings of the international conference on recent advances in natural language processing (ranlp’17), Varna, Bulgaria, 2017, p. 344–353.
[BibTeX] [Download PDF]

@inproceedings{RANLP2017:factchecking:external,
  author    = {Georgi Karadzhov and
               Preslav Nakov and
               Llu{\'i}s M{\`a}rquez and
               Alberto Barr{\'o}n-Cede{\~n}o and
               Ivan Koychev},
  title     = {Fully Automated Fact Checking Using External Sources},
  booktitle = {Proceedings of the International Conference on Recent Advances in Natural Language Processing ({RANLP}'17)},
  series    = {RANLP'17},
  address   = {Varna, Bulgaria},
  year      = {2017},
  pages     = {344--353},
  url       = {http://doi.org/10.26615/978-954-452-049-6_046}
}

P. Nakov, T. Mihaylova, L. Màrquez, Y. Shiroya, and I. Koychev, “Do not trust the trolls: predicting credibility in community question answering forums,” in Proceedings of the international conference on recent advances in natural language processing (ranlp’17), Varna, Bulgaria, 2017, p. 551–560.
[BibTeX] [Download PDF]

@inproceedings{RANLP2017:credibility:trolls,
  author    = {Preslav Nakov and
               Tsvetomila Mihaylova and
               Llu{\'i}s M{\`a}rquez and
               Yashkumar Shiroya and
               Ivan Koychev},
  title     = {Do Not Trust the Trolls: Predicting Credibility in Community Question Answering Forums},
  booktitle = {Proceedings of the International Conference on Recent Advances in Natural Language Processing ({RANLP}'17)},
  series    = {RANLP'17},
  address   = {Varna, Bulgaria},
  year      = {2017},
  pages     = {551--560},
  url       = {http://doi.org/10.26615/978-954-452-049-6_072},
}

S. Romeo, G. Da San Martino, A. Barrón–Cedeño, and A. Moschitti, “A multiple-instance learning approach to sentence selection for question ranking,” in European conference on information retrieval, 2017, p. 437–449.
[BibTeX]

@inproceedings{romeo2017multiple,
  author    = {Romeo, Salvatore and
               Da San Martino, Giovanni and
               Barr{\'o}n-Cede{\~n}o, Alberto and
               Moschitti, Alessandro},
  title     = {A Multiple-Instance Learning Approach to Sentence Selection for Question Ranking},
  booktitle = {{European} Conference on Information Retrieval},
  pages     = {437--449},
  year      = {2017},
  publisher = {Springer},
  address   = {Cham}
}

K. Darwish, H. Mubarak, A. Abdelali, and M. Eldesouki, “Arabic pos tagging: don’t abandon feature engineering just yet,” Wanlp 2017 (co-located with eacl 2017), p. 130, 2017.
[BibTeX]

@article{darwish2017arabic,
  author  = {Darwish, Kareem and Mubarak, Hamdy and Abdelali, Ahmed and Eldesouki, Mohamed},
  title   = {{Arabic} {POS} Tagging: Don't Abandon Feature Engineering Just Yet},
  journal = {{WANLP} 2017 (co-located with {EACL} 2017)},
  pages   = {130},
  year    = {2017}
}

K. Darwish, D. Alexandrov, P. Nakov, and Y. Mejova, “Seminar users in the Arabic Twitter sphere,” in Proceedings of the 9th international conference on social informatics (socinfo’17), Oxford, UK, 2017, p. 91–108. doi:10.1007/978-3-319-67217-5_7
[BibTeX] [Download PDF]

@inproceedings{SeminarUsers2017,
  author    = {Kareem Darwish and
               Dimitar Alexandrov and
               Preslav Nakov and
               Yelena Mejova},
  title     = {Seminar Users in the {Arabic} {Twitter} Sphere},
  booktitle = {Proceedings of the 9th International Conference on Social Informatics ({SocInfo}'17)},
  series    = {SocInfo'17},
  address   = {Oxford, UK},
  pages     = {91--108},
  year      = {2017},
  url       = {http://doi.org/10.1007/978-3-319-67217-5_7},
  doi       = {10.1007/978-3-319-67217-5_7},
}

H. Sajjad, F. Dalvi, N. Durrani, A. Abdelali, Y. Belinkov, and S. Vogel, “Challenging language-dependent segmentation for arabic: an application to machine translation and part-of-speech tagging,” Arxiv preprint arxiv:1709.00616, 2017.
[BibTeX]

@article{sajjad2017challenging,
  author        = {Sajjad, Hassan and Dalvi, Fahim and Durrani, Nadir and Abdelali, Ahmed and Belinkov, Yonatan and Vogel, Stephan},
  title         = {Challenging Language-Dependent Segmentation for {Arabic}: An Application to Machine Translation and Part-of-Speech Tagging},
  journal       = {arXiv preprint arXiv:1709.00616},
  year          = {2017},
  eprint        = {1709.00616},
  archiveprefix = {arXiv}
}

I. Temnikova, A. Abdelali, S. Hedaya, S. Vogel, and A. Al Daher, “Interpreting strategies annotation in the waw corpus,” Ranlp 2017, p. 36, 2017.
[BibTeX]

@inproceedings{temnikova2017interpreting,
  author    = {Temnikova, Irina and Abdelali, Ahmed and Hedaya, Samy and Vogel, Stephan and Al Daher, Aishah},
  title     = {Interpreting Strategies Annotation in the {WAW} Corpus},
  booktitle = {RANLP 2017},
  pages     = {36},
  year      = {2017}
}

S. Bougrine, H. Cherroun, and A. Abdelali, “Altruistic crowdsourcing for arabic speech corpus annotation,” Procedia computer science, vol. 117, p. 137–144, 2017.
[BibTeX]

@article{bougrine2017altruistic,
  author    = {Bougrine, Soumia and Cherroun, Hadda and Abdelali, Ahmed},
  title     = {Altruistic Crowdsourcing for {Arabic} Speech Corpus Annotation},
  journal   = {Procedia Computer Science},
  volume    = {117},
  pages     = {137--144},
  year      = {2017},
  publisher = {Elsevier}
}

K. Darwish, H. Mubarak, and A. Abdelali, “Arabic diacritization: stats, rules, and hacks,” in Proceedings of the third arabic natural language processing workshop, 2017, p. 9–17.
[BibTeX]

@inproceedings{darwish2017barabic,
  author       = {Darwish, Kareem and Mubarak, Hamdy and Abdelali, Ahmed},
  title        = {Arabic Diacritization: Stats, Rules, and Hacks},
  booktitle    = {Proceedings of the Third Arabic Natural Language Processing Workshop},
  organization = {Association for Computational Linguistics},
  year         = {2017},
  pages        = {9--17}
}

Y. Samih, M. Attia, M. Eldesouki, A. Abdelali, H. Mubarak, L. Kallmeyer, and K. Darwish, “A neural architecture for dialectal arabic segmentation,” in Proceedings of the third arabic natural language processing workshop, 2017, p. 46–54.
[BibTeX]

@inproceedings{samih2017neural,
  author       = {Samih, Younes and Attia, Mohammed and Eldesouki, Mohamed and Abdelali, Ahmed and Mubarak, Hamdy and Kallmeyer, Laura and Darwish, Kareem},
  title        = {A Neural Architecture for Dialectal {Arabic} Segmentation},
  booktitle    = {Proceedings of the Third Arabic Natural Language Processing Workshop},
  pages        = {46--54},
  year         = {2017},
  organization = {Association for Computational Linguistics}
}

K. Darwish, H. Mubarak, A. Abdelali, and M. Eldesouki, “Arabic pos tagging: don’t abandon feature engineering just yet,” in Proceedings of the third arabic natural language processing workshop, 2017, p. 130–137.
[BibTeX]

@inproceedings{darwish2017aarabic,
  author       = {Darwish, Kareem and Mubarak, Hamdy and Abdelali, Ahmed and Eldesouki, Mohamed},
  title        = {{Arabic} {POS} Tagging: Don't Abandon Feature Engineering Just Yet},
  booktitle    = {Proceedings of the Third Arabic Natural Language Processing Workshop},
  pages        = {130--137},
  year         = {2017},
  organization = {Association for Computational Linguistics}
}

H. Mubarak, “Crowdsourcing speech and language data for resource-poor languages,” in International conference on advanced intelligent systems and informatics, 2017, p. 440–447.
[BibTeX]

@inproceedings{mubarak2017crowdsourcing,
  author    = {Mubarak, Hamdy},
  title     = {Crowdsourcing Speech and Language Data for Resource-Poor Languages},
  booktitle = {International Conference on Advanced Intelligent Systems and Informatics},
  pages     = {440--447},
  year      = {2017},
  publisher = {Springer}
}

2016

E. Hoque, S. Joty, L. Màrquez, A. Barrón–Cedeño, G. Da San Martino, A. Moschitti, P. Nakov, S. Romeo, and G. Carenini, “An interactive system for exploring community question answering forums,” in Proceedings of coling 2016, the 26th international conference on computational linguistics: system demonstrations, Osaka, Japan, 2016, p. 1–5.
[BibTeX] [Download PDF]

@inproceedings{hoque-etal-2016-interactive,
  author    = {Hoque, Enamul and Joty, Shafiq and M{\`a}rquez, Llu{\'\i}s and Barr{\'o}n-Cede{\~n}o, Alberto and Da San Martino, Giovanni and Moschitti, Alessandro and Nakov, Preslav and Romeo, Salvatore and Carenini, Giuseppe},
  title     = {An Interactive System for Exploring Community Question Answering Forums},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: System Demonstrations},
  publisher = {The COLING 2016 Organizing Committee},
  address   = {Osaka, Japan},
  month     = {December},
  year      = {2016},
  pages     = {1--5},
  url       = {https://www.aclweb.org/anthology/C16-2001}
}

N. Durrani, H. Sajjad, S. Joty, and A. Abdelali, “A deep fusion model for domain adaptation in phrase-based mt,” in Proceedings of the 26th international conference on computational linguistics (coling), Osaka, Japan, 2016.
[BibTeX]

@inproceedings{durrani-EtAl:2016:COLING,
  author    = {Durrani, Nadir and Sajjad, Hassan and Joty, Shafiq and Abdelali, Ahmed},
  title     = {A Deep Fusion Model for Domain Adaptation in Phrase-based {MT}},
  booktitle = {Proceedings of the 26th International Conference on Computational Linguistics (COLING)},
  address   = {Osaka, Japan},
  month     = {December},
  year      = {2016}
}

N. Durrani, F. Dalvi, H. Sajjad, and S. Vogel, “QCRI’s Machine Translation Systems for IWSLT’2016,” in Proceedings of the 13th international workshop on spoken language translation (iwslt), 2016.
[BibTeX]

@inproceedings{durrani-etal:iwslt16,
  author    = {Durrani, Nadir and Dalvi, Fahim and Sajjad, Hassan and Vogel, Stephan},
  title     = {{QCRI}'s Machine Translation Systems for {IWSLT}'2016},
  booktitle = {Proceedings of the 13th International Workshop on Spoken Language Translation (IWSLT)},
  address   = {Seattle, USA},
  month     = {December},
  year      = {2016}
}

S. Romeo, G. Da San Martino, A. Barrón–Cedeño, A. Moschitti, Y. Belinkov, W. Hsu, Y. Zhang, M. Mohtarami, and J. Glass, “Neural attention for learning to rank questions in community question answering,” in Proceedings of coling 2016, the 26th international conference on computational linguistics: technical papers, Osaka, Japan, 2016, p. 1734–1745.
[BibTeX] [Download PDF]

@inproceedings{romeo-etal-2016-neural,
  author    = {Romeo, Salvatore and Da San Martino, Giovanni and Barr{\'o}n-Cede{\~n}o, Alberto and Moschitti, Alessandro and Belinkov, Yonatan and Hsu, Wei-Ning and Zhang, Yu and Mohtarami, Mitra and Glass, James},
  title     = {Neural Attention for Learning to Rank Questions in Community Question Answering},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  publisher = {The COLING 2016 Organizing Committee},
  address   = {Osaka, Japan},
  month     = {December},
  year      = {2016},
  pages     = {1734--1745},
  url       = {https://www.aclweb.org/anthology/C16-1163}
}

M. Eldesouki, F. Dalvi, H. Sajjad, and K. Darwish, “QCRI @ DSL 2016: Spoken Arabic Dialect Identification Using Textual,” in Proceedings of the 3rd workshop on nlp for similar languages, varieties and dialects, 2016.
[BibTeX]

@inproceedings{eldesouki2016aqcri,
  author    = {Eldesouki, Mohamed and Dalvi, Fahim and Sajjad, Hassan and Darwish, Kareem},
  title     = {{QCRI} @ {DSL} 2016: Spoken {Arabic} Dialect Identification Using Textual Features},
  booktitle = {Proceedings of the 3rd Workshop on NLP for Similar Languages, Varieties and Dialects},
  address   = {Osaka, Japan},
  month     = {December},
  year      = {2016}
}

H. Sajjad, F. Guzmán, and S. Vogel, “An empirical study: post-editing effort for english to arabic hybrid machine translation,” in Proceedings of the association for machine translation in the americas (amta), 2016.
[BibTeX]

@inproceedings{sajjad16:postediting,
  author    = {Sajjad, Hassan and Guzm{\'a}n, Francisco and Vogel, Stephan},
  title     = {An Empirical Study: Post-editing Effort for {English} to {Arabic} Hybrid Machine Translation},
  booktitle = {Proceedings of the Association for Machine Translation in the Americas (AMTA)},
  address   = {Austin, US},
  month     = {October},
  year      = {2016}
}

D. T. Nguyen, S. Joty, M. Imran, H. Sajjad, and P. Mitra, “Applications of online deep learning for crisis response using social media information,” in Proceedings of the 4th international workshop on social web for disaster management (swdm), 2016.
[BibTeX]

@inproceedings{nguyen2016:swdm,
  author    = {Nguyen, Dat Tien and Joty, Shafiq and Imran, Muhammad and Sajjad, Hassan and Mitra, Prasenjit},
  title     = {Applications of Online Deep Learning for Crisis Response Using Social Media Information},
  booktitle = {Proceedings of the 4th International Workshop on Social Web for Disaster Management (SWDM)},
  address   = {Indianapolis, US},
  month     = {October},
  year      = {2016}
}

W. Zaghouani, A. Abdelali, F. Guzmán, and H. Sajjad, “Normalizing mathematical expressions to improve the translation of educational content,” in Proceedings of the amta 2016 workshop semitic machine translation (semat), 2016.
[BibTeX]

@inproceedings{zaghouani2016normalizing,
  author    = {Zaghouani, Wajdi and Abdelali, Ahmed and Guzm{\'a}n, Francisco and Sajjad, Hassan},
  title     = {Normalizing Mathematical Expressions to Improve the Translation of Educational Content},
  booktitle = {Proceedings of the AMTA 2016 Workshop Semitic Machine Translation (SeMaT)},
  address   = {Austin, US},
  month     = {October},
  year      = {2016}
}

T. Mihaylov and P. Nakov, “Hunting for troll comments in news community forums,” in Proceedings of the 54th annual meeting of the association for computational linguistics, Berlin, Germany, 2016, p. 399–405.
[BibTeX] [Download PDF]

@inproceedings{mihaylov-nakov:2016:P16-2,
  title     = {Hunting for troll comments in news community forums},
  author    = {Mihaylov, Todor and Nakov, Preslav},
  booktitle = {Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics},
  series    = {ACL~'16},
  address   = {Berlin, Germany},
  month     = {August},
  year      = {2016},
  pages     = {399--405},
  url       = {http://anthology.aclweb.org/P16-2065}
}

H. Sajjad, F. Guzmán, N. Durrani, A. Abdelali, H. Bouamor, I. P. Temnikova, and S. Vogel, “Eyes don’t lie: predicting machine translation quality using eye movement.,” in Proceedings of the 15th annual conference of the north american chapter of the association of computational linguistics: human language technologies (naacl-hlt), 2016.
[BibTeX]

@inproceedings{sajjad2016aeyes,
  author    = {Sajjad, Hassan and Guzm{\'a}n, Francisco and Durrani, Nadir and Abdelali, Ahmed and Bouamor, Houda and Temnikova, Irina P. and Vogel, Stephan},
  title     = {Eyes Don't Lie: Predicting Machine Translation Quality Using Eye Movement},
  booktitle = {Proceedings of the 15th Annual Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (NAACL-HLT)},
  address   = {San Diego, US},
  month     = {June},
  year      = {2016}
}

A. Barrón–Cedeño, D. Bonadiman, G. Da San Martino, S. Joty, A. Moschitti, F. A. Al Obaidli, S. Romeo, K. Tymoshenko, and A. Uva, “Convkn at semeval-2016 task 3: answer and question selection for question answering on arabic and english fora,” Proceedings of semeval-2016, p. 896–903, 2016.
[BibTeX] [Download PDF]

@inproceedings{barron2016convkn,
  author    = {Barr{\'o}n-Cede{\~n}o, Alberto and Bonadiman, Daniele and Da San Martino, Giovanni and Joty, Shafiq and Moschitti, Alessandro and Al Obaidli, Fahad A. and Romeo, Salvatore and Tymoshenko, Kateryna and Uva, Antonio},
  title     = {{ConvKN at SemEval-2016 Task 3}: Answer and Question Selection for Question Answering on {Arabic} and {English} Fora},
  booktitle = {Proceedings of the 10th International Workshop on Semantic Evaluation (SemEval-2016)},
  pages     = {896--903},
  year      = {2016},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclweb.org/anthology/papers/S/S16/S16-1138/}
}

K. Darwish and H. Mubarak, “Farasa: a new fast and accurate arabic word segmenter,” in Proceedings of the tenth international conference on language resources and evaluation (lrec 2016), 2016.
[BibTeX]

@inproceedings{darwish2016farasa,
  author       = {Darwish, Kareem and Mubarak, Hamdy},
  title        = {Farasa: A New Fast and Accurate {Arabic} Word Segmenter},
  booktitle    = {Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC 2016)},
  year         = {2016},
  organization = {European Language Resources Association (ELRA)}
}

H. Mubarak and K. Darwish, “Demographic surveys of arab annotators on crowdflower,” in Weaving relations of trust in crowd work: transparency and reputation across platforms workshop, 2016.
[BibTeX]

@inproceedings{mubarak2016demographic,
  author    = {Mubarak, Hamdy and Darwish, Kareem},
  title     = {Demographic Surveys of {Arab} Annotators on {CrowdFlower}},
  booktitle = {Weaving Relations of Trust in Crowd Work: Transparency and Reputation across Platforms Workshop},
  year      = {2016}
}

M. Eldesouki, F. Dalvi, H. Sajjad, and K. Darwish, “Qcri@ dsl 2016: spoken arabic dialect identification using textual features,” in Proceedings of the third workshop on nlp for similar languages, varieties and dialects (vardial3), 2016, p. 221–226.
[BibTeX]

@inproceedings{eldesouki2016qcri,
  author    = {Eldesouki, Mohamed and Dalvi, Fahim and Sajjad, Hassan and Darwish, Kareem},
  title     = {{QCRI} @ {DSL} 2016: Spoken {Arabic} Dialect Identification Using Textual Features},
  booktitle = {Proceedings of the Third Workshop on NLP for Similar Languages, Varieties and Dialects (VarDial3)},
  pages     = {221--226},
  year      = {2016}
}

D. Parsing, “Natural language processing,” in Proceedings of the acl workshop on statistical nlp and weighted automata (statfsm), 2016, p. 32–41.
[BibTeX]

@inproceedings{parsing2016natural,
  author    = {Parsing, Dependency},
  title     = {Natural language processing},
  booktitle = {Proceedings of the ACL Workshop on Statistical NLP and Weighted Automata (StatFSM)},
  year      = {2016},
  pages     = {32--41}
}

A. Abdelali, K. Darwish, N. Durrani, and H. Mubarak, “Farasa: a fast and furious segmenter for arabic,” in 15th annual conference of the north american chapter of the association for computational linguistics: human language technologies, 2016, p. 11–16.
[BibTeX]

@inproceedings{abdelali2016farasa,
  author       = {Abdelali, Ahmed and Darwish, Kareem and Durrani, Nadir and Mubarak, Hamdy},
  title        = {Farasa: A Fast and Furious Segmenter for {Arabic}},
  booktitle    = {15th Annual Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
  pages        = {11--16},
  year         = {2016},
  organization = {Association for Computational Linguistics}
}

W. Magdy, K. Darwish, N. Abokhodair, A. Rahimi, and T. Baldwin, “\# isisisnotislam or\# deportallmuslims? predicting unspoken views,” in Proceedings of the 8th acm conference on web science, 2016, p. 95–106.
[BibTeX]

@inproceedings{magdy2016isisisnotislam,
  author    = {Magdy, Walid and Darwish, Kareem and Abokhodair, Norah and Rahimi, Afshin and Baldwin, Timothy},
  title     = {\#{ISISisNotIslam} or \#{DeportAllMuslims}? Predicting Unspoken Views},
  booktitle = {Proceedings of the 8th ACM Conference on Web Science},
  pages     = {95--106},
  year      = {2016}
}

A. Barrón–Cedeño, G. Da San Martino, S. Romeo, and A. Moschitti, “Selecting sentences versus selecting tree constituents for automatic question ranking,” in Proceedings of coling 2016, the 26th international conference on computational linguistics: technical papers, 2016, p. 2515–2525.
[BibTeX] [Download PDF]

@inproceedings{barron2016selecting,
  author    = {Barr{\'o}n-Cede{\~n}o, Alberto and Da San Martino, Giovanni and Romeo, Salvatore and Moschitti, Alessandro},
  title     = {Selecting sentences versus selecting tree constituents for automatic question ranking},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  pages     = {2515--2525},
  year      = {2016},
  url       = {https://aclweb.org/anthology/papers/C/C16/C16-1237/}
}

H. Mubarak and A. Abdelali, “Arabic to english person name transliteration using twitter,” in Proceedings of the tenth international conference on language resources and evaluation (lrec 2016), 2016, p. 351–355.
[BibTeX]

@inproceedings{mubarak2016arabic,
  author       = {Mubarak, Hamdy and Abdelali, Ahmed},
  title        = {{Arabic} to {English} Person Name Transliteration using {Twitter}},
  booktitle    = {Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC 2016)},
  pages        = {351--355},
  year         = {2016},
  organization = {European Language Resources Association (ELRA)}
}

A. Abdelali, N. Durrani, and F. Guzmán, “Iappraise: a manual machine translation evaluation environment,” in Proceedings of naacl-hlt 2016 (demonstrations), 2016, p. 17–21.
[BibTeX]

@inproceedings{abdelali2016iappraise,
  author       = {Abdelali, Ahmed and Durrani, Nadir and Guzm{\'a}n, Francisco},
  title        = {{iAppraise}: A Manual Machine Translation Evaluation Environment},
  booktitle    = {Proceedings of NAACL-HLT 2016 (Demonstrations)},
  pages        = {17--21},
  year         = {2016},
  organization = {Association for Computational Linguistics}
}

H. Sajjad, F. Guzmán, N. Durrani, A. Abdelali, H. Bouamor, I. Temnikova, and S. Vogel, “Eyes don’t lie: predicting machine translation quality using eye movement,” in Proceedings of the 2016 conference of the north american chapter of the association for computational linguistics: human language technologies, 2016, p. 1082–1088.
[BibTeX]

@inproceedings{sajjad2016eyes,
  author    = {Sajjad, Hassan and Guzm{\'a}n, Francisco and Durrani, Nadir and Abdelali, Ahmed and Bouamor, Houda and Temnikova, Irina and Vogel, Stephan},
  title     = {Eyes Don't Lie: Predicting Machine Translation Quality Using Eye Movement},
  booktitle = {Proceedings of the 2016 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
  year      = {2016},
  pages     = {1082--1088}
}

N. Doumi, A. Lehireche, D. Maurel, and A. Abdelali, “A semi-automatic and low cost approach to build scalable lemma-based lexical resources for arabic verbs,” International journal of information technology and computer science (ijitcs), vol. 8, iss. 1, 2016.
[BibTeX]

@article{doumi2016semi,
  author  = {Doumi, Noureddine and Lehireche, Ahmed and Maurel, Denis and Abdelali, Ahmed},
  title   = {A Semi-Automatic and Low Cost Approach to Build Scalable Lemma-based Lexical Resources for {Arabic} Verbs},
  journal = {International Journal of Information Technology and Computer Science (IJITCS)},
  volume  = {8},
  number  = {1},
  year    = {2016}
}

T. Mihaylov and P. Nakov, “SemanticZ at SemEval-2016 Task 3: ranking relevant answers in community question answering using semantic similarity based on fine-tuned word embeddings,” in Proceedings of the 10th international workshop on semantic evaluation, San Diego, California, USA, 2016, p. 879–886.
[BibTeX]

@inproceedings{SemEval2016:task3:SemanticZ,
  title     = {{SemanticZ at SemEval-2016 Task 3}: Ranking Relevant Answers in Community Question Answering Using Semantic Similarity Based on Fine-tuned Word Embeddings},
  author    = {Mihaylov, Todor and Nakov, Preslav},
  booktitle = {Proceedings of the 10th International Workshop on Semantic Evaluation},
  series    = {SemEval~'16},
  address   = {San Diego, California, USA},
  pages     = {879--886},
  year      = {2016}
}

M. Hardalov, I. Koychev, and P. Nakov, “In search of credible news,” in Proceedings of the 17th international conference on artificial intelligence: methodology, systems, and applications, Varna, Bulgaria, 2016, p. 172–180. doi:10.1007/978-3-319-44748-3_17
[BibTeX] [Download PDF]

@inproceedings{Hardalov2016,
  title     = {In Search of Credible News},
  author    = {Hardalov, Momchil and Koychev, Ivan and Nakov, Preslav},
  editor    = {Dichev, Christo and Agre, Gennady},
  booktitle = {Proceedings of the 17th International Conference on Artificial Intelligence: Methodology, Systems, and Applications},
  series    = {AIMSA~'16},
  publisher = {Springer International Publishing},
  address   = {Varna, Bulgaria},
  year      = {2016},
  pages     = {172--180},
  isbn      = {978-3-319-44748-3},
  doi       = {10.1007/978-3-319-44748-3_17},
  url       = {https://doi.org/10.1007/978-3-319-44748-3_17}
}

G. Da San Martino, A. Barrón Cedeño, S. Romeo, A. Uva, and A. Moschitti, “Learning to re-rank questions in community question answering using advanced features,” in Proceedings of the 25th acm international on conference on information and knowledge management, 2016, p. 1997–2000.
[BibTeX]

@inproceedings{da2016learning,
  author       = {Da San Martino, Giovanni and Barr{\'o}n-Cede{\~n}o, Alberto and Romeo, Salvatore and Uva, Antonio and Moschitti, Alessandro},
  title        = {Learning to re-rank questions in community question answering using advanced features},
  booktitle    = {Proceedings of the 25th ACM International on Conference on Information and Knowledge Management},
  pages        = {1997--2000},
  year         = {2016},
  organization = {ACM}
}

G. {Da San Martino}, A. Barrón–Cedeño, S. Romeo, A. Moschitti, S. Joty, F. A. Al Obaidli, K. Tymoshenko, and A. Uva, “Addressing community question answering in english and arabic,” Arxiv preprint arxiv:1610.05522, 2016.
[BibTeX] [Download PDF]

@article{martino2016addressing,
  author        = {Da San Martino, Giovanni and Barr{\'o}n-Cede{\~n}o, Alberto and Romeo, Salvatore and Moschitti, Alessandro and Joty, Shafiq and Al Obaidli, Fahad A. and Tymoshenko, Kateryna and Uva, Antonio},
  title         = {Addressing Community Question Answering in {English} and {Arabic}},
  journal       = {arXiv preprint arXiv:1610.05522},
  year          = {2016},
  eprint        = {1610.05522},
  archiveprefix = {arXiv},
  url           = {http://disi.unitn.it/moschitti/since2013/2016_SIGIR_Da-San-Martino_CQA-English-Arabic.pdf}
}

A. Ali, P. Bell, J. Glass, Y. Messaoui, H. Mubarak, S. Renals, and Y. Zhang, “The mgb-2 challenge: arabic multi-dialect broadcast media recognition,” in 2016 ieee spoken language technology workshop (slt), 2016, p. 279–284.
[BibTeX]

@inproceedings{ali2016mgb,
  author       = {Ali, Ahmed and Bell, Peter and Glass, James and Messaoui, Yacine and Mubarak, Hamdy and Renals, Steve and Zhang, Yifan},
  title        = {The {MGB-2} Challenge: {Arabic} Multi-Dialect Broadcast Media Recognition},
  booktitle    = {2016 IEEE Spoken Language Technology Workshop (SLT)},
  pages        = {279--284},
  year         = {2016},
  organization = {IEEE}
}

2015

N. Durrani, H. Sajjad, Joty Shafiq, A. Abdelali, and S. Vogel, “Using joint models for domain adaptation in statistical machine translation,” in Proceedings of the 15th machine translation summit (mt summit xv), Florida, USA, 2015.
[BibTeX]

@inproceedings{durraniEtAl:MT-Summit2015,
  author    = {Durrani, Nadir and Sajjad, Hassan and Joty, Shafiq and Abdelali, Ahmed and Vogel, Stephan},
  title     = {Using Joint Models for Domain Adaptation in Statistical Machine Translation},
  booktitle = {Proceedings of the 15th Machine Translation Summit (MT Summit XV)},
  address   = {Florida, USA},
  month     = {November},
  year      = {2015}
}

S. Joty, A. Barrón–Cedeño, G. Da San Martino, S. Filice, L. Màrquez, A. Moschitti, and P. Nakov, “Global thread-level inference for comment classification in community question answering,” in Proceedings of the 2015 conference on empirical methods in natural language processing, Lisbon, Portugal, 2015, p. 573–578. doi:10.18653/v1/D15-1068
[BibTeX] [Download PDF]

@inproceedings{joty-etal-2015-global,
  author    = {Joty, Shafiq and Barr{\'o}n-Cede{\~n}o, Alberto and Da San Martino, Giovanni and Filice, Simone and M{\`a}rquez, Llu{\'\i}s and Moschitti, Alessandro and Nakov, Preslav},
  title     = {Global Thread-level Inference for Comment Classification in Community Question Answering},
  booktitle = {Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing},
  publisher = {Association for Computational Linguistics},
  address   = {Lisbon, Portugal},
  month     = {September},
  year      = {2015},
  pages     = {573--578},
  doi       = {10.18653/v1/D15-1068},
  url       = {https://www.aclweb.org/anthology/D15-1068}
}

T. Mihaylov, I. Koychev, G. Georgiev, and P. Nakov, “Exposing paid opinion manipulation trolls,” in Proceedings of the international conference recent advances in natural language processing, Hissar, Bulgaria, 2015, p. 443–450.
[BibTeX] [Download PDF]

@inproceedings{Mihaylov2015ExposingPO,
  title     = {Exposing paid opinion manipulation trolls},
  author    = {Mihaylov, Todor and Koychev, Ivan and Georgiev, Georgi and Nakov, Preslav},
  booktitle = {Proceedings of the International Conference Recent Advances in Natural Language Processing},
  series    = {RANLP~'15},
  publisher = {INCOMA Ltd. Shoumen, BULGARIA},
  address   = {Hissar, Bulgaria},
  month     = {September},
  year      = {2015},
  pages     = {443--450},
  url       = {http://www.aclweb.org/anthology/R15-1058}
}

S. Joty, H. Sajjad, N. Durrani, K. Al–Mannai, A. Abdelali, and S. Vogel, “How to Avoid Unwanted Pregnancies: Domain Adaptation using Neural Network Models,” in Proceedings of the conference on empirical methods in natural language processing (emnlp), Lisbon, Portugal, 2015.
[BibTeX]

@inproceedings{joty-etAL:2015:EMNLP,
  author    = {Joty, Shafiq and Sajjad, Hassan and Durrani, Nadir and Al-Mannai, Kamla and Abdelali, Ahmed and Vogel, Stephan},
  title     = {How to Avoid Unwanted Pregnancies: Domain Adaptation using Neural Network Models},
  booktitle = {Proceedings of the Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  address   = {Lisbon, Portugal},
  month     = {September},
  year      = {2015}
}

A. Rafae, A. Qayyum, M. M. Uddin, A. Karim, H. Sajjad, and F. Kamiran, “An unsupervised method for discovering lexical variations in roman Urdu informal text.,” in Proceedings of the conference on empirical methods in natural language processing (emnlp), 2015.
[BibTeX]

@inproceedings{rafae2015unsupervised,
  author    = {Rafae, Abdul and Qayyum, Abdul and Uddin, Muhammad Moeen and Karim, Asim and Sajjad, Hassan and Kamiran, Faisal},
  title     = {An Unsupervised Method for Discovering Lexical Variations in {Roman} {Urdu} Informal Text},
  booktitle = {Proceedings of the Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  address   = {Lisbon, Portugal},
  month     = {September},
  year      = {2015}
}

T. Mihaylov, G. Georgiev, and P. Nakov, “Finding opinion manipulation trolls in news community forums,” in Proceedings of the nineteenth conference on computational natural language learning, Beijing, China, 2015, p. 310–314.
[BibTeX] [Download PDF]

@inproceedings{Mihaylov2015FindingOM,
  title     = {Finding opinion manipulation trolls in news community forums},
  author    = {Mihaylov, Todor and Georgiev, Georgi and Nakov, Preslav},
  booktitle = {Proceedings of the Nineteenth Conference on Computational Natural Language Learning},
  series    = {CoNLL~'15},
  address   = {Beijing, China},
  month     = {July},
  year      = {2015},
  pages     = {310--314},
  url       = {http://www.aclweb.org/anthology/K15-1032}
}

H. Bouamor, H. Sajjad, N. Durrani, and K. Oflazer, “Qcmuq@qalb-2015 shared task: combining character level mt and error-tolerant finite-state recognition for arabic spelling correction,” in Proceedings of the workshop of arabic natural language processing (anlp), 2015.
[BibTeX]

@inproceedings{bouamor:anlp15,
  author    = {Bouamor, Houda and Sajjad, Hassan and Durrani, Nadir and Oflazer, Kemal},
  title     = {{QCMUQ@QALB-2015} Shared Task: Combining Character level {MT} and Error-tolerant Finite-State Recognition for {Arabic} Spelling Correction},
  booktitle = {Proceedings of the Workshop of Arabic Natural Language Processing (ANLP)},
  address   = {Beijing, China},
  month     = {July},
  year      = {2015}
}

H. Sajjad, N. Durrani, F. Guzman, P. Nakov, A. Abdelali, S. Vogel, W. Salloum, A. E. Kholy, and N. Habash, “QCN Egyptian Arabic to English Machine Translation System for NIST OpenMT15,” in Workshop of nist openmt15, 2015.
[BibTeX]

@inproceedings{sajjad:nist15,
  author    = {Sajjad, Hassan and Durrani, Nadir and Guzm{\'a}n, Francisco and Nakov, Preslav and Abdelali, Ahmed and Vogel, Stephan and Salloum, Wael and El Kholy, Ahmed and Habash, Nizar},
  title     = {{QCN} {Egyptian} {Arabic} to {English} Machine Translation System for {NIST} {OpenMT15}},
  booktitle = {Workshop of NIST OpenMT15},
  address   = {Washington DC, US},
  month     = {June},
  year      = {2015}
}

M. Nicosia, S. Filice, A. Barrón–Cedeño, I. Saleh, H. Mubarak, W. Gao, P. Nakov, G. Da San Martino, A. Moschitti, K. Darwish, L. Màrquez, S. Joty, and W. Magdy, “Qcri: answer selection for community question answering – experiments for arabic and english,” in Proceedings of the 9th international workshop on semantic evaluation (semeval 2015), Denver, Colorado, 2015, p. 203–209. doi:10.18653/v1/S15-2036
[BibTeX] [Download PDF]

@inproceedings{nicosia-etal-2015-qcri,
  author    = {Nicosia, Massimo and Filice, Simone and Barr{\'o}n-Cede{\~n}o, Alberto and Saleh, Iman and Mubarak, Hamdy and Gao, Wei and Nakov, Preslav and Da San Martino, Giovanni and Moschitti, Alessandro and Darwish, Kareem and M{\`a}rquez, Llu{\'\i}s and Joty, Shafiq and Magdy, Walid},
  title     = {{QCRI}: Answer Selection for Community Question Answering - Experiments for {Arabic} and {English}},
  booktitle = {Proceedings of the 9th International Workshop on Semantic Evaluation (SemEval 2015)},
  publisher = {Association for Computational Linguistics},
  address   = {Denver, Colorado},
  month     = {June},
  year      = {2015},
  pages     = {203--209},
  doi       = {10.18653/v1/S15-2036},
  url       = {https://www.aclweb.org/anthology/S15-2036}
}

W. Magdy, H. Sajjad, T. El–Ganainy, and F. Sebastiani, “Distant supervision for tweet classification using youtube labels,” in Proceedings of the ninth international aaai conference on web and social media (icwsm), 2015.
[BibTeX]

@inproceedings{magdy2015distant,
  author    = {Magdy, Walid and Sajjad, Hassan and El-Ganainy, Tarek and Sebastiani, Fabrizio},
  title     = {Distant Supervision for Tweet Classification Using {YouTube} Labels},
  booktitle = {Proceedings of the Ninth International AAAI Conference on Web and Social Media (ICWSM)},
  address   = {Oxford, UK},
  month     = {May},
  year      = {2015}
}

I. Bensalem, I. Boukhalfa, P. Rosso, L. Abouenour, K. Darwish, and S. Chikhi, “Overview of the araplagdet pan@fire2015 shared task on arabic plagiarism detection,” Notebook papers of fire 2015, vol. 1587, 2015.
[BibTeX]

@article{bensalem2015overview,
  author  = {Bensalem, Imene and Boukhalfa, Imene and Rosso, Paolo and Abouenour, Lahsen and Darwish, Kareem and Chikhi, Salim},
  title   = {Overview of the {AraPlagDet} {PAN@FIRE2015} Shared Task on {Arabic} Plagiarism Detection},
  journal = {Notebook Papers of FIRE 2015},
  volume  = {1587},
  year    = {2015}
}

N. Habash, S. Vogel, and K. Darwish, “Proceedings of the second workshop on arabic natural language processing,” in Proceedings of the second workshop on arabic natural language processing, 2015.
[BibTeX]

@proceedings{habash2015proceedings,
  editor = {Habash, Nizar and Vogel, Stephan and Darwish, Kareem},
  title  = {Proceedings of the Second Workshop on {Arabic} Natural Language Processing},
  year   = {2015}
}

A. Barrón–Cedeño, C. España–Bonet, J. Boldoba, and L. Màrquez, “A factory of comparable corpora from Wikipedia,” in Proceedings of the eighth workshop on building and using comparable corpora, 2015, p. 3–13.
[BibTeX]

@inproceedings{barron2015factory,
  author    = {Barr{\'o}n-Cede{\~n}o, Alberto and Espa{\~n}a-Bonet, Cristina and Boldoba, Josu and M{\`a}rquez, Llu{\'\i}s},
  title     = {A Factory of Comparable Corpora from {Wikipedia}},
  booktitle = {Proceedings of the Eighth Workshop on Building and Using Comparable Corpora},
  pages     = {3--13},
  year      = {2015}
}

W. Magdy, H. Sajjad, T. El–Ganainy, and F. Sebastiani, “Bridging social media via distant supervision,” Social network analysis and mining, vol. 35, iss. 5, 2015.
[BibTeX]

@article{madgy2015:SNAM,
  author  = {Magdy, Walid and Sajjad, Hassan and El-Ganainy, Tarek and Sebastiani, Fabrizio},
  title   = {Bridging social media via distant supervision},
  journal = {Social Network Analysis and Mining},
  volume  = {5},
  number  = {1},
  pages   = {35},
  year    = {2015}
}

K. Darwish and W. Magdy, “Attitudes towards refugees in light of the paris attacks,” Arxiv preprint arxiv:1512.04310, 2015.
[BibTeX]

@article{darwish2015attitudes,
  author        = {Darwish, Kareem and Magdy, Walid},
  title         = {Attitudes towards Refugees in Light of the {Paris} Attacks},
  journal       = {arXiv preprint arXiv:1512.04310},
  year          = {2015},
  eprint        = {1512.04310},
  archiveprefix = {arXiv}
}

S. Wray, H. Mubarak, and A. Ali, “Best practices for crowdsourcing dialectal arabic speech transcription,” in Proceedings of the second workshop on arabic natural language processing, 2015, p. 99–107.
[BibTeX]

@inproceedings{wray2015best,
  author    = {Wray, Samantha and Mubarak, Hamdy and Ali, Ahmed},
  title     = {Best Practices for Crowdsourcing Dialectal {Arabic} Speech Transcription},
  booktitle = {Proceedings of the Second Workshop on Arabic Natural Language Processing},
  pages     = {99--107},
  year      = {2015}
}

Y. Belinkov, A. Barrón–Cedeno, and H. Mubarak, “Answer selection in arabic community question answering: a feature-rich approach,” in Proceedings of the second workshop on arabic natural language processing, 2015, p. 183–190.
[BibTeX]

@inproceedings{belinkov2015answer,
  author    = {Belinkov, Yonatan and Barr{\'o}n-Cede{\~n}o, Alberto and Mubarak, Hamdy},
  title     = {Answer selection in {Arabic} community question answering: A feature-rich approach},
  booktitle = {Proceedings of the Second Workshop on {Arabic} Natural Language Processing},
  pages     = {183--190},
  year      = {2015},
}

W. Magdy, K. Darwish, and N. Abokhodair, “Quantifying public response towards islam on twitter after paris attacks,” Arxiv preprint arxiv:1512.04570, 2015.
[BibTeX]

@article{magdy2015quantifying,
  author        = {Magdy, Walid and Darwish, Kareem and Abokhodair, Norah},
  title         = {Quantifying public response towards {Islam} on {Twitter} after {Paris} attacks},
  journal       = {{arXiv} preprint {arXiv:1512.04570}},
  year          = {2015},
  eprint        = {1512.04570},
  archiveprefix = {arXiv},
}

Y. Zhang, C. Li, R. Barzilay, and K. Darwish, “Randomized greedy inference for joint segmentation, pos tagging and dependency parsing,” in Proceedings of the 2015 conference of the north american chapter of the association for computational linguistics: human language technologies, 2015, p. 42–52.
[BibTeX]

@inproceedings{zhang2015randomized,
  author    = {Zhang, Yuan and Li, Chengtao and Barzilay, Regina and Darwish, Kareem},
  title     = {Randomized greedy inference for joint segmentation, {POS} tagging and dependency parsing},
  booktitle = {Proceedings of the 2015 Conference of the {North American} Chapter of the {Association for Computational Linguistics}: Human Language Technologies},
  pages     = {42--52},
  year      = {2015},
}

W. Magdy, K. Darwish, and I. Weber, “‘I like ISIS, but I want to watch Chris Nolan’s new movie’: exploring ISIS supporters on Twitter,” in Proceedings of the 26th ACM conference on hypertext & social media, 2015, p. 321–322.
[BibTeX]

@inproceedings{magdy2015like,
  author    = {Magdy, Walid and Darwish, Kareem and Weber, Ingmar},
  title     = {``{I} like {ISIS}, but {I} want to watch {Chris Nolan's} new movie'': Exploring {ISIS} Supporters on {Twitter}},
  booktitle = {Proceedings of the 26th {ACM} Conference on Hypertext \& Social Media},
  pages     = {321--322},
  year      = {2015},
}

H. Mubarak and K. Darwish, “Classifying arab names geographically,” in Proceedings of the second workshop on arabic natural language processing, 2015, p. 1–8.
[BibTeX]

@inproceedings{mubarak2015classifying,
  author    = {Mubarak, Hamdy and Darwish, Kareem},
  title     = {Classifying {Arab} Names Geographically},
  booktitle = {Proceedings of the Second Workshop on {Arabic} Natural Language Processing},
  pages     = {1--8},
  year      = {2015},
}

A. Barrón–Cedeño, S. Filice, G. Da San Martino, S. Joty, L. Màrquez, P. Nakov, and A. Moschitti, “Thread-level information for comment classification in community question answering,” in Proceedings of the 53rd annual meeting of the association for computational linguistics and the 7th international joint conference on natural language processing (volume 2: short papers), 2015, p. 687–693.
[BibTeX] [Download PDF]

@inproceedings{barron2015thread,
  author    = {Barr{\'o}n-Cede{\~n}o, Alberto and Filice, Simone and Da San Martino, Giovanni and Joty, Shafiq and M{\`a}rquez, Llu{\'i}s and Nakov, Preslav and Moschitti, Alessandro},
  title     = {Thread-level information for comment classification in community question answering},
  booktitle = {Proceedings of the 53rd Annual Meeting of the {Association for Computational Linguistics} and the 7th International Joint Conference on Natural Language Processing (Volume 2: Short Papers)},
  volume    = {2},
  pages     = {687--693},
  year      = {2015},
  url       = {https://aclweb.org/anthology/papers/P/P15/P15-2113/},
}

W. Magdy, K. Darwish, and I. Weber, “#FailedRevolutions: using Twitter to study the antecedents of ISIS support,” arXiv preprint arXiv:1503.02401, 2015.
[BibTeX]

@article{magdy2015failedrevolutions,
  author        = {Magdy, Walid and Darwish, Kareem and Weber, Ingmar},
  title         = {{\#FailedRevolutions}: Using {Twitter} to study the antecedents of {ISIS} support},
  journal       = {{arXiv} preprint {arXiv:1503.02401}},
  year          = {2015},
  eprint        = {1503.02401},
  archiveprefix = {arXiv},
}

J. Borge–Holthoefer, W. Magdy, K. Darwish, and I. Weber, “Content and network dynamics behind egyptian political polarization on twitter,” in Proceedings of the 18th acm conference on computer supported cooperative work & social computing, 2015, p. 700–711.
[BibTeX]

@inproceedings{borge2015content,
  author    = {Borge-Holthoefer, Javier and Magdy, Walid and Darwish, Kareem and Weber, Ingmar},
  title     = {Content and network dynamics behind {Egyptian} political polarization on {Twitter}},
  booktitle = {Proceedings of the 18th {ACM} Conference on Computer Supported Cooperative Work \& Social Computing},
  pages     = {700--711},
  year      = {2015},
}

F. Guzmán, A. Abdelali, I. Temnikova, H. Sajjad, and S. Vogel, “How do humans evaluate machine translation,” in Proceedings of the tenth workshop on statistical machine translation, Lisboa, Portugal, 2015, p. 457–466.
[BibTeX]

@inproceedings{francisco2015humans,
  author       = {Guzm{\'a}n, Francisco and Abdelali, Ahmed and Temnikova, Irina and Sajjad, Hassan and Vogel, Stephan},
  title        = {How do Humans Evaluate Machine Translation},
  booktitle    = {Proceedings of the Tenth Workshop on Statistical Machine Translation},
  address      = {Lisboa, Portugal},
  pages        = {457--466},
  year         = {2015},
  organization = {Association for Computational Linguistics},
}

S. Joty, H. Sajjad, N. Durrani, K. Al–Mannai, A. Abdelali, and S. Vogel, “How to avoid unwanted pregnancies: domain adaptation using neural network models,” in EMNLP, 2015.
[BibTeX]

@inproceedings{shafiq2015avoid,
  author    = {Joty, Shafiq and Sajjad, Hassan and Durrani, Nadir and Al-Mannai, Kamla and Abdelali, Ahmed and Vogel, Stephan},
  title     = {How to Avoid Unwanted Pregnancies: Domain Adaptation using Neural Network Models},
  booktitle = {{EMNLP}},
  year      = {2015},
}

H. Mubarak, K. Darwish, and A. Abdelali, “QCRI@QALB-2015 shared task: correction of Arabic text for native and non-native speakers’ errors,” in Proceedings of the second workshop on Arabic natural language processing, 2015, p. 150–154.
[BibTeX]

@inproceedings{mubarak2015qcri,
  author    = {Mubarak, Hamdy and Darwish, Kareem and Abdelali, Ahmed},
  title     = {{QCRI@QALB-2015} Shared Task: Correction of {Arabic} Text for Native and Non-Native Speakers' Errors},
  booktitle = {Proceedings of the Second Workshop on {Arabic} Natural Language Processing},
  pages     = {150--154},
  year      = {2015},
}

A. Abdelali, A. Ali, F. Guzmán, F. Stahlberg, S. Vogel, and Y. Zhang, “Qat2—the qcri advanced transcription and translation system,” in Sixteenth annual conference of the international speech communication association, 2015.
[BibTeX]

@inproceedings{abdelali2015qat2,
  author    = {Abdelali, Ahmed and Ali, Ahmed and Guzm{\'a}n, Francisco and Stahlberg, Felix and Vogel, Stephan and Zhang, Yifan},
  title     = {{QAT2}---The {QCRI} Advanced Transcription and Translation System},
  booktitle = {Sixteenth Annual Conference of the {International Speech Communication Association}},
  year      = {2015},
}

H. Sajjad, N. Durrani, F. Guzman, P. Nakov, A. Abdelali, S. Vogel, W. Salloum, A. El Kholy, and N. Habash, “The qcn egyptian arabic to english statistical machine translation system for nist openmt’2015,” in Openmt’2015, 2015.
[BibTeX]

@inproceedings{sajjad2015qcn,
  author    = {Sajjad, Hassan and Durrani, Nadir and Guzm{\'a}n, Francisco and Nakov, Preslav and Abdelali, Ahmed and Vogel, Stephan and Salloum, Wael and El Kholy, Ahmed and Habash, Nizar},
  title     = {The {QCN} {Egyptian} {Arabic} to {English} Statistical Machine Translation System for {NIST} {OpenMT'2015}},
  booktitle = {{OpenMT'2015}},
  year      = {2015},
}

H. Sajjad, N. Durrani, F. Guzman, P. Nakov, A. Abdelali, S. Vogel, W. Salloum, A. El Kholy, and N. Habash, “QCN system description for NIST OpenMT15,” 2015.
[BibTeX]

@misc{sajjad2015qcnb,
  author = {Sajjad, Hassan and Durrani, Nadir and Guzm{\'a}n, Francisco and Nakov, Preslav and Abdelali, Ahmed and Vogel, Stephan and Salloum, Wael and El Kholy, Ahmed and Habash, Nizar},
  title  = {{QCN} System Description for {NIST} {OpenMT15}},
  year   = {2015},
}

L. Formiga, A. Barrón–Cedeño, L. Màrquez, C. A. Henríquez, and J. B. Mariño, “Leveraging online user feedback to improve statistical machine translation,” Journal of artificial intelligence research, vol. 54, p. 159–192, 2015.
[BibTeX] [Download PDF]

@article{formiga2015leveraging,
  author  = {Formiga, Llu{\'i}s and Barr{\'o}n-Cede{\~n}o, Alberto and M{\`a}rquez, Llu{\'i}s and Henr{\'i}quez, Carlos A. and Mari{\~n}o, Jos{\'e} B.},
  title   = {Leveraging online user feedback to improve statistical machine translation},
  journal = {Journal of Artificial Intelligence Research},
  volume  = {54},
  pages   = {159--192},
  year    = {2015},
  url     = {https://dl.acm.org/citation.cfm?id=2910562},
}

E. Flores, A. Barrón–Cedeño, L. Moreno, and P. Rosso, “Cross-language source code re-use detection using latent semantic analysis,” J. ucs, vol. 21, iss. 13, p. 1708–1725, 2015.
[BibTeX] [Download PDF]

@article{flores2015cross,
  author  = {Flores, Enrique and Barr{\'o}n-Cede{\~n}o, Alberto and Moreno, Lidia and Rosso, Paolo},
  title   = {Cross-Language Source Code Re-Use Detection Using Latent Semantic Analysis},
  journal = {Journal of Universal Computer Science},
  volume  = {21},
  number  = {13},
  pages   = {1708--1725},
  year    = {2015},
  url     = {http://www.jucs.org/jucs_21_13/cross_language_source_code/jucs_21_13_1708_1725_flores.pdf},
}

M. Nicosia, S. Filice, A. Barrón–Cedeño, I. Saleh, H. Mubarak, W. Gao, P. Nakov, G. Da San Martino, A. Moschitti, K. Darwish, and others, “QCRI: answer selection for community question answering-experiment for Arabic and English,” 2015.
[BibTeX]

@inproceedings{nicosia2015qcri,
  author       = {Nicosia, Massimo and Filice, Simone and Barr{\'o}n-Cede{\~n}o, Alberto and Saleh, Iman and Mubarak, Hamdy and Gao, Wei and Nakov, Preslav and Da San Martino, Giovanni and Moschitti, Alessandro and Darwish, Kareem and others},
  title        = {{QCRI}: Answer selection for community question answering-Experiment for {Arabic} and {English}},
  booktitle    = {Proceedings of the 9th International Workshop on Semantic Evaluation ({SemEval} 2015)},
  year         = {2015},
  organization = {Association for Computational Linguistics},
}

2014

K. Al–Mannai, H. Sajjad, A. Khader, F. Al Obaidli, P. Nakov, and S. Vogel, “Unsupervised word segmentation improves dialectal Arabic to English machine translation,” in Proceedings of the workshop of arabic natural language processing (anlp), 2014.
[BibTeX]

@inproceedings{kamla:anlp2014,
  author    = {Al-Mannai, Kamla and Sajjad, Hassan and Khader, Alaa and Al Obaidli, Fahad and Nakov, Preslav and Vogel, Stephan},
  title     = {Unsupervised word segmentation improves dialectal {Arabic} to {English} machine translation},
  booktitle = {Proceedings of the Workshop of {Arabic} Natural Language Processing ({ANLP})},
  year      = {2014},
  month     = oct,
  location  = {Doha, Qatar},
}

K. Darwish, H. Sajjad, and H. Mubarak, “Verifiably effective arabic dialect identification.,” in Proceedings of the conference on empirical methods in natural language processing (emnlp), 2014.
[BibTeX]

@inproceedings{darwish2014verifiably,
  author    = {Darwish, Kareem and Sajjad, Hassan and Mubarak, Hamdy},
  title     = {Verifiably Effective {Arabic} Dialect Identification},
  booktitle = {Proceedings of the Conference on Empirical Methods in Natural Language Processing ({EMNLP})},
  year      = {2014},
  month     = oct,
  location  = {Doha, Qatar},
}

A. Abdelali, F. Guzman, H. Sajjad, and S. Vogel, “The AMARA corpus: building parallel language resources for the educational domain,” in Proceedings of the 9th international conference on language resources and evaluation (lrec), Reykjavik, Iceland, 2014.
[BibTeX]

@inproceedings{Abdelali_2014_lrec,
  author    = {Abdelali, Ahmed and Guzm{\'a}n, Francisco and Sajjad, Hassan and Vogel, Stephan},
  title     = {The {AMARA} Corpus: Building Parallel Language Resources for the Educational Domain},
  booktitle = {Proceedings of the 9th International Conference on Language Resources and Evaluation ({LREC})},
  year      = {2014},
  month     = may,
  address   = {Reykjavik, Iceland},
}

M. Moeen Uddin, M. Imran, and H. Sajjad, “Understanding types of users on Twitter,” in Proceedings of the 6th ase international conference in social computing (socialcom), 2014.
[BibTeX]

@inproceedings{uddin:socialcom2014,
  author    = {Moeen Uddin, Mohammad and Imran, Mohammad and Sajjad, Hassan},
  title     = {Understanding Types of Users on {Twitter}},
  booktitle = {Proceedings of the 6th {ASE} International Conference in Social Computing ({SocialCom})},
  year      = {2014},
  month     = may,
  location  = {Stanford, USA},
}

N. Durrani, H. Sajjad, H. Hoang, and P. Koehn, “Integrating an Unsupervised Transliteration Model into Statistical Machine Translation,” in Proceedings of the 15th conference of the european chapter of the acl (eacl), Gothenburg, Sweden, 2014.
[BibTeX]

@inproceedings{durrani-EtAl:2014:EACL,
  author    = {Durrani, Nadir and Sajjad, Hassan and Hoang, Hieu and Koehn, Philipp},
  title     = {Integrating an Unsupervised Transliteration Model into Statistical Machine Translation},
  booktitle = {Proceedings of the 15th Conference of the {European} Chapter of the {ACL} ({EACL})},
  year      = {2014},
  month     = apr,
  address   = {Gothenburg, Sweden},
}

N. Tourani and A. Abdelali, “Strategy stories of annual reports: case of sears canada incorporation,” in Society of business research conference – march 20-22, 2014, phoenix az., 2014.
[BibTeX]

@inproceedings{tourani2014strategy,
  author    = {Tourani, Nazanin and Abdelali, Ahmed},
  title     = {Strategy Stories of Annual Reports: Case of {Sears Canada Incorporation}},
  booktitle = {Society of Business Research Conference},
  address   = {Phoenix, AZ},
  year      = {2014},
  month     = mar,
}

H. Mubarak and K. Darwish, “Using twitter to collect a multi-dialectal corpus of arabic,” in Proceedings of the emnlp 2014 workshop on arabic natural language processing (anlp), 2014, p. 1–7.
[BibTeX]

@inproceedings{mubarak2014using,
  author    = {Mubarak, Hamdy and Darwish, Kareem},
  title     = {Using {Twitter} to collect a multi-dialectal corpus of {Arabic}},
  booktitle = {Proceedings of the {EMNLP} 2014 Workshop on {Arabic} Natural Language Processing ({ANLP})},
  pages     = {1--7},
  year      = {2014},
}

K. Darwish and W. Gao, “Simple effective microblog named entity recognition: arabic as an example,” in International conference on language resources and evaluation, 2014.
[BibTeX]

@inproceedings{darwish2014simple,
  author    = {Darwish, Kareem and Gao, Wei},
  title     = {Simple Effective Microblog Named Entity Recognition: {Arabic} as an Example},
  booktitle = {International Conference on Language Resources and Evaluation},
  year      = {2014},
}

J. Borge–Holthoefer, W. Magdy, K. Darwish, and I. Weber, “Structural and semantic evolution of Egyptian political polarization on Twitter,” in Qatar Foundation annual research conference proceedings, vol. 2014, iss. 1, 2014, p. SSPP0757.
[BibTeX]

@inproceedings{borge2014structural,
  author       = {Borge-Holthoefer, Javier and Magdy, Walid and Darwish, Kareem and Weber, Ingmar},
  title        = {Structural And Semantic Evolution Of {Egyptian} Political Polarization On {Twitter}},
  booktitle    = {{Qatar Foundation} Annual Research Conference Proceedings},
  volume       = {2014},
  number       = {1},
  pages        = {SSPP0757},
  year         = {2014},
  organization = {Hamad bin Khalifa University Press (HBKU Press)},
}

H. Mubarak and K. Darwish, “Automatic correction of arabic text: a cascaded approach,” in Proceedings of the emnlp 2014 workshop on arabic natural language processing (anlp), 2014, p. 132–136.
[BibTeX]

@inproceedings{mubarak2014automatic,
  author    = {Mubarak, Hamdy and Darwish, Kareem},
  title     = {Automatic correction of {Arabic} text: a cascaded approach},
  booktitle = {Proceedings of the {EMNLP} 2014 Workshop on {Arabic} Natural Language Processing ({ANLP})},
  pages     = {132--136},
  year      = {2014},
}

K. Darwish, A. Abdelali, and H. Mubarak, “Using stem-templates to improve arabic pos and gender/number tagging.,” in Lrec, 2014, p. 2926–2931.
[BibTeX]

@inproceedings{darwish2014using,
  author    = {Darwish, Kareem and Abdelali, Ahmed and Mubarak, Hamdy},
  title     = {Using Stem-Templates to Improve {Arabic} {POS} and Gender/Number Tagging},
  booktitle = {{LREC}},
  pages     = {2926--2931},
  year      = {2014},
}

K. Darwish, H. Sajjad, and H. Mubarak, “Verifiably effective arabic dialect identification,” in Proceedings of the 2014 conference on empirical methods in natural language processing (emnlp), 2014, p. 1465–1468.
[BibTeX]

@inproceedings{darwish2014verifiablyb,
  author    = {Darwish, Kareem and Sajjad, Hassan and Mubarak, Hamdy},
  title     = {Verifiably effective {Arabic} dialect identification},
  booktitle = {Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing ({EMNLP})},
  pages     = {1465--1468},
  year      = {2014},
}

H. Hassan and K. Darwish, “Statistical machine translation,” in Natural language processing of semitic languages, Springer, berlin, heidelberg, 2014, p. 199–219.
[BibTeX]

@incollection{hassan2014statistical,
  author    = {Hassan, Hany and Darwish, Kareem},
  title     = {Statistical machine translation},
  booktitle = {Natural Language Processing of {Semitic} Languages},
  pages     = {199--219},
  year      = {2014},
  publisher = {Springer},
  address   = {Berlin, Heidelberg},
}

K. Darwish and W. Magdy, “Arabic information retrieval,” Foundations and trends in information retrieval, vol. 7, iss. 4, p. 239–342, 2014.
[BibTeX]

@article{darwish2014arabic,
  author    = {Darwish, Kareem and Magdy, Walid},
  title     = {{Arabic} Information Retrieval},
  journal   = {Foundations and Trends in Information Retrieval},
  volume    = {7},
  number    = {4},
  pages     = {239--342},
  year      = {2014},
  publisher = {Now Publishers Inc},
}

K. L. Hacker, A. Abdelali, J. Johnston, and D. Boje, Analyzing Iranian leaders’ conflict framing with Leximancer automated text analysis, Progress, 2014.
[BibTeX]

@misc{hacker2014analyzng,
  author    = {Hacker, Kenneth L. and Abdelali, Ahmed and Johnston, Jennifer and Boje, David},
  title     = {Analyzing {Iranian} leaders' conflict framing with {Leximancer} Automated Text Analysis},
  year      = {2014},
  publisher = {Progress},
}

K. Darwish, A. M. Ali, and A. Abdelali, “Query term expansion by automatic learning of morphological equivalence patterns from wikipedia,” in Sigir 2014 workshop on semantic matching in information retrieval (smir), 2014, p. 24–29.
[BibTeX]

@inproceedings{darwish2014query,
  author       = {Darwish, Kareem and Ali, Ahmed M. and Abdelali, Ahmed},
  title        = {Query Term Expansion by Automatic Learning of Morphological Equivalence Patterns from {Wikipedia}},
  booktitle    = {{SIGIR} 2014 Workshop on Semantic Matching in Information Retrieval ({SMIR})},
  volume       = {1204},
  pages        = {24--29},
  year         = {2014},
  organization = {CEUR-WS},
}

Y. O. Elhadj, A. Abdelali, R. Bouziane, and A. H. Ammar, “Revisiting arabic part of speech tagsets,” in 2014 ieee/acs 11th international conference on computer systems and applications (aiccsa), 2014, p. 793–802.
[BibTeX]

@inproceedings{elhadj2014revisiting,
  author       = {Elhadj, Yahya O. M. and Abdelali, Ahmed and Bouziane, Rachid and Ammar, Adel H.},
  title        = {Revisiting {Arabic} part of speech tagsets},
  booktitle    = {2014 {IEEE/ACS} 11th International Conference on Computer Systems and Applications ({AICCSA})},
  pages        = {793--802},
  year         = {2014},
  organization = {IEEE},
}

E. Mahmoud Ba, A. Vashist, I. Temnikova, A. Abdelali, and F. Guzmán, “Translation and transcription of educational videos,” in Qatar foundation annual research conference, 2014, p. ITSP1067.
[BibTeX]

@inproceedings{mahmoud2014translation,
  author    = {Mahmoud, Ba, Elsherif and Vashist, Arushi and Temnikova, Irina and Abdelali, Ahmed and Guzm{\'a}n, Francisco},
  title     = {Translation And Transcription Of Educational Videos},
  booktitle = {{Qatar Foundation} Annual Research Conference},
  number    = {1},
  pages     = {ITSP1067},
  year      = {2014},
}

A. Abdelali, F. Guzman, H. Sajjad, and S. Vogel, “The amara corpus: building parallel language resources for the educational domain.,” in Lrec, 2014, p. 1044–1054.
[BibTeX]

@inproceedings{abdelali2014amara,
  author    = {Abdelali, Ahmed and Guzm{\'a}n, Francisco and Sajjad, Hassan and Vogel, Stephan},
  title     = {The {AMARA} Corpus: Building Parallel Language Resources for the Educational Domain},
  booktitle = {{LREC}},
  volume    = {14},
  pages     = {1044--1054},
  year      = {2014},
}

A. Ali, H. Mubarak, and S. Vogel, “Advances in dialectal arabic speech recognition: a study using twitter to improve egyptian asr,” in International workshop on spoken language translation (iwslt 2014), 2014.
[BibTeX]

@inproceedings{ali2014advances,
  author    = {Ali, Ahmed and Mubarak, Hamdy and Vogel, Stephan},
  title     = {Advances in dialectal {Arabic} speech recognition: A study using {Twitter} to improve {Egyptian} {ASR}},
  booktitle = {International Workshop on Spoken Language Translation ({IWSLT} 2014)},
  year      = {2014},
}

2013

H. Sajjad, F. Guzmán, P. Nakov, A. Abdelali, K. Murray, F. A. Obaidli, and S. Vogel, “QCRI at IWSLT 2013: experiments in Arabic-English and English-Arabic spoken language translation,” in Proceedings of the 10th international workshop on spoken language technology (iwslt), 2013.
[BibTeX]

@inproceedings{sajjad-etal:iwslt13,
  author    = {Sajjad, Hassan and Guzm{\'a}n, Francisco and Nakov, Preslav and Abdelali, Ahmed and Murray, Kenton and Al Obaidli, Fahad and Vogel, Stephan},
  title     = {{QCRI} at {IWSLT} 2013: Experiments in {Arabic-English} and {English-Arabic} Spoken Language Translation},
  booktitle = {Proceedings of the 10th International Workshop on Spoken Language Translation ({IWSLT})},
  year      = {2013},
  month     = dec,
  location  = {Heidelberg, Germany},
}

F. Guzmán, H. Sajjad, S. Vogel, and A. Abdelali, “The AMARA corpus: building resources for translating the web’s educational content,” in Proceedings of the 10th international workshop on spoken language technology (iwslt), 2013.
[BibTeX]

@inproceedings{guzman-sajjad-etal:iwslt13,
  author    = {Guzm{\'a}n, Francisco and Sajjad, Hassan and Vogel, Stephan and Abdelali, Ahmed},
  title     = {The {AMARA} Corpus: Building Resources for Translating the Web's Educational Content},
  booktitle = {Proceedings of the 10th International Workshop on Spoken Language Translation ({IWSLT})},
  year      = {2013},
  month     = dec,
  location  = {Heidelberg, Germany},
}

M. Weller, M. Kisselew, S. Smekalova, A. Fraser, H. Schmid, N. Durrani, H. Sajjad, and R. Farkas, “Munich-Edinburgh-Stuttgart Submissions at WMT13: Morphological and Syntactic Processing for SMT,” in Proceedings of the eighth workshop on statistical machine translation (wmt), 2013.
[BibTeX]

@inproceedings{weller13:wmt13,
  author    = {Weller, Marion and Kisselew, Max and Smekalova, Svetlana and Fraser, Alexander and Schmid, Helmut and Durrani, Nadir and Sajjad, Hassan and Farkas, Rich{\'a}rd},
  title     = {{Munich-Edinburgh-Stuttgart} Submissions at {WMT13}: Morphological and Syntactic Processing for {SMT}},
  booktitle = {Proceedings of the Eighth Workshop on Statistical Machine Translation ({WMT})},
  year      = {2013},
  month     = aug,
  location  = {Sofia, Bulgaria},
}

N. Durrani, H. Schmid, A. Fraser, H. Sajjad, and R. Farkas, “Munich-Edinburgh-Stuttgart Submissions of OSM Systems at WMT13,” in Proceedings of the eighth workshop on statistical machine translation (wmt), 2013.
[BibTeX]

@inproceedings{durrani-EtAl:2013:WMT,
  author    = {Durrani, Nadir and Schmid, Helmut and Fraser, Alexander and Sajjad, Hassan and Farkas, Rich{\'a}rd},
  title     = {{Munich-Edinburgh-Stuttgart} Submissions of {OSM} Systems at {WMT13}},
  booktitle = {Proceedings of the Eighth Workshop on Statistical Machine Translation ({WMT})},
  year      = {2013},
  month     = aug,
  location  = {Sofia, Bulgaria},
}

H. Sajjad, S. Smekalova, N. Durrani, A. Fraser, and H. Schmid, “QCRI-MES submission at WMT13: using transliteration mining to improve statistical machine translation,” in Proceedings of the eighth workshop on statistical machine translation (wmt), 2013.
[BibTeX]

@inproceedings{sajjad-EtAl:2013:WMT,
  author    = {Sajjad, Hassan and Smekalova, Svetlana and Durrani, Nadir and Fraser, Alexander and Schmid, Helmut},
  title     = {{QCRI-MES} Submission at {WMT13}: Using Transliteration Mining to Improve Statistical Machine Translation},
  booktitle = {Proceedings of the Eighth Workshop on Statistical Machine Translation ({WMT})},
  year      = {2013},
  month     = aug,
  location  = {Sofia, Bulgaria},
}

H. Sajjad, K. Darwish, and Y. Belinkov, “Translating Dialectal Arabic to English,” in Proceedings of the 51st conference of the association for computational linguistics (acl), 2013.
[BibTeX]

@inproceedings{sajjad2013translating,
  author    = {Sajjad, Hassan and Darwish, Kareem and Belinkov, Yonatan},
  title     = {Translating Dialectal {Arabic} to {English}},
  booktitle = {Proceedings of the 51st Conference of the {Association for Computational Linguistics} ({ACL})},
  year      = {2013},
  month     = aug,
  location  = {Sofia, Bulgaria},
}

A. Ali and K. Darwish, Automated admission, 2013.
[BibTeX]

@misc{ali2013automated,
  author = {Ali, Ahmed and Darwish, Kareem},
  title  = {Automated admission},
  note   = {US Patent App. 13/370,097},
  month  = {jun},
  year   = {2013},
}

K. Darwish, Online communities, 2013.
[BibTeX]

@misc{darwish2013online,
  author = {Darwish, Kareem},
  title  = {Online communities},
  note   = {US Patent App. 13/218,828},
  month  = {feb},
  year   = {2013},
}

A. Abdelali, Y. M. O. Elhadj, and R. Bouziane, “Toward an efficient arabic part of speech tagger,” in 2013 acs international conference on computer systems and applications (aiccsa), 2013, p. 1–1.
[BibTeX]

@inproceedings{abdelali2013toward,
  author       = {Abdelali, Ahmed and Elhadj, Yahya O. Mohamed and Bouziane, Rachid},
  title        = {Toward An Efficient {Arabic} Part of Speech Tagger},
  booktitle    = {2013 {ACS} International Conference on Computer Systems and Applications ({AICCSA})},
  pages        = {1--1},
  year         = {2013},
  organization = {IEEE},
}

H. Sajjad, F. Guzmán, P. Nakov, A. Abdelali, K. Murray, F. Al Obaidli, and S. Vogel, “Qcri at iwslt 2013: experiments in arabic-english and english-arabic spoken language translation,” in Iwslt 2013, 2013.
[BibTeX]

@inproceedings{sajjad2013qcri,
  author    = {Sajjad, Hassan and Guzm{\'a}n, Francisco and Nakov, Preslav and Abdelali, Ahmed and Murray, Kenton and Al Obaidli, Fahad and Vogel, Stephan},
  title     = {{QCRI} at {IWSLT} 2013: Experiments in {Arabic-English} and {English-Arabic} Spoken Language Translation},
  booktitle = {{IWSLT} 2013},
  year      = {2013},
}

F. Guzman, H. Sajjad, S. Vogel, and A. Abdelali, “The amara corpus: building resources for translating the web’s educational content,” in Proceedings of the international workshop on spoken language translation, iwslt, 2013.
[BibTeX]

@inproceedings{guzman2013amara,
  author    = {Guzm{\'a}n, Francisco and Sajjad, Hassan and Vogel, Stephan and Abdelali, Ahmed},
  title     = {The {AMARA} corpus: Building resources for translating the web's educational content},
  booktitle = {Proceedings of the International Workshop on Spoken Language Translation, {IWSLT}},
  volume    = {13},
  year      = {2013},
}

A. Mourad and K. Darwish, “Subjectivity and sentiment analysis of modern standard arabic and arabic microblogs,” in Proceedings of the 4th workshop on computational approaches to subjectivity, sentiment and social media analysis, 2013, p. 55–64.
[BibTeX]

@inproceedings{mourad2013subjectivity,
  author    = {Mourad, Ahmed and Darwish, Kareem},
  title     = {Subjectivity and sentiment analysis of {Modern Standard Arabic} and {Arabic} microblogs},
  booktitle = {Proceedings of the 4th Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis},
  pages     = {55--64},
  year      = {2013},
}

K. Darwish, “Arabizi detection and conversion to arabic,” Arxiv preprint arxiv:1306.6755, 2013.
[BibTeX]

@article{darwish2013arabizi,
  author        = {Darwish, Kareem},
  title         = {{Arabizi} detection and conversion to {Arabic}},
  journal       = {{arXiv} preprint {arXiv:1306.6755}},
  year          = {2013},
  eprint        = {1306.6755},
  archiveprefix = {arXiv},
}

H. Sajjad, K. Darwish, and Y. Belinkov, “Translating dialectal arabic to english,” in Proceedings of the 51st annual meeting of the association for computational linguistics (volume 2: short papers), 2013, p. 1–6.
[BibTeX]

@inproceedings{sajjad2013translatingb,
  author       = {Sajjad, Hassan and Darwish, Kareem and Belinkov, Yonatan},
  title        = {Translating Dialectal {Arabic} to {English}},
  booktitle    = {Proceedings of the 51st Annual Meeting of the {Association for Computational Linguistics} (Volume 2: Short Papers)},
  pages        = {1--6},
  year         = {2013},
  organization = {Association for Computational Linguistics},
}

K. Darwish, “Named entity recognition using cross-lingual resources: arabic as an example,” in Proceedings of the 51st annual meeting of the association for computational linguistics, 2013, p. 1558–1567.
[BibTeX]

@inproceedings{darwish2013named,
  author       = {Darwish, Kareem},
  title        = {Named Entity Recognition using Cross-lingual Resources: {Arabic} as an Example},
  booktitle    = {Proceedings of the 51st Annual Meeting of the {Association for Computational Linguistics}},
  pages        = {1558--1567},
  year         = {2013},
  organization = {Association for Computational Linguistics},
}

A. Kothari, W. Magdy, K. Darwish, A. Mourad, and A. Taei, “Detecting comments on news articles in microblogs.,” Icwsm, vol. 2013, 2013.
[BibTeX]

@inproceedings{kothari2013detecting,
  author    = {Kothari, Alok and Magdy, Walid and Darwish, Kareem and Mourad, Ahmed and Taei, Ahmed},
  title     = {Detecting Comments on News Articles in Microblogs},
  booktitle = {Proceedings of the International {AAAI} Conference on Weblogs and Social Media ({ICWSM})},
  year      = {2013},
}

2012

A. El Kahki, K. Darwish, M. Abdul–Wahab, and A. Taei, “Transliteration mining using large training and test sets,” in Proceedings of the 2012 conference of the north american chapter of the association for computational linguistics: human language technologies, 2012, p. 243–252.
[BibTeX]

@inproceedings{el2012transliteration,
  author    = {El Kahki, Ali and Darwish, Kareem and Abdul-Wahab, Mohamed and Taei, Ahmed},
  title     = {Transliteration mining using large training and test sets},
  booktitle = {Proceedings of the 2012 Conference of the {North American} Chapter of the {Association for Computational Linguistics}: Human Language Technologies},
  pages     = {243--252},
  year      = {2012},
}

K. Darwish and A. Ali, “Arabic retrieval revisited: morphological hole filling,” in Proceedings of the 50th annual meeting of the association for computational linguistics (volume 2: short papers), 2012, p. 218–222.
[BibTeX]

@inproceedings{darwish2012arabic,
  author    = {Darwish, Kareem and Ali, Ahmed},
  title     = {{Arabic} retrieval revisited: Morphological hole filling},
  booktitle = {Proceedings of the 50th Annual Meeting of the {Association for Computational Linguistics} (Volume 2: Short Papers)},
  pages     = {218--222},
  year      = {2012},
}

M. Moussa, M. Fakhr, and K. Darwish, “Statistical denormalization for arabic text.,” in Konvens, 2012, p. 228–232.
[BibTeX]

@inproceedings{moussa2012statistical,
  author    = {Moussa, Mohammed and Fakhr, Mohammed and Darwish, Kareem},
  title     = {Statistical denormalization for {Arabic} text},
  booktitle = {{KONVENS}},
  pages     = {228--232},
  year      = {2012},
}

K. Darwish, W. Magdy, and A. Mourad, “Language processing for arabic microblog retrieval,” in Proceedings of the 21st acm international conference on information and knowledge management, 2012, p. 2427–2430.
[BibTeX]

@inproceedings{darwish2012language,
  author    = {Darwish, Kareem and Magdy, Walid and Mourad, Ahmed},
  title     = {Language processing for {Arabic} microblog retrieval},
  booktitle = {Proceedings of the 21st {ACM} International Conference on Information and Knowledge Management},
  pages     = {2427--2430},
  year      = {2012},
}

W. Gao, P. Li, and K. Darwish, “Joint topic modeling for event summarization across news and social media streams,” in Proceedings of the 21st acm international conference on information and knowledge management, 2012, p. 1173–1182.
[BibTeX]

@inproceedings{gao2012joint,
  author    = {Gao, Wei and Li, Peng and Darwish, Kareem},
  title     = {Joint topic modeling for event summarization across news and social media streams},
  booktitle = {Proceedings of the 21st {ACM} International Conference on Information and Knowledge Management},
  pages     = {1173--1182},
  year      = {2012},
}

W. Magdy, A. Ali, and K. Darwish, “A summarization tool for time-sensitive social media,” in Proceedings of the 21st acm international conference on information and knowledge management, 2012, p. 2695–2697.
[BibTeX]

@inproceedings{magdy2012summarization,
  author    = {Magdy, Walid and Ali, Ahmed and Darwish, Kareem},
  title     = {A summarization tool for time-sensitive social media},
  booktitle = {Proceedings of the 21st {ACM} International Conference on Information and Knowledge Management},
  pages     = {2695--2697},
  year      = {2012},
}

2011

H. Sajjad, N. Durrani, H. Schmid, and A. Fraser, “Comparing two techniques for learning transliteration models using a parallel corpus,” in Proceedings of 5th international joint conference on natural language processing (ijcnlp), 2011.
[BibTeX]

@inproceedings{sajjad-EtAl:2011:IJCNLP-2011,
  author    = {Sajjad, Hassan and Durrani, Nadir and Schmid, Helmut and Fraser, Alexander},
  title     = {Comparing Two Techniques for Learning Transliteration Models Using a Parallel Corpus},
  booktitle = {Proceedings of the 5th International Joint Conference on Natural Language Processing ({IJCNLP})},
  year      = {2011},
  month     = nov,
  location  = {Chiang Mai, Thailand},
}

A. Hefny, K. Darwish, and A. Alkahky, “Is a query worth translating: ask the users!,” in European conference on information retrieval, 2011, p. 238–250.
[BibTeX]

@inproceedings{hefny2011query,
  author    = {Hefny, Ahmed and Darwish, Kareem and Alkahky, Ali},
  title     = {Is a query worth translating: ask the users!},
  booktitle = {{European} Conference on Information Retrieval},
  pages     = {238--250},
  year      = {2011},
  publisher = {Springer},
  address   = {Berlin, Heidelberg},
}

A. El–Kahky, K. Darwish, A. S. Aldein, M. A. El–Wahab, A. Hefny, and W. Ammar, “Improved transliteration mining using graph reinforcement,” in Proceedings of the conference on empirical methods in natural language processing, 2011, p. 1384–1393.
[BibTeX]

@inproceedings{el2011improved,
  author       = {El-Kahky, Ali and Darwish, Kareem and Aldein, Ahmed Saad and Abd El-Wahab, Mohamed and Hefny, Ahmed and Ammar, Waleed},
  title        = {Improved transliteration mining using graph reinforcement},
  booktitle    = {Proceedings of the Conference on Empirical Methods in Natural Language Processing},
  pages        = {1384--1393},
  year         = {2011},
  organization = {Association for Computational Linguistics},
}

A. El Kahki and K. Darwish, “Qcri@ trec 2011: microblog track.,” in Trec, 2011.
[BibTeX]

@inproceedings{el2011qcri,
  author    = {El Kahki, Ali and Darwish, Kareem},
  title     = {{QCRI@TREC} 2011: Microblog Track},
  booktitle = {{TREC}},
  year      = {2011},
}

X/Twitter feed

View on Twitter