Hello! I am a final-year PhD student in computer science at the University of Washington, advised by Yejin Choi and Noah Smith. My research aims to build better algorithms for language models, including for tokenization, data creation, and inference-time adaptation. I am grateful to be supported by the NSF Graduate Research Fellowship and the OpenAI Superalignment Fellowship.
Preprint 2026
@misc{limisiewicz-etal-2026-compute,
  author        = {Tomasz Limisiewicz and Artidoro Pagnoni and Srini Iyer and Mike Lewis and Sachin Mehta and Alisa Liu and Margaret Li and Gargi Ghosh and Luke Zettlemoyer},
  title         = {Compute Optimal Tokenization},
  year          = {2026},
  eprint        = {2605.01188},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2605.01188},
} ICML 2026
@inproceedings{hayase-etal-2026-sampling,
  author    = {Jonathan Hayase and Alisa Liu and Noah A. Smith and Sewoong Oh},
  title     = {Sampling from Your Language Model One Byte at a Time},
  booktitle = {Forty-third International Conference on Machine Learning},
  year      = {2026},
  url       = {https://openreview.net/forum?id=COQV7D1dAW},
} Preprint 2026
@misc{xu-etal-2026-finish,
  author        = {Hao Xu and Alisa Liu and Jonathan Hayase and Yejin Choi and Noah A. Smith},
  title         = {Are You Going to Finish That? A Practical Study of the Partial Token Problem},
  year          = {2026},
  eprint        = {2601.23223},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2601.23223},
} NeurIPS 2025 — Spotlight
COLM 2025 — Top 0.2% of submissions
@inproceedings{liu-etal-2025-superbpe,
  author    = {Alisa Liu and Jonathan Hayase and Valentin Hofmann and Sewoong Oh and Noah A. Smith and Yejin Choi},
  title     = {{SuperBPE}: Space Travel for Language Models},
  booktitle = {Second Conference on Language Modeling},
  year      = {2025},
  url       = {https://arxiv.org/abs/2503.13423},
} COLM 2025 — Top 0.2% of submissions
ACL Findings 2025
NeurIPS 2024
@inproceedings{shi-etal-2024-decoding,
  author    = {Ruizhe Shi and Yifang Chen and Yushi Hu and Alisa Liu and Hannaneh Hajishirzi and Noah A. Smith and Simon Shaolei Du},
  title     = {Decoding-Time Language Model Alignment with Multiple Objectives},
  booktitle = {The Thirty-eighth Annual Conference on Neural Information Processing Systems},
  year      = {2024},
  url       = {https://openreview.net/forum?id=3csuL7TVpV},
} NeurIPS 2024
@inproceedings{hayase-etal-2024-data,
  author    = {Jonathan Hayase and Alisa Liu and Yejin Choi and Sewoong Oh and Noah A. Smith},
  title     = {Data Mixture Inference: What do {BPE} Tokenizers Reveal about their Training Data?},
  booktitle = {The Thirty-eighth Annual Conference on Neural Information Processing Systems},
  year      = {2024},
  url       = {https://openreview.net/forum?id=EHXyeImux0},
} COLM 2024 — Spotlight, top 7%
@inproceedings{liu-etal-2024-tuning,
  author    = {Alisa Liu and Xiaochuang Han and Yizhong Wang and Yulia Tsvetkov and Yejin Choi and Noah A. Smith},
  title     = {Tuning Language Models by Proxy},
  booktitle = {First Conference on Language Modeling},
  year      = {2024},
  url       = {https://openreview.net/forum?id=dribhnhm1i},
} EMNLP 2023
@inproceedings{liu-etal-2023-afraid,
  author    = {Alisa Liu and Zhaofeng Wu and Julian Michael and Alane Suhr and Peter West and Alexander Koller and Swabha Swayamdipta and Noah A. Smith and Yejin Choi},
  title     = {We{'}re Afraid Language Models Aren{'}t Modeling Ambiguity},
  booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing},
  year      = {2023},
  url       = {https://aclanthology.org/2023.emnlp-main.51},
} EMNLP Findings 2023
@inproceedings{lee-etal-2023-last,
  author    = {Jaechan Lee and Alisa Liu and Orevaoghene Ahia and Hila Gonen and Noah A. Smith},
  title     = {That was the last straw, we need more: Are Translation Systems Sensitive to Disambiguating Context?},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2023},
  year      = {2023},
  url       = {https://aclanthology.org/2023.findings-emnlp.302},
} ACL 2023
@inproceedings{wang-etal-2023-self-instruct,
  author    = {Yizhong Wang and Yeganeh Kordi and Swaroop Mishra and Alisa Liu and Noah A. Smith and Daniel Khashabi and Hannaneh Hajishirzi},
  title     = {{Self-Instruct}: Aligning Language Models with Self-Generated Instructions},
  booktitle = {Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  year      = {2023},
  url       = {https://aclanthology.org/2023.acl-long.754},
} ACL 2023
@inproceedings{hallinan-etal-2023-detoxifying,
  author    = {Skyler Hallinan and Alisa Liu and Yejin Choi and Maarten Sap},
  title     = {Detoxifying Text with {MaRCo}: Controllable Revision with Experts and Anti-Experts},
  booktitle = {Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
  year      = {2023},
  url       = {https://aclanthology.org/2023.acl-short.21},
} EMNLP Findings 2022
@inproceedings{liu-etal-2022-wanli,
  author    = {Alisa Liu and Swabha Swayamdipta and Noah A. Smith and Yejin Choi},
  title     = {{WANLI}: Worker and {AI} Collaboration for Natural Language Inference Dataset Creation},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2022},
  year      = {2022},
  url       = {https://aclanthology.org/2022.findings-emnlp.508},
} ACL 2021
@inproceedings{liu-etal-2021-dexperts,
  author    = {Alisa Liu and Maarten Sap and Ximing Lu and Swabha Swayamdipta and Chandra Bhagavatula and Noah A. Smith and Yejin Choi},
  title     = {{DExperts}: Decoding-Time Controlled Text Generation with Experts and Anti-Experts},
  booktitle = {Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)},
  year      = {2021},
  url       = {https://aclanthology.org/2021.acl-long.522},
} DCASE 2020 — Best Student Paper Award
ML4MD Workshop at ICML 2020
@inproceedings{fang-etal-2020-bach,
  author    = {Alexander Fang and Alisa Liu and Prem Seetharaman and Bryan Pardo},
  title     = {Bach or Mock? A Grading Function for Chorales in the Style of {J.S. Bach}},
  booktitle = {Machine Learning for Media Discovery Workshop (ML4MD) at the 37th International Conference on Machine Learning},
  year      = {2020},
  url       = {https://arxiv.org/abs/2006.13329},
} ML4MD Workshop at ICML 2020
@inproceedings{liu-etal-2020-incorporating,
  author    = {Alisa Liu and Alexander Fang and Prem Seetharaman and Bryan Pardo},
  title     = {Incorporating Music Knowledge in Continual Dataset Augmentation for Music Generation},
  booktitle = {Machine Learning for Media Discovery Workshop (ML4MD) at the 37th International Conference on Machine Learning},
  year      = {2020},
  url       = {https://arxiv.org/abs/2006.13331},
} RepEval Workshop at NAACL 2019
@inproceedings{chen-etal-2019-codah,
  author    = {Michael Chen and Mike D{'}Arcy and Alisa Liu and Jared Fernandez and Doug Downey},
  title     = {{CODAH}: An Adversarially-Authored Question Answering Dataset for Common Sense},
  booktitle = {Proceedings of the 3rd Workshop on Evaluating Vector Space Representations for {NLP}},
  year      = {2019},
  url       = {https://aclanthology.org/W19-2008},
}