I am a machine learning researcher focused on developing trustworthy, interpretable, and socially beneficial AI systems.
Currently, I'm a scholar in the ML Alignment & Theory Scholars (MATS) Program, working on Scalable Oversight and AI Control with David Lindner, Erik Jenner, and Scott Emmons.
My previous research includes work on the reliability of steering vectors in Large Language Models with Dmitrii Krasheninnikov and David Krueger at the Krueger AI Safety Lab at the University of Cambridge. I have also worked on applications of representation engineering at the Health-NLP group, supervised by Seyed Ali Bahrainian and Carsten Eickhoff.
I hold a Master of Science in Machine Learning and a Bachelor of Science in Computer Science from the University of Tübingen. My research lies at the intersection of Deep Learning and Natural Language Processing, driven by the challenge of building safe and equitable AI systems. As AI technologies advance, I want to help address their societal risks and ensure their benefits are shared equitably by improving scientific understanding and supporting effective governance.
Feel free to reach out to me via email!
@InProceedings{braun2025beyond,
author = {Joschka Braun and Carsten Eickhoff and Seyed Ali Bahrainian},
title = {Beyond Multiple Choice: Evaluating Steering Vectors for Adaptive Free-Form Summarization},
booktitle = {ICML 2025 Workshop on Reliable and Responsible Foundation Models},
year = {2025},
eprint = {2505.24859},
}
@InProceedings{braun2025understanding,
author = {Joschka Braun and Carsten Eickhoff and David Krueger and Seyed Ali Bahrainian and Dmitrii Krasheninnikov},
title = {Understanding (Un)Reliability of Steering Vectors in Language Models},
booktitle = {ICLR 2025 Workshop on Foundation Models in the Wild},
year = {2025},
eprint = {2505.22637},
}
@InProceedings{Braun2022BSCTHESIS,
author = {Joschka Braun},
title = {Verbal Epistemic Uncertainty Estimation for Numeric Values with GPT-3},
booktitle = {BSc Thesis at University of Tübingen},
year = {2022},
}
@InProceedings{Braun_Steering_Blog_Post_2024,
author = {Joschka Braun and Dmitrii Krasheninnikov and Usman Anwar and Robert Kirk and Daniel Tan and David Krueger},
title = {A Sober Look at Steering Vectors for LLMs},
booktitle = {Blog post on the key challenges in controlling LLM behaviour with steering vectors. Published on The Alignment Forum.},
year = {2024},
}
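For readers unfamiliar with the technique the post discusses, here is a minimal, illustrative PyTorch sketch of activation steering (not the post's exact methodology): a steering vector is computed as the difference of mean residual-stream activations on a contrastive prompt pair, then added at one layer during generation via a forward hook. The model, layer index, prompts, and steering strength below are all illustrative assumptions.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Illustrative choices; any decoder-only Hugging Face model works similarly.
tok = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")
LAYER = 6  # which block's output to steer (assumption, typically tuned)

def mean_resid(prompt):
    # hidden_states[LAYER + 1] is the output of block LAYER
    # (index 0 holds the token embeddings).
    ids = tok(prompt, return_tensors="pt").input_ids
    with torch.no_grad():
        hs = model(ids, output_hidden_states=True).hidden_states[LAYER + 1]
    return hs.mean(dim=1).squeeze(0)

# Contrastive pair: the vector points from "negative" toward "positive".
steer = mean_resid("I love this! It is wonderful.") - mean_resid("I hate this! It is terrible.")

def hook(module, inputs, output):
    # GPT-2 blocks return a tuple whose first element is the hidden states.
    return (output[0] + 4.0 * steer,) + output[1:]  # 4.0 = steering strength

handle = model.transformer.h[LAYER].register_forward_hook(hook)
ids = tok("The movie was", return_tensors="pt").input_ids
print(tok.decode(model.generate(ids, max_new_tokens=20, do_sample=False)[0]))
handle.remove()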
@InProceedings{Braun_Reweighting_Logits_2024,
author = {Joschka Braun and Bálint Mucsányi and Seyed Ali Bahrainian},
title = {Logit Reweighting for Topic-Focused Summarization},
booktitle = {Implemented a custom LogitsProcessor class to reweight logits of topic-relevant tokens during summary generation. Evaluated and compared different strategies on the NEWTS dataset.},
year = {2024},
eprint = {2507.05235},
}
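As a rough sketch of the idea in the entry above (the paper compares several reweighting strategies; the constant-bonus variant here is only the simplest possible instance, and the token selection and hyperparameters are assumptions), a custom Hugging Face LogitsProcessor can add a bonus to the logits of topic-relevant token ids before each decoding step:

import torch
from transformers import (AutoModelForCausalLM, AutoTokenizer,
                          LogitsProcessor, LogitsProcessorList)

class TopicBoostProcessor(LogitsProcessor):
    """Adds a constant bonus to the logits of topic-relevant token ids."""

    def __init__(self, topic_token_ids, bonus=2.0):
        self.topic_token_ids = torch.tensor(topic_token_ids)
        self.bonus = bonus

    def __call__(self, input_ids, scores):
        # scores: (batch, vocab_size) next-token logits before sampling.
        scores[:, self.topic_token_ids] += self.bonus
        return scores

# Hypothetical usage with illustrative topic tokens.
tok = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")
topic_ids = [tok.encode(" climate")[0], tok.encode(" emissions")[0]]
ids = tok("Summary:", return_tensors="pt").input_ids
out = model.generate(
    ids,
    max_new_tokens=30,
    logits_processor=LogitsProcessorList([TopicBoostProcessor(topic_ids)]),
)
print(tok.decode(out[0]))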
@InProceedings{rein2025hcasthumancalibratedautonomysoftware,
author = {David Rein et al.},
title = {HCAST: Human-Calibrated Autonomy Software Tasks},
booktitle = {Acknowledged contributor to the HCAST benchmark: contributed task feedback and set human performance baselines for ML engineering tasks.},
year = {2025},
eprint = {2503.17354},
}
@InProceedings{Braun_Anthropic_Hackathon_2025,
author = {Joschka Braun and Damon Falck and Yeonwoo Jang},
title = {Anthropic Alignment Hackathon: Exploration Hacking},
booktitle = {Participated in the June 2025 Anthropic Alignment Hackathon in San Francisco, co-developing an "Exploration Hacking" prototype over two days.},
year = {2025},
}
This website is based on Michael Niemeyer's template. Check out his GitHub repository for instructions on how to use it.