You can find more information about my publications on my Google Scholar page.

Journal Articles

  1. W. Brannon, Y. Virkar, and B. Thompson, “Dubbing in Practice: A Large Scale Study of Human Localization With Insights for Automatic Dubbing,” Transactions of the Association for Computational Linguistics, vol. 11, pp. 419–435, 2023, doi: 10/gr9cbz.
    @article{brannonDubbingPracticeLarge2023,
      author     = {Brannon, William and Virkar, Yogesh and Thompson, Brian},
      title      = {Dubbing in {{Practice}}: {{A Large Scale Study}} of {{Human Localization With Insights}} for {{Automatic Dubbing}}},
      shorttitle = {Dubbing in {{Practice}}},
      journal    = {Transactions of the Association for Computational Linguistics},
      volume     = {11},
      pages      = {419--435},
      year       = {2023},
      issn       = {2307-387X},
      doi        = {10/gr9cbz},
      langid     = {english}
    }
    

Conference Articles

  1. S. Longpre, R. Mahari, N. Muennighoff, A. Chen, K. Perisetla, W. Brannon, J. Kabbara, L. Villa, and S. Hooker, “The Data Provenance Project,” in Proceedings of the 40th International Conference on Machine Learning, GenLaw Workshop ’23, PMLR, 2023. Available at: https://genlaw.org/CameraReady/20.pdf.
    @inproceedings{longpreDataProvenanceProject2023,
      author    = {Longpre, Shayne and Mahari, Robert and Muennighoff, Niklas and Chen, Anthony and Perisetla, Kartik and Brannon, William and Kabbara, Jad and Villa, Luis and Hooker, Sara},
      title     = {The {{Data Provenance Project}}},
      booktitle = {Proceedings of the 40th International Conference on Machine Learning, {GenLaw} Workshop '23},
      publisher = {PMLR},
      year      = {2023},
      url       = {https://genlaw.org/CameraReady/20.pdf}
    }
    
  2. D. Beeferman, W. Brannon, and D. Roy, “RadioTalk: A Large-Scale Corpus of Talk Radio Transcripts,” in Proceedings of Interspeech 2019, ISCA, 2019, pp. 564–568. doi: 10/gpcff2.
    @inproceedings{beefermanRadioTalkLargeScaleCorpus2019,
      author    = {Beeferman, Doug and Brannon, William and Roy, Deb},
      title     = {{{RadioTalk}}: {{A Large-Scale Corpus}} of {{Talk Radio Transcripts}}},
      booktitle = {Proceedings of Interspeech 2019},
      pages     = {564--568},
      publisher = {{ISCA}},
      location  = {{Graz, Austria}},
      year      = {2019},
      doi       = {10/gpcff2}
    }
    

Theses

  1. W. Brannon, “Mapping U.S. Talk Radio: A Textual Survey at Scale,” M.S. Thesis, Massachusetts Institute of Technology, 2020. Available at: https://hdl.handle.net/1721.1/129270.
    @mastersthesis{brannonMappingTalkRadio2020,
      author    = {Brannon, William},
      title     = {Mapping {{U.S.}} {{Talk Radio}}: {{A Textual Survey}} at {{Scale}}},
      type      = {M.S. Thesis},
      school    = {Massachusetts Institute of Technology},
      location  = {{Cambridge, MA}},
      year      = {2020},
      url       = {https://hdl.handle.net/1721.1/129270},
      langid    = {english},
      pagetotal = {140}
    }
    

Preprints

  1. S. Longpre, R. Mahari, N. Obeng-Marnu, W. Brannon, T. South, K. Gero, S. Pentland, and J. Kabbara, “Data Authenticity, Consent, & Provenance for AI Are All Broken: What Will It Take to Fix Them?,” 2024. arXiv:2404.12691.
    @unpublished{longpreDataAuthenticityConsent2024,
      author        = {Longpre, Shayne and Mahari, Robert and {Obeng-Marnu}, Naana and Brannon, William and South, Tobin and Gero, Katy and Pentland, Sandy and Kabbara, Jad},
      title         = {Data {{Authenticity}}, {{Consent}}, \& {{Provenance}} for {{AI}} Are All Broken: What Will It Take to Fix Them?},
      shorttitle    = {Data {{Authenticity}}, {{Consent}}, \& {{Provenance}} for {{AI}} Are All Broken},
      year          = {2024},
      archiveprefix = {arXiv},
      eprint        = {2404.12691},
      primaryclass  = {cs},
      url           = {https://arxiv.org/abs/2404.12691}
    }
    
  2. W. Brannon, S. Fulay, H. Jiang, W. Kang, B. Roy, J. Kabbara, and D. Roy, “ConGraT: Self-Supervised Contrastive Pretraining for Joint Graph and Text Embeddings,” 2023. arXiv:2305.14321.
    @unpublished{brannonConGraTSelfSupervisedContrastive2023,
      author        = {Brannon, William and Fulay, Suyash and Jiang, Hang and Kang, Wonjune and Roy, Brandon and Kabbara, Jad and Roy, Deb},
      title         = {{{ConGraT}}: {{Self-Supervised Contrastive Pretraining}} for {{Joint Graph}} and {{Text Embeddings}}},
      shorttitle    = {{{ConGraT}}},
      year          = {2023},
      archiveprefix = {arXiv},
      eprint        = {2305.14321},
      primaryclass  = {cs},
      url           = {https://arxiv.org/abs/2305.14321}
    }
    
  3. S. Longpre, R. Mahari, A. Chen, N. Obeng-Marnu, D. Sileo, W. Brannon, N. Muennighoff, et al., “The Data Provenance Initiative: A Large Scale Audit of Dataset Licensing & Attribution in AI,” 2023. arXiv:2310.16787.
    @unpublished{longpreDataProvenanceInitiative2023,
      author        = {Longpre, Shayne and Mahari, Robert and Chen, Anthony and {Obeng-Marnu}, Naana and Sileo, Damien and Brannon, William and Muennighoff, Niklas and Khazam, Nathan and Kabbara, Jad and Perisetla, Kartik and Wu, Xinyi and Shippole, Enrico and Bollacker, Kurt and Wu, Tongshuang and Villa, Luis and Pentland, Sandy and Hooker, Sara},
      title         = {The {{Data Provenance Initiative}}: {{A Large Scale Audit}} of {{Dataset Licensing}} \& {{Attribution}} in {{AI}}},
      shorttitle    = {The {{Data Provenance Initiative}}},
      year          = {2023},
      archiveprefix = {arXiv},
      eprint        = {2310.16787},
      primaryclass  = {cs},
      url           = {https://arxiv.org/abs/2310.16787}
    }