@ARTICLE{26583223_910245836_2024,
  author   = {Ekaterina Valueva and Anastasia Panfilova and Antonina Rafikova},
  keywords = {divergent thinking tests, originality, automatic processing, semantic distances, distributional semantics, latent semantic analysis, large language models},
  title    = {Automatic Scoring of Verbal Divergent Thinking Tests: From Lexical Databases to Large Language Models},
  journal  = {Psychology. Journal of Higher School of Economics},
  year     = {2024},
  volume   = {21},
  number   = {1},
  pages    = {202--225},
  url      = {https://psy-journal.hse.ru/en/2024-21-1/910245836.html},
  abstract = {The article explores the evolution of methods for automatically scoring verbal divergent thinking tests. Researchers have increasingly focused on evaluating the originality of respondents' answers by calculating their semantic distance from the stimulus task. From 2009 to 2019, latent semantic analysis was the primary method for assessing semantic distances. Overall, in terms of internal consistency and correlation with expert ratings, it yielded satisfactory results, maintaining an acceptable balance between quality and effort expended. However, issues emerged (dependence on the corpus used, instability of results, systematic distortions related to the length of the analyzed responses), prompting researchers to move to more advanced distributional semantics models (GloVe, Word2Vec, etc.), large language models, and supervised learning. Large language models, especially those fine-tuned on creativity test materials, demonstrated higher effectiveness than semantic-distance models and approached expert evaluations. In addition to the evaluation of originality, the article considers works proposing methods for the automatic assessment of elaboration, flexibility, associative flow, and divergent semantic integration. References to online platforms that allow automatic assessment of the originality of responses to divergent thinking tests are provided. The issue of interpreting results obtained with large language models is discussed: a drawback of these models is the lack of understanding of the basis on which judgments about the originality of creative products are made. The prospects of applying explainable artificial intelligence to the evaluation of verbal and non-verbal tests of creative thinking are also discussed.}
}