<?xml version="1.0" encoding="UTF-8"?>
<mets:METS xmlns:mets="http://www.loc.gov/METS/" xmlns:xlink="http://www.w3.org/TR/xlink/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dim="http://www.dspace.org/xmlns/dspace/dim" OBJEDIT="/xmlui/admin/item?itemID=108679" OBJID="/xmlui/handle/11531/107094" PROFILE="DSPACE METS SIP Profile 1.0" LABEL="DSpace Item" ID="hdl:11531/107094">
<mets:dmdSec GROUPID="group_dmd_0" ID="dmd_1">
<mets:mdWrap MDTYPE="OTHER" OTHERMDTYPE="DIM">
<mets:xmlData>
<dim:dim dspaceType="ITEM">
<dim:field authority="0000-0001-6714-4795" element="contributor" qualifier="author" confidence="ACCEPTED" language="es-ES" mdschema="dc">de Rodrigo Tobías, Ignacio</dim:field>
<dim:field authority="D368380E-1440-4830-B4D8-8D6C6C55B73A" element="contributor" qualifier="author" confidence="ACCEPTED" language="es-ES" mdschema="dc">Sánchez Cuadrado, Alberto</dim:field>
<dim:field authority="0000-0002-7547-0942" element="contributor" qualifier="author" confidence="ACCEPTED" language="es-ES" mdschema="dc">Boal Martín-Larrauri, Jaime</dim:field>
<dim:field authority="0000-0001-9879-5603" element="contributor" qualifier="author" confidence="ACCEPTED" language="es-ES" mdschema="dc">López López, Álvaro Jesús</dim:field>
<dim:field element="date" qualifier="accessioned" mdschema="dc">2025-11-11T15:48:30Z</dim:field>
<dim:field element="date" qualifier="available" mdschema="dc">2025-11-11T15:48:30Z</dim:field>
<dim:field element="date" qualifier="issued" language="es_ES" mdschema="dc">2026-04-01</dim:field>
<dim:field element="identifier" qualifier="issn" language="es_ES" mdschema="dc">0031-3203</dim:field>
<dim:field element="identifier" qualifier="uri" language="es_ES" mdschema="dc">https://doi.org/10.1016/j.patcog.2025.112502</dim:field>
<dim:field element="description" language="es_ES" mdschema="dc">Artículos en revistas</dim:field>
<dim:field element="description" qualifier="abstract" language="es-ES" mdschema="dc">This paper introduces the MERIT Dataset, a multimodal, fully labeled dataset of school grade reports. Comprising over 400 labels and 33k samples, the MERIT Dataset is a resource for training models in demanding Visually-rich Document Understanding tasks. It contains multimodal features that link patterns in the textual, visual, and layout domains. The MERIT Dataset also includes biases in a controlled way, making it a valuable tool to benchmark biases induced in Language Models. The paper outlines the dataset’s generation pipeline and highlights its main features and patterns in its different domains. We benchmark the dataset for token classification, showing that it poses a significant challenge even for SOTA models.</dim:field>
<dim:field element="description" qualifier="abstract" language="en-GB" mdschema="dc">This paper introduces the MERIT Dataset, a multimodal, fully labeled dataset of school grade reports. Comprising over 400 labels and 33k samples, the MERIT Dataset is a resource for training models in demanding Visually-rich Document Understanding tasks. It contains multimodal features that link patterns in the textual, visual, and layout domains. The MERIT Dataset also includes biases in a controlled way, making it a valuable tool to benchmark biases induced in Language Models. The paper outlines the dataset’s generation pipeline and highlights its main features and patterns in its different domains. We benchmark the dataset for token classification, showing that it poses a significant challenge even for SOTA models.</dim:field>
<dim:field element="language" qualifier="iso" language="es_ES" mdschema="dc">en-GB</dim:field>
<dim:field element="source" language="es_ES" mdschema="dc">Revista: Pattern Recognition, Periodo: 1, Volumen: online, Número: Part B, Página inicial: 112502-1, Página final: 112502-14</dim:field>
<dim:field element="subject" qualifier="other" language="es_ES" mdschema="dc">Instituto de Investigación Tecnológica (IIT)</dim:field>
<dim:field element="title" language="es_ES" mdschema="dc">The MERIT Dataset: Modelling and efficiently rendering interpretable transcripts</dim:field>
<dim:field element="type" language="es_ES" mdschema="dc">info:eu-repo/semantics/article</dim:field>
<dim:field element="description" qualifier="version" language="es_ES" mdschema="dc">info:eu-repo/semantics/publishedVersion</dim:field>
<dim:field element="rights" qualifier="holder" language="es_ES" mdschema="dc"/>
<dim:field element="rights" qualifier="accessRights" language="es_ES" mdschema="dc">info:eu-repo/semantics/openAccess</dim:field>
<dim:field element="keywords" language="es-ES" mdschema="dc">Synthetic Dataset; Multimodal Dataset; Visually-rich Document Understanding; Vision-Language Models</dim:field>
<dim:field element="keywords" language="en-GB" mdschema="dc">Synthetic Dataset; Multimodal Dataset; Visually-rich Document Understanding; Vision-Language Models</dim:field>
</dim:dim>
</mets:xmlData>
</mets:mdWrap>
</mets:dmdSec>
<mets:fileSec>
<mets:fileGrp USE="CONTENT">
<mets:file CHECKSUMTYPE="MD5" GROUPID="group_file_763656" ID="file_763656" MIMETYPE="application/pdf" SIZE="5202118" CHECKSUM="48f1b082615be16e181f566fa265f6bf">
<mets:FLocat LOCTYPE="URL" xlink:title="IIT-25-307R_preprint.pdf" xlink:type="locator" xlink:href="/xmlui/bitstream/handle/11531/107094/IIT-25-307R_preprint.pdf?sequence=1&amp;isAllowed=y"/>
</mets:file>
<mets:file CHECKSUMTYPE="MD5" GROUPID="group_file_763657" ID="file_763657" MIMETYPE="application/pdf" SIZE="2832" CHECKSUM="841b686a53560d2cd154e5604a110b56">
<mets:FLocat LOCTYPE="URL" xlink:title="IIT-25-307R_preview.pdf" xlink:type="locator" xlink:href="/xmlui/bitstream/handle/11531/107094/IIT-25-307R_preview.pdf?sequence=2&amp;isAllowed=y"/>
</mets:file>
</mets:fileGrp>
</mets:fileSec>
<mets:structMap LABEL="DSpace" TYPE="LOGICAL">
<mets:div DMDID="dmd_1" TYPE="DSpace Item">
<mets:div ID="div_2" TYPE="DSpace Content Bitstream">
<mets:fptr FILEID="file_763656"/>
</mets:div>
<mets:div ID="div_3" TYPE="DSpace Content Bitstream">
<mets:fptr FILEID="file_763657"/>
</mets:div>
</mets:div>
</mets:structMap>
</mets:METS>
