<?xml version="1.0" encoding="UTF-8"?>
<mets:METS xmlns:mets="http://www.loc.gov/METS/" xmlns:xlink="http://www.w3.org/TR/xlink/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dim="http://www.dspace.org/xmlns/dspace/dim" OBJEDIT="/xmlui/admin/item?itemID=112432" OBJID="/xmlui/handle/11531/110742" PROFILE="DSPACE METS SIP Profile 1.0" LABEL="DSpace Item" ID="hdl:11531/110742">
<mets:dmdSec GROUPID="group_dmd_0" ID="dmd_1">
<mets:mdWrap MDTYPE="OTHER" OTHERMDTYPE="DIM">
<mets:xmlData>
<dim:dim dspaceType="ITEM">
<dim:field authority="4048B064-C9B6-4A78-A30C-41B7BCD1F0EA" element="contributor" qualifier="author" confidence="ACCEPTED" language="es-ES" mdschema="dc">Chen, Olivia</dim:field>
<dim:field authority="9DB53F44-19B1-4520-9648-619D2EB501B1" element="contributor" qualifier="author" confidence="ACCEPTED" language="es-ES" mdschema="dc">Chou, Kara</dim:field>
<dim:field authority="9694FD28-3E4E-498E-9D33-A2EC0D0B6CCB" element="contributor" qualifier="author" confidence="ACCEPTED" language="es-ES" mdschema="dc">Nagpal, Rashmi</dim:field>
<dim:field authority="0000-0002-8963-5074" element="contributor" qualifier="author" confidence="ACCEPTED" language="es-ES" mdschema="dc">Palacios Hielscher, Rafael</dim:field>
<dim:field authority="0F8C3EEE-F268-4C93-96AA-36A2787AE675" element="contributor" qualifier="author" confidence="ACCEPTED" language="es-ES" mdschema="dc">Gupta, Amar</dim:field>
<dim:field element="date" qualifier="accessioned" mdschema="dc">2026-06-15T04:49:33Z</dim:field>
<dim:field element="date" qualifier="available" mdschema="dc">2026-06-15T04:49:33Z</dim:field>
<dim:field element="identifier" qualifier="uri" mdschema="dc">http://hdl.handle.net/11531/110742</dim:field>
<dim:field element="description" qualifier="abstract" language="es-ES" mdschema="dc"/>
<dim:field element="description" qualifier="abstract" language="en-GB" mdschema="dc">Machine learning models on tabular datasets often struggle to understand the context between features, which can limit their accuracy. We propose SemTab, a hybrid framework for generating semantic features that utilizes an open-source Large Language Model (LLM). We evaluated our framework using three benchmark datasets: Adult Income, German Credit, and Bank Marketing. We compared its performance against several off-the-shelf LLMs. The results show that SemTab achieved the highest accuracy across all the classification tasks. For instance, on the Bank Marketing dataset, SemTab achieved an accuracy of 80%, which is approximately a 20% improvement over the baseline models. This work highlights that a hybrid architecture is a practical approach for applying language models to structured tabular data, yielding accurate and interpretable results for various downstream tasks.</dim:field>
<dim:field element="format" qualifier="mimetype" language="es_ES" mdschema="dc">application/pdf</dim:field>
<dim:field element="language" qualifier="iso" language="es_ES" mdschema="dc">en-GB</dim:field>
<dim:field element="rights" language="es_ES" mdschema="dc"/>
<dim:field element="rights" qualifier="uri" language="es_ES" mdschema="dc"/>
<dim:field element="title" language="es_ES" mdschema="dc">SemTab: A Hybrid Framework for Semantic Feature Generation on Tabular Data</dim:field>
<dim:field element="type" language="es_ES" mdschema="dc">info:eu-repo/semantics/workingPaper</dim:field>
<dim:field element="description" qualifier="version" language="es_ES" mdschema="dc">info:eu-repo/semantics/draft</dim:field>
<dim:field element="rights" qualifier="accessRights" language="es_ES" mdschema="dc">info:eu-repo/semantics/restrictedAccess</dim:field>
<dim:field element="keywords" language="es-ES" mdschema="dc"/>
<dim:field element="keywords" language="en-GB" mdschema="dc">Tabular Data, Semantic Feature Generation, LLMs, Model Interpretability</dim:field>
</dim:dim>
</mets:xmlData>
</mets:mdWrap>
</mets:dmdSec>
<mets:fileSec>
<mets:fileGrp USE="CONTENT">
<mets:file CHECKSUMTYPE="MD5" GROUPID="group_file_865208" ID="file_865208" MIMETYPE="application/pdf" SIZE="204144" CHECKSUM="9e718aee85f04c1f3d12b0e35edc14b4">
<mets:FLocat LOCTYPE="URL" xlink:title="IIT-25-413C.pdf" xlink:type="locator" xlink:href="/xmlui/bitstream/handle/11531/110742/IIT-25-413C.pdf?sequence=-1&amp;isAllowed=n"/>
</mets:file>
</mets:fileGrp>
</mets:fileSec>
<mets:structMap LABEL="DSpace" TYPE="LOGICAL">
<mets:div DMDID="dmd_1" TYPE="DSpace Item">
<mets:div ID="div_2" TYPE="DSpace Content Bitstream">
<mets:fptr FILEID="file_865208"/>
</mets:div>
</mets:div>
</mets:structMap>
</mets:METS>
