From c980e8192594189af10b98bd72d3b745650ef409 Mon Sep 17 00:00:00 2001
From: Nikita Klimenko <nikita.klimenko@jetbrains.com>
Date: Wed, 19 Jun 2024 17:06:24 +0300
Subject: [PATCH] Add an option to read Excel cell values as a String
 regardless of their content type

fixes #669
---
 .../jetbrains/kotlinx/dataframe/io/xlsx.kt    |  83 ++++++++++++++----
 .../kotlinx/dataframe/io/XlsxTest.kt          |  21 +++++
 .../src/test/resources/mixed_column.xlsx      | Bin 0 -> 4845 bytes
 docs/StardustDocs/topics/read.md              |  14 +--
 .../kotlinx/dataframe/samples/api/Read.kt     |  18 ++--
 5 files changed, 97 insertions(+), 39 deletions(-)
 create mode 100644 dataframe-excel/src/test/resources/mixed_column.xlsx

diff --git a/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt b/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt
index 01c5cd3bd..fe2ebc2af 100644
--- a/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt
+++ b/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt
@@ -6,6 +6,7 @@ import kotlinx.datetime.toKotlinLocalDateTime
 import org.apache.poi.hssf.usermodel.HSSFWorkbook
 import org.apache.poi.ss.usermodel.Cell
 import org.apache.poi.ss.usermodel.CellType
+import org.apache.poi.ss.usermodel.DataFormatter
 import org.apache.poi.ss.usermodel.DateUtil
 import org.apache.poi.ss.usermodel.RichTextString
 import org.apache.poi.ss.usermodel.Row
@@ -83,6 +84,8 @@ private fun setWorkbookTempDirectory() {
 /**
  * @param sheetName sheet to read. By default, the first sheet in the document
  * @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
+ * @param stringColumns columns to read as String regardless of the cell type.
+ * For example, by default a numeric cell with value "3" is parsed as a Double with value 3.0. With this option, it is read as the String "3".
  * @param skipRows number of rows before header
  * @param rowsCount number of rows to read.
  * @param nameRepairStrategy handling of column names.
@@ -93,17 +96,22 @@ public fun DataFrame.Companion.readExcel(
     sheetName: String? = null,
     skipRows: Int = 0,
     columns: String? = null,
+    stringColumns: StringColumns? = null,
     rowsCount: Int? = null,
     nameRepairStrategy: NameRepairStrategy = NameRepairStrategy.CHECK_UNIQUE,
 ): AnyFrame {
     setWorkbookTempDirectory()
     val wb = WorkbookFactory.create(url.openStream())
-    return wb.use { readExcel(wb, sheetName, skipRows, columns, rowsCount, nameRepairStrategy) }
+    return wb.use {
+        readExcel(wb, sheetName, skipRows, columns, stringColumns?.toFormattingOptions(), rowsCount, nameRepairStrategy)
+    }
 }
 
 /**
  * @param sheetName sheet to read. By default, the first sheet in the document
  * @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
+ * @param stringColumns columns to read as String regardless of the cell type.
+ * For example, by default a numeric cell with value "3" is parsed as a Double with value 3.0. With this option, it is read as the String "3".
  * @param skipRows number of rows before header
  * @param rowsCount number of rows to read.
  * @param nameRepairStrategy handling of column names.
@@ -114,17 +122,22 @@ public fun DataFrame.Companion.readExcel(
     sheetName: String? = null,
     skipRows: Int = 0,
     columns: String? = null,
+    stringColumns: StringColumns? = null,
     rowsCount: Int? = null,
     nameRepairStrategy: NameRepairStrategy = NameRepairStrategy.CHECK_UNIQUE,
 ): AnyFrame {
     setWorkbookTempDirectory()
     val wb = WorkbookFactory.create(file)
-    return wb.use { readExcel(it, sheetName, skipRows, columns, rowsCount, nameRepairStrategy) }
+    return wb.use {
+        readExcel(it, sheetName, skipRows, columns, stringColumns?.toFormattingOptions(), rowsCount, nameRepairStrategy)
+    }
 }
 
 /**
  * @param sheetName sheet to read. By default, the first sheet in the document
  * @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
+ * @param stringColumns columns to read as String regardless of the cell type.
+ * For example, by default a numeric cell with value "3" is parsed as a Double with value 3.0. With this option, it is read as the String "3".
  * @param skipRows number of rows before header
  * @param rowsCount number of rows to read.
  * @param nameRepairStrategy handling of column names.
@@ -135,13 +148,17 @@ public fun DataFrame.Companion.readExcel(
     sheetName: String? = null,
     skipRows: Int = 0,
     columns: String? = null,
+    stringColumns: StringColumns? = null,
     rowsCount: Int? = null,
     nameRepairStrategy: NameRepairStrategy = NameRepairStrategy.CHECK_UNIQUE,
-): AnyFrame = readExcel(asURL(fileOrUrl), sheetName, skipRows, columns, rowsCount, nameRepairStrategy)
+): AnyFrame =
+    readExcel(asURL(fileOrUrl), sheetName, skipRows, columns, stringColumns, rowsCount, nameRepairStrategy)
 
 /**
  * @param sheetName sheet to read. By default, the first sheet in the document
  * @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
+ * @param stringColumns columns to read as String regardless of the cell type.
+ * For example, by default a numeric cell with value "3" is parsed as a Double with value 3.0. With this option, it is read as the String "3".
  * @param skipRows number of rows before header
  * @param rowsCount number of rows to read.
  * @param nameRepairStrategy handling of column names.
@@ -152,17 +169,23 @@ public fun DataFrame.Companion.readExcel(
     sheetName: String? = null,
     skipRows: Int = 0,
     columns: String? = null,
+    stringColumns: StringColumns? = null,
     rowsCount: Int? = null,
     nameRepairStrategy: NameRepairStrategy = NameRepairStrategy.CHECK_UNIQUE,
 ): AnyFrame {
     setWorkbookTempDirectory()
     val wb = WorkbookFactory.create(inputStream)
-    return wb.use { readExcel(it, sheetName, skipRows, columns, rowsCount, nameRepairStrategy) }
+    return wb.use {
+        readExcel(it, sheetName, skipRows, columns, stringColumns?.toFormattingOptions(), rowsCount, nameRepairStrategy)
+    }
 }
 
 /**
  * @param sheetName sheet to read. By default, the first sheet in the document
  * @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
+ * @param formattingOptions columns to read as String regardless of the cell type, together with the formatter applied to them.
+ * For example, by default a numeric cell with value "3" is parsed as a Double with value 3.0. With this option, it is read as the String "3".
+ * See also [FormattingOptions.formatter] and [DataFormatter.formatCellValue].
  * @param skipRows number of rows before header
  * @param rowsCount number of rows to read.
  * @param nameRepairStrategy handling of column names.
@@ -173,18 +196,39 @@ public fun DataFrame.Companion.readExcel(
     sheetName: String? = null,
     skipRows: Int = 0,
     columns: String? = null,
+    formattingOptions: FormattingOptions? = null,
     rowsCount: Int? = null,
     nameRepairStrategy: NameRepairStrategy = NameRepairStrategy.CHECK_UNIQUE,
 ): AnyFrame {
     val sheet: Sheet = sheetName
         ?.let { wb.getSheet(it) ?: error("Sheet with name $sheetName not found") }
         ?: wb.getSheetAt(0)
-    return readExcel(sheet, columns, skipRows, rowsCount, nameRepairStrategy)
+    return readExcel(sheet, columns, formattingOptions, skipRows, rowsCount, nameRepairStrategy)
+}
+
+/**
+ * @param range comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
+ */
+@JvmInline
+public value class StringColumns(public val range: String)
+
+public fun StringColumns.toFormattingOptions(formatter: DataFormatter = DataFormatter()): FormattingOptions =
+    FormattingOptions(range, formatter)
+
+/**
+ * @param range comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
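+ * @param formatter [DataFormatter] whose [DataFormatter.formatCellValue] converts cells of the selected columns to String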
+ */
+public class FormattingOptions(range: String, public val formatter: DataFormatter = DataFormatter()) {
+    public val columnIndices: Set<Int> = getColumnIndices(range).toSet()
 }
 
 /**
  * @param sheet sheet to read.
  * @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
+ * @param formattingOptions columns to read as String regardless of the cell type, together with the formatter applied to them.
+ * For example, by default a numeric cell with value "3" is parsed as a Double with value 3.0. With this option, it is read as the String "3".
+ * See also [FormattingOptions.formatter] and [DataFormatter.formatCellValue].
  * @param skipRows number of rows before header
  * @param rowsCount number of rows to read.
  * @param nameRepairStrategy handling of column names.
@@ -193,19 +237,13 @@ public fun DataFrame.Companion.readExcel(
 public fun DataFrame.Companion.readExcel(
     sheet: Sheet,
     columns: String? = null,
+    formattingOptions: FormattingOptions? = null,
     skipRows: Int = 0,
     rowsCount: Int? = null,
     nameRepairStrategy: NameRepairStrategy = NameRepairStrategy.CHECK_UNIQUE,
 ): AnyFrame {
     val columnIndexes: Iterable<Int> = if (columns != null) {
-        columns.split(",").flatMap {
-            if (it.contains(":")) {
-                val (start, end) = it.split(":").map { CellReference.convertColStringToIndex(it) }
-                start..end
-            } else {
-                listOf(CellReference.convertColStringToIndex(it))
-            }
-        }
+        getColumnIndices(columns)
     } else {
         val headerRow = checkNotNull(sheet.getRow(skipRows)) {
             "Row number ${skipRows + 1} (1-based index) is not defined on the sheet ${sheet.sheetName}"
@@ -235,17 +273,32 @@ public fun DataFrame.Companion.readExcel(
         val name = repairNameIfRequired(nameFromCell, columnNameCounters, nameRepairStrategy)
         columnNameCounters[nameFromCell] =
             columnNameCounters.getOrDefault(nameFromCell, 0) + 1 // increase the counter for specific column name
+        val getCellValue: (Cell?) -> Any? = when {
+            formattingOptions != null && index in formattingOptions.columnIndices -> { cell: Cell? ->
+                formattingOptions.formatter.formatCellValue(cell)
+            }
 
+            else -> { cell -> cell.cellValue(sheet.sheetName) }
+        }
         val values: List<Any?> = valueRowsRange.map {
             val row: Row? = sheet.getRow(it)
             val cell: Cell? = row?.getCell(index)
-            cell.cellValue(sheet.sheetName)
+            getCellValue(cell)
         }
         DataColumn.createWithTypeInference(name, values)
     }
     return dataFrameOf(columns)
 }
 
+private fun getColumnIndices(columns: String): List<Int> = columns.split(",").flatMap {
+    if (it.contains(":")) {
+        val (start, end) = it.split(":").map { CellReference.convertColStringToIndex(it) }
+        start..end
+    } else {
+        listOf(CellReference.convertColStringToIndex(it))
+    }
+}
+
 /**
  * This is a universal function for name repairing
  * and should be moved to the API module later,
@@ -324,7 +377,7 @@ public fun <T> DataFrame<T>.writeExcel(
     keepFile: Boolean = false,
 ) {
     val factory =
-        if (keepFile){
+        if (keepFile) {
             when (workBookType) {
                 WorkBookType.XLS -> HSSFWorkbook(file.inputStream())
                 WorkBookType.XLSX -> XSSFWorkbook(file.inputStream())
diff --git a/dataframe-excel/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/XlsxTest.kt b/dataframe-excel/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/XlsxTest.kt
index e6a05a087..fd8c6835f 100644
--- a/dataframe-excel/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/XlsxTest.kt
+++ b/dataframe-excel/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/XlsxTest.kt
@@ -45,6 +45,17 @@ class XlsxTest {
         df shouldBe dataFrameOf("col1", "col2", "C")(1.0, null, 3.0)
     }
 
+    @Test
+    fun `column with empty header and with formatting`() {
+        val df = DataFrame.readExcel(
+            testResource("sample2.xlsx"),
+            "Sheet1",
+            columns = "A:C",
+            stringColumns = StringColumns("A:C")
+        )
+        df shouldBe dataFrameOf("col1", "col2", "C")("1", "", "3")
+    }
+
     @Test
     fun `limit row number`() {
         val df = DataFrame.readExcel(testResource("sample4.xls"), "Sheet1", rowsCount = 5)
@@ -179,4 +190,14 @@ class XlsxTest {
         val df = DataFrame.readExcel(testResource("formula_cell.xlsx"))
         df.columnNames() shouldBe listOf("Number", "Greater than 5", "Multiplied by 10", "Divided by 5")
     }
+
+    @Test
+    fun `read mixed column`() {
+        val df = DataFrame.readExcel(
+            testResource("mixed_column.xlsx"),
+            stringColumns = StringColumns("A")
+        )
+        df["col1"].type() shouldBe typeOf<String>()
+        df shouldBe dataFrameOf("col1")("100", "A100", "B100", "C100")
+    }
 }
diff --git a/dataframe-excel/src/test/resources/mixed_column.xlsx b/dataframe-excel/src/test/resources/mixed_column.xlsx
new file mode 100644
index 0000000000000000000000000000000000000000..0d766970ff51d01c76ef74ca1b13713528a74da4
GIT binary patch
literal 4845
zcmaJ_1z42Z)&_<iQaYs@hLTc2x<f!xnn4;QM7raU64D6LASor%c>o!tBxPu%hEAz}
z=A8RqxZZnwzvubB`JU%nd(U2L?e|@4YbXOyiIFfdF_A7E#`KWx00{qW;sUmJ1@R#6
z<q2Pv0DJ`DyS|ZcyglkkWGri1B8D>*d;?Mkug!$hcq!a{y@N3gN(wOI6#Wh%!qVw;
z_pFX1Gvy=YoJtKiqhl}0z4Nm1PPeF9>~tZFLpn-K_!a}D<K6a0bP0~gAE|h!o<Hr>
zGZtGiEud};x16)|4skFt0o+)wpG1_LK&_z{F*@O=At_ZQTXNwZE5;~tq9aSXxo@~n
z+2lY1&4lV-Wwf5<P^vZAgFMc?)iBv6467F+US@sLu8B@<bAQwr?vf=z^dqF=+Ew%u
zrd-^0bAsx|7wd2BBdpdENE*r*m{z4h4GZvPKSV)-zyFhC@Zn#)hR@;TVrTB;WXJQ`
z!9GT--*J|gpyiYk73#okY)h34(2aeAG)GX>pOzp;Y8Oe|x1XGQ10@6(u+K>23yA{f
zo6|fEpqX|XG#z^HpAB}JwLj8m<O!MS*`Bo!OXd5@po^AbAUVimIt|&JJIIHKk9qBm
zc!Mo;=!Jmu4W4KZ8l>j}aurHcA@M3ThYl72Wc4WR20xz?s9dE3d&ELQ&+p?HB{PJ_
z4B<au+kS$h5Zeva$*s1p;|dKas-TY(hP*H)f`xp^JpZ6OV-l0H@8fT+IS{MgUw~#?
zJ6Z&q_UMZjThipOT)<p>M1IQd|HL|9g+qF1VxMWj(*IAxF?y&Cqj9YiPebE~W-kt8
z(I6%wx9^5zx<1%zB@LGK<HdVy-$;t`^@BA(m&q4TO=W?Zu~y4a4-9i%vq7uHN9ME#
z+wW<#2U(CGbB0#0M~V=uWl;xi){#C~Y1`<7l}S8aMc*_I<9;T#3oPAs3nCW7=Hyq5
zhDd3&e<zEV-Zm?q%UlBy;H<L#%!!bia^ctT8%2{W!-7%z8x4Jf2FR!}g-a6DS2_2_
zk<NKOOBi>%QC8KoL>IdfVdvX5l^OJF&Xyaq6pBF~5u?WK+<V1;#!IP=zTuEQx{#pC
zukN2vM!gy2rzk(4``&2<c>orw<GsUF|0>C6VDp+rpiQ@XlMInFQLhDHr^rZ1j)33g
z4DR1K<Lc&V4|YXl3`|d5eU_KHWmikDHI9jyICYe+cibigD~5qy^<iqaBG)DBM<eUA
zqYF{Bv5-y{_i&OD)>%;7>E*$8@vFqBw+rj>wp~e?i|cIN5d$i#D#-#qO{xBmy884O
zTsz<93w+lI294tXQH@(9Oj1A^7$hk0T;I?i8zu3XHi2utEytqb`&v5Eu+KOu<?Q`2
zj99cPoz9e)olI&(jl`~IxC7&w22~@{eZuoDNJV^d#yd)@rmLK9wTWVb1+l~e1=*9n
zuXj%YY|P({FfP*F`|P5Kdc3R*X_k^iU5>$EEzf7&d4hi$1{LBV?)AJsfa*=BYHQB@
z%>@rIeoUr{+pnZ&3mTiaD)>B}{|5^zKx($V`BdpgRbAMZP5LAOO%BB~hsRxLi(yN&
zKXc!$P^u8A0fC+AZ{OOLmjYPL%kK^AyM7-xZrp7@qFBA`g^e|tAD+-hR7ApjnP-vh
z{eQG<c@;Q5*-RI9U6m;_9>qKRo*bQ{Jfy_F9@S}A&IPf*NUL*Bf8$PHnq>8r4n|tF
zU0-*>!Tp7LWuA;tS@$ITWA}%EB39K+v;f}-l{HxR-k^4Re|Ttm%{5=Q_Nfqo?lb^0
z$-<coaMz$HizHJF&B)9(a^3W4#j(Z2Pc(1}KDDrN;XMpX%W5kRPW%7MXt4Xwiz8(U
zdGA#gD5r3uuZ!z-HUzb6xk+LWe`tTC;?J{Z)i9s+ANR}Kf(3ksWko;H9apREBgyWR
zigwM<Wo6(eG@0-_rXA&(W;hpw&~5!(x0Dg_OAPP}Yx5d)lSyw?GO$$zPaI{A5wczr
zy%3z7j_~+NxtqR;Gm%r28OL_L*@Q)ep3x>hI*-WSX!S7SU^v@Z5dJQE$!{6Y)fx<T
za|QkN;6o&Gi;k+(tO!BNZY6=+x{cj*EX+sy6`{x$48?}JdxLa}iaVBzfmoiIy{W%)
zp8i<>p&qJtM4-qL&iNXI%C>j^M|LC5Nlwg@2l(>AX5;}%Z-8|f;q$b*_d%WQdW9m1
z#iq34!B`R?zbO3TY!!4<0E&hct<?mn?o-PI92YBey#NOFNU;~m$zEh>Qh?=a4W7Jx
zXOSou7HvKH=Nk>Crs<KWj=b!y8uch6bZN{#S%^YyhW!qN7hgi~l5D$*<o62?YG3E|
zV;S?@i$T|VsK8rUl4a#dVRW7F;$3s7-q4Yjz7NwvP{J%W`*zI_avpPhzsg{31>jFk
zmr~_{eB=WXdkm_odF<VWxYz1oy)@iQ#p2BojdWN>(*WF!N6oZGM(T5TZHrFG7-Qae
zDq-Oy`<%=IxS7J&C1`CV*nPI{`c(J#s$|YXLvk{{d0YbW^(=<XQwX~Z6&!#z)2{&c
z?g5+q9gPeWC5678lY)Wm^x%TpSd>udxvA6c*R&k0MNA4@&m))>Hb!xRyJ1&RdoL1s
zUrP=OV<j?;T8nI#?_Ta(d`LV@er&M;E@{~TPcev#CU(($W;o}{Y({ehsX-WU$n{S(
zYSNA;zm2qC7Gv{oZLS7fd{wp4x$e~{v13*U-3^+JejS58ws_uS{m22Fo9lzjD(cA^
zOd)DqOc_%$ai8<OlNo*j-fJmeD%s+LC_j3Q!$z}YZu6M%0NceRo|vBIneni7!amd=
zIR@%=R}$o&V%p;U*vmW$u3JQ8U*Arg(YQ^$ac?8<9eG#GawMN`V6}W|CA7n*^^RgL
zW3n-;MYy4c=$l-nKt{80O+Qx6{?=gOF2vreb9*~Nf86^%Hg}Q{nV%dQ#RJ)NP9vf@
zcif{XshDOP8w%U~paK5I{0@xI%PWNqB>g3frG*{3g&sjD%JWv?!G}?YHgn!h%<ilb
zS4GQ*C(?=jQOTBGki>H+6gfYlz9Du-QJZue2{=r(FJ-is5VG~aP~v@h>@(0TEeSfF
zdATojEXHwapn+DsHaj<)zQpklv%nc!mf|``X61JkBscG{X7O4AtbCl>nZ~VCQk$~z
zdm)%;JQYF{?k7$B^HSeFD*--O9KK|C#Fhk=Dd(3+zEG?ps3>GCk}a4Rup@SkFVtoQ
zKlYQa;j0wAv6!FxCL5ipo9DJ4xY|=xaC5zvCv&k8ANC1r$NGtc>CpHtepMb;IcL$l
zFy)2+<je66k?zRB8)>(6)a!F-*M#27@vn)=u(<A?HUtHL3G$I!;NoSC{W}Vv{F?&)
zE(q_IK7SX7@v3%Ryf|UIS0r+$J&G6>(IPF;l9g9y$h}XOhP#ClChBhdc&H4q!P~=$
z!;^=t3dfs*F`E(H(m3d)xu~HXJ+ClZ0<U&pixfJLT<C%|b6nN^dmoyKrUJ+mK?XI4
zBKkb$oo^l*<@YP8l}z@34u(AieK}AZ%S48dXq6iRY8^Q>np`_?sw^gP@l5i(vw8b)
zu^=wA@d6{7apnu0AbyM^hx66Ps}c+}Q;Y+Z!P%NZ9$4Q@tY~+=fKFJm<r;q5mrRET
z4QaCFmC!jK6ohBg_s+dW;p=ApZ8=E%FKM?na{*gwxw+UlzW7VwV}>QMQTTAshFgl*
zrtKawFpTPG@1c4HjQ*i!mL97re{`bL5lZ#)ykA*d#%ZUTOn8v`9)-0PHfn;pLrzrS
z*gmgO;KEX|yDx*_-jlGHEbBFfO^)ioiX<~`<_o;MWqrywn=6?vZUwzQ@sBLsxIH46
zPh2X-HRZ@KL(~~b06+r9(5;9I_4@SSZ@ZBX@P{VXyn^}W3Ch%c^T5>xt;6`A9Fl$l
zcdkP(Kx|uN+eC;Z_hBf>SHl-14X<eaxdMiZ1(Kzcg@%ihvn$BL$pwrM0$)udryyQp
z-}y>9-*A-3A#~np(rl}bGLYmJ_tnU`96<+?t5Z(8b)lLD?;X*{ryuq&^+<=1lh)oU
zXe^xfA$OEUM?I3vIPPqbj}_9ktSv@mT1TaXj5Vbyc5<MBN)vK$FkpI&@39iGW+)i3
zdedP!`7<@xX8jHYO150uqRC0VBh=387evp4-cvjv77nYRrXHE*w+Jry{vr24A9)yx
z5`o-uJ8)0d?#bJqYr)Bln#tcYnZ75TtmKH>ieqklKG}TAS$BnT=5XF@EAQTDt<!^4
zu-{17Nvr`#v%po*t$1N9AE&;BrX($aW8}XQR^v_(?<|+Q_qewWDja6?yrk9))ZR;B
zUHD1xB#h>q(d?z9Y-<BhUGh`LGfoOk2s^M8GD_?Fh2C34KaQ%gHSZwv!au)yOZ|I$
ztRVd|`yYk6jK}B>U^EQqWs9I~6kgSa!LEdOV47n-^3;^G){~T-E{PkyD|AGp5^1Mt
z?BG$F{cUm*+>Ml(voj)4S#MOGIuV=og5+6uog%xT85{15AS|SoMy%HGEi;d$t$0#?
z@#O{DaDagLbY=OUC(62cJ2j8ba>`0Zh{@OhjAhzZhIy51k4<v8WE`1x%L6HC=yFKB
zbF!ggdru|%?sbGv!pz;2rGlmc*N}_q6|;uCAQl`~$x{_tD9|!;!BmBT++xgV!f4dA
zCLlr9A76Z%$vDbIpkzXj4rbasT<gh0%;*HWnW?@DIUCccANeL6KbM|YqQ>>#tJjgB
z6<2%MNBwn=Iwi4=ziY}5R9f#f=!iaZ)CG!Au<BWH*Jv43R&q3xKUmv-kRiUv&||Y;
zlDA^Zc>zE;jxeKT=>qPSjNhg>5&R27X(vZFu%nxawx=`L)#$HkNBg0A7cT+yf|H?=
z?bVk60;QS2KQZ633GnYT^Tpi%I5&FjJ;bj442wM?qW|M=Yl`|Sq1j><{oupV^-PuZ
zRHhstV}6CpLG34HP`|y{0I4BRyu%pgsu^xtNq#7c8|iJ!WnGrk%TDXO#bM=Hy+TrT
zxPKNzB8M?dj~gGXQbWadUW(NZLyz<LjLF)6Tme#A4KbYU3&P;4(F13RV8rYqSJa8P
zr1**CMc8}$aF<J57<X)LDm*Do<$CAAXw2;<J%eISyZ1SZ_|0G=@btMEz&lovoJK@F
zWwg-Dsh8U%T7UG~ZdXG9%nl?d)MF3PaH4Tuv<t|goT<=$S)#svQRUE%Uk**Urt<{u
zl^L-BGNul*_o}H*7pbppj#}67x-Fc~S~0TQ={>UXyT4{t)B2-hv<dI0J<QdLl357a
zzJFy*SaOp|xnxL2*J>NbKjD`&=7ESz+{vntK0GSAXbAE_Mj=MJJ(;-Mnj=mo{u6i3
zDSkD)+n*si_ghfFr~0Lj|JC?zd5mb`ZlMZSOt|rFL-(ucU0Frc&$j@g|9js5s-}M(
z<*o!FB={D1aJh#6fqxL>U&p!2>j=TR1smL(h=u$rU%!rUmv0dKe+w#{|DguIn%~`Z
w2vWL558Sc;7L$Kb)UVceGXt?RZ*h(HKliDIG8){=NJv=lO9IZ2sejG+e@a0jn*aa+

literal 0
HcmV?d00001

diff --git a/docs/StardustDocs/topics/read.md b/docs/StardustDocs/topics/read.md
index 91f883fe9..3550da482 100644
--- a/docs/StardustDocs/topics/read.md
+++ b/docs/StardustDocs/topics/read.md
@@ -401,7 +401,7 @@ Sometimes cells can have the wrong format in an Excel file. For example, you exp
 
 ```text
 IDS
-100 <-- Intended to be String, but has wrong cell format in original .xlsx file
+100 <-- Intended to be String, but has numeric cell format in original .xlsx file
 A100
 B100
 C100
@@ -409,20 +409,12 @@ C100
 
 You will get column of `Serializable` instead (common parent for `Double` and `String`).
 
-You can fix it using the `.convert()` function:
+You can fix it by passing the `stringColumns` parameter, which reads the listed columns as `String`:
 
 <!---FUN fixMixedColumn-->
 
 ```kotlin
-val df = dataFrameOf("IDS")(100.0, "A100", "B100", "C100")
-val df1 = df.convert("IDS").with(Infer.Type) {
-    if (it is Double) {
-        it.toLong().toString()
-    } else {
-        it
-    }
-}
-df1["IDS"].type() shouldBe typeOf<String>()
+val df = DataFrame.readExcel("mixed_column.xlsx", stringColumns = StringColumns("A"))
 ```
 
 <!---END-->
diff --git a/tests/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Read.kt b/tests/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Read.kt
index 371a9e566..8fd02b5b7 100644
--- a/tests/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Read.kt
+++ b/tests/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Read.kt
@@ -3,20 +3,19 @@ package org.jetbrains.kotlinx.dataframe.samples.api
 import io.kotest.matchers.shouldBe
 import org.jetbrains.kotlinx.dataframe.DataFrame
 import org.jetbrains.kotlinx.dataframe.DataRow
-import org.jetbrains.kotlinx.dataframe.api.Infer
 import org.jetbrains.kotlinx.dataframe.api.ParserOptions
 import org.jetbrains.kotlinx.dataframe.api.columnNames
 import org.jetbrains.kotlinx.dataframe.api.columnTypes
-import org.jetbrains.kotlinx.dataframe.api.convert
-import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
-import org.jetbrains.kotlinx.dataframe.api.with
 import org.jetbrains.kotlinx.dataframe.io.ColType
+import org.jetbrains.kotlinx.dataframe.io.StringColumns
 import org.jetbrains.kotlinx.dataframe.io.readArrowFeather
 import org.jetbrains.kotlinx.dataframe.io.readCSV
+import org.jetbrains.kotlinx.dataframe.io.readExcel
 import org.jetbrains.kotlinx.dataframe.io.readJson
 import org.jetbrains.kotlinx.dataframe.testArrowFeather
 import org.jetbrains.kotlinx.dataframe.testCsv
 import org.jetbrains.kotlinx.dataframe.testJson
+import org.junit.Ignore
 import org.junit.Test
 import java.util.*
 import kotlin.reflect.typeOf
@@ -63,17 +62,10 @@ class Read {
     }
 
     @Test
+    @Ignore
     fun fixMixedColumn() {
         // SampleStart
-        val df = dataFrameOf("IDS")(100.0, "A100", "B100", "C100")
-        val df1 = df.convert("IDS").with(Infer.Type) {
-            if (it is Double) {
-                it.toLong().toString()
-            } else {
-                it
-            }
-        }
-        df1["IDS"].type() shouldBe typeOf<String>()
+        val df = DataFrame.readExcel("mixed_column.xlsx", stringColumns = StringColumns("A"))
         // SampleEnd
     }