Skip to content

Commit

Permalink
improve Billa parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
StefanBratanov committed Oct 25, 2023
1 parent 451c35d commit d7db630
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,11 @@ class BillaProductsExtractor : UrlProductsExtractor {
"\\*+".toRegex(IGNORE_CASE),
"Специфика\\s*:.*$".toRegex(IGNORE_CASE),
"Супер цена".toRegex(IGNORE_CASE),
"МУЛТИ ПАК \\d+\\+\\d+".toRegex(IGNORE_CASE),
"Цена\\s+за\\s+\\d+\\s*бр\\.\\s*((без|с)\\s+отстъпка)?\\s*(\\d|\\.)+\\s*лв\\."
.toRegex(IGNORE_CASE)
"(?<=МУЛТИ ПАК.+)\\d+(х|x)".toRegex(IGNORE_CASE),
"МУЛТИ ПАК (\\d+\\+\\d+)?".toRegex(IGNORE_CASE),
"(\\d+(х|x)\\s*)?Цена\\s+за\\s+\\d+\\s*бр\\.\\s*((без|с)\\s+отстъпка)?\\s*(\\d|\\.|,)+\\s*лв\\.".toRegex(
IGNORE_CASE
)
)

private val regexesToDeleteBilla =
Expand Down
40 changes: 40 additions & 0 deletions src/test/resources/extractors/billa/expected.json
Original file line number Diff line number Diff line change
Expand Up @@ -1948,5 +1948,45 @@
"picUrl": null,
"validFrom": "2021-03-18",
"validUntil": "2021-03-24"
},
{
"name": "Бира Tuborg кен",
"quantity": "0.5л",
"price": 1.33,
"oldPrice": 2.19,
"category": null,
"picUrl": null,
"validFrom": "2021-03-18",
"validUntil": "2021-03-24"
},
{
"name": "Вино Montado",
"quantity": "0.75л",
"price": 7.99,
"oldPrice": 10.99,
"category": null,
"picUrl": null,
"validFrom": "2021-03-18",
"validUntil": "2021-03-24"
},
{
"name": "Уиски Johnnie Walker Red Label",
"quantity": "0.7л",
"price": 18.99,
"oldPrice": 28.99,
"category": null,
"picUrl": null,
"validFrom": "2021-03-18",
"validUntil": "2021-03-24"
},
{
"name": "Бира Starobrno кен",
"quantity": "0.5л",
"price": 1.225,
"oldPrice": 2.45,
"category": null,
"picUrl": null,
"validFrom": "2021-03-18",
"validUntil": "2021-03-24"
}
]
44 changes: 44 additions & 0 deletions src/test/resources/extractors/billa/input.html
Original file line number Diff line number Diff line change
Expand Up @@ -3695,6 +3695,50 @@
<div style="float:left; width:15%">
<span class="price">13.99</span><span class="currency">лв.</span> </div>
</div>
<div class="product">
<div class="actualProduct" style="float:left; width:25%; margin-right:2%;">МУЛТИ ПАК Бира Tuborg кен 12х0.5л Цена за 1 бр. 2,19 лв.
Цена за 12 бр. без отстъпка 26,28 лв.</div>
<div class="priceText" style="float:left; width:10%;">СТАРА<br>ЦЕНА</div>
<div style="float:left; width:22%">
<span class="price">2.19</span> <span class="currency">лв.</span> </div>
<div class="priceText" style="float:left; width:10%;">НОВА<br>ЦЕНА</div>
<div class="discount" style="float:left; width:10%;"> - -39%</div>
<div style="float:left; width:15%">
<span class="price">1.33</span><span class="currency">лв.</span> </div>
</div>
<div class="product">
<div class="actualProduct" style="float:left; width:25%; margin-right:2%;">МУЛТИ ПАК Вино Montado 2x0.75л Цена за 1 бр. 10,99 лв.
Цена за 2 бр. без отстъпка 21,98 лв.</div>
<div class="priceText" style="float:left; width:10%;">СТАРА<br>ЦЕНА</div>
<div style="float:left; width:22%">
<span class="price">10.99</span> <span class="currency">лв.</span> </div>
<div class="priceText" style="float:left; width:10%;">НОВА<br>ЦЕНА</div>
<div class="discount" style="float:left; width:10%;"> - -27%</div>
<div style="float:left; width:15%">
<span class="price">7.99</span><span class="currency">лв.</span> </div>
</div>
<div class="product">
<div class="actualProduct" style="float:left; width:25%; margin-right:2%;">МУЛТИ ПАК 1+1 Уиски Johnnie Walker Red Label 2x0.7л Цена за 1 бр. 28,99 лв.
Цена за 2 бр. без отстъпка 57,98 лв.</div>
<div class="priceText" style="float:left; width:10%;">СТАРА<br>ЦЕНА</div>
<div style="float:left; width:22%">
<span class="price">28.99</span> <span class="currency">лв.</span> </div>
<div class="priceText" style="float:left; width:10%;">НОВА<br>ЦЕНА</div>
<div class="discount" style="float:left; width:10%;"> - -34%</div>
<div style="float:left; width:15%">
<span class="price">18.99</span><span class="currency">лв.</span> </div>
</div>
<div class="product">
<div class="actualProduct" style="float:left; width:25%; margin-right:2%;">МУЛТИ ПАК 1+1 Бира Starobrno кен 0.5л Цена за 1 бр. 2,45 лв.
Цена за 2 бр. без отстъпка 4,90 лв.</div>
<div class="priceText" style="float:left; width:10%;">СТАРА<br>ЦЕНА</div>
<div style="float:left; width:22%">
<span class="price">2.45</span> <span class="currency">лв.</span> </div>
<div class="priceText" style="float:left; width:10%;">НОВА<br>ЦЕНА</div>
<div class="discount" style="float:left; width:10%;"> - -50%</div>
<div style="float:left; width:15%">
<span class="price">1.225</span><span class="currency">лв.</span> </div>
</div>
</div>
</div>

Expand Down

0 comments on commit d7db630

Please sign in to comment.