Skip to content

Commit

Permalink
Support GSM National Language Shift Tables (#11)
Browse files Browse the repository at this point in the history
Support for GSM Language Shift Tables, information can be found here: en.wikipedia.org/wiki/GSM_03.38

Added support for Turkish, Spanish and Portuguese only; it should be fairly easy to extend to other languages
  • Loading branch information
juliangut authored May 27, 2019
1 parent bf83f86 commit ccdbf6d
Show file tree
Hide file tree
Showing 3 changed files with 251 additions and 12 deletions.
12 changes: 11 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use Instasent\SMSCounter\SMSCounter;

$smsCounter = new SMSCounter();
$smsCounter->count('some-string-to-be-counted');
$smsCounter->countWithShiftTables('some-string-to-be-counted');
```

which returns
Expand All @@ -26,7 +27,7 @@ stdClass Object
)
```

You can sanitize your text to be a valid GSM 03.38 charset
You can sanitize your text to be a valid strict GSM 03.38 charset

```php
use Instasent\SMSCounter\SMSCounter;
Expand All @@ -35,6 +36,15 @@ $smsCounter = new SMSCounter();
$smsCounter->sanitizeToGSM('dadáó'); //return dadao
```

#### National Language Shift Tables

Starting with release 8 of GSM 03.38, some additional charsets are allowed. The following National Language Shift Tables are currently supported:

- [Turkish](https://en.wikipedia.org/wiki/GSM_03.38#Turkish_language_(Latin_script))
- [Spanish](https://en.wikipedia.org/wiki/GSM_03.38#Spanish_language_(Latin_script))
- [Portuguese](https://en.wikipedia.org/wiki/GSM_03.38#Portuguese_language_(Latin_script))


## Installation

`sms-counter-php` is available via [composer](http://getcomposer.org) on [packagist](https://packagist.org/packages/instasent/sms-counter-php).
Expand Down
178 changes: 167 additions & 11 deletions SMSCounter.php
Original file line number Diff line number Diff line change
Expand Up @@ -105,18 +105,110 @@ public function getGsm7bitExMap()
);
}

/**
 * Lists the Unicode code points accepted as 7-bit characters for Turkish.
 *
 * The list is returned in ascending order and mixes control characters,
 * printable ASCII, Latin-1 letters, Turkish-specific letters, Greek capitals
 * and the euro sign.
 *
 * @return int[] Unicode code points
 */
public function getTurkishGsm7bitMap()
{
    // LF, FF, CR
    $controls = [10, 12, 13];

    // Space through underscore; 96 (backtick) is deliberately not part of the list.
    $asciiLower = range(32, 95);

    // "a" through "~"
    $asciiUpper = range(97, 126);

    // £ ¤ ¥ § Ä Å Ç É Ñ Ö Ü ß à ä å ç é ñ ò ö ù ü
    $latin = [
        163, 164, 165, 167, 196, 197, 199, 201, 209, 214, 220,
        223, 224, 228, 229, 231, 233, 241, 242, 246, 249, 252,
    ];

    // Ğ ğ İ ı Ş ş
    $turkish = [286, 287, 304, 305, 350, 351];

    // Γ Δ Θ Λ Ξ Π Σ Φ Ψ Ω
    $greek = [915, 916, 920, 923, 926, 928, 931, 934, 936, 937];

    // €
    $euro = [8364];

    return array_merge($controls, $asciiLower, $asciiUpper, $latin, $turkish, $greek, $euro);
}

/**
 * Lists the code points flagged as extension characters for Turkish.
 *
 * @return int[] Unicode code points
 */
public function getAddedTurkishGsm7bitExMap()
{
    return array_merge(
        [12],                                  // form feed
        range(91, 94),                         // [ \ ] ^
        range(123, 126),                       // { | } ~
        [286, 287, 304, 305, 350, 351],        // Ğ ğ İ ı Ş ş
        [8364]                                 // €
    );
}

/**
 * Lists the code points flagged as extension characters for Spanish.
 *
 * @return int[] Unicode code points
 */
public function getAddedSpanishGsm7bitExMap()
{
    return array_merge(
        [12],                                            // form feed
        range(91, 94),                                   // [ \ ] ^
        range(123, 126),                                 // { | } ~
        [193, 205, 211, 218, 225, 231, 237, 243, 250],   // Á Í Ó Ú á ç í ó ú
        [8364]                                           // €
    );
}

/**
 * Lists the Unicode code points accepted as 7-bit characters for Portuguese.
 *
 * The list is returned in ascending order and mixes control characters,
 * the full printable ASCII range, Latin-1 letters, Greek capitals and two
 * symbols (euro and infinity).
 *
 * @return int[] Unicode code points
 */
public function getPortugueseGsm7bitMap()
{
    // LF, FF, CR
    $controls = [10, 12, 13];

    // Space through "~", backtick (96) included.
    $ascii = range(32, 126);

    // £ ¥ § ª º À Á Â Ã Ç É Ê Í Ó Ô Õ Ú Ü à á â ã ç é ê í ò ó ô õ ú ü
    $latin = [
        163, 165, 167, 170, 186, 192, 193, 194,
        195, 199, 201, 202, 205, 211, 212, 213,
        218, 220, 224, 225, 226, 227, 231, 233,
        234, 237, 242, 243, 244, 245, 250, 252,
    ];

    // Γ Δ Θ Π Σ Φ Ψ Ω
    $greek = [915, 916, 920, 928, 931, 934, 936, 937];

    // € ∞
    $symbols = [8364, 8734];

    return array_merge($controls, $ascii, $latin, $greek, $symbols);
}

/**
 * Lists the code points flagged as extension characters for Portuguese.
 *
 * NOTE(review): 242 (ò) is listed here while 192 (À) and 244 (ô) are not,
 * although both of the latter appear in getPortugueseGsm7bitMap(). Confirm
 * against the GSM 03.38 Portuguese single shift table whether 242 was meant
 * to be 244 and whether 192 is missing.
 *
 * @return int[] Unicode code points
 */
public function getAddedPortugueseGsm7bitExMap()
{
    // form feed
    $formFeed = [12];

    // [ \ ] ^
    $brackets = range(91, 94);

    // { | } ~
    $braces = range(123, 126);

    // Á Â Ã Ê Í Ó Ô Õ Ú á â ã ç ê í ò ó õ ú
    $latin = [
        193, 194, 195, 202, 205, 211, 212, 213, 218, 225,
        226, 227, 231, 234, 237, 242, 243, 245, 250,
    ];

    // Γ Θ Π Σ Φ Ψ Ω
    $greek = [915, 920, 928, 931, 934, 936, 937];

    // €
    $euro = [8364];

    return array_merge($formFeed, $brackets, $braces, $latin, $greek, $euro);
}

/**
 * Counts a message without national language shift table support.
 *
 * Thin wrapper around doCount() with the shift-table flag switched off.
 *
 * @param string $text Message text
 *
 * @return \stdClass Object with params encoding, length, per_message, remaining, messages
 */
public function count($text)
{
    $supportShiftTables = false;

    return $this->doCount($text, $supportShiftTables);
}

/**
 * Counts a message with national language shift table support.
 *
 * Thin wrapper around doCount() with the shift-table flag switched on.
 *
 * @param string $text Message text
 *
 * @return \stdClass Object with params encoding, length, per_message, remaining, messages
 */
public function countWithShiftTables($text)
{
    $supportShiftTables = true;

    return $this->doCount($text, $supportShiftTables);
}

/**
* @return \stdClass Object with params encoding,length, per_message, remaining, messages
*/
private function doCount($text, $supportShiftTables)
{
$unicodeArray = $this->utf8ToUnicode($text);

// variable to catch if any ex chars while encoding detection.
$exChars = [];
$encoding = $this->detectEncoding($unicodeArray, $exChars);
$encoding = $supportShiftTables
? $this->detectEncodingWithShiftTables($text, $exChars)
: $this->detectEncoding($text, $exChars);

$length = count($unicodeArray);

if ($encoding === self::GSM_7BIT_EX) {
Expand Down Expand Up @@ -173,17 +265,56 @@ public function count($text)
public function detectEncoding($text, &$exChars)
{
    // $text may be a raw string or an array that was already converted.
    // Convert exactly once: running utf8ToUnicode() a second time would hand
    // it the array produced by the first call instead of a string.
    if (!is_array($text)) {
        $text = $this->utf8ToUnicode($text);
    }

    // Any value that is not present in getGsm7bitExMap() forces UTF16.
    // $exChars is left untouched on this path.
    $utf16Chars = array_diff($text, $this->getGsm7bitExMap());
    if (count($utf16Chars)) {
        return self::UTF16;
    }

    // $exChars is an out-parameter: it receives the entries of $text that are
    // listed in getAddedGsm7bitExMap() (array_intersect keeps the keys of $text).
    $exChars = array_intersect($text, $this->getAddedGsm7bitExMap());
    if (count($exChars)) {
        return self::GSM_7BIT_EX;
    }

    return self::GSM_7BIT;
}

/**
* Detects the encoding of a particular text.
* Supports language shift tables characters.
*
* @return string (GSM_7BIT|GSM_7BIT_EX|UTF16)
*/
public function detectEncodingWithShiftTables($text, &$exChars)
{
if (!is_array($text)) {
$text = $this->utf8ToUnicode($text);
}

$gsmCharMap = array_merge(
$this->getGsm7bitExMap(),
$this->getTurkishGsm7bitMap(),
$this->getAddedTurkishGsm7bitExMap(),
$this->getAddedSpanishGsm7bitExMap(),
$this->getPortugueseGsm7bitMap(),
$this->getAddedPortugueseGsm7bitExMap()
);

$utf16Chars = array_diff($text, $gsmCharMap);
if (count($utf16Chars)) {
return self::UTF16;
}

$addedGsmCharMap = array_merge(
$this->getAddedGsm7bitExMap(),
$this->getAddedTurkishGsm7bitExMap(),
$this->getAddedSpanishGsm7bitExMap(),
$this->getAddedPortugueseGsm7bitExMap()
);

$exChars = array_intersect($text, $addedGsmCharMap);
if (count($exChars)) {
return self::GSM_7BIT_EX;
}
Expand Down Expand Up @@ -296,7 +427,7 @@ public function removeNonGsmChars($str)
public function replaceNonGsmChars($str, $replacement = null)
{
$validChars = $this->getGsm7bitExMap();
$allChars = self::utf8ToUnicode($str);
$allChars = $this->utf8ToUnicode($str);

if (strlen($replacement) > 1) {
return false;
Expand Down Expand Up @@ -525,27 +656,50 @@ public function removeAccents($str)
* the encoding an multipart limits to apply the truncate.
*
* @param string $str Message text
* @param int $messages Number of SMS allowed
* @param int $limitSms Number of SMS allowed
*
* @return string Truncated message
*/
public function truncate($str, $limitSms)
{
    // Delegate with shift-table support disabled. doTruncate() counts the
    // message itself, so counting here as well would only repeat that work
    // and discard the result.
    return $this->doTruncate($str, $limitSms, false);
}

/**
 * Truncates a message to the number of characters allowed by the given
 * number of SMS. The encoding and the multipart limits are detected in order
 * to apply the truncation.
 * Supports language shift tables characters.
 *
 * @param string $str      Message text
 * @param int    $limitSms Number of SMS allowed
 *
 * @return string Truncated message
 */
public function truncateWithShiftTables($str, $limitSms)
{
    $supportShiftTables = true;

    return $this->doTruncate($str, $limitSms, $supportShiftTables);
}

/**
* @return string Truncated message
*/
private function doTruncate($str, $limitSms, $supportShiftTables)
{
$count = $supportShiftTables
? $this->countWithShiftTables($str)
: $this->count($str);

if ($count->messages <= $limitSms) {
return $str;
}

if ($count->encoding == 'UTF16') {
if ($count->encoding === 'UTF16') {
$limit = self::UTF16_LEN;

if ($limitSms > 2) {
$limit = self::UTF16_LEN_MULTIPART;
}
}

if ($count->encoding != 'UTF16') {
} else {
$limit = self::GSM_7BIT_LEN;

if ($limitSms > 2) {
Expand All @@ -555,7 +709,9 @@ public function truncate($str, $limitSms)

do {
$str = mb_substr($str, 0, $limit * $limitSms);
$count = $this->count($str);
$count = $supportShiftTables
? $this->countWithShiftTables($str)
: $this->count($str);

$limit = $limit - 1;
} while ($count->messages > $limitSms);
Expand Down
73 changes: 73 additions & 0 deletions Tests/SMSCounterTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,57 @@ public function testGSM()
$this->assertEquals($expected, $count);
}

public function testGSM_TR()
{
    $counter = new SMSCounter();
    $actual = $counter->countWithShiftTables('a GSM TR ç Text');

    // The message holds 15 characters; the expected length is 16.
    $expected = (object) [
        'encoding'    => SMSCounter::GSM_7BIT_EX,
        'length'      => 16,
        'per_message' => 160,
        'remaining'   => 144,
        'messages'    => 1,
    ];

    $this->assertEquals($expected, $actual);
}

public function testGSM_ES()
{
    $counter = new SMSCounter();
    $actual = $counter->countWithShiftTables('a GSM ES Ú Text');

    // The message holds 15 characters; the expected length is 16.
    $expected = (object) [
        'encoding'    => SMSCounter::GSM_7BIT_EX,
        'length'      => 16,
        'per_message' => 160,
        'remaining'   => 144,
        'messages'    => 1,
    ];

    $this->assertEquals($expected, $actual);
}

public function testGSM_PT()
{
    $counter = new SMSCounter();
    $actual = $counter->countWithShiftTables('a GSM PT Ã Text');

    // The message holds 15 characters; the expected length is 16.
    $expected = (object) [
        'encoding'    => SMSCounter::GSM_7BIT_EX,
        'length'      => 16,
        'per_message' => 160,
        'remaining'   => 144,
        'messages'    => 1,
    ];

    $this->assertEquals($expected, $actual);
}

public function testGSMSymbols()
{
$text = 'a GSM +Text';
Expand Down Expand Up @@ -162,6 +213,17 @@ public function testTruncate1SmsGSM7()
$this->assertEquals($expectedTExt, $output);
}

public function testTruncate1SmsGSM7ShiftTable()
{
    // Input that does not fit in one SMS, and the prefix expected after truncating to one.
    $input = 'ÚLorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem.';
    $expected = 'ÚLorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturien';

    $counter = new SMSCounter();
    $truncated = $counter->truncateWithShiftTables($input, 1);

    $this->assertEquals($expected, $truncated);
}

public function testTruncate2SmsGSM7()
{
$text = 'Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient';
Expand All @@ -173,6 +235,17 @@ public function testTruncate2SmsGSM7()
$this->assertEquals($expectedTExt, $output);
}

public function testTruncate2SmsGSM7ShiftTable()
{
    // Input that does not fit in two SMS, and the prefix expected after truncating to two.
    $input = 'çLorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturie';
    $expected = 'çLorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magni';

    $counter = new SMSCounter();
    $truncated = $counter->truncateWithShiftTables($input, 2);

    $this->assertEquals($expected, $truncated);
}

public function testTruncate1SmsUnicode()
{
$text = 'Snowman shows off! ☃ Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa';
Expand Down

0 comments on commit ccdbf6d

Please sign in to comment.