Skip to content

Commit

Permalink
ISO 3166-1 support for countries
Browse files Browse the repository at this point in the history
  • Loading branch information
tuqqu committed Aug 28, 2023
1 parent 37358e8 commit 91dd7f4
Show file tree
Hide file tree
Showing 6 changed files with 167 additions and 94 deletions.
18 changes: 7 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,37 +26,33 @@ $detector->getGender('Avery');
// Gender::Unisex
```

Internationalization (I18N) is fully supported:
I18N is fully supported:

```php
$detector->getGender('Želmíra');
// Gender::Female

$detector->getGender('Geirþrúður');
// Gender::Female
```

You can also specify a country or region:

```php
use GenderDetector\Country;

$detector->getGender('Robin');
// Gender::MostlyMale

$detector->getGender('Robin', Country::Usa);
$detector->getGender('Robin', 'US');
// Gender::MostlyFemale

$detector->getGender('Robin', Country::France);
$detector->getGender('Robin', 'FR');
// Gender::Male

$detector->getGender('Robin', Country::Ireland);
$detector->getGender('Robin', 'IE');
// Gender::Unisex
```

For more details about countries see [country list](/doc/country_list.md).
Country codes are case-insensitive ISO 3166-1 alpha-2 codes.
Alternatively, you can pass `Country` enum values; the complete list of enum values and their codes can be found in [src/Country.php](/src/Country.php).

Full list of all the possible values are:
The full list of possible gender values is:

```php
enum Gender
Expand Down
5 changes: 1 addition & 4 deletions bin/get-gender
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,7 @@ function parse_args(array $args): array
switch (true) {
case str_starts_with($arg, '--country'):
$country = parse_option($arg);
$country = Country::tryFrom($country);
if ($country === null) {
print_error_and_exit(sprintf('unrecognised country "%s"', $country));
}
$country = Country::fromISO3166($country);
break;
case str_starts_with($arg, '--help'):
print_help();
Expand Down
15 changes: 0 additions & 15 deletions doc/country_list.md

This file was deleted.

199 changes: 137 additions & 62 deletions src/Country.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,67 +4,142 @@

namespace GenderDetector;

enum Country: string
use function mb_strtoupper;

enum Country
{
case GreatBritain = 'great_britain';
case Ireland = 'ireland';
case Usa = 'usa';
case Italy = 'italy';
case Malta = 'malta';
case Portugal = 'portugal';
case Spain = 'spain';
case France = 'france';
case Belgium = 'belgium';
case Luxembourg = 'luxembourg';
case TheNetherlands = 'the_netherlands';
case EastFrisia = 'east_frisia';
case Germany = 'germany';
case Austria = 'austria';
case Swiss = 'swiss';
case Iceland = 'iceland';
case Denmark = 'denmark';
case Norway = 'norway';
case Sweden = 'sweden';
case Finland = 'finland';
case Estonia = 'estonia';
case Latvia = 'latvia';
case Lithuania = 'lithuania';
case Poland = 'poland';
case CzechRepublic = 'czech_republic';
case Slovakia = 'slovakia';
case Hungary = 'hungary';
case Romania = 'romania';
case Bulgaria = 'bulgaria';
case BosniaAndHerzegovina = 'bosnia_and_herzegovina';
case Croatia = 'croatia';
case Kosovo = 'kosovo';
case NorthMacedonia = 'north_macedonia';
case Montenegro = 'montenegro';
case Serbia = 'serbia';
case Slovenia = 'slovenia';
case Albania = 'albania';
case Greece = 'greece';
case Russia = 'russia';
case Belarus = 'belarus';
case Moldova = 'moldova';
case Ukraine = 'ukraine';
case Armenia = 'armenia';
case Azerbaijan = 'azerbaijan';
case Georgia = 'georgia';
case Kazakhstan = 'kazakhstan';
case Kyrgyzstan = 'kyrgyzstan';
case Tajikistan = 'tajikistan';
case Turkmenistan = 'turkmenistan';
case Uzbekistan = 'uzbekistan';
case Turkey = 'turkey';
case Arabia = 'arabia';
case Iran = 'iran';
case Israel = 'israel';
case China = 'china';
case India = 'india';
case SriLanka = 'sri_lanka';
case Japan = 'japan';
case Korea = 'korea';
case Vietnam = 'vietnam';
case OtherCountries = 'other_countries';
case GreatBritain;
case Ireland;
case Usa;
case Italy;
case Malta;
case Portugal;
case Spain;
case France;
case Belgium;
case Luxembourg;
case TheNetherlands;
case EastFrisia;
case Germany;
case Austria;
case Swiss;
case Iceland;
case Denmark;
case Norway;
case Sweden;
case Finland;
case Estonia;
case Latvia;
case Lithuania;
case Poland;
case CzechRepublic;
case Slovakia;
case Hungary;
case Romania;
case Bulgaria;
case BosniaAndHerzegovina;
case Croatia;
case Kosovo;
case NorthMacedonia;
case Montenegro;
case Serbia;
case Slovenia;
case Albania;
case Greece;
case Russia;
case Belarus;
case Moldova;
case Ukraine;
case Armenia;
case Azerbaijan;
case Georgia;
case Kazakhstan;
case Kyrgyzstan;
case Tajikistan;
case Turkmenistan;
case Uzbekistan;
case Turkey;
case Arabia;
case Iran;
case Israel;
case China;
case India;
case SriLanka;
case Japan;
case Korea;
case Vietnam;
case OtherCountries;

/**
 * Resolves a two-letter country code to the matching Country case.
 *
 * The input is upper-cased before matching, so the lookup is
 * case-insensitive ('fr' and 'FR' both resolve to self::France).
 *
 * Several codes can resolve to the same case (for example 'AU', 'CA' and
 * 'US' all resolve to self::Usa). Codes with no explicit arm fall back to
 * self::OtherCountries rather than raising an error.
 *
 * No arm returns self::EastFrisia, so that case can only be selected by
 * passing the enum value directly.
 *
 * @param string $iso3166 Country code, in any letter case.
 *
 * @return self The matching case, or self::OtherCountries if the code is not listed.
 */
public static function fromISO3166(string $iso3166): self
{
// Normalise the case once so every arm can be written in upper case.
return match (mb_strtoupper($iso3166)) {
'GB' => self::GreatBritain,
'IE' => self::Ireland,
// Multiple codes grouped under a single case.
'AU',
'CA',
'US' => self::Usa,
'IT' => self::Italy,
'MT' => self::Malta,
'PT' => self::Portugal,
'ES' => self::Spain,
'FR' => self::France,
'BE' => self::Belgium,
'LU' => self::Luxembourg,
'NL' => self::TheNetherlands,
'DE' => self::Germany,
'AT' => self::Austria,
'CH' => self::Swiss,
'IS' => self::Iceland,
'DK' => self::Denmark,
'NO' => self::Norway,
'SE' => self::Sweden,
'FI' => self::Finland,
'EE' => self::Estonia,
'LV' => self::Latvia,
'LT' => self::Lithuania,
'PL' => self::Poland,
'CZ' => self::CzechRepublic,
'SK' => self::Slovakia,
'HU' => self::Hungary,
'RO' => self::Romania,
'BG' => self::Bulgaria,
'BA' => self::BosniaAndHerzegovina,
'HR' => self::Croatia,
// NOTE(review): 'XK' is, as far as I know, a user-assigned code rather than an official ISO 3166-1 entry — confirm.
'XK' => self::Kosovo,
'MK' => self::NorthMacedonia,
'ME' => self::Montenegro,
'RS' => self::Serbia,
'SI' => self::Slovenia,
'AL' => self::Albania,
'GR' => self::Greece,
'RU' => self::Russia,
'BY' => self::Belarus,
'MD' => self::Moldova,
'UA' => self::Ukraine,
'AM' => self::Armenia,
'AZ' => self::Azerbaijan,
'GE' => self::Georgia,
'KZ' => self::Kazakhstan,
'KG' => self::Kyrgyzstan,
'TJ' => self::Tajikistan,
'TM' => self::Turkmenistan,
'UZ' => self::Uzbekistan,
'TR' => self::Turkey,
// Multiple codes grouped under the single Arabia case.
'AE',
'QA',
'SA',
'BH',
'EG' => self::Arabia,
'CN',
'HK',
'TW' => self::China,
'IN' => self::India,
'JP' => self::Japan,
'KP',
'KR' => self::Korea,
'VN' => self::Vietnam,
'LK' => self::SriLanka,
// Anything not listed above is treated as "other" instead of failing.
default => self::OtherCountries,
};
}
}
7 changes: 6 additions & 1 deletion src/GenderDetector.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
use function hexdec;
use function mb_strtolower;
use function str_replace;
use function is_string;

final class GenderDetector
{
Expand All @@ -29,8 +30,12 @@ public function addDictionaryFile(string $dictPath): void
$this->consumeDictFile($dictPath);
}

public function getGender(string $name, ?Country $country = null): ?Gender
public function getGender(string $name, string|Country|null $country = null): ?Gender
{
if (is_string($country)) {
$country = Country::fromISO3166($country);
}

$name = self::sanitizeName($name);

if (!isset($this->names[$name])) {
Expand Down
17 changes: 16 additions & 1 deletion tests/GenderDetectorTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
final class GenderDetectorTest extends TestCase
{
#[DataProvider('provideNameData')]
public function testGetGender(string $name, ?Country $region, Gender $expectedGender): void
public function testGetGender(string $name, string|Country|null $region, Gender $expectedGender): void
{
$detector = new GenderDetector();
$gender = $detector->getGender($name, $region);
Expand Down Expand Up @@ -78,6 +78,21 @@ public static function provideNameData(): array
Country::Ireland,
Gender::Unisex,
],
[
'Robin',
'US',
Gender::MostlyFemale,
],
[
'Robin',
'fr',
Gender::Male,
],
[
'Robin',
'IE',
Gender::Unisex,
],
[
'Geirþrúður',
null,
Expand Down

0 comments on commit 91dd7f4

Please sign in to comment.