From 91dd7f40918e9acb7c5af0e0d5bdbc8198014af1 Mon Sep 17 00:00:00 2001 From: tuqqu Date: Tue, 29 Aug 2023 00:57:52 +0200 Subject: [PATCH] ISO 3166-1 support for countries --- README.md | 18 ++-- bin/get-gender | 5 +- doc/country_list.md | 15 --- src/Country.php | 199 ++++++++++++++++++++++++----------- src/GenderDetector.php | 7 +- tests/GenderDetectorTest.php | 17 ++- 6 files changed, 167 insertions(+), 94 deletions(-) delete mode 100644 doc/country_list.md diff --git a/README.md b/README.md index 21b8c4d..5e0a123 100644 --- a/README.md +++ b/README.md @@ -26,12 +26,9 @@ $detector->getGender('Avery'); // Gender::Unisex ``` -Internationalization (I18N) is fully supported: +I18N is fully supported: ```php -$detector->getGender('Želmíra'); -// Gender::Female - $detector->getGender('Geirþrúður'); // Gender::Female ``` @@ -39,24 +36,23 @@ $detector->getGender('Geirþrúður'); You can also specify a country or region: ```php -use GenderDetector\Country; - $detector->getGender('Robin'); // Gender::MostlyMale -$detector->getGender('Robin', Country::Usa); +$detector->getGender('Robin', 'US'); // Gender::MostlyFemale -$detector->getGender('Robin', Country::France); +$detector->getGender('Robin', 'FR'); // Gender::Male -$detector->getGender('Robin', Country::Ireland); +$detector->getGender('Robin', 'IE'); // Gender::Unisex ``` -For more details about countries see [country list](/doc/country_list.md). +Country codes are case-insensitive ISO 3166-1 alpha-2 codes. +Alternatively, you can utilize enum values; the complete list of values and codes can be found [here](/src/Country.php). 
-Full list of all the possible values are: +The full list of possible gender values is: ```php enum Gender diff --git a/bin/get-gender b/bin/get-gender index 18f76ce..d937451 100755 --- a/bin/get-gender +++ b/bin/get-gender @@ -53,10 +53,7 @@ function parse_args(array $args): array switch (true) { case str_starts_with($arg, '--country'): $country = parse_option($arg); - $country = Country::tryFrom($country); - if ($country === null) { - print_error_and_exit(sprintf('unrecognised country "%s"', $country)); - } + $country = Country::fromISO3166($country); break; case str_starts_with($arg, '--help'): print_help(); diff --git a/doc/country_list.md b/doc/country_list.md deleted file mode 100644 index 80b9c66..0000000 --- a/doc/country_list.md +++ /dev/null @@ -1,15 +0,0 @@ -## Country List - -The original list of countries/regions introduced by the 'gender.c' program remains almost unchanged. - -Some adjustments were made, such as separating grouped countries into aliases: - - `india` and `sri_lanka` are aliases. - - `kazakhstan`, `kyrgyzstan`, `tajikistan`, `turkmenistan`, `uzbekistan` are grouped. - -Some country names were changed: - - `macedonia` to `north_macedonia`. - - `persia` to `iran`. - -Note that some names are not actual countries (e.g., `arabia` or `east_frisia`), but they are still included in the list. - -You can view the full list of countries [here](/src/Country.php).
diff --git a/src/Country.php b/src/Country.php index 873e4fe..ce74153 100644 --- a/src/Country.php +++ b/src/Country.php @@ -4,67 +4,142 @@ namespace GenderDetector; -enum Country: string +use function mb_strtoupper; + +enum Country { - case GreatBritain = 'great_britain'; - case Ireland = 'ireland'; - case Usa = 'usa'; - case Italy = 'italy'; - case Malta = 'malta'; - case Portugal = 'portugal'; - case Spain = 'spain'; - case France = 'france'; - case Belgium = 'belgium'; - case Luxembourg = 'luxembourg'; - case TheNetherlands = 'the_netherlands'; - case EastFrisia = 'east_frisia'; - case Germany = 'germany'; - case Austria = 'austria'; - case Swiss = 'swiss'; - case Iceland = 'iceland'; - case Denmark = 'denmark'; - case Norway = 'norway'; - case Sweden = 'sweden'; - case Finland = 'finland'; - case Estonia = 'estonia'; - case Latvia = 'latvia'; - case Lithuania = 'lithuania'; - case Poland = 'poland'; - case CzechRepublic = 'czech_republic'; - case Slovakia = 'slovakia'; - case Hungary = 'hungary'; - case Romania = 'romania'; - case Bulgaria = 'bulgaria'; - case BosniaAndHerzegovina = 'bosnia_and_herzegovina'; - case Croatia = 'croatia'; - case Kosovo = 'kosovo'; - case NorthMacedonia = 'north_macedonia'; - case Montenegro = 'montenegro'; - case Serbia = 'serbia'; - case Slovenia = 'slovenia'; - case Albania = 'albania'; - case Greece = 'greece'; - case Russia = 'russia'; - case Belarus = 'belarus'; - case Moldova = 'moldova'; - case Ukraine = 'ukraine'; - case Armenia = 'armenia'; - case Azerbaijan = 'azerbaijan'; - case Georgia = 'georgia'; - case Kazakhstan = 'kazakhstan'; - case Kyrgyzstan = 'kyrgyzstan'; - case Tajikistan = 'tajikistan'; - case Turkmenistan = 'turkmenistan'; - case Uzbekistan = 'uzbekistan'; - case Turkey = 'turkey'; - case Arabia = 'arabia'; - case Iran = 'iran'; - case Israel = 'israel'; - case China = 'china'; - case India = 'india'; - case SriLanka = 'sri_lanka'; - case Japan = 'japan'; - case Korea = 'korea'; - case Vietnam = 
'vietnam'; - case OtherCountries = 'other_countries'; + case GreatBritain; + case Ireland; + case Usa; + case Italy; + case Malta; + case Portugal; + case Spain; + case France; + case Belgium; + case Luxembourg; + case TheNetherlands; + case EastFrisia; + case Germany; + case Austria; + case Swiss; + case Iceland; + case Denmark; + case Norway; + case Sweden; + case Finland; + case Estonia; + case Latvia; + case Lithuania; + case Poland; + case CzechRepublic; + case Slovakia; + case Hungary; + case Romania; + case Bulgaria; + case BosniaAndHerzegovina; + case Croatia; + case Kosovo; + case NorthMacedonia; + case Montenegro; + case Serbia; + case Slovenia; + case Albania; + case Greece; + case Russia; + case Belarus; + case Moldova; + case Ukraine; + case Armenia; + case Azerbaijan; + case Georgia; + case Kazakhstan; + case Kyrgyzstan; + case Tajikistan; + case Turkmenistan; + case Uzbekistan; + case Turkey; + case Arabia; + case Iran; + case Israel; + case China; + case India; + case SriLanka; + case Japan; + case Korea; + case Vietnam; + case OtherCountries; + + public static function fromISO3166(string $iso3166): self + { + return match (mb_strtoupper($iso3166)) { + 'GB' => self::GreatBritain, + 'IE' => self::Ireland, + 'AU', + 'CA', + 'US' => self::Usa, + 'IT' => self::Italy, + 'MT' => self::Malta, + 'PT' => self::Portugal, + 'ES' => self::Spain, + 'FR' => self::France, + 'BE' => self::Belgium, + 'LU' => self::Luxembourg, + 'NL' => self::TheNetherlands, + 'DE' => self::Germany, + 'AT' => self::Austria, + 'CH' => self::Swiss, + 'IS' => self::Iceland, + 'DK' => self::Denmark, + 'NO' => self::Norway, + 'SE' => self::Sweden, + 'FI' => self::Finland, + 'EE' => self::Estonia, + 'LV' => self::Latvia, + 'LT' => self::Lithuania, + 'PL' => self::Poland, + 'CZ' => self::CzechRepublic, + 'SK' => self::Slovakia, + 'HU' => self::Hungary, + 'RO' => self::Romania, + 'BG' => self::Bulgaria, + 'BA' => self::BosniaAndHerzegovina, + 'HR' => self::Croatia, + 'XK' => self::Kosovo, + 
'MK' => self::NorthMacedonia, + 'ME' => self::Montenegro, + 'RS' => self::Serbia, + 'SI' => self::Slovenia, + 'AL' => self::Albania, + 'GR' => self::Greece, + 'RU' => self::Russia, + 'BY' => self::Belarus, + 'MD' => self::Moldova, + 'UA' => self::Ukraine, + 'AM' => self::Armenia, + 'AZ' => self::Azerbaijan, + 'GE' => self::Georgia, + 'KZ' => self::Kazakhstan, + 'KG' => self::Kyrgyzstan, + 'TJ' => self::Tajikistan, + 'TM' => self::Turkmenistan, + 'UZ' => self::Uzbekistan, + 'TR' => self::Turkey, + 'AE', + 'QA', + 'SA', + 'BH', + 'EG' => self::Arabia, + 'CN', + 'HK', + 'TW' => self::China, + 'IN' => self::India, + 'JP' => self::Japan, + 'KP', + 'KR' => self::Korea, + 'VN' => self::Vietnam, + 'LK' => self::SriLanka, + default => self::OtherCountries, + }; + } } diff --git a/src/GenderDetector.php b/src/GenderDetector.php index 443e129..4db780c 100644 --- a/src/GenderDetector.php +++ b/src/GenderDetector.php @@ -11,6 +11,7 @@ use function hexdec; use function mb_strtolower; use function str_replace; +use function is_string; final class GenderDetector { @@ -29,8 +30,12 @@ public function addDictionaryFile(string $dictPath): void $this->consumeDictFile($dictPath); } - public function getGender(string $name, ?Country $country = null): ?Gender + public function getGender(string $name, string|Country|null $country = null): ?Gender { + if (is_string($country)) { + $country = Country::fromISO3166($country); + } + $name = self::sanitizeName($name); if (!isset($this->names[$name])) { diff --git a/tests/GenderDetectorTest.php b/tests/GenderDetectorTest.php index b110677..4afaff2 100644 --- a/tests/GenderDetectorTest.php +++ b/tests/GenderDetectorTest.php @@ -13,7 +13,7 @@ final class GenderDetectorTest extends TestCase { #[DataProvider('provideNameData')] - public function testGetGender(string $name, ?Country $region, Gender $expectedGender): void + public function testGetGender(string $name, string|Country|null $region, Gender $expectedGender): void { $detector = new 
GenderDetector(); $gender = $detector->getGender($name, $region); @@ -78,6 +78,21 @@ public static function provideNameData(): array Country::Ireland, Gender::Unisex, ], + [ + 'Robin', + 'US', + Gender::MostlyFemale, + ], + [ + 'Robin', + 'fr', + Gender::Male, + ], + [ + 'Robin', + 'IE', + Gender::Unisex, + ], [ 'Geirþrúður', null,