diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d5d4b6a --- /dev/null +++ b/.gitignore @@ -0,0 +1,60 @@ +.idea/ +.dccache +*.code-workspace + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Scrapy stuff: +.scrapy + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# Environments +venv/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mypy +.mypy_cache/ + diff --git a/AUTHORS.md b/AUTHORS.md new file mode 100644 index 0000000..96ef06e --- /dev/null +++ b/AUTHORS.md @@ -0,0 +1,6 @@ +# Contributors + +- Khuyagbaatar Batsuren +- Tatu Ylonen +- Adam Twardoch +- Wiktionary Contributors diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..4602b75 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,5 @@ +# Changelog + +## Version 0.1 (development) + +- initial \ No newline at end of file diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..f7b9ffa --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,361 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. 
This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. 
We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. 
+ +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. 
But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) 
+ +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. 
Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. 
Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. 
Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. 
It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. 
Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. + +------------------------------------------------------------------------------ + + MIT LICENSE + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
\ No newline at end of file diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..f466123 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +recursive-include wiktra/wikt *.lua \ No newline at end of file diff --git a/README.md b/README.md index 7439dd8..864fe4f 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,31 @@ -# Wiktra - Python tool of Wiktionary Transliteration modules +# Wiktra - Transliteration tool using Wiktionary transliteration modules -Wiktra is a unicode transliteration tool as a python port of Wiktionary transliteration modules that are written in Lua by the Wiktionary linguists and developers. -https://en.wiktionary.org/wiki/Category:Transliteration_modules. Therefore, it offers the highest quality of rule-based transliterations. +**Wiktra** is a Unicode transliteration tool, written in Python. It’s available as the `wiktrapy` CLI app and the `wiktra` Python 3 module. -Currently, Wiktra supports 181 languages and its 60 orthographies, and the table in the supporting languages section displays their language name and corresponding 3-letter codes. This tool is used to build a large-scale, high quality cognate database, called [CogNet](https://github.com/kbatsuren/CogNet). +Internally, it uses transliteration modules [from Wiktionary](https://en.wiktionary.org/wiki/Category:Transliteration_modules). These modules are written in Lua by the Wiktionary linguists and developers. Therefore, Wiktra offers the highest quality of rule-based transliterations. -## Installation && Setup +This is version 2 of Wiktra, maintained by [Adam Twardoch](https://twardoch.github.io/). It’s based on [Wiktra](https://github.com/kbatsuren/wiktra/) by [Khuyagbaatar Batsuren](https://github.com/kbatsuren). + +Wiktra 2 supports nearly all of the languages supported by Wiktionary, except Korean, Japanese and Thai. Wiktra 1 supported 181 languages and its 60 orthographies.
Wiktra 2 currently has a legacy Python function which uses the language codes supplied by the original developer, and also lets you use Wiktionary’s codes directly. + +**This is work in progress**. + +## Installation + +### macOS + +In Terminal, `cd` to the main folder and run: + +``` +./install-mac.sh +python3 -m pip install --upgrade . +``` + +This will install `brew` if needed, then installs `lua`, `luarocks`, `lua-format`, `luajit` and `python3`. Finally, it installs the Python dependencies `lupa` and `pywikiapi`. + +### Other systems + +_This is from the original developer:_ As much as you want to use your favorite version of Python, it is recommended to employ 3.5 version on the grounds that the module utilizes lupa-1.8. Lupa enables Python to adopt functionalities of Lua language, in which most of the transliteration modules are written. @@ -27,27 +47,57 @@ Start your Python (3.5.x): $ python ``` -## Fixing LuaError: module 'wikt.mw' not found: -This error is raising due to lupa is not able to find a current directory. -Easiest way to fix this issue is as follows: - -In Windows you can see similar error: -![image](https://user-images.githubusercontent.com/50955407/111557773-4671bc80-87c8-11eb-909d-bbb028b8b4a4.png) +### Troubleshooting + +If you get `LuaError: module 'wikt.mw' not found`, try: + +- create a folder `lua` in `C:\ProgramData\Miniconda3\` +- copy the entire folder of wikt from this project and paste it into `C:\ProgramData\Miniconda3\lua` + +## Usage + +### Command-line -Here all you need to do is two things. - 1) create a folder 'lua' in 'C:\ProgramData\Miniconda3\' - 2) copy the entire folder of wikt from this project and paste it into 'C:\ProgramData\Miniconda3\lua' +```sh +wiktrapy -h +``` -It should be similar to Linux and MacOs.
+``` +usage: wiktrapy [-h] [-t TEXT] [-i FILE] [-l LANG] [-s SCRIPT] [-v] [-V] -## Example +optional arguments: + -h, --help show this help message and exit + -t TEXT, --text TEXT + -i FILE, --input FILE + -l LANG, --lang LANG Input language as ISO 639-2 code + -s SCRIPT, --script SCRIPT + Input script as ISO 15924 code + -v, --verbose -v show progress, -vv show debug + -V, --version show version and exit +``` -After placing the package directory inside your project directory or paths subscribed by Python, just run the following: +Example: ```sh -from Wiktra import translite as tr +$ wiktrapy -t "Привет" -l ru -s Cyrl +Privet ``` -The package offers only function `translite` and it prompts you to provide two parameters (text, 639-2 code): + +### Python (new interface) + +```python +from wiktra.Wiktra import Transliterator +tr = Transliterator() +print(tr.tr("Привет", "ru", "Cyrl")) +``` + +### Python (legacy `translite` function) + +```python +from wiktra.Wiktra import translite as tr +``` + +With the function `translite`, you need to provide the text and the lang code (see table below for reference): ```sh #mongolian script @@ -59,11 +109,8 @@ tr('हिंदी लिपि', 'hin') > hindee lipi ``` -## Example comparison with the state-of-the-art universal transliteration tools -![alt text](http://ukc.disi.unitn.it/wp-content/uploads/2019/08/comparison_wiktra.jpg) - -## Supporting Languages +#### Languages supported by the legacy `translite` function | | Language | iso-3 in use | wiktionary code | Supporting script | examples | @@ -260,12 +307,10 @@ tr('हिंदी लिपि', 'hin') | 189 | Yakut | sah | sah | Cyrillic | | | 190 | Modern Greek (new) | ell | el | Greek | [tests](https://en.wiktionary.org/wiki/Module:el-translit/testcases)| +## Updating -## License -This tool is available under the Creative Commons Attribution-ShareAlike License. Read more about this license from https://creativecommons.org/licenses/by-sa/3.0/. - +This tool can update its stored Wiktionary modules.
See `wiktrapy_update -h` for details. -## Reference -For the acedemic use, please cite the following article: +## License -Khuyagbaatar Batsuren, Gabor Bella, and Fausto Giunchiglia – CogNet: A large-scale cognate database, Proceedings of The 57th Annual Meeting of the Association for Computational Linguistics (ACL), 2019. +This tool is available under the [GPLv2](./LICENSE.txt) license. diff --git a/Wiktra.py b/Wiktra.py deleted file mode 100644 index afcd709..0000000 --- a/Wiktra.py +++ /dev/null @@ -1,220 +0,0 @@ -import os -os.chdir(os.path.dirname(__file__)) - -import lupa -from lupa import LuaRuntime - -lua = LuaRuntime(unpack_returned_tuples=True) -#lua.execute("dir = io.popen'cd':read'*l'") -#lua.execute('package.path = package.path .. ";" .. dir.."\\\?.lua"') -lua.execute("mw = require('wikt.mw')") -lua.execute("mw.text = require('wikt.mw-text')") -lua.execute("mw.ustring = require ('wikt.ustring.ustring')") - -lang_map = { - 'inc-mas': ('inc-mas',''), - 'amh' : ('ethi','Ethi'), - 'byn' : ('ethi','Ethi'), - 'sem-cha' : ('ethi','Ethi'), - 'gez' : ('ethi','Ethi'), - 'har' : ('ethi','Ethi'), - 'tig' : ('ethi','Ethi'), - 'tir' : ('ethi','Ethi'), - 'rus' : ('ru', 'Cyrl'), - 'cre' : ('cr', 'Cans'), - 'pka' : ('brah', 'Brah'), - 'kho' : ('brah', 'Brah'), - 'inc-mgd' : ('brah', 'Brah'), - 'pmh' : ('brah', 'Brah'), - 'oty' : ('brah', 'Brah'), - 'inc-psc' : ('brah', 'Brah'), - 'psu' : ('brah', 'Brah'), - 'inc-ash' : ('brah', 'Brah'), - 'xpr' : ('prti', 'Parthian'), - 'ete' : ('cprt', 'Cprt'), - 'ave' : ('avst', 'Avst'), - 'guj' : ('gu', 'Gujr'), - 'vgr' : ('gu', 'Gujr'), - 'kfr' : ('gu', 'Gujr'), - 'ykg' : ('ykg', 'Cyrl'), - 'ady' : ('ady', 'Cyrl'), - 'ava' : ('av', 'Cyrl'), - 'ukr' : ('uk', 'Cyrl'), - 'xal' : ('xal', 'Cyrl'), - 'tib' : ('bo', 'Tibt'), - 'bod' : ('bo', 'Tibt'), - 'heb' : ('he', 'Hebr'), - 'mya' : ('my', 'Mymr'), - 'rue' : ('rue', 'Cyrl'), - 'kdr' : ('kdr', 'Cyrl'), - 'sin' : ('si', 'Sinh'), - 'kum' : ('kum', 'Cyrl'), - 'arm' : ('armn', 'Armn'), - 'bud'
: ('bdk', 'Cyrl'), - 'dng' : ('dng', 'Cyrl'), - 'urd' : ('ur', 'ur-Arab'), - 'got' : ('goth', 'Goth'), - 'qwm' : ('armn', 'Armn'), - 'ben' : ('bn', 'Beng'), - 'lif' : ('lif', 'Limb'), - 'pra' : ('inc-ash', 'Brah'), - 'mas' : ('inc-mas', 'Beng'), - 'tha' : ('th', 'Thai'), - 'lep' : ('lep', 'Lepc'), - 'eng' : ('en', 'Latn'), - 'abq' : ('abq', 'Cyrl'), - 'bua' : ('bua', 'Cyrl'), - 'lez' : ('lez', 'Cyrl'), - 'mon' : ('mon', 'Cyrl'), - 'uig' : ('ug', 'ug-Arab'), - 'kjj' : ('kjj', 'Cyrl'), - 'chr' : ('cher', 'Cher'), - 'bho' : ('bho', 'Deva'), - 'new' : ('new', 'Deva'), - 'hin' : ('hi', 'Deva'), - 'awa' : ('hi', 'Deva'), - 'bfy' : ('hi', 'Deva'), - 'bhd' : ('hi', 'Deva'), - 'kfs' : ('hi', 'Deva'), - 'bra' : ('hi', 'Deva'), - 'bns' : ('hi', 'Deva'), - 'cdh' : ('hi', 'Deva'), - 'cdj' : ('hi', 'Deva'), - 'doi-d' : ('hi', 'Deva'), - 'gbk' : ('hi', 'Deva'), - 'gbm' : ('hi', 'Deva'), - 'bgc' : ('hi', 'Deva'), - 'xnr' : ('hi', 'Deva'), - 'kfx' : ('hi', 'Deva'), - 'bfz' : ('hi', 'Deva'), - 'mjl' : ('hi', 'Deva'), - 'mwr' : ('hi', 'Deva'), - 'mtr' : ('hi', 'Deva'), - 'unr' : ('hi', 'Deva'), - 'bpx' : ('hi', 'Deva'), - 'pgg' : ('hi', 'Deva'), - 'mal' : ('ml', 'Mlym'), - 'pal-m' : ('mani', 'Mani'), - 'xpr' : ('mani', 'Mani'), - 'sog' : ('mani', 'Mani'), - 'mdf' : ('mdf', 'Cyrl'), - 'sat' : ('sat', 'Olck'), - 'tel' : ('te', 'Telu'), - 'mkd' : ('mk', 'Cyrl'), - 'sjd' : ('sjd', 'Cyrl'), - 'kbd' : ('kbd', 'Cyrl'), - 'abk' : ('ab', 'Cyrl'), - 'kor' : ('ko', 'Kore'), - 'mns' : ('mns', 'Cyrl'), - 'kca' : ('kca', 'Cyrl'), - 'inc-ash-k' : ('inc-ash-k', 'Khar'), - 'pgd-k' : ('pgd-k', 'Khar'), - 'bel' : ('be', 'Cyrl'), - 'koi' : ('kv', 'Cyrl'), - 'kpv' : ('kv', 'Cyrl'), - 'cop' : ('copt', 'Copt'), - 'kmr' : ('kmr', 'Cyrl'), - 'lbe' : ('lbe', 'Cyrl'), - 'lak' : ('lbe', 'Cyrl'), - 'ita-old' : ('ital', 'Ital'), - 'lao' : ('lo', 'Laoo'), - 'mar' : ('mr', 'Deva'), - 'kok' : ('mr', 'Deva'), - 'udi' : ('udi', 'Cyrl'), - 'eth' : ('ethi', 'Ethi'), - 'gmy' : ('linb', 'Linb'), - 'iku' : ('iu', 'Cans'), 
- 'myv' : ('myv', 'Cyrl'), - 'kan' : ('kn', 'Knda'), - 'tam' : ('ta', 'Taml'), - 'kaa' : ('kaa', 'Cyrl'), - 'bul' : ('bg', 'Cyrl'), - 'jav' : ('jv', 'Javn'), - 'oss' : ('os', 'Cyrl'), - 'che' : ('ce', 'Cyrl'), - 'kas' : ('ks', 'ks-Arab'), - 'kas-d' : ('ks-deva', 'ks-Deva'), - 'pal-p' : ('phli', 'Phli'), - 'sah' : ('sah', 'Cyrl'), - 'chm' : ('chm', 'Cyrl'), - 'kaz' : ('kk', 'Cyrl'), - 'kjh' : ('kjh', 'Cyrl'), - 'fas' : ('fa', 'fa-Arab'), - 'iii' : ('ii', 'Yiii'), - 'tgk' : ('tg', 'Cyrl'), - 'wbl' : ('tg', 'Cyrl'), - 'yai' : ('tg', 'Cyrl'), - 'bak' : ('ba', 'Cyrl'), - 'krc' : ('krc', 'Cyrl'), - 'ori' : ('or', 'Orya'), - 'eve' : ('eve', 'Cyrl'), - 'khm' : ('km', 'Khmr'), - 'xbc' : ('xbc', 'Grek'), - 'txh' : ('el', 'Grek'), - 'ell' : ('el', 'Grek'), - 'san' : ('sa', 'Deva'), - 'inc-ohi' : ('sa', 'Deva'), - 'omr' : ('sa', 'Deva'), - 'inc-tak' : ('sa', 'Deva'), - 'inc-vra' : ('sa', 'Deva'), - 'nsk' : ('nsk', 'Cans'), - 'udm' : ('udm', 'Cyrl'), - 'nog' : ('nog', 'Cyrl'), - 'can' : ('cans', 'Cans'), - 'ain' : ('ain', 'Kana'), - 'nep' : ('ne', 'Deva'), - 'dty' : ('ne', 'Deva'), - 'bbl' : ('geor', 'Geor'), - 'geo' : ('geor', 'Geor'), - 'lzz' : ('geor', 'Geor'), - 'xmf' : ('geor', 'Geor'), - 'oge' : ('geor', 'Geor'), - 'geo' : ('geor', 'Geor'), - 'udi' : ('geor', 'Geor'), - 'niv' : ('niv', 'Cyrl'), - 'xlc' : ('lyci', 'Lyci'), - 'xhd' : ('sarb', 'Sarb'), - 'xha' : ('sarb', 'Sarb'), - 'inm' : ('sarb', 'Sarb'), - 'xqt' : ('sarb', 'Sarb'), - 'xsa' : ('sarb', 'Sarb'), - 'chu' : ('cv', 'Cyrl'), - 'peo' : ('peo', 'Xpeo'), - 'chu-old-c' : ('cyrs-glag', 'Cyrs'), - 'chu-old-g' : ('cyrs-glag', 'Glag'), - 'orv' : ('cyrs-glag', 'Cyrs'), - 'zle-ono-c' : ('cyrs-glag', 'Cyrs'), - 'zle-ono-g' : ('cyrs-glag', 'Glag'), - 'dar' : ('dar', 'Cyrl'), - 'pan' : ('guru', 'Guru'), - 'alt' : ('altai', 'Cyrl'), - 'inh' : ('inh', 'Cyrl'), - 'ara' : ('ar', 'Arab'), - 'tyv' : ('tyv', 'Cyrl'), - 'gre' : ('el', 'Grek'), - 'tat' : ('tt', 'Cyrl'), - 'aho' : ('ahom', 'Ahom'), - 'khb' : ('talu', ''), - 'xld' : 
def translite(text, lang):
    """Transliterate ``text`` with the Wiktionary module registered for ``lang``.

    ``lang`` is looked up case-insensitively in ``lang_map``, which yields a
    ``(module, script)`` pair.  The Lua module
    ``wikt.translit.<module>-translit`` is loaded with ``require`` and its
    ``tr`` function is called with ``(text, module, script)``.

    The text is handed to Lua as a function argument instead of being pasted
    into generated Lua source, so quotes, backslashes and newlines inside
    ``text`` can neither break the call nor inject Lua code.

    Args:
        text: The string to transliterate.
        lang: A key of ``lang_map``; matched after ``lower()``.

    Returns:
        The first value returned by the Lua ``tr`` function.

    Raises:
        KeyError: if ``lang`` has no entry in ``lang_map``.
    """
    module, script = lang_map[lang.lower()]
    # Assigning to a local first truncates a multi-value Lua return to its
    # first value, which is what the former `res = ...` assignment did.
    call_tr = lua.eval(
        "function(mod, text, code, sc)"
        " local res = require(mod).tr(text, code, sc)"
        " return res "
        "end"
    )
    return call_tr("wikt.translit." + module + "-translit", text, module, script)
def get_version(*args):
    """Return the package version declared in ``<NAME>/__init__.py``.

    The file is searched for a line of the form ``__version__ = "x.y.z"``
    (single or double quotes).  Positional arguments are accepted for
    backward compatibility and are ignored.

    Returns:
        The version string, or ``"undefined"`` when the file contains no
        ``__version__`` assignment.

    Raises:
        OSError: if ``<NAME>/__init__.py`` cannot be opened.
    """
    # The context manager closes the handle deterministically, and the
    # explicit encoding keeps the read independent of the build machine's locale.
    with open(get_absolute_path(NAME, "__init__.py"), "rt", encoding="utf-8") as handle:
        source = handle.read()
    match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", source, re.M)
    return match.group(1) if match else "undefined"
# Package metadata.  The licence classifier has to agree with ``license`` and
# with the GPL v2 text shipped in LICENSE.txt, otherwise package indexes list
# the project under the wrong licence.
setup(
    name=NAME,
    version="2.0.0",
    description="Transliteration tool using Wiktionary transliteration modules",
    long_description=long_description,
    long_description_content_type="text/markdown",
    author="Khuyagbaatar Batsuren",
    author_email="khuyagbaatar@gmail.com",
    url=f"https://twardoch.github.io/{NAME}2/",
    project_urls={"Source": f"https://github.com/twardoch/{NAME}2/"},
    license="GPLv2",
    download_url=f"https://github.com/twardoch/{NAME}2",
    python_requires=">=3.9",
    # Runtime dependencies are kept in requirements.txt only.
    install_requires=get_requirements("requirements.txt"),
    packages=find_packages(),
    include_package_data=True,
    scripts=[],
    keywords=[
        "dictionary",
        "wiktionary",
        "transliteration",
        "transliterate",
        "lua",
    ],
    classifiers=[
        "Development Status :: 3 - Alpha",
        "Intended Audience :: Developers",
        "Intended Audience :: Science/Research",
        "License :: OSI Approved :: GNU General Public License v2 (GPLv2)",
        "Environment :: Console",
        "Natural Language :: English",
        "Operating System :: POSIX :: Linux",
        "Programming Language :: Python",
        "Programming Language :: Python :: 3 :: Only",
        "Topic :: Text Processing",
        "Topic :: Text Processing :: Linguistic",
    ],
    # Console commands: <NAME>py and <NAME>py_update.
    entry_points=f"""
        [console_scripts]
        {NAME}py={NAME}.__main__:main
        {NAME}py_update={NAME}.update:main
    """,
)
-- Replacement table for htmlEncode(): each character that is special inside
-- HTML text or a double-quoted attribute value maps to its entity.
-- HtmlBuilder.setupInterface() runs the strip-marker prefix/suffix through
-- the same table to build the patterns that undo the encoding for markers.
local htmlencodeMap = {
	['>'] = '&gt;',
	['<'] = '&lt;',
	['&'] = '&amp;',
	['"'] = '&quot;',
}
--- --- @param s -local function isValidTag( s ) - return s:match( '^[a-zA-Z0-9]+$' ) -end - --- Escape a value, for use in HTML --- --- @param s -local function htmlEncode( s ) - -- The parentheses ensure that there is only one return value - local tmp = string.gsub( s, '[<>&"]', htmlencodeMap ); - -- Don't encode strip markers here (T110143) - tmp = string.gsub( tmp, options.encodedUniqPrefixPat, options.uniqPrefixRepl ) - tmp = string.gsub( tmp, options.encodedUniqSuffixPat, options.uniqSuffixRepl ) - return tmp -end - -local function cssEncode( s ) - -- mw.ustring is so slow that it's worth searching the whole string - -- for non-ASCII characters to avoid it if possible - return ( string.find( s, '[^%z\1-\127]' ) and mw.ustring or string ) - -- XXX: I'm not sure this character set is complete. - -- bug #68011: allow delete character (\127) - .gsub( s, '[^\32-\57\60-\127]', function ( m ) - return string.format( '\\%X ', mw.ustring.codepoint( m ) ) - end ) -end - --- Create a builder object. This is a separate function so that we can show the --- correct error levels in both HtmlBuilder.create and metatable.tag. --- --- @param tagName --- @param args -local function createBuilder( tagName, args ) - if tagName ~= nil and tagName ~= '' and not isValidTag( tagName ) then - error( string.format( "invalid tag name '%s'", tagName ), 3 ) - end - - args = args or {} - local builder = {} - setmetatable( builder, metatable ) - builder.nodes = {} - builder.attributes = {} - builder.styles = {} - - if tagName ~= '' then - builder.tagName = tagName - end - - builder.parent = args.parent - builder.selfClosing = selfClosingTags[tagName] or args.selfClosing or false - return builder -end - --- Append a builder to the current node. This is separate from methodtable.node --- so that we can show the correct error level in both methodtable.node and --- methodtable.wikitext. 
-- Serialise node `t` and everything below it by appending string fragments
-- to the array `ret` (the caller joins them with table.concat).
--
-- A node without a tagName contributes only its children. A self-closing
-- node is emitted as '<tag ... />' and its children are not visited.
--
-- @param t builder node
-- @param ret array of output fragments, modified in place
methodtable._build = function( t, ret )
	if t.tagName then
		table.insert( ret, '<' .. t.tagName )
		for _, attr in ipairs( t.attributes ) do
			table.insert(
				ret,
				-- Note: Attribute names have already been validated
				' ' .. attr.name .. '="' .. htmlEncode( attr.val ) .. '"'
			)
		end
		if #t.styles > 0 then
			table.insert( ret, ' style="' )
			local css = {}
			for _, prop in ipairs( t.styles ) do
				if type( prop ) ~= 'table' then -- added with cssText()
					table.insert( css, htmlEncode( prop ) )
				else -- added with css()
					table.insert(
						css,
						htmlEncode( cssEncode( prop.name ) .. ':' .. cssEncode( prop.val ) )
					)
				end
			end
			table.insert( ret, table.concat( css, ';' ) )
			table.insert( ret, '"' )
		end
		if t.selfClosing then
			table.insert( ret, ' />' )
			return
		end
		table.insert( ret, '>' )
	end
	for _, node in ipairs( t.nodes ) do
		if node then
			if type( node ) == 'table' then
				node:_build( ret )
			else
				table.insert( ret, tostring( node ) )
			end
		end
	end
	if t.tagName then
		-- Close the element; without this every non-void tag stays open.
		table.insert( ret, '</' .. t.tagName .. '>' )
	end
end
--- --- @param tagName --- @param args -methodtable.tag = function( t, tagName, args ) - checkType( 'tag', 1, tagName, 'string' ) - checkType( 'tag', 2, args, 'table', true ) - args = args or {} - - args.parent = t - local builder = createBuilder( tagName, args ) - t:node( builder ) - return builder -end - --- Get the value of an html attribute --- --- @param name -methodtable.getAttr = function( t, name ) - checkType( 'getAttr', 1, name, 'string' ) - - local attr = getAttr( t, name ) - return attr and attr.val -end - --- Set an HTML attribute on the node. --- --- @param name Attribute to set, alternative table of name-value pairs --- @param val Value of the attribute. Nil causes the attribute to be unset -methodtable.attr = function( t, name, val ) - if type( name ) == 'table' then - if val ~= nil then - error( - "bad argument #2 to 'attr' " .. - '(if argument #1 is a table, argument #2 must be left empty)', - 2 - ) - end - - local callForTable = function() - for attrName, attrValue in pairs( name ) do - t:attr( attrName, attrValue ) - end - end - - if not pcall( callForTable ) then - error( - "bad argument #1 to 'attr' " .. 
-- Set a CSS property to be added to the node's style attribute.
--
-- When `name` is a table, every key/value pair in it is applied in turn and
-- `val` must be nil. Otherwise an existing property with the same name is
-- updated (or removed when `val` is nil), and a new property is appended
-- when none exists and `val` is not nil.
--
-- @param name CSS attribute to set, alternative table of name-value pairs
-- @param val The value to set. Nil causes it to be unset
-- @return the node itself, to allow chaining
methodtable.css = function( t, name, val )
	if type( name ) == 'table' then
		if val ~= nil then
			error(
				"bad argument #2 to 'css' " ..
				'(if argument #1 is a table, argument #2 must be left empty)',
				2
			)
		end

		local callForTable = function()
			for attrName, attrValue in pairs( name ) do
				t:css( attrName, attrValue )
			end
		end

		if not pcall( callForTable ) then
			error(
				"bad argument #1 to 'css' " ..
				'(table keys and values must be strings or numbers)',
				2
			)
		end

		return t
	end

	checkTypeMulti( 'css', 1, name, { 'string', 'number' } )
	checkTypeMulti( 'css', 2, val, { 'string', 'number', 'nil' } )

	for i, prop in ipairs( t.styles ) do
		-- t.styles also holds raw strings/numbers added with cssText() or
		-- attr( 'style', ... ); only table entries are named properties, and
		-- indexing a raw number entry would raise an error.
		if type( prop ) == 'table' and prop.name == name then
			if val ~= nil then
				prop.val = val
			else
				table.remove( t.styles, i )
			end
			return t
		end
	end

	if val ~= nil then
		table.insert( t.styles, { name = name, val = val } )
	end

	return t
end
-methodtable.allDone = function( t ) - while t.parent do - t = t.parent - end - return t -end - --- Create a new instance --- --- @param tagName --- @param args -function HtmlBuilder.create( tagName, args ) - checkType( 'mw.html.create', 1, tagName, 'string', true ) - checkType( 'mw.html.create', 2, args, 'table', true ) - return createBuilder( tagName, args ) -end - -function HtmlBuilder.setupInterface( opts ) - -- Boilerplate - HtmlBuilder.setupInterface = nil - mw_interface = nil - options = opts - - -- Prepare patterns for unencoding strip markers - options.encodedUniqPrefixPat = string.gsub( options.uniqPrefix, '[<>&"]', htmlencodeMap ); - options.encodedUniqPrefixPat = string.gsub( options.encodedUniqPrefixPat, '%p', '%%%0' ); - options.uniqPrefixRepl = string.gsub( options.uniqPrefix, '%%', '%%%0' ); - options.encodedUniqSuffixPat = string.gsub( options.uniqSuffix, '[<>&"]', htmlencodeMap ); - options.encodedUniqSuffixPat = string.gsub( options.encodedUniqSuffixPat, '%p', '%%%0' ); - options.uniqSuffixRepl = string.gsub( options.uniqSuffix, '%%', '%%%0' ); - - -- Register this library in the "mw" global - mw = mw or {} - mw.html = HtmlBuilder - - package.loaded['mw.html'] = HtmlBuilder -end - -return HtmlBuilder diff --git a/wikt/mw-text.lua b/wikt/mw-text.lua deleted file mode 100755 index 859b5d5..0000000 --- a/wikt/mw-text.lua +++ /dev/null @@ -1,330 +0,0 @@ -local mwtext = {} -local php -local options - -local util = require 'wikt.libraryUtil' -local checkType = util.checkType -local checkTypeForNamedArg = util.checkTypeForNamedArg - -function mwtext.setupInterface( opts ) - -- Boilerplate - mwtext.setupInterface = nil - php = mw_interface - mw_interface = nil - options = opts - - -- Register this library in the "mw" global - mw = mw or {} - mw.text = mwtext - - package.loaded['mw.text'] = mwtext -end - -function mwtext.trim( s, charset ) - charset = charset or '\t\r\n\f ' - s = mw.ustring.gsub( s, '^[' .. charset .. ']*(.-)[' .. charset .. 
-- Character -> entity table used by mwtext.encode(). '\194\160' is the UTF-8
-- encoding of U+00A0 (no-break space).
local htmlencode_map = {
	['>'] = '&gt;',
	['<'] = '&lt;',
	['&'] = '&amp;',
	['"'] = '&quot;',
	["'"] = '&#039;',
	['\194\160'] = '&nbsp;',
}
-- Inverse table (entity -> character) used by mwtext.decode().
local htmldecode_map = {}
for k, v in pairs( htmlencode_map ) do
	htmldecode_map[v] = k
end
-- Escape `s` so that it is not interpreted as wikitext.
--
-- Replacements are applied in this order: single characters through
-- nowikiRepl1, line-start sequences through nowikiRepl2, a leading '----',
-- '__', '://', the ISBN/RFC/PMID magic words through nowikiReplMagic, and
-- finally every pattern/replacement pair in options.nowiki_protocols.
--
-- @param s string to escape
-- @return the escaped string
function mwtext.nowiki( s )
	-- string.gsub is safe here, because we're only caring about ASCII chars
	s = string.gsub( s, '["&\'<=>%[%]{|}]', nowikiRepl1 )
	-- A newline is prepended so the line-start rules also match at the very
	-- beginning of the text; string.sub( s, 2 ) below removes it again.
	s = '\n' .. s
	s = string.gsub( s, '[\r\n][#*:; \n\r\t]', nowikiRepl2 )
	s = string.gsub( s, '([\r\n])%-%-%-%-', '%1&#45;---' )
	s = string.sub( s, 2 )
	s = string.gsub( s, '__', '_&#95;' )
	s = string.gsub( s, '://', '&#58;//' )
	s = string.gsub( s, 'ISBN%s', nowikiReplMagic )
	s = string.gsub( s, 'RFC%s', nowikiReplMagic )
	s = string.gsub( s, 'PMID%s', nowikiReplMagic )
	for k, v in pairs( options.nowiki_protocols ) do
		s = string.gsub( s, k, v )
	end

	return s
end
")", 2 ) - end - end - - local tp = type( content ) - if content == nil then - ret[#ret+1] = '>' - elseif content == false then - ret[#ret+1] = ' />' - elseif tp == 'string' or tp == 'number' then - ret[#ret+1] = '>' - ret[#ret+1] = content - ret[#ret+1] = '' - else - if named then - checkTypeForNamedArg( 'tag', 'content', content, 'string, number, nil, or false' ) - else - checkType( 'tag', 3, content, 'string, number, nil, or false' ) - end - end - - return table.concat( ret ) -end - -function mwtext.unstrip( s ) - return php.unstrip( s ) -end - -function mwtext.unstripNoWiki( s ) - return php.unstripNoWiki( s ) -end - -function mwtext.killMarkers( s ) - return php.killMarkers( s ) -end - -function mwtext.split( text, pattern, plain ) - local ret = {} - for m in mwtext.gsplit( text, pattern, plain ) do - ret[#ret+1] = m - end - return ret -end - -function mwtext.gsplit( text, pattern, plain ) - local s, l = 1, mw.ustring.len( text ) - return function () - if s then - local e, n = mw.ustring.find( text, pattern, s, plain ) - local ret - if not e then - ret = mw.ustring.sub( text, s ) - s = nil - elseif n < e then - -- Empty separator! - ret = mw.ustring.sub( text, s, e ) - if e < l then - s = e + 1 - else - s = nil - end - else - ret = e > s and mw.ustring.sub( text, s, e - 1 ) or '' - s = n + 1 - end - return ret - end - end, nil, nil -end - -function mwtext.listToText( list, separator, conjunction ) - separator = separator or options.comma - conjunction = conjunction or options['and'] - local n = #list - - local ret - if n > 1 then - ret = table.concat( list, separator, 1, n - 1 ) .. conjunction .. 
-- Check for stuff that can't even be passed to PHP properly and other stuff
-- that gives different error messages in different versions of PHP.
--
-- Raises an error for: a table that contains itself (directly or through
-- nested tables), an infinite number used as a table key, a table key that
-- is neither a number nor a string, a NaN or infinite number value, and any
-- value that is not a table, number, boolean, string or nil.
--
-- @param t value to validate
-- @param seen set of tables on the current descent path
-- @param lvl error level passed to error(); incremented for nested values
local function checkForJsonEncode( t, seen, lvl )
	local tp = type( t )
	if tp == 'table' then
		if seen[t] then
			error( "mw.text.jsonEncode: Cannot use recursive tables", lvl )
		end
		seen[t] = 1
		for k, v in pairs( t ) do
			local ktp = type( k )
			if ktp == 'number' then
				if k >= math.huge or k <= -math.huge then
					error( "mw.text.jsonEncode: Cannot use 'inf' as a table key", lvl )
				end
			elseif ktp ~= 'string' then
				error( string.format( "mw.text.jsonEncode: Cannot use type '%s' as a table key", ktp ), lvl )
			end
			checkForJsonEncode( v, seen, lvl + 1 )
		end
		-- Unmark on the way out so a table referenced from two places
		-- (shared, but not cyclic) is still accepted.
		seen[t] = nil
	elseif tp == 'number' then
		-- t ~= t is true only for NaN
		if t ~= t or t >= math.huge or t <= -math.huge then
			error( "mw.text.jsonEncode: Cannot encode non-finite numbers", lvl )
		end
	elseif tp ~= 'boolean' and tp ~= 'string' and tp ~= 'nil' then
		error( string.format( "mw.text.jsonEncode: Cannot encode type '%s'", tp ), lvl )
	end
end
-- Ordering used as the __lt metamethod of title objects: compare by
-- interwiki first, then by namespace, then by text. The first field that
-- differs decides the result.
--
-- @param a left-hand title
-- @param b right-hand title
-- @return true when a sorts strictly before b
local function lt( a, b )
	local before
	if a.interwiki ~= b.interwiki then
		before = a.interwiki < b.interwiki
	elseif a.namespace ~= b.namespace then
		before = a.namespace < b.namespace
	else
		before = a.text < b.text
	end
	return before
end
data.prefixedText = data.text - if data.nsText ~= '' then - data.prefixedText = string.gsub( data.nsText .. ':' .. data.prefixedText, '_', ' ' ) - end - if data.interwiki ~= '' then - data.prefixedText = data.interwiki .. ':' .. data.prefixedText - end - - local firstSlash, lastSlash - if ns.hasSubpages then - firstSlash, lastSlash = string.match( data.text, '^[^/]*().*()/[^/]*$' ) - end - if firstSlash then - data.isSubpage = true - data.rootText = string.sub( data.text, 1, firstSlash - 1 ) - data.baseText = string.sub( data.text, 1, lastSlash - 1 ) - data.subpageText = string.sub( data.text, lastSlash + 1 ) - else - data.isSubpage = false - data.rootText = data.text - data.baseText = data.text - data.subpageText = data.text - end - - function data:inNamespace( ns ) - checkSelf( self, 'inNamespace' ) - ns = checkNamespace( 'inNamespace', 1, ns ) - return ns == self.namespace - end - - function data:inNamespaces( ... ) - checkSelf( self, 'inNamespaces' ) - for i = 1, select( '#', ... ) do - local ns = checkNamespace( 'inNamespaces', i, select( i, ... ) ) - if ns == self.namespace then - return true - end - end - return false - end - - function data:hasSubjectNamespace( ns ) - checkSelf( self, 'hasSubjectNamespace' ) - ns = checkNamespace( 'hasSubjectNamespace', 1, ns ) - return ns == mw.site.namespaces[self.namespace].subject.id - end - - function data:isSubpageOf( title ) - checkSelf( self, 'isSubpageOf' ) - checkType( 'isSubpageOf', 1, title, 'table' ) - return self.interwiki == title.interwiki and - self.namespace == title.namespace and - title.text .. '/' == string.sub( self.text, 1, #title.text + 1 ) - end - - function data:subPageTitle( text ) - checkSelf( self, 'subpageTitle' ) - checkType( 'subpageTitle', 1, text, 'string' ) - return title.makeTitle( data.namespace, data.text .. '/' .. 
text ) - end - - function data:partialUrl() - checkSelf( self, 'partialUrl' ) - return data.thePartialUrl - end - - function data:fullUrl( query, proto ) - checkSelf( self, 'fullUrl' ) - return php.getUrl( self.fullText, 'fullUrl', query, proto ) - end - - function data:localUrl( query ) - checkSelf( self, 'localUrl' ) - return php.getUrl( self.fullText, 'localUrl', query ) - end - - function data:canonicalUrl( query ) - checkSelf( self, 'canonicalUrl' ) - return php.getUrl( self.fullText, 'canonicalUrl', query ) - end - - function data:getContent() - checkSelf( self, 'getContent' ) - local content = php.getContent( self.fullText ) - data.getContent = function ( self ) - checkSelf( self, 'getContent' ) - return content - end - return content - end - - -- Known fields, both those defined above and any dynamically handled in - -- __index. Truthy values represent read-only, and falsey values represent - -- read-write. If the value is the string 'e', expensive data will be loaded - -- if the field is read. 
- local readOnlyFields = { - fragment = false, - fullText = true, - rootPageTitle = true, - basePageTitle = true, - talkPageTitle = true, - subjectPageTitle = true, - fileExists = true, - file = true, - protectionLevels = true, - cascadingProtection = true, - exists = 'e', - isRedirect = 'e', - contentModel = 'e', - id = 'e', - redirectTarget = true, - } - for k in pairs( data ) do - readOnlyFields[k] = true - end - - local function pairsfunc( t, k ) - local v - repeat - k = next( readOnlyFields, k ) - if k == nil then - return nil - end - v = t[k] - until v ~= nil - return k, v - end - - return setmetatable( obj, { - __eq = title.equals, - __lt = lt, - __pairs = function ( t ) - return pairsfunc, t, nil - end, - __index = function ( t, k ) - if k == 'exists' and data.namespace == NS_MEDIA then - k = 'fileExists' - end - - if readOnlyFields[k] == 'e' and data[k] == nil then - for k,v in pairs( php.getExpensiveData( t.fullText ) ) do - data[k] = v - end - end - - if k == 'fullText' then - if data.fragment ~= '' then - return data.prefixedText .. '#' .. data.fragment - else - return data.prefixedText - end - end - - if k == 'rootPageTitle' then - return title.makeTitle( data.namespace, data.rootText ) - end - if k == 'basePageTitle' then - return title.makeTitle( data.namespace, data.baseText ) - end - if k == 'talkPageTitle' then - local ns = mw.site.namespaces[data.namespace].talk - if not ns then - return nil - end - if ns.id == data.namespace then - return obj - end - return title.makeTitle( ns.id, data.text ) - end - if k == 'subjectPageTitle' then - local ns = mw.site.namespaces[data.namespace].subject - if ns.id == data.namespace then - return obj - end - return title.makeTitle( ns.id, data.text ) - end - if k == 'file' then - if data.file == nil then - data.file = php.getFileInfo( data.prefixedText ) - end - return data.file or nil - end - if k == 'fileExists' then -- Kept for backward compatibility. 
Since 1.25, file.exists is preferred over this - return t.file and t.file.exists - end - if k == 'protectionLevels' then - if data.protectionLevels == nil then - data.protectionLevels = php.protectionLevels( data.prefixedText ) - end - return data.protectionLevels - end - if k == 'cascadingProtection' then - if data.cascadingProtection == nil then - data.cascadingProtection = php.cascadingProtection( data.prefixedText ) - end - return data.cascadingProtection - end - if k == 'redirectTarget' then - if data.redirectTarget == nil then - data.redirectTarget = makeTitleObject( php.redirectTarget( data.prefixedText ) ) or false - end - return data.redirectTarget - end - - return data[k] - end, - __newindex = function ( t, k, v ) - if k == 'fragment' then - checkTypeForIndex( k, v, 'string' ) - v = string.gsub( v, '[%s_]+', ' ' ) - v = string.gsub( v, '^(.-) ?$', '%1' ) - data[k] = v - elseif readOnlyFields[k] then - error( "index '" .. k .. "' is read only", 2 ) - else - readOnlyFields[k] = v and false -- assigns nil if v == nil, false otherwise - rawset( t, k, v ) - end - end, - __tostring = function ( t ) - return t.prefixedText - end - } ) -end - -function title.setupInterface( options ) - -- Boilerplate - title.setupInterface = nil - php = mw_interface - mw_interface = nil - NS_MEDIA = options.NS_MEDIA - - -- Set current title - title.getCurrentTitle = function () - return makeTitleObject( mw.clone( options.thisTitle ) ) - end - - -- Register this library in the "mw" global - mw = mw or {} - mw.title = title - - package.loaded['mw.title'] = title -end - -function title.new( text_or_id, defaultNamespace ) - return makeTitleObject( php.newTitle( text_or_id, defaultNamespace ) ) -end - -function title.makeTitle( ns, title, fragment, interwiki ) - return makeTitleObject( php.makeTitle( ns, title, fragment, interwiki ) ) -end - -function title.equals( a, b ) - return a.interwiki == b.interwiki and - a.namespace == b.namespace and - a.text == b.text -end - -function 
title.compare( a, b ) - if a.interwiki ~= b.interwiki then - return a.interwiki < b.interwiki and -1 or 1 - end - if a.namespace ~= b.namespace then - return a.namespace < b.namespace and -1 or 1 - end - if a.text ~= b.text then - return a.text < b.text and -1 or 1 - end - return 0 -end - -return title diff --git a/wikt/mw.hash.lua b/wikt/mw.hash.lua deleted file mode 100755 index 95026e3..0000000 --- a/wikt/mw.hash.lua +++ /dev/null @@ -1,30 +0,0 @@ -local hash = {} -local php - -local util = require 'libraryUtil' -local checkType = util.checkType - -function hash.listAlgorithms() - return php.listAlgorithms() -end - -function hash.hashValue( algo, value ) - checkType( 'hashValue', 1, algo, 'string' ) - checkType( 'hashValue', 2, value, 'string' ) - - return php.hashValue( algo, value ) -end - -function hash.setupInterface() - -- Boilerplate - php = mw_interface - mw_interface = nil - - -- Register this library in the "mw" global - mw = mw or {} - mw.hash = hash - - package.loaded['mw.hash'] = hash -end - -return hash diff --git a/wikt/mw.language.lua b/wikt/mw.language.lua deleted file mode 100755 index edd7110..0000000 --- a/wikt/mw.language.lua +++ /dev/null @@ -1,189 +0,0 @@ -local language = {} -local php -local util = require 'libraryUtil' - -function language.setupInterface() - -- Boilerplate - language.setupInterface = nil - php = mw_interface - mw_interface = nil - - -- Register this library in the "mw" global - mw = mw or {} - mw.language = language - mw.getContentLanguage = language.getContentLanguage - mw.getLanguage = mw.language.new - - local lang = mw.getContentLanguage(); - - -- Extend ustring - if mw.ustring then - mw.ustring.upper = function ( s ) - return lang:uc( s ) - end - mw.ustring.lower = function ( s ) - return lang:lc( s ) - end - string.uupper = mw.ustring.upper - string.ulower = mw.ustring.lower - end - - package.loaded['mw.language'] = language -end - -function language.isSupportedLanguage( code ) - return php.isSupportedLanguage( 
code ) -end - -function language.isKnownLanguageTag( code ) - return php.isKnownLanguageTag( code ) -end - -function language.isValidCode( code ) - return php.isValidCode( code ) -end - -function language.isValidBuiltInCode( code ) - return php.isValidBuiltInCode( code ) -end - -function language.fetchLanguageName( code, inLanguage ) - return php.fetchLanguageName( code, inLanguage ) -end - -function language.fetchLanguageNames( inLanguage, include ) - return php.fetchLanguageNames( inLanguage, include ) -end - -function language.getFallbacksFor( code ) - return php.getFallbacksFor( code ) -end - -function language.new( code ) - if code == nil then - error( "too few arguments to mw.language.new()", 2 ) - end - - local lang = { code = code } - - local checkSelf = util.makeCheckSelfFunction( 'mw.language', 'lang', lang, 'language object' ) - - local wrappers = { - lcfirst = 1, - ucfirst = 1, - lc = 1, - uc = 1, - caseFold = 1, - formatNum = 1, - formatDate = 1, - formatDuration = 1, - getDurationIntervals = 1, - convertPlural = 2, - convertGrammar = 2, - gender = 2, - } - - for name, numArgs in pairs( wrappers ) do - lang[name] = function ( self, ... ) - checkSelf( self, name ) - if select( '#', ... ) < numArgs then - error( "too few arguments to mw.language:" .. name, 2 ) - end - return php[name]( self.code, ... ) - end - end - - -- This one could use caching - function lang:isRTL() - checkSelf( self, 'isRTL' ) - local rtl = php.isRTL( self.code ) - self.isRTL = function () - return rtl - end - return rtl - end - - -- Fix semantics - function lang:parseFormattedNumber( ... ) - checkSelf( self, 'parseFormattedNumber' ) - if select( '#', ... ) < 1 then - error( "too few arguments to mw.language:parseFormattedNumber", 2 ) - end - return tonumber( php.parseFormattedNumber( self.code, ... 
) ) - end - - -- Alias - lang.plural = lang.convertPlural - - -- Parser function compat - function lang:grammar( case, word ) - checkSelf( self, name ) - return self:convertGrammar( word, case ) - end - - -- Other functions - function lang:getCode() - checkSelf( self, 'getCode' ) - return self.code - end - - function lang:getDir() - checkSelf( self, 'getDir' ) - return self:isRTL() and 'rtl' or 'ltr' - end - - function lang:getDirMark( opposite ) - checkSelf( self, 'getDirMark' ) - local b = self:isRTL() - if opposite then - b = not b - end - return b and '\226\128\143' or '\226\128\142' - end - - function lang:getDirMarkEntity( opposite ) - checkSelf( self, 'getDirMarkEntity' ) - local b = self:isRTL() - if opposite then - b = not b - end - return b and '‏' or '‎' - end - - function lang:getArrow( direction ) - checkSelf( self, 'getArrow' ) - direction = direction or 'forwards' - util.checkType( 'getArrow', 1, direction, 'string' ) - if direction == 'forwards' then - return self:isRTL() and '←' or '→' - elseif direction == 'backwards' then - return self:isRTL() and '→' or '←' - elseif direction == 'left' then - return '←' - elseif direction == 'right' then - return '→' - elseif direction == 'up' then - return '↑' - elseif direction == 'down' then - return '↓' - end - end - - function lang:getFallbackLanguages() - checkSelf( self, 'getFallbackLanguages' ) - return language.getFallbacksFor( self.code ) - end - - return lang -end - -local contLangCode - -function language.getContentLanguage() - if contLangCode == nil then - contLangCode = php.getContLangCode() - end - return language.new( contLangCode ) -end - -return language diff --git a/wikt/mw.lua b/wikt/mw.lua deleted file mode 100755 index 3110278..0000000 --- a/wikt/mw.lua +++ /dev/null @@ -1,784 +0,0 @@ -mw = mw or {} - -local packageCache -local packageModuleFunc -local php -local allowEnvFuncs = false -local logBuffer = '' -local currentFrame -local loadedData = {} -local executeFunctionDepth = 0 - ---- Put 
an isolation-friendly package module into the specified environment --- table. The package module will have an empty cache, because caching of --- module functions from other cloned environments would break module isolation. --- --- @param env The cloned environment -local function makePackageModule( env ) - -- Remove loaders from env, we don't want it inheriting our loadPackage. - if env.package then - env.package.loaders = nil - end - - -- Create the package globals in the given environment - setfenv( packageModuleFunc, env )() - - -- Make a loader function - local function loadPackage( modName ) - local init - if packageCache[modName] == 'missing' then - return nil - elseif packageCache[modName] == nil then - local lib = php.loadPHPLibrary( modName ) - if lib ~= nil then - init = function () - return mw.clone( lib ) - end - else - init = php.loadPackage( modName ) - if init == nil then - packageCache[modName] = 'missing' - return nil - end - end - packageCache[modName] = init - else - init = packageCache[modName] - end - - setfenv( init, env ) - return init - end - - table.insert( env.package.loaders, loadPackage ) -end - ---- Set up the base environment. The PHP host calls this function after any --- necessary host-side initialisation has been done. -function mw.setupInterface( options ) - -- Don't allow any more calls - mw.setupInterface = nil - - -- Don't allow getmetatable() on a non-table, since if you can get the metatable, - -- you can set values in it, breaking isolation - local old_getmetatable = getmetatable - function getmetatable(obj) - if type(obj) == 'table' then - return old_getmetatable(obj) - else - return nil - end - end - - if options.allowEnvFuncs then - allowEnvFuncs = true - end - - -- Store the interface table - -- - -- mw_interface.loadPackage() returns function values with their environment - -- set to the base environment, which would violate module isolation if they - -- were run from a cloned environment. 
We can only allow access to - -- mw_interface.loadPackage via our environment-setting wrapper. - -- - php = mw_interface - mw_interface = nil - - packageModuleFunc = php.loadPackage( 'package' ) - makePackageModule( _G ) - package.loaded.mw = mw - packageCache = {} -end - ---- Create a table like the one os.date() returns, but with a metatable that sets TTLs as the values are looked at. -local function wrapDateTable( now ) - return setmetatable( {}, { - __index = function( t, k ) - if k == 'sec' then - php.setTTL( 1 ) - elseif k == 'min' then - php.setTTL( 60 - now.sec ) - elseif k == 'hour' then - php.setTTL( 3600 - now.min * 60 - now.sec ) - elseif now[k] ~= nil then - php.setTTL( 86400 - now.hour * 3600 - now.min * 60 - now.sec ) - end - t[k] = now[k] - return now[k] - end - } ) -end - ---- Wrappers for os.date() and os.time() that set the TTL of the output, if necessary -local function ttlDate( format, time ) - if time == nil and ( format == nil or type( format ) == 'string' ) then - local now = os.date( format and format:sub( 1, 1 ) == '!' and '!*t' or '*t' ) - if format == '!*t' or format == '*t' then - return wrapDateTable( now ) - end - local cleanedFormat = format and format:gsub( '%%%%', '' ) - if not format or cleanedFormat:find( '%%[EO]?[crsSTX+]' ) then - php.setTTL( 1 ) -- second - elseif cleanedFormat:find( '%%[EO]?[MR]' ) then - php.setTTL( 60 - now.sec ) -- minute - elseif cleanedFormat:find( '%%[EO]?[HIkl]' ) then - php.setTTL( 3600 - now.min * 60 - now.sec ) -- hour - elseif cleanedFormat:find( '%%[EO]?[pP]' ) then - php.setTTL( 43200 - ( now.hour % 12 ) * 3600 - now.min * 60 - now.sec ) -- am/pm - else - -- It's not worth the complexity to figure out the exact TTL of larger units than days. - -- If they haven't used anything shorter than days, then just set the TTL to expire at - -- the end of today. 
- php.setTTL( 86400 - now.hour * 3600 - now.min * 60 - now.sec ) - end - end - return os.date( format, time ) -end - -local function ttlTime( t ) - if t == nil then - php.setTTL( 1 ) - end - return os.time( t ) -end - -local function newFrame( frameId, ... ) - if not php.frameExists( frameId ) then - return nil - end - - local frame = {} - local parentFrameIds = { ... } - local argCache = {} - local argNames - local args_mt = {} - - local function checkSelf( self, method ) - if self ~= frame then - error( "frame:" .. method .. ": invalid frame object. " .. - "Did you call " .. method .. " with a dot instead of a colon, i.e. " .. - "frame." .. method .. "() instead of frame:" .. method .. "()?", - 3 ) - end - end - - -- Getter for args - local function getExpandedArgument( dummy, name ) - name = tostring( name ) - if argCache[name] == nil then - local arg = php.getExpandedArgument( frameId, name ) - if arg == nil then - argCache[name] = false - else - argCache[name] = arg - end - end - if argCache[name] == false then - return nil - else - return argCache[name] - end - end - - args_mt.__index = getExpandedArgument - - -- pairs handler for args - args_mt.__pairs = function () - if not argNames then - local arguments = php.getAllExpandedArguments( frameId ) - argNames = {} - for name, value in pairs( arguments ) do - table.insert( argNames, name ) - argCache[name] = value - end - end - - local index = 0 - return function () - index = index + 1 - if argNames[index] then - return argNames[index], argCache[argNames[index]] - end - end - end - - -- ipairs 'next' function for args - local function argsInext( dummy, i ) - local value = getExpandedArgument( dummy, i + 1 ) - if value then - return i + 1, value - end - end - - args_mt.__ipairs = function () return argsInext, nil, 0 end - - frame.args = {} - setmetatable( frame.args, args_mt ) - - local function newCallbackParserValue( callback ) - local value = {} - local cache - - function value:expand() - if not cache then - 
cache = callback() - end - return cache - end - - return value - end - - function frame:getArgument( opt ) - checkSelf( self, 'getArgument' ) - - local name - if type( opt ) == 'table' then - name = opt.name - else - name = opt - end - - return newCallbackParserValue( - function () - return getExpandedArgument( nil, name ) - end - ) - end - - function frame:getParent() - checkSelf( self, 'getParent' ) - - return newFrame( unpack( parentFrameIds ) ) - end - - local function checkArgs( name, args ) - local ret = {} - for k, v in pairs( args ) do - local tp = type( k ) - if tp ~= 'string' and tp ~= 'number' then - error( name .. ": arg keys must be strings or numbers, " .. tp .. " given", 3 ) - end - tp = type( v ) - if tp == 'boolean' then - ret[k] = v and '1' or '' - elseif tp == 'string' or tp == 'number' then - ret[k] = tostring( v ) - else - error( name .. ": invalid type " .. tp .. " for arg '" .. k .. "'", 3 ) - end - end - return ret - end - - function frame:newChild( opt ) - checkSelf( self, 'newChild' ) - - if type( opt ) ~= 'table' then - error( "frame:newChild: the first parameter must be a table", 2 ) - end - - local title, args - if opt.title == nil then - title = false - else - title = tostring( opt.title ) - end - if opt.args == nil then - args = {} - elseif type( opt.args ) ~= 'table' then - error( "frame:newChild: args must be a table", 2 ) - else - args = checkArgs( 'frame:newChild', opt.args ) - end - - local newFrameId = php.newChildFrame( frameId, title, args ) - return newFrame( newFrameId, frameId, unpack( parentFrameIds ) ) - end - - function frame:expandTemplate( opt ) - checkSelf( self, 'expandTemplate' ) - - local title - - if type( opt ) ~= 'table' then - error( "frame:expandTemplate: the first parameter must be a table" ) - end - if opt.title == nil then - error( "frame:expandTemplate: a title is required" ) - else - if type( opt.title ) == 'table' and opt.title.namespace == 0 then - title = ':' .. 
tostring( opt.title ) - else - title = tostring( opt.title ) - end - end - local args - if opt.args == nil then - args = {} - elseif type( opt.args ) ~= 'table' then - error( "frame:expandTemplate: args must be a table" ) - else - args = checkArgs( 'frame:expandTemplate', opt.args ) - end - - return php.expandTemplate( frameId, title, args ) - end - - function frame:callParserFunction( name, args, ... ) - checkSelf( self, 'callParserFunction' ) - - if type( name ) == 'table' then - name, args = name.name, name.args - if type( args ) ~= 'table' then - args = { args } - end - elseif type( args ) ~= 'table' then - args = { args, ... } - end - - if name == nil then - error( "frame:callParserFunction: a function name is required", 2 ) - elseif type( name ) == 'string' or type( name ) == 'number' then - name = tostring( name ) - else - error( "frame:callParserFunction: function name must be a string or number", 2 ) - end - - args = checkArgs( 'frame:callParserFunction', args ) - - return php.callParserFunction( frameId, name, args ) - end - - function frame:extensionTag( name, content, args ) - checkSelf( self, 'extensionTag' ) - - if type( name ) == 'table' then - name, content, args = name.name, name.content, name.args - end - - if name == nil then - error( "frame:extensionTag: a function name is required", 2 ) - elseif type( name ) == 'string' or type( name ) == 'number' then - name = tostring( name ) - else - error( "frame:extensionTag: tag name must be a string or number", 2 ) - end - - if content == nil then - content = '' - elseif type( content ) == 'string' or type( content ) == 'number' then - content = tostring( content ) - else - error( "frame:extensionTag: content must be a string or number", 2 ) - end - - if args == nil then - args = { content } - elseif type( args ) == 'string' or type( args ) == 'number' then - args = { content, args } - elseif type( args ) == 'table' then - args = checkArgs( 'frame:extensionTag', args ) - table.insert( args, 1, content ) 
- else - error( "frame:extensionTag: args must be a string, number, or table", 2 ) - end - - return php.callParserFunction( frameId, '#tag:' .. name, args ) - end - - function frame:preprocess( opt ) - checkSelf( self, 'preprocess' ) - - local text - if type( opt ) == 'table' then - text = opt.text - else - text = opt - end - text = tostring( text ) - return php.preprocess( frameId, text ) - end - - function frame:newParserValue( opt ) - checkSelf( self, 'newParserValue' ) - - local text - if type( opt ) == 'table' then - text = opt.text - else - text = opt - end - - return newCallbackParserValue( - function () - return self:preprocess( text ) - end - ) - end - - function frame:newTemplateParserValue( opt ) - checkSelf( self, 'newTemplateParserValue' ) - - if type( opt ) ~= 'table' then - error( "frame:newTemplateParserValue: the first parameter must be a table" ) - end - if opt.title == nil then - error( "frame:newTemplateParserValue: a title is required" ) - end - return newCallbackParserValue( - function () - return self:expandTemplate( opt ) - end - ) - end - - function frame:getTitle() - checkSelf( self, 'getTitle' ) - return php.getFrameTitle( frameId ) - end - - -- For backwards compat - function frame:argumentPairs() - checkSelf( self, 'argumentPairs' ) - return pairs( self.args ) - end - - return frame -end - ---- Set up a cloned environment for execution of a module chunk, then execute --- the module in that environment. This is called by the host to implement --- {{#invoke}}. --- --- @param chunk The module chunk --- @param name The name of the function to be returned. 
Nil or false causes the entire export table to be returned --- @return boolean Whether the requested value was able to be returned --- @return table|function|string The requested value, or if that was unable to be returned, the type of the value returned by the module -function mw.executeModule( chunk, name ) - local env = mw.clone( _G ) - makePackageModule( env ) - - -- These are unsafe - env.mw.makeProtectedEnvFuncs = nil - env.mw.executeModule = nil - if name ~= false then -- console sets name to false when evaluating its code and nil when evaluating a module's - env.mw.getLogBuffer = nil - env.mw.clearLogBuffer = nil - end - - if allowEnvFuncs then - env.setfenv, env.getfenv = mw.makeProtectedEnvFuncs( {[_G] = true}, {} ) - else - env.setfenv = nil - env.getfenv = nil - end - - env.os.date = ttlDate - env.os.time = ttlTime - - setfenv( chunk, env ) - - local oldFrame = currentFrame - if not currentFrame then - currentFrame = newFrame( 'current', 'parent' ) - end - local res = chunk() - currentFrame = oldFrame - - if not name then -- catch console whether it's evaluating its own code or a module's - return true, res - end - if type(res) ~= 'table' then - return false, type(res) - end - return true, res[name] -end - -function mw.executeFunction( chunk ) - local frame = newFrame( 'current', 'parent' ) - local oldFrame = currentFrame - - if executeFunctionDepth == 0 then - -- math.random is defined as using C's rand(), and C's rand() uses 1 as - -- a seed if not explicitly seeded. So reseed with 1 for each top-level - -- #invoke to avoid people passing state via the RNG. 
- math.randomseed( 1 ) - end - executeFunctionDepth = executeFunctionDepth + 1 - - currentFrame = frame - local results = { chunk( frame ) } - currentFrame = oldFrame - - local stringResults = {} - for i, result in ipairs( results ) do - stringResults[i] = tostring( result ) - end - - executeFunctionDepth = executeFunctionDepth - 1 - - return table.concat( stringResults ) -end - -function mw.allToString( ... ) - local t = { ... } - for i = 1, select( '#', ... ) do - t[i] = tostring( t[i] ) - end - return table.concat( t, '\t' ) -end - -function mw.log( ... ) - logBuffer = logBuffer .. mw.allToString( ... ) .. '\n' -end - -function mw.dumpObject( object ) - local doneTable = {} - local doneObj = {} - local ct = {} - local function sorter( a, b ) - local ta, tb = type( a ), type( b ) - if ta ~= tb then - return ta < tb - end - if ta == 'string' or ta == 'number' then - return a < b - end - if ta == 'boolean' then - return tostring( a ) < tostring( b ) - end - return false -- Incomparable - end - local function _dumpObject( object, indent, expandTable ) - local tp = type( object ) - if tp == 'number' or tp == 'nil' or tp == 'boolean' then - return tostring( object ) - elseif tp == 'string' then - return string.format( "%q", object ) - elseif tp == 'table' then - if not doneObj[object] then - local s = tostring( object ) - if s == 'table' then - ct[tp] = ( ct[tp] or 0 ) + 1 - doneObj[object] = 'table#' .. 
ct[tp] - else - doneObj[object] = s - doneTable[object] = true - end - end - if doneTable[object] or not expandTable then - return doneObj[object] - end - doneTable[object] = true - - local ret = { doneObj[object], ' {\n' } - local mt = getmetatable( object ) - if mt then - ret[#ret + 1] = string.rep( " ", indent + 2 ) - ret[#ret + 1] = 'metatable = ' - ret[#ret + 1] = _dumpObject( mt, indent + 2, false ) - ret[#ret + 1] = "\n" - end - - local doneKeys = {} - for key, value in ipairs( object ) do - doneKeys[key] = true - ret[#ret + 1] = string.rep( " ", indent + 2 ) - ret[#ret + 1] = _dumpObject( value, indent + 2, true ) - ret[#ret + 1] = ',\n' - end - local keys = {} - for key in pairs( object ) do - if not doneKeys[key] then - keys[#keys + 1] = key - end - end - table.sort( keys, sorter ) - for i = 1, #keys do - local key = keys[i] - ret[#ret + 1] = string.rep( " ", indent + 2 ) - ret[#ret + 1] = '[' - ret[#ret + 1] = _dumpObject( key, indent + 3, false ) - ret[#ret + 1] = '] = ' - ret[#ret + 1] = _dumpObject( object[key], indent + 2, true ) - ret[#ret + 1] = ",\n" - end - ret[#ret + 1] = string.rep( " ", indent ) - ret[#ret + 1] = '}' - return table.concat( ret ) - else - if not doneObj[object] then - ct[tp] = ( ct[tp] or 0 ) + 1 - doneObj[object] = tostring( object ) .. '#' .. ct[tp] - end - return doneObj[object] - end - end - return _dumpObject( object, 0, true ) -end - -function mw.logObject( object, prefix ) - if prefix and prefix ~= '' then - logBuffer = logBuffer .. prefix .. ' = ' - end - logBuffer = logBuffer .. mw.dumpObject( object ) .. 
'\n' -end - -function mw.clearLogBuffer() - logBuffer = '' -end - -function mw.getLogBuffer() - return logBuffer -end - -function mw.getCurrentFrame() - if not currentFrame then - currentFrame = newFrame( 'current', 'parent' ) - end - return currentFrame -end - -function mw.isSubsting() - return php.isSubsting() -end - -function mw.incrementExpensiveFunctionCount() - php.incrementExpensiveFunctionCount() -end - -function mw.addWarning( text ) - php.addWarning( text ) -end - ---- --- Wrapper for mw.loadData. This creates the read-only dummy table for --- accessing the real data. --- --- @param data table Data to access --- @param seen table|nil Table of already-seen tables. --- @return table -local function dataWrapper( data, seen ) - local t = {} - seen = seen or { [data] = t } - - local function pairsfunc( s, k ) - k = next( data, k ) - if k ~= nil then - return k, t[k] - end - return nil - end - - local function ipairsfunc( s, i ) - i = i + 1 - if data[i] ~= nil then - return i, t[i] - end - return -- no nil to match default ipairs() - end - - local mt = { - mw_loadData = true, - __index = function ( tt, k ) - assert( t == tt ) - local v = data[k] - if type( v ) == 'table' then - seen[v] = seen[v] or dataWrapper( v, seen ) - return seen[v] - end - return v - end, - __newindex = function ( t, k, v ) - error( "table from mw.loadData is read-only", 2 ) - end, - __pairs = function ( tt ) - assert( t == tt ) - return pairsfunc, t, nil - end, - __ipairs = function ( tt ) - assert( t == tt ) - return ipairsfunc, t, 0 - end, - } - -- This is just to make setmetatable() fail - mt.__metatable = mt - - return setmetatable( t, mt ) -end - ---- --- Validator for mw.loadData. This scans through the data looking for things --- that are not supported, e.g. functions (which may be closures). --- --- @param d table Data to access. --- @param seen table|nil Table of already-seen tables. 
--- @return string|nil Error message, if any -local function validateData( d, seen ) - seen = seen or {} - local tp = type( d ) - if tp == 'nil' or tp == 'boolean' or tp == 'number' or tp == 'string' then - return nil - elseif tp == 'table' then - if seen[d] then - return nil - end - seen[d] = true - if getmetatable( d ) ~= nil then - return "data for mw.loadData contains a table with a metatable" - end - for k, v in pairs( d ) do - if type( k ) == 'table' then - return "data for mw.loadData contains a table as a key" - end - local err = validateData( k, seen ) or validateData( v, seen ) - if err then - return err - end - end - return nil - else - return "data for mw.loadData contains unsupported data type '" .. tp .. "'" - end -end - -function mw.loadData( module ) - local data = loadedData[module] - if type( data ) == 'string' then - -- No point in re-validating - error( data, 2 ) - end - if not data then - -- Don't allow accessing the current frame's info (bug 65687) - local oldFrame = currentFrame - currentFrame = newFrame( 'empty' ) - - -- The point of this is to load big data, so don't save it in package.loaded - -- where it will have to be copied for all future modules. - local l = package.loaded[module] - local _ - - _, data = mw.executeModule( function() return require( module ) end ) - - package.loaded[module] = l - currentFrame = oldFrame - - -- Validate data - local err - if type( data ) == 'table' then - err = validateData( data ) - else - err = module .. ' returned ' .. type( data ) .. 
', table expected' - end - if err then - loadedData[module] = err - error( err, 2 ) - end - loadedData[module] = data - end - - return dataWrapper( data ) -end - -return mw diff --git a/wikt/mw.site.lua b/wikt/mw.site.lua deleted file mode 100755 index 09943a8..0000000 --- a/wikt/mw.site.lua +++ /dev/null @@ -1,86 +0,0 @@ -local site = {} - -function site.setupInterface( info ) - -- Boilerplate - site.setupInterface = nil - local php = mw_interface - mw_interface = nil - - site.siteName = info.siteName - site.server = info.server - site.scriptPath = info.scriptPath - site.stylePath = info.stylePath - site.currentVersion = info.currentVersion - site.stats = info.stats - site.stats.pagesInCategory = php.pagesInCategory - site.stats.pagesInNamespace = php.pagesInNamespace - site.stats.usersInGroup = php.usersInGroup - site.interwikiMap = php.interwikiMap - - -- Process namespace list into more useful tables - site.namespaces = {} - local namespacesByName = {} - site.subjectNamespaces = {} - site.talkNamespaces = {} - site.contentNamespaces = {} - for ns, data in pairs( info.namespaces ) do - data.subject = info.namespaces[data.subject] - data.talk = info.namespaces[data.talk] - data.associated = info.namespaces[data.associated] - - site.namespaces[ns] = data - - namespacesByName[data.name] = data - if data.canonicalName then - namespacesByName[data.canonicalName] = data - end - for i = 1, #data.aliases do - namespacesByName[data.aliases[i]] = data - end - - if data.isSubject then - site.subjectNamespaces[ns] = data - end - if data.isTalk then - site.talkNamespaces[ns] = data - end - if data.isContent then - site.contentNamespaces[ns] = data - end - end - - -- Set __index for namespacesByName to handle names-with-underscores - -- and non-standard case - local getNsIndex = php.getNsIndex - setmetatable( namespacesByName, { - __index = function ( t, k ) - if type( k ) == 'string' then - -- Try with fixed underscores - k = string.gsub( k, '_', ' ' ) - if rawget( t, k ) 
then - return rawget( t, k ) - end - - -- Ask PHP, because names are case-insensitive - local ns = getNsIndex( k ) - if ns then - rawset( t, k, site.namespaces[ns] ) - end - end - return rawget( t, k ) - end - } ) - - -- Set namespacesByName as the lookup table for site.namespaces, so - -- something like site.namespaces.Wikipedia works without having - -- pairs( site.namespaces ) iterate all those names. - setmetatable( site.namespaces, { __index = namespacesByName } ) - - -- Register this library in the "mw" global - mw = mw or {} - mw.site = site - - package.loaded['mw.site'] = site -end - -return site diff --git a/wikt/script-utilities-data.lua b/wikt/script-utilities-data.lua deleted file mode 100644 index be69104..0000000 --- a/wikt/script-utilities-data.lua +++ /dev/null @@ -1,61 +0,0 @@ -local data = {} - -data.translit = { - ["term"] = { ---[=[ can't be done until Kana transliterations are correctly parsed by [[Module:links]] - ["tag"] = "i", - ]=] - ["classes"] = "mention-tr", - }, - ["usex"] = { - ["tag"] = "i", - ["classes"] = "e-transliteration", - }, - ["head"] = { - ["classes"] = "headword-tr", - ["dir"] = "ltr", - }, - ["default"] = {}, -} - -data.transcription = { - ["head"] = { - ["tag"] = "span", - ["classes"] = "headword-ts", - ["dir"] = "ltr", - }, - ["default"] = {}, -} - -for key, value in pairs(data.translit) do - if not value.tag then - value.tag = "span" - end -end - -data.faces = { - ["term"] = { - tag = "i", - class = "mention", - }, - ["head"] = { - tag = "strong", - class = "headword", - }, - ["hypothetical"] = { - prefix = '*', - tag = "i", - class = "hypothetical", - }, - ["bold"] = { - tag = "b", - }, - ["translation"] = { - tag = "span", - }, - ["nil"] = { - tag = "span", - }, -} - -return data \ No newline at end of file diff --git a/wikt/script-utilities.lua b/wikt/script-utilities.lua deleted file mode 100644 index 38a41ce..0000000 --- a/wikt/script-utilities.lua +++ /dev/null @@ -1,369 +0,0 @@ -local export = {} - ---[=[ - 
Modules used: - [[Module:script utilities/data]] - [[Module:scripts]] - [[Module:languages]] - [[Module:parameters]] - [[Module:utilities]] - [[Module:debug]] -]=] - -function export.is_Latin_script(sc) - -- Latn, Latf, Latinx, nv-Latn, pjt-Latn - return sc:getCode():find("Lat") and true or false -end - --- Used by [[Template:lang]] -function export.lang_t(frame) - params = { - [1] = {}, - [2] = { allow_empty = true, default = "" }, - ["sc"] = {}, - ["face"] = {}, - ["class"] = {}, - } - - local args = require("Module:parameters").process(frame:getParent().args, params) - local NAMESPACE = mw.title.getCurrentTitle().nsText - - local lang = args[1] or (NAMESPACE == "Template" and "und") or error("Language code has not been specified. Please pass parameter 1 to the template.") - lang = require("Module:languages").getByCode(lang) or require("Module:languages").err(lang, 1) - - local text = args[2] - - local sc = args["sc"] - sc = (sc and (require("Module:scripts").getByCode(sc) or error("The script code \"" .. sc .. "\" is not valid.")) or nil) - - local face = args["face"] - - return export.tag_text(text, lang, sc, face, class) -end - --- Ustring turns on the codepoint-aware string matching. The basic string function --- should be used for simple sequences of characters, Ustring function for --- sets – []. -local function trackPattern(text, pattern, tracking, ustring) - local find = ustring and mw.ustring.find or string.find - if pattern and find(text, pattern) then - require("Module:debug").track("script/" .. 
tracking) - end -end - -local function track(text, lang, sc) - local U = mw.ustring.char - - if lang and text then - local langCode = lang:getCode() - - -- [[Special:WhatLinksHere/Template:tracking/script/ang/acute]] - if langCode == "ang" then - local decomposed = mw.ustring.toNFD(text) - local acute = U(0x301) - - trackPattern(decomposed, acute, "ang/acute") - - --[=[ - [[Special:WhatLinksHere/Template:tracking/script/Greek/wrong-phi]] - [[Special:WhatLinksHere/Template:tracking/script/Greek/wrong-theta]] - [[Special:WhatLinksHere/Template:tracking/script/Greek/wrong-kappa]] - [[Special:WhatLinksHere/Template:tracking/script/Greek/wrong-rho]] - ϑ, ϰ, ϱ, ϕ should generally be replaced with θ, κ, ρ, φ. - ]=] - elseif langCode == "el" or langCode == "grc" then - trackPattern(text, "ϑ", "Greek/wrong-theta") - trackPattern(text, "ϰ", "Greek/wrong-kappa") - trackPattern(text, "ϱ", "Greek/wrong-rho") - trackPattern(text, "ϕ", "Greek/wrong-phi") - - --[=[ - [[Special:WhatLinksHere/Template:tracking/script/Ancient Greek/spacing-coronis]] - [[Special:WhatLinksHere/Template:tracking/script/Ancient Greek/spacing-smooth-breathing]] - [[Special:WhatLinksHere/Template:tracking/script/Ancient Greek/wrong-apostrophe]] - When spacing coronis and spacing smooth breathing are used as apostrophes, - they should be replaced with right single quotation marks (’). - ]=] - if langCode == "grc" then - trackPattern(text, U(0x1FBD), "Ancient Greek/spacing-coronis") - trackPattern(text, U(0x1FBF), "Ancient Greek/spacing-smooth-breathing") - trackPattern(text, "[" .. U(0x1FBD) .. U(0x1FBF) .. 
"]", "Ancient Greek/wrong-apostrophe", true) - end - - -- [[Special:WhatLinksHere/Template:tracking/script/Russian/grave-accent]] - elseif langCode == "ru" then - local decomposed = mw.ustring.toNFD(text) - - trackPattern(decomposed, U(0x300), "Russian/grave-accent") - - -- [[Special:WhatLinksHere/Template:tracking/script/Tibetan/trailing-punctuation]] - elseif langCode == "bo" then - trackPattern(text, "[་།]$", "Tibetan/trailing-punctuation", true) - trackPattern(text, "[་།]%]%]$", "Tibetan/trailing-punctuation", true) - - --[=[ - [[Special:WhatLinksHere/Template:tracking/script/Thai/broken-ae]] - [[Special:WhatLinksHere/Template:tracking/script/Thai/broken-am]] - [[Special:WhatLinksHere/Template:tracking/script/Thai/wrong-rue-lue]] - ]=] - elseif langCode == "th" then - trackPattern(text, "เ".."เ", "Thai/broken-ae") - trackPattern(text, "ํ[่้๊๋]?า", "Thai/broken-am", true) - trackPattern(text, "[ฤฦ]า", "Thai/wrong-rue-lue", true) - - --[=[ - [[Special:WhatLinksHere/Template:tracking/script/Lao/broken-ae]] - [[Special:WhatLinksHere/Template:tracking/script/Lao/broken-am]] - ]=] - elseif langCode == "lo" then - trackPattern(text, "ເ".."ເ", "Lao/broken-ae") - trackPattern(text, "ໍ[່້໊໋]?າ", "Lao/broken-am", true) - end - end -end - --- Wrap text in the appropriate HTML tags with language and script class. -function export.tag_text(text, lang, sc, face, class, id) - if not sc then - sc = require("Module:scripts").findBestScript(text, lang) - end - - track(text, lang, sc) - - -- Replace space characters with newlines in Mongolian-script text, which is written top-to-bottom. - if sc and sc:getDirection() == "down" then - --[[ First, escape targets of wikilinks and HTML tags, - which should not have their spaces - replaced with line breaks. ]] - local escaped = {} - local i = 0 - local escape_format = "$%d" - - local function escape(text, pattern, prefix) - return text:gsub( - pattern, - function(item) - i = i + 1 - escaped[i] = item - return (prefix or "") .. 
escape_format:format(i) - end) - end - - text = escape(text, "%[%[([^|]+|)", "[[") - text = escape(text, "<[^>]+>") - - text = text:gsub(" +", "
") - - -- Unescape whatever was escaped. - text = text:gsub( - "$(%d)", - function(a) - a = tonumber(a) - return escaped[a] - end - ) - end - - if sc:getCode() == "Imag" then - face = nil - end - - local function class_attr(classes) - table.insert(classes, 1, sc:getCode()) - if class and class ~= '' then - table.insert(classes, class) - end - return 'class="' .. table.concat(classes, ' ') .. '"' - end - - local function tag_attr(...) - local output = {} - if id then - table.insert(output, 'id="' .. require("Module:utilities").make_id(lang, id) .. '"') - end - - table.insert(output, class_attr({...}) ) - - if lang then - table.insert(output, 'lang="' .. lang:getCode() .. '"') - end - - return table.concat(output, " ") - end - - if face == "hypothetical" then - -- [[Special:WhatLinksHere/Template:tracking/script-utilities/face/hypothetical]] - require("Module:debug").track("script-utilities/face/hypothetical") - end - - local data = mw.loadData("Module:script utilities/data").faces[face or "nil"] - - if sc:getDirection() == "rtl" then - text = mw.ustring.gsub(text, "%p$", "%0‎") - end - - local post = "" - if face == "translation" and sc:getDirection() == "rtl" then - post = "‎" - end - - -- Add a script wrapper - if data then - return ( data.prefix or "" ) .. '<' .. data.tag .. ' ' .. tag_attr(data.class) .. '>' .. text .. '' .. post - else - error('Invalid script face "' .. face .. '".') - end -end - -function export.tag_translit(translit, lang, kind, attributes) - if type(lang) == "table" then - lang = lang.getCode and lang:getCode() - or error("Third argument to tag_translit should be a language code or language object.") - end - - local data = mw.loadData("Module:script utilities/data").translit[kind or "default"] - - local opening_tag = {} - - table.insert(opening_tag, data.tag) - if lang == "ja" then - table.insert(opening_tag, 'class="' .. (data.classes and data.classes .. " " or "") .. 'tr"') - else - table.insert(opening_tag, 'lang="' .. lang .. 
'-Latn"') - table.insert(opening_tag, 'class="' .. (data.classes and data.classes .. " " or "") .. 'tr Latn"') - end - - if data.dir then - table.insert(opening_tag, 'dir="' .. data.dir .. '"') - end - - table.insert(opening_tag, attributes) - - return "<" .. table.concat(opening_tag, " ") .. ">" .. translit .. "" -end - -function export.tag_transcription(transcription, lang, kind, attributes) - if type(lang) == "table" then - lang = lang.getCode and lang:getCode() - or error("Third argument to tag_translit should be a language code or language object.") - end - - local data = mw.loadData("Module:script utilities/data").transcription[kind or "default"] - - local opening_tag = {} - - table.insert(opening_tag, data.tag) - if lang == "ja" then - table.insert(opening_tag, 'class="' .. (data.classes and data.classes .. " " or "") .. 'ts"') - else - table.insert(opening_tag, 'lang="' .. lang .. '-Latn"') - table.insert(opening_tag, 'class="' .. (data.classes and data.classes .. " " or "") .. 'ts Latn"') - end - - if data.dir then - table.insert(opening_tag, 'dir="' .. data.dir .. '"') - end - - table.insert(opening_tag, attributes) - - return "<" .. table.concat(opening_tag, " ") .. ">" .. transcription .. "" -end - --- Add a notice to request the native script of a word -function export.request_script(lang, sc) - local scripts = lang.getScripts and lang:getScripts() or error('The language "' .. lang:getCode() .. '" does not have the method getScripts. It may be unwritten.') - - -- By default, request for "native" script - local cat_script = "native" - local disp_script = "script" - - -- If the script was not specified, and the language has only one script, use that. - if not sc and #scripts == 1 then - sc = scripts[1] - end - - -- Is the script known? - if sc then - -- If the script is Latin, return nothing. 
- if export.is_Latin_script(sc) then - return "" - end - - if sc:getCode() ~= scripts[1]:getCode() then - disp_script = sc:getCanonicalName() - end - - -- The category needs to be specific to script only if there is chance - -- of ambiguity. This occurs when lang=und, or when the language has - -- multiple scripts. - if lang:getCode() == "und" or scripts[2] then - cat_script = sc:getCanonicalName() - end - else - -- The script is not known. - -- Does the language have at least one non-Latin script in its list? - local has_nonlatin = false - - for i, val in ipairs(scripts) do - if not export.is_Latin_script(val) then - has_nonlatin = true - break - end - end - - -- If there are non-Latin scripts, return nothing. - if not has_nonlatin then - return "" - end - end - - local category = "" - - if mw.title.getCurrentTitle().nsText ~= "Template" then - category = "[[Category:" .. lang:getCanonicalName() .. " terms needing " .. cat_script .. " script]]" - end - - return "[" .. disp_script .. " needed]" .. category -end - -function export.template_rfscript(frame) - local args = frame.args - local lang = args[1] or error("The first parameter (language code) has not been given") - local sc = args["sc"]; if sc == "" then sc = nil end - lang = require("Module:languages").getByCode(lang) or error("The language code \"" .. lang .. "\" is not valid.") - sc = (sc and (require("Module:scripts").getByCode(sc) or error("The script code \"" .. sc .. "\" is not valid.")) or nil) - - local ret = export.request_script(lang, sc) - - if ret == "" then - error("This language is written in the Latin alphabet. It does not need a native script.") - else - return ret - end -end - -function export.checkScript(text, scriptCode, result) - local scriptObject = require("Module:scripts").getByCode(scriptCode) - - if not scriptObject then - error('The script code "' .. scriptCode .. '" is not recognized.') - end - - local originalText = text - - -- Remove non-letter characters. 
- text = mw.ustring.gsub(text, "[%A]", "") - - -- Remove all characters of the script in question. - text = mw.ustring.gsub(text, "[" .. scriptObject:getCharacters() .. "]", "") - - if text ~= "" then - if type(result) == "string" then - error(result) - else - error('The text "' .. originalText .. '" contains the letters "' .. text .. '" that do not belong to the ' .. scriptObject:getCategoryName() .. '.', 2) - end - end -end - -return export \ No newline at end of file diff --git a/wikt/scripts.lua b/wikt/scripts.lua deleted file mode 100644 index d5c41a6..0000000 --- a/wikt/scripts.lua +++ /dev/null @@ -1,185 +0,0 @@ -local export = {} -local Script = {} - -function Script:getCode() - return self._code -end - -function Script:getCanonicalName() - return self._rawData.canonicalName -end - -function Script:getOtherNames() - return self._rawData.otherNames or {} -end - -function Script:getParent() - return self._rawData.parent -end - -function Script:getSystems() - if not self._systemObjects then - local m_systems = require("Module:writing systems") - self._systemObjects = {} - - for _, sys in ipairs(self._rawData.systems or {}) do - table.insert(self._systemObjects, m_systems.getByCode(sys)) - end - end - - return self._systemObjects -end - ---function Script:getAllNames() --- return self._rawData.names ---end - - -function Script:getType() - return "script" -end - - -function Script:getCategoryName() - local name = self._rawData.canonicalName - - -- If the name already has "code" or "semaphore" in it, don't add it. - -- No names contain "script". - if name:find("[Cc]ode$") or name:find("[Ss]emaphore$") then - return name - else - return name .. 
" script" - end -end - - -function Script:getWikipediaArticle() - return self._rawData.wikipedia_article or self:getCategoryName() -end - - -function Script:getCharacters() - if self._rawData.characters then - return self._rawData.characters - else - return nil - end -end - - -function Script:countCharacters(text) - if not self._rawData.characters then - return 0 - else - local _, num = mw.ustring.gsub(text, "[" .. self._rawData.characters .. "]", "") - return num - end -end - -function Script:getDirection() - local direction = self._rawData.direction - if not direction then - return nil - else - return direction - end -end - - -function Script:getRawData() - return self._rawData -end - - -function Script:toJSON() - local ret = { - canonicalName = self:getCanonicalName(), - categoryName = self:getCategoryName(), - code = self._code, - otherNames = self:getOtherNames(), - type = self:getType(), - } - - return require("Module:JSON").toJSON(ret) -end - - -Script.__index = Script - - -function export.makeObject(code, data) - return data and setmetatable({ _rawData = data, _code = code }, Script) or nil -end - - -function export.getByCode(code) - if code == "IPAchar" then - require("Module:debug").track("IPAchar") - end - return export.makeObject(code, mw.loadData("Module:scripts/data")[code]) -end - -function export.getByCanonicalName(name) - local code = mw.loadData("Module:scripts/by name")[name] - - if not code then - return nil - end - - return export.makeObject(code, mw.loadData("Module:scripts/data")[code]) -end - --- Find the best script to use, based on the characters of a string. 
-function export.findBestScript(text, lang) - if not text or not lang or not lang.getScripts then - return export.getByCode("None") - end - - local scripts = lang:getScripts() - - if not scripts[2] then - return scripts[1] - end - - --[=[ - Remove any HTML entities; catfix function in [[Module:utilities]] - adds tagging to a no-break space ( ), which contains Latin characters; - hence Latin was returned as the script if "Latn" is one of the language's scripts. - ]=] - text = string.gsub(text, "&[a-zA-Z0-9]+;", "") - - -- Try to match every script against the text, - -- and return the one with the most matching characters. - local bestcount = 0 - local bestscript = nil - - -- Get length of text minus any spacing or punctuation characters. - -- Counting instances of UTF-8 character pattern is faster than mw.ustring.len. - local _, length = string.gsub(mw.ustring.gsub(text, "[%s%p]+", ""), "[\1-\127\194-\244][\128-\191]*", "") - - if length == 0 then - return export.getByCode("None") - end - - for i, script in ipairs(scripts) do - local count = script:countCharacters(text) - - if count >= length then - return script - end - - if count > bestcount then - bestcount = count - bestscript = script - end - end - - if bestscript then - return bestscript - end - - -- No matching script was found. Return "None". 
- return export.getByCode("None") -end - -return export \ No newline at end of file diff --git a/wikt/scripts/data.lua b/wikt/scripts/data.lua deleted file mode 100644 index 8584485..0000000 --- a/wikt/scripts/data.lua +++ /dev/null @@ -1,1187 +0,0 @@ -local u = mw.ustring.char -local m = {} - -m["Adlm"] = { - canonicalName = "Adlam", - characters = "𞤀-𞥟", - direction = "rtl", -} - -m["Afak"] = { - canonicalName = "Afaka", -} - -m["Aghb"] = { - canonicalName = "Caucasian Albanian", - characters = "𐔰-𐕣𐕯", -} - -m["Ahom"] = { - canonicalName = "Ahom", - characters = "𑜀-𑜿", - systems = {"abugida"}, -} - -m["Arab"] = { - canonicalName = "Arabic", - otherNames = {"Jawi", "Nastaliq", "Nastaleeq"}, - characters = "؀-ۿݐ-ݿࢠ-ࣿﭐ-﷽ﹰ-ﻼ", - direction = "rtl", - systems = {"abjad"}, -- more precisely, impure abjad -} - -m["fa-Arab"] = { - canonicalName = "Arabic", - otherNames = {"Perso-Arabic"}, - characters = m["Arab"].characters, - direction = "rtl", - parent = "Arab", -} - -m["kk-Arab"] = { - canonicalName = "Arabic", - characters = m["Arab"].characters, - direction = "rtl", - parent = "Arab", -} - -m["ks-Arab"] = { - canonicalName = "Arabic", - characters = m["Arab"].characters, - direction = "rtl", - parent = "Arab", -} - -m["ku-Arab"] = { - canonicalName = "Arabic", - characters = m["Arab"].characters, - direction = "rtl", - parent = "Arab", -} - -m["ms-Arab"] = { - canonicalName = "Arabic", - characters = m["Arab"].characters, - direction = "rtl", - parent = "Arab", -} - -m["mzn-Arab"] = { - canonicalName = "Arabic", - characters = m["Arab"].characters, - direction = "rtl", - parent = "Arab", -} - -m["ota-Arab"] = { - canonicalName = "Arabic", - characters = m["Arab"].characters, - direction = "rtl", - parent = "Arab", -} - -m["pa-Arab"] = { - canonicalName = "Arabic", - otherNames = {"Shahmukhi"}, - characters = m["Arab"].characters, - direction = "rtl", - parent = "Arab", -} - -m["ps-Arab"] = { - canonicalName = "Arabic", - characters = m["Arab"].characters, - direction 
= "rtl", - parent = "Arab", -} - -m["sd-Arab"] = { - canonicalName = "Arabic", - characters = m["Arab"].characters, - direction = "rtl", - parent = "Arab", -} - -m["tt-Arab"] = { - canonicalName = "Arabic", - characters = m["Arab"].characters, - direction = "rtl", - parent = "Arab", -} - -m["ug-Arab"] = { - canonicalName = "Arabic", - characters = m["Arab"].characters, - direction = "rtl", - parent = "Arab", -} - -m["ur-Arab"] = { - canonicalName = "Arabic", - characters = m["Arab"].characters, - direction = "rtl", - parent = "Arab", -} - --- Aran (Nastaliq) is subsumed into Arab - -m["Armi"] = { - canonicalName = "Imperial Aramaic", - characters = "𐡀-𐡟", - direction = "rtl", - systems = {"abjad"}, -} - -m["Armn"] = { - canonicalName = "Armenian", - characters = "Ա-֏ﬓ-ﬗ", -} - -m["Avst"] = { - canonicalName = "Avestan", - characters = "𐬀-𐬿", - direction = "rtl", -} - -m["Bali"] = { - canonicalName = "Balinese", - characters = "ᬀ-᭼", - systems = {"abugida"}, -} - -m["Bamu"] = { - canonicalName = "Bamum", - characters = "ꚠ-꛷𖠀-𖨸", -} - -m["Bass"] = { - canonicalName = "Bassa", - otherNames = {"Bassa Vah", "Vah"}, - characters = "𖫐-𖫵", -} - -m["Batk"] = { - canonicalName = "Batak", - characters = "ᯀ-᯿", - systems = {"abugida"}, -} - -m["Beng"] = { - canonicalName = "Bengali", - otherNames = {"Bengali-Assamese", "Assamese", "Eastern Nagari"}, - characters = "ঀ-৾", - systems = {"abugida"}, -} - -m["Bhks"] = { - canonicalName = "Bhaiksuki", - characters = "𑰀-𑱬", - systems = {"abugida"}, -} - -m["Bopo"] = { - canonicalName = "Zhuyin", - otherNames = {"Zhuyin Fuhao", "Bopomofo"}, - characters = "ㄅ-ㄯㆠ-ㆺ", -} - -m["Brah"] = { - canonicalName = "Brahmi", - characters = "𑀀-𑁿", - systems = {"abugida"}, -} - -m["Brai"] = { - canonicalName = "Braille", - characters = "⠀-⣿", -} - -m["Bugi"] = { - canonicalName = "Buginese", - otherNames = {"Lontara"}, - characters = "ᨀ-᨟", - systems = {"abugida"}, -} - -m["Buhd"] = { - canonicalName = "Buhid", - characters = "ᝀ-ᝓ", - systems = 
{"abugida"}, -} - -m["Cakm"] = { - canonicalName = "Chakma", - characters = "𑄀-𑅆", - systems = {"abugida"}, -} - -m["Cans"] = { - canonicalName = "Canadian syllabics", - characters = "᐀-ᙿ", - systems = {"abugida"}, -} - -m["Cari"] = { - canonicalName = "Carian", - characters = "𐊠-𐋐", - systems = {"alphabet"}, -} - -m["Cham"] = { - canonicalName = "Cham", - characters = "ꨀ-꩟", - systems = {"abugida"}, -} - -m["Cher"] = { - canonicalName = "Cherokee", - characters = "Ꭰ-Ᏼꭰ-ꮿ", - systems = {"syllabary"}, -} - -m["Copt"] = { - canonicalName = "Coptic", - characters = "Ϣ-ϯⲀ-⳿𐋡-𐋻", -- this is mostly "Coptic", not unified "Greek and Coptic" - systems = {"alphabet"}, -} - -m["Cprt"] = { - canonicalName = "Cypriot", - characters = "𐠀-𐠿", - direction = "rtl", - systems = {"syllabary"}, -} - -m["Cyrl"] = { - canonicalName = "Cyrillic", - characters = "Ѐ-џѢѣѪѫѬѭѲѳѴѵҊ-ԧꚀ-ꚗ", - systems = {"alphabet"}, -} - -m["Cyrs"] = { - canonicalName = "Old Cyrillic", - otherNames = { "Early Cyrillic" }, - characters = "Ѐ-ԧꙀ-ꚗ", - wikipedia_article = "Early Cyrillic alphabet", - systems = {"alphabet"}, -} - -m["Deva"] = { - canonicalName = "Devanagari", - characters = "ऀ-ॿ꣠-ꣿ", - systems = {"abugida"}, -} - -m["Dogr"] = { - canonicalName = "Dogra", - characters = "𑠀-𑠻", - systems = {"abugida"}, -} - -m["Dsrt"] = { - canonicalName = "Deseret", - characters = "𐐀-𐑏", - systems = {"alphabet"}, -} - -m["Dupl"] = { - canonicalName = "Duployan", - characters = "𛰀-𛲟", -} - -m["Egyd"] = { - canonicalName = "Demotic", -} - -m["Egyp"] = { - canonicalName = "Egyptian hieroglyphic", - characters = "𓀀-𓐮", -} - -m["Elba"] = { - canonicalName = "Elbasan", - characters = "𐔀-𐔧", -} - -m["Ethi"] = { - canonicalName = "Ethiopic", - otherNames = {"Ge'ez"}, - characters = "ሀ-᎙ⶀ-ⷞꬁ-ꬮ", - systems = {"abugida"}, -} - -m["Geok"] = { - canonicalName = "Khutsuri", - otherNames = {"Nuskhuri", "Asomtavruli"}, - characters = "Ⴀ-Ⴭⴀ-ⴭ", -- Ⴀ-Ⴭ is Asomtavruli, ⴀ-ⴭ is Nuskhuri - systems = {"alphabet"}, -} - -m["Geor"] = { - 
canonicalName = "Georgian", - otherNames = {"Mkhedruli", "Mtavruli"}, - characters = "Ⴀ-ჿᲐ-Ჿ", -- technically only the range [ა-ჿ] is Mkhedruli - systems = {"alphabet"}, -} - -m["Glag"] = { - canonicalName = "Glagolitic", - characters = "Ⰰ-ⱞ𞀀-𞀪", - systems = {"alphabet"}, -} - -m["Gong"] = { - canonicalName = "Gunjala Gondi", - characters = "𑵠-𑶩", - systems = {"abugida"}, -} - -m["Gonm"] = { - canonicalName = "Masaram Gondi", - characters = "𑴀-𑵙", - systems = {"abugida"}, -} - -m["Goth"] = { - canonicalName = "Gothic", - characters = "𐌰-𐍊", - systems = {"alphabet"}, -} - -m["Gran"] = { - canonicalName = "Grantha", - characters = "𑌁-𑍴", - systems = {"abugida"}, -} - -m["Grek"] = { - canonicalName = "Greek", - characters = "Ͱ-ϡϰ-Ͽ", - systems = {"alphabet"}, -} - -m["polytonic"] = { - canonicalName = "Greek", - characters = "ἀ-῾" .. m["Grek"].characters, - parent = "Grek", - systems = {"alphabet"}, -} - -m["Gujr"] = { - canonicalName = "Gujarati", - characters = "ઁ-૱", - systems = {"abugida"}, -} - -m["Guru"] = { - canonicalName = "Gurmukhi", - characters = "ਁ-੶", - systems = {"abugida"}, -} - -m["Hang"] = { - canonicalName = "Hangul", - characters = "ᄀ-ᇿ가-힣ㄱ-ㆎ", - systems = {"syllabary"}, -} - -m["Hani"] = { - canonicalName = "Han", - otherNames = {"Hanzi", "Chu Nom"}, - characters = "一-鿿㐀-䶵𠀀-"..u(0x2EBE0).."﨎﨏﨑﨓﨔﨟﨡﨣﨤﨧﨨﨩⺀-⿟㇀-㇣ -〿㍻-㍿", - systems = {"logography"}, -} - -m["Hans"] = { - canonicalName = "Simplified Han", - characters = m["Hani"].characters, - systems = {"logography"}, -} - -m["Hant"] = { - canonicalName = "Traditional Han", - characters = m["Hani"].characters, - systems = {"logography"}, -} - -m["Hatr"] = { - canonicalName = "Hatran", - characters = "𐣠-𐣿", - systems = {"abjad"}, -} - -m["Hira"] = { - canonicalName = "Hiragana", - otherNames = {"Hentaigana"}, - characters = "ぁ-ゟ𛀁-𛄞", - systems = {"syllabary"}, -} - -m["Hluw"] = { - canonicalName = "Anatolian Hieroglyphs", - characters = "𔐀-𔙆", - wikipedia_article = "Anatolian hieroglyphs", -} - 
-m["Hung"] = { - canonicalName = "Old Hungarian", - otherNames = {"Hungarian runic"}, - characters = "𐲀-𐲲", -} - -m["Kana"] = { - canonicalName = "Katakana", - characters = "゠-ヿㇰ-ㇿ𛀀㌀-㍗", - systems = {"syllabary"}, -} - --- These should be defined after the scripts they are composed of - -m["Jpan"] = { - canonicalName = "Japanese", - characters = m["Hira"].characters .. m["Kana"].characters .. m["Hani"].characters, - systems = {"syllabary", "logography"}, -} - -m["Kore"] = { - canonicalName = "Korean", - characters = m["Hang"].characters .. m["Hani"].characters .. "!-○", - systems = {"syllabary", "logography"}, -} - -m["CGK"] = { - canonicalName = "Korean", -} - -m["Hano"] = { - canonicalName = "Hanunoo", - characters = "ᜠ-᜴", - systems = {"abugida"}, -} - -m["Hebr"] = { - canonicalName = "Hebrew", - characters = u(0x0590) .. "-" .. u(0x05FF) .. u(0xFB1D) .. "-" .. u(0xFB4F), - direction = "rtl", - systems = {"abjad"}, -- more precisely, impure abjad -} - -m["Hmng"] = { - canonicalName = "Hmong", - otherNames = {"Pahawh Hmong"}, - characters = "𖬀-𖮏", -} - -m["Ibrn"] = { - canonicalName = "Iberian", -} - -m["Imag"] = { - -- To be used to avoid any formatting or link processing - canonicalName = "Image-rendered", - -- This should not have any characters listed - character_category = false, -} - -m["Inds"] = { - canonicalName = "Indus", - otherNames = {"Harappan", "Indus Valley"}, -} - -m["IPAchar"] = { - canonicalName = "International Phonetic Alphabet", -} - -m["Ital"] = { - canonicalName = "Old Italic", - characters = "𐌀-𐌣", - systems = {"alphabet"}, -} - -m["Java"] = { - canonicalName = "Javanese", - characters = "ꦀ-꧟", - systems = {"abugida"}, -} - -m["Jurc"] = { - canonicalName = "Jurchen", -} - -m["Kali"] = { - canonicalName = "Kayah Li", - characters = "꤀-꤯", -} - -m["Khar"] = { - canonicalName = "Kharoshthi", - characters = "𐨀-𐩘", - systems = {"abugida"}, - direction = "rtl", -} - -m["Khmr"] = { - canonicalName = "Khmer", - characters = "ក-៹᧠-᧿", - systems = 
{"abugida"}, -} - -m["Khoj"] = { - canonicalName = "Khojki", - characters = "𑈀-𑈽", - systems = {"abugida"}, -} - -m["Kitl"] = { - canonicalName = "Khitan Large", -} - -m["Kits"] = { - canonicalName = "Khitan Small", -} - -m["Knda"] = { - canonicalName = "Kannada", - characters = "ಀ-ೲ", - systems = {"abugida"}, -} - -m["Kthi"] = { - canonicalName = "Kaithi", - characters = "𑂀-𑃍", - systems = {"abugida"}, -} - -m["Lana"] = { - canonicalName = "Tai Tham", - otherNames = {"Tham", "Tua Mueang", "Lanna"}, - characters = "ᨠ-᪭", - systems = {"abugida"}, -} - -m["Laoo"] = { - canonicalName = "Lao", - characters = "ກ-ໟ", - systems = {"abugida"}, -} - -m["Latn"] = { - canonicalName = "Latin", - otherNames = {"Roman", "Rumi", "Romaji", "Rōmaji"}, - characters = "A-Za-zÀ-ÖØ-öø-ɏḀ-ỿ", - systems = {"alphabet"}, -} - -m["Latf"] = { - canonicalName = "Fraktur", - otherNames = {"Blackletter"}, - characters = m["Latn"].characters, -} - -m["Latinx"] = { - canonicalName = "Latin", - characters = m["Latn"].characters .. 
"Ⱡ-Ɀ꜠-ꟿꬰ-ꭥ", - parent = "Latn", -} - -m["nv-Latn"] = { - canonicalName = "Latin", - characters = m["Latn"].characters, - parent = "Latn", -} - -m["pjt-Latn"] = { - canonicalName = "Latin", - characters = m["Latn"].characters, - parent = "Latn", -} - -m["Leke"] = { - canonicalName = "Leke", - systems = {"abugida"}, -} - -m["Lepc"] = { - canonicalName = "Lepcha", - characters = "ᰀ-ᱏ", - systems = {"abugida"}, -} - -m["Limb"] = { - canonicalName = "Limbu", - characters = "ᤀ-᥏", - systems = {"abugida"}, -} - -m["Lina"] = { - canonicalName = "Linear A", - characters = "𐘀-𐝧", -} - -m["Linb"] = { - canonicalName = "Linear B", - characters = "𐀀-𐃺", -} - -m["Lisu"] = { - canonicalName = "Lisu", - otherNames = {"Fraser"}, - characters = "ꓐ-꓿", - systems = {"alphabet"}, -} - -m["Lyci"] = { - canonicalName = "Lycian", - characters = "𐊀-𐊜", - systems = {"alphabet"}, -} - -m["Lydi"] = { - canonicalName = "Lydian", - characters = "𐤠-𐤿", - systems = {"alphabet"}, -} - -m["Mahj"] = { - canonicalName = "Mahajani", - characters = "𑅐-𑅶", - systems = {"abugida"}, -} - -m["Maka"] = { - canonicalName = "Makasar", - characters = "𑻠-𑻸", - systems = {"abugida"}, -} - -m["Mand"] = { - canonicalName = "Mandaic", - otherNames = {"Mandaean"}, - characters = "ࡀ-࡞", - direction = "rtl", -} - -m["Mani"] = { - canonicalName = "Manichaean", - characters = "𐫀-𐫶", - direction = "rtl", - systems = {"abjad"}, -} - -m["Maya"] = { - canonicalName = "Maya", - otherNames = {"Maya hieroglyphic", "Mayan", "Mayan hieroglyphic"}, - characters = "𝋠-𝋳", -} - -m["Medf"] = { - canonicalName = "Medefaidrin", - otherNames = {"Oberi Okaime", "Oberi Ɔkaimɛ"}, - characters = "𖹀-𖺚", -} - -m["Mend"] = { - canonicalName = "Mende", - otherNames = {"Mende Kikakui"}, - characters = "𞠀-𞣖", - direction = "rtl", -} - -m["Merc"] = { - canonicalName = "Meroitic cursive", - characters = "𐦠-𐦿", - systems = {"abugida"}, -} - -m["Mero"] = { - canonicalName = "Meroitic hieroglyphic", - characters = "𐦀-𐦟", - systems = {"abugida"}, -} - 
-m["Mlym"] = { - canonicalName = "Malayalam", - characters = "ം-ൿ", - systems = {"abugida"}, -} - -m["Modi"] = { - canonicalName = "Modi", - characters = "𑘀-𑙙", - systems = {"abugida"}, -} - -m["Mong"] = { - canonicalName = "Mongolian", - characters = "᠀-ᢪ", - direction = "down", -} - -m["Morse"] = { - canonicalName = "Morse code", -} - -m["Mroo"] = { - canonicalName = "Mro", - characters = "𖩀-𖩯", -} - -m["Mtei"] = { - canonicalName = "Meitei Mayek", - characters = "ꯀ-꯿ꫠ-꫿", -} - -m["Mult"] = { - canonicalName = "Multani", - characters = "𑊀-𑊩", - systems = {"abugida"}, -} - -m["musical"] = { - canonicalName = "Musical notation", - characters = "𝄀-𝇝", - systems = {"pictography"}, -} - -m["Mymr"] = { - canonicalName = "Burmese", - otherNames = {"Myanmar"}, - characters = "က-႟ꩠ-ꩿꧠ-ꧾ", - systems = {"abugida"}, -} - -m["Narb"] = { - canonicalName = "Old North Arabian", - characters = "𐪀-𐪟", - systems = {"abjad"}, -} - -m["Nbat"] = { - canonicalName = "Nabataean", - otherNames = {"Nabatean"}, - characters = "𐢀-𐢯", - direction = "rtl", - systems = {"abjad"}, -} - -m["Newa"] = { - canonicalName = "Newa", - otherNames = {"Newar", "Newari", "Prachalit Nepal"}, -- and Ranjana? 
- characters = "𑐀-𑑞", - systems = {"abugida"}, -} - -m["Nkoo"] = { - canonicalName = "N'Ko", - characters = "߀-߿", - direction = "rtl", -} - -m["None"] = { - canonicalName = "Unspecified", -- renders as 'unspecified script' - -- This should not have any characters listed - character_category = false, -- none -} - -m["Nshu"] = { - canonicalName = "Nushu", - otherNames = {"Nüshu"}, - characters = "𖿡𛅰-𛋻", - systems = {"syllabary"}, -} - -m["Ogam"] = { - canonicalName = "Ogham", - characters = " -᚜", -} - -m["Olck"] = { - canonicalName = "Ol Chiki", - characters = "᱐-᱿", -} - -m["Orkh"] = { - canonicalName = "Orkhon runes", - characters = "𐰀-𐱈", - direction = "rtl", -} - -m["Orya"] = { - canonicalName = "Oriya", - otherNames = {"Odia"}, - characters = "ଁ-୷", -} - -m["Osge"] = { - canonicalName = "Osage", - characters = "𐒰-𐓻", -} - -m["Osma"] = { - canonicalName = "Osmanya", - characters = "𐒀-𐒩", -} - -m["Palm"] = { - canonicalName = "Palmyrene", - characters = "𐡠-𐡿", -} - -m["Pauc"] = { - canonicalName = "Pau Cin Hau", - characters = "𑫀-𑫸", -} - -m["Perm"] = { - canonicalName = "Old Permic", - characters = "𐍐-𐍺", -} - -m["Phag"] = { - canonicalName = "Phags-pa", - characters = "ꡀ-꡷", - systems = {"abugida"}, -} - -m["Marc"] = { - canonicalName = "Marchen", - characters = "𑱰-𑲶", - systems = {"abugida"}, -} - -m["Phli"] = { - canonicalName = "Inscriptional Pahlavi", - characters = "𐭠-𐭿", - direction = "rtl", - systems = {"abjad"}, -} - -m["Phlp"] = { - canonicalName = "Psalter Pahlavi", - characters = "𐮀-𐮯", - direction = "rtl", - systems = {"abjad"}, -} - -m["Phlv"] = { - canonicalName = "Book Pahlavi", - direction = "rtl", - systems = {"abjad"}, - -- Not in Unicode -} - -m["Phnx"] = { - canonicalName = "Phoenician", - characters = "𐤀-𐤟", - direction = "rtl", - systems = {"abjad"}, -} - -m["Plrd"] = { - canonicalName = "Pollard", - characters = "𖼀-𖾟", - systems = {"abugida"}, -} - -m["Prti"] = { - canonicalName = "Inscriptional Parthian", - characters = "𐭀-𐭟", - 
direction = "rtl", -} - -m["Rjng"] = { - canonicalName = "Rejang", - characters = "ꤰ-꥟", - systems = {"abugida"}, -} - -m["Rohg"] = { - canonicalName = "Hanifi Rohingya", - characters = "𐴀-𐴹", - direction = "rtl", - systems = {"alphabet"}, -} - -m["Ruminumerals"] = { - canonicalName = "Rumi numerals", - characters = "𐹠-𐹾", - character_category = "Rumi numerals", -} - -m["Runr"] = { - canonicalName = "Runic", - characters = "ᚠ-ᛰ", - systems = {"alphabet"}, -} - -m["Samr"] = { - canonicalName = "Samaritan", - characters = "ࠀ-࠾", - direction = "rtl", - systems = {"abjad"}, -} - -m["Sarb"] = { - canonicalName = "Old South Arabian", - characters = "𐩠-𐩿", - direction = "rtl", - systems = {"abjad"}, -} - -m["Saur"] = { - canonicalName = "Saurashtra", - characters = "ꢀ-꣙", - systems = {"abugida"}, -} - -m["Semap"] = { - canonicalName = "flag semaphore", - systems = {"pictography"}, -} - -m["Sgnw"] = { - canonicalName = "SignWriting", - characters = "𝠀-𝪯", - systems = {"pictography"}, -} - -m["Shaw"] = { - canonicalName = "Shavian", - characters = "𐑐-𐑿", -} - -m["Shrd"] = { - canonicalName = "Sharada", - characters = "𑆀-𑇙", - systems = {"abugida"}, -} - -m["Sidd"] = { - canonicalName = "Siddham", - characters = "𑖀-𑗝", - systems = {"abugida"}, -} - -m["Sind"] = { - canonicalName = "Khudawadi", - characters = "𑊰-𑋹", - systems = {"abugida"}, -} - -m["Sinh"] = { - canonicalName = "Sinhalese", - characters = "ං-෴", - systems = {"abugida"}, -} - -m["Sogd"] = { - canonicalName = "Sogdian", - characters = "𐼰-𐽙", - direction = "rtl", - systems = {"abjad"}, -} - -m["Sogo"] = { - canonicalName = "Old Sogdian", - characters = "𐼀-𐼧", - direction = "rtl", - systems = {"abjad"}, -} - -m["Sora"] = { - canonicalName = "Sorang Sompeng", - otherNames = {"Sora Sompeng"}, - characters = "𑃐-𑃹", -} - -m["Soyo"] = { - canonicalName = "Soyombo", - characters = "𑩐-𑪢", - systems = {"abugida"}, -} - -m["Sund"] = { - canonicalName = "Sundanese", - characters = "ᮀ-ᮿ", - systems = {"abugida"}, -} - 
-m["Sylo"] = { - canonicalName = "Syloti Nagri", - otherNames = {"Sylheti Nagari"}, - characters = "ꠀ-꠫", - systems = {"abugida"}, -} - -m["Syrc"] = { - canonicalName = "Syriac", - characters = "܀-ݏ"..u(0x0860).."-"..u(0x086A), - direction = "rtl", - systems = {"abjad"}, -- more precisely, impure abjad -} - --- Syre, Syrj, Syrn are apparently subsumed into Syrc; discuss if this causes issues - -m["Tagb"] = { - canonicalName = "Tagbanwa", - characters = "ᝠ-ᝳ", - systems = {"abugida"}, -} - -m["Takr"] = { - canonicalName = "Takri", - characters = "𑚀-𑛉", - systems = {"abugida"}, -} - -m["Tale"] = { - canonicalName = "Tai Nüa", - otherNames = {"Tai Nuea", "New Tai Nüa", "New Tai Nuea", "Dehong Dai", "Tai Dehong", "Tai Le"}, - characters = "ᥐ-ᥴ", - systems = {"abugida"}, -} - -m["Talu"] = { - canonicalName = "New Tai Lue", - characters = "ᦀ-᧟", - systems = {"abugida"}, -} - -m["Taml"] = { - canonicalName = "Tamil", - characters = "ஂ-௺", - systems = {"abugida"}, -} - -m["Tang"] = { - canonicalName = "Tangut", - characters = "𖿠𗀀-𘫲", - systems = {"logography", "syllabary"}, -} - -m["Tavt"] = { - canonicalName = "Tai Viet", - characters = "ꪀ-꫟", - systems = {"abugida"}, -} - -m["Telu"] = { - canonicalName = "Telugu", - characters = "ఀ-౿", - systems = {"abugida"}, -} - -m["Teng"] = { - canonicalName = "Tengwar", -} - -m["Tfng"] = { - canonicalName = "Tifinagh", - otherNames = {"Libyco-Berber", "Berber"}, - characters = "ⴰ-⵿", - systems = {"abjad", "alphabet"}, -} - -m["Tglg"] = { - canonicalName = "Baybayin", - otherNames = {"Tagalog"}, - characters = "ᜀ-᜔", - systems = {"abugida"}, -} - -m["Thaa"] = { - canonicalName = "Thaana", - characters = "ހ-ޱ", - systems = {"abugida"}, - direction = "rtl", -} - -m["Thai"] = { - canonicalName = "Thai", - characters = "ก-๛", - systems = {"abugida"}, -} - -m["Tibt"] = { - canonicalName = "Tibetan", - characters = "ༀ-࿚", - systems = {"abugida"}, -} - -m["Tirh"] = { - canonicalName = "Tirhuta", - characters = "𑒀-𑓙", - systems = 
{"abugida"}, -} - -m["xzh-Tibt"] = { - canonicalName = "Zhang-Zhung", - systems = {"abugida"}, -} - -m["Ugar"] = { - canonicalName = "Ugaritic", - characters = "𐎀-𐎟", - systems = {"abjad"}, -} - -m["Vaii"] = { - canonicalName = "Vai", - characters = "ꔀ-ꘫ", - systems = {"syllabary"}, -} - -m["Wara"] = { - canonicalName = "Varang Kshiti", - characters = "𑢠-𑣿", -} - -m["Xpeo"] = { - canonicalName = "Old Persian", - characters = "𐎠-𐏕", -} - -m["Xsux"] = { - canonicalName = "Cuneiform", - otherNames = {"Sumero-Akkadian Cuneiform"}, - characters = "𒀀-𒍮𒐀-𒑳", -} - -m["Yiii"] = { - canonicalName = "Yi", - characters = "ꀀ-꓆", - systems = {"syllabary"}, -} - -m["Zanb"] = { - canonicalName = "Zanabazar Square", - characters = u(0x11A00).."-"..u(0x11A47), -} - -m["Zmth"] = { - canonicalName = "mathematical notation", - characters = "ℵ∀-⋿⟀-⟯⦀-⧿⨀-⫿𝐀-𝟿", - character_category = "Mathematical notation symbols", -- ? -} - -m["Zsym"] = { - canonicalName = "symbol", - characters = "─-➿←-⇿⌀-⏿⬀-⯾🀀-🃵-🌀-🩭", - character_category = false, -- none - systems = {"pictography"}, -} - -m["Zyyy"] = { - canonicalName = "undetermined", - -- This should not have any characters listed, probably - character_category = false, -- none - characters = m["Latn"].characters, -} - -m["Zzzz"] = { - canonicalName = "uncoded", - -- This should not have any characters listed - character_category = false, -- none -} - -return m \ No newline at end of file diff --git a/wikt/translit/ab-translit.lua b/wikt/translit/ab-translit.lua deleted file mode 100644 index c22a861..0000000 --- a/wikt/translit/ab-translit.lua +++ /dev/null @@ -1,50 +0,0 @@ ---[[ -This module will transliterate Abkhaz language text per WT:AB TR. -Transliteration for Abkhaz. 
-]] -local export = {} - -local tt = { - ['А'] = 'Ā', ['а'] = 'ā', ['Б'] = 'B', ['б'] = 'b', ['В'] = 'V', ['в'] = 'v', ['Г'] = 'G', ['г'] = 'g', - ['Ӷ'] = 'Γ', ['ӷ'] = 'γ', ['Д'] = 'D', ['д'] = 'd', ['Џ'] = 'Ǯ', ['џ'] = 'ǯ', ['Е'] = 'E', ['е'] = 'e', - ['Ҽ'] = 'Č', ['ҽ'] = 'č', ['Ҿ'] = 'Č̣', ['ҿ'] = 'č̣', ['Ж'] = 'Ž', ['ж'] = 'ž', ['З'] = 'Z', ['з'] = 'z', - ['Ӡ'] = 'Ʒ', ['ӡ'] = 'ʒ', ['И'] = 'I', ['и'] = 'i', ['К'] = 'Ḳ', ['к'] = 'ḳ', ['Қ'] = 'K', ['қ'] = 'k', - ['Ҟ'] = 'Q̇', ['ҟ'] = 'q̇', ['Л'] = 'L', ['л'] = 'l', ['М'] = 'M', ['м'] = 'm', ['Н'] = 'N', ['н'] = 'n', - ['О'] = 'O', ['о'] = 'o', ['Ҩ'] = 'ʿ°', ['ҩ'] = 'ʿ°', ['П'] = 'Ṗ', ['п'] = 'ṗ', ['Ԥ'] = 'P', ['ԥ'] = 'p', - ['Р'] = 'R', ['р'] = 'r', ['С'] = 'S', ['с'] = 's', ['Т'] = 'Ṭ', ['т'] = 'ṭ', ['Ҭ'] = 'T', ['ҭ'] = 't', - ['У'] = 'U', ['у'] = 'u', ['Ф'] = 'F', ['ф'] = 'f', ['Х'] = 'X', ['х'] = 'x', ['Ҳ'] = 'Ḥ', ['ҳ'] = 'ḥ', - ['Ц'] = 'C', ['ц'] = 'c', ['Ҵ'] = 'C̣', ['ҵ'] = 'c̣', ['Ч'] = 'Č̍', ['ч'] = 'č̍', ['Ҷ'] = 'Č̣̍', ['ҷ'] = 'č̣̍', - ['Ш'] = 'Š', ['ш'] = 'š', ['Ы'] = 'Ə', ['ы'] = 'ə', ['Ь'] = '’', ['ь'] = '’', ['Ә'] = 'W', ['ә'] = 'w', ['́'] = '́', - -- obsolete letters, still in use - ['Ҕ'] = 'Γ', ['ҕ'] = 'γ', ['Ҧ'] = 'P', ['ҧ'] = 'p', - -- in borrowings from Russian and other languages - ['Ё'] = 'Ë', ['ё'] = 'ë', ['Й'] = 'J', ['й'] = 'j', ['Ъ'] = 'ʺ', ['ъ'] = 'ʺ', ['Э'] = 'È', ['э'] = 'è', - ['Ю'] = 'Ju', ['ю'] = 'ju', ['Я'] = 'Ja', ['я'] = 'ja', - - -- two letter mapping - ['дә'] = 'd°', ['Дә'] = 'D°', ['ҭә'] = 't°', ['Ҭә'] = 'T°', ['тә'] = 'ṭ°', ['Тә'] = 'Ṭ°', - ['ӡә'] = 'ʒ°', ['Ӡә'] = 'Ʒ°', ['цә'] = 'c°', ['Цә'] = 'C°', ['ҵә'] = 'c̣°', ['Ҵә'] = 'C̣°', - ['џь'] = 'ǯ̍', ['Џь'] = 'Ǯ̍', ['жь'] = 'ž̍', ['Жь'] = 'Ž̍', ['шь'] = 'š̍', ['Шь'] = 'Š̍', - ['жә'] = 'ž°', ['Жә'] = 'Ž°', ['шә'] = 'š°', ['Шә'] = 'Š°', ['гь'] = 'g̍', ['Гь'] = 'G̍', - ['қь'] = 'k̍', ['Қь'] = 'K̍', ['кь'] = 'ḳ̍', ['Кь'] = 'Ḳ̍', ['ҕь'] = 'γ̍', ['Ҕь'] = 'Γ̍', - ['ӷь'] = 'γ̍', ['Ӷь'] = 'Γ̍', ['хь'] = 'x̍', ['Хь'] = 'X̍', ['гу'] = 'g°', 
['Гу'] = 'G°', - ['қу'] = 'k°', ['Қу'] = 'K°', ['ку'] = 'ḳ°', ['Ку'] = 'Ḳ°', ['ҕу'] = 'γ°', ['Ҕу'] = 'Γ°', - ['ӷу'] = 'γ°', ['Ӷу'] = 'Γ°', ['ху'] = 'x°', ['Ху'] = 'X°', ['ҟь'] = 'q̇̍', ['Ҟь'] = 'Q̇̍', - ['ҟу'] = 'q̇°', ['Ҟу'] = 'Q̇°̍', ['ҳу'] = 'h°̍', ['Ҳу'] = 'H°', - ['гә'] = 'g°', ['Гә'] = 'G°', ['қә'] = 'k°', ['Қә'] = 'K°', ['кә'] = 'ḳ°', ['Кә'] = 'Ḳ°', - ['ҕә'] = 'γ°', ['Ҕә'] = 'Γ°', ['ӷә'] = 'γ°', ['Ӷә'] = 'Γ°', ['хә'] = 'x°', ['Хә'] = 'X°', - ['ҟә'] = 'q̇°', ['Ҟә'] = 'Q̇°̍', ['ҳә'] = 'h°̍', ['Ҳә'] = 'H°' -}; - -function export.tr(text, lang, sc) - -- If the script is given as Geor, then forward the transliteration to that module - if sc == "Geor" then - return require("Module:Geor-translit").tr(text, lang, sc) - end - text = mw.ustring.gsub(text, '[гГӷӶҕҔдДжЖӡӠкКқҚҟҞтТҭҬҵҴхХҳҲцЦџЏшШ].', tt) - text = mw.ustring.gsub(text, '.', tt) - - return text -end - -return export \ No newline at end of file diff --git a/wikt/translit/abq-translit.lua b/wikt/translit/abq-translit.lua deleted file mode 100644 index 9bec8a0..0000000 --- a/wikt/translit/abq-translit.lua +++ /dev/null @@ -1,123 +0,0 @@ --- This module will transliterate Abaza language text per WT:ABQ TR. 
-local export = {} - -local tt = { - ["а"]="ā", ["б"]="b", ["в"]="v", ["г"]="g", ["д"]="d", ["е"]="e", ["ё"]="ë", ["ж"]="j", ["з"]="z", - ["и"]="i", ["й"]="j", ["к"]="k", ["л"]="l", ["м"]="m", ["н"]="n", ["о"]="o", - ["п"]="p", ["р"]="r", ["с"]="s", ["т"]="t", ["у"]="u", ["ф"]="f", ["х"]="x", ["ц"]="c", ["ч"]="č̍", - ["ш"]="š", ["щ"]="š̍", ["ъ"]="ʾ", ["ы"]="ə", ["ь"]="’", ["э"]="è", - ["ю"]="ju", ["я"]="ja", ["А"]="Ā", ["Б"]="B", ["В"]="V", ["Г"]="G", ["Д"]="D", ["Е"]="E", ["Ё"]="Ë", ["Ж"]="J", ["З"]="Z", - ["И"]="I", ["Й"]="J", ["К"]="K", ["Л"]="L", ["М"]="M", ["Н"]="N", ["О"]="O", - ["П"]="P", ["Р"]="R", ["С"]="S", ["Т"]="T", ["У"]="U", ["Ф"]="F", ["Х"]="X", ["Ц"]="C", ["Ч"]="Č̍", - ["Ш"]="Š", ["Щ"]="Š̍", ["Ъ"]="ʾ", ["Ы"]="Ə", ["Ь"]="’", ["Э"]="È", - ["Ю"]="Ju", ["Я"]="Ja"}; - -local trigraphs = { - ['Хъв'] = 'Q°', - ['Гӏв'] = 'ʿ°', - ['Гъв'] = 'Γ°', - ['Гъь'] = 'Γ̍', - ['Джв'] = 'Ǯ°', - ['Джь'] = 'Ǯ̍', - ['Къв'] = 'Q̇°', - ['Къь'] = 'Q̇̍', - ['Кӏв'] = 'Ḳ°', - ['Кӏь'] = 'Ḳ̍', - ['Хӏв'] = 'H°', - ['Чӏв'] = 'Č̣°', - ['хъв'] = 'q°', - ['гӏв'] = 'ʿ°', - ['гъв'] = 'γ°', - ['гъь'] = 'γ̍', - ['джв'] = 'ǯ°', - ['джь'] = 'ǯ̍', - ['къв'] = 'q̇°', - ['къь'] = 'q̇̍', - ['кӏв'] = 'ḳ°', - ['кӏь'] = 'ḳ̍', - ['хӏв'] = 'h°', - ['чӏв'] = 'č̣°', -} - -local digraphs = { - ['гв'] = 'g°', - ['гъ'] = 'γ', - ['гь'] = 'g̍', - ['гӏ'] = 'ʿ', - ['дж'] = 'ǯ', - ['дз'] = 'ʒ', - ['жв'] = 'ž°', - ['жь'] = 'ž̍', - ['кв'] = 'k°', - ['къ'] = 'q̇', - ['кь'] = 'k̍', - ['кӏ'] = 'ḳ', - ['тл'] = 'ł', - ['тш'] = 'č', - ['тӏ'] = 'ṭ', - ['пӏ'] = 'ṗ', - ['хв'] = 'x°', - ['хь'] = 'x̍', - ['хӏ'] = 'ḥ', - ['цӏ'] = 'c̣', - ['чв'] = 'č°', - ['чӏ'] = 'č̣̍', - ['шӏ'] = 'č̣', - ['шв'] = 'š°', - ['ль'] = 'l', - ['лӏ'] = 'ḷ', - ['хъ'] = 'q', - ['фӏ'] = 'f̣', - ['Гв'] = 'G°', - ['Гъ'] = 'Γ', - ['Гь'] = 'G̍', - ['Гӏ'] = 'ʿ', - ['Дж'] = 'Ǯ', - ['Дз'] = 'Ʒ', - ['Жв'] = 'Ž°', - ['Жь'] = 'Ž̍', - ['Кв'] = 'K°', - ['Къ'] = 'Q̇', - ['Кь'] = 'K̍', - ['Кӏ'] = 'Ḳ', - ['Тл'] = 'Ł', - ['Тш'] = 'Č', - ['Тӏ'] = 'Ṭ', - ['Пӏ'] = 
'Ṗ', - ['Хв'] = 'X°', - ['Хь'] = 'X̍', - ['Хӏ'] = 'Ḥ', - ['Цӏ'] = 'C̣', - ['Чв'] = 'Č°', - ['Чӏ'] = 'Č̣̍', - ['Шӏ'] = 'Č̣', - ['Шв'] = 'Š°', - ['Ль'] = 'L', - ['Лӏ'] = 'Ḷ', - ['Хъ'] = 'Q', - ['Фӏ'] = 'F̣', -} - -function export.tr(text, lang, sc) - local str_gsub = string.gsub - local UTF8char = '[\1-\127\194-\244][\128-\191]*' - - -- Convert uppercase palochka to lowercase. Lowercase is found in tables - -- above. - text = str_gsub(text, mw.ustring.char(0x4C0), mw.ustring.char(0x4CF)) - - -- These two can use the basic string function. - for digraph, replacement in pairs(digraphs) do - text = str_gsub(text, digraph, replacement) - end - - for trigraph, replacement in pairs(trigraphs) do - text = str_gsub(text, trigraph, replacement) - end - - text = str_gsub(text, UTF8char, tt) - - return text -end - -return export \ No newline at end of file diff --git a/wikt/translit/ady-translit.lua b/wikt/translit/ady-translit.lua deleted file mode 100644 index db58034..0000000 --- a/wikt/translit/ady-translit.lua +++ /dev/null @@ -1,170 +0,0 @@ --- This module will transliterate Adyghe language text per WT:ADY TR. 
- --- Text Expected Actual --- Passed ахъчэзэблэхъущ āχčăzăblăχ°š̍ āχčăzăblăχ°š̍ --- Passed ахъчэжъгъэй āχčăẑġăj āχčăẑġăj --- Passed бгъотагъэ bġotāġă bġotāġă --- Passed бгъунджырыкӏу bġ°nǯ̍ərəḳ° bġ°nǯ̍ərəḳ° --- Passed бгъунджырыкӀу bġ°nǯ̍ərəḳ° bġ°nǯ̍ərəḳ° --- Passed бгъэхэӏушъхь bġăxăʾ°ŝḥ bġăxăʾ°ŝḥ --- Passed бгъэхэӀушъхь bġăxăʾ°ŝḥ bġăxăʾ°ŝḥ --- Passed былъытырыкӏу bəłətərəḳ° bəłətərəḳ° --- Passed былъытырыкӀу bəłətərəḳ° bəłətərəḳ° --- Passed гъогурыгъуазэ ġog°rəġ°āză ġog°rəġ°āză --- Passed жъогъо шӏӏэныгъэлӏ ẑoġo ṣ̂ʾănəġăḷ ẑoġo ṣ̂ʾănəġăḷ --- Passed жъогъо шӀӀэныгъэлӀ ẑoġo ṣ̂ʾănəġăḷ ẑoġo ṣ̂ʾănəġăḷ --- Passed жьыкъыдэгъэкӏын ž̍əqədăġăč̣̍ən ž̍əqədăġăč̣̍ən --- Passed жьыкъыдэгъэкӀын ž̍əqədăġăč̣̍ən ž̍əqədăġăč̣̍ən --- Passed къэкӏорэтхьэмафэ qăč̣̍orătḥămāfă qăč̣̍orătḥămāfă --- Passed къэкӀорэтхьэмафэ qăč̣̍orătḥămāfă qăč̣̍orătḥămāfă --- local function add_uppercase_palochka(examples) --- local out = {} --- local i = 0 --- local lowercase_palochka, uppercase_palochka = mw.ustring.char(0x4CF), mw.ustring.char(0x4C0) --- for _, example in ipairs(examples) do --- i = i + 1 --- out[i] = example --- if example[1]:find(lowercase_palochka) then --- i = i + 1 --- out[i] = { example[1]:gsub(lowercase_palochka, uppercase_palochka), example[2] } --- end --- end --- return out --- end - --- return require("Module:transliteration module testcases")( --- require("Module:ady-translit").tr, --- add_uppercase_palochka{ --- { "ахъчэзэблэхъущ", "āχčăzăblăχ°š̍" }, --- { "ахъчэжъгъэй", "āχčăẑġăj" }, --- { "бгъотагъэ", "bġotāġă" }, --- { "бгъунджырыкӏу", "bġ°nǯ̍ərəḳ°" }, --- { "бгъэхэӏушъхь", "bġăxăʾ°ŝḥ" }, --- { "былъытырыкӏу", "bəłətərəḳ°" }, --- { "гъогурыгъуазэ", "ġog°rəġ°āză" }, --- { "жъогъо шӏӏэныгъэлӏ", "ẑoġo ṣ̂ʾănəġăḷ" }, --- { "жьыкъыдэгъэкӏын", "ž̍əqədăġăč̣̍ən" }, --- { "къэкӏорэтхьэмафэ", "qăč̣̍orătḥămāfă" }, --- --[[ --- { "", "" }, --- --]] --- }, --- "Cyrl", "ady" --- ) - -local export = {} - -local tt = { - ["а"]="ā", ["б"]="b", ["в"]="v", ["г"]="ɣ", ["д"]="d", ["е"]="e", 
["ё"]="ë", ["ж"]="ž", - ["з"]="z", ["и"]="i", ["й"]="j", ["к"]="k", ["л"]="l", ["м"]="m", ["н"]="n", ["о"]="o", - ["п"]="p", ["р"]="r", ["с"]="s", ["т"]="t", ["у"]="w", ["ф"]="f", ["х"]="x", ["ц"]="c", - ["ч"]="č", ["ш"]="š", ["щ"]="š̍", ["ъ"]="”", ["ы"]="ə", ["ь"]="’", ["э"]="ă", ["ю"]="ju", - ["я"]="jā", ["ӏ"]="ʾ", ["А"]="Ā", ["Б"]="B", ["В"]="V", ["Г"]="Ɣ", ["Д"]="D", ["Е"]="E", - ["Ё"]="Ë", ["Ж"]="Ž", ["З"]="Z", ["И"]="I", ["Й"]="J", ["К"]="K", ["Л"]="L", ["М"]="M", - ["Н"]="N", ["О"]="O", ["П"]="P", ["Р"]="R", ["С"]="S", ["Т"]="T", ["У"]="W", ["Ф"]="F", - ["Х"]="X", ["Ц"]="C", ["Ч"]="Č", ["Ш"]="Š", ["Щ"]="Š̍", ["Ъ"]="”", ["Ы"]="Ə", ["Ь"]="’", - ["Э"]="Ă", ["Ю"]="Ju", ["Я"]="Jā", ["ӏ"]="ʾ"}; - -local triQuadrigraphs = { - ['кхъу'] = 'q°', - ['Кхъу'] = 'Q°', - ['гъу'] = 'ġ°', - ['дзу'] = 'ʒ°', - ['жъу'] = 'ẑ°', - ['Кхъ'] = 'Q', - ['кхъ'] = 'q', - ['къу'] = 'q°', - ['кӏу'] = 'ḳ°', - ['пӏу'] = 'ṗ°', - ['тӏу'] = 'ṭ°', - ['хъу'] = 'χ°', - ['шъу'] = 'ŝ°', - ['шӏу'] = 'ṣ̂°', - ['гу'] = 'g°', - ['Гъу'] = 'Ġ°', - ['Дзу'] = 'Ʒ°', - ['Жъу'] = 'Ẑ°', - ['Къу'] = 'Q°', - ['Кӏу'] = 'Ḳ°', - ['Пӏу'] = 'Ṗ°', - ['Тӏу'] = 'Ṭ°', - ['Хъу'] = 'Χ°', - ['Шъу'] = 'Ŝ°', - ['Шӏу'] = 'Ṣ̂°', -} - -local digraphs = { - ['гь'] = 'ɡ’', - ['гъ'] = 'ġ', - ['дж'] = 'ǯ̍', - ['дз'] = 'ʒ', - ['жъ'] = 'ẑ', - ['жь'] = 'ž̍', - ['ку'] = 'k°', - ['къ'] = 'q', - ['кӏ'] = 'č̣̍', - ['лъ'] = 'ł', - ['лӏ'] = 'ḷ', - ['пӏ'] = 'ṗ', - ['сӏ'] = 'ṣ̣', - ['тӏ'] = 'ṭ', - ['фӏ'] = 'f̣', - ['ху'] = 'x°', - ['хъ'] = 'χ', - ['хь'] = 'ḥ', - ['цу'] = 'c°', - ['цӏ'] = 'c̣', - ['чӏ'] = 'č̣', - ['чу'] = 'č̍°', - ['чъ'] = 'č', - ['шъ'] = 'ŝ', - ['шӏ'] = 'ṣ̂', - ['ӏу'] = 'ʾ°', - ['ӏь'] = '՚̍', - ['Гу'] = 'G°', - ['Гь'] = 'ɡ’', - ['Гъ'] = 'Ġ', - ['Дж'] = 'Ǯ̍', - ['Дз'] = 'Ʒ', - ['Жъ'] = 'Ẑ', - ['Жь'] = 'Ž̍', - ['Ку'] = 'K°', - ['Къ'] = 'Q', - ['Кӏ'] = 'Č̣̍', - ['Лъ'] = 'Ł', - ['Лӏ'] = 'Ḷ', - ['Пӏ'] = 'Ṗ', - ['Сӏ'] = 'Ṣ̣', - ['Тӏ'] = 'Ṭ', - ['Фӏ'] = 'F̣', - ['Ху'] = 'X°', - ['Хъ'] = 'Χ', - ['Хь'] = 'Ḥ', - ['Цу'] = 'C°', - ['Цӏ'] 
= 'C̣', - ['Чӏ'] = 'Č̣', - ['Чу'] = 'Č̍°', - ['Чъ'] = 'Č', - ['Шъ'] = 'Ŝ', - ['Шӏ'] = 'Ṣ̂', -} - -function export.tr(text, lang, sc) - local str_gsub = string.gsub - local UTF8char = '[%z\1-\127\194-\244][\128-\191]*' - - -- Convert capital to lowercase palochka. Lowercase is found in tables - -- above. - text = str_gsub(text, mw.ustring.char(0x4C0), mw.ustring.char(0x4CF)) - - for comb, repl in pairs(triQuadrigraphs) do - text = str_gsub(text, comb, repl) - end - for comb, repl in pairs(digraphs) do - text = str_gsub(text, comb, repl) - end - - return (str_gsub(text, UTF8char, tt)) -end - -return export \ No newline at end of file diff --git a/wikt/translit/ahom-translit.lua b/wikt/translit/ahom-translit.lua deleted file mode 100644 index 9ad1bbe..0000000 --- a/wikt/translit/ahom-translit.lua +++ /dev/null @@ -1,90 +0,0 @@ ---This module will transliterate text in the Ahom script. It is used to transliterate Ahom (aho). - -local export = {} -local gsub = mw.ustring.gsub -local u = mw.ustring.char -local con_cls = "(["..u(0x11700).."-"..u(0x11719).."]["..u(0x1171D)..u(0x1171E)..u(0x1171F).."]?)" - -local tt = { - -- consonants - [u(0x11700)] = "k", [u(0x11701)] = "kh", [u(0x11702)] = "ng", [u(0x11703)] = "n", [u(0x11704)] = "t", [u(0x11705)] = "t", - [u(0x11706)] = "p", [u(0x11707)] = "ph", [u(0x11708)] = "b", [u(0x11709)] = "m", [u(0x1170A)] = "y", [u(0x1170B)] = "ch", - [u(0x1170C)] = "th", [u(0x1170D)] = "r", [u(0x1170E)] = "l", [u(0x1170F)] = "s", [u(0x11710)] = "ny", [u(0x11711)] = "h", - [u(0x11712)] = "’", [u(0x11713)] = "d", [u(0x11714)] = "dh", [u(0x11715)] = "g", [u(0x11716)] = "g", [u(0x11717)] = "gh", - [u(0x11718)] = "bh", [u(0x11719)] = "jh", - -- medials - [u(0x1171D)] = "l", [u(0x1171E)] = "r", [u(0x1171F)] = "r", - -- vowels (excluding composition) - [u(0x11720)] = "a", [u(0x11721)] = "aa", [u(0x11722)] = "i", [u(0x11723)] = "ii", - [u(0x11724)] = "u", [u(0x11725)] = "uu", [u(0x11727)] = "w", [u(0x11729)] = "y", - [u(0x11726)] = "e", [u(0x11728)] = 
"o", - [u(0x1172A)] = "m", [u(0x1172B)] = "", - -- numerals - [u(0x11730)] = "0", [u(0x11731)] = "1", [u(0x11732)] = "2", [u(0x11733)] = "3", [u(0x11734)] = "4", - [u(0x11735)] = "5", [u(0x11736)] = "6", [u(0x11737)] = "7", [u(0x11738)] = "8", [u(0x11739)] = "9", - [u(0x1173A)] = "[10]", [u(0x1173B)] = "[20]", - -- punctuations and symbols - [u(0x1173C)] = ",", [u(0x1173D)] = ".", [u(0x1173E)] = "@", [u(0x1173F)] = "vi", - -- zero-width space (display it if it hides in a word) - [u(0x200B)] = "‼", -} - -local adjust0 = { - -- vowels (composition) - [u(0x11722)..u(0x11724)] = "ue", - [u(0x11726)..u(0x11721)] = "oo", - [u(0x11726)..u(0x11727)] = "ee", - [u(0x11729)..u(0x11724)] = "aay", -} - -function export.tr(text, lang, sc, debug_mode) - - if type(text) == "table" then -- called directly from a template - text = text.args[1] - end - - text = gsub(text, u(0x11708).."("..u(0x1172B)..")", "w%1") -- final -b becomes -w - text = gsub(text, con_cls.."(["..u(0x11700).."-"..u(0x11719).."w]"..")"..u(0x1172B), "%1a%2") - text = gsub(text, con_cls.."(["..u(0x11727)..u(0x11729)..u(0x1172A).."])", "%1a%2") - - for k, v in pairs(adjust0) do - text = gsub(text, con_cls..k, "%1"..v) - end - - text = gsub(text, ".", tt) - - return text - -end - -return export - - --- Text Expected Actual --- Passed 𑜁𑜨𑜧 khow khow --- Passed 𑜁𑜦𑜡 khoo khoo --- local tests = require("Module:UnitTests") --- local transliterate = require("Module:Ahom-translit").tr - --- local function tag(text) --- return '' .. text .. 
'' --- end - --- function tests:check(example, expected) --- return self:equals( --- tag(example), --- expected, --- transliterate(example) --- ) --- end - --- function tests:test_translit() --- local examples = { --- { "𑜁𑜨𑜧", "khow" }, --- { "𑜁𑜦𑜡", "khoo" }, --- } - --- tests:iterate(examples, "check") --- end - --- return tests diff --git a/wikt/translit/ain-translit.lua b/wikt/translit/ain-translit.lua deleted file mode 100644 index 6ed005b..0000000 --- a/wikt/translit/ain-translit.lua +++ /dev/null @@ -1,1020 +0,0 @@ ---[[ This module is experimental. -The details of its operation have not yet been fully decided upon. Do not deploy widely until the module is finished. -This module will transliterate Ainu language text. -]] -local export = {} -local gmatch = mw.ustring.gmatch -local find = mw.ustring.find -local gsub = mw.ustring.gsub - -local corresp = { - -- main - ["ア"] = "¤a", ["イ"] = "¤i", ["ウ"] = "¤u", ["エ"] = "¤e", ["オ"] = "¤o", - ["カ"] = "ka", ["キ"] = "ki", ["ク"] = "ku", ["ケ"] = "ke", ["コ"] = "ko", - ["シャ"] = "sa", ["シ"] = "si", ["シュ"] = "su", ["シェ"] = "se", ["ショ"] = "so", - ["タ"] = "ta", ["チ"] = "ci", ["ト゚"] = "tu", ["テ"] = "te", ["ト"] = "to", - ["チャ"] = "ca", ["ツ"] = "cu", ["セ゚"] = "ce", ["チョ"] = "co", - ["ナ"] = "na", ["ニ"] = "ni", ["ヌ"] = "nu", ["ネ"] = "ne", ["ノ"] = "no", - ["ハ"] = "ha", ["ヒ"] = "hi", ["フ"] = "hu", ["ヘ"] = "he", ["ホ"] = "ho", - ["バ"] = "ba", ["ビ"] = "bi", ["ブ"] = "bu", ["ベ"] = "be", ["ボ"] = "bo", - ["パ"] = "pa", ["ピ"] = "pi", ["プ"] = "pu", ["ペ"] = "pe", ["ポ"] = "po", - ["マ"] = "ma", ["ミ"] = "mi", ["ム"] = "mu", ["メ"] = "me", ["モ"] = "mo", - ["ヤ"] = "ya", ["ユ"] = "yu", ["イェ"] = "ye", ["ヨ"] = "yo", - ["ラ"] = "ra", ["リ"] = "ri", ["ル"] = "ru", ["レ"] = "re", ["ロ"] = "ro", - ["ワ"] = "wa", ["ウィ"] = "wi", ["ウェ"] = "we", ["ウォ"] = "wo", - - -- finals - ["ㇵ"] = "h¤", ["ㇶ"] = "h¤", ["ㇷ"] = "h¤", ["ㇸ"] = "h¤", ["ㇹ"] = "h¤", - ["ㇻ"] = "r¤", ["ㇼ"] = "r¤", ["ㇽ"] = "r¤", ["ㇾ"] = "r¤", ["ㇿ"] = "r¤", - ["ㇰ"] = "k¤", - ["ㇱ"] = "s¤", - ["ㇳ"] = "t¤", - 
["ㇴ"] = "n¤", - ["ㇺ"] = "m¤", - ["ㇷ゚"] = "p¤", - - -- misc - ["ィ"] = "y¤", ["ゥ"] = "w¤", - ["ー"] = "̄", - ["・"] = "=", - - -- alt spellings? - ["サ"] = "sa", ["ス"] = "su", ["セ"] = "se", ["ソ"] = "so", - ["ツ゚"] = "tu", - ["チュ"] = "cu", ["チェ"] = "ce", - ["ヰ"] = "wi", ["ヱ"] = "we", ["ヲ"] = "wo", - - ["ㇲ"] = "s¤", - ["ッ"] = "x¤", - ["ン"] = "n¤", - - ["トゥ"] = "tu", -} - -function export.tr(text, lang, sc) - local result = {} - for string in gmatch(text, '.[ィゥェォャュョ゚]?') do - if corresp[string] then -- try to convert character sequences - string = corresp[string] - else - local str_result = {} - for char in gmatch(string, '.') do -- try again over every individual character - table.insert(str_result, corresp[char] or char) - end - string = table.concat(str_result) - end - table.insert(result, string) - end - text = table.concat(result) - text = mw.ustring.toNFC(text) - - if find(text, 'x¤[kbp]') then -- 'ッ' - text = gsub(text, 'x¤([kbp])', '%1¤%1') - else - text = gsub(text, 'x¤', 't¤') - end - - text = gsub(text, 'n¤([mbp])', 'n-%1') - - text = gsub(text, '([aiueo])¤i', '%1y') -- change 'アイ'→'ai', 'エイ'→'ei', etc. to 'ay', 'ey' - text = gsub(text, '([aiueo])¤u', '%1w') -- change 'アウ'→'au', 'エウ'→'eu', etc. 
to 'aw', 'ew' - text = gsub(text, '¤', '') - - return text -end - -return export - --- Text Expected Actual --- Passed アイヌ・イタㇰ aynu=itak aynu=itak --- Passed カィ ; クィ ; コィ ; カゥ ; キゥ ; ケゥ ; コゥ ; ケィ kay ; kuy ; koy ; kaw ; kiw ; kew ; kow ; key kay ; kuy ; koy ; kaw ; kiw ; kew ; kow ; key --- Passed カー ; キー ; クー ; ケー ; コー kā ; kī ; kū ; kē ; kō kā ; kī ; kū ; kē ; kō --- Passed アィヌ モシㇼ aynu mosir aynu mosir --- Failed チ カㇻ アィヌ イタㇰ ウィキペンチア カンピソ アナㇰネ ci=kar aynu itak wikipencia kanpiso anakne ci kar aynu itak wikipencia kan-piso anakne --- Passed ペッ ; ペㇳ pet ; pet pet ; pet --- Failed オッタ orta otta --- Failed アッペ akpe appe --- Passed イワン ; イワㇴ iwan ; iwan iwan ; iwan --- Passed ケㇺ kem kem --- Passed ケゥ kew kew --- Passed チュㇷ゚ケㇱ cupkes cupkes --- Passed ポンペ pon-pe pon-pe --- Passed タンモシㇼ tan-mosir tan-mosir --- Passed レプンクㇽ ; レプㇴクㇽ repunkur ; repunkur repunkur ; repunkur --- Passed エトゥ ピㇼカ etu pirka etu pirka --- Passed オンネㇷ゚ onnep onnep --- Passed ケマ フレ kema hure kema hure --- Passed コマイ komay komay --- Passed カンカイ kankay kankay --- Passed スサㇺ susam susam --- Passed トゥナカイ tunakay tunakay --- Passed ノンノ nonno nonno --- Passed ハㇱカㇷ゚ haskap haskap --- Passed ポㇰ pok pok --- Passed セイ sey sey --- Passed ラッコ rakko rakko --- Passed ルイペ ruype ruype --- Passed チャペ cape cape --- Passed アペ ape ape --- Passed カㇻ kar kar --- Passed キナ kina kina --- Passed ラㇻ rar rar --- Passed トゥスㇱケ tususke tususke --- Passed ア- a- a- --- Passed アィアィ ayay ayay --- Passed アィヌ aynu aynu --- Failed アィヌモシㇼ aynu-mosir aynumosir --- Passed アイ ay ay --- Passed アイヌ aynu aynu --- Failed アイヌイタㇰ aynu itak aynuytak --- Failed アイル airu ayru --- Passed アエㇷ゚ aep aep --- Passed アカㇺ akam akam --- Passed アキ aki aki --- Passed アシペケッ asipeket asipeket --- Passed アシㇰネ asikne asikne --- Passed アチポ acipo acipo --- Failed アチャ acha aca --- Passed アチャポ acapo acapo --- Failed アッツシ attush atcusi --- Passed アットゥㇱ attus attus --- Passed アトゥ atu atu --- Passed アパ apa apa --- Passed アフン ahun ahun --- Passed アフㇷ゚ ahup ahup --- 
Failed アプトアㇱ aptoash aputoas --- Failed アプトアㇱパ aptoashpa aputoaspa --- Passed アベ abe abe --- Passed アベチクニ abecikuni abecikuni --- Failed アベバシュイ abebashui abebasuy --- Passed アペ ape ape --- Passed アママ amama amama --- Passed アマㇺ amam amam --- Passed アミ ami ami --- Passed アミヒ amihi amihi --- Passed アムシペ amusipe amusipe --- Passed アムㇱペ amuspe amuspe --- Passed アリㇷ゚ arip arip --- Passed アン an an --- Passed アンケㇱ ankes ankes --- Passed アンチカㇻ ancikar ancikar --- Passed アントゥキ antuki antuki --- Passed アㇰ ak ak --- Failed アㇷ゚カシ apkash apkasi --- Passed アㇷ゚カㇱ apkas apkas --- Passed アㇷ゚ト apto apto --- Passed アㇺ am am --- Passed アㇺアㇺ amam amam --- Passed アㇻワン arwan arwan --- Passed イサ チセ isa cise isa cise --- Passed イサㇺ isam isam --- Passed イソ iso iso --- Passed イタコ itako itako --- Passed イタㇰ itak itak --- Passed イタㇵ itah itah --- Passed イナゥ inaw inaw --- Passed イナオ inao inao --- Passed イネ ine ine --- Passed イワ iwa iwa --- Passed イワン iwan iwan --- Failed ウォセカムィ wose-kamuy wosekamuy --- Passed ウタラ utara utara --- Passed ウタリ utari utari --- Passed ウタレ utare utare --- Passed ウナㇻペ unarpe unarpe --- Passed ウパㇱ upas upas --- Failed ウンマ umma un-ma --- Passed エアニ eani eani --- Passed エカㇱ ekas ekas --- Passed エサマン esaman esaman --- Failed エツ゚ ’etu etu --- Passed エトゥ etu etu --- Passed エトㇽ etor etor --- Passed エムシ emusi emusi --- Failed エムシイ emusi emusiy --- Passed オウペカ owpeka owpeka --- Passed オッカヨ okkayo okkayo --- Passed オナ ona ona --- Passed カパㇷ゚ kapap kapap --- Passed カムィ kamuy kamuy --- Failed カムィチェㇷ゚ kamuy-cep kamuycep --- Failed カムィフㇺベ kamuy-humbe kamuyhumbe --- Failed カムィモシㇼ kamuy-mosir kamuymosir --- Failed カムイモシリ kamuimosiri kamuymosiri --- Passed カㇺビ kambi kambi --- Failed カㇺビソㇱ kambisosh kambisos --- Passed キキㇼ kikir kikir --- Passed キサㇻ kisar kisar --- Failed キム kim kimu --- Passed クアニ kuani kuani --- Passed クㇷ゚ kup kup --- Failed ケス kes kesu --- Failed ケム kem kemu --- Passed ケモリッ kemorit kemorit --- Passed ケラ kera kera --- Passed コタン kotan kotan --- Passed コッ kot kot --- 
Failed コンボ kombo kon-bo --- Passed コㇿ kor kor --- Passed サパ sapa sapa --- Passed サポ sapo sapo --- Passed シサㇺ sisam sisam --- Passed シネ sine sine --- Passed シネペサン sinepesan sinepesan --- Failed シュマリ shumari sumari --- Passed シㇰ sik sik --- Passed シㇼ sir sir --- Passed スス susu susu --- Passed セ゚ ce ce --- Passed セタ seta seta --- Passed ソ so so --- Passed チイェネ ciyene ciyene --- Passed チイェヘ ciyehe ciyehe --- Passed チェㇷ゚ cep cep --- Passed チセ cise cise --- Failed チャシ chasi casi --- Passed チャペ cape cape --- Passed チュㇷ゚ cup cup --- Passed チㇱ cis cis --- Passed ツ゚ tu tu --- Passed ツ゚ペサン tupesan tupesan --- Failed ツ゚ンプ tunpu tun-pu --- Passed テㇰ tek tek --- Passed ト to to --- Passed ト゚ tu tu --- Passed トゥキ tuki tuki --- Passed トゥナカィ tunakay tunakay --- Passed トゥレㇱ tures tures --- Passed トペ tope tope --- Passed トンコリ tonkori tonkori --- Passed ナイ nay nay --- Passed ナン nan nan --- Passed ニ ni ni --- Passed ニㇱ nis nis --- Passed ヌカㇽ nukar nukar --- Passed ヌチャ nuca nuca --- Passed ヌプリ nupuri nupuri --- Passed ヌマリ numari numari --- Passed ヌマン numan numan --- Passed ネトパケ netopake netopake --- Passed ハポ hapo hapo --- Failed ハンペ hanpe han-pe --- Failed ハㇱカプ haskap haskapu --- Passed ハㇺ ham ham --- Passed パケ pake pake --- Failed パシユイ pashui pasiyuy --- Failed パスイ pasui pasuy --- Passed ピリカ pirika pirika --- Passed フ hu hu --- Passed フチ huci huci --- Passed フッチ hutci hutci --- Failed フンチ・ヌプリ hunci nupuri hunci=nupuri --- Failed フンペ humpe hun-pe --- Passed フㇺベ humbe humbe --- Passed プクサ pukusa pukusa --- Failed ペツ pet pecu --- Passed ホシピ hosipi hosipi --- Passed ホシピレ hosipire hosipire --- Failed ホッケ hotke hokke --- Failed ホッケレ hotkere hokkere --- Passed ホㇰ hok hok --- Passed ホㇿケゥ horkew horkew --- Passed ポル poru poru --- Failed マウ mau maw --- Passed マタキ mataki mataki --- Passed ミチ mici mici --- Passed ムックリ mukkuri mukkuri --- Passed メノコ menoko menoko --- Passed モシリ mosiri mosiri --- Passed モシㇼ mosir mosir --- Passed モユㇰ moyuk moyuk --- Passed ユポ yupo yupo --- Passed ユㇰ yuk yuk --- 
Failed ライ rai ray --- Passed ラッコ rakko rakko --- Passed レ re re --- Passed レエㇷ゚ reep reep --- Passed レハムㇱ rehamus rehamus --- Failed レプンカムィ repun-kamuy repunkamuy --- Passed レラ rera rera --- Passed ワッカ wakka wakka --- Passed ワン wan wan --- Failed [[ラ]] rera rera --- Failed ぺ -pe ぺ --- Failed ぺ pe ぺ --- Passed アイ ay ay --- Failed アイカㇷ゚ aykap aykap --- Passed アエㇷ゚ aep aep --- Passed アオカ aoka aoka --- Passed アオカイ aokay aokay --- Passed アクス akusu akusu --- Passed アシ asi asi --- Passed アシヌマ asinuma asinuma --- Passed アシㇰネ asikne asikne --- Passed アシㇰネン asiknen asiknen --- Passed アシㇰネㇷ゚ asiknep asiknep --- Failed アシㇽ / アシㇼ asir asir / asir --- Passed アスㇽ asur asur --- Passed アタイ atay atay --- Passed アチャポ acapo acapo --- Failed アッコチケ atkocike akkocike --- Failed アトゥイ / アト゚イ(アト゜イ) atuy atuy / atuy(ato゜i) --- Passed アニ ani ani --- Passed アヌ anu anu --- Failed [[アヌリ]] anutari anutari --- Passed アノカイ anokay anokay --- Passed アパ apa apa --- Passed アフン ahun ahun --- Passed アフンケ ahunke ahunke --- Failed アフンパㇻ / アフンパㇽ ahunpar ahun-par / ahun-par --- Failed アフンポル ahunporu ahun-poru --- Failed アフンルパㇻ / アフンルパㇽ ahunrupar ahunrupar / ahunrupar --- Failed アフㇷ゚ ahup ahup --- Passed アフㇷ゚テ ahupte ahupte --- Passed アプンノ apunno apunno --- Passed アペ ape ape --- Passed アペアリ apeari apeari --- Passed アペパスイ apepasuy apepasuy --- Passed アマㇺ amam amam --- Passed アミㇷ゚ amip amip --- Passed アリ ari ari --- Passed アリキキ arikiki arikiki --- Passed アン an an --- Failed アンぺ anpe anぺ --- Passed アンノㇱキ annoski annoski --- Passed アㇱ as as --- Passed アㇱカイ askay askay --- Failed アㇱケペㇳ / アㇱケペッ askepet askepet / askepet --- Failed アㇳ / アッ at at / at --- Passed アㇷ゚カㇱ apkas apkas --- Passed アㇷ゚ト apto apto --- Passed アㇺ am am --- Failed アㇺキㇼ / アㇺキㇽ amkir amkir / amkir --- Failed アㇻ/アㇽ -ar ar/ar --- Failed アㇻカ / アㇽカ arka arka / arka --- Failed アㇻキ / アㇽキ arki arki / arki --- Failed アㇻスイ / アㇽスイ arsuy arsuy / arsuy --- Failed アㇻパ / アㇽパ arpa arpa / arpa --- Passed アㇻワニウ arwaniw arwaniw --- Passed アㇻワン arwan arwan --- 
Failed アㇻワンペ arwanpe arwan-pe --- Failed イゥ/イウ -iw iw/iw --- Failed イゥ/イウ iw iw/iw --- Passed イェ ye ye --- Passed イオマンテ iomante iomante --- Passed イカㇱマ ikasma ikasma --- Passed イキ iki iki --- Passed イク iku iku --- Passed イクパスイ ikupasuy ikupasuy --- Passed イクルイ ikuruy ikuruy --- Passed イクレ ikure ikure --- Failed イサ チセ isa cise isa cise --- Failed イサ ニㇱパ isa nispa isa nispa --- Passed イサㇺ isam isam --- Passed イシタイキ isitayki isitayki --- Passed イセポ isepo isepo --- Passed イソイタㇰ isoytak isoytak --- Passed イタコ itako itako --- Passed イタンキ itanki itanki --- Passed イタㇰ itak itak --- Passed イチェン icen icen --- Passed イチャッケレレ icakkerere icakkerere --- Failed イチャㇰケレ/イチャッケレ icakkere icakkere/icakkere --- Passed イッカ ikka ikka --- Passed イッカクㇽ ikkakur ikkakur --- Passed イテキ iteki iteki --- Passed イテセ itese itese --- Passed イナウケ inawke inawke --- Passed イナン inan inan --- Passed イヌイェ inuye inuye --- Passed イネ ine ine --- Passed イネン inen inen --- Passed イネㇷ゚ inep inep --- Passed イフライェ ihuraye ihuraye --- Passed イペ ipe ipe --- Passed イペパスイ ipepasuy ipepasuy --- Passed イペルスイ iperusuy iperusuy --- Passed イペレ ipere ipere --- Passed イメル imeru imeru --- Passed イヤイイライケレ iyayiraykere iyayiraykere --- Failed イヤイライケレ iyairaykere iyayraykere --- Passed イヨッタ iyotta iyotta --- Passed イヨハイ iyohay iyohay --- Passed イララ irara irara --- Passed イルㇱカ iruska iruska --- Passed イワニウ iwaniw iwaniw --- Passed イワンケ iwanke iwanke --- Failed イワンペ iwanpe iwan-pe --- Failed イワㇴ / イワン iwan iwan / iwan --- Passed インネ inne inne --- Passed イ・ i= i= --- Passed イㇱラㇺ isram isram --- Failed イㇼワクタㇻ irwakutar irwakutar --- Passed ウェン wen wen --- Passed ウェンクㇽ wenkur wenkur --- Passed ウタロカ utaroka utaroka --- Passed ウタㇻ utar utar --- Passed ウパㇱ upas upas --- Passed ウパㇱクマ upaskuma upaskuma --- Passed ウㇱ us us --- Failed ウㇴ/ウン un un/un --- Passed エアニ eani eani --- Failed エイ・ e=i= ey= --- Passed エソロ esoro esoro --- Failed [[エチウリ]] eciutari eciwtari --- Passed エチオカ ecioka ecioka --- Passed エチオカイ eciokay eciokay --- Passed エチ・ 
eci= eci= --- Passed エンカ enka enka --- Failed エンカㇱ enkasi enkas --- Failed エンカㇱケ enkasike enkaske --- Passed エ・ e= e= --- Passed エㇰ ek ek --- Failed エㇰテ:ette の語源自覚的な綴り。エッテ。 ekte ekte:ette の語源自覚的な綴り。ette。 --- Passed オカ oka oka --- Passed オカイ okay okay --- Passed オナ ona ona --- Failed オハイヌ ohainu ohaynu --- Failed オハインカㇻ ohainkar ohaynkar --- Passed オマナン omanan omanan --- Failed オマㇴ/オマン oman oman/oman --- Passed オヤパ oyapa oyapa --- Passed オロ oro oro --- Failed オロ oroke oro --- Failed オワ・イヌ owa inu owa=inu --- Passed オン on on --- Passed オㇿ or or --- Passed カシ kasi kasi --- Passed カシケ kasike kasike --- Passed カネ kane kane --- Failed カパチㇼ / カパッチㇼ kapatcir kapacir / kapatcir --- Failed カㇳチ/カッチ katci katci/katci --- Passed ク・イ・ ku=i= ku=i= --- Passed クㇱ kus kus --- Passed コトㇺ kotom kotom --- Passed コㇱマ kosma kosma --- Failed コㇿ / コㇽ kor kor / kor --- Passed コㇿポックㇽ korpokkur korpokkur --- Passed サン san san --- Passed サンタン santan santan --- Passed サㇷ゚ sap sap --- Failed シサミタㇰ sisam itak sisamitak --- Failed シサㇺ / シサム sisam sisam / sisamu --- Failed シチョㇿポㇰ / シチョㇽポㇰ sicorpok sicorpok / sicorpok --- Passed シネペサニウ sinepesaniw sinepesaniw --- Passed シネペサン sinepesan sinepesan --- Failed シネペサンペ sinepesanpe sinepesan-pe --- Failed シネㇴ / シネン sinen sinen / sinen --- Passed シネㇷ゚ sinep sinep --- Passed シロマ siroma siroma --- Passed シンリッ sinrit sinrit --- Failed シㇶ six sih --- Failed シㇼ / シㇽ sir sir / sir --- Passed ス su su --- Passed ソンノ sonno sonno --- Passed タネ tane tane --- Passed タント tanto tanto --- Failed タンパ tanpa tan-pa --- Passed チキㇼ cikir cikir --- Passed チセ cise cise --- Failed チョㇿポキ / チョㇽポキ corpoki corpoki / corpoki --- Failed チョㇿポキケ / チョㇽポキケ corpokike corpokike / corpokike --- Failed チョㇿポッケ / チョㇽポッケ corpokke corpokke / corpokke --- Failed チョㇿポㇰ / チョㇽポㇰ corpok corpok / corpok --- Failed チョㇿポㇰタ / チョㇽポㇰタ corpok-ta corpokta / corpokta --- Failed テ -te te --- Passed テ te te --- Passed テエタ teeta teeta --- Failed ト゚ / ツ゚゚゚゚, トゥ tu tu / tu゚゚゚, tu --- Failed トゥペサニウ / ト゚ペサニウ(ト゜ペサニウ) 
tupesaniw tupesaniw / tupesaniw(to゜pesaniw) --- Failed トゥペサン / ト゚ペサン tupesan tupesan / tupesan --- Failed トゥペサンペ / ト゚ペサンペ tupesanpe tupesan-pe / tupesan-pe --- Failed トゥㇷ゚ / ト゚ㇷ゚ (ト゜プ) tup tup / tup (to゜pu) --- Passed ナ na na --- Passed ニサッタ nisatta nisatta --- Passed ニㇱパ nispa nispa --- Passed ヌマン numan numan --- Passed ハウ haw haw --- Passed ハウェアン hawean hawean --- Passed ハッ hat hat --- Failed ハㇺペ/ハンペ hampe hampe/han-pe --- Passed パ pa pa --- Passed パイェ paye paye --- Passed パイェカ payeka payeka --- Passed パイェカイ payekay payekay --- Passed パセ pase pase --- Failed パㇻ / パㇽ par par / par --- Passed ヒネ hine hine --- Passed ピㇱカニ piskani piskani --- Passed ピㇱカニケ piskanike piskanike --- Passed ピㇱカン piskan piskan --- Passed ピㇼカ pirka pirka --- Passed フㇱコ husko husko --- Passed フㇺ hum hum --- Failed ペㇳ/ペッ pet pet/pet --- Passed ホッネン hotnen hotnen --- Passed ホッネㇷ゚ hotnep hotnep --- Passed ホプニ hopuni hopuni --- Passed ポキ poki poki --- Passed ポル poru poru --- Passed ポㇰ pok pok --- Passed ミ mi mi --- Failed [[メニ{{要出典}}]] meni meni{{要出典}} --- Passed モコㇿ mokor mokor --- Failed ヤイェユカㇻ / ヤイェユカㇽ yayeyukar yayeyukar / yayeyukar --- Failed ヤㇻ/ヤㇽ -yar yar/yar --- Passed リコマ rikoma rikoma --- Failed ルヤンペ ruyanpe ruyan-pe --- Failed レ -re re --- Passed レ re re --- Passed レン ren ren --- Passed レㇰポ rekpo rekpo --- Passed レㇷ゚ rep rep --- Passed ワ wa wa --- Passed ワニウ waniw waniw --- Failed ワンペ wanpe wan-pe --- Failed ㇴ / ン -n n / n --- Failed ㇴ / ン n n / n --- Failed ㇷ゚ -p p --- Passed ㇷ゚ p p --- Failed (子音+ェ) -e (子音+ェ) --- local p = require('Module:UnitTests') --- local m = require('Module:ain-translit') - --- function p:tr(kana, roman) --- self:equals('[[' .. kana .. 
']]', m.tr(kana), roman) --- end - --- function p:test_all() --- local examples = { --- { "アイヌ・イタㇰ", "aynu=itak" }, --- { "カィ ; クィ ; コィ ; カゥ ; キゥ ; ケゥ ; コゥ ; ケィ", "kay ; kuy ; koy ; kaw ; kiw ; kew ; kow ; key" }, --- { "カー ; キー ; クー ; ケー ; コー", "kā ; kī ; kū ; kē ; kō" }, --- { "アィヌ モシㇼ", "aynu mosir" }, - --- -- [[incubator:Wp/ain/Main Page]] --- { "チ カㇻ アィヌ イタㇰ ウィキペンチア カンピソ アナㇰネ", "ci=kar aynu itak wikipencia kanpiso anakne" }, - --- -- [[:ja:Wiktionary:アイヌ語のカナ表記#記述]] --- { "ペッ ; ペㇳ", "pet ; pet" }, --- { "オッタ", "orta" }, --- { "アッペ", "akpe" }, --- { "イワン ; イワㇴ", "iwan ; iwan" }, --- { "ケㇺ", "kem" }, --- { "ケゥ", "kew" }, --- { "チュㇷ゚ケㇱ", "cupkes" }, --- { "ポンペ", "pon-pe" }, --- { "タンモシㇼ", "tan-mosir" }, --- { "レプンクㇽ ; レプㇴクㇽ", "repunkur ; repunkur" }, - --- -- [[w:ja:アイヌ語#日本語に溶け込んだアイヌ語]] --- { "エトゥ ピㇼカ", "etu pirka" }, --- { "オンネㇷ゚", "onnep" }, --- { "ケマ フレ", "kema hure" }, --- { "コマイ", "komay" }, --- { "カンカイ", "kankay" }, --- { "スサㇺ", "susam" }, --- { "トゥナカイ", "tunakay" }, --- { "ノンノ", "nonno" }, --- { "ハㇱカㇷ゚", "haskap" }, --- { "ポㇰ", "pok" }, --- { "セイ", "sey" }, --- { "ラッコ", "rakko" }, --- { "ルイペ", "ruype" }, - --- -- [[w:ja:アイヌ語#雑学]] --- { "チャペ", "cape" }, --- { "アペ", "ape" }, --- { "カㇻ", "kar" }, --- { "キナ", "kina" }, --- { "ラㇻ", "rar" }, --- { "トゥスㇱケ", "tususke" }, - --- -- [[w:ja:アイヌ語の語彙一覧]] - --- -- en.wiktionary --- { "ア-", "a-" }, --- { "アィアィ", "ayay" }, --- { "アィヌ", "aynu" }, --- { "アィヌモシㇼ", "aynu-mosir" }, --- { "アイ", "ay" }, --- { "アイヌ", "aynu" }, --- { "アイヌイタㇰ", "aynu itak" }, --- { "アイル", "airu" }, --- { "アエㇷ゚", "aep" }, --- { "アカㇺ", "akam" }, --- { "アキ", "aki" }, --- { "アシペケッ", "asipeket" }, --- { "アシㇰネ", "asikne" }, --- { "アチポ", "acipo" }, --- { "アチャ", "acha" }, --- { "アチャポ", "acapo" }, --- { "アッツシ", "attush" }, --- { "アットゥㇱ", "attus" }, --- { "アトゥ", "atu" }, --- { "アパ", "apa" }, --- { "アフン", "ahun" }, --- { "アフㇷ゚", "ahup" }, --- { "アプトアㇱ", "aptoash" }, --- { "アプトアㇱパ", "aptoashpa" }, --- { "アベ", "abe" }, --- { "アベチクニ", "abecikuni" }, --- { 
"アベバシュイ", "abebashui" }, --- { "アペ", "ape" }, --- { "アママ", "amama" }, --- { "アマㇺ", "amam" }, --- { "アミ", "ami" }, --- { "アミヒ", "amihi" }, --- { "アムシペ", "amusipe" }, --- { "アムㇱペ", "amuspe" }, --- { "アリㇷ゚", "arip" }, --- { "アン", "an" }, --- { "アンケㇱ", "ankes" }, --- { "アンチカㇻ", "ancikar" }, --- { "アントゥキ", "antuki" }, --- { "アㇰ", "ak" }, --- { "アㇷ゚カシ", "apkash" }, --- { "アㇷ゚カㇱ", "apkas" }, --- { "アㇷ゚ト", "apto" }, --- { "アㇺ", "am" }, --- { "アㇺアㇺ", "amam" }, --- { "アㇻワン", "arwan" }, --- { "イサ チセ", "isa cise" }, --- { "イサㇺ", "isam" }, --- { "イソ", "iso" }, --- { "イタコ", "itako" }, --- { "イタㇰ", "itak" }, --- { "イタㇵ", "itah" }, --- { "イナゥ", "inaw" }, --- { "イナオ", "inao" }, --- { "イネ", "ine" }, --- { "イワ", "iwa" }, --- { "イワン", "iwan" }, --- { "ウォセカムィ", "wose-kamuy" }, --- { "ウタラ", "utara" }, --- { "ウタリ", "utari" }, --- { "ウタレ", "utare" }, --- { "ウナㇻペ", "unarpe" }, --- { "ウパㇱ", "upas" }, --- { "ウンマ", "umma" }, --- { "エアニ", "eani" }, --- { "エカㇱ", "ekas" }, --- { "エサマン", "esaman" }, --- { "エツ゚", "’etu" }, --- { "エトゥ", "etu" }, --- { "エトㇽ", "etor" }, --- { "エムシ", "emusi" }, --- { "エムシイ", "emusi" }, --- { "オウペカ", "owpeka" }, --- { "オッカヨ", "okkayo" }, --- { "オナ", "ona" }, --- { "カパㇷ゚", "kapap" }, --- { "カムィ", "kamuy" }, --- { "カムィチェㇷ゚", "kamuy-cep" }, --- { "カムィフㇺベ", "kamuy-humbe" }, --- { "カムィモシㇼ", "kamuy-mosir" }, --- { "カムイモシリ", "kamuimosiri" }, --- { "カㇺビ", "kambi" }, --- { "カㇺビソㇱ", "kambisosh" }, --- { "キキㇼ", "kikir" }, --- { "キサㇻ", "kisar" }, --- { "キム", "kim" }, --- { "クアニ", "kuani" }, --- { "クㇷ゚", "kup" }, --- { "ケス", "kes" }, --- { "ケム", "kem" }, --- { "ケモリッ", "kemorit" }, --- { "ケラ", "kera" }, --- { "コタン", "kotan" }, --- { "コッ", "kot" }, --- { "コンボ", "kombo" }, --- { "コㇿ", "kor" }, --- { "サパ", "sapa" }, --- { "サポ", "sapo" }, --- { "シサㇺ", "sisam" }, --- { "シネ", "sine" }, --- { "シネペサン", "sinepesan" }, --- { "シュマリ", "shumari" }, --- { "シㇰ", "sik" }, --- { "シㇼ", "sir" }, --- { "スス", "susu" }, --- { "セ゚", "ce" }, --- { "セタ", "seta" }, --- { "ソ", "so" }, --- { "チイェネ", "ciyene" 
}, --- { "チイェヘ", "ciyehe" }, --- { "チェㇷ゚", "cep" }, --- { "チセ", "cise" }, --- { "チャシ", "chasi" }, --- { "チャペ", "cape" }, --- { "チュㇷ゚", "cup" }, --- { "チㇱ", "cis" }, --- { "ツ゚", "tu" }, --- { "ツ゚ペサン", "tupesan" }, --- { "ツ゚ンプ", "tunpu" }, --- { "テㇰ", "tek" }, --- { "ト", "to" }, --- { "ト゚", "tu" }, --- { "トゥキ", "tuki" }, --- { "トゥナカィ", "tunakay" }, --- { "トゥレㇱ", "tures" }, --- { "トペ", "tope" }, --- { "トンコリ", "tonkori" }, --- { "ナイ", "nay" }, --- { "ナン", "nan" }, --- { "ニ", "ni" }, --- { "ニㇱ", "nis" }, --- { "ヌカㇽ", "nukar" }, --- { "ヌチャ", "nuca" }, --- { "ヌプリ", "nupuri" }, --- { "ヌマリ", "numari" }, --- { "ヌマン", "numan" }, --- { "ネトパケ", "netopake" }, --- { "ハポ", "hapo" }, --- { "ハンペ", "hanpe" }, --- { "ハㇱカプ", "haskap" }, --- { "ハㇺ", "ham" }, --- { "パケ", "pake" }, --- { "パシユイ", "pashui" }, --- { "パスイ", "pasui" }, --- { "ピリカ", "pirika" }, --- { "フ", "hu" }, --- { "フチ", "huci" }, --- { "フッチ", "hutci" }, --- { "フンチ・ヌプリ", "hunci nupuri" }, --- { "フンペ", "humpe" }, --- { "フㇺベ", "humbe" }, --- { "プクサ", "pukusa" }, --- { "ペツ", "pet" }, --- { "ホシピ", "hosipi" }, --- { "ホシピレ", "hosipire" }, --- { "ホッケ", "hotke" }, --- { "ホッケレ", "hotkere" }, --- { "ホㇰ", "hok" }, --- { "ホㇿケゥ", "horkew" }, --- { "ポル", "poru" }, --- { "マウ", "mau" }, --- { "マタキ", "mataki" }, --- { "ミチ", "mici" }, --- { "ムックリ", "mukkuri" }, --- { "メノコ", "menoko" }, --- { "モシリ", "mosiri" }, --- { "モシㇼ", "mosir" }, --- { "モユㇰ", "moyuk" }, --- { "ユポ", "yupo" }, --- { "ユㇰ", "yuk" }, --- { "ライ", "rai" }, --- { "ラッコ", "rakko" }, --- { "レ", "re" }, --- { "レエㇷ゚", "reep" }, --- { "レハムㇱ", "rehamus" }, --- { "レプンカムィ", "repun-kamuy" }, --- { "レラ", "rera" }, --- { "ワッカ", "wakka" }, --- { "ワン", "wan" }, - --- -- ja.wiktionary --- { "<u>レ</u>ラ", "rera" }, --- { "ぺ", "-pe" }, --- { "ぺ", "pe" }, --- { "アイ", "ay" }, --- { "アイカㇷ゚ ", "aykap" }, --- { "アエㇷ゚", "aep" }, --- { "アオカ", "aoka" }, --- { "アオカイ", "aokay" }, --- { "アクス", "akusu" }, --- { "アシ", "asi" }, --- { "アシヌマ", "asinuma" }, --- { "アシㇰネ", "asikne" }, --- { "アシㇰネン", "asiknen" }, 
--- { "アシㇰネㇷ゚", "asiknep" }, --- { "アシㇽ / アシㇼ", "asir" }, --- { "アスㇽ", "asur" }, --- { "アタイ", "atay" }, --- { "アチャポ", "acapo" }, --- { "アッコチケ", "atkocike" }, --- { "アトゥイ / アト゚イ(アト゜イ)", "atuy" }, --- { "アニ", "ani" }, --- { "アヌ", "anu" }, --- { "アヌ<u>タ</u>リ", "anutari" }, --- { "アノカイ", "anokay" }, --- { "アパ", "apa" }, --- { "アフン", "ahun" }, --- { "アフンケ", "ahunke" }, --- { "アフンパㇻ / アフンパㇽ", "ahunpar" }, --- { "アフンポル", "ahunporu" }, --- { "アフンルパㇻ / アフンルパㇽ", "ahunrupar" }, --- { "アフㇷ゚ ", "ahup" }, --- { "アフㇷ゚テ", "ahupte" }, --- { "アプンノ", "apunno" }, --- { "アペ", "ape" }, --- { "アペアリ", "apeari" }, --- { "アペパスイ", "apepasuy" }, --- { "アマㇺ", "amam" }, --- { "アミㇷ゚", "amip" }, --- { "アリ", "ari" }, --- { "アリキキ", "arikiki" }, --- { "アン", "an" }, --- { "アンぺ", "anpe" }, --- { "アンノㇱキ", "annoski" }, --- { "アㇱ", "as" }, --- { "アㇱカイ", "askay" }, --- { "アㇱケペㇳ / アㇱケペッ", "askepet" }, --- { "アㇳ / アッ", "at" }, --- { "アㇷ゚カㇱ", "apkas" }, --- { "アㇷ゚ト", "apto" }, --- { "アㇺ", "am" }, --- { "アㇺキㇼ / アㇺキㇽ", "amkir" }, --- { "アㇻ/アㇽ", "-ar" }, --- { "アㇻカ / アㇽカ", "arka" }, --- { "アㇻキ / アㇽキ", "arki" }, --- { "アㇻスイ / アㇽスイ", "arsuy" }, --- { "アㇻパ / アㇽパ", "arpa" }, --- { "アㇻワニウ", "arwaniw" }, --- { "アㇻワン", "arwan" }, --- { "アㇻワンペ", "arwanpe" }, --- { "イゥ/イウ", "-iw" }, --- { "イゥ/イウ", "iw" }, --- { "イェ", "ye" }, --- { "イオマンテ", "iomante" }, --- { "イカㇱマ", "ikasma" }, --- { "イキ", "iki" }, --- { "イク", "iku" }, --- { "イクパスイ", "ikupasuy" }, --- { "イクルイ", "ikuruy" }, --- { "イクレ", "ikure" }, --- { "イサ チセ", "isa cise" }, --- { "イサ ニㇱパ", "isa nispa" }, --- { "イサㇺ", "isam" }, --- { "イシタイキ", "isitayki" }, --- { "イセポ", "isepo" }, --- { "イソイタㇰ", "isoytak" }, --- { "イタコ", "itako" }, --- { "イタンキ", "itanki" }, --- { "イタㇰ", "itak" }, --- { "イチェン", "icen" }, --- { "イチャッケレレ", "icakkerere" }, --- { "イチャㇰケレ/イチャッケレ", "icakkere" }, --- { "イッカ", "ikka" }, --- { "イッカクㇽ", "ikkakur" }, --- { "イテキ", "iteki" }, --- { "イテセ", "itese" }, --- { "イナウケ", "inawke" }, --- { "イナン", "inan" }, --- { "イヌイェ", "inuye" }, --- { "イネ", "ine" }, --- { 
"イネン", "inen" }, --- { "イネㇷ゚", "inep" }, --- { "イフライェ", "ihuraye" }, --- { "イペ", "ipe" }, --- { "イペパスイ", "ipepasuy" }, --- { "イペルスイ", "iperusuy" }, --- { "イペレ", "ipere" }, --- { "イメル", "imeru" }, --- { "イヤイイライケレ", "iyayiraykere" }, --- { "イヤイライケレ", "iyairaykere" }, --- { "イヨッタ", "iyotta" }, --- { "イヨハイ", "iyohay" }, --- { "イララ", "irara" }, --- { "イルㇱカ", "iruska" }, --- { "イワニウ", "iwaniw" }, --- { "イワンケ", "iwanke" }, --- { "イワンペ", "iwanpe" }, --- { "イワㇴ / イワン", "iwan" }, --- { "インネ", "inne" }, --- { "イ・", "i=" }, --- { "イㇱラㇺ", "isram" }, --- { "イㇼワクタㇻ ", "irwakutar" }, --- { "ウェン", "wen" }, --- { "ウェンクㇽ", "wenkur" }, --- { "ウタロカ", "utaroka" }, --- { "ウタㇻ", "utar" }, --- { "ウパㇱ", "upas" }, --- { "ウパㇱクマ", "upaskuma" }, --- { "ウㇱ", "us" }, --- { "ウㇴ/ウン", "un" }, --- { "エアニ", "eani" }, --- { "エイ・", "e=i=" }, --- { "エソロ", "esoro" }, --- { "エチウ<u>タ</u>リ", "eciutari" }, --- { "エチオカ", "ecioka" }, --- { "エチオカイ", "eciokay" }, --- { "エチ・", "eci=" }, --- { "エンカ", "enka" }, --- { "エンカㇱ", "enkasi" }, --- { "エンカㇱケ", "enkasike" }, --- { "エ・", "e=" }, --- { "エㇰ", "ek" }, --- { "エㇰテ:ette の語源自覚的な綴り。エッテ。", "ekte" }, --- { "オカ", "oka" }, --- { "オカイ", "okay" }, --- { "オナ", "ona" }, --- { "オハイヌ", "ohainu" }, --- { "オハインカㇻ", "ohainkar" }, --- { "オマナン", "omanan" }, --- { "オマㇴ/オマン", "oman" }, --- { "オヤパ", "oyapa" }, --- { "オロ", "oro" }, --- { "オロ", "oroke" }, --- { "オワ・イヌ", "owa inu" }, --- { "オン", "on" }, --- { "オㇿ", "or" }, --- { "カシ", "kasi" }, --- { "カシケ", "kasike" }, --- { "カネ", "kane" }, --- { "カパチㇼ / カパッチㇼ", "kapatcir" }, --- { "カㇳチ/カッチ", "katci" }, --- { "ク・イ・", "ku=i=" }, --- { "クㇱ", "kus" }, --- { "コトㇺ", "kotom" }, --- { "コㇱマ", "kosma" }, --- { "コㇿ / コㇽ", "kor" }, --- { "コㇿポックㇽ", "korpokkur" }, --- { "サン", "san" }, --- { "サンタン", "santan" }, --- { "サㇷ゚", "sap" }, --- { "シサミタㇰ", "sisam itak" }, --- { "シサㇺ / シサム", "sisam" }, --- { "シチョㇿポㇰ / シチョㇽポㇰ", "sicorpok" }, --- { "シネペサニウ", "sinepesaniw" }, --- { "シネペサン", "sinepesan" }, --- { "シネペサンペ", "sinepesanpe" }, --- { "シネㇴ / シネン", "sinen" 
}, --- { "シネㇷ゚", "sinep" }, --- { "シロマ", "siroma" }, --- { "シンリッ", "sinrit" }, --- { "シㇶ", "six" }, --- { "シㇼ / シㇽ", "sir" }, --- { "ス", "su" }, --- { "ソンノ", "sonno" }, --- { "タネ", "tane" }, --- { "タント", "tanto" }, --- { "タンパ", "tanpa" }, --- { "チキㇼ", "cikir" }, --- { "チセ", "cise" }, --- { "チョㇿポキ / チョㇽポキ", "corpoki" }, --- { "チョㇿポキケ / チョㇽポキケ", "corpokike" }, --- { "チョㇿポッケ / チョㇽポッケ", "corpokke" }, --- { "チョㇿポㇰ / チョㇽポㇰ", "corpok" }, --- { "チョㇿポㇰタ / チョㇽポㇰタ", "corpok-ta" }, --- { "テ", "-te" }, --- { "テ", "te" }, --- { "テエタ", "teeta" }, --- { "ト゚ / ツ゚゚゚゚, トゥ", "tu" }, --- { "トゥペサニウ / ト゚ペサニウ(ト゜ペサニウ)", "tupesaniw" }, --- { "トゥペサン / ト゚ペサン", "tupesan" }, --- { "トゥペサンペ / ト゚ペサンペ", "tupesanpe" }, --- { "トゥㇷ゚ / ト゚ㇷ゚ (ト゜プ)", "tup" }, --- { "ナ", "na" }, --- { "ニサッタ", "nisatta" }, --- { "ニㇱパ", "nispa" }, --- { "ヌマン", "numan" }, --- { "ハウ", "haw" }, --- { "ハウェアン", "hawean" }, --- { "ハッ", "hat" }, --- { "ハㇺペ/ハンペ", "hampe" }, --- { "パ", "pa" }, --- { "パイェ", "paye" }, --- { "パイェカ", "payeka" }, --- { "パイェカイ", "payekay" }, --- { "パセ", "pase" }, --- { "パㇻ / パㇽ", "par" }, --- { "ヒネ", "hine" }, --- { "ピㇱカニ", "piskani" }, --- { "ピㇱカニケ", "piskanike" }, --- { "ピㇱカン", "piskan" }, --- { "ピㇼカ", "pirka" }, --- { "フㇱコ", "husko" }, --- { "フㇺ", "hum" }, --- { "ペㇳ/ペッ", "pet" }, --- { "ホッネン", "hotnen" }, --- { "ホッネㇷ゚", "hotnep" }, --- { "ホプニ", "hopuni" }, --- { "ポキ", "poki" }, --- { "ポル", "poru" }, --- { "ポㇰ", "pok" }, --- { "ミ", "mi" }, --- { "メニ{{要出典}}", "meni" }, --- { "モコㇿ", "mokor" }, --- { "ヤイェユカㇻ / ヤイェユカㇽ", "yayeyukar" }, --- { "ヤㇻ/ヤㇽ", "-yar" }, --- { "リコマ", "rikoma" }, --- { "ルヤンペ", "ruyanpe" }, --- { "レ", "-re" }, --- { "レ", "re" }, --- { "レン", "ren" }, --- { "レㇰポ", "rekpo" }, --- { "レㇷ゚", "rep" }, --- { "ワ", "wa" }, --- { "ワニウ", "waniw" }, --- { "ワンペ", "wanpe" }, --- { "ㇴ / ン", "-n" }, --- { "ㇴ / ン", "n" }, --- { "ㇷ゚", "-p" }, --- { "ㇷ゚", "p" }, --- { "(子音+ェ)", "-e" }, --- } --- self:iterate(examples, "tr") --- end - --- return p \ No newline at end of file diff --git 
-- Transliterate Southern/Northern Altai Cyrillic TEXT into Latin script.
--
-- Parameters:
--   text  the string to transliterate
--   lang  accepted for interface compatibility; not read by this function
--   sc    accepted for interface compatibility; not read by this function
--
-- Returns the transliterated string. Characters that have no entry in
-- `tab` are passed through unchanged by the final substitution.
function export.tr(text, lang, sc)
	local ugsub = mw.ustring.gsub

	-- Ё needs to be composed if it is decomposed (e + combining diaeresis).
	-- However, this cannot happen in wikitext, only in Lua modules.
	-- Composing also guarantees that Ӧ/Ӱ are single code points, which the
	-- character class below relies on.
	text = mw.ustring.toNFC(text)

	-- е after a vowel or at the beginning of a word becomes ye.
	-- The vowel class includes the Altai vowels Ӧ/Ӱ (the letters mapped in
	-- `tab`); Ө/Ү are kept as well so that input previously matched by this
	-- pattern is still matched. An optional combining acute/grave accent may
	-- sit between the vowel and е.
	-- Е/Э are deliberately not part of the class: according to modern Altai
	-- orthography ее (instead of ээ) is occasionally used, so е directly
	-- after е is left as plain e.
	text = ugsub(text, "([АОӦӨУӰҮЫЯЁЮИЪЬаоӧөуӱүыяёюиъь%A][́̀]?)е", "%1ye")
	-- Word-initial Е/е at the very start of the text.
	text = ugsub(text, "^[Ее]", iotated)
	-- Е/е preceded by any character outside the Cyrillic block (U+0400-U+04FF).
	text = ugsub(text, "([^Ѐ-ӿ])([Ее])", function(a, b)
		return a .. iotated[b]
	end)

	-- Map every remaining character through `tab`; the parentheses drop the
	-- substitution count that gsub returns as its second value.
	return (ugsub(text, '.', tab))
end
-- Substitution helper: same arguments as rsubn(), but yields only the
-- resulting string and drops the replacement count.
local function rsub(term, foo, bar)
	-- Wrapping the call in parentheses truncates it to its first result.
	return (rsubn(term, foo, bar))
end
-- Arabic "sun letters": consonants to which the l of the definite
-- article assimilates.
local sun_letters = "تثدذرزسشصضطظلن"
-- Lookup tables derived from the sun letters, for use in implementing
-- sun-letter assimilation of ال (al-):
--   ttsun1  Arabic letter        -> its transliteration
--   ttsun2  "l-" .. Arabic letter -> transliteration .. "-" .. Arabic letter
--   ttsun3  list of the transliterations, in sun_letters order
local ttsun1, ttsun2, ttsun3 = {}, {}, {}
for codepoint in gcodepoint(sun_letters) do
	local letter = U(codepoint)
	local latin = tt[letter]
	ttsun1[letter] = latin
	ttsun2["l-" .. letter] = latin .. "-" .. letter
	ttsun3[#ttsun3 + 1] = latin
end
-- Transliterated sun letters joined into one string, for use in a
-- character class when implementing elision of al-
local sun_letters_tr = table.concat(ttsun3)

-- Letters that are expected to carry a vowel diacritic.
local consonants_needing_vowels = "بتثجحخدذرزسشصضطظعغفقكڪلمنهپچڤگڨڧأإؤئءةﷲ"
-- consonants on the right side; includes alif madda
local rconsonants = consonants_needing_vowels .. "ويآ"
-- consonants on the left side; does not include alif madda
local lconsonants = consonants_needing_vowels .. "وي"
-- Arabic semicolon, comma, question mark; taṭwīl; period, exclamation point,
-- single quote for bold/italic
local punctuation = "؟،؛" .. "ـ" .. ".!'"
-- Eastern Arabic digits
local numbers = "١٢٣٤٥٦٧٨٩٠"
fatHataan, fatHataan}, - -- tāʾ marbūṭa should always be preceded by fatḥa, alif, alif madda or - -- dagger alif; infer fatḥa if not - {"([^" .. fatHa .. alif .. alif_madda .. dagger_alif .. "])" .. taa_marbuuTa, "%1" .. fatHa .. taa_marbuuTa}, - -- similarly for alif between consonants, possibly marked with shadda - -- (does not apply to initial alif, which is silent when not marked with - -- hamza, or final alif, which might be pronounced as -an) - {"([" .. lconsonants .. "]" .. shadda .. "?)" .. alif .. "([" .. rconsonants .. "])", - "%1" .. fatHa .. alif .. "%2"}, - -- infer fatḥa in case of non-fatḥa + alif/alif-maqṣūra + dagger alif - {"([^" .. fatHa .. "])([" .. alif .. alif_maqSuura .. "]" .. dagger_alif .. ")", "%1" .. fatHa .. "%2"}, - -- infer kasra in case of hamza-under-alif not + kasra - {alif_hamza_below .. "([^" .. kasra .. "])", alif_hamza_below .. kasra .. "%1"}, - -- ignore dagger alif placed over regular alif or alif maqṣūra - {"([" .. alif .. alif_maqSuura .. "])" .. dagger_alif, "%1"}, - - ----------- rest of these concern definite article alif-lām ---------- - -- in kasra/ḍamma + alif + lam, make alif into hamzatu l-waṣl, so we - -- handle cases like بِالتَّوْفِيق (bi-t-tawfīq) correctly - {"([" .. Damma .. kasra .. "])" .. alif .. laam, "%1" .. alif_waSl .. laam}, - -- al + consonant + shadda (only recognize word-initially if regular alif): remove shadda - {"^(" .. alif .. fatHa .. "?" .. laam .. "[" .. lconsonants .. "])" .. shadda, "%1"}, - {"%s(" .. alif .. fatHa .. "?" .. laam .. "[" .. lconsonants .. "])" .. shadda, " %1"}, - {"(" .. alif_waSl .. fatHa .. "?" .. laam .. "[" .. lconsonants .. "])" .. shadda, "%1"}, - -- handle l- hamzatu l-waṣl or word-initial al- - {"^" .. alif .. fatHa .. "?" .. laam, "al-"}, - {"%s" .. alif .. fatHa .. "?" .. laam, " al-"}, - -- next one for bi-t-tawfīq - {"([" .. Damma .. kasra .. "])" .. alif_waSl .. fatHa .. "?" .. 
-- Transliterate the word(s) in TEXT. LANG (the language) and SC (the script)
-- are ignored. OMIT_I3RAAB means leave out final short vowels (ʾiʿrāb).
-- GRAY_I3RAAB means render transliterate short vowels (ʾiʿrāb) in gray.
-- FORCE_TRANSLIT causes even non-vocalized text to be transliterated
-- (normally the function checks for non-vocalized text and returns nil,
-- since such text is ambiguous in transliteration).
--
-- Returns the transliterated string, or nil when FORCE_TRANSLIT is not set
-- and has_diacritics() rejects the text.
--
-- The body is a strictly ordered pipeline of substitutions; several steps
-- state explicitly which other steps they must precede or follow, so the
-- statements must not be reordered.
function export.tr(text, lang, sc, omit_i3raab, gray_i3raab, force_translit)
	-- make it possible to call this function from a template
	if type(text) == "table" then
		-- Empty-string arguments are treated as absent (nil).
		local function f(x) return (x ~= "") and x or nil end
		-- NOTE(review): args[5] is assigned to force_translit and nothing is
		-- assigned to gray_i3raab in this branch, so the positional mapping
		-- differs from the Lua parameter order -- confirm this is intended.
		text, lang, sc, omit_i3raab, force_translit =
			f(text.args[1]), f(text.args[2]), f(text.args[3]), f(text.args[4]), f(text.args[5])
	end

	-- Apply the {pattern, replacement} pairs in list order.
	for _, sub in ipairs(before_diacritic_checking_subs) do
		text = rsub(text, sub[1], sub[2])
	end

	-- Refuse to transliterate text that has_diacritics() does not accept,
	-- unless the caller forces it.
	if not force_translit and not has_diacritics(text) then
		return nil
	end

	------------ transformations after checking for diacritics --------------
	-- Replace plain alif with hamzatu l-waṣl when followed by fatḥa/ḍamma/kasra.
	-- Must go after handling of initial al-, which distinguishes alif-fatḥa
	-- from alif w/hamzatu l-waṣl. Must go before generation of ū and ī, which
	-- eliminate the ḍamma/kasra.
	text = rsub(text, alif .. "([" .. fatHa .. Damma .. kasra .. "])", alif_waSl .. "%1")
	-- ḍamma + waw not followed by a diacritic is ū, otherwise w
	text = rsub(text, Damma .. waaw .. "([^" .. fatHataan .. Dammataan .. kasrataan .. fatHa .. Damma .. kasra .. shadda .. sukuun .. dagger_alif .. "])", "ū%1")
	-- same, at the very end of the text
	text = rsub(text, Damma .. waaw .. "$", "ū")
	-- kasra + yaa not followed by a diacritic (or ū from prev step) is ī, otherwise y
	text = rsub(text, kasra .. yaa .. "([^" .. fatHataan .. Dammataan .. kasrataan .. fatHa .. Damma .. kasra .. shadda .. sukuun .. dagger_alif .. "ū])", "ī%1")
	-- same, at the very end of the text
	text = rsub(text, kasra .. yaa .. "$", "ī")
	-- convert shadda to double letter.
	text = rsub(text, "(.)" .. shadda, "%1%1")
	if not omit_i3raab and gray_i3raab then -- show ʾiʿrāb grayed in transliteration
		-- NOTE(review): as written here the replacement strings in this
		-- branch contain plain letters only and the last rsub() has an empty
		-- pattern; it looks like the markup that did the graying may be
		-- missing from this copy -- confirm against the upstream module.
		-- decide whether to gray out the t in ﺓ. If word begins with al- or l-, yes.
		-- Otherwise, no if word ends in a/i/u, yes if ends in an/in/un.
		text = rsub(text, "^(a?l%-[^%s]+)" .. taa_marbuuTa .. "([" .. fatHataan .. Dammataan .. kasrataan .. fatHa .. Damma .. kasra .. "])",
			'%1t%2')
		text = rsub(text, "(%sa?l%-[^%s]+)" .. taa_marbuuTa .. "([" .. fatHataan .. Dammataan .. kasrataan .. fatHa .. Damma .. kasra .. "])",
			'%1t%2')
		text = rsub(text, taa_marbuuTa .. "([" .. fatHa .. Damma .. kasra .. "])", "t%1")
		text = rsub(text, taa_marbuuTa .. "([" .. fatHataan .. Dammataan .. kasrataan .. "])",
			't%1')
		-- Table replacement: characters without an entry are left unchanged.
		text = rsub(text, ".", {
			[fatHataan] = 'an',
			[kasrataan] = 'in',
			[Dammataan] = 'un'
		})
		-- Short vowel directly before whitespace; the lookup key is the
		-- captured vowel, and the replacement consumes the whitespace
		-- character and emits a single space.
		text = rsub(text, "([" .. fatHa .. Damma .. kasra .. "])%s", {
			[fatHa] = 'a ',
			[kasra] = 'i ',
			[Damma] = 'u '
		})
		-- Short vowel at the very end of the text.
		text = rsub(text, "[" .. fatHa .. Damma .. kasra .. "]$", {
			[fatHa] = 'a',
			[kasra] = 'i',
			[Damma] = 'u'
		})
		text = rsub(text, '', "")
	elseif omit_i3raab then -- omit ʾiʿrāb in transliteration
		-- Drop all tanwīn marks, then short vowels before whitespace or at
		-- the end of the text.
		text = rsub(text, "[" .. fatHataan .. Dammataan .. kasrataan .. "]", "")
		text = rsub(text, "[" .. fatHa .. Damma .. kasra .. "]%s", " ")
		text = rsub(text, "[" .. fatHa .. Damma .. kasra .. "]$", "")
	end
	-- tāʾ marbūṭa should not be rendered by -t if word-final even when
	-- ʾiʿrāb (desinential inflection) is shown; instead, use (t) before
	-- whitespace, nothing when final; but render final -ﺍﺓ and -ﺁﺓ as -āh,
	-- consistent with Wehr's dictionary
	-- Left-to-right or right-to-left mark at end of text will prevent tāʾ marbūṭa
	-- from being transliterated correctly.
	-- (These two calls use the byte-oriented string.gsub rather than rsub.)
	text = string.gsub(text, lrm, "")
	text = string.gsub(text, rlm, "")
	text = rsub(text, "([" .. alif .. alif_madda .. "])" .. taa_marbuuTa .. "$", "%1h")
	-- Ignore final tāʾ marbūṭa (it appears as "a" due to the preceding
	-- short vowel). Need to do this after graying or omitting word-final
	-- ʾiʿrāb.
	text = rsub(text, taa_marbuuTa .. "$", "")
	-- Likewise drop tāʾ marbūṭa directly before a punctuation character,
	-- keeping the punctuation.
	text = rsub(text, taa_marbuuTa .. "(%p)", "%1")
	if not omit_i3raab then -- show ʾiʿrāb in transliteration
		text = rsub(text, taa_marbuuTa .. "%s", "(t) ")
	else
		-- When omitting ʾiʿrāb, show all non-absolutely-final instances of
		-- tāʾ marbūṭa as (t), with trailing ʾiʿrāb omitted.
		text = rsub(text, taa_marbuuTa, "(t)")
	end
	-- tatwīl should be rendered as - at beginning or end of word. It will
	-- be rendered as nothing in the middle of a word (FIXME, do we want
	-- this?)
	text = rsub(text, "^ـ", "-")
	text = rsub(text, "%sـ", " -")
	text = rsub(text, "ـ$", "-")
	text = rsub(text, "ـ%s", "- ")
	-- Now convert remaining Arabic chars according to table.
	text = rsub(text, ".", tt)
	-- fatḥa followed by alif yields "aā" after the table lookup; collapse it.
	text = rsub(text, "aā", "ā")
	-- Implement elision of al- after a final vowel. We do this
	-- conservatively, only handling elision of the definite article rather
	-- than elision in other cases of hamzat al-waṣl (e.g. form-I imperatives
	-- or form-VII and above verbal nouns) partly because elision in
	-- these cases isn't so common in MSA and partly to avoid excessive
	-- elision in case of words written with initial bare alif instead of
	-- properly with hamzated alif. Possibly we should reconsider.
	-- At the very least we currently don't handle elision of الَّذِي (allaḏi)
	-- correctly because we special-case it to appear without the hyphen;
	-- perhaps we should reconsider that.
	text = rsub(text, "([aiuāīū]'* +'*)a([" .. sun_letters_tr .. "]%-)",
		"%1%2")
	if gray_i3raab then
		text = rsub(text, "([aiuāīū]'*'* +'*)a([" .. sun_letters_tr .. "]%-)",
			"%1%2")
	end
	-- Special-case the transliteration of allāh, without the hyphen
	text = rsub(text, "^(a?)l%-lāh", "%1llāh")
	text = rsub(text, "(%sa?)l%-lāh", "%1llāh")

	return text
end
-- Definition of the forward-declared local `has_diacritics`.
-- Runs TEXT through every {pattern, replacement} pair in
-- has_diacritics_subs and reports whether nothing is left afterwards.
-- Any left-to-right / right-to-left marks are removed first, and their
-- presence is reported through Module:debug tracking.
function has_diacritics(text)
	local remaining, marks_removed = rsubn(text, "[" .. lrm .. rlm .. "]", "")
	if marks_removed > 0 then
		require("Module:debug").track("ar-translit/lrm or rlm")
	end
	for i = 1, #has_diacritics_subs do
		local pattern, replacement = unpack(has_diacritics_subs[i])
		remaining = rsub(remaining, pattern, replacement)
	end
	return #remaining == 0
end

-- Return true if transliteration TR is an irregular transliteration of
-- ARABIC. Return false if ARABIC can't be transliterated. For purposes of
-- establishing regularity, hyphens are ignored and word-final tāʾ marbūṭa
-- can be transliterated as "(t)", "" or "t".
function export.irregular_translit(arabic, tr)
	-- Missing or empty input is never reported as irregular.
	if not (arabic and arabic ~= "" and tr and tr ~= "") then
		return false
	end

	local auto_tr = export.tr(arabic)
	if auto_tr == nil or auto_tr == false or auto_tr == tr then
		return false
	end

	-- Compare word by word; a differing word count is irregular by itself.
	local arabic_words = rsplit(arabic, " ")
	local auto_words = rsplit(auto_tr, " ")
	local manual_words = rsplit(tr, " ")
	local word_count = #auto_words
	if word_count ~= #manual_words or word_count ~= #arabic_words then
		return true
	end

	local final_t = "%(t%)$"
	local clitic_chars = "^[وفكل]" -- separate line to avoid L2R display weirdness
	local clitic_article = clitic_chars .. fatHa .. "?[" .. alif .. alif_waSl .. "]" .. laam

	for idx = 1, word_count do
		local auto_word = auto_words[idx]
		local manual_word = manual_words[idx]
		local arabic_word = arabic_words[idx]

		-- Resolve final (t) in auto-translit to t, h or nothing
		if rfind(auto_word, final_t) then
			if rfind(manual_word, "āh$") then
				auto_word = rsub(auto_word, final_t, "h")
			elseif rfind(manual_word, "t$") then
				auto_word = rsub(auto_word, final_t, "t")
			else
				auto_word = rsub(auto_word, final_t, "")
			end
		end

		-- Resolve clitics + short a + alif-lām, which may get auto-transliterated
		-- to contain long ā, to short a if the manual translit has it; note
		-- that currently in cases with assimilated l, the auto-translit will
		-- fail, so we won't ever get here and don't have to worry about
		-- auto-translit l against manual-translit assimilated char.
		if rfind(arabic_word, clitic_article) and rfind(manual_word, "^[wfkl]a%-") then
			auto_word = rsub(auto_word, "^([wfkl])ā", "%1a")
		end

		-- Ignore hyphens when comparing
		local auto_plain = rsub(auto_word, "%-", "")
		local manual_plain = rsub(manual_word, "%-", "")
		if auto_plain ~= manual_plain then
			return true
		end
	end

	return false
end
duwīn duwīn --- Passed الَّذِي‎ allaḏī allaḏī --- Failed رَأَيْتُ ابْنَهُ‎ raʾaytu bnahu (nil) N/A --- Passed رَأَيْتُ ٱبْنَهُ‎ raʾaytu bnahu raʾaytu bnahu --- Passed ڪُفُوًا‎ kufuwan kufuwan --- -- Unit tests for [[Module:ar-translit]]. Refresh page to run tests. --- local tests = require('Module:UnitTests') --- local ar_translit = require('Module:ar-translit') - --- local full_link = require('Module:links').full_link --- local ar = require('Module:languages').getByCode('ar') - --- local rlm = mw.ustring.char(0x200F) -- right-to-left mark - --- local function link(word) --- return full_link{ term = word, lang = ar, tr = "-" } --- end - --- --TO DO --- function tests:do_test_translit(arab, roman) --- self:equals(link(arab), ar_translit.tr(arab, 'ar', 'Arab'), roman) --- end - --- function tests:test_translit_arabic() --- local examples = { --- { "اَلْعَرَبِيَّة" .. rlm, "al-ʿarabiyya" }, -- right-to-left mark is removed --- { "اَلْعَرَبِيَّة.", "al-ʿarabiyya." }, --- { 'لِلْكِتَاب', 'lilkitāb' }, -- ideally 'li-l-kitāb' --- { 'لِلتَّأْكِيذ', nil }, -- ideally 'li-t-taʾḵīḏ' --- { 'لِلَّبَنِ', 'lillabani' }, -- ideally 'li-l-labani'; correct spelling --- { 'لِللَّبَنِ', nil }, -- ideally 'li-l-labani; spelling sometimes used --- { 'شْنِيتْزَل', "šnītzal" }, --- { 'عُظْمَى', "ʿuẓmā" }, --- { 'إِحْدَى', "ʾiḥdā" }, --- -- cases with bi- prefix plus definite article --- { 'بِٱلتَّأْكِيد', "bi-t-taʾkīd" }, --- { 'بِالتَّأْكِيد', "bi-t-taʾkīd" }, --- { 'بِالتَأْكِيد', "bi-t-taʾkīd" }, --- { 'بِالكِتَاب', "bi-l-kitāb" }, --- { 'بِالْكِتَاب', "bi-l-kitāb" }, --- -- check handling of ʾiʿrāb, hamzatu l-waṣl w/al-, al- w/sun and moon letters --- { 'اَللُّغَةُ ٱلْعَرَبِيَّةُ', "al-luḡatu l-ʿarabiyyatu" }, --- -- check elision of al- after a vowel --- { 'اَللُّغَةُ الْعَرَبِيَّةُ', "al-luḡatu l-ʿarabiyyatu" }, - --- -- check two kinds of otiose alif in 3rd-masc-plural endings --- { 'نَسُوا', "nasū" }, --- { 'رَمَوْا', "ramaw" }, --- -- check otiose alif and alif maqṣūra after 
fatḥatan --- { 'عَصًا', "ʿaṣan" }, --- { 'هُذًى', "huḏan" }, --- -- same where fatḥatan wrongly placed over alif or alif maqṣūra --- { 'عَصاً', "ʿaṣan" }, --- { 'هُذىً', "huḏan" }, --- -- infer fatḥa on alif between consonants --- { 'كاتِب', "kātib" }, --- -- infer fatḥa on alif between consonants when first is marked with shadda --- { 'كُتّاب', "kuttāb" }, --- -- infer kasra after hamza-under-alif --- { 'إلاه', "ʾilāh" }, --- -- missing diacritic on t --- { 'كاتب', nil }, --- -- final shadda on unvocalized consonant --- { 'رَبّ', "rabb" }, --- -- tā' marbūṭa preceded by alif; don't infer fatḥa --- { 'نَوَاةٌ', "nawātun" }, --- -- alif-lam w/unvocalized lam + shadda over sun letter; final tāʾ marbūṭa --- { 'اَلشَّدَّة', "aš-šadda" }, --- -- medial tāʾ marbūṭa --- { 'شَدَّة الشَكْل', "šadda(t) aš-šakl" }, --- -- alif + tāʾ marbūṭa --> āh --- { 'مُعَادَاة', "muʿādāh" }, --- -- alif madda + tāʾ marbūṭa --> āh --- { 'مِرْآة', "mirʾāh" }, --- -- test of h for the constant --- { 'صلاح', nil }, --- -- initial unhamzated alif + ī --- { 'اِيبَ', "ība" }, --- -- iyū sequence --- { 'دِيُون', "diyūn" }, --- -- uwī sequence --- { 'دُوِين', "duwīn" }, --- -- allaḏī, with al + shadda --- { 'الَّذِي', "allaḏī" }, --- -- alif al-wasl --- { 'رَأَيْتُ ابْنَهُ', "raʾaytu bnahu" }, --- { 'رَأَيْتُ ٱبْنَهُ', "raʾaytu bnahu" }, --- -- "swash kaf" --- { 'ڪُفُوًا', "kufuwan" }, --- } - --- self:iterate(examples, "do_test_translit") --- end - --- return tests \ No newline at end of file diff --git a/wikt/translit/armn-translit.lua b/wikt/translit/armn-translit.lua deleted file mode 100644 index 4f36983..0000000 --- a/wikt/translit/armn-translit.lua +++ /dev/null @@ -1,52 +0,0 @@ --- This module will transliterate text in the Armenian script per WT:ARMN TR. --- It is used to transliterate Middle Armenian (axm), Armenian (hye), Northern Kurdish (kmr), --- Kipchak (qwm), Udi (udi) and Old Armenian (xcl). 
- -local export = {} - -local gsub = mw.ustring.gsub -local mapping = { - ["ա"]="a", ["բ"]="b", ["գ"]="g", ["դ"]="d", ["ե"]="e", ["զ"]="z",["է"]="ē", ["ը"]="ə", - ["թ"]="tʿ", ["ժ"]="ž", ["ի"]="i", ["լ"]="l", ["խ"]="x", ["ծ"]="c", ["կ"]="k", ["հ"]="h", - ["ձ"]="j", ["ղ"]="ł", ["ճ"]="č", ["մ"]="m", ["յ"]="y", ["ն"]="n", ["շ"]="š", ["ո"]="o", - ["չ"]="čʿ", ["պ"]="p", ["ջ"]="ǰ", ["ռ"]="ṙ", ["ս"]="s", ["վ"]="v", ["տ"]="t", ["ր"]="r", - ["ց"]="cʿ", ["ւ"]="w", ["փ"]="pʿ", ["ք"]="kʿ", ["և"]="ew", ["օ"]="ō", ["ֆ"]="f", - ["Ա"]="A", ["Բ"]="B", ["Գ"]="G", ["Դ"]="D", ["Ե"]="E", ["Զ"]="Z", ["Է"]="Ē", ["Ը"]="Ə", - ["Թ"]="Tʿ", ["Ժ"]="Ž", ["Ի"]="I", ["Լ"]="L", ["Խ"]="X", ["Ծ"]="C", ["Կ"]="K", ["Հ"]="H", - ["Ձ"]="J", ["Ղ"]="Ł", ["Ճ"]="Č", ["Մ"]="M", ["Յ"]="Y", ["Ն"]="N", ["Շ"]="Š", ["Ո"]="O", - ["Չ"]="Čʿ", ["Պ"]="P", ["Ջ"]="J̌", ["Ռ"]="Ṙ", ["Ս"]="S", ["Վ"]="V", ["Տ"]="T", ["Ր"]="R", - ["Ց"]="Cʿ", ["Ւ"]="W", ["Փ"]="Pʿ", ["Ք"]="Kʿ", ["Օ"]="Ō", ["Ֆ"]="F", ["ﬓ "]="mn", ["ﬔ"]="me", - ["ﬕ"]="mi", ["ﬖ"]="vn", ["ﬗ"]="mx", - -- punctuation - ["՝"]=",", ["։"]=".", ["․"]=";", ["՛"]="́", ["՜"]="!", ["՞"]="?", - ["՟"]=".", ["֊"]="-", ["՚"]="’", ['«']='“', ['»']='”', ['ՙ']='ʿ' -} - -local replacements = { - ['յ̵'] = 'ɦ', - ['Ո[ւՒ]'] = 'U', - ['ու'] = 'u', - ['Ո՛[ւՒ]'] = 'Ú', - ['ո՛ւ'] = 'ú', - ['Ո՜[ւՒ]'] = 'U!', - ['ո՜ւ'] = 'u!', - ['Ո՞[ւՒ]'] = 'U?', - ['ո՞ւ'] = 'u?', - ['ո̈ւ'] = 'ü', - ['Ո̈[ւՒ]'] = 'Ü', -} - -function export.tr(text, lang, sc) - if sc and sc ~= "Armn" then - return nil - end - - for regex, replacement in pairs(replacements) do - text = mw.ustring.gsub(text, regex, replacement) - end - - text = gsub(text, '.', mapping) - return text -end - -return export \ No newline at end of file diff --git a/wikt/translit/as-translit.lua b/wikt/translit/as-translit.lua deleted file mode 100644 index 7eed753..0000000 --- a/wikt/translit/as-translit.lua +++ /dev/null @@ -1,331 +0,0 @@ --- Transliteration for Assamese --- This module will transliterate Assamese language text per WT:AS TR. 
- -local export = {} -local gsub = mw.ustring.gsub -local match = mw.ustring.match - -local conv = { - -- consonants - ["ক্ষ"] = "kh", - ["ক"] = "k", ["খ"] = "kh", ["গ"] = "g", ["ঘ"] = "gh", ["ঙ"] = "ṅ", - ["চ"] = "s", ["ছ"] = "s", ["জ"] = "z", ["ঝ"] = "zh", ["ঞ"] = "ñ", - ["ট"] = "t", ["ঠ"] = "th", ["ড"] = "d", ["ঢ"] = "dh", ["ণ"] = "n", - ["ত"] = "t", ["থ"] = "th", ["দ"] = "d", ["ধ"] = "dh", ["ন"] = "n", - ["প"] = "p", ["ফ"] = "ph", ["ব"] = "b", ["ভ"] = "bh", ["ম"] = "m", - ["য"] = "z", ["ৰ"] = "r", ["ল"] = "l", ["ৱ"] = "w", - ["শ"] = "x", ["ষ"] = "x", ["স"] = "x", ["হ"] = "h", - ["য়"] = "y", ["ড়"] = "r", ["ঢ়"] = "rh", - - -- vowel diacritics - ["’"] = "ó", ["ি"] = "i", ["ু"] = "u", - ["ৃ"] = "ri", ["ে"] = "e", ["ে’"] = "é", ["ো"] = "ü", - ["া"] = "a", ["ী"] = "i", ["ূ"] = "u", ["ৈ"] = "oi", ["ৌ"] = "ou", - - -- visarga - ["ঃ"] = "o", - - -- vowel signs - ["অ"] = "o", ["অ’"] = "ó", ["ই"] = "i", ["উ"] = "u", - ["ঋ"] = "ri", ["এ"] = "e", ["এ’"] = "é", ["ও"] = "ü", - ["আ"] = "a", ["ঈ"] = "i", ["ঊ"] = "u", ["ঐ"] = "oi", ["ঔ"] = "ou", - - --hosonto - ["্"] = "", - - -- sondrobindu - ["ঁ"] = "̃", - - -- onusor - ["ং"] = "ṅ", - - -- hosonto to, - ["ৎ"] = "t", - - -- numerals - ["০"] = "0", ["১"] = "1", ["২"] = "2", ["৩"] = "3", ["৪"] = "4", - ["৫"] = "5", ["৬"] = "6", ["৭"] = "7", ["৮"] = "8", ["৯"] = "9", - - -- punctuation - ["।"] = ".", -- dari -} - -local conv2 = { - ["ক্ষ"] = "ḱ", ["খ"] = "ḱ", - ["ঘ"] = "ǵ", - ["ঙ"] = "ŋ", ["ং"] = "ŋ", - ["ঝ"] = "ź", - ["ঠ"] = "ṫ", ["থ"] = "ṫ", - ["ঢ"] = "ḋ", ["ধ"] = "ḋ", - ["ফ"] = "ṗ", - ["ভ"] = "ḃ", - ["ঢ়"] = "ŕ", - ["ৃ"] = "ṙ", ["ঋ"] = "ṙ", - ["ৈ"] = "ʏ", ["ঐ"] = "ʏ", - ["ৌ"] = "ɵ", ["ঔ"] = "ɵ", -} - -local consonant, vowel, vowel_sign = "ক-হড়-য়ৰৱ", "oা-ৌ’", "অ-ঔ" -local c = "[" .. consonant .. "]" -local cc = "়?" .. c -local v = "[" .. vowel .. vowel_sign .. "]" -local syncope_pattern = "(" .. v .. cc .. v .. cc .. ")o(" .. cc .. "ঁ?" .. v .. 
")" - -local function rev_string(text) - local result, length = "", mw.ustring.len(text) - for i = 1, length do - result = result .. mw.ustring.sub(text, length - i + 1, length - i + 1) - end - return result -end - -function export.tr(text, lang, sc, mode) - text = gsub(text, "([^ৰ])্য", "%1্য়") - text = gsub(text, "্ব", "্ৱ") - text = gsub(text, "[শষস]্", "চ্") - text = gsub(text, "্স", "্চ") - text = gsub(text, "[োও]ৱ", "্ও") - text = gsub(text, "ক্ষ", "খ") - text = gsub(text, "’ৱ", "্অ’") - text = gsub(text, "[ুুউ]ৱ(.)", "্উ%1") - text = gsub(text, "[োও]ৱ(.)", "্ও%1") - text = gsub(text, "োঁৱ(.)", "্ওঁ%1") - text = gsub(text, "[ৌঔ]ৱ", "্ঔ") - text = gsub(text, "[িই]য়(.)", "্ই%1") - text = gsub(text, "ৃয়", "্ঋ") - text = gsub(text, "[েএ]য়(.)", "্এ%1") - text = gsub(text, "[ে’এ’]য়", "্এ’") - text = gsub(text, "[ৈঐ]য়(.)", "্ঐ%1") - text = gsub(text, "[ীঈ]য়(.)", "্ঈ%1") - text = gsub(text, "[ীঈ]য়", "্ঈঅ") -- end - text = gsub(text, "[ূূঊ]ৱ", "্ঊ") - text = gsub(text, "݁", "্অ") - text = gsub(text, "ঃ", "্অ") - text = gsub(text, "[࣪ܿ]", "্") - text = gsub(text, "বাৰ" , "্বাৰ") - text = gsub(text, "বিলাক", "্বিলাক") - text = gsub(text, "টো" , "্টো") - text = gsub(text, "খন" , "্খন") - text = gsub(text, "ডাল" , "্ডাল") - text = gsub(text, "খিনি" , "্খিনি") - text = gsub(text, "জন" , "্জন") - text = gsub(text, "জনী" , "্জনী") - text = gsub(text, "গৰাকী" , "্গৰাকী") - text = gsub(text, "সকল" , "্সকল") - text = gsub(text, "কৈ" , "্কৈ") - text = gsub(text, "ফাল" , "্ফাল") - text = gsub(text, "কেই" , "্কেই") - text = gsub(text, "[িীইঈ]ঞ", "্ইঅ͂") - text = gsub(text, "ঞ্", "ন্̃") - - text = gsub(text, "(" .. c .. "়?)([" .. vowel .. "’?্]?)", function(a, b) - return a .. (b == "" and "o" or b) end) - - for word in mw.ustring.gmatch(text, "[ঁ-৽o’]+") do - local orig_word = word - word = rev_string(word) - word = gsub(word, "^o(়?" .. c .. ")(ঁ?" .. v .. 
")", "%1%2") - while match(word, syncope_pattern) do - word = gsub(word, syncope_pattern, "%1%2") - end - text = gsub(text, orig_word, rev_string(word)) - end - - if mode == "IPA" then - text = gsub(text, ".[়’]?", conv2) - text = gsub(text, ".", conv2) - end - - text = gsub(text, ".[়’]?", conv) - text = gsub(text, ".", conv) - - local consonants_Latn_no_h = "[b-df-gj-np-tv-z]" - - -- Cw - text = gsub(text, "mw", "mb") -- special case - text = gsub(text, "^(" .. consonants_Latn_no_h .. "h?)w", "%1") -- initial - text = gsub(text, "hw", "hb") - text = gsub(text, "(" .. consonants_Latn_no_h .. ")w", "%1%1") -- medial - - -- zñ - text = gsub(text, "^zñ", "gy") -- initial - text = gsub(text, "zñ", "gg") -- medial - - -- Cy - text = gsub(text, "^khy", "kh" ) - text = gsub(text, "([aéeióoüu])(" .. consonants_Latn_no_h .. ")y", "%1i%2%2") - - -- final "b" has inherent vowel - text = gsub(text, "b$", "bo") - text = gsub(text, "b ", "bo ") - - -- final r conjuncts - text = gsub(text, "r([kszt])o$", "r%1") - text = gsub(text, "r([kszt])o ", "r%1 ") - text = gsub(text, "rkho$", "rkh") - text = gsub(text, "rkho ", "rkh ") - - if match(text, "[ঁ-৽]") and mode ~= "debug" then - return nil - else - return mw.ustring.toNFC(text) - end -end - -return export - --- 6 tests failed. 
(refresh) - --- test_translit_assamese: --- Text Expected Actual Differs at --- Passed স্ত্ৰী stri stri --- Passed জিভা zibha zibha --- Passed হাঁওফাঁও hãüphãü hãüphãü --- Passed মেকুৰী mekuri mekuri --- Passed চৰকাৰী sorkari sorkari --- Passed হ’ল hól hól --- Passed ফুল phul phul --- Passed পুষ্প puspo puspo --- Passed ঘাঁহ ghãh ghãh --- Failed তৃণ trino trin 5 --- Passed উদ্ভিদ udbhid udbhid --- Passed গছ gos gos --- Passed স্বাধীন sadhin sadhin --- Passed পুস্তক pustok pustok --- Passed মস্তিষ্ক mostisko mostisko --- Passed শূন্য xuinno xuinno --- Passed ব্যাঘ্ৰ byaghro byaghro --- Passed ব্যৱহাৰ byowohar byowohar --- Passed ছয় soy soy --- Passed ক্ষেত্ৰ khetro khetro --- Passed কে’ক kék kék --- Passed স্পৰ্শ sporxo sporxo --- Passed স্বাদকলি sadkoli sadkoli --- Passed শ্ৰদ্ধা sroddha sroddha --- Failed আশ্চৰ্য assorzo assorz 7 --- Passed ইচ্ছা issa issa --- Passed শ্বাস sax sax --- Passed ৰং roṅ roṅ --- Passed অৰ্থাৎ orthat orthat --- Passed লোৱা lüa lüa --- Passed ৰিক্সা riksa riksa --- Passed দিয়া dia dia --- Passed অসমীয়া oxomia oxomia --- Passed ভাৰতীয় bharotio bharotio --- Passed বানপানী banpani banpani --- Passed কাৰখানা karkhana karkhana --- Passed মানুহজন manuhzon manuhzon --- Failed মানচিত্ৰ mansitro manositro 4 --- Passed গণতন্ত্ৰ gonotontro gonotontro --- Passed ডাঙৰীয়া daṅoria daṅoria --- Passed জ্বলন zolon zolon --- Passed জ্বলা zola zola --- Passed জ্বলাই zolai zolai --- Passed যাব কৰিব zabo koribo zabo koribo --- Passed ফুলক phulok phulok --- Passed চিগাৰেটক sigaretok sigaretok --- Passed ৰাষ্ট্ৰীয় rastrio rastrio --- Passed নিউয়ৰ্ক niuyork niuyork --- Passed শৰ্কৰা xorkora xorkora --- Passed আমবিলাক ambilak ambilak --- Passed শ্বাস sax sax --- Passed উজ্বল uzzol uzzol --- Passed ঘনত্ব ghonotto ghonotto --- Passed ম্ব mbo mbo --- Passed ধন্যবাদ dhoinnobad dhoinnobad --- Passed ধ্বংস dhoṅxo dhoṅxo --- Passed ঈশ্বৰ issor issor --- Passed আহ্বান ahban ahban --- Failed খ্যাত khyato khat 3 --- Failed চ্যুত suto syut 2 --- Failed জ্যোতি züti 
zyüti 2 --- Passed ধ্যান dhyan dhyan --- Passed ক্ষুদ্ৰ khudro khudro --- Passed বাক্য baikko baikko --- Passed বিজ্ঞান biggan biggan --- -- Unit tests for [[Module:as-translit]]. Refresh page to run tests. --- local tests = require('Module:UnitTests') --- local as_translit = require('Module:as-translit') - --- local function link(word) --- return '[[' .. word .. '#Assamese|' .. word .. ']]' --- end - --- function tests:do_test_translit(beng, roman, xlit) --- return self:equals(link(beng), as_translit.tr(beng, 'as', 'Beng', 'debug'), roman) --- end - --- function tests:test_translit_assamese() --- local examples = { --- { 'স্ত্ৰী', 'stri' }, --- { 'জিভা', 'zibha' }, --- { 'হাঁওফাঁও', 'hãüphãü' }, --- { 'মেকুৰী', 'mekuri' }, --- { 'চৰকাৰী', 'sorkari' }, --- { 'হ’ল', 'hól' }, --- { 'ফুল', 'phul' }, --- { 'পুষ্প', 'puspo' }, --- { 'ঘাঁহ', 'ghãh' }, --- { 'তৃণ', 'trino' }, --- { 'উদ্ভিদ', 'udbhid' }, --- { 'গছ', 'gos' }, --- { 'স্বাধীন', 'sadhin' }, --- { 'পুস্তক', 'pustok' }, --- { 'মস্তিষ্ক', 'mostisko' }, --- { 'শূন্য', 'xuinno' }, --- { 'ব্যাঘ্ৰ', 'byaghro' }, --- { 'ব্যৱহাৰ', 'byowohar' }, --- { 'ছয়', 'soy' }, --- { 'ক্ষেত্ৰ', 'khetro' }, --- { 'কে’ক', 'kék' }, --- { 'স্পৰ্শ', 'sporxo' }, --- { 'স্বাদকলি', 'sadkoli' }, --- { 'শ্ৰদ্ধা', 'sroddha' }, --- { 'আশ্চৰ্য', 'assorzo' }, --- { 'ইচ্ছা', 'issa' }, --- { 'শ্বাস', 'sax' }, --- { 'ৰং', 'roṅ' }, --- { 'অৰ্থাৎ', 'orthat' }, --- { 'লোৱা', 'lüa' }, --- { 'ৰিক্সা', 'riksa' }, --- { 'দিয়া', 'dia' }, --- { 'অসমীয়া', 'oxomia' }, --- { 'ভাৰতীয়', 'bharotio' }, --- { 'বানপানী', 'banpani' }, --- { 'কাৰখানা', 'karkhana' }, --- { 'মানুহজন', 'manuhzon' }, --- { 'মানচিত্ৰ', 'mansitro' }, --- { 'গণতন্ত্ৰ', 'gonotontro' }, --- { 'ডাঙৰীয়া' , 'daṅoria' }, --- { 'জ্বলন' , 'zolon' }, --- { 'জ্বলা' , 'zola' }, --- { 'জ্বলাই' , 'zolai' }, --- { 'যাব কৰিব', 'zabo koribo' }, --- { 'ফুলক', 'phulok' }, --- { 'চিগাৰেটক', 'sigaretok' }, --- { 'ৰাষ্ট্ৰীয়', 'rastrio' }, --- { 'নিউয়ৰ্ক', 'niuyork' }, --- { 'শৰ্কৰা', 'xorkora' }, --- { 
'আমবিলাক', 'ambilak' }, - --- -- behaviour of Cw --- { 'শ্বাস', 'sax' }, --- { 'উজ্বল', 'uzzol' }, --- { 'ঘনত্ব', 'ghonotto' }, --- { 'ম্ব', 'mbo' }, --- { 'ধন্যবাদ', 'dhoinnobad' }, --- { 'ধ্বংস', 'dhoṅxo' }, --- { 'ঈশ্বৰ', 'issor' }, --- { 'আহ্বান', 'ahban' }, --- { 'খ্যাত', 'khyato' }, --- { 'চ্যুত', 'suto' }, --- { 'জ্যোতি', 'züti' }, --- { 'ধ্যান', 'dhyan' }, --- { 'ক্ষুদ্ৰ', 'khudro' }, --- { 'বাক্য', 'baikko' }, --- { 'বিজ্ঞান', 'biggan' }, --- } --- return self:iterate(examples, "do_test_translit") --- end - --- return tests diff --git a/wikt/translit/av-translit.lua b/wikt/translit/av-translit.lua deleted file mode 100644 index bf0cb0b..0000000 --- a/wikt/translit/av-translit.lua +++ /dev/null @@ -1,92 +0,0 @@ --- This module will transliterate Avar language text per WT:AV TR. --- Language code is ava and oav. - -local export = {} - -local tt = { - ["б"]="b", ["п"]="p", ["ф"]="f", ["в"]="w", ["м"]="m", - ["д"]="d", ["т"]="t", ["й"]="j", ["н"]="n", ["з"]="z", ["ц"]="c", - ["с"]="s", ["ж"]="ž", ["ш"]="š", ["щ"]="š̄", - ["л"]="l", ["ч"]="č", ["р"]="r", ["г"]="g", ["к"]="k", ["х"]="χ", - ["ъ"]="ʾ", ["а"]="a", ["е"]="e", ["ы"]="ə", ["и"]="i", ["о"]="o", ["у"]="u", - ["ё"]="ë", ["ь"]="’", ["э"]="è", ["ю"]="ju", ["я"]="ja", - ["Б"]="B", ["П"]="P", ["Ф"]="F", ["В"]="W", ["М"]="M", - ["Д"]="D", ["Т"]="T", ["Й"]="J", ["Н"]="N", ["З"]="Z", ["Ц"]="C", - ["С"]="S", ["Ж"]="Ž", ["Ш"]="Š", ["Щ"]="Š̄", - ["Л"]="L", ["Ч"]="Č", ["Р"]="R", ["Г"]="G", ["К"]="K", ["Х"]="Χ", - ["Ъ"]="ʾ", ["А"]="A", ["Е"]="E", ["Ы"]="Ə", ["И"]="I", ["О"]="O", ["У"]="U", - ["Ё"]="Ë", ["Ь"]="’", ["Э"]="È", ["Ю"]="Ju", ["Я"]="Ja"}; - -local tetragraphs = { - ['цӏцӏ'] = 'c̣̄', - ['чӏчӏ'] = 'č̣̄', - ['кӏкӏ'] = 'ḳ̄', - ['лълъ'] = 'ł̄', - ['Цӏцӏ'] = 'C̣̄', - ['Чӏчӏ'] = 'Č̣̄', - ['Кӏкӏ'] = 'Ḳ̄', - ['Лълъ'] = 'Ł̄', -} - -local digraphs = { - ['цӏ'] = 'c̣', - ['цц'] = 'c̄', - ['тӏ'] = 'ṭ', - ['лӏ'] = 'kl', - ['сс'] = 's̄', - ['лъ'] = 'ł', - ['чч'] = 'č̄', - ['чӏ'] = 'č̣', - ['кь'] = 'kḷ', - ['кк'] = 'k̄', 
- ['кӏ'] = 'ḳ', - ['хь'] = 'x', - ['хъ'] = 'q̄', - ['къ'] = 'q̇̄', - ['гъ'] = 'ġ', - ['хх'] = 'χ̄', - ['гӏ'] = 'ʿ', - ['хӏ'] = 'ḥ', - ['гь'] = 'h', - ['Цӏ'] = 'C̣', - ['Цц'] = 'C̄', - ['Тӏ'] = 'Ṭ', - ['Лӏ'] = 'Kl', - ['Сс'] = 'S̄', - ['Лъ'] = 'Ł', - ['Чч'] = 'Č̄', - ['Чӏ'] = 'Č̣', - ['Кь'] = 'Kḷ', - ['Кк'] = 'K̄', - ['Кӏ'] = 'Ḳ', - ['Хь'] = 'X', - ['Хъ'] = 'Q̄', - ['Къ'] = 'Q̇̄', - ['Гъ'] = 'Ġ', - ['Хх'] = 'Χ̄', - ['Гӏ'] = 'ʿ', - ['Хӏ'] = 'Ḥ', - ['Гь'] = 'H', -} - -function export.tr(text, lang, sc) - local str_gsub = string.gsub - - -- Convert uppercase palochka to lowercase. Lowercase is found in tables - -- above. - text = str_gsub(text, mw.ustring.char(0x4C0), mw.ustring.char(0x4CF)) - - for grapheme, replacement in pairs(tetragraphs) do - text = str_gsub(text, grapheme, replacement) - end - - for grapheme, replacement in pairs(digraphs) do - text = str_gsub(text, grapheme, replacement) - end - - text = str_gsub(text, '[\1-\127\194-\244][\128-\191]*', tt) - - return text -end - -return export \ No newline at end of file diff --git a/wikt/translit/avst-translit.lua b/wikt/translit/avst-translit.lua deleted file mode 100644 index f0fde14..0000000 --- a/wikt/translit/avst-translit.lua +++ /dev/null @@ -1,74 +0,0 @@ --- This module will transliterate text in the Avestan script. --- It is used to transliterate Avestan (ae) and Middle Persian (pal). --- Language codes are ave and pal. 
-local export = {} - -local mapping = { - ["𐬀"] = "a", -- AVESTAN LETTER A - ["𐬁"] = "ā", -- AVESTAN LETTER AA - ["𐬂"] = "å", -- AVESTAN LETTER AO - ["𐬃"] = "ā̊", -- AVESTAN LETTER AAO - ["𐬄"] = "ą", -- AVESTAN LETTER AN - ["𐬅"] = "ą̇̇", -- AVESTAN LETTER AAN - ["𐬆"] = "ə", -- AVESTAN LETTER AE - ["𐬇"] = "ə̄", -- AVESTAN LETTER AEE - ["𐬈"] = "e", -- AVESTAN LETTER E - ["𐬉"] = "ē", -- AVESTAN LETTER EE - ["𐬊"] = "o", -- AVESTAN LETTER O - ["𐬋"] = "ō", -- AVESTAN LETTER OO - ["𐬌"] = "i", -- AVESTAN LETTER I - ["𐬍"] = "ī", -- AVESTAN LETTER II - ["𐬎"] = "u", -- AVESTAN LETTER U - ["𐬏"] = "ū", -- AVESTAN LETTER UU - - ["𐬐"] = "k", -- AVESTAN LETTER KE - ["𐬑"] = "x", -- AVESTAN LETTER XE - ["𐬒"] = "x́", -- AVESTAN LETTER XYE - ["𐬓"] = "xᵛ", -- AVESTAN LETTER XVE - ["𐬔"] = "g", -- AVESTAN LETTER GE - ["𐬕"] = "ġ", -- AVESTAN LETTER GGE - ["𐬖"] = "γ", -- AVESTAN LETTER GHE - ["𐬗"] = "c", -- AVESTAN LETTER CE - ["𐬘"] = "j", -- AVESTAN LETTER JE - ["𐬙"] = "t", -- AVESTAN LETTER TE - ["𐬚"] = "θ", -- AVESTAN LETTER THE - ["𐬛"] = "d", -- AVESTAN LETTER DE - ["𐬜"] = "δ", -- AVESTAN LETTER DHE - ["𐬝"] = "t̰", -- AVESTAN LETTER TTE - ["𐬞"] = "p", -- AVESTAN LETTER PE - ["𐬟"] = "f", -- AVESTAN LETTER FE - ["𐬠"] = "b", -- AVESTAN LETTER BE - ["𐬡"] = "β", -- AVESTAN LETTER BHE - ["𐬢"] = "ŋ", -- AVESTAN LETTER NGE - ["𐬣"] = "ŋ́", -- AVESTAN LETTER NGYE - ["𐬤"] = "ŋᵛ", -- AVESTAN LETTER NGVE - ["𐬥"] = "n", -- AVESTAN LETTER NE - ["𐬦"] = "ń", -- AVESTAN LETTER NYE - ["𐬧"] = "ṇ", -- AVESTAN LETTER NNE - ["𐬨"] = "m", -- AVESTAN LETTER ME - ["𐬩"] = "m̨", -- AVESTAN LETTER HME - ["𐬪"] = "ẏ", -- AVESTAN LETTER YYE - ["𐬫"] = "y", -- AVESTAN LETTER YE - ["𐬬"] = "v", -- AVESTAN LETTER VE - ["𐬭"] = "r", -- AVESTAN LETTER RE - ["𐬮"] = "l", -- AVESTAN LETTER LE - ["𐬯"] = "s", -- AVESTAN LETTER SE - ["𐬰"] = "z", -- AVESTAN LETTER ZE - ["𐬱"] = "š", -- AVESTAN LETTER SHE - ["𐬲"] = "ž", -- AVESTAN LETTER ZHE - ["𐬳"] = "š́", -- AVESTAN LETTER SHYE - ["𐬴"] = "ṣ̌", -- AVESTAN LETTER SSHE - ["𐬵"] = "h", 
-- AVESTAN LETTER HE - ["𐬹"] = " ", -- AVESTAN ABBREVIATION MARK - ["⸱"] = " " -- WORD SEPARATOR MIDDLE DOT -} - -function export.tr(text, lang, sc) - text = mw.ustring.gsub(text, "[⸱𐬹]?[𐬺𐬻𐬼𐬽𐬾𐬿]", ".") -- punctuation - text = string.gsub(text, "[\1-\127\194-\244][\128-\191]*", mapping) -- transliterate letters - --text = mw.ustring.gsub(text, "ii", "y") - --text = mw.ustring.gsub(text, "uu", "v") - return text -end - -return export \ No newline at end of file diff --git a/wikt/translit/ba-translit.lua b/wikt/translit/ba-translit.lua deleted file mode 100644 index 36c621e..0000000 --- a/wikt/translit/ba-translit.lua +++ /dev/null @@ -1,41 +0,0 @@ --- This module will transliterate Bashkir language text per WT:BA TR. --- Language code: bak -local export = {} - -local tt = { - ["ү"]="ü", ['Ү']='Ü', ["т"]="t", ['Т']='T', ["р"]="r", ['Р']='R', ["ф"]="f", ['Ф']='F', ["ө"]="ö", ['Ө']='Ö', - ["ю"]="yu", ['Ю']='Yu', ["ш"]="š", ['Ш']='Š', ["ь"]="’", ['Ь']='’', ["ъ"]="ʺ", ['Ъ']='ʺ', ["н"]="n", ['Н']='N', - ["п"]="p", ['П']='P', ["й"]="y", ['Й']='Y', ["л"]="l", ['Л']='L', ["з"]="z", ['З']='Z', ["е"]="e", ['Е']='E', - ["г"]="g", ['Г']='G', ["б"]="b", ['Б']='B', ["у"]="u", ['У']='U', ["с"]="s", ['С']='S', ["х"]="x", ['Х']='X', - ["ч"]="č", ['Ч']='Č', ["щ"]="šč", ['Щ']='Šč', ["я"]="ya", ['Я']='Ya', ["ы"]="ï", ['Ы']='Ï', ["э"]="e", ['Э']='E', - ["м"]="m", ['М']='M', ["о"]="o", ['О']='O', ["и"]="i", ['И']='I', ["ё"]="yo", ['Ё']='Yo', ["ж"]="ž", ['Ж']='Ž', - ["к"]="k", ['К']='K', ["д"]="d", ['Д']='D', ["в"]="v", ['В']='V', ["ц"]="ts", ['Ц']='Ts', ["а"]="a", ['А']='A', - ["ң"]="ñ", ['Ң']='Ñ', ["ғ"]="ğ", ['Ғ']='Ğ', ["ҙ"]="ð", ['Ҙ']='Đ', ["ҡ"]="q", ['Ҡ']='Q', ["ҫ"]="θ", ['Ҫ']='Θ', - ["һ"]="h", ['Һ']='H', ["ә"]="ä", ['Ә']='Ä' -}; - -local iotated = { - ['е'] = 'ye', - ['Е'] = 'Ye', -} - -function export.tr(text, lang, sc) - local str_gsub = string.gsub - local ugsub = mw.ustring.gsub - -- ү/у should be transliterated as w after vowels - text = ugsub(text, 
"([АаЕеЭэЮюЯяӘәИиҮүУуӨөЫы])[үу]", "%1w") - - text = ugsub(text, - "([АОӨӘУЫЕЯЁЮИЕаоөәуыэяёюиеъь%A][́̀]?)([Ее])", - function(a, e) - return a .. iotated[e] - end) - text = ugsub(text, - "^[Ее]", - iotated) - text = str_gsub(text, '[\1-\127\194-\244][\128-\191]*', tt) - - return text -end - -return export \ No newline at end of file diff --git a/wikt/translit/bdk-translit.lua b/wikt/translit/bdk-translit.lua deleted file mode 100644 index a15d2fc..0000000 --- a/wikt/translit/bdk-translit.lua +++ /dev/null @@ -1,75 +0,0 @@ --- This module will transliterate Budukh language text per WT:BDK TR. --- Language code: bdk -local export = {} - -local tt = { - ["б"]="b", ["п"]="p", ["ф"]="f", ["в"]="v", ["м"]="m", - ["д"]="d", ["т"]="t", ["й"]="j", ["н"]="n", ["з"]="z", ["ц"]="c", - ["с"]="s", ["ж"]="ž", ["ш"]="š", ["л"]="l", ["ч"]="č", ["р"]="r", ["г"]="g", ["к"]="k", ["х"]="χ", - ["ъ"]="ʔ", ["а"]="a", ["е"]="e", ["ы"]="ɨ", ["и"]="i", ["о"]="o", ["у"]="u", - ["э"]="e", - ["Б"]="B", ["П"]="P", ["Ф"]="F", ["В"]="V", ["М"]="M", - ["Д"]="D", ["Т"]="T", ["Й"]="J", ["Н"]="N", ["З"]="Z", ["Ц"]="C", - ["С"]="S", ["Ж"]="Ž", ["Ш"]="Š", - ["Л"]="L", ["Ч"]="Č", ["Р"]="R", ["Г"]="G", ["К"]="K", ["Х"]="Χ", - ["Ъ"]="ʔ", ["А"]="A", ["Е"]="E", ["Ы"]="Ɨ", ["И"]="I", ["О"]="O", ["У"]="U", - ["Э"]="E",}; - -local digraphs = { - ['аь'] = 'ä', - ['Аь'] = 'Ä', - ['гӏ'] = 'ʕ', - ['Гӏ'] = 'ʕ', - ['гъ'] = 'ġ', - ['Гъ'] = 'Ġ', - ['гь'] = 'h', - ['Гь'] = 'H', - ['дж'] = 'ǯ', - ['Дж'] = 'Ǯ', - ['кк'] = 'k̄', - ['кӏ'] = 'ḳ', - ['Кӏ'] = 'Ḳ', - ['къ'] = 'q̄', - ['Къ'] = 'Q̄', - ['кь'] = 'q̇', - ['Кь'] = 'Q̇', - ['оь'] = 'ö', - ['Оь'] = 'Ö', - ['пӏ'] = 'ṗ', - ['Пӏ'] = 'Ṗ', - ['тӏ'] = 'ṭ', - ['Тӏ'] = 'Ṭ', - ['уь'] = 'ü', - ['Уь'] = 'Ü', - ['хӏ'] = 'ħ', - ['Хӏ'] = 'Ħ', - ['хъ'] = 'q', - ['Хъ'] = 'Q', - ['хь'] = 'x', - ['Хь'] = 'X', - ['цӏ'] = 'c̣', - ['Цӏ'] = 'C̣', - ['чӏ'] = 'č̣', - ['Чӏ'] = 'Č̣', -} - -function export.tr(text, lang, sc) - local str_gsub = string.gsub - local UTF8_char = 
'[%z\1-\127\194-\244][\128-\191]*' - - -- Convert capital to lowercase palochka. Lowercase is found in tables - -- above. - text = str_gsub(text, mw.ustring.char(0x4C0), mw.ustring.char(0x4CF)) - - text = mw.ustring.gsub(text, '[Кк]ъг', 'ɢ') - - for letter, translit in pairs(digraphs) do - text = str_gsub(text, letter, translit) - end - - text = str_gsub(text, UTF8_char, tt) - - return text -end - -return export \ No newline at end of file diff --git a/wikt/translit/be-translit.lua b/wikt/translit/be-translit.lua deleted file mode 100644 index 28f7b70..0000000 --- a/wikt/translit/be-translit.lua +++ /dev/null @@ -1,24 +0,0 @@ --- This module will transliterate Belarusian language text per WT:BE TR. --- Language code: bel -local export = {} - -local tt = { - ["А"]='A', ["а"]='a', ["Б"]='B', ["б"]='b', ["В"]='V', ["в"]='v', ["Г"]='H', ["г"]='h', ["Д"]='D', ["д"]='d', - ["Е"]='Je', ["е"]='je', ["Ё"]='Jó', ["ё"]='jó', ["Ж"]='Ž', ["ж"]='ž', ["З"]='Z', ["з"]='z', ["І"]='I', ["і"]='i', - ["И"]='I', ["и"]='i', -- present for Old Belarusian; FIXME, remove when we have a separate language code for this lang - ["Й"]='J', ["й"]='j', ["К"]='K', ["к"]='k', ["Л"]='L', ["л"]='l', ["М"]='M', ["м"]='m', ["Н"]='N', ["н"]='n', - ["О"]='O', ["о"]='o', ["П"]='P', ["п"]='p', ["Р"]='R', ["р"]='r', ["С"]='S', ["с"]='s', ["Т"]='T', ["т"]='t', - ["У"]='U', ["у"]='u', ["Ў"]='Ŭ', ["ў"]='ŭ', ["Ф"]='F', ["ф"]='f', ["Х"]='X', ["х"]='x', ["Ц"]='C', ["ц"]='c', - ["Ч"]='Č', ["ч"]='č', ["Ш"]='Š', ["ш"]='š', ["Ы"]='Y', ["ы"]='y', ["Ь"]='ʹ', ["ь"]='ʹ', ["Э"]='E', ["э"]='e', - ["Ю"]='Ju', ["ю"]='ju', ["Я"]='Ja', ["я"]='ja', - ["’"]='ʺ', ["ʼ"]='ʺ' -}; - -function export.tr(text, lang, sc) - text = mw.ustring.gsub(text, "'+", { ["'"] = 'ʺ' }) -- neutral apostrophe - text = mw.ustring.gsub(text, '.', tt) - - return text -end - -return export \ No newline at end of file diff --git a/wikt/translit/ber-translit.lua b/wikt/translit/ber-translit.lua deleted file mode 100644 index 17e5944..0000000 --- 
a/wikt/translit/ber-translit.lua +++ /dev/null @@ -1,103 +0,0 @@ --- This module transliterates Tifinagh script and is currently used by Central Atlas Tamazight. --- Language code: tzm -local export = {} - -local t2l_common = { - ["ⴰ"] = "a", - ["ⴱ"] = "b", - ["ⴲ"] = "b", - ["ⴳ"] = "g", - ["ⴴ"] = "g", - ["ⴵ"] = "ǧ", - ["ⴶ"] = "ǧ", - ["ⴷ"] = "d", - ["ⴸ"] = "d", - ["ⴹ"] = "ḍ", - ["ⴺ"] = "ḍ", - ["ⴻ"] = "e", - ["ⴼ"] = "f", - ["ⴽ"] = "k", - ["ⴾ"] = "k", - ["ⴿ"] = "k", - ["ⵀ"] = "h", -- tmh, thv, taq, ttq, thz: "b" - ["ⵁ"] = "h", - ["ⵂ"] = "h", - ["ⵃ"] = "ḥ", - ["ⵄ"] = "ɛ", - ["ⵅ"] = "x", - ["ⵆ"] = "x", - ["ⵇ"] = "q", - ["ⵈ"] = "q", - ["ⵉ"] = "i", - ["ⵊ"] = "j", - ["ⵋ"] = "j", - ["ⵌ"] = "j", - ["ⵍ"] = "l", - ["ⵎ"] = "m", - ["ⵏ"] = "n", - ["ⵐ"] = "ny", - ["ⵑ"] = "ng", - ["ⵒ"] = "p", - ["ⵓ"] = "u", -- tmh, thv, taq, ttq, thz: "w" - ["ⵔ"] = "r", - ["ⵕ"] = "ṛ", - ["ⵖ"] = "ɣ", - ["ⵗ"] = "ɣ", - ["ⵘ"] = "j", -- thz: "ɣ" - ["ⵙ"] = "s", - ["ⵚ"] = "ṣ", - ["ⵛ"] = "š", - ["ⵜ"] = "t", - ["ⵝ"] = "t", - ["ⵞ"] = "č", - ["ⵟ"] = "ṭ", - ["ⵠ"] = "v", - ["ⵡ"] = "w", - ["ⵢ"] = "y", - ["ⵣ"] = "z", - ["ⵤ"] = "z", - ["ⵥ"] = "ẓ", - ["ⵦ"] = "e", - ["ⵧ"] = "o", - ["ⵯ"] = "ʷ", - ["⵰"] = ".", - ["⵿"] = "", -} - -local t2l_alt = { - ["tmh"] = { - ["ⵀ"] = "b", - ["ⵓ"] = "w", - }, - ["thz"] = { - ["ⵀ"] = "b", - ["ⵓ"] = "w", - ["ⵘ"] = "ɣ", - }, -} -t2l_alt["thv"] = t2l_alt["tmh"] -t2l_alt["taq"] = t2l_alt["tmh"] -t2l_alt["ttq"] = t2l_alt["tmh"] - -function export.tr(text, lang, sc) - if not sc then - sc = require("Module:scripts").findBestScript(text, require("Module:languages").getByCode(lang or "ber")):getCode() - end - - if sc == "Arab" then - -- perhaps will be implemented in the future - text = nil - elseif sc == "Latn" then - -- no need to transliterate - text = nil - elseif sc == "Tfng" then - if t2l_alt[lang] then - text = mw.ustring.gsub(text, '.', t2l_alt[lang]) - end - text = mw.ustring.gsub(text, '.', t2l_common) - end - - return text -end - -return export \ No newline at end of file diff 
--git a/wikt/translit/bg-translit.lua b/wikt/translit/bg-translit.lua deleted file mode 100644 index e8fd9c3..0000000 --- a/wikt/translit/bg-translit.lua +++ /dev/null @@ -1,26 +0,0 @@ --- This module will transliterate Bulgarian language text per WT:BG TR. --- Language code: bul -local export = {} - -local tt = { - ["А"]='A', ["а"]='a', ["Б"]='B', ["б"]='b', ["В"]='V', ["в"]='v', ["Г"]='G', ["г"]='g', ["Д"]='D', ["д"]='d', - ["Е"]='E', ["е"]='e', ["Ж"]='J', ["ж"]='j', ["З"]='Z', ["з"]='z', ["И"]='I', ["и"]='i', ["Й"]='I', ["й"]='i', - ["К"]='K', ["к"]='k', ["Л"]='L', ["л"]='l', ["М"]='M', ["м"]='m', ["Н"]='N', ["н"]='n', ["О"]='O', ["о"]='o', - ["П"]='P', ["п"]='p', ["Р"]='R', ["р"]='r', ["С"]='S', ["с"]='s', ["Т"]='T', ["т"]='t', ["У"]='U', ["у"]='u', - ["Ф"]='F', ["ф"]='f', ["Х"]='H', ["х"]='h', ["Ц"]='C', ["ц"]='c', ["Ч"]='Č', ["ч"]='č', ["Ш"]='Š', ["ш"]='š', - ["Щ"]='Št', ["щ"]='št', ["Ъ"]='Ǎ', ["ъ"]='ǎ', ["Ю"]='Ju', ["ю"]='ju', ["Я"]='Ja', ["я"]='ja', - ["ѝ"]='ì', - - -- Pre-reform - ["Ѫ"]='Ǫ', ["ѫ"]='ǫ', ["Ѣ"]='Ě', ["ѣ"]='ě', ["Ь"]='ʹ', ["ь"]='ʹ', -}; - -function export.tr(text, lang, sc) - text = mw.ustring.gsub(text, '(%w)[Ъъ]$', '%1') - text = mw.ustring.gsub(text, '(%w)[Ъъ]%f[%c%p%s]', '%1') - text = mw.ustring.gsub(text, '[Ьь]%f[Оо]', { ["Ь"]='J', ["ь"]='j' }) - text = mw.ustring.gsub(text, '.', tt) - return text -end - -return export \ No newline at end of file diff --git a/wikt/translit/bho-translit.lua b/wikt/translit/bho-translit.lua deleted file mode 100644 index 7fa003a..0000000 --- a/wikt/translit/bho-translit.lua +++ /dev/null @@ -1,144 +0,0 @@ --- Transliteration for Bhojpuri --- This module will transliterate Bhojpuri language text. 
--- Language code: bho -local export = {} -local gsub = mw.ustring.gsub -local match = mw.ustring.match - -local conv = { - -- consonants - ['क'] = 'k', ['ख'] = 'kh', ['ग'] = 'g', ['घ'] = 'gh', ['ङ'] = 'ṅ', - ['च'] = 'c', ['छ'] = 'ch', ['ज'] = 'j', ['झ'] = 'jh', ['ञ'] = 'ñ', - ['ट'] = 'ṭ', ['ठ'] = 'ṭh', ['ड'] = 'ḍ', ['ढ'] = 'ḍh', ['ण'] = 'ṇ', - ['त'] = 't', ['थ'] = 'th', ['द'] = 'd', ['ध'] = 'dh', ['न'] = 'n', - ['प'] = 'p', ['फ'] = 'ph', ['ब'] = 'b', ['भ'] = 'bh', ['म'] = 'm', - ['य'] = 'y', ['र'] = 'r', ['ल'] = 'l', ['व'] = 'v', ['ळ'] = 'ḷ', - ['श'] = 'ś', ['ष'] = 'ṣ', ['स'] = 's', ['ह'] = 'h', - ['ड़'] = 'ṛ', ['ढ़'] = 'ṛh', - -- ['ज्ञ'] = 'gy', - - -- vowel diacritics - ['ि'] = 'i', ['ु'] = 'u', ['े'] = 'e', ['ो'] = 'o', - ['ा'] = 'ā', ['ी'] = 'ī', ['ू'] = 'ū', - ['ृ'] = 'ŕ', - ['ै'] = 'ai', ['ौ'] = 'au', - ['ॉ'] = 'ŏ', - ['ॅ'] = 'ĕ', - ['ऽ'] = 'ː', - - -- vowel signs - ['अ'] = 'a', ['इ'] = 'i', ['उ'] = 'u', ['ए'] = 'e', ['ओ'] = 'o', - ['आ'] = 'ā', ['ई'] = 'ī', ['ऊ'] = 'ū', - ['ऋ'] = 'ŕ', - ['ऐ'] = 'ai', ['औ'] = 'au', - ['ऑ'] = 'ŏ', - ['ऍ'] = 'ĕ', - - ['ॐ'] = 'om', - - -- chandrabindu - ['ँ'] = '̃', - - -- anusvara - ['ं'] = 'ṁ', - - -- visarga - ['ः'] = 'ḥ', - - -- virama - ['्'] = '', - - -- numerals - ['०'] = '0', ['१'] = '1', ['२'] = '2', ['३'] = '3', ['४'] = '4', - ['५'] = '5', ['६'] = '6', ['७'] = '7', ['८'] = '8', ['९'] = '9', - - -- punctuation - ['।'] = '.', -- danda - ['+'] = '', -- compound separator - - -- abbreviation sign - ['॰'] = '.', -} - -local nasal_assim = { - ['ज़'] = 'न', - ['क'] = 'ङ', ['ख'] = 'ङ', ['ग'] = 'ङ', ['घ'] = 'ङ', - ['च'] = 'ञ', ['छ'] = 'ञ', ['ज'] = 'ञ', ['झ'] = 'ञ', - ['ट'] = 'ण', ['ठ'] = 'ण', ['ड'] = 'ण', ['ढ'] = 'ण', - ['प'] = 'म', ['फ'] = 'म', ['ब'] = 'म', ['भ'] = 'म', ['म'] = 'म', -} - -local perm_cl = { - ['म्ल'] = true, ['व्ल'] = true, ['न्ल'] = true, - -} - -local all_cons, special_cons = 'कखगघङचछजझञटठडढतथदधपफबभशषसयरलवहणनम', 'यरलवहनम' -local vowel, vowel_sign = 'aिुृेोाीूैौॉॅ', 'अइउएओआईऊऋऐऔऑऍऽ' -local syncope_pattern = 
'([' .. vowel .. vowel_sign .. '])(़?[' .. all_cons .. '])a(़?[' .. gsub(all_cons, "य", "") .. '])([ंँ]?[' .. vowel .. vowel_sign .. '])' - -local function rev_string(text) - local result, length = {}, mw.ustring.len(text) - for i = 1, length do - table.insert(result, mw.ustring.sub(text, length - i + 1, length - i + 1)) - end - return table.concat(result) -end - -function export.tr(text, lang, sc) - text = gsub( - text, - '([' .. all_cons .. ']़?)([' .. vowel .. '्]?)', - function(c, d) - return c .. (d == "" and 'a' or d) - end - ) - - for word in mw.ustring.gmatch(text, "[ऀ-ॿa]+") do - local orig_word = word - - word = rev_string(word) - - word = gsub( - word, - '^a(़?)([' .. all_cons .. '])(.)(.?)', - function(opt, first, second, third) - local a = "" - if match(first, '[' .. special_cons .. ']') - and match(second, '्') - and not perm_cl[first..second..third] - or match(first .. second, 'य[ीेै]') then - a = "a" - end - - return a .. opt .. first .. second .. third - end - ) - - while match(word, syncope_pattern) do - word = gsub(word, syncope_pattern, '%1%2%3%4') - end - - word = gsub( - word, - '(.?)ं(.)', - function(succ, prev) - local mid = nasal_assim[succ] or "n" - if succ..prev == "a" then - mid = "्म" - elseif succ == "" and match(prev, '[' .. vowel .. ']') then - mid = "̃" - end - return succ .. mid .. prev - end - ) - - text = gsub(text, orig_word, rev_string(word)) - end - text = gsub(text, '.़?', conv) - text = gsub(text, 'a([iu])̃', 'a͠%1') - text = gsub(text, 'ज्ञ', conv) - return mw.ustring.toNFC(text) -end - -return export \ No newline at end of file diff --git a/wikt/translit/bn-translit.lua b/wikt/translit/bn-translit.lua deleted file mode 100644 index 6bb030d..0000000 --- a/wikt/translit/bn-translit.lua +++ /dev/null @@ -1,189 +0,0 @@ --- Transliteration for Bengali --- This module will transliterate Bengali language text per WT:BN TR. 
--- Language code: ben - -local export = {} -local gsub = mw.ustring.gsub -local match = mw.ustring.match - -local conv = { - -- consonants - ['ক্ষ'] = 'kkh', ['জ্ঞ'] = 'gg', - ['ক'] = 'k', ['খ'] = 'kh', ['গ'] = 'g', ['ঘ'] = 'gh', ['ঙ'] = 'ṅ', - ['চ'] = 'c', ['ছ'] = 'ch', ['জ'] = 'j', ['ঝ'] = 'jh', ['ঞ'] = 'ñ', - ['ট'] = 'ṭ', ['ঠ'] = 'ṭh', ['ড'] = 'ḍ', ['ঢ'] = 'ḍh', ['ণ'] = 'ṇ', - ['ত'] = 't', ['থ'] = 'th', ['দ'] = 'd', ['ধ'] = 'dh', ['ন'] = 'n', - ['প'] = 'p', ['ফ'] = 'ph', ['ব'] = 'b', ['ভ'] = 'bh', ['ম'] = 'm', - ['য'] = 'j', ['র'] = 'r', ['ল'] = 'l', ['ৱ'] = 'w', - ['শ'] = 'ś', ['ষ'] = 'ṣ', ['স'] = 's', ['হ'] = 'h', - ['য়'] = 'y', ['ড়'] = 'ṛ', ['ঢ়'] = 'ṛh', - - -- visarga - ['ঃ'] = 'ḥ', - - -- vowel diacritics - ['ি'] = 'i', ['ু'] = 'u', ['ৃ'] = 'ri', ['ে'] = 'e', ['ো'] = 'o', - ['া'] = 'a', ['ী'] = 'i', ['ূ'] = 'u', ['ৈ'] = 'oi', ['ৌ'] = 'ou', - - -- vowel signs - ['অ'] = 'ô', ['ই'] = 'i', ['উ'] = 'u', ['ঋ'] = 'ri', ['এ'] = 'e', ['ও'] = 'o', - ['আ'] = 'a', ['ঈ'] = 'i', ['ঊ'] = 'u', ['ঐ'] = 'oi', ['ঔ'] = 'ou', - - --hôshôntô - ['্'] = '', - - -- chôndrôbindu - ['ঁ'] = 'ṁ', - - -- ônusbar - ['ং'] = 'N', - - -- khôndô tô - ['ৎ'] = 't', - - -- numerals - ['০'] = '0', ['১'] = '1', ['২'] = '2', ['৩'] = '3', ['৪'] = '4', ['৫'] = '5', ['৬'] = '6', ['৭'] = '7', ['৮'] = '8', ['৯'] = '9', - - -- punctuation - ['।'] = '.', -- dari -} - -function export.tr(text, lang, sc) - local c = '([কষজঞকখগঘঙচছজঝঞটঠডঢণতথদধনপফবভমযরলৱশষসহ]়?)' - local y = 'য়' - local r = 'র' - local v = '([ô্িুৃেোাীূৈৌঅইউঋএওআঈঊঐঔ])' - local virama = '্' - local n = '(ং?)' - - local no_virama = mw.ustring.gsub(v,virama,"") - - text = text .. " " - - text = mw.ustring.gsub(text,c,"%1ô") - text = mw.ustring.gsub(text,"ô"..v,"%1") - - text = mw.ustring.gsub(text,v..n..c.."ô ",function(j,k,l) --ending - return l==y and j..k..l.."ô " or j..k..l.." " - end) - - local pattern = v..n..c.."ô"..c .. 
no_virama - local continue = true - while continue do - continue = false - text = mw.ustring.gsub(text,"(.*)"..pattern,function(d,e,f,g,h,i) - if g~=y and g~=r then - continue = true - end - return (g==y or g==r) and d..e..f..g.."ô"..h..i or d..e..f..g..h..i - end) - end - - text = mw.ustring.gsub(text,"([যডঢ]়)",conv) - text = mw.ustring.gsub(text,"ক্ষ","kkh") - text = mw.ustring.gsub(text,"জ্ঞ","gg") - text = mw.ustring.gsub(text,".",conv) - - text = mw.ustring.gsub(text,"ː(.)","%1%1") - - text = mw.ustring.gsub(text," ?।",".") - - text = mw.ustring.gsub(text,"([āēeo]y)ô ","%1 ") - text = gsub(text,"ôN ","ông ") - text = gsub(text,"N","ng") - - text = mw.ustring.gsub(text,"([ts])b","%1") - - text = mw.ustring.gsub(text," $","") - - return mw.ustring.toNFC(text) -end - -return export - --- 14 tests failed. (refresh) - --- test_translit_bengali: --- Text Expected Actual Differs at --- Passed ত্বক tôk tôk --- Passed স্বামী sami sami --- Failed জই jôi ji 2 --- Failed মানচিত্র mancitrô mancitr 8 --- Failed সূত্র sutrô sutr 5 --- Failed মই mai mi 2 --- Failed কারখানা karkhana karôkhana 4 --- Passed দুঃখিত duḥkhit duḥkhit --- Passed লেবানন lebanôn lebanôn --- Failed যন্ত্রমানব yôntrômanôb jôntrômanôb 1 --- Passed প্রতিবেশী prôtibeśi prôtibeśi --- Passed রচনা rôcna rôcna --- Passed অঙ্গুষ্ঠানা ôṅguṣṭhana ôṅguṣṭhana --- Passed পানি pani pani --- Passed আগুন agun agun --- Passed পশ্চিমবাংলা pôścimbangla pôścimbangla --- Passed বাংলা bangla bangla --- Passed সর্বনাম sôrbnam sôrbnam --- Passed ইতিহাস itihas itihas --- Failed আর্য় ভাষা নির্গত - দশম এবং aryô bhaṣa nirgôt - dôśôm ebôṁ aryô bhaṣa nirgôt - dôśôm ebông 30 --- Failed শুভ śubhô śubh 5 --- Failed শুদ্ধ śuddhô śuddh 6 --- Passed জল jôl jôl --- Failed তদ্ভব tôdbhôbo tôdbhôb 8 --- Failed তৎসম tôtsômo tôtsôm 7 --- Passed পশ্চিম pôścim pôścim --- Passed লিফ্ট liphṭ liphṭ --- Failed পছন্দ pôcchôndô pôchônd 4 --- Failed গাল galô gal 4 --- Failed জন্মদিন jônmôdin jônmdin 5 --- -- Unit tests for [[Module:bn-translit]]. 
Refresh page to run tests. --- local tests = require('Module:UnitTests') --- local bn_translit = require('Module:bn-translit') - --- local function link(word) --- word = string.gsub(word, "%+", "") --- return '[[' .. word .. '#Bengali|' .. word .. ']]' --- end - --- function tests:do_test_translit(beng, roman, xlit) --- self:equals(link(beng), bn_translit.tr(beng, 'bn', 'Beng'), roman) --- end - --- function tests:test_translit_bengali() --- local examples = { --- --silent b --- { 'ত্বক', 'tôk' }, --- { 'স্বামী', 'sami' }, - --- { 'জই', 'jôi' }, --- { 'মানচিত্র', 'mancitrô' }, --- { 'সূত্র', 'sutrô' }, --- { 'মই', 'mai' }, --- { 'কারখানা', 'karkhana' }, --- { 'দুঃখিত', 'duḥkhit' }, --- { 'লেবানন', 'lebanôn' }, --- { 'যন্ত্রমানব', 'yôntrômanôb' }, --- { 'প্রতিবেশী', 'prôtibeśi' }, --- { 'রচনা', 'rôcna' }, --- { 'অঙ্গুষ্ঠানা', 'ôṅguṣṭhana' }, --- { 'পানি', 'pani' }, --- { 'আগুন', 'agun' }, --- { 'পশ্চিমবাংলা', 'pôścimbangla' }, --- { 'বাংলা', 'bangla' }, --- { 'সর্বনাম', 'sôrbnam' }, --- { 'ইতিহাস', 'itihas' }, --- { 'আর্য় ভাষা নির্গত - দশম এবং', 'aryô bhaṣa nirgôt - dôśôm ebôṁ' }, --- { 'শুভ', 'śubhô' }, --- { 'শুদ্ধ', 'śuddhô' }, --- { 'জল', 'jôl' }, --- { 'তদ্ভব', 'tôdbhôbo' }, --- { 'তৎসম', 'tôtsômo' }, --- { 'পশ্চিম', 'pôścim' }, --- { 'লিফ্ট', 'liphṭ' }, --- { 'পছন্দ', 'pôcchôndô' }, --- { 'গাল', 'galô' }, --- { 'জন্মদিন', 'jônmôdin' }, --- } --- self:iterate(examples, "do_test_translit") --- end - --- return tests \ No newline at end of file diff --git a/wikt/translit/bo-translit.lua b/wikt/translit/bo-translit.lua deleted file mode 100644 index 3d2707a..0000000 --- a/wikt/translit/bo-translit.lua +++ /dev/null @@ -1,126 +0,0 @@ --- This module will transliterate Tibetan language text per the Wylie transliteration scheme. 
--- Language code: bod - -local export = {} -local gsub = mw.ustring.gsub -local match = mw.ustring.match - -local conv = { - ["ཀ"] = "k", ["ཁ"] = "kh", ["ག"] = "g", ["ང"] = "ng", - ["ཅ"] = "c", ["ཆ"] = "ch", ["ཇ"] = "j", ["ཉ"] = "ny", - ["ཏ"] = "t", ["ཐ"] = "th", ["ད"] = "d", ["ན"] = "n", - ["པ"] = "p", ["ཕ"] = "ph", ["བ"] = "b", ["མ"] = "m", - ["ཙ"] = "ts", ["ཚ"] = "tsh", ["ཛ"] = "dz", ["ཝ"] = "w", - ["ཞ"] = "zh", ["ཟ"] = "z", ["འ"] = "‘", ["ཡ"] = "y", - ["ར"] = "r", ["ལ"] = "l", ["ཤ"] = "sh", ["ས"] = "s", - ["ཧ"] = "h", ["ཨ"] = "a", - ["ཊ"] = "ṭ", ["ཋ"] = "ṭh", ["ཌ"] = "ḍ", ["ཎ"] = "ṇ", ["ཥ"] = "ṣ", - - ["ི"] = "i", - ["ུ"] = "u", - ["ེ"] = "e", - ["ོ"] = "o", - ["ཱ"] = "ā", - ["ཱི"] = "ī", - ["ཱུ"] = "ū", - ["ཻ"] = "ai", - ["ཽ"] = "au", - ["ྲྀ"] = "ṛ", - ["ཷ"] = "ṝ", - ["ླྀ"] = "ḷ", - ["ཹ"] = "ḹ", - ["ཾ"] = "ṃ", - ["ྃ"] = "ṃ", - ["ཿ"] = "aḥ", - ["ༀ"] = "oṃ", - - ["ྐ"] = "k", ["ྑ"] = "kh", ["ྒ"] = "g", ["ྔ"] = "ng", - ["ྕ"] = "c", ["ྖ"] = "ch", ["ྗ"] = "j", ["ྙ"] = "ny", - ["ྟ"] = "t", ["ྠ"] = "th", ["ྡ"] = "d", ["ྣ"] = "n", - ["ྤ"] = "p", ["ྥ"] = "ph", ["ྦ"] = "b", ["ྨ"] = "m", - ["ྩ"] = "ts", ["ྪ"] = "tsh", ["ྫ"] = "dz", ["ྭ"] = "w", - ["ྮ"] = "zh", ["ྯ"] = "z", ["ྰ"] = "‘", ["ྱ"] = "y", - ["ྲ"] = "r", ["ླ"] = "l", ["ྴ"] = "sh", ["ྶ"] = "s", - ["ྷ"] = "h", ["ྸ"] = "a", - ["ྚ"] = "ṭ", ["ྛ"] = "ṭh", ["ྜ"] = "ḍ", ["ྞ"] = "ṇ", ["ྵ"] = "ṣ" -} - -local symbol = { - ["༠"] = "0", ["༡"] = "1", ["༢"] = "2", ["༣"] = "3", ["༤"] = "4", - ["༥"] = "5", ["༦"] = "6", ["༧"] = "7", ["༨"] = "8", ["༩"] = "9", - ["༪"] = "0.5", ["༫"] = "1.5", ["༬"] = "2.5", ["༭"] = "3.5", ["༮"] = "4.5", - ["༯"] = "5.5", ["༰"] = "6.5", ["༱"] = "7.5", ["༲"] = "8.5", ["༳"] = "9.5", - ["་"] = " ", ["།"] = "·", ["‘"] = "'", -} - -function export.tr(text, lang, sc, debug_mode) - text = gsub(text, "[༌་]+ ?", "་") - text = gsub(text, "[་།]+$", "") - text = gsub(text, "([^་])(འ[ཱིེོིུྲཷླཹཾཿ])", "%1་‘་%2") - - for Tibetan_word in mw.ustring.gmatch(text, "[་-༑ཀ-ྼ]+") do - Tibetan_word = gsub(Tibetan_word, 
"་$", "") - - for syllable in mw.ustring.gmatch(Tibetan_word, "[ཀ-ྼ]+") do - local tr = {} - - local syl = gsub(syllable, "ཨ([ཱིེོིུྲཷླཹཾཿ])", "%1") - syl = gsub(syl, "([ྐྑྒྔྕྖྗྙྟྠྡྣྤྥྦྨྩྪྫྮྯྭྰྱྲླྴྶྷྸ]+)([^ཱིེོིུྲཷླཹཾཿྐྑྒྔྕྖྗྙྟྠྡྣྤྥྦྨྩྪྫྮྯྭྰྱྲླྴྶྷྸ]*)$", "%1a%2") - letter = mw.text.split(syl, "", true) - - for i = 1, #letter do - tr[i] = conv[letter[i]] or letter[i] - end - - if not match(syllable,"[ྐྑྒྔྕྖྗྙྟྠྡྣྤྥྦྨྩྪྫྮྯྭྰྱྲླྴྶྷྸིེོུཨཱཱཱིཻཽུྲྀཷླྀཹཾྃཿ]") then - if #letter < 5 then - if #letter > 3 then - if letter[2] ~= "འ" and letter[3] ~= "འ" then - table.insert(tr, 3, "a") - end - - elseif #letter > 2 then - if letter[3] == "འ" then - if letter[1] == "འ" then - tr = { "‘", tr[2], "a", "‘" } - else - tr = { tr[1], tr[2], "a", "‘" } - end - - elseif not match(letter[1], "[གདབའམ]") or (match(letter[1]..letter[2]..letter[3], "[གདབའམ][གངཐབམའརལ]ས")) then - table.insert(tr, 2, "a") - else - table.insert(tr, 3, "a") - end - - elseif match(letter[1], "[གདབའམ]") and not match(letter[2] or "", "[གངདཐབམའརལསན]") then - table.insert(tr, "a") - else - table.insert(tr, 2, "a") - end - end - end - - tr = table.concat(tr) - - if match(syllable, "གཡ") then - tr = gsub(tr, "gy", "g.y") - end - - text = gsub(text, syllable, tr, 1) - end - end - - text = gsub(text, ".", symbol) - text = gsub(text, " ' ", "") - text = gsub(text, " *· *·? *", " · ") - text = gsub(text, " *%.", ".") - - if match(text, "[ཀ-ྼ]") and not debug_mode then - return nil - else - return text - end -end - -return export \ No newline at end of file diff --git a/wikt/translit/brah-translit.lua b/wikt/translit/brah-translit.lua deleted file mode 100644 index b2a71f2..0000000 --- a/wikt/translit/brah-translit.lua +++ /dev/null @@ -1,83 +0,0 @@ ---[[This module will transliterate text in the Brahmi script. 
It is used to transliterate -Ashokan Prakrit (inc-ash), Magadhi Prakrit (inc-mgd), Ardhamagadhi Prakrit (pka), -Maharastri Prakrit (pmh) and Sauraseni Prakrit (psu).]] - -local export = {} - -local consonants = { ---consonants - ['𑀓']='k', ['𑀔']='kh', ['𑀕']='g', ['𑀖']='gh', ['𑀗']='ṅ', - ['𑀘']='c', ['𑀙']='ch', ['𑀚']='j', ['𑀛']='jh', ['𑀜']='ñ', - ['𑀝']='ṭ', ['𑀞']='ṭh', ['𑀟']='ḍ', ['𑀠']='ḍh', ['𑀡']='ṇ', - ['𑀢']='t', ['𑀣']='th', ['𑀤']='d', ['𑀥']='dh', ['𑀦']='n', - ['𑀧']='p', ['𑀨']='ph', ['𑀩']='b', ['𑀪']='bh', ['𑀫']='m', - ['𑀬']='y', ['𑀭']='r', ['𑀮']='l', ['𑀯']='v', ['𑀴']='ḷ', - ['𑀰']='ś', ['𑀱']='ṣ', ['𑀲']='s', ['𑀳']='h', -} - -local diacritics = { ---matras - ['𑀸']='ā', ['𑀺']='i', ['𑀻']='ī', ['𑀼']='u', ['𑀽']='ū', ['𑀾']='ṛ', ['𑀿']='ṝ', - ['𑁀']='l̥', ['𑁁']='l̥̄', ['𑁂']='e', ['𑁃']='ai', ['𑁄']='o', ['𑁅']='au', ['𑁆']='', - --bhattiprolu aa - ['𑀹']='ā', -} - -local tt = { - ---vowels - ['𑀅']='a', ['𑀆']='ā', ['𑀇']='i', ['𑀈']='ī', ['𑀉']='u', ['𑀊']='ū', ['𑀋']='ṛ', ['𑀌']='ṝ', - ['𑀍']='l̥', ['𑀎']='l̥̄', ['𑀏']='e', ['𑀐']='ai', ['𑀑']='o', ['𑀒']='au', - -- chandrabindu - ['𑀀']='m̐', --until a better method is found - -- anusvara - ['𑀁']='ṃ', --until a better method is found - -- visarga - ['𑀂']='ḥ', - --numerals - ['𑁦']='0', ['𑁧']='1', ['𑁨']='2', ['𑁩']='3', ['𑁪']='4', ['𑁫']='5', ['𑁬']='6', ['𑁭']='7', ['𑁮']='8', ['𑁯']='9', - --punctuation - ['𑁇']='.', --danda - ['𑁈']='.' --double danda -} - -function export.tr(text, lang, sc) - if sc ~= "Brah" then - return nil - end - - text = mw.ustring.gsub( - text, - '([𑀓-𑀴])'.. - '([𑀸𑀺𑀺𑀻𑀼𑀽𑀾𑀿𑁀𑁁𑁂𑁃𑁄𑁅𑁆𑀹]?)'.. - '([𑀅-𑀒]?)', - function(c, d, e) - if d == "" and e ~= "" then - return consonants[c] .. 'a' .. tt[e] .. '̈' - elseif e ~= "" then - return consonants[c] .. diacritics[d] .. tt[e] - elseif d == "" then - return consonants[c] .. 'a' - else - return consonants[c] .. 
diacritics[d] - end - end) - - text = mw.ustring.gsub(text, '.', tt) - - return text -end - -return export - --- Text Expected Actual --- Passed 𑀧𑀁𑀟𑀺𑀅 paṃḍia paṃḍia --- return require("Module:transliteration module testcases")( --- require "Module:Brah-translit".tr, --- { --- { "𑀧𑀁𑀟𑀺𑀅", "paṃḍia" }, --- --[[ --- { "", "" }, --- --]] --- }, --- "Brah", "pmh") \ No newline at end of file diff --git a/wikt/translit/bua-translit.lua b/wikt/translit/bua-translit.lua deleted file mode 100644 index 4517b65..0000000 --- a/wikt/translit/bua-translit.lua +++ /dev/null @@ -1,20 +0,0 @@ --- This module will transliterate Buryat language text per WT:BUA TR. --- Language code: bua -local export = {} - -local tab = { - ["А"]="A", ["Б"]="B", ["В"]="V", ["Г"]="G", ["Д"]="D", ["Е"]="Je", ["Ё"]="Jo", ["Ж"]="Ž", ["З"]="Z", ["И"]="I", ["Й"]="J", - ["К"]="K", ["Л"]="L", ["М"]="M", ["Н"]="N", ["Ҥ"]="Ŋ", ["О"]="O", ["Ө"]="Ö", ["П"]="P", ["Р"]="R", ["С"]="S", ["Т"]="T", - ["У"]="U", ["Ү"]="Ü", ["Ф"]="F", ["Х"]="X", ["Һ"]="H", ["Ц"]="C", ["Ч"]="Č", ["Ш"]="Š", ["Щ"]="Šč", ["Ъ"]="ʺ", ["Ы"]="Y", - ["Ь"]="ʹ", ["Э"]="E", ["Ю"]="Ju", ["Я"]="Ja", - ['а']='a', ['б']='b', ['в']='v', ['г']='g', ['д']='d', ['е']='je', ['ё']='jo', ['ж']='ž', ['з']='z', ['и']='i', ['й']='j', - ['к']='k', ['л']='l', ['м']='m', ['н']='n', ['ҥ']='ŋ', ['о']='o', ['ө']='ö', ['п']='p', ['р']='r', ['с']='s', ['т']='t', - ['у']='u', ['ү']='ü', ['ф']='f', ['х']='x', ['һ']='h', ['ц']='c', ['ч']='č', ['ш']='š', ['щ']='šč', ['ъ']='ʺ', ['ы']='y', - ['ь']='ʹ', ['э']='e', ['ю']='ju', ['я']='ja', -} - -function export.tr(text, lang, sc) - return (mw.ustring.gsub(text,'.',tab)) -end - -return export \ No newline at end of file diff --git a/wikt/translit/cans-translit.lua b/wikt/translit/cans-translit.lua deleted file mode 100644 index d9cab49..0000000 --- a/wikt/translit/cans-translit.lua +++ /dev/null @@ -1,109 +0,0 @@ --- This module will transliterate text in the Canadian syllabics script. 
- -local export = {} --- to be discussed: not to create this module, but create individual modules instead? - -local UTF8_char = "[%z\1-\127\194-\244][\128-\191]*" - -local function iter_char(str) - return string.gmatch(str, UTF8_char) -end - -local function len(str) - local _, length = string.gsub(str, UTF8_char, "") - return length -end - -local function fetch(str, index) - local i = 0 - for char in iter_char(str) do - i = i + 1 - if i == index then - return char - end - end - return "" -end - -function export.tr(text, lang, sc) - local data = { - { --short-vowel - "ᐁᐃᐅᐊᐯᐱᐳᐸᑌᑎᑐᑕᑫᑭᑯᑲᒉᒋᒍᒐᒣᒥᒧᒪᓀᓂᓄᓇᓭᓯᓱᓴᔦᔨᔪᔭᖊᖋᖌᖍᕃᕆᕈᕍᕂᕄᕊᕋᔐᔑᔓᔕᕓᕕᕗᕙᕞᕠᕤᕦᘚᘛᘕᘔᓓᓕᓗᓚᙯᕿᖁᖃᙰᖏᖑᖓ ᙱᙳᙵᕴᕵᕷᕹ", - "1ptkcmnsyrrršfðzlq23h", - {"","ng","nng"}, - "eioa", - function(a,b) - return a..b - end - }, - { --w - "ᐌᐍᐎᐏᐒᐓᐗᐘᐺᐻᐼᐽᑀᑁᑄᑅᑗᑘᑙᑚᑝᑞᑡᑢᑴᑵᑶᑷᑺᑻᑾᑿᒒᒓᒔᒕᒘᒙᒜᒝᒬᒭᒮᒯᒲᒳᒶᒷ" - .. "ᓉᓊ ᓋᓌ ᓶᓷᓸᓹᓼᓽᔀᔁᔯᔰᔱᔲᔵᔶᔹᔺᔗᔘᔙᔚᔝᔞᔡᔢᓜᓝᓞᓟᓢᓣᓦᓧ", - "1ptkcmnsyrz", - {""}, - "eeiiooaa", - function(a,b) - return a..'w'..b - end - }, - { --long-vowel - "ᐄᐆᐋᐲᐴᐹᑏᑑᑖᑮᑰᑳᒌᒎᒑᒦᒨᒫᓃᓅᓈᓰᓲᓵᔩᔫᔮᕇᕉᕌᔒᔔᔖᕖᕘᕚᕢᕥᕧᓖᓘᓛᐐᐔᐙᖀᖂᖄᕶᕸᕺ", - "1ptkcmnsyršfðlwqh", - {""}, - "īōā", - function(a,b) - return a..b - end - }, - { --w-long - "ᐐᐑᐔᐕᐖᐙᐚᐛᐾᐿᑂᑃ ᑆᑇᑈᑛᑜᑟᑠ ᑣᑤᑥᑸᑹᑼᑽ ᒀᒁᒂᒖᒗᒚᒛ ᒞᒟᒠᒰᒱᒴᒵ ᒸᒹᒺ ᓍᓎᓏ" - .. 
"ᓠᓡᓤᓥ ᓨᓩ ᓺᓻᓾᓿ ᔂᔃᔄᔛᔜᔟᔠ ᔣᔤ ᔳᔴᔷᔸ ᔻᔼᔽ ᕎᕏ ᕛᕜ ᕨᕩ ", - "1ptkcmnlsšyrfð", - {""}, - "īīōōōāāā", - function(a,b) - return a..'w'..b - end - }, - { --individual - "ᑊᐟᐠᐨᒼᐣᐢᐧᐤᐦᕁᕽᓫᕑᑉᑦᒡᒃᒻᓐᔅᔥᔾᓪᕐᕪ‡ᒄᔉᖅᖕᖖᕝᖦᕀᕻᕼ", - "ptkcmnsywh11lrptckmnsšylrðð23q45vlyhh", - {"hk","kw","sk","ng","nng"}, - }, - } - - for i, item in pairs(data) do - if item[4] then - local length = len(item[4]) - local c = 0 - for s in iter_char(item[1]) do - c = c + 1 - local index = math.ceil(c / length) - local a = fetch(item[2], index) - if tonumber(a) then - a = item[3][tonumber(a)] - end - index = (c - 1) % length + 1 - local b = fetch(item[4], index) - if s ~= " " then - text = string.gsub(text, s, item[5](a,b)) - end - end - else - local iter1, iter2 = iter_char(item[1]), iter_char(item[2]) - while true do - local s, a = iter1(), iter2() - if not (s and a) then - break - end - if tonumber(a) then - a = item[3][tonumber(a)] - end - text = string.gsub(text, s, a) - end - end - end - return text -end - -return export \ No newline at end of file diff --git a/wikt/translit/ce-translit.lua b/wikt/translit/ce-translit.lua deleted file mode 100644 index 80fbeb8..0000000 --- a/wikt/translit/ce-translit.lua +++ /dev/null @@ -1,114 +0,0 @@ --- This module will transliterate Chechen language text per WT:CE TR. 
--- Language code: che - -local export = {} - -local tt = { - ["а"]="a", ["б"]="b", ["в"]="w", ["г"]="g", ["д"]="d", ["е"]="e", ["ё"]="ë", - ["ж"]="ž", ["з"]="z", ["и"]="i", ["й"]="j", ["к"]="k", ["л"]="l", ["м"]="m", - ["н"]="n", ["о"]="o", ["п"]="p", ["р"]="r", ["с"]="s", ["т"]="t", ["у"]="u", - ["ф"]="f", ["х"]="χ", ["ц"]="c", ["ч"]="č", ["ш"]="š", ["щ"]="šč", ["ъ"]="ʾ", ["ы"]="y", - ["ь"]="’", ["э"]="e", ["ю"]="ju", ["я"]="ja", ["ӏ"]="ʿ", ["А"]="A", ["Б"]="B", - ["В"]="W", ["Г"]="G", ["Д"]="D", ["Е"]="E", ["Ё"]="Ë", ["Ж"]="Ž", ["З"]="Z", - ["И"]="I", ["Й"]="J", ["К"]="K", ["Л"]="L", ["М"]="M", ["Н"]="N", ["О"]="O", - ["П"]="P", ["Р"]="R", ["С"]="S", ["Т"]="T", ["У"]="U", ["Ф"]="F", ["Х"]="Χ", - ["Ц"]="C", ["Ч"]="Č", ["Ш"]="Š", ["Щ"]="Šč", ["Ъ"]="ʾ", ["Ы"]="Y", ["Ь"]="’", ["Э"]="E", - ["Ю"]="Ju", ["Я"]="Ja", ["ӏ"]="ʿ"}; - -local digraphs = { - ['аь'] = 'ä', - ['гӏ'] = 'ġ', - ['дж'] = 'ǯ', - ['дз'] = 'ʒ', - ['ий'] = 'ī', - ['кх'] = 'q', - ['къ'] = 'q̇', - ['кӏ'] = 'ḳ', - ['оь'] = 'ö', - ['пӏ'] = 'ṗ', - ['тӏ'] = 'ṭ', - ['уь'] = 'ü', - ['хь'] = 'ḥ', - ['хӏ'] = 'h', - ['цӏ'] = 'c̣', - ['чӏ'] = 'č̣', - ['юь'] = 'jü', - ['яь'] = 'jä', - ['Аь'] = 'Ä', - ['Гӏ'] = 'Ġ', - ['Дж'] = 'Ǯ', - ['Дз'] = 'Ʒ', - ['Ий'] = 'Ī', - ['Кх'] = 'Q', - ['Къ'] = 'Q̇', - ['Кӏ'] = 'Ḳ', - ['Оь'] = 'Ö', - ['Пӏ'] = 'Ṗ', - ['Тӏ'] = 'Ṭ', - ['Уь'] = 'Ü', - ['Хь'] = 'Ḥ', - ['Хӏ'] = 'H', - ['Цӏ'] = 'C̣', - ['Чӏ'] = 'Č̣', - ['Юь'] = 'Jü', - ['Яь'] = 'Jä', -} - -function export.tr(text, lang, sc) - local str_gsub = string.gsub - - -- Convert capital to lowercase palochka. Lowercase is found in tables - -- above. - text = str_gsub(text, mw.ustring.char(0x4C0), mw.ustring.char(0x4CF)) - - for digraph, translit in pairs(digraphs) do - text = str_gsub(text, digraph, translit) - end - - text = str_gsub(text, '[%z\1-127\194-\244][\128-\191]*', tt) -- UTF-8 character pattern - - return text -end - -return export - --- 1 test failed. 
(refresh) - --- test: --- Text Expected Actual --- Passed нохчийн мотт noχčīn mott noχčīn mott --- Passed буьйса büjsa büjsa --- Passed бӏаьрг bʿärg bʿärg --- Passed вуьрхӏитта würhitta würhitta --- Passed Гӏалгӏайчоь Ġalġajčö Ġalġajčö --- Passed дезткъе итт deztq̇e itt deztq̇e itt --- Passed доьзткъа döztq̇a döztq̇a --- Passed жӏаьла žʿäla žʿäla --- Passed кхокха qoqa qoqa --- Passed пачхьалкх pačḥalq pačḥalq --- Passed пӏераска ṗeraska ṗeraska --- Passed шиъ šiʾ šiʾ --- Passed юьхь jüḥ jüḥ --- Passed ӏуьйре ʿüjre ʿüjre --- Failed Оьрсийн Федераци Örsīn Fiedieraci Örsīn Federaci --- return require("Module:transliteration module testcases")( --- require("Module:ce-translit").tr, --- { --- { "нохчийн мотт", "noχčīn mott" }, --- { "буьйса", "büjsa" }, --- { "бӏаьрг", "bʿärg" }, --- { "вуьрхӏитта", "würhitta" }, --- { "Гӏалгӏайчоь", "Ġalġajčö" }, --- { "дезткъе итт", "deztq̇e itt" }, --- { "доьзткъа", "döztq̇a" }, --- { "жӏаьла", "žʿäla" }, --- { "кхокха", "qoqa" }, --- { "пачхьалкх", "pačḥalq" }, --- { "пӏераска", "ṗeraska" }, --- { "шиъ", "šiʾ" }, --- { "юьхь", "jüḥ" }, --- { "ӏуьйре", "ʿüjre" }, --- { "Оьрсийн Федераци", "Örsīn Fiedieraci" }, --- }, --- "Cyrl", "ce" --- ) \ No newline at end of file diff --git a/wikt/translit/cher-translit.lua b/wikt/translit/cher-translit.lua deleted file mode 100644 index fc443c5..0000000 --- a/wikt/translit/cher-translit.lua +++ /dev/null @@ -1,142 +0,0 @@ --- This module will transliterate text in the Cherokee script. It is used to transliterate Cherokee (chr). 
- -local export = {} - -local tt = { - ["Ꭰ"]="a", ["ꭰ"]="a", - ["Ꭱ"]="e", ["ꭱ"]="e", - ["Ꭲ"]="i", ["ꭲ"]="i", - ["Ꭳ"]="o", ["ꭳ"]="o", - ["Ꭴ"]="u", ["ꭴ"]="u", - ["Ꭵ"]="v", ["ꭵ"]="v", - - ["Ꭶ"]="ga", ["ꭶ"]="ga", - ["Ꭷ"]="ka", ["ꭷ"]="ka", - ["Ꭸ"]="ge", ["ꭸ"]="ge", - ["Ꭹ"]="gi", ["ꭹ"]="gi", - ["Ꭺ"]="go", ["ꭺ"]="go", - ["Ꭻ"]="gu", ["ꭻ"]="gu", - ["Ꭼ"]="gv", ["ꭼ"]="gv", - - ["Ꭽ"]="ha", ["ꭽ"]="ha", - ["Ꭾ"]="he", ["ꭾ"]="he", - ["Ꭿ"]="hi", ["ꭿ"]="hi", - ["Ꮀ"]="ho", ["ꮀ"]="ho", - ["Ꮁ"]="hu", ["ꮁ"]="hu", - ["Ꮂ"]="hv", ["ꮂ"]="hv", - - ["Ꮃ"]="la", ["ꮃ"]="la", - ["Ꮄ"]="le", ["ꮄ"]="le", - ["Ꮅ"]="li", ["ꮅ"]="li", - ["Ꮆ"]="lo", ["ꮆ"]="lo", - ["Ꮇ"]="lu", ["ꮇ"]="lu", - ["Ꮈ"]="lv", ["ꮈ"]="lv", - - ["Ꮉ"]="ma", ["ꮉ"]="ma", - ["Ꮊ"]="me", ["ꮊ"]="me", - ["Ꮋ"]="mi", ["ꮋ"]="mi", - ["Ꮌ"]="mo", ["ꮌ"]="mo", - ["Ꮍ"]="mu", ["ꮍ"]="mu", - ["Ᏽ"]="mv", ["ᏽ"]="mv", - - ["Ꮎ"]="na", ["ꮎ"]="na", - ["Ꮏ"]="hna", ["ꮏ"]="hna", - ["Ꮐ"]="nah", ["ꮐ"]="nah", - ["Ꮑ"]="ne", ["ꮑ"]="ne", - ["Ꮒ"]="ni", ["ꮒ"]="ni", - ["Ꮓ"]="no", ["ꮓ"]="no", - ["Ꮔ"]="nu", ["ꮔ"]="nu", - ["Ꮕ"]="nv", ["ꮕ"]="nv", - - ["Ꮖ"]="qua", ["ꮖ"]="qua", - ["Ꮗ"]="que", ["ꮗ"]="que", - ["Ꮘ"]="qui", ["ꮘ"]="qui", - ["Ꮙ"]="quo", ["ꮙ"]="quo", - ["Ꮚ"]="quu", ["ꮚ"]="quu", - ["Ꮛ"]="quv", ["ꮛ"]="quv", - - ["Ꮝ"]="s", ["ꮝ"]="s", - ["Ꮜ"]="sa", ["ꮜ"]="sa", - ["Ꮞ"]="se", ["ꮞ"]="se", - ["Ꮟ"]="si", ["ꮟ"]="si", - ["Ꮠ"]="so", ["ꮠ"]="so", - ["Ꮡ"]="su", ["ꮡ"]="su", - ["Ꮢ"]="sv", ["ꮢ"]="sv", - - ["Ꮣ"]="da", ["ꮣ"]="da", - ["Ꮤ"]="ta", ["ꮤ"]="ta", - ["Ꮥ"]="de", ["ꮥ"]="de", - ["Ꮦ"]="te", ["ꮦ"]="te", - ["Ꮧ"]="di", ["ꮧ"]="di", - ["Ꮨ"]="ti", ["ꮨ"]="ti", - ["Ꮩ"]="do", ["ꮩ"]="do", - ["Ꮪ"]="du", ["ꮪ"]="du", - ["Ꮫ"]="dv", ["ꮫ"]="dv", - - ["Ꮬ"]="dla", ["ꮬ"]="dla", - ["Ꮭ"]="tla", ["ꮭ"]="tla", - ["Ꮮ"]="tle", ["ꮮ"]="tle", - ["Ꮯ"]="tli", ["ꮯ"]="tli", - ["Ꮰ"]="tlo", ["ꮰ"]="tlo", - ["Ꮱ"]="tlu", ["ꮱ"]="tlu", - ["Ꮲ"]="tlv", ["ꮲ"]="tlv", - - ["Ꮳ"]="tsa", ["ꮳ"]="tsa", - ["Ꮴ"]="tse", ["ꮴ"]="tse", - ["Ꮵ"]="tsi", ["ꮵ"]="tsi", - ["Ꮶ"]="tso", ["ꮶ"]="tso", - ["Ꮷ"]="tsu", ["ꮷ"]="tsu", - ["Ꮸ"]="tsv", 
["ꮸ"]="tsv", - - ["Ꮹ"]="wa", ["ꮹ"]="wa", - ["Ꮺ"]="we", ["ꮺ"]="we", - ["Ꮻ"]="wi", ["ꮻ"]="wi", - ["Ꮼ"]="wo", ["ꮼ"]="wo", - ["Ꮽ"]="wu", ["ꮽ"]="wu", - ["Ꮾ"]="wv", ["ꮾ"]="wv", - - ["Ꮿ"]="ya", ["ꮿ"]="ya", - ["Ᏸ"]="ye", ["ᏸ"]="ye", - ["Ᏹ"]="yi", ["ᏹ"]="yi", - ["Ᏺ"]="yo", ["ᏺ"]="yo", - ["Ᏻ"]="yu", ["ᏻ"]="yu", - ["Ᏼ"]="yv", ["ᏼ"]="yv", -} - -function export.tr(text) - - text = mw.ustring.gsub(text, '.', tt) - - return text - -end - -return export - --- All tests passed. (refresh) - --- test_translit_cher: --- Text Expected Actual Differs at --- Passed ᎦᎸᏉᏗᎠᏓᏅᏙ galvquodiadanvdo galvquodiadanvdo --- Passed ᎠᎾᎳᏍᎩᏍᎬ analasgisgv analasgisgv --- Passed ᎠᎸᏇᏂᏯ alvqueniya alvqueniya --- -- Unit tests for [[Module:Cher-translit]]. Refresh page to run tests. --- local tests = require('Module:UnitTests') --- local cher_translit = require('Module:Cher-translit') - --- function tests:check_translit(cher, roman, xlit) --- self:equals('[[' .. cher .. '#Cherokee|' .. cher .. ']]', cher_translit.tr(cher, 'chr', 'Cher'), roman) --- end - --- function tests:test_translit_cher() --- self:iterate( --- { --- { 'ᎦᎸᏉᏗᎠᏓᏅᏙ', 'galvquodiadanvdo' }, --- { 'ᎠᎾᎳᏍᎩᏍᎬ', 'analasgisgv' }, --- { 'ᎠᎸᏇᏂᏯ', 'alvqueniya' }, --- }, --- 'check_translit' --- ) --- end - --- return tests \ No newline at end of file diff --git a/wikt/translit/chm-translit.lua b/wikt/translit/chm-translit.lua deleted file mode 100644 index cec92dc..0000000 --- a/wikt/translit/chm-translit.lua +++ /dev/null @@ -1,33 +0,0 @@ --- This module will transliterate Eastern Mari language text (and also Western Mari) per WT:CHM TR. 
--- Language code: chm - -local export = {} - -local tab = { - ["А"]="A", ["Б"]="B", ["В"]="V", ["Г"]="G", ["Д"]="D", ["Е"]="E", ["Ё"]="Jo", ["Ж"]="Ž", ["З"]="Z", ["И"]="I", ["Й"]="J", - ["К"]="K", ["Л"]="L", ["М"]="M", ["Н"]="N", ["Ҥ"]="Ŋ", ["О"]="O", ["Ӧ"]="Ö", ["П"]="P", ["Р"]="R", ["С"]="S", ["Т"]="T", - ["У"]="U", ["Ӱ"]="Ü", ["Ф"]="F", ["Х"]="X", ["Ц"]="C", ["Ч"]="Č", ["Ш"]="Š", ["Щ"]="Šč", ["Ъ"]="ʺ", ["Ы"]="Ə", ["Ь"]="ʹ", - ["Э"]="E", ["Ю"]="Ju", ["Я"]="Ja", - ['а']='a', ['б']='b', ['в']='v', ['г']='g', ['д']='d', ['е']='e', ['ё']='jo', ['ж']='ž', ['з']='z', ['и']='i', ['й']='j', - ['к']='k', ['л']='l', ['м']='m', ['н']='n', ['ҥ']='ŋ', ['о']='o', ['ӧ']='ö', ['п']='p', ['р']='r', ['с']='s', ['т']='t', - ['у']='u', ['ӱ']='ü', ['ф']='f', - ['х']='x', ['ц']='c', ['ч']='č', ['ш']='š', ['щ']='šč', ['ъ']='ʺ', ['ы']='ə', ['ь']='ʹ', ['э']='e', ['ю']='ju', ['я']='ja', - -- Hill (Western) Mari only, doesn't use Ҥ, ҥ - ["Ӓ"]="Ä", ["Ӹ"]="Ə̈", ['ӓ']='ä', ['ӹ']='ə̈', -} - -function export.tr(text) - -- Ё needs converting if is decomposed - text = text:gsub("ё","ё"):gsub("Ё","Ё") - - -- е after a vowel or at the beginning of a word becomes je - text = mw.ustring.gsub(text, "([АӒОӦУӰЫӸЕЯЁЮИЕЪЬаӓоӧуӱыӹэяёюиеъь%A][́̀]?)е","%1je") - text = mw.ustring.gsub(text, "^Е","Je") - text = mw.ustring.gsub(text, "^е","je") - text = mw.ustring.gsub(text, "([^Ѐ-ӿ])Е","%1Je") - text = mw.ustring.gsub(text, "([^Ѐ-ӿ])е","%1je") - - return (mw.ustring.gsub(text,'.',tab)) -end - -return export \ No newline at end of file diff --git a/wikt/translit/cite b/wikt/translit/cite deleted file mode 100644 index ebf1c8a..0000000 --- a/wikt/translit/cite +++ /dev/null @@ -1,25 +0,0 @@ -단어 -명사 -사람을 단속하거나 상황을 통제하기 위해 일정한 곳을 주의 깊게 지켜보는 사람. -"<구> 준법 감시인. -<구> 환경 보호 단체의 감시인. -<구> 감시인 제도. -<구> 감시인이 지켜보다. -<구> 감시인에게 들키다. -<구> 감시인으로부터 주의를 듣다. -<문장> 국립 공원은 밀렵을 방지하기 위해 감시인들을 두고 있다. -<문장> 경찰은 사건이 있었던 곳을 출입 금지 구역으로 정하고 감시인을 배치하였다. -<문장> 쓰레기 불법 투기가 많은 지역은 감시 카메라뿐만 아니라 감시인도 필요하다. 
" - -감시인 - -감시원 - -감시병 - -@inproceedings{Giunchiglia2018oneworld, - title={One World - Seven Thousand Languages}, - author={Giunchiglia, Fausto and Batsuren, Khuyagbaatar and Freihat, Abed Alhakim}, - booktitle={CiCling}, - year={2018} -} \ No newline at end of file diff --git a/wikt/translit/copt-translit.lua b/wikt/translit/copt-translit.lua deleted file mode 100644 index cf84c6b..0000000 --- a/wikt/translit/copt-translit.lua +++ /dev/null @@ -1,48 +0,0 @@ --- This module will transliterate text in the Coptic script. It is used to transliterate Coptic (cop). --- Language code: cop - -local export = {} - -local chars = { - ["Ⲁ"] = "A", ["ⲁ"] = "a", - ["Ⲃ"] = "B", ["ⲃ"] = "b", - ["Ⲅ"] = "G", ["ⲅ"] = "g", - ["Ⲇ"] = "D", ["ⲇ"] = "d", - ["Ⲉ"] = "E", ["ⲉ"] = "e", - ["Ⲍ"] = "Z", ["ⲍ"] = "z", - ["Ⲏ"] = "Ē", ["ⲏ"] = "ē", - ["Ⲑ"] = "Th", ["ⲑ"] = "th", - ["Ⲓ"] = "I", ["ⲓ"] = "i", - ["Ⲕ"] = "K", ["ⲕ"] = "k", - ["Ⲗ"] = "L", ["ⲗ"] = "l", - ["Ⲙ"] = "M", ["ⲙ"] = "m", - ["Ⲛ"] = "N", ["ⲛ"] = "n", ["Ⲻ"] = "N", ["ⲻ"] = "n", - ["Ⲝ"] = "Ks", ["ⲝ"] = "ks", - ["Ⲟ"] = "O", ["ⲟ"] = "o", - ["Ⲡ"] = "P", ["ⲡ"] = "p", - ["Ⲣ"] = "R", ["ⲣ"] = "r", - ["Ⲥ"] = "S", ["ⲥ"] = "s", - ["Ⲧ"] = "T", ["ⲧ"] = "t", - ["Ⲩ"] = "U", ["ⲩ"] = "u", - ["Ⲫ"] = "Ph", ["ⲫ"] = "ph", - ["Ⲭ"] = "Kh", ["ⲭ"] = "kh", - ["Ⲯ"] = "Ps", ["ⲯ"] = "ps", - ["Ⲱ"] = "Ō", ["ⲱ"] = "ō", - ["Ϣ"] = "Š", ["ϣ"] = "š", - ["Ϥ"] = "F", ["ϥ"] = "f", - ["Ϧ"] = "X", ["ϧ"] = "x", ["Ⳉ"] = "X", ["ⳉ"] = "x", - ["Ϩ"] = "H", ["ϩ"] = "h", - ["Ϫ"] = "J", ["ϫ"] = "j", - ["Ϭ"] = "Q", ["ϭ"] = "q", - ["Ϯ"] = "Ti", ["ϯ"] = "ti", - ["Ⲹ"] = "K°", ["ⲹ"] = "k°", - ["ⳤ"] = "ke", - ["Ⲳ"] = "ʾ", ["ⲳ"] = "ʾ", - ["Ⳋ"] = "Ç", ["ⳋ"] = "ç", ["Ⳃ"] = "Ç", ["ⳃ"] = "ç", -} - -function export.tr(text, lang, sc) - return (mw.ustring.gsub(text, '.', chars)) -end - -return export \ No newline at end of file diff --git a/wikt/translit/cprt-translit.lua b/wikt/translit/cprt-translit.lua deleted file mode 100644 index 43b1edc..0000000 --- a/wikt/translit/cprt-translit.lua +++ 
/dev/null @@ -1,95 +0,0 @@ ---This module will transliterate text in the Cypriot script. It is used to transliterate Ancient Greek (grc). - -local export = {} - -local chars = { - ["𐠀"] = "a", - ["𐠁"] = "e", - ["𐠂"] = "i", - ["𐠃"] = "o", - ["𐠄"] = "u", - - ["𐠅"] = "ja", - -- je not in Unicode - -- ji not in Unicode - ["𐠈"] = "jo", - -- ju not in Unicode - - ["𐠊"] = "ka", - ["𐠋"] = "ke", - ["𐠌"] = "ki", - ["𐠍"] = "ko", - ["𐠎"] = "ku", - - ["𐠏"] = "la", - ["𐠐"] = "le", - ["𐠑"] = "li", - ["𐠒"] = "lo", - ["𐠓"] = "lu", - - ["𐠔"] = "ma", - ["𐠕"] = "me", - ["𐠖"] = "mi", - ["𐠗"] = "mo", - ["𐠘"] = "mu", - - ["𐠙"] = "na", - ["𐠚"] = "ne", - ["𐠛"] = "ni", - ["𐠜"] = "no", - ["𐠝"] = "nu", - - ["𐠞"] = "pa", - ["𐠟"] = "pe", - ["𐠠"] = "pi", - ["𐠡"] = "po", - ["𐠢"] = "pu", - - ["𐠣"] = "ra", - ["𐠤"] = "re", - ["𐠥"] = "ri", - ["𐠦"] = "ro", - ["𐠧"] = "ru", - - ["𐠨"] = "sa", - ["𐠩"] = "se", - ["𐠪"] = "si", - ["𐠫"] = "so", - ["𐠬"] = "su", - - ["𐠭"] = "ta", - ["𐠮"] = "te", - ["𐠯"] = "ti", - ["𐠰"] = "to", - ["𐠱"] = "tu", - - ["𐠲"] = "wa", - ["𐠳"] = "we", - ["𐠴"] = "wi", - ["𐠵"] = "wo", - -- wu not in Unicode - - ["𐠷"] = "ksa", - ["𐠸"] = "kse", - -- ksi not in Unicode - ["𐠿"] = "kso", - -- ksu not in Unicode - - ["𐠼"] = "za", - -- ze not in Unicode - -- zi not in Unicode - -- zo not in Unicode - -- zu not in Unicode -} - -function export.tr(text, lang, sc) - local ret = {} - - for c in mw.ustring.gmatch(text, ".") do - table.insert(ret, chars[c] or c) - end - - return table.concat(ret, "-") -end - -return export \ No newline at end of file diff --git a/wikt/translit/cr-translit.lua b/wikt/translit/cr-translit.lua deleted file mode 100644 index ba5bab2..0000000 --- a/wikt/translit/cr-translit.lua +++ /dev/null @@ -1,186 +0,0 @@ --- This module will transliterate Cree language text. --- It is also used to transliterate Southern East Cree (crj) and Northern East Cree (crl). 
--- Language code: crj and crl - -local export = {} - -function export.tr(text, lang, sc) - text = require("Module:Cans-translit").tr(text, lang, sc) - text = mw.ustring.gsub(text,"ī","ii") - text = mw.ustring.gsub(text,"ō","oo") - text = mw.ustring.gsub(text,"ā","aa") - text = mw.ustring.gsub(text,"š","sh") - text = mw.ustring.gsub(text,"ð","th") - return text -end - -function export.transcribe(text) - local replace = {"sh","th","hk","kw","sk"} - local s = "ᐁᐃᐅᐊᐯᐱᐳᐸᑌᑎᑐᑕᑫᑭᑯᑲᒉᒋᒍᒐᒣᒥᒧᒪᓀᓂᓄᓇᓭᓯᓱᓴᔦᔨᔪᔭᖊᖋᖌᖍᕃᕆᕊᕍᔐᔑᔓᔕᕓᕕᕗᕙᘚᘛᘕᘔᓓᓕᓗᓚ" - local s_tr1 = "0ptkcmnsyrl1f2z" - local s_tr2 = "eioa" - local w = "ᐍᐏᐓᐘᐻᐽᑁᑅᑘᑚᑞᑢᑵᑷᑻᑿᒓᒕᒙᒝᒭᒯᒳᒷᓊ ᓌ ᓷᓹᓽᔁᔰᔲᔶᔺᔘᔚᔞᔢᓝᓟᓣᓧ" - local w_tr1 = "0ptkcmnsyrz" - local w_tr2 = "eioa" - local l = "ᐄᐆᐋᐲᐴᐹᑏᑑᑖᑮᑰᑳᒌᒎᒑᒦᒨᒫᓃᓅᓈᓰᓲᓵᔩᔫᔮᕇᕋᕎᔒᔔᔖᕖᕘᕚᓖᓘᓛᐐᐔᐙ" - local l_tr1 = "0ptkcmnsyr1flw23" - local l_tr2 = "īōā" - local f = "ᐤᐦᕽᑉᑦᒡᒃᒻᓐᔅᔥᔾᓪᕐ‡ᒄᔉ" - local f_tr = "wh3ptckmns1ylr45" - text = mw.ustring.gsub(text,"ê","e") - text = mw.ustring.gsub(text,"u","o") - text = mw.ustring.gsub(text,"ii","ī") - text = mw.ustring.gsub(text,"oo","ō") - text = mw.ustring.gsub(text,"aa","ā") - text = mw.ustring.gsub(text,"([aeioīōā])(w?[aeioīōā])","%1 0%2") - text = mw.ustring.gsub(text,"^(w?[aeioīōā])"," 0%1") - text = mw.ustring.gsub(text," 0","0") - mw.log(text) - text = mw.ustring.gsub( - text, - "(["..w_tr1.."])w(["..w_tr2.."])", - function(a, b) - a = w_tr1:find(a) - b = w_tr2:find(b) - local i = a * 4 + b - 4 - return mw.ustring.sub(w, i, i) - end - ) - for i = 1, 3 do - text = mw.ustring.gsub(text, replace[i], tostring(i)) - end - mw.log(text) - text = mw.ustring.gsub( - text, - "(["..s_tr1.."])(["..s_tr2.."])", - function(a, b) - a = s_tr1:find(a) - b = s_tr2:find(b) - local i = a * 4 + b - 4 - return mw.ustring.sub(s, i, i) - end - ) - text = mw.ustring.gsub( - text, - "(["..l_tr1.."])(["..l_tr2.."])", - function(a, b) - a = l_tr1:find(a) - b = l_tr2:find(b) - local i = a * 3 + b - 3 - return mw.ustring.sub(l, i, i) - end - ) - for i=4, 5 do - text = mw.ustring.gsub(text, replace[i], 
tostring(i)) - end - mw.log(text) - text = mw.ustring.gsub( - text, - "(["..f_tr.."])", - function(a) - a = f_tr:find(a) - return mw.ustring.sub(f, a, a) - end - ) - return text -end - -return export - --- ᐁᔨᐦᑵᐤ: eyihkwew --- ᐊᐱᑕᐘᐣ: apitawan --- ᐊᑎᒼ: atim --- ᐊᓄᑭᐃᑶᑭᑲ: anokiikwikika --- ᐊᔦᒥᐅᑭᔨᑲ: ayemiokiyika --- ᐋᒨ: aamoo --- ᐧᐋᐸᔅᒄ: yaapaskw --- ᐱᔔ: pishoo --- ᐴᔒ: pooshii --- ᐸᔉᐙᐎᒧᐢᑐᐢ: paskwaawimostos --- ᐸᔦᔱᑲᓂᑭᔨᑲ: payeywikanikiyika --- ᑭᒋᐊᐱᑕᐘᐣ: kiciapitawan --- ᑭᒋᐊᓄᑭᐃᑶᑭᑲ: kicianokiikwikika --- ᑭᒧᓭᐤ: kimosew --- ᑭᓀᐱᐠ: kinepik --- ᑳᐦᑳᒎ: kaahkaacoo --- ᒥᐢᑕᑎᒼ: mistatim --- ᒥᓂᔥᑎᒄ: minishtikw --- ᒨᐦᑯᒫᓐ: moohkomaan --- ᒪᑎᓇᐅᑶᔨᑲ: matinaokwiyika --- ᒪᔉᐗ: maskwa --- ᒪᕁᐁᓰᐢ: mahkesiis --- ᓂᔅᒃ: nisk --- ᓂᔭ: niya --- ᓃᐲᔾ: niipiiy --- ᓇᑭ: naki --- ᓰᓰᑉ: siisiip --- aamoo: ᐴᒫ --- kakwa: ᑲᑿ --- kisiskāciwani-sīpiy: ᑭᓯᔅᒎᒋᐘᓂ-ᓰᐱᔾ --- makusue: ᒪᑯᓱᐁ --- mikisiw: ᒥᑭᓯᐤ --- miskan: ᒥᔅᑲᓐ --- naki: ᓇᑭ --- peepee: ᐯᐁᐯᐁ --- peepee tsoo: ᐯᐁᐯᐁ ᑦᓵ --- sawan: ᓴᐘᓐ --- sīpiy: ᓰᐱᔾ --- wikowin: ᐏᑯᐏᓐ --- êyihkwêw: ᐁᔨᐦᑵᐤ --- local export = {} - --- local result = "" - --- local function add(text) --- result = result .. "* [["..text.."#Cree|"..text.."]]: " .. require("Module:cr-translit").tr(text, "cr", "Cans") .. "\n" --- end - --- local function dda(text) --- result = result .. "* [["..text.."#Cree|"..text.."]]: " .. require("Module:cr-translit").transcribe(text) .. 
"\n" --- end - --- function export.show() --- add("ᐁᔨᐦᑵᐤ") --- add("ᐊᐱᑕᐘᐣ") --- add("ᐊᑎᒼ") --- add("ᐊᓄᑭᐃᑶᑭᑲ") --- add("ᐊᔦᒥᐅᑭᔨᑲ") --- add("ᐋᒨ") --- add("ᐧᐋᐸᔅᒄ") --- add("ᐱᔔ") --- add("ᐴᔒ") --- add("ᐸᔉᐙᐎᒧᐢᑐᐢ") --- add("ᐸᔦᔱᑲᓂᑭᔨᑲ") --- add("ᑭᒋᐊᐱᑕᐘᐣ") --- add("ᑭᒋᐊᓄᑭᐃᑶᑭᑲ") --- add("ᑭᒧᓭᐤ") --- add("ᑭᓀᐱᐠ") --- add("ᑳᐦᑳᒎ") --- add("ᒥᐢᑕᑎᒼ") --- add("ᒥᓂᔥᑎᒄ") --- add("ᒨᐦᑯᒫᓐ") --- add("ᒪᑎᓇᐅᑶᔨᑲ") --- add("ᒪᔉᐗ") --- add("ᒪᕁᐁᓰᐢ") --- add("ᓂᔅᒃ") --- add("ᓂᔭ") --- add("ᓃᐲᔾ") --- add("ᓇᑭ") --- add("ᓰᓰᑉ") --- dda("aamoo") --- dda("kakwa") --- dda("kisiskāciwani-sīpiy") --- dda("makusue") --- dda("mikisiw") --- dda("miskan") --- dda("naki") --- dda("peepee") --- dda("peepee tsoo") --- dda("sawan") --- dda("sīpiy") --- dda("wikowin") --- dda("êyihkwêw") --- return result --- end - --- return export \ No newline at end of file diff --git a/wikt/translit/cv-translit.lua b/wikt/translit/cv-translit.lua deleted file mode 100644 index 5282c3b..0000000 --- a/wikt/translit/cv-translit.lua +++ /dev/null @@ -1,21 +0,0 @@ --- This module will transliterate Chuvash language text per WT:CV TR. 
--- Language code: chv - -local export = {} - -local tt = { - ["А"]='A', ["а"]='a', ["Ӑ"]='Ă', ["ӑ"]='ă', ["Б"]='B', ["б"]='b', ["В"]='V', ["в"]='v', ["Г"]='G', ["г"]='g', - ["Д"]='D', ["д"]='d', ["Е"]='E', ["е"]='e', ["Ё"]='Jo', ["ё"]='jo', ["Ӗ"]='Ĕ', ["ӗ"]='ĕ', ["Ж"]='Ž', ["ж"]='ž', - ["З"]='Z', ["з"]='z', ["И"]='I', ["и"]='i', ["Й"]='J', ["й"]='j', ["К"]='K', ["к"]='k', ["Л"]='L', ["л"]='l', - ["М"]='M', ["м"]='m',["Н"]='N', ["н"]='n', ["О"]='O', ["о"]='o', ["П"]='P', ["п"]='p', ["Р"]='R', ["р"]='r', - ["С"]='S', ["с"]='s', ["Ҫ"]='Ś', ["ҫ"]='ś', ["Т"]='T', ["т"]='t', ["У"]='U', ["у"]='u', ["Ӳ"]='Ü', ["ӳ"]='ü', - ["Ф"]='F', ["ф"]='f', ["Х"]='H', ["х"]='h', ["Ц"]='Ts', ["ц"]='ts', ["Ч"]='Č', ["ч"]='č', ["Ш"]='Š', ["ш"]='š', - ["Щ"]='Šč', ["щ"]='šč', ["Ъ"]="ʺ", ['ъ']='ʺ', ["Ы"]='Y', ["ы"]='y', ["Ь"]='ʹ', ["ь"]='ʹ', ["Э"]="E", ['э']='e', - ["Ю"]="Ju", ['ю']='ju', ["Я"]="Ja", ['я']='ja' -}; - -function export.tr(text) - return (mw.ustring.gsub(text, '.', tt)) -end - -return export \ No newline at end of file diff --git a/wikt/translit/cyrs-glag-translit.lua b/wikt/translit/cyrs-glag-translit.lua deleted file mode 100644 index 1610d30..0000000 --- a/wikt/translit/cyrs-glag-translit.lua +++ /dev/null @@ -1,152 +0,0 @@ --- This module is used to transliterate text in the Old Cyrillic and Glagolitic alphabets. --- It is used to transliterate Old Church Slavonic (cu), Old East Slavic (orv) and Old Novgorodian (zle-ono). - --- Language code: chu - ---[[The language code is necessary because some letters are transliterated slightly differently depending -on the language. 
The script code selects between Cyrillic and Glagolitic transliteration.]] - -local export = {} - -local letters = {} -local digraphs = {} - -local double_grave = mw.ustring.char(0x30F) - -letters["Cyrs"] = { - ["А"]='A', ["а"]='a', - ["Б"]='B', ["б"]='b', - ["В"]='V', ["в"]='v', - ["Г"]='G', ["г"]='g', - ["Д"]='D', ["д"]='d', - ["Е"]='E', ["е"]='e', ["Є"]='E', ["є"]='e', - ["Ж"]='Ž', ["ж"]='ž', - ["Ѕ"]='Dz', ["ѕ"]='dz', ["Ꙃ"]='Dz', ["ꙃ"]='dz', - ["З"]='Z', ["з"]='z', ["Ꙁ"]='Z', ["ꙁ"]='z', - ["И"]='I', ["и"]='i', ["І"]='I', ["і"]='i', ["Ї"]='I', ["ї"]='i', - ["Ꙉ"]='Đ', ["ꙉ"]='đ', - ["К"]='K', ["к"]='k', - ["Л"]='L', ["л"]='l', - ["М"]='M', ["м"]='m', - ["Н"]='N', ["н"]='n', - ["О"]='O', ["о"]='o', ["Ѡ"]='O', ["ѡ"]='o', ["Ѿ"]='Otŭ', ["ѿ"]='otŭ', ["Ꙩ"]='O', ["ꙩ"]='o', ["Ꙫ"]='O', ["ꙫ"]='o', ["Ꙭ"]='O', ["ꙭ"]='o', ["ꙮ"]='o', - ["П"]='P', ["п"]='p', - ["Р"]='R', ["р"]='r', - ["С"]='S', ["с"]='s', - ["Т"]='T', ["т"]='t', - ["Ѹ"]='U', ["ѹ"]='u', ["Ꙋ"]='U', ["ꙋ"]='u', ["У"]='U', ["у"]='u', - ["Ф"]='F', ["ф"]='f', - ["Х"]='X', ["х"]='x', - - ["Ц"]='C', ["ц"]='c', - ["Ч"]='Č', ["ч"]='č', - ["Ш"]='Š', ["ш"]='š', - -- For Щ see below - ["Ъ"]='Ŭ', ["ъ"]='ŭ', - ["Ꙑ"]='Y', ["ꙑ"]='y', ["Ы"]='Y', ["ы"]='y', - ["Ь"]='Ĭ', ["ь"]='ĭ', - ["Ѣ"]='Ě', ["ѣ"]='ě', - - ["Ю"]='Ju', ["ю"]='ju', - ["Ꙗ"]='Ja', ["ꙗ"]='ja', ["Я"]='Ja', ["я"]='ja', - ["Ѥ"]='Je', ["ѥ"]='je', - ["Ѧ"]='Ę', ["ѧ"]='ę', - ["Ѩ"]='Ję', ["ѩ"]='ję', - ["Ѫ"]='Ǫ', ["ѫ"]='ǫ', - ["Ѭ"]='Jǫ', ["ѭ"]='jǫ', - ["Ꙓ"]='Jě', ["ꙓ"]='jě', - - ["Ѯ"]='Ks', ["ѯ"]='ks', - ["Ѱ"]='Ps', ["ѱ"]='ps', - ["Ѳ"]='Θ', ["ѳ"]='θ', - ["Ѵ"]='Ü', ["ѵ"]='ü', ["Ѷ"]='Ü' .. double_grave, ["ѷ"]='ü' .. 
double_grave, -} - -digraphs["Cyrs"] = { - ["О[Уу]"]="U", ["оу"]="u", -} - -letters["Glag"] = { - ["Ⰰ"]='A', ["ⰰ"]='a', - ["Ⰱ"]='B', ["ⰱ"]='b', - ["Ⰲ"]='V', ["ⰲ"]='v', - ["Ⰳ"]='G', ["ⰳ"]='g', - ["Ⰴ"]='D', ["ⰴ"]='d', - ["Ⰵ"]='E', ["ⰵ"]='e', - ["Ⰶ"]='Ž', ["ⰶ"]='ž', - ["Ⰷ"]='Dz', ["ⰷ"]='dz', - ["Ⰸ"]='Z', ["ⰸ"]='z', - ["Ⰹ"]='I', ["ⰹ"]='i', ["Ⰺ"]='I', ["ⰺ"]='i', ["Ⰻ"]='I', ["ⰻ"]='i', - ["Ⰼ"]='Đ', ["ⰼ"]='đ', - ["Ⰽ"]='K', ["ⰽ"]='k', - ["Ⰾ"]='L', ["ⰾ"]='l', - ["Ⰿ"]='M', ["ⰿ"]='m', - ["Ⱀ"]='N', ["ⱀ"]='n', - ["Ⱁ"]='O', ["ⱁ"]='o', ["Ⱉ"]='O', ["ⱉ"]='o', - ["Ⱂ"]='P', ["ⱂ"]='p', - ["Ⱃ"]='R', ["ⱃ"]='r', - ["Ⱄ"]='S', ["ⱄ"]='s', - ["Ⱅ"]='T', ["ⱅ"]='t', - ["Ⱆ"]='U', ["ⱆ"]='u', - ["Ⱇ"]='F', ["ⱇ"]='f', - ["Ⱈ"]='X', ["ⱈ"]='x', ["Ⱒ"]='X', ["ⱒ"]='x', - - ["Ⱌ"]='C', ["ⱌ"]='c', - ["Ⱍ"]='Č', ["ⱍ"]='č', - ["Ⱎ"]='Š', ["ⱎ"]='š', - -- For Ⱋ see below - ["Ⱏ"]='Ŭ', ["ⱏ"]='ŭ', - ["Ⱐ"]='Ĭ', ["ⱐ"]='ĭ', - ["Ⱑ"]='Ě', ["ⱑ"]='ě', - - ["Ⱓ"]='Ju', ["ⱓ"]='ju', - ["Ⱔ"]='Ę', ["ⱔ"]='ę', - ["Ⱗ"]='Ję', ["ⱗ"]='ję', - ["Ⱘ"]='Ǫ', ["ⱘ"]='ǫ', - ["Ⱙ"]='Jǫ', ["ⱙ"]='jǫ', - - ["Ⱚ"]='Θ', ["ⱚ"]='θ', - ["Ⱛ"]='Ü', ["ⱛ"]='ü', -} - -digraphs["Glag"] = { - ["Ⱏ[ⰉⰊⰹⰺ]"]="Y", ["ⱏ[ⰹⰺ]"]="y", -} - -function export.tr(text, lang, sc) - if not sc then - sc = require("Module:scripts").findBestScript(text, - require("Module:languages").getByCode(lang)):getCode() - end - - -- Щ was pronounced differently in Old East Slavic - if lang == "orv" then - letters["Cyrs"]["Щ"]='Šč' - letters["Cyrs"]["щ"]='šč' - letters["Glag"]["Ⱋ"]='Šč' - letters["Glag"]["ⱋ"]='šč' - else - letters["Cyrs"]["Щ"]='Št' - letters["Cyrs"]["щ"]='št' - letters["Glag"]["Ⱋ"]='Št' - letters["Glag"]["ⱋ"]='št' - end - - -- Transliterate the kamora as prime - text = string.gsub(text, "\210\132", "ʹ") - - if sc == "Cyrs" or sc == "Glag" then - for key, repl in pairs(digraphs[sc]) do - text = mw.ustring.gsub(text, key, repl) - end - - -- pattern for one non-ASCII character - text = string.gsub(text, '[\194-\244][\128-\191]+', letters[sc]) - else - -- error("This module can 
only transliterate Old Cyrillic (Cyrs) and Glagolitic (Glag).") - end - - return text -end - -return export \ No newline at end of file diff --git a/wikt/translit/dar-translit.lua b/wikt/translit/dar-translit.lua deleted file mode 100644 index b251470..0000000 --- a/wikt/translit/dar-translit.lua +++ /dev/null @@ -1,49 +0,0 @@ --- This module will transliterate Dargwa language text per WT:DAR TR. --- Language code: dar - -local export = {} - -local mapping1 = { - ["б"]="b", ["п"]="p", ["ф"]="f", ["в"]="v", ["м"]="m", - ["д"]="d", ["т"]="t", ["й"]="j", ["н"]="n", ["з"]="z", ["ц"]="c", - ["с"]="s", ["ж"]="ž", ["ш"]="š", ["щ"]="šč", - ["л"]="l", ["ч"]="č", ["р"]="r", ["г"]="g", ["к"]="k", ["х"]="χ", - ["ъ"]="ʾ", ["а"]="a", ["е"]="e", ["ы"]="y", ["и"]="i", ["о"]="o", ["у"]="u", - ["ё"]="ë", ["ь"]="’", ["э"]="e", ["ю"]="ju", ["я"]="ə", - ["Б"]="B", ["П"]="P", ["Ф"]="F", ["В"]="V", ["М"]="M", - ["Д"]="D", ["Т"]="T", ["Й"]="J", ["Н"]="N", ["З"]="Z", ["Ц"]="C", - ["С"]="S", ["Ж"]="Ž", ["Ш"]="Š", ["Щ"]="Šč", - ["Л"]="L", ["Ч"]="Č", ["Р"]="R", ["Г"]="G", ["К"]="K", ["Х"]="Χ", - ["Ъ"]="ʾ", ["А"]="A", ["Е"]="E", ["Ы"]="Y", ["И"]="I", ["О"]="O", ["У"]="U", - ["Ё"]="Ë", ["Ь"]="’", ["Э"]="E", ["Ю"]="Ju", ["Я"]="Ə" -} - -local mapping2 = { - ['дз'] = 'ʒ', ['Дз'] = 'Ʒ', ['дж'] = 'ǯ', ['Дж'] = 'Ǯ', - ['пӏ'] = 'ṗ', ['Пӏ'] = 'Ṗ', ['цӏ'] = 'c̣', ['тӏ'] = 'ṭ', - ['чӏ'] = 'č̣', ['кь'] = 'q̇', ['кӏ'] = 'ḳ', ['хь'] = 'x', - ['хъ'] = 'q', ['къ'] = 'ɢ', ['гъ'] = 'γ', ['гӏ'] = 'ʿ', - ['хӏ'] = 'ḥ', ['гь'] = 'h', ['Цӏ'] = 'C̣', ['Тӏ'] = 'Ṭ', - ['Сс'] = 'S̄', ['Чӏ'] = 'Č̣', ['Кь'] = 'Q̇', ['Кӏ'] = 'Ḳ', - ['Хь'] = 'X', ['Хъ'] = 'Q', ['Къ'] = 'ɢ', ['Гъ'] = 'Γ', - ['Гӏ'] = 'ʿ', ['Хӏ'] = 'Ḥ', ['Гь'] = 'H', -} - -function export.tr(text, lang, sc) - local str_gsub = string.gsub - - -- Convert capital to lowercase palochka. Lowercase is found in tables - -- above. 
- text = str_gsub(text, mw.ustring.char(0x4C0), mw.ustring.char(0x4CF)) - - for pat, repl in pairs(mapping2) do - text = str_gsub(text, pat, repl) - end - - -- pattern for non-ASCII UTF-8 characters - text = str_gsub(text, '[\194-\244][\128-\191]+', mapping1) - - return text -end - -return export \ No newline at end of file diff --git a/wikt/translit/data/ko-pron.lua b/wikt/translit/data/ko-pron.lua deleted file mode 100644 index d31d4b5..0000000 --- a/wikt/translit/data/ko-pron.lua +++ /dev/null @@ -1,616 +0,0 @@ -local export = {} - -export.vowels = { - ["Ø"] = { "", "", "", "", "", "" }, - [" "] = { " ", " ", " ", " ", " ", " " }, - ["ᅡ"] = { "ᅡ", "a", "a", "a", "a", "a̠" }, - ["ᅢ"] = { "ᅢ", "ae", "ae", "ae", "ay", "e̞" }, - ["ᅣ"] = { "ᅣ", "ya", "ya", "ya", "ya", "ja̠" }, - ["ᅤ"] = { "ᅤ", "yae", "yae", "yae", "yay", "je̞" }, - ["ᅥ"] = { "ᅥ", "eo", "eo", "ŏ", "e", "ʌ̹" }, - ["ᅦ"] = { "ᅦ", "e", "e", "e", "ey", "e̞" }, - ["ᅧ"] = { "ᅧ", "yeo", "yeo", "yŏ", "ye", "jʌ̹" }, - ["ᅨ"] = { "ᅨ", "ye", "ye", "ye", "yey", "je̞" }, - ["ᅩ"] = { "ᅩ", "o", "o", "o", "o", "o̞" }, - ["ᅪ"] = { "ᅪ", "wa", "wa", "wa", "wa", "wa̠" }, - ["ᅫ"] = { "ᅫ", "wae", "wae", "wae", "way", "we̞" }, - ["ᅬ"] = { "ᅬ", "oe", "oe", "oe", "oy", "ø̞" }, - ["ᅭ"] = { "ᅭ", "yo", "yo", "yo", "yo", "jo" }, - ["ᅮ"] = { "ᅮ", "u", "u", "u", "wu", "u" }, - ["ᅯ"] = { "ᅯ", "wo", "wo", "wŏ", "we", "wʌ̹" }, - ["ᅰ"] = { "ᅰ", "we", "we", "we", "wey", "we̞" }, - ["ᅱ"] = { "ᅱ", "wi", "wi", "wi", "wi", "ɥi" }, - ["ᅲ"] = { "ᅲ", "yu", "yu", "yu", "yu", "ju" }, - ["ᅳ"] = { "ᅳ", "eu", "eu", "ŭ", "u", "ɯ" }, - ["ᅴ"] = { "ᅴ", "ui", "ui", "ŭi", "uy", "ɰi" }, - ["ᅵ"] = { "ᅵ", "i", "i", "i", "i", "i" }, -} - -export.boundary = { - -- note: Ø represents different things when used for initials and finals. - -- For initials, Ø = the next syllable is absent. - -- For finals, Ø = the absence of a jongseong jamo, but the previous syllable exists. - -- The equivalent of initial Ø is final "" (nothing). 
- ["Ø-Ø"] = { "", "", "", "", "", "" }, - ["-Ø"] = { "", "", "", "", "", "" }, - ["-ᄀ"] = { "ᄀ", "g", "g", "k", "k", "k" }, - ["-ᄁ"] = { "ᄁ", "kk", "kk", "kk", "kk", "k͈" }, - ["-ᄂ"] = { "ᄂ", "n", "n", "n", "n", "n" }, - ["-ᄃ"] = { "ᄃ", "d", "d", "t", "t", "t" }, - ["-ᄄ"] = { "ᄄ", "tt", "tt", "tt", "tt", "t͈" }, - ["-ᄅ"] = { "ᄅ", "r", "l", "r", "l", "ɾ" }, - ["-ᄆ"] = { "ᄆ", "m", "m", "m", "m", "m" }, - ["-ᄇ"] = { "ᄇ", "b", "b", "p", "p", "p" }, - ["-ᄈ"] = { "ᄈ", "pp", "pp", "pp", "pp", "p͈" }, - ["-ᄉ"] = { "ᄉ", "s", "s", "s", "s", "sʰ" }, - ["-ᄊ"] = { "ᄊ", "ss", "ss", "ss", "ss", "s͈" }, - ["-ᄋ"] = { "ᄋ", "", "", "", "", "" }, - ["-ᄌ"] = { "ᄌ", "j", "j", "ch", "c", "t͡ɕ" }, - ["-ᄍ"] = { "ᄍ", "jj", "jj", "tch", "cc", "t͡ɕ͈" }, - ["-ᄎ"] = { "ᄎ", "ch", "ch", "ch'", "ch", "t͡ɕʰ" }, - ["-ᄏ"] = { "ᄏ", "k", "k", "k'", "kh", "kʰ" }, - ["-ᄐ"] = { "ᄐ", "t", "t", "t'", "th", "tʰ" }, - ["-ᄑ"] = { "ᄑ", "p", "p", "p'", "ph", "pʰ" }, - ["-ᄒ"] = { "ᄒ", "h", "h", "h", "h", "h" }, - ["Ø-ᄀ"] = { "ᄀ", "g", "g", "g", "k", "ɡ" }, - ["Ø-ᄁ"] = { "ᄁ", "kk", "kk", "kk", "kk", "k͈" }, - ["Ø-ᄂ"] = { "ᄂ", "n", "n", "n", "n", "n" }, - ["Ø-ᄃ"] = { "ᄃ", "d", "d", "d", "t", "d" }, - ["Ø-ᄄ"] = { "ᄄ", "tt", "tt", "tt", "tt", "t͈" }, - ["Ø-ᄅ"] = { "ᄅ", "r", "l", "r", "l", "ɾ" }, - ["Ø-ᄆ"] = { "ᄆ", "m", "m", "m", "m", "m" }, - ["Ø-ᄇ"] = { "ᄇ", "b", "b", "b", "p", "b" }, - ["Ø-ᄈ"] = { "ᄈ", "pp", "pp", "pp", "pp", "p͈" }, - ["Ø-ᄉ"] = { "ᄉ", "s", "s", "s", "s", "sʰ" }, - ["Ø-ᄊ"] = { "ᄊ", "ss", "ss", "ss", "ss", "s͈" }, - ["Ø-ᄋ"] = { "ᄋ", "…", "…", "", "", "" }, - ["Ø-ᄌ"] = { "ᄌ", "j", "j", "j", "c", "d͡ʑ" }, - ["Ø-ᄍ"] = { "ᄍ", "jj", "jj", "tch", "cc", "t͡ɕ͈" }, - ["Ø-ᄎ"] = { "ᄎ", "ch", "ch", "ch'", "ch", "t͡ɕʰ" }, - ["Ø-ᄏ"] = { "ᄏ", "k", "k", "k'", "kh", "kʰ" }, - ["Ø-ᄐ"] = { "ᄐ", "t", "t", "t'", "th", "tʰ" }, - ["Ø-ᄑ"] = { "ᄑ", "p", "p", "p'", "ph", "pʰ" }, - ["Ø-ᄒ"] = { "ᄒ", "h", "h", "h", "h", "ɦ" }, - ["ᆨ-Ø"] = { "ᆨ", "k", "g", "k", "k", "k̚" }, - ["ᆨ-ᄀ"] = { "ᆨᄁ", "kg", "gg", "kk", "kqk", "k̚k͈" }, 
- ["ᆨ-ᄁ"] = { "ᆨᄁ", "kkk", "gkk", "kk", "kkk", "k̚k͈" }, - ["ᆨ-ᄂ"] = { "ᆼᄂ", "ngn", "gn", "ngn", "kn", "ŋn" }, - ["ᆨ-ᄃ"] = { "ᆨᄄ", "kd", "gd", "kt", "kqt", "k̚t͈" }, - ["ᆨ-ᄄ"] = { "ᆨᄄ", "ktt", "gtt", "ktt", "ktt", "k̚t͈" }, - ["ᆨ-ᄅ"] = { "ᆼᄂ", "ngn", "gl", "ngn", "kl", "ŋn" }, - ["ᆨ-ᄆ"] = { "ᆼᄆ", "ngm", "gm", "ngm", "km", "ŋm" }, - ["ᆨ-ᄇ"] = { "ᆨᄈ", "kb", "gb", "kp", "kqp", "k̚p͈" }, - ["ᆨ-ᄈ"] = { "ᆨᄈ", "kpp", "gpp", "kpp", "kpp", "k̚p͈" }, - ["ᆨ-ᄉ"] = { "ᆨᄊ", "ks", "gs", "ks", "kqs", "ks͈" }, - ["ᆨ-ᄊ"] = { "ᆨᄊ", "kss", "gss", "kss", "kss", "ks͈" }, - ["ᆨ-ᄋ"] = { "ᄀ", "g", "g-", "g", "k.", "ɡ" }, - ["ᆨ-ᄌ"] = { "ᆨᄍ", "kj", "gj", "kch", "kqc", "k̚t͡ɕ͈" }, - ["ᆨ-ᄍ"] = { "ᆨᄍ", "kjj", "gjj", "ktch", "kcc", "k̚t͡ɕ͈" }, - ["ᆨ-ᄎ"] = { "ᆨᄎ", "kch", "gch", "kch'", "kch", "k̚t͡ɕʰ" }, - ["ᆨ-ᄏ"] = { "ᆨᄏ", "kk", "gk", "kk'", "kkh", "k̚kʰ" }, - ["ᆨ-ᄐ"] = { "ᆨᄐ", "kt", "gt", "kt'", "kth", "k̚tʰ" }, - ["ᆨ-ᄑ"] = { "ᆨᄑ", "kp", "gp", "kp'", "kph", "k̚pʰ" }, - ["ᆨ-ᄒ"] = { "ᄏ", "kh", "g-h", "kh", "k.h", "kʰ" }, - ["ᆩ-Ø"] = { "ᆨ", "k", "kk", "k", "kk", "k̚" }, - ["ᆩ-ᄀ"] = { "ᆨᄁ", "kg", "kkg", "kk", "kkqk", "k̚k͈" }, - ["ᆩ-ᄁ"] = { "ᆨᄁ", "kkk", "kkkk", "kk", "kkkk", "k̚k͈" }, - ["ᆩ-ᄂ"] = { "ᆼᄂ", "ngn", "kkn", "ngn", "kkn", "ŋn" }, - ["ᆩ-ᄃ"] = { "ᆨᄄ", "kd", "kkd", "kt", "kkqt", "k̚t͈" }, - ["ᆩ-ᄄ"] = { "ᆨᄄ", "ktt", "kktt", "ktt", "kktt", "k̚t͈" }, - ["ᆩ-ᄅ"] = { "ᆼᄂ", "ngn", "kkl", "ngn", "kkl", "ŋn" }, - ["ᆩ-ᄆ"] = { "ᆼᄆ", "ngm", "kkm", "ngm", "kkm", "ŋm" }, - ["ᆩ-ᄇ"] = { "ᆨᄈ", "kb", "kkb", "kp", "kkqp", "k̚p͈" }, - ["ᆩ-ᄈ"] = { "ᆨᄈ", "kpp", "kkpp", "kpp", "kkpp", "k̚p͈" }, - ["ᆩ-ᄉ"] = { "ᆨᄊ", "ks", "kks", "ks", "kkqs", "ks͈" }, - ["ᆩ-ᄊ"] = { "ᆨᄊ", "kss", "kkss", "kss", "kkss", "ks͈" }, - ["ᆩ-ᄋ"] = { "ᄁ", "kk", "kk-", "kk", "kk.", "k͈" }, - ["ᆩ-ᄌ"] = { "ᆨᄍ", "kj", "kkj", "kch", "kkqc", "k̚t͡ɕ͈" }, - ["ᆩ-ᄍ"] = { "ᆨᄍ", "kjj", "kkjj", "ktch", "kkcc", "k̚t͡ɕ͈" }, - ["ᆩ-ᄎ"] = { "ᆨᄎ", "kch", "kkch", "kch'", "kkch", "k̚t͡ɕʰ" }, - ["ᆩ-ᄏ"] = { "ᆨᄏ", "kk", "kkk", "kk'", "kkkh", "k̚kʰ" }, - ["ᆩ-ᄐ"] = { 
"ᆨᄐ", "kt", "kkt", "kt'", "kkth", "k̚tʰ" }, - ["ᆩ-ᄑ"] = { "ᆨᄑ", "kp", "kkp", "kp'", "kkph", "k̚pʰ" }, - ["ᆩ-ᄒ"] = { "ᄏ", "kh", "kk-h", "kh", "kk.h", "kʰ" }, - ["ᆪ-Ø"] = { "ᆨ", "k", "gs", "k", "ks", "k̚" }, - ["ᆪ-ᄀ"] = { "ᆨᄁ", "kg", "gsg", "kk", "ksqk", "k̚k͈" }, - ["ᆪ-ᄁ"] = { "ᆨᄁ", "kkk", "gskk", "kk", "kskk", "k̚k͈" }, - ["ᆪ-ᄂ"] = { "ᆼᄂ", "ngn", "gsn", "ngn", "ksn", "ŋn" }, - ["ᆪ-ᄃ"] = { "ᆨᄄ", "kd", "gsd", "kt", "ksqt", "k̚t͈" }, - ["ᆪ-ᄄ"] = { "ᆨᄄ", "ktt", "gstt", "ktt", "kstt", "k̚t͈" }, - ["ᆪ-ᄅ"] = { "ᆼᄂ", "ngn", "gsl", "ngn", "ksl", "ŋn" }, - ["ᆪ-ᄆ"] = { "ᆼᄆ", "ngm", "gsm", "ngm", "ksm", "ŋm" }, - ["ᆪ-ᄇ"] = { "ᆨᄈ", "kb", "gsb", "kp", "ksqp", "k̚p͈" }, - ["ᆪ-ᄈ"] = { "ᆨᄈ", "kpp", "gspp", "kpp", "kspp", "k̚p͈" }, - ["ᆪ-ᄉ"] = { "ᆨᄊ", "ks", "gss", "ks", "ksqs", "ks͈" }, - ["ᆪ-ᄊ"] = { "ᆨᄊ", "kss", "gsss", "kss", "ksss", "ks͈" }, - ["ᆪ-ᄋ"] = { "ᆨᄊ", "ks", "gs-", "ks", "ks.", "ks͈" }, - ["ᆪ-ᄌ"] = { "ᆨᄍ", "kj", "gsj", "kch", "ksqc", "k̚t͡ɕ͈" }, - ["ᆪ-ᄍ"] = { "ᆨᄍ", "kjj", "gsjj", "ktch", "kscc", "k̚t͡ɕ͈" }, - ["ᆪ-ᄎ"] = { "ᆨᄎ", "kch", "gsch", "kch'", "ksch", "k̚t͡ɕʰ" }, - ["ᆪ-ᄏ"] = { "ᆨᄏ", "kk", "gsk", "kk'", "kskh", "k̚kʰ" }, - ["ᆪ-ᄐ"] = { "ᆨᄐ", "kt", "gst", "kt'", "ksth", "k̚tʰ" }, - ["ᆪ-ᄑ"] = { "ᆨᄑ", "kp", "gsp", "kp'", "ksph", "k̚pʰ" }, - ["ᆪ-ᄒ"] = { "ᄏ", "kh", "gs-h", "kh", "ks.h", "kʰ" }, - ["ᆫ-Ø"] = { "ᆫ", "n", "n", "n", "n", "n" }, - ["ᆫ-ᄀ"] = { "ᆫᄀ", "n-g", "ng", "n'g", "nk", "nɡ" }, - ["ᆫ-ᄁ"] = { "ᆫᄁ", "nkk", "nkk", "nkk", "nkk", "nk͈" }, - ["ᆫ-ᄂ"] = { "ᆫᄂ", "nn", "nn", "nn", "nn", "nn" }, - ["ᆫ-ᄃ"] = { "ᆫᄃ", "nd", "nd", "nd", "nt", "nd" }, - ["ᆫ-ᄄ"] = { "ᆫᄄ", "ntt", "ntt", "ntt", "ntt", "nt͈" }, - ["ᆫ-ᄅ"] = { "ᆯᄅ", "ll", "nl", "ll", "nl", "ɭɭ" }, - ["ᆫ-ᄆ"] = { "ᆫᄆ", "nm", "nm", "nm", "nm", "nm" }, - ["ᆫ-ᄇ"] = { "ᆫᄇ", "nb", "nb", "nb", "np", "nb" }, - ["ᆫ-ᄈ"] = { "ᆫᄈ", "npp", "npp", "npp", "npp", "np͈" }, - ["ᆫ-ᄉ"] = { "ᆫᄉ", "ns", "ns", "ns", "ns", "nsʰ" }, - ["ᆫ-ᄊ"] = { "ᆫᄊ", "nss", "nss", "nss", "nss", "ns͈" }, - ["ᆫ-ᄋ"] = { "ᄂ", "n", "n-", "n", "n.", "n" }, - 
["ᆫ-ᄌ"] = { "ᆫᄌ", "nj", "nj", "nj", "nc", "nd͡ʑ" }, - ["ᆫ-ᄍ"] = { "ᆫᄍ", "njj", "njj", "ntch", "ncc", "nt͡ɕ͈" }, - ["ᆫ-ᄎ"] = { "ᆫᄎ", "nch", "nch", "nch'", "nch", "nt͡ɕʰ" }, - ["ᆫ-ᄏ"] = { "ᆫᄏ", "nk", "nk", "nk'", "nkh", "nkʰ" }, - ["ᆫ-ᄐ"] = { "ᆫᄐ", "nt", "nt", "nt'", "nth", "ntʰ" }, - ["ᆫ-ᄑ"] = { "ᆫᄑ", "np", "np", "np'", "nph", "npʰ" }, - ["ᆫ-ᄒ"] = { "ᆫᄒ", "nh", "n-h", "nh", "n.h", "nɦ" }, - ["ᆬ-Ø"] = { "ᆫ", "n", "nj", "n", "nc", "n" }, - ["ᆬ-ᄀ"] = { "ᆫᄁ", "ng", "njg", "nk", "ncqk", "nk͈" }, - ["ᆬ-ᄁ"] = { "ᆫᄁ", "nkk", "njkk", "nkk", "nckk", "nk͈" }, - ["ᆬ-ᄂ"] = { "ᆫᄂ", "nn", "njn", "nn", "ncn", "nn" }, - ["ᆬ-ᄃ"] = { "ᆫᄄ", "nd", "njd", "nt", "ncqt", "nt͈" }, - ["ᆬ-ᄄ"] = { "ᆫᄄ", "ntt", "njtt", "ntt", "nctt", "nt͈" }, - ["ᆬ-ᄅ"] = { "ᆫᄂ", "nn", "njl", "nn", "ncl", "nn" }, - ["ᆬ-ᄆ"] = { "ᆫᄆ", "nm", "njm", "nm", "ncm", "nm" }, - ["ᆬ-ᄇ"] = { "ᆫᄈ", "nb", "njb", "np", "ncqp", "np͈" }, - ["ᆬ-ᄈ"] = { "ᆫᄈ", "npp", "njpp", "npp", "ncpp", "np͈" }, - ["ᆬ-ᄉ"] = { "ᆫᄊ", "ns", "njs", "ns", "ncqs", "ns͈" }, - ["ᆬ-ᄊ"] = { "ᆫᄊ", "nss", "njss", "nss", "ncss", "ns͈" }, - ["ᆬ-ᄋ"] = { "ᆫᄌ", "nj", "nj-", "nj", "nc.", "nd͡ʑ" }, - ["ᆬ-ᄌ"] = { "ᆫᄍ", "nj", "njj", "nch", "ncqc", "nt͡ɕ͈" }, - ["ᆬ-ᄍ"] = { "ᆫᄍ", "njj", "njjj", "ntch", "nccc", "nt͡ɕ͈" }, - ["ᆬ-ᄎ"] = { "ᆫᄎ", "nch", "njch", "nch'", "ncch", "nt͡ɕʰ" }, - ["ᆬ-ᄏ"] = { "ᆫᄏ", "nk", "njk", "nk'", "nckh", "nkʰ" }, - ["ᆬ-ᄐ"] = { "ᆫᄐ", "nt", "njt", "nt'", "ncth", "ntʰ" }, - ["ᆬ-ᄑ"] = { "ᆫᄑ", "np", "njp", "np'", "ncph", "npʰ" }, - ["ᆬ-ᄒ"] = { "ᆫᄎ", "nch", "nj-h", "nch'", "nc.h", "nt͡ɕʰ" }, - ["ᆭ-Ø"] = { "ᆫ", "n", "nh", "n", "nh", "n" }, - ["ᆭ-ᄀ"] = { "ᆫᄏ", "nk", "nhg", "nk'", "nhk", "nkʰ" }, - ["ᆭ-ᄁ"] = { "ᆫᄏ", "nk", "nhkk", "nk'", "nhkk", "nkʰ" }, - ["ᆭ-ᄂ"] = { "ᆫᄂ", "nn", "nhn", "nn", "nhn", "nn" }, - ["ᆭ-ᄃ"] = { "ᆫᄐ", "nt", "nhd", "nt'", "nht", "ntʰ" }, - ["ᆭ-ᄄ"] = { "ᆫᄐ", "nt", "nhtt", "nt'", "nhtt", "ntʰ" }, - ["ᆭ-ᄅ"] = { "ᆫᄂ", "nn", "nhl", "nn", "nhl", "nn" }, - ["ᆭ-ᄆ"] = { "ᆫᄆ", "nm", "nhm", "nm", "nhm", "nm" }, - ["ᆭ-ᄇ"] = { "ᆫᄑ", "np", 
"nhb", "np'", "nhp", "npʰ" }, - ["ᆭ-ᄈ"] = { "ᆫᄑ", "np", "nhpp", "np'", "nhpp", "npʰ" }, - ["ᆭ-ᄉ"] = { "ᆫᄊ", "ns", "nhs", "nss", "nhs", "ns͈" }, - ["ᆭ-ᄊ"] = { "ᆫᄊ", "nss", "nhss", "nss", "nhss", "ns͈" }, - ["ᆭ-ᄋ"] = { "ᄂ", "n", "nh-", "n", "nh.", "n" }, - ["ᆭ-ᄌ"] = { "ᆫᄎ", "nch", "nhj", "nch'", "nhc", "nt͡ɕʰ" }, - ["ᆭ-ᄍ"] = { "ᆫᄎ", "nch", "nhjj", "nch'", "nhcc", "nt͡ɕʰ" }, - ["ᆭ-ᄎ"] = { "ᆫᄎ", "nch", "nhch", "nch'", "nhch", "nt͡ɕʰ" }, - ["ᆭ-ᄏ"] = { "ᆫᄏ", "nk", "nhk", "nk'", "nhkh", "nkʰ" }, - ["ᆭ-ᄐ"] = { "ᆫᄐ", "nt", "nht", "nt'", "nhth", "ntʰ" }, - ["ᆭ-ᄑ"] = { "ᆫᄑ", "np", "nhp", "np'", "nhph", "npʰ" }, - ["ᆭ-ᄒ"] = { "ᆫᄒ", "nh", "nhh", "nh", "nhh", "nɦ" }, - ["ᆮ-Ø"] = { "ᆮ", "t", "d", "t", "t", "t̚" }, - ["ᆮ-ᄀ"] = { "ᆮᄁ", "tg", "dg", "tk", "tqk", "t̚k͈" }, - ["ᆮ-ᄁ"] = { "ᆮᄁ", "tkk", "dkk", "tkk", "tkk", "t̚k͈" }, - ["ᆮ-ᄂ"] = { "ᆫᄂ", "nn", "dn", "nn", "tn", "nn" }, - ["ᆮ-ᄃ"] = { "ᆮᄄ", "td", "dd", "tt", "tqt", "t̚t͈" }, - ["ᆮ-ᄄ"] = { "ᆮᄄ", "ttt", "dtt", "tt", "ttt", "t̚t͈" }, - ["ᆮ-ᄅ"] = { "ᆫᄂ", "nn", "dl", "nn", "tl", "nn" }, - ["ᆮ-ᄆ"] = { "ᆫᄆ", "nm", "dm", "nm", "tm", "nm" }, - ["ᆮ-ᄇ"] = { "ᆮᄈ", "tb", "db", "tp", "tqp", "t̚p͈" }, - ["ᆮ-ᄈ"] = { "ᆮᄈ", "tpp", "dpp", "tpp", "tpp", "t̚p͈" }, - ["ᆮ-ᄉ"] = { "ᆮᄊ", "ts", "ds", "ss", "tqs", "t̚s͈" }, - ["ᆮ-ᄊ"] = { "ᆮᄊ", "tss", "dss", "ss", "tss", "t̚s͈" }, - ["ᆮ-ᄋ"] = { "ᄃ", "d", "d-", "d", "t.", "d" }, - ["ᆮ-ᄌ"] = { "ᆮᄍ", "tj", "dj", "tch", "tqc", "t̚t͡ɕ͈" }, - ["ᆮ-ᄍ"] = { "ᆮᄍ", "tjj", "djj", "tch", "tcc", "t̚t͡ɕ͈" }, - ["ᆮ-ᄎ"] = { "ᆮᄎ", "tch", "dch", "tch'", "tch", "t̚t͡ɕʰ" }, - ["ᆮ-ᄏ"] = { "ᆮᄏ", "tk", "dk", "tk'", "tkh", "t̚kʰ" }, - ["ᆮ-ᄐ"] = { "ᆮᄐ", "tt", "dt", "tt'", "tth", "t̚tʰ" }, - ["ᆮ-ᄑ"] = { "ᆮᄑ", "tp", "dp", "tp'", "tph", "t̚pʰ" }, - ["ᆮ-ᄒ"] = { "ᄐ", "th", "d-h", "th", "t.h", "tʰ" }, - ["ᆯ-Ø"] = { "ᆯ", "l", "l", "l", "l", "ɭ" }, - ["ᆯ-ᄀ"] = { "ᆯᄀ", "lg", "lg", "lg", "lk", "ɭɡ" }, - ["ᆯ-ᄁ"] = { "ᆯᄁ", "lkk", "lkk", "lkk", "lkk", "ɭk͈" }, - ["ᆯ-ᄂ"] = { "ᆯᄅ", "ll", "ln", "ll", "ln", "ɭɭ" }, - ["ᆯ-ᄃ"] = { "ᆯᄃ", "ld", 
"ld", "ld", "lt", "ɭd" }, - ["ᆯ-ᄄ"] = { "ᆯᄄ", "ltt", "ltt", "ltt", "ltt", "ɭt͈" }, - ["ᆯ-ᄅ"] = { "ᆯᄅ", "ll", "ll", "ll", "ll", "ɭɭ" }, - ["ᆯ-ᄆ"] = { "ᆯᄆ", "lm", "lm", "lm", "lm", "ɭm" }, - ["ᆯ-ᄇ"] = { "ᆯᄇ", "lb", "lb", "lb", "lp", "ɭb" }, - ["ᆯ-ᄈ"] = { "ᆯᄈ", "lpp", "lpp", "lpp", "lpp", "ɭp͈" }, - ["ᆯ-ᄉ"] = { "ᆯᄉ", "ls", "ls", "ls", "ls", "ɭsʰ" }, - ["ᆯ-ᄊ"] = { "ᆯᄊ", "lss", "lss", "lss", "lss", "ɭs͈" }, - ["ᆯ-ᄋ"] = { "ᄅ", "r", "l-", "r", "l.", "ɾ" }, - ["ᆯ-ᄌ"] = { "ᆯᄌ", "lj", "lj", "lj", "lc", "ɭd͡ʑ" }, - ["ᆯ-ᄍ"] = { "ᆯᄍ", "ljj", "ljj", "ltch", "lcc", "ɭt͡ɕ͈" }, - ["ᆯ-ᄎ"] = { "ᆯᄎ", "lch", "lch", "lch'", "lch", "ɭt͡ɕʰ" }, - ["ᆯ-ᄏ"] = { "ᆯᄏ", "lk", "lk", "lk'", "lkh", "ɭkʰ" }, - ["ᆯ-ᄐ"] = { "ᆯᄐ", "lt", "lt", "lt'", "lth", "ɭtʰ" }, - ["ᆯ-ᄑ"] = { "ᆯᄑ", "lp", "lp", "lp'", "lph", "ɭpʰ" }, - ["ᆯ-ᄒ"] = { "ᆯᄒ", "lh", "l-h", "rh", "l.h", "ɾɦ" }, - ["ᆰ-Ø"] = { "ᆨ", "k", "lg", "k", "lk", "k̚" }, - ["ᆰ-ᄀ"] = { "ᆯᄁ", "lg", "lgg", "lg", "lkqk", "ɭk͈" }, - ["ᆰ-ᄁ"] = { "ᆯᄁ", "lkk", "lgkk", "lkk", "lkkk", "ɭk͈" }, - ["ᆰ-ᄂ"] = { "ᆼᄂ", "ngn", "lgn", "ngn", "lkn", "ŋn" }, - ["ᆰ-ᄃ"] = { "ᆨᄄ", "kd", "lgd", "kt", "lkqt", "k̚t͈" }, - ["ᆰ-ᄄ"] = { "ᆨᄄ", "ktt", "lgtt", "ktt", "lktt", "k̚t͈" }, - ["ᆰ-ᄅ"] = { "ᆼᄂ", "ngn", "lgl", "ngn", "lkl", "ŋn" }, - ["ᆰ-ᄆ"] = { "ᆼᄆ", "ngm", "lgm", "ngm", "lkm", "ŋm" }, - ["ᆰ-ᄇ"] = { "ᆨᄈ", "kb", "lgb", "kp", "lkqp", "k̚p͈" }, - ["ᆰ-ᄈ"] = { "ᆨᄈ", "kpp", "lgpp", "kpp", "lkpp", "k̚p͈" }, - ["ᆰ-ᄉ"] = { "ᆨᄊ", "ks", "lgs", "ks", "lkqs", "ks͈" }, - ["ᆰ-ᄊ"] = { "ᆨᄊ", "kss", "lgss", "kss", "lkss", "ks͈" }, - ["ᆰ-ᄋ"] = { "ᆯᄀ", "lg", "lg-", "lg", "lk.", "ɭɡ" }, - ["ᆰ-ᄌ"] = { "ᆨᄍ", "kj", "lgj", "kch", "lkqc", "k̚t͡ɕ͈" }, - ["ᆰ-ᄍ"] = { "ᆨᄍ", "kjj", "lgjj", "ktch", "lkcc", "k̚t͡ɕ͈" }, - ["ᆰ-ᄎ"] = { "ᆨᄎ", "kch", "lgch", "kch'", "lkch", "k̚t͡ɕʰ" }, - ["ᆰ-ᄏ"] = { "ᆯᄏ", "lk", "lgk", "lk'", "lkkh", "ɭkʰ" }, - ["ᆰ-ᄐ"] = { "ᆨᄐ", "kt", "lgt", "kt'", "lkth", "k̚tʰ" }, - ["ᆰ-ᄑ"] = { "ᆨᄑ", "kp", "lgp", "kp'", "lkph", "k̚pʰ" }, - ["ᆰ-ᄒ"] = { "ᆯᄏ", "lk", "lg-h", "lk'", "lk.h", "ɭkʰ" }, 
- ["ᆱ-Ø"] = { "ᆷ", "m", "lm", "m", "lm", "m" }, - ["ᆱ-ᄀ"] = { "ᆷᄀ", "mg", "lmg", "mg", "lmqk", "mɡ" }, - ["ᆱ-ᄁ"] = { "ᆷᄁ", "mkk", "lmkk", "mkk", "lmkk", "mk͈" }, - ["ᆱ-ᄂ"] = { "ᆷᄂ", "mn", "lmn", "mn", "lmn", "mn" }, - ["ᆱ-ᄃ"] = { "ᆷᄄ", "md", "lmd", "mt", "lmt", "mt͈" }, - ["ᆱ-ᄄ"] = { "ᆷᄄ", "mtt", "lmtt", "mtt", "lmtt", "mt͈" }, - ["ᆱ-ᄅ"] = { "ᆷᄂ", "mn", "lml", "mn", "lml", "mn" }, - ["ᆱ-ᄆ"] = { "ᆷᄆ", "mm", "lmm", "mm", "lmm", "mm" }, - ["ᆱ-ᄇ"] = { "ᆷᄇ", "mb", "lmb", "mb", "lmqp", "mb" }, - ["ᆱ-ᄈ"] = { "ᆷᄈ", "mpp", "lmpp", "mpp", "lmpp", "mp͈" }, - ["ᆱ-ᄉ"] = { "ᆷᄉ", "ms", "lms", "ms", "lmqs", "msʰ" }, - ["ᆱ-ᄊ"] = { "ᆷᄊ", "mss", "lmss", "mss", "lmss", "ms͈" }, - ["ᆱ-ᄋ"] = { "ᆯᄆ", "lm", "lm-", "lm", "lm.", "ɭm" }, - ["ᆱ-ᄌ"] = { "ᆷᄌ", "mj", "lmj", "mj", "lmqc", "md͡ʑ" }, - ["ᆱ-ᄍ"] = { "ᆷᄍ", "mjj", "lmjj", "mtch", "lmcc", "mt͡ɕ͈" }, - ["ᆱ-ᄎ"] = { "ᆷᄎ", "mch", "lmch", "mch'", "lmch", "mt͡ɕʰ" }, - ["ᆱ-ᄏ"] = { "ᆷᄏ", "mk", "lmk", "mk'", "lmkh", "mkʰ" }, - ["ᆱ-ᄐ"] = { "ᆷᄐ", "mt", "lmt", "mt'", "lmth", "mtʰ" }, - ["ᆱ-ᄑ"] = { "ᆷᄑ", "mp", "lmp", "mp'", "lmph", "mpʰ" }, - ["ᆱ-ᄒ"] = { "ᆯᄒ", "mh", "lm-h", "mh", "lm.h", "ɾɦ" }, - ["ᆲ-Ø"] = { "ᆯ", "l", "lb", "l", "lp", "ɭ" }, - ["ᆲ-ᄀ"] = { "ᆯᄁ", "lg", "lbg", "lk", "lpqk", "ɭk͈" }, - ["ᆲ-ᄁ"] = { "ᆯᄁ", "lkk", "lbkk", "lkk", "lpkk", "ɭk͈" }, - ["ᆲ-ᄂ"] = { "ᆯᄅ", "ll", "lbn", "ll", "lpn", "ɭɭ" }, - ["ᆲ-ᄃ"] = { "ᆯᄄ", "ld", "lbd", "lt", "lpqt", "ɭt͈" }, - ["ᆲ-ᄄ"] = { "ᆯᄄ", "ltt", "lbtt", "ltt", "lptt", "ɭt͈" }, - ["ᆲ-ᄅ"] = { "ᆯᄅ", "ll", "lbl", "ll", "lpl", "ɭɭ" }, - ["ᆲ-ᄆ"] = { "ᆯᄆ", "lm", "lbm", "lm", "lpm", "ɭm" }, - ["ᆲ-ᄇ"] = { "ᆯᄈ", "lb", "lbb", "lp", "lpqp", "ɭp͈" }, - ["ᆲ-ᄈ"] = { "ᆯᄈ", "lpp", "lbpp", "lpp", "lppp", "ɭp͈" }, - ["ᆲ-ᄉ"] = { "ᆯᄊ", "ls", "lbs", "ls", "lpqs", "ɭs͈" }, - ["ᆲ-ᄊ"] = { "ᆯᄊ", "lss", "lbss", "lss", "lpss", "ɭs͈" }, - ["ᆲ-ᄋ"] = { "ᆯᄇ", "lb", "lb-", "lb", "lp.", "ɭb" }, - ["ᆲ-ᄌ"] = { "ᆯᄍ", "lj", "lbj", "lch", "lpqc", "ɭt͡ɕ͈" }, - ["ᆲ-ᄍ"] = { "ᆯᄍ", "ljj", "lbjj", "ltch", "lpcc", "ɭt͡ɕ͈" }, - ["ᆲ-ᄎ"] = { "ᆯᄎ", 
"lch", "lbch", "lch'", "lpch", "ɭt͡ɕʰ" }, - ["ᆲ-ᄏ"] = { "ᆯᄏ", "lk", "lbk", "lk'", "lpkh", "ɭkʰ" }, - ["ᆲ-ᄐ"] = { "ᆯᄐ", "lt", "lbt", "lt'", "lpth", "ɭtʰ" }, - ["ᆲ-ᄑ"] = { "ᆯᄑ", "lp", "lbp", "lp'", "lpph", "ɭpʰ" }, - ["ᆲ-ᄒ"] = { "ᆯᄑ", "lph", "lb-h", "lp'", "lp.h", "ɭpʰ" }, - ["ᆳ-Ø"] = { "ᆯ", "l", "ls", "l", "ls", "ɭ" }, - ["ᆳ-ᄀ"] = { "ᆯᄀ", "lg", "lsg", "lg", "lsk", "ɭɡ" }, - ["ᆳ-ᄁ"] = { "ᆯᄁ", "lkk", "lskk", "lkk", "lskk", "ɭk͈" }, - ["ᆳ-ᄂ"] = { "ᆯᄂ", "ln", "lsn", "ln", "lsn", "ɭn" }, - ["ᆳ-ᄃ"] = { "ᆯᄃ", "ld", "lsd", "ld", "lst", "ɭd" }, - ["ᆳ-ᄄ"] = { "ᆯᄄ", "ltt", "lstt", "ltt", "lstt", "ɭt͈" }, - ["ᆳ-ᄅ"] = { "ᆯᄂ", "ln", "lsl", "ln", "lsl", "ɭn" }, - ["ᆳ-ᄆ"] = { "ᆯᄆ", "lm", "lsm", "lm", "lsm", "ɭm" }, - ["ᆳ-ᄇ"] = { "ᆯᄇ", "lb", "lsb", "lb", "lsp", "ɭb" }, - ["ᆳ-ᄈ"] = { "ᆯᄈ", "lpp", "lspp", "lpp", "lspp", "ɭp͈" }, - ["ᆳ-ᄉ"] = { "ᆯᄉ", "ls", "lss", "ls", "lss", "ɭsʰ" }, - ["ᆳ-ᄊ"] = { "ᆯᄊ", "lss", "lsss", "lss", "lsss", "ɭs͈" }, - ["ᆳ-ᄋ"] = { "ᆯᄊ", "ls", "ls-", "ls", "ls.", "ɭs͈" }, - ["ᆳ-ᄌ"] = { "ᆯᄌ", "lj", "lsj", "lj", "lsc", "ɭd͡ʑ" }, - ["ᆳ-ᄍ"] = { "ᆯᄍ", "ljj", "lsjj", "ltch", "lscc", "ɭt͡ɕ͈" }, - ["ᆳ-ᄎ"] = { "ᆯᄎ", "lch", "lsch", "lch'", "lsch", "ɭt͡ɕʰ" }, - ["ᆳ-ᄏ"] = { "ᆯᄏ", "lk", "lsk", "lk'", "lskh", "ɭkʰ" }, - ["ᆳ-ᄐ"] = { "ᆯᄐ", "lt", "lst", "lt'", "lsth", "ɭtʰ" }, - ["ᆳ-ᄑ"] = { "ᆯᄑ", "lp", "lsp", "lp'", "lsph", "ɭpʰ" }, - ["ᆳ-ᄒ"] = { "ᆯᄊ", "lh", "lsh", "lh", "lsh", "ɭs͈" }, - ["ᆴ-Ø"] = { "ᆯ", "l", "lt", "l", "lth", "ɭ" }, - ["ᆴ-ᄀ"] = { "ᆯᄁ", "lg", "ltg", "lk", "lthqk", "ɭk͈" }, - ["ᆴ-ᄁ"] = { "ᆯᄁ", "lkk", "ltkk", "lkk", "lthkk", "ɭk͈" }, - ["ᆴ-ᄂ"] = { "ᆯᄅ", "ll", "ltn", "ll", "lthn", "ɭɭ" }, - ["ᆴ-ᄃ"] = { "ᆯᄄ", "ld", "ltd", "lt", "lthqt", "ɭt͈" }, - ["ᆴ-ᄄ"] = { "ᆯᄄ", "ltt", "lttt", "ltt", "lthtt", "ɭt͈" }, - ["ᆴ-ᄅ"] = { "ᆯᄅ", "ll", "ltl", "ll", "lthl", "ɭɭ" }, - ["ᆴ-ᄆ"] = { "ᆯᄆ", "lm", "ltm", "lm", "lthm", "ɭm" }, - ["ᆴ-ᄇ"] = { "ᆯᄈ", "lb", "ltb", "lp", "lthqp", "ɭp͈" }, - ["ᆴ-ᄈ"] = { "ᆯᄈ", "lpp", "ltpp", "lpp", "lthpp", "ɭp͈" }, - ["ᆴ-ᄉ"] = { "ᆯᄊ", "ls", "lts", "ls", 
"lthqs", "ɭs͈" }, - ["ᆴ-ᄊ"] = { "ᆯᄊ", "lss", "ltss", "lss", "lthss", "ɭs͈" }, - ["ᆴ-ᄋ"] = { "ᆯᄐ", "lt", "lt-", "lt", "lth.", "ɭtʰ" }, - ["ᆴ-ᄌ"] = { "ᆯᄍ", "lj", "ltj", "lch", "lthqc", "ɭt͡ɕ͈" }, - ["ᆴ-ᄍ"] = { "ᆯᄍ", "ljj", "ltjj", "ltch", "lthcc", "ɭt͡ɕ͈" }, - ["ᆴ-ᄎ"] = { "ᆯᄎ", "lch", "ltch", "lch'", "lthch", "ɭt͡ɕʰ" }, - ["ᆴ-ᄏ"] = { "ᆯᄏ", "lk", "ltk", "lk'", "lthkh", "ɭkʰ" }, - ["ᆴ-ᄐ"] = { "ᆯᄐ", "lt", "ltt", "lt'", "lthth", "ɭtʰ" }, - ["ᆴ-ᄑ"] = { "ᆯᄑ", "lp", "ltp", "lp'", "lthph", "ɭpʰ" }, - ["ᆴ-ᄒ"] = { "ᆯᄐ", "lh", "lt-h", "lh", "lth.h", "ɭtʰ" }, - ["ᆵ-Ø"] = { "ᆸ", "p", "lp", "p", "lph", "p̚" }, - ["ᆵ-ᄀ"] = { "ᆸᄁ", "pg", "lpg", "pk", "lphqk", "p̚k͈" }, - ["ᆵ-ᄁ"] = { "ᆸᄁ", "pkk", "lpkk", "pkk", "lphkk", "p̚k͈" }, - ["ᆵ-ᄂ"] = { "ᆷᄂ", "mn", "lpn", "mn", "lphn", "mn" }, - ["ᆵ-ᄃ"] = { "ᆸᄄ", "pd", "lpd", "pt", "lphqt", "p̚t͈" }, - ["ᆵ-ᄄ"] = { "ᆸᄄ", "ptt", "lptt", "ptt", "lphtt", "p̚t͈" }, - ["ᆵ-ᄅ"] = { "ᆷᄂ", "mn", "lpl", "mn", "lphl", "mn" }, - ["ᆵ-ᄆ"] = { "ᆷᄆ", "mm", "lpm", "mm", "lphm", "mm" }, - ["ᆵ-ᄇ"] = { "ᆸᄈ", "pb", "lpb", "pp", "lphqp", "p̚p͈" }, - ["ᆵ-ᄈ"] = { "ᆸᄈ", "ppp", "lppp", "pp", "lphpp", "p̚p͈" }, - ["ᆵ-ᄉ"] = { "ᆸᄊ", "ps", "lps", "ps", "lphqs", "ps͈" }, - ["ᆵ-ᄊ"] = { "ᆸᄊ", "pss", "lpss", "pss", "lphss", "ps͈" }, - ["ᆵ-ᄋ"] = { "ᆯᄑ", "lp", "lp-", "lp", "lph.", "ɭpʰ" }, - ["ᆵ-ᄌ"] = { "ᆸᄍ", "pj", "lpj", "pch", "lphqc", "p̚t͡ɕ͈" }, - ["ᆵ-ᄍ"] = { "ᆸᄍ", "pjj", "lpjj", "ptch", "lphcc", "p̚t͡ɕ͈" }, - ["ᆵ-ᄎ"] = { "ᆸᄎ", "pch", "lpch", "pch'", "lphch", "p̚t͡ɕʰ" }, - ["ᆵ-ᄏ"] = { "ᆸᄏ", "pk", "lpk", "pk'", "lphkh", "p̚kʰ" }, - ["ᆵ-ᄐ"] = { "ᆸᄐ", "pt", "lpt", "pt'", "lphth", "p̚tʰ" }, - ["ᆵ-ᄑ"] = { "ᆸᄑ", "pp", "lpp", "pp'", "lphph", "p̚pʰ" }, - ["ᆵ-ᄒ"] = { "ᆯᄑ", "ph", "lp-h", "ph", "lph.h", "ɭpʰ" }, - ["ᆶ-Ø"] = { "ᆯ", "l", "lh", "l", "lh", "ɭ" }, - ["ᆶ-ᄀ"] = { "ᆯᄏ", "lk", "lhg", "lk'", "lhk", "ɭkʰ" }, - ["ᆶ-ᄁ"] = { "ᆯᄏ", "lk", "lhkk", "lk'", "lhkk", "ɭkʰ" }, - ["ᆶ-ᄂ"] = { "ᆯᄅ", "ll", "lhn", "ll", "lhn", "ɭɭ" }, - ["ᆶ-ᄃ"] = { "ᆯᄐ", "lt", "lhd", "lt'", "lht", "ɭtʰ" }, - 
["ᆶ-ᄄ"] = { "ᆯᄐ", "lt", "lhtt", "lt'", "lhtt", "ɭtʰ" }, - ["ᆶ-ᄅ"] = { "ᆯᄅ", "ll", "lhl", "ll", "lhl", "ɭɭ" }, - ["ᆶ-ᄆ"] = { "ᆯᄆ", "lm", "lhm", "lm", "lhm", "ɭm" }, - ["ᆶ-ᄇ"] = { "ᆯᄑ", "lp", "lhb", "lp'", "lhp", "ɭpʰ" }, - ["ᆶ-ᄈ"] = { "ᆯᄑ", "lp", "lhpp", "lp'", "lhpp", "ɭpʰ" }, - ["ᆶ-ᄉ"] = { "ᆯᄊ", "ls", "lhs", "lss", "lhs", "ɭs͈" }, - ["ᆶ-ᄊ"] = { "ᆯᄊ", "lss", "lhss", "lss", "lhss", "ɭs͈" }, - ["ᆶ-ᄋ"] = { "ᄅ", "r", "lh-", "r", "lh.", "ɾ" }, - ["ᆶ-ᄌ"] = { "ᆯᄎ", "lch", "lhj", "lch'", "lhc", "ɭt͡ɕʰ" }, - ["ᆶ-ᄍ"] = { "ᆯᄎ", "lch", "lhjj", "lch'", "lhcc", "ɭt͡ɕʰ" }, - ["ᆶ-ᄎ"] = { "ᆯᄎ", "lch", "lhch", "lch'", "lhch", "ɭt͡ɕʰ" }, - ["ᆶ-ᄏ"] = { "ᆯᄏ", "lk", "lhk", "lk'", "lhkh", "ɭkʰ" }, - ["ᆶ-ᄐ"] = { "ᆯᄐ", "lt", "lht", "lt'", "lhth", "ɭtʰ" }, - ["ᆶ-ᄑ"] = { "ᆯᄑ", "lp", "lhp", "lp'", "lhph", "ɭpʰ" }, - ["ᆶ-ᄒ"] = { "ᆯᄒ", "lh", "lhh", "lh", "lhh", "ɾɦ" }, - ["ᆷ-Ø"] = { "ᆷ", "m", "m", "m", "m", "m" }, - ["ᆷ-ᄀ"] = { "ᆷᄀ", "mg", "mg", "mg", "mk", "mɡ" }, - ["ᆷ-ᄁ"] = { "ᆷᄁ", "mkk", "mkk", "mkk", "mkk", "mk͈" }, - ["ᆷ-ᄂ"] = { "ᆷᄂ", "mn", "mn", "mn", "mn", "mn" }, - ["ᆷ-ᄃ"] = { "ᆷᄃ", "md", "md", "md", "mt", "md" }, - ["ᆷ-ᄄ"] = { "ᆷᄄ", "mtt", "mtt", "mtt", "mtt", "mt͈" }, - ["ᆷ-ᄅ"] = { "ᆷᄂ", "mn", "ml", "mn", "ml", "mn" }, - ["ᆷ-ᄆ"] = { "ᆷᄆ", "mm", "mm", "mm", "mm", "mm" }, - ["ᆷ-ᄇ"] = { "ᆷᄇ", "mb", "mb", "mb", "mp", "mb" }, - ["ᆷ-ᄈ"] = { "ᆷᄈ", "mpp", "mpp", "mpp", "mpp", "mp͈" }, - ["ᆷ-ᄉ"] = { "ᆷᄉ", "ms", "ms", "ms", "ms", "msʰ" }, - ["ᆷ-ᄊ"] = { "ᆷᄊ", "mss", "mss", "mss", "mss", "ms͈" }, - ["ᆷ-ᄋ"] = { "ᄆ", "m", "m-", "m", "m.", "m" }, - ["ᆷ-ᄌ"] = { "ᆷᄌ", "mj", "mj", "mj", "mc", "md͡ʑ" }, - ["ᆷ-ᄍ"] = { "ᆷᄍ", "mjj", "mjj", "mtch", "mcc", "mt͡ɕ͈" }, - ["ᆷ-ᄎ"] = { "ᆷᄎ", "mch", "mch", "mch'", "mch", "mt͡ɕʰ" }, - ["ᆷ-ᄏ"] = { "ᆷᄏ", "mk", "mk", "mk'", "mkh", "mkʰ" }, - ["ᆷ-ᄐ"] = { "ᆷᄐ", "mt", "mt", "mt'", "mth", "mtʰ" }, - ["ᆷ-ᄑ"] = { "ᆷᄑ", "mp", "mp", "mp'", "mph", "mpʰ" }, - ["ᆷ-ᄒ"] = { "ᆷᄒ", "mh", "mh", "mh", "mh", "mɦ" }, - ["ᆸ-Ø"] = { "ᆸ", "p", "b", "p", "p", "p̚" }, - ["ᆸ-ᄀ"] = { "ᆸᄁ", 
"pg", "bg", "pk", "pqk", "p̚k͈" }, - ["ᆸ-ᄁ"] = { "ᆸᄁ", "pkk", "bkk", "pkk", "pkk", "p̚k͈" }, - ["ᆸ-ᄂ"] = { "ᆷᄂ", "mn", "bn", "mn", "pn", "mn" }, - ["ᆸ-ᄃ"] = { "ᆸᄄ", "pd", "bd", "pt", "pqt", "p̚t͈" }, - ["ᆸ-ᄄ"] = { "ᆸᄄ", "ptt", "btt", "ptt", "ptt", "p̚t͈" }, - ["ᆸ-ᄅ"] = { "ᆷᄂ", "mn", "bl", "mn", "pl", "mn" }, - ["ᆸ-ᄆ"] = { "ᆷᄆ", "mm", "bm", "mm", "pm", "mm" }, - ["ᆸ-ᄇ"] = { "ᆸᄈ", "pb", "bb", "pp", "pqp", "p̚p͈" }, - ["ᆸ-ᄈ"] = { "ᆸᄈ", "ppp", "bpp", "pp", "ppp", "p̚p͈" }, - ["ᆸ-ᄉ"] = { "ᆸᄊ", "ps", "bs", "ps", "pqs", "ps͈" }, - ["ᆸ-ᄊ"] = { "ᆸᄊ", "pss", "bss", "pss", "pss", "ps͈" }, - ["ᆸ-ᄋ"] = { "ᄇ", "b", "b-", "b", "p.", "b" }, - ["ᆸ-ᄌ"] = { "ᆸᄍ", "pj", "bj", "pch", "pqc", "p̚t͡ɕ͈" }, - ["ᆸ-ᄍ"] = { "ᆸᄍ", "pjj", "bjj", "ptch", "pcc", "p̚t͡ɕ͈" }, - ["ᆸ-ᄎ"] = { "ᆸᄎ", "pch", "bch", "pch'", "pch", "p̚t͡ɕʰ" }, - ["ᆸ-ᄏ"] = { "ᆸᄏ", "pk", "bk", "pk'", "pkh", "p̚kʰ" }, - ["ᆸ-ᄐ"] = { "ᆸᄐ", "pt", "bt", "pt'", "pth", "p̚tʰ" }, - ["ᆸ-ᄑ"] = { "ᆸᄑ", "pp", "bp", "pp'", "pph", "p̚pʰ" }, - ["ᆸ-ᄒ"] = { "ᄑ", "ph", "b-h", "ph", "p.h", "pʰ" }, - ["ᆹ-Ø"] = { "ᆸ", "p", "bs", "p", "ps", "p̚" }, - ["ᆹ-ᄀ"] = { "ᆸᄁ", "pg", "bsg", "pk", "psqk", "p̚k͈" }, - ["ᆹ-ᄁ"] = { "ᆸᄁ", "pkk", "bskk", "pkk", "pskk", "p̚k͈" }, - ["ᆹ-ᄂ"] = { "ᆷᄂ", "mn", "bsn", "mn", "psn", "mn" }, - ["ᆹ-ᄃ"] = { "ᆸᄄ", "pd", "bsd", "pt", "psqt", "p̚t͈" }, - ["ᆹ-ᄄ"] = { "ᆸᄄ", "ptt", "bstt", "ptt", "pstt", "p̚t͈" }, - ["ᆹ-ᄅ"] = { "ᆷᄂ", "mn", "bsl", "mn", "psl", "mn" }, - ["ᆹ-ᄆ"] = { "ᆷᄆ", "mm", "bsm", "mm", "psm", "mm" }, - ["ᆹ-ᄇ"] = { "ᆸᄈ", "pb", "bsb", "pp", "psqp", "p̚p͈" }, - ["ᆹ-ᄈ"] = { "ᆸᄈ", "ppp", "bspp", "pp", "pspp", "p̚p͈" }, - ["ᆹ-ᄉ"] = { "ᆸᄊ", "ps", "bss", "ps", "psqs", "ps͈" }, - ["ᆹ-ᄊ"] = { "ᆸᄊ", "pss", "bsss", "pss", "psss", "ps͈" }, - ["ᆹ-ᄋ"] = { "ᆸᄊ", "ps", "bs-", "ps", "ps.", "ps͈" }, - ["ᆹ-ᄌ"] = { "ᆸᄍ", "pj", "bsj", "pch", "psqc", "p̚t͡ɕ͈" }, - ["ᆹ-ᄍ"] = { "ᆸᄍ", "pjj", "bsjj", "ptch", "pscc", "p̚t͡ɕ͈" }, - ["ᆹ-ᄎ"] = { "ᆸᄎ", "pch", "bsch", "pch'", "psch", "p̚t͡ɕʰ" }, - ["ᆹ-ᄏ"] = { "ᆸᄏ", "pk", "bsk", "pk'", "pskh", 
"p̚kʰ" }, - ["ᆹ-ᄐ"] = { "ᆸᄐ", "pt", "bst", "pt'", "psth", "p̚tʰ" }, - ["ᆹ-ᄑ"] = { "ᆸᄑ", "pp", "bsp", "pp'", "psph", "p̚pʰ" }, - ["ᆹ-ᄒ"] = { "ᄑ", "ph", "bsh", "ph", "psh", "pʰ" }, - ["ᆺ-Ø"] = { "ᆮ", "t", "s", "t", "s", "t̚" }, - ["ᆺ-ᄀ"] = { "ᆮᄁ", "tg", "sg", "tk", "sqk", "t̚k͈" }, - ["ᆺ-ᄁ"] = { "ᆮᄁ", "tkk", "skk", "tkk", "skk", "t̚k͈" }, - ["ᆺ-ᄂ"] = { "ᆫᄂ", "nn", "sn", "nn", "sn", "nn" }, - ["ᆺ-ᄃ"] = { "ᆮᄄ", "td", "sd", "tt", "sqt", "t̚t͈" }, - ["ᆺ-ᄄ"] = { "ᆮᄄ", "ttt", "stt", "tt", "stt", "t̚t͈" }, - ["ᆺ-ᄅ"] = { "ᆫᄂ", "nn", "sl", "nn", "sl", "nn" }, - ["ᆺ-ᄆ"] = { "ᆫᄆ", "nm", "sm", "nm", "sm", "nm" }, - ["ᆺ-ᄇ"] = { "ᆮᄈ", "tb", "sb", "tp", "sqp", "t̚p͈" }, - ["ᆺ-ᄈ"] = { "ᆮᄈ", "tpp", "spp", "tpp", "spp", "t̚p͈" }, - ["ᆺ-ᄉ"] = { "ᆮᄊ", "ts", "ss", "ss", "sqs", "t̚s͈" }, - ["ᆺ-ᄊ"] = { "ᆮᄊ", "tss", "sss", "ss", "sss", "t̚s͈" }, - ["ᆺ-ᄋ"] = { "ᄉ", "s", "s-", "s", "s.", "sʰ" }, - ["ᆺ-ᄌ"] = { "ᆮᄍ", "tj", "sj", "tch", "sqc", "t̚t͡ɕ͈" }, - ["ᆺ-ᄍ"] = { "ᆮᄍ", "tjj", "sjj", "tch", "scc", "t̚t͡ɕ͈" }, - ["ᆺ-ᄎ"] = { "ᆮᄎ", "tch", "sch", "tch'", "sch", "t̚t͡ɕʰ" }, - ["ᆺ-ᄏ"] = { "ᆮᄏ", "tk", "sk", "tk'", "skh", "t̚kʰ" }, - ["ᆺ-ᄐ"] = { "ᆮᄐ", "tt", "st", "tt'", "sth", "t̚tʰ" }, - ["ᆺ-ᄑ"] = { "ᆮᄑ", "tp", "sp", "tp'", "sph", "t̚pʰ" }, - ["ᆺ-ᄒ"] = { "ᄐ", "th", "sh", "th", "sh", "tʰ" }, - ["ᆻ-Ø"] = { "ᆮ", "t", "ss", "t", "ss", "t̚" }, - ["ᆻ-ᄀ"] = { "ᆮᄁ", "tg", "ssg", "tk", "ssqk", "t̚k͈" }, - ["ᆻ-ᄁ"] = { "ᆮᄁ", "tkk", "sskk", "tkk", "sskk", "t̚k͈" }, - ["ᆻ-ᄂ"] = { "ᆫᄂ", "nn", "ssn", "nn", "ssn", "nn" }, - ["ᆻ-ᄃ"] = { "ᆮᄄ", "td", "ssd", "tt", "ssqt", "t̚t͈" }, - ["ᆻ-ᄄ"] = { "ᆮᄄ", "ttt", "sstt", "tt", "sstt", "t̚t͈" }, - ["ᆻ-ᄅ"] = { "ᆫᄂ", "nn", "ssl", "nn", "ssl", "nn" }, - ["ᆻ-ᄆ"] = { "ᆫᄆ", "nm", "ssm", "nm", "ssm", "nm" }, - ["ᆻ-ᄇ"] = { "ᆮᄈ", "tb", "ssb", "tp", "ssqp", "t̚p͈" }, - ["ᆻ-ᄈ"] = { "ᆮᄈ", "tpp", "sspp", "tpp", "sspp", "t̚p͈" }, - ["ᆻ-ᄉ"] = { "ᆮᄊ", "ts", "sss", "ss", "ssqs", "t̚s͈" }, - ["ᆻ-ᄊ"] = { "ᆮᄊ", "tss", "ssss", "ss", "ssss", "t̚s͈" }, - ["ᆻ-ᄋ"] = { "ᄊ", "ss", "ss-", "ss", 
"ss.", "s͈" }, - ["ᆻ-ᄌ"] = { "ᆮᄍ", "tj", "ssj", "tch", "ssqc", "t̚t͡ɕ͈" }, - ["ᆻ-ᄍ"] = { "ᆮᄍ", "tjj", "ssjj", "tch", "sscc", "t̚t͡ɕ͈" }, - ["ᆻ-ᄎ"] = { "ᆮᄎ", "tch", "ssch", "tch'", "ssch", "t̚t͡ɕʰ" }, - ["ᆻ-ᄏ"] = { "ᆮᄏ", "tk", "ssk", "tk'", "sskh", "t̚kʰ" }, - ["ᆻ-ᄐ"] = { "ᆮᄐ", "tt", "sst", "tt'", "ssth", "t̚tʰ" }, - ["ᆻ-ᄑ"] = { "ᆮᄑ", "tp", "ssp", "tp'", "ssph", "t̚pʰ" }, - ["ᆻ-ᄒ"] = { "ᄐ", "th", "ss-h", "th", "ss.h", "tʰ" }, - ["ᆼ-Ø"] = { "ᆼ", "ng", "ng", "ng", "ng", "ŋ" }, - ["ᆼ-ᄀ"] = { "ᆼᄀ", "ngg", "ngg", "ngg", "ngk", "ŋɡ" }, - ["ᆼ-ᄁ"] = { "ᆼᄁ", "ngkk", "ngkk", "ngkk", "ngkk", "ŋk͈" }, - ["ᆼ-ᄂ"] = { "ᆼᄂ", "ngn", "ngn", "ngn", "ngn", "ŋn" }, - ["ᆼ-ᄃ"] = { "ᆼᄃ", "ngd", "ngd", "ngd", "ngt", "ŋd" }, - ["ᆼ-ᄄ"] = { "ᆼᄄ", "ngtt", "ngtt", "ngtt", "ngtt", "ŋt͈" }, - ["ᆼ-ᄅ"] = { "ᆼᄂ", "ngn", "ngl", "ngn", "ngl", "ŋn" }, - ["ᆼ-ᄆ"] = { "ᆼᄆ", "ngm", "ngm", "ngm", "ngm", "ŋm" }, - ["ᆼ-ᄇ"] = { "ᆼᄇ", "ngb", "ngb", "ngb", "ngp", "ŋb" }, - ["ᆼ-ᄈ"] = { "ᆼᄈ", "ngpp", "ngpp", "ngpp", "ngpp", "ŋp͈" }, - ["ᆼ-ᄉ"] = { "ᆼᄉ", "ngs", "ngs", "ngs", "ngs", "ŋsʰ" }, - ["ᆼ-ᄊ"] = { "ᆼᄊ", "ngss", "ngss", "ngss", "ngss", "ŋs͈" }, - ["ᆼ-ᄋ"] = { "ᆼᄋ", "ng-", "ng-", "ng", "ng.", "ŋ" }, - ["ᆼ-ᄌ"] = { "ᆼᄌ", "ngj", "ngj", "ngj", "ngc", "ŋd͡ʑ" }, - ["ᆼ-ᄍ"] = { "ᆼᄍ", "ngjj", "ngjj", "ngtch", "ngcc", "ŋt͡ɕ͈" }, - ["ᆼ-ᄎ"] = { "ᆼᄎ", "ngch", "ngch", "ngch'", "ngch", "ŋt͡ɕʰ" }, - ["ᆼ-ᄏ"] = { "ᆼᄏ", "ngk", "ngk", "ngk'", "ngkh", "ŋkʰ" }, - ["ᆼ-ᄐ"] = { "ᆼᄐ", "ngt", "ngt", "ngt'", "ngth", "ŋtʰ" }, - ["ᆼ-ᄑ"] = { "ᆼᄑ", "ngp", "ngp", "ngp'", "ngph", "ŋpʰ" }, - ["ᆼ-ᄒ"] = { "ᆼᄒ", "ngh", "ngh", "ngh", "ngh", "ŋɦ" }, - ["ᆽ-Ø"] = { "ᆮ", "t", "j", "t", "c", "t̚" }, - ["ᆽ-ᄀ"] = { "ᆮᄁ", "tg", "jg", "tk", "cqk", "t̚k͈" }, - ["ᆽ-ᄁ"] = { "ᆮᄁ", "tkk", "jkk", "tkk", "ckk", "t̚k͈" }, - ["ᆽ-ᄂ"] = { "ᆫᄂ", "nn", "jn", "nn", "cn", "nn" }, - ["ᆽ-ᄃ"] = { "ᆮᄄ", "td", "jd", "tt", "cqt", "t̚t͈" }, - ["ᆽ-ᄄ"] = { "ᆮᄄ", "ttt", "jtt", "tt", "ctt", "t̚t͈" }, - ["ᆽ-ᄅ"] = { "ᆫᄂ", "nn", "jl", "nn", "cl", "nn" }, - ["ᆽ-ᄆ"] = { "ᆫᄆ", "nm", "jm", 
"nm", "cm", "nm" }, - ["ᆽ-ᄇ"] = { "ᆮᄈ", "tb", "jb", "tp", "cqp", "t̚p͈" }, - ["ᆽ-ᄈ"] = { "ᆮᄈ", "tpp", "jpp", "tpp", "cpp", "t̚p͈" }, - ["ᆽ-ᄉ"] = { "ᆮᄊ", "ts", "js", "ss", "cqs", "t̚s͈" }, - ["ᆽ-ᄊ"] = { "ᆮᄊ", "tss", "jss", "ss", "css", "t̚s͈" }, - ["ᆽ-ᄋ"] = { "ᄌ", "j", "j-", "j", "c.", "d͡ʑ" }, - ["ᆽ-ᄌ"] = { "ᆮᄍ", "tj", "jj", "tch", "cqc", "t̚t͡ɕ͈" }, - ["ᆽ-ᄍ"] = { "ᆮᄍ", "tjj", "jjj", "tch", "ccc", "t̚t͡ɕ͈" }, - ["ᆽ-ᄎ"] = { "ᆮᄎ", "tch", "jch", "tch'", "cch", "t̚t͡ɕʰ" }, - ["ᆽ-ᄏ"] = { "ᆮᄏ", "tk", "jk", "tk'", "ckh", "t̚kʰ" }, - ["ᆽ-ᄐ"] = { "ᆮᄐ", "tt", "jt", "tt'", "cth", "t̚tʰ" }, - ["ᆽ-ᄑ"] = { "ᆮᄑ", "tp", "jp", "tp'", "cph", "t̚pʰ" }, - ["ᆽ-ᄒ"] = { "ᄎ", "ch", "j-h", "ch'", "c.h", "t͡ɕʰ" }, - ["ᆾ-Ø"] = { "ᆮ", "t", "ch", "t", "ch", "t̚" }, - ["ᆾ-ᄀ"] = { "ᆮᄁ", "tg", "chg", "tk", "chqk", "t̚k͈" }, - ["ᆾ-ᄁ"] = { "ᆮᄁ", "tkk", "chkk", "tkk", "chkk", "t̚k͈" }, - ["ᆾ-ᄂ"] = { "ᆫᄂ", "nn", "chn", "nn", "chn", "nn" }, - ["ᆾ-ᄃ"] = { "ᆮᄄ", "td", "chd", "tt", "chqt", "t̚t͈" }, - ["ᆾ-ᄄ"] = { "ᆮᄄ", "ttt", "chtt", "tt", "chtt", "t̚t͈" }, - ["ᆾ-ᄅ"] = { "ᆫᄂ", "nn", "chl", "nn", "chl", "nn" }, - ["ᆾ-ᄆ"] = { "ᆫᄆ", "nm", "chm", "nm", "chm", "nm" }, - ["ᆾ-ᄇ"] = { "ᆮᄈ", "tb", "chb", "tp", "chqp", "t̚p͈" }, - ["ᆾ-ᄈ"] = { "ᆮᄈ", "tpp", "chpp", "tpp", "chpp", "t̚p͈" }, - ["ᆾ-ᄉ"] = { "ᆮᄊ", "ts", "chs", "ss", "chqs", "t̚s͈" }, - ["ᆾ-ᄊ"] = { "ᆮᄊ", "tss", "chss", "ss", "chss", "t̚s͈" }, - ["ᆾ-ᄋ"] = { "ᄎ", "ch", "ch-", "ch", "ch.", "t͡ɕʰ" }, - ["ᆾ-ᄌ"] = { "ᆮᄍ", "tj", "chj", "tch", "chqc", "t̚t͡ɕ͈" }, - ["ᆾ-ᄍ"] = { "ᆮᄍ", "tjj", "chjj", "tch", "chcc", "t̚t͡ɕ͈" }, - ["ᆾ-ᄎ"] = { "ᆮᄎ", "tch", "chch", "tch'", "chch", "t̚t͡ɕʰ" }, - ["ᆾ-ᄏ"] = { "ᆮᄏ", "tk", "chk", "tk'", "chkh", "t̚kʰ" }, - ["ᆾ-ᄐ"] = { "ᆮᄐ", "tt", "cht", "tt'", "chth", "t̚tʰ" }, - ["ᆾ-ᄑ"] = { "ᆮᄑ", "tp", "chp", "tp'", "chph", "t̚pʰ" }, - ["ᆾ-ᄒ"] = { "ᄎ", "th", "ch-h", "th", "ch.h", "t͡ɕʰ" }, - ["ᆿ-Ø"] = { "ᆨ", "k", "k", "k", "kh", "k̚" }, - ["ᆿ-ᄀ"] = { "ᆨᄁ", "kg", "kg", "kk", "khqk", "k̚k͈" }, - ["ᆿ-ᄁ"] = { "ᆨᄁ", "kkk", "kkk", "kk", "khkk", 
"k̚k͈" }, - ["ᆿ-ᄂ"] = { "ᆼᄂ", "ngn", "kn", "ngn", "khn", "ŋn" }, - ["ᆿ-ᄃ"] = { "ᆨᄄ", "kd", "kd", "kt", "khqt", "k̚t͈" }, - ["ᆿ-ᄄ"] = { "ᆨᄄ", "ktt", "ktt", "ktt", "khtt", "k̚t͈" }, - ["ᆿ-ᄅ"] = { "ᆼᄂ", "ngn", "kl", "ngn", "khl", "ŋn" }, - ["ᆿ-ᄆ"] = { "ᆼᄆ", "ngm", "km", "ngm", "khm", "ŋm" }, - ["ᆿ-ᄇ"] = { "ᆨᄈ", "kb", "kb", "kp", "khqp", "k̚p͈" }, - ["ᆿ-ᄈ"] = { "ᆨᄈ", "kpp", "kpp", "kpp", "khpp", "k̚p͈" }, - ["ᆿ-ᄉ"] = { "ᆨᄊ", "ks", "ks", "ks", "khqs", "ks͈" }, - ["ᆿ-ᄊ"] = { "ᆨᄊ", "kss", "kss", "kss", "khss", "ks͈" }, - ["ᆿ-ᄋ"] = { "ᄏ", "k", "k-", "k'", "kh.", "kʰ" }, - ["ᆿ-ᄌ"] = { "ᆨᄍ", "kj", "kj", "kch", "khqc", "k̚t͡ɕ͈" }, - ["ᆿ-ᄍ"] = { "ᆨᄍ", "kjj", "kjj", "ktch", "khcc", "k̚t͡ɕ͈" }, - ["ᆿ-ᄎ"] = { "ᆨᄎ", "kch", "kch", "kch'", "khch", "k̚t͡ɕʰ" }, - ["ᆿ-ᄏ"] = { "ᆨᄏ", "kk", "kk", "kk'", "khkh", "k̚kʰ" }, - ["ᆿ-ᄐ"] = { "ᆨᄐ", "kt", "kt", "kt'", "khth", "k̚tʰ" }, - ["ᆿ-ᄑ"] = { "ᆨᄑ", "kp", "kp", "kp'", "khph", "k̚pʰ" }, - ["ᆿ-ᄒ"] = { "ᄏ", "kh", "k-h", "kh", "kh.h", "kʰ" }, - ["ᇀ-Ø"] = { "ᆮ", "t", "t", "t", "th", "t̚" }, - ["ᇀ-ᄀ"] = { "ᆮᄁ", "tg", "tg", "tk", "thqk", "t̚k͈" }, - ["ᇀ-ᄁ"] = { "ᆮᄁ", "tkk", "tkk", "tkk", "thkk", "t̚k͈" }, - ["ᇀ-ᄂ"] = { "ᆫᄂ", "nn", "tn", "nn", "thn", "nn" }, - ["ᇀ-ᄃ"] = { "ᆮᄄ", "td", "td", "tt", "thqt", "t̚t͈" }, - ["ᇀ-ᄄ"] = { "ᆮᄄ", "ttt", "ttt", "tt", "thtt", "t̚t͈" }, - ["ᇀ-ᄅ"] = { "ᆫᄂ", "nn", "tl", "nn", "thl", "nn" }, - ["ᇀ-ᄆ"] = { "ᆫᄆ", "nm", "tm", "nm", "thm", "nm" }, - ["ᇀ-ᄇ"] = { "ᆮᄈ", "tb", "tb", "tp", "thqp", "t̚p͈" }, - ["ᇀ-ᄈ"] = { "ᆮᄈ", "tpp", "tpp", "tpp", "thpp", "t̚p͈" }, - ["ᇀ-ᄉ"] = { "ᆮᄊ", "ts", "ts", "ss", "thqs", "t̚s͈" }, - ["ᇀ-ᄊ"] = { "ᆮᄊ", "tss", "tss", "ss", "thss", "t̚s͈" }, - ["ᇀ-ᄋ"] = { "ᄐ", "t", "t-", "t'", "th.", "tʰ" }, - ["ᇀ-ᄌ"] = { "ᆮᄍ", "tj", "tj", "tch", "thqc", "t̚t͡ɕ͈" }, - ["ᇀ-ᄍ"] = { "ᆮᄍ", "tjj", "tjj", "tch", "thcc", "t̚t͡ɕ͈" }, - ["ᇀ-ᄎ"] = { "ᆮᄎ", "tch", "tch", "tch'", "thch", "t̚t͡ɕʰ" }, - ["ᇀ-ᄏ"] = { "ᆮᄏ", "tk", "tk", "tk'", "thkh", "t̚kʰ" }, - ["ᇀ-ᄐ"] = { "ᆮᄐ", "tt", "tt", "tt'", "thth", "t̚tʰ" }, - ["ᇀ-ᄑ"] 
= { "ᆮᄑ", "tp", "tp", "tp'", "thph", "t̚pʰ" }, - ["ᇀ-ᄒ"] = { "ᄐ", "th", "t-h", "th", "th.h", "tʰ" }, - ["ᇁ-Ø"] = { "ᆸ", "p", "p", "p", "ph", "p̚" }, - ["ᇁ-ᄀ"] = { "ᆸᄁ", "pg", "pg", "pk", "phqk", "p̚k͈" }, - ["ᇁ-ᄁ"] = { "ᆸᄁ", "pkk", "pkk", "pkk", "phkk", "p̚k͈" }, - ["ᇁ-ᄂ"] = { "ᆷᄂ", "mn", "pn", "mn", "phn", "mn" }, - ["ᇁ-ᄃ"] = { "ᆸᄄ", "pd", "pd", "pt", "phqt", "p̚t͈" }, - ["ᇁ-ᄄ"] = { "ᆸᄄ", "ptt", "ptt", "ptt", "phtt", "p̚t͈" }, - ["ᇁ-ᄅ"] = { "ᆷᄂ", "mn", "pl", "mn", "phl", "mn" }, - ["ᇁ-ᄆ"] = { "ᆷᄆ", "mm", "pm", "mm", "phm", "mm" }, - ["ᇁ-ᄇ"] = { "ᆸᄈ", "pb", "pb", "pp", "phqp", "p̚p͈" }, - ["ᇁ-ᄈ"] = { "ᆸᄈ", "ppp", "ppp", "pp", "phpp", "p̚p͈" }, - ["ᇁ-ᄉ"] = { "ᆸᄊ", "ps", "ps", "ps", "phqs", "ps͈" }, - ["ᇁ-ᄊ"] = { "ᆸᄊ", "pss", "pss", "pss", "phss", "ps͈" }, - ["ᇁ-ᄋ"] = { "ᄑ", "p", "p-", "p'", "ph.", "pʰ" }, - ["ᇁ-ᄌ"] = { "ᆸᄍ", "pj", "pj", "pch", "phqc", "p̚t͡ɕ͈" }, - ["ᇁ-ᄍ"] = { "ᆸᄍ", "pjj", "pjj", "ptch", "phcc", "p̚t͡ɕ͈" }, - ["ᇁ-ᄎ"] = { "ᆸᄎ", "pch", "pch", "pch'", "phch", "p̚t͡ɕʰ" }, - ["ᇁ-ᄏ"] = { "ᆸᄏ", "pk", "pk", "pk'", "phkh", "p̚kʰ" }, - ["ᇁ-ᄐ"] = { "ᆸᄐ", "pt", "pt", "pt'", "phth", "p̚tʰ" }, - ["ᇁ-ᄑ"] = { "ᆸᄑ", "pp", "pp", "pp'", "phph", "p̚pʰ" }, - ["ᇁ-ᄒ"] = { "ᄑ", "ph", "p-h", "ph", "ph.h", "pʰ" }, - ["ᇂ-Ø"] = { "ᆮ", "", "h", "", "h", "t̚" }, - ["ᇂ-ᄀ"] = { "ᄏ", "k", "hg", "k'", "hk", "kʰ" }, - ["ᇂ-ᄁ"] = { "ᄏ", "k", "hkk", "k'", "hkk", "kʰ" }, - ["ᇂ-ᄂ"] = { "ᆫᄂ", "nn", "hn", "nn", "hn", "nn" }, - ["ᇂ-ᄃ"] = { "ᄐ", "t", "hd", "t'", "ht", "tʰ" }, - ["ᇂ-ᄄ"] = { "ᄐ", "t", "htt", "t'", "htt", "tʰ" }, - ["ᇂ-ᄅ"] = { "ᆫᄂ", "nn", "hl", "nn", "hl", "nn" }, - ["ᇂ-ᄆ"] = { "ᆷᄆ", "nm", "hm", "nm", "hm", "mm" }, - ["ᇂ-ᄇ"] = { "ᄑ", "p", "hb", "p'", "hp", "pʰ" }, - ["ᇂ-ᄈ"] = { "ᄑ", "p", "hpp", "p'", "hpp", "pʰ" }, - ["ᇂ-ᄉ"] = { "ᄊ", "s", "hs", "ss", "hs", "s͈" }, - ["ᇂ-ᄊ"] = { "ᆮᄊ", "ss", "hss", "ss", "hss", "t̚s͈" }, - ["ᇂ-ᄋ"] = { "ᄋ", "", "h-", "", "h.", "" }, - ["ᇂ-ᄌ"] = { "ᄎ", "ch", "hj", "ch'", "hc", "t͡ɕʰ" }, - ["ᇂ-ᄍ"] = { "ᄎ", "ch", "hjj", "ch'", "hcc", "t͡ɕʰ" }, - 
["ᇂ-ᄎ"] = { "ᆮᄎ", "tch", "hch", "tch'", "hch", "t̚t͡ɕʰ" }, - ["ᇂ-ᄏ"] = { "ᆮᄏ", "k", "hk", "k'", "hkh", "t̚kʰ" }, - ["ᇂ-ᄐ"] = { "ᆮᄐ", "t", "ht", "t'", "hth", "t̚tʰ" }, - ["ᇂ-ᄑ"] = { "ᆮᄑ", "p", "hp", "p'", "hph", "t̚pʰ" }, - ["ᇂ-ᄒ"] = { "ᄐ", "h", "h-h", "h", "h.h", "tʰ" }, -} - -return export \ No newline at end of file diff --git a/wikt/translit/dlg-translit.lua b/wikt/translit/dlg-translit.lua deleted file mode 100644 index 4efd44c..0000000 --- a/wikt/translit/dlg-translit.lua +++ /dev/null @@ -1,44 +0,0 @@ --- This module will transliterate Dolgan language text per WT:DLG TR. --- Language code: dlg - -local export = {} - -local tab = { - ["А"]='A', ["а"]='a', ["Б"]='B', ["б"]='b', ["Г"]='G', ["г"]='g', ["Ҕ"]='Ğ', ["ҕ"]='ğ', - ["Д"]='D', ["д"]='d', ["И"]='İ', ["и"]='i', ["Й"]='Y', ["й"]='y', ["К"]='K', ["к"]='k', - ["Л"]='L', ["л"]='l', ["М"]='M', ["м"]='m', ["Н"]='N', ["н"]='n', ["Ҥ"]='Ñ', ["ҥ"]='ñ', - ["О"]='O', ["о"]='o', ["Ө"]='Ö', ["ө"]='ö', ["П"]='P', ["п"]='p', ["Р"]='R', ["р"]='r', - ["С"]='S', ["с"]='s', ["Һ"]='H', ["һ"]='h', ["Т"]='T', ["т"]='t', ["У"]='U', ["у"]='u', - ["Ү"]='Ü', ["ү"]='ü', ["Х"]='X', ["х"]='x', ["Ч"]='Ç', ["ч"]='ç', ["Ш"]='Ś', ["ш"]='ś', - ["Ы"]='I', ["ы"]='ı', ["Э"]='E', ["э"]='e', - -- non-native letters - ["В"]='V', ["в"]='v', ["Е"]='E', ["е"]='e', ["Ё"]='Yo', ["ё"]='yo', ["Ж"]='J', ["ж"]='j', - ["З"]='Z', ["з"]='z', ["Ф"]='F', ["ф"]='f', ["Ц"]='Ts', ["ц"]='ts', ["Щ"]='Şç', ["щ"]='şç', - ['Ъ']='ʺ', ['ъ']='ʺ', ["Ь"]="’", ["ь"]="’", ["Ю"]='Yu', ["ю"]='yu', ["Я"]='Ya', ["я"]='ya', -} - -local digraphs = { - ['Дь'] = 'C', - ['дь'] = 'c', - ['Нь'] = 'Ń', - ['нь'] = 'ń', -} - -function export.tr(text, lang, sc) - local str_gsub = string.gsub - for digraph, repl in ipairs(digraphs) do - text = str_gsub(text, digraph, repl) - end - - -- е after a vowel or at the beginning of a word becomes ye - text = mw.ustring.gsub(text, "([АОӨУҮЫЕЯЁЮИЕЪЬаоөуүыэяёюиеъь%A][́̀]?)е","%1je") - text = str_gsub(text, "^Е","Ye") - text = str_gsub(text, "^е","ye") 
- text = mw.ustring.gsub(text, "([^Ѐ-ӿ])Е","%1Ye") - text = mw.ustring.gsub(text, "([^Ѐ-ӿ])е","%1ye") - - -- pattern for non-ASCII UTF-8 characters - return (str_gsub(text, '[\194-\244][\128-\191]+', tab)) -end - -return export \ No newline at end of file diff --git a/wikt/translit/dng-translit.lua b/wikt/translit/dng-translit.lua deleted file mode 100644 index 7aeeea8..0000000 --- a/wikt/translit/dng-translit.lua +++ /dev/null @@ -1,52 +0,0 @@ --- This module will transliterate Dungan language text. - -local export = {} - -local tt = { - ['Б'] = 'B', ['П'] = 'P', ['М'] = 'M', ['Ф'] = 'F', ['В'] = 'V', - ['Д'] = 'D', ['Т'] = 'T', ['Н'] = 'N', ['Л'] = 'L', - ['З'] = 'Z', ['Ц'] = 'C', ['С'] = 'S', - ['Җ'] = 'Ž', ['Ч'] = 'Č', ['Ш'] = 'Š', ['Ж'] = 'Ƶ', - --[[ ↑ ]] --[[ ↑ ]] ['Щ'] = 'X', - ['Г'] = 'G', ['К'] = 'K', ['Ң'] = 'Ŋ', ['Х'] = 'H', - ['Р'] = 'R', - - ['Ы'] = 'Ь', ['Й'] = 'J', ['Ў'] = 'W', ['Ү'] = 'Y', - --[[ ↑ ]] - ['А'] = 'A', ['Я'] = 'Ia', - ['Ә'] = 'Ə', ['Е'] = 'Iə', - ['Э'] = 'E', - ['О'] = 'O', ['Ё'] = 'Io', - ['У'] = 'U', ['Ю'] = 'Iu', - - ['И'] = 'I', - - ['б'] = 'b', ['п'] = 'p', ['м'] = 'm', ['ф'] = 'f', ['в'] = 'v', - ['д'] = 'd', ['т'] = 't', ['н'] = 'n', ['л'] = 'l', - ['з'] = 'z', ['ц'] = 'c', ['с'] = 's', - ['җ'] = 'ž', ['ч'] = 'č', ['ш'] = 'š', ['ж'] = 'ƶ', - --[[ ↑ ]] --[[ ↑ ]] ['щ'] = 'x', - ['г'] = 'g', ['к'] = 'k', ['ң'] = 'ŋ', ['х'] = 'h', - ['р'] = 'r', - - ['ы'] = 'ь', ['й'] = 'j', ['ў'] = 'w', ['ү'] = 'y', - --[[ ↑ ]] - ['а'] = 'a', ['я'] = 'ia', - ['ә'] = 'ə', ['е'] = 'iə', - ['э'] = 'e', - ['о'] = 'o', ['ё'] = 'io', - ['у'] = 'u', ['ю'] = 'iu', - - ['и'] = 'i', -}; - -function export.tr(text, lang, sc) - if (sc == 'Cyrl') then - text = mw.ustring.gsub(text, '.', tt) - return text - else - return nil - end -end - -return export \ No newline at end of file diff --git a/wikt/translit/dv-translit.lua b/wikt/translit/dv-translit.lua deleted file mode 100644 index 1e7cfd2..0000000 --- a/wikt/translit/dv-translit.lua +++ /dev/null @@ -1,61 
+0,0 @@ --- This module will transliterate Dhivehi language text. --- language code: div - -local export = {} -local u = mw.ustring.char - -local consonants = { - ['ހ'] = 'h', ['ށ'] = 'sh', ['ނ'] = 'n', ['ރ'] = 'r', ['ބ'] = 'b', - ['ޅ'] = 'ḷ', ['ކ'] = 'k', ['އ'] = '', ['ވ'] = 'v', ['މ'] = 'm', - ['ފ'] = 'f', ['ދ'] = 'd', ['ތ'] = 't', ['ލ'] = 'l', ['ގ'] = 'g', - ['ޏ'] = 'ñ', ['ސ'] = 's', ['ޑ'] = 'ḍ', ['ޒ'] = 'z', ['ޓ'] = 'ṭ', - ['ޔ'] = 'y', ['ޕ'] = 'p', ['ޖ'] = 'j', ['ޗ'] = 'c', ['ޱ'] = 'ṇ', - ['ޘ'] = 'ṯ', ['ޙ'] = 'ḥ', ['ޚ'] = 'x', ['ޛ'] = 'ź', ['ޜ'] = 'ž', - ['ޝ'] = 'š', ['ޞ'] = 'ş', ['ޟ'] = 'ḋ', ['ޠ'] = 'ţ', ['ޡ'] = 'ẓ', - ['ޢ'] = 'ʿ', ['ޣ'] = 'ġ', ['ޤ'] = 'q', ['ޥ'] = 'w', - -} - -local diacritics = { - [u(0x07A6)] = 'a', [u(0x07A7)] = 'ā', [u(0x07A8)] = 'i', [u(0x07A9)] = 'ī', [u(0x07AA)] = 'u', - [u(0x07AB)] = 'ū', [u(0x07AC)] = 'e', [u(0x07AD)] = 'ē', [u(0x07AE)] = 'o', [u(0x07AF)] = 'ō', [u(0x07B0)] = '', - -- no diacritic - [''] = 'a' -} - -function export.tr(text, lang, sc) - text = mw.ustring.gsub( - text, - '([ހ-ޥޱ])(['..u(0x07A6)..'-'..u(0x07B0)..']?)', - function(c, d) - return consonants[c] .. diacritics[d] - end) - return text -end - -return export - --- Text Expected Actual Differs at --- Passed ކަރުދާސް karudās karudās --- Passed އޮމާމަސް omāmas omāmas --- Passed ފޮތް fot fot --- -- Unit tests for [[Module:dv-translit]]. Refresh page to run tests. --- local tests = require('Module:UnitTests') --- local dv_translit = require('Module:dv-translit') - --- function tests:check_translit(Thaa, roman) --- self:equals('[[' .. Thaa .. '#Dhivehi|' .. Thaa .. 
']]', dv_translit.tr(Thaa, 'dv', 'Thaa'), roman) --- end - --- function tests:test_translit_dhivehi() --- self:iterate( --- { --- { 'ކަރުދާސް', 'karudās' }, --- { 'އޮމާމަސް', 'omāmas' }, --- { 'ފޮތް', 'fot' }, --- }, --- "check_translit" --- ) --- end - --- return tests \ No newline at end of file diff --git a/wikt/translit/el-translit.lua b/wikt/translit/el-translit.lua deleted file mode 100644 index cef1741..0000000 --- a/wikt/translit/el-translit.lua +++ /dev/null @@ -1,157 +0,0 @@ --- This module will transliterate Greek language text per WT:EL TR. --- Language code: ell --- Greek language is ---[[ -ISO 639-1 el -ISO 639-2 gre (B) ell (T) -ISO 639-3 Variously: -ell – Modern Greek -grc – Ancient Greek -cpg – Cappadocian Greek -gmy – Mycenaean Greek -pnt – Pontic -tsd – Tsakonian -yej – Yevanic -Glottolog gree1276[3] -]] -local export = {} - -local tt = { - ["α"] = "a", ["ά"] = "á", ["β"] = "v", ["γ"] = "g", ["δ"] = "d", - ["ε"] = "e", ["έ"] = "é", ["ζ"] = "z", ["η"] = "i", ["ή"] = "í", - ["θ"] = "th", ["ι"] = "i", ["ί"] = "í", ["ϊ"] = "ï", ["ΐ"] = "ḯ", - ["κ"] = "k", ["λ"] = "l", ["μ"] = "m", ["ν"] = "n", ["ξ"] = "x", - ["ο"] = "o", ["ό"] = "ó", ["π"] = "p", ["ρ"] = "r", ["σ"] = "s", - ["ς"] = "s", ["τ"] = "t", ["υ"] = "y", ["ύ"] = "ý", ["ϋ"] = "ÿ", - ["ΰ"] = "ÿ́", ["φ"] = "f", ["χ"] = "ch", ["ψ"] = "ps", ["ω"] = "o", - ["ώ"] = "ó", - ["Α"] = "A", ["Ά"] = "Á", ["Β"] = "V", ["Γ"] = "G", ["Δ"] = "D", - ["Ε"] = "E", ["Έ"] = "É", ["Ζ"] = "Z", ["Η"] = "I", ["Ή"] = "Í", - ["Θ"] = "Th", ["Ι"] = "I", ["Ί"] = "Í", ["Κ"] = "K", ["Λ"] = "L", - ["Μ"] = "M", ["Ν"] = "N", ["Ξ"] = "X", ["Ο"] = "O", ["Ό"] = "Ó", - ["Π"] = "P", ["Ρ"] = "R", ["Σ"] = "S", ["Τ"] = "T", ["Υ"] = "Y", - ["Ύ"] = "Ý", ["Φ"] = "F", ["Χ"] = "Ch", ["Ψ"] = "Ps", ["Ω"] = "O", - ["Ώ"] = "Ó", - [";"] = "?", ["·"] = ";" -} - --- transliterates any words or phrases -function export.tr(text, lang, sc) - local gsub = mw.ustring.gsub - - local acute = mw.ustring.char(0x301) - local diaeresis = 
mw.ustring.char(0x308) - - text = gsub(text, "([αεηΑΕΗ])([υύ])(.?)", - function (vowel, upsilon, following) - return tt[vowel] - .. (upsilon == "ύ" and acute or "") - .. (("θκξπσςτφχψ"):find(following) and "f" or "v") - .. following - end) - - text = gsub(text, "([αεοωΑΕΟΩ])([ηή])", - function (vowel, ita) - if ita == "ή" then - return tt[vowel] .. "i" .. diaeresis .. acute - else - return tt[vowel] .. "i" .. diaeresis - end - end) - - text = gsub(text, "[οΟ][υύ]", - {["ου"] = "ou", ["ού"] = "oú", - ["Ου"] = "Ou", ["Ού"] = "Oú"}) - - text = gsub(text, "(.?)([μΜ])π", - function (before, mi) - if before == "" or before == " " or before == "-" then - if mi == "Μ" then - return before .. "B" - else - return before .. "b" - end - end - end) - - text = gsub(text, "(.?)([νΝ])τ", - function (before, ni) - if before == "" or before == " " or before == "-" then - if ni == "Ν" then - return before .. "D" - else - return before .. "d" - end - end - end) - - text = gsub(text, "γ([γξχ])", "n%1") - - text = gsub(text, ".", tt) - - return text -end - -return export - --- 1 test failed. 
(refresh) - --- test: --- Text Expected Actual --- Passed Ποσειδώνας Poseidónas Poseidónas --- Passed αγιοποιούμαι agiopoioúmai agiopoioúmai --- Passed αγγελιάζομαι angeliázomai angeliázomai --- Passed άμπελος ámpelos ámpelos --- ypsilon --- Passed αυτός aftós aftós --- Passed πλευρά plevrá plevrá --- Passed αύριο ávrio ávrio --- Passed αύξηση áfxisi áfxisi --- Passed ευημερία evimería evimería --- Passed καθαρεύουσα katharévousa katharévousa --- Passed υπάρχω ypárcho ypárcho --- diaeresis added for disambiguation --- Passed βοήθεια voḯtheia voḯtheia --- nasal–stop clusters --- Passed μπαμπάς bampás bampás --- Passed ντετέκτιβ detéktiv detéktiv --- Failed Έβαλε ντετέκτιβ Évale detéktiv Évale ntetéktiv --- Passed εντάξει entáxei entáxei --- Passed γκαράζ gkaráz gkaráz --- Passed ανάγκη anágki anágki --- diphthongs ending in iota --- Passed είναι eínai eínai --- Passed οικείος oikeíos oikeíos --- return require "Module:transliteration module testcases"( --- require "Module:el-translit".tr, --- { --- { "Ποσειδώνας", "Poseidónas" }, --- { "αγιοποιούμαι", "agiopoioúmai" }, --- { "αγγελιάζομαι", "angeliázomai" }, --- { "άμπελος", "ámpelos" }, --- "ypsilon", --- { "αυτός", "aftós" }, --- { "πλευρά", "plevrá" }, --- { "αύριο", "ávrio" }, --- { "αύξηση", "áfxisi" }, --- { "ευημερία", "evimería" }, --- { "καθαρεύουσα", "katharévousa" }, --- { "υπάρχω", "ypárcho" }, --- "diaeresis added for disambiguation", --- { "βοήθεια", "voḯtheia" }, --- "nasal–stop clusters", --- { "μπαμπάς", "bampás" }, --- { "ντετέκτιβ", "detéktiv" }, --- { "Έβαλε '''ντετέκτιβ'''", "Évale '''detéktiv'''" }, --- { "εντάξει", "entáxei" }, --- { "γκαράζ", "gkaráz" }, --- { "ανάγκη", "anágki" }, --- "diphthongs ending in iota", --- { "είναι", "eínai" }, --- { "οικείος", "oikeíos" }, --- --[[ --- Copy this to add more examples: --- { "", "" }, --- --]] --- }, --- "Grek", "el") \ No newline at end of file diff --git a/wikt/translit/en-ta-translit.lua b/wikt/translit/en-ta-translit.lua deleted file mode 
100644 index 60ad79c..0000000 --- a/wikt/translit/en-ta-translit.lua +++ /dev/null @@ -1,56 +0,0 @@ -local export = {} - -local consonants = { - ['k']='க', ['g']='க', ['ṅ']='ங', ['c']='ச', ['ñ']='ஞ', ['ṭ']='ட', ['ḍ']='ட', ['ṛ']='ட', ['ṇ']='ண', - ['t']='த', ['d']='த', ['n']='ந', ['p']='ப', ['b']='ப', ['m']='ம', ['y']='ய', ['r']='ர' , ['l']='ல' , ['v']='வ' , - ['ḻ']='ழ' , ['ḷ']='ள' , ['ṟ']='ற' , ['ṉ']='ன' , ['ś']='ஶ' , ['j']='ஜ' , ['ṣ']='ஷ' , - ['s']='ஸ' , ['h']='ஹ' , ['f']='ஃப' , ['z']='ஃஜ' , ['x']='ஃஸ' , -} - -local diacritics = { - ['ai'] = 'ை', ['au'] = 'ௌ', - ['a'] = '' , - ['ā']= 'ா' , ['i']='ி' , ['ī']='ீ' , ['u']='ு' , ['ū']='ூ' , ['e']='ெ' , - ['ē']='ே' , ['o']='ொ' , ['ō']='ோ' , -} - -local nonconsonants = { - ['au']='ஔ' , ['ai']='ஐ' , - ['a']='அ' , ['0']='௦' , ['ā']='ஆ' , ['i']='இ' , ['ī']='ஈ' , ['u']='உ' , ['ū']='ஊ' , - ['e']='எ' , ['ē']='ஏ' , ['o']='ஒ' , ['ō']='ஓ' , ['ḥ']='ஃ' , - ['1']='௧' , ['2']='௨' , ['3']='௩' , ['4']='௪' , ['5']='௫' , ['6']='௬' , ['7']='௭' , ['8']='௮' , ['9']='௯' , -} - --- translit any words or phrases -function export.tr(text, lang, sc) - text = mw.ustring.gsub( - text, - '([kgṅcñṭḍṛṇtdnpbmyrlvḻḷṟṉśjṣshfzx])'.. - '(a[iu]?)', - function(c, d) - if d ~= "" then - return consonants[c] .. diacritics[d] - end - end) - text = mw.ustring.gsub( - text, - '([kgṅcñṭḍṛṇtdnpbmyrlvḻḷṟṉśjṣshfzx])'.. - '([aāiīuūeēoō]?)', - function(c, d) - if d ~= "" then - return consonants[c] .. diacritics[d] - else - return consonants[c] .. '்' - end - end) - text = mw.ustring.gsub(text,'(a[iu]?)', function(a) - if a ~= "" then - return nonconsonants[a] - end - end) - text = mw.ustring.gsub(text, '.', nonconsonants) - - return text -end - -return export \ No newline at end of file diff --git a/wikt/translit/ethi-translit.lua b/wikt/translit/ethi-translit.lua deleted file mode 100644 index 0fc6621..0000000 --- a/wikt/translit/ethi-translit.lua +++ /dev/null @@ -1,161 +0,0 @@ --- This module will transliterate text in the Ethiopic script per WT:ETHI TR. 
--- It is used to transliterate Amharic (am), Blin (byn), Ge'ez (gez), Tigrinya (ti) and Tigre (tig). --- Language code: amh, byn, gez, tir, tig. - -local export = {} - -local tt = { - ["ሀ"]="hä", ["ሁ"]="hu", ["ሂ"]="hi", ["ሃ"]="ha", ["ሄ"]="he", ["ህ"]="hə", ["ሆ"]="ho", - ["ለ"]="lä", ["ሉ"]="lu", ["ሊ"]="li", ["ላ"]="la", ["ሌ"]="le", ["ል"]="lə", ["ሎ"]="lo", - ["ሏ"]="lʷa", - ["ሐ"]="ḥä", ["ሑ"]="ḥu", ["ሒ"]="ḥi", ["ሓ"]="ḥa", ["ሔ"]="ḥe", ["ሕ"]="ḥə", ["ሖ"]="ḥo", - ["ሗ"]="ḥʷa", - ["መ"]="mä", ["ሙ"]="mu", ["ሚ"]="mi", ["ማ"]="ma", ["ሜ"]="me", ["ም"]="mə", ["ሞ"]="mo", - ["ሟ"]="mʷa", ["ፙ"]="mʲä", - ["ሠ"]="śä", ["ሡ"]="śu", ["ሢ"]="śi", ["ሣ"]="śa", ["ሤ"]="śe", ["ሥ"]="śə", ["ሦ"]="śo", - ["ሧ"]="śʷa", - ["ረ"]="rä", ["ሩ"]="ru", ["ሪ"]="ri", ["ራ"]="ra", ["ሬ"]="re", ["ር"]="rə", ["ሮ"]="ro", - ["ሯ"]="rʷa", ["ፘ"]="rʲä", - ["ሰ"]="sä", ["ሱ"]="su", ["ሲ"]="si", ["ሳ"]="sa", ["ሴ"]="se", ["ስ"]="sə", ["ሶ"]="so", - ["ሷ"]="sʷa", - ["ሸ"]="šä", ["ሹ"]="šu", ["ሺ"]="ši", ["ሻ"]="ša", ["ሼ"]="še", ["ሽ"]="šə", ["ሾ"]="šo", - ["ሿ"]="šʷa", - ["ቀ"]="ḳä", ["ቁ"]="ḳu", ["ቂ"]="ḳi", ["ቃ"]="ḳa", ["ቄ"]="ḳe", ["ቅ"]="ḳə", ["ቆ"]="ḳo", - ["ቈ"]="ḳʷä", ["ቊ"]="ḳʷi", ["ቋ"]="ḳʷa", ["ቌ"]="ḳʷe", ["ቍ"]="ḳʷə", - ["ቐ"]="ḳʰä", ["ቑ"]="ḳʰu", ["ቒ"]="ḳʰi", ["ቓ"]="ḳʰa", ["ቔ"]="ḳʰe", ["ቕ"]="ḳʰə", ["ቖ"]="ḳʰo", - ["ቘ"]="ḳʰʷä", ["ቚ"]="ḳʰʷi", ["ቛ"]="ḳʰʷa", ["ቜ"]="ḳʰʷe", ["ቝ"]="ḳʰʷə", - ["በ"]="bä", ["ቡ"]="bu", ["ቢ"]="bi", ["ባ"]="ba", ["ቤ"]="be", ["ብ"]="bə", ["ቦ"]="bo", - ["ቧ"]="bʷa", - ["ቨ"]="vä", ["ቩ"]="vu", ["ቪ"]="vi", ["ቫ"]="va", ["ቬ"]="ve", ["ቭ"]="və", ["ቮ"]="vo", - ["ቯ"]="vʷa", - ["ተ"]="tä", ["ቱ"]="tu", ["ቲ"]="ti", ["ታ"]="ta", ["ቴ"]="te", ["ት"]="tə", ["ቶ"]="to", - ["ቷ"]="tʷa", - ["ቸ"]="čä", ["ቹ"]="ču", ["ቺ"]="či", ["ቻ"]="ča", ["ቼ"]="če", ["ች"]="čə", ["ቾ"]="čo", - ["ቿ"]="čʷa", - ["ኀ"]="ḫä", ["ኁ"]="ḫu", ["ኂ"]="ḫi", ["ኃ"]="ḫa", ["ኄ"]="ḫe", ["ኅ"]="ḫə", ["ኆ"]="ḫo", - ["ኈ"]="ḫʷä", ["ኊ"]="ḫʷi", ["ኋ"]="ḫʷa", ["ኌ"]="ḫʷe", ["ኍ"]="ḫʷə", - ["ነ"]="nä", ["ኑ"]="nu", ["ኒ"]="ni", ["ና"]="na", ["ኔ"]="ne", ["ን"]="nə", ["ኖ"]="no", - ["ኗ"]="nʷa", - ["ኘ"]="ñä", 
["ኙ"]="ñu", ["ኚ"]="ñi", ["ኛ"]="ña", ["ኜ"]="ñe", ["ኝ"]="ñə", ["ኞ"]="ño", - ["ኟ"]="ñʷa", - ["አ"]="ʾä", ["ኡ"]="ʾu", ["ኢ"]="ʾi", ["ኣ"]="ʾa", ["ኤ"]="ʾe", ["እ"]="ʾə", ["ኦ"]="ʾo", - ["ኧ"]="ʾʷa", - ["ከ"]="kä", ["ኩ"]="ku", ["ኪ"]="ki", ["ካ"]="ka", ["ኬ"]="ke", ["ክ"]="kə", ["ኮ"]="ko", - ["ኰ"]="kʷä", ["ኲ"]="kʷi", ["ኳ"]="kʷa", ["ኴ"]="kʷe", ["ኵ"]="kʷə", - ["ኸ"]="xä", ["ኹ"]="xu", ["ኺ"]="xi", ["ኻ"]="xa", ["ኼ"]="xe", ["ኽ"]="xə", ["ኾ"]="xo", - ["ወ"]="wä", ["ዉ"]="wu", ["ዊ"]="wi", ["ዋ"]="wa", ["ዌ"]="we", ["ው"]="wə", ["ዎ"]="wo", - ["ዐ"]="ʿä", ["ዑ"]="ʿu", ["ዒ"]="ʿi", ["ዓ"]="ʿa", ["ዔ"]="ʿe", ["ዕ"]="ʿə", ["ዖ"]="ʿo", - ["ዘ"]="zä", ["ዙ"]="zu", ["ዚ"]="zi", ["ዛ"]="za", ["ዜ"]="ze", ["ዝ"]="zə", ["ዞ"]="zo", - ["ዟ"]="zʷa", - ["ዠ"]="žä", ["ዡ"]="žu", ["ዢ"]="ži", ["ዣ"]="ža", ["ዤ"]="že", ["ዥ"]="žə", ["ዦ"]="žo", - ["ዧ"]="žʷa", - ["የ"]="yä", ["ዩ"]="yu", ["ዪ"]="yi", ["ያ"]="ya", ["ዬ"]="ye", ["ይ"]="yə", ["ዮ"]="yo", - ["ደ"]="dä", ["ዱ"]="du", ["ዲ"]="di", ["ዳ"]="da", ["ዴ"]="de", ["ድ"]="də", ["ዶ"]="do", - ["ዷ"]="dʷa", - ["ጀ"]="ǧä", ["ጁ"]="ǧu", ["ጂ"]="ǧi", ["ጃ"]="ǧa", ["ጄ"]="ǧe", ["ጅ"]="ǧə", ["ጆ"]="ǧo", - ["ጇ"]="ǧʷa", - ["ገ"]="gä", ["ጉ"]="gu", ["ጊ"]="gi", ["ጋ"]="ga", ["ጌ"]="ge", ["ግ"]="gə", ["ጎ"]="go", - ["ጐ"]="gʷä", ["ጒ"]="gʷi", ["ጓ"]="gʷa", ["ጔ"]="gʷe", ["ጕ"]="gʷə", - ["ጘ"]="ŋä", ["ጙ"]="ŋu", ["ጚ"]="ŋi", ["ጛ"]="ŋa", ["ጜ"]="ŋe", ["ጝ"]="ŋə", ["ጞ"]="ŋo", - ["ⶓ"]="ŋʷä", ["ⶔ"]="ŋʷi", ["ጟ"]="ŋʷa", ["ⶕ"]="ŋʷe", ["ⶖ"]="ŋʷə", - ["ጠ"]="ṭä", ["ጡ"]="ṭu", ["ጢ"]="ṭi", ["ጣ"]="ṭa", ["ጤ"]="ṭe", ["ጥ"]="ṭə", ["ጦ"]="ṭo", - ["ጧ"]="ṭʷa", - ["ጨ"]="č̣ä", ["ጩ"]="č̣u", ["ጪ"]="č̣i", ["ጫ"]="č̣a", ["ጬ"]="č̣e", ["ጭ"]="č̣ə", ["ጮ"]="č̣o", - ["ጯ"]="č̣ʷa", - ["ጰ"]="p̣ä", ["ጱ"]="p̣u", ["ጲ"]="p̣i", ["ጳ"]="p̣a", ["ጴ"]="p̣e", ["ጵ"]="p̣ə", ["ጶ"]="p̣o", - ["ጷ"]="p̣ʷa", - ["ጸ"]="ṣä", ["ጹ"]="ṣu", ["ጺ"]="ṣi", ["ጻ"]="ṣa", ["ጼ"]="ṣe", ["ጽ"]="ṣə", ["ጾ"]="ṣo", - ["ጿ"]="ṣʷa", - ["ፀ"]="ṣ́ä", ["ፁ"]="ṣ́u", ["ፂ"]="ṣ́i", ["ፃ"]="ṣ́a", ["ፄ"]="ṣ́e", ["ፅ"]="ṣ́ə", ["ፆ"]="ṣ́o", - ["ፈ"]="fä", ["ፉ"]="fu", ["ፊ"]="fi", ["ፋ"]="fa", ["ፌ"]="fe", ["ፍ"]="fə", ["ፎ"]="fo", - 
["ፏ"]="fʷa", ["ፚ"]="fʲä", - ["ፐ"]="pä", ["ፑ"]="pu", ["ፒ"]="pi", ["ፓ"]="pa", ["ፔ"]="pe", ["ፕ"]="pə", ["ፖ"]="po", - ["ፗ"]="pʷa", - --punctuation - ["፠"]="§", ["፡"]="-", ["።"]=".", ["፣"]=",", ["፤"]=";", ["፥"]=":", ["፦"]=":-", ["፧"]="?", ["፨"]="¶", -} - -local number = { - ['፩']=1, ['፪']=2, ['፫']=3, ['፬']=4, ['፭']=5, ['፮']=6, ['፯']=7, ['፰']=8, ['፱']=9, - ['፲']=10, ['፳']=20, ['፴']=30, ['፵']=40, ['፶']=50, ['፷']=60, ['፸']=70, ['፹']=80, ['፺']=90 -} - --- This works on small numbers. Please improve if you find any problems. -function export.number(geez) - - local val = 0 - if mw.ustring.find(geez, '^[፻፼]') then - geez = '፩' .. geez -- prepend 1 - end - local func = mw.text.gsplit(geez, '') - - while true do - local digit = func() - if (not digit) then break end - if mw.ustring.find(digit, '[፩-፺]') then - val = val + number[digit] - elseif digit == '፻' then - val = val * 100 - elseif digit == '፼' then - val = val * 10000 - end - end - - return val - -end - -function export.tr(text, lang, sc) - - text = (mw.ustring.gsub(text, ".", tt)) - - -- remove ə that is not in the first syllable - text = mw.ustring.gsub(text, "(...)ə", "%1") - text = mw.ustring.gsub(text, "(...)ə", "%1") - text = mw.ustring.gsub(text, "[፩-፼]+", export.number) - - return text - -end - -return export - --- Text Expected Actual Differs at --- Passed ስም səm səm --- Passed እንሽላሊት ʾənšlalit ʾənšlalit --- Passed ፎርማጆ formaǧo formaǧo --- Passed ግንደ ቆርቁር gəndä ḳorḳur gəndä ḳorḳur --- Passed ኢትዮጵያ ʾityop̣ya ʾityop̣ya --- Passed አዲስ አበባ ʾädis ʾäbäba ʾädis ʾäbäba --- Passed ዩክሬን yukren yukren --- Passed አፍጋኒስታን ʾäfganistan ʾäfganistan --- Passed አውራ ዶሮ ʾäwra doro ʾäwra doro --- Passed ቢራቢሮ birabiro birabiro --- -- Unit tests for [[Module:Ethi-translit]]. Refresh page to run tests. --- local tests = require('Module:UnitTests') --- local ethi_translit = require('Module:Ethi-translit') - --- --TO DO --- function tests:do_test_translit(ethi, roman, xlit) --- self:equals('[[' .. ethi .. '#Ethi|' .. ethi .. 
']]', ethi_translit.tr(ethi, 'am', 'Ethi'), roman) --- end - --- function tests:test_translit_ethi() --- local examples = { --- { 'ስም', 'səm' }, --- { 'እንሽላሊት', 'ʾənšlalit' }, --- { 'ፎርማጆ', 'formaǧo' }, --- { 'ግንደ ቆርቁር', 'gəndä ḳorḳur' }, --- { 'ኢትዮጵያ', 'ʾityop̣ya' }, --- { 'አዲስ አበባ', 'ʾädis ʾäbäba' }, --- { 'ዩክሬን', 'yukren' }, --- { 'አፍጋኒስታን', 'ʾäfganistan' }, - --- { 'አውራ ዶሮ', 'ʾäwra doro' }, --- -- it's geminated "birrabirro" but it's not expressed graphically --- { 'ቢራቢሮ', 'birabiro' }, --- } --- self:iterate(examples, 'do_test_translit') --- end - --- return tests \ No newline at end of file diff --git a/wikt/translit/etymology-languages-data.lua b/wikt/translit/etymology-languages-data.lua deleted file mode 100644 index 5dfe9e4..0000000 --- a/wikt/translit/etymology-languages-data.lua +++ /dev/null @@ -1,1295 +0,0 @@ ---[[canonicalName, otherNames, wikipedia_article -These have the same meaning as they do in the data modules for the regular languages; see Module:languages/data2/documentation. -parent -The language that this etymology language is a subvariety of. 
For instance, "en" (English) is the parent of en-GB (British English).]] -local m = {} - - --- Aramaic varieties - -m["sem-jar"] = { - canonicalName = "Jewish Aramaic", - otherNames = {"Judeo-Aramaic"}, - parent = "arc", - wikidata_item = "Q2633045", -} - -m["tmr"] = { - canonicalName = "Jewish Babylonian Aramaic", - parent = "arc", - wikidata_item = "Q33407", -} - -m["jpa"] = { - canonicalName = "Jewish Palestinian Aramaic", - otherNames = {"Galilean Aramaic"}, - parent = "arc", - wikidata_item = "Q948909", -} - - --- Chinese varieties - -m["wuu-sha"] = { - canonicalName = "Shanghainese", - parent = "wuu", - wikidata_item = "Q36718", -} -m["Sha."] = m["wuu-sha"] - -m["hak-wuh"] = { - canonicalName = "Wuhua Chinese", - parent = "hak", -} - -m["hsn-old"] = { - canonicalName = "Old Xiang", - otherNames = {"Lou-Shao"}, - parent = "hsn", - wikidata_item = "Q7085453", -} - -m["pinhua"] = { - canonicalName = "Pinghua", - parent = "yue", - wikidata_item = "Q2735715", -} - -m["nan-hai"] = { - canonicalName = "Hainanese", - parent = "nan", - wikidata_item = "Q934541", -} - -m["nan-hok"] = { - canonicalName = "Hokkien", - parent = "nan", - wikidata_item = "Q1624231", -} - -m["nan-amo"] = { - canonicalName = "Amoy", - otherNames = {"Xiamenese"}, - parent = "nan-hok", - wikidata_item = "Q68744", -} - -m["nan-phl"] = { - canonicalName = "Philippine Hokkien", - parent = "nan-hok", - wikidata_item = "Q3236692", -} - - --- English, Scots and Old English varieties - -m["en-GB"] = { - canonicalName = "British English", - parent = "en", - wikidata_item = "Q7979", -} -m["British English"] = m["en-GB"] -m["BE."] = m["en-GB"] - -m["en-US"] = { - canonicalName = "American English", - parent = "en", - wikidata_item = "Q7976", -} -m["American English"] = m["en-US"] -m["AE."] = m["en-US"] - --- Scots varieties - -m["sco-osc"] = { - canonicalName = "Early Scots", - parent = "sco", - wikidata_item = "Q5326738", -} -m["Early Scots"] = m["sco-osc"] -m["Old Scots"] = m["sco-osc"] -m["O.Sc."] = 
m["sco-osc"] - -m["sco-smi"] = { - canonicalName = "Middle Scots", - parent = "sco", - wikidata_item = "Q3327000", -} -m["Middle Scots"] = m["sco-smi"] -m["Mid.Sc."] = m["sco-smi"] - -m["sco-ins"] = { - canonicalName = "Insular Scots", - parent = "sco", - wikidata_item = "Q16919205", -} -m["Insular Scots"] = m["sco-ins"] -m["Ins.Sc."] = m["sco-ins"] - -m["sco-uls"] = { - canonicalName = "Ulster Scots", - parent = "sco", - wikidata_item = "Q201966", -} -m["Ulster Scots"] = m["sco-uls"] -m["Uls.Sc."] = m["sco-uls"] - -m["sco-nor"] = { - canonicalName = "Northern Scots", - parent = "sco", - wikidata_item = "Q16928150", -} -m["Northern Scots"] = m["sco-nor"] -m["Nor.Sc."] = m["sco-nor"] - -m["sco-sou"] = { - canonicalName = "Southern Scots", - parent = "sco", - wikidata_item = "Q7570457", -} -m["Southern Scots"] = m["sco-sou"] -m["Borders Scots"] = m["sco-sou"] -m["Sou.Sc."] = m["sco-sou"] - --- Old English varieties - --- Includes both Mercian and Northumbrian. -m["ang-ang"] = { - canonicalName = "Anglian Old English", - parent = "ang", -} - -m["ang-mer"] = { - canonicalName = "Mercian Old English", - parent = "ang", -} - -m["ang-nor"] = { - canonicalName = "Northumbrian Old English", - parent = "ang", - wikidata_item = "Q1798915", -} - ---[[ -m["ang-wsx"] = { - canonicalName = "West Saxon Old English", - parent = "ang", -} -]] - - --- French and Norman varieties - -m["fro-nor"] = { - canonicalName = "Old Northern French", - otherNames = {"Old Norman", "Old Norman French"}, - parent = "fro", - wikidata_item = "Q2044917", -} -m["Old Northern French"] = m["fro-nor"] -m["ONF."] = m["fro-nor"] - -m["fro-pic"] = { - canonicalName = "Old Picard", - parent = "fro", -} - -m["xno"] = { - canonicalName = "Anglo-Norman", - parent = "fro", - wikidata_item = "Q35214", -} - -m["fr-CA"] = { - canonicalName = "Canadian French", - parent = "fr", - wikidata_item = "Q1450506", -} -m["Canadian French"] = m["fr-CA"] -m["CF."] = m["fr-CA"] - -m["fr-aca"] = { - canonicalName = "Acadian 
French", - parent = "fr", - wikidata_item = "Q415109", -} -m["Acadian French"] = m["fr-aca"] -m["fra-aca"] = m["fr-aca"] - -m["frc"] = { - canonicalName = "Cajun French", - otherNames = {"Louisiana French"}, - parent = "fr", - wikidata_item = "Q880301", -} - --- Norman varieties - -m["roa-grn"] = { - canonicalName = "Guernésiais", - parent = "nrf", - wikidata_item = "Q56428", -} - -m["roa-jer"] = { - canonicalName = "Jèrriais", - parent = "nrf", - wikidata_item = "Q56430", -} - - --- Gaulish - -m["xcg"] = { - canonicalName = "Cisalpine Gaulish", - parent = "cel-gau", - wikidata_item = "Q3832927", -} - -m["xtg"] = { - canonicalName = "Transalpine Gaulish", - parent = "cel-gau", - wikidata_item = "Q29977", -} - - --- German and Old High German varieties - -m["de-AT"] = { - canonicalName = "Austrian German", - parent = "de", - wikidata_item = "Q306626", -} -m["Austrian German"] = m["de-AT"] -m["AG."] = m["de-AT"] - -m["de-AT-vie"] = { - canonicalName = "Viennese German", - parent = "de-AT", - wikidata_item = "Q56474", -} -m["Viennese German"] = m["de-AT-vie"] -m["VG."] = m["de-AT-vie"] - -m["ksh"] = { - canonicalName = "Kölsch", - parent = "gmw-cfr", - wikidata_item = "Q4624", -} -m["Kölsch"] = m["ksh"] - -m["pfl"] = { - canonicalName = "Palatine German", - otherNames = {"Pfälzisch", "Pälzisch", "Palatinate German"}, - parent = "gmw-rfr", - wikidata_item = "Q23014", -} - --- Old High German varieties - -m["lng"] = { - canonicalName = "Lombardic", - parent = "goh", - wikidata_item = "Q35972", -} -m["Lombardic"] = m["lng"] -m["goh-lng"] = m["lng"] - - --- Greek varieties - -m["qfa-sub-grc"] = { - canonicalName = "Pre-Greek", - parent = "qfa-sub", - wikidata_item = "Q965052", -} -m["pregrc"] = m["qfa-sub-grc"] - -m["grc-boi"] = { - canonicalName = "Boeotian Greek", - parent = "grc", - wikidata_item = "Q406373", -} - -m["grc-koi"] = { - canonicalName = "Koine Greek", - parent = "grc", - wikidata_item = "Q107358", -} -m["Koine"] = m["grc-koi"] - -m["gkm"] = { - 
canonicalName = "Byzantine Greek", - otherNames = {"Medieval Greek"}, - parent = "grc", - wikidata_item = "Q36387", -} -m["Medieval Greek"] = m["gkm"] - -m["grc-dor"] = { - canonicalName = "Doric Greek", - parent = "grc", - wikidata_item = "Q285494", -} - -m["grc-att"] = { - canonicalName = "Attic Greek", - parent = "grc", - wikidata_item = "Q506588", -} - -m["grc-ion"] = { - canonicalName = "Ionic Greek", - parent = "grc", - wikidata_item = "Q504165", -} - -m["grc-pam"] = { - canonicalName = "Pamphylian Greek", - parent = "grc", - wikidata_item = "Q2271793", -} - -m["grc-kre"] = { -- code used elsewhere: see [[Module:grc:Dialects]] - canonicalName = "Cretan Ancient Greek", -- to distinguish from Cretan Greek below - parent = "grc", -} - -m["grc-arp"] = { - canonicalName = "Arcadocypriot Greek", - parent = "grc", - wikidata_item = "Q499602", -} - -m["el-arc"] = { - canonicalName = "Arcadian Greek", - parent = "el", -} - -m["el-cyp"] = { - canonicalName = "Cypriotic Greek", - parent = "el", -} - -m["el-pap"] = { - canonicalName = "Paphian Greek", - parent = "el", -} - -m["grc-aeo"] = { - canonicalName = "Aeolic Greek", - otherNames = {"Lesbic Greek", "Lesbian Greek", "Aeolian Greek"}, - parent = "grc", - wikidata_item = "Q406373", -} - -m["loc-ozo"] = { - canonicalName = "Ozolian Locrian", - parent = "el", -} - -m["loc-opu"] = { - canonicalName = "Opuntian Locrian", - parent = "el", -} - -m["el-ths"] = { - canonicalName = "Thessalian Greek", - parent = "el", - wikidata_item = "Q406373", -} - -m["grc-ela"] = { - canonicalName = "Elean Greek", - parent = "grc", -} - -m["grc-epc"] = { - canonicalName = "Epic Greek", - parent = "grc", - wikidata_item = "Q990062", -} - -m["grc-hmr"] = { - canonicalName = "Homeric Greek", - parent = "grc", - wikidata_item = "Q990062", -} - -m["el-crt"] = { - canonicalName = "Cretan Greek", - parent = "el", - wikidata_item = "Q588306", -} - - --- Hebrew varieties - -m["hbo"] = { - canonicalName = "Biblical Hebrew", - otherNames = 
{"Classical Hebrew"}, - parent = "he", - wikidata_item = "Q1982248", -} - -m["he-IL"] = { - canonicalName = "Modern Israeli Hebrew", - parent = "he", - wikidata_item = "Q8141", -} - -m["sem-can-pro"] = { - canonicalName = "Proto-Canaanite", - parent = "sem-pro", - wikidata_item = "Q1073942", -} - - --- Iranian varieties: --- Historical and current Iranian dialects - -m["ae-old"] = { - canonicalName = "Old Avestan", - otherNames = {"Gathic Avestan"}, - parent = "ae", - wikidata_item = "Q29572", -} - -m["ae-yng"] = { - canonicalName = "Younger Avestan", - otherNames = {"Young Avestan"}, - parent = "ae", -} - -m["bcc"] = { - canonicalName = "Southern Balochi", - otherNames = {"Southern Baluchi"}, - parent = "bal", - wikidata_item = "Q33049", -} -m["bal-sou"] = m["bcc"] - -m["bgp"] = { - canonicalName = "Eastern Balochi", - otherNames = {"Eastern Baluchi"}, - parent = "bal", - wikidata_item = "Q33049", -} -m["bal-eas"] = m["bgp"] - -m["bgn"] = { - canonicalName = "Western Balochi", - otherNames = {"Western Baluchi"}, - parent = "bal", - wikidata_item = "Q33049", -} -m["bal-wes"] = m["bgn"] - -m["ira-cen"] = { - canonicalName = "Central Iranian", - parent = "ira", -} - -m["ira-nei"] = { - canonicalName = "Northeastern Iranian", - parent = "ira", -} - -m["ira-nwi"] = { - canonicalName = "Northwestern Iranian", - parent = "ira", -} - -m["ira-swi"] = { - canonicalName = "Southwestern Iranian", - parent = "ira", -} - -m["ira-mid"] = { - canonicalName = "Middle Iranian", - parent = "ira", - wikidata_item = "Q33527", -} -m["MIr."] = m["ira-mid"] - -m["ira-old"] = { - canonicalName = "Old Iranian", - parent = "ira", - wikidata_item = "Q33527", -} -m["OIr."] = m["ira-old"] - -m["xmn"] = { - canonicalName = "Manichaean Middle Persian", - parent = "pal", -} - -m["fa-cls"] = { - canonicalName = "Classical Persian", - parent = "fa", - wikidata_item = "Q9168", -} - -m["os-dig"] = { - canonicalName = "Digor", - otherNames = {"Digoron"}, - parent = "os", - wikidata_item = "Q3027861", 
-} - -m["os-iro"] = { - canonicalName = "Iron", - parent = "os", -} - -m["prs"] = { - canonicalName = "Dari", - otherNames = {"Dari Persian", "Eastern Persian", "Afghan Persian"}, - parent = "fa", - wikidata_item = "Q178440", -} - -m["sog-bud"] = { - canonicalName = "Buddhist Sogdian", - parent = "sog", -} - -m["sog-man"] = { - canonicalName = "Manichean Sogdian", - parent = "sog", -} -m["sog-chr"] = { - canonicalName = "Christian Sogdian", - parent = "sog", -} - -m["oru-kan"] = { - canonicalName = "Kaniguram", - parent = "oru", - wikidata_item = "Q6363164", -} - -m["oru-log"] = { - canonicalName = "Logar", - parent = "oru", -} - --- Southwestern Fars lects - -m["fay-bur"] = { - canonicalName = "Burenjani", - parent = "fay", -} - -m["fay-bsh"] = { - canonicalName = "Bushehri", - parent = "fay", -} - -m["fay-dsh"] = { - canonicalName = "Dashtaki", - parent = "fay", -} - -m["fay-dav"] = { - canonicalName = "Davani", - parent = "fay", - wikidata_item = "Q5228140", -} - -m["fay-eze"] = { - canonicalName = "Emamzada Esma’ili", - parent = "fay", -} - -m["fay-gav"] = { - canonicalName = "Gavkoshaki", - parent = "fay", -} - -m["fay-kho"] = { - canonicalName = "Khollari", - parent = "fay", -} - -m["fay-kon"] = { - canonicalName = "Kondazi", - parent = "fay", -} - -m["fay-kzo"] = { - canonicalName = "Old Kazeruni", - parent = "fay", -} - -m["fay-mas"] = { - canonicalName = "Masarami", - parent = "fay", -} - -m["fay-pap"] = { - canonicalName = "Papuni", - parent = "fay", -} - -m["fay-sam"] = { - canonicalName = "Samghani", - parent = "fay", -} - -m["fay-shr"] = { - canonicalName = "Shirazi", - parent = "fay", -} - -m["fay-sho"] = { - canonicalName = "Old Shirazi", - parent = "fay", -} - -m["fay-sam"] = { - canonicalName = "Samghani", - parent = "fay", -} - -m["fay-kar"] = { - canonicalName = "Khargi", - parent = "fay", -} - -m["fay-sor"] = { - canonicalName = "Sorkhi", - parent = "fay", -} - --- Kermanic lects - -m["ker-ham"] = { - canonicalName = "Hamadani", - parent = 
"ira-ker", - wikidata_item = "Q6302426", -} - -m["ker-mah"] = { - canonicalName = "Mahallati", - parent = "ira-ker", -} - -m["ker-von"] = { - canonicalName = "Vonishuni", - parent = "ira-ker", -} - -m["ker-del"] = { - canonicalName = "Delijani", - parent = "ira-ker", -} - -m["ker-kas"] = { - canonicalName = "Kashani", - parent = "ira-ker", -} - -m["ker-kes"] = { - canonicalName = "Kese'i", - parent = "ira-ker", -} - -m["ker-mey"] = { - canonicalName = "Meyme'i", - parent = "ira-ker", -} - -m["ker-abz"] = { - canonicalName = "Abuzeydabadi", - parent = "ira-ker", -} - -m["ker-aby"] = { - canonicalName = "Abyanehi", - parent = "ira-ker", -} - -m["ker-far"] = { - canonicalName = "Farizandi", - parent = "ira-ker", -} - -m["ker-jow"] = { - canonicalName = "Jowshaqani", - parent = "ira-ker", -} - -m["ker-qoh"] = { - canonicalName = "Qohrudi", - parent = "ira-ker", -} - -m["ker-yar"] = { - canonicalName = "Yarandi", - parent = "ira-ker", -} - -m["ker-tar"] = { - canonicalName = "Tari", - parent = "ira-ker", -} - -m["ker-sed"] = { - canonicalName = "Sedehi", - parent = "ira-ker", -} - - -m["ker-ard"] = { - canonicalName = "Ardestani", - parent = "ira-ker", -} - -m["ker-zef"] = { - canonicalName = "Zefre'i", - parent = "ira-ker", -} - -m["ker-isf"] = { - canonicalName = "Isfahani", - parent = "ira-ker", -} - -m["ker-kaf"] = { - canonicalName = "Kafroni", - parent = "ira-ker", -} - -m["ker-var"] = { - canonicalName = "Varzenei", - parent = "ira-ker", -} - -m["ker-xur"] = { - canonicalName = "Khuri", - parent = "ira-ker", -} - -m["nyq-ana"] = { - canonicalName = "Anaraki", - parent = "nyq", -} - -m["gbz-krm"] = { - canonicalName = "Kermani", - parent = "gbz", -} - -m["gbz-yzd"] = { - canonicalName = "Yazdi", - parent = "gbz", -} - --- Indo-Aryan varieties - -m["awa-old"] = { - canonicalName = "Old Awadhi", - parent = "awa", -} - -m["bra-old"] = { - canonicalName = "Old Braj", - parent = "bra", -} - -m["gu-kat"] = { - canonicalName = "Kathiyawadi", - otherNames = {"Kathiyawadi 
Gujarati", "Kathiawadi"}, - parent = "gu", -} - -m["gu-mid"] = { - canonicalName = "Middle Gujarati", - parent = "gu", - wikidata_item = "Q24907429", -} - -m["hi-mum"] = { - canonicalName = "Bombay Hindi", - otherNames = {"Mumbai Hindi", "Bambaiyya Hindi"}, - parent = "hi", - wikidata_item = "Q3543151", -} - -m["hi-mid"] = { - canonicalName = "Middle Hindi", - parent = "hi", -} - -m["pa-old"] = { - canonicalName = "Old Punjabi", - parent = "pa", -} - -m["bn-old"] = { - canonicalName = "Old Bengali", - parent = "bn", -} - -m["bn-mid"] = { - canonicalName = "Middle Bengali", - parent = "bn", -} - -m["or-old"] = { - canonicalName = "Old Oriya", - parent = "or", -} - -m["or-mid"] = { - canonicalName = "Middle Oriya", - parent = "or", -} - -m["sa-bhs"] = { - canonicalName = "Buddhist Hybrid Sanskrit", - parent = "sa", - wikidata_item = "Q248758", -} - -m["sa-cls"] = { - canonicalName = "Classical Sanskrit", - parent = "sa", - wikidata_item = "Q11059", -} - -m["sa-neo"] = { - canonicalName = "Neo-Sanskrit", - parent = "sa", - wikidata_item = "Q11059", -} - -m["sa-ved"] = { - canonicalName = "Vedic Sanskrit", - parent = "sa", - wikidata_item = "Q36858", -} - -m["si-med"] = { - canonicalName = "Medieval Sinhalese", - otherNames = {"Medieval Sinhala"}, - parent = "si", -} - -m["kok-mid"] = { - canonicalName = "Middle Konkani", - otherNames = {"Medieval Konkani"}, - parent = "kok", -} - -m["kok-old"] = { - canonicalName = "Old Konkani", - otherNames = {"Early Konkani"}, - parent = "kok", -} - - --- Indian subcontinent languages - - --- Dhivehi varieties - -m["mlk-dv"] = { - canonicalName = "Mulaku Dhivehi", - otherNames = {"Mulaku Divehi", "Mulaku Bas"}, - parent = "dv", -} - -m["hvd-dv"] = { - canonicalName = "Huvadhu Dhivehi", - otherNames = {"Huvadhu Divehi", "Huvadhu Bas"}, - parent = "dv", -} - -m["add-dv"] = { - canonicalName = "Addu Dhivehi", - otherNames = {"Addu Divehi", "Addu Bas"}, - parent = "dv", -} - --- Dravidian varieties - -m["ta-mid"] = { - canonicalName = 
"Middle Tamil", - parent = "ta", - wikidata_item = "Q20987434", -} - --- Prakrits - -m["psc-prk"] = { - canonicalName = "Paisaci", - otherNames = {"Paisaci Prakrit"}, - parent = "sa", - wikidata_item = "Q2995607", -} - -m["prk-avt"] = { - canonicalName = "Avanti", - otherNames = {"Avanti Prakrit"}, - parent = "sa", -} - -m["prc-prk"] = { - canonicalName = "Pracya", - otherNames = {"Pracya Prakrit"}, - parent = "sa", -} - -m["bhl-prk"] = { - canonicalName = "Bahliki", - otherNames = {"Bahliki Prakrit"}, - parent = "sa", -} - -m["dks-prk"] = { - canonicalName = "Daksinatya", - otherNames = {"Daksinatya Prakrit"}, - parent = "sa", -} - -m["skr-prk"] = { - canonicalName = "Sakari", - otherNames = {"Sakari Prakrit"}, - parent = "sa", -} - -m["cnd-prk"] = { - canonicalName = "Candali", - otherNames = {"Candali Prakrit"}, - parent = "sa", -} - -m["sbr-prk"] = { - canonicalName = "Sabari", - otherNames = {"Sabari Prakrit"}, - parent = "sa", -} - -m["abh-prk"] = { - canonicalName = "Abhiri", - otherNames = {"Abhiri Prakrit"}, - parent = "sa", -} - -m["drm-prk"] = { - canonicalName = "Dramili", - otherNames = {"Dramili Prakrit"}, - parent = "sa", -} - -m["odr-prk"] = { - canonicalName = "Odri", - otherNames = {"Odri Prakrit"}, - parent = "sa", -} - - --- Italian, Latin and other Italic varieties - -m["it-oit"] = { - canonicalName = "Old Italian", - parent = "it", - wikidata_item = "Q652", -} -m["roa-oit"] = m["it-oit"] - --- Latin varieties by period - -m["la-lat"] = { - canonicalName = "Late Latin", - parent = "la", - wikidata_item = "Q1503113", -} -m["Late Latin"] = m["la-lat"] -m["LL."] = m["la-lat"] -m["LL"] = m["la-lat"] - -m["la-vul"] = { - canonicalName = "Vulgar Latin", - parent = "la", - wikidata_item = "Q37560", -} -m["Vulgar Latin"] = m["la-vul"] -m["VL."] = m["la-vul"] - -m["la-med"] = { - canonicalName = "Medieval Latin", - parent = "la", - wikidata_item = "Q1163234", -} -m["Medieval Latin"] = m["la-med"] -m["ML."] = m["la-med"] -m["ML"] = m["la-med"] - 
-m["la-ecc"] = { - canonicalName = "Ecclesiastical Latin", - otherNames = {"Church Latin"}, - parent = "la", - wikidata_item = "Q1247932", -} -m["Ecclesiastical Latin"] = m["la-ecc"] -m["EL."] = m["la-ecc"] - -m["la-ren"] = { - canonicalName = "Renaissance Latin", - parent = "la", - wikidata_item = "Q499083", -} -m["Renaissance Latin"] = m["la-ren"] -m["RL."] = m["la-ren"] - -m["la-new"] = { - canonicalName = "New Latin", - otherNames = {"Modern Latin"}, - parent = "la", - wikidata_item = "Q1248221", -} -m["New Latin"] = m["la-new"] -m["NL."] = m["la-new"] - --- other Italic lects - -m["osc-luc"] = { - canonicalName = "Lucanian", - parent = "osc", -} - -m["osc-sam"] = { - canonicalName = "Samnite", - parent = "osc", -} - -m["xum-her"] = { - canonicalName = "Hernician", - parent = "xum", -} - - --- Mongolic lects - -m["mn-kha"] = { - canonicalName = "Khalkha Mongolian", - parent = "mn", - wikidata_item = "Q6399808", -} - -m["mn-ord"] = { - canonicalName = "Ordos Mongolian", - parent = "mn", - wikidata_item = "Q716904", -} - -m["mn-cha"] = { - canonicalName = "Chakhar Mongolian", - parent = "mn", - wikidata_item = "Q907425", -} - -m["mn-khr"] = { - canonicalName = "Khorchin Mongolian", - parent = "mn", - wikidata_item = "Q3196210", -} - -m["mjg-huz"] = { - canonicalName = "Mongghul", - otherNames = {"Huzhu Monguor"}, - parent = "mjg", - wikidata_item = "Q34214", -} - -m["mjg-min"] = { - canonicalName = "Mangghuer", - otherNames = {"Minhe Monguor"}, - parent = "mjg", - wikidata_item = "Q34214", -} - - --- Phillipine varieties - -m["tl-old"] = { - canonicalName = "Old Tagalog", - parent = "tl", - wikidata_item = "Q12967437", -} - -m["tl-cls"] = { - canonicalName = "Classical Tagalog", - parent = "tl", -} - - --- Pre-Roman substrates - -m["qfa-sub-ibe"] = { - canonicalName = "a pre-Roman substrate of Iberia", - parent = "qfa-sub", - wikidata_item = "Q530799", -} - -m["qfa-sub-bal"] = { - canonicalName = "a pre-Roman substrate of the Balkans", - parent = "qfa-sub", -} - 
- --- Slavic varieties - -m["zle-oru"] = { - canonicalName = "Old Russian", - parent = "orv", - wikidata_item = "Q35228", -} - -m["zle-obe"] = { - canonicalName = "Old Belarusian", - parent = "orv", - wikidata_item = "Q13211", -} - -m["zle-ouk"] = { - canonicalName = "Old Ukrainian", - parent = "orv", - wikidata_item = "Q13211", -} - - --- Turkic lects - -m["trk-cmn"] = { - canonicalName = "Common Turkic", - parent = "trk-pro", - wikidata_item = "Q1126028", -} - -m["otk-kir"] = { - canonicalName = "Old Kirghiz", - parent = "otk", - wikidata_item = "Q83142", -} - -m["klj-arg"] = { - canonicalName = "Arghu", - parent = "klj", - wikidata_item = "Q33455", -} - - --- Other lects - -m["alv-kro"] = { - canonicalName = "Kromanti", - parent = "crp", - wikidata_item = "Q1093206", -} - -m["bat-pro"] = { - canonicalName = "Proto-Baltic", - parent = "ine-bsl-pro", - wikidata_item = "Q1703347", -} - -m["es-lun"] = { - canonicalName = "Lunfardo", - parent = "es", - wikidata_item = "Q1401612", -} -m["Lunfardo"] = m["es-lun"] - -m["fiu-pro"] = { - canonicalName = "Proto-Finno-Ugric", - parent = "urj-pro", - wikidata_item = "Q79890", -} - -m["gem-sue"] = { - canonicalName = "Suevic", - otherNames = {"Suebian"}, - parent = "gmw", - wikidata_item = "Q155085", -} - -m["mkh-okm-A"] = { - canonicalName = "Angkorian Old Khmer", - parent = "mkh-okm", - wikipedia_article = "Khmer language#Historical periods", - wikidata_item = "Q9205", -} - -m["mkh-okm-P"] = { - canonicalName = "Pre-Angkorian Old Khmer", - parent = "mkh-okm", - wikipedia_article = "Khmer language#Historical periods", - wikidata_item = "Q9205", -} - -m["mul-tax"] = { - canonicalName = "taxonomic name", - parent= "mul", -} -m["Tax."] = m["mul-tax"] - -m["prv"] = { - canonicalName = "Provençal", - parent = "oc", - wikidata_item = "Q241243", -} - -m["qfa-pyg"] = { - canonicalName = "a substrate language originally spoken by the Pygmies", - parent = "qfa-sub", - wikipedia_article = "Classification of Pygmy languages#Original 
Pygmy language(s)", -} -m["pygmy"] = m["qfa-pyg"] - -m["tai-shz"] = { - canonicalName = "Shangsi Zhuang", - parent = "za", - wikidata_item = "Q13216", -} - -m["und-tdl"] = { - canonicalName = "Turduli", - parent = "und", - wikipedia_article = "Turduli", -} - -m["und-tdt"] = { - canonicalName = "Turdetani", - parent = "und", - wikipedia_article = "Turdetani", -} - -m["und-xbi"] = { - canonicalName = "Xianbei", - parent = "und", - wikipedia_article = "Xianbei", -} - -m["und-xnu"] = { - canonicalName = "Xiongnu", - parent = "und", - wikipedia_article = "Xiongnu", -} - -m["urj-fpr-pro"] = { - canonicalName = "Proto-Finno-Permic", - parent = "urj-pro", -} - -m["woy"] = { - canonicalName = "Weyto", - parent = "und", - wikidata_item = "Q3915918", -} - - -return m \ No newline at end of file diff --git a/wikt/translit/etymology-languages.lua b/wikt/translit/etymology-languages.lua deleted file mode 100644 index fe83de8..0000000 --- a/wikt/translit/etymology-languages.lua +++ /dev/null @@ -1,112 +0,0 @@ -local export = {} - -local EtymologyLanguage = {} - - -function EtymologyLanguage:getCode() - return self._code -end - - -function EtymologyLanguage:getCanonicalName() - return self._rawData.canonicalName -end - - -function EtymologyLanguage:getOtherNames() - return self._rawData.otherNames or {} -end - - ---function EtymologyLanguage:getAllNames() --- return self._rawData.names ---end - - -function EtymologyLanguage:getCategoryName() - return self:getCanonicalName() -end - - -function EtymologyLanguage:getType() - return "etymology language" -end - - -function EtymologyLanguage:getParentCode() - return self._rawData.parent -end - - -function EtymologyLanguage:getAncestors() - if not self._ancestorObjects then - self._ancestorObjects = {} - - for _, ancestor in ipairs(self._rawData.ancestors or {}) do - table.insert(self._ancestorObjects, export.getByCode(ancestor) or require("Module:languages").getByCode(ancestor)) - end - end - - return self._ancestorObjects -end - 
-function EtymologyLanguage:getWikidataItem() - return self._rawData.wikidata_item -end - -function EtymologyLanguage:getWikipediaArticle() - return self._rawData.wikipedia_article or - (self:getWikidataItem() and mw.wikibase and - mw.wikibase.sitelink(self:getWikidataItem(), 'enwiki')) or - self._rawData.canonicalName -end - -function EtymologyLanguage:makeWikipediaLink() - return "[[w:" .. self:getWikipediaArticle() .. "|" .. self:getCanonicalName() .. "]]" -end - - -function EtymologyLanguage:toJSON() - local ret = { - canonicalName = self:getCanonicalName(), - categoryName = self:getCategoryName(), - code = self._code, - otherNames = self:getOtherNames(), - parent = self._rawData.parent, - type = self:getType(), - } - - return require("Module:JSON").toJSON(ret) -end - - -function EtymologyLanguage:getRawData() - return self._rawData -end - - -EtymologyLanguage.__index = EtymologyLanguage - - -function export.makeObject(code, data) - return data and setmetatable({ _rawData = data, _code = code }, EtymologyLanguage) or nil -end - - -function export.getByCode(code) - return export.makeObject(code, mw.loadData("Module:etymology languages/data")[code]) -end - - -function export.getByCanonicalName(name) - local code = mw.loadData("Module:etymology languages/by name")[name] - - if not code then - return nil - end - - return export.makeObject(code, mw.loadData("Module:etymology languages/data")[code]) -end - - -return export \ No newline at end of file diff --git a/wikt/translit/eve-translit.lua b/wikt/translit/eve-translit.lua deleted file mode 100644 index 703a2c9..0000000 --- a/wikt/translit/eve-translit.lua +++ /dev/null @@ -1,110 +0,0 @@ --- This module will transliterate Even language text. --- Language code: eve. 
- -local export = {} - -local u = mw.ustring.char - -local MACRON = u(0x0304) -local DOTABOVE = u(0x0307) - -local tab = { - ["А"]='A', ["а"]='a', ["Б"]='B', ["б"]='ʙ', ["В"]='W', ["в"]='w', - ["Е"]='E', ["е"]='e', ["Ё"]='Jo', ["ё"]='jo', ["Г"]='G', ["г"]='g', - ["Д"]='D', ["д"]='d', ["И"]='I', ["и"]='i', ["Ӣ"]='Ī', ["ӣ"]='ī', - ["Й"]='J', ["й"]='j', ["К"]='K', ["к"]='k', ["Л"]='L', ["л"]='l', - ["М"]='M', ["м"]='m', ["Н"]='N', ["н"]='n', ["Ӈ"]='Ŋ', ["ӈ"]='ŋ', - ["О"]='O', ["о"]='o', ["Ө"]='Ö', ["ө"]='ö', ["Ӫ"]='Ö', ["ӫ"]='ö', - ["П"]='P', ["п"]='p', ["Р"]='R', ["р"]='r', ["С"]='S', ["с"]='s', - ["Т"]='T', ["т"]='t', ["У"]='U', ["у"]='u', ["Ӯ"]='Ū', ["ӯ"]='ū', - ["Ф"]='F', ["ф"]='f', ["Х"]='H', ["х"]='h', ["Ч"]='C', ["ч"]='c', - ["Ы"]='I', ["ы"]='i', ["Э"]='Ə', ["э"]='ə', ["Ю"]='Ju', ["ю"]='ju', - ["Я"]='Ẹ', ["я"]='ẹ', -- Not present in the original latinisation - -- non-native letters - ["Ж"]='Z', ["ж"]='z', ["З"]='Z', ["з"]='z', - ["Ц"]='C', ["ц"]='c', ["Ш"]='S', ["ш"]='s', ["Щ"]='S', ["щ"]='s', - ['Ъ']='ʺ', ['ъ']='ʺ', ["Ь"]="’", ["ь"]="’", - - -- non-standard letters - ["Ҕ"]='Γ', ["ҕ"]='γ', ["Ҥ"]='Ŋ', ["ҥ"]='ŋ', ["Ү"]='Ü', ["ү"]='ü', - ["Һ"]='Ḥ', ["һ"]='ḥ', ["Ӄ"]='Q', ["ӄ"]='q' -} - - -local iotatedTranslit = { - ["е"] = "je", - ["я"] = "ja", - ["и"] = "ji", - ["ӣ"] = "jī", - ["Е"] = "Je", - ["Я"] = "Ja", -} - -local replacements = { - { "Ё", 'Jo' }, - { "ё", 'jo' }, - { "Ӫ", 'Jö' }, - { "ӫ", 'jö' }, - { "Ю", 'Ju' }, - { "ю", 'ju' }, - - -- Unfortunately the Cyrillic alphabet doesn't distinguish between ʒe and ʒə - { "Де", 'Ʒe' }, - { "де", 'ʒe' }, - { "Не", 'Ņe' }, - { "не", 'ņe' }, - - { "Ди", 'Ʒi' }, - { "ди", 'ʒi' }, - { "Ни", 'Ņi' }, - { "ни", 'ņi' }, - - { "Дя", 'Ʒa' }, - { "дя", 'ʒa' }, - { "Ня", 'Ņa' }, - { "ня", 'ņa' }, - - { "Дj", 'Ʒ' }, - { "дj", 'ʒ' }, - { "Нj", 'Ņ' }, - { "нj", 'ņ' }, - - -- The following is non-standard but supposedly used in non-standard spelling and dialect forms - { "Дь", 'Ʒ' }, - { "дь", 'ʒ' }, - { "Нь", 'Ņ' }, - { "нь", 'ņ' }, 
- - { "Нг", 'Ŋ' }, - { "нг", 'ŋ' }, -} - -function export.tr(text, lang, sc) - local ugsub, str_gsub = mw.ustring.gsub, string.gsub - local UTF8char = '[\1-\127\194-\244][\128-\191]*' - - for i, replacement in ipairs(replacements) do - text = str_gsub(text, unpack(replacement)) - end - - -- е after a vowel or at the beginning of a word becomes ye - -- Again, the Cyrillic alphabet doesn't distinguish between je and jə - text = ugsub(text, - "([АОУЫЯЕИӢЪЬаӣиоуыэяеъьaeioöu]["..MACRON..DOTABOVE.."]?)([еяиӣ])", - function(preceding, iotated) - return preceding .. iotatedTranslit[iotated] - end) - - text = ugsub(text, "^[ЕеЯя]", iotatedTranslit) - - text = ugsub(text, "([^Ѐ-ӿ])([ЕеЯя])", - function(preceding, iotated) - return preceding .. iotatedTranslit[iotated] - end) - - text = str_gsub(text, UTF8char, tab) - - return text -end - -return export \ No newline at end of file diff --git a/wikt/translit/evn-translit.lua b/wikt/translit/evn-translit.lua deleted file mode 100644 index d195cfe..0000000 --- a/wikt/translit/evn-translit.lua +++ /dev/null @@ -1,129 +0,0 @@ --- This module will transliterate Evenki language text per WT:EVN TR. 
--- Language code: evn - -local u = mw.ustring.char - -local MACRON = u(0x0304) -local DOTABOVE = u(0x0307) -local DOTBELOW = u(0x0323) - -local str_gsub, ugsub = string.gsub, mw.ustring.gsub -local UTF8char = '[\1-\127\194-\244][\128-\191]*' - -local export = {} - -local tab = { - ["А"]='A', ["а"]='a', ["В"]='W', ["в"]='w', ["Е"]='E', ["е"]='e', - ["Ё"]='Jo', ["ё"]='jo', ["Г"]='G', ["г"]='g', ["Д"]='D', ["д"]='d', - ["И"]='I', ["и"]='i', ["Ӣ"]='Ī', ["ӣ"]='ī', ["Й"]='J', ["й"]='j', - ["К"]='K', ["к"]='k', ["Л"]='L', ["л"]='l', ["М"]='M', ["м"]='m', - ["Н"]='N', ["н"]='n', ["Ӈ"]='Ŋ', ["ӈ"]='ŋ', ["О"]='O', ["о"]='o', - ["П"]='P', ["п"]='p', ["Р"]='R', ["р"]='r', ["С"]='S', ["с"]='s', - ["Т"]='T', ["т"]='t', ["У"]='U', ["у"]='u', ["Ӯ"]='Ū', ["ӯ"]='ū', - ["Ф"]='F', ["ф"]='f', ["Х"]='H', ["х"]='h', ["Ч"]='Ç', ["ч"]='ç', - ["Ы"]='I', ["ы"]='i', ["Э"]='Ə', ["э"]='ə', ["Ю"]='Ju', ["ю"]='ju', - ["Я"]='Ja', ["я"]='ja', - -- non-native letters - ["Б"]='B', ["б"]='b', ["Ж"]='Z', ["ж"]='z', ["З"]='Z', ["з"]='z', - ["Ц"]='C', ["ц"]='c', ["Ш"]='Ş', ["ш"]='ş', ["Щ"]='Ş', ["щ"]='ş', --in literary language ш is only found in Russian words and was originally represented with s, however some dialects have ш in native words - ['Ъ']='ʺ', ['ъ']='ʺ', ["Ь"]="’", ["ь"]="’" -} - -local other = { - { 'Я', 'Ja' }, - { 'я', 'ja' }, - { 'Ё', 'Jo' }, - { 'ё', 'jo' }, - { 'Ю', 'Ju' }, - { 'ю', 'ju' }, - --- Unfortunately the Cyrillic alphabet doesn't distinguish between ʒe and ʒə - { 'Де', 'Ʒe' }, - { 'де', 'ʒe' }, - { 'Не', 'Ņe' }, - { 'не', 'ņe' }, - - { 'Ди', 'Ʒi' }, - { 'ди', 'ʒi' }, - { 'Ни', 'Ņi' }, - { 'ни', 'ņi' }, - - { 'Дӣ', 'Ʒī' }, - { 'дӣ', 'ʒī' }, - { 'Нӣ', 'Ņī' }, - { 'нӣ', 'ņī' }, - - { 'Дj', 'Ʒ' }, - { 'дj', 'ʒ' }, - { 'Нj', 'Ņ' }, - { 'нj', 'ņ' }, -} - -function export.tr(text, lang, sc) - for i, replacement in ipairs(other) do - text = str_gsub(text, unpack(replacement)) - end - - -- е after a vowel or at the beginning of a word becomes ye - -- Again, the Cyrillic alphabet 
doesn't distinguish between je and jə - text = ugsub(text, - "([АОУЫЕИӢЪЬаӣиоуыэеъьaeiou]["..MACRON..DOTABOVE..DOTBELOW.."]?)е", - "%1je") - text = ugsub(text, - "([АОУЫЕИӢЪЬаӣиоуыэеъьaeiou]["..MACRON..DOTABOVE..DOTBELOW.."]?)и", - "%1ji") - text = ugsub(text, - "([АОУЫЕИӢЪЬаӣиоуыэеъьaeiou]["..MACRON..DOTABOVE..DOTBELOW.."]?)ӣ", - "%1jī") - text = str_gsub(text, "^Е","Je") - text = str_gsub(text, "^е","je") - text = ugsub(text, "([^Ѐ-ӿ])Е","%1Je") - text = ugsub(text, "([^Ѐ-ӿ])е","%1je") - - return (str_gsub(text, UTF8char, tab)) -end - -return export - --- All tests passed. (refresh) - --- test_translit: --- Text Expected Actual --- Passed тэкэ̄ты̄мӣ təkə̄tīmī təkə̄tīmī --- Passed тэгэен təgəjen təgəjen --- Passed тыеденэ tijeʒenə tijeʒenə --- Passed надан-дя̄гӣ nadan-ʒāgī nadan-ʒāgī --- Passed мушняла̄бӯн muşņalābūn muşņalābūn --- Passed саӈняпчамӣ saŋņapçamī saŋņapçamī --- Passed эӣмкӯн əjīmkūn əjīmkūn --- Passed эюргэ əjurgə əjurgə --- Passed дё̄нчамӣ ʒōnçamī ʒōnçamī --- Passed тырганӣ tirgaņī tirgaņī --- local tests = require("Module:UnitTests") --- local evn_translit = require("Module:evn-translit") - --- local compose = mw.ustring.toNFC - --- function tests:check_translit(Cyrl, Latn) --- self:equals( --- ('[[%s#Evenki|%s]]'):format(Cyrl, Cyrl), --- compose(evn_translit.tr(Cyrl, 'evn', 'Cyrl')), --- compose(Latn)) --- end - --- function tests:test_translit() --- self:check_translit("тэкэ̄ты̄мӣ", "təkə̄tīmī") --- self:check_translit("тэгэен", "təgəjen") --- self:check_translit("тыеденэ", "tijeʒenə") --- self:check_translit("надан-дя̄гӣ", "nadan-ʒāgī") --- self:check_translit("мушняла̄бӯн", "muşņalābūn") --- self:check_translit("саӈняпчамӣ", "saŋņapçamī") --- self:check_translit("эӣмкӯн", "əjīmkūn") --- self:check_translit("эюргэ", "əjurgə") --- self:check_translit("дё̄нчамӣ", "ʒōnçamī") --- self:check_translit("тырганӣ", "tirgaņī") - - --- end - --- return tests \ No newline at end of file diff --git a/wikt/translit/fa-translit.lua 
b/wikt/translit/fa-translit.lua deleted file mode 100644 index dfa63ec..0000000 --- a/wikt/translit/fa-translit.lua +++ /dev/null @@ -1,142 +0,0 @@ --- This module will transliterate Persian language text per WT:FA TR. --- Language code: pal or xmn - -local export = {} - -local U = mw.ustring.char - -local fatHatan = U(0x64B) -- What is the Persian term for this? -local fathe = U(0x64E) -- also zabar -local zamme = U(0x64F) -- also zir -local kasre = U(0x650) -- also piš -local tashdid = U(0x651) -- also called shadda -local jazm = U(0x652) - - -local mapping = { - ["ا"] = 'â', ["ب"] = 'b', ["پ"] = 'p', ["ت"] = 't', ["ث"] = 's', ["ج"] = 'j', ["چ"] = 'č', ["ح"] = 'h', ["خ"] = 'x', - ["د"] = 'd', ["ذ"] = 'z', ["ر"] = 'r', ["ز"] = 'z', ["ژ"] = 'ž', ["س"] = 's', ["ش"] = 'š', ["ص"] = 's', ["ض"] = 'z', - ["ط"] = 't', ["ظ"] = 'z', ["غ"] = 'ğ', ["ف"] = 'f', ["ق"] = 'q', ["ک"] = 'k', ["گ"] = 'g', ["ل"] = 'l', - ["م"] = 'm', ["ن"] = 'n', ["و"] = 'u', ["ه"] = 'h', ["ی"] = 'i', ["آ"] = 'â', - - -- displaying on separate lines as the viewing becomes distorted on these combinations - ["ع"] = "’", - ["ء"] = "’", - ["ئ"] = "’", - ["ؤ"] = "’", - ["أ"] = "’", - - -- diacritics - [fathe] = "a", - [kasre] = "e", - [zamme] = "o", - [jazm] = "", -- also sokun - no vowel - [U(0x200C)] = "-", -- ZWNJ (zero-width non-joiner) - [fatHatan] = "n", - -- ligatures - ["ﻻ"] = "lâ", - ["ﷲ"] = "llâh", - -- kashida - ["ـ"] = "", -- kashida, no sound - -- numerals - ["۱"] = "1", ["۲"] = "2", ["۳"] = "3", ["۴"] = "4", ["۵"] = "5", - ["۶"] = "6", ["۷"] = "7", ["۸"] = "8", ["۹"] = "9", ["۰"] = "0", - -- normal arabic variants to numerals - ["١"] = "1", ["٢"] = "2", ["٣"] = "3", ["٤"] = "4", ["٥"] = "5", - ["٦"] = "6", ["٧"] = "7", ["٨"] = "8", ["٩"] = "9", ["٠"] = "0", - -- punctuation (leave on separate lines) - ["؟"] = "?", -- question mark - ["،"] = ",", -- comma - ["؛"] = ";", -- semicolon - ["«"] = '“', -- quotation mark - ["»"] = '”', -- quotation mark - ["٪"] = "%", -- percent - ["؉"] = "‰", -- 
per mille - ["٫"] = ".", -- decimals - ["٬"] = ",", -- thousand - ["ۀ"] = "-ye" -- he ye (in ezâfe) -} - -function export.tr(text, lang, sc) - - text = mw.ustring.gsub(text, 'ىٰ', "â") - text = mw.ustring.gsub(text, 'ا' .. fatHatan, "an") - -- text = mw.ustring.gsub(text, 'الله', "ﷲ") - -- text = mw.ustring.gsub(text, 'لا', "ﻻ") - text = mw.ustring.gsub(text, '.', mapping) - text = mw.ustring.gsub(text, 'ou', "u") - text = mw.ustring.gsub(text, 'aâ', "â") - text = mw.ustring.gsub(text, 'âa', "a") - text = mw.ustring.gsub(text, 'ei', "i") - text = mw.ustring.gsub(text, 'ai', "ey") - text = mw.ustring.gsub(text, 'au', "ou") - text = mw.ustring.gsub(text, 'u([aâeiou])', "v%1") - text = mw.ustring.gsub(text, 'i([aâeiou])', "y%1") - text = mw.ustring.gsub(text, "([aâeiou])(" .. tashdid .. ")", "%2%1") -- swapping tašdid with vowels - text = mw.ustring.gsub(text, "(.)" .. tashdid, "%1%1") -- implementing tašdid - text = mw.ustring.gsub(text, 'eh$', "e") - text = mw.ustring.gsub(text, 'eh([^aâeiouy’bdfghjklmnpqrstvyxzčğšž])', "e%1") - - return text -end - -return export - - --- 7 tests failed. (refresh) - --- test_translit_persian: --- Text Expected Actual Differs at --- Failed سَرانجام‎ saranjâm sarânjâm 4 --- Failed سَرانْجام‎ saranjâm sarânjâm 4 --- Failed سَرَانْجَام‎ saranjâm sarânjâm 4 --- Passed کُروز‎ koruz koruz --- Failed کُرُوز‎ korouz koruz 4 --- Passed طَنین‎ tanin tanin --- Failed طَنِین‎ taneyn tanin 4 --- Passed عَصاً‎ ’asan ’asan --- Failed خانه‎ xâne xânh 4 --- Passed خانِه‎ xâne xâne --- Passed کُرِۀ شُمالی‎ kore-ye šomâli kore-ye šomâli --- Failed ضَمّه‎ zamme zammh 5 --- Passed ضَمِّه‎ zamme zamme --- Passed وُدکا‎ vodkâ vodkâ --- Passed اَرمَنِستان‎ armanestân armanestân --- Passed باکو‎ bâku bâku --- -- Unit tests for [[Module:fa-translit]]. Refresh page to run tests. 
--- local tests = require('Module:UnitTests') --- local fa_translit = require('Module:fa-translit') --- local m_links = require('Module:links') - --- local fa = require('Module:languages').getByCode('fa') --- local function link(term) --- return m_links.full_link{term = term, lang = fa} --- end - --- function tests:do_test_translit(Pers, roman, xlit) --- self:equals(link(Pers), fa_translit.tr(Pers, 'fa', 'fa-Arab'), roman) --- end - --- function tests:test_translit_persian() --- local examples = { --- { 'سَرانجام', "saranjâm" }, --- { 'سَرانْجام', "saranjâm" }, --- { 'سَرَانْجَام', "saranjâm" }, --- { 'کُروز', "koruz" }, --- { 'کُرُوز', "korouz" }, --- { 'طَنین', "tanin" }, --- { 'طَنِین', "taneyn" }, --- { 'عَصاً', "’asan" }, --- { 'خانه', "xâne" }, --- { 'خانِه', "xâne" }, --- { 'کُرِۀ شُمالی', "kore-ye šomâli" }, --- { 'ضَمّه', "zamme" }, --- { 'ضَمِّه', "zamme" }, --- { 'وُدکا', "vodkâ" }, --- { 'اَرمَنِستان', "armanestân" }, --- { 'باکو', "bâku" }, --- } --- self:iterate(examples, "do_test_translit") --- end - --- return tests diff --git a/wikt/translit/families-data.lua b/wikt/translit/families-data.lua deleted file mode 100644 index 6b8c556..0000000 --- a/wikt/translit/families-data.lua +++ /dev/null @@ -1,2189 +0,0 @@ ---[=[ - This module contains definitions for all language family codes on Wiktionary. 
-]=]-- - -local m = {} - -m["aav"] = { - canonicalName = "Austro-Asiatic", - otherNames = {"Austroasiatic"}, - wikidata_item = "Q33199", -} - -m["aav-ban"] = { - canonicalName = "Bahnaric", - family = "mkh", - wikidata_item = "Q56309", -} - -m["aav-khm"] = { - canonicalName = "Khmuic", - family = "aav", - wikidata_item = "Q1323245", -} - -m["aav-nbn"] = { - canonicalName = "North Bahnaric", - family = "aav-ban", - wikidata_item = "Q56309", -} - -m["aav-pal"] = { - canonicalName = "Palaungic", - family = "aav", - wikidata_item = "Q2391173", -} - -m["afa"] = { - canonicalName = "Afro-Asiatic", - otherNames = {"Afroasiatic"}, - wikidata_item = "Q25268", -} - -m["alg"] = { - canonicalName = "Algonquian", - family = "aql", - wikidata_item = "Q33392", -} - -m["alv"] = { - canonicalName = "Atlantic-Congo", - family = "nic", - wikidata_item = "Q771124", -} - -m["alv-edo"] = { - canonicalName = "Edoid", - family = "alv-von", - wikidata_item = "Q1287469", -} - -m["alv-kwa"] = { - canonicalName = "Kwa", - family = "alv", - wikidata_item = "Q33430", -} - -m["alv-mbm"] = { - canonicalName = "Mbum", - family = "alv-sav", - wikidata_item = "Q6799814", -} - -m["alv-sav"] = { - canonicalName = "Savanna", - family = "alv", - wikidata_item = "Q4403672", -} - -m["alv-sng"] = { - canonicalName = "Senegambian", - family = "alv", - wikidata_item = "Q1708753", -} - -m["alv-von"] = { - canonicalName = "Volta-Niger", - family = "alv", - wikidata_item = "Q34177", -} - -m["apa"] = { - canonicalName = "Apachean", - otherNames = {"Southern Athabaskan"}, - family = "ath", - wikidata_item = "Q27758", -} - -m["aqa"] = { - canonicalName = "Alacalufan", - wikidata_item = "Q1288430", -} - -m["aql"] = { - canonicalName = "Algic", - otherNames = {"Algonquian-Ritwan", "Algonquian-Wiyot-Yurok"}, - wikidata_item = "Q721612", -} - -m["art"] = { - canonicalName = "constructed", - otherNames = {"artificial", "planned"}, - family = "qfa-not", - wikidata_item = "Q33215", -} - -m["ath"] = { - canonicalName = 
"Athabaskan", - family = "xnd", - wikidata_item = "Q27475", -} - -m["ath-nor"] = { - canonicalName = "North Athabaskan", - otherNames = {"Northern Athabaskan"}, - family = "ath", -} - -m["ath-pco"] = { - canonicalName = "Pacific Coast Athabaskan", - family = "ath", - wikidata_item = "Q20654", -} - -m["auf"] = { - canonicalName = "Arauan", - otherNames = {"Arahuan", "Arauán", "Arawa", "Arawan", "Arawán"}, - wikidata_item = "Q626772", -} - ---[=[ Exceptional language and family codes for Australian Aboriginal languages - can use the prefix "aus-", though "aus" is no longer itself a family code. ]=]-- -m["aus-arn"] = { - canonicalName = "Arnhem", - otherNames = {"Gunwinyguan", "Macro-Gunwinyguan"}, - wikidata_item = "Q2581700", -} - -m["aus-bub"] = { - canonicalName = "Bunuban", - otherNames = {"Bunaban"}, - wikidata_item = "Q2495148", -} - -m["aus-cww"] = { - canonicalName = "Central New South Wales", - family = "aus-pam", - wikidata_item = "Q5061507", -} - -m["aus-dal"] = { - canonicalName = "Daly", - wikidata_item = "Q2478079", -} - -m["aus-dyb"] = { - canonicalName = "Dyirbalic", - family = "aus-pam", - wikidata_item = "Q1850666", -} - -m["aus-gar"] = { - canonicalName = "Garawan", - wikidata_item = "Q5521951", -} - -m["aus-gun"] = { - canonicalName = "Gunwinyguan", - otherNames = {"Gunwingguan"}, - family = "aus-arn", - wikidata_item = "Q2581700", -} - -m["aus-kar"] = { - canonicalName = "Karnic", - family = "aus-pam", - wikidata_item = "Q4215578", -} - -m["aus-nga"] = { - canonicalName = "Ngayarda", - family = "aus-psw", - wikidata_item = "Q16153490", -} - -m["aus-nyu"] = { - canonicalName = "Nyulnyulan", - wikidata_item = "Q2039408", -} - -m["aus-pam"] = { - canonicalName = "Pama-Nyungan", - wikidata_item = "Q33942", -} - -m["aus-pmn"] = { - canonicalName = "Paman", - family = "aus-pam", - wikidata_item = "Q2640654", -} - -m["aus-psw"] = { - canonicalName = "Southwest Pama-Nyungan", - family = "aus-pam", - wikidata_item = "Q2258160", -} - -m["aus-rnd"] = { - 
canonicalName = "Arandic", - family = "aus-pam", - wikidata_item = "Q4784071", -} - -m["aus-tnk"] = { - canonicalName = "Tangkic", - wikidata_item = "Q1823065", -} - -m["aus-wdj"] = { - canonicalName = "Iwaidjan", - otherNames = {"Yiwaidjan"}, - wikidata_item = "Q4196968", -} - -m["aus-yid"] = { - canonicalName = "Yidinyic", - family = "aus-pam", - wikidata_item = "Q4205849", -} - -m["aus-yol"] = { - canonicalName = "Yolngu", - otherNames = {"Yolŋu", "Yolngu Matha"}, - family = "aus-pam", - wikidata_item = "Q2511254", -} - -m["aus-yuk"] = { - canonicalName = "Yuin-Kuric", - family = "aus-pam", - wikidata_item = "Q3833021", -} - -m["awd"] = { - canonicalName = "Arawakan", - otherNames = {"Arawak", "Maipurean", "Maipuran"}, - wikidata_item = "Q626753", -} - -m["awd-nwk"] = { - canonicalName = "Nawiki", - otherNames = {"Newiki"}, - family = "awd", -} - -m["awd-taa"] = { - canonicalName = "Ta-Arawakan", - otherNames = {"Ta-Arawak", "Ta-Maipurean"}, - family = "awd", - wikidata_item = "Q7672731", -} - -m["azc"] = { - canonicalName = "Uto-Aztecan", - otherNames = {"Uto-Aztekan"}, - wikidata_item = "Q34073", -} - -m["azc-cup"] = { - canonicalName = "Cupan", - family = "azc-tak", - wikidata_item = "Q19866871", -} - -m["azc-nah"] = { - canonicalName = "Nahuan", - otherNames = {"Aztecan"}, - family = "azc", - wikidata_item = "Q11965602", -} - -m["azc-num"] = { - canonicalName = "Numic", - family = "azc", - wikidata_item = "Q2657541", -} - -m["azc-tak"] = { - canonicalName = "Takic", - family = "azc", - wikidata_item = "Q1280305", -} - -m["azc-trc"] = { - canonicalName = "Taracahitic", - otherNames = {"Taracahitan"}, - family = "azc", - wikidata_item = "Q4245032", -} - -m["bad"] = { - canonicalName = "Banda", - family = "nic-ubg", - wikidata_item = "Q806234", -} - -m["bai"] = { - canonicalName = "Bamileke", - family = "nic-grf", - wikidata_item = "Q806005", -} - -m["bat"] = { - canonicalName = "Baltic", - family = "ine-bsl", - wikidata_item = "Q33136", -} - -m["ber"] = { - 
canonicalName = "Berber", - otherNames = {"Tamazight"}, - family = "afa", - wikidata_item = "Q25448", -} - -m["bnt"] = { - canonicalName = "Bantu", - family = "nic-bod", - wikidata_item = "Q33146", -} - -m["bnt-ngu"] = { - canonicalName = "Nguni", - otherNames = {"Ngoni"}, - family = "bnt", - wikidata_item = "Q961559", -} - -m["btk"] = { - canonicalName = "Batak", - family = "poz-nws", - wikidata_item = "Q1998595", -} - ---[=[ Exceptional language and family codes for Central American Indian languages - may use the prefix "cai-", though "cai" is no longer itself a family code. ]=]-- - ---[=[ Exceptional language and family codes for Caucasian languages can use - the prefix "cau-", though "cau" is no longer itself a family code. ]=]-- - -m["cau-abz"] = { - canonicalName = "Abkhaz-Abaza", - otherNames = {"Abazgi", "Abkhaz-Tapanta"}, - family = "cau-nwc", - wikidata_item = "Q4663617", -} - -m["cau-ava"] = { - canonicalName = "Avaro-Andian", - otherNames = {"Avar-Andian", "Avar-Andi", "Avar-Andic", "Andian"}, - family = "cau-nec", - wikidata_item = "Q4827766", -} - -m["cau-cir"] = { - canonicalName = "Circassian", - otherNames = {"Cherkess"}, - family = "cau-nwc", - wikidata_item = "Q858543", -} - -m["cau-drg"] = { - canonicalName = "Dargwa", - otherNames = {"Dargin"}, - family = "cau-nec", - wikidata_item = "Q5222637", -} - -m["cau-lzg"] = { - canonicalName = "Lezghian", - otherNames = {"Lezgi", "Lezgian", "Lezgic"}, - family = "cau-nec", - wikidata_item = "Q2144370", -} - -m["cau-nkh"] = { - canonicalName = "Nakh", - otherNames = {"North-Central Caucasian"}, - family = "cau-nec", - wikidata_item = "Q24441", -} - -m["cau-nec"] = { - canonicalName = "Northeast Caucasian", - otherNames = {"Dagestanian", "Nakho-Dagestanian", "Caspian"}, - family = "ccn", - wikidata_item = "Q27387", -} - -m["cau-nwc"] = { - canonicalName = "Northwest Caucasian", - otherNames = {"Abkhazo-Adyghean", "Abkhaz-Adyghe", "Pontic"}, - family = "ccn", - wikidata_item = "Q33852", -} - -m["cau-tsz"] 
= { - canonicalName = "Tsezian", - otherNames = {"Tsezic", "Didoic"}, - family = "cau-nec", - wikidata_item = "Q1651530", -} - -m["cba"] = { - canonicalName = "Chibchan", - family = "qfa-mch", -- or none if Macro-Chibchan is considered undemonstrated - wikidata_item = "Q520478", -} - -m["ccn"] = { - canonicalName = "North Caucasian", - wikidata_item = "Q33732", -} - -m["ccs"] = { - canonicalName = "Kartvelian", - otherNames = {"South Caucasian"}, - wikidata_item = "Q34030", -} - -m["ccs-gzn"] = { - canonicalName = "Georgian-Zan", - otherNames = {"Karto-Zan"}, - family = "ccs", - wikidata_item = "Q34030", -} - -m["ccs-zan"] = { - canonicalName = "Zan", - otherNames = {"Zanuri", "Colchian"}, - family = "ccs-gzn", - wikidata_item = "Q2606912", -} - -m["cdc"] = { - canonicalName = "Chadic", - family = "afa", - wikidata_item = "Q33184", -} - -m["cdc-cbm"] = { - canonicalName = "Central Chadic", - otherNames = {"Biu-Mandara"}, - family = "cdc", - wikidata_item = "Q2251547", -} - -m["cdc-est"] = { - canonicalName = "East Chadic", - family = "cdc", - wikidata_item = "Q2276221", -} - -m["cdc-mas"] = { - canonicalName = "Masa", - family = "cdc", - wikidata_item = "Q2136092", -} - -m["cdc-wst"] = { - canonicalName = "West Chadic", - family = "cdc", - wikidata_item = "Q2447774", -} - -m["cdd"] = { - canonicalName = "Caddoan", - wikidata_item = "Q1025090", -} - -m["cel"] = { - canonicalName = "Celtic", - family = "ine", - wikidata_item = "Q25293", -} - -m["cel-bry"] = { - canonicalName = "Brythonic", - family = "cel", - wikidata_item = "Q156877", -} - -m["cel-gae"] = { - canonicalName = "Goidelic", - otherNames = {"Gaelic"}, - protoLanguage = "pgl", - family = "cel", - wikidata_item = "Q56433", -} - -m["chi"] = { - canonicalName = "Chimakuan", - wikidata_item = "Q1073088", -} - -m["cmc"] = { - canonicalName = "Chamic", - family = "poz-mcm", - wikidata_item = "Q2997506", -} - -m["crp"] = { - canonicalName = "creole or pidgin", - family = "qfa-not", -} - -m["csu"] = { - 
canonicalName = "Central Sudanic", - family = "ssa", - wikidata_item = "Q190822", -} - -m["csu-sar"] = { - canonicalName = "Sara", - family = "csu", - wikidata_item = "Q2036691", -} - -m["cus"] = { - canonicalName = "Cushitic", - family = "afa", - wikidata_item = "Q33248", -} - -m["day"] = { - canonicalName = "Land Dayak", - family = "poz-bop", - wikidata_item = "Q2760613", -} - -m["del"] = { - canonicalName = "Lenape", - otherNames = {"Delaware"}, - family = "alg", - wikidata_item = "Q2665761", -} - -m["dmn"] = { - canonicalName = "Mande", - family = "nic", - wikidata_item = "Q33681", -} - -m["dra"] = { - canonicalName = "Dravidian", - wikidata_item = "Q33311", -} - -m["egx"] = { - canonicalName = "Egyptian", - protoLanguage = "egy", - family = "afa", - wikidata_item = "Q50868", -} - -m["esx"] = { - canonicalName = "Eskimo-Aleut", - wikidata_item = "Q25946", -} - -m["esx-esk"] = { - canonicalName = "Eskimo", - family = "esx", - wikidata_item = "Q25946", -} - -m["esx-inu"] = { - canonicalName = "Inuit", - family = "esx-esk", - wikidata_item = "Q27796", -} - -m["euq"] = { - canonicalName = "Vasconic", - wikidata_item = "Q4669240", -} - -m["fiu-fin"] = { - canonicalName = "Finnic", - family = "urj", - wikidata_item = "Q33328", -} - -m["gem"] = { - canonicalName = "Germanic", - family = "ine", - wikidata_item = "Q21200", -} - -m["gme"] = { - canonicalName = "East Germanic", - family = "gem", - wikidata_item = "Q108662", -} - -m["gmq"] = { - canonicalName = "North Germanic", - family = "gem", - wikidata_item = "Q106085", -} - -m["gmw"] = { - canonicalName = "West Germanic", - family = "gem", - wikidata_item = "Q26721", -} - -m["gmw-fri"] = { - canonicalName = "Frisian", - protoLanguage = "ofs", - family = "gmw", - wikidata_item = "Q25325", -} - -m["grk"] = { - canonicalName = "Hellenic", - otherNames = {"Greek"}, - family = "ine", - wikidata_item = "Q2042538", -} - -m["hmn"] = { - canonicalName = "Hmong", - family = "hmx", - wikidata_item = "Q3307894", -} - -m["hmx"] = 
{ - canonicalName = "Hmong-Mien", - otherNames = {"Miao-Yao"}, - wikidata_item = "Q33322", -} - -m["hmx-mie"] = { - canonicalName = "Mien", - family = "hmx", - wikidata_item = "Q7992695", -} - -m["hok"] = { - canonicalName = "Hokan", - wikidata_item = "Q33406", -} - -m["hyx"] = { - canonicalName = "Armenian", - family = "ine", - wikidata_item = "Q8785", -} - -m["iir"] = { - canonicalName = "Indo-Iranian", - family = "ine", - wikidata_item = "Q33514", -} - -m["iir-nur"] = { - canonicalName = "Nuristani", - family = "iir", - wikidata_item = "Q161804", -} - -m["ijo"] = { - canonicalName = "Ijoid", - family = "nic", - wikidata_item = "Q1325759", -} - -m["inc"] = { - canonicalName = "Indo-Aryan", - otherNames = {"Indic"}, - family = "iir", - wikidata_item = "Q33577", -} - -m["inc-dar"] = { - canonicalName = "Dardic", - family = "inc", - wikidata_item = "Q161101", -} - -m["inc-pah"] = { - canonicalName = "Pahari", - otherNames = {"Pahadi"}, - family = "inc", - wikidata_item = "Q946077", -} - -m["inc-rom"] = { - canonicalName = "Romani", - otherNames = {"Romany", "Gypsy", "Gipsy"}, - protoLanguage = "rom", - family = "inc", - wikidata_item = "Q13201", -} - -m["ine"] = { - canonicalName = "Indo-European", - otherNames = {"Indo-Germanic"}, - wikidata_item = "Q19860", -} - -m["ine-ana"] = { - canonicalName = "Anatolian", - family = "ine", - wikidata_item = "Q147085", -} - -m["ine-bsl"] = { - canonicalName = "Balto-Slavic", - family = "ine", - wikidata_item = "Q147356", -} - -m["ine-toc"] = { - canonicalName = "Tocharian", - otherNames = {"Tokharian"}, - family = "ine", - wikidata_item = "Q37029", -} - -m["ira"] = { - canonicalName = "Iranian", - family = "iir", - wikidata_item = "Q33527", -} - -m["ira-eas"] = { - canonicalName = "Eastern Iranian", - family = "ira", - wikidata_item = "Q391015", -} - -m["ira-krm"] = { - canonicalName = "Kermanic", - otherNames = {"Kermanian", "Kermanic Iranian", "Kermanic Central Iranian", "Central Iranian"}, - family = "ira", - wikidata_item 
= "Q129850", -} - -m["ira-sak"] = { - canonicalName = "Sakan", - family = "ira", -} - -m["ira-sgc"] = { - canonicalName = "Sogdic", - otherNames = {"Sogdian"}, - family = "ira", -} - -m["ira-wes"] = { - canonicalName = "Western Iranian", - family = "ira", - wikidata_item = "Q129850", -} - -m["iro"] = { - canonicalName = "Iroquoian", - wikidata_item = "Q33623", -} - -m["itc"] = { - canonicalName = "Italic", - family = "ine", - wikidata_item = "Q131848", -} - -m["jpx"] = { - canonicalName = "Japonic", - otherNames = {"Japanese", "Japanese-Ryukyuan"}, - wikidata_item = "Q33612", -} - -m["jpx-ryu"] = { - canonicalName = "Ryukyuan", - family = "jpx", - wikidata_item = "Q56393", -} - -m["kar"] = { - canonicalName = "Karen", - family = "tbq", - wikidata_item = "Q1364815", -} - ---[=[ Exceptional language and family codes for Khoisan and Kordofanian languages can use - the prefix "khi-" and "kdo-" respectively, though they are no longer family codes themselves. ]=]-- - -m["khi-kho"] = { - canonicalName = "Khoe", - otherNames = {"Kwadi-Khoe", "Central Khoisan"}, - wikidata_item = "Q2736449", -} - -m["khi-kxa"] = { - canonicalName = "Kx'a", - otherNames = {"Kxa", "Ju-ǂHoan"}, - wikidata_item = "Q6450587", -} - -m["khi-tuu"] = { - canonicalName = "Tuu", - otherNames = {"Kwi", "Taa-Kwi", "Southern Khoisan", "Taa-ǃKwi", "Taa-ǃUi", "ǃUi-Taa"}, - wikidata_item = "Q631046", -} - -m["kro"] = { - canonicalName = "Kru", - family = "nic-bco", - wikidata_item = "Q33535", -} - -m["map"] = { - canonicalName = "Austronesian", - wikidata_item = "Q49228", -} - -m["map-ata"] = { - canonicalName = "Atayalic", - family = "map", - wikidata_item = "Q716610", -} - -m["mkh"] = { - canonicalName = "Mon-Khmer", - family = "aav", - wikidata_item = "Q33199", -} - -m["mkh-vie"] = { - canonicalName = "Vietic", - family = "mkh", - wikidata_item = "Q2355546", -} - -m["mno"] = { - canonicalName = "Manobo", - family = "phi", - wikidata_item = "Q3217483", -} - -m["mun"] = { - canonicalName = "Munda", - 
family = "aav", - wikidata_item = "Q33892", -} - -m["myn"] = { - canonicalName = "Mayan", - wikidata_item = "Q33738", -} - ---[=[ Exceptional language and family codes for North American Indian languages - can use the prefix "nai-", though "nai" is no longer itself a family code. ]=]-- -m["nai-cat"] = { - canonicalName = "Catawban", - family = "nai-sca", - wikidata_item = "Q3446638", -} - -m["nai-chu"] = { - canonicalName = "Chumashan", - wikidata_item = "Q1288420", -} - -m["nai-ckn"] = { - canonicalName = "Chinookan", - wikidata_item = "Q610586", -} - -m["nai-ker"] = { - canonicalName = "Keresan", - wikidata_item = "Q35878", -} - -m["nai-kta"] = { - canonicalName = "Kiowa-Tanoan", - wikidata_item = "Q386288", -} - -m["nai-len"] = { - canonicalName = "Lencan", - otherNames = {"Lenca"}, - wikidata_item = "Q36189", -} - -m["nai-mdu"] = { - canonicalName = "Maiduan", - wikidata_item = "Q33502", -} - -m["nai-miz"] = { - canonicalName = "Mixe-Zoquean", - otherNames = {"Mixe-Zoque"}, - wikidata_item = "Q954016", -} - -m["nai-min"] = { - canonicalName = "Misumalpan", - otherNames = {"Misuluan", "Misumalpa"}, - family = "qfa-mch", - wikidata_item = "Q281693", -} - -m["nai-mus"] = { - canonicalName = "Muskogean", - otherNames = {"Muskhogean"}, - wikidata_item = "Q902978", -} - -m["nai-pal"] = { - canonicalName = "Palaihnihan", - wikidata_item = "Q1288332", -} - -m["nai-pom"] = { - canonicalName = "Pomoan", - otherNames = {"Pomo", "Kulanapan"}, - family = "hok", - wikidata_item = "Q2618420", -} - -m["nai-sca"] = { - canonicalName = "Siouan-Catawban", - wikidata_item = "Q34181", -} - -m["nai-shp"] = { - canonicalName = "Sahaptian", - wikidata_item = "Q114782", -} - -m["nai-shs"] = { - canonicalName = "Shastan", - family = "hok", - wikidata_item = "Q2991735", -} - -m["nai-tot"] = { - canonicalName = "Totozoquean", - wikidata_item = "Q7828419", -} - -m["nai-ttn"] = { - canonicalName = "Totonacan", - otherNames = {"Totonac", "Totonac-Tepehua", "Totonacan-Tepehuan"}, - 
wikidata_item = "Q34039", -} - -m["nai-tqn"] = { - canonicalName = "Tequistlatecan", - otherNames = {"Tequistlatec", "Chontal", "Chontalan", "Oaxacan Chontal", "Chontal of Oaxaca"}, - family = "hok", - wikidata_item = "Q1754988", -} - -m["nai-utn"] = { - canonicalName = "Utian", - otherNames = {"Miwok-Costanoan", "Mutsun"}, - family = "nai-you", - wikidata_item = "Q13371763", -} - -m["nai-wtq"] = { - canonicalName = "Wintuan", - otherNames = {"Wintun"}, - wikidata_item = "Q1294259", -} - -m["nai-xin"] = { - canonicalName = "Xincan", - otherNames = {"Xinca"}, - wikidata_item = "Q1546494", -} - -m["nai-yok"] = { - canonicalName = "Yokutsan", - otherNames = {"Yokuts", "Mariposan", "Mariposa"}, - family = "nai-you", - wikidata_item = "Q34249", -} - -m["nai-you"] = { - canonicalName = "Yok-Utian", - wikidata_item = "Q2886186", -} - -m["nai-yuc"] = { - canonicalName = "Yuman-Cochimí", - wikidata_item = "Q579137", -} - -m["ngf"] = { - canonicalName = "Trans-New Guinea", - wikidata_item = "Q34018", -} - -m["ngf-fin"] = { - canonicalName = "Finisterre", - family = "ngf", - wikidata_item = "Q5450373", -} - -m["ngf-mad"] = { - canonicalName = "Madang", - family = "ngf", - wikidata_item = "Q11217556", -} - -m["ngf-okk"] = { - canonicalName = "Ok", - family = "ngf", - wikidata_item = "Q7081687", -} - -m["ngf-sbh"] = { - canonicalName = "South Bird's Head", - family = "ngf", - wikidata_item = "Q7566330", -} - -m["nic"] = { - canonicalName = "Niger-Congo", - otherNames = {"Niger-Kordofanian"}, - wikidata_item = "Q33838", -} - -m["nic-bco"] = { - canonicalName = "Benue-Congo", - family = "nic-vco", - wikidata_item = "Q33253", -} - -m["nic-bod"] = { - canonicalName = "Bantoid", - family = "nic-bco", - wikidata_item = "Q806992", -} - -m["nic-grf"] = { - canonicalName = "Grassfields", - otherNames = {"Grassfields Bantu", "Wide Grassfields"}, - family = "nic-bod", - wikidata_item = "Q750932", -} - -m["nic-gur"] = { - canonicalName = "Gur", - otherNames = {"Voltaic"}, - family = 
"alv-sav", - wikidata_item = "Q33536", -} - -m["nic-knj"] = { - canonicalName = "Kainji", - family = "nic-bco", - wikidata_item = "Q681495", -} - -m["nic-ubg"] = { - canonicalName = "Ubangian", - family = "nic-vco", -- or none - wikidata_item = "Q33932", -} - -m["nic-vco"] = { - canonicalName = "Volta-Congo", - family = "alv", - wikidata_item = "Q37228", -} - -m["nub"] = { - canonicalName = "Nubian", - wikidata_item = "Q1517194", -} - -m["omq"] = { - canonicalName = "Oto-Manguean", - wikidata_item = "Q33669", -} - -m["omq-cha"] = { - canonicalName = "Chatino", - family = "omq-zap", - wikidata_item = "Q35111", -} - -m["omq-chi"] = { - canonicalName = "Chinantecan", - family = "omq", - wikidata_item = "Q35828", -} - -m["omq-maz"] = { - canonicalName = "Mazatecan", - otherNames = {"Mazatec"}, - family = "omq", - wikidata_item = "Q36230", -} - -m["omq-mix"] = { - canonicalName = "Mixtecan", - family = "omq", - wikidata_item = "Q21996392", -} - -m["omq-pop"] = { - canonicalName = "Popolocan", - family = "omq", - wikidata_item = "Q5132273", -} - -m["omq-tri"] = { - canonicalName = "Trique", - otherNames = {"Triqui"}, - family = "omq-mix", - wikidata_item = "Q780200", -} - -m["omq-zap"] = { - canonicalName = "Zapotecan", - family = "omq", - wikidata_item = "Q8066463", -} - -m["omv"] = { - canonicalName = "Omotic", - family = "afa", - wikidata_item = "Q33860", -} - -m["oto"] = { - canonicalName = "Otomian", - family = "omq", - wikidata_item = "Q1270220", -} - -m["paa"] = { - canonicalName = "Papuan", - family = "qfa-not", - wikidata_item = "Q236425", -} - -m["paa-arf"] = { - canonicalName = "Arafundi", - wikidata_item = "Q4783702", -} - -m["paa-iwm"] = { - canonicalName = "Iwam", - wikidata_item = "Q15147853", -} - -m["paa-kag"] = { -- recode as ngf-kag? 
- canonicalName = "Kainantu-Goroka", - family = "ngf", - wikidata_item = "Q3217463", -} - -m["paa-lkp"] = { - canonicalName = "Lakes Plain", - wikidata_item = "Q6478969", -} - -m["paa-lsp"] = { - canonicalName = "Lower Sepik", - otherNames = {"Nor-Pondo"}, - wikidata_item = "Q7061700", -} - -m["paa-mai"] = { - canonicalName = "Mairasi", - wikidata_item = "Q6736896", -} - -m["paa-msk"] = { - canonicalName = "Sko", - otherNames = {"Skou"}, - wikidata_item = "Q953509", -} - -m["paa-ram"] = { - canonicalName = "Ramu", - wikidata_item = "Q3442808", -} - -m["paa-spk"] = { - canonicalName = "Sepik", - wikidata_item = "Q3508772", -} - -m["paa-yam"] = { - canonicalName = "Yam", - otherNames = {"Morehead and Upper Maro River"}, - wikidata_item = "Q15062272", -} - -m["phi"] = { - canonicalName = "Philippine", - family = "poz-bop", - wikidata_item = "Q947858", -} - -m["phi-kal"] = { - canonicalName = "Kalamian", - otherNames = {"Calamian"}, - family = "phi", - wikidata_item = "Q3217466", -} - -m["poz"] = { - canonicalName = "Malayo-Polynesian", - family = "map", - wikidata_item = "Q143158", -} - -m["poz-aay"] = { - canonicalName = "Admiralty Islands", - family = "poz-oce", - wikidata_item = "Q2701306", -} - -m["poz-bnn"] = { - canonicalName = "North Bornean", - family = "poz-bop", - wikidata_item = "Q1427907", -} - -m["poz-bop"] = { - canonicalName = "Borneo-Philippines", - family = "poz", - wikidata_item = "Q4273393", -} - -m["poz-bre"] = { - canonicalName = "East Barito", - family = "poz-bop", - wikidata_item = "Q2701314", -} - -m["poz-brw"] = { - canonicalName = "West Barito", - family = "poz-bop", - wikidata_item = "Q2761679", -} - -m["poz-btk"] = { - canonicalName = "Bungku-Tolaki", - family = "poz-sus", - wikidata_item = "Q3217381", -} - -m["poz-cet"] = { - canonicalName = "Central-Eastern Malayo-Polynesian", - family = "poz", - wikidata_item = "Q2269883", -} - -m["poz-cln"] = { - canonicalName = "New Caledonian", - family = "poz-occ", - wikidata_item = "Q3091221", -} - 
-m["poz-hce"] = { - canonicalName = "Halmahera-Cenderawasih", - family = "pqe", - wikidata_item = "Q2526616", -} - -m["poz-kal"] = { - canonicalName = "Kaili-Pamona", - family = "poz-sus", - wikidata_item = "Q3217465", -} - -m["poz-lgx"] = { - canonicalName = "Lampungic", - family = "poz-sus", - wikidata_item = "Q49215", -} - -m["poz-mcm"] = { - canonicalName = "Malayo-Chamic", - family = "poz-msa", -} - -m["poz-mic"] = { - canonicalName = "Micronesian", - family = "poz-occ", - wikidata_item = "Q420591", -} - -m["poz-mly"] = { - canonicalName = "Malayic", - family = "poz-mcm", - wikidata_item = "Q662628", -} - -m["poz-msa"] = { - canonicalName = "Malayo-Sumbawan", - family = "poz-sus", - wikidata_item = "Q1363818", -} - -m["poz-mun"] = { - canonicalName = "Muna-Buton", - family = "poz-sus", - wikidata_item = "Q3037924", -} - -m["poz-nws"] = { - canonicalName = "Northwest Sumatran", - family = "poz-sus", - wikidata_item = "Q2071308", -} - -m["poz-occ"] = { - canonicalName = "Central-Eastern Oceanic", - family = "poz-oce", - wikidata_item = "Q2068435", -} - -m["poz-oce"] = { - canonicalName = "Oceanic", - family = "pqe", - wikidata_item = "Q324457", -} - -m["poz-ocw"] = { - canonicalName = "Western Oceanic", - family = "poz-oce", - wikidata_item = "Q2701282", -} - -m["poz-pep"] = { - canonicalName = "Eastern Polynesian", - family = "poz-pnp", - wikidata_item = "Q390979", -} - -m["poz-pnp"] = { - canonicalName = "Nuclear Polynesian", - family = "poz-pol", - wikidata_item = "Q743851", -} - -m["poz-pol"] = { - canonicalName = "Polynesian", - family = "poz-occ", - wikidata_item = "Q390979", -} - -m["poz-san"] = { - canonicalName = "Sabahan", - family = "poz-bnn", - wikidata_item = "Q3217517", -} - -m["poz-sbj"] = { - canonicalName = "Sama-Bajaw", - family = "poz-bop", - wikidata_item = "Q2160409", -} - -m["poz-slb"] = { - canonicalName = "Saluan-Banggai", - family = "poz-sus", - wikidata_item = "Q3217519", -} - -m["poz-sls"] = { - canonicalName = "Southeast Solomonic", - 
family = "poz-occ", - wikidata_item = "Q3119671", -} - -m["poz-ssw"] = { - canonicalName = "South Sulawesi", - family = "poz-sus", - wikidata_item = "Q2778190", -} - -m["poz-sus"] = { - canonicalName = "Sunda-Sulawesi", - family = "poz", - wikidata_item = "Q319552", -} - -m["poz-swa"] = { - canonicalName = "North Sarawakan", - family = "poz-bnn", - wikidata_item = "Q538569", -} - -m["poz-tot"] = { - canonicalName = "Tomini-Tolitoli", - family = "poz-sus", - wikidata_item = "Q3217541", -} - -m["poz-vnc"] = { - canonicalName = "North-Central Vanuatu", - family = "poz-occ", - wikidata_item = "Q3039118", -} - -m["poz-wot"] = { - canonicalName = "Wotu-Wolio", - family = "poz-sus", - wikidata_item = "Q1041317", -} - -m["pqe"] = { - canonicalName = "Eastern Malayo-Polynesian", - family = "poz-cet", - wikidata_item = "Q2269883", -} - -m["pra"] = { - canonicalName = "Prakrit", - family = "inc", - wikidata_item = "Q192170", -} - -m["qfa-adm"] = { - canonicalName = "Andamanese", - wikidata_item = "Q32940", -} - -m["qfa-cka"] = { - canonicalName = "Chukotko-Kamchatkan", - wikidata_item = "Q33255", -} - -m["qfa-dgn"] = { - canonicalName = "Dogon", - wikidata_item = "Q1234776", -} - -m["qfa-dny"] = { - canonicalName = "Dene-Yeniseian", - otherNames = {"Dené-Yeniseian"}, - wikidata_item = "Q21103", -} - -m["qfa-hur"] = { - canonicalName = "Hurro-Urartian", - wikidata_item = "Q1144159", -} - -m["qfa-iso"] = { - canonicalName = "isolate", - family = "qfa-not", - wikidata_item = "Q33648", -} - -m["qfa-kad"] = { - canonicalName = "Kadu", -- considered either Nilo-Saharan or independent/none - wikidata_item = "Q1720989", -} - -m["qfa-kor"] = { - canonicalName = "Korean", - wikidata_item = "Q11263525", -} - -m["qfa-mal"] = { - canonicalName = "Left May", - wikidata_item = "Q614468", -} - -m["qfa-mch"] = { -- used in both N and S America - canonicalName = "Macro-Chibchan", - wikidata_item = "Q3438062", -} - -m["qfa-mix"] = { - canonicalName = "mixed", - family = "qfa-not", - 
wikidata_item = "Q33694", -} - -m["qfa-not"] = { - canonicalName = "not a family", - family = "qfa-not", -} - -m["qfa-sub"] = { - canonicalName = "substrate", - wikidata_item = "Q20730913", -} - -m["qfa-tak"] = { - canonicalName = "Tai-Kadai", - otherNames = {"Daic", "Kadai", "Kra-Dai"}, - wikidata_item = "Q34171", -} - -m["qfa-tap"] = { - canonicalName = "Timor-Alor-Pantar", - wikidata_item = "Q16590002", -} - -m["qfa-tor"] = { - canonicalName = "Torricelli", - wikidata_item = "Q1333831", -} - -m["qfa-tyn"] = { - canonicalName = "Tyrsenian", - wikidata_item = "Q1344038", -} - -m["qfa-yen"] = { - canonicalName = "Yeniseian", - otherNames = {"Yeniseic", "Yenisei-Ostyak"}, - family = "qfa-dny", - wikidata_item = "Q27639", -} - -m["qfa-yuk"] = { - canonicalName = "Yukaghir", - otherNames = {"Yukagir", "Jukagir"}, - wikidata_item = "Q34164", -} - -m["qwe"] = { - canonicalName = "Quechuan", - wikidata_item = "Q5218", -} - -m["roa"] = { - canonicalName = "Romance", - otherNames = {"Romanic", "Latin", "Neolatin", "Neo-Latin"}, - protoLanguage = "la", - family = "itc", - wikidata_item = "Q19814", -} - -m["roa-eas"] = { - canonicalName = "Eastern Romance", - family = "roa", - wikidata_item = "Q147576", -} - -m["roa-oil"] = { - canonicalName = "Oïl", - protoLanguage = "fro", - family = "roa", - wikidata_item = "Q37351", -} - -m["roa-rhe"] = { - canonicalName = "Rhaeto-Romance", - family = "roa", - wikidata_item = "Q515593", -} - ---[=[ Exceptional language and family codes for South American Indian languages - can use the prefix "sai-", though "sai" is no longer itself a family code. 
]=]-- -m["sai-ara"] = { - canonicalName = "Araucanian", - wikidata_item = "Q626630", -} - -m["sai-aym"] = { - canonicalName = "Aymaran", - wikidata_item = "Q33010", -} - -m["sai-bar"] = { - canonicalName = "Barbacoan", - otherNames = {"Barbakoan"}, - wikidata_item = "Q807304", -} - -m["sai-car"] = { - canonicalName = "Cariban", - otherNames = {"Carib"}, - wikidata_item = "Q33090", -} - -m["sai-chc"] = { - canonicalName = "Chocoan", - otherNames = {"Choco", "Chocó"}, - wikidata_item = "Q1075616", -} - -m["sai-cho"] = { - canonicalName = "Chonan", - otherNames = {"Chon"}, - wikidata_item = "Q33019", -} - -m["sai-cpc"] = { - canonicalName = "Chapacuran", - wikidata_item = "Q1062626", -} - -m["sai-crn"] = { - canonicalName = "Charruan", - otherNames = {"Charrúan"}, - wikidata_item = "Q3112423", -} - -m["sai-ctc"] = { - canonicalName = "Catacaoan", - wikidata_item = "Q5051139", -} - -m["sai-guc"] = { - canonicalName = "Guaicuruan", - otherNames = {"Guaicurú", "Guaycuruana", "Guaikurú", "Guaycuruano", "Guaykuruan", "Waikurúan"}, - family = "sai-mgc", - wikidata_item = "Q1974973", -} - -m["sai-guh"] = { - canonicalName = "Guahiban", - otherNames = {"Guahiboan", "Guajiboan", "Wahivoan"}, - wikidata_item = "Q944056", -} - -m["sai-har"] = { - canonicalName = "Harákmbut", - otherNames = {"Harákmbet"}, - family = "sai-hkt", - wikidata_item = "Q1584402", -} - -m["sai-hkt"] = { - canonicalName = "Harákmbut–Katukinan", - otherNames = {"Harákmbet"}, - wikidata_item = "Q17107635", -} - -m["sai-hrp"] = { - canonicalName = "Huarpean", - otherNames = {"Warpean", "Huarpe", "Warpe"}, - wikidata_item = "Q1578336", -} - -m["sai-jee"] = { - canonicalName = "Jê", - otherNames = {"Gê", "Jean", "Gean", "Jê-Kaingang", "Ye"}, - family = "sai-mje", - wikidata_item = "Q1483594", -} - -m["sai-jir"] = { - canonicalName = "Jirajaran", - otherNames = {"Hiraháran"}, - wikidata_item = "Q3028651", -} - -m["sai-jiv"] = { - canonicalName = "Jivaroan", - otherNames = {"Hívaro", "Jibaro", "Jibaroan", 
"Jibaroana", "Jívaro"}, - wikidata_item = "Q1393074", -} - -m["sai-ktk"] = { - canonicalName = "Katukinan", - otherNames = {"Catuquinan"}, - family = "sai-hkt", - wikidata_item = "Q2636000", -} - -m["sai-mas"] = { - canonicalName = "Mascoian", - otherNames = {"Mascoyan", "Maskoian", "Enlhet-Enenlhet"}, - wikidata_item = "Q1906952", -} - -m["sai-mgc"] = { - canonicalName = "Mataco-Guaicuru", - wikidata_item = "Q255512", -} - -m["sai-mje"] = { - canonicalName = "Macro-Jê", - otherNames = {"Macro-Gê"}, - wikidata_item = "Q887133", -} - -m["sai-mtc"] = { - canonicalName = "Matacoan", - family = "sai-mgc", - wikidata_item = "Q2447424", -} - -m["sai-mur"] = { - canonicalName = "Muran", - otherNames = {"Mura"}, - wikidata_item = "Q33826", -} - -m["sai-nmk"] = { - canonicalName = "Nambikwaran", - otherNames = {"Nambicuaran", "Nambiquaran", "Nambikuaran"}, - wikidata_item = "Q15548027", -} - -m["sai-otm"] = { - canonicalName = "Otomacoan", - otherNames = {"Otomákoan", "Otomakoan"}, - wikidata_item = "Q3217503", -} - -m["sai-pan"] = { - canonicalName = "Panoan", - otherNames = {"Pano"}, - family = "sai-pat", - wikidata_item = "Q1544537", -} - -m["sai-pat"] = { - canonicalName = "Pano-Tacanan", - otherNames = {"Pano-Tacana", "Pano-Takana", "Páno-Takána", "Pano-Takánan"}, - wikidata_item = "Q2475746", -} - -m["sai-tac"] = { - canonicalName = "Tacanan", - family = "sai-pat", - wikidata_item = "Q3113762", -} - -m["sai-tuc"] = { - canonicalName = "Tucanoan", - wikidata_item = "Q788144", -} - -m["sai-tyu"] = { - canonicalName = "Ticuna-Yuri", - wikidata_item = "Q4467010", -} - -m["sai-ucp"] = { - canonicalName = "Uru-Chipaya", - otherNames = {"Uru-Chipayan"}, - wikidata_item = "Q2475488", -} - -m["sai-wic"] = { - canonicalName = "Wichí", - wikidata_item = "Q3027047", -} - -m["sai-wit"] = { - canonicalName = "Witotoan", - otherNames = {"Huitotoan"}, - wikidata_item = "Q43079317", -} - -m["sai-ynm"] = { - canonicalName = "Yanomami", - otherNames = {"Yanomam", "Shamatari", 
"Yamomami", "Yanomaman"}, -} - -m["sai-zam"] = { - canonicalName = "Zamucoan", - otherNames = {"Samúkoan"}, - wikidata_item = "Q3048461", -} - -m["sai-zap"] = { - canonicalName = "Zaparoan", - otherNames = {"Záparoan", "Saparoan", "Sáparoan", "Záparo", "Zaparoano", "Zaparoana"}, - wikidata_item = "Q33911", -} - -m["sal"] = { - canonicalName = "Salishan", - wikidata_item = "Q33985", -} - -m["sdv"] = { - canonicalName = "Eastern Sudanic", - family = "ssa", - wikidata_item = "Q2036148", -} - -m["sem"] = { - canonicalName = "Semitic", - family = "afa", - wikidata_item = "Q34049", -} - -m["sem-ara"] = { - canonicalName = "Aramaic", - protoLanguage = "arc", - family = "sem-nwe", - wikidata_item = "Q28602", -} - -m["sem-arb"] = { - canonicalName = "Arabic", - protoLanguage = "ar", - family = "sem-cen", - wikidata_item = "Q164667", -} - -m["sem-can"] = { - canonicalName = "Canaanite", - family = "sem-nwe", - wikidata_item = "Q747547", -} - -m["sem-cen"] = { - canonicalName = "Central Semitic", - family = "sem-wes", - wikidata_item = "Q3433228", -} - -m["sem-eas"] = { - canonicalName = "East Semitic", - family = "sem", - wikidata_item = "Q164273", -} - -m["sem-eth"] = { - canonicalName = "Ethiopian Semitic", - otherNames = {"Afro-Semitic", "Ethiopian", "Ethiopic", "Ethiosemitic"}, - family = "sem-sou", - wikidata_item = "Q163629", -} - -m["sem-nwe"] = { - canonicalName = "Northwest Semitic", - family = "sem-cen", - wikidata_item = "Q162996", -} - -m["sem-osa"] = { - canonicalName = "Old South Arabian", - otherNames = {"Epigraphic South Arabian", "Sayhadic"}, - family = "sem-sou", - wikidata_item = "Q35025", -} - -m["sem-sar"] = { - canonicalName = "South Arabian", - otherNames = {"Modern South Arabian"}, - family = "sem-sou", - wikidata_item = "Q1163682", -} - -m["sem-sou"] = { - canonicalName = "South Semitic", - family = "sem-wes", - wikidata_item = "Q38890", -} - -m["sem-wes"] = { - canonicalName = "West Semitic", - family = "sem", - wikidata_item = "Q124901", -} - 
-m["sgn"] = { - canonicalName = "sign", - family = "qfa-not", - wikidata_item = "Q34228", -} - -m["sgn-fsl"] = { - canonicalName = "French Sign Languages", - family = "sgn", - wikidata_item = "Q5501921", -} - -m["sgn-gsl"] = { - canonicalName = "German Sign Languages", - family = "sgn", - wikidata_item = "Q5551235", -} - -m["sgn-jsl"] = { - canonicalName = "Japanese Sign Languages", - family = "sgn", - wikidata_item ="Q11722508", -} - -m["sio"] = { - canonicalName = "Siouan", - family = "nai-sca", - wikidata_item = "Q34181", -} - -m["sit"] = { - canonicalName = "Sino-Tibetan", - wikidata_item = "Q45961", -} - -m["sit-qia"] = { - canonicalName = "Qiangic", - family = "sit", - wikidata_item = "Q1636765", -} - -m["sit-tan"] = { - canonicalName = "Tani", - family = "sit", - wikidata_item = "Q3217538", -} - -m["sla"] = { - canonicalName = "Slavic", - otherNames = {"Slavonic"}, - family = "ine-bsl", - wikidata_item = "Q23526", -} - -m["smi"] = { - canonicalName = "Sami", - otherNames = {"Saami", "Samic", "Saamic"}, - family = "urj", - wikidata_item = "Q56463", -} - -m["son"] = { - canonicalName = "Songhay", - otherNames = {"Songhai"}, - family = "ssa", - wikidata_item = "Q505198", -} - -m["sqj"] = { - canonicalName = "Albanian", - family = "ine", - wikidata_item = "Q8748", -} - -m["ssa"] = { - canonicalName = "Nilo-Saharan", -- possibly not a genetic grouping - wikidata_item = "Q33705", -} - -m["ssa-fur"] = { - canonicalName = "Fur", - family = "ssa", - wikidata_item = "Q2989512", -} - -m["ssa-sah"] = { - canonicalName = "Saharan", - family = "ssa", - wikidata_item = "Q1757661", -} - -m["syd"] = { - canonicalName = "Samoyedic", - otherNames = {"Samoyed", "Samodeic"}, - family = "urj", - wikidata_item = "Q34005", -} - -m["tai"] = { - canonicalName = "Tai", - family = "qfa-tak", - wikidata_item = "Q749720", -} - -m["tai-wen"] = { - canonicalName = "Wenma-Southwestern Tai", - family = "tai", -} - -m["tai-tay"] = { - canonicalName = "Tày", - family = "tai-wen", -} - 
-m["tai-sap"] = { - canonicalName = "Sapa-Southwestern Tai", - otherNames = {"Sapa-Thai"}, - family = "tai-wen", -} - -m["tai-swe"] = { - canonicalName = "Southwestern Tai", - family = "tai-sap", - wikidata_item = "Q3447105", -} - -m["tai-cho"] = { - canonicalName = "Chongzuo Tai", - family = "tai", - wikidata_item = "Q13216", -} - -m["tai-cen"] = { - canonicalName = "Central Tai", -- gonna obsolete - family = "tai", - wikidata_item = "Q5061891", -} - -m["tai-nor"] = { - canonicalName = "Northern Tai", - family = "tai", - wikidata_item = "Q7059014", -} - -m["tbq"] = { - canonicalName = "Tibeto-Burman", - family = "sit", - wikidata_item = "Q34064", -} - -m["tbq-brm"] = { - canonicalName = "Burmish", - family = "tbq", - wikidata_item = "Q865713", -} - -m["tbq-kuk"] = { - canonicalName = "Kukish", - family = "tbq", - wikidata_item = "Q832413", -} - -m["tbq-lol"] = { - canonicalName = "Loloish", - family = "tbq", - wikidata_item = "Q37035", -} - -m["trk"] = { - canonicalName = "Turkic", - wikidata_item = "Q34090", -} - -m["trk-kip"] = { - canonicalName = "Kipchak", - otherNames = {"Kypchak", "Qypchaq", "Northwestern Turkic"}, - protoLanguage = "qwm", - family = "trk", - wikidata_item = "Q1339898", -} - -m["trk-ogr"] = { - canonicalName = "Oghur", - otherNames = {"Lir-Turkic", "r-Turkic"}, - family = "trk", - wikidata_item = "Q1422731", -} - -m["trk-ogz"] = { - canonicalName = "Oghuz", - otherNames = {"Southwestern Turkic"}, - family = "trk", - wikidata_item = "Q494600", -} - -m["tup"] = { - canonicalName = "Tupian", - otherNames = {"Tupi"}, - wikidata_item = "Q34070", -} - -m["tup-gua"] = { - canonicalName = "Tupi-Guarani", - otherNames = {"Tupí-Guaraní"}, - family = "tup", - wikidata_item = "Q148610", -} - -m["tut"] = { - canonicalName = "Altaic", - wikidata_item = "Q37845", -} - -m["tuw"] = { - canonicalName = "Tungusic", - otherNames = {"Manchu-Tungus", "Tungus"}, - wikidata_item = "Q34230", -} - -m["urj"] = { - canonicalName = "Uralic", - otherNames = 
{"Finno-Ugric"}, - wikidata_item = "Q34113", -} - -m["urj-mdv"] = { - canonicalName = "Mordvinic", - family = "urj", - wikidata_item = "Q627313", -} - -m["urj-prm"] = { - canonicalName = "Permic", - family = "urj", - wikidata_item = "Q161493", -} - -m["urj-ugr"] = { - canonicalName = "Ugric", - family = "urj", - wikidata_item = "Q156631", -} - -m["wak"] = { - canonicalName = "Wakashan", - wikidata_item = "Q60069", -} - -m["wen"] = { - canonicalName = "Sorbian", - otherNames = {"Lusatian", "Wendish"}, - family = "zlw", - wikidata_item = "Q25442", -} - -m["xgn"] = { - canonicalName = "Mongolic", - otherNames = {"Mongolian"}, - wikidata_item = "Q33750", -} - -m["xnd"] = { - canonicalName = "Na-Dene", - otherNames = {"Na-Dené"}, - family = "qfa-dny", - wikidata_item = "Q26986", -} - -m["ypk"] = { - canonicalName = "Yupik", - otherNames = {"Yup'ik", "Yuit"}, - family = "esx-esk", - wikidata_item = "Q27970", -} - -m["zhx"] = { - canonicalName = "Sinitic", - otherNames = {"Chinese"}, - protoLanguage = "och", - family = "sit", - wikidata_item = "Q33857", -} - -m["zle"] = { - canonicalName = "East Slavic", - protoLanguage = "orv", - family = "sla", - wikidata_item = "Q144713", -} - -m["zls"] = { - canonicalName = "South Slavic", - family = "sla", - wikidata_item = "Q146665", -} - -m["zlw"] = { - canonicalName = "West Slavic", - family = "sla", - wikidata_item = "Q145852", -} - -m["znd"] = { - canonicalName = "Zande", - wikidata_item = "Q8066072", -} - -return m \ No newline at end of file diff --git a/wikt/translit/families.lua b/wikt/translit/families.lua deleted file mode 100644 index db47ccc..0000000 --- a/wikt/translit/families.lua +++ /dev/null @@ -1,186 +0,0 @@ ---[[This module is used to retrieve and manage Wiktionary's various language families and the information associated with them. See Wiktionary:Families for more information. - -This module provides access to other modules. To access the information from within a template, see Module:families/templates. 
- -The information itself is stored in Module:families/data. This module should not be used directly by any other module, the data should only be accessed through the functions provided by Module:families.]] ---[[ -Finding and retrieving families -The module exports a number of functions that are used to find families. - -getByCode -getByCode(code) - -Finds the family whose code matches the one provided. If it exists, it returns a Family object representing the family. Otherwise, it returns nil. - -getByCanonicalName -getByCanonicalName(name) - -Looks for the family whose canonical name (the name used to represent that language on Wiktionary) matches the one provided. If it exists, it returns a Family object representing the family. Otherwise, it returns nil. The canonical name of families should always be unique (it is an error for two families on Wiktionary to share the same canonical name), so this is guaranteed to give at most one result. - -Family objects -A Family object is returned from one of the functions above. It is a Lua representation of a family and the data associated with it. It has a number of methods that can be called on it, using the : syntax. For example: - -local m_families = require("Module:families") -local fam = m_families.getByCode("ine") -local name = fam:getCanonicalName() --- "name" will now be "Indo-European" -Family:getCode -:getCode() - -Returns the family code of the family. Example: "ine" for the Indo-European languages. - -Family:getCanonicalName -:getCanonicalName() - -Returns the canonical name of the family. This is the name used to represent that language family on Wiktionary, and is guaranteed to be unique to that family alone. Example: "Indo-European" for the Indo-European languages. - -Family:getAllNames -:getAllNames() - -Returns a table of all names that the family is known by, including the canonical name. The names are not guaranteed to be unique, sometimes more than one family is known by the same name. 
Example: {"Slavic", "Slavonic"} for the Slavic languages. - -Family:getFamily -:getFamily() - -Returns a Family object for the parent family that the family is a part of. - -Family:getProtoLanguage -:getProtoLanguage() - -Returns a Language object (see Module:languages) for the proto-language of this family, if one exists. - -Family:getCategoryName -:getCategoryName() - -Returns the name of the main category of that family. Example: "Germanic languages" for the Germanic languages, whose category is at Category:Germanic languages. - -Family:getWikidataItem -:getWikidataItem() - -Returns the Wikidata item of that family. - -Family:getWikipediaArticle -:getWikipediaArticle() - -Returns the Wikipedia article of that family, usually derived from :getWikidataItem(). -]] -local export = {} - -local Family = {} - - -function Family:getCode() - return self._code -end - - -function Family:getCanonicalName() - return self._rawData.canonicalName -end - - ---function Family:getAllNames() --- return self._rawData.names ---end - - -function Family:getOtherNames() - return self._rawData.otherNames or {} -end - - -function Family:getType() - return "family" -end - - -function Family:getFamily() - if self._rawData.family and not self._familyObject then - self._familyObject = export.getByCode(self._rawData.family) - end - - return self._familyObject -end - - -function Family:getProtoLanguage() - if not self._protoLanguage then - self._protoLanguage = require("Module:languages").getByCode(self._rawData.protoLanguage or self._code .. "-pro") - end - - return self._protoLanguage -end - - -function Family:getCategoryName() - local name = self._rawData.canonicalName - - -- If the name already has "languages" in it, don't add it. - if name:find("[Ll]anguages$") then - return name - else - return name .. 
" languages" - end -end - -function Family:getWikidataItem() - return self._rawData.wikidata_item -end - -function Family:getWikipediaArticle() - return (self:getWikidataItem() and mw.wikibase and mw.wikibase.sitelink(self:getWikidataItem(), 'enwiki')) or - self:getCategoryName() -end - -function Family:makeWikipediaLink() - return "[[w:" .. self:getWikipediaArticle() .. "|" .. self:getCanonicalName() .. "]]" -end - - -function Family:toJSON() - local ret = { - canonicalName = self:getCanonicalName(), - categoryName = self:getCategoryName(), - code = self._code, - family = self._rawData.family, - otherNames = self:getOtherNames(), - type = self:getType(), - wikidataItem = self:getWikidataItem(), - } - - return require("Module:JSON").toJSON(ret) -end - - -function Family:getRawData() - return self._rawData -end - - -Family.__index = Family - - -function export.makeObject(code, data) - return data and setmetatable({ _rawData = data, _code = code }, Family) or nil -end - - -function export.getByCode(code) - if code == 'kdo' then - require('Module:debug').track('Kordofanian') - end - - return export.makeObject(code, mw.loadData("Module:families/data")[code]) -end - -function export.getByCanonicalName(name) - local code = mw.loadData("Module:families/by name")[name] - - if not code then - return nil - end - - return export.makeObject(code, mw.loadData("Module:families/data")[code]) -end - -return export \ No newline at end of file diff --git a/wikt/translit/geor-translit.lua b/wikt/translit/geor-translit.lua deleted file mode 100644 index ebad8fe..0000000 --- a/wikt/translit/geor-translit.lua +++ /dev/null @@ -1,25 +0,0 @@ --- This module will transliterate text in the Georgian script per WT:GEOR TR. --- It is used to transliterate Bats (bbl), Georgian (ka), Laz (lzz), Old Georgian (oge), --- Udi (udi) and Mingrelian (xmf). --- Language code: bbl, kat(geo), lzz, oge, udi, xmf. 
- -local export = {} - -- Keep synchronized with [[Module:sva-translit]] -local gsub = mw.ustring.gsub -local tt = { - ["ა"]="a", ["ბ"]="b", ["გ"]="g", ["დ"]="d", ["ე"]="e", ["ვ"]="v", ["ზ"]="z", ["ჱ"]="ē", - ["თ"]="t", ["ი"]="i", ["კ"]="ḳ", ["ლ"]="l", ["მ"]="m", ["ნ"]="n", ["ჲ"]="y", ["ო"]="o", - ["პ"]="ṗ", ["ჟ"]="ž", ["რ"]="r", ["ს"]="s", ["ტ"]="ṭ", ["ჳ"]="wi", ["უ"]="u", ["ფ"]="p", - ["ქ"]="k", ["ღ"]="ɣ", ["ყ"]="q̇", ["შ"]="š", ["ჩ"]="č", ["ც"]="c", - ["ძ"]="ʒ", ["წ"]="c̣", ["ჭ"]="č̣", ["ხ"]="x", ["ჴ"]="q", ["ჯ"]="ǯ", ["ჰ"]="h", ["ჵ"]="ō", ["ჶ"]="f", ["ჷ"]="ə", ["ჸ"]="ʾ" -}; - -function export.tr(text, lang, sc) - -- Transliterating vowel nasalization in Bats - text = gsub(text, 'ჼ', '̃') - text = gsub(text, '', '̃') - text = gsub(text, '.', tt) - return text -end - -return export \ No newline at end of file diff --git a/wikt/translit/goth-translit.lua b/wikt/translit/goth-translit.lua deleted file mode 100644 index 47e2c6e..0000000 --- a/wikt/translit/goth-translit.lua +++ /dev/null @@ -1,62 +0,0 @@ --- This module will transliterate text in the Gothic script. It is used to transliterate Gothic (got). --- Language code: got - -local export = {} - -local Goth_Latn = { - ["𐌰"] = "a", - ["𐌱"] = "b", - ["𐌲"] = "g", - ["𐌳"] = "d", - ["𐌴"] = "ē", - ["𐌵"] = "q", - ["𐌶"] = "z", - ["𐌷"] = "h", - ["𐌸"] = "þ", - ["𐌹"] = "i", - ["𐌺"] = "k", - ["𐌻"] = "l", - ["𐌼"] = "m", - ["𐌽"] = "n", - ["𐌾"] = "j", - ["𐌿"] = "u", - ["𐍀"] = "p", - ["𐍁"] = "?", - ["𐍂"] = "r", - ["𐍃"] = "s", - ["𐍄"] = "t", - ["𐍅"] = "w", - ["𐍆"] = "f", - ["𐍇"] = "x", - ["𐍈"] = "ƕ", - ["𐍉"] = "ō", - ["𐍊"] = "?", -} - -local Latn_Goth = { - ["ā"] = "𐌰", - ["e"] = "𐌴", - ["ī"] = "𐌹", - ["o"] = "𐍉", - ["ū"] = "𐌿", - ["y"] = "𐍅", - } - -for g, l in pairs(Goth_Latn) do - if l ~= "?" 
then - Latn_Goth[l] = g - end -end - -function export.tr(text, lang, sc) - text = mw.ustring.gsub(text, "𐌴𐌹", "ei") - - return (mw.ustring.gsub(text, '.', Goth_Latn)) -end - -function export.tr_reverse(text) - text = mw.ustring.lower(text) - return (mw.ustring.gsub(text, '.', Latn_Goth)) -end - -return export \ No newline at end of file diff --git a/wikt/translit/grc-translit.lua b/wikt/translit/grc-translit.lua deleted file mode 100644 index ef917fa..0000000 --- a/wikt/translit/grc-translit.lua +++ /dev/null @@ -1,328 +0,0 @@ ---[[ -This module will transliterate Ancient Greek language text per WT:GRC TR. -It is also used to transliterate Cappadocian Greek (cpg), Paeonian (ine-pae), -Pontic Greek (pnt) and Ancient Macedonian (xmk). -]] - -local export = {} - -local data = {} - -local U = mw.ustring.char -local macron = U(0x304) -local spacing_macron = U(0xAF) -local modifier_macron = U(0x2C9) -local breve = U(0x306) -local spacing_breve = U(0x2D8) -local rough = U(0x314) -local smooth = U(0x313) -local diaeresis = U(0x308) -local acute = U(0x301) -local grave = U(0x300) -local circum = U(0x342) -local Latin_circum = U(0x302) -local coronis = U(0x343) -local subscript = U(0x345) -local undertie = mw.ustring.char(0x35C) -- actually "combining double breve below" - -data["diacritics"] = { - ["macron"] = macron, - ["spacing_macron"] = spacing_macron, - ["modifier_macron"] = modifier_macron, - ["breve"] = breve, - ["spacing_breve"] = spacing_breve, - ["rough"] = rough, - ["smooth"] = smooth, - ["diaeresis"] = diaeresis, - ["acute"] = acute, - ["grave"] = grave, - ["circum"] = circum, - ["Latin_circum"] = Latin_circum, - ["coronis"] = coronis, - ["subscript"] = subscript, -} - -data.diacritics.all = "" -for name, diacritic in pairs(data.diacritics) do - data.diacritics.all = data.diacritics.all .. diacritic -end - -data["named"] = data["diacritics"] - -data["diacritic"] = "[" .. data.diacritics.all .. 
"]" -data["all"] = data["diacritic"] - -data["diacritic_groups"] = { - [1] = "[".. macron .. breve .."]", - [2] = "[".. diaeresis .. smooth .. rough .."]", - [3] = "[".. acute .. grave .. circum .. "]", - [4] = subscript, -} -data["groups"] = data["diacritic_groups"] -data["diacritic_groups"]["accents"] = data["groups"][3] - -data["diacritic_order"] = { - [macron] = 1, - [breve] = 1, - [rough] = 2, - [smooth] = 2, - [diaeresis] = 2, - [acute] = 3, - [grave] = 3, - [circum] = 3, - [subscript] = 4, -} - -data["diacritical_conversions"] = { - -- Convert spacing to combining diacritics - [spacing_macron] = macron, -- macron - [modifier_macron] = macron, - [spacing_breve] = breve, -- breve - ["῾"] = rough, -- rough breathing, modifier letter reversed comma - ["ʽ"] = rough, - ["᾿"] = smooth, -- smooth breathing, modifier letter apostrophe, coronis, combining coronis - ["ʼ"] = smooth, - [coronis] = smooth, - ["´"] = acute, -- acute - ["`"] = grave, -- grave - ["῀"] = circum, -- Greek circumflex (perispomeni), circumflex, combining circumflex - ["ˆ"] = circum, - [Latin_circum] = circum, - ["῎"] = smooth .. acute, -- smooth and acute - ["῍"] = smooth .. grave, -- smooth and grave - ["῏"] = smooth .. circum, -- smooth and circumflex - ["῞"] = rough .. acute, -- rough and acute - ["῝"] = rough .. grave, -- rough and grave - ["῟"] = rough .. circum, -- rough and circumflex - ["¨"] = diaeresis, - ["΅"] = diaeresis .. acute, - ["῭"] = diaeresis .. grave, - ["῁"] = diaeresis .. circum, -} -data["conversions"] = data["diacritical_conversions"] - -data["consonants"] = "ΒβΓγΔδΖζΘθΚκΛλΜμΝνΞξΠπΡρΣσςΤτΦφΧχΨψ" -data["consonant"] = "[" .. data.consonants .. "]" -data["vowels"] = "ΑαΕεΗηΙιΟοΥυΩω" -data["vowel"] = "[" .. data.vowels .. "]" -data["combining_diacritics"] = table.concat{ - macron, breve, - rough, smooth, diaeresis, - acute, grave, circum, - subscript -} -data["combining_diacritic"] = "[" .. data.combining_diacritics .. 
"]" - --- Basic letters with and without diacritics -local letters_with_diacritics = 'ΆΈ-ώϜϝἀ-ᾼῂ-ῌῐ-' .. - -- capital iota with oxia, normalized to capital iota with tonos if entered - -- literally in a string - U(0x1FDB) .. - 'Ὶῠ-Ῥῲ-ῼ' -data.word_characters = letters_with_diacritics .. data.combining_diacritics .. - undertie -data.word_character = "[" .. data.word_characters .. "]" - -local m_data = data -local tokenize = require('/usr/local/lib/lua/wikt/translit/utilities/grc').tokenize - -local ufind = mw.ustring.find -local ugsub = mw.ustring.gsub -local U = mw.ustring.char -local ulower = mw.ustring.lower -local uupper = mw.ustring.upper - -local UTF8char = '[%z\1-\127\194-\244][\128-\191]*' - --- Diacritics -local diacritics = m_data.named - --- Greek -local acute = diacritics.acute -local grave = diacritics.grave -local circumflex = diacritics.circum -local diaeresis = diacritics.diaeresis -local smooth = diacritics.smooth -local rough = diacritics.rough -local macron = diacritics.macron -local breve = diacritics.breve -local subscript = diacritics.subscript - --- Latin -local hat = diacritics.Latin_circum - -local macron_diaeresis = macron .. diaeresis .. "?" .. hat -local a_subscript = '^[αΑ].*' .. subscript .. '$' -local velar = 'κγχξ' - -local tt = { - -- Vowels - ["α"] = "a", - ["ε"] = "e", - ["η"] = "e" .. macron, - ["ι"] = "i", - ["ο"] = "o", - ["υ"] = "u", - ["ω"] = "o" .. macron, - - -- Consonants - ["β"] = "b", - ["γ"] = "g", - ["δ"] = "d", - ["ζ"] = "z", - ["θ"] = "th", - ["κ"] = "k", - ["λ"] = "l", - ["μ"] = "m", - ["ν"] = "n", - ["ξ"] = "x", - ["π"] = "p", - ["ρ"] = "r", - ["σ"] = "s", - ["ς"] = "s", - ["τ"] = "t", - ["φ"] = "ph", - ["χ"] = "kh", - ["ψ"] = "ps", - - -- Archaic letters - ["ϝ"] = "w", - ["ϻ"] = "ś", - ["ϙ"] = "q", - ["ϡ"] = "š", - ["ͷ"] = "v", - - -- Incorrect characters: see [[Wiktionary:About Ancient Greek#Miscellaneous]]. - -- These are tracked by [[Module:script utilities]]. 
- ["ϐ"] = "b", - ["ϑ"] = "th", - ["ϰ"] = "k", - ["ϱ"] = "r", - ["ϲ"] = "s", - ["ϕ"] = "ph", - - -- Diacritics - -- unchanged: macron, diaeresis, grave, acute - [breve] = '', - [smooth] = '', - [rough] = '', - [circumflex] = hat, - [subscript] = 'i', -} - -function export.tr(text, lang, sc) - -- If the script is given as Cprt, then forward the transliteration to that module. - -- This should not be necessary, as [[Module:translit-redirect]] redirects - -- to this module only if script is polytonic. - if sc == "Cprt" then - -- [[Special:WhatLinksHere/Template:tracking/grc-translit/Cprt]] - require('Module:debug').track('grc-translit/Cprt') - return require('Module:Cprt-translit').tr(text, lang, sc) - end - - if text == '῾' then - return 'h' - end - - --[[ - Replace semicolon or Greek question mark with regular question mark, - except after an ASCII alphanumeric character (to avoid converting - semicolons in HTML entities). - ]] - text = ugsub(text, "([^A-Za-z0-9])[;" .. U(0x37E) .. "]", "%1?") - - -- Handle the middle dot. It is equivalent to semicolon or colon, but semicolon is probably more common. - text = text:gsub("·", ";") - - local tokens = tokenize(text) - - --now read the tokens - local output = {} - for i, token in pairs(tokens) do - -- Convert token to lowercase and substitute each character - -- for its transliteration - local translit = ulower(token):gsub(UTF8char, tt) - - local next_token = tokens[i + 1] - - if token == 'γ' and next_token and velar:find(next_token, 1, true) then - -- γ before a velar should be - translit = 'n' - elseif token == 'ρ' and tokens[i - 1] == 'ρ' then - -- ρ after ρ should be - translit = 'rh' - elseif ufind(token, a_subscript) then - -- add macron to ᾳ - translit = ugsub(translit, '([aA])', '%1' .. macron) - end - - if token:find(rough) then - if ufind(token, '^[Ρρ]') then - translit = translit .. 'h' - else -- vowel - translit = 'h' .. translit - end - end - - -- Remove macron from a vowel that has a circumflex. 
- if ufind(translit, macron_diaeresis) then - translit = translit:gsub(macron, '') - end - - -- Capitalize first character of transliteration. - if token ~= ulower(token) then - translit = translit:gsub("^" .. UTF8char, uupper) - end - - table.insert(output, translit) - end - output = table.concat(output) - - return output -end - -return export - - --- Text Expected Actual --- Passed λόγος lógos lógos --- Passed σφίγξ sphínx sphínx --- Passed ϝάναξ wánax wánax --- Passed οἷαι hoîai hoîai --- u/y --- Passed ταῦρος taûros taûros --- Passed νηῦς nēûs nēûs --- Passed σῦς sûs sûs --- Passed ὗς hûs hûs --- Passed γυῖον guîon guîon --- Passed ἀναῡ̈τέω anaṻtéō anaṻtéō --- Passed δαΐφρων daḯphrōn daḯphrōn --- vowel length --- Passed τῶν tôn tôn --- Passed τοὶ toì toì --- Passed τῷ tôi tôi --- Passed τούτῳ toútōi toútōi --- Passed σοφίᾳ sophíāi sophíāi --- Passed μᾱ̆νός mānós mānós --- h (rough breathing) --- Passed ὁ ho ho --- Passed οἱ hoi hoi --- Passed εὕρισκε heúriske heúriske --- Passed ὑϊκός huïkós huïkós --- Passed πυρρός purrhós purrhós --- Passed ῥέω rhéō rhéō --- Passed σάἁμον sáhamon sáhamon --- capitals --- Passed Ὀδυσσεύς Odusseús Odusseús --- Passed Εἵλως Heílōs Heílōs --- Passed ᾍδης Hā́idēs Hā́idēs --- Passed ἡ Ἑλήνη hē Helḗnē hē Helḗnē --- Passed 𐠠𐠒𐠯𐠗 pi-lo-ti-mo pi-lo-ti-mo --- punctuation --- Passed ἔχεις μοι εἰπεῖν, ὦ Σώκρατες, ἆρα διδακτὸν ἡ ἀρετή; ékheis moi eipeîn, ô Sṓkrates, âra didaktòn hē aretḗ? ékheis moi eipeîn, ô Sṓkrates, âra didaktòn hē aretḗ? --- Passed τί τηνικάδε ἀφῖξαι, ὦ Κρίτων; ἢ οὐ πρῲ ἔτι ἐστίν; tí tēnikáde aphîxai, ô Krítōn? ḕ ou prṑi éti estín? tí tēnikáde aphîxai, ô Krítōn? ḕ ou prṑi éti estín? --- Passed τούτων φωνήεντα μέν ἐστιν ἑπτά· α ε η ι ο υ ω. toútōn phōnḗenta mén estin heptá; a e ē i o u ō. toútōn phōnḗenta mén estin heptá; a e ē i o u ō. 
--- Passed πήγ(νῡμῐ) pḗg(nūmi) pḗg(nūmi) --- HTML entities --- Passed καλός καὶ ἀγαθός kalós kaì agathós kalós kaì agathós --- Passed καλός καὶ ἀγαθός kalós kaì agathós kalós kaì agathós \ No newline at end of file diff --git a/wikt/translit/gu-translit.lua b/wikt/translit/gu-translit.lua deleted file mode 100644 index 745504d..0000000 --- a/wikt/translit/gu-translit.lua +++ /dev/null @@ -1,195 +0,0 @@ --- This module will transliterate Gujarati language(gu) text. It is also used to transliterate Kachchi (kfr). --- Language code: guj, kfr - -local export = {} - -local gsub = mw.ustring.gsub -local match = mw.ustring.match - -local conv = { - -- consonants - ['ક'] = 'k', ['ખ'] = 'kh', ['ગ'] = 'g', ['ઘ'] = 'gh', ['ઙ'] = 'ṅ', - ['ચ'] = 'c', ['છ'] = 'ch', ['જ'] = 'j', ['ઝ'] = 'jh', ['ઞ'] = 'ñ', - ['ટ'] = 'ṭ', ['ઠ'] = 'ṭh', ['ડ'] = 'ḍ', ['ઢ'] = 'ḍh', ['ણ'] = 'ṇ', - ['ત'] = 't', ['થ'] = 'th', ['દ'] = 'd', ['ધ'] = 'dh', ['ન'] = 'n', - ['પ'] = 'p', ['ફ'] = 'ph', ['બ'] = 'b', ['ભ'] = 'bh', ['મ'] = 'm', - ['ય'] = 'y', ['ર'] = 'r', ['લ'] = 'l', ['વ'] = 'v', ['ળ'] = 'ḷ', - ['શ'] = 'ś', ['ષ'] = 'ṣ', ['સ'] = 's', ['હ'] = 'h', - ['ત઼'] = 't̰', ['જ઼'] = 'z', ['ંઘ઼'] = 'ng', ['ડ઼'] = 'ṛ', ['ઢ઼'] = 'ṛh', ['ન઼'] = 'ṉ', ['ફ઼'] = 'f', - - --vowel diacritics - ['ા'] = 'ā', ['િ'] = 'i', ['ી'] = 'ī', ['ુ'] = 'u', ['ૂ'] = 'ū', ['ૃ'] = 'ru', ['ૄ'] = 'ṝ', - ['ે'] = 'e', ['ૈ'] = 'ai', ['ો'] = 'o', ['ૌ'] = 'au', ['ૅ'] = 'ɛ', ['ૉ'] = 'ɔ', - - -- vowel mātras - ['અ'] = 'a', ['આ'] = 'ā', ['ઇ'] = 'i', ['ઈ'] = 'ī', ['ઉ'] = 'u', ['ઊ'] = 'ū', ['ઋ'] = 'ru', ['ૠ'] = 'ṝ', - ['એ'] = 'e', ['ઐ'] = 'ai', ['ઓ'] = 'o', ['ઔ'] = 'au', ['ઍ'] = 'ɛ', ['ઑ'] = 'ɔ', - - -- chandrabindu - ['ઁ'] = 'm̐', --until a better method is found - - -- anusvara - ['ં'] = 'ṃ', --until a better method is found - - -- visarga - ['ઃ'] = 'ḥ', - - -- virama - ['્'] = '', - - -- avagraha - ['ઽ'] = '’', - - --numerals - ['૦'] = '0', ['૧'] = '1', ['૨'] = '2', ['૩'] = '3', ['૪'] = '4', ['૫'] = '5', ['૬'] = '6', ['૭'] = '7', ['૮'] = '8', 
['૯'] = '9', - - --punctuation - ['।'] = '.', --danda - ['+'] = '', -- compound separator - - --om - ['ૐ'] = 'OM', -} - -local nasal_assim = { - ["[kg]h?"] = "ṅ", - ["[cj]h?"] = "ñ", - ["[ṭḍ]h?"] = "ṇ", - ["[td]h?"] = "n", - ["[pb]h?"] = "m", - ["n"] = "n", - ["m"] = "m", -} - -function export.tr(text, lang, sc) - local c = '([કખગઘઙચછજઝઞટઠડઢતથદધપફબભશષસયરલવહણનમ]઼?)' - local no_drop = 'ય' - local final_no_drop = 'યરલવહનમ' - local v = '([a્ાિીુૂેૈોૌૃૄૅૉ]ઁ?)' - local virama = '(્)' - local n = '(ં?)' - local nukta = '([તજઘડઢનફ]઼)' - - local can_drop = gsub(c,"["..no_drop.."]","") - local final_can_drop = gsub(c,"["..final_no_drop.."]","") - local no_virama = gsub(v,virama,"") - - text = text .. " " - - --text = gsub(text,"(%S)"..c.."%2","%1ː%2") - - text = gsub(text,c,"%1a") - text = gsub(text,"a"..v,"%1") - text = gsub(text,no_virama..n..can_drop.."a ","%1%2%3 ") --ending - text = gsub(text,virama..n..final_can_drop.."a ","%1%2%3 ") --ending - local pattern = no_virama..n..can_drop.."a"..c..no_virama - while match(text,"(.*)"..pattern) do - text = gsub(text,"(.*)"..pattern,"%1%2%3%4%5%6") - end - - text = gsub(text,nukta,conv) - text = gsub(text,".",conv) - - for key,val in pairs(nasal_assim) do - text = gsub(text,"([aeiou])ṃ("..key..")", "%1"..val.."%2") - end - - text = gsub(text,"([aiueēoāīū])ṃ", "%1̃") - - text = gsub(text,"ː(.)","%1%1") - - text = gsub(text," $","") - - text = gsub(text,"ā̃tar","āntar") - - text = gsub(text,"OM","oṃ") - - return mw.ustring.toNFC(text) -end - -return export - --- 9 tests failed. 
(refresh) - --- test_translit_gujarati: --- Text Expected Actual Differs at --- Failed રુગ્ણાલય rugṇālay rugṇālya 7 --- Failed અતિવલય ativalay ativalya 7 --- Passed ક્ષમા kṣamā kṣamā --- Passed ગોળો goḷo goḷo --- Passed ગુજરાતી gujrātī gujrātī --- Passed બત્તી battī battī --- Passed ઉંદર undar undar --- Passed એરું erũ erũ --- Passed હ્યત઼્ hyat̰ hyat̰ --- Passed સંપત્તિ sampatti sampatti --- Passed જિંદગી jindgī jindgī --- Passed સંન્યાસી sannyāsī sannyāsī --- Passed પૂછવું pūchvũ pūchvũ --- Passed છોકરું chokrũ chokrũ --- Passed ઊંચાં ū̃cā̃ ū̃cā̃ --- Passed ખડબચડું khaḍbacḍũ khaḍbacḍũ --- Passed સમજાવવું samjāvvũ samjāvvũ --- Passed વાંકું vā̃kũ vā̃kũ --- Passed બળજબરી baḷjabrī baḷjabrī --- Passed વર્તવું vartavũ vartavũ --- Passed એંસી ẽsī ẽsī --- Passed ઇચ્છવું icchavũ icchavũ --- Passed વિદુગ્ધધુ vidugdhadhu vidugdhadhu --- Passed આંતર āntar āntar --- Failed અતિઘણું atighaṇũ atighṇũ 6 --- Failed ઉદાહરણ udāharaṇ udāhraṇ 5 --- Failed અતિશયોક્તિ atiśayokti atiśyokti 5 --- Failed કેળવણી keḷavṇī keḷvaṇī 4 --- Failed ચકચકિત cakcakit cakackit 4 --- Failed દસ્તાવેજીકરણ dastāvejīkaraṇ dastāvejīkraṇ 11 --- Failed જાળવવું jālavvũ jāḷvavũ 3 --- Passed ગઈ gaī gaī --- -- Unit tests for [[Module:gu-translit]]. Refresh page to run tests. --- local tests = require('Module:UnitTests') --- local transliterate = require('Module:gu-translit').tr - --- --TO DO --- function tests:do_test_translit(gujr, roman, xlit) --- self:equals('[[' .. gujr .. '#Gujarati|' .. gujr .. 
']]', transliterate(gujr, 'gu', 'Gujr'), roman) --- end - --- function tests:test_translit_gujarati() --- local examples = { --- { 'રુગ્ણાલય', 'rugṇālay' }, --- { 'અતિવલય', 'ativalay' }, --- { 'ક્ષમા', 'kṣamā' }, --- { 'ગોળો', 'goḷo' }, --- { 'ગુજરાતી', 'gujrātī' }, --- { 'બત્તી', 'battī' }, --- { 'ઉંદર', 'undar' }, --- { 'એરું', 'erũ' }, --- { 'હ્યત઼્', 'hyat̰' }, --- { 'સંપત્તિ', 'sampatti' }, --- { 'જિંદગી', 'jindgī' }, --- { 'સંન્યાસી', 'sannyāsī' }, --- { 'પૂછવું', 'pūchvũ' }, --- { 'છોકરું', 'chokrũ' }, --- { 'ઊંચાં', 'ū̃cā̃' }, --- { 'ખડબચડું', 'khaḍbacḍũ' }, --- { 'સમજાવવું', 'samjāvvũ' }, --- { 'વાંકું', 'vā̃kũ' }, --- { 'બળજબરી', 'baḷjabrī' }, --- { 'વર્તવું', 'vartavũ' }, --- { 'એંસી', 'ẽsī' }, --- { 'ઇચ્છવું', 'icchavũ' }, --- { 'વિદુગ્ધધુ', 'vidugdhadhu' }, --- { 'આંતર', 'āntar' }, --- { 'અતિઘણું', 'atighaṇũ' }, --- { 'ઉદાહરણ', 'udāharaṇ' }, --- { 'અતિશયોક્તિ', 'atiśayokti' }, --- { 'કેળવણી', 'keḷavṇī' }, --- { 'ચકચકિત', 'cakcakit' }, --- { 'દસ્તાવેજીકરણ', 'dastāvejīkaraṇ' }, --- { 'જાળવવું', 'jālavvũ' }, --- {'ગઈ', 'gaī'}, --- } --- self:iterate(examples, 'do_test_translit') --- end - --- return tests \ No newline at end of file diff --git a/wikt/translit/guru-translit.lua b/wikt/translit/guru-translit.lua deleted file mode 100644 index cb40ac1..0000000 --- a/wikt/translit/guru-translit.lua +++ /dev/null @@ -1,102 +0,0 @@ --- This module will transliterate text in the Gurmukhi script. It is used to transliterate Punjabi (pa). 
--- Language code: pan – Eastern Punjabi --- pnb – Western Punjabi -local export = {} - -local conv = { - --consonants without nukta - ["ਸ"] = "s", - ["ਹ"] = "h", - ["ਕ"] = "k", ["ਖ"] = "kh", ["ਗ"] = "g", ["ਘ"] = "gh", ["ਙ"] = "ṅ", - ["ਚ"] = "c", ["ਛ"] = "ch", ["ਜ"] = "j", ["ਝ"] = "jh", ["ਞ"] = "ñ", - ["ਟ"] = "ṭ", ["ਠ"] = "ṭh", ["ਡ"] = "ḍ", ["ਢ"] = "ḍh", ["ਣ"] = "ṇ", - ["ਤ"] = "t", ["ਥ"] = "th", ["ਦ"] = "d", ["ਧ"] = "dh", ["ਨ"] = "n", - ["ਪ"] = "p", ["ਫ"] = "ph", ["ਬ"] = "b", ["ਭ"] = "bh", ["ਮ"] = "m", - ["ਯ"] = "y", ["ਰ"] = "r", ["ਲ"] = "l", ["ਵ"] = "v", ["ੜ"] = "ṛ", - - --consonants with nukta - ["ਸ਼"] = "ś", - ["ਖ਼"] = "x", - ["ਗ਼"] = "ġ", - ["ਜ਼"] = "z", - ["ਫ਼"] = "f", - ["ਲ਼"] = "ḷ", - - -- vowels - ["ਾ"] = "ā", - ["ਿ"] = "i", ["ੀ"] = "ī", - ["ੁ"] = "u", ["ੂ"] = "ū", - ["ੇ"] = "e", ["ੈ"] = "ē", - ["ੋ"] = "o", ["ੌ"] = "au", - - -- other diacritics - ["ੰ"] = "N", --ṭippi: nasalize - ["ਂ"] = "N", --bindi: nasalize - ["ੱ"] = "ː", --addak: germinate - ["੍"] = "", --halant, supresses the inherent vowel "a" - ["ਃ"] = "h", --voiceless "h" sound (tone raiser) - - -- independent vowels - ["ਅ"] = "a", ["ਆ"] = "ā", - ["ਇ"] = "i", ["ਈ"] = "ī", - ["ਉ"] = "u", ["ਊ"] = "ū", - ["ਏ"] = "e", ["ਐ"] = "ē", - ["ਓ"] = "o", ["ਔ"] = "ō", - - -- digits - ["੦"] = "0", ["੧"] = "1", ["੨"] = "2", ["੩"] = "3", ["੪"] = "4", - ["੫"] = "5", ["੬"] = "6", ["੭"] = "7", ["੮"] = "8", ["੯"] = "9", -} - -local nasal_assim = { - ["[kg]h?"] = "ṅ", - ["[cj]h?"] = "ñ", - ["[ṭḍ]h?"] = "ṇ", - ["[td]h?"] = "n", - ["[pb]h?"] = "m", - ["n"] = "n", - ["m"] = "m", - ["s"] = "n", -} - --- translit any words or phrases -function export.tr(text, lang, sc) - local c = "([ਸਹਕਖਗਘਙਚਛਜਝਞਟਠਡਢਣਤਥਦਧਨਪਫਬਭਮਯਰਲਵੜː]਼?)" - local y = "ਯ" - local v = "([aਾਿੀੁੂੇੈੋੌ੍])" - local virama = "੍" - local n = "([ੰਂ]?)" - local nukta = "([ਸਖਗਜਫਲ]਼)" - - can_drop = mw.ustring.gsub(c,y,"") - no_virama = mw.ustring.gsub(v,virama,"") - - text = text .. 
" " - - text = mw.ustring.gsub(text,c,"%1a") - text = mw.ustring.gsub(text,"a"..v,"%1") - -- mw.log(text) - text = mw.ustring.gsub(text,v..n..can_drop.."a ","%1%2%3 ") --ending - -- mw.log(text) - text = mw.ustring.gsub(text,v..n..can_drop.."a"..c..v,"%1%2%3%4%5") - -- mw.log(text) - - text = mw.ustring.gsub(text,nukta,conv) - text = mw.ustring.gsub(text,".",conv) - - for key,val in pairs(nasal_assim) do - text = mw.ustring.gsub(text,"N("..key..")",val.."%1") - end - text = mw.ustring.gsub(text,"([aiueēoāīū])N ", "%1̃ ") - text = mw.ustring.gsub(text,"(.?)N", "%1̃") - - text = mw.ustring.gsub(text,"ː(.)","%1%1") - - text = mw.ustring.gsub(text," ?।",".") - - text = mw.ustring.gsub(text," $","") - - return mw.ustring.toNFC(text) -end - -return export \ No newline at end of file diff --git a/wikt/translit/he-translit.lua b/wikt/translit/he-translit.lua deleted file mode 100644 index 452f12f..0000000 --- a/wikt/translit/he-translit.lua +++ /dev/null @@ -1,691 +0,0 @@ --- This module will transliterate Hebrew language text per WT:HE TR. --- Language code: ---[[ -heb – Modern Hebrew -hbo – Classical Hebrew (liturgical) -smp – Samaritan Hebrew (liturgical) -obm – Moabite (extinct) -xdm – Edomite (extinct) -]] -local export = {} -local U = mw.ustring.char -local gsub = mw.ustring.gsub - -local sheva = U(0x05B0) -local hataf_segol = U(0x05B1) -local hataf_patah = U(0x05B2) -local hataf_qamats = U(0x05B3) -local hiriq = U(0x05B4) -local tsere = U(0x05B5) -local segol = U(0x05B6) -local patah = U(0x05B7) -local qamats = U(0x05B8) -local qamats_qatan = U(0x05C7) -local holam = U(0x05B9) -local holam_haser_for_waw = U(0x05BA) -local qubuts = U(0x05BB) -local dagesh_mappiq = U(0x05BC) -local shin_dot = U(0x05C1) -local sin_dot = U(0x05C2) - -local macron_above = U(0x0304) -local macron_below = U(0x0331) -local macron = "[" .. macron_above .. macron_below .. "]" - -local alef = "א" -local he = "ה" -local waw = "ו" -local yod = "י" -local vowel_letters = alef .. he .. waw .. 
yod -local vowel_letter = "[" .. vowel_letters .. "]" - --- '0' represents silent sheva -local vowel_points = ( - sheva .. hataf_segol .. hataf_patah .. hataf_qamats .. hiriq .. tsere .. - segol .. patah .. qamats .. qamats_qatan .. holam .. qubuts .. '0' .. - holam_haser_for_waw -) -local vowel_point = "[" .. vowel_points .. "]" -local short_vowels = segol .. patah .. hiriq .. qubuts .. qamats_qatan -local short_vowel = "[" .. short_vowels .. "]" - -local shuruq = waw .. dagesh_mappiq -local holam_male = waw .. holam - --- use dummies characters that do not match as punctuation --- the dummy letter stands in for final silent alef or he, or for the hiatus before a furtive patah, --- or comes before a pre-transliterated waw to aid in matching -local dummy_letter = U(0x0627) -- ARABIC LETTER ALEF -local dummy_geresh = U(0x064E) -- ARABIC FATHA -local dummy_gershayim = U(0x064B) -- ARABIC FATHATAN -local real_geresh = '׳' -local real_gershayim = '״' -local letter_modifier = "[" .. shin_dot .. sin_dot .. "]?[" .. dummy_geresh .. dummy_gershayim .. "]?" -local letters = "אבגדהוזחטיכךלמםנןסעפףצץקרשת" -local letter = "[" .. letters .. dummy_letter .. "]" .. letter_modifier -local letter_not_waw = "[אבגדהזחטיכךלמםנןסעפףצץקרשת" .. dummy_letter .. "]" .. letter_modifier -local gutturals = "אהחע" -local guttural = "[" .. gutturals .. "]" - -local vowel_letter_or_geresh = "[" .. vowel_letters .. dummy_geresh .. dummy_gershayim .. "]" - --- note, the geresh and gershayim are included in this, which is why dummies are used in their place -local word_break_chars = "%s%p" -local word_break = "[" .. word_break_chars .. "]" -local word_start = "%f[^" .. word_break_chars .. "]" -- matches the boundary but not the actual word break characters -local word_end = "%f[" .. word_break_chars .. "]" -- matches the boundary but not the actual word break characters - -local tr_vowels = "aeiouāēīōūəăĕŏ0" - -local biblical_to_modern = { - ['ʾ'] = '\'', - ['b' .. macron_below] = 'v', - ['g' .. 
macron_above] = 'g', - ['d' .. macron_below] = 'd', - ['w'] = 'v', - ['ž'] = 'zh', - ['ḥ'] = 'kh', - ['ṭ'] = 't', - ['k' .. macron_below] = 'kh', - ['ʿ'] = '\'', - ['p' .. macron_above] = 'f', - ['ṣ'] = 'ts', - ['č'] = 'ch', - ['q'] = 'k', - ['š'] = 'sh', - ['ś'] = 's', - ['t' .. macron_below] = 't', - - ['ə'] = '\'', - ['ĕ'] = 'e', - ['ă'] = 'a', - ['ŏ'] = 'o', - ['ī'] = 'i', - ['ē'] = 'e', - ['ā'] = 'a', - ['ō'] = 'o', - ['ū'] = 'u', -} - --- helper function to remove vowel letters but keep gereshes -local function gereshes(str) - return gsub(str, vowel_letter, '') -end - -local biblical = { - { - -- replace geresh and gershayim with their dummy equivalents so that they won't match as word boundaries - [real_geresh] = dummy_geresh, - [real_gershayim] = dummy_gershayim, - }, - - { - -- The default order is: consonant, vowel point, dagesh or mappiq, shin or sin dot. - -- The desired order is: consonant, shin or sin dot, dagesh or mappiq, vowel point. - -- Also, move geresh and gershayim closer to the letter for easier handling (will be moved back later if not actually a modifier) - ["([" .. letters .. "])(" .. vowel_point .. "*)(" .. dagesh_mappiq .. "*)([" .. shin_dot .. sin_dot .. "]*)([" .. dummy_geresh .. dummy_gershayim .. "]*)"] = "%1%4%5%3%2", - }, - - { - -- special case: change qamats in כל to qamats qatan - -- the problem is that כל might be preceded by prefixed clitics, which maybe be chained indefinitely, - -- while other unrelated words might happen to end in כל with a qamats gadol; therefore, match either - -- the entire word or only when preceded by a precisely recognized prefix - [word_start .. "(כ" .. dagesh_mappiq .. "?)" .. qamats .. "(ל)" .. word_end] = "%1" .. qamats_qatan .. "%2", - ["([הבכל]" .. dagesh_mappiq .. "?" .. patah .. "כ" .. dagesh_mappiq .. ")" .. qamats .. "(ל)" .. word_end] = "%1" .. qamats_qatan .. "%2", - ["(מ" .. dagesh_mappiq .. "?" .. hiriq .. "כ" .. dagesh_mappiq .. ")" .. qamats .. "(ל)" .. word_end] = "%1" .. 
qamats_qatan .. "%2", - ["(ש" .. shin_dot .. dagesh_mappiq .. "?[" .. segol .. patah .. "]כ" .. dagesh_mappiq .. ")" .. qamats .. "(ל)" .. word_end] = "%1" .. qamats_qatan .. "%2", -- patah is very archaic - ["([ובכלד]" .. dagesh_mappiq .. "?" .. sheva .. "כ)" .. qamats .. "(ל)" .. word_end] = "%1" .. qamats_qatan .. "%2", - }, - - { - -- remove final alef and he, but only when preceded by a vowel - ["(" .. vowel_point .. vowel_letter_or_geresh .. "*)[" .. alef .. he .. "]" .. word_end] = "%1" .. dummy_letter, - ["(" .. shuruq .. vowel_letter_or_geresh .. "*)[" .. alef .. he .. "]" .. word_end] = "%1" .. dummy_letter, - }, - - { - -- these are the cases, other than the above, where a final letter should be ignored - [hiriq .. vowel_letter_or_geresh .. "-[" .. yod .. dummy_letter .. "]" .. word_end] = "ī", - ["([" .. tsere .. segol .. "])" .. vowel_letter_or_geresh .. "-[" .. yod .. "]" .. word_end] = "%1", - ["([" .. holam .. qubuts .. "])" .. vowel_letter_or_geresh .. "-[" .. waw .. "]" .. word_end] = "%1", - }, - - { - [sheva .. "(" .. letter .. ")" .. sheva] = "0%1" .. sheva, -- two shevas in a row - ["(" .. short_vowel .. letter .. ")" .. sheva] = "%10", -- after a short vowel, assume(!) a silent sheva - ["(" .. guttural .. ")" .. sheva] = "%10", -- gutturals cannot have a vocal sheva - - ["(" .. vowel_point .. ")" .. shuruq] = "%1" .. dummy_letter .. "ww", -- when waw + dagesh is not a shuruq - ["(" .. vowel_point .. vowel_letter_or_geresh .. "-)" .. shuruq .. "(" .. vowel_letter_or_geresh .. "-" .. vowel_point .. ")"] = "%1" .. dummy_letter .. "ww%2", -- when waw + dagesh is not a shuruq - ["(" .. vowel_point .. ")" .. holam_male] = "%1" .. dummy_letter .. "w" .. holam, -- when waw + holam is not a holam male - - ["([" .. alef .. he .. "])" .. dagesh_mappiq] = "%1", -- handle mappiq (very rarely occurs on an alef) - }, - - { - [shuruq .. shuruq] = shuruq .. "ww", -- another potential case when waw + dagesh is not a shuruq - [shuruq .. holam_male] = shuruq .. 
"w" .. holam, -- another potential case when waw + holam is not a holam male - - -- tentatively lengthen hiriqs with vowel letters - [hiriq .. "(" .. vowel_letter_or_geresh .. "+)(" .. letter .. ")"] = function(vlg, l) return "ī" .. gereshes(vlg) .. l end, - - -- rearrange furtive patach (mappiq should already have been removed, but handle it just in case) - ["(" .. guttural .. dagesh_mappiq .. "?)" .. patah .. word_end] = dummy_letter .. "a%1", - }, - - { - -- remove vowel letters - ["(" .. letter .. ")(" .. vowel_letter_or_geresh .. "+)" .. shuruq] = function(l, vlg) return l .. gereshes(vlg) .. shuruq end, - [shuruq .. "(" .. vowel_letter_or_geresh .. "+)" .. "(" .. letter_not_waw .. ")"] = function(vlg, l) return shuruq .. gereshes(vlg) .. l end, - [shuruq .. "(" .. vowel_letter_or_geresh .. "+)" .. "(" .. waw .. "[^" .. holam .. dagesh_mappiq .. "])"] = function(vlg, l) return shuruq .. gereshes(vlg) .. l end, - ["(" .. vowel_point .. ")" .. "(" .. vowel_letter_or_geresh .. "+)" .. "(" .. letter_not_waw .. ")"] = function(vp, vlg, l) return vp .. gereshes(vlg) .. l end, - ["(" .. vowel_point .. ")" .. "(" .. vowel_letter_or_geresh .. "+)" .. "(" .. waw .. "[^" .. holam .. dagesh_mappiq .. "])"] = function(vp, vlg, l) return vp .. gereshes(vlg) .. l end, - }, - - { - -- handle two-character combinations first - ['ג' .. dummy_geresh] = 'j', - ['ז' .. dummy_geresh] = 'ž', - ['[צץ]' .. dummy_geresh] = 'č', - ['ש' .. shin_dot] = 'š', - ['ש' .. sin_dot] = 'ś', - }, - - { - ['א'] = 'ʾ', - ['ב'] = 'b' .. macron_below, - ['ג'] = 'g' .. macron_above, - ['ד'] = 'd' .. macron_below, - ['ה'] = 'h', - ['ז'] = 'z', - ['ח'] = 'ḥ', - ['ט'] = 'ṭ', - ['י'] = 'y', - ['[כך]'] = 'k' .. macron_below, - ['ל'] = 'l', - ['[מם]'] = 'm', - ['[נן]'] = 'n', - ['ס'] = 's', - ['ע'] = 'ʿ', - ['[פף]'] = 'p' .. macron_above, - ['[צץ]'] = 'ṣ', - ['ק'] = 'q', - ['ר'] = 'r', - ['ת'] = 't' .. macron_below, - }, - - { - [word_start .. '([bgdkptj])' .. macron .. '?' .. 
dagesh_mappiq] = '%1', -- assume(!) dagesh qal at the beginning of a word - ['[0' .. sheva .. ']([bgdkptj])' .. macron .. '?' .. dagesh_mappiq] = '0%1', -- dagesh qal after sheva, and assume(!) silent sheva - ['(%l)0%1'] = '%1' .. sheva .. '%1', -- vocal sheva between identical consonants - [shuruq] = 'ū', - }, - - { - -- restore geresh and gershayim order - ["([" .. dummy_geresh .. dummy_gershayim .. "])(" .. dagesh_mappiq .. "*)(" .. vowel_point .. "*)"] = "%2%3%1", - }, - - { - -- handle ירושלם - [hiriq .. patah] = "ayi", -- in this case, the vowels are reversed by Unicode normalization rules - [patah .. hiriq] = "ayi", -- just in case they're in the correct order - [hiriq .. qamats] = "āyi", -- pausal form of above - [qamats .. hiriq] = "āyi", -- as above - -- handle ירושלמה - ["[0" .. sheva .. "]" .. patah] = "ay", -- in this case, the vowels are reversed by Unicode normalization rules - [patah .. "[0" .. sheva .. "]"] = "ay", -- just in case they're in the correct order - ["[0" .. sheva .. "]" .. qamats] = "āy", -- pausal form of above - [qamats .. "[0" .. sheva .. "]"] = "āy", -- as above - }, - - { - [sheva] = 'ə', - [hataf_segol] = 'ĕ', - [hataf_patah] = 'ă', - [hataf_qamats] = 'ŏ', - [hiriq] = 'i', - [tsere] = 'ē', - [segol] = 'e', - [patah] = 'a', - [qamats] = 'ā', - [qamats_qatan] = 'o', - [qubuts] = 'u', - [shin_dot] = '', - [sin_dot] = '', - [holam_male] = 'ō', - [waw .. holam_haser_for_waw] = 'wō', - }, - - { - ['(.)' .. macron .. '?' .. dagesh_mappiq] = '%1%1', -- gemination - }, - - { - ['(śśā)[שś](k' .. macron_below .. ')'] = '%1%2', -- special case for יששכר - }, - - { - ['ā(%l' .. macron .. '?0)'] = 'o%1', -- assume(!) qamats qatan before silent sheva - - [holam] = 'ō', - ['ו'] = 'w', - ['ש'] = 'š', -- assume(!) shin if no shin or sin dot - }, - - { - -- handle bgdkpt letters in unvocalized words (such as acronyms) - [word_start .. "([^" .. tr_vowels .. "]-[bgdkpt]" .. macron .. "[^" .. tr_vowels .. "]-)" .. 
word_end] = function(w) return gsub(w, "([bgdkpt])" .. macron, "%1") end - }, - - { - ["[0" .. dummy_letter .. "]"] = "", - - -- short vowels in non-final closed syllables (this rule should be expanded) - ["ū(%l)%1"] = "u%1%1", - ["ī(%l)%1"] = "i%1%1", - }, - - { - ['ə' .. word_end] = "", -- final sheva is always silent - - [dummy_geresh] = '′', - [dummy_gershayim] = '″', - ['׃'] = '.', -- sof pasuq - ['־'] = '-', -- maqaf - }, -} - -function export.tr(text, lang, sc) - -- default to modern for Hebrew, but not for other languages, such as Aramaic - local modern = lang == "he" - return export.biblical(text, modern) -end - -function export.biblical(text, modern) - -- decompose - text = mw.ustring.toNFD(text) - - -- wrap with spaces to make initial and final replacements easier - text = ' ' .. text .. ' ' - - for _, replacements in ipairs(biblical) do - for regex, replacement in pairs(replacements) do - text = gsub(text, regex, replacement) - end - end - - -- unwrap spaces - text = mw.ustring.match(text, "^ (.*) $") - if text == nil then error("Something went wrong, wrapped spaces were deleted.") end - - -- must happen before recomposition - if modern then - text = gsub(text, "([%lʾʿ])%1", "%1") - text = gsub(text, "[%lʾʿ]" .. macron .. "?", function(x) return biblical_to_modern[x] or x end) - text = gsub(text, "''", "'") - end - - -- recompose - text = mw.ustring.toNFC(text) - - return text -end - -return export - --- 10 tests failed. 
(refresh) - --- test_biblical: --- Text Expected Actual Differs at Comments --- Passed בַּיִת‎ bayiṯ bayiṯ --- Passed בֵּית‎ bēṯ bēṯ --- Passed בָּתִּים‎ bāttīm bāttīm --- Passed מַחֲנֶה‎ maḥăne maḥăne --- Passed בָּרָא‎ bārā bārā --- Passed רֶגֶל‎ reḡel reḡel --- Passed כֹּהֵן‎ kōhēn kōhēn --- Passed מֶלֶךְ‎ meleḵ meleḵ --- Passed מַמְלָכָה‎ mamlāḵā mamlāḵā --- Passed הַמַּמְלָכָה‎ hammamlāḵā hammamlāḵā --- Passed הַלְּלוּיָהּ‎ halləlūyāh halləlūyāh --- Passed הַלְלוּיָהּ‎ haləlūyāh haləlūyāh --- Passed יָדַע‎ yāḏaʿ yāḏaʿ --- Passed שָׁבוּעַ‎ šāḇūaʿ šāḇūaʿ --- Passed רוּחַ‎ rūaḥ rūaḥ --- Passed גָּבֹהַּ‎ gāḇōah gāḇōah --- Passed מָשִׁיחַ‎ māšīaḥ māšīaḥ --- Passed רֵיחַ‎ rēaḥ rēaḥ --- Passed שָׂדֶה‎ śāḏe śāḏe --- Passed שְׂדֵה‎ śəḏē śəḏē --- Passed בָּנַי‎ bānay bānay --- Passed בְּנֵי‎ bənē bənē --- Passed צָרְכִּי‎ ṣorkī ṣorkī --- Passed חָכְמָה‎ ḥāḵəmā ḥāḵəmā ambiguous case: could be ḥāḵəmā or ḥoḵmā, but I think ḥāḵəmā is the preferred default --- Passed שִׁפְרָה‎ šip̄rā šip̄rā --- Passed שָׁכְבְּךָ‎ šoḵbəḵā šoḵbəḵā --- Passed הָפְכָּה‎ hop̄kā hop̄kā made-up word, but a particular potentially problematic Unicode situation --- Passed קָטְבּוֹ‎ qoṭbō qoṭbō another particular potentially problematic Unicode situation --- Passed נִשְׂרְפָה‎ niśrəp̄ā niśrəp̄ā --- Passed בָּנָיו‎ bānāw bānāw --- Passed בָּנֶיהָ‎ bānehā bānehā --- Passed מִצְוֹת‎ miṣwōṯ miṣwōṯ --- Passed זִוּוּג‎ ziwwūḡ ziwwūḡ --- Passed רֹאשׁ‎ rōš rōš --- Passed רֵאשִׁית‎ rēšīṯ rēšīṯ --- Passed רִאשׁוֹן‎ rīšōn rīšōn --- Passed מְלָאכָה‎ məlāḵā məlāḵā --- Passed מְלֶאכֶת‎ məleḵeṯ məleḵeṯ --- Passed חֵטְא‎ ḥēṭ ḥēṭ --- Passed בָּרָאתָ‎ bārāṯā bārāṯā --- Passed חַטֹּאות‎ ḥaṭṭōṯ ḥaṭṭōṯ --- Passed יְראוּ‎ yərū yərū --- Passed וַיֶּאְסֹר‎ wayyeʾsōr wayyeʾsōr --- Passed הָחְלַט‎ hoḥlaṭ hoḥlaṭ --- Passed וַיֵּבְךְּ‎ wayyēḇk wayyēḇk --- Passed אַרְאֶךָּ‎ ʾarʾekkā ʾarʾekkā --- Passed וַיַּשְׁקְ‎ wayyašq wayyašq --- Passed אַתְּ‎ ʾatt ʾatt --- Passed וּוָווֹ‎ ūwāwō ūwāwō --- Passed וָו‎ wāw wāw --- Passed תָּו‎ 
tāw tāw --- Passed קַו‎ qaw qaw --- Passed לָאו‎ lāw lāw --- Passed חַי‎ ḥay ḥay --- Passed חָי‎ ḥāy ḥāy pausal --- Passed פִּיו‎ pīw pīw --- Passed כִּסְלֵו‎ kislēw kislēw --- Passed גּוֹי‎ gōy gōy --- Passed גֹּי‎ gōy gōy --- Passed גֹּיִים‎ gōyīm gōyīm --- Passed רָאוּי‎ rāʾūy rāʾūy --- Passed קִיא‎ qī qī --- Failed יָבִיאוּ‎ yāḇīʾū yāḇīū 5 --- Failed יְבִיאוּן‎ yəḇīʾūn yəḇīūn 5 --- Passed מֵאוּן‎ mēʾūn mēʾūn --- Failed מֵיאוּן‎ mēʾūn mēyūn 3 --- Passed בּוֹאוּ‎ bōʾū bōʾū --- Passed בֹּאוּ‎ bōʾū bōʾū --- Passed בּוּאוּ‎ būʾū būʾū made-up word, but may help identify the issue --- Passed אָבִיאָה‎ ʾāḇīʾā ʾāḇīʾā --- Passed מֵאָה‎ mēʾā mēʾā --- Passed גֵּיאָהּ‎ gēʾāh gēʾāh --- Passed אָבוֹאָה‎ ʾāḇōʾā ʾāḇōʾā --- Passed אָבֹאָה‎ ʾāḇōʾā ʾāḇōʾā --- Passed נְשׂוּאָה‎ nəśūʾā nəśūʾā --- Failed קִיאוֹ‎ qīʾō qīō 3 --- Passed גֵּאוֹ‎ gēʾō gēʾō --- Passed גֵּיאוֹ‎ gēʾō gēʾō --- Passed בּוֹאוֹ‎ bōʾō bōʾō --- Passed בֹּאוֹ‎ bōʾō bōʾō --- Passed מִלּוּאוֹ‎ millūʾō millūʾō --- Passed מִי‎ mī mī --- Passed אִיִּים‎ ʾiyyīm ʾiyyīm --- Passed אִיּוֹב‎ ʾiyyōḇ ʾiyyōḇ --- Passed אִיּוּן‎ ʾiyyūn ʾiyyūn --- Passed אַיִן‎ ʾayin ʾayin --- Passed בּוֹא‎ bō bō --- Passed יְפֵהפֶה‎ yəp̄ēp̄e yəp̄ēp̄e --- Passed אֹהֶל‎ ʾōhel ʾōhel --- Passed הָאֹהֱלָה‎ hāʾōhĕlā hāʾōhĕlā --- Failed אָהֳלוֹ‎ ʾohŏlō ʾāhŏlō 2 --- Failed אָהָלְךָ‎ ʾoholəḵā ʾāhāləḵā 2 --- Passed יִשָּׂשכָר‎ yiśśāḵār yiśśāḵār Still undecided if this actually needs to be handled --- Passed הוֹשִׁיעָה נָּא‎ hōšīʿā nnā hōšīʿā nnā --- Passed עַד בֹּאֲךָ‎ ʿaḏ bōʾăḵā ʿaḏ bōʾăḵā --- Passed וַיַּשְׁקְ אֶת הַצֹּאן‎ wayyašq ʾeṯ haṣṣōn wayyašq ʾeṯ haṣṣōn --- Passed בְּנֵי בְרָק‎ bənē ḇərāq bənē ḇərāq --- Passed בְרָק‎ ḇərāq ḇərāq --- Passed אִישׁ יְהוּדִי הָיָה בְּשׁוּשַׁן הַבִּירָה וּשְׁמוֹ מָרְדֳּכַי בֶּן יָאִיר בֶּן־שִׁמְעִי בֶּן־קִישׁ אִישׁ יְמִינִי׃‎ ʾīš yəhūḏī hāyā bəšūšan habbīrā ūšəmō mordŏḵay ben yāʾīr ben-šimʿī ben-qīš ʾīš yəmīnī. ʾīš yəhūḏī hāyā bəšūšan habbīrā ūšəmō mordŏḵay ben yāʾīr ben-šimʿī ben-qīš ʾīš yəmīnī. 
--- Failed אִ֣ישׁ יְהוּדִ֔י הָיָ֖ה בְּשׁוּשַׁ֣ן הַבִּירָ֑ה וּשְׁמ֣וֹ מָרְדֳּכַ֗י בֶּ֣ן יָאִ֧יר בֶּן־שִׁמְעִ֛י בֶּן־קִ֖ישׁ אִ֥ישׁ יְמִינִֽי׃‎ ʾīš yəhūḏī hāyā bəšūšan habbīrā ūšəmō mordŏḵay ben yāʾīr ben-šimʿī ben-qīš ʾīš yəmīnī. ʾi֣yš yəhūḏi֔y hāyā֖h bəšūša֣n habbīrā֑h ūšəm֣ō mordŏḵa֗y be֣n yāʾi֧yr ben-šimʿi֛y ben-qi֖yš ʾi֥yš yəmīniֽy. 2 fully accented verse; stress should not be indicated in the final syllable --- Failed וַיְהִי הַמַּבּוּל אַרְבָּעִים יוֹם עַל־הָאָרֶץ וַיִּרְבּוּ הַמַּיִם וַיִּשְׂאוּ אֶת־הַתֵּבָה וַתָּרָם מֵעַל הָאָרֶץ׃‎‎ wayəhī hammabbūl ʾarbāʿīm yōm ʿal-hāʾā́reṣ wayyirbū hammáyim wayyiśəʾū ʾeṯ-hattēḇā wattā́rom mēʿal hāʾāreṣ. wayhī hammabbūl ʾarbāʿīm yōm ʿal-hāʾāreṣ wayyirbū hammayim wayyiśʾū ʾeṯ-hattēḇā wattārām mēʿal hāʾāreṣ.‎ 4 a reminder of why this is hard --- Failed וַיְהִ֧י הַמַּבּ֛וּל אַרְבָּעִ֥ים י֖וֹם עַל־הָאָ֑רֶץ וַיִּרְבּ֣וּ הַמַּ֗יִם וַיִּשְׂאוּ֙ אֶת־הַתֵּבָ֔ה וַתָּ֖רָם מֵעַ֥ל הָאָֽרֶץ׃‎ wayəhī hammabbūl ʾarbāʿīm yōm ʿal-hāʾā́reṣ wayyirbū hammáyim wayyiśəʾū ʾeṯ-hattēḇā wattā́rom mēʿal hāʾāreṣ. wayhi֧y hammabb֛ūl ʾarbāʿi֥ym y֖ōm ʿal-hāʾā֑reṣ wayyirb֣ū hamma֗yim wayyiśʾū֙ ʾeṯ-hattēḇā֔h wattā֖rām mēʿa֥l hāʾāֽreṣ. 
4 fully accented verse version of the above --- implicit ktiv/qre that would be nice to have --- Passed הִוא‎ hī hī --- Passed יְרוּשָׁלִַם‎ yərūšālayim yərūšālayim --- Passed יְרוּשָׁלִָם‎ yərūšālāyim yərūšālāyim pausal form --- Passed יְרוּשָׁלְַמָה‎ yərūšālaymā yərūšālaymā --- Passed יְרוּשָׁלְָמָה‎ yərūšālāymā yərūšālāymā --- ktiv male tests --- Passed חַיָּיב‎ ḥayyāḇ ḥayyāḇ --- Passed חַוָּוה‎ ḥawwā ḥawwā --- Passed הֱוֵוה‎ hĕwē hĕwē --- Passed הַיְינוּ‎ haynū haynū --- Passed הִתְכַּוְּונוּ‎ hiṯkawwənū hiṯkawwənū --- Passed גַּוְונָא‎ gawnā gawnā --- Passed מְייוּחָד‎ məyūḥāḏ məyūḥāḏ there is no way to tell that it really should be məyuḥāḏ, but anyway this test is for the double yod --- Passed כְּדַאי‎ kəḏay kəḏay --- Passed כּוּלָּם‎ kullām kullām shuruk does not necessarily imply a long vowel --- Passed קִידּוּשׁ‎ qiddūš qiddūš chiriq male does not necessarily imply a long vowel --- test_translit_hebrew: --- Text Expected Actual Differs at Comments --- Passed מַקְלֵעַ‎ maklea' maklea' --- Passed אַבְּסוּרְד‎ 'ab'sur'd 'ab'sur'd not sure about what should be expected here --- Passed בִּיּוֹמֶטְרִיָּה‎ biyometriya biyometriya --- Passed קַפְרִיסִין‎ kafrisin kafrisin --- Passed חֹרֶף‎ khoref khoref --- Failed טוּרְקִיז‎ turkiz tur'kiz 4 --- Passed טַחַב‎ takhav takhav --- Passed יִוָּלֵד‎ yivaled yivaled --- Passed יָקִינְתּוֹן‎ yakinton yakinton --- Passed כֻּתְנָה‎ kutna kutna --- Passed נַגָּרִיָּה‎ nagariya nagariya --- Passed נַעֲלֶה‎ na'ale na'ale --- Passed מִצְווֹת‎ mitsvot mitsvot --- Passed מָקוֹם‎ makom makom --- Passed פֶּרוּאָנִי‎ peru'ani peru'ani --- Passed צִדְפָּה‎ tsidpa tsidpa --- Passed תׇּכְנָה‎ tokhna tokhna --- Passed רְאוּ‎ r'u r'u --- Passed גּ׳וּק‎ juk juk --- Passed ג׳וּק‎ juk juk --- Passed גִּ׳ירָאפָה‎ jirafa jirafa --- Passed גִ׳ירָאפָה‎ jirafa jirafa --- Passed זַ׳רְגוֹן‎ zhargon zhargon --- Passed קַפּוּצִ׳ינוֹ‎ kapuchino kapuchino --- Passed סְקוֹץ׳‎ s'koch s'koch --- Passed סְתוֹם תַּ׳פֶּה‎ s'tom ta′pe s'tom ta′pe --- Passed 
אִמָּא׳לֶה‎ 'ima′le 'ima′le --- Passed חָזָ״ל‎ khaza″l khaza″l --- Passed נַחַ״ל‎ nakha″l nakha″l --- Passed רה״מ‎ rh″m rh″m --- Passed ב״ה‎ b″h b″h --- Passed ת״א‎ t″' t″' --- -- Unit tests for [[Module:he-translit]]. Refresh page to run tests. --- local tests = require('Module:UnitTests') --- local he_translit = require('Module:he-translit') --- local full_link = require('Module:links').full_link --- local lang = require('Module:languages').getByCode('he') --- local sc = require('Module:scripts').getByCode('Hebr') - --- local function link(term) --- return full_link{lang = lang, sc = sc, term = term} --- end - --- function tests:do_test_translit(system) --- local tr --- if system == "modern" then --- local modern = he_translit.tr --- function tr(text) --- return modern(text, 'he', 'Hebr') --- end --- elseif system == "biblical" then --- tr = he_translit.biblical --- end - --- return function (self, text, expected_tr, comment) --- self:equals(link(text), tr(text), expected_tr, {comment=comment}) --- end --- end - --- function tests:test_translit_hebrew() --- local examples = { --- { 'מַקְלֵעַ', "maklea'" }, --- { 'אַבְּסוּרְד', "'ab'sur'd" , "not sure about what should be expected here"}, --- -- { 'ביומטריה', ? 
}, --- { 'בִּיּוֹמֶטְרִיָּה', "biyometriya" }, --- { 'קַפְרִיסִין', "kafrisin" }, --- { 'חֹרֶף', "khoref" }, --- { 'טוּרְקִיז', "turkiz" }, --- { 'טַחַב', "takhav" }, --- { 'יִוָּלֵד', "yivaled" }, --- { 'יָקִינְתּוֹן', "yakinton" }, --- { 'כֻּתְנָה', "kutna" }, --- { 'נַגָּרִיָּה', "nagariya" }, --- { 'נַעֲלֶה', "na'ale" }, --- { 'מִצְווֹת', "mitsvot" }, --- { 'מָקוֹם', "makom" }, --- { 'פֶּרוּאָנִי', "peru'ani" }, --- { 'צִדְפָּה', "tsidpa" }, --- { 'תׇּכְנָה', "tokhna" }, --- { 'רְאוּ', "r'u" }, - --- { 'גּ׳וּק', "juk" }, --- { 'ג׳וּק', "juk" }, --- { 'גִּ׳ירָאפָה', "jirafa" }, --- { 'גִ׳ירָאפָה', "jirafa" }, --- { 'זַ׳רְגוֹן', "zhargon" }, --- { 'קַפּוּצִ׳ינוֹ', "kapuchino" }, - --- { 'סְקוֹץ׳', "s'koch" }, --- { 'סְתוֹם תַּ׳פֶּה', "s'tom ta′pe" }, --- { 'אִמָּא׳לֶה', "'ima′le" }, --- { 'חָזָ״ל', "khaza″l" }, --- { 'נַחַ״ל', "nakha″l" }, --- { 'רה״מ', "rh″m" }, --- { 'ב״ה', "b″h" }, --- { 'ת״א', "t″'" }, --- } - --- self:iterate(examples, self:do_test_translit("modern")) --- end - --- function tests:test_biblical() --- local examples = { --- { "בַּיִת", "bayiṯ" }, --- { "בֵּית", "bēṯ" }, --- { "בָּתִּים", "bāttīm" }, --- { "מַחֲנֶה", "maḥăne" }, --- { "בָּרָא", "bārā" }, --- { "רֶגֶל", "reḡel" }, --- { "כֹּהֵן", "kōhēn" }, --- { "מֶלֶךְ", "meleḵ" }, --- { "מַמְלָכָה", "mamlāḵā" }, --- { "הַמַּמְלָכָה", "hammamlāḵā" }, --- { "הַלְּלוּיָהּ", "halləlūyāh" }, --- { "הַלְלוּיָהּ", "haləlūyāh" }, --- { "יָדַע", "yāḏaʿ" }, --- { "שָׁבוּעַ", "šāḇūaʿ" }, --- { "רוּחַ", "rūaḥ" }, --- { "גָּבֹהַּ", "gāḇōah" }, --- { "מָשִׁיחַ", "māšīaḥ" }, --- { "רֵיחַ", "rēaḥ" }, --- { "שָׂדֶה", "śāḏe" }, --- { "שְׂדֵה", "śəḏē" }, --- { "בָּנַי", "bānay" }, --- { "בְּנֵי", "bənē" }, --- { "צָרְכִּי", "ṣorkī" }, --- { "חָכְמָה", "ḥāḵəmā", "ambiguous case: could be ḥāḵəmā or ḥoḵmā, but I think ḥāḵəmā is the preferred default" }, --- { "שִׁפְרָה", "šip̄rā" }, --- { "שָׁכְבְּךָ", "šoḵbəḵā" }, --- { "הָפְכָּה", "hop̄kā", "made-up word, but a particular potentially problematic Unicode 
situation" }, --- { "קָטְבּוֹ", "qoṭbō", "another particular potentially problematic Unicode situation" }, --- { "נִשְׂרְפָה", "niśrəp̄ā" }, --- { "בָּנָיו", "bānāw" }, --- { "בָּנֶיהָ", "bānehā" }, --- { "מִצְוֹת", "miṣwōṯ" }, --- { "זִוּוּג", "ziwwūḡ" }, --- { "רֹאשׁ", "rōš" }, --- { "רֵאשִׁית", "rēšīṯ" }, --- { "רִאשׁוֹן", "rīšōn" }, --- { "מְלָאכָה", "məlāḵā" }, --- { "מְלֶאכֶת", "məleḵeṯ" }, --- { "חֵטְא", "ḥēṭ" }, --- { "בָּרָאתָ", "bārāṯā" }, --- { "חַטֹּאות", "ḥaṭṭōṯ" }, --- { "יְראוּ", "yərū" }, --- { "וַיֶּאְסֹר", "wayyeʾsōr" }, --- { "הָחְלַט", "hoḥlaṭ" }, --- { "וַיֵּבְךְּ", "wayyēḇk" }, --- { "אַרְאֶךָּ", "ʾarʾekkā" }, --- { "וַיַּשְׁקְ", "wayyašq" }, --- { "אַתְּ", "ʾatt" }, --- { "וּוָווֹ", "ūwāwō" }, --- { "וָו", "wāw" }, --- { "תָּו", "tāw" }, --- { "קַו", "qaw" }, --- { "לָאו", "lāw" }, --- { "חַי", "ḥay" }, --- { "חָי", "ḥāy", "pausal" }, --- { "פִּיו", "pīw" }, --- { "כִּסְלֵו", "kislēw" }, --- { "גּוֹי", "gōy" }, --- { "גֹּי", "gōy" }, --- { "גֹּיִים", "gōyīm" }, --- { "רָאוּי", "rāʾūy" }, --- { "קִיא", "qī" }, --- { "יָבִיאוּ", "yāḇīʾū" }, --- { "יְבִיאוּן", "yəḇīʾūn" }, --- { "מֵאוּן", "mēʾūn" }, --- { "מֵיאוּן", "mēʾūn" }, --- { "בּוֹאוּ", "bōʾū" }, --- { "בֹּאוּ", "bōʾū" }, --- { "בּוּאוּ", "būʾū", "made-up word, but may help identify the issue" }, --- { "אָבִיאָה", "ʾāḇīʾā" }, --- { "מֵאָה", "mēʾā" }, --- { "גֵּיאָהּ", "gēʾāh" }, --- { "אָבוֹאָה", "ʾāḇōʾā" }, --- { "אָבֹאָה", "ʾāḇōʾā" }, --- { "נְשׂוּאָה", "nəśūʾā" }, --- { "קִיאוֹ", "qīʾō" }, --- { "גֵּאוֹ", "gēʾō" }, --- { "גֵּיאוֹ", "gēʾō" }, --- { "בּוֹאוֹ", "bōʾō" }, --- { "בֹּאוֹ", "bōʾō" }, --- { "מִלּוּאוֹ", "millūʾō" }, --- { "מִי", "mī" }, --- { "אִיִּים", "ʾiyyīm" }, --- { "אִיּוֹב", "ʾiyyōḇ" }, --- { "אִיּוּן", "ʾiyyūn" }, --- { "אַיִן", "ʾayin" }, --- { "בּוֹא", "bō" }, --- { "יְפֵהפֶה", "yəp̄ēp̄e" }, --- { "אֹהֶל", "ʾōhel" }, --- { "הָאֹהֱלָה", "hāʾōhĕlā" }, --- { "אָהֳלוֹ", "ʾohŏlō" }, --- { "אָהָלְךָ", "ʾoholəḵā" }, --- { "יִשָּׂשכָר", "yiśśāḵār", "Still undecided if this 
actually needs to be handled" }, --- { "הוֹשִׁיעָה נָּא", "hōšīʿā nnā" }, --- { "עַד בֹּאֲךָ", "ʿaḏ bōʾăḵā" }, --- { "וַיַּשְׁקְ אֶת הַצֹּאן", "wayyašq ʾeṯ haṣṣōn" }, --- { "בְּנֵי בְרָק", "bənē ḇərāq" }, --- { "בְרָק", "ḇərāq" }, --- { "אִישׁ יְהוּדִי הָיָה בְּשׁוּשַׁן הַבִּירָה וּשְׁמוֹ מָרְדֳּכַי בֶּן יָאִיר בֶּן־שִׁמְעִי בֶּן־קִישׁ אִישׁ יְמִינִי׃", "ʾīš yəhūḏī hāyā bəšūšan habbīrā ūšəmō mordŏḵay ben yāʾīr ben-šimʿī ben-qīš ʾīš yəmīnī." }, --- { "אִ֣ישׁ יְהוּדִ֔י הָיָ֖ה בְּשׁוּשַׁ֣ן הַבִּירָ֑ה וּשְׁמ֣וֹ מָרְדֳּכַ֗י בֶּ֣ן יָאִ֧יר בֶּן־שִׁמְעִ֛י בֶּן־קִ֖ישׁ אִ֥ישׁ יְמִינִֽי׃", "ʾīš yəhūḏī hāyā bəšūšan habbīrā ūšəmō mordŏḵay ben yāʾīr ben-šimʿī ben-qīš ʾīš yəmīnī.", "fully accented verse; stress should not be indicated in the final syllable" }, --- { "וַיְהִי הַמַּבּוּל אַרְבָּעִים יוֹם עַל־הָאָרֶץ וַיִּרְבּוּ הַמַּיִם וַיִּשְׂאוּ אֶת־הַתֵּבָה וַתָּרָם מֵעַל הָאָרֶץ׃‎", "wayəhī hammabbūl ʾarbāʿīm yōm ʿal-hāʾā́reṣ wayyirbū hammáyim wayyiśəʾū ʾeṯ-hattēḇā wattā́rom mēʿal hāʾāreṣ.", "a reminder of why this is hard" }, --- { "וַיְהִ֧י הַמַּבּ֛וּל אַרְבָּעִ֥ים י֖וֹם עַל־הָאָ֑רֶץ וַיִּרְבּ֣וּ הַמַּ֗יִם וַיִּשְׂאוּ֙ אֶת־הַתֵּבָ֔ה וַתָּ֖רָם מֵעַ֥ל הָאָֽרֶץ׃", "wayəhī hammabbūl ʾarbāʿīm yōm ʿal-hāʾā́reṣ wayyirbū hammáyim wayyiśəʾū ʾeṯ-hattēḇā wattā́rom mēʿal hāʾāreṣ.", "fully accented verse version of the above" }, - --- "implicit ktiv/qre that would be nice to have", --- { "הִוא", "hī" }, --- { "יְרוּשָׁלִַם", "yərūšālayim" }, --- { "יְרוּשָׁלִָם", "yərūšālāyim", "pausal form" }, --- { "יְרוּשָׁלְַמָה", "yərūšālaymā" }, --- { "יְרוּשָׁלְָמָה", "yərūšālāymā" }, - --- "ktiv male tests", --- { "חַיָּיב", "ḥayyāḇ" }, --- { "חַוָּוה", "ḥawwā" }, --- { "הֱוֵוה", "hĕwē" }, --- { "הַיְינוּ", "haynū" }, --- { "הִתְכַּוְּונוּ", "hiṯkawwənū" }, --- { "גַּוְונָא", "gawnā" }, --- { "מְייוּחָד", "məyūḥāḏ" , "there is no way to tell that it really should be məyuḥāḏ, but anyway this test is for the double yod"}, --- { "כְּדַאי", "kəḏay" }, --- { "כּוּלָּם", "kullām", "shuruk does not 
necessarily imply a long vowel" }, --- { "קִידּוּשׁ", "qiddūš", "chiriq male does not necessarily imply a long vowel" }, --- } --- self:iterate(examples, self:do_test_translit("biblical")) --- end - --- return tests \ No newline at end of file diff --git a/wikt/translit/hi-translit.lua b/wikt/translit/hi-translit.lua deleted file mode 100644 index 646df21..0000000 --- a/wikt/translit/hi-translit.lua +++ /dev/null @@ -1,113 +0,0 @@ -local export = {} - -local gsub = mw.ustring.gsub -local match = mw.ustring.match - -local conv = { - -- consonants - ['क'] = 'k', ['ख'] = 'kh', ['ग'] = 'g', ['घ'] = 'gh', ['ङ'] = 'ṅ', - ['च'] = 'ch', ['छ'] = 'ch', ['ज'] = 'j', ['झ'] = 'jh', ['ञ'] = 'ñ', - ['ट'] = 'ṭ', ['ठ'] = 'ṭh', ['ड'] = 'ḍ', ['ढ'] = 'ḍh', ['ण'] = 'ṇ', - ['त'] = 't', ['थ'] = 'th', ['द'] = 'd', ['ध'] = 'dh', ['न'] = 'n', - ['प'] = 'p', ['फ'] = 'ph', ['ब'] = 'b', ['भ'] = 'bh', ['म'] = 'm', - ['य'] = 'y', ['र'] = 'r', ['ल'] = 'l', ['व'] = 'v', ['ळ'] = 'ḷ', - ['श'] = 'ś', ['ष'] = 'ṣ', ['स'] = 's', ['ह'] = 'h', - ['क़'] = 'q', ['ख़'] = 'x', ['ग़'] = 'ġ', ['ऴ'] = 'ḻ', - ['ज़'] = 'z', ['ष़'] = 'ḻ', ['झ़'] = 'ž', ['ड़'] = 'ṛ', ['ढ़'] = 'ṛh', - ['फ़'] = 'f', ['थ़'] = 'θ', ['ऩ'] = 'ṉ', ['ऱ'] = 'ṟ', - -- ['ज्ञ'] = 'gy', - - -- vowel diacritics - ['ि'] = 'i', ['ु'] = 'u', ['े'] = 'e', ['ो'] = 'o', - ['ा'] = 'ā', ['ी'] = 'ī', ['ू'] = 'ū', - ['ृ'] = 'ŕ', - ['ै'] = 'ai', ['ौ'] = 'au', - ['ॉ'] = 'ŏ', - ['ॅ'] = 'ĕ', - - -- vowel signs - ['अ'] = 'a', ['इ'] = 'i', ['उ'] = 'u', ['ए'] = 'e', ['ओ'] = 'o', - ['आ'] = 'ā', ['ई'] = 'ī', ['ऊ'] = 'ū', - ['ऋ'] = 'ŕ', - ['ऐ'] = 'ai', ['औ'] = 'au', - ['ऑ'] = 'ŏ', - ['ऍ'] = 'ĕ', - - ['ॐ'] = 'om', - - -- chandrabindu - ['ँ'] = '̃', - - -- anusvara - ['ं'] = 'ṁ', - - -- visarga - ['ः'] = 'ḥ', - - -- virama - ['्'] = '', - - -- numerals - ['०'] = '0', ['१'] = '1', ['२'] = '2', ['३'] = '3', ['४'] = '4', - ['५'] = '5', ['६'] = '6', ['७'] = '7', ['८'] = '8', ['९'] = '9', - - -- punctuation - ['।'] = '.', -- danda - ['॥'] = '.', -- double danda - ['+'] = 
'', -- compound separator - - -- abbreviation sign - ['॰'] = '.', -} - -local nasal_assim = { - ['क'] = 'ङ', ['ख'] = 'ङ', ['ग'] = 'ङ', ['घ'] = 'ङ', - ['च'] = 'ञ', ['छ'] = 'ञ', ['ज'] = 'ञ', ['झ'] = 'ञ', - ['ट'] = 'ण', ['ठ'] = 'ण', ['ड'] = 'ण', ['ढ'] = 'ण', - ['प'] = 'म', ['फ'] = 'म', ['ब'] = 'म', ['भ'] = 'म', ['म'] = 'म', - ['व'] = 'ँ', ['य'] = 'ँ', -} - -local perm_cl = { - ['म्ल'] = true, ['व्ल'] = true, ['न्ल'] = true, - -} - -local all_cons, special_cons = 'कखगघङचछजझञटठडढतथदधपफबभशषसयरलवहणनम', 'यरलवहनम' -local vowel, vowel_sign = 'aिुृेोाीूैौॉॅ', 'अइउएओआईऊऋऐऔऑऍ' -local syncope_pattern = '([' .. vowel .. vowel_sign .. '])(़?[' .. all_cons .. '])a(़?[' .. gsub(all_cons, "य", "") .. '])([ंँ]?[' .. vowel .. vowel_sign .. '])' - -local function rev_string(text) - local result, length = {}, mw.ustring.len(text) - for i = length, 1, -1 do - table.insert(result, mw.ustring.sub(text, i, i)) - end - return table.concat(result) -end - -function export.tr(text, lang, sc) - text = gsub(text, '([' .. all_cons .. ']़?)([' .. vowel .. '्]?)', function(c, d) - return c .. (d == "" and 'a' or d) end) - for word in mw.ustring.gmatch(text, "[ऀ-ॿa]+") do - local orig_word = word - word = rev_string(word) - word = gsub(word, '^a(़?)([' .. all_cons .. '])(.)(.?)', function(opt, first, second, third) - return (((match(first, '[' .. special_cons .. ']') and match(second, '्') and not perm_cl[first..second..third]) - or match(first .. second, 'य[ीेै]')) - and 'a' or "") .. opt .. first .. second .. third end) - while match(word, syncope_pattern) do - word = gsub(word, syncope_pattern, '%1%2%3%4') - end - word = gsub(word, '(.?)ं(.)', function(succ, prev) - return succ .. (succ..prev == "a" and "्म" or - (succ == "" and match(prev, '[' .. vowel .. ']') and "̃" or nasal_assim[succ] or "n")) .. 
prev end) - text = gsub(text, orig_word, rev_string(word)) - end - text = gsub(text, '.़?', conv) - text = gsub(text, 'a([iu])̃', 'a͠%1') - text = gsub(text, 'jñ', 'gy') - text = gsub(text, 'ñz', 'nz') - return mw.ustring.toNFC(text) -end - -return export \ No newline at end of file diff --git a/wikt/translit/ii-translit.lua b/wikt/translit/ii-translit.lua deleted file mode 100644 index 7e28c18..0000000 --- a/wikt/translit/ii-translit.lua +++ /dev/null @@ -1,409 +0,0 @@ --- This module will transliterate Sichuan Yi language text per Yiyu Pinyin. --- Language code: iii - -local export = {} -local gsub = mw.ustring.gsub - -local convert = { - ['ꀀ'] = 'it', ['ꀁ'] = 'ix', ['ꀂ'] = 'i', ['ꀃ'] = 'ip', - ['ꀄ'] = 'iet', ['ꀅ'] = 'iex', ['ꀆ'] = 'ie', ['ꀇ'] = 'iep', - ['ꀈ'] = 'at', ['ꀉ'] = 'ax', ['ꀊ'] = 'a', ['ꀋ'] = 'ap', - ['ꀌ'] = 'uox', ['ꀍ'] = 'uo', ['ꀎ'] = 'uop', - ['ꀏ'] = 'ot', ['ꀐ'] = 'ox', ['ꀑ'] = 'o', ['ꀒ'] = 'op', - ['ꀓ'] = 'ex', ['ꀔ'] = 'e', - ['ꀕ'] = 'w', - - ['ꀖ'] = 'bit', ['ꀗ'] = 'bix', ['ꀘ'] = 'bi', ['ꀙ'] = 'bip', - ['ꀚ'] = 'biet', ['ꀛ'] = 'biex', ['ꀜ'] = 'bie', ['ꀝ'] = 'biep', - ['ꀞ'] = 'bat', ['ꀟ'] = 'bax', ['ꀠ'] = 'ba', ['ꀡ'] = 'bap', - ['ꀢ'] = 'buox', ['ꀣ'] = 'buo', ['ꀤ'] = 'buop', - ['ꀥ'] = 'bot', ['ꀦ'] = 'box', ['ꀧ'] = 'bo', ['ꀨ'] = 'bop', - ['ꀩ'] = 'bex', ['ꀪ'] = 'be', ['ꀫ'] = 'bep', - ['ꀬ'] = 'but', ['ꀭ'] = 'bux', ['ꀮ'] = 'bu', ['ꀯ'] = 'bup', - ['ꀰ'] = 'burx', ['ꀱ'] = 'bur', - ['ꀲ'] = 'byt', ['ꀳ'] = 'byx', ['ꀴ'] = 'by', ['ꀵ'] = 'byp', - ['ꀶ'] = 'byrx', ['ꀷ'] = 'byr', - - ['ꀸ'] = 'pit', ['ꀹ'] = 'pix', ['ꀺ'] = 'pi', ['ꀻ'] = 'pip', - ['ꀼ'] = 'piex', ['ꀽ'] = 'pie', ['ꀾ'] = 'piep', - ['ꀿ'] = 'pat', ['ꁀ'] = 'pax', ['ꁁ'] = 'pa', ['ꁂ'] = 'pap', - ['ꁃ'] = 'puox', ['ꁄ'] = 'puo', ['ꁅ'] = 'puop', - ['ꁆ'] = 'pot', ['ꁇ'] = 'pox', ['ꁈ'] = 'po', ['ꁉ'] = 'pop', - ['ꁊ'] = 'put', ['ꁋ'] = 'pux', ['ꁌ'] = 'pu', ['ꁍ'] = 'pup', - ['ꁎ'] = 'purx', ['ꁏ'] = 'pur', - ['ꁐ'] = 'pyt', ['ꁑ'] = 'pyx', ['ꁒ'] = 'py', ['ꁓ'] = 'pyp', - ['ꁔ'] = 'pyrx', ['ꁕ'] = 'pyr', - - ['ꁖ'] = 'bbit', ['ꁗ'] 
= 'bbix', ['ꁘ'] = 'bbi', ['ꁙ'] = 'bbip', - ['ꁚ'] = 'bbiet', ['ꁛ'] = 'bbiex', ['ꁜ'] = 'bbie', ['ꁝ'] = 'bbiep', - ['ꁞ'] = 'bbat', ['ꁟ'] = 'bbax', ['ꁠ'] = 'bba', ['ꁡ'] = 'bbap', - ['ꁢ'] = 'bbuox', ['ꁣ'] = 'bbuo', ['ꁤ'] = 'bbuop', - ['ꁥ'] = 'bbot', ['ꁦ'] = 'bbox', ['ꁧ'] = 'bbo', ['ꁨ'] = 'bbop', - ['ꁩ'] = 'bbex', ['ꁪ'] = 'bbe', ['ꁫ'] = 'bbep', - ['ꁬ'] = 'bbut', ['ꁭ'] = 'bbux', ['ꁮ'] = 'bbu', ['ꁯ'] = 'bbup', - ['ꁰ'] = 'bburx', ['ꁱ'] = 'bbur', - ['ꁲ'] = 'bbyt', ['ꁳ'] = 'bbyx', ['ꁴ'] = 'bby', ['ꁵ'] = 'bbyp', - - ['ꁶ'] = 'nbit', ['ꁷ'] = 'nbix', ['ꁸ'] = 'nbi', ['ꁹ'] = 'nbip', - ['ꁺ'] = 'nbiex', ['ꁻ'] = 'nbie', ['ꁼ'] = 'nbiep', - ['ꁽ'] = 'nbat', ['ꁾ'] = 'nbax', ['ꁿ'] = 'nba', ['ꂀ'] = 'nbap', - ['ꂁ'] = 'nbot', ['ꂂ'] = 'nbox', ['ꂃ'] = 'nbo', ['ꂄ'] = 'nbop', - ['ꂅ'] = 'nbut', ['ꂆ'] = 'nbux', ['ꂇ'] = 'nbu', ['ꂈ'] = 'nbup', - ['ꂉ'] = 'nburx', ['ꂊ'] = 'nbur', - ['ꂋ'] = 'nbyt', ['ꂌ'] = 'nbyx', ['ꂍ'] = 'nby', ['ꂎ'] = 'nbyp', - ['ꂏ'] = 'nbyrx', ['ꂐ'] = 'nbyr', - - ['ꂑ'] = 'hmit', ['ꂒ'] = 'hmix', ['ꂓ'] = 'hmi', ['ꂔ'] = 'hmip', - ['ꂕ'] = 'hmiex', ['ꂖ'] = 'hmie', ['ꂗ'] = 'hmiep', - ['ꂘ'] = 'hmat', ['ꂙ'] = 'hmax', ['ꂚ'] = 'hma', ['ꂛ'] = 'hmap', - ['ꂜ'] = 'hmuox', ['ꂝ'] = 'hmuo', ['ꂞ'] = 'hmuop', - ['ꂟ'] = 'hmot', ['ꂠ'] = 'hmox', ['ꂡ'] = 'hmo', ['ꂢ'] = 'hmop', - ['ꂣ'] = 'hmut', ['ꂤ'] = 'hmux', ['ꂥ'] = 'hmu', ['ꂦ'] = 'hmup', - ['ꂧ'] = 'hmurx', ['ꂨ'] = 'hmur', - ['ꂩ'] = 'hmyx', ['ꂪ'] = 'hmy', ['ꂫ'] = 'hmyp', - ['ꂬ'] = 'hmyrx', ['ꂭ'] = 'hmyr', - - ['ꂮ'] = 'mit', ['ꂯ'] = 'mix', ['ꂰ'] = 'mi', ['ꂱ'] = 'mip', - ['ꂲ'] = 'miex', ['ꂳ'] = 'mie', ['ꂴ'] = 'miep', - ['ꂵ'] = 'mat', ['ꂶ'] = 'max', ['ꂷ'] = 'ma', ['ꂸ'] = 'map', - ['ꂹ'] = 'muot', ['ꂺ'] = 'muox', ['ꂻ'] = 'muo', ['ꂼ'] = 'muop', - ['ꂽ'] = 'mot', ['ꂾ'] = 'mox', ['ꂿ'] = 'mo', ['ꃀ'] = 'mop', - ['ꃁ'] = 'mex', ['ꃂ'] = 'me', - ['ꃃ'] = 'mut', ['ꃄ'] = 'mux', ['ꃅ'] = 'mu', ['ꃆ'] = 'mup', - ['ꃇ'] = 'murx', ['ꃈ'] = 'mur', - ['ꃉ'] = 'myt', ['ꃊ'] = 'myx', ['ꃋ'] = 'my', ['ꃌ'] = 'myp', - - ['ꃍ'] = 'fit', ['ꃎ'] = 'fix', ['ꃏ'] = 'fi', ['ꃐ'] = 'fip', - ['ꃑ'] = 
'fat', ['ꃒ'] = 'fax', ['ꃓ'] = 'fa', ['ꃔ'] = 'fap', - ['ꃕ'] = 'fox', ['ꃖ'] = 'fo', ['ꃗ'] = 'fop', - ['ꃘ'] = 'fut', ['ꃙ'] = 'fux', ['ꃚ'] = 'fu', ['ꃛ'] = 'fup', - ['ꃜ'] = 'furx', ['ꃝ'] = 'fur', - ['ꃞ'] = 'fyt', ['ꃟ'] = 'fyx', ['ꃠ'] = 'fy', ['ꃡ'] = 'fyp', - - ['ꃢ'] = 'vit', ['ꃣ'] = 'vix', ['ꃤ'] = 'vi', ['ꃥ'] = 'vip', - ['ꃦ'] = 'viet', ['ꃧ'] = 'viex', ['ꃨ'] = 'vie', ['ꃩ'] = 'viep', - ['ꃪ'] = 'vat', ['ꃫ'] = 'vax', ['ꃬ'] = 'va', ['ꃭ'] = 'vap', - ['ꃮ'] = 'vot', ['ꃯ'] = 'vox', ['ꃰ'] = 'vo', ['ꃱ'] = 'vop', - ['ꃲ'] = 'vex', ['ꃳ'] = 'vep', - ['ꃴ'] = 'vut', ['ꃵ'] = 'vux', ['ꃶ'] = 'vu', ['ꃷ'] = 'vup', - ['ꃸ'] = 'vurx', ['ꃹ'] = 'vur', - ['ꃺ'] = 'vyt', ['ꃻ'] = 'vyx', ['ꃼ'] = 'vy', ['ꃽ'] = 'vyp', - ['ꃾ'] = 'vyrx', ['ꃿ'] = 'vyr', - - ['ꄀ'] = 'dit', ['ꄁ'] = 'dix', ['ꄂ'] = 'di', ['ꄃ'] = 'dip', - ['ꄄ'] = 'diex', ['ꄅ'] = 'die', ['ꄆ'] = 'diep', - ['ꄇ'] = 'dat', ['ꄈ'] = 'dax', ['ꄉ'] = 'da', ['ꄊ'] = 'dap', - ['ꄋ'] = 'duox', ['ꄌ'] = 'duo', - ['ꄍ'] = 'dot', ['ꄎ'] = 'dox', ['ꄏ'] = 'do', ['ꄐ'] = 'dop', - ['ꄑ'] = 'dex', ['ꄒ'] = 'de', ['ꄓ'] = 'dep', - ['ꄔ'] = 'dut', ['ꄕ'] = 'dux', ['ꄖ'] = 'du', ['ꄗ'] = 'dup', - ['ꄘ'] = 'durx', ['ꄙ'] = 'dur', - - ['ꄚ'] = 'tit', ['ꄛ'] = 'tix', ['ꄜ'] = 'ti', ['ꄝ'] = 'tip', - ['ꄞ'] = 'tiex', ['ꄟ'] = 'tie', ['ꄠ'] = 'tiep', - ['ꄡ'] = 'tat', ['ꄢ'] = 'tax', ['ꄣ'] = 'ta', ['ꄤ'] = 'tap', - ['ꄥ'] = 'tuot', ['ꄦ'] = 'tuox', ['ꄧ'] = 'tuo', ['ꄨ'] = 'tuop', - ['ꄩ'] = 'tot', ['ꄪ'] = 'tox', ['ꄫ'] = 'to', ['ꄬ'] = 'top', - ['ꄭ'] = 'tex', ['ꄮ'] = 'te', ['ꄯ'] = 'tep', - ['ꄰ'] = 'tut', ['ꄱ'] = 'tux', ['ꄲ'] = 'tu', ['ꄳ'] = 'tup', - ['ꄴ'] = 'turx', ['ꄵ'] = 'tur', - - ['ꄶ'] = 'ddit', ['ꄷ'] = 'ddix', ['ꄸ'] = 'ddi', ['ꄹ'] = 'ddip', - ['ꄺ'] = 'ddiex', ['ꄻ'] = 'ddie', ['ꄼ'] = 'ddiep', - ['ꄽ'] = 'ddat', ['ꄾ'] = 'ddax', ['ꄿ'] = 'dda', ['ꅀ'] = 'ddap', - ['ꅁ'] = 'dduox', ['ꅂ'] = 'dduo', ['ꅃ'] = 'dduop', - ['ꅄ'] = 'ddot', ['ꅅ'] = 'ddox', ['ꅆ'] = 'ddo', ['ꅇ'] = 'ddop', - ['ꅈ'] = 'ddex', ['ꅉ'] = 'dde', ['ꅊ'] = 'ddep', - ['ꅋ'] = 'ddut', ['ꅌ'] = 'ddux', ['ꅍ'] = 'ddu', ['ꅎ'] = 'ddup', - ['ꅏ'] = 
'ddurx', ['ꅐ'] = 'ddur', - - ['ꅑ'] = 'ndit', ['ꅒ'] = 'ndix', ['ꅓ'] = 'ndi', ['ꅔ'] = 'ndip', - ['ꅕ'] = 'ndiex', ['ꅖ'] = 'ndie', - ['ꅗ'] = 'ndat', ['ꅘ'] = 'ndax', ['ꅙ'] = 'nda', ['ꅚ'] = 'ndap', - ['ꅛ'] = 'ndot', ['ꅜ'] = 'ndox', ['ꅝ'] = 'ndo', ['ꅞ'] = 'ndop', - ['ꅟ'] = 'ndex', ['ꅠ'] = 'nde', ['ꅡ'] = 'ndep', - ['ꅢ'] = 'ndut', ['ꅣ'] = 'ndux', ['ꅤ'] = 'ndu', ['ꅥ'] = 'ndup', - ['ꅦ'] = 'ndurx', ['ꅧ'] = 'ndur', - - ['ꅨ'] = 'hnit', ['ꅩ'] = 'hnix', ['ꅪ'] = 'hni', ['ꅫ'] = 'hnip', - ['ꅬ'] = 'hniet', ['ꅭ'] = 'hniex', ['ꅮ'] = 'hnie', ['ꅯ'] = 'hniep', - ['ꅰ'] = 'hnat', ['ꅱ'] = 'hnax', ['ꅲ'] = 'hna', ['ꅳ'] = 'hnap', - ['ꅴ'] = 'hnuox', ['ꅵ'] = 'hnuo', - ['ꅶ'] = 'hnot', ['ꅷ'] = 'hnox', ['ꅸ'] = 'hnop', - ['ꅹ'] = 'hnex', ['ꅺ'] = 'hne', ['ꅻ'] = 'hnep', - ['ꅼ'] = 'hnut', - - ['ꅽ'] = 'nit', ['ꅾ'] = 'nix', ['ꅿ'] = 'ni', ['ꆀ'] = 'nip', - ['ꆁ'] = 'niex', ['ꆂ'] = 'nie', ['ꆃ'] = 'niep', - ['ꆄ'] = 'nax', ['ꆅ'] = 'na', ['ꆆ'] = 'nap', - ['ꆇ'] = 'nuox', ['ꆈ'] = 'nuo', ['ꆉ'] = 'nuop', - ['ꆊ'] = 'not', ['ꆋ'] = 'nox', ['ꆌ'] = 'no', ['ꆍ'] = 'nop', - ['ꆎ'] = 'nex', ['ꆏ'] = 'ne', ['ꆐ'] = 'nep', - ['ꆑ'] = 'nut', ['ꆒ'] = 'nux', ['ꆓ'] = 'nu', ['ꆔ'] = 'nup', - ['ꆕ'] = 'nurx', ['ꆖ'] = 'nur', - - ['ꆗ'] = 'hlit', ['ꆘ'] = 'hlix', ['ꆙ'] = 'hli', ['ꆚ'] = 'hlip', - ['ꆛ'] = 'hliex', ['ꆜ'] = 'hlie', ['ꆝ'] = 'hliep', - ['ꆞ'] = 'hlat', ['ꆟ'] = 'hlax', ['ꆠ'] = 'hla', ['ꆡ'] = 'hlap', - ['ꆢ'] = 'hluox', ['ꆣ'] = 'hluo', ['ꆤ'] = 'hluop', - ['ꆥ'] = 'hlox', ['ꆦ'] = 'hlo', ['ꆧ'] = 'hlop', - ['ꆨ'] = 'hlex', ['ꆩ'] = 'hle', ['ꆪ'] = 'hlep', - ['ꆫ'] = 'hlut', ['ꆬ'] = 'hlux', ['ꆭ'] = 'hlu', ['ꆮ'] = 'hlup', - ['ꆯ'] = 'hlurx', ['ꆰ'] = 'hlur', - ['ꆱ'] = 'hlyt', ['ꆲ'] = 'hlyx', ['ꆳ'] = 'hly', ['ꆴ'] = 'hlyp', - ['ꆵ'] = 'hlyrx', ['ꆶ'] = 'hlyr', - - ['ꆷ'] = 'lit', ['ꆸ'] = 'lix', ['ꆹ'] = 'li', ['ꆺ'] = 'lip', - ['ꆻ'] = 'liet', ['ꆼ'] = 'liex', ['ꆽ'] = 'lie', ['ꆾ'] = 'liep', - ['ꆿ'] = 'lat', ['ꇀ'] = 'lax', ['ꇁ'] = 'la', ['ꇂ'] = 'lap', - ['ꇃ'] = 'luot', ['ꇄ'] = 'luox', ['ꇅ'] = 'luo', ['ꇆ'] = 'luop', - ['ꇇ'] = 'lot', ['ꇈ'] = 'lox', ['ꇉ'] = 
'lo', ['ꇊ'] = 'lop', - ['ꇋ'] = 'lex', ['ꇌ'] = 'le', ['ꇍ'] = 'lep', - ['ꇎ'] = 'lut', ['ꇏ'] = 'lux', ['ꇐ'] = 'lu', ['ꇑ'] = 'lup', - ['ꇒ'] = 'lurx', ['ꇓ'] = 'lur', - ['ꇔ'] = 'lyt', ['ꇕ'] = 'lyx', ['ꇖ'] = 'ly', ['ꇗ'] = 'lyp', - ['ꇘ'] = 'lyrx', ['ꇙ'] = 'lyr', - - ['ꇚ'] = 'git', ['ꇛ'] = 'gix', ['ꇜ'] = 'gi', ['ꇝ'] = 'gip', - ['ꇞ'] = 'giet', ['ꇟ'] = 'giex', ['ꇠ'] = 'gie', ['ꇡ'] = 'giep', - ['ꇢ'] = 'gat', ['ꇣ'] = 'gax', ['ꇤ'] = 'ga', ['ꇥ'] = 'gap', - ['ꇦ'] = 'guot', ['ꇧ'] = 'guox', ['ꇨ'] = 'guo', ['ꇩ'] = 'guop', - ['ꇪ'] = 'got', ['ꇫ'] = 'gox', ['ꇬ'] = 'go', ['ꇭ'] = 'gop', - ['ꇮ'] = 'get', ['ꇯ'] = 'gex', ['ꇰ'] = 'ge', ['ꇱ'] = 'gep', - ['ꇲ'] = 'gut', ['ꇳ'] = 'gux', ['ꇴ'] = 'gu', ['ꇵ'] = 'gup', - ['ꇶ'] = 'gurx', ['ꇷ'] = 'gur', - - ['ꇸ'] = 'kit', ['ꇹ'] = 'kix', ['ꇺ'] = 'ki', ['ꇻ'] = 'kip', - ['ꇼ'] = 'kiex', ['ꇽ'] = 'kie', ['ꇾ'] = 'kiep', - ['ꇿ'] = 'kat', ['ꈀ'] = 'kax', ['ꈁ'] = 'ka', ['ꈂ'] = 'kap', - ['ꈃ'] = 'kuox', ['ꈄ'] = 'kuo', ['ꈅ'] = 'kuop', - ['ꈆ'] = 'kot', ['ꈇ'] = 'kox', ['ꈈ'] = 'ko', ['ꈉ'] = 'kop', - ['ꈊ'] = 'ket', ['ꈋ'] = 'kex', ['ꈌ'] = 'ke', ['ꈍ'] = 'kep', - ['ꈎ'] = 'kut', ['ꈏ'] = 'kux', ['ꈐ'] = 'ku', ['ꈑ'] = 'kup', - ['ꈒ'] = 'kurx', ['ꈓ'] = 'kur', - - ['ꈔ'] = 'ggit', ['ꈕ'] = 'ggix', ['ꈖ'] = 'ggi', - ['ꈗ'] = 'ggiex', ['ꈘ'] = 'ggie', ['ꈙ'] = 'ggiep', - ['ꈚ'] = 'ggat', ['ꈛ'] = 'ggax', ['ꈜ'] = 'gga', ['ꈝ'] = 'ggap', - ['ꈞ'] = 'gguot', ['ꈟ'] = 'gguox', ['ꈠ'] = 'gguo', ['ꈡ'] = 'gguop', - ['ꈢ'] = 'ggot', ['ꈣ'] = 'ggox', ['ꈤ'] = 'ggo', ['ꈥ'] = 'ggop', - ['ꈦ'] = 'gget', ['ꈧ'] = 'ggex', ['ꈨ'] = 'gge', ['ꈩ'] = 'ggep', - ['ꈪ'] = 'ggut', ['ꈫ'] = 'ggux', ['ꈬ'] = 'ggu', ['ꈭ'] = 'ggup', - ['ꈮ'] = 'ggurx', ['ꈯ'] = 'ggur', - - ['ꈰ'] = 'mgiex', ['ꈱ'] = 'mgie', - ['ꈲ'] = 'mgat', ['ꈳ'] = 'mgax', ['ꈴ'] = 'mga', ['ꈵ'] = 'mgap', - ['ꈶ'] = 'mguox', ['ꈷ'] = 'mguo', ['ꈸ'] = 'mguop', - ['ꈹ'] = 'mgot', ['ꈺ'] = 'mgox', ['ꈻ'] = 'mgo', ['ꈼ'] = 'mgop', - ['ꈽ'] = 'mgex', ['ꈾ'] = 'mge', ['ꈿ'] = 'mgep', - ['ꉀ'] = 'mgut', ['ꉁ'] = 'mgux', ['ꉂ'] = 'mgu', ['ꉃ'] = 'mgup', - ['ꉄ'] = 'mgurx', ['ꉅ'] = 'mgur', - 
- ['ꉆ'] = 'hxit', ['ꉇ'] = 'hxix', ['ꉈ'] = 'hxi', ['ꉉ'] = 'hxip', - ['ꉊ'] = 'hxiet', ['ꉋ'] = 'hxiex', ['ꉌ'] = 'hxie', ['ꉍ'] = 'hxiep', - ['ꉎ'] = 'hxat', ['ꉏ'] = 'hxax', ['ꉐ'] = 'hxa', ['ꉑ'] = 'hxap', - ['ꉒ'] = 'hxuot', ['ꉓ'] = 'hxuox', ['ꉔ'] = 'hxuo', ['ꉕ'] = 'hxuop', - ['ꉖ'] = 'hxot', ['ꉗ'] = 'hxox', ['ꉘ'] = 'hxo', ['ꉙ'] = 'hxop', - ['ꉚ'] = 'hxex', ['ꉛ'] = 'hxe', ['ꉜ'] = 'hxep', - - ['ꉝ'] = 'ngiex', ['ꉞ'] = 'ngie', ['ꉟ'] = 'ngiep', - ['ꉠ'] = 'ngat', ['ꉡ'] = 'ngax', ['ꉢ'] = 'nga', ['ꉣ'] = 'ngap', - ['ꉤ'] = 'nguot', ['ꉥ'] = 'nguox', ['ꉦ'] = 'nguo', - ['ꉧ'] = 'ngot', ['ꉨ'] = 'ngox', ['ꉩ'] = 'ngo', ['ꉪ'] = 'ngop', - ['ꉫ'] = 'ngex', ['ꉬ'] = 'nge', ['ꉭ'] = 'ngep', - - ['ꉮ'] = 'hit', - ['ꉯ'] = 'hiex', ['ꉰ'] = 'hie', - ['ꉱ'] = 'hat', ['ꉲ'] = 'hax', ['ꉳ'] = 'ha', ['ꉴ'] = 'hap', - ['ꉵ'] = 'huot', ['ꉶ'] = 'huox', ['ꉷ'] = 'huo', ['ꉸ'] = 'huop', - ['ꉹ'] = 'hot', ['ꉺ'] = 'hox', ['ꉻ'] = 'ho', ['ꉼ'] = 'hop', - ['ꉽ'] = 'hex', ['ꉾ'] = 'he', ['ꉿ'] = 'hep', - - ['ꊀ'] = 'wat', ['ꊁ'] = 'wax', ['ꊂ'] = 'wa', ['ꊃ'] = 'wap', - ['ꊄ'] = 'wuox', ['ꊅ'] = 'wuo', ['ꊆ'] = 'wuop', - ['ꊇ'] = 'wox', ['ꊈ'] = 'wo', ['ꊉ'] = 'wop', - ['ꊊ'] = 'wex', ['ꊋ'] = 'we', ['ꊌ'] = 'wep', - - ['ꊍ'] = 'zit', ['ꊎ'] = 'zix', ['ꊏ'] = 'zi', ['ꊐ'] = 'zip', - ['ꊑ'] = 'ziex', ['ꊒ'] = 'zie', ['ꊓ'] = 'ziep', - ['ꊔ'] = 'zat', ['ꊕ'] = 'zax', ['ꊖ'] = 'za', ['ꊗ'] = 'zap', - ['ꊘ'] = 'zuox', ['ꊙ'] = 'zuo', ['ꊚ'] = 'zuop', - ['ꊛ'] = 'zot', ['ꊜ'] = 'zox', ['ꊝ'] = 'zo', ['ꊞ'] = 'zop', - ['ꊟ'] = 'zex', ['ꊠ'] = 'ze', ['ꊡ'] = 'zep', - ['ꊢ'] = 'zut', ['ꊣ'] = 'zux', ['ꊤ'] = 'zu', ['ꊥ'] = 'zup', - ['ꊦ'] = 'zurx', ['ꊧ'] = 'zur', - ['ꊨ'] = 'zyt', ['ꊩ'] = 'zyx', ['ꊪ'] = 'zy', ['ꊫ'] = 'zyp', - ['ꊬ'] = 'zyrx', ['ꊭ'] = 'zyr', - - ['ꊮ'] = 'cit', ['ꊯ'] = 'cix', ['ꊰ'] = 'ci', ['ꊱ'] = 'cip', - ['ꊲ'] = 'ciet', ['ꊳ'] = 'ciex', ['ꊴ'] = 'cie', ['ꊵ'] = 'ciep', - ['ꊶ'] = 'cat', ['ꊷ'] = 'cax', ['ꊸ'] = 'ca', ['ꊹ'] = 'cap', - ['ꊺ'] = 'cuox', ['ꊻ'] = 'cuo', ['ꊼ'] = 'cuop', - ['ꊽ'] = 'cot', ['ꊾ'] = 'cox', ['ꊿ'] = 'co', ['ꋀ'] = 'cop', - ['ꋁ'] = 'cex', ['ꋂ'] = 
'ce', ['ꋃ'] = 'cep', - ['ꋄ'] = 'cut', ['ꋅ'] = 'cux', ['ꋆ'] = 'cu', ['ꋇ'] = 'cup', - ['ꋈ'] = 'curx', ['ꋉ'] = 'cur', - ['ꋊ'] = 'cyt', ['ꋋ'] = 'cyx', ['ꋌ'] = 'cy', ['ꋍ'] = 'cyp', - ['ꋎ'] = 'cyrx', ['ꋏ'] = 'cyr', - - ['ꋐ'] = 'zzit', ['ꋑ'] = 'zzix', ['ꋒ'] = 'zzi', ['ꋓ'] = 'zzip', - ['ꋔ'] = 'zziet', ['ꋕ'] = 'zziex', ['ꋖ'] = 'zzie', ['ꋗ'] = 'zziep', - ['ꋘ'] = 'zzat', ['ꋙ'] = 'zzax', ['ꋚ'] = 'zza', ['ꋛ'] = 'zzap', - ['ꋜ'] = 'zzox', ['ꋝ'] = 'zzo', ['ꋞ'] = 'zzop', - ['ꋟ'] = 'zzex', ['ꋠ'] = 'zze', ['ꋡ'] = 'zzep', - ['ꋢ'] = 'zzux', ['ꋣ'] = 'zzu', ['ꋤ'] = 'zzup', - ['ꋥ'] = 'zzurx', ['ꋦ'] = 'zzur', - ['ꋧ'] = 'zzyt', ['ꋨ'] = 'zzyx', ['ꋩ'] = 'zzy', ['ꋪ'] = 'zzyp', - ['ꋫ'] = 'zzyrx', ['ꋬ'] = 'zzyr', - - ['ꋭ'] = 'nzit', ['ꋮ'] = 'nzix', ['ꋯ'] = 'nzi', ['ꋰ'] = 'nzip', - ['ꋱ'] = 'nziex', ['ꋲ'] = 'nzie', ['ꋳ'] = 'nziep', - ['ꋴ'] = 'nzat', ['ꋵ'] = 'nzax', ['ꋶ'] = 'nza', ['ꋷ'] = 'nzap', - ['ꋸ'] = 'nzuox', ['ꋹ'] = 'nzuo', - ['ꋺ'] = 'nzox', ['ꋻ'] = 'nzop', - ['ꋼ'] = 'nzex', ['ꋽ'] = 'nze', - ['ꋾ'] = 'nzux', ['ꋿ'] = 'nzu', ['ꌀ'] = 'nzup', - ['ꌁ'] = 'nzurx', ['ꌂ'] = 'nzur', - ['ꌃ'] = 'nzyt', ['ꌄ'] = 'nzyx', ['ꌅ'] = 'nzy', ['ꌆ'] = 'nzyp', - ['ꌇ'] = 'nzyrx', ['ꌈ'] = 'nzyr', - - ['ꌉ'] = 'sit', ['ꌊ'] = 'six', ['ꌋ'] = 'si', ['ꌌ'] = 'sip', - ['ꌍ'] = 'siex', ['ꌎ'] = 'sie', ['ꌏ'] = 'siep', - ['ꌐ'] = 'sat', ['ꌑ'] = 'sax', ['ꌒ'] = 'sa', ['ꌓ'] = 'sap', - ['ꌔ'] = 'suox', ['ꌕ'] = 'suo', ['ꌖ'] = 'suop', - ['ꌗ'] = 'sot', ['ꌘ'] = 'sox', ['ꌙ'] = 'so', ['ꌚ'] = 'sop', - ['ꌛ'] = 'sex', ['ꌜ'] = 'se', ['ꌝ'] = 'sep', - ['ꌞ'] = 'sut', ['ꌟ'] = 'sux', ['ꌠ'] = 'su', ['ꌡ'] = 'sup', - ['ꌢ'] = 'surx', ['ꌣ'] = 'sur', - ['ꌤ'] = 'syt', ['ꌥ'] = 'syx', ['ꌦ'] = 'sy', ['ꌧ'] = 'syp', - ['ꌨ'] = 'syrx', ['ꌩ'] = 'syr', - - ['ꌪ'] = 'ssit', ['ꌫ'] = 'ssix', ['ꌬ'] = 'ssi', ['ꌭ'] = 'ssip', - ['ꌮ'] = 'ssiex', ['ꌯ'] = 'ssie', ['ꌰ'] = 'ssiep', - ['ꌱ'] = 'ssat', ['ꌲ'] = 'ssax', ['ꌳ'] = 'ssa', ['ꌴ'] = 'ssap', - ['ꌵ'] = 'ssot', ['ꌶ'] = 'ssox', ['ꌷ'] = 'sso', ['ꌸ'] = 'ssop', - ['ꌹ'] = 'ssex', ['ꌺ'] = 'sse', ['ꌻ'] = 'ssep', - ['ꌼ'] = 'ssut', 
['ꌽ'] = 'ssux', ['ꌾ'] = 'ssu', ['ꌿ'] = 'ssup', - ['ꍀ'] = 'ssyt', ['ꍁ'] = 'ssyx', ['ꍂ'] = 'ssy', ['ꍃ'] = 'ssyp', - ['ꍄ'] = 'ssyrx', ['ꍅ'] = 'ssyr', - - ['ꍆ'] = 'zhat', ['ꍇ'] = 'zhax', ['ꍈ'] = 'zha', ['ꍉ'] = 'zhap', - ['ꍊ'] = 'zhuox', ['ꍋ'] = 'zhuo', ['ꍌ'] = 'zhuop', - ['ꍍ'] = 'zhot', ['ꍎ'] = 'zhox', ['ꍏ'] = 'zho', ['ꍐ'] = 'zhop', - ['ꍑ'] = 'zhet', ['ꍒ'] = 'zhex', ['ꍓ'] = 'zhe', ['ꍔ'] = 'zhep', - ['ꍕ'] = 'zhut', ['ꍖ'] = 'zhux', ['ꍗ'] = 'zhu', ['ꍘ'] = 'zhup', - ['ꍙ'] = 'zhurx', ['ꍚ'] = 'zhur', - ['ꍛ'] = 'zhyt', ['ꍜ'] = 'zhyx', ['ꍝ'] = 'zhy', ['ꍞ'] = 'zhyp', - ['ꍟ'] = 'zhyrx', ['ꍠ'] = 'zhyr', - - ['ꍡ'] = 'chat', ['ꍢ'] = 'chax', ['ꍣ'] = 'cha', ['ꍤ'] = 'chap', - ['ꍥ'] = 'chuot', ['ꍦ'] = 'chuox', ['ꍧ'] = 'chuo', ['ꍨ'] = 'chuop', - ['ꍩ'] = 'chot', ['ꍪ'] = 'chox', ['ꍫ'] = 'cho', ['ꍬ'] = 'chop', - ['ꍭ'] = 'chet', ['ꍮ'] = 'chex', ['ꍯ'] = 'che', ['ꍰ'] = 'chep', - ['ꍱ'] = 'chux', ['ꍲ'] = 'chu', ['ꍳ'] = 'chup', - ['ꍴ'] = 'churx', ['ꍵ'] = 'chur', - ['ꍶ'] = 'chyt', ['ꍷ'] = 'chyx', ['ꍸ'] = 'chy', ['ꍹ'] = 'chyp', - ['ꍺ'] = 'chyrx', ['ꍻ'] = 'chyr', - - ['ꍼ'] = 'rrax', ['ꍽ'] = 'rra', - ['ꍾ'] = 'rruox', ['ꍿ'] = 'rruo', - ['ꎀ'] = 'rrot', ['ꎁ'] = 'rrox', ['ꎂ'] = 'rro', ['ꎃ'] = 'rrop', - ['ꎄ'] = 'rret', ['ꎅ'] = 'rrex', ['ꎆ'] = 'rre', ['ꎇ'] = 'rrep', - ['ꎈ'] = 'rrut', ['ꎉ'] = 'rrux', ['ꎊ'] = 'rru', ['ꎋ'] = 'rrup', - ['ꎌ'] = 'rrurx', ['ꎍ'] = 'rrur', - ['ꎎ'] = 'rryt', ['ꎏ'] = 'rryx', ['ꎐ'] = 'rry', ['ꎑ'] = 'rryp', - ['ꎒ'] = 'rryrx', ['ꎓ'] = 'rryr', - - ['ꎔ'] = 'nrat', ['ꎕ'] = 'nrax', ['ꎖ'] = 'nra', ['ꎗ'] = 'nrap', - ['ꎘ'] = 'nrox', ['ꎙ'] = 'nro', ['ꎚ'] = 'nrop', - ['ꎛ'] = 'nret', ['ꎜ'] = 'nrex', ['ꎝ'] = 'nre', ['ꎞ'] = 'nrep', - ['ꎟ'] = 'nrut', ['ꎠ'] = 'nrux', ['ꎡ'] = 'nru', ['ꎢ'] = 'nrup', - ['ꎣ'] = 'nrurx', ['ꎤ'] = 'nrur', - ['ꎥ'] = 'nryt', ['ꎦ'] = 'nryx', ['ꎧ'] = 'nry', ['ꎨ'] = 'nryp', - ['ꎩ'] = 'nryrx', ['ꎪ'] = 'nryr', - - ['ꎫ'] = 'shat', ['ꎬ'] = 'shax', ['ꎭ'] = 'sha', ['ꎮ'] = 'shap', - ['ꎯ'] = 'shuox', ['ꎰ'] = 'shuo', ['ꎱ'] = 'shuop', - ['ꎲ'] = 'shot', ['ꎳ'] = 'shox', ['ꎴ'] = 'sho', 
['ꎵ'] = 'shop', - ['ꎶ'] = 'shet', ['ꎷ'] = 'shex', ['ꎸ'] = 'she', ['ꎹ'] = 'shep', - ['ꎺ'] = 'shut', ['ꎻ'] = 'shux', ['ꎼ'] = 'shu', ['ꎽ'] = 'shup', - ['ꎾ'] = 'shurx', ['ꎿ'] = 'shur', - ['ꏀ'] = 'shyt', ['ꏁ'] = 'shyx', ['ꏂ'] = 'shy', ['ꏃ'] = 'shyp', - ['ꏄ'] = 'shyrx', ['ꏅ'] = 'shyr', - - ['ꏆ'] = 'rat', ['ꏇ'] = 'rax', ['ꏈ'] = 'ra', ['ꏉ'] = 'rap', - ['ꏊ'] = 'ruox', ['ꏋ'] = 'ruo', ['ꏌ'] = 'ruop', - ['ꏍ'] = 'rot', ['ꏎ'] = 'rox', ['ꏏ'] = 'ro', ['ꏐ'] = 'rop', - ['ꏑ'] = 'rex', ['ꏒ'] = 're', ['ꏓ'] = 'rep', - ['ꏔ'] = 'rut', ['ꏕ'] = 'rux', ['ꏖ'] = 'ru', ['ꏗ'] = 'rup', - ['ꏘ'] = 'rurx', ['ꏙ'] = 'rur', - ['ꏚ'] = 'ryt', ['ꏛ'] = 'ryx', ['ꏜ'] = 'ry', ['ꏝ'] = 'ryp', - ['ꏞ'] = 'ryrx', ['ꏟ'] = 'ryr', - - ['ꏠ'] = 'jit', ['ꏡ'] = 'jix', ['ꏢ'] = 'ji', ['ꏣ'] = 'jip', - ['ꏤ'] = 'jiet', ['ꏥ'] = 'jiex', ['ꏦ'] = 'jie', ['ꏧ'] = 'jiep', - ['ꏨ'] = 'juot', ['ꏩ'] = 'juox', ['ꏪ'] = 'juo', ['ꏫ'] = 'juop', - ['ꏬ'] = 'jot', ['ꏭ'] = 'jox', ['ꏮ'] = 'jo', ['ꏯ'] = 'jop', - ['ꏰ'] = 'jut', ['ꏱ'] = 'jux', ['ꏲ'] = 'ju', ['ꏳ'] = 'jup', - ['ꏴ'] = 'jurx', ['ꏵ'] = 'jur', - ['ꏶ'] = 'jyt', ['ꏷ'] = 'jyx', ['ꏸ'] = 'jy', ['ꏹ'] = 'jyp', - ['ꏺ'] = 'jyrx', ['ꏻ'] = 'jyr', - - ['ꏼ'] = 'qit', ['ꏽ'] = 'qix', ['ꏾ'] = 'qi', ['ꏿ'] = 'qip', - ['ꐀ'] = 'qiet', ['ꐁ'] = 'qiex', ['ꐂ'] = 'qie', ['ꐃ'] = 'qiep', - ['ꐄ'] = 'quot', ['ꐅ'] = 'quox', ['ꐆ'] = 'quo', ['ꐇ'] = 'quop', - ['ꐈ'] = 'qot', ['ꐉ'] = 'qox', ['ꐊ'] = 'qo', ['ꐋ'] = 'qop', - ['ꐌ'] = 'qut', ['ꐍ'] = 'qux', ['ꐎ'] = 'qu', ['ꐏ'] = 'qup', - ['ꐐ'] = 'qurx', ['ꐑ'] = 'qur', - ['ꐒ'] = 'qyt', ['ꐓ'] = 'qyx', ['ꐔ'] = 'qy', ['ꐕ'] = 'qyp', - ['ꐖ'] = 'qyrx', ['ꐗ'] = 'qyr', - - ['ꐘ'] = 'jjit', ['ꐙ'] = 'jjix', ['ꐚ'] = 'jji', ['ꐛ'] = 'jjip', - ['ꐜ'] = 'jjiet', ['ꐝ'] = 'jjiex', ['ꐞ'] = 'jjie', ['ꐟ'] = 'jjiep', - ['ꐠ'] = 'jjuox', ['ꐡ'] = 'jjuo', ['ꐢ'] = 'jjuop', - ['ꐣ'] = 'jjot', ['ꐤ'] = 'jjox', ['ꐥ'] = 'jjo', ['ꐦ'] = 'jjop', - ['ꐧ'] = 'jjut', ['ꐨ'] = 'jjux', ['ꐩ'] = 'jju', ['ꐪ'] = 'jjup', - ['ꐫ'] = 'jjurx', ['ꐬ'] = 'jjur', - ['ꐭ'] = 'jjyt', ['ꐮ'] = 'jjyx', ['ꐯ'] = 'jjy', ['ꐰ'] = 'jjyp', - - ['ꐱ'] 
= 'njit', ['ꐲ'] = 'njix', ['ꐳ'] = 'nji', ['ꐴ'] = 'njip', - ['ꐵ'] = 'njiet', ['ꐶ'] = 'njiex', ['ꐷ'] = 'njie', ['ꐸ'] = 'njiep', - ['ꐹ'] = 'njuox', ['ꐺ'] = 'njuo', - ['ꐻ'] = 'njot', ['ꐼ'] = 'njox', ['ꐽ'] = 'njo', ['ꐾ'] = 'njop', - ['ꐿ'] = 'njux', ['ꑀ'] = 'nju', ['ꑁ'] = 'njup', - ['ꑂ'] = 'njurx', ['ꑃ'] = 'njur', - ['ꑄ'] = 'njyt', ['ꑅ'] = 'njyx', ['ꑆ'] = 'njy', ['ꑇ'] = 'njyp', - ['ꑈ'] = 'njyrx', ['ꑉ'] = 'njyr', - ['ꑊ'] = 'nyit', ['ꑋ'] = 'nyix', ['ꑌ'] = 'nyi', ['ꑍ'] = 'nyip', - ['ꑎ'] = 'nyiet', ['ꑏ'] = 'nyiex', ['ꑐ'] = 'nyie', ['ꑑ'] = 'nyiep', - ['ꑒ'] = 'nyuox', ['ꑓ'] = 'nyuo', ['ꑔ'] = 'nyuop', - ['ꑕ'] = 'nyot', ['ꑖ'] = 'nyox', ['ꑗ'] = 'nyo', ['ꑘ'] = 'nyop', - ['ꑙ'] = 'nyut', ['ꑚ'] = 'nyux', ['ꑛ'] = 'nyu', ['ꑜ'] = 'nyup', - - ['ꑝ'] = 'xit', ['ꑞ'] = 'xix', ['ꑟ'] = 'xi', ['ꑠ'] = 'xip', - ['ꑡ'] = 'xiet', ['ꑢ'] = 'xiex', ['ꑣ'] = 'xie', ['ꑤ'] = 'xiep', - ['ꑥ'] = 'xuox', ['ꑦ'] = 'xuo', - ['ꑧ'] = 'xot', ['ꑨ'] = 'xox', ['ꑩ'] = 'xo', ['ꑪ'] = 'xop', - ['ꑫ'] = 'xyt', ['ꑬ'] = 'xyx', ['ꑭ'] = 'xy', ['ꑮ'] = 'xyp', - ['ꑯ'] = 'xyrx', ['ꑰ'] = 'xyr', - - ['ꑱ'] = 'yit', ['ꑲ'] = 'yix', ['ꑳ'] = 'yi', ['ꑴ'] = 'yip', - ['ꑵ'] = 'yiet', ['ꑶ'] = 'yiex', ['ꑷ'] = 'yie', ['ꑸ'] = 'yiep', - ['ꑹ'] = 'yuot', ['ꑺ'] = 'yuox', ['ꑻ'] = 'yuo', ['ꑼ'] = 'yuop', - ['ꑽ'] = 'yot', ['ꑾ'] = 'yox', ['ꑿ'] = 'yo', ['ꒀ'] = 'yop', - ['ꒁ'] = 'yut', ['ꒂ'] = 'yux', ['ꒃ'] = 'yu', ['ꒄ'] = 'yup', - ['ꒅ'] = 'yurx', ['ꒆ'] = 'yur', - ['ꒇ'] = 'yyt', ['ꒈ'] = 'yyx', ['ꒉ'] = 'yy', ['ꒊ'] = 'yyp', - ['ꒋ'] = 'yyrx', ['ꒌ'] = 'yyr', -} - -function export.tr(text, lang, sc) - text = gsub(text, "('?'?'?)(%^?[^'])('?'?'?)", "%1%2%3 ") - text = gsub(text, "%S", convert) - text = gsub(text, "%^(%l)", string.upper) - text = gsub(text, " $", "") - - return (text) -end - -return export \ No newline at end of file diff --git a/wikt/translit/inc-ash-translit.lua b/wikt/translit/inc-ash-translit.lua deleted file mode 100644 index cd10903..0000000 --- a/wikt/translit/inc-ash-translit.lua +++ /dev/null @@ -1,24 +0,0 @@ --- This module will 
transliterate Ashokan Prakrit language text. -local export = {} - -function export.tr(text, lang, sc, debug_mode) - - local out_text - if (sc == 'Brah') then - out_text = require('Module:Brah-translit').tr(text, lang, sc, debug_mode) - elseif (sc == 'Khar') then - out_text = require('Module:Khar-translit').tr(text, lang, sc, debug_mode) - else - local namespace = mw.title:getCurrentTitle().nsText - if namespace == "Category" then - out_text = nil - else - error('Invalid script for Ashokan Prakrit language.') - end - end - - return out_text - -end - -return export \ No newline at end of file diff --git a/wikt/translit/inc-mas-translit.lua b/wikt/translit/inc-mas-translit.lua deleted file mode 100644 index df4e56a..0000000 --- a/wikt/translit/inc-mas-translit.lua +++ /dev/null @@ -1,58 +0,0 @@ --- This module will transliterate Middle Assamese language text. -local export = {} - -local consonants = { - ['ক']='k', ['খ']='kh', ['গ']='g', ['ঘ']='gh', ['ঙ']='ṅ', - ['চ']='c', ['ছ']='ch', ['জ']='j', ['ঝ']='jh', ['ঞ']='ñ', - ['ট']='ṭ', ['ঠ']='ṭh', ['ড']='ḍ', ['ঢ']='ḍh', ['ণ']='ṇ', - ['ত']='t', ['থ']='th', ['দ']='d', ['ধ']='dh', ['ন']='n', - ['প']='p', ['ফ']='ph', ['ব']='b', ['ভ']='bh', ['ম']='m', - ['য']='y', ['ৰ']='r', ['ল']='l', ['ৱ']='v', ['ळ']='ḷ', - ['শ']='ś', ['ষ']='ṣ', ['স']='s', ['হ']='h', ['ড়']='r', ['ঢ়']='rh', ['য়']=y -} - -local diacritics = { - ['া']='a', ['ি']='i', ['ী']='ī', ['ু']='u', ['ূ']='ū', ['ৃ']='ṛ', ['ৄ']='ṝ', - ['ৢ']='ḷ', ['ৣ']='ḹ', ['ে']='e', ['ৈ']='oi', ['ো']='ó', ['ৌ']='ou', ['্']='', -} - -local tt = { - -- vowels - ['অ']='o', ['আ']='a', ['ই']='i', ['ঈ']='ī', ['উ']='u', ['ঊ']='ū', ['ঋ']='ṛ', ['ৠ']='ṝ', - ['ঌ']='ḷ', ['ৡ']='ḹ', ['এ']='e', ['ঐ']='oi', ['ও']='ó', ['ঔ']='ou', - -- chandrabindu - ['ঁ']='̐', --until a better method is found - -- anusvara - ['ং']='ṅ', --until a better method is found - -- visarga - ['ঃ']='ḥ', - -- avagraha - ['ঽ']='’', - --numerals - ['০']='0', ['১']='1', ['২']='2', ['৩']='3', ['৪']='4', ['৫']='5', ['৬']='6', 
['৭']='7', ['৮']='8', ['৯']='9', - --punctuation - ['॥']='.', --double danda - ['।']='.', - --reconstructed - ['*'] = '', -} - -function export.tr(text, lang, sc) - text = mw.ustring.gsub( - text, - '([কখগঘঙচছজঝঞটঠডঢণতথদধনপফবভমযৰলৱশষসহড়ঢ়য়])'.. - '([ািীুূৃৄৢৣেৈোৌ্]?)', - function(c, d) - if d == "" then - return consonants[c] .. 'o' - else - return consonants[c] .. diacritics[d] - end - end) - - text = mw.ustring.gsub(text, '.', tt) - - return text -end - -return export \ No newline at end of file diff --git a/wikt/translit/inh-translit.lua b/wikt/translit/inh-translit.lua deleted file mode 100644 index cc7c004..0000000 --- a/wikt/translit/inh-translit.lua +++ /dev/null @@ -1,100 +0,0 @@ --- This module will transliterate Ingush language text per WT:INH TR. --- Language code: inh - -local export = {} - -local mapping1 = { - ["а"]="a" , ["б"]="b" , ["в"]="w" , ["г"]="g" , ["д"]="d" , ["е"]="e", ["ё"]="ë", - ["ж"]="ž" , ["з"]="z" , ["и"]="i" , ["й"]="j" , ["к"]="k" , ["л"]="l" , ["м"]="m", - ["н"]="n" , ["о"]="o" , ["п"]="p" , ["р"]="r" , ["с"]="s" , ["т"]="t" , ["у"]="u", - ["ф"]="f" , ["х"]="χ" , ["ц"]="c" , ["ч"]="č" , ["ш"]="š" , ["щ"]="šč", ["ъ"]="ʾ", - ["ы"]="ə" , ["ь"]="’" , ["э"]="è" , ["ю"]="ju", ["я"]="ja", ["ӏ"]="ʿ" , ["А"]="A", - ["Б"]="B" , ["В"]="W" , ["Г"]="G" , ["Д"]="D" , ["Е"]="E", ["Ё"]="Ë" , ["Ж"]="Ž", - ["З"]="Z" , ["И"]="I" , ["Й"]="J" , ["К"]="K" , ["Л"]="L" , ["М"]="M" , ["Н"]="N", - ["О"]="O" , ["П"]="P" , ["Р"]="R" , ["С"]="S" , ["Т"]="T" , ["У"]="U" , ["Ф"]="F", - ["Х"]="Χ" , ["Ц"]="C" , ["Ч"]="Č" , ["Ш"]="Š" , ["Щ"]="Šč", ["Ъ"]="ʾ" , ["Ы"]="Ə", - ["Ь"]="’" , ["Э"]="È" , ["Ю"]="Ju", ["Я"]="Ja", ["Ӏ"]="ʿ" -} - -local mapping2 = { - ['аь'] = 'ä', ['гӏ'] = 'ġ' , ['кх'] = 'q', ['къ'] = 'q̇', - ['кӏ'] = 'ḳ', ['пӏ'] = 'ṗ' , ['тӏ'] = 'ṭ', ['цӏ'] = 'c̣', - ['чӏ'] = 'č̣', ['яь'] = 'jä', ['ий'] = 'ī', ['ув'] = 'ū', - ['хь'] = 'ḥ', ['хӏ'] = 'h' , ['Аь'] = 'Ä', ['Гӏ'] = 'Ġ', - ['Кх'] = 'Q', ['Къ'] = 'Q̇' , ['Кӏ'] = 'Ḳ', ['Пӏ'] = 'Ṗ', - ['Тӏ'] = 'Ṭ', 
['Цӏ'] = 'C̣' , ['Чӏ'] = 'Č̣', ['Яь'] = 'Jä', - ['Ий'] = 'Ī', ['Ув'] = 'Ū' , ['Хь'] = 'Ḥ', ['Хӏ'] = 'H', -} - -function export.tr(text, lang, sc) - local str_gsub = string.gsub - local UTF8_char = "[%z\1-\127\194-\244][\128-\191]*" - - -- Convert capital to lowercase palochka. - text = str_gsub(text, mw.ustring.char(0x4C0), mw.ustring.char(0x4CF)) - - text = str_gsub(text, 'рхӏ', 'ρ') - text = str_gsub(text, 'Рхӏ', 'Ρ') - for pat, repl in pairs(mapping2) do - text = str_gsub(text, pat, repl) - end - text = str_gsub(text, UTF8_char, mapping1) - - return text -end - -return export - --- All tests passed. (refresh) - --- test: --- Text Expected Actual --- Passed аьккхий-мархий бутт äkqī-marχī butt äkqī-marχī butt --- Passed кӏиранди ḳirandi ḳirandi --- Passed кӀиранди ḳirandi ḳirandi --- Passed Гӏалгӏай Ġalġaj Ġalġaj --- Passed ГӀалгӀай Ġalġaj Ġalġaj --- Passed пӏаьраска ṗäraska ṗäraska --- Passed пӀаьраска ṗäraska ṗäraska --- Passed ӏуйре ʿujre ʿujre --- Passed Ӏуйре ʿujre ʿujre --- Passed цӏи c̣i c̣i --- Passed цӀи c̣i c̣i --- Passed таьрахь täraḥ täraḥ --- Passed кхаьра qära qära --- Passed маъы бутт maʾə butt maʾə butt --- Passed ножганцӏей nožganc̣ej nožganc̣ej --- Passed ножганцӀей nožganc̣ej nožganc̣ej --- Passed сахьат saḥat saḥat --- Passed тха tχa tχa --- Passed жӏали žʿali žʿali --- Passed жӀали žʿali žʿali --- return require('Module:transliteration module testcases')( --- require('Module:inh-translit').tr, --- { --- { 'аьккхий-мархий бутт', 'äkqī-marχī butt' }, --- { 'кӏиранди', 'ḳirandi' }, -- lowercase palochka --- { 'кӀиранди', 'ḳirandi' }, -- uppercase palochka --- { 'Гӏалгӏай', 'Ġalġaj' }, -- lowercase --- { 'ГӀалгӀай', 'Ġalġaj' }, -- uppercase --- { 'пӏаьраска', 'ṗäraska' }, -- lowercase --- { 'пӀаьраска', 'ṗäraska' }, -- uppercase --- { 'ӏуйре', 'ʿujre' }, -- lowercase --- { 'Ӏуйре', 'ʿujre' }, -- uppercase --- { 'цӏи', 'c̣i' }, -- lowercase --- { 'цӀи', 'c̣i' },-- uppercase --- { 'таьрахь', 'täraḥ' }, --- { 'кхаьра', 'qära' }, --- { 'маъы бутт', 
'maʾə butt' }, --- { 'ножганцӏей', 'nožganc̣ej' }, -- lowercase --- { 'ножганцӀей', 'nožganc̣ej' }, -- uppercase --- { 'сахьат', 'saḥat' }, --- { 'тха', 'tχa' }, --- { 'жӏали', 'žʿali' }, -- lowercase --- { 'жӀали', 'žʿali' }, -- uppercase --- --[[ --- { '', '' }, --- --]] --- }, --- 'Cyrl', 'inh' --- ) \ No newline at end of file diff --git a/wikt/translit/ital-translit.lua b/wikt/translit/ital-translit.lua deleted file mode 100644 index 7bb5640..0000000 --- a/wikt/translit/ital-translit.lua +++ /dev/null @@ -1,149 +0,0 @@ --- This module will transliterate text in the Old Italic script. --- It is used to transliterate Etruscan (ett), Old Latin (itc-ola), North Picene (nrp), --- Oscan (osc), South Picene (spx), Camunic (xcc), Faliscan (xfa), Lemnian (xle), --- Lepontic (xlp), Raetic (xrr), Umbrian (xum) and Venetic (xve). - -local export = {} - --- Standard transcription -local common_rules = { - ['𐌀'] = 'a', - ['𐌁'] = 'b', - ['𐌂'] = 'c', - ['𐌃'] = 'd', - ['𐌄'] = 'e', - ['𐌅'] = 'v', - ['𐌆'] = 'z', - ['𐌇'] = 'h', - ['𐌈'] = 'θ', - ['𐌉'] = 'i', - ['𐌊'] = 'k', - ['𐌋'] = 'l', - ['𐌌'] = 'm', - ['𐌍'] = 'n', - ['𐌎'] = 'š', - ['𐌏'] = 'o', - ['𐌐'] = 'p', - ['𐌑'] = 'ś', - ['𐌒'] = 'q', - ['𐌓'] = 'r', - ['𐌔'] = 's', - ['𐌕'] = 't', - ['𐌖'] = 'u', - ['𐌗'] = 'x', - ['𐌘'] = 'φ', - ['𐌙'] = 'χ', - ['𐌚'] = 'f', - ['𐌛'] = 'ř', - ['𐌜'] = 'ç', - ['𐌝'] = 'í', - ['𐌞'] = 'ú', - ['𐌟'] = 'k', - -- Numerals - ['𐌠'] = 'Ⅰ', - ['𐌡'] = 'Ⅴ', - ['𐌢'] = 'Ⅹ', - ['𐌣'] = 'Ⅼ', - -- Punctuation - ['·'] = ' ', - ['⁚'] = ' ', - ['⁝'] = ' ', -} - -local lang_rules = { - ['ett'] = { -- Etruscan - ['𐌟'] = 'Ⅽ', - }; - - ['itc-ola'] = { -- Old Latin - ['𐌅'] = 'f', - }; - - ['nrc'] = { -- Noric - ['𐌂'] = 'g', - ['𐌈'] = 'd', - ['𐌙'] = 'g', - }; - - ['nrp'] = { -- North Picene - ['𐌂'] = 'g', - }; - - ['osc'] = { -- Oscan - ['𐌂'] = 'g', - }; - - ['spx'] = { -- South Picene - ['𐌂'] = 'g', - ['𐌑'] = 'í', - ['·'] = 'o', - ['⁚'] = 'f', - }; - - ['xcc'] = { -- Camunic - ['𐌁'] = 'ś', - ['𐌂'] = 'g', - ['𐌑'] = 'b', - ['𐌙'] 
= 's', - ['𐌟'] = 'þþ', - ['𐌣'] = 'þ', - }; - - ['xrr'] = { -- Raetic - ['𐌁'] = 'þ', - ['𐌂'] = '?', - }; - - ['xum'] = { -- Umbrian - ['𐌈'] = 't', - }; - - ['xve'] = { -- Venetic - ['𐌂'] = 'j', - ['𐌆'] = 'd', - ['𐌇𐌅'] = 'f', - ['𐌘'] = 'b', - ['𐌙'] = 'g', - }; -} - -function export.tr(text, lang, sc) - -- If the script is not Ital, do not transliterate - if sc ~= "Ital" then - return - end - - -- Transliterate language-specific exceptions - if lang == "xve" then - text = mw.ustring.gsub(text, '𐌇𐌅', 'f') - end - - if lang_rules[lang] then - text = mw.ustring.gsub(text, '.', lang_rules[lang]) - end - - -- Transliterate remaining characters - text = mw.ustring.gsub(text, '.', common_rules) - - return text -end - -return export - --- Text Expected Actual --- Passed South Picene: 𐌌𐌄⁚𐌉𐌑𐌍 mefiín mefiín --- -- Unit tests for [[Module:Ital-translit]]. Refresh page to run tests. --- local tests = require('Module:UnitTests') --- local module = require('Module:Ital-translit') --- local m_lang = require('Module:languages') - --- function tests:check_translit(lang, text, translit) --- local langname = m_lang.getByCode(lang):getCanonicalName() --- self:equals(langname .. ': [[' .. text .. '#' .. langname .. '|' .. text .. ']]', module.tr(text, lang, 'Ital'), translit) --- end - --- function tests:test_translits() --- self:check_translit('spx', '𐌌𐌄⁚𐌉𐌑𐌍', 'mefiín') --- end - --- return tests \ No newline at end of file diff --git a/wikt/translit/iu-translit.lua b/wikt/translit/iu-translit.lua deleted file mode 100644 index ccc02a0..0000000 --- a/wikt/translit/iu-translit.lua +++ /dev/null @@ -1,366 +0,0 @@ --- This module will transliterate Inuktitut language text. 
--- Language code: iku ---[[ -ISO 639-3 iku – inclusive code Inuktitut -Individual codes: -ike – Eastern Canadian Inuktitut -ikt – Inuinnaqtun -]] -local export = {} - -function export.tr(text, lang, sc) - text = require("Module:Cans-translit").tr(text, lang, sc) - - local repl = { - ["ī"] = "ii", - ["ō"] = "uu", - ["ā"] = "aa", - ["š"] = "sh", - ["ð"] = "th", - ["e"] = "ai", - ["o"] = "u", - ["y"] = "j", - ["f"] = "v", - ["c"] = "g", - } - - for char, replacement in pairs(repl) do - text = mw.ustring.gsub(text, char, replacement) - end - - return text -end - -return export - --- ᐁᕕᖃ: aiviqa --- ᐃᐦᐃᑉᕆᐅᖅᑐᖅ: ihipriuqtuq --- ᐃᐱᕋᐅᑕᖅ: ipirautaq --- ᐃᑉᐸᒃᓴᖅ: ippaksaq --- ᐃᒡᓕᖅ: igliq --- ᐃᒡᓗ: iglu --- ᐃᒥᖅ: imiq --- ᐃᒥᖅᑐᖅ: imiqtuq --- ᐃᒪᖅ: imaq --- ᐃᒻᒧᒃ: immuk --- ᐃᒻᒪᖄ: immaqaa --- ᐃᓄᒃ: inuk --- ᐃᓄᒃᑎᑐᑦ: inuktitut --- ᐃᓄᒃᓱᒃ: inuksuk --- ᐃᓄᖅ: inuq --- ᐃᓕᔅᓯ: ilissi --- ᐃᓗ: ilu --- ᐃᓪᓗ: illu --- ᐃᓴᕈᖅ: isaruq --- ᐃᔨ: iji --- ᐃᕐᒥᐅᑦ: irmiut --- ᐃᕐᓂᖅ: irniq --- ᐃᕝᕕᑦ: ivvit --- ᐃᖃᓗᐃᑦ: iqaluit --- ᐃᖃᓗᒃ: iqaluk --- ᐄ: ii --- ᐅᐃᒃ: uik --- ᐅᐃᕖᑎᑐᓪᓗ: uiviititullu --- ᐅᐱᕐᓐᖄᖅ: upirnqaaq --- ᐅᑭᐅᖅ: ukiuq --- ᐅᒃᐱᒃ: ukpik --- ᐅᒥᒃ: umik --- ᐅᓈᖅ: unaaq --- ᐅᓐᓄᖅ: unnuq --- ᐅᓗ: ulu --- ᐅᓪᓗᒥ: ullumi --- ᐅᓪᓗᕆᐊᖅ: ulluriaq --- ᐅᓵᐃᐅ: usaaiu --- ᐅᕙᒍᑦ: uvagut --- ᐅᕙᖓ: uvanga --- ᐅᖃᐅᓯᖅ: uqausiq --- ᐅᖅᓱᖅ: uqsuq --- ᐆᒪᔪᖅ: uumajuq --- ᐆᒻᒪᑎ: uummati --- ᐊᐅᒃ: auk --- ᐊᐅᔪᐃᑦᑐᖅ: aujuittuq --- ᐊᐅᔭ: auja --- ᐊᐳᑦ: aput --- ᐊᑎᖅᐳᖅ: atiqpuq --- ᐊᑖᑕ: ataata --- ᐊᑭᓯᖅ: akisiq --- ᐊᒡᒐᒃ: aggak --- ᐊᒡᓘᒃᑲᖅ: agluukkaq --- ᐊᒥᐃᐅᓕᐅᕈ: amiiuliuru --- ᐊᒪᕈᖅ: amaruq --- ᐊᓄᕆ: anuri --- ᐊᓇᐅᓪᓚᐅᑦ: anaullaut --- ᐊᓈᓇ: anaana --- ᐊᓘᑦ: aluut --- ᐊᓪᓕᖅ: alliq --- ᐊᕐᓇᖅ: arnaq --- ᐊᕐᕕᒃ: arvik --- ᐊᕕᙵᖅ: avinngaq --- ᐊᖏᖅᑲᖅᑐᖅ: angiqkaqtuq --- ᐊᖑᑦ: angut --- ᐊᖑᓇᓱᒃᑐᖅ: angunasuktuq --- ᐊᖓᔪᖅ: angajuq --- ᐋᒃᑲ: aakka --- ᐯᑉᐹᖅ: paippaaq --- ᐱ: pi --- ᐱᓱᒃ: pisuk --- ᐳᐊᓘᒃ: pualuuk --- ᐸᓂᖅ: paniq --- ᑎᑎᕋᐅᑦ: titiraut --- ᑎᑭᖅ: tikiq --- ᑎᒥ: timi --- ᑎᕆᒐᓂᐊᖅ: tiriganiaq --- ᑎᕆᒐᓐᓂᐊᖅ: tiriganniaq --- ᑎᖕᒥᐊᖅ: tingmiaq --- ᑎᖕᒥᓲᖅ: tingmisuuq --- ᑏ: tii --- ᑐᐊᐸᑦ: tuapat 
--- ᑐᑐ: tutu --- ᑐᒃᑐ: tuktu --- ᑐᕙᖅ: tuvaq --- ᑑᒑᓕᒃ: tuugaalik --- ᑑᒑᖅ: tuugaaq --- ᑕᒃᑐᒃ: taktuk --- ᑕᓪᓗ: tallu --- ᑕᓯᖅ: tasiq --- ᑕᕆᐅᖅ: tariuq --- ᑕᕝᕙ: tavva --- ᑭᓇ: kina --- ᑭᖓᖅ: kingaq --- ᑮᓇᐅᔭᖅ: kiinaujaq --- ᑮᓇᖅ: kiinaq --- ᑰᒃ: kuuk --- ᑲᒥᒃ: kamik --- ᑲᓇᑕ: kanata --- ᑳᐱ: kaapi --- ᑳᒃᑐᖅ: kaaktuq --- ᒪᐅᔭ: mauja --- ᒪᒻᒪᓛᑎᒃ: mammalaatik --- ᓂᐅ: niu --- ᓂᐅᒃ: niuk --- ᓂᐊᖁᖅ: niaquq --- ᓂᐊᖂᔮᖅ: niaquujaaq --- ᓂᕆᔪᖅ: nirijuq --- ᓂᕿ: niqi --- ᓄᑲᖅ: nukaq --- ᓄᓇ: nuna --- ᓄᓇᑐᑲᕗᑦ: nunatukavut --- ᓄᓇᑦᓯᐊᕗᑦ: nunatsiavut --- ᓄᓇᕕᒃ: nunavik --- ᓄᓇᕕᒻᒥᐅᑦ: nunavimmiut --- ᓄᓇᕗᑦ: nunavut --- ᓄᓇᕗᒻᒥᐅᖅ: nunavummiuq --- ᓄᓕᐊᖅ: nuliaq --- ᓄᔭᑦ: nujat --- ᓇᓂ: nani --- ᓇᓄᒃ: nanuk --- ᓇᓄᖅ: nanuq --- ᓕᐅᓇ: liuna --- ᓯᐅᑦ: siut --- ᓯᐱᓂᖅ: sipiniq --- ᓯᓈ: sinaa --- ᓯᓚ: sila --- ᓯᓚᒥ: silami --- ᓯᕿᓂᖅ: siqiniq --- ᓱᓇᒐᖅ: sunagaq --- ᓲᓴᓐ: suusan --- ᓴᐃᒧ: saimu --- ᓴᑯ: saku --- ᓴᕕᒃ: savik --- ᔪᐊᑕᓐ: juatan --- ᔭᓐᓄᐊᓕ: jannuali --- ᕼᐊᓐᑕ: hanta --- ᕼᐋᑭ: haaki --- ᕿᒃᑐᕆᐊᖅ: qikturiaq --- ᕿᒃᑐᕆᐊᖅᑕᐅᓂᖅ: qikturiaqtauniq --- ᕿᒻᒥᒃ: qimmik --- ᕿᒻᒧᒃᓯᑦ: qimmuksit --- ᕿᓚᓗᒐᖅ: qilalugaq --- ᕿᓯᒃ: qisik --- ᕿᖓᖅ: qingaq --- ᖁᑭᐅᑦ: qukiut --- ᖁᔭᓐᓇᒦᒃ: qujannamiik --- ᖃᐅᒃᐸᑦ: qaukpat --- ᖃᐅᓯᖅᑐᖅ: qausiqtuq --- ᖃᐅᔨᒪᓂᑐᖃᖏᑦ: qaujimanituqangit --- ᖃᐅᔨᒪᔭᑐᖃᖏᑦ: qaujimajatuqangit --- ᖃᒧᑏᒃ: qamutiik --- ᖃᓂᖅ: qaniq --- ᖃᓪᓗᓈᖅ: qallunaaq --- ᖃᔭᖅ: qajaq --- ᖃᖓ: qanga --- ᖄᓐᕿᔭᐅᑦ: qaanqijaut --- local export = {} - --- local result = "" - --- local function _add(t) --- return function(text) --- table.insert(t, "* [["..text.."#Inuktitut|"..text.."]]: " .. 
require("Module:iu-translit").tr(text, "iu", "Cans")) --- end --- end - --- function export.show() --- local output = {} --- local add = _add(output) --- add("ᐁᕕᖃ") --- add("ᐃᐦᐃᑉᕆᐅᖅᑐᖅ") --- add("ᐃᐱᕋᐅᑕᖅ") --- add("ᐃᑉᐸᒃᓴᖅ") --- add("ᐃᒡᓕᖅ") --- add("ᐃᒡᓗ") --- add("ᐃᒥᖅ") --- add("ᐃᒥᖅᑐᖅ") --- add("ᐃᒪᖅ") --- add("ᐃᒻᒧᒃ") --- add("ᐃᒻᒪᖄ") --- add("ᐃᓄᒃ") --- add("ᐃᓄᒃᑎᑐᑦ") --- add("ᐃᓄᒃᓱᒃ") --- add("ᐃᓄᖅ") --- add("ᐃᓕᔅᓯ") --- add("ᐃᓗ") --- add("ᐃᓪᓗ") --- add("ᐃᓴᕈᖅ") --- add("ᐃᔨ") --- add("ᐃᕐᒥᐅᑦ") --- add("ᐃᕐᓂᖅ") --- add("ᐃᕝᕕᑦ") --- add("ᐃᖃᓗᐃᑦ") --- add("ᐃᖃᓗᒃ") --- add("ᐄ") --- add("ᐅᐃᒃ") --- add("ᐅᐃᕖᑎᑐᓪᓗ") --- add("ᐅᐱᕐᓐᖄᖅ") --- add("ᐅᑭᐅᖅ") --- add("ᐅᒃᐱᒃ") --- add("ᐅᒥᒃ") --- add("ᐅᓈᖅ") --- add("ᐅᓐᓄᖅ") --- add("ᐅᓗ") --- add("ᐅᓪᓗᒥ") --- add("ᐅᓪᓗᕆᐊᖅ") --- add("ᐅᓵᐃᐅ") --- add("ᐅᕙᒍᑦ") --- add("ᐅᕙᖓ") --- add("ᐅᖃᐅᓯᖅ") --- add("ᐅᖅᓱᖅ") --- add("ᐆᒪᔪᖅ") --- add("ᐆᒻᒪᑎ") --- add("ᐊᐅᒃ") --- add("ᐊᐅᔪᐃᑦᑐᖅ") --- add("ᐊᐅᔭ") --- add("ᐊᐳᑦ") --- add("ᐊᑎᖅᐳᖅ") --- add("ᐊᑖᑕ") --- add("ᐊᑭᓯᖅ") --- add("ᐊᒡᒐᒃ") --- add("ᐊᒡᓘᒃᑲᖅ") --- add("ᐊᒥᐃᐅᓕᐅᕈ") --- add("ᐊᒪᕈᖅ") --- add("ᐊᓄᕆ") --- add("ᐊᓇᐅᓪᓚᐅᑦ") --- add("ᐊᓈᓇ") --- add("ᐊᓘᑦ") --- add("ᐊᓪᓕᖅ") --- add("ᐊᕐᓇᖅ") --- add("ᐊᕐᕕᒃ") --- add("ᐊᕕᙵᖅ") --- add("ᐊᖏᖅᑲᖅᑐᖅ") --- add("ᐊᖑᑦ") --- add("ᐊᖑᓇᓱᒃᑐᖅ") --- add("ᐊᖓᔪᖅ") --- add("ᐋᒃᑲ") --- add("ᐯᑉᐹᖅ") --- add("ᐱ") --- add("ᐱᓱᒃ") --- add("ᐳᐊᓘᒃ") --- add("ᐸᓂᖅ") --- add("ᑎᑎᕋᐅᑦ") --- add("ᑎᑭᖅ") --- add("ᑎᒥ") --- add("ᑎᕆᒐᓂᐊᖅ") --- add("ᑎᕆᒐᓐᓂᐊᖅ") --- add("ᑎᖕᒥᐊᖅ") --- add("ᑎᖕᒥᓲᖅ") --- add("ᑏ") --- add("ᑐᐊᐸᑦ") --- add("ᑐᑐ") --- add("ᑐᒃᑐ") --- add("ᑐᕙᖅ") --- add("ᑑᒑᓕᒃ") --- add("ᑑᒑᖅ") --- add("ᑕᒃᑐᒃ") --- add("ᑕᓪᓗ") --- add("ᑕᓯᖅ") --- add("ᑕᕆᐅᖅ") --- add("ᑕᕝᕙ") --- add("ᑭᓇ") --- add("ᑭᖓᖅ") --- add("ᑮᓇᐅᔭᖅ") --- add("ᑮᓇᖅ") --- add("ᑰᒃ") --- add("ᑲᒥᒃ") --- add("ᑲᓇᑕ") --- add("ᑳᐱ") --- add("ᑳᒃᑐᖅ") --- add("ᒪᐅᔭ") --- add("ᒪᒻᒪᓛᑎᒃ") --- add("ᓂᐅ") --- add("ᓂᐅᒃ") --- add("ᓂᐊᖁᖅ") --- add("ᓂᐊᖂᔮᖅ") --- add("ᓂᕆᔪᖅ") --- add("ᓂᕿ") --- add("ᓄᑲᖅ") --- add("ᓄᓇ") --- add("ᓄᓇᑐᑲᕗᑦ") --- add("ᓄᓇᑦᓯᐊᕗᑦ") --- add("ᓄᓇᕕᒃ") --- add("ᓄᓇᕕᒻᒥᐅᑦ") --- add("ᓄᓇᕗᑦ") 
--- add("ᓄᓇᕗᒻᒥᐅᖅ") --- add("ᓄᓕᐊᖅ") --- add("ᓄᔭᑦ") --- add("ᓇᓂ") --- add("ᓇᓄᒃ") --- add("ᓇᓄᖅ") --- add("ᓕᐅᓇ") --- add("ᓯᐅᑦ") --- add("ᓯᐱᓂᖅ") --- add("ᓯᓈ") --- add("ᓯᓚ") --- add("ᓯᓚᒥ") --- add("ᓯᕿᓂᖅ") --- add("ᓱᓇᒐᖅ") --- add("ᓲᓴᓐ") --- add("ᓴᐃᒧ") --- add("ᓴᑯ") --- add("ᓴᕕᒃ") --- add("ᔪᐊᑕᓐ") --- add("ᔭᓐᓄᐊᓕ") --- add("ᕼᐊᓐᑕ") --- add("ᕼᐋᑭ") --- add("ᕿᒃᑐᕆᐊᖅ") --- add("ᕿᒃᑐᕆᐊᖅᑕᐅᓂᖅ") --- add("ᕿᒻᒥᒃ") --- add("ᕿᒻᒧᒃᓯᑦ") --- add("ᕿᓚᓗᒐᖅ") --- add("ᕿᓯᒃ") --- add("ᕿᖓᖅ") --- add("ᖁᑭᐅᑦ") --- add("ᖁᔭᓐᓇᒦᒃ") --- add("ᖃᐅᒃᐸᑦ") --- add("ᖃᐅᓯᖅᑐᖅ") --- add("ᖃᐅᔨᒪᓂᑐᖃᖏᑦ") --- add("ᖃᐅᔨᒪᔭᑐᖃᖏᑦ") --- add("ᖃᒧᑏᒃ") --- add("ᖃᓂᖅ") --- add("ᖃᓪᓗᓈᖅ") --- add("ᖃᔭᖅ") --- add("ᖃᖓ") --- add("ᖄᓐᕿᔭᐅᑦ") --- return table.concat(output, "\n") --- end - --- return export \ No newline at end of file diff --git a/wikt/translit/jdt-translit.lua b/wikt/translit/jdt-translit.lua deleted file mode 100644 index c46e953..0000000 --- a/wikt/translit/jdt-translit.lua +++ /dev/null @@ -1,305 +0,0 @@ --- This module will transliterate Judeo-Tat language text per WT:JDT TR. 
- -local export = {} - -local tt_he_a2 = { - ["או"] = "y", - ["אי"] = "e", - ["אָ"] = "o", - ["אַ"] = "a", - ["אִ"] = "i", -} - -local tt_he_dg = { - ["כּ"] = "k", - ["ךּ"] = "k", - ["בּ"] = "b", - ["פּ"] = "p", - ["ףּ"] = "p", - ["ג׳"] = "c", - ["ז׳"] = "ç", - ["ג'"] = "c", - ["ז'"] = "ç", -} - -local tt_he = { - ["א"] = "ə", - ["ב"] = "v", - ["ג"] = "g", - ["ד"] = "d", - ["ה"] = "h", - ["ז"] = "z", - ["ח"] = "ħ", - ["י"] = "j", - ["כ"] = "x", - ["ך"] = "x", - ["ל"] = "l", - ["מ"] = "m", - ["ם"] = "m", - ["נ"] = "n", - ["ן"] = "n", - ["ס"] = "s", - ["ע"] = "ḩ", - ["פ"] = "f", - ["ף"] = "f", - ["צ"] = "c", - ["ץ"] = "c", - ["ק"] = "q", - ["ר"] = "r", - ["ש"] = "ş", - ["ת"] = "t", - ["ת"] = "t", - ["׃"] = ":", - ["׳"] = "'", - ["״"] = "\"", - ["־"] = "-", -} - -local tt_cy_2 = { - ["ГЬ"] = "H", - ["Гь"] = "H", - ["гЬ"] = "h", - ["гь"] = "h", - ["ГӀ"] = "Ḩ", -- palochka - ["Гӏ"] = "Ḩ", -- palochka (rare lowercase) - ["ГI"] = "Ḩ", -- Latn I - ["ГІ"] = "Ḩ", -- Cyrl І - ["Гi"] = "Ḩ", -- Latn i - ["Гі"] = "Ḩ", -- Cyrl і - ["гӀ"] = "ḩ", -- palochka - ["гӏ"] = "ḩ", -- palochka (rare lowercase) - ["гI"] = "ḩ", -- Latn I - ["гІ"] = "ḩ", -- Cyrl І - ["гi"] = "ḩ", -- Latn i - ["гі"] = "ḩ", -- Cyrl і - ["ХЬ"] = "Ħ", - ["Хь"] = "Ħ", - ["хЬ"] = "ħ", - ["хь"] = "ħ", - ["ГЪ"] = "Q", - ["Гъ"] = "Q", - ["гЪ"] = "q", - ["гъ"] = "q", - ["УЬ"] = "Y", - ["Уь"] = "Y", - ["уЬ"] = "y", - ["уь"] = "y", -} - -local tt_cy = { - ["А"] = "A", ["а"] = "a", - ["Б"] = "B", ["б"] = "b", - ["Ч"] = "C", ["ч"] = "c", - ["Ж"] = "Ç", ["ж"] = "ç", - ["Д"] = "D", ["д"] = "d", - ["Е"] = "E", ["е"] = "e", - ["Э"] = "Ə", ["э"] = "ə", - ["Ф"] = "F", ["ф"] = "f", - ["Г"] = "G", ["г"] = "g", - ["И"] = "I", ["и"] = "i", - ["Й"] = "J", ["й"] = "j", - ["К"] = "K", ["к"] = "k", - ["Л"] = "L", ["л"] = "l", - ["М"] = "M", ["м"] = "m", - ["Н"] = "N", ["н"] = "n", - ["О"] = "O", ["о"] = "o", - ["П"] = "P", ["п"] = "p", - ["Р"] = "R", ["р"] = "r", - ["С"] = "S", ["с"] = "s", - ["Ш"] = "Ş", ["ш"] = "ş", - ["Т"] = "T", 
["т"] = "t", - ["У"] = "U", ["у"] = "u", - ["В"] = "V", ["в"] = "v", - ["Х"] = "X", ["х"] = "x", - ["З"] = "Z", ["з"] = "z", -} - -local tt_ipa = { - ["A"] = "a", ["a"] = "a", - ["B"] = "b", ["b"] = "b", - ["C"] = "tʃ", ["c"] = "tʃ", - ["Ç"] = "dʒ", ["ç"] = "dʒ", - ["D"] = "d", ["d"] = "d", - ["E"] = "ɛ", ["e"] = "ɛ", - ["Ə"] = "æ", ["ə"] = "æ", - ["F"] = "f", ["f"] = "f", - ["G"] = "ɡ", ["g"] = "ɡ", - ["H"] = "h", ["h"] = "h", - ["Ḩ"] = "ʕ", ["ḩ"] = "ʕ", - ["Ħ"] = "ħ", ["ħ"] = "ħ", - ["I"] = "ɪ", ["i"] = "ɪ", - ["J"] = "j", ["j"] = "j", - ["K"] = "k", ["k"] = "k", - ["L"] = "l", ["l"] = "l", - ["M"] = "m", ["m"] = "m", - ["N"] = "n", ["n"] = "n", - ["O"] = "o", ["o"] = "o", - ["P"] = "p", ["p"] = "p", - ["Q"] = "ɢ", ["q"] = "ɢ", - ["R"] = "ɾ", ["r"] = "ɾ", - ["S"] = "s", ["s"] = "s", - ["Ş"] = "ʃ", ["ş"] = "ʃ", - ["T"] = "t", ["t"] = "t", - ["U"] = "u", ["u"] = "u", - ["V"] = "v", ["v"] = "v", - ["X"] = "χ", ["x"] = "χ", - ["Y"] = "y", ["y"] = "y", - ["Z"] = "z", ["z"] = "z", -} - -local tt_tocy = { - ["A"] = "А", ["a"] = "а", - ["B"] = "Б", ["b"] = "б", - ["C"] = "Ч", ["c"] = "ч", - ["Ç"] = "Ж", ["ç"] = "ж", - ["D"] = "d", ["d"] = "д", - ["E"] = "Е", ["e"] = "е", - ["Ə"] = "Э", ["ə"] = "э", - ["F"] = "Ф", ["f"] = "ф", - ["G"] = "Г", ["g"] = "г", - ["H"] = "Гь", ["h"] = "гь", - ["Ḩ"] = "ГӀ", ["ḩ"] = "гӀ", - ["Ħ"] = "Хь", ["ħ"] = "хь", - ["I"] = "И", ["i"] = "и", - ["J"] = "Й", ["j"] = "й", - ["K"] = "К", ["k"] = "к", - ["L"] = "Л", ["l"] = "л", - ["M"] = "М", ["m"] = "м", - ["N"] = "Н", ["n"] = "н", - ["O"] = "О", ["o"] = "о", - ["P"] = "П", ["p"] = "п", - ["Q"] = "Гъ", ["q"] = "гъ", - ["R"] = "Р", ["r"] = "р", - ["S"] = "С", ["s"] = "с", - ["Ş"] = "Ш", ["ş"] = "ш", - ["T"] = "Т", ["t"] = "т", - ["U"] = "У", ["u"] = "у", - ["V"] = "В", ["v"] = "в", - ["X"] = "Х", ["x"] = "х", - ["Y"] = "Уь", ["y"] = "уь", - ["Z"] = "З", ["z"] = "з", -} - -local tt_tohe = { - ["A"] = "אַ", ["a"] = "אַ", - ["B"] = "בּ", ["b"] = "בּ", - ["C"] = "ג׳", ["c"] = "ג׳", - ["Ç"] = "ז׳", 
["ç"] = "ז׳", - ["D"] = "ד", ["d"] = "ד", - ["E"] = "אי", ["e"] = "אי", - ["Ə"] = "א", ["ə"] = "א", - ["F"] = "פ", ["f"] = "פ", - ["G"] = "ג", ["g"] = "ג", - ["H"] = "ה", ["h"] = "ה", - ["Ḩ"] = "ע", ["ḩ"] = "ע", - ["Ħ"] = "ח", ["ħ"] = "ח", - ["I"] = "אִ", ["i"] = "אִ", - ["J"] = "י", ["j"] = "י", - ["K"] = "כּ", ["k"] = "כּ", - ["L"] = "ל", ["l"] = "ל", - ["M"] = "מ", ["m"] = "מ", - ["N"] = "נ", ["n"] = "נ", - ["O"] = "אָ", ["o"] = "אָ", - ["P"] = "פּ", ["p"] = "פּ", - ["Q"] = "ק", ["q"] = "ק", - ["R"] = "ר", ["r"] = "ר", - ["S"] = "ס", ["s"] = "ס", - ["Ş"] = "ש", ["ş"] = "ש", - ["T"] = "ת", ["t"] = "ת", - ["U"] = "אוּ", ["u"] = "אוּ", - ["V"] = "ב", ["v"] = "ב", - ["X"] = "כ", ["x"] = "כ", - ["Y"] = "או", ["y"] = "או", - ["Z"] = "ז", ["z"] = "ז", -} - --- Keep in mind RTL issues when viewing the following: -local he_to_final = { - ["כ"] = "ך", - ["מ"] = "ם", - ["נ"] = "ן", - ["פ"] = "ף", - ["צ"] = "ץ", -} - -local function tr_he(text) - text = mw.ustring.gsub(text, "אוּ", "u") - text = mw.ustring.gsub(text, ".%f[^א].", tt_he_a2) - text = mw.ustring.gsub(text, ".%f[׳ּ'].", tt_he_dg) - text = mw.ustring.gsub(text, ".", tt_he) - return text -end - -local function tr_cy(text) - text = mw.ustring.gsub(text, ".%f[ӀIІӏiіЬьЪъ].", tt_cy_2) - text = mw.ustring.gsub(text, ".", tt_cy) - return text -end - -function export.tr(text, lang, sc) - if not sc then - sc = require("Module:scripts").findBestScript(text, require("Module:languages").getByCode(lang or "jdt")):getCode() - end - - if sc == "Hebr" then - text = tr_he(text) - elseif sc == "Cyrl" then - text = tr_cy(text) - else - text = nil - end - - return text -end - -function export.ipa(text, lang, sc) - if type(text) == 'table' then - local args = text:getParent().args - text = args[1] ~= "" and args[1] or mw.title.getCurrentTitle().subpageText - lang = args["lang"] ~= "" and args["lang"] or "jdt" - sc = args["sc"] ~= "" and args["sc"] - end - return (mw.ustring.gsub(export.tr(text, lang, sc) or text, ".", tt_ipa)) -end - 
-function export.la(text, lang, sc) - if type(text) == 'table' then - local args = text:getParent().args - text = args[1] ~= "" and args[1] or mw.title.getCurrentTitle().subpageText - lang = args["lang"] ~= "" and args["lang"] or "jdt" - sc = args["sc"] ~= "" and args["sc"] - end - return (export.tr(text, lang, sc) or text) -end - -function export.cy(text, lang, sc) - if type(text) == 'table' then - local args = text:getParent().args - text = args[1] ~= "" and args[1] or mw.title.getCurrentTitle().subpageText - lang = args["lang"] ~= "" and args["lang"] or "jdt" - sc = args["sc"] ~= "" and args["sc"] - end - return (mw.ustring.gsub(export.tr(text, lang, sc) or text, ".", tt_tocy)) -end - -local function he_finals_replacer(letter, rest) - return (he_to_finals[letter] .. rest) -end - -local function he_finals(text) - -- Keep in mind RTL issues when viewing the following pattern: - text = mw.ustring.gsub(text, "([כמנפצ])([^א-ת]-%f[%s־-])", he_finals_replacer) - return text -end - -function export.he(text, lang, sc) - if type(text) == 'table' then - local args = text:getParent().args - text = args[1] ~= "" and args[1] or mw.title.getCurrentTitle().subpageText - lang = args["lang"] ~= "" and args["lang"] or "jdt" - sc = args["sc"] ~= "" and args["sc"] - end - return he_finals(mw.ustring.gsub(export.tr(text, lang, sc) or text, ".", tt_tohe)) -end - -return export \ No newline at end of file diff --git a/wikt/translit/json-data.lua b/wikt/translit/json-data.lua deleted file mode 100644 index 1af2742..0000000 --- a/wikt/translit/json-data.lua +++ /dev/null @@ -1,271 +0,0 @@ -local export = {} - --- optimisation: local variable lookup is slightly faster than global lookup -local tab_concat, type, tostring, pairs, ipairs = table.concat, type, tostring, pairs, ipairs - -local function export_str(s) - -- rudimentary escaping, to save time - return '"' .. tostring(s):gsub('["\\]', '\\%0') .. 
'"' -end - -local function export_array(tab) - local items = {} - for key, value in ipairs(tab) do - if type(value) == 'string' then - items[#items + 1] = export_str(value) - elseif type(value) == 'boolean' then - items[#items + 1] = tostring(value) - else - error("serialisation failed: unsupported array element type") - end - end - return "[" .. tab_concat(items, ",") .. "]" -end - --- the second argument is a rudimentary "schema" which specifies --- whether a table value at a given key should be serialised --- as an array or an object; Lua uses the same table type for both -local function export_object(tab, schema) - local items = {} - if tab == nil then - return "null" - end - - for key, value in pairs(tab) do - if type(value) == 'string' then - items[#items + 1] = export_str(key) .. ':' .. export_str(value) - elseif type(value) == 'boolean' then - items[#items + 1] = export_str(key) .. ':' .. tostring(value) - elseif type(value) == 'table' then - if not schema then - error("no schema given for array with table values") - end - local ktype = schema[key] - if ktype == false then - items[#items + 1] = export_str(key) .. ':' .. export_array(value) - elseif type(ktype) == 'table' then - items[#items + 1] = export_str(key) .. ':' .. export_object(value, ktype) - else - error("serialisation failed: table value at key '" .. key .. "' has no schema") - end - else - error("serialisation failed: unsupported object value type") - end - end - return "{" .. tab_concat(items, ",") .. 
"}" -end - -function export.export_languages(item_filter, key_filter, skip_nulls) - if type(item_filter) == "table" then - key_filter = {} - local i = 2 - while item_filter.args[i] do - key_filter[#key_filter + 1] = item_filter.args[i] - i = i + 1 - end - if #key_filter == 0 then - key_filter = nil - end - skip_nulls = require('Module:yesno')(item_filter.args.nulls) - item_filter = item_filter.args[1] - end - - item_filter = (item_filter ~= "") and item_filter or function() return true end - if type(item_filter) == 'string' then - if item_filter == "TWO_LETTER" then - function item_filter(key, value) - return #key == 2 - end - elseif item_filter == "TWO_THREE_LETTER" then - function item_filter(key, value) - return #key <= 3 - end - elseif item_filter == "TWO_THREE_LETTER_REGULAR" then - function item_filter(key, value) - return (#key <= 3) and value.type == 'regular' - end - elseif item_filter:sub(1, 1) == '=' then - local list = {} - for item in mw.text.gsplit(item_filter:sub(2), ',') do - list[item] = true - end - function item_filter(key, value) - return list[key] - end - else - local t = item_filter - function item_filter(key, value) - return value.type == t - end - end - end - - local data = mw.loadData("Module:languages/alldata") - local items = {} - - local schema = { - canonicalName = false, - type = false, - scripts = false, - family = false, - otherNames = false, - ancestors = false, - wikimedia_codes = false, - sort_key = { - from = false, - to = false - }, - entry_name = { - from = false, - to = false - } - } - - for key, value in pairs(data) do - if item_filter(key, value) then - if key_filter then - if #key_filter == 1 then - local item = value[key_filter[1]] - local itsc = schema[key_filter[1]] - - if item == nil then - if not skip_nulls then - items[#items + 1] = export_str(key) .. ':null' - end - else - items[#items + 1] = export_str(key) .. ':' .. 
- ((type(item) == "string" and export_str(item)) - or (itsc and export_object(item, itsc)) - or export_array(item)) - end - else - local langobj = {} - for _, fkey in pairs(key_filter) do - langobj[fkey] = value[fkey] - end - items[#items + 1] = export_str(key) .. ':' .. export_object(langobj, schema) - end - else - items[#items + 1] = export_str(key) .. ':' .. export_object(value, schema) - end - end - end - - return "{" .. tab_concat(items, ",") .. "}" -end - -function export.export_scripts() - local data = mw.loadData("Module:scripts/data") - - local items = {} - - for key, value in pairs(data) do - items[#items + 1] = export_str(key) .. ':' .. export_object(value, { - canonicalName = false, - characters = false, - systems = false, - otherNames = false - }) - end - - return "{" .. tab_concat(items, ",") .. "}" -end - -function export.export_etymology_languages() - local data = mw.loadData("Module:etymology languages/data") - - local items = {} - - for key, value in pairs(data) do - items[#items + 1] = export_str(key) .. ':' .. export_object(value, { - canonicalName = false, - parent = false, - wikipedia_article = false, - otherNames = false - }) - end - - return "{" .. tab_concat(items, ",") .. "}" -end - -function export.export_families() - local data = mw.loadData("Module:families/data") - - local items = {} - - for key, value in pairs(data) do - items[#items + 1] = export_str(key) .. ':' .. export_object(value, { - canonicalName = false, - otherNames = false, - family = false - }) - end - - return "{" .. tab_concat(items, ",") .. "}" -end - -function export.export_labels() - local data = mw.loadData("Module:labels/data") - - local labels, aliases = {}, {} - - for key, value in pairs(data.labels) do - labels[#labels + 1] = export_str(key) .. ':' .. 
export_object(value, { - plain_categories = false, - topical_categories = false, - pos_categories = false, - regional_categories = false - }) - end - - for key, value in pairs(data.aliases) do - aliases[#aliases + 1] = export_str(key) .. ':' .. export_str(value) - end - - return ('{"labels":{%s},"aliases":{%s},"deprecated":%s}'):format( - tab_concat(labels, ','), tab_concat(aliases, ','), export_object(data.deprecated or {}) - ) -end - -function export.export_wgs() - local m_wgdata = mw.loadData('Module:workgroup ping/data') - local items = {} - - for key, value in pairs(m_wgdata) do - if type(value) == 'string' then - items[#items + 1] = export_str(key) .. ':' .. export_str(value) - else - local item = { desc = value.desc; category = value.category; members = {} } - - for _, user in ipairs(value) do - item.members[#item.members + 1] = user - end - - items[#items + 1] = export_str(key) .. ':' .. export_object(item, { - members = false - }) - end - end - - return "{" .. tab_concat(items, ",") .. "}" -end - --- replacement for using the [[mw:API]] to do [[Special:PrefixIndex/Template:langrev/]] --- TODO: limits? -function export.complete_langname(frame) - local m_langs = mw.loadData("Module:languages/alldata") - local target = frame.args[1] - - local items = {} - for code, data in pairs(m_langs) do - for _, name in ipairs(data.names) do - if name:sub(1, #target) == target then - items[#items + 1] = export_str(name) .. ":" .. export_str(code) - end - end - end - - return "{" .. tab_concat(items, ",") .. "}" -end - -return export \ No newline at end of file diff --git a/wikt/translit/jv-translit.lua b/wikt/translit/jv-translit.lua deleted file mode 100644 index 138c061..0000000 --- a/wikt/translit/jv-translit.lua +++ /dev/null @@ -1,198 +0,0 @@ ---This module will transliterate Javanese language text. It is also used to transliterate Old Javanese (kaw). 
---[[jav – Javanese -jvn – Caribbean Javanese -jas – New Caledonian Javanese -osi – Osing -tes – Tenggerese -kaw – Kawi -]] - -local export = {} - -local conv = { - -- finals (U+A980 - U+A983): - - ["ꦀ"] = "m", - ["ꦁ"] = "ng", - ["ꦂ"] = "r", - ["ꦃ"] = "h", - - -- independent vowels (U+A984 - U+A98E): - - ["ꦄ"] = "a", ["ꦄꦴ"] = "a", - ["ꦅ"] = "i", - ["ꦆ"] = "i", - ["ꦇ"] = "i", - ["ꦈ"] = "u", ["ꦈꦴ"] = "u", - ["ꦉ"] = "re", ["ꦉꦴ"] = "reu", - ["ꦊ"] = "le", - ["ꦋ"] = "leu", - ["ꦌ"] = "e", - ["ꦍ"] = "ai", - ["ꦎ"] = "o", - - -- independent consonants (U+A98F - U+A9B2): - - ["ꦏ"] = "k", ["ꦏ꦳"] = "kh", - ["ꦐ"] = "q", - ["ꦑ"] = "kh", - ["ꦒ"] = "g", ["ꦒ꦳"] = "gh", - ["ꦓ"] = "gh", - ["ꦔ"] = "ng", ["ꦔ꦳"] = "'", - - ["ꦕ"] = "c", - ["ꦖ"] = "ch", - ["ꦗ"] = "j", ["ꦗ꦳"] = "z", - ["ꦘ"] = "jny", - ["ꦙ"] = "jh", - ["ꦚ"] = "n", - - ["ꦛ"] = "th", - ["ꦜ"] = "th", - ["ꦝ"] = "dh", - ["ꦞ"] = "dh", - ["ꦟ"] = "nn", - - ["ꦠ"] = "t", - ["ꦡ"] = "th", - ["ꦢ"] = "d", ["ꦢ꦳"] = "dz", - ["ꦣ"] = "dh", - ["ꦤ"] = "n", - - ["ꦥ"] = "p", ["ꦥ꦳"] = "f", - ["ꦦ"] = "ph", - ["ꦧ"] = "b", - ["ꦨ"] = "bh", - ["ꦩ"] = "m", - - ["ꦪ"] = "y", - ["ꦫ"] = "r", - ["ꦬ"] = "r", - ["ꦭ"] = "l", - ["ꦮ"] = "w", ["ꦮ꦳"] = "v", - ["ꦯ"] = "sh", - ["ꦰ"] = "ss", - ["ꦱ"] = "s", ["ꦱ꦳"] = "sy", - ["ꦲ"] = "h", ["ꦲ꦳"] = "h", - - -- cecak_telu/nukta (U+A9B3): - - ["꦳"] = "", - - -- dependent vowels (U+A9B4 - A9BD): - - ["ꦴ"] = "a", -- tarung - ["ꦵ"] = "o", - ["ꦶ"] = "i", - ["ꦷ"] = "i", - ["ꦸ"] = "u", - ["ꦹ"] = "u", - ["ꦺ"] = "é", ["ꦺꦴ"] = "o", - ["ꦻ"] = "ai", ["ꦻꦴ"] = "au", - ["ꦼ"] = "e", - ["ꦽ"] = "re", ["ꦽꦴ"] = "reu", - - -- medials (U+A9BE - U+A9BF): - ["ꦾ"] = "y", - ["ꦿ"] = "r", - - -- pangkon/virama (U+A9C0): - - ["꧀"] = "", - - -- punctuation (U+A9C1 - U+A9CF): - - ["꧁"] = "(starts title)", - ["꧂"] = "(ends title)", - ["꧃"] = "(letter to younger age or lower rank)", - ["꧄"] = "(letter to equal age or equal rank)", - ["꧅"] = "(letter to older age or higher rank)", - ["꧆"] = "(pada windu)", - ["꧇"] = ":", -- number indicator - ["꧈"] = ",", 
- ["꧉"] = ".", - ["꧊"] = "\"", - ["꧋"] = "//", - ["꧌"] = "(", - ["꧍"] = ")", - ["ꧏ"] = "2", - - -- digits (U+A9D0 - U+A9D9): - - ["꧐"] = "0", - ["꧑"] = "1", - ["꧒"] = "2", - ["꧓"] = "3", - ["꧔"] = "4", - ["꧕"] = "5", - ["꧖"] = "6", - ["꧗"] = "7", - ["꧘"] = "8", - ["꧙"] = "9", - - -- ellipsis (U+A9DE - U+A9DF): - - ["꧞"] = "-", - ["꧟"] = "-", -} - -function export.tr(text, lang, sc) - local CSVC = { - initial = "([ꦏ-ꦲ]꦳?)", - medial = "([ꦾꦿ]?)", - nucleus = "([ꦴ-ꦽ꧀]?ꦴ?)", - final = "([ꦀ-ꦃ]?)", - } - local VC = { - nucleus = "([ꦄ-ꦎ]ꦴ?)", - final = "([ꦀ-ꦃ]*)", - } - - local number_indicator = "꧇" - local digits = "[꧐-꧙]" - - local initial = true - - text = mw.ustring.gsub( - text, - CSVC.initial .. CSVC.medial .. CSVC.nucleus.. CSVC.final, - function(a, b, c, d) - a = conv[a] or error("Initial not recognized: " .. a) - b = b == "" and "" or conv[b] or error("Medial not recognized: " .. b) - c = c == "" and "a" or conv[c] or error("Nucleus not recognized: " .. c) - d = d == "" and "" or conv[d] or error("Final not recognized: " .. d) - if initial and a == "h" then - a = "" - end - initial = false - return a .. b .. c .. d - end - ) - - text = mw.ustring.gsub( - text, - VC.nucleus .. VC.final, - function(a, b) - a = conv[a] - b = (b == "" and "" or conv[b]) - initial = false - return a .. b - end - ) - - text = mw.ustring.gsub( - text, - number_indicator .. "(" .. digits .. "+)" .. number_indicator, - function(a) - a = mw.ustring.gsub(a, ".", conv) - initial = true - return a - end - ) - - text = mw.ustring.gsub(text, ".", conv) - - return text -end - -return export \ No newline at end of file diff --git a/wikt/translit/kaa-translit.lua b/wikt/translit/kaa-translit.lua deleted file mode 100644 index d2353f8..0000000 --- a/wikt/translit/kaa-translit.lua +++ /dev/null @@ -1,21 +0,0 @@ --- This module will transliterate Karakalpak language text per WT:KAA TR. 
- -local export = {} - -local tt = { - ["ү"]="ü",['Ү']='Ü', ["т"]="t",['Т']='T', ["Ў"]="W",['ў']='w', ["р"]="r",['Р']='R', ["ф"]="f",['Ф']='F', - ["ю"]="yu",['Ю']='Yu', ["ш"]="ş",['Ш']='Ş', ["ь"]="ʹ",['Ь']='ʹ', ["ъ"]="ʺ",['Ъ']='ʺ', ["н"]="n",['Н']='N', - ["п"]="p",['П']='P', ["й"]="y",['Й']='Y', ["л"]="l",['Л']='L', ["з"]="z",['З']='Z', ["е"]="e",['Е']='E', - ["г"]="g",['Г']='G', ["б"]="b",['Б']='B', ["у"]="u",['У']='U', ["с"]="s",['С']='S', ["х"]="h",['Х']='H', - ["ч"]="ç",['Ч']='Ç', ["щ"]="şç",['Щ']='Şç', ["я"]="ya",['Я']='Ya', ["ы"]="ı",['Ы']='I', ["э"]="e",['Э']='E', - ["м"]="m",['М']='M', ["о"]="o",['О']='O', ["и"]="i",['И']='I', ["ё"]="yo",['Ё']='Yo', ["ж"]="j",['Ж']='J', - ["к"]="k",['К']='K', ["д"]="d",['Д']='D', ["в"]="v",['В']='V', ["ц"]="ts",['Ц']='Ts', ["а"]="a",['А']='A', - ["ң"]="ñ",['Ң']='Ñ', ["ә"]="ä",['Ә']='Ä', ["э"]="é",['Э']='É', ["қ"]="q",['Қ']="Q", - ["ғ"]="ğ",['Ғ']='Ğ', ["ө"]="ö",['Ө']='Ö' -}; - -function export.tr(text) - return (mw.ustring.gsub(text, '.', tt)) -end - -return export \ No newline at end of file diff --git a/wikt/translit/kbd-translit.lua b/wikt/translit/kbd-translit.lua deleted file mode 100644 index 19c22a3..0000000 --- a/wikt/translit/kbd-translit.lua +++ /dev/null @@ -1,100 +0,0 @@ --- This module will transliterate Kabardian language text per WT:KBD TR. 
- -local export = {} - -local tt = { - ["а"]="ā", ["б"]="b", ["в"]="v", ["г"]="γ", ["д"]="d", ["е"]="e", ["ё"]="ë", ["ж"]="ž", - ["з"]="z", ["и"]="i", ["й"]="j", ["к"]="k", ["л"]="l", ["м"]="m", ["н"]="n", ["о"]="o", - ["п"]="p", ["р"]="r", ["с"]="s", ["т"]="t", ["у"]="w", ["ф"]="f", ["х"]="x", ["ц"]="c", - ["ч"]="č", ["ш"]="š", ["щ"]="ŝ", ["ъ"]="”", ["ы"]="ə", ["ь"]="’", ["э"]="ă", ["ю"]="ju", - ["я"]="jā", ["ӏ"]="ʾ", ["А"]="Ā", ["Б"]="B", ["В"]="V", ["Г"]="Γ", ["Д"]="D", ["Е"]="E", - ["Ё"]="Ë", ["Ж"]="Ž", ["З"]="Z", ["И"]="I", ["Й"]="J", ["К"]="K", ["Л"]="L", ["М"]="M", - ["Н"]="N", ["О"]="O", ["П"]="P", ["Р"]="R", ["С"]="S", ["Т"]="T", ["У"]="W", ["Ф"]="F", - ["Х"]="X", ["Ц"]="C", ["Ч"]="Č", ["Ш"]="Š", ["Щ"]="Ŝ", ["Ъ"]="”", ["Ы"]="Ə", ["Ь"]="’", - ["Э"]="Ă", ["Ю"]="Ju", ["Я"]="Jā", ["ӏ"]="ʾ"}; - -local trigraphs = { - ['кхъу'] = 'q°', - ['Кхъу'] = 'Q°', - ['гъу'] = 'ġ°', - ['Кхъ'] = 'Q', - ['кхъ'] = 'q', - ['къу'] = 'q̇°', - ['кӏу'] = 'ḳ°', - ['хъу'] = 'χ°', -} - -local digraphs = { - ['гу'] = 'g°', - ['гь'] = 'ɡ’', - ['гъ'] = 'ġ', - ['дж'] = 'ǯ̍', - ['дз'] = 'ʒ', - ['жъ'] = 'ẑ', - ['жь'] = 'ẑ', - ['ку'] = 'k°', - ['къ'] = 'q̇', - ['кӏ'] = 'č̣̍', - ['лъ'] = 'ł', - ['лӏ'] = 'ḷ', - ['пӏ'] = 'ṗ', - ['сӏ'] = 'ṣ̣', - ['тӏ'] = 'ṭ', - ['фӏ'] = 'f̣', - ['ху'] = 'x°', - ['хъ'] = 'χ', - ['хь'] = 'ḥ', - ['цӏ'] = 'c̣', - ['чу'] = 'č̍°', - ['щӏ'] = 'ṣ̂', - ['ӏу'] = 'ʾ°', - ['Гъу'] = 'Ġ°', - ['Къу'] = 'Q̇°', - ['Кӏу'] = 'Ḳ°', - ['Хъу'] = 'Χ°', - ['Гу'] = 'G°', - ['Гь'] = 'ɡ’', - ['Гъ'] = 'Ġ', - ['Дж'] = 'Ǯ̍', - ['Дз'] = 'Ʒ', - ['Жъ'] = 'Ẑ', - ['Жь'] = 'Ẑ̂', - ['Ку'] = 'K°', - ['Къ'] = 'Q̇', - ['Кӏ'] = 'Č̣̍', - ['Лъ'] = 'Ł', - ['Лӏ'] = 'Ḷ', - ['Пӏ'] = 'Ṗ', - ['Сӏ'] = 'Ṣ̣', - ['Тӏ'] = 'Ṭ', - ['Фӏ'] = 'F̣', - ['Ху'] = 'X°', - ['Хъ'] = 'Χ', - ['Хь'] = 'Ḥ', - ['Цӏ'] = 'C̣', - ['Чу'] = 'Č̍°', - ['Щӏ'] = 'Ṣ̂', -} - -function export.tr(text) - local str_gsub = string.gsub - local UTF8char = '[%z\1-\127\194-\244][\128-\191]*' - - -- Convert uppercase palochka to lowercase. 
Lowercase is found in tables - -- above. - text = str_gsub(text, mw.ustring.char(0x4C0), mw.ustring.char(0x4CF)) - - for trigraph, replacement in pairs(trigraphs) do - text = str_gsub(text, trigraph, replacement) - end - - for digraph, replacement in pairs(digraphs) do - text = str_gsub(text, digraph, replacement) - end - - text = str_gsub(text, UTF8char, tt) - - return text -end - -return export \ No newline at end of file diff --git a/wikt/translit/kca-translit.lua b/wikt/translit/kca-translit.lua deleted file mode 100644 index 1c301de..0000000 --- a/wikt/translit/kca-translit.lua +++ /dev/null @@ -1,31 +0,0 @@ --- This module will transliterate Khanty language text per WT:KCA TR. --- Language code: kca - -local export = {} - -local tt = { - ["а"]="a", ["ӓ"]="ä", ["ӑ"]="ă", ["б"]="b", ["в"]="w", ["г"]="g", ["д"]="d", - ["е"]="e", ["ё"]="ë", ["ә"]="ə", ["ӛ"]="ə̂", ["ж"]="ž", ["з"]="z", ["и"]="i", - ["і"]="i", ["й"]="j", ["к"]="k", ["к"]="k", ["қ"]="k̦", ["ӄ"]="ḳ", ["л"]="l", - ["ӆ"]="ł", ["ԓ"]="ḷ", ["љ"]="l`", ["м"]="m", ["н"]="n", ["ң"]="n̦", ["ӈ"]="ň", - ["њ"]="n`", ["о"]="o", ["ӧ"]="ö", ["ө"]="ô", ["ӫ"]="ő", ["п"]="p", ["р"]="r", - ["с"]="s", ["ҫ"]="ş", ["т"]="t", ["ҭ"]="ț", ["у"]="u", ["ӱ"]="ü", ["ў"]="ŭ", - ["ф"]="f", ["х"]="x", ["ҳ"]="h̦", ["ц"]="c", ["ч"]="č", ["ҷ"]="c̦", ["ш"]="š", - ["щ"]="ŝ", ["ъ"]="”", ["ы"]="y", ["ь"]="’", ["э"]="è", ["є"]="ê", ["є̈ "]="̈ê", - ["ю"]="û", ["ю̆"]="̆û", ["я"]="â", ["я̆"]="̆â", ["я̈"]="̈â", - ["А"]="A", ["Ӓ"]="Ä", ["Ӑ"]="Ă", ["Б"]="B", ["В"]="W", ["Г"]="G", ["Д"]="D", - ["Е"]="E", ["Ё"]="Ë", ["Ә"]="Ə", ["Ӛ"]="Ə̂", ["Ж"]="Ž", ["З"]="Z", ["И"]="I", - ["І"]="I", ["Й"]="J", ["К"]="K", ["К"]="K", ["Қ"]="K̦", ["Ӄ"]="Ḳ", ["Л"]="L", - ["Ӆ"]="Ł", ["Ԓ"]="Ḷ", ["Љ"]="L`", ["М"]="M", ["Н"]="N", ["Ң"]="N̦", ["Ӈ"]="Ň", - ["Њ"]="N`", ["О"]="O", ["Ӧ"]="Ö", ["Ө"]="Ô", ["Ӫ"]="Ő", ["П"]="P", ["Р"]="R", - ["С"]="S", ["Ҫ"]="Ş", ["Т"]="T", ["Ҭ"]="Ț", ["У"]="U", ["Ӱ"]="Ü", ["Ў"]="Ŭ", - ["Ф"]="F", ["Х"]="X", ["Ҳ"]="H̦", ["Ц"]="C", ["Ч"]="Č", 
["Ҷ"]="C̦", ["Ш"]="Š", - ["Щ"]="Ŝ", ["Ъ"]="”", ["Ы"]="Y", ["Ь"]="’", ["Э"]="È", ["Є"]="Ê", ["Є̈ "]="̈Ê", - ["Ю"]="Û", ["Ю̆"]="̆Û", ["Я"]="Â", ["Я̆"]="̆Â", ["Я̈"]="̈Â", -}; - -function export.tr(text) - return (mw.ustring.gsub(text, '.', tt)) -end - -return export \ No newline at end of file diff --git a/wikt/translit/kdr-translit.lua b/wikt/translit/kdr-translit.lua deleted file mode 100644 index 6753980..0000000 --- a/wikt/translit/kdr-translit.lua +++ /dev/null @@ -1,33 +0,0 @@ --- This module will transliterate Karaim language text per WT:KDR TR. - -local export = {} -local tab = - {["А"] ="A" , ["Б"] ="B", ["В"] ="V", ["Г"] ="G", ["Д"] ="D", ["Е"] ="E", ["Ё"] ="Yo", ["Ж"] ="J" , ["З"] ="Z", ["И"] ="I", ["Й"] ="Y", - - ["К"]="K", ["Л"]="L", ["М"]="M", ["Н"]="N", ["Ҥ"]="Ñ", ["О"]="O", ["Ӧ"]="Ö", ["П"]="P", ["Р"]="R", ["С"]="S", ["Т"]="T", -["У"]="U", ["Ӱ"]="Ü", ["Ф"]="F", ["Х"]="KH", ["Һ"]="H", ["Ц"]="Ts",["Ч"]="Ć", ["Ш"]="Ś", ["Щ"]="Śç", ["Ъ"]="ʺ", ["Ы"]="İ", -["Ь"]="ʹ", ["Э"]="E", ["Ю"]="Yu", ["Я"]="Ya", -['а']='a', ['б']='b', ['в']='v', ['г']='g', ['д']='d', ['е']='e', ['ё']='yo', ['ж']='j', ['з']='z', ['и']='i', ['й']='y', -['к']='k', ['л']='l', ['м']='m', ['н']='n', ['о']='o', ['ӧ']='ö', ['п']='p', ['р']='r', ['с']='s', ['т']='t', -['у']='u', ['ӱ']='ü', ['ф']='f', ['х']='kh', ['һ']='h', ['ц']='ts', ['ч']='ć', ['ш']='ś', ['щ']='śç',['ъ']='ʺ', ['ы']='ı', -['ь']='ʹ', ['э']='e', ['ю']='yu', ['я']='ya', -} -local mapping = { ['дж']= 'c' , ['Дж']= 'C', ['ң']='ñ' ,['Ң']='Ñ' , - ['къ']='q', ['Къ']='Q' , ['нъ']='ñ',['Нъ']='Ñ' - -} -function export.tr(text, lang, sc) - -- Ё needs converting if is decomposed - text = text:gsub("ё","ё"):gsub("Ё","Ё") - - -- е after a vowel or at the beginning of a text becomes ye - text = mw.ustring.gsub(text, "([АОӨУҮЫЕЯЁЮИЕЪЬаоөуүыэяёюиеъь%A][\204\129\204\128]?)е","%1yö") - text = mw.ustring.gsub(text, "^Е","Yö") - text = mw.ustring.gsub(text, "^е","yö") - text = mw.ustring.gsub(text, "([^Ѐ-ӿ])Е","%1yö") - text = mw.ustring.gsub(text, 
"([^Ѐ-ӿ])е","%1yö") - - return (mw.ustring.gsub(text,'.',tab)) -end - -return export \ No newline at end of file diff --git a/wikt/translit/khar-translit.lua b/wikt/translit/khar-translit.lua deleted file mode 100644 index f5b3a20..0000000 --- a/wikt/translit/khar-translit.lua +++ /dev/null @@ -1,79 +0,0 @@ --- Khar scripts, and this module is used by inc-ash-translit module. -local export = {} - -local consonants = { ---consonants - ['𐨐']='k', ['𐨑']='kh', ['𐨒']='g', ['𐨓']='gh', - ['𐨕']='c', ['𐨖']='ch', ['𐨗']='j', ['𐨙']='ñ', - ['𐨚']='ṭ', ['𐨛']='ṭh', ['𐨜']='ḍ', ['𐨝']='ḍh', ['𐨞']='ṇ', - ['𐨟']='t', ['𐨠']='th', ['𐨡']='d', ['𐨢']='dh', ['𐨣']='n', - ['𐨤']='p', ['𐨥']='ph', ['𐨦']='b', ['𐨧']='bh', ['𐨨']='m', - ['𐨩']='y', ['𐨪']='r', ['𐨫']='l', ['𐨬']='v', - ['𐨭']='ś', ['𐨮']='ṣ', ['𐨯']='s', ['𐨰']='z', ['𐨱']='h', - ['𐨲']='ḱ', ['𐨳']='ṭ́h', -} - -local diacritics = { ---matras - ['𐨁']='i', ['𐨂']='u', ['𐨃']='ṛ', - ['𐨅']='e', ['𐨆']='o', - ['𐨁𐨌']='ī', ['𐨂𐨌']='ū', ['𐨃𐨌']='ṝ', - ['𐨅𐨌']='ai', ['𐨆𐨌']='au', ['𐨿']='', -} - -local tt = { - ---vowels - -- anusvara - ['𐨎']='ṃ', --until a better method is found - -- visarga - ['𐨏']='ḥ', - --numerals - ['𐩀']='1', ['𐩁']='2', ['𐩂']='3', ['𐩃']='4', -['𐩄']='10', ['𐩅']='20', ['𐩆']='100', ['𐩇']='1000', - --punctuation - ['𐩖']='.', --danda - ['𐩗']='.' --double danda -} - -function export.tr(text, lang, sc) - if sc ~= "Khar" then - return nil - end - - text = mw.ustring.gsub( - text, - '([𐨐-𐨳])'.. - '([𐨁-𐨆𐨿]?)'.. - '(𐨌?)', - function(c, d, e) - if d == "" and e ~= "" then - return consonants[c] .. 'ā' - elseif d == "" then - return consonants[c] .. 'a' - else - return consonants[c] .. diacritics[d .. e] - end - end) - - text = mw.ustring.gsub( - text, - '([𐨀])'.. - '([𐨁-𐨆𐨿]?)'.. - '(𐨌?)', - function(c, d, e) - if d == "" and e ~= "" then - return 'ā' - elseif d == "" then - return 'a' - else - return diacritics[d .. 
e] - end - end) - - text = mw.ustring.gsub(text, '.', tt) - - return text -end - -return export \ No newline at end of file diff --git a/wikt/translit/kjh-translit.lua b/wikt/translit/kjh-translit.lua deleted file mode 100644 index 860396b..0000000 --- a/wikt/translit/kjh-translit.lua +++ /dev/null @@ -1,23 +0,0 @@ --- This module will transliterate Khakas language text per WT:KJH TR. --- Language code: kjh - -local export = {} - -local tt = { - ["А"]='A', ["а"]='a', ["Б"]='B', ["б"]='b', ["В"]='V', ["в"]='v', ["Г"]='G', ["г"]='g', - ["Ғ"]='Ğ', ["ғ"]='ğ', ["Д"]='D', ["д"]='d', ["Е"]='E', ["е"]='e', ["Ё"]='Yo', ["ё"]='yo', - ["Ж"]='J', ["ж"]='j', ["З"]='Z', ["з"]='z', ["И"]='İ', ["и"]='i', ["Й"]='Y', ["й"]='y', - ["І"]='Í', ["і"]='í', ["К"]='K', ["к"]='k', ["Л"]='L', ["л"]='l', ["М"]='M', ["м"]='m', - ["Н"]='N', ["н"]='n', ["Ң"]='Ñ', ["ң"]='ñ', ["О"]='O', ["о"]='o', ["Ö"]='Ö', ["ö"]='ö', - ["П"]='P', ["п"]='p', ["Р"]='R', ["р"]='r', ["С"]='S', ["с"]='s', ["Т"]='T', ["т"]='t', - ["У"]='U', ["у"]='u', ["Ӱ"]='Ü', ["ӱ"]='ü', ["Ф"]='F', ["ф"]='f', ["Х"]='X', ["х"]='x', - ["Ц"]='Ts', ["ц"]='ts', ["Ч"]='Ç', ["ч"]='ç', ["Ӌ"]='C', ["ӌ"]='c', ["Ш"]='Ş', ["ш"]='ş', - ["Щ"]='Şç', ["щ"]='şç', ['Ъ']='ʺ', ['ъ']='ʺ', ["Ы"]='I', ["ы"]='ı', ["Ь"]="’", ["ь"]="’", - ["Э"]='E', ["э"]='e', ["Ю"]='Yu', ["ю"]='yu', ["Я"]='Ya', ["я"]='ya', -}; - -function export.tr(text) - return (mw.ustring.gsub(text, '.', tt)) -end - -return export \ No newline at end of file diff --git a/wikt/translit/kjj-translit.lua b/wikt/translit/kjj-translit.lua deleted file mode 100644 index 5c2e207..0000000 --- a/wikt/translit/kjj-translit.lua +++ /dev/null @@ -1,95 +0,0 @@ --- This module will transliterate Khinalug language text. 
--- Language code: kjj - -local export = {} - -local tt = { - ["б"]="b", ["п"]="p", ["в"]="w", ["ф"]="f", - ["д"]="d", ["т"]="t", ["ц"]="c", ["з"]="z", ["с"]="s", ["ч"]="č", ["ж"]="ž", ["ш"]="š", - ["г"]="g", ["к"]="k", ["х"]="χ", ["ъ"]="ʔ", ["м"]="m", ["н"]="n", ["р"]="r", ["л"]="l", - ["й"]="j", ["и"]="i", ["е"]="e", ["э"]="e", ["а"]="a", ["о"]="o", ["у"]="u", ["ы"]="ɨ", - ["Б"]="B", ["П"]="P", ["В"]="W", ["Ф"]="F", - ["Д"]="D", ["Т"]="T", ["Ц"]="C", ["З"]="Z", ["С"]="S", ["Ч"]="Č", ["Ж"]="Ž", ["Ш"]="Š", - ["Г"]="G", ["К"]="K", ["Х"]="Χ", ["Ъ"]="ʔ", ["М"]="M", ["Н"]="N", ["Р"]="R", ["Л"]="L", - ["Й"]="J", ["И"]="I", ["Е"]="E", ["Э"]="E", ["А"]="A", ["О"]="O", ["У"]="U", ["Ы"]="Ɨ"}; - -local tetraTrigraphs = { - ['къкъ'] = 'qː', - ['кхь'] = 'kx', - ['Къкъ'] = 'Qː', - ['Кхь'] = 'Kx', -} - -local digraphs = { - ['пп'] = 'pː', - ['пӏ'] = 'ṗ', - ['вв'] = 'wː', - ['тт'] = 'tː', - ['тӏ'] = 'ṭ', - ['цц'] = 'cː', - ['цӏ'] = 'c̣', - ['дж'] = 'ǯ', - ['чч'] = 'čː', - ['чӏ'] = 'č̣', - ['кк'] = 'kː', - ['кӏ'] = 'ḳ', - ['гг'] = 'ɣ', - ['хь'] = 'x', - ['къ'] = 'qː', - ['хъ'] = 'q', - ['кь'] = 'q̇', - ['гъ'] = 'ʁ', - ['гӏ'] = 'ʕ', - ['хӏ'] = 'ħ', - ['гь'] = 'h', - ['уь'] = 'ü', - ['оь'] = 'ö', - ['аь'] = 'ä', - ['Пп'] = 'Pː', - ['Пӏ'] = 'Ṗ', - ['Вв'] = 'Wː', - ['Тт'] = 'Tː', - ['Тӏ'] = 'Ṭ', - ['Цц'] = 'Cː', - ['Цӏ'] = 'C̣', - ['Дж'] = 'Ǯ', - ['Чч'] = 'Čː', - ['Чӏ'] = 'Č̣', - ['Кк'] = 'Kː', - ['Кӏ'] = 'Ḳ', - ['Гг'] = 'Ɣ', - ['Хь'] = 'X', - ['Къ'] = 'Qː', - ['Хъ'] = 'Q', - ['Кь'] = 'Q̇', - ['Гъ'] = 'ʁ', - ['Гӏ'] = 'ʕ', - ['Хӏ'] = 'Ħ', - ['Гь'] = 'H', - ['Уь'] = 'Ü', - ['Оь'] = 'Ö', - ['Аь'] = 'Ä', -} - -function export.tr(text, lang, sc) - local str_gsub = string.gsub - local UTF8char = '[%z\1-\127\194-\244][\128-\191]*' - - -- Convert uppercase palochka to lowercase. Lowercase is found in tables - -- above. 
- text = str_gsub(text, mw.ustring.char(0x4C0), mw.ustring.char(0x4CF)) - - for grapheme, replacement in pairs(tetraTrigraphs) do - text = str_gsub(text, grapheme, replacement) - end - - for digraph, replacement in pairs(digraphs) do - text = str_gsub(text, digraph, replacement) - end - - text = str_gsub(text, UTF8char, tt) - - return text -end - -return export \ No newline at end of file diff --git a/wikt/translit/kk-translit.lua b/wikt/translit/kk-translit.lua deleted file mode 100644 index 3bbac27..0000000 --- a/wikt/translit/kk-translit.lua +++ /dev/null @@ -1,24 +0,0 @@ --- This module will transliterate Kazakh language text per WT:KK TR. --- Language code: kaz - -local export = {} - -local tt = { - ["ү"]="ü",['Ү']='Ü', ["т"]="t",['Т']='T', ["һ"]="h",['Һ']='H', ["р"]="r",['Р']='R', ["ф"]="f",['Ф']='F', - ["ю"]="yw",['Ю']='Yw', ["ш"]="ş",['Ш']='Ş', ["ь"]="ʹ",['Ь']='ʹ', ["ъ"]="ʺ",['Ъ']='ʺ', ["н"]="n",['Н']='N', - ["п"]="p",['П']='P', ["й"]="y",['Й']='Y', ["л"]="l",['Л']='L', ["з"]="z",['З']='Z', ["е"]="e",['Е']='E', - ["г"]="g",['Г']='G', ["б"]="b",['Б']='B', ["у"]="w",['У']='W', ["с"]="s",['С']='S', ["х"]="x",['Х']='X', - ["ч"]="ç",['Ч']='Ç', ["щ"]="şş",['Щ']='Şş', ["я"]="ya",['Я']='Ya', ["ы"]="ı",['Ы']='I', ["э"]="e",['Э']='E', - ["м"]="m",['М']='M', ["о"]="o",['О']='O', ["и"]="ï",['И']='Ï', ["ё"]="yo",['Ё']='Yo', ["ж"]="j",['Ж']='J', - ["к"]="k",['К']='K', ["д"]="d",['Д']='D', ["в"]="v",['В']='V', ["ц"]="c",['Ц']='C', ["а"]="a",['А']='A', - ["ң"]="ñ",['Ң']='Ñ', ["ә"]="ä",['Ә']='Ä', ["э"]="é",['Э']='É', ["ұ"]="u",['Ұ']='U', ["қ"]="q",['Қ']="Q", - ["ғ"]="ğ",['Ғ']='Ğ', ["і"]="i",['І']='İ', ["ө"]="ö",['Ө']='Ö', ["ӯ"]="u",['Ӯ']='U' -}; - -function export.tr(text, lang, sc) - if sc == "Cyrl" then - return (mw.ustring.gsub(text, '.', tt)) - end -end - -return export \ No newline at end of file diff --git a/wikt/translit/km-translit.lua b/wikt/translit/km-translit.lua deleted file mode 100644 index 214b879..0000000 --- a/wikt/translit/km-translit.lua +++ /dev/null @@ 
-1,345 +0,0 @@ --- This module will transliterate Khmer language text per WT:KM TR. --- Language code: khm - -local export = {} -local gsub = mw.ustring.gsub -local len = mw.ustring.len -local match = mw.ustring.match -local sub = mw.ustring.sub - -local cons_conv = { - ["ក"] = { "k", "a" }, - ["ខ"] = { "kh", "a" }, - ["គ"] = { "k", "o" }, - ["ឃ"] = { "kh", "o" }, - ["ង"] = { "ng", "o" }, - ["ច"] = { "ch", "a" }, - ["ឆ"] = { "chh", "a" }, - ["ជ"] = { "ch", "o" }, - ["ឈ"] = { "chh", "o" }, - ["ញ"] = { "nh", "o" }, - ["ដ"] = { "d", "a" }, - ["ឋ"] = { "th", "a" }, - ["ឌ"] = { "d", "o" }, - ["ឍ"] = { "th", "o" }, - ["ណ"] = { "n", "a" }, - ["ត"] = { "t", "a" }, - ["ថ"] = { "th", "a" }, - ["ទ"] = { "t", "o" }, - ["ធ"] = { "th", "o" }, - ["ន"] = { "n", "o" }, - ["ប"] = { "b", "a" }, - ["ផ"] = { "ph", "a" }, - ["ព"] = { "p", "o" }, - ["ភ"] = { "ph", "o" }, - ["ម"] = { "m", "o" }, - ["យ"] = { "y", "o" }, - ["រ"] = { "r", "o" }, - ["ល"] = { "l", "o" }, - ["វ"] = { "v", "o" }, - ["ឝ"] = { "sh", "a" }, - ["ឞ"] = { "ss", "o" }, - ["ស"] = { "s", "a" }, - ["ហ"] = { "h", "a" }, - ["ឡ"] = { "l", "a" }, - ["អ"] = { "’", "a" }, - [""] = { "", "" }, - - ["ប៉"] = { "p", "a" }, -} - -local digraph = { - ["ហ្គ"] = "g", ["ហ្ន"] = "n", ["ហ្ម"] = "m", ["ហ្ល"] = "l", ["ហ្វ"] = "f", ["ហ្ស"] = "z", -} - -local indep_vowel = { - ["ឣ"] = "’â", ["ឤ"] = "’a", ["ឥ"] = "’ĕ", ["ឦ"] = "’ei", - ["ឧ"] = "’ŏ", ["ឨ"] = "’ŏk", ["ឩ"] = "’ŭ", ["ឪ"] = "’ŏu", - ["ឫ"] = "rœ̆", ["ឬ"] = "rœ", - ["ឭ"] = "lœ̆", ["ឮ"] = "lœ", - ["ឯ"] = "’é", ["ឰ"] = "’ai", ["ឱ"] = "’aô", ["ឲ"] = "’aô", ["ឳ"] = "’âu", -} - -local vowel_conv = { - [""] = { ["a"] = "â", ["o"] = "ô" }, - ["ា"] = { ["a"] = "a", ["o"] = "éa" }, - ["ិ"] = { ["a"] = "ĕ", ["o"] = "ĭ" }, - ["ី"] = { ["a"] = "ei", ["o"] = "i" }, - ["ឹ"] = { ["a"] = "œ̆", ["o"] = "œ̆" }, - ["ឺ"] = { ["a"] = "œ", ["o"] = "œ" }, - ["ុ"] = { ["a"] = "ŏ", ["o"] = "ŭ" }, - ["ូ"] = { ["a"] = "o", ["o"] = "u" }, - ["ួ"] = { ["a"] = "uŏ", ["o"] = "uŏ" }, - ["ើ"] = { ["a"] = "aeu", 
["o"] = "eu" }, - ["ឿ"] = { ["a"] = "eua", ["o"] = "eua" }, - ["ៀ"] = { ["a"] = "iĕ", ["o"] = "iĕ" }, - ["េ"] = { ["a"] = "é", ["o"] = "é" }, - ["ែ"] = { ["a"] = "ê", ["o"] = "ê" }, - ["ៃ"] = { ["a"] = "ai", ["o"] = "ey" }, - ["ោ"] = { ["a"] = "aô", ["o"] = "oŭ" }, - ["ៅ"] = { ["a"] = "au", ["o"] = "ŏu" }, - ["ុំ"] = { ["a"] = "om", ["o"] = "ŭm" }, - ["ំ"] = { ["a"] = "âm", ["o"] = "um" }, - ["ាំ"] = { ["a"] = "ăm", ["o"] = "ŏâm" }, - ["ាំង"] = { ["a"] = "ăng", ["o"] = "eăng" }, - ["ះ"] = { ["a"] = "ăh", ["o"] = "eăh" }, - ["ុះ"] = { ["a"] = "ŏh", ["o"] = "uh" }, - ["េះ"] = { ["a"] = "éh", ["o"] = "éh" }, - ["ោះ"] = { ["a"] = "aŏh", ["o"] = "uŏh" }, - ["ឹះ"] = { ["a"] = "ĕh", ["o"] = "ĭh" }, - ["ិះ"] = { ["a"] = "ĕh", ["o"] = "ĭh" }, - ["ៈ"] = { ["a"] = "a’", ["o"] = "éa’" }, - ["័"] = { ["a"] = 'â', ["o"] = 'ô' }, -} - -local char_type = { - ["ក"] = "consonant", ["ខ"] = "consonant", ["គ"] = "consonant", ["ឃ"] = "consonant", ["ង"] = "consonant", - ["ច"] = "consonant", ["ឆ"] = "consonant", ["ជ"] = "consonant", ["ឈ"] = "consonant", ["ញ"] = "consonant", - ["ដ"] = "consonant", ["ឋ"] = "consonant", ["ឌ"] = "consonant", ["ឍ"] = "consonant", ["ណ"] = "consonant", - ["ត"] = "consonant", ["ថ"] = "consonant", ["ទ"] = "consonant", ["ធ"] = "consonant", ["ន"] = "consonant", - ["ប"] = "consonant", ["ផ"] = "consonant", ["ព"] = "consonant", ["ភ"] = "consonant", ["ម"] = "consonant", - ["យ"] = "consonant", ["រ"] = "consonant", ["ល"] = "consonant", ["វ"] = "consonant", ["ឝ"] = "consonant", - ["ឞ"] = "consonant", ["ស"] = "consonant", ["ហ"] = "consonant", ["ឡ"] = "consonant", ["អ"] = "consonant", - ["ឣ"] = "indep_vowel", ["ឤ"] = "indep_vowel", ["ឥ"] = "indep_vowel", ["ឦ"] = "indep_vowel", ["ឧ"] = "indep_vowel", - ["ឨ"] = "indep_vowel", ["ឩ"] = "indep_vowel", ["ឪ"] = "indep_vowel", ["ឫ"] = "indep_vowel", ["ឬ"] = "indep_vowel", - ["ឭ"] = "indep_vowel", ["ឮ"] = "indep_vowel", ["ឯ"] = "indep_vowel", ["ឰ"] = "indep_vowel", ["ឱ"] = "indep_vowel", - ["ឲ"] = "indep_vowel", ["ឳ"] = 
"indep_vowel", - ["ា"] = "vowel_sign", ["ិ"] = "vowel_sign", ["ី"] = "vowel_sign", ["ឹ"] = "vowel_sign", ["ឺ"] = "vowel_sign", - ["ុ"] = "vowel_sign", ["ូ"] = "vowel_sign", ["ួ"] = "vowel_sign", ["ើ"] = "vowel_sign", ["ឿ"] = "vowel_sign", - ["ៀ"] = "vowel_sign", ["េ"] = "vowel_sign", ["ែ"] = "vowel_sign", - ["ៃ"] = "terminating_vowel", - ["ោ"] = "vowel_sign", ["ៅ"] = "vowel_sign", - ["ំ"] = "terminating_vowel", ["ះ"] = "terminating_vowel", ["ៈ"] = "terminating_vowel", - ["៉"] = "consonant_shift", ["៊"] = "consonant_shift", - ["់"] = "terminating_sign", - ["៌"] = "sign", ["៍"] = "sign", ["៎"] = "sign", ["៏"] = "sign", ["័"] = "sign", ["៑"] = "sign", - ["្"] = "combining_sign", - ["៓"] = "sign", - ["។"] = "punctuation", ["៕"] = "punctuation", - ["៖"] = "sign", - ["ៗ"] = "punctuation", ["៘"] = "punctuation", ["៙"] = "punctuation", ["៚"] = "punctuation", ["៛"] = "punctuation", - ["ៜ"] = "sign", ["៝"] = "sign", - ["​"] = "ZWS", -} - -local sp_symbols = { - ["០"] = "0", ["១"] = "1", ["២"] = "2", ["៣"] = "3", ["៤"] = "4", - ["៥"] = "5", ["៦"] = "6", ["៧"] = "7", ["៨"] = "8", ["៩"] = "9", - ["៰"] = "0", ["៱"] = "1", ["៲"] = "2", ["៳"] = "3", ["៴"] = "4", - ["៵"] = "5", ["៶"] = "6", ["៷"] = "7", ["៸"] = "8", ["៹"] = "9", -} - -function export.tr(text, lang, sc, debug_mode) - text = gsub(text, '[០-៹]', sp_symbols) - text = gsub(text, '(.)្(.្.)', '%1​%2') - text = gsub(text, '([កខគឃងចឆជឈញដឋឌឍណតថទធនបផពភមយរលវឝឞសហឡអ]្[កខគឃងចឆជឈញដឋឌឍណតថទធនបផពភមយរលវឝឞសហឡអ])([កខគឃងចឆជឈញដឋឌឍណតថទធនបផពភមយរលវឝឞសហឡអ])', '​%1%2') - text = gsub(text, '([កខគឃងចឆជឈញដឋឌឍណតថទធនបផពភមយរលវឝឞសហឡអ])([កខគឃងចឆជឈញដឋឌឍណតថទធនបផពភមយរលវឝឞសហឡអ]្?[កខគឃងចឆជឈញដឋឌឍណតថទធនបផពភមយរលវឝឞសហឡអ])', '%1​%2') - text = gsub(text, '(.៍)', '​%1') - - for word in mw.ustring.gmatch(text, '[ក-៝​]+') do - local original_text = word - local c, chartype, syl, curr_syl = {}, {}, {}, {} - local progress = 'none' - - for i = 1, len(word) do - c[i] = sub(word, i, i) - chartype[i] = char_type[c[i]] - end - - for i = 1, #c + 1 do - local next_types = 
{} - if i == #c + 1 or chartype[i] == 'ZWS' then - progress = 'none' - table.insert(syl, table.concat(curr_syl, "")) - curr_syl = {} - elseif progress == 'none' then - if chartype[i] == 'consonant' then - table.insert(curr_syl, c[i]) - progress = 'initial' - else - table.insert(syl, c[i]) - end - elseif progress == 'initial' then - if chartype[i] == 'combining_sign' then - table.insert(curr_syl, c[i]) - progress = 'initial_combining' - elseif chartype[i] == 'sign' or chartype[i] == 'consonant_shift' then - table.insert(curr_syl, c[i]) - elseif chartype[i] == 'vowel_sign' then - table.insert(curr_syl, c[i]) - progress = 'vowel' - elseif chartype[i] == 'terminating_vowel' then - if c[i-1] .. c[i] .. (c[i+1] or '') == 'ាំង' and (i == #c - 1 or (i > #c + 1 and chartype[i+2] == 'consonant')) then - table.insert(curr_syl, c[i]) - progress = 'vowel' - else - table.insert(curr_syl, c[i]) - table.insert(syl, table.concat(curr_syl, "")) - curr_syl = {} - progress = 'none' - end - elseif chartype[i] == 'consonant' then - vowel_found = false - local j, skipped = i, 0 - while not vowel_found do - if not chartype[j] or chartype[j] == 'punctuation' or chartype[j] == 'indep_vowel' or chartype[j] == 'terminating_sign' or chartype[j] == 'ZWS' then - skipped = 1 - break - elseif chartype[j] == 'consonant' or chartype[j] == 'combining_sign' or (chartype[j] == 'sign' and c[j] ~= '័') then - table.insert(next_types, chartype[j]) - else - vowel_found = true - end - j = j + 1 - end - if skipped ~= 0 or match(table.concat(next_types, " "), 'consonant s?i?g?n? 
?consonant') then - table.insert(curr_syl, c[i]) - progress = 'coda' - else - table.insert(syl, table.concat(curr_syl, "")) - curr_syl = {c[i]} - progress = 'initial' - end - else - table.insert(syl, c[i]) - progress = 'none' - end - elseif progress == 'initial_combining' then - if chartype[i] == 'consonant' then - table.insert(curr_syl, c[i]) - progress = 'initial' - else - table.insert(syl, c[i]) - progress = 'none' - end - elseif progress == 'vowel' then - if chartype[i] == 'vowel_sign' then - table.insert(curr_syl, c[i]) - elseif chartype[i] == 'terminating_vowel' then - if c[i-1] .. c[i] .. (c[i+1] or '') == 'ាំង' and (i == #c - 1 or (i > #c + 1 and chartype[i+2] == 'consonant')) then - table.insert(curr_syl, c[i]) - progress = 'vowel' - else - table.insert(curr_syl, c[i]) - table.insert(syl, table.concat(curr_syl, "")) - curr_syl = {} - progress = 'none' - end - elseif chartype[i] == 'consonant' then - vowel_found = false - local j, skipped = i, 0 - while not vowel_found do - if not chartype[j] or chartype[j] == 'punctuation' or chartype[j] == 'indep_vowel' or chartype[j] == 'terminating_sign' or chartype[j] == 'ZWS' then - skipped = 1 - break - elseif chartype[j] == 'consonant' or chartype[j] == 'combining_sign' or (chartype[j] == 'sign' and c[j] ~= '័') then - table.insert(next_types, chartype[j]) - else - vowel_found = true - end - j = j + 1 - end - if skipped ~= 0 or match(table.concat(next_types, " "), 'consonant s?i?g?n? 
?consonant') then - table.insert(curr_syl, c[i]) - progress = 'coda' - else - table.insert(syl, table.concat(curr_syl, "")) - curr_syl = {c[i]} - progress = 'initial' - end - else - table.insert(syl, c[i]) - progress = 'none' - end - elseif progress == 'coda' then - if chartype[i] == 'combining_sign' then - table.insert(curr_syl, c[i]) - progress = 'coda_combining' - elseif chartype[i] == 'sign' or chartype[i] == 'terminating_sign' then - table.insert(curr_syl, c[i]) - else - table.insert(syl, table.concat(curr_syl, "")) - curr_syl = {} - if chartype[i] == 'consonant' then - table.insert(curr_syl, c[i]) - progress = 'initial' - else - table.insert(syl, c[i]) - progress = 'none' - end - end - elseif progress == 'coda_combining' then - if chartype[i] == 'consonant' then - table.insert(curr_syl, c[i]) - progress = 'coda' - else - table.insert(syl, table.concat(curr_syl, "")) - curr_syl = {} - progress = 'none' - end - end - end - - for i = 1, #syl do - if match(syl[i], '៍') then - syl[i] = '' .. gsub(syl[i], '.', function(consonant) - if cons_conv[consonant] then - return cons_conv[consonant][1] - end end) .. '' - break - end - syl[i] = gsub(syl[i], '់$', '') - - syl[i] = gsub(syl[i], '^([កខគឃងចឆជឈញដឋឌឍណតថទធនបផពភមយរលវឝឞសហឡអ])្?([កខគឃងចឆជឈញដឋឌឍណតថទធនបផពភមយរលវឝឞសហឡអ]?)([៉៊]?)([ិីឹឺុូួើឿៀេែៃោៅា័]?[ំះៈ]?)([៉៊]?)([កខគឃងចឆជឈញដឋឌឍណតថទធនបផពភមយរលវឝឞសហឡអ]?៉?)្?([កខគឃងចឆជឈញដឋឌឍណតថទធនបផពភមយរលវឝឞសហឡអ]?)(៖?)$', function(initial_a, initial_b, cons_shifter_a, vowel, cons_shifter_b, coda_a, coda_b, optional_sign) - if cons_shifter_a .. cons_shifter_b .. vowel .. coda_a .. coda_b == '' and initial_b ~= '' and not match(syl[i], '្') then - coda_a = initial_b - initial_b = '' - end - base = initial_a - if initial_b ~= '' and not match(initial_b, '[ងញនមយរលវ]') then - base = initial_b - end - if vowel .. coda_a .. coda_b == 'ាំង' then - vowel, coda_a, coda_b = 'ាំង', '', '' - end - optional_sign = gsub(optional_sign, '៖', 'ː') - - cons_shifter = cons_shifter_a .. 
cons_shifter_b - if cons_shifter == '' and cons_conv[base] then - vowel_class = cons_conv[base][2] - elseif cons_shifter == '៉' then - vowel_class = 'a' - elseif cons_shifter == '៊' then - vowel_class = 'o' - else - return initial_a .. initial_b .. cons_shifter .. vowel .. coda_a .. coda_b .. optional_sign - end - - if digraph[initial_a .. '្' .. initial_b] and (digraph[coda_a .. '្' .. coda_b] or (cons_conv[coda_a] and cons_conv[coda_b])) and vowel_conv[vowel] then - return digraph[initial_a .. '្' .. initial_b] .. vowel_conv[vowel][vowel_class] .. (digraph[coda_a .. '្' .. coda_b] or cons_conv[coda_a][1] .. cons_conv[coda_b][1]) .. optional_sign - - elseif cons_conv[initial_a] and cons_conv[initial_b] and vowel_conv[vowel] and cons_conv[coda_a] and cons_conv[coda_b] then - return cons_conv[initial_a][1] .. cons_conv[initial_b][1] .. vowel_conv[vowel][vowel_class] .. cons_conv[coda_a][1] .. cons_conv[coda_b][1] .. optional_sign - end end) - - if syl[i] == 'ៗ' and i > 1 then - syl[i] = syl[i-1] - end - end - word = table.concat(syl, "") - text = gsub(text, original_text, word) - end - - text = gsub(text, '.', indep_vowel) - text = gsub(text, '([^ ]*) ៗ', '%1 %1') - - if match(text, '[ក-៹]') and not debug_mode then - return nil - else - return text - end - - -- To do: other signs -end - -return export \ No newline at end of file diff --git a/wikt/translit/kmr-translit.lua b/wikt/translit/kmr-translit.lua deleted file mode 100644 index 06ec01a..0000000 --- a/wikt/translit/kmr-translit.lua +++ /dev/null @@ -1,41 +0,0 @@ --- This module will transliterate Northern Kurdish language text. 
- -local export = {} - -local tt = { - ["а"]="a", ["б"]="b", ["щ"]="c", ["ч"]="ç", ["д"]="d", ["ә"]="e", - ["е"]="ê", ["э"]="ê", ["ф"]="f", ["г"]="g", ["h"]="h", ["ь"]="i", - ["и"]="î", ["ж"]="j", ["к"]="k", ["л"]="l", ["м"]="m", ["н"]="n", - ["о"]="o", ["п"]="p", ["ԛ"]="q", ["р"]="r", ["с"]="s", ["ш"]="ş", - ["т"]="t", ["ӧ"]="u", ["у"]="û", ["в"]="v", ["ԝ"]="w", ["х"]="x", - ["й"]="j", ["з"]="z", - ["А"]="A", ["Б"]="B", ["Щ"]="C", ["Ч"]="Ç", ["Д"]="D", ["Ә"]="E", - ["Е"]="Ê", ["Э"]="Ê", ["Ф"]="F", ["Г"]="G", ["H"]="H", ["Ь"]="I", - ["И"]="Î", ["Ж"]="J", ["К"]="K", ["Л"]="L", ["М"]="M", ["Н"]="N", - ["О"]="O", ["П"]="P", ["Ԛ"]="Q", ["Р"]="R", ["С"]="S", ["Ш"]="Ş", - ["Т"]="T", ["Ӧ"]="U", ["У"]="Û", ["В"]="V", ["ԝ"]="W", ["Х"]="X", - ["Й"]="J", ["З"]="Z" - - }; - -function export.tr(text, lang, sc) - if sc == "Latn" then - return nil - elseif sc == "ku-Arab" then - return nil - elseif sc == "Arab" then - return nil - elseif sc == "Armn" then - return require("Module:Armn-translit").tr(text, lang, sc) - else - text = mw.ustring.gsub(text, 'г’', 'x’') - text = mw.ustring.gsub(text, 'Г’', 'X’') - - text = mw.ustring.gsub(text, '.', tt) - - return text - end - -end - -return export \ No newline at end of file diff --git a/wikt/translit/kn-translit.lua b/wikt/translit/kn-translit.lua deleted file mode 100644 index e5fceda..0000000 --- a/wikt/translit/kn-translit.lua +++ /dev/null @@ -1,62 +0,0 @@ -local export = {} - -local consonants = { - ['ಕ']='k', ['ಖ']='kh', ['ಗ']='g', ['ಘ']='gh', ['ಙ']='ṅ', - ['ಚ']='c', ['ಛ']='ch', ['ಜ']='j', ['ಝ']='jh', ['ಞ']='ñ', - ['ಟ']='ṭ', ['ಠ']='ṭh', ['ಡ']='ḍ', ['ಢ']='ḍh', ['ಣ']='ṇ', - ['ತ']='t', ['ಥ']='th', ['ದ']='d', ['ಧ']='dh', ['ನ']='n', - ['ಪ']='p', ['ಫ']='ph', ['ಬ']='b', ['ಭ']='bh', ['ಮ']='m', - ['ಯ']='y', ['ರ']='r', ['ಱ']='ṟ', ['ಲ']='l', ['ವ']='v', ['ಶ']='ś', - ['ಷ']='ṣ', ['ಸ']='s', ['ಹ']='h', ['ಳ']='ḷ', ['ೞ']='ḻ', - ['ಫ಼']='f', ['ಜ಼']='z', ['ಳ಼']='zh', -} - -local diacritics = { - ['ಾ']= 'ā' , ['ಿ']='i' , ['ೀ']='ī' , ['ು']='u' , 
['ೂ']='ū' , ['ೃ']='ru' , ['ೄ']='rū' , - ['ೆ']='e' , ['ೇ']='ē' , ['ೈ']='ai' , ['ೊ']='o' , ['ೋ']='ō' , ['ೌ']='au' -} - -local nonconsonants = { - -- vowels - ['ಅ']='a' , ['ಆ']='ā' , ['ಇ']='i' , ['ಈ']='ī' , ['ಉ']='u' , ['ಊ']='ū' , - ['ಋ']='ru' , ['ೠ']='rū' , ['ಌ']='l̥' , ['ೡ']='l̥̄', ['ಎ']='e' , ['ಏ']='ē' , - ['ಐ']='ai' , ['ಒ']='o' , ['ಓ']='ō' , ['ಔ']='au' , ['ಅಂ']='aṃ' , ['ಅಃ']='ah' , - -- other symbols - ['ಂ']='ṃ', -- anusvara - ['ಃ']='ḥ', -- visarga - --halant, supresses the inherent vowel "a" - ['್']='', - -- digits - ['೦'] = '0', ['೧'] = '1', ['೨'] = '2', ['೩'] = '3', ['೪'] = '4', - ['೫'] = '5', ['೬'] = '6', ['೭'] = '7', ['೮'] = '8', ['೯'] = '9', -} - --- translit any words or phrases -function export.tr(text, lang, sc) - text = mw.ustring.gsub( - text, - '([ಕಖಗಘಙಚಛಜಝಞಟಠಡಢಣತಥದಧನಪಫಬಭಮಯರಱಲವಶಷಸಹಳೞಕಖ][಼]?)'.. - '([ಾಿೀುೂೃೆೇೈೊೋೌ್]?)', - function(c, d) - -- mw.log('match', c, d) - c = consonants[c] or c - if d == "" then - return c .. 'a' - else - return c .. (diacritics[d] or d) - end - end) - - text = mw.ustring.gsub(text, '.', nonconsonants) - - -- anusvara - text = mw.ustring.gsub(text, 'ṃ([kgṅ])', 'ṅ%1') - text = mw.ustring.gsub(text, 'ṃ([cjñ])', 'ñ%1') - text = mw.ustring.gsub(text, 'ṃ([ṭḍṇ])', 'ṇ%1') - text = mw.ustring.gsub(text, 'ṃ([tdn])', 'n%1') - text = mw.ustring.gsub(text, 'ṃ([pbm])', 'm%1') - - return text -end - -return export \ No newline at end of file diff --git a/wikt/translit/krc-translit.lua b/wikt/translit/krc-translit.lua deleted file mode 100644 index 8959302..0000000 --- a/wikt/translit/krc-translit.lua +++ /dev/null @@ -1,30 +0,0 @@ --- This module will transliterate Karachay-Balkar language text per WT:KRC TR. 
- -local export = {} -local tab = - {["А"] ="A" , ["Б"] ="B", ["В"] ="V", ["Г"] ="G", ["Д"] ="D", ["Е"] ="E", ["Ё"] ="Yo", ["Ж"] ="C" , ["З"] ="Z", ["И"] ="I", ["Й"] ="Y", - - ["К"]="K", ["Л"]="L", ["М"]="M", ["Н"]="N", ["Ҥ"]="Ñ", ["О"]="O", ["Ө"]="Ö", ["П"]="P", ["Р"]="R", ["С"]="S", ["Т"]="T", -["У"]="U", ["Ү"]="Ü", ["Ф"]="F", ["Х"]="KH", ["Һ"]="H", ["Ц"]="Ts",["Ч"]="Ć", ["Ш"]="Ś", ["Щ"]="Śç", ["Ъ"]="ʺ", ["Ы"]="İ", -["Ь"]="ʹ", ["Э"]="E", ["Ю"]="Yu", ["Я"]="Ya", -['а']='a', ['б']='b', ['в']='v', ['г']='g', ['д']='d', ['е']='e', ['ё']='yo', ['ж']='c', ['з']='z', ['и']='i', ['й']='y', -['к']='k', ['л']='l', ['м']='m', ['н']='n', ['о']='o', ['ө']='ö', ['п']='p', ['р']='r', ['с']='s', ['т']='t', -['у']='u', ['ү']='ü', ['ф']='f', ['х']='kh', ['һ']='h', ['ц']='ts', ['ч']='ć', ['ш']='ś', ['щ']='śç',['ъ']='ʺ', ['ы']='ı', -['ь']='ʹ', ['э']='e', ['ю']='yu', ['я']='ya', -} -local mapping = { ['дж']= 'ǵ' , ['Дж']= 'Ĝ', ['ң']='ñ' ,['Ң']='Ñ' , - ['къ']='q', ['Къ']='Q' , ['нъ']='ñ',['Нъ']='Ñ' - -} -function export.tr(text, lang, sc) - -- е after a vowel or at the beginning of a text becomes ye - text = mw.ustring.gsub(text, "([АОӨУҮЫЕЯЁЮИЕЪЬаоөуүыэяёюиеъь%A][\204\129\204\128]?)е","%1yö") - text = mw.ustring.gsub(text, "^Е","Yö") - text = mw.ustring.gsub(text, "^е","yö") - text = mw.ustring.gsub(text, "([^Ѐ-ӿ])Е","%1yö") - text = mw.ustring.gsub(text, "([^Ѐ-ӿ])е","%1yö") - - return (mw.ustring.gsub(text,'.',tab)) -end - -return export \ No newline at end of file diff --git a/wikt/translit/ks-translit.lua b/wikt/translit/ks-translit.lua deleted file mode 100644 index 0381dbb..0000000 --- a/wikt/translit/ks-translit.lua +++ /dev/null @@ -1,83 +0,0 @@ --- This module will transliterate Kashmiri language text. 
--- Language code: kas - -local export = {} - -local consonants = { - ['क']='k', ['ख']='kh', ['ग']='g', ['ङ']='ṅ', - ['च']='c', ['छ']='ch', ['ज']='j', ['ञ']='ñ', - ['ट']='ṭ', ['ठ']='ṭh', ['ड']='ḍ', ['ण']='ṇ', - ['त']='t', ['थ']='th', ['द']='d', ['न']='n', - ['प']='p', ['फ']='ph', ['ब']='b', ['म']='m', - ['य']='y', ['र']='r', ['ल']='l', ['व']='w', - ['श']='ś', ['ष']='ṣ', ['स']='s', ['ह']='h', - ['च़']='ts', ['छ़']='tsh', ['ज़']='z', -} - -local diacritics = { - ['ा']='ā', ['ॅ']='ə', ['ॉ']='ə̄', ['ॖ']='ü', ['ॗ']='ǖ', ['ि']='i', ['ी']='ī', ['ु']='u', ['ू']='ū', ['ृ']='ṛ', - ['ॆ']='e', ['े']='ē', ['ै']='ai', ['ॊ']='o', ['ो']='ō', ['ॏ']='ô', ['ौ']='au', ['्']='', -} - -local tt = { - -- vowels - ['अ']='a', ['आ']='ā', ['ॲ']='ə', ['ऑ']='ə̄', ['ॶ']='ü', ['ॷ']='ǖ', ['इ']='i', ['ई']='ī', ['उ']='u', ['ऊ']='ū', ['ऋ']='ṛ', - ['ऎ']='e', ['ए']='ē', ['ऐ']='ai', ['ऒ']='o', ['ओ']='ō', ['ॵ']='ô', ['औ']='au', - -- chandrabindu - ['ँ']='m̐', --until a better method is found - -- anusvara - ['ं']='ṃ', --until a better method is found - -- visarga - ['ः']='ḥ', - -- avagraha - ['ऽ']='’', - --numerals - ['०']='0', ['१']='1', ['२']='2', ['३']='3', ['४']='4', ['५']='5', ['६']='6', ['७']='7', ['८']='8', ['९']='9', - --punctuation - ['।']='.', --danda -} - -function export.tr(text, lang, sc) - text = mw.ustring.gsub( - text, - '([कखगघङचछजझञटठडढणतथदधनपफबभमयरलळवशषसह]़?)'.. --tbd later - '([ािीुूृॄॢॣेैोौ्ॆॉॊौॏ]?)', - function(c, d) - if d == "" then - return consonants[c] .. 'a' - else - return consonants[c] .. diacritics[d] - end - end) - - text = mw.ustring.gsub(text, '.', tt) - - return text -end - -return export - --- 2 tests failed. (refresh) - --- test_translit_ks: --- Text Expected Actual Differs at --- Passed च़ tsa tsa --- Failed कॉशुर koshur kə̄śura 2 --- Failed जहाज़ jahāz jahāza 6 --- Passed बादाम् bādām bādām --- -- Unit tests for [[Module:ks-translit]]. Refresh page to run tests. 
--- local tests = require('Module:UnitTests') --- local ks_translit = require('Module:ks-translit') - --- function tests:do_test_translit(deva, roman, xlit) --- self:equals('[[' .. mw.ustring.gsub(deva, "%+", "") .. '#Kashmiri|' .. mw.ustring.gsub(deva, "%+", "") .. ']]', ks_translit.tr(deva, 'ks', 'Deva'), roman) --- end - --- function tests:test_translit_ks() --- self:do_test_translit('च़','tsa') --- self:do_test_translit('कॉशुर','koshur') --- self:do_test_translit('जहाज़','jahāz') --- self:do_test_translit('बादाम्','bādām') --- end - --- return tests \ No newline at end of file diff --git a/wikt/translit/kum-translit.lua b/wikt/translit/kum-translit.lua deleted file mode 100644 index 3c99fbe..0000000 --- a/wikt/translit/kum-translit.lua +++ /dev/null @@ -1,36 +0,0 @@ --- This module will transliterate Kumyk language text per WT:KUM TR. - -local export = {} - -local tt = { - ['т']='t',['Т']='T', ['р']='r',['Р']='R', ['ф']='f',['Ф']='F', - ['ю']='yu',['Ю']='Yu', ['ш']='š',['Ш']='Š', ['ь']='ʹ',['Ь']='ʹ', ['ъ']='ʺ',['Ъ']='ʺ', ['н']='n',['Н']='N', - ['п']='p',['П']='P', ['й']='y',['Й']='Y', ['л']='l',['Л']='L', ['з']='z',['З']='Z', ['е']='e',['Е']='E', - ['г']='g',['Г']='G', ['б']='b',['Б']='B', ['у']='u',['У']='U', ['с']='s',['С']='S', ['х']='h',['Х']='H', - ['ч']='č',['Ч']='Č', ['щ']='šč',['Щ']='Šč', ['я']='ya',['Я']='Ya', ['ы']='ı',['Ы']='I', ['э']='e',['Э']='E', - ['м']='m',['М']='M', ['о']='o',['О']='O', ['и']='i',['И']='I', ['ё']='yo',['Ё']='Yo', ['ж']='j',['Ж']='J', - ['к']='k',['К']='K', ['д']='d',['Д']='D', ['в']='v',['В']='V', ['ц']='ts',['Ц']='Ts', ['а']='a',['А']='A' -}; -local mapping = { ['гь']= 'h' , ['Гь']= 'H', ['гъ']='ğ' ,['Гъ']='Ğ' , ['уь']='ü',['Уь']='Ü', - ['къ']='q', ['Къ']='Q' , ['нг']='ñ',['Нг']='Ñ' , ['оь']='ö',['Оь']='Ö' - -} -function export.tr(text, lang, sc) - for digraph, replacement in pairs(mapping) do - text = mw.ustring.gsub(text, digraph, replacement) - end - - text = mw.ustring.gsub( - text, - 
'([АОӨӘУЫЕЯЁЮИЕаоөәуыэяёюиеъь%A][́̀]?)([Ее])', - function(a, e) - return a .. ( e == 'е' and 'ye' or 'Ye' ) - end - ) - :gsub('^Е','Ye') - :gsub('^е','ye'); - - return (mw.ustring.gsub(text, '.', tt)) -end - -return export \ No newline at end of file diff --git a/wikt/translit/kv-translit.lua b/wikt/translit/kv-translit.lua deleted file mode 100644 index 340a1cd..0000000 --- a/wikt/translit/kv-translit.lua +++ /dev/null @@ -1,29 +0,0 @@ --- This module will transliterate Komi-Zyrian language text per WT:KPV TR. --- It is used to transliterate Komi-Permyak (koi) and Komi-Zyrian (kpv). - -local export = {} - -local tab = { - ["А"]="A", ["Б"]="B", ["В"]="V", ["Г"]="G", ["Д"]="D", ["Е"]="E", ["Ё"]="Jo", ["Ж"]="Ž", ["З"]="Z", ["И"]="I", ["І"]="Ï", ["Й"]="J", - ["К"]="K", ["Л"]="L", ["М"]="M", ["Н"]="N", ["О"]="O", ["Ӧ"]="Ö", ["П"]="P", ["Р"]="R", ["С"]="S", ["Т"]="T", ["У"]="U", ["Ф"]="F", - ["Х"]="X", ["Ц"]="C", ["Ч"]="Č", ["Ш"]="Š", ["Щ"]="Šč", ["Ъ"]="ʺ", ["Ы"]="Y", ["Ь"]="ʹ", ["Э"]="E", ["Ю"]="Ju", ["Я"]="Ja", - ['а']='a', ['б']='b', ['в']='v', ['г']='g', ['д']='d', ['е']='e', ['ё']='jo', ['ж']='ž', ['з']='z', ['и']='i', ['і']='ï', ['й']='j', - ['к']='k', ['л']='l', ['м']='m', ['н']='n', ['о']='o', ['ӧ']='ö', ['п']='p', ['р']='r', ['с']='s', ['т']='t', ['у']='u', ['ф']='f', - ['х']='x', ['ц']='c', ['ч']='č', ['ш']='š', ['щ']='šč', ['ъ']='ʺ', ['ы']='y', ['ь']='ʹ', ['э']='e', ['ю']='ju', ['я']='ja', -} - -function export.tr(text, lang, sc) - -- Ё needs converting if is decomposed - text = text:gsub("ё","ё"):gsub("Ё","Ё") - - -- е after a vowel or at the beginning of a word becomes je - text = mw.ustring.gsub(text, "([АОÖУЫЕЯЁЮИIЕЪЬаоöуыэяёюиiеъь%A][\204\129\204\128]?)е","%1je") - text = mw.ustring.gsub(text, "^Е","Je") - text = mw.ustring.gsub(text, "^е","je") - text = mw.ustring.gsub(text, "([^Ѐ-ӿ])Е","%1Je") - text = mw.ustring.gsub(text, "([^Ѐ-ӿ])е","%1je") - - return (mw.ustring.gsub(text,'.',tab)) -end - -return export \ No newline at end of file diff --git 
a/wikt/translit/ky-translit.lua b/wikt/translit/ky-translit.lua deleted file mode 100644 index 7f41be5..0000000 --- a/wikt/translit/ky-translit.lua +++ /dev/null @@ -1,30 +0,0 @@ --- This module will transliterate Kyrgyz language text per WT:KY TR. - -local export = {} - -local tt = { - ["ү"]="ü",['Ү']='Ü', ["т"]="t",['Т']='T', ["р"]="r",['Р']='R', ["ф"]="f",['Ф']='F', ["ө"]="ö",['Ө']='Ö', - ["ю"]="yu",['Ю']='Yu', ["ш"]="ş",['Ш']='Ş', ["ь"]="ʹ",['Ь']='ʹ', ["ъ"]="ʺ",['Ъ']='ʺ', ["н"]="n",['Н']='N', - ["п"]="p",['П']='P', ["й"]="y",['Й']='Y', ["л"]="l",['Л']='L', ["з"]="z",['З']='Z', ["е"]="e",['Е']='E', - ["г"]="g",['Г']='G', ["б"]="b",['Б']='B', ["у"]="u",['У']='U', ["с"]="s",['С']='S', ["х"]="h",['Х']='H', - ["ч"]="ç",['Ч']='Ç', ["щ"]="şç",['Щ']='Şç', ["я"]="ya",['Я']='Ya', ["ы"]="ı",['Ы']='I', ["э"]="e",['Э']='E', - ["м"]="m",['М']='M', ["о"]="o",['О']='O', ["и"]="i",['И']='İ', ["ё"]="yo",['Ё']='Yo', ["ж"]="c",['Ж']='C', - ["к"]="k",['К']='K', ["д"]="d",['Д']='D', ["в"]="v",['В']='V', ["ц"]="ts",['Ц']='Ts', ["а"]="a",['А']='A', - ["ң"]="ñ",['Ң']='Ñ' -}; - -function export.tr(text, lang, sc) - text = mw.ustring.gsub( - text, - "([АОӨӘУЫЕЯЁЮИЕаоөәуыэяёюиеъь%A][́̀]?)([Ее])", - function(a,e) - return a .. ( e == 'е' and 'ye' or 'Ye' ) - end - ) - :gsub("^Е",'Ye') - :gsub("^е",'ye'); - - return (mw.ustring.gsub(text, '.', tt)) -end - -return export \ No newline at end of file diff --git a/wikt/translit/languages-json.lua b/wikt/translit/languages-json.lua deleted file mode 100644 index 1428536..0000000 --- a/wikt/translit/languages-json.lua +++ /dev/null @@ -1,11 +0,0 @@ -local export = {} - -function export.getByCode(frame) - local args = frame.args - local langcode = args[1] or error("Language code has not been specified. 
Please pass parameter 1 to the module invocation.") - local lang = require("Module:languages").getByCode(langcode) or error("Language code not found") - - return lang:toJSON() -end - -return export \ No newline at end of file diff --git a/wikt/translit/languages.lua b/wikt/translit/languages.lua deleted file mode 100644 index 242492a..0000000 --- a/wikt/translit/languages.lua +++ /dev/null @@ -1,439 +0,0 @@ -local export = {} - -local find = mw.ustring.find - ---[=[ This function checks for things that could plausibly be a language code: - two or three lowercase letters, two or three groups of three lowercase - letters with hyphens between them. If such a pattern is not found, - it is likely the editor simply forgot to enter a language code. ]=] - -function export.err(langCode, param, text) - local ordinals = { "first", "second", "third", "fourth" } - - local paramType = type(param) - if paramType == "number" then - ordinal = ordinals[param] - param = ordinal .. ' parameter' - elseif paramType == "string" then - param = 'parameter "' .. param .. '"' - else - error("The parameter name is " - .. (paramType == "table" and "a table" or tostring(param)) - .. ", but it should be a number or a string.") - end - - --[[ Can't use "%l" because that would include all Unicode - lowercase letters; language codes only use ASCII. ]] - local lower = "[a-z]" - - if not langCode or langCode == "" then - error("The " .. param .. " (" .. (text or "language code") .. ") is missing.", 2) - elseif find(langCode, "^" .. lower .. lower .. lower .. "?$") - or find(langCode, "^" .. lower .. lower .. lower - .. "%-" .. lower .. lower .. lower .. "$") - or find(langCode, "^" .. lower .. lower .. lower - .. "%-" .. lower .. lower .. lower - .. "%-" .. lower .. lower .. lower .. "$") then - error("The language code \"" .. langCode .. "\" is not valid.", 2) - else - error("Please enter a " .. (text or "language code") .. " in the " .. param .. 
".", 2) - end -end - -local Language = {} - -function Language:getCode() - return self._code -end - - -function Language:getCanonicalName() - return self._rawData[1] or self._rawData.canonicalName -end - - -function Language:getOtherNames() - return self._rawData.otherNames or {} -end - - -function Language:getType() - return self._rawData.type or "regular" -end - - -function Language:getWikimediaLanguages() - if not self._wikimediaLanguageObjects then - local m_wikimedia_languages = require("Module:wikimedia languages") - self._wikimediaLanguageObjects = {} - local wikimedia_codes = self._rawData.wikimedia_codes or { self._code } - - for _, wlangcode in ipairs(wikimedia_codes) do - table.insert(self._wikimediaLanguageObjects, m_wikimedia_languages.getByCode(wlangcode)) - end - end - - return self._wikimediaLanguageObjects -end - -function Language:getWikipediaArticle() - if self._rawData.wikipedia_article then - return self._rawData.wikipedia_article - elseif self._wikipedia_article then - return self._wikipedia_article - elseif self:getWikidataItem() and mw.wikibase then - self._wikipedia_article = mw.wikibase.sitelink(self:getWikidataItem(), 'enwiki') - end - if not self._wikipedia_article then - self._wikipedia_article = mw.ustring.gsub(self:getCategoryName(), "Creole language", "Creole") - end - return self._wikipedia_article -end - -function Language:makeWikipediaLink() - return "[[w:" .. self:getWikipediaArticle() .. "|" .. self:getCanonicalName() .. 
"]]" -end - -function Language:getWikidataItem() - return self._rawData[2] or self._rawData.wikidata_item -end - -function Language:getScripts() - if not self._scriptObjects then - local m_scripts = require("Module:scripts") - self._scriptObjects = {} - - for _, sc in ipairs(self._rawData.scripts or { "None" }) do - table.insert(self._scriptObjects, m_scripts.getByCode(sc)) - end - end - - return self._scriptObjects -end - -function Language:getScriptCodes() - return self._rawData.scripts or { "None" } -end - -function Language:getFamily() - if self._familyObject then - return self._familyObject - end - - local family = self._rawData[3] or self._rawData.family - if family then - self._familyObject = require("Module:families").getByCode(family) - end - - return self._familyObject -end - - -function Language:getAncestors() - if not self._ancestorObjects then - self._ancestorObjects = {} - - if self._rawData.ancestors then - for _, ancestor in ipairs(self._rawData.ancestors) do - table.insert(self._ancestorObjects, export.getByCode(ancestor) or require("Module:etymology languages").getByCode(ancestor)) - end - else - local fam = self:getFamily() - local protoLang = fam and fam:getProtoLanguage() or nil - - -- For the case where the current language is the proto-language - -- of its family, we need to step up a level higher right from the start. 
- if protoLang and protoLang:getCode() == self:getCode() then - fam = fam:getFamily() - protoLang = fam and fam:getProtoLanguage() or nil - end - - while not protoLang and not (not fam or fam:getCode() == "qfa-not") do - fam = fam:getFamily() - protoLang = fam and fam:getProtoLanguage() or nil - end - - table.insert(self._ancestorObjects, protoLang) - end - end - - return self._ancestorObjects -end - -local function iterateOverAncestorTree(node, func) - for _, ancestor in ipairs(node:getAncestors()) do - if ancestor then - local ret = func(ancestor) or iterateOverAncestorTree(ancestor, func) - if ret then - return ret - end - end - end -end - -function Language:getAncestorChain() - if not self._ancestorChain then - self._ancestorChain = {} - local step = #self:getAncestors() == 1 and self:getAncestors()[1] or nil - - while step do - table.insert(self._ancestorChain, 1, step) - step = #step:getAncestors() == 1 and step:getAncestors()[1] or nil - end - end - - return self._ancestorChain -end - - -function Language:hasAncestor(otherlang) - local function compare(ancestor) - return ancestor:getCode() == otherlang:getCode() - end - - return iterateOverAncestorTree(self, compare) or false -end - - -function Language:getCategoryName() - local name = self:getCanonicalName() - - -- If the name already has "language" in it, don't add it. - if name:find("[Ll]anguage$") then - return name - else - return name .. 
" language" - end -end - - -function Language:getStandardCharacters() - return self._rawData.standardChars -end - - -function Language:makeEntryName(text) - text = mw.ustring.gsub(text, "^[¿¡]", "") - text = mw.ustring.gsub(text, "(.)[؟?!;՛՜ ՞ ՟?!︖︕।॥။၊་།]$", "%1") - - if self:getCode() == "ar" then - local U = mw.ustring.char - local taTwiil = U(0x640) - local waSla = U(0x671) - -- diacritics ordinarily removed by entry_name replacements - local Arabic_diacritics = U(0x64B, 0x64C, 0x64D, 0x64E, 0x64F, 0x650, 0x651, 0x652, 0x670) - - if text == waSla or mw.ustring.find(text, "^" .. taTwiil .. "?[" .. Arabic_diacritics .. "]" .. "$") then - return text - end - end - - if type(self._rawData.entry_name) == "table" then - for i, from in ipairs(self._rawData.entry_name.from) do - local to = self._rawData.entry_name.to[i] or "" - text = mw.ustring.gsub(text, from, to) - end - end - - --[=[ For instance, ᾰ (alpha-breve) + combining smooth breathing is converted - to alpha + combining smooth breathing by the entry_name replacements. - It must be re-combined to alpha-smooth breathing (ἀ) so that - allowSelfLink in [[Module:links]] will work properly. ]=] - if self:getCode() == "grc" then - text = mw.ustring.toNFC(text) - end - - return text -end - - --- Add to data tables? 
-local has_dotted_undotted_i = { - ["az"] = true, - ["crh"] = true, - ["gag"] = true, - ["kaa"] = true, - ["tt"] = true, - ["tr"] = true, - ["zza"] = true, -} - -function Language:makeSortKey(name, sc) - if has_dotted_undotted_i[self:getCode()] then - name = mw.ustring.gsub(name, "I", "ı") - end - - name = mw.ustring.lower(name) - - -- Remove initial hyphens and * - local hyphens_regex = "^[-־ـ*]+(.)" - name = mw.ustring.gsub(name, hyphens_regex, "%1") - - -- If there are language-specific rules to generate the key, use those - if type(self._rawData.sort_key) == "table" then - for i, from in ipairs(self._rawData.sort_key.from) do - local to = self._rawData.sort_key.to[i] or "" - name = mw.ustring.gsub(name, from, to) - end - elseif type(self._rawData.sort_key) == "string" then - name = require("Module:" .. self._rawData.sort_key).makeSortKey(name, self:getCode(), sc and sc:getCode()) - end - - -- Remove parentheses, as long as they are either preceded or followed by something - name = mw.ustring.gsub(name, "(.)[()]+", "%1") - name = mw.ustring.gsub(name, "[()]+(.)", "%1") - - if has_dotted_undotted_i[self:getCode()] then - name = mw.ustring.gsub(name, "i", "İ") - end - - return mw.ustring.upper(name) -end - -function Language:overrideManualTranslit() - if self._rawData.override_translit then - return true - else - return false - end -end - - -function Language:transliterate(text, sc, module_override) - if not ((module_override or self._rawData.translit_module) and text) then - return nil - end - - if module_override then - require("Module:debug").track("module_override") - end - - return require("Module:" .. 
(module_override or self._rawData.translit_module)).tr(text, self:getCode(), sc and sc:getCode() or nil) -end - -function Language:hasTranslit() - return self._rawData.translit_module and true or false -end - - -function Language:link_tr() - return self._rawData.link_tr and true or false -end - - -function Language:toJSON() - local entryNamePatterns = nil - - if self._rawData.entry_name then - entryNamePatterns = {} - - for i, from in ipairs(self._rawData.entry_name.from) do - local to = self._rawData.entry_name.to[i] or "" - table.insert(entryNamePatterns, { from = from, to = to }) - end - end - - local ret = { - ancestors = self._rawData.ancestors, - canonicalName = self:getCanonicalName(), - categoryName = self:getCategoryName(), - code = self._code, - entryNamePatterns = entryNamePatterns, - family = self._rawData[3] or self._rawData.family, - otherNames = self:getOtherNames(), - scripts = self._rawData.scripts, - type = self:getType(), - wikimediaLanguages = self._rawData.wikimedia_codes, - wikidataItem = self:getWikidataItem(), - } - - return require("Module:JSON").toJSON(ret) -end - - --- Do NOT use this method! --- All uses should be pre-approved on the talk page! -function Language:getRawData() - return self._rawData -end - -Language.__index = Language - - -function export.getDataModuleName(code) - if code:find("^[a-z][a-z]$") then - return "languages/data2" - elseif code:find("^[a-z][a-z][a-z]$") then - local prefix = code:sub(1, 1) - return "languages/data3/" .. prefix - elseif code:find("^[a-z-]+$") then - return "languages/datax" - else - return nil - end -end - - -local function getRawLanguageData(code) - local modulename = export.getDataModuleName(code) - return modulename and mw.loadData("Module:" .. modulename)[code] or nil -end - - -function export.makeObject(code, data) - if data and data.deprecated then - require("Module:debug").track { - "languages/deprecated", - "languages/deprecated/" .. 
code - } - end - - return data and setmetatable({ _rawData = data, _code = code }, Language) or nil -end - - -function export.getByCode(code) - if type(code) ~= "string" then - error("The function getByCode expects a string as its first argument, but received " .. (code == nil and "nil" or "a " .. type(code)) .. ".") - end - - return export.makeObject(code, getRawLanguageData(code)) -end - - -function export.getByName(name) - local byName = mw.loadData("Module:languages/by name") - local code = byName.all and byName.all[name] or byName[name] - - if not code then - return nil - end - - return export.makeObject(code, getRawLanguageData(code)) -end - - -function export.getByCanonicalName(name) - local byName = mw.loadData("Module:languages/canonical names") - local code = byName and byName[name] - - if not code then - return nil - end - - return export.makeObject(code, getRawLanguageData(code)) -end - - -function export.iterateAll() - mw.incrementExpensiveFunctionCount() - local m_data = mw.loadData("Module:languages/alldata") - local func, t, var = pairs(m_data) - - return function() - local code, data = func(t, var) - return export.makeObject(code, data) - end -end - -return export \ No newline at end of file diff --git a/wikt/translit/lbe-translit.lua b/wikt/translit/lbe-translit.lua deleted file mode 100644 index 6d9baf6..0000000 --- a/wikt/translit/lbe-translit.lua +++ /dev/null @@ -1,91 +0,0 @@ --- This module will transliterate Lak language text per WT:LBE TR. 
-local export = {} - -local tt = { - ["б"]="b", ["п"]="p", ["ф"]="f", ["в"]="w", ["м"]="m", - ["д"]="d", ["т"]="t", ["й"]="j", ["н"]="n", ["з"]="z", ["ц"]="c", - ["с"]="s", ["ж"]="ž", ["ш"]="š", ["щ"]="š̄", - ["л"]="l", ["ч"]="č", ["р"]="r", ["г"]="g", ["к"]="k", ["х"]="χ", - ["ъ"]="ʾ", ["а"]="a", ["е"]="ə̱", ["ы"]="y", ["и"]="i", ["о"]="o", ["у"]="u", - ["ё"]="ë", ["ь"]="’", ["э"]="ə̱", ["ю"]="ju", ["я"]="ja", - ["Б"]="B", ["П"]="P", ["Ф"]="F", ["В"]="W", ["М"]="M", - ["Д"]="D", ["Т"]="T", ["Й"]="J", ["Н"]="N", ["З"]="Z", ["Ц"]="C", - ["С"]="S", ["Ж"]="Ž", ["Ш"]="Š", ["Щ"]="Š̄", - ["Л"]="L", ["Ч"]="Č", ["Р"]="R", ["Г"]="G", ["К"]="K", ["Х"]="Χ", - ["Ъ"]="ʾ", ["А"]="A", ["Е"]="Ə̱", ["Ы"]="Y", ["И"]="I", ["О"]="O", ["У"]="U", - ["Ё"]="Ë", ["Ь"]="’", ["Э"]="Ə̱", ["Ю"]="Ju", ["Я"]="Ja"}; - -local tetragraphs = { - ['хьхь'] = 'x̄', - ['Хьхь'] = 'X̄', -} - -local digraphs = { - ['пп'] = 'p̄', - ['пӏ'] = 'ṗ', - ['тт'] = 't̄', - ['аь'] = 'a̱', - ['оь'] = 'o̱', - ['Пп'] = 'P̄', - ['Пӏ'] = 'Ṗ', - ['Тт'] = 'T̄', - ['Аь'] = 'A̱', - ['Оь'] = 'O̱', - ['цӏ'] = 'c̣', - ['цц'] = 'c̄', - ['тӏ'] = 'ṭ', - ['сс'] = 's̄', - ['чч'] = 'č̄', - ['чӏ'] = 'č̣', - ['кь'] = 'q̇', - ['кк'] = 'k̄', - ['кӏ'] = 'ḳ', - ['хь'] = 'x', - ['хъ'] = 'q', - ['къ'] = 'q̄', - ['гъ'] = 'ġ', - ['хх'] = 'χ̄', - ['гӏ'] = 'ʿ', - ['хӏ'] = 'ḥ', - ['гь'] = 'h', - ['Цӏ'] = 'C̣', - ['Цц'] = 'C̄', - ['Тӏ'] = 'Ṭ', - ['Сс'] = 'S̄', - ['Чч'] = 'Č̄', - ['Чӏ'] = 'Č̣', - ['Кь'] = 'Q̇', - ['Кк'] = 'K̄', - ['Кӏ'] = 'Ḳ', - ['Хь'] = 'X', - ['Хъ'] = 'Q', - ['Къ'] = 'Q̄', - ['Гъ'] = 'Ġ', - ['Хх'] = 'Χ̄', - ['Гӏ'] = 'ʿ', - ['Хӏ'] = 'Ḥ', - ['Гь'] = 'H', -} - -function export.tr(text, lang, sc) - local str_gsub = string.gsub - local UTF8_char = '[%z\1-\127\194-\244][\128-\191]*' - - -- Convert capital to lowercase palochka. Lowercase is found in tables - -- above. 
- text = str_gsub(text, mw.ustring.char(0x4C0), mw.ustring.char(0x4CF)) - - for tetragraph, replacement in pairs(tetragraphs) do - text = str_gsub(text, tetragraph, replacement) - end - - for digraph, replacement in pairs(digraphs) do - text = str_gsub(text, digraph, replacement) - end - - text = str_gsub(text, UTF8_char, tt) - - return text -end - -return export \ No newline at end of file diff --git a/wikt/translit/lep-translit.lua b/wikt/translit/lep-translit.lua deleted file mode 100644 index 9380c64..0000000 --- a/wikt/translit/lep-translit.lua +++ /dev/null @@ -1,154 +0,0 @@ --- This module will transliterate Lepcha language text. - -local export = {} - -local conv = { - [""] = "", - ["\225\176\128"] = "k", --U+1C00 - ["\225\176\129"] = "kl", --U+1C01 - ["\225\176\130"] = "kh", --U+1C02 - ["\225\176\131"] = "g", --U+1C03 - ["\225\176\132"] = "gl", --U+1C04 - ["\225\176\133"] = "ng", --U+1C05 - ["\225\176\134"] = "c", --U+1C06 - ["\225\176\135"] = "ch", --U+1C07 - ["\225\176\136"] = "j", --U+1C08 - ["\225\176\137"] = "ny", --U+1C09 - ["\225\176\138"] = "t", --U+1C0A - ["\225\176\139"] = "th", --U+1C0B - ["\225\176\140"] = "d", --U+1C0C - ["\225\176\141"] = "n", --U+1C0D - ["\225\176\142"] = "p", --U+1C0E - ["\225\176\143"] = "pl", --U+1C0F - ["\225\176\144"] = "ph", --U+1C10 - ["\225\176\145"] = "f", --U+1C11 - ["\225\176\146"] = "fl", --U+1C12 - ["\225\176\147"] = "b", --U+1C13 - ["\225\176\148"] = "bl", --U+1C14 - ["\225\176\149"] = "m", --U+1C15 - ["\225\176\150"] = "ml", --U+1C16 - ["\225\176\151"] = "ts", --U+1C17 - ["\225\176\152"] = "tsh", --U+1C18 - ["\225\176\153"] = "dz", --U+1C19 - ["\225\176\154"] = "y", --U+1C1A - ["\225\176\155"] = "r", --U+1C1B - ["\225\176\156"] = "l", --U+1C1C - ["\225\176\157"] = "h", --U+1C1D - ["\225\176\158"] = "hl", --U+1C1E - ["\225\176\159"] = "v", --U+1C1F - ["\225\176\160"] = "s", --U+1C20 - ["\225\176\161"] = "sh", --U+1C21 - ["\225\176\162"] = "w", --U+1C22 - ["\225\176\163"] = "", --U+1C23 - ["\225\176\164"] = "y", 
--U+1C24 - ["\225\176\165"] = "r", --U+1C25 - ["\225\176\166"] = "aa", --U+1C26 - ["\225\176\167"] = "i", --U+1C27 - ["\225\176\168"] = "o", --U+1C28 - ["\225\176\169"] = "oo", --U+1C29 - ["\225\176\170"] = "u", --U+1C2A - ["\225\176\171"] = "uu", --U+1C2B - ["\225\176\172"] = "e", --U+1C2C - ["\225\176\173"] = "k", --U+1C2D - ["\225\176\174"] = "m", --U+1C2E - ["\225\176\175"] = "l", --U+1C2F - ["\225\176\176"] = "n", --U+1C30 - ["\225\176\177"] = "p", --U+1C31 - ["\225\176\178"] = "r", --U+1C32 - ["\225\176\179"] = "t", --U+1C33 - ["\225\176\180"] = "ng", --U+1C34 - ["\225\176\181"] = "ng", --U+1C35 - ["\225\176\182"] = "^", --U+1C36 - ["\225\176\183"] = "", --U+1C37 - ["\225\176\184"] = "[ta-rol]", --U+1C38 - ["\225\176\185"] = "[nyet thyoom ta-rol]", --U+1C39 - ["\225\176\186"] = "[cer-wa]", --U+1C3A - ["\225\176\187"] = "[tshook cer-wa]", --U+1C3B - ["\225\176\188"] = "[tshook]", --U+1C3C - ["\225\176\189"] = "0", --U+1C3D - ["\225\176\190"] = "1", --U+1C3E - ["\225\176\191"] = "2", --U+1C3F - ["\225\177\128"] = "3", --U+1C40 - ["\225\177\129"] = "4", --U+1C41 - ["\225\177\130"] = "5", --U+1C42 - ["\225\177\131"] = "6", --U+1C43 - ["\225\177\132"] = "7", --U+1C44 - ["\225\177\133"] = "8", --U+1C45 - ["\225\177\134"] = "9", --U+1C46 - ["\225\177\141"] = "tt", --U+1C4D - ["\225\177\142"] = "tth", --U+1C4E - ["\225\177\143"] = "dd", --U+1C4F -} - -function export.tr(text, lang, sc) - local nukta = "\225\176\183" - - local initials = "([\225\176\128-\225\176\163\225\177\141-\225\177\143])" - local medials = "([\225\176\164-\225\176\165]?)" - local vowels_after = "([\225\176\166\225\176\170-\225\176\172])" - local vowels_before = "([\225\176\167-\225\176\169])" - local finals_after = "([\225\176\173-\225\176\179\225\176\182]?)" - local finals_before = "([\225\176\180\225\176\181]?)" - local others = "[\225\176\184-\225\177\134]" - - local im = initials..medials - local imf = initials..medials..finals_after - local fim = finals_before..initials..medials - - local 
imvf = initials..medials..vowels_after..finals_after - local fimv = finals_before..initials..medials..vowels_after - local vimf = vowels_before..initials..medials..finals_after - local fvim = finals_before..vowels_before..initials..medials - - text = mw.ustring.gsub(text, nukta, "") - - text = mw.ustring.gsub(text, imvf, function(i,m,v,f) - return conv[i] .. conv[m] .. conv[v] .. conv[f] - end) - text = mw.ustring.gsub(text, fimv, function(f,i,m,v) - return conv[i] .. conv[m] .. conv[v] .. conv[f] - end) - text = mw.ustring.gsub(text, vimf, function(v,i,m,f) - return conv[i] .. conv[m] .. conv[v] .. conv[f] - end) - text = mw.ustring.gsub(text, fvim, function(f,v,i,m) - return conv[i] .. conv[m] .. conv[v] .. conv[f] - end) - - text = mw.ustring.gsub(text, imf, function(i,m,f) - return conv[i] .. conv[m] .. "a" .. conv[f] - end) - text = mw.ustring.gsub(text, fim, function(f,i,m) - return conv[i] .. conv[m] .. "a" .. conv[f] - end) - - text = mw.ustring.gsub(text, im, function(i,m) - return conv[i] .. conv[m] .. "a" - end) - - text = mw.ustring.gsub(text, others, conv) - - return text -end - -return export - - --- All tests passed. (refresh) - --- test_translit_lepcha: --- Text Expected Actual Differs at --- Passed ᰈᰬᰲ jer jer --- -- Unit tests for [[Module:lep-translit]]. Refresh page to run tests. --- local tests = require('Module:UnitTests') --- local lep_translit = require('Module:lep-translit') - --- function tests:do_test_translit(lepc, roman, xlit) --- self:equals('[[' .. mw.ustring.gsub(lepc, "%+", "") .. '#Lepcha|' .. mw.ustring.gsub(lepc, "%+", "") .. 
']]', lep_translit.tr(lepc, 'lep', 'Lepc'), roman) --- end - --- function tests:test_translit_lepcha() --- self:do_test_translit('ᰈᰬᰲ', 'jer') --- end - --- return tests \ No newline at end of file diff --git a/wikt/translit/lez-translit.lua b/wikt/translit/lez-translit.lua deleted file mode 100644 index 17fe695..0000000 --- a/wikt/translit/lez-translit.lua +++ /dev/null @@ -1,113 +0,0 @@ --- This module will transliterate Lezgi language text per WT:LEZ TR. -local export = {} - -local tt = { - ["б"]="b", ["п"]="p", ["ф"]="f", ["в"]="v", ["м"]="m", - ["д"]="d", ["т"]="t", ["й"]="j", ["н"]="n", ["з"]="z", ["ц"]="c", - ["с"]="s", ["ж"]="ž", ["ш"]="š", ["щ"]="šč", - ["л"]="l", ["ч"]="č", ["р"]="r", ["г"]="g", ["к"]="k", ["х"]="χ", - ["ъ"]="ʾ", ["а"]="a", ["е"]="e", ["ы"]="y", ["и"]="i", ["о"]="o", ["у"]="u", - ["ё"]="ë", ["ь"]="’", ["э"]="è", ["ю"]="ju", ["я"]="ä", - ["Б"]="B", ["П"]="P", ["Ф"]="F", ["В"]="V", ["М"]="M", - ["Д"]="D", ["Т"]="T", ["Й"]="J", ["Н"]="N", ["З"]="Z", ["Ц"]="C", - ["С"]="S", ["Ж"]="Ž", ["Ш"]="Š", ["Щ"]="Šč", - ["Л"]="L", ["Ч"]="Č", ["Р"]="R", ["Г"]="G", ["К"]="K", ["Х"]="Χ", - ["Ъ"]="ʾ", ["А"]="A", ["Е"]="E", ["Ы"]="Y", ["И"]="I", ["О"]="O", ["У"]="U", - ["Ё"]="Ë", ["Ь"]="’", ["Э"]="È", ["Ю"]="Ju", ["Я"]="Ä"}; - -local trigraphs = { - ['хъв'] = 'q°', - ['Хъв'] = 'Q°', - ['къв'] = 'q̄°', - ['Къв'] = 'Q̄°', - ['кьв'] = 'q̇°', - ['Кьв'] = 'Q̇°', - ['гъв'] = 'ġ°', - ['Гъв'] = 'Ġ°', - ['ттв'] = 't̄°', - ['Ттв'] = 't̄°', - ['ццв'] = 'c̄°', - ['Ццв'] = 'C̄°', - ['ккв'] = 'k̄°', - ['Ккв'] = 'K̄°', - ['тӏв'] = 'ṭ°', - ['Тӏв'] = 'Ṭ°', - ['цӏв'] = 'c̣°', - ['Цӏв'] = 'C̣°', - ['кӏв'] = 'ḳ°', - ['Кӏв'] = 'Ḳ°', -} - -local digraphs = { - ['тв'] = 't°', - ['Тв'] = 'T°', - ['зв'] = 'z°', - ['Зв'] = 'Z°', - ['хв'] = 'χ°', - ['Хв'] = 'Χ°', - ['цв'] = 'c°', - ['Цв'] = 'C°', - ['св'] = 's°', - ['Св'] = 'S°', - ['пп'] = 'p̄', - ['пӏ'] = 'ṗ', - ['тт'] = 't̄', - ['Пп'] = 'P̄', - ['Пӏ'] = 'Ṗ', - ['Тт'] = 'T̄', - ['цӏ'] = 'c̣', - ['цц'] = 'c̄', - ['тӏ'] = 'ṭ', - 
['чч'] = 'č̄', - ['чӏ'] = 'č̣', - ['кь'] = 'q̇', - ['кк'] = 'k̄', - ['кӏ'] = 'ḳ', - ['хъ'] = 'q', - ['къ'] = 'q̄', - ['гъ'] = 'ġ', - ['гь'] = 'h', - ['Цӏ'] = 'C̣', - ['Цц'] = 'C̄', - ['Тӏ'] = 'Ṭ', - ['Чч'] = 'Č̄', - ['Чӏ'] = 'Č̣', - ['Кь'] = 'Q̇', - ['Кк'] = 'K̄', - ['Кӏ'] = 'Ḳ', - ['Хъ'] = 'Q', - ['Къ'] = 'Q̄', - ['Гъ'] = 'Ġ', - ['Гь'] = 'H', - ['уь'] = 'ü', - ['Уь'] = 'Ü', - ['хь'] = 'x', - ['Хь'] = 'X', - ['гв'] = 'g°', - ['Гв'] = 'G°', - ['кв'] = 'k°', - ['Кв'] = 'K°', -} - -function export.tr(text, lang, sc) - local str_gsub = string.gsub - local UTF8_char = '[%z\1-\127\194-\244][\128-\191]*' - - -- Convert capital to lowercase palochka. Lowercase is found in tables - -- above. - text = str_gsub(text, mw.ustring.char(0x4C0), mw.ustring.char(0x4CF)) - - for digraph, replacement in pairs(digraphs) do - text = str_gsub(text, digraph, replacement) - end - - for trigraph, replacement in pairs(trigraphs) do - text = str_gsub(text, trigraph, replacement) - end - - text = str_gsub(text, UTF8_char, tt) - - return text -end - -return export \ No newline at end of file diff --git a/wikt/translit/lif-translit.lua b/wikt/translit/lif-translit.lua deleted file mode 100644 index 1179f1f..0000000 --- a/wikt/translit/lif-translit.lua +++ /dev/null @@ -1,67 +0,0 @@ --- This module will transliterate Limbu language text. 
- -local export = {} - -local consonants = { - ['ᤁ']='k', ['ᤂ']='kh', ['ᤃ']='g', ['ᤄ']='gh', ['ᤅ']='ṅ', - ['ᤆ']='c', ['ᤇ']='ch', ['ᤈ']='j', ['ᤉ']='jh', ['ᤊ']='ny', - ['ᤋ']='t', ['ᤌ']='th', ['ᤍ']='d', ['ᤎ']='dh', ['ᤏ']='n', - ['ᤐ']='p', ['ᤑ']='ph', ['ᤒ']='b', ['ᤓ']='bh', ['ᤔ']='m', - ['ᤕ']='y', ['ᤖ']='r', ['ᤗ']='l', ['ᤘ']='w', - ['ᤙ']='ś', ['ᤚ']='ṣ', ['ᤛ']='s', ['ᤜ']='h', - ['ᤝ']='gy', ['ᤞ']='tr', ['ᤀ']='', -} -local diacritics = { - ['ᤠ']='a' , ['ᤡ']='i' , ['ᤢ']='u' , ['ᤣ']='e' , ['ᤤ']='ai' , ['ᤥ']='o' , ['ᤦ']='au' , ['ᤧ']='ê' , ['ᤨ']='ô' -} - -local special = { - -- idk what to call these - ['᤺']='̄', --kemphreng (vowel lengthener) - ['᤹']='’', --mukphreng (glottalizer) - ['ᤲ'] = '̃', --anusvara (now obsolete) -} - -local subjoined = { - ['ᤪ']='r', ['ᤫ']='w', ['ᤩ']='y', -} - -local finals = { - ['ᤰ']='k', ['ᤱ']='ṅ', ['ᤳ']='t', ['ᤴ']='n', ['ᤵ']='p', ['ᤶ']='m', ['ᤷ']='r', ['ᤸ']='l', -} - -local nonconsonants = { - - -- digits - ['᥆'] = '0', ['᥇'] = '1', ['᥈'] = '2', ['᥉'] = '3', ['᥊'] = '4', - ['᥋'] = '5', ['᥌'] = '6', ['᥍'] = '7', ['᥎'] = '8', ['᥏'] = '9', - ['॥']='.', ['᥄']='!', ['᥅']='?', - ['᥀']='lo' -} - --- translit any words or phrases -function export.tr(text, lang, sc) - text = mw.ustring.gsub(text, '([ᤁᤂᤃᤄᤅᤆᤇᤈᤉᤊᤋᤌᤍᤎᤏᤐᤑᤒᤓᤔᤕᤖᤗᤘᤙᤚᤛᤜᤝᤞᤀ])᤻', '᤺%1⌫') -- treat underscore as kemphreng - text = mw.ustring.gsub( - text, - '([ᤁᤂᤃᤄᤅᤆᤇᤈᤉᤊᤋᤌᤍᤎᤏᤐᤑᤒᤓᤔᤕᤖᤗᤘᤙᤚᤛᤜᤝᤞᤀ])'.. - '([ᤪᤫᤩ]?)'.. - '([ᤠᤡᤢᤣᤤᤥᤦᤧ ᤨ]?)'.. - '([᤹᤺ᤲ]?)'.. - '([ᤰᤱᤳᤴᤵᤶᤷᤸ]?)', - function(c, d, e, f, g) - -- mw.log('match', c, d) - return (consonants[c] or c) .. - (subjoined[d] or d) .. - (diacritics[e] or (e ~= "") and e or 'ô') .. - (special[f] or f) .. 
- (finals[g] or g) - end) - - text = mw.ustring.gsub(text, '.', nonconsonants) - text = mw.ustring.gsub(text, '(.)⌫', '') - - return mw.ustring.toNFC(text) -end - -return export \ No newline at end of file diff --git a/wikt/translit/linb-translit.lua b/wikt/translit/linb-translit.lua deleted file mode 100644 index 5f03f4f..0000000 --- a/wikt/translit/linb-translit.lua +++ /dev/null @@ -1,158 +0,0 @@ -local export = {} - -local chars = { - ["𐀀"] = "a", - ["𐀁"] = "e", - ["𐀂"] = "i", - ["𐀃"] = "o", - ["𐀄"] = "u", - - ["𐀅"] = "da", - ["𐀆"] = "de", - ["𐀇"] = "di", - ["𐀈"] = "do", - ["𐀉"] = "du", - - ["𐀊"] = "ja", - ["𐀋"] = "je", - -- ji not in Unicode - ["𐀍"] = "jo", - ["𐀎"] = "ju", - - ["𐀏"] = "ka", - ["𐀐"] = "ke", - ["𐀑"] = "ki", - ["𐀒"] = "ko", - ["𐀓"] = "ku", - - ["𐀔"] = "ma", - ["𐀕"] = "me", - ["𐀖"] = "mi", - ["𐀗"] = "mo", - ["𐀘"] = "mu", - - ["𐀙"] = "na", - ["𐀚"] = "ne", - ["𐀛"] = "ni", - ["𐀜"] = "no", - ["𐀝"] = "nu", - - ["𐀞"] = "pa", - ["𐀟"] = "pe", - ["𐀠"] = "pi", - ["𐀡"] = "po", - ["𐀢"] = "pu", - - ["𐀣"] = "qa", - ["𐀤"] = "qe", - ["𐀥"] = "qi", - ["𐀦"] = "qo", - -- qu not in Unicode - - ["𐀨"] = "ra", - ["𐀩"] = "re", - ["𐀪"] = "ri", - ["𐀫"] = "ro", - ["𐀬"] = "ru", - - ["𐀭"] = "sa", - ["𐀮"] = "se", - ["𐀯"] = "si", - ["𐀰"] = "so", - ["𐀱"] = "su", - - ["𐀲"] = "ta", - ["𐀳"] = "te", - ["𐀴"] = "ti", - ["𐀵"] = "to", - ["𐀶"] = "tu", - - ["𐀷"] = "wa", - ["𐀸"] = "we", - ["𐀹"] = "wi", - ["𐀺"] = "wo", - -- wu not in Unicode - - ["𐀼"] = "za", - ["𐀽"] = "ze", - -- zi not in Unicode - ["𐀿"] = "zo", - -- zu not in Unicode - - ["𐁀"] = "ha", - ["𐁁"] = "ai", - ["𐁂"] = "au", - ["𐁃"] = "dwe", - ["𐁄"] = "dwo", - ["𐁅"] = "nwo", - ["𐁆"] = "phu", - ["𐁇"] = "pte", - ["𐁈"] = "rya", - ["𐁉"] = "rai", - ["𐁊"] = "ryo", - ["𐁋"] = "tya", - ["𐁌"] = "twe", - ["𐁍"] = "two", - - ["𐁐"] = "*18", - ["𐁑"] = "*19", - ["𐁒"] = "*22", - ["𐁓"] = "*34", - ["𐁔"] = "*47", - ["𐁕"] = "*49", - ["𐁖"] = "*56", - ["𐁗"] = "*63", - ["𐁘"] = "*64", - ["𐁙"] = "*79", - ["𐁚"] = "*82", - ["𐁛"] = "*83", - ["𐁜"] = "*86", - 
["𐁝"] = "*89", - - -- explicit morpheme boundary - ["-"] = "`", -} - -function export.tr(text, lang, sc) - local ret = {} - local i = 1 - - for c in string.gmatch(text, "[%z\1-\127\194-\244][\128-\191]*") do -- UTF-8 character pattern - ret[i] = chars[c] or c - i = i + 1 - end - - text = string.gsub(table.concat(ret, "-"), "%- %-", " ") - text = string.gsub(text, "%-?`%-?", "-") - - return text -end - -return export - --- All tests passed. (refresh) - --- test: --- Text Expected Actual --- Passed 𐀷𐀙𐀏 wa-na-ka wa-na-ka --- Passed 𐀂𐀦 i-qo i-qo --- Passed 𐀣𐀯𐀩𐀄 qa-si-re-u qa-si-re-u --- Passed 𐀇𐀺𐀝𐀰 di-wo-nu-so di-wo-nu-so --- Passed 𐀀𐀳𐀖𐀵 a-te-mi-to a-te-mi-to --- Passed 𐀏𐀅𐀖𐀊 ka-da-mi-ja ka-da-mi-ja --- Passed 𐀀𐀂𐀴𐀍𐀦 a-i-ti-jo-qo a-i-ti-jo-qo --- Passed 𐀗𐀪𐀺𐀈 mo-ri-wo-do mo-ri-wo-do --- return require("Module:transliteration module testcases") ( --- require("Module:Linb-translit").tr, --- { --- { "𐀷𐀙𐀏", "wa-na-ka" }, --- { "𐀂𐀦", "i-qo" }, --- { "𐀣𐀯𐀩𐀄", "qa-si-re-u" }, --- { "𐀇𐀺𐀝𐀰", "di-wo-nu-so" }, --- { "𐀀𐀳𐀖𐀵", "a-te-mi-to" }, --- { "𐀏𐀅𐀖𐀊", "ka-da-mi-ja" }, --- { "𐀀𐀂𐀴𐀍𐀦", "a-i-ti-jo-qo" }, --- { "𐀗𐀪𐀺𐀈", "mo-ri-wo-do" }, --- }, --- "Linb", "gmy" --- ) \ No newline at end of file diff --git a/wikt/translit/lo-translit.lua b/wikt/translit/lo-translit.lua deleted file mode 100644 index 8b510b9..0000000 --- a/wikt/translit/lo-translit.lua +++ /dev/null @@ -1,397 +0,0 @@ --- This module will transliterate Lao language text per the LC (Library of Congress) scheme. 
- -local export = {} -local gsub = mw.ustring.gsub -local len = mw.ustring.len -local match = mw.ustring.match -local sub = mw.ustring.sub - -local initial_conv = { - ["ກ"] = "k", ["ຂ"] = "kh", ["ຄ"] = "kh", ["ງ"] = "ng", - ["ຈ"] = "ch", ["ສ"] = "s", ["ຊ"] = "s", ["ຍ"] = "ny", - ["ດ"] = "d", ["ຕ"] = "t", ["ຖ"] = "th", ["ທ"] = "th", ["ນ"] = "n", - ["ບ"] = "b", ["ປ"] = "p", ["ຜ"] = "ph", ["ຝ"] = "f", ["ພ"] = "ph", ["ຟ"] = "f", ["ມ"] = "m", - ["ຢ"] = "y", ["ຣ"] = "r", ["ລ"] = "l", ["ວ"] = "w", - ["ຫ"] = "h", ["ອ"] = "’", ["ຮ"] = "h", - - ["ຫງ"] = "ng", - ["ຫຍ"] = "ny", - ["ໜ"] = "n", ["ຫນ"] = "n", - ["ໝ"] = "m", ["ຫມ"] = "m", - ["ຫຼ"] = "r", ["ຫຣ"] = "r", - ["ຫຼ"] = "l", ["ຫລ"] = "l", - ["ຫວ"] = "w" -} - -local vowel_conv = { - ["ະ"] = "a", ["ັ"] = "a", - ["ິ"] = "i", - ["ຶ"] = "ư", ["ຸ"] = "u", - ["ເະ"] = "e", ["ເັ"] = "e", - ["ແະ"] = "æ", ["ແັ"] = "æ", - ["ໂະ"] = "o", ["ົ"] = "o", - ["ເາະ"] = "ǫ", ["ັອ"] = "ǫ", - ["ເິ"] = "œ", - ["ເັຍ"] = "ia", ["ັຽ"] = "ia", - ["ເຶອ"] = "ưa", - ["ົວະ"] = "ua", ["ັວ"] = "ua", ["ວັ"] = "ua", - ["ໄ"] = "ai", ["ໃ"] = "ai", ["ັຍ"] = "ai", - ["ເົາ"] = "ao", - ["ົາວ"] = "uau", - ["ຳ"] = "am", ["ໍາ"] = "am", - ["ວຳ"] = "uam", - - ["າ"] = "ā", - ["າວ"] = "āo", - ["ີ"] = "ī", - ["ື"] = "ư̄", - ["ູ"] = "ū", - ["ເ"] = "ē", - ["ແ"] = "ǣ", - ["ໂ"] = "ō", - ["ໂຍ"] = "ōi", ["ໂຽ"] = "ōi", - ["ໍ"] = "ǭ", ["ອ"] = "ǭ", - ["ອຍ"] = "ǭi", ["ອຽ"] = "ǭi", - ["ເີ"] = "œ̄", - ["ເີຽ"] = "œ̄i", ["ເີຍ"] = "œ̄i", - ["ເຍ"] = "īa", ["ເັຽ"] = "īa", ["ຽ"] = "īa", - ["ເືອ"] = "ư̄a", - ["ົວ"] = "ūa", ["ວ"] = "ūa", - ["ວຍ"] = "uāi", ["ວຽ"] = "uāi", ["ວຽນ"] = "uīan", - ["າຍ"] = "āi", ["າຽ"] = "āi", - ["ວາ"] = "uā", - ["ວາຍ"] = "uāi", ["ວາຽ"] = "uāi", - ["ແວ"] = "ǣu", -- ແ_ວ can bo both ǣu and uǣ. The first is more common. 
- ["ີວ"] = "īu", ["ິວ"] = "iu", - ["ຽວ"] = "iāu", - ["ວີວ"] = "uīu", -} - -local coda_conv = { - ["ກ"] = "k", ["ຂ"] = "k", ["ຄ"] = "k", - ["ງ"] = "ng", - ["ຈ"] = "t", ["ສ"] = "t", ["ຊ"] = "t", - ["ຍ"] = "ny", - ["ດ"] = "t", ["ຕ"] = "t", ["ຖ"] = "t", ["ທ"] = "t", - ["ນ"] = "n", - ["ບ"] = "p", ["ປ"] = "p", ["ຜ"] = "p", ["ຝ"] = "p", ["ພ"] = "p", ["ຟ"] = "p", - ["ມ"] = "m", - ["ຢ"] = "y", - ["ຣ"] = "n", ["ລ"] = "n", - ["ວ"] = "w", - [""] = "", -} - -local sp_symbols = { - ["ຯ"] = "〃", ["ໆ"] = "〃", - ["໌"] = "", - ["໐"] = "0", ["໑"] = "1", ["໒"] = "2", ["໓"] = "3", ["໔"] = "4", - ["໕"] = "5", ["໖"] = "6", ["໗"] = "7", ["໘"] = "8", ["໙"] = "9" -} - -local char_type = { - ['ກ'] = 'coda', ['ຂ'] = 'coda', ['ຄ'] = 'coda', ['ງ'] = 'coda', - ['ຈ'] = 'coda', ['ຊ'] = 'coda', ['ຍ'] = 'ambig', - ['ດ'] = 'coda', ['ຕ'] = 'coda', ['ຖ'] = 'coda', ['ທ'] = 'coda', ['ນ'] = 'coda', - ['ບ'] = 'coda', ['ປ'] = 'coda', ['ຜ'] = 'coda', ['ຝ'] = 'coda', ['ພ'] = 'coda', ['ຟ'] = 'coda', ['ມ'] = 'coda', - ['ຢ'] = 'coda', ['ຣ'] = 'coda', ['ລ'] = 'coda', ['ວ'] = 'ambig', - ['ສ'] = 'coda', ['ຫ'] = 'cons', ['ອ'] = 'ambig', ['ຮ'] = 'cons', - ['ຯ'] = 'iter_symbol', - ['ະ'] = 'vowel_let', ['ັ'] = 'suf_vowel', ['າ'] = 'vowel_let', ['ຳ'] = 'suf_vowel', - ['ິ'] = 'suf_vowel', ['ີ'] = 'suf_vowel', ['ຶ'] = 'suf_vowel', ['ື'] = 'suf_vowel', - ['ຸ'] = 'suf_vowel', ['ູ'] = 'suf_vowel', ['ົ'] = 'suf_vowel', - ['ຼ'] = 'cons', - ['ຽ'] = 'vowel_let', - ['ເ'] = 'pref_vowel', ['ແ'] = 'pref_vowel', - ['ໂ'] = 'pref_vowel', ['ໃ'] = 'pref_vowel', ['ໄ'] = 'pref_vowel', - ['ໆ'] = 'iter_symbol', - ['່'] = 'tone', ['້'] = 'tone', ['໊'] = 'tone', ['໋'] = 'tone', - ['໌'] = 'canc_symbol', ['ໍ'] = 'suf_vowel', - ['໐'] = 'number', ['໑'] = 'number', ['໒'] = 'number', ['໓'] = 'number', ['໔'] = 'number', - ['໕'] = 'number', ['໖'] = 'number', ['໗'] = 'number', ['໘'] = 'number', ['໙'] = 'number', - ['ໜ'] = 'cons', ['ໝ'] = 'cons' -} - -function export.tr(text, lang, sc, debug_mode) - text = gsub(text, '[່້໊໋​]', '') - - for lao_text in 
mw.ustring.gmatch(text, '[ກ-ໝ]+') do - local word, c, chartype, output = {}, {}, {}, {} - local curr_word, curr_initial, curr_vowel, curr_coda = {}, {}, {}, {} - local i = 1 - local original_text = lao_text - - for i = 1, len(lao_text) do - c[i] = sub(lao_text, i, i) - chartype[i] = char_type[c[i]] or table.insert(word, c[i]) - end - - for i = 1, #c + 1 do - if chartype[i] == 'pref_vowel' or i == #c + 1 then - if #curr_word ~= 0 then - table.insert(word, table.concat(curr_word)) - curr_word, curr_initial, curr_vowel, curr_coda = {c[i]}, {}, {c[i]}, {} - else - table.insert(curr_vowel, c[i]) - table.insert(curr_word, c[i]) - end - - elseif chartype[i] == 'suf_vowel' then - table.insert(curr_vowel, c[i]) - table.insert(curr_word, c[i]) - - elseif chartype[i] == 'ambig' then - if #curr_initial ~= 0 and vowel_conv[table.concat(curr_vowel)..c[i]] and (chartype[i+1] ~= 'suf_vowel' or match(c[i+1], '[ໍຳີັ]')) and #curr_coda == 0 then - table.insert(curr_vowel, c[i]) - table.insert(curr_word, c[i]) - elseif (#curr_initial == 0 and char_type[table.concat(curr_vowel)] == 'pref_vowel') or (#curr_initial ~= 0 and initial_conv[table.concat(curr_initial)..c[i]]) then - table.insert(curr_initial, c[i]) - table.insert(curr_word, c[i]) - else - if #curr_word ~= 0 then - table.insert(word, table.concat(curr_word)) - end - curr_word, curr_initial, curr_vowel, curr_coda = {c[i]}, {c[i]}, {}, {} - end - - elseif chartype[i] == 'vowel_let' then - table.insert(curr_vowel, c[i]) - table.insert(curr_word, c[i]) - - elseif chartype[i] == 'coda' and #curr_coda == 0 and #curr_initial ~= 0 and chartype[i+1] ~= 'suf_vowel' and chartype[i+1] ~= 'vowel_let' and not (chartype[i+1] == 'ambig' and match(chartype[i+2] or '', 'co')) and table.concat(curr_vowel) ~= "ວີວ" then - table.insert(curr_coda, c[i]) - table.insert(curr_word, c[i]) - - elseif chartype[i] == 'cons' or chartype[i] == 'coda' then - if #curr_coda == 0 and initial_conv[table.concat(curr_initial)..c[i]] and (#curr_vowel == 0 or 
char_type[table.concat(curr_vowel)] == 'pref_vowel') then - table.insert(curr_initial, c[i]) - table.insert(curr_word, c[i]) - else - table.insert(word, table.concat(curr_word)) - curr_word, curr_initial, curr_vowel, curr_coda = {c[i]}, {c[i]}, {}, {} - end - - elseif chartype[i] == 'iter_symbol' then - if #curr_word ~= 0 then - table.insert(word, table.concat(curr_word)) - end - curr_word, curr_initial, curr_vowel, curr_coda = {c[i]}, {}, {}, {} - - elseif chartype[i] == 'canc_symbol' then - table.insert(curr_word, c[i]) - - elseif chartype[i] == 'number' then - table.insert(curr_word, sp_symbols[c[i]]) - end - end - for i = 1, #word do - word[i] = gsub(word[i], '^([ເແໂໄໃຽ]?)(ຫ?[ກຂຄງຈສຊຍດຕຖທນບປຜຝພຟມຢຣລວຫອຮໜໝ]ຼ?)([^໌]*)(໌?)$', function(a, b, c, e) - if match(sub(c, -1, -1), '[ກຂຄງຈສຊຍດຕຖທນບປຜຝພຟມຢຣລວ]') then - d = sub(c, -1, -1) - c = sub(c, 1, -2) - else - d = '' - end - if a..c == '' then - c = 'ະ' - end - vowel = vowel_conv[a..c..d] or (vowel_conv[a..c] or a .. c) .. (coda_conv[d] or d) - if match(vowel, '[ກ-ໝ]') then - vowel = gsub(vowel, '^(.*)([ຍອວ])(.*)$', function(x, y, z) - return (vowel_conv[x] or x) .. ' ' .. (initial_conv[y] or y) .. (vowel_conv[z] or z) end) - end - return (initial_conv[b] or b) .. vowel .. e end) - - if char_type[word[i]] == 'iter_symbol' and i >= 2 then - word[i] = '' .. word[i-1] .. 
'' - end - - if match(word[i], '໌') and len(word[i]) > 1 then - word[i] = gsub(word[i], '(.)໌', '%1') - end - - table.insert(output, word[i]) - end - lao_text = table.concat(output, " ") - lao_text = gsub(lao_text, '.', sp_symbols) - - text = gsub(text, original_text, lao_text) - end - - if match(text, '[ກ-ໝ]') and not debug_mode then - return nil - else - return text - end -end - -return export - - --- Text Expected Actual Differs at --- Failed ຫວຍ hūai wany 1 --- Failed ອາຫານວ່າງ ’ā hān wāng ’ā hານ wā nga 5 --- Failed ກິໂລກຼາມ ki lō krām ki lōk ຼາມ 6 --- Failed ອຸບັດເຫດ ’u bat hēt ’u bat ຫດē 8 --- Failed ຫອຍນ້ຳຈືດ hǭi nam chư̄t ຫອany nam chư̄t 1 --- Failed ຫອຍ hǭi ຫອany 1 --- Failed ຫອຽ hǭi ຫອīa 1 --- Failed ກະໂຫລກ ka lōk ka lō ka 6 --- Passed ມາເຊດວນ mā sē dūan mā sē dūan --- Failed ຄຸຍ khui khu nya 4 --- Passed ແລ້ວ lǣu lǣu --- Passed ຮ້ອຽແກ້ວ hǭi kǣu hǭi kǣu --- Failed ເຂົ້າຈີ່ຝຣັ່ງ khao chī fa rang khao chīp rang 9 --- Failed ຫວຍທວາຍ hūai thuāi wຍທ wā nya 1 --- Failed ການປະຕິວັດ kān pa ti wat kān pa ti wັt 12 --- Passed ດ້າຽ dāi dāi --- Failed ກາບຫອຍທະເລ kāp hǭi tha lē kāp ຫອany tha lē 5 --- Failed ຄາວຽກ khā wīak khā wຽk 6 --- Failed ເຂົ້າຊອຍ khao sǭi khaot ’a nya 5 --- Passed ຂາຽ khāi khāi --- Failed ຫ້ອງ hǭng ຫອang 1 --- Failed ຫອຍນາງລົມ hǭi nāng lom ຫອany nāng lom 1 --- Passed ຄວາຍ khuāi khuāi --- Failed ອຸບັດເຫດ ’u bat hēt ’u bat ຫດē 8 --- Failed ເປືອຍ pư̄ai pư̄a nya 5 --- Passed ແຈ່ວບອງ chǣu bǭng chǣu bǭng --- Failed ແຫ້ງ hǣng ngǣ 1 --- Failed ສົງກຣານ song krān song ka rān 7 --- Passed ອິ່ມແລ້ວ ’im lǣu ’im lǣu --- Failed ນິວັດ ni wat ni wັt 5 --- Passed ແນວໃດ nǣu dai nǣu dai --- Passed ໝໍປົວແຂ້ວ mǭ pūa khǣu mǭ pūa khǣu --- Failed ແຫນງ nǣng nǣ nga 3 --- Passed ແກ້ວ kǣu kǣu --- Failed ຕຸ້ຍ tui tu nya 3 --- Failed ຫນ່ອຍ nǭi na ’a nya 2 --- Failed ຫ້ອງເຢັນ hǭng yen ຫອang yen 1 --- Passed ອານຸສາວະລີ ’ā nu sā wa lī ’ā nu sā wa lī --- Passed ກ້ວຍ kuāi kuāi --- Passed ເວັບເບຣົາວ໌ເຊີຣ໌ wep bē ruau sœ̄n wep bē ruau sœ̄n --- Failed ແຂວງ khuǣng khǣung 3 --- Passed 
ຍ່ອຽ nyǭi nyǭi --- Passed ກີ່ວ kīu kīu --- Passed ກົວລາລໍາເປີ kūa lā lam pœ̄ kūa lā lam pœ̄ --- Passed ສະຫະລັດອະເມລິກາ sa ha lat ’a mē li kā sa ha lat ’a mē li kā --- Passed ປະເທດອິນເດຍ pa thēt ’in dīa pa thēt ’in dīa --- Passed ສຍາມ sa nyām sa nyām --- Passed ການເມືອງ kān mư̄ang kān mư̄ang --- Passed ຊົນນະບົດ son na bot son na bot --- Passed ທິດຕາເວັນອອກ thit tā wen ’ǭk thit tā wen ’ǭk --- Passed ທະນາຄານ tha nā khān tha nā khān --- Passed ຄວາມໝາຍ khuām māi khuām māi --- Passed ພະໂພທິສັດ pha phō thi sat pha phō thi sat --- Passed ທາງຫຼວງ thāng lūang thāng lūang --- Passed ມຶງ mưng mưng --- Passed ເຂົາ khao khao --- Passed ເຂືອ khư̄a khư̄a --- Passed ຂ້ອຍ khǭi khǭi --- Passed ກວຽນ kuīan kuīan --- Passed ກະດານ ka dān ka dān --- Passed ຈະເຣີນອາຫານ cha rœ̄n ’ā hān cha rœ̄n ’ā hān --- Passed ເຈົ້າ chao chao --- Passed ໃນ​ເວ​ລາ​ທີ່ nai wē lā thī nai wē lā thī --- Passed ໂຮງຫັດຖະກັມ hōng hat tha kam hōng hat tha kam --- Passed ການ​ຕໍ່​ສູ້​ກັບ kān tǭ sū kap kān tǭ sū kap --- Passed ພາສາໄຕ phā sā tai phā sā tai --- Passed ພາສາລາວສືບທອດມາຈາກ phā sā lāo sư̄p thǭt mā chāk phā sā lāo sư̄p thǭt mā chāk --- Passed ຢູ່ພາກໃຕ້ຂອງຈີນເຊິ່ງເປັນຈຸດດັ່ງເດີມຂອງຫຼາຍພາສາຕ່າງໆ yū phāk tai khǭng chīn sœng pen chut dang dœ̄m khǭng lāi phā sā tāng tāng yū phāk tai khǭng chīn sœng pen chut dang dœ̄m khǭng lāi phā sā tāng tāng --- Passed ທີ່ຍັງໃຊ້ແລະເວົ້າຢູ່ໂດຍຫຼາຍຊົນເຜົ່າ thī nyang sai læ wao yū dōi lāi son phao thī nyang sai læ wao yū dōi lāi son phao --- Failed ເນື່ອງຈາກຖືກຄວາມກົດດັນຈາກການຂະຫຍາຍຕົວຂອງອານາຈັກຈີນ nư̄ang chāk thư̄k khuām kot dan chāk kān kha gnāi tūa khǭng ’ā nā chak chīn nư̄ang chāk thư̄k khuām kot dan chāk kān kha nyāi tūa khǭng ’ā nā chak chīn 46 --- Passed ວຽງຈັນ wīang chan wīang chan --- Passed ວຽງຈັນທນ໌ wīang chan than wīang chan than --- Passed ລັດເຊັຽ lat sīa lat sīa --- Passed ໌ --- Passed ຯ 〃 〃 --- local tests = require('Module:UnitTests') --- local lo_translit = require('Module:lo-translit') - --- function tests:check_tr(Laoo, Latn) --- return 
self:equals(('[[%s#Lao|%s]]'):format(Laoo, Laoo), lo_translit.tr(Laoo, 'lo', 'Laoo', true), Latn) --- end - --- function tests:test_translit() --- local examples = { --- { "ຫວຍ", "hūai" }, --- { "ອາຫານວ່າງ", "’ā hān wāng" }, --- { "ກິໂລກຼາມ", "ki lō krām" }, --- { "ອຸບັດເຫດ", "’u bat hēt" }, --- { "ຫອຍນ້ຳຈືດ", "hǭi nam chư̄t" }, --- { "ຫອຍ", "hǭi" }, --- { "ຫອຽ", "hǭi" }, --- { "ກະໂຫລກ", "ka lōk" }, --- { "ມາເຊດວນ", "mā sē dūan" }, --- { "ຄຸຍ", "khui" }, --- { "ແລ້ວ", "lǣu" }, --- { "ຮ້ອຽແກ້ວ", "hǭi kǣu" }, --- { "ເຂົ້າຈີ່ຝຣັ່ງ", "khao chī fa rang" }, --- { "ຫວຍທວາຍ", "hūai thuāi" }, --- { "ການປະຕິວັດ", "kān pa ti wat" }, --- { "ດ້າຽ", "dāi" }, --- { "ກາບຫອຍທະເລ", "kāp hǭi tha lē" }, --- { "ຄາວຽກ", "khā wīak" }, --- { "ເຂົ້າຊອຍ", "khao sǭi" }, --- { "ຂາຽ", "khāi" }, --- { "ຫ້ອງ", "hǭng" }, --- { "ຫອຍນາງລົມ", "hǭi nāng lom" }, --- { "ຄວາຍ", "khuāi" }, --- { "ອຸບັດເຫດ", "’u bat hēt" }, --- { "ເປືອຍ", "pư̄ai" }, --- { "ແຈ່ວບອງ", "chǣu bǭng" }, --- { "ແຫ້ງ", "hǣng" }, --- { "ສົງກຣານ", "song krān" }, --- { "ອິ່ມແລ້ວ", "’im lǣu" }, --- -- { "ຄືນຫລັໍງ", "khư̄n lang" }, --- { "ນິວັດ", "ni wat" }, --- { "ແນວໃດ", "nǣu dai" }, --- { "ໝໍປົວແຂ້ວ", "mǭ pūa khǣu" }, --- { "ແຫນງ", "nǣng" }, --- { "ແກ້ວ", "kǣu" }, --- { "ຕຸ້ຍ", "tui" }, --- { "ຫນ່ອຍ", "nǭi" }, --- { "ຫ້ອງເຢັນ", "hǭng yen" }, --- { "ອານຸສາວະລີ", "’ā nu sā wa lī" }, --- { "ກ້ວຍ", "kuāi" }, --- { "ເວັບເບຣົາວ໌ເຊີຣ໌", "wep bē ruau sœ̄n" }, --- { "ແຂວງ", "khuǣng" }, --- { "ຍ່ອຽ", "nyǭi" }, --- { "ກີ່ວ", "kīu" }, --- { "ກົວລາລໍາເປີ", "kūa lā lam pœ̄" }, --- { "ສະຫະລັດອະເມລິກາ", "sa ha lat ’a mē li kā" }, --- { "ປະເທດອິນເດຍ", "pa thēt ’in dīa" }, --- { "ສຍາມ", "sa nyām" }, --- { "ການເມືອງ", "kān mư̄ang" }, --- { "ຊົນນະບົດ", "son na bot" }, --- { "ທິດຕາເວັນອອກ", "thit tā wen ’ǭk" }, --- { "ທະນາຄານ", "tha nā khān" }, --- { "ຄວາມໝາຍ", "khuām māi" }, --- { "ພະໂພທິສັດ", "pha phō thi sat" }, --- { "ທາງຫຼວງ", "thāng lūang" }, --- { "ມຶງ", "mưng" }, --- { "ເຂົາ", "khao" }, --- { "ເຂືອ", "khư̄a" }, --- { "ຂ້ອຍ", "khǭi" }, --- { 
"ກວຽນ", "kuīan" }, --- { "ກະດານ", "ka dān" }, --- { "ຈະເຣີນອາຫານ", "cha rœ̄n ’ā hān" }, --- { "ເຈົ້າ", "chao" }, --- { "ໃນ​ເວ​ລາ​ທີ່", "nai wē lā thī" }, --- { "ໂຮງຫັດຖະກັມ", "hōng hat tha kam" }, --- { "ການ​ຕໍ່​ສູ້​ກັບ", "kān tǭ sū kap" }, --- { "ພາສາໄຕ", "phā sā tai" }, --- { "ພາສາລາວສືບທອດມາຈາກ", "phā sā lāo sư̄p thǭt mā chāk" }, --- { "ຢູ່ພາກໃຕ້ຂອງຈີນເຊິ່ງເປັນຈຸດດັ່ງເດີມຂອງຫຼາຍພາສາຕ່າງໆ", "yū phāk tai khǭng chīn sœng pen chut dang dœ̄m khǭng lāi phā sā tāng tāng" }, --- { "ທີ່ຍັງໃຊ້ແລະເວົ້າຢູ່ໂດຍຫຼາຍຊົນເຜົ່າ", "thī nyang sai læ wao yū dōi lāi son phao" }, --- { "ເນື່ອງຈາກຖືກຄວາມກົດດັນຈາກການຂະຫຍາຍຕົວຂອງອານາຈັກຈີນ", "nư̄ang chāk thư̄k khuām kot dan chāk kān kha gnāi tūa khǭng ’ā nā chak chīn" }, --- { "ວຽງຈັນ", "wīang chan" }, --- { "ວຽງຈັນທນ໌", "wīang chan than" }, --- { "ລັດເຊັຽ", "lat sīa" }, --- { "໌", "" }, --- { "ຯ", "〃" }, --- } - --- return self:iterate(examples, "check_tr") --- end - --- return tests \ No newline at end of file diff --git a/wikt/translit/lyci-translit.lua b/wikt/translit/lyci-translit.lua deleted file mode 100644 index ab50b89..0000000 --- a/wikt/translit/lyci-translit.lua +++ /dev/null @@ -1,39 +0,0 @@ -local export = {} - -local chars = { - ["𐊀"] = "a", - ["𐊂"] = "b", - ["𐊄"] = "g", - ["𐊅"] = "d", - ["𐊆"] = "i", - ["𐊇"] = "w", - ["𐊈"] = "z", - ["𐊛"] = "h", - ["𐊉"] = "θ", - ["𐊊"] = "j", - ["𐊋"] = "k", - ["𐊍"] = "l", - ["𐊎"] = "m", - ["𐊏"] = "n", - ["𐊒"] = "u", - ["𐊓"] = "p", - ["𐊔"] = "k", - ["𐊕"] = "r", - ["𐊖"] = "s", - ["𐊗"] = "t", - ["𐊁"] = "e", - ["𐊙"] = "ã", - ["𐊚"] = "ẽ", - ["𐊐"] = "m̃", - ["𐊑"] = "ñ", - ["𐊘"] = "τ", - ["𐊌"] = "q", - ["𐊃"] = "β", - ["𐊜"] = "χ", -} - -function export.tr(text, lang, sc) - return (mw.ustring.gsub(text, '.', chars)) -end - -return export \ No newline at end of file diff --git a/wikt/translit/lydi-translit.lua b/wikt/translit/lydi-translit.lua deleted file mode 100644 index e6994ec..0000000 --- a/wikt/translit/lydi-translit.lua +++ /dev/null @@ -1,36 +0,0 @@ -local export = {} - -local chars = { - 
["𐤠"] = "a", - ["𐤵"] = "ã", - ["𐤡"] = "b", - ["𐤹"] = "c", - ["𐤣"] = "d", - ["𐤤"] = "e", - ["𐤶"] = "ẽ", - ["𐤱"] = "f", - ["𐤢"] = "g", - ["𐤦"] = "i", - ["𐤧"] = "y", - ["𐤨"] = "k", - ["𐤩"] = "l", - ["𐤷"] = "λ", - ["𐤪"] = "m", - ["𐤫"] = "n", - ["𐤸"] = "v", - ["𐤬"] = "o", - ["𐤲"] = "q", - ["𐤭"] = "r", - ["𐤳"] = "s", - ["𐤮"] = "ś", - ["𐤯"] = "t", - ["𐤴"] = "τ", - ["𐤰"] = "u", - ["𐤥"] = "v", -} - -function export.tr(text, lang, sc) - return (mw.ustring.gsub(text, '.', chars)) -end - -return export \ No newline at end of file diff --git a/wikt/translit/mani-translit.lua b/wikt/translit/mani-translit.lua deleted file mode 100644 index c0054a2..0000000 --- a/wikt/translit/mani-translit.lua +++ /dev/null @@ -1,65 +0,0 @@ --- This module will transliterate text in the Manichaean script. It is used to transliterate Middle Persian (pal), --- Sogdian (sog) and Parthian (xpr). - -local export = {} - -local tt = { - ["𐫀"] = "ʾ", -- aleph - ["𐫁"] = "b", -- beth - ["𐫂"] = "β", -- bheth - ["𐫃"] = "g", -- gimel - ["𐫄"] = "ɣ", -- ghimel - ["𐫅"] = "d", -- daleth - ["𐫆"] = "ẖ", -- he - ["𐫇"] = "w", -- waw - ["𐫈"] = "ẉ̇", -- ud (conjunction) - ["𐫉"] = "z", -- zayin - ["𐫊"] = "ž", -- zhayin - ["𐫋"] = "j", -- jayin - ["𐫌"] = "ǰ", -- jhayin - ["𐫍"] = "h", -- heth - ["𐫎"] = "ṯ", -- teth - ["𐫏"] = "y", -- yodh - ["𐫐"] = "k", -- kaph - ["𐫑"] = "k̇", -- xaph - ["𐫒"] = "k̈", -- khaph - ["𐫓"] = "l", -- lamedh - ["𐫔"] = "δ", -- dhamedh - ["𐫕"] = "θ", -- thamedh - ["𐫖"] = "m", -- mem - ["𐫗"] = "n", -- nun - ["𐫘"] = "s", -- samekh - ["𐫙"] = "ʿ", -- ayin - ["𐫚"] = "ʿ̈ ", -- aayin - ["𐫛"] = "p", -- pe - ["𐫜"] = "f", -- fe - ["𐫝"] = "c", -- sadhe - ["𐫞"] = "q", -- qoph - ["𐫟"] = "x", -- xoph - ["𐫠"] = "q̈", -- qhoph - ["𐫡"] = "r", -- resh - ["𐫢"] = "š", -- shin - ["𐫣"] = "ś", -- sshin - ["𐫤"] = "t", -- taw - ["◌𐫥"] = "̃", -- abbreviation mark above - ["◌𐫦"] = "̃", -- abbreviation mark below - ["𐫫"] = "1", -- one - ["𐫬"] = "5", -- five - ["𐫭"] = "10", -- ten - ["𐫮"] = "20", -- twenty - ["𐫯"] = "100", -- 
one hundred -} - -function export.tr(text, lang, sc) - -- If the script is not Mani, do not transliterate - if sc ~= "Mani" then - return - end - - -- Transliterate characters - text = mw.ustring.gsub(text, '.', tt) - - return text -end - -return export \ No newline at end of file diff --git a/wikt/translit/mdf-translit.lua b/wikt/translit/mdf-translit.lua deleted file mode 100644 index a188c0f..0000000 --- a/wikt/translit/mdf-translit.lua +++ /dev/null @@ -1,28 +0,0 @@ --- This module will transliterate Moksha language text per WT:MDF TR. - -local export = {} - -local tab = { - ["А"]="A", ["Б"]="B", ["В"]="V", ["Г"]="G", ["Д"]="D", ["Е"]="E", ["Ё"]="Jo", ["Ж"]="Ž", ["З"]="Z", ["И"]="I", ["Й"]="J", - ["К"]="K", ["Л"]="L", ["М"]="M", ["Н"]="N", ["О"]="O", ["П"]="P", ["Р"]="R", ["С"]="S", ["Т"]="T", ["У"]="U", ["Ф"]="F", - ["Х"]="X", ["Ц"]="C", ["Ч"]="Č", ["Ш"]="Š", ["Щ"]="Šč", ["Ъ"]="ʺ", ["Ы"]="Y", ["Ь"]="ʹ", ["Э"]="E", ["Ю"]="Ju", ["Я"]="Ja", - ['а']='a', ['б']='b', ['в']='v', ['г']='g', ['д']='d', ['е']='e', ['ё']='jo', ['ж']='ž', ['з']='z', ['и']='i', ['й']='j', - ['к']='k', ['л']='l', ['м']='m', ['н']='n', ['о']='o', ['п']='p', ['р']='r', ['с']='s', ['т']='t', ['у']='u', ['ф']='f', - ['х']='x', ['ц']='c', ['ч']='č', ['ш']='š', ['щ']='šč', ['ъ']='ʺ', ['ы']='y', ['ь']='ʹ', ['э']='e', ['ю']='ju', ['я']='ja', -} - -function export.tr(text, lang, sc) - -- Ё needs converting if is decomposed - text = text:gsub("ё","ё"):gsub("Ё","Ё") - - -- е after a vowel or at the beginning of a word becomes je - text = mw.ustring.gsub(text, "([АОУЫЕЯЁЮИЕЪЬаоуыэяёюиеъь%A][́̀]?)е","%1je") - text = mw.ustring.gsub(text, "^Е","Je") - text = mw.ustring.gsub(text, "^е","je") - text = mw.ustring.gsub(text, "([^Ѐ-ӿ])Е","%1Je") - text = mw.ustring.gsub(text, "([^Ѐ-ӿ])е","%1je") - - return (mw.ustring.gsub(text,'.',tab)) -end - -return export \ No newline at end of file diff --git a/wikt/translit/mk-translit.lua b/wikt/translit/mk-translit.lua deleted file mode 100644 index a766dcd..0000000 
--- a/wikt/translit/mk-translit.lua +++ /dev/null @@ -1,42 +0,0 @@ --- This module will transliterate Macedonian language text per WT:MK TR. -local export = {} - -local tt = { - ["А"]='A', ["а"]='a', - ["Б"]='B', ["б"]='b', - ["В"]='V', ["в"]='v', - ["Г"]='G', ["г"]='g', - ["Ѓ"]='Ǵ', ["ѓ"]='ǵ', - ["Д"]='D', ["д"]='d', - ["Е"]='E', ["е"]='e', ["Ѐ"]='È', ["ѐ"]='è', - ["Ж"]='Ž', ["ж"]='ž', - ["З"]='Z', ["з"]='z', - ["Ѕ"]='Dz', ["ѕ"]='dz', - ["И"]='I', ["и"]='i', ["Ѝ"]='Ì', ["ѝ"]='ì', - ["Ј"]='J', ["ј"]='j', - ["К"]='K', ["к"]='k', - ["Л"]='L', ["л"]='l', - ["Љ"]='Lj', ["љ"]='lj', - ["М"]='M', ["м"]='m', - ["Н"]='N', ["н"]='n', - ["Њ"]='Nj', ["њ"]='nj', - ["О"]='O', ["о"]='o', - ["П"]='P', ["п"]='p', - ["Р"]='R', ["р"]='r', - ["С"]='S', ["с"]='s', - ["Т"]='T', ["т"]='t', - ["Ќ"]='Ḱ', ["ќ"]='ḱ', - ["У"]='U', ["у"]='u', - ["Ф"]='F', ["ф"]='f', - ["Х"]='H', ["х"]='h', - ["Ц"]='C', ["ц"]='c', - ["Ч"]='Č', ["ч"]='č', - ["Џ"]='Dž', ["џ"]='dž', - ["Ш"]='Š', ["ш"]='š', -}; - -function export.tr(text, lang, sc) - return (mw.ustring.gsub(text, '.', tt)) -end - -return export \ No newline at end of file diff --git a/wikt/translit/ml-translit.lua b/wikt/translit/ml-translit.lua deleted file mode 100644 index d52bd89..0000000 --- a/wikt/translit/ml-translit.lua +++ /dev/null @@ -1,114 +0,0 @@ --- This module will transliterate Malayalam language text. 
- -local export = {} - -local consonants = { - ['ക']='k', ['ഖ']='kh', ['ഗ']='g', ['ഘ']='gh', ['ങ']='ṅ', - ['ച']='c', ['ഛ']='ch', ['ജ']='j', ['ഝ']='jh', ['ഞ']='ñ', - ['ട']='ṭ', ['ഠ']='ṭh', ['ഡ']='ḍ', ['ഢ']='ḍh', ['ണ']='ṇ', - ['ത']='t', ['ഥ']='th', ['ദ']='d', ['ധ']='dh', ['ന']='n', - ['പ']='p', ['ഫ']='ph', ['ബ']='b', ['ഭ']='bh', ['മ']='m', - ['യ']='y', ['ര']='r', ['ല']='l', ['വ']='v', - ['ശ']='ś', ['ഷ']='ṣ', ['സ']='s', ['ഹ']='h', - ['ള']='ḷ', ['ഴ']='ḻ', ['റ']='ṟ' , ['ഩ']='ṉ' , ['ഺ']='ṯ' , -} - -local diacritics = { - ['\224\181\129\224\181\141'] = 'ŭ', - ['\224\180\190'] = 'ā' , - ['\224\180\191'] = 'i' , - ['\224\181\128'] = 'ī' , - ['\224\181\129'] = 'u' , - ['\224\181\130'] = 'ū' , - ['\224\181\131'] = 'r̥' , - ['\224\181\132'] = 'r̥̄' , - ['\224\181\134'] = 'e' , - ['\224\181\135'] = 'ē' , - ['\224\181\136'] = 'ai', - ['\224\181\138'] = 'o' , - ['\224\181\139'] = 'ō' , - ['\224\181\151'] = 'au', - ['\224\181\162'] = 'l̥ ', - ['\224\181\163'] = 'l̥̄' , - --halant, supresses the inherent vowel "a" - ['\224\181\141'] = '', - -- no diacritic - [''] = 'a' -} - -local nonconsonants = { - -- vowels - ['അ']='a' , ['ആ']='ā' , ['ഇ']='i' , ['ഈ']='ī' , ['ഉ']='u' , ['ഊ']='ū' , - ['ഋ']='r̥' , ['ൠ']='r̥̄' , ['ഌ']='l̥' , ['ൡ']='l̥̄', ['എ']='e' , ['ഏ']='ē' , - ['ഐ']='ai' , ['ഒ']='o' , ['ഓ']='ō' , ['ഔ']='au' , - -- other symbols - ['ം']='ṃ', -- anusvara - ['ഃ']='ḥ' , -- visarga - ['ഽ']='’', -- praślēṣam - -- chillus, consonants that never take vowels - ['ൺ']='ṇ' , ['ൻ']='n' , ['ർ']='r' , ['ൽ']='l' , ['ൾ']='ḷ' , ['ൿ']='k' , - -- digits - ['൦'] = '0', ['൧'] = '1', ['൨'] = '2', ['൩'] = '3', ['൪'] = '4', - ['൫'] = '5', ['൬'] = '6', ['൭'] = '7', ['൮'] = '8', ['൯']= '9', - ['൰']='10', ['൱']='100', ['൲']='1000', ['൳']='¼', ['൴']='½', ['൵']='¾', -} - --- translit any words or phrases -function export.tr(text, lang, sc) - text = mw.ustring.gsub( - text, - '([കഖഗഘങചഛജഝഞടഠഡഢണതഥദധനപഫബഭമയരലവശഷസഹളഴറഩഺ])'.. 
- '(\224\181\129?[\224\180\190\224\180\191\224\181\128\224\181\129\224\181\130\224\181\131\224\181\132\224\181\162\224\181\163\224\181\134\224\181\135\224\181\138\224\181\139\224\181\136\224\181\151\224\181\141]?)', - function(c, d) - return consonants[c] .. (diacritics[d] or d) - end) - - text = mw.ustring.gsub(text, '.', nonconsonants) - - -- anusvara - text = mw.ustring.gsub(text, 'ṃ([kgṅ])', 'ṅ%1') - text = mw.ustring.gsub(text, 'ṃ([cjñ])', 'ñ%1') - text = mw.ustring.gsub(text, 'ṃ([ṭḍṇ])', 'ṇ%1') - text = mw.ustring.gsub(text, 'ṃ([tdn])', 'n%1') - text = mw.ustring.gsub(text, 'ṃ([pbm])', 'm%1') - - return text -end - -return export - - --- 1 test failed. (refresh) - --- test_translit_malayalam: --- Text Expected Actual Differs at --- Failed ഡിസംബര് ḍisaṃbar ḍisambar 5 --- Passed രാജാവു് rājāvŭ rājāvŭ --- Passed ഹിന്ദുമതം hindumataṃ hindumataṃ --- Passed അവൻ avan avan --- Passed ലളിതാഽപി laḷitā’pi laḷitā’pi --- Passed അനുസ്വാരഃ anusvāraḥ anusvāraḥ --- Passed ആത്മാവു് ātmāvŭ ātmāvŭ --- Passed വിജ്ഞാനകോശം vijñānakōśaṃ vijñānakōśaṃ --- Passed അസ്സലാമു അലൈക്കും assalāmu alaikkuṃ assalāmu alaikkuṃ --- -- Unit tests for [[Module:ml-translit]]. Refresh page to run tests. --- local tests = require('Module:UnitTests') --- local ml_translit = require('Module:ml-translit') - --- function tests:check_translit(Mlym, roman) --- self:equals('[[' .. Mlym .. '#Malayalam|' .. Mlym .. 
']]', ml_translit.tr(Mlym, 'ml', 'Mlym'), roman) --- end - --- function tests:test_translit_malayalam() --- self:check_translit('ഡിസംബര്', 'ḍisaṃbar') --- self:check_translit('രാജാവു്', 'rājāvŭ') --- self:check_translit('ഹിന്ദുമതം', 'hindumataṃ') --- self:check_translit('അവൻ', 'avan') --- self:check_translit('ലളിതാഽപി', 'laḷitā’pi') --- self:check_translit('അനുസ്വാരഃ', 'anusvāraḥ') --- self:check_translit('ആത്മാവു്', 'ātmāvŭ') --- self:check_translit('വിജ്ഞാനകോശം', 'vijñānakōśaṃ') --- self:check_translit('അസ്സലാമു അലൈക്കും', 'assalāmu alaikkuṃ') --- end - --- return tests \ No newline at end of file diff --git a/wikt/translit/mnc-translit.lua b/wikt/translit/mnc-translit.lua deleted file mode 100644 index 86c8579..0000000 --- a/wikt/translit/mnc-translit.lua +++ /dev/null @@ -1,162 +0,0 @@ --- This module will transliterate Manchu language text. - -local export = {} - -local conv = { - ["ᠠ"] = "a", - ["ᡝ"] = "e", - ["ᡳ"] = "i", - ["ᡟ"] = "y", - ["ᠣ"] = "o", - ["ᡠ"] = "u", - ["ᡡ"] = "ū", - ["ᠨ"] = "n", - ["ᠩ"] = "ng", - ["ᡴ"] = "k", - ["ᡤ"] = "g", - ["ᡥ"] = "h", - ["ᠪ"] = "b", - ["ᡦ"] = "p", - ["ᠰ"] = "s", - ["ᡧ"] = "š", - ["ᡨ"] = "t", - ["ᡩ"] = "d", - ["ᠯ"] = "l", - ["ᠮ"] = "m", - ["ᠴ"] = "c", - ["ᠵ"] = "j", - ["ᠶ"] = "y", - ["ᡵ"] = "r", - ["ᡶ"] = "f", - ["ᠸ"] = "w", - ["ᠺ"] = "k'", - ["ᡬ"] = "g'", - ["ᡭ"] = "h'", - ["ᡮ"] = "ts'", - ["ᡮᡟ"] = "ts", - ["ᡯ"] = "dz", - ["ᡰ"] = "ž", - ["ᠰᡟ"] = "sy", - ["ᡱ"] = "c'", - ["ᡱᡳ"] = "c'y", - ["ᡷ"] = "j", - ["ᡷᡳ"] = "jy", - ["᠎"] = "-", - ["᠈"] = ",", - ["᠉"] = ".", -} - -function export.tr(text, lang, sc) - if sc ~= "Mong" then - return nil - end - - text = mw.ustring.gsub(text, "([ᠠᡝᡳᡟᠣᡠᡡᠨᠩᡴᡤᡥᠪᡦᠰᡧᡨᡩᠯᠮᠴᠵᠶᡵᡶᠸᠺᡬᡭᡮᡯᡰᡱᡷ᠈᠉])([ᡟᡳ]?)", function(a, b) - return conv[a..b] or conv[a] .. conv[b] end) - return text -end - -return export - --- All tests passed. 
(refresh) - --- test_translit_manchu: --- Text Expected Actual Differs at --- Passed ᠠᠨᡳᠶᠠ aniya aniya --- Passed ᠠᡤᠠ aga aga --- Passed ᠣᡵᡳᠨ orin orin --- Passed ᠨᠠᡩᠠᠨ nadan nadan --- Passed ᠨᠠᡩᠠᠨᠵᡠ nadanju nadanju --- Passed ᠨᡳᠨᠵᡠ ninju ninju --- Passed ᠨᡳᠩᡤᡠᠨ ninggun ninggun --- Passed ᠨᡳᡴᠠᠨ nikan nikan --- Passed ᠪᡳ bi bi --- Passed ᠪᡳᡵᠠ bira bira --- Passed ᠮᠠᠨᠵᡠ manju manju --- Passed ᠮᠠᠨᠵᡠ ᡤᡳᠰᡠᠨ manju gisun manju gisun --- Passed ᠮᠣᡵᡳᠨ morin morin --- Passed ᠮᡠᠰᡝ muse muse --- Passed ᠮᡠᡴᡝ muke muke --- Passed ᠮᡳᠩᡤᠠᠨ minggan minggan --- Passed ᠰᠣᠯᠣᠨᡨᡠ solontu solontu --- Passed ᠰᡠᠨᠵᠠ sunja sunja --- Passed ᠰᡠᠰᠠᡳ susai susai --- Passed ᠰᡠᠸᡝ suwe suwe --- Passed ᠰᡳ si si --- Passed ᠴᡝ ce ce --- Passed ᠵᠠᡴᡡᠨ jakūn jakūn --- Passed ᠵᠠᡴᡡᠨᠵᡠ jakūnju jakūnju --- Passed ᠵᡠᠸᠠᠨ juwan juwan --- Passed ᠵᡠᠸᡝ juwe juwe --- Passed ᡝᠮᡠ emu emu --- Passed ᡠᠰᡳᡥᠠ usiha usiha --- Passed ᡠᠶᡠᠨ uyun uyun --- Passed ᡠᠶᡠᠨᠵᡠ uyunju uyunju --- Passed ᡤᡡᠰᡳᠨ gūsin gūsin --- Passed ᡤᡳᠰᡠᠨ gisun gisun --- Passed ᡥᠠᡥᠠ haha haha --- Passed ᡦᠣᡠ pou pou --- Passed ᡧᠣᠯᠣᠨᡨᡠ šolontu šolontu --- Passed ᡧᡠᠨ šun šun --- Passed ᡨᠠᠩᡤᡡ tanggū tanggū --- Passed ᡨᠠᠴᡳᡴᡡ tacikū tacikū --- Passed ᡩᡝᡥᡳ dehi dehi --- Passed ᡩᡠᡳᠨ duin duin --- Passed ᡳ i i --- Passed ᡳᠯᠠᠨ ilan ilan --- Passed ᡳᠯᡥᠠ ilha ilha --- -- Unit tests for [[Module:mnc-translit]]. Refresh page to run tests. --- local tests = require('Module:UnitTests') --- local mnc_translit = require('Module:mnc-translit') - --- function tests:do_test_translit(mong, roman, xlit) --- self:equals('[[' .. mong .. '#Manchu|' .. mong .. 
']]', mnc_translit.tr(mong, 'mnc', 'Mong'), roman) --- end - --- function tests:test_translit_manchu() --- self:do_test_translit('ᠠᠨᡳᠶᠠ', 'aniya') --- self:do_test_translit('ᠠᡤᠠ', 'aga') --- self:do_test_translit('ᠣᡵᡳᠨ', 'orin') --- self:do_test_translit('ᠨᠠᡩᠠᠨ', 'nadan') --- self:do_test_translit('ᠨᠠᡩᠠᠨᠵᡠ', 'nadanju') --- self:do_test_translit('ᠨᡳᠨᠵᡠ', 'ninju') --- self:do_test_translit('ᠨᡳᠩᡤᡠᠨ', 'ninggun') --- self:do_test_translit('ᠨᡳᡴᠠᠨ', 'nikan') --- self:do_test_translit('ᠪᡳ', 'bi') --- self:do_test_translit('ᠪᡳᡵᠠ', 'bira') --- self:do_test_translit('ᠮᠠᠨᠵᡠ', 'manju') --- self:do_test_translit('ᠮᠠᠨᠵᡠ ᡤᡳᠰᡠᠨ', 'manju gisun') --- self:do_test_translit('ᠮᠣᡵᡳᠨ', 'morin') --- self:do_test_translit('ᠮᡠᠰᡝ', 'muse') --- self:do_test_translit('ᠮᡠᡴᡝ', 'muke') --- self:do_test_translit('ᠮᡳᠩᡤᠠᠨ', 'minggan') --- self:do_test_translit('ᠰᠣᠯᠣᠨᡨᡠ', 'solontu') --- self:do_test_translit('ᠰᡠᠨᠵᠠ', 'sunja') --- self:do_test_translit('ᠰᡠᠰᠠᡳ', 'susai') --- self:do_test_translit('ᠰᡠᠸᡝ', 'suwe') --- self:do_test_translit('ᠰᡳ', 'si') --- self:do_test_translit('ᠴᡝ', 'ce') --- self:do_test_translit('ᠵᠠᡴᡡᠨ', 'jakūn') --- self:do_test_translit('ᠵᠠᡴᡡᠨᠵᡠ', 'jakūnju') --- self:do_test_translit('ᠵᡠᠸᠠᠨ', 'juwan') --- self:do_test_translit('ᠵᡠᠸᡝ', 'juwe') --- self:do_test_translit('ᡝᠮᡠ', 'emu') --- self:do_test_translit('ᡠᠰᡳᡥᠠ', 'usiha') --- self:do_test_translit('ᡠᠶᡠᠨ', 'uyun') --- self:do_test_translit('ᡠᠶᡠᠨᠵᡠ', 'uyunju') --- self:do_test_translit('ᡤᡡᠰᡳᠨ', 'gūsin') --- self:do_test_translit('ᡤᡳᠰᡠᠨ', 'gisun') --- self:do_test_translit('ᡥᠠᡥᠠ', 'haha') --- self:do_test_translit('ᡦᠣᡠ', 'pou') --- self:do_test_translit('ᡧᠣᠯᠣᠨᡨᡠ', 'šolontu') --- self:do_test_translit('ᡧᡠᠨ', 'šun') --- self:do_test_translit('ᡨᠠᠩᡤᡡ', 'tanggū') --- self:do_test_translit('ᡨᠠᠴᡳᡴᡡ', 'tacikū') --- self:do_test_translit('ᡩᡝᡥᡳ', 'dehi') --- self:do_test_translit('ᡩᡠᡳᠨ', 'duin') --- self:do_test_translit('ᡳ', 'i') --- self:do_test_translit('ᡳᠯᠠᠨ', 'ilan') --- self:do_test_translit('ᡳᠯᡥᠠ', 'ilha') --- end - --- return tests \ No 
newline at end of file diff --git a/wikt/translit/mns-translit.lua b/wikt/translit/mns-translit.lua deleted file mode 100644 index 48db907..0000000 --- a/wikt/translit/mns-translit.lua +++ /dev/null @@ -1,27 +0,0 @@ --- This module will transliterate Mansi language text. --- Language code: mns - -local export = {} - -local tt = { - ["а"]="a", ["а̄"]="ā", ["б"]="b", ["в"]="v", ["г"]="g", ["д"]="d", ["е"]="e", - ["е̄"]="ē", ["ё"]="ë", ["ё̄"]="ë̄", ["ж"]="ž", ["з"]="z", ["и"]="i", ["ӣ"]="ī", - ["й"]="j", ["к"]="k", ["л"]="l", ["м"]="m", ["н"]="n", ["ӈ"]="ň", ["о"]="o", - ["о̄"]="ō", ["п"]="p", ["р"]="r", ["с"]="s", ["т"]="t", ["у"]="u", ["ӯ"]="ū", - ["ф"]="f", ["х"]="h", ["ц"]="c", ["ч"]="č", ["ш"]="š", ["щ"]="ŝ", ["ъ"]="ʺ", - ["ы"]="y", ["ы̄"]="ȳ", ["ь"]="ʹ", ["э"]="è", ["э̄"]="è̄", ["ю"]="û", ["ю̄"]="û̄", - ["я"]="â", ["я̄"]="â̄", - ["А"]="A", ["А̄"]="Ā", ["Б"]="B", ["В"]="V", ["Г"]="G", ["Д"]="D", ["Е"]="E", - ["Е̄"]="Ē", ["Ё"]="Ë", ["Ё̄"]="Ë̄", ["Ж"]="Ž", ["З"]="Z", ["И"]="I", ["Ӣ"]="Ī", - ["Й"]="J", ["К"]="K", ["Л"]="L", ["М"]="M", ["Н"]="N", ["Ӈ"]="Ň", ["О"]="O", - ["О̄"]="Ō", ["П"]="P", ["Р"]="R", ["С"]="S", ["Т"]="T", ["У"]="U", ["Ӯ"]="Ū", - ["Ф"]="F", ["Х"]="H", ["Ц"]="C", ["Ч"]="Č", ["Ш"]="Š", ["Щ"]="Ŝ", ["Ъ"]="ʺ", - ["Ы"]="Y", ["Ы̄"]="Ȳ", ["Ь"]="ʹ", ["Э"]="È", ["Э̄"]="È̄", ["Ю"]="Û", ["Ю̄"]="Û̄", - ["Я"]="Â", ["Я̄"]="Â̄", -}; - -function export.tr(text) - return (mw.ustring.gsub(text, '.', tt)) -end - -return export \ No newline at end of file diff --git a/wikt/translit/mon-translit.lua b/wikt/translit/mon-translit.lua deleted file mode 100644 index faade1d..0000000 --- a/wikt/translit/mon-translit.lua +++ /dev/null @@ -1,71 +0,0 @@ --- This module will transliterate Mongolian language text per WT:MN TR. --- It is also used to transliterate Classical Mongolian (cmg) and Middle Mongolian (xng). 
- -local export = {} - -local trfunc = {} - -local tt_Cyrl = { - ["Щ"] = "Šč", ["щ"] = "šč", ["Ы"] = "Y", ["ы"] = "y", ["Э"] = "E", ["э"] = "e", - ["Я"] = "Ya", ["я"] = "ya", ["У"] = "U", ["у"] = "u", ["С"] = "S", ["с"] = "s", - ["Ч"] = "Č", ["ч"] = "č", ["Х"] = "H", ["х"] = "h", ["Ж"] = "J", ["ж"] = "j", - ["Д"] = "D", ["д"] = "d", ["В"] = "V", ["в"] = "v", ["А"] = "A", ["а"] = "a", - ["М"] = "M", ["м"] = "m", ["О"] = "O", ["о"] = "o", ["И"] = "I", ["и"] = "i", - ["К"] = "K", ["к"] = "k", ["Ш"] = "Š", ["ш"] = "š", ["Ъ"] = "ʺ", ["ъ"] = "ʺ", - ["Ё"] = "Yo", ["ё"] = "yo", ["Ь"] = "ʹ", ["ь"] = "ʹ", ["Ю"] = "Yu", ["ю"] = "yu", - ["Т"] = "T", ["т"] = "t", ["Р"] = "R", ["р"] = "r", ["Ц"] = "Ts", ["ц"] = "ts", - ["Ф"] = "F", ["ф"] = "f", ["З"] = "Z", ["з"] = "z", ["Е"] = "E", ["е"] = "e", - ["Г"] = "G", ["г"] = "g", ["Б"] = "B", ["б"] = "b", ["Н"] = "N", ["н"] = "n", - ["П"] = "P", ["п"] = "p", ["Й"] = "I", ["й"] = "i", ["Л"] = "L", ["л"] = "l", - ["Ү"] = "Ü", ["ү"] = "ü", ["Ө"] = "Ö", ["ө"] = "ö" -} - -local tt_Mong = { - ["ᠠ"] = "a", ["ᠡ"] = "e", ["ᠢ"] = "i", ["ᠣ"] = "o", ["ᠤ"] = "u", - ["ᠥ"] = "ö", ["ᠦ"] = "ü", ["ᠧ"] = "ē", - ["ᠨ"] = "n", ["ᠩ"] = "ng", ["ᠪ"] = "b", ["ᠫ"] = "p", - ["ᠬ"] = "q", ["ᢉ"] = "k", ["ᠭ"] = "ɣ", ["ᢉ"] = "g", - ["ᠮ"] = "m", ["ᠯ"] = "l", ["ᠰ"] = "s", ["ᠱ"] = "š", - ["ᠲ"] = "t", ["ᠳ"] = "d", ["ᠴ"] = "č", ["ᠵ"] = "ǰ", - ["ᠶ"] = "y", ["ᠷ"] = "r", ["ᠸ"] = "w", ["ᠹ"] = "f", - ["ᠺ"] = "g", ["ᠻ"] = "k", ["ᠼ"] = "c", ["ᠽ"] = "z", - ["ᠾ"] = "h", ["ᠿ"] = "ž", ["ᡀ"] = "lh", ["ᡁ"] = "zh", ["ᡂ"] = "ch", - ["᠎"] = "-", ["︖"] = "?", ["︕"] = "!", ["᠂"] = ",", ["᠃"] = "." -} - -function trfunc.Cyrl(text) - text = mw.ustring.gsub(text, "([АОУЫЕЯЁЮИЕЪЬаоуыэяёюиеъь%A][\204\129\204\128]?)([Ее])", - function (a, e) - return a .. 
(e == "е" and "je" or "Je") - end):gsub("^Е", "Je"):gsub("^е", "je") - return (mw.ustring.gsub(text, ".", tt_Cyrl)) -end - -function trfunc.Mong(text) - local velar_conv = { ["q"] = "k", ["ɣ"] = "g" } - text = mw.ustring.gsub(text, ".", tt_Mong) - text = mw.ustring.gsub(text, "([qɣ])(.?)", function(velar, vowel) - return ((mw.ustring.match(vowel, "[eöü ]") or vowel == "") - and mw.ustring.gsub(velar, "[qɣ]", velar_conv) or velar) .. vowel end) - return text -end - -function export.tr(text, lang, sc) - -- if not sc then - -- sc = require("Module:scripts").findBestScript(text, require("Module:languages").getByCode("mn")) - -- if sc then - -- sc = sc:getCode() - -- else - -- return nil - -- end - -- end - - local func = trfunc[sc] - if func then - return trfunc[sc](text) - else - return nil - end -end - -return export \ No newline at end of file diff --git a/wikt/translit/mr-translit.lua b/wikt/translit/mr-translit.lua deleted file mode 100644 index 4af01c6..0000000 --- a/wikt/translit/mr-translit.lua +++ /dev/null @@ -1,307 +0,0 @@ --- Transliteration for Marathi (in progress) --- This module will transliterate Marathi language text per WT:MR TR. 
--- Language code: mar - -local export = {} -local gsub = mw.ustring.gsub -local find = mw.ustring.find - -local conv = { - -- consonants - ['क'] = 'k', ['ख'] = 'kh', ['ग'] = 'g', ['घ'] = 'gh', ['ङ'] = 'ṅ', - ['च'] = 'c', ['छ'] = 'ch', ['ज'] = 'j', ['झ'] = 'jh', ['ञ'] = 'ñ', - ['ट'] = 'ṭ', ['ठ'] = 'ṭh', ['ड'] = 'ḍ', ['ढ'] = 'ḍh', ['ण'] = 'ṇ', - ['त'] = 't', ['थ'] = 'th', ['द'] = 'd', ['ध'] = 'dh', ['न'] = 'n', - ['प'] = 'p', ['फ'] = 'ph', ['ब'] = 'b', ['भ'] = 'bh', ['म'] = 'm', - ['य'] = 'y', ['र'] = 'r', ['ल'] = 'l', ['व'] = 'v', ['ळ'] = 'ḷ', - ['श'] = 'ś', ['ष'] = 'ṣ', ['स'] = 's', ['ह'] = 'h', - ['ऱ'] = 'r', - -- ['ज्ञ'] = 'dny', - - -- vowel diacritics - ['ि'] = 'i', ['ु'] = 'u', ['े'] = 'e', ['ो'] = 'o', - ['ा'] = 'ā', ['ी'] = 'ī', ['ू'] = 'ū', - ['ृ'] = 'ru', - ['ै'] = 'ai', ['ौ'] = 'au', - ['ॉ'] = 'ŏ', - ['ॅ'] = 'ĕ', - - -- vowel signs - ['अ'] = 'a', ['इ'] = 'i', ['उ'] = 'u', ['ए'] = 'e', ['ओ'] = 'o', - ['आ'] = 'ā', ['ई'] = 'ī', ['ऊ'] = 'ū', - ['ऋ'] = 'ŕ', - ['ऐ'] = 'ai', ['औ'] = 'au', - ['ऑ'] = 'ŏ', - ['ॲ'] = 'ĕ', ['ऍ'] = 'ĕ', - - ['ॐ'] = 'om', - - -- chandrabindu - ['ँ'] = '̃', - - -- anusvara - ['ं'] = 'ṁ', - - -- visarga - ['ः'] = 'ḥ', - - -- virama - ['्'] = '', - - -- numerals - ['०'] = '0', ['१'] = '1', ['२'] = '2', ['३'] = '3', ['४'] = '4', - ['५'] = '5', ['६'] = '6', ['७'] = '7', ['८'] = '8', ['९'] = '9', - - -- punctuation - ['।'] = '.', -- danda - ['॥'] = '.', -- double danda - ['+'] = '', -- compound separator - - -- abbreviation sign - ['॰'] = '.', -} - -local nasal_assim = { - ['क'] = 'ङ', ['ख'] = 'ङ', ['ग'] = 'ङ', ['घ'] = 'ङ', - ['च'] = 'ञ', ['छ'] = 'ञ', ['ज'] = 'ञ', ['झ'] = 'ञ', - ['ट'] = 'ण', ['ठ'] = 'ण', ['ड'] = 'ण', ['ढ'] = 'ण', - ['प'] = 'म', ['फ'] = 'म', ['ब'] = 'म', ['भ'] = 'म', ['म'] = 'म', - ['य'] = 'इ', ['र'] = 'उ', ['ल'] = 'उ', ['व'] = 'उ', -} - -local perm_cl = { - ['म्ल'] = true, ['व्ल'] = true, ['न्ल'] = true, - -} - -local all_cons, special_cons = 'कखगघङचछजझञटठडढतथदधपफबभशषसयरलवहणनमळ', 'यरलवहनम' -local vowel, vowel_sign = 
'aिुृेोाीूैौॉॅ', 'अइउएओआईऊऋऐऔऑऍ' -local syncope_pattern = '([' .. vowel .. vowel_sign .. '])(़?[' .. all_cons .. '])a(़?[' .. gsub(all_cons, "य", "") .. '])([ंँ]?[' .. vowel .. vowel_sign .. '])' - -local function reverse(t) - local new_t = {} - local new_t_i = 1 - for i = #t, 1, -1 do - new_t[new_t_i] = t[i] - new_t_i = new_t_i + 1 - end - return new_t -end - -local function rev_string(text) - local char_array, i = {}, 1 - for char in string.gmatch(text, "[%z\1-\127\194-\244][\128-\191]*") do -- UTF-8 character pattern - char_array[i] = char - i = i + 1 - end - -- local table1=require("table") - -- print(char_array) - return table.concat(reverse(char_array)) -end - -function export.tr(text, lang, sc) - text = gsub(text, 'ाँ', 'ॉ' .. 'ं') - text = gsub(text, 'ँ', 'ॅ' .. 'ं') - text = gsub(text, '([^' .. vowel .. vowel_sign .. '])ं ', '%1अ ') - text = gsub(text, '([^' .. vowel .. vowel_sign .. '])ं$', '%1अ') - text = gsub(text, '([' .. all_cons .. ']़?)([' .. vowel .. '्]?)', function(c, d) - return c .. (d == "" and 'a' or d) end) - for word in mw.ustring.gmatch(text, "[ऀ-ॿa]+") do - local orig_word = word - word = rev_string(word) - word = gsub(word, '^a(़?)([' .. all_cons .. '])(.)(.?)', function(opt, first, second, third) - return (((find(first, '[' .. special_cons .. ']') and find(second, '्') and not perm_cl[first..second..third]) - or find(first .. second, 'य[ीेै]')) - and 'a' or "") .. opt .. first .. second .. third end) - while find(word, syncope_pattern) do - word = gsub(word, syncope_pattern, '%1%2%3%4') - end - word = gsub(word, '(.?)ं(.)', function(succ, prev) - return succ .. (succ..prev == "a" and "्म" or - (succ == "" and find(prev, '[' .. vowel .. ']') and "̃" or nasal_assim[succ] or "n")) .. 
prev end) - text = gsub(text, orig_word, rev_string(word)) - end - text = gsub(text, '.़?', conv) - text = gsub(text, 'a([iu])̃', 'a͠%1') - text = gsub(text, 'aa', 'a') - text = gsub(text, 'ñjñ', 'ndny') - text = gsub(text, 'jñ', 'dny') - return mw.ustring.toNFC(text) -end - -return export - - --- 19 tests failed. (refresh) - --- test_translit_marathi: --- Text Expected Actual Differs at Comments --- Passed भारत bhārat bhārat --- Passed मराठी marāṭhī marāṭhī --- Passed गंगा gaṅgā gaṅgā anusvara before ग' is its homorganic nasal ('ṅ') --- Passed लंड laṇḍ laṇḍ --- Passed कंबल kambal kambal --- Failed रक्त rakta rakt 5 --- Passed काव्य kāvya kāvya --- Failed मंद manda mand 5 --- Failed उंच unċa uñc 2 ċ --- Passed कृपा krupā krupā ‘ऋ’ is ‘ru’, कृपा has a Wiktionary entry --- Passed ज्ञान dnyān dnyān --- Passed ऱ्हास rhās rhās --- Failed दऱ्या darya daryā 5 --- Failed दर्या darya daryā 5 --- Passed कैरी kairī kairī --- Passed हौस haus haus --- Passed संरक्षण saurakṣaṇ saurakṣaṇ --- Passed संशय sanśay sanśay --- Passed दंष्ट्र danṣṭra danṣṭra --- Passed हंस hans hans --- Passed संयोग saiyog saiyog --- Passed संलग्न saulagna saulagna --- Passed संवाद sauvād sauvād --- Failed सिंह siṅha sinh 3 --- Failed संहार saṅhār sanhār 3 --- Passed संज्ञा sandnyā sandnyā --- Passed माझं mājha mājha --- Passed बॅट bĕṭ bĕṭ --- Passed बँक bĕṅk bĕṅk --- Passed ॲप ĕp ĕp --- Passed ऍप ĕp ĕp --- Passed कॉट kŏṭ kŏṭ --- Passed हाँग काँग hŏṅg kŏṅg hŏṅg kŏṅg --- Passed ऑस्ट्रेलिया ŏsṭreliyā ŏsṭreliyā --- Failed च्या cā cyā 2 --- Failed तुझ्या tujhā tujhyā 5 --- Failed चार ċār cār 1 --- Passed चार cār cār --- Failed काचा kāċā kācā 3 --- Passed काचा kācā kācā --- Failed चराचर ċarāċar carācar 1 --- Passed चराचर carācar carācar --- Failed जप j̈ap jap 2 --- Passed जप jap jap --- Failed मोजणे moj̈ṇe mojṇe 4 --- Failed लाज lāj̈ lāj 4 --- Failed झकझक j̈hakj̈hak jhakjhak 2 --- Passed झकझक jhakjhak jhakjhak --- Failed झापड j̈hāpaḍ jhāpaḍ 2 --- Passed झापड jhāpaḍ jhāpaḍ --- Failed झीज jhīj̈ jhīj 5 --- 
Failed चीज cīj̈ cīj 4 --- -- Unit tests for [[Module:mr-translit]]. Refresh page to run tests. --- local tests = require('Module:UnitTests') --- local mr_translit = require('Module:mr-translit') - --- --TO DO --- function tests:do_test_translit(deva, roman, comment) --- self:equals( --- '[[' .. deva .. '#Marathi|' .. deva .. ']]', --- mr_translit.tr(deva, 'mr', 'Deva'), --- roman, --- { comment = comment }) --- end - --- function tests:test_translit_marathi() --- local examples = { --- --Fundamental tests --- { 'भारत', 'bhārat' }, --- { 'मराठी', 'marāṭhī' }, - --- --Homorganic nasal assimilation --- { 'गंगा', 'gaṅgā', "anusvara before ग' is its homorganic nasal ('ṅ')" }, --- { 'लंड', 'laṇḍ' }, --anusvara before 'ड' is its homorganic nasal ('ṇ') --- { 'कंबल', 'kambal' }, --anusvara before 'ब' is its homorganic nasal ('m') - --- --The following two categories could be problematic since they could interfere with cases without schwa deletion --- --such as English borrowings (ऑगस्ट) --- --Word-final consonant clusters with no schwa deletion --- { 'रक्त', 'rakta' }, --- { 'काव्य', 'kāvya' }, --य-final is a subcase - --- --Word-final nasal assimilation consonant clusters with no schwa deletion (where nasal is न) --- { 'मंद', 'manda' }, --Sanskrit borrowing --- { 'उंच', 'unċa', 'ċ'}, - --- --Tests individual letters --- { 'कृपा', 'krupā', [=[‘ऋ’ is ‘ru’, [[कृपा]] has a Wiktionary entry]=] }, --- { 'ज्ञान', 'dnyān' }, --‘ज्ञ’ is ‘dnya’ (even word initially such as Dnyaneshwar) - --- --Eyelash र --- { 'ऱ्हास', 'rhās' }, --- { 'दऱ्या', 'darya' }, --‘दऱ्या’ is the plural of ‘दरी’ --- { 'दर्या', 'darya' }, - --- --Ensures औ and ऐ remain diphthongs --- { 'कैरी', 'kairī' }, --‘ऐ’ is a diphthong --- { 'हौस', 'haus' }, --‘औ’ is a diphthong - --- --Anusvāra before र, श, ष, स: Provincial Class --- { 'संरक्षण', 'saurakṣaṇ' }, --Anusvāra transliterated before र is ‘u’ --- { 'संशय', 'sanśay' }, --- { 'दंष्ट्र', 'danṣṭra' }, --Word-final ‘a’ --- { 'हंस', 'hans' }, - --- --Anusvāra before 
य, ल, व: Classical Class --- { 'संयोग', 'saiyog' }, --Anusvāra transliterated before य is ‘i’ --- { 'संलग्न', 'saulagna' }, --Anusvāra transliterated before ल is ‘u’ + Word-final ‘a’ --- { 'संवाद', 'sauvād' }, --Anusvāra is transliterated before व is ‘u’ - --- --Anusvāra: Other --- { 'सिंह', 'siṅha' }, --Anusvāra before ‘ह’ and after ‘इ’ + Word-final ‘a’ --- { 'संहार', 'saṅhār' }, --Anusvāra before ‘ह’ --- { 'संज्ञा', 'sandnyā' }, --Anusvāra before ‘ज्ञ’ --- { 'माझं', 'mājha' }, --Anusvara to stop schwa dropping - --- --English borrowings --- { 'बॅट', 'bĕṭ' }, --‘ ॅ‘ is IPA /æ/ (बॅट = bat) --- { 'बँक', 'bĕṅk' }, -- ‘ँ’ = ‘ॅ’ + ‘ं’, chandrabindu is never used as in Hindi --- --Rurally ‘ॅ’ is ‘yā’, so बँक becomes ‘byāṅk’ --- { 'ॲप', 'ĕp' }, --Independent form of ‘ ॅ‘ (ॲप = app) --- { 'ऍप', 'ĕp' }, --Another independent form of ‘ ॅ‘ (ऍप = app) --- { 'कॉट', 'kŏṭ' }, --‘ॉ‘ is IPA /ɔ/ (कॉट = cot), RP ‘ɒ’ and ‘ɔ’ are both represented with ‘ॉ‘ --- --Rurally ‘ॉ‘ is ‘ā’, so डॉकटर becomes ‘ḍākṭar’ --- { 'हाँग काँग', 'hŏṅg kŏṅg' }, --‘ॉ’ = ‘ॉ’ + ‘ं (हाँग काँग = Hong Kong) --- { 'ऑस्ट्रेलिया', 'ŏsṭreliyā' }, --Independent form of ‘ ॉ‘ (ऑस्ट्रेलिया = Australia) - --- --The remaining tests concern: c and ċ, j and j̈, jh and j̈h --- --य denotes palatalisation and has no independent realisation (Perhaps this should be in Mod:mr-IPA instead) --- { 'च्या', 'cā' }, --‘ċ’ palatalised to ‘c’ in genitive oblique --- { 'तुझ्या', 'tujhā' }, --‘j̈h’ palatalised to ‘jh’ in genitive oblique - --- --च word-initial minimal pair --- { 'चार', 'ċār' }, --Word-initial voiceless alveolar affricate, ‘चार’ means ‘four’ --- { 'चार', 'cār' }, --Word-initial voiceless palato-alveolar affricate, ‘चार’ means ‘graze’ or ‘young green grass’ - --- --च word-medial minimal pair --- { 'काचा', 'kāċā' }, --Word-medial voiceless alveolar affricate, ‘काचा’ is the plural form of ‘काच’ kāċ, which means ‘glass’ --- { 'काचा', 'kācā' }, --Word-medial voiceless palato-affricate, ‘काचा’ means ‘button-hole’ or ‘tuck’ - --- 
--च word-initial and word-medial minimal pair --- { 'चराचर', 'ċarāċar' }, --Word-medial voiceless alveolar affricate, ‘चराचर’ is an onomatopoeia that means ‘tearing sound’ or ‘rapidly’ --- { 'चराचर', 'carācar' }, --Word-medial voiceless palato-affricate, ‘चराचर’ means ‘every created thing, animate or inanimate‘ - --- --ज word-initial minimal pair --- { 'जप', 'j̈ap' }, --Word-initial voiced alveolar affricate, ‘जप’ is an imperative that means ‘be careful’ --- { 'जप', 'jap' }, --Word-initial voiced palato-affricate, ‘जप’ means ‘counting beads’ - --- --ज alveolar affricate word-medial --- { 'मोजणे', 'moj̈ṇe' }, --Word-medial voiced alveolar affricate - --- --ज alveolar affricate word-final --- { 'लाज', 'lāj̈' }, --Word-final voiced alveolar affricate - --- --झ word-initial and word-medial two minimal pairs --- { 'झकझक ', 'j̈hakj̈hak' }, --Word-initial and word-medial breathy-voiced alveolar affricate, means ‘bright light’ --- { 'झकझक', 'jhakjhak' }, --Word-initial and word-medial breathy-voiced palato-alveolar affricate, means ‘complaining’ --- { 'झापड', 'j̈hāpaḍ' }, --Word-initial and word-medial breathy-voiced alveolar affricate, means ‘drowsiness’ --- { 'झापड', 'jhāpaḍ' }, --Word-initial and word-medial breathy-voiced palato-alveolar affricate, means ‘slap’ - --- --One alveolar affricate and one palato-alveolar affricate in a single word --- { 'झीज', 'jhīj̈' }, --‘झीज’ means ‘erosion’, see Wiktionary entry for ‘झिजणे’ --- { 'चीज', 'cīj̈' }, --‘चीज’ means ‘thing’, see Wiktionary entry for ‘चीज़’ --- } - --- self:iterate(examples, 'do_test_translit') --- end - --- return tests \ No newline at end of file diff --git a/wikt/translit/my-pron.lua b/wikt/translit/my-pron.lua deleted file mode 100644 index f3857be..0000000 --- a/wikt/translit/my-pron.lua +++ /dev/null @@ -1,575 +0,0 @@ -local export = {} -local gsub = mw.ustring.gsub -local sub = mw.ustring.sub -local match = mw.ustring.match - -local system_list = { - { 1, ["type"] = "phonetic", ["name"] = "IPA" }, - { 2, 
["type"] = "orthographic", ["name"] = "MLCTS" }, - { 3, ["type"] = "orthographic", ["name"] = "ALA-LC" }, - { 4, ["type"] = "phonetic", ["name"] = "BGN/PCGN" }, - { 5, ["type"] = "phonetic", ["name"] = "Okell" }, -} - -local initial_table = { - ["က"] = { "k", "k", "k", "k", "k" }, - ["ကျ"] = { "t͡ɕ", "ky", "ky", "ky", "c" }, - ["ကြ"] = { "t͡ɕ", "kr", "kr", "ky", "c" }, - ["ကျွ"] = { "t͡ɕw", "kyw", "kyv", "kyw", "cw" }, - ["ကြွ"] = { "t͡ɕw", "krw", "krv", "kyw", "cw" }, - ["ကွ"] = { "kw", "kw", "kv", "kw", "kw" }, - ["ခ"] = { "kʰ", "hk", "kh", "hk", "hk" }, - ["ချ"] = { "t͡ɕʰ", "hky", "khy", "ch", "hc" }, - ["ခြ"] = { "t͡ɕʰ", "hkr", "khr", "ch", "hc" }, - ["ချွ"] = { "t͡ɕʰw", "hkyw", "khyv", "chw", "hcw" }, - ["ခြွ"] = { "t͡ɕʰw", "hkrw", "khrv", "chw", "hcw" }, - ["ခွ"] = { "kʰw", "hkw", "khv", "hkw", "hkw" }, - ["ဂ"] = { "ɡ", "g", "g", "g", "g" }, - ["ဂျ"] = { "d͡ʑ", "gy", "gy", "gy", "j" }, - ["ဂြ"] = { "d͡ʑ", "gr", "gr", "gy", "j" }, - ["ဂျွ"] = { "d͡ʑw", "gyw", "gyv", "gyw", "jw" }, - ["ဂွ"] = { "ɡw", "gw", "gv", "gw", "gw" }, - ["ဃ"] = { "ɡ", "gh", "gh", "g", "g" }, - ["င"] = { "ŋ", "ng", "ṅ", "ng", "ng" }, - ["ငှ"] = { "ŋ̊", "hng", "ṅh", "hng", "hng" }, - ["ငြ"] = { "ɲ", "ngr", "ṅr", "ny", "ny" }, - ["ငြှ"] = { "ɲ̊", "hngr", "ṅrh", "hny", "hny" }, - ["ငွ"] = { "ŋw", "ngw", "ṅv", "ngw", "ngw" }, - ["ငွှ"] = { "ŋ̊w", "hngw", "ṅvh", "hngw", "hngw" }, - ["စ"] = { "s", "c", "c", "s", "s" }, - ["စွ"] = { "sw", "cw", "cv", "sw", "sw" }, - ["ဆ"] = { "sʰ", "hc", "ch", "hs", "hs" }, - ["ဆွ"] = { "sʰw", "hcw", "chv", "hsw", "hsw" }, - ["ဇ"] = { "z", "j", "j", "z", "z" }, - ["ဇွ"] = { "zw", "jw", "jv", "zw", "zw" }, - ["ဈ"] = { "z", "jh", "jh", "z", "z" }, - ["ဉ"] = { "ɲ", "ny", "ñ", "ny", "ny" }, - ["ည"] = { "ɲ", "ny", "ññ", "ny", "ny" }, - ["ဉှ"] = { "ɲ̊", "hny", "ñh", "hny", "hny" }, - ["ညှ"] = { "ɲ̊", "hny", "ññh", "hny", "hny" }, - ["ညွ"] = { "ɲw", "nyw", "ñv", "nyw", "nyw" }, - ["ညွှ"] = { "ɲ̊w", "hnyw", "ñvh", "hnyw", "hnyw" }, - ["ဋ"] = { "t", "t", "ṭ", "t", "t" 
}, - ["ဌ"] = { "tʰ", "ht", "ṭh", "ht", "ht" }, - ["ဍ"] = { "d", "d", "ḍ", "d", "d" }, - ["ဎ"] = { "d", "dh", "ḍh", "d", "d" }, - ["ဏ"] = { "n", "n", "ṇ", "n", "n" }, - ["ဏှ"] = { "n̥", "hn", "ṇh", "hn", "hn" }, - ["တ"] = { "t", "t", "t", "t", "t" }, - ["တျ"] = { "tj", "ty", "ty", "ty", "ty" }, - ["တြ"] = { "tɹ", "tr", "tr", "tr", "tr" }, - ["တွ"] = { "tw", "tw", "tv", "tw", "tw" }, - ["ထ"] = { "tʰ", "ht", "th", "ht", "ht" }, - ["ထွ"] = { "tʰw", "htw", "thv", "htw", "htw" }, - ["ဒ"] = { "d", "d", "d", "d", "d" }, - ["ဒျ"] = { "dj", "dy", "dy", "dy", "dy" }, - ["ဒြ"] = { "dɹ", "dr", "dr", "dr", "dr" }, - ["ဒွ"] = { "dw", "dw", "dv", "dw", "dw" }, - ["ဓ"] = { "d", "dh", "dh", "d", "d" }, - ["န"] = { "n", "n", "n", "n", "n" }, - ["နှ"] = { "n̥", "hn", "nh", "hn", "hn" }, - ["နွ"] = { "nw", "nw", "nv", "nw", "nw" }, - ["နွှ"] = { "n̥w", "hnw", "nvh", "hnw", "hnw" }, - ["ပ"] = { "p", "p", "p", "p", "p" }, - ["ပျ"] = { "pj", "py", "py", "py", "py" }, - ["ပြ"] = { "pj", "pr", "pr", "py", "py" }, - ["ပြွ"] = { "pw", "prw", "prv", "pw", "pw" }, - ["ပွ"] = { "pw", "pw", "pv", "pw", "pw" }, - ["ဖ"] = { "pʰ", "hp", "ph", "hp", "hp" }, - ["ဖျ"] = { "pʰj", "hpy", "phy", "hpy", "hpy" }, - ["ဖြ"] = { "pʰj", "hpr", "phr", "hpy", "hpy" }, - ["ဖွ"] = { "pʰw", "hpw", "phv", "hpw", "hpw" }, - ["ဗ"] = { "b", "b", "b", "b", "b" }, - ["ဗျ"] = { "bj", "by", "by", "by", "by" }, - ["ဗြ"] = { "bj", "br", "br", "by", "by" }, - ["ဗွ"] = { "bw", "bw", "bv", "bw", "bw" }, - ["ဘ"] = { "b", "bh", "bh", "b", "b" }, - ["-ဘ"] = { "pʰ", "bh", "bh", "hp", "hp" }, - ["ဘွ"] = { "bw", "bhw", "bhv", "bw", "bw" }, - ["-ဘွ"] = { "pʰw", "bhw", "bhw", "hpw", "hpw" }, - ["မ"] = { "m", "m", "m", "m", "m" }, - ["မှ"] = { "m̥", "hm", "mh", "hm", "hm" }, - ["မျ"] = { "mj", "my", "my", "my", "my" }, - ["မျှ"] = { "m̥j", "hmy", "myh", "hmy", "hmy" }, - ["မြ"] = { "mj", "mr", "mr", "my", "my" }, - ["မြှ"] = { "m̥j", "hmr", "mrh", "hmy", "hmy" }, - ["မြွ"] = { "mw", "mrw", "mrv", "mw", "mw" }, - ["မွ"] = { "mw", "mw", 
"mv", "mw", "mw" }, - ["မွှ"] = { "m̥w", "hmw", "mvh", "hmw", "hmw" }, - ["ယ"] = { "j", "y", "y", "y", "y" }, - ["ယှ"] = { "ʃ", "hy", "yh", "sh", "hy" }, - ["ယွ"] = { "jw", "yw", "yv", "yw", "yw" }, - ["ရ"] = { "j", "r", "r", "y", "y" }, - ["*ရ"] = { "ɹ", "r", "r", "r", "r" }, - ["ရှ"] = { "ʃ", "hr", "rh", "sh", "hy" }, - ["ရွ"] = { "jw", "rw", "rv", "yw", "yw" }, - ["ရွှ"] = { "ʃw", "hrw", "rvh", "shw", "hyw" }, - ["လ"] = { "l", "l", "l", "l", "l" }, - ["လှ"] = { "l̥", "hl", "lh", "hl", "hl" }, - ["လျ"] = { "j", "ly", "ly", "y", "y" }, - ["*လျ"] = { "lj", "ly", "ly", "ly", "ly" }, - ["လျှ"] = { "ʃ", "hly", "lyh", "sh", "hy" }, - ["*လျှ"] = { "l̥j", "hly", "lyh", "hly", "hly" }, - ["လွ"] = { "lw", "lw", "lv", "lw", "lw" }, - ["လွှ"] = { "l̥w", "hlw", "lvh", "hlw", "hlw" }, - ["ဝ"] = { "w", "w", "v", "w", "w" }, - ["ဝှ"] = { "ʍ", "hw", "vh", "hw", "hw" }, - ["သ"] = { "θ", "s", "s", "th", "th" }, - ["+သ"] = { "ð", "s", "s", "dh", "th" }, - ["သွ"] = { "θw", "sw", "sv", "thw", "thw" }, - ["+သွ"] = { "ðw", "sw", "sw", "dhw", "thw" }, - ["ဟ"] = { "h", "h", "h", "h", "h" }, - ["ဟွ"] = { "hw", "hw", "hv", "hw", "hw" }, - ["ဠ"] = { "l", "l", "ḷ", "l", "l" }, - ["အ"] = { "ʔ", "", "’", "", "" }, - ["ဿ"] = { nil, "ss", "ss", nil, nil }, - [""] = { "ʔ", "", "", "", "" }, - ["-"] = { "", "", "", "", "" }, - - ["ျ"] = { nil, "y", "y", nil, nil }, - ["ြ"] = { nil, "r", "r", nil, nil }, - ["ွ"] = { nil, "w", "w", nil, nil }, -} - -local initial_voicing = { - ["+က"] = "ဂ", - ["+ခ"] = "ဂ", - ["+စ"] = "ဇ", - ["+ဆ"] = "ဇ", - ["+ဋ"] = "ဍ", - ["+ဌ"] = "ဍ", - ["+တ"] = "ဒ", - ["+ထ"] = "ဒ", - ["+ပ"] = "ဗ", - ["+ဖ"] = "ဗ", - ["-ဘ"] = "ဖ", -} - -local final_table = { - [""] = { "a̰", "a.", "a", "a.", "á" }, - ["က်"] = { "ɛʔ", "ak", "ak‘", "et", "eʔ" }, - ["င်"] = { "ɪ̀ɴ", "ang", "aṅ‘", "in", "iñ" }, - ["စ်"] = { "ɪʔ", "ac", "ac‘", "it", "iʔ" }, - ["ည်"] = { "ì", "any", "aññ‘", "i", "i" }, - ["ည်2"] = { "è", "any", "aññ‘", "e", "ei" }, - ["ည်3"] = { "ɛ̀", "any", "aññ‘", "è", "e" }, - ["ဉ်"] = 
{ "ɪ̀ɴ", "any", "añ‘", "in", "iñ" }, - ["တ်"] = { "aʔ", "at", "at‘", "at", "aʔ" }, - ["န်"] = { "àɴ", "an", "an‘", "an", "añ" }, - ["ပ်"] = { "aʔ", "ap", "ap‘", "at", "aʔ" }, - ["မ်"] = { "àɴ", "am", "am‘", "an", "añ" }, - ["ယ်"] = { "ɛ̀", "ai", "ay‘", "è", "e" }, - ["ံ"] = { "àɴ", "am", "aṃ", "an", "añ" }, - ["ာ"] = { "à", "a", "ā", "a", "a" }, - ["ါ"] = { "à", "a", "ā", "a", "a" }, - ["ိ"] = { "ḭ", "i.", "i", "i.", "í" }, - ["ိတ်"] = { "eɪʔ", "it", "it‘", "eik", "eiʔ" }, - ["ိန်"] = { "èɪɴ", "in", "in‘", "ein", "eiñ" }, - ["ိပ်"] = { "eɪʔ", "ip", "ip‘", "eik", "eiʔ" }, - ["ိမ်"] = { "èɪɴ", "im", "im‘", "ein", "eiñ" }, - ["ိံ"] = { "èɪɴ", "im", "iṃ", "ein", "eiñ" }, - ["ီ"] = { "ì", "i", "ī", "i", "i" }, - ["ု"] = { "ṵ", "u.", "u", "u.", "ú" }, - ["ုတ်"] = { "oʊʔ", "ut", "ut‘", "ok", "ouʔ" }, - ["ုန်"] = { "òʊɴ", "un", "un‘", "on", "ouñ" }, - ["ုပ်"] = { "oʊʔ", "up", "up‘", "ok", "ouʔ" }, - ["ုမ်"] = { "òʊɴ", "um", "um‘", "on", "ouñ" }, - ["ုံ"] = { "òʊɴ", "um", "uṃ", "on", "ouñ" }, - ["ူ"] = { "ù", "u", "ū", "u", "u" }, - ["ေ"] = { "è", "e", "e", "e", "ei" }, - ["ဲ"] = { "ɛ́", "ai:", "ai", "è:", "è" }, - ["ော"] = { "ɔ́", "au:", "o", "aw:", "ò" }, - ["ောက်"] = { "aʊʔ", "auk", "ok‘", "auk", "auʔ" }, - ["ောင်"] = { "àʊɴ", "aung", "oṅ‘", "aung", "auñ" }, - ["ော်"] = { "ɔ̀", "au", "o‘", "aw", "o" }, - ["ို"] = { "ò", "ui", "ui", "o", "ou" }, - ["ိုက်"] = { "aɪʔ", "uik", "uik‘", "aik", "aiʔ" }, - ["ိုင်"] = { "àɪɴ", "uing", "uiṅ‘", "aing", "aiñ" }, - ["ွတ်"] = { "ʊʔ", "wat", "vat‘", "ut", "uʔ" }, - ["ွန်"] = { "ʊ̀ɴ", "wan", "van‘", "un", "uñ" }, - ["ွပ်"] = { "ʊʔ", "wap", "vap‘", "ut", "uʔ" }, - ["ွမ်"] = { "ʊ̀ɴ", "wam", "vam‘", "un", "uñ" }, - ["ွံ"] = { "ʊ̀ɴ", "wam", "vaṃ", "un", "uñ" }, - ["'"] = { "ə", "a", "a", "ă", "ă" }, - ["်"] = { "", "", "‘", "", "" }, -} - -local nucleus_table = { - [""] = { "à", "a", "a", "a", "a" }, - ["ိ"] = { "ì", "i", "i", "i", "i" }, - ["ု"] = { "ù", "u", "u", "u", "u" }, - ["ော"] = { "ɔ̀", "au", "o", "aw", "o" }, - ["ေါ"] = { "ɔ̀", 
"au", "o", "aw", "o" }, - ["ွ"] = { "ʊ̀", "wa", "va", "u", "u" }, -} - -local indep_letter_table = { - ["ဣ"] = { "ḭ", "i.", "i", "i.", "í" }, - ["ဤ"] = { "ì", "i", "ī", "i", "i" }, - ["ဥ"] = { "ṵ", "u.", "u", "u.", "ú" }, - ["ဦ"] = { "ù", "u", "ū", "u", "u" }, - ["ဧ"] = { "è", "e", "e", "e", "ei" }, - ["၏"] = { "ɛ̰", "e", "e*", "è.", "é" }, - ["ဩ"] = { "ɔ́", "au:", "o", "aw:", "ò" }, - ["ဪ"] = { "ɔ̀", "au", "o‘", "aw", "o" }, - ["၌"] = { "n̥aɪʔ", "hnai.", "n*", "hnaik", "hnaiʔ" }, - ["၍"] = { "jwḛ", "rwe", "r*", "ywe.", "yweí" }, -} - -local tone_table = { - ["း"] = { "́", ":", "″", ":", "̀" }, - ["့"] = { "̰", ".", "′", ".", "́" }, -} - -local ambig_intersyl = { - [1] = { - }, - - [2] = { - ["ky"] = 1, ["kr"] = 1, ["kw"] = 1, - ["gy"] = 1, ["gr"] = 1, ["gw"] = 1, - ["ng"] = 1, ["ny"] = 1, - ["cw"] = 1, ["tw"] = 1, ["nw"] = 1, - ["py"] = 1, ["pr"] = 1, ["pw"] = 1, - ["my"] = 1, ["mr"] = 1, ["mw"] = 1, - }, - - [3] = { - }, - - [4] = { - ["ky"] = 1, ["kr"] = 1, ["kw"] = 1, - ["gy"] = 1, ["gr"] = 1, ["gw"] = 1, - ["ng"] = 1, ["ny"] = 1, - ["cw"] = 1, ["tw"] = 1, ["nw"] = 1, - ["tr"] = 1, ["tw"] = 1, - ["py"] = 1, ["pr"] = 1, ["pw"] = 1, - ["my"] = 1, ["mr"] = 1, ["mw"] = 1, - }, - - [5] = { - ["ou"] = 1, - }, -} - -local reverse_table = { - ["hm"] = "မှ", ["m"] = "မ", - ["hn"] = "နှ", ["n"] = "န", - ["hny"] = "ညှ", ["ny"] = "ည", - ["hng"] = "ငှ", ["ng"] = "င", - ["p"] = "ပ", ["hp"] = "ဖ", ["b"] = "ဗ", - ["t"] = "တ", ["ht"] = "ထ", ["d"] = "ဒ", - ["c"] = "ကျ", ["hc"] = "ချ", ["j"] = "ဂျ", - ["k"] = "က", ["hk"] = "ခ", ["g"] = "ဂ", - [""] = "အ", - ["th"] = "သ", ["+th"] = "+သ", - ["s"] = "စ", ["hs"] = "ဆ", ["z"] = "ဇ", - ["hy"] = "ရှ", - ["h"] = "ဟ", - ["r"] = "*ရ", - ["y"] = "ယ", - ["hw"] = "ဝှ", ["w"] = "ဝ", - ["hl"] = "လှ", ["l"] = "လ", - ["hmw"] = "မွှ", ["mw"] = "မွ", ["hmy"] = "မျှ", ["my"] = "မျ", - ["hnw"] = "နွှ", ["nw"] = "နွ", - ["hnyw"] = "ညွှ", ["nyw"] = "ညွ", - ["hngw"] = "ငွှ", ["ngw"] = "ငွ", - ["pw"] = "ပွ", ["hpw"] = "ဖွ", ["bw"] = "ဗွ", - ["py"] = "ပျ", 
["hpy"] = "ဖျ", ["by"] = "ဗျ", - ["tw"] = "တွ", ["htw"] = "ထွ", ["dw"] = "ဒွ", - ["cw"] = "ကျွ", ["hcw"] = "ချွ", ["jw"] = "ဂျွ", - ["kw"] = "ကွ", ["hkw"] = "ခွ", ["gw"] = "ဂွ", - ["thw"] = "သွ", - ["sw"] = "စွ", ["hsw"] = "ဆွ", ["zw"] = "ဇွ", - ["hyw"] = "ရွှ", - ["hw"] = "ဟွ", - ["yw"] = "ယွ", - ["hlw"] = "လွှ", ["lw"] = "လွ", ["hly"] = "*လျှ", ["ly"] = "*လျ", - - ["i"] = "ီ", ["i\\"] = "ီး", ["i/"] = "ိ", ["i?"] = "စ်", - ["i~"] = "င်", ["i\\~"] = "င်း", ["i/~"] = "င့်", - ["ei"] = "ေ", ["ei\\"] = "ေး", ["ei/"] = "ေ့", ["ei?"] = "ိတ်", - ["ei~"] = "ိန်", ["ei\\~"] = "ိန်း", ["ei/~"] = "ိန့်", - ["e"] = "ယ်", ["e\\"] = "ဲ", ["e/"] = "ယ့်", ["e?"] = "က်", - ["ai~"] = "ိုင်", ["ai\\~"] = "ိုင်း", ["ai/~"] = "ိုင့်", - ["ai?"] = "ိုက်", - ["a"] = "ာ", ["a\\"] = "ား", ["a/"] = "", ["a?"] = "တ်", - ["a~"] = "န်", ["a\\~"] = "န်း", ["a/~"] = "န့်", - ["o"] = "ော်", ["o\\"] = "ော", ["o/"] = "ော့", ["au?"] = "ောက်", - ["au~"] = "ောင်", ["au\\~"] = "ောင်း", ["au/~"] = "ောင့်", - ["ou"] = "ို", ["ou\\"] = "ိုး", ["ou/"] = "ို့", ["ou?"] = "ုပ်", - ["ou~"] = "ုန်", ["ou\\~"] = "ုန်း", ["ou/~"] = "ုန့်", - ["u"] = "ူ", ["u\\"] = "ူး", ["u/"] = "ု", ["u?"] = "ွတ်", - ["u~"] = "ွန်", ["u\\~"] = "ွန်း", ["u/~"] = "ွန့်", - ["a'"] = "'", -} - -local repl_string = "([ကခဂဃငစဆဇဈဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠအဿ][ျြွှ]*[ံ့းွာါါိီုူေဲ]*)([ကခဂဃငစဆဇဈဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠအဿ][့]?[^့်္])" - -function syllabify(text) - text = gsub(text, "('?)([%+%-%*]*)", function(a, b) - if a .. b ~= "" then return a .. " " .. b end - end) - - text = gsub(text, "([ဣဤဥဦဧဩဪ၏၌၍][့း်]?)(.?)(.?)", function(a, b, c) - return (c == "္" and " "..a..b.." "..c or (c == "်" and " "..a..b..c or " "..a.." "..b..c)) - end) .. 
" " - - text = gsub(text, "(်း?'?)", "%1 ") - text = gsub(text, "([း့])([ကခဂဃငစဆဇဈဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠအ]်)", "%2%1") - - while match(text, repl_string) do - text = gsub(text, repl_string, "%1 %2") - end - - text = gsub(text, "္", " , ") - text = gsub(text, " +", " ") - text = gsub(text, "^ ?(.*[^ ]) ?$", "%1") - text = gsub(text, " , ", " ") - text = gsub(text, " ([23])", "%1") - return text -end - -function initial_by_char(initial_string, system_index, ref_table) - local initial_set = {} - for character in mw.text.gsplit(initial_string, "") do - local temp_initial = ref_table[character] or error("Initial data not found.") - table.insert(initial_set, temp_initial[system_index] or temp_initial) - end - return table.concat(initial_set) -end - -function generate_respelling(text) - text = gsub(text, " ", "   ") - text = gsub(text, "ါ", "ာ") - if match(text, "[က-႟ꩠ-ꩻ]") then return text end - text = gsub(text, "(%+?)([^%?%+'/\\~aeiou ]*)(/?)([%?'/\\~aeiou]+)", function(voicing_mark, latin_initial, opt_sep, latin_final) - return - voicing_mark .. - (reverse_table[latin_initial] or initial_by_char(latin_initial, nil, reverse_table)) .. - opt_sep .. - reverse_table[latin_final] - end) - return text -end - -function process(initial, final, tone, schwa, system, system_index) - if match(initial .. final, "ွှ?[တနပမံ]") and system["type"] == "phonetic" then - initial = gsub(initial, "[ွ/]", "") - final = "ွ" .. final - else - initial = gsub(initial, "/", "") - end - - initial_new = system["type"] == "phonetic" and gsub(initial, "%+.", initial_voicing) or initial - - if indep_letter_table[initial_new] then - initial_new = match(initial_new, "[၌၍]") and "-" or "" - final = initial .. 
final - end - - initial_data = - initial_table[initial_new] - or initial_table[gsub(initial_new, "[%+%-%*]", "")] - or (system["type"] == "orthographic" - and initial_by_char(initial_new, system_index, initial_table) - or error("Initial data not found.")) - - initial_value = initial_data[system_index] or initial_data - - if match(initial, "^%+") and system_index == 5 then - initial_value = initial_table[gsub(initial, "%+", "")][system_index] - initial_value = gsub(initial_value, "^([^rwy]+)", "%1") - end - - final_data = - final_table[system["type"] .. schwa == "phonetic'" and schwa or final] - or (system["type"] == "phonetic" - and (final_table[final .. "်"] or indep_letter_table[final]) - or indep_letter_table[final]) - or gsub(final, "^([^်]*)([^်])(်?)$", function(first, second, third) - first_data = nucleus_table[first] or final_table[first] or indep_letter_table[first] or first - second_data = initial_table[second] or second - first = first_data ~= first and first_data[system_index] or first - second = second_data ~= second - and second_data[system_index] .. ((system_index == 3 and third ~= "") and "‘" or "") - or second - return (gsub(first .. second, "([%.:])(.*)", "%2")) - end) - - final_value = type(final_data) == "table" and final_data[system_index] or final_data - final_value = mw.ustring.toNFD(final_value) - if tone == "" then - tone_value = "" - else - if system_index ~= 4 then final_value = gsub(final_value, "̀", "") end - final_value = gsub(final_value, "[́:%.]", "") - if system["type"] .. schwa == "phonetic'" then - tone_value = "" - else - tone_data = tone_table[tone] or error("Tone data not found.") - tone_value = tone_data[system_index] - end - end - - if system_index == 1 then - final_value = gsub(final_value, "^([aeəɛiɪoɔuʊ])", "%1" .. tone_value) - elseif system_index == 5 then - final_value = gsub(final_value, "([aeiou])([^aeiou]*)$", "%1" .. tone_value .. "%2") - else - final_value = final_value .. 
tone_value - end - - return mw.ustring.toNFC(initial_value .. final_value) -end - -function remove_wide_space(text) - return (gsub(text, " ", "")) -end - -function concatenate(set, system_index) - if system_index == 1 then return remove_wide_space(table.concat(set)) end - result_text = remove_wide_space(table.concat(set, " ")) - - for count = 1, 3 do - result_text = gsub(result_text, "(.) (.)([^ ]?)", - function(previous, next, after_next) - if ambig_intersyl[system_index][previous .. next] - or ((system_index == 2 or system_index == 4) - and (match(previous .. " " .. next, "[ptkgmngy] [aeiou]") - or (match(previous .. next .. after_next, "[aeiou][ptkmn][rwyg]") and not match(after_next, "[aeiou]")))) then - return previous .. "-" .. next .. after_next - else - return previous .. next .. after_next - end - end) - end - - return result_text -end - -function export.get_romanisation(word, pronunciations, system, system_index, mode) - local sentences = {} - word = gsub(word, " ", "|") - word = syllabify(word) - word = gsub(word, "ါ", "ာ") - if system["type"] == "phonetic" then - word = gsub(word, "ဝ([တနပမံ])", "ဝွ%1") - end - for phrase in mw.text.gsplit(word, "|", true) do - local temp = {} - local syllable = mw.text.split(phrase, " ", true) - for syllable_index = 1, #syllable do - syllable[syllable_index] = gsub(syllable[syllable_index], "([း့])(်)", "%2%1") - temp[syllable_index] = gsub( - syllable[syllable_index], - "^([%+%-%*]*[ကခဂဃငစဆဇဈဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠအဣဤဥဦဧဩဪ၏၌၍ဿ][ျြ]?ွ?ှ?/?)([^း့']*)([း့]?)('?)$", - function(initial, final, tone, schwa) - return process(initial, final, tone, schwa, system, system_index) - end) - end - table.insert(sentences, concatenate(temp, system_index)) - end - if mode == "translit_module" then return table.concat(sentences, " ") end - table.insert(pronunciations[system_index], table.concat(sentences, " ")) - return pronunciations[system_index] -end - -function respelling_format(phonetic, page_title) - local page_title_set = 
mw.text.split(syllabify(page_title), " ") - local new_respellings = {} - for _, respelling in ipairs(phonetic) do - local respelling_set = mw.text.split(syllabify(respelling), " ") - if gsub(table.concat(respelling_set), "[%+%-%*']", "") == (gsub(table.concat(page_title_set), "ါ", "ာ")) then - for index, element in ipairs(respelling_set) do - if element ~= page_title_set[index] then - respelling_set[index] = '' .. element .. '' - end - end - end - table.insert(new_respellings, table.concat(respelling_set)) - end - text = table.concat(new_respellings, ", ") - text = remove_wide_space(text) - text = gsub(text, "[%+%-].", initial_voicing) - text = gsub(text, "([ခဂငဒပဝ]ေ?)ာ", "%1ါ") - return text -end - -function export.generate_tests(word, respelling) - respelling, word = generate_respelling(respelling), generate_respelling(word) - local pronunciations = { - [1] = {}, - [2] = {}, - [3] = {}, - [4] = {}, - [5] = {}, - } - local p, result = { ["orthographic"] = word, ["phonetic"] = respelling or word }, {} - - table.sort(system_list, function(first, second) return first[1] < second[1] end) - for system_index, system in ipairs(system_list) do - pronunciations[system_index] = export.get_romanisation(p[system["type"]], pronunciations, system, system_index) - end - for system_index = 1, 5 do - table.insert(result, table.concat(pronunciations[system_index])) - end - return (gsub(gsub(table.concat(result, " | "), "", "("), "", ")")) -end - -function export.make(frame) - local args = frame:getParent().args - local page_title = mw.title.getCurrentTitle().text - local title = generate_respelling(args["word"] or page_title) - - local p, result = { ["orthographic"] = { title }, ["phonetic"] = {} }, {} - local pronunciations = { - [1] = {}, - [2] = {}, - [3] = {}, - [4] = {}, - [5] = {}, - } - - if not args[1] then args = { title } end - for index, item in ipairs(args) do - table.insert(p["phonetic"], (item ~= "") and generate_respelling(item) or nil) - end - - 
table.sort(system_list, function(first, second) return first[1] < second[1] end) - for system_index, system in ipairs(system_list) do - for _, word in ipairs(p[system["type"]]) do - pronunciations[system_index] = export.get_romanisation(word, pronunciations, system, system_index) - end - end - - if title ~= table.concat(args) then - table.insert(result, - "* Phonetic respelling" .. (#p["phonetic"] > 1 and "s" or "") .. ": " .. - tostring( mw.html.create( "span" ) - :attr( "lang", "my" ) - :attr( "class", "Mymr" ) - :wikitext( respelling_format( p["phonetic"], page_title ))) .. "\n" ) - end - - table.insert(result, - '* [[Wiktionary:International Phonetic Alphabet|IPA]]' .. - '([[Appendix:Burmese pronunciation|key]]): ' .. - - (tostring( mw.html.create( "span" ) - :attr( "class", "IPA" ) - :wikitext( "/" .. gsub(table.concat(pronunciations[1], "/, /"), "ʔʔ", "ʔ.ʔ") .. "/" ))) .. - - '\n* [[Wiktionary:Burmese transliteration|Romanization:]] ') - - for system_index = 2, 5 do - table.insert(result, - (system_index ~= 2 and " • " or "") .. - "''" .. system_list[system_index]["name"] .. ":'' " .. - table.concat(pronunciations[system_index], "/")) - end - - return table.concat(result) -end - -return export \ No newline at end of file diff --git a/wikt/translit/my-translit.lua b/wikt/translit/my-translit.lua deleted file mode 100644 index 8e63f82..0000000 --- a/wikt/translit/my-translit.lua +++ /dev/null @@ -1,201 +0,0 @@ --- This module will transliterate Burmese language text per the MLCTS scheme. 
--- Language code: mya - -local export = {} -local gsub = mw.ustring.gsub - -local symbols = { - ["၀"] = "0", ["၁"] = "1", ["၂"] = "2", ["၃"] = "3", ["၄"] = "4", - ["၅"] = "5", ["၆"] = "6", ["၇"] = "7", ["၈"] = "8", ["၉"] = "9", - ["၊"] = "|", ["။"] = "||" -} - -function export.tr(text, lang, sc, debug_mode) - local m_pron = require("/usr/local/lib/lua/wikt/translit/my-pron").get_romanisation - text = gsub(text, ".", symbols) - for word in mw.ustring.gmatch(text, "[က-႟ꩠ-ꩻ]+") do - success, translit = pcall(m_pron, word, nil, { 2, ["type"] = "orthographic", ["name"] = "MLCTS" }, 2, "translit_module") - if success then - text = gsub(text, word, translit, 1) - else - return nil - end - end - if mw.ustring.match(text, "[က-႟ꩠ-ꩻ]") and not debug_mode then - return nil - end - return text -end - -return export - --- 2 tests failed. (refresh) - --- test_translit: --- Text Expected Actual Differs at --- Failed သျှိုး (nil) N/A --- Failed ယောက်ျား yaukya: (nil) N/A --- Passed ပြဿနာ pra.ssa.na pra.ssa.na --- Passed ဩစတြီးယား au:ca.tri:ya: au:ca.tri:ya: --- Passed အီကွေဒေါ i-kwedau: i-kwedau: --- Passed ဘင်္ဂလားဒေ့ရှ် bhangga.la:de.hr bhangga.la:de.hr --- Passed ဘလော့ဂ် bha.laug. bha.laug. --- Passed အကျဉ်းထောင် a.kyany:htaung a.kyany:htaung --- Passed တာ့ခ်မင်နစ္စတန် tahk.mangnacca.tan tahk.mangnacca.tan --- Passed နှစ်သစ်ကူး မင်္ဂလာပါ hnacsacku: mangga.lapa hnacsacku: mangga.lapa --- Passed ဆယ့်လေး hcai.le: hcai.le: --- Passed ဗော့ဒ်ကာ၏ baud.kae baud.kae --- Passed သမီး၏ sa.mi:e sa.mi:e --- Passed အီးမေးလ်၏ i:mel:e i:mel:e --- Passed ကြိုဆိုပါ၏ kruihcuipae kruihcuipae --- Passed ဥက္ကဌ ukka.hta. ukka.hta. --- Passed ၊ | | --- Passed ။ || || --- Passed ထိုင်းနိုင်ငံ htuing:nuingngam htuing:nuingngam --- Passed ယိုးဒယားနိုင်ငံ yui:da.ya:nuingngam yui:da.ya:nuingngam --- Passed ကိုယ့်မင်းကိုယ့်ချင်း kuiy.mang:kuiy.hkyang: kuiy.mang:kuiy.hkyang: --- Passed ကမ်းခြေ kam:hkre kam:hkre --- Passed ဝိဇ္ဇာ wijja wijja --- Passed ဒုက္ခ dukhka. dukhka. --- Passed ကဏ္ဍ kanda. kanda. 
--- Passed မန္တလေး manta.le: manta.le: --- Passed ပိဿာ pi.ssa pi.ssa --- Passed ကြက်ဆင် krakhcang krakhcang --- Passed မြန်မာဘာသာ mranmabhasa mranmabhasa --- Passed ဒူးရင်းသီး du:rang:si: du:rang:si: --- Passed ကျောင်းသူ kyaung:su kyaung:su --- Passed အင်္ဂလိပ် angga.lip angga.lip --- Passed ကမ္ဘာ kambha kambha --- Passed ရင်ဘတ် rangbhat rangbhat --- Passed ရုပ်ရှင်ရုံ ruphrang-rum ruphrang-rum --- Passed နွေဦးပေါက် nweu:pauk nweu:pauk --- Passed စာကြည့်တိုက် ca-krany.tuik ca-krany.tuik --- Passed နှာခေါင်း hnahkaung: hnahkaung: --- Passed ဆောင်းဦး hcaung:u: hcaung:u: --- Passed ဗမာစကား ba.maca.ka: ba.maca.ka: --- Passed သန်ကောင် sankaung sankaung --- Passed သည်းခြေ sany:hkre sany:hkre --- Passed သင်္ချိုင်း sanghkyuing: sanghkyuing: --- Passed နာမည် namany namany --- Passed ဒေါင်း daung: daung: --- Passed ရင်ပတ် rangpat rangpat --- Passed မျောက် myauk myauk --- Passed မျက်စိ myakci. myakci. --- Passed မိန်းမ min:ma. min:ma. --- Passed လွတ်လပ်ခွင့် lwatlaphkwang. lwatlaphkwang. --- Passed လှေ hle hle --- Passed ဆေးရုံ hce:rum hce:rum --- Passed ဘာသာဗေဒ bhasabeda. bhasabeda. --- Passed ဘူတာရုံ bhutarum bhutarum --- Passed တက္ကသိုလ် takka.suil takka.suil --- Passed တရုတ်စကား ta.rutca.ka: ta.rutca.ka: --- Passed ဗုဒ္ဓ buddha. buddha. --- Passed ဂြိုဟ် gruih gruih --- Passed အင်္ဂလိပ်ဘာသာ angga.lipbhasa angga.lipbhasa --- Passed အဏ္ဏဝါ anna.wa anna.wa --- Passed ဦးချို u:hkyui u:hkyui --- Passed အိပ်ရာ ip-ra ip-ra --- Passed အဘိဓာန် a.bhi.dhan a.bhi.dhan --- Passed လမ်းလျှောက် lam:hlyauk lam:hlyauk --- Passed ရန်ဖြစ် ranhprac ranhprac --- Passed အော့အန် au.an au.an --- Passed ပြည်ထောင်စု မြန်မာ နိုင်ငံတော် pranyhtaungcu. mranma nuingngamtau pranyhtaungcu. mranma nuingngamtau --- Passed အိန္ဒိယနိုင်ငံ indi.ya.nuingngam indi.ya.nuingngam --- Passed ရန်ကုန်မြို့ rankunmrui. rankunmrui. 
--- Passed ထိုင်ဝမ် htuing-wam htuing-wam --- Passed အီတလီနိုင်ငံ ita.linuingngam ita.linuingngam --- Passed ရွှေဖရုံသီး hrwehpa.rumsi: hrwehpa.rumsi: --- Passed ဖြစ်ခေါင့်ဖြစ်ခဲ hprachkaung.hprachkai: hprachkaung.hprachkai: --- Passed ခေါင့် hkaung. hkaung. --- Passed ဝက်အူ wak-u wak-u --- Passed ဥစ် uc uc --- Passed မလာနိုင်ပါဘူး။ ma.lanuingpabhu:|| ma.lanuingpabhu:|| --- local tests = require('Module:UnitTests') --- local my_translit = require('Module:my-translit') - --- function tests:check_tr(Mymr, Latn) --- self:equals(('[[%s#Burmese|%s]]'):format(Mymr, Mymr), my_translit.tr(Mymr, 'my', 'Mymr'), Latn) --- end - --- function tests:test_translit() - --- self:check_tr("သျှိုး", "") --- self:check_tr("ယောက်ျား", "yaukya:") --- self:check_tr("ပြဿနာ", "pra.ssa.na") --- self:check_tr("ဩစတြီးယား", "au:ca.tri:ya:") --- self:check_tr("အီကွေဒေါ", "i-kwedau:") --- self:check_tr("ဘင်္ဂလားဒေ့ရှ်", "bhangga.la:de.hr") --- self:check_tr("ဘလော့ဂ်", "bha.laug.") --- self:check_tr("အကျဉ်းထောင်", "a.kyany:htaung") --- self:check_tr("တာ့ခ်မင်နစ္စတန်", "tahk.mangnacca.tan") --- self:check_tr("နှစ်သစ်ကူး မင်္ဂလာပါ", "hnacsacku: mangga.lapa") --- self:check_tr("ဆယ့်လေး", "hcai.le:") --- self:check_tr("ဗော့ဒ်ကာ၏", "baud.kae") --- self:check_tr("သမီး၏", "sa.mi:e") --- self:check_tr("အီးမေးလ်၏", "i:mel:e") --- self:check_tr("ကြိုဆိုပါ၏", "kruihcuipae") --- self:check_tr("ဥက္ကဌ", "ukka.hta.") --- self:check_tr("၊", "|") --- self:check_tr("။", "||") --- self:check_tr("ထိုင်းနိုင်ငံ", "htuing:nuingngam") --- self:check_tr("ယိုးဒယားနိုင်ငံ", "yui:da.ya:nuingngam") --- self:check_tr("ကိုယ့်မင်းကိုယ့်ချင်း", "kuiy.mang:kuiy.hkyang:") --- self:check_tr("ကမ်းခြေ", "kam:hkre") --- self:check_tr("ဝိဇ္ဇာ", "wijja") --- self:check_tr("ဒုက္ခ", "dukhka.") --- self:check_tr("ကဏ္ဍ", "kanda.") --- self:check_tr("မန္တလေး", "manta.le:") --- self:check_tr("ပိဿာ", "pi.ssa") --- self:check_tr("ကြက်ဆင်", "krakhcang") --- self:check_tr("မြန်မာဘာသာ", "mranmabhasa") --- self:check_tr("ဒူးရင်းသီး", 
"du:rang:si:") --- self:check_tr("ကျောင်းသူ", "kyaung:su") --- self:check_tr("အင်္ဂလိပ်", "angga.lip") --- self:check_tr("ကမ္ဘာ", "kambha") --- self:check_tr("ရင်ဘတ်", "rangbhat") --- self:check_tr("ရုပ်ရှင်ရုံ", "ruphrang-rum") --- self:check_tr("နွေဦးပေါက်", "nweu:pauk") --- self:check_tr("စာကြည့်တိုက်", "ca-krany.tuik") --- self:check_tr("နှာခေါင်း", "hnahkaung:") --- self:check_tr("ဆောင်းဦး", "hcaung:u:") --- self:check_tr("ဗမာစကား", "ba.maca.ka:") --- self:check_tr("သန်ကောင်", "sankaung") --- self:check_tr("သည်းခြေ", "sany:hkre") --- self:check_tr("သင်္ချိုင်း", "sanghkyuing:") --- self:check_tr("နာမည်", "namany") --- self:check_tr("ဒေါင်း", "daung:") --- self:check_tr("ရင်ပတ်", "rangpat") --- self:check_tr("မျောက်", "myauk") --- self:check_tr("မျက်စိ", "myakci.") --- self:check_tr("မိန်းမ", "min:ma.") --- self:check_tr("လွတ်လပ်ခွင့်", "lwatlaphkwang.") --- self:check_tr("လှေ", "hle") --- self:check_tr("ဆေးရုံ", "hce:rum") --- self:check_tr("ဘာသာဗေဒ", "bhasabeda.") --- self:check_tr("ဘူတာရုံ", "bhutarum") --- self:check_tr("တက္ကသိုလ်", "takka.suil") --- self:check_tr("တရုတ်စကား", "ta.rutca.ka:") --- self:check_tr("ဗုဒ္ဓ", "buddha.") --- self:check_tr("ဂြိုဟ်", "gruih") --- self:check_tr("အင်္ဂလိပ်ဘာသာ", "angga.lipbhasa") --- self:check_tr("အဏ္ဏဝါ", "anna.wa") --- self:check_tr("ဦးချို", "u:hkyui") --- self:check_tr("အိပ်ရာ", "ip-ra") --- self:check_tr("အဘိဓာန်", "a.bhi.dhan") --- self:check_tr("လမ်းလျှောက်", "lam:hlyauk") --- self:check_tr("ရန်ဖြစ်", "ranhprac") --- self:check_tr("အော့အန်", "au.an") --- self:check_tr("ပြည်ထောင်စု မြန်မာ နိုင်ငံတော်", "pranyhtaungcu. 
mranma nuingngamtau") --- self:check_tr("အိန္ဒိယနိုင်ငံ", "indi.ya.nuingngam") --- self:check_tr("ရန်ကုန်မြို့", "rankunmrui.") --- self:check_tr("ထိုင်ဝမ်", 'htuing-wam') --- self:check_tr("အီတလီနိုင်ငံ", 'ita.linuingngam') --- self:check_tr("ရွှေဖရုံသီး", 'hrwehpa.rumsi:') --- self:check_tr("ဖြစ်ခေါင့်ဖြစ်ခဲ", 'hprachkaung.hprachkai:') --- self:check_tr("ခေါင့်", "hkaung.") --- self:check_tr("ဝက်အူ", "wak-u") --- self:check_tr("ဥစ်", "uc") --- self:check_tr("မလာနိုင်ပါဘူး။", "ma.lanuingpabhu:||") --- end - --- return tests \ No newline at end of file diff --git a/wikt/translit/myv-translit.lua b/wikt/translit/myv-translit.lua deleted file mode 100644 index d382b4c..0000000 --- a/wikt/translit/myv-translit.lua +++ /dev/null @@ -1,28 +0,0 @@ --- This module will transliterate Erzya language text per WT:MYV TR. --- Language code: myv -local export = {} - -local tab = { - ["А"]="A", ["Б"]="B", ["В"]="V", ["Г"]="G", ["Д"]="D", ["Е"]="E", ["Ё"]="Jo", ["Ж"]="Ž", ["З"]="Z", ["И"]="I", ["Й"]="J", - ["К"]="K", ["Л"]="L", ["М"]="M", ["Н"]="N", ["О"]="O", ["П"]="P", ["Р"]="R", ["С"]="S", ["Т"]="T", ["У"]="U", ["Ф"]="F", - ["Х"]="X", ["Ц"]="C", ["Ч"]="Č", ["Ш"]="Š", ["Щ"]="Šč", ["Ъ"]="ʺ", ["Ы"]="Y", ["Ь"]="ʹ", ["Э"]="E", ["Ю"]="Ju", ["Я"]="Ja", - ['а']='a', ['б']='b', ['в']='v', ['г']='g', ['д']='d', ['е']='e', ['ё']='jo', ['ж']='ž', ['з']='z', ['и']='i', ['й']='j', - ['к']='k', ['л']='l', ['м']='m', ['н']='n', ['о']='o', ['п']='p', ['р']='r', ['с']='s', ['т']='t', ['у']='u', ['ф']='f', - ['х']='x', ['ц']='c', ['ч']='č', ['ш']='š', ['щ']='šč', ['ъ']='ʺ', ['ы']='y', ['ь']='ʹ', ['э']='e', ['ю']='ju', ['я']='ja', -} - -function export.tr(text, lang, sc) - -- Ё needs converting if is decomposed - text = text:gsub("ё","ё"):gsub("Ё","Ё") - - -- е after a vowel or at the beginning of a word becomes je - text = mw.ustring.gsub(text, "([АОУЫЕЯЁЮИЕЪЬаоуыэяёюиеъь%A][́̀]?)е","%1je") - text = mw.ustring.gsub(text, "^Е","Je") - text = mw.ustring.gsub(text, "^е","je") - text = 
mw.ustring.gsub(text, "([^Ѐ-ӿ])Е","%1Je") - text = mw.ustring.gsub(text, "([^Ѐ-ӿ])е","%1je") - - return (mw.ustring.gsub(text,'.',tab)) -end - -return export \ No newline at end of file diff --git a/wikt/translit/ne-translit.lua b/wikt/translit/ne-translit.lua deleted file mode 100644 index 2be0aee..0000000 --- a/wikt/translit/ne-translit.lua +++ /dev/null @@ -1,100 +0,0 @@ --- Transliteration for Nepali --- This module will transliterate Nepali language text per WT:NE TR. It is also used to transliterate Doteli (dty). - -local export = {} -local gsub = mw.ustring.gsub -local match = mw.ustring.match - -local conv = { - -- consonants - ['क'] = 'k', ['ख'] = 'kh', ['ग'] = 'g', ['घ'] = 'gh', ['ङ'] = 'ṅ', - ['च'] = 'c', ['छ'] = 'ch', ['ज'] = 'j', ['झ'] = 'jh', ['ञ'] = 'ñ', - ['ट'] = 'ṭ', ['ठ'] = 'ṭh', ['ड'] = 'ḍ', ['ढ'] = 'ḍh', ['ण'] = 'ṇ', - ['त'] = 't', ['थ'] = 'th', ['द'] = 'd', ['ध'] = 'dh', ['न'] = 'n', - ['प'] = 'p', ['फ'] = 'ph', ['ब'] = 'b', ['भ'] = 'bh', ['म'] = 'm', - ['य'] = 'y', ['र'] = 'r', ['ल'] = 'l', ['व'] = 'w', ['ळ'] = 'ḷ', - ['श'] = 'ś', ['ष'] = 'ṣ', ['स'] = 's', ['ह'] = 'h', - ['क़'] = 'q', ['ख़'] = 'x', ['ग़'] = 'ġ', ['व़'] = 'v', ['ऴ'] = 'ḻ', - ['ज़'] = 'z', ['झ़'] = 'ž', ['ड़'] = 'ṛ', ['ढ़'] = 'ṛh', - ['फ़'] = 'f', ['थ़'] = 'θ', ['ऩ'] = 'ṉ', ['ऱ'] = 'ṟ', - ['ॽ'] = "'", - - -- vowel diacritics - ['ि'] = 'i', ['ु'] = 'u', ['े'] = 'e', ['ो'] = 'o', - ['ा'] = 'ā', ['ी'] = 'ī', ['ू'] = 'ū', - ['ृ'] = 'ŕ', - ['ै'] = 'ai', ['ौ'] = 'au', - ['ॉ'] = 'ŏ', - ['ॅ'] = 'ĕ', - - -- vowel signs - ['अ'] = 'a', ['इ'] = 'i', ['उ'] = 'u', ['ए'] = 'e', ['ओ'] = 'o', - ['आ'] = 'ā', ['ई'] = 'ī', ['ऊ'] = 'ū', - ['ऋ'] = 'ŕ', - ['ऐ'] = 'ai', ['औ'] = 'au', - ['ऑ'] = 'ŏ', - ['ऍ'] = 'ĕ', - - -- chandrabindu - ['ँ'] = '̃', - - -- anusvara - ['ं'] = '̃', - - -- visarga - ['ः'] = 'ḥ', - - -- virama - ['्'] = '', - - -- om - ['ॐ'] = 'oṁ', - - -- numerals - ['०'] = '0', ['१'] = '1', ['२'] = '2', ['३'] = '3', ['४'] = '4', ['५'] = '5', ['६'] = '6', ['७'] = '7', ['८'] = '8', ['९'] = 
'9', - - -- punctuation - ['।'] = '.', -- danda - ['+'] = '', -- compound separator -} - -local all_cons, special_cons = 'कखगघङचछजझञटठडढतथदधपफबभशषसयरलवहणनम', 'यरलवहनम' -local vowel = 'aिुृेोाीूैौॉॅ' - -function export.tr(text, lang, sc, reduction) - text = gsub( - text, - '([' .. all_cons .. ']़?)([' .. vowel .. '्]?)', - function(c, d) - return c .. ( d == "" and 'a' or d ) - end - ) - if reduction and not match(text, "[<>]") then - for word in mw.ustring.gmatch(text, "[ऀ-ॿa]+") do - text = gsub(text, word, "<" .. word .. ">") - end - end - for word in mw.ustring.gmatch(text, "<[^<>]+>") do - local orig_word = word - word = gsub(word, "[<>]", "") - word = gsub( - word, - '(.?)(.)([' .. all_cons .. '])(़?)a$', - function(pre, first, second, opt) - local last = "" - if match(second, '[' .. special_cons .. ']') and match(first, '्') or - match(second .. first, '[ीेै]य') or - pre == second and first == "्" then - last = 'a' - end - return pre .. first .. second .. opt .. last - end - ) - text = gsub(text, orig_word, word) - end - text = gsub(text, ".़?", conv) - text = gsub(text, "[<>]", "") - return mw.ustring.toNFC(text) -end - -return export \ No newline at end of file diff --git a/wikt/translit/new-translit.lua b/wikt/translit/new-translit.lua deleted file mode 100644 index d662258..0000000 --- a/wikt/translit/new-translit.lua +++ /dev/null @@ -1,87 +0,0 @@ --- This module will transliterate Newari language text. 
- -local export = {} - -local consonants = { - ['क']='k', ['ख']='kh', ['ग']='g', ['घ']='gh', ['ङ']='ṅ', - ['च']='c', ['छ']='ch', ['ज']='j', ['झ']='jh', ['ञ']='ñ', - ['ट']='ṭ', ['ठ']='ṭh', ['ड']='ḍ', ['ढ']='ḍh', ['ण']='ṇ', - ['त']='t', ['थ']='th', ['द']='d', ['ध']='dh', ['न']='n', - ['प']='p', ['फ']='ph', ['ब']='b', ['भ']='bh', ['म']='m', - ['य']='y', ['र']='r', ['ल']='l', ['व']='v', ['ळ']='ḷ', - ['श']='ś', ['ष']='ṣ', ['स']='s', ['ह']='h', -} - -local diacritics = { - ['ा']='ā', ['ि']='i', ['ी']='ī', ['ु']='u', ['ू']='ū', ['ृ']='ṛ', ['ॄ']='ṝ', - ['ॢ']='ḷ', ['ॣ']='ḹ', ['े']='e', ['ै']='ai', ['ो']='o', ['ौ']='au', ['्']='', -} - -local tt = { - -- vowels - ['अ']='a', ['आ']='ā', ['इ']='i', ['ई']='ī', ['उ']='u', ['ऊ']='ū', ['ऋ']='ṛ', ['ॠ']='ṝ', - ['ऌ']='ḷ', ['ॡ']='ḹ', ['ए']='e', ['ऐ']='ai', ['ओ']='o', ['औ']='au', - -- chandrabindu - ['ँ']='̃', - -- anusvara - ['ं']='̃', - -- visarga - ['ः']='ː', - -- avagraha - ['ऽ']='’', - --numerals - ['०']='0', ['१']='1', ['२']='2', ['३']='3', ['४']='4', ['५']='5', ['६']='6', ['७']='7', ['८']='8', ['९']='9', - --punctuation - ['॥']='.', --double danda - ['।']='.', --danda - --Om - ['ॐ']='oṃ', -} - -function export.tr(text, lang, sc) - text = mw.ustring.gsub( - text, - '([कखगघङचछजझञटठडढणतथदधनपफबभमयरलळवशषसह])'.. - '([ािीुूृॄॢॣेैोौ्]?)', - function(c, d) - if d == "" then - return consonants[c] .. 'a' - else - return consonants[c] .. diacritics[d] - end - end) - - text = mw.ustring.gsub(text, '.', tt) - - return mw.ustring.toNFC(text) -end - -return export - --- Text Expected Actual Differs at --- Passed सर्गः sargaː sargaː --- Passed सँक्वः sãkvaː sãkvaː --- Passed प्याखं pyākhã pyākhã --- Passed नवःघाँय् navaːghā̃y navaːghā̃y --- Passed छेँ chẽ chẽ --- -- Unit tests for [[Module:new-translit]]. Refresh page to run tests. --- local tests = require('Module:UnitTests') --- local new_translit = require('Module:new-translit') - --- --TO DO --- function tests:do_test_translit(sans, roman, xlit) --- return self:equals('[[' .. sans .. 
'#Newari|' .. sans .. ']]', new_translit.tr(sans, 'new', 'Deva'), roman) --- end - --- function tests:test_translit_newari() --- local examples = { --- { 'सर्गः', 'sargaː' }, --- { 'सँक्वः', 'sãkvaː' }, --- { 'प्याखं', 'pyākhã' }, --- { 'नवःघाँय्', 'navaːghā̃y' }, --- { 'छेँ', 'chẽ' }, --- } --- return self:iterate(examples, "do_test_translit") --- end - --- return tests \ No newline at end of file diff --git a/wikt/translit/niv-translit.lua b/wikt/translit/niv-translit.lua deleted file mode 100644 index d45e3a7..0000000 --- a/wikt/translit/niv-translit.lua +++ /dev/null @@ -1,122 +0,0 @@ --- This module will transliterate Nivkh language text. - -local u = mw.ustring.char - -local UTF8_char = "[\1-\127\194-\244][\128-\191]*" - -local export = {} - -local tab = { - ["А"]='A', ["а"]='a', ["В"]='V', ["в"]='v', ["Е"]='E', ["е"]='e', - ["Ё"]='Jo', ["ё"]='jo', ["Г"]='G', ["г"]='g', ["Ӷ"]='', ["ӷ"]='', - ["Ғ"]='Ꜧ', ["ғ"]='ꜧ', ["Ӻ"]='Ꜧ̇', ["ӻ"]='ꜧ̇', ["Д"]='D', ["д"]='d', - ["И"]='I', ["и"]='i', ["Й"]='J', ["й"]='j', - ["К"]='K', ["к"]='k', ["Ӄ"]='Q', ["ӄ"]='q', ["Л"]='L', ["л"]='l', - ["М"]='M', ["м"]='m', ["Н"]='N', ["н"]='n', ["Ӈ"]='Ŋ', ["ӈ"]='ŋ', - ["О"]='O', ["о"]='o', ["П"]='P', ["п"]='p', ["Р"]='R', ["р"]='r', - ["Р̌"]='Ř', ["р̌"]='ř', ["С"]='S', ["с"]='s', ["Т"]='T', ["т"]='t', - ["У"]='U', ["у"]='u', ["Ф"]='F', ["ф"]='f', ["Х"]='X', ["х"]='x', - ["Ӽ"]='Ẋ', ["ӽ"]='ẋ', ["Ӿ"]='H', ["ӿ"]='h', ["Ч"]='Ț’', ["ч"]='ț’', - ["Ы"]='Ə', ["ы"]='ə', ["Э"]='E', ["э"]='e', ["Ю"]='Ju', ["ю"]='ju', - ["Я"]='Ja', ["я"]='ja', ["’"]='ʼ', ["ʼ"]='ʼ', - -- non-native letters - ["Б"]='B', ["б"]='b', ["Ж"]='ž', ["ж"]='ž', ["З"]='Z', ["з"]='z', - ["Ц"]='C', ["ц"]='c', ["Ш"]='Š', ["ш"]='š', ["Щ"]='Šč', ["щ"]='šč', - ['Ъ']='ʺ', ['ъ']='ʺ', ["Ь"]="’", ["ь"]="’" -} - -local palatal = { - { 'Дj', 'D̦' }, - { 'дj', 'd̦' }, - { 'Нj', 'Ņ' }, - { 'нj', 'ņ' }, - { 'Тj', 'Ț' }, - { 'тj', 'ț' }, -} - -function export.tr(text, lang, sc) - --[=[ - Unfortunately the Cyrillic alphabet doesn't distinguish 
between ţi and ti - or ţe and te and so on. - - Represent iotation with j to allow the palatal consonant replacements. - ]=] - text = string.gsub(text, UTF8_char, - { - ['Я'] = 'Ja', ['я'] = 'ja', - ['Ё'] = 'Jo', ['ё'] = 'jo', - ['Ю'] = 'Ju', ['ю'] = 'ju', - ['ь'] = 'j' - } - ) - - for _, item in ipairs(palatal) do - text = string.gsub(text, unpack(item)) - end - - local vowels = {} - for char in string.gmatch("АОУЫЕИЪЬаӣиоуыэеъьaeiou", UTF8_char) do - vowels[char] = true - end - text = mw.ustring.gsub(text, - "(.?)([Ее])", - function (preceding, e) - -- modifier letter apostrophe or right single quotation mark - local capital = e == "Е" - if preceding == "ʼ" or preceding == "’" then - e = capital and "E" or "e" - elseif preceding == "" or vowels[preceding] or mw.ustring.find(preceding, "[^Ѐ-ӿ]") then - e = capital and "Je" or "je" - else - mw.log("Module:niv-translit could not decide how to transliterate " .. e .. - " after " .. preceding .. ".") - end - return preceding .. e - end) - - return string.gsub(text, UTF8_char, tab) -end - -return export - - --- 1 test failed. 
(refresh) - --- test_translit: --- Text Expected Actual --- Passed ераӄ jeraq jeraq --- Passed маёдь majod̦ majod̦ --- Passed поезд pojezd pojezd --- Passed няӽ ņaẋ ņaẋ --- Passed няӻр ņaꜧ̇r ņaꜧ̇r --- Failed атьх aţx ațx --- Passed кʼеӄ kʼeq kʼeq --- Passed иф урладьғугирпарк ескидь if urlad̦ꜧugirpark jeskid̦ if urlad̦ꜧugirpark jeskid̦ --- Passed нюдь ņud̦ ņud̦ --- Passed ӿилх hilx hilx --- local tests = require("Module:UnitTests") --- local translit = require("Module:niv-translit") - --- function tests:check_translit(Cyrl, Latn) --- self:equals(('[[%s#Nivkh|%s]]'):format(Cyrl, Cyrl), translit.tr(Cyrl, 'niv', 'Cyrl'), Latn) --- end - --- function tests:test_translit() --- local examples = { --- { "ераӄ", "jeraq" }, --- { "маёдь", "majod̦" }, --- { "поезд", "pojezd" }, --- { "няӽ", "ņaẋ" }, --- { "няӻр", "ņaꜧ̇r" }, --- { "атьх", "aţx" }, --- { "кʼеӄ", "kʼeq" }, --- { "иф урладьғугирпарк ескидь", "if urlad̦ꜧugirpark jeskid̦" }, --- { "нюдь", "ņud̦" }, --- { "ӿилх", "hilx" }, --- } - --- self:iterate(examples, "check_translit") - --- end - --- return tests \ No newline at end of file diff --git a/wikt/translit/nog-translit.lua b/wikt/translit/nog-translit.lua deleted file mode 100644 index ad495e3..0000000 --- a/wikt/translit/nog-translit.lua +++ /dev/null @@ -1,47 +0,0 @@ --- This module will transliterate Nogai language text per WT:NOG TR. 
- -local export = {} - -local tt = { - ["а"]="a", ["б"]="b", ["в"]="v", ["г"]="g", ["д"]="d", ["е"]="e", ["ё"]="yo", ["ж"]="j", - ["з"]="z", ["и"]="i", ["й"]="y", ["к"]="k", ["л"]="l", ["м"]="m", ["н"]="n", ["о"]="o", - ["п"]="p", ["р"]="r", ["с"]="s", ["т"]="t", ["у"]="u", ["ф"]="f", ["х"]="x", ["ц"]="c", - ["ч"]="ç", ["ш"]="ş", ["щ"]="şç", ["ъ"]="”", ["ы"]="ı", ["ь"]="’", ["э"]="é", ["ю"]="yu", - ["я"]="ya", ["А"]="A", ["Б"]="B", ["В"]="V", ["Г"]="G", ["Д"]="D", ["Е"]="E", ["Ё"]="Yo", ["Ж"]="J", - ["З"]="Z", ["И"]="I", ["Й"]="Y", ["К"]="K", ["Л"]="L", ["М"]="M", ["Н"]="N", ["О"]="O", - ["П"]="P", ["Р"]="R", ["С"]="S", ["Т"]="T", ["У"]="U", ["Ф"]="F", ["Х"]="X", ["Ц"]="C", - ["Ч"]="Ç", ["Ш"]="Ş", ["Щ"]="Şç", ["Ъ"]="”", ["Ы"]="I", ["Ь"]="’", ["Э"]="É", ["Ю"]="Yu", - ["Я"]="Ya"}; - -local digraphs = { - ['аь'] = 'ä', - ['Аь'] = 'Ä', - ['нъ'] = 'ñ', - ['Нъ'] = 'Ñ', - ['оь'] = 'ö', - ['Оь'] = 'Ö', - ['уь'] = 'ü', - ['Уь'] = 'Ü', -} - -function export.tr(text) - text = mw.ustring.gsub( - text, - "([АОУЫЕЯЁЮИЕаоуыэяёюиеь%A][́̀]?)([Ее])", - function(a,e) - return a .. (e == 'е' and 'ye' or 'Ye') - end - ) - :gsub("^Е",'Ye') - :gsub("^е",'ye'); - - for digraph, translit in pairs(digraphs) do - text = mw.ustring.gsub(text, digraph, translit) - end - - text = mw.ustring.gsub(text, '.', tt) - - return text -end - -return export diff --git a/wikt/translit/nsk-translit.lua b/wikt/translit/nsk-translit.lua deleted file mode 100644 index b084e8a..0000000 --- a/wikt/translit/nsk-translit.lua +++ /dev/null @@ -1,32 +0,0 @@ --- This module will transliterate Naskapi language text. 
- -local export = {} - -local replacements = { - ["c"] = "ch", - ["ī"] = "ii", - ["ō"] = "oo", - ["ā"] = "aa", - ["o"] = "u", - ["š"] = "sh", - ["ð"] = "th", - ["(.)ː"] = "%1%1", - "[ᔌᔍᔎᔏ]", { - ["ᔌ"]="spwaa", - ["ᔍ"]="stwaa", - ["ᔎ"]="skwaa", - ["ᔏ"]="schwaa", - } -} - -function export.tr(text, lang, sc) - text = require("Module:Cans-translit").tr(text, lang, sc) - - for regex, replacement in pairs(replacements) do - text = mw.ustring.gsub(text, regex, replacement) - end - - return text -end - -return export \ No newline at end of file diff --git a/wikt/translit/or-translit.lua b/wikt/translit/or-translit.lua deleted file mode 100644 index 11969a0..0000000 --- a/wikt/translit/or-translit.lua +++ /dev/null @@ -1,98 +0,0 @@ --- This module will transliterate Oriya language text. Odia --- Language code: ori ---[[ - ori – inclusive code -Individual codes: -ory – Odia -spv – Sambalpuri -bpv – Dakshni -ort – Adivasi Odia (Kotia) -dso – Desiya (duplicate of [ort])[3] -]] - -local export = {} - -local consonants = { - ['କ']='k', ['ଖ']='kh', ['ଗ']='g', ['ଘ']='gh', ['ଙ']='ṅ', - ['ଚ']='c', ['ଛ']='ch', ['ଜ']='j', ['ଝ']='jh', ['ଞ']='ñ', - ['ଟ']='ṭ', ['ଠ']='ṭh', ['ଡ']='ḍ', ['ଢ']='ḍh', ['ଣ']='ṇ', - ['ତ']='t', ['ଥ']='th', ['ଦ']='d', ['ଧ']='dh', ['ନ']='n', - ['ପ']='p', ['ଫ']='ph', ['ବ']='b', ['ଵ']='v', ['ଭ']='bh', ['ମ']='m', - ['ଯ']='j', ['ୟ']='y', ['ର']='r', ['ଲ']='l', ['ଳ']='ḷ', ['ୱ']='w', - ['ଶ']='ś', ['ଷ']='ṣ', ['ସ']='s', ['ହ']='h', - ['ଡ଼']='ṛ', ['ଢ଼']='ṛh', -} - -local diacritics = { - ['ା']='a', ['ି']='i', ['ୀ']='ī', ['ୁ']='u', ['ୂ']='ū', ['ୃ']='ru', ['ୄ']='rū', - ['ୢ']='lu', ['ୣ']='lū', ['େ']='e', ['ୈ']='ôi', ['ୖ']='ôi', ['ୋ']='o', ['ୌ']='ôu', ['ୗ']='ôu', - ['୍']='', -} - -local tt = { - -- vowels - ['ଅ']='ô', ['ଆ']='a', ['ଇ']='i', ['ଈ']='ī', ['ଉ']='u', ['ଊ']='ū', ['ଋ']='ru', ['ୠ']='rū', - ['ଌ']='lu', ['ୡ']='lū', ['ଏ']='e', ['ଐ']='ôi', ['ଓ']='o', ['ଔ']='ôu', - -- chandrabindu - ['ଁ']='m̐', --until a better method is found - -- anusvara - ['ଂ']='ṃ', --until a better method 
is found - -- visarga - ['ଃ ']='ḥ', - -- avagraha - ['ଽ']='’', - --numerals - ['୦']='0', ['୧']='1', ['୨']='2', ['୩']='3', ['୪']='4', ['୫']='5', ['୬']='6', ['୭']='7', ['୮']='8', ['୯']='9', - ['୲']='¼', ['୳']='½', ['୴']='¾', ['୵']='¹⁄₁₆', ['୶']='⅛', ['୷']='³⁄₁₆', - --punctuation - ['।']='.', --danda -} - -function export.tr(text, lang, sc) - text = mw.ustring.gsub( - text, - '([କଖଗଘଙଚଛଜଝଞଟଠଡଢଣତଥଦଧନପଫବଵଭମଯୟରଲଳୱଶଷସହ]଼?)'.. - '([ାିୀୁୂୃୄେୈୖୋୌୗ୍ୢୣ]?)', - function(c, d) - if d == "" then - return consonants[c] .. 'ô' - else - return consonants[c] .. diacritics[d] - end - end) - - text = mw.ustring.gsub(text, '.', tt) - - return text -end - -return export - --- All tests passed. (refresh) - --- test_translit_oriya: --- Text Expected Actual Differs at --- Passed ଓଡ଼ିଆ oṛia oṛia --- Passed ଓଡ଼ିଶା oṛiśa oṛiśa --- Passed ଚାରି cari cari --- Passed ଏକ ekô ekô --- Passed ପାଞ୍ଚ pañcô pañcô --- -- Unit tests for [[Module:or-translit]]. Refresh page to run tests. --- local tests = require('Module:UnitTests') --- local or_translit = require('Module:or-translit') - --- --TO DO --- function tests:do_test_translit(orya, roman, xlit) --- self:equals('[[' .. orya .. '#Oriya|' .. orya .. ']]', or_translit.tr(orya, 'or', 'Orya'), roman) --- end - --- function tests:test_translit_oriya() --- self:do_test_translit('ଓଡ଼ିଆ', 'oṛia') --- self:do_test_translit('ଓଡ଼ିଶା', 'oṛiśa') --- self:do_test_translit('ଚାରି', 'cari') --- self:do_test_translit('ଏକ', 'ekô') --- self:do_test_translit('ପାଞ୍ଚ', 'pañcô') - --- end - --- return tests \ No newline at end of file diff --git a/wikt/translit/os-translit.lua b/wikt/translit/os-translit.lua deleted file mode 100644 index 909dd44..0000000 --- a/wikt/translit/os-translit.lua +++ /dev/null @@ -1,53 +0,0 @@ --- This module will transliterate Ossetian language text per WT:OS TR. 
--- Language code: oss - -local export = {} - -local mapping1 = { - ["ӕ"]="æ" ,['Ӕ']='Æ' , ["т"]="t" ,['Т']='T' , ["р"]="r" ,['Р']='R' , ["ф"]="f",['Ф']='F', ["э"]="è",['Э']='È', - ["ю"]="ju",['Ю']='Ju', ["ш"]="š" ,['Ш']='Š' , ["ь"]="ʹ" ,['Ь']='ʹ' , ["ъ"]="ʺ",['Ъ']='ʺ', ["н"]="n",['Н']='N', - ["п"]="p" ,['П']='P' , ["й"]="j" ,['Й']='J' , ["л"]="l" ,['Л']='L' , ["з"]="z",['З']='Z', ["е"]="e",['Е']='E', - ["г"]="g" ,['Г']='G' , ["б"]="b" ,['Б']='B' , ["у"]="u" ,['У']='U' , ["с"]="s",['С']='S', ["х"]="x",['Х']='X', - ["ч"]="ḱ" ,['Ч']='Ḱ' , ["щ"]="šč",['Щ']='ŠČ', ["я"]="ja",['Я']='Ja', ["ы"]="y",['Ы']='Y', ["э"]="è",['Э']='È', - ["м"]="m" ,['М']='M' , ["о"]="o" ,['О']='O' , ["и"]="i" ,['И']='I' , ["ё"]="ë",['Ё']='Ë', ["ж"]="ž",['Ж']='Ž', - ["к"]="k" ,['К']='K' , ["д"]="d" ,['Д']='D' , ["в"]="v" ,['В']='V' , ["ц"]="c",['Ц']='C', ["а"]="a",['А']='A' -} - -local mapping2 = { - ['къ'] = 'k’', ['Къ'] = 'K’', ['пъ'] = 'p’', ['Пъ'] = 'P’', - ['тъ'] = 't’', ['Tъ'] = 'T’', ['цъ'] = 'c’', ['Цъ'] = 'C’', - ['чъ'] = 'ḱ’', ['Чъ'] = 'Ḱ’', ['хъ'] = 'q' , ['Хъ'] = 'Q', - ['гъ'] = 'ǧ' , ['Гъ'] = 'Ǧ' , ['дж'] = 'ǵ' , ['Дж'] = 'Ǵ', - ['дз'] = 'ʒ' , ['Дз'] = 'Ʒ' , ['ау'] = 'aw', ['Ау'] = 'Aw', - ['ӕу'] = 'æw', ['Ӕу'] = 'Æw', ['иу'] = 'iw', ['Иу'] = 'Iw', - ['ыу'] = 'yw', ['Ыу'] = 'Yw', ['еу'] = 'ew', ['Еу'] = 'Ew', - ['уа'] = 'wa', ['Уа'] = 'Wa', ['уӕ'] = 'wæ', ['Уӕ'] = 'Wæ', - ['уи'] = 'wi', ['Уи'] = 'Wi', ['уы'] = 'wy', ['Уы'] = 'Wy', - ['уе'] = 'we', ['Уе'] = 'We', -} - -local mapping3 = { - ['гуы'] = 'g°y', ['Гуы'] = 'G°y', - ['куы'] = 'k°y', ['Kуы'] = 'K°y', - ['хуы'] = 'x°y', ['Хуы'] = 'X°y', -} - -function export.tr(text, lang, sc) - -- If the script is given as Geor, then forward the transliteration to that module - if sc == "Geor" then - return require("Module:Geor-translit").tr(text, lang, sc) - end - text = mw.ustring.gsub(text, 'къуы', 'k’°y') - text = mw.ustring.gsub(text, 'Kъуы', 'K’°y') - for pat, repl in pairs(mapping3) do - text = mw.ustring.gsub(text, pat, repl) - end - 
for pat, repl in pairs(mapping2) do - text = mw.ustring.gsub(text, pat, repl) - end - text = mw.ustring.gsub(text, '.', mapping1) - - return text -end - -return export \ No newline at end of file diff --git a/wikt/translit/peo-translit.lua b/wikt/translit/peo-translit.lua deleted file mode 100644 index d29ba0b..0000000 --- a/wikt/translit/peo-translit.lua +++ /dev/null @@ -1,134 +0,0 @@ --- This module will transliterate Old Persian language text. - -local export = {} - -local tt = { - ["𐎠"] = "a", - ["𐎡"] = "i", - ["𐎢"] = "u", - ["𐎣"] = "k", - ["𐎤"] = "ku", - ["𐎥"] = "g", - ["𐎦"] = "gu", - ["𐎧"] = "x", - ["𐎨"] = "c", - ["𐎩"] = "j", - ["𐎪"] = "ji", - ["𐎫"] = "t", - ["𐎬"] = "tu", - ["𐎭"] = "d", - ["𐎮"] = "di", - ["𐎯"] = "du", - ["𐎰"] = "θ", - ["𐎱"] = "p", - ["𐎲"] = "b", - ["𐎳"] = "f", - ["𐎴"] = "n", - ["𐎵"] = "nu", - ["𐎶"] = "m", - ["𐎷"] = "mi", - ["𐎸"] = "mu", - ["𐎹"] = "y", - ["𐎺"] = "v", - ["𐎻"] = "vi", - ["𐎼"] = "r", - ["𐎽"] = "ru", - ["𐎾"] = "l", - ["𐎿"] = "s", - ["𐏀"] = "z", - ["𐏁"] = "š", - ["𐏂"] = "ç", - ["𐏃"] = "h", - ["𐏈"] = "AM", -- Auramazdā - ["𐏉"] = "AM", -- Auramazdā - ["𐏊"] = "AMha", -- Auramazdāha - ["𐏋"] = "XŠ", -- xšāyathiya - ["𐏌"] = "DH", -- dahyāuš - ["𐏍"] = "DH", -- dahyāuš - ["𐏎"] = "BG", -- baga - ["𐏏"] = "BU", -- būmiš - ["𐏐"] = " ", --word devider - ["-"] = "-", -} - -local numbers = { - ["𐏑"] = 1, - ["𐏒"] = 2, - ["𐏓"] = 10, - ["𐏔"] = 20, - ["𐏕"] = 100, -} - -function export.convert_numbers(numeric_str) - local total = 0 - for c in mw.ustring.gmatch(numeric_str, ".") do - total = total + numbers[c] - end - return total -end - -function export.tr(text, lang, sc) - -- If the script is not Xpeo, do not transliterate - if sc ~= "Xpeo" then - return - end - - local t = {} - local preceding_num = false - -- Transliterate characters - mw.ustring.gsub(text, - ".", - function(c) - if mw.ustring.match(c, "[𐏑-𐏕]") then - if preceding_num then - t[#t] = t[#t] + numbers[c] - else - t[#t + 1] = numbers[c] - end - preceding_num = true - else - 
preceding_num = false - t[#t + 1] = tt[c] - end - end) - - text = mw.ustring.gsub(table.concat(t, "-"), "%-?(%s)%-?", "%1") - text = mw.ustring.gsub(mw.ustring.gsub(text, "^%-", ""), "%-$", "") - - return text -end - -return export - - --- Text Expected Actual --- Words --- Passed 𐎫𐎼𐎿𐎫𐎡𐎹 t-r-s-t-i-y t-r-s-t-i-y --- Passed 𐎭𐎭𐎠𐎬𐎢𐎺 d-d-a-tu-u-v d-d-a-tu-u-v --- Passed 𐎱𐎠𐎮𐎹 p-a-di-y p-a-di-y --- Passed 𐎪 ji ji --- Passed 𐎲𐎼𐏀𐎶𐎴𐎡𐎹 b-r-z-m-n-i-y b-r-z-m-n-i-y --- Passed 𐎩𐎮𐎡𐎹𐎠𐎷𐎡𐎹 j-di-i-y-a-mi-i-y j-di-i-y-a-mi-i-y --- Passed 𐎠𐎼𐎫𐎠𐎨𐎠 a-r-t-a-c-a a-r-t-a-c-a --- Numbers --- Passed 𐏕 100 100 --- return require "Module:transliteration module testcases"( --- require "Module:peo-translit".tr, --- { --- 'Words', --- { "𐎫𐎼𐎿𐎫𐎡𐎹", "t-r-s-t-i-y" }, --- { "𐎭𐎭𐎠𐎬𐎢𐎺", "d-d-a-tu-u-v" }, --- { "𐎱𐎠𐎮𐎹", "p-a-di-y" }, --- { "𐎪", "ji" }, --- { "𐎲𐎼𐏀𐎶𐎴𐎡𐎹", "b-r-z-m-n-i-y" }, --- { "𐎩𐎮𐎡𐎹𐎠𐎷𐎡𐎹", "j-di-i-y-a-mi-i-y" }, --- { "𐎠𐎼𐎫𐎠𐎨𐎠", "a-r-t-a-c-a" }, - --- 'Numbers', --- { "𐏕", "100" }, --- --[[ --- Copy this to add more examples: --- { "", "" }, --- --]] --- }, --- "Xpeo", "peo") \ No newline at end of file diff --git a/wikt/translit/phli-translit.lua b/wikt/translit/phli-translit.lua deleted file mode 100644 index a9afa80..0000000 --- a/wikt/translit/phli-translit.lua +++ /dev/null @@ -1,86 +0,0 @@ --- This module will transliterate text in the Inscriptional Pahlavi script. It is used to transliterate Middle Persian (pal). 
--- Language code: pal - -local export = {} - -local tt = { - ["𐭠"] = "ʾ", -- aleph - ["𐭡"] = "b", -- beth - ["𐭢"] = "g", -- gimil - ["𐭣"] = "d", -- daleth - ["𐭤"] = "h", -- he - ["𐭥"] = "ʿ", -- waw-ayin-resh - ["𐭦"] = "z", -- zayin - ["𐭧"] = "ḥ", -- heth - ["𐭨"] = "ṭ", -- teth - ["𐭩"] = "y", -- yodh - ["𐭪"] = "k", -- kaph - ["𐭫"] = "l", -- lamedh - ["𐭬"] = "m", -- mem-qoph - ["𐭭"] = "n", -- nun - ["𐭮"] = "s", -- samekh - ["𐭯"] = "p", -- pe - ["𐭰"] = "c", -- sadhe - ["𐭱"] = "š", -- shin - ["𐭲"] = "t", -- taw -} - -local numbers = { - ["𐭸"] = 1, - ["𐭹"] = 2, - ["𐭺"] = 3, - ["𐭻"] = 4, - ["𐭼"] = 10, - ["𐭽"] = 20, - ["𐭾"] = 100, - ["𐭿"] = 1000, -} - -function export.convert_numbers(numeric_str) - local total = 0 - for c in mw.ustring.gmatch(numeric_str, ".") do - total = total + numbers[c] - end - return total -end - -function export.tr(text, lang, sc) - -- If the script is not Phli, do not transliterate - if sc ~= "Phli" then - return - end - - if mw.ustring.match(text, "[𐭠-𐭿]") then - text = mw.ustring.gsub(text, "[𐭸-𐭿]+", export.convert_numbers) - end - - -- Transliterate characters - text = mw.ustring.gsub(text, ".", tt) - - return text -end - -return export - --- All tests passed. (refresh) - --- test: --- Text Expected Actual --- Words --- Passed 𐭮𐭯𐭠𐭧 spʾḥ spʾḥ --- Numbers --- Passed 𐭾 100 100 --- return require "Module:transliteration module testcases"( --- require "Module:Phli-translit".tr, --- { --- 'Words', --- { "𐭮𐭯𐭠𐭧", "spʾḥ" }, - --- 'Numbers', --- { "𐭾", "100" }, --- --[[ --- Copy this to add more examples: --- { "", "" }, --- --]] --- }, --- "Phli", "pal") diff --git a/wikt/translit/prti-translit.lua b/wikt/translit/prti-translit.lua deleted file mode 100644 index dff6add..0000000 --- a/wikt/translit/prti-translit.lua +++ /dev/null @@ -1,100 +0,0 @@ --- This module will transliterate text in the Inscriptional Parthian script. It is used to transliterate Parthian (xpr). 
--- Language code: xpr - -local export = {} - -local tt = { - ['𐭀'] = 'ʾ', - ['𐭁'] = 'b', - ['𐭂'] = 'g', - ['𐭃'] = 'd', - ['𐭄'] = 'h', - ['𐭅'] = 'w', - ['𐭆'] = 'z', - ['𐭇'] = 'ḥ', - ['𐭈'] = 'ṭ', - ['𐭉'] = 'y', - ['𐭊'] = 'k', - ['𐭋'] = 'l', - ['𐭌'] = 'm', - ['𐭍'] = 'n', - ['𐭎'] = 's', - ['𐭏'] = 'ʿ', - ['𐭐'] = 'p', - ['𐭑'] = 'c', - ['𐭒'] = 'q', - ['𐭓'] = 'r', - ['𐭔'] = 'š', - ['𐭕'] = 't', -} - -local numbers = { - ['𐭘'] = 1, - ['𐭙'] = 2, - ['𐭚'] = 3, - ['𐭛'] = 4, - ['𐭜'] = 10, - ['𐭝'] = 20, - ['𐭞'] = 100, - ['𐭟'] = 1000, -} - - -function export.convert_numbers(numeric_str) - local total = 0 - for c in mw.ustring.gmatch(numeric_str, ".") do - total = total + numbers[c] - end - return total -end - - -function export.tr(text, lang, sc) - -- If the script is not Prti, do not transliterate - if sc ~= "Prti" then - return - end - - if mw.ustring.match(text, '[𐭘-𐭟]') then - text = mw.ustring.gsub(text, '[𐭘-𐭟]+', export.convert_numbers) - end - - -- Transliterate characters - text = mw.ustring.gsub(text, '.', tt) - - return text -end - -return export - --- All tests passed. (refresh) - --- test: --- Text Expected Actual --- Script --- Passed 𐭌𐭆𐭃𐭉𐭆𐭍 mzdyzn mzdyzn --- Passed 𐭔𐭇𐭉𐭐𐭅𐭇𐭓 šḥypwḥr šḥypwḥr --- Passed 𐭀𐭃𐭉𐭅𐭓𐭐𐭉 ʾdywrpy ʾdywrpy --- Passed 𐭂𐭓𐭌𐭀𐭍𐭉𐭀 𐭇𐭔𐭕𐭓 grmʾnyʾ ḥštr grmʾnyʾ ḥštr --- Passed 𐭍𐭓𐭉𐭎𐭇𐭅 nrysḥw nrysḥw --- Numerals --- Passed 𐭝𐭝𐭝𐭝𐭛 84 84 --- --[=[ --- Unit tests for [[Module:Prti-translit]]. 
--- ]=] - --- return require("Module:transliteration module testcases")( --- require('Module:Prti-translit').tr, --- { --- 'Script', --- { '𐭌𐭆𐭃𐭉𐭆𐭍', 'mzdyzn'}, --- { '𐭔𐭇𐭉𐭐𐭅𐭇𐭓', 'šḥypwḥr' }, --- { '𐭀𐭃𐭉𐭅𐭓𐭐𐭉', 'ʾdywrpy' }, --- { '𐭂𐭓𐭌𐭀𐭍𐭉𐭀 𐭇𐭔𐭕𐭓', 'grmʾnyʾ ḥštr' }, --- { '𐭍𐭓𐭉𐭎𐭇𐭅', 'nrysḥw' }, - --- 'Numerals', --- { '𐭝𐭝𐭝𐭝𐭛', '84' }, --- }, --- 'Prti', 'xpr' --- ) \ No newline at end of file diff --git a/wikt/translit/qwm-translit.lua b/wikt/translit/qwm-translit.lua deleted file mode 100644 index 717fd36..0000000 --- a/wikt/translit/qwm-translit.lua +++ /dev/null @@ -1,13 +0,0 @@ -local export = {} - -function export.tr(text, lang, sc) - if sc == "Latn" or sc == "Arab" then - return nil - elseif sc == "Armn" then - return require("Module:Armn-translit").tr(text, lang, sc) - else - error("Huệ nương!") - end -end - -return export \ No newline at end of file diff --git a/wikt/translit/ru-translit.lua b/wikt/translit/ru-translit.lua deleted file mode 100644 index 200168b..0000000 --- a/wikt/translit/ru-translit.lua +++ /dev/null @@ -1,309 +0,0 @@ -local export = {} - ---[=[ - -FIXME: - -1. (DONE) If you write '''Б'''ез, it transliterates to '''B'''jez instead of - '''B'''ez. -2. (DONE) Convert ъ to nothing before comma or other non-letter particle, e.g. - in Однимъ словомъ, идешь на чтеніе. -3. (DONE) Make special-casing for adjectives in -го and for что (and friends) - be the default, and implement transformations in Cyrillic rather than after - translit so that we can display the transformed Cyrillic in the - "phonetic respelling" notation of {{ru-IPA}}. 
-]=] - -local u = mw.ustring.char -local rfind = mw.ustring.find -local rsub = mw.ustring.gsub -- WARNING: Don't return this directly in a function, or surround in parens -local rmatch = mw.ustring.match -local rsplit = mw.text.split -local ulower = mw.ustring.lower -local usub = mw.ustring.sub - -local GR = u(0x0300) -- grave = ̀ -local TEMP_G = u(0xFFF1) -- substitute to preserve g from changing to v - -local function ine(x) -- if not empty - if x == "" then return nil else return x end -end - --- In this table, we now map Cyrillic е and э to je and e, and handle the --- post-consonant version (plain e and ɛ) specially. -local tab = { - ["А"]="A", ["Б"]="B", ["В"]="V", ["Г"]="G", ["Д"]="D", ["Е"]="Je", ["Ё"]="Jó", ["Ж"]="Ž", ["З"]="Z", ["И"]="I", ["Й"]="J", - ["К"]="K", ["Л"]="L", ["М"]="M", ["Н"]="N", ["О"]="O", ["П"]="P", ["Р"]="R", ["С"]="S", ["Т"]="T", ["У"]="U", ["Ф"]="F", - ["Х"]="X", ["Ц"]="C", ["Ч"]="Č", ["Ш"]="Š", ["Щ"]="Šč", ["Ъ"]="ʺ", ["Ы"]="Y", ["Ь"]="ʹ", ["Э"]="E", ["Ю"]="Ju", ["Я"]="Ja", - ['а']='a', ['б']='b', ['в']='v', ['г']='g', ['д']='d', ['е']='je', ['ё']='jó', ['ж']='ž', ['з']='z', ['и']='i', ['й']='j', - ['к']='k', ['л']='l', ['м']='m', ['н']='n', ['о']='o', ['п']='p', ['р']='r', ['с']='s', ['т']='t', ['у']='u', ['ф']='f', - ['х']='x', ['ц']='c', ['ч']='č', ['ш']='š', ['щ']='šč', ['ъ']='ʺ', ['ы']='y', ['ь']='ʹ', ['э']='e', ['ю']='ju', ['я']='ja', - -- Russian style quotes - ['«']='“', ['»']='”', - -- archaic, pre-1918 letters - ['І']='I', ['і']='i', ['Ѳ']='F', ['ѳ']='f', - ['Ѣ']='Jě', ['ѣ']='jě', ['Ѵ']='I', ['ѵ']='i', -} - --- following based on ru-common for use with is_monosyllabic() --- any Cyrillic or Latin vowel, including ёЁ and composed Cyrillic vowels with grave accent; --- not including accented Latin vowels except ě (FIXME, might want to change this) -local vowels = "аеиоуяэыюіѣѵүАЕИОУЯЭЫЮІѢѴҮѐЀѝЍёЁAEIOUYĚƐaeiouyěɛ" - --- FIXME! Doesn't work with ɣ, which gets included in this character set -local non_consonants = "[" .. vowels .. 
"ЪЬъьʹʺ%A]" -local consonants = "[^" .. vowels .. "ЪЬъьʹʺ%A]" - -local map_to_plain_e_map = {["Е"] = "E", ["е"] = "e", ["Ѣ"] = "Ě", ["ѣ"] = "ě", ["Э"] = "Ɛ", ["э"] = "ɛ"} -local function map_to_plain_e(pre, e) - return pre .. map_to_plain_e_map[e] -end - -local map_to_je_map = {["Е"] = "Je", ["е"] = "je", ["Ѣ"] = "Jě", ["ѣ"] = "jě", ["Э"] = "E", ["э"] = "e"} -local function map_to_je(pre, e) - if e == nil then - e = pre - pre = "" - end - return pre .. map_to_je_map[e] -end - --- decompose composed grave chars; they will map to uncomposed Latin letters for --- consistency with other char+grave combinations, and we do this early to --- avoid problems converting to e or je -local decompose_grave_map = {['ѐ'] = 'е' .. GR, ['Ѐ'] = 'Е' .. GR, ['ѝ'] = 'и' .. GR, ['Ѝ'] = 'И' .. GR} - --- True if Cyrillic or decomposed Latin word has no more than one vowel; --- includes non-syllabic stems such as льд-; copied from ru-common and modified --- to avoid having to import that module (which would slow things down --- significantly) -local function is_monosyllabic(word) - return not rfind(word, "[" .. vowels .. "].*[" .. vowels .. "]") -end - --- Apply transformations to the Cyrillic to more closely match pronunciation. --- Return two arguments: the "original" text (after decomposing composed --- grave characters), and the transformed text. If the two are different, --- {{ru-IPA}} should display a "phonetic respelling" notation. --- NOADJ disables special-casing for adjectives in -го, while FORCEADJ forces --- special-casing for adjectives, including those in -аго (pre-reform spelling) --- and disables checking for exceptions (e.g. много, ого). NOSHTO disables --- special-casing for что and related words. 
-function export.apply_tr_fixes(text, noadj, noshto, forceadj) - -- decompose composed grave characters before we convert Cyrillic е to - -- Latin e or je - text = rsub(text, "[ѐЀѝЍ]", decompose_grave_map) - - local origtext = text - -- the second half of the if-statement below is an optimization; see above. - if not noadj and text:find("го") then - if not forceadj then - -- handle много - text = rsub(text, "%f[%a\204\129\204\128]([Мм]но[\204\129\204\128]?)го%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. "о") - -- handle немного, намного - text = rsub(text, "%f[%a\204\129\204\128]([Нн][еа]мно[\204\129\204\128]?)го%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. "о") - -- handle до́рого [short form of дорогой, adverb] - text = rsub(text, "%f[%a\204\129\204\128]([Дд]о[\204\129\204\128]?ро)го%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. "о") - -- handle недо́рого [short form of недорогой, adverb] - text = rsub(text, "%f[%a\204\129\204\128]([Нн]едо[\204\129\204\128]?ро)го%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. "о") - -- handle стро́го - text = rsub(text, "%f[%a\204\129\204\128]([Сс]тро[\204\129\204\128]?)го%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. "о") - -- handle нестро́го - text = rsub(text, "%f[%a\204\129\204\128]([Нн]естро[\204\129\204\128]?)го%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. "о") - -- handle убо́го - text = rsub(text, "%f[%a\204\129\204\128]([Уу]бо[\204\129\204\128]?)го%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. "о") - -- handle поло́го - text = rsub(text, "%f[%a\204\129\204\128]([Пп]оло[\204\129\204\128]?)го%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. "о") - -- handle длинноно́го - text = rsub(text, "%f[%a\204\129\204\128]([Дд]линноно[\204\129\204\128]?)го%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. "о") - -- handle коротконо́го - text = rsub(text, "%f[%a\204\129\204\128]([Кк]оротконо[\204\129\204\128]?)го%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. 
"о") - -- handle кривоно́го - text = rsub(text, "%f[%a\204\129\204\128]([Кк]ривоно[\204\129\204\128]?)го%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. "о") - -- handle пе́го [short form of пе́гий "piebald"] - text = rsub(text, "%f[%a\204\129\204\128]([Пп]е[\204\129\204\128]?)го%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. "о") - -- handle лого, сого, ого - text = rsub(text, "%f[%a\204\129\204\128]([лсЛС]?[Оо][\204\129\204\128]?)г(о[\204\129\204\128]?)%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. "%2") - -- handle Того, То́го (but not того or Того́, which have /v/) - text = rsub(text, "%f[%a\204\129\204\128](То́?)го%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. "о") - -- handle лего - text = rsub(text, "%f[%a\204\129\204\128]([Лл]е[\204\129\204\128]?)го%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. "о") - -- handle игого, огого; note, we substitute TEMP_G for both г's - -- because otherwise the ого- at the beginning gets converted to ово - text = rsub(text, "%f[%a\204\129\204\128]([ИиОо])гог(о[\204\129\204\128]?)%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. "о" .. TEMP_G .. "%2") - -- handle Диего - text = rsub(text, "%f[%a\204\129\204\128](Дие́?)го%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. "о") - -- handle бо́лого - text = rsub(text, "%f[%a\204\129\204\128]([Бб]о[\204\129\204\128]?ло)го%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. "о") - end - --handle genitive/accusative endings, which are spelled -ого/-его/-аго - -- (-ogo/-ego/-ago) but transliterated -ovo/-evo/-avo; only for adjectives - -- and pronouns, excluding words like много, ого (-аго occurs in - -- pre-reform spelling); \204\129 is an acute accent, \204\128 is a grave accent - local pattern = "([оеОЕ" .. (forceadj and "аА" or "") .. "][\204\129\204\128]?)([гГ])([оО][\204\129\204\128]?)" - local reflexive = "([сС][яЯ][\204\129\204\128]?)" - local v = {["г"] = "в", ["Г"] = "В"} - local repl = function(e, g, o, sja) return e .. v[g] .. o .. (sja or "") end - text = rsub(text, pattern .. 
"%f[^%a\204\129\204\128]", repl) - text = rsub(text, pattern .. reflexive .. "%f[^%a\204\129\204\128]", repl) - -- handle сегодня - text = rsub(text, "%f[%a\204\129\204\128]([Сс]е)г(о[\204\129\204\128]?дня)%f[^%a\204\129\204\128]", "%1в%2") - -- handle сегодняшн- - text = rsub(text, "%f[%a\204\129\204\128]([Сс]е)г(о[\204\129\204\128]?дняшн)", "%1в%2") - -- replace TEMP_G with g; must be done after the -go -> -vo changes - text = rsub(text, TEMP_G, "г") - end - - -- the second half of the if-statement below is an optimization; see above. - if not noshto and text:find("то") then - local ch2sh = {["ч"] = "ш", ["Ч"] = "Ш"} - -- Handle что - text = rsub(text, "%f[%a\204\129\204\128]([Чч])(то[\204\129\204\128]?)%f[^%a\204\129\204\128]", - function(ch, to) return ch2sh[ch] .. to end) - -- Handle чтобы, чтоб - text = rsub(text, "%f[%a\204\129\204\128]([Чч])(то[\204\129\204\128]?бы?)%f[^%a\204\129\204\128]", - function(ch, to) return ch2sh[ch] .. to end) - -- Handle ничто - text = rsub(text, "%f[%a\204\129\204\128]([Нн]и)ч(то[\204\129\204\128]?)%f[^%a\204\129\204\128]", "%1ш%2") - end - - text = rsub(text, "([МмЛл][яеё][\204\129\204\128]?)г([кч])", "%1х%2") - - return origtext, text -end - --- Transliterate after the pronunciation-related transformations of --- export.apply_tr_fixes() have been applied. Called from {{ru-IPA}}. --- INCLUDE_MONOSYLLABIC_JO_ACCENT is as in export.tr(). -function export.tr_after_fixes(text, include_monosyllabic_jo_accent) - -- Remove word-final hard sign, either utterance-finally or followed by - -- a non-letter character such as space, comma, period, hyphen, etc. - text = rsub(text, "[Ъъ]$", "") - text = rsub(text, "[Ъъ]([%A])", "%1") - - -- the if-statement below isn't necessary but may speed things up, - -- particularly when include_monosyllabic_jo_accent isn't set, in that - -- in the majority of cases where ё doesn't occur, we avoid a pattern find - -- (in is_monosyllabic()) and three pattern subs. 
The translit module needs - -- to be as fast as possible since it may be called hundreds or - -- thousands of times on some pages. - if rfind(text, "[Ёё]") then - -- We need to special-case ё after a "hushing" consonant, which becomes - -- ó (or o), without j. We also need special cases for monosyllabic ё - -- when INCLUDE_MONOSYLLABIC_JO_ACCENT isn't set, so we don't add the - -- accent mark that we would otherwise include. - if not include_monosyllabic_jo_accent and is_monosyllabic(text) then - text = rsub(text, "([жшчщЖШЧЩ])ё","%1o") - text = text:gsub("ё", "jo") - text = text:gsub("Ё", "Jo") - else - text = rsub(text, "([жшчщЖШЧЩ])ё","%1ó") - -- conversion of remaining ё will occur as a result of 'tab'. - end - end - - -- ю after ж and ш becomes u (e.g. брошюра, жюри) - text = rsub(text, "([жшЖШ])ю","%1u") - - -- the if-statement below isn't necessary but may speed things up in that - -- in the majority of cases where the letters below don't occur, we avoid - -- six pattern subs. - if rfind(text, "[ЕеѢѣЭэ]") then - -- е after a dash at the beginning of a word becomes e, and э becomes ɛ - -- (like after a consonant) - text = rsub(text, "^(%-)([ЕеѢѣЭэ])", map_to_plain_e) - text = rsub(text, "(%s%-)([ЕеѢѣЭэ])", map_to_plain_e) - -- don't get confused by single quote or parens between consonant and е; - -- e.g. Б'''ез''', американ(ец) - text = rsub(text, "(" .. consonants .. "['%(%)]*)([ЕеѢѣЭэ])", map_to_plain_e) - - -- This is now the default - -- е after a vowel or at the beginning of a word becomes je, and э becomes e - -- text = rsub(text, "^([ЕеѢѣЭэ])", map_to_je) - -- text = rsub(text, "(" .. non_consonants .. ")([ЕеѢѣЭэ])", map_to_je) - -- -- need to do it twice in case of sequences of such vowels - -- text = rsub(text, "^([ЕеѢѣЭэ])", map_to_je) - -- text = rsub(text, "(" .. non_consonants .. ")([ЕеѢѣЭэ])", map_to_je) - end - - text = (rsub(text,'.',tab)) - return text -end - --- Transliterates text, which should be a single word or phrase. 
It should --- include stress marks, which are then preserved in the transliteration. --- ё is a special case: it is rendered (j)ó in multisyllabic words and --- monosyllabic words in multi-word phrases, but rendered (j)o without an --- accent in isolated monosyllabic words, unless INCLUDE_MONOSYLLABIC_JO_ACCENT --- is specified. (This is used in conjugation and declension tables.) --- NOADJ disables special-casing for adjectives in -го, while FORCEADJ forces --- special-casing for adjectives and disables checking for exceptions --- (e.g. много). NOSHTO disables special-casing for что and related words. -function export.tr(text, lang, sc, include_monosyllabic_jo_accent, noadj, noshto, forceadj) - local origtext, subbed_text = export.apply_tr_fixes(text, noadj, noshto, forceadj) - return export.tr_after_fixes(subbed_text, include_monosyllabic_jo_accent) -end - --- translit with various special-case substitutions; NOADJ disables --- special-casing for adjectives in -го, while FORCEADJ forces special-casing --- for adjectives and disables checking for expections (e.g. много). --- NOSHTO disables special-casing for что and related words. SUB is used --- to implement arbitrary substitutions in the Cyrillic text before other --- transformations are applied and before translit. It is of the form --- FROM/TO,FROM/TO,... 
-function export.tr_sub(text, include_monosyllabic_jo_accent, noadj, noshto, sub, - forceadj) - if type(text) == 'table' then -- called directly from a template - include_monosyllabic_jo_accent = ine(text.args.include_monosyllabic_jo_accent) - noadj = ine(text.args.noadj) - noshto = ine(text.args.noshto) - sub = ine(text.args.sub) - text = text.args[1] - end - - if sub then - local subs = rsplit(sub, ",") - for _, subpair in ipairs(subs) do - local subsplit = rsplit(subpair, "/") - text = rsub(text, subsplit[1], subsplit[2]) - end - end - - return export.tr(text, nil, nil, include_monosyllabic_jo_accent, noadj, noshto, forceadj) -end - ---for adjectives, pronouns -function export.tr_adj(text, include_monosyllabic_jo_accent) - if type(text) == 'table' then -- called directly from a template - include_monosyllabic_jo_accent = ine(text.args.include_monosyllabic_jo_accent) - text = text.args[1] - end - - -- we have to include "forceadj" because typically when tr_adj() is called - -- from the noun or adjective modules, it's called with suffix ого, which - -- would otherwise trigger the exceptional case and be transliterated as ogo - return export.tr(text, nil, nil, include_monosyllabic_jo_accent, false, - "noshto", "forceadj") -end - -return export - --- For Vim, so we get 4-space tabs --- vim: set ts=4 sw=4 noet: - --- All tests passed. (refresh) - --- test: --- Text Expected Actual --- Passed без bez bez --- Passed То́го Tógo Tógo --- Passed того́ tovó tovó --- return require("Module:transliteration module testcases")( --- require("Module:ru-translit").tr, --- { --- { "'''б'''ез", "'''b'''ez" }, --- { "То́го", "Tógo" }, --- { "того́", "tovó" }, --- }, --- "Cyrl", "ru" --- ) \ No newline at end of file diff --git a/wikt/translit/rue-translit.lua b/wikt/translit/rue-translit.lua deleted file mode 100644 index a971771..0000000 --- a/wikt/translit/rue-translit.lua +++ /dev/null @@ -1,27 +0,0 @@ --- This module will transliterate Rusyn language text per WT:RUE TR. 
--- Language code: rue - -local export = {} - -local tt = { - ["А"]='A', ["а"]='a', ["Б"]='B', ["б"]='b', ["В"]='V', ["в"]='v', ["Г"]='H', ["г"]='h', - ["Ґ"]='G', ["ґ"]='g', ["Д"]='D', ["д"]='d', ["Е"]='E', ["е"]='e', ["Є"]='Je', ["є"]='je', - ["Ё"]='Jo', ["ё"]='jo', ["Ж"]='Ž', ["ж"]='ž', ["З"]='Z', ["з"]='z', ["И"]='Y', ["и"]='y', - ["І"]='I', ["і"]='i', ["Ї"]='Ji', ["ї"]='ji', ["Й"]='J', ["й"]='j', ["К"]='K', ["к"]='k', - ["Л"]='L', ["л"]='l', ["М"]='M', ["м"]='m', ["Н"]='N', ["н"]='n', ["О"]='O', ["о"]='o', - ["П"]='P', ["п"]='p', ["Р"]='R', ["р"]='r', ["С"]='S', ["с"]='s', ["Т"]='T', ["т"]='t', - ["У"]='U', ["у"]='u', ["Ф"]='F', ["ф"]='f', ["Х"]='X', ["х"]='x', ["Ц"]='C', ["ц"]='c', - ["Ч"]='Č', ["ч"]='č', ["Ш"]='Š', ["ш"]='š', ["Щ"]='Šč', ["щ"]='šč', ["Ю"]='Ju', ["ю"]='ju', - ["Я"]='Ja', ["я"]='ja', ["Ы"]='Ŷ', ["ы"]='ŷ', ["Ь"]='ʹ', ["ь"]='ʹ', - -- neutral apostrophe, right single quotation mark, modifier letter apostrophe → modifier letter double prime - ["’"]='ʺ', ["ʼ"]= 'ʺ', - -- obsolete letters - ["Ъ"]='ʺ', ["ъ"]='ʺ', ["Ѣ"]='Ě', ["ѣ"]='ě', ["Э"]='È', ["э"]='è', -} - -function export.tr(text, lang, sc) - text = mw.ustring.gsub(text, "'+", { ["'"] = 'ʺ' }) - return (mw.ustring.gsub(text, '.', tt)) -end - -return export \ No newline at end of file diff --git a/wikt/translit/sa-translit.lua b/wikt/translit/sa-translit.lua deleted file mode 100644 index 775ea8f..0000000 --- a/wikt/translit/sa-translit.lua +++ /dev/null @@ -1,62 +0,0 @@ - -local export = {} - -local consonants = { - ['क']='k', ['ख']='kh', ['ग']='g', ['घ']='gh', ['ङ']='ṅ', - ['च']='c', ['छ']='ch', ['ज']='j', ['झ']='jh', ['ञ']='ñ', - ['ट']='ṭ', ['ठ']='ṭh', ['ड']='ḍ', ['ढ']='ḍh', ['ण']='ṇ', - ['त']='t', ['थ']='th', ['द']='d', ['ध']='dh', ['न']='n', - ['प']='p', ['फ']='ph', ['ब']='b', ['भ']='bh', ['म']='m', - ['य']='y', ['र']='r', ['ल']='l', ['व']='v', ['ळ']='ḷ', - ['श']='ś', ['ष']='ṣ', ['स']='s', ['ह']='h', -} - -local diacritics = { - ['ा']='ā', ['ि']='i', ['ी']='ī', ['ु']='u', ['ू']='ū', 
['ृ']='ṛ', ['ॄ']='ṝ', - ['ॢ']='ḷ', ['ॣ']='ḹ', ['े']='e', ['ै']='ai', ['ो']='o', ['ौ']='au', ['्']='', -} - -local tt = { - -- vowels - ['अ']='a', ['आ']='ā', ['इ']='i', ['ई']='ī', ['उ']='u', ['ऊ']='ū', ['ऋ']='ṛ', ['ॠ']='ṝ', - ['ऌ']='ḷ', ['ॡ']='ḹ', ['ए']='e', ['ऐ']='ai', ['ओ']='o', ['औ']='au', - -- chandrabindu - ['ँ']='m̐', --until a better method is found - -- anusvara - ['ं']='ṃ', --until a better method is found - -- visarga - ['ः']='ḥ', - -- avagraha - ['ऽ']='’', - --numerals - ['०']='0', ['१']='1', ['२']='2', ['३']='3', ['४']='4', ['५']='5', ['६']='6', ['७']='7', ['८']='8', ['९']='9', - --punctuation - ['॥']='.', --double danda - ['।']='.', --danda - --Vedic extensions - ['ᳵ']='x', ['ᳶ']='f', - --Om - ['ॐ']='oṃ', - --reconstructed - ['*'] = '', -} - -function export.tr(text, lang, sc) - text = mw.ustring.gsub( - text, - '([कखगघङचछजझञटठडढणतथदधनपफबभमयरलळवशषसह])'.. - '([ािीुूृॄॢॣेैोौ्]?)', - function(c, d) - if d == "" then - return consonants[c] .. 'a' - else - return consonants[c] .. diacritics[d] - end - end) - - text = mw.ustring.gsub(text, '.', tt) - - return text -end - -return export \ No newline at end of file diff --git a/wikt/translit/sah-translit.lua b/wikt/translit/sah-translit.lua deleted file mode 100644 index 225fc77..0000000 --- a/wikt/translit/sah-translit.lua +++ /dev/null @@ -1,36 +0,0 @@ --- This module will transliterate Yakut language text per WT:SAH TR. 
--- Language code: sah - -local export = {} - -local tab = { - ["А"]='A', ["а"]='a', ["Б"]='B', ["б"]='b', ["Г"]='G', ["г"]='g', ["Ҕ"]='Ğ', ["ҕ"]='ğ', - ["Д"]='D', ["д"]='d', ["И"]='İ', ["и"]='i', ["Й"]='Y', ["й"]='y', ["К"]='K', ["к"]='k', - ["Л"]='L', ["л"]='l', ["М"]='M', ["м"]='m', ["Н"]='N', ["н"]='n', ["Ҥ"]='Ŋ', ["ҥ"]='ŋ', - ["О"]='O', ["о"]='o', ["Ө"]='Ö', ["ө"]='ö', ["П"]='P', ["п"]='p', ["Р"]='R', ["р"]='r', - ["С"]='S', ["с"]='s', ["Һ"]='H', ["һ"]='h', ["Т"]='T', ["т"]='t', ["У"]='U', ["у"]='u', - ["Ү"]='Ü', ["ү"]='ü', ["Х"]='X', ["х"]='x', ["Ч"]='Ç', ["ч"]='ç', ["Ш"]='Ş', ["ш"]='ş', - ["Ы"]='I', ["ы"]='ı', ["Э"]='E', ["э"]='e', - -- non-native letters - ["В"]='V', ["в"]='v', ["Е"]='E', ["е"]='e', ["Ё"]='Yo', ["ё"]='yo', ["Ж"]='J', ["ж"]='j', - ["З"]='Z', ["з"]='z', ["Ф"]='F', ["ф"]='f', ["Ц"]='Ts', ["ц"]='ts', ["Щ"]='Şç', ["щ"]='şç', - ['Ъ']='ʺ', ['ъ']='ʺ', ["Ь"]="’", ["ь"]="’", ["Ю"]='Yu', ["ю"]='yu', ["Я"]='Ya', ["я"]='ya', -} - -function export.tr(text, lang, sc) - text = mw.ustring.gsub(text, 'Дь', 'C') - text = mw.ustring.gsub(text, 'дь', 'c') - text = mw.ustring.gsub(text, 'Нь', 'Ń') - text = mw.ustring.gsub(text, 'нь', 'ń') - - -- е after a vowel or at the beginning of a word becomes ye - text = mw.ustring.gsub(text, "([АОӨУҮЫЕЯЁЮИЕЪЬаоөуүыэяёюиеъь%A][́̀]?)е","%1je") - text = mw.ustring.gsub(text, "^Е","Ye") - text = mw.ustring.gsub(text, "^е","ye") - text = mw.ustring.gsub(text, "([^Ѐ-ӿ])Е","%1Ye") - text = mw.ustring.gsub(text, "([^Ѐ-ӿ])е","%1ye") - - return (mw.ustring.gsub(text,'.',tab)) -end - -return export \ No newline at end of file diff --git a/wikt/translit/sarb-translit.lua b/wikt/translit/sarb-translit.lua deleted file mode 100644 index c75356b..0000000 --- a/wikt/translit/sarb-translit.lua +++ /dev/null @@ -1,71 +0,0 @@ --- This module will transliterate text in the Old South Arabian script. 
- -local export = {} - -local correspondences = { - ["𐩠"] = "h", ["𐩡"] = "l", ["𐩢"] = "ḥ", ["𐩣"] = "m", ["𐩤"] = "q", - ["𐩥"]= "w", ["𐩦"] = "s²", ["𐩧"] = "r", ["𐩨"] = "b", ["𐩩"] = "t", - ["𐩪"] = "s¹", ["𐩫"] = "k", ["𐩬"] = "n", ["𐩭"] = "ḫ", ["𐩮"] = "ṣ", - ["𐩯"] = "s³", ["𐩰"] = "f", ["𐩱"] = "ʾ", ["𐩲"] = "ʿ", ["𐩳"] = "ḍ", - ["𐩴"] = "g", ["𐩵"] = "d", ["𐩶"] = "ġ", ["𐩷"] = "ṭ", ["𐩸"] = "z", - ["𐩹"] = "ḏ", ["𐩺"] = "y", ["𐩻"] = "ṯ", ["𐩼"] = "ẓ", - - ["𐩽"] = " ", -} - -local numbers = { - ["𐩽"] = "1", - ["𐩭"] = "5", - ["𐩲"] = "10", - ["𐩾"] = "50", - ["𐩣"] = "100", - ["𐩱"] = "1000", -} - -function export.tr(text, lang, sc) - -- Interpret numbers. - -- Will not work for thousands! - text = text:gsub( - "𐩿([^𐩿])𐩿", - function (number) - local value = 0 - for digit in mw.ustring.gmatch(number, ".") do - value = value + numbers[digit] or error("The character " .. digit .. " in " .. number .. " does not have a numeric value.") - end - return value - end) - - text = mw.ustring.gsub(text, ".", correspondences) - - return text -end - -return export - --- 5 tests failed. 
(refresh) - --- test: --- Text Expected Actual --- Failed 𐩱𐩫𐩪𐩥𐩩 ʾkswt ʾks¹wt --- Passed 𐩠𐩧𐩥𐩺 hrwy hrwy --- Passed 𐩹𐩩𐩢𐩣𐩺𐩣 ḏtḥmym ḏtḥmym --- Failed 𐩣𐩡𐩫𐩩𐩪𐩨𐩱 mlktsbʾ mlkts¹bʾ --- Passed 𐩫𐩥𐩫𐩨 kwkb kwkb --- Passed 𐩧𐩢𐩨𐩩 rḥbt rḥbt --- Failed 𐩦𐩣𐩪𐩣 šmsm s²ms¹m --- Failed 𐩪𐩨𐩡𐩩 sblt s¹blt --- Failed 𐩬𐩴𐩦 ngš ngs² --- return require "Module:transliteration module testcases"( --- require "Module:Sarb-translit".tr, --- { --- { "𐩱𐩫𐩪𐩥𐩩", "ʾkswt" }, --- { "𐩠𐩧𐩥𐩺", "hrwy" }, --- { "𐩹𐩩𐩢𐩣𐩺𐩣", "ḏtḥmym" }, --- { "𐩣𐩡𐩫𐩩𐩪𐩨𐩱", "mlktsbʾ" }, --- { "𐩫𐩥𐩫𐩨", "kwkb" }, --- { "𐩧𐩢𐩨𐩩", "rḥbt" }, --- { "𐩦𐩣𐩪𐩣", "šmsm" }, --- { "𐩪𐩨𐩡𐩩", "sblt" }, --- { "𐩬𐩴𐩦", "ngš" }, --- }, --- "Sarb", "sem-srb") \ No newline at end of file diff --git a/wikt/translit/sat-translit.lua b/wikt/translit/sat-translit.lua deleted file mode 100644 index 08c415e..0000000 --- a/wikt/translit/sat-translit.lua +++ /dev/null @@ -1,34 +0,0 @@ --- This module will transliterate Santali language text. --- Language code: sat - -local export = {} - -local tt = { - ['ᱛ'] = 't', ['ᱜ'] = 'g', ['ᱝ'] = 'ṅ', ['ᱞ'] = 'l', - ['ᱠ'] = 'k', ['ᱡ'] = 'j', ['ᱢ'] = 'm', ['ᱣ'] = 'w', - ['ᱥ'] = 's', ['ᱦ'] = 'h', ['ᱧ'] = 'ñ', ['ᱨ'] = 'r', - ['ᱪ'] = 'c', ['ᱫ'] = 'd', ['ᱬ'] = 'ṇ', ['ᱭ'] = 'y', - ['ᱯ'] = 'p', ['ᱰ'] = 'ḍ', ['ᱱ'] = 'n', ['ᱲ'] = 'ṛ', - ['ᱴ'] = 'ṭ', ['ᱵ'] = 'b', ['ᱶ'] = 'v', ['ᱷ'] = 'ʰ', - - -- vowels - ['ᱚ'] = 'ô', ['ᱟ'] = 'a', ['ᱤ'] = 'i', ['ᱩ'] = 'u', ['ᱮ'] = 'e', ['ᱳ'] = 'o', - - -- special stuff - ['ᱸ']='ṃ', ['ᱺ'] = 'ḥ', --until a better method is found - - --numerals - ['᱐']='0', ['᱑']='1', ['᱒']='2', ['᱓']='3', ['᱔']='4', ['᱕']='5', ['᱖']='6', ['᱗']='7', ['᱘']='8', ['᱙']='9', - - --punctuation - ['᱾'] = '.', ['᱿'] = '.', -} - -function export.tr(text, lang, sc) - text = mw.ustring.gsub(text, '[᱐᱑᱒᱓᱔᱕᱖᱗᱘᱙ᱚᱛᱜᱝᱞᱟᱠᱡᱢᱣᱤᱥᱦᱧᱨᱩᱪᱫᱬᱭᱮᱯᱰᱱᱲᱳᱴᱵᱶᱷᱸᱹᱺᱻᱼᱽ᱾᱿].', tt) - text = mw.ustring.gsub(text, '.', tt) - - return text -end - -return export \ No newline at end of file diff --git a/wikt/translit/script-utilities-data.lua 
b/wikt/translit/script-utilities-data.lua deleted file mode 100644 index be69104..0000000 --- a/wikt/translit/script-utilities-data.lua +++ /dev/null @@ -1,61 +0,0 @@ -local data = {} - -data.translit = { - ["term"] = { ---[=[ can't be done until Kana transliterations are correctly parsed by [[Module:links]] - ["tag"] = "i", - ]=] - ["classes"] = "mention-tr", - }, - ["usex"] = { - ["tag"] = "i", - ["classes"] = "e-transliteration", - }, - ["head"] = { - ["classes"] = "headword-tr", - ["dir"] = "ltr", - }, - ["default"] = {}, -} - -data.transcription = { - ["head"] = { - ["tag"] = "span", - ["classes"] = "headword-ts", - ["dir"] = "ltr", - }, - ["default"] = {}, -} - -for key, value in pairs(data.translit) do - if not value.tag then - value.tag = "span" - end -end - -data.faces = { - ["term"] = { - tag = "i", - class = "mention", - }, - ["head"] = { - tag = "strong", - class = "headword", - }, - ["hypothetical"] = { - prefix = '*', - tag = "i", - class = "hypothetical", - }, - ["bold"] = { - tag = "b", - }, - ["translation"] = { - tag = "span", - }, - ["nil"] = { - tag = "span", - }, -} - -return data \ No newline at end of file diff --git a/wikt/translit/script-utilities.lua b/wikt/translit/script-utilities.lua deleted file mode 100644 index 38a41ce..0000000 --- a/wikt/translit/script-utilities.lua +++ /dev/null @@ -1,369 +0,0 @@ -local export = {} - ---[=[ - Modules used: - [[Module:script utilities/data]] - [[Module:scripts]] - [[Module:languages]] - [[Module:parameters]] - [[Module:utilities]] - [[Module:debug]] -]=] - -function export.is_Latin_script(sc) - -- Latn, Latf, Latinx, nv-Latn, pjt-Latn - return sc:getCode():find("Lat") and true or false -end - --- Used by [[Template:lang]] -function export.lang_t(frame) - params = { - [1] = {}, - [2] = { allow_empty = true, default = "" }, - ["sc"] = {}, - ["face"] = {}, - ["class"] = {}, - } - - local args = require("Module:parameters").process(frame:getParent().args, params) - local NAMESPACE = 
mw.title.getCurrentTitle().nsText - - local lang = args[1] or (NAMESPACE == "Template" and "und") or error("Language code has not been specified. Please pass parameter 1 to the template.") - lang = require("Module:languages").getByCode(lang) or require("Module:languages").err(lang, 1) - - local text = args[2] - - local sc = args["sc"] - sc = (sc and (require("Module:scripts").getByCode(sc) or error("The script code \"" .. sc .. "\" is not valid.")) or nil) - - local face = args["face"] - - return export.tag_text(text, lang, sc, face, class) -end - --- Ustring turns on the codepoint-aware string matching. The basic string function --- should be used for simple sequences of characters, Ustring function for --- sets – []. -local function trackPattern(text, pattern, tracking, ustring) - local find = ustring and mw.ustring.find or string.find - if pattern and find(text, pattern) then - require("Module:debug").track("script/" .. tracking) - end -end - -local function track(text, lang, sc) - local U = mw.ustring.char - - if lang and text then - local langCode = lang:getCode() - - -- [[Special:WhatLinksHere/Template:tracking/script/ang/acute]] - if langCode == "ang" then - local decomposed = mw.ustring.toNFD(text) - local acute = U(0x301) - - trackPattern(decomposed, acute, "ang/acute") - - --[=[ - [[Special:WhatLinksHere/Template:tracking/script/Greek/wrong-phi]] - [[Special:WhatLinksHere/Template:tracking/script/Greek/wrong-theta]] - [[Special:WhatLinksHere/Template:tracking/script/Greek/wrong-kappa]] - [[Special:WhatLinksHere/Template:tracking/script/Greek/wrong-rho]] - ϑ, ϰ, ϱ, ϕ should generally be replaced with θ, κ, ρ, φ. 
- ]=] - elseif langCode == "el" or langCode == "grc" then - trackPattern(text, "ϑ", "Greek/wrong-theta") - trackPattern(text, "ϰ", "Greek/wrong-kappa") - trackPattern(text, "ϱ", "Greek/wrong-rho") - trackPattern(text, "ϕ", "Greek/wrong-phi") - - --[=[ - [[Special:WhatLinksHere/Template:tracking/script/Ancient Greek/spacing-coronis]] - [[Special:WhatLinksHere/Template:tracking/script/Ancient Greek/spacing-smooth-breathing]] - [[Special:WhatLinksHere/Template:tracking/script/Ancient Greek/wrong-apostrophe]] - When spacing coronis and spacing smooth breathing are used as apostrophes, - they should be replaced with right single quotation marks (’). - ]=] - if langCode == "grc" then - trackPattern(text, U(0x1FBD), "Ancient Greek/spacing-coronis") - trackPattern(text, U(0x1FBF), "Ancient Greek/spacing-smooth-breathing") - trackPattern(text, "[" .. U(0x1FBD) .. U(0x1FBF) .. "]", "Ancient Greek/wrong-apostrophe", true) - end - - -- [[Special:WhatLinksHere/Template:tracking/script/Russian/grave-accent]] - elseif langCode == "ru" then - local decomposed = mw.ustring.toNFD(text) - - trackPattern(decomposed, U(0x300), "Russian/grave-accent") - - -- [[Special:WhatLinksHere/Template:tracking/script/Tibetan/trailing-punctuation]] - elseif langCode == "bo" then - trackPattern(text, "[་།]$", "Tibetan/trailing-punctuation", true) - trackPattern(text, "[་།]%]%]$", "Tibetan/trailing-punctuation", true) - - --[=[ - [[Special:WhatLinksHere/Template:tracking/script/Thai/broken-ae]] - [[Special:WhatLinksHere/Template:tracking/script/Thai/broken-am]] - [[Special:WhatLinksHere/Template:tracking/script/Thai/wrong-rue-lue]] - ]=] - elseif langCode == "th" then - trackPattern(text, "เ".."เ", "Thai/broken-ae") - trackPattern(text, "ํ[่้๊๋]?า", "Thai/broken-am", true) - trackPattern(text, "[ฤฦ]า", "Thai/wrong-rue-lue", true) - - --[=[ - [[Special:WhatLinksHere/Template:tracking/script/Lao/broken-ae]] - [[Special:WhatLinksHere/Template:tracking/script/Lao/broken-am]] - ]=] - elseif langCode == 
"lo" then - trackPattern(text, "ເ".."ເ", "Lao/broken-ae") - trackPattern(text, "ໍ[່້໊໋]?າ", "Lao/broken-am", true) - end - end -end - --- Wrap text in the appropriate HTML tags with language and script class. -function export.tag_text(text, lang, sc, face, class, id) - if not sc then - sc = require("Module:scripts").findBestScript(text, lang) - end - - track(text, lang, sc) - - -- Replace space characters with newlines in Mongolian-script text, which is written top-to-bottom. - if sc and sc:getDirection() == "down" then - --[[ First, escape targets of wikilinks and HTML tags, - which should not have their spaces - replaced with line breaks. ]] - local escaped = {} - local i = 0 - local escape_format = "$%d" - - local function escape(text, pattern, prefix) - return text:gsub( - pattern, - function(item) - i = i + 1 - escaped[i] = item - return (prefix or "") .. escape_format:format(i) - end) - end - - text = escape(text, "%[%[([^|]+|)", "[[") - text = escape(text, "<[^>]+>") - - text = text:gsub(" +", "
") - - -- Unescape whatever was escaped. - text = text:gsub( - "$(%d)", - function(a) - a = tonumber(a) - return escaped[a] - end - ) - end - - if sc:getCode() == "Imag" then - face = nil - end - - local function class_attr(classes) - table.insert(classes, 1, sc:getCode()) - if class and class ~= '' then - table.insert(classes, class) - end - return 'class="' .. table.concat(classes, ' ') .. '"' - end - - local function tag_attr(...) - local output = {} - if id then - table.insert(output, 'id="' .. require("Module:utilities").make_id(lang, id) .. '"') - end - - table.insert(output, class_attr({...}) ) - - if lang then - table.insert(output, 'lang="' .. lang:getCode() .. '"') - end - - return table.concat(output, " ") - end - - if face == "hypothetical" then - -- [[Special:WhatLinksHere/Template:tracking/script-utilities/face/hypothetical]] - require("Module:debug").track("script-utilities/face/hypothetical") - end - - local data = mw.loadData("Module:script utilities/data").faces[face or "nil"] - - if sc:getDirection() == "rtl" then - text = mw.ustring.gsub(text, "%p$", "%0‎") - end - - local post = "" - if face == "translation" and sc:getDirection() == "rtl" then - post = "‎" - end - - -- Add a script wrapper - if data then - return ( data.prefix or "" ) .. '<' .. data.tag .. ' ' .. tag_attr(data.class) .. '>' .. text .. '' .. post - else - error('Invalid script face "' .. face .. '".') - end -end - -function export.tag_translit(translit, lang, kind, attributes) - if type(lang) == "table" then - lang = lang.getCode and lang:getCode() - or error("Third argument to tag_translit should be a language code or language object.") - end - - local data = mw.loadData("Module:script utilities/data").translit[kind or "default"] - - local opening_tag = {} - - table.insert(opening_tag, data.tag) - if lang == "ja" then - table.insert(opening_tag, 'class="' .. (data.classes and data.classes .. " " or "") .. 'tr"') - else - table.insert(opening_tag, 'lang="' .. lang .. 
'-Latn"') - table.insert(opening_tag, 'class="' .. (data.classes and data.classes .. " " or "") .. 'tr Latn"') - end - - if data.dir then - table.insert(opening_tag, 'dir="' .. data.dir .. '"') - end - - table.insert(opening_tag, attributes) - - return "<" .. table.concat(opening_tag, " ") .. ">" .. translit .. "" -end - -function export.tag_transcription(transcription, lang, kind, attributes) - if type(lang) == "table" then - lang = lang.getCode and lang:getCode() - or error("Third argument to tag_translit should be a language code or language object.") - end - - local data = mw.loadData("Module:script utilities/data").transcription[kind or "default"] - - local opening_tag = {} - - table.insert(opening_tag, data.tag) - if lang == "ja" then - table.insert(opening_tag, 'class="' .. (data.classes and data.classes .. " " or "") .. 'ts"') - else - table.insert(opening_tag, 'lang="' .. lang .. '-Latn"') - table.insert(opening_tag, 'class="' .. (data.classes and data.classes .. " " or "") .. 'ts Latn"') - end - - if data.dir then - table.insert(opening_tag, 'dir="' .. data.dir .. '"') - end - - table.insert(opening_tag, attributes) - - return "<" .. table.concat(opening_tag, " ") .. ">" .. transcription .. "" -end - --- Add a notice to request the native script of a word -function export.request_script(lang, sc) - local scripts = lang.getScripts and lang:getScripts() or error('The language "' .. lang:getCode() .. '" does not have the method getScripts. It may be unwritten.') - - -- By default, request for "native" script - local cat_script = "native" - local disp_script = "script" - - -- If the script was not specified, and the language has only one script, use that. - if not sc and #scripts == 1 then - sc = scripts[1] - end - - -- Is the script known? - if sc then - -- If the script is Latin, return nothing. 
- if export.is_Latin_script(sc) then - return "" - end - - if sc:getCode() ~= scripts[1]:getCode() then - disp_script = sc:getCanonicalName() - end - - -- The category needs to be specific to script only if there is chance - -- of ambiguity. This occurs when lang=und, or when the language has - -- multiple scripts. - if lang:getCode() == "und" or scripts[2] then - cat_script = sc:getCanonicalName() - end - else - -- The script is not known. - -- Does the language have at least one non-Latin script in its list? - local has_nonlatin = false - - for i, val in ipairs(scripts) do - if not export.is_Latin_script(val) then - has_nonlatin = true - break - end - end - - -- If there are non-Latin scripts, return nothing. - if not has_nonlatin then - return "" - end - end - - local category = "" - - if mw.title.getCurrentTitle().nsText ~= "Template" then - category = "[[Category:" .. lang:getCanonicalName() .. " terms needing " .. cat_script .. " script]]" - end - - return "[" .. disp_script .. " needed]" .. category -end - -function export.template_rfscript(frame) - local args = frame.args - local lang = args[1] or error("The first parameter (language code) has not been given") - local sc = args["sc"]; if sc == "" then sc = nil end - lang = require("Module:languages").getByCode(lang) or error("The language code \"" .. lang .. "\" is not valid.") - sc = (sc and (require("Module:scripts").getByCode(sc) or error("The script code \"" .. sc .. "\" is not valid.")) or nil) - - local ret = export.request_script(lang, sc) - - if ret == "" then - error("This language is written in the Latin alphabet. It does not need a native script.") - else - return ret - end -end - -function export.checkScript(text, scriptCode, result) - local scriptObject = require("Module:scripts").getByCode(scriptCode) - - if not scriptObject then - error('The script code "' .. scriptCode .. '" is not recognized.') - end - - local originalText = text - - -- Remove non-letter characters. 
- text = mw.ustring.gsub(text, "[%A]", "") - - -- Remove all characters of the script in question. - text = mw.ustring.gsub(text, "[" .. scriptObject:getCharacters() .. "]", "") - - if text ~= "" then - if type(result) == "string" then - error(result) - else - error('The text "' .. originalText .. '" contains the letters "' .. text .. '" that do not belong to the ' .. scriptObject:getCategoryName() .. '.', 2) - end - end -end - -return export \ No newline at end of file diff --git a/wikt/translit/scripts.lua b/wikt/translit/scripts.lua deleted file mode 100644 index d5c41a6..0000000 --- a/wikt/translit/scripts.lua +++ /dev/null @@ -1,185 +0,0 @@ -local export = {} -local Script = {} - -function Script:getCode() - return self._code -end - -function Script:getCanonicalName() - return self._rawData.canonicalName -end - -function Script:getOtherNames() - return self._rawData.otherNames or {} -end - -function Script:getParent() - return self._rawData.parent -end - -function Script:getSystems() - if not self._systemObjects then - local m_systems = require("Module:writing systems") - self._systemObjects = {} - - for _, sys in ipairs(self._rawData.systems or {}) do - table.insert(self._systemObjects, m_systems.getByCode(sys)) - end - end - - return self._systemObjects -end - ---function Script:getAllNames() --- return self._rawData.names ---end - - -function Script:getType() - return "script" -end - - -function Script:getCategoryName() - local name = self._rawData.canonicalName - - -- If the name already has "code" or "semaphore" in it, don't add it. - -- No names contain "script". - if name:find("[Cc]ode$") or name:find("[Ss]emaphore$") then - return name - else - return name .. 
" script" - end -end - - -function Script:getWikipediaArticle() - return self._rawData.wikipedia_article or self:getCategoryName() -end - - -function Script:getCharacters() - if self._rawData.characters then - return self._rawData.characters - else - return nil - end -end - - -function Script:countCharacters(text) - if not self._rawData.characters then - return 0 - else - local _, num = mw.ustring.gsub(text, "[" .. self._rawData.characters .. "]", "") - return num - end -end - -function Script:getDirection() - local direction = self._rawData.direction - if not direction then - return nil - else - return direction - end -end - - -function Script:getRawData() - return self._rawData -end - - -function Script:toJSON() - local ret = { - canonicalName = self:getCanonicalName(), - categoryName = self:getCategoryName(), - code = self._code, - otherNames = self:getOtherNames(), - type = self:getType(), - } - - return require("Module:JSON").toJSON(ret) -end - - -Script.__index = Script - - -function export.makeObject(code, data) - return data and setmetatable({ _rawData = data, _code = code }, Script) or nil -end - - -function export.getByCode(code) - if code == "IPAchar" then - require("Module:debug").track("IPAchar") - end - return export.makeObject(code, mw.loadData("Module:scripts/data")[code]) -end - -function export.getByCanonicalName(name) - local code = mw.loadData("Module:scripts/by name")[name] - - if not code then - return nil - end - - return export.makeObject(code, mw.loadData("Module:scripts/data")[code]) -end - --- Find the best script to use, based on the characters of a string. 
-function export.findBestScript(text, lang) - if not text or not lang or not lang.getScripts then - return export.getByCode("None") - end - - local scripts = lang:getScripts() - - if not scripts[2] then - return scripts[1] - end - - --[=[ - Remove any HTML entities; catfix function in [[Module:utilities]] - adds tagging to a no-break space ( ), which contains Latin characters; - hence Latin was returned as the script if "Latn" is one of the language's scripts. - ]=] - text = string.gsub(text, "&[a-zA-Z0-9]+;", "") - - -- Try to match every script against the text, - -- and return the one with the most matching characters. - local bestcount = 0 - local bestscript = nil - - -- Get length of text minus any spacing or punctuation characters. - -- Counting instances of UTF-8 character pattern is faster than mw.ustring.len. - local _, length = string.gsub(mw.ustring.gsub(text, "[%s%p]+", ""), "[\1-\127\194-\244][\128-\191]*", "") - - if length == 0 then - return export.getByCode("None") - end - - for i, script in ipairs(scripts) do - local count = script:countCharacters(text) - - if count >= length then - return script - end - - if count > bestcount then - bestcount = count - bestscript = script - end - end - - if bestscript then - return bestscript - end - - -- No matching script was found. Return "None". - return export.getByCode("None") -end - -return export \ No newline at end of file diff --git a/wikt/translit/si-translit.lua b/wikt/translit/si-translit.lua deleted file mode 100644 index deac53c..0000000 --- a/wikt/translit/si-translit.lua +++ /dev/null @@ -1,67 +0,0 @@ --- This module will transliterate Sinhalese language text per WT:SI TR. 
--- Language code: sin - -local export = {} - -local consonants = { - ['ක']='k' , ['ඛ']='kh' , ['ග']='g' , ['ඝ']='gh' , ['ඞ']='ṅ' , ['ඟ']='n̆g' , ['ච']='c' , ['ඡ']='ch' , ['ජ']='j' , ['ඣ']='jh' , - ['ඤ']='ñ' , ['ඥ']='gn' , ['ඦ']='n̆j' , ['ට']='ṭ' , ['ඨ']='ṭh' , ['ඩ']='ḍ' , ['ඪ']='ḍh' , ['ණ']='ṇ' , ['ඬ']='n̆ḍ' , - ['ත']='t' , ['ථ']='th' , ['ද']='d' , ['ධ']='dh' , ['න']='n' , ['ඳ']='n̆d' , - ['ප']='p' , ['ෆ']='f' , ['ඵ']='ph' , ['බ']='b' , ['භ']='bh' , ['ම']='m' , ['ඹ']='m̆b' , ['ය']='y' , ['ර']='r' , ['ල']='l' , ['ව']='v' , - ['ශ']='ś' , ['ෂ']='ṣ' , ['ස']='s' , ['හ']='h' , ['ළ']='ḷ' , ['ෆ']='f' -} - -local diacritics = { - ['ා'] = 'ā', - ['ැ'] = 'æ', - ['ෑ'] = 'ǣ', - ['ි'] = 'i', - ['ී'] = 'ī', - ['ු'] = 'u', - ['ූ'] = 'ū', - ['ෙ'] = 'e', - ['ේ'] = 'ē', - ['ෛ'] = 'ai', - ['ො'] = 'o', - ['ෝ'] = 'ō', - ['ෞ'] = 'au', - ['ෘ'] = 'ṛ', - ['ෟ'] = 'ḷ', - ['ෲ'] = 'ṝ', - ['ෳ'] = 'ḹ', - ['්'] = '' -} -local tt = { - -- vowels - ['අ']='a' , ['ආ']='ā' , ['ඇ']='æ' , ['ඈ']='ǣ' , ['ඉ']='i' , ['ඊ']='ī' , ['උ']='u' , ['ඌ']='ū' , - ['එ']='e' , ['ඒ']='ē' , ['ඓ']='ai' , ['ඔ']='o' , ['ඕ']='ō' , ['ඖ']='au' , - ['ඍ']='ṛ' , ['ඎ']='ṝ' , ['ඏ']='ḷ' , ['ඐ']='ḹ' , - -- other symbols - ['ං']='ṁ' , -- anusvara - ['ඃ']='ḥ' , -- visarga - ['්']='' , --hal kirīma, supresses the inherent vowel "a" - -- punctuation - ['෴']='.' , -- kunddaliya (obsolete) -} - --- translit any words or phrases -function export.tr(text) - if type(text) == 'table' then text = text.args[1] end - text = mw.ustring.gsub( - text, - '([කඛගඝඞඟචඡජඣඤඥඦටඨඩඪණඬතථදධනඳපපඵබභමඹයරලවශෂසහළෆ])'.. - '([\224\183\153\224\183\146\224\183\156\224\183\148\224\183\144\224\183\146\224\183\143\224\183\154\224\183\157\224\183\150\224\183\145\224\183\147\224\183\152\224\183\159\224\183\178\224\183\179\224\183\155\224\183\158\224\183\138]?)', - function(c, d) - if d == "" then - return consonants[c] .. 'a' - else - return consonants[c] .. 
diacritics[d] - end - end) - - text = mw.ustring.gsub(text, '.', tt) - - return text -end - -return export \ No newline at end of file diff --git a/wikt/translit/sjd-translit.lua b/wikt/translit/sjd-translit.lua deleted file mode 100644 index f5a1d30..0000000 --- a/wikt/translit/sjd-translit.lua +++ /dev/null @@ -1,72 +0,0 @@ --- This module will transliterate Kildin Sami language text. - -local export = {} - -local tt = { - ["А"]='A', ["а"]='a', - ["Ӓ"]="ʹa", ["ӓ"]="ʹa", - ["Б"]='B', ["б"]='b', - ["В"]='V', ["в"]='v', - ["Г"]='G', ["г"]='g', - ["Д"]='D', ["д"]='d', - ["Е"]='Je', ["е"]='je', - ["Ё"]="Jo", ["ё"]="jo", - ["Ж"]='Ž', ["ж"]='ž', - ["З"]='Z', ["з"]='z', - ["Һ"]="H", ["һ"]="h", ["ʼ"]="h", - ["И"]='I', ["и"]='i', - ["Ӣ"]="Ī", ["ӣ"]="ī", - ["Й"]="J", ["й"]="j", - ["Ј"]="J̥", ["ј"]="j̥", ["Ҋ"]="J̥", ["ҋ"]="j̥", - ["К"]='K', ["к"]='k', - ["Л"]='L', ["л"]='l', - ["Ӆ"]='L̥', ["ӆ"]='l̥', - ["М"]='M', ["м"]='m', - ["Ӎ"]="M̥", ["ӎ"]="m̥", - ["Н"]='N', ["н"]='n', - ["Ӊ"]="N̥", ["ӊ"]="n̥", - ["Ӈ"]="Ŋ", ["ӈ"]="ŋ", - ["О"]='O', ["о"]='o', - ["П"]='P', ["п"]='p', - ["Р"]='R', ["р"]='r', - ["Ҏ"]="R̥", ["ҏ"]="r̥", - ["С"]='S', ["с"]='s', - ["Т"]='T', ["т"]='t', - ["У"]='U', ["у"]='u', - ["Ӯ"]="Ū", ["ӯ"]="ū", - ["Ф"]='F', ["ф"]='f', - ["Х"]='X', ["х"]='x', - ["Ц"]='C', ["ц"]='c', - ["Ч"]='Č', ["ч"]='č', - ["Ш"]='Š', ["ш"]='š', - ["Щ"]="Šč", ["щ"]="šč", - ["Ы"]="Ɨ", ["ы"]="ɨ", - ["Ъ"]="", ["ъ"]="", - ["Ь"]="ʹ", ["ь"]="ʹ", ["Ҍ"]="ʹ", ["ҍ"]="ʹ", - ["Э"]="E", ["э"]="e", - ["Ӭ"]="ʹE", ["ӭ"]="ʹe", - ["Ю"]="Ju", ["ю"]="ju", - ["Я"]="Ja", ["я"]="ja", -} - - -local vowel = "аӓеёиӣоуӯыэӭюяАӒЕЁИӢОУӮЫЭӬЮЯ" - -function export.tr(text, lang, sc) - text = mw.ustring.gsub(text, "([^" .. vowel .. "НнЪъЬьҌҍ])е", "%1ьэ") - text = mw.ustring.gsub(text, "([^" .. vowel .. "НнЪъЬьҌҍ])ё", "%1ьо") - text = mw.ustring.gsub(text, "([^" .. vowel .. "НнЪъЬьҌҍ])ю", "%1ьу") - text = mw.ustring.gsub(text, "([^" .. vowel .. "НнЪъЬьҌҍ])я", "%1ьа") - - text = mw.ustring.gsub(text, "([^" .. vowel .. 
"НнЪъЬьҌҍ])Е", "%1ЬЭ") - text = mw.ustring.gsub(text, "([^" .. vowel .. "НнЪъЬьҌҍ])Ё", "%1ЬО") - text = mw.ustring.gsub(text, "([^" .. vowel .. "НнЪъЬьҌҍ])Ю", "%1ЬУ") - text = mw.ustring.gsub(text, "([^" .. vowel .. "НнЪъЬьҌҍ])Я", "%1ЬА") - - text = mw.ustring.gsub(text, "([нН])ь", "%1й") - text = mw.ustring.gsub(text, "НЬ", "НЙ") - - return (mw.ustring.gsub(text, '.', tt)) -end - -return export \ No newline at end of file diff --git a/wikt/translit/ta-translit.lua b/wikt/translit/ta-translit.lua deleted file mode 100644 index 7edd1fd..0000000 --- a/wikt/translit/ta-translit.lua +++ /dev/null @@ -1,77 +0,0 @@ --- This module will transliterate Tamil language text. --- Language code: tam - -local export = {} - -local consonants = { - ['க']='k' , ['ங']='ṅ' , ['ச']='c' , ['ஞ']='ñ' , ['ட']='ṭ' , ['ண']='ṇ' , ['த']='t' , - ['ந']='n' , ['ப']='p', ['ம']='m' , ['ய']='y' , ['ர']='r' , ['ல']='l' , ['வ']='v' , - ['ழ']='ḻ' , ['ள']='ḷ' , ['ற']='ṟ' , ['ன']='ṉ' , ['ஶ']='ś' , ['ஜ']='j' , ['ஷ']='ṣ' , - ['ஸ']='s' , ['ஹ']='h' , ['ஃப']='f' , ['ஃஜ']='z' , ['ஃஸ']='x' , - ['ஃ']='ḥ' , -} - -local diacritics = { - ['ா']= 'ā' , ['ி']='i' , ['ீ']='ī' , ['ு']='u' , ['ூ']='ū' , ['ெ']='e' , - ['ே']='ē' , ['ை']='ai' , ['ொ']='o' , ['ோ']='ō' , ['ௌ']='au' , - ['்']='', --halant, supresses the inherent vowel "a" - -- no diacritic - [''] = 'a' -} - -local nonconsonants = { - -- vowels - ['அ']='a' , ['ஆ']='ā' , ['இ']='i' , ['ஈ']='ī' , ['உ']='u' , ['ஊ']='ū' , - ['எ']='e' , ['ஏ']='ē' , ['ஐ']='ai' , ['ஒ']='o' , ['ஓ']='ō' , ['ஔ']='au' , - -- other symbols --- ['ஃ']='' , -} - --- translit any words or phrases -function export.tr(text, lang, sc) - text = mw.ustring.gsub( - text, - '(ஃ?)([கஙசஞடணதநபமயரலவழளறனஶஜஷஸஹ])'.. - '([ாிீுூெேைொோௌ்]?)', - function(h, c, d) - return (consonants[h..c] or consonants[h] .. (consonants[c] or c)) .. diacritics[d] - end) - - text = mw.ustring.gsub(text, '.', nonconsonants) - - return text -end - -return export - --- All tests passed. 
(refresh) - --- test_translit_tamil: --- Text Expected Actual Differs at --- Passed அஃகம் aḥkam aḥkam --- Passed கஞ்சாவை kañcāvai kañcāvai --- Passed இடியாப்பம் iṭiyāppam iṭiyāppam --- Passed காடைக்கண்ணி kāṭaikkaṇṇi kāṭaikkaṇṇi --- Passed ரெஃஸ் rex rex --- Passed அசிஃப் acif acif --- Passed ஔவை auvai auvai --- -- Unit tests for [[Module:ta-translit]]. Refresh page to run tests. --- local tests = require('Module:UnitTests') --- local ta_translit = require('Module:ta-translit') - --- function tests:check_translit(Taml, roman) --- self:equals('[[' .. Taml .. '#Tamil|' .. Taml .. ']]', ta_translit.tr(Taml, 'ta', 'Taml'), roman) --- end - --- function tests:test_translit_tamil() --- self:check_translit('அஃகம்', 'aḥkam') --- self:check_translit('கஞ்சாவை', 'kañcāvai') --- self:check_translit('இடியாப்பம்', 'iṭiyāppam') --- self:check_translit('காடைக்கண்ணி', 'kāṭaikkaṇṇi') --- self:check_translit("ரெஃஸ்","rex") --- self:check_translit("அசிஃப்","acif") --- self:check_translit("ஔவை", "auvai") - --- end - --- return tests \ No newline at end of file diff --git a/wikt/translit/talu-translit.lua b/wikt/translit/talu-translit.lua deleted file mode 100644 index e0f8898..0000000 --- a/wikt/translit/talu-translit.lua +++ /dev/null @@ -1,48 +0,0 @@ -local export = {} -local gsub = mw.ustring.gsub - -local tt = { - -- consonants - ['ᦀ'] = 'q\204\135', ['ᦁ'] = 'q', - ['ᦂ'] = 'k\204\135', ['ᦃ'] = 'x\204\135', ['ᦄ'] = 'n\204\135g', ['ᦅ'] = 'k', ['ᦆ'] = 'x', ['ᦇ'] = 'ng', - ['ᦈ'] = 't\204\135s', ['ᦉ'] = 's\204\135', ['ᦊ'] = 'y\204\135', ['ᦋ'] = 'ts', ['ᦌ'] = 's', ['ᦍ'] = 'y', - ['ᦎ'] = 't\204\135', ['ᦏ'] = 't\204\135h', ['ᦐ'] = 'n\204\135', ['ᦑ'] = 't', ['ᦒ'] = 'th', ['ᦓ'] = 'n', - ['ᦔ'] = 'p\204\135', ['ᦕ'] = 'p\204\135h', ['ᦖ'] = 'm\204\135', ['ᦗ'] = 'p', ['ᦘ'] = 'ph', ['ᦙ'] = 'm', - ['ᦚ'] = 'f\204\135', ['ᦛ'] = 'v\204\135', ['ᦜ'] = 'l\204\135', ['ᦝ'] = 'f', ['ᦞ'] = 'v', ['ᦟ'] = 'l', - ['ᦠ'] = 'h\204\135', ['ᦡ'] = 'd\204\135', ['ᦢ'] = 'b\204\135', ['ᦣ'] = 'h', ['ᦤ'] = 'd', ['ᦥ'] = 'b', 
- ['ᦦ'] = 'k\204\135v', ['ᦧ'] = 'x\204\135v', ['ᦨ'] = 'kv', ['ᦩ'] = 'xv', ['ᦪ'] = 's\204\135v', ['ᦫ'] = 'sv', - -- vowels and finals (visual ordering by Unicode 8) - ['ᦰ'] = '!', ['ᦱ'] = 'aa', ['ᦲ'] = 'ii', ['ᦳ'] = 'u', ['ᦴ'] = 'uu', ['ᦸ'] = 'oa', ['ᦹ'] = 'ue', - ['ᦵ'] = 'e', ['ᦶ'] = 'ae', ['ᦷ'] = 'o', ['ᦺ'] = 'ay', -- this line to be swapped - ['ᦻ'] = 'aay', ['ᦼ'] = 'uy', ['ᦽ'] = 'oy', ['ᦾ'] = 'oay', ['ᦿ'] = 'uey', ['ᧀ'] = 'iy', - ['ᧁ'] = 'w', ['ᧂ'] = 'ng', ['ᧃ'] = 'n', ['ᧄ'] = 'm', ['ᧅ'] = 'k', ['ᧆ'] = 'd', ['ᧇ'] = 'b', - -- tones - ['ᧈ'] = '1', ['ᧉ'] = '2', - -- numerals - ['᧐'] = '0', ['᧑'] = '1', ['᧒'] = '2', ['᧓'] = '3', ['᧔'] = '4', - ['᧕'] = '5', ['᧖'] = '6', ['᧗'] = '7', ['᧘'] = '8', ['᧙'] = '9', - ['᧚'] = '1', - -- ligatures ᧞ ᧟ sorted after ᦶᦜ - ['᧞'] = 'l\204\135ae', ['᧟'] = 'l\204\135aew', -} - -function export.tr(text, lang, sc, debug_mode) - - if type(text) == 'table' then -- called directly from a template - text = text.args[1] - end - - text = gsub(text, '([ᦵᦶᦷᦺ])([ᦀ-ᦫ])', '%2%1') -- swapped - text = gsub(text, '([ᦀ-ᦫ])([ᧁ-ᧇ])', '%1a%2') - - text = gsub(text, '.', tt) - - text = gsub(text, 'aa!', 'a') - text = gsub(text, 'ii!', 'i') - text = gsub(text, 'uu!', 'u') - - return text - -end - -return export \ No newline at end of file diff --git a/wikt/translit/te-translit.lua b/wikt/translit/te-translit.lua deleted file mode 100644 index 13a9d47..0000000 --- a/wikt/translit/te-translit.lua +++ /dev/null @@ -1,88 +0,0 @@ ---This module will transliterate Telugu language text per WT:TE TR. 
---Language code: tel -local export = {} - -local consonants = { - ['క']='k' , ['ఖ']='kh' , ['గ']='g' , ['ఘ']='gh' , ['ఙ']='ṅ' , - ['చ']='c' , ['ఛ']='ch' , ['జ']='j' , ['ఝ']='jh' , ['ఞ']='ñ' , - ['ట']='ṭ' , ['ఠ']='ṭh' , ['డ']='ḍ' , ['ఢ']='ḍh' , ['ణ']='ṇ' , - ['త']='t' , ['థ']='th' , ['ద']='d' , ['ధ']='dh' , ['న']='n' , - ['ప']='p' , ['ఫ']='ph' , ['బ']='b' , ['భ']='bh' , ['మ']='m' , - ['య']='y' , ['ర']='r' , ['ల']='l' , ['వ']='v' , ['ళ']='ḷ' , - ['శ']='ś' , ['ష']='ṣ' , ['స']='s' , ['హ']='h' , ['ఱ']='ṛ' , - ['ౘ']='ts' , ['ౙ']='dz' , ['ౚ']='rrr' , -} - -local diacritics = { - ['ా']= 'ā' , ['ి']='i' , ['ీ']='ī' , ['ు']='u' , ['ూ']='ū' , ['ృ']='r̥' , ['ౄ']='r̥̄' , - ['ె']='e' , ['ే']='ē' , ['ై']='ai' , ['ొ']='o' , ['ో']='ō' , ['ౌ']='au' , ['్']='' , -} -local tt = { - -- vowels - ['అ']='a' , ['ఆ']='ā' , ['ఇ']='i' , ['ఈ']='ī' , ['ఉ']='u' , ['ఊ']='ū' , - ['ఋ']='r̥' , ['ౠ']='r̥̄' , ['ఌ']='l̥' , ['ౡ']='l̥̄', ['ఎ']='e' , ['ఏ']='ē' , - ['ఐ']='ai' , ['ఒ']='o' , ['ఓ']='ō' , ['ఔ']='au' , ['అం']='aṅ' , ['అఁ']='aṃ' , ['అః']='ah' , - -- other symbols - ['ం']='ṃ',-- anusvara - ['ః']='ḥ' , -- visarga - ['ఁ']='ṅ' , -- candrabindu/arthanusvāra/aranusa - ['ఽ']='’' , -- avagraha --- digits - ['౦'] = '0', ['౧'] = '1', ['౨'] = '2', ['౩'] = '3', ['౪'] = '4', - ['౫'] = '5', ['౬'] = '6', ['౭'] = '7', ['౮'] = '8', ['౯']= '9', - ['౸']='0⁄4', ['౹']='¼', ['౺']='2⁄4', ['౻']='¾', - ['౦']='0⁄16', ['౼']='1⁄16', ['౽']='2⁄16', ['౾']='3⁄16' , -} - --- translit any words or phrases -function export.tr(text, lang, sc) - text = mw.ustring.gsub( - text, - '([కఖగఘఙచఛజఝఞటఠడఢణతథదధనపఫబభమయరలవళశషసహఱౘౙౚ])'.. - '([ాిీుూృ̥ౄ̥̄ెేైొోౌ్]?)', - function(c, d) - if d == "" then - return consonants[c] .. 'a' - else - return consonants[c] .. 
diacritics[d] - end - end) - - text = mw.ustring.gsub(text, '.', tt) - - -- anusvara - text = mw.ustring.gsub(text, 'ṃ([kgṅ])', 'ṅ%1') - text = mw.ustring.gsub(text, 'ṃ([cjñ])', 'ñ%1') - text = mw.ustring.gsub(text, 'ṃ([ṭḍṇ])', 'ṇ%1') - text = mw.ustring.gsub(text, 'ṃ([tdn])', 'n%1') - text = mw.ustring.gsub(text, 'ṃ([pbm])', 'm%1') - - return text -end - -return export - --- test_translit_telugu: --- Text Expected Actual Differs at --- Failed అంకపాళి amkapāḷi aṅkapāḷi 2 --- Passed పా pā pā --- Passed ళి ḷi ḷi --- Passed ౙమకట్టు dzamakaṭṭu dzamakaṭṭu --- -- Unit tests for [[Module:te-translit]]. Refresh page to run tests. --- local tests = require('Module:UnitTests') --- local te_translit = require('Module:te-translit') - --- --TO DO --- function tests:do_test_translit(telu, roman, xlit) --- self:equals('[[' .. telu .. '#Telu|' .. telu .. ']]', te_translit.tr(telu, 'te', 'Telu'), roman) --- end - --- function tests:test_translit_telugu() --- self:do_test_translit('అంకపాళి', 'amkapāḷi') --- self:do_test_translit('పా', 'pā') --- self:do_test_translit('ళి', 'ḷi') --- self:do_test_translit('ౙమకట్టు', 'dzamakaṭṭu') - --- end - --- return tests \ No newline at end of file diff --git a/wikt/translit/tests.lua b/wikt/translit/tests.lua deleted file mode 100644 index c5091ef..0000000 --- a/wikt/translit/tests.lua +++ /dev/null @@ -1,302 +0,0 @@ -mw = require('mw') -mw.text = require('mw-text') -mw.ustring = require ('ustring/ustring') - -kan= require ('translit/kn-translit') -kan.tr('ಲಲ','kn','Knda') -kan.tr('ಅಕ್ಕಿ','kn','Knda') - -hin = require ('translit/hi-translit') -hin.tr('चमचा', 'hi', 'Deva') -hin.tr('प्यार', 'hi', 'Deva') -hin.tr('भारतीय', 'hi', 'Deva') -hin.tr('प्यार', 'hi', 'Deva') -hin.tr('हमसफ़र', 'hi', 'Deva') -hin.tr('चमत्कार', 'hi', 'Deva') -hin.tr('कयामत', 'hi', 'Deva') -hin.tr('काफी', 'hi', 'Deva') - - - -guj = require ('translit/gu-translit') -guj.tr('અંક મેળવવો','gu', 'Gujr') -ยูโด -ยูโด - -tha = require ('translit/th-translit') -tha = require 
'translit/tha-pron' -tha.tr('ยูโด','th', 'Thai') -tha.tr('ผล็อย','th', 'Thai') -tha.tr('ยูโด','th', 'Thai') --- --- works -tha.tr('เหดสุดวิไส','th', 'Thai') --- not works -tha.tr('ไวน์มุสกาเดต์','th', 'Thai') - -tha.tr('สวย','th', 'Thai') -tha.tr('เหดสุดวิไส','th', 'Thai') -tha.tr('เหนื่อย','th', 'Thai') - -san = require ('translit/sa-translit') - -san.tr('सोऽहम्','sa', 'Deva') -san.tr('दुःख','sa', 'Deva') - -pa = require ('translit/guru-translit') -pa.tr("ਅਣਸੁਣਿਆ", 'pa', 'Guru') - -mn = require ('translit/mon-translit') -mn.tr('хаврын сайхан өдөр', 'mn', 'Cyrl') - -bg = require ('translit/bg-translit') -bg.tr('жудо сумо', 'bg', 'Cyrl') - -fas = require ('translit/fa-translit') -fas.tr("آب تنی", 'fa', 'fa-Arab') -fas.tr("اجازه عبور دادن", 'fa', 'fa-Arab') -fas.tr("أحَد أعْضاء الثالُوث الأقْدَس", 'fa', 'fa-Arab') - -pnb = require ('translit/fa-translit') -pnb.tr("سکھ مت", 'pnb', 'fa-Arab') -pnb.tr("روزہ", 'pnb', 'fa-Arab') - - -heb = require ('translit/he-translit') -heb.tr('הַמַּמְלָכָה', 'he', 'Hebr') -heb.tr("!בַּעֲלַת בַּיִת", 'he', 'Hebr') -heb.tr("בֵּית-הָעִירִיָּה", 'he', 'Hebr') -heb.tr("לשון הקודש", 'he', 'Hebr') - - -grc = require ('translit/grc-translit') -grc.tr('σάἁμον', 'el', "polytonic") -grc.tr("θάλαμος ψύξης υγρών", 'el', "polytonic") - -abk = require ('translit/ab-translit') -abk.tr("ааероплан",'ab', 'Cyrl') - -abq = require ('translit/abq-translit') -abq.tr("август",'abq', 'Cyrl') - -ady = require ('translit/ady-translit') -ady.tr('бгъотагъэ', 'ady', 'Cyrl') -ady.tr("помидор", 'ady', 'Cyrl') -ady.tr("бзэгужъый", 'ady', 'Cyrl') - -alt = require ('translit/altai-translit') - -alt.tr("булут", 'alt', 'Cyrl') - -ava = require ('translit/av-translit') -ava.tr("чӏегӏера", 'ava', 'Cyrl') - -bua = require ('translit/bua-translit') -bua.tr("hархяаг", 'bua', 'Cyrl') -bua.tr("аяга", 'bua', 'Cyrl') - - -che = require ('translit/ce-translit') -che.tr("буьйсанан мокха хьоза", 'ce', 'Cyrl') - -chv = require ('translit/cv-translit') -chv.tr("сăмахсен кĕнеки", 
'cv', 'Cyrl') - -cop = require ('translit/copt-translit') -cop.tr("ⲡⲁⲧⲣⲓⲁⲣⲭⲏⲥ", 'copt', 'Coptic') - -dlg = require ('translit/dlg-translit') -dlg.tr("былыт", 'dlg', 'Cyrl') - -dng = require ('translit/dng-translit') -dng.tr("музей", 'dng', 'Cyrl') - -div = require ('translit/dv-translit') -div.tr("ފިރިހެން ހަށި", "dv", "Thaa") - -grc = require ('translit/grc-translit') -grc.tr("ἐξητηριος λόγος", 'grc', "polytonic") - -hye = require 'translit/armn-translit' -hye.tr("Հայաստան", "hy", "Armn") -hye.tr("ալցհայմերի հիվանդություն", "hy", "Armn") - -inh = require 'translit/inh-translit' -inh.tr("аьккхий-мархий бутт", 'inh', 'Cyrl') - -inh = require 'translit/inh-translit' -inh.tr("аьккхий-мархий бутт", 'inh', 'Cyrl') - ---- -kaa = require 'translit/kaa-translit' -kaa.tr("аьккхий-мархий бутт", 'kaa', 'Cyrl') - -kaz = require 'translit/kk-translit' -kaz.tr("аьккхий-мархий бутт", 'inh', 'Cyrl') - -kbd = require 'translit/kbd-translit' -kbd.tr("аьккхий-мархий бутт", 'kbd', 'Cyrl') - -khm = require 'translit/km-translit' -khm.tr("ការអោយរួចជាអ្នកជា", 'km', 'Khmr') -khm.tr("កាលប្បវត្តិវិទ្យា", 'km', 'Khmr') - -kir = require 'translit/ky-translit' -kir.tr("аьккхий-мархий бутт", 'ky', 'Cyrl') - -kjh = require 'translit/kjh-translit' -kjh.tr("аьккхий-мархий бутт", 'kjh', 'Cyrl') - -koi = require 'translit/kv-translit' -koi.tr("аьккхий-мархий бутт", 'koi', 'Cyrl') - -kor = require 'translit/ko-translit' -kor.tr("사무실에서.", "ko", "Kore") -kor.tr("모든 인간은 태어날 때부터 자유로우며 그 존엄과 권리에 있어 동등하다.", "ko", "Kore") - -krc = require 'translit/krc-translit' -krc.tr("агъач къоян", "krc", "Cyrl") - -lbe = require 'translit/lbe-translit' -lbe.tr("ахънилсри", "lbe", "Cyrl") - -mar = require 'translit/mr-translit' -mar.tr("ऑस्ट्रेलिया", 'mr', 'Deva') -mar.tr("अमेरिकन इंग्लिश", 'mr', 'Deva') - -lbe = require 'translit/lbe-translit' -lbe.tr("ахънилсри", "lbe", "Cyrl") - -lzz = require 'translit/geor-translit' -lzz.tr("ფანწალა", "lzz", "Geor") - -ben = require 'translit/bn-translit' -ben.tr("নি্র্যাতন 
নি্র্য্যাতন", "bn", "Beng") -ben.tr("গোলমাল", "bn", "Beng") -ben.tr("বিশ্বাস", "bn", "Beng") - -ben.tr("অপমান", "bn", "Beng") -ben.tr("পশ্চিমবাংলা", "bn", "Beng") -ben.tr("মতামত", "bn", "Beng") -ben.tr("নি্র্যাতন নি্র্য্যাতন", "bn", "Beng") - - -pan = require ('translit/guru-translit') -pan.tr("ਬਦਚਲਣੀ ਦੁਰਾਚਾਰ",'pan', 'Guru') - -rus = require ('translit/ru-translit') -rus.tr("без",'ru', 'Cyrl') - -sat = require 'translit/sat-translit' -sat.tr("ᱜᱚᱱᱰ ᱜᱟᱨᱩᱲ", 'sat', 'Olck') - -sin = require 'translit/si-translit' -sin.tr("ඉලෙක්ට්‍රොනික විද්‍යාව", 'si', 'Sinh') - -tat = require 'translit/tt-translit' -tat.tr("чәршәмбе", 'tt', 'Cyrl') - -tgk = require 'translit/tg-translit' -tgk.tr("машин","tg","Cyrl") - -uig = require 'translit/ug-translit' -uig.tr("бирйуза","ug","Cyrl") -uig.tr("ئىقتىسادشۇناسلىق","ug","ug-Arab") - -yrk = require 'translit/sjd-translit' -yrk.tr("ӈылека", 'yrk', 'Cyrl') - -xmf = require 'translit/geor-translit' -xmf.tr("აფხაზური", 'xmf', 'Geor') - -asm = require 'translit/as-translit' -asm.tr ("জ্যোতি",'as','Beng') -asm.tr ("সঞ্জাত",'as','Beng') - -asm.tr ("নি্র্যাতন নি্র্য্যাতন",'as','Beng') - -tel = require 'translit/te-translit' -tel.tr("ఉత్పన్నమయిన",'te','Telu') -tel.tr("నీతికిసంబంధించిన పని",'te','Telu') - -tam = require 'translit/ta-translit' -tam.tr("ஒழுக்கமானசெயல்", 'ta', 'Taml') -tam.tr("ஊட்டச்சத்து", 'ta', 'Taml') - -ori = require 'translit/or-translit' -ori.tr("ଅନୁକୂଳତା",'or','Orya') - -nep =require 'translit/ne-translit' -nep.tr("सच्चरित्रता",'ne', 'Deva') - -mal = require 'translit/ml-translit' -mal.tr("ജന്മമെടുത്ത", 'ml', 'Mlym') - -"नवेंच शिकिल्लें" - -জ্যোতি - -brx = require 'translit/hi-translit' -brx.tr("गोजोन नङि", 'brx', 'Deva') - -mya= require 'translit/my-translit' -mya.tr("ခေါင်းစွပ်ဆွယ်တာ", "my", "Mymr") -mya.tr("ချူးအင်းဂမ်း", "my", "Mymr") - -mya.tr("ငွေကြေးစုဆောင်းခြင်း", "my", "Mymr") -mya.tr("ကွန်ဖူး", "my", "Mymr") - -amh = require 'translit/ethi-translit' -amh.tr("ልምቡጥ","am","Ethi") - -tly = require 'translit/ug-translit' 
-tly.tr("вазах","tly","Cyrl") -tly.tr("ام روز","tly","ug-Arab") - -bod = require 'translit/bo-translit' -bod.tr("གཟའ་ཉི་མ",'bo','Tibt') - -jpa = require 'translit/ja' -jpa.kana_to_romaji("はっきり感じとれるほど") - -ryu = require 'translit/ja' -ryu.kana_to_romaji("いいび") - - -chr = require 'translit/cher-translit' -chr.tr("ᏗᎦᎾᏙᎬ ᎠᏐᎠᏍᏙ", "chr", "Cher") - -chu = require 'translit/cyrs-glag-translit' -chu.tr("ⰲⱑⱅⱃⱏ","chu","Glag") -chu.tr("ⱄⱏⰴⱃⰰⰲⱏ","chu","Glag") -chu.tr("цвѣтъ","chu","Cyrs") -chu.tr("тръжьникъ","chu","Glag") - -iii = require 'translit/ii-translit' -iii.tr("ꃶ",'ii', 'Yiii') -iii.tr("ꀑ",'ii', 'Yiii') - -kat =require 'translit/geor-translit' -kat.tr("აბრეშუმის ჭია","ka", "Geor") - -lao = require ('translit/lo-translit') -lao.tr("ການທ່ອງທ່ຽວ",'lo', 'Laoo') -lao.tr("ການສໍ້ລາດບັງຫຼວງ",'lo', 'Laoo') - - -lao.tr("ดอกทานตะวัน",'lao', 'Thai') -lao.tr("เด๊ดสะมอเร่",'lao', 'Thai') - -lao = require ('translit/th-pron') - -lao.translit('ข้าว','th', 'Thai', 'paiboon', 'translit-module') -lao.translit("ການສຶກສາ",'th', 'Thai', 'paiboon', 'translit-module') -lao.translit('เด๊ดสะมอเร','th', 'Thai', 'royin', 'translit-module') -lao.translit('เด๊ดสะมอเร','th', 'Thai', 'ipa', 'translit-module') -lao.translit('เด๊ดสะมอเร','th', 'Thai', 'charPhon', 'translit-module') -lao.translit('เด๊ดสะมอเร','th', 'Thai', 'homophone', 'translit-module') -lao.translit('เด๊ดสะมอเร','th', 'Thai', 'file', 'translit-module') - - -2738 -"агъач къоян" \ No newline at end of file diff --git a/wikt/translit/tg-translit.lua b/wikt/translit/tg-translit.lua deleted file mode 100644 index 0fcd8d3..0000000 --- a/wikt/translit/tg-translit.lua +++ /dev/null @@ -1,40 +0,0 @@ --- This module will transliterate Tajik language text per WT:TG TR. 
--- Language code: tgk - -local export = {} - -local tt={ - ["т"]="t",['Т']='T', ["р"]="r",['Р']='R', ["ф"]="f",['Ф']='F', - ["ю"]="yu",['Ю']='Yu', ["ш"]="š",['Ш']='Š', ["ҳ"]="h",['Ҳ']='H', ["ъ"]="ʾ",['Ъ']='ʾ', ["н"]="n",['Н']='N', - ["п"]="p",['П']='P', ["й"]="y",['Й']='Y', ["л"]="l",['Л']='L', ["з"]="z",['З']='Z', ["е"]="e",['Е']='E', - ["г"]="g",['Г']='G', ["б"]="b",['Б']='B', ["у"]="u",['У']='U', ["с"]="s",['С']='S', ["х"]="x",['Х']='X', - ["ч"]="č",['Ч']='Č', ["я"]="ya",['Я']='Ya', - ["м"]="m",['М']='M', ["о"]="o",['О']='O', ["и"]="i",['И']='I', ["ё"]="yo",['Ё']='Yo', ["ж"]="ž",['Ж']='Ž', - ["к"]="k",['К']='K', ["д"]="d",['Д']='D', ["в"]="v",['В']='V', ["а"]="a",['А']='A', ["ҷ"]="j",['Ҷ']='J', - ["ӯ"]="ü",['Ӯ']='Ü', ["э"]="e",['Э']='E', ["ӣ"]="ī",['Ӣ']='Ī', ["қ"]="q",['Қ']="Q", ["ғ"]="ġ",['Ғ']='Ġ', - ["і"]="i",['І']='I' -}; - -function export.tr(text, lang, sc) - text = mw.ustring.gsub( - text, - "([АОУЕЯЁЮИӢЕЪаоуэяёюиӣе][́̀]?)([ЕеИиӢӣ])", - function(a, e) - local iotated = { - ['е'] = 'ye', - ['Е'] = 'Ye', - ['и'] = 'yi', - ['И'] = 'Yi', - ['ӣ'] = 'yī', - ['Ӣ'] = 'Yī', - } - return a .. iotated[e] - end - ) - - text:gsub("^Е",'Ye'):gsub("^е",'ye') - - return (mw.ustring.gsub(text, '.', tt)) -end - -return export \ No newline at end of file diff --git a/wikt/translit/tt-translit.lua b/wikt/translit/tt-translit.lua deleted file mode 100644 index e61330b..0000000 --- a/wikt/translit/tt-translit.lua +++ /dev/null @@ -1,49 +0,0 @@ --- This module will transliterate Tatar language text per WT:TT TR. This may not match the Latin spelling used in Tatar. --- Г is replaced by the module with ğ and к with q after а, о, у, ы or in word start before them, otherwise they are turned into g and k. 
--- Language code: tat - -local export = {} - -local tt = { - ["ү"]="ü",['Ү']='Ü', ["т"]="t",['Т']='T', ["р"]="r",['Р']='R', ["ф"]="f",['Ф']='F', - ["ю"]="yu",['Ю']='Yu', ["ш"]="ş",['Ш']='Ş', ["ь"]="’",['Ь']='’', ["ъ"]="ʺ",['Ъ']='ʺ', ["н"]="n",['Н']='N', - ["п"]="p",['П']='P', ["й"]="y",['Й']='Y', ["л"]="l",['Л']='L', ["з"]="z",['З']='Z', ["е"]="e",['Е']='E', - ["г"]="g",['Г']='G', ["б"]="b",['Б']='B', ["у"]="u",['У']='U', ["с"]="s",['С']='S', ["х"]="x",['Х']='X', - ["ч"]="ç",['Ч']='Ç', ["щ"]="şç",['Щ']='Şç', ["я"]="ya",['Я']='Ya', ["ы"]="ı",['Ы']='I', ["э"]="e",['Э']='E', - ["м"]="m",['М']='M', ["о"]="o",['О']='O', ["ө"]="ö",['Ө']='Ö', ["и"]="i",['И']='İ', ["ё"]="yo",['Ё']='Yo', - ["ж"]="j",['Ж']='J', ["к"]="k",['К']='K', ["д"]="d",['Д']='D', ["в"]="w",['В']='W', ["ц"]="ts",['Ц']='Ts', - ["а"]="a",['А']='A', ["ң"]="ñ",['Ң']='Ñ', ["җ"]="c",['Җ']='C', ["һ"]="h",['Һ']='H', ["ә"]="ä",['Ә']='Ä' -}; - -function export.tr(text, lang, sc) - text = mw.ustring.gsub( - text, - "([АОӘУЫЕЯЁЮИЕаоәуыэяёюиеъь%A][́̀]?)([Ее])", - function(a,e) return a..(e=='е' and 'ye' or 'Ye') end - ) - - text:gsub("^Е",'Ye'):gsub("^е",'ye'):gsub("ия$",'iyä') --not last word end handled in code end - - -- Deal with dual nature of к, г, transliterated either to "front" variants - -- k/g or "back" variants q/ğ. The back variants occur before hard signs - -- (Ъ/ъ), which then disappear, and also in the vicinity of the back vowels - -- а/о/у/ы/ә (and their capital equivalents А/О/У/Ы/Ә). The code below that - -- handles this appears to say that the sound of word-initial к/г is - -- determined by the following vowel, and the sound of non-word-initial - -- к/г is determined by the preceding vowel. FIXME: Not sure if this is - -- correct. - - local t = {['К']='Q',['к']='q',['Г']='Ğ',['г']='ğ'} - text = mw.ustring.gsub(text, '([КкГг])([Ъъ])', function(a,b) return t[a] end) - text = mw.ustring.gsub( - text, - "(%a?)([КкГг])(.?)", - function(b,c,a) - return b .. 
(mw.ustring.match(b>'' and b or a,"[АОУЫӘаоуыә]") and t[c] or tt[c]) .. a - end - ) - - return (mw.ustring.gsub(mw.ustring.gsub(text, "ия%A",'iyä'), '.', tt)) -end - -return export \ No newline at end of file diff --git a/wikt/translit/tyv-translit.lua b/wikt/translit/tyv-translit.lua deleted file mode 100644 index 2b4e480..0000000 --- a/wikt/translit/tyv-translit.lua +++ /dev/null @@ -1,33 +0,0 @@ --- This module will transliterate Tuvan language text per WT:TYV TR. --- Language code: tyv - -local export = {} - -local tab = { - ["А"]="A", ["Б"]="B", ["В"]="V", ["Г"]="G", ["Д"]="D", ["Е"]="Ä", ["Ё"]="Yo", ["Ж"]="Ž", ["З"]="Z", ["И"]="Ï", ["Й"]="Y", - ["К"]="K", ["Л"]="L", ["М"]="M", ["Н"]="N", ["Ң"]="Ŋ", ["О"]="O", ["Ө"]="Ö", ["П"]="P", ["Р"]="R", ["С"]="S", ["Т"]="T", - ["У"]="U", ["Ү"]="Ü", ["Ф"]="F", ["Х"]="H", ["Ц"]="C", ["Ч"]="Č", ["Ш"]="Š", ["Щ"]="Šč", ["Ъ"]="ʺ", ["Ы"]="I", ["Ь"]="ʹ", - ["Э"]="Ä", ["Ю"]="Yu", ["Я"]="Ya", - ['а']='a', ['б']='b', ['в']='v', ['г']='g', ['д']='d', ['е']='ä', ['ё']='yo', ['ж']='ž', ['з']='z', ['и']='ï', ['й']='y', - ['к']='k', ['л']='l', ['м']='m', ['н']='n', ['ң']='ŋ', ['о']='o', ['ө']='ö', ['п']='p', ['р']='r', ['с']='s', ['т']='t', - ['у']='u', ['ү']='ü', ['ф']='f', - ['х']='h', ['ц']='c', ['ч']='č', ['ш']='š', ['щ']='šč', ['ъ']='ʺ', ['ы']='ı', ['ь']='ʹ', ['э']='ä', ['ю']='yu', ['я']='ya', -} - -function export.tr(text, lang, sc) - -- Ё needs converting if is decomposed - text = text:gsub("ё","ё"):gsub("Ё","Ё") - - -- е after a vowel or at the beginning of a word becomes yä - -- Note that according to modern Tuvan orthography ее (instead of ээ) is occationally used for long ä - text = mw.ustring.gsub(text, "([АОӨУҮЫЯЁЮИЪЬаоөуүыяёюиъь%A][́̀]?)е","%1yä") - --text = mw.ustring.gsub(text, "([АОӨУҮЫЕЯЁЮИЕЪЬаоөуүыэяёюиеъь%A][́̀]?)е","%1yä") - text = mw.ustring.gsub(text, "^Е","Yä") - text = mw.ustring.gsub(text, "^е","yä") - text = mw.ustring.gsub(text, "([^Ѐ-ӿ])Е","%1Yä") - text = mw.ustring.gsub(text, "([^Ѐ-ӿ])е","%1yä") - - return 
(mw.ustring.gsub(text,'.',tab)) -end - -return export \ No newline at end of file diff --git a/wikt/translit/udi-translit.lua b/wikt/translit/udi-translit.lua deleted file mode 100644 index 1fd9331..0000000 --- a/wikt/translit/udi-translit.lua +++ /dev/null @@ -1,77 +0,0 @@ --- This module will transliterate Udi language text per WT:UDI TR. --- Language code: udi - -local export = {} - -local tt = { - ["б"]="b", ["п"]="p", ["в"]="v", ["ф"]="f", ["м"]="m", ["б"]="b", - ["д"]="d", ["т"]="t", ["ц"]="c", ["з"]="z", ["с"]="s", ["н"]="n", - ["л"]="l", ["ч"]="č", ["ж"]="ž", ["ш"]="š", ["р"]="r", ["г"]="g", - ["к"]="k", ["х"]="χ", ["й"]="j", ["и"]="i", ["у"]="u", ["е"]="e", - ["о"]="o", ["а"]="a", ["ы"]="ə", ["ҝ"]="gʲ", - }; - -local trigraphs = { - ['джъ'] = 'ǯ:', - ['джӏ'] = 'ǯ:', - ['чӏъ'] = 'č̣:', -} -local digraphs = { - ['пӏ'] = 'ṗ', - ['тӏ'] = 'ṭ', - ['дз'] = 'ʒ', - ['цӏ'] = 'c̣', - ['дж'] = 'ǯ', - ['чӏ'] = 'č̣', - ['чъ'] = 'č:', - ['жъ'] = 'ž:', - ['жӏ'] = 'ž:', - ['шъ'] = 'š:', - ['шӏ'] = 'š:', - ['кӏ'] = 'ḳ', - ['гъ'] = 'ɣ', - ['къ'] = 'q̇', - ['хъ'] = 'q', - ['гь'] = 'h', - ['уь'] = 'ü', - ['оь'] = 'ö', - ['аь'] = 'ä', - ['иӏ'] = 'i̱', - ['иъ'] = 'i̱', - ['уӏ'] = 'u̱', - ['уъ'] = 'u̱', - ['еӏ'] = 'e̱', - ['еъ'] = 'e̱', - ['оӏ'] = 'o̱', - ['оъ'] = 'o̱', - ['аӏ'] = 'a̱', - ['аъ'] = 'a̱', - ['ыъ'] = 'ə̱', -} - -function export.tr(text, lang, sc) - if sc ~= "Cyrl" then - return nil - end - - local str_gsub = string.gsub - local UTF8_char = '[%z\1-\127\194-\244][\128-\191]*' - - -- Convert capital to lowercase palochka. Lowercase is found in tables - -- above. 
- text = str_gsub(text, mw.ustring.char(0x4C0), mw.ustring.char(0x4CF)) - - for trigraph, translit in pairs(trigraphs) do - text = str_gsub(text, trigraph, translit) - end - - for digraph, translit in pairs(digraphs) do - text = str_gsub(text, digraph, translit) - end - - text = str_gsub(text, UTF8_char, tt) - - return text -end - -return export \ No newline at end of file diff --git a/wikt/translit/udm-translit.lua b/wikt/translit/udm-translit.lua deleted file mode 100644 index af70160..0000000 --- a/wikt/translit/udm-translit.lua +++ /dev/null @@ -1,30 +0,0 @@ --- This module will transliterate Udmurt language text per WT:UDM TR. - -local export = {} - -local mapping = { - ["А"]="A", ["Б"]="B", ["В"]="V", ["Г"]="G", ["Д"]="D", ["Е"]="E", ["Ё"]="Jo", ["Ж"]="Ž", ["Ӝ"]="Dž", ["З"]="Z", ["Ӟ"]="Dź", ["И"]="I", - ["Ӥ"]="Ï", ["Й"]="J", ["К"]="K", ["Л"]="L", ["М"]="M", ["Н"]="N", ["О"]="O", ["Ӧ"]="Ö", ["П"]="P", ["Р"]="R", ["С"]="S", ["Т"]="T", - ["У"]="U", ["Ф"]="F", ["Х"]="X", ["Ц"]="C", ["Ч"]="Ć", ["Ӵ"]="Č", ["Ш"]="Š", ["Щ"]="Šč", ["Ъ"]="ʺ", ["Ы"]="Y", ["Ь"]="ʹ", ["Э"]="E", - ["Ю"]="Ju", ["Я"]="Ja", - ['а']='a', ['б']='b', ['в']='v', ['г']='g', ['д']='d', ['е']='e', ['ё']='jo', ['ж']='ž', ['ӝ']='dž', ['з']='z', ['ӟ']='dź', ['и']='i', - ['ӥ']='ï', ['й']='j', ['к']='k', ['л']='l', ['м']='m', ['н']='n', ['о']='o', ['ӧ']='ö', ['п']='p', ['р']='r', ['с']='s', ['т']='t', - ['у']='u', ['ў']='w', ['ф']='f', ['х']='x', ['ц']='c', ['ч']='ć', ['ӵ']='č', ['ш']='š', ['щ']='šč', ['ъ']='ʺ', ['ы']='y', ['ь']='ʹ', ['э']='e', - ['ю']='ju', ['я']='ja', -} - -function export.tr(text, lang, sc) - -- Ё needs converting if is decomposed - text = text:gsub("ё","ё"):gsub("Ё","Ё") - - -- е after a vowel or at the beginning of a word becomes je - text = mw.ustring.gsub(text, "([АОӦУЫЕЯЁЮИӤЕЪЬаоӧуыэяёюиӥеъь%A][́̀]?)е","%1je") - text = mw.ustring.gsub(text, "^Е","Je") - text = mw.ustring.gsub(text, "^е","je") - text = mw.ustring.gsub(text, "([^Ѐ-ӿ])Е","%1Je") - text = mw.ustring.gsub(text, 
"([^Ѐ-ӿ])е","%1je") - - return (mw.ustring.gsub(text, '.', mapping)) -end - -return export \ No newline at end of file diff --git a/wikt/translit/ug-translit.lua b/wikt/translit/ug-translit.lua deleted file mode 100644 index 3d663c2..0000000 --- a/wikt/translit/ug-translit.lua +++ /dev/null @@ -1,52 +0,0 @@ -local export = {} - -local data = {} - -data["ug-Arab"] = { - -- consonants - ["م"] = "m", ["ن"] = "n", ["د"] = "d", ["ت"] = "t", - ["ب"] = "b", ["پ"] = "p", ["ف"] = "f", ["ق"] = "q", - ["ك"] = "k", ["ڭ"] = "ng", ["گ"] = "g", ["غ"] = "gh", - ["ع"] = "ğ", ["ھ"] = "h", ["خ"] = "x", ["چ"] = "ch", - ["ج"] = "j", ["ژ"] = "zh", ["ز"] = "z", ["س"] = "s", - ["ش"] = "sh", ["ر"] = "r", ["ل"] = "l", ["ئ"] = "'", - ["ي"] = "y", ["ۋ"] = "w", - -- vowels - ["ا"] = "a", ["ە"] = "e", ["ې"] = "ë", ["ى"] = "i", - ["و"] = "o", ["ۆ"] = "ö", ["ۇ"] = "u", ["ۈ"] = "ü", - -- punctuation - ["؟"]="?", - ["،"]=",", - ["؛"]=";" -} - -data["Cyrl"] = { - ["А"] = "A", ["Б"] = "B", ["В"] = "W", ["Г"] = "G", ["Ғ"] = "Gh", ["Д"] = "D", ["Е"] = "Ë", ["Ә"] = "E", ["Ж"] = "Zh", ["Җ"] = "J", - ["З"] = "Z", ["И"] = "I", ["Й"] = "Y", ["К"] = "K", ["Қ"] = "Q", ["Л"] = "L", ["М"] = "M", ["Н"] = "N", ["Ң"] = "Ng", ["О"] = "O", - ["Ө"] = "Ö", ["П"] = "P", ["Р"] = "R", ["С"] = "S", ["Т"] = "T", ["У"] = "U", ["Ү"] = "Ü", ["Ф"] = "F", ["Х"] = "X", ["Һ"] = "H", - ["Ч"] = "Ch", ["Ш"] = "Sh", ["Ю"] = "Yu", ["Я"] = "Ya", - ["Э"] = "É", - ["а"] = "a", ["б"] = "b", ["в"] = "w", ["г"] = "g", ["ғ"] = "gh", ["д"] = "d", ["е"] = "ë", ["ә"] = "e", ["ж"] = "zh", ["җ"] = "j", - ["з"] = "z", ["и"] = "i", ["й"] = "y", ["к"] = "k", ["қ"] = "q", ["л"] = "l", ["м"] = "m", ["н"] = "n", ["ң"] = "ng", ["о"] = "o", - ["ө"] = "ö", ["п"] = "p", ["р"] = "r", ["с"] = "s", ["т"] = "t", ["у"] = "u", ["ү"] = "ü", ["ф"] = "f", ["х"] = "x", ["һ"] = "h", - ["ч"] = "ch", ["ш"] = "sh", ["ю"] = "yu", ["я"] = "ya", - ["э"] = "é", -} - - -function export.tr(text, lang, sc) - if not data[sc] then - return nil - end - - -- remove initial hamza - 
text = mw.ustring.gsub(text, "^\216\166(.)", "%1") - text = mw.ustring.gsub(text, "%s\216\166(.)", " %1") - - -- transliterate letters one to one - text = mw.ustring.gsub(text, ".", data[sc]) - - return text -end - -return export \ No newline at end of file diff --git a/wikt/translit/uk-translit.lua b/wikt/translit/uk-translit.lua deleted file mode 100644 index d394f9a..0000000 --- a/wikt/translit/uk-translit.lua +++ /dev/null @@ -1,27 +0,0 @@ -local export = {} - -local tt = { - ["А"]='A', ["а"]='a', ["Б"]='B', ["б"]='b', ["В"]='V', ["в"]='v', ["Г"]='H', ["г"]='h', - ["Ґ"]='G', ["ґ"]='g', ["Д"]='D', ["д"]='d', ["Е"]='E', ["е"]='e', ["Є"]='Je', ["є"]='je', - ["Ж"]='Ž', ["ж"]='ž', ["З"]='Z', ["з"]='z', ["И"]='Y', ["и"]='y', ["І"]='I', ["і"]='i', - ["Ї"]='Ji', ["ї"]='ji', ["Й"]='J', ["й"]='j', ["К"]='K', ["к"]='k', ["Л"]='L', ["л"]='l', - ["М"]='M', ["м"]='m', ["Н"]='N', ["н"]='n', ["О"]='O', ["о"]='o', ["П"]='P', ["п"]='p', - ["Р"]='R', ["р"]='r', ["С"]='S', ["с"]='s', ["Т"]='T', ["т"]='t', ["У"]='U', ["у"]='u', - ["Ф"]='F', ["ф"]='f', ["Х"]='X', ["х"]='x', ["Ц"]='C', ["ц"]='c', ["Ч"]='Č', ["ч"]='č', - ["Ш"]='Š', ["ш"]='š', ["Щ"]='Šč', ["щ"]='šč', ["Ь"]='ʹ', ["ь"]='ʹ', ["Ю"]='Ju', ["ю"]='ju', - ["Я"]='Ja', ["я"]='ja', - -- right single quotation mark, modifier letter apostrophe → modifier letter double prime - ["’"]='ʺ', ["ʼ"]= 'ʺ', - -- obsolete letters - ["Ё"]='Ë', ["ё"]='ë', ["Ъ"]='ʺ', ["ъ"]='ʺ', ["Ы"]='Y', ["ы"]='y', ["Ѣ"]='Ě', ["ѣ"]='ě', - ["Э"]='È', ["э"]='è', -} - -function export.tr(text)--translit any words or phrases - text = mw.ustring.gsub(text, "'+", { ["'"] = 'ʺ' }) -- neutral apostrophe - text = mw.ustring.gsub(text, '.', tt) - - return text -end - -return export \ No newline at end of file diff --git a/wikt/translit/ur-translit.lua b/wikt/translit/ur-translit.lua deleted file mode 100644 index f5a163c..0000000 --- a/wikt/translit/ur-translit.lua +++ /dev/null @@ -1,73 +0,0 @@ -local export = {} - -local U = mw.ustring.char - -local fatHatan = 
U(0x64B) -local zabar = U(0x64E) -local zer = U(0x64F) -local pesh = U(0x650) -local tashdid = U(0x651) -- also called shadda -local jazm = U(0x652) - - -local mapping = { - ["ا"] = 'ā', ["ب"] = 'b', ["پ"] = 'p', ["ت"] = 't', ["ٹ‬"] = 'ṭ', ["ث"] = 's', - ["ج"] = 'j', ["چ"] = 'c', ["ح"] = 'h', ["خ"] = 'x', - ["د"] = 'd', ["ڈ‬"] = 'ḍ', ["ذ"] = 'z', ["ر"] = 'r', ["ڑ‬"] = "ṛ", ["ز"] = 'z', ["ژ"] = 'ž', - ["س"] = 's', ["ش"] = 'ś', ["ص"] = 's', ["ض"] = 'z', - ["ط"] = 't', ["ظ"] = 'z', ["غ"] = 'ġ', ["ف"] = 'f', ["ق"] = 'q', - ["ک"] = 'k', ["گ"] = 'g', - ["ل"] = 'l', ["م"] = 'm', ["ن"] = 'n', ["و"] = 'u', ["ه"] = 'h', ["ی"] = 'i', ["آ"] = 'â', - - -- nun gunnah - ["ں‬"] = '̃', - - ["و‬"] = "W", - ["ه‬"] = "h", ["ھ‬"] = "h", - ["ي‬"] = "ī", - - ["ع"] = '’', - ["ء"] = '’', - ["ئ"] = '’', - ["ؤ"] = '’', - ["أ"] = '’', - - -- diacritics - [zabar] = "a", - [zer] = "i", - [pesh] = "u", - [jazm] = "", -- also sokun - no vowel - [U(0x200C)] = "-", -- ZWNJ (zero-width non-joiner) - [fatHatan] = "n", - -- ligatures - ["ﻻ"] = "lā", - ["ﷲ"] = "llāh", - -- kashida - ["ـ"] = "", -- kashida, no sound - -- numerals - ["۱"] = "1", ["۲"] = "2", ["۳"] = "3", ["۴"] = "4", ["۵"] = "5", - ["۶"] = "6", ["۷"] = "7", ["۸"] = "8", ["۹"] = "9", ["۰"] = "0", - -- normal arabic variants to numerals - ["١"] = "1", ["٢"] = "2", ["٣"] = "3", ["٤"] = "4", ["٥"] = "5", - ["٦"] = "6", ["٧"] = "7", ["٨"] = "8", ["٩"] = "9", ["٠"] = "0", - -- punctuation (leave on separate lines) - ["؟"] = "?", -- question mark - ["،"] = ",", -- comma - ["؛"] = ";", -- semicolon - ["«"] = '“', -- quotation mark - ["»"] = '”', -- quotation mark - ["٪"] = "%", -- percent - ["؉"] = "‰", -- per mille - ["٫"] = ".", -- decimals - ["٬"] = ",", -- thousand - ["ۀ"] = "-ye" -- he ye (in ezâfe) -} - -function export.tr(text, lang, sc) - - text = mw.ustring.gsub(text, '.', mapping) - - return text -end - -return export \ No newline at end of file diff --git a/wikt/translit/utilities/grc-data.lua b/wikt/translit/utilities/grc-data.lua 
deleted file mode 100644 index 39d130e..0000000 --- a/wikt/translit/utilities/grc-data.lua +++ /dev/null @@ -1,118 +0,0 @@ -local data = {} - -local U = mw.ustring.char -local macron = U(0x304) -local spacing_macron = U(0xAF) -local modifier_macron = U(0x2C9) -local breve = U(0x306) -local spacing_breve = U(0x2D8) -local rough = U(0x314) -local smooth = U(0x313) -local diaeresis = U(0x308) -local acute = U(0x301) -local grave = U(0x300) -local circum = U(0x342) -local Latin_circum = U(0x302) -local coronis = U(0x343) -local subscript = U(0x345) -local undertie = mw.ustring.char(0x35C) -- actually "combining double breve below" - -data["diacritics"] = { - ["macron"] = macron, - ["spacing_macron"] = spacing_macron, - ["modifier_macron"] = modifier_macron, - ["breve"] = breve, - ["spacing_breve"] = spacing_breve, - ["rough"] = rough, - ["smooth"] = smooth, - ["diaeresis"] = diaeresis, - ["acute"] = acute, - ["grave"] = grave, - ["circum"] = circum, - ["Latin_circum"] = Latin_circum, - ["coronis"] = coronis, - ["subscript"] = subscript, -} - -data.diacritics.all = "" -for name, diacritic in pairs(data.diacritics) do - data.diacritics.all = data.diacritics.all .. diacritic -end - -data["named"] = data["diacritics"] - -data["diacritic"] = "[" .. data.diacritics.all .. "]" -data["all"] = data["diacritic"] - -data["diacritic_groups"] = { - [1] = "[".. macron .. breve .."]", - [2] = "[".. diaeresis .. smooth .. rough .."]", - [3] = "[".. acute .. grave .. circum .. 
"]", - [4] = subscript, -} -data["groups"] = data["diacritic_groups"] -data["diacritic_groups"]["accents"] = data["groups"][3] - -data["diacritic_order"] = { - [macron] = 1, - [breve] = 1, - [rough] = 2, - [smooth] = 2, - [diaeresis] = 2, - [acute] = 3, - [grave] = 3, - [circum] = 3, - [subscript] = 4, -} - -data["diacritical_conversions"] = { - -- Convert spacing to combining diacritics - [spacing_macron] = macron, -- macron - [modifier_macron] = macron, - [spacing_breve] = breve, -- breve - ["῾"] = rough, -- rough breathing, modifier letter reversed comma - ["ʽ"] = rough, - ["᾿"] = smooth, -- smooth breathing, modifier letter apostrophe, coronis, combining coronis - ["ʼ"] = smooth, - [coronis] = smooth, - ["´"] = acute, -- acute - ["`"] = grave, -- grave - ["῀"] = circum, -- Greek circumflex (perispomeni), circumflex, combining circumflex - ["ˆ"] = circum, - [Latin_circum] = circum, - ["῎"] = smooth .. acute, -- smooth and acute - ["῍"] = smooth .. grave, -- smooth and grave - ["῏"] = smooth .. circum, -- smooth and circumflex - ["῞"] = rough .. acute, -- rough and acute - ["῝"] = rough .. grave, -- rough and grave - ["῟"] = rough .. circum, -- rough and circumflex - ["¨"] = diaeresis, - ["΅"] = diaeresis .. acute, - ["῭"] = diaeresis .. grave, - ["῁"] = diaeresis .. circum, -} -data["conversions"] = data["diacritical_conversions"] - -data["consonants"] = "ΒβΓγΔδΖζΘθΚκΛλΜμΝνΞξΠπΡρΣσςΤτΦφΧχΨψ" -data["consonant"] = "[" .. data.consonants .. "]" -data["vowels"] = "ΑαΕεΗηΙιΟοΥυΩω" -data["vowel"] = "[" .. data.vowels .. "]" -data["combining_diacritics"] = table.concat{ - macron, breve, - rough, smooth, diaeresis, - acute, grave, circum, - subscript -} -data["combining_diacritic"] = "[" .. data.combining_diacritics .. "]" - --- Basic letters with and without diacritics -local letters_with_diacritics = 'ΆΈ-ώϜϝἀ-ᾼῂ-ῌῐ-' .. - -- capital iota with oxia, normalized to capital iota with tonos if entered - -- literally in a string - U(0x1FDB) .. 
- 'Ὶῠ-Ῥῲ-ῼ' -data.word_characters = letters_with_diacritics .. data.combining_diacritics .. - undertie -data.word_character = "[" .. data.word_characters .. "]" - -return data \ No newline at end of file diff --git a/wikt/translit/utilities/grc.lua b/wikt/translit/utilities/grc.lua deleted file mode 100644 index 40641d8..0000000 --- a/wikt/translit/utilities/grc.lua +++ /dev/null @@ -1,493 +0,0 @@ ---[[ -This module contains four functions, three of which are called by other modules. - -standardDiacritics takes spacing or nonstandard diacritics and converts them to standard combining diacritics. -This function is used by pronunciationOrder. - -reorderDiacritics takes the diacritics, removes them from the letter (mw.ustring.toNFD), and reorders them so that -macrons or breves are first; diaeresis or breathing mark is second; acute, grave, or circumflex is third; -and iota subscript is last. Aside from the iota subscript part, this is the only order in which -the diacritics can display correctly, as explained elsewhere. -This function is used by Module:typing-aids and {{chars}}. - -ά̓̆νερ (α◌́◌̓◌̆νερ) → ᾰ̓́νερ (α◌̆◌̓◌́νερ) -pronunciationOrder does the same thing, except it puts the macron or breve and iota subscript last and recombines -the diacritics (mw.ustring.toNFC) after reordering them. The diaeresis or breathing mark and accent mark will -recombine, while the macron and breve remains uncombined as a combining character. -This function is used by Module:grc-pronunciation and {{grc-IPA}}. - -Module:grc-utilities/data holds the diacritic definitions and substitutions that are used by this module. - -Tokenization -The function tokenize breaks the text into meaningful units of a single consonant or monophthong letter, -or diphthong, with any diacritics, as shown below. -This function is used by Module:grc-translit and Module:grc-accent, and by the sandbox module Module:grc-pronunciation/sandbox. - -The first argument is the word to be tokenized. 
-The second is a boolean: if true, the function will group εω together as a diphthong, for instance in πόλεως (póleōs), -genitive of πόλῐς (pólis, “city state”). - -word tokens -ἡμεῖς ἡ, μ, εῖ, ς -οἷαι οἷ, αι -ἀναῡ̈τέω ἀ, ν, α, ῡ̈, τ, έ, ω -δαΐφρων δ, α, ΐ, φ, ρ, ω, ν -τούτῳ τ, ού, τ, ῳ -ὑϊκός ὑ, ϊ, κ, ό, ς -ἡ Ἑλήνη ἡ,  , Ἑ, λ, ή, ν, η -νηῦς ν, ηῦ, ς -υἱός υἱ, ό, ς -ὄργυιᾰ ὄ, ρ, γ, υι, ᾰ -οὐ δοκεῖν ἀλλ᾽ εἶναι ἀγαθὸν οὐ,  , δ, ο, κ, εῖ, ν,  , ἀ, λ, λ, ᾽,  , εἶ, ν, αι,  , ἀ, γ, α, θ, ὸ, ν -]] - -local export = {} - ---local m_script_utils = require("Module:script utilities") ---local m_links = require("Module:links") ---local lang = require("Module:languages").getByCode("grc") ---local sc = require("Module:scripts").getByCode("polytonic") - -local data = {} - -local U = mw.ustring.char -local macron = U(0x304) -local spacing_macron = U(0xAF) -local modifier_macron = U(0x2C9) -local breve = U(0x306) -local spacing_breve = U(0x2D8) -local rough = U(0x314) -local smooth = U(0x313) -local diaeresis = U(0x308) -local acute = U(0x301) -local grave = U(0x300) -local circum = U(0x342) -local Latin_circum = U(0x302) -local coronis = U(0x343) -local subscript = U(0x345) -local undertie = mw.ustring.char(0x35C) -- actually "combining double breve below" - -data["diacritics"] = { - ["macron"] = macron, - ["spacing_macron"] = spacing_macron, - ["modifier_macron"] = modifier_macron, - ["breve"] = breve, - ["spacing_breve"] = spacing_breve, - ["rough"] = rough, - ["smooth"] = smooth, - ["diaeresis"] = diaeresis, - ["acute"] = acute, - ["grave"] = grave, - ["circum"] = circum, - ["Latin_circum"] = Latin_circum, - ["coronis"] = coronis, - ["subscript"] = subscript, -} - -data.diacritics.all = "" -for name, diacritic in pairs(data.diacritics) do - data.diacritics.all = data.diacritics.all .. diacritic -end - -data["named"] = data["diacritics"] - -data["diacritic"] = "[" .. data.diacritics.all .. "]" -data["all"] = data["diacritic"] - -data["diacritic_groups"] = { - [1] = "[".. 
macron .. breve .."]", - [2] = "[".. diaeresis .. smooth .. rough .."]", - [3] = "[".. acute .. grave .. circum .. "]", - [4] = subscript, -} -data["groups"] = data["diacritic_groups"] -data["diacritic_groups"]["accents"] = data["groups"][3] - -data["diacritic_order"] = { - [macron] = 1, - [breve] = 1, - [rough] = 2, - [smooth] = 2, - [diaeresis] = 2, - [acute] = 3, - [grave] = 3, - [circum] = 3, - [subscript] = 4, -} - -data["diacritical_conversions"] = { - -- Convert spacing to combining diacritics - [spacing_macron] = macron, -- macron - [modifier_macron] = macron, - [spacing_breve] = breve, -- breve - ["῾"] = rough, -- rough breathing, modifier letter reversed comma - ["ʽ"] = rough, - ["᾿"] = smooth, -- smooth breathing, modifier letter apostrophe, coronis, combining coronis - ["ʼ"] = smooth, - [coronis] = smooth, - ["´"] = acute, -- acute - ["`"] = grave, -- grave - ["῀"] = circum, -- Greek circumflex (perispomeni), circumflex, combining circumflex - ["ˆ"] = circum, - [Latin_circum] = circum, - ["῎"] = smooth .. acute, -- smooth and acute - ["῍"] = smooth .. grave, -- smooth and grave - ["῏"] = smooth .. circum, -- smooth and circumflex - ["῞"] = rough .. acute, -- rough and acute - ["῝"] = rough .. grave, -- rough and grave - ["῟"] = rough .. circum, -- rough and circumflex - ["¨"] = diaeresis, - ["΅"] = diaeresis .. acute, - ["῭"] = diaeresis .. grave, - ["῁"] = diaeresis .. circum, -} -data["conversions"] = data["diacritical_conversions"] - -data["consonants"] = "ΒβΓγΔδΖζΘθΚκΛλΜμΝνΞξΠπΡρΣσςΤτΦφΧχΨψ" -data["consonant"] = "[" .. data.consonants .. "]" -data["vowels"] = "ΑαΕεΗηΙιΟοΥυΩω" -data["vowel"] = "[" .. data.vowels .. "]" -data["combining_diacritics"] = table.concat{ - macron, breve, - rough, smooth, diaeresis, - acute, grave, circum, - subscript -} -data["combining_diacritic"] = "[" .. data.combining_diacritics .. "]" - --- Basic letters with and without diacritics -local letters_with_diacritics = 'ΆΈ-ώϜϝἀ-ᾼῂ-ῌῐ-' .. 
- -- capital iota with oxia, normalized to capital iota with tonos if entered - -- literally in a string - U(0x1FDB) .. - 'Ὶῠ-Ῥῲ-ῼ' -data.word_characters = letters_with_diacritics .. data.combining_diacritics .. - undertie -data.word_character = "[" .. data.word_characters .. "]" - - -local m_data = data -local groups = m_data.groups -local diacritic_order = m_data.diacritic_order -local conversions = m_data.conversions -local diacritics = m_data.diacritics -local diacritic = m_data.diacritic -local macron = diacritics.macron -local breve = diacritics.breve -local spacing_macron = diacritics.spacing_macron -local spacing_breve = diacritics.spacing_breve -local rough = diacritics.rough -local smooth = diacritics.smooth -local diaeresis = diacritics.diaeresis -local acute = diacritics.acute -local grave = diacritics.grave -local circumflex = diacritics.circum -local subscript = diacritics.subscript -local combining_diacritic = m_data.combining_diacritic - -local UTF8_char = "[\1-\127\194-\244][\128-\191]*" -local basic_Greek = "[\206-\207][\128-\191]" -- excluding first line of Greek and Coptic block: ͰͱͲͳʹ͵Ͷͷͺͻͼͽ;Ϳ - -local find = mw.ustring.find -local match = mw.ustring.match -local gmatch = mw.ustring.gmatch -local sub = mw.ustring.sub -local gsub = mw.ustring.gsub -local toNFC = mw.ustring.toNFC -local decompose = mw.ustring.toNFD - -local info = {} --- The tables are shared among different characters so that they can be checked --- for equality if needed, and to use less space. -local vowel_t = { vowel = true } -local iota_t = { vowel = true, offglide = true } -local upsilon_t = { vowel = true, offglide = true } --- These don't need any contents. -local rho_t = {} --- local consonant_t = {} -local diacritic_t = { diacritic = true } --- Needed for equality comparisons. 
-local breathing_t = { diacritic = true } - -local function add_info(characters, t) - if type(characters) == "string" then - for character in string.gmatch(characters, UTF8_char) do - info[character] = t - end - else - for i, character in ipairs(characters) do - info[character] = t - end - end -end - -add_info({ macron, breve, - diaeresis, - acute, grave, circumflex, - subscript, - }, diacritic_t) - -add_info({rough, smooth}, breathing_t) -add_info("ΑΕΗΟΩαεηοω", vowel_t) -add_info("Ιι", iota_t) -add_info("Υυ", upsilon_t) --- add_info("ΒΓΔΖΘΚΛΜΝΞΠΡΣΤΦΧΨϜϘϺϷͶϠβγδζθκλμνξπρσςτφχψϝϙϻϸͷϡ", consonant_t) -add_info("Ρρ", rho_t) - -local not_recognized = {} -setmetatable(info, { __index = - function(t, key) - return not_recognized - end -}) - --- local sparseConcat = require("Module:table").sparseConcat - --- local checkType = require "libraryUtil".checkType - --- local function _check(funcName) --- return function(argIndex, arg, expectType, nilOk) --- return checkType(funcName, argIndex, arg, expectType, nilOk) --- end --- end - --- -- Perform a function on each Unicode character in a string. --- local function forEach(str, func) --- for char in string.gmatch(str, UTF8_char) do --- func(char) --- end --- end - --- -- This concatenates or inserts a character, then removes it from the text. --- local function add(list, index, chars, text) --- if not chars then --- error("The function add cannot act on a nil character.") --- end --- if list[index] then --- list[index] = list[index] .. chars --- else --- list[index] = chars --- end --- -- Basic string function works here. 
--- return text:sub(#chars + 1) --- end - --- function export.tag(term, face) --- return m_script_utils.tag_text(term, lang, sc, face) --- end - --- function export.link(term, face, alt, tr) --- return m_links.full_link( { term = term, alt = alt, lang = lang, sc = sc, tr = tr }, face) --- end - --- local function linkNoTag(term, alt) --- return m_links.language_link{ term = term, lang = lang, alt = alt } --- end - --- -- Convert spacing to combining diacritics, and nonstandard to standard polytonic Greek. --- function export.standardDiacritics(text) --- text = decompose(text) - --- text = text:gsub(UTF8_char, conversions) - --- return text --- end - --- --[=[ This function arranges diacritics in the following order: --- 1. macron or breve --- 2. breathings or diaeresis --- 3. acute, circumflex, or grave --- 4. iota subscript --- Used by [[Module:typing-aids]]. - --- Returns an error if a sequence of diacritics contains more than one --- of each category. --- ]=] --- local function reorderDiacriticSequence(diacritics) --- local output = {} --- forEach(diacritics, --- function (diacritic) --- local index = diacritic_order[diacritic] --- if not output[index] then --- output[index] = diacritic --- else --- -- Place breve after macron. --- if diacritic == breve then --- index = index + 1 --- end --- -- The following might have odd results when there --- -- are three or more diacritics. --- table.insert(output, index, diacritic) --- -- [[Special:WhatLinksHere/Template:tracking/grc-utils/too many diacritics]] --- require("Module:debug").track("grc-utils/too many diacritics") --- --[[ --- local m_templates = require("Module:grc-utilities/templates") --- error("There are two diacritics, " .. --- m_templates.addDottedCircle(output[index]) .. " and " .. --- m_templates.addDottedCircle(diacritic) .. --- " that belong in the same position. There should be only one." 
--- ) --- --]] --- end --- end) --- return sparseConcat(output) --- end - --- function export.reorderDiacritics(text) --- local d = diacritics - --- return (gsub(decompose(text), --- combining_diacritic .. combining_diacritic .. "+", --- reorderDiacriticSequence)) --- end - ---[=[ - This breaks a word into meaningful "tokens", which are - individual letters or diphthongs with their diacritics. - Used by [[Module:grc-accent]] and [[Module:grc-pronunciation]]. ---]=] -local function make_tokens(text) - local tokens, prev_info = {}, {} - local token_i, vowel_count = 1, 0 -- Vowel count tracks . - local prev - for character in string.gmatch(decompose(text), UTF8_char) do - local curr_info = info[character] - -- Split vowels between tokens if not a diphthong. - if curr_info.vowel then - vowel_count = vowel_count + 1 - if prev and (not (vowel_count == 2 and curr_info.offglide and prev_info.vowel) - -- υυ → υ, υ - -- ιυ → ι, υ - or prev_info.offglide and curr_info == upsilon_t) then - token_i = token_i + 1 - if prev_info.vowel then - vowel_count = 1 - end - elseif vowel_count == 2 then - vowel_count = 0 - end - tokens[token_i] = (tokens[token_i] or "") .. character - elseif curr_info.diacritic then - vowel_count = 0 - tokens[token_i] = (tokens[token_i] or "") .. character - if prev_info.diacritic or prev_info.vowel then - if character == diaeresis then - -- Split the diphthong in the current token if a diaeresis was found: - -- the first letter, then the second letter plus any diacritics. - local previous_vowel, vowel_with_diaeresis = - string.match(tokens[token_i], - "^(" .. basic_Greek .. ")(" .. basic_Greek .. ".+)") - if previous_vowel then - tokens[token_i], tokens[token_i + 1] = previous_vowel, vowel_with_diaeresis - token_i = token_i + 1 - else - -- The vowel preceding the vowel with the diaeresis will already be - -- placed in the previous token if it has a diacritic: - -- Περικλῆῐ̈ → Π ε ρ ι κ λ ῆ ῐ̈ - --[[ - mw.log('Diaeresis was found in ' .. text .. 
', but the previous token ' .. - require("Module:Unicode data").add_dotted_circle(tokens[token_i]) .. - ' couldn’t be split because it does not consist of two Basic Greek characters followed by other characters.') - --]] - end - end - elseif prev_info == rho_t then - if curr_info ~= breathing_t then - mw.log(string.format("The character %s in %s should not have the accent %s on it.", - prev, text, require("Module:grc-utilities/templates").addDottedCircle(character))) - end - else - mw.log("The character " .. prev .. " cannot have a diacritic on it.") - end - else - vowel_count = 0 - if prev then - token_i = token_i + 1 - end - tokens[token_i] = (tokens[token_i] or "") .. character - end - prev = character - prev_info = curr_info - end - return tokens -end - -local cache = {} -function export.tokenize(text) - local decomposed = decompose(text) - if not cache[decomposed] then - cache[decomposed] = make_tokens(text) - end - return cache[decomposed] -end - --- --[=[ Places diacritics in the following order: --- 1. breathings or diaeresis --- 2. acute, circumflex, or grave --- 3. macron or breve --- 4. iota subscript --- Used by [[Module:grc-pronunciation]]. ]=] --- function export.pronunciationOrder(text) --- text = export.standardDiacritics(text) - --- if find(text, groups[1]) then - --- text = gsub(text, --- diacritic .. diacritic .. "+", --- function(sequence) --- -- Put breathing and diaeresis first, then accents, then macron or breve --- return table.concat{ --- match(sequence, groups[2]) or "", --- match(sequence, groups[3]) or "", --- match(sequence, groups[1]) or "", --- match(sequence, groups[4]) or "" --- } --- end) - --- text = gsub(text, macron, spacing_macron) -- combining to spacing macron --- text = gsub(text, breve, spacing_breve) -- combining to spacing breve --- end - --- return toNFC(text) --- end - - --- -- Returns a table of any ambiguous vowels in the text, language-tagged. 
--- function export.findAmbig(text, noTag) --- if (not text) or type(text) ~= "string" then --- error("The input to function findAmbig is nonexistent or not a string") --- end - --- local lengthDiacritic = "[" .. macron .. breve .. circumflex .. subscript .. "]" --- local aiu_diacritic = "^([" .. "αιυ" .. "])(" .. diacritic .. "*)$" - --- -- breaks the word into units --- local output, vowels = {}, {} --- for _, token in ipairs(export.tokenize(text)) do --- if not find(token, m_data.consonant) then --- local vowel, diacritics = match( --- token, --- aiu_diacritic --- ) - --- if vowel and (diacritics == "" or --- not find(diacritics, lengthDiacritic)) then --- local diacriticked_vowel --- if not noTag then --- diacriticked_vowel = export.tag(vowel .. diacritics) --- else --- diacriticked_vowel = vowel --- end - --- table.insert(output, diacriticked_vowel) - --- -- Lists the vowel letters that are ambiguous, for categorization purposes. --- vowels[mw.ustring.lower(vowel)] = true --- end --- end --- end - --- return output, vowels --- end - -return export \ No newline at end of file diff --git a/wikt/translit/utilities/ko.lua b/wikt/translit/utilities/ko.lua deleted file mode 100644 index 8a84cb4..0000000 --- a/wikt/translit/utilities/ko.lua +++ /dev/null @@ -1,283 +0,0 @@ -local M = {} -local gsub = mw.ustring.gsub -local match = mw.ustring.match - -local lang = require("Module:languages").getByCode("ko") - --- makes hanjatab automatically -function M.hanjatab() - local hanja = mw.ustring.gsub(mw.title.getCurrentTitle().text, '[^一-鿌]', '') - local table_head = '' - return table_head .. - mw.ustring.gsub(hanja, '(.)', '') .. - '
[[hanja|Hanja]] in this term
[[%1#Korean|%1]]
' -end - --- return only non-hangeul contained in text -function M.remove_hangeul(f) - local nonhangeul = mw.ustring.gsub(f.args[1], '[가-힣]', '') - return nonhangeul -end - -function M.boldify(f) - local pagename = mw.title.getCurrentTitle().text - hangul = f.args[1] - if match(hangul, pagename) and not match(hangul, "'") then - hangul = gsub(hangul, pagename, "'''" .. pagename .. "'''") - end - return hangul -end - -function M.usex_hangul(f) - local pagename = mw.title.getCurrentTitle().text - hangul = f.args[1] - if match(hangul, pagename) and not match(hangul, "'") then - hangul = gsub(hangul, pagename, "'''" .. pagename .. "'''") - end - i = 1 - local front, back = '', '' - for bold in mw.ustring.gmatch(hangul, "'''") do - hangul = gsub(hangul, "'''", (i % 2 == 1 and front or back), 1) - i = i + 1 - end - hangul = gsub(hangul, '[%^%-]', '') - return hangul -end - -function M.link(frame) - local arg = frame:getParent().args - local args, distances = {}, {} - local m_pron = require("Module:ko-translit") - local curr_distance, closest_match = 1000, 0 - local word, translit, definition, hanja, note = false, false, false, false, false - - for i = 1, 4, 1 do - if arg[i] and arg[i] ~= "" then - table.insert(args, arg[i]) - end - end - - local curr_hangul_level, closest_hangul = 0, 0 - for i, parameter in ipairs(args) do - local _, tentative_hangul_level = gsub(parameter, "[가-힣\225\132\128-\225\135\191]", "") - if tentative_hangul_level > curr_hangul_level then - curr_hangul_level = tentative_hangul_level - closest_hangul = i - end - end - - if curr_hangul_level > 0 then - word = args[closest_hangul] - table.remove(args, closest_hangul) - end - - local function compute_distance(str1, str2) - local len1, len2 = #str1, #str2 - local char1, char2, distance = {}, {}, {} - - str1:gsub('.', function (c) - table.insert(char1, c) end) - - str2:gsub('.', function (c) - table.insert(char2, c) end) - - for i = 0, len1 do - distance[i] = {} - end - - for i = 0, len1 do - 
distance[i][0] = i - end - - for i = 0, len2 do - distance[0][i] = i - end - - for i = 1, len1 do - for j = 1, len2 do - distance[i][j] = math.min( - distance[i-1][j] + 1, - distance[i][j-1] + 1, - distance[i-1][j-1] + (char1[i] == char2[j] and 0 or 1) - ) - end - end - - return distance[len1][len2] - end - - local m_link = require("Module:links") - local test_translit = m_pron.tr_revised(m_link.remove_links(word or arg[1])) or "" - - if arg[5] and arg[5] ~= "" then - note = arg[5] - end - - for i, parameter in ipairs(args) do - if not match(parameter, '[가-힣㐀-䶵一-鿌\239\164\128-\239\171\153𠀀-𯨟]') then - local tentative_distance = compute_distance(test_translit, parameter) - if tentative_distance < curr_distance then - curr_distance = tentative_distance - closest_match = i - end - end - end - - if curr_distance < 3 and #args > 1 then - translit = args[closest_match] - table.remove(args, closest_match) - end - - for i, parameter in ipairs(args) do - if match(parameter, "[㐀-䶵一-鿌\239\164\128-\239\171\153𠀀-𯨟]") then - hanja = parameter - table.remove(args, i) - end - end - - if not hanja and not word then - word = args[1] - table.remove(args, 1) - end - - if #args > 1 then - translit = args[1] - definition = args[2] - - elseif #args > 0 then - definition = args[1] - end - - if hanja and not match(hanja, "[%[%]]") then - for hanja_word in mw.ustring.gmatch(hanja, "[㐀-䶵一-鿌\239\164\128-\239\171\153𠀀-𯨟]+") do - hanja = gsub(hanja, hanja_word, "[[" .. hanja_word .. "]]") - end - for hangul_word in mw.ustring.gmatch(hanja, "[가-힣]+") do - hanja = gsub(hanja, hangul_word, "[[" .. hangul_word .. "]]") - end - hanja = gsub(hanja, "%[%[%[%[", "[[") - hanja = gsub(hanja, "%]%]%]%]", "]]") - end - - if arg["tr"] or translit or test_translit then - translit = '' .. - (arg["tr"] or translit or test_translit) .. "" - end - - if definition then - if not match(definition, "^''.+''$") then definition = "“" .. definition .. "”" end - end - - if hanja then - hanja = '' .. 
m_link.language_link({lang = lang, term = hanja}, true) .. '' - end - - word = gsub(word, "%^", "") - - if not match(word, "[%[%]]") then - if match(word, "^—.+—$") then - word = gsub(word, "—(.+)—", "—[[%1]]—") - - elseif match(word, "^—.+$") then - word = gsub(word, "—(.+)", "—[[%1]]") - - elseif match(word, "^.+—$") then - word = gsub(word, "(.+)—", "[[%1다|%1—]]") - - elseif match(word, "^.+–$") then - word = gsub(word, "(.+)–", "[[%1]]—") - - elseif match(word, "^\*") then - word = gsub(word, "\*", "") - - else - word = "[[" .. word .. "]]" - end - end - - local info = {} - table.insert(info, word and (hanja or nil) or nil) - table.insert(info, translit or nil) - table.insert(info, definition or nil) - - local result = word - and - ("" .. word .. "") - or - ('' .. m_link.language_link({lang = lang, term = hanja}, true) .. '') - - if #info > 0 then - result = result .. " (" .. table.concat(info, ", ") .. ")" - end - - if note then - result = result .. " (" .. note .. ")" - end - - return result -end - -function M.new(frame) - local args = frame:getParent().args - - local function other(class, title, args) - local code = "" - if args[class] then - code = code .. "\n\n===" .. title .. "===\n* {{ko-l|" .. args[class] .. "}}" - - if args[class .. "2"] then - code = code .. "\n* {{ko-l|" .. args[class .. "2"] .. "}}" - - if args[class .. "3"] then - code = code .. "\n* {{ko-l|" .. args[class .. "3"] .. "}}" - - if args[class .. "4"] then - code = code .. "\n* {{ko-l|" .. args[class .. "4"] .. "}}" - end - end - end - end - return code - end - - local result = "==Korean==" - if args["wp"] then - result = result .. "\n{{wikipedia|lang=ko}}" - end - - result = result .. other("alt", "Alternative forms", args) - - if args["e"] or args["ee"] or args["h"] or args["h1"] then - result = result .. "\n\n===Etymology===\n" - - result = result .. (args["ee"] or - ("From {{etyl|" .. (args["el"] or "en") .. "|ko}} {{m|" .. - (args["el"] or "en") .. "|" .. args["e"] .. 
"}}.")) - end -end - -function M.decompose_jamo(syllable) - if not match(syllable, "[가-힣]") then - if match(syllable, "[ᄀ-ᄒ]") then return { initial = syllable, vowel = "Ø", final = "Ø" } - elseif match(syllable, "[ᅡ-ᅵ]") then return { initial = "Ø", vowel = syllable, final = "Ø" } - elseif match(syllable, "[ᆨ-ᇂ]") then return { initial = "Ø", vowel = "Ø", final = syllable } - elseif match(syllable, "[ㄱ-ㆎ]") then return { initial = "Ø", vowel = "Ø", final = syllable } - else return { initial = "Ø", vowel = " ", final = "X" } end - end - local char = mw.ustring.char - local cp = mw.ustring.codepoint(syllable) - if not cp then return { "", "", "" } end - local relative_cp = cp - 0xAC00 - local jongseong = relative_cp % 28 - local jungseong = math.floor((relative_cp % 588) / 28) - local choseong = math.floor(relative_cp / 588) - choseong, jungseong, jongseong = - char(0x1100 + choseong), - char(0x1161 + jungseong), - jongseong ~= 0 and char(0x11A7 + jongseong) or "" - return { initial = choseong, vowel = jungseong, final = jongseong } -end - -return M \ No newline at end of file diff --git a/wikt/translit/xbc-translit.lua b/wikt/translit/xbc-translit.lua deleted file mode 100644 index b7e9bbe..0000000 --- a/wikt/translit/xbc-translit.lua +++ /dev/null @@ -1,24 +0,0 @@ --- This module will transliterate Bactrian language text. 
--- Language code : xbc -local export = {} - -local mapping = { - ["α"] = "a", ["β"] = "b", ["γ"] = "g", ["δ"] = "d", ["ε"] = "e", ["ζ"] = "z", - ["η"] = "ē", ["θ"] = "θ", ["ι"] = "i", ["κ"] = "k", ["λ"] = "l", ["μ"] = "m", - ["ν"] = "n", ["ο"] = "o", ["π"] = "p", ["ρ"] = "r", ["ϸ"] = "š", ["σ"] = "s", - ["τ"] = "t", ["υ"] = "u", ["φ"] = "f", ["χ"] = "x", ["ω"] = "ō", ["ς"] = "s", - - ["Α"] = "A", ["Β"] = "B", ["Γ"] = "G", ["Δ"] = "D", ["Ε"] = "E", ["Ζ"] = "Z", - ["Η"] = "Ē", ["Θ"] = "Θ", ["Ι"] = "I", ["Κ"] = "K", ["Λ"] = "L", ["Μ"] = "M", - ["Ν"] = "N", ["Ο"] = "O", ["Π"] = "P", ["Ρ"] = "R", ["Ϸ"] = "Š", ["Σ"] = "S", - ["Τ"] = "T", ["Υ"] = "U", ["Φ"] = "F", ["Χ"] = "X", ["Ω"] = "Ō", -} - -function export.tr(text, lang, sc) - if sc == "Grek" then - text = string.gsub(text, "[\1-\127\194-\244][\128-\191]*", mapping) - return text - end -end - -return export \ No newline at end of file diff --git a/wikt/translit/ykg-translit.lua b/wikt/translit/ykg-translit.lua deleted file mode 100644 index 61c491e..0000000 --- a/wikt/translit/ykg-translit.lua +++ /dev/null @@ -1,55 +0,0 @@ --- This module will transliterate Northern Yukaghir language text. 
- -local u = mw.ustring.char - -local MACRON = u(0x0304) -local DOTABOVE = u(0x0307) -local DOTBELOW = u(0x0323) - -local str_gsub, ugsub = string.gsub, mw.ustring.gsub -local UTF8char = '[\1-\127\194-\244][\128-\191]*' - -local export = {} - -local tab = { - ["А"]='A', ["а"]='a', ["Б"]='B', ["б"]='b', ["В"]='W', ["в"]='w', - ["Г"]='G', ["г"]='g', ["Г̧̧"]='H', ["г̧"]='h', ["Ғ"]='H', ["ғ"]='h', ["Ҕ"]='H', ["ҕ"]='h', --Atlasova, Wikipedia and Kurilov all differ in this - ["Д"]='D', ["д"]='d', ["И"]='I', ["и"]='i', ["Й"]='J', ["й"]='j', - ["К"]='K', ["к"]='k', ["Л"]='L', ["л"]='l', ["М"]='M', ["м"]='m', - ["Н"]='N', ["н"]='n', ["Ӈ"]='Ŋ', ["ӈ"]='ŋ', ["Ң"]='Ŋ', ["ң"]='ŋ', ["Ҥ"]='Ŋ', ["ҥ"]='ŋ', --Same as h above - ["О"]='O', ["о"]='o', ["Ө"]='Ö', ["ө"]='ö', ["Ӧ"]='Ö', ["ӧ"]='ö', - ["П"]='P', ["п"]='p', ["Р"]='R', ["р"]='r', ["С"]='S', ["с"]='s', - ["Т"]='T', ["т"]='t', ["У"]='U', ["у"]='u', ["Ф"]='F', ["ф"]='f', - ["Х"]='Q', ["х"]='q', ["Ч"]='Ț', ["ч"]='ț', ["Э"]='E', ["э"]='e', - - -- non-native letters - ["Е"]='E', ["е"]='e', ["Ё"]='Jo', ["ё"]='jo', - ["Ж"]='Z', ["ж"]='z', ["З"]='Z', ["з"]='z', - ["Ц"]='C', ["ц"]='c', ["Ш"]='Ş', ["ш"]='ş', ["Щ"]='Ş', ["щ"]='ş', - ["Ы"]='Y', ["ы"]='y', ["Ю"]='Ju', ["ю"]='ju', - ["Я"]='Ja', ["я"]='ja', - ['Ъ']='', ['ъ']='', ["Ь"]="", ["ь"]="" -} - -local other = { - { 'Дь', 'D̦' }, - { 'дь', 'd̦' }, - { 'Нь', 'Ņ' }, - { 'нь', 'ņ' }, - { 'Ль', 'Ļ' }, - { 'ль', 'ļ' }, - { 'Г̧', 'H' }, - { 'г̧', 'h' }, -} - -function export.tr(text, lang, sc) - for i, replacement in ipairs(other) do - text = str_gsub(text, unpack(replacement)) - end - - -- Intrestingly the orthography(ies) don't use Cyrillic palatal vowels - - return (str_gsub(text, UTF8char, tab)) -end - -return export \ No newline at end of file diff --git a/wiktra/Wiktra.py b/wiktra/Wiktra.py new file mode 100755 index 0000000..b4b0955 --- /dev/null +++ b/wiktra/Wiktra.py @@ -0,0 +1,263 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import os +from pathlib import Path +from lupa 
import LuaRuntime + +lua_folder = str(Path(Path(__file__).parent)) + +os.environ["LUA_PATH"] = ";".join([ + f"{lua_folder}/?.lua", + f"{lua_folder}/wikt/?.lua", + f"{lua_folder}/wikt/translit/?.lua", + f"{lua_folder}/wikt/legacy/?.lua", + f"{lua_folder}/wikt/legacy/translit/?.lua", + f"{os.environ.get('LUA_PATH','')}", +]) + + +lang_map = { + "inc-mas": ("inc-mas", ""), + "amh": ("ethi", "Ethi"), + "byn": ("ethi", "Ethi"), + "sem-cha": ("ethi", "Ethi"), + "gez": ("ethi", "Ethi"), + "har": ("ethi", "Ethi"), + "tig": ("ethi", "Ethi"), + "tir": ("ethi", "Ethi"), + "rus": ("ru", "Cyrl"), + "cre": ("cr", "Cans"), + "pka": ("brah", "Brah"), + "kho": ("brah", "Brah"), + "inc-mgd": ("brah", "Brah"), + "pmh": ("brah", "Brah"), + "oty": ("brah", "Brah"), + "inc-psc": ("brah", "Brah"), + "psu": ("brah", "Brah"), + "inc-ash": ("brah", "Brah"), + "xpr": ("prti", "Parthian"), + "ete": ("cprt", "Cprt"), + "ave": ("avst", "Avst"), + "guj": ("gu", "Gujr"), + "vgr": ("gu", "Gujr"), + "kfr": ("gu", "Gujr"), + "ykg": ("ykg", "Cyrl"), + "ady": ("ady", "Cyrl"), + "ava": ("av", "Cyrl"), + "ukr": ("uk", "Cyrl"), + "xal": ("xal", "Cyrl"), + "tib": ("bo", "Tibt"), + "bod": ("bo", "Tibt"), + "heb": ("he", "Hebr"), + "mya": ("my", "Mymr"), + "rue": ("rue", "Cyrl"), + "kdr": ("kdr", "Cyrl"), + "sin": ("si", "Sinh"), + "kum": ("kum", "Cyrl"), + "arm": ("armn", "Armn"), + "bud": ("bdk", "Cyrl"), + "dng": ("dng", "Cyrl"), + "urd": ("ur", "ur-Arab"), + "got": ("goth", "Goth"), + "qwm": ("armn", "Armn"), + "ben": ("bn", "Beng"), + "lif": ("lif", "Limb"), + "pra": ("inc-ash", "Brah"), + "mas": ("inc-mas", "Beng"), + "tha": ("th", "Thai"), + "lep": ("lep", "Lepc"), + "eng": ("en", "Latn"), + "abq": ("abq", "Cyrl"), + "bua": ("bua", "Cyrl"), + "lez": ("lez", "Cyrl"), + "mon": ("mon", "Cyrl"), + "uig": ("ug", "ug-Arab"), + "kjj": ("kjj", "Cyrl"), + "chr": ("cher", "Cher"), + "bho": ("bho", "Deva"), + "new": ("new", "Deva"), + "hin": ("hi", "Deva"), + "awa": ("hi", "Deva"), + "bfy": ("hi", "Deva"), + 
"bhd": ("hi", "Deva"), + "kfs": ("hi", "Deva"), + "bra": ("hi", "Deva"), + "bns": ("hi", "Deva"), + "cdh": ("hi", "Deva"), + "cdj": ("hi", "Deva"), + "doi-d": ("hi", "Deva"), + "gbk": ("hi", "Deva"), + "gbm": ("hi", "Deva"), + "bgc": ("hi", "Deva"), + "xnr": ("hi", "Deva"), + "kfx": ("hi", "Deva"), + "bfz": ("hi", "Deva"), + "mjl": ("hi", "Deva"), + "mwr": ("hi", "Deva"), + "mtr": ("hi", "Deva"), + "unr": ("hi", "Deva"), + "bpx": ("hi", "Deva"), + "pgg": ("hi", "Deva"), + "mal": ("ml", "Mlym"), + "pal-m": ("mani", "Mani"), + "xpr": ("mani", "Mani"), + "sog": ("mani", "Mani"), + "mdf": ("mdf", "Cyrl"), + "sat": ("sat", "Olck"), + "tel": ("te", "Telu"), + "mkd": ("mk", "Cyrl"), + "sjd": ("sjd", "Cyrl"), + "kbd": ("kbd", "Cyrl"), + "abk": ("ab", "Cyrl"), + "kor": ("ko", "Kore"), + "mns": ("mns", "Cyrl"), + "kca": ("kca", "Cyrl"), + "inc-ash-k": ("inc-ash-k", "Khar"), + "pgd-k": ("pgd-k", "Khar"), + "bel": ("be", "Cyrl"), + "koi": ("kv", "Cyrl"), + "kpv": ("kv", "Cyrl"), + "cop": ("copt", "Copt"), + "kmr": ("kmr", "Cyrl"), + "lbe": ("lbe", "Cyrl"), + "lak": ("lbe", "Cyrl"), + "ita-old": ("ital", "Ital"), + "lao": ("lo", "Laoo"), + "mar": ("mr", "Deva"), + "kok": ("mr", "Deva"), + "udi": ("udi", "Cyrl"), + "eth": ("ethi", "Ethi"), + "gmy": ("linb", "Linb"), + "iku": ("iu", "Cans"), + "myv": ("myv", "Cyrl"), + "kan": ("kn", "Knda"), + "tam": ("ta", "Taml"), + "kaa": ("kaa", "Cyrl"), + "bul": ("bg", "Cyrl"), + "jav": ("jv", "Javn"), + "oss": ("os", "Cyrl"), + "che": ("ce", "Cyrl"), + "kas": ("ks", "ks-Arab"), + "kas-d": ("ks-deva", "ks-Deva"), + "pal-p": ("phli", "Phli"), + "sah": ("sah", "Cyrl"), + "chm": ("chm", "Cyrl"), + "kaz": ("kk", "Cyrl"), + "kjh": ("kjh", "Cyrl"), + "fas": ("fa", "fa-Arab"), + "iii": ("ii", "Yiii"), + "tgk": ("tg", "Cyrl"), + "wbl": ("tg", "Cyrl"), + "yai": ("tg", "Cyrl"), + "bak": ("ba", "Cyrl"), + "krc": ("krc", "Cyrl"), + "ori": ("or", "Orya"), + "eve": ("eve", "Cyrl"), + "khm": ("km", "Khmr"), + "xbc": ("xbc", "Grek"), + "txh": ("el", 
"Grek"), + "ell": ("el", "Grek"), + "san": ("sa", "Deva"), + "inc-ohi": ("sa", "Deva"), + "omr": ("sa", "Deva"), + "inc-tak": ("sa", "Deva"), + "inc-vra": ("sa", "Deva"), + "nsk": ("nsk", "Cans"), + "udm": ("udm", "Cyrl"), + "nog": ("nog", "Cyrl"), + "can": ("cans", "Cans"), + "ain": ("ain", "Kana"), + "nep": ("ne", "Deva"), + "dty": ("ne", "Deva"), + "bbl": ("geor", "Geor"), + "geo": ("geor", "Geor"), + "lzz": ("geor", "Geor"), + "xmf": ("geor", "Geor"), + "oge": ("geor", "Geor"), + "geo": ("geor", "Geor"), + "udi": ("geor", "Geor"), + "niv": ("niv", "Cyrl"), + "xlc": ("lyci", "Lyci"), + "xhd": ("sarb", "Sarb"), + "xha": ("sarb", "Sarb"), + "inm": ("sarb", "Sarb"), + "xqt": ("sarb", "Sarb"), + "xsa": ("sarb", "Sarb"), + "chu": ("cv", "Cyrl"), + "peo": ("peo", "Xpeo"), + "chu-old-c": ("cyrs-glag", "Cyrs"), + "chu-old-g": ("cyrs-glag", "Glag"), + "orv": ("cyrs-glag", "Cyrs"), + "zle-ono-c": ("cyrs-glag", "Cyrs"), + "zle-ono-g": ("cyrs-glag", "Glag"), + "dar": ("dar", "Cyrl"), + "pan": ("guru", "Guru"), + "alt": ("altai", "Cyrl"), + "inh": ("inh", "Cyrl"), + "ara": ("ar", "Arab"), + "tyv": ("tyv", "Cyrl"), + "gre": ("el", "Grek"), + "tat": ("tt", "Cyrl"), + "aho": ("ahom", "Ahom"), + "khb": ("talu", ""), + "xld": ("lydi", "Lydi"), + "dlg": ("dlg", "Cyrl"), + "kir": ("ky", "Cyrl"), + "asm": ("as", "as-Beng"), + "div": ("dv", "Thaa"), + "ber": ("ber", "Tfng"), + "grc": ("grc", "polythonic"), + "xmk": ("grc", "polythonic"), + "cpg": ("grc", "polythonic"), + "oos": ("grc", "polythonic"), + "ine-pae": ("grc", "polythonic"), + "xpg": ("grc", "polythonic"), + "pnt": ("grc", "polythonic"), + "grc-c": ("cprt", "Cprt"), + "evn": ("evn", "Cyrl"), +} + + +class Transliterator(object): + def __init__(self): + self.lua = LuaRuntime(unpack_returned_tuples=True) + self.lua.execute("mw = require('wikt.mw')") + + def e(self, lua_str): + self.lua.execute(lua_str) + return self.lua.globals().res + + def tr_legacy(self, text, lang): + lang, sc = lang_map[lang.lower()] + lua_str = f"""res 
= require("wikt.translit.{lang}-translit").tr("{text}", "{lang}", "{sc}")""" + return self.e(lua_str) + + def tr(self, text, lang, sc): + res = None + res = self.e( + f"""res = require("wikt.translit.translit-redirect").tr("{text}", "{lang}", "{sc}")""" + ) + if not res: + res = self.e( + f"""res = require("wikt.translit.{lang}-translit").tr("{text}", "{lang}", "{sc}")""" + ) + return res + + def test_load(self): + reqs = [] + mods = [str(Path(p.parent, p.stem)) for p in Path("wikt","translit").glob("**/*.lua")] + for mod in mods: + reqs.append(f"""require("{mod}")""") + sreqs = "\n".join(reqs) + l = f""" + {sreqs} + res = "OK" + """ + return self.e(l) + + + +def translite(text, lang): + tr = Transliterator() + return tr.tr_legacy(text, lang) diff --git a/wiktra/__init__.py b/wiktra/__init__.py new file mode 100644 index 0000000..e217f44 --- /dev/null +++ b/wiktra/__init__.py @@ -0,0 +1,6 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +from .Wiktra import * + +__version__ = "2.0.0" +__all__ = ['translite'] diff --git a/wiktra/__main__.py b/wiktra/__main__.py new file mode 100644 index 0000000..f01cdc1 --- /dev/null +++ b/wiktra/__main__.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +""" + +import wiktra +from argparse import ArgumentParser +import logging +from pathlib import Path + +PROG = "wiktrapy" + + +def cli(): + parser = ArgumentParser(prog=f"{PROG}") + parser.add_argument("-t", "--text", metavar="TEXT", dest="text") + parser.add_argument("-i", "--input", metavar="FILE", dest="in_file") + parser.add_argument( + "-l", + "--lang", + metavar="LANG", + dest="in_lang", + help="Input language as ISO 639-2 code", + ) + parser.add_argument( + "-s", + "--script", + metavar="SCRIPT", + dest="in_script", + help="Input script as ISO 15924 code", + ) + parser.add_argument( + "-v", + "--verbose", + action="count", + default=1, + help="-v show progress, -vv show debug", + ) + parser.add_argument( + "-V", + "--version", + action="version", + 
version="%s %s" % (PROG, wiktra.__version__), + help="show version and exit", + ) + return parser + + +def main(*args, **kwargs): + parser = cli(*args, **kwargs) + args = parser.parse_args() + args.verbose = 40 - (10 * args.verbose) if args.verbose > 0 else 0 + logging.basicConfig( + level=args.verbose, + format="%(asctime)s %(levelname)s: %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + ) + opts = vars(args) + logging.debug("Running with options:\n%s" % repr(opts)) + del opts["verbose"] + if opts["in_file"]: + with open(Path(opts["in_file"]), "r", encoding="utf8") as f: + text = f.read() + else: + text = opts["text"] + tr = wiktra.Wiktra.Transliterator() + res = tr.tr(text, opts["in_lang"], opts["in_script"]) + print(res) + + +if __name__ == "__main__": + main() diff --git a/wiktra/update.py b/wiktra/update.py new file mode 100755 index 0000000..668a68c --- /dev/null +++ b/wiktra/update.py @@ -0,0 +1,244 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# pip install pywikiapi + +import argparse +import logging +import subprocess +import re +from wiktra.Wiktra import * + +import pywikiapi + +logging.basicConfig(level=logging.INFO) + + +class WiktionaryModuleDownload(object): + + re_require = re.compile(r"""(require|loadData)[\( ]['"][Mm]odule:(.*?)['"]""") + exclude_modules = [ + "ja", + "ja/k2r-old", + "ko-translit", + "Shah-translit", + "th-translit", + "th-hom", + "th-pron", + "tts-pron", + "tts-translit", + "zh-sortkey", + "transliteration module testcases", + "cjs-translit/testcases", + "labels", + "labels/data", + "labels/data/functions", + "labels/data/subvarieties", + "qualifier", + "omk-translit/testcases", + ] + + def __init__(self, output_folder, force=False, deps=True): + self.outf = output_folder + self.force = force + self.deps = deps + if not Path(self.outf).is_dir(): + Path(self.outf).mkdir(parents=True, exist_ok=True) + self.wk = pywikiapi.Site( + "https://en.wiktionary.org/w/api.php", json_object_hook=pywikiapi.AttrDict + ) + self.downloaded = 
[] + + def check_write(self, path): + if Path(path).exists() and not self.force: + return False + else: + return True + + def get_module_code(self, page="Goth-translit"): + try: + res = self.wk("parse", page=f"Module:{page}", prop="wikitext") + except pywikiapi.utils.ApiError: + return "" + return res.parse.wikitext + + def write_modules_category(self, cat="Transliteration_modules"): + logging.info(f"Category: '{cat}'") + for r in self.wk.query(list="categorymembers", cmtitle=f"Category:{cat}"): + for page in r.categorymembers: + page = page.title.split(":") + if page[0] == "Module": + if len(page) > 1: + if page[1] != "User": + self.write_module(page[1]) + + def special_process(self, page, text): + text = text + if page in ("languages/alldata"): + for m in re.findall(r"""\[['"]Module:(languages/.*?)['"]\]""", text): + if self.deps: + self.write_module(m) + text = re.sub( + r"""(\[['"])Module:(.*?)(['"]\])""", + r"\1\2\3", + text, + ) + text = text.replace( + """mname:gsub("data", "extradata")""", """mname:gsub("data", "data")""" + ) + elif page in ("translit-redirect"): + text = text.replace( + '''pcall(require, "Module:"''', + '''pcall(require, ""''', + ) + elif page in ("xal-translit"): + text = text.replace( + """\ʺ""", + """ʺ""", + ) + elif page in ("UnitTests"): + text = text.replace( + '''require"Module:Unicode data"''', + """require("Unicode data")""", + ) + return text + + def preprocess_lua_file(self, path): + cmd = [ + "lua-format", + "--in-place", + "--column-limit=999", + "--column-table-limit=999", + "--single-quote-to-double-quote", + "--no-spaces-inside-functiondef-parens", + "--no-spaces-inside-functioncall-parens", + path, + ] + res = subprocess.run(cmd, capture_output=True) + if res.returncode: + logging.error(f"""Luaformat failed: {res.stderr}""") + + def write_module(self, page="Goth-translit", parent=None): + text = "" + inpath = Path(page) + outfolder = Path(self.outf, inpath.parent) + outpath = Path(outfolder, f"{inpath.name}.lua") + if 
self.check_write(outpath) and page not in self.exclude_modules: + info = f"Page: '{page}'" + if parent: + info = f"Page: '{parent}' > " + info + logging.info(info) + text = self.get_module_code(page) + if len(text): + if not outfolder.is_dir(): + outfolder.mkdir(parents=True, exist_ok=True) + self.downloaded.append(page) + with open(outpath, "w", encoding="utf-8") as f: + f.write(text) + self.preprocess_lua_file(outpath) + with open(outpath, "r", encoding="utf-8") as f: + text = f.read() + for fun, mod in self.re_require.findall(text): + if mod not in self.downloaded and self.deps: + self.write_module(mod, parent=page) + text = re.sub( + r"""(require|loadData)([\( ])(['"])([Mm]odule:)(""" + + mod + + r""")(['"])""", + r"\1\2\3" + r"\5" + r"\6", + text, + ) + text = text.replace("""loadData("Module:""", '''loadData("''') + text = text.replace("""require("Module:""", '''require("''') + text = self.special_process(page, text) + with open(outpath, "w", encoding="utf-8") as f: + f.write(text) + + def exec_module(self, prefix, mod): + to_exec = mod + if len(prefix): + to_exec = f"""{prefix}/{mod}""" + self.lua = LuaRuntime(unpack_returned_tuples=True) + logging.info(f"Executing {to_exec}") + res = "" + path = Path(self.outf, to_exec + ".lua") + with open(Path(path), "r", encoding="utf8") as f: + lua_str = f.read() + try: + self.lua.execute(lua_str) + res = self.lua.globals().res + except lupa._lupa.LuaError as err: + mods = re.findall(r"""module '(.*?)' not found""", str(err)) + for smod in mods: + if self.deps: + sto_exec = f"""{mod}""" + if prefix: + sto_exec = f"""{prefix}/{sto_exec}""" + logging.info(f"From execution writing {sto_exec}") + self.write_module(sto_exec) + return None + + +def cli(): + parser = argparse.ArgumentParser( + prog="wiktrapy_update", + description="""Downloads specified Lua modules from Wiktionary""", + ) + parser.add_argument( + "-o", + "--output", + default=Path(lua_folder, "wikt", "translit"), + required=False, + metavar="folder", + 
dest="output", + help="""Folder in which the code will be created""", + ) + parser.add_argument( + "-p", + "--page", + default=None, + required=False, + metavar="page", + dest="page", + help="""Get specific page""", + ) + parser.add_argument( + "-f", + "--force", + action="store_true", + dest="force", + help="""Force overwrite""", + ) + parser.add_argument( + "-D", + "--no-dependencies", + action="store_true", + dest="no_deps", + help="""Ignore dependencies""", + ) + return parser + + +def main(*args, **kwargs): + parser = cli(*args, **kwargs) + args = parser.parse_args() + wkd = WiktionaryModuleDownload( + args.output, + force=args.force, + deps=not args.no_deps + ) + if args.page: + wkd.write_module(args.page) + else: + logging.info("# Writing transliteration modules") + wkd.write_modules_category("Transliteration_modules") + logging.info("# Writing additional modules") + for mod in [ + "languages/byTranslitModule", + "scripts/code_to_canonical_name", + ]: + wkd.write_module(mod) + + +if __name__ == "__main__": + main() diff --git a/wikt/bit32.lua b/wiktra/wikt/bit32.lua old mode 100755 new mode 100644 similarity index 100% rename from wikt/bit32.lua rename to wiktra/wikt/bit32.lua diff --git a/wikt/translit/ja.lua b/wiktra/wikt/legacy/translit/ja.lua old mode 100644 new mode 100755 similarity index 100% rename from wikt/translit/ja.lua rename to wiktra/wikt/legacy/translit/ja.lua diff --git a/wikt/translit/ko-pron.lua b/wiktra/wikt/legacy/translit/ko-pron.lua old mode 100644 new mode 100755 similarity index 100% rename from wikt/translit/ko-pron.lua rename to wiktra/wikt/legacy/translit/ko-pron.lua diff --git a/wikt/translit/ko-translit.lua b/wiktra/wikt/legacy/translit/ko-translit.lua old mode 100644 new mode 100755 similarity index 100% rename from wikt/translit/ko-translit.lua rename to wiktra/wikt/legacy/translit/ko-translit.lua diff --git a/wikt/translit/th-pron.lua b/wiktra/wikt/legacy/translit/th-pron.lua old mode 100644 new mode 100755 similarity index 
100% rename from wikt/translit/th-pron.lua rename to wiktra/wikt/legacy/translit/th-pron.lua diff --git a/wikt/translit/th-translit.lua b/wiktra/wikt/legacy/translit/th-translit.lua old mode 100644 new mode 100755 similarity index 100% rename from wikt/translit/th-translit.lua rename to wiktra/wikt/legacy/translit/th-translit.lua diff --git a/wikt/translit/tha-pron.lua b/wiktra/wikt/legacy/translit/tha-pron.lua old mode 100644 new mode 100755 similarity index 100% rename from wikt/translit/tha-pron.lua rename to wiktra/wikt/legacy/translit/tha-pron.lua diff --git a/wikt/translit/tha-pron1.lua b/wiktra/wikt/legacy/translit/tha-pron1.lua old mode 100644 new mode 100755 similarity index 100% rename from wikt/translit/tha-pron1.lua rename to wiktra/wikt/legacy/translit/tha-pron1.lua diff --git a/wikt/libraryUtil.lua b/wiktra/wikt/libraryUtil.lua old mode 100755 new mode 100644 similarity index 100% rename from wikt/libraryUtil.lua rename to wiktra/wikt/libraryUtil.lua diff --git a/wikt/luabit/bit.lua b/wiktra/wikt/luabit/bit.lua old mode 100755 new mode 100644 similarity index 94% rename from wikt/luabit/bit.lua rename to wiktra/wikt/luabit/bit.lua index 6bb3f48..4cc5cff --- a/wikt/luabit/bit.lua +++ b/wiktra/wikt/luabit/bit.lua @@ -1,264 +1,264 @@ ---[[--------------- -LuaBit v0.4 -------------------- -a bitwise operation lib for lua. - -http://luaforge.net/projects/bit/ - -How to use: -------------------- - bit.bnot(n) -- bitwise not (~n) - bit.band(m, n) -- bitwise and (m & n) - bit.bor(m, n) -- bitwise or (m | n) - bit.bxor(m, n) -- bitwise xor (m ^ n) - bit.brshift(n, bits) -- right shift (n >> bits) - bit.blshift(n, bits) -- left shift (n << bits) - bit.blogic_rshift(n, bits) -- logic right shift(zero fill >>>) - -Please note that bit.brshift and bit.blshift only support number within -32 bits. 
- -2 utility functions are provided too: - bit.tobits(n) -- convert n into a bit table(which is a 1/0 sequence) - -- high bits first - bit.tonumb(bit_tbl) -- convert a bit table into a number -------------------- - -Under the MIT license. - -copyright(c) 2006~2007 hanzhao (abrash_han@hotmail.com) - -2013-02-20: Brad Jorsch: Fix to not try messing with globals, doesn't work in Scribunto ---]]--------------- - -do - ------------------------- --- bit lib implementions - -local function check_int(n) - -- checking not float - if(n - math.floor(n) > 0) then - error("trying to use bitwise operation on non-integer!") - end -end - -local function to_bits(n) - check_int(n) - if(n < 0) then - -- negative - return to_bits(bit.bnot(math.abs(n)) + 1) - end - -- to bits table - local tbl = {} - local cnt = 1 - while (n > 0) do - local last = math.mod(n,2) - if(last == 1) then - tbl[cnt] = 1 - else - tbl[cnt] = 0 - end - n = (n-last)/2 - cnt = cnt + 1 - end - - return tbl -end - -local function tbl_to_number(tbl) - local n = table.getn(tbl) - - local rslt = 0 - local power = 1 - for i = 1, n do - rslt = rslt + tbl[i]*power - power = power*2 - end - - return rslt -end - -local function expand(tbl_m, tbl_n) - local big = {} - local small = {} - if(table.getn(tbl_m) > table.getn(tbl_n)) then - big = tbl_m - small = tbl_n - else - big = tbl_n - small = tbl_m - end - -- expand small - for i = table.getn(small) + 1, table.getn(big) do - small[i] = 0 - end - -end - -local function bit_or(m, n) - local tbl_m = to_bits(m) - local tbl_n = to_bits(n) - expand(tbl_m, tbl_n) - - local tbl = {} - local rslt = math.max(table.getn(tbl_m), table.getn(tbl_n)) - for i = 1, rslt do - if(tbl_m[i]== 0 and tbl_n[i] == 0) then - tbl[i] = 0 - else - tbl[i] = 1 - end - end - - return tbl_to_number(tbl) -end - -local function bit_and(m, n) - local tbl_m = to_bits(m) - local tbl_n = to_bits(n) - expand(tbl_m, tbl_n) - - local tbl = {} - local rslt = math.max(table.getn(tbl_m), table.getn(tbl_n)) - for i = 1, 
rslt do - if(tbl_m[i]== 0 or tbl_n[i] == 0) then - tbl[i] = 0 - else - tbl[i] = 1 - end - end - - return tbl_to_number(tbl) -end - -local function bit_not(n) - - local tbl = to_bits(n) - local size = math.max(table.getn(tbl), 32) - for i = 1, size do - if(tbl[i] == 1) then - tbl[i] = 0 - else - tbl[i] = 1 - end - end - return tbl_to_number(tbl) -end - -local function bit_xor(m, n) - local tbl_m = to_bits(m) - local tbl_n = to_bits(n) - expand(tbl_m, tbl_n) - - local tbl = {} - local rslt = math.max(table.getn(tbl_m), table.getn(tbl_n)) - for i = 1, rslt do - if(tbl_m[i] ~= tbl_n[i]) then - tbl[i] = 1 - else - tbl[i] = 0 - end - end - - --table.foreach(tbl, print) - - return tbl_to_number(tbl) -end - -local function bit_rshift(n, bits) - check_int(n) - - local high_bit = 0 - if(n < 0) then - -- negative - n = bit_not(math.abs(n)) + 1 - high_bit = 2147483648 -- 0x80000000 - end - - for i=1, bits do - n = n/2 - n = bit_or(math.floor(n), high_bit) - end - return math.floor(n) -end - --- logic rightshift assures zero filling shift -local function bit_logic_rshift(n, bits) - check_int(n) - if(n < 0) then - -- negative - n = bit_not(math.abs(n)) + 1 - end - for i=1, bits do - n = n/2 - end - return math.floor(n) -end - -local function bit_lshift(n, bits) - check_int(n) - - if(n < 0) then - -- negative - n = bit_not(math.abs(n)) + 1 - end - - for i=1, bits do - n = n*2 - end - return bit_and(n, 4294967295) -- 0xFFFFFFFF -end - -local function bit_xor2(m, n) - local rhs = bit_or(bit_not(m), bit_not(n)) - local lhs = bit_or(m, n) - local rslt = bit_and(lhs, rhs) - return rslt -end - --------------------- --- bit lib interface - -local bit = { - -- bit operations - bnot = bit_not, - band = bit_and, - bor = bit_or, - bxor = bit_xor, - brshift = bit_rshift, - blshift = bit_lshift, - bxor2 = bit_xor2, - blogic_rshift = bit_logic_rshift, - - -- utility func - tobits = to_bits, - tonumb = tbl_to_number, -} - -return bit - -end - ---[[ -for i = 1, 100 do - for j = 1, 100 do - 
if(bit.bxor(i, j) ~= bit.bxor2(i, j)) then - error("bit.xor failed.") - end - end -end ---]] - - - - - - - - - - - - - +--[[--------------- +LuaBit v0.4 +------------------- +a bitwise operation lib for lua. + +http://luaforge.net/projects/bit/ + +How to use: +------------------- + bit.bnot(n) -- bitwise not (~n) + bit.band(m, n) -- bitwise and (m & n) + bit.bor(m, n) -- bitwise or (m | n) + bit.bxor(m, n) -- bitwise xor (m ^ n) + bit.brshift(n, bits) -- right shift (n >> bits) + bit.blshift(n, bits) -- left shift (n << bits) + bit.blogic_rshift(n, bits) -- logic right shift(zero fill >>>) + +Please note that bit.brshift and bit.blshift only support number within +32 bits. + +2 utility functions are provided too: + bit.tobits(n) -- convert n into a bit table(which is a 1/0 sequence) + -- high bits first + bit.tonumb(bit_tbl) -- convert a bit table into a number +------------------- + +Under the MIT license. + +copyright(c) 2006~2007 hanzhao (abrash_han@hotmail.com) + +2013-02-20: Brad Jorsch: Fix to not try messing with globals, doesn't work in Scribunto +--]]--------------- + +do + +------------------------ +-- bit lib implementions + +local function check_int(n) + -- checking not float + if(n - math.floor(n) > 0) then + error("trying to use bitwise operation on non-integer!") + end +end + +local function to_bits(n) + check_int(n) + if(n < 0) then + -- negative + return to_bits(bit.bnot(math.abs(n)) + 1) + end + -- to bits table + local tbl = {} + local cnt = 1 + while (n > 0) do + local last = math.mod(n,2) + if(last == 1) then + tbl[cnt] = 1 + else + tbl[cnt] = 0 + end + n = (n-last)/2 + cnt = cnt + 1 + end + + return tbl +end + +local function tbl_to_number(tbl) + local n = table.getn(tbl) + + local rslt = 0 + local power = 1 + for i = 1, n do + rslt = rslt + tbl[i]*power + power = power*2 + end + + return rslt +end + +local function expand(tbl_m, tbl_n) + local big = {} + local small = {} + if(table.getn(tbl_m) > table.getn(tbl_n)) then + big = tbl_m + small = 
tbl_n + else + big = tbl_n + small = tbl_m + end + -- expand small + for i = table.getn(small) + 1, table.getn(big) do + small[i] = 0 + end + +end + +local function bit_or(m, n) + local tbl_m = to_bits(m) + local tbl_n = to_bits(n) + expand(tbl_m, tbl_n) + + local tbl = {} + local rslt = math.max(table.getn(tbl_m), table.getn(tbl_n)) + for i = 1, rslt do + if(tbl_m[i]== 0 and tbl_n[i] == 0) then + tbl[i] = 0 + else + tbl[i] = 1 + end + end + + return tbl_to_number(tbl) +end + +local function bit_and(m, n) + local tbl_m = to_bits(m) + local tbl_n = to_bits(n) + expand(tbl_m, tbl_n) + + local tbl = {} + local rslt = math.max(table.getn(tbl_m), table.getn(tbl_n)) + for i = 1, rslt do + if(tbl_m[i]== 0 or tbl_n[i] == 0) then + tbl[i] = 0 + else + tbl[i] = 1 + end + end + + return tbl_to_number(tbl) +end + +local function bit_not(n) + + local tbl = to_bits(n) + local size = math.max(table.getn(tbl), 32) + for i = 1, size do + if(tbl[i] == 1) then + tbl[i] = 0 + else + tbl[i] = 1 + end + end + return tbl_to_number(tbl) +end + +local function bit_xor(m, n) + local tbl_m = to_bits(m) + local tbl_n = to_bits(n) + expand(tbl_m, tbl_n) + + local tbl = {} + local rslt = math.max(table.getn(tbl_m), table.getn(tbl_n)) + for i = 1, rslt do + if(tbl_m[i] ~= tbl_n[i]) then + tbl[i] = 1 + else + tbl[i] = 0 + end + end + + --table.foreach(tbl, print) + + return tbl_to_number(tbl) +end + +local function bit_rshift(n, bits) + check_int(n) + + local high_bit = 0 + if(n < 0) then + -- negative + n = bit_not(math.abs(n)) + 1 + high_bit = 2147483648 -- 0x80000000 + end + + for i=1, bits do + n = n/2 + n = bit_or(math.floor(n), high_bit) + end + return math.floor(n) +end + +-- logic rightshift assures zero filling shift +local function bit_logic_rshift(n, bits) + check_int(n) + if(n < 0) then + -- negative + n = bit_not(math.abs(n)) + 1 + end + for i=1, bits do + n = n/2 + end + return math.floor(n) +end + +local function bit_lshift(n, bits) + check_int(n) + + if(n < 0) then + -- negative + 
n = bit_not(math.abs(n)) + 1 + end + + for i=1, bits do + n = n*2 + end + return bit_and(n, 4294967295) -- 0xFFFFFFFF +end + +local function bit_xor2(m, n) + local rhs = bit_or(bit_not(m), bit_not(n)) + local lhs = bit_or(m, n) + local rslt = bit_and(lhs, rhs) + return rslt +end + +-------------------- +-- bit lib interface + +local bit = { + -- bit operations + bnot = bit_not, + band = bit_and, + bor = bit_or, + bxor = bit_xor, + brshift = bit_rshift, + blshift = bit_lshift, + bxor2 = bit_xor2, + blogic_rshift = bit_logic_rshift, + + -- utility func + tobits = to_bits, + tonumb = tbl_to_number, +} + +return bit + +end + +--[[ +for i = 1, 100 do + for j = 1, 100 do + if(bit.bxor(i, j) ~= bit.bxor2(i, j)) then + error("bit.xor failed.") + end + end +end +--]] + + + + + + + + + + + + + diff --git a/wikt/luabit/hex.lua b/wiktra/wikt/luabit/hex.lua old mode 100755 new mode 100644 similarity index 94% rename from wikt/luabit/hex.lua rename to wiktra/wikt/luabit/hex.lua index ee6a69e..0c87a8d --- a/wikt/luabit/hex.lua +++ b/wiktra/wikt/luabit/hex.lua @@ -1,99 +1,99 @@ ---[[--------------- -Hex v0.4 -------------------- -Hex conversion lib for lua. - -How to use: - hex.to_hex(n) -- convert a number to a hex string - hex.to_dec(hex) -- convert a hex string(prefix with '0x' or '0X') to number - -Part of LuaBit(http://luaforge.net/projects/bit/). - -Under the MIT license. 
- -copyright(c) 2006~2007 hanzhao (abrash_han@hotmail.com) - -2013-02-20: Brad Jorsch: Fix to not try messing with globals, doesn't work in Scribunto ---]]--------------- - -local bit = require 'bit' - -do - -local function to_hex(n) - if(type(n) ~= "number") then - error("non-number type passed in.") - end - - -- checking not float - if(n - math.floor(n) > 0) then - error("trying to apply bitwise operation on non-integer!") - end - - if(n < 0) then - -- negative - n = bit.tobits(bit.bnot(math.abs(n)) + 1) - n = bit.tonumb(n) - end - - hex_tbl = {'A', 'B', 'C', 'D', 'E', 'F'} - hex_str = "" - - while(n ~= 0) do - last = math.mod(n, 16) - if(last < 10) then - hex_str = tostring(last) .. hex_str - else - hex_str = hex_tbl[last-10+1] .. hex_str - end - n = math.floor(n/16) - end - if(hex_str == "") then - hex_str = "0" - end - return "0x" .. hex_str -end - -local function to_dec(hex) - if(type(hex) ~= "string") then - error("non-string type passed in.") - end - - head = string.sub(hex, 1, 2) - - if( head ~= "0x" and head ~= "0X") then - error("wrong hex format, should lead by 0x or 0X.") - end - - v = tonumber(string.sub(hex, 3), 16) - - return v; -end - --------------------- --- hex lib interface -local hex = { - to_dec = to_dec, - to_hex = to_hex, -} - -return hex - -end - ---[[ --- test -d = 4341688 -h = to_hex(d) -print(h) -print(to_dec(h)) - - -for i = 1, 100000 do - h = hex.to_hex(i) - d = hex.to_dec(h) - if(d ~= i) then - error("failed " .. i .. ", " .. h) - end -end ---]] +--[[--------------- +Hex v0.4 +------------------- +Hex conversion lib for lua. + +How to use: + hex.to_hex(n) -- convert a number to a hex string + hex.to_dec(hex) -- convert a hex string(prefix with '0x' or '0X') to number + +Part of LuaBit(http://luaforge.net/projects/bit/). + +Under the MIT license. 
+ +copyright(c) 2006~2007 hanzhao (abrash_han@hotmail.com) + +2013-02-20: Brad Jorsch: Fix to not try messing with globals, doesn't work in Scribunto +--]]--------------- + +local bit = require 'bit' + +do + +local function to_hex(n) + if(type(n) ~= "number") then + error("non-number type passed in.") + end + + -- checking not float + if(n - math.floor(n) > 0) then + error("trying to apply bitwise operation on non-integer!") + end + + if(n < 0) then + -- negative + n = bit.tobits(bit.bnot(math.abs(n)) + 1) + n = bit.tonumb(n) + end + + hex_tbl = {'A', 'B', 'C', 'D', 'E', 'F'} + hex_str = "" + + while(n ~= 0) do + last = math.mod(n, 16) + if(last < 10) then + hex_str = tostring(last) .. hex_str + else + hex_str = hex_tbl[last-10+1] .. hex_str + end + n = math.floor(n/16) + end + if(hex_str == "") then + hex_str = "0" + end + return "0x" .. hex_str +end + +local function to_dec(hex) + if(type(hex) ~= "string") then + error("non-string type passed in.") + end + + head = string.sub(hex, 1, 2) + + if( head ~= "0x" and head ~= "0X") then + error("wrong hex format, should lead by 0x or 0X.") + end + + v = tonumber(string.sub(hex, 3), 16) + + return v; +end + +-------------------- +-- hex lib interface +local hex = { + to_dec = to_dec, + to_hex = to_hex, +} + +return hex + +end + +--[[ +-- test +d = 4341688 +h = to_hex(d) +print(h) +print(to_dec(h)) + + +for i = 1, 100000 do + h = hex.to_hex(i) + d = hex.to_dec(h) + if(d ~= i) then + error("failed " .. i .. ", " .. h) + end +end +--]] diff --git a/wikt/luabit/readme.txt b/wiktra/wikt/luabit/readme.txt similarity index 96% rename from wikt/luabit/readme.txt rename to wiktra/wikt/luabit/readme.txt index db83172..7a0a24e 100755 --- a/wikt/luabit/readme.txt +++ b/wiktra/wikt/luabit/readme.txt @@ -1,143 +1,143 @@ -LuaBit ------- -LuaBit is a bitwise operation lib completely written in Lua. It's -written in the belief that Lua is self-contained. 
- -The supported operations are: not, and, or, xor, right shift, logic -right shift and left shift. - -Several utilities are designed to leverage the power of bit operation: - 1. hex: a dec <-> hex number converter - 2. utf8: convert utf8 string to ucs2 - 3. noki: convert nokia pc suite backuped SMS file to .txt - -Under the MIT license. - -Visit http://luaforge.net/projects/bit/ to get latest version. - -Status ------- -Now LuaBit is in v0.4. -Release date: Mar 18, 2007 - -Content -------- -3 files are there for LuaBit: - 1) bit.lua - is the bitwise operation lib, all operations are implemented here. - - 2) hex.lua - is a helper lib for ease of using hex numbers with bitwise - operation. - - 3) noki.lua - a utility(based on bit and hex) to convert Nokia PC Suite backuped - SMS to a unicode .txt file, which is more accessible than the - original .nfb or .nfc file. - - 4) utf8.lua - convert utf8 string to ucs2 string - -How to use ----------- -Bit ---- -Just require 'bit' in your project and the bit lib will be -available: - bit.bnot(n) -- bitwise not (~n) - bit.band(m, n) -- bitwise and (m & n) - bit.bor(m, n) -- bitwise or (m | n) - bit.bxor(m, n) -- bitwise xor (m ^ n) - bit.brshift(n, bits) -- right shift (n >> bits) - bit.blshift(n, bits) -- left shift (n << bits) - bit.blogic_rshift(n, bits) -- logic right shift(zero fill >>>) - -Please note that bit.brshift and bit.blshift only support number within -32 bits. - -2 utility functions are provided too: - bit.tobits(n) -- convert n into a bit table(which is a 1/0 sequence) - -- high bits first - bit.tonumb(bit_tbl) -- convert a bit table into a number - -Hex ---- -For ease of using hex numbers, a utility hex lib is also included in -LuaBit. 
You can require 'hex' to use them: - hex.to_hex(n) -- convert a number to a hex string - hex.to_dec(hex) -- convert a hex string(prefix with '0x' or '0X') to number - -With hex, you can write code like: - bit.band(258, hex.to_dec('0xFF')) -to get the lower 8 bits of 258, that's 2. - -Noki ----- -require 'noki', to save your sms to .txt file: - noki.save_sms('nokia.nfb', 'sms.txt') -and you can view the output sms.txt in notepad or other editor which -support unicode. - -Utf8 ----- -require 'utf8', to convert a utf8 string: - ucs2_string = utf8.utf_to_uni(utf8_string) - -History -------- -v0.4 -* utf8 to ucs2 converter(utf8.lua). -* clean up for compatible with Lua5.1 and 5.0. -* add 'How to use' section for bit.lua and hex.lua. - -v0.3 -* noki added as an application of bit. -* typo correction. - -v0.2 -* add logic right shift(>>>) support: bit.blogic_rshift. -* add 2 utility functions: bit.tobits and bit.tonumb. -* update hex.to_hex(in hex.lua) to support negative number. - -v0.1 -LuaBit is written when I do my own game project(Fio at http://fio.edithis.info). -When loading resources, I have to do some bit operation. And I do not -like the embedded way of bit operation. So I decide to implement those -ops in lua. And that's LuaBit. It's not as fast as the embedded one, but -it works. And Lua is self-contained :-) - -To-Do List ---------- -v0.1 -It'll be useful if LuaBit support those bitwise op like: - bit.band(258, '0xFF') -ease to type and use. This will be supported in next release. - -v0.2 -I decide to delay this feature to later version for it'll mess up the -interface of LuaBit. - -v0.3 -May more utility functions add to Noki - phonebook might be a nice candidate. - -v0.4 -There's no UCS2 -> UTF8 convertion now, this feature may add in next release -or when the project need. - -Noki'll be be exluded from LuaBit in next release; I decide to let Noki grow -into a powerful tool to support more Nokia PC Suite backup format(.nfb, -.nfc and .nbu). 
- -Trial Noki demo at http://nokisms.googlepages.com/(in Chinese) - -Known issues ------------- -LuaBit doesn't play very well with negative number. The return value of the -bitwise operations might change to positive when applied on negative numbers -though the bit sequence is correct. So if you want do some arithmetic with -the result of bit operation, be careful. - -Feedback --------- -Please send your comments, bugs, patches or change request to -hanzhao(abrash_han@hotmail.com). +LuaBit +------ +LuaBit is a bitwise operation lib completely written in Lua. It's +written in the belief that Lua is self-contained. + +The supported operations are: not, and, or, xor, right shift, logic +right shift and left shift. + +Several utilities are designed to leverage the power of bit operation: + 1. hex: a dec <-> hex number converter + 2. utf8: convert utf8 string to ucs2 + 3. noki: convert nokia pc suite backuped SMS file to .txt + +Under the MIT license. + +Visit http://luaforge.net/projects/bit/ to get latest version. + +Status +------ +Now LuaBit is in v0.4. +Release date: Mar 18, 2007 + +Content +------- +3 files are there for LuaBit: + 1) bit.lua + is the bitwise operation lib, all operations are implemented here. + + 2) hex.lua + is a helper lib for ease of using hex numbers with bitwise + operation. + + 3) noki.lua + a utility(based on bit and hex) to convert Nokia PC Suite backuped + SMS to a unicode .txt file, which is more accessible than the + original .nfb or .nfc file. 
+ + 4) utf8.lua + convert utf8 string to ucs2 string + +How to use +---------- +Bit +--- +Just require 'bit' in your project and the bit lib will be +available: + bit.bnot(n) -- bitwise not (~n) + bit.band(m, n) -- bitwise and (m & n) + bit.bor(m, n) -- bitwise or (m | n) + bit.bxor(m, n) -- bitwise xor (m ^ n) + bit.brshift(n, bits) -- right shift (n >> bits) + bit.blshift(n, bits) -- left shift (n << bits) + bit.blogic_rshift(n, bits) -- logic right shift(zero fill >>>) + +Please note that bit.brshift and bit.blshift only support number within +32 bits. + +2 utility functions are provided too: + bit.tobits(n) -- convert n into a bit table(which is a 1/0 sequence) + -- high bits first + bit.tonumb(bit_tbl) -- convert a bit table into a number + +Hex +--- +For ease of using hex numbers, a utility hex lib is also included in +LuaBit. You can require 'hex' to use them: + hex.to_hex(n) -- convert a number to a hex string + hex.to_dec(hex) -- convert a hex string(prefix with '0x' or '0X') to number + +With hex, you can write code like: + bit.band(258, hex.to_dec('0xFF')) +to get the lower 8 bits of 258, that's 2. + +Noki +---- +require 'noki', to save your sms to .txt file: + noki.save_sms('nokia.nfb', 'sms.txt') +and you can view the output sms.txt in notepad or other editor which +support unicode. + +Utf8 +---- +require 'utf8', to convert a utf8 string: + ucs2_string = utf8.utf_to_uni(utf8_string) + +History +------- +v0.4 +* utf8 to ucs2 converter(utf8.lua). +* clean up for compatible with Lua5.1 and 5.0. +* add 'How to use' section for bit.lua and hex.lua. + +v0.3 +* noki added as an application of bit. +* typo correction. + +v0.2 +* add logic right shift(>>>) support: bit.blogic_rshift. +* add 2 utility functions: bit.tobits and bit.tonumb. +* update hex.to_hex(in hex.lua) to support negative number. + +v0.1 +LuaBit is written when I do my own game project(Fio at http://fio.edithis.info). +When loading resources, I have to do some bit operation. 
And I do not +like the embedded way of bit operation. So I decide to implement those +ops in lua. And that's LuaBit. It's not as fast as the embedded one, but +it works. And Lua is self-contained :-) + +To-Do List +--------- +v0.1 +It'll be useful if LuaBit support those bitwise op like: + bit.band(258, '0xFF') +ease to type and use. This will be supported in next release. + +v0.2 +I decide to delay this feature to later version for it'll mess up the +interface of LuaBit. + +v0.3 +May more utility functions add to Noki - phonebook might be a nice candidate. + +v0.4 +There's no UCS2 -> UTF8 convertion now, this feature may add in next release +or when the project need. + +Noki'll be be exluded from LuaBit in next release; I decide to let Noki grow +into a powerful tool to support more Nokia PC Suite backup format(.nfb, +.nfc and .nbu). + +Trial Noki demo at http://nokisms.googlepages.com/(in Chinese) + +Known issues +------------ +LuaBit doesn't play very well with negative number. The return value of the +bitwise operations might change to positive when applied on negative numbers +though the bit sequence is correct. So if you want do some arithmetic with +the result of bit operation, be careful. + +Feedback +-------- +Please send your comments, bugs, patches or change request to +hanzhao(abrash_han@hotmail.com). diff --git a/wiktra/wikt/mw-hash.lua b/wiktra/wikt/mw-hash.lua new file mode 100644 index 0000000..a43ec8f --- /dev/null +++ b/wiktra/wikt/mw-hash.lua @@ -0,0 +1,20 @@ +-- Simplified implementation of mw.hash for running WikiMedia Scribunto code +-- under Python +-- +-- Copyright (c) 2020 Tatu Ylonen. 
-- Module table for the mw.html emulation.  Kept global as in the original.
mw_html = {
}

-- Prototype for HTML builder nodes.  Methods attached elsewhere in the file:
local Html = {
  -- node
  -- wikitext
  -- newline
  -- tag
  -- attr
  -- getAttr
  -- addClass
  -- css
  -- cssText
  -- done
  -- allDone
}

--- Create a new, parentless HTML builder node.
-- @param tagName tag name, or nil for a tagless container node
-- @param args optional table; args.selfClosing forces a self-closing tag
-- @return a new Html object
function mw_html.create(tagName, args)
  local selfClosing = args and args.selfClosing
  -- These tags are always self-closing.  The original compared the
  -- misspelled global `tagname` (always nil) for "references".
  if tagName == "hr" or tagName == "br" or tagName == "references" then
    selfClosing = true
  end
  local obj = Html:new()
  obj._parent = nil
  obj._tagName = tagName -- can be nil
  obj._attrs = {}
  obj._css = {}
  obj._children = {}
  obj._selfClosing = selfClosing
  return obj
end

--- Instantiate an object whose method lookups fall back to this prototype.
-- @param obj optional table to turn into an instance
-- @return obj (or a fresh table) with its metatable set
function Html:new(obj)
  obj = obj or {}
  setmetatable(obj, self)
  self.__index = self
  return obj
end

--- Fold the properties collected by :css() into the "style" attribute.
-- Declarations are sorted so the output is deterministic, then handed to
-- :cssText(); the pending property table is cleared afterwards.
function Html:_push_css()
  local parts = {}
  for k, v in pairs(self._css) do
    if type(v) ~= "function" then
      table.insert(parts, "" .. k .. ":" .. v .. ";")
    end
  end
  if #parts == 0 then return "" end
  table.sort(parts)
  local css = table.concat(parts, "")
  self._css = {}
  self:cssText(css)
end
--- Render the opening tag, including attributes.
-- @return "" for a tagless node, otherwise "<name attrs>" or "<name attrs />"
function Html:_start()
  if self._tagName == nil then return "" end
  local parts = {"<", self._tagName, self:_attrs_to_string()}
  if self._selfClosing then table.insert(parts, " /") end
  table.insert(parts, ">")
  return table.concat(parts, "")
end

--- Render the closing tag.
-- @return "" for tagless or self-closing nodes, otherwise "</name>"
function Html:_end()
  if self._tagName == nil then return "" end
  if self._selfClosing then return "" end
  -- The original returned "" here as well, so elements were never closed.
  return "</" .. self._tagName .. ">"
end

--- Serialize this node and all of its children to a string.
function Html:__tostring()
  local parts = {}
  local start_tag = self:_start()
  if start_tag then table.insert(parts, start_tag) end
  for _, child in ipairs(self._children) do
    table.insert(parts, tostring(child))
  end
  local end_tag = self:_end()
  if end_tag then table.insert(parts, end_tag) end
  return table.concat(parts, "")
end
--- Set an attribute, or several when given a table of name/value pairs.
-- @param name attribute name, or a table mapping names to values
-- @param value attribute value (ignored for the table form)
-- @return self, for chaining
function Html:attr(name, value)
  if type(name) == "table" then
    for k, v in pairs(name) do
      if type(v) ~= "function" then
        self:attr(k, v)
      end
    end
    -- Return here: the original fell through and stored the table itself
    -- as an attribute key.
    return self
  end
  self._attrs[name] = value
  return self
end

--- Return the current value of an attribute (nil when unset).
function Html:getAttr(name)
  return self._attrs[name]
end

--- Append a class name to the "class" attribute unless already present.
-- @return self on every path, so call chains never break
function Html:addClass(new_class)
  if new_class == nil then return self end
  local classes = self:getAttr("class") or ""
  for cl in mw.ustring.gmatch(classes, "([^%s]+)") do
    if cl == new_class then return self end
  end
  if classes == "" then
    classes = new_class
  else
    classes = classes .. " " .. new_class
  end
  self:attr("class", classes)
  return self
end

--- Record a CSS property, or several when given a table.
-- @return self, for chaining
function Html:css(name, value)
  if type(name) == "table" then
    for k, v in pairs(name) do
      if type(v) ~= "function" then
        self:css(k, v)
      end
    end
    return self
  end
  self._css[name] = value
  return self
end

--- Append raw CSS text to the "style" attribute.
-- A trailing ";" is added when missing.  nil or "" is a no-op.
-- @return self, for chaining
function Html:cssText(new_css)
  if new_css == nil or new_css == "" then return self end
  if mw.ustring.sub(new_css, -1) ~= ";" then new_css = new_css .. ";" end
  local css = self:getAttr("style") or ""
  if css == "" then
    css = new_css
  else
    css = css .. new_css
  end
  self:attr("style", css)
  return self
end
--- Return the fallback language codes for this language object.
-- @return array of codes from the `fallbacks` table, or an empty table
function Language:getFallbackLanguages()
  -- The original indexed with the undefined global `lang`, so the result
  -- was always empty; the object's own code is what must be looked up.
  return fallbacks[self.code] or {}
end
ustring.sub(s, 2) +end + +function Language:caseFold(s) + return self:lc(s) +end + +function Language:formatNum(n, options) + local noCommafy = options and options.noCommafy + -- implement language-specific conventions + return tostring(n) +end + +function Language:formatDate(format, timestamp, localtime) + -- XXX currently ignores localtime + if not timestamp then + timestamp = os.date("%Y-%m-%d %X") + end + -- XXX actually format the time. See + -- https://www.mediawiki.org/wiki/Help:Extension:ParserFunctions#.23time + -- form supported timestamp formats and format specification. + return timestamp +end + +function Language:formatDuration(seconds, allowedIntervals) + -- XXX actually implement language-specific formatting + if seconds == 1 then + return "1 second" + end + return tostring(seconds) .. " seconds" +end + +function Language:parseFormattedNumber(s) + -- XXX make this language-specific + s = ustring.gsub(s, ",", "") + return tonumber(s) +end + +function Language:convertPlural(n, forms) + -- XXX ... form + print("XXX Language.convertPlural not yet implemented") + assert(false) +end + +function Language:plural(n, forms) + -- XXX ... 
--- Pick the form matching a gender.
-- Callable as gender(what, masculine, feminine, neutral) or as
-- gender(what, {masculine, feminine, neutral}).
-- @param what "feminine"/"female" or "neutral"; anything else is masculine
function Language:gender(what, masculine, feminine, neutral)
  if type(masculine) == "table" then
    -- Lua arrays are 1-based; the original read indices 0..2 and so
    -- shifted every form by one.
    local forms = masculine
    masculine = forms[1]
    feminine = forms[2]
    neutral = forms[3]
  end
  -- XXX if what is a registered user name, determine its gender
  if what == "feminine" or what == "female" then return feminine end
  if what == "neutral" then return neutral end
  return masculine
end

--- Return the arrow character for a direction name.
function Language:getArrow(direction)
  -- XXX implement language-specific
  if direction == "forwards" then return "\u{2192}" end
  if direction == "backwards" then return "\u{2190}" end
  if direction == "left" then return "\u{2190}" end
  if direction == "right" then return "\u{2192}" end
  if direction == "up" then return "\u{2191}" end
  if direction == "down" then return "\u{2193}" end
  print("Language.getArrow unrecognized direction", direction)
  return "\u{2192}"
end

--- Return the writing direction, "ltr" or "rtl".
function Language:getDir()
  -- XXX make this language specific
  return "ltr"
end

--- Return the directional mark character (U+200E LRM or U+200F RLM).
-- @param opposite when true, return the mark for the opposite direction
function Language:getDirMark(opposite)
  local dir = self:getDir()
  if opposite then
    if dir == "ltr" then dir = "rtl" else dir = "ltr" end
  end
  -- The original compared against "rtr", so RLM was never returned.
  if dir == "rtl" then return "\u{200f}" end
  return "\u{200e}"
end

--- Return the directional mark as an HTML entity ("&lrm;" or "&rlm;").
-- @param opposite when true, return the entity for the opposite direction
function Language:getDirMarkEntity(opposite)
  local dir = self:getDir()
  if opposite then
    if dir == "ltr" then dir = "rtl" else dir = "ltr" end
  end
  if dir == "rtl" then return "&rlm;" end
  return "&lrm;"
end

-- Interval names with their length in seconds, largest first.
local intervalBases = {
  { "millennia", 1000 * 3600 * 24 * 365 },
  { "centuries", 100 * 3600 * 24 * 365 },
  { "decades", 10 * 3600 * 24 * 365 },
  { "years", 3600 * 24 * 365 },
  { "days", 3600 * 24 },
  { "hours", 3600 },
  { "minutes", 60 }
}

--- Break a duration into whole units.
-- @param seconds duration in seconds
-- @param allowedIntervals optional array of interval names to use;
--        defaults to every entry of intervalBases
-- @return table mapping interval name to count; the remainder is stored
--         under "seconds"
function Language.getDurationIntervals(self, seconds, allowedIntervals)
  -- nil means "no filter"; otherwise build a set for constant-time lookup
  local allowed = nil
  if allowedIntervals then
    allowed = {}
    for i = 1, #allowedIntervals do
      allowed[allowedIntervals[i]] = true
    end
  end
  local ret = {}
  for i = 1, #intervalBases do
    -- entries are {name, seconds}; Lua arrays start at 1
    local name = intervalBases[i][1]
    local interval = intervalBases[i][2]
    if allowed == nil or allowed[name] then
      local v = math.floor(seconds / interval)
      seconds = seconds - v * interval
      ret[name] = v
    end
  end
  ret["seconds"] = seconds
  return ret
end
--- Check whether a string has a valid form for a language code.
-- Rejects the empty string and anything containing one of : ' " / \ < >
-- @param code string to check
-- @return boolean
function mw_language.isValidCode(code)
  -- `len` is not a Lua function; use the length operator
  if #code < 1 then return false end
  if ustring.find(code, "[:'\"/\\<>]") then return false end
  return true
end
isSubject=true} +local mediawiki_talk_ns = Namespace:new{id=9, name="MediaWiki_talk", + isTalk=true, subject=mediawiki_ns} +local template_ns = Namespace:new{id=10, name="Template", isSubject=true} +local template_talk_ns = Namespace:new{id=11, name="Template_talk", isTalk=true, + subject=template_ns} +local help_ns = Namespace:new{id=12, name="Help", isSubject=true} +local help_talk_ns = Namespace:new{id=13, name="Help_talk", isTalk=true, + subject=help_ns} +local category_ns = Namespace:new{id=14, name="Category", isSubject=true} +local category_talk_ns = Namespace:new{id=15, name="Category_talk", isTalk=true, + subject=category_ns} +local appendix_ns = Namespace:new{id=100, name="Appendix", isSubject=true} +local appendix_talk_ns = Namespace:new{id=101, name="Appendix_talk", + isTalk=true, subject=appendix_ns} +local thesaurus_ns = Namespace:new{id=110, name="Thesaurus", isSubject=true} +local thesaurus_talk_ns = Namespace:new{id=111, name="Thesaurus_talk", + isTalk=true, subject=thesaurus_ns} +local reconstruction_ns = Namespace:new{id=118, name="Reconstruction", + isSubject=true} +local reconstruction_talk_ns = Namespace:new{id=119, name="Reconstruction_talk", + isTalk=true, + subject=reconstruction_ns} +local module_ns = Namespace:new{id=828, name="Module", isIncludable=true, + isSubject=true} +local module_talk_ns = Namespace:new{id=829, name="Module_talk", isTalk=true, + subject=module_ns} +main_ns.talk = talk_ns +user_ns.talk = user_talk_ns +project_ns.talk = project_talk_ns +mediawiki_ns.talk = mediawiki_talk_ns +template_ns.talk = template_talk_ns +help_ns.talk = help_talk_ns +category_ns.talk = category_talk_ns +appendix_ns.talk = appendix_talk_ns +thesaurus_ns.talk = thesaurus_talk_ns +reconstruction_ns.talk = reconstruction_talk_ns +module_ns.talk = module_talk_ns + +function add_ns(t, ns) + assert(ns.name ~= nil) + assert(ns.id ~= nil) + t[ns.id] = ns + t[ns.name] = ns +end + +local mw_site_namespaces = {} +add_ns(mw_site_namespaces, media_ns) 
+add_ns(mw_site_namespaces, special_ns) +add_ns(mw_site_namespaces, main_ns) +add_ns(mw_site_namespaces, talk_ns) +add_ns(mw_site_namespaces, user_ns) +add_ns(mw_site_namespaces, user_talk_ns) +add_ns(mw_site_namespaces, project_ns) +add_ns(mw_site_namespaces, project_talk_ns) +add_ns(mw_site_namespaces, image_ns) +add_ns(mw_site_namespaces, image_talk_ns) +add_ns(mw_site_namespaces, mediawiki_ns) +add_ns(mw_site_namespaces, mediawiki_talk_ns) +add_ns(mw_site_namespaces, template_ns) +add_ns(mw_site_namespaces, template_talk_ns) +add_ns(mw_site_namespaces, help_ns) +add_ns(mw_site_namespaces, help_talk_ns) +add_ns(mw_site_namespaces, category_ns) +add_ns(mw_site_namespaces, category_talk_ns) +add_ns(mw_site_namespaces, appendix_ns) +add_ns(mw_site_namespaces, appendix_talk_ns) +add_ns(mw_site_namespaces, thesaurus_ns) +add_ns(mw_site_namespaces, thesaurus_talk_ns) +add_ns(mw_site_namespaces, reconstruction_ns) +add_ns(mw_site_namespaces, reconstruction_talk_ns) +add_ns(mw_site_namespaces, module_ns) +add_ns(mw_site_namespaces, module_talk_ns) + +local mw_site_contentNamespaces = {} +add_ns(mw_site_contentNamespaces, main_ns) +add_ns(mw_site_contentNamespaces, appendix_ns) +add_ns(mw_site_contentNamespaces, thesaurus_ns) +add_ns(mw_site_contentNamespaces, reconstruction_ns) + +local mw_site_subjectNamespaces = {} +add_ns(mw_site_subjectNamespaces, media_ns) +add_ns(mw_site_subjectNamespaces, special_ns) +add_ns(mw_site_subjectNamespaces, main_ns) +add_ns(mw_site_subjectNamespaces, user_ns) +add_ns(mw_site_subjectNamespaces, project_ns) +add_ns(mw_site_subjectNamespaces, image_ns) +add_ns(mw_site_subjectNamespaces, mediawiki_ns) +add_ns(mw_site_subjectNamespaces, template_ns) +add_ns(mw_site_subjectNamespaces, help_ns) +add_ns(mw_site_subjectNamespaces, category_ns) +add_ns(mw_site_subjectNamespaces, appendix_ns) +add_ns(mw_site_subjectNamespaces, thesaurus_ns) +add_ns(mw_site_subjectNamespaces, reconstruction_ns) +add_ns(mw_site_subjectNamespaces, module_ns) + 
--- Find the namespace object for a name, alias or numeric id.
-- Internal helper.  String input is trimmed of surrounding whitespace and
-- matched case-insensitively through mw.site.matchNamespaceName.
-- @param name namespace name (string) or id (number)
-- @return the namespace object, or nil when nothing matches
function mw_site.findNamespace(name)
  if type(name) == "string" then
    -- strip surrounding whitespace; the original pattern "^%s(.-)%s*$"
    -- demanded exactly one leading whitespace character and so left
    -- ordinary names (and their trailing blanks) untouched
    name = name:gsub("^%s*(.-)%s*$", "%1")
  end
  for _, ns in pairs(mw.site.namespaces) do
    if mw.site.matchNamespaceName(ns, name) then
      return ns
    end
  end
  return nil
end
--- Iterator form of mw_text.split.
-- @param text string to split
-- @param pattern separator pattern
-- @param plain when true, treat pattern as a literal string
-- @return iterator function yielding each piece in order, then nil
function mw_text.gsplit(text, pattern, plain)
  local result = mw_text.split(text, pattern, plain)
  local i = 0
  -- `#` replaces table.getn, which no longer exists outside Lua 5.0
  -- compatibility builds
  local n = #result
  return function()
    i = i + 1
    if i <= n then return result[i] end
  end
end
--- Shorten text to at most `length` characters.
-- A positive length keeps the start of the text, a negative length keeps
-- the end.  Lengths are counted in code points via mw.ustring, matching
-- the mw.ustring.sub calls used for cutting.
-- @param text string to shorten
-- @param length maximum length; nil or 0 returns text unchanged
-- @param ellipsis marker added at the cut (default "…"); false for none
-- @param adjustLength when true the ellipsis counts towards `length`
-- @return the possibly shortened string
function mw_text.truncate(text, length, ellipsis, adjustLength)
  if not length or length == 0 then
    return text
  end
  if ellipsis == nil then
    ellipsis = "…"
  end
  local text_len = mw.ustring.len(text)
  -- Compare against the magnitude so negative lengths on short strings
  -- are left alone too (the original only handled positive lengths here).
  if text_len <= math.abs(length) then
    return text
  end
  local ellipsis_len = 0
  if adjustLength and ellipsis then
    ellipsis_len = mw.ustring.len(ellipsis)
  end
  if length > 0 then
    -- clamp at 0 so an ellipsis longer than `length` cannot turn the end
    -- index negative (which would count from the end of the string)
    local keep = math.max(length - ellipsis_len, 0)
    text = mw.ustring.sub(text, 1, keep)
    if ellipsis then
      text = text .. ellipsis
    end
  else
    local keep = math.max(-length - ellipsis_len, 0)
    text = mw.ustring.sub(text, text_len - keep + 1)
    if ellipsis then
      text = ellipsis .. text
    end
  end
  return text
end

--- Placeholder for mw.text.unstripNoWiki; returns its argument unchanged.
function mw_text.unstripNoWiki(s)
  print("mw.text.unstripNoWiki called")
  -- We don't currently do anything here
  return s
end

--- Equivalent to killMarkers(unstripNoWiki(s)).
function mw_text.unstrip(s)
  -- the original called the misspelled mw.text.untripNoWiki (nil)
  return mw.text.killMarkers(mw.text.unstripNoWiki(s))
end
list[2] end + local lst = {} + for i = 1, #list - 2 do + table.insert(lst, list[i]) + table.insert(lst, separator) + table.insert(lst, " ") + end + table.insert(lst, list[#list - 1]) + table.insert(lst, " ") + table.insert(lst, conjunction) + table.insert(lst, " ") + table.insert(lst, list[#list]) + return table.concat(lst, "") +end + +function mw_text.split(text, pattern, plain) + local result = {} + local start = 1 + local length = mw.ustring.len(text) + while start <= length do + local ofs, last = mw.ustring.find(text, pattern, start, plain) + if ofs == nil then + break + elseif ofs > last then + -- empty match + table.insert(result, mw.ustring.sub(text, start, ofs)) + start = ofs + 1 + if start == length then + table.insert(result, mw.ustring.sub(text, start)) + end + if start >= length then + return result + end + elseif ofs == start then + table.insert(result, "") + start = last + 1 + else + table.insert(result, mw.ustring.sub(text, start, ofs - 1)) + start = last + 1 + end + end + table.insert(result, mw.ustring.sub(text, start)) + return result +end + +function mw_text.nowiki(s) + s = s:gsub("&", "&") + s = s:gsub('"', """) + s = s:gsub("'", "'") + s = s:gsub("<", "<") + s = s:gsub(">", ">") + s = s:gsub("=", "=") + s = s:gsub("%[", "[") + s = s:gsub("%]", "]") + s = s:gsub("{", "{") + s = s:gsub("}", "}") + s = s:gsub("|", "|") + s = s:gsub("^#", "#") + s = s:gsub("\n#", "\n#") + s = s:gsub("^:", ":") + s = s:gsub("\n:", "\n:") + s = s:gsub("^;", ";") + s = s:gsub("\n;", "\n;") + s = s:gsub("^ ", " ") + s = s:gsub("\n ", "\n ") + s = s:gsub("^\t", " ") + s = s:gsub("\n\t", "\n ") + s = s:gsub("\n\n", "\n ") + s = s:gsub("^%-%-%-%-", "−---") + s = s:gsub("\n%-%-%-%-", "\n−---") + s = s:gsub("^__", "__") + s = s:gsub("\n__", "\n__") + s = s:gsub("://", "://") + s = s:gsub("ISBN ", "ISBN ") + s = s:gsub("ISBN\t", "ISBN ") + s = s:gsub("ISBN\n", "ISBN ") + s = s:gsub("RFC ", "ISBN ") + s = s:gsub("RFC\t", "ISBN ") + s = s:gsub("RFC\n", "ISBN ") + s = 
s:gsub("PMID ", "ISBN ") + s = s:gsub("PMID\t", "ISBN ") + s = s:gsub("PMID\n", "ISBN ") + return s +end + +return mw_text diff --git a/wiktra/wikt/mw-title.lua b/wiktra/wikt/mw-title.lua new file mode 100644 index 0000000..71aa713 --- /dev/null +++ b/wiktra/wikt/mw-title.lua @@ -0,0 +1,283 @@ +-- Simplified implementation of mw.title for running WikiMedia Scribunto +-- code under Python +-- +-- Copyright (c) 2020-2021 Tatu Ylonen. See file LICENSE and https://ylonen.org + +local mw_title_meta = { +} + +function mw_title_meta:__index(key) + local v = rawget(mw_title_meta, key) + if v ~= nil then return v end + if key == "basePageTitle" then + return mw.title.new(self.baseText, self.nsText) + end + if key == "rootPageTitle" then + return mw.title.new(self.rootText, self.nsText) + end + if key == "subjectPageTitle" then + return mw.title.new(self.text, self.subjectNsText) + end + if key == "contentModel" then return "wikitext" end + if key == "talkPageTitle" then + local talk_ns = mw.site.namespaces[self.namespace].talk + if talk_ns == nil then return nil end + return mw.title.new(self.text, talk_ns.name) + end + if key == "protectionLevels" then return { nil } end + if key == "cascadingProtection" then + return { restrictions = {}, sources = {} } + end + if key == "canTalk" then return false end + if key == "redirectTarget" then + return mw.title.new(self._redirectTarget) + end + return nil +end + +function mw_title_meta.__eq(a, b) + return a.prefixedText == b.prefixedText +end + +function mw_title_meta.__lt(a, b) + return a.prefixedText < b.prefixedText +end + +function mw_title_meta:__tostring() + return self.prefixedText +end + +function mw_title_meta:isSubpageOf(titleobj2) + assert(type(titleobj2) == "table") + if self.nsText ~= titleobj2.nsText then return false end + local t1 = titleobj2.text + local t2 = self.text + if #t1 >= #t2 then + return false + end + if mw.ustring.sub(t2, 1, #t1) ~= t1 then + return false + end + if mw.ustring.sub(t2, #t1 + 1, #t1 + 1) 
~= "/" then + return false + end + return true +end + +function mw_title_meta:inNamespace(ns) + assert(type(ns) == "string" or type(ns) == "number") + if type(ns) == "string" then + -- strip surrounding whitespaces (leading run may be empty or longer than one character) + ns = ns:gsub("^%s*(.-)%s*$", "%1") + end + local ns1 = mw.site.namespaces[self.namespace] + local ns2 = mw.site.namespaces[ns] + if ns2 == nil then + return false + end + if ns1.name == ns2.name then return true end + return false +end + +function mw_title_meta:inNamespaces(...) + for i, ns in ipairs({...}) do + if self:inNamespace(ns) then return true end + end + return false +end + +function mw_title_meta:hasSubjectNamespace(namespace) + local ns = mw.site.findNamespace(namespace) + return ns ~= nil and ns.name == self.subjectNsText -- unknown namespace: false instead of indexing nil +end + +function mw_title_meta:subPageTitle(text) + return mw.title.makeTitle(self.namespace, self.text .. "/" .. text) +end + +function mw_title_meta:partialUrl() + return mw.uri.encode(self.text, "WIKI") +end + +function mw_title_meta:fullUrl(query, proto) + local uri = mw.uri.fullUrl(self.fullText, query) + if proto ~= nil and proto ~= "" then uri = proto .. ":" .. 
uri end + return uri +end + +function mw_title_meta:localUrl(query) + return mw.uri.localUrl(self.fullText, query) +end + +function mw_title_meta:canonicalUrl(query) + return mw.uri.canonicalUrl(self.fullText, query) +end + +function mw_title_meta:getContent() + return mw_python_get_page_content(self.fullText) +end + +local mw_title = { + -- equals + -- compare + -- getCurrentTitle + -- new + -- makeTitle (see below) +} + +function mw_title.makeTitle(namespace, title, fragment, interwiki) + if title == nil or title == "" then return nil end + if title:find("%%[0-9a-fA-F][0-9a-fA-F]") then return nil end + if title:find("#") then return nil end + if title:find("<") then return nil end + if title:find(">") then return nil end + if title:find("%[") then return nil end + if title:find("%]") then return nil end + if title:find("|") then return nil end + if title:find("{") then return nil end + if title:find("}") then return nil end + if title:find("_") then return nil end + if title:sub(1, 1) == ":" then return nil end + if title == "." or title == ".." then return nil end + if title:sub(1, 2) == "./" or title:sub(1, 3) == "../" then return nil end + if title:find("/%./") or title:find("/%.%./") then return nil end + if title:sub(-2) == "/." or title:sub(-3) == "/.." 
then return nil end + if #title > 255 then return nil end + if title:sub(1, 1) == " " or title:sub(-1) == " " then return nil end + if title:find(" ") then return nil end + if title:find("~~~~") then return nil end + local prefixes = {"Talk:", "WP:", "WT:", "Project:", "Image:", + "Media:", "Special:"} + -- XXX other disallowed prefixes, see + -- https://www.mediawiki.org/wiki/Special:Interwiki + for i, prefix in ipairs(prefixes) do + if title:sub(1, #prefix) == prefix then return nil end + end + -- XXX there are also other disallowed titles, see + -- https://www.mediawiki.org/wiki/Manual:Page_title + if not namespace or namespace == "" then namespace = "Main" end + local ns = mw.site.findNamespace(namespace) + if not ns then + return nil + end + if interwiki then + error("XXX unimplemented: mw_title.makeTitle called with interwiki: " .. + interwiki) + end + -- XXX how should interwiki be handled? + -- w: (wikipedia) + -- m: (or meta:) for Meta-Wiki + -- mw: (MediaWiki) + -- wikt: (Wiktionary) + -- en: (English) + -- fr: (French language) + -- de: (German language) + -- and other language prefixes + -- :en: links to English wikipedia etc + -- interwiki prefixes are case-insensitive + local isContent = false + for i, v in pairs(mw.site.contentNamespaces) do + if mw.site.matchNamespaceName(v, namespace) then + isContent = true + break + end + end + local root = mw.ustring.gsub(title, "/.*$", "") + local parent = mw.ustring.gsub(title, "/[^/]*$", "") + local subpage = mw.ustring.gsub(title, "^.*/", "") + local fullName + if ns.name == "Main" then + fullName = title + else + fullName = ns.name .. ":" .. title + end + local withFrag + if fragment then + withFrag = fullName .. "#" .. fragment + else + withFrag = fullName + end + + -- mw_title.python_get_page_info is set in lua_set_fns + local dt = mw_python_get_page_info(ns.name .. ":" .. 
title) + local id = dt.id + local exists = dt.exists + local redirectTo = dt.redirectTo + + local t = { + namespace = ns.id, + id = id, + interwiki = interwiki or "", + fragment = fragment, + nsText = ns.name ~= "Main" and ns.name or "", + subjectNsText = (ns.subject or ns).name, + text = title, + prefixedText = ns.name .. ":" .. title, + fullText = withFrag, + rootText = root, + baseText = parent, + subpageText = subpage, + exists = exists, + -- XXX file: see https://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual + file = nil, + isContentPage = isContent, + isExternal = interwiki ~= nil, -- ??? + isLocal = interwiki == nil, -- ??? + isRedirect = redirectTo ~= nil, + isSpecialPage = ns.name == "Special", + isSubpage = title ~= parent, -- subpage iff stripping the last "/segment" changed the title + isTalkPage = (ns.name == "Talk" or + mw.ustring.find(ns.name, "_talk") ~= nil), + _redirectTarget = redirectTo, + } + setmetatable(t, mw_title_meta) + return t +end + +function mw_title.new(text, namespace) + if text == nil then return nil end + if type(text) == "number" then + error("XXX mw.title.new with id not yet implemented") + end + assert(type(text) == "string") + if not namespace then namespace = "Main" end + local idx = mw.ustring.find(text, ":") + if idx ~= nil then + local ns1 = mw.ustring.sub(text, 1, idx - 1) + local nsobj = mw.site.findNamespace(ns1) + if nsobj ~= nil then + namespace = ns1 + text = mw.ustring.sub(text, idx + 1) + end + end + return mw_title.makeTitle(namespace, text) +end + +function mw_title.getCurrentTitle() + local t = mw_title.new(_mw_pageTitle) + if t == nil then + print("mw.title.getCurrentTitle returns nil") + end + return t + -- local frame = mw.getCurrentFrame() + -- local parent = frame:getParent() or frame + -- local title = parent:getTitle() + -- local newtitle = mw_title.new(title, "Main") + -- return newtitle +end + +function mw_title.equals(a, b) + return a.fullText == b.fullText +end + +function mw_title.compare(a, b) + if a.interwiki < b.interwiki then return -1 end + 
if a.interwiki > b.interwiki then return 1 end + if a.nsText < b.nsText then return -1 end + if a.nsText > b.nsText then return 1 end + if a.text < b.text then return -1 end + if a.text > b.text then return 1 end + return 0 +end + +return mw_title diff --git a/wiktra/wikt/mw-uri.lua b/wiktra/wikt/mw-uri.lua new file mode 100644 index 0000000..d72479a --- /dev/null +++ b/wiktra/wikt/mw-uri.lua @@ -0,0 +1,323 @@ +-- Simplified implementation of mw.uri for running WikiMedia Scribunto code +-- under Python +-- +-- Copyright (c) 2020 Tatu Ylonen. See file LICENSE and https://ylonen.org + +local scribunto_mwuri = require("mw.uri") + +local DEFAULT_HOST = "wiki.local" + +local Uri = { + protocol = "https", + -- user + -- password + host = DEFAULT_HOST, + port = 80, + path = "/w/index.php", + query = {}, + fragment = "" + -- userInfo + -- hostPort + -- authority + -- queryString + -- relativePath + -- completeUrl (internal) +} + +function Uri:new(obj) + obj = obj or {} + obj.query = rawget(obj, "query") or {} -- own table per instance; Uri:extend must not write into the prototype's query + self.__index = self + return setmetatable(obj, self) +end + +function Uri:__tostring() + return self.completeUrl +end + +function Uri:update() + -- internal function for updating userInfo, hostPort, authority, + -- queryString, relativePath, completeUrl after computing rest + local enc = function(s) return mw.uri.encode(s, "QUERY") end + local url = enc(self.protocol) .. "://" + if self.user then + local userinfo = enc(self.user) + if self.password then + userinfo = userinfo .. ":" .. enc(self.password) + end + self.userInfo = userinfo + else + self.userInfo = "" + end + local hostport = self.host + if self.port and self.port ~= 80 then + hostport = hostport .. ":" .. tostring(self.port) + end + self.hostPort = hostport + if self.userInfo ~= "" then + self.authority = self.userInfo .. "@" .. self.hostPort + else + self.authority = self.hostPort + end + url = url .. 
self.authority + local relpath = mw.uri.encode(self.path, "WIKI") + local qs = {} + local first = true + for k, v in pairs(self.query) do + if type(v) ~= "function" then + if v == false then + table.insert(qs, k) + else + table.insert(qs, enc(tostring(k)) .. "=" .. enc(tostring(v))) + end + end + end + table.sort(qs) + self.queryString = table.concat(qs, "&") + if self.queryString ~= "" then + relpath = relpath .. "?" .. self.queryString + end + if self.fragment and self.fragment ~= "" then + relpath = relpath .. "#" .. enc(self.fragment) + end + self.relativePath = relpath + url = url .. relpath + self.completeUrl = url +end + +function Uri:parse(s) + local ofs + if mw.ustring.match(s, "[a-z0-9]+:") then + ofs = mw.ustring.find(s, ":") + self.protocol = mw.ustring.sub(s, 1, ofs - 1) + s = mw.ustring.sub(s, ofs + 1) + end + if mw.ustring.sub(s, 1, 2) == "//" then + s = mw.ustring.sub(s, 3) + -- next is optional user@password, followed by mandatory host + if mw.ustring.match(s, "[^#?/@]+@.*") then + ofs = mw.ustring.find(s, "@") + local userpass = mw.ustring.sub(s, 1, ofs - 1) + s = mw.ustring.sub(s, ofs + 1) + ofs = mw.ustring.find(userpass, ":") + if ofs then + local user = mw.ustring.sub(userpass, 1, ofs - 1) + local pass = mw.ustring.sub(userpass, ofs + 1) + self.user = mw.uri.decode(user, "QUERY") + self.pass = mw.uri.decode(pass, "QUERY") + end + end + -- next is host + local host + ofs = mw.ustring.find(s, "/") + if ofs then + host = mw.ustring.sub(s, 1, ofs - 1) + s = mw.ustring.sub(s, ofs) -- initial / is part of path + self.host = mw.uri.decode(host, "QUERY") + else + -- there is no path, but there could be fragment or query string + ofs = mw.ustring.find(s, "#") + if ofs then + host = mw.ustring.sub(s, 1, ofs - 1) + s = mw.ustring.sub(s, ofs - 1) + self.host = mw.uri.decode(host, "QUERY") + else + ofs = mw.ustring.find(s, "?") + if ofs then + host = mw.ustring.sub(s, 1, ofs - 1) + s = mw.ustring.sub(s, ofs - 1) + self.host = mw.uri.decode(host, "QUERY") + 
else + self.host = mw.uri.decode(host, s) + s = "" + end + end + end + end + -- whatever remains is path, fragment and/or query string + local qs = "" + ofs = mw.ustring.find(s, "?") + if ofs then + -- have query string + local path = mw.ustring.sub(s, 1, ofs - 1) + s = mw.ustring.sub(s, ofs + 1) + self.path = mw.uri.decode(path, "PATH") + ofs = mw.ustring.find(s, "#") + if ofs then + -- have both query string and fragment + qs = mw.ustring.sub(s, ofs - 1) + s = mw.ustring.sub(s, ofs + 1) + else + -- no fragment after query string + qs = s + s = "" + end + else + -- no query string + ofs = mw.ustring.find(s, "#") + if ofs then + -- have fragment + local path = mw.ustring.sub(s, 1, ofs - 1) + self.path = mw.uri.decode(path, "PATH") + s = mw.ustring.sub(s, ofs + 1) + else + -- no fragment + self.path = mw.uri.decode(s, "PATH") + s = "" + end + end + + -- parse any trailing fragment + if s ~= "" then + if mw.ustring.sub(s, 1, 1) ~= "#" then + print("Uri:parse unexpected stuff at end:", s) + s = "" + else + local frag = mw.ustring.sub(s, 2) + self.fragment = mw.uri.decode(frag, "PATH") + end + end + + -- parse query string into a table + self.query = {} + if qs ~= "" then + for x in mw.ustring.gmatch(qs, "([^&]*)") do + ofs = mw.ustring.find(x, "=") + if ofs then + k = mw.ustring.sub(x, 1, ofs - 1) + v = mw.ustring.sub(x, 1, ofs + 1) + else + k = x + v = "" + end + self.query[k] = v + end + end + + -- Compute completeUrl and its components + self:update() +end + +function Uri:clone() + return mw.clone(self) +end + +function Uri:extend(query) + if query == nil then return end + if type(query) == "string" then + -- print("Uri:extend string query", query) + for k, v in mw.ustring.gmatch(query, "([^=&]+)(=([^&]*))?&?") do + if v == nil then v = "" end + self.query[k] = v + end + else + for k, v in pairs(query) do + if type(v) ~= "function" then + self.query[k] = v + end + end + end + self:update() +end + +local mw_uri = { + encode = scribunto_mwuri.encode, + decode = 
scribunto_mwuri.decode, + validate = scribunto_mwuri.validate +} + +function mw_uri.anchorEncode(s) + -- XXX how exactly should this work? + s = s:gsub(" ", "_") + return s +end + +function mw_uri.localUrl(page, query) + local fragment = page:gmatch("#(.*)$", "")() or "" + page = page:gsub("#.*$", "") + local uri = Uri:new{} + uri:extend({title=page}) + uri:extend(query) + local ret = uri.relativePath + if fragment ~= "" then ret = ret .. "#" .. fragment end + return ret +end + +function mw_uri.fullUrl(page, query) + local fragment = page:gmatch("#(.*)$", "")() or "" + page = page:gsub("#.*$", "") + local uri = Uri:new{} + uri:extend({title=page}) + uri:extend(query) + local ret = "//" .. uri.hostPort .. uri.relativePath + if fragment ~= "" then ret = ret .. "#" .. fragment end + return ret +end + +function mw_uri.canonicalUrl(page, query) + local fragment = page:gmatch("#(.*)$", "")() or "" + page = page:gsub("#.*$", "") + local uri = Uri:new{} + uri:parse("/wiki/" .. mw.uri.encode(page, "WIKI")) + uri:extend(query) + local ret = uri.completeUrl + if fragment ~= "" then ret = ret .. "#" .. fragment end + return ret +end + +function mw_uri.new(s) + local url = Uri:new{} + if type(s) == "string" then + url:parse(s) + elseif type(s) == "table" then + url.protocol = s.protocol + url.user, url.password = s.user, s.password + url.host, url.port = s.host, s.port + url.path = s.path + url.query = mw.clone(s.query) + url.fragment = s.fragment + end + url:update() -- (re)compute hostPort, queryString, relativePath, completeUrl from the fields set above + return url +end + +function mw_uri.buildQueryString(args) + local parts = {} + for k, v in pairs(args) do + if type(v) ~= "function" then + local x = k .. "=" .. mw.uri.encode(tostring(v), "QUERY") + table.insert(parts, x) + end + end + table.sort(parts) + return table.concat(parts, "&") +end + +function mw_uri.parseQueryString(s, i, j) + if i == nil then i = 1 end + if i < 0 then i = mw.ustring.len(s) + i + 1 end + if j == nil then j = -1 end -- -1 selects through the last character, whatever i is + s = "&" .. mw.ustring.sub(s, i, j) .. 
"&" + args = {} + for k in mw.ustring.gmatch(s, "&([^&]+)") do + local ofs = mw.ustring.find(k, "=") + if ofs == nil then + v = false + else + v = mw.ustring.sub(k, ofs + 1) + k = mw.ustring.sub(k, 1, ofs - 1) + v = mw.uri.decode(v) + end + if args[k] ~= nil then + local lst = args[k] + if type(lst) ~= "table" then lst = {lst} end + table.insert(lst, v) + args[k] = lst + else + args[k] = v + end + end + return args +end + +return mw_uri diff --git a/wiktra/wikt/mw.lua b/wiktra/wikt/mw.lua new file mode 100644 index 0000000..22ee876 --- /dev/null +++ b/wiktra/wikt/mw.lua @@ -0,0 +1,133 @@ +-- Simplified implementation of mw for running WikiMedia Scribunto code +-- under Python +-- +-- Copyright (c) 2020-2021 Tatu Ylonen. See file LICENSE and https://ylonen.org + +local mw_autoload = { + hash = "mw-hash", + html = "mw-html", + language = "mw-language", + site = "mw-site", + text = "mw-text", + title = "mw-title", + uri = "mw-uri", + ustring = "ustring.ustring", + getContentLanguage = function(table) + return table.language.getContentLanguage + end, + getLanguage = function(table) + return table.language.getContentLanguage + end +} + +local mw_meta = {} + +mw = { + -- addWarning (see below) + -- allToString (see below) + -- clone (see below) + -- dumpObject (see below) + -- getCurrentFrame -- assigned in lua_invoke for each call + -- hash - autoloaded + -- html - autoloaded + -- incrementExpensiveFunctionCount (see below) + -- isSubsting (see below) + -- language - autoloaded + -- loadData (see below) + -- log (see below) + -- logObject (see below) + -- XXX message.* + -- site - autoloaded + -- text - autoloaded + -- title - autoloaded + -- uri - autoloaded + -- ustring - autoloaded +} +setmetatable(mw, mw_meta) + +function mw_meta.__index(table, key) + local modname = mw_autoload[key] + if modname == nil then return nil end + local ret + if type(modname) == "string" then + ret = require(modname) + elseif type(modname) == "function" then + ret = modname(table) + else 
+ error("mw_meta.__index had modname " .. tostring(modname)) + end + table[key] = ret + return ret +end + +function mw.addWarning(text) + print("mw.addWarning", text) +end + +function mw.allToString(...) + local parts, n = {...}, select("#", ...) -- n counts nil arguments too + for i = 1, n do + parts[i] = tostring(parts[i]) + end + return table.concat(parts, "\t", 1, n) -- Scribunto joins the stringified arguments with tabs +end + +local function _mw_deepcopy(obj, visited) + -- non-table objects can be returned as-is + if type(obj) ~= "table" then return obj end + -- handle cyclic data structures + if visited[obj] ~= nil then return visited[obj] end + -- Create new table + local new_table = {} + -- track that we have visited this node and save the copy + visited[obj] = new_table + -- clear metatable during the copy, as it could interfere + local old_meta = getmetatable(obj) + setmetatable(obj, nil) + -- Copy fields of the object + for k, v in pairs(obj) do + new_table[_mw_deepcopy(k, visited)] = _mw_deepcopy(v, visited) + end + -- copy metatable pointer for copy + setmetatable(obj, old_meta) + setmetatable(new_table, old_meta) + return new_table +end + +function mw.clone(v) + local ret = _mw_deepcopy(v, {}) + -- print("mw_clone: " .. tostring(ret)) + return ret +end + +function mw.dumpObject(obj) + print("mw.dumpObject", obj) +end + +function mw.incrementExpensiveFunctionCount() + print("mw.incrementExpensiveFunctionCount") +end + +function mw.isSubsting() + return false +end + +-- mw.loadData function - loads a data file. This is same as require(), +-- which already implements caching. +function mw.loadData(modname) + return require(modname) +end + +function mw.log(...) + -- print("mw.log", ...) 
+end + +function mw.logObject(obj) + -- print("mw.logObject", obj) +end + +function mw.getCurrentFrame() + return _mw_frame +end + +return mw diff --git a/wikt/mw.message.lua b/wiktra/wikt/mw/message.lua old mode 100755 new mode 100644 similarity index 100% rename from wikt/mw.message.lua rename to wiktra/wikt/mw/message.lua diff --git a/wikt/mw.uri.lua b/wiktra/wikt/mw/uri.lua similarity index 100% rename from wikt/mw.uri.lua rename to wiktra/wikt/mw/uri.lua diff --git a/wikt/mw.ustring.lua b/wiktra/wikt/mw/ustring.lua old mode 100755 new mode 100644 similarity index 100% rename from wikt/mw.ustring.lua rename to wiktra/wikt/mw/ustring.lua diff --git a/wikt/mwInit.lua b/wiktra/wikt/mwInit.lua old mode 100755 new mode 100644 similarity index 100% rename from wikt/mwInit.lua rename to wiktra/wikt/mwInit.lua diff --git a/wikt/package.lua b/wiktra/wikt/package.luax old mode 100755 new mode 100644 similarity index 100% rename from wikt/package.lua rename to wiktra/wikt/package.luax diff --git a/wikt/table.lua b/wiktra/wikt/table.lua similarity index 100% rename from wikt/table.lua rename to wiktra/wikt/table.lua diff --git a/wiktra/wikt/translit/Aghb-translit.lua b/wiktra/wikt/translit/Aghb-translit.lua new file mode 100644 index 0000000..943491b --- /dev/null +++ b/wiktra/wikt/translit/Aghb-translit.lua @@ -0,0 +1,12 @@ +local export = {} + +local tt = {["𐔰"] = "a", ["𐔱"] = "b", ["𐔲"] = "g", ["𐔳"] = "d", ["𐔴"] = "e", ["𐔵"] = "z", ["𐔶"] = "ē", ["𐔷"] = "ž", ["𐔸"] = "t", ["𐔹"] = "ć̣", ["𐔺"] = "y", ["𐔻"] = "ź", ["𐔼"] = "i", ["𐔽"] = "ʕ", ["𐔾"] = "l", ["𐔿"] = "n'", ["𐕀"] = "x", ["𐕁"] = "d'", ["𐕂"] = "c̣", ["𐕃"] = "ʒ́", ["𐕄"] = "ḳ", ["𐕅"] = "l'", ["𐕆"] = "h", ["𐕇"] = "x̣", ["𐕈"] = "å", ["𐕉"] = "ć", ["𐕊"] = "č̣", ["𐕋"] = "c'", ["𐕌"] = "m", ["𐕍"] = "q̇", ["𐕎"] = "n", ["𐕏"] = "ʒˈ", ["𐕐"] = "š", ["𐕑"] = "ǯ", ["𐕒"] = "o", ["𐕓"] = "ṭ'", ["𐕔"] = "f", ["𐕕"] = "ʒ", ["𐕖"] = "č", ["𐕗"] = "ṗ", ["𐕘"] = "ġ", ["𐕙"] = "r", ["𐕚"] = "s", ["𐕛"] = "v", ["𐕜"] = "ṭ", ["𐕝"] = "ś", ["𐕞"] = "ü", 
["𐕟"] = "c̣'", ["𐕠"] = "c", ["𐕡"] = "w", ["𐕢"] = "p", ["𐕣"] = "k", ["𐕯"] = "»"}; + +function export.tr(text) + text = mw.ustring.gsub(text, "𐕒𐕡", "u") + text = mw.ustring.gsub(text, ".", tt) + + return text +end + +return export diff --git a/wiktra/wikt/translit/Armi-translit.lua b/wiktra/wikt/translit/Armi-translit.lua new file mode 100644 index 0000000..3f1859f --- /dev/null +++ b/wiktra/wikt/translit/Armi-translit.lua @@ -0,0 +1,47 @@ +local export = {} + +local tt = { + ["𐡀"] = "ʾ", -- aleph + ["𐡁"] = "b", -- beth + ["𐡂"] = "g", -- gimel + ["𐡃"] = "d", -- daleth + ["𐡄"] = "h", -- he + ["𐡅"] = "w", -- waw + ["𐡆"] = "z", -- zayin + ["𐡇"] = "ḥ", -- heth + ["𐡈"] = "ṭ", -- teth + ["𐡉"] = "y", -- yodh + ["𐡊"] = "k", -- khaph + ["𐡋"] = "l", -- lamedh + ["𐡌"] = "m", -- mem + ["𐡍"] = "n", -- nun + ["𐡎"] = "s", -- samekh + ["𐡏"] = "ʿ", -- ayin + ["𐡐"] = "p", -- pe + ["𐡑"] = "ṣ", -- sadhe + ["𐡒"] = "q", -- qoph + ["𐡓"] = "r", -- resh + ["𐡔"] = "š", -- shin + ["𐡕"] = "t", -- taw + ["𐡗"] = "|", -- section sign + ["𐡘"] = "1", -- one + ["𐡙"] = "2", -- two + ["𐡚"] = "3", -- three + ["𐡛"] = "10", -- ten + ["𐡜"] = "20", -- twenty + ["𐡝"] = "100", -- one hundred + ["𐡞"] = "1000", -- one thousand + ["𐡟"] = "10000" -- ten thousand +} + +function export.tr(text, lang, sc) + + if sc ~= "Armi" then return end + + -- Transliterate characters + text = mw.ustring.gsub(text, ".", tt) + + return text +end + +return export diff --git a/wiktra/wikt/translit/Beng-Deva-translit.lua b/wiktra/wikt/translit/Beng-Deva-translit.lua new file mode 100644 index 0000000..123dd4c --- /dev/null +++ b/wiktra/wikt/translit/Beng-Deva-translit.lua @@ -0,0 +1,105 @@ +local export = {} +local U = mw.ustring.char +local gsub = mw.ustring.gsub +local match = mw.ustring.match +local sub = mw.ustring.sub + +local conv = { + ["ক"] = "क", + ["খ"] = "ख", + ["গ"] = "ग", + ["ঘ"] = "घ", + ["ঙ"] = "ङ", + ["চ"] = "च", + ["ছ"] = "छ", + ["জ"] = "ज", + ["ঝ"] = "झ", + ["ঞ"] = "ञ", + ["ট"] = "ट", + ["ঠ"] = "ठ", + ["ড"] = "ड", 
+ ["ঢ"] = "ढ", + ["ণ"] = "ण", + ["ত"] = "त", + ["থ"] = "थ", + ["দ"] = "द", + ["ধ"] = "ध", + ["ন"] = "न", + ["প"] = "प", + ["ফ"] = "फ", + ["ব"] = "ब", + ["ভ"] = "भ", + ["ম"] = "म", + ["য"] = "य", + ["র"] = "र", + ["ল"] = "ल", + ["শ"] = "श", + ["ষ"] = "ष", + ["স"] = "स", + ["হ"] = "ह", + + ["া"] = "ा", + ["ি"] = "ि", + ["ী"] = "ी", + ["ু"] = "ु", + ["ূ"] = "ू", + ["ৃ"] = "ृ", + ["ৄ"] = "ॄ", + ["ৢ"] = "ॢ", + ["ৣ"] = "ॣ", + ["ে"] = "े", + ["ৈ"] = "ै", + ["ো"] = "ो", + ["ৌ"] = "ौ", + ["্"] = "्", + ["়"] = "़", + + -- vowels + ["অ"] = "अ", + ["আ"] = "आ", + ["ই"] = "इ", + ["ঈ"] = "ई", + ["উ"] = "उ", + ["ঊ"] = "ऊ", + ["ঋ"] = "ऋ", + ["ৠ"] = "ॠ", + ["ঌ"] = "ऌ", + ["ৡ"] = "ॡ", + ["এ"] = "ए", + ["ঐ"] = "ऐ", + ["ও"] = "ओ", + ["ঔ"] = "औ", + -- chandrabindu + ["ঁ"] = "ँ", + -- anusvara + ["ং"] = "ं", + -- visarga + ["ঃ"] = "ः", + -- avagraha + ["ঽ"] = "ऽ", + -- punctuation + ["॥"] = "॥", + ["।"] = "।", + ["ওঁ"] = "ॐ", + -- Vedic extensions + ["ᳵ"] = "ᳵ", + ["ᳶ"] = "ᳶ", + + ["০"] = "०", + ["১"] = "१", + ["২"] = "२", + ["৩"] = "३", + ["৪"] = "४", + ["৫"] = "५", + ["৬"] = "६", + ["৭"] = "७", + ["৮"] = "८", + ["৯"] = "९" +} + +function export.tr(text, lang, sc) + text = mw.ustring.gsub(text, ".", function(c) return conv[c] end) + return text +end + +return export diff --git a/wiktra/wikt/translit/Chrs-translit.lua b/wiktra/wikt/translit/Chrs-translit.lua new file mode 100644 index 0000000..754b0a7 --- /dev/null +++ b/wiktra/wikt/translit/Chrs-translit.lua @@ -0,0 +1,44 @@ +local export = {} + +local tt = { + ["𐾰"] = "ʾ", -- aleph + ["𐾱"] = "ʾ", -- small aleph + ["𐾲"] = "β", -- beth + ["𐾳"] = "ɣ", -- gimel + ["𐾴"] = "d", -- daleth + ["𐾵"] = "h", -- he + ["𐾶"] = "w", -- waw + ["𐾷"] = "w", -- curled waw + ["𐾸"] = "z", -- zayin + ["𐾹"] = "x", -- heth + ["𐾺"] = "y", -- yodh + ["𐾻"] = "k", -- kaph + ["𐾼"] = "δ", -- lamedh + ["𐾽"] = "m", -- mem + ["𐾾"] = "n", -- nun + ["𐾿"] = "s", -- samekh + ["𐿀"] = "ʿ", -- ayin + ["𐿁"] = "p", -- pe + ["𐿂"] = "r", -- resh + ["𐿃"] = "š", -- shin + ["𐿄"] = 
"t", -- taw + ["𐿅"] = "1", -- one + ["𐿆"] = "2", -- two + ["𐿇"] = "3", -- three + ["𐿈"] = "4", -- four + ["𐿉"] = "10", -- ten + ["𐿊"] = "20", -- twenty + ["𐿋"] = "100" -- one hundred +} + +function export.tr(text, lang, sc) + -- If the script is not Chrs, do not transliterate + if sc ~= "Chrs" then return end + + -- Transliterate characters + text = mw.ustring.gsub(text, ".", tt) + + return text +end + +return export diff --git a/wiktra/wikt/translit/Deva-Beng-translit.lua b/wiktra/wikt/translit/Deva-Beng-translit.lua new file mode 100644 index 0000000..873b6cf --- /dev/null +++ b/wiktra/wikt/translit/Deva-Beng-translit.lua @@ -0,0 +1,104 @@ +local export = {} +local U = mw.ustring.char +local gsub = mw.ustring.gsub +local match = mw.ustring.match +local sub = mw.ustring.sub + +local conv = { + ["क"] = "ক", + ["ख"] = "খ", + ["ग"] = "গ", + ["घ"] = "ঘ", + ["ङ"] = "ঙ", + ["च"] = "চ", + ["छ"] = "ছ", + ["ज"] = "জ", + ["झ"] = "ঝ", + ["ञ"] = "ঞ", + ["ट"] = "ট", + ["ठ"] = "ঠ", + ["ड"] = "ড", + ["ढ"] = "ঢ", + ["ण"] = "ণ", + ["त"] = "ত", + ["थ"] = "থ", + ["द"] = "দ", + ["ध"] = "ধ", + ["न"] = "ন", + ["प"] = "প", + ["फ"] = "ফ", + ["ब"] = "ব", + ["भ"] = "ভ", + ["म"] = "ম", + ["य"] = "য", + ["र"] = "র", + ["ल"] = "ল", + ["व"] = "ব", -- ["ळ"] = "ळ", + ["श"] = "শ", + ["ष"] = "ষ", + ["स"] = "স", + ["ह"] = "হ", + + ["अ"] = "অ", + ["आ"] = "আ", + ["इ"] = "ই", + ["ई"] = "ঈ", + ["उ"] = "উ", + ["ऊ"] = "ঊ", + ["ऋ"] = "ঋ", + ["ॠ"] = "ৠ", + ["ऌ"] = "ঌ", + ["ॡ"] = "ৡ", + ["ए"] = "এ", + ["ऐ"] = "ঐ", + ["ओ"] = "ও", + ["औ"] = "ঔ", + + ["ा"] = "া", + ["ि"] = "ি", + ["ी"] = "ী", + ["ु"] = "ু", + ["ू"] = "ূ", + ["ृ"] = "ৃ", + ["ॄ"] = "ৄ", + ["ॢ"] = "ৢ", + ["ॣ"] = "ৣ", + ["े"] = "ে", + ["ै"] = "ৈ", + ["ो"] = "ো", + ["ौ"] = "ৌ", + ["्"] = "্", + + ["ँ"] = "ঁ", + ["ं"] = "ং", + ["ः"] = "ঃ", + ["ऽ"] = "ঽ", + + ["०"] = "০", + ["१"] = "১", + ["२"] = "২", + ["३"] = "৩", + ["४"] = "৪", + ["५"] = "৫", + ["६"] = "৬", + ["७"] = "৭", + ["८"] = "৮", + ["९"] = "৯", + + ["॥"] = "॥", + ["।"] = "।", + + -- ["ᳵ"] = 
"ᳵ", ["ᳶ"] = "ᳶ", + + ["ॐ"] = "ওঁ", + + ["*"] = "*" +} + +function export.tr(text, lang, sc) + text = mw.ustring.gsub(text, ".", function(c) return conv[c] end) + -- text = mw.ustring.gsub(text, '𑑄$', "𑑈") + return text +end + +return export diff --git a/wiktra/wikt/translit/Deva-Kthi-translit.lua b/wiktra/wikt/translit/Deva-Kthi-translit.lua new file mode 100644 index 0000000..76a04db --- /dev/null +++ b/wiktra/wikt/translit/Deva-Kthi-translit.lua @@ -0,0 +1,79 @@ +local export = {} + +local conv = { + ["क"] = "𑂍", + ["ख"] = "𑂎", + ["ग"] = "𑂏", + ["घ"] = "𑂐", + ["ङ"] = "𑂑", + ["च"] = "𑂒", + ["छ"] = "𑂓", + ["ज"] = "𑂔", + ["झ"] = "𑂕", + ["ञ"] = "𑂖", + ["ट"] = "𑂗", + ["ठ"] = "𑂘", + ["ड"] = "𑂙", + ["ढ"] = "𑂛", + ["ण"] = "𑂝", + ["त"] = "𑂞", + ["थ"] = "𑂟", + ["द"] = "𑂠", + ["ध"] = "𑂡", + ["न"] = "𑂢", + ["प"] = "𑂣", + ["फ"] = "𑂤", + ["ब"] = "𑂥", + ["भ"] = "𑂦", + ["म"] = "𑂧", + ["य"] = "𑂨", + ["र"] = "𑂩", + ["ल"] = "𑂪", + ["व"] = "𑂫", + ["श"] = "𑂬", + ["ष"] = "𑂭", + ["स"] = "𑂮", + ["ह"] = "𑂯", + + ["ा"] = "𑂰", + ["ि"] = "𑂱", + ["ी"] = "𑂲", + ["ु"] = "𑂳", + ["ू"] = "𑂴", + ["े"] = "𑂵", + ["ै"] = "𑂶", + ["ो"] = "𑂷", + ["ौ"] = "𑂸", + ["्"] = "𑂹", + ["़"] = "𑂺", + + -- vowels + ["अ"] = "𑂃", + ["आ"] = "𑂄", + ["इ"] = "𑂅", + ["ई"] = "𑂆", + ["उ"] = "𑂇", + ["ऊ"] = "𑂈", + ["ऋ"] = "𑂩𑂲", + ["ए"] = "𑂉", + ["ऐ"] = "𑂊", + ["ओ"] = "𑂋", + ["औ"] = "𑂌", + -- chandrabindu + ["ँ"] = "𑂀", + -- anusvara + ["ं"] = "𑂁", + -- visarga + ["ः"] = "𑂂", + -- punctuation + ["॥"] = "𑃁", + ["।"] = "𑃀" +} + +function export.tr(text, lang, sc) + text = mw.ustring.gsub(text, ".", function(c) return conv[c] end) + + return text +end + +return export diff --git a/wiktra/wikt/translit/Deva-Limb-translit.lua b/wiktra/wikt/translit/Deva-Limb-translit.lua new file mode 100644 index 0000000..3d50b3f --- /dev/null +++ b/wiktra/wikt/translit/Deva-Limb-translit.lua @@ -0,0 +1,136 @@ +local export = {} +local U = mw.ustring.char +local gsub = mw.ustring.gsub +local match = mw.ustring.match +local sub = mw.ustring.sub 
+local conv = { + ["क"] = "ᤁ", + ["ख"] = "ᤂ", + ["ग"] = "ᤃ", + ["घ"] = "ᤄ", + ["ङ"] = "ᤅ", + ["च"] = "ᤆ", + ["छ"] = "ᤇ", + ["ज"] = "ᤈ", + ["झ"] = "ᤋ", + ["ञ"] = "ᤊ", + ["ट"] = "ᤋ", + ["ठ"] = "ᤌ", + ["ड"] = "ᤍ", + ["ढ"] = "ᤎ", + ["ण"] = "ᤏ", + ["त"] = "ᤋ", + ["थ"] = "ᤌ", + ["द"] = "ᤍ", + ["ध"] = "ᤎ", + ["न"] = "ᤏ", + ["प"] = "ᤐ", + ["फ"] = "ᤑ", + ["ब"] = "ᤒ", + ["भ"] = "ᤓ", + ["म"] = "ᤔ", + ["य"] = "ᤕ", + ["र"] = "ᤖ", + ["ल"] = "ᤗ", + ["व"] = "ᤘ", + ["ळ"] = "ᤗ", + ["श"] = "ᤙ", + ["ष"] = "ᤚ", + ["स"] = "ᤛ", + ["ह"] = "ᤜ", + ["ॽ"] = "᤹", + + ["ा"] = "ᤠ", + ["ि"] = "ᤡ", + ["ी"] = "ᤡ᤺", + ["ु"] = "ᤢ", + ["ू"] = "ᤢ᤺", + ["ृ"] = "ᤪᤡ", + ["ॄ"] = "ᤪᤡ᤺", + ["ॢ"] = "ᤡ", + ["ॣ"] = "ᤡ᤺", + ["े"] = "ᤣ", + ["ै"] = "ᤤ", + ["ो"] = "ᤥ", + ["ौ"] = "ᤦ", + ["ॅ"] = "ᤧ", + ["ॉ"] = "ᤨ", + ["ॆ"] = "ᤧ", + ["ॊ"] = "ᤨ", + + -- vowels + ["अ"] = "ᤀ", + ["आ"] = "ᤀᤠ", + ["इ"] = "ᤀᤡ", + ["ई"] = "ᤀᤡ᤺", + ["उ"] = "ᤀᤢ", + ["ऊ"] = "ᤀᤢ᤺", + ["ऋ"] = "ᤖᤡ", + ["ॠ"] = "ᤖᤡ᤺", + ["ऌ"] = "ᤗᤪᤡ", + ["ॡ"] = "ᤗᤪᤡ᤺", + ["ए"] = "ᤀᤣ", + ["ऐ"] = "ᤀᤤ", + ["ओ"] = "ᤀᤥ", + ["औ"] = "ᤀᤦ", + ["ऍ"] = "ᤀᤧ", + ["ऑ"] = "ᤀᤨ", + ["ऎ"] = "ᤀᤧ", + ["ऒ"] = "ᤀᤨ", + -- chandrabindu + ["ँ"] = "ᤲ", + -- anusvara + ["ं"] = "ᤲ", + -- visarga + ["ः"] = "᤺", + -- avagraha + ["ऽ"] = "", + -- punctuation + ["॰"] = ".", + ["॥"] = "॥", + ["।"] = "॥", + ["ॐ"] = "ᤀᤥᤶ", + ["!"] = "᥄", + ["?"] = "᥅", + + -- Vedic extensions + ["ᳵ"] = "", + ["ᳶ"] = "", + + ["०"] = "᥆", + ["१"] = "᥇", + ["२"] = "᥈", + ["३"] = "᥉", + ["४"] = "᥊", + ["५"] = "᥋", + ["६"] = "᥌", + ["७"] = "᥍", + ["८"] = "᥎", + ["९"] = "᥏" +} + +function export.tr(text, lang, sc) + text = mw.ustring.gsub(text, ".", function(c) return conv[c] end) + text = gsub(text, "([ᤁᤂᤃᤄᤅᤆᤇᤈᤉᤊᤋᤌᤍᤎᤏᤐᤑᤒᤓᤔᤕᤖᤗᤘᤙᤚᤛᤜᤝᤞᤀ])([᤺]?)([ᤠ ᤡ ᤢ ᤣ ᤤ ᤥ ᤦ ᤧ ᤨ ᤩ ᤪ]?)(़?)([ᤠ ᤡ ᤢ ᤣ ᤤ ᤥ ᤦ ᤧ ᤨ ᤩ ᤪ]?)([᤺]?)ᤁ्", "%1%2%3%4%5%6ᤰ") + text = gsub(text, "([ᤁᤂᤃᤄᤅᤆᤇᤈᤉᤊᤋᤌᤍᤎᤏᤐᤑᤒᤓᤔᤕᤖᤗᤘᤙᤚᤛᤜᤝᤞᤀ])([᤺]?)([ᤠ ᤡ ᤢ ᤣ ᤤ ᤥ ᤦ ᤧ ᤨ ᤩ ᤪ]?)(़?)([ᤠ ᤡ ᤢ ᤣ ᤤ ᤥ ᤦ ᤧ ᤨ ᤩ ᤪ]?)([᤺]?)ᤅ्", "%1%2%3%4%5%6ᤱ") + text = gsub(text, 
"([ᤁᤂᤃᤄᤅᤆᤇᤈᤉᤊᤋᤌᤍᤎᤏᤐᤑᤒᤓᤔᤕᤖᤗᤘᤙᤚᤛᤜᤝᤞᤀ])([᤺]?)([ᤠ ᤡ ᤢ ᤣ ᤤ ᤥ ᤦ ᤧ ᤨ ᤩ ᤪ]?)(़?)([ᤠ ᤡ ᤢ ᤣ ᤤ ᤥ ᤦ ᤧ ᤨ ᤩ ᤪ]?)([᤺]?)ᤋ्", "%1%2%3%4%5%6ᤳ") + text = gsub(text, "([ᤁᤂᤃᤄᤅᤆᤇᤈᤉᤊᤋᤌᤍᤎᤏᤐᤑᤒᤓᤔᤕᤖᤗᤘᤙᤚᤛᤜᤝᤞᤀ])([᤺]?)([ᤠ ᤡ ᤢ ᤣ ᤤ ᤥ ᤦ ᤧ ᤨ ᤩ ᤪ]?)(़?)([ᤠ ᤡ ᤢ ᤣ ᤤ ᤥ ᤦ ᤧ ᤨ ᤩ ᤪ]?)([᤺]?)ᤏ्", "%1%2%3%4%5%6ᤴ") + text = gsub(text, "([ᤁᤂᤃᤄᤅᤆᤇᤈᤉᤊᤋᤌᤍᤎᤏᤐᤑᤒᤓᤔᤕᤖᤗᤘᤙᤚᤛᤜᤝᤞᤀ])([᤺]?)([ᤠ ᤡ ᤢ ᤣ ᤤ ᤥ ᤦ ᤧ ᤨ ᤩ ᤪ]?)(़?)([ᤠ ᤡ ᤢ ᤣ ᤤ ᤥ ᤦ ᤧ ᤨ ᤩ ᤪ]?)([᤺]?)ᤐ्", "%1%2%3%4%5%6ᤵ") + text = gsub(text, "([ᤁᤂᤃᤄᤅᤆᤇᤈᤉᤊᤋᤌᤍᤎᤏᤐᤑᤒᤓᤔᤕᤖᤗᤘᤙᤚᤛᤜᤝᤞᤀ])([᤺]?)([ᤠ ᤡ ᤢ ᤣ ᤤ ᤥ ᤦ ᤧ ᤨ ᤩ ᤪ]?)(़?)([ᤠ ᤡ ᤢ ᤣ ᤤ ᤥ ᤦ ᤧ ᤨ ᤩ ᤪ]?)([᤺]?)ᤔ्", "%1%2%3%4%5%6ᤶ") + text = gsub(text, "([ᤁᤂᤃᤄᤅᤆᤇᤈᤉᤊᤋᤌᤍᤎᤏᤐᤑᤒᤓᤔᤕᤖᤗᤘᤙᤚᤛᤜᤝᤞᤀ])([᤺]?)([ᤠ ᤡ ᤢ ᤣ ᤤ ᤥ ᤦ ᤧ ᤨ ᤩ ᤪ]?)(़?)([ᤠ ᤡ ᤢ ᤣ ᤤ ᤥ ᤦ ᤧ ᤨ ᤩ ᤪ]?)([᤺]?)ᤖ्", "%1%2%3%4%5%6ᤷ") + text = gsub(text, "([ᤁᤂᤃᤄᤅᤆᤇᤈᤉᤊᤋᤌᤍᤎᤏᤐᤑᤒᤓᤔᤕᤖᤗᤘᤙᤚᤛᤜᤝᤞᤀ])([᤺]?)([ᤠ ᤡ ᤢ ᤣ ᤤ ᤥ ᤦ ᤧ ᤨ ᤩ ᤪ]?)(़?)([ᤠ ᤡ ᤢ ᤣ ᤤ ᤥ ᤦ ᤧ ᤨ ᤩ ᤪ]?)([᤺]?)ᤗ्", "%1%2%3%4%5%6ᤸ") + text = gsub(text, "्ᤖ", "ᤪ") + text = gsub(text, "्ᤕ", "ᤩ") + text = gsub(text, "्ᤘ", "ᤫ") + text = gsub(text, "ज्ञ", "ᤝ") + text = gsub(text, "त्र", "ᤞ") + text = mw.ustring.gsub(text, "ᤀᤣ़", "ᤀᤧ") + text = mw.ustring.gsub(text, "ᤀᤥ़", "ᤀᤨ") + text = mw.ustring.gsub(text, "ᤣ़", "ᤧ") + text = mw.ustring.gsub(text, "ᤥ़", "ᤨ") + text = mw.ustring.gsub(text, "़", "") + text = mw.ustring.gsub(text, "्", "") + return text +end + +return export diff --git a/wiktra/wikt/translit/Deva-Newa-translit.lua b/wiktra/wikt/translit/Deva-Newa-translit.lua new file mode 100644 index 0000000..15a2364 --- /dev/null +++ b/wiktra/wikt/translit/Deva-Newa-translit.lua @@ -0,0 +1,115 @@ +local export = {} +local U = mw.ustring.char +local gsub = mw.ustring.gsub +local match = mw.ustring.match +local sub = mw.ustring.sub + +local conv = { + ["क"] = "𑐎", + ["ख"] = "𑐏", + ["ग"] = "𑐐", + ["घ"] = "𑐑", + ["ङ"] = "𑐒", + ["च"] = "𑐔", + ["छ"] = "𑐕", + ["ज"] = "𑐖", + ["झ"] = "𑐗", + ["ञ"] = "𑐘", + ["ट"] = "𑐚", + ["ठ"] = "𑐛", + ["ड"] = "𑐜", + ["ढ"] = "𑐝", + ["ण"] = "𑐞", + ["त"] = "𑐟", + ["थ"] = "𑐠", + ["द"] = "𑐡", + ["ध"] = "𑐢", + ["न"] = "𑐣", + ["प"] = "𑐥", 
+ ["फ"] = "𑐦", + ["ब"] = "𑐧", + ["भ"] = "𑐨", + ["म"] = "𑐩", + ["य"] = "𑐫", + ["र"] = "𑐬", + ["ल"] = "𑐮", + ["व"] = "𑐰", + ["ळ"] = "𑐮", + ["श"] = "𑐱", + ["ष"] = "𑐲", + ["स"] = "𑐳", + ["ह"] = "𑐴", + + ["ा"] = "𑐵", + ["ि"] = "𑐶", + ["ी"] = "𑐷", + ["ु"] = "𑐸", + ["ू"] = "𑐹", + ["ृ"] = "𑐺", + ["ॄ"] = "𑐻", + ["ॢ"] = "𑐼", + ["ॣ"] = "𑐽", + ["े"] = "𑐾", + ["ै"] = "𑐿", + ["ो"] = "𑑀", + ["ौ"] = "𑑁", + ["्"] = "𑑂", + ["़"] = "𑑆", + + -- vowels + ["अ"] = "𑐀", + ["आ"] = "𑐁", + ["इ"] = "𑐂", + ["ई"] = "𑐃", + ["उ"] = "𑐄", + ["ऊ"] = "𑐅", + ["ऋ"] = "𑐆", + ["ॠ"] = "𑐇", + ["ऌ"] = "𑐈", + ["ॡ"] = "𑐉", + ["ए"] = "𑐊", + ["ऐ"] = "𑐋", + ["ओ"] = "𑐌", + ["औ"] = "𑐍", + -- chandrabindu + ["ँ"] = "𑑃", + -- anusvara + ["ं"] = "𑑄", + -- visarga + ["ः"] = "𑑅", + -- avagraha + ["ऽ"] = "𑑇", + -- punctuation + ["॰"] = "𑑏", + ["॥"] = "𑑌", + ["।"] = "𑑋", + ["ॐ"] = "𑑉", + -- Vedic extensions + ["ᳵ"] = "𑑠", + ["ᳶ"] = "𑑡", + + ["०"] = "𑑐", + ["१"] = "𑑑", + ["२"] = "𑑒", + ["३"] = "𑑓", + ["४"] = "𑑔", + ["५"] = "𑑕", + ["६"] = "𑑖", + ["७"] = "𑑗", + ["८"] = "𑑘", + ["९"] = "𑑙" +} + +function export.tr(text, lang, sc) + text = mw.ustring.gsub(text, ".", function(c) return conv[c] end) + text = mw.ustring.gsub(text, "𑐣𑑂𑐴", "𑐤") + text = mw.ustring.gsub(text, "𑐒𑑂𑐴", "𑐓") + text = mw.ustring.gsub(text, "𑐬𑑂𑐴", "𑐭") + text = mw.ustring.gsub(text, "𑐮𑑂𑐴", "𑐯") + text = mw.ustring.gsub(text, "𑐩𑑂𑐴", "𑐪") + text = mw.ustring.gsub(text, "𑐘𑑂𑐴", "𑐙") + -- text = mw.ustring.gsub(text, '𑑄$', "𑑈") + return text +end + +return export diff --git a/wiktra/wikt/translit/Deva-Sind-translit.lua b/wiktra/wikt/translit/Deva-Sind-translit.lua new file mode 100644 index 0000000..9fb78f7 --- /dev/null +++ b/wiktra/wikt/translit/Deva-Sind-translit.lua @@ -0,0 +1,82 @@ +local export = {} +local gsub = mw.ustring.gsub +local match = mw.ustring.match + +local conv = { + ["क"] = "𑊺", + ["ख"] = "𑊻", + ["ग"] = "𑊼", + ["ॻ"] = "𑊽", + ["घ"] = "𑊾", + ["ङ"] = "𑊿", + ["च"] = "𑋀", + ["छ"] = "𑋁", + ["ज"] = "𑋂", + ["ॼ"] = "𑋄", + ["झ"] = "𑋄", + ["ञ"] = "𑋅", 
+ ["ट"] = "𑋆", + ["ठ"] = "𑋇", + ["ड"] = "𑋈", + ["ॾ"] = "𑋉", + ["ढ"] = "𑋋", + ["ण"] = "𑋌", + ["त"] = "𑋍", + ["थ"] = "𑋎", + ["द"] = "𑋏", + ["ध"] = "𑋐", + ["न"] = "𑋑", + ["प"] = "𑋒", + ["फ"] = "𑋓", + ["ब"] = "𑋔", + ["ॿ"] = "𑋕", + ["भ"] = "𑋖", + ["म"] = "𑋗", + ["य"] = "𑋘", + ["र"] = "𑋙", + ["ल"] = "𑋚", + ["व"] = "𑋛", + ["श"] = "𑋜", + ["स"] = "𑋝", + ["ह"] = "𑋞", + ["ड़"] = "𑋊", + ["ख़"] = "𑊻𑋩", + ["ग़"] = "𑊼𑋩", + ["क़"] = "𑊺𑋩", + ["ज"] = "𑋂𑋩", + + ["ा"] = "𑋠", + ["ि"] = "𑋡", + ["ी"] = "𑋢", + ["ु"] = "𑋣", + ["ू"] = "𑋤", + ["े"] = "𑋥", + ["ै"] = "𑋦", + ["ो"] = "𑋧", + ["ौ"] = "𑋨", + ["्"] = "𑋪", + ["़"] = "𑋩", + ["𑋈𑋩"] = "𑋊", + + -- vowels + ["अ"] = "𑊰", + ["आ"] = "𑊱", + ["इ"] = "𑊲", + ["ई"] = "𑊳", + ["उ"] = "𑊴", + ["ऊ"] = "𑊵", + ["ए"] = "𑊶", + ["ऐ"] = "𑊷", + ["ओ"] = "𑊸", + ["औ"] = "𑊹", + + -- anusvara + ["ं"] = "𑋟" +} +function export.tr(text, lang, sc) + text = mw.ustring.gsub(text, ".़?", function(c) return conv[c] end) + + return text +end + +return export diff --git a/wiktra/wikt/translit/Deva-Tirh-translit.lua b/wiktra/wikt/translit/Deva-Tirh-translit.lua new file mode 100644 index 0000000..2dfa9d9 --- /dev/null +++ b/wiktra/wikt/translit/Deva-Tirh-translit.lua @@ -0,0 +1,91 @@ +local export = {} + +local conv = { + ["क"] = "𑒏", + ["ख"] = "𑒐", + ["ग"] = "𑒑", + ["घ"] = "𑒒", + ["ङ"] = "𑒓", + ["च"] = "𑒔", + ["छ"] = "𑒕", + ["ज"] = "𑒖", + ["झ"] = "𑒗", + ["ञ"] = "𑒘", + ["ट"] = "𑒙", + ["ठ"] = "𑒚", + ["ड"] = "𑒛", + ["ढ"] = "𑒜", + ["ण"] = "𑒝", + ["त"] = "𑒞", + ["थ"] = "𑒟", + ["द"] = "𑒠", + ["ध"] = "𑒡", + ["न"] = "𑒢", + ["प"] = "𑒣", + ["फ"] = "𑒤", + ["ब"] = "𑒥", + ["भ"] = "𑒦", + ["म"] = "𑒧", + ["य"] = "𑒨", + ["र"] = "𑒩", + ["ल"] = "𑒪", + ["व"] = "𑒫", + ["ळ"] = "𑒪𑓃", + ["श"] = "𑒬", + ["ष"] = "𑒭", + ["स"] = "𑒮", + ["ह"] = "𑒯", + + ["ा"] = "𑒰", + ["ि"] = "𑒱", + ["ी"] = "𑒱", + ["ु"] = "𑒳", + ["ू"] = "𑒴", + ["ृ"] = "𑒵", + ["ॄ"] = "𑒶", + ["ॢ"] = "𑒷", + ["ॣ"] = "𑒸", + ["े"] = "𑒹", + ["ै"] = "𑒻", + ["ो"] = "𑒼", + ["ौ"] = "𑒾", + ["्"] = "𑓂", + ["़"] = "𑓃", + + -- vowels + ["अ"] = "𑒁", 
+ ["आ"] = "𑒂", + ["इ"] = "𑒃", + ["ई"] = "𑒄", + ["उ"] = "𑒅", + ["ऊ"] = "𑒆", + ["ऋ"] = "𑒇", + ["ॠ"] = "𑒈", + ["ऌ"] = "𑒉", + ["ॡ"] = "𑒊", + ["ए"] = "𑒋", + ["ऐ"] = "𑒌", + ["ओ"] = "𑒍", + ["औ"] = "𑒎", + -- chandrabindu + ["ँ"] = "𑒿", + -- anusvara + ["ं"] = "𑓀", + -- visarga + ["ः"] = "𑓁", + -- avagraha + [""] = "𑓄", + -- punctuation + ["॰"] = "𑓆", + ["॥"] = "॥", + ["।"] = "।", + ["ॐ"] = "𑓇" +} + +function export.tr(text, lang, sc) + text = mw.ustring.gsub(text, ".", function(c) return conv[c] end) + + return text +end + +return export diff --git a/wiktra/wikt/translit/JSON.lua b/wiktra/wikt/translit/JSON.lua new file mode 100644 index 0000000..44cc496 --- /dev/null +++ b/wiktra/wikt/translit/JSON.lua @@ -0,0 +1,116 @@ +local export = {} + +-- Given a finite real number x, returns a string containing its JSON +-- representation, with enough precision that it *should* round-trip correctly +-- (depending on the well-behavedness of the system on the other end). +function export.json_fromNumber(x) + if type(x) ~= "number" then error("Not of type \"number\": " .. x .. " (" .. type(x) .. ")") end + if x ~= x or x == math.huge or x == -math.huge then error("Not a finite real number: " .. x) end + return string.format("%.17g", x) +end + +-- This function makes an effort to convert an arbitrary Lua value to a string +-- containing a JSON representation of it. It's not intended to be very robust, +-- but may be useful for prototyping. +function export.toJSON(val, opts) + opts = opts or {} + + local function converter(val) + if type(val) == "nil" then + return "null" + elseif type(val) == "boolean" then + return val and "true" or "false" + elseif type(val) == "number" then + return export.json_fromNumber(val) + elseif type(val) == "string" then + return export.json_fromString(val) + elseif type(val) == "table" then + -- If the table has a toJSON member function, call that. 
+ if val.toJSON then + return val:toJSON() + else + return export.json_fromTable(val, converter) + end + else + error("Unsupported type: " .. type(val)) + end + end + + return converter(val) +end + +local escape_char_map = {["\\"] = "\\\\", ["\""] = "\\\"", ["\b"] = "\\b", ["\f"] = "\\f", ["\n"] = "\\n", ["\r"] = "\\r", ["\t"] = "\\t"} + +local function escape_char(c) return escape_char_map[c] or string.format("\\u%04X", mw.ustring.codepoint(c)) end + +-- Given a string, escapes any illegal characters and wraps it in double-quotes. +-- Raises an error if the string is not valid UTF-8. +function export.json_fromString(s) + if type(s) ~= "string" or not mw.ustring.isutf8(s) then error("Not a valid UTF-8 string: " .. s) end + + -- U+2029 (LINE SEPARATOR, \226\128\168 in UTF-8) + -- and U+2028 (PARAGRAPH SEPARATOR, \226\128\169 in UTF-8) are allowed + -- in JSON, but must be escaped for compatibility with JavaScript. + s = mw.ustring.gsub(s, "[\\\"%c\226\128\168\226\128\169]", escape_char) + + return "\"" .. s .. "\"" +end + +-- Given a table, treats it as an array and assembles its values in the form +-- '[ v1, v2, v3 ]'. Optionally takes a function to JSONify the values before +-- assembly; if that function is omitted, then the values should already be +-- strings containing valid JSON data. +function export.json_arrayFromTable(t, f) + f = f or function(x) return x end + + local ret = {} + + for _, elem in ipairs(t) do + elem = f(elem) + if elem ~= nil then + table.insert(ret, ", ") + table.insert(ret, elem) + end + end + + if #ret == 0 then return "[]" end + + ret[1] = "[ " + table.insert(ret, " ]") + + return table.concat(ret) +end + +-- Given a table whose keys are all strings, assembles its keys and values in +-- the form '{ "k1": v1, "k2": v2, "k3": v3 }'. Optionally takes a function to +-- JSONify the values before assembly; if that function is omitted, then the +-- values should already be strings containing valid JSON data. 
(The keys, by +-- contrast, should just be regular Lua strings; they will be passed to this +-- module's jsonStringFromString.) +function export.json_fromTable(val, converter) + converter = converter or function(x) return x end + + local as_array = {} + local as_object = {} + local string_key = false + + for key, value in pairs(val) do + value = converter(value) + + if type(key) ~= "number" then string_key = true end + + if value ~= nil then + key = export.json_fromString(tostring(key)) + table.insert(as_array, value) + table.insert(as_object, key .. " : " .. value) + end + end + + if string_key then + return "{" .. table.concat(as_object, ", ") .. "}" + else + return "[" .. table.concat(as_array, ", ") .. "]" + end +end + +return export diff --git a/wiktra/wikt/translit/Kali-translit.lua b/wiktra/wikt/translit/Kali-translit.lua new file mode 100644 index 0000000..b148b7d --- /dev/null +++ b/wiktra/wikt/translit/Kali-translit.lua @@ -0,0 +1,93 @@ +local export = {} +local gsub = mw.ustring.gsub +local u = mw.ustring.char + +local tt1 = { + -- consonants + ["ꤊ"] = "k", + ["ꤋ"] = "hk", + ["ꤌ"] = "g", + ["ꤍ"] = "ng", + ["ꤎ"] = "s", + ["ꤏ"] = "hs", + ["ꤐ"] = "z", + ["ꤑ"] = "ny", + ["ꤒ"] = "t", + ["ꤓ"] = "ht", + ["ꤔ"] = "n", + ["ꤕ"] = "p", + ["ꤖ"] = "hp", + ["ꤗ"] = "m", + ["ꤘ"] = "d", + ["ꤙ"] = "b", + ["ꤚ"] = "r", + ["ꤛ"] = "y", + ["ꤜ"] = "l", + ["ꤝ"] = "w", + ["ꤞ"] = "th", + ["ꤟ"] = "h", + ["ꤠ"] = "v", + ["ꤡ"] = "c", + -- vowels + ["ꤢ"] = "a", + ["ꤣ"] = "oe", + ["ꤤ"] = "i", + ["ꤥ"] = "o", + -- tones + ["꤫"] = u(0x0301), + ["꤬"] = u(0x0300), + ["꤭"] = u(0x0304), + -- marks + ["꤮"] = "-", + ["꤯"] = ".", + -- numerals + ["꤀"] = "0", + ["꤁"] = "1", + ["꤂"] = "2", + ["꤃"] = "3", + ["꤄"] = "4", + ["꤅"] = "5", + ["꤆"] = "6", + ["꤇"] = "7", + ["꤈"] = "8", + ["꤉"] = "9", + -- zero-width space (display it if it hides in a word) + [u(0x200B)] = "‼" +} + +local tt2 = { + -- vowels + ["ꤢꤦ"] = "ue", + ["ꤢꤧ"] = "ae", + ["ꤢꤨ"] = "u", + ["ꤢꤩ"] = "e", + ["ꤢꤪ"] = "oa", + ["ꤣꤦ"] = "oeue", 
+ ["ꤣꤧ"] = "ueae", + ["ꤣꤨ"] = "oeu", + ["ꤣꤩ"] = "oee", + ["ꤣꤪ"] = "oeoa" +} + +function export.tr(text, lang, sc, debug_mode) + + if type(text) == "table" then -- called directly from a template + text = text.args[1] + end + + text = gsub(text, "([ꤢ-ꤥ][ꤦ-꤭]*)([ꤢ-ꤥ])", "%1’%2") -- add apostrophe between adjacent two vowels + text = gsub(text, "([ꤊ-ꤡ])ꤟ([ꤢ-ꤥ][ꤦ-ꤪ]?)", "%1%2̤") -- change h between initial and vowel into subcolon + + text = gsub(text, "ꤟꤌꤣ", "u" .. u(0x0324) .. "eoe") -- use u() to break Unicode normalization + text = gsub(text, "ꤛꤣ", "ueoe") + text = gsub(text, "[ꤢꤣ][ꤦ-ꤪ]", tt2) + text = gsub(text, ".", tt1) + + text = gsub(text, "([aeiou])([aeiou]*)̤", "%1̤%2") -- move subcolon to first aeiou + text = gsub(text, "([aeiou]̤?)([aeiou]*)([" .. u(0x0301) .. u(0x0300) .. u(0x0304) .. "])", "%1%3%2") -- add tone mark on first aeiou + + return text + +end + +return export diff --git a/wiktra/wikt/translit/Limb-Deva-translit.lua b/wiktra/wikt/translit/Limb-Deva-translit.lua new file mode 100644 index 0000000..5ecfb5b --- /dev/null +++ b/wiktra/wikt/translit/Limb-Deva-translit.lua @@ -0,0 +1,79 @@ +local export = {} +local gsub = mw.ustring.gsub +local consonants = {["ᤁ"] = "क", ["ᤂ"] = "ख", ["ᤃ"] = "ग", ["ᤄ"] = "घ", ["ᤅ"] = "ङ", ["ᤆ"] = "च", ["ᤇ"] = "छ", ["ᤈ"] = "ज", ["ᤉ"] = "झ", ["ᤊ"] = "ञ", ["ᤋ"] = "त", ["ᤌ"] = "थ", ["ᤍ"] = "द", ["ᤎ"] = "ध", ["ᤏ"] = "न", ["ᤐ"] = "प", ["ᤑ"] = "फ", ["ᤒ"] = "ब", ["ᤓ"] = "भ", ["ᤔ"] = "म", ["ᤕ"] = "य", ["ᤖ"] = "र", ["ᤗ"] = "ल", ["ᤘ"] = "व", ["ᤙ"] = "श", ["ᤚ"] = "ष", ["ᤛ"] = "स", ["ᤜ"] = "ह", ["ᤝ"] = "ज्ञ", ["ᤞ"] = "त्र", ["ᤀ"] = "अ"} +local diacritics = {["ᤠ"] = "ा", ["ᤡ"] = "ि", ["ᤢ"] = "ु", ["ᤣ"] = "े", ["ᤤ"] = "ै", ["ᤥ"] = "ो", ["ᤦ"] = "ौ", ["ᤧ"] = "े़", ["ᤨ"] = "ो़"} + +-- ['ᤧ']='ॅ' , --['ᤨ']='', + +local special = { + ["᤹"] = "ॽ", -- mukphreng (glottalizer) + ["ᤲ"] = "ं" -- anusvara (now obsolete) +} + +local subjoined = {["ᤪ"] = "्र", ["ᤫ"] = "्व", ["ᤩ"] = "्य"} + +local finals = {["ᤰ"] = "क्", ["ᤱ"] = "ङ्", ["ᤳ"] = "त्", 
["ᤴ"] = "न्", ["ᤵ"] = "प्", ["ᤶ"] = "म्", ["ᤷ"] = "र्", ["ᤸ"] = "ल्"} + +local nonconsonants = { + + -- digits + ["᥆"] = "०", + ["᥇"] = "१", + ["᥈"] = "२", + ["᥉"] = "३", + ["᥊"] = "४", + ["᥋"] = "५", + ["᥌"] = "६", + ["᥍"] = "७", + ["᥎"] = "८", + ["᥏"] = "९", + ["॥"] = "॥", + ["᥄"] = "!", + ["᥅"] = "?", + ["."] = ",", + ["᥀"] = "लो" +} + +-- translit any words or phrases +function export.tr(text, lang, sc) + text = mw.ustring.gsub(text, "([ᤁᤂᤃᤄᤅᤆᤇᤈᤉᤊᤋᤌᤍᤎᤏᤐᤑᤒᤓᤔᤕᤖᤗᤘᤙᤚᤛᤜᤝᤞᤀ])᤻", "᤺%1") -- treat underscore as kemphreng + text = mw.ustring.gsub(text, "([ᤁᤂᤃᤄᤅᤆᤇᤈᤉᤊᤋᤌᤍᤎᤏᤐᤑᤒᤓᤔᤕᤖᤗᤘᤙᤚᤛᤜᤝᤞᤀ])" .. "([ᤪᤫᤩ]?)" .. "([ᤠᤡᤢᤣᤤᤥᤦᤧ ᤨ]?)" .. "([᤹᤺ᤲ]?)" .. "([ᤰᤱᤳᤴᤵᤶᤷᤸ]?)", function(c, d, e, f, g) + -- mw.log('match', c, d) + return (consonants[c] or c) .. (subjoined[d] or d) .. (diacritics[e] or (e ~= "") and e or "") .. (special[f] or f) .. (finals[g] or g) + end) + + text = mw.ustring.gsub(text, ".", nonconsonants) + text = mw.ustring.gsub(text, "(.)⌫", "") + text = gsub(text, "ाᤣ", "ो") + text = gsub(text, "ᤣᤣ", "ै") + text = gsub(text, "ाᤣᤣ", "ौ") + text = gsub(text, "ाःᤣ", "ओः") + text = mw.ustring.gsub(text, "᤺", "ः") + -- text = mw.ustring.gsub(text, 'िः', 'ी') + -- text = mw.ustring.gsub(text, 'ुः', 'ू') + text = mw.ustring.gsub(text, "ᤰ", "क्") + text = mw.ustring.gsub(text, "ᤱ", "ङ्") + text = mw.ustring.gsub(text, "ᤳ", "त्") + text = mw.ustring.gsub(text, "ᤴ", "न्") + text = mw.ustring.gsub(text, "ᤵ", "प्") + text = mw.ustring.gsub(text, "ᤶ", "म्") + text = mw.ustring.gsub(text, "ᤷ", "र्") + text = mw.ustring.gsub(text, "ᤸ", "ल्") + text = mw.ustring.gsub(text, "अा", "आ") + text = mw.ustring.gsub(text, "अि", "इ") + text = mw.ustring.gsub(text, "अी", "ई") + text = mw.ustring.gsub(text, "अु", "उ") + text = mw.ustring.gsub(text, "अू", "ऊ") + text = mw.ustring.gsub(text, "अे", "ए") + text = mw.ustring.gsub(text, "अै", "ऐ") + text = mw.ustring.gsub(text, "अो", "ओ") + text = mw.ustring.gsub(text, "अौ", "औ") + text = mw.ustring.gsub(text, "अॅ", "ऍ") + text = mw.ustring.gsub(text, 
"अे़", "ए़") + text = mw.ustring.gsub(text, "अो़", "ओ़") + text = gsub(text, "ाःᤣ", "ोः") + text = mw.ustring.gsub(text, "ाःᤣ", "ोः") + return mw.ustring.toNFC(text) +end + +return export diff --git a/wiktra/wikt/translit/Modi-translit.lua b/wiktra/wikt/translit/Modi-translit.lua new file mode 100644 index 0000000..8106b24 --- /dev/null +++ b/wiktra/wikt/translit/Modi-translit.lua @@ -0,0 +1,79 @@ +local export = {} + +local consonants = {["𑘎"] = "k", ["𑘏"] = "kh", ["𑘐"] = "g", ["𑘑"] = "gh", ["𑘒"] = "ṅ", ["𑘓"] = "c", ["𑘔"] = "ch", ["𑘕"] = "j", ["𑘖"] = "jh", ["𑘗"] = "ñ", ["𑘘"] = "ṭ", ["𑘙"] = "ṭh", ["𑘚"] = "ḍ", ["𑘛"] = "ḍh", ["𑘜"] = "ṇ", ["𑘝"] = "t", ["𑘞"] = "th", ["𑘟"] = "d", ["𑘠"] = "dh", ["𑘡"] = "n", ["𑘢"] = "p", ["𑘣"] = "ph", ["𑘤"] = "b", ["𑘥"] = "bh", ["𑘦"] = "m", ["𑘧"] = "y", ["𑘨"] = "r", ["𑘩"] = "l", ["𑘪"] = "v", ["𑘯"] = "ḷ", ["𑘫"] = "ś", ["𑘬"] = "ṣ", ["𑘭"] = "s", ["𑘮"] = "h"} + +local diacritics = { + ["𑘰"] = "ā", + ["𑘱"] = "i", + ["𑘲"] = "ī", + ["𑘳"] = "u", + ["𑘴"] = "ū", + ["𑘵"] = "ṛ", + ["𑘶"] = "ṝ", + ["𑘷"] = "ḷ", + ["𑘸"] = "ḹ", + ["𑘹"] = "e", + ["𑘺"] = "ai", + ["𑘻"] = "o", + ["𑘼"] = "au", + -- virama + ["𑘿"] = "" +} + +local tt = { + -- vowel signs + ["𑘀"] = "a", + ["𑘂"] = "i", + ["𑘄"] = "u", + ["𑘊"] = "e", + ["𑘌"] = "o", + ["𑘁"] = "ā", + ["𑘃"] = "ī", + ["𑘅"] = "ū", + ["𑘆"] = "ŕ", + ["𑘋"] = "ai", + ["𑘍"] = "au", + ["𑘁𑙀"] = "ŏ", + ["𑘀𑙀"] = "ĕ", + ["𑘊𑙀"] = "ĕ", + -- anusvara + ["𑘽"] = "ṃ", + -- visarga + ["𑘾"] = "ḥ", + -- numerals + ["𑙐"] = "0", + ["𑙑"] = "1", + ["𑙒"] = "2", + ["𑙓"] = "3", + ["𑙔"] = "4", + ["𑙕"] = "5", + ["𑙖"] = "6", + ["𑙗"] = "7", + ["𑙘"] = "8", + ["𑙙"] = "9", + -- punctuation + ["𑙁"] = ".", -- danda + ["𑙂"] = ".", -- double danda + ["+"] = "", -- compound separator + + -- abbreviation sign + ["𑙃"] = ".", + -- Om + ["ॐ"] = "oṃ" +} + +function export.tr(text, lang, sc) + text = mw.ustring.gsub(text, "([𑘎𑘏𑘐𑘑𑘒𑘓𑘔𑘕𑘖𑘗𑘘𑘙𑘚𑘛𑘜𑘝𑘞𑘟𑘠𑘡𑘢𑘣𑘤𑘥𑘦𑘧𑘨𑘩𑘪𑘯𑘫𑘬𑘭𑘮])" .. "([𑘰𑘱𑘲𑘳𑘴𑘵𑘶𑘷𑘸𑘹𑘺𑘻𑘼𑘿]?)", function(c, d) + if d == "" then + return consonants[c] .. 
"a" + else + return consonants[c] .. diacritics[d] + end + end) + + text = mw.ustring.gsub(text, ".[𑙀]?", tt) + + return text +end + +return export diff --git a/wiktra/wikt/translit/Narb-translit.lua b/wiktra/wikt/translit/Narb-translit.lua new file mode 100644 index 0000000..44fc1c7 --- /dev/null +++ b/wiktra/wikt/translit/Narb-translit.lua @@ -0,0 +1,7 @@ +local export = {} + +local chars = {["𐪏"] = "s³", ["𐪙"] = "ḏ", ["𐪍"] = "ẖ", ["𐪊"] = "s¹", ["𐪆"] = "s²", ["𐪛"] = "ṯ", ["𐪑"] = "ʾ", ["𐪒"] = "ʿ", ["𐪈"] = "b", ["𐪕"] = "d", ["𐪓"] = "ḍ", ["𐪐"] = "f", ["𐪔"] = "g", ["𐪖"] = "ġ", ["𐪀"] = "h", ["𐪂"] = "ḥ", ["𐪋"] = "k", ["𐪁"] = "l", ["𐪃"] = "m", ["𐪌"] = "n", ["𐪄"] = "q", ["𐪇"] = "r", ["𐪎"] = "ṣ", ["𐪉"] = "t", ["𐪗"] = "ṭ", ["𐪅"] = "w", ["𐪚"] = "y", ["𐪘"] = "z", ["𐪜"] = "ẓ"} + +function export.tr(text, lang, sc) return (mw.ustring.gsub(text, ".", chars)) end + +return export diff --git a/wiktra/wikt/translit/Newa-Deva-translit.lua b/wiktra/wikt/translit/Newa-Deva-translit.lua new file mode 100644 index 0000000..207c133 --- /dev/null +++ b/wiktra/wikt/translit/Newa-Deva-translit.lua @@ -0,0 +1,116 @@ +local export = {} +local U = mw.ustring.char +local gsub = mw.ustring.gsub +local match = mw.ustring.match +local sub = mw.ustring.sub + +local conv = { + ["𑐎"] = "क", + ["𑐏"] = "ख", + ["𑐐"] = "ग", + ["𑐑"] = "घ", + ["𑐒"] = "ङ", + ["𑐔"] = "च", + ["𑐕"] = "छ", + ["𑐖"] = "ज", + ["𑐗"] = "झ", + ["𑐘"] = "ञ", + ["𑐚"] = "ट", + ["𑐛"] = "ठ", + ["𑐜"] = "ड", + ["𑐝"] = "ढ", + ["𑐞"] = "ण", + ["𑐟"] = "त", + ["𑐠"] = "थ", + ["𑐡"] = "द", + ["𑐢"] = "ध", + ["𑐣"] = "न", + ["𑐥"] = "प", + ["𑐦"] = "फ", + ["𑐧"] = "ब", + ["𑐨"] = "भ", + ["𑐩"] = "म", + ["𑐫"] = "य", + ["𑐬"] = "र", + ["𑐮"] = "ल", + ["𑐰"] = "व", + ["𑐱"] = "श", + ["𑐲"] = "ष", + ["𑐳"] = "स", + ["𑐴"] = "ह", + + -- breathy + ["𑐤"] = "न्ह", + ["𑐓"] = "ङ्ह", + ["𑐭"] = "र्ह", + ["𑐯"] = "ल्ह", + ["𑐪"] = "म्ह", + ["𑐙"] = "ञ्ह", + + ["𑐵"] = "ा", + ["𑐶"] = "ि", + ["𑐷"] = "ी", + ["𑐸"] = "ु", + ["𑐹"] = "ू", + ["𑐺"] = "ृ", + ["𑐻"] = "ॄ", + ["𑐼"] = "ॢ", + 
["𑐽"] = "ॣ", + ["𑐾"] = "े", + ["𑐿"] = "ै", + ["𑑀"] = "ो", + ["𑑁"] = "ौ", + ["𑑂"] = "्", + ["𑑆"] = "़", + + -- vowels + ["𑐀"] = "अ", + ["𑐁"] = "आ", + ["𑐂"] = "इ", + ["𑐃"] = "ई", + ["𑐄"] = "उ", + ["𑐅"] = "ऊ", + ["𑐆"] = "ऋ", + ["𑐇"] = "ॠ", + ["𑐈"] = "ऌ", + ["𑐉"] = "ॡ", + ["𑐊"] = "ए", + ["𑐋"] = "ऐ", + ["𑐌"] = "ओ", + ["𑐍"] = "औ", + -- chandrabindu + ["𑑃"] = "ँ", + -- anusvara + ["𑑄"] = "ं", + ["𑑈"] = "ं", + -- visarga + ["𑑅"] = "ः", + -- avagraha + ["𑑇"] = "ऽ", + -- punctuation + ["𑑏"] = "॰", + ["𑑌"] = "॥", + ["𑑋"] = "।", + ["𑑉"] = "ॐ", + -- Vedic extensions + ["𑑠"] = "ᳵ", + ["𑑡"] = "ᳶ", + + ["𑑐"] = "०", + ["𑑑"] = "१", + ["𑑒"] = "२", + ["𑑓"] = "३", + ["𑑔"] = "४", + ["𑑕"] = "५", + ["𑑖"] = "६", + ["𑑗"] = "७", + ["𑑘"] = "८", + ["𑑙"] = "९" +} + +function export.tr(text, lang, sc) + text = mw.ustring.gsub(text, ".", function(c) return conv[c] end) + return text +end + +return export diff --git a/wiktra/wikt/translit/Olck-translit.lua b/wiktra/wikt/translit/Olck-translit.lua new file mode 100644 index 0000000..d51b6d7 --- /dev/null +++ b/wiktra/wikt/translit/Olck-translit.lua @@ -0,0 +1,86 @@ +local export = {} + +local conv = { + ["ᱚ"] = "ô", + ["ᱛ"] = "t", + ["ᱜ"] = "g", + ["ᱝ"] = "ṅ", + ["ᱞ"] = "l", + ["ᱟ"] = "a", + ["ᱠ"] = "k", + ["ᱡ"] = "j", + ["ᱢ"] = "m", + ["ᱣ"] = "v", + ["ᱤ"] = "i", + ["ᱥ"] = "s", + ["ᱦ"] = "h", + ["ᱧ"] = "ñ", + ["ᱨ"] = "r", + ["ᱩ"] = "u", + ["ᱪ"] = "c", + ["ᱫ"] = "d", + ["ᱬ"] = "ṇ", + ["ᱭ"] = "y", + ["ᱮ"] = "e", + ["ᱯ"] = "p", + ["ᱰ"] = "ḍ", + ["ᱱ"] = "n", + ["ᱲ"] = "ṛ", + ["ᱳ"] = "o", + ["ᱴ"] = "ṭ", + ["ᱵ"] = "b", + ["ᱶ"] = "w̃", + ["ᱷ"] = "h", -- aspiration + + -- numerals + ["᱐"] = "0", + ["᱑"] = "1", + ["᱒"] = "2", + ["᱓"] = "3", + ["᱔"] = "4", + ["᱕"] = "5", + ["᱖"] = "6", + ["᱗"] = "7", + ["᱘"] = "8", + ["᱙"] = "9", + + -- punctuation + ["᱿"] = ".", + ["᱾"] = ".", + + -- special chars + ["ᱸ"] = "̃", -- mu tudag: nasalization + ["ᱺ"] = "ᱹ̃", -- mu gahla tudag: nasalization + ["ᱻ"] = "ː" -- rela: gemination +} + +local gahla_tudag = {["ô"] = "ŏ", 
["a"] = "ă", ["e"] = "ĕ"} + +local ahad = {["k’"] = "g", ["c’"] = "j", ["t’"] = "d", ["p’"] = "b", ["h’"] = "h"} + +local pharka = {["g"] = "k’", ["j"] = "c’", ["d"] = "t’", ["b"] = "p’", ["h"] = "h’"} + +local punctuation = "([ ᱾᱿,!?\"'])" + +function export.tr(text, lang, sc) + text = mw.ustring.gsub(text, ".", function(c) return conv[c] end) + + -- word-final glottalization + text = mw.ustring.gsub(text, "[gjdb]$", pharka) + text = mw.ustring.gsub(text, "([gjdb])" .. punctuation, function(c, d) return pharka[c] .. d end) + + -- gahla tudag + text = mw.ustring.gsub(text, "(.)ᱹ", function(c) return gahla_tudag[c] end) + + -- ahad + text = mw.ustring.gsub(text, "(.’)ᱽ", function(c) return ahad[c] end) + + -- parkha + text = mw.ustring.gsub(text, "(.)ᱼ", function(c) return pharka[c] end) + + text = mw.ustring.gsub(text, "h’", "ʔ") + + return text +end + +return export diff --git a/wiktra/wikt/translit/Orkh-Bitig-translit.lua b/wiktra/wikt/translit/Orkh-Bitig-translit.lua new file mode 100644 index 0000000..14dfa99 --- /dev/null +++ b/wiktra/wikt/translit/Orkh-Bitig-translit.lua @@ -0,0 +1,55 @@ +local export = {} + +local tt = { + ["𐰀"] = "a", -- ORKHON A + ["𐰃"] = "i", -- ORKHON I + ["𐰆"] = "U", -- ORKHON O + ["𐰇"] = "u", -- ORKHON OE + ["𐰉"] = "B", -- ORKHON AB + ["𐰋"] = "b", -- ORKHON AEB + ["𐰍"] = "G", -- ORKHON AG + ["𐰏"] = "g", -- ORKHON AEG + ["𐰑"] = "D", -- ORKHON AD + ["𐰓"] = "d", -- ORKHON AED + ["𐰔"] = "z", -- ORKHON EZ + ["𐰖"] = "J", -- ORKHON AY + ["𐰘"] = "j", -- ORKHON AEY + ["𐰚"] = "K", -- ORKHON AEK + ["𐰜"] = "q", -- ORKHON OEK + ["𐰞"] = "L", -- ORKHON AL + ["𐰠"] = "l", -- ORKHON AEL + ["𐰡"] = "w", -- ORKHON ELT + ["𐰢"] = "m", -- ORKHON EM + ["𐰣"] = "N", -- ORKHON AN + ["𐰤"] = "n", -- ORKHON AEN + ["𐰦"] = "O", -- ORKHON ENT + ["𐰨"] = "W", -- ORKHON ENC + ["𐰪"] = "F", -- ORKHON ENY + ["𐰭"] = "y", -- ORKHON ENG + ["𐰯"] = "p", -- ORKHON EP + ["𐰲"] = "C", -- ORKHON EC + ["𐰴"] = "Q", -- ORKHON AQ + ["𐰶"] = "k", -- ORKHON IQ + ["𐰸"] = "x", -- ORKHON OQ + 
["𐰺"] = "R", -- ORKHON AR + ["𐰼"] = "r", -- ORKHON AER + ["𐰽"] = "S", -- ORKHON AS + ["𐰾"] = "s", -- ORKHON AES + ["𐰿"] = "c", -- ORKHON ASH + ["𐱁"] = "Y", -- ORKHON ESH + ["𐱃"] = "T", -- ORKHON AT + ["𐱅"] = "t", -- ORKHON AET + ["𐱈"] = "V" -- ORKHON BASH +} + +function export.tr(text, lang, sc) + -- If the script is not Orkh, do not transliterate + if sc ~= "Orkh" then return end + + -- Transliterate characters + text = mw.ustring.gsub(text, ".", tt) + + return text +end + +return export diff --git a/wiktra/wikt/translit/Orkh-translit.lua b/wiktra/wikt/translit/Orkh-translit.lua new file mode 100644 index 0000000..ed3cb75 --- /dev/null +++ b/wiktra/wikt/translit/Orkh-translit.lua @@ -0,0 +1,57 @@ +local export = {} + +local tt = { + ["𐰀"] = "a", -- ORKHON A + ["𐰃"] = "i", -- ORKHON I + ["𐰆"] = "u", -- ORKHON O + ["𐰇"] = "ü", -- ORKHON OE + ["𐰉"] = "b¹", -- ORKHON AB + ["𐰋"] = "b²", -- ORKHON AEB + ["𐰍"] = "ǧ", -- ORKHON AG + ["𐰏"] = "g", -- ORKHON AEG + ["𐰑"] = "d¹", -- ORKHON AD + ["𐰓"] = "d²", -- ORKHON AED + ["𐰕"] = "z", -- ORKHON EZ + ["𐰖"] = "y¹", -- ORKHON AY + ["𐰘"] = "y²", -- ORKHON AEY + ["𐰚"] = "k", -- ORKHON AEK + ["𐰜"] = "k̥", -- ORKHON OEK + ["𐰞"] = "l¹", -- ORKHON AL + ["𐰠"] = "l²", -- ORKHON AEL + ["𐰡"] = "lt", -- ORKHON ELT + ["𐰢"] = "m", -- ORKHON EM + ["𐰣"] = "n¹", -- ORKHON AN + ["𐰤"] = "n²", -- ORKHON AEN + ["𐰦"] = "nt", -- ORKHON ENT + ["𐰨"] = "nč", -- ORKHON ENC + ["𐰪"] = "ń", -- ORKHON ENY + ["𐰭"] = "ŋ", -- ORKHON ENG + ["𐰯"] = "p", -- ORKHON EP + ["𐰰"] = "p̊", -- ORKHON OP + ["𐰱"] = "č̥", -- ORKHO IC + ["𐰲"] = "č", -- ORKHON EC + ["𐰴"] = "q", -- ORKHON AQ + ["𐰶"] = "q̊²", -- ORKHON IQ + ["𐰸"] = "q̊¹", -- ORKHON OQ + ["𐰺"] = "r¹", -- ORKHON AR + ["𐰼"] = "r²", -- ORKHON AER + ["𐰽"] = "s¹", -- ORKHON AS + ["𐰾"] = "s²", -- ORKHON AES + ["𐱁"] = "š", -- ORKHON ESH + ["𐱃"] = "t¹", -- ORKHON AT + ["𐱅"] = "t²", -- ORKHON AET + ["𐱇"] = "t̥", -- ORKHON OT + ["𐱈"] = "bš" -- ORKHON BASH +} + +function export.tr(text, lang, sc) + -- If the script is not 
Orkh, do not transliterate + if sc ~= "Orkh" then return end + + -- Transliterate characters + text = mw.ustring.gsub(text, ".", tt) + + return text +end + +return export diff --git a/wiktra/wikt/translit/Palm-translit.lua b/wiktra/wikt/translit/Palm-translit.lua new file mode 100644 index 0000000..9231f23 --- /dev/null +++ b/wiktra/wikt/translit/Palm-translit.lua @@ -0,0 +1,48 @@ +local export = {} + +local tt = { + ["𐡠"] = "ʾ", -- aleph + ["𐡡"] = "b", -- beth + ["𐡢"] = "g", -- gimel + ["𐡣"] = "d", -- daleth + ["𐡤"] = "h", -- he + ["𐡥"] = "w", -- waw + ["𐡦"] = "z", -- zayin + ["𐡧"] = "ḥ", -- heth + ["𐡨"] = "ṭ", -- teth + ["𐡩"] = "y", -- yodh + ["𐡪"] = "k", -- kaph + ["𐡫"] = "l", -- lamedh + ["𐡬"] = "m", -- mem + ["𐡭"] = "n", -- final nun + ["𐡮"] = "n", -- nun + ["𐡯"] = "s", -- samekh + ["𐡰"] = "ʿ", -- ayin + ["𐡱"] = "p", -- pe + ["𐡲"] = "ṣ", -- sadhe + ["𐡳"] = "q", -- qoph + ["𐡴"] = "r", -- resh + ["𐡵"] = "š", -- shin + ["𐡶"] = "t", -- taw + ["𐡷"] = "☙", -- left-pointing fleuron + ["𐡸"] = "❧", -- right-pointing fleuron + ["𐡹"] = "1", -- one + ["𐡺"] = "2", -- two + ["𐡻"] = "3", -- three + ["𐡼"] = "4", -- four + ["𐡽"] = "5", -- five + ["𐡾"] = "10", -- ten + ["𐡿"] = "20" -- twenty +} + +function export.tr(text, lang, sc) + + if sc ~= "Palm" then return end + + -- Transliterate characters + text = mw.ustring.gsub(text, ".", tt) + + return text +end + +return export diff --git a/wiktra/wikt/translit/Phlv-translit.lua b/wiktra/wikt/translit/Phlv-translit.lua new file mode 100644 index 0000000..b4b7ffa --- /dev/null +++ b/wiktra/wikt/translit/Phlv-translit.lua @@ -0,0 +1,56 @@ +local export = {} + +local U = mw.ustring.char + +local gdy_pattern = "[" .. U(0x10BCB, 0x10BCD, 0x10BCC, 0x10BCE) .. "]" .. 
U(0x10BB2) + +local gdy_map = { + ["𐯋𐮲"] = "g", -- gimel-daleth-yodh with two dots above + ["𐯌𐮲"] = "d", -- gimel-daleth-yodh with hat above + ["𐯍𐮲"] = "y", -- gimel-daleth-yodh with two dots below + ["𐯎𐮲"] = "j" -- gimel-daleth-yodh with dot below +} + +local tt = { + ["𐮰"] = "ʾ", -- aleph-het + ["𐮱"] = "b", -- beth + ["𐮲"] = "?", -- gimel-daleth-yodh (Should have been transliterated) + ["𐮳"] = "ḍ", -- old daleth + ["𐮴"] = "h", -- he + ["𐮵"] = "'", -- waw-nun-ayin-resh + ["𐮶"] = "z", -- zayin + ["𐮷"] = "k", -- kaph + ["𐮸"] = "k", -- old kaph + ["𐮹"] = "l", -- lamedh + ["𐮺"] = "ƚ", -- old lamedh + ["𐮻"] = "ḷ", -- l-lamedh + ["𐮼"] = "m", -- mem-qoph + ["𐮽"] = "s", -- samekh + ["𐮾"] = "p", -- pe + ["𐮿"] = "c", -- sadhe + ["𐯀"] = "š", -- shin + ["𐯁"] = "t", -- taw + ["𐯂"] = "yt", -- x1 + ["𐯃"] = "ty", -- x2 + ["𐯐"] = "", -- I + ["𐯑"] = "" -- ahriman +} + +function export.tr(text, lang, sc) + -- If the script is not Phlv, do not transliterate + if sc ~= "Phlv" then return end + + text = mw.ustring.gsub(text, gdy_pattern, gdy_map) + + -- Category to catch automatic transcriptions of waw-ayin-resh and mem-qoph + -- [[Category:Automatic Book Pahlavi transliterations containing ambiguous characters]] + local ambig_note = "" + if mw.ustring.match(text, "[𐮲𐮵𐮼]") then ambig_note = "[[Category:Automatic Book Pahlavi transliterations containing ambiguous characters]]" end + + -- Transliterate characters + text = mw.ustring.gsub(text, ".", tt) + + return text +end + +return export diff --git a/wiktra/wikt/translit/Phnx-translit.lua b/wiktra/wikt/translit/Phnx-translit.lua new file mode 100644 index 0000000..f4afc9a --- /dev/null +++ b/wiktra/wikt/translit/Phnx-translit.lua @@ -0,0 +1,17 @@ +local export = {} +local gsub = mw.ustring.gsub +local u = mw.ustring.char + +-- Beware! 
Phnx is rtl +local tt = {["𐤀"] = "ʾ", ["𐤁"] = "b", ["𐤂"] = "g", ["𐤃"] = "d", ["𐤄"] = "h", ["𐤅"] = "w", ["𐤆"] = "z", ["𐤇"] = "ḥ", ["𐤈"] = "ṭ", ["𐤉"] = "y", ["𐤊"] = "k", ["𐤋"] = "l", ["𐤌"] = "m", ["𐤍"] = "n", ["𐤎"] = "s", ["𐤏"] = "ʿ", ["𐤐"] = "p", ["𐤑"] = "ṣ", ["𐤒"] = "q", ["𐤓"] = "r", ["𐤔"] = "š", ["𐤕"] = "t", ["𐤖"] = "[1]", ["𐤗"] = "[10]", ["𐤘"] = "[20]", ["𐤙"] = "[100]", ["𐤚"] = "[2]", ["𐤛"] = "[3]", ["𐤟"] = " "} + +function export.tr(text, lang, sc, debug_mode) + + if type(text) == "table" then -- called directly from a template + text = text.args[1] + end + + return (text:gsub("[%z\1-\127\194-\244][\128-\191]*", tt)) -- UTF-8 character pattern +end + +return export diff --git a/wiktra/wikt/translit/Rohg-translit.lua b/wiktra/wikt/translit/Rohg-translit.lua new file mode 100644 index 0000000..7af7c60 --- /dev/null +++ b/wiktra/wikt/translit/Rohg-translit.lua @@ -0,0 +1,65 @@ +local export = {} + +local tt = { + -- consonants + ["𐴀"] = "", + ["𐴁"] = "b", + ["𐴃"] = "t", + ["𐴄"] = "th", + ["𐴅"] = "j", + ["𐴆"] = "ch", + ["𐴇"] = "h", + ["𐴈"] = "kh", + ["𐴉"] = "f", + ["𐴂"] = "p", + ["𐴊"] = "d", + ["𐴋"] = "dh", + ["𐴌"] = "r", + ["𐴍"] = "ç", + ["𐴎"] = "z", + ["𐴏"] = "s", + ["𐴐"] = "c", + ["𐴑"] = "k", + ["𐴒"] = "g", + ["𐴓"] = "l", + ["𐴔"] = "m", + ["𐴕"] = "n", + ["𐴖"] = "w", + ["𐴗"] = "u̯", + ["𐴘"] = "y", + ["𐴙"] = "i̯", + ["𐴚"] = "ng", + ["𐴛"] = "ny", + ["𐴜"] = "v", + + -- vowels + ["𐴝"] = "a", + ["𐴞"] = "i", + ["𐴟"] = "u", + ["𐴠"] = "e", + ["𐴡"] = "o", + ["𐴢"] = "", + + -- numerals + ["𐴰"] = "0", + ["𐴱"] = "1", + ["𐴲"] = "2", + ["𐴳"] = "3", + ["𐴴"] = "4", + ["𐴵"] = "5", + ["𐴶"] = "6", + ["𐴷"] = "7", + ["𐴸"] = "8", + ["𐴹"] = "9" +}; + +function export.tr(text, lang, sc) + text = mw.ustring.gsub(text, ".", tt) + text = mw.ustring.gsub(text, "(.)𐴤", "%1́") + text = mw.ustring.gsub(text, "(.)𐴥", "%1́%1") + text = mw.ustring.gsub(text, "(.)𐴦", "%1%1́") + text = mw.ustring.gsub(text, "(.)𐴧", "%1%1") + return text +end + +return export diff --git 
a/wiktra/wikt/translit/Runr-translit.lua b/wiktra/wikt/translit/Runr-translit.lua new file mode 100644 index 0000000..f7daa10 --- /dev/null +++ b/wiktra/wikt/translit/Runr-translit.lua @@ -0,0 +1,56 @@ +local export = {} + +local letters = {} + +local export = {} + +local letters = {} + +-- Elder futhark +letters["gmq-pro"] = { + ["ᚠ"] = "f", + ["ᚢ"] = "u", + ["ᚦ"] = "þ", + ["ᚨ"] = "a", + ["ᚼ"] = "A", -- transitional period + ["ᚱ"] = "r", + ["ᚲ"] = "k", + ["ᚳ"] = "k", + ["ᚴ"] = "k", -- transitional period + ["ᚷ"] = "g", + ["ᚹ"] = "w", + ["ᚺ"] = "h", + ["ᚻ"] = "h", + ["ᚾ"] = "n", + ["ᛁ"] = "i", + ["ᛃ"] = "j", + ["ᛇ"] = "ï", + ["ᛈ"] = "p", + ["ᛉ"] = "z", + ["ᛦ"] = "z", -- transitional period + ["ᛊ"] = "s", + ["ᛋ"] = "s", + ["ᛏ"] = "t", + ["ᛒ"] = "b", + ["ᛖ"] = "e", + ["ᛗ"] = "m", + ["ᛚ"] = "l", + ["ᛜ"] = "ŋ", + ["ᛝ"] = "ŋ", + ["ᛟ"] = "o", + ["ᛞ"] = "d" +} + +-- Anglo-Saxon futhorc +letters["ang"] = {["ᚠ"] = "f", ["ᚢ"] = "u", ["ᚦ"] = "þ", ["ᚩ"] = "ó", ["ᚱ"] = "r", ["ᚳ"] = "c", ["ᚷ"] = "ȝ", ["ᚸ"] = "g", ["ᚹ"] = "w", ["ᚺ"] = "h", ["ᚻ"] = "h", ["ᚾ"] = "n", ["ᛁ"] = "i", ["ᛄ"] = "j", ["ᛇ"] = "eo", ["ᛈ"] = "p", ["ᛉ"] = "x", ["ᛋ"] = "s", ["ᛏ"] = "t", ["ᛒ"] = "b", ["ᛖ"] = "e", ["ᛗ"] = "m", ["ᛚ"] = "l", ["ᛜ"] = "ŋ", ["ᛝ"] = "ŋ", ["ᛟ"] = "œ", ["ᛞ"] = "d", ["ᚪ"] = "a", ["ᚫ"] = "æ", ["ᚣ"] = "y", ["ᛡ"] = "io", ["ᛠ"] = "ea"} + +-- Younger futhark +letters["non"] = {["ᚠ"] = "f", ["ᚢ"] = "u", ["ᚦ"] = "þ", ["ᚬ"] = "ą", ["ᚱ"] = "r", ["ᚴ"] = "k", ["ᚼ"] = "h", ["ᚽ"] = "h", ["ᚾ"] = "n", ["ᚿ"] = "n", ["ᛁ"] = "i", ["ᛅ"] = "a", ["ᛆ"] = "a", ["ᛋ"] = "s", ["ᛌ"] = "s", ["ᛏ"] = "t", ["ᛐ"] = "t", ["ᛒ"] = "b", ["ᛓ"] = "b", ["ᛘ"] = "m", ["ᛙ"] = "m", ["ᛚ"] = "l", ["ᛦ"] = "ʀ", ["ᛧ"] = "ʀ", ["ᛂ"] = "e", ["ᚮ"] = "o"} + +function export.tr(text, lang, sc) + if letters[lang] then return (mw.ustring.gsub(text, ".", letters[lang])) end + + return nil +end + +return export diff --git a/wiktra/wikt/translit/Samr-translit.lua b/wiktra/wikt/translit/Samr-translit.lua new file mode 100644 index 0000000..3f5a2be 
--- /dev/null +++ b/wiktra/wikt/translit/Samr-translit.lua @@ -0,0 +1,76 @@ +local export = {} +local U = mw.ustring.char +local gsub = mw.ustring.gsub + +local cons_list = "ʾbgdhwbzḥṭyklmnsʿfpṣqršt" +local cons = "[" .. cons_list .. "]" + +local tt = { + [1] = { + -- occlusion + [U(0x0805, 0x0818)] = "b", + [U(0x0805, 0x0810)] = "p" + }, + [2] = { + -- consonants + [U(0x0800)] = "ʾ", -- ALAF + [U(0x0801)] = "b", -- BIT + [U(0x0802)] = "g", -- GAMAN + [U(0x0803)] = "d", -- DALAT + [U(0x0804)] = "h", -- IY + [U(0x0805)] = "w", -- BAA + [U(0x0806)] = "z", -- ZEN + [U(0x0807)] = "ḥ", -- IT + [U(0x0808)] = "ṭ", -- TIT + [U(0x0809)] = "y", -- YUT + [U(0x080A)] = "k", -- KAAF + [U(0x080B)] = "l", -- LABAT + [U(0x080C)] = "m", -- MIM + [U(0x080D)] = "n", -- NUN + [U(0x080E)] = "s", -- SINGAAT + [U(0x080F)] = "ʿ", -- IN + [U(0x0810)] = "f", -- FI + [U(0x0811)] = "ṣ", -- TSAADIY + [U(0x0812)] = "q", -- QUF + [U(0x0813)] = "r", -- RISH + [U(0x0814)] = "š", -- SHAN + [U(0x0815)] = "t", -- TAAF + -- consonant modifier + [U(0x0816)] = "ʿ", -- IN + [U(0x0817)] = "ʿ", -- IN-ALAF + [U(0x081A)] = "ỹ", -- MODIFIER EPENTHETIC YUT + [U(0x081B)] = "ỹ", -- EPENTHETIC YUT + -- vowel signs + [U(0x081C)] = "ê", -- LONG E + [U(0x081D)] = "e", -- E + [U(0x081E)] = "å̂ː", -- OVERLONG AA + [U(0x081F)] = "å̂", -- LONG AA + [U(0x0820)] = "å", -- AA + [U(0x0821)] = "âː", -- OVERLONG A + [U(0x0822)] = "â", -- LONG A + [U(0x0823)] = "a", -- A + [U(0x0824)] = "ă", -- MODIFIER SHORT A + [U(0x0825)] = "ă", -- SHORT A + [U(0x0826)] = "u", -- U + [U(0x0827)] = "û", -- LONG U + [U(0x0828)] = "i", -- MODIFIER LETTER I + [U(0x0829)] = "î", -- LONG I + [U(0x082A)] = "i", -- I + [U(0x082B)] = "o", -- O + [U(0x082C)] = "", -- SUKUN + -- punctuation + [U(0x0830)] = " " -- PUNCTUATION NEQUDAA + }, + [3] = { + -- consonant with dagesh + ["(" .. cons .. ")" .. 
U(0x0819)] = "%1%1"
+    }
+}
+
+function export.tr(text, lang, sc)
+    for _, replacements in ipairs(tt) do for regex, replacement in pairs(replacements) do text = gsub(text, regex, replacement) end end
+
+    return text
+end
+
+return export
diff --git a/wiktra/wikt/translit/Shrd-translit.lua b/wiktra/wikt/translit/Shrd-translit.lua
new file mode 100644
index 0000000..c053be4
--- /dev/null
+++ b/wiktra/wikt/translit/Shrd-translit.lua
@@ -0,0 +1,69 @@
+local export = {}
+
+local consonants = {["𑆑"] = "k", ["𑆒"] = "kh", ["𑆓"] = "g", ["𑆔"] = "gh", ["𑆕"] = "ṅ", ["𑆖"] = "c", ["𑆗"] = "ch", ["𑆘"] = "j", ["𑆙"] = "jh", ["𑆚"] = "ñ", ["𑆛"] = "ṭ", ["𑆜"] = "ṭh", ["𑆝"] = "ḍ", ["𑆞"] = "ḍh", ["𑆟"] = "ṇ", ["𑆠"] = "t", ["𑆡"] = "th", ["𑆢"] = "d", ["𑆣"] = "dh", ["𑆤"] = "n", ["𑆥"] = "p", ["𑆦"] = "ph", ["𑆧"] = "b", ["𑆨"] = "bh", ["𑆩"] = "m", ["𑆪"] = "y", ["𑆫"] = "r", ["𑆬"] = "l", ["𑆮"] = "v", ["𑆭"] = "ḷ", ["𑆯"] = "ś", ["𑆰"] = "ṣ", ["𑆱"] = "s", ["𑆲"] = "h"}
+
+-- Dependent vowel signs and virama. export.tr concatenates diacritics[d] for every sign its pattern matches, so each sign needs its own key here.
+local diacritics = {["𑆳"] = "ā", ["𑆴"] = "i", ["𑆵"] = "ī", ["𑆶"] = "u", ["𑆷"] = "ū", ["𑆸"] = "ṛ", ["𑆹"] = "ṝ", ["𑆺"] = "ḷ", ["𑆻"] = "ḹ", ["𑆼"] = "e", ["𑆽"] = "ai", ["𑆾"] = "o", ["𑆿"] = "au", ["𑇀"] = ""}
+
+local tt = {
+    -- vowels
+    ["𑆃"] = "a",
+    ["𑆄"] = "ā",
+    ["𑆅"] = "i",
+    ["𑆆"] = "ī",
+    ["𑆇"] = "u",
+    ["𑆈"] = "ū",
+    ["𑆉"] = "ṛ",
+    ["𑆊"] = "ṝ",
+    ["𑆋"] = "ḷ",
+    ["𑆌"] = "ḹ",
+    ["𑆍"] = "e",
+    ["𑆎"] = "ai",
+    ["𑆏"] = "o",
+    ["𑆐"] = "au",
+    -- chandrabindu
+    ["𑆀"] = "m̐", -- until a better method is found
+    -- anusvara
+    ["𑆁"] = "ṃ", -- until a better method is found
+    -- visarga
+    ["𑆂"] = "ḥ",
+    -- avagraha
+    ["𑇁"] = "’",
+    -- numerals
+    ["𑇐"] = "0",
+    ["𑇑"] = "1",
+    ["𑇒"] = "2",
+    ["𑇓"] = "3",
+    ["𑇔"] = "4",
+    ["𑇕"] = "5",
+    ["𑇖"] = "6",
+    ["𑇗"] = "7",
+    ["𑇘"] = "8",
+    ["𑇙"] = "9",
+    -- punctuation
+    ["𑇆"] = ".", -- double danda
+    ["𑇅"] = ".", -- danda
+    -- Vedic extensions
+    ["𑇂"] = "x",
+    ["𑇃"] = "f",
+    -- Om
+    ["𑇄"] = "oṃ",
+    -- reconstructed
+    ["*"] = ""
+}
+
+function export.tr(text, lang, sc)
+    text = mw.ustring.gsub(text,
"([𑆑𑆒𑆓𑆔𑆕𑆖𑆗𑆘𑆙𑆚𑆛𑆜𑆝𑆞𑆟𑆠𑆡𑆢𑆣𑆤𑆥𑆦𑆧𑆨𑆩𑆪𑆫𑆬𑆮𑆭𑆯𑆰𑆱𑆲])" .. "([𑆳𑆴𑆵𑆶𑆷𑆸𑆹𑆺𑆻𑆼𑆽𑆾𑆿𑇀]?)", function(c, d) + if d == "" then + return consonants[c] .. "a" + else + return consonants[c] .. diacritics[d] + end + end) + + text = mw.ustring.gsub(text, ".", tt) + + return text +end + +return export diff --git a/wiktra/wikt/translit/Sind-translit.lua b/wiktra/wikt/translit/Sind-translit.lua new file mode 100644 index 0000000..70b7cf3 --- /dev/null +++ b/wiktra/wikt/translit/Sind-translit.lua @@ -0,0 +1,102 @@ +local export = {} + +local consonants = { + ["𑊺"] = "k", + ["𑊻"] = "kh", + ["𑊼"] = "g", + ["𑊽"] = "g̈", + ["𑊾"] = "gh", + ["𑊿"] = "ṅ", + ["𑋀"] = "c", + ["𑋁"] = "ch", + ["𑋂"] = "j", + ["𑋃"] = "j̈", + ["𑋄"] = "jh", + ["𑋅"] = "ñ", + ["𑋆"] = "ṭ", + ["𑋇"] = "ṭh", + ["𑋈"] = "ḍ", + ["𑋉"] = "d̤", + ["𑋊"] = "ṛ", + ["𑋋"] = "ḍh", + ["𑋌"] = "ṇ", + ["𑋍"] = "t", + ["𑋎"] = "th", + ["𑋏"] = "d", + ["𑋐"] = "dh", + ["𑋑"] = "n", + ["𑋒"] = "p", + ["𑋓"] = "ph", + ["𑋔"] = "b", + ["𑋕"] = "b̤", + ["𑋖"] = "bh", + ["𑋗"] = "m", + ["𑋘"] = "y", + ["𑋙"] = "r", + ["𑋚"] = "l", + ["𑋛"] = "v", + ["𑋜"] = "ś", + ["𑋝"] = "s", + ["𑋞"] = "h", + + -- consonants with nukta + ["𑊻𑋩"] = "x", + ["𑊼𑋩"] = "ġ", + ["𑋂𑋩"] = "z", + ["𑋓𑋩"] = "f" +} + +local diacritics = {["𑋠"] = "ā", ["𑋡"] = "i", ["𑋢"] = "ī", ["𑋣"] = "u", ["𑋤"] = "ū", ["𑋥"] = "e", ["𑋦"] = "ai", ["𑋧"] = "o", ["𑋨"] = "au", ["𑋪"] = ""} + +local nonconsonants = { + -- vowels + ["𑊰"] = "a", + ["𑊱"] = "ā", + ["𑊲"] = "i", + ["𑊳"] = "ī", + ["𑊴"] = "u", + ["𑊵"] = "ū", + ["𑊶"] = "e", + ["𑊷"] = "ai", + ["𑊸"] = "o", + ["𑊹"] = "au", + -- other symbols + ["𑋟"] = "ṁ", -- anusvara + ["𑋩"] = ".", -- nukta + -- digits + ["𑋰"] = "0", + ["𑋱"] = "1", + ["𑋲"] = "2", + ["𑋳"] = "3", + ["𑋴"] = "4", + ["𑋵"] = "5", + ["𑋶"] = "6", + ["𑋷"] = "7", + ["𑋸"] = "8", + ["𑋹"] = "9" +} + +local nasal_assim = {["[kg]h?"] = "ṅ", ["[cj]h?"] = "ñ", ["[ṭḍ]h?"] = "ṇ", ["[td]h?"] = "n", ["[pb]h?"] = "m", ["n"] = "n", ["m"] = "m", ["s"] = "n"} + +-- translit any words or phrases +function export.tr(text, lang, sc) + 
local nukta = "([𑊻𑊼𑋂𑋓]𑋩)"
+    text = mw.ustring.gsub(text, "([𑊺𑊻𑊼𑊽𑊾𑊿𑋀𑋁𑋂𑋃𑋄𑋅𑋆𑋇𑋈𑋉𑋊𑋋𑋌𑋍𑋎𑋏𑋐𑋑𑋒𑋓𑋔𑋕𑋖𑋗𑋘𑋙𑋚𑋛𑋜𑋝𑋞][𑋩]?)" .. "([𑋠𑋡𑋢𑋣𑋤𑋥𑋦𑋧𑋨𑋪]?)", function(c, d)
+        -- mw.log('match', c, d)
+        c = consonants[c] or c
+        if d == "" then -- bare consonant: supply the inherent vowel
+            return c .. "a"
+        else -- vowel sign, or virama (mapped to "" in `diacritics`, i.e. no vowel at all)
+            return c .. (diacritics[d] or d)
+        end
+    end)
+
+    text = mw.ustring.gsub(text, nukta, consonants)
+    text = mw.ustring.gsub(text, ".", nonconsonants)
+    for key, val in pairs(nasal_assim) do text = mw.ustring.gsub(text, "ṁ(" .. key .. ")", val .. "%1") end
+    text = mw.ustring.gsub(text, "([aiueēoāīū])ṁ ", "%1̃ ")
+    text = mw.ustring.gsub(text, "(.?)ṁ", "%1̃")
+    return text
+end
+
+return export
diff --git a/wiktra/wikt/translit/Sogd-translit.lua b/wiktra/wikt/translit/Sogd-translit.lua
new file mode 100644
index 0000000..e027647
--- /dev/null
+++ b/wiktra/wikt/translit/Sogd-translit.lua
@@ -0,0 +1,53 @@
+local export = {}
+
+local tt = {
+    ["𐼰"] = "ʾ", -- aleph
+    ["𐼱"] = "β", -- beth
+    ["𐼲"] = "ɣ", -- gimel
+    ["𐼳"] = "h", -- he
+    ["𐼴"] = "w", -- waw
+    ["𐼵"] = "z", -- zayin
+    ["𐼶"] = "x", -- heth
+    ["𐼷"] = "y", -- yodh
+    ["𐼸"] = "k", -- kaph
+    ["𐼹"] = "δ", -- lamedh
+    ["𐼺"] = "m", -- mem
+    ["𐼻"] = "n", -- nun
+    ["𐼼"] = "s", -- samekh
+    ["𐼽"] = "ʿ", -- ayin
+    ["𐼾"] = "p", -- pe
+    ["𐼿"] = "c", -- sadhe
+    ["𐽀"] = "r", -- resh-ayin
+    ["𐽁"] = "š", -- shin
+    ["𐽂"] = "t", -- taw
+    ["𐽃"] = "f", -- fesh
+    ["𐽄"] = "l", -- lesh
+    ["𐽅"] = "š", -- phonogram shin
+    ["𐽆"] = "", -- dot above
+    ["𐽇"] = "", -- dot above
+    ["𐽈"] = "", -- two dots above
+    ["𐽉"] = "", -- two dots below
+    ["𐽊"] = "", -- curve above
+    ["𐽋"] = "", -- curve below
+    ["𐽌"] = "", -- hook above
+    ["𐽍"] = "", -- hook below
+    ["𐽎"] = "", -- long hook below
+    ["𐽏"] = "", -- resh below
+    ["𐽐"] = "", -- stroke below
+    ["𐽑"] = "1", -- one
+    ["𐽒"] = "10", -- ten
+    ["𐽓"] = "20", -- twenty
+    ["𐽔"] = "100" -- one hundred
+}
+
+function export.tr(text, lang, sc)
+    -- If the script is not Sogd, do not transliterate
+    if sc ~= "Sogd" then return end
+
+    -- Transliterate characters
+    text =
mw.ustring.gsub(text, ".", tt)
+
+    return text
+end
+
+return export
diff --git a/wiktra/wikt/translit/Sogo-translit.lua b/wiktra/wikt/translit/Sogo-translit.lua
new file mode 100644
index 0000000..304d2ec
--- /dev/null
+++ b/wiktra/wikt/translit/Sogo-translit.lua
@@ -0,0 +1,58 @@
+local export = {}
+
+local tt = {
+    ["𐼀"] = "ʾ", -- aleph
+    ["𐼁"] = "ʾ", -- final aleph
+    ["𐼂"] = "β", -- beth
+    ["𐼃"] = "β", -- final beth
+    ["𐼄"] = "ɣ", -- gimel
+    ["𐼅"] = "h", -- he
+    ["𐼆"] = "h", -- final he
+    ["𐼇"] = "w", -- waw
+    ["𐼈"] = "z", -- zayin
+    ["𐼉"] = "x", -- heth
+    ["𐼊"] = "y", -- yodh
+    ["𐼋"] = "k", -- kaph
+    ["𐼌"] = "δ", -- lamedh
+    ["𐼍"] = "m", -- mem
+    ["𐼎"] = "n", -- nun
+    ["𐼏"] = "n", -- final nun
+    ["𐼐"] = "n", -- final nun with vertical tail
+    ["𐼑"] = "s", -- samekh
+    ["𐼒"] = "ʿ", -- ayin
+    ["𐼓"] = "ʿD", -- alternative ayin
+    ["𐼔"] = "p", -- pe
+    ["𐼕"] = "c", -- sadhe
+    ["𐼖"] = "c", -- final sadhe
+    ["𐼗"] = "c", -- final sadhe with vertical tail
+    ["𐼘"] = "r", -- resh-ayin-deleth
+    ["𐼙"] = "š", -- shin
+    ["𐼚"] = "t", -- taw
+    ["𐼛"] = "t", -- final taw
+    ["𐼜"] = "t", -- final taw with vertical tail
+    -- numerals (formerly a second table literal, which `local tt = {...}, {...}, {...}` silently discarded)
+    ["𐼝"] = "1", -- one
+    ["𐼞"] = "2", -- two
+    ["𐼟"] = "3", -- three
+    ["𐼠"] = "4", -- four
+    ["𐼡"] = "5", -- five
+    ["𐼢"] = "10", -- ten
+    ["𐼣"] = "20", -- twenty
+    ["𐼤"] = "30", -- thirty
+    ["𐼥"] = "100", -- one hundred
+    ["𐼦"] = "½", -- one half
+    -- ligature (formerly a third table literal, discarded the same way)
+    ["𐼧"] = "r" -- ayin-daleth ligature
+}
+
+function export.tr(text, lang, sc)
+    -- If the script is not Sogo, do not transliterate
+    if sc ~= "Sogo" then return end
+
+    -- Transliterate characters
+    text = mw.ustring.gsub(text, ".", tt)
+
+    return text
+end
+
+return export
diff --git a/wiktra/wikt/translit/Tale-translit.lua b/wiktra/wikt/translit/Tale-translit.lua
new file mode 100644
index 0000000..b13cd94
--- /dev/null
+++ b/wiktra/wikt/translit/Tale-translit.lua
@@ -0,0 +1,85 @@
+local export = {}
+local gsub = mw.ustring.gsub
+local u = mw.ustring.char
+-- pattern ([ᥐ-ᥢ])([ᥣ-ᥬ]?)([ᥐᥒᥖᥙᥛᥝᥢᥭ]?)([ᥰ-ᥴ{dia-tones}]?)
+
+local tt = {
+    -- consonants
+    ["ᥐ"] = "k",
+    ["ᥑ"] = "x",
+    ["ᥒ"] = "ng",
+    ["ᥓ"] = "ts",
+    ["ᥔ"] = "s",
+    ["ᥕ"] = "y",
+    ["ᥖ"] = "t",
+    ["ᥗ"] = "th",
+    ["ᥘ"] = "l",
+    ["ᥙ"] = "p",
+    ["ᥚ"] = "ph",
+    ["ᥛ"] = "m",
+    ["ᥜ"] = "f",
+    ["ᥝ"] = "w",
+    ["ᥞ"] = "h",
+    ["ᥟ"] = "ʼ",
+    ["ᥠ"] = "kh",
+    ["ᥡ"] = "tsh",
+    ["ᥢ"] = "n",
+    -- vowels
+    ["ᥣ"] = "aa",
+    ["ᥤ"] = "i",
+    ["ᥥ"] = "e",
+    ["ᥦ"] = "ae",
+    ["ᥧ"] = "u",
+    ["ᥨ"] = "o",
+    ["ᥩ"] = "oa",
+    ["ᥪ"] = "ue",
+    ["ᥫ"] = "oe",
+    ["ᥬ"] = "aue",
+    ["ᥭ"] = "y"
+}
+
+local tone_table = {
+    -- different ordering from Unicode: http://www.seasite.niu.edu/tai/TaiDehong/index.htm
+    -- also supports old orthography
+    ["ᥰ"] = u(0x0308),
+    [u(0x0308)] = u(0x0308),
+    [u(0x00A8)] = u(0x0308), -- 2 ä
+    ["ᥱ"] = u(0x030C),
+    [u(0x030C)] = u(0x030C),
+    [u(0x02C7)] = u(0x030C), -- 3 ǎ
+    ["ᥲ"] = u(0x0300),
+    [u(0x0300)] = u(0x0300),
+    [u(0x0060)] = u(0x0300),
+    [u(0x02CB)] = u(0x0300), -- 4 à
+    ["ᥳ"] = u(0x0307),
+    [u(0x0307)] = u(0x0307),
+    [u(0x02D9)] = u(0x0307), -- 5 ȧ
+    ["ᥴ"] = u(0x0301),
+    [u(0x0301)] = u(0x0301),
+    [u(0x00B4)] = u(0x0301),
+    [u(0x02CA)] = u(0x0301), -- 1 á
+    [""] = "" -- 6 a
+}
+
+local tone_key = "([ᥰ-ᥴ" .. u(0x0308) .. u(0x00A8) .. u(0x030C) .. u(0x02C7) .. u(0x0300) .. u(0x0060) .. u(0x02CB) .. u(0x0307) .. u(0x02D9) .. u(0x0301) .. u(0x00B4) .. u(0x02CA) .. "]?)"
+
+function export.tr(text, lang, sc, debug_mode)
+
+    if type(text) == "table" then -- called directly from a template
+        text = text.args[1]
+    end
+
+    text = gsub(text, "([ᥐ-ᥢ])([ᥐᥒᥖᥙᥛᥝᥢᥭ])", "%1a%2")
+    text = gsub(text, ".", tt)
+
+    -- adds tone diacritic, word by word; `new` is declared local so the loop does not write a global
+    for old in mw.text.gsplit(text, " ") do
+        local new = gsub(old, "([aeiou])([a-z]*)" .. tone_key, function(v, x, t) return v .. tone_table[t] ..
x end) + text = gsub(text, old, new, 1) + end + + return text + +end + +return export diff --git a/wiktra/wikt/translit/Tavt-translit.lua b/wiktra/wikt/translit/Tavt-translit.lua new file mode 100644 index 0000000..3e021d1 --- /dev/null +++ b/wiktra/wikt/translit/Tavt-translit.lua @@ -0,0 +1,99 @@ +local export = {} +local gsub = mw.ustring.gsub + +local tt = { + -- consonants + ["ꪀ"] = "k", + ["ꪁ"] = "ḵ", + ["ꪂ"] = "k̄h", + ["ꪃ"] = "kh", + ["ꪄ"] = "ḳ̄h", + ["ꪅ"] = "kʹh", + ["ꪆ"] = "?", + ["ꪇ"] = "?", + ["ꪈ"] = "h̄ng", + ["ꪉ"] = "ng", + ["ꪊ"] = "c", + ["ꪋ"] = "c̱", + ["ꪌ"] = "c̄h", + ["ꪍ"] = "ch", + ["ꪎ"] = "s̄", + ["ꪏ"] = "s", + ["ꪐ"] = "h̄ỵ", + ["ꪑ"] = "ỵ", + ["ꪒ"] = "d", + ["ꪓ"] = "ḏ", + ["ꪔ"] = "t", + ["ꪕ"] = "ṯ", + ["ꪖ"] = "t̄h", + ["ꪗ"] = "th", + ["ꪘ"] = "h̄n", + ["ꪙ"] = "n", + ["ꪚ"] = "b", + ["ꪛ"] = "ḇ", + ["ꪜ"] = "p", + ["ꪝ"] = "p̱", + ["ꪞ"] = "p̄h", + ["ꪟ"] = "ph", + ["ꪠ"] = "f̄", + ["ꪡ"] = "f", + ["ꪢ"] = "h̄m", + ["ꪣ"] = "m", + ["ꪤ"] = "h̄y", + ["ꪥ"] = "y", + ["ꪦ"] = "h̄r", + ["ꪧ"] = "r", + ["ꪨ"] = "h̄l", + ["ꪩ"] = "l", + ["ꪪ"] = "h̄w", + ["ꪫ"] = "w", + ["ꪬ"] = "h̄", + ["ꪭ"] = "ḥ", + ["ꪮ"] = "x", + ["ꪯ"] = "x̱", + -- vowels and finals (visual ordering) + ["ꪰ"] = "'", + ["ꪱ"] = "ā", + ["ꪲ"] = "l", + ["ꪳ"] = "ụ", + ["ꪴ"] = "ุ", + ["ꪵ"] = "æ", + ["ꪶ"] = "o", + ["ꪷ"] = "'", + ["ꪸ"] = "y", + ["ꪹ"] = "e", + ["ꪺ"] = "ัw", + ["ꪻ"] = "i", + ["ꪼ"] = "ị", + ["ꪽ"] = "ัn", + ["ꪾ"] = "ả", + -- tones + ["꪿"] = "'", + ["ꫀ"] = "1", + ["꫁"] = "'", + ["ꫂ"] = "2", + -- symbols + ["ꫛ"] = "ko̱n", + ["ꫜ"] = "nụ̀ng", + ["ꫝ"] = "«", + ["꫞"] = "§", + ["꫟"] = "»" +} + +function export.tr(text, lang, sc, debug_mode) + + if type(text) == "table" then -- called directly from a template + text = text.args[1] + end + + text = gsub(text, ".", tt) + + text = gsub(text, "([่้๋̱])([ัิีึืุู])", "%2%1") + text = gsub(text, "([^%s%p%z])bả", "%1ัb") + text = gsub(text, "(ả)([่้๋̱])", "%2%1") + + return text + +end + +return export diff --git a/wiktra/wikt/translit/Tfng-translit.lua 
b/wiktra/wikt/translit/Tfng-translit.lua new file mode 100644 index 0000000..974dfb7 --- /dev/null +++ b/wiktra/wikt/translit/Tfng-translit.lua @@ -0,0 +1,108 @@ +local export = {} + +local tt = {} + +tt["Tfng"] = { + ["common"] = { + ["ⴰ"] = "a", + ["ⴱ"] = "b", + ["ⴲ"] = "b", + ["ⴳ"] = "g", + ["ⴴ"] = "g", + ["ⴵ"] = "ǧ", + ["ⴶ"] = "ǧ", + ["ⴷ"] = "d", + ["ⴸ"] = "d", + ["ⴹ"] = "ḍ", + ["ⴺ"] = "ḍ", + ["ⴻ"] = "e", + ["ⴼ"] = "f", + ["ⴽ"] = "k", + ["ⴾ"] = "k", + ["ⴿ"] = "k", + ["ⵀ"] = "h", -- tmh, thv, taq, ttq, thz: "b" + ["ⵁ"] = "h", + ["ⵂ"] = "h", + ["ⵃ"] = "ḥ", + ["ⵄ"] = "ɛ", + ["ⵅ"] = "x", + ["ⵆ"] = "x", + ["ⵇ"] = "q", + ["ⵈ"] = "q", + ["ⵉ"] = "i", + ["ⵊ"] = "j", + ["ⵋ"] = "j", + ["ⵌ"] = "j", + ["ⵍ"] = "l", + ["ⵎ"] = "m", + ["ⵏ"] = "n", + ["ⵐ"] = "ny", + ["ⵑ"] = "ng", + ["ⵒ"] = "p", + ["ⵓ"] = "u", -- tmh, thv, taq, ttq, thz: "w" + ["ⵔ"] = "r", + ["ⵕ"] = "ṛ", + ["ⵖ"] = "ɣ", + ["ⵗ"] = "ɣ", + ["ⵘ"] = "j", -- thz: "ɣ" + ["ⵙ"] = "s", + ["ⵚ"] = "ṣ", + ["ⵛ"] = "š", + ["ⵜ"] = "t", + ["ⵝ"] = "t", + ["ⵞ"] = "č", + ["ⵟ"] = "ṭ", + ["ⵠ"] = "v", + ["ⵡ"] = "w", + ["ⵢ"] = "y", + ["ⵣ"] = "z", + ["ⵤ"] = "z", + ["ⵥ"] = "ẓ", + ["ⵦ"] = "e", + ["ⵧ"] = "o", + ["ⵯ"] = "ʷ", + ["⵰"] = ".", + ["⵿"] = "" + }, + ["tmh"] = {["ⵀ"] = "b", ["ⵓ"] = "w"}, + ["thv"] = {["ⵀ"] = "b", ["ⵓ"] = "w"}, + ["taq"] = {["ⵀ"] = "b", ["ⵓ"] = "w"}, + ["ttq"] = {["ⵀ"] = "b", ["ⵓ"] = "w"}, + ["thz"] = {["ⵀ"] = "b", ["ⵓ"] = "w", ["ⵘ"] = "ɣ"} +} + +tt["Latn"] = {["common"] = {["a"] = "ⴰ", ["ā"] = "ⴰ", ["b"] = "ⴱ", ["g"] = "ⴳ", ["d"] = "ⴷ", ["ḏ"] = "ⴷ", ["ḍ"] = "ⴹ", ["e"] = "ⴻ", ["f"] = "ⴼ", ["ǧ"] = "ⴵ", ["k"] = "ⴽ", ["h"] = "ⵀ", ["ḥ"] = "ⵃ", ["ɛ"] = "ⵄ", ["x"] = "ⵅ", ["q"] = "ⵇ", ["i"] = "ⵉ", ["j"] = "ⵊ", ["l"] = "ⵍ", ["m"] = "ⵎ", ["n"] = "ⵏ", ["p"] = "ⵒ", ["u"] = "ⵓ", ["r"] = "ⵔ", ["ṛ"] = "ⵕ", ["ɣ"] = "ⵖ", ["s"] = "ⵙ", ["ṣ"] = "ⵚ", ["š"] = "ⵛ", ["t"] = "ⵜ", ["ṯ"] = "ⵜ", ["č"] = "ⵞ", ["ṭ"] = "ⵟ", ["v"] = "ⵠ", ["w"] = "ⵡ", ["y"] = "ⵢ", ["z"] = "ⵣ", ["ẓ"] = "ⵥ", ["o"] = "ⵧ", ["ʷ"] = "ⵯ", ["."] = "⵰"}, ["tmh"] = {["b"] = 
"ⵀ", ["w"] = "ⵓ"}, ["thv"] = {["b"] = "ⵀ", ["w"] = "ⵓ"}, ["taq"] = {["b"] = "ⵀ", ["w"] = "ⵓ"}, ["ttq"] = {["b"] = "ⵀ", ["w"] = "ⵓ"}, ["thz"] = {["b"] = "ⵀ", ["w"] = "wⵓ", ["ɣ"] = "ⵘ"}} + +function export.tr_alt(text, lang, sc) + if not sc then sc = require("scripts").findBestScript(text, require("languages").getByCode(lang or "ber")):getCode() end + + if sc == "Latn" then + if tt[sc][lang] then text = mw.ustring.gsub(text, ".", tt[sc][lang]) end + text = mw.ustring.gsub(text, ".", tt[sc]["common"]) + elseif sc == "Arab" then + text = nil + elseif sc == "Tfng" then + text = nil + end + + return text +end + +function export.tr(text, lang, sc) + if not sc then sc = require("scripts").findBestScript(text, require("languages").getByCode(lang or "ber")):getCode() end + + if sc == "Arab" then + -- perhaps will be implemented in the future + text = nil + elseif sc == "Latn" then + -- no need to transliterate + text = nil + elseif sc == "Tfng" then + if tt[sc][lang] then text = mw.ustring.gsub(text, ".", tt[sc][lang]) end + text = mw.ustring.gsub(text, ".", tt[sc]["common"]) + end + + return text +end + +return export diff --git a/wiktra/wikt/translit/Tirh-Deva-translit.lua b/wiktra/wikt/translit/Tirh-Deva-translit.lua new file mode 100644 index 0000000..1753e54 --- /dev/null +++ b/wiktra/wikt/translit/Tirh-Deva-translit.lua @@ -0,0 +1,126 @@ +local export = {} +local gsub = mw.ustring.gsub +local match = mw.ustring.match + +local conv = { + + -- consonants + ["𑒏"] = "क", + ["𑒐"] = "ख", + ["𑒑"] = "ग", + ["𑒒"] = "घ", + ["𑒓"] = "ङ", + ["𑒔"] = "च", + ["𑒕"] = "छ", + ["𑒖"] = "ज", + ["𑒗"] = "झ", + ["𑒘"] = "ञ", + ["𑒙"] = "ट", + ["𑒚"] = "ठ", + ["𑒛"] = "ड", + ["𑒜"] = "ढ", + ["𑒝"] = "ण", + ["𑒞"] = "त", + ["𑒟"] = "थ", + ["𑒠"] = "द", + ["𑒡"] = "ध", + ["𑒢"] = "न", + ["𑒣"] = "प", + ["𑒤"] = "फ", + ["𑒥"] = "ब", + ["𑒦"] = "भ", + ["𑒧"] = "म", + ["𑒨"] = "य", + ["𑒩"] = "र", + ["𑒪"] = "ल", + ["𑒫"] = "व", + ["𑒪𑓃"] = "ळ", + ["𑒬"] = "श", + ["𑒭"] = "ष", + ["𑒮"] = "स", + ["𑒯"] = "ह", + ["𑒛𑓃"] = 
"ड़", + ["𑒜𑓃"] = "ढ़", + ["𑓃"] = "़", + -- vowel diacritics + ["𑒱"] = "ि", + ["𑒳"] = "ु", + ["𑒹"] = "े", + ["𑒺"] = "ॆ", + ["𑒼"] = "ो", + ["𑒽"] = "ॊ", + ["𑒰"] = "ा", + ["𑒲"] = "ी", + ["𑒴"] = "ू", + ["𑒵"] = "ृ", + ["𑒶"] = "ॄ", + ["𑒻"] = "ै", + ["𑒾"] = "ौ", + ["𑒷"] = "ॢ", + ["𑒸"] = "ॣ", + ["ॏ"] = "ॏ", + ["ऺ"] = "ऺ", + ["ॅ"] = "ॅ", + -- vowels + ["𑒁"] = "अ", + ["𑒂"] = "आ", + ["𑒃"] = "इ", + ["𑒄"] = "ई", + ["𑒅"] = "उ", + ["𑒆"] = "ऊ", + ["𑒇"] = "ऋ", + ["𑒈"] = "ॠ", + ["𑒉"] = "ऌ", + ["𑒊"] = "ॡ", + ["𑒋"] = "ए", + ["𑒌"] = "ऐ", + ["𑒍"] = "ओ", + ["𑒎"] = "औ", + + ["ऎ"] = "ऎ", + ["ऒ"] = "ऒ", + ["व़"] = "व़", + ["य़"] = "य़", + ["ॵ"] = "ॵ", + ["ॴ"] = "ॴ", + ["ऍ"] = "ऍ", + ["इऺ"] = "इऺ", + ["उऺ"] = "उऺ", + ["२"] = "’", + ["ऽ"] = "ऽ", + -- chandrabindu + ["𑒿"] = "ँ", + -- anusvara + ["𑓀"] = "ं", + -- gvang + ["𑓅"] = "ँ", + -- visarga + ["𑓁"] = "ः", + -- virama + ["𑓂"] = "्", + -- om + ["𑓇"] = "ॐ", + -- numerals + ["𑓐"] = "०", + ["𑓑"] = "१", + ["𑓒"] = "२", + ["𑓓"] = "३", + ["𑓔"] = "४", + ["𑓕"] = "५", + ["𑓖"] = "६", + ["𑓗"] = "७", + ["𑓘"] = "८", + ["𑓙"] = "९", + -- punctuation + ["।"] = "।", -- danda + ["॥"] = "॥", -- double danda + ["+"] = "+", -- compound separator + -- abbreviation sign + ["॰"] = "॰" +} +function export.tr(text, lang, sc) + text = mw.ustring.gsub(text, ".", function(c) return conv[c] end) + return text +end + +return export diff --git a/wiktra/wikt/translit/Unicode data.lua b/wiktra/wikt/translit/Unicode data.lua new file mode 100644 index 0000000..0a63549 --- /dev/null +++ b/wiktra/wikt/translit/Unicode data.lua @@ -0,0 +1,320 @@ +local export = {} + +local floor = math.floor + +local function errorf(first_arg, ...) 
+ if type(first_arg) == "number" then + return error(string.format(...), first_arg + 1) + else + return error(string.format(first_arg, ...), 2) + end +end + +local function binary_range_search(codepoint, ranges) + local low, mid, high + low, high = 1, ranges.length or require"Module:table".length(ranges) + while low <= high do + mid = floor((low + high) / 2) + local range = ranges[mid] + if codepoint < range[1] then + high = mid - 1 + elseif codepoint <= range[2] then + return range, mid + else + low = mid + 1 + end + end + return nil, mid +end +export.binary_range_search = binary_range_search + +local function linear_range_search(codepoint, ranges) + for i, range in ipairs(ranges) do + if codepoint < range[1] then + break + elseif codepoint <= range[2] then + return range + end + end +end + +-- Load a module by indexing "loader" with the name of the module minus the +-- "Module:Unicode data/" part. For instance, loader.blocks returns +-- [[Module:Unicode data/blocks]]. If a module cannot be loaded, false will be +-- returned. +local loader = setmetatable({}, { + __index = function(self, key) + local success, data = pcall(mw.loadData, "Module:Unicode data/" .. 
key)
+        if not success then data = false end
+        self[key] = data
+        return data
+    end
+})
+
+-- For the algorithm used to generate Hangul Syllable names,
+-- see "Hangul Syllable Name Generation" in section 3.12 of the
+-- Unicode Specification: https://www.unicode.org/versions/latest/ch03.pdf
+-- For most of the name rules given here, see the subsection
+-- "Unicode Name Property" in section 4.8 (Name) and the table 4-8
+-- (Name Derivation Rule Prefix Strings):
+-- https://www.unicode.org/versions/latest/ch04.pdf
+-- Keep the entries sorted by first code point and non-overlapping: lookup_name finds them with binary_range_search.
+local name_hooks = {
+    {0x00, 0x1F, ""}, -- C0 control characters
+    {0x7F, 0x9F, ""}, -- DEL and C1 control characters
+    {0x3400, 0x4DBF, "CJK UNIFIED IDEOGRAPH-%04X"}, -- CJK Ideograph Extension A
+    {0x4E00, 0x9FFC, "CJK UNIFIED IDEOGRAPH-%04X"}, -- CJK Ideograph
+    {
+        0xAC00, 0xD7A3, function(codepoint) -- Hangul Syllables
+            local Hangul_data = loader.Hangul
+            local syllable_index = codepoint - 0xAC00
+
+            return ("HANGUL SYLLABLE %s%s%s"):format(Hangul_data.leads[floor(syllable_index / Hangul_data.final_count)], Hangul_data.vowels[floor((syllable_index % Hangul_data.final_count) / Hangul_data.trail_count)], Hangul_data.trails[syllable_index % Hangul_data.trail_count])
+        end
+    }, -- High Surrogates, High Private Use Surrogates, Low Surrogates
+    {0xD800, 0xDFFF, ""}, {0xE000, 0xF8FF, ""}, -- Private Use
+    -- CJK Compatibility Ideographs
+    {0xF900, 0xFA6D, "CJK COMPATIBILITY IDEOGRAPH-%04X"}, {0xFA70, 0xFAD9, "CJK COMPATIBILITY IDEOGRAPH-%04X"}, {0x17000, 0x187F7, "TANGUT IDEOGRAPH-%04X"}, -- Tangut
+    {0x18800, 0x18AFF, function(codepoint) return ("TANGUT COMPONENT-%03d"):format(codepoint - 0x187FF) end}, {0x18B00, 0x18CD5, "KHITAN SMALL SCRIPT CHARACTER-%04X"}, -- Khitan Small Script (moved ahead of 0x18D00 to keep ascending order)
+    {0x18D00, 0x18D08, "TANGUT IDEOGRAPH-%04X"}, {0x1B170, 0x1B2FB, "NUSHU CHARACTER-%04X"}, -- Tangut Supplement, Nushu
+    {0x20000, 0x2A6DD, "CJK UNIFIED IDEOGRAPH-%04X"}, -- CJK Ideograph Extension B
+    {0x2A700, 0x2B734, "CJK UNIFIED IDEOGRAPH-%04X"}, -- CJK Ideograph Extension C
+    {0x2B740, 0x2B81D,
"CJK UNIFIED IDEOGRAPH-%04X"}, -- CJK Ideograph Extension D + {0x2B820, 0x2CEA1, "CJK UNIFIED IDEOGRAPH-%04X"}, -- CJK Ideograph Extension E + {0x2CEB0, 0x2EBE0, "CJK UNIFIED IDEOGRAPH-%04X"}, -- CJK Ideograph Extension F + -- CJK Compatibility Ideographs Supplement (Supplementary Ideographic Plane) + {0x2F800, 0x2FA1D, "CJK COMPATIBILITY IDEOGRAPH-%04X"}, {0x30000, 0x3134A, "CJK UNIFIED IDEOGRAPH-%04X"}, -- CJK Ideograph Extension G + { + 0xE0100, 0xE01EF, function(codepoint) -- Variation Selectors Supplement + return ("VARIATION SELECTOR-%d"):format(codepoint - 0xE0100 + 17) + end + }, {0xF0000, 0xFFFFD, ""}, -- Plane 15 Private Use + {0x100000, 0x10FFFD, ""} -- Plane 16 Private Use +} +name_hooks.length = #name_hooks + +local name_range_cache + +local function generate_name(data, codepoint) + if type(data) == "string" then + return data:format(codepoint) + else + return data(codepoint) + end +end + +--[[ +-- Checks that the code point is a number and in range. +-- Does not check whether code point is an integer. +-- Not used +local function check_codepoint(funcName, argIdx, val) + require 'libraryUtil'.checkType(funcName, argIdx, val, 'number') + if codepoint < 0 or 0x10FFFF < codepoint then + errorf("Codepoint %04X out of range", codepoint) + end +end +--]] + +-- https://www.unicode.org/versions/latest/ch04.pdf, section 4.8 +function export.lookup_name(codepoint) + -- U+FDD0-U+FDEF and all code points ending in FFFE or FFFF are Unassigned + -- (Cn) and specifically noncharacters: + -- https://www.unicode.org/faq/private_use.html#nonchar4 + if 0xFDD0 <= codepoint and (codepoint <= 0xFDEF or floor(codepoint % 0x10000) >= 0xFFFE) then return (""):format(codepoint) end + + if name_range_cache -- Check if previously used "name hook" applies to this code point. 
+ and codepoint >= name_range_cache[1] and codepoint <= name_range_cache[2] then return generate_name(name_range_cache[3], codepoint) end + + local range = binary_range_search(codepoint, name_hooks) + if range then + name_range_cache = range + return generate_name(range[3], codepoint) + end + + local data = loader[("names/%03X"):format(codepoint / 0x1000)] + + if data and data[codepoint] then + return data[codepoint] + + -- Unassigned (Cn) consists of noncharacters and reserved characters. + -- The character has been established not to be a noncharacter, + -- and if it were assigned, its name would already been retrieved, + -- so it must be reserved. + else + return (""):format(codepoint) + end +end + +function export.lookup_image(codepoint) + local data = loader[("images/%03X"):format(codepoint / 0x1000)] + + if data then return data[codepoint] end +end + +local planes = {[0] = "Basic Multilingual Plane", [1] = "Supplementary Multilingual Plane", [2] = "Supplementary Ideographic Plane", [3] = "Tertiary Ideographic Plane", [14] = "Supplementary Special-purpose Plane", [15] = "Supplementary Private Use Area-A", [16] = "Supplementary Private Use Area-B"} + +-- Load [[Module:Unicode data/blocks]] if needed and assign it to this variable. +local blocks + +local function block_iter(blocks, i) + i = i + 1 + local data = blocks[i] + if data then + -- Unpack doesn't work on tables loaded with mw.loadData. + return i, data[3], data[1], data[2] + end +end + +-- An ipairs-type iterator generator for the list of blocks. 
+function export.enum_blocks() + local blocks = loader.blocks + return block_iter, blocks, 0 +end + +function export.get_block_range(name) + local range + + for i, block in ipairs(loader.blocks) do if block[3] == name then range = block end end + + if range then return range[1], range[2] end +end + +function export.lookup_plane(codepoint) + local i = floor(codepoint / 0x10000) + return planes[i] or ("Plane %u"):format(i) +end + +function export.lookup_block(codepoint) + local blocks = loader.blocks + local range = binary_range_search(codepoint, blocks) + if range then + return range[3] + else + return "No Block" + end +end + +function export.get_block_info(name) for i, block in ipairs(loader.blocks) do if block[3] == name then return block end end end + +function export.is_valid_pagename(pagename) + local has_nonws = false + + for cp in mw.ustring.gcodepoint(pagename) do + if (cp == 0x0023) -- # + or (cp == 0x005B) -- [ + or (cp == 0x005D) -- ] + or (cp == 0x007B) -- { + or (cp == 0x007C) -- | + or (cp == 0x007D) -- } + or (cp == 0x180E) -- MONGOLIAN VOWEL SEPARATOR + or ((cp >= 0x2000) and (cp <= 0x200A)) -- spaces in General Punctuation block + or (cp == 0xFFFD) -- REPLACEMENT CHARACTER + then return false end + + local printable, result = export.is_printable(cp) + if not printable then return false end + + if result ~= "space-separator" then has_nonws = true end + end + + return has_nonws +end + +local function manual_unpack(what, from) + if what[from + 1] == nil then return what[from] end + + local result = {} + from = from or 1 + for i, item in ipairs(what) do if i >= from then table.insert(result, item) end end + return unpack(result) +end + +local function compare_ranges(range1, range2) return range1[1] < range2[1] end + +-- Creates a function to look up data in a module that contains "singles" (a +-- code point-to-data map) and "ranges" (an array containing arrays that contain +-- the low and high code points of a range and the data associated with that +-- 
range).
+-- "loader" loads and returns the "singles" and "ranges" tables.
+-- "match_func" is passed the code point and either the data or the "dots", and
+-- generates the final result of the function.
+-- The varargs ("dots") describe the default data to be returned if there was no match.
+-- In case the function is used more than once, "cache" saves ranges that have
+-- already been found to match, or a range whose data is the default if there
+-- was no match.
+local function memo_lookup(data_module_subpage, match_func, ...)
+    local dots = {...}
+    local cache = {}
+    local singles, ranges
+
+    return function(codepoint)
+        if not singles then
+            local data_module = loader[data_module_subpage]
+            singles, ranges = data_module.singles, data_module.ranges
+        end
+
+        if singles[codepoint] then return match_func(codepoint, singles[codepoint]) end
+
+        local range = binary_range_search(codepoint, cache)
+        if range then return match_func(codepoint, manual_unpack(range, 3)) end
+
+        local range, index = binary_range_search(codepoint, ranges)
+        if range then
+            table.insert(cache, range)
+            table.sort(cache, compare_ranges)
+            return match_func(codepoint, manual_unpack(range, 3))
+        end
+
+        if ranges[index] then
+            local dots_range
+            if codepoint > ranges[index][2] then
+                dots_range = {ranges[index][2] + 1, ranges[index + 1] and ranges[index + 1][1] - 1 or 0x10FFFF, unpack(dots)}
+            else -- codepoint < ranges[index][1]
+                dots_range = {ranges[index - 1] and ranges[index - 1][2] + 1 or 0, ranges[index][1] - 1, unpack(dots)}
+            end
+            table.insert(cache, dots_range) -- store the gap (it was built but never cached) so later misses in it hit the cache
+            table.sort(cache, compare_ranges)
+        end
+
+        return match_func(codepoint, unpack(dots))
+    end
+end
+
+-- Get a code point's combining class value in [[Module:Unicode data/combining]],
+-- and return whether this value is not zero. Zero is assigned as the default
+-- if the combining class value is not found in this data module.
+-- That is, return true if character is combining, or false if it is not.
+-- See https://www.unicode.org/reports/tr44/#Canonical_Combining_Class_Values for +-- more information. +export.is_combining = memo_lookup("combining", function(codepoint, combining_class) return combining_class and combining_class ~= 0 or false end, 0) + +function export.add_dotted_circle(str) return (mw.ustring.gsub(str, ".", function(char) if export.is_combining(mw.ustring.codepoint(char)) then return "◌" .. char end end)) end + +local lookup_control = memo_lookup("control", function(codepoint, ccc) return ccc or "assigned" end, "assigned") +export.lookup_control = lookup_control + +function export.is_assigned(codepoint) return lookup_control(codepoint) ~= "unassigned" end + +function export.is_printable(codepoint) + local result = lookup_control(codepoint) + return (result == "assigned") or (result == "space-separator"), result +end + +function export.is_whitespace(codepoint) + local result = lookup_control(codepoint) + return (result == "space-separator"), result +end + +export.lookup_category = memo_lookup("category", function(codepoint, category) return category end, "Cn") + +export.lookup_script = memo_lookup("scripts", function(codepoint, script) return script end, "Zzzz") + +local unsupported_title = {[0x0020] = "Unsupported titles/Space", [0x0023] = "Unsupported titles/Number sign", [0x002E] = "Unsupported titles/Full stop", [0x003A] = "Unsupported titles/Colon", [0x003C] = "Unsupported titles/Less than", [0x003E] = "Unsupported titles/Greater than", [0x005B] = "Unsupported titles/Left square bracket", [0x005D] = "Unsupported titles/Right square bracket", [0x005F] = "Unsupported titles/Low line", [0x007B] = "Unsupported titles/Left curly bracket", [0x007C] = "Unsupported titles/Vertical line", [0x007D] = "Unsupported titles/Right curly bracket", [0x1680] = "Unsupported titles/Ogham space", [0xFFFD] = "Unsupported titles/Replacement character"} + +function export.get_entry_title(codepoint) + if unsupported_title[codepoint] then return 
unsupported_title[codepoint] end + if lookup_control(codepoint) ~= "assigned" then return nil end + return mw.ustring.char(codepoint) +end + +return export diff --git a/wiktra/wikt/translit/UnitTests.lua b/wiktra/wikt/translit/UnitTests.lua new file mode 100644 index 0000000..a82de86 --- /dev/null +++ b/wiktra/wikt/translit/UnitTests.lua @@ -0,0 +1,309 @@ +local UnitTester = {} + +local ustring = mw.ustring +local is_combining = require("Unicode data").is_combining +local UTF8_char = "[\1-\127\194-\244][\128-\191]*" + +local sorted_pairs = require("table").sortedPairs +local Array = require("array") + +local tick, cross = "[[File:Yes check.svg|20px|alt=Passed|link=|Test passed]]", "[[File:X mark.svg|20px|alt=Failed|link=|Test failed]]" + +local result_table_header = "{| class=\"unit-tests wikitable\"\n! class=\"unit-tests-img-corner\" style=\"cursor:pointer\" title=\"Only failed tests\"| !! Text !! Expected !! Actual" + +local function iter_UTF8(str) return string.gmatch(str, UTF8_char) end + +-- Skips over bytes that are not used by UTF-8, and will count overlong encodings. +local function len(str) + local _, length = string.gsub(str, UTF8_char, "") + return length +end + +local function first_difference(s1, s2) + if type(s1) ~= "string" or type(s2) ~= "string" then return "N/A" end + if s1 == s2 then return "" end + local next_char1, next_char2 = iter_UTF8(s1), iter_UTF8(s2) + local max = math.min(len(s1), len(s2)) + for i = 1, max do + local c1, c2 = next_char1(), next_char2() + if c1 ~= c2 then return i end + end + return max + 1 +end + +local function highlight(str) + if ustring.find(str, "%s") then + return "" .. string.gsub(str, " ", " ") .. "" + else + return "" .. str .. "" + end +end + +local function find_noncombining(str, i, incr) + local char = ustring.sub(str, i, i) + while char ~= "" and is_combining(ustring.codepoint(char)) do + i = i + incr + char = ustring.sub(str, i, i) + end + return i +end + +-- Highlight character where a difference was found. 
-- Wrap the part of `actual` around position `differs_at` in a highlight.
-- The position is first moved back past any combining characters in
-- `expected`; the highlighted span then runs from the nearest non-combining
-- character at or before that position in `actual` up to (but not including)
-- the next non-combining character after it, so a base character is
-- highlighted together with its combining marks.
--
-- actual, expected: the strings being compared.
-- differs_at: character index of the first difference.
-- func: optional custom highlighting function; `highlight` is used when
--       `func` is not a function.
--
-- Returns `actual` unchanged when `differs_at` is not a number or when either
-- string is nil/false; otherwise returns `actual` with the span highlighted.
local function highlight_difference(actual, expected, differs_at, func)
    if type(differs_at) ~= "number" or not (actual and expected) then return actual end
    -- Step back to the base character that carries the differing mark.
    differs_at = find_noncombining(expected, differs_at, -1)
    local i = find_noncombining(actual, differs_at, -1)
    local j = find_noncombining(actual, differs_at + 1, 1)
    -- `j` now points at the next base character; the span ends just before it.
    j = j - 1
    return ustring.sub(actual, 1, i - 1) .. (type(func) == "function" and func or highlight)(ustring.sub(actual, i, j)) .. ustring.sub(actual, j + 1, -1)
end
-- Recursively compare two values for structural equality.
--
-- Values of different types are never equal. Non-table values are compared
-- with `==`. Two tables are equal when they have the same keys and every pair
-- of corresponding values is itself deeply equal.
--
-- t1, t2:    the values to compare.
-- ignore_mt: when truthy, an `__eq` metamethod is never consulted -- at any
--            nesting depth -- and table contents are always compared.
--            When falsy, a table whose metatable defines `__eq` is compared
--            with `==` instead.
--
-- Returns true or false.
local function deep_compare(t1, t2, ignore_mt)
    local ty1, ty2 = type(t1), type(t2)
    if ty1 ~= ty2 then
        return false
    elseif ty1 ~= "table" then
        return t1 == t2
    end

    local mt = getmetatable(t1)
    if not ignore_mt and mt and mt.__eq then return t1 == t2 end

    for k1, v1 in pairs(t1) do
        local v2 = t2[k1]
        -- Forward ignore_mt so nested tables follow the same rule as the root.
        if v2 == nil or not deep_compare(v1, v2, ignore_mt) then return false end
    end
    for k2, v2 in pairs(t2) do
        local v1 = t1[k2]
        if v1 == nil then return false end
        -- Keys stored directly in t1 were already compared by the loop above;
        -- repeating that comparison would double the work at every nesting
        -- level. Only values that t1 supplies indirectly (through __index)
        -- still need to be checked here.
        if rawget(t1, k2) == nil and not deep_compare(v1, v2, ignore_mt) then return false end
    end

    return true
end
tick) + else + self.result_table:insert("|- class=\"unit-test-fail\"\n | " .. cross) + self.num_failures = self.num_failures + 1 + end + if self.nowiki or options and options.nowiki then + expected = mw.text.nowiki(expected) + actual = mw.text.nowiki(actual) + end + local differs_at = self.differs_at and (" || " .. first_difference(expected, actual)) or "" + local comment = self.comments and (" || " .. (options and options.comment or "")) or "" + self.result_table:insert(" || " .. mw.text.nowiki(text1) .. " || " .. expected .. " || " .. actual .. differs_at .. comment .. "\n") + self.total_tests = self.total_tests + 1 +end + +function UnitTester:preprocess_equals_preprocess_many(prefix1, suffix1, prefix2, suffix2, cases, options) for _, case in ipairs(cases) do self:preprocess_equals_preprocess(prefix1 .. case[1] .. suffix1, prefix2 .. (case[2] and case[2] or case[1]) .. suffix2, options) end end + +function UnitTester:equals(name, actual, expected, options) + if actual == expected then + self.result_table:insert("|- class=\"unit-test-pass\"\n | " .. tick) + else + self.result_table:insert("|- class=\"unit-test-fail\"\n | " .. cross) + self.num_failures = self.num_failures + 1 + end + local difference = first_difference(expected, actual) + if options and options.show_difference and type(difference) == "number" then actual = highlight_difference(actual, expected, difference, type(options.show_difference) == "function" and options.show_difference) end + local differs_at = self.differs_at and (" || " .. difference) or "" + local comment = self.comments and (" || " .. 
(options and options.comment or "")) or "" + if expected == nil then + expected = "(nil)" + else + expected = tostring(expected) + end + if actual == nil then + actual = "(nil)" + else + actual = tostring(actual) + end + if self.nowiki or options and options.nowiki then + expected = mw.text.nowiki(expected) + actual = mw.text.nowiki(actual) + end + + if options and type(options.display) == "function" then + expected = options.display(expected) + actual = options.display(actual) + end + + self.result_table:insert(" || " .. name .. " || " .. expected .. " || " .. actual .. differs_at .. comment .. "\n") + self.total_tests = self.total_tests + 1 +end + +function UnitTester:equals_deep(name, actual, expected, options) + if deep_compare(actual, expected) then + self.result_table:insert("|- class=\"unit-test-pass\"\n | " .. tick) + else + self.result_table:insert("|- class=\"unit-test-fail\"\n | " .. cross) + self.num_failures = self.num_failures + 1 + end + local actual_str = val_to_str(actual) + local expected_str = val_to_str(expected) + + if self.nowiki or options and options.nowiki then + expected_str = mw.text.nowiki(expected_str) + actual_str = mw.text.nowiki(actual_str) + end + + if options and type(options.display) == "function" then + expected_str = options.display(expected_str) + actual_str = options.display(actual_str) + end + + local differs_at = self.differs_at and (" || " .. first_difference(expected_str, actual_str)) or "" + local comment = self.comments and (" || " .. (options and options.comment or "")) or "" + self.result_table:insert(" || " .. name .. " || " .. expected_str .. " || " .. actual_str .. differs_at .. comment .. 
-- Run one test method over a list of examples.
--
-- examples: a sequence whose entries are either tables or strings. A table is
--           unpacked and passed as the arguments of `func` (after `self`);
--           a string is emitted as a heading row via `self:heading`.
-- func:     the function to call, or the name of a method of this tester.
--
-- Raises an error (reported at the caller's position) when `func` is neither
-- a function nor a string, when it names a method that does not exist, or
-- when an example is neither a table nor a string.
function UnitTester:iterate(examples, func)
    require"libraryUtil".checkType("iterate", 1, examples, "table")
    if type(func) == "string" then
        local method = self[func]
        -- Fail here with a readable message instead of letting the first
        -- table example die with "attempt to call a nil value".
        if method == nil then
            error(("bad argument #2 to 'iterate' (no method named '%s')"):format(func), 2)
        end
        func = method
    elseif type(func) ~= "function" then
        error(("bad argument #2 to 'iterate' (expected function or string, got %s)"):format(type(func)), 2)
    end

    for i, example in ipairs(examples) do
        if type(example) == "table" then
            func(self, unpack(example))
        elseif type(example) == "string" then
            self:heading(example)
        else
            error(("bad example #%d (expected table or string, got %s)"):format(i, type(example)), 2)
        end
    end
end
":\n|-\n") + local traceback = "(no traceback)" + local success, mesg = xpcall(function() return self[key](self) end, function(mesg) + traceback = debug.traceback("", 2) + return mesg + end) + if not success then + self.result_table:insert((" |-\n | colspan=\"%u\" style=\"text-align: left\" | Script error during testing: %s%s\n"):format(self.columns, mw.text.nowiki(mesg), frame:extensionTag("pre", traceback))) + self.num_failures = self.num_failures + 1 + end + self.result_table:insert("|}") + output:insert(self.result_table:concat()) + self.result_table = Array() + end + + local refresh_link = tostring(mw.uri.fullUrl(mw.title.getCurrentTitle().fullText, "action=purge&forcelinkupdate")) + + local failure_cat = "[[Category:Failing module unit tests]]" + if mw.title.getCurrentTitle().text:find("/documentation$") then failure_cat = "" end + + local num_successes = self.total_tests - self.num_failures + + if (self.summarize) then + if (self.num_failures == 0) then + return "" .. self.total_tests .. "/" .. self.total_tests .. " tests passed" + else + return "" .. num_successes .. "/" .. self.total_tests .. " tests passed" + end + else + return (self.num_failures == 0 and "All tests passed." or "" .. self.num_failures .. " test" .. (self.num_failures == 1 and "" or "s") .. " failed." .. failure_cat) .. " [" .. refresh_link .. " (refresh)]\n\n" .. 
output:concat("\n\n") + end +end + +function UnitTester:new() + local o = {} + setmetatable(o, self) + self.__index = self + return o +end + +local p = UnitTester:new() +function p.run_tests(frame) return p:run(frame) end +return p diff --git a/wiktra/wikt/translit/VL-translit.lua b/wiktra/wikt/translit/VL-translit.lua new file mode 100644 index 0000000..8a2e004 --- /dev/null +++ b/wiktra/wikt/translit/VL-translit.lua @@ -0,0 +1,288 @@ +local la_utils = require("la-utilities") + +local export = {} + +local vowel_patterns = {} + +vowel_patterns["all"] = {{"ā", "a"}, {"ae", "ę"}, {"áé", "ę́"}, {"e", "ę"}, {"o", "ǫ"}} + +vowel_patterns["It-W"] = {{"ē", "ẹ"}, {"i", "ẹ"}, {"ī", "i"}, {"ō", "ọ"}, {"u", "ọ"}, {"ū", "u"}} + +vowel_patterns["Italo-Western"] = vowel_patterns["It-W"] +vowel_patterns["Western"] = vowel_patterns["It-W"] + +vowel_patterns["E"] = {{"ē", "ẹ"}, {"i", "ẹ"}, {"ī", "i"}, {"ō", "o"}, {"ū", "u"}} + +vowel_patterns["Eastern"] = vowel_patterns["E"] +vowel_patterns["Romanian"] = vowel_patterns["E"] + +vowel_patterns["S"] = {{"ē", "e"}, {"ẹ", "e"}, {"ī", "i"}, {"ō", "o"}, {"ọ", "o"}, {"ū", "u"}} + +vowel_patterns["Sardinian"] = vowel_patterns["S"] + +local dictionary = {["a"] = "a", ["e"] = "e", ["i"] = "i", ["o"] = "o", ["u"] = "u", ["ā"] = "ā", ["ē"] = "ē", ["ī"] = "ī", ["ō"] = "ō", ["ū"] = "ū", ["ae"] = "ae", ["oe"] = "ē", ["ai"] = "aị", ["ei"] = "ėị", ["au"] = "aụ", ["eu"] = "ėụ", ["b"] = "b", ["d"] = "d", ["f"] = "f", ["c"] = "c", ["g"] = "g", ["v"] = "v", ["x"] = "x", ["qu"] = "qŭ", ["'"] = "'"} + +local vowels = {"a", "e", "i", "o", "u", "ā", "ē", "ī", "ō", "ū", "ae", "oe", "aị", "ėị", "aụ", "ėụ", "-"} + +local onsets = {"b", "p", "d", "t", "g", "c", "cu", "qŭ", "f", "s", "z", "l", "m", "n", "r", "j", "v", "w", "bl", "pl", "br", "pr", "ps", "dr", "tr", "gl", "cl", "gr", "cr", "fl", "fr", "sp", "st", "sc", "scu", "sl", "sm", "sn", "su", "spr", "str", "scr", "spl", "scl"} + +local codas = {"b", "p", "d", "t", "g", "c", "f", "s", "z", "l", "m", "n", 
"r", "j", "sp", "st", "sc", "lp", "lt", "lc", "lb", "ld", "lg", "lf", "rp", "rt", "rc", "rb", "rd", "rg", "rf", "mp", "nt", "nc", "mb", "nd", "ng", "lm", "rl", "rm", "rn", "ps", "ts", "cs", "x", "ls", "ns", "rs", "lcs", "ncs", "rcs", "lms", "rls", "rms", "rns"} + +for i, val in ipairs(vowels) do vowels[val] = true end + +for i, val in ipairs(onsets) do onsets[val] = true end + +for i, val in ipairs(codas) do codas[val] = true end + +-- These phonetic rules apply to the whole word, not just a syllable +local word_rules_start = { + {"h", ""}, {"k", "c"}, -- {"w", "v"}, + {"y", "i"}, {"ȳ", "ī"}, {"x('?)s", "x%1"}, {"x('?)([ct])", "s%1%2"}, {"b([st])", "p%1"}, {"d([st])", "t%1"}, {"g([st])", "c%1"}, {"n([bp])", "m%1"}, -- {"qu", "qv"}, + -- {"ngu([aeiouāēīōū])", "ngv%1"}, + {"([aeiouāēīōū])('?)b([aeiouāēīōū])", "%1%2v%3"}, -- This is not a copypaste error, the pattern needs to run twice to catch the edge case of -ababa- + -- (common in the VL conditional) + {"([aeiouāēīōū])('?)b([aeiouāēīōū])", "%1%2v%3"}, {"um$", "u"} +} + +local stress_shift_rules = {["qu"] = "'qu", ["ngu"] = "n'gu", ["gu"] = "'gu", ["v"] = "'v", ["bl"] = "'bl", ["pl"] = "'pl", ["br"] = "'br", ["pr"] = "'pr", ["dr"] = "'dr", ["tr"] = "'tr", ["gl"] = "'gl", ["cl"] = "'cl", ["gr"] = "'gr", ["cr"] = "'cr", ["fl"] = "'fl", ["fr"] = "'fr", ["ct"] = "c't", ["pt"] = "p't", ["gd"] = "g'd", ["sl"] = "s'l", ["sm"] = "s'm", ["sn"] = "s'n", ["su"] = "s'u", ["st"] = "s't", ["xt"] = "x't", ["spr"] = "s'pr", ["str"] = "s'tr", ["scr"] = "s'cr", ["spl"] = "s'pl", ["scl"] = "s'cl", ["nct"] = "nc't"} + +local word_rules_end = { + {"^ĭ", "j"}, -- {"g([ei])", "j%1"}, + -- {"dĭ", "j"}, + -- {"gĭ", "j"}, + -- {"z", "j"}, + {"ė", "e"}, {"ị", "i"}, {"ụ", "u"}, {"ĭ", "i"}, {"ŭ", "u"}, {"ei", "i"}, {"ii", "i"}, {"ee$", "ie"}, {"([aẹęeọǫou])(́?)e$", "%1%2i"} +} + +local nasalized = {} + +nasalized["It-W"] = {{"[ẹęeēi](́?)[nm]", "ẽ%1"}, {"[ī](́?)[nm]", "ĩ%1"}, {"[ū](́?)[nm]", "ũ%1"}, {"[ọǫoōu](́?)[nm]", "õ%1"}} + +nasalized["E"] 
-- `vowel_patterns` accepts these long-form family names as aliases of the
-- short ones, but `nasalized` is only keyed by the short names. Map the
-- aliases here so that every family name accepted elsewhere works here too.
local nasalized_aliases = {
    ["Italo-Western"] = "It-W",
    ["Western"] = "It-W",
    ["Eastern"] = "E",
    ["Romanian"] = "E",
    ["Sardinian"] = "S"
}

-- Replace vowel + n/m sequences with a tilde-marked vowel.
-- The rewrite is applied where the sequence is word-final and where it is
-- followed by "s"; an optional combining acute accent is carried over onto
-- the result.
--
-- word:   the word to rewrite.
-- family: a key of `nasalized` ("It-W", "E", "S") or one of the long-form
--         aliases listed above.
--
-- Returns the rewritten word. Raises an error for an unknown family.
local function nasalize_vowels(word, family)
    local rules = nasalized[family] or nasalized[nasalized_aliases[family]]
    if not rules then error("unknown vowel pattern: " .. tostring(family)) end

    -- "a" behaves the same in every family, so it is handled outside the
    -- per-family rule tables.
    word = mw.ustring.gsub(word, "[aā](́?)[nm]$", "ã%1")
    word = mw.ustring.gsub(word, "[aā][nm](́?)s", "ã%1s")
    for _, rule in ipairs(rules) do
        word = mw.ustring.gsub(word, rule[1] .. "$", rule[2])
        -- rule[1] already contains one capture (the accent after the vowel),
        -- so the accent captured after the nasal is %2.
        word = mw.ustring.gsub(word, rule[1] .. "(́?)s", rule[2] .. "%2s")
    end
    return word
end
vowels[phoneme] then + table.insert(syll, phoneme) + table.insert(syllables, syll) + syll = {} + else + table.insert(syll, phoneme) + end + end + + -- If there are phonemes left, then the word ends in a consonant + -- Add them to the last syllable + for _, phoneme in ipairs(syll) do table.insert(syllables[#syllables], phoneme) end + + -- Split consonant clusters between syllables + for i, current in ipairs(syllables) do + if i > 1 then + local previous = syllables[i - 1] + local onset = get_onset(current) + -- Shift over consonants until the syllable onset is valid + while not (onset == "" or onsets[onset]) do + table.insert(previous, table.remove(current, 1)) + onset = get_onset(current) + end + + -- If the preceding syllable still ends with a vowel, and the current one begins with s + another consonant, or with gn, then shift it over + if get_coda(previous) == "" and ((current[1] == "s" and not vowels[current[2]]) or (current[1] == "g" and current[2] == "n")) then table.insert(previous, table.remove(current, 1)) end + + -- If there is no vowel at all in this syllable + if not get_vowel(current) then + for j = 1, #current do table.insert(syllables[i - 1], table.remove(current, 1)) end + table.remove(syllables, i) + end + + end + end + + for i, syll in ipairs(syllables) do + local onset = get_onset(syll) + local coda = get_coda(syll) + + if not (onset == "" or onsets[onset]) then + require("debug").track("VL-noun/bad onset") + error("onset error:[" .. onset .. "]") + end + + if not (coda == "" or codas[coda]) then + require("debug").track("VL-noun/bad coda") + error("coda error:[" .. coda .. "]") + end + end + + return syllables +end + +local function detect_accent(syllables) + -- Manual override + for i = 1, #syllables do + for j = 1, #syllables[i] do + if syllables[i][j] == "'" then + table.remove(syllables[i], j) + return i + end + end + end + if #syllables > 2 then + -- Does the penultimate syllable end in a single vowel? 
-- Convert a single word according to the given vowel pattern.
--
-- Steps, in order: optional prothetic "i", the word-initial rule list,
-- stress-mark shifting, segmentation into phonemes and syllables, accent
-- detection, per-syllable vowel rewriting, the word-final rule list and
-- finally nasalisation.
--
-- word:          the word to convert; an apostrophe marks a manual accent.
-- vowel_pattern: a key of `vowel_patterns`.
--
-- Returns the converted word.
local function convert_word(word, vowel_pattern)
    -- Prothetic i before s + consonant.
    -- Compare the rule tables rather than the name, so that the aliases of
    -- "It-W" defined in `vowel_patterns` behave like "It-W" itself.
    if vowel_patterns[vowel_pattern] == vowel_patterns["It-W"] then word = mw.ustring.gsub(word, "^s([ptclmn])", "is%1") end

    -- do starting word-based rules
    for _, rule in ipairs(word_rules_start) do word = mw.ustring.gsub(word, rule[1], rule[2]) end

    for k, v in pairs(stress_shift_rules) do word = mw.ustring.gsub(word, k .. "'", v) end

    -- Double consonant stress shifts: move the stress mark between the two
    -- consonants ("tt'" -> "t't"). The second half of the replacement must be
    -- the consonant itself, not the literal letter "v".
    for _, v in ipairs({"b", "c", "d", "f", "g", "l", "m", "n", "p", "r", "s", "t"}) do word = mw.ustring.gsub(word, v .. v .. "'", v .. "'" .. v) end

    local phonemes = segment_word(word)

    local syllables = split_syllables(phonemes)

    local accent = detect_accent(syllables)

    -- Check antepenult for e, i > j (written i)
    --[[local antepenult = syllables[#syllables - 2]
    local penult = syllables[#syllables - 1]

    if antepenult and penult then
        if syllables[accent] == antepenult and mw.ustring.find(antepenult[#antepenult], "^[eēiī]$") and mw.ustring.find(penult[#penult], "^[aāoō]$") then
            syllables[#syllables-2][#antepenult] = "ị"
            accent = accent + 1
        end
    end]] --

    -- A syllable-final e/i directly before a syllable starting with a/o/u
    -- becomes "ĭ"; if that syllable carried the accent, the accent moves to
    -- the following syllable.
    for i, syll in ipairs(syllables) do
        if syllables[i + 1] then
            if mw.ustring.find(syll[#syll], "^[eēiī]$") and mw.ustring.find(syllables[i + 1][1], "^[aāoōuū]$") then
                syll[#syll] = "ĭ"
                if syllables[accent] == syll then accent = accent + 1 end
            end
        end
    end

    for i, syll in ipairs(syllables) do
        syll = table.concat(syll)
        for _, rule in ipairs(vowel_patterns["all"]) do syll = mw.ustring.gsub(syll, rule[1], rule[2]) end
        for _, rule in ipairs(vowel_patterns[vowel_pattern]) do syll = mw.ustring.gsub(syll, rule[1], rule[2]) end
        --[[if i ~= accent then
            syll = mw.ustring.gsub(syll, "ẹ", "e")
            syll = mw.ustring.gsub(syll, "ọ", "o")
        end]]
        syllables[i] = (i == accent and place_accent(syll) or syll)
    end

    word = table.concat(syllables)

    for _, rule in ipairs(word_rules_end) do word = mw.ustring.gsub(word, rule[1], rule[2]) end

    word = nasalize_vowels(word, vowel_pattern)

    return word
end
-- Transliterate text using the tables in "Vaii-translit/data".
--
-- text: the string to transliterate.
--
-- Returns the transliterated string. Characters with no entry in the data
-- tables are left unchanged.
function export.tr(text)
    local data = mw.loadData("Vaii-translit/data")

    text = mw.ustring.gsub(text, "꘎꘎", "!")
    -- Every key of data.tt_doubled is one character followed by "ꘌ".
    -- Matching exactly that shape finds such pairs wherever they occur; the
    -- former pattern ".." consumed the text in fixed two-character steps and
    -- missed any pair that started at an odd offset.
    text = mw.ustring.gsub(text, ".ꘌ", data.tt_doubled)
    text = mw.ustring.gsub(text, ".", data.tt)

    return text

end
["ꕺ"] = "kpo", + ["ꖟ"] = "kpu", + ["ꗅ"] = "kpɔ", + ["ꗬ"] = "kpɛ", + ["ꕕ"] = "kpã", + ["ꗭ"] = "kpɛ̃", + ["ꔊ"] = "mgbe", + ["ꔯ"] = "mgbi", + ["ꕖ"] = "mgba", + ["ꕻ"] = "mgbo", + ["ꖠ"] = "mgbu", + ["ꗆ"] = "mgbɔ", + ["ꗮ"] = "mgbɛ", + ["ꔋ"] = "gbe", + ["ꔰ"] = "gbi", + ["ꕗ"] = "gba", + ["ꕼ"] = "gbo", + ["ꖡ"] = "gbu", + ["ꗇ"] = "gbɔ", + ["ꗯ"] = "gbɛ", + ["ꗈ"] = "gbɔ̃", + ["ꗰ"] = "gbɛ̃", + ["ꔌ"] = "fe", + ["ꔱ"] = "fi", + ["ꕘ"] = "fa", + ["ꕽ"] = "fo", + ["ꖢ"] = "fu", + ["ꗉ"] = "fɔ", + ["ꗱ"] = "fɛ", + ["ꔍ"] = "ve", + ["ꔲ"] = "vi", + ["ꕙ"] = "va", + ["ꕾ"] = "vo", + ["ꖣ"] = "vu", + ["ꗊ"] = "vɔ", + ["ꗲ"] = "vɛ", + ["ꔎ"] = "te", + ["ꔳ"] = "ti", + ["ꕚ"] = "ta", + ["ꕿ"] = "to", + ["ꖤ"] = "tu", + ["ꗋ"] = "tɔ", + ["ꗳ"] = "tɛ", + ["ꔏ"] = "θe", + ["ꔴ"] = "θi", + ["ꕛ"] = "θa", + ["ꖀ"] = "θo", + ["ꖥ"] = "θu", + ["ꗌ"] = "θɔ", + ["ꗴ"] = "θɛ", + ["ꔐ"] = "de", + ["ꔵ"] = "di", + ["ꕜ"] = "da", + ["ꖁ"] = "do", + ["ꖦ"] = "du", + ["ꗍ"] = "dɔ", + ["ꗵ"] = "dɛ", + ["ꔑ"] = "ðe", + ["ꔶ"] = "ði", + ["ꕝ"] = "ða", + ["ꖂ"] = "ðo", + ["ꖧ"] = "ðu", + ["ꗎ"] = "ðɔ", + ["ꗶ"] = "ðɛ", + ["ꔒ"] = "le", + ["ꔷ"] = "li", + ["ꕞ"] = "la", + ["ꖃ"] = "lo", + ["ꖨ"] = "lu", + ["ꗏ"] = "lɔ", + ["ꗷ"] = "lɛ", + ["ꔓ"] = "re", + ["ꔸ"] = "ri", + ["ꕟ"] = "ra", + ["ꖄ"] = "ro", + ["ꖩ"] = "ru", + ["ꗐ"] = "rɔ", + ["ꗸ"] = "rɛ", + ["ꔔ"] = "ɗe", + ["ꔹ"] = "ɗi", + ["ꕠ"] = "ɗa", + ["ꖅ"] = "ɗo", + ["ꖪ"] = "ɗu", + ["ꗑ"] = "ɗɔ", + ["ꗹ"] = "ɗɛ", + ["ꔕ"] = "nɗe", + ["ꔺ"] = "nɗi", + ["ꕡ"] = "nɗa", + ["ꖆ"] = "nɗo", + ["ꖫ"] = "nɗu", + ["ꗒ"] = "nɗɔ", + ["ꗺ"] = "nɗɛ", + ["ꔖ"] = "se", + ["ꔻ"] = "si", + ["ꕢ"] = "sa", + ["ꖇ"] = "so", + ["ꖬ"] = "su", + ["ꗓ"] = "sɔ", + ["ꗻ"] = "sɛ", + ["ꔗ"] = "ʃe", + ["ꔼ"] = "ʃi", + ["ꕣ"] = "ʃa", + ["ꖈ"] = "ʃo", + ["ꖭ"] = "ʃu", + ["ꗔ"] = "ʃɔ", + ["ꗼ"] = "ʃɛ", + ["ꔘ"] = "ze", + ["ꔽ"] = "zi", + ["ꕤ"] = "za", + ["ꖉ"] = "zo", + ["ꖮ"] = "zu", + ["ꗕ"] = "zɔ", + ["ꗽ"] = "zɛ", + ["ꔙ"] = "ʒe", + ["ꔾ"] = "ʒi", + ["ꕥ"] = "ʒa", + ["ꖊ"] = "ʒo", + ["ꖯ"] = "ʒu", + ["ꗖ"] = "ʒɔ", + ["ꗾ"] = "ʒɛ", + ["ꔛ"] = "dʒe", + ["ꕀ"] = "dʒi", + ["ꕧ"] 
-- The keys of `vowels`, longest first. Some vowels are a prefix of others
-- (for example "ɔ" and "ɔ̃", which is "ɔ" plus a combining tilde), so the
-- longer form has to be tried before the shorter one. Ties are broken
-- alphabetically to keep the order deterministic.
local vowels_longest_first = {}
for v in pairs(vowels) do vowels_longest_first[#vowels_longest_first + 1] = v end
table.sort(vowels_longest_first, function(a, b)
    local len_a, len_b = mw.ustring.len(a), mw.ustring.len(b)
    if len_a ~= len_b then return len_a > len_b end
    return a < b
end)

-- Return `syllable` with its vowel doubled ("wɔ" -> "wɔɔ", "wɔ̃" -> "wɔ̃ɔ̃").
-- A syllable whose vowel is already doubled is returned unchanged, as is a
-- syllable containing no vowel.
--
-- Only the first vowel found (longest form first) is doubled. This assumes
-- each transliterated syllable has a single vowel quality, which holds for
-- the entries of data.tt visible here -- TODO confirm if entries are added.
local function double_vowel(syllable)
    for _, v in ipairs(vowels_longest_first) do
        if mw.ustring.find(syllable, v) then
            if not mw.ustring.find(syllable, v .. v) then syllable = mw.ustring.gsub(syllable, v, v .. v) end
            -- Stop here: continuing would let a shorter vowel that is a
            -- prefix of this one (e.g. "ɔ" inside "ɔ̃") match again and
            -- corrupt the result.
            return syllable
        end
    end
    return syllable
end
-- Transliterate text using the `tt` table.
--
-- text: the string to transliterate.
-- lang: language code; only forwarded to the Georgian module.
-- sc:   script code; when it is "Geor" the whole call is delegated to the
--       "Geor-translit" module.
--
-- Returns the transliterated string. Characters with no entry in `tt` are
-- left unchanged.
function export.tr(text, lang, sc)
    -- If the script is given as Geor, then forward the transliteration to that module
    if sc == "Geor" then return require("Geor-translit").tr(text, lang, sc) end
    -- Two-letter pass. Every two-letter key of `tt` ends in ә, ь or у, so only
    -- those are accepted as the second character. Accepting any character
    -- there (as the pattern "[...]." did) swallowed the following letter, so
    -- in a cluster such as "дгь" the pair "дг" was consumed and the real
    -- digraph "гь" was never looked up.
    text = mw.ustring.gsub(text, "[гГӷӶдДжЖӡӠкКқҚҟҞтТҭҬҵҴхХҳҲцЦџЏшШ][әьу]", tt)
    -- Single-letter pass for everything that is left.
    text = mw.ustring.gsub(text, ".", tt)

    return text
end
"c", ["ч"] = "č̍", ["ш"] = "š", ["щ"] = "š̍", ["ъ"] = "ʾ", ["ы"] = "ə", ["ь"] = "’", ["э"] = "è", ["ю"] = "ju", ["я"] = "ja", ["А"] = "A", ["Б"] = "B", ["В"] = "V", ["Г"] = "G", ["Д"] = "D", ["Е"] = "E", ["Ё"] = "Ë", ["Ж"] = "Ž", ["З"] = "Z", ["И"] = "I", ["Й"] = "J", ["К"] = "K", ["Л"] = "L", ["М"] = "M", ["Н"] = "N", ["О"] = "O", ["П"] = "P", ["Р"] = "R", ["С"] = "S", ["Т"] = "T", ["У"] = "W", ["Ф"] = "F", ["Х"] = "X", ["Ц"] = "C", ["Ч"] = "Č̍", ["Ш"] = "Š", ["Щ"] = "Š̍", ["Ъ"] = "ʾ", ["Ы"] = "Ə", ["Ь"] = "’", ["Э"] = "È", ["Ю"] = "Ju", ["Я"] = "Ja"}; + +local mapping2 = { + ["Хъв"] = "Q°", + ["Гӏв"] = "ʿ°", + ["Гъв"] = "Ğ°", + ["Гъь"] = "Ğ̍", + ["Джв"] = "Ǯ°", + ["Джь"] = "Ǯ̍", + ["Къв"] = "Q̇°", + ["Къь"] = "Q̇̍", + ["Кӏв"] = "Ḳ°", + ["Кӏь"] = "Ḳ̍", + ["Хӏв"] = "Ḥ°", + ["Чӏв"] = "Č̣°", + ["хъв"] = "q°", + ["гӏв"] = "ʿ°", + ["гъв"] = "ğ°", + ["гъь"] = "ğ̍", + ["джв"] = "ǯ°", + ["джь"] = "ǯ̍", + ["къв"] = "q̇°", + ["къь"] = "q̇̍", + ["кӏв"] = "ḳ°", + ["кӏь"] = "ḳ̍", + ["хӏв"] = "ḥ°", + ["чӏв"] = "č̣°", + ["гв"] = "g°", + ["гъ"] = "ğ", + ["гь"] = "g̍", + ["гӏ"] = "ʿ", + ["дж"] = "ǯ", + ["дз"] = "ʒ", + ["жв"] = "ž°", + ["жь"] = "ž̍", + ["кв"] = "k°", + ["къ"] = "q̇", + ["кь"] = "k̍", + ["кӏ"] = "ḳ", + ["тл"] = "ł", + ["тш"] = "č", + ["тӏ"] = "ṭ", + ["пӏ"] = "ṗ", + ["хв"] = "x°", + ["хь"] = "x̍", + ["хӏ"] = "ḥ", + ["цӏ"] = "c̣", + ["чв"] = "č°", + ["чӏ"] = "č̣̍", + ["шӏ"] = "č̣", + ["шв"] = "š°", + ["ль"] = "l", + ["лӏ"] = "ḷ", + ["хъ"] = "q", + ["фӏ"] = "f̣", + ["Гв"] = "G°", + ["Гъ"] = "Ğ", + ["Гь"] = "G̍", + ["Гӏ"] = "ʿ", + ["Дж"] = "Ǯ", + ["Дз"] = "Ʒ", + ["Жв"] = "Ž°", + ["Жь"] = "Ž̍", + ["Кв"] = "K°", + ["Къ"] = "Q̇", + ["Кь"] = "K̍", + ["Кӏ"] = "Ḳ", + ["Тл"] = "Ł", + ["Тш"] = "Č", + ["Тӏ"] = "Ṭ", + ["Пӏ"] = "Ṗ", + ["Хв"] = "X°", + ["Хь"] = "X̍", + ["Хӏ"] = "Ḥ", + ["Цӏ"] = "C̣", + ["Чв"] = "Č°", + ["Чӏ"] = "Č̣̍", + ["Шӏ"] = "Č̣", + ["Шв"] = "Š°", + ["Ль"] = "L", + ["Лӏ"] = "Ḷ", + ["Хъ"] = "Q", + ["Фӏ"] = "F̣" +} + +function export.tr(text, lang, sc) + local 
str_gsub = string.gsub + local UTF8char = "[\1-\127\194-\244][\128-\191]*" + + -- Convert uppercase palochka to lowercase. Lowercase is found in tables + -- above. + text = str_gsub(text, mw.ustring.char(0x4C0), mw.ustring.char(0x4CF)) + + -- The multigraph keys are literal byte sequences, so the basic string + -- function can be used. pairs() visits keys in an unspecified order, so the + -- longest sequences are applied first; otherwise a digraph such as "хъ" can + -- consume part of the trigraph "хъв". Ties are broken by key so that the + -- result is deterministic. + local multigraphs = {} + for pat in pairs(mapping2) do multigraphs[#multigraphs + 1] = pat end + table.sort(multigraphs, function(a, b) + if #a ~= #b then return #a > #b end + return a < b + end) + for _, pat in ipairs(multigraphs) do text = str_gsub(text, pat, mapping2[pat]) end + + text = str_gsub(text, UTF8char, mapping1) + + return text +end + +return export diff --git a/wiktra/wikt/translit/ady-translit.lua b/wiktra/wikt/translit/ady-translit.lua new file mode 100644 index 0000000..bf7f74b --- /dev/null +++ b/wiktra/wikt/translit/ady-translit.lua @@ -0,0 +1,92 @@ +local export = {} + +local tt = { + ["а"] = "ā", + ["б"] = "b", + ["в"] = "v", + ["г"] = "ɣ", + ["д"] = "d", + ["е"] = "e", + ["ё"] = "ë", + ["ж"] = "ž", + ["з"] = "z", + ["и"] = "i", + ["й"] = "j", + ["к"] = "k", + ["л"] = "l", + ["м"] = "m", + ["н"] = "n", + ["о"] = "o", + ["п"] = "p", + ["р"] = "r", + ["с"] = "s", + ["т"] = "t", + ["у"] = "w", + ["ф"] = "f", + ["х"] = "x", + ["ц"] = "c", + ["ч"] = "č", + ["ш"] = "š", + ["щ"] = "š̍", + ["ъ"] = "”", + ["ы"] = "ə", + ["ь"] = "’", + ["э"] = "ă", + ["ю"] = "ju", + ["я"] = "jā", + ["ӏ"] = "ʾ", + ["А"] = "Ā", + ["Б"] = "B", + ["В"] = "V", + ["Г"] = "Ɣ", + ["Д"] = "D", + ["Е"] = "E", + ["Ё"] = "Ë", + ["Ж"] = "Ž", + ["З"] = "Z", + ["И"] = "I", + ["Й"] = "J", + ["К"] = "K", + ["Л"] = "L", + ["М"] = "M", + ["Н"] = "N", + ["О"] = "O", + ["П"] = "P", + ["Р"] = "R", + ["С"] = "S", + ["Т"] = "T", + ["У"] = "W", + ["Ф"] = "F", + ["Х"] = "X", + ["Ц"] = "C", + ["Ч"] = "Č", + ["Ш"] = "Š", + ["Щ"] = "Š̍", + ["Ъ"] = "”", + ["Ы"] = "Ə", + ["Ь"] = "’", + ["Э"] = "Ă", + ["Ю"] = "Ju", + ["Я"] = "Jā", + ["ӏ"] = "ʾ" +}; + +local triQuadrigraphs = {["кхъу"] = "q°", ["Кхъу"] = "Q°", ["гъу"] = "ġ°", ["дзу"] = "ʒ°", ["жъу"] = "ẑ°", ["Кхъ"] = "Q", ["кхъ"] = "q", ["къу"] = "q°", ["кӏу"] = "ḳ°", ["пӏу"] = "ṗ°", ["тӏу"] = "ṭ°", ["хъу"] = "χ°", ["шъу"] = "ŝ°", ["шӏу"] = "ṣ̂°", ["гу"] = "g°", ["Гъу"] = 
"Ġ°", ["Дзу"] = "Ʒ°", ["Жъу"] = "Ẑ°", ["Къу"] = "Q°", ["Кӏу"] = "Ḳ°", ["Пӏу"] = "Ṗ°", ["Тӏу"] = "Ṭ°", ["Хъу"] = "Χ°", ["Шъу"] = "Ŝ°", ["Шӏу"] = "Ṣ̂°"} + +local digraphs = {["гь"] = "ɡ’", ["гъ"] = "ġ", ["дж"] = "ǯ̍", ["дз"] = "ʒ", ["жъ"] = "ẑ", ["жь"] = "ž̍", ["ку"] = "k°", ["къ"] = "q", ["кӏ"] = "č̣̍", ["лъ"] = "ł", ["лӏ"] = "ḷ", ["пӏ"] = "ṗ", ["сӏ"] = "ṣ̣", ["тӏ"] = "ṭ", ["фӏ"] = "f̣", ["ху"] = "x°", ["хъ"] = "χ", ["хь"] = "ḥ", ["цу"] = "c°", ["цӏ"] = "c̣", ["чӏ"] = "č̣", ["чу"] = "č̍°", ["чъ"] = "č", ["шъ"] = "ŝ", ["шӏ"] = "ṣ̂", ["ӏу"] = "ʾ°", ["ӏь"] = "՚̍", ["Гу"] = "G°", ["Гь"] = "ɡ’", ["Гъ"] = "Ġ", ["Дж"] = "Ǯ̍", ["Дз"] = "Ʒ", ["Жъ"] = "Ẑ", ["Жь"] = "Ž̍", ["Ку"] = "K°", ["Къ"] = "Q", ["Кӏ"] = "Č̣̍", ["Лъ"] = "Ł", ["Лӏ"] = "Ḷ", ["Пӏ"] = "Ṗ", ["Сӏ"] = "Ṣ̣", ["Тӏ"] = "Ṭ", ["Фӏ"] = "F̣", ["Ху"] = "X°", ["Хъ"] = "Χ", ["Хь"] = "Ḥ", ["Цу"] = "C°", ["Цӏ"] = "C̣", ["Чӏ"] = "Č̣", ["Чу"] = "Č̍°", ["Чъ"] = "Č", ["Шъ"] = "Ŝ", ["Шӏ"] = "Ṣ̂"} + +function export.tr(text, lang, sc) + local str_gsub = string.gsub + local UTF8char = "[%z\1-\127\194-\244][\128-\191]*" + + -- Convert capital to lowercase palochka. Lowercase is found in tables + -- above. + text = str_gsub(text, mw.ustring.char(0x4C0), mw.ustring.char(0x4CF)) + + -- pairs() iterates in an unspecified order, so each table is applied + -- longest key first (e.g. "кхъу" before "кхъ" and "хъу"); ties are broken by + -- key so that the result is deterministic. + local function replace_longest_first(map) + local keys = {} + for comb in pairs(map) do keys[#keys + 1] = comb end + table.sort(keys, function(a, b) + if #a ~= #b then return #a > #b end + return a < b + end) + for _, comb in ipairs(keys) do text = str_gsub(text, comb, map[comb]) end + end + replace_longest_first(triQuadrigraphs) + replace_longest_first(digraphs) + + return (str_gsub(text, UTF8char, tt)) +end + +return export diff --git a/wiktra/wikt/translit/ahom-translit.lua b/wiktra/wikt/translit/ahom-translit.lua new file mode 100644 index 0000000..219e971 --- /dev/null +++ b/wiktra/wikt/translit/ahom-translit.lua @@ -0,0 +1,101 @@ +local export = {} +local gsub = mw.ustring.gsub +local u = mw.ustring.char +local con_cls = "([" .. u(0x11700) .. "-" .. u(0x1171A) .. "][" .. u(0x1171D) .. u(0x1171E) .. u(0x1171F) .. 
"]?)" + +local tt = { + -- consonants + [u(0x11700)] = "k", + [u(0x11701)] = "kh", + [u(0x11702)] = "ng", + [u(0x11703)] = "n", + [u(0x11704)] = "t", + [u(0x11705)] = "t", + [u(0x11706)] = "p", + [u(0x11707)] = "ph", + [u(0x11708)] = "b", + [u(0x11709)] = "m", + [u(0x1170A)] = "y", + [u(0x1170B)] = "ch", + [u(0x1170C)] = "th", + [u(0x1170D)] = "r", + [u(0x1170E)] = "l", + [u(0x1170F)] = "s", + [u(0x11710)] = "ny", + [u(0x11711)] = "h", + [u(0x11712)] = "ʼ", + [u(0x11713)] = "d", + [u(0x11714)] = "dh", + [u(0x11715)] = "g", + [u(0x11716)] = "g", + [u(0x11717)] = "gh", + [u(0x11718)] = "bh", + [u(0x11719)] = "jh", + [u(0x1171A)] = "b", + -- medials + [u(0x1171D)] = "l", + [u(0x1171E)] = "r", + [u(0x1171F)] = "r", + -- vowels (excluding composition) + [u(0x11720)] = "a", + [u(0x11721)] = "aa", + [u(0x11722)] = "i", + [u(0x11723)] = "ii", + [u(0x11724)] = "u", + [u(0x11725)] = "uu", + [u(0x11727)] = "w", + [u(0x11729)] = "y", + [u(0x11726)] = "e", + [u(0x11728)] = "o", + [u(0x1172A)] = "m", + [u(0x1172B)] = "", + -- numerals + [u(0x11730)] = "0", + [u(0x11731)] = "1", + [u(0x11732)] = "2", + [u(0x11733)] = "3", + [u(0x11734)] = "4", + [u(0x11735)] = "5", + [u(0x11736)] = "6", + [u(0x11737)] = "7", + [u(0x11738)] = "8", + [u(0x11739)] = "9", + [u(0x1173A)] = "[10]", + [u(0x1173B)] = "[20]", + -- punctuations and symbols + [u(0x1173C)] = ",", + [u(0x1173D)] = ".", + [u(0x1173E)] = "@", + [u(0x1173F)] = "wi", + -- zero-width space (display it if it hides in a word) + [u(0x200B)] = "‼" +} + +local adjust0 = { + -- vowels (composition) + [u(0x11722) .. u(0x11724)] = "ue", + [u(0x11726) .. u(0x11721)] = "oo", + [u(0x11728) .. u(0x11726) .. u(0x11721)] = "woo", + [u(0x11726) .. u(0x11727)] = "ee", + [u(0x11729) .. u(0x11724)] = "aay" +} + +function export.tr(text, lang, sc, debug_mode) + + if type(text) == "table" then -- called directly from a template + text = text.args[1] + end + + text = gsub(text, u(0x11708) .. "(" .. u(0x1172B) .. 
")", "w%1") -- final -b becomes -w + text = gsub(text, con_cls .. "([" .. u(0x11700) .. "-" .. u(0x11719) .. "w]" .. ")" .. u(0x1172B), "%1a%2") + text = gsub(text, con_cls .. "([" .. u(0x11727) .. u(0x11729) .. u(0x1172A) .. "])", "%1a%2") + + for k, v in pairs(adjust0) do text = gsub(text, con_cls .. k, "%1" .. v) end + + text = gsub(text, ".", tt) + + return text + +end + +return export diff --git a/wiktra/wikt/translit/ain-translit.lua b/wiktra/wikt/translit/ain-translit.lua new file mode 100644 index 0000000..a10ad8d --- /dev/null +++ b/wiktra/wikt/translit/ain-translit.lua @@ -0,0 +1,168 @@ +local export = {} +local gmatch = mw.ustring.gmatch +local find = mw.ustring.find +local gsub = mw.ustring.gsub + +local corresp = { + -- main + ["ア"] = "¤a", + ["イ"] = "¤i", + ["ウ"] = "¤u", + ["エ"] = "¤e", + ["オ"] = "¤o", + ["カ"] = "ka", + ["キ"] = "ki", + ["ク"] = "ku", + ["ケ"] = "ke", + ["コ"] = "ko", + ["シャ"] = "sa", + ["シ"] = "si", + ["シュ"] = "su", + ["シェ"] = "se", + ["ショ"] = "so", + ["タ"] = "ta", + ["チ"] = "ci", + ["ト゚"] = "tu", + ["テ"] = "te", + ["ト"] = "to", + ["チャ"] = "ca", + ["ツ"] = "cu", + ["セ゚"] = "ce", + ["チョ"] = "co", + ["ナ"] = "na", + ["ニ"] = "ni", + ["ヌ"] = "nu", + ["ネ"] = "ne", + ["ノ"] = "no", + ["ハ"] = "ha", + ["ヒ"] = "hi", + ["フ"] = "hu", + ["ヘ"] = "he", + ["ホ"] = "ho", + ["バ"] = "ba", + ["ビ"] = "bi", + ["ブ"] = "bu", + ["ベ"] = "be", + ["ボ"] = "bo", + ["パ"] = "pa", + ["ピ"] = "pi", + ["プ"] = "pu", + ["ペ"] = "pe", + ["ポ"] = "po", + ["マ"] = "ma", + ["ミ"] = "mi", + ["ム"] = "mu", + ["メ"] = "me", + ["モ"] = "mo", + ["ヤ"] = "ya", + ["ユ"] = "yu", + ["イェ"] = "ye", + ["ヨ"] = "yo", + ["ラ"] = "ra", + ["リ"] = "ri", + ["ル"] = "ru", + ["レ"] = "re", + ["ロ"] = "ro", + ["ワ"] = "wa", + ["ウィ"] = "wi", + ["ウェ"] = "we", + ["ウォ"] = "wo", + + -- finals + ["ㇵ"] = "h¤", + ["ㇶ"] = "h¤", + ["ㇷ"] = "h¤", + ["ㇸ"] = "h¤", + ["ㇹ"] = "h¤", + ["ㇻ"] = "r¤", + ["ㇼ"] = "r¤", + ["ㇽ"] = "r¤", + ["ㇾ"] = "r¤", + ["ㇿ"] = "r¤", + ["ㇰ"] = "k¤", + ["ㇱ"] = "s¤", + ["ㇳ"] = "t¤", + ["ㇴ"] = "n¤", + ["ㇺ"] = 
"m¤", + ["ㇷ゚"] = "p¤", + + -- misc + ["ィ"] = "y¤", + ["ゥ"] = "w¤", + ["ー"] = "̄", + ["・"] = "=", + + -- alt spellings + ["サ"] = "sa", + ["ス"] = "su", + ["セ"] = "se", + ["ソ"] = "so", + ["ツ゚"] = "tu", + ["チュ"] = "cu", + ["チェ"] = "ce", + ["ヰ"] = "wi", + ["ヱ"] = "we", + ["ヲ"] = "wo", + ["ㇲ"] = "s¤", + ["ッ"] = "x¤", + ["ン"] = "n¤", + ["トゥ"] = "tu", + + -- dialectal characters + ["ヂャ"] = "zya", + ["ヂ"] = "zi", + ["ヂュ"] = "zyu", + ["ヂェ"] = "zye", + ["ヂョ"] = "zyo", + ["ダ"] = "da", + ["ドゥ"] = "du", + ["デ"] = "de", + ["ド"] = "do", + ["ガ"] = "ga", + ["ギ"] = "gi", + ["グ"] = "gu", + ["ゲ"] = "ge", + ["ゴ"] = "go", + + -- loanword characters + ["ジ"] = "zi", + ["ジャ"] = "zya", + ["ジュ"] = "zyu", + ["ジェ"] = "zye", + ["ジョ"] = "zyo", + ["ディ"] = "di", + ["ザ"] = "za", + ["ズ"] = "zu", + ["ゼ"] = "ze", + ["ゾ"] = "zo" +} + +function export.tr(text, lang, sc) + local result = {} + -- Named "chunk" rather than "string" so the standard string library is not + -- shadowed inside the loop. + for chunk in gmatch(text, ".[ィゥェォャュョ゚]?") do + if corresp[chunk] then -- try to convert character sequences + chunk = corresp[chunk] + else + local str_result = {} + for char in gmatch(chunk, ".") do -- try again over every individual character + table.insert(str_result, corresp[char] or char) + end + chunk = table.concat(str_result) + end + table.insert(result, chunk) + end + text = table.concat(result) + text = mw.ustring.toNFC(text) + + -- 'ッ': doubles a following k/b/p, otherwise becomes t. Both rules run over + -- the whole text; when they were an either/or, a single k/b/p match + -- suppressed the t rule and left a stray "x" for every other 'ッ'. + text = gsub(text, "x¤([kbp])", "%1¤%1") + text = gsub(text, "x¤", "t¤") + + text = gsub(text, "¤", "") + + return text +end + +return export diff --git a/wiktra/wikt/translit/altai-translit.lua b/wiktra/wikt/translit/altai-translit.lua new file mode 100644 index 0000000..203d620 --- /dev/null +++ b/wiktra/wikt/translit/altai-translit.lua @@ -0,0 +1,103 @@ +local export = {} + +local tab = { + ["А"] = "A", + ["Б"] = "B", + ["В"] = "V", + ["Г"] = "G", + ["Д"] = "D", + ["Е"] = "E", + ["Ё"] = "Yo", + ["Ж"] = "Ž", + ["З"] = "Z", + ["И"] = "I", + ["Й"] = "Y", + ["Ј"] = "J̌", + ["J"] = "J̌[[Category:Altai text with misused 
characters|J]]", + ["К"] = "K", + ["Л"] = "L", + ["М"] = "M", + ["Н"] = "N", + ["Ҥ"] = "Ŋ", + ["О"] = "O", + ["Ӧ"] = "Ö", + ["Ö"] = "Ö[[Category:Altai text with misused characters|Ö]]", + ["П"] = "P", + ["Р"] = "R", + ["С"] = "S", + ["Т"] = "T", + ["У"] = "U", + ["Ӱ"] = "Ü", + ["Ф"] = "F", + ["Х"] = "H", + ["Ц"] = "C", + ["Ч"] = "Č", + ["Ш"] = "Š", + ["Щ"] = "Šč", + ["Ъ"] = "ʺ", + ["Ы"] = "Ï", + ["Ь"] = "ʹ", + ["Э"] = "E", + ["Ю"] = "Yu", + ["Я"] = "Ya", + ["а"] = "a", + ["б"] = "b", + ["в"] = "v", + ["г"] = "g", + ["д"] = "d", + ["е"] = "e", + ["ё"] = "yo", + ["ж"] = "ž", + ["з"] = "z", + ["и"] = "i", + ["й"] = "y", + ["ј"] = "ǰ", + ["j"] = "ǰ[[Category:Altai text with misused characters|j]]", + ["к"] = "k", + ["л"] = "l", + ["м"] = "m", + ["н"] = "n", + ["ҥ"] = "ŋ", + ["о"] = "o", + ["ӧ"] = "ö", + ["ö"] = "ö[[Category:Altai text with misused characters|ö]]", + ["п"] = "p", + ["р"] = "r", + ["с"] = "s", + ["т"] = "t", + ["у"] = "u", + ["ӱ"] = "ü", + ["ф"] = "f", + ["х"] = "h", + ["ц"] = "c", + ["ч"] = "č", + ["ш"] = "š", + ["щ"] = "šč", + ["ъ"] = "ʺ", + ["ы"] = "ï", + ["ь"] = "ʹ", + ["э"] = "e", + ["ю"] = "yu", + ["я"] = "ya" +} + +local iotated = {["Е"] = "Ye", ["е"] = "ye"} + +function export.tr(text, lang, sc) + local ugsub = mw.ustring.gsub + + -- Ё needs to be composed if is decomposed (e + combining diaeresis). + -- However, this cannot happen in wikitext, only in Lua modules. + text = mw.ustring.toNFC(text) + + -- е after a vowel or at the beginning of a word becomes ye + -- Note that according to modern Altai orthography ее (instead of ээ) is occationally used for long r + text = ugsub(text, "([АОӨУҮЫЯЁЮИЪЬаоөуүыяёюиъь%A][́̀]?)е", "%1ye") + -- text = mw.ustring.gsub(text, "([АОӨУҮЫЕЯЁЮИЕЪЬаоөуүыэяёюиеъь%A][́̀]?)е", "%1ye") + text = ugsub(text, "^[Ее]", iotated) + text = ugsub(text, "([^Ѐ-ӿ])([Ее])", function(a, b) return a .. 
iotated[b] end) + + return (ugsub(text, ".", tab)) +end + +return export diff --git a/wiktra/wikt/translit/ar-translit.lua b/wiktra/wikt/translit/ar-translit.lua new file mode 100644 index 0000000..b4da0a5 --- /dev/null +++ b/wiktra/wikt/translit/ar-translit.lua @@ -0,0 +1,370 @@ +-- Authors: Benwing, ZxxZxxZ, Atitarev +local export = {} + +local U = mw.ustring.char +local rfind = mw.ustring.find +local rsubn = mw.ustring.gsub +local rmatch = mw.ustring.match +local rsplit = mw.text.split +local gcodepoint = mw.ustring.gcodepoint + +-- assigned below +local has_diacritics + +-- version of rsubn() that discards all but the first return value +local function rsub(term, foo, bar) + local retval = rsubn(term, foo, bar) + return retval +end + +local zwnj = U(0x200c) -- zero-width non-joiner +local alif_madda = U(0x622) +local alif_hamza_below = U(0x625) +local alif = U(0x627) +local taa_marbuuTa = U(0x629) +local laam = U(0x644) +local waaw = U(0x648) +local alif_maqSuura = U(0x649) +local yaa = U(0x64A) +local fatHataan = U(0x64B) +local Dammataan = U(0x64C) +local kasrataan = U(0x64D) +local fatHa = U(0x64E) +local Damma = U(0x64F) +local kasra = U(0x650) +local shadda = U(0x651) +local sukuun = U(0x652) +local dagger_alif = U(0x670) +local alif_waSl = U(0x671) +-- local zwj = U(0x200d) -- zero-width joiner +local lrm = U(0x200e) -- left-to-right mark +local rlm = U(0x200f) -- right-to-left mark + +local tt = { + -- consonants + ["ب"] = "b", + ["ت"] = "t", + ["ث"] = "ṯ", + ["ج"] = "j", + ["ح"] = "ḥ", + ["خ"] = "ḵ", + ["د"] = "d", + ["ذ"] = "ḏ", + ["ر"] = "r", + ["ز"] = "z", + ["س"] = "s", + ["ش"] = "š", + ["ص"] = "ṣ", + ["ض"] = "ḍ", + ["ط"] = "ṭ", + ["ظ"] = "ẓ", + ["ع"] = "ʿ", + ["غ"] = "ḡ", + ["ف"] = "f", + ["ق"] = "q", + ["ك"] = "k", + ["ڪ"] = "k", + ["ل"] = "l", + ["م"] = "m", + ["ن"] = "n", + ["ه"] = "h", + -- tāʾ marbūṭa (special) - always after a fátḥa (a), silent at the end of + -- an utterance, "t" in ʾiḍāfa or with pronounced tanwīn. 
We catch + -- most instances of tāʾ marbūṭa before we get to this stage. + [taa_marbuuTa] = "t", -- tāʾ marbūṭa = ة + -- control characters + [zwnj] = "-", -- ZWNJ (zero-width non-joiner) + -- [zwj]="", -- ZWJ (zero-width joiner) + -- rare letters + ["پ"] = "p", + ["چ"] = "č", + ["ڤ"] = "v", + ["ڥ"] = "v", + ["گ"] = "g", + ["ڨ"] = "g", + ["ڧ"] = "q", + ["ڢ"] = "f", + ["ں"] = "n", + ["ڭ"] = "g", + -- semivowels or long vowels, alif, hamza, special letters + ["ا"] = "ā", -- ʾalif + -- hamzated letters + ["أ"] = "ʾ", -- hamza over alif + [alif_hamza_below] = "ʾ", -- hamza under alif + ["ؤ"] = "ʾ", -- hamza over wāw + ["ئ"] = "ʾ", -- hamza over yā + ["ء"] = "ʾ", -- hamza on the line + -- long vowels + [waaw] = "w", -- "ū" after ḍamma (u) and not before diacritic + [yaa] = "y", -- "ī" after kasra (i) and not before diacritic + [alif_maqSuura] = "ā", -- ʾalif maqṣūra + [alif_madda] = "ʾā", -- ʾalif madda + [alif_waSl] = "", -- hamzatu l-waṣl + [dagger_alif] = "ā", -- ʾalif xanjariyya = dagger ʾalif (Koranic diacritic) + -- short vowels, šádda and sukūn + [fatHataan] = "an", -- fatḥatan + [Dammataan] = "un", -- ḍammatan + [kasrataan] = "in", -- kasratan + [fatHa] = "a", -- fatḥa + [Damma] = "u", -- ḍamma + [kasra] = "i", -- kasra + -- šadda - doubled consonant + [sukuun] = "", -- sukūn - no vowel + -- ligatures + ["ﻻ"] = "lā", + ["ﷲ"] = "llāh", + -- taṭwīl + ["ـ"] = "", -- taṭwīl, no sound + -- numerals + ["١"] = "1", + ["٢"] = "2", + ["٣"] = "3", + ["٤"] = "4", + ["٥"] = "5", + ["٦"] = "6", + ["٧"] = "7", + ["٨"] = "8", + ["٩"] = "9", + ["٠"] = "0", + -- punctuation (leave on separate lines) + ["؟"] = "?", -- question mark + ["«"] = "“", -- quotation mark + ["»"] = "”", -- quotation mark + ["٫"] = ".", -- decimal point + ["٬"] = ",", -- thousands separator + ["٪"] = "%", -- percent sign + ["،"] = ",", -- comma + ["؛"] = ";" -- semicolon +} + +local sun_letters = "تثدذرزسشصضطظلن" +-- For use in implementing sun-letter assimilation of ال (al-) +local ttsun1 = {} +local 
ttsun2 = {} +local ttsun3 = {} +for cp in gcodepoint(sun_letters) do + local ch = U(cp) + ttsun1[ch] = tt[ch] + ttsun2["l-" .. ch] = tt[ch] .. "-" .. ch + table.insert(ttsun3, tt[ch]) +end +-- For use in implementing elision of al- +local sun_letters_tr = table.concat(ttsun3, "") + +local consonants_needing_vowels = "بتثجحخدذرزسشصضطظعغفقكڪلمنهپچڤگڨڧڢںڭأإؤئءةﷲ" +-- consonants on the right side; includes alif madda +local rconsonants = consonants_needing_vowels .. "ويآ" +-- consonants on the left side; does not include alif madda +local lconsonants = consonants_needing_vowels .. "وي" +-- Arabic semicolon, comma, question mark; taṭwīl; period, exclamation point, +-- single quote for bold/italic, double quotes for quoted material +local punctuation = "؟،؛" .. "ـ" .. ".!'" .. "\"" +local space_like = "%s'" .. "\"" +local space_like_class = "[" .. space_like .. "]" +local numbers = "١٢٣٤٥٦٧٨٩٠" + +local before_diacritic_checking_subs = { + ------------ transformations prior to checking for diacritics -------------- + -- convert llh for allāh into ll+shadda+dagger-alif+h + {"لله", "للّٰه"}, -- shadda+short-vowel (including tanwīn vowels, i.e. -an -in -un) gets + -- replaced with short-vowel+shadda during NFC normalisation, which + -- MediaWiki does for all Unicode strings; however, it makes the + -- transliteration process inconvenient, so undo it. + {"([" .. fatHataan .. Dammataan .. kasrataan .. fatHa .. Damma .. kasra .. dagger_alif .. "])" .. shadda, shadda .. "%1"}, -- ignore alif jamīla (otiose alif in 3pl verb forms) + -- #1: handle ḍamma + wāw + alif (final -ū) + {Damma .. waaw .. alif, Damma .. waaw}, -- #2: handle wāw + sukūn + alif (final -w in -aw in defective verbs) + -- this must go before the generation of w, which removes the waw here. + {waaw .. sukuun .. alif, waaw .. sukuun}, -- ignore final alif or alif maqṣūra following fatḥatan (e.g. 
in accusative + -- singular or words like عَصًا "stick" or هُدًى "guidance"; this is called + -- tanwin nasb) + {fatHataan .. "[" .. alif .. alif_maqSuura .. "]", fatHataan}, -- same but with the fatḥatan placed over the alif or alif maqṣūra + -- instead of over the previous letter (considered a misspelling but + -- common) + {"[" .. alif .. alif_maqSuura .. "]" .. fatHataan, fatHataan}, -- tāʾ marbūṭa should always be preceded by fatḥa, alif, alif madda or + -- dagger alif; infer fatḥa if not + {"([^" .. fatHa .. alif .. alif_madda .. dagger_alif .. "])" .. taa_marbuuTa, "%1" .. fatHa .. taa_marbuuTa}, -- similarly for alif between consonants, possibly marked with shadda + -- (does not apply to initial alif, which is silent when not marked with + -- hamza, or final alif, which might be pronounced as -an) + {"([" .. lconsonants .. "]" .. shadda .. "?)" .. alif .. "([" .. rconsonants .. "])", "%1" .. fatHa .. alif .. "%2"}, -- infer fatḥa in case of non-fatḥa + alif/alif-maqṣūra + dagger alif + {"([^" .. fatHa .. "])([" .. alif .. alif_maqSuura .. "]" .. dagger_alif .. ")", "%1" .. fatHa .. "%2"}, -- infer kasra in case of hamza-under-alif not + kasra + {alif_hamza_below .. "([^" .. kasra .. kasrataan .. "])", alif_hamza_below .. kasra .. "%1"}, -- ignore dagger alif placed over regular alif or alif maqṣūra + {"([" .. alif .. alif_maqSuura .. "])" .. dagger_alif, "%1"}, ----------- rest of these concern definite article alif-lām ---------- + -- in kasra/ḍamma + alif + lam, make alif into hamzatu l-waṣl, so we + -- handle cases like بِالتَّوْفِيق (bi-t-tawfīq) correctly + {"([" .. Damma .. kasra .. "])" .. alif .. laam, "%1" .. alif_waSl .. laam}, -- al + consonant + shadda (only recognize word-initially if regular alif): remove shadda + {"^(" .. alif .. fatHa .. "?" .. laam .. "[" .. lconsonants .. "])" .. shadda, "%1"}, {"(" .. space_like_class .. alif .. fatHa .. "?" .. laam .. "[" .. lconsonants .. "])" .. shadda, "%1"}, {"(" .. alif_waSl .. fatHa .. "?" .. 
laam .. "[" .. lconsonants .. "])" .. shadda, "%1"}, -- handle l- hamzatu l-waṣl or word-initial al- + {"^" .. alif .. fatHa .. "?" .. laam, "al-"}, {"(" .. space_like_class .. ")" .. alif .. fatHa .. "?" .. laam, "%1al-"}, -- next one for bi-t-tawfīq + {"([" .. Damma .. kasra .. "])" .. alif_waSl .. fatHa .. "?" .. laam, "%1-l-"}, -- next one for remaining hamzatu l-waṣl (at beginning of word) + {alif_waSl .. fatHa .. "?" .. laam, "l-"}, -- special casing if the l in al- has a shadda on it (as in الَّذِي "that"), + -- so we don't mistakenly double the dash + {"l%-" .. shadda, "ll"}, -- implement assimilation of sun letters + {"l%-[" .. sun_letters .. "]", ttsun2} +} + +-- Transliterate the word(s) in TEXT. LANG (the language) and SC (the script) +-- are ignored. OMIT_I3RAAB means leave out final short vowels (ʾiʿrāb). +-- GRAY_I3RAAB means render transliterate short vowels (ʾiʿrāb) in gray. +-- FORCE_TRANSLIT causes even non-vocalized text to be transliterated +-- (normally the function checks for non-vocalized text and returns nil, +-- since such text is ambiguous in transliteration). +function export.tr(text, lang, sc, omit_i3raab, gray_i3raab, force_translit) + -- make it possible to call this function from a template + if type(text) == "table" then + local function f(x) return (x ~= "") and x or nil end + text, lang, sc, omit_i3raab, force_translit = f(text.args[1]), f(text.args[2]), f(text.args[3]), f(text.args[4]), f(text.args[5]) + end + + for _, sub in ipairs(before_diacritic_checking_subs) do text = rsub(text, sub[1], sub[2]) end + + if not force_translit and not has_diacritics(text) then return nil end + + ------------ transformations after checking for diacritics -------------- + -- Replace plain alif with hamzatu l-waṣl when followed by fatḥa/ḍamma/kasra. + -- Must go after handling of initial al-, which distinguishes alif-fatḥa + -- from alif w/hamzatu l-waṣl. Must go before generation of ū and ī, which + -- eliminate the ḍamma/kasra. 
+ text = rsub(text, alif .. "([" .. fatHa .. Damma .. kasra .. "])", alif_waSl .. "%1") + -- ḍamma + waw not followed by a diacritic is ū, otherwise w + text = rsub(text, Damma .. waaw .. "([^" .. fatHataan .. Dammataan .. kasrataan .. fatHa .. Damma .. kasra .. shadda .. sukuun .. dagger_alif .. "])", "ū%1") + text = rsub(text, Damma .. waaw .. "$", "ū") + -- kasra + yaa not followed by a diacritic (or ū from prev step) is ī, otherwise y + text = rsub(text, kasra .. yaa .. "([^" .. fatHataan .. Dammataan .. kasrataan .. fatHa .. Damma .. kasra .. shadda .. sukuun .. dagger_alif .. "ū])", "ī%1") + text = rsub(text, kasra .. yaa .. "$", "ī") + -- convert shadda to double letter. + text = rsub(text, "(.)" .. shadda, "%1%1") + if not omit_i3raab and gray_i3raab then -- show ʾiʿrāb grayed in transliteration + -- decide whether to gray out the t in ﺓ. If word begins with al- or l-, yes. + -- Otherwise, no if word ends in a/i/u, yes if ends in an/in/un. + text = rsub(text, "^(a?l%-[^%s]+)" .. taa_marbuuTa .. "([" .. fatHataan .. Dammataan .. kasrataan .. fatHa .. Damma .. kasra .. "])", "%1t%2") + text = rsub(text, "(" .. space_like_class .. "a?l%-[^%s]+)" .. taa_marbuuTa .. "([" .. fatHataan .. Dammataan .. kasrataan .. fatHa .. Damma .. kasra .. "])", "%1t%2") + text = rsub(text, taa_marbuuTa .. "([" .. fatHa .. Damma .. kasra .. "])", "t%1") + text = rsub(text, taa_marbuuTa .. "([" .. fatHataan .. Dammataan .. kasrataan .. "])", "t%1") + text = rsub(text, ".", {[fatHataan] = "an", [kasrataan] = "in", [Dammataan] = "un"}) + text = rsub(text, "([" .. fatHa .. Damma .. kasra .. "])(" .. space_like_class .. ")", function(vowel, space) + -- declared local so the callback does not create a global + local vowel_repl = {[fatHa] = "a ", [kasra] = "i ", [Damma] = "u "} + return vowel_repl[vowel] .. space + end) + text = rsub(text, "[" .. fatHa .. Damma .. kasra .. "]$", {[fatHa] = "a", [kasra] = "i", [Damma] = "u"}) + text = rsub(text, "", "") + elseif omit_i3raab then -- omit ʾiʿrāb in transliteration + text = rsub(text, "[" .. fatHataan .. 
Dammataan .. kasrataan .. "]", "") + text = rsub(text, "[" .. fatHa .. Damma .. kasra .. "](" .. space_like_class .. ")", "%1") + text = rsub(text, "[" .. fatHa .. Damma .. kasra .. "]$", "") + end + -- tāʾ marbūṭa should not be rendered by -t if word-final even when + -- ʾiʿrāb (desinential inflection) is shown; instead, use (t) before + -- whitespace, nothing when final; but render final -ﺍﺓ and -ﺁﺓ as -āh, + -- consistent with Wehr's dictionary + -- Left-to-right or right-to-left mark at end of text will prevent tāʾ marbūṭa + -- from being transliterated correctly. + text = string.gsub(text, lrm, "") + text = string.gsub(text, rlm, "") + text = rsub(text, "([" .. alif .. alif_madda .. "])" .. taa_marbuuTa .. "$", "%1h") + -- Ignore final tāʾ marbūṭa (it appears as "a" due to the preceding + -- short vowel). Need to do this after graying or omitting word-final + -- ʾiʿrāb. + text = rsub(text, taa_marbuuTa .. "$", "") + text = rsub(text, taa_marbuuTa .. "(%p)", "%1") + if not omit_i3raab then -- show ʾiʿrāb in transliteration + text = rsub(text, taa_marbuuTa .. "(" .. space_like_class .. ")", "(t)%1") + else + -- When omitting ʾiʿrāb, show all non-absolutely-final instances of + -- tāʾ marbūṭa as (t), with trailing ʾiʿrāb omitted. + text = rsub(text, taa_marbuuTa, "(t)") + end + -- tatwīl should be rendered as - at beginning or end of word. It will + -- be rendered as nothing in the middle of a word (FIXME, do we want + -- this?) + text = rsub(text, "^ـ", "-") + text = rsub(text, "(" .. space_like_class .. ")ـ", "%1-") + text = rsub(text, "ـ$", "-") + text = rsub(text, "ـ(" .. space_like_class .. ")", "-%1") + -- Now convert remaining Arabic chars according to table. + text = rsub(text, ".", tt) + text = rsub(text, "aā", "ā") + -- Implement elision of al- after a final vowel. We do this + -- conservatively, only handling elision of the definite article rather + -- than elision in other cases of hamzat al-waṣl (e.g. 
form-I imperatives + -- or form-VII and above verbal nouns) partly because elision in + -- these cases isn't so common in MSA and partly to avoid excessive + -- elision in case of words written with initial bare alif instead of + -- properly with hamzated alif. Possibly we should reconsider. + -- At the very least we currently don't handle elision of الَّذِي (allaḏi) + -- correctly because we special-case it to appear without the hyphen; + -- perhaps we should reconsider that. + text = rsub(text, "([aiuāīū]'* +'*)a([" .. sun_letters_tr .. "]%-)", "%1%2") + if gray_i3raab then text = rsub(text, "([aiuāīū]'*'* +'*)a([" .. sun_letters_tr .. "]%-)", "%1%2") end + -- Special-case the transliteration of allāh, without the hyphen + text = rsub(text, "^(a?)l%-lāh", "%1llāh") + text = rsub(text, "(" .. space_like_class .. "a?)l%-lāh", "%1llāh") + + return text +end + +local has_diacritics_subs = { + -- FIXME! What about lam-alif ligature? + -- remove punctuation and shadda + -- must go before removing final consonants + {"[" .. punctuation .. shadda .. "]", ""}, -- Remove consonants at end of word or utterance, so that we're OK with + -- words lacking iʿrāb (must go before removing other consonants). + -- If you want to catch places without iʿrāb, comment out the next two lines. + {"[" .. lconsonants .. "]$", ""}, {"[" .. lconsonants .. "](" .. space_like_class .. ")", "%1"}, -- remove consonants (or alif) when followed by diacritics + -- must go after removing shadda + -- do not remove the diacritics yet because we need them to handle + -- long-vowel sequences of diacritic + pseudo-consonant + {"[" .. lconsonants .. alif .. "]([" .. fatHataan .. Dammataan .. kasrataan .. fatHa .. Damma .. kasra .. sukuun .. dagger_alif .. "])", "%1"}, -- the following two must go after removing consonants w/diacritics because + -- we only want to treat vocalic wāw/yā' in them (we want to have removed + -- wāw/yā' followed by a diacritic) + -- remove ḍamma + wāw + {Damma .. 
waaw, ""}, -- remove kasra + yā' + {kasra .. yaa, ""}, -- remove fatḥa/fatḥatan + alif/alif-maqṣūra + {"[" .. fatHataan .. fatHa .. "][" .. alif .. alif_maqSuura .. "]", ""}, -- remove diacritics + {"[" .. fatHataan .. Dammataan .. kasrataan .. fatHa .. Damma .. kasra .. sukuun .. dagger_alif .. "]", ""}, -- remove numbers, hamzatu l-waṣl, alif madda + {"[" .. numbers .. "ٱ" .. "آ" .. "]", ""}, -- remove non-Arabic characters + {"[^" .. U(0x0600) .. "-" .. U(0x06FF) .. U(0x0750) .. "-" .. U(0x077F) .. U(0x08A0) .. "-" .. U(0x08FF) .. U(0xFB50) .. "-" .. U(0xFDFF) .. U(0xFE70) .. "-" .. U(0xFEFF) .. "]", ""} +} + +-- declared as local above +function has_diacritics(text) + local count + text, count = rsubn(text, "[" .. lrm .. rlm .. "]", "") + if count > 0 then require("debug").track("ar-translit/lrm or rlm") end + for _, sub in ipairs(has_diacritics_subs) do text = rsub(text, unpack(sub)) end + return #text == 0 +end + +-- Return true if transliteration TR is an irregular transliteration of +-- ARABIC. Return false if ARABIC can't be transliterated. For purposes of +-- establishing regularity, hyphens are ignored and word-final tāʾ marbūṭa +-- can be transliterated as "(t)", "" or "t". 
+function export.irregular_translit(arabic, tr) + if not arabic or arabic == "" or not tr or tr == "" then return false end + local regtr = export.tr(arabic) + if not regtr or regtr == tr then return false end + local arwords = rsplit(arabic, " ") + local regwords = rsplit(regtr, " ") + local words = rsplit(tr, " ") + if #regwords ~= #words or #regwords ~= #arwords then return true end + for i = 1, #regwords do + local regword = regwords[i] + local word = words[i] + local arword = arwords[i] + -- Resolve final (t) in auto-translit to t, h or nothing + if rfind(regword, "%(t%)$") then regword = rfind(word, "āh$") and rsub(regword, "%(t%)$", "h") or rfind(word, "t$") and rsub(regword, "%(t%)$", "t") or rsub(regword, "%(t%)$", "") end + -- Resolve clitics + short a + alif-lām, which may get auto-transliterated + -- to contain long ā, to short a if the manual translit has it; note + -- that currently in cases with assimilated l, the auto-translit will + -- fail, so we won't ever get here and don't have to worry about + -- auto-translit l against manual-translit assimilated char. + local clitic_chars = "^[وفكل]" -- separate line to avoid L2R display weirdness + if rfind(arword, clitic_chars .. fatHa .. "?[" .. alif .. alif_waSl .. "]" .. 
laam) and rfind(word, "^[wfkl]a%-") then regword = rsub(regword, "^([wfkl])ā", "%1a") end + -- Ignore hyphens when comparing + if rsub(regword, "%-", "") ~= rsub(word, "%-", "") then return true end + end + return false +end + +return export + +-- For Vim, so we get 4-space tabs +-- vim: set ts=4 sw=4 noet: diff --git a/wiktra/wikt/translit/armn-translit.lua b/wiktra/wikt/translit/armn-translit.lua new file mode 100644 index 0000000..c0057bd --- /dev/null +++ b/wiktra/wikt/translit/armn-translit.lua @@ -0,0 +1,113 @@ +local export = {} + +local gsub = mw.ustring.gsub +local mapping = { + ["ա"] = "a", + ["բ"] = "b", + ["գ"] = "g", + ["դ"] = "d", + ["ե"] = "e", + ["զ"] = "z", + ["է"] = "ē", + ["ը"] = "ə", + ["թ"] = "tʿ", + ["ժ"] = "ž", + ["ի"] = "i", + ["լ"] = "l", + ["խ"] = "x", + ["ծ"] = "c", + ["կ"] = "k", + ["հ"] = "h", + ["ձ"] = "j", + ["ղ"] = "ł", + ["ճ"] = "č", + ["մ"] = "m", + ["յ"] = "y", + ["ն"] = "n", + ["շ"] = "š", + ["ո"] = "o", + ["չ"] = "čʿ", + ["պ"] = "p", + ["ջ"] = "ǰ", + ["ռ"] = "ṙ", + ["ս"] = "s", + ["վ"] = "v", + ["տ"] = "t", + ["ր"] = "r", + ["ց"] = "cʿ", + ["ւ"] = "w", + ["փ"] = "pʿ", + ["ք"] = "kʿ", + ["և"] = "ew", + ["օ"] = "ō", + ["ֆ"] = "f", + ["Ա"] = "A", + ["Բ"] = "B", + ["Գ"] = "G", + ["Դ"] = "D", + ["Ե"] = "E", + ["Զ"] = "Z", + ["Է"] = "Ē", + ["Ը"] = "Ə", + ["Թ"] = "Tʿ", + ["Ժ"] = "Ž", + ["Ի"] = "I", + ["Լ"] = "L", + ["Խ"] = "X", + ["Ծ"] = "C", + ["Կ"] = "K", + ["Հ"] = "H", + ["Ձ"] = "J", + ["Ղ"] = "Ł", + ["Ճ"] = "Č", + ["Մ"] = "M", + ["Յ"] = "Y", + ["Ն"] = "N", + ["Շ"] = "Š", + ["Ո"] = "O", + ["Չ"] = "Čʿ", + ["Պ"] = "P", + ["Ջ"] = "J̌", + ["Ռ"] = "Ṙ", + ["Ս"] = "S", + ["Վ"] = "V", + ["Տ"] = "T", + ["Ր"] = "R", + ["Ց"] = "Cʿ", + ["Ւ"] = "W", + ["Փ"] = "Pʿ", + ["Ք"] = "Kʿ", + ["Օ"] = "Ō", + ["Ֆ"] = "F", + -- ligatures; each key must be exactly one character because export.tr looks + -- the text up one character at a time + ["ﬓ"] = "mn", + ["ﬔ"] = "me", + ["ﬕ"] = "mi", + ["ﬖ"] = "vn", + ["ﬗ"] = "mx", + -- punctuation + ["՝"] = ",", + ["։"] = ".", + ["․"] = ";", + ["՛"] = "́", + ["՜"] = "!", + ["՞"] = "?", + ["՟"] = ".", + ["֊"] = "-", + ["՚"] = "’", + 
["«"] = "“", + ["»"] = "”", + ["ՙ"] = "ʿ" +} + +local replacements = {["յ̵"] = "ɦ", ["Ո[ւՒ]"] = "U", ["ու"] = "u", ["Ո՛[ւՒ]"] = "Ú", ["ո՛ւ"] = "ú", ["Ո՜[ւՒ]"] = "U!", ["ո՜ւ"] = "u!", ["Ո՞[ւՒ]"] = "U?", ["ո՞ւ"] = "u?", ["ո̈ւ"] = "ü", ["Ո̈[ւՒ]"] = "Ü", ["օ̈"] = "ö", ["Օ̈"] = "Ö"} + +function export.tr(text, lang, sc) + if sc and sc ~= "Armn" then return nil end + + for regex, replacement in pairs(replacements) do text = mw.ustring.gsub(text, regex, replacement) end + + text = gsub(text, ".", mapping) + return text +end + +return export diff --git a/wiktra/wikt/translit/array.lua b/wiktra/wikt/translit/array.lua new file mode 100644 index 0000000..71f8d75 --- /dev/null +++ b/wiktra/wikt/translit/array.lua @@ -0,0 +1,213 @@ +local Array = {} +local array_constructor + +-- Copy table library so as not to unexpectedly change the behavior of code that +-- uses it. +local array_methods = mw.clone(table) + +-- Create version of table.sort that returns the table. +array_methods.sort = function(t, comp) + table.sort(t, comp) + return t +end + +-- ipairs and unpack operate on arrays. +array_methods.ipairs = ipairs +array_methods.unpack = unpack + +function array_methods:type() + local mt = getmetatable(self) + return type(mt) == "table" and mt.__type or nil +end + +function array_methods:adjustIndex(index) + index = math.floor(index) + if index < 0 then index = #self + index + 1 end + return index +end + +-- string.sub-style slicing. +function array_methods:slice(i, j) + if i == nil then + i = 1 + elseif type(i) == "number" then + i = self:adjust_index(i) + else + error("Expected number, got " .. type(i)) + end + + if j == nil or type(j) == "number" then + j = self:adjust_index(j or -1) + else + error("Expected number, got " .. 
type(j)) + end + + local new_arr = array_constructor() + local k = 0 + for index = i, j do + k = k + 1 + new_arr[k] = self[index] + end + return new_arr +end + +-- Convert a key->table map (such as [[Module:languages/data2]]) into an +-- Array of shallow copies of its values (pairs() order, i.e. unspecified). +-- "from" is a bad name. +-- If field_for_key is given, each copy stores its original key +-- in that field. +local function to_array(map, field_for_key) + local m_table = require "table" -- function-local on purpose: the shared m_table local is declared further down the file, so a bare assignment here would read and write a global + + local arr = {} + local i = 0 + for key, val in pairs(map) do + i = i + 1 + local new_val = m_table.shallowcopy(val) + if field_for_key then new_val[field_for_key] = key end + arr[i] = new_val + end + + return array_constructor(arr) +end + +-- Functions from [[Module:table]] that operate on arrays or sparse arrays. +-- List copied from [[Module:table/documentation]]. +local operate_on_array = { + -- non-sparse + "removeDuplicates", "length", "contains", "serialCommaJoin", "reverseIpairs", "reverse", "invert", "listToSet", "isArray", -- sparse + "numKeys", "maxIndex", "compressSparseArray", "sparseIpairs", -- tables in general + "shallowcopy", "deepcopy" +} + +-- Not all of these operate on arrays. +local create_new_array = { + -- Functions from [[Module:table]] that create an array. + -- List copied from [[Module:table/documentation]]. + "removeDuplicates", "numKeys", "affixNums", "compressSparseArray", "keysToList", "reverse", -- Functions from [[Module:table]] that create a table. + "shallowcopy", "deepcopy", -- Functions from [[Module:fun]] that create an array. + "map", "filter" +} + +-- Functions from [[Module:fun]] that take an array in the second argument. +-- They just have to have the argument order reversed to work as methods of the +-- array object. +local second_argument_is_array = {"map", "some", "all", "filter"} + +-- Add aliases for the functions from [[Module:table]] whose names +-- contain "array" or "list", which is redundant, and whose names don't conform +-- to the usual camel case. 
+-- The key redirects to the value. +local alias_of = {compress = "compressSparseArray", keys = "keysToList", toSet = "listToSet", deepCopy = "deepcopy", shallowCopy = "shallowcopy"} + +local function get_module_function(key, module, module_name) return module[key] or error("No function named " .. tostring(key) .. " in Module:" .. module_name) end + +local function wrap_in_array_constructor(func) return function(...) return array_constructor(func(...)) end end + +local function create_array_generating_func(key, module, module_name) return wrap_in_array_constructor(get_module_function(key, module, module_name)) end + +local function reverse_arguments(func) return function(a, b) return func(b, a, true) end end + +local function underscore_to_camel_case(str) + if type(str) ~= "string" then return str end + str = str:gsub("_(.)", string.upper) + return str +end + +local m_table, m_fun +local Array = {} +Array.__type = "array" +function Array:__index(key) + if type(key) ~= "string" then return nil end + + -- Convert underscores to camel case: num_keys -> numKeys. 
+ key = underscore_to_camel_case(key) + + local val = array_methods[key] + if val then return val end + + key = alias_of[key] or key + + local func + m_table = m_table or require "table" + if m_table.contains(operate_on_array, key) then + if m_table.contains(create_new_array, key) then + func = create_array_generating_func(key, m_table, "table") + else + func = m_table[key] + end + elseif m_table.contains(second_argument_is_array, key) then + m_fun = m_fun or require "fun" + + local raw_func = reverse_arguments(get_module_function(key, m_fun, "fun")) + if m_table.contains(create_new_array, key) then + func = wrap_in_array_constructor(raw_func) + else + func = raw_func + end + elseif key == "fold" then + m_fun = m_fun or require "fun" + + local raw_func = get_module_function(key, m_fun, "fun") + func = function(t, func, accum) return raw_func(func, t, accum) end + end + + if func then + array_methods[key] = func + return func + end +end + +function Array.__add(a, b) + if type(a) == "table" and type(b) == "table" then + m_table = m_table or require "table" + + local new_arr = array_constructor(m_table.shallowcopy(a)) + + for _, val in ipairs(b) do new_arr:insert(val) end + + return new_arr + end +end + +function Array:new(...) + local arr + if select("#", ...) == 1 and type((...)) == "table" then + arr = ... + + local mt = getmetatable(arr) + -- If table has been loaded with mw.loadData, copy it to avoid the + -- limitations of it being a virtual table. + if mt and mt.mw_loadData then + m_table = m_table or require "table" + arr = m_table.shallowcopy(arr) + end + else + arr = {...} + end + return setmetatable(arr, self) +end + +-- Declared as local above. +function array_constructor(...) return Array:new(...) 
end + +local array_generating_funcs = {from = to_array} +local Array_library_mt = {__call = Array.new, __index = array_generating_funcs} +setmetatable(Array, Array_library_mt) + +function Array_library_mt:__index(key) + key = underscore_to_camel_case(key) + key = alias_of[key] or key + + if array_generating_funcs[key] then return array_generating_funcs[key] end + + m_table = m_table or require "table" + + if m_table.contains(create_new_array, key) then + local func = create_array_generating_func(key, m_table, "table") + array_generating_funcs[key] = func + return func + end +end + +return Array diff --git a/wiktra/wikt/translit/as-translit.lua b/wiktra/wikt/translit/as-translit.lua new file mode 100644 index 0000000..9016251 --- /dev/null +++ b/wiktra/wikt/translit/as-translit.lua @@ -0,0 +1,215 @@ +-- Transliteration for Assamese +local export = {} +local gsub = mw.ustring.gsub +local match = mw.ustring.match + +local conv = { + -- consonants + ["ক্ষ"] = "kh", + ["ক"] = "k", + ["খ"] = "kh", + ["গ"] = "g", + ["ঘ"] = "gh", + ["ঙ"] = "ṅ", + ["চ"] = "s", + ["ছ"] = "s", + ["জ"] = "z", + ["ঝ"] = "zh", + ["ঞ"] = "ñ", + ["ট"] = "t", + ["ঠ"] = "th", + ["ড"] = "d", + ["ঢ"] = "dh", + ["ণ"] = "n", + ["ত"] = "t", + ["থ"] = "th", + ["দ"] = "d", + ["ধ"] = "dh", + ["ন"] = "n", + ["প"] = "p", + ["ফ"] = "ph", + ["ব"] = "b", + ["ভ"] = "bh", + ["ম"] = "m", + ["য"] = "z", + ["ৰ"] = "r", + ["ল"] = "l", + ["ৱ"] = "w", + ["শ"] = "x", + ["ষ"] = "x", + ["স"] = "x", + ["হ"] = "h", + ["য়"] = "y", + ["ড়"] = "r", + ["ঢ়"] = "rh", + + -- vowel diacritics + ["’"] = "ö", + ["ি"] = "i", + ["ু"] = "u", + ["ৃ"] = "ri", + ["ে"] = "e", + ["ে’"] = "ë", + ["ো"] = "ü", + ["া"] = "a", + ["ী"] = "i", + ["ূ"] = "u", + ["ৈ"] = "oi", + ["ৌ"] = "ou", + + -- visarga + ["ঃ"] = "o", + + -- vowel signs + ["অ"] = "o", + ["অ’"] = "ó", + ["ই"] = "i", + ["উ"] = "u", + ["ঋ"] = "ri", + ["এ"] = "e", + ["এ’"] = "é", + ["ও"] = "ü", + ["আ"] = "a", + ["ঈ"] = "i", + ["ঊ"] = "u", + ["ঐ"] = "oi", + ["ঔ"] = "ou", + + -- 
hosonto + ["্"] = "", + + -- sondrobindu + ["ঁ"] = "̃", + + -- owogroho + ["ঽ"] = "o", + + -- onusor + ["ং"] = "ṅ", + + -- hosonto to, + ["ৎ"] = "t", + + -- numerals + ["০"] = "0", + ["১"] = "1", + ["২"] = "2", + ["৩"] = "3", + ["৪"] = "4", + ["৫"] = "5", + ["৬"] = "6", + ["৭"] = "7", + ["৮"] = "8", + ["৯"] = "9", + + -- punctuation + ["।"] = "." -- dari +} + +local conv2 = {["ক্ষ"] = "ḱ", ["খ"] = "ḱ", ["ঘ"] = "ǵ", ["ঙ"] = "ŋ", ["ং"] = "ŋ", ["ঝ"] = "ź", ["ঠ"] = "ṫ", ["থ"] = "ṫ", ["ঢ"] = "ḋ", ["ধ"] = "ḋ", ["ফ"] = "ṗ", ["ভ"] = "ḃ", ["ঢ়"] = "ŕ", ["ৃ"] = "ṙ", ["ঋ"] = "ṙ", ["ৈ"] = "ʏ", ["ঐ"] = "ʏ", ["ৌ"] = "ɵ", ["ঔ"] = "ɵ"} + +local consonant, vowel, vowel_sign = "ক-হড়-য়ৰৱ", "oা-ৌ’", "অ-ঔ" +local c = "[" .. consonant .. "]" +local cc = "়?" .. c +local v = "[" .. vowel .. vowel_sign .. "]" +local syncope_pattern = "(" .. v .. cc .. v .. cc .. ")o(" .. cc .. "ঁ?" .. v .. ")" + +local function rev_string(text) + local result, length = "", mw.ustring.len(text) + for i = 1, length do result = result .. 
mw.ustring.sub(text, length - i + 1, length - i + 1) end + return result +end + +function export.tr(text, lang, sc, mode) + text = gsub(text, "([^ৰ])্য", "%1্য়") + text = gsub(text, "্ব", "্ৱ") + text = gsub(text, "[শষস]্", "চ্") + text = gsub(text, "্স", "্চ") + text = gsub(text, "[োও]ৱ", "্ও") + text = gsub(text, "ক্ষ", "খ") + text = gsub(text, "’ৱ", "্অ’") + text = gsub(text, "[ুুউ]ৱ(.)", "্উ%1") + text = gsub(text, "[োও]ৱ(.)", "্ও%1") + text = gsub(text, "োঁৱ(.)", "্ওঁ%1") + text = gsub(text, "[ৌঔ]ৱ", "্ঔ") + text = gsub(text, "[িই]য়(.)", "্ই%1") + text = gsub(text, "ৃয়", "্ঋ") + text = gsub(text, "[েএ]য়(.)", "্এ%1") + text = gsub(text, "[ে’এ’]য়", "্এ’") + text = gsub(text, "[ৈঐ]য়(.)", "্ঐ%1") + text = gsub(text, "[ীঈ]য়(.)", "্ঈ%1") + text = gsub(text, "[ীঈ]য়", "্ঈঅ") -- end + text = gsub(text, "[ূূঊ]ৱ", "্ঊ") + text = gsub(text, "݁", "্অ") + text = gsub(text, "ঃ", "্অ") + text = gsub(text, "[࣪ܿ]", "্") + text = gsub(text, "বাৰ", "্বাৰ") + text = gsub(text, "বিলাক", "্বিলাক") + text = gsub(text, "টো", "্টো") + text = gsub(text, "খন", "্খন") + text = gsub(text, "ডাল", "্ডাল") + text = gsub(text, "খিনি", "্খিনি") + text = gsub(text, "জন", "্জন") + text = gsub(text, "জনী", "্জনী") + text = gsub(text, "গৰাকী", "্গৰাকী") + text = gsub(text, "সকল", "্সকল") + text = gsub(text, "কৈ", "্কৈ") + text = gsub(text, "কে", "্কে") + text = gsub(text, "ফাল", "্ফাল") + text = gsub(text, "কেই", "্কেই") + text = gsub(text, "মান", "্মান") + text = gsub(text, "[িীইঈ]ঞ", "্ইঅ͂") + text = gsub(text, "ঞ্", "ন্̃") + + text = gsub(text, "(" .. c .. "়?)([" .. vowel .. "’?্]?)", function(a, b) return a .. (b == "" and "o" or b) end) + + for word in mw.ustring.gmatch(text, "[ঁ-৽o’]+") do + local orig_word = word + word = rev_string(word) + word = gsub(word, "^o(়?" .. c .. ")(ঁ?" .. v .. 
")", "%1%2") + while match(word, syncope_pattern) do word = gsub(word, syncope_pattern, "%1%2") end + text = gsub(text, orig_word, rev_string(word)) + end + + if mode == "IPA" then + text = gsub(text, ".[়’]?", conv2) + text = gsub(text, ".", conv2) + end + + text = gsub(text, ".[়’]?", conv) + text = gsub(text, ".", conv) + + local consonants_Latn_no_h = "[b-df-gj-np-tv-z]" + + -- Cw + text = gsub(text, "mw", "mb") -- special case + text = gsub(text, "^(" .. consonants_Latn_no_h .. "h?)w", "%1") -- initial + text = gsub(text, "hw", "hb") + text = gsub(text, "(" .. consonants_Latn_no_h .. ")w", "%1%1") -- medial + + -- zñ + text = gsub(text, "^zñ", "gy") -- initial + text = gsub(text, "zñ", "gg") -- medial + + -- Cy + text = gsub(text, "^khy", "kh") + text = gsub(text, "([aéeióoüu])(" .. consonants_Latn_no_h .. ")y", "%1i%2%2") + + -- final "b" has inherent vowel + text = gsub(text, "b$", "bo") + text = gsub(text, "b ", "bo ") + + -- final r conjuncts + text = gsub(text, "r([kszt])o$", "r%1") + text = gsub(text, "r([kszt])o ", "r%1 ") + text = gsub(text, "rkho$", "rkh") + text = gsub(text, "rkho ", "rkh ") + + if match(text, "[ঁ-৽]") and mode ~= "debug" then + return nil + else + return mw.ustring.toNFC(text) + end +end + +return export diff --git a/wiktra/wikt/translit/av-translit.lua b/wiktra/wikt/translit/av-translit.lua new file mode 100644 index 0000000..4fb7d7c --- /dev/null +++ b/wiktra/wikt/translit/av-translit.lua @@ -0,0 +1,25 @@ +local export = {} + +local tt = {["б"] = "b", ["п"] = "p", ["ф"] = "f", ["в"] = "w", ["м"] = "m", ["д"] = "d", ["т"] = "t", ["й"] = "j", ["н"] = "n", ["з"] = "z", ["ц"] = "c", ["с"] = "s", ["ж"] = "ž", ["ш"] = "š", ["щ"] = "š̄", ["л"] = "l", ["ч"] = "č", ["р"] = "r", ["г"] = "g", ["к"] = "k", ["х"] = "χ", ["ъ"] = "ʾ", ["а"] = "a", ["е"] = "e", ["ы"] = "ə", ["и"] = "i", ["о"] = "o", ["у"] = "u", ["ё"] = "ë", ["ь"] = "’", ["э"] = "è", ["ю"] = "ju", ["я"] = "ja", ["Б"] = "B", ["П"] = "P", ["Ф"] = "F", ["В"] = "W", ["М"] = "M", 
["Д"] = "D", ["Т"] = "T", ["Й"] = "J", ["Н"] = "N", ["З"] = "Z", ["Ц"] = "C", ["С"] = "S", ["Ж"] = "Ž", ["Ш"] = "Š", ["Щ"] = "Š̄", ["Л"] = "L", ["Ч"] = "Č", ["Р"] = "R", ["Г"] = "G", ["К"] = "K", ["Х"] = "Χ", ["Ъ"] = "ʾ", ["А"] = "A", ["Е"] = "E", ["Ы"] = "Ə", ["И"] = "I", ["О"] = "O", ["У"] = "U", ["Ё"] = "Ë", ["Ь"] = "’", ["Э"] = "È", ["Ю"] = "Ju", ["Я"] = "Ja"}; + +local tetragraphs = {["цӏцӏ"] = "c̣̄", ["чӏчӏ"] = "č̣̄", ["кӏкӏ"] = "ḳ̄", ["лълъ"] = "ł̄", ["Цӏцӏ"] = "C̣̄", ["Чӏчӏ"] = "Č̣̄", ["Кӏкӏ"] = "Ḳ̄", ["Лълъ"] = "Ł̄"} + +local digraphs = {["цӏ"] = "c̣", ["цц"] = "c̄", ["тӏ"] = "ṭ", ["лӏ"] = "kl", ["сс"] = "s̄", ["лъ"] = "ł", ["чч"] = "č̄", ["чӏ"] = "č̣", ["кь"] = "kḷ", ["кк"] = "k̄", ["кӏ"] = "ḳ", ["хь"] = "x", ["хъ"] = "q̄", ["къ"] = "q̇̄", ["гъ"] = "ġ", ["хх"] = "χ̄", ["гӏ"] = "ʿ", ["хӏ"] = "ḥ", ["гь"] = "h", ["Цӏ"] = "C̣", ["Цц"] = "C̄", ["Тӏ"] = "Ṭ", ["Лӏ"] = "Kl", ["Сс"] = "S̄", ["Лъ"] = "Ł", ["Чч"] = "Č̄", ["Чӏ"] = "Č̣", ["Кь"] = "Kḷ", ["Кк"] = "K̄", ["Кӏ"] = "Ḳ", ["Хь"] = "X", ["Хъ"] = "Q̄", ["Къ"] = "Q̇̄", ["Гъ"] = "Ġ", ["Хх"] = "Χ̄", ["Гӏ"] = "ʿ", ["Хӏ"] = "Ḥ", ["Гь"] = "H"} + +function export.tr(text, lang, sc) + local str_gsub = string.gsub + + -- Convert uppercase palochka to lowercase. Lowercase is found in tables + -- above. 
+ text = str_gsub(text, mw.ustring.char(0x4C0), mw.ustring.char(0x4CF)) + + for grapheme, replacement in pairs(tetragraphs) do text = str_gsub(text, grapheme, replacement) end + + for grapheme, replacement in pairs(digraphs) do text = str_gsub(text, grapheme, replacement) end + + text = str_gsub(text, "[\1-\127\194-\244][\128-\191]*", tt) + + return text +end + +return export diff --git a/wiktra/wikt/translit/avst-translit.lua b/wiktra/wikt/translit/avst-translit.lua new file mode 100644 index 0000000..e7feb05 --- /dev/null +++ b/wiktra/wikt/translit/avst-translit.lua @@ -0,0 +1,83 @@ +local export = {} + +local mapping = { + ["𐬀"] = "a", -- AVESTAN LETTER A + ["𐬁"] = "ā", -- AVESTAN LETTER AA + ["𐬂"] = "å", -- AVESTAN LETTER AO + ["𐬃"] = "ā̊", -- AVESTAN LETTER AAO + ["𐬄"] = "ą", -- AVESTAN LETTER AN + ["𐬅"] = "ą̇̇", -- AVESTAN LETTER AAN + ["𐬆"] = "ə", -- AVESTAN LETTER AE + ["𐬇"] = "ə̄", -- AVESTAN LETTER AEE + ["𐬈"] = "e", -- AVESTAN LETTER E + ["𐬉"] = "ē", -- AVESTAN LETTER EE + ["𐬊"] = "o", -- AVESTAN LETTER O + ["𐬋"] = "ō", -- AVESTAN LETTER OO + ["𐬌"] = "i", -- AVESTAN LETTER I + ["𐬍"] = "ī", -- AVESTAN LETTER II + ["𐬎"] = "u", -- AVESTAN LETTER U + ["𐬏"] = "ū", -- AVESTAN LETTER UU + + ["𐬐"] = "k", -- AVESTAN LETTER KE + ["𐬑"] = "x", -- AVESTAN LETTER XE + ["𐬒"] = "x́", -- AVESTAN LETTER XYE + ["𐬓"] = "xᵛ", -- AVESTAN LETTER XVE + ["𐬔"] = "g", -- AVESTAN LETTER GE + ["𐬕"] = "ġ", -- AVESTAN LETTER GGE + ["𐬖"] = "γ", -- AVESTAN LETTER GHE + ["𐬗"] = "c", -- AVESTAN LETTER CE + ["𐬘"] = "j", -- AVESTAN LETTER JE + ["𐬙"] = "t", -- AVESTAN LETTER TE + ["𐬚"] = "θ", -- AVESTAN LETTER THE + ["𐬛"] = "d", -- AVESTAN LETTER DE + ["𐬜"] = "δ", -- AVESTAN LETTER DHE + ["𐬝"] = "t̰", -- AVESTAN LETTER TTE + ["𐬞"] = "p", -- AVESTAN LETTER PE + ["𐬟"] = "f", -- AVESTAN LETTER FE + ["𐬠"] = "b", -- AVESTAN LETTER BE + ["𐬡"] = "β", -- AVESTAN LETTER BHE + ["𐬢"] = "ŋ", -- AVESTAN LETTER NGE + ["𐬣"] = "ŋ́", -- AVESTAN LETTER NGYE + ["𐬤"] = "ŋᵛ", -- AVESTAN LETTER NGVE + ["𐬥"] = 
"n", -- AVESTAN LETTER NE + ["𐬦"] = "ń", -- AVESTAN LETTER NYE + ["𐬧"] = "ṇ", -- AVESTAN LETTER NNE + ["𐬨"] = "m", -- AVESTAN LETTER ME + ["𐬩"] = "m̨", -- AVESTAN LETTER HME + ["𐬪"] = "ẏ", -- AVESTAN LETTER YYE + ["𐬫"] = "y", -- AVESTAN LETTER YE + ["𐬬"] = "v", -- AVESTAN LETTER VE + ["𐬭"] = "r", -- AVESTAN LETTER RE + ["𐬮"] = "l", -- AVESTAN LETTER LE + ["𐬯"] = "s", -- AVESTAN LETTER SE + ["𐬰"] = "z", -- AVESTAN LETTER ZE + ["𐬱"] = "š", -- AVESTAN LETTER SHE + ["𐬲"] = "ž", -- AVESTAN LETTER ZHE + ["𐬳"] = "š́", -- AVESTAN LETTER SHYE + ["𐬴"] = "ṣ̌", -- AVESTAN LETTER SSHE + ["𐬵"] = "h", -- AVESTAN LETTER HE + ["𐬹"] = " ", -- AVESTAN ABBREVIATION MARK + ["⸱"] = "." -- WORD SEPARATOR MIDDLE DOT +} + +function export.tr(text, lang, sc) + text = mw.ustring.gsub(text, "[⸱𐬹]?[𐬺𐬻𐬼𐬽𐬾𐬿]", ".") -- punctuation + text = string.gsub(text, "[\1-\127\194-\244][\128-\191]*", mapping) -- transliterate letters + -- text = mw.ustring.gsub(text, "ii", "y") + -- text = mw.ustring.gsub(text, "uu", "v") + + text = mw.ustring.gsub(text, "([aāəō])rə([kxgγcjtθδpfbβnmrlszšž])", "%1rə%2") -- r > rə /V_C + text = mw.ustring.gsub(text, "([aāəō])rə$", "%1rə") -- r > rə /V_# + + text = mw.ustring.gsub(text, "([aāeēəoōuū])i(rm[eiī])", "%1i%2") -- i-epenthesis + text = mw.ustring.gsub(text, "([aāeēəoōuū])i(ṇt[eiī])", "%1i%2") -- i-epenthesis + text = mw.ustring.gsub(text, "([aāeēəoōuū])i([kxgγtθδpfbβnrl][eiī])", "%1i%2") -- i-epenthesis + text = mw.ustring.gsub(text, "(%.%s)i([θr][iī])", "%1i%2") -- i-epenthesis + text = mw.ustring.gsub(text, "^i([θr][iī])", "i%1") -- i-epenthesis + + text = mw.ustring.gsub(text, "([aāeēəiīoō])u(r[ouū])", "%1u%2") -- u-epenthesis + + return text +end + +return export diff --git a/wiktra/wikt/translit/az-translit.lua b/wiktra/wikt/translit/az-translit.lua new file mode 100644 index 0000000..b71c226 --- /dev/null +++ b/wiktra/wikt/translit/az-translit.lua @@ -0,0 +1,149 @@ +local export = {} + +local single_letters = { + ["а"] = "a", + ["А"] = "A", + ["б"] = "b", + 
["Б"] = "B", + ["в"] = "v", + ["В"] = "V", + ["г"] = "q", + ["Г"] = "Q", + ["д"] = "d", + ["Д"] = "D", + ["е"] = "e", + ["Е"] = "E", + ["ё"] = "yo", + ["Ё"] = "Yo", + ["ж"] = "j", + ["Ж"] = "J", + ["з"] = "z", + ["З"] = "Z", + ["и"] = "i", + ["И"] = "İ", + ["й"] = "y", + ["Й"] = "Y", + ["к"] = "k", + ["К"] = "K", + ["л"] = "l", + ["Л"] = "L", + ["м"] = "m", + ["М"] = "M", + ["н"] = "n", + ["Н"] = "N", + ["о"] = "o", + ["О"] = "O", + ["п"] = "p", + ["П"] = "P", + ["р"] = "r", + ["Р"] = "R", + ["с"] = "s", + ["С"] = "S", + ["т"] = "t", + ["Т"] = "T", + ["у"] = "u", + ["У"] = "U", + ["ф"] = "f", + ["Ф"] = "F", + ["х"] = "x", + ["Х"] = "X", + ["ц"] = "s", + ["Ц"] = "S", + ["ч"] = "ç", + ["Ч"] = "Ç", + ["ш"] = "ş", + ["Ш"] = "Ş", + ["щ"] = "şç", + ["Щ"] = "Şç", + ["ъ"] = "", + ["Ъ"] = "", + ["ы"] = "ı", + ["Ы"] = "I", + ["ь"] = "ʹ", + ["Ь"] = "ʹ", + ["э"] = "e", + ["Э"] = "E", + ["ю"] = "yu", + ["Ю"] = "Yu", + ["я"] = "ya", + ["Я"] = "Ya", + ["ј"] = "y", + ["Ј"] = "Y", + ["һ"] = "h", + ["Һ"] = "H", + ["ғ"] = "ğ", + ["Ғ"] = "Ğ", + ["ә"] = "ə", + ["Ә"] = "Ə", + ["ө"] = "ö", + ["Ө"] = "Ö", + ["ү"] = "ü", + ["Ү"] = "Ü", + ["ҹ"] = "c", + ["Ҹ"] = "C", + ["ҝ"] = "g", + ["Ҝ"] = "G" +} + +function export.tr(text, lang, sc) + + if not sc then sc = require("scripts").findBestScript(text, require("languages").getByCode("az")):getCode() end + + if sc == "Arab" then + text = nil + elseif sc == "Latn" then + text = nil + elseif sc == "Cyrl" then + text = mw.ustring.gsub(text, "()([ЕеЮюЁё])", function(pos, iotated) + -- up to two characters before the match (a letter plus an optional combining accent) + local preceding = mw.ustring.sub(text, math.max(1, pos - 2), math.max(0, pos - 1)) + local capital = iotated == "Е" or iotated == "Ю" or iotated == "Ё" -- Ё must count as capital, otherwise the "Yo"/"Ö" branches below are unreachable + local lower = mw.ustring.lower(iotated) + + local translit + if preceding == "" or mw.ustring.match(preceding, "[АОӨӘУЫЕЯЁЮИЕаоөәуыэяёюиеъь%A][́̀]?$") then + if capital then + if lower == "ю" then + return "Yu" + elseif lower == "ё" then + return "Yo" + else + return "Ye" + end 
+ else + if lower == "ю" then + return "yu" + elseif lower == "ё" then + return "yo" + else + return "ye" + end + end + else + if capital then + if lower == "ю" then + return "Ü" + elseif lower == "ё" then + return "Ö" + else + return "E" + end + else + if lower == "ю" then + return "ü" + elseif lower == "ё" then + return "ö" + else + return "e" + end + end + end + return translit + end) + + text = mw.ustring.gsub(text, ".", single_letters) + end + + return text +end +return export diff --git a/wiktra/wikt/translit/ba-translit.lua b/wiktra/wikt/translit/ba-translit.lua new file mode 100644 index 0000000..911a7de --- /dev/null +++ b/wiktra/wikt/translit/ba-translit.lua @@ -0,0 +1,105 @@ +local export = {} + +local tt = { + ["ү"] = "ü", + ["Ү"] = "Ü", + ["т"] = "t", + ["Т"] = "T", + ["р"] = "r", + ["Р"] = "R", + ["ф"] = "f", + ["Ф"] = "F", + ["ө"] = "ö", + ["Ө"] = "Ö", + ["ю"] = "yu", + ["Ю"] = "Yu", + ["ш"] = "š", + ["Ш"] = "Š", + ["ь"] = "’", + ["Ь"] = "’", + ["ъ"] = "ʺ", + ["Ъ"] = "ʺ", + ["н"] = "n", + ["Н"] = "N", + ["п"] = "p", + ["П"] = "P", + ["й"] = "y", + ["Й"] = "Y", + ["л"] = "l", + ["Л"] = "L", + ["з"] = "z", + ["З"] = "Z", + ["е"] = "e", + ["Е"] = "E", + ["г"] = "g", + ["Г"] = "G", + ["б"] = "b", + ["Б"] = "B", + ["у"] = "u", + ["У"] = "U", + ["с"] = "s", + ["С"] = "S", + ["х"] = "x", + ["Х"] = "X", + ["ч"] = "č", + ["Ч"] = "Č", + ["щ"] = "šč", + ["Щ"] = "Šč", + ["я"] = "ya", + ["Я"] = "Ya", + ["ы"] = "ï", + ["Ы"] = "Ï", + ["э"] = "e", + ["Э"] = "E", + ["м"] = "m", + ["М"] = "M", + ["о"] = "o", + ["О"] = "O", + ["и"] = "i", + ["И"] = "I", + ["ё"] = "yo", + ["Ё"] = "Yo", + ["ж"] = "ž", + ["Ж"] = "Ž", + ["к"] = "k", + ["К"] = "K", + ["д"] = "d", + ["Д"] = "D", + ["в"] = "v", + ["В"] = "V", + ["ц"] = "ts", + ["Ц"] = "Ts", + ["а"] = "a", + ["А"] = "A", + ["ң"] = "ñ", + ["Ң"] = "Ñ", + ["ғ"] = "ğ", + ["Ғ"] = "Ğ", + ["ҙ"] = "ð", + ["Ҙ"] = "Đ", + ["ҡ"] = "q", + ["Ҡ"] = "Q", + ["ҫ"] = "θ", + ["Ҫ"] = "Θ", + ["һ"] = "h", + ["Һ"] = "H", + ["ә"] = "ä", + ["Ә"] = "Ä" 
+}; + +local iotated = {["е"] = "ye", ["Е"] = "Ye"} + +function export.tr(text, lang, sc) + local str_gsub = string.gsub + local ugsub = mw.ustring.gsub + -- ү/у should be transliterated as w after vowels + text = ugsub(text, "([АаЕеЭэЮюЯяӘәИиҮүУуӨөЫы])[үу]", "%1w") + + text = ugsub(text, "([АОӨӘУЫЕЯЁЮИЕаоөәуыэяёюиеъь%A][́̀]?)([Ее])", function(a, e) return a .. iotated[e] end) + text = ugsub(text, "^[Ее]", iotated) + text = str_gsub(text, "[\1-\127\194-\244][\128-\191]*", tt) + + return text +end + +return export diff --git a/wiktra/wikt/translit/bdk-translit.lua b/wiktra/wikt/translit/bdk-translit.lua new file mode 100644 index 0000000..3df6ed2 --- /dev/null +++ b/wiktra/wikt/translit/bdk-translit.lua @@ -0,0 +1,24 @@ +local export = {} + +local tt = {["б"] = "b", ["п"] = "p", ["ф"] = "f", ["в"] = "v", ["м"] = "m", ["д"] = "d", ["т"] = "t", ["й"] = "j", ["н"] = "n", ["з"] = "z", ["ц"] = "c", ["с"] = "s", ["ж"] = "ž", ["ш"] = "š", ["л"] = "l", ["ч"] = "č", ["р"] = "r", ["г"] = "g", ["к"] = "k", ["х"] = "χ", ["ъ"] = "ʔ", ["а"] = "a", ["е"] = "e", ["ы"] = "ɨ", ["и"] = "i", ["о"] = "o", ["у"] = "u", ["э"] = "e", ["Б"] = "B", ["П"] = "P", ["Ф"] = "F", ["В"] = "V", ["М"] = "M", ["Д"] = "D", ["Т"] = "T", ["Й"] = "J", ["Н"] = "N", ["З"] = "Z", ["Ц"] = "C", ["С"] = "S", ["Ж"] = "Ž", ["Ш"] = "Š", ["Л"] = "L", ["Ч"] = "Č", ["Р"] = "R", ["Г"] = "G", ["К"] = "K", ["Х"] = "Χ", ["Ъ"] = "ʔ", ["А"] = "A", ["Е"] = "E", ["Ы"] = "Ɨ", ["И"] = "I", ["О"] = "O", ["У"] = "U", ["Э"] = "E"}; + +local digraphs = {["аь"] = "ä", ["Аь"] = "Ä", ["гӏ"] = "ʕ", ["Гӏ"] = "ʕ", ["гъ"] = "ġ", ["Гъ"] = "Ġ", ["гь"] = "h", ["Гь"] = "H", ["дж"] = "ǯ", ["Дж"] = "Ǯ", ["кк"] = "k̄", ["кӏ"] = "ḳ", ["Кӏ"] = "Ḳ", ["къ"] = "q̄", ["Къ"] = "Q̄", ["кь"] = "q̇", ["Кь"] = "Q̇", ["оь"] = "ö", ["Оь"] = "Ö", ["пӏ"] = "ṗ", ["Пӏ"] = "Ṗ", ["тӏ"] = "ṭ", ["Тӏ"] = "Ṭ", ["уь"] = "ü", ["Уь"] = "Ü", ["хӏ"] = "ħ", ["Хӏ"] = "Ħ", ["хъ"] = "q", ["Хъ"] = "Q", ["хь"] = "x", ["Хь"] = "X", ["цӏ"] = "c̣", ["Цӏ"] = "C̣", ["чӏ"] = "č̣", 
["Чӏ"] = "Č̣"} + +function export.tr(text, lang, sc) + local str_gsub = string.gsub + local UTF8_char = "[%z\1-\127\194-\244][\128-\191]*" + + -- Convert capital to lowercase palochka. Lowercase is found in tables + -- above. + text = str_gsub(text, mw.ustring.char(0x4C0), mw.ustring.char(0x4CF)) + + text = mw.ustring.gsub(text, "[Кк]ъг", "ɢ") + + for letter, translit in pairs(digraphs) do text = str_gsub(text, letter, translit) end + + text = str_gsub(text, UTF8_char, tt) + + return text +end + +return export diff --git a/wiktra/wikt/translit/be-translit.lua b/wiktra/wikt/translit/be-translit.lua new file mode 100644 index 0000000..44b784a --- /dev/null +++ b/wiktra/wikt/translit/be-translit.lua @@ -0,0 +1,126 @@ +local export = {} + +local AC = mw.ustring.char(0x0301) -- acute = ́ + +local rsubn = mw.ustring.gsub + +-- version of rsubn() that discards all but the first return value +local function rsub(term, foo, bar) + local retval = rsubn(term, foo, bar) + return retval +end + +local tt = { + ["А"] = "A", + ["а"] = "a", + ["Б"] = "B", + ["б"] = "b", + ["В"] = "V", + ["в"] = "v", + ["Г"] = "H", + ["г"] = "h", + ["Д"] = "D", + ["д"] = "d", + ["Е"] = "Je", + ["е"] = "je", + ["Ё"] = "Jo", + ["ё"] = "jo", + ["Ж"] = "Ž", + ["ж"] = "ž", + ["З"] = "Z", + ["з"] = "z", + ["І"] = "I", + ["і"] = "i", + ["И"] = "I", + ["и"] = "i", -- present for Old Belarusian; FIXME, remove when we have a separate language code for this lang + ["Й"] = "J", + ["й"] = "j", + ["К"] = "K", + ["к"] = "k", + ["Л"] = "L", + ["л"] = "l", + ["М"] = "M", + ["м"] = "m", + ["Н"] = "N", + ["н"] = "n", + ["О"] = "O", + ["о"] = "o", + ["П"] = "P", + ["п"] = "p", + ["Р"] = "R", + ["р"] = "r", + ["С"] = "S", + ["с"] = "s", + ["Т"] = "T", + ["т"] = "t", + ["У"] = "U", + ["у"] = "u", + ["Ў"] = "Ŭ", + ["ў"] = "ŭ", + ["Ф"] = "F", + ["ф"] = "f", + ["Х"] = "X", + ["х"] = "x", + ["Ц"] = "C", + ["ц"] = "c", + ["Ч"] = "Č", + ["ч"] = "č", + ["Ш"] = "Š", + ["ш"] = "š", + ["Ы"] = "Y", + ["ы"] = "y", + ["Ь"] = "ʹ", + 
["ь"] = "ʹ", + ["Э"] = "E", + ["э"] = "e", + ["Ю"] = "Ju", + ["ю"] = "ju", + ["Я"] = "Ja", + ["я"] = "ja", + ["’"] = "ʺ", + ["ʼ"] = "ʺ", + -- currently non-standard, used in some older norms + ["Ґ"] = "G", + ["ґ"] = "g", + -- Belarusian style quotes + ["«"] = "“", + ["»"] = "”" +}; + +local unstressed_vowels = "aeiyuAEIYU" +local unstressed_vowel = "[" .. unstressed_vowels .. "]" + +local acute_decomposer = {["á"] = "a" .. AC, ["é"] = "e" .. AC, ["í"] = "i" .. AC, ["ó"] = "o" .. AC, ["ú"] = "u" .. AC, ["ý"] = "y" .. AC, ["Á"] = "A" .. AC, ["É"] = "E" .. AC, ["Í"] = "I" .. AC, ["Ó"] = "O" .. AC, ["Ú"] = "U" .. AC, ["Ý"] = "Y" .. AC} + +function export.tr(text, lang, sc) + text = mw.ustring.gsub(text, "'+", {["'"] = "ʺ"}) -- neutral apostrophe + text = mw.ustring.gsub(text, ".", tt) + + -- Mark word boundaries + text = mw.ustring.gsub(text, "(%s+)", "#%1#") + text = "#" .. text .. "#" + + -- Mark stress on + text = mw.ustring.gsub(text, "(#[^#Oo" .. AC .. "]*)([Oo])([^#Oo" .. AC .. "]*" .. unstressed_vowel .. "[^#Oo" .. AC .. "]*#)", "%1%2" .. AC .. "%3") + text = mw.ustring.gsub(text, "(#[^#Oo" .. AC .. "]*" .. unstressed_vowel .. "[^#Oo" .. AC .. "]*)([Oo])([^#Oo" .. AC .. "]*#)", "%1%2" .. AC .. 
"%3") + + -- Strip hashes + text = mw.ustring.gsub(text, "#", "") + + return text +end + +function export.reverse_tr(text) -- reverse-translit any words or phrases + local reverse_tt = {} + for k, v in pairs(tt) do reverse_tt[v] = k end + reverse_tt["ʺ"] = "'" + reverse_tt["ʹ"] = "ь" + reverse_tt["i"] = "і" + reverse_tt["I"] = "І" + text = rsub(text, ".", acute_decomposer) + text = rsub(text, "[Jj][aeou]", reverse_tt) + text = rsub(text, ".", reverse_tt) + return text +end + +return export diff --git a/wiktra/wikt/translit/bg-translit.lua b/wiktra/wikt/translit/bg-translit.lua new file mode 100644 index 0000000..c3cb1c2 --- /dev/null +++ b/wiktra/wikt/translit/bg-translit.lua @@ -0,0 +1,97 @@ +local export = {} + +local tt = { + ["А"] = "A", + ["а"] = "a", + ["Б"] = "B", + ["б"] = "b", + ["В"] = "V", + ["в"] = "v", + ["Г"] = "G", + ["г"] = "g", + ["Д"] = "D", + ["д"] = "d", + ["Е"] = "E", + ["е"] = "e", + ["Ж"] = "Ž", + ["ж"] = "ž", + ["З"] = "Z", + ["з"] = "z", + ["И"] = "I", + ["и"] = "i", + ["Й"] = "J", + ["й"] = "j", + ["К"] = "K", + ["к"] = "k", + ["Л"] = "L", + ["л"] = "l", + ["М"] = "M", + ["м"] = "m", + ["Н"] = "N", + ["н"] = "n", + ["О"] = "O", + ["о"] = "o", + ["П"] = "P", + ["п"] = "p", + ["Р"] = "R", + ["р"] = "r", + ["С"] = "S", + ["с"] = "s", + ["Т"] = "T", + ["т"] = "t", + ["У"] = "U", + ["у"] = "u", + ["Ф"] = "F", + ["ф"] = "f", + ["Х"] = "H", + ["х"] = "h", + ["Ц"] = "C", + ["ц"] = "c", + ["Ч"] = "Č", + ["ч"] = "č", + ["Ш"] = "Š", + ["ш"] = "š", + ["Щ"] = "Št", + ["щ"] = "št", + ["Ъ"] = "Ǎ", + ["ъ"] = "ǎ", + ["Ю"] = "Ju", + ["ю"] = "ju", + ["Я"] = "Ja", + ["я"] = "ja", + ["Ѐ"] = "È", + ["ѐ"] = "è", + ["Ѝ"] = "Ì", + ["ѝ"] = "ì", + + -- Pre-reform + ["Ѫ"] = "Ǫ", + ["ѫ"] = "ǫ", + ["Ѣ"] = "Ě", + ["ѣ"] = "ě", + ["Ь"] = "ʹ", + ["ь"] = "ʹ", + -- ?? 
used in old texts + ["Ы"] = "Y", + ["ы"] = "y", + ["Ѧ"] = "Ę", + ["ѧ"] = "ę", + ["Ѩ"] = "Ję", + ["ѩ"] = "ję", + ["Ѭ"] = "Jǫ", + ["ѭ"] = "jǫ", + -- Quotes + ["«"] = "“", + ["»"] = "”" +}; + +function export.tr(text, lang, sc) + if mw.ustring.find(text, "[̀ЀѐЍѝ]") then require("debug").track("bg-translit/grave") end + text = mw.ustring.gsub(text, "(%w)[Ъъ]$", "%1") + text = mw.ustring.gsub(text, "(%w)[Ъъ]%f[%c%p%s]", "%1") + text = mw.ustring.gsub(text, "[Ьь]%f[ЕеОоЪъ]", {["Ь"] = "J", ["ь"] = "j"}) + text = mw.ustring.gsub(text, ".", tt) + return text +end + +return export diff --git a/wiktra/wikt/translit/bho-Kthi-translit.lua b/wiktra/wikt/translit/bho-Kthi-translit.lua new file mode 100644 index 0000000..1d34dbc --- /dev/null +++ b/wiktra/wikt/translit/bho-Kthi-translit.lua @@ -0,0 +1,155 @@ +-- Transliteration for Bhojpuri +local export = {} +local gsub = mw.ustring.gsub +local match = mw.ustring.match + +local conv = { + -- consonants + ["𑂍"] = "k", + ["𑂎"] = "kh", + ["𑂏"] = "g", + ["𑂐"] = "gh", + ["𑂑"] = "ṅ", + ["𑂒"] = "c", + ["𑂓"] = "ch", + ["𑂔"] = "j", + ["𑂕"] = "jh", + ["𑂖"] = "ñ", + ["𑂗"] = "ṭ", + ["𑂘"] = "ṭh", + ["𑂙"] = "ḍ", + ["𑂛"] = "ḍh", + ["𑂝"] = "ṇ", + ["𑂞"] = "t", + ["𑂟"] = "th", + ["𑂠"] = "d", + ["𑂡"] = "dh", + ["𑂢"] = "n", + ["𑂣"] = "p", + ["𑂤"] = "ph", + ["𑂥"] = "b", + ["𑂦"] = "bh", + ["𑂧"] = "m", + ["𑂨"] = "y", + ["𑂩"] = "r", + ["𑂪"] = "l", + ["𑂫"] = "v", + ["𑂫"] = "v", + ["ळ"] = "ḷ", + ["𑂬"] = "ś", + ["𑂭"] = "ṣ", + ["𑂮"] = "s", + ["𑂯"] = "h", + ["𑂚"] = "ṛ", + ["𑂚"] = "ṛ", + ["𑂜"] = "ṛh", + ["𑂜"] = "ṛh", + -- ['𑂔𑂹𑂖'] = 'gy', + + -- vowel diacritics + ["𑂱"] = "i", + ["𑂳"] = "u", + ["𑂵"] = "e", + ["𑂷"] = "o", + ["𑂰"] = "ā", + ["𑂲"] = "ī", + ["𑂴"] = "ū", + ["𑂶"] = "ai", + ["𑂸"] = "au", + + -- vowel signs + ["𑂃"] = "a", + ["𑂅"] = "i", + ["𑂇"] = "u", + ["𑂉"] = "e", + ["𑂋"] = "o", + ["𑂄"] = "ā", + ["𑂆"] = "ī", + ["𑂈"] = "ū", + ["𑂊"] = "ai", + ["𑂌"] = "au", + + ["ॐ"] = "om", + + -- chandrabindu + ["𑂀"] = "̃", + + -- anusvara + ["𑂁"] = "ṁ", + + -- 
visarga + ["𑂂"] = "ḥ", + + -- virama + ["𑂹"] = "", + + -- numerals + ["०"] = "0", + ["१"] = "1", + ["२"] = "2", + ["३"] = "3", + ["४"] = "4", + ["५"] = "5", + ["६"] = "6", + ["७"] = "7", + ["८"] = "8", + ["९"] = "9", + + -- punctuation + ["𑃀"] = ".", -- danda + ["+"] = "", -- compound separator + + -- abbreviation sign + ["𑂻"] = "." +} + +local nasal_assim = {["𑂍"] = "𑂑", ["𑂎"] = "𑂑", ["𑂏"] = "𑂑", ["𑂐"] = "𑂑", ["𑂒"] = "𑂖", ["𑂓"] = "𑂖", ["𑂔"] = "𑂖", ["𑂕"] = "𑂖", ["𑂗"] = "𑂝", ["𑂘"] = "𑂝", ["𑂙"] = "𑂝", ["𑂛"] = "𑂝", ["𑂣"] = "𑂧", ["𑂤"] = "𑂧", ["𑂥"] = "𑂧", ["𑂦"] = "𑂧", ["𑂧"] = "𑂧"} + +local perm_cl = {["𑂧𑂹𑂪"] = true, ["𑂫𑂹𑂪"] = true, ["𑂫𑂹𑂪"] = true, ["𑂢𑂹𑂪"] = true} + +local all_cons, special_cons = "𑂍𑂎𑂏𑂐𑂑𑂒𑂓𑂔𑂕𑂖𑂗𑂘𑂙𑂚𑂛𑂜𑂞𑂟𑂠𑂡𑂣𑂤𑂥𑂦𑂬𑂭𑂮𑂨𑂩𑂪𑂫𑂯𑂝𑂢𑂧", "𑂨𑂩𑂪𑂥𑂫𑂯𑂢𑂧" +local vowel, vowel_sign = "a𑂰𑂱𑂲𑂳𑂴𑂵𑂶𑂷𑂸", "𑂃𑂄𑂅𑂆𑂇𑂈𑂉𑂊𑂋𑂌" +local syncope_pattern = "([" .. vowel .. vowel_sign .. "])(𑂺?[" .. all_cons .. "])a(𑂺?[" .. gsub(all_cons, "𑂨", "") .. "])([𑂁𑂀]?[" .. vowel .. vowel_sign .. "])" + +local function rev_string(text) + local result, length = {}, mw.ustring.len(text) + for i = 1, length do table.insert(result, mw.ustring.sub(text, length - i + 1, length - i + 1)) end + return table.concat(result) +end + +function export.tr(text, lang, sc) + text = gsub(text, "([" .. all_cons .. "]𑂺?)([" .. vowel .. "𑂹]?)", function(c, d) return c .. (d == "" and "a" or d) end) + + for word in mw.ustring.gmatch(text, "[𑂀-𑃁a]+") do + local orig_word = word + + word = rev_string(word) + + word = gsub(word, "^a(𑂺?)([" .. all_cons .. "])(.)(.?)", function(opt, first, second, third) + local a = "" + if match(first, "[" .. special_cons .. "]") and match(second, "𑂹") and not perm_cl[first .. second .. third] or match(first .. second, "𑂨[𑂲𑂵𑂶]") then a = "a" end + + return a .. opt .. first .. second .. third + end) + + while match(word, syncope_pattern) do word = gsub(word, syncope_pattern, "%1%2%3%4") end + + word = gsub(word, "(.?)𑂁(.)", function(succ, prev) + local mid = nasal_assim[succ] or "n" + if succ .. 
prev == "a" then + mid = "𑂺𑂧" + elseif succ == "" and match(prev, "[" .. vowel .. "]") then + mid = "̃" + end + return succ .. mid .. prev + end) + + text = gsub(text, orig_word, rev_string(word)) + end + text = gsub(text, ".𑂺?", conv) + text = gsub(text, "a([iu])̃", "a͠%1") + text = gsub(text, "𑂔𑂹𑂖", conv) + return mw.ustring.toNFC(text) +end + +return export diff --git a/wiktra/wikt/translit/bho-translit.lua b/wiktra/wikt/translit/bho-translit.lua new file mode 100644 index 0000000..0015c9c --- /dev/null +++ b/wiktra/wikt/translit/bho-translit.lua @@ -0,0 +1,159 @@ +-- Transliteration for Bhojpuri +local export = {} +local gsub = mw.ustring.gsub +local match = mw.ustring.match + +local conv = { + -- consonants + ["क"] = "k", + ["ख"] = "kh", + ["ग"] = "g", + ["घ"] = "gh", + ["ङ"] = "ṅ", + ["च"] = "c", + ["छ"] = "ch", + ["ज"] = "j", + ["झ"] = "jh", + ["ञ"] = "ñ", + ["ट"] = "ṭ", + ["ठ"] = "ṭh", + ["ड"] = "ḍ", + ["ढ"] = "ḍh", + ["ण"] = "ṇ", + ["त"] = "t", + ["थ"] = "th", + ["द"] = "d", + ["ध"] = "dh", + ["न"] = "n", + ["प"] = "p", + ["फ"] = "ph", + ["ब"] = "b", + ["भ"] = "bh", + ["म"] = "m", + ["य"] = "y", + ["र"] = "r", + ["ल"] = "l", + ["व"] = "v", + ["ळ"] = "ḷ", + ["श"] = "ś", + ["ष"] = "ṣ", + ["स"] = "s", + ["ह"] = "h", + ["ड़"] = "ṛ", + ["ढ़"] = "ṛh", + -- ['ज्ञ'] = 'gy', + + -- vowel diacritics + ["ि"] = "i", + ["ु"] = "u", + ["े"] = "e", + ["ो"] = "o", + ["ा"] = "ā", + ["ी"] = "ī", + ["ू"] = "ū", + ["ृ"] = "ŕ", + ["ै"] = "ai", + ["ौ"] = "au", + ["ॉ"] = "ŏ", + ["ॅ"] = "ĕ", + ["ऽ"] = "ː", + + -- vowel signs + ["अ"] = "a", + ["इ"] = "i", + ["उ"] = "u", + ["ए"] = "e", + ["ओ"] = "o", + ["आ"] = "ā", + ["ई"] = "ī", + ["ऊ"] = "ū", + ["ऋ"] = "ŕ", + ["ऐ"] = "ai", + ["औ"] = "au", + ["ऑ"] = "ŏ", + ["ऍ"] = "ĕ", + + ["ॐ"] = "om", + + -- chandrabindu + ["ँ"] = "̃", + + -- anusvara + ["ं"] = "ṁ", + + -- visarga + ["ः"] = "ḥ", + + -- virama + ["्"] = "", + + -- numerals + ["०"] = "0", + ["१"] = "1", + ["२"] = "2", + ["३"] = "3", + ["४"] = "4", + ["५"] = "5", + ["६"] = "6", + 
["७"] = "7", + ["८"] = "8", + ["९"] = "9", + + -- punctuation + ["।"] = ".", -- danda + ["+"] = "", -- compound separator + + -- abbreviation sign + ["॰"] = "." +} + +local nasal_assim = {["ज़"] = "न", ["क"] = "ङ", ["ख"] = "ङ", ["ग"] = "ङ", ["घ"] = "ङ", ["च"] = "ञ", ["छ"] = "ञ", ["ज"] = "ञ", ["झ"] = "ञ", ["ट"] = "ण", ["ठ"] = "ण", ["ड"] = "ण", ["ढ"] = "ण", ["प"] = "म", ["फ"] = "म", ["ब"] = "म", ["भ"] = "म", ["म"] = "म"} + +local perm_cl = {["म्ल"] = true, ["व्ल"] = true, ["न्ल"] = true} + +local all_cons, special_cons = "कखगघङचछजझञटठडढतथदधपफबभशषसयरलवहणनम", "यरलवहनम" +local vowel, vowel_sign = "aिुृेोाीूैौॉॅ", "अइउएओआईऊऋऐऔऑऍऽ" +local syncope_pattern = "([" .. vowel .. vowel_sign .. "])(़?[" .. all_cons .. "])a(़?[" .. gsub(all_cons, "य", "") .. "])([ंँ]?[" .. vowel .. vowel_sign .. "])" + +local function rev_string(text) + local result, length = {}, mw.ustring.len(text) + for i = 1, length do table.insert(result, mw.ustring.sub(text, length - i + 1, length - i + 1)) end + return table.concat(result) +end + +function export.tr(text, lang, sc) + text = gsub(text, "([" .. all_cons .. "]़?)([" .. vowel .. "्]?)", function(c, d) return c .. (d == "" and "a" or d) end) + + for word in mw.ustring.gmatch(text, "[ऀ-ॿa]+") do + local orig_word = word + + word = rev_string(word) + + word = gsub(word, "^a(़?)([" .. all_cons .. "])(.)(.?)", function(opt, first, second, third) + local a = "" + if match(first, "[" .. special_cons .. "]") and match(second, "्") and not perm_cl[first .. second .. third] or match(first .. second, "य[ीेै]") then a = "a" end + + return a .. opt .. first .. second .. third + end) + + while match(word, syncope_pattern) do word = gsub(word, syncope_pattern, "%1%2%3%4") end + + word = gsub(word, "(.?)ं(.)", function(succ, prev) + local mid = nasal_assim[succ] or "n" + if succ .. prev == "a" then + mid = "्म" + elseif succ == "" and match(prev, "[" .. vowel .. "]") then + mid = "̃" + end + return succ .. mid .. 
prev + end) + + text = gsub(text, orig_word, rev_string(word)) + end + text = gsub(text, ".़?", conv) + text = gsub(text, "a([iu])̃", "a͠%1") + text = gsub(text, "ज्ञ", conv) + return mw.ustring.toNFC(text) +end + +return export diff --git a/wiktra/wikt/translit/bn-translit.lua b/wiktra/wikt/translit/bn-translit.lua new file mode 100644 index 0000000..b1f9f3e --- /dev/null +++ b/wiktra/wikt/translit/bn-translit.lua @@ -0,0 +1,164 @@ +-- Transliteration for Bengali +local export = {} +local gsub = mw.ustring.gsub +local match = mw.ustring.match + +local conv = { + -- consonants + ["ক্ষ"] = "kkh", + ["জ্ঞ"] = "gg", + ["ক"] = "k", + ["খ"] = "kh", + ["গ"] = "g", + ["ঘ"] = "gh", + ["ঙ"] = "ṅ", + ["চ"] = "c", + ["ছ"] = "ch", + ["জ"] = "j", + ["ঝ"] = "jh", + ["ঞ"] = "ñ", + ["ট"] = "ṭ", + ["ঠ"] = "ṭh", + ["ড"] = "ḍ", + ["ঢ"] = "ḍh", + ["ণ"] = "n", + ["ত"] = "t", + ["থ"] = "th", + ["দ"] = "d", + ["ধ"] = "dh", + ["ন"] = "n", + ["প"] = "p", + ["ফ"] = "ph", + ["ব"] = "b", + ["ভ"] = "bh", + ["ম"] = "m", + ["য"] = "j", + ["র"] = "r", + ["ল"] = "l", + ["শ"] = "ś", + ["ষ"] = "ś", + ["স"] = "ś", + ["হ"] = "h", + ["য়"] = "ẏ", + ["ড়"] = "ṛ", + ["ঢ়"] = "ṛh", + + -- biśôrgô + ["ঃ"] = "ḥ", + + -- vowel diacritics + ["ি"] = "i", + ["ু"] = "u", + ["ৃ"] = "ri", + ["ে"] = "e", + ["ো"] = "o", + ["া"] = "a", + ["ী"] = "i", + ["ূ"] = "u", + ["ৈ"] = "oi", + ["ৌ"] = "ou", + + -- vowel signs + ["অ"] = "ô", + ["ই"] = "i", + ["উ"] = "u", + ["ঋ"] = "ri", + ["এ"] = "e", + ["ও"] = "o", + ["আ"] = "a", + ["ঈ"] = "i", + ["ঊ"] = "u", + ["ঐ"] = "oi", + ["ঔ"] = "ou", + + -- hôśôntô + ["্"] = "", + + -- côndrôbindu + ["ঁ"] = "̃", + + -- ônuśśar + ["ং"] = "ṅ", + + -- ôbôgrôhô + ["ঽ"] = "’", + + -- khôndô tô + ["ৎ"] = "t", + + -- numerals + ["০"] = "0", + ["১"] = "1", + ["২"] = "2", + ["৩"] = "3", + ["৪"] = "4", + ["৫"] = "5", + ["৬"] = "6", + ["৭"] = "7", + ["৮"] = "8", + ["৯"] = "9", + + -- punctuations + ["।"] = ".", -- dãṛi + ["॥"] = "." 
-- double dãṛi +} + +local deaspirate = {["খ"] = "ক", ["ঘ"] = "গ", ["ছ"] = "চ", ["ঝ"] = "জ", ["ঠ"] = "ট", ["ঢ"] = "ড", ["থ"] = "ত", ["ধ"] = "দ", ["ফ"] = "প", ["ভ"] = "ব", ["ঢ়"] = "ড়"} + +function export.tr(text, lang, sc) + local c = "([কষজঞকখগঘঙচছজঝঞটঠডঢণতথদধনপফবভমযরলশষসহ]়?)" + local y = "য" + local r = "র" + local v = "([ô্িুৃেোাীূৈৌঅইউঋএওআঈঊঐঔ])" + local virama = "্" + local n = "(ং?)" + + local no_virama = gsub(v, virama, "") + + text = text .. " " + + text = gsub(text, c .. "্য", function(f) if deaspirate[f] then return deaspirate[f] .. "্" .. f end end) + + text = gsub(text, c, "%1ô") + text = gsub(text, "ô" .. "([ô্িুৃেোাীূৈৌ])", "%1") + + text = gsub(text, v .. n .. c .. "ô ", function(j, k, l) -- ending + return l == y and j .. k .. l .. "ô " or j .. k .. l .. " " + end) + + local pattern = v .. n .. c .. "ô" .. c .. no_virama + local continue = true + while continue do + continue = false + text = gsub(text, "(.*)" .. pattern, function(d, e, f, g, h, i) + if g ~= y and g ~= r then continue = true end + return (g == y or g == r) and d .. e .. f .. g .. "ô" .. h .. i or d .. e .. f .. g .. h .. 
i + end) + end + + text = gsub(text, "ওয়", "ẇ") + text = gsub(text, "([যডঢ]়)", conv) + text = gsub(text, "ক্ষ", "kkh") + text = gsub(text, "জ্ঞ", "gg") + text = gsub(text, ".", conv) + + text = gsub(text, "ː(.)", "%1%1") + + text = gsub(text, " ?।", ".") + + -- vowel fix + text = gsub(text, "([āēeo]y)ô ", "%1 ") + + -- nasalization fix + text = gsub(text, "ôN ", "ông ") + text = gsub(text, "N", "ng") + + -- tb / sb -> t / s + text = gsub(text, "([tsś])b", "%1") + + text = gsub(text, " $", "") + + return mw.ustring.toNFC(text) +end + +return export diff --git a/wiktra/wikt/translit/bo-translit.lua b/wiktra/wikt/translit/bo-translit.lua new file mode 100644 index 0000000..ccd6a47 --- /dev/null +++ b/wiktra/wikt/translit/bo-translit.lua @@ -0,0 +1,171 @@ +local export = {} +local gsub = mw.ustring.gsub +local match = mw.ustring.match + +local conv = { + ["ཀ"] = "k", + ["ཁ"] = "kh", + ["ག"] = "g", + ["ང"] = "ng", + ["ཅ"] = "c", + ["ཆ"] = "ch", + ["ཇ"] = "j", + ["ཉ"] = "ny", + ["ཏ"] = "t", + ["ཐ"] = "th", + ["ད"] = "d", + ["ན"] = "n", + ["པ"] = "p", + ["ཕ"] = "ph", + ["བ"] = "b", + ["མ"] = "m", + ["ཙ"] = "ts", + ["ཚ"] = "tsh", + ["ཛ"] = "dz", + ["ཝ"] = "w", + ["ཞ"] = "zh", + ["ཟ"] = "z", + ["འ"] = "‘", + ["ཡ"] = "y", + ["ར"] = "r", + ["ལ"] = "l", + ["ཤ"] = "sh", + ["ས"] = "s", + ["ཧ"] = "h", + ["ཨ"] = "a", + ["ཊ"] = "ṭ", + ["ཋ"] = "ṭh", + ["ཌ"] = "ḍ", + ["ཎ"] = "ṇ", + ["ཥ"] = "ṣ", + + ["ི"] = "i", + ["ུ"] = "u", + ["ེ"] = "e", + ["ོ"] = "o", + ["ཱ"] = "ā", + ["ཱི"] = "ī", + ["ཱུ"] = "ū", + ["ཻ"] = "ai", + ["ཽ"] = "au", + ["ྲྀ"] = "ṛ", + ["ཷ"] = "ṝ", + ["ླྀ"] = "ḷ", + ["ཹ"] = "ḹ", + ["ཾ"] = "ṃ", + ["ྃ"] = "ṃ", + ["ཿ"] = "aḥ", + ["ༀ"] = "oṃ", + + ["ྐ"] = "k", + ["ྑ"] = "kh", + ["ྒ"] = "g", + ["ྔ"] = "ng", + ["ྕ"] = "c", + ["ྖ"] = "ch", + ["ྗ"] = "j", + ["ྙ"] = "ny", + ["ྟ"] = "t", + ["ྠ"] = "th", + ["ྡ"] = "d", + ["ྣ"] = "n", + ["ྤ"] = "p", + ["ྥ"] = "ph", + ["ྦ"] = "b", + ["ྨ"] = "m", + ["ྩ"] = "ts", + ["ྪ"] = "tsh", + ["ྫ"] = "dz", + ["ྭ"] = "w", + ["ྮ"] = "zh", + 
["ྯ"] = "z", + ["ྰ"] = "‘", + ["ྱ"] = "y", + ["ྲ"] = "r", + ["ླ"] = "l", + ["ྴ"] = "sh", + ["ྶ"] = "s", + ["ྷ"] = "h", + ["ྸ"] = "a", + ["ྚ"] = "ṭ", + ["ྛ"] = "ṭh", + ["ྜ"] = "ḍ", + ["ྞ"] = "ṇ", + ["ྵ"] = "ṣ" +} + +local symbol = {["༠"] = "0", ["༡"] = "1", ["༢"] = "2", ["༣"] = "3", ["༤"] = "4", ["༥"] = "5", ["༦"] = "6", ["༧"] = "7", ["༨"] = "8", ["༩"] = "9", ["༪"] = "0.5", ["༫"] = "1.5", ["༬"] = "2.5", ["༭"] = "3.5", ["༮"] = "4.5", ["༯"] = "5.5", ["༰"] = "6.5", ["༱"] = "7.5", ["༲"] = "8.5", ["༳"] = "9.5", ["་"] = " ", ["།"] = "·", ["‘"] = "'"} + +function export.tr(text, lang, sc, debug_mode) + text = gsub(text, "[༌་]+ ?", "་") + text = gsub(text, "[་།]+$", "") + text = gsub(text, "([^་])(འ[ཱིེོིུྲཷླཹཾཿ])", "%1་‘་%2") + + for Tibetan_word in mw.ustring.gmatch(text, "[་-༑ཀ-ྼ]+") do + Tibetan_word = gsub(Tibetan_word, "་$", "") + + for syllable in mw.ustring.gmatch(Tibetan_word, "[ཀ-ྼ]+") do + local tr = {} + + local syl = gsub(syllable, "ཨ([ཱིེོིུྲཷླཹཾཿ])", "%1") + syl = gsub(syl, "([ྐྑྒྔྕྖྗྙྟྠྡྣྤྥྦྨྩྪྫྮྯྭྰྱྲླྴྶྷྸ]+)([^ཱིེོིུྲཷླཹཾཿྐྑྒྔྕྖྗྙྟྠྡྣྤྥྦྨྩྪྫྮྯྭྰྱྲླྴྶྷྸ]*)$", "%1a%2") + letter = mw.text.split(syl, "", true) + + for i = 1, #letter do tr[i] = conv[letter[i]] or letter[i] end + + if not match(syllable, "[ྐྑྒྔྕྖྗྙྟྠྡྣྤྥྦྨྩྪྫྮྯྭྰྱྲླྴྶྷྸིེོུཨཱཱཱིཻཽུྲྀཷླྀཹཾྃཿ]") then + if #letter < 5 then + if #letter > 3 then + if letter[2] ~= "འ" and letter[3] ~= "འ" then table.insert(tr, 3, "a") end + + elseif #letter > 2 then + if letter[3] == "འ" then + if letter[1] == "འ" then + tr = {"‘", tr[2], "a", "‘"} + else + tr = {tr[1], tr[2], "a", "‘"} + end + + elseif not match(letter[1], "[གདབའམ]") or (match(letter[1] .. letter[2] .. 
letter[3], "[གདབའམ][གངཐབམའརལ]ས")) then + table.insert(tr, 2, "a") + else + table.insert(tr, 3, "a") + end + + elseif match(letter[1], "[གདབའམ]") and not match(letter[2] or "", "[གངདཐབམའརལསན]") then + table.insert(tr, "a") + else + table.insert(tr, 2, "a") + end + end + end + + tr = table.concat(tr) + + if match(syllable, "གཡ") then tr = gsub(tr, "gy", "g.y") end + + if syllable == "འགས" then tr = gsub(tr, "‘ags", "‘gas") end + + if syllable == "དབས" then tr = gsub(tr, "dabs", "dbas") end + + if syllable == "དགས" then tr = gsub(tr, "dags", "dgas") end + + if syllable == "དམས" then tr = gsub(tr, "dams", "dmas") end + + text = gsub(text, syllable, tr, 1) + end + end + + text = gsub(text, ".", symbol) + text = gsub(text, " ' ", "") + text = gsub(text, " *· *·? *", " · ") + text = gsub(text, " *%.", ".") + + if match(text, "[ཀ-ྼ]") and not debug_mode then + return nil + else + return text + end +end + +return export diff --git a/wiktra/wikt/translit/bpy-translit.lua b/wiktra/wikt/translit/bpy-translit.lua new file mode 100644 index 0000000..153c5e2 --- /dev/null +++ b/wiktra/wikt/translit/bpy-translit.lua @@ -0,0 +1,159 @@ +local export = {} +local gsub = mw.ustring.gsub +local match = mw.ustring.match + +local conv = { + -- consonants + ["ক্ষ"] = "kkh", + ["জ্ঞ"] = "gy", + ["ক"] = "k", + ["খ"] = "kh", + ["গ"] = "g", + ["ঘ"] = "gh", + ["ঙ"] = "ng", + ["চ"] = "c", + ["ছ"] = "ch", + ["জ"] = "j", + ["ঝ"] = "jh", + ["ঞ"] = "n", + ["ট"] = "ţ", + ["ঠ"] = "ţh", + ["ড"] = "đ", + ["ঢ"] = "ŗh", + ["ণ"] = "n", + ["ত"] = "t", + ["থ"] = "th", + ["দ"] = "d", + ["ধ"] = "dh", + ["ন"] = "n", + ["প"] = "p", + ["ফ"] = "ph", + ["ব"] = "b", + ["ভ"] = "bh", + ["ম"] = "m", + ["য"] = "j", + ["র"] = "r", + ["ল"] = "l", + ["ৱ"] = "w", + ["শ"] = "ş", + ["ষ"] = "şş", + ["স"] = "s", + ["হ"] = "h", + ["য়"] = "y", + ["ড়"] = "đh", + ["ঢ়"] = "ŗh", + + -- visarga + ["ঃ"] = "ḥ", + + -- vowel diacritics + ["ি"] = "i", + ["ু"] = "u", + ["ৃ"] = "ri", + ["ে"] = "e", + ["ো"] = "o", + ["া"] = "a", + ["ী"] = 
"i", + ["ূ"] = "u", + ["ৈ"] = "oi", + ["ৌ"] = "ou", + + -- vowel signs + ["অ"] = "ô", + ["ই"] = "i", + ["উ"] = "u", + ["ঋ"] = "ri", + ["এ"] = "e", + ["ও"] = "o", + ["আ"] = "a", + ["ঈ"] = "i", + ["ঊ"] = "u", + ["ঐ"] = "oi", + ["ঔ"] = "ou", + + -- hôshôntô + ["্"] = "", + + -- chôndrôbindu + ["ঁ"] = "̃", + + -- ônusbar + ["ং"] = "ng", + + -- khôndô tô + ["ৎ"] = "t", + + -- numerals + ["০"] = "0", + ["১"] = "1", + ["২"] = "2", + ["৩"] = "3", + ["৪"] = "4", + ["৫"] = "5", + ["৬"] = "6", + ["৭"] = "7", + ["৮"] = "8", + ["৯"] = "9", + + -- punctuation + ["।"] = "." -- dari +} + +local deaspirate = {["খ"] = "ক", ["ঘ"] = "গ", ["ছ"] = "চ", ["ঝ"] = "জ", ["ঠ"] = "ট", ["ঢ"] = "ড", ["থ"] = "ত", ["ধ"] = "দ", ["ফ"] = "প", ["ভ"] = "ব", ["ঢ়"] = "ড়"} + +function export.tr(text, lang, sc) + local c = "([কষজঞকখগঘঙচছজঝঞটঠডঢণতথদধনপফবভমযরলৱশষসহ]়?)" + local y = "য়" + local r = "র" + local v = "([ô্িুৃেোাীূৈৌঅইউঋএওআঈঊঐঔ])" + local virama = "্" + local n = "(ং?)" + + local no_virama = gsub(v, virama, "") + + text = text .. " " + + text = gsub(text, c .. "্য", function(f) if deaspirate[f] then return deaspirate[f] .. "্" .. f end end) + + text = gsub(text, c, "%1ô") + text = gsub(text, "ô" .. "([ô্িুৃেোাীূৈৌ])", "%1") + + text = gsub(text, v .. n .. c .. "ô ", function(j, k, l) -- ending + return l == y and j .. k .. l .. "ô " or j .. k .. l .. " " + end) + + local pattern = v .. n .. c .. "ô" .. c .. no_virama + local continue = true + while continue do + continue = false + text = gsub(text, "(.*)" .. pattern, function(d, e, f, g, h, i) + if g ~= y and g ~= r then continue = true end + return (g == y or g == r) and d .. e .. f .. g .. "ô" .. h .. i or d .. e .. f .. g .. h .. 
i + end) + end + + text = gsub(text, "([যডঢ]়)", conv) + text = gsub(text, "ক্ষ", "kkh") + text = gsub(text, "জ্ঞ", "gg") + text = gsub(text, ".", conv) + + text = gsub(text, "ː(.)", "%1%1") + + text = gsub(text, " ?।", ".") + + -- vowel fix + text = gsub(text, "([āēeo]y)ô ", "%1") + + -- nasalization fix + text = gsub(text, "ôN ", "ông") + text = gsub(text, "N", "ng") + + -- tb / sb -> t / s + text = gsub(text, "([tsś])b", "%1") + + text = gsub(text, " $", "") + + return mw.ustring.toNFC(text) +end + +return export diff --git a/wiktra/wikt/translit/brah-translit.lua b/wiktra/wikt/translit/brah-translit.lua new file mode 100644 index 0000000..ce60732 --- /dev/null +++ b/wiktra/wikt/translit/brah-translit.lua @@ -0,0 +1,128 @@ +local export = {} + +local consonants = { + -- consonants + ["𑀓"] = "k", + ["𑀔"] = "kh", + ["𑀕"] = "g", + ["𑀖"] = "gh", + ["𑀗"] = "ṅ", + ["𑀘"] = "c", + ["𑀙"] = "ch", + ["𑀚"] = "j", + ["𑀛"] = "jh", + ["𑀜"] = "ñ", + ["𑀝"] = "ṭ", + ["𑀞"] = "ṭh", + ["𑀟"] = "ḍ", + ["𑀠"] = "ḍh", + ["𑀡"] = "ṇ", + ["𑀢"] = "t", + ["𑀣"] = "th", + ["𑀤"] = "d", + ["𑀥"] = "dh", + ["𑀦"] = "n", + ["𑀧"] = "p", + ["𑀨"] = "ph", + ["𑀩"] = "b", + ["𑀪"] = "bh", + ["𑀫"] = "m", + ["𑀬"] = "y", + ["𑀭"] = "r", + ["𑀮"] = "l", + ["𑀯"] = "v", + ["𑀴"] = "ḷ", + ["𑀰"] = "ś", + ["𑀱"] = "ṣ", + ["𑀲"] = "s", + ["𑀳"] = "h" +} + +local diacritics = { + -- matras + ["𑀸"] = "ā", + ["𑀺"] = "i", + ["𑀻"] = "ī", + ["𑀼"] = "u", + ["𑀽"] = "ū", + ["𑀾"] = "ṛ", + ["𑀿"] = "ṝ", + ["𑁀"] = "l̥", + ["𑁁"] = "l̥̄", + ["𑁂"] = "e", + ["𑁃"] = "ai", + ["𑁄"] = "o", + ["𑁅"] = "au", + ["𑁆"] = "", + -- bhattiprolu aa + ["𑀹"] = "ā" +} + +local tt = { + + -- vowels + ["𑀅"] = "a", + ["𑀆"] = "ā", + ["𑀇"] = "i", + ["𑀈"] = "ī", + ["𑀉"] = "u", + ["𑀊"] = "ū", + ["𑀋"] = "ṛ", + ["𑀌"] = "ṝ", + ["𑀍"] = "l̥", + ["𑀎"] = "l̥̄", + ["𑀏"] = "e", + ["𑀐"] = "ai", + ["𑀑"] = "o", + ["𑀒"] = "au", + -- chandrabindu + ["𑀀"] = "m̐", -- until a better method is found + -- anusvara + ["𑀁"] = "ṃ", -- until a better method is found + -- visarga + 
["𑀂"] = "ḥ", + -- numerals + ["𑁦"] = "0", + ["𑁧"] = "1", + ["𑁨"] = "2", + ["𑁩"] = "3", + ["𑁪"] = "4", + ["𑁫"] = "5", + ["𑁬"] = "6", + ["𑁭"] = "7", + ["𑁮"] = "8", + ["𑁯"] = "9", + -- punctuation + ["𑁇"] = ".", -- danda + ["𑁈"] = "." -- double danda +} + +function export.tr(text, lang, sc) + if sc ~= "Brah" then return nil end + + text = mw.ustring.gsub(text, "([𑀓-𑀴])" .. "([𑀸𑀺𑀺𑀻𑀼𑀽𑀾𑀿𑁀𑁁𑁂𑁃𑁄𑁅𑁆𑀹]?)" .. "([𑀅-𑀒]?)", function(c, d, e) + if d == "" and e ~= "" then + if tt[e] == "i" or tt[e] == "u" then + return consonants[c] .. "a" .. tt[e] .. "̈" + else + return consonants[c] .. "a" .. tt[e] + end + elseif e ~= "" then + return consonants[c] .. diacritics[d] .. tt[e] + elseif d == "" then + return consonants[c] .. "a" + else + return consonants[c] .. diacritics[d] + end + end) + + text = mw.ustring.gsub(text, ".", tt) + if (lang == "sa" or lang == "pi") and mw.ustring.match(text, "l̥") then + text = mw.ustring.gsub(text, "l̥", "ḷ") + text = mw.ustring.toNFC(text) + end + + return text +end + +return export diff --git a/wiktra/wikt/translit/brx-translit.lua b/wiktra/wikt/translit/brx-translit.lua new file mode 100644 index 0000000..16c49b0 --- /dev/null +++ b/wiktra/wikt/translit/brx-translit.lua @@ -0,0 +1,119 @@ +-- Transliteration for Hindi (possibly other languages using Devanagari script, except for Sanskrit) +local export = {} +local gsub = mw.ustring.gsub +local match = mw.ustring.match + +local conv = { + -- consonants + ["ख"] = "k", + ["ग"] = "g", + ["ङ"] = "ṅ", + ["ज"] = "j", + ["थ"] = "t", + ["द"] = "d", + ["न"] = "n", + ["फ"] = "f", + ["ब"] = "b", + ["म"] = "m", + ["य"] = "y", + ["र"] = "r", + ["ल"] = "l", + ["व"] = "o", + ["स"] = "s", + ["ह"] = "h", + + -- vowel diacritics + ["ि"] = "i", + ["ु"] = "u", + ["े"] = "e", + ["ो"] = "w", + ["ा"] = "a", + ["ै"] = "wi", + ["ौ"] = "wu", + ["’"] = "o", + + -- vowel signs + ["अ"] = "o", + ["इ"] = "i", + ["उ"] = "u", + ["ए"] = "e", + ["ओ"] = "w", + ["आ"] = "a", + ["ऐ"] = "wi", + ["औ"] = "wu", + ["अ’"] = "o", + + ["ॐ"] = 
"om", + + -- chandrabindu + ["ँ"] = "ṅ", + + -- anusvara + ["ं"] = "ṅ", + + -- visarga + ["ः"] = "ḥ", + + -- virama + ["्"] = "", + + -- numerals + ["०"] = "0", + ["१"] = "1", + ["२"] = "2", + ["३"] = "3", + ["४"] = "4", + ["५"] = "5", + ["६"] = "6", + ["७"] = "7", + ["८"] = "8", + ["९"] = "9", + + -- punctuation + ["।"] = ".", -- danda + ["॥"] = ".", -- double danda + ["+"] = "", -- compound separator + + -- abbreviation sign + ["॰"] = "." +} + +-- These clusters when occurring word-finally will not trigger a schwa added +-- after them even though the second consonant is in special_cons, which normally +-- causes the extra schwa to be added. NOTE: The clusters are reversed from their +-- ultimate effect, e.g. the first cluster is written 'ml' but actually applies +-- to words ending in 'lm'. The clusters below overall refer to the six clusters +-- describable by [rl][mnv], i.e. rm, rn, rv, lm, ln, lv. +local perm_cl = {["म्ल"] = true, ["व्ल"] = true, ["न्ल"] = true, ["म्र"] = true, ["व्र"] = true, ["न्र"] = true} + +local all_cons, special_cons = "कखगघङचछजझञटठडढतथदधपफबभशषसयरलवहणनम", "यरलवहनम" +local vowel, vowel_sign = "*िुेोाैौ’o'", "अइउएओआऐऔअ’'" +local long_vowel, short_vowel = "ाैौआऐऔ", "*िुेो’अइउएओअ'" +local syncope_pattern = "([" .. vowel .. vowel_sign .. "])(़?[" .. all_cons .. "])o(़?[" .. all_cons .. "])([ंँ]?[" .. vowel .. vowel_sign .. "])" + +local function rev_string(text) + local result, length = {}, mw.ustring.len(text) + for i = length, 1, -1 do table.insert(result, mw.ustring.sub(text, i, i)) end + return table.concat(result) +end + +function export.tr(text, lang, sc) + -- abbreviation dot + text = gsub(text, "॰", ".") + text = gsub(text, "([" .. all_cons .. "]़?)([" .. vowel .. "्]?)", function(c, d) return c .. (d == "" and "o" or d) end) + for word in mw.ustring.gmatch(text, "[ऀऀ-ॿo']+") do + local orig_word = word + word = rev_string(word) + word = gsub(word, "^o(़?)([" .. all_cons .. 
"])(.)(.?)", function(opt, first, second, third) return (((match(first, "[" .. special_cons .. "]") and match(second, "्") and not perm_cl[first .. second .. third]) or match(first .. second, "य[ी]")) and "o" or "") .. opt .. first .. second .. third end) + while match(word, syncope_pattern) do word = gsub(word, syncope_pattern, "%1%2%3%4") end + word = rev_string(word) + -- Convert * to %* so we can match it in a regex. + local escaped_orig_word = gsub(orig_word, "%*", "%*") + text = gsub(text, escaped_orig_word, word) + end + text = gsub(text, ".़?", conv) + text = gsub(text, "%*", "o") + return mw.ustring.toNFC(text) +end + +return export diff --git a/wiktra/wikt/translit/btx-translit.lua b/wiktra/wikt/translit/btx-translit.lua new file mode 100644 index 0000000..aef05a9 --- /dev/null +++ b/wiktra/wikt/translit/btx-translit.lua @@ -0,0 +1,30 @@ +local export = {} + +local consonants = {["ᯀ"] = "(h)", ["ᯂ"] = "k", ["ᯅ"] = "b", ["ᯇ"] = "p", ["ᯉ"] = "n", ["ᯋ"] = "w", ["ᯎ"] = "g", ["ᯐ"] = "j", ["ᯑ"] = "d", ["ᯒ"] = "r", ["ᯔ"] = "m", ["ᯗ"] = "t", ["ᯘ"] = "s", ["ᯛ"] = "y", ["ᯝ"] = "ng", ["ᯞ"] = "l", ["ᯠ"] = "c", ["ᯡ"] = "c"} + +local diacritics = {["ᯧ"] = "e", ["ᯩ"] = "e", ["ᯪ"] = "i", ["ᯫ"] = "i", ["ᯨ"] = "o", ["ᯭ"] = "o", ["ᯬ"] = "u", ["᯳"] = ""} + +local nonconsonants = { + -- vowels + ["ᯤ"] = "i", + ["ᯥ"] = "u", + -- aditional characters + ["ᯰ"] = "ng", + ["ᯱ"] = "h" +} + +-- translit any words or phrases +function export.tr(text, lang, sc) + text = mw.ustring.gsub(text, "([ᯀᯂᯅᯇᯉᯋᯎᯐᯑᯒᯔᯗᯘᯛᯝᯞᯠᯡ])" .. "([ᯧᯩᯪᯫᯨᯭᯬ᯳]?)", function(c, d) + if d == "" then + return consonants[c] .. "a" + else + return consonants[c] .. 
diacritics[d] + end + end) + text = mw.ustring.gsub(text, ".", nonconsonants) + + return text +end + +return export diff --git a/wiktra/wikt/translit/bua-translit.lua b/wiktra/wikt/translit/bua-translit.lua new file mode 100644 index 0000000..d1eba01 --- /dev/null +++ b/wiktra/wikt/translit/bua-translit.lua @@ -0,0 +1,82 @@ +local export = {} + +local tab = { + ["А"] = "A", + ["Б"] = "B", + ["В"] = "V", + ["Г"] = "G", + ["Д"] = "D", + ["Е"] = "Je", + ["Ё"] = "Jo", + ["Ж"] = "Ž", + ["З"] = "Z", + ["И"] = "I", + ["Й"] = "J", + ["К"] = "K", + ["Л"] = "L", + ["М"] = "M", + ["Н"] = "N", + ["Ҥ"] = "Ŋ", + ["О"] = "O", + ["Ө"] = "Ö", + ["П"] = "P", + ["Р"] = "R", + ["С"] = "S", + ["Т"] = "T", + ["У"] = "U", + ["Ү"] = "Ü", + ["Ф"] = "F", + ["Х"] = "X", + ["Һ"] = "H", + ["Ц"] = "C", + ["Ч"] = "Č", + ["Ш"] = "Š", + ["Щ"] = "Šč", + ["Ъ"] = "ʺ", + ["Ы"] = "Y", + ["Ь"] = "ʹ", + ["Э"] = "E", + ["Ю"] = "Ju", + ["Я"] = "Ja", + ["а"] = "a", + ["б"] = "b", + ["в"] = "v", + ["г"] = "g", + ["д"] = "d", + ["е"] = "je", + ["ё"] = "jo", + ["ж"] = "ž", + ["з"] = "z", + ["и"] = "i", + ["й"] = "j", + ["к"] = "k", + ["л"] = "l", + ["м"] = "m", + ["н"] = "n", + ["ҥ"] = "ŋ", + ["о"] = "o", + ["ө"] = "ö", + ["п"] = "p", + ["р"] = "r", + ["с"] = "s", + ["т"] = "t", + ["у"] = "u", + ["ү"] = "ü", + ["ф"] = "f", + ["х"] = "x", + ["һ"] = "h", + ["ц"] = "c", + ["ч"] = "č", + ["ш"] = "š", + ["щ"] = "šč", + ["ъ"] = "ʺ", + ["ы"] = "y", + ["ь"] = "ʹ", + ["э"] = "e", + ["ю"] = "ju", + ["я"] = "ja" +} + +function export.tr(text, lang, sc) return (mw.ustring.gsub(text, ".", tab)) end + +return export diff --git a/wiktra/wikt/translit/cans-translit.lua b/wiktra/wikt/translit/cans-translit.lua new file mode 100644 index 0000000..a0fa658 --- /dev/null +++ b/wiktra/wikt/translit/cans-translit.lua @@ -0,0 +1,63 @@ +local export = {} +-- to be discussed: not to create this module, but create individual modules instead? 
+ +local UTF8_char = "[%z\1-\127\194-\244][\128-\191]*" + +local function iter_char(str) return string.gmatch(str, UTF8_char) end + +local function len(str) + local _, length = string.gsub(str, UTF8_char, "") + return length +end + +local function fetch(str, index) + local i = 0 + for char in iter_char(str) do + i = i + 1 + if i == index then return char end + end + return "" +end + +function export.tr(text, lang, sc) + local data = { + { -- short-vowel + "ᐁᐃᐅᐊᐯᐱᐳᐸᑌᑎᑐᑕᑫᑭᑯᑲᒉᒋᒍᒐᒣᒥᒧᒪᓀᓂᓄᓇᓭᓯᓱᓴᔦᔨᔪᔭᖊᖋᖌᖍᕃᕆᕈᕍᕂᕄᕊᕋᔐᔑᔓᔕᕓᕕᕗᕙᕞᕠᕤᕦᘚᘛᘕᘔᓓᓕᓗᓚᙯᕿᖁᖃᙰᖏᖑᖓ ᙱᙳᙵᕴᕵᕷᕹ", "1ptkcmnsyrrršfðzlq23h", {"", "ng", "nng"}, "eioa", function(a, b) return a .. b end + }, { -- w + "ᐌᐍᐎᐏᐒᐓᐗᐘᐺᐻᐼᐽᑀᑁᑄᑅᑗᑘᑙᑚᑝᑞᑡᑢᑴᑵᑶᑷᑺᑻᑾᑿᒒᒓᒔᒕᒘᒙᒜᒝᒬᒭᒮᒯᒲᒳᒶᒷ" .. "ᓉᓊ ᓋᓌ ᓶᓷᓸᓹᓼᓽᔀᔁᔯᔰᔱᔲᔵᔶᔹᔺᔗᔘᔙᔚᔝᔞᔡᔢᓜᓝᓞᓟᓢᓣᓦᓧ", "1ptkcmnsyrz", {""}, "eeiiooaa", function(a, b) return a .. "w" .. b end + }, { -- long-vowel + "ᐄᐆᐋᐲᐴᐹᑏᑑᑖᑮᑰᑳᒌᒎᒑᒦᒨᒫᓃᓅᓈᓰᓲᓵᔩᔫᔮᕇᕉᕌᔒᔔᔖᕖᕘᕚᕢᕥᕧᓖᓘᓛᐐᐔᐙᖀᖂᖄᕶᕸᕺ", "1ptkcmnsyršfðlwqh", {""}, "īōā", function(a, b) return a .. b end + }, { -- w-long + "ᐐᐑᐔᐕᐖᐙᐚᐛᐾᐿᑂᑃ ᑆᑇᑈᑛᑜᑟᑠ ᑣᑤᑥᑸᑹᑼᑽ ᒀᒁᒂᒖᒗᒚᒛ ᒞᒟᒠᒰᒱᒴᒵ ᒸᒹᒺ ᓍᓎᓏ" .. "ᓠᓡᓤᓥ ᓨᓩ ᓺᓻᓾᓿ ᔂᔃᔄᔛᔜᔟᔠ ᔣᔤ ᔳᔴᔷᔸ ᔻᔼᔽ ᕎᕏ ᕛᕜ ᕨᕩ ", "1ptkcmnlsšyrfð", {""}, "īīōōōāāā", function(a, b) return a .. "w" .. 
b end + }, { -- individual + "ᑊᐟᐠᐨᒼᐣᐢᐧᐤᐦᕁᕽᓫᕑᑉᑦᒡᒃᒻᓐᔅᔥᔾᓪᕐᕪ‡ᒄᔉᖅᖕᖖᕝᖦᕀᕻᕼ", "ptkcmnsywh11lrptckmnsšylrðð23q45vlyhh", {"hk", "kw", "sk", "ng", "nng"} + } + } + + for i, item in pairs(data) do + if item[4] then + local length = len(item[4]) + local c = 0 + for s in iter_char(item[1]) do + c = c + 1 + local index = math.ceil(c / length) + local a = fetch(item[2], index) + if tonumber(a) then a = item[3][tonumber(a)] end + index = (c - 1) % length + 1 + local b = fetch(item[4], index) + if s ~= " " then text = string.gsub(text, s, item[5](a, b)) end + end + else + local iter1, iter2 = iter_char(item[1]), iter_char(item[2]) + while true do + local s, a = iter1(), iter2() + if not (s and a) then break end + if tonumber(a) then a = item[3][tonumber(a)] end + text = string.gsub(text, s, a) + end + end + end + return text +end + +return export diff --git a/wiktra/wikt/translit/ce-translit.lua b/wiktra/wikt/translit/ce-translit.lua new file mode 100644 index 0000000..1c509aa --- /dev/null +++ b/wiktra/wikt/translit/ce-translit.lua @@ -0,0 +1,23 @@ +local export = {} + +local tt = {["а"] = "a", ["б"] = "b", ["в"] = "w", ["г"] = "g", ["д"] = "d", ["е"] = "e", ["ё"] = "ë", ["ж"] = "ž", ["з"] = "z", ["и"] = "i", ["й"] = "j", ["к"] = "k", ["л"] = "l", ["м"] = "m", ["н"] = "n", ["о"] = "o", ["п"] = "p", ["р"] = "r", ["с"] = "s", ["т"] = "t", ["у"] = "u", ["ф"] = "f", ["х"] = "χ", ["ц"] = "c", ["ч"] = "č", ["ш"] = "š", ["щ"] = "šč", ["ъ"] = "ʾ", ["ы"] = "y", ["ь"] = "’", ["э"] = "e", ["ю"] = "ju", ["я"] = "ja", ["ӏ"] = "ʿ", ["А"] = "A", ["Б"] = "B", ["В"] = "W", ["Г"] = "G", ["Д"] = "D", ["Е"] = "E", ["Ё"] = "Ë", ["Ж"] = "Ž", ["З"] = "Z", ["И"] = "I", ["Й"] = "J", ["К"] = "K", ["Л"] = "L", ["М"] = "M", ["Н"] = "N", ["О"] = "O", ["П"] = "P", ["Р"] = "R", ["С"] = "S", ["Т"] = "T", ["У"] = "U", ["Ф"] = "F", ["Х"] = "Χ", ["Ц"] = "C", ["Ч"] = "Č", ["Ш"] = "Š", ["Щ"] = "Šč", ["Ъ"] = "ʾ", ["Ы"] = "Y", ["Ь"] = "’", ["Э"] = "E", ["Ю"] = "Ju", ["Я"] = "Ja", ["ӏ"] = "ʿ"}; + +local digraphs = 
{["Аь"] = "Ä", ["аь"] = "ä", ["Гӏ"] = "Ġ", ["гӏ"] = "ġ", ["Дж"] = "Ǯ", ["дж"] = "ǯ", ["Дз"] = "Ʒ", ["дз"] = "ʒ", ["Ий"] = "Ī", ["ий"] = "ī", ["Кх"] = "Q", ["кх"] = "q", ["Кӏ"] = "Ḳ", ["кӏ"] = "ḳ", ["Къ"] = "Q̇", ["къ"] = "q̇", ["Оь"] = "Ö", ["оь"] = "ö", ["Пӏ"] = "Ṗ", ["пӏ"] = "ṗ", ["Тӏ"] = "Ṭ", ["тӏ"] = "ṭ", ["Уь"] = "Ü", ["уь"] = "ü", ["Хӏ"] = "H", ["хӏ"] = "h", ["Хь"] = "Ḥ", ["хь"] = "ḥ", ["Цӏ"] = "C̣", ["цӏ"] = "c̣", ["Чӏ"] = "Č̣", ["чӏ"] = "č̣", ["Юь"] = "Jü", ["юь"] = "jü", ["Яь"] = "Jä", ["яь"] = "jä"} + +function export.tr(text, lang, sc) + local str_gsub = string.gsub + + -- Convert capital to lowercase palochka. Lowercase is found in tables + -- above. + text = str_gsub(text, mw.ustring.char(0x4C0), mw.ustring.char(0x4CF)) + text = str_gsub(text, "ккх", "qː") + text = str_gsub(text, "Ккх", "Qː") + + for digraph, translit in pairs(digraphs) do text = str_gsub(text, digraph, translit) end + + text = str_gsub(text, "[%z\1-127\194-\244][\128-\191]*", tt) -- UTF-8 character pattern + + return text +end + +return export diff --git a/wiktra/wikt/translit/cher-translit.lua b/wiktra/wikt/translit/cher-translit.lua new file mode 100644 index 0000000..34463a0 --- /dev/null +++ b/wiktra/wikt/translit/cher-translit.lua @@ -0,0 +1,198 @@ +local export = {} + +local tt = { + ["Ꭰ"] = "a", + ["ꭰ"] = "a", + ["Ꭱ"] = "e", + ["ꭱ"] = "e", + ["Ꭲ"] = "i", + ["ꭲ"] = "i", + ["Ꭳ"] = "o", + ["ꭳ"] = "o", + ["Ꭴ"] = "u", + ["ꭴ"] = "u", + ["Ꭵ"] = "v", + ["ꭵ"] = "v", + + ["Ꭶ"] = "ga", + ["ꭶ"] = "ga", + ["Ꭷ"] = "ka", + ["ꭷ"] = "ka", + ["Ꭸ"] = "ge", + ["ꭸ"] = "ge", + ["Ꭹ"] = "gi", + ["ꭹ"] = "gi", + ["Ꭺ"] = "go", + ["ꭺ"] = "go", + ["Ꭻ"] = "gu", + ["ꭻ"] = "gu", + ["Ꭼ"] = "gv", + ["ꭼ"] = "gv", + + ["Ꭽ"] = "ha", + ["ꭽ"] = "ha", + ["Ꭾ"] = "he", + ["ꭾ"] = "he", + ["Ꭿ"] = "hi", + ["ꭿ"] = "hi", + ["Ꮀ"] = "ho", + ["ꮀ"] = "ho", + ["Ꮁ"] = "hu", + ["ꮁ"] = "hu", + ["Ꮂ"] = "hv", + ["ꮂ"] = "hv", + + ["Ꮃ"] = "la", + ["ꮃ"] = "la", + ["Ꮄ"] = "le", + ["ꮄ"] = "le", + ["Ꮅ"] = "li", + ["ꮅ"] = "li", + ["Ꮆ"] = 
"lo", + ["ꮆ"] = "lo", + ["Ꮇ"] = "lu", + ["ꮇ"] = "lu", + ["Ꮈ"] = "lv", + ["ꮈ"] = "lv", + + ["Ꮉ"] = "ma", + ["ꮉ"] = "ma", + ["Ꮊ"] = "me", + ["ꮊ"] = "me", + ["Ꮋ"] = "mi", + ["ꮋ"] = "mi", + ["Ꮌ"] = "mo", + ["ꮌ"] = "mo", + ["Ꮍ"] = "mu", + ["ꮍ"] = "mu", + ["Ᏽ"] = "mv", + ["ᏽ"] = "mv", + + ["Ꮎ"] = "na", + ["ꮎ"] = "na", + ["Ꮏ"] = "hna", + ["ꮏ"] = "hna", + ["Ꮐ"] = "nah", + ["ꮐ"] = "nah", + ["Ꮑ"] = "ne", + ["ꮑ"] = "ne", + ["Ꮒ"] = "ni", + ["ꮒ"] = "ni", + ["Ꮓ"] = "no", + ["ꮓ"] = "no", + ["Ꮔ"] = "nu", + ["ꮔ"] = "nu", + ["Ꮕ"] = "nv", + ["ꮕ"] = "nv", + + ["Ꮖ"] = "qua", + ["ꮖ"] = "qua", + ["Ꮗ"] = "que", + ["ꮗ"] = "que", + ["Ꮘ"] = "qui", + ["ꮘ"] = "qui", + ["Ꮙ"] = "quo", + ["ꮙ"] = "quo", + ["Ꮚ"] = "quu", + ["ꮚ"] = "quu", + ["Ꮛ"] = "quv", + ["ꮛ"] = "quv", + + ["Ꮝ"] = "s", + ["ꮝ"] = "s", + ["Ꮜ"] = "sa", + ["ꮜ"] = "sa", + ["Ꮞ"] = "se", + ["ꮞ"] = "se", + ["Ꮟ"] = "si", + ["ꮟ"] = "si", + ["Ꮠ"] = "so", + ["ꮠ"] = "so", + ["Ꮡ"] = "su", + ["ꮡ"] = "su", + ["Ꮢ"] = "sv", + ["ꮢ"] = "sv", + + ["Ꮣ"] = "da", + ["ꮣ"] = "da", + ["Ꮤ"] = "ta", + ["ꮤ"] = "ta", + ["Ꮥ"] = "de", + ["ꮥ"] = "de", + ["Ꮦ"] = "te", + ["ꮦ"] = "te", + ["Ꮧ"] = "di", + ["ꮧ"] = "di", + ["Ꮨ"] = "ti", + ["ꮨ"] = "ti", + ["Ꮩ"] = "do", + ["ꮩ"] = "do", + ["Ꮪ"] = "du", + ["ꮪ"] = "du", + ["Ꮫ"] = "dv", + ["ꮫ"] = "dv", + + ["Ꮬ"] = "dla", + ["ꮬ"] = "dla", + ["Ꮭ"] = "tla", + ["ꮭ"] = "tla", + ["Ꮮ"] = "tle", + ["ꮮ"] = "tle", + ["Ꮯ"] = "tli", + ["ꮯ"] = "tli", + ["Ꮰ"] = "tlo", + ["ꮰ"] = "tlo", + ["Ꮱ"] = "tlu", + ["ꮱ"] = "tlu", + ["Ꮲ"] = "tlv", + ["ꮲ"] = "tlv", + + ["Ꮳ"] = "tsa", + ["ꮳ"] = "tsa", + ["Ꮴ"] = "tse", + ["ꮴ"] = "tse", + ["Ꮵ"] = "tsi", + ["ꮵ"] = "tsi", + ["Ꮶ"] = "tso", + ["ꮶ"] = "tso", + ["Ꮷ"] = "tsu", + ["ꮷ"] = "tsu", + ["Ꮸ"] = "tsv", + ["ꮸ"] = "tsv", + + ["Ꮹ"] = "wa", + ["ꮹ"] = "wa", + ["Ꮺ"] = "we", + ["ꮺ"] = "we", + ["Ꮻ"] = "wi", + ["ꮻ"] = "wi", + ["Ꮼ"] = "wo", + ["ꮼ"] = "wo", + ["Ꮽ"] = "wu", + ["ꮽ"] = "wu", + ["Ꮾ"] = "wv", + ["ꮾ"] = "wv", + + ["Ꮿ"] = "ya", + ["ꮿ"] = "ya", + ["Ᏸ"] = "ye", + ["ᏸ"] = "ye", + ["Ᏹ"] = "yi", + ["ᏹ"] = "yi", 
+ ["Ᏺ"] = "yo", + ["ᏺ"] = "yo", + ["Ᏻ"] = "yu", + ["ᏻ"] = "yu", + ["Ᏼ"] = "yv", + ["ᏼ"] = "yv" +} + +function export.tr(text) + + text = mw.ustring.gsub(text, ".", tt) + + return text + +end + +return export diff --git a/wiktra/wikt/translit/chg-translit.lua b/wiktra/wikt/translit/chg-translit.lua new file mode 100644 index 0000000..c64f1f3 --- /dev/null +++ b/wiktra/wikt/translit/chg-translit.lua @@ -0,0 +1,41 @@ +local export = {} + +local data = {} + +data["chg-Arab"] = { + -- consonants + ["م"] = "m", + ["ن"] = "n", + ["د"] = "d", + ["ت"] = "t", + ["ع"] = "g", + ["غ"] = "ğ", + ["ى"] = "i", + ["و"] = "o", + ["ج"] = "c", + ["چ"] = "ç", + ["ژ"] = "j", + ["ش"] = "ş", + ["س"] = "s", + ["ي"] = "y", + ["ل"] = "l", + ["خ"] = "h", + ["ق"] = "q", + ["ك"] = "k", + ["ې"] = "e", + ["ۈ"] = "ü", + ["ۆ"] = "ö", + ["ۇ"] = "u", + ["ۋ"] = "w", + ["ڢ"] = "f", + ["ه"] = "ä", + ["ڭ"] = "ñ", + ["گ"] = "g", + ["ز"] = "z", + ["ر"] = "r", + ["ې"] = "ı", + -- punctuation + ["؟"] = "?", + ["،"] = "," + +} diff --git a/wiktra/wikt/translit/chm-translit.lua b/wiktra/wikt/translit/chm-translit.lua new file mode 100644 index 0000000..37dccdf --- /dev/null +++ b/wiktra/wikt/translit/chm-translit.lua @@ -0,0 +1,97 @@ +local export = {} + +local tab = { + ["А"] = "A", + ["Б"] = "B", + ["В"] = "V", + ["Г"] = "G", + ["Д"] = "D", + ["Е"] = "E", + ["Ё"] = "Ë", + ["Ж"] = "Ž", + ["З"] = "Z", + ["И"] = "I", + ["Й"] = "J", + ["К"] = "K", + ["Л"] = "L", + ["М"] = "M", + ["Н"] = "N", + ["Ҥ"] = "Ṅ", + ["О"] = "O", + ["Ӧ"] = "Ö", + ["П"] = "P", + ["Р"] = "R", + ["С"] = "S", + ["Т"] = "T", + ["У"] = "U", + ["Ӱ"] = "Ü", + ["Ф"] = "F", + ["Х"] = "H", + ["Ц"] = "C", + ["Ч"] = "Č", + ["Ш"] = "Š", + ["Щ"] = "Ŝ", + ["Ъ"] = "ʺ", + ["Ы"] = "Y", + ["Ь"] = "ʹ", + ["Э"] = "È", + ["Ю"] = "Û", + ["Я"] = "Â", + ["а"] = "a", + ["б"] = "b", + ["в"] = "v", + ["г"] = "g", + ["д"] = "d", + ["е"] = "e", + ["ё"] = "ë", + ["ж"] = "ž", + ["з"] = "z", + ["и"] = "i", + ["й"] = "j", + ["к"] = "k", + ["л"] = "l", + ["м"] = "m", + 
["н"] = "n", + ["ҥ"] = "ṅ", + ["о"] = "o", + ["ӧ"] = "ö", + ["п"] = "p", + ["р"] = "r", + ["с"] = "s", + ["т"] = "t", + ["у"] = "u", + ["ӱ"] = "ü", + ["ф"] = "f", + ["х"] = "h", + ["ц"] = "c", + ["ч"] = "č", + ["ш"] = "š", + ["щ"] = "ŝ", + ["ъ"] = "ʺ", + ["ы"] = "y", + ["ь"] = "ʹ", + ["э"] = "è", + ["ю"] = "û", + ["я"] = "â", + -- Hill (Western) Mari only, doesn't use Ҥ, ҥ + ["Ӓ"] = "Ä", + ["Ӹ"] = "Ÿ", + ["ӓ"] = "ä", + ["ӹ"] = "ÿ" +} + +function export.tr(text) + -- Ё needs converting if is decomposed + text = text:gsub("ё", "ё"):gsub("Ё", "Ё") + + -- е after a vowel or at the beginning of a word becomes je + text = mw.ustring.gsub(text, "([АӒОӦУӰЫӸЕЯЁЮИЕЪЬаӓоӧуӱыӹэяёюиеъь%A][́̀]?)е", "%1je") + text = mw.ustring.gsub(text, "^Е", "Je") + text = mw.ustring.gsub(text, "^е", "je") + text = mw.ustring.gsub(text, "([^Ѐ-ӿ])Е", "%1Je") + text = mw.ustring.gsub(text, "([^Ѐ-ӿ])е", "%1je") + + return (mw.ustring.gsub(text, ".", tab)) +end + +return export diff --git a/wiktra/wikt/translit/cjs-translit.lua b/wiktra/wikt/translit/cjs-translit.lua new file mode 100644 index 0000000..3a17bba --- /dev/null +++ b/wiktra/wikt/translit/cjs-translit.lua @@ -0,0 +1,106 @@ +local export = {} + +local tab = { + ["А"] = "A", + ["Б"] = "B", + ["В"] = "V", + ["Г"] = "G", + ["Ғ"] = "Ĝ", + ["Д"] = "D", + ["Е"] = "E", + ["Ё"] = "Yo", + ["Ж"] = "Ž", + ["З"] = "Z", + ["И"] = "I", + ["Й"] = "Y", + ["Ј"] = "J̌[[Category:Shor text with old orthography]]", + ["J"] = "J̌[[Category:Shor text with misused character(s)]][[Category:Shor text with old orthography]]", + ["К"] = "K", + ["Қ"] = "K̂", + ["Л"] = "L", + ["М"] = "M", + ["Н"] = "N", + ["Ң"] = "Ŋ", + ["Ҥ"] = "Ŋ[[Category:Shor text with old orthography]]", + ["О"] = "O", + ["Ӧ"] = "Ö", + ["Ö"] = "Ö[[Category:Shor text with misused character(s)]]", + ["П"] = "P", + ["Р"] = "R", + ["С"] = "S", + ["Т"] = "T", + ["У"] = "U", + ["Ӱ"] = "Ü", + ["Ф"] = "F", + ["Х"] = "H", + ["Ц"] = "C", + ["Ч"] = "Č", + ["Ш"] = "Š", + ["Щ"] = "Šč", + ["Ъ"] = "ʺ", + ["Ы"] 
= "Ï", + ["Ь"] = "ʹ", + ["Э"] = "E", + ["Ю"] = "Yu", + ["Я"] = "Ya", + ["а"] = "a", + ["б"] = "b", + ["в"] = "v", + ["г"] = "g", + ["ғ"] = "ĝ", + ["д"] = "d", + ["е"] = "e", + ["ё"] = "yo", + ["ж"] = "ž", + ["з"] = "z", + ["и"] = "i", + ["й"] = "y", + ["ј"] = "ǰ[[Category:Shor text with old orthography]]", + ["j"] = "ǰ[[Category:Shor text with misused character(s)]][[Category:Shor text with old orthography]]", + ["к"] = "k", + ["қ"] = "k̂", + ["л"] = "l", + ["м"] = "m", + ["н"] = "n", + ["ң"] = "ŋ", + ["ҥ"] = "ŋ[[Category:Shor text with old orthography]]", + ["о"] = "o", + ["ӧ"] = "ö", + ["ö"] = "ö[[Category:Shor text with misused character(s)]]", + ["п"] = "p", + ["р"] = "r", + ["с"] = "s", + ["т"] = "t", + ["у"] = "u", + ["ӱ"] = "ü", + ["ф"] = "f", + ["х"] = "h", + ["ц"] = "c", + ["ч"] = "č", + ["ш"] = "š", + ["щ"] = "šč", + ["ъ"] = "ʺ", + ["ы"] = "ı", + ["ь"] = "ʹ", + ["э"] = "e", + ["ю"] = "yu", + ["я"] = "ya" +} + +function export.tr(text, lang, sc) + -- Ё needs converting if is decomposed + text = text:gsub("ё", "ё"):gsub("Ё", "Ё") + + -- е after a vowel or at the beginning of a word becomes ye + -- Note that according to modern Shor orthography ее (instead of ээ) is occationally used for long e + text = mw.ustring.gsub(text, "([АОӨУҮЫЯЁЮИЪЬаоөуүыяёюиъь%A][́̀]?)е", "%1ye") + -- text = mw.ustring.gsub(text, "([АОӨУҮЫЕЯЁЮИЕЪЬаоөуүыэяёюиеъь%A][́̀]?)е","%1ye") + text = mw.ustring.gsub(text, "^Е", "Ye") + text = mw.ustring.gsub(text, "^е", "ye") + text = mw.ustring.gsub(text, "([^Ѐ-ӿ])Е", "%1Ye") + text = mw.ustring.gsub(text, "([^Ѐ-ӿ])е", "%1ye") + + return (mw.ustring.gsub(text, ".", tab)) +end + +return export diff --git a/wiktra/wikt/translit/ckb-translit.lua b/wiktra/wikt/translit/ckb-translit.lua new file mode 100644 index 0000000..41ca338 --- /dev/null +++ b/wiktra/wikt/translit/ckb-translit.lua @@ -0,0 +1,175 @@ +-- Authors: JavaScript ئاسۆ; Lua Ghybu, Calak +local export = {} + +local gsub = mw.ustring.gsub +local U = mw.ustring.char + +local mapping = { 
+ ["ا"] = "a", + ["ب"] = "b", + ["چ"] = "ç", + ["ج"] = "c", + ["د"] = "d", + ["ە"] = "e", + ["ێ"] = "ê", + ["ف"] = "f", + ["گ"] = "g", + ["ھ"] = "h", + ["ه"] = "h", + ["ح"] = "ḧ", + ["ژ"] = "j", + ["ک"] = "k", + ["ڵ"] = "ll", + ["ل"] = "l", + ["م"] = "m", + ["ن"] = "n", + ["ۆ"] = "o", + ["پ"] = "p", + ["ق"] = "q", + ["ر"] = "r", + ["ڕ"] = "r", + ["س"] = "s", + ["ش"] = "ş", + ["ت"] = "t", + ["ڤ"] = "v", + ["خ"] = "x", + ["غ"] = "ẍ", + ["ز"] = "z", + ["ئ"] = "", + ["ع"] = "'", + + [U(0x200C)] = "", -- ZWNJ (zero-width non-joiner) + ["ـ"] = "", -- kashida, no sound + + -- numerals + ["١"] = "1", + ["٢"] = "2", + ["٣"] = "3", + ["٤"] = "4", + ["٥"] = "5", + ["٦"] = "6", + ["٧"] = "7", + ["٨"] = "8", + ["٩"] = "9", + ["٠"] = "0", + -- persian variants to numerals + ["۱"] = "1", + ["۲"] = "2", + ["۳"] = "3", + ["۴"] = "4", + ["۵"] = "5", + ["۶"] = "6", + ["۷"] = "7", + ["۸"] = "8", + ["۹"] = "9", + ["۰"] = "0" +} + +-- punctuation (leave on separate lines) +local punctuation = { + ["؟"] = "?", -- question mark + ["،"] = ",", -- comma + ["؛"] = ";", -- semicolon + ["«"] = "“", -- quotation mark + ["»"] = "”", -- quotation mark + ["٪"] = "%", -- percent + ["؉"] = "‰", -- per mille + ["٫"] = ".", -- decimals + ["٬"] = "," -- thousand +} + +-- translit +local function tr_word(word) + + word = gsub(word, ".", punctuation) + + -- Remove punctuation at the end of the word. + if mw.ustring.find(word, "[%.%!،؛»«٪؉٫٬%p]$") then + ponct = mw.ustring.sub(word, -1) + word = gsub(word, "[%.%!،؛»«٪؉٫٬%p]$", "") + else + word = word + ponct = "" + end + + word = gsub(word, "ه‌", "ە") -- correct unicode for letter ە + -- U+0647 (Arabic letter heh) + U+200C (zero-width non-joiner) + -- ↓ + -- U+06D5 (Arabic letter ae) + + -- diacritics + word = gsub(word, "ْ", "i") -- U+0652, Arabic sukun + word = gsub(word, "ِ", "i") -- U+0650, Arabic kasra + + -- managing 'و' and 'ی' + word = gsub(word, "و([iاێۆە])", "w%1") -- و + vowel => w (e.g. 
wan) + word = gsub(word, "ی([iاێۆە])", "y%1") -- ی + vowel => y (e.g. yas) + word = gsub(word, "([iاێۆە])و", "%1w") -- vowel + و => w (e.g. kew) + word = gsub(word, "([iاێۆە])ی", "%1y") -- vowel + ی => y (e.g. bey) + word = gsub(word, "^و$", "û") -- non-letter + 'و' + non-letter => û (=and) + word = gsub(word, "([^ء-يٱ-ەiwy])و", "%1w") -- non-letter + 'و' => w (e.g. wtar) + + word = gsub(word, "^و", "w") -- first 'و' => w (e.g. wtar) + word = gsub(word, "یو", "îw") -- 'ی' + 'و' => îw (e.g. nîw) + word = gsub(word, "([^و])یی", "%1îy") -- 'ی' + 'ی' => îy (e.g. kanîy) + word = gsub(word, "وی", "uy") -- 'و' + 'ی' => uy (e.g. buyn) + word = gsub(word, "وو", "û") -- 'و' + 'و' => û (e.g. bû) + word = gsub(word, "ی", "î") + word = gsub(word, "و", "u") + word = gsub(word, "uu", "û") -- 'و' + 'و' => û (e.g. bû) + word = gsub(word, "([ء-يٱ-ەiîuûwy])ڕ", "%1rr") -- when 'ڕ' not at the beginning of a word => rr + word = gsub(word, "([ء-يٱ-ەiîuûwy])ئ", "%1'") -- when 'ئ' not at the beginning of a word => ' + + word = gsub(word, ".", mapping) + + -- insert i where applicable + word = gsub(word, "ll", "Ľ") -- temporary conversion to avoid seeing ll as 2 letters + word = gsub(word, "rr", "Ŕ") -- temporary conversion to avoid seeing rr as 2 letters + + word = gsub(word, "([bcçdfghḧjklĽmnpqrŔsştvwxẍz])([fjlĽmnrŔsşvwxẍyz])([fjlĽmnrŔsşvwxẍyz])([^aeêiîouûy])", "%1%2i%3%4") -- e.g. grft -> grift + word = gsub(word, "([aeêiîouû])([bcçdfghḧjklĽmnpqrŔsştvwxẍz])([bcçdfghḧjklĽmnpqrŔsştvwxẍz])([bcçdfghḧjklĽmnpqrŔsştvwxẍz])$", "%1%2%3i%4") -- e.g. cejnt -> cejnit + word = gsub(word, "([fjlĽrŔsşwyz])([fjlĽmnrŔsşvwxẍyz])([bcçdfghḧjklĽmnpqrŔsştvwxẍz])", "%1i%2%3") -- e.g. wrd -> wird + + word = gsub(word, "([bcçdghḧkmnpqtvxẍ])([fjlĽmnrŔsşvwxẍyz])([^aeêiîouû])", "%1i%2%3") -- e.g. 
prd -> pird + word = gsub(word, "([bcçdghḧkmnpqtvxẍ])([fjlĽmnrŔsşvwxẍyz])$", "%1i%2") -- like above + + word = gsub(word, "([^aeêiîouû])([bcçdghḧkmnpqtvxẍ])([fjlĽmnrŔsşvwxẍyz])([^aeêiîouû])", "%1%2i%3%4") -- repeat the latter expression, in case skipped + word = gsub(word, "([^aeêiîouû])([bcçdghḧkmnpqtvxẍ])([fjlĽmnrŔsşvwxẍyz])$", "%1%2i%3") -- repeat the latter expression, in case skipped + + word = gsub(word, "^([bcçdfghḧjklĽmnpqrŔsştvwxẍz])([bcçdfghḧjklĽmnpqrŔsştvwxẍz])([^aeêiîouû])", "%1i%2%3") -- e.g. ktk -> kitk + word = gsub(word, "^([bcçdfghḧjklĽmnpqrŔsştvwxẍz])([bcçdfghḧjklĽmnpqrŔsştvwxẍz])$", "%1i%2") -- e.g. ktk -> kitk + word = gsub(word, "([^aeêiîouy])([bcçdfghḧjklĽmnpqrŔsştvwxẍz])([bcçdfghḧjklĽmnpqrŔsştvwxẍz])([^aeêiîouû])", "%1%2i%3%4") -- e.g. ktk -> kitk + word = gsub(word, "([^aeêiîouy])([bcçdfghḧjklĽmnpqrŔsştvwxẍz])([bcçdfghḧjklĽmnpqrŔsştvwxẍz])$", "%1%2i%3") -- e.g. ktk -> kitk + + word = gsub(word, "([^a-zçşêîûĽŔ])([bcçdfghḧjklĽmnpqrŔsştvwxẍz])$", "%1%2i") -- e.g. j -> ji + word = gsub(word, "^([bcçdfghḧjklĽmnpqrŔsştvwxẍz])$", "%1i") -- e.g. j -> ji + + -- word = gsub(word, '([^a-zêîûçş0-9\'’])([bcçdfghḧjklĽmnpqrŔsştvwxẍz])([bcçdfghḧjklĽmnpqrŔsştvxẍz])', "%1%2i%3") --e.g. bra -> bira + -- word = gsub(word, '^([bcçdfghḧjklĽmnpqrŔsştvwxẍz])([bcçdfghḧjklĽmnpqrŔsştvxẍz])', "%1i%2") --e.g. bra -> bira + + -- word = gsub(word, '([bcçdfghḧjklmnpqrsştvwxẍz][bcçdfghḧjklĽmnpqrŔsştvwxẍz])([bcçdfghḧjklĽmnpqrŔsştvwxẍz])', "%1i%2") --e.g. aşkra -> aşkira + + -- word = gsub(word, 'si([tp][aeêiîouû])', "s%1") -- sp, st cluster + + word = gsub(word, "Ľ", "ll") -- revert the temporary conversion + word = gsub(word, "Ŕ", "rr") -- revert the temporary conversion + + -- Add the punctuation who had previously deleted. + word = word .. ponct + + return word +end + +function export.tr(text, lang, sc) + local textTab = {} + + -- Create a word table separated by a space (%s). 
+ for _, word in ipairs(mw.text.split(text, "%s+")) do table.insert(textTab, word) end + + -- Tablo of translit. + for key, word in ipairs(textTab) do textTab[key] = tr_word(word) end + + return table.concat(textTab, " ") +end + +return export diff --git a/wiktra/wikt/translit/ckt-translit.lua b/wiktra/wikt/translit/ckt-translit.lua new file mode 100644 index 0000000..c5591b4 --- /dev/null +++ b/wiktra/wikt/translit/ckt-translit.lua @@ -0,0 +1,84 @@ +local export = {} + +local letters = { + ["А"] = "A", + ["а"] = "a", + ["Б"] = "B", + ["б"] = "b", + ["В"] = "V", + ["в"] = "v", + ["Г"] = "G", + ["г"] = "g", + ["Д"] = "D", + ["д"] = "d", + ["Е"] = "E", + ["е"] = "e", + ["Ё"] = "Jo", + ["ё"] = "jo", + ["Ж"] = "Ž", + ["ж"] = "ž", + ["З"] = "Z", + ["з"] = "z", + ["И"] = "I", + ["и"] = "i", + ["Й"] = "J", + ["й"] = "j", + ["К"] = "K", + ["к"] = "k", + ["Ӄ"] = "Q", + ["ӄ"] = "q", + ["Л"] = "l", + ["л"] = "l", + ["Ԓ"] = "Ḷ", + ["ԓ"] = "ḷ", + ["М"] = "M", + ["м"] = "m", + ["Н"] = "N", + ["н"] = "n", + ["Ӈ"] = "Ṇ", + ["ӈ"] = "ṇ", + ["О"] = "O", + ["о"] = "o", + ["П"] = "P", + ["п"] = "p", + ["Р"] = "R", + ["р"] = "r", + ["С"] = "S", + ["с"] = "s", + ["Т"] = "T", + ["т"] = "t", + ["У"] = "U", + ["у"] = "u", + ["Ф"] = "F", + ["ф"] = "f", + ["Х"] = "H", + ["х"] = "h", + ["Ц"] = "C", + ["ц"] = "c", + ["Ч"] = "Č", + ["ч"] = "č", + ["Ш"] = "Š", + ["ш"] = "š", + ["Щ"] = "Šč", + ["щ"] = "šč", + ["Ъ"] = "ʺ", + ["ъ"] = "ʺ", + ["Ы"] = "Y", + ["ы"] = "y", + ["Ь"] = "ʹ", + ["ь"] = "ʹ", + ["Э"] = "Ė", + ["э"] = "ė", + ["Ю"] = "Ju", + ["ю"] = "ju", + ["Я"] = "Ja", + ["я"] = "ja", + ["ʼ"] = "ʼ" -- No change +} + +function export.tr(text, lang, sc) + text = mw.ustring.gsub(text, ".", letters) + return text +end + +return export diff --git a/wiktra/wikt/translit/clw-translit.lua b/wiktra/wikt/translit/clw-translit.lua new file mode 100644 index 0000000..5d0b71d --- /dev/null +++ b/wiktra/wikt/translit/clw-translit.lua @@ -0,0 +1,107 @@ +local export = {} + +local tab = { + ["А"] = "A", + 
["Б"] = "B", + ["В"] = "V", + ["Г"] = "G", + ["Д"] = "D", + ["Е"] = "E", + ["Ё"] = "Yo", + ["Ж"] = "J", + ["З"] = "Z", + ["И"] = "İ", + ["Й"] = "Y", + ["І"] = "Ï", + ["Ј"] = "C", + ["К"] = "K", + ["Қ"] = "Q", + ["Л"] = "L", + ["М"] = "M", + ["Н"] = "N", + ["Ҥ"] = "Ñ", + ["О"] = "O", + ["Ӧ"] = "Ö", + ["П"] = "P", + ["Р"] = "R", + ["С"] = "S", + ["Т"] = "T", + ["У"] = "U", + ["Ӱ"] = "Ü", + ["Ф"] = "F", + ["Х"] = "X", + ["Һ"] = "h", + ["Ц"] = "C", + ["Ч"] = "Ç", + ["Ҹ"] = "Ć", + ["Ш"] = "Ş", + ["Щ"] = "Ś", + ["Ъ"] = "ʺ", + ["Ы"] = "I", + ["Ь"] = "ʹ", + ["Э"] = "Ä", + ["Ю"] = "Yu", + ["Я"] = "Ya", + ["а"] = "a", + ["б"] = "b", + ["в"] = "v", + ["г"] = "g", + ["д"] = "d", + ["е"] = "e", + ["ё"] = "yo", + ["ж"] = "j", + ["з"] = "z", + ["и"] = "i", + ["й"] = "y", + ["і"] = "ï", + ["j"] = "c", + ["к"] = "k", + ["қ"] = "q", + ["л"] = "l", + ["м"] = "m", + ["н"] = "n", + ["ҥ"] = "ñ", + ["о"] = "o", + ["ӧ"] = "ö", + ["п"] = "p", + ["р"] = "r", + ["с"] = "s", + ["т"] = "t", + ["у"] = "u", + ["ӱ"] = "ü", + ["ф"] = "f", + ["х"] = "x", + ["һ"] = "h", + ["ц"] = "c", + ["ч"] = "ç", + ["ҹ"] = "ć", + ["ш"] = "ş", + ["щ"] = "ś", + ["ъ"] = "ʺ", + ["ы"] = "ı", + ["ь"] = "ʹ", + ["э"] = "ä", + ["ю"] = "yu", + ["я"] = "ya" +} + +local iotated = {["Е"] = "Ye", ["е"] = "ye"} + +function export.tr(text, lang, sc) + local ugsub = mw.ustring.gsub + + -- Ё needs to be composed if is decomposed (e + combining diaeresis). + -- However, this cannot happen in wikitext, only in Lua modules. + text = mw.ustring.toNFC(text) + + -- е after a vowel or at the beginning of a word becomes ye + -- Note that according to modern Altai orthography ее (instead of ээ) is occationally used for long r + text = ugsub(text, "([АОӨУҮЫЯЁЮИЪЬаоөуүыяёюиъь%A][́̀]?)е", "%1ye") + -- text = mw.ustring.gsub(text, "([АОӨУҮЫЕЯЁЮИЕЪЬаоөуүыэяёюиеъь%A][́̀]?)е", "%1ye") + text = ugsub(text, "^[Ее]", iotated) + text = ugsub(text, "([^Ѐ-ӿ])([Ее])", function(a, b) return a .. 
iotated[b] end) + + return (ugsub(text, ".", tab)) +end + +return export diff --git a/wiktra/wikt/translit/copt-translit.lua b/wiktra/wikt/translit/copt-translit.lua new file mode 100644 index 0000000..08055c8 --- /dev/null +++ b/wiktra/wikt/translit/copt-translit.lua @@ -0,0 +1,114 @@ +local export = {} + +local chars = { + ["Ⲁ"] = "A", + ["ⲁ"] = "a", + ["Ⲃ"] = "B", + ["ⲃ"] = "b", + ["Ⲅ"] = "G", + ["ⲅ"] = "g", + ["Ⲇ"] = "D", + ["ⲇ"] = "d", + ["Ⲉ"] = "E", + ["ⲉ"] = "e", + ["Ⲍ"] = "Z", + ["ⲍ"] = "z", + ["Ⲏ"] = "Ē", + ["ⲏ"] = "ē", + ["Ⲑ"] = "Th", + ["ⲑ"] = "th", + ["Ⲓ"] = "I", + ["ⲓ"] = "i", + ["Ⲕ"] = "K", + ["ⲕ"] = "k", + ["Ⲗ"] = "L", + ["ⲗ"] = "l", + ["Ⲙ"] = "M", + ["ⲙ"] = "m", + ["Ⲛ"] = "N", + ["ⲛ"] = "n", + ["Ⲻ"] = "N", + ["ⲻ"] = "n", + ["⳯"] = "n", + ["Ⲳ"] = "N", + ["ⲳ"] = "n", + ["Ⲝ"] = "Ks", + ["ⲝ"] = "ks", + ["Ⲟ"] = "O", + ["ⲟ"] = "o", + ["Ⲡ"] = "P", + ["ⲡ"] = "p", + ["Ⲣ"] = "R", + ["ⲣ"] = "r", + ["Ⲥ"] = "S", + ["ⲥ"] = "s", + ["Ⲧ"] = "T", + ["ⲧ"] = "t", + ["Ⲩ"] = "U", + ["ⲩ"] = "u", + ["Ⲫ"] = "Ph", + ["ⲫ"] = "ph", + ["Ⲭ"] = "Kh", + ["ⲭ"] = "kh", + ["Ⲯ"] = "Ps", + ["ⲯ"] = "ps", + ["Ⲱ"] = "Ō", + ["ⲱ"] = "ō", + ["Ⲿ"] = "Ō", + ["ⲿ"] = "ō", + ["Ϣ"] = "Š", + ["ϣ"] = "š", + ["Ⳇ"] = "Š", + ["ⳇ"] = "š", + ["Ⳅ"] = "Š", + ["ⳅ"] = "š", + ["Ϥ"] = "F", + ["ϥ"] = "f", + ["Ϧ"] = "X", + ["ϧ"] = "x", + ["Ⳉ"] = "X", + ["ⳉ"] = "x", + ["Ⳳ"] = "X", + ["ⳳ"] = "x", + ["Ϩ"] = "H", + ["ϩ"] = "h", + ["Ⳏ"] = "h", + ["ⳏ"] = "h", + ["Ⳑ"] = "h", + ["ⳑ"] = "h", + ["Ϫ"] = "Č", + ["ϫ"] = "č", + ["Ⳗ"] = "Č", + ["ⳗ"] = "č", + ["Ⳙ"] = "Č", + ["ⳙ"] = "č", + ["Ϭ"] = "C", + ["ϭ"] = "c", + ["Ⳛ"] = "C", + ["ⳛ"] = "c", + ["Ϯ"] = "Ti", + ["ϯ"] = "ti", + ["Ⲹ"] = "Q", + ["ⲹ"] = "q", + ["Ⳋ"] = "Ç", + ["ⳋ"] = "ç", + ["Ⳃ"] = "Ç", + ["ⳃ"] = "ç", + ["Ⳓ"] = "Ḫ", + ["ⳓ"] = "ḫ", + ["Ⳕ"] = "ḥ", + ["ⳕ"] = "ḥ", + ["Ⳍ"] = "ḥ", + ["ⳍ"] = "ḥ", + ["Ⲵ"] = "ʿ", + ["ⲵ"] = "ʿ", + ["ⳤ"] = "ke", + ["⳦"] = "pros", + ["⳧"] = "stauros", + ["⳨"] = "taur", + ["⳪"] = "cōis" +} + +function export.tr(text, lang, sc) return 
(mw.ustring.gsub(text, ".", chars)) end + +return export diff --git a/wiktra/wikt/translit/cprt-translit.lua b/wiktra/wikt/translit/cprt-translit.lua new file mode 100644 index 0000000..9fa9488 --- /dev/null +++ b/wiktra/wikt/translit/cprt-translit.lua @@ -0,0 +1,91 @@ +local export = {} + +local chars = { + ["𐠀"] = "a", + ["𐠁"] = "e", + ["𐠂"] = "i", + ["𐠃"] = "o", + ["𐠄"] = "u", + + ["𐠅"] = "ja", + -- je not in Unicode + -- ji not in Unicode + ["𐠈"] = "jo", + -- ju not in Unicode + + ["𐠊"] = "ka", + ["𐠋"] = "ke", + ["𐠌"] = "ki", + ["𐠍"] = "ko", + ["𐠎"] = "ku", + + ["𐠏"] = "la", + ["𐠐"] = "le", + ["𐠑"] = "li", + ["𐠒"] = "lo", + ["𐠓"] = "lu", + + ["𐠔"] = "ma", + ["𐠕"] = "me", + ["𐠖"] = "mi", + ["𐠗"] = "mo", + ["𐠘"] = "mu", + + ["𐠙"] = "na", + ["𐠚"] = "ne", + ["𐠛"] = "ni", + ["𐠜"] = "no", + ["𐠝"] = "nu", + + ["𐠞"] = "pa", + ["𐠟"] = "pe", + ["𐠠"] = "pi", + ["𐠡"] = "po", + ["𐠢"] = "pu", + + ["𐠣"] = "ra", + ["𐠤"] = "re", + ["𐠥"] = "ri", + ["𐠦"] = "ro", + ["𐠧"] = "ru", + + ["𐠨"] = "sa", + ["𐠩"] = "se", + ["𐠪"] = "si", + ["𐠫"] = "so", + ["𐠬"] = "su", + + ["𐠭"] = "ta", + ["𐠮"] = "te", + ["𐠯"] = "ti", + ["𐠰"] = "to", + ["𐠱"] = "tu", + + ["𐠲"] = "wa", + ["𐠳"] = "we", + ["𐠴"] = "wi", + ["𐠵"] = "wo", + -- wu not in Unicode + + ["𐠷"] = "ksa", + ["𐠸"] = "kse", + -- ksi not in Unicode + ["𐠿"] = "kso", + -- ksu not in Unicode + + ["𐠼"] = "za" + -- ze not in Unicode + -- zi not in Unicode + -- zo not in Unicode + -- zu not in Unicode +} + +function export.tr(text, lang, sc) + local ret = {} + + for c in mw.ustring.gmatch(text, ".") do table.insert(ret, chars[c] or c) end + + return table.concat(ret, "-") +end + +return export diff --git a/wiktra/wikt/translit/cr-east-translit.lua b/wiktra/wikt/translit/cr-east-translit.lua new file mode 100644 index 0000000..9edc827 --- /dev/null +++ b/wiktra/wikt/translit/cr-east-translit.lua @@ -0,0 +1,66 @@ +local export = {} + +function export.tr(text, lang, sc) + text = require("Cans-translit").tr(text, lang, sc) + text = mw.ustring.gsub(text, 
"o", "u") + text = mw.ustring.gsub(text, "ī", "ii") + text = mw.ustring.gsub(text, "ō", "uu") + text = mw.ustring.gsub(text, "ā", "aa") + text = mw.ustring.gsub(text, "š", "sh") + text = mw.ustring.gsub(text, "ð", "th") + text = mw.ustring.gsub(text, "c", "ch") + return text +end + +function export.transcribe(text) + local replace = {"ch", "sh", "th", "kw"} + local s = "ᐁᐃᐅᐊᐯᐱᐳᐸᑌᑎᑐᑕᑫᑭᑯᑲᒉᒋᒍᒐᒣᒥᒧᒪᓀᓂᓄᓇᓓᓕᓗᓚᓭᓯᓱᓴᔐᔑᔓᔕᔦᔨᔪᔭᕃᕆᕊᕍᕓᕕᕗᕙᕞᕠᕤᕦ" + local s_tr1 = "0ptk1mnls2yrv3" + local s_tr2 = "eiua" + local w = "ᐍᐏᐓᐘᐻ ᑅᑘ ᑢᑵ ᑿᒓ ᒝᒭ ᒷᓊ ᓌᓝ ᓧᓷ ᔁᔘ ᔢᔰ ᔺᣎ ᕎᐧᕓ ᕛᐧᕞ ᕨ" + local w_tr1 = "0ptk1mnls2yrv3" + local w_tr2 = "eiua" + local l = "ᐄᐆᐋᐲᐴᐹᑏᑑᑖᑮᑰᑳᒌᒎᒑᒦᒨᒫᓃᓅᓈᓖᓘᓛᓰᓲᓵᔒᔔᔖᔩᔫᔮᕇᕋᕎᕖᕘᕚᕢᕥᕧᐐᐔᐙ" + local l_tr1 = "0ptk1mnls2yr1v3w" + local l_tr2 = "īūā" + local f = "ᐤᐦᑉᑦᒃᒄᒡᒻᓐᓪᔅᔥᔾᕐᕝᕪ" + local f_tr = "uhptk41mnls2yrv3" + text = mw.ustring.gsub(text, "ê", "e") + text = mw.ustring.gsub(text, "u", "u") + text = mw.ustring.gsub(text, "ii", "ī") + text = mw.ustring.gsub(text, "uu", "ū") + text = mw.ustring.gsub(text, "aa", "ā") + text = mw.ustring.gsub(text, "([aeiuīūā])(w?[aeiuīūā])", "%1 0%2") + text = mw.ustring.gsub(text, "^(w?[aeiuīūā])", " 0%1") + text = mw.ustring.gsub(text, " 0", "0") + mw.log(text) + text = mw.ustring.gsub(text, "([" .. w_tr1 .. "])w([" .. w_tr2 .. "])", function(a, b) + a = w_tr1:find(a) + b = w_tr2:find(b) + local i = a * 4 + b - 4 + return mw.ustring.sub(w, i, i) + end) + for i = 1, 3 do text = mw.ustring.gsub(text, replace[i], tostring(i)) end + mw.log(text) + text = mw.ustring.gsub(text, "([" .. s_tr1 .. "])([" .. s_tr2 .. "])", function(a, b) + a = s_tr1:find(a) + b = s_tr2:find(b) + local i = a * 4 + b - 4 + return mw.ustring.sub(s, i, i) + end) + text = mw.ustring.gsub(text, "([" .. l_tr1 .. "])([" .. l_tr2 .. "])", function(a, b) + a = l_tr1:find(a) + b = l_tr2:find(b) + local i = a * 3 + b - 3 + return mw.ustring.sub(l, i, i) + end) + for i = 4, 5 do text = mw.ustring.gsub(text, replace[i], tostring(i)) end + mw.log(text) + text = mw.ustring.gsub(text, "([" .. f_tr .. 
"])", function(a) + a = f_tr:find(a) + return mw.ustring.sub(f, a, a) + end) + return text +end + +return export diff --git a/wiktra/wikt/translit/cr-translit.lua b/wiktra/wikt/translit/cr-translit.lua new file mode 100644 index 0000000..0b8ed68 --- /dev/null +++ b/wiktra/wikt/translit/cr-translit.lua @@ -0,0 +1,64 @@ +local export = {} + +function export.tr(text, lang, sc) + text = require("Cans-translit").tr(text, lang, sc) + text = mw.ustring.gsub(text, "ī", "ii") + text = mw.ustring.gsub(text, "ō", "oo") + text = mw.ustring.gsub(text, "ā", "aa") + text = mw.ustring.gsub(text, "š", "sh") + text = mw.ustring.gsub(text, "ð", "th") + return text +end + +function export.transcribe(text) + local replace = {"sh", "th", "hk", "kw", "sk"} + local s = "ᐁᐃᐅᐊᐯᐱᐳᐸᑌᑎᑐᑕᑫᑭᑯᑲᒉᒋᒍᒐᒣᒥᒧᒪᓀᓂᓄᓇᓭᓯᓱᓴᔦᔨᔪᔭᖊᖋᖌᖍᕃᕆᕊᕍᔐᔑᔓᔕᕓᕕᕗᕙᘚᘛᘕᘔᓓᓕᓗᓚ" + local s_tr1 = "0ptkcmnsyrl1f2z" + local s_tr2 = "eioa" + local w = "ᐍᐏᐓᐘᐻᐽᑁᑅᑘᑚᑞᑢᑵᑷᑻᑿᒓᒕᒙᒝᒭᒯᒳᒷᓊ ᓌ ᓷᓹᓽᔁᔰᔲᔶᔺᔘᔚᔞᔢᓝᓟᓣᓧ" + local w_tr1 = "0ptkcmnsyrz" + local w_tr2 = "eioa" + local l = "ᐄᐆᐋᐲᐴᐹᑏᑑᑖᑮᑰᑳᒌᒎᒑᒦᒨᒫᓃᓅᓈᓰᓲᓵᔩᔫᔮᕇᕋᕎᔒᔔᔖᕖᕘᕚᓖᓘᓛᐐᐔᐙ" + local l_tr1 = "0ptkcmnsyr1flw23" + local l_tr2 = "īōā" + local f = "ᐤᐦᕽᑉᑦᒡᒃᒻᓐᔅᔥᔾᓪᕐ‡ᒄᔉ" + local f_tr = "wh3ptckmns1ylr45" + text = mw.ustring.gsub(text, "ê", "e") + text = mw.ustring.gsub(text, "u", "o") + text = mw.ustring.gsub(text, "ii", "ī") + text = mw.ustring.gsub(text, "oo", "ō") + text = mw.ustring.gsub(text, "aa", "ā") + text = mw.ustring.gsub(text, "([aeioīōā])(w?[aeioīōā])", "%1 0%2") + text = mw.ustring.gsub(text, "^(w?[aeioīōā])", " 0%1") + text = mw.ustring.gsub(text, " 0", "0") + mw.log(text) + text = mw.ustring.gsub(text, "([" .. w_tr1 .. "])w([" .. w_tr2 .. "])", function(a, b) + a = w_tr1:find(a) + b = w_tr2:find(b) + local i = a * 4 + b - 4 + return mw.ustring.sub(w, i, i) + end) + for i = 1, 3 do text = mw.ustring.gsub(text, replace[i], tostring(i)) end + mw.log(text) + text = mw.ustring.gsub(text, "([" .. s_tr1 .. "])([" .. s_tr2 .. 
"])", function(a, b) + a = s_tr1:find(a) + b = s_tr2:find(b) + local i = a * 4 + b - 4 + return mw.ustring.sub(s, i, i) + end) + text = mw.ustring.gsub(text, "([" .. l_tr1 .. "])([" .. l_tr2 .. "])", function(a, b) + a = l_tr1:find(a) + b = l_tr2:find(b) + local i = a * 3 + b - 3 + return mw.ustring.sub(l, i, i) + end) + for i = 4, 5 do text = mw.ustring.gsub(text, replace[i], tostring(i)) end + mw.log(text) + text = mw.ustring.gsub(text, "([" .. f_tr .. "])", function(a) + a = f_tr:find(a) + return mw.ustring.sub(f, a, a) + end) + return text +end + +return export diff --git a/wiktra/wikt/translit/cv-translit.lua b/wiktra/wikt/translit/cv-translit.lua new file mode 100644 index 0000000..3644421 --- /dev/null +++ b/wiktra/wikt/translit/cv-translit.lua @@ -0,0 +1,97 @@ +local u = mw.ustring.char + +local export = {} + +local CARONBELOW = u(0x032c) + +local rsub = mw.ustring.gsub + +local tt = { + ["А"] = "A", + ["а"] = "a", + ["Ӑ"] = "Ă", + ["ӑ"] = "ă", + ["Б"] = "B", + ["б"] = "b", + ["В"] = "V", + ["в"] = "v", + ["Г"] = "G", + ["г"] = "g", + ["Д"] = "D", + ["д"] = "d", + ["Е"] = "E", + ["е"] = "e", + ["Ё"] = "Jo", + ["ё"] = "jo", + ["Ӗ"] = "Ĕ", + ["ӗ"] = "ĕ", + ["Ж"] = "Ž", + ["ж"] = "ž", + ["З"] = "Z", + ["з"] = "z", + ["И"] = "I", + ["и"] = "i", + ["Й"] = "J", + ["й"] = "j", + ["К"] = "K", + ["к"] = "k", + ["Л"] = "L", + ["л"] = "l", + ["М"] = "M", + ["м"] = "m", + ["Н"] = "N", + ["н"] = "n", + ["О"] = "O", + ["о"] = "o", + ["П"] = "P", + ["п"] = "p", + ["Р"] = "R", + ["р"] = "r", + ["С"] = "S", + ["с"] = "s", + ["Ҫ"] = "Ś", + ["ҫ"] = "ś", + ["Т"] = "T", + ["т"] = "t", + ["У"] = "U", + ["у"] = "u", + ["Ӳ"] = "Ü", + ["ӳ"] = "ü", + ["Ф"] = "F", + ["ф"] = "f", + ["Х"] = "H", + ["х"] = "h", + ["Ц"] = "Ts", + ["ц"] = "ts", + ["Ч"] = "Č", + ["ч"] = "č", + ["Ш"] = "Š", + ["ш"] = "š", + ["Щ"] = "Šč", + ["щ"] = "šč", + ["Ъ"] = "ʺ", + ["ъ"] = "ʺ", + ["Ы"] = "Y", + ["ы"] = "y", + ["Ь"] = "ʹ", + ["ь"] = "ʹ", + ["Э"] = "E", + ["э"] = "e", + ["Ю"] = "Ju", + ["ю"] = "ju", + 
["Я"] = "Ja", + ["я"] = "ja" +}; + +local VOWEL1 = "[АӐЕЁӖИОУӲЫЭЮЯМНЛРЙаӑеёӗиоуӳыэюямнлрй]" +local CONS = "[КХПТСШҪЧкхптсшҫч]" +local VOWEL2 = "[АӐЕЁӖИОУӲЫЭЮЯаӑеёӗиоуӳыэюя]" +local CHARC = "[ '-]" + +function export.tr(text, lang, sc) + text = rsub(text, "(" .. VOWEL1 .. CHARC .. "?" .. CONS .. ")(" .. CHARC .. "?)%f" .. VOWEL2, "%1" .. CARONBELOW .. "%2") + text = rsub(text, ".", tt) + return text +end + +return export diff --git a/wiktra/wikt/translit/cyrs-glag-translit.lua b/wiktra/wikt/translit/cyrs-glag-translit.lua new file mode 100644 index 0000000..5581e4a --- /dev/null +++ b/wiktra/wikt/translit/cyrs-glag-translit.lua @@ -0,0 +1,271 @@ +local export = {} + +local letters = {} +local digraphs = {} + +local double_grave = mw.ustring.char(0x30F) + +letters["Cyrs"] = { + ["А"] = "A", + ["а"] = "a", + ["Б"] = "B", + ["б"] = "b", + ["В"] = "V", + ["в"] = "v", + ["Г"] = "G", + ["г"] = "g", + ["Д"] = "D", + ["д"] = "d", + ["Е"] = "E", + ["е"] = "e", + ["Є"] = "E", + ["є"] = "e", + ["Ж"] = "Ž", + ["ж"] = "ž", + ["Ѕ"] = "Dz", + ["ѕ"] = "dz", + ["Ꙃ"] = "Dz", + ["ꙃ"] = "dz", + ["З"] = "Z", + ["з"] = "z", + ["Ꙁ"] = "Z", + ["ꙁ"] = "z", + ["И"] = "I", + ["и"] = "i", + ["І"] = "I", + ["і"] = "i", + ["Ї"] = "I", + ["ї"] = "i", + ["Ѝ"] = "Ì", + ["ѝ"] = "ì", + ["Ꙉ"] = "Đ", + ["ꙉ"] = "đ", + ["К"] = "K", + ["к"] = "k", + ["Л"] = "L", + ["л"] = "l", + ["М"] = "M", + ["м"] = "m", + ["Н"] = "N", + ["н"] = "n", + ["О"] = "O", + ["о"] = "o", + ["Ѡ"] = "O", + ["ѡ"] = "o", + ["Ѿ"] = "Otŭ", + ["ѿ"] = "otŭ", + ["Ꙩ"] = "O", + ["ꙩ"] = "o", + ["Ꙫ"] = "O", + ["ꙫ"] = "o", + ["Ꙭ"] = "O", + ["ꙭ"] = "o", + ["ꙮ"] = "o", + ["Ѻ"] = "O", + ["ѻ"] = "o", + ["Ꙍ"] = "O", + ["ꙍ"] = "o", + ["П"] = "P", + ["п"] = "p", + ["Р"] = "R", + ["р"] = "r", + ["С"] = "S", + ["с"] = "s", + ["Т"] = "T", + ["т"] = "t", + ["Ѹ"] = "U", + ["ѹ"] = "u", + ["Ꙋ"] = "U", + ["ꙋ"] = "u", + ["У"] = "U", + ["у"] = "u", + ["Ф"] = "F", + ["ф"] = "f", + ["Х"] = "X", + ["х"] = "x", + + ["Ц"] = "C", + ["ц"] = "c", + ["Ч"] = "Č", + 
["ч"] = "č", + ["Ш"] = "Š", + ["ш"] = "š", + -- For Щ see below + ["Ъ"] = "Ŭ", + ["ъ"] = "ŭ", + ["Ꙑ"] = "Y", + ["ꙑ"] = "y", + ["Ы"] = "Y", + ["ы"] = "y", + ["Ь"] = "Ĭ", + ["ь"] = "ĭ", + ["Ѣ"] = "Ě", + ["ѣ"] = "ě", + + ["Ю"] = "Ju", + ["ю"] = "ju", + ["Ꙗ"] = "Ja", + ["ꙗ"] = "ja", + ["Я"] = "Ja", + ["я"] = "ja", + ["Ѥ"] = "Je", + ["ѥ"] = "je", + ["Ѧ"] = "Ę", + ["ѧ"] = "ę", + ["Ѩ"] = "Ję", + ["ѩ"] = "ję", + ["Ѫ"] = "Ǫ", + ["ѫ"] = "ǫ", + ["Ѭ"] = "Jǫ", + ["ѭ"] = "jǫ", + ["Ꙓ"] = "Jě", + ["ꙓ"] = "jě", + + ["Ѯ"] = "Ks", + ["ѯ"] = "ks", + ["Ѱ"] = "Ps", + ["ѱ"] = "ps", + ["Ѳ"] = "Θ", + ["ѳ"] = "θ", + ["Ѵ"] = "Ü", + ["ѵ"] = "ü", + ["Ѷ"] = "Ü" .. double_grave, + ["ѷ"] = "ü" .. double_grave, + -- newer letters + ["Й"] = "J", + ["й"] = "j" -- starting from 15th century +} + +digraphs["Cyrs"] = {["О[УѴуѵ]"] = "U", ["о[уѵ]"] = "u", ["Ъ[Ии]"] = "Y", ["ъи"] = "y"} + +letters["Glag"] = { + ["Ⰰ"] = "A", + ["ⰰ"] = "a", + ["Ⱝ"] = "A", + ["ⱝ"] = "a", + ["Ⰱ"] = "B", + ["ⰱ"] = "b", + ["Ⰲ"] = "V", + ["ⰲ"] = "v", + ["Ⰳ"] = "G", + ["ⰳ"] = "g", + ["Ⰴ"] = "D", + ["ⰴ"] = "d", + ["Ⰵ"] = "E", + ["ⰵ"] = "e", + ["Ⰶ"] = "Ž", + ["ⰶ"] = "ž", + ["Ⰷ"] = "Dz", + ["ⰷ"] = "dz", + ["Ⰸ"] = "Z", + ["ⰸ"] = "z", + ["Ⰹ"] = "I", + ["ⰹ"] = "i", + ["Ⰺ"] = "I", + ["ⰺ"] = "i", + ["Ⰻ"] = "I", + ["ⰻ"] = "i", + ["Ⰼ"] = "Đ", + ["ⰼ"] = "đ", + ["Ⰽ"] = "K", + ["ⰽ"] = "k", + ["Ⰾ"] = "L", + ["ⰾ"] = "l", + ["Ⰿ"] = "M", + ["ⰿ"] = "m", + ["Ⱞ"] = "M", + ["ⱞ"] = "m", + ["Ⱀ"] = "N", + ["ⱀ"] = "n", + ["Ⱁ"] = "O", + ["ⱁ"] = "o", + ["Ⱉ"] = "O", + ["ⱉ"] = "o", + ["Ⱂ"] = "P", + ["ⱂ"] = "p", + ["Ⱃ"] = "R", + ["ⱃ"] = "r", + ["Ⱄ"] = "S", + ["ⱄ"] = "s", + ["Ⱅ"] = "T", + ["ⱅ"] = "t", + ["Ⱆ"] = "U", + ["ⱆ"] = "u", + ["Ⱇ"] = "F", + ["ⱇ"] = "f", + ["Ⱈ"] = "X", + ["ⱈ"] = "x", + ["Ⱒ"] = "X", + ["ⱒ"] = "x", + + ["Ⱌ"] = "C", + ["ⱌ"] = "c", + ["Ⱍ"] = "Č", + ["ⱍ"] = "č", + ["Ⱎ"] = "Š", + ["ⱎ"] = "š", + -- For Ⱋ see below + ["Ⱏ"] = "Ŭ", + ["ⱏ"] = "ŭ", + ["Ⱐ"] = "Ĭ", + ["ⱐ"] = "ĭ", + ["Ⱜ"] = "Ĭ", + ["ⱜ"] = "ĭ", + ["Ⱑ"] = "Ě", + ["ⱑ"] = "ě", + + ["Ⱓ"] = 
"Ju", + ["ⱓ"] = "ju", + ["Ⱔ"] = "Ę", + ["ⱔ"] = "ę", + ["Ⱕ"] = "Y̨", + ["ⱕ"] = "y̨", + ["Ⱗ"] = "Ję", + ["ⱗ"] = "ję", + ["Ⱘ"] = "Ǫ", + ["ⱘ"] = "ǫ", + ["Ⱖ"] = "Ǫ", + ["ⱖ"] = "ǫ", + ["Ⱙ"] = "Jǫ", + ["ⱙ"] = "jǫ", + + ["Ⱚ"] = "Θ", + ["ⱚ"] = "θ", + ["Ⱛ"] = "Ü", + ["ⱛ"] = "ü" +} + +digraphs["Glag"] = {["Ⱏ[ⰉⰊⰋⰹⰺⰻ]"] = "Y", ["ⱏ[ⰹⰺⰻ]"] = "y"} + +function export.tr(text, lang, sc) + if not sc then sc = require("scripts").findBestScript(text, require("languages").getByCode(lang)):getCode() end + + -- Щ was pronounced differently in Old East Slavic + if lang == "orv" then + letters["Cyrs"]["Щ"] = "Šč" + letters["Cyrs"]["щ"] = "šč" + letters["Glag"]["Ⱋ"] = "Šč" + letters["Glag"]["ⱋ"] = "šč" + else + letters["Cyrs"]["Щ"] = "Št" + letters["Cyrs"]["щ"] = "št" + letters["Glag"]["Ⱋ"] = "Št" + letters["Glag"]["ⱋ"] = "št" + end + + -- Transliterate the kamora as prime + text = string.gsub(text, "\210\132", "ʹ") + + if sc == "Cyrs" or sc == "Glag" then + for key, repl in pairs(digraphs[sc]) do text = mw.ustring.gsub(text, key, repl) end + + -- pattern for one non-ASCII character + text = string.gsub(text, "[\194-\244][\128-\191]+", letters[sc]) + else + -- error("This module can only transliterate Old Cyrillic (Cyrs) and Glagolitic (Glag).") + end + + return text +end + +return export diff --git a/wiktra/wikt/translit/dar-translit.lua b/wiktra/wikt/translit/dar-translit.lua new file mode 100644 index 0000000..74024e0 --- /dev/null +++ b/wiktra/wikt/translit/dar-translit.lua @@ -0,0 +1,22 @@ +local export = {} + +local mapping1 = {["б"] = "b", ["п"] = "p", ["ф"] = "f", ["в"] = "v", ["м"] = "m", ["д"] = "d", ["т"] = "t", ["й"] = "j", ["н"] = "n", ["з"] = "z", ["ц"] = "c", ["с"] = "s", ["ж"] = "ž", ["ш"] = "š", ["щ"] = "šč", ["л"] = "l", ["ч"] = "č", ["р"] = "r", ["г"] = "g", ["к"] = "k", ["х"] = "χ", ["ъ"] = "ʾ", ["а"] = "a", ["е"] = "e", ["ы"] = "y", ["и"] = "i", ["о"] = "o", ["у"] = "u", ["ё"] = "ë", ["ь"] = "’", ["э"] = "e", ["ю"] = "ju", ["я"] = "ə", ["Б"] = "B", ["П"] = "P", ["Ф"] = 
"F", ["В"] = "V", ["М"] = "M", ["Д"] = "D", ["Т"] = "T", ["Й"] = "J", ["Н"] = "N", ["З"] = "Z", ["Ц"] = "C", ["С"] = "S", ["Ж"] = "Ž", ["Ш"] = "Š", ["Щ"] = "Šč", ["Л"] = "L", ["Ч"] = "Č", ["Р"] = "R", ["Г"] = "G", ["К"] = "K", ["Х"] = "Χ", ["Ъ"] = "ʾ", ["А"] = "A", ["Е"] = "E", ["Ы"] = "Y", ["И"] = "I", ["О"] = "O", ["У"] = "U", ["Ё"] = "Ë", ["Ь"] = "’", ["Э"] = "E", ["Ю"] = "Ju", ["Я"] = "Ə"} + +local mapping2 = {["дз"] = "ʒ", ["Дз"] = "Ʒ", ["дж"] = "ǯ", ["Дж"] = "Ǯ", ["пӏ"] = "ṗ", ["Пӏ"] = "Ṗ", ["цӏ"] = "c̣", ["тӏ"] = "ṭ", ["чӏ"] = "č̣", ["кь"] = "q̇", ["кӏ"] = "ḳ", ["хь"] = "x", ["хъ"] = "q", ["къ"] = "ɢ", ["гъ"] = "γ", ["гӏ"] = "ʿ", ["хӏ"] = "ḥ", ["гь"] = "h", ["Цӏ"] = "C̣", ["Тӏ"] = "Ṭ", ["Сс"] = "S̄", ["Чӏ"] = "Č̣", ["Кь"] = "Q̇", ["Кӏ"] = "Ḳ", ["Хь"] = "X", ["Хъ"] = "Q", ["Къ"] = "ɢ", ["Гъ"] = "Γ", ["Гӏ"] = "ʿ", ["Хӏ"] = "Ḥ", ["Гь"] = "H"} + +function export.tr(text, lang, sc) + local str_gsub = string.gsub + + -- Convert capital to lowercase palochka. Lowercase is found in tables + -- above. 
+ text = str_gsub(text, mw.ustring.char(0x4C0), mw.ustring.char(0x4CF)) + + for pat, repl in pairs(mapping2) do text = str_gsub(text, pat, repl) end + + -- pattern for non-ASCII UTF-8 characters + text = str_gsub(text, "[\194-\244][\128-\191]+", mapping1) + + return text +end + +return export diff --git a/wiktra/wikt/translit/ddo-translit.lua b/wiktra/wikt/translit/ddo-translit.lua new file mode 100644 index 0000000..cc8c70f --- /dev/null +++ b/wiktra/wikt/translit/ddo-translit.lua @@ -0,0 +1,20 @@ +local export = {} + +local mapping1 = {["п"] = "p", ["б"] = "b", ["т"] = "t", ["д"] = "d", ["к"] = "k", ["г"] = "g", ["ц"] = "c", ["ч"] = "č", ["с"] = "s", ["з"] = "z", ["ш"] = "š", ["ж"] = "ž", ["х"] = "x", ["м"] = "m", ["н"] = "n", ["р"] = "r", ["л"] = "l", ["в"] = "v", ["й"] = "y", ["и"] = "i", ["е"] = "e", ["э"] = "e", ["а"] = "a", ["о"] = "o", ["у"] = "u", ["ъ"] = "ʾ"} + +local mapping2 = {["пӏ"] = "p’", ["тӏ"] = "t’", ["кӏ"] = "k’", ["къ"] = "q’", ["цӏ"] = "c’", ["лӏ"] = "ƛ", ["кь"] = "ƛ’", ["чӏ"] = "č’", ["хъ"] = "q", ["лъ"] = "λ", ["гъ"] = "ġ", ["хӏ"] = "ḥ", ["гӏ"] = "a̯", ["гь"] = "h", ["аь"] = "ä", ["аӏ"] = "aʿ", ["еӏ"] = "eʿ", ["иӏ"] = "iʿ", ["оӏ"] = "oʿ", ["уӏ"] = "uʿ"} + +function export.tr(text, lang, sc) + local str_gsub = string.gsub + local UTF8_char = "[%z\1-\127\194-\244][\128-\191]*" + + -- Convert capital to lowercase palochka. 
+ text = str_gsub(text, mw.ustring.char(0x4C0), mw.ustring.char(0x4CF)) + + for pat, repl in pairs(mapping2) do text = str_gsub(text, pat, repl) end + text = str_gsub(text, UTF8_char, mapping1) + + return text +end + +return export diff --git a/wiktra/wikt/translit/debug.lua b/wiktra/wikt/translit/debug.lua new file mode 100644 index 0000000..0567cf9 --- /dev/null +++ b/wiktra/wikt/translit/debug.lua @@ -0,0 +1,171 @@ +local export = {} + +local escape +do + local escapes = {["\a"] = "a", ["\b"] = "b", ["\f"] = "f", ["\n"] = "n", ["\r"] = "r", ["\t"] = "t", ["\v"] = "v", ["\\"] = "\\", ["\""] = "\"", ["'"] = "'"} + + local function helper(char) return escapes[char] and "\\" .. escapes[char] or ("\\%03d"):format(char:byte()) end + + -- Escape control characters, backslash, double quote, and bytes that aren't + -- used in UTF-8. + -- Escape stuff that can't be saved in a MediaWiki page, like invalid UTF-8 + -- and NFD character sequences? Hard. + -- Similar to string.format("%q", str), which does not use C-like simple + -- escapes and does not escape bytes that are not used in UTF-8. + escape = function(str) return (str:gsub("[%z\1-\31\\\"\127\192\193\245-\255]", helper)) end +end + +export.escape = escape + +-- Convert a value to a string +function export.dump(value, prefix, tsort) + local t = type(value) + + prefix = prefix or "" + + if t == "string" then + return "\"" .. escape(value) .. "\"" + elseif t == "table" then + local str_table = {} + + table.insert(str_table, " {") + + for key, val in require("table").sortedPairs(value, tsort) do table.insert(str_table, " " .. prefix .. "\t[" .. export.dump(key, prefix .. "\t") .. "] = " .. export.dump(val, prefix .. "\t"):gsub("^ ", "") .. ",") end + + table.insert(str_table, " " .. prefix .. 
"}") + + return table.concat(str_table, "\n") + else + return tostring(value) + end +end + +function export.highlight_dump(value, prefix, tsort, options) + options = options or {} + + local func = options.modified and "modified_dump" or "dump" + + local dump = export[func](value, prefix, tsort) + + -- Remove spaces at beginnings of lines (which are simply to force a
+    -- <pre> tag).
+    dump = dump:gsub("^ ", "")
+    dump = dump:gsub("\n ", "\n")
+
+    return export.highlight(dump)
+end
+
+-- Report whether at least one value stored in `t` is itself a table.
+local function containsTable(t)
+    for _, entry in pairs(t) do
+        if type(entry) == "table" then
+            return true
+        end
+    end
+    return false
+end
+
+-- Report whether every table-valued entry of `t` has exactly `size` elements,
+-- as counted by the size() helper of the "table" module. Non-table values are
+-- ignored, so a table without nested tables yields true.
+local function containsTablesWithSize(t, size)
+    for _, entry in pairs(t) do
+        if type(entry) == "table" then
+            if require("table").size(entry) ~= size then
+                return false
+            end
+        end
+    end
+    return true
+end
+
+--[=[
+	Convert a value to a string.
+	Like export.dump above, but a table is written in a compact form when
+	either (a) it is an array (per the "table" module's isArray) with no
+	table among its values, in which case it is placed on a single line, or
+	(b) it has table values and each of them has exactly three elements, in
+	which case every entry goes on its own line.
+	Differences from export.dump visible in the code below:
+	  * strings are quoted but NOT escaped;
+	  * numbers greater than 46 are written as hexadecimal (0x%05X), as are
+	    numeric values stored under numeric keys in the compact form.
+	Parameters: value (anything), prefix (indentation string, default ""),
+	tsort (passed through to sortedPairs).
+	Used by [[Module:User:Erutuon/script recognition]].
+]=]
+function export.modified_dump(value, prefix, tsort)
+    local t = type(value)
+
+    prefix = prefix or ""
+
+    if t == "string" then
+        return "\"" .. value .. "\""
+    elseif t == "table" then
+        local str_table = {}
+
+        -- NOTE: this local deliberately shadows the containsTable() helper
+        -- for the rest of this branch; from here on it is a boolean.
+        local containsTable = containsTable(value)
+        local consecutive = require("table").isArray(value)
+        if consecutive and not containsTable or containsTable and containsTablesWithSize(value, 3) then
+            table.insert(str_table, "{")
+
+            for key, val in require("table").sortedPairs(value, tsort) do
+                -- Entry separator: newline + indent when nested tables are
+                -- present, otherwise a single space.
+                if containsTable then
+                    table.insert(str_table, "\n\t" .. prefix)
+                else
+                    table.insert(str_table, " ")
+                end
+
+                -- Only string keys are written out; numeric keys are implied
+                -- by position.
+                if type(key) == "string" then table.insert(str_table, "[" .. export.modified_dump(key) .. "] = ") end
+
+                -- Nested values are dumped without prefix/tsort.
+                table.insert(str_table, type(key) == "number" and type(val) == "number" and string.format("0x%05X", val) or export.modified_dump(val))
+
+                -- A trailing comma is emitted after every entry, except after
+                -- the last element of a three-element array.
+                if not (consecutive and #value == 3) or type(key) == "number" and value[key + 1] then table.insert(str_table, ",") end
+            end
+
+            if containsTable then
+                table.insert(str_table, "\n" .. prefix)
+            else
+                table.insert(str_table, " ")
+            end
+
+            table.insert(str_table, "}")
+            return table.concat(str_table)
+        end
+
+        -- Fallback: same multi-line layout as export.dump. The leading
+        -- space on each line is kept and only stripped from nested values.
+        table.insert(str_table, " {")
+
+        for key, val in require("table").sortedPairs(value, tsort) do table.insert(str_table, " " .. prefix .. "\t[" .. export.modified_dump(key, prefix .. "\t") .. "] = " .. export.modified_dump(val, prefix .. "\t"):gsub("^ ", "") .. ",") end
+
+        table.insert(str_table, " " .. prefix .. "}")
+
+        return table.concat(str_table, "\n")
+    elseif t == "number" and value > 46 then
+        -- NOTE(review): the threshold 46 is not explained in this file;
+        -- presumably small numbers are counts and larger ones code points.
+        return string.format("0x%05X", value)
+    else
+        return tostring(value)
+    end
+end
+
+-- Transclude the tracking template "tracking/<key>" for one key or for every
+-- value of a table of keys. Failures while expanding a template are ignored
+-- (each expansion runs under pcall).
+-- Raises an error when no key is supplied.
+function export.track(key)
+    if not key then
+        -- The original concatenated the undefined global `track` here, which
+        -- raised "attempt to concatenate a nil value" instead of this message.
+        error("No tracking key supplied to the function \"track\".")
+    end
+
+    local frame = mw.getCurrentFrame()
+    if type(key) ~= "table" then key = {key} end
+
+    for _, value in pairs(key) do pcall(frame.expandTemplate, frame, {title = "tracking/" .. value}) end
+end
+
+-- Raise a script error on behalf of a template. The message is the first
+-- frame argument, or a placeholder when none was given.
+function export.error(frame)
+    local message = frame.args[1]
+    if not message then message = "(no message specified)" end
+    error(message)
+end
+
+--[[
+	Convenience function for generating syntaxhighlight tags.
+	Display defaults to block; the language defaults to "lua".
+	Two call forms:
+	  highlight(content, options)  -> returns the rendered tag
+	  highlight(options)           -> returns a function(content) that
+	                                  renders with those options
+	Options is a table. To display inline text with HTML highlighting:
+		{ inline = true, lang = "html" }
+	In both forms `inline` is passed to the tag only when it is truthy; a
+	false value is dropped (nil) rather than forwarded as an attribute.
+]]
+function export.highlight(content, options)
+    if type(content) == "table" then
+        -- Curried form: snapshot the options once, as the original did.
+        local tag_args = {lang = content.lang or "lua", inline = content.inline and true or nil}
+        return function(text) return mw.getCurrentFrame():extensionTag{name = "syntaxhighlight", content = text, args = tag_args} end
+    else
+        return mw.getCurrentFrame():extensionTag{name = "syntaxhighlight", content = content, args = {lang = options and options.lang or "lua", inline = options and options.inline and true or nil}}
+    end
+end
+
+-- Record unrecognized template parameters: one generic tracking key, one
+-- tracking key per parameter name, and a log line listing every
+-- "|name=value" pair.
+function export.track_unrecognized_args(args, template_name)
+    local function track(code)
+        export.track(template_name .. "/" .. code)
+    end
+
+    track("unrecognized arg")
+
+    local listed = {}
+    for name, val in pairs(args) do
+        track("unrecognized arg/" .. name)
+        listed[#listed + 1] = ("|%s=%s"):format(name, val)
+    end
+
+    -- Plural "s" only when at least two parameters were collected.
+    local plural = listed[2] and "s" or ""
+    local summary = table.concat(listed, ", ")
+    mw.log(("Unrecognized parameter%s in {{%s}}: %s."):format(plural, template_name, summary))
+end
+
+return export
diff --git a/wiktra/wikt/translit/dlg-translit.lua b/wiktra/wikt/translit/dlg-translit.lua
new file mode 100644
index 0000000..856ee6b
--- /dev/null
+++ b/wiktra/wikt/translit/dlg-translit.lua
@@ -0,0 +1,100 @@
+local export = {}
+
+local tab = {
+    ["А"] = "A",
+    ["а"] = "a",
+    ["Б"] = "B",
+    ["б"] = "b",
+    ["Г"] = "G",
+    ["г"] = "g",
+    ["Ҕ"] = "Ğ",
+    ["ҕ"] = "ğ",
+    ["Д"] = "D",
+    ["д"] = "d",
+    ["И"] = "İ",
+    ["и"] = "i",
+    ["Й"] = "Y",
+    ["й"] = "y",
+    ["К"] = "K",
+    ["к"] = "k",
+    ["Л"] = "L",
+    ["л"] = "l",
+    ["М"] = "M",
+    ["м"] = "m",
+    ["Н"] = "N",
+    ["н"] = "n",
+    ["Ӈ"] = "Ñ",
+    ["ӈ"] = "ñ",
+    ["О"] = "O",
+    ["о"] = "o",
+    ["Ө"] = "Ö",
+    ["ө"] = "ö",
+    ["П"] = "P",
+    ["п"] = "p",
+    ["Р"] = "R",
+    ["р"] = "r",
+    ["С"] = "S",
+    ["с"] = "s",
+    ["Һ"] = "H",
+    ["һ"] = "h",
+    ["Т"] = "T",
+    ["т"] = "t",
+    ["У"] = "U",
+    ["у"] = "u",
+    ["Ү"] = "Ü",
+    ["ү"] = "ü",
+    ["Х"] = "X",
+    ["х"] = "x",
+    ["Ч"] = "Ç",
+    ["ч"] = "ç",
+    ["Ш"] = "Ś",
+    ["ш"] = "ś",
+    ["Ы"] = "I",
+    ["ы"] = "ı",
+    ["Э"] = "E",
+    ["э"] = "e",
+    -- non-native letters
+    ["В"] = "V",
+    ["в"] = "v",
+    ["Е"] = "E",
+    ["е"] = "e",
+    ["Ё"] = "Yo",
+    ["ё"] = "yo",
+    ["Ж"] = "J",
+    ["ж"] = "j",
+    ["З"] = "Z",
+    ["з"] = "z",
+    ["Ф"] = "F",
+    ["ф"] = "f",
+    ["Ц"] = "Ts",
+    ["ц"] = "ts",
+    ["Щ"] = "Şç",
+    ["щ"] = "şç",
+    ["Ъ"] = "ʺ",
+    ["ъ"] = "ʺ",
+    ["Ь"] = "’",
+    ["ь"] = "’",
+    ["Ю"] = "Yu",
+    ["ю"] = "yu",
+    ["Я"] = "Ya",
+    ["я"] = "ya"
+}
+
+-- Consonant + soft sign sequences that are transliterated as a single letter.
+-- They must be replaced before the per-character table is applied.
+local digraphs = {["Дь"] = "C", ["дь"] = "c", ["Нь"] = "Ń", ["нь"] = "ń"}
+
+-- Transliterate Cyrillic text into the Latin script using `digraphs` and `tab`.
+-- `lang` and `sc` are accepted for interface compatibility and are not used.
+-- Returns the transliterated string.
+function export.tr(text, lang, sc)
+    local str_gsub = string.gsub
+    -- `digraphs` has string keys, so it must be walked with pairs; ipairs
+    -- visits nothing and the digraphs were silently skipped. The entries do
+    -- not overlap, so the unspecified iteration order is harmless.
+    for digraph, repl in pairs(digraphs) do text = str_gsub(text, digraph, repl) end
+
+    -- е after a vowel (optionally carrying an accent mark), after ъ/ь, after
+    -- a non-letter, or at the beginning of the text becomes ye.
+    -- Fixes: the replacement used "je", unlike every other rule here, and the
+    -- uppercase list had Е twice where the lowercase list has э.
+    text = mw.ustring.gsub(text, "([АОӨУҮЫЭЯЁЮИЕЪЬаоөуүыэяёюиеъь%A][́̀]?)е", "%1ye")
+    text = str_gsub(text, "^Е", "Ye")
+    text = str_gsub(text, "^е", "ye")
+    text = mw.ustring.gsub(text, "([^Ѐ-ӿ])Е", "%1Ye")
+    text = mw.ustring.gsub(text, "([^Ѐ-ӿ])е", "%1ye")
+
+    -- pattern for non-ASCII UTF-8 characters; characters missing from `tab`
+    -- are left unchanged. Parentheses drop gsub's replacement count.
+    return (str_gsub(text, "[\194-\244][\128-\191]+", tab))
+end
+
+return export
diff --git a/wiktra/wikt/translit/dng-translit.lua b/wiktra/wikt/translit/dng-translit.lua
new file mode 100644
index 0000000..f9dd5b4
--- /dev/null
+++ b/wiktra/wikt/translit/dng-translit.lua
@@ -0,0 +1,95 @@
+local export = {}
+
+local tt = {
+    ["Б"] = "B",
+    ["П"] = "P",
+    ["М"] = "M",
+    ["Ф"] = "F",
+    ["В"] = "V",
+    ["Д"] = "D",
+    ["Т"] = "T",
+    ["Н"] = "N",
+    ["Л"] = "L",
+    ["З"] = "Z",
+    ["Ц"] = "C",
+    ["С"] = "S",
+    ["Җ"] = "Ž",
+    ["Ч"] = "Č",
+    ["Ш"] = "Š",
+    ["Ж"] = "Ř",
+    --[[  ↑  ]] --[[  ↑  ]]
+    ["Щ"] = "X",
+    ["Г"] = "G",
+    ["К"] = "K",
+    ["Ң"] = "Ŋ",
+    ["Х"] = "H",
+    ["Р"] = "R",
+
+    ["Ы"] = "Ɨ",
+    ["Й"] = "Y",
+    ["Ў"] = "W",
+    ["Ү"] = "Ü",
+    --[[  ↑  ]]
+    ["А"] = "A",
+    ["Я"] = "I͡a",
+    ["Ә"] = "Ə",
+    ["Е"] = "I͡ə",
+    ["Э"] = "E",
+    ["О"] = "O",
+    ["Ё"] = "I͡o",
+    ["У"] = "U",
+    ["Ю"] = "I͡u",
+
+    ["И"] = "I",
+
+    ["б"] = "b",
+    ["п"] = "p",
+    ["м"] = "m",
+    ["ф"] = "f",
+    ["в"] = "v",
+    ["д"] = "d",
+    ["т"] = "t",
+    ["н"] = "n",
+    ["л"] = "l",
+    ["з"] = "z",
+    ["ц"] = "c",
+    ["с"] = "s",
+    ["җ"] = "ž",
+    ["ч"] = "č",
+    ["ш"] = "š",
+    ["ж"] = "ř",
+    --[[  ↑  ]] --[[  ↑  ]]
+    ["щ"] = "x",
+    ["г"] = "g",
+    ["к"] = "k",
+    ["ң"] = "ŋ",
+    ["х"] = "h",
+    ["р"] = "r",
+
+    ["ы"] = "ɨ",
+    ["й"] = "y",
+    ["ў"] = "w",
+    ["ү"] = "ü",
+    --[[  ↑  ]]
+    ["а"] = "a",
+    ["я"] = "i͡a",
+    ["ә"] = "ə",
+    ["е"] = "i͡ə",
+    ["э"] = "e",
+    ["о"] = "o",
+    ["ё"] = "i͡o",
+    ["у"] = "u",
+    ["ю"] = "i͡u",
+
+    ["и"] = "i"
+};
+
+-- Transliterate text character by character through the `tt` table.
+-- Returns nil when a script code other than "Cyrl" is given; characters with
+-- no entry in `tt` are left unchanged.
+function export.tr(text, lang, sc)
+    local wrong_script = sc and sc ~= "Cyrl"
+    if wrong_script then
+        return nil
+    end
+
+    -- Parentheses keep only the converted string, not the match count.
+    return (mw.ustring.gsub(text, ".", tt))
+end
+
+return export
diff --git a/wiktra/wikt/translit/dv-translit.lua b/wiktra/wikt/translit/dv-translit.lua
new file mode 100644
index 0000000..65eb78f
--- /dev/null
+++ b/wiktra/wikt/translit/dv-translit.lua
@@ -0,0 +1,44 @@
+local export = {}
+local u = mw.ustring.char
+
+local consonants = {["ހ"] = "h", ["ށ"] = "ṣ", ["ނ"] = "n", ["ރ"] = "r", ["ބ"] = "b", ["ޅ"] = "ḷ", ["ކ"] = "k", ["އ"] = "", ["ވ"] = "v", ["މ"] = "m", ["ފ"] = "f", ["ދ"] = "d", ["ތ"] = "t", ["ލ"] = "l", ["ގ"] = "g", ["ޏ"] = "ñ", ["ސ"] = "s", ["ޑ"] = "ḍ", ["ޒ"] = "z", ["ޓ"] = "ṭ", ["ޔ"] = "y", ["ޕ"] = "p", ["ޖ"] = "j", ["ޗ"] = "c", ["ޱ"] = "ṇ", ["ޘ"] = "ṯ", ["ޙ"] = "ḥ", ["ޚ"] = "x", ["ޛ"] = "ź", ["ޜ"] = "ž", ["ޝ"] = "š", ["ޞ"] = "ş", ["ޟ"] = "ḋ", ["ޠ"] = "ţ", ["ޡ"] = "ẓ", ["ޢ"] = "ʿ", ["ޣ"] = "ġ", ["ޤ"] = "q", ["ޥ"] = "w"}
+
+-- this controls prenasalized stop digraphs consisting of ނ and a voiced stop
+local prenasal = {["ބ"] = "ᵐ", ["ދ"] = "ⁿ", ["ގ"] = "ⁿ", ["ޑ"] = "ⁿ"}
+
+local diacritics = {
+    [u(0x07A6)] = "a",
+    [u(0x07A7)] = "ā",
+    [u(0x07A8)] = "i",
+    [u(0x07A9)] = "ī",
+    [u(0x07AA)] = "u",
+    [u(0x07AB)] = "ū",
+    [u(0x07AC)] = "e",
+    [u(0x07AD)] = "ē",
+    [u(0x07AE)] = "o",
+    [u(0x07AF)] = "ō",
+    [u(0x07B0)] = "",
+    -- no diacritic
+    [""] = ""
+}
+
+-- Transliterate Thaana text using the `consonants`, `prenasal` and
+-- `diacritics` tables defined above. `lang` and `sc` are not used.
+-- The rewriting steps below run in order on the Thaana text; only the last
+-- step produces Latin letters for the consonant+vowel units.
+function export.tr(text, lang, sc)
+    -- final k
+    -- (the sequence at the very end of the text is rewritten so that the
+    -- last step renders it through the ކ entry of `consonants`)
+    text = mw.ustring.gsub(text, "އް$", "ކް")
+    -- V + sukun + C -> CC
+    -- (the captured consonant is doubled; U+07B0 is the sukun sign)
+    text = mw.ustring.gsub(text, "އ" .. u(0x07B0) .. "([ހ-ޥޱ])", function(c) return c .. c end)
+    -- prenasalized digraphs
+    -- (ނ directly followed by a consonant becomes the superscript from
+    -- `prenasal`, or a plain "n" when the consonant has no entry there)
+    text = mw.ustring.gsub(text, "ނ([ހ-ޥޱ])", function(c) return (prenasal[c] or "n") .. c end)
+
+    -- handle all standard akuru (C+V or C+virama) units
+    -- (d is the optional sign U+07A6..U+07B0 and is "" when absent, which
+    -- `diacritics` maps to "")
+    text = mw.ustring.gsub(text, "([ހ-ޥޱ])([" .. u(0x07A6) .. "-" .. u(0x07B0) .. "]?)", function(c, d)
+        -- These five consonants followed by sukun get a combining ring above
+        -- instead of the plain (empty) sukun rendering.
+        if mw.ustring.match(c, "[ށނތކމ]") and d == u(0x07B0) then
+            return consonants[c] .. "̊"
+        else
+            return consonants[c] .. diacritics[d]
+        end
+    end)
+    return text
+end
+
+return export
diff --git a/wiktra/wikt/translit/el-translit.lua b/wiktra/wikt/translit/el-translit.lua
new file mode 100644
index 0000000..24305af
--- /dev/null
+++ b/wiktra/wikt/translit/el-translit.lua
@@ -0,0 +1,55 @@
+local export = {}
+
+local tt = {["α"] = "a", ["ά"] = "á", ["β"] = "v", ["γ"] = "g", ["δ"] = "d", ["ε"] = "e", ["έ"] = "é", ["ζ"] = "z", ["η"] = "i", ["ή"] = "í", ["θ"] = "th", ["ι"] = "i", ["ί"] = "í", ["ϊ"] = "ï", ["ΐ"] = "ḯ", ["κ"] = "k", ["λ"] = "l", ["μ"] = "m", ["ν"] = "n", ["ξ"] = "x", ["ο"] = "o", ["ό"] = "ó", ["π"] = "p", ["ρ"] = "r", ["σ"] = "s", ["ς"] = "s", ["τ"] = "t", ["υ"] = "y", ["ύ"] = "ý", ["ϋ"] = "ÿ", ["ΰ"] = "ÿ́", ["φ"] = "f", ["χ"] = "ch", ["ψ"] = "ps", ["ω"] = "o", ["ώ"] = "ó", ["Α"] = "A", ["Ά"] = "Á", ["Β"] = "V", ["Γ"] = "G", ["Δ"] = "D", ["Ε"] = "E", ["Έ"] = "É", ["Ζ"] = "Z", ["Η"] = "I", ["Ή"] = "Í", ["Θ"] = "Th", ["Ι"] = "I", ["Ί"] = "Í", ["Κ"] = "K", ["Λ"] = "L", ["Μ"] = "M", ["Ν"] = "N", ["Ξ"] = "X", ["Ο"] = "O", ["Ό"] = "Ó", ["Π"] = "P", ["Ρ"] = "R", ["Σ"] = "S", ["Τ"] = "T", ["Υ"] = "Y", ["Ύ"] = "Ý", ["Φ"] = "F", ["Χ"] = "Ch", ["Ψ"] = "Ps", ["Ω"] = "O", ["Ώ"] = "Ó", ["·"] = ";"}
+
+-- Transliterate Greek words or phrases into the Latin script.
+-- Digraphs and context-dependent letters are rewritten first; every remaining
+-- character is then looked up in `tt` (characters without an entry are kept).
+-- `lang` and `sc` are accepted for interface compatibility and are not used.
+function export.tr(text, lang, sc)
+    local gsub = mw.ustring.gsub
+    local U = mw.ustring.char
+    local acute = mw.ustring.char(0x301)
+    local diaeresis = mw.ustring.char(0x308)
+
+    -- ";" or U+037E following anything but an ASCII letter or digit is the
+    -- Greek question mark.
+    text = gsub(text, "([^A-Za-z0-9])[;" .. U(0x37E) .. "]", "%1?")
+
+    -- αυ/ευ/ηυ: the upsilon becomes "f" before a voiceless consonant or at the
+    -- end of the text, "v" otherwise.
+    text = gsub(text, "([αεηΑΕΗ])([υύ])(.?)", function(vowel, upsilon, following)
+        -- Plain-text find: `following` is an arbitrary character, and using
+        -- it as a pattern raised errors for "(", "[" or "%" and matched
+        -- spuriously for ".", "^" or "$". An empty `following` (end of text)
+        -- is still found at position 1, so it keeps yielding "f".
+        local voiceless = ("θκξπσςτφχψ"):find(following, 1, true)
+        return tt[vowel] .. (upsilon == "ύ" and acute or "") .. (voiceless and "f" or "v") .. following
+    end)
+
+    -- vowel + η: the η is written "i" with a diaeresis (plus acute for ή).
+    text = gsub(text, "([αεοωΑΕΟΩ])([ηή])", function(vowel, ita)
+        if ita == "ή" then
+            return tt[vowel] .. "i" .. diaeresis .. acute
+        else
+            return tt[vowel] .. "i" .. diaeresis
+        end
+    end)
+
+    text = gsub(text, "[ωΩ][ιί]", {["ωι"] = "oï", ["ωί"] = "oḯ", ["Ωι"] = "Oï", ["Ωί"] = "Oḯ"})
+
+    text = gsub(text, "[οΟ][υύ]", {["ου"] = "ou", ["ού"] = "oú", ["Ου"] = "Ou", ["Ού"] = "Oú"})
+
+    -- μπ becomes b/B only at the start of the text or after a space or hyphen;
+    -- returning nothing from the callback leaves the match untouched.
+    text = gsub(text, "(.?)([μΜ])π", function(before, mi)
+        if before == "" or before == " " or before == "-" then
+            if mi == "Μ" then
+                return before .. "B"
+            else
+                return before .. "b"
+            end
+        end
+    end)
+
+    -- ντ becomes d/D under the same conditions.
+    text = gsub(text, "(.?)([νΝ])τ", function(before, ni)
+        if before == "" or before == " " or before == "-" then
+            if ni == "Ν" then
+                return before .. "D"
+            else
+                return before .. "d"
+            end
+        end
+    end)
+
+    -- γ before γ, ξ or χ is written n.
+    text = gsub(text, "γ([γξχ])", "n%1")
+
+    text = gsub(text, ".", tt)
+
+    return text
+end
+
+return export
diff --git a/wiktra/wikt/translit/en-ta-translit.lua b/wiktra/wikt/translit/en-ta-translit.lua
new file mode 100644
index 0000000..50a22f4
--- /dev/null
+++ b/wiktra/wikt/translit/en-ta-translit.lua
@@ -0,0 +1,25 @@
+local export = {}
+
+local consonants = {["k"] = "க", ["g"] = "க", ["ṅ"] = "ங", ["c"] = "ச", ["ñ"] = "ஞ", ["ṭ"] = "ட", ["ḍ"] = "ட", ["ṛ"] = "ட", ["ṇ"] = "ண", ["t"] = "த", ["d"] = "த", ["n"] = "ந", ["p"] = "ப", ["b"] = "ப", ["m"] = "ம", ["y"] = "ய", ["r"] = "ர", ["l"] = "ல", ["v"] = "வ", ["ḻ"] = "ழ", ["ḷ"] = "ள", ["ṟ"] = "ற", ["ṉ"] = "ன", ["ś"] = "ஶ", ["j"] = "ஜ", ["ṣ"] = "ஷ", ["s"] = "ஸ", ["h"] = "ஹ", ["f"] = "ஃப", ["z"] = "ஃஜ", ["x"] = "ஃஸ"}
+
+local diacritics = {["ai"] = "ை", ["au"] = "ௌ", ["a"] = "", ["ā"] = "ா", ["i"] = "ி", ["ī"] = "ீ", ["u"] = "ு", ["ū"] = "ூ", ["e"] = "ெ", ["ē"] = "ே", ["o"] = "ொ", ["ō"] = "ோ"}
+
+local nonconsonants = {["au"] = "ஔ", ["ai"] = "ஐ", ["a"] = "அ", ["0"] = "௦", ["ā"] = "ஆ", ["i"] = "இ", ["ī"] = "ஈ", ["u"] = "உ", ["ū"] = "ஊ", ["e"] = "எ", ["ē"] = "ஏ", ["o"] = "ஒ", ["ō"] = "ஓ", ["ḥ"] = "ஃ", ["1"] = "௧", ["2"] = "௨", ["3"] = "௩", ["4"] = "௪", ["5"] = "௫", ["6"] = "௬", ["7"] = "௭", ["8"] = "௮", ["9"] = "௯"}
+
+-- Convert romanised words or phrases into Tamil script using the
+-- `consonants`, `diacritics` and `nonconsonants` tables.
+-- `lang` and `sc` are not used.
+function export.tr(text, lang, sc)
+    local ugsub = mw.ustring.gsub
+    local consonant_class = "([kgṅcñṭḍṛṇtdnpbmyrlvḻḷṟṉśjṣshfzx])"
+
+    -- Pass 1: consonant followed by a, ai or au.
+    local function with_a_vowel(cons, vowel)
+        if vowel ~= "" then
+            return consonants[cons] .. diacritics[vowel]
+        end
+    end
+    text = ugsub(text, consonant_class .. "(a[iu]?)", with_a_vowel)
+
+    -- Pass 2: remaining consonants, with an optional single vowel; a bare
+    -- consonant receives the vowel-killer sign.
+    local function with_optional_vowel(cons, vowel)
+        local sign = "்"
+        if vowel ~= "" then
+            sign = diacritics[vowel]
+        end
+        return consonants[cons] .. sign
+    end
+    text = ugsub(text, consonant_class .. "([aāiīuūeēoō]?)", with_optional_vowel)
+
+    -- Pass 3: independent a / ai / au, then every other single character.
+    local function independent(vowel)
+        if vowel ~= "" then
+            return nonconsonants[vowel]
+        end
+    end
+    text = ugsub(text, "(a[iu]?)", independent)
+    text = ugsub(text, ".", nonconsonants)
+
+    return text
+end
+
+return export
diff --git a/wiktra/wikt/translit/ethi-translit.lua b/wiktra/wikt/translit/ethi-translit.lua
new file mode 100644
index 0000000..acef964
--- /dev/null
+++ b/wiktra/wikt/translit/ethi-translit.lua
@@ -0,0 +1,384 @@
+local export = {}
+
+local tt = {
+    ["ሀ"] = "hä",
+    ["ሁ"] = "hu",
+    ["ሂ"] = "hi",
+    ["ሃ"] = "ha",
+    ["ሄ"] = "he",
+    ["ህ"] = "hə",
+    ["ሆ"] = "ho",
+    ["ለ"] = "lä",
+    ["ሉ"] = "lu",
+    ["ሊ"] = "li",
+    ["ላ"] = "la",
+    ["ሌ"] = "le",
+    ["ል"] = "lə",
+    ["ሎ"] = "lo",
+    ["ሏ"] = "lʷa",
+    ["ሐ"] = "ḥä",
+    ["ሑ"] = "ḥu",
+    ["ሒ"] = "ḥi",
+    ["ሓ"] = "ḥa",
+    ["ሔ"] = "ḥe",
+    ["ሕ"] = "ḥə",
+    ["ሖ"] = "ḥo",
+    ["ሗ"] = "ḥʷa",
+    ["መ"] = "mä",
+    ["ሙ"] = "mu",
+    ["ሚ"] = "mi",
+    ["ማ"] = "ma",
+    ["ሜ"] = "me",
+    ["ም"] = "mə",
+    ["ሞ"] = "mo",
+    ["ሟ"] = "mʷa",
+    ["ፙ"] = "mʲä",
+    ["ሠ"] = "śä",
+    ["ሡ"] = "śu",
+    ["ሢ"] = "śi",
+    ["ሣ"] = "śa",
+    ["ሤ"] = "śe",
+    ["ሥ"] = "śə",
+    ["ሦ"] = "śo",
+    ["ሧ"] = "śʷa",
+    ["ረ"] = "rä",
+    ["ሩ"] = "ru",
+    ["ሪ"] = "ri",
+    ["ራ"] = "ra",
+    ["ሬ"] = "re",
+    ["ር"] = "rə",
+    ["ሮ"] = "ro",
+    ["ሯ"] = "rʷa",
+    ["ፘ"] = "rʲä",
+    ["ሰ"] = "sä",
+    ["ሱ"] = "su",
+    ["ሲ"] = "si",
+    ["ሳ"] = "sa",
+    ["ሴ"] = "se",
+    ["ስ"] = "sə",
+    ["ሶ"] = "so",
+    ["ሷ"] = "sʷa",
+    ["ሸ"] = "šä",
+    ["ሹ"] = "šu",
+    ["ሺ"] = "ši",
+    ["ሻ"] = "ša",
+    ["ሼ"] = "še",
+    ["ሽ"] = "šə",
+    ["ሾ"] = "šo",
+    ["ሿ"] = "šʷa",
+    ["ቀ"] = "ḳä",
+    ["ቁ"] = "ḳu",
+    ["ቂ"] = "ḳi",
+    ["ቃ"] = "ḳa",
+    ["ቄ"] = "ḳe",
+    ["ቅ"] = "ḳə",
+    ["ቆ"] = "ḳo",
+    ["ቈ"] = "ḳʷä",
+    ["ቊ"] = "ḳʷi",
+    ["ቋ"] = "ḳʷa",
+    ["ቌ"] = "ḳʷe",
+    ["ቍ"] = "ḳʷə",
+    ["ቐ"] = "ḳʰä",
+    ["ቑ"] = "ḳʰu",
+    ["ቒ"] = "ḳʰi",
+    ["ቓ"] = "ḳʰa",
+    ["ቔ"] = "ḳʰe",
+    ["ቕ"] = "ḳʰə",
+    ["ቖ"] = "ḳʰo",
+    ["ቘ"] = "ḳʰʷä",
+    ["ቚ"] = "ḳʰʷi",
+    ["ቛ"] = "ḳʰʷa",
+    ["ቜ"] = "ḳʰʷe",
+    ["ቝ"] = "ḳʰʷə",
+    ["በ"] = "bä",
+    ["ቡ"] = "bu",
+    ["ቢ"] = "bi",
+    ["ባ"] = "ba",
+    ["ቤ"] = "be",
+    ["ብ"] = "bə",
+    ["ቦ"] = "bo",
+    ["ቧ"] = "bʷa",
+    ["ቨ"] = "vä",
+    ["ቩ"] = "vu",
+    ["ቪ"] = "vi",
+    ["ቫ"] = "va",
+    ["ቬ"] = "ve",
+    ["ቭ"] = "və",
+    ["ቮ"] = "vo",
+    ["ቯ"] = "vʷa",
+    ["ተ"] = "tä",
+    ["ቱ"] = "tu",
+    ["ቲ"] = "ti",
+    ["ታ"] = "ta",
+    ["ቴ"] = "te",
+    ["ት"] = "tə",
+    ["ቶ"] = "to",
+    ["ቷ"] = "tʷa",
+    ["ቸ"] = "čä",
+    ["ቹ"] = "ču",
+    ["ቺ"] = "či",
+    ["ቻ"] = "ča",
+    ["ቼ"] = "če",
+    ["ች"] = "čə",
+    ["ቾ"] = "čo",
+    ["ቿ"] = "čʷa",
+    ["ኀ"] = "ḫä",
+    ["ኁ"] = "ḫu",
+    ["ኂ"] = "ḫi",
+    ["ኃ"] = "ḫa",
+    ["ኄ"] = "ḫe",
+    ["ኅ"] = "ḫə",
+    ["ኆ"] = "ḫo",
+    ["ኈ"] = "ḫʷä",
+    ["ኊ"] = "ḫʷi",
+    ["ኋ"] = "ḫʷa",
+    ["ኌ"] = "ḫʷe",
+    ["ኍ"] = "ḫʷə",
+    ["ነ"] = "nä",
+    ["ኑ"] = "nu",
+    ["ኒ"] = "ni",
+    ["ና"] = "na",
+    ["ኔ"] = "ne",
+    ["ን"] = "nə",
+    ["ኖ"] = "no",
+    ["ኗ"] = "nʷa",
+    ["ኘ"] = "ñä",
+    ["ኙ"] = "ñu",
+    ["ኚ"] = "ñi",
+    ["ኛ"] = "ña",
+    ["ኜ"] = "ñe",
+    ["ኝ"] = "ñə",
+    ["ኞ"] = "ño",
+    ["ኟ"] = "ñʷa",
+    ["አ"] = "ʾä",
+    ["ኡ"] = "ʾu",
+    ["ኢ"] = "ʾi",
+    ["ኣ"] = "ʾa",
+    ["ኤ"] = "ʾe",
+    ["እ"] = "ʾə",
+    ["ኦ"] = "ʾo",
+    ["ኧ"] = "ʾʷa",
+    ["ከ"] = "kä",
+    ["ኩ"] = "ku",
+    ["ኪ"] = "ki",
+    ["ካ"] = "ka",
+    ["ኬ"] = "ke",
+    ["ክ"] = "kə",
+    ["ኮ"] = "ko",
+    ["ኰ"] = "kʷä",
+    ["ኲ"] = "kʷi",
+    ["ኳ"] = "kʷa",
+    ["ኴ"] = "kʷe",
+    ["ኵ"] = "kʷə",
+    ["ኸ"] = "xä",
+    ["ኹ"] = "xu",
+    ["ኺ"] = "xi",
+    ["ኻ"] = "xa",
+    ["ኼ"] = "xe",
+    ["ኽ"] = "xə",
+    ["ኾ"] = "xo",
+    ["ዅ"] = "xʷə",
+    ["ዀ"] = "xʷä",
+    ["ዂ"] = "xʷi",
+    ["ዃ"] = "xʷa",
+    ["ዄ"] = "xʷe",
+    ["ወ"] = "wä",
+    ["ዉ"] = "wu",
+    ["ዊ"] = "wi",
+    ["ዋ"] = "wa",
+    ["ዌ"] = "we",
+    ["ው"] = "wə",
+    ["ዎ"] = "wo",
+    ["ዐ"] = "ʿä",
+    ["ዑ"] = "ʿu",
+    ["ዒ"] = "ʿi",
+    ["ዓ"] = "ʿa",
+    ["ዔ"] = "ʿe",
+    ["ዕ"] = "ʿə",
+    ["ዖ"] = "ʿo",
+    ["ዘ"] = "zä",
+    ["ዙ"] = "zu",
+    ["ዚ"] = "zi",
+    ["ዛ"] = "za",
+    ["ዜ"] = "ze",
+    ["ዝ"] = "zə",
+    ["ዞ"] = "zo",
+    ["ዟ"] = "zʷa",
+    ["ዠ"] = "žä",
+    ["ዡ"] = "žu",
+    ["ዢ"] = "ži",
+    ["ዣ"] = "ža",
+    ["ዤ"] = "že",
+    ["ዥ"] = "žə",
+    ["ዦ"] = "žo",
+    ["ዧ"] = "žʷa",
+    ["የ"] = "yä",
+    ["ዩ"] = "yu",
+    ["ዪ"] = "yi",
+    ["ያ"] = "ya",
+    ["ዬ"] = "ye",
+    ["ይ"] = "yə",
+    ["ዮ"] = "yo",
+    ["ደ"] = "dä",
+    ["ዱ"] = "du",
+    ["ዲ"] = "di",
+    ["ዳ"] = "da",
+    ["ዴ"] = "de",
+    ["ድ"] = "də",
+    ["ዶ"] = "do",
+    ["ዷ"] = "dʷa",
+    ["ጀ"] = "ǧä",
+    ["ጁ"] = "ǧu",
+    ["ጂ"] = "ǧi",
+    ["ጃ"] = "ǧa",
+    ["ጄ"] = "ǧe",
+    ["ጅ"] = "ǧə",
+    ["ጆ"] = "ǧo",
+    ["ጇ"] = "ǧʷa",
+    ["ገ"] = "gä",
+    ["ጉ"] = "gu",
+    ["ጊ"] = "gi",
+    ["ጋ"] = "ga",
+    ["ጌ"] = "ge",
+    ["ግ"] = "gə",
+    ["ጎ"] = "go",
+    ["ጐ"] = "gʷä",
+    ["ጒ"] = "gʷi",
+    ["ጓ"] = "gʷa",
+    ["ጔ"] = "gʷe",
+    ["ጕ"] = "gʷə",
+    ["ጘ"] = "ŋä",
+    ["ጙ"] = "ŋu",
+    ["ጚ"] = "ŋi",
+    ["ጛ"] = "ŋa",
+    ["ጜ"] = "ŋe",
+    ["ጝ"] = "ŋə",
+    ["ጞ"] = "ŋo",
+    ["ⶓ"] = "ŋʷä",
+    ["ⶔ"] = "ŋʷi",
+    ["ጟ"] = "ŋʷa",
+    ["ⶕ"] = "ŋʷe",
+    ["ⶖ"] = "ŋʷə",
+    ["ጠ"] = "ṭä",
+    ["ጡ"] = "ṭu",
+    ["ጢ"] = "ṭi",
+    ["ጣ"] = "ṭa",
+    ["ጤ"] = "ṭe",
+    ["ጥ"] = "ṭə",
+    ["ጦ"] = "ṭo",
+    ["ጧ"] = "ṭʷa",
+    ["ጨ"] = "č̣ä",
+    ["ጩ"] = "č̣u",
+    ["ጪ"] = "č̣i",
+    ["ጫ"] = "č̣a",
+    ["ጬ"] = "č̣e",
+    ["ጭ"] = "č̣ə",
+    ["ጮ"] = "č̣o",
+    ["ጯ"] = "č̣ʷa",
+    ["ጰ"] = "p̣ä",
+    ["ጱ"] = "p̣u",
+    ["ጲ"] = "p̣i",
+    ["ጳ"] = "p̣a",
+    ["ጴ"] = "p̣e",
+    ["ጵ"] = "p̣ə",
+    ["ጶ"] = "p̣o",
+    ["ጷ"] = "p̣ʷa",
+    ["ጸ"] = "ṣä",
+    ["ጹ"] = "ṣu",
+    ["ጺ"] = "ṣi",
+    ["ጻ"] = "ṣa",
+    ["ጼ"] = "ṣe",
+    ["ጽ"] = "ṣə",
+    ["ጾ"] = "ṣo",
+    ["ጿ"] = "ṣʷa",
+    ["ፀ"] = "ṣ́ä",
+    ["ፁ"] = "ṣ́u",
+    ["ፂ"] = "ṣ́i",
+    ["ፃ"] = "ṣ́a",
+    ["ፄ"] = "ṣ́e",
+    ["ፅ"] = "ṣ́ə",
+    ["ፆ"] = "ṣ́o",
+    ["ፈ"] = "fä",
+    ["ፉ"] = "fu",
+    ["ፊ"] = "fi",
+    ["ፋ"] = "fa",
+    ["ፌ"] = "fe",
+    ["ፍ"] = "fə",
+    ["ፎ"] = "fo",
+    ["ፏ"] = "fʷa",
+    ["ፚ"] = "fʲä",
+    ["ፐ"] = "pä",
+    ["ፑ"] = "pu",
+    ["ፒ"] = "pi",
+    ["ፓ"] = "pa",
+    ["ፔ"] = "pe",
+    ["ፕ"] = "pə",
+    ["ፖ"] = "po",
+    ["ፗ"] = "pʷa",
+    -- punctuation
+    ["፠"] = "§",
+    ["፡"] = "-",
+    ["።"] = ".",
+    ["፣"] = ",",
+    ["፤"] = ";",
+    ["፥"] = ":",
+    ["፦"] = ":-",
+    ["፧"] = "?",
+    ["፨"] = "¶"
+}
+
+local number = {["፩"] = 1, ["፪"] = 2, ["፫"] = 3, ["፬"] = 4, ["፭"] = 5, ["፮"] = 6, ["፯"] = 7, ["፰"] = 8, ["፱"] = 9, ["፲"] = 10, ["፳"] = 20, ["፴"] = 30, ["፵"] = 40, ["፶"] = 50, ["፷"] = 60, ["፸"] = 70, ["፹"] = 80, ["፺"] = 90}
+
+-- Convert a string of Ethiopic numerals to its numeric value.
+-- The numerals are read as tens/ones pairs scaled by ፻ (x100) and ፼ (x10000):
+--   * digit signs (1-9, 10-90) add to the current pair;
+--   * ፻ turns the current pair (1 if empty) into hundreds of the current
+--     myriad group;
+--   * ፼ closes the group and scales everything read so far by 10000
+--     (a bare ፼ counts as 10000).
+-- The previous version multiplied the whole running total at every ፻, so it
+-- was only right for small numbers (e.g. ፩፼፪፻ gave 1000200 instead of 10200).
+-- Characters that are not Ethiopic numerals are ignored. Returns a number.
+function export.number(geez)
+    local total = 0 -- value of the myriad groups already closed by ፼
+    local hundreds = 0 -- hundreds accumulated in the current group
+    local pair = 0 -- tens + ones read since the last ፻ or ፼
+
+    -- Splitting on the empty pattern yields one character at a time.
+    for digit in mw.text.gsplit(geez, "") do
+        if digit == "፻" then
+            hundreds = hundreds + (pair == 0 and 1 or pair) * 100
+            pair = 0
+        elseif digit == "፼" then
+            local group = total + hundreds + pair
+            total = (group == 0 and 1 or group) * 10000
+            hundreds, pair = 0, 0
+        elseif number[digit] then
+            pair = pair + number[digit]
+        end
+    end
+
+    return total + hundreds + pair
+end
+
+-- Transliterate Ethiopic text: map every character through `tt`, drop the
+-- vowel ə where the rules below say so, then convert runs of Ethiopic
+-- numerals with export.number. `lang` and `sc` are not used.
+function export.tr(text, lang, sc)
+
+    text = (mw.ustring.gsub(text, ".", tt))
+
+    -- remove ə that is not in the first syllable
+    -- [bdfghklmnprstvwxyzñčŋśšžǧʾʿḥḫḳṣṭ][ʰʲʷ̣́]*
+    -- matches one transliterated consonant
+    -- State shared between successive callback invocations:
+    --   prev_end_pos       position just after the previous consonant+ə match
+    --   prev_schwa_removed whether that previous match lost its ə
+    local prev_end_pos
+    local prev_schwa_removed = false
+    local text_len = mw.ustring.len(text)
+    -- Inside the callback `text` still holds the string as it was before this
+    -- gsub (the assignment happens only after gsub returns), so the captured
+    -- positions index into that unmodified string.
+    text = mw.ustring.gsub(text, "()(([bdfghklmnprstvwxyzñčŋśšžǧʾʿḥḫḳṣṭ][ʰʲʷ̣́]*)ə)()", function(start_pos, syllable, consonant, end_pos)
+        local ret
+        -- Keep the ə when the syllable starts the text or follows whitespace
+        -- or punctuation, or when it immediately follows a syllable whose ə
+        -- was removed and is itself not at the end of the text or before
+        -- whitespace/punctuation. Otherwise only the consonant is kept.
+        if (start_pos == 1 or mw.ustring.find(text, "^[%s%p]", start_pos - 1)) or (prev_end_pos == start_pos and prev_schwa_removed and not (end_pos == text_len + 1 or mw.ustring.find(text, "^[%s%p]", end_pos))) then
+            ret = syllable
+        else
+            ret = consonant
+        end
+        prev_schwa_removed = ret == consonant
+        prev_end_pos = end_pos
+        return ret
+    end)
+    -- Each run of Ethiopic numerals is replaced by the value computed by
+    -- export.number.
+    text = mw.ustring.gsub(text, "[፩-፼]+", export.number)
+
+    return text
+
+end
+
+return export
diff --git a/wiktra/wikt/translit/etymology languages.lua b/wiktra/wikt/translit/etymology languages.lua
new file mode 100644
index 0000000..afbf6e6
--- /dev/null
+++ b/wiktra/wikt/translit/etymology languages.lua	
@@ -0,0 +1,79 @@
+local export = {}
+
+-- Prototype for etymology-language objects. Instances carry `_code` and
+-- `_rawData` (the entry from the data module) and inherit the methods below
+-- through __index.
+local EtymologyLanguage = {}
+
+-- The language code this object was created with.
+function EtymologyLanguage:getCode()
+    return self._code
+end
+
+-- The canonical name stored in the raw data.
+function EtymologyLanguage:getCanonicalName()
+    return self._rawData.canonicalName
+end
+
+-- The display form is simply the canonical name.
+function EtymologyLanguage:getDisplayForm()
+    return self:getCanonicalName()
+end
+
+-- Delegates to the "language-like" module.
+function EtymologyLanguage:getOtherNames(onlyOtherNames)
+    local language_like = require("language-like")
+    return language_like.getOtherNames(self, onlyOtherNames)
+end
+
+-- Aliases from the raw data, or an empty table when none are recorded.
+function EtymologyLanguage:getAliases()
+    return self._rawData.aliases or {}
+end
+
+-- Delegates to the "language-like" module.
+function EtymologyLanguage:getVarieties(flatten)
+    local language_like = require("language-like")
+    return language_like.getVarieties(self, flatten)
+end
+
+-- function EtymologyLanguage:getAllNames()
+--	return self._rawData.names
+-- end
+
+-- The canonical name, with its first letter upper-cased unless `nocap` is
+-- truthy.
+function EtymologyLanguage:getCategoryName(nocap)
+    local name = self:getCanonicalName()
+    if nocap then
+        return name
+    end
+    return mw.getContentLanguage():ucfirst(name)
+end
+
+-- Wikilink to the category, labelled with the display form.
+function EtymologyLanguage:makeCategoryLink()
+    local target = self:getCategoryName()
+    local label = self:getDisplayForm()
+    return "[[:Category:" .. target .. "|" .. label .. "]]"
+end
+
+function EtymologyLanguage:getType()
+    return "etymology language"
+end
+
+-- Code of the parent language as stored in the raw data.
+function EtymologyLanguage:getParentCode()
+    return self._rawData.parent
+end
+
+-- Ancestor objects, built on first use and cached on the instance. Each code
+-- is resolved as an etymology language first, then through the "languages"
+-- module.
+function EtymologyLanguage:getAncestors()
+    local cached = self._ancestorObjects
+    if cached then
+        return cached
+    end
+
+    local ancestors = {}
+    self._ancestorObjects = ancestors
+    for _, code in ipairs(self._rawData.ancestors or {}) do
+        table.insert(ancestors, export.getByCode(code) or require("languages").getByCode(code))
+    end
+
+    return ancestors
+end
+
+-- Wikidata item id; a numeric id gets the "Q" prefix, anything else is
+-- returned as stored.
+function EtymologyLanguage:getWikidataItem()
+    local item = self._rawData.wikidata_item
+    return type(item) == "number" and "Q" .. item or item
+end
+
+-- Wikipedia article title: the explicit one from the raw data, else the
+-- enwiki sitelink of the Wikidata item (when mw.wikibase is available), else
+-- the canonical name.
+function EtymologyLanguage:getWikipediaArticle()
+    local raw = self._rawData
+    if raw.wikipedia_article then
+        return raw.wikipedia_article
+    end
+
+    local sitelink = self:getWikidataItem() and mw.wikibase and mw.wikibase.sitelink(self:getWikidataItem(), "enwiki")
+    return sitelink or raw.canonicalName
+end
+
+-- Interwiki link to the Wikipedia article, labelled with the canonical name.
+function EtymologyLanguage:makeWikipediaLink()
+    local article = self:getWikipediaArticle()
+    local label = self:getCanonicalName()
+    return "[[w:" .. article .. "|" .. label .. "]]"
+end
+
+-- JSON serialisation of the object's main properties.
+function EtymologyLanguage:toJSON()
+    local ret = {
+        canonicalName = self:getCanonicalName(),
+        categoryName = self:getCategoryName("nocap"),
+        code = self._code,
+        otherNames = self:getOtherNames(true),
+        aliases = self:getAliases(),
+        varieties = self:getVarieties(),
+        parent = self._rawData.parent,
+        type = self:getType()
+    }
+
+    return require("JSON").toJSON(ret)
+end
+
+-- The unprocessed data table backing this object.
+function EtymologyLanguage:getRawData()
+    return self._rawData
+end
+
+EtymologyLanguage.__index = EtymologyLanguage
+
+-- Wrap a raw data entry in an EtymologyLanguage object; returns nil when no
+-- data is given.
+function export.makeObject(code, data)
+    if not data then
+        return nil
+    end
+    return setmetatable({_rawData = data, _code = code}, EtymologyLanguage)
+end
+
+-- Look up an etymology language by code; nil when the code has no entry in
+-- the data module.
+function export.getByCode(code)
+    local entry = mw.loadData("etymology languages/data")[code]
+    return export.makeObject(code, entry)
+end
+
+-- Look up an etymology language by canonical name through the name -> code
+-- index; nil when the name is not listed there.
+function export.getByCanonicalName(name)
+    local code = mw.loadData("etymology languages/by name")[name]
+
+    if code then
+        local entry = mw.loadData("etymology languages/data")[code]
+        return export.makeObject(code, entry)
+    end
+
+    return nil
+end
+
+return export
diff --git a/wiktra/wikt/translit/etymology languages/by name.lua b/wiktra/wikt/translit/etymology languages/by name.lua
new file mode 100644
index 0000000..4f6adc2
--- /dev/null
+++ b/wiktra/wikt/translit/etymology languages/by name.lua	
@@ -0,0 +1,5 @@
+local export = {}
+
+-- Reverse index of the data module: canonical name -> language code.
+local languages = mw.loadData("etymology languages/data")
+for code, data in pairs(languages) do
+    export[data.canonicalName] = code
+end
+
+return export
diff --git a/wiktra/wikt/translit/etymology languages/data.lua b/wiktra/wikt/translit/etymology languages/data.lua
new file mode 100644
index 0000000..0c7c69d
--- /dev/null
+++ b/wiktra/wikt/translit/etymology languages/data.lua	
@@ -0,0 +1,1038 @@
+local m = {}
+
+-- Albanian varieties
+
+m["aln"] = {canonicalName = "Gheg Albanian", aliases = {"Gheg"}, parent = "sq", wikidata_item = 181037}
+
+m["aae"] = {canonicalName = "Arbëresh Albanian", aliases = {"Arbëreshë", "Arbëresh"}, parent = "sq", wikidata_item = 1075302}
+
+m["aat"] = {canonicalName = "Arvanitika Albanian", aliases = {"Arvanitika"}, parent = "sq", wikidata_item = 29347}
+
+m["als"] = {canonicalName = "Tosk Albanian", aliases = {"Tosk"}, parent = "sq", wikidata_item = 180937}
+
+-- Bantu varieties
+
+m["bnt-cmn"] = {canonicalName = "Common Bantu", parent = "bnt-pro"}
+
+-- Semitic varieties
+
+-- Akkadian varieties
+
+m["akk-old"] = {canonicalName = "Old Akkadian", parent = "akk"}
+
+m["akk-obb"] = {canonicalName = "Old Babylonian", parent = "akk"}
+
+m["akk-oas"] = {canonicalName = "Old Assyrian", parent = "akk"}
+
+m["akk-mbb"] = {canonicalName = "Middle Babylonian", parent = "akk"}
+
+m["akk-mas"] = {canonicalName = "Middle Assyrian", parent = "akk"}
+
+m["akk-nbb"] = {canonicalName = "Neo-Babylonian", parent = "akk"}
+
+m["akk-nas"] = {canonicalName = "Neo-Assyrian", parent = "akk"}
+
+m["akk-lbb"] = {canonicalName = "Late Babylonian", parent = "akk"}
+
+-- Aramaic varieties
+
+m["arc-bib"] = {canonicalName = "Biblical Aramaic", parent = "arc", wikidata_item = 843235}
+
+m["arc-cpa"] = {canonicalName = "Christian Palestinian Aramaic", aliases = {"Melkite Aramaic", "Palestinian Syriac", "Syropalestinian Aramaic"}, parent = "arc", wikidata_item = 60790119}
+
+m["arc-imp"] = {canonicalName = "Imperial Aramaic", aliases = {"Official Aramaic"}, parent = "arc", wikidata_item = 7079491}
+
+m["arc-hat"] = {canonicalName = "Hatran Aramaic", parent = "arc", wikidata_item = 3832926}
+
+m["arc-jla"] = {canonicalName = "Jewish Literary Aramaic", parent = "arc", wikidata_item = 105952842}
+
+m["arc-nab"] = {canonicalName = "Nabataean Aramaic", parent = "arc", wikidata_item = 36178}
+
+m["arc-old"] = {canonicalName = "Old Aramaic", parent = "arc", wikidata_item = 3398392}
+
+m["arc-pal"] = {canonicalName = "Palmyrene Aramaic", parent = "arc", wikidata_item = 1510113}
+
+m["tmr"] = {canonicalName = "Jewish Babylonian Aramaic", parent = "arc", wikidata_item = 33407}
+
+m["jpa"] = {canonicalName = "Jewish Palestinian Aramaic", aliases = {"Galilean Aramaic"}, parent = "arc", wikidata_item = 948909}
+
+-- Catalan varieties
+
+m["ca-val"] = {canonicalName = "Valencian", parent = "ca", wikidata_item = 32641}
+
+-- Central Nicobarese varieties
+
+m["ncb-cam"] = {canonicalName = "Camorta", aliases = {"Kamorta"}, parent = "ncb", wikidata_item = 5026908}
+
+m["ncb-kat"] = {canonicalName = "Katchal", aliases = {"Tehnu"}, parent = "ncb", wikidata_item = 17064263}
+
+m["ncb-nan"] = {canonicalName = "Nancowry", aliases = {"Nankwari"}, parent = "ncb", wikidata_item = 6962504}
+
+-- Chinese varieties
+
+m["och-ear"] = {canonicalName = "Early Old Chinese", parent = "och"}
+
+m["och-lat"] = {canonicalName = "Late Old Chinese", parent = "och"}
+
+m["ltc-ear"] = {canonicalName = "Early Middle Chinese", parent = "ltc"}
+
+m["ltc-lat"] = {canonicalName = "Late Middle Chinese", parent = "ltc"}
+
+m["cmn-ear"] = {canonicalName = "Early Mandarin", parent = "cmn"}
+
+m["wuu-sha"] = {canonicalName = "Shanghainese", parent = "wuu", wikidata_item = 36718}
+
+m["hsn-old"] = {canonicalName = "Old Xiang", aliases = {"Lou-Shao"}, parent = "hsn", wikidata_item = 7085453}
+
+m["zhx-pin"] = {canonicalName = "Pinghua", parent = "yue", wikidata_item = 2735715}
+
+m["nan-hai"] = {canonicalName = "Hainanese", parent = "nan", wikidata_item = 934541}
+
+m["nan-hok"] = {canonicalName = "Hokkien", parent = "nan", wikidata_item = 1624231}
+
+m["nan-xm"] = {canonicalName = "Amoy", aliases = {"Xiamenese"}, parent = "nan-hok", wikidata_item = 68744}
+
+m["nan-zz"] = {canonicalName = "Zhangzhou Hokkien", parent = "nan-hok"}
+
+m["nan-qz"] = {canonicalName = "Quanzhou Hokkien", parent = "nan-hok"}
+
+m["nan-jj"] = {canonicalName = "Jinjiang Hokkien", parent = "nan-qz"}
+
+m["nan-ph"] = {canonicalName = "Philippine Hokkien", parent = "nan-jj", wikidata_item = 3236692}
+
+-- Coptic varieties
+
+m["cop-akh"] = {canonicalName = "Akhmimic Coptic", aliases = {"Akhmimic"}, parent = "cop"}
+
+m["cop-boh"] = {canonicalName = "Bohairic Coptic", aliases = {"Bohairic", "Memphitic Coptic", "Memphitic"}, parent = "cop", wikidata_item = 890733}
+
+m["cop-ggg"] = {canonicalName = "Coptic Dialect G", aliases = {"Dialect G", "Mansuric Coptic", "Mansuric"}, parent = "cop"}
+
+m["cop-jjj"] = {canonicalName = "Coptic Dialect J", parent = "cop"}
+
+m["cop-kkk"] = {canonicalName = "Coptic Dialect K", parent = "cop"}
+
+m["cop-ppp"] = {canonicalName = "Coptic Dialect P", aliases = {"Proto-Theban Coptic", "Palaeo-Theban Coptic"}, parent = "cop"}
+
+m["cop-fay"] = {canonicalName = "Fayyumic Coptic", aliases = {"Fayyumic", "Faiyumic Coptic", "Faiyumic", "Fayumic Coptic", "Fayumic", "Bashmuric Coptic", "Bashmuric"}, parent = "cop", wikidata_item = 1399115}
+
+m["cop-her"] = {canonicalName = "Hermopolitan Coptic", aliases = {"Hermopolitan", "Coptic Dialect H", "Ashmuninic", "Ashmuninic Coptic"}, parent = "cop"}
+
+m["cop-lyc"] = {canonicalName = "Lycopolitan Coptic", aliases = {"Lycopolitan", "Assiutic Coptic", "Asyutic Coptic", "Assiutic", "Asyutic", "Lyco-Diospolitan Coptic", "Lyco-Diospolitan", "Subakhmimic Coptic", "Subakhmimic"}, parent = "cop"}
+
+m["cop-old"] = {canonicalName = "Old Coptic", parent = "cop"}
+
+m["cop-oxy"] = {canonicalName = "Oxyrhynchite Coptic", aliases = {"Oxyrhynchite", "Mesokemic Coptic", "Mesokemic", "Middle Egyptian Coptic"}, parent = "cop"}
+
+m["cop-ply"] = {canonicalName = "Proto-Lycopolitan Coptic", aliases = {"Coptic Dialect i", "Proto-Lyco-Diospolitan Coptic"}, parent = "cop"}
+
+m["cop-sah"] = {canonicalName = "Sahidic Coptic", aliases = {"Sahidic", "Saidic Coptic", "Saidic", "Thebaic Coptic", "Thebaic"}, parent = "cop", wikidata_item = 2645851}
+
+-- Egyptian varieties
+
+m["egy-old"] = {canonicalName = "Old Egyptian", parent = "egy", wikidata_item = 447117}
+
+m["egy-mid"] = {canonicalName = "Middle Egyptian", aliases = {"Classical Egyptian"}, parent = "egy", wikidata_item = 657330}
+
+m["egy-nmi"] = {canonicalName = "Neo-Middle Egyptian", aliases = {"Égyptien de tradition", "Traditional Egyptian"}, parent = "egy"}
+
+m["egy-lat"] = {canonicalName = "Late Egyptian", parent = "egy", wikidata_item = 1852329}
+
+-- Elamite varieties
+
+m["elx-old"] = {canonicalName = "Old Elamite", parent = "elx"}
+
+m["elx-mid"] = {canonicalName = "Middle Elamite", parent = "elx"}
+
+m["elx-neo"] = {canonicalName = "Neo-Elamite", parent = "elx"}
+
+m["elx-ach"] = {canonicalName = "Achaemenid Elamite", parent = "elx"}
+
+-- English, Scots and Old English varieties
+
+m["en-GB"] = {canonicalName = "British English", parent = "en", wikidata_item = 7979}
+m["British English"] = m["en-GB"]
+m["BE."] = m["en-GB"]
+
+m["en-US"] = {canonicalName = "American English", parent = "en", wikidata_item = 7976}
+m["American English"] = m["en-US"]
+m["AE."] = m["en-US"]
+
+m["en-geo"] = {canonicalName = "Geordie English", parent = "en"}
+
+-- Scots varieties
+
+m["sco-osc"] = {canonicalName = "Early Scots", parent = "enm", wikidata_item = 5326738}
+m["Early Scots"] = m["sco-osc"]
+m["Old Scots"] = m["sco-osc"]
+m["O.Sc."] = m["sco-osc"]
+
+m["sco-smi"] = {canonicalName = "Middle Scots", parent = "sco-osc", wikidata_item = 3327000}
+m["Middle Scots"] = m["sco-smi"]
+m["Mid.Sc."] = m["sco-smi"]
+
+m["sco-ins"] = {canonicalName = "Insular Scots", parent = "sco", wikidata_item = 16919205}
+m["Insular Scots"] = m["sco-ins"]
+m["Ins.Sc."] = m["sco-ins"]
+
+m["sco-uls"] = {canonicalName = "Ulster Scots", parent = "sco", wikidata_item = 201966}
+m["Ulster Scots"] = m["sco-uls"]
+m["Uls.Sc."] = m["sco-uls"]
+
+m["sco-nor"] = {canonicalName = "Northern Scots", parent = "sco", wikidata_item = 16928150}
+m["Northern Scots"] = m["sco-nor"]
+m["Nor.Sc."] = m["sco-nor"]
+
+m["sco-sou"] = {canonicalName = "South Scots", parent = "sco", wikidata_item = 7570457}
+m["Southern Scots"] = m["sco-sou"]
+m["Borders Scots"] = m["sco-sou"]
+m["Sou.Sc."] = m["sco-sou"]
+
+-- Middle English varieties
+m["enm-nor"] = {canonicalName = "Northern Middle English", aliases = {"Northumbrian Middle English"}, parent = "enm"}
+
+-- Old English varieties
+
+-- Includes both Mercian and Northumbrian.
+m["ang-ang"] = {canonicalName = "Anglian Old English", parent = "ang"}
+
+m["ang-mer"] = {canonicalName = "Mercian Old English", parent = "ang"}
+
+m["ang-nor"] = {canonicalName = "Northumbrian Old English", parent = "ang", wikidata_item = 1798915}
+
+--[[
+m["ang-wsx"] = {
+	canonicalName = "West Saxon Old English",
+	parent = "ang",
+}
+]]
+
+-- French and Norman varieties
+
+m["fro-nor"] = {canonicalName = "Old Northern French", aliases = {"Old Norman", "Old Norman French"}, parent = "fro", wikidata_item = 2044917}
+m["Old Northern French"] = m["fro-nor"]
+m["ONF."] = m["fro-nor"]
+
+m["fro-pic"] = {canonicalName = "Picard Old French", parent = "fro"}
+
+m["xno"] = {canonicalName = "Anglo-Norman", parent = "fro", wikidata_item = 35214}
+
+m["fr-CA"] = {canonicalName = "Canadian French", parent = "fr", wikidata_item = 1450506}
+m["Canadian French"] = m["fr-CA"]
+m["CF."] = m["fr-CA"]
+
+m["fr-CH"] = {canonicalName = "Switzerland French", parent = "fr", wikidata_item = 1480152}
+m["Swiss French"] = m["fr-CH"]
+m["Switzerland French"] = m["fr-CH"]
+
+m["fr-aca"] = {canonicalName = "Acadian French", parent = "fr", wikidata_item = 415109}
+m["Acadian French"] = m["fr-aca"]
+m["fra-aca"] = m["fr-aca"]
+
+m["frc"] = {canonicalName = "Cajun French", aliases = {"Louisiana French"}, parent = "fr", wikidata_item = 880301}
+
+-- Norman varieties
+
+m["roa-grn"] = {canonicalName = "Guernsey Norman", aliases = {"Guernsey"}, parent = "nrf", wikidata_item = 56428}
+m["nrf-grn"] = m["roa-grn"]
+
+m["roa-jer"] = {canonicalName = "Jersey Norman", aliases = {"Jersey"}, parent = "nrf", wikidata_item = 56430}
+m["nrf-jer"] = m["roa-jer"]
+
+-- Brythonic
+
+m["bry-ear"] = {canonicalName = "Early Brythonic", parent = "cel-bry-pro"}
+
+m["bry-lat"] = {canonicalName = "Late Brythonic", parent = "cel-bry-pro"}
+
+-- Gaulish
+
+m["xcg"] = {canonicalName = "Cisalpine Gaulish", parent = "cel-gau", wikidata_item = 3832927}
+
+m["xtg"] = {canonicalName = "Transalpine Gaulish", parent = "cel-gau", wikidata_item = 29977}
+
+-- Portuguese varieties
+
+m["pt-BR"] = {canonicalName = "Brazilian Portuguese", parent = "pt", wikidata_item = 750553}
+
+-- Spanish varieties
+
+m["es-AR"] = {canonicalName = "Rioplatense Spanish", parent = "es", wikidata_item = 509780}
+
+m["es-MX"] = {canonicalName = "Mexican Spanish", parent = "es", wikidata_item = 616620}
+
+m["es-US"] = {canonicalName = "United States Spanish", aliases = {"US Spanish"}, parent = "es", wikidata_item = 2301077}
+-- use label "US Spanish" to put Spanish terms in this category
+
+m["es-PR"] = {canonicalName = "Puerto Rican Spanish", parent = "es", wikidata_item = 7258609}
+
+-- Germanic varieties
+-- (modern) German varieties
+
+m["de-AT"] = {canonicalName = "Austrian German", parent = "de", wikidata_item = 306626}
+m["Austrian German"] = m["de-AT"]
+m["AG."] = m["de-AT"]
+
+m["de-AT-vie"] = {canonicalName = "Viennese German", parent = "de-AT", wikidata_item = 56474}
+m["Viennese German"] = m["de-AT-vie"]
+m["VG."] = m["de-AT-vie"]
+
+m["de-CH"] = {canonicalName = "Swiss High German", aliases = {"Schweizer Hochdeutsch", "Swiss Standard German"}, parent = "de", wikidata_item = 1366643}
+
+m["ksh"] = {canonicalName = "Kölsch", parent = "gmw-cfr", wikidata_item = 4624}
+m["Kölsch"] = m["ksh"]
+
+m["pfl"] = {canonicalName = "Palatine German", aliases = {"Pfälzisch", "Pälzisch", "Palatinate German"}, parent = "gmw-rfr", wikidata_item = 23014}
+
+m["sli"] = {canonicalName = "Silesian German", aliases = {"Silesian"}, parent = "gmw-ecg", wikidata_item = 152965}
+
+m["sxu"] = {canonicalName = "Upper Saxon", parent = "gmw-ecg", wikidata_item = 699284}
+
+-- Old High German varieties
+
+m["lng"] = {canonicalName = "Lombardic", parent = "goh", wikidata_item = 35972}
+m["Lombardic"] = m["lng"]
+m["goh-lng"] = m["lng"]
+
+-- Proto-West Germanic varieties
+
+m["frk"] = {canonicalName = "Frankish", aliases = {"Old Frankish"}, parent = "gmw-pro", wikidata_item = 10860505}
+
+-- Old Norse varieties
+
+m["non-oen"] = {canonicalName = "Old East Norse", parent = "non", wikidata_item = 10498031}
+
+m["non-ogt"] = {canonicalName = "Old Gutnish", aliases = {"Old Gotlandic"}, parent = "non", wikidata_item = 1133488}
+
+m["non-own"] = {canonicalName = "Old West Norse", parent = "non", wikidata_item = 10498026}
+
+-- Greek varieties
+
+m["qfa-sub-grc"] = {canonicalName = "Pre-Greek", parent = "qfa-sub", wikidata_item = 965052}
+m["pregrc"] = m["qfa-sub-grc"]
+
+m["grc-boi"] = {canonicalName = "Boeotian Greek", parent = "grc-aeo", wikidata_item = 406373}
+
+m["grc-koi"] = {canonicalName = "Koine Greek", aliases = {"Hellenistic Greek"}, parent = "grc", wikidata_item = 107358}
+m["Koine"] = m["grc-koi"]
+
+m["gkm"] = {canonicalName = "Byzantine Greek", aliases = {"Medieval Greek"}, parent = "grc", wikidata_item = 36387}
+m["Medieval Greek"] = m["gkm"]
+
+m["grc-dor"] = {canonicalName = "Doric Greek", parent = "grc", wikidata_item = 285494}
+
+m["grc-att"] = {canonicalName = "Attic Greek", parent = "grc", wikidata_item = 506588}
+
+m["grc-ion"] = {canonicalName = "Ionic Greek", parent = "grc", wikidata_item = 504165}
+
+m["grc-pam"] = {canonicalName = "Pamphylian Greek", parent = "grc", wikidata_item = 2271793}
+
+m["grc-kre"] = { -- code used elsewhere: see [[Module:grc:Dialects]]
+    canonicalName = "Cretan Ancient Greek", -- to distinguish from Cretan Greek below
+    parent = "grc-dor"
+}
+
+m["grc-arp"] = {canonicalName = "Arcadocypriot Greek", parent = "grc", wikidata_item = 499602}
+
+m["grc-arc"] = {canonicalName = "Arcadian Greek", parent = "grc-arp"}
+
+m["el-cyp"] = {canonicalName = "Cypriot Greek", aliases = {"Cypriotic Greek"}, parent = "el"}
+
+m["el-pap"] = {canonicalName = "Paphian Greek", parent = "el"}
+
+m["grc-aeo"] = {canonicalName = "Aeolic Greek", aliases = {"Lesbic Greek", "Lesbian Greek", "Aeolian Greek"}, parent = "grc", wikidata_item = 406373}
+
+m["loc-ozo"] = {canonicalName = "Ozolian Locrian", parent = "grc"}
+
+m["loc-opu"] = {canonicalName = "Opuntian Locrian", parent = "grc"}
+
+m["grc-ths"] = {canonicalName = "Thessalian Greek", parent = "grc-aeo", wikidata_item = 406373}
+
+m["grc-ela"] = {canonicalName = "Elean Greek", parent = "grc"}
+
+m["grc-epc"] = {canonicalName = "Epic Greek", aliases = {"Homeric Greek"}, parent = "grc", wikidata_item = 990062}
+
+m["el-crt"] = {canonicalName = "Cretan Greek", parent = "el", wikidata_item = 588306}
+
+-- Hebrew varieties
+
+m["hbo"] = {canonicalName = "Biblical Hebrew", aliases = {"Classical Hebrew"}, parent = "he", wikidata_item = 1982248}
+
+m["he-mis"] = {canonicalName = "Mishnaic Hebrew", parent = "he", wikidata_item = 1649362}
+
+m["he-med"] = {canonicalName = "Medieval Hebrew", parent = "he", wikidata_item = 2712572}
+
+m["he-IL"] = {canonicalName = "Modern Israeli Hebrew", parent = "he", wikidata_item = 8141}
+
+m["bsh-kat"] = {canonicalName = "Kativiri", aliases = {"Katə́viri"}, parent = "bsh", wikidata_item = 2605045}
+
+m["bsh-kam"] = {canonicalName = "Kamviri", aliases = {"Kamvíri"}, parent = "bsh", wikidata_item = 1193495}
+m["xvi"] = m["bsh-kam"]
+
+m["bsh-mum"] = {canonicalName = "Mumviri", aliases = {"Mumvíri"}, parent = "bsh"}
+
+-- Iranian varieties:
+
+m["qfa-sub-bma"] = {canonicalName = "the BMAC substrate", parent = "qfa-sub", wikidata_item = 1054850}
+
+-- Historical and current Iranian dialects
+
+m["ae-old"] = {canonicalName = "Old Avestan", aliases = {"Gathic Avestan"}, parent = "ae", wikidata_item = 29572}
+
+m["ae-yng"] = {canonicalName = "Younger Avestan", aliases = {"Young Avestan"}, parent = "ae-old"}
+
+m["bcc"] = {canonicalName = "Southern Balochi", aliases = {"Southern Baluchi"}, parent = "bal", wikidata_item = 33049}
+m["bal-sou"] = m["bcc"]
+
+m["bgp"] = {canonicalName = "Eastern Balochi", aliases = {"Eastern Baluchi"}, parent = "bal", wikidata_item = 33049}
+m["bal-eas"] = m["bgp"]
+
+m["bgn"] = {canonicalName = "Western Balochi", aliases = {"Western Baluchi"}, parent = "bal", wikidata_item = 33049}
+m["bal-wes"] = m["bgn"]
+
+-- "bgn" is defined once, above: reassigning it here would detach the "bal-wes" alias and drop wikidata_item.
+
+m["bsg-ban"] = {canonicalName = "Bandari", parent = "bsg"}
+
+m["bsg-bas"] = {canonicalName = "Bashkardi", parent = "bsg"}
+
+m["bsg-hor"] = {canonicalName = "Hormozi", parent = "bsg"}
+
+m["bsg-min"] = {canonicalName = "Minabi", parent = "bsg"}
+
+m["ira-mid"] = {canonicalName = "Middle Iranian", parent = "ira", wikidata_item = 33527}
+m["MIr."] = m["ira-mid"]
+
+m["ira-old"] = {canonicalName = "Old Iranian", parent = "ira", wikidata_item = 33527}
+m["OIr."] = m["ira-old"]
+
+m["kho-old"] = {canonicalName = "Old Khotanese", parent = "kho"}
+
+m["kho-lat"] = {canonicalName = "Late Khotanese", parent = "kho-old"}
+
+m["peo-ear"] = {canonicalName = "Early Old Persian", parent = "peo"}
+
+m["peo-lat"] = {canonicalName = "Late Old Persian", parent = "peo"}
+
+m["pal-ear"] = {canonicalName = "Early Middle Persian", parent = "pal"}
+
+m["pal-lat"] = {canonicalName = "Late Middle Persian", parent = "pal"}
+
+m["ps-nwe"] = {canonicalName = "Northwestern Pashto", parent = "ps"}
+
+m["ps-cgi"] = {canonicalName = "Central Ghilzay", parent = "ps-nwe"}
+
+m["ps-mah"] = {canonicalName = "Mahsudi", parent = "ps-nwe"}
+
+m["ps-nea"] = {canonicalName = "Northeastern Pashto", parent = "ps"}
+
+m["ps-afr"] = {canonicalName = "Afridi", parent = "ps-nea"}
+
+m["ps-bng"] = {canonicalName = "Bangash", parent = "ps-nea"}
+
+m["ps-xat"] = {canonicalName = "Khatak", parent = "ps-nea"}
+
+m["ps-pes"] = {canonicalName = "Peshawari", parent = "ps-nea"}
+
+m["ps-sea"] = {canonicalName = "Southeastern Pashto", parent = "ps"}
+
+m["ps-ban"] = {canonicalName = "Bannu", parent = "ps-sea"}
+
+m["ps-kak"] = {canonicalName = "Kakari", parent = "ps-sea"}
+
+m["ps-ser"] = {canonicalName = "Sher", parent = "ps-sea"}
+
+m["ps-waz"] = {canonicalName = "Waziri", parent = "ps-sea"}
+
+m["ps-swe"] = {canonicalName = "Southwestern Pashto", parent = "ps"}
+
+m["ps-kan"] = {canonicalName = "Kandahari", parent = "ps-swe"}
+
+m["ps-jad"] = {canonicalName = "Jadrani", parent = "ps"}
+
+m["xme-azr"] = {canonicalName = "Old Azari", aliases = {"Old Azeri", "Azari", "Azeri", "Āḏarī", "Adari", "Adhari"}, parent = "xme-ott"}
+
+m["xme-ttc-cen"] = {canonicalName = "Central Tati", parent = "xme-ott"}
+
+m["xme-ttc-eas"] = {canonicalName = "Eastern Tati", parent = "xme-ott"}
+
+m["xme-ttc-nor"] = {canonicalName = "Northern Tati", parent = "xme-ott"}
+
+m["xme-ttc-sou"] = {canonicalName = "Southern Tati", parent = "xme-ott"}
+
+m["xme-ttc-wes"] = {canonicalName = "Western Tati", parent = "xme-ott"}
+
+m["xmn"] = {canonicalName = "Manichaean Middle Persian", parent = "pal"}
+
+m["fa-ira"] = {canonicalName = "Iranian Persian", aliases = {"Modern Persian", "Western Persian"}, parent = "fa", wikidata_item = 3513637}
+
+m["fa-cls"] = {canonicalName = "Classical Persian", parent = "fa", wikidata_item = 9168}
+
+m["prs"] = {canonicalName = "Dari", aliases = {"Dari Persian", "Central Persian", "Eastern Persian", "Afghan Persian"}, parent = "fa", wikidata_item = 178440}
+
+m["os-dig"] = {canonicalName = "Digor", aliases = {"Digoron"}, parent = "os", wikidata_item = 3027861}
+
+m["os-iro"] = {canonicalName = "Iron", parent = "os"}
+
+m["sog-ear"] = {canonicalName = "Early Sogdian", parent = "sog"}
+
+m["sog-lat"] = {canonicalName = "Late Sogdian", parent = "sog-ear"}
+
+m["oru-kan"] = {canonicalName = "Kaniguram", parent = "oru", wikidata_item = 6363164}
+
+m["oru-log"] = {canonicalName = "Logar", parent = "oru"}
+
+m["oos-ear"] = {canonicalName = "Early Old Ossetic", parent = "oos"}
+
+m["oos-lat"] = {canonicalName = "Late Old Ossetic", parent = "oos"}
+
+m["xln"] = {canonicalName = "Alanic", parent = "oos"}
+
+m["rdb-rud"] = {canonicalName = "Rudbari", parent = "rdb"}
+
+m["rdb-jir"] = {canonicalName = "Jirofti", parent = "rdb"}
+
+m["rdb-kah"] = {canonicalName = "Kahnuji", parent = "rdb"}
+
+-- Southwestern Fars lects
+
+m["fay-bur"] = {canonicalName = "Burenjani", parent = "fay"}
+
+m["fay-bsh"] = {canonicalName = "Bushehri", parent = "fay"}
+
+m["fay-dsh"] = {canonicalName = "Dashtaki", parent = "fay"}
+
+m["fay-dav"] = {canonicalName = "Davani", parent = "fay", wikidata_item = 5228140}
+
+m["fay-eze"] = {canonicalName = "Emamzada Esma’ili", parent = "fay"}
+
+m["fay-gav"] = {canonicalName = "Gavkoshaki", parent = "fay"}
+
+m["fay-kho"] = {canonicalName = "Khollari", parent = "fay"}
+
+m["fay-kon"] = {canonicalName = "Kondazi", parent = "fay"}
+
+m["fay-kzo"] = {canonicalName = "Old Kazeruni", parent = "fay"}
+
+m["fay-mas"] = {canonicalName = "Masarami", parent = "fay"}
+
+m["fay-pap"] = {canonicalName = "Papuni", parent = "fay"}
+
+m["fay-sam"] = {canonicalName = "Samghani", parent = "fay"}
+
+m["fay-shr"] = {canonicalName = "Shirazi", parent = "fay"}
+
+m["fay-sho"] = {canonicalName = "Old Shirazi", parent = "fay"}
+
+-- "fay-sam" (Samghani) is already defined above; each code must be assigned only once.
+
+m["fay-kar"] = {canonicalName = "Khargi", parent = "fay"}
+
+m["fay-sor"] = {canonicalName = "Sorkhi", parent = "fay"}
+
+-- Talysh lects
+
+m["tly-cen"] = {canonicalName = "Central Talysh", parent = "tly"}
+
+m["tly-asa"] = {canonicalName = "Asalemi", parent = "tly-cen"}
+
+m["tly-kar"] = {canonicalName = "Karganrudi", parent = "tly-cen"}
+
+m["tly-tul"] = {canonicalName = "Tularudi", parent = "tly-cen"}
+
+m["tly-tal"] = {canonicalName = "Taleshdulabi", parent = "tly-cen"}
+
+m["tly-nor"] = {canonicalName = "Northern Talysh", parent = "tly"}
+
+m["tly-aze"] = {canonicalName = "Azerbaijani Talysh", parent = "tly-nor"}
+
+m["tly-anb"] = {canonicalName = "Anbarani", parent = "tly-nor"}
+
+m["tly-sou"] = {canonicalName = "Southern Talysh", parent = "tly"}
+
+m["tly-fum"] = {canonicalName = "Fumani", parent = "tly-sou"}
+
+m["tly-msu"] = {canonicalName = "Masulei", parent = "tly-sou"}
+
+m["tly-msa"] = {canonicalName = "Masali", parent = "tly-sou"}
+
+m["tly-san"] = {canonicalName = "Shandarmani", parent = "tly-sou"}
+
+-- Tafreshi lects
+
+m["xme-amo"] = {canonicalName = "Amorehi", parent = "xme-taf"}
+
+m["xme-ast"] = {canonicalName = "Ashtiani", parent = "xme-taf", wikipedia_article = "Ashtiani language", wikidata_item = 3436590}
+m["atn"] = m["xme-ast"]
+
+m["xme-bor"] = {canonicalName = "Borujerdi", parent = "xme-taf"}
+
+m["xme-ham"] = {canonicalName = "Hamadani", parent = "xme-taf", wikidata_item = 6302426}
+
+m["xme-kah"] = {canonicalName = "Kahaki", parent = "xme-taf"}
+
+m["xme-vaf"] = {canonicalName = "Vafsi", parent = "xme-taf", wikidata_item = 32611}
+m["vaf"] = m["xme-vaf"]
+
+-- Kermanic lects
+
+m["xme-xun"] = {canonicalName = "Khunsari", parent = "xme-ker", wikipedia_article = "Khunsari language", wikidata_item = 6403030}
+m["kfm"] = m["xme-xun"]
+
+m["xme-mah"] = {canonicalName = "Mahallati", parent = "xme-ker"}
+
+m["xme-von"] = {canonicalName = "Vonishuni", parent = "xme-ker"}
+
+m["xme-bdr"] = {canonicalName = "Badrudi", parent = "xme-ker"}
+
+m["xme-del"] = {canonicalName = "Delijani", parent = "xme-ker"}
+
+m["xme-kas"] = {canonicalName = "Kashani", parent = "xme-ker"}
+
+m["xme-kes"] = {canonicalName = "Kesehi", parent = "xme-ker"}
+
+m["xme-mey"] = {canonicalName = "Meymehi", parent = "xme-ker"}
+
+m["xme-nat"] = {canonicalName = "Natanzi", parent = "xme-ker", wikipedia_article = "Natanzi language", wikidata_item = 6968399}
+m["ntz"] = m["xme-nat"]
+
+m["xme-abz"] = {canonicalName = "Abuzeydabadi", parent = "xme-ker"}
+
+m["xme-aby"] = {canonicalName = "Abyanehi", parent = "xme-ker"}
+
+m["xme-far"] = {canonicalName = "Farizandi", parent = "xme-ker"}
+
+m["xme-jow"] = {canonicalName = "Jowshaqani", parent = "xme-ker"}
+
+m["xme-nas"] = {canonicalName = "Nashalji", parent = "xme-ker"}
+
+m["xme-qoh"] = {canonicalName = "Qohrudi", parent = "xme-ker"}
+
+m["xme-yar"] = {canonicalName = "Yarandi", parent = "xme-ker"}
+
+m["xme-soi"] = {canonicalName = "Soi", aliases = {"Sohi"}, parent = "xme-ker", wikipedia_article = "Soi language", wikidata_item = 7930463}
+m["soj"] = m["xme-soi"]
+
+m["xme-tar"] = {canonicalName = "Tari", parent = "xme-ker"}
+
+m["xme-gaz"] = {canonicalName = "Gazi", parent = "xme-ker", wikipedia_article = "Gazi language", wikidata_item = 5529130}
+m["gzi"] = m["xme-gaz"]
+
+m["xme-sed"] = {canonicalName = "Sedehi", parent = "xme-ker"}
+
+m["xme-ard"] = {canonicalName = "Ardestani", parent = "xme-ker"}
+
+m["xme-zef"] = {canonicalName = "Zefrehi", parent = "xme-ker"}
+
+m["xme-isf"] = {canonicalName = "Isfahani", parent = "xme-ker"}
+
+m["xme-kaf"] = {canonicalName = "Kafroni", parent = "xme-ker"}
+
+m["xme-vrz"] = {canonicalName = "Varzenehi", parent = "xme-ker"}
+
+m["xme-xur"] = {canonicalName = "Khuri", parent = "xme-ker"}
+
+m["xme-nay"] = {canonicalName = "Nayini", parent = "xme-ker", wikipedia_article = "Nayini language", wikidata_item = 6983146}
+m["nyq"] = m["xme-nay"]
+
+m["xme-ana"] = {canonicalName = "Anaraki", parent = "xme-ker"}
+
+m["xme-dar"] = {canonicalName = "Zoroastrian Dari", aliases = {"Behdināni", "Gabri", "Gavrŭni", "Gabrōni"}, parent = "xme-ker", wikipedia_article = "Zoroastrian Dari language", wikidata_item = 32389}
+m["gbz"] = m["xme-dar"]
+
+m["xme-krm"] = {canonicalName = "Kermani", parent = "xme-ker"}
+
+m["xme-yaz"] = {canonicalName = "Yazdi", parent = "xme-ker"}
+
+m["xme-bid"] = {canonicalName = "Bidhandi", parent = "xme-ker"}
+
+m["xme-bij"] = {canonicalName = "Bijagani", parent = "xme-ker"}
+
+m["xme-cim"] = {canonicalName = "Chimehi", parent = "xme-ker"}
+
+m["xme-han"] = {canonicalName = "Hanjani", parent = "xme-ker"}
+
+m["xme-kom"] = {canonicalName = "Komjani", parent = "xme-ker"}
+
+m["xme-nar"] = {canonicalName = "Naraqi", parent = "xme-ker"}
+
+m["xme-nus"] = {canonicalName = "Nushabadi", parent = "xme-ker"}
+
+m["xme-qal"] = {canonicalName = "Qalhari", parent = "xme-ker"}
+
+m["xme-trh"] = {canonicalName = "Tarehi", parent = "xme-ker"}
+
+m["xme-val"] = {canonicalName = "Valujerdi", parent = "xme-ker"}
+
+m["xme-var"] = {canonicalName = "Varani", parent = "xme-ker"}
+
+m["xme-zor"] = {canonicalName = "Zori", parent = "xme-ker"}
+
+-- Ramandi lects
+
+m["tks-ebr"] = {canonicalName = "Ebrahimabadi", parent = "tks"}
+
+m["tks-sag"] = {canonicalName = "Sagzabadi", parent = "tks"}
+
+m["tks-esf"] = {canonicalName = "Esfarvarini", parent = "tks"}
+
+m["tks-tak"] = {canonicalName = "Takestani", parent = "tks"}
+
+m["tks-cal"] = {canonicalName = "Chali", parent = "tks"}
+
+m["tks-dan"] = {canonicalName = "Danesfani", parent = "tks"}
+
+m["tks-xia"] = {canonicalName = "Khiaraji", parent = "tks"}
+
+m["tks-xoz"] = {canonicalName = "Khoznini", parent = "tks"}
+
+-- Shughni dialects
+
+m["sgh-bro"] = {canonicalName = "Bartangi-Oroshori", parent = "sgh"}
+
+m["sgh-bar"] = {canonicalName = "Bartangi", parent = "sgh-bro"}
+
+m["sgh-oro"] = {canonicalName = "Oroshori", parent = "sgh-bro", aliases = {"Roshorvi"}}
+
+m["sgh-rsx"] = {canonicalName = "Roshani-Khufi", parent = "sgh"}
+
+m["sgh-xuf"] = {canonicalName = "Khufi", parent = "sgh-rsx"}
+
+m["sgh-ros"] = {canonicalName = "Roshani", parent = "sgh-rsx"}
+
+m["sgh-xgb"] = {canonicalName = "Khughni-Bajui", parent = "sgh"}
+
+m["sgh-xug"] = {canonicalName = "Khughni", parent = "sgh-xgb"}
+
+m["sgh-baj"] = {canonicalName = "Bajui", parent = "sgh-xgb"}
+
+-- Indo-Aryan varieties
+
+m["inc-mit"] = {canonicalName = "Mitanni", parent = "inc-pro", wikidata_item = 1986700}
+
+m["awa-old"] = {canonicalName = "Old Awadhi", parent = "awa"}
+
+m["bra-old"] = {canonicalName = "Old Braj", parent = "bra"}
+
+m["gu-kat"] = {canonicalName = "Kathiyawadi", aliases = {"Kathiyawadi Gujarati", "Kathiawadi"}, parent = "gu"}
+
+m["gu-lda"] = {canonicalName = "Lisan ud-Dawat Gujarati", aliases = {"Lisan ud-Dawat", "LDA"}, parent = "gu"}
+
+m["hi-mum"] = {canonicalName = "Bombay Hindi", aliases = {"Mumbai Hindi", "Bambaiyya Hindi"}, parent = "hi", wikidata_item = 3543151}
+
+m["hi-mid"] = {canonicalName = "Middle Hindi", parent = "hi"}
+
+m["sa-bhs"] = {canonicalName = "Buddhist Hybrid Sanskrit", parent = "sa", wikidata_item = 248758}
+
+m["sa-bra"] = {canonicalName = "Brahmanic Sanskrit", parent = "sa", wikidata_item = 36858}
+
+m["sa-cls"] = {canonicalName = "Classical Sanskrit", parent = "sa", wikidata_item = 11059}
+
+m["sa-neo"] = {canonicalName = "New Sanskrit", parent = "sa", wikidata_item = 11059}
+
+m["sa-ved"] = {canonicalName = "Vedic Sanskrit", parent = "sa", wikidata_item = 36858}
+
+m["si-med"] = {canonicalName = "Medieval Sinhalese", aliases = {"Medieval Sinhala"}, parent = "si"}
+
+m["kok-mid"] = {canonicalName = "Middle Konkani", aliases = {"Medieval Konkani"}, parent = "kok"}
+
+m["kok-old"] = {canonicalName = "Old Konkani", aliases = {"Early Konkani"}, parent = "kok"}
+
+-- Indian subcontinent languages
+
+-- Dhivehi varieties
+
+m["mlk-dv"] = {canonicalName = "Mulaku Dhivehi", aliases = {"Mulaku Divehi", "Mulaku Bas"}, parent = "dv"}
+
+m["hvd-dv"] = {canonicalName = "Huvadhu Dhivehi", aliases = {"Huvadhu Divehi", "Huvadhu Bas"}, parent = "dv"}
+
+m["add-dv"] = {canonicalName = "Addu Dhivehi", aliases = {"Addu Divehi", "Addu Bas"}, parent = "dv"}
+
+-- Dravidian varieties
+
+m["ta-mid"] = {canonicalName = "Middle Tamil", parent = "ta", wikidata_item = 20987434}
+
+m["kn-hav"] = {canonicalName = "Havyaka Kannada", aliases = {"Havigannada"}, parent = "kn", wikidata_item = 24276369}
+
+-- Prakrits
+
+m["inc-pka"] = {canonicalName = "Ardhamagadhi Prakrit", aliases = {"Ardhamagadhi"}, parent = "inc-pra", wikidata_item = 35217}
+
+m["inc-elu"] = {canonicalName = "Helu Prakrit", aliases = {"Elu", "Elu Prakrit", "Helu"}, parent = "inc-pra", wikidata_item = 15080869}
+
+m["inc-khs"] = {canonicalName = "Khasa Prakrit", aliases = {"Khasa"}, parent = "inc-pra"}
+
+m["inc-pmg"] = {canonicalName = "Magadhi Prakrit", aliases = {"Magadhi"}, parent = "inc-pra", wikidata_item = 2652214}
+
+m["inc-pmh"] = {canonicalName = "Maharastri Prakrit", aliases = {"Maharashtri Prakrit", "Maharastri", "Maharashtri"}, parent = "inc-pra", wikidata_item = 2586773}
+
+m["inc-psi"] = {canonicalName = "Paisaci Prakrit", aliases = {"Paisaci", "Paisachi"}, parent = "inc-pra", wikidata_item = 2995607}
+
+m["inc-pse"] = {canonicalName = "Sauraseni Prakrit", aliases = {"Sauraseni", "Shauraseni"}, parent = "inc-pra", wikidata_item = 2452885}
+
+m["prk-avt"] = {canonicalName = "Avanti", aliases = {"Avanti Prakrit"}, parent = "inc-pra"}
+
+m["prc-prk"] = {canonicalName = "Pracya", aliases = {"Pracya Prakrit"}, parent = "inc-pra"}
+
+m["bhl-prk"] = {canonicalName = "Bahliki", aliases = {"Bahliki Prakrit"}, parent = "inc-pra"}
+
+m["dks-prk"] = {canonicalName = "Daksinatya", aliases = {"Daksinatya Prakrit"}, parent = "inc-pra"}
+
+m["skr-prk"] = {canonicalName = "Sakari", aliases = {"Sakari Prakrit"}, parent = "inc-pra"}
+
+m["cnd-prk"] = {canonicalName = "Candali", aliases = {"Candali Prakrit"}, parent = "inc-pra"}
+
+m["sbr-prk"] = {canonicalName = "Sabari", aliases = {"Sabari Prakrit"}, parent = "inc-pra"}
+
+m["abh-prk"] = {canonicalName = "Abhiri", aliases = {"Abhiri Prakrit"}, parent = "inc-pra"}
+
+m["drm-prk"] = {canonicalName = "Dramili", aliases = {"Dramili Prakrit"}, parent = "inc-pra"}
+
+m["odr-prk"] = {canonicalName = "Odri", aliases = {"Odri Prakrit"}, parent = "inc-pra"}
+
+-- Italian, Latin and other Italic varieties
+
+m["it-oit"] = {canonicalName = "Old Italian", parent = "it", wikidata_item = 652}
+m["roa-oit"] = m["it-oit"]
+
+m["it-CH"] = {canonicalName = "Switzerland Italian", parent = "it", wikidata_item = 672147}
+m["Swiss Italian"] = m["it-CH"]
+m["Switzerland Italian"] = m["it-CH"]
+
+-- Latin varieties by period
+
+m["la-lat"] = {canonicalName = "Late Latin", parent = "la", wikidata_item = 1503113}
+m["Late Latin"] = m["la-lat"]
+m["LL."] = m["la-lat"]
+m["LL"] = m["la-lat"]
+
+m["la-vul"] = {canonicalName = "Vulgar Latin", parent = "la", wikidata_item = 37560}
+m["Vulgar Latin"] = m["la-vul"]
+m["VL."] = m["la-vul"]
+
+m["la-med"] = {canonicalName = "Medieval Latin", parent = "la", wikidata_item = 1163234}
+m["Medieval Latin"] = m["la-med"]
+m["ML."] = m["la-med"]
+m["ML"] = m["la-med"]
+
+m["la-ecc"] = {canonicalName = "Ecclesiastical Latin", aliases = {"Church Latin"}, parent = "la", wikidata_item = 1247932}
+m["Ecclesiastical Latin"] = m["la-ecc"]
+m["EL."] = m["la-ecc"]
+
+m["la-ren"] = {canonicalName = "Renaissance Latin", parent = "la", wikidata_item = 499083}
+m["Renaissance Latin"] = m["la-ren"]
+m["RL."] = m["la-ren"]
+
+m["la-new"] = {canonicalName = "New Latin", aliases = {"Modern Latin"}, parent = "la", wikidata_item = 1248221}
+m["New Latin"] = m["la-new"]
+m["NL."] = m["la-new"]
+
+-- other Italic lects
+
+m["osc-luc"] = {canonicalName = "Lucanian", parent = "osc"}
+
+m["osc-sam"] = {canonicalName = "Samnite", parent = "osc"}
+
+m["xum-her"] = {canonicalName = "Hernician", parent = "xum"}
+
+-- Malay and related varieties
+
+m["ms-old"] = {canonicalName = "Old Malay", parent = "ms"}
+
+m["ms-cla"] = {canonicalName = "Classical Malay", parent = "ms"}
+
+m["pse-bsm"] = {canonicalName = "Besemah", parent = "pse"}
+
+m["bew-kot"] = {canonicalName = "Betawi Kota", parent = "bew"}
+
+m["bew-ora"] = {canonicalName = "Betawi Ora", parent = "bew"}
+
+m["bew-udi"] = {canonicalName = "Betawi Udik", parent = "bew"}
+
+-- Mongolic lects
+
+m["xng-ear"] = {canonicalName = "Early Middle Mongolian", parent = "xng"}
+
+m["xng-lat"] = {canonicalName = "Late Middle Mongolian", parent = "xng"}
+
+m["mn-kha"] = {canonicalName = "Khalkha Mongolian", aliases = {"Khalkha"}, parent = "mn", wikidata_item = 6399808}
+
+m["mn-ord"] = {canonicalName = "Ordos Mongolian", aliases = {"Ordos"}, parent = "mn", wikidata_item = 716904}
+
+m["mn-cha"] = {canonicalName = "Chakhar Mongolian", aliases = {"Chakhar"}, parent = "mn", wikidata_item = 907425}
+
+m["mn-khr"] = {canonicalName = "Khorchin Mongolian", aliases = {"Khorchin"}, parent = "mn", wikidata_item = 3196210}
+
+m["mjg-huz"] = {canonicalName = "Mongghul", aliases = {"Huzhu Monguor"}, parent = "mjg", wikidata_item = 34214}
+
+m["mjg-min"] = {canonicalName = "Mangghuer", aliases = {"Minhe Monguor"}, parent = "mjg", wikidata_item = 34214}
+
+-- Japanese varieties
+
+m["ja-mid"] = {canonicalName = "Middle Japanese", parent = "ojp", wikidata_item = 6841474}
+
+m["ja-mid-ear"] = {canonicalName = "Early Middle Japanese", parent = "ja-mid", wikidata_item = 182695}
+
+m["ja-mid-lat"] = {canonicalName = "Late Middle Japanese", parent = "ja-mid", wikidata_item = 1816184}
+
+m["ja-ear"] = {canonicalName = "Early Modern Japanese", parent = "ja", wikidata_item = 5326692}
+
+-- Kartvelian varieties
+
+m["ka-mid"] = {canonicalName = "Middle Georgian", parent = "ka"}
+
+-- Korean varieties
+
+m["oko-lat"] = {canonicalName = "Late Old Korean", parent = "oko"}
+
+m["okm-ear"] = {canonicalName = "Early Middle Korean", parent = "okm"}
+
+m["ko-ear"] = {canonicalName = "Early Modern Korean", parent = "ko", wikidata_item = 756014}
+
+m["ko-c"] = {canonicalName = "Central Korean", parent = "ko"}
+
+m["ko-se"] = {canonicalName = "Southeastern Korean", parent = "ko"}
+
+m["ko-sw"] = {canonicalName = "Southwestern Korean", parent = "ko"}
+
+m["ko-nw"] = {canonicalName = "Northwestern Korean", parent = "ko"}
+
+m["ko-ne"] = {canonicalName = "Northeastern Korean", parent = "ko"}
+
+m["ko-yuk"] = {canonicalName = "Yukjin Korean", parent = "ko"}
+
+-- Occitan varieties
+
+m["oc-auv"] = {canonicalName = "Auvergnat", aliases = {"Auvernhat", "Auvergnese"}, parent = "oc", wikidata_item = 35359}
+
+m["oc-gas"] = {canonicalName = "Gascon", parent = "oc", wikidata_item = 35735}
+
+-- standardized dialect of Gascon
+m["oc-ara"] = {canonicalName = "Aranese", parent = "oc-gas", wikidata_item = 10196}
+
+m["oc-lan"] = {canonicalName = "Languedocien", aliases = {"Lengadocian"}, parent = "oc", wikidata_item = 942602}
+
+m["oc-lim"] = {canonicalName = "Limousin", parent = "oc", wikidata_item = 427614}
+
+m["oc-pro"] = {canonicalName = "Provençal", aliases = {"Provencal"}, parent = "oc", wikidata_item = 241243}
+
+m["prv"] = m["oc-pro"]
+
+m["oc-viv"] = {canonicalName = "Vivaro-Alpine", parent = "oc", wikidata_item = 1649613}
+
+m["oc-jud"] = {canonicalName = "Shuadit", aliases = {"Chouhadite", "Chouhadit", "Chouadite", "Chouadit", "Shuhadit", "Judeo-Occitan", "Judæo-Occitan", "Judaeo-Occitan", "Judeo-Provençal", "Judæo-Provençal", "Judaeo-Provençal", "Judeo-Provencal", "Judaeo-Provencal", "Judeo-Comtadin", "Judæo-Comtadin", "Judaeo-Comtadin"}, parent = "oc", wikidata_item = 56472}
+
+-- Philippine varieties
+
+m["tl-old"] = {canonicalName = "Old Tagalog", parent = "tl", wikidata_item = 12967437}
+
+m["tl-cls"] = {canonicalName = "Classical Tagalog", parent = "tl"}
+
+-- Pre-Roman substrates
+
+m["qfa-sub-ibe"] = {canonicalName = "a pre-Roman substrate of Iberia", parent = "qfa-sub", wikidata_item = 530799}
+
+m["qfa-sub-bal"] = {canonicalName = "a pre-Roman substrate of the Balkans", parent = "qfa-sub"}
+
+-- Sardinian varieties
+
+m["sc-src"] = {canonicalName = "Logudorese", aliases = {"Logudorese Sardinian"}, parent = "sc", wikidata_item = 777974}
+
+m["sc-nuo"] = {canonicalName = "Nuorese", aliases = {"Nuorese Sardinian"}, parent = "sc-src"}
+
+m["sc-sro"] = {canonicalName = "Campidanese", aliases = {"Campidanese Sardinian"}, parent = "sc", wikidata_item = 35348}
+
+-- Rwanda-Rundi varieties
+
+m["rw-kin"] = {canonicalName = "Kinyarwanda", aliases = {"Rwanda"}, parent = "rw", wikidata_item = 33573}
+
+m["rw-run"] = {canonicalName = "Kirundi", aliases = {"Rundi"}, parent = "rw", wikidata_item = 33583}
+
+-- Slavic varieties
+
+m["cs-ear"] = {canonicalName = "Early Modern Czech", parent = "cs"}
+
+m["zle-oru"] = {canonicalName = "Old Russian", parent = "orv", wikidata_item = 35228}
+
+m["zle-obe"] = {canonicalName = "Old Belarusian", parent = "orv", wikidata_item = 13211}
+
+m["zle-ouk"] = {canonicalName = "Old Ukrainian", parent = "orv", wikidata_item = 13211}
+
+---- Serbo-Croatian varieties
+
+m["sh-cha"] = {canonicalName = "Chakavian Serbo-Croatian", aliases = {"Čakavian"}, parent = "sh", wikidata_item = 337565}
+m["ckm"] = m["sh-cha"] -- ISO 639-3 code
+
+m["sh-kaj"] = {canonicalName = "Kajkavian Serbo-Croatian", parent = "sh", wikidata_item = 838165}
+m["kjv"] = m["sh-kaj"] -- ISO 639-3 code
+
+m["sh-tor"] = { -- Linguist code srp-tor
+    canonicalName = "Torlakian Serbo-Croatian",
+    aliases = {"Torlak"},
+    parent = "sh",
+    wikidata_item = 1078803
+}
+
+-- Turkic lects
+
+m["trk-cmn"] = {canonicalName = "Common Turkic", parent = "trk-pro", wikidata_item = 1126028}
+
+m["trk-ogz-pro"] = {canonicalName = "Proto-Oghuz", aliases = {"Southwestern Common Turkic"}, parent = "trk-pro", ancestors = {"trk-pro"}, wikidata_item = 494600}
+
+m["otk-kir"] = {canonicalName = "Old Kirghiz", parent = "otk", wikidata_item = 83142}
+
+m["klj-arg"] = {canonicalName = "Arghu", parent = "klj", wikidata_item = 33455}
+
+m["qwm-arm"] = {canonicalName = "Armeno-Kipchak", parent = "qwm", wikidata_item = 2027503}
+
+m["qwm-mam"] = {canonicalName = "Mamluk-Kipchak", parent = "qwm", wikidata_item = 4279942}
+
+m["az-cls"] = {canonicalName = "Classical Azerbaijani", aliases = {"Classical Azeri"}, parent = "az"}
+
+-- Other lects
+
+m["alv-kro"] = {canonicalName = "Kromanti", parent = "crp", wikidata_item = 1093206}
+
+m["bat-pro"] = {canonicalName = "Proto-Baltic", parent = "ine-bsl-pro", wikidata_item = 1703347}
+
+m["es-lun"] = {canonicalName = "Lunfardo", parent = "es", wikidata_item = 1401612}
+m["Lunfardo"] = m["es-lun"]
+
+m["fiu-pro"] = {canonicalName = "Proto-Finno-Ugric", parent = "urj-pro", wikidata_item = 79890}
+
+m["gem-sue"] = {canonicalName = "Suevic", aliases = {"Suebian"}, parent = "gmw-pro", wikidata_item = 155085}
+
+m["mkh-okm-A"] = {canonicalName = "Angkorian Old Khmer", parent = "mkh-okm", wikipedia_article = "Khmer language#Historical periods", wikidata_item = 9205}
+
+m["mkh-okm-P"] = {canonicalName = "Pre-Angkorian Old Khmer", parent = "mkh-okm", wikipedia_article = "Khmer language#Historical periods", wikidata_item = 9205}
+
+m["mul-tax"] = {canonicalName = "taxonomic name", parent = "mul"}
+m["Tax."] = m["mul-tax"]
+
+m["qfa-pyg"] = {canonicalName = "a substrate language originally spoken by the Pygmies", parent = "qfa-sub", wikipedia_article = "Classification of Pygmy languages#Original Pygmy language(s)"}
+m["pygmy"] = m["qfa-pyg"]
+
+m["tai-shz"] = {canonicalName = "Shangsi Zhuang", parent = "za", wikidata_item = 13216}
+
+m["tbq-pro"] = {canonicalName = "Proto-Tibeto-Burman", parent = "sit-pro", wikidata_item = 7251864}
+
+m["und-idn"] = {
+    canonicalName = "Idiom Neutral",
+    parent = "und", -- or "vo"
+    wikipedia_article = "Idiom Neutral",
+    wikidata_item = 35847
+}
+
+m["und-tdl"] = {canonicalName = "Turduli", parent = "und", wikipedia_article = "Turduli"}
+
+m["und-tdt"] = {canonicalName = "Turdetani", parent = "und", wikipedia_article = "Turdetani"}
+
+m["und-xbi"] = {canonicalName = "Xianbei", parent = "und", wikipedia_article = "Xianbei"}
+
+m["und-xnu"] = {canonicalName = "Xiongnu", parent = "und", wikipedia_article = "Xiongnu"}
+
+m["urj-fpr-pro"] = {canonicalName = "Proto-Finno-Permic", parent = "urj-pro"}
+
+m["woy"] = {canonicalName = "Weyto", parent = "und", wikidata_item = 3915918}
+
+return m
diff --git a/wiktra/wikt/translit/eve-translit.lua b/wiktra/wikt/translit/eve-translit.lua
new file mode 100644
index 0000000..0727575
--- /dev/null
+++ b/wiktra/wikt/translit/eve-translit.lua
@@ -0,0 +1,127 @@
+local export = {}
+
+local u = mw.ustring.char
+
+local MACRON = u(0x0304)
+local DOTABOVE = u(0x0307)
+
+local tab = {
+    ["А"] = "A",
+    ["а"] = "a",
+    ["Б"] = "B",
+    ["б"] = "ʙ",
+    ["В"] = "W",
+    ["в"] = "w",
+    ["Е"] = "E",
+    ["е"] = "e",
+    ["Ё"] = "Jo",
+    ["ё"] = "jo",
+    ["Г"] = "G",
+    ["г"] = "g",
+    ["Д"] = "D",
+    ["д"] = "d",
+    ["И"] = "I",
+    ["и"] = "i",
+    ["Ӣ"] = "Ī",
+    ["ӣ"] = "ī",
+    ["Й"] = "J",
+    ["й"] = "j",
+    ["К"] = "K",
+    ["к"] = "k",
+    ["Л"] = "L",
+    ["л"] = "l",
+    ["М"] = "M",
+    ["м"] = "m",
+    ["Н"] = "N",
+    ["н"] = "n",
+    ["Ӈ"] = "Ŋ",
+    ["ӈ"] = "ŋ",
+    ["О"] = "O",
+    ["о"] = "o",
+    ["Ө"] = "Ö",
+    ["ө"] = "ö",
+    ["Ӫ"] = "Ö",
+    ["ӫ"] = "ö",
+    ["П"] = "P",
+    ["п"] = "p",
+    ["Р"] = "R",
+    ["р"] = "r",
+    ["С"] = "S",
+    ["с"] = "s",
+    ["Т"] = "T",
+    ["т"] = "t",
+    ["У"] = "U",
+    ["у"] = "u",
+    ["Ӯ"] = "Ū",
+    ["ӯ"] = "ū",
+    ["Ф"] = "F",
+    ["ф"] = "f",
+    ["Х"] = "H",
+    ["х"] = "h",
+    ["Ч"] = "C",
+    ["ч"] = "c",
+    ["Ы"] = "I",
+    ["ы"] = "i",
+    ["Э"] = "Ə",
+    ["э"] = "ə",
+    ["Ю"] = "Ju",
+    ["ю"] = "ju",
+    ["Я"] = "Ẹ",
+    ["я"] = "ẹ", -- Not present in the original latinisation
+    -- non-native letters
+    ["Ж"] = "Z",
+    ["ж"] = "z",
+    ["З"] = "Z",
+    ["з"] = "z",
+    ["Ц"] = "C",
+    ["ц"] = "c",
+    ["Ш"] = "S",
+    ["ш"] = "s",
+    ["Щ"] = "S",
+    ["щ"] = "s",
+    ["Ъ"] = "ʺ",
+    ["ъ"] = "ʺ",
+    ["Ь"] = "’",
+    ["ь"] = "’",
+
+    -- non-standard letters
+    ["Ҕ"] = "Γ",
+    ["ҕ"] = "γ",
+    ["Ҥ"] = "Ŋ",
+    ["ҥ"] = "ŋ",
+    ["Ү"] = "Ü",
+    ["ү"] = "ü",
+    ["Һ"] = "Ḥ",
+    ["һ"] = "ḥ",
+    ["Ӄ"] = "Q",
+    ["ӄ"] = "q"
+}
+
+local iotatedTranslit = {["е"] = "je", ["я"] = "ja", ["и"] = "ji", ["ӣ"] = "jī", ["Е"] = "Je", ["Я"] = "Ja"}
+
+local replacements = { -- ordered {pattern, replacement} pairs; export.tr applies them with string.gsub, in this order, before any other rule
+    {"Ё", "Jo"}, {"ё", "jo"}, {"Ӫ", "Jö"}, {"ӫ", "jö"}, {"Ю", "Ju"}, {"ю", "ju"}, -- these run first, so the Дj/Нj pairs in the next row match the Latin j produced here
+    {"Де", "Ʒe"}, {"де", "ʒe"}, {"Не", "Ņe"}, {"не", "ņe"}, {"Ди", "Ʒi"}, {"ди", "ʒi"}, {"Ни", "Ņi"}, {"ни", "ņi"}, {"Дя", "Ʒa"}, {"дя", "ʒa"}, {"Ня", "Ņa"}, {"ня", "ņa"}, {"Дj", "Ʒ"}, {"дj", "ʒ"}, {"Нj", "Ņ"}, {"нj", "ņ"}, -- (this row) unfortunately the Cyrillic alphabet doesn't distinguish between ʒe and ʒə; (next row) non-standard, but supposedly used in non-standard spelling and dialect forms
+    {"Дь", "Ʒ"}, {"дь", "ʒ"}, {"Нь", "Ņ"}, {"нь", "ņ"}, {"Нг", "Ŋ"}, {"нг", "ŋ"}
+}
+
+function export.tr(text, lang, sc)
+    local ugsub, str_gsub = mw.ustring.gsub, string.gsub
+    local UTF8char = "[\1-\127\194-\244][\128-\191]*"
+    -- Shared callback: keep the captured context and append the iotated form of the letter after it.
+    local function iotate(context, letter) return context .. iotatedTranslit[letter] end
+
+    -- Digraph and palatalisation rewrites run first, in list order.
+    for _, pair in ipairs(replacements) do text = str_gsub(text, pair[1], pair[2]) end
+
+    -- е, я, и, ӣ directly after a vowel or a hard/soft sign become je, ja, ji, jī.
+    -- Again, the Cyrillic alphabet doesn't distinguish between je and jə
+    text = ugsub(text, "([АОУЫЯЕИӢЪЬаӣиоуыэяеъьaeioöu][" .. MACRON .. DOTABOVE .. "]?)([еяиӣ])", iotate)
+    -- Е/е/Я/я are also iotated at the very start of the text and after any character outside U+0400-U+04FF.
+    text = ugsub(text, "^[ЕеЯя]", iotatedTranslit)
+    text = ugsub(text, "([^Ѐ-ӿ])([ЕеЯя])", iotate)
+    -- Look every UTF-8 character up in `tab`; characters without an entry are left unchanged.
+    return (str_gsub(text, UTF8char, tab))
+end
+
+return export
diff --git a/wiktra/wikt/translit/evn-translit.lua b/wiktra/wikt/translit/evn-translit.lua
new file mode 100644
index 0000000..c368df3
--- /dev/null
+++ b/wiktra/wikt/translit/evn-translit.lua
@@ -0,0 +1,109 @@
+local u = mw.ustring.char
+
+local MACRON = u(0x0304)
+local DOTABOVE = u(0x0307)
+local DOTBELOW = u(0x0323)
+
+local str_gsub, ugsub = string.gsub, mw.ustring.gsub
+local UTF8char = "[\1-\127\194-\244][\128-\191]*"
+
+local export = {}
+
+local tab = {
+    ["А"] = "A",
+    ["а"] = "a",
+    ["В"] = "W",
+    ["в"] = "w",
+    ["Е"] = "E",
+    ["е"] = "e",
+    ["Ё"] = "Jo",
+    ["ё"] = "jo",
+    ["Г"] = "G",
+    ["г"] = "g",
+    ["Д"] = "D",
+    ["д"] = "d",
+    ["И"] = "I",
+    ["и"] = "i",
+    ["Ӣ"] = "Ī",
+    ["ӣ"] = "ī",
+    ["Й"] = "J",
+    ["й"] = "j",
+    ["К"] = "K",
+    ["к"] = "k",
+    ["Л"] = "L",
+    ["л"] = "l",
+    ["М"] = "M",
+    ["м"] = "m",
+    ["Н"] = "N",
+    ["н"] = "n",
+    ["Ӈ"] = "Ŋ",
+    ["ӈ"] = "ŋ",
+    ["О"] = "O",
+    ["о"] = "o",
+    ["П"] = "P",
+    ["п"] = "p",
+    ["Р"] = "R",
+    ["р"] = "r",
+    ["С"] = "S",
+    ["с"] = "s",
+    ["Т"] = "T",
+    ["т"] = "t",
+    ["У"] = "U",
+    ["у"] = "u",
+    ["Ӯ"] = "Ū",
+    ["ӯ"] = "ū",
+    ["Ф"] = "F",
+    ["ф"] = "f",
+    ["Х"] = "H",
+    ["х"] = "h",
+    ["Ч"] = "Ç",
+    ["ч"] = "ç",
+    ["Ы"] = "I",
+    ["ы"] = "i",
+    ["Э"] = "Ə",
+    ["э"] = "ə",
+    ["Ю"] = "Ju",
+    ["ю"] = "ju",
+    ["Я"] = "Ja",
+    ["я"] = "ja",
+    -- non-native letters
+    ["Б"] = "B",
+    ["б"] = "b",
+    ["Ж"] = "Z",
+    ["ж"] = "z",
+    ["З"] = "Z",
+    ["з"] = "z",
+    ["Ц"] = "C",
+    ["ц"] = "c",
+    ["Ш"] = "Ş",
+    ["ш"] = "ş",
+    ["Щ"] = "Ş",
+    ["щ"] = "ş", -- in literary language ш is only found in Russian words and was originally represented with s, however some dialects have ш in native words
+    ["Ъ"] = "ʺ",
+    ["ъ"] = "ʺ",
+    ["Ь"] = "’",
+    ["ь"] = "’"
+}
+
+local other = { -- ordered {pattern, replacement} pairs; export.tr applies them with string.gsub, in this order, before any other rule
+    {"Я", "Ja"}, {"я", "ja"}, {"Ё", "Jo"}, {"ё", "jo"}, {"Ю", "Ju"}, {"ю", "ju"}, -- these run first, so the Дj/Нj pairs in the next row match the Latin j produced here; unfortunately the Cyrillic alphabet doesn't distinguish between ʒe and ʒə (next row)
+    {"Де", "Ʒe"}, {"де", "ʒe"}, {"Не", "Ņe"}, {"не", "ņe"}, {"Ди", "Ʒi"}, {"ди", "ʒi"}, {"Ни", "Ņi"}, {"ни", "ņi"}, {"Дӣ", "Ʒī"}, {"дӣ", "ʒī"}, {"Нӣ", "Ņī"}, {"нӣ", "ņī"}, {"Дj", "Ʒ"}, {"дj", "ʒ"}, {"Нj", "Ņ"}, {"нj", "ņ"}
+}
+
+function export.tr(text, lang, sc)
+    for _, pair in ipairs(other) do text = str_gsub(text, pair[1], pair[2]) end
+    -- е, и, ӣ directly after a vowel or a hard/soft sign become je, ji, jī.
+    -- Again, the Cyrillic alphabet doesn't distinguish between je and jə
+    local afterVowel = "([АОУЫЕИӢЪЬаӣиоуыэеъьaeiou][" .. MACRON .. DOTABOVE .. DOTBELOW .. "]?)"
+    text = ugsub(text, afterVowel .. "е", "%1je")
+    text = ugsub(text, afterVowel .. "и", "%1ji")
+    text = ugsub(text, afterVowel .. "ӣ", "%1jī")
+    -- Е/е are also iotated at the very start of the text and after any character outside U+0400-U+04FF.
+    text = str_gsub(text, "^Е", "Je")
+    text = str_gsub(text, "^е", "je")
+    text = ugsub(text, "([^Ѐ-ӿ])Е", "%1Je")
+    text = ugsub(text, "([^Ѐ-ӿ])е", "%1je")
+    return (str_gsub(text, UTF8char, tab))
+end
+
+return export
diff --git a/wiktra/wikt/translit/fa-translit.lua b/wiktra/wikt/translit/fa-translit.lua
new file mode 100644
index 0000000..f65ed10
--- /dev/null
+++ b/wiktra/wikt/translit/fa-translit.lua
@@ -0,0 +1,144 @@
+local U = mw.ustring.char
+local gsub = mw.ustring.gsub
+local export = {}
+
+local fatHatan = U(0x64B) -- What is the Persian term for this?
+local fathe = U(0x64E) -- also zabar
+local kasre = U(0x650) -- also zir
+local zamme = U(0x64F) -- also piš
+local tashdid = U(0x651) -- also called shadda
+local jazm = U(0x652)
+local alif = "ا"
+local zwnj = "‌"
+local he = "ه"
+
+local waw = U(0x0648)
+local ye = U(0x06CC)
+
+local group = "بپتثجچحخدذرزژسشصضطظغفقکگلمنوهی"
+local ZZP = "َُِ"
+
+local mapping = {
+    ["ا"] = "â",
+    ["ب"] = "b",
+    ["پ"] = "p",
+    ["ت"] = "t",
+    ["ث"] = "s",
+    ["ج"] = "j",
+    ["چ"] = "č",
+    ["ح"] = "h",
+    ["خ"] = "x",
+    ["د"] = "d",
+    ["ذ"] = "z",
+    ["ر"] = "r",
+    ["ز"] = "z",
+    ["ژ"] = "ž",
+    ["س"] = "s",
+    ["ش"] = "š",
+    ["ص"] = "s",
+    ["ض"] = "z",
+    ["ط"] = "t",
+    ["ظ"] = "z",
+    ["غ"] = "ğ",
+    ["ف"] = "f",
+    ["ق"] = "q",
+    ["ک"] = "k",
+    ["گ"] = "g",
+    ["ل"] = "l",
+    ["م"] = "m",
+    ["ن"] = "n",
+    ["و"] = "u",
+    ["ه"] = "h",
+    ["ی"] = "i",
+    ["آ"] = "â",
+    ["‌"] = "-",
+
+    ["َ"] = "a",
+    ["ِ"] = "e",
+    ["ُ"] = "o",
+
+    -- displaying on separate lines as the viewing becomes distorted on these combinations
+    ["ع"] = "’",
+    ["ء"] = "’",
+    ["ئ"] = "’",
+    ["ؤ"] = "’",
+    ["أ"] = "’",
+
+    -- diacritics
+    [fathe] = "a",
+    [kasre] = "e",
+    [zamme] = "o",
+    [jazm] = "", -- also sokun - no vowel
+    [U(0x200C)] = "-", -- ZWNJ (zero-width non-joiner)
+    [fatHatan] = "n",
+    ["‌"] = "-",
+    -- ligatures
+    ["ﻻ"] = "lâ",
+    ["ﷲ"] = "llâh",
+    -- kashida
+    ["ـ"] = "", -- kashida, no sound
+    -- numerals
+    ["۱"] = "1",
+    ["۲"] = "2",
+    ["۳"] = "3",
+    ["۴"] = "4",
+    ["۵"] = "5",
+    ["۶"] = "6",
+    ["۷"] = "7",
+    ["۸"] = "8",
+    ["۹"] = "9",
+    ["۰"] = "0",
+    -- normal arabic variants to numerals
+    ["١"] = "1",
+    ["٢"] = "2",
+    ["٣"] = "3",
+    ["٤"] = "4",
+    ["٥"] = "5",
+    ["٦"] = "6",
+    ["٧"] = "7",
+    ["٨"] = "8",
+    ["٩"] = "9",
+    ["٠"] = "0",
+    -- punctuation (leave on separate lines)
+    ["؟"] = "?", -- question mark
+    ["،"] = ",", -- comma
+    ["؛"] = ";", -- semicolon
+    ["«"] = "“", -- quotation mark
+    ["»"] = "”", -- quotation mark
+    ["٪"] = "%", -- percent
+    ["؉"] = "‰", -- per mille
+    ["٫"] = ".", -- decimals
+    ["٬"] = ",", -- thousand
+    ["ۀ"] = "-ye" -- he ye (in ezâfe)
+}
+
+function export.tr(text, lang, sc)
+    -- Context-dependent rewrites; these must run before the per-character lookup below.
+    text = gsub(text, "([" .. group .. "]" .. tashdid .. "?)ه$", "%1e")
+    text = gsub(text, alif .. fathe, "a")
+    text = gsub(text, he .. zwnj, "e-")
+
+    text = gsub(text, kasre .. ye .. "([" .. group .. "])", "ey%1")
+    text = gsub(text, jazm .. alif, "a")
+
+    text = gsub(text, "ىٰ", "â")
+    text = gsub(text, "ٰ", "â")
+    text = gsub(text, "ا" .. fatHatan, "an")
+    -- text = mw.ustring.gsub(text, 'الله', "ﷲ")
+    -- text = mw.ustring.gsub(text, 'لا', "ﻻ")
+    text = gsub(text, ".", mapping) -- every remaining character is looked up in `mapping`
+    -- Clean-up passes over the romanised output.
+    text = gsub(text, "aâ", "â")
+    text = gsub(text, "âa", "a")
+
+    text = gsub(text, "u([aâeiou])", "v%1")
+    text = gsub(text, "i([aâeiou])", "y%1")
+    text = gsub(text, "([aâeiou])(" .. tashdid .. ")", "%2%1") -- swapping tašdid with vowels
+    text = gsub(text, "(.)" .. tashdid, "%1%1") -- implementing tašdid
+    text = gsub(text, "eh$", "e")
+    text = gsub(text, "eh([^aâeiouy’bdfghjklmnpqrstvyxzčğšž])", "e%1")
+
+    return text
+end
+
+return export
diff --git a/wiktra/wikt/translit/families.lua b/wiktra/wikt/translit/families.lua
new file mode 100644
index 0000000..7a451dc
--- /dev/null
+++ b/wiktra/wikt/translit/families.lua
@@ -0,0 +1,87 @@
+local export = {}
+
+local Family = {}
+Family.__index = Family
+
+-- Accessors over the stored code and raw data record.
+function Family:getCode() return self._code end
+function Family:getCanonicalName() return self._rawData.canonicalName end
+function Family:getAliases() return self._rawData.aliases or {} end
+function Family:getRawData() return self._rawData end
+function Family:getType() return "family" end
+function Family:getDisplayForm() return self:getCategoryName("nocap") end
+
+-- Other names and varieties are delegated to the "language-like" module.
+function Family:getOtherNames(onlyOtherNames) return require("language-like").getOtherNames(self, onlyOtherNames) end
+function Family:getVarieties(flatten) return require("language-like").getVarieties(self, flatten) end
+
+-- function Family:getAllNames()
+--	return self._rawData.names
+-- end
+
+-- Parent family object, or nil when the raw data names no parent; cached once found.
+function Family:getFamily()
+    local parentCode = self._rawData.family
+    if parentCode and not self._familyObject then self._familyObject = export.getByCode(parentCode) end
+    return self._familyObject
+end
+
+-- Proto-language object for the explicit `protoLanguage` code, else for "<family code>-pro"; cached once found.
+function Family:getProtoLanguage()
+    if self._protoLanguage then return self._protoLanguage end
+    local getLanguage = require("languages").getByCode
+    self._protoLanguage = getLanguage(self._rawData.protoLanguage or (self._code .. "-pro"))
+    return self._protoLanguage
+end
+
+-- Canonical name plus " languages" (unless it already ends that way), capitalised unless `nocap` is set.
+function Family:getCategoryName(nocap)
+    local name = self._rawData.canonicalName
+    if name:find("[Ll]anguages$") == nil then name = name .. " languages" end
+    if nocap then return name end
+    return (mw.getContentLanguage():ucfirst(name))
+end
+
+-- Wikitext link builders.
+function Family:makeCategoryLink() return "[[:Category:" .. self:getCategoryName() .. "|" .. self:getDisplayForm() .. "]]" end
+
+function Family:makeWikipediaLink() return "[[w:" .. self:getWikipediaArticle() .. "|" .. self:getCanonicalName() .. "]]" end
+
+-- Wikidata ID as stored, with "Q" prepended when it is stored as a bare number.
+function Family:getWikidataItem()
+    local item = self._rawData.wikidata_item
+    if type(item) ~= "number" then return item end
+    return "Q" .. item
+end
+
+-- English Wikipedia sitelink of the Wikidata item when one can be looked up, else the category name.
+function Family:getWikipediaArticle()
+    local item = self:getWikidataItem()
+    return (item and mw.wikibase and mw.wikibase.sitelink(item, "enwiki")) or self:getCategoryName()
+end
+
+-- Serialises the family's fields through the "JSON" module.
+function Family:toJSON()
+    local fields = {canonicalName = self:getCanonicalName(), categoryName = self:getCategoryName("nocap"), code = self._code, family = self._rawData.family, protoLanguage = self._rawData.protoLanguage, otherNames = self:getOtherNames(true), aliases = self:getAliases(), varieties = self:getVarieties(), type = self:getType(), wikidataItem = self:getWikidataItem()}
+
+    return require("JSON").toJSON(fields)
+end
+
+function export.makeObject(code, data) if not data then return nil end return setmetatable({_code = code, _rawData = data}, Family) end -- nil when there is no data record
+
+function export.getByCode(code)
+    -- FIXME! Remove this when we've tracked down all uses.
+    if code == "kdo" then require("debug").track("Kordofanian") end
+    -- Fetch the raw record for this code from "families/data" and wrap it (nil when absent).
+    local rawData = mw.loadData("families/data")[code]
+    return export.makeObject(code, rawData)
+end
+
+function export.getByCanonicalName(name)
+    -- Resolve the name through the "families/by name" reverse index, then wrap that code's data record.
+    local code = mw.loadData("families/by name")[name]
+    if code then return export.makeObject(code, mw.loadData("families/data")[code]) end
+
+    return nil
+end
+
+return export
diff --git a/wiktra/wikt/translit/families/by name.lua b/wiktra/wikt/translit/families/by name.lua
new file mode 100644
index 0000000..e9e3e14
--- /dev/null
+++ b/wiktra/wikt/translit/families/by name.lua	
@@ -0,0 +1,5 @@
+-- Reverse index built from "families/data": canonical family name -> family code.
+local byName = {}
+for familyCode, family in pairs(mw.loadData("families/data")) do byName[family.canonicalName] = familyCode end
+
+return byName
diff --git a/wiktra/wikt/translit/families/data.lua b/wiktra/wikt/translit/families/data.lua
new file mode 100644
index 0000000..0c493c1
--- /dev/null
+++ b/wiktra/wikt/translit/families/data.lua
@@ -0,0 +1,1790 @@
+--[=[
+	This module contains definitions for all language family codes on Wiktionary.
+]=] --
+local m = {}
+
+m["aav"] = {canonicalName = "Austroasiatic", aliases = {"Austro-Asiatic"}, wikidata_item = 33199}
+
+m["aav-khs"] = {canonicalName = "Khasian", aliases = {"Khasic"}, family = "aav", wikidata_item = 3073734}
+
+m["aav-nic"] = {canonicalName = "Nicobarese", family = "aav", wikidata_item = 217380}
+
+m["aav-pkl"] = {canonicalName = "Pnar-Khasi-Lyngngam", family = "aav-khs"}
+
+m["afa"] = {canonicalName = "Afroasiatic", aliases = {"Afro-Asiatic"}, wikidata_item = 25268}
+
+m["alg"] = {canonicalName = "Algonquian", family = "aql", wikidata_item = 33392}
+
+m["alg-abp"] = {canonicalName = "Abenaki-Penobscot", family = "alg-eas", wikidata_item = 197936}
+
+m["alg-ara"] = {canonicalName = "Arapahoan", family = "alg", wikidata_item = 2153686}
+
+m["alg-eas"] = {canonicalName = "Eastern Algonquian", family = "alg", wikidata_item = 2257525}
+
+m["alg-sfk"] = {canonicalName = "Sac-Fox-Kickapoo", family = "alg", wikidata_item = 1440172}
+
+m["alv"] = {canonicalName = "Atlantic-Congo", family = "nic", wikidata_item = 771124}
+
+m["alv-aah"] = {canonicalName = "Ayere-Ahan", family = "alv-von", wikidata_item = 750953}
+
+m["alv-ada"] = {canonicalName = "Adamawa", family = "alv-sav", wikidata_item = 32906}
+
+m["alv-bag"] = {canonicalName = "Baga", family = "alv-mel"}
+
+m["alv-bak"] = {canonicalName = "Bak", family = "alv-sng", wikidata_item = 1708174}
+
+m["alv-bam"] = {canonicalName = "Bambukic", aliases = {"Yungur-Jen"}, family = "alv-ada", wikidata_item = 4853456}
+
+m["alv-bny"] = {canonicalName = "Banyum", family = "alv-nyn"}
+
+m["alv-bua"] = {canonicalName = "Bua", family = "alv-mbd", wikidata_item = 4982094}
+
+m["alv-bwj"] = {canonicalName = "Bikwin-Jen", family = "alv-bam"}
+
+m["alv-cng"] = {canonicalName = "Cangin", family = "alv-fwo", wikidata_item = 1033184}
+
+m["alv-ctn"] = {canonicalName = "Central Tano", aliases = {"Akan"}, family = "alv-ptn", wikidata_item = 1658486}
+
+m["alv-dlt"] = {canonicalName = "Delta Edoid", family = "alv-edo"}
+
+m["alv-dur"] = {canonicalName = "Duru", family = "alv-lni", wikidata_item = 5316788}
+
+m["alv-ede"] = {canonicalName = "Ede", family = "alv-yor", wikidata_item = 35368}
+
+m["alv-edo"] = {canonicalName = "Edoid", family = "alv-von", wikidata_item = 1287469}
+
+m["alv-eeo"] = {canonicalName = "Edo-Esan-Ora", family = "alv-nce"}
+
+m["alv-fli"] = {canonicalName = "Fali", family = "alv", wikidata_item = 3450166}
+
+m["alv-fwo"] = {canonicalName = "Fula-Wolof", family = "alv-sng", wikidata_item = 12631267}
+
+m["alv-gba"] = {canonicalName = "Gbaya", family = "alv-sav", wikidata_item = 3099986, protoLanguage = "gba"}
+
+m["alv-gbe"] = {canonicalName = "Gbe", family = "alv-von", wikidata_item = 668284}
+
+m["alv-gbf"] = {canonicalName = "Eastern Gbaya", family = "alv-gba"}
+
+m["alv-gbs"] = {canonicalName = "Southern Gbaya", family = "alv-gba"}
+
+m["alv-gbw"] = {canonicalName = "Western Gbaya", family = "alv-gba"}
+
+m["alv-gda"] = {canonicalName = "Ga-Dangme", family = "alv-kwa", wikidata_item = 3443338}
+
+m["alv-gng"] = {canonicalName = "Guang", family = "alv-ptn", wikidata_item = 684009}
+
+m["alv-gtm"] = {canonicalName = "Ghana-Togo Mountain", aliases = {"Togo Remnant", "Central Togo"}, family = "alv-kwa", wikidata_item = 493020}
+
+m["alv-hei"] = {canonicalName = "Heiban", family = "alv-the"}
+
+m["alv-ido"] = {canonicalName = "Idomoid", family = "alv-von", wikidata_item = 974196}
+
+m["alv-igb"] = {canonicalName = "Igboid", family = "alv-von", wikidata_item = 1429100}
+
+m["alv-jfe"] = {canonicalName = "Jola-Felupe", aliases = {"Ejamat"}, family = "alv-jol", wikidata_item = 1708174}
+
+m["alv-jol"] = {canonicalName = "Jola", aliases = {"Diola"}, family = "alv-bak", wikidata_item = 35176}
+
+m["alv-kim"] = {canonicalName = "Kim", family = "alv-mbd", wikidata_item = 6409701}
+
+m["alv-kis"] = {canonicalName = "Kissi", family = "alv-mel"}
+
+m["alv-krb"] = {canonicalName = "Karaboro", family = "alv-snf", wikidata_item = 4213541}
+
+m["alv-ktg"] = {canonicalName = "Ka-Togo", family = "alv-gtm", wikidata_item = 5972796}
+
+m["alv-kul"] = {canonicalName = "Kulango", family = "alv-sav", wikidata_item = 16977424, aliases = {"Kulango-Lorhon", "Kulango-Lorom"}}
+
+m["alv-kwa"] = {canonicalName = "Kwa", family = "nic-vco", wikidata_item = 33430}
+
+m["alv-lag"] = {canonicalName = "Lagoon", family = "alv-kwa"}
+
+m["alv-lek"] = {
+    canonicalName = "Leko",
+    otherNames = {"Sambaic"}, -- appears to be an alias in Glottolog
+    family = "alv-lni",
+    wikidata_item = 6520642
+}
+
+m["alv-lim"] = {canonicalName = "Limba", family = "alv"}
+
+m["alv-lni"] = {canonicalName = "Leko-Nimbari", otherNames = {"Central Adamawa"}, aliases = {"Chamba-Mumuye"}, family = "alv-ada", wikidata_item = 1708170}
+
+m["alv-mbd"] = {canonicalName = "Mbum-Day", family = "alv-ada", wikidata_item = 6799816}
+
+m["alv-mbm"] = {canonicalName = "Mbum", family = "alv-mbd", wikidata_item = 6799814}
+
+m["alv-mel"] = {canonicalName = "Mel", family = "alv", wikidata_item = 12122355}
+
+m["alv-mum"] = {canonicalName = "Mumuye", family = "alv-mye", wikidata_item = 84607009}
+
+m["alv-mye"] = {canonicalName = "Mumuye-Yendang", family = "alv-lni", wikidata_item = 6935539}
+
+m["alv-nal"] = {canonicalName = "Nalu", family = "alv-sng"}
+
+m["alv-nce"] = {canonicalName = "North-Central Edoid", family = "alv-edo"}
+
+m["alv-ngb"] = {canonicalName = "Nupe-Gbagyi", aliases = {"Nupe-Gbari"}, family = "alv-nup"}
+
+m["alv-ntg"] = {canonicalName = "Na-Togo", family = "alv-gtm"}
+
+m["alv-nup"] = {canonicalName = "Nupoid", family = "alv-von", wikidata_item = 1429143}
+
+m["alv-nwd"] = {canonicalName = "Northwestern Edoid", family = "alv-edo"}
+
+m["alv-nyn"] = {canonicalName = "Nyun", family = "alv-fwo"}
+
+m["alv-pap"] = {canonicalName = "Papel", family = "alv-bak", wikidata_item = 7132562}
+
+m["alv-pph"] = {canonicalName = "Phla–Pherá", family = "alv-gbe", wikidata_item = 3849625}
+
+m["alv-ptn"] = {canonicalName = "Potou-Tano", family = "alv-kwa", wikidata_item = 1475003}
+
+m["alv-sav"] = {canonicalName = "Savanna", family = "nic-vco", wikidata_item = 4403672, aliases = {"Savannas"}}
+
+m["alv-sma"] = {canonicalName = "Suppire-Mamara", family = "alv-snf", wikidata_item = 4446348}
+
+m["alv-snf"] = {canonicalName = "Senufo", family = "alv", wikidata_item = 33795, aliases = {"Senufic", "Senoufo"}}
+
+m["alv-sng"] = {canonicalName = "Senegambian", family = "alv", wikidata_item = 1708753}
+
+m["alv-snr"] = {canonicalName = "Senari", family = "alv-snf", wikidata_item = 4416084}
+
+m["alv-swd"] = {canonicalName = "Southwestern Edoid", family = "alv-edo"}
+
+m["alv-tal"] = {canonicalName = "Talodi", family = "alv-the", wikidata_item = 12643302}
+
+m["alv-tdj"] = {canonicalName = "Tagwana-Djimini", family = "alv-snf", wikidata_item = 7675362}
+
+m["alv-ten"] = {canonicalName = "Tenda", family = "alv-fwo", wikidata_item = 3217535}
+
+m["alv-the"] = {canonicalName = "Talodi-Heiban", family = "alv", wikidata_item = 1521145}
+
+m["alv-von"] = {canonicalName = "Volta-Niger", family = "nic-vco", wikidata_item = 34177}
+
+m["alv-wan"] = {canonicalName = "Wara-Natyoro", family = "alv-sav", wikidata_item = 7968830}
+
+m["alv-wjk"] = {canonicalName = "Waja-Kam", family = "alv-ada"}
+
+m["alv-yek"] = {canonicalName = "Yekhee", family = "alv-nce"}
+
+m["alv-yor"] = {canonicalName = "Yoruboid", family = "alv-von", wikidata_item = 1789745}
+
+m["alv-yun"] = {canonicalName = "Yungur", aliases = {"Bena-Mboi"}, family = "alv-bam"}
+
+m["apa"] = {canonicalName = "Apachean", aliases = {"Southern Athabaskan"}, family = "ath", wikidata_item = 27758}
+
+m["aqa"] = {canonicalName = "Alacalufan", wikidata_item = 1288430}
+
+m["aql"] = {canonicalName = "Algic", aliases = {"Algonquian-Ritwan", "Algonquian-Wiyot-Yurok"}, wikidata_item = 721612}
+
+m["art"] = {canonicalName = "constructed", aliases = {"artificial", "planned"}, family = "qfa-not", wikidata_item = 33215}
+
+m["ath"] = {canonicalName = "Athabaskan", family = "xnd", wikidata_item = 27475}
+
+m["ath-nor"] = {canonicalName = "North Athabaskan", aliases = {"Northern Athabaskan"}, family = "ath"}
+
+m["ath-pco"] = {canonicalName = "Pacific Coast Athabaskan", family = "ath", wikidata_item = 20654}
+
+m["auf"] = {canonicalName = "Arauan", aliases = {"Arahuan", "Arauán", "Arawa", "Arawan", "Arawán"}, wikidata_item = 626772}
+
+--[=[
+	Exceptional language and family codes for Australian Aboriginal languages
+	can use the prefix "aus-", though "aus" is no longer itself a family code.
+]=] --
+m["aus-arn"] = {canonicalName = "Arnhem", aliases = {"Gunwinyguan", "Macro-Gunwinyguan"}, wikidata_item = 2581700}
+
+m["aus-bub"] = {canonicalName = "Bunuban", aliases = {"Bunaban"}, wikidata_item = 2495148}
+
+m["aus-cww"] = {canonicalName = "Central New South Wales", family = "aus-pam", wikidata_item = 5061507}
+
+m["aus-dal"] = {canonicalName = "Daly", wikidata_item = 2478079}
+
+m["aus-dyb"] = {canonicalName = "Dyirbalic", family = "aus-pam", wikidata_item = 1850666}
+
+m["aus-gar"] = {canonicalName = "Garawan", wikidata_item = 5521951}
+
+m["aus-gun"] = {canonicalName = "Gunwinyguan", aliases = {"Gunwingguan"}, family = "aus-arn", wikidata_item = 2581700}
+
+m["aus-jar"] = {canonicalName = "Jarrakan", wikidata_item = 2039423}
+
+m["aus-kar"] = {canonicalName = "Karnic", family = "aus-pam", wikidata_item = 4215578}
+
+m["aus-mir"] = {canonicalName = "Mirndi", wikidata_item = 4294095}
+
+m["aus-nga"] = {canonicalName = "Ngayarda", family = "aus-psw", wikidata_item = 16153490}
+
+m["aus-nyu"] = {canonicalName = "Nyulnyulan", wikidata_item = 2039408}
+
+m["aus-pam"] = {canonicalName = "Pama-Nyungan", wikidata_item = 33942}
+
+m["aus-pmn"] = {canonicalName = "Paman", family = "aus-pam", wikidata_item = 2640654}
+
+m["aus-psw"] = {canonicalName = "Southwest Pama-Nyungan", family = "aus-pam", wikidata_item = 2258160}
+
+m["aus-rnd"] = {canonicalName = "Arandic", family = "aus-pam", wikidata_item = 4784071}
+
+m["aus-tnk"] = {canonicalName = "Tangkic", wikidata_item = 1823065}
+
+m["aus-wdj"] = {canonicalName = "Iwaidjan", aliases = {"Yiwaidjan"}, wikidata_item = 4196968}
+
+m["aus-wor"] = {canonicalName = "Worrorran", wikidata_item = 2038619}
+
+m["aus-yid"] = {canonicalName = "Yidinyic", family = "aus-pam", wikidata_item = 4205849}
+
+m["aus-yng"] = {canonicalName = "Yangmanic", wikidata_item = 42727644}
+
+m["aus-yol"] = {canonicalName = "Yolngu", aliases = {"Yolŋu", "Yolngu Matha"}, family = "aus-pam", wikidata_item = 2511254}
+
+m["aus-yuk"] = {canonicalName = "Yuin-Kuric", family = "aus-pam", wikidata_item = 3833021}
+
+m["awd"] = {canonicalName = "Arawakan", aliases = {"Arawak", "Maipurean", "Maipuran"}, wikidata_item = 626753}
+
+m["awd-nwk"] = {canonicalName = "Nawiki", aliases = {"Newiki"}, family = "awd"}
+
+m["awd-taa"] = {canonicalName = "Ta-Arawakan", aliases = {"Ta-Arawak", "Ta-Maipurean"}, family = "awd", wikidata_item = 7672731}
+
+m["azc"] = {canonicalName = "Uto-Aztecan", aliases = {"Uto-Aztekan"}, wikidata_item = 34073}
+
+m["azc-cup"] = {canonicalName = "Cupan", family = "azc-tak", wikidata_item = 19866871}
+
+m["azc-nah"] = {canonicalName = "Nahuan", aliases = {"Aztecan"}, family = "azc", wikidata_item = 11965602}
+
+m["azc-num"] = {canonicalName = "Numic", family = "azc", wikidata_item = 2657541}
+
+m["azc-tak"] = {canonicalName = "Takic", family = "azc", wikidata_item = 1280305}
+
+m["azc-trc"] = {canonicalName = "Taracahitic", aliases = {"Taracahitan"}, family = "azc", wikidata_item = 4245032}
+
+m["bad"] = {canonicalName = "Banda", family = "nic-ubg", wikidata_item = 806234}
+
+m["bad-cnt"] = {canonicalName = "Central Banda", family = "bad", wikidata_item = 3438391}
+
+m["bai"] = {canonicalName = "Bamileke", family = "nic-gre", wikidata_item = 806005}
+
+m["bat"] = {canonicalName = "Baltic", family = "ine-bsl", wikidata_item = 33136}
+
+m["ber"] = {canonicalName = "Berber", aliases = {"Tamazight"}, family = "afa", wikidata_item = 25448}
+
+m["bnt"] = {canonicalName = "Bantu", family = "nic-bds", wikidata_item = 33146}
+
+m["bnt-baf"] = {canonicalName = "Bafia", family = "bnt", wikidata_item = 799784}
+
+m["bnt-bbo"] = {canonicalName = "Bafo-Bonkeng", family = "bnt-saw"}
+
+m["bnt-bdz"] = {canonicalName = "Boma-Dzing", family = "bnt", wikidata_item = 1729203}
+
+m["bnt-bek"] = {canonicalName = "Bekwilic", family = "bnt-ndb"}
+
+m["bnt-bki"] = {canonicalName = "Bena-Kinga", family = "bnt-bne", wikidata_item = 16113307}
+
+m["bnt-bmo"] = {canonicalName = "Bangi-Moi", family = "bnt-bnm"}
+
+m["bnt-bne"] = {canonicalName = "Northeast Bantu", family = "bnt", wikidata_item = 7057832}
+
+m["bnt-bnm"] = {canonicalName = "Bangi-Ntomba", family = "bnt-bte", wikidata_item = 806477}
+
+m["bnt-boa"] = {canonicalName = "Boan", aliases = {"Buan", "Ababuan"}, family = "bnt", wikidata_item = 4931250}
+
+m["bnt-bot"] = {canonicalName = "Botatwe", family = "bnt", wikidata_item = 4948532}
+
+m["bnt-bsa"] = {canonicalName = "Basaa", family = "bnt", wikidata_item = 809739}
+
+m["bnt-bsh"] = {canonicalName = "Bushoong", family = "bnt-bte", wikidata_item = 5001551}
+
+m["bnt-bso"] = {canonicalName = "Southern Bantu", family = "bnt", wikidata_item = 980498}
+
+m["bnt-bta"] = {canonicalName = "Bati-Angba", otherNames = {"Late Bomokandian"}, aliases = {"Bwa"}, family = "bnt-boa", wikidata_item = 4869303}
+
+m["bnt-btb"] = {canonicalName = "Beti", family = "bnt", wikidata_item = 35118}
+
+m["bnt-bte"] = {canonicalName = "Bangi-Tetela", family = "bnt", wikidata_item = 4855181}
+
+m["bnt-bun"] = {canonicalName = "Buja-Ngombe", family = "bnt-mbb", wikidata_item = 4986733}
+
+m["bnt-chg"] = {canonicalName = "Chaga", family = "bnt-cht", wikidata_item = 33016}
+
+m["bnt-cht"] = {canonicalName = "Chaga-Taita", family = "bnt-bne"}
+
+m["bnt-clu"] = {canonicalName = "Chokwe-Luchazi", family = "bnt", wikidata_item = 3339273}
+
+m["bnt-com"] = {canonicalName = "Comorian", family = "bnt-sab", wikidata_item = 33077}
+
+m["bnt-glb"] = {canonicalName = "Great Lakes Bantu", family = "bnt-bne", wikidata_item = 5599420}
+
+m["bnt-haj"] = {canonicalName = "Haya-Jita", family = "bnt-glb", wikidata_item = 25502360}
+
+m["bnt-kak"] = {canonicalName = "Kako", family = "bnt-pob"}
+
+m["bnt-kav"] = {canonicalName = "Kavango", family = "bnt-ksb"}
+
+m["bnt-kbi"] = {canonicalName = "Komo-Bira", family = "bnt-boa", wikidata_item = 6428591}
+
+m["bnt-kel"] = {canonicalName = "Kele", aliases = {"Sheke"}, family = "bnt-kts", wikidata_item = 1738162}
+
+m["bnt-kil"] = {canonicalName = "Kilombero", family = "bnt", wikidata_item = 6408121}
+
+m["bnt-kka"] = {canonicalName = "Kikuyu-Kamba", aliases = {"Thagiicu"}, family = "bnt-bne", wikidata_item = 18419596}
+
+m["bnt-kmb"] = {canonicalName = "Kimbundu", family = "bnt", wikidata_item = 16947687}
+
+m["bnt-kng"] = {canonicalName = "Kongo", family = "bnt", wikidata_item = 6429214}
+
+m["bnt-kpw"] = {canonicalName = "Kpwe", family = "bnt-saw"}
+
+m["bnt-ksb"] = {canonicalName = "Kavango-Southwest Bantu", family = "bnt", wikidata_item = 6379098}
+
+m["bnt-kts"] = {canonicalName = "Kele-Tsogo", family = "bnt", wikidata_item = 6385577}
+
+m["bnt-lbn"] = {canonicalName = "Luban", family = "bnt", wikidata_item = 4536504}
+
+m["bnt-leb"] = {canonicalName = "Lebonya", family = "bnt", wikidata_item = 6511395}
+
+m["bnt-lgb"] = {canonicalName = "Lega-Binja", family = "bnt", wikidata_item = 6517694}
+
+m["bnt-lok"] = {canonicalName = "Logooli-Kuria", family = "bnt-glb"}
+
+m["bnt-lub"] = {canonicalName = "Luba", family = "bnt-lbn"}
+
+m["bnt-lun"] = {canonicalName = "Lunda", family = "bnt", wikidata_item = 6704091}
+
+m["bnt-mak"] = {canonicalName = "Makua", aliases = {"Makhuwa"}, family = "bnt-bso", wikidata_item = 6740431}
+
+m["bnt-mbb"] = {canonicalName = "Mboshi-Buja", family = "bnt", wikidata_item = 6799764}
+
+m["bnt-mbe"] = {canonicalName = "Mbole-Enya", family = "bnt", wikidata_item = 6799728}
+
+m["bnt-mbi"] = {canonicalName = "Mbinga", family = "bnt-rur"}
+
+m["bnt-mbo"] = {canonicalName = "Mboshi", family = "bnt-mbb", wikidata_item = 6799763}
+
+m["bnt-mbt"] = {canonicalName = "Mbete", family = "bnt-tmb", wikidata_item = 1346910, aliases = {"Mbere"}}
+
+m["bnt-mby"] = {canonicalName = "Mbeya", family = "bnt-ruk"}
+
+m["bnt-mij"] = {canonicalName = "Mijikenda", family = "bnt-sab", wikidata_item = 6845474}
+
+m["bnt-mka"] = {canonicalName = "Makaa", family = "bnt-ndb"}
+
+m["bnt-mne"] = {canonicalName = "Manenguba", aliases = {"Mbo", "Ngoe"}, family = "bnt", wikidata_item = 31147471}
+
+m["bnt-mnj"] = {canonicalName = "Makaa-Njem", family = "bnt-pob", wikidata_item = 1603899}
+
+m["bnt-mon"] = {canonicalName = "Mongo", family = "bnt-bnm"}
+
+m["bnt-mra"] = {canonicalName = "Mbugwe-Rangi", family = "bnt", wikidata_item = 6799795}
+
+m["bnt-msl"] = {canonicalName = "Masaba-Luhya", family = "bnt-glb", wikidata_item = 12636428}
+
+m["bnt-mwi"] = {canonicalName = "Mwika", family = "bnt-ruk"}
+
+m["bnt-ncb"] = {canonicalName = "Northeast Coast Bantu", family = "bnt-bne", wikidata_item = 7057848}
+
+m["bnt-ndb"] = {canonicalName = "Ndzem-Bomwali", family = "bnt-mnj"}
+
+m["bnt-ngn"] = {canonicalName = "Ngondi-Ngiri", family = "bnt-mbb", wikidata_item = 7022532}
+
+m["bnt-ngu"] = {canonicalName = "Nguni", aliases = {"Ngoni"}, family = "bnt-bso", wikidata_item = 961559}
+
+m["bnt-nya"] = {canonicalName = "Nyali", family = "bnt-leb", wikidata_item = 7070832}
+
+m["bnt-nyb"] = {canonicalName = "Nyanga-Buyi", family = "bnt", wikidata_item = 7070882}
+
+m["bnt-nyg"] = {canonicalName = "Nyoro-Ganda", family = "bnt-glb", wikidata_item = 12638666}
+
+m["bnt-nys"] = {canonicalName = "Nyasa", family = "bnt", wikidata_item = 7070921}
+
+m["bnt-nze"] = {canonicalName = "Nzebi", family = "bnt-tmb", wikidata_item = 1755498, aliases = {"Njebi"}}
+
+m["bnt-ova"] = {canonicalName = "Ovambo", aliases = {"Oshivambo", "Oshiwambo", "Owambo"}, family = "bnt-swb", wikidata_item = 36489}
+
+m["bnt-par"] = {canonicalName = "Pare", family = "bnt-ncb"}
+
+m["bnt-pen"] = {canonicalName = "Pende", family = "bnt", wikidata_item = 7162373}
+
+m["bnt-pob"] = {canonicalName = "Pomo-Bomwali", family = "bnt"}
+
+m["bnt-ruk"] = {canonicalName = "Rukwa", family = "bnt", wikidata_item = 7378902}
+
+m["bnt-run"] = {canonicalName = "Rungwe", family = "bnt-ruk"}
+
+m["bnt-rur"] = {canonicalName = "Rufiji-Ruvuma", family = "bnt", wikidata_item = 7377947}
+
+m["bnt-ruv"] = {canonicalName = "Ruvu", family = "bnt-ncb"}
+
+m["bnt-rvm"] = {canonicalName = "Ruvuma", family = "bnt-rur"}
+
+m["bnt-sab"] = {canonicalName = "Sabaki", family = "bnt-ncb", wikidata_item = 2209395}
+
+m["bnt-saw"] = {canonicalName = "Sawabantu", family = "bnt", wikidata_item = 532003}
+
+m["bnt-sbi"] = {canonicalName = "Sabi", family = "bnt", wikidata_item = 7396071}
+
+m["bnt-seu"] = {canonicalName = "Seuta", family = "bnt-ncb"}
+
+m["bnt-shh"] = {canonicalName = "Shi-Havu", family = "bnt-glb"}
+
+m["bnt-sho"] = {canonicalName = "Shona", family = "bnt", wikidata_item = 2904660}
+
+m["bnt-sir"] = {canonicalName = "Sira", family = "bnt", wikidata_item = 1436372, aliases = {"Shira-Punu"}}
+
+m["bnt-ske"] = {canonicalName = "Soko-Kele", family = "bnt-bte"}
+
+m["bnt-sna"] = {canonicalName = "Sena", family = "bnt-nys"}
+
+m["bnt-sts"] = {canonicalName = "Sotho-Tswana", family = "bnt-bso", wikidata_item = 2038386}
+
+m["bnt-swb"] = {canonicalName = "Southwest Bantu", family = "bnt-ksb"}
+
+m["bnt-swh"] = {canonicalName = "Swahili", family = "bnt-sab"}
+
+m["bnt-tek"] = {canonicalName = "Teke", family = "bnt-tmb", wikidata_item = 36528}
+
+m["bnt-tet"] = {canonicalName = "Tetela", family = "bnt-bte", wikidata_item = 7706059}
+
+m["bnt-tkc"] = {canonicalName = "Central Teke", family = "bnt-tek"}
+
+m["bnt-tkm"] = {canonicalName = "Takama", family = "bnt-bne"}
+
+m["bnt-tmb"] = {canonicalName = "Teke-Mbede", family = "bnt", wikidata_item = 7695332, aliases = {"Teke-Mbere"}}
+
+m["bnt-tso"] = {
+    canonicalName = "Tsogo",
+    -- Glottolog appears to treat "Okani" as an alias.
+    otherNames = {"Okani"},
+    family = "bnt-kts",
+    wikidata_item = 2458420,
+}
+
+m["bnt-tsr"] = {canonicalName = "Tswa-Ronga", family = "bnt-bso", wikidata_item = 12643962}
+
+m["bnt-yak"] = {canonicalName = "Yaka", family = "bnt", wikidata_item = 8047027}
+
+m["bnt-yko"] = {canonicalName = "Yasa-Kombe", family = "bnt-saw"}
+
+m["bnt-zbi"] = {canonicalName = "Zamba-Binza", family = "bnt-bnm"}
+
+m["btk"] = {canonicalName = "Batak", family = "poz-nws", wikidata_item = 1998595}
+
+--[=[
+	Exceptional language and family codes for Central American Indian languages
+	may use the prefix "cai-", though "cai" is no longer itself a family code.
+]=] --
+
+--[=[
+	Exceptional language and family codes for Caucasian languages can use
+	the prefix "cau-", though "cau" is no longer itself a family code.
+]=] --
+
+m["cau-abz"] = {canonicalName = "Abkhaz-Abaza", otherNames = {"Abkhaz-Tapanta"}, aliases = {"Abazgi"}, family = "cau-nwc", wikidata_item = 4663617}
+
+m["cau-ava"] = {canonicalName = "Avaro-Andian", otherNames = {"Andian"}, aliases = {"Avar-Andian", "Avar-Andi", "Avar-Andic"}, family = "cau-nec", wikidata_item = 4827766}
+
+m["cau-cir"] = {canonicalName = "Circassian", aliases = {"Cherkess"}, family = "cau-nwc", wikidata_item = 858543}
+
+m["cau-drg"] = {canonicalName = "Dargwa", otherNames = {"Dargin"}, family = "cau-nec", wikidata_item = 5222637}
+
+m["cau-lzg"] = {canonicalName = "Lezghian", aliases = {"Lezgi", "Lezgian", "Lezgic"}, family = "cau-nec", wikidata_item = 2144370}
+
+m["cau-nkh"] = {canonicalName = "Nakh", aliases = {"North-Central Caucasian"}, family = "cau-nec", wikidata_item = 24441}
+
+m["cau-nec"] = {canonicalName = "Northeast Caucasian", aliases = {"Dagestanian", "Nakho-Dagestanian", "Caspian"}, family = "ccn", wikidata_item = 27387}
+
+m["cau-nwc"] = {canonicalName = "Northwest Caucasian", aliases = {"Abkhazo-Adyghean", "Abkhaz-Adyghe", "Pontic"}, family = "ccn", wikidata_item = 33852}
+
+m["cau-tsz"] = {canonicalName = "Tsezian", aliases = {"Tsezic", "Didoic"}, family = "cau-nec", wikidata_item = 1651530}
+
+m["cau-vay"] = {canonicalName = "Vainakh", aliases = {"Veinakh", "Vaynakh"}, family = "cau-nkh", wikidata_item = 4102486}
+
+m["cba"] = {
+    canonicalName = "Chibchan",
+    -- Would have no parent family if Macro-Chibchan is considered undemonstrated.
+    family = "qfa-mch",
+    wikidata_item = 520478,
+}
+
+m["ccn"] = {canonicalName = "North Caucasian", wikidata_item = 33732}
+
+m["ccs"] = {canonicalName = "Kartvelian", aliases = {"South Caucasian"}, wikidata_item = 34030}
+
+m["ccs-gzn"] = {canonicalName = "Georgian-Zan", aliases = {"Karto-Zan"}, family = "ccs", wikidata_item = 34030} -- NOTE(review): same wikidata_item as the parent family "ccs"; confirm whether Georgian-Zan has an item of its own
+
+m["ccs-zan"] = {canonicalName = "Zan", aliases = {"Zanuri", "Colchian"}, family = "ccs-gzn", wikidata_item = 2606912}
+
+m["cdc"] = {canonicalName = "Chadic", family = "afa", wikidata_item = 33184}
+
+m["cdc-cbm"] = {canonicalName = "Central Chadic", aliases = {"Biu-Mandara"}, family = "cdc", wikidata_item = 2251547}
+
+m["cdc-est"] = {canonicalName = "East Chadic", family = "cdc", wikidata_item = 2276221}
+
+m["cdc-mas"] = {canonicalName = "Masa", family = "cdc", wikidata_item = 2136092}
+
+m["cdc-wst"] = {canonicalName = "West Chadic", family = "cdc", wikidata_item = 2447774}
+
+m["cdd"] = {canonicalName = "Caddoan", wikidata_item = 1025090}
+
+m["cel"] = {canonicalName = "Celtic", family = "ine", wikidata_item = 25293}
+
+m["cel-bry"] = {canonicalName = "Brythonic", family = "cel", wikidata_item = 156877}
+
+m["cel-gae"] = {canonicalName = "Goidelic", aliases = {"Gaelic"}, protoLanguage = "pgl", family = "cel", wikidata_item = 56433}
+
+m["chi"] = {canonicalName = "Chimakuan", wikidata_item = 1073088}
+
+m["cmc"] = {canonicalName = "Chamic", family = "poz-mcm", wikidata_item = 2997506}
+
+m["crp"] = {canonicalName = "creole or pidgin", family = "qfa-not"}
+
+m["csu"] = {canonicalName = "Central Sudanic", family = "ssa", wikidata_item = 190822}
+
+m["csu-bba"] = {canonicalName = "Bongo-Bagirmi", family = "csu", wikidata_item = 3505042}
+
+m["csu-bbk"] = {canonicalName = "Bongo-Baka", family = "csu-bba", wikidata_item = 4941917}
+
+m["csu-bgr"] = {canonicalName = "Bagirmi", family = "csu-bba", wikidata_item = 4841948, aliases = {"Bagirmic"}}
+
+m["csu-bkr"] = {canonicalName = "Birri-Kresh", family = "csu"}
+
+m["csu-ecs"] = {canonicalName = "Eastern Central Sudanic", family = "csu", wikidata_item = 16911698, aliases = {"East Central Sudanic", "Central Sudanic East", "Lendu-Mangbetu"}}
+
+m["csu-kab"] = {canonicalName = "Kaba", family = "csu-bba", wikidata_item = 6343715}
+
+m["csu-lnd"] = {canonicalName = "Lendu", family = "csu-ecs", wikidata_item = 6522357, aliases = {"Lenduic"}}
+
+m["csu-maa"] = {canonicalName = "Mangbetu", family = "csu-ecs", wikidata_item = 6748874, aliases = {"Mangbetu-Asoa", "Mangbetu-Asua"}}
+
+m["csu-mle"] = {canonicalName = "Mangbutu-Lese", family = "csu-ecs", wikidata_item = 17009406, aliases = {"Mangbutu–Efe", "Mangbutu", "Membi-Mangbutu-Efe"}}
+
+m["csu-mma"] = {canonicalName = "Moru-Madi", family = "csu-ecs", wikidata_item = 6915156}
+
+m["csu-sar"] = {canonicalName = "Sara", family = "csu-bba", wikidata_item = 2036691}
+
+m["csu-val"] = {canonicalName = "Vale", family = "csu-bba", wikidata_item = 7909520}
+
+m["cus"] = {canonicalName = "Cushitic", family = "afa", wikidata_item = 33248}
+
+m["day"] = {canonicalName = "Land Dayak", family = "poz-bop", wikidata_item = 2760613}
+
+m["del"] = {canonicalName = "Lenape", aliases = {"Delaware"}, family = "alg-eas", wikidata_item = 2665761}
+
+m["dmn"] = {canonicalName = "Mande", family = "nic", wikidata_item = 33681}
+
+m["dmn-bbu"] = {canonicalName = "Bisa-Busa", family = "dmn-mde", wikidata_item = 12627956}
+
+m["dmn-emn"] = {canonicalName = "East Manding", family = "dmn-man"}
+
+m["dmn-jje"] = {canonicalName = "Jogo-Jeri", family = "dmn-mjo"}
+
+m["dmn-man"] = {canonicalName = "Manding", family = "dmn-mmo", wikidata_item = 35772}
+
+m["dmn-mda"] = {canonicalName = "Mano-Dan", family = "dmn-mse"}
+
+m["dmn-mdc"] = {canonicalName = "Central Mande", family = "dmn-mdw", wikidata_item = 5972907}
+
+m["dmn-mde"] = {canonicalName = "Eastern Mande", family = "dmn", wikidata_item = 12633080}
+
+m["dmn-mdw"] = {canonicalName = "Western Mande", family = "dmn", wikidata_item = 16113831}
+
+m["dmn-mjo"] = {canonicalName = "Manding-Jogo", family = "dmn-mdc", wikidata_item = 12636153}
+
+m["dmn-mmo"] = {canonicalName = "Manding-Mokole", family = "dmn-mva"}
+
+m["dmn-mnk"] = {canonicalName = "Maninka", family = "dmn-emn"}
+
+m["dmn-mnw"] = {canonicalName = "Northwestern Mande", family = "dmn-mdw", wikidata_item = 5972910}
+
+m["dmn-mok"] = {canonicalName = "Mokole", family = "dmn-mmo", wikidata_item = 16935447}
+
+m["dmn-mse"] = {canonicalName = "Southeastern Mande", family = "dmn-mde", wikidata_item = 5972912}
+
+m["dmn-msw"] = {canonicalName = "Southwestern Mande", family = "dmn-mdw", wikidata_item = 12633904}
+
+m["dmn-mva"] = {canonicalName = "Manding-Vai", family = "dmn-mjo"}
+
+m["dmn-nbe"] = {canonicalName = "Nwa-Beng", family = "dmn-mse"}
+
+m["dmn-sam"] = {canonicalName = "Samo", family = "dmn-bbu", aliases = {"Samuic"}, wikidata_item = 36327}
+
+m["dmn-smg"] = {canonicalName = "Samogo", family = "dmn-mnw", wikidata_item = 7410000, aliases = {"Duun-Seenku"}}
+
+m["dmn-snb"] = {canonicalName = "Soninke-Bobo", family = "dmn-mnw", wikidata_item = 16111680}
+
+m["dmn-sya"] = {canonicalName = "Susu-Yalunka", family = "dmn-mdc"}
+
+m["dmn-vak"] = {canonicalName = "Vai-Kono", family = "dmn-mva"}
+
+m["dmn-wmn"] = {canonicalName = "West Manding", family = "dmn-man"}
+
+m["dra"] = {canonicalName = "Dravidian", wikidata_item = 33311}
+
+m["egx"] = {canonicalName = "Egyptian", protoLanguage = "egy", family = "afa", wikidata_item = 50868}
+
+m["esx"] = {canonicalName = "Eskimo-Aleut", wikidata_item = 25946}
+
+m["esx-esk"] = {canonicalName = "Eskimo", family = "esx", wikidata_item = 25946} -- NOTE(review): same wikidata_item as the parent family "esx"; confirm whether Eskimo has an item of its own
+
+m["esx-inu"] = {canonicalName = "Inuit", family = "esx-esk", wikidata_item = 27796}
+
+m["euq"] = {canonicalName = "Vasconic", wikidata_item = 4669240}
+
+m["fiu-fin"] = {canonicalName = "Finnic", family = "urj", wikidata_item = 33328}
+
+m["gem"] = {canonicalName = "Germanic", family = "ine", wikidata_item = 21200}
+
+m["gme"] = {canonicalName = "East Germanic", family = "gem", wikidata_item = 108662}
+
+m["gmq"] = {canonicalName = "North Germanic", family = "gem", wikidata_item = 106085}
+
+m["gmw"] = {canonicalName = "West Germanic", family = "gem", wikidata_item = 26721}
+
+m["gmw-fri"] = {canonicalName = "Frisian", protoLanguage = "ofs", family = "gmw", wikidata_item = 25325}
+
+m["grk"] = {canonicalName = "Hellenic", aliases = {"Greek"}, family = "ine", wikidata_item = 2042538}
+
+m["him"] = {canonicalName = "Western Pahari", aliases = {"Himachali"}, family = "inc-pah", wikidata_item = 12645574}
+
+m["hmn"] = {canonicalName = "Hmong", family = "hmx", wikidata_item = 3307894}
+
+m["hmx"] = {canonicalName = "Hmong-Mien", aliases = {"Miao-Yao"}, wikidata_item = 33322}
+
+m["hmx-mie"] = {canonicalName = "Mien", family = "hmx", wikidata_item = 7992695}
+
+m["hok"] = {canonicalName = "Hokan", wikidata_item = 33406}
+
+m["hyx"] = {canonicalName = "Armenian", family = "ine", wikidata_item = 8785}
+
+m["iir"] = {canonicalName = "Indo-Iranian", family = "ine", wikidata_item = 33514}
+
+m["iir-nur"] = {canonicalName = "Nuristani", family = "iir", wikidata_item = 161804}
+
+m["nur-nor"] = {canonicalName = "Northern Nuristani", family = "iir-nur"}
+
+m["nur-sou"] = {canonicalName = "Southern Nuristani", family = "iir-nur"}
+
+m["ijo"] = {
+    canonicalName = "Ijoid",
+    -- "Ijaw" may be a subfamily rather than another name for the whole family.
+    otherNames = {"Ijaw"},
+    family = "nic",
+    wikidata_item = 1325759,
+}
+
+m["inc"] = {canonicalName = "Indo-Aryan", aliases = {"Indic"}, family = "iir", wikidata_item = 33577}
+
+m["inc-bhi"] = {canonicalName = "Bhil", family = "inc-cen", wikidata_item = 4901727}
+
+m["inc-cen"] = {canonicalName = "Central Indo-Aryan", family = "inc-psu", wikidata_item = 10979187, protoLanguage = "inc-cen-pro"}
+
+m["inc-dar"] = {canonicalName = "Dardic", family = "inc-old", wikidata_item = 161101}
+
+m["inc-eas"] = {canonicalName = "Eastern Indo-Aryan", family = "inc", wikidata_item = 16590069, protoLanguage = "inc-mgd"}
+
+m["inc-hie"] = {canonicalName = "Eastern Hindi", aliases = {"Purabiyā"}, family = "inc", wikidata_item = 4126648, protoLanguage = "inc-pka"}
+
+m["inc-hiw"] = {canonicalName = "Western Hindi", family = "inc-cen", wikidata_item = 12600937, protoLanguage = "inc-sap"}
+
+m["inc-hnd"] = {canonicalName = "Hindustani", aliases = {"Hindi-Urdu"}, family = "inc-hiw", wikidata_item = 11051}
+
+m["inc-ins"] = {canonicalName = "Insular Indo-Aryan", family = "inc", protoLanguage = "elu-prk"}
+
+m["inc-mid"] = {canonicalName = "Middle Indo-Aryan", aliases = {"Middle Indic"}, family = "inc", wikidata_item = 3236316}
+
+m["inc-nwe"] = {canonicalName = "Northwestern Indo-Aryan", family = "inc-psu", wikidata_item = 41355020, protoLanguage = "inc-psc"}
+
+m["inc-nor"] = {canonicalName = "Northern Indo-Aryan", family = "inc", wikidata_item = 12642170, protoLanguage = "inc-kha"}
+
+m["inc-old"] = {canonicalName = "Old Indo-Aryan", aliases = {"Old Indic"}, family = "inc", protoLanguage = "sa"}
+
+m["inc-pah"] = {canonicalName = "Pahari", aliases = {"Pahadi"}, family = "inc-nor", wikidata_item = 946077}
+
+m["inc-pan"] = {canonicalName = "Punjabi-Lahnda", family = "inc-nwe", protoLanguage = "inc-tak"}
+
+m["inc-psu"] = {canonicalName = "Sauraseni Prakrit", aliases = {"Sauraseni", "Shauraseni"}, family = "pra"}
+
+m["inc-rom"] = {canonicalName = "Romani", aliases = {"Romany", "Gypsy", "Gipsy"}, protoLanguage = "rom", family = "inc-psu", wikidata_item = 13201}
+
+m["inc-snd"] = {canonicalName = "Sindhi", family = "inc-nwe", wikidata_item = 7522212, protoLanguage = "inc-vra"}
+
+m["inc-sou"] = {canonicalName = "Southern Indo-Aryan", family = "inc", wikidata_item = 12179304, protoLanguage = "pmh"}
+
+m["inc-wes"] = {canonicalName = "Western Indo-Aryan", family = "inc-psu", protoLanguage = "inc-gup"}
+
+m["ine"] = {canonicalName = "Indo-European", aliases = {"Indo-Germanic"}, wikidata_item = 19860}
+
+m["ine-ana"] = {canonicalName = "Anatolian", family = "ine", wikidata_item = 147085}
+
+m["ine-bsl"] = {canonicalName = "Balto-Slavic", family = "ine", wikidata_item = 147356}
+
+m["ine-toc"] = {canonicalName = "Tocharian", aliases = {"Tokharian"}, family = "ine", wikidata_item = 37029}
+
+m["ira"] = {canonicalName = "Iranian", family = "iir", wikidata_item = 33527}
+
+m["ira-csp"] = {canonicalName = "Caspian", family = "ira-mpr"}
+
+m["ira-cen"] = {canonicalName = "Central Iranian", family = "ira"}
+
+m["xme"] = {canonicalName = "Median", family = "ira-mpr", protoLanguage = "xme-old"}
+
+m["ira-mny"] = {canonicalName = "Munji-Yidgha", aliases = {"Yidgha-Munji"}, family = "ira-sym"}
+
+m["ira-msh"] = {canonicalName = "Mazanderani-Shahmirzadi", family = "ira-csp"}
+
+m["ira-nei"] = {canonicalName = "Northeastern Iranian", family = "ira"}
+
+m["ira-nwi"] = {canonicalName = "Northwestern Iranian", family = "ira-wes"}
+
+m["ira-orp"] = {canonicalName = "Ormuri-Parachi", family = "ira-sei"}
+
+m["ira-pat"] = {canonicalName = "Pathan", family = "ira-sei", protoLanguage = "ira-pat-pro"}
+
+m["ira-sbc"] = {canonicalName = "Sogdo-Bactrian", family = "ira-nei"}
+
+m["xsc"] = {canonicalName = "Scythian", family = "ira-nei"}
+
+m["xsc-skw"] = {canonicalName = "Saka-Wakhi", family = "xsc"}
+
+m["xsc-sak"] = {canonicalName = "Sakan", aliases = {"Saka"}, family = "xsc-skw"}
+
+m["ira-mpr"] = {canonicalName = "Medo-Parthian", aliases = {"Partho-Median"}, family = "ira-nwi"}
+
+m["ira-kms"] = {canonicalName = "Komisenian", aliases = {"Semnani"}, family = "ira-mpr"}
+
+m["ira-sgi"] = {canonicalName = "Sanglechi-Ishkashimi", family = "ira-sei"}
+
+m["ira-shy"] = {canonicalName = "Shughni-Yazghulami", family = "ira-sym"}
+
+m["ira-sgc"] = {canonicalName = "Sogdic", aliases = {"Sogdian"}, family = "ira-sbc"}
+
+m["ira-sei"] = {canonicalName = "Southeastern Iranian", family = "ira"}
+
+m["ira-swi"] = {canonicalName = "Southwestern Iranian", family = "ira-wes"}
+
+m["ira-sym"] = {canonicalName = "Shughni-Yazghulami-Munji", family = "ira-sei"}
+
+m["xme-ttc"] = {canonicalName = "Tatic", family = "xme", protoLanguage = "xme-ttc-pro"}
+
+m["ira-wes"] = {canonicalName = "Western Iranian", family = "ira", wikidata_item = 129850}
+
+m["ira-zgr"] = {canonicalName = "Zaza-Gorani", aliases = {"Zaza-Gurani", "Gorani-Zaza"}, family = "ira-mpr"}
+
+m["iro"] = {canonicalName = "Iroquoian", wikidata_item = 33623}
+
+m["itc"] = {canonicalName = "Italic", family = "ine", wikidata_item = 131848}
+
+m["jpx"] = {canonicalName = "Japonic", aliases = {"Japanese", "Japanese-Ryukyuan"}, wikidata_item = 33612}
+
+m["jpx-ryu"] = {canonicalName = "Ryukyuan", family = "jpx", wikidata_item = 56393}
+
+m["kar"] = {canonicalName = "Karen", family = "sit", wikidata_item = 1364815}
+
+--[=[
+	Exceptional language and family codes for Khoisan and Kordofanian languages can use
+	the prefixes "khi-" and "kdo-" respectively, though "khi" and "kdo" are no longer family codes themselves.
+]=] --
+
+m["khi-kal"] = {canonicalName = "Kalahari Khoe", family = "khi-kho"}
+
+m["khi-khk"] = {canonicalName = "Khoekhoe", family = "khi-kho"}
+
+m["khi-kkw"] = {canonicalName = "Khoe-Kwadi", aliases = {"Kwadi-Khoe"}, wikidata_item = 3833005}
+
+m["khi-kho"] = {canonicalName = "Khoe", aliases = {"Central Khoisan"}, family = "khi-kkw", wikidata_item = 2736449}
+
+m["khi-kxa"] = {canonicalName = "Kx'a", aliases = {"Kxa", "Ju-ǂHoan"}, wikidata_item = 6450587}
+
+m["khi-tuu"] = {canonicalName = "Tuu", aliases = {"Kwi", "Taa-Kwi", "Southern Khoisan", "Taa-ǃKwi", "Taa-ǃUi", "ǃUi-Taa"}, wikidata_item = 631046}
+
+m["kro"] = {canonicalName = "Kru", family = "nic-vco", wikidata_item = 33535}
+
+m["kro-aiz"] = {canonicalName = "Aizi", family = "kro", wikidata_item = 4699431}
+
+m["kro-bet"] = {canonicalName = "Bété", family = "kro-ekr", wikidata_item = 32956}
+
+m["kro-did"] = {canonicalName = "Dida", family = "kro-ekr", wikidata_item = 32685}
+
+m["kro-ekr"] = {canonicalName = "Eastern Kru", family = "kro", wikidata_item = 5972899}
+
+m["kro-grb"] = {canonicalName = "Grebo", family = "kro-wkr", wikidata_item = 5601537}
+
+m["kro-wee"] = {canonicalName = "Wee", family = "kro-wkr"}
+
+m["kro-wkr"] = {canonicalName = "Western Kru", family = "kro", wikidata_item = 5972897}
+
+m["ku"] = {canonicalName = "Kurdish", family = "ira-nwi", protoLanguage = "ku-pro", wikidata_item = 36368}
+
+m["map"] = {canonicalName = "Austronesian", wikidata_item = 49228}
+
+m["map-ata"] = {canonicalName = "Atayalic", family = "map", wikidata_item = 716610}
+
+m["mkh"] = {canonicalName = "Mon-Khmer", family = "aav", wikidata_item = 33199}
+
+m["mkh-asl"] = {canonicalName = "Aslian", family = "mkh", wikidata_item = 3111082}
+
+m["mkh-ban"] = {canonicalName = "Bahnaric", family = "mkh", wikidata_item = 56309}
+
+m["mkh-kat"] = {canonicalName = "Katuic", family = "mkh", wikidata_item = 56697}
+
+m["mkh-khm"] = {canonicalName = "Khmuic", family = "mkh", wikidata_item = 1323245}
+
+m["mkh-kmr"] = {canonicalName = "Khmeric", family = "mkh"}
+
+m["mkh-mnc"] = {canonicalName = "Monic", family = "mkh", wikidata_item = 3217497}
+
+m["mkh-mng"] = {canonicalName = "Mangic", family = "mkh", wikidata_item = 3509556}
+
+m["mkh-nbn"] = {canonicalName = "North Bahnaric", family = "mkh-ban", wikidata_item = 56309} -- NOTE(review): same wikidata_item as the parent family "mkh-ban"; confirm whether North Bahnaric has an item of its own
+
+m["mkh-pal"] = {canonicalName = "Palaungic", family = "mkh", wikidata_item = 2391173}
+
+m["mkh-pea"] = {canonicalName = "Pearic", family = "mkh", wikidata_item = 3073022}
+
+m["mkh-pkn"] = {canonicalName = "Pakanic", family = "mkh-mng"}
+
+m["mkh-vie"] = {canonicalName = "Vietic", family = "mkh", wikidata_item = 2355546}
+
+m["mno"] = {canonicalName = "Manobo", family = "phi", wikidata_item = 3217483}
+
+m["mun"] = {canonicalName = "Munda", family = "aav", wikidata_item = 33892}
+
+m["myn"] = {canonicalName = "Mayan", wikidata_item = 33738}
+
+--[=[
+	Exceptional language and family codes for North American Indian languages
+	can use the prefix "nai-", though "nai" is no longer itself a family code.
+]=] --
+m["nai-cat"] = {canonicalName = "Catawban", family = "nai-sca", wikidata_item = 3446638}
+
+m["nai-chu"] = {canonicalName = "Chumashan", wikidata_item = 1288420}
+
+m["nai-ckn"] = {canonicalName = "Chinookan", wikidata_item = 610586}
+
+m["nai-coo"] = {canonicalName = "Coosan", wikidata_item = 940278}
+
+m["nai-ker"] = {canonicalName = "Keresan", wikidata_item = 35878}
+
+m["nai-klp"] = {canonicalName = "Kalapuyan", wikidata_item = 1569040}
+
+m["nai-kta"] = {canonicalName = "Kiowa-Tanoan", wikidata_item = 386288}
+
+m["nai-len"] = {canonicalName = "Lencan", aliases = {"Lenca"}, wikidata_item = 36189}
+
+m["nai-mdu"] = {canonicalName = "Maiduan", wikidata_item = 33502}
+
+m["nai-miz"] = {canonicalName = "Mixe-Zoquean", aliases = {"Mixe-Zoque"}, wikidata_item = 954016}
+
+m["nai-min"] = {canonicalName = "Misumalpan", aliases = {"Misuluan", "Misumalpa"}, family = "qfa-mch", wikidata_item = 281693}
+
+m["nai-mus"] = {canonicalName = "Muskogean", aliases = {"Muskhogean"}, wikidata_item = 902978}
+
+m["nai-pak"] = {canonicalName = "Pakawan", family = "hok", wikidata_item = 65085487}
+
+m["nai-pal"] = {canonicalName = "Palaihnihan", wikidata_item = 1288332}
+
+m["nai-plp"] = {canonicalName = "Plateau Penutian", wikidata_item = 2307476}
+
+m["nai-pom"] = {canonicalName = "Pomoan", aliases = {"Pomo", "Kulanapan"}, family = "hok", wikidata_item = 2618420}
+
+m["nai-sca"] = {canonicalName = "Siouan-Catawban", wikidata_item = 34181}
+
+m["nai-shp"] = {canonicalName = "Sahaptian", wikidata_item = 114782, family = "nai-plp"}
+
+m["nai-shs"] = {canonicalName = "Shastan", family = "hok", wikidata_item = 2991735}
+
+m["nai-tot"] = {canonicalName = "Totozoquean", wikidata_item = 7828419}
+
+m["nai-ttn"] = {canonicalName = "Totonacan", aliases = {"Totonac-Tepehua", "Totonacan-Tepehuan"}, varieties = {"Totonac"}, wikidata_item = 34039}
+
+m["nai-tqn"] = {canonicalName = "Tequistlatecan", aliases = {"Tequistlatec", "Chontal", "Chontalan", "Oaxacan Chontal", "Chontal of Oaxaca"}, family = "hok", wikidata_item = 1754988}
+
+m["nai-tsi"] = {canonicalName = "Tsimshianic", wikidata_item = 34134}
+
+m["nai-utn"] = {canonicalName = "Utian", aliases = {"Miwok-Costanoan", "Mutsun"}, family = "nai-you", wikidata_item = 13371763}
+
+m["nai-wtq"] = {canonicalName = "Wintuan", aliases = {"Wintun"}, wikidata_item = 1294259}
+
+m["nai-xin"] = {canonicalName = "Xincan", aliases = {"Xinca"}, wikidata_item = 1546494}
+
+m["nai-yok"] = {canonicalName = "Yokutsan", aliases = {"Yokuts", "Mariposan", "Mariposa"}, family = "nai-you", wikidata_item = 34249}
+
+m["nai-you"] = {canonicalName = "Yok-Utian", wikidata_item = 2886186}
+
+m["nai-yuc"] = {canonicalName = "Yuman-Cochimí", wikidata_item = 579137}
+
+m["ngf"] = {canonicalName = "Trans-New Guinea", wikidata_item = 34018}
+
+m["ngf-fin"] = {canonicalName = "Finisterre", family = "ngf", wikidata_item = 5450373}
+
+m["ngf-mad"] = {canonicalName = "Madang", family = "ngf", wikidata_item = 11217556}
+
+m["ngf-okk"] = {canonicalName = "Ok", family = "ngf", wikidata_item = 7081687}
+
+m["ngf-sbh"] = {canonicalName = "South Bird's Head", family = "ngf", wikidata_item = 7566330}
+
+m["nic"] = {canonicalName = "Niger-Congo", aliases = {"Niger-Kordofanian"}, wikidata_item = 33838}
+
+m["nic-alu"] = {canonicalName = "Alumic", family = "nic-plt", wikidata_item = 4737355}
+
+m["nic-bas"] = {canonicalName = "Basa", family = "nic-knj", wikidata_item = 4866154}
+
+m["nic-bbe"] = {canonicalName = "Eastern Beboid", family = "nic-beb"}
+
+m["nic-bco"] = {canonicalName = "Benue-Congo", family = "nic-vco", wikidata_item = 33253}
+
+m["nic-bcr"] = {canonicalName = "Bantoid-Cross", family = "nic-bco", wikidata_item = 806983}
+
+m["nic-bdn"] = {canonicalName = "Northern Bantoid", aliases = {"North Bantoid"}, family = "nic-bod"}
+
+m["nic-bds"] = {canonicalName = "Southern Bantoid", aliases = {"Wide Bantu", "Bin"}, family = "nic-bod", wikidata_item = 3183152}
+
+m["nic-beb"] = {canonicalName = "Beboid", family = "nic-bds", wikidata_item = 813549}
+
+m["nic-ben"] = {canonicalName = "Bendi", family = "nic-bcr", wikidata_item = 4887065}
+
+m["nic-beo"] = {canonicalName = "Beromic", family = "nic-plt", wikidata_item = 4894642}
+
+m["nic-bod"] = {canonicalName = "Bantoid", family = "nic-bcr", wikidata_item = 806992}
+
+m["nic-buk"] = {canonicalName = "Buli-Koma", family = "nic-ovo"}
+
+m["nic-bwa"] = {canonicalName = "Bwa", otherNames = {"Bwamu", "Bomu"}, family = "nic-gur", wikidata_item = 12628562}
+
+m["nic-cde"] = {canonicalName = "Central Delta", family = "nic-cri", wikidata_item = 3813191}
+
+m["nic-cri"] = {canonicalName = "Cross River", family = "nic-bcr", wikidata_item = 1141096}
+
+m["nic-dag"] = {canonicalName = "Dagbani", family = "nic-wov"}
+
+m["nic-dak"] = {canonicalName = "Dakoid", family = "nic-bdn", wikidata_item = 1157745}
+
+m["nic-dge"] = {canonicalName = "Escarpment Dogon", family = "qfa-dgn", wikidata_item = 5397128}
+
+m["nic-dgw"] = {canonicalName = "West Dogon", family = "qfa-dgn"}
+
+m["nic-eko"] = {canonicalName = "Ekoid", family = "nic-bds", wikidata_item = 1323395}
+
+m["nic-eov"] = {canonicalName = "Eastern Oti-Volta", family = "nic-ovo", aliases = {"Samba"}}
+
+m["nic-fru"] = {canonicalName = "Furu", family = "nic-bds", wikidata_item = 5509783}
+
+m["nic-gne"] = {canonicalName = "Eastern Gurunsi", aliases = {"Eastern Grũsi"}, family = "nic-gns", wikidata_item = 12633072}
+
+m["nic-gnn"] = {canonicalName = "Northern Gurunsi", aliases = {"Northern Grũsi"}, family = "nic-gns"}
+
+m["nic-gnw"] = {canonicalName = "Western Gurunsi", aliases = {"Western Grũsi"}, family = "nic-gns"}
+
+m["nic-gns"] = {canonicalName = "Gurunsi", aliases = {"Grũsi"}, family = "nic-gur", wikidata_item = 721007}
+
+m["nic-gre"] = {canonicalName = "Eastern Grassfields", family = "nic-grf", wikidata_item = 5330160}
+
+m["nic-grf"] = {canonicalName = "Grassfields", aliases = {"Grassfields Bantu", "Wide Grassfields"}, family = "nic-bds", wikidata_item = 750932}
+
+m["nic-grm"] = {canonicalName = "Gurma", family = "nic-ovo", wikidata_item = 30587833}
+
+m["nic-grs"] = {canonicalName = "Southwest Grassfields", family = "nic-grf", wikidata_item = 7571285}
+
+m["nic-gur"] = {canonicalName = "Gur", aliases = {"Voltaic"}, family = "alv-sav", wikidata_item = 33536}
+
+m["nic-ief"] = {canonicalName = "Ibibio-Efik", family = "nic-lcr", wikidata_item = 2743643}
+
+m["nic-jer"] = {canonicalName = "Jera", family = "nic-kne"}
+
+m["nic-jkn"] = {canonicalName = "Jukunoid", family = "nic-pla", wikidata_item = 1711622}
+
+m["nic-jrn"] = {canonicalName = "Jarawan", family = "nic-mba", wikidata_item = 1683430}
+
+m["nic-jrw"] = {canonicalName = "Jarawa", family = "nic-jrn", wikidata_item = 35423}
+
+m["nic-kam"] = {canonicalName = "Kambari", family = "nic-knj", wikidata_item = 6356294}
+
+-- "nic-ktl" (Katloid) is defined exactly once, further down in sorted order, with its aliases and Wikidata item; a stray earlier duplicate that was fully overwritten used to live here.
+
+m["nic-kau"] = {canonicalName = "Kauru", family = "nic-kne"}
+
+m["nic-kmk"] = {canonicalName = "Kamuku", family = "nic-knj", wikidata_item = 6359821}
+
+m["nic-kne"] = {canonicalName = "East Kainji", family = "nic-knj", wikidata_item = 5328687}
+
+m["nic-knj"] = {canonicalName = "Kainji", family = "nic-pla", wikidata_item = 681495}
+
+m["nic-knn"] = {canonicalName = "Northwest Kainji", family = "nic-knj", wikidata_item = 7060098}
+
+m["nic-ktl"] = {canonicalName = "Katloid", aliases = {"Katla", "Katla-Tima"}, family = "nic", wikidata_item = 6377681}
+
+m["nic-lcr"] = {canonicalName = "Lower Cross River", family = "nic-cri", wikidata_item = 3813193}
+
+m["nic-mam"] = {canonicalName = "Mamfe", aliases = {"Nyang"}, family = "nic-bds", wikidata_item = 2005898}
+
+m["nic-mba"] = {canonicalName = "Mbam", family = "nic-bds", wikidata_item = 687826}
+
+m["nic-mbc"] = {canonicalName = "Mba", family = "nic-ubg", wikidata_item = 6799561}
+
+m["nic-mbw"] = {canonicalName = "West Mbam", family = "nic-mba"}
+
+m["nic-mmb"] = {
+    canonicalName = "Mambiloid",
+    otherNames = {"North Bantoid"}, -- per Wikipedia, North Bantoid is the parent family
+    family = "nic-bdn",
+    wikidata_item = 1888151
+}
+
+m["nic-mom"] = {canonicalName = "Momo", family = "nic-grf", wikidata_item = 6897393}
+
+m["nic-mre"] = {canonicalName = "Moré", family = "nic-wov"}
+
+m["nic-ngd"] = {canonicalName = "Ngbandi", family = "nic-ubg", wikidata_item = 36439}
+
+m["nic-nge"] = {canonicalName = "Ngemba", family = "nic-gre", wikidata_item = 7022271}
+
+m["nic-ngk"] = {canonicalName = "Ngbaka", family = "nic-ubg", wikidata_item = 3217499}
+
+m["nic-nin"] = {canonicalName = "Ninzic", family = "nic-plt", wikidata_item = 7039282}
+
+m["nic-nka"] = {canonicalName = "Nkambe", family = "nic-gre", wikidata_item = 7042520}
+
+m["nic-nkb"] = {canonicalName = "Baka", family = "nic-nkw"}
+
+m["nic-nke"] = {canonicalName = "Eastern Ngbaka", family = "nic-ngk"}
+
+m["nic-nkg"] = {canonicalName = "Gbanziri", family = "nic-nkw"}
+
+m["nic-nkk"] = {canonicalName = "Kpala", family = "nic-nkw"}
+
+m["nic-nkm"] = {canonicalName = "Mbaka", family = "nic-nkw"}
+
+m["nic-nkw"] = {canonicalName = "Western Ngbaka", family = "nic-ngk"}
+
+m["nic-npd"] = {canonicalName = "North Plateau Dogon", family = "qfa-dgn"}
+
+m["nic-nun"] = {canonicalName = "Nun", family = "nic-gre", wikidata_item = 13654297}
+
+m["nic-nwa"] = {canonicalName = "Nanga-Walo", family = "qfa-dgn"}
+
+m["nic-ogo"] = {canonicalName = "Ogoni", aliases = {"Ogonoid"}, family = "nic-cri", wikidata_item = 2350726}
+
+m["nic-ovo"] = {canonicalName = "Oti-Volta", family = "nic-gur", wikidata_item = 1157178}
+
+m["nic-pla"] = {canonicalName = "Platoid", aliases = {"Central Nigerian"}, family = "nic-bco", wikidata_item = 453244}
+
+m["nic-plc"] = {canonicalName = "Central Plateau", family = "nic-plt", wikidata_item = 5061668}
+
+m["nic-pld"] = {canonicalName = "Plains Dogon", family = "qfa-dgn"}
+
+m["nic-ple"] = {canonicalName = "East Plateau", family = "nic-plt", wikidata_item = 5329154}
+
+m["nic-pls"] = {canonicalName = "South Plateau", aliases = {"Jilic-Eggonic"}, family = "nic-plt", wikidata_item = 7568236}
+
+m["nic-plt"] = {canonicalName = "Plateau", family = "nic-pla", wikidata_item = 1267471}
+
+m["nic-ras"] = {canonicalName = "Rashad", family = "nic", wikidata_item = 3401986}
+
+m["nic-rnc"] = {canonicalName = "Central Ring", family = "nic-rng"}
+
+m["nic-rng"] = {canonicalName = "Ring", aliases = {"Ring Road"}, family = "nic-grf", wikidata_item = 2269051}
+
+m["nic-rnn"] = {canonicalName = "Northern Ring", family = "nic-rng"}
+
+m["nic-rnw"] = {canonicalName = "Western Ring", family = "nic-rng"}
+
+m["nic-ser"] = {canonicalName = "Sere", family = "nic-ubg", wikidata_item = 7453058}
+
+m["nic-shi"] = {canonicalName = "Shiroro", aliases = {"Pongu"}, family = "nic-knj", wikidata_item = 7498953}
+
+m["nic-sis"] = {canonicalName = "Sisaala", family = "nic-gnw", wikidata_item = 36532}
+
+m["nic-tar"] = {canonicalName = "Tarokoid", family = "nic-plt", wikidata_item = 2394472}
+
+m["nic-tiv"] = {canonicalName = "Tivoid", family = "nic-bds", wikidata_item = 752377}
+
+m["nic-tvc"] = {canonicalName = "Central Tivoid", family = "nic-tiv"}
+
+m["nic-tvn"] = {canonicalName = "Northern Tivoid", family = "nic-tiv"}
+
+m["nic-ubg"] = {
+    canonicalName = "Ubangian",
+    family = "nic-vco", -- or none
+    wikidata_item = 33932
+}
+
+m["nic-uce"] = {canonicalName = "East-West Upper Cross River", family = "nic-ucr"}
+
+m["nic-ucn"] = {canonicalName = "North-South Upper Cross River", family = "nic-ucr"}
+
+m["nic-ucr"] = {canonicalName = "Upper Cross River", aliases = {"Upper Cross"}, family = "nic-cri", wikidata_item = 4108624}
+
+m["nic-vco"] = {canonicalName = "Volta-Congo", family = "alv", wikidata_item = 37228}
+
+m["nic-wov"] = {canonicalName = "Western Oti-Volta", family = "nic-ovo", aliases = {"Moré-Dagbani"}}
+
+m["nic-ykb"] = {canonicalName = "Yukubenic", aliases = {"Oohum"}, family = "nic-plt", wikidata_item = 16909196}
+
+m["nic-ymb"] = {canonicalName = "Yambasa", family = "nic-mba"}
+
+m["nic-yon"] = {canonicalName = "Yom-Nawdm", family = "nic-ovo"} -- no aliases: "Moré-Dagbani" is the alias of Western Oti-Volta ("nic-wov"), a sister branch, and must not also resolve here
+
+m["nub"] = {canonicalName = "Nubian", family = "sdv-nes", wikidata_item = 1517194}
+
+m["nub-hil"] = {canonicalName = "Hill Nubian", family = "nub", wikidata_item = 5762211, aliases = {"Kordofan Nubian"}}
+
+m["omq"] = {canonicalName = "Oto-Manguean", wikidata_item = 33669}
+
+m["omq-cha"] = {canonicalName = "Chatino", family = "omq-zap", wikidata_item = 35111}
+
+m["omq-chi"] = {canonicalName = "Chinantecan", family = "omq", wikidata_item = 35828}
+
+m["omq-cui"] = {canonicalName = "Cuicatec", family = "omq-mix", wikidata_item = 616024}
+
+m["omq-maz"] = {canonicalName = "Mazatecan", aliases = {"Mazatec"}, family = "omq", wikidata_item = 36230}
+
+m["omq-mix"] = {canonicalName = "Mixtecan", family = "omq", wikidata_item = 21996392}
+
+m["omq-mxt"] = {canonicalName = "Mixtec", family = "omq-mix", wikidata_item = 36363}
+
+m["omq-otp"] = {canonicalName = "Oto-Pamean", family = "omq"}
+
+m["omq-pop"] = {canonicalName = "Popolocan", family = "omq", wikidata_item = 5132273}
+
+m["omq-tri"] = {canonicalName = "Trique", aliases = {"Triqui"}, family = "omq-mix", wikidata_item = 780200}
+
+m["omq-zap"] = {canonicalName = "Zapotecan", family = "omq", wikidata_item = 8066463}
+
+m["omq-zpc"] = {canonicalName = "Zapotec", family = "omq-zap", wikidata_item = 13214}
+
+m["omv"] = {canonicalName = "Omotic", family = "afa", wikidata_item = 33860}
+
+m["omv-aro"] = {canonicalName = "Aroid", family = "omv", wikidata_item = 3699526, aliases = {"Ari-Banna", "South Omotic", "Somotic"}}
+
+m["omv-diz"] = {canonicalName = "Dizoid", family = "omv", wikidata_item = 430251, aliases = {"Maji", "Majoid"}}
+
+m["omv-eom"] = {canonicalName = "East Ometo", family = "omv-ome", wikidata_item = 20527288}
+
+m["omv-gon"] = {canonicalName = "Gonga", family = "omv", wikidata_item = 4143043, aliases = {"Kefoid"}}
+
+m["omv-mao"] = {canonicalName = "Mao", family = "omv", wikidata_item = 1351495}
+
+m["omv-nom"] = {canonicalName = "North Ometo", family = "omv-ome"}
+
+m["omv-ome"] = {canonicalName = "Ometo", family = "omv", wikidata_item = 36310}
+
+m["oto"] = {canonicalName = "Otomian", family = "omq", wikidata_item = 1270220}
+
+m["oto-otm"] = {canonicalName = "Otomi", family = "oto"}
+
+m["paa"] = {canonicalName = "Papuan", family = "qfa-not", wikidata_item = 236425}
+
+m["paa-arf"] = {canonicalName = "Arafundi", wikidata_item = 4783702}
+
+m["paa-asa"] = {canonicalName = "Arai-Samaia", wikidata_item = 48803569}
+
+m["paa-bng"] = {canonicalName = "Baining", wikidata_item = 748487, aliases = {"East New Britain"}}
+
+m["paa-brd"] = {canonicalName = "Border", wikidata_item = 1752158, aliases = {"Upper Tami"}}
+
+m["paa-egb"] = {canonicalName = "East Geelvink Bay", wikidata_item = 1497678, aliases = {"East Cenderawasih"}}
+
+m["paa-eng"] = {canonicalName = "Engan", wikidata_item = 3217449}
+
+m["paa-iwm"] = {canonicalName = "Iwam", wikidata_item = 15147853}
+
+m["paa-kag"] = { -- recode as ngf-kag?
+    canonicalName = "Kainantu-Goroka",
+    family = "ngf",
+    wikidata_item = 3217463
+}
+
+m["paa-kiw"] = {canonicalName = "Kiwaian", wikidata_item = 338449}
+
+m["paa-kut"] = {canonicalName = "Kutubuan", family = "paa-pag", wikidata_item = 48767893}
+
+m["paa-kwm"] = {canonicalName = "Kwomtari", wikidata_item = 2075415}
+
+m["paa-lkp"] = {canonicalName = "Lakes Plain", wikidata_item = 6478969}
+
+m["paa-lsp"] = {canonicalName = "Lower Sepik", aliases = {"Nor-Pondo"}, wikidata_item = 7061700}
+
+m["paa-mai"] = {canonicalName = "Mairasi", wikidata_item = 6736896}
+
+m["paa-msk"] = {canonicalName = "Sko", aliases = {"Skou"}, wikidata_item = 953509}
+
+m["paa-nbo"] = {canonicalName = "North Bougainville", wikidata_item = 749496}
+
+m["paa-nim"] = {canonicalName = "Nimboran", wikidata_item = 12638426}
+
+m["paa-pag"] = {canonicalName = "Papuan Gulf", wikidata_item = 48803685}
+
+m["paa-pau"] = {canonicalName = "Pauwasi", wikidata_item = 7155496}
+
+m["paa-ram"] = {canonicalName = "Ramu", wikidata_item = 3442808}
+
+m["paa-sbo"] = {canonicalName = "South Bougainville", wikidata_item = 3217380}
+
+m["paa-sen"] = {canonicalName = "Sentani", family = "paa-wpa", wikidata_item = 17044584}
+
+m["paa-spk"] = {canonicalName = "Sepik", wikidata_item = 3508772}
+
+m["paa-tkw"] = {canonicalName = "Tor-Kwerba", wikidata_item = 7827523}
+
+m["paa-wpa"] = {canonicalName = "West Papuan", wikidata_item = 1363026}
+
+m["paa-yam"] = {canonicalName = "Yam", aliases = {"Morehead and Upper Maro River"}, wikidata_item = 15062272}
+
+m["paa-yua"] = {canonicalName = "Yuat", wikidata_item = 8060096}
+
+m["phi"] = {canonicalName = "Philippine", family = "poz-bop", wikidata_item = 947858}
+
+m["phi-kal"] = {canonicalName = "Kalamian", aliases = {"Calamian"}, family = "phi", wikidata_item = 3217466}
+
+m["poz"] = {canonicalName = "Malayo-Polynesian", family = "map", wikidata_item = 143158}
+
+m["poz-aay"] = {canonicalName = "Admiralty Islands", family = "poz-oce", wikidata_item = 2701306}
+
+m["poz-bnn"] = {canonicalName = "North Bornean", family = "poz-bop", wikidata_item = 1427907}
+
+m["poz-bop"] = {canonicalName = "Borneo-Philippines", family = "poz", wikidata_item = 4273393}
+
+m["poz-bre"] = {canonicalName = "East Barito", family = "poz-bop", wikidata_item = 2701314}
+
+m["poz-brw"] = {canonicalName = "West Barito", family = "poz-bop", wikidata_item = 2761679}
+
+m["poz-btk"] = {canonicalName = "Bungku-Tolaki", family = "poz-clb", wikidata_item = 3217381}
+
+m["poz-cet"] = {canonicalName = "Central-Eastern Malayo-Polynesian", family = "poz", wikidata_item = 2269883}
+
+m["poz-clb"] = {canonicalName = "Celebic", family = "poz-sus", wikidata_item = 1078041}
+
+m["poz-cln"] = {canonicalName = "New Caledonian", family = "poz-occ", wikidata_item = 3091221}
+
+m["poz-cma"] = {canonicalName = "Central Maluku", family = "poz-cet", wikidata_item = 3217479}
+
+m["poz-hce"] = {canonicalName = "Halmahera-Cenderawasih", family = "pqe", wikidata_item = 2526616}
+
+m["poz-kal"] = {canonicalName = "Kaili-Pamona", family = "poz-clb", wikidata_item = 3217465}
+
+m["poz-lgx"] = {canonicalName = "Lampungic", family = "poz-sus", wikidata_item = 49215}
+
+m["poz-mcm"] = {canonicalName = "Malayo-Chamic", family = "poz-msa"}
+
+m["poz-mic"] = {canonicalName = "Micronesian", family = "poz-occ", wikidata_item = 420591}
+
+m["poz-mly"] = {canonicalName = "Malayic", family = "poz-mcm", wikidata_item = 662628}
+
+m["poz-msa"] = {canonicalName = "Malayo-Sumbawan", family = "poz-sus", wikidata_item = 1363818}
+
+m["poz-mun"] = {canonicalName = "Muna-Buton", family = "poz-clb", wikidata_item = 3037924}
+
+m["poz-nws"] = {canonicalName = "Northwest Sumatran", family = "poz-sus", wikidata_item = 2071308}
+
+m["poz-occ"] = {canonicalName = "Central-Eastern Oceanic", family = "poz-oce", wikidata_item = 2068435}
+
+m["poz-oce"] = {canonicalName = "Oceanic", family = "pqe", wikidata_item = 324457}
+
+m["poz-ocw"] = {canonicalName = "Western Oceanic", family = "poz-oce", wikidata_item = 2701282}
+
+m["poz-pep"] = {canonicalName = "Eastern Polynesian", family = "poz-pnp", wikidata_item = 390979} -- NOTE(review): wikidata_item is identical to that of "poz-pol" (Polynesian); one of the two is presumably wrong — verify
+
+m["poz-pnp"] = {canonicalName = "Nuclear Polynesian", family = "poz-pol", wikidata_item = 743851}
+
+m["poz-pol"] = {canonicalName = "Polynesian", family = "poz-occ", wikidata_item = 390979}
+
+m["poz-san"] = {canonicalName = "Sabahan", family = "poz-bnn", wikidata_item = 3217517}
+
+m["poz-sbj"] = {canonicalName = "Sama-Bajaw", family = "poz-bop", wikidata_item = 2160409}
+
+m["poz-slb"] = {canonicalName = "Saluan-Banggai", family = "poz-clb", wikidata_item = 3217519}
+
+m["poz-sls"] = {canonicalName = "Southeast Solomonic", family = "poz-occ", wikidata_item = 3119671}
+
+m["poz-ssw"] = {canonicalName = "South Sulawesi", family = "poz-sus", wikidata_item = 2778190}
+
+m["poz-sus"] = {canonicalName = "Sunda-Sulawesi", family = "poz", wikidata_item = 319552}
+
+m["poz-swa"] = {canonicalName = "North Sarawakan", family = "poz-bnn", wikidata_item = 538569}
+
+m["poz-tim"] = {canonicalName = "Timoric", family = "poz-cet", wikidata_item = 7806987}
+
+m["poz-tot"] = {canonicalName = "Tomini-Tolitoli", family = "poz-clb", wikidata_item = 3217541}
+
+m["poz-vnc"] = {canonicalName = "North-Central Vanuatu", family = "poz-occ", wikidata_item = 3039118}
+
+m["poz-wot"] = {canonicalName = "Wotu-Wolio", family = "poz-clb", wikidata_item = 1041317}
+
+m["pqe"] = {canonicalName = "Eastern Malayo-Polynesian", family = "poz-cet", wikidata_item = 2269883} -- NOTE(review): wikidata_item is identical to that of the parent family "poz-cet"; verify this subgroup has its own item
+
+m["pra"] = {canonicalName = "Prakrit", family = "inc", wikidata_item = 192170}
+
+m["qfa-adc"] = {canonicalName = "Central Great Andamanese", family = "qfa-adm"}
+
+m["qfa-adm"] = {canonicalName = "Great Andamanese", wikidata_item = 3515103}
+
+m["qfa-adn"] = {canonicalName = "Northern Great Andamanese", family = "qfa-adm"}
+
+m["qfa-ads"] = {canonicalName = "Southern Great Andamanese", family = "qfa-adm"}
+
+m["qfa-bet"] = {canonicalName = "Be-Tai", protoLanguage = "qfa-bet-pro", aliases = {"Tai-Be", "Daic-Beic", "Beic-Daic"}, family = "qfa-tak"}
+
+m["qfa-buy"] = {canonicalName = "Buyang", family = "qfa-kra"}
+
+m["qfa-cka"] = {canonicalName = "Chukotko-Kamchatkan", wikidata_item = 33255}
+
+m["qfa-dgn"] = {canonicalName = "Dogon", family = "nic", wikidata_item = 1234776}
+
+m["qfa-dny"] = {canonicalName = "Dene-Yeniseian", aliases = {"Dené-Yeniseian"}, wikidata_item = 21103}
+
+m["qfa-gel"] = {canonicalName = "Gelao", family = "qfa-kra"}
+
+m["qfa-hur"] = {canonicalName = "Hurro-Urartian", wikidata_item = 1144159}
+
+m["qfa-iso"] = {canonicalName = "isolate", family = "qfa-not", wikidata_item = 33648}
+
+m["qfa-kad"] = {
+    canonicalName = "Kadu", -- considered either Nilo-Saharan or independent/none
+    wikidata_item = 1720989
+}
+
+m["qfa-kms"] = {canonicalName = "Kam-Sui", family = "qfa-tak", wikidata_item = 1023641}
+
+m["qfa-kor"] = {canonicalName = "Korean", wikidata_item = 11263525}
+
+m["qfa-kra"] = {canonicalName = "Kra", family = "qfa-tak", wikidata_item = 1022087}
+
+m["qfa-lic"] = {canonicalName = "Hlai", protoLanguage = "qfa-lic-pro", aliases = {"Hlaic"}, family = "qfa-tak", wikidata_item = 1023648}
+
+m["qfa-mal"] = {canonicalName = "Left May", family = "paa-asa", wikidata_item = 614468}
+
+m["qfa-mch"] = { -- used in both N and S America
+    canonicalName = "Macro-Chibchan",
+    wikidata_item = 3438062
+}
+
+m["qfa-mix"] = {canonicalName = "mixed", family = "qfa-not", wikidata_item = 33694}
+
+m["qfa-not"] = {canonicalName = "not a family", family = "qfa-not"}
+
+m["qfa-onb"] = {canonicalName = "Be", protoLanguage = "qfa-onb-pro", aliases = {"Ong-Be", "Beic"}, family = "qfa-bet"}
+
+m["qfa-ong"] = {canonicalName = "Ongan", aliases = {"Angan", "South Andamanese", "Jarawa-Onge"}, wikidata_item = 2090575}
+
+m["qfa-sub"] = {canonicalName = "substrate", wikidata_item = 20730913}
+
+m["qfa-tak"] = {canonicalName = "Kra-Dai", aliases = {"Tai-Kadai", "Kadai"}, wikidata_item = 34171}
+
+m["qfa-tap"] = {canonicalName = "Timor-Alor-Pantar", wikidata_item = 16590002}
+
+m["qfa-tor"] = {canonicalName = "Torricelli", wikidata_item = 1333831}
+
+m["qfa-tyn"] = {canonicalName = "Tyrsenian", wikidata_item = 1344038}
+
+m["qfa-yen"] = {canonicalName = "Yeniseian", aliases = {"Yeniseic", "Yenisei-Ostyak"}, family = "qfa-dny", wikidata_item = 27639}
+
+m["qfa-yuk"] = {canonicalName = "Yukaghir", aliases = {"Yukagir", "Jukagir"}, wikidata_item = 34164}
+
+m["qwe"] = {canonicalName = "Quechuan", wikidata_item = 5218}
+
+m["roa"] = {canonicalName = "Romance", aliases = {"Romanic", "Latin", "Neolatin", "Neo-Latin"}, protoLanguage = "la", family = "itc", wikidata_item = 19814}
+
+m["roa-eas"] = {canonicalName = "Eastern Romance", family = "roa", wikidata_item = 147576}
+
+m["roa-ibe"] = {canonicalName = "West Iberian", family = "roa", wikidata_item = 1377152}
+
+m["roa-itd"] = {canonicalName = "Italo-Dalmatian", family = "roa", wikidata_item = 3313381}
+
+m["roa-git"] = {canonicalName = "Gallo-Italic", family = "roa", wikidata_item = 516074}
+
+m["roa-oil"] = {canonicalName = "Oïl", protoLanguage = "fro", family = "roa", wikidata_item = 37351}
+
+m["roa-rhe"] = {canonicalName = "Rhaeto-Romance", family = "roa", wikidata_item = 515593}
+
+--[=[
+	Exceptional language and family codes for South American Indian languages
+	can use the prefix "sai-", though "sai" is no longer itself a family code.
+]=] --
+m["sai-ara"] = {canonicalName = "Araucanian", wikidata_item = 626630}
+
+m["sai-aym"] = {canonicalName = "Aymaran", wikidata_item = 33010}
+
+m["sai-bar"] = {canonicalName = "Barbacoan", aliases = {"Barbakoan"}, wikidata_item = 807304}
+
+m["sai-bor"] = {canonicalName = "Boran", wikidata_item = 43079266}
+
+m["sai-cah"] = {canonicalName = "Cahuapanan", wikidata_item = 1025793}
+
+m["sai-car"] = {canonicalName = "Cariban", aliases = {"Carib"}, wikidata_item = 33090}
+
+m["sai-cer"] = {canonicalName = "Cerrado", aliases = {"Amazonian Jê"}, family = "sai-jee", wikidata_item = 98078151}
+
+m["sai-chc"] = {canonicalName = "Chocoan", aliases = {"Choco", "Chocó"}, wikidata_item = 1075616}
+
+m["sai-cho"] = {canonicalName = "Chonan", aliases = {"Chon"}, wikidata_item = 33019}
+
+m["sai-cje"] = {canonicalName = "Central Jê", aliases = {"Akuwẽ"}, family = "sai-cer", wikidata_item = 18010843}
+
+m["sai-cpc"] = {canonicalName = "Chapacuran", wikidata_item = 1062626}
+
+m["sai-crn"] = {canonicalName = "Charruan", aliases = {"Charrúan"}, wikidata_item = 3112423}
+
+m["sai-ctc"] = {canonicalName = "Catacaoan", wikidata_item = 5051139}
+
+m["sai-guc"] = {canonicalName = "Guaicuruan", aliases = {"Guaicurú", "Guaycuruana", "Guaikurú", "Guaycuruano", "Guaykuruan", "Waikurúan"}, family = "sai-mgc", wikidata_item = 1974973}
+
+m["sai-guh"] = {canonicalName = "Guahiban", aliases = {"Guahiboan", "Guajiboan", "Wahivoan"}, wikidata_item = 944056}
+
+m["sai-har"] = {canonicalName = "Harákmbut", aliases = {"Harákmbet"}, family = "sai-hkt", wikidata_item = 1584402}
+
+m["sai-hkt"] = {canonicalName = "Harákmbut-Katukinan", wikidata_item = 17107635}
+
+m["sai-hoc"] = {canonicalName = "Huitoto-Ocaina"}
+
+m["sai-hrp"] = {canonicalName = "Huarpean", aliases = {"Warpean", "Huarpe", "Warpe"}, wikidata_item = 1578336}
+
+m["sai-jee"] = {canonicalName = "Jê", aliases = {"Gê", "Jean", "Gean", "Jê-Kaingang", "Ye"}, family = "sai-mje", wikidata_item = 1483594}
+
+m["sai-jir"] = {canonicalName = "Jirajaran", aliases = {"Hiraháran"}, wikidata_item = 3028651}
+
+m["sai-jiv"] = {canonicalName = "Jivaroan", aliases = {"Hívaro", "Jibaro", "Jibaroan", "Jibaroana", "Jívaro"}, wikidata_item = 1393074}
+
+m["sai-ktk"] = {canonicalName = "Katukinan", aliases = {"Catuquinan"}, family = "sai-hkt", wikidata_item = 2636000}
+
+m["sai-mas"] = {canonicalName = "Mascoian", aliases = {"Mascoyan", "Maskoian", "Enlhet-Enenlhet"}, wikidata_item = 1906952}
+
+m["sai-mgc"] = {canonicalName = "Mataco-Guaicuru", wikidata_item = 255512}
+
+m["sai-mje"] = {canonicalName = "Macro-Jê", aliases = {"Macro-Gê"}, wikidata_item = 887133}
+
+m["sai-mtc"] = {canonicalName = "Matacoan", family = "sai-mgc", wikidata_item = 2447424}
+
+m["sai-mur"] = {canonicalName = "Muran", aliases = {"Mura"}, wikidata_item = 33826}
+
+m["sai-nad"] = {canonicalName = "Nadahup", aliases = {"Makú", "Macú", "Vaupés-Japurá"}, wikidata_item = 1856439}
+
+m["sai-nje"] = {canonicalName = "Northern Jê", aliases = {"Core Jê"}, family = "sai-cer", wikidata_item = 98078225}
+
+m["sai-nmk"] = {canonicalName = "Nambikwaran", aliases = {"Nambicuaran", "Nambiquaran", "Nambikuaran"}, wikidata_item = 15548027}
+
+m["sai-otm"] = {canonicalName = "Otomacoan", aliases = {"Otomákoan", "Otomakoan"}, wikidata_item = 3217503}
+
+m["sai-pan"] = {canonicalName = "Panoan", aliases = {"Pano"}, family = "sai-pat", wikidata_item = 1544537}
+
+m["sai-pat"] = {canonicalName = "Pano-Tacanan", aliases = {"Pano-Tacana", "Pano-Takana", "Páno-Takána", "Pano-Takánan"}, wikidata_item = 2475746}
+
+m["sai-sje"] = {canonicalName = "Southern Jê", family = "sai-jee", wikidata_item = 98078245}
+
+m["sai-tac"] = {canonicalName = "Tacanan", family = "sai-pat", wikidata_item = 3113762}
+
+m["sai-tuc"] = {canonicalName = "Tucanoan", wikidata_item = 788144}
+
+m["sai-tyu"] = {canonicalName = "Ticuna-Yuri", wikidata_item = 4467010}
+
+m["sai-ucp"] = {canonicalName = "Uru-Chipaya", aliases = {"Uru-Chipayan"}, wikidata_item = 2475488}
+
+m["sai-wic"] = {canonicalName = "Wichí", wikidata_item = 3027047}
+
+m["sai-wit"] = {canonicalName = "Witotoan", aliases = {"Huitotoan", "Uitotoan"}, wikidata_item = 43079317, family = "sai-hoc"}
+
+m["sai-ynm"] = {canonicalName = "Yanomami", aliases = {"Yanomam", "Shamatari", "Yamomami", "Yanomaman"}}
+
+m["sai-zam"] = {canonicalName = "Zamucoan", aliases = {"Samúkoan"}, wikidata_item = 3048461}
+
+m["sai-zap"] = {canonicalName = "Zaparoan", aliases = {"Záparoan", "Saparoan", "Sáparoan", "Záparo", "Zaparoano", "Zaparoana"}, wikidata_item = 33911}
+
+m["sal"] = {canonicalName = "Salishan", wikidata_item = 33985}
+
+m["sdv"] = {canonicalName = "Eastern Sudanic", family = "ssa", wikidata_item = 2036148}
+
+m["sdv-bri"] = {canonicalName = "Bari", family = "sdv-nie"}
+
+m["sdv-daj"] = {canonicalName = "Daju", family = "sdv", wikidata_item = 956724}
+
+m["sdv-dnu"] = {canonicalName = "Dinka-Nuer", family = "sdv-niw"}
+
+m["sdv-eje"] = {canonicalName = "Eastern Jebel", family = "sdv", wikidata_item = 3408878}
+
+m["sdv-kln"] = {canonicalName = "Kalenjin", family = "sdv-nis", wikidata_item = 637228}
+
+m["sdv-lma"] = {canonicalName = "Lotuko-Maa", family = "sdv-nie"}
+
+m["sdv-lon"] = {canonicalName = "Northern Luo", family = "sdv-luo"}
+
+m["sdv-los"] = {canonicalName = "Southern Luo", family = "sdv-luo", wikidata_item = 7570103}
+
+m["sdv-luo"] = {canonicalName = "Luo", family = "sdv-niw"}
+
+m["sdv-nes"] = {canonicalName = "Northern Eastern Sudanic", family = "sdv", wikidata_item = 4810496, aliases = {"Astaboran", "Ek Sudanic"}}
+
+m["sdv-nie"] = {canonicalName = "Eastern Nilotic", family = "sdv-nil", wikidata_item = 153795}
+
+m["sdv-nil"] = {canonicalName = "Nilotic", family = "sdv", wikidata_item = 513408}
+
+m["sdv-nis"] = {canonicalName = "Southern Nilotic", family = "sdv-nil", wikidata_item = 1552410}
+
+m["sdv-niw"] = {canonicalName = "Western Nilotic", family = "sdv-nil", wikidata_item = 3114989}
+
+m["sdv-nma"] = {canonicalName = "Nandi-Markweta", family = "sdv-kln"}
+
+m["sdv-nyi"] = {canonicalName = "Nyima", family = "sdv-nes", wikidata_item = 11688746, aliases = {"Nyimang"}}
+
+m["sdv-tmn"] = {canonicalName = "Taman", family = "sdv-nes", wikidata_item = 3408873, aliases = {"Tamaic"}}
+
+m["sdv-ttu"] = {canonicalName = "Teso-Turkana", family = "sdv-nie", wikidata_item = 7705551, aliases = {"Ateker"}}
+
+m["sem"] = {canonicalName = "Semitic", family = "afa", wikidata_item = 34049}
+
+m["sem-ara"] = {canonicalName = "Aramaic", protoLanguage = "arc", family = "sem-nwe", wikidata_item = 28602}
+
+m["sem-arb"] = {canonicalName = "Arabic", protoLanguage = "ar", family = "sem-cen", wikidata_item = 164667}
+
+m["sem-are"] = {canonicalName = "Eastern Aramaic", family = "sem-ara", wikidata_item = 3410322}
+
+m["sem-arw"] = {canonicalName = "Western Aramaic", family = "sem-ara", wikidata_item = 3394214}
+
+m["sem-ase"] = {canonicalName = "Southeastern Aramaic", family = "sem-are", wikidata_item = 3410322} -- NOTE(review): wikidata_item is identical to that of the parent family "sem-are" (Eastern Aramaic); verify
+
+m["sem-can"] = {canonicalName = "Canaanite", family = "sem-nwe", wikidata_item = 747547}
+
+m["sem-cen"] = {canonicalName = "Central Semitic", family = "sem-wes", wikidata_item = 3433228}
+
+m["sem-cna"] = {canonicalName = "Central Neo-Aramaic", family = "sem-are", wikidata_item = 3410322} -- NOTE(review): wikidata_item is identical to that of the parent family "sem-are" (Eastern Aramaic); verify
+
+m["sem-eas"] = {canonicalName = "East Semitic", family = "sem", wikidata_item = 164273}
+
+m["sem-eth"] = {canonicalName = "Ethiopian Semitic", aliases = {"Afro-Semitic", "Ethiopian", "Ethiopic", "Ethiosemitic"}, family = "sem-wes", wikidata_item = 163629}
+
+m["sem-nna"] = {canonicalName = "Northeastern Neo-Aramaic", family = "sem-are", wikidata_item = 2560578}
+
+m["sem-nwe"] = {canonicalName = "Northwest Semitic", family = "sem-cen", wikidata_item = 162996}
+
+m["sem-osa"] = {canonicalName = "Old South Arabian", aliases = {"Epigraphic South Arabian", "Sayhadic"}, family = "sem-cen", wikidata_item = 35025}
+
+m["sem-sar"] = {canonicalName = "Modern South Arabian", family = "sem-wes", wikidata_item = 1981908}
+
+m["sem-wes"] = {canonicalName = "West Semitic", family = "sem", wikidata_item = 124901}
+
+m["sgn"] = {canonicalName = "sign", family = "qfa-not", wikidata_item = 34228}
+
+m["sgn-fsl"] = {canonicalName = "French Sign Languages", family = "sgn", wikidata_item = 5501921}
+
+m["sgn-gsl"] = {canonicalName = "German Sign Languages", family = "sgn", wikidata_item = 5551235}
+
+m["sgn-jsl"] = {canonicalName = "Japanese Sign Languages", family = "sgn", wikidata_item = 11722508}
+
+m["sio"] = {canonicalName = "Siouan", family = "nai-sca", wikidata_item = 34181}
+
+m["sio-dhe"] = {canonicalName = "Dhegihan", family = "sio-msv", wikidata_item = 3217420}
+
+m["sio-dkt"] = {canonicalName = "Dakotan", family = "sio-msv", wikidata_item = 17188640}
+
+m["sio-mor"] = {canonicalName = "Missouri River Siouan", family = "sio", wikidata_item = 26807266}
+
+m["sio-msv"] = {canonicalName = "Mississippi Valley Siouan", family = "sio", wikidata_item = 17188638}
+
+m["sio-ohv"] = {canonicalName = "Ohio Valley Siouan", family = "sio", wikidata_item = 21070931}
+
+m["sit"] = {canonicalName = "Sino-Tibetan", wikidata_item = 45961}
+
+m["sit-aao"] = {canonicalName = "Ao", aliases = {"Central Naga languages"}, family = "sit", wikidata_item = 615474}
+
+m["sit-alm"] = {canonicalName = "Almora", family = "sit-whm"}
+
+m["sit-bdi"] = {canonicalName = "Bodish", family = "sit", wikidata_item = 1814078}
+
+m["sit-dhi"] = {canonicalName = "Dhimalish", family = "sit", wikidata_item = 1207648}
+
+m["sit-ebo"] = {canonicalName = "East Bodish", family = "sit-bdi", wikidata_item = 56402}
+
+m["sit-gma"] = {canonicalName = "Greater Magaric", family = "sit", wikidata_item = 55612963}
+
+m["sit-gsi"] = {canonicalName = "Greater Siangic", family = "sit", wikidata_item = 52698851}
+
+m["sit-hrs"] = {canonicalName = "Hrusish", aliases = {"Southeast Kamengic"}, family = "sit", wikidata_item = 1632501}
+
+m["sit-jnp"] = {canonicalName = "Jingphoic", aliases = {"Jingpho"}, family = "sit-jpl"}
+
+m["sit-jpl"] = {canonicalName = "Kachin-Luic", aliases = {"Jingpho-Luish", "Jingpho-Asakian", "Kachinic"}, family = "tbq-bkj", wikidata_item = 1515454}
+
+m["sit-kch"] = {canonicalName = "Konyak-Chang", family = "sit-kon"}
+
+m["sit-kha"] = {canonicalName = "Kham", family = "sit-gma", wikidata_item = 33305}
+
+m["sit-khb"] = {canonicalName = "Kho-Bwa", aliases = {"Bugunish", "Kamengic"}, family = "sit", wikidata_item = 6401917}
+
+m["sit-kic"] = {canonicalName = "Central Kiranti", family = "sit-kir"}
+
+m["sit-kie"] = {canonicalName = "Eastern Kiranti", family = "sit-kir"}
+
+m["sit-kin"] = {canonicalName = "Kinnauri", family = "sit-whm"}
+
+m["sit-kir"] = {canonicalName = "Kiranti", family = "sit", wikidata_item = 922148}
+
+m["sit-kiw"] = {canonicalName = "Western Kiranti", family = "sit-kir", wikidata_item = 922148} -- NOTE(review): wikidata_item is identical to that of the parent family "sit-kir" (Kiranti); verify
+
+m["sit-kon"] = {canonicalName = "Konyak", family = "tbq-bkj", wikidata_item = 774590}
+
+m["sit-kyk"] = {canonicalName = "Kyirong-Kagate", family = "sit-tib", wikidata_item = 6450957}
+
+m["sit-lab"] = {canonicalName = "Ladakhi-Balti", family = "sit-tib", wikidata_item = 6450957} -- NOTE(review): wikidata_item is identical to that of the sibling "sit-kyk" (Kyirong-Kagate); presumably a copy-paste — verify
+
+m["sit-las"] = {canonicalName = "Lahuli-Spiti", family = "sit-tib", wikidata_item = 6473510}
+
+m["sit-luu"] = {canonicalName = "Luish", aliases = {"Asakian", "Sak"}, family = "sit-jpl", wikidata_item = 55621439}
+
+m["sit-mar"] = {canonicalName = "Maringic", family = "sit-tma"}
+
+m["sit-mdz"] = {canonicalName = "Midzu", aliases = {"Geman", "Midzuish", "Miju-Meyor", "Southern Mishmi"}, family = "sit", wikidata_item = 6843504}
+
+m["sit-mnz"] = {canonicalName = "Mondzish", aliases = {"Mangish"}, family = "tbq-lob", wikidata_item = 6898839}
+
+m["sit-mru"] = {canonicalName = "Mruic", aliases = {"Mru-Hkongso"}, family = "sit", wikidata_item = 16908870}
+
+m["sit-nax"] = {canonicalName = "Naic", aliases = {"Naxish"}, family = "tbq-buq", wikidata_item = 6982999}
+
+m["sit-new"] = {canonicalName = "Newaric", family = "sit", wikidata_item = 55625069}
+
+m["sit-nng"] = {canonicalName = "Nungish", aliases = {"Nung"}, family = "sit", wikidata_item = 1515482}
+
+m["sit-qia"] = {canonicalName = "Qiangic", family = "tbq-buq", wikidata_item = 1636765}
+
+m["sit-rgy"] = {canonicalName = "Rgyalrongic", aliases = {"Jiarongic"}, family = "sit-qia", wikidata_item = 56936}
+
+m["sit-tam"] = {canonicalName = "Tamangic", aliases = {"West Bodish"}, family = "sit", wikidata_item = 3309439}
+
+m["sit-tan"] = {canonicalName = "Tani", family = "sit", wikidata_item = 3217538}
+
+m["sit-tib"] = {canonicalName = "Tibetic", family = "sit-bdi", wikidata_item = 1641150}
+
+m["sit-tja"] = {canonicalName = "Tujia", family = "sit"}
+
+m["sit-tma"] = {canonicalName = "Tangkhul-Maring", family = "sit"}
+
+m["sit-tng"] = {canonicalName = "Tangkhulic", aliases = {"Tangkhul"}, family = "sit-tma", wikidata_item = 1516657}
+
+m["sit-tno"] = {canonicalName = "Tangsa-Nocte", family = "sit-kon"}
+
+m["sit-tsk"] = {canonicalName = "Tshangla", family = "sit-bdi"}
+
+m["sit-whm"] = {canonicalName = "West Himalayish", family = "sit", wikidata_item = 2301695}
+
+m["sit-zem"] = {canonicalName = "Zeme", aliases = {"Zeliangrong", "Zemeic"}, family = "sit", wikidata_item = 189291}
+
+m["sla"] = {canonicalName = "Slavic", aliases = {"Slavonic"}, family = "ine-bsl", wikidata_item = 23526}
+
+m["smi"] = {canonicalName = "Sami", aliases = {"Saami", "Samic", "Saamic"}, family = "urj", wikidata_item = 56463}
+
+m["son"] = {canonicalName = "Songhay", aliases = {"Songhai"}, family = "ssa", wikidata_item = 505198}
+
+m["sqj"] = {canonicalName = "Albanian", family = "ine", wikidata_item = 8748}
+
+m["ssa"] = {
+    canonicalName = "Nilo-Saharan", -- possibly not a genetic grouping
+    wikidata_item = 33705
+}
+
+m["ssa-fur"] = {canonicalName = "Fur", family = "ssa", wikidata_item = 2989512}
+
+m["ssa-klk"] = {canonicalName = "Kuliak", aliases = {"Rub"}, family = "ssa", wikidata_item = 1791476}
+
+m["ssa-kom"] = {canonicalName = "Koman", family = "ssa", wikidata_item = 1781084}
+
+m["ssa-sah"] = {canonicalName = "Saharan", family = "ssa", wikidata_item = 1757661}
+
+m["syd"] = {canonicalName = "Samoyedic", aliases = {"Samoyed", "Samodeic"}, family = "urj", wikidata_item = 34005}
+
+m["tai"] = {canonicalName = "Tai", aliases = {"Daic"}, family = "qfa-bet", wikidata_item = 749720}
+
+m["tai-wen"] = {canonicalName = "Wenma-Southwestern Tai", family = "tai"}
+
+m["tai-tay"] = {canonicalName = "Tày", family = "tai-wen"}
+
+m["tai-sap"] = {canonicalName = "Sapa-Southwestern Tai", aliases = {"Sapa-Thai"}, family = "tai-wen"}
+
+m["tai-swe"] = {canonicalName = "Southwestern Tai", family = "tai-sap", wikidata_item = 3447105}
+
+m["tai-cho"] = {canonicalName = "Chongzuo Tai", family = "tai", wikidata_item = 13216}
+
+m["tai-cen"] = {canonicalName = "Central Tai", family = "tai", wikidata_item = 5061891}
+
+m["tai-nor"] = {canonicalName = "Northern Tai", family = "tai", wikidata_item = 7059014}
+
+m["tbq"] = {canonicalName = "Tibeto-Burman", family = "sit", wikidata_item = 34064}
+
+m["tbq-anp"] = {canonicalName = "Angami-Pochuri", family = "sit", wikidata_item = 530460}
+
+m["tbq-bdg"] = {canonicalName = "Bodo-Garo", family = "tbq-bkj", wikidata_item = 4090000}
+
+m["tbq-bkj"] = {
+    canonicalName = "Sal",
+    family = "sit",
+    -- Brahmaputran appears to be Glottolog's term
+    aliases = {"Bodo-Konyak-Jinghpaw", "Brahmaputran", "Jingpho-Konyak-Bodo"},
+    wikidata_item = 889900
+}
+
+m["tbq-brm"] = {canonicalName = "Burmish", family = "tbq-lob", wikidata_item = 865713}
+
+m["tbq-buq"] = {canonicalName = "Burmo-Qiangic", family = "sit", wikidata_item = 16056278}
+
+m["tbq-kuk"] = {canonicalName = "Kukish", family = "sit", wikidata_item = 832413}
+
+m["tbq-lal"] = {canonicalName = "Lalo", family = "tbq-lol", wikidata_item = 56548}
+
+m["tbq-lob"] = {canonicalName = "Lolo-Burmese", family = "tbq-buq", wikidata_item = 1635712}
+
+m["tbq-lol"] = {canonicalName = "Loloish", family = "tbq-lob", wikidata_item = 37035}
+
+m["trk"] = {canonicalName = "Turkic", wikidata_item = 34090}
+
+m["trk-kar"] = {canonicalName = "Karluk", aliases = {"Qarluq", "Uyghur-Uzbek", "Southeastern Turkic"}, varieties = {"Eastern Turkic"}, family = "trk"}
+
+m["trk-kbu"] = {canonicalName = "Kipchak-Bulgar", aliases = {"Uralian", "Uralo-Caspian"}, family = "trk-kip", wikidata_item = 3512539}
+
+m["trk-kcu"] = {canonicalName = "Kipchak-Cuman", aliases = {"Ponto-Caspian"}, family = "trk-kip", wikidata_item = 4370412}
+
+m["trk-kip"] = {canonicalName = "Kipchak", otherNames = {"Western Turkic"}, aliases = {"Kypchak", "Qypchaq", "Northwestern Turkic", "Western Turkic"}, protoLanguage = "qwm", family = "trk", wikidata_item = 1339898}
+
+m["trk-kno"] = {canonicalName = "Kipchak-Nogai", aliases = {"Aralo-Caspian"}, family = "trk-kip"}
+
+m["trk-ogr"] = {canonicalName = "Oghur", aliases = {"Lir-Turkic", "r-Turkic"}, family = "trk", wikidata_item = 1422731}
+
+m["trk-ogz"] = {canonicalName = "Oghuz", aliases = {"Southwestern Turkic"}, family = "trk", wikidata_item = 494600}
+
+m["trk-sib"] = {canonicalName = "Siberian Turkic", otherNames = {"Northern Turkic"}, aliases = {"Northeastern Turkic"}, family = "trk"}
+
+m["tup"] = {canonicalName = "Tupian", aliases = {"Tupi"}, wikidata_item = 34070}
+
+m["tup-gua"] = {canonicalName = "Tupi-Guarani", aliases = {"Tupí-Guaraní"}, family = "tup", wikidata_item = 148610}
+
+m["tuw"] = {canonicalName = "Tungusic", aliases = {"Manchu-Tungus", "Tungus"}, wikidata_item = 34230}
+
+m["urj"] = {canonicalName = "Uralic", varieties = {"Finno-Ugric"}, wikidata_item = 34113}
+
+m["urj-mdv"] = {canonicalName = "Mordvinic", family = "urj", wikidata_item = 627313}
+
+m["urj-prm"] = {canonicalName = "Permic", family = "urj", wikidata_item = 161493}
+
+m["urj-ugr"] = {canonicalName = "Ugric", family = "urj", wikidata_item = 156631}
+
+m["wak"] = {canonicalName = "Wakashan", wikidata_item = 60069}
+
+m["wen"] = {canonicalName = "Sorbian", aliases = {"Lusatian", "Wendish"}, family = "zlw", wikidata_item = 25442}
+
+m["xgn"] = {canonicalName = "Mongolic", aliases = {"Mongolian"}, wikidata_item = 33750}
+
+m["xnd"] = {canonicalName = "Na-Dene", aliases = {"Na-Dené"}, family = "qfa-dny", wikidata_item = 26986}
+
+m["ypk"] = {canonicalName = "Yupik", aliases = {"Yup'ik", "Yuit"}, family = "esx-esk", wikidata_item = 27970}
+
+m["zhx"] = {canonicalName = "Sinitic", aliases = {"Chinese"}, protoLanguage = "och", family = "sit", wikidata_item = 33857}
+
+m["zhx-gba"] = {canonicalName = "Greater Bai", aliases = {"Macro-Bai"}, family = "zhx", wikidata_item = 16963847}
+
+m["zhx-min"] = {canonicalName = "Min", protoLanguage = "zhx-min-pro", family = "zhx"}
+
+m["zhx-min-hai"] = {canonicalName = "Coastal Min", family = "zhx-min"}
+
+m["zhx-min-shn"] = {canonicalName = "Inland Min", family = "zhx-min"}
+
+m["zle"] = {canonicalName = "East Slavic", protoLanguage = "orv", family = "sla", wikidata_item = 144713}
+
+m["zls"] = {canonicalName = "South Slavic", family = "sla", wikidata_item = 146665}
+
+m["zlw"] = {canonicalName = "West Slavic", family = "sla", wikidata_item = 145852}
+
+m["zlw-lch"] = {canonicalName = "Lechitic", aliases = {"Lekhitic"}, family = "zlw", wikidata_item = 742782}
+
+m["znd"] = {canonicalName = "Zande", family = "nic-ubg", wikidata_item = 8066072}
+
+return m
diff --git a/wiktra/wikt/translit/fun.lua b/wiktra/wikt/translit/fun.lua
new file mode 100644
index 0000000..fdead11
--- /dev/null
+++ b/wiktra/wikt/translit/fun.lua
@@ -0,0 +1,224 @@
+local export = {}
+
+local ustring = mw.ustring
+local libraryUtil = require "libraryUtil"
+local checkType = libraryUtil.checkType
+local checkTypeMulti = libraryUtil.checkTypeMulti
+
+local iterableTypes = {"table", "string"}
+
+-- Build an argument checker for `funcName`: bound to one type when `expectType` is a string, else taking the type per call.
+local function _check(funcName, expectType)
+    if type(expectType) == "string" then
+        return function(argIndex, arg, nilOk) return checkType(funcName, argIndex, arg, expectType, nilOk) end
+    end
+    return function(argIndex, arg, wanted, nilOk)
+        -- a single type name goes straight to checkType
+        if type(wanted) ~= "table" then return checkType(funcName, argIndex, arg, wanted, nilOk) end
+        -- a list of type names: a nil argument is let through only when nilOk is set
+        if nilOk and arg == nil then return end
+        return checkTypeMulti(funcName, argIndex, arg, wanted)
+    end
+end
+
+-- Return an iterator yielding (index, character) for each UTF-8-encoded
+-- character of `str`; the index counts characters, not bytes.
+local function iterString(str)
+    local nextChar = string.gmatch(str, "[%z\1-\127\194-\244][\128-\191]*")
+    local count = 0
+    return function()
+        local ch = nextChar()
+        count = count + 1
+        -- falling off the end returns nothing, which terminates a generic for
+        if ch ~= nil then return count, ch end
+    end
+end
+
+export.chain = function(func1, func2, ...) return func1(func2(...)) end -- func1 receives every result of func2(...)
+
+-- Apply `func(value, key, iterable)` to every element and collect the results under the same keys.
+-- Strings are walked per UTF-8 character; tables with `isArray` set or a non-nil [1] use ipairs, all others pairs.
+--	map(function(number) return number ^ 2 end, { 1, 2, 3 })                        --> { 1, 4, 9 }
+--	map(function(char) return string.char(string.byte(char) - 0x20) end, "abc")     --> { "A", "B", "C" }
+function export.map(func, iterable, isArray)
+    local check = _check "map"
+    check(1, func, "function")
+    check(2, iterable, iterableTypes)
+    local walk = pairs
+    if type(iterable) == "string" then walk = iterString elseif isArray or iterable[1] ~= nil then walk = ipairs end
+    local results = {}
+    for key, value in walk(iterable) do results[key] = func(value, key, iterable) end
+    return results
+end
+
+-- Collect `func(value, key, iterable)` for each pair produced by the iterator triplet `iter, iterable, initVal`.
+function export.mapIter(func, iter, iterable, initVal)
+    local check = _check "mapIter"
+    check(1, func, "function")
+    check(2, iter, "function")
+    check(3, iterable, iterableTypes, true) -- nilOk: the iterator state may be nil
+    -- initVal is the initial control value; it could be anything, so it is not checked
+    local results, n = {}, 0
+    local key, value = iter(iterable, initVal)
+    while key ~= nil do
+        n = n + 1
+        results[n] = func(value, key, iterable)
+        key, value = iter(iterable, key)
+    end
+    return results
+end
+
+-- Call `func(value, key, iterable)` for every element, purely for its side effects; always returns nil.
+function export.forEach(func, iterable, isArray)
+    local check = _check "forEach"
+    check(1, func, "function")
+    check(2, iterable, iterableTypes)
+    local walk = (type(iterable) == "string" and iterString) or ((isArray or iterable[1] ~= nil) and ipairs) or pairs
+    for key, value in walk(iterable) do func(value, key, iterable) end
+    return nil
+end
+
+-------------------------------------------------
+-- Adapted from http://lua-users.org/wiki/CurriedLua
+-- reverse(...) : return the given values in reverse order
+--
+-- e.g. "reverse(1,2,3)" returns 3,2,1; called with nothing it returns a single nil
+local function reverse(...)
+    -- lastFirst(k, ...) returns the first k values of `...`, last one first
+    local function lastFirst(k, ...)
+        -- at most one value left: hand it back (the parentheses give exactly
+        -- one result, which is nil when the list is empty)
+        if k <= 1 then return (...) end
+
+        -- emit value k, then recurse over the k - 1 values before it; the call
+        -- is last in the return list, so all of its results are kept
+        return (select(k, ...)), lastFirst(k - 1, ...)
+    end
+    return lastFirst(select("#", ...), ...)
+end
+
+-- Turn `func` into a chain of `numArgs` one-argument functions; `func` runs once the last argument is supplied.
+function export.curry(func, numArgs)
+    -- two arguments is the default (and most common) case
+    numArgs = numArgs or 2
+
+    -- nothing to curry for one argument or fewer
+    if numArgs <= 1 then return func end
+
+    -- `args` holds the `filled` arguments gathered so far, in call order
+    local function collect(args, filled)
+        if filled == numArgs then return func(unpack(args, 1, numArgs)) end
+        return function(onearg)
+            -- copy before appending so every partial application stays reusable
+            local extended = {}
+            for i = 1, filled do extended[i] = args[i] end
+            extended[filled + 1] = onearg
+            return collect(extended, filled + 1)
+        end
+    end
+    return collect({}, 0)
+end
+
+-------------------------------------------------
+
+-- Return true as soon as `func(value, key, t)` is truthy for some element, false otherwise.
+--	some(function(val) return val % 2 == 0 end, { 2, 3, 5, 7, 11 })    --> true
+function export.some(func, t, isArray)
+    -- sequences (flagged by `isArray` or a non-nil t[1]) are walked in order with ipairs
+    local walk = (isArray or t[1] ~= nil) and ipairs or pairs
+    for key, value in walk(t) do
+        if func(value, key, t) then return true end
+    end
+    return false
+end
+
+-- Return false as soon as `func(value, key, t)` is falsy for some element, true otherwise (also for an empty table).
+--	all(function(val) return val % 2 == 0 end, { 2, 4, 8, 10, 12 })    --> true
+function export.all(func, t, isArray)
+    -- sequences (flagged by `isArray` or a non-nil t[1]) are walked in order with ipairs
+    local walk = (isArray or t[1] ~= nil) and ipairs or pairs
+    for key, value in walk(t) do
+        if not func(value, key, t) then return false end
+    end
+    return true
+end
+
+-- Return a new table holding the elements of `t` for which `func(value, key, t)` is truthy.
+-- Sequences (flagged by `isArray` or a non-nil t[1]) are compacted into a new
+-- sequence; any other table keeps the original keys of the retained entries.
+function export.filter(func, t, isArray)
+    local kept = {}
+    if not (isArray or t[1] ~= nil) then
+        -- keyed table: survivors stay under their own keys
+        for key, value in pairs(t) do
+            if func(value, key, t) then kept[key] = value end
+        end
+        return kept
+    end
+
+    -- sequence: ipairs only yields non-nil values, so #kept is exactly the number kept so far
+    for index, value in ipairs(t) do
+        if func(value, index, t) then kept[#kept + 1] = value end
+    end
+    return kept
+end
+
+function export.fold(func, t, accum) -- left fold over the sequence `t`, seeded with `accum`
+    for index, value in ipairs(t) do accum = func(accum, value, index, t) end
+    return accum
+end
+
+-------------------------------
+-- Fancy stuff
+local function capture(...) -- freeze the arguments (nils included) into a function that returns them again
+    local count, packed = select("#", ...), {...}
+    return function() return unpack(packed, 1, count) end
+end
+
+-- Wrap `func` so that every call logs its arguments and then its results with mw.log.
+-- When `prefix` is truthy it is logged in front of the arguments.
+-- The wrapper passes the results of `func` through unchanged.
+function export.logReturnValues(func, prefix)
+    return function(...)
+        -- run the wrapped function first, so nothing is logged if it raises
+        local results = capture(func(...))
+        if prefix then
+            mw.log(prefix, ...)
+        else
+            mw.log(...)
+        end
+        mw.log(results())
+        return results()
+    end
+end
+
+export.log = export.logReturnValues
+
+-- Replace, in place, each function-valued field of `t` with a logging wrapper prefixed by its key; returns `t`.
+function export.logAll(t)
+    for name, member in pairs(t) do if type(member) == "function" then t[name] = export.logReturnValues(member, tostring(name)) end end
+    return t
+end
+
+----- M E M O I Z A T I O N-----
+-- metamethod that does the work
+-- Currently supports one argument and one return value.
+local func_key = {}
+local function callMethod(self, x) -- __call handler: return the result cached under key `x`, computing it on a miss
+    local output = self[x]
+    if output == nil then -- only nil is a cache miss; a cached `false` must not be recomputed on every call
+        output = self[func_key](x)
+        self[x] = output -- a nil result cannot be stored, so such inputs are recomputed each time
+    end
+    return output
+end
+
+-- shared metatable
+local mt = {__call = callMethod}
+
+-- Create a callable cache table for the one-argument `func`; the function itself is stored under the private `func_key`.
+export.memoize = function(func) return setmetatable({[func_key] = func}, mt) end
+
+-------------------------------
+
+return export
diff --git a/wiktra/wikt/translit/gender and number.lua b/wiktra/wikt/translit/gender and number.lua
new file mode 100644
index 0000000..1f7191b
--- /dev/null
+++ b/wiktra/wikt/translit/gender and number.lua	
@@ -0,0 +1,256 @@
+--[=[
+	This module creates standardised displays for gender and number.
+	It converts a gender specification into Wiki/HTML format.
+	
+	A gender/number specification consists of one or more gender/number elements, separated by hyphens.
+	Examples are: "n" (neuter gender), "f-p" (feminine plural), "m-an-p" (masculine animate plural),
+	"pf" (perfective aspect). Each gender/number element has the following properties:
+	1. A code, as used in the spec, e.g. "f" for feminine, "p" for plural.
+	2. A type, e.g. "gender", "number" or "animacy". Each element in a given spec must be of a different type.
+	3. A display form, which in turn consists of a display code and a tooltip gloss. The display code
+	   may not be the same as the spec code, e.g. the spec code "an" has display code "anim" and tooltip
+	   gloss "animate".
+    4. A category into which lemmas of the right part of speech are placed if they have a gender/number
+	   spec containing the given element. For example, a noun with gender/number spec "m-an-p" is placed
+	   into the categories "LANG masculine nouns", "LANG animate nouns" and "LANG pluralia tantum".
+]=] --
+local export = {}
+
+local codes = {}
+
+-- A list of all possible "parts" that a specification can be made out of. For each part, we list
+-- the class it's in (gender, animacy, etc.), the associated category (if any) and the display form.
+-- In a given gender/number spec, only one part of each class is allowed.
+
+codes["?"] = {type = "other", display = "?"}
+
+-- Genders
+codes["m"] = {type = "gender", cat = "masculine POS", display = "m"}
+codes["f"] = {type = "gender", cat = "feminine POS", display = "f"}
+codes["n"] = {type = "gender", cat = "neuter POS", display = "n"}
+codes["c"] = {type = "gender", cat = "common-gender POS", display = "c"}
+
+-- Animacy
+codes["an"] = {type = "animacy", cat = "animate POS", display = "anim"}
+codes["in"] = {type = "animacy", cat = "inanimate POS", display = "inan"}
+-- Animal (for Ukrainian, Belarusian, Polish)
+codes["anml"] = {type = "animacy", cat = "animal POS", display = "animal"}
+-- Personal (for Ukrainian, Belarusian, Polish)
+codes["pr"] = {type = "animacy", cat = "personal POS", display = "pers"}
+-- Nonpersonal not currently used
+codes["np"] = {type = "animacy", cat = "nonpersonal POS", display = "npers"}
+
+-- Virility (for Polish)
+codes["vr"] = {type = "virility", cat = "virile POS", display = "vir"}
+codes["nv"] = {type = "virility", cat = "nonvirile POS", display = "nvir"}
+
+-- Numbers
+codes["s"] = {type = "number", display = "sg"}
+codes["d"] = {type = "number", cat = "dualia tantum", display = "du"}
+codes["p"] = {type = "number", cat = "pluralia tantum", display = "pl"}
+
+-- Verb qualifiers
+codes["impf"] = {type = "aspect", cat = "imperfective POS", display = "impf"}
+codes["pf"] = {type = "aspect", cat = "perfective POS", display = "pf"}
+
+-- Combined codes that are equivalent to giving multiple specs. `mf` is the same as specifying two separate specs,
+-- one with `m` in it and the other with `f`. `mfbysense` is similar but is used for nouns that can be either masculine
+-- or feminine according as to whether they refer to masculine or feminine beings.
+local combined_codes = {}
+combined_codes["mf"] = {codes = {"m", "f"}}
+combined_codes["mfbysense"] = {codes = {"m", "f"}, cat = "masculine and feminine POS by sense"}
+
+-- Categories added when different gender/number specs contain different codes of the same type.
+local codetype_cats = {}
+codetype_cats["gender"] = "POS with multiple genders"
+codetype_cats["animacy"] = "POS with multiple animacies"
+codetype_cats["aspect"] = "biaspectual POS"
+
+-- Template-invocable version of format_list: specs come from the numbered frame arguments (up to the first missing or empty one).
+-- NOTE(review): `lang` is forwarded as the raw argument string, but format_genders calls lang:getCanonicalName() on it -- confirm.
+function export.show_list(frame)
+    local args = frame.args
+    local lang = args["lang"]
+    if lang == "" then lang = nil end
+
+    local list = {}
+    for i = 1, math.huge do
+        local spec = args[i]
+        if not spec or spec == "" then break end
+        list[i] = spec
+    end
+    return export.format_list(list, lang)
+end
+
+-- Older entry point. Like format_genders(), but instead of returning the text and the
+-- category list separately it returns one string: the formatted gender text followed
+-- by the categories rendered through format_categories (omitted when there are none).
+function export.format_list(specs, lang, pos_for_cat, sort_key)
+    local text, cats = export.format_genders(specs, lang, pos_for_cat)
+    if #cats > 0 then text = text .. require("utilities").format_categories(cats, lang, sort_key) end
+    return text
+end
+
+-- Format one or more gender/number specifications. Each spec is either a string, e.g. "f-p", or
+-- a table of the form {spec = "SPEC", qualifiers = {"QUALIFIER", "QUALIFIER", ...}} where `.spec`
+-- is a gender/number spec such as "f-p" and `.qualifiers` is a list of qualifiers to display before
+-- the formatted gender/number spec. `.spec` must be present but `.qualifiers` may be omitted.
+-- The function returns two values:
+-- (a) the formatted text;
+-- (b) a list of the categories to add.
+-- If `lang` and `pos_for_cat` are given, gender categories such as "German masculine nouns" or
+-- "Russian imperfective verbs" are added to the categories. Otherwise, if only `lang` is given,
+-- the only category that may be returned is "Requests for gender in LANG entries". If both are
+-- omitted, the returned list is empty.
+function export.format_genders(specs, lang, pos_for_cat)
+    local formatted_specs = {}
+    local categories = {}
+    local seen_types = {} -- last code seen for each type, across all specs
+    -- Set from the first spec (true = noun class, false = gender); every later spec must agree.
+    local all_is_nounclass = nil
+    -- Format one already-split spec: `parts` is rewritten in place into display forms, categories are recorded.
+    local function do_gender_spec(spec, parts)
+        local types = {}
+
+        for key, code in ipairs(parts) do
+            -- Is this code valid?
+            if not codes[code] then error("The tag \"" .. code .. "\" in the gender specification \"" .. spec.spec .. "\" is not valid.") end
+
+            -- Check for multiple genders/numbers/animacies in a single spec.
+            local typ = codes[code].type
+            if typ ~= "other" and types[typ] then
+                -- require("debug").track("gender and number/multiple")
+                -- require("debug").track("gender and number/multiple/" .. spec)
+                error("The gender specification \"" .. spec.spec .. "\" contains multiple tags of type \"" .. typ .. "\".")
+            end
+            types[typ] = true
+
+            if spec.qualifiers and #spec.qualifiers > 0 then
+                parts[key] = require("qualifier").format_qualifier(spec.qualifiers) .. " " .. codes[code].display
+            else
+                parts[key] = codes[code].display
+            end
+
+            -- Generate categories if called for.
+            if lang and pos_for_cat then
+                local cat = codes[code].cat
+                if cat then table.insert(categories, lang:getCanonicalName() .. " " .. cat) end
+                if seen_types[typ] and seen_types[typ] ~= code then
+                    cat = codetype_cats[typ]
+                    if cat then table.insert(categories, lang:getCanonicalName() .. " " .. cat) end
+                end
+                seen_types[typ] = code
+            end
+        end
+
+        -- Add the processed codes together with non-breaking spaces
+        if #parts == 1 then return parts[1] end
+        return table.concat(parts, " ")
+    end
+
+    for _, spec in ipairs(specs) do
+        if type(spec) ~= "table" then spec = {spec = spec} end
+        local is_nounclass
+        -- A spec starting with a digit 1-9, or with "c" followed by a non-hyphen character, is a noun class specification.
+        if spec.spec:find("^[1-9]") or spec.spec:find("^c[^-]") then
+            is_nounclass = true
+            local code = spec.spec:gsub("^c", "") -- strip the leading "c"; declared local so no global is created
+
+            local text
+            if code == "?" then
+                text = "?"
+            else
+                text = "" .. code .. ""
+                if lang and pos_for_cat then table.insert(categories, lang:getCanonicalName() .. " class " .. code .. " POS") end
+            end
+            local text_with_qual
+            if spec.qualifiers and #spec.qualifiers > 0 then
+                text_with_qual = require("qualifier").format_qualifier(spec.qualifiers) .. " " .. text
+            else
+                text_with_qual = text
+            end
+            table.insert(formatted_specs, text_with_qual)
+        else
+            -- Split the parts and iterate over each part, converting it into its display form
+            local parts = mw.text.split(spec.spec, "%-")
+            local extra_cats = {}
+
+            local has_combined = false
+            for _, code in ipairs(parts) do
+                if combined_codes[code] then
+                    has_combined = true
+                    break
+                end
+            end
+
+            if not has_combined then
+                table.insert(formatted_specs, do_gender_spec(spec, parts))
+            else
+                local all_parts = {{}}
+
+                for _, code in ipairs(parts) do
+                    if combined_codes[code] then
+                        local new_all_parts = {}
+                        for _, one_parts in ipairs(all_parts) do
+                            for _, one_code in ipairs(combined_codes[code].codes) do
+                                local new_combined_parts = mw.clone(one_parts)
+                                table.insert(new_combined_parts, one_code)
+                                table.insert(new_all_parts, new_combined_parts)
+                            end
+                        end
+                        all_parts = new_all_parts
+                        if lang and pos_for_cat then
+                            local extra_cat = combined_codes[code].cat
+                            if extra_cat then table.insert(extra_cats, lang:getCanonicalName() .. " " .. extra_cat) end
+                        end
+                    else
+                        for _, one_parts in ipairs(all_parts) do table.insert(one_parts, code) end
+                    end
+                end
+
+                for _, parts in ipairs(all_parts) do table.insert(formatted_specs, do_gender_spec(spec, parts)) end
+            end
+
+            if #extra_cats > 0 then for _, cat in ipairs(extra_cats) do table.insert(categories, cat) end end
+
+            if lang then
+                -- Do some additional gender checks if a language was given
+                -- Is this an incomplete gender? (plain find: "?" is meant literally)
+                if spec.spec:find("?", 1, true) then table.insert(categories, "Requests for gender in " .. lang:getCanonicalName() .. " entries") end
+
+                -- Check if the specification is valid
+                -- elseif langinfo.genders then
+                --	local valid_genders = {}
+                --	for _, g in ipairs(langinfo.genders) do valid_genders[g] = true end
+                --	
+                --	if not valid_genders[spec.spec] then
+                --		local valid_string = {}
+                --		for i, g in ipairs(langinfo.genders) do valid_string[i] = g end
+                --		error('The gender specification "' .. spec.spec .. '" is not valid for ' .. langinfo.names[1] .. ". Valid are: " .. table.concat(valid_string, ", "))
+                --	end
+                -- end
+            end
+
+            is_nounclass = false
+        end
+
+        -- Ensure that the specifications are either all noun classes, or none are.
+        if all_is_nounclass == nil then
+            all_is_nounclass = is_nounclass
+        elseif all_is_nounclass ~= is_nounclass then
+            error("Noun classes and genders cannot be mixed. Please use either one or the other.")
+        end
+    end
+
+    if lang and pos_for_cat then for i, cat in ipairs(categories) do categories[i] = cat:gsub("POS", pos_for_cat) end end
+    -- Test the flag tracked across all specs: the per-spec `is_nounclass` is local to the loop body above.
+    if all_is_nounclass then
+        -- Add the processed codes together with slashes
+        return "class " .. table.concat(formatted_specs, "/") .. "", categories
+    else
+        -- Add the processed codes together with " or "
+        return "" .. table.concat(formatted_specs, " or ") .. "", categories
+    end
+end
+
+return export
diff --git a/wiktra/wikt/translit/geor-translit.lua b/wiktra/wikt/translit/geor-translit.lua
new file mode 100644
index 0000000..2b9a586
--- /dev/null
+++ b/wiktra/wikt/translit/geor-translit.lua
@@ -0,0 +1,24 @@
+local export = {}
+-- Keep synchronized with [[Module:sva-translit]]
+local gsub = mw.ustring.gsub
+local tt = {["ა"] = "a", ["ბ"] = "b", ["გ"] = "g", ["დ"] = "d", ["ე"] = "e", ["ვ"] = "v", ["ზ"] = "z", ["ჱ"] = "ē", ["თ"] = "t", ["ი"] = "i", ["კ"] = "ḳ", ["ლ"] = "l", ["მ"] = "m", ["ნ"] = "n", ["ჲ"] = "y", ["ო"] = "o", ["პ"] = "ṗ", ["ჟ"] = "ž", ["რ"] = "r", ["ს"] = "s", ["ტ"] = "ṭ", ["ჳ"] = "wi", ["უ"] = "u", ["ფ"] = "p", ["ქ"] = "k", ["ღ"] = "ɣ", ["ყ"] = "q̇", ["შ"] = "š", ["ჩ"] = "č", ["ც"] = "c", ["ძ"] = "ʒ", ["წ"] = "c̣", ["ჭ"] = "č̣", ["ხ"] = "x", ["ჴ"] = "q", ["ჯ"] = "ǯ", ["ჰ"] = "h", ["ჵ"] = "ō", ["ჶ"] = "f", ["ჷ"] = "ə", ["ჸ"] = "ʾ", ["ჺ"] = "ʿ"};
+
+function export.tr(text, lang, sc)
+    -- Transliterate vowel nasalization in Bats: the modifier letter ჼ and (as in the upstream
+    -- Wiktionary module -- TODO confirm) a superscript ნ. The second pattern must not be empty:
+    -- an empty pattern matches at every position and would put a tilde between all characters.
+    text = gsub(text, "ჼ", "̃")
+    text = gsub(text, "<sup>ნ</sup>", "̃")
+    -- Uppercase letters of the Georgian Extended block (U+1C90..U+1CBF) are shifted onto the
+    -- Georgian block (starting at U+10D0), looked up there, and the result is uppercased.
+    -- Letters without a table entry are left as they are (the callback returns nil for them).
+    text = gsub(text, "[" .. mw.ustring.char(0x1C90) .. "-" .. mw.ustring.char(0x1CBF) .. "]", function(char)
+        local translit = tt[mw.ustring.char(mw.ustring.codepoint(char) - 0x1C90 + 0x10D0)]
+        return translit and mw.ustring.upper(translit)
+    end)
+    -- Everything else is looked up one character at a time; characters missing from `tt` pass through.
+    text = gsub(text, ".", tt)
+    return text
+end
+
+return export
diff --git a/wiktra/wikt/translit/gin-translit.lua b/wiktra/wikt/translit/gin-translit.lua
new file mode 100644
index 0000000..a7d8654
--- /dev/null
+++ b/wiktra/wikt/translit/gin-translit.lua
@@ -0,0 +1,20 @@
+local export = {}
+
+local mapping1 = {["п"] = "p", ["б"] = "b", ["т"] = "t", ["д"] = "d", ["к"] = "k", ["г"] = "g", ["ц"] = "c", ["ч"] = "č", ["с"] = "s", ["з"] = "z", ["ш"] = "š", ["ж"] = "ž", ["х"] = "x", ["м"] = "m", ["н"] = "n", ["р"] = "r", ["л"] = "l", ["в"] = "v", ["й"] = "y", ["и"] = "i", ["е"] = "e", ["э"] = "e", ["а"] = "a", ["о"] = "o", ["у"] = "u", ["ӥ"] = "ü", ["ъ"] = "ʾ"}
+
+local mapping2 = {["пӏ"] = "p’", ["тӏ"] = "t’", ["кӏ"] = "k’", ["къ"] = "q’", ["цӏ"] = "c’", ["лӏ"] = "ƛ", ["кь"] = "ƛ’", ["чӏ"] = "c’", ["хъ"] = "q", ["лъ"] = "λ", ["гъ"] = "ġ", ["хӏ"] = "ḥ", ["гӏ"] = "a̯", ["гь"] = "h", ["кӏв"] = "k’ʷ", ["хъв"] = "qʷ", ["къв"] = "q’ʷ", ["гъв"] = "ġʷ", ["хв"] = "xʷ"}
+
+function export.tr(text, lang, sc)
+    local str_gsub = string.gsub
+    local UTF8_char = "[%z\1-\127\194-\244][\128-\191]*"
+    -- Convert capital to lowercase palochka.
+    text = str_gsub(text, mw.ustring.char(0x4C0), mw.ustring.char(0x4CF))
+    -- Multigraphs first, longest key first: pairs() order is unspecified, and e.g. "кӏв" must be tried before "кӏ".
+    local keys = {}
+    for pat in pairs(mapping2) do keys[#keys + 1] = pat end
+    table.sort(keys, function(a, b) if #a ~= #b then return #a > #b end return a < b end)
+    for _, pat in ipairs(keys) do text = str_gsub(text, pat, mapping2[pat]) end
+    return (str_gsub(text, UTF8_char, mapping1)) -- remaining single letters; characters missing from mapping1 pass through
+end
+
+return export
diff --git a/wiktra/wikt/translit/goth-translit.lua b/wiktra/wikt/translit/goth-translit.lua
new file mode 100644
index 0000000..5d71399
--- /dev/null
+++ b/wiktra/wikt/translit/goth-translit.lua
@@ -0,0 +1,20 @@
+local export = {}
+
+local Goth_Latn = {["𐌰"] = "a", ["𐌱"] = "b", ["𐌲"] = "g", ["𐌳"] = "d", ["𐌴"] = "ē", ["𐌵"] = "q", ["𐌶"] = "z", ["𐌷"] = "h", ["𐌸"] = "þ", ["𐌹"] = "i", ["𐌺"] = "k", ["𐌻"] = "l", ["𐌼"] = "m", ["𐌽"] = "n", ["𐌾"] = "j", ["𐌿"] = "u", ["𐍀"] = "p", ["𐍁"] = "?", ["𐍂"] = "r", ["𐍃"] = "s", ["𐍄"] = "t", ["𐍅"] = "w", ["𐍆"] = "f", ["𐍇"] = "x", ["𐍈"] = "ƕ", ["𐍉"] = "ō", ["𐍊"] = "?"}
+
+local Latn_Goth = {["ā"] = "𐌰", ["e"] = "𐌴", ["ī"] = "𐌹", ["o"] = "𐍉", ["ū"] = "𐌿", ["y"] = "𐍅"}
+
+for g, l in pairs(Goth_Latn) do if l ~= "?" then Latn_Goth[l] = g end end
+
+function export.tr(text, lang, sc)
+    local withDigraph = mw.ustring.gsub(text, "𐌴𐌹", "ei") -- digraph first; letter by letter it would give "ēi"
+    local latin = mw.ustring.gsub(withDigraph, ".", Goth_Latn) -- `local` keeps only the string, dropping gsub's count
+    return latin
+end
+
+function export.tr_reverse(text) -- Latin transliteration back to Gothic letters; the input is lowercased first
+    local gothic = mw.ustring.gsub(mw.ustring.lower(text), ".", Latn_Goth)
+    return gothic
+end
+
+return export
diff --git a/wiktra/wikt/translit/grc-translit.lua b/wiktra/wikt/translit/grc-translit.lua
new file mode 100644
index 0000000..1583974
--- /dev/null
+++ b/wiktra/wikt/translit/grc-translit.lua
@@ -0,0 +1,146 @@
+local export = {}
+
+local m_data = require("grc-utilities/data")
+local tokenize = require("grc-utilities").tokenize
+
+local ufind = mw.ustring.find
+local ugsub = mw.ustring.gsub
+local U = mw.ustring.char
+local ulower = mw.ustring.lower
+local uupper = mw.ustring.upper
+
+local UTF8char = "[%z\1-\127\194-\244][\128-\191]*"
+
+-- Diacritics
+local diacritics = m_data.named
+
+-- Greek
+local acute = diacritics.acute
+local grave = diacritics.grave
+local circumflex = diacritics.circum
+local diaeresis = diacritics.diaeresis
+local smooth = diacritics.smooth
+local rough = diacritics.rough
+local macron = diacritics.macron
+local breve = diacritics.breve
+local subscript = diacritics.subscript
+
+-- Latin
+local hat = diacritics.Latin_circum
+
+local macron_diaeresis = macron .. diaeresis .. "?" .. hat
+local a_subscript = "^[αΑ].*" .. subscript .. "$"
+local velar = "κγχξ"
+
+local tt = {
+    -- Vowels
+    ["α"] = "a",
+    ["ε"] = "e",
+    ["η"] = "e" .. macron,
+    ["ι"] = "i",
+    ["ο"] = "o",
+    ["υ"] = "u",
+    ["ω"] = "o" .. macron,
+
+    -- Consonants
+    ["β"] = "b",
+    ["γ"] = "g",
+    ["δ"] = "d",
+    ["ζ"] = "z",
+    ["θ"] = "th",
+    ["κ"] = "k",
+    ["λ"] = "l",
+    ["μ"] = "m",
+    ["ν"] = "n",
+    ["ξ"] = "x",
+    ["π"] = "p",
+    ["ρ"] = "r",
+    ["σ"] = "s",
+    ["ς"] = "s",
+    ["τ"] = "t",
+    ["φ"] = "ph",
+    ["χ"] = "kh",
+    ["ψ"] = "ps",
+
+    -- Archaic letters
+    ["ϝ"] = "w",
+    ["ϻ"] = "ś",
+    ["ϙ"] = "q",
+    ["ϡ"] = "š",
+    ["ͷ"] = "v",
+
+    -- Incorrect characters: see [[Wiktionary:About Ancient Greek#Miscellaneous]].
+    -- These are tracked by [[Module:script utilities]].
+    ["ϐ"] = "b",
+    ["ϑ"] = "th",
+    ["ϰ"] = "k",
+    ["ϱ"] = "r",
+    ["ϲ"] = "s",
+    ["ϕ"] = "ph",
+
+    -- Diacritics
+    -- unchanged: macron, diaeresis, grave, acute
+    [breve] = "",
+    [smooth] = "",
+    [rough] = "",
+    [circumflex] = hat,
+    [subscript] = "i"
+}
+
+function export.tr(text, lang, sc)
+    -- A string consisting only of the rough-breathing sign is simply "h".
+    if text == "῾" then return "h" end
+    --[[
+		Replace semicolon or Greek question mark with regular question mark,
+		except after an ASCII alphanumeric character (to avoid converting
+		semicolons in HTML entities).
+	]]
+    text = ugsub(text, "([^A-Za-z0-9])[;" .. U(0x37E) .. "]", "%1?")
+
+    -- Handle the middle dot. It is equivalent to semicolon or colon, but semicolon is probably more common.
+    text = text:gsub("·", ";")
+
+    local tokens = tokenize(text)
+    -- Read the tokens in order. ipairs (not pairs) is required here: the pieces are appended
+    -- to `output` as they are visited, and pairs() makes no promise about traversal order.
+    local output = {}
+    for i, token in ipairs(tokens) do
+        -- Convert token to lowercase and substitute each character
+        -- for its transliteration
+        local translit = ulower(token):gsub(UTF8char, tt)
+
+        local next_token = tokens[i + 1]
+
+        if token == "γ" and next_token and velar:find(next_token, 1, true) then
+            -- γ before a velar should be "n"
+            translit = "n"
+        elseif token == "ρ" and tokens[i - 1] == "ρ" then
+            -- ρ after ρ should be "rh"
+            translit = "rh"
+        elseif ufind(token, a_subscript) then
+            -- add macron to ᾳ
+            translit = ugsub(translit, "([aA])", "%1" .. macron)
+        end
+
+        if token:find(rough) then
+            if ufind(token, "^[Ρρ]") then
+                translit = translit .. "h"
+            else -- vowel
+                translit = "h" .. translit
+            end
+        end
+
+        -- Remove macron from a vowel that has a circumflex.
+        if ufind(translit, macron_diaeresis) then translit = translit:gsub(macron, "") end
+
+        -- Capitalize first character of transliteration.
+        if token ~= ulower(token) then translit = translit:gsub("^" .. UTF8char, uupper) end
+
+        table.insert(output, translit)
+    end
+    output = table.concat(output)
+
+    return output
+end
+
+return export
diff --git a/wiktra/wikt/translit/grc-utilities.lua b/wiktra/wikt/translit/grc-utilities.lua
new file mode 100644
index 0000000..3907b1c
--- /dev/null
+++ b/wiktra/wikt/translit/grc-utilities.lua
@@ -0,0 +1,273 @@
+local export = {}
+
+local m_script_utils = require("script utilities")
+local m_links = require("links")
+local lang = require("languages").getByCode("grc")
+local sc = require("scripts").getByCode("polytonic")
+
+local m_data = mw.loadData("grc-utilities/data")
+local groups = m_data.groups
+local diacritic_order = m_data.diacritic_order
+local conversions = m_data.conversions
+local diacritics = m_data.diacritics
+local diacritic = m_data.diacritic
+local macron = diacritics.macron
+local breve = diacritics.breve
+local spacing_macron = diacritics.spacing_macron
+local spacing_breve = diacritics.spacing_breve
+local rough = diacritics.rough
+local smooth = diacritics.smooth
+local diaeresis = diacritics.diaeresis
+local acute = diacritics.acute
+local grave = diacritics.grave
+local circumflex = diacritics.circum
+local subscript = diacritics.subscript
+local combining_diacritic = m_data.combining_diacritic
+
+local UTF8_char = "[\1-\127\194-\244][\128-\191]*"
+local basic_Greek = "[\206-\207][\128-\191]" -- excluding first line of Greek and Coptic block: ͰͱͲͳʹ͵Ͷͷͺͻͼͽ;Ϳ
+
+local find = mw.ustring.find
+local match = mw.ustring.match
+local gmatch = mw.ustring.gmatch
+local sub = mw.ustring.sub
+local gsub = mw.ustring.gsub
+local toNFC = mw.ustring.toNFC
+local decompose = mw.ustring.toNFD
+
+local info = {} -- maps a single character to one of the property tables below
+-- The tables are shared among different characters so that they can be checked
+-- for equality if needed, and to use less space.
+local vowel_t = {vowel = true}
+local iota_t = {vowel = true, offglide = true}
+local upsilon_t = {vowel = true, offglide = true} -- same fields as iota_t, but a distinct table: make_tokens tests `== upsilon_t`
+-- These don't need any contents.
+local rho_t = {}
+-- local consonant_t = {}
+local diacritic_t = {diacritic = true}
+-- Needed for equality comparisons.
+local breathing_t = {diacritic = true} -- make_tokens compares against this table by identity
+-- Point info[c] at the shared table `t` for every character of a UTF-8 string, or every entry of an array.
+local function add_info(characters, t)
+    if type(characters) ~= "string" then
+        for _, entry in ipairs(characters) do info[entry] = t end
+        return
+    end
+    for ch in string.gmatch(characters, UTF8_char) do info[ch] = t end
+end
+-- Populate `info`: every listed character is mapped to the shared property table of its class.
+add_info({macron, breve, diaeresis, acute, grave, circumflex, subscript}, diacritic_t)
+-- Breathings get a table of their own so they can be told apart from the other diacritics.
+add_info({rough, smooth}, breathing_t)
+add_info("ΑΕΗΟΩαεηοω", vowel_t)
+add_info("Ιι", iota_t)
+add_info("Υυ", upsilon_t)
+-- add_info("ΒΓΔΖΘΚΛΜΝΞΠΡΣΤΦΧΨϜϘϺϷͶϠβγδζθκλμνξπρσςτφχψϝϙϻϸͷϡ", consonant_t)
+add_info("Ρρ", rho_t)
+-- Characters that were not registered above look up as the empty `not_recognized` table, never as nil.
+local not_recognized = {}
+setmetatable(info, {__index = function(t, key) return not_recognized end})
+
+local sparseConcat = require("table").sparseConcat
+
+local checkType = require"libraryUtil".checkType
+-- Return a checkType wrapper with `funcName` pre-filled. NOTE(review): not called anywhere in this module.
+local function _check(funcName) return function(argIndex, arg, expectType, nilOk) return checkType(funcName, argIndex, arg, expectType, nilOk) end end
+
+-- Call `func` once for each UTF-8 encoded character of `str`, in order; whatever `func` returns is discarded.
+local function forEach(str, func) for char in string.gmatch(str, UTF8_char) do func(char) end end
+
+-- Append `chars` to list[index] (creating the entry when it is absent) and
+-- return `text` with its first #chars bytes removed.
+-- NOTE(review): nothing in this module calls this helper.
+local function add(list, index, chars, text)
+    if not chars then error("The function add cannot act on a nil character.") end
+    local existing = list[index]
+    list[index] = existing and (existing .. chars) or chars
+    -- Byte-based slicing is fine here because #chars is a byte count as well.
+    local remainder = text:sub(#chars + 1)
+    return remainder
+end
+-- Pass `term` to tag_text of the "script utilities" module with this module's `lang` and `sc` objects and the given `face`.
+function export.tag(term, face) return m_script_utils.tag_text(term, lang, sc, face) end
+-- Build a link to `term` through m_links.full_link, with optional display text `alt` and transliteration `tr`.
+function export.link(term, face, alt, tr) return m_links.full_link({term = term, alt = alt, lang = lang, sc = sc, tr = tr}, face) end
+-- Link `term` through m_links.language_link, passing only term, language and `alt` (no script, no face).
+local function linkNoTag(term, alt) return m_links.language_link {term = term, lang = lang, alt = alt} end
+
+-- Decompose `text` (NFD), then replace each character that has an entry in `conversions`
+-- (spacing or nonstandard diacritics) with the combining equivalent given there.
+function export.standardDiacritics(text)
+    local decomposed = decompose(text)
+    -- Only the resulting string is kept; gsub's second result (the match count) is dropped.
+    local converted = string.gsub(decomposed, UTF8_char, conversions)
+    return converted
+end
+
+--[=[	This function arranges diacritics in the following order:
+			1. macron or breve
+			2. breathings or diaeresis
+			3. acute, circumflex, or grave
+			4. iota subscript
+		Used by [[Module:typing-aids]].
+		
+		A second diacritic of the same category does NOT raise an error (the error
+		call below is commented out); it is inserted next to the first and tracked.
+]=]
+local function reorderDiacriticSequence(diacritics) -- NOTE: `diacritics` and `diacritic` shadow the module-level locals of the same names
+    local output = {}
+    forEach(diacritics, function(diacritic)
+        local index = diacritic_order[diacritic]
+        if not output[index] then
+            output[index] = diacritic
+        else
+            -- Place breve after macron.
+            if diacritic == breve then index = index + 1 end
+            -- The following might have odd results when there
+            -- are three or more diacritics.
+            table.insert(output, index, diacritic)
+            -- [[Special:WhatLinksHere/Template:tracking/grc-utils/too many diacritics]]
+            require("debug").track("grc-utils/too many diacritics")
+            --[[
+				local m_templates = require("grc-utilities/templates")
+				error("There are two diacritics, " ..
+						m_templates.addDottedCircle(output[index]) .. " and " ..
+						m_templates.addDottedCircle(diacritic) ..
+						" that belong in the same position. There should be only one."
+				)
+				--]]
+        end
+    end)
+    return sparseConcat(output) -- `output` may have gaps (unused positions), hence the sparse concatenation
+end
+
+-- Decompose `text` and pass every run of two or more combining diacritics through reorderDiacriticSequence.
+function export.reorderDiacritics(text)
+    -- The outer parentheses drop gsub's second return value (the replacement count).
+    return (gsub(decompose(text), combining_diacritic .. combining_diacritic .. "+", reorderDiacriticSequence))
+end
+
+--[=[
+		Break `text` (decomposed to NFD first) into "tokens": individual letters or
+		diphthongs together with their diacritics. Returns an array of strings.
+		Used by [[Module:grc-accent]] and [[Module:grc-pronunciation]].
+--]=]
+local function make_tokens(text)
+    local tokens, prev_info = {}, {}
+    local token_i, vowel_count = 1, 0 -- vowel_count: vowel letters seen so far toward a possible two-letter diphthong
+    local prev
+    for character in string.gmatch(decompose(text), UTF8_char) do
+        local curr_info = info[character]
+        -- Split vowels between tokens if not a diphthong.
+        if curr_info.vowel then
+            vowel_count = vowel_count + 1
+            if prev and (not (vowel_count == 2 and curr_info.offglide and prev_info.vowel) -- υυ → υ, υ
+            -- ιυ → ι, υ
+            or prev_info.offglide and curr_info == upsilon_t or curr_info == prev_info) then
+                token_i = token_i + 1
+                if prev_info.vowel then vowel_count = 1 end
+            elseif vowel_count == 2 then
+                vowel_count = 0
+            end
+            tokens[token_i] = (tokens[token_i] or "") .. character
+        elseif curr_info.diacritic then
+            vowel_count = 0
+            tokens[token_i] = (tokens[token_i] or "") .. character
+            if prev_info.diacritic or prev_info.vowel then
+                if character == diaeresis then
+                    -- Split the diphthong in the current token if a diaeresis was found:
+                    -- the first letter, then the second letter plus any diacritics.
+                    local previous_vowel, vowel_with_diaeresis = string.match(tokens[token_i], "^(" .. basic_Greek .. ")(" .. basic_Greek .. ".+)")
+                    if previous_vowel then
+                        tokens[token_i], tokens[token_i + 1] = previous_vowel, vowel_with_diaeresis
+                        token_i = token_i + 1
+                    else
+                        -- The vowel preceding the vowel with the diaeresis will already be
+                        -- placed in the previous token if it has a diacritic:
+                        -- Περικλῆῐ̈ → Π ε ρ ι κ λ ῆ ῐ̈
+                        --[[
+						mw.log('Diaeresis was found in ' .. text .. ', but the previous token ' ..
+							require("Unicode data").add_dotted_circle(tokens[token_i]) ..
+							' couldn’t be split because it does not consist of two Basic Greek characters followed by other characters.')
+						--]]
+                    end
+                end
+            elseif prev_info == rho_t then
+                if curr_info ~= breathing_t then mw.log(string.format("The character %s in %s should not have the accent %s on it.", prev, text, require("grc-utilities/templates").addDottedCircle(character))) end
+            else
+                mw.log(prev and ("The character " .. prev .. " cannot have a diacritic on it.") or "The text begins with a diacritic that has no base character.") -- prev is nil on the first character
+            end
+        else
+            vowel_count = 0
+            if prev then token_i = token_i + 1 end
+            tokens[token_i] = (tokens[token_i] or "") .. character
+        end
+        prev = character
+        prev_info = curr_info
+    end
+    return tokens
+end
+-- Tokenize `text`, memoizing the token array under the NFD-decomposed form of the input.
+local cache = {}
+function export.tokenize(text)
+    local key = decompose(text)
+    cache[key] = cache[key] or make_tokens(text)
+    return cache[key]
+end
+
+--[=[	Places diacritics in the following order (only if a macron or breve is present):
+			1. breathings or diaeresis
+			2. acute, circumflex, or grave
+			3. macron or breve (then converted to the spacing forms)
+			4. iota subscript
+		Returns NFC text. Used by [[Module:grc-pronunciation]].		]=]
+function export.pronunciationOrder(text)
+    local standardized = export.standardDiacritics(text)
+    -- Nothing to reorder unless a length mark (macron or breve) occurs in the text.
+    if not find(standardized, groups[1]) then return toNFC(standardized) end
+    -- From each run of diacritics keep at most one match per group, in the order 2, 3, 1, 4.
+    local group_order = {2, 3, 1, 4}
+    local function reorder(sequence)
+        local parts = {}
+        for slot, group in ipairs(group_order) do parts[slot] = match(sequence, groups[group]) or "" end
+        return table.concat(parts)
+    end
+    local reordered = gsub(standardized, diacritic .. diacritic .. "+", reorder)
+    reordered = gsub(reordered, macron, spacing_macron) -- combining to spacing macron
+    reordered = gsub(reordered, breve, spacing_breve) -- combining to spacing breve
+    return toNFC(reordered)
+end
+
+-- Find the "ambiguous" vowels of `text`: tokens that are a single α, ι or υ (as written in the
+-- pattern below) with no macron, breve, circumflex or iota subscript among their diacritics.
+-- Returns the list of such vowels (passed through export.tag unless `noTag` is truthy) and a
+-- set keyed by the lowercased vowel letters that were found.
+function export.findAmbig(text, noTag)
+    if type(text) ~= "string" then error("The input to function findAmbig is nonexistent or not a string") end
+
+    local length_mark = "[" .. macron .. breve .. circumflex .. subscript .. "]"
+    local bare_aiu = "^([" .. "αιυ" .. "])(" .. diacritic .. "*)$"
+    local ambiguous, letters = {}, {}
+
+    for _, token in ipairs(export.tokenize(text)) do
+        local letter, marks
+        -- Tokens that contain a consonant are never candidates.
+        if not find(token, m_data.consonant) then letter, marks = match(token, bare_aiu) end
+
+        if letter and (marks == "" or not find(marks, length_mark)) then
+            local entry
+            if noTag then
+                entry = letter
+            else
+                entry = export.tag(letter .. marks)
+            end
+            ambiguous[#ambiguous + 1] = entry
+            -- Record which vowel letters are ambiguous, for categorization purposes.
+            letters[mw.ustring.lower(letter)] = true
+        end
+    end
+
+    return ambiguous, letters
+end
+
+return export
diff --git a/wiktra/wikt/translit/grc-utilities/data.lua b/wiktra/wikt/translit/grc-utilities/data.lua
new file mode 100644
index 0000000..b705b49
--- /dev/null
+++ b/wiktra/wikt/translit/grc-utilities/data.lua
@@ -0,0 +1,78 @@
+local data = {}
+
+local U = mw.ustring.char
+local macron = U(0x304)
+local spacing_macron = U(0xAF)
+local modifier_macron = U(0x2C9)
+local breve = U(0x306)
+local spacing_breve = U(0x2D8)
+local rough = U(0x314)
+local smooth = U(0x313)
+local diaeresis = U(0x308)
+local acute = U(0x301)
+local grave = U(0x300)
+local circum = U(0x342)
+local Latin_circum = U(0x302)
+local coronis = U(0x343)
+local subscript = U(0x345)
+local undertie = mw.ustring.char(0x35C) -- actually "combining double breve below"
+
+data["diacritics"] = {["macron"] = macron, ["spacing_macron"] = spacing_macron, ["modifier_macron"] = modifier_macron, ["breve"] = breve, ["spacing_breve"] = spacing_breve, ["rough"] = rough, ["smooth"] = smooth, ["diaeresis"] = diaeresis, ["acute"] = acute, ["grave"] = grave, ["circum"] = circum, ["Latin_circum"] = Latin_circum, ["coronis"] = coronis, ["subscript"] = subscript}
+local all_diacritics = {} -- collected separately: looping over data.diacritics while it already held "all" appended "all" to itself
+for _, diacritic in pairs(data.diacritics) do all_diacritics[#all_diacritics + 1] = diacritic end
+data.diacritics.all = table.concat(all_diacritics) -- pairs order is unspecified; harmless where this is used below, inside a [...] class
+-- Alias: `named` is the very same table as `diacritics`.
+data["named"] = data["diacritics"]
+-- Character class matching any one of the diacritics collected in data.diacritics.all; `all` is an alias.
+data["diacritic"] = "[" .. data.diacritics.all .. "]"
+data["all"] = data["diacritic"]
+-- Patterns for the four diacritic categories; `groups` is an alias and `accents` repeats group 3.
+data["diacritic_groups"] = {[1] = "[" .. macron .. breve .. "]", [2] = "[" .. diaeresis .. smooth .. rough .. "]", [3] = "[" .. acute .. grave .. circum .. "]", [4] = subscript}
+data["groups"] = data["diacritic_groups"]
+data["diacritic_groups"]["accents"] = data["groups"][3]
+-- Sort position of each combining diacritic; diacritics that share a number belong to the same category.
+data["diacritic_order"] = {[macron] = 1, [breve] = 1, [rough] = 2, [smooth] = 2, [diaeresis] = 2, [acute] = 3, [grave] = 3, [circum] = 3, [subscript] = 4}
+-- Lookup table from spacing or nonstandard diacritic characters to the combining diacritic(s) that replace them.
+data["diacritical_conversions"] = {
+    -- Convert spacing to combining diacritics
+    [spacing_macron] = macron, -- macron
+    [modifier_macron] = macron,
+    [spacing_breve] = breve, -- breve
+    ["῾"] = rough, -- rough breathing, modifier letter reversed comma
+    ["ʽ"] = rough,
+    ["᾿"] = smooth, -- smooth breathing, modifier letter apostrophe, coronis, combining coronis
+    ["ʼ"] = smooth,
+    [coronis] = smooth,
+    ["´"] = acute, -- acute
+    ["`"] = grave, -- grave
+    ["῀"] = circum, -- Greek circumflex (perispomeni), circumflex, combining circumflex
+    ["ˆ"] = circum,
+    [Latin_circum] = circum,
+    ["῎"] = smooth .. acute, -- smooth and acute
+    ["῍"] = smooth .. grave, -- smooth and grave
+    ["῏"] = smooth .. circum, -- smooth and circumflex
+    ["῞"] = rough .. acute, -- rough and acute
+    ["῝"] = rough .. grave, -- rough and grave
+    ["῟"] = rough .. circum, -- rough and circumflex
+    ["¨"] = diaeresis, -- diaeresis, alone and combined with an accent
+    ["΅"] = diaeresis .. acute,
+    ["῭"] = diaeresis .. grave,
+    ["῁"] = diaeresis .. circum
+}
+data["conversions"] = data["diacritical_conversions"] -- shorter alias for the same table
+-- Letter inventories, each followed by the character class built from it.
+data["consonants"] = "ΒβΓγΔδΖζΘθΚκΛλΜμΝνΞξΠπΡρΣσςΤτΦφΧχΨψ"
+data["consonant"] = "[" .. data.consonants .. "]"
+data["vowels"] = "ΑαΕεΗηΙιΟοΥυΩω"
+data["vowel"] = "[" .. data.vowels .. "]"
+data["combining_diacritics"] = table.concat {macron, breve, rough, smooth, diaeresis, acute, grave, circum, subscript}
+data["combining_diacritic"] = "[" .. data.combining_diacritics .. "]"
+-- The string below contains ranges such as ἀ-ᾼ, so it is only meaningful inside a [...] class.
+-- Basic letters with and without diacritics
+local letters_with_diacritics = "ΆΈ-ώϜϝἀ-ᾼῂ-ῌῐ-" .. -- capital iota with oxia, normalized to capital iota with tonos if entered
+-- literally in a string
+U(0x1FDB) .. "Ὶῠ-Ῥῲ-ῼ"
+data.word_characters = letters_with_diacritics .. data.combining_diacritics .. undertie
+data.word_character = "[" .. data.word_characters .. "]"
+
+return data
diff --git a/wiktra/wikt/translit/grc-utilities/templates.lua b/wiktra/wikt/translit/grc-utilities/templates.lua
new file mode 100644
index 0000000..e3d5cb1
--- /dev/null
+++ b/wiktra/wikt/translit/grc-utilities/templates.lua
@@ -0,0 +1,98 @@
+local export = {}
+
+local m_table = require("table")
+local m_utils = require("grc-utilities")
+local m_data = require("grc-utilities/data")
+local tag = m_utils.tag
+local link = m_utils.link
+local tokenize = m_utils.tokenize
+local diacritic = m_data.diacritic
+
+local U = mw.ustring.char
+local toNFD = mw.ustring.toNFD
+local gsub = mw.ustring.gsub
+
+local dottedCircle = U(0x25CC)
+
+export.addDottedCircle = require("Unicode data").add_dotted_circle
+-- Template entry point: emit a wikitable row holding the input text and its comma-separated tokens.
+function export.printTokens(frame)
+    local text = frame.args[1] -- declared local; it used to leak into the global environment
+    if not text then error("Provide text to tokenize in first parameter.") end
+
+    local token_format = "%s"
+    local spacing = {["\n"] = "¶", ["\r"] = "¶", [" "] = " "}
+
+    -- Work on a copy so that the token table returned by tokenize is not modified.
+    local tokens = m_table.shallowcopy(tokenize(text))
+    -- Make line breaks inside tokens visible; the parentheses drop gsub's match count.
+    for i, token in ipairs(tokens) do tokens[i] = token_format:format((string.gsub(token, "%s", spacing))) end
+    return "|-\n| " .. tag(text) .. " || " .. tag(table.concat(tokens, ", "))
+end
+-- Template entry point: apply the grc-utilities function named by arg 1 to the text in arg 2 and show input and result.
+function export.printDiacritics(frame)
+    local functionToPrint = frame.args[1] or error("Specify a function in the first parameter.")
+    local term = frame.args[2] or error("Add text in the second parameter.")
+
+    local func = m_utils[functionToPrint]
+    if type(func) ~= "function" then error("grc-utilities has no function named " .. tostring(functionToPrint) .. ".") end
+    local result = func(term)
+
+    -- Show diacritics above or below a dotted circle. (`content` is local now; it used to leak as a global.)
+    local content = {term = tag(term), term_decomposition = tag(export.addDottedCircle(toNFD(term))), result = tag(result), result_decomposition = tag(export.addDottedCircle(result))}
+    local output = [[ term (term_decomposition) → result (result_decomposition)]]
+    local function addContent(item) return content[item] or "" end
+    -- Substitute each placeholder word in `output`; the parentheses drop gsub's replacement count.
+    return (output:gsub("[%a_]+", addContent))
+end
+-- Template entry point: show arg 1 (in NFD form) followed by its decomposition in parentheses, optionally linked.
+function export.decompose(frame)
+    local params = {[1] = {}, ["link"] = {type = "boolean"}}
+    local args = require("parameters").process(frame.args, params) -- declared local; it used to leak as a global
+
+    local text = toNFD(args[1])
+    -- Named `make_links` so that the boolean no longer shadows the `link` function called below.
+    local make_links = args.link
+
+    local composed
+    if make_links then
+        composed = link(text, nil, nil, "-")
+    else
+        composed = tag(text)
+    end
+
+    local decomposed = export.addDottedCircle(text)
+
+    if make_links then
+        -- `gmatch` and `linkNoTag` were never defined in this module: use the
+        -- library function directly and build the untagged link here.
+        local m_links = require("links")
+        local grc = require("languages").getByCode("grc")
+        local result = {}
+        for seat, letter in mw.ustring.gmatch(decomposed, "(" .. dottedCircle .. "?)(.)") do
+            -- Link each character on its own, displayed together with its dotted-circle seat if it has one.
+            result[#result + 1] = m_links.language_link {term = letter, lang = grc, alt = seat .. letter}
+        end
+        decomposed = table.concat(result)
+    end
+
+    decomposed = tag(decomposed)
+    return composed .. " (" .. decomposed .. ")"
+end
+
+function export.tokenize(frame)
+    local map = require("fun").map
+    local token_format = "%s"
+    local spacing = {["\n"] = "¶", ["\r"] = "¶", [" "] = " "}
+    local _tokenize = tokenize
+    local function tokenize(word, ...) return _tokenize(word) end
+    local function print_tokens(tokens)
+        if type(tokens) == "string" then return tokens end
+        local output = {}
+        for i, token in ipairs(tokens) do output[i] = string.format(token_format, string.gsub(token, "%s", spacing)) end
+        return table.concat(output, " ")
+    end
+    return table.concat(map(print_tokens, map(tokenize, frame.args)), "
") +end + +return export diff --git a/wiktra/wikt/translit/gu-translit.lua b/wiktra/wikt/translit/gu-translit.lua new file mode 100644 index 0000000..a71f12d --- /dev/null +++ b/wiktra/wikt/translit/gu-translit.lua @@ -0,0 +1,161 @@ +local export = {} + +local gsub = mw.ustring.gsub +local match = mw.ustring.match + +local conv = { + -- consonants + ["ક"] = "k", + ["ખ"] = "kh", + ["ગ"] = "g", + ["ઘ"] = "gh", + ["ઙ"] = "ṅ", + ["ચ"] = "c", + ["છ"] = "ch", + ["જ"] = "j", + ["ઝ"] = "jh", + ["ઞ"] = "ñ", + ["ટ"] = "ṭ", + ["ઠ"] = "ṭh", + ["ડ"] = "ḍ", + ["ઢ"] = "ḍh", + ["ણ"] = "ṇ", + ["ત"] = "t", + ["થ"] = "th", + ["દ"] = "d", + ["ધ"] = "dh", + ["ન"] = "n", + ["પ"] = "p", + ["ફ"] = "ph", + ["બ"] = "b", + ["ભ"] = "bh", + ["મ"] = "m", + ["ય"] = "y", + ["ર"] = "r", + ["લ"] = "l", + ["વ"] = "v", + ["ળ"] = "ḷ", + ["શ"] = "ś", + ["ષ"] = "ṣ", + ["સ"] = "s", + ["હ"] = "h", + ["ત઼"] = "t̰", + ["જ઼"] = "z", + ["ંઘ઼"] = "ng", + ["ડ઼"] = "ṛ", + ["ઢ઼"] = "ṛh", + ["ન઼"] = "ṉ", + ["ફ઼"] = "f", + + -- vowel diacritics + ["ા"] = "ā", + ["િ"] = "i", + ["ી"] = "ī", + ["ુ"] = "u", + ["ૂ"] = "ū", + ["ૃ"] = "ṛ", + ["ૄ"] = "ṝ", + ["ે"] = "e", + ["ૈ"] = "ai", + ["ો"] = "o", + ["ૌ"] = "au", + ["ૅ"] = "ɛ", + ["ૉ"] = "ɔ", + + -- vowel mātras + ["અ"] = "a", + ["આ"] = "ā", + ["ઇ"] = "i", + ["ઈ"] = "ī", + ["ઉ"] = "u", + ["ઊ"] = "ū", + ["ઋ"] = "ru", + ["ૠ"] = "ṝ", + ["એ"] = "e", + ["ઐ"] = "ai", + ["ઓ"] = "o", + ["ઔ"] = "au", + ["ઍ"] = "ɛ", + ["ઑ"] = "ɔ", + + -- chandrabindu + ["ઁ"] = "m̐", -- until a better method is found + + -- anusvara + ["ં"] = "ṃ", -- until a better method is found + + -- visarga + ["ઃ"] = "ḥ", + + -- virama + ["્"] = "", + + -- avagraha + ["ઽ"] = "’", + + -- numerals + ["૦"] = "0", + ["૧"] = "1", + ["૨"] = "2", + ["૩"] = "3", + ["૪"] = "4", + ["૫"] = "5", + ["૬"] = "6", + ["૭"] = "7", + ["૮"] = "8", + ["૯"] = "9", + + -- punctuation + ["।"] = ".", -- danda + ["+"] = "", -- compound separator + + -- om + ["ૐ"] = "OM" +} + +local nasal_assim = {["[kg]h?"] = "ṅ", ["[cj]h?"] = "ñ", 
["[ṭḍṛ]h?"] = "ṇ", ["[td]h?"] = "n", ["[pb]h?"] = "m", ["n"] = "n", ["m"] = "m"} + +function export.tr(text, lang, sc) + local c = "([કખગઘઙચછજઝઞટઠડઢતથદધપફબભશષસયરલવહણનમ]઼?)" + local no_drop = "ય" + local final_no_drop = "યરલવહનમ" + local v = "([a્ાિીુૂેૈોૌૃૄૅૉ]ઁ?)" + local virama = "(્)" + local n = "(ં?)" + local nukta = "([તજઘડઢનફ]઼)" + + local can_drop = gsub(c, "[" .. no_drop .. "]", "") + local final_can_drop = gsub(c, "[" .. final_no_drop .. "]", "") + local no_virama = gsub(v, virama, "") + + text = text .. " " + + -- text = gsub(text,"(%S)"..c.."%2","%1ː%2") + + text = gsub(text, c, "%1a") + text = gsub(text, "a" .. v, "%1") + text = gsub(text, no_virama .. n .. can_drop .. "a ", "%1%2%3 ") -- ending + text = gsub(text, virama .. n .. final_can_drop .. "a ", "%1%2%3 ") -- ending + local pattern = no_virama .. n .. can_drop .. "a" .. c .. no_virama + while match(text, "(.*)" .. pattern) do text = gsub(text, "(.*)" .. pattern, "%1%2%3%4%5%6") end + + text = gsub(text, nukta, conv) + text = gsub(text, ".", conv) + + for key, val in pairs(nasal_assim) do text = gsub(text, "([aeiou])ṃ(" .. key .. ")", "%1" .. val .. 
"%2") end + + text = gsub(text, "([aiueēoāīū])ṃ", "%1̃") + + text = gsub(text, "ː(.)", "%1%1") + + text = gsub(text, " $", "") + + text = gsub(text, "ā̃tar", "āntar") + + text = gsub(text, "OM", "oṃ") + text = gsub(text, "a*%*a*", "a") + + return mw.ustring.toNFC(text) +end + +return export diff --git a/wiktra/wikt/translit/guru-translit.lua b/wiktra/wikt/translit/guru-translit.lua new file mode 100644 index 0000000..d6febd4 --- /dev/null +++ b/wiktra/wikt/translit/guru-translit.lua @@ -0,0 +1,131 @@ +local export = {} + +local conv = { + -- consonants without nukta + ["ਸ"] = "s", + ["ਹ"] = "h", + ["ਕ"] = "k", + ["ਖ"] = "kh", + ["ਗ"] = "g", + ["ਘ"] = "gh", + ["ਙ"] = "ṅ", + ["ਚ"] = "c", + ["ਛ"] = "ch", + ["ਜ"] = "j", + ["ਝ"] = "jh", + ["ਞ"] = "ñ", + ["ਟ"] = "ṭ", + ["ਠ"] = "ṭh", + ["ਡ"] = "ḍ", + ["ਢ"] = "ḍh", + ["ਣ"] = "ṇ", + ["ਤ"] = "t", + ["ਥ"] = "th", + ["ਦ"] = "d", + ["ਧ"] = "dh", + ["ਨ"] = "n", + ["ਪ"] = "p", + ["ਫ"] = "ph", + ["ਬ"] = "b", + ["ਭ"] = "bh", + ["ਮ"] = "m", + ["ਯ"] = "y", + ["ਰ"] = "r", + ["ਲ"] = "l", + ["ਵ"] = "v", + ["ੜ"] = "ṛ", + + -- consonants with nukta + ["ਸ਼"] = "ś", + ["ਖ਼"] = "x", + ["ਗ਼"] = "ġ", + ["ਜ਼"] = "z", + ["ਫ਼"] = "f", + ["ਲ਼"] = "ḷ", + ["ਕ਼"] = "q", + ["ਡ਼"] = "ṛ", + + -- vowels + ["ਾ"] = "ā", + ["ਿ"] = "i", + ["ੀ"] = "ī", + ["ੁ"] = "u", + ["ੂ"] = "ū", + ["ੇ"] = "ē", + ["ੈ"] = "ai", + ["ੋ"] = "o", + ["ੌ"] = "au", + + -- other diacritics + ["ੰ"] = "N", -- ṭippi: nasalize + ["ਂ"] = "N", -- bindi: nasalize + ["ੱ"] = "ː", -- addak: geminate + ["੍"] = "", -- halant, supresses the inherent vowel "a" + ["ਃ"] = "h", -- voiceless "h" sound (tone raiser) + + -- independent vowels + ["ਅ"] = "a", + ["ਆ"] = "ā", + ["ਇ"] = "i", + ["ਈ"] = "ī", + ["ਉ"] = "u", + ["ਊ"] = "ū", + ["ਏ"] = "ē", + ["ਐ"] = "ai", + ["ਓ"] = "o", + ["ਔ"] = "ō", + + -- digits + ["੦"] = "0", + ["੧"] = "1", + ["੨"] = "2", + ["੩"] = "3", + ["੪"] = "4", + ["੫"] = "5", + ["੬"] = "6", + ["੭"] = "7", + ["੮"] = "8", + ["੯"] = "9" +} + +local nasal_assim = {["[kg]h?"] = "ṅ", 
["[cj]h?"] = "ñ", ["[ṭḍ]h?"] = "ṇ", ["[td]h?"] = "n", ["[pb]h?"] = "m", ["n"] = "n", ["m"] = "m", ["s"] = "n"} + +-- translit any words or phrases +function export.tr(text, lang, sc) + local c = "([ਸਹਕਖਗਘਙਚਛਜਝਞਟਠਡਢਣਤਥਦਧਨਪਫਬਭਮਯਰਲਵੜː]਼?)" + local y = "ਯ" + local v = "([aਾਿੀੁੂੇੈੋੌ੍])" + local virama = "੍" + local n = "([ੰਂ]?)" + local nukta = "([ਸਖਗਜਫਲਕਡ]਼)" + + local can_drop = mw.ustring.gsub(c, y, "") + local no_virama = mw.ustring.gsub(v, virama, "") + + text = text .. " " + + text = mw.ustring.gsub(text, c, "%1a") + text = mw.ustring.gsub(text, "a" .. v, "%1") + -- mw.log(text) + text = mw.ustring.gsub(text, v .. n .. can_drop .. "a ", "%1%2%3 ") -- ending + -- mw.log(text) + text = mw.ustring.gsub(text, v .. n .. can_drop .. "a" .. c .. v, "%1%2%3%4%5") + -- mw.log(text) + + text = mw.ustring.gsub(text, nukta, conv) + text = mw.ustring.gsub(text, ".", conv) + + for key, val in pairs(nasal_assim) do text = mw.ustring.gsub(text, "N(" .. key .. ")", val .. "%1") end + text = mw.ustring.gsub(text, "([aiuēaioāīū])N ", "%1̃ ") + text = mw.ustring.gsub(text, "(.?)N", "%1̃") + + text = mw.ustring.gsub(text, "ː(.)", "%1%1") + + text = mw.ustring.gsub(text, " ?।", ".") + + text = mw.ustring.gsub(text, " $", "") + + return mw.ustring.toNFC(text) +end + +return export diff --git a/wiktra/wikt/translit/he-translit.lua b/wiktra/wikt/translit/he-translit.lua new file mode 100644 index 0000000..b6855b3 --- /dev/null +++ b/wiktra/wikt/translit/he-translit.lua @@ -0,0 +1,214 @@ +local export = {} +local U = mw.ustring.char +local gsub = mw.ustring.gsub + +local sheva = U(0x05B0) +local hataf_segol = U(0x05B1) +local hataf_patah = U(0x05B2) +local hataf_qamats = U(0x05B3) +local hiriq = U(0x05B4) +local tsere = U(0x05B5) +local segol = U(0x05B6) +local patah = U(0x05B7) +local qamats = U(0x05B8) +local qamats_qatan = U(0x05C7) +local holam = U(0x05B9) +local holam_haser_for_waw = U(0x05BA) +local qubuts = U(0x05BB) +local dagesh_mappiq = U(0x05BC) +local shin_dot = U(0x05C1) 
+local sin_dot = U(0x05C2) + +local macron_above = U(0x0304) +local macron_below = U(0x0331) +local macron = "[" .. macron_above .. macron_below .. "]" + +local alef = "א" +local he = "ה" +local waw = "ו" +local yod = "י" +local vowel_letters = alef .. he .. waw .. yod +local vowel_letter = "[" .. vowel_letters .. "]" + +-- '0' represents silent sheva +local vowel_points = (sheva .. hataf_segol .. hataf_patah .. hataf_qamats .. hiriq .. tsere .. segol .. patah .. qamats .. qamats_qatan .. holam .. qubuts .. "0" .. holam_haser_for_waw) +local vowel_point = "[" .. vowel_points .. "]" +local short_vowels = segol .. patah .. hiriq .. qubuts .. qamats_qatan +local short_vowel = "[" .. short_vowels .. "]" + +local shuruq = waw .. dagesh_mappiq +local holam_male = waw .. holam + +-- use dummies characters that do not match as punctuation +-- the dummy letter stands in for final silent alef or he, or for the hiatus before a furtive patah, +-- or comes before a pre-transliterated waw to aid in matching +local dummy_letter = U(0x0627) -- ARABIC LETTER ALEF +local dummy_geresh = U(0x064E) -- ARABIC FATHA +local dummy_gershayim = U(0x064B) -- ARABIC FATHATAN +local real_geresh = "׳" +local real_gershayim = "״" +local letter_modifier = "[" .. shin_dot .. sin_dot .. "]?[" .. dummy_geresh .. dummy_gershayim .. "]?" +local letters = "אבגדהוזחטיכךלמםנןסעפףצץקרשת" +local letter = "[" .. letters .. dummy_letter .. "]" .. letter_modifier +local letter_not_waw = "[אבגדהזחטיכךלמםנןסעפףצץקרשת" .. dummy_letter .. "]" .. letter_modifier +local gutturals = "אהחע" +local guttural = "[" .. gutturals .. "]" + +local vowel_letter_or_geresh = "[" .. vowel_letters .. dummy_geresh .. dummy_gershayim .. "]" + +-- note, the geresh and gershayim are included in this, which is why dummies are used in their place +local word_break_chars = "%s%p" +local word_break = "[" .. word_break_chars .. "]" +local word_start = "%f[^" .. word_break_chars .. 
"]" -- matches the boundary but not the actual word break characters +local word_end = "%f[" .. word_break_chars .. "]" -- matches the boundary but not the actual word break characters + +local tr_vowels = "aeiouāēīōūəăĕŏ0" + +local biblical_to_modern = {["ʾ"] = "'", ["b" .. macron_below] = "v", ["g" .. macron_above] = "g", ["d" .. macron_below] = "d", ["w"] = "v", ["ž"] = "zh", ["ḥ"] = "kh", ["ṭ"] = "t", ["k" .. macron_below] = "kh", ["ʿ"] = "'", ["p" .. macron_above] = "f", ["ṣ"] = "ts", ["č"] = "ch", ["q"] = "k", ["š"] = "sh", ["ś"] = "s", ["t" .. macron_below] = "t", ["ə"] = "'", ["ĕ"] = "e", ["ă"] = "a", ["ŏ"] = "o", ["ī"] = "i", ["ē"] = "e", ["ā"] = "a", ["ō"] = "o", ["ū"] = "u"} + +-- helper function to remove vowel letters but keep gereshes +local function gereshes(str) return gsub(str, vowel_letter, "") end + +local biblical = { + { + -- replace geresh and gershayim with their dummy equivalents so that they won't match as word boundaries + [real_geresh] = dummy_geresh, + [real_gershayim] = dummy_gershayim + }, { + -- The default order is: consonant, vowel point, dagesh or mappiq, shin or sin dot. + -- The desired order is: consonant, shin or sin dot, dagesh or mappiq, vowel point. + -- Also, move geresh and gershayim closer to the letter for easier handling (will be moved back later if not actually a modifier) + ["([" .. letters .. "])(" .. vowel_point .. "*)(" .. dagesh_mappiq .. "*)([" .. shin_dot .. sin_dot .. "]*)([" .. dummy_geresh .. dummy_gershayim .. "]*)"] = "%1%4%5%3%2" + }, { + -- special case: change qamats in כל to qamats qatan + -- the problem is that כל might be preceded by prefixed clitics, which maybe be chained indefinitely, + -- while other unrelated words might happen to end in כל with a qamats gadol; therefore, match either + -- the entire word or only when preceded by a precisely recognized prefix + [word_start .. "(כ" .. dagesh_mappiq .. "?)" .. qamats .. "(ל)" .. word_end] = "%1" .. qamats_qatan .. "%2", + ["([הבכל]" .. 
dagesh_mappiq .. "?" .. patah .. "כ" .. dagesh_mappiq .. ")" .. qamats .. "(ל)" .. word_end] = "%1" .. qamats_qatan .. "%2", + ["(מ" .. dagesh_mappiq .. "?" .. hiriq .. "כ" .. dagesh_mappiq .. ")" .. qamats .. "(ל)" .. word_end] = "%1" .. qamats_qatan .. "%2", + ["(ש" .. shin_dot .. dagesh_mappiq .. "?[" .. segol .. patah .. "]כ" .. dagesh_mappiq .. ")" .. qamats .. "(ל)" .. word_end] = "%1" .. qamats_qatan .. "%2", -- patah is very archaic + ["([ובכלד]" .. dagesh_mappiq .. "?" .. sheva .. "כ)" .. qamats .. "(ל)" .. word_end] = "%1" .. qamats_qatan .. "%2" + }, { + -- remove final alef and he, but only when preceded by a vowel + ["(" .. vowel_point .. vowel_letter_or_geresh .. "*)[" .. alef .. he .. "]" .. word_end] = "%1" .. dummy_letter, + ["(" .. shuruq .. vowel_letter_or_geresh .. "*)[" .. alef .. he .. "]" .. word_end] = "%1" .. dummy_letter + }, { + -- these are the cases, other than the above, where a final letter should be ignored + [hiriq .. vowel_letter_or_geresh .. "-[" .. yod .. dummy_letter .. "]" .. word_end] = "ī", + ["([" .. tsere .. segol .. "])" .. vowel_letter_or_geresh .. "-[" .. yod .. "]" .. word_end] = "%1", + ["([" .. holam .. qubuts .. "])" .. vowel_letter_or_geresh .. "-[" .. waw .. "]" .. word_end] = "%1" + }, { + [sheva .. "(" .. letter .. ")" .. sheva] = "0%1" .. sheva, -- two shevas in a row + ["(" .. short_vowel .. letter .. ")" .. sheva] = "%10", -- after a short vowel, assume(!) a silent sheva + ["(" .. guttural .. ")" .. sheva] = "%10", -- gutturals cannot have a vocal sheva + + ["(" .. vowel_point .. ")" .. shuruq] = "%1" .. dummy_letter .. "ww", -- when waw + dagesh is not a shuruq + ["(" .. vowel_point .. vowel_letter_or_geresh .. "-)" .. shuruq .. "(" .. vowel_letter_or_geresh .. "-" .. vowel_point .. ")"] = "%1" .. dummy_letter .. "ww%2", -- when waw + dagesh is not a shuruq + ["(" .. vowel_point .. ")" .. holam_male] = "%1" .. dummy_letter .. "w" .. holam, -- when waw + holam is not a holam male + + ["([" .. alef .. he .. 
"])" .. dagesh_mappiq] = "%1" -- handle mappiq (very rarely occurs on an alef) + }, { + [shuruq .. shuruq] = shuruq .. "ww", -- another potential case when waw + dagesh is not a shuruq + [shuruq .. holam_male] = shuruq .. "w" .. holam, -- another potential case when waw + holam is not a holam male + + -- tentatively lengthen hiriqs with vowel letters + [hiriq .. "(" .. vowel_letter_or_geresh .. "+)(" .. letter .. ")"] = function(vlg, l) return "ī" .. gereshes(vlg) .. l end, + + -- rearrange furtive patach (mappiq should already have been removed, but handle it just in case) + ["(" .. guttural .. dagesh_mappiq .. "?)" .. patah .. word_end] = dummy_letter .. "a%1" + }, { + -- remove vowel letters + ["(" .. letter .. ")(" .. vowel_letter_or_geresh .. "+)" .. shuruq] = function(l, vlg) return l .. gereshes(vlg) .. shuruq end, + [shuruq .. "(" .. vowel_letter_or_geresh .. "+)" .. "(" .. letter_not_waw .. ")"] = function(vlg, l) return shuruq .. gereshes(vlg) .. l end, + [shuruq .. "(" .. vowel_letter_or_geresh .. "+)" .. "(" .. waw .. "[^" .. holam .. dagesh_mappiq .. "])"] = function(vlg, l) return shuruq .. gereshes(vlg) .. l end, + ["(" .. vowel_point .. ")" .. "(" .. vowel_letter_or_geresh .. "+)" .. "(" .. letter_not_waw .. ")"] = function(vp, vlg, l) return vp .. gereshes(vlg) .. l end, + ["(" .. vowel_point .. ")" .. "(" .. vowel_letter_or_geresh .. "+)" .. "(" .. waw .. "[^" .. holam .. dagesh_mappiq .. "])"] = function(vp, vlg, l) return vp .. gereshes(vlg) .. l end + }, { + -- handle two-character combinations first + ["ג" .. dummy_geresh] = "j", + ["ז" .. dummy_geresh] = "ž", + ["[צץ]" .. dummy_geresh] = "č", + ["ש" .. shin_dot] = "š", + ["ש" .. sin_dot] = "ś" + }, {["א"] = "ʾ", ["ב"] = "b" .. macron_below, ["ג"] = "g" .. macron_above, ["ד"] = "d" .. macron_below, ["ה"] = "h", ["ז"] = "z", ["ח"] = "ḥ", ["ט"] = "ṭ", ["י"] = "y", ["[כך]"] = "k" .. macron_below, ["ל"] = "l", ["[מם]"] = "m", ["[נן]"] = "n", ["ס"] = "s", ["ע"] = "ʿ", ["[פף]"] = "p" .. 
macron_above, ["[צץ]"] = "ṣ", ["ק"] = "q", ["ר"] = "r", ["ת"] = "t" .. macron_below}, { + [word_start .. "([bgdkptj])" .. macron .. "?" .. dagesh_mappiq] = "%1", -- assume(!) dagesh qal at the beginning of a word + ["[0" .. sheva .. "]([bgdkptj])" .. macron .. "?" .. dagesh_mappiq] = "0%1", -- dagesh qal after sheva, and assume(!) silent sheva + ["(%l)0%1"] = "%1" .. sheva .. "%1", -- vocal sheva between identical consonants + [shuruq] = "ū" + }, { + -- restore geresh and gershayim order + ["([" .. dummy_geresh .. dummy_gershayim .. "])(" .. dagesh_mappiq .. "*)(" .. vowel_point .. "*)"] = "%2%3%1" + }, { + -- handle ירושלם + [hiriq .. patah] = "ayi", -- in this case, the vowels are reversed by Unicode normalization rules + [patah .. hiriq] = "ayi", -- just in case they're in the correct order + [hiriq .. qamats] = "āyi", -- pausal form of above + [qamats .. hiriq] = "āyi", -- as above + -- handle ירושלמה + ["[0" .. sheva .. "]" .. patah] = "ay", -- in this case, the vowels are reversed by Unicode normalization rules + [patah .. "[0" .. sheva .. "]"] = "ay", -- just in case they're in the correct order + ["[0" .. sheva .. "]" .. qamats] = "āy", -- pausal form of above + [qamats .. "[0" .. sheva .. "]"] = "āy" -- as above + }, {[sheva] = "ə", [hataf_segol] = "ĕ", [hataf_patah] = "ă", [hataf_qamats] = "ŏ", [hiriq] = "i", [tsere] = "ē", [segol] = "e", [patah] = "a", [qamats] = "ā", [qamats_qatan] = "o", [qubuts] = "u", [shin_dot] = "", [sin_dot] = "", [holam_male] = "ō", [waw .. holam_haser_for_waw] = "wō"}, { + ["(.)" .. macron .. "?" .. dagesh_mappiq] = "%1%1" -- gemination + }, { + ["(śśā)[שś](k" .. macron_below .. ")"] = "%1%2" -- special case for יששכר + }, { + ["ā(%l" .. macron .. "?0)"] = "o%1", -- assume(!) qamats qatan before silent sheva + + [holam] = "ō", + ["ו"] = "w", + ["ש"] = "š" -- assume(!) shin if no shin or sin dot + }, { + -- handle bgdkpt letters in unvocalized words (such as acronyms) + [word_start .. "([^" .. tr_vowels .. "]-[bgdkpt]" .. 
--- Module entry point: transliterate Hebrew-script text.
-- Hebrew ("he") defaults to the modern romanization; any other language
-- code (such as Aramaic) keeps the biblical output.
-- @param text  string to transliterate
-- @param lang  language code; only compared against "he"
-- @param sc    script code (not consulted here)
-- @return transliterated string
function export.tr(text, lang, sc)
	local use_modern = (lang == "he")
	return export.biblical(text, use_modern)
end

--- Run the staged `biblical` replacement rules over `text`.
-- @param text    string to transliterate
-- @param modern  when truthy, post-process the result through
--                `biblical_to_modern` (degeminate, map segments, collapse '')
-- @return transliterated string, recomposed to NFC
function export.biblical(text, modern)
	-- The rules operate on decomposed text; the surrounding spaces let the
	-- word-initial / word-final rules fire at the string edges as well.
	local padded = " " .. mw.ustring.toNFD(text) .. " "

	-- Stages are applied in list order; the rules inside one stage are
	-- visited with pairs(), i.e. in no particular order.
	for _, stage in ipairs(biblical) do
		for pattern, repl in pairs(stage) do
			padded = gsub(padded, pattern, repl)
		end
	end

	-- Strip the padding again; failing to find it means a rule consumed it.
	local result = mw.ustring.match(padded, "^ (.*) $")
	if result == nil then
		error("Something went wrong, wrapped spaces were deleted.")
	end

	-- Must run before recomposition.
	if modern then
		result = gsub(result, "([%lʾʿ])%1", "%1")
		result = gsub(result, "[%lʾʿ]" .. macron .. "?", function(seg)
			return biblical_to_modern[seg] or seg
		end)
		result = gsub(result, "''", "'")
	end

	local recomposed = mw.ustring.toNFC(result)
	return recomposed
end

return export
= "0", + ["१"] = "1", + ["२"] = "2", + ["३"] = "3", + ["४"] = "4", + ["५"] = "5", + ["६"] = "6", + ["७"] = "7", + ["८"] = "8", + ["९"] = "9", + + -- punctuation + ["।"] = ".", -- danda + ["॥"] = ".", -- double danda + ["+"] = "", -- compound separator + + -- abbreviation sign + ["॰"] = "." +} + +local nasal_assim_short = {["क"] = "ङ", ["ख"] = "ङ", ["ग"] = "ङ", ["घ"] = "ङ", ["ङ"] = "ङ", ["च"] = "ञ", ["छ"] = "ञ", ["ज"] = "ञ", ["झ"] = "ञ", ["ञ"] = "ञ", ["ट"] = "ण", ["ठ"] = "ण", ["ड"] = "ण", ["ढ"] = "ण", ["ण"] = "ण", ["त"] = "न", ["थ"] = "न", ["द"] = "न", ["ध"] = "न", ["न"] = "न", ["प"] = "म", ["फ"] = "म", ["ब"] = "म", ["भ"] = "म", ["म"] = "म", ["व"] = "म", ["य"] = "ँ", ["ष"] = "न", ["श"] = "न", ["स"] = "न", ["ह"] = "ँ"} + +local nasal_assim_long = {["क"] = "ँ", ["ख"] = "ँ", ["ग"] = "ङ", ["घ"] = "ङ", ["ङ"] = "ङ", ["च"] = "ँ", ["छ"] = "ञ्", ["ज"] = "ञ", ["झ"] = "ञ", ["ञ"] = "ञ", ["ट"] = "ँ", ["ठ"] = "ँ", ["ड"] = "ण", ["ढ"] = "ण", ["ण"] = "ण", ["त"] = "न", ["थ"] = "न", ["द"] = "न", ["ध"] = "न", ["न"] = "न", ["प"] = "ँ", ["फ"] = "ँ", ["ब"] = "म", ["भ"] = "म", ["म"] = "म", ["ह"] = "ँ"} + +-- These clusters when occurring word-finally will not trigger a schwa added +-- after them even though the second consonant is in special_cons, which normally +-- causes the extra schwa to be added. NOTE: The clusters are reversed from their +-- ultimate effect, e.g. the first cluster is written 'ml' but actually applies +-- to words ending in 'lm'. The clusters below overall refer to the six clusters +-- describable by [rl][mnv], i.e. rm, rn, rv, lm, ln, lv. +local perm_cl = {["म्ल"] = true, ["व्ल"] = true, ["न्ल"] = true, ["म्र"] = true, ["व्र"] = true, ["न्र"] = true} + +local all_cons, special_cons = "कखगघङचछजझञटठडढतथदधपफबभशषसयरलवहणनम", "यरलवहनमञ" +local vowel, vowel_sign = "*aिुृेोाीूैौॉॅॆॊ'", "अइउएओआईऊऋऐऔऑऍ'" +local long_vowel, short_vowel = "ाीूआईऊ", "*aिुृॉॅॆॊअइउऋऑऍोैौेओऔएऐ'" +local syncope_pattern = "([" .. vowel .. vowel_sign .. "])(़?[" .. all_cons .. "])a(़?[" .. all_cons .. 
"])([ंँ]?[" .. vowel .. vowel_sign .. "])" + +local function rev_string(text) + local result, length = {}, mw.ustring.len(text) + for i = length, 1, -1 do table.insert(result, mw.ustring.sub(text, i, i)) end + return table.concat(result) +end + +function export.tr(text, lang, sc) + -- force word-final anusvara to behave as a pure nasal + text = gsub(text, "ं$", "ँ") + text = gsub(text, "ं ", "ँ ") + text = gsub(text, "ं%-", "ँ-") + -- abbreviation dot + text = gsub(text, "॰", ".") + text = gsub(text, "([" .. all_cons .. "]़?)([" .. vowel .. "्]?)", function(c, d) return c .. (d == "" and "a" or d) end) + for word in mw.ustring.gmatch(text, "[ऀ-ॿa*]+") do + local orig_word = word + word = rev_string(word) + word = gsub(word, "^a(़?)([" .. all_cons .. "])(.)(.?)", function(opt, first, second, third) return (((match(first, "[" .. special_cons .. "]") and match(second, "्") and not perm_cl[first .. second .. third]) or match(first .. second, "य[ी]")) and "a" or "") .. opt .. first .. second .. third end) + while match(word, syncope_pattern) do word = gsub(word, syncope_pattern, "%1%2%3%4") end + word = rev_string(word) + -- sometimes chandrabindu != anusvara + word = gsub(word, "([" .. short_vowel .. long_vowel .. "])ं([सशषवय])", function(prev, succ) return prev .. (nasal_assim_short[succ] or "̃") .. succ end) + word = gsub(word, "([" .. short_vowel .. long_vowel .. "])ँ([सशषवय])", function(prev, succ) return prev .. "̃" .. succ end) + word = gsub(word, "([" .. short_vowel .. long_vowel .. "])ं([तदडपछ])", function(prev, succ) return prev .. (nasal_assim_short[succ] or "̃") .. succ end) + word = gsub(word, "([" .. short_vowel .. long_vowel .. "])ँ([तदडपछ])", function(prev, succ) return prev .. "̃" .. succ end) + -- force chandrabindu to behave as anusvara + word = gsub(word, "ँ", "ं") + word = gsub(word, "([" .. short_vowel .. "])ं(.़?)", function(prev, succ) return prev .. (nasal_assim_short[succ] or "̃") .. succ end) + word = gsub(word, "([" .. long_vowel .. 
"])ं(.़?)", function(prev, succ) return prev .. (nasal_assim_long[succ] or "̃") .. succ end) + -- Convert * to %* so we can match it in a regex. + local escaped_orig_word = gsub(orig_word, "%*", "%*") + text = gsub(text, escaped_orig_word, word) + end + text = gsub(text, ".़?", conv) + text = gsub(text, "a([iu])̃", "a͠%1") + text = gsub(text, "jñ", "gy") + text = gsub(text, "ñz", "nz") + text = gsub(text, "ṇṛ", "nz") + text = gsub(text, "%*", "a") + return mw.ustring.toNFC(text) +end + +return export diff --git a/wiktra/wikt/translit/hit-translit.lua b/wiktra/wikt/translit/hit-translit.lua new file mode 100644 index 0000000..14cf54a --- /dev/null +++ b/wiktra/wikt/translit/hit-translit.lua @@ -0,0 +1,303 @@ +local export = {} + +local bit32 = require("bit32") +local m_table = require("table") +local m_tag = require("hit-translit/tag") +local sign_list = mw.loadData("hit-translit/data") + +local ulen = mw.ustring.len +local usub = mw.ustring.sub + +local segments = { + -- vowels + ["a"] = "a", + ["á"] = "a", + ["à"] = "a", + ["e"] = "e", + ["é"] = "e", + ["è"] = "e", + ["i"] = "i", + ["í"] = "i", + ["ì"] = "i", + ["u"] = "u", + ["ú"] = "u", + ["ù"] = "u", + + -- consonants with voicing alternaternates + ["b"] = "b", + ["p"] = "p", + ["d"] = "d", + ["t"] = "t", + ["g"] = "g", + ["k"] = "k", + ["q"] = "q", + + -- single consonants + ["ḫ"] = "h", + ["r"] = "r", + ["l"] = "l", + ["m"] = "m", + ["n"] = "n", + ["š"] = "s", + ["z"] = "z", + ["y"] = "y", + ["w"] = "w", + + -- numbers + ["0"] = "0", + ["1"] = "1", + ["2"] = "2", + ["3"] = "3", + ["4"] = "4", + ["5"] = "5", + ["6"] = "6", + ["7"] = "7", + ["8"] = "8", + ["9"] = "9" +} + +--[=[ +-- Set up bit array to for marking which onsets and codas are available for ambiguous characters +]=] +local sort_order = { + -- vowels + ["a"] = 2 ^ 0, + ["i"] = 2 ^ 1, -- I've chosen "i" over "e" + ["e"] = 2 ^ 2, + ["u"] = 2 ^ 3, + + -- consonants with voicing alternaternates + ["p"] = 2 ^ 4, + ["b"] = 2 ^ 5, + ["t"] = 2 ^ 6, + ["d"] 
--- Sort a list of hashed syllables in place, by onset and then by coda,
-- using the bit values in `sort_order` as the ordering.
-- @param t list of { syllable, o = onset, c = coda } entries
-- @return the same table `t`
local function inplace_multikey_sort(t)
	table.sort(t, function(a, b)
		if a.o ~= b.o then return sort_order[a.o] < sort_order[b.o] end

		return sort_order[a.c] < sort_order[b.c]
	end)
	return t
end

--- Find the first phonetic segment of a syllable and return its normalized
-- form, scanning forwards, or backwards when `rev` is set. Thus:
--   find_seg("šaq")       gives "s"
--   find_seg("luḫ", true) gives "h"
-- Characters that are not keys of `segments` are skipped.
-- Raises an error when the syllable contains no known segment at all.
local function find_seg(syl, rev)
	local f
	for i = 1, ulen(syl) do
		-- a negative index counts from the end of the string
		f = usub(syl, rev and -i or i, rev and -i or i)
		if segments[f] then
			return segments[f]
		end
	end
	error("Could not find a " .. (rev and "coda" or "onset") .. " for the syllable \"" .. syl .. "\".")
end

--- Find the normalized onset character of a syllable.
function export.find_onset(syl)
	return find_seg(syl)
end

--- Find the normalized coda character of a syllable.
function export.find_coda(syl)
	return find_seg(syl, true)
end

--- Turn the list of Hittite syllables of a sign into a list of entries holding
-- the syllable, its normalized onset and its normalized coda, and add bit
-- hashes of all onsets and all codas to the sign. The sign is modified in
-- place. Thus
--   { "it", "id", "et", "ed", hit = true }
-- becomes:
--   {
--     { "it", o = "i", c = "t" },
--     { "id", o = "i", c = "d" },
--     { "et", o = "e", c = "t" },
--     { "ed", o = "e", c = "d" },
--     o_hash = 6, c_hash = 192, hit = true
--   }
function export.hash_sign(sign)
	sign.o_hash, sign.c_hash = 0, 0 -- init onset and coda hashes for signs
	for i, syl in ipairs(sign) do
		sign[i] = {syl, o = export.find_onset(syl), c = export.find_coda(syl)}
		sign.o_hash = bit32.bor(sign.o_hash, sort_order[sign[i].o])
		sign.c_hash = bit32.bor(sign.c_hash, sort_order[sign[i].c])
	end
end

--- Copy the `sign_list` entry for `sign`; Hittite entries are additionally
-- hashed and sorted so the copy is ready for fitting.
-- @param sign key into `sign_list` (one to three cuneiform characters)
-- @return a fresh table; the shared data table is left untouched
function export.copy_sign(sign)
	local new = m_table.deepcopy(sign_list[sign], true)
	if new.hit then -- has Hittite signs
		export.hash_sign(new)
		inplace_multikey_sort(new)
	end

	return new
end

--- For two adjacent sets of Hittite syllables and a mask of their shared
-- characters, keep only the syllables of `first` whose coda is in the mask
-- and the syllables of `second` whose onset is in the mask, and rebuild the
-- hashes.
-- @return new_first, new_second (fresh tables; the inputs are not modified)
local function remove_syls(first, second, mask)
	local new_o_hash, new_c_hash, new_first, new_second = 0, 0, {hit = true}, {hit = true}
	for _, syl in ipairs(first) do
		if bit32.band(sort_order[syl.c], mask) > 0 then
			table.insert(new_first, syl)
			new_o_hash = bit32.bor(new_o_hash, sort_order[syl.o]) -- unnecessary, but useful for tracking
		end
	end
	new_first.o_hash = new_o_hash
	new_first.c_hash = mask

	for _, syl in ipairs(second) do
		if bit32.band(sort_order[syl.o], mask) > 0 then
			table.insert(new_second, syl)
			new_c_hash = bit32.bor(new_c_hash, sort_order[syl.c])
		end
	end
	new_second.o_hash = mask
	new_second.c_hash = new_c_hash

	return new_first, new_second
end

-- Groups of segments that count as "approximately the same" when fitting.
local related_character_masks = {
	-- voicing alternates
	bit32.bor(sort_order["p"], sort_order["b"]),
	bit32.bor(sort_order["t"], sort_order["d"]),
	bit32.bor(sort_order["k"], sort_order["g"], sort_order["q"]),
	-- "u" patterns next to "w"
	bit32.bor(sort_order["u"], sort_order["w"]),
	-- numbers pattern together
	bit32.bor(sort_order["0"], sort_order["1"], sort_order["2"], sort_order["3"], sort_order["4"], sort_order["5"], sort_order["6"], sort_order["7"], sort_order["8"], sort_order["9"])
}

--- Build a bit mask of all approximate matches between two hashes, like "p"
-- and "b", or "t" and "d": every related group that both hashes touch is
-- added to the mask in full.
-- @return the combined mask, 0 when no group is shared
local function approx_match(first_hash, second_hash)
	local new_mask = 0
	for _, mask in ipairs(related_character_masks) do
		if bit32.band(mask, first_hash) > 0 and bit32.band(mask, second_hash) > 0 then
			new_mask = bit32.bor(new_mask, mask)
		end
	end
	return new_mask
end

--- Take two adjacent signs and remove unlikely Hittite syllables: exact
-- coda/onset matches are preferred, approximate matches are the fallback.
-- Signs that are not both Hittite are returned unchanged.
-- @return first, second (possibly replaced by narrowed copies)
function export.fit_signs(first, second)
	if first and second then -- two signs
		if first.hit and second.hit then -- both have Hittite syllables
			local match_mask = bit32.band(first.c_hash, second.o_hash)
			if match_mask > 0 then -- there are matching chars in each
				return remove_syls(first, second, match_mask)
			end

			match_mask = approx_match(first.c_hash, second.o_hash)
			if match_mask > 0 then -- there are approximately matching chars in each
				return remove_syls(first, second, match_mask)
			end
		end
	elseif first then -- final sign
		-- nothing yet
	else -- initial sign
		-- nothing yet
	end
	return first, second
end

--- Choose one reading per sign, tag it when appropriate, then join the
-- readings with "-".
local function assemble_word(signs)
	local word = {}
	for _, sign in ipairs(signs) do
		if sign.hit then -- If Hittite, take first sign
			table.insert(word, sign[1][1])
		elseif sign.sum then -- If Sumerogram, take and tag first sign
			table.insert(word, m_tag.tag_sumerogram(sign[1]))
		elseif sign.akk then -- If Akkadogram, take and tag first sign
			table.insert(word, m_tag.tag_akkadogram(sign[1]))
		elseif sign.hurr then -- If Hurrian, take and tag first sign
			table.insert(word, m_tag.tag_hurrian_tr(sign[1]))
		elseif sign.hatt then -- If Hattic, take and tag first sign
			table.insert(word, m_tag.tag_hattic_tr(sign[1]))
		elseif sign.glossenkeil then -- If Glossenkeil, display it
			table.insert(word, m_tag.glossenkeil())
		elseif sign.raw then -- Sign without data (see export.transpose): keep it as is
			table.insert(word, sign.raw)
		end
	end

	return table.concat(word, "-")
end

--- Take a continuous cuneiform string and convert it to transliteration.
-- The longest known sign sequence (3, then 2, then 1 characters) is consumed
-- at each step. A character with no entry in `sign_list` is passed through
-- untransliterated; previously such a character was never consumed and the
-- loop did not terminate.
-- @param text string of cuneiform characters
-- @return the "-"-joined transliteration
function export.transpose(text)
	local signs = {}
	while ulen(text) > 0 do
		local consumed = false
		for len = 3, 1, -1 do
			local key = usub(text, 1, len)
			if sign_list[key] then
				table.insert(signs, export.copy_sign(key)) -- add in new sign
				text = usub(text, len + 1) -- truncate string
				consumed = true
				break
			end
		end
		if not consumed then
			-- unknown sign: keep the character and move on
			table.insert(signs, {raw = usub(text, 1, 1)})
			text = usub(text, 2)
		end
		signs[#signs - 1], signs[#signs] = export.fit_signs(signs[#signs - 1], signs[#signs]) -- fit two signs
	end

	signs[#signs] = export.fit_signs(signs[#signs], nil) -- fit end of word

	return assemble_word(signs)
end

--- Module entry point: transliterate every run of cuneiform characters in
-- `text` and tag the result as Hittite transliteration.
-- @param text  string to transliterate
-- @param lang  language code (not consulted here)
-- @param sc    script code; anything other than "Xsux" yields nil
-- @return tagged transliteration, or nil for other scripts
function export.tr(text, lang, sc)
	if sc ~= "Xsux" then return nil end

	text = mw.ustring.gsub(text, "[𒀀-𒑱]+", export.transpose)

	return m_tag.tag_hittite_tr(text)
end

return export
hit = true}, -- HZL 29 + ["𒄀"] = {"gi", "ge", hit = true}, -- HZL 30 + ["𒄁"] = {"geₑ", hurr = true}, -- HZL 31 + ["𒊑"] = {"ri", "re", "tal", "dal", hit = true}, -- HZL 32 + ["𒍣"] = {"zi", "ze", hit = true}, -- HZL 33 + ["𒉣𒇬"] = {"TÙR", sum = true}, -- HZL 34 + ["𒆲"] = {"KUN", sum = true}, -- HZL 35 + ["𒉣"] = {"NUN", sum = true}, -- HZL 36 + ["𒋾"] = {"ti", "dì", hit = true}, -- HZL 37 + ["𒈧"] = {"MÁŠ", sum = true}, -- HZL 38 + ["𒉆"] = {"nam", hit = true}, -- HZL 39 + ["𒂗"] = {"en", "in₄", hit = true}, -- HZL 40 + ["𒈹"] = {"INANNA", "INNIN", sum = true}, -- HZL 41 + ["𒋩"] = {"šur", hit = true}, -- HZL 42 + ["𒊒"] = {"ru", hit = true}, -- HZL 43 + ["𒅁"] = {"IB", "URAŠ", "URTA", sum = true}, -- HZL 44 + ["𒇇"] = {"U₈", "US₅", "USDUḪA", sum = true}, -- HZL 45 + ["𒂄"] = {"zul", hit = true}, -- HZL 46 + ["𒆥"] = {"GUR₁₀", "KIN", sum = true}, -- HZL 47 + ["𒑚"] = {"ŠUŠANA", sum = true}, -- HZL 48 + ["𒆏"] = {"kap", "kab", "gáp", "gáb", hit = true}, -- HZL 49 + ["𒄸"] = {"ḪÚB", sum = true}, -- HZL 49 + ["𒄽"] = {"ḫub", "ḫup", hit = true}, -- HZL 50 + ["𒌨"] = {"ur", "lik", "lig", hit = true}, -- HZL 51 + ["𒄇"] = {"GIDIM", sum = true}, -- HZL 52 + ["𒌇"] = {"TUK", "TUKU", sum = true}, -- HZL 53 + ["𒃢"] = {"SILA₄", sum = true}, -- HZL 54 + ["𒇅"] = {"BÚGIN", "BÚNIN", sum = true}, -- HZL 55 + ["𒂷"] = {"GÁ", "MAL", "PISAN", sum = true}, -- HZL 56 + ["𒂼"] = {"AMA", "DAGAL", sum = true}, -- HZL 57 + ["𒃡"] = {"ÙR", sum = true}, -- HZL 58 + ["𒃥"] = {"ÀRAḪ", "ÉSAG", sum = true}, -- HZL 59 + ["𒃌"] = {"GALGA", sum = true}, -- HZL 60 + ["𒃷"] = {"gán", "kán", hit = true}, -- HZL 61 + ["𒂞"] = {"ERIN", "EREN", sum = true}, -- HZL 62 + ["𒋁"] = {"ŠÉŠ", sum = true}, -- HZL 63 + ["𒋗𒆸"] = {"ŠU-NÍGIN", sum = true}, -- HZL 64 + ["𒋠"] = {"SÍG", "SÍK", "SIKI", sum = true}, -- HZL 65 + ["𒋠𒊩"] = {"SÍG-MUNUS", sum = true}, -- HZL 66 + ["𒅅"] = {"iq", "eq", "ig", "ik", "eg", "ek", hit = true}, -- HZL 67 + ["𒋗"] = {"šu", hit = true}, -- HZL 68 + ["𒆬"] = {"KÙ", "KUG", "AZAG", "GUŠKIN", sum = true}, -- HZL 69 + 
["𒊷"] = {"GIŠIMMAR", sum = true}, -- HZL 70 + ["𒁰"] = {"DÀRA", sum = true}, -- HZL 71 + ["𒉌"] = {"ni", "né", hit = true}, -- HZL 72 + ["𒉌𒌓"] = {"IA₄", "NA₄", "ZÁ", sum = true}, -- HZL 73 + ["𒉏"] = {"DÀḪ", "ELAM", "NIM", sum = true}, -- HZL 74 + ["𒆕"] = {"DÙ", "GAG", "RÚ", sum = true}, -- HZL 75 + ["𒉍"] = {"néₑ", hurr = true}, -- HZL 76 + ["𒅕"] = {"ir", "er", hit = true}, -- HZL 77 + ["𒇽"] = {"LÚ", sum = true}, -- HZL 78 + ["𒋀"] = {"SES", "ŠEŠ", sum = true}, -- HZL 79 + ["𒀿"] = {"AŠGAB", sum = true}, -- HZL 80 + ["𒀝"] = {"ag", "ak", "aq", hit = true}, -- HZL 81 + ["𒀞"] = {"MÈ", sum = true}, -- HZL 82 + ["𒁯"] = {"GÙN", sum = true}, -- HZL 83 + ["𒌗"] = {"ITI", "ITU", sum = true}, -- HZL 84 + ["𒋒"] = {"ŠINIG", sum = true}, -- HZL 85 + ["𒋛"] = {"ší", "šé", hit = true}, -- HZL 86 + ["𒈣"] = {"MÁ", sum = true}, -- HZL 87 + ["𒂈"] = {"ŠÙDUL", "ŠÙDUN", sum = true}, -- HZL 88 + ["𒋛𒀀"] = {"DIRI", "SA₅", sum = true}, -- HZL 89 + ["𒋰"] = {"tab", "tap", hit = true}, -- HZL 90 + ["𒋳"] = {"šum", hit = true}, -- HZL 91 + ["𒊍"] = {"az", hit = true}, -- HZL 92 + ["𒊌"] = {"uq", "ug", "uk", hit = true}, -- HZL 93 + ["𒊋"] = {"NIB", sum = true}, -- HZL 94 + ["𒆷"] = {"la", hit = true}, -- HZL 95 + ["𒂇"] = {"UKU", sum = true}, -- HZL 96 + ["𒀊"] = {"ab", "ap", hit = true}, -- HZL 97 + ["𒌝"] = {"um", hit = true}, -- HZL 98 + ["𒁾"] = {"tub", "tup", hit = true}, -- HZL 99 + ["𒀮"] = {"nap", "nab", hit = true}, -- HZL 100 + ["𒀯"] = {"MUL", sum = true}, -- HZL 101 + ["𒌤"] = {"DÉ", sum = true}, -- HZL 102 + ["𒉓"] = {"šàm", hit = true}, -- HZL 103 + ["𒉙"] = {"AZU", "ÚZU", sum = true}, -- HZL 104 + ["𒀜"] = {"at", "ad", hit = true}, -- HZL 105 + ["𒆟"] = {"KÉŠ", "KEŠDA", "SÌR", "ŠÈR", "ŠÌR", sum = true}, -- HZL 106 + ["𒍢"] = {"zé", "zí", hit = true}, -- HZL 108 + ["𒍏"] = {"URUDU", "TABIRA", "ÙMMEDA", sum = true}, -- HZL 109 + ["𒌘"] = {"MÚRU", "MURUB₄", sum = true}, -- HZL 110 + ["𒀔"] = {"ERI₁₁", "UNU", "UNUG", sum = true}, -- HZL 111 + ["𒈩"] = {"miš", hit = true}, -- HZL 112 + ["𒃶"] = {"ḫé", "ḫí", hit = 
true}, -- HZL 113 + ["𒂦"] = {"BÀD", sum = true}, -- HZL 114 + ["𒈗"] = {"LUGAL", sum = true}, -- HZL 115 + ["𒊐𒃵"] = {"DÌM", sum = true}, -- HZL 116 + ["𒅋"] = {"il", "él", hit = true}, -- HZL 117 + ["𒉼"] = {"PAN", sum = true}, -- HZL 118 + ["𒉒"] = {"NÍNDA", sum = true}, -- HZL 119 + ["𒄣"] = {"kum", "gum", hit = true}, -- HZL 120 + ["𒉘"] = {"ÁG", "ÁGA", sum = true}, -- HZL 121 + ["𒄤"] = {"gaz", hit = true}, -- HZL 122 + ["𒉚𒀀𒀭"] = {"ŠÁM", sum = true}, -- HZL 123 + ["𒌫"] = {"úr", hit = true}, -- HZL 124 + ["𒌈"] = {"tum", "dum", "tu₄", hit = true}, -- HZL 125 + ["𒂕"] = {"EGIR", sum = true}, -- HZL 126 + ["𒇸"] = {"LIL", sum = true}, -- HZL 127 + ["𒁺"] = {"du", "tù", hit = true}, -- HZL 128 + ["𒁽"] = {"KAŠ₄", sum = true}, -- HZL 129 + ["𒂀"] = {"DÚB", sum = true}, -- HZL 130 + ["𒃾"] = {"wi₅", hit = true}, -- HZL 131 + ["𒍑"] = {"uš", hit = true}, -- HZL 132 + ["𒅗"] = {"ka", hit = true}, -- HZL 133 + ["𒅞"] = {"UKKIN", sum = true}, -- HZL 134 + ["𒅻"] = {"NUNDUM", sum = true}, -- HZL 135 + ["𒅾"] = {"SU₆", sum = true}, -- HZL 136 + ["𒅯"] = {"KIR₁₄", sum = true}, -- HZL 140 + ["𒆇"] = {"KAxU", sum = true}, -- HZL 141 + ["𒆉"] = {"ZU₆", sum = true}, -- HZL 143 + ["𒅮"] = {"BÚN", sum = true}, -- HZL 144 + ["𒆁"] = {"TÚKUR", sum = true}, -- HZL 145 + ["𒅲"] = {"MU₇", "TU₆", sum = true}, -- HZL 146 + ["𒅴"] = {"EME", sum = true}, -- HZL 147 + ["𒅘"] = {"NAG", sum = true}, -- HZL 148 + ["𒅥"] = {"GU₇", sum = true}, -- HZL 149 + ["𒅖"] = {"iš", "eš₁₅", "mil", "mel", hit = true}, -- HZL 151 + ["𒌒"] = {"up", "ub", hit = true}, -- HZL 152 + ["𒁉"] = {"pí", "pé", "bi", "be", "kaš", "gaš", hit = true}, -- HZL 153 + ["𒋆"] = {"LUNGA", "ŠEM", "ŠIM", sum = true}, -- HZL 154 + ["𒀫"] = {"AMAR", sum = true}, -- HZL 155 + ["𒀬"] = {"SISKUR", "SÍSKUR", sum = true}, -- HZL 156 + ["𒄞"] = {"GU₄", "GUD", sum = true}, -- HZL 157 + ["𒊭"] = {"ša", hit = true}, -- HZL 158 + ["𒂵"] = {"ga", hit = true}, -- HZL 159 + ["𒋫"] = {"ta", "dá", hit = true}, -- HZL 160 + ["𒅍"] = {"ÍL", sum = true}, -- HZL 161 + ["𒂁"] = {"DUG", 
"BÁḪAR", "BAḪARÌ", sum = true}, -- HZL 162 + ["𒋋"] = {"BAPPIR", sum = true}, -- HZL 163 + ["𒂃"] = {"DU₈", "DUḪ", sum = true}, -- HZL 164 + ["𒃮"] = {"GAB", "GABA", sum = true}, -- HZL 164 + ["𒁶"] = {"DÍM", "GIM", sum = true}, -- HZL 165 + ["𒌢"] = {"UMBIN", sum = true}, -- HZL 166 + ["𒆍"] = {"KÁ", "ABUL", sum = true}, -- HZL 167 + ["𒂔"] = {"EDIN", sum = true}, -- HZL 168 + ["𒄠"] = {"am", hit = true}, -- HZL 168 + ["𒉈"] = {"ne", "ni₅", hit = true}, -- HZL 169 + ["𒋭"] = {"LÀL", sum = true}, -- HZL 170 + ["𒈭"] = {"taḫ", "daḫ", "túḫ", hit = true}, -- HZL 171 + ["𒉋"] = {"píl", "bíl", "pél", hit = true}, -- HZL 172 + ["𒃰"] = {"gad", "gat", "kad", "kat", hit = true}, -- HZL 173 + ["𒉺"] = {"pa", "ḫat", "ḫad", hit = true}, -- HZL 174 + ["𒉺𒅁"] = {"šab", "šap", "šìp", hit = true}, -- HZL 175 + ["𒉺𒁽"] = {"MAŠKIM", sum = true}, -- HZL 176 + ["𒉺𒇻"] = {"SIPA", "SIPAD", sum = true}, -- HZL 177 + ["𒄑"] = {"ez", "iz", "níš", hit = true}, -- HZL 178 + ["𒆸"] = {"ḫab", "ḫap", hit = true}, -- HZL 179 + ["𒇥"] = {"túl", hit = true}, -- HZL 180 + ["𒇡"] = {"LAGABxSUM", sum = true}, -- HZL 181 + ["𒆹"] = {"AMBAR", "BUGIN", "BUNIN", "SUG", sum = true}, -- HZL 182 + ["𒀠"] = {"al", hit = true}, -- HZL 183 + ["𒆯"] = {"KU₇", sum = true}, -- HZL 184 + ["𒄥"] = {"gur", "kùr", hit = true}, -- HZL 185 + ["𒇬"] = {"LAGAR", sum = true}, -- HZL 186 + ["𒂊"] = {"e", hit = true}, -- HZL 187 + ["𒁟"] = {"UTUL₅", sum = true}, -- HZL 188 + ["𒁛"] = {"UBUR", sum = true}, -- HZL 189 + ["𒁗"] = {"E!-KISIM₅xA-MAŠ", sum = true}, -- HZL 190 + ["𒈥"] = {"mar", hit = true}, -- HZL 191 + ["𒊕"] = {"šaq", "šag", "šak", "riš", hit = true}, -- HZL 192 + ["𒄧"] = {"GURUN", sum = true}, -- HZL 193 + ["𒆤"] = {"LÍL", "GÉ", "KE4", "KID", sum = true}, -- HZL 194 + ["𒌑"] = {"ú", "sam", hit = true}, -- HZL 195 + ["𒆗"] = {"kal", "dan", "tan", hit = true}, -- HZL 196 + ["𒌦"] = {"un", hit = true}, -- HZL 197 + ["𒈛"] = {"luḫ", hit = true}, -- HZL 198 + ["𒂍"] = {"É", sum = true}, -- HZL 199 + ["𒊓"] = {"SA", sum = true}, -- HZL 200 + ["𒄘"] = 
{"GÚ", "GUN", sum = true}, -- HZL 201 + ["𒄙"] = {"dur", "túr", hit = true}, -- HZL 202 + ["𒍜"] = {"UZU", sum = true}, -- HZL 203 + ["𒉪"] = {"nir", "nàr", hit = true}, -- HZL 204 + ["𒁀"] = {"ba", "pá", hit = true}, -- HZL 205 + ["𒂉"] = {"DÚR", "DÚRU", "DURUN", "TUKUL", "TUŠ", sum = true}, -- HZL 206 + ["𒆪"] = {"ku", "gu₅", hit = true}, -- HZL 206 + ["𒆪𒌋𒆕"] = {"MUD₄", sum = true}, -- HZL 207 + ["𒈠"] = {"ma", hit = true}, -- HZL 208 + ["𒍪"] = {"zu", hit = true}, -- HZL 209 + ["𒂅"] = {"GÍN", sum = true}, -- HZL 209 + ["𒁳"] = {"lu", hit = true}, -- HZL 210 + ["𒇻"] = {"DAB", "DIB", "DUL₈", sum = true}, -- HZL 210 + ["𒇯"] = {"DU₆", sum = true}, -- HZL 211 + ["𒌆"] = {"TÚG", "ÁZLAG", "GI₇", "ŠÈ", "TU₉", "ZÀ", "ZÍD", sum = true}, -- HZL 212 + ["𒋢"] = {"KUŠ", "SU", sum = true}, -- HZL 213 + ["𒁕"] = {"da", "tá", hit = true}, -- HZL 214 + ["𒀉"] = {"it", "id", "et", "ed", hit = true}, -- HZL 215 + ["𒇉"] = {"ENGUR", sum = true}, -- HZL 216 + ["𒄿"] = {"i", hit = true}, -- HZL 217 + ["𒅀"] = {"ya", hit = true}, -- HZL 218 + ["𒈢"] = {"ḪAŠḪUR", sum = true}, -- HZL 219 + ["𒈖"] = {"GÚG", "GAR₅", "LÙ", "NIMGIR", sum = true}, -- HZL 220, 221, 222 + ["𒂆"] = {"NÍR", "GÍN", sum = true}, -- HZL 223 + ["𒇀"] = {"GIGIR", sum = true}, -- HZL 224 + ["𒁆"] = {"BALAG", sum = true}, -- HZL 225 + ["𒀩"] = {"ALAM", sum = true}, -- HZL 226 + ["𒋺"] = {"TAG₄", "TAK₄", sum = true}, -- HZL 227 + ["𒆦"] = {"KISAL", sum = true}, -- HZL 228 + ["𒌷"] = {"URU", "BANŠUR", "RÍ", "U₁₉", sum = true}, -- HZL 229 + ["𒊿"] = {"ŠEN", "ALAL", "PÀSAN", "DUR₁₀", sum = true}, -- HZL 230 + ["𒋃"] = {"SANGA", "ÀKA", "ŠID", "ŠIT", "ŠITA₅", "ŠITI", sum = true}, -- HZL 231 + ["𒁤"] = {"LAḪTA", sum = true}, -- HZL 232 + ["𒊏"] = {"ra", hit = true}, -- HZL 233 + ["𒄄"] = {"GI₄", "GE₄", sum = true}, -- HZL 234 + ["𒁈"] = {"BÁR", sum = true}, -- HZL 235 + ["𒁻"] = {"LAḪ₄", sum = true}, -- HZL 236 + ["𒌉"] = {"DUMU", "TUR", "KUN₅", sum = true}, -- HZL 237 + ["𒍠"] = {"ZÀ", "ZAG", sum = true}, -- HZL 238 + ["𒅊"] = {"SIG₇", sum = true}, -- HZL 239 
+ ["𒃼"] = {"gar", "kar", hit = true}, -- HZL 240 + ["𒀾"] = {"tàš", "áš", hit = true}, -- HZL 241 + ["𒃲"] = {"gal", "kál", hit = true}, -- HZL 242 + ["𒁖"] = {"taq", "tág", "tak", "daq", "dag", "dak", hit = true}, -- HZL 243 + ["𒄫"] = {"kir", "gir", "piš", "paš", "pùš", "gir", "biš", hit = true}, -- HZL 244 + ["𒁓"] = {"bur", "pur", hit = true}, -- HZL 245 + ["𒌵"] = {"URI", sum = true}, -- HZL 246 + ["𒃵"] = {"GAM", sum = true}, -- HZL 247 + ["𒑱"] = {glossenkeil = true}, -- HZL 248 + ["𒋼"] = {"te", hit = true}, -- HZL 249 + ["𒋼𒀀"] = {"kar", "kir₈", hit = true}, -- HZL 250 + ["𒋙"] = {"šú", hit = true}, -- HZL 251 + ["𒇻𒄾𒆲"] = {"GUKKAL?+KUN", sum = true}, -- HZL 252 + ["𒈦𒄘𒃼"] = {"IDIGNA", sum = true}, -- HZL 253 + ["𒄾"] = {"ÚKUŠ", sum = true}, -- HZL 254 + ["𒋝"] = {"SIG", sum = true}, -- HZL 255 + ["𒉽"] = {"KÚR", "PAB", "PA₅", sum = true}, -- HZL 256 + ["𒉽𒄬"] = {"PAB-ḪAL", sum = true}, -- HZL 256 + ["𒉽𒉽"] = {"BÙLUG", "DIM₄", "MUNU₈", sum = true}, -- HZL 257 + ["𒄃"] = {"GILIM", sum = true}, -- HZL 258 + ["𒆜"] = {"KASKAL", "RAŠ", "DANNA", "ILLAT", sum = true}, -- HZL 259 + ["𒄒"] = {"kib", "kip", hit = true}, -- HZL 260 + ["𒌋"] = {"u", hit = true}, -- HZL 261 + ["𒌋𒌆"] = {"DUL", sum = true}, -- HZL 262 + ["𒌋𒁯"] = {"IŠTAR", akk = true}, -- HZL 263 + ["𒌋𒈬"] = {"UDUN", sum = true}, -- HZL 264 + ["𒅇"] = {"Ù", "LIBIR", sum = true}, -- HZL 265 + ["𒌋𒀜"] = {"GIR₄", sum = true}, -- HZL 266 + ["𒈪"] = {"mi", "mé", hit = true}, -- HZL 267 + ["𒂂"] = {"DUGUD", sum = true}, -- HZL 268 + ["𒈪𒉭"] = {"GIG", sum = true}, -- HZL 269 + ["𒌋𒃶"] = {"ŠAGAN", "ŠÁMAN", sum = true}, -- HZL 270 + ["𒄢"] = {"gul", hit = true}, -- HZL 271 + ["𒌋𒅗"] = {"UGU", sum = true}, -- HZL 272 + ["𒆧"] = {"kiš", hit = true}, -- HZL 273 + ["𒂘"] = {"BURU₁₄", "BAR₈", "SULLIM", sum = true}, -- HZL 274 + ["𒌌"] = {"ul", hit = true}, -- HZL 275 + ["𒀖"] = {"ÁB", sum = true}, -- HZL 277 + ["𒆨"] = {"KISIM₅", sum = true}, -- HZL 278 + ["𒉐"] = {"TÙM", sum = true}, -- HZL 279 + ["𒇺"] = {"liš", "li₁₂", hit = true}, -- HZL 286 + ["𒌋𒁹"] 
= {"11", hit = true}, -- HZL 287 + ["𒅆"] = {"ši", hit = true}, -- HZL 288 + ["𒅈"] = {"ar", hit = true}, -- HZL 289 + ["𒅆𒌨"] = {"ḫul", hit = true}, -- HZL 290 + ["𒅆𒁾"] = {"AGRIG", "GISKIM", "ISKIM", sum = true}, -- HZL 291 + ["𒅆𒊒"] = {"pà", hit = true}, -- HZL 292 + ["𒅆𒂟"] = {"SIG₅", sum = true}, -- HZL 293 + ["𒊮"] = {"ŠÀ", "ŠAG₄", "TIBULA", sum = true}, -- HZL 294 + ["𒉻"] = {"PAD", sum = true}, -- HZL 295 + ["𒌋𒌋"] = {"man", "20", hit = true}, -- HZL 296 + ["𒊩"] = {"šal", hit = true}, -- HZL 297 + ["𒁮"] = {"dam", "tám", hit = true}, -- HZL 298 + ["𒊩𒌆"] = {"NIN", "EREŠ", "NIN₉", sum = true}, -- HZL 299 + ["𒍮"] = {"zum", hit = true}, -- HZL 300 + ["𒄊"] = {"GÌR", "GÌRI", "NÈ", sum = true}, -- HZL 301 + ["𒀲"] = {"ANŠE", "DÙR", sum = true}, -- HZL 302 + ["𒄋"] = {"ALIM", sum = true}, -- HZL 303 + ["𒄖"] = {"gu", hit = true}, -- HZL 304 + ["𒊩𒆳"] = {"GÉME", sum = true}, -- HZL 305 + ["𒇴"] = {"lam", hit = true}, -- HZL 306 + ["𒂖"] = {"el", "il₅", hit = true}, -- HZL 307 + ["𒉄"] = {"NAGAR", sum = true}, -- HZL 308 + ["𒋚"] = {"šaḫ", hit = true}, -- HZL 309 + ["𒈝"] = {"lum", hit = true}, -- HZL 310 + ["𒋞"] = {"MUR₇", "MURGU", "SIG₄", sum = true}, -- HZL 311 + ["𒁲"] = {"di", "de", "ti₄", hit = true}, -- HZL 312 + ["𒆠"] = {"ki", "ke", "gi₅", "ge₅", hit = true}, -- HZL 313 + ["𒈿"] = {"NÁ", "NÚ", sum = true}, -- HZL 314 + ["𒋽"] = {"URU₅", sum = true}, -- HZL 315 + ["𒌓"] = {"ut", "ud", "pir", hit = true}, -- HZL 316 + ["𒉿"] = {"wa", hit = true}, -- HZL 317 + ["𒊁"] = {"waₐₚ", hurr = true}, -- HZL 318 + ["𒊅"] = {"wiᵢ", hurr = true}, -- HZL 319 + ["𒊂"] = {"wiₚí", hatt = true}, -- HZL 320 + ["𒊈"] = {"wuú", hurr = true}, -- HZL 321 + ["𒊄"] = {"weₑ", hurr = true}, -- HZL 322 + ["𒊇"] = {"wuᵤ", hurr = true}, -- HZL 323 + ["𒊃"] = {"wuₚᵤ", hurr = true}, -- HZL 324 + ["𒊆"] = {"wiᵢₚ", hurr = true}, -- HZL 325 + ["𒊀"] = {"waₐ", hurr = true}, -- HZL 326 + ["𒂟"] = {"ÉRIN", "NUNUS", "RÍN", sum = true}, -- HZL 327 + ["𒉭"] = {"NUNUZ", "ÉRIN", "RÍN", sum = true}, -- HZL 328 + ["𒆳"] = {"kur", hit = 
true}, -- HZL 329 + ["𒁷"] = {"tin", "tén", hit = true}, -- HZL 330 + ["𒌍"] = {"eš", "ìš", "30", hit = true}, -- HZL 331 + ["𒄴"] = {"aḫ", "eḫ", "iḫ", "uḫ", hit = true}, -- HZL 332 + ["𒄯"] = {"ḫar", "ḫur", "mur", hit = true}, -- HZL 333 + ["𒄵"] = {"BIR", "ÉLLAG", sum = true}, -- HZL 334 + ["𒄭"] = {"ḫi", "ḫe", hit = true}, -- HZL 335 + ["𒃽"] = {"GAŠAN", sum = true}, -- HZL 336 + ["𒅎"] = {"em", "im", hit = true}, -- HZL 337 + ["𒊺"] = {"še", hit = true}, -- HZL 338 + ["𒁍"] = {"pu", "bu", hit = true}, -- HZL 339 + ["𒊻"] = {"uz", hit = true}, -- HZL 340 + ["𒋤"] = {"SUD", "SIR", sum = true}, -- HZL 341 + ["𒈲"] = {"MUŠ", sum = true}, -- HZL 342 + ["𒇷"] = {"li", "le", hit = true}, -- HZL 343 + ["𒌁"] = {"tir", "ter", hit = true}, -- HZL 344 + ["𒊺𒉀"] = {"NIDABA", "NISABA", "TÈ", sum = true}, -- HZL 345 + ["𒌅"] = {"tu", "dú", hit = true}, -- HZL 346 + ["𒐐"] = {"NINNU", "50", sum = true}, -- HZL 347 + ["𒄭𒄊"] = {"ḪUŠ", sum = true}, -- HZL 348 + ["𒋦"] = {"SUḪUR", sum = true}, -- HZL 349 + ["𒋧"] = {"SUM", "SUMU", "ŠÚM", "SÍ", sum = true}, -- HZL 350 + ["𒉀"] = {"NAGA", "NÍDABA", "NÍSABA", "DALḪAMUN₄", sum = true}, -- HZL 352 + ["𒅔"] = {"šar", hit = true}, -- HZL 353 + ["𒄰"] = {"in", "en₆", hit = true}, -- HZL 354 + ["𒁹"] = {"1", "diš", "tiš", "dáš", "táš", hit = true}, -- HZL 356 + ["𒈨"] = {"me", "mi", hit = true}, -- HZL 357 + ["𒇲"] = {"lal", hit = true}, -- HZL 358 + ["𒁹𒌋"] = {"70", sum = true}, -- HZL 359 + ["𒈨𒌍"] = {"meš", hit = true}, -- HZL 360 + ["𒈫"] = {"2", hit = true}, -- HZL 361 + ["𒇳"] = {"LA₅", sum = true}, -- HZL 362 + ["𒇳𒊬"] = {"ŠUR₄", akk = true}, -- HZL 363 + ["𒀀"] = {"a", hit = true}, -- HZL 364 + ["𒀀𒇉"] = {"ÍD", "I₇", sum = true}, -- HZL 365 + ["𒍝"] = {"za", hit = true}, -- HZL 366 + ["𒄩"] = {"ḫa", hit = true}, -- HZL 367 + ["𒐈"] = {"3", hit = true}, -- HZL 368 + ["𒃻"] = {"šá", hit = true}, -- HZL 369 + ["𒐉"] = {"4", hit = true}, -- HZL 370 + ["𒐼"] = {"4", hit = true}, -- HZL 370 + ["𒐊"] = {"5", hit = true}, -- HZL 371 + ["𒐋"] = {"6", hit = true}, -- HZL 372 + 
["𒐌"] = {"7", hit = true}, -- HZL 373 + ["𒐍"] = {"8", hit = true}, -- HZL 374 + ["𒑆"] = {"9", hit = true}, -- HZL 375 + ["𒐎"] = {"9", hit = true} -- HZL 375 +} + +return export diff --git a/wiktra/wikt/translit/hit-translit/tag.lua b/wiktra/wikt/translit/hit-translit/tag.lua new file mode 100644 index 0000000..f3881e8 --- /dev/null +++ b/wiktra/wikt/translit/hit-translit/tag.lua @@ -0,0 +1,21 @@ +local export = {} + +function export.tag_sumerogram(syl) return "" .. syl .. "" end + +function export.tag_sumerogram_determiner(syl) return "" .. syl .. "" end + +function export.tag_akkadogram(syl) return "" .. syl .. "" end + +function export.tag_akkadogram_determiner(syl) return "" .. syl .. "" end + +function export.tag_hittite_tr(tr) return "" .. tr .. "" end + +function export.tag_hurrian_tr(tr) return "" .. tr .. "" end + +function export.tag_hattic_tr(tr) return "" .. tr .. "" end + +function export.tag_hittite(text) return "[[" .. text .. "#Hittite|" .. text .. "]]" end + +function export.glossenkeil() return "𒑱" end + +return export diff --git a/wiktra/wikt/translit/huz-translit.lua b/wiktra/wikt/translit/huz-translit.lua new file mode 100644 index 0000000..cc311c5 --- /dev/null +++ b/wiktra/wikt/translit/huz-translit.lua @@ -0,0 +1,25 @@ +local export = {} + +local mapping1 = {["п"] = "p", ["б"] = "b", ["т"] = "t", ["д"] = "d", ["к"] = "k", ["г"] = "g", ["ц"] = "c", ["ч"] = "č", ["с"] = "s", ["з"] = "z", ["ш"] = "š", ["ж"] = "ž", ["х"] = "x", ["м"] = "m", ["н"] = "n", ["р"] = "r", ["л"] = "l", ["в"] = "v", ["й"] = "y", ["и"] = "i", ["е"] = "e", ["э"] = "e", ["а"] = "a", ["о"] = "o", ["у"] = "u", ["ы"] = "ɨ", ["ә"] = "ə", ["ӣ"] = "ī", ["ā"] = "ā", ["ō"] = "ō", ["ӯ"] = "ū", ["ъ"] = "ʾ", ["ᵸ"] = "̃"} + +local digraph = {["пӏ"] = "p’", ["тӏ"] = "t’", ["кӏ"] = "k’", ["къ"] = "q’", ["цӏ"] = "c’", ["лӏ"] = "ƛ", ["кь"] = "ƛ’", ["чӏ"] = "č’", ["хъ"] = "q", ["лъ"] = "λ", ["гъ"] = "ġ", ["хӏ"] = "ḥ", ["гӏ"] = "a̯", ["гь"] = "h", ["аъ"] = "å", ["а̄ъ"] = "å̄", ["аᵸ"] = "ã", 
["еᵸ"] = "ẽ", ["иᵸ"] = "ĩ", ["оᵸ"] = "õ", ["уᵸ"] = "ũ", ["ыᵸ"] = "ɨ̃", ["әᵸ"] = "ə̃"} + +local tetragraph = {["аъᵸ"] = "å̃"} + +function export.tr(text, lang, sc) + local str_gsub = string.gsub + local UTF8_char = "[%z\1-\127\194-\244][\128-\191]*" + + -- Convert capital to lowercase palochka. + text = str_gsub(text, mw.ustring.char(0x4C0), mw.ustring.char(0x4CF)) + + for digraph, replacement in pairs(digraph) do text = str_gsub(text, digraph, replacement) end + + for tetragraph, replacement in pairs(tetragraph) do text = str_gsub(text, tetragraph, replacement) end + + text = str_gsub(text, UTF8_char, mapping1) + + return text +end + +return export diff --git a/wiktra/wikt/translit/ii-translit.lua b/wiktra/wikt/translit/ii-translit.lua new file mode 100644 index 0000000..7b70808 --- /dev/null +++ b/wiktra/wikt/translit/ii-translit.lua @@ -0,0 +1,1223 @@ +local export = {} +local gsub = mw.ustring.gsub + +local convert = { + ["ꀀ"] = "it", + ["ꀁ"] = "ix", + ["ꀂ"] = "i", + ["ꀃ"] = "ip", + ["ꀄ"] = "iet", + ["ꀅ"] = "iex", + ["ꀆ"] = "ie", + ["ꀇ"] = "iep", + ["ꀈ"] = "at", + ["ꀉ"] = "ax", + ["ꀊ"] = "a", + ["ꀋ"] = "ap", + ["ꀌ"] = "uox", + ["ꀍ"] = "uo", + ["ꀎ"] = "uop", + ["ꀏ"] = "ot", + ["ꀐ"] = "ox", + ["ꀑ"] = "o", + ["ꀒ"] = "op", + ["ꀓ"] = "ex", + ["ꀔ"] = "e", + ["ꀕ"] = "w", + + ["ꀖ"] = "bit", + ["ꀗ"] = "bix", + ["ꀘ"] = "bi", + ["ꀙ"] = "bip", + ["ꀚ"] = "biet", + ["ꀛ"] = "biex", + ["ꀜ"] = "bie", + ["ꀝ"] = "biep", + ["ꀞ"] = "bat", + ["ꀟ"] = "bax", + ["ꀠ"] = "ba", + ["ꀡ"] = "bap", + ["ꀢ"] = "buox", + ["ꀣ"] = "buo", + ["ꀤ"] = "buop", + ["ꀥ"] = "bot", + ["ꀦ"] = "box", + ["ꀧ"] = "bo", + ["ꀨ"] = "bop", + ["ꀩ"] = "bex", + ["ꀪ"] = "be", + ["ꀫ"] = "bep", + ["ꀬ"] = "but", + ["ꀭ"] = "bux", + ["ꀮ"] = "bu", + ["ꀯ"] = "bup", + ["ꀰ"] = "burx", + ["ꀱ"] = "bur", + ["ꀲ"] = "byt", + ["ꀳ"] = "byx", + ["ꀴ"] = "by", + ["ꀵ"] = "byp", + ["ꀶ"] = "byrx", + ["ꀷ"] = "byr", + + ["ꀸ"] = "pit", + ["ꀹ"] = "pix", + ["ꀺ"] = "pi", + ["ꀻ"] = "pip", + ["ꀼ"] = "piex", + ["ꀽ"] = "pie", + ["ꀾ"] = "piep", + ["ꀿ"] 
= "pat", + ["ꁀ"] = "pax", + ["ꁁ"] = "pa", + ["ꁂ"] = "pap", + ["ꁃ"] = "puox", + ["ꁄ"] = "puo", + ["ꁅ"] = "puop", + ["ꁆ"] = "pot", + ["ꁇ"] = "pox", + ["ꁈ"] = "po", + ["ꁉ"] = "pop", + ["ꁊ"] = "put", + ["ꁋ"] = "pux", + ["ꁌ"] = "pu", + ["ꁍ"] = "pup", + ["ꁎ"] = "purx", + ["ꁏ"] = "pur", + ["ꁐ"] = "pyt", + ["ꁑ"] = "pyx", + ["ꁒ"] = "py", + ["ꁓ"] = "pyp", + ["ꁔ"] = "pyrx", + ["ꁕ"] = "pyr", + + ["ꁖ"] = "bbit", + ["ꁗ"] = "bbix", + ["ꁘ"] = "bbi", + ["ꁙ"] = "bbip", + ["ꁚ"] = "bbiet", + ["ꁛ"] = "bbiex", + ["ꁜ"] = "bbie", + ["ꁝ"] = "bbiep", + ["ꁞ"] = "bbat", + ["ꁟ"] = "bbax", + ["ꁠ"] = "bba", + ["ꁡ"] = "bbap", + ["ꁢ"] = "bbuox", + ["ꁣ"] = "bbuo", + ["ꁤ"] = "bbuop", + ["ꁥ"] = "bbot", + ["ꁦ"] = "bbox", + ["ꁧ"] = "bbo", + ["ꁨ"] = "bbop", + ["ꁩ"] = "bbex", + ["ꁪ"] = "bbe", + ["ꁫ"] = "bbep", + ["ꁬ"] = "bbut", + ["ꁭ"] = "bbux", + ["ꁮ"] = "bbu", + ["ꁯ"] = "bbup", + ["ꁰ"] = "bburx", + ["ꁱ"] = "bbur", + ["ꁲ"] = "bbyt", + ["ꁳ"] = "bbyx", + ["ꁴ"] = "bby", + ["ꁵ"] = "bbyp", + + ["ꁶ"] = "nbit", + ["ꁷ"] = "nbix", + ["ꁸ"] = "nbi", + ["ꁹ"] = "nbip", + ["ꁺ"] = "nbiex", + ["ꁻ"] = "nbie", + ["ꁼ"] = "nbiep", + ["ꁽ"] = "nbat", + ["ꁾ"] = "nbax", + ["ꁿ"] = "nba", + ["ꂀ"] = "nbap", + ["ꂁ"] = "nbot", + ["ꂂ"] = "nbox", + ["ꂃ"] = "nbo", + ["ꂄ"] = "nbop", + ["ꂅ"] = "nbut", + ["ꂆ"] = "nbux", + ["ꂇ"] = "nbu", + ["ꂈ"] = "nbup", + ["ꂉ"] = "nburx", + ["ꂊ"] = "nbur", + ["ꂋ"] = "nbyt", + ["ꂌ"] = "nbyx", + ["ꂍ"] = "nby", + ["ꂎ"] = "nbyp", + ["ꂏ"] = "nbyrx", + ["ꂐ"] = "nbyr", + + ["ꂑ"] = "hmit", + ["ꂒ"] = "hmix", + ["ꂓ"] = "hmi", + ["ꂔ"] = "hmip", + ["ꂕ"] = "hmiex", + ["ꂖ"] = "hmie", + ["ꂗ"] = "hmiep", + ["ꂘ"] = "hmat", + ["ꂙ"] = "hmax", + ["ꂚ"] = "hma", + ["ꂛ"] = "hmap", + ["ꂜ"] = "hmuox", + ["ꂝ"] = "hmuo", + ["ꂞ"] = "hmuop", + ["ꂟ"] = "hmot", + ["ꂠ"] = "hmox", + ["ꂡ"] = "hmo", + ["ꂢ"] = "hmop", + ["ꂣ"] = "hmut", + ["ꂤ"] = "hmux", + ["ꂥ"] = "hmu", + ["ꂦ"] = "hmup", + ["ꂧ"] = "hmurx", + ["ꂨ"] = "hmur", + ["ꂩ"] = "hmyx", + ["ꂪ"] = "hmy", + ["ꂫ"] = "hmyp", + ["ꂬ"] = "hmyrx", + ["ꂭ"] = "hmyr", + + ["ꂮ"] = "mit", + ["ꂯ"] 
= "mix", + ["ꂰ"] = "mi", + ["ꂱ"] = "mip", + ["ꂲ"] = "miex", + ["ꂳ"] = "mie", + ["ꂴ"] = "miep", + ["ꂵ"] = "mat", + ["ꂶ"] = "max", + ["ꂷ"] = "ma", + ["ꂸ"] = "map", + ["ꂹ"] = "muot", + ["ꂺ"] = "muox", + ["ꂻ"] = "muo", + ["ꂼ"] = "muop", + ["ꂽ"] = "mot", + ["ꂾ"] = "mox", + ["ꂿ"] = "mo", + ["ꃀ"] = "mop", + ["ꃁ"] = "mex", + ["ꃂ"] = "me", + ["ꃃ"] = "mut", + ["ꃄ"] = "mux", + ["ꃅ"] = "mu", + ["ꃆ"] = "mup", + ["ꃇ"] = "murx", + ["ꃈ"] = "mur", + ["ꃉ"] = "myt", + ["ꃊ"] = "myx", + ["ꃋ"] = "my", + ["ꃌ"] = "myp", + + ["ꃍ"] = "fit", + ["ꃎ"] = "fix", + ["ꃏ"] = "fi", + ["ꃐ"] = "fip", + ["ꃑ"] = "fat", + ["ꃒ"] = "fax", + ["ꃓ"] = "fa", + ["ꃔ"] = "fap", + ["ꃕ"] = "fox", + ["ꃖ"] = "fo", + ["ꃗ"] = "fop", + ["ꃘ"] = "fut", + ["ꃙ"] = "fux", + ["ꃚ"] = "fu", + ["ꃛ"] = "fup", + ["ꃜ"] = "furx", + ["ꃝ"] = "fur", + ["ꃞ"] = "fyt", + ["ꃟ"] = "fyx", + ["ꃠ"] = "fy", + ["ꃡ"] = "fyp", + + ["ꃢ"] = "vit", + ["ꃣ"] = "vix", + ["ꃤ"] = "vi", + ["ꃥ"] = "vip", + ["ꃦ"] = "viet", + ["ꃧ"] = "viex", + ["ꃨ"] = "vie", + ["ꃩ"] = "viep", + ["ꃪ"] = "vat", + ["ꃫ"] = "vax", + ["ꃬ"] = "va", + ["ꃭ"] = "vap", + ["ꃮ"] = "vot", + ["ꃯ"] = "vox", + ["ꃰ"] = "vo", + ["ꃱ"] = "vop", + ["ꃲ"] = "vex", + ["ꃳ"] = "vep", + ["ꃴ"] = "vut", + ["ꃵ"] = "vux", + ["ꃶ"] = "vu", + ["ꃷ"] = "vup", + ["ꃸ"] = "vurx", + ["ꃹ"] = "vur", + ["ꃺ"] = "vyt", + ["ꃻ"] = "vyx", + ["ꃼ"] = "vy", + ["ꃽ"] = "vyp", + ["ꃾ"] = "vyrx", + ["ꃿ"] = "vyr", + + ["ꄀ"] = "dit", + ["ꄁ"] = "dix", + ["ꄂ"] = "di", + ["ꄃ"] = "dip", + ["ꄄ"] = "diex", + ["ꄅ"] = "die", + ["ꄆ"] = "diep", + ["ꄇ"] = "dat", + ["ꄈ"] = "dax", + ["ꄉ"] = "da", + ["ꄊ"] = "dap", + ["ꄋ"] = "duox", + ["ꄌ"] = "duo", + ["ꄍ"] = "dot", + ["ꄎ"] = "dox", + ["ꄏ"] = "do", + ["ꄐ"] = "dop", + ["ꄑ"] = "dex", + ["ꄒ"] = "de", + ["ꄓ"] = "dep", + ["ꄔ"] = "dut", + ["ꄕ"] = "dux", + ["ꄖ"] = "du", + ["ꄗ"] = "dup", + ["ꄘ"] = "durx", + ["ꄙ"] = "dur", + + ["ꄚ"] = "tit", + ["ꄛ"] = "tix", + ["ꄜ"] = "ti", + ["ꄝ"] = "tip", + ["ꄞ"] = "tiex", + ["ꄟ"] = "tie", + ["ꄠ"] = "tiep", + ["ꄡ"] = "tat", + ["ꄢ"] = "tax", + ["ꄣ"] = "ta", + ["ꄤ"] = 
"tap", + ["ꄥ"] = "tuot", + ["ꄦ"] = "tuox", + ["ꄧ"] = "tuo", + ["ꄨ"] = "tuop", + ["ꄩ"] = "tot", + ["ꄪ"] = "tox", + ["ꄫ"] = "to", + ["ꄬ"] = "top", + ["ꄭ"] = "tex", + ["ꄮ"] = "te", + ["ꄯ"] = "tep", + ["ꄰ"] = "tut", + ["ꄱ"] = "tux", + ["ꄲ"] = "tu", + ["ꄳ"] = "tup", + ["ꄴ"] = "turx", + ["ꄵ"] = "tur", + + ["ꄶ"] = "ddit", + ["ꄷ"] = "ddix", + ["ꄸ"] = "ddi", + ["ꄹ"] = "ddip", + ["ꄺ"] = "ddiex", + ["ꄻ"] = "ddie", + ["ꄼ"] = "ddiep", + ["ꄽ"] = "ddat", + ["ꄾ"] = "ddax", + ["ꄿ"] = "dda", + ["ꅀ"] = "ddap", + ["ꅁ"] = "dduox", + ["ꅂ"] = "dduo", + ["ꅃ"] = "dduop", + ["ꅄ"] = "ddot", + ["ꅅ"] = "ddox", + ["ꅆ"] = "ddo", + ["ꅇ"] = "ddop", + ["ꅈ"] = "ddex", + ["ꅉ"] = "dde", + ["ꅊ"] = "ddep", + ["ꅋ"] = "ddut", + ["ꅌ"] = "ddux", + ["ꅍ"] = "ddu", + ["ꅎ"] = "ddup", + ["ꅏ"] = "ddurx", + ["ꅐ"] = "ddur", + + ["ꅑ"] = "ndit", + ["ꅒ"] = "ndix", + ["ꅓ"] = "ndi", + ["ꅔ"] = "ndip", + ["ꅕ"] = "ndiex", + ["ꅖ"] = "ndie", + ["ꅗ"] = "ndat", + ["ꅘ"] = "ndax", + ["ꅙ"] = "nda", + ["ꅚ"] = "ndap", + ["ꅛ"] = "ndot", + ["ꅜ"] = "ndox", + ["ꅝ"] = "ndo", + ["ꅞ"] = "ndop", + ["ꅟ"] = "ndex", + ["ꅠ"] = "nde", + ["ꅡ"] = "ndep", + ["ꅢ"] = "ndut", + ["ꅣ"] = "ndux", + ["ꅤ"] = "ndu", + ["ꅥ"] = "ndup", + ["ꅦ"] = "ndurx", + ["ꅧ"] = "ndur", + + ["ꅨ"] = "hnit", + ["ꅩ"] = "hnix", + ["ꅪ"] = "hni", + ["ꅫ"] = "hnip", + ["ꅬ"] = "hniet", + ["ꅭ"] = "hniex", + ["ꅮ"] = "hnie", + ["ꅯ"] = "hniep", + ["ꅰ"] = "hnat", + ["ꅱ"] = "hnax", + ["ꅲ"] = "hna", + ["ꅳ"] = "hnap", + ["ꅴ"] = "hnuox", + ["ꅵ"] = "hnuo", + ["ꅶ"] = "hnot", + ["ꅷ"] = "hnox", + ["ꅸ"] = "hnop", + ["ꅹ"] = "hnex", + ["ꅺ"] = "hne", + ["ꅻ"] = "hnep", + ["ꅼ"] = "hnut", + + ["ꅽ"] = "nit", + ["ꅾ"] = "nix", + ["ꅿ"] = "ni", + ["ꆀ"] = "nip", + ["ꆁ"] = "niex", + ["ꆂ"] = "nie", + ["ꆃ"] = "niep", + ["ꆄ"] = "nax", + ["ꆅ"] = "na", + ["ꆆ"] = "nap", + ["ꆇ"] = "nuox", + ["ꆈ"] = "nuo", + ["ꆉ"] = "nuop", + ["ꆊ"] = "not", + ["ꆋ"] = "nox", + ["ꆌ"] = "no", + ["ꆍ"] = "nop", + ["ꆎ"] = "nex", + ["ꆏ"] = "ne", + ["ꆐ"] = "nep", + ["ꆑ"] = "nut", + ["ꆒ"] = "nux", + ["ꆓ"] = "nu", + ["ꆔ"] = "nup", + ["ꆕ"] = 
"nurx", + ["ꆖ"] = "nur", + + ["ꆗ"] = "hlit", + ["ꆘ"] = "hlix", + ["ꆙ"] = "hli", + ["ꆚ"] = "hlip", + ["ꆛ"] = "hliex", + ["ꆜ"] = "hlie", + ["ꆝ"] = "hliep", + ["ꆞ"] = "hlat", + ["ꆟ"] = "hlax", + ["ꆠ"] = "hla", + ["ꆡ"] = "hlap", + ["ꆢ"] = "hluox", + ["ꆣ"] = "hluo", + ["ꆤ"] = "hluop", + ["ꆥ"] = "hlox", + ["ꆦ"] = "hlo", + ["ꆧ"] = "hlop", + ["ꆨ"] = "hlex", + ["ꆩ"] = "hle", + ["ꆪ"] = "hlep", + ["ꆫ"] = "hlut", + ["ꆬ"] = "hlux", + ["ꆭ"] = "hlu", + ["ꆮ"] = "hlup", + ["ꆯ"] = "hlurx", + ["ꆰ"] = "hlur", + ["ꆱ"] = "hlyt", + ["ꆲ"] = "hlyx", + ["ꆳ"] = "hly", + ["ꆴ"] = "hlyp", + ["ꆵ"] = "hlyrx", + ["ꆶ"] = "hlyr", + + ["ꆷ"] = "lit", + ["ꆸ"] = "lix", + ["ꆹ"] = "li", + ["ꆺ"] = "lip", + ["ꆻ"] = "liet", + ["ꆼ"] = "liex", + ["ꆽ"] = "lie", + ["ꆾ"] = "liep", + ["ꆿ"] = "lat", + ["ꇀ"] = "lax", + ["ꇁ"] = "la", + ["ꇂ"] = "lap", + ["ꇃ"] = "luot", + ["ꇄ"] = "luox", + ["ꇅ"] = "luo", + ["ꇆ"] = "luop", + ["ꇇ"] = "lot", + ["ꇈ"] = "lox", + ["ꇉ"] = "lo", + ["ꇊ"] = "lop", + ["ꇋ"] = "lex", + ["ꇌ"] = "le", + ["ꇍ"] = "lep", + ["ꇎ"] = "lut", + ["ꇏ"] = "lux", + ["ꇐ"] = "lu", + ["ꇑ"] = "lup", + ["ꇒ"] = "lurx", + ["ꇓ"] = "lur", + ["ꇔ"] = "lyt", + ["ꇕ"] = "lyx", + ["ꇖ"] = "ly", + ["ꇗ"] = "lyp", + ["ꇘ"] = "lyrx", + ["ꇙ"] = "lyr", + + ["ꇚ"] = "git", + ["ꇛ"] = "gix", + ["ꇜ"] = "gi", + ["ꇝ"] = "gip", + ["ꇞ"] = "giet", + ["ꇟ"] = "giex", + ["ꇠ"] = "gie", + ["ꇡ"] = "giep", + ["ꇢ"] = "gat", + ["ꇣ"] = "gax", + ["ꇤ"] = "ga", + ["ꇥ"] = "gap", + ["ꇦ"] = "guot", + ["ꇧ"] = "guox", + ["ꇨ"] = "guo", + ["ꇩ"] = "guop", + ["ꇪ"] = "got", + ["ꇫ"] = "gox", + ["ꇬ"] = "go", + ["ꇭ"] = "gop", + ["ꇮ"] = "get", + ["ꇯ"] = "gex", + ["ꇰ"] = "ge", + ["ꇱ"] = "gep", + ["ꇲ"] = "gut", + ["ꇳ"] = "gux", + ["ꇴ"] = "gu", + ["ꇵ"] = "gup", + ["ꇶ"] = "gurx", + ["ꇷ"] = "gur", + + ["ꇸ"] = "kit", + ["ꇹ"] = "kix", + ["ꇺ"] = "ki", + ["ꇻ"] = "kip", + ["ꇼ"] = "kiex", + ["ꇽ"] = "kie", + ["ꇾ"] = "kiep", + ["ꇿ"] = "kat", + ["ꈀ"] = "kax", + ["ꈁ"] = "ka", + ["ꈂ"] = "kap", + ["ꈃ"] = "kuox", + ["ꈄ"] = "kuo", + ["ꈅ"] = "kuop", + ["ꈆ"] = "kot", + ["ꈇ"] = "kox", + ["ꈈ"] 
= "ko", + ["ꈉ"] = "kop", + ["ꈊ"] = "ket", + ["ꈋ"] = "kex", + ["ꈌ"] = "ke", + ["ꈍ"] = "kep", + ["ꈎ"] = "kut", + ["ꈏ"] = "kux", + ["ꈐ"] = "ku", + ["ꈑ"] = "kup", + ["ꈒ"] = "kurx", + ["ꈓ"] = "kur", + + ["ꈔ"] = "ggit", + ["ꈕ"] = "ggix", + ["ꈖ"] = "ggi", + ["ꈗ"] = "ggiex", + ["ꈘ"] = "ggie", + ["ꈙ"] = "ggiep", + ["ꈚ"] = "ggat", + ["ꈛ"] = "ggax", + ["ꈜ"] = "gga", + ["ꈝ"] = "ggap", + ["ꈞ"] = "gguot", + ["ꈟ"] = "gguox", + ["ꈠ"] = "gguo", + ["ꈡ"] = "gguop", + ["ꈢ"] = "ggot", + ["ꈣ"] = "ggox", + ["ꈤ"] = "ggo", + ["ꈥ"] = "ggop", + ["ꈦ"] = "gget", + ["ꈧ"] = "ggex", + ["ꈨ"] = "gge", + ["ꈩ"] = "ggep", + ["ꈪ"] = "ggut", + ["ꈫ"] = "ggux", + ["ꈬ"] = "ggu", + ["ꈭ"] = "ggup", + ["ꈮ"] = "ggurx", + ["ꈯ"] = "ggur", + + ["ꈰ"] = "mgiex", + ["ꈱ"] = "mgie", + ["ꈲ"] = "mgat", + ["ꈳ"] = "mgax", + ["ꈴ"] = "mga", + ["ꈵ"] = "mgap", + ["ꈶ"] = "mguox", + ["ꈷ"] = "mguo", + ["ꈸ"] = "mguop", + ["ꈹ"] = "mgot", + ["ꈺ"] = "mgox", + ["ꈻ"] = "mgo", + ["ꈼ"] = "mgop", + ["ꈽ"] = "mgex", + ["ꈾ"] = "mge", + ["ꈿ"] = "mgep", + ["ꉀ"] = "mgut", + ["ꉁ"] = "mgux", + ["ꉂ"] = "mgu", + ["ꉃ"] = "mgup", + ["ꉄ"] = "mgurx", + ["ꉅ"] = "mgur", + + ["ꉆ"] = "hxit", + ["ꉇ"] = "hxix", + ["ꉈ"] = "hxi", + ["ꉉ"] = "hxip", + ["ꉊ"] = "hxiet", + ["ꉋ"] = "hxiex", + ["ꉌ"] = "hxie", + ["ꉍ"] = "hxiep", + ["ꉎ"] = "hxat", + ["ꉏ"] = "hxax", + ["ꉐ"] = "hxa", + ["ꉑ"] = "hxap", + ["ꉒ"] = "hxuot", + ["ꉓ"] = "hxuox", + ["ꉔ"] = "hxuo", + ["ꉕ"] = "hxuop", + ["ꉖ"] = "hxot", + ["ꉗ"] = "hxox", + ["ꉘ"] = "hxo", + ["ꉙ"] = "hxop", + ["ꉚ"] = "hxex", + ["ꉛ"] = "hxe", + ["ꉜ"] = "hxep", + + ["ꉝ"] = "ngiex", + ["ꉞ"] = "ngie", + ["ꉟ"] = "ngiep", + ["ꉠ"] = "ngat", + ["ꉡ"] = "ngax", + ["ꉢ"] = "nga", + ["ꉣ"] = "ngap", + ["ꉤ"] = "nguot", + ["ꉥ"] = "nguox", + ["ꉦ"] = "nguo", + ["ꉧ"] = "ngot", + ["ꉨ"] = "ngox", + ["ꉩ"] = "ngo", + ["ꉪ"] = "ngop", + ["ꉫ"] = "ngex", + ["ꉬ"] = "nge", + ["ꉭ"] = "ngep", + + ["ꉮ"] = "hit", + ["ꉯ"] = "hiex", + ["ꉰ"] = "hie", + ["ꉱ"] = "hat", + ["ꉲ"] = "hax", + ["ꉳ"] = "ha", + ["ꉴ"] = "hap", + ["ꉵ"] = "huot", + ["ꉶ"] = "huox", + ["ꉷ"] = "huo", 
+ ["ꉸ"] = "huop", + ["ꉹ"] = "hot", + ["ꉺ"] = "hox", + ["ꉻ"] = "ho", + ["ꉼ"] = "hop", + ["ꉽ"] = "hex", + ["ꉾ"] = "he", + ["ꉿ"] = "hep", + + ["ꊀ"] = "wat", + ["ꊁ"] = "wax", + ["ꊂ"] = "wa", + ["ꊃ"] = "wap", + ["ꊄ"] = "wuox", + ["ꊅ"] = "wuo", + ["ꊆ"] = "wuop", + ["ꊇ"] = "wox", + ["ꊈ"] = "wo", + ["ꊉ"] = "wop", + ["ꊊ"] = "wex", + ["ꊋ"] = "we", + ["ꊌ"] = "wep", + + ["ꊍ"] = "zit", + ["ꊎ"] = "zix", + ["ꊏ"] = "zi", + ["ꊐ"] = "zip", + ["ꊑ"] = "ziex", + ["ꊒ"] = "zie", + ["ꊓ"] = "ziep", + ["ꊔ"] = "zat", + ["ꊕ"] = "zax", + ["ꊖ"] = "za", + ["ꊗ"] = "zap", + ["ꊘ"] = "zuox", + ["ꊙ"] = "zuo", + ["ꊚ"] = "zuop", + ["ꊛ"] = "zot", + ["ꊜ"] = "zox", + ["ꊝ"] = "zo", + ["ꊞ"] = "zop", + ["ꊟ"] = "zex", + ["ꊠ"] = "ze", + ["ꊡ"] = "zep", + ["ꊢ"] = "zut", + ["ꊣ"] = "zux", + ["ꊤ"] = "zu", + ["ꊥ"] = "zup", + ["ꊦ"] = "zurx", + ["ꊧ"] = "zur", + ["ꊨ"] = "zyt", + ["ꊩ"] = "zyx", + ["ꊪ"] = "zy", + ["ꊫ"] = "zyp", + ["ꊬ"] = "zyrx", + ["ꊭ"] = "zyr", + + ["ꊮ"] = "cit", + ["ꊯ"] = "cix", + ["ꊰ"] = "ci", + ["ꊱ"] = "cip", + ["ꊲ"] = "ciet", + ["ꊳ"] = "ciex", + ["ꊴ"] = "cie", + ["ꊵ"] = "ciep", + ["ꊶ"] = "cat", + ["ꊷ"] = "cax", + ["ꊸ"] = "ca", + ["ꊹ"] = "cap", + ["ꊺ"] = "cuox", + ["ꊻ"] = "cuo", + ["ꊼ"] = "cuop", + ["ꊽ"] = "cot", + ["ꊾ"] = "cox", + ["ꊿ"] = "co", + ["ꋀ"] = "cop", + ["ꋁ"] = "cex", + ["ꋂ"] = "ce", + ["ꋃ"] = "cep", + ["ꋄ"] = "cut", + ["ꋅ"] = "cux", + ["ꋆ"] = "cu", + ["ꋇ"] = "cup", + ["ꋈ"] = "curx", + ["ꋉ"] = "cur", + ["ꋊ"] = "cyt", + ["ꋋ"] = "cyx", + ["ꋌ"] = "cy", + ["ꋍ"] = "cyp", + ["ꋎ"] = "cyrx", + ["ꋏ"] = "cyr", + + ["ꋐ"] = "zzit", + ["ꋑ"] = "zzix", + ["ꋒ"] = "zzi", + ["ꋓ"] = "zzip", + ["ꋔ"] = "zziet", + ["ꋕ"] = "zziex", + ["ꋖ"] = "zzie", + ["ꋗ"] = "zziep", + ["ꋘ"] = "zzat", + ["ꋙ"] = "zzax", + ["ꋚ"] = "zza", + ["ꋛ"] = "zzap", + ["ꋜ"] = "zzox", + ["ꋝ"] = "zzo", + ["ꋞ"] = "zzop", + ["ꋟ"] = "zzex", + ["ꋠ"] = "zze", + ["ꋡ"] = "zzep", + ["ꋢ"] = "zzux", + ["ꋣ"] = "zzu", + ["ꋤ"] = "zzup", + ["ꋥ"] = "zzurx", + ["ꋦ"] = "zzur", + ["ꋧ"] = "zzyt", + ["ꋨ"] = "zzyx", + ["ꋩ"] = "zzy", + ["ꋪ"] = "zzyp", + ["ꋫ"] = 
"zzyrx", + ["ꋬ"] = "zzyr", + + ["ꋭ"] = "nzit", + ["ꋮ"] = "nzix", + ["ꋯ"] = "nzi", + ["ꋰ"] = "nzip", + ["ꋱ"] = "nziex", + ["ꋲ"] = "nzie", + ["ꋳ"] = "nziep", + ["ꋴ"] = "nzat", + ["ꋵ"] = "nzax", + ["ꋶ"] = "nza", + ["ꋷ"] = "nzap", + ["ꋸ"] = "nzuox", + ["ꋹ"] = "nzuo", + ["ꋺ"] = "nzox", + ["ꋻ"] = "nzop", + ["ꋼ"] = "nzex", + ["ꋽ"] = "nze", + ["ꋾ"] = "nzux", + ["ꋿ"] = "nzu", + ["ꌀ"] = "nzup", + ["ꌁ"] = "nzurx", + ["ꌂ"] = "nzur", + ["ꌃ"] = "nzyt", + ["ꌄ"] = "nzyx", + ["ꌅ"] = "nzy", + ["ꌆ"] = "nzyp", + ["ꌇ"] = "nzyrx", + ["ꌈ"] = "nzyr", + + ["ꌉ"] = "sit", + ["ꌊ"] = "six", + ["ꌋ"] = "si", + ["ꌌ"] = "sip", + ["ꌍ"] = "siex", + ["ꌎ"] = "sie", + ["ꌏ"] = "siep", + ["ꌐ"] = "sat", + ["ꌑ"] = "sax", + ["ꌒ"] = "sa", + ["ꌓ"] = "sap", + ["ꌔ"] = "suox", + ["ꌕ"] = "suo", + ["ꌖ"] = "suop", + ["ꌗ"] = "sot", + ["ꌘ"] = "sox", + ["ꌙ"] = "so", + ["ꌚ"] = "sop", + ["ꌛ"] = "sex", + ["ꌜ"] = "se", + ["ꌝ"] = "sep", + ["ꌞ"] = "sut", + ["ꌟ"] = "sux", + ["ꌠ"] = "su", + ["ꌡ"] = "sup", + ["ꌢ"] = "surx", + ["ꌣ"] = "sur", + ["ꌤ"] = "syt", + ["ꌥ"] = "syx", + ["ꌦ"] = "sy", + ["ꌧ"] = "syp", + ["ꌨ"] = "syrx", + ["ꌩ"] = "syr", + + ["ꌪ"] = "ssit", + ["ꌫ"] = "ssix", + ["ꌬ"] = "ssi", + ["ꌭ"] = "ssip", + ["ꌮ"] = "ssiex", + ["ꌯ"] = "ssie", + ["ꌰ"] = "ssiep", + ["ꌱ"] = "ssat", + ["ꌲ"] = "ssax", + ["ꌳ"] = "ssa", + ["ꌴ"] = "ssap", + ["ꌵ"] = "ssot", + ["ꌶ"] = "ssox", + ["ꌷ"] = "sso", + ["ꌸ"] = "ssop", + ["ꌹ"] = "ssex", + ["ꌺ"] = "sse", + ["ꌻ"] = "ssep", + ["ꌼ"] = "ssut", + ["ꌽ"] = "ssux", + ["ꌾ"] = "ssu", + ["ꌿ"] = "ssup", + ["ꍀ"] = "ssyt", + ["ꍁ"] = "ssyx", + ["ꍂ"] = "ssy", + ["ꍃ"] = "ssyp", + ["ꍄ"] = "ssyrx", + ["ꍅ"] = "ssyr", + + ["ꍆ"] = "zhat", + ["ꍇ"] = "zhax", + ["ꍈ"] = "zha", + ["ꍉ"] = "zhap", + ["ꍊ"] = "zhuox", + ["ꍋ"] = "zhuo", + ["ꍌ"] = "zhuop", + ["ꍍ"] = "zhot", + ["ꍎ"] = "zhox", + ["ꍏ"] = "zho", + ["ꍐ"] = "zhop", + ["ꍑ"] = "zhet", + ["ꍒ"] = "zhex", + ["ꍓ"] = "zhe", + ["ꍔ"] = "zhep", + ["ꍕ"] = "zhut", + ["ꍖ"] = "zhux", + ["ꍗ"] = "zhu", + ["ꍘ"] = "zhup", + ["ꍙ"] = "zhurx", + ["ꍚ"] = "zhur", + ["ꍛ"] = "zhyt", + 
["ꍜ"] = "zhyx", + ["ꍝ"] = "zhy", + ["ꍞ"] = "zhyp", + ["ꍟ"] = "zhyrx", + ["ꍠ"] = "zhyr", + + ["ꍡ"] = "chat", + ["ꍢ"] = "chax", + ["ꍣ"] = "cha", + ["ꍤ"] = "chap", + ["ꍥ"] = "chuot", + ["ꍦ"] = "chuox", + ["ꍧ"] = "chuo", + ["ꍨ"] = "chuop", + ["ꍩ"] = "chot", + ["ꍪ"] = "chox", + ["ꍫ"] = "cho", + ["ꍬ"] = "chop", + ["ꍭ"] = "chet", + ["ꍮ"] = "chex", + ["ꍯ"] = "che", + ["ꍰ"] = "chep", + ["ꍱ"] = "chux", + ["ꍲ"] = "chu", + ["ꍳ"] = "chup", + ["ꍴ"] = "churx", + ["ꍵ"] = "chur", + ["ꍶ"] = "chyt", + ["ꍷ"] = "chyx", + ["ꍸ"] = "chy", + ["ꍹ"] = "chyp", + ["ꍺ"] = "chyrx", + ["ꍻ"] = "chyr", + + ["ꍼ"] = "rrax", + ["ꍽ"] = "rra", + ["ꍾ"] = "rruox", + ["ꍿ"] = "rruo", + ["ꎀ"] = "rrot", + ["ꎁ"] = "rrox", + ["ꎂ"] = "rro", + ["ꎃ"] = "rrop", + ["ꎄ"] = "rret", + ["ꎅ"] = "rrex", + ["ꎆ"] = "rre", + ["ꎇ"] = "rrep", + ["ꎈ"] = "rrut", + ["ꎉ"] = "rrux", + ["ꎊ"] = "rru", + ["ꎋ"] = "rrup", + ["ꎌ"] = "rrurx", + ["ꎍ"] = "rrur", + ["ꎎ"] = "rryt", + ["ꎏ"] = "rryx", + ["ꎐ"] = "rry", + ["ꎑ"] = "rryp", + ["ꎒ"] = "rryrx", + ["ꎓ"] = "rryr", + + ["ꎔ"] = "nrat", + ["ꎕ"] = "nrax", + ["ꎖ"] = "nra", + ["ꎗ"] = "nrap", + ["ꎘ"] = "nrox", + ["ꎙ"] = "nro", + ["ꎚ"] = "nrop", + ["ꎛ"] = "nret", + ["ꎜ"] = "nrex", + ["ꎝ"] = "nre", + ["ꎞ"] = "nrep", + ["ꎟ"] = "nrut", + ["ꎠ"] = "nrux", + ["ꎡ"] = "nru", + ["ꎢ"] = "nrup", + ["ꎣ"] = "nrurx", + ["ꎤ"] = "nrur", + ["ꎥ"] = "nryt", + ["ꎦ"] = "nryx", + ["ꎧ"] = "nry", + ["ꎨ"] = "nryp", + ["ꎩ"] = "nryrx", + ["ꎪ"] = "nryr", + + ["ꎫ"] = "shat", + ["ꎬ"] = "shax", + ["ꎭ"] = "sha", + ["ꎮ"] = "shap", + ["ꎯ"] = "shuox", + ["ꎰ"] = "shuo", + ["ꎱ"] = "shuop", + ["ꎲ"] = "shot", + ["ꎳ"] = "shox", + ["ꎴ"] = "sho", + ["ꎵ"] = "shop", + ["ꎶ"] = "shet", + ["ꎷ"] = "shex", + ["ꎸ"] = "she", + ["ꎹ"] = "shep", + ["ꎺ"] = "shut", + ["ꎻ"] = "shux", + ["ꎼ"] = "shu", + ["ꎽ"] = "shup", + ["ꎾ"] = "shurx", + ["ꎿ"] = "shur", + ["ꏀ"] = "shyt", + ["ꏁ"] = "shyx", + ["ꏂ"] = "shy", + ["ꏃ"] = "shyp", + ["ꏄ"] = "shyrx", + ["ꏅ"] = "shyr", + + ["ꏆ"] = "rat", + ["ꏇ"] = "rax", + ["ꏈ"] = "ra", + ["ꏉ"] = "rap", + ["ꏊ"] = "ruox", + 
["ꏋ"] = "ruo", + ["ꏌ"] = "ruop", + ["ꏍ"] = "rot", + ["ꏎ"] = "rox", + ["ꏏ"] = "ro", + ["ꏐ"] = "rop", + ["ꏑ"] = "rex", + ["ꏒ"] = "re", + ["ꏓ"] = "rep", + ["ꏔ"] = "rut", + ["ꏕ"] = "rux", + ["ꏖ"] = "ru", + ["ꏗ"] = "rup", + ["ꏘ"] = "rurx", + ["ꏙ"] = "rur", + ["ꏚ"] = "ryt", + ["ꏛ"] = "ryx", + ["ꏜ"] = "ry", + ["ꏝ"] = "ryp", + ["ꏞ"] = "ryrx", + ["ꏟ"] = "ryr", + + ["ꏠ"] = "jit", + ["ꏡ"] = "jix", + ["ꏢ"] = "ji", + ["ꏣ"] = "jip", + ["ꏤ"] = "jiet", + ["ꏥ"] = "jiex", + ["ꏦ"] = "jie", + ["ꏧ"] = "jiep", + ["ꏨ"] = "juot", + ["ꏩ"] = "juox", + ["ꏪ"] = "juo", + ["ꏫ"] = "juop", + ["ꏬ"] = "jot", + ["ꏭ"] = "jox", + ["ꏮ"] = "jo", + ["ꏯ"] = "jop", + ["ꏰ"] = "jut", + ["ꏱ"] = "jux", + ["ꏲ"] = "ju", + ["ꏳ"] = "jup", + ["ꏴ"] = "jurx", + ["ꏵ"] = "jur", + ["ꏶ"] = "jyt", + ["ꏷ"] = "jyx", + ["ꏸ"] = "jy", + ["ꏹ"] = "jyp", + ["ꏺ"] = "jyrx", + ["ꏻ"] = "jyr", + + ["ꏼ"] = "qit", + ["ꏽ"] = "qix", + ["ꏾ"] = "qi", + ["ꏿ"] = "qip", + ["ꐀ"] = "qiet", + ["ꐁ"] = "qiex", + ["ꐂ"] = "qie", + ["ꐃ"] = "qiep", + ["ꐄ"] = "quot", + ["ꐅ"] = "quox", + ["ꐆ"] = "quo", + ["ꐇ"] = "quop", + ["ꐈ"] = "qot", + ["ꐉ"] = "qox", + ["ꐊ"] = "qo", + ["ꐋ"] = "qop", + ["ꐌ"] = "qut", + ["ꐍ"] = "qux", + ["ꐎ"] = "qu", + ["ꐏ"] = "qup", + ["ꐐ"] = "qurx", + ["ꐑ"] = "qur", + ["ꐒ"] = "qyt", + ["ꐓ"] = "qyx", + ["ꐔ"] = "qy", + ["ꐕ"] = "qyp", + ["ꐖ"] = "qyrx", + ["ꐗ"] = "qyr", + + ["ꐘ"] = "jjit", + ["ꐙ"] = "jjix", + ["ꐚ"] = "jji", + ["ꐛ"] = "jjip", + ["ꐜ"] = "jjiet", + ["ꐝ"] = "jjiex", + ["ꐞ"] = "jjie", + ["ꐟ"] = "jjiep", + ["ꐠ"] = "jjuox", + ["ꐡ"] = "jjuo", + ["ꐢ"] = "jjuop", + ["ꐣ"] = "jjot", + ["ꐤ"] = "jjox", + ["ꐥ"] = "jjo", + ["ꐦ"] = "jjop", + ["ꐧ"] = "jjut", + ["ꐨ"] = "jjux", + ["ꐩ"] = "jju", + ["ꐪ"] = "jjup", + ["ꐫ"] = "jjurx", + ["ꐬ"] = "jjur", + ["ꐭ"] = "jjyt", + ["ꐮ"] = "jjyx", + ["ꐯ"] = "jjy", + ["ꐰ"] = "jjyp", + + ["ꐱ"] = "njit", + ["ꐲ"] = "njix", + ["ꐳ"] = "nji", + ["ꐴ"] = "njip", + ["ꐵ"] = "njiet", + ["ꐶ"] = "njiex", + ["ꐷ"] = "njie", + ["ꐸ"] = "njiep", + ["ꐹ"] = "njuox", + ["ꐺ"] = "njuo", + ["ꐻ"] = "njot", + ["ꐼ"] = "njox", + 
["ꐽ"] = "njo", + ["ꐾ"] = "njop", + ["ꐿ"] = "njux", + ["ꑀ"] = "nju", + ["ꑁ"] = "njup", + ["ꑂ"] = "njurx", + ["ꑃ"] = "njur", + ["ꑄ"] = "njyt", + ["ꑅ"] = "njyx", + ["ꑆ"] = "njy", + ["ꑇ"] = "njyp", + ["ꑈ"] = "njyrx", + ["ꑉ"] = "njyr", + ["ꑊ"] = "nyit", + ["ꑋ"] = "nyix", + ["ꑌ"] = "nyi", + ["ꑍ"] = "nyip", + ["ꑎ"] = "nyiet", + ["ꑏ"] = "nyiex", + ["ꑐ"] = "nyie", + ["ꑑ"] = "nyiep", + ["ꑒ"] = "nyuox", + ["ꑓ"] = "nyuo", + ["ꑔ"] = "nyuop", + ["ꑕ"] = "nyot", + ["ꑖ"] = "nyox", + ["ꑗ"] = "nyo", + ["ꑘ"] = "nyop", + ["ꑙ"] = "nyut", + ["ꑚ"] = "nyux", + ["ꑛ"] = "nyu", + ["ꑜ"] = "nyup", + + ["ꑝ"] = "xit", + ["ꑞ"] = "xix", + ["ꑟ"] = "xi", + ["ꑠ"] = "xip", + ["ꑡ"] = "xiet", + ["ꑢ"] = "xiex", + ["ꑣ"] = "xie", + ["ꑤ"] = "xiep", + ["ꑥ"] = "xuox", + ["ꑦ"] = "xuo", + ["ꑧ"] = "xot", + ["ꑨ"] = "xox", + ["ꑩ"] = "xo", + ["ꑪ"] = "xop", + ["ꑫ"] = "xyt", + ["ꑬ"] = "xyx", + ["ꑭ"] = "xy", + ["ꑮ"] = "xyp", + ["ꑯ"] = "xyrx", + ["ꑰ"] = "xyr", + + ["ꑱ"] = "yit", + ["ꑲ"] = "yix", + ["ꑳ"] = "yi", + ["ꑴ"] = "yip", + ["ꑵ"] = "yiet", + ["ꑶ"] = "yiex", + ["ꑷ"] = "yie", + ["ꑸ"] = "yiep", + ["ꑹ"] = "yuot", + ["ꑺ"] = "yuox", + ["ꑻ"] = "yuo", + ["ꑼ"] = "yuop", + ["ꑽ"] = "yot", + ["ꑾ"] = "yox", + ["ꑿ"] = "yo", + ["ꒀ"] = "yop", + ["ꒁ"] = "yut", + ["ꒂ"] = "yux", + ["ꒃ"] = "yu", + ["ꒄ"] = "yup", + ["ꒅ"] = "yurx", + ["ꒆ"] = "yur", + ["ꒇ"] = "yyt", + ["ꒈ"] = "yyx", + ["ꒉ"] = "yy", + ["ꒊ"] = "yyp", + ["ꒋ"] = "yyrx", + ["ꒌ"] = "yyr" +} + +function export.tr(text, lang, sc) + text = gsub(text, "('?'?'?)(%^?[^'])('?'?'?)", "%1%2%3 ") + text = gsub(text, "%S", convert) + text = gsub(text, "%^(%l)", string.upper) + text = gsub(text, " $", "") + + return (text) +end + +return export diff --git a/wiktra/wikt/translit/inc-ash-translit.lua b/wiktra/wikt/translit/inc-ash-translit.lua new file mode 100644 index 0000000..830091d --- /dev/null +++ b/wiktra/wikt/translit/inc-ash-translit.lua @@ -0,0 +1,23 @@ +local export = {} + +function export.tr(text, lang, sc, debug_mode) + + local out_text + if (sc == "Brah") then + out_text = 
require("Brah-translit").tr(text, lang, sc, debug_mode) + elseif (sc == "Khar") then + out_text = require("Khar-translit").tr(text, lang, sc, debug_mode) + else + local namespace = mw.title:getCurrentTitle().nsText + if namespace == "Category" then + out_text = nil + else + error("Invalid script for Ashokan Prakrit language.") + end + end + + return out_text + +end + +return export diff --git a/wiktra/wikt/translit/inc-mas-translit.lua b/wiktra/wikt/translit/inc-mas-translit.lua new file mode 100644 index 0000000..aed0468 --- /dev/null +++ b/wiktra/wikt/translit/inc-mas-translit.lua @@ -0,0 +1,215 @@ +-- Transliteration for Assamese +local export = {} +local gsub = mw.ustring.gsub +local match = mw.ustring.match + +local conv = { + -- consonants + ["ক্ষ"] = "kh", + ["ক"] = "k", + ["খ"] = "kh", + ["গ"] = "g", + ["ঘ"] = "gh", + ["ঙ"] = "ṅ", + ["চ"] = "s", + ["ছ"] = "s", + ["জ"] = "z", + ["ঝ"] = "zh", + ["ঞ"] = "ñ", + ["ট"] = "t", + ["ঠ"] = "th", + ["ড"] = "d", + ["ঢ"] = "dh", + ["ণ"] = "n", + ["ত"] = "t", + ["থ"] = "th", + ["দ"] = "d", + ["ধ"] = "dh", + ["ন"] = "n", + ["প"] = "p", + ["ফ"] = "ph", + ["ব"] = "b", + ["ভ"] = "bh", + ["ম"] = "m", + ["য"] = "z", + ["ৰ"] = "r", + ["ল"] = "l", + ["ৱ"] = "w", + ["শ"] = "x", + ["ষ"] = "x", + ["স"] = "x", + ["হ"] = "h", + ["য়"] = "y", + ["ড়"] = "r", + ["ঢ়"] = "rh", + + -- vowel diacritics + ["’"] = "ö", + ["ি"] = "i", + ["ু"] = "u", + ["ৃ"] = "ri", + ["ে"] = "e", + ["ে’"] = "ë", + ["ো"] = "ü", + ["া"] = "a", + ["ী"] = "i", + ["ূ"] = "u", + ["ৈ"] = "oi", + ["ৌ"] = "ou", + + -- visarga + ["ঃ"] = "o", + + -- vowel signs + ["অ"] = "o", + ["অ’"] = "ó", + ["ই"] = "i", + ["উ"] = "u", + ["ঋ"] = "ri", + ["এ"] = "e", + ["এ’"] = "é", + ["ও"] = "ü", + ["আ"] = "a", + ["ঈ"] = "i", + ["ঊ"] = "u", + ["ঐ"] = "oi", + ["ঔ"] = "ou", + + -- hosonto + ["্"] = "", + + -- sondrobindu + ["ঁ"] = "̃", + + -- owogroho + ["ঽ"] = "’", + + -- onusor + ["ং"] = "ṅ", + + -- hosonto to, + ["ৎ"] = "t", + + -- numerals + ["০"] = "0", + ["১"] = "1", + ["২"] = 
"2", + ["৩"] = "3", + ["৪"] = "4", + ["৫"] = "5", + ["৬"] = "6", + ["৭"] = "7", + ["৮"] = "8", + ["৯"] = "9", + + -- punctuation + ["।"] = "." -- dari +} + +local conv2 = {["ক্ষ"] = "ḱ", ["খ"] = "ḱ", ["ঘ"] = "ǵ", ["ঙ"] = "ŋ", ["ং"] = "ŋ", ["ঝ"] = "ź", ["ঠ"] = "ṫ", ["থ"] = "ṫ", ["ঢ"] = "ḋ", ["ধ"] = "ḋ", ["ফ"] = "ṗ", ["ভ"] = "ḃ", ["ঢ়"] = "ŕ", ["ৃ"] = "ṙ", ["ঋ"] = "ṙ", ["ৈ"] = "ʏ", ["ঐ"] = "ʏ", ["ৌ"] = "ɵ", ["ঔ"] = "ɵ"} + +local consonant, vowel, vowel_sign = "ক-হড়-য়ৰৱ", "oা-ৌ’", "অ-ঔ" +local c = "[" .. consonant .. "]" +local cc = "়?" .. c +local v = "[" .. vowel .. vowel_sign .. "]" +local syncope_pattern = "(" .. v .. cc .. v .. cc .. ")o(" .. cc .. "ঁ?" .. v .. ")" + +local function rev_string(text) + local result, length = "", mw.ustring.len(text) + for i = 1, length do result = result .. mw.ustring.sub(text, length - i + 1, length - i + 1) end + return result +end + +function export.tr(text, lang, sc, mode) + text = gsub(text, "([^ৰ])্য", "%1্য়") + text = gsub(text, "্ব", "্ৱ") + text = gsub(text, "[শষস]্", "চ্") + text = gsub(text, "্স", "্চ") + text = gsub(text, "[োও]ৱ", "্ও") + text = gsub(text, "ক্ষ", "খ") + text = gsub(text, "’ৱ", "্অ’") + text = gsub(text, "[ুুউ]ৱ(.)", "্উ%1") + text = gsub(text, "[োও]ৱ(.)", "্ও%1") + text = gsub(text, "োঁৱ(.)", "্ওঁ%1") + text = gsub(text, "[ৌঔ]ৱ", "্ঔ") + text = gsub(text, "[িই]য়(.)", "্ই%1") + text = gsub(text, "ৃয়", "্ঋ") + text = gsub(text, "[েএ]য়(.)", "্এ%1") + text = gsub(text, "[ে’এ’]য়", "্এ’") + text = gsub(text, "[ৈঐ]য়(.)", "্ঐ%1") + text = gsub(text, "[ীঈ]য়(.)", "্ঈ%1") + text = gsub(text, "[ীঈ]য়", "্ঈঅ") -- end + text = gsub(text, "[ূূঊ]ৱ", "্ঊ") + text = gsub(text, "݁", "্অ") + text = gsub(text, "ঃ", "্অ") + text = gsub(text, "[࣪ܿ]", "্") + text = gsub(text, "বাৰ", "্বাৰ") + text = gsub(text, "বিলাক", "্বিলাক") + text = gsub(text, "টো", "্টো") + text = gsub(text, "খন", "্খন") + text = gsub(text, "ডাল", "্ডাল") + text = gsub(text, "খিনি", "্খিনি") + text = gsub(text, "জন", "্জন") + text = gsub(text, 
"জনী", "্জনী") + text = gsub(text, "গৰাকী", "্গৰাকী") + text = gsub(text, "সকল", "্সকল") + text = gsub(text, "কৈ", "্কৈ") + text = gsub(text, "কে", "্কে") + text = gsub(text, "ফাল", "্ফাল") + text = gsub(text, "কেই", "্কেই") + text = gsub(text, "মান", "্মান") + text = gsub(text, "[িীইঈ]ঞ", "্ইঅ͂") + text = gsub(text, "ঞ্", "ন্̃") + + text = gsub(text, "(" .. c .. "়?)([" .. vowel .. "’?্]?)", function(a, b) return a .. (b == "" and "o" or b) end) + + for word in mw.ustring.gmatch(text, "[ঁ-৽o’]+") do + local orig_word = word + word = rev_string(word) + word = gsub(word, "^o(়?" .. c .. ")(ঁ?" .. v .. ")", "%1%2") + while match(word, syncope_pattern) do word = gsub(word, syncope_pattern, "%1%2") end + text = gsub(text, orig_word, rev_string(word)) + end + + if mode == "IPA" then + text = gsub(text, ".[়’]?", conv2) + text = gsub(text, ".", conv2) + end + + text = gsub(text, ".[়’]?", conv) + text = gsub(text, ".", conv) + + local consonants_Latn_no_h = "[b-df-gj-np-tv-z]" + + -- Cw + text = gsub(text, "mw", "mb") -- special case + text = gsub(text, "^(" .. consonants_Latn_no_h .. "h?)w", "%1") -- initial + text = gsub(text, "hw", "hb") + text = gsub(text, "(" .. consonants_Latn_no_h .. ")w", "%1%1") -- medial + + -- zñ + text = gsub(text, "^zñ", "gy") -- initial + text = gsub(text, "zñ", "gg") -- medial + + -- Cy + text = gsub(text, "^khy", "kh") + text = gsub(text, "([aéeióoüu])(" .. consonants_Latn_no_h .. 
")y", "%1i%2%2") + + -- final "b" has inherent vowel + text = gsub(text, "b$", "bo") + text = gsub(text, "b ", "bo ") + + -- final r conjuncts + text = gsub(text, "r([kszt])o$", "r%1") + text = gsub(text, "r([kszt])o ", "r%1 ") + text = gsub(text, "rkho$", "rkh") + text = gsub(text, "rkho ", "rkh ") + + if match(text, "[ঁ-৽]") and mode ~= "debug" then + return nil + else + return mw.ustring.toNFC(text) + end +end + +return export diff --git a/wiktra/wikt/translit/inc-mbn-translit.lua b/wiktra/wikt/translit/inc-mbn-translit.lua new file mode 100644 index 0000000..fba4f34 --- /dev/null +++ b/wiktra/wikt/translit/inc-mbn-translit.lua @@ -0,0 +1,50 @@ +local export = {} + +local consonants = {["ক"] = "k", ["খ"] = "kh", ["গ"] = "g", ["ঘ"] = "gh", ["ঙ"] = "ṅ", ["চ"] = "c", ["ছ"] = "ch", ["জ"] = "j", ["ঝ"] = "jh", ["ঞ"] = "ñ", ["ট"] = "ṭ", ["ঠ"] = "ṭh", ["ড"] = "ḍ", ["ড়"] = "ṛ", ["ঢ"] = "ḍh", ["ঢ়"] = "ṛh", ["ণ"] = "ṇ", ["ত"] = "t", ["থ"] = "th", ["দ"] = "d", ["ধ"] = "dh", ["ন"] = "n", ["প"] = "p", ["ফ"] = "ph", ["ব"] = "b", ["ভ"] = "bh", ["ম"] = "m", ["য"] = "j", ["য়"] = "ẏ", ["র"] = "r", ["ল"] = "l", ["শ"] = "ś", ["ষ"] = "ś", ["স"] = "ś", ["হ"] = "h"} + +local diacritics = {["া"] = "ā", ["ি"] = "i", ["ী"] = "i", ["ু"] = "u", ["ূ"] = "u", ["ৃ"] = "ri", ["ে"] = "e", ["ৈ"] = "ôi", ["ো"] = "o", ["ৌ"] = "au", ["্"] = ""} + +local tt = { + -- vowels + ["অ"] = "a", + ["আ"] = "ā", + ["ই"] = "i", + ["ী"] = "i", + ["উ"] = "u", + ["ঊ"] = "u", + ["ঋ"] = "ri", + ["এ"] = "e", + ["ঐ"] = "ai", + ["ও"] = "o", + ["ঔ"] = "au", + + -- candrabindu + ["ঁ"] = "̃", + + -- anusvara + ["ং"] = "ṅ", + + -- visarga + ["ঃ"] = "ḥ", + + -- punctuation + ["।"] = "." -- danda +} + +function export.tr(text, lang, sc) + text = mw.ustring.gsub(text, "([কখগঘঙচছজঝঞটঠডঢণতথদধনপফবরভমযলশষসহ]়?)" .. "([a্িুৃেোাীূৈৌ]?)", function(c, d) + if not consonants[c] then return c end + if d == "" then + return consonants[c] .. "a" + else + return consonants[c] .. 
diacritics[d] + end + end) + + text = mw.ustring.gsub(text, ".", tt) + + return text + +end + +return export diff --git a/wiktra/wikt/translit/inc-oas-translit.lua b/wiktra/wikt/translit/inc-oas-translit.lua new file mode 100644 index 0000000..e60eb4b --- /dev/null +++ b/wiktra/wikt/translit/inc-oas-translit.lua @@ -0,0 +1,65 @@ +local export = {} + +local consonants = {["ক"] = "k", ["খ"] = "kh", ["গ"] = "g", ["ঘ"] = "gh", ["ঙ"] = "ṅ", ["চ"] = "c", ["ছ"] = "ch", ["জ"] = "j", ["ঝ"] = "jh", ["ঞ"] = "ñ", ["ট"] = "ṭ", ["ঠ"] = "ṭh", ["ড"] = "ḍ", ["ঢ"] = "ḍh", ["ণ"] = "ṇ", ["ত"] = "t", ["থ"] = "th", ["দ"] = "d", ["ধ"] = "dh", ["ন"] = "n", ["প"] = "p", ["ফ"] = "ph", ["ব"] = "b", ["ভ"] = "bh", ["ম"] = "m", ["য"] = "j", ["ৰ"] = "r", ["ল"] = "l", ["ৱ"] = "w", ["শ"] = "ś", ["ষ"] = "ṣ", ["স"] = "s", ["হ"] = "h", ["ড়"] = "ṛ", ["ঢ়"] = "ṛh", ["য়"] = "y", ["ক্ষ"] = "khy"} + +local diacritics = {["া"] = "a", ["ি"] = "i", ["ী"] = "ī", ["ু"] = "u", ["ূ"] = "ū", ["ৃ"] = "ṛ", ["ৄ"] = "ṝ", ["ৢ"] = "ḷ", ["ৣ"] = "ḹ", ["ে"] = "e", ["ৈ"] = "oi", ["ো"] = "ö", ["ৌ"] = "ou", ["্"] = ""} + +local tt = { + -- vowels + ["অ"] = "o", + ["আ"] = "a", + ["ই"] = "i", + ["ঈ"] = "ī", + ["উ"] = "u", + ["ঊ"] = "ū", + ["ঋ"] = "ṛ", + ["ৠ"] = "ṝ", + ["ঌ"] = "ḷ", + ["ৡ"] = "ḹ", + ["এ"] = "e", + ["ঐ"] = "oi", + ["ও"] = "ö", + ["ঔ"] = "ou", + -- chandrabindu + ["ঁ"] = "̐", -- until a better method is found + -- anusvara + ["ং"] = "ṅ", -- until a better method is found + -- visarga + ["ঃ"] = "ḥ", + -- avagraha + ["ঽ"] = "’", + -- + ["ৎ"] = "t", + -- numerals + ["০"] = "0", + ["১"] = "1", + ["২"] = "2", + ["৩"] = "3", + ["৪"] = "4", + ["৫"] = "5", + ["৬"] = "6", + ["৭"] = "7", + ["৮"] = "8", + ["৯"] = "9", + -- punctuation + ["॥"] = ".", -- double danda + ["।"] = ",", + -- reconstructed + ["*"] = "" +} + +function export.tr(text, lang, sc) + text = mw.ustring.gsub(text, "([কখগঘঙচছজঝঞটঠডঢণতথদধনপফবভমযৰলৱশষসহ]়?)" .. "([ািীুূৃৄৢৣেৈোৌ্]?)", function(c, d) + if d == "" then + return consonants[c] .. 
"o" + else + return consonants[c] .. diacritics[d] + end + end) + + text = mw.ustring.gsub(text, ".", tt) + + return text +end + +return export diff --git a/wiktra/wikt/translit/inc-pra-Deva-translit.lua b/wiktra/wikt/translit/inc-pra-Deva-translit.lua new file mode 100644 index 0000000..38d3157 --- /dev/null +++ b/wiktra/wikt/translit/inc-pra-Deva-translit.lua @@ -0,0 +1,118 @@ +local export = {} + +local consonants = { + -- consonants + ["क"] = "k", + ["ख"] = "kh", + ["ग"] = "g", + ["घ"] = "gh", + ["ङ"] = "ṅ", + ["च"] = "c", + ["छ"] = "ch", + ["ज"] = "j", + ["झ"] = "jh", + ["ञ"] = "ñ", + ["ट"] = "ṭ", + ["ठ"] = "ṭh", + ["ड"] = "ḍ", + ["ढ"] = "ḍh", + ["ण"] = "ṇ", + ["त"] = "t", + ["थ"] = "th", + ["द"] = "d", + ["ध"] = "dh", + ["न"] = "n", + ["प"] = "p", + ["फ"] = "ph", + ["ब"] = "b", + ["भ"] = "bh", + ["म"] = "m", + ["य"] = "y", + ["र"] = "r", + ["ल"] = "l", + ["व"] = "v", + ["ळ"] = "ḷ", + ["श"] = "ś", + ["ष"] = "ṣ", + ["स"] = "s", + ["ह"] = "h" +} + +local diacritics = { + -- matras + ["ा"] = "ā", + ["ि"] = "i", + ["ी"] = "ī", + ["ु"] = "u", + ["ू"] = "ū", + ["ृ"] = "ṛ", + ["ॄ"] = "ṝ", + ["ॢ"] = "l̥", + ["ॣ"] = "l̥̄", + ["े"] = "e", + ["ै"] = "ai", + ["ो"] = "o", + ["ौ"] = "au", + ["्"] = "" +} + +local tt = { + + -- vowels + ["अ"] = "a", + ["आ"] = "ā", + ["इ"] = "i", + ["ई"] = "ī", + ["उ"] = "u", + ["ऊ"] = "ū", + ["ऋ"] = "ṛ", + ["ॠ"] = "ṝ", + ["ऌ"] = "l̥", + ["ॡ"] = "l̥̄", + ["ए"] = "e", + ["ऐ"] = "ai", + ["ओ"] = "o", + ["औ"] = "au", + -- chandrabindu + ["ँ"] = "m̐", -- until a better method is found + -- anusvara + ["ं"] = "ṃ", -- until a better method is found + -- visarga + ["ः"] = "ḥ", + -- numerals + ["०"] = "0", + ["१"] = "1", + ["२"] = "2", + ["३"] = "3", + ["४"] = "4", + ["५"] = "5", + ["६"] = "6", + ["७"] = "7", + ["८"] = "8", + ["९"] = "9", + -- punctuation + ["।"] = ".", -- danda + ["॥"] = "." -- double danda +} + +function export.tr(text, lang, sc) + if sc ~= "Deva" then return nil end + + text = mw.ustring.gsub(text, "([क-ह])" .. 
"([ािीुूृॄॢॣेैोौ्]?)" .. "([अ-औ]?)", function(c, d, e) + if d == "" and e ~= "" then + return consonants[c] .. "a" .. tt[e] .. "̈" + elseif e ~= "" then + return consonants[c] .. diacritics[d] .. tt[e] + elseif d == "" then + return consonants[c] .. "a" + else + return consonants[c] .. diacritics[d] + end + end) + + text = mw.ustring.gsub(text, ".", tt) + + return text +end + +return export diff --git a/wiktra/wikt/translit/inc-pra-Knda-translit.lua b/wiktra/wikt/translit/inc-pra-Knda-translit.lua new file mode 100644 index 0000000..e26521a --- /dev/null +++ b/wiktra/wikt/translit/inc-pra-Knda-translit.lua @@ -0,0 +1,118 @@ +local export = {} + +local consonants = { + -- consonants + ["ಕ"] = "k", + ["ಖ"] = "kh", + ["ಗ"] = "g", + ["ಘ"] = "gh", + ["ಙ"] = "ṅ", + ["ಚ"] = "c", + ["ಛ"] = "ch", + ["ಜ"] = "j", + ["ಝ"] = "jh", + ["ಞ"] = "ñ", + ["ಟ"] = "ṭ", + ["ಠ"] = "ṭh", + ["ಡ"] = "ḍ", + ["ಢ"] = "ḍh", + ["ಣ"] = "ṇ", + ["ತ"] = "t", + ["ಥ"] = "th", + ["ದ"] = "d", + ["ಧ"] = "dh", + ["ನ"] = "n", + ["ಪ"] = "p", + ["ಫ"] = "ph", + ["ಬ"] = "b", + ["ಭ"] = "bh", + ["ಮ"] = "m", + ["ಯ"] = "y", + ["ರ"] = "r", + ["ಲ"] = "l", + ["ವ"] = "v", + ["ಳ"] = "ḷ", + ["ಶ"] = "ś", + ["ಷ"] = "ṣ", + ["ಸ"] = "s", + ["ಹ"] = "h" +} + +local diacritics = { + -- matras + ["ಾ"] = "ā", + ["ಿ"] = "i", + ["ೀ"] = "ī", + ["ು"] = "u", + ["ೂ"] = "ū", + ["ೃ"] = "ṛ", + ["ೄ"] = "ṝ", + ["ೢ"] = "l̥", + ["ೣ"] = "l̥̄", + ["ೇ"] = "e", + ["ೈ"] = "ai", + ["ೋ"] = "o", + ["ೌ"] = "au", + ["್"] = "" +} + +local tt = { + + -- vowels + ["ಅ"] = "a", + ["ಆ"] = "ā", + ["ಇ"] = "i", + ["ಈ"] = "ī", + ["ಉ"] = "u", + ["ಊ"] = "ū", + ["ಋ"] = "ṛ", + ["ೠ"] = "ṝ", + ["ಌ"] = "l̥", + ["ೡ"] = "l̥̄", + ["ಏ"] = "e", + ["ಐ"] = "ai", + ["ಓ"] = "o", + ["ಔ"] = "au", + -- chandrabindu + ["ಁ"] = "m̐", -- until a better method is found + -- anusvara + ["ಂ"] = "ṃ", -- until a better method is found + -- visarga + ["ಃ"] = "ḥ", + -- numerals + ["೦"] = "0", + ["೧"] = "1", + ["೨"] = "2", + ["೩"] = "3", + ["೪"] = "4", + ["೪"] = "5", + ["೬"] = "6", + ["೭"] = 
"7", + ["೮"] = "8", + ["೯"] = "9", + -- punctuation + ["।"] = ".", -- danda + ["॥"] = "." -- double danda +} + +function export.tr(text, lang, sc) + if sc ~= "Knda" then return nil end + + text = mw.ustring.gsub(text, "([ಕ-ಹ])" .. "([ಾಿೀುೂೃೄೢೣೇೈೋೌ್]?)" .. "([ಅ-ಔ]?)", function(c, d, e) + if d == "" and e ~= "" then + return consonants[c] .. "a" .. tt[e] .. "̈" + elseif e ~= "" then + return consonants[c] .. diacritics[d] .. tt[e] + elseif d == "" then + return consonants[c] .. "a" + else + return consonants[c] .. diacritics[d] + end + end) + + text = mw.ustring.gsub(text, ".", tt) + + return text +end + +return export diff --git a/wiktra/wikt/translit/inh-translit.lua b/wiktra/wikt/translit/inh-translit.lua new file mode 100644 index 0000000..2f522c3 --- /dev/null +++ b/wiktra/wikt/translit/inh-translit.lua @@ -0,0 +1,91 @@ +local export = {} + +local mapping1 = { + ["а"] = "a", + ["б"] = "b", + ["в"] = "w", + ["г"] = "g", + ["д"] = "d", + ["е"] = "e", + ["ё"] = "ë", + ["ж"] = "ž", + ["з"] = "z", + ["и"] = "i", + ["й"] = "j", + ["к"] = "k", + ["л"] = "l", + ["м"] = "m", + ["н"] = "n", + ["о"] = "o", + ["п"] = "p", + ["р"] = "r", + ["с"] = "s", + ["т"] = "t", + ["у"] = "u", + ["ф"] = "f", + ["х"] = "χ", + ["ц"] = "c", + ["ч"] = "č", + ["ш"] = "š", + ["щ"] = "šč", + ["ъ"] = "ʾ", + ["ы"] = "ə", + ["ь"] = "’", + ["э"] = "è", + ["ю"] = "ju", + ["я"] = "ja", + ["ӏ"] = "ʿ", + ["А"] = "A", + ["Б"] = "B", + ["В"] = "W", + ["Г"] = "G", + ["Д"] = "D", + ["Е"] = "E", + ["Ё"] = "Ë", + ["Ж"] = "Ž", + ["З"] = "Z", + ["И"] = "I", + ["Й"] = "J", + ["К"] = "K", + ["Л"] = "L", + ["М"] = "M", + ["Н"] = "N", + ["О"] = "O", + ["П"] = "P", + ["Р"] = "R", + ["С"] = "S", + ["Т"] = "T", + ["У"] = "U", + ["Ф"] = "F", + ["Х"] = "Χ", + ["Ц"] = "C", + ["Ч"] = "Č", + ["Ш"] = "Š", + ["Щ"] = "Šč", + ["Ъ"] = "ʾ", + ["Ы"] = "Ə", + ["Ь"] = "’", + ["Э"] = "È", + ["Ю"] = "Ju", + ["Я"] = "Ja", + ["Ӏ"] = "ʿ" +} + +local mapping2 = {["аь"] = "ä", ["гӏ"] = "ġ", ["кх"] = "q", ["къ"] = "q̇", ["кӏ"] = "ḳ", 
["пӏ"] = "ṗ", ["тӏ"] = "ṭ", ["цӏ"] = "c̣", ["чӏ"] = "č̣", ["яь"] = "jä", ["ий"] = "ī", ["ув"] = "ū", ["хь"] = "ḥ", ["хӏ"] = "h", ["Аь"] = "Ä", ["Гӏ"] = "Ġ", ["Кх"] = "Q", ["Къ"] = "Q̇", ["Кӏ"] = "Ḳ", ["Пӏ"] = "Ṗ", ["Тӏ"] = "Ṭ", ["Цӏ"] = "C̣", ["Чӏ"] = "Č̣", ["Яь"] = "Jä", ["Ий"] = "Ī", ["Ув"] = "Ū", ["Хь"] = "Ḥ", ["Хӏ"] = "H"} + +function export.tr(text, lang, sc) + local str_gsub = string.gsub + local UTF8_char = "[%z\1-\127\194-\244][\128-\191]*" + + -- Convert capital to lowercase palochka. + text = str_gsub(text, mw.ustring.char(0x4C0), mw.ustring.char(0x4CF)) + + text = str_gsub(text, "рхӏ", "ρ") + text = str_gsub(text, "Рхӏ", "Ρ") + for pat, repl in pairs(mapping2) do text = str_gsub(text, pat, repl) end + text = str_gsub(text, UTF8_char, mapping1) + + return text +end + +return export diff --git a/wiktra/wikt/translit/ital-translit.lua b/wiktra/wikt/translit/ital-translit.lua new file mode 100644 index 0000000..38f88a4 --- /dev/null +++ b/wiktra/wikt/translit/ital-translit.lua @@ -0,0 +1,120 @@ +local export = {} + +-- Standard transcription +local common_rules = { + ["𐌀"] = "a", + ["𐌁"] = "b", + ["𐌂"] = "c", + ["𐌃"] = "d", + ["𐌄"] = "e", + ["𐌅"] = "v", + ["𐌆"] = "z", + ["𐌇"] = "h", + ["𐌈"] = "θ", + ["𐌉"] = "i", + ["𐌊"] = "k", + ["𐌋"] = "l", + ["𐌌"] = "m", + ["𐌍"] = "n", + ["𐌎"] = "š", + ["𐌏"] = "o", + ["𐌐"] = "p", + ["𐌑"] = "ś", + ["𐌒"] = "q", + ["𐌓"] = "r", + ["𐌔"] = "s", + ["𐌕"] = "t", + ["𐌖"] = "u", + ["𐌗"] = "x", + ["𐌘"] = "φ", + ["𐌙"] = "χ", + ["𐌚"] = "f", + ["𐌛"] = "ř", + ["𐌜"] = "ç", + ["𐌝"] = "í", + ["𐌞"] = "ú", + ["𐌟"] = "k", + -- Numerals + ["𐌠"] = "Ⅰ", + ["𐌡"] = "Ⅴ", + ["𐌢"] = "Ⅹ", + ["𐌣"] = "Ⅼ", + -- Punctuation + ["·"] = " ", + ["⁚"] = " ", + ["⁝"] = " " +} + +local lang_rules = { + ["ett"] = { -- Etruscan + ["𐌟"] = "Ⅽ" + }, + + ["itc-ola"] = { -- Old Latin + ["𐌅"] = "f" + }, + + ["nrc"] = { -- Noric + ["𐌂"] = "g", + ["𐌈"] = "d", + ["𐌙"] = "g" + }, + + ["nrp"] = { -- North Picene + ["𐌂"] = "g" + }, + + ["osc"] = { -- Oscan + ["𐌂"] = "g" + }, + 
+ ["spx"] = { -- South Picene + ["𐌂"] = "g", + ["𐌑"] = "í", + ["·"] = "o", + ["⁚"] = "f" + }, + + ["xcc"] = { -- Camunic + ["𐌁"] = "ś", + ["𐌂"] = "g", + ["𐌑"] = "b", + ["𐌙"] = "s", + ["𐌟"] = "þþ", + ["𐌣"] = "þ" + }, + + ["xrr"] = { -- Raetic + ["𐌁"] = "þ", + ["𐌂"] = "?" + }, + + ["xum"] = { -- Umbrian + ["𐌈"] = "t" + }, + + ["xve"] = { -- Venetic + ["𐌂"] = "j", + ["𐌆"] = "d", + ["𐌇𐌅"] = "f", + ["𐌘"] = "b", + ["𐌙"] = "g" + } +} + +function export.tr(text, lang, sc) + -- If the script is not Ital, do not transliterate + if sc ~= "Ital" then return end + + -- Transliterate language-specific exceptions + if lang == "xve" then text = mw.ustring.gsub(text, "𐌇𐌅", "f") end + + if lang_rules[lang] then text = mw.ustring.gsub(text, ".", lang_rules[lang]) end + + -- Transliterate remaining characters + text = mw.ustring.gsub(text, ".", common_rules) + + return text +end + +return export diff --git a/wiktra/wikt/translit/iu-translit.lua b/wiktra/wikt/translit/iu-translit.lua new file mode 100644 index 0000000..70ea42d --- /dev/null +++ b/wiktra/wikt/translit/iu-translit.lua @@ -0,0 +1,13 @@ +local export = {} + +function export.tr(text, lang, sc) + text = require("Cans-translit").tr(text, lang, sc) + + local repl = {["ī"] = "ii", ["ō"] = "uu", ["ā"] = "aa", ["š"] = "sh", ["ð"] = "th", ["e"] = "ai", ["o"] = "u", ["y"] = "j", ["f"] = "v", ["c"] = "g"} + + for char, replacement in pairs(repl) do text = mw.ustring.gsub(text, char, replacement) end + + return text +end + +return export diff --git a/wiktra/wikt/translit/jdt-translit.lua b/wiktra/wikt/translit/jdt-translit.lua new file mode 100644 index 0000000..c6757c8 --- /dev/null +++ b/wiktra/wikt/translit/jdt-translit.lua @@ -0,0 +1,127 @@ +local export = {} + +local tt_he_a2 = {["או"] = "y", ["אי"] = "e", ["אָ"] = "o", ["אַ"] = "a", ["אִ"] = "i"} + +local tt_he_dg = {["כּ"] = "k", ["ךּ"] = "k", ["בּ"] = "b", ["פּ"] = "p", ["ףּ"] = "p", ["ג׳"] = "c", ["ז׳"] = "ç", ["ג'"] = "c", ["ז'"] = "ç"} + +local tt_he = {["א"] = "ə", ["ב"] 
= "v", ["ג"] = "g", ["ד"] = "d", ["ה"] = "h", ["ז"] = "z", ["ח"] = "ħ", ["י"] = "j", ["כ"] = "x", ["ך"] = "x", ["ל"] = "l", ["מ"] = "m", ["ם"] = "m", ["נ"] = "n", ["ן"] = "n", ["ס"] = "s", ["ע"] = "ḩ", ["פ"] = "f", ["ף"] = "f", ["צ"] = "c", ["ץ"] = "c", ["ק"] = "q", ["ר"] = "r", ["ש"] = "ş", ["ת"] = "t", ["ת"] = "t", ["׃"] = ":", ["׳"] = "'", ["״"] = "\"", ["־"] = "-"} + +local tt_cy_2 = { + ["ГЬ"] = "H", + ["Гь"] = "H", + ["гЬ"] = "h", + ["гь"] = "h", + ["ГӀ"] = "Ḩ", -- palochka + ["Гӏ"] = "Ḩ", -- palochka (rare lowercase) + ["ГI"] = "Ḩ", -- Latn I + ["ГІ"] = "Ḩ", -- Cyrl І + ["Гi"] = "Ḩ", -- Latn i + ["Гі"] = "Ḩ", -- Cyrl і + ["гӀ"] = "ḩ", -- palochka + ["гӏ"] = "ḩ", -- palochka (rare lowercase) + ["гI"] = "ḩ", -- Latn I + ["гІ"] = "ḩ", -- Cyrl І + ["гi"] = "ḩ", -- Latn i + ["гі"] = "ḩ", -- Cyrl і + ["ХЬ"] = "Ħ", + ["Хь"] = "Ħ", + ["хЬ"] = "ħ", + ["хь"] = "ħ", + ["ГЪ"] = "Q", + ["Гъ"] = "Q", + ["гЪ"] = "q", + ["гъ"] = "q", + ["УЬ"] = "Y", + ["Уь"] = "Y", + ["уЬ"] = "y", + ["уь"] = "y" +} + +local tt_cy = {["А"] = "A", ["а"] = "a", ["Б"] = "B", ["б"] = "b", ["Ч"] = "C", ["ч"] = "c", ["Ж"] = "Ç", ["ж"] = "ç", ["Д"] = "D", ["д"] = "d", ["Е"] = "E", ["е"] = "e", ["Э"] = "Ə", ["э"] = "ə", ["Ф"] = "F", ["ф"] = "f", ["Г"] = "G", ["г"] = "g", ["И"] = "I", ["и"] = "i", ["Й"] = "J", ["й"] = "j", ["К"] = "K", ["к"] = "k", ["Л"] = "L", ["л"] = "l", ["М"] = "M", ["м"] = "m", ["Н"] = "N", ["н"] = "n", ["О"] = "O", ["о"] = "o", ["П"] = "P", ["п"] = "p", ["Р"] = "R", ["р"] = "r", ["С"] = "S", ["с"] = "s", ["Ш"] = "Ş", ["ш"] = "ş", ["Т"] = "T", ["т"] = "t", ["У"] = "U", ["у"] = "u", ["В"] = "V", ["в"] = "v", ["Х"] = "X", ["х"] = "x", ["З"] = "Z", ["з"] = "z"} + +local tt_ipa = {["A"] = "a", ["a"] = "a", ["B"] = "b", ["b"] = "b", ["C"] = "tʃ", ["c"] = "tʃ", ["Ç"] = "dʒ", ["ç"] = "dʒ", ["D"] = "d", ["d"] = "d", ["E"] = "ɛ", ["e"] = "ɛ", ["Ə"] = "æ", ["ə"] = "æ", ["F"] = "f", ["f"] = "f", ["G"] = "ɡ", ["g"] = "ɡ", ["H"] = "h", ["h"] = "h", ["Ḩ"] = "ʕ", ["ḩ"] = "ʕ", ["Ħ"] = "ħ", 
["ħ"] = "ħ", ["I"] = "ɪ", ["i"] = "ɪ", ["J"] = "j", ["j"] = "j", ["K"] = "k", ["k"] = "k", ["L"] = "l", ["l"] = "l", ["M"] = "m", ["m"] = "m", ["N"] = "n", ["n"] = "n", ["O"] = "o", ["o"] = "o", ["P"] = "p", ["p"] = "p", ["Q"] = "ɢ", ["q"] = "ɢ", ["R"] = "ɾ", ["r"] = "ɾ", ["S"] = "s", ["s"] = "s", ["Ş"] = "ʃ", ["ş"] = "ʃ", ["T"] = "t", ["t"] = "t", ["U"] = "u", ["u"] = "u", ["V"] = "v", ["v"] = "v", ["X"] = "χ", ["x"] = "χ", ["Y"] = "y", ["y"] = "y", ["Z"] = "z", ["z"] = "z"} + +local tt_tocy = {["A"] = "А", ["a"] = "а", ["B"] = "Б", ["b"] = "б", ["C"] = "Ч", ["c"] = "ч", ["Ç"] = "Ж", ["ç"] = "ж", ["D"] = "Д", ["d"] = "д", ["E"] = "Е", ["e"] = "е", ["Ə"] = "Э", ["ə"] = "э", ["F"] = "Ф", ["f"] = "ф", ["G"] = "Г", ["g"] = "г", ["H"] = "Гь", ["h"] = "гь", ["Ḩ"] = "ГӀ", ["ḩ"] = "гӀ", ["Ħ"] = "Хь", ["ħ"] = "хь", ["I"] = "И", ["i"] = "и", ["J"] = "Й", ["j"] = "й", ["K"] = "К", ["k"] = "к", ["L"] = "Л", ["l"] = "л", ["M"] = "М", ["m"] = "м", ["N"] = "Н", ["n"] = "н", ["O"] = "О", ["o"] = "о", ["P"] = "П", ["p"] = "п", ["Q"] = "Гъ", ["q"] = "гъ", ["R"] = "Р", ["r"] = "р", ["S"] = "С", ["s"] = "с", ["Ş"] = "Ш", ["ş"] = "ш", ["T"] = "Т", ["t"] = "т", ["U"] = "У", ["u"] = "у", ["V"] = "В", ["v"] = "в", ["X"] = "Х", ["x"] = "х", ["Y"] = "Уь", ["y"] = "уь", ["Z"] = "З", ["z"] = "з"} -- Latin transliteration letter -> Cyrillic spelling; capitals map to Cyrillic capitals + +local tt_tohe = {["A"] = "אַ", ["a"] = "אַ", ["B"] = "בּ", ["b"] = "בּ", ["C"] = "ג׳", ["c"] = "ג׳", ["Ç"] = "ז׳", ["ç"] = "ז׳", ["D"] = "ד", ["d"] = "ד", ["E"] = "אי", ["e"] = "אי", ["Ə"] = "א", ["ə"] = "א", ["F"] = "פ", ["f"] = "פ", ["G"] = "ג", ["g"] = "ג", ["H"] = "ה", ["h"] = "ה", ["Ḩ"] = "ע", ["ḩ"] = "ע", ["Ħ"] = "ח", ["ħ"] = "ח", ["I"] = "אִ", ["i"] = "אִ", ["J"] = "י", ["j"] = "י", ["K"] = "כּ", ["k"] = "כּ", ["L"] = "ל", ["l"] = "ל", ["M"] = "מ", ["m"] = "מ", ["N"] = "נ", ["n"] = "נ", ["O"] = "אָ", ["o"] = "אָ", ["P"] = "פּ", ["p"] = "פּ", ["Q"] = "ק", ["q"] = "ק", ["R"] = "ר", ["r"] = "ר", ["S"] = "ס", ["s"] = "ס", ["Ş"] = "ש", ["ş"] = "ש", ["T"] = "ת", ["t"] = "ת", ["U"] = "אוּ", ["u"] 
= "אוּ", ["V"] = "ב", ["v"] = "ב", ["X"] = "כ", ["x"] = "כ", ["Y"] = "או", ["y"] = "או", ["Z"] = "ז", ["z"] = "ז"} + +-- Letter -> word-final form, indexed as he_to_finals by he_finals_replacer. Keep in mind RTL issues when viewing the following: +local he_to_finals = {["כ"] = "ך", ["מ"] = "ם", ["נ"] = "ן", ["פ"] = "ף", ["צ"] = "ץ"} + +local function tr_he(text) -- Hebrew script: the fixed sequence for "u" first, then two-character matches, then single characters + text = mw.ustring.gsub(text, "אוּ", "u") + text = mw.ustring.gsub(text, ".%f[^א].", tt_he_a2) + text = mw.ustring.gsub(text, ".%f[׳ּ'].", tt_he_dg) + text = mw.ustring.gsub(text, ".", tt_he) + return text +end + +local function tr_cy(text) -- Cyrillic script: two-character matches first, then single characters + text = mw.ustring.gsub(text, ".%f[ӀIІӏiіЬьЪъ].", tt_cy_2) + text = mw.ustring.gsub(text, ".", tt_cy) + return text +end + +function export.tr(text, lang, sc) -- returns nil when the script is neither "Hebr" nor "Cyrl" + if not sc then sc = require("scripts").findBestScript(text, require("languages").getByCode(lang or "jdt")):getCode() end + + if sc == "Hebr" then + text = tr_he(text) + elseif sc == "Cyrl" then + text = tr_cy(text) + else + text = nil + end + + return text +end + +function export.ipa(text, lang, sc) + if type(text) == "table" then + local args = text:getParent().args + text = args[1] ~= "" and args[1] or mw.title.getCurrentTitle().subpageText + lang = args["lang"] ~= "" and args["lang"] or "jdt" + sc = args["sc"] ~= "" and args["sc"] + end + return (mw.ustring.gsub(export.tr(text, lang, sc) or text, ".", tt_ipa)) +end + +function export.la(text, lang, sc) + if type(text) == "table" then + local args = text:getParent().args + text = args[1] ~= "" and args[1] or mw.title.getCurrentTitle().subpageText + lang = args["lang"] ~= "" and args["lang"] or "jdt" + sc = args["sc"] ~= "" and args["sc"] + end + return (export.tr(text, lang, sc) or text) +end + +function export.cy(text, lang, sc) + if type(text) == "table" then + local args = text:getParent().args + text = args[1] ~= "" and args[1] or mw.title.getCurrentTitle().subpageText + lang = args["lang"] ~= "" and args["lang"] or "jdt" + sc = args["sc"] ~= "" and args["sc"] + end + return (mw.ustring.gsub(export.tr(text, lang, sc) or text, 
".", tt_tocy)) +end + +local function he_finals_replacer(letter, rest) return (he_to_finals[letter] .. rest) end + +local function he_finals(text) + -- Keep in mind RTL issues when viewing the following pattern: + text = mw.ustring.gsub(text, "([כמנפצ])([^א-ת]-%f[%s־-])", he_finals_replacer) + return text +end + +function export.he(text, lang, sc) + if type(text) == "table" then + local args = text:getParent().args + text = args[1] ~= "" and args[1] or mw.title.getCurrentTitle().subpageText + lang = args["lang"] ~= "" and args["lang"] or "jdt" + sc = args["sc"] ~= "" and args["sc"] + end + return he_finals(mw.ustring.gsub(export.tr(text, lang, sc) or text, ".", tt_tohe)) +end + +return export diff --git a/wiktra/wikt/translit/jv-translit.lua b/wiktra/wikt/translit/jv-translit.lua new file mode 100644 index 0000000..7d5df2a --- /dev/null +++ b/wiktra/wikt/translit/jv-translit.lua @@ -0,0 +1,182 @@ +local export = {} + +local conv = { + -- finals (U+A980 - U+A983): + + ["ꦀ"] = "m", + ["ꦁ"] = "ng", + ["ꦂ"] = "r", + ["ꦃ"] = "h", + + -- independent vowels (U+A984 - U+A98E): + + ["ꦄ"] = "a", + ["ꦄꦴ"] = "a", + ["ꦅ"] = "i", + ["ꦆ"] = "i", + ["ꦇ"] = "i", + ["ꦈ"] = "u", + ["ꦈꦴ"] = "u", + ["ꦉ"] = "re", + ["ꦉꦴ"] = "reu", + ["ꦊ"] = "le", + ["ꦋ"] = "leu", + ["ꦌ"] = "e", + ["ꦍ"] = "ai", + ["ꦎ"] = "o", + + -- independent consonants (U+A98F - U+A9B2): + + ["ꦏ"] = "k", + ["ꦏ꦳"] = "kh", + ["ꦐ"] = "q", + ["ꦑ"] = "kh", + ["ꦒ"] = "g", + ["ꦒ꦳"] = "gh", + ["ꦓ"] = "gh", + ["ꦔ"] = "ng", + ["ꦔ꦳"] = "'", + + ["ꦕ"] = "c", + ["ꦖ"] = "ch", + ["ꦗ"] = "j", + ["ꦗ꦳"] = "z", + ["ꦘ"] = "jny", + ["ꦙ"] = "jh", + ["ꦚ"] = "ny", + + ["ꦛ"] = "th", + ["ꦜ"] = "th", + ["ꦝ"] = "dh", + ["ꦞ"] = "dh", + ["ꦟ"] = "nn", + + ["ꦠ"] = "t", + ["ꦡ"] = "th", + ["ꦢ"] = "d", + ["ꦢ꦳"] = "dz", + ["ꦣ"] = "dh", + ["ꦤ"] = "n", + + ["ꦥ"] = "p", + ["ꦥ꦳"] = "f", + ["ꦦ"] = "ph", + ["ꦧ"] = "b", + ["ꦨ"] = "bh", + ["ꦩ"] = "m", + + ["ꦪ"] = "y", + ["ꦫ"] = "r", + ["ꦬ"] = "r", + ["ꦭ"] = "l", + ["ꦮ"] = "w", + ["ꦮ꦳"] = "v", + ["ꦯ"] = "sh", + 
["ꦰ"] = "ss", + ["ꦱ"] = "s", + ["ꦱ꦳"] = "sy", + ["ꦲ"] = "h", + ["ꦲ꦳"] = "h", + + -- cecak_telu/nukta (U+A9B3): + + ["꦳"] = "", + + -- dependent vowels (U+A9B4 - A9BD): + + ["ꦴ"] = "a", -- tarung + ["ꦵ"] = "o", + ["ꦶ"] = "i", + ["ꦷ"] = "i", + ["ꦸ"] = "u", + ["ꦹ"] = "ū", + ["ꦺ"] = "é", + ["ꦺꦴ"] = "o", + ["ꦻ"] = "ai", + ["ꦻꦴ"] = "au", + ["ꦼ"] = "e", + ["ꦽ"] = "re", + ["ꦽꦴ"] = "reu", + + -- medials (U+A9BE - U+A9BF): + ["ꦾ"] = "y", + ["ꦿ"] = "r", + + -- pangkon/virama (U+A9C0): + + ["꧀"] = "", + + -- punctuation (U+A9C1 - U+A9CF): + + ["꧁"] = "(starts title)", + ["꧂"] = "(ends title)", + ["꧃"] = "(letter to younger age or lower rank)", + ["꧄"] = "(letter to equal age or equal rank)", + ["꧅"] = "(letter to older age or higher rank)", + ["꧆"] = "(pada windu)", + ["꧇"] = ":", -- number indicator + ["꧈"] = ",", + ["꧉"] = ".", + ["꧊"] = "\"", + ["꧋"] = "//", + ["꧌"] = "(", + ["꧍"] = ")", + ["ꧏ"] = "2", + + -- digits (U+A9D0 - U+A9D9): + + ["꧐"] = "0", + ["꧑"] = "1", + ["꧒"] = "2", + ["꧓"] = "3", + ["꧔"] = "4", + ["꧕"] = "5", + ["꧖"] = "6", + ["꧗"] = "7", + ["꧘"] = "8", + ["꧙"] = "9", + + -- ellipsis (U+A9DE - U+A9DF): + + ["꧞"] = "-", + ["꧟"] = "-" +} + +function export.tr(text, lang, sc) + local CSVC = {initial = "([ꦏ-ꦲ]꦳?)", medial = "([ꦾꦿ]?)", nucleus = "([ꦴ-ꦽ꧀]?ꦴ?)", final = "([ꦀ-ꦃ]?)"} + local VC = {nucleus = "([ꦄ-ꦎ]ꦴ?)", final = "([ꦀ-ꦃ]*)"} + + local number_indicator = "꧇" + local digits = "[꧐-꧙]" + + local initial = true + + text = mw.ustring.gsub(text, CSVC.initial .. CSVC.medial .. CSVC.nucleus .. CSVC.final, function(a, b, c, d) + a = conv[a] or error("Initial not recognized: " .. a) + b = b == "" and "" or conv[b] or error("Medial not recognized: " .. b) + c = c == "" and "a" or conv[c] or error("Nucleus not recognized: " .. c) + d = d == "" and "" or conv[d] or error("Final not recognized: " .. d) + if initial and a == "h" then a = "" end + initial = false + return a .. b .. c .. d + end) + + text = mw.ustring.gsub(text, VC.nucleus .. 
VC.final, function(a, b) + a = conv[a] + b = (b == "" and "" or conv[b]) + initial = false + return a .. b + end) + + text = mw.ustring.gsub(text, number_indicator .. "(" .. digits .. "+)" .. number_indicator, function(a) + a = mw.ustring.gsub(a, ".", conv) + initial = true + return a + end) + + text = mw.ustring.gsub(text, ".", conv) + + return text +end + +return export diff --git a/wiktra/wikt/translit/kaa-translit.lua b/wiktra/wikt/translit/kaa-translit.lua new file mode 100644 index 0000000..afaae11 --- /dev/null +++ b/wiktra/wikt/translit/kaa-translit.lua @@ -0,0 +1,90 @@ +local export = {} + +local tt = { + ["ү"] = "ü", + ["Ү"] = "Ü", + ["т"] = "t", + ["Т"] = "T", + ["Ў"] = "W", + ["ў"] = "w", + ["р"] = "r", + ["Р"] = "R", + ["ф"] = "f", + ["Ф"] = "F", + ["ю"] = "yu", + ["Ю"] = "Yu", + ["ш"] = "ş", + ["Ш"] = "Ş", + ["ь"] = "ʹ", + ["Ь"] = "ʹ", + ["ъ"] = "ʺ", + ["Ъ"] = "ʺ", + ["н"] = "n", + ["Н"] = "N", + ["п"] = "p", + ["П"] = "P", + ["й"] = "y", + ["Й"] = "Y", + ["л"] = "l", + ["Л"] = "L", + ["з"] = "z", + ["З"] = "Z", + ["е"] = "e", + ["Е"] = "E", + ["г"] = "g", + ["Г"] = "G", + ["б"] = "b", + ["Б"] = "B", + ["у"] = "u", + ["У"] = "U", + ["с"] = "s", + ["С"] = "S", + ["х"] = "h", + ["Х"] = "H", + ["ч"] = "ç", + ["Ч"] = "Ç", + ["щ"] = "şç", + ["Щ"] = "Şç", + ["я"] = "ya", + ["Я"] = "Ya", + ["ы"] = "ı", + ["Ы"] = "I", + ["э"] = "e", + ["Э"] = "E", + ["м"] = "m", + ["М"] = "M", + ["о"] = "o", + ["О"] = "O", + ["и"] = "i", + ["И"] = "I", + ["ё"] = "yo", + ["Ё"] = "Yo", + ["ж"] = "j", + ["Ж"] = "J", + ["к"] = "k", + ["К"] = "K", + ["д"] = "d", + ["Д"] = "D", + ["в"] = "v", + ["В"] = "V", + ["ц"] = "ts", + ["Ц"] = "Ts", + ["а"] = "a", + ["А"] = "A", + ["ң"] = "ñ", + ["Ң"] = "Ñ", + ["ә"] = "ä", + ["Ә"] = "Ä", + ["э"] = "é", + ["Э"] = "É", + ["қ"] = "q", + ["Қ"] = "Q", + ["ғ"] = "ğ", + ["Ғ"] = "Ğ", + ["ө"] = "ö", + ["Ө"] = "Ö" +}; + +function export.tr(text) return (mw.ustring.gsub(text, ".", tt)) end + +return export diff --git 
a/wiktra/wikt/translit/kap-translit.lua b/wiktra/wikt/translit/kap-translit.lua new file mode 100644 index 0000000..76a15fa --- /dev/null +++ b/wiktra/wikt/translit/kap-translit.lua @@ -0,0 +1,25 @@ +local export = {} + +local mapping1 = {["п"] = "p", ["б"] = "b", ["т"] = "t", ["д"] = "d", ["к"] = "k", ["г"] = "g", ["ц"] = "c", ["ч"] = "č", ["с"] = "s", ["з"] = "z", ["ш"] = "š", ["ж"] = "ž", ["х"] = "x", ["м"] = "m", ["н"] = "n", ["р"] = "r", ["л"] = "l", ["в"] = "v", ["й"] = "y", ["и"] = "i", ["е"] = "e", ["э"] = "e", ["а"] = "a", ["о"] = "o", ["у"] = "u", ["ъ"] = "ʾ", ["ᵸ"] = "̃"} + +local mapping2 = {["пӏ"] = "p’", ["тӏ"] = "t’", ["кӏ"] = "k’", ["къ"] = "q’", ["цӏ"] = "c’", ["лӏ"] = "ƛ", ["кь"] = "ƛ’", ["чӏ"] = "c’̌", ["хъ"] = "q", ["лъ"] = "λ", ["гъ"] = "ġ", ["хӏ"] = "ḥ", ["гӏ"] = "a̯", ["гь"] = "h", ["аь"] = "ä", ["оь"] = "ö", ["уь"] = "ü", ["аᵸ"] = "ã", ["еᵸ"] = "ẽ", ["иᵸ"] = "ĩ", ["оᵸ"] = "õ", ["уᵸ"] = "ũ"} + +local tetragraph = {["аьᵸ"] = "ä̃"} + +function export.tr(text, lang, sc) + local str_gsub = string.gsub + local UTF8_char = "[%z\1-\127\194-\244][\128-\191]*" + + -- Convert capital to lowercase palochka. 
+ text = str_gsub(text, mw.ustring.char(0x4C0), mw.ustring.char(0x4CF)) + + for pat, repl in pairs(mapping2) do text = str_gsub(text, pat, repl) end + + for tetragraph, replacement in pairs(tetragraph) do text = str_gsub(text, tetragraph, replacement) end + + text = str_gsub(text, UTF8_char, mapping1) + + return text +end + +return export diff --git a/wiktra/wikt/translit/kbd-translit.lua b/wiktra/wikt/translit/kbd-translit.lua new file mode 100644 index 0000000..10a1c95 --- /dev/null +++ b/wiktra/wikt/translit/kbd-translit.lua @@ -0,0 +1,26 @@ +local export = {} + +local tt = {["а"] = "ā", ["б"] = "b", ["в"] = "v", ["г"] = "γ", ["д"] = "d", ["е"] = "e", ["ё"] = "ë", ["ж"] = "ž", ["з"] = "z", ["и"] = "i", ["й"] = "j", ["к"] = "k", ["л"] = "l", ["м"] = "m", ["н"] = "n", ["о"] = "o", ["п"] = "p", ["р"] = "r", ["с"] = "s", ["т"] = "t", ["у"] = "w", ["ф"] = "f", ["х"] = "x", ["ц"] = "c", ["ч"] = "č", ["ш"] = "š", ["щ"] = "ŝ", ["ъ"] = "”", ["ы"] = "ə", ["ь"] = "’", ["э"] = "ă", ["ю"] = "ju", ["я"] = "jā", ["ӏ"] = "ʾ", ["А"] = "Ā", ["Б"] = "B", ["В"] = "V", ["Г"] = "Γ", ["Д"] = "D", ["Е"] = "E", ["Ё"] = "Ë", ["Ж"] = "Ž", ["З"] = "Z", ["И"] = "I", ["Й"] = "J", ["К"] = "K", ["Л"] = "L", ["М"] = "M", ["Н"] = "N", ["О"] = "O", ["П"] = "P", ["Р"] = "R", ["С"] = "S", ["Т"] = "T", ["У"] = "W", ["Ф"] = "F", ["Х"] = "X", ["Ц"] = "C", ["Ч"] = "Č", ["Ш"] = "Š", ["Щ"] = "Ŝ", ["Ъ"] = "”", ["Ы"] = "Ə", ["Ь"] = "’", ["Э"] = "Ă", ["Ю"] = "Ju", ["Я"] = "Jā", ["ӏ"] = "ʾ"}; + +local trigraphs = {["кхъу"] = "q°", ["Кхъу"] = "Q°", ["гъу"] = "ġ°", ["Кхъ"] = "Q", ["кхъ"] = "q", ["къу"] = "q̇°", ["кӏу"] = "ḳ°", ["хъу"] = "χ°"} + +local digraphs = {["гу"] = "g°", ["гь"] = "ɡ’", ["гъ"] = "ġ", ["дж"] = "ǯ̍", ["дз"] = "ʒ", ["жъ"] = "ẑ", ["жь"] = "ẑ", ["ку"] = "k°", ["къ"] = "q̇", ["кӏ"] = "č̣̍", ["лъ"] = "ł", ["лӏ"] = "ḷ", ["пӏ"] = "ṗ", ["сӏ"] = "ṣ̣", ["тӏ"] = "ṭ", ["фӏ"] = "f̣", ["ху"] = "x°", ["хъ"] = "χ", ["хь"] = "ḥ", ["цӏ"] = "c̣", ["чу"] = "č̍°", ["щӏ"] = "ṣ̂", ["ӏу"] = "ʾ°", ["Гъу"] = "Ġ°", 
["Къу"] = "Q̇°", ["Кӏу"] = "Ḳ°", ["Хъу"] = "Χ°", ["Гу"] = "G°", ["Гь"] = "ɡ’", ["Гъ"] = "Ġ", ["Дж"] = "Ǯ̍", ["Дз"] = "Ʒ", ["Жъ"] = "Ẑ", ["Жь"] = "Ẑ̂", ["Ку"] = "K°", ["Къ"] = "Q̇", ["Кӏ"] = "Č̣̍", ["Лъ"] = "Ł", ["Лӏ"] = "Ḷ", ["Пӏ"] = "Ṗ", ["Сӏ"] = "Ṣ̣", ["Тӏ"] = "Ṭ", ["Фӏ"] = "F̣", ["Ху"] = "X°", ["Хъ"] = "Χ", ["Хь"] = "Ḥ", ["Цӏ"] = "C̣", ["Чу"] = "Č̍°", ["Щӏ"] = "Ṣ̂"} + +function export.tr(text) + local str_gsub = string.gsub + local UTF8char = "[%z\1-\127\194-\244][\128-\191]*" + + -- Convert uppercase palochka to lowercase. Lowercase is found in tables + -- above. + text = str_gsub(text, mw.ustring.char(0x4C0), mw.ustring.char(0x4CF)) + + for trigraph, replacement in pairs(trigraphs) do text = str_gsub(text, trigraph, replacement) end + + for digraph, replacement in pairs(digraphs) do text = str_gsub(text, digraph, replacement) end + + text = str_gsub(text, UTF8char, tt) + + return text +end + +return export diff --git a/wiktra/wikt/translit/kca-translit.lua b/wiktra/wikt/translit/kca-translit.lua new file mode 100644 index 0000000..ad564b0 --- /dev/null +++ b/wiktra/wikt/translit/kca-translit.lua @@ -0,0 +1,130 @@ +local export = {} + +local tt = { + ["а"] = "a", + ["ӓ"] = "ä", + ["ӑ"] = "ă", + ["б"] = "b", + ["в"] = "w", + ["г"] = "g", + ["д"] = "d", + ["е"] = "e", + ["ё"] = "ë", + ["ә"] = "ə", + ["ӛ"] = "ə̂", + ["ж"] = "ž", + ["з"] = "z", + ["и"] = "i", + ["і"] = "i", + ["й"] = "j", + ["к"] = "k", + ["к"] = "k", + ["қ"] = "k̦", + ["ӄ"] = "ḳ", + ["л"] = "l", + ["ӆ"] = "ł", + ["ԓ"] = "ḷ", + ["љ"] = "l`", + ["м"] = "m", + ["н"] = "n", + ["ң"] = "n̦", + ["ӈ"] = "ň", + ["њ"] = "n`", + ["о"] = "o", + ["ӧ"] = "ö", + ["ө"] = "ô", + ["ӫ"] = "ő", + ["п"] = "p", + ["р"] = "r", + ["с"] = "s", + ["ҫ"] = "ş", + ["т"] = "t", + ["ҭ"] = "ț", + ["у"] = "u", + ["ӱ"] = "ü", + ["ў"] = "ŭ", + ["ф"] = "f", + ["х"] = "x", + ["ҳ"] = "h̦", + ["ц"] = "c", + ["ч"] = "č", + ["ҷ"] = "c̦", + ["ш"] = "š", + ["щ"] = "ŝ", + ["ъ"] = "”", + ["ы"] = "y", + ["ь"] = "’", + ["э"] = "è", + ["є"] = 
"ê", + ["є̈ "] = "̈ê", + ["ю"] = "û", + ["ю̆"] = "̆û", + ["я"] = "â", + ["я̆"] = "̆â", + ["я̈"] = "̈â", + ["А"] = "A", + ["Ӓ"] = "Ä", + ["Ӑ"] = "Ă", + ["Б"] = "B", + ["В"] = "W", + ["Г"] = "G", + ["Д"] = "D", + ["Е"] = "E", + ["Ё"] = "Ë", + ["Ә"] = "Ə", + ["Ӛ"] = "Ə̂", + ["Ж"] = "Ž", + ["З"] = "Z", + ["И"] = "I", + ["І"] = "I", + ["Й"] = "J", + ["К"] = "K", + ["К"] = "K", + ["Қ"] = "K̦", + ["Ӄ"] = "Ḳ", + ["Л"] = "L", + ["Ӆ"] = "Ł", + ["Ԓ"] = "Ḷ", + ["Љ"] = "L`", + ["М"] = "M", + ["Н"] = "N", + ["Ң"] = "N̦", + ["Ӈ"] = "Ň", + ["Њ"] = "N`", + ["О"] = "O", + ["Ӧ"] = "Ö", + ["Ө"] = "Ô", + ["Ӫ"] = "Ő", + ["П"] = "P", + ["Р"] = "R", + ["С"] = "S", + ["Ҫ"] = "Ş", + ["Т"] = "T", + ["Ҭ"] = "Ț", + ["У"] = "U", + ["Ӱ"] = "Ü", + ["Ў"] = "Ŭ", + ["Ф"] = "F", + ["Х"] = "X", + ["Ҳ"] = "H̦", + ["Ц"] = "C", + ["Ч"] = "Č", + ["Ҷ"] = "C̦", + ["Ш"] = "Š", + ["Щ"] = "Ŝ", + ["Ъ"] = "”", + ["Ы"] = "Y", + ["Ь"] = "’", + ["Э"] = "È", + ["Є"] = "Ê", + ["Є̈ "] = "̈Ê", + ["Ю"] = "Û", + ["Ю̆"] = "̆Û", + ["Я"] = "Â", + ["Я̆"] = "̆Â", + ["Я̈"] = "̈Â" +}; + +function export.tr(text) return (mw.ustring.gsub(text, ".", tt)) end + +return export diff --git a/wiktra/wikt/translit/kdr-translit.lua b/wiktra/wikt/translit/kdr-translit.lua new file mode 100644 index 0000000..169cf9e --- /dev/null +++ b/wiktra/wikt/translit/kdr-translit.lua @@ -0,0 +1,91 @@ +local export = {} +local tab = { + ["А"] = "A", + ["Б"] = "B", + ["В"] = "V", + ["Г"] = "G", + ["Д"] = "D", + ["Е"] = "E", + ["Ё"] = "Yo", + ["Ж"] = "J", + ["З"] = "Z", + ["И"] = "I", + ["Й"] = "Y", + + ["К"] = "K", + ["Л"] = "L", + ["М"] = "M", + ["Н"] = "N", + ["О"] = "O", + ["Ӧ"] = "Ö", + ["П"] = "P", + ["Р"] = "R", + ["С"] = "S", + ["Т"] = "T", + ["У"] = "U", + ["Ӱ"] = "Ü", + ["Ф"] = "F", + ["Х"] = "Kh", + ["Һ"] = "H", + ["Ц"] = "Ts", + ["Ч"] = "Ç", + ["Ш"] = "Š", + ["Щ"] = "Şç", + ["Ъ"] = "ʺ", + ["Ы"] = "İ", + ["Ь"] = "ʹ", + ["Э"] = "E", + ["Ю"] = "Yu", + ["Я"] = "Ya", + ["а"] = "a", + ["б"] = "b", + ["в"] = "v", + ["г"] = "g", + ["д"] = "d", + 
["е"] = "e", + ["ё"] = "yo", + ["ж"] = "j", + ["з"] = "z", + ["и"] = "i", + ["й"] = "y", + ["к"] = "k", + ["л"] = "l", + ["м"] = "m", + ["н"] = "n", + ["о"] = "o", + ["ӧ"] = "ö", + ["п"] = "p", + ["р"] = "r", + ["с"] = "s", + ["т"] = "t", + ["у"] = "u", + ["ӱ"] = "ü", + ["ф"] = "f", + ["х"] = "kh", + ["ц"] = "ts", + ["ч"] = "ç", + ["ш"] = "ş", + ["щ"] = "şç", + ["ъ"] = "ʺ", + ["ы"] = "ı", + ["ь"] = "ʹ", + ["э"] = "e", + ["ю"] = "yu", + ["я"] = "ya" +} +local mapping = {["дж"] = "c", ["Дж"] = "C", ["ң"] = "ñ", ["Ң"] = "Ñ", ["къ"] = "q", ["Къ"] = "Q", ["нъ"] = "ñ", ["Нъ"] = "Ñ"} +function export.tr(text, lang, sc) + -- Ё needs converting if is decomposed + text = text:gsub("ё", "ё"):gsub("Ё", "Ё") + + -- е after a vowel or at the beginning of a text becomes ye + text = mw.ustring.gsub(text, "([АОӨУҮЫЕЯЁЮИЕЪЬаоөуүыэяёюиеъь%A][\204\129\204\128]?)е", "%1yö") + text = mw.ustring.gsub(text, "^Е", "Yö") + text = mw.ustring.gsub(text, "^е", "yö") + text = mw.ustring.gsub(text, "([^Ѐ-ӿ])Е", "%1yö") + text = mw.ustring.gsub(text, "([^Ѐ-ӿ])е", "%1yö") + + return (mw.ustring.gsub(text, ".", tab)) +end + +return export diff --git a/wiktra/wikt/translit/kham-translit.lua b/wiktra/wikt/translit/kham-translit.lua new file mode 100644 index 0000000..a200791 --- /dev/null +++ b/wiktra/wikt/translit/kham-translit.lua @@ -0,0 +1,166 @@ +local export = {} +local gsub = mw.ustring.gsub +local match = mw.ustring.match + +local conv = { + -- consonants + ["क"] = "k", + ["ख"] = "kh", + ["ग"] = "g", + ["घ"] = "gh", + ["ङ"] = "ṅ", + ["च"] = "c", + ["छ"] = "ch", + ["ज"] = "j", + ["झ"] = "jh", + ["ञ"] = "ñ", + ["ट"] = "ṭ", + ["ठ"] = "ṭh", + ["ड"] = "ḍ", + ["ढ"] = "ḍh", + ["ण"] = "ṇ", + ["त"] = "t", + ["थ"] = "th", + ["द"] = "d", + ["ध"] = "dh", + ["न"] = "n", + ["प"] = "p", + ["फ"] = "ph", + ["ब"] = "b", + ["भ"] = "bh", + ["म"] = "m", + ["य"] = "y", + ["र"] = "r", + ["ल"] = "l", + ["व"] = "w", + ["ळ"] = "ḷ", + ["श"] = "ś", + ["ष"] = "ṣ", + ["स"] = "s", + ["ह"] = "h", + ["ज़"] = "z", + ["झ़"] = 
"zh", + ["व़"] = "ẏ", + -- + ["क़"] = "q", + ["ख़"] = "x", + ["ग़"] = "ġ", + ["ऴ"] = "ḻ", + ["ड़"] = "ṛ", + ["ढ़"] = "ṛh", + ["फ़"] = "f", + ["थ़"] = "θ", + ["द़"] = "ð", + ["ऩ"] = "ṉ", + ["ऱ"] = "ṟ", + ["स़"] = "z", + ["च़"] = "c", + ["छ़"] = "ch", + ["ॹ"] = "ž", + ["श़"] = "ž", + -- vowel diacritics + ["ि"] = "i", + ["ु"] = "u", + ["े"] = "e", + ["ो"] = "o", + ["ा"] = "ā", + ["ी"] = "ī", + ["ू"] = "ū", + ["ृ"] = "r̥", + ["ॄ"] = "r̥̄", + ["ॢ"] = "l̥", + ["ॣ"] = "l̥̄", + ["ै"] = "ai", + ["ौ"] = "au", + ["ॉ"] = "ŏ", + ["ॅ"] = "ĕ", + ["ॆ"] = "e", + ["ॊ"] = "o", + -- vowel signs + ["अ"] = "a", + ["इ"] = "i", + ["इ़"] = "ü", + ["उ"] = "u", + ["उ़"] = "ï", + ["ए"] = "e", + ["ए़"] = "ø", + ["ओ"] = "o", + ["आ"] = "ā", + ["ई"] = "ī", + ["ऊ"] = "ū", + ["ऋ"] = "r̥", + ["ॠ"] = "r̥̄", + ["ऌ"] = "l̥", + ["ॡ"] = "l̥̄", + ["ऐ"] = "ai", + ["औ"] = "au", + ["ऑ"] = "ŏ", + ["ॲ"] = "ĕ", + ["ऍ"] = "ĕ", + ["ऎ"] = "e", + ["ऒ"] = "o", + -- chandrabindu + ["ँ"] = "̃", + -- anusvara + ["ं"] = "̃", + -- visarga + ["ः"] = "ḥ", + -- virama + ["्"] = "", + -- glottal stop + ["ॽ"] = "’", + -- high spacing dot + ["ॱ"] = "'", + -- avagraha + ["ऽ"] = "'", + --- dot + ["."] = "'", + -- + ["°"] = "'", + -- numerals + ["०"] = "0", + ["१"] = "1", + ["२"] = "2", + ["३"] = "3", + ["४"] = "4", + ["५"] = "5", + ["६"] = "6", + ["७"] = "7", + ["८"] = "8", + ["९"] = "9", + -- punctuation + ["।"] = ".", -- danda + ["॥"] = ".", -- double danda + ["+"] = "", -- compound separator + -- abbreviation sign + ["॰"] = "." +} + +local all_cons, special_cons = "कखगघङचछजझञटठडढणतथदधनपफबभमयरलवसशषह", "कखगघचछजझटठडढणतथदधनपफबभमयरलवशषसह" +local vowel, vowel_sign = "aिुृेोाीूैौॉॅॆॊॄॢॣॆॊ", "अइउएओआईऊऋॠॡऌऐऔऎऒए़इ़ऑऍ" + +local function rev_string(text) + local result, length = {}, mw.ustring.len(text) + for i = length, 1, -1 do table.insert(result, mw.ustring.sub(text, i, i)) end + return table.concat(result) +end +function export.tr(text, lang, sc) + text = gsub(text, "([" .. all_cons .. "]़?)([" .. vowel .. 
"्]?)", function(c, d) return c .. (d == "" and "a" or d) end) + for word in mw.ustring.gmatch(text, "[ऀ-ॿa]+") do + local orig_word = word + word = rev_string(word) + word = gsub(word, "^a(़?)([" .. all_cons .. "])(.)(.?)", function(opt, first, second, third) return (((match(first, "[" .. special_cons .. "]") and match(second, "ं") or match(first, "[" .. special_cons .. "]") and match(second, "्") and not perm_cl[first .. second .. third]) or match(first .. second, "य[aिुृेोाीूैौॉॅॆॊआईउऊइएऐओऔ]") or match(first .. second, "ह[अaिुृेोाीूैौॉॅॆॊआईउऊइएऐओऔ]")) and "a" or "") .. opt .. first .. second .. third end) + + local escaped_orig_word = gsub(orig_word, "%+", "") + text = gsub(text, orig_word, rev_string(word)) + text = gsub(text, "ज्ञ", "gy") + text = gsub(text, "ॱ([" .. all_cons .. "])(़?)(्?)([वयरल]?)(़?)([" .. vowel .. "]?)([ँँं]?)(ः?)", "%1%2%3%4%5%6%7%8'") + end + text = gsub(text, ".़?", conv) + text = gsub(text, "a([iu])̃", "a͠%1") + text = gsub(text, "rh̥", "hr̥") + text = gsub(text, "[<>]", "") + return mw.ustring.toNFC(text) +end + +return export diff --git a/wiktra/wikt/translit/khar-translit.lua b/wiktra/wikt/translit/khar-translit.lua new file mode 100644 index 0000000..1b6ea56 --- /dev/null +++ b/wiktra/wikt/translit/khar-translit.lua @@ -0,0 +1,133 @@ +local export = {} +local u = mw.ustring.char +local gsub = mw.ustring.gsub + +local consonants = { + -- defer transliteration of null consonant. 
+ ["𐨀"] = "𐨀", + -- consonants + ["𐨐"] = "k", + ["𐨑"] = "kh", + ["𐨒"] = "g", + ["𐨓"] = "gh", + ["𐨕"] = "c", + ["𐨖"] = "ch", + ["𐨗"] = "j", + ["𐨙"] = "ñ", + ["𐨚"] = "ṭ", + ["𐨛"] = "ṭh", + ["𐨜"] = "ḍ", + ["𐨝"] = "ḍh", + ["𐨞"] = "ṇ", + ["𐨟"] = "t", + ["𐨠"] = "th", + ["𐨡"] = "d", + ["𐨢"] = "dh", + ["𐨣"] = "n", + ["𐨤"] = "p", + ["𐨥"] = "ph", + ["𐨦"] = "b", + ["𐨧"] = "bh", + ["𐨨"] = "m", + ["𐨩"] = "y", + ["𐨪"] = "r", + ["𐨫"] = "l", + ["𐨬"] = "v", + ["𐨭"] = "ś", + ["𐨮"] = "ṣ", + ["𐨯"] = "s", + ["𐨰"] = "z", + ["𐨱"] = "h", + ["𐨲"] = "ḱ", + ["𐨳"] = "ṭ́h", + ["𐨴"] = "ṭ́", + ["𐨵"] = "vh" +} + +local diacritics = { + -- matras + ["𐨁"] = "i", + ["𐨂"] = "u", + ["𐨃"] = "ṛ", + ["𐨅"] = "e", + ["𐨆"] = "o", + ["𐨌"] = "ā", + ["𐨁𐨌"] = "ī", + ["𐨂𐨌"] = "ū", + ["𐨃𐨌"] = "ṝ", + ["𐨅𐨌"] = "ai", + ["𐨆𐨌"] = "au", + ["𐨿"] = "", + ["‍𐨿"] = "Ⓙ", + ["𐨍"] = "a͚", + ["𐨂𐨍"] = "u͚" +} + +local tt = { + + -- vowels + ["𐨀"] = "", + -- anusvara + ["𐨎"] = "ṃ", -- until a better method is found + -- visarga + ["𐨏"] = "ḥ", + -- numerals + ["𐩀"] = "1", + ["𐩁"] = "2", + ["𐩂"] = "3", + ["𐩃"] = "4", + ["𐩄"] = "10", + ["𐩅"] = "20", + ["𐩆"] = "100", + ["𐩇"] = "1000", + -- punctuation + ["𐩖"] = ".", -- danda + ["𐩗"] = "." -- double danda +} + +function export.tr(text, lang, sc) + local failed = nil + if sc ~= "Khar" then return nil end + -- Consonants appear as part of sequences canonically equivalent to consonant+nukta+vowel, so + -- allowing for multiplicity and the string being normalised (by Wikimedia policy), the matching + -- RE is: + local form = "([𐨀𐨐-𐨵])(𐨹?)(‍?𐨿?)(𐨺?)(𐨍?)(𐨸?)([𐨁-𐨆]?[𐨁-𐨆]?𐨌?𐨍?)" + text = gsub(text, form, function(c, n1, h, n2, vi, n3, vm) + local d = h .. vi .. vm -- Expect at most one catenand to be non-empty. + local cdia = "" + local cons = consonants[c] + local retval + if n1 ~= "" then -- Cauda + if cons == "s" or cons == "ś" then + cdia = cdia .. u(0x0331) -- Combining macron below + else + cdia = cdia .. u(0x0301) -- Combining acute + end + end + if n2 ~= "" then cdia = cdia .. 
u(0x0323) end -- Combining dot below + if n3 ~= "" then cdia = cdia .. u(0x0304) end -- Combining macron + if d == "" then + retval = cons .. cdia .. "a" + else + local dia = diacritics[d] + if not dia then + mw.addWarning("Failed to transliterate 𐨀" .. d .. " in " .. text) + failed = true + dia = "D" + end + retval = cons .. cdia .. dia + end + return mw.ustring.toNFD(retval) + end) + + text = gsub(text, "(a)𐨀([iu])", "%1%2" .. u(0x308)) -- Apply diaeresis + text = gsub(text, ".", tt) + text = gsub(text, "lⒿy", "lý") + text = gsub(text, "Ⓙ", "") + text = gsub(text, u(0x301, 0x304), u(0x304, 0x301)) + + if failed then return nil end + return mw.ustring.toNFC(text) +end + +return export diff --git a/wiktra/wikt/translit/khv-translit.lua b/wiktra/wikt/translit/khv-translit.lua new file mode 100644 index 0000000..5a774cf --- /dev/null +++ b/wiktra/wikt/translit/khv-translit.lua @@ -0,0 +1,19 @@ +local export = {} + +local mapping1 = {["п"] = "p", ["б"] = "b", ["т"] = "t", ["д"] = "d", ["к"] = "k", ["г"] = "g", ["ц"] = "c", ["ч"] = "č", ["с"] = "s", ["з"] = "z", ["ш"] = "š", ["ж"] = "ž", ["х"] = "x", ["м"] = "m", ["н"] = "n", ["р"] = "r", ["л"] = "l", ["в"] = "v", ["й"] = "y", ["а"] = "a", ["е"] = "e", ["э"] = "e", ["и"] = "i", ["о"] = "o", ["у"] = "u", ["ы"] = "ɨ", ["ā"] = "ā", ["е̄"] = "ē", ["ӣ"] = "ī", ["о̄"] = "ō", ["ӯ"] = "ū", ["ы̄"] = "ɨ̄", ["ъ"] = "ʾ"} + +local mapping2 = {["пӏ"] = "p’", ["тӏ"] = "t’", ["кӏ"] = "k’", ["къ"] = "q’", ["цӏ"] = "c’", ["лӏ"] = "ƛ", ["кь"] = "ƛ’", ["чӏ"] = "č’", ["хъ"] = "q", ["лъ"] = "λ", ["гъ"] = "ġ", ["хӏ"] = "ḥ", ["гӏ"] = "a̯", ["гь"] = "h", ["аᵸ"] = "ã", ["еᵸ"] = "ẽ", ["эᵸ"] = "ẽ", ["иᵸ"] = "ĩ", ["оᵸ"] = "õ", ["уᵸ"] = "ũ", ["аӏ"] = "aʿ", ["еӏ"] = "eʿ", ["иӏ"] = "iʿ", ["оӏ"] = "oʿ", ["уӏ"] = "uʿ"} + +function export.tr(text, lang, sc) + local str_gsub = string.gsub + local UTF8_char = "[%z\1-\127\194-\244][\128-\191]*" + + text = str_gsub(text, mw.ustring.char(0x4C0), mw.ustring.char(0x4CF)) + + for pat, repl in pairs(mapping2) do 
text = str_gsub(text, pat, repl) end
+    text = str_gsub(text, UTF8_char, mapping1)
+
+    return text
+end
+
+return export
diff --git a/wiktra/wikt/translit/kim-translit.lua b/wiktra/wikt/translit/kim-translit.lua
new file mode 100644
index 0000000..57431bf
--- /dev/null
+++ b/wiktra/wikt/translit/kim-translit.lua
@@ -0,0 +1,99 @@
+local export = {}
+
+-- Cyrillic-to-Latin lookup used by export.tr, keyed by the source letter.
+-- The "Ч̡"/"ч̡" keys are two code points long (letter + combining hook).
+local tt = {
+    ["ү"] = "ü",
+    ["Ү"] = "Ü",
+    ["т"] = "t",
+    ["Т"] = "T",
+    ["һ"] = "h",
+    ["Һ"] = "H",
+    ["р"] = "r",
+    ["Р"] = "R",
+    ["ф"] = "f",
+    ["Ф"] = "F",
+    ["ю"] = "yu",
+    ["Ю"] = "Yu",
+    ["ш"] = "ş",
+    ["Ш"] = "Ş",
+    ["ь"] = "ʹ",
+    ["Ь"] = "ʹ",
+    ["ъ"] = "ʺ",
+    ["Ъ"] = "ʺ",
+    ["н"] = "n",
+    ["Н"] = "N",
+    ["п"] = "p",
+    ["П"] = "P",
+    ["й"] = "y",
+    ["Й"] = "Y",
+    ["л"] = "l",
+    ["Л"] = "L",
+    ["з"] = "z",
+    ["З"] = "Z",
+    ["е"] = "e",
+    ["Е"] = "E",
+    ["г"] = "g",
+    ["Г"] = "G",
+    ["б"] = "b",
+    ["Б"] = "B",
+    ["у"] = "u",
+    ["У"] = "U",
+    ["с"] = "s",
+    ["С"] = "S",
+    ["х"] = "x",
+    ["Х"] = "X",
+    ["ч"] = "ç",
+    ["Ч"] = "Ç",
+    ["щ"] = "şç",
+    ["Щ"] = "Şç",
+    ["я"] = "ya",
+    ["Я"] = "Ya",
+    ["ы"] = "ı",
+    ["Ы"] = "I",
+    ["э"] = "e",
+    ["Э"] = "E",
+    ["м"] = "m",
+    ["М"] = "M",
+    ["о"] = "o",
+    ["О"] = "O",
+    ["и"] = "i",
+    ["И"] = "I",
+    ["ё"] = "yo",
+    ["Ё"] = "Yo",
+    ["ж"] = "j",
+    ["Ж"] = "J",
+    ["к"] = "k",
+    ["К"] = "K",
+    ["д"] = "d",
+    ["Д"] = "D",
+    ["в"] = "v",
+    ["В"] = "V",
+    ["ц"] = "ts",
+    ["Ц"] = "Ts",
+    ["а"] = "a",
+    ["А"] = "A",
+    ["ң"] = "ñ",
+    ["Ң"] = "Ñ",
+    ["ә"] = "ä",
+    ["Ә"] = "Ä",
+    ["Ч̡"] = "C",
+    ["ч̡"] = "c",
+    ["ӄ"] = "q",
+    ["Ӄ"] = "Q",
+    ["ғ"] = "ğ",
+    ["Ғ"] = "Ğ",
+    ["ө"] = "ö",
+    ["Ө"] = "Ö"
+};
+
+-- Transliterate Cyrillic `text` to Latin and return the resulting string;
+-- characters without an entry in `tt` pass through unchanged.
+function export.tr(text)
+    -- "ч̡" spans two code points, so the one-character pass below can never
+    -- look it up; replace it first.
+    text = mw.ustring.gsub(text, "[Чч]̡", tt)
+    return (mw.ustring.gsub(text, ".", tt))
+end
+
+return export
diff --git a/wiktra/wikt/translit/kjh-translit.lua b/wiktra/wikt/translit/kjh-translit.lua
new file mode 100644
index 0000000..d55d9f8
--- /dev/null
+++ b/wiktra/wikt/translit/kjh-translit.lua
@@ -0,0 +1,86 @@
+local export = {}
+
+local tt
= {
+    ["А"] = "A",
+    ["а"] = "a",
+    ["Б"] = "B",
+    ["б"] = "b",
+    ["В"] = "V",
+    ["в"] = "v",
+    ["Г"] = "G",
+    ["г"] = "g",
+    ["Ғ"] = "Ğ",
+    ["ғ"] = "ğ",
+    ["Д"] = "D",
+    ["д"] = "d",
+    ["Е"] = "E",
+    ["е"] = "e",
+    ["Ё"] = "Yo",
+    ["ё"] = "yo",
+    ["Ж"] = "J",
+    ["ж"] = "j",
+    ["З"] = "Z",
+    ["з"] = "z",
+    ["И"] = "İ",
+    ["и"] = "i",
+    ["Й"] = "Y",
+    ["й"] = "y",
+    ["І"] = "Ì",
+    ["і"] = "ì",
+    ["К"] = "K",
+    ["к"] = "k",
+    ["Л"] = "L",
+    ["л"] = "l",
+    ["М"] = "M",
+    ["м"] = "m",
+    ["Н"] = "N",
+    ["н"] = "n",
+    ["Ң"] = "Ñ",
+    ["ң"] = "ñ",
+    ["О"] = "O",
+    ["о"] = "o",
+    ["Ӧ"] = "Ö", -- key is Cyrillic Ӧ (U+04E6); a Latin "Ö" key only mapped the Latin letter to itself
+    ["ӧ"] = "ö", -- key is Cyrillic ӧ (U+04E7)
+    ["П"] = "P",
+    ["п"] = "p",
+    ["Р"] = "R",
+    ["р"] = "r",
+    ["С"] = "S",
+    ["с"] = "s",
+    ["Т"] = "T",
+    ["т"] = "t",
+    ["У"] = "U",
+    ["у"] = "u",
+    ["Ӱ"] = "Ü",
+    ["ӱ"] = "ü",
+    ["Ф"] = "F",
+    ["ф"] = "f",
+    ["Х"] = "X",
+    ["х"] = "x",
+    ["Ц"] = "Ts",
+    ["ц"] = "ts",
+    ["Ч"] = "Ç",
+    ["ч"] = "ç",
+    ["Ӌ"] = "C",
+    ["ӌ"] = "c",
+    ["Ш"] = "Ş",
+    ["ш"] = "ş",
+    ["Щ"] = "Şç",
+    ["щ"] = "şç",
+    ["Ъ"] = "ʺ",
+    ["ъ"] = "ʺ",
+    ["Ы"] = "I",
+    ["ы"] = "ı",
+    ["Ь"] = "’",
+    ["ь"] = "’",
+    ["Э"] = "E",
+    ["э"] = "e",
+    ["Ю"] = "Yu",
+    ["ю"] = "yu",
+    ["Я"] = "Ya",
+    ["я"] = "ya"
+};
+
+function export.tr(text) return (mw.ustring.gsub(text, ".", tt)) end -- per-character lookup in tt; characters without an entry are kept as they are
+
+return export
diff --git a/wiktra/wikt/translit/kjj-translit.lua b/wiktra/wikt/translit/kjj-translit.lua
new file mode 100644
index 0000000..a4f3583
--- /dev/null
+++ b/wiktra/wikt/translit/kjj-translit.lua
@@ -0,0 +1,26 @@
+local export = {}
+
+local tt = {["б"] = "b", ["п"] = "p", ["в"] = "w", ["ф"] = "f", ["д"] = "d", ["т"] = "t", ["ц"] = "c", ["з"] = "z", ["с"] = "s", ["ч"] = "č", ["ж"] = "ž", ["ш"] = "š", ["г"] = "g", ["к"] = "k", ["х"] = "χ", ["ъ"] = "ʔ", ["м"] = "m", ["н"] = "n", ["р"] = "r", ["л"] = "l", ["й"] = "j", ["и"] = "i", ["е"] = "e", ["э"] = "e", ["а"] = "a", ["о"] = "o", ["у"] = "u", ["ы"] = "ɨ", ["Б"] = "B", ["П"] = "P", ["В"] = "W", ["Ф"] = "F", ["Д"] = "D", ["Т"] = "T", ["Ц"] = "C", ["З"] = "Z", ["С"] =
"S", ["Ч"] = "Č", ["Ж"] = "Ž", ["Ш"] = "Š", ["Г"] = "G", ["К"] = "K", ["Х"] = "Χ", ["Ъ"] = "ʔ", ["М"] = "M", ["Н"] = "N", ["Р"] = "R", ["Л"] = "L", ["Й"] = "J", ["И"] = "I", ["Е"] = "E", ["Э"] = "E", ["А"] = "A", ["О"] = "O", ["У"] = "U", ["Ы"] = "Ɨ"}; + +local tetraTrigraphs = {["къкъ"] = "qː", ["кхь"] = "kx", ["Къкъ"] = "Qː", ["Кхь"] = "Kx"} + +local digraphs = {["пп"] = "pː", ["пӏ"] = "ṗ", ["вв"] = "wː", ["тт"] = "tː", ["тӏ"] = "ṭ", ["цц"] = "cː", ["цӏ"] = "c̣", ["дж"] = "ǯ", ["чч"] = "čː", ["чӏ"] = "č̣", ["кк"] = "kː", ["кӏ"] = "ḳ", ["гг"] = "ɣ", ["хь"] = "x", ["къ"] = "qː", ["хъ"] = "q", ["кь"] = "q̇", ["гъ"] = "ʁ", ["гӏ"] = "ʕ", ["хӏ"] = "ħ", ["гь"] = "h", ["уь"] = "ü", ["оь"] = "ö", ["аь"] = "ä", ["Пп"] = "Pː", ["Пӏ"] = "Ṗ", ["Вв"] = "Wː", ["Тт"] = "Tː", ["Тӏ"] = "Ṭ", ["Цц"] = "Cː", ["Цӏ"] = "C̣", ["Дж"] = "Ǯ", ["Чч"] = "Čː", ["Чӏ"] = "Č̣", ["Кк"] = "Kː", ["Кӏ"] = "Ḳ", ["Гг"] = "Ɣ", ["Хь"] = "X", ["Къ"] = "Qː", ["Хъ"] = "Q", ["Кь"] = "Q̇", ["Гъ"] = "ʁ", ["Гӏ"] = "ʕ", ["Хӏ"] = "Ħ", ["Гь"] = "H", ["Уь"] = "Ü", ["Оь"] = "Ö", ["Аь"] = "Ä"} + +function export.tr(text, lang, sc) + local str_gsub = string.gsub + local UTF8char = "[%z\1-\127\194-\244][\128-\191]*" + + -- Convert uppercase palochka to lowercase. Lowercase is found in tables + -- above. 
+    text = str_gsub(text, mw.ustring.char(0x4C0), mw.ustring.char(0x4CF))
+
+    for grapheme, replacement in pairs(tetraTrigraphs) do text = str_gsub(text, grapheme, replacement) end
+
+    for digraph, replacement in pairs(digraphs) do text = str_gsub(text, digraph, replacement) end
+
+    text = str_gsub(text, UTF8char, tt)
+
+    return text
+end
+
+return export
diff --git a/wiktra/wikt/translit/kjp-translit.lua b/wiktra/wikt/translit/kjp-translit.lua
new file mode 100644
index 0000000..cbdc794
--- /dev/null
+++ b/wiktra/wikt/translit/kjp-translit.lua
@@ -0,0 +1,29 @@
+local export = {}
+local u = mw.ustring.char
+local gsub = mw.ustring.gsub
+local match = mw.ustring.match
+
+local initial_table = {["က"] = "k", ["ခ"] = "kh", ["ဂ"] = "g", ["ဃ"] = "gh", ["င"] = "ng", ["စ"] = "ts", ["ဆ"] = "tsh", ["ဇ"] = "z", ["စျ"] = "zh", ["ည"] = "ny", ["ဋ"] = "tta", ["ဌ"] = "tth", ["ဍ"] = "dda", ["ဎ"] = "ddha", ["ၮ"] = "nna", ["တ"] = "t", ["ထ"] = "th", ["ဒ"] = "d", ["ဓ"] = "dha", ["န"] = "n", ["ပ"] = "p", ["ဖ"] = "ph", ["ဗ"] = "f", ["ဘ"] = "bha", ["မ"] = "m", ["ယ"] = "y", ["ရ"] = "r", ["လ"] = "l", ["ဝ"] = "w", ["သ"] = "x", ["ဟ"] = "s", ["ဠ"] = "h", ["အ"] = "ll", ["ၜ"] = "’", ["ၯ"] = "ywa", ["ၰ"] = "ghwa"}
+
+local symbols = {["၀"] = "0", ["၁"] = "1", ["၂"] = "2", ["၃"] = "3", ["၄"] = "4", ["၅"] = "5", ["၆"] = "6", ["၇"] = "7", ["၈"] = "8", ["၉"] = "9", ["၊"] = "|", ["။"] = "||"}
+
+-- Romanise `text` by passing every run of Myanmar-block characters to the
+-- romaniser loaded from "my-pron"; digits and punctuation listed in `symbols`
+-- are converted first. Returns nil when the romaniser raises an error, or
+-- when Myanmar characters are left over and `debug_mode` is not set.
+function export.tr(text, lang, sc, debug_mode)
+    local m_pron = require("my-pron").get_romanisation
+    text = gsub(text, ".", symbols)
+    for word in mw.ustring.gmatch(text, "[က-႟ꩠ-ꩻ]+") do
+        -- Declared local so the pcall results do not leak into the global environment.
+        local success, translit = pcall(m_pron, word, nil, {2, ["type"] = "orthographic", ["name"] = "MLCTS"}, 2, "translit_module")
+        if not success then return nil end
+        -- A function replacement is inserted verbatim, so a "%" in the
+        -- romanisation cannot be misread as a capture reference.
+        text = gsub(text, word, function() return translit end, 1)
+    end
+    if mw.ustring.match(text, "[က-႟ꩠ-ꩻ]") and not debug_mode then return nil end
+    return text
+end
+
+return export
diff --git a/wiktra/wikt/translit/kk-translit.lua b/wiktra/wikt/translit/kk-translit.lua
new file mode
100644
index 0000000..3c9a875
--- /dev/null
+++ b/wiktra/wikt/translit/kk-translit.lua
@@ -0,0 +1,101 @@
+local export = {}
+
+-- Cyrillic-to-Latin letter map used by export.tr. Every key is listed once:
+-- "э"/"Э" map to "é"/"É"; an earlier, shadowed "e"/"E" pair is not repeated.
+local tt = {
+    ["ү"] = "ü",
+    ["Ү"] = "Ü",
+    ["т"] = "t",
+    ["Т"] = "T",
+    ["һ"] = "h",
+    ["Һ"] = "H",
+    ["р"] = "r",
+    ["Р"] = "R",
+    ["ф"] = "f",
+    ["Ф"] = "F",
+    ["ю"] = "yw",
+    ["Ю"] = "Yw",
+    ["ш"] = "ş",
+    ["Ш"] = "Ş",
+    ["ь"] = "ʹ",
+    ["Ь"] = "ʹ",
+    ["ъ"] = "ʺ",
+    ["Ъ"] = "ʺ",
+    ["н"] = "n",
+    ["Н"] = "N",
+    ["п"] = "p",
+    ["П"] = "P",
+    ["й"] = "y",
+    ["Й"] = "Y",
+    ["л"] = "l",
+    ["Л"] = "L",
+    ["з"] = "z",
+    ["З"] = "Z",
+    ["е"] = "e",
+    ["Е"] = "E",
+    ["г"] = "g",
+    ["Г"] = "G",
+    ["б"] = "b",
+    ["Б"] = "B",
+    ["у"] = "w",
+    ["У"] = "W",
+    ["с"] = "s",
+    ["С"] = "S",
+    ["х"] = "x",
+    ["Х"] = "X",
+    ["ч"] = "ç",
+    ["Ч"] = "Ç",
+    ["щ"] = "şş",
+    ["Щ"] = "Şş",
+    ["я"] = "ya",
+    ["Я"] = "Ya",
+    ["ы"] = "ı",
+    ["Ы"] = "I",
+    ["м"] = "m",
+    ["М"] = "M",
+    ["о"] = "o",
+    ["О"] = "O",
+    ["и"] = "ï",
+    ["И"] = "Ï",
+    ["ё"] = "yo",
+    ["Ё"] = "Yo",
+    ["ж"] = "j",
+    ["Ж"] = "J",
+    ["к"] = "k",
+    ["К"] = "K",
+    ["д"] = "d",
+    ["Д"] = "D",
+    ["в"] = "v",
+    ["В"] = "V",
+    ["ц"] = "c",
+    ["Ц"] = "C",
+    ["а"] = "a",
+    ["А"] = "A",
+    ["ң"] = "ñ",
+    ["Ң"] = "Ñ",
+    ["ә"] = "ä",
+    ["Ә"] = "Ä",
+    ["э"] = "é",
+    ["Э"] = "É",
+    ["ұ"] = "u",
+    ["Ұ"] = "U",
+    ["қ"] = "q",
+    ["Қ"] = "Q",
+    ["ғ"] = "ğ",
+    ["Ғ"] = "Ğ",
+    ["і"] = "i",
+    ["І"] = "İ",
+    ["ө"] = "ö",
+    ["Ө"] = "Ö",
+    ["ӯ"] = "u",
+    ["Ӯ"] = "U"
+};
+
+-- Transliterate `text` character by character through `tt`. Only the "Cyrl"
+-- script code is handled; nil is returned for any other value of `sc`.
+function export.tr(text, lang, sc)
+    if sc ~= "Cyrl" then return nil end
+    return (mw.ustring.gsub(text, ".", tt))
+end
+
+return export
diff --git a/wiktra/wikt/translit/km-translit.lua b/wiktra/wikt/translit/km-translit.lua
new file mode 100644
index 0000000..dcb18ae
--- /dev/null
+++ b/wiktra/wikt/translit/km-translit.lua
@@ -0,0 +1,348 @@
+local export = {}
+local gsub = mw.ustring.gsub
+local len = mw.ustring.len
+local match = mw.ustring.match
+local sub = mw.ustring.sub
+
+local cons_conv = {["ក"] = {"k", "a"}, ["ខ"] = {"kh",
"a"}, ["គ"] = {"k", "o"}, ["ឃ"] = {"kh", "o"}, ["ង"] = {"ng", "o"}, ["ច"] = {"ch", "a"}, ["ឆ"] = {"chh", "a"}, ["ជ"] = {"ch", "o"}, ["ឈ"] = {"chh", "o"}, ["ញ"] = {"nh", "o"}, ["ដ"] = {"d", "a"}, ["ឋ"] = {"th", "a"}, ["ឌ"] = {"d", "o"}, ["ឍ"] = {"th", "o"}, ["ណ"] = {"n", "a"}, ["ត"] = {"t", "a"}, ["ថ"] = {"th", "a"}, ["ទ"] = {"t", "o"}, ["ធ"] = {"th", "o"}, ["ន"] = {"n", "o"}, ["ប"] = {"b", "a"}, ["ផ"] = {"ph", "a"}, ["ព"] = {"p", "o"}, ["ភ"] = {"ph", "o"}, ["ម"] = {"m", "o"}, ["យ"] = {"y", "o"}, ["រ"] = {"r", "o"}, ["ល"] = {"l", "o"}, ["វ"] = {"v", "o"}, ["ឝ"] = {"sh", "a"}, ["ឞ"] = {"ss", "o"}, ["ស"] = {"s", "a"}, ["ហ"] = {"h", "a"}, ["ឡ"] = {"l", "a"}, ["អ"] = {"ʼ", "a"}, [""] = {"", ""}, ["ប៉"] = {"p", "a"}} + +local digraph = {["ហ្គ"] = "g", ["ហ្ន"] = "n", ["ហ្ម"] = "m", ["ហ្ល"] = "l", ["ហ្វ"] = "f", ["ហ្ស"] = "z"} + +local indep_vowel = {["ឣ"] = "ʼâ", ["ឤ"] = "ʼa", ["ឥ"] = "ʼĕ", ["ឦ"] = "ʼei", ["ឧ"] = "ʼŏ", ["ឨ"] = "ʼŏk", ["ឩ"] = "ʼŭ", ["ឪ"] = "ʼŏu", ["ឫ"] = "rœ̆", ["ឬ"] = "rœ", ["ឭ"] = "lœ̆", ["ឮ"] = "lœ", ["ឯ"] = "ʼé", ["ឰ"] = "ʼai", ["ឱ"] = "ʼaô", ["ឲ"] = "ʼaô", ["ឳ"] = "ʼâu"} + +local vowel_conv = { + [""] = {["a"] = "â", ["o"] = "ô"}, + ["ា"] = {["a"] = "a", ["o"] = "éa"}, + ["ិ"] = {["a"] = "ĕ", ["o"] = "ĭ"}, + ["ី"] = {["a"] = "ei", ["o"] = "i"}, + ["ឹ"] = {["a"] = "œ̆", ["o"] = "œ̆"}, + ["ឺ"] = {["a"] = "œ", ["o"] = "œ"}, + ["ុ"] = {["a"] = "ŏ", ["o"] = "ŭ"}, + ["ូ"] = {["a"] = "o", ["o"] = "u"}, + ["ួ"] = {["a"] = "uŏ", ["o"] = "uŏ"}, + ["ើ"] = {["a"] = "aeu", ["o"] = "eu"}, + ["ឿ"] = {["a"] = "eua", ["o"] = "eua"}, + ["ៀ"] = {["a"] = "iĕ", ["o"] = "iĕ"}, + ["េ"] = {["a"] = "é", ["o"] = "é"}, + ["ែ"] = {["a"] = "ê", ["o"] = "ê"}, + ["ៃ"] = {["a"] = "ai", ["o"] = "ey"}, + ["ោ"] = {["a"] = "aô", ["o"] = "oŭ"}, + ["ៅ"] = {["a"] = "au", ["o"] = "ŏu"}, + ["ុំ"] = {["a"] = "om", ["o"] = "ŭm"}, + ["ំ"] = {["a"] = "âm", ["o"] = "um"}, + ["ាំ"] = {["a"] = "ăm", ["o"] = "ŏâm"}, + ["ាំង"] = {["a"] = "ăng", ["o"] = "eăng"}, + ["ះ"] = {["a"] = "ăh", ["o"] = 
"eăh"}, + ["ុះ"] = {["a"] = "ŏh", ["o"] = "uh"}, + ["េះ"] = {["a"] = "éh", ["o"] = "éh"}, + ["ោះ"] = {["a"] = "aŏh", ["o"] = "uŏh"}, + ["ឹះ"] = {["a"] = "ĕh", ["o"] = "ĭh"}, + ["ិះ"] = {["a"] = "ĕh", ["o"] = "ĭh"}, + ["ៈ"] = {["a"] = "aʼ", ["o"] = "éaʼ"}, + ["័"] = {["a"] = "â", ["o"] = "ô"} +} + +local char_type = { + ["ក"] = "consonant", + ["ខ"] = "consonant", + ["គ"] = "consonant", + ["ឃ"] = "consonant", + ["ង"] = "consonant", + ["ច"] = "consonant", + ["ឆ"] = "consonant", + ["ជ"] = "consonant", + ["ឈ"] = "consonant", + ["ញ"] = "consonant", + ["ដ"] = "consonant", + ["ឋ"] = "consonant", + ["ឌ"] = "consonant", + ["ឍ"] = "consonant", + ["ណ"] = "consonant", + ["ត"] = "consonant", + ["ថ"] = "consonant", + ["ទ"] = "consonant", + ["ធ"] = "consonant", + ["ន"] = "consonant", + ["ប"] = "consonant", + ["ផ"] = "consonant", + ["ព"] = "consonant", + ["ភ"] = "consonant", + ["ម"] = "consonant", + ["យ"] = "consonant", + ["រ"] = "consonant", + ["ល"] = "consonant", + ["វ"] = "consonant", + ["ឝ"] = "consonant", + ["ឞ"] = "consonant", + ["ស"] = "consonant", + ["ហ"] = "consonant", + ["ឡ"] = "consonant", + ["អ"] = "consonant", + ["ឣ"] = "indep_vowel", + ["ឤ"] = "indep_vowel", + ["ឥ"] = "indep_vowel", + ["ឦ"] = "indep_vowel", + ["ឧ"] = "indep_vowel", + ["ឨ"] = "indep_vowel", + ["ឩ"] = "indep_vowel", + ["ឪ"] = "indep_vowel", + ["ឫ"] = "indep_vowel", + ["ឬ"] = "indep_vowel", + ["ឭ"] = "indep_vowel", + ["ឮ"] = "indep_vowel", + ["ឯ"] = "indep_vowel", + ["ឰ"] = "indep_vowel", + ["ឱ"] = "indep_vowel", + ["ឲ"] = "indep_vowel", + ["ឳ"] = "indep_vowel", + ["ា"] = "vowel_sign", + ["ិ"] = "vowel_sign", + ["ី"] = "vowel_sign", + ["ឹ"] = "vowel_sign", + ["ឺ"] = "vowel_sign", + ["ុ"] = "vowel_sign", + ["ូ"] = "vowel_sign", + ["ួ"] = "vowel_sign", + ["ើ"] = "vowel_sign", + ["ឿ"] = "vowel_sign", + ["ៀ"] = "vowel_sign", + ["េ"] = "vowel_sign", + ["ែ"] = "vowel_sign", + ["ៃ"] = "terminating_vowel", + ["ោ"] = "vowel_sign", + ["ៅ"] = "vowel_sign", + ["ំ"] = "terminating_vowel", + ["ះ"] = 
"terminating_vowel", + ["ៈ"] = "terminating_vowel", + ["៉"] = "consonant_shift", + ["៊"] = "consonant_shift", + ["់"] = "terminating_sign", + ["៌"] = "sign", + ["៍"] = "sign", + ["៎"] = "sign", + ["៏"] = "sign", + ["័"] = "sign", + ["៑"] = "sign", + ["្"] = "combining_sign", + ["៓"] = "sign", + ["។"] = "punctuation", + ["៕"] = "punctuation", + ["៖"] = "sign", + ["ៗ"] = "punctuation", + ["៘"] = "punctuation", + ["៙"] = "punctuation", + ["៚"] = "punctuation", + ["៛"] = "punctuation", + ["ៜ"] = "sign", + ["៝"] = "sign", + ["​"] = "ZWS" +} + +local sp_symbols = {["០"] = "0", ["១"] = "1", ["២"] = "2", ["៣"] = "3", ["៤"] = "4", ["៥"] = "5", ["៦"] = "6", ["៧"] = "7", ["៨"] = "8", ["៩"] = "9", ["៰"] = "0", ["៱"] = "1", ["៲"] = "2", ["៳"] = "3", ["៴"] = "4", ["៵"] = "5", ["៶"] = "6", ["៷"] = "7", ["៸"] = "8", ["៹"] = "9"} + +function export.tr(text, lang, sc, debug_mode) + text = gsub(text, "[០-៹]", sp_symbols) + text = gsub(text, "(.)្(.្.)", "%1​%2") + text = gsub(text, "([កខគឃងចឆជឈញដឋឌឍណតថទធនបផពភមយរលវឝឞសហឡអ]្[កខគឃងចឆជឈញដឋឌឍណតថទធនបផពភមយរលវឝឞសហឡអ])([កខគឃងចឆជឈញដឋឌឍណតថទធនបផពភមយរលវឝឞសហឡអ])", "​%1%2") + text = gsub(text, "([កខគឃងចឆជឈញដឋឌឍណតថទធនបផពភមយរលវឝឞសហឡអ])([កខគឃងចឆជឈញដឋឌឍណតថទធនបផពភមយរលវឝឞសហឡអ]្?[កខគឃងចឆជឈញដឋឌឍណតថទធនបផពភមយរលវឝឞសហឡអ])", "%1​%2") + text = gsub(text, "(.៍)", "​%1") + + for word in mw.ustring.gmatch(text, "[ក-៝​]+") do + local original_text = word + local c, chartype, syl, curr_syl = {}, {}, {}, {} + local progress = "none" + + for i = 1, len(word) do + c[i] = sub(word, i, i) + chartype[i] = char_type[c[i]] + end + + for i = 1, #c + 1 do + local next_types = {} + if i == #c + 1 or chartype[i] == "ZWS" then + progress = "none" + table.insert(syl, table.concat(curr_syl, "")) + curr_syl = {} + elseif progress == "none" then + if chartype[i] == "consonant" then + table.insert(curr_syl, c[i]) + progress = "initial" + else + table.insert(syl, c[i]) + end + elseif progress == "initial" then + if chartype[i] == "combining_sign" then + table.insert(curr_syl, c[i]) + 
progress = "initial_combining"
+                elseif chartype[i] == "sign" or chartype[i] == "consonant_shift" then
+                    table.insert(curr_syl, c[i])
+                elseif chartype[i] == "vowel_sign" then
+                    table.insert(curr_syl, c[i])
+                    progress = "vowel"
+                elseif chartype[i] == "terminating_vowel" then
+                    if c[i - 1] .. c[i] .. (c[i + 1] or "") == "ាំង" and (i == #c - 1 or (i > #c + 1 and chartype[i + 2] == "consonant")) then -- NOTE(review): i never exceeds #c in this branch, so "i > #c + 1" is always false; confirm the intended bound
+                        table.insert(curr_syl, c[i])
+                        progress = "vowel"
+                    else
+                        table.insert(curr_syl, c[i])
+                        table.insert(syl, table.concat(curr_syl, ""))
+                        curr_syl = {}
+                        progress = "none"
+                    end
+                elseif chartype[i] == "consonant" then
+                    local vowel_found = false -- local to this branch; look-ahead flag for the scan below
+                    local j, skipped = i, 0
+                    while not vowel_found do
+                        if not chartype[j] or chartype[j] == "punctuation" or chartype[j] == "indep_vowel" or chartype[j] == "terminating_sign" or chartype[j] == "ZWS" then
+                            skipped = 1
+                            break
+                        elseif chartype[j] == "consonant" or chartype[j] == "combining_sign" or (chartype[j] == "sign" and c[j] ~= "័") then
+                            table.insert(next_types, chartype[j])
+                        else
+                            vowel_found = true
+                        end
+                        j = j + 1
+                    end
+                    if skipped ~= 0 or match(table.concat(next_types, " "), "consonant s?i?g?n? ?consonant") then
+                        table.insert(curr_syl, c[i])
+                        progress = "coda"
+                    else
+                        table.insert(syl, table.concat(curr_syl, ""))
+                        curr_syl = {c[i]}
+                        progress = "initial"
+                    end
+                else
+                    table.insert(syl, c[i])
+                    progress = "none"
+                end
+            elseif progress == "initial_combining" then
+                if chartype[i] == "consonant" then
+                    table.insert(curr_syl, c[i])
+                    progress = "initial"
+                else
+                    table.insert(syl, c[i])
+                    progress = "none"
+                end
+            elseif progress == "vowel" then
+                if chartype[i] == "vowel_sign" then
+                    table.insert(curr_syl, c[i])
+                elseif chartype[i] == "terminating_vowel" then
+                    if c[i - 1] .. c[i] .. (c[i + 1] or "") == "ាំង" and (i == #c - 1 or (i > #c + 1 and chartype[i + 2] == "consonant")) then -- NOTE(review): same always-false "i > #c + 1" test as in the "initial" branch
+                        table.insert(curr_syl, c[i])
+                        progress = "vowel"
+                    else
+                        table.insert(curr_syl, c[i])
+                        table.insert(syl, table.concat(curr_syl, ""))
+                        curr_syl = {}
+                        progress = "none"
+                    end
+                elseif chartype[i] == "consonant" then
+                    local vowel_found = false -- local to this branch; look-ahead flag for the scan below
+                    local j, skipped = i, 0
+                    while not vowel_found do
+                        if not chartype[j] or chartype[j] == "punctuation" or chartype[j] == "indep_vowel" or chartype[j] == "terminating_sign" or chartype[j] == "ZWS" then
+                            skipped = 1
+                            break
+                        elseif chartype[j] == "consonant" or chartype[j] == "combining_sign" or (chartype[j] == "sign" and c[j] ~= "័") then
+                            table.insert(next_types, chartype[j])
+                        else
+                            vowel_found = true
+                        end
+                        j = j + 1
+                    end
+                    if skipped ~= 0 or match(table.concat(next_types, " "), "consonant s?i?g?n? ?consonant") then
+                        table.insert(curr_syl, c[i])
+                        progress = "coda"
+                    else
+                        table.insert(syl, table.concat(curr_syl, ""))
+                        curr_syl = {c[i]}
+                        progress = "initial"
+                    end
+                else
+                    table.insert(syl, c[i])
+                    progress = "none"
+                end
+            elseif progress == "coda" then
+                if chartype[i] == "combining_sign" then
+                    table.insert(curr_syl, c[i])
+                    progress = "coda_combining"
+                elseif chartype[i] == "sign" or chartype[i] == "terminating_sign" then
+                    table.insert(curr_syl, c[i])
+                else
+                    table.insert(syl, table.concat(curr_syl, ""))
+                    curr_syl = {}
+                    if chartype[i] == "consonant" then
+                        table.insert(curr_syl, c[i])
+                        progress = "initial"
+                    else
+                        table.insert(syl, c[i])
+                        progress = "none"
+                    end
+                end
+            elseif progress == "coda_combining" then
+                if chartype[i] == "consonant" then
+                    table.insert(curr_syl, c[i])
+                    progress = "coda"
+                else
+                    table.insert(syl, table.concat(curr_syl, ""))
+                    curr_syl = {}
+                    progress = "none"
+                end
+            end
+        end
+
+        for i = 1, #syl do
+            if match(syl[i], "៍") then
+                syl[i] = "" .. gsub(syl[i], ".", function(consonant) if cons_conv[consonant] then return cons_conv[consonant][1] end end) .. ""
+                break
+            end
+            syl[i] = gsub(syl[i], "់$", "")
+
+            syl[i] = gsub(syl[i], "^([កខគឃងចឆជឈញដឋឌឍណតថទធនបផពភមយរលវឝឞសហឡអ])្?([កខគឃងចឆជឈញដឋឌឍណតថទធនបផពភមយរលវឝឞសហឡអ]?)([៉៊]?)([ិីឹឺុូួើឿៀេែៃោៅា័]?[ំះៈ]?)([៉៊]?)([កខគឃងចឆជឈញដឋឌឍណតថទធនបផពភមយរលវឝឞសហឡអ]?៉?)្?([កខគឃងចឆជឈញដឋឌឍណតថទធនបផពភមយរលវឝឞសហឡអ]?)(៖?)$", function(initial_a, initial_b, cons_shifter_a, vowel, cons_shifter_b, coda_a, coda_b, optional_sign)
+                if cons_shifter_a .. cons_shifter_b .. vowel .. coda_a .. coda_b == "" and initial_b ~= "" and not match(syl[i], "្") then
+                    coda_a = initial_b
+                    initial_b = ""
+                end
+                local base = initial_a -- consonant whose series selects the vowel reading
+                if initial_b ~= "" and not match(initial_b, "[ងញនមយរលវ]") then base = initial_b end
+                if vowel .. coda_a .. coda_b == "ាំង" then vowel, coda_a, coda_b = "ាំង", "", "" end
+                optional_sign = gsub(optional_sign, "៖", "ː")
+
+                local cons_shifter, vowel_class = cons_shifter_a .. cons_shifter_b -- vowel_class ("a" or "o") is assigned by the chain below
+                if cons_shifter == "" and cons_conv[base] then
+                    vowel_class = cons_conv[base][2]
+                elseif cons_shifter == "៉" then
+                    vowel_class = "a"
+                elseif cons_shifter == "៊" then
+                    vowel_class = "o"
+                else
+                    return initial_a .. initial_b .. cons_shifter .. vowel .. coda_a .. coda_b .. optional_sign
+                end
+
+                if digraph[initial_a .. "្" .. initial_b] and (digraph[coda_a .. "្" .. coda_b] or (cons_conv[coda_a] and cons_conv[coda_b])) and vowel_conv[vowel] then
+                    return digraph[initial_a .. "្" .. initial_b] .. vowel_conv[vowel][vowel_class] .. (digraph[coda_a .. "្" .. coda_b] or cons_conv[coda_a][1] .. cons_conv[coda_b][1]) .. optional_sign
+
+                elseif cons_conv[initial_a] and cons_conv[initial_b] and vowel_conv[vowel] and cons_conv[coda_a] and cons_conv[coda_b] then
+                    return cons_conv[initial_a][1] .. cons_conv[initial_b][1] .. vowel_conv[vowel][vowel_class] .. cons_conv[coda_a][1] .. cons_conv[coda_b][1] ..
optional_sign
+                end
+            end)
+
+            if syl[i] == "ៗ" and i > 1 then syl[i] = syl[i - 1] end
+        end
+        word = table.concat(syl, "")
+        text = gsub(text, original_text, word)
+    end
+
+    text = gsub(text, ".", indep_vowel)
+    text = gsub(text, "([^ ]*) ៗ", "%1 %1")
+
+    if match(text, "[ក-៹]") and not debug_mode then
+        return nil
+    else
+        return text
+    end
+
+    -- To do: other signs
+end
+
+return export
diff --git a/wiktra/wikt/translit/kmr-translit.lua b/wiktra/wikt/translit/kmr-translit.lua
new file mode 100644
index 0000000..3b2f3a9
--- /dev/null
+++ b/wiktra/wikt/translit/kmr-translit.lua
@@ -0,0 +1,44 @@
+local export = {}
+
+-- Cyrillic-to-Latin map; the four two-character keys at the end are applied by the first pass of export.tr.
+local tt = {["а"] = "a", ["б"] = "b", ["щ"] = "c", ["ч"] = "ç", ["д"] = "d", ["ә"] = "e", ["е"] = "ê", ["э"] = "ê", ["ф"] = "f", ["г"] = "g", ["h"] = "h", ["ь"] = "i", ["и"] = "î", ["ж"] = "j", ["к"] = "k", ["л"] = "l", ["м"] = "m", ["н"] = "n", ["о"] = "o", ["п"] = "p", ["ԛ"] = "q", ["р"] = "r", ["с"] = "s", ["ш"] = "ş", ["т"] = "t", ["ӧ"] = "u", ["у"] = "û", ["в"] = "v", ["ԝ"] = "w", ["х"] = "x", ["й"] = "y", ["з"] = "z", ["А"] = "A", ["Б"] = "B", ["Щ"] = "C", ["Ч"] = "Ç", ["Д"] = "D", ["Ә"] = "E", ["Е"] = "Ê", ["Э"] = "Ê", ["Ф"] = "F", ["Г"] = "G", ["H"] = "H", ["Ь"] = "I", ["И"] = "Î", ["Ж"] = "J", ["К"] = "K", ["Л"] = "L", ["М"] = "M", ["Н"] = "N", ["О"] = "O", ["П"] = "P", ["Ԛ"] = "Q", ["Р"] = "R", ["С"] = "S", ["Ш"] = "Ş", ["Т"] = "T", ["Ӧ"] = "U", ["У"] = "Û", ["В"] = "V", ["Ԝ"] = "W", ["Х"] = "X", ["Й"] = "Y", ["З"] = "Z", ["г’"] = "ẍ", ["Г’"] = "Ẍ", ["h’"] = "ḧ", ["H’"] = "Ḧ"};
+
+-- Transliterate Cyrillic `text` to Latin: letter+’ digraphs first, then single
+-- letters, then the apostrophe rules for r’ and ә’. Returns the new string.
+function export.tr(text, lang, sc)
+    text = mw.ustring.gsub(text, "[ГгHh]’", tt)
+
+    text = mw.ustring.gsub(text, ".", tt)
+
+    text = mw.ustring.gsub(text, "^([rR])’", "%1") -- р’ож: roj
+    text = mw.ustring.gsub(text, "([rR])’", "%1%1") -- пьр’: pirr
+
+    local consonants = "bcçdfghḧjklmnpqrsştvwxẍzBCÇDFGHḦJKLMNPQRSŞTVWXẌZ"
+    local vowels = "aeêiîouûAEÊIÎOUÛ"
+
+    -- Plain (non-pattern) membership test: the neighbouring character may be
+    -- "(", "[", "%" or ".", which must not be interpreted as a Lua pattern.
+    local function contains(set, ch) return set:find(ch, 1, true) ~= nil end
+
+    -- handle ә’
+    text = mw.ustring.gsub(text, "()([eE])’", function(pos, e)
+        if pos == 1 then -- ә’ at beginning of string
+            local after1, after2 = mw.ustring.match(text, "^(.?)(.?)", pos + 2)
+            if contains(consonants, after1) and (after2 == "" -- followed by single consonant: ә’к → 'ek
+            or contains(consonants, after2)) then -- followed by two consonants: ә’скәр → 'esker
+                return "'" .. e
+            end
+        else
+            local before = mw.ustring.sub(text, pos - 1, pos - 1)
+            if contains(vowels, before) then -- preceded by vowel: щьмаә’т → cima'et
+                return "'" .. e
+            end
+        end
+
+        return e .. "'"
+    end)
+
+    return text
+end
+
+return export
diff --git a/wiktra/wikt/translit/kn-translit.lua b/wiktra/wikt/translit/kn-translit.lua
new file mode 100644
index 0000000..aed5826
--- /dev/null
+++ b/wiktra/wikt/translit/kn-translit.lua
@@ -0,0 +1,72 @@
+local export = {}
+
+local consonants = {["ಕ"] = "k", ["ಖ"] = "kh", ["ಗ"] = "g", ["ಘ"] = "gh", ["ಙ"] = "ṅ", ["ಚ"] = "c", ["ಛ"] = "ch", ["ಜ"] = "j", ["ಝ"] = "jh", ["ಞ"] = "ñ", ["ಟ"] = "ṭ", ["ಠ"] = "ṭh", ["ಡ"] = "ḍ", ["ಢ"] = "ḍh", ["ಣ"] = "ṇ", ["ತ"] = "t", ["ಥ"] = "th", ["ದ"] = "d", ["ಧ"] = "dh", ["ನ"] = "n", ["ಪ"] = "p", ["ಫ"] = "ph", ["ಬ"] = "b", ["ಭ"] = "bh", ["ಮ"] = "m", ["ಯ"] = "y", ["ರ"] = "r", ["ಱ"] = "ṟ", ["ಲ"] = "l", ["ವ"] = "v", ["ಶ"] = "ś", ["ಷ"] = "ṣ", ["ಸ"] = "s", ["ಹ"] = "h", ["ಳ"] = "ḷ", ["ೞ"] = "ḻ", ["ಫ಼"] = "f", ["ಜ಼"] = "z", ["ಳ಼"] = "zh"}
+
+local diacritics = {["ಾ"] = "ā", ["ಿ"] = "i", ["ೀ"] = "ī", ["ು"] = "u", ["ೂ"] = "ū", ["ೃ"] = "ṛ", ["ೄ"] = "r̥̄", ["ೆ"] = "e", ["ೇ"] = "ē", ["ೈ"] = "ai", ["ೊ"] = "o", ["ೋ"] = "ō", ["ೌ"] = "au"}
+
+local nonconsonants = {
+    -- vowels
+    ["ಅ"] = "a",
+    ["ಆ"] = "ā",
+    ["ಇ"] = "i",
+    ["ಈ"] = "ī",
+    ["ಉ"] = "u",
+    ["ಊ"] = "ū",
+    ["ಋ"] = "ṛ",
+    ["ೠ"] = "r̥̄",
+    ["ಌ"] = "l̥",
+    ["ೡ"] = "l̥̄",
+    ["ಎ"] = "e",
+    ["ಏ"] = "ē",
+    ["ಐ"] = "ai",
+    ["ಒ"] = "o",
+    ["ಓ"] = "ō",
+    ["ಔ"] = "au",
+    ["ಅಂ"] = "aṃ",
+    ["ಅಃ"] = "ah",
+    -- other symbols
+    ["ಂ"] = "ṃ", -- anusvara
+    ["ಃ"] = "ḥ", -- visarga
+    -- halant, supresses the inherent vowel "a"
+    ["್"] = "",
+    -- digits
+    ["೦"] = "0",
+    ["೧"] = "1",
+    ["೨"] = "2",
+    ["೩"] = "3",
+    ["೪"] = "4",
+    ["೫"] = "5",
+    ["೬"] = "6",
+    ["೭"] = "7",
+    ["೮"] = "8",
+    ["೯"] = "9"
+}
+
+-- translit any words or phrases
+-- Consonants get their dependent vowel sign (or the inherent "a") first; every
+-- remaining character is then looked up in `nonconsonants`.
+function export.tr(text, lang, sc)
+    -- The vowel-sign class lists every key of `diacritics`, including ೄ.
+    text = mw.ustring.gsub(text, "([ಕಖಗಘಙಚಛಜಝಞಟಠಡಢಣತಥದಧನಪಫಬಭಮಯರಱಲವಶಷಸಹಳೞಕಖ][಼]?)" .. "([ಾಿೀುೂೃೄೆೇೈೊೋೌ್]?)", function(c, d)
+        -- mw.log('match', c, d)
+        c = consonants[c] or c
+        if d == "" then
+            return c .. "a"
+        else
+            return c .. (diacritics[d] or d)
+        end
+    end)
+
+    text = mw.ustring.gsub(text, ".", nonconsonants)
+
+    -- anusvara
+    text = mw.ustring.gsub(text, "ṃ([kgṅ])", "ṅ%1")
+    text = mw.ustring.gsub(text, "ṃ([cjñ])", "ñ%1")
+    text = mw.ustring.gsub(text, "ṃ([ṭḍṇ])", "ṇ%1")
+    text = mw.ustring.gsub(text, "ṃ([tdn])", "n%1")
+    text = mw.ustring.gsub(text, "ṃ([pbm])", "m%1")
+
+    return text
+end
+
+return export
diff --git a/wiktra/wikt/translit/krc-translit.lua b/wiktra/wikt/translit/krc-translit.lua
new file mode 100644
index 0000000..59035e8
--- /dev/null
+++ b/wiktra/wikt/translit/krc-translit.lua
@@ -0,0 +1,60 @@
+local export = {}
+
+local digraphs = {["гь"] = "h", ["Гь"] = "H", ["гъ"] = "ğ", ["Гъ"] = "Ğ", ["уь"] = "ü", ["Уь"] = "Ü", ["юь"] = "yü", ["Юь"] = "Yü", ["дж"] = "c", ["Дж"] = "C", ["къ"] = "q", ["Къ"] = "Q", ["нг"] = "ñ", ["Нг"] = "Ñ", ["оь"] = "ö", ["Оь"] = "Ö", ["ёь"] = "yö", ["Ёь"] = "Yö", ["нъ"] = "ñ", ["Нъ"] = "Ñ"}
+
+local single_letters = {["а"] = "a", ["А"] = "A", ["б"] = "b", ["Б"] = "B", ["в"] = "v", ["В"] = "V", ["г"] = "g", ["Г"] = "G", ["д"] = "d", ["Д"] = "D", ["е"] = "e", ["Е"] = "E", ["ё"] = "yo", ["Ё"] = "Yo", ["ж"] = "j", ["Ж"] = "J", ["з"] = "z", ["З"] = "Z", ["и"] = "i", ["И"] = "İ", ["й"] = "y", ["Й"] = "Y", ["к"] = "k", ["К"] = "K", ["л"] = "l", ["Л"] = "L", ["м"] = "m", ["М"] = "M", ["н"] = "n", ["Н"] = "N", ["о"] = "o", ["О"] = "O", ["п"] = "p", ["П"] = "P", ["р"] = "r", ["Р"] = "R", ["с"] = "s", ["С"] = "S", ["т"] = "t", ["Т"] = "T", ["у"] = "u", ["У"] = "U", ["ф"] = "f", ["Ф"] = "F", ["х"] = "x", ["Х"] = "X", ["ц"] = "ts", ["Ц"] = "Ts", ["ч"] = "ç", ["Ч"] =
"Č", ["ш"] = "ş", ["Ш"] = "Ş", ["щ"] = "şç", ["Щ"] = "Şç", ["ъ"] = "", ["Ъ"] = "", ["ы"] = "ı", ["Ы"] = "I", ["ь"] = "ʹ", ["Ь"] = "ʹ", ["э"] = "e", ["Э"] = "E", ["ю"] = "yu", ["Ю"] = "Yu", ["я"] = "ya", ["Я"] = "Ya"} + +function export.tr(text, lang, sc) + for digraph, replacement in pairs(digraphs) do text = mw.ustring.gsub(text, digraph, replacement) end + + text = mw.ustring.gsub(text, "()([ЕеЮюЁё])", function(pos, iotated) + -- modifier letter apostrophe or right single quotation mark + local preceding = mw.ustring.sub(text, math.max(1, pos - 2), math.max(0, pos - 1)) + local capital = iotated == "Е" or iotated == "Ю" + local lower = mw.ustring.lower(iotated) + + local translit + if preceding == "" or mw.ustring.match(preceding, "[АОӨӘУЫЕЯЁЮИЕаоөәуыэяёюиеъь%A][́̀]?$") then + if capital then + if lower == "ю" then + return "Yu" + elseif lower == "ё" then + return "Yo" + else + return "Ye" + end + else + if lower == "ю" then + return "yu" + elseif lower == "ё" then + return "yo" + else + return "ye" + end + end + else + if capital then + if lower == "ю" then + return "Ü" + elseif lower == "ё" then + return "Ö" + else + return "E" + end + else + if lower == "ю" then + return "ü" + elseif lower == "ё" then + return "ö" + else + return "e" + end + end + end + return translit + end) + + return (mw.ustring.gsub(text, ".", single_letters)) +end + +return export diff --git a/wiktra/wikt/translit/ks-Arab-to-Deva-translit.lua b/wiktra/wikt/translit/ks-Arab-to-Deva-translit.lua new file mode 100644 index 0000000..a3654ad --- /dev/null +++ b/wiktra/wikt/translit/ks-Arab-to-Deva-translit.lua @@ -0,0 +1,211 @@ +-- TODO: long í support +-- TODO: sort out short e vs palatalisation +-- TODO: add rule for CẹC = CyaC +local u = mw.ustring.char +local gsub = mw.ustring.gsub + +local export = {} + +local vav = u(0x0648) +local ye = u(0x06CC) +local alif = u(0x0627) +local he = "ه" + +local vw_s_cfu = u(0x0650) -- pesh (i) +local vw_s_ccu = u(0x0655) -- hamza below +local vw_s_cbr = 
u(0x064F) -- zer (u) +local vw_s_mcu = u(0x0654) -- hamza above +local vw_s_ocu = u(0x064E) -- zabar (a) + +local vw_l_cbr = u(0x0657) -- inverted zer +local vw_l_cfu = u(0x0656) -- subscript alif + +local hat = u(0x065A) +local inverted_hat = u(0x065B) +local hats = hat .. inverted_hat + +local short_vowels_list = vw_s_cfu .. vw_s_ccu .. vw_s_cbr .. vw_s_mcu .. vw_s_ocu + +-- carrier + diacritic combos +local long_u = vav .. vw_l_cbr +local short_o = vav .. inverted_hat +local long_i = ye .. vw_l_cfu +local short_e = ye .. inverted_hat + +local vocalised_carrier = long_u .. short_o .. long_i .. short_e .. "ۆ" .. "ێ" .. "ۆ" + +local palatalisers = "ۍؠ" + +local consonants = "بپتٹجچدڈرڑزژسشفکگلمنهھےثحخذصضطظعغقۍۄیٲآ" +local consonants_no_conj = "بپتٹجچدڈرڑزژسشفکگلمنهےثحخذصضطظعغق" +local consonants_no_conj_r = "بپتٹجچدڈڑزژسشفکگلمنهےثحخذصضطظعغق" +local consonants_no_conj_n = "بپتٹجچدڈرڑزژسشفکگلمهےثحخذصضطظعغق" +local consonants_no_conj_r_n = "بپتٹجچدڈڑزژسشفکگلمهےثحخذصضطظعغق" + +local conv = { + -- consonants + ["ب"] = "b", + ["پ"] = "p", + ["ت"] = "t", + ["ٹ"] = "ṭ", + ["ث"] = "s", + ["ج"] = "j", + ["چ"] = "c", + ["ح"] = "h", + ["خ‬"] = "x", + ["د"] = "d", + ["ڈ"] = "ḍ", + ["ذ‬"] = "z", + ["ر"] = "r", + ["ڑ"] = "ṛ", + ["ز"] = "z", + ["ژ"] = "ć", + ["س"] = "s", + ["ش"] = "ś", + ["ص‬"] = "s", + ["ض‬"] = "z", + ["ط‬"] = "t", + ["ظ"] = "z", + ["ع"] = "ʿ", + ["غ"] = "ġ", + ["ف"] = "f", + ["ق"] = "q", + ["ک"] = "k", + ["گ"] = "g", + ["ل"] = "l", + ["م"] = "m", + ["ن"] = "n", + ["ه"] = "h", + ["ھ"] = "h", + + -- always word-final + ["ے"] = "y", + + -- incorrect palatalisation marker + ["ۍ"] = "'", + + -- broken/open vowels + ["ۄ"] = "ọ", + ["ؠ"] = "ẹ", -- optionally ẹ = ya + + -- a carries long vowels + ["ٲ"] = "ạ̄", + ["آ"] = "ā", + + -- short vowels + ["ۆ"] = "o", + ["ۆ"] = "o", + ["ێ"] = "e", + ["أ"] = "ạ", + ["إ"] = "ụ", + + -- numerals + ["۰"] = "0", + ["۱"] = "1", + ["۲"] = "2", + ["۳"] = "3", + ["۴"] = "4", + ["۵"] = "5", + ["۶"] = "6", + ["۷"] = "7", + ["۸"] = "8", + 
["۹"] = "9" +} + +local short_vowels = { + -- high vowels + [u(0x0650)] = "i", + [u(0x0655)] = "ụ", + [u(0x064F)] = "u", + [u(0x065F)] = "ụ̄", + + -- central vowels + [u(0x0654)] = "ạ", + + -- low vowels + [u(0x064E)] = "a" +} + +local alif = "ا" +local waw = "و" +local ye = "ی" + +-- Devanagari begins here + +local p1 = {["kh"] = "ख्", ["gh"] = "घ्", ["ch"] = "छ्", ["ćh"] = "छ़्", ["jh"] = "झ्", ["ṭh"] = "ठ्", ["ḍh"] = "ढ्", ["th"] = "थ्", ["dh"] = "ध्", ["ph"] = "फ्", ["bh"] = "भ्"} + +local p2 = {["k"] = "क्", ["g"] = "ग्", ["c"] = "च्", ["ć"] = "च़्", ["j"] = "ज्", ["z"] = "ज़्", ["ṭ"] = "ट्", ["ḍ"] = "ड्", ["t"] = "त्", ["d"] = "द्", ["n"] = "न्", ["p"] = "प्", ["b"] = "ब्", ["m"] = "म्", ["r"] = "र्", ["y"] = "य्", ["'"] = "य्", ["l"] = "ल्", ["ś"] = "श", ["s"] = "स्", ["v"] = "व्", ["0"] = "०", ["1"] = "१", ["2"] = "२", ["3"] = "३", ["4"] = "४", ["5"] = "५", ["6"] = "६", ["7"] = "७", ["8"] = "८", ["9"] = "९"} + +local vowels_non_initial = {["a"] = "", ["ā"] = "ा", ["ọ"] = "ॅ", ["ọ̄"] = "ॉ", ["e"] = "ॆ", ["ē"] = "े", ["ạ"] = "ऺ", ["ạ̄"] = "ऻ", ["i"] = "ि", ["ī"] = "ी", ["ụ"] = "ॖ", ["ụ̄"] = "ॗ", ["u"] = "ु", ["ū"] = "ू", ["o"] = "ॊ", ["ō"] = "ो"} + +local vowels_initial = {["a"] = "अ", ["ā"] = "आ", ["ọ"] = "अॅ", ["ọ̄"] = "ऑ", ["e"] = "ए'", ["ē"] = "ए", ["ạ"] = "अ'", ["ạ̄"] = "आ'", ["i"] = "इ", ["ī"] = "ई", ["ụ"] = "उ'", ["ụ̄"] = "ऊ'", ["u"] = "उ", ["ū"] = "ऊ", ["o"] = "ओ'", ["ō"] = "ओ"} + +function export.tr(text, lang, sc) + text = gsub(text, "([" .. palatalisers .. "]%f[%s%z])", "'") + + -- interconsonantal vav is a long ō sound + text = gsub(text, "([" .. consonants .. "]ھ?)" .. vav .. "([" .. consonants .. "])", "%1ō%2") + + -- intervocalic alif is a long a sound + text = gsub(text, "([" .. consonants .. "و " .. "])" .. alif .. "([" .. consonants .. "و" .. "])", "%1ā%2") + -- long /u:/ and /i:/ + text = gsub(text, vav .. vw_s_cbr .. vav .. "([" .. consonants .. "])", vav .. "ū%1") + text = gsub(text, "([" .. consonants .. "])" .. vw_s_cfu .. ye .. "([" .. 
consonants .. "])", "%1ī%2") + + -- vav with hat = short o + text = gsub(text, vav .. hats, "o") + + -- vav with short vowel + text = gsub(text, vav .. "([" .. short_vowels_list .. "])", function(c) return "v" .. short_vowels[c] end) + + -- final he + short vowel disregards the he and transliterates the vowel + text = gsub(text, "ہ([" .. short_vowels_list .. "])", short_vowels) + + -- word-initial alif + vowelled carrier drops the alif + text = gsub(text, "^" .. alif .. "([" .. vocalised_carrier .. "])", "%1") + + -- word-initial alif + short vowel diacritic drops the alif + text = gsub(text, "^" .. alif .. "([" .. short_vowels_list .. "])", "%1") + + -- nun or re with hat + -- TODO: add support for re + text = gsub(text, "ن" .. "([" .. hats .. "])", "n") + + -- ye with hat = short e + text = gsub(text, ye .. "([" .. hats .. "])", "e") + + -- vav with inverted pish = long u + text = gsub(text, long_u, "ū") + + -- intervocalic ye is a long a sound + text = gsub(text, "([" .. consonants .. "])" .. ye .. "([" .. consonants .. "])", "%1ē%2") + + -- word-final alif and ye + text = gsub(text, "([" .. consonants .. "])" .. ye .. "$", "%1ī") + text = gsub(text, "([" .. consonants .. "])" .. alif .. 
"$", "%1ā") + + -- regard the consonant + short vowel combinations throughout + text = gsub(text, ".", short_vowels) + + text = gsub(text, "[أإبپتٹجچدڈرڑزژسشفکگلمنهھےثحخذصضطظعغقۍۄؠٲآۆۆێ]", conv) + + -- normal consonants left over + text = gsub(text, vav, "v") + text = gsub(text, "ہ", "h") + text = gsub(text, "ی", "y") + + -- Turn everything into Devanagari + text = gsub(text, ".", p1) + text = gsub(text, ".", p2) + + text = gsub(text, "[n][g]", "") + + -- introduce schwa and long /waa/ sound + text = gsub(text, "^[aāạạ̄eēoōiīuūụụ̄ọọ̄]", vowels_initial) + + text = gsub(text, "aāạạ̄eēoōiīuūụụ̄ọọ̄", vowels_non_initial) + + return text +end + +return export diff --git a/wiktra/wikt/translit/ks-Arab-translit.lua b/wiktra/wikt/translit/ks-Arab-translit.lua new file mode 100644 index 0000000..302d6eb --- /dev/null +++ b/wiktra/wikt/translit/ks-Arab-translit.lua @@ -0,0 +1,213 @@ +-- TODO: long í support +-- TODO: sort out short e vs palatalisation +-- TODO: add rule for CẹC = CyaC +local u = mw.ustring.char +local gsub = mw.ustring.gsub + +local export = {} + +local vav = u(0x0648) +local ye = u(0x06CC) +local alif = u(0x0627) +local he = "ه" + +local re = "ر" + +local vw_s_cfu = u(0x0650) -- pesh (i) +local vw_s_ccu = u(0x0655) -- hamza below +local vw_s_cbr = u(0x064F) -- zer (u) +local vw_s_mcu = u(0x0654) -- hamza above +local vw_s_ocu = u(0x064E) -- zabar (a) + +local vw_l_cbr = u(0x0657) -- inverted zer +local vw_l_cfu = u(0x0656) -- subscript alif + +local hat = u(0x065A) -- V +local inverted_hat = u(0x065B) -- inverted V +local hats = hat .. inverted_hat + +local short_vowels_list = vw_s_cfu .. vw_s_ccu .. vw_s_cbr .. vw_s_mcu .. vw_s_ocu + +-- carrier + diacritic combos +local long_u = vav .. vw_l_cbr +local short_o = vav .. inverted_hat +local long_i = ye .. vw_l_cfu +local short_e = ye .. inverted_hat + +local vocalised_carrier = long_u .. short_o .. long_i .. short_e .. "ۆ" .. "ێ" .. "ۆ" .. 
"اٟ" +local palatalisers = "ۍؠ" + +local consonants = "بپتٹجچدڈرڑزژسشفکگلمنهھےثحخذصضطظعغقۍۄیٲآ" +local consonants_latn = "bptṭsjchxdḍzrċsśzʿġfqkglmnhv" + +local conv = { + -- consonants + ["ب"] = "b", + ["پ"] = "p", + ["ت"] = "t", + ["ٹ"] = "ṭ", + ["ث"] = "s", + ["ج"] = "j", + ["چ"] = "c", + ["ح"] = "h", + ["خ‬"] = "x", + ["د"] = "d", + ["ڈ"] = "ḍ", + ["ذ‬"] = "z", + ["ر"] = "r", + ["ڑ"] = "ḍ", + ["ز"] = "z", + ["ژ"] = "ċ", + ["س"] = "s", + ["ش"] = "ś", + ["ص‬"] = "s", + ["ض‬"] = "z", + ["ط‬"] = "t", + ["ظ"] = "z", + ["ع"] = "ʿ", + ["غ"] = "ġ", + ["ف"] = "f", + ["ق"] = "q", + ["ک"] = "k", + ["گ"] = "g", + ["ل"] = "l", + ["م"] = "m", + ["ن"] = "n", + ["ه"] = "h", + ["ھ"] = "h", + + -- Why is this separate? + ["خ"] = "kh", + + -- always word-final + ["ے"] = "ē", + + -- short e to be treated separately + + -- incorrect palatalisation marker + ["ۍ"] = "'", + + -- broken/open vowels + -- confirm if there are other use cases for these two + ["ۄ"] = "ọ", + ["ؠ"] = "ẹ", -- optionally ẹ = ya or used at the end to indicate palatalisation + + -- a carries long vowels + ["ٲ"] = "ạ̄", + ["آ"] = "ā", + + ["یٖ"] = "ī", + ["اٟ"] = "ụ̄", + + -- vowels + ["ۆ"] = "o", + ["ێ"] = "e", + ["أ"] = "ạ", + + -- numerals + ["۰"] = "0", + ["۱"] = "1", + ["۲"] = "2", + ["۳"] = "3", + ["۴"] = "4", + ["۵"] = "5", + ["۶"] = "6", + ["۷"] = "7", + ["۸"] = "8", + ["۹"] = "9" +} + +local short_vowels = { + -- high vowels + [u(0x0650)] = "i", + [u(0x0655)] = "ụ", + [u(0x064F)] = "u", + [u(0x065F)] = "ụ̄", + + -- central vowels + [u(0x0654)] = "ạ", + + -- low vowels + [u(0x064E)] = "a" +} + +local alif = "ا" +local waw = "و" +local ye = "ی" + +function export.tr(text, lang, sc) + text = gsub(text, "([" .. palatalisers .. "]%f[%s%z])", "'") + + -- short e at the end of words is ē with V sign + text = gsub(text, "ے" .. hat, "e") + + -- ye with inverted hat is /j/ + -- always occurs after a consonant + text = gsub(text, consonants .. ye .. 
inverted_hat, "%1y") + + -- interconsonantal vav is a long ō sound + text = gsub(text, "([" .. consonants .. "]ھ?)" .. vav .. "([" .. consonants .. "])", "%1ō%2") + + -- intervocalic alif is a long a sound + text = gsub(text, "([" .. consonants .. "و " .. "])" .. alif .. "([" .. consonants .. "و" .. "])", "%1ā%2") + + -- final he + short vowel disregards the he and transliterates the vowel + text = gsub(text, "ہ([" .. short_vowels_list .. "])", short_vowels) + + -- word-initial alif + vowelled carrier drops the alif + text = gsub(text, "^" .. alif .. "([" .. vocalised_carrier .. "])", "%1") + + -- word-initial alif + short vowel diacritic drops the alif + text = gsub(text, "^" .. alif .. "([" .. short_vowels_list .. "])", "%1") + + -- re with inverted hat is just re + text = gsub(text, re .. inverted_hat, re) + + -- long /u:/ and /i:/ + text = gsub(text, vav .. vw_s_cbr .. vav .. "([" .. consonants .. "])", vav .. "ū%1") + text = gsub(text, "([" .. consonants .. "])" .. vw_s_cfu .. ye .. "([" .. consonants .. "])", "%1ī%2") + + -- vav with hat = short o + text = gsub(text, vav .. "[" .. hats .. "]", "o") + + -- vav with short vowel + text = gsub(text, vav .. "([" .. short_vowels_list .. "])", function(c) return "v" .. short_vowels[c] end) + + -- nun or re with hat + -- TODO: add support for re -- done? + text = gsub(text, "ن" .. "[" .. hats .. "]", "n") + + -- ye with hat = short e + -- defunct? + text = gsub(text, ye .. "[" .. hats .. "]", "e") + + -- vav with inverted pish = long u + text = gsub(text, long_u, "ū") + + -- long i + text = gsub(text, ye .. vw_l_cfu, "ī") + + -- intervocalic ye is a long a sound + text = gsub(text, "([" .. consonants .. "])" .. ye .. "([" .. consonants .. "])", "%1ē%2") + + -- word-final alif and ye + text = gsub(text, "([" .. consonants .. "])" .. ye .. "$", "%1ī") + text = gsub(text, "([" .. consonants .. "])" .. alif .. 
"$", "%1ā") + + -- regard the consonant + short vowel combinations throughout + text = gsub(text, ".", short_vowels) + + text = gsub(text, "[أإبپتٹجچدڈرڑزژسشفکگلمنهھےثحخذصضطظعغقۍۄؠٲآۆۆێ]", conv) + + -- normal consonants left over + text = gsub(text, vav, "v") + text = gsub(text, "ہ", "h") + text = gsub(text, "ی", "y") + + -- CẹC = CyaC + text = gsub(text, "([" .. consonants_latn .. "])ẹ([" .. consonants_latn .. "])", "%1ya%2") + + return text +end + +return export diff --git a/wiktra/wikt/translit/ks-Deva-translit.lua b/wiktra/wikt/translit/ks-Deva-translit.lua new file mode 100644 index 0000000..17e2c89 --- /dev/null +++ b/wiktra/wikt/translit/ks-Deva-translit.lua @@ -0,0 +1,69 @@ +local export = {} + +local consonants = {["क"] = "k", ["ख"] = "kh", ["ग"] = "g", ["ङ"] = "ṅ", ["च"] = "c", ["छ"] = "ch", ["ज"] = "j", ["ञ"] = "ñ", ["ट"] = "ṭ", ["ठ"] = "ṭh", ["ड"] = "ḍ", ["ण"] = "ṇ", ["त"] = "t", ["थ"] = "th", ["द"] = "d", ["न"] = "n", ["प"] = "p", ["फ"] = "ph", ["ब"] = "b", ["म"] = "m", ["य"] = "y", ["र"] = "r", ["ल"] = "l", ["व"] = "w", ["श"] = "ś", ["ष"] = "ṣ", ["स"] = "s", ["ह"] = "h", ["च़"] = "ts", ["छ़"] = "ċh", ["ज़"] = "z", ["फ़"] = "f"} + +local diacritics = {["ा"] = "ā", ["ॅ"] = "ạ", ["ॉ"] = "ạ̄", ["ॖ"] = "ụ", ["ॗ"] = "ụ̄", ["ि"] = "i", ["ी"] = "ī", ["ु"] = "u", ["ू"] = "ū", ["ृ"] = "ṛ", ["ॆ"] = "e", ["े"] = "ē", ["ै"] = "ai", ["ॊ"] = "o", ["ो"] = "ō", ["ॏ"] = "ô", ["ौ"] = "au", ["्"] = "", ["ऺ"] = "ạ", ["ऻ"] = "ạ̄"} + +local tt = { + -- vowels + ["अ"] = "a", + ["आ"] = "ā", + ["ॲ"] = "ạ", + ["ऑ"] = "ạ̄", + ["ॶ"] = "ụ", + ["ॷ"] = "ụ̄", + ["इ"] = "i", + ["ई"] = "ī", + ["उ"] = "u", + ["ऊ"] = "ū", + ["ऋ"] = "ṛ", + ["ऎ"] = "e", + ["ए"] = "ē", + ["ऐ"] = "ai", + ["ऒ"] = "o", + ["ओ"] = "ō", + ["ॵ"] = "ô", + ["औ"] = "au", + ["ॳ"] = "ạ", + ["ॴ"] = "ạ̄", + -- chandrabindu + ["ँ"] = "m̐", -- until a better method is found + -- anusvara + ["ं"] = "ṃ", -- until a better method is found + -- visarga + ["ः"] = "ḥ", + -- avagraha + ["ऽ"] = "’", + -- numerals + ["०"] = "0", + 
["१"] = "1", + ["२"] = "2", + ["३"] = "3", + ["४"] = "4", + ["५"] = "5", + ["६"] = "6", + ["७"] = "7", + ["८"] = "8", + ["९"] = "9", + -- punctuation + ["।"] = "." -- danda +} + +function export.tr(text, lang, sc) + text = mw.ustring.gsub(text, "(.[़]?.[़]?)$", "%1्") + text = mw.ustring.gsub(text, "([कखगघङचछजझञटठडढणतथदधनपफबभमयरलळवशषसह]़?)" .. -- tbd later + "([ािॅॉीुॖॗूृॄॢॣेैोौ्ॆॉॊौॏऺऻ]?)", function(c, d) + if d == "" then + return consonants[c] .. "a" + else + return consonants[c] .. diacritics[d] + end + end) + + text = mw.ustring.gsub(text, ".", tt) + text = mw.ustring.gsub(text, "्$", "") + + return text +end + +return export diff --git a/wiktra/wikt/translit/ks-translit.lua b/wiktra/wikt/translit/ks-translit.lua new file mode 100644 index 0000000..cb0a801 --- /dev/null +++ b/wiktra/wikt/translit/ks-translit.lua @@ -0,0 +1,135 @@ +local export = {} + +local u = mw.ustring.char + +local conv = { + -- regular consonants + ["ب"] = "b", + ["پ"] = "p", + ["ت"] = "t", + ["ٹ"] = "ṭ", + ["ج"] = "j", + ["چ"] = "c", + ["د"] = "d", + ["ڈ"] = "ḍ", + ["ر"] = "r", + ["ڑ"] = "ḍ", + ["ز"] = "z", + ["ژ"] = "ts", + ["س"] = "s", + ["ش"] = "sh", + ["ف"] = "f", + ["ک"] = "k", + ["گ"] = "g", + ["ل"] = "l", + ["م"] = "m", + ["ن"] = "n", + ["ه"] = "h", + ["ھ"] = "h", + + -- always word-final + ["ے"] = "y", + + -- arabic specific letters + ["ث"] = "th", + ["ح"] = "ḥ", + ["ح"] = "ẖ", + ["ذ"] = "ḏ", + ["ص"] = "ṣ", + ["ض"] = "ḍ", + ["ط"] = "ṭ", + ["ظ"] = "ẓ", + ["ع"] = "ʿ", + ["غ"] = "ġ", + ["ق"] = "q", + + -- palatalisation + ["ۍ"] = "'", + + -- broken vowels + ["ۄ"] = "wa", + ["ؠ"] = "ya", + + -- a carries long vowels + ["ٲ"] = "èa", + ["آ"] = "aa", + + -- numerals + ["۰"] = "0", + ["۱"] = "1", + ["۲"] = "2", + ["۳"] = "3", + ["۴"] = "4", + ["۵"] = "5", + ["۶"] = "6", + ["۷"] = "7", + ["۸"] = "8", + ["۹"] = "9" +} + +local short_vowels = { + -- high vowels + [u(0x0650)] = "i", + [u(0x0655)] = "ì", + [u(0x064F)] = "u", + + -- central vowels + [u(0x0654)] = "è", + + -- low vowels + 
[u(0x064E)] = "a" +} + +local y_diacritics = { + -- /e/ + [u(0x065B)] = "", + + -- /i:/ + [u(0x0656)] = "" +} + +local w_diacritics = { + -- /o/ + [u(0x065B)] = "", + + -- /u:/ + [u(0x0657)] = "" +} + +local a_diacritics = { + -- long closed central vowel + [u(0x065F)] = "" +} + +local C_diacritics = { -- this is just the short vowel marker set +} + +local n_diacritics = { + -- /n/ nasalise preceding vowel, no following vowel + [u(0x065B)] = "" +} + +local r_diacritics = { + -- /r/ cancel preceding vowel + [u(0x065B)] = "" +} + +local alif = "ا" +local waw = "و" +local ye = "ی" + +function export.tr(text, lang, sc) + text = mw.ustring.gsub(text, "([بپتٹجچدڈرڑزژسشفکگلمنهھےثححذصضطظعغقۍۄؠٲآيٖ]?)" .. "([short_vowels]?)", function(c, d) + if d == "" then + return conv[c] + else + return conv[c] .. short_vowels[d] + end + end) + + text = mw.ustring.gsub(text, ".", conv) + + return text +end + +return export diff --git a/wiktra/wikt/translit/ksw-translit.lua b/wiktra/wikt/translit/ksw-translit.lua new file mode 100644 index 0000000..5a082e8 --- /dev/null +++ b/wiktra/wikt/translit/ksw-translit.lua @@ -0,0 +1,91 @@ +local export = {} +local gsub = mw.ustring.gsub +local u = mw.ustring.char + +local tt1 = { + -- consonants + ["က"] = "kə", + ["ခ"] = "khə", + ["ဂ"] = "ghə", + ["ဃ"] = "xə", + ["င"] = "ngə", + ["စ"] = "sə", + ["ဆ"] = "hsə", + ["ၡ"] = "shə", + ["ည"] = "nyə", + ["တ"] = "tə", + ["ထ"] = "hṭə", + ["ဒ"] = "də", + ["န"] = "nə", + ["ပ"] = "pə", + ["ဖ"] = "hpə", + ["ဘ"] = "bə", + ["မ"] = "mə", + ["ယ"] = "yə", + ["ရ"] = "rə", + ["လ"] = "lə", + ["ဝ"] = "wə", + ["သ"] = "thə", + ["ဟ"] = "hə", + ["အ"] = "ʼə", + ["ဧ"] = "ahh ", + -- medials + ["ှ"] = "ˣhgə", + ["ၠ"] = "ˣyə", + ["ြ"] = "ˣrə", + ["ျ"] = "ˣlə", + ["ွ"] = "ˣwə", + -- dependent vowels + ["ါ"] = "ah ", + ["ံ"] = "ee ", + ["ု"] = "u ", + ["ူ"] = "oo ", + ["ၢ"] = "uh ", + ["့"] = "ay ", + ["ဲ"] = "eh ", + ["ိ"] = "oh ", + ["ီ"] = "aw ", + -- numerals + ["၀"] = "0", + ["၁"] = "1", + ["၂"] = "2", + ["၃"] = "3", + ["၄"] 
= "4", + ["၅"] = "5", + ["၆"] = "6", + ["၇"] = "7", + ["၈"] = "8", + ["၉"] = "9", + -- zero-width space (display it if it hides in a word) + [u(0x200B)] = "‼" +} + +local adjust0 = { + -- tones + ["ၢ်"] = u(0x0331), + ["ာ်"] = u(0x0323), + ["း"] = u(0x0308), + ["ၣ်"] = u(0x0300), + ["ၤ"] = u(0x0304) -- prolonged +} + +function export.tr(text, lang, sc, debug_mode) + + if type(text) == "table" then -- called directly from a template + text = text.args[1] + end + + for k, v in pairs(adjust0) do text = gsub(text, k, v) end + text = gsub(text, ".", tt1) + + text = gsub(text, "əˣ", "") + text = gsub(text, "ə([aeiou])", "%1") + text = gsub(text, "ə", "a ") + text = gsub(text, "%s([" .. u(0x0331) .. u(0x0323) .. u(0x0308) .. u(0x0300) .. u(0x0304) .. "])", "%1 ") + text = gsub(text, "(.-)%s*$", "%1") -- remove space at the end + + return text + +end + +return export diff --git a/wiktra/wikt/translit/kum-translit.lua b/wiktra/wikt/translit/kum-translit.lua new file mode 100644 index 0000000..cd4bc5e --- /dev/null +++ b/wiktra/wikt/translit/kum-translit.lua @@ -0,0 +1,60 @@ +local export = {} + +local digraphs = {["гь"] = "h", ["Гь"] = "H", ["гъ"] = "ğ", ["Гъ"] = "Ğ", ["уь"] = "ü", ["Уь"] = "Ü", ["къ"] = "q", ["Къ"] = "Q", ["нг"] = "ñ", ["Нг"] = "Ñ", ["оь"] = "ö", ["Оь"] = "Ö"} + +local single_letters = {["а"] = "a", ["А"] = "A", ["б"] = "b", ["Б"] = "B", ["в"] = "w", ["В"] = "w", ["г"] = "g", ["Г"] = "G", ["д"] = "d", ["Д"] = "D", ["е"] = "e", ["Е"] = "E", ["ё"] = "yo", ["Ё"] = "Yo", ["ж"] = "j", ["Ж"] = "J", ["з"] = "z", ["З"] = "Z", ["и"] = "i", ["И"] = "İ", ["й"] = "y", ["Й"] = "Y", ["к"] = "k", ["К"] = "K", ["л"] = "l", ["Л"] = "L", ["м"] = "m", ["М"] = "M", ["н"] = "n", ["Н"] = "N", ["о"] = "o", ["О"] = "O", ["п"] = "p", ["П"] = "P", ["р"] = "r", ["Р"] = "R", ["с"] = "s", ["С"] = "S", ["т"] = "t", ["Т"] = "T", ["у"] = "u", ["У"] = "U", ["ф"] = "f", ["Ф"] = "F", ["х"] = "x", ["Х"] = "X", ["ц"] = "ts", ["Ц"] = "Ts", ["ч"] = "ç", ["Ч"] = "Č", ["ш"] = "ş", ["Ш"] = "Ş", 
["щ"] = "şç", ["Щ"] = "Şç", ["ъ"] = "", ["Ъ"] = "", ["ы"] = "ı", ["Ы"] = "I", ["ь"] = "ʹ", ["Ь"] = "ʹ", ["э"] = "e", ["Э"] = "E", ["ю"] = "yu", ["Ю"] = "Yu", ["я"] = "ya", ["Я"] = "Ya"} + +function export.tr(text, lang, sc) + for digraph, replacement in pairs(digraphs) do text = mw.ustring.gsub(text, digraph, replacement) end + + text = mw.ustring.gsub(text, "()([ЕеЮюЁё])", function(pos, iotated) + -- modifier letter apostrophe or right single quotation mark + local preceding = mw.ustring.sub(text, math.max(1, pos - 2), math.max(0, pos - 1)) + local capital = iotated == "Е" or iotated == "Ю" + local lower = mw.ustring.lower(iotated) + + local translit + if preceding == "" or mw.ustring.match(preceding, "[АОӨӘУЫЕЯЁЮИЕаоөәуыэяёюиеъь%A][́̀]?$") then + if capital then + if lower == "ю" then + return "Yu" + elseif lower == "ё" then + return "Yo" + else + return "Ye" + end + else + if lower == "ю" then + return "yu" + elseif lower == "ё" then + return "yo" + else + return "ye" + end + end + else + if capital then + if lower == "ю" then + return "Ü" + elseif lower == "ё" then + return "Ö" + else + return "E" + end + else + if lower == "ю" then + return "ü" + elseif lower == "ё" then + return "ö" + else + return "e" + end + end + end + return translit + end) + + return (mw.ustring.gsub(text, ".", single_letters)) +end + +return export diff --git a/wiktra/wikt/translit/kv-translit.lua b/wiktra/wikt/translit/kv-translit.lua new file mode 100644 index 0000000..684a917 --- /dev/null +++ b/wiktra/wikt/translit/kv-translit.lua @@ -0,0 +1,90 @@ +local export = {} + +local tab = { + ["А"] = "A", + ["Б"] = "B", + ["В"] = "V", + ["Г"] = "G", + ["Д"] = "D", + ["Е"] = "E", + ["Ё"] = "Jo", + ["Ж"] = "Ž", + ["З"] = "Z", + ["И"] = "I", + ["І"] = "Ï", + ["Й"] = "J", + ["К"] = "K", + ["Л"] = "L", + ["М"] = "M", + ["Н"] = "N", + ["О"] = "O", + ["Ӧ"] = "Ö", + ["П"] = "P", + ["Р"] = "R", + ["С"] = "S", + ["Т"] = "T", + ["У"] = "U", + ["Ф"] = "F", + ["Х"] = "X", + ["Ц"] = "C", + ["Ч"] = "Č", + 
["Ш"] = "Š", + ["Щ"] = "Šč", + ["Ъ"] = "ʺ", + ["Ы"] = "Y", + ["Ь"] = "ʹ", + ["Э"] = "E", + ["Ю"] = "Ju", + ["Я"] = "Ja", + ["а"] = "a", + ["б"] = "b", + ["в"] = "v", + ["г"] = "g", + ["д"] = "d", + ["е"] = "e", + ["ё"] = "jo", + ["ж"] = "ž", + ["з"] = "z", + ["и"] = "i", + ["і"] = "ï", + ["й"] = "j", + ["к"] = "k", + ["л"] = "l", + ["м"] = "m", + ["н"] = "n", + ["о"] = "o", + ["ӧ"] = "ö", + ["п"] = "p", + ["р"] = "r", + ["с"] = "s", + ["т"] = "t", + ["у"] = "u", + ["ф"] = "f", + ["х"] = "x", + ["ц"] = "c", + ["ч"] = "č", + ["ш"] = "š", + ["щ"] = "šč", + ["ъ"] = "ʺ", + ["ы"] = "y", + ["ь"] = "ʹ", + ["э"] = "e", + ["ю"] = "ju", + ["я"] = "ja" +} + +function export.tr(text, lang, sc) + -- Ё needs converting if is decomposed + text = text:gsub("ё", "ё"):gsub("Ё", "Ё") + + -- е after a vowel or at the beginning of a word becomes je + text = mw.ustring.gsub(text, "([АОÖУЫЕЯЁЮИIЕЪЬаоöуыэяёюиiеъь%A][\204\129\204\128]?)е", "%1je") + text = mw.ustring.gsub(text, "^Е", "Je") + text = mw.ustring.gsub(text, "^е", "je") + text = mw.ustring.gsub(text, "([^Ѐ-ӿ])Е", "%1Je") + text = mw.ustring.gsub(text, "([^Ѐ-ӿ])е", "%1je") + + return (mw.ustring.gsub(text, ".", tab)) +end + +return export diff --git a/wiktra/wikt/translit/ky-translit.lua b/wiktra/wikt/translit/ky-translit.lua new file mode 100644 index 0000000..c29555d --- /dev/null +++ b/wiktra/wikt/translit/ky-translit.lua @@ -0,0 +1,177 @@ +local export = {} + +local digraphs = { + ["ейка"] = "eyka", + ["Ейка"] = "Eyka", + ["эк"] = "ek", + ["Эк"] = "Ek", + ["фак"] = "fak", + ["Фак"] = "Fak", + ["конт"] = "kont", + ["Конт"] = "Kont", + ["кл"] = "kl", + ["Кл"] = "Kl", + ["ишк"] = "işk", + ["Ишк"] = "İşk", + ["кте"] = "kte", + ["Кте"] = "Kte", + ["кре"] = "kre", + ["Кре"] = "Kre", + ["гр"] = "gr", + ["Гр"] = "Gr", + ["үрк"] = "ürk", + ["Үрк"] = "Ürk", + ["эрк"] = "erk", + ["Эрк"] = "Erk", + ["ерк"] = "erk", + ["Ерк"] = "Erk", + ["ирк"] = "irk", + ["Ирк"] = "İrk", + ["көрк"] = "körk", + ["Көрк"] = "Körk", + ["күкү"] = "kükü", + 
["Күкү"] = "Kükü", + ["коф"] = "kof", + ["Коф"] = "Kof", + ["курст"] = "kurst", + ["Курст"] = "Kurst", + ["ынок"] = "ınok", + ["Ынок"] = "Inok", + ["ганд"] = "gand", + ["Ганд"] = "Gand", + ["рока"] = "roka", + ["Рока"] = "Roka", + ["окси"] = "oksi", + ["Окси"] = "Oksi", + ["ика"] = "ika", + ["Ика"] = "İka", + ["гог"] = "gog", + ["Гог"] = "Gog", + ["ечка"] = "eçka", + ["Ечка"] = "Eçka", + ["очка"] = "oçka", + ["Очка"] = "Oçka", + ["гор"] = "gor", + ["Гор"] = "Gor", + ["фг"] = "fg", + ["Фг"] = "Fg", + ["ки"] = "ki", + ["Ки"] = "Ki", + ["ик"] = "ik", + ["Ик"] = "İk", + ["ке"] = "ke", + ["Ке"] = "Ke", + ["ек"] = "ek", + ["Ек"] = "Ek", + ["кү"] = "kü", + ["Кү"] = "Kü", + ["үк"] = "ük", + ["Үк"] = "Ük", + ["кө"] = "kö", + ["Кө"] = "Kö", + ["өк"] = "ök", + ["Өк"] = "Ök", + ["ги"] = "gi", + ["Ги"] = "Gi", + ["иг"] = "ig", + ["Иг"] = "İg", + ["ге"] = "ge", + ["Ге"] = "Ge", + ["ег"] = "eg", + ["Ег"] = "Eg", + ["гү"] = "gü", + ["Гү"] = "Gü", + ["үг"] = "üg", + ["Үг"] = "Üg", + ["гө"] = "gö", + ["Гө"] = "Gö", + ["өг"] = "ög", + ["Өг"] = "Ög" +} + +local tt = { + ["ү"] = "ü", + ["Ү"] = "Ü", + ["т"] = "t", + ["Т"] = "T", + ["р"] = "r", + ["Р"] = "R", + ["ф"] = "f", + ["Ф"] = "F", + ["ө"] = "ö", + ["Ө"] = "Ö", + ["ю"] = "yu", + ["Ю"] = "Yu", + ["ш"] = "ş", + ["Ш"] = "Ş", + ["ь"] = "ʹ", + ["Ь"] = "ʹ", + ["ъ"] = "ʺ", + ["Ъ"] = "ʺ", + ["н"] = "n", + ["Н"] = "N", + ["п"] = "p", + ["П"] = "P", + ["й"] = "y", + ["Й"] = "Y", + ["л"] = "l", + ["Л"] = "L", + ["з"] = "z", + ["З"] = "Z", + ["е"] = "e", + ["Е"] = "E", + ["г"] = "ğ", + ["Г"] = "Ğ", + ["б"] = "b", + ["Б"] = "B", + ["у"] = "u", + ["У"] = "U", + ["с"] = "s", + ["С"] = "S", + ["х"] = "h", + ["Х"] = "H", + ["ч"] = "ç", + ["Ч"] = "Ç", + ["щ"] = "şç", + ["Щ"] = "Şç", + ["я"] = "ya", + ["Я"] = "Ya", + ["ы"] = "ı", + ["Ы"] = "I", + ["э"] = "e", + ["Э"] = "E", + ["м"] = "m", + ["М"] = "M", + ["о"] = "o", + ["О"] = "O", + ["и"] = "i", + ["И"] = "İ", + ["ё"] = "yo", + ["Ё"] = "Yo", + ["ж"] = "j", + ["Ж"] = "J", + ["к"] = "q", + ["К"] = 
"Q", + ["д"] = "d", + ["Д"] = "D", + ["в"] = "v", + ["В"] = "V", + ["ц"] = "ts", + ["Ц"] = "Ts", + ["а"] = "a", + ["А"] = "A", + ["ң"] = "ñ", + ["Ң"] = "Ñ" +}; + +function export.tr(text, lang, sc) + if sc == "Arab" then return nil end + + for digraph, replacement in pairs(digraphs) do text = mw.ustring.gsub(text, digraph, replacement) end + + text = mw.ustring.gsub(text, "([АОӨӘУЫЕЯЁЮИЕаоөәуыэяёюиеъь%A][́̀]?)([Ее])", function(a, e) return a .. (e == "е" and "ye" or "Ye") end):gsub("^Е", "Ye"):gsub("^е", "ye"); + + return (mw.ustring.gsub(text, ".", tt)) +end + +return export diff --git a/wiktra/wikt/translit/la-utilities.lua b/wiktra/wikt/translit/la-utilities.lua new file mode 100644 index 0000000..3644b60 --- /dev/null +++ b/wiktra/wikt/translit/la-utilities.lua @@ -0,0 +1,40 @@ +local m_links = require("links") + +local export = {} + +local lang = require("languages").getByCode("la") + +-- A wrapper function allowing the contents of this module to be called from +-- templates. For example, '{{#invoke:la-utilities|main|strip_macrons|mȳthos}}' +-- produces 'mythos'. 
+function export.main(frame) + if (frame.args[1] == "strip_macrons") then return lang:makeEntryName(frame.args[2]) end + if type(p[frame.args[1]]) == "function" then + return p[frame.args[1]](frame.args[2], frame.args[3]) + else + return p[frame.args[1]][frame.args[2]] + end +end + +function export.strip_macrons(frame_or_text) + if type(frame_or_text) == "table" then frame_or_text = frame_or_text.args[1] end + return lang:makeEntryName(frame_or_text) +end + +local patterns = {{"tūdō", "tūdin"}, {"is", ""}, {"ēs", ""}, {"āns", "ant"}, {"ēns", "ent"}, {"ōns", "ont"}, {"ceps", "cipit"}, {"us", "or"}, {"ex", "ic"}, {"ma", "mat"}, {"e", ""}, {"al", "āl"}, {"ar", "ār"}, {"men", "min"}, {"er", "r"}, {"or", "ōr"}, {"gō", "gin"}, {"ō", "ōn"}, {"ps", "p"}, {"bs", "b"}, {"s", "t"}, {"x", "c"}} + +function export.make_stem2(stem) + for _, pattern in ipairs(patterns) do + local key = pattern[1] + local val = pattern[2] + if mw.ustring.match(stem, key .. "$") then + stem = mw.ustring.gsub(stem, key .. "$", val) + require("debug").track("la-utilities/" .. key) + return stem + end + end + require("debug").track("la-utilities") + return stem +end + +return export diff --git a/wiktra/wikt/translit/language-like.lua b/wiktra/wikt/translit/language-like.lua new file mode 100644 index 0000000..70289b1 --- /dev/null +++ b/wiktra/wikt/translit/language-like.lua @@ -0,0 +1,135 @@ +local export = {} + +-- Implementation of getOtherNames() for languages, etymology languages, +-- families and scripts. If `onlyOtherNames` is passed in, only return +-- the names in the `otherNames` field, otherwise combine `otherNames`, +-- `aliases` and `varieties`. +function export.getOtherNames(self, onlyOtherNames) + local data + if self._extraData then + data = self._extraData + else + data = self._rawData + end + if onlyOtherNames then return data.otherNames or {} end + -- Combine otherNames, aliases and varieties. First try to optimize and not create any + -- new memory. 
This is possible if exactly one of the three exists, and if it's `varieties`, + -- there are no nested lists in `varieties`. + if data.otherNames and not data.aliases and not data.varieties then + return data.otherNames + elseif data.aliases and not data.otherNames and not data.varieties then + return data.aliases + elseif data.varieties and not data.otherNames and not data.aliases then + local saw_table = false + for _, name in ipairs(data.varieties) do + if type(name) == "table" then + saw_table = true + break + end + end + if not saw_table then return data.varieties end + end + + -- Have to do it the "hard way": build a new flat list from all three fields. + local ret = {} + if data.otherNames then for _, name in ipairs(data.otherNames) do table.insert(ret, name) end end + if data.aliases then for _, name in ipairs(data.aliases) do table.insert(ret, name) end end + if data.varieties then + for _, name in ipairs(data.varieties) do + if type(name) == "table" then + for _, n in ipairs(name) do table.insert(ret, n) end + else + table.insert(ret, name) + end + end + end + return ret +end + +-- Implementation of getVarieties() for languages, etymology languages, +-- families and scripts. If `flatten` is passed in, flatten down to a +-- list of strings; otherwise, keep the structure. +function export.getVarieties(self, flatten) + local data + if self._extraData then + data = self._extraData + else + data = self._rawData + end + if data.varieties then + -- If flattening not requested, just return them. + if not flatten then return data.varieties end + -- Check if no nested table; if so, just return the stored list itself (not a copy). + local saw_table = false + for _, name in ipairs(data.varieties) do + if type(name) == "table" then + saw_table = true + break + end + end + if not saw_table then return data.varieties end + -- At this point, we need to flatten the varieties. 
+ local ret = {} + for _, name in ipairs(data.varieties) do + if type(name) == "table" then + for _, n in ipairs(name) do table.insert(ret, n) end + else + table.insert(ret, name) + end + end + return ret + else + return {} + end +end + +-- Implementation of template-callable getByCode() function for languages, +-- etymology languages, families and scripts. `item` is the language, +-- family or script in question; `args` is the arguments passed in by the +-- module invocation; `extra_processing`, if specified, is a function of +-- one argument (the requested property) and should return the value to +-- be returned to the caller, or nil if the property isn't recognized. +-- `extra_processing` is called after special-cased properties are handled +-- and before general-purpose processing code that works for all string +-- properties. +function export.templateGetByCode(item, args, extra_processing) + -- The item that the caller wanted to look up + local itemname = args[2] or error("Function to call (parameter 2) has not been specified.") + local list + if itemname == "getOtherNames" then + list = item:getOtherNames() + elseif itemname == "getOnlyOtherNames" then + list = item:getOtherNames(true) + elseif itemname == "getAliases" then + list = item:getAliases() + elseif itemname == "getVarieties" then + list = item:getVarieties(true) + end + if list then + local index = args[3]; + if index == "" then index = nil end + index = tonumber(index or error("Numeric index of the desired item in the list (parameter 3) has not been specified.")) + return list[index] or "" + end + + if itemname == "getFamily" and item.getFamily then return item:getFamily():getCode() end + + if extra_processing then + local retval = extra_processing(itemname) + if retval then return retval end + end + + if item[itemname] then + local ret = item[itemname](item) + + if type(ret) == "string" then + return ret + else + error("The function \"" .. itemname .. 
"\" did not return a string value.") + end + end + + error("Requested invalid item name \"" .. itemname .. "\".") +end + +return export diff --git a/wiktra/wikt/translit/languages.lua b/wiktra/wikt/translit/languages.lua new file mode 100644 index 0000000..e201463 --- /dev/null +++ b/wiktra/wikt/translit/languages.lua @@ -0,0 +1,485 @@ +local export = {} + +--[=[ +Throw an error for an invalid language code or script code. + +`lang_code` (required) is the bad code and can be nil or a non-string. + +`param` (required) is the name of the parameter in which the code was contained. It can be a string, a number + (for a numeric param, in which case the param will show up in the error message as an ordinal such as + "first" or "second"), or `true` if no parameter can be clearly identified. + +`code_desc` (optional) is text describing what the code is; by default, "language code". + +`template_text` (optional) is a string specifying the template that generated the error, or a function + to generate this string. If given, it will be displayed in the error message. + +`not_real_lang` (optional), if given, indicates that the code is not in the form of a language code + (e.g. it's a script code). Normally, this function checks for things that could plausibly be a language code: + two or three lowercase letters, two or three groups of three lowercase letters with hyphens between them. + If such a pattern is found, a different error message is displayed (indicating an invalid code) than otherwise + (indicating a missing code). If `not_real_lang` is given, this check is suppressed. 
+]=] + +function export.err(lang_code, param, code_desc, template_tag, not_real_lang) -- NOTE: `template_tag` is the argument the comment above calls `template_text` + local ordinals = {"first", "second", "third", "fourth", "fifth", "sixth", "seventh", "eighth", "ninth", "tenth", "eleventh", "twelfth", "thirteenth", "fourteenth", "fifteenth", "sixteenth", "seventeenth", "eighteenth", "nineteenth", "twentieth"} + + code_desc = code_desc or "language code" + + if not template_tag then + template_tag = "" + else + if type(template_tag) ~= "string" then template_tag = template_tag() end + template_tag = " (Original template: " .. template_tag .. ")" + end + local function err(msg) error(msg .. template_tag, 3) end -- level 3: report at the caller of export.err + local param_type = type(param) + local in_the_param + if param == true then + -- handled specially below + in_the_param = "" + else + if param_type == "number" then + param = ordinals[param] and (ordinals[param] .. " parameter") or ("parameter " .. param) -- numbers outside 1..20 have no ordinal word; fall back instead of concatenating nil + elseif param_type == "string" then + param = "parameter \"" .. param .. "\"" + else + err("The parameter name is " .. (param_type == "table" and "a table" or tostring(param)) .. ", but it should be a number or a string.") + end + in_the_param = " in the " .. param + end + + if not lang_code or lang_code == "" then + if param == true then + err("The " .. code_desc .. " is missing.") + else + err("The " .. param .. " (" .. code_desc .. ") is missing.") + end + elseif type(lang_code) ~= "string" then + err("The " .. code_desc .. in_the_param .. " is supposed to be a string but is a " .. type(lang_code) .. ".") + -- Can use string.find because language codes only contain ASCII. + elseif not_real_lang or lang_code:find("^%l%l%l?$") or lang_code:find("^%l%l%l%-%l%l%l$") or lang_code:find("^%l%l%l%-%l%l%l%-%l%l%l$") then + err("The " .. code_desc .. " \"" .. lang_code .. "\"" .. in_the_param .. " is not valid.") + else + err("Please specify a " .. code_desc .. in_the_param .. ". The value \"" .. lang_code .. 
"\" is not valid.") + end +end + +local function do_entry_name_or_sort_key_replacements(text, replacements) + if replacements.from then + for i, from in ipairs(replacements.from) do + local to = replacements.to[i] or "" + text = mw.ustring.gsub(text, from, to) + end + end + + if replacements.remove_diacritics then + text = mw.ustring.toNFD(text) + text = mw.ustring.gsub(text, "[" .. replacements.remove_diacritics .. "]", "") + text = mw.ustring.toNFC(text) + end + + return text +end + +local Language = {} + +function Language:getCode() return self._code end + +function Language:getCanonicalName() return self._rawData[1] or self._rawData.canonicalName end + +function Language:getDisplayForm() return self:getCanonicalName() end + +function Language:getOtherNames(onlyOtherNames) + self:loadInExtraData() + return require("language-like").getOtherNames(self, onlyOtherNames) +end + +function Language:getAliases() + self:loadInExtraData() + return self._extraData.aliases or {} +end + +function Language:getVarieties(flatten) + self:loadInExtraData() + return require("language-like").getVarieties(self, flatten) +end + +function Language:getType() return self._rawData.type or "regular" end + +function Language:getWikimediaLanguages() + if not self._wikimediaLanguageObjects then + local m_wikimedia_languages = require("wikimedia languages") + self._wikimediaLanguageObjects = {} + local wikimedia_codes = self._rawData.wikimedia_codes or {self._code} + + for _, wlangcode in ipairs(wikimedia_codes) do table.insert(self._wikimediaLanguageObjects, m_wikimedia_languages.getByCode(wlangcode)) end + end + + return self._wikimediaLanguageObjects +end + +function Language:getWikipediaArticle() + if self._rawData.wikipedia_article then + return self._rawData.wikipedia_article + elseif self._wikipedia_article then + return self._wikipedia_article + elseif self:getWikidataItem() and mw.wikibase then + self._wikipedia_article = mw.wikibase.sitelink(self:getWikidataItem(), "enwiki") + end + 
if not self._wikipedia_article then self._wikipedia_article = mw.ustring.gsub(self:getCategoryName(), "Creole language", "Creole") end + return self._wikipedia_article +end + +function Language:makeWikipediaLink() return "[[w:" .. self:getWikipediaArticle() .. "|" .. self:getCanonicalName() .. "]]" end + +function Language:getWikidataItem() + local item = self._rawData[2] + + if type(item) == "number" then + return "Q" .. item + else + return item + end +end + +function Language:getScripts() + if not self._scriptObjects then + local m_scripts = require("scripts") + self._scriptObjects = {} + + for _, sc in ipairs(self:getScriptCodes()) do table.insert(self._scriptObjects, m_scripts.getByCode(sc)) end + end + + return self._scriptObjects +end + +function Language:getScriptCodes() return self._rawData.scripts or self._rawData[4] or {"None"} end + +function Language:getFamily() + if self._familyObject then return self._familyObject end + + local family = self._rawData[3] or self._rawData.family + if family then self._familyObject = require("families").getByCode(family) end + + return self._familyObject +end + +function Language:getAncestors() + if not self._ancestorObjects then + self._ancestorObjects = {} + + if self._rawData.ancestors then + for _, ancestor in ipairs(self._rawData.ancestors) do table.insert(self._ancestorObjects, export.getByCode(ancestor) or require("etymology languages").getByCode(ancestor)) end + else + local fam = self:getFamily() + local protoLang = fam and fam:getProtoLanguage() or nil + + -- For the case where the current language is the proto-language + -- of its family, we need to step up a level higher right from the start. 
+ if protoLang and protoLang:getCode() == self:getCode() then + fam = fam:getFamily() + protoLang = fam and fam:getProtoLanguage() or nil + end + + while not protoLang and not (not fam or fam:getCode() == "qfa-not") do + fam = fam:getFamily() + protoLang = fam and fam:getProtoLanguage() or nil + end + + table.insert(self._ancestorObjects, protoLang) + end + end + + return self._ancestorObjects +end + +local function iterateOverAncestorTree(node, func) + for _, ancestor in ipairs(node:getAncestors()) do + if ancestor then + local ret = func(ancestor) or iterateOverAncestorTree(ancestor, func) + if ret then return ret end + end + end +end + +function Language:getAncestorChain() + if not self._ancestorChain then + self._ancestorChain = {} + local step = #self:getAncestors() == 1 and self:getAncestors()[1] or nil + + while step do + table.insert(self._ancestorChain, 1, step) + step = #step:getAncestors() == 1 and step:getAncestors()[1] or nil + end + end + + return self._ancestorChain +end + +function Language:hasAncestor(otherlang) + local function compare(ancestor) return ancestor:getCode() == otherlang:getCode() end + + return iterateOverAncestorTree(self, compare) or false +end + +function Language:getCategoryName(nocap) + local name = self:getCanonicalName() + + -- If the name already has "language" in it, don't add it. + if not name:find("[Ll]anguage$") then name = name .. " language" end + if not nocap then name = mw.getContentLanguage():ucfirst(name) end + return name +end + +function Language:makeCategoryLink() return "[[:Category:" .. self:getCategoryName() .. "|" .. self:getDisplayForm() .. 
"]]" end + +function Language:getStandardCharacters() return self._rawData.standardChars end + +function Language:makeEntryName(text) + text = mw.ustring.match(text, "^[¿¡]?(.-[^%s%p].-)%s*[؟?!;՛՜ ՞ ՟?!︖︕।॥။၊་།]?$") or text + + if self:getCode() == "ar" then + local U = mw.ustring.char + local taTwiil = U(0x640) + local waSla = U(0x671) + -- diacritics ordinarily removed by entry_name replacements + local Arabic_diacritics = U(0x64B, 0x64C, 0x64D, 0x64E, 0x64F, 0x650, 0x651, 0x652, 0x670) + + if text == waSla or mw.ustring.find(text, "^" .. taTwiil .. "?[" .. Arabic_diacritics .. "]" .. "$") then return text end + end + + if type(self._rawData.entry_name) == "table" then text = do_entry_name_or_sort_key_replacements(text, self._rawData.entry_name) end + + return text +end + +-- Return true if the language has display processing enabled, i.e. lang:makeDisplayText() +-- does non-trivial processing. +function Language:hasDisplayProcessing() return not not self._rawData.display end + +-- Apply display-text replacements to `text`, if any. +function Language:makeDisplayText(text) + if type(self._rawData.display) == "table" then text = do_entry_name_or_sort_key_replacements(text, self._rawData.display) end + + return text +end + +-- Add to data tables? +local has_dotted_undotted_i = {["az"] = true, ["crh"] = true, ["gag"] = true, ["kaa"] = true, ["tt"] = true, ["tr"] = true, ["zza"] = true} + +function Language:makeSortKey(name, sc) + if has_dotted_undotted_i[self:getCode()] then name = name:gsub("I", "ı") end + + name = mw.ustring.lower(name) + + -- Remove initial hyphens and * + local hyphens_regex = "^[-־ـ*]+(.)" + name = mw.ustring.gsub(name, hyphens_regex, "%1") + + -- If there are language-specific rules to generate the key, use those + if type(self._rawData.sort_key) == "table" then + name = do_entry_name_or_sort_key_replacements(name, self._rawData.sort_key) + elseif type(self._rawData.sort_key) == "string" then + name = require("" .. 
self._rawData.sort_key).makeSortKey(name, self:getCode(), sc and sc:getCode()) + end + + -- Remove parentheses, as long as they are either preceded or followed by something + name = mw.ustring.gsub(name, "(.)[()]+", "%1") + name = mw.ustring.gsub(name, "[()]+(.)", "%1") + + if has_dotted_undotted_i[self:getCode()] then name = name:gsub("i", "İ") end + + return mw.ustring.upper(name) +end + +function Language:overrideManualTranslit() + if self._rawData.override_translit then + return true + else + return false + end +end + +function Language:transliterate(text, sc, module_override) + if not ((module_override or self._rawData.translit_module) and text) then return nil end + + if module_override then require("debug").track("module_override") end + + return require("" .. (module_override or self._rawData.translit_module)).tr(text, self:getCode(), sc and sc:getCode() or nil) +end + +function Language:hasTranslit() return self._rawData.translit_module and true or false end + +function Language:link_tr() return self._rawData.link_tr and true or false end + +function Language:toJSON() + local entryNamePatterns = nil + local entryNameRemoveDiacritics = nil + + if self._rawData.entry_name then + entryNameRemoveDiacritics = self._rawData.entry_name.remove_diacritics + if self._rawData.entry_name.from then + entryNamePatterns = {} + for i, from in ipairs(self._rawData.entry_name.from) do + local to = self._rawData.entry_name.to[i] or "" + table.insert(entryNamePatterns, {from = from, to = to}) + end + end + end + + local ret = {ancestors = self._rawData.ancestors, canonicalName = self:getCanonicalName(), categoryName = self:getCategoryName("nocap"), code = self._code, entryNamePatterns = entryNamePatterns, entryNameRemoveDiacritics = entryNameRemoveDiacritics, family = self._rawData[3] or self._rawData.family, otherNames = self:getOtherNames(true), aliases = self:getAliases(), varieties = self:getVarieties(), scripts = self._rawData.scripts or self._rawData[4], type = 
self:getType(), wikimediaLanguages = self._rawData.wikimedia_codes, wikidataItem = self:getWikidataItem()} + + return require("JSON").toJSON(ret) +end + +-- Do NOT use these methods! +-- All uses should be pre-approved on the talk page! +function Language:getRawData() return self._rawData end + +function Language:getRawExtraData() + self:loadInExtraData() + return self._extraData +end + +Language.__index = Language + +function export.getDataModuleName(code) + if code:find("^%l%l$") then + return "languages/data2" + elseif code:find("^%l%l%l$") then + local prefix = code:sub(1, 1) + return "languages/data3/" .. prefix + elseif code:find("^[%l-]+$") then + return "languages/datax" + else + return nil + end +end + +function export.getExtraDataModuleName(code) + if code:find("^%l%l$") then + return "languages/extradata2" + elseif code:find("^%l%l%l$") then + local prefix = code:sub(1, 1) + return "languages/extradata3/" .. prefix + elseif code:find("^[%l-]+$") then + return "languages/extradatax" + else + return nil + end +end + +local function getRawLanguageData(code) + local modulename = export.getDataModuleName(code) + return modulename and mw.loadData("" .. modulename)[code] or nil +end + +local function getRawExtraLanguageData(code) + local modulename = export.getExtraDataModuleName(code) + return modulename and mw.loadData("" .. modulename)[code] or nil +end + +function Language:loadInExtraData() + if not self._extraData then + -- Load the extra data from its data module and cache it on this instance. + -- The metatable is the shared `Language` table, so caching there would make + -- every language object see the extra data of the first one loaded. + -- Use an empty table as a fallback if there is no extra data. + self._extraData = getRawExtraLanguageData(self._code) or {} + end +end + +function export.makeObject(code, data) + if data and data.deprecated then require("debug").track {"languages/deprecated", "languages/deprecated/" .. 
code} end + + return data and setmetatable({_rawData = data, _code = code}, Language) or nil +end + +function export.getByCode(code, paramForError, allowEtymLang, allowFamily) + if type(code) ~= "string" then error("The function getByCode expects a string as its first argument, but received " .. (code == nil and "nil" or "a " .. type(code)) .. ".") end + + local retval = export.makeObject(code, getRawLanguageData(code)) + if not retval and allowEtymLang then retval = require("etymology languages").getByCode(code) end + if not retval and allowFamily then retval = require("families").getByCode(code) end + if not retval and paramForError then + local codetext = nil + if allowEtymLang and allowFamily then + codetext = "language, etymology language or family code" + elseif allowEtymLang then + codetext = "language or etymology language code" + elseif allowFamily then + codetext = "language or family code" + else + codetext = "language code" + end + export.err(code, paramForError, codetext) + end + return retval +end + +function export.getByName(name, errorIfInvalid) + local byName = mw.loadData("languages/by name") + local code = byName.all and byName.all[name] or byName[name] + + if not code then + if errorIfInvalid then + error("The language name \"" .. name .. 
"\" is not valid.") + else + return nil + end + end + + return export.makeObject(code, getRawLanguageData(code)) +end + +function export.getByCanonicalName(name, errorIfInvalid, allowEtymLang, allowFamily) + local byName = mw.loadData("languages/canonical names") + local code = byName and byName[name] + + local retval = code and export.makeObject(code, getRawLanguageData(code)) or nil + if not retval and allowEtymLang then retval = require("etymology languages").getByCanonicalName(name) end + if not retval and allowFamily then + local famname = name:match("^(.*) languages$") + famname = famname or name + retval = require("families").getByCanonicalName(famname) + end + if not retval and errorIfInvalid then + local text + if allowEtymLang and allowFamily then + text = "language, etymology language or family name" + elseif allowEtymLang then + text = "language or etymology language name" + elseif allowFamily then + text = "language or family name" + else + text = "language name" + end + error("The " .. text .. " \"" .. name .. "\" is not valid.") + end + return retval +end + +function export.iterateAll() -- returns an iterator yielding one language object per entry of languages/alldata + mw.incrementExpensiveFunctionCount() + local m_data = mw.loadData("languages/alldata") + local func, t, var = pairs(m_data) + return function() + local code, data = func(t, var) + var = code -- advance the control variable; without this every call returned the first entry again + return export.makeObject(code, data) -- makeObject(nil, nil) yields nil, which ends a generic `for` + end +end + +--[[ If language is an etymology language, iterates through parent languages + until it finds a non-etymology language. 
]] +function export.getNonEtymological(lang) + while lang:getType() == "etymology language" do + local parentCode = lang:getParentCode() + lang = export.getByCode(parentCode) or require("etymology languages").getByCode(parentCode) or require("families").getByCode(parentCode) + end + + return lang +end + +return export diff --git a/wiktra/wikt/translit/languages/alldata.lua b/wiktra/wikt/translit/languages/alldata.lua new file mode 100644 index 0000000..f84ccc6 --- /dev/null +++ b/wiktra/wikt/translit/languages/alldata.lua @@ -0,0 +1,40 @@ +local modules = { + ["languages/data2"] = true, + ["languages/data3/a"] = true, + ["languages/data3/b"] = true, + ["languages/data3/c"] = true, + ["languages/data3/d"] = true, + ["languages/data3/e"] = true, + ["languages/data3/f"] = true, + ["languages/data3/g"] = true, + ["languages/data3/h"] = true, + ["languages/data3/i"] = true, + ["languages/data3/j"] = true, + ["languages/data3/k"] = true, + ["languages/data3/l"] = true, + ["languages/data3/m"] = true, + ["languages/data3/n"] = true, + ["languages/data3/o"] = true, + ["languages/data3/p"] = true, + ["languages/data3/q"] = true, + ["languages/data3/r"] = true, + ["languages/data3/s"] = true, + ["languages/data3/t"] = true, + ["languages/data3/u"] = true, + ["languages/data3/v"] = true, + ["languages/data3/w"] = true, + ["languages/data3/x"] = true, + ["languages/data3/y"] = true, + ["languages/data3/z"] = true, + ["languages/datax"] = true +} + +local m = {} + +for mname in pairs(modules) do + for key, value in pairs(require(mname)) do m[key] = value end + local has_extra, extra = pcall(require, (mname:gsub("data", "extradata"))) -- e.g. "languages/data2" -> "languages/extradata2"; the old gsub("data", "data") was a no-op that re-required the same module. NOTE(review): pcall keeps loading alive if an extradata module is absent - confirm those modules ship with this package + if has_extra and type(extra) == "table" then for lkey, lvalue in pairs(extra) do if m[lkey] then for key, value in pairs(lvalue) do m[lkey][key] = value end end end end -- copy otherNames/aliases/varieties etc. onto the matching language entry +end + +return m diff --git a/wiktra/wikt/translit/languages/by name.lua b/wiktra/wikt/translit/languages/by name.lua new file mode 100644 index 0000000..b4fa3f7 --- /dev/null +++ b/wiktra/wikt/translit/languages/by name.lua @@ -0,0 +1,11 @@ 
+local export = {} + +for code, data in pairs(require("languages/alldata")) do + export[data[1]] = code + + if data.otherNames then for i, otherName in pairs(data.otherNames) do if not export[otherName] then export[otherName] = code end end end + + if data.aliases then for i, alias in pairs(data.aliases) do if not export[alias] then export[alias] = code end end end +end + +return export diff --git a/wiktra/wikt/translit/languages/byTranslitModule.lua b/wiktra/wikt/translit/languages/byTranslitModule.lua new file mode 100644 index 0000000..bf8b743 --- /dev/null +++ b/wiktra/wikt/translit/languages/byTranslitModule.lua @@ -0,0 +1,21 @@ +return function(translitModule) + local m_redirect = mw.loadData("translit-redirect/data") + local langs = {} + + for code, data in pairs(require("languages/alldata")) do + if data.translit_module == translitModule then + langs[code] = data + elseif data.translit_module == "translit-redirect" and m_redirect[code] then + for scriptCode, translitRedirectData in pairs(m_redirect[code]) do if type(translitRedirectData) == "table" and translitRedirectData.module == translitModule then langs[code] = data end end + end + end + + local result = {} + local i = 0 + for code, data in pairs(langs) do + i = i + 1 + result[i] = require("languages").makeObject(code, data) + end + + return result +end diff --git a/wiktra/wikt/translit/languages/canonical names.lua b/wiktra/wikt/translit/languages/canonical names.lua new file mode 100644 index 0000000..6f2fc7b --- /dev/null +++ b/wiktra/wikt/translit/languages/canonical names.lua @@ -0,0 +1,8169 @@ +return { + ["'Are'are"] = "alu", + ["A'ou"] = "aou", + ["A-Hmao"] = "hmd", + ["A-Pucikwar"] = "apq", + ["Aari"] = "aiw", + ["Aasax"] = "aas", + ["Aba"] = "utp", + ["Abaga"] = "abg", + ["Abai"] = "poz-abi", + ["Abai Sungai"] = "abf", + ["Abanyom"] = "abm", + ["Abau"] = "aau", + ["Abaza"] = "abq", + ["Abenaki"] = "abe", + ["Abenlen Ayta"] = "abp", + ["Abidji"] = "abi", + ["Abinomn"] = "bsa", + ["Abipon"] = 
"axb", + ["Abishira"] = "ash", + ["Abkhaz"] = "ab", + ["Abom"] = "aob", + ["Abon"] = "abo", + ["Abron"] = "abr", + ["Abu"] = "ado", + ["Abu' Arapesh"] = "aah", + ["Abua"] = "abn", + ["Abui"] = "abz", + ["Abun"] = "kgr", + ["Abung"] = "abl", + ["Abure"] = "abu", + ["Abureni"] = "mgj", + ["Abé"] = "aba", + ["Acatepec Me'phaa"] = "tpx", + ["Acehnese"] = "ace", + ["Achagua"] = "aca", + ["Achang"] = "acn", + ["Ache"] = "yif", + ["Acheron"] = "acz", + ["Achi"] = "acr", + ["Acholi"] = "ach", + ["Achuar"] = "acu", + ["Achumawi"] = "acv", + ["Aché"] = "guq", + ["Acroá"] = "acs", + ["Adabe"] = "adb", + ["Adai"] = "xad", + ["Adamorobe Sign Language"] = "ads", + ["Adang"] = "adn", + ["Adangbe"] = "adq", + ["Adangme"] = "ada", + ["Adap"] = "adp", + ["Adasen"] = "tiu", + ["Adele"] = "ade", + ["Adhola"] = "adh", + ["Adi"] = "adi", + ["Adioukrou"] = "adj", + ["Adithinngithigh"] = "dth", + ["Adivasi Oriya"] = "ort", + ["Adiwasi Garasia"] = "gas", + ["Adja"] = "ajg", + ["Adnyamathanha"] = "adt", + ["Adonara"] = "adr", + ["Aduge"] = "adu", + ["Adyghe"] = "ady", + ["Adzera"] = "adz", + ["Aeka"] = "aez", + ["Aekyom"] = "awi", + ["Aequian"] = "xae", + ["Aer"] = "aeq", + ["Afade"] = "aal", + ["Afar"] = "aa", + ["Afghan Sign Language"] = "afg", + ["Afitti"] = "aft", + ["Afra"] = "ulf", + ["Afrihili"] = "afh", + ["Afrikaans"] = "af", + ["Afro-Seminole Creole"] = "afs", + ["Agarabi"] = "agd", + ["Agariya"] = "agi", + ["Agatu"] = "agc", + ["Agavotaguerra"] = "avo", + ["Agawam"] = "alg-aga", + ["Aghem"] = "agq", + ["Aghu"] = "ahh", + ["Aghu Tharrnggala"] = "gtu", + ["Aghul"] = "agx", + ["Aghwan"] = "xag", + ["Agi"] = "aif", + ["Agob"] = "kit", + ["Agoi"] = "ibm", + ["Aguacateca"] = "agu", + ["Aguano"] = "aga", + ["Aguaruna"] = "agr", + ["Aguna"] = "aug", + ["Agusan Manobo"] = "msm", + ["Agutaynen"] = "agn", + ["Agwagwune"] = "yay", + ["Ahanta"] = "aha", + ["Ahirani"] = "ahr", + ["Ahom"] = "aho", + ["Ahtna"] = "aht", + ["Ahwai"] = "nfd", + ["Ai-Cham"] = "aih", + ["Aighon"] = "aix", + 
["Aikanã"] = "tba", + ["Aiklep"] = "mwg", + ["Aimele"] = "ail", + ["Aimol"] = "aim", + ["Ainbai"] = "aic", + ["Ainu"] = "ain", + ["Aiome"] = "aki", + ["Airoran"] = "air", + ["Aisi"] = "mmq", + ["Aiton"] = "aio", + ["Aiwoo"] = "nfl", + ["Aja"] = "aja", + ["Ajagua"] = "sai-ajg", + ["Ajawa"] = "ajw", + ["Ajië"] = "aji", + ["Ajyíninka Apurucayali"] = "cpc", + ["Ak"] = "akq", + ["Aka (Central Africa)"] = "axk", + ["Aka (Sudan)"] = "soh", + ["Aka-Bea"] = "abj", + ["Aka-Bo"] = "akm", + ["Aka-Cari"] = "aci", + ["Aka-Kede"] = "akx", + ["Aka-Kol"] = "aky", + ["Aka-Kora"] = "ack", + ["Akan"] = "ak", + ["Akar-Bale"] = "acl", + ["Akaselem"] = "aks", + ["Akatek"] = "knj", + ["Akawaio"] = "ake", + ["Ake"] = "aik", + ["Akebu"] = "keu", + ["Akei"] = "tsr", + ["Akeu"] = "aeu", + ["Akha"] = "ahk", + ["Akhvakh"] = "akv", + ["Akkadian"] = "akk", + ["Akkala Sami"] = "sia", + ["Aklanon"] = "akl", + ["Akolet"] = "akt", + ["Akoose"] = "bss", + ["Akoye"] = "miw", + ["Akpa"] = "akf", + ["Akpes"] = "ibe", + ["Akrukay"] = "afi", + ["Akuku"] = "ayk", + ["Akum"] = "aku", + ["Akuntsu"] = "aqz", + ["Akurio"] = "ako", + ["Akuwagel"] = "bey", + ["Akwa"] = "akw", + ["Akyaung Ari"] = "nqy", + ["Al-Sayyid Bedouin Sign Language"] = "syy", + ["Alaba"] = "alw", + ["Alabama"] = "akz", + ["Alabat Island Agta"] = "dul", + ["Alacatlatzala Mixtec"] = "mim", + ["Alago"] = "ala", + ["Alagwa"] = "wbj", + ["Alak"] = "alk", + ["Alamblak"] = "amp", + ["Alangan"] = "alj", + ["Alapmunte"] = "apv", + ["Alas-Kluet Batak"] = "btz", + ["Alawa"] = "alh", + ["Alazapa"] = "nai-ala", + ["Albanian"] = "sq", + ["Albanian Sign Language"] = "sqk", + ["Alcozauca Mixtec"] = "xta", + ["Alege"] = "alf", + ["Alekano"] = "gah", + ["Alemannic German"] = "gsw", + ["Aleut"] = "ale", + ["Algerian Arabic"] = "arq", + ["Algerian Sign Language"] = "asp", + ["Algonquin"] = "alq", + ["Ali"] = "aiy", + ["Alladian"] = "ald", + ["Allar"] = "all", + ["Allentiac"] = "sai-all", + ["Alngith"] = "aid", + ["Alo Phola"] = "ypo", + ["Alor"] = "aol", + 
["Aloápam Zapotec"] = "zaq", + ["Alsea"] = "aes", + ["Alu"] = "mte", + ["Alu Kurumba"] = "xua", + ["Alugu"] = "aub", + ["Alumu-Tesu"] = "aab", + ["Alune"] = "alp", + ["Alungul"] = "aus-alu", + ["Aluo"] = "yna", + ["Alur"] = "alz", + ["Alutiiq"] = "ems", + ["Alutor"] = "alr", + ["Alviri-Vidari"] = "avd", + ["Alyawarr"] = "aly", + ["Ama"] = "amm", + ["Amahai"] = "amq", + ["Amahuaca"] = "amc", + ["Amaimon"] = "ali", + ["Amal"] = "aad", + ["Amanab"] = "amn", + ["Amanayé"] = "ama", + ["Amara"] = "aie", + ["Amarakaeri"] = "amr", + ["Amarasi"] = "aaz", + ["Amarizana"] = "awd-ama", + ["Amasi"] = "alv-ama", + ["Amatlán Zapotec"] = "zpo", + ["Amba"] = "rwm", + ["Ambai"] = "amk", + ["Ambakich"] = "aew", + ["Ambala Ayta"] = "abc", + ["Ambelau"] = "amv", + ["Ambele"] = "ael", + ["Amblong"] = "alm", + ["Ambo"] = "amb", + ["Ambonese Malay"] = "abs", + ["Ambrak"] = "aag", + ["Ambul"] = "apo", + ["Ambulas"] = "abt", + ["Amdang"] = "amj", + ["Amele"] = "aey", + ["American Sign Language"] = "ase", + ["Amganad Ifugao"] = "ifa", + ["Amharic"] = "am", + ["Ami"] = "amy", + ["Amis"] = "ami", + ["Ammonite"] = "sem-amm", + ["Amo"] = "amo", + ["Amol"] = "alx", + ["Amoltepec Mixtec"] = "mbz", + ["Amondawa"] = "adw", + ["Amorite"] = "sem-amo", + ["Ampanang"] = "apg", + ["Ampari Dogon"] = "aqd", + ["Amri Karbi"] = "ajz", + ["Amto"] = "amt", + ["Amurdag"] = "amg", + ["Ana Tinga Dogon"] = "dti", + ["Anaang"] = "anw", + ["Anakalangu"] = "akg", + ["Anal"] = "anm", + ["Anam"] = "pda", + ["Anambé"] = "aan", + ["Anamgura"] = "imi", + ["Anasi"] = "bpo", + ["Anauyá"] = "awd-ana", + ["Ancient Greek"] = "grc", + ["Ancient Ligurian"] = "xlg", + ["Ancient Macedonian"] = "xmk", + ["Ancient North Arabian"] = "xna", + ["Ancient Zapotec"] = "xzp", + ["Andai"] = "afd", + ["Andajin"] = "ajn", + ["Andalusian Arabic"] = "xaa", + ["Andaman Creole Hindi"] = "hca", + ["Andaqui"] = "ana", + ["Andarum"] = "aod", + ["Andegerebinha"] = "adg", + ["Andh"] = "anr", + ["Andi"] = "ani", + ["Andio"] = "bzb", + ["Andjingith"] = 
"aus-and", + ["Andoa"] = "anb", + ["Andoque"] = "ano", + ["Andoquero"] = "sai-and", + ["Andra-Hus"] = "anx", + ["Aneityum"] = "aty", + ["Anem"] = "anz", + ["Aneme Wake"] = "aby", + ["Anfillo"] = "myo", + ["Angaataha"] = "agm", + ["Angaité"] = "aqt", + ["Angal"] = "age", + ["Angal Enen"] = "aoe", + ["Angal Heneng"] = "akh", + ["Angami"] = "njm", + ["Angevin"] = "roa-ang", + ["Angguruk Yali"] = "yli", + ["Angika"] = "anp", + ["Angkamuthi"] = "avm", + ["Angkola Batak"] = "akb", + ["Angkula"] = "aus-ang", + ["Angloromani"] = "rme", + ["Angolar"] = "aoa", + ["Angor"] = "agg", + ["Angoram"] = "aog", + ["Angosturas Tunebo"] = "tnd", + ["Anguthimri"] = "awg", + ["Ani Phowa"] = "ypn", + ["Anii"] = "blo", + ["Animere"] = "anf", + ["Anindilyakwa"] = "aoi", + ["Anjam"] = "boj", + ["Ankave"] = "aak", + ["Anmatyerre"] = "amx", + ["Annobonese"] = "fab", + ["Anong"] = "nun", + ["Anor"] = "anj", + ["Anserma"] = "ans", + ["Ansus"] = "and", + ["Antakarinya"] = "ant", + ["Antigua and Barbuda Creole English"] = "aig", + ["Antillean Creole"] = "gcf", + ["Anu"] = "anl", + ["Anuak"] = "anu", + ["Anufo"] = "cko", + ["Anuki"] = "aui", + ["Anus"] = "auq", + ["Anuta"] = "aud", + ["Anyi"] = "any", + ["Anyin Morofo"] = "mtb", + ["Ao"] = "njo", + ["Aoheng"] = "pni", + ["Aore"] = "aor", + ["Ap Ma"] = "kbx", + ["Apalachee"] = "xap", + ["Apalaí"] = "apy", + ["Apali"] = "ena", + ["Apasco-Apoala Mixtec"] = "mip", + ["Apatani"] = "apt", + ["Apiaká"] = "api", + ["Apinayé"] = "apn", + ["Apma"] = "app", + ["Apolista"] = "awd-apo", + ["Aproumu Aizi"] = "ahp", + ["Apurinã"] = "apu", + ["Aputai"] = "apx", + ["Aquitanian"] = "xaq", + ["Arabana"] = "ard", + ["Arabela"] = "arl", + ["Arabic"] = "ar", + ["Aragonese"] = "an", + ["Araki"] = "akr", + ["Arakwal"] = "rkw", + ["Aralle-Tabulahan"] = "atq", + ["Aramaic"] = "arc", + ["Arammba"] = "stk", + ["Aranadan"] = "aaf", + ["Aranama-Tamique"] = "xrt", + ["Arandai"] = "jbj", + ["Araona"] = "aro", + ["Arapaho"] = "arp", + ["Arapaso"] = "arj", + ["Arara-Karo"] = 
"arr", + ["Ararandewára"] = "xaj", + ["Arawak"] = "arw", + ["Araweté"] = "awt", + ["Arawum"] = "awm", + ["Arbore"] = "arv", + ["Archi"] = "aqc", + ["Ardhamagadhi Prakrit"] = "pka", + ["Are"] = "mwc", + ["Areba"] = "aea", + ["Arem"] = "aem", + ["Argentine Sign Language"] = "aed", + ["Argobba"] = "agj", + ["Arguni"] = "agf", + ["Arhuaco"] = "arh", + ["Arhâ"] = "aqr", + ["Arhö"] = "aok", + ["Ari"] = "aac", + ["Aribwatsa"] = "laz", + ["Aribwaung"] = "ylu", + ["Arifama-Miniafia"] = "aai", + ["Arigidi"] = "aqg", + ["Arikapú"] = "ark", + ["Arikara"] = "ari", + ["Arikem"] = "ait", + ["Arin"] = "xrn", + ["Aringa"] = "luc", + ["Armazic"] = "xrm", + ["Armenian"] = "hy", + ["Armenian Sign Language"] = "aen", + ["Aromanian"] = "rup", + ["Arop-Lokep"] = "apr", + ["Arop-Sissano"] = "aps", + ["Arosi"] = "aia", + ["Arritinngithigh"] = "rrt", + ["Arta"] = "atz", + ["Arua"] = "aru", + ["Aruamu"] = "msy", + ["Aruek"] = "aur", + ["Aruop"] = "lsr", + ["Arutani"] = "atx", + ["Aruá"] = "arx", + ["As"] = "asz", + ["Asaro'o"] = "mtv", + ["Ashe"] = "ahs", + ["Ashkun"] = "ask", + ["Asho Chin"] = "csh", + ["Ashokan Prakrit"] = "inc-ash", + ["Ashraaf"] = "cus-ash", + ["Asháninka"] = "cni", + ["Ashéninka Pajonal"] = "cjo", + ["Ashéninka Perené"] = "prq", + ["Asi"] = "bno", + ["Asilulu"] = "asl", + ["Askopan"] = "eiv", + ["Asoa"] = "asv", + ["Assamese"] = "as", + ["Assan"] = "xss", + ["Assangori"] = "sjg", + ["Assiniboine"] = "asb", + ["Assyrian Neo-Aramaic"] = "aii", + ["Asturian"] = "ast", + ["Asu"] = "aum", + ["Asue Awyu"] = "psa", + ["Asumboa"] = "aua", + ["Asunción Mixtepec Zapotec"] = "zoo", + ["Asuri"] = "asr", + ["Ata"] = "atm", + ["Ata Manobo"] = "atd", + ["Atakapa"] = "aqp", + ["Atampaya"] = "amz", + ["Atanques"] = "cba-ata", + ["Atatláhuca Mixtec"] = "mib", + ["Atayal"] = "tay", + ["Atemble"] = "ate", + ["Ateso"] = "teo", + ["Athpare"] = "aph", + ["Ati"] = "atk", + ["Atikamekw"] = "atj", + ["Atohwaim"] = "aqm", + ["Atong (Cameroon)"] = "ato", + ["Atong (India)"] = "aot", + ["Atorada"] 
= "aox", + ["Atsahuaca"] = "atc", + ["Atsam"] = "cch", + ["Atsugewi"] = "atw", + ["Attapady Kurumba"] = "pkr", + ["Attié"] = "ati", + ["Au"] = "avt", + ["Auhelawa"] = "kud", + ["Aukan"] = "djk", + ["Aulua"] = "aul", + ["Aurá"] = "aux", + ["Aushi"] = "auh", + ["Aushiri"] = "avs", + ["Auslan"] = "asf", + ["Austral"] = "aut", + ["Australian Aboriginal Sign Language"] = "asw", + ["Austrian Sign Language"] = "asq", + ["Austronesian Mari"] = "hob", + ["Auwe"] = "smf", + ["Auyana"] = "auy", + ["Auye"] = "auu", + ["Auyokawa"] = "auo", + ["Avar"] = "av", + ["Avatime"] = "avn", + ["Avau"] = "avb", + ["Avava"] = "tmb", + ["Avestan"] = "ae", + ["Avikam"] = "avi", + ["Avokaya"] = "avu", + ["Avá-Canoeiro"] = "avv", + ["Awa (China)"] = "vwa", + ["Awa (New Guinea)"] = "awb", + ["Awa-Cuaiquer"] = "kwi", + ["Awabakal"] = "awk", + ["Awadhi"] = "awa", + ["Awak"] = "awo", + ["Awar"] = "aya", + ["Awara"] = "awx", + ["Awbono"] = "awh", + ["Aweer"] = "bob", + ["Awera"] = "awr", + ["Awetí"] = "awe", + ["Awing"] = "azo", + ["Awjila"] = "auj", + ["Awngi"] = "awn", + ["Awngthim"] = "gwm", + ["Awtuw"] = "kmn", + ["Awu"] = "yiu", + ["Awun"] = "aww", + ["Awutu"] = "afu", + ["Awyi"] = "auw", + ["Axamb"] = "ahb", + ["Axi Yi"] = "yix", + ["Ayabadhu"] = "ayd", + ["Ayautla Mazatec"] = "vmy", + ["Ayere"] = "aye", + ["Ayerrerenge"] = "axe", + ["Ayi"] = "ayq", + ["Ayizi"] = "yyz", + ["Ayizo"] = "ayb", + ["Aymara"] = "ay", + ["Aynu"] = "aib", + ["Ayomán"] = "sai-ayo", + ["Ayoquesco Zapotec"] = "zaf", + ["Ayoreo"] = "ayo", + ["Ayu"] = "ayu", + ["Ayutla Mixtec"] = "miy", + ["Azerbaijani"] = "az", + ["Azha"] = "aza", + ["Azhe"] = "yiz", + ["Azoyú Me'phaa"] = "tpc", + ["Baa"] = "kwb", + ["Baagandji"] = "drl", + ["Baan"] = "bvj", + ["Baangi"] = "bqx", + ["Baatonum"] = "bba", + ["Baba"] = "bbw", + ["Baba Malay"] = "mbf", + ["Babango"] = "bbm", + ["Babanki"] = "bbk", + ["Babatana"] = "baa", + ["Babine-Witsuwit'en"] = "bcr", + ["Babole"] = "bvx", + ["Babungo"] = "bav", + ["Babuza"] = "bzg", + ["Bacama"] = "bcy", 
+ ["Bacanese Malay"] = "btj", + ["Bactrian"] = "xbc", + ["Bada"] = "bhz", + ["Badaga"] = "bfq", + ["Badanchi"] = "bau", + ["Bade"] = "bde", + ["Badeshi"] = "bdz", + ["Badimaya"] = "bia", + ["Badui"] = "bac", + ["Badyara"] = "pbp", + ["Baeggu"] = "bvd", + ["Baekje"] = "pkc", + ["Baelelea"] = "bvc", + ["Baenan"] = "sai-bae", + ["Baetora"] = "btr", + ["Bafanji"] = "bfj", + ["Bafaw"] = "bwt", + ["Bafia"] = "ksf", + ["Bafut"] = "bfd", + ["Baga Kaloum"] = "bqf", + ["Baga Koga"] = "bgo", + ["Baga Manduri"] = "bmd", + ["Baga Pokur"] = "bcg", + ["Baga Sitemu"] = "bsp", + ["Baga Sobané"] = "bsv", + ["Bagheli"] = "bfy", + ["Bagirmi"] = "bmi", + ["Bago-Kusuntu"] = "bqg", + ["Bagri"] = "bgq", + ["Bagua"] = "sai-bag", + ["Bagupi"] = "bpi", + ["Bagusa"] = "bqb", + ["Bagvalal"] = "kva", + ["Baha"] = "yha", + ["Baham"] = "bdw", + ["Bahamian Creole"] = "bah", + ["Baharna Arabic"] = "abv", + ["Bahau"] = "bhv", + ["Bahinemo"] = "bjh", + ["Bahing"] = "bhj", + ["Bahnar"] = "bdq", + ["Bahonsuai"] = "bsu", + ["Bai"] = "bdj", + ["Baibai"] = "bbf", + ["Baikeno"] = "bkx", + ["Baima"] = "bqh", + ["Baimak"] = "bmx", + ["Bainouk-Gunyaamolo"] = "bcz", + ["Bainouk-Gunyuño"] = "bab", + ["Bainouk-Samik"] = "bcb", + ["Baiso"] = "bsw", + ["Baissa Fali"] = "fah", + ["Bajan"] = "bjs", + ["Bajelani"] = "bjm", + ["Baka"] = "bkc", + ["Bakairí"] = "bkq", + ["Bakaka"] = "bqz", + ["Bakhtiari"] = "bqi", + ["Baki"] = "bki", + ["Bakoko"] = "bkh", + ["Bakole"] = "kme", + ["Bakpinka"] = "bbs", + ["Bakulung"] = "bbu", + ["Bakumpai"] = "bkr", + ["Bakung"] = "xkl", + ["Bakwé"] = "bjw", + ["Balaesang"] = "bls", + ["Balangao"] = "blw", + ["Balangingi"] = "sse", + ["Balanta-Ganja"] = "bjt", + ["Balanta-Kentohe"] = "ble", + ["Balantak"] = "blz", + ["Balau"] = "blg", + ["Baldemu"] = "bdn", + ["Bali"] = "bcp", + ["Baliledo"] = "poz-bal", + ["Balinese"] = "ban", + ["Balinese Malay"] = "mhp", + ["Balkan Gagauz Turkish"] = "bgx", + ["Balkan Romani"] = "rmn", + ["Balo"] = "bqo", + ["Baloi"] = "biz", + ["Balong"] = "bnt-bal", 
+ ["Balti"] = "bft", + ["Baltic Romani"] = "rml", + ["Baluan-Pam"] = "blq", + ["Baluchi"] = "bal", + ["Bamako Sign Language"] = "bog", + ["Bamali"] = "bbq", + ["Bambalang"] = "bmo", + ["Bambam"] = "ptu", + ["Bambara"] = "bm", + ["Bambassi"] = "myf", + ["Bambili-Bambui"] = "baw", + ["Bamenyam"] = "bce", + ["Bamu"] = "bcf", + ["Bamukumbit"] = "bqt", + ["Bamum"] = "bax", + ["Bamunka"] = "bvm", + ["Bamwe"] = "bmg", + ["Ban Khor Sign Language"] = "bfk", + ["Bana"] = "bcw", + ["Banam Bay"] = "vrt", + ["Banao Itneg"] = "bjx", + ["Banaro"] = "byz", + ["Banda"] = "bnd", + ["Banda Malay"] = "bpq", + ["Banda-Bambari"] = "liy", + ["Banda-Banda"] = "bpd", + ["Banda-Mbrès"] = "bqk", + ["Banda-Ndélé"] = "bfl", + ["Banda-Yangere"] = "yaj", + ["Bandi"] = "bza", + ["Bandial"] = "bqj", + ["Bandjalang"] = "bdy", + ["Bangala"] = "bxg", + ["Bangandu"] = "bgf", + ["Bangba"] = "bbe", + ["Banggai"] = "bgz", + ["Bangi"] = "bni", + ["Bangi Me"] = "dba", + ["Bangka"] = "mfb", + ["Bangolan"] = "bgj", + ["Bangubangu"] = "bnx", + ["Bangwinji"] = "bsj", + ["Baniva"] = "bvv", + ["Baniwa"] = "bwi", + ["Banjarese"] = "bjn", + ["Banka"] = "bxw", + ["Bankan Tey Dogon"] = "dbw", + ["Bankon"] = "abb", + ["Banoni"] = "bcm", + ["Bantawa"] = "bap", + ["Bantayanon"] = "bfx", + ["Bantik"] = "bnq", + ["Banyumasan"] = "map-bms", + ["Baoule"] = "bci", + ["Baraamu"] = "brd", + ["Barai"] = "bbb", + ["Barakai"] = "baj", + ["Baram Kayan"] = "kys", + ["Barama"] = "bbg", + ["Barambu"] = "brm", + ["Baramu"] = "bmz", + ["Barapasi"] = "brp", + ["Baras"] = "brs", + ["Barasana"] = "bsn", + ["Barbareño"] = "boi", + ["Barclayville Grebo"] = "gry", + ["Bardi"] = "bcj", + ["Barein"] = "bva", + ["Bargam"] = "mlp", + ["Bari"] = "bfa", + ["Bariai"] = "bch", + ["Bariji"] = "bjc", + ["Barikanchi"] = "bxo", + ["Barikewa"] = "jbk", + ["Barngarla"] = "bjb", + ["Barok"] = "bjk", + ["Barombi"] = "bbi", + ["Barranbinya"] = "aus-bra", + ["Barro Negro Tunebo"] = "tbn", + ["Barrow Point"] = "bpt", + ["Baruga"] = "bjz", + ["Barunggam"] = 
"aus-brm", + ["Baruya"] = "byr", + ["Barwe"] = "bwg", + ["Barzani Jewish Neo-Aramaic"] = "bjf", + ["Baré"] = "bae", + ["Barí"] = "mot", + ["Basa"] = "bzw", + ["Basa-Gumna"] = "bsl", + ["Basa-Gurmana"] = "buj", + ["Basaa"] = "bas", + ["Basap"] = "bdb", + ["Basay"] = "byq", + ["Bashkardi"] = "bsg", + ["Bashkir"] = "ba", + ["Basketo"] = "bst", + ["Basque"] = "eu", + ["Bassa"] = "bsq", + ["Bassa-Kontagora"] = "bsr", + ["Bassari"] = "bsc", + ["Bassossi"] = "bsi", + ["Bata"] = "bta", + ["Bataan Ayta"] = "ayt", + ["Batad Ifugao"] = "ifb", + ["Batanga"] = "bnm", + ["Batek"] = "btq", + ["Bateri"] = "btv", + ["Bathari"] = "bhm", + ["Bati (Cameroon)"] = "btc", + ["Bati (Indonesia)"] = "bvt", + ["Bats"] = "bbl", + ["Batu"] = "btu", + ["Batui"] = "zbt", + ["Batuley"] = "bay", + ["Bau"] = "bbd", + ["Bau Bidayuh"] = "sne", + ["Bauchi"] = "bsf", + ["Baure"] = "brg", + ["Bauria"] = "bge", + ["Bauro"] = "bxa", + ["Bauwaki"] = "bwk", + ["Bauzi"] = "bvz", + ["Bavarian"] = "bar", + ["Bawm Chin"] = "bgr", + ["Bay Miwok"] = "mkq", + ["Bayali"] = "bjy", + ["Baybayanon"] = "bvy", + ["Baygo"] = "byg", + ["Bayogoula"] = "nai-bay", + ["Bayono"] = "byl", + ["Bayot"] = "bda", + ["Bayungu"] = "bxj", + ["Bazigar"] = "bfr", + ["Baïnounk Gubëeher"] = "alv-bgu", + ["Beami"] = "beo", + ["Beaver"] = "bea", + ["Beba"] = "bfp", + ["Bebe"] = "bzv", + ["Bebele"] = "beb", + ["Bebeli"] = "bek", + ["Bebil"] = "bxp", + ["Bedik"] = "tnr", + ["Bedjond"] = "bjv", + ["Bedoanas"] = "bed", + ["Beeke"] = "bkf", + ["Beele"] = "bxq", + ["Beembe"] = "beq", + ["Beezen"] = "bnz", + ["Befang"] = "bby", + ["Begbere-Ejar"] = "bqv", + ["Beja"] = "bej", + ["Bekati'"] = "bei", + ["Bekwarra"] = "bkv", + ["Bekwel"] = "bkw", + ["Belait"] = "beg", + ["Belanda Bor"] = "bxb", + ["Belanda Viri"] = "bvi", + ["Belarusian"] = "be", + ["Belhariya"] = "byw", + ["Beli"] = "blm", + ["Belizean Creole"] = "bzj", + ["Bella Coola"] = "blc", + ["Bellari"] = "brw", + ["Bemba"] = "bem", + ["Bembe"] = "bmb", + ["Ben Tey"] = "dbt", + ["Bena"] = 
"yun", + ["Benabena"] = "bef", + ["Bench"] = "bcq", + ["Bende"] = "bdp", + ["Bendi"] = "bct", + ["Beneraf"] = "bnv", + ["Beng"] = "nhb", + ["Benga"] = "bng", + ["Bengali"] = "bn", + ["Benggoi"] = "bgy", + ["Bengkala Sign Language"] = "bqy", + ["Bentong"] = "bnu", + ["Benyadu'"] = "byd", + ["Beothuk"] = "bue", + ["Bepour"] = "bie", + ["Bera"] = "brf", + ["Berakou"] = "bxv", + ["Berau Malay"] = "bve", + ["Berawan"] = "lod", + ["Berbice Creole Dutch"] = "brc", + ["Berik"] = "bkl", + ["Berinomo"] = "bit", + ["Berom"] = "bom", + ["Berta"] = "wti", + ["Berti"] = "byt", + ["Besisi"] = "mhe", + ["Besme"] = "bes", + ["Besoa"] = "bep", + ["Betaf"] = "bfe", + ["Betawi"] = "bew", + ["Bete"] = "byf", + ["Bete-Bendi"] = "btt", + ["Betoi"] = "sai-bet", + ["Betta Kurumba"] = "xub", + ["Bezhta"] = "kap", + ["Bhadrawahi"] = "bhd", + ["Bhalay"] = "bhx", + ["Bharia"] = "bha", + ["Bhatri"] = "bgw", + ["Bhattiyali"] = "bht", + ["Bhaya"] = "bhe", + ["Bhele"] = "bhy", + ["Bhilali"] = "bhi", + ["Bhili"] = "bhb", + ["Bhojpuri"] = "bho", + ["Bhoti Kinnauri"] = "nes", + ["Bhunjia"] = "bhu", + ["Biafada"] = "bif", + ["Biage"] = "bdf", + ["Biak"] = "bhw", + ["Biali"] = "beh", + ["Bian Marind"] = "bpv", + ["Biangai"] = "big", + ["Biao"] = "byk", + ["Biao Mon"] = "bmt", + ["Biao-Jiao Mien"] = "bje", + ["Biatah Bidayuh"] = "bth", + ["Bibaali"] = "bcn", + ["Bibbulman"] = "xbp", + ["Bidiyo"] = "bid", + ["Bidyara"] = "bym", + ["Bidyogo"] = "bjg", + ["Biem"] = "bmc", + ["Bierebo"] = "bnk", + ["Bieria"] = "brj", + ["Biete"] = "biu", + ["Big Nambas"] = "nmb", + ["Biga"] = "bhc", + ["Bigambal"] = "xbe", + ["Bih"] = "ibh", + ["Bihari"] = "bh", + ["Bijori"] = "bix", + ["Bikaru"] = "bic", + ["Bikol Central"] = "bcl", + ["Bikya"] = "byb", + ["Bila"] = "bip", + ["Bilakura"] = "bql", + ["Bilaspuri"] = "kfs", + ["Bilba"] = "bpz", + ["Bilbil"] = "brz", + ["Bile"] = "bil", + ["Biliau"] = "bcu", + ["Biloxi"] = "bll", + ["Bilua"] = "blb", + ["Bilur"] = "bxf", + ["Bima"] = "bhp", + ["Bimin"] = "bhl", + ["Bimoba"] = 
"bim", + ["Bina"] = "bmn", + ["Binahari"] = "bxz", + ["Binandere"] = "bhg", + ["Binawa"] = "byj", + ["Bindal"] = "xbd", + ["Bine"] = "bon", + ["Binji"] = "bpj", + ["Binongan Itneg"] = "itb", + ["Bintauna"] = "bne", + ["Bintulu"] = "bny", + ["Binukid"] = "bkd", + ["Binumarien"] = "bjr", + ["Bipi"] = "biq", + ["Birao"] = "brr", + ["Birgid"] = "brk", + ["Birgit"] = "btf", + ["Birhor"] = "biy", + ["Biri"] = "bzr", + ["Biritai"] = "bqq", + ["Birri"] = "bvq", + ["Birrpayi"] = "xbj", + ["Birwa"] = "brl", + ["Biseni"] = "ije", + ["Bishnupriya Manipuri"] = "bpy", + ["Bishuo"] = "bwh", + ["Bisis"] = "bnw", + ["Bislama"] = "bi", + ["Bisorio"] = "bir", + ["Bissa"] = "bib", + ["Bisu"] = "bzi", + ["Bit"] = "bgk", + ["Bitare"] = "brt", + ["Bitur"] = "mcc", + ["Biwat"] = "bwm", + ["Biyo"] = "byo", + ["Biyom"] = "bpm", + ["Blablanga"] = "blp", + ["Black Speech"] = "art-bsp", + ["Blackfoot"] = "bla", + ["Blafe"] = "bfh", + ["Blagar"] = "beu", + ["Blang"] = "blr", + ["Blin"] = "byn", + ["Bo"] = "bgl", + ["Bo-Rukul"] = "mae", + ["Bo-Ung"] = "mux", + ["Boano (Maluku)"] = "bzn", + ["Boano (Sulawesi)"] = "bzl", + ["Bobongko"] = "bgb", + ["Bobot"] = "bty", + ["Bodo (Central Africa)"] = "boy", + ["Bodo (India)"] = "brx", + ["Bodo Gadaba"] = "gbj", + ["Bodo Parja"] = "bdv", + ["Bofi"] = "bff", + ["Boga"] = "bvw", + ["Bogaya"] = "boq", + ["Boghom"] = "bux", + ["Boguru"] = "bqu", + ["Bohtan Neo-Aramaic"] = "bhn", + ["Boikin"] = "bzf", + ["Bokar"] = "sit-bok", + ["Bokha"] = "ybk", + ["Boko"] = "bqc", + ["Bokobaru"] = "bus", + ["Bokoto"] = "bdt", + ["Bokyi"] = "bky", + ["Bola"] = "bnp", + ["Bolak"] = "art-blk", + ["Bolango"] = "bld", + ["Bole"] = "bol", + ["Bolgo"] = "bvo", + ["Bolia"] = "bli", + ["Bolinao"] = "smk", + ["Bolivian Sign Language"] = "bvl", + ["Boloki"] = "bkt", + ["Bolon"] = "bof", + ["Bolondo"] = "bzm", + ["Bolongan"] = "blj", + ["Bolyu"] = "ply", + ["Bom"] = "bmf", + ["Boma Nkuu"] = "bnt-bon", + ["Boma Yumu"] = "bnt-boy", + ["Bomboli"] = "bml", + ["Bomboma"] = "bws", + 
["Bomitaba"] = "zmx", + ["Bomu"] = "bmq", + ["Bomwali"] = "bmw", + ["Bon Gula"] = "glc", + ["Bonan"] = "peh", + ["Bondei"] = "bou", + ["Bondo"] = "bfw", + ["Bondoukou Kulango"] = "kzc", + ["Bondum Dom Dogon"] = "dbu", + ["Bonerate"] = "bna", + ["Bonggi"] = "bdg", + ["Bonggo"] = "bpg", + ["Bongili"] = "bui", + ["Bongo"] = "bot", + ["Bongu"] = "bpu", + ["Bonjo"] = "bok", + ["Bonkeng"] = "bvg", + ["Bonkiman"] = "bop", + ["Bookan"] = "bnb", + ["Boon"] = "bnl", + ["Boor"] = "bvf", + ["Bora"] = "boa", + ["Border Kuna"] = "kvn", + ["Borei"] = "gai", + ["Boro"] = "xxb", + ["Borong"] = "ksr", + ["Boruca"] = "brn", + ["Borôro"] = "bor", + ["Boselewa"] = "bwf", + ["Bosngun"] = "bqs", + ["Bote-Majhi"] = "bmj", + ["Botlikh"] = "bph", + ["Botolan Sambal"] = "sbl", + ["Bouna Kulango"] = "nku", + ["Bourbonnais-Berrichon"] = "roa-bbn", + ["Bourguignon"] = "roa-brg", + ["Bouyei"] = "pcc", + ["Bozaba"] = "bzo", + ["Bragat"] = "aof", + ["Brahui"] = "brh", + ["Braj"] = "bra", + ["Brazilian Sign Language"] = "bzs", + ["Brek Karen"] = "kvl", + ["Brem"] = "buq", + ["Breri"] = "brq", + ["Breton"] = "br", + ["Bribri"] = "bzd", + ["British Sign Language"] = "bfi", + ["Brokkat"] = "bro", + ["Brokpake"] = "sgt", + ["Brokskat"] = "bkk", + ["Brooke's Point Palawano"] = "plw", + ["Broome Pearling Lugger Pidgin"] = "bpl", + ["Brunei Bisaya"] = "bsb", + ["Brunei Malay"] = "kxd", + ["Bruny Island"] = "xpz", + ["Bu"] = "jid", + ["Bu-Nao Bunu"] = "bwx", + ["Bua"] = "bub", + ["Bualkhaw Chin"] = "cbl", + ["Buamu"] = "box", + ["Bube"] = "bvb", + ["Bubi"] = "buw", + ["Bubia"] = "bbx", + ["Budeh Stieng"] = "stt", + ["Budibud"] = "btp", + ["Budong-Budong"] = "bdx", + ["Budu"] = "buu", + ["Budukh"] = "bdk", + ["Buduma"] = "bdm", + ["Budza"] = "bja", + ["Buena Vista Yokuts"] = "nai-bvy", + ["Bugan"] = "bbh", + ["Bughotu"] = "bgt", + ["Buginese"] = "bug", + ["Buglere"] = "sab", + ["Bugun"] = "bgg", + ["Buhi'non Bikol"] = "ubl", + ["Buhid"] = "bku", + ["Buhutu"] = "bxh", + ["Bujhyal"] = "byh", + ["Bukar-Sadung 
Bidayuh"] = "sdo", + ["Bukat"] = "bvk", + ["Bukawa"] = "buk", + ["Bukhari"] = "bhh", + ["Bukit Malay"] = "bvu", + ["Bukitan"] = "bkn", + ["Bukiyip"] = "ape", + ["Buksa"] = "tkb", + ["Bukusu"] = "bxk", + ["Bulgar"] = "xbo", + ["Bulgarian"] = "bg", + ["Bulgarian Sign Language"] = "bqn", + ["Bulgebi"] = "bmp", + ["Buli (Ghana)"] = "bwu", + ["Buli (Indonesia)"] = "bzq", + ["Bulo Stieng"] = "sti", + ["Bulu (Cameroon)"] = "bum", + ["Bulu (New Guinea)"] = "bjl", + ["Bum"] = "bmv", + ["Bumaji"] = "byp", + ["Bumang"] = "bvp", + ["Bumbita Arapesh"] = "aon", + ["Bumthangkha"] = "kjz", + ["Bun"] = "buv", + ["Buna"] = "bvn", + ["Bunaba"] = "bck", + ["Bunak"] = "bfn", + ["Bunama"] = "bdd", + ["Bundeli"] = "bns", + ["Bung"] = "bqd", + ["Bungain"] = "but", + ["Bunganditj"] = "xbg", + ["Bungku"] = "bkz", + ["Bungu"] = "wun", + ["Bunoge"] = "dgb", + ["Bunun"] = "bnn", + ["Buol"] = "blf", + ["Bura"] = "bwr", + ["Bura Mabang"] = "mde", + ["Burak"] = "bys", + ["Buraka"] = "bkg", + ["Burarra"] = "bvr", + ["Burate"] = "bti", + ["Burduna"] = "bxn", + ["Bure"] = "bvh", + ["Burgundian"] = "gem-bur", + ["Burji"] = "bji", + ["Burmese"] = "my", + ["Burmeso"] = "bzu", + ["Buru (Indonesia)"] = "mhs", + ["Buru (Nigeria)"] = "bqw", + ["Burui"] = "bry", + ["Burumakok"] = "aip", + ["Burun"] = "bdi", + ["Burunge"] = "bds", + ["Burushaski"] = "bsk", + ["Burusu"] = "bqr", + ["Buruwai"] = "asi", + ["Buryat"] = "bua", + ["Busa"] = "bqp", + ["Busam"] = "bxs", + ["Busami"] = "bsm", + ["Busang Kayan"] = "bfg", + ["Bushoong"] = "buf", + ["Buso"] = "bso", + ["Busoa"] = "bup", + ["Bussa"] = "dox", + ["Busuu"] = "bju", + ["Butbut Kalinga"] = "kyb", + ["Butchulla"] = "xby", + ["Butmas-Tur"] = "bnr", + ["Butuanon"] = "btw", + ["Buwal"] = "bhs", + ["Buyeo"] = "xpy", + ["Buyu"] = "byi", + ["Buyuan Jinuo"] = "jiy", + ["Bwa"] = "bww", + ["Bwaidoka"] = "bwd", + ["Bwanabwana"] = "tte", + ["Bwatoo"] = "bwa", + ["Bwe Karen"] = "bwe", + ["Bwela"] = "bwl", + ["Bwile"] = "bwc", + ["Bwisi"] = "bwz", + ["Byangsi"] = "bee", + 
["Byep"] = "mkk", + ["Bädi Kanum"] = "khd", + ["Caac"] = "msq", + ["Cabiyarí"] = "cbb", + ["Cabécar"] = "cjp", + ["Cacaloxtepec Mixtec"] = "miu", + ["Cacaopera"] = "ccr", + ["Cacgia Roglai"] = "roc", + ["Cacua"] = "cbv", + ["Cacán"] = "sai-cac", + ["Caddo"] = "cad", + ["Cafundó"] = "ccd", + ["Cahuarano"] = "cah", + ["Cahuilla"] = "chl", + ["Cajonos Zapotec"] = "zad", + ["Caka"] = "ckx", + ["Cakchiquel-Quiché Mixed Language"] = "ckz", + ["Cakfem-Mushere"] = "cky", + ["Calabrian Greek"] = "grk-cal", + ["Calamian Tagbanwa"] = "tbk", + ["Callawalla"] = "caw", + ["Calusa"] = "nai-cal", + ["Caluyanun"] = "clu", + ["Caló"] = "rmq", + ["Camarines Norte Agta"] = "abd", + ["Cameroon Mambila"] = "mcu", + ["Cameroon Pidgin"] = "wes", + ["Campalagian"] = "cml", + ["Camsá"] = "kbh", + ["Camtho"] = "cmt", + ["Camunic"] = "xcc", + ["Candoshi-Shapra"] = "cbu", + ["Canela"] = "ram", + ["Canichana"] = "caz", + ["Cantonese"] = "yue", + ["Cao Miao"] = "cov", + ["Caolan"] = "mlc", + ["Capanahua"] = "kaq", + ["Capiznon"] = "cps", + ["Cappadocian Greek"] = "cpg", + ["Caquinte"] = "cot", + ["Car Nicobarese"] = "caq", + ["Cara"] = "cfd", + ["Carabayo"] = "cby", + ["Caramanta"] = "crf", + ["Caranqui"] = "sai-caq", + ["Carapana"] = "cbc", + ["Carian"] = "xcr", + ["Cariay"] = "awd-kar", + ["Caribbean Hindustani"] = "hns", + ["Caribbean Javanese"] = "jvn", + ["Carijona"] = "cbd", + ["Carolina Algonquian"] = "crr", + ["Carolinian"] = "cal", + ["Carpathian Romani"] = "rmc", + ["Carrier"] = "crx", + ["Cashibo-Cacataibo"] = "cbr", + ["Cashinahua"] = "cbs", + ["Casiguran Dumagat Agta"] = "dgc", + ["Casuarina Coast Asmat"] = "asc", + ["Catacao"] = "sai-cat", + ["Catalan"] = "ca", + ["Catalan Sign Language"] = "csc", + ["Catawba"] = "chc", + ["Catuquinaru"] = "sai-ctq", + ["Catío Chibcha"] = "cba-cat", + ["Cauca"] = "cca", + ["Cavere"] = "awd-cav", + ["Cavineña"] = "cav", + ["Cayubaba"] = "cyb", + ["Cayuga"] = "cay", + ["Cayuse"] = "xcy", + ["Cazcan"] = "azc-caz", + ["Cañari"] = "sai-cnr", + ["Cebaara 
Senoufo"] = "sef", + ["Cebuano"] = "ceb", + ["Celtiberian"] = "xce", + ["Cemuhî"] = "cam", + ["Cen"] = "cen", + ["Central Asmat"] = "cns", + ["Central Atlas Tamazight"] = "tzm", + ["Central Awyu"] = "awu", + ["Central Bai"] = "bca", + ["Central Bontoc"] = "lbk", + ["Central Cagayan Agta"] = "agt", + ["Central Dusun"] = "dtp", + ["Central Franconian"] = "gmw-cfr", + ["Central Grebo"] = "grv", + ["Central Huasteca Nahuatl"] = "nch", + ["Central Huishui Hmong"] = "hmc", + ["Central Kurdish"] = "ckb", + ["Central Maewo"] = "mwo", + ["Central Mahuatlán Zapoteco"] = "zam", + ["Central Malay"] = "pse", + ["Central Masela"] = "mxz", + ["Central Mashan Hmong"] = "hmm", + ["Central Mazahua"] = "maz", + ["Central Melanau"] = "mel", + ["Central Mnong"] = "cmo", + ["Central Nahuatl"] = "nhn", + ["Central Nicobarese"] = "ncb", + ["Central Ojibwa"] = "ojc", + ["Central Palawano"] = "plc", + ["Central Pame"] = "pbs", + ["Central Pomo"] = "poo", + ["Central Puebla Nahuatl"] = "ncx", + ["Central Sama"] = "sml", + ["Central Siberian Yupik"] = "ess", + ["Central Sierra Miwok"] = "csm", + ["Central Subanen"] = "syb", + ["Central Tagbanwa"] = "tgt", + ["Central Tarahumara"] = "tar", + ["Central Teke"] = "nzu", + ["Central Tunebo"] = "tuf", + ["Centúúm"] = "cet", + ["Cerma"] = "cme", + ["Ch'olti'"] = "myn-chl", + ["Ch'orti'"] = "caa", + ["Chaap Wuurong"] = "tjw", + ["Chachi"] = "cbi", + ["Chadian Arabic"] = "shu", + ["Chadian Sign Language"] = "cds", + ["Chadong"] = "cdy", + ["Chagatai"] = "chg", + ["Chaha"] = "sem-cha", + ["Chaima"] = "ciy", + ["Chairel"] = "sit-cha", + ["Chak"] = "ckh", + ["Chakali"] = "cli", + ["Chakma"] = "ccp", + ["Chala"] = "cll", + ["Chaldean Neo-Aramaic"] = "cld", + ["Chali"] = "tgf", + ["Chamacoco"] = "ceg", + ["Chamalal"] = "cji", + ["Chamba Daka"] = "ccg", + ["Chamba Leko"] = "ndi", + ["Chambeali"] = "cdh", + ["Chambri"] = "can", + ["Chamicuro"] = "ccc", + ["Chamling"] = "rab", + ["Chamorro"] = "ch", + ["Champenois"] = "roa-cha", + ["Chang"] = "nbc", + 
["Changriwa"] = "cga", + ["Changthang"] = "cna", + ["Chantyal"] = "chx", + ["Chaná"] = "sai-chn", + ["Chané"] = "caj", + ["Chapacura"] = "sai-chp", + ["Chara"] = "cra", + ["Charrua"] = "sai-chr", + ["Chaudangsi"] = "cdn", + ["Chaura"] = "crv", + ["Chavacano"] = "cbk", + ["Chayahuita"] = "cbt", + ["Chayuco Mixtec"] = "mih", + ["Chazumba Mixtec"] = "xtb", + ["Che"] = "ruk", + ["Chechen"] = "ce", + ["Cheke Holo"] = "mrn", + ["Chemakum"] = "xch", + ["Chenapian"] = "cjn", + ["Chenchu"] = "cde", + ["Chenoua"] = "cnu", + ["Chepang"] = "cdm", + ["Chepya"] = "ycp", + ["Cherepon"] = "cpn", + ["Cherokee"] = "chr", + ["Chesu"] = "ych", + ["Chetco-Tolowa"] = "ctc", + ["Chewong"] = "cwg", + ["Cheyenne"] = "chy", + ["Chhattisgarhi"] = "hne", + ["Chhintange"] = "ctn", + ["Chhulung"] = "cur", + ["Chiangmai Sign Language"] = "csd", + ["Chiapanec"] = "cip", + ["Chibcha"] = "chb", + ["Chicahuaxtla Triqui"] = "trs", + ["Chichewa"] = "ny", + ["Chichicapan Zapotec"] = "zpv", + ["Chichimeca-Jonaz"] = "pei", + ["Chichonyi-Chidzihana-Chikauma"] = "coh", + ["Chickasaw"] = "cic", + ["Chicomuceltec"] = "cob", + ["Chiduruma"] = "dug", + ["Chigmecatitlán Mixtec"] = "mii", + ["Chilcotin"] = "clc", + ["Chilean Sign Language"] = "csg", + ["Chilisso"] = "clh", + ["Chiltepec Chinantec"] = "csa", + ["Chimalapa Zoque"] = "zoh", + ["Chimariko"] = "cid", + ["Chimila"] = "cbg", + ["Chimwiini"] = "bnt-cmw", + ["Chinali"] = "cih", + ["Chinbon Chin"] = "cnb", + ["Chinese"] = "zh", + ["Chinese Pidgin English"] = "cpi", + ["Chinese Sign Language"] = "csl", + ["Chinook"] = "chh", + ["Chinook Jargon"] = "chn", + ["Chipaya"] = "cap", + ["Chipewyan"] = "chp", + ["Chiquihuitlán Mazatec"] = "maq", + ["Chiquimulilla"] = "nai-chi", + ["Chiquitano"] = "cax", + ["Chiricahua"] = "apm", + ["Chirino"] = "sai-chi", + ["Chiripá"] = "nhd", + ["Chiru"] = "cdf", + ["Chitimacha"] = "ctm", + ["Chitkuli Kinnauri"] = "cik", + ["Chittagonian"] = "ctg", + ["Chitwania Tharu"] = "the", + ["Chiwere"] = "iow", + ["Choapan Zapotec"] = 
"zpc", + ["Chocangaca"] = "cgk", + ["Chochotec"] = "coz", + ["Choctaw"] = "cho", + ["Chodri"] = "cdi", + ["Chokri Naga"] = "nri", + ["Chokwe"] = "cjk", + ["Chol"] = "ctu", + ["Cholón"] = "cht", + ["Chong"] = "cog", + ["Choni"] = "cda", + ["Chono"] = "sai-cno", + ["Chopi"] = "cce", + ["Chothe Naga"] = "nct", + ["Chrau"] = "crw", + ["Chru"] = "cje", + ["Chuabo"] = "chw", + ["Chuanqiandian Cluster Miao"] = "cqd", + ["Chuave"] = "cjv", + ["Chug"] = "cvg", + ["Chuj"] = "cac", + ["Chuka"] = "cuh", + ["Chukchi"] = "ckt", + ["Chukwa"] = "cuw", + ["Chulym"] = "clw", + ["Chumburung"] = "ncu", + ["Churahi"] = "cdj", + ["Churuya"] = "sai-chu", + ["Chut"] = "scb", + ["Chuukese"] = "chk", + ["Chuvan"] = "xcv", + ["Chuvash"] = "cv", + ["Chácobo"] = "cao", + ["Ci Gbe"] = "cib", + ["Cia-Cia"] = "cia", + ["Cibak"] = "ckl", + ["Cicipu"] = "awc", + ["Ciguayo"] = "nai-cig", + ["Cimbrian"] = "cim", + ["Cinamiguin Manobo"] = "mkx", + ["Cinda-Regi-Tiyal"] = "cdr", + ["Cineni"] = "cie", + ["Cinta Larga"] = "cin", + ["Cishingini"] = "asg", + ["Citak"] = "txt", + ["Ciwogai"] = "tgd", + ["Classical Mandaic"] = "myz", + ["Classical Mongolian"] = "cmg", + ["Classical Nahuatl"] = "nci", + ["Classical Newar"] = "nwc", + ["Classical Quechua"] = "qwc", + ["Classical Syriac"] = "syc", + ["Classical Tibetan"] = "xct", + ["Coahuilteco"] = "xcw", + ["Coast Miwok"] = "csi", + ["Coastal Kadazan"] = "kzj", + ["Coastal Konjo"] = "kjc", + ["Coatecas Altas Zapotec"] = "zca", + ["Coatepec Nahuatl"] = "naz", + ["Coatlán Mixe"] = "mco", + ["Coatlán Zapotec"] = "zps", + ["Coatzospan Mixtec"] = "miz", + ["Cocama"] = "cod", + ["Cochimi"] = "coj", + ["Cocopa"] = "coc", + ["Cocos Islands Malay"] = "coa", + ["Coeruna"] = "sai-coe", + ["Coeur d'Alene"] = "crd", + ["Cofán"] = "con", + ["Cogui"] = "kog", + ["Col"] = "liw", + ["Colombian Sign Language"] = "csn", + ["Colonia Tovar German"] = "gct", + ["Columbia-Wenatchi"] = "col", + ["Colán"] = "sai-col", + ["Comaltepec Chinantec"] = "cco", + ["Comanche"] = "com", + 
["Comechingon"] = "sai-cmg", + ["Comecrudo"] = "xcm", + ["Communicationssprache"] = "art-com", + ["Como Karim"] = "cfg", + ["Comox"] = "coo", + ["Con"] = "cno", + ["Coos"] = "csz", + ["Copainalá Zoque"] = "zoc", + ["Copala Triqui"] = "trc", + ["Copallén"] = "sai-cop", + ["Coptic"] = "cop", + ["Coquille"] = "coq", + ["Cora"] = "crn", + ["Cori"] = "cry", + ["Cornish"] = "kw", + ["Coroado Puri"] = "sai-crd", + ["Corsican"] = "co", + ["Cosoleacaque Nahuatl"] = "nhk", + ["Costa Rican Sign Language"] = "csr", + ["Cotabato Manobo"] = "mta", + ["Cotoname"] = "xcn", + ["Cowlitz"] = "cow", + ["Coyaima"] = "coy", + ["Coyotepec Popoloca"] = "pbf", + ["Coyutla Totonac"] = "toc", + ["Cree"] = "cr", + ["Creek"] = "mus", + ["Crimean Gothic"] = "gme-cgo", + ["Crimean Tatar"] = "crh", + ["Croatian Sign Language"] = "csq", + ["Cross River Mbembe"] = "mfn", + ["Crow"] = "cro", + ["Cruzeño"] = "crz", + ["Cua"] = "cua", + ["Cuban Sign Language"] = "csf", + ["Cubeo"] = "cub", + ["Cueva"] = "sai-cva", + ["Cuiba"] = "cui", + ["Cuitlatec"] = "cuy", + ["Culina"] = "cul", + ["Culli"] = "sai-cul", + ["Cumanagoto"] = "cuo", + ["Cumbric"] = "xcb", + ["Cun"] = "cuq", + ["Cung"] = "cug", + ["Cupeño"] = "cup", + ["Curonian"] = "xcu", + ["Curripaco"] = "kpc", + ["Cutchi-Swahili"] = "ccl", + ["Cuvok"] = "cuv", + ["Cuyamecalco Mixtec"] = "xtu", + ["Cuyunon"] = "cyo", + ["Cwi Bwamu"] = "bwy", + ["Cypriot Arabic"] = "acy", + ["Czech"] = "cs", + ["Czech Sign Language"] = "cse", + ["Côông"] = "cnc", + ["Da'a Kaili"] = "kzf", + ["Daai Chin"] = "dao", + ["Daantanai'"] = "lni", + ["Daasanach"] = "dsh", + ["Daba"] = "dbq", + ["Dabarre"] = "dbr", + ["Dabe"] = "dbe", + ["Dacian"] = "xdc", + ["Dadanitic"] = "sem-dad", + ["Dadi Dadi"] = "dda", + ["Dadibi"] = "mps", + ["Dadiya"] = "dbd", + ["Daga"] = "dgz", + ["Dagaari Dioula"] = "dgd", + ["Dagba"] = "dgk", + ["Dagbani"] = "dag", + ["Dagik"] = "dec", + ["Dagoman"] = "dgn", + ["Dahalik"] = "dlk", + ["Dahalo"] = "dal", + ["Daho-Doo"] = "das", + ["Dai"] = "dij", + 
["Dair"] = "drb", + ["Dairi Batak"] = "btd", + ["Dakaka"] = "bpa", + ["Dakka"] = "dkk", + ["Dakota"] = "dak", + ["Dakpa"] = "dka", + ["Dalmatian"] = "dlm", + ["Daloa Bété"] = "bev", + ["Dama (Nigeria)"] = "dmm", + ["Dama (Sierra Leone)"] = "dmn-dam", + ["Damakawa"] = "dam", + ["Damal"] = "uhn", + ["Dambi"] = "dac", + ["Dameli"] = "dml", + ["Dampelas"] = "dms", + ["Dan"] = "dnj", + ["Danaru"] = "dnr", + ["Danau"] = "dnu", + ["Dandami Maria"] = "daq", + ["Dangaléat"] = "daa", + ["Dangaura Tharu"] = "thl", + ["Danish"] = "da", + ["Danish Sign Language"] = "dsl", + ["Dano"] = "aso", + ["Danu"] = "dnv", + ["Danuwar"] = "dhw", + ["Dao"] = "daz", + ["Daonda"] = "dnd", + ["Dar Daju Daju"] = "djc", + ["Dar Fur Daju"] = "daj", + ["Dar Sila Daju"] = "dau", + ["Darai"] = "dry", + ["Dargwa"] = "dar", + ["Darkinjung"] = "xda", + ["Darlong"] = "dln", + ["Darmiya"] = "drd", + ["Daro-Matu Melanau"] = "dro", + ["Darumbal"] = "xgm", + ["Dass"] = "dot", + ["Datooga"] = "tcc", + ["Daungwurrung"] = "dgw", + ["Daur"] = "dta", + ["Davawenyo"] = "daw", + ["Dawawa"] = "dww", + ["Dawera-Daweloor"] = "ddw", + ["Dawro"] = "dwr", + ["Day"] = "dai", + ["Dayi"] = "dax", + ["Dazaga"] = "dzg", + ["Deccani"] = "dcc", + ["Dedua"] = "ded", + ["Defaka"] = "afn", + ["Defi Gbe"] = "gbh", + ["Deg"] = "mzw", + ["Deg Xinag"] = "ing", + ["Degema"] = "deg", + ["Degenan"] = "dge", + ["Dehwari"] = "deh", + ["Dek"] = "dek", + ["Dela-Oenale"] = "row", + ["Delo"] = "ntr", + ["Delta Yokuts"] = "nai-dly", + ["Dem"] = "dem", + ["Dema"] = "dmx", + ["Demisa"] = "dei", + ["Demotic"] = "egx-dem", + ["Demta"] = "dmy", + ["Dena'ina"] = "tfn", + ["Dendi"] = "ddn", + ["Dengese"] = "dez", + ["Dengka"] = "dnk", + ["Deno"] = "dbb", + ["Denya"] = "anv", + ["Dení"] = "dny", + ["Deori"] = "der", + ["Desano"] = "des", + ["Desiya"] = "dso", + ["Dewas Rai"] = "dwz", + ["Dewoin"] = "dee", + ["Dezfuli"] = "def", + ["Dghwede"] = "dgh", + ["Dhaiso"] = "dhs", + ["Dhalandji"] = "dhl", + ["Dhangu"] = "dhg", + ["Dhanki"] = "dhn", + ["Dhao"] 
= "nfa", + ["Dharug"] = "xdk", + ["Dhatki"] = "mki", + ["Dhimal"] = "dhi", + ["Dhivehi"] = "dv", + ["Dhodia"] = "dho", + ["Dhofari Arabic"] = "adf", + ["Dhudhuroa"] = "ddr", + ["Dhungaloo"] = "dhx", + ["Dhurga"] = "dhu", + ["Dhuwal"] = "dwu", + ["Dhuwaya"] = "dwy", + ["Dia"] = "dia", + ["Dibabawon Manobo"] = "mbd", + ["Dibiyaso"] = "dby", + ["Dibo"] = "dio", + ["Dicamay Agta"] = "duy", + ["Didinga"] = "did", + ["Dieri"] = "dif", + ["Digo"] = "dig", + ["Dii"] = "dur", + ["Dijim-Bwilim"] = "cfa", + ["Dilling"] = "dil", + ["Dima"] = "jma", + ["Dimasa"] = "dis", + ["Dimbong"] = "dii", + ["Dime"] = "dim", + ["Dinapigue Agta"] = "phi-din", + ["Dineor"] = "mrx", + ["Ding"] = "diz", + ["Dinka"] = "din", + ["Diodio"] = "ddi", + ["Dirasha"] = "gdl", + ["Diri"] = "dwa", + ["Dirim"] = "dir", + ["Disa"] = "dsi", + ["Ditammari"] = "tbz", + ["Ditidaht"] = "dtd", + ["Diuwe"] = "diy", + ["Diuxi-Tilantongo Mixtec"] = "xtd", + ["Dixon Reef"] = "dix", + ["Dizin"] = "mdx", + ["Djadjawurrung"] = "dja", + ["Djambarrpuyngu"] = "djr", + ["Djangun"] = "djf", + ["Djauan"] = "djn", + ["Djawi"] = "djw", + ["Djimini"] = "dyi", + ["Djinang"] = "dji", + ["Djinba"] = "djb", + ["Djiwarli"] = "djl", + ["Dobel"] = "kvo", + ["Dobu"] = "dob", + ["Doe"] = "doe", + ["Doga"] = "dgg", + ["Doghoro"] = "dgx", + ["Dogoso"] = "dgs", + ["Dogosé"] = "dos", + ["Dogri"] = "doi", + ["Dogrib"] = "dgr", + ["Dogul Dom"] = "dbg", + ["Doka"] = "dbi", + ["Doko-Uyanga"] = "uya", + ["Dolgan"] = "dlg", + ["Dom"] = "doa", + ["Domaaki"] = "dmk", + ["Domari"] = "rmt", + ["Dominican Sign Language"] = "doq", + ["Dompo"] = "doy", + ["Domu"] = "dof", + ["Domung"] = "dev", + ["Dondo"] = "dok", + ["Dong"] = "doh", + ["Dongo"] = "doo", + ["Dongolawi"] = "kzh", + ["Dongotono"] = "ddd", + ["Dongshanba Lalo"] = "yik", + ["Dongxiang"] = "sce", + ["Donno So Dogon"] = "dds", + ["Doondo"] = "dde", + ["Dorasque"] = "cba-dor", + ["Dori'o"] = "dor", + ["Dorig"] = "wwo", + ["Doromu-Koki"] = "kqc", + ["Dorze"] = "doz", + ["Doso"] = "dol", + 
["Doteli"] = "dty", + ["Dothraki"] = "art-dtk", + ["Doura"] = "don", + ["Doutai"] = "tds", + ["Doyayo"] = "dow", + ["Drehu"] = "dhv", + ["Drung"] = "duu", + ["Duala"] = "dua", + ["Duano"] = "dup", + ["Duau"] = "dva", + ["Dubli"] = "dub", + ["Dubu"] = "dmu", + ["Dugun"] = "ndu", + ["Duguri"] = "dbm", + ["Dugwor"] = "dme", + ["Duhwa"] = "kbz", + ["Duit"] = "cba-dui", + ["Duke"] = "nke", + ["Dukhan"] = "trk-dkh", + ["Dulbu"] = "dbo", + ["Duli"] = "duz", + ["Duma"] = "dma", + ["Dumaitic"] = "sem-dum", + ["Dumbea"] = "duf", + ["Dumi"] = "dus", + ["Dumpas"] = "dmv", + ["Dumun"] = "dui", + ["Duna"] = "duc", + ["Dungan"] = "dng", + ["Dungmali"] = "raa", + ["Dungra Bhil"] = "duh", + ["Dungu"] = "dbv", + ["Dupaningan Agta"] = "duo", + ["Dura"] = "drq", + ["Duri"] = "mvp", + ["Duriankere"] = "dbn", + ["Duruwa"] = "pci", + ["Dusner"] = "dsn", + ["Dusun Deyah"] = "dun", + ["Dusun Malang"] = "duq", + ["Dusun Witu"] = "duw", + ["Dutch"] = "nl", + ["Dutch Low Saxon"] = "nds-nl", + ["Dutch Sign Language"] = "dse", + ["Duun"] = "dux", + ["Duupa"] = "dae", + ["Duvle"] = "duv", + ["Duwai"] = "dbp", + ["Duwet"] = "gve", + ["Dwang"] = "nnu", + ["Dyaabugay"] = "dyy", + ["Dyaberdyaber"] = "dyb", + ["Dyan"] = "dya", + ["Dyangadi"] = "dyn", + ["Dyirbal"] = "dbl", + ["Dyugun"] = "dyd", + ["Dyula"] = "dyu", + ["Dza"] = "jen", + ["Dzala"] = "dzl", + ["Dzando"] = "dzn", + ["Dzao Min"] = "bpn", + ["Dzodinka"] = "add", + ["Dzongkha"] = "dz", + ["Dzuun"] = "dnn", + ["Dâw"] = "kwa", + ["E"] = "eee", + ["E'ma Buyang"] = "yzg", + ["Early Assamese"] = "inc-oas", + ["Early Tripuri"] = "xtr", + ["East Central German"] = "gmw-ecg", + ["East Damar"] = "dmr", + ["East Franconian"] = "vmf", + ["East Futuna"] = "fud", + ["East Kewa"] = "kjs", + ["East Limba"] = "lma", + ["East Makian"] = "mky", + ["East Masela"] = "vme", + ["East Nyala"] = "nle", + ["East Tarangan"] = "tre", + ["East Yugur"] = "yuy", + ["Eastern Acipa"] = "acp", + ["Eastern Arrernte"] = "aer", + ["Eastern Bolivian Guaraní"] = "gui", + 
["Eastern Bontoc"] = "ebk", + ["Eastern Bru"] = "bru", + ["Eastern Canadian Inuktitut"] = "ike", + ["Eastern Cham"] = "cjm", + ["Eastern Durango Nahuatl"] = "azd", + ["Eastern Gorkha Tamang"] = "tge", + ["Eastern Gurung"] = "ggn", + ["Eastern Highland Chatino"] = "cly", + ["Eastern Highland Otomi"] = "otm", + ["Eastern Huasteca Nahuatl"] = "nhe", + ["Eastern Huishui Hmong"] = "hme", + ["Eastern Karaboro"] = "xrb", + ["Eastern Katu"] = "ktv", + ["Eastern Kayah"] = "eky", + ["Eastern Keres"] = "kee", + ["Eastern Krahn"] = "kqo", + ["Eastern Lalu"] = "yit", + ["Eastern Lawa"] = "lwl", + ["Eastern Magar"] = "mgp", + ["Eastern Maninkakan"] = "emk", + ["Eastern Mari"] = "chm", + ["Eastern Meohang"] = "emg", + ["Eastern Mnong"] = "mng", + ["Eastern Muria"] = "emu", + ["Eastern Ngad'a"] = "nea", + ["Eastern Nisu"] = "nos", + ["Eastern Ojibwa"] = "ojg", + ["Eastern Parbate Kham"] = "kif", + ["Eastern Penan"] = "pez", + ["Eastern Pomo"] = "peb", + ["Eastern Pwo"] = "kjp", + ["Eastern Qiandong Miao"] = "hmq", + ["Eastern Tamang"] = "taj", + ["Eastern Tawbuid"] = "bnj", + ["Eastern Xiangxi Miao"] = "muq", + ["Eastern Xwla Gbe"] = "gbx", + ["Ebira"] = "igb", + ["Eblaite"] = "xeb", + ["Ebrié"] = "ebr", + ["Ebughu"] = "ebg", + ["Ecuadorian Sign Language"] = "ecs", + ["Ede Cabe"] = "cbj", + ["Ede Ica"] = "ica", + ["Ede Idaca"] = "idd", + ["Ede Ije"] = "ijj", + ["Ede Nago"] = "nqg", + ["Edera Awyu"] = "awy", + ["Edo"] = "bin", + ["Edolo"] = "etr", + ["Edomite"] = "xdm", + ["Edopi"] = "dbf", + ["Efai"] = "efa", + ["Efe"] = "efe", + ["Efik"] = "efi", + ["Efutop"] = "ofu", + ["Ega"] = "ega", + ["Eggon"] = "ego", + ["Egyptian"] = "egy", + ["Egyptian Arabic"] = "arz", + ["Egyptian Sign Language"] = "esl", + ["Ehueun"] = "ehu", + ["Eipomek"] = "eip", + ["Eitiep"] = "eit", + ["Ejagham"] = "etu", + ["Ejamat"] = "eja", + ["Ekajuk"] = "eka", + ["Ekari"] = "ekg", + ["Ekele"] = "khy", + ["Eki"] = "eki", + ["Ekit"] = "eke", + ["Ekpeye"] = "ekp", + ["El Alto Zapotec"] = "zpp", + ["El Hugeirat"] 
= "elh", + ["El Molo"] = "elo", + ["Elamite"] = "elx", + ["Eleme"] = "elm", + ["Elepi"] = "ele", + ["Elfdalian"] = "ovd", + ["Elip"] = "ekm", + ["Elkei"] = "elk", + ["Eloi"] = "art-elo", + ["Elotepec Zapotec"] = "zte", + ["Eloyi"] = "afo", + ["Elseng"] = "mrf", + ["Elu"] = "elu", + ["Elymian"] = "xly", + ["Emae"] = "mmw", + ["Emai"] = "ema", + ["Eman"] = "emn", + ["Embaloh"] = "emb", + ["Emberá-Baudó"] = "bdc", + ["Emberá-Catío"] = "cto", + ["Emberá-Chamí"] = "cmi", + ["Emberá-Tadó"] = "tdc", + ["Embu"] = "ebu", + ["Emem"] = "enr", + ["Emerillon"] = "eme", + ["Emilian"] = "egl", + ["Emplawas"] = "emw", + ["En"] = "enc", + ["Enawené-Nawé"] = "unk", + ["Ende"] = "end", + ["Enga"] = "enq", + ["Engenni"] = "enn", + ["Enggano"] = "eno", + ["English"] = "en", + ["Enlhet"] = "enl", + ["Enrekang"] = "ptt", + ["Enu"] = "enu", + ["Enwan"] = "env", + ["Enwang"] = "enw", + ["Enxet"] = "enx", + ["Enya"] = "gey", + ["Eotile"] = "eot", + ["Epena"] = "sja", + ["Epi-Olmec"] = "xep", + ["Epie"] = "epi", + ["Epigraphic Mayan"] = "emy", + ["Eravallan"] = "era", + ["Erave"] = "kjy", + ["Ere"] = "twp", + ["Erie"] = "iro-ere", + ["Eritai"] = "ert", + ["Erokwanas"] = "erw", + ["Erre"] = "err", + ["Erromintxela"] = "emx", + ["Ersu"] = "ers", + ["Eruwa"] = "erh", + ["Erzya"] = "myv", + ["Esan"] = "ish", + ["Ese"] = "mcq", + ["Ese Ejja"] = "ese", + ["Eshtehardi"] = "esh", + ["Esimbi"] = "ags", + ["Eskayan"] = "esy", + ["Esmeralda"] = "sai-esm", + ["Esperanto"] = "eo", + ["Esselen"] = "esq", + ["Estado de México Otomi"] = "ots", + ["Estonian"] = "et", + ["Estonian Sign Language"] = "eso", + ["Esuma"] = "esm", + ["Etchemin"] = "etc", + ["Etebi"] = "etb", + ["Eten"] = "etx", + ["Eteocretan"] = "ecr", + ["Eteocypriot"] = "ecy", + ["Ethiopian Sign Language"] = "eth", + ["Etkywan"] = "ich", + ["Eton (Cameroon)"] = "eto", + ["Eton (Vanuatu)"] = "etn", + ["Etruscan"] = "ett", + ["Etulo"] = "utr", + ["Evant"] = "bzz", + ["Even"] = "eve", + ["Evenki"] = "evn", + ["Ewage-Notu"] = "nou", + 
["Ewarhuyana"] = "sai-ewa", + ["Ewe"] = "ee", + ["Ewondo"] = "ewo", + ["Extremaduran"] = "ext", + ["Eyak"] = "eya", + ["Ezaa"] = "eza", + ["Fagani"] = "faf", + ["Faire Atta"] = "azt", + ["Faita"] = "faj", + ["Faiwol"] = "fai", + ["Fakkanci"] = "gel", + ["Fala"] = "fax", + ["Falam Chin"] = "cfm", + ["Fali"] = "fli", + ["Faliscan"] = "xfa", + ["Fam"] = "fam", + ["Fanagalo"] = "fng", + ["Fanamaket"] = "bjp", + ["Fang (Bantu)"] = "fan", + ["Fang (Beboid)"] = "fak", + ["Fania"] = "fni", + ["Far Western Muria"] = "fmu", + ["Farefare"] = "gur", + ["Faroese"] = "fo", + ["Fas"] = "fqs", + ["Fasu"] = "faa", + ["Fataleka"] = "far", + ["Fataluku"] = "ddg", + ["Fayu"] = "fau", + ["Fe'fe'"] = "fmp", + ["Fedan"] = "pdn", + ["Fembe"] = "agl", + ["Fer"] = "kah", + ["Feroge"] = "fer", + ["Fiji Hindi"] = "hif", + ["Fijian"] = "fj", + ["Filomena Mata-Coahuitlán Totonac"] = "tlp", + ["Finisterre Yau"] = "yuw", + ["Finnish"] = "fi", + ["Finnish Sign Language"] = "fse", + ["Finnish-Swedish Sign Language"] = "fss", + ["Finongan"] = "fag", + ["Fipa"] = "fip", + ["Firan"] = "fir", + ["Fiwaga"] = "fiw", + ["Flemish Sign Language"] = "vgt", + ["Flinders Island"] = "fln", + ["Foau"] = "flh", + ["Fogaha"] = "ber-fog", + ["Foi"] = "foi", + ["Foia Foia"] = "ffi", + ["Folopa"] = "ppo", + ["Foma"] = "fom", + ["Fon"] = "fon", + ["Fongoro"] = "fgr", + ["Foodo"] = "fod", + ["Forak"] = "frq", + ["Fordata"] = "frd", + ["Fore"] = "for", + ["Forest Enets"] = "enf", + ["Forest Nenets"] = "syd-fne", + ["Fortsenal"] = "frt", + ["Fox"] = "sac", + ["Franc-Comtois"] = "roa-fcm", + ["Francisco León Zoque"] = "zos", + ["Franco-Provençal"] = "frp", + ["French"] = "fr", + ["French Belgian Sign Language"] = "sfb", + ["French Sign Language"] = "fsl", + ["Friulian"] = "fur", + ["Fula"] = "ff", + ["Fuliiru"] = "flr", + ["Fulniô"] = "fun", + ["Fum"] = "fum", + ["Fungwa"] = "ula", + ["Fur"] = "fvr", + ["Furu"] = "fuu", + ["Futuna-Aniwa"] = "fut", + ["Fuyug"] = "fuy", + ["Fwe"] = "fwe", + ["Fwâi"] = "fwa", + ["Fyam"] = 
"pym", + ["Fyer"] = "fie", + ["Ga"] = "gaa", + ["Ga'anda"] = "gqa", + ["Ga'dang"] = "gdg", + ["Gaa"] = "ttb", + ["Gaam"] = "tbi", + ["Gabadi"] = "kbt", + ["Gabi"] = "gbw", + ["Gabri"] = "gab", + ["Gabrielino-Fernandeño"] = "xgf", + ["Gadang"] = "gdk", + ["Gaddang"] = "gad", + ["Gaddi"] = "gbk", + ["Gade"] = "ged", + ["Gadjerawang"] = "gdh", + ["Gadsup"] = "gaj", + ["Gafat"] = "gft", + ["Gagadu"] = "gbu", + ["Gagauz"] = "gag", + ["Gagnoa Bété"] = "btg", + ["Gahri"] = "bfu", + ["Gaikundi"] = "gbf", + ["Gaina"] = "gcn", + ["Gal"] = "gap", + ["Galambu"] = "glo", + ["Galatian"] = "xga", + ["Galela"] = "gbi", + ["Galeya"] = "gar", + ["Galibi Carib"] = "car", + ["Galice"] = "gce", + ["Galician"] = "gl", + ["Galindan"] = "xgl", + ["Gallaecian"] = "cel-gal", + ["Gallo"] = "roa-gal", + ["Gallurese"] = "sdn", + ["Galo"] = "adl", + ["Galoli"] = "gal", + ["Gamale Kham"] = "kgj", + ["Gambera"] = "gma", + ["Gamela"] = "sai-gam", + ["Gamilaraay"] = "kld", + ["Gamit"] = "gbl", + ["Gamkonora"] = "gak", + ["Gamo"] = "gmv", + ["Gamo-Ningi"] = "bte", + ["Gan"] = "gan", + ["Gana"] = "gnq", + ["Ganang"] = "gne", + ["Gandhari"] = "pgd", + ["Gane"] = "gzn", + ["Ganggalida"] = "gcd", + ["Ganglau"] = "ggl", + ["Gangte"] = "gnb", + ["Gangulu"] = "gnl", + ["Gants"] = "gao", + ["Ganza"] = "gza", + ["Ganzi"] = "gnz", + ["Gao"] = "gga", + ["Gapapaiwa"] = "pwg", + ["Garawa"] = "wrk", + ["Garhwali"] = "gbm", + ["Garifuna"] = "cab", + ["Garingbal"] = "xgi", + ["Garo"] = "grt", + ["Garre"] = "gex", + ["Garus"] = "gyb", + ["Garza"] = "xgr", + ["Gashowu"] = "nai-gsy", + ["Gata'"] = "gaq", + ["Gaulish"] = "cel-gau", + ["Gavak"] = "dmc", + ["Gavar"] = "gou", + ["Gavião do Jiparaná"] = "gvo", + ["Gawar-Bati"] = "gwt", + ["Gawwada"] = "gwd", + ["Gayil"] = "gyl", + ["Gayo"] = "gay", + ["Gayón"] = "sai-gay", + ["Gbagyi"] = "gbr", + ["Gban"] = "ggu", + ["Gbanu"] = "gbv", + ["Gbanziri"] = "gbg", + ["Gbari"] = "gby", + ["Gbaya"] = "gba", + ["Gbaya-Bossangoa"] = "gbp", + ["Gbaya-Bozoum"] = "gbq", + 
["Gbaya-Mbodomo"] = "gmm", + ["Gbayi"] = "gyg", + ["Gbesi Gbe"] = "gbs", + ["Gbii"] = "ggb", + ["Gbin"] = "xgb", + ["Gbiri-Niragu"] = "grh", + ["Gboloo Grebo"] = "gec", + ["Gciriku"] = "diu", + ["Gcwi"] = "gwj", + ["Ge"] = "hmj", + ["Ge'ez"] = "gez", + ["Geba Karen"] = "kvq", + ["Gebe"] = "gei", + ["Gedaged"] = "gdd", + ["Gedeo"] = "drs", + ["Geji"] = "gji", + ["Geko Karen"] = "ghk", + ["Gela"] = "nlg", + ["Gelao"] = "gio", + ["Gele'"] = "sbc", + ["Geme"] = "geq", + ["Gen"] = "gej", + ["Gende"] = "gaf", + ["Gengle"] = "geg", + ["Georgian"] = "ka", + ["Gepo"] = "ygp", + ["Gera"] = "gew", + ["Gerka"] = "gek", + ["German"] = "de", + ["German Low German"] = "nds-de", + ["German Sign Language"] = "gsg", + ["Geruma"] = "gea", + ["Geser-Gorom"] = "ges", + ["Gey"] = "guv", + ["Ghadames"] = "gha", + ["Ghanaian Sign Language"] = "gse", + ["Ghandruk Sign Language"] = "gds", + ["Ghanongga"] = "ghn", + ["Ghari"] = "gri", + ["Ghayavi"] = "bmk", + ["Ghera"] = "ghr", + ["Ghomala'"] = "bbj", + ["Ghomara"] = "gho", + ["Ghotuo"] = "aaa", + ["Ghulfan"] = "ghl", + ["Giangan"] = "bgi", + ["Gibanawa"] = "gib", + ["Gidar"] = "gid", + ["Gikyode"] = "acd", + ["Gilaki"] = "glk", + ["Gilbertese"] = "gil", + ["Gilima"] = "gix", + ["Gimi (Austronesian)"] = "gip", + ["Gimi (Goroka)"] = "gim", + ["Gimme"] = "kmp", + ["Gimnime"] = "gmn", + ["Ginuman"] = "gnm", + ["Girawa"] = "bbr", + ["Giryama"] = "nyf", + ["Githabul"] = "gih", + ["Gitua"] = "ggt", + ["Gitxsan"] = "git", + ["Giyug"] = "giy", + ["Gizrra"] = "tof", + ["Glaro-Twabo"] = "glr", + ["Glavda"] = "glw", + ["Glio-Oubi"] = "oub", + ["Glosa"] = "igs", + ["Gnau"] = "gnu", + ["Goa'uld"] = "art-gld", + ["Goaria"] = "gig", + ["Gobasi"] = "goi", + ["Gobu"] = "gox", + ["Godié"] = "god", + ["Godoberi"] = "gdo", + ["Godwari"] = "gdx", + ["Goemai"] = "ank", + ["Gofa"] = "gof", + ["Gogo"] = "gog", + ["Gogodala"] = "ggw", + ["Goguryeo"] = "zkg", + ["Gojri"] = "gju", + ["Gokana"] = "gkn", + ["Gokhy"] = "sit-gkh", + ["Gola"] = "gol", + ["Golin"] = "gvf", 
+ ["Golpa"] = "lja", + ["Gondi"] = "gon", + ["Gone Dau"] = "goo", + ["Gong"] = "ugo", + ["Gongduk"] = "goe", + ["Gonja"] = "gjn", + ["Gooniyandi"] = "gni", + ["Gor"] = "gqr", + ["Gorakor"] = "goc", + ["Gorap"] = "goq", + ["Goreng"] = "xgg", + ["Gorontalo"] = "gor", + ["Gorovu"] = "grq", + ["Gorowa"] = "gow", + ["Gothic"] = "got", + ["Gottscheerish"] = "gmw-gts", + ["Goundo"] = "goy", + ["Gourmanchéma"] = "gux", + ["Gowlan"] = "goj", + ["Gowro"] = "gwf", + ["Gozarkhani"] = "goz", + ["Grangali"] = "nli", + ["Grass Koiari"] = "kbk", + ["Grebo"] = "grb", + ["Greek"] = "el", + ["Greek Sign Language"] = "gss", + ["Green Gelao"] = "giq", + ["Green Hmong"] = "hnj", + ["Greenlandic"] = "kl", + ["Grenadian Creole English"] = "gcl", + ["Gresi"] = "grs", + ["Groma"] = "gro", + ["Gros Ventre"] = "ats", + ["Gua"] = "gwx", + ["Guahibo"] = "guh", + ["Guajajára"] = "gub", + ["Guajá"] = "gvj", + ["Guambiano"] = "gum", + ["Guamo"] = "sai-gmo", + ["Guanano"] = "gvc", + ["Guanche"] = "gnc", + ["Guanyinqiao"] = "jiq", + ["Guaraní"] = "gn", + ["Guarayu"] = "gyr", + ["Guatemalan Sign Language"] = "gsm", + ["Guató"] = "gta", + ["Guayabero"] = "guo", + ["Guazacapán"] = "nai-guz", + ["Gudang"] = "xgd", + ["Gudanji"] = "nji", + ["Gude"] = "gde", + ["Gudu"] = "gdu", + ["Guduf-Gava"] = "gdf", + ["Guerrero Amuzgo"] = "amu", + ["Guerrero Nahuatl"] = "ngu", + ["Guevea de Humboldt Zapotec"] = "zpg", + ["Gugadj"] = "ggd", + ["Gugu Badhun"] = "gdc", + ["Gugu Warra"] = "wrw", + ["Guhu-Samane"] = "ghs", + ["Guianese Creole"] = "gcr", + ["Guiberoua Bété"] = "bet", + ["Guinau"] = "awd-gnu", + ["Guinea Kpelle"] = "gkp", + ["Guinea-Bissau Creole"] = "pov", + ["Guinean Sign Language"] = "gus", + ["Guiqiong"] = "gqi", + ["Gujarati"] = "gu", + ["Gula"] = "glu", + ["Gula'alaa"] = "gmb", + ["Gulay"] = "gvl", + ["Gule"] = "gly", + ["Gulf Arabic"] = "afb", + ["Gullah"] = "gul", + ["Gumalu"] = "gmu", + ["Gumatj"] = "gnn", + ["Gumawana"] = "gvs", + ["Gumuz"] = "guk", + ["Gun"] = "guw", + ["Gundi"] = "gdi", + 
["Gunditjmara"] = "gjm", + ["Gundungurra"] = "xrd", + ["Gungabula"] = "gyf", + ["Gungu"] = "rub", + ["Guntai"] = "gnt", + ["Gunu"] = "yas", + ["Gunwinggu"] = "gup", + ["Gunya"] = "gyy", + ["Gupa-Abawa"] = "gpa", + ["Gupapuyngu"] = "guf", + ["Gur Lama"] = "las", + ["Guragone"] = "gge", + ["Guramalum"] = "grz", + ["Gurani"] = "hac", + ["Gureng Gureng"] = "gnr", + ["Gurgula"] = "ggg", + ["Guriaso"] = "grx", + ["Gurindji"] = "gue", + ["Gurjar Apabhramsa"] = "inc-gup", + ["Gurmana"] = "gvm", + ["Guro"] = "goa", + ["Guruntum"] = "grd", + ["Gusan"] = "gsn", + ["Gusii"] = "guz", + ["Gusilay"] = "gsl", + ["Gutnish"] = "gmq-gut", + ["Guugu Yimidhirr"] = "kky", + ["Guwa"] = "xgw", + ["Guwamu"] = "gwu", + ["Guwar"] = "aus-guw", + ["Guya"] = "gka", + ["Guyanese Creole English"] = "gyn", + ["Guyani"] = "gvy", + ["Guébie"] = "gie", + ["Gvoko"] = "ngs", + ["Gwa"] = "gwb", + ["Gwahatike"] = "dah", + ["Gwak"] = "jgk", + ["Gwamhi-Wuri"] = "bga", + ["Gwandara"] = "gwn", + ["Gwara"] = "alv-gwa", + ["Gweda"] = "grw", + ["Gweno"] = "gwe", + ["Gwere"] = "gwr", + ["Gwich'in"] = "gwi", + ["Gyalsumdo"] = "gyo", + ["Gyele"] = "gyi", + ["Gyem"] = "gye", + ["Güenoa"] = "sai-gue", + ["Habu"] = "hbu", + ["Hadiyya"] = "hdy", + ["Hadothi"] = "hoj", + ["Hadrami"] = "xhd", + ["Hadza"] = "hts", + ["Haeke"] = "aek", + ["Hahon"] = "hah", + ["Haida"] = "hai", + ["Haigwai"] = "hgw", + ["Hainyaxo Bozo"] = "bzx", + ["Haiphong Sign Language"] = "haf", + ["Haisla"] = "has", + ["Haitian Creole"] = "ht", + ["Haitian Vodoun Culture Language"] = "hvc", + ["Haiǁom"] = "hgm", + ["Haji"] = "hji", + ["Hajong"] = "haj", + ["Hakka"] = "hak", + ["Hakö"] = "hao", + ["Halang"] = "hal", + ["Halang Doan"] = "hld", + ["Halbi"] = "hlb", + ["Halia"] = "hla", + ["Halkomelem"] = "hur", + ["Hamap"] = "hmu", + ["Hamba"] = "hba", + ["Hamer-Banna"] = "amf", + ["Hamtai"] = "hmt", + ["Hanga"] = "hag", + ["Hanga Hundi"] = "wos", + ["Hani"] = "hni", + ["Hanoi Sign Language"] = "hab", + ["Hanunoo"] = "hnn", + ["Harami"] = "xha", + 
["Harari"] = "har", + ["Haraza"] = "nub-har", + ["Harijan Kinnauri"] = "kjo", + ["Haroi"] = "hro", + ["Harsusi"] = "hss", + ["Haruai"] = "tmd", + ["Haruku"] = "hrk", + ["Haryanvi"] = "bgc", + ["Harzani"] = "hrz", + ["Hasaitic"] = "sem-has", + ["Hasha"] = "ybj", + ["Hassaniya"] = "mey", + ["Hatam"] = "had", + ["Hattic"] = "xht", + ["Hausa"] = "ha", + ["Hausa Sign Language"] = "hsl", + ["Haush"] = "sai-hau", + ["Havasupai-Walapai-Yavapai"] = "yuf", + ["Haveke"] = "hvk", + ["Havu"] = "hav", + ["Hawai'i Pidgin Sign Language"] = "hps", + ["Hawaiian"] = "haw", + ["Hawaiian Creole"] = "hwc", + ["Haya"] = "hay", + ["Hazaragi"] = "haz", + ["Hdi"] = "xed", + ["Hebrew"] = "he", + ["Hehe"] = "heh", + ["Heiban"] = "hbn", + ["Heiltsuk"] = "hei", + ["Helong"] = "heg", + ["Helu"] = "elu-prk", + ["Hema"] = "nix", + ["Hemba"] = "hem", + ["Herdé"] = "hed", + ["Herero"] = "hz", + ["Hermit"] = "llf", + ["Hernican"] = "xhr", + ["Hewa"] = "ham", + ["Heyo"] = "auk", + ["Hibito"] = "hib", + ["Hidatsa"] = "hid", + ["Higaonon"] = "mba", + ["Highland Konjo"] = "kjk", + ["Highland Oaxaca Chontal"] = "chd", + ["Highland Popoluca"] = "poi", + ["Highland Puebla Nahuatl"] = "azz", + ["Highland Totonac"] = "tos", + ["Hijazi Arabic"] = "acw", + ["Hijuk"] = "hij", + ["Hiligaynon"] = "hil", + ["Hill Maria"] = "mrr", + ["Himarimã"] = "hir", + ["Himyaritic"] = "sem-him", + ["Hindi"] = "hi", + ["Hindi Dogri"] = "dgo", + ["Hinduri"] = "hii", + ["Hinukh"] = "gin", + ["Hiri Motu"] = "ho", + ["Hismaic"] = "sem-his", + ["Hitchiti"] = "nai-hit", + ["Hittite"] = "hit", + ["Hitu"] = "htu", + ["Hiw"] = "hiw", + ["Hixkaryana"] = "hix", + ["Hlai"] = "lic", + ["Hlepho Phowa"] = "yhl", + ["Hlersu"] = "hle", + ["Hmar"] = "hmr", + ["Hmong Don"] = "hmf", + ["Hmong Dô"] = "hmv", + ["Hmong Shua"] = "hmz", + ["Hmwaveke"] = "mrk", + ["Ho"] = "hoc", + ["Ho Chi Minh City Sign Language"] = "hos", + ["Hoava"] = "hoa", + ["Hobyót"] = "hoh", + ["Hoia Hoia"] = "hhi", + ["Holikachuk"] = "hoi", + ["Holiya"] = "hoy", + ["Holma"] = 
"hod", + ["Holoholo"] = "hoo", + ["Holu"] = "hol", + ["Homa"] = "hom", + ["Honduran Lenca"] = "len", + ["Honduras Sign Language"] = "hds", + ["Hone"] = "juh", + ["Hong Kong Sign Language"] = "hks", + ["Honi"] = "how", + ["Hopi"] = "hop", + ["Horned Miao"] = "hrm", + ["Horo"] = "hor", + ["Horom"] = "hoe", + ["Horpa"] = "ero", + ["Hote"] = "hot", + ["Hoti"] = "hti", + ["Hovongan"] = "hov", + ["Hoyahoya"] = "hhy", + ["Hozo"] = "hoz", + ["Hpon"] = "hpo", + ["Hrangkhol"] = "hra", + ["Hre"] = "hre", + ["Hruso"] = "hru", + ["Hu"] = "huo", + ["Huachipaeri"] = "hug", + ["Huambisa"] = "hub", + ["Huaorani"] = "auc", + ["Huarijio"] = "var", + ["Huaulu"] = "hud", + ["Huautla Mazatec"] = "mau", + ["Huave"] = "huv", + ["Huaxcaleca Nahuatl"] = "nhq", + ["Huba"] = "hbb", + ["Huehuetla Tepehua"] = "tee", + ["Huetar"] = "cba-hue", + ["Huichol"] = "hch", + ["Huilliche"] = "huh", + ["Huitepec Mixtec"] = "mxs", + ["Huizhou"] = "czh", + ["Hukumina"] = "huw", + ["Hula"] = "hul", + ["Hulaulá"] = "huy", + ["Huli"] = "hui", + ["Hulung"] = "huk", + ["Humburi Senni"] = "hmb", + ["Humene"] = "huf", + ["Hun"] = "uth", + ["Hunde"] = "hke", + ["Hung"] = "hnu", + ["Hungana"] = "hum", + ["Hungarian"] = "hu", + ["Hungarian Sign Language"] = "hsh", + ["Hungworo"] = "nat", + ["Hunjara-Kaina Ke"] = "hkk", + ["Hunnic"] = "xhc", + ["Hunsrik"] = "hrx", + ["Hunzib"] = "huz", + ["Hupa"] = "hup", + ["Hupdë"] = "jup", + ["Hupla"] = "hap", + ["Hurrian"] = "xhu", + ["Hutterisch"] = "geh", + ["Hwana"] = "hwo", + ["Hya"] = "hya", + ["Hyam"] = "jab", + ["Hän"] = "haa", + ["Hértevin"] = "hrt", + ["I-Wak"] = "iwk", + ["Iaai"] = "iai", + ["Iamalele"] = "yml", + ["Iatmul"] = "ian", + ["Iau"] = "tmu", + ["Ibali Teke"] = "tek", + ["Ibaloi"] = "ibl", + ["Iban"] = "iba", + ["Ibanag"] = "ibg", + ["Ibani"] = "iby", + ["Ibatan"] = "ivb", + ["Iberian"] = "xib", + ["Ibibio"] = "ibb", + ["Ibino"] = "ibn", + ["Iboko"] = "bkp", + ["Ibu"] = "ibu", + ["Ibuoro"] = "ibr", + ["Icelandic"] = "is", + ["Icelandic Sign Language"] = "icl", 
+ ["Iceve-Maci"] = "bec", + ["Ida'an"] = "dbj", + ["Idakho-Isukha-Tiriki"] = "ida", + ["Idaté"] = "idt", + ["Idere"] = "ide", + ["Idesa"] = "ids", + ["Idi"] = "idi", + ["Ido"] = "io", + ["Idoma"] = "idu", + ["Idon"] = "idc", + ["Idu"] = "clk", + ["Idun"] = "ldb", + ["Iduna"] = "viv", + ["Ifo"] = "iff", + ["Ifè"] = "ife", + ["Igala"] = "igl", + ["Igana"] = "igg", + ["Igbo"] = "ig", + ["Igede"] = "ige", + ["Ignaciano"] = "ign", + ["Igo"] = "ahl", + ["Iguta"] = "nar", + ["Igwe"] = "igw", + ["Iha"] = "ihp", + ["Ihievbe"] = "ihi", + ["Ija-Zuba"] = "vki", + ["Ik"] = "ikx", + ["Ika"] = "ikk", + ["Ikaranggal"] = "ikr", + ["Ikizu"] = "ikz", + ["Iko"] = "iki", + ["Ikobi-Mena"] = "meb", + ["Ikoma"] = "ntk", + ["Ikpeng"] = "txi", + ["Ikpeshi"] = "ikp", + ["Ikposo"] = "kpo", + ["Iku-Gora-Ankwa"] = "ikv", + ["Ikulu"] = "ikl", + ["Ikwere"] = "ikw", + ["Ikwo"] = "iqw", + ["Ila"] = "ilb", + ["Ile Ape"] = "ila", + ["Ilgar"] = "ilg", + ["Ili Turki"] = "ili", + ["Ili'uun"] = "ilu", + ["Ilianen Manobo"] = "mbi", + ["Illyrian"] = "xil", + ["Ilocano"] = "ilo", + ["Ilongot"] = "ilk", + ["Ilue"] = "ilv", + ["Ilwana"] = "mlk", + ["Imbongu"] = "imo", + ["Imonda"] = "imn", + ["Imroing"] = "imr", + ["Inabaknon"] = "abx", + ["Inapang"] = "mzu", + ["Inari Sami"] = "smn", + ["Indanga"] = "bnt-ind", + ["Indian Sign Language"] = "ins", + ["Indo-Portuguese"] = "idb", + ["Indonesian"] = "id", + ["Indonesian Bajau"] = "bdl", + ["Indonesian Sign Language"] = "inl", + ["Indri"] = "idr", + ["Indus Kohistani"] = "mvy", + ["Indus Valley Language"] = "xiv", + ["Inebu One"] = "oin", + ["Ineseño"] = "inz", + ["Inga"] = "inb", + ["Ingrian"] = "izh", + ["Ingush"] = "inh", + ["Inlaod Itneg"] = "iti", + ["Inoke-Yate"] = "ino", + ["Inonhan"] = "loc", + ["Inor"] = "ior", + ["Inpui Naga"] = "nkf", + ["Interlingua"] = "ia", + ["Interlingue"] = "ie", + ["International Sign"] = "ils", + ["Intha"] = "int", + ["Inuinnaqtun"] = "esx-inq", + ["Inuit Sign Language"] = "iks", + ["Inuktitut"] = "iu", + ["Inuktun"] = 
"esx-ink", + ["Inupiaq"] = "ik", + ["Inuvialuktun"] = "ikt", + ["Ipai"] = "nai-ipa", + ["Ipalapa Amuzgo"] = "azm", + ["Ipiko"] = "ipo", + ["Ipili"] = "ipi", + ["Ipulo"] = "ass", + ["Iquito"] = "iqu", + ["Ir"] = "irr", + ["Irantxe"] = "irn", + ["Iranun"] = "ill", + ["Iraqi Arabic"] = "acm", + ["Iraqw"] = "irk", + ["Irarutu"] = "irh", + ["Iraya"] = "iry", + ["Iresim"] = "ire", + ["Iriga Bicolano"] = "bto", + ["Irish"] = "ga", + ["Irish Sign Language"] = "isg", + ["Irula"] = "iru", + ["Isabi"] = "isa", + ["Isan"] = "tts", + ["Isanzu"] = "isn", + ["Isarog Agta"] = "agk", + ["Isaurian"] = "und-isa", + ["Isconahua"] = "isc", + ["Isebe"] = "igo", + ["Isekiri"] = "its", + ["Ishkashimi"] = "isk", + ["Isinai"] = "inn", + ["Isirawa"] = "srl", + ["Island Carib"] = "crb", + ["Islander Creole English"] = "icr", + ["Isnag"] = "isd", + ["Isoko"] = "iso", + ["Israeli Sign Language"] = "isr", + ["Isthmus Mixe"] = "mir", + ["Isthmus Zapotec"] = "zai", + ["Istriot"] = "ist", + ["Istro-Romanian"] = "ruo", + ["Isu"] = "isu", + ["Isubu"] = "szv", + ["Italian"] = "it", + ["Italian Sign Language"] = "ise", + ["Italiot Greek"] = "grk-ita", + ["Itawit"] = "itv", + ["Itelmen"] = "itl", + ["Itene"] = "ite", + ["Iteri"] = "itr", + ["Itik"] = "itx", + ["Ito"] = "itw", + ["Itonama"] = "ito", + ["Itu Mbon Uzo"] = "itm", + ["Itundujia Mixtec"] = "mce", + ["Itzá"] = "itz", + ["Iu Mien"] = "ium", + ["Ivatan"] = "ivv", + ["Iwaidja"] = "ibd", + ["Iwal"] = "kbm", + ["Iwam"] = "iwm", + ["Iwur"] = "iwo", + ["Ixcatec"] = "ixc", + ["Ixcatlán Mazatec"] = "mzi", + ["Ixil"] = "ixl", + ["Ixtayutla Mixtec"] = "vmj", + ["Ixtenco Otomi"] = "otz", + ["Iyayu"] = "iya", + ["Iyive"] = "uiv", + ["Iyo"] = "nca", + ["Iyo'wujwa Chorote"] = "crq", + ["Iyojwa'ja Chorote"] = "crt", + ["Izere"] = "izr", + ["Izi"] = "izz", + ["Izi-Ezaa-Ikwo-Mgbo"] = "izi", + ["Izon"] = "ijc", + ["Izora"] = "cbo", + ["Iñapari"] = "inp", + ["Jabem"] = "jae", + ["Jabutí"] = "jbt", + ["Jad"] = "jda", + ["Jadgali"] = "jdg", + ["Jah Hut"] = "jah", + 
["Jahanka"] = "jad", + ["Jair Awyu"] = "awv", + ["Jakaltek"] = "jac", + ["Jakati"] = "jat", + ["Jalapa de Díaz Mazatec"] = "maj", + ["Jalkunan"] = "bxl", + ["Jamaican Country Sign Language"] = "jcs", + ["Jamaican Creole"] = "jam", + ["Jamaican Sign Language"] = "jls", + ["Jamamadí"] = "jaa", + ["Jambi Malay"] = "jax", + ["Jamiltepec Mixtec"] = "mxt", + ["Jaminjung"] = "djd", + ["Jamsay"] = "djm", + ["Jamtish"] = "gmq-jmk", + ["Jandavra"] = "jnd", + ["Janday"] = "jan", + ["Jangkang"] = "djo", + ["Jangshung"] = "jna", + ["Janji"] = "jni", + ["Japanese"] = "ja", + ["Japanese Sign Language"] = "jsl", + ["Japhug"] = "sit-jap", + ["Japrería"] = "jru", + ["Jaqaru"] = "jqr", + ["Jara"] = "jaf", + ["Jarai"] = "jra", + ["Jarawa"] = "anq", + ["Jaru"] = "ddj", + ["Jassic"] = "ysc", + ["Jaunsari"] = "jns", + ["Javanese"] = "jv", + ["Javindo"] = "jvd", + ["Jawe"] = "jaz", + ["Jaya"] = "jyy", + ["Jebero"] = "jeb", + ["Jeh"] = "jeh", + ["Jehai"] = "jhi", + ["Jeikó"] = "sai-jko", + ["Jeju"] = "jje", + ["Jemez"] = "tow", + ["Jenaama Bozo"] = "bze", + ["Jeng"] = "jeg", + ["Jennu Kurumba"] = "xuj", + ["Jere"] = "jer", + ["Jeri Kuo"] = "jek", + ["Jersey Dutch"] = "gmw-jdt", + ["Jeru"] = "akj", + ["Jerung"] = "jee", + ["Jhankot Sign Language"] = "jhs", + ["Jiamao"] = "jio", + ["Jiba"] = "juo", + ["Jibu"] = "jib", + ["Jicarilla"] = "apj", + ["Jiiddu"] = "jii", + ["Jilbe"] = "jie", + ["Jili"] = "mgi", + ["Jilim"] = "jil", + ["Jimi"] = "jmi", + ["Jimjimen"] = "jim", + ["Jin"] = "cjy", + ["Jina"] = "jia", + ["Jingpho"] = "kac", + ["Jingulu"] = "jig", + ["Jiongnai Bunu"] = "pnu", + ["Jirajara"] = "sai-jrj", + ["Jirel"] = "jul", + ["Jiru"] = "jrr", + ["Jita"] = "jit", + ["Jju"] = "kaj", + ["Joba"] = "job", + ["Jofotek-Bromnya"] = "jbr", + ["Jola-Fonyi"] = "dyo", + ["Jola-Kasa"] = "csk", + ["Jonkor Bourmataguil"] = "jeu", + ["Jordanian Sign Language"] = "jos", + ["Jorá"] = "jor", + ["Jowulu"] = "jow", + ["Ju"] = "juu", + ["Juang"] = "jun", + ["Juba Arabic"] = "pga", + ["Judeo-Arabic"] = "jrb", 
+ ["Judeo-Berber"] = "jbe", + ["Judeo-Iraqi Arabic"] = "yhd", + ["Judeo-Italian"] = "itk", + ["Judeo-Moroccan Arabic"] = "aju", + ["Judeo-Persian"] = "jpr", + ["Judeo-Tat"] = "jdt", + ["Judeo-Tripolitanian Arabic"] = "yud", + ["Judeo-Tunisian Arabic"] = "ajt", + ["Judeo-Yemeni Arabic"] = "jye", + ["Jukun Takum"] = "jbu", + ["Jumaytepeque"] = "nai-jum", + ["Jumjum"] = "jum", + ["Jumla Sign Language"] = "jus", + ["Jumli"] = "jml", + ["Jungle Inga"] = "inj", + ["Juquila Mixe"] = "mxq", + ["Jur Modo"] = "bex", + ["Juray"] = "juy", + ["Jurchen"] = "juc", + ["Jurúna"] = "jur", + ["Jutiapa"] = "nai-jtp", + ["Jutish"] = "jut", + ["Juwal"] = "mwb", + ["Juxtlahuaca Mixtec"] = "vmc", + ["Juǀ'hoan"] = "ktz", + ["Jwira-Pepesa"] = "jwi", + ["Júma"] = "jua", + ["K'iche'"] = "quc", + ["Kaamba"] = "xku", + ["Kaan"] = "ldl", + ["Kaang Chin"] = "ckn", + ["Kaansa"] = "gna", + ["Kaapor Sign Language"] = "uks", + ["Kaba"] = "ksp", + ["Kabalai"] = "kvf", + ["Kabardian"] = "kbd", + ["Kabatei"] = "xkp", + ["Kabba-Laka"] = "lap", + ["Kabishiana"] = "tup-kab", + ["Kabiyé"] = "kbp", + ["Kabola"] = "klz", + ["Kabore One"] = "onk", + ["Kabras"] = "lkb", + ["Kaburi"] = "uka", + ["Kabutra"] = "kbu", + ["Kabuverdianu"] = "kea", + ["Kabwa"] = "cwa", + ["Kabwari"] = "kcw", + ["Kabyle"] = "kab", + ["Kachama-Ganjule"] = "kcx", + ["Kachari"] = "xac", + ["Kachchi"] = "kfr", + ["Kachi Koli"] = "gjk", + ["Kacipo-Balesi"] = "koe", + ["Kaco'"] = "xkk", + ["Kadai"] = "kzd", + ["Kadar"] = "kej", + ["Kadara"] = "kad", + ["Kadaru"] = "kdu", + ["Kadiwéu"] = "kbc", + ["Kado"] = "kdv", + ["Kadugli"] = "xtc", + ["Kaduo"] = "ktp", + ["Kaera"] = "jka", + ["Kafa"] = "kbr", + ["Kafoa"] = "kpu", + ["Kagan Kalagan"] = "kll", + ["Kagate"] = "syw", + ["Kagayanen"] = "cgc", + ["Kagoma"] = "kdm", + ["Kagoro"] = "xkg", + ["Kagulu"] = "kki", + ["Kahe"] = "hka", + ["Kahua"] = "agw", + ["Kaian"] = "kct", + ["Kaibobo"] = "kzb", + ["Kaidipang"] = "kzp", + ["Kaiep"] = "kbw", + ["Kaikadi"] = "kep", + ["Kaike"] = "kzq", + ["Kaiku"] = 
"kkq", + ["Kaimbulawa"] = "zka", + ["Kaimbé"] = "xai", + ["Kaingang"] = "kgp", + ["Kairak"] = "ckr", + ["Kairiru"] = "kxa", + ["Kairui-Midiki"] = "krd", + ["Kais"] = "kzm", + ["Kaivi"] = "kce", + ["Kaiwá"] = "kgk", + ["Kaiy"] = "tcq", + ["Kajakse"] = "ckq", + ["Kajali"] = "xkj", + ["Kajaman"] = "kag", + ["Kakabai"] = "kqf", + ["Kakabe"] = "kke", + ["Kakanda"] = "kka", + ["Kaki Ae"] = "tbd", + ["Kakihum"] = "kxe", + ["Kako"] = "kkj", + ["Kakwa"] = "keo", + ["Kala"] = "kcl", + ["Kala Lagaw Ya"] = "mwp", + ["Kalaamaya"] = "lkm", + ["Kalabakan"] = "kve", + ["Kalabari"] = "ijn", + ["Kalabra"] = "kzz", + ["Kalagan"] = "kqe", + ["Kalaktang Monpa"] = "kkf", + ["Kalam"] = "kmh", + ["Kalami"] = "gwc", + ["Kalamsé"] = "knz", + ["Kalanadi"] = "wkl", + ["Kalanga"] = "kck", + ["Kalao"] = "kly", + ["Kalapuya"] = "kyl", + ["Kalarko"] = "kba", + ["Kalasha"] = "kls", + ["Kalasuri"] = "xme-kls", + ["Kalenjin"] = "kln", + ["Kalkatungu"] = "ktg", + ["Kalkoti"] = "xka", + ["Kalmyk"] = "xal", + ["Kalo Finnish Romani"] = "rmf", + ["Kalou"] = "ywa", + ["Kaluli"] = "bco", + ["Kalumpang"] = "kli", + ["Kam"] = "kdx", + ["Kamakan"] = "vkm", + ["Kamang"] = "woi", + ["Kamano"] = "kbq", + ["Kamantan"] = "kci", + ["Kamar"] = "keq", + ["Kamara"] = "jmr", + ["Kamarian"] = "kzx", + ["Kamaru"] = "kgx", + ["Kamarupi Prakrit"] = "inc-kam", + ["Kamasa"] = "klp", + ["Kamasau"] = "kms", + ["Kamassian"] = "xas", + ["Kamayo"] = "kyk", + ["Kamayurá"] = "kay", + ["Kamba"] = "kam", + ["Kambaata"] = "ktb", + ["Kambaira"] = "kyy", + ["Kambera"] = "xbr", + ["Kamberataro"] = "kbv", + ["Kamberau"] = "irx", + ["Kambiwá"] = "xbw", + ["Kami"] = "kmi", + ["Kamkata-viri"] = "bsh", + ["Kamo"] = "kcq", + ["Kamoro"] = "kgq", + ["Kamta"] = "rkt", + ["Kamu"] = "xmu", + ["Kamula"] = "xla", + ["Kamwe"] = "hig", + ["Kanakanabu"] = "xnb", + ["Kanakuru"] = "kna", + ["Kanamari"] = "knm", + ["Kanashi"] = "xns", + ["Kanasi"] = "soq", + ["Kandas"] = "kqw", + ["Kandawo"] = "gam", + ["Kande"] = "kbs", + ["Kang"] = "kyp", + ["Kanga"] = 
"kcp", + ["Kangean"] = "kkv", + ["Kanggape"] = "igm", + ["Kangjia"] = "kxs", + ["Kango"] = "kty", + ["Kango-Sua"] = "kzy", + ["Kangri"] = "xnr", + ["Kaniet"] = "ktk", + ["Kanikkaran"] = "kev", + ["Kaningdon-Nindem"] = "kdp", + ["Kaningi"] = "kzo", + ["Kaningra"] = "knr", + ["Kaninuwa"] = "wat", + ["Kanite"] = "kmu", + ["Kanjari"] = "kft", + ["Kanju"] = "kbe", + ["Kankanaey"] = "kne", + ["Kannada"] = "kn", + ["Kannada Kurumba"] = "kfi", + ["Kannauji"] = "bjj", + ["Kanowit"] = "kxn", + ["Kanoé"] = "kxo", + ["Kansa"] = "ksk", + ["Kantosi"] = "xkt", + ["Kanu"] = "khx", + ["Kanufi"] = "kni", + ["Kanuri"] = "kr", + ["Kanyok"] = "kny", + ["Kao"] = "kax", + ["Kaonde"] = "kqn", + ["Kap"] = "ykm", + ["Kapampangan"] = "pam", + ["Kapauri"] = "khp", + ["Kapin"] = "tbx", + ["Kapinawá"] = "xpn", + ["Kapingamarangi"] = "kpg", + ["Kapriman"] = "dju", + ["Kaptiau"] = "kbi", + ["Kapya"] = "klo", + ["Kaqchikel"] = "cak", + ["Kara (New Guinea)"] = "leu", + ["Kara (Tanzania)"] = "reg", + ["Karachay-Balkar"] = "krc", + ["Karadjeri"] = "gbd", + ["Karaga Mandaya"] = "mry", + ["Karaim"] = "kdr", + ["Karajá"] = "kpj", + ["Karakalpak"] = "kaa", + ["Karakhanid"] = "xqa", + ["Karami"] = "xar", + ["Karamojong"] = "kdj", + ["Karang"] = "kzr", + ["Karanga"] = "kth", + ["Karankawa"] = "zkk", + ["Karao"] = "kyj", + ["Karas"] = "kgv", + ["Karata"] = "kpt", + ["Karawa"] = "xrw", + ["Karbi"] = "mjw", + ["Kare (Africa)"] = "kbn", + ["Kare (New Guinea)"] = "kmf", + ["Karekare"] = "kai", + ["Karelian"] = "krl", + ["Karey"] = "kyd", + ["Kari"] = "kbj", + ["Karingani"] = "kgn", + ["Karipuna"] = "kuq", + ["Karipúna"] = "kgm", + ["Karipúna Creole French"] = "kmv", + ["Kariri"] = "kzw", + ["Karitiâna"] = "ktn", + ["Kariya"] = "kil", + ["Kariyarra"] = "vka", + ["Karkar-Yuri"] = "yuj", + ["Karkin"] = "krb", + ["Karko"] = "kko", + ["Karnai"] = "bbv", + ["Karo"] = "kxh", + ["Karo Batak"] = "btx", + ["Karok"] = "kyh", + ["Karolanos"] = "kyn", + ["Karon"] = "krx", + ["Karon Dori"] = "kgw", + ["Karore"] = "xkx", + 
["Karranga"] = "xrq", + ["Karuwali"] = "rxw", + ["Kasanga"] = "ccj", + ["Kasem"] = "xsm", + ["Kashaya"] = "kju", + ["Kashmiri"] = "ks", + ["Kashubian"] = "csb", + ["Kasiguranin"] = "ksn", + ["Kaska"] = "kkz", + ["Kaskean"] = "zsk", + ["Kaskihá"] = "gva", + ["Kassite"] = "und-kas", + ["Kassonke"] = "kao", + ["Kasua"] = "khs", + ["Kataang"] = "kgd", + ["Katabaga"] = "ktq", + ["Katawixi"] = "xat", + ["Katembri"] = "sai-kat", + ["Kathlamet"] = "nai-kat", + ["Kathoriya Tharu"] = "tkt", + ["Kathu"] = "ykt", + ["Katkari"] = "kfu", + ["Katla"] = "kcr", + ["Kato"] = "ktw", + ["Katso"] = "kaf", + ["Katua"] = "kta", + ["Katukina"] = "knt", + ["Kaulong"] = "pss", + ["Kaur"] = "vkk", + ["Kaure"] = "bpp", + ["Kaurna"] = "zku", + ["Kauwera"] = "xau", + ["Kavalan"] = "ckv", + ["Kavet"] = "krv", + ["Kawacha"] = "kcb", + ["Kawaiisu"] = "xaw", + ["Kawe"] = "kgb", + ["Kawishana"] = "awd-kaw", + ["Kawésqar"] = "alc", + ["Kaxararí"] = "ktx", + ["Kaxuyana"] = "kbb", + ["Kaya"] = "zra", + ["Kayabí"] = "kyz", + ["Kayagar"] = "kyt", + ["Kayan"] = "pdu", + ["Kayan Mahakam"] = "xay", + ["Kayan River Kayan"] = "xkn", + ["Kayapa Kallahan"] = "kak", + ["Kayapó"] = "txu", + ["Kayardild"] = "gyd", + ["Kayeli"] = "kzl", + ["Kayong"] = "kxy", + ["Kayort"] = "kyv", + ["Kaytetye"] = "gbb", + ["Kayupulau"] = "kzu", + ["Kazakh"] = "kk", + ["Kazukuru"] = "kzk", + ["Ke'o"] = "xxk", + ["Keak"] = "keh", + ["Keapara"] = "khz", + ["Kedah Malay"] = "meo", + ["Kedang"] = "ksx", + ["Keder"] = "kdy", + ["Kehu"] = "khh", + ["Kei"] = "kei", + ["Keiga"] = "kec", + ["Kein"] = "bmh", + ["Keiyo"] = "eyo", + ["Kela-Yela"] = "kel", + ["Kelabit"] = "kzi", + ["Keley-I Kallahan"] = "ify", + ["Keliko"] = "kbo", + ["Kelo"] = "xel", + ["Kelon"] = "kyo", + ["Kemak"] = "kem", + ["Kembayan"] = "xem", + ["Kemberano"] = "bzp", + ["Kembra"] = "xkw", + ["Kemezung"] = "dmo", + ["Kemi Sami"] = "sjk", + ["Kemiehua"] = "kfj", + ["Kemtuik"] = "kmt", + ["Kenaboi"] = "xbn", + ["Kenati"] = "gat", + ["Kendayan"] = "knx", + ["Kendeje"] = 
"klf", + ["Kendem"] = "kvm", + ["Kenga"] = "kyq", + ["Keningau Murut"] = "kxi", + ["Keninjal"] = "knl", + ["Kensiu"] = "kns", + ["Kenswei Nsei"] = "ndb", + ["Kenyan Sign Language"] = "xki", + ["Kenyang"] = "ken", + ["Kenyi"] = "lke", + ["Keoru-Ahia"] = "xeu", + ["Kepkiriwát"] = "kpn", + ["Kepo'"] = "kuk", + ["Kera"] = "ker", + ["Kerak"] = "hhr", + ["Kereho"] = "xke", + ["Kerek"] = "krk", + ["Kerewe"] = "ked", + ["Kerewo"] = "kxz", + ["Kerinci"] = "kvr", + ["Kermanic"] = "xme-ker", + ["Kesawai"] = "xes", + ["Ket"] = "ket", + ["Ketangalan"] = "kae", + ["Kete"] = "kcv", + ["Ketengban"] = "xte", + ["Ketum"] = "ktt", + ["Kewa"] = "kew", + ["Keyagana"] = "kyg", + ["Kgalagadi"] = "xkv", + ["Khakas"] = "kjh", + ["Khalaj"] = "klj", + ["Khaling"] = "klr", + ["Kham"] = "kjl", + ["Khamnigan Mongol"] = "xgn-kha", + ["Khamti"] = "kht", + ["Khamyang"] = "ksu", + ["Khana"] = "ogo", + ["Khandeshi"] = "khn", + ["Khanty"] = "kca", + ["Khao"] = "xao", + ["Kharam Naga"] = "kfw", + ["Kharia"] = "khr", + ["Kharia Thar"] = "ksy", + ["Khasa Prakrit"] = "inc-kha", + ["Khasi"] = "kha", + ["Khayo"] = "lko", + ["Khazar"] = "zkz", + ["Khe"] = "kqg", + ["Khehek"] = "tlx", + ["Khengkha"] = "xkf", + ["Khetrani"] = "xhe", + ["Khezha Naga"] = "nkh", + ["Khiamniungan Naga"] = "kix", + ["Khinalug"] = "kjj", + ["Khirwar"] = "kwx", + ["Khisa"] = "kqm", + ["Khitan"] = "zkt", + ["Khlor"] = "llo", + ["Khlula"] = "ykl", + ["Khmer"] = "km", + ["Khmu"] = "kjg", + ["Khoekhoe"] = "naq", + ["Khoibu Naga"] = "nkb", + ["Khoini"] = "xkc", + ["Kholok"] = "ktc", + ["Kholosi"] = "inc-kho", + ["Khonso"] = "kxc", + ["Khorasani Turkish"] = "kmz", + ["Khorezmian Turkic"] = "zkh", + ["Khotanese"] = "kho", + ["Khowar"] = "khw", + ["Khua"] = "xhv", + ["Khuen"] = "khf", + ["Khumi Chin"] = "cnk", + ["Khvarshi"] = "khv", + ["Khwarezmian"] = "xco", + ["Khwe"] = "xuu", + ["Kháng"] = "kjm", + ["Khün"] = "kkh", + ["Kibala"] = "blv", + ["Kibena"] = "bez", + ["Kibet"] = "kie", + ["Kibiri"] = "prm", + ["Kichwa"] = "qwe-kch", + 
["Kickapoo"] = "kic", + ["Kikai"] = "kzg", + ["Kikami"] = "kcu", + ["Kikuyu"] = "ki", + ["Kildin Sami"] = "sjd", + ["Kilit"] = "xme-klt", + ["Kilivila"] = "kij", + ["Kiliwa"] = "klb", + ["Kilmeri"] = "kih", + ["Kim"] = "kia", + ["Kim Mun"] = "mji", + ["Kimaama"] = "kig", + ["Kimaragang"] = "kqr", + ["Kimbu"] = "kiv", + ["Kimbundu"] = "kmb", + ["Kimki"] = "sbt", + ["Kimré"] = "kqp", + ["Kinabalian"] = "cbw", + ["Kinalakna"] = "kco", + ["Kinaray-a"] = "krj", + ["Kinga"] = "zga", + ["Kings River Yokuts"] = "nai-kry", + ["Kinikinao"] = "gqn", + ["Kinnauri"] = "kfk", + ["Kintaq"] = "knq", + ["Kinuku"] = "kkd", + ["Kioko"] = "ues", + ["Kiong"] = "kkm", + ["Kiorr"] = "xko", + ["Kiowa"] = "kio", + ["Kipchak"] = "qwm", + ["Kipfokomo"] = "pkb", + ["Kipsigis"] = "sgc", + ["Kiput"] = "kyi", + ["Kir-Balar"] = "kkr", + ["Kire"] = "geb", + ["Kirfi"] = "kks", + ["Kirike"] = "okr", + ["Kirikiri"] = "kiy", + ["Kirya-Konzel"] = "fkk", + ["Kis"] = "kis", + ["Kisa"] = "lks", + ["Kisan"] = "xis", + ["Kisankasa"] = "kqh", + ["Kisar"] = "kje", + ["Kisi"] = "kiz", + ["Kistane"] = "gru", + ["Kita Maninkakan"] = "mwk", + ["Kitanemuk"] = "azc-ktn", + ["Kitembo"] = "tbt", + ["Kitja"] = "gia", + ["Kitsai"] = "kii", + ["Kituba"] = "ktu", + ["Kiunum"] = "wei", + ["Kla"] = "lda", + ["Klallam"] = "clm", + ["Klamath-Modoc"] = "kla", + ["Klao"] = "klu", + ["Klias River Kadazan"] = "kqt", + ["Klingon"] = "tlh", + ["Knaanic"] = "czk", + ["Ko"] = "fuj", + ["Koalib"] = "kib", + ["Koasati"] = "cku", + ["Koba"] = "kpd", + ["Kobiana"] = "kcj", + ["Kobol"] = "kgu", + ["Kobon"] = "kpw", + ["Koch"] = "kdq", + ["Kochila Tharu"] = "thq", + ["Koda"] = "cdz", + ["Kodaku"] = "ksz", + ["Kodava"] = "kfa", + ["Kodeoha"] = "vko", + ["Kodi"] = "kod", + ["Kodia"] = "kwp", + ["Koenoem"] = "kcs", + ["Kofa"] = "kso", + ["Kofei"] = "kpi", + ["Kofyar"] = "kwl", + ["Kohin"] = "kkx", + ["Kohistani Shina"] = "plk", + ["Koho"] = "kpm", + ["Kohumono"] = "bcs", + ["Koi"] = "kkt", + ["Koibal"] = "zkb", + ["Koireng"] = "nkd", + 
["Koitabu"] = "kqi", + ["Koiwat"] = "kxt", + ["Kok-Nar"] = "gko", + ["Kok-Paponk"] = "okg", + ["Kokata"] = "ktd", + ["Kokborok"] = "trp", + ["Koke"] = "kou", + ["Koko-Bera"] = "kkp", + ["Kokoda"] = "xod", + ["Kokola"] = "kzn", + ["Kokota"] = "kkk", + ["Kol (Cameroon)"] = "biw", + ["Kol (New Guinea)"] = "kol", + ["Kola"] = "kvv", + ["Kolami"] = "kfb", + ["Kolbila"] = "klc", + ["Kolhe"] = "ekl", + ["Kolibugan Subanon"] = "skn", + ["Kolom"] = "klm", + ["Koluwawa"] = "klx", + ["Kom (Cameroon)"] = "bkm", + ["Kom (India)"] = "kmm", + ["Koma"] = "kmy", + ["Komba"] = "kpf", + ["Kombai"] = "tyn", + ["Kombio"] = "xbi", + ["Komering"] = "kge", + ["Komi-Permyak"] = "koi", + ["Komi-Zyrian"] = "kpv", + ["Kominimung"] = "xoi", + ["Komo"] = "xom", + ["Komodo"] = "kvh", + ["Kompane"] = "kvp", + ["Komyandaret"] = "kzv", + ["Kon Keu"] = "kkn", + ["Konabéré"] = "bbo", + ["Konai"] = "kxw", + ["Konda"] = "knd", + ["Konda-Dora"] = "kfc", + ["Kondekor"] = "gau", + ["Koneraw"] = "kdw", + ["Kongo"] = "kg", + ["Konkani"] = "kok", + ["Konkomba"] = "xon", + ["Konni"] = "kma", + ["Kono (Guinea)"] = "knu", + ["Kono (Nigeria)"] = "klk", + ["Kono (Sierra Leone)"] = "kno", + ["Konomala"] = "koa", + ["Konomihu"] = "nai-knm", + ["Konongo"] = "kcz", + ["Konyak Naga"] = "nbe", + ["Konyanka Maninka"] = "mku", + ["Konzo"] = "koo", + ["Koonzime"] = "ozm", + ["Koorete"] = "kqy", + ["Kopar"] = "xop", + ["Kopkaka"] = "opk", + ["Korafe-Yegha"] = "kpr", + ["Korak"] = "koz", + ["Korana"] = "kqz", + ["Korandje"] = "kcy", + ["Korean"] = "ko", + ["Korean Sign Language"] = "kvk", + ["Koreguaje"] = "coe", + ["Koresh-e Rostam"] = "okh", + ["Korku"] = "kfq", + ["Korlai Creole Portuguese"] = "vkp", + ["Koro (India)"] = "jkr", + ["Koro (New Guinea)"] = "kxr", + ["Koro (Vanuatu)"] = "krf", + ["Koro (West Africa)"] = "kfo", + ["Koromfé"] = "kfz", + ["Koromira"] = "kqj", + ["Koronadal Blaan"] = "bpr", + ["Koroni"] = "xkq", + ["Korop"] = "krp", + ["Koropó"] = "xxr", + ["Koroshi"] = "ktl", + ["Korowai"] = "khe", + ["Korra 
Koraga"] = "kfd", + ["Korubo"] = "xor", + ["Korupun-Sela"] = "kpq", + ["Korwa"] = "kfp", + ["Koryak"] = "kpy", + ["Kosadle"] = "kiq", + ["Kosarek Yale"] = "kkl", + ["Kosena"] = "kze", + ["Koshin"] = "kid", + ["Kosraean"] = "kos", + ["Kota (Gabon)"] = "koq", + ["Kota (India)"] = "kfe", + ["Kota Bangun Kutai Malay"] = "mqg", + ["Kota Marudu Talantang"] = "grm", + ["Kota Marudu Tinagas"] = "ktr", + ["Kotafon Gbe"] = "kqk", + ["Kotava"] = "avk", + ["Koti"] = "eko", + ["Kott"] = "zko", + ["Kou"] = "snz", + ["Kouya"] = "kyf", + ["Kovai"] = "kqb", + ["Kove"] = "kvc", + ["Kowaki"] = "xow", + ["Kowiai"] = "kwh", + ["Koy Sanjaq Surat"] = "kqd", + ["Koya"] = "kff", + ["Koyaga"] = "kga", + ["Koyo"] = "koh", + ["Koyra Chiini"] = "khq", + ["Koyraboro Senni"] = "ses", + ["Koyukon"] = "koy", + ["Kpagua"] = "kuw", + ["Kpala"] = "kpl", + ["Kpan"] = "kpk", + ["Kpasam"] = "pbn", + ["Kpati"] = "koc", + ["Kpatili"] = "kym", + ["Kpee"] = "cpo", + ["Kpelle"] = "kpe", + ["Kpessi"] = "kef", + ["Kplang"] = "kph", + ["Krache"] = "kye", + ["Krahô"] = "xra", + ["Kraol"] = "rka", + ["Krenak"] = "kqq", + ["Kresh"] = "krs", + ["Krevinian"] = "zkv", + ["Kreye"] = "xre", + ["Krikati-Timbira"] = "xri", + ["Krim"] = "krm", + ["Krio"] = "kri", + ["Kriol"] = "rop", + ["Krisa"] = "ksi", + ["Kristang"] = "mcm", + ["Krobu"] = "kxb", + ["Krongo"] = "kgo", + ["Kru'ng"] = "krr", + ["Krymchak"] = "jct", + ["Kryts"] = "kry", + ["Kua"] = "tyu", + ["Kua-nsi"] = "ykn", + ["Kuamasi"] = "yku", + ["Kuan"] = "uan", + ["Kuanhua"] = "xnh", + ["Kube"] = "kgf", + ["Kubi"] = "kof", + ["Kubo"] = "jko", + ["Kubu"] = "kvb", + ["Kucong"] = "lkc", + ["Kudiya"] = "kfg", + ["Kudmali"] = "kyw", + ["Kudu-Camo"] = "kov", + ["Kugama"] = "kow", + ["Kugbo"] = "kes", + ["Kugu-Muminh"] = "xmh", + ["Kui (India)"] = "kxu", + ["Kui (Indonesia)"] = "kvd", + ["Kuijau"] = "dkr", + ["Kuikúro"] = "kui", + ["Kujarge"] = "vkj", + ["Kuk"] = "kfn", + ["Kukatja"] = "kux", + ["Kukele"] = "kez", + ["Kukna"] = "kex", + ["Kuku-Mangk"] = "xmq", + 
["Kuku-Mu'inh"] = "xmp", + ["Kuku-Thaypan"] = "typ", + ["Kuku-Ugbanh"] = "ugb", + ["Kuku-Uwanh"] = "uwa", + ["Kuku-Yalanji"] = "gvn", + ["Kula"] = "tpg", + ["Kulaal"] = "glj", + ["Kulere"] = "kul", + ["Kulfa"] = "kxj", + ["Kulina"] = "xpk", + ["Kulisusu"] = "vkl", + ["Kullu Pahari"] = "kfx", + ["Kulon-Pazeh"] = "uun", + ["Kulung"] = "kle", + ["Kumak"] = "nee", + ["Kumalu"] = "ksl", + ["Kumam"] = "kdi", + ["Kuman"] = "kue", + ["Kumaoni"] = "kfy", + ["Kumarbhag Paharia"] = "kmj", + ["Kumba"] = "ksm", + ["Kumbainggar"] = "kgs", + ["Kumbaran"] = "wkb", + ["Kumbewaha"] = "xks", + ["Kumeyaay"] = "nai-kum", + ["Kumhali"] = "kra", + ["Kumu"] = "kmw", + ["Kumukio"] = "kuo", + ["Kumyk"] = "kum", + ["Kumzari"] = "zum", + ["Kuna"] = "cuk", + ["Kunama"] = "kun", + ["Kunbarlang"] = "wlg", + ["Kunda"] = "kdn", + ["Kundal Shahi"] = "shd", + ["Kunduvadi"] = "wku", + ["Kung"] = "kfl", + ["Kungarakany"] = "ggk", + ["Kungardutyi"] = "gdt", + ["Kunggari"] = "kgl", + ["Kungkari"] = "lku", + ["Kuni"] = "kse", + ["Kuni-Boazi"] = "kvg", + ["Kunigami"] = "xug", + ["Kunimaipa"] = "kup", + ["Kunja"] = "pep", + ["Kunjen"] = "kjn", + ["Kunyi"] = "njx", + ["Kunza"] = "kuz", + ["Kuo"] = "xuo", + ["Kuot"] = "kto", + ["Kupa"] = "kug", + ["Kupang Malay"] = "mkn", + ["Kupia"] = "key", + ["Kupsabiny"] = "kpz", + ["Kur"] = "kuv", + ["Kura Ede Nago"] = "nqk", + ["Kurama"] = "krh", + ["Kuranko"] = "knk", + ["Kuri"] = "nbn", + ["Kuria"] = "kuj", + ["Kurichiya"] = "kfh", + ["Kurmukar"] = "kfv", + ["Kurnai"] = "unn", + ["Kurrama"] = "vku", + ["Kurti"] = "ktm", + ["Kurtjar"] = "gdj", + ["Kurtop"] = "xkz", + ["Kurudu"] = "kjr", + ["Kurukh"] = "kru", + ["Kuruáya"] = "kyr", + ["Kusaal"] = "kus", + ["Kusaghe"] = "ksg", + ["Kushi"] = "kuh", + ["Kustenau"] = "awd-kus", + ["Kusu"] = "ksv", + ["Kusunda"] = "kgg", + ["Kutang Ghale"] = "ght", + ["Kutenai"] = "kut", + ["Kutep"] = "kub", + ["Kuthant"] = "xut", + ["Kutto"] = "kpa", + ["Kutu"] = "kdc", + ["Kuturmi"] = "khj", + ["Kuuk Thaayorre"] = "thd", + ["Kuuk Yak"] = 
"uky", + ["Kuuku-Ya'u"] = "kuy", + ["Kuvale"] = "olu", + ["Kuvi"] = "kxv", + ["Kuwaa"] = "blh", + ["Kuwaataay"] = "cwt", + ["Kuwani"] = "paa-kwn", + ["Kuy"] = "kdt", + ["Kven"] = "fkv", + ["Kw'adza"] = "wka", + ["Kwa'"] = "bko", + ["Kwaami"] = "ksq", + ["Kwadi"] = "kwz", + ["Kwaio"] = "kwd", + ["Kwaja"] = "kdz", + ["Kwak"] = "kwq", + ["Kwak'wala"] = "kwk", + ["Kwakum"] = "kwu", + ["Kwalhioqua-Tlatskanai"] = "qwt", + ["Kwama"] = "kmq", + ["Kwambi"] = "kwm", + ["Kwamera"] = "tnk", + ["Kwami"] = "ktf", + ["Kwamtim One"] = "okk", + ["Kwang"] = "kvi", + ["Kwanga"] = "kwj", + ["Kwangali"] = "kwn", + ["Kwanja"] = "knp", + ["Kwanka"] = "bij", + ["Kwanyama"] = "kj", + ["Kwara'ae"] = "kwf", + ["Kwasio"] = "nmg", + ["Kwaya"] = "kya", + ["Kwaza"] = "xwa", + ["Kwegu"] = "xwg", + ["Kwer"] = "kwr", + ["Kwerba"] = "kwe", + ["Kwerba Mamberamo"] = "xwr", + ["Kwere"] = "cwe", + ["Kwerisa"] = "kkb", + ["Kwese"] = "kws", + ["Kwesten"] = "kwt", + ["Kwini"] = "gww", + ["Kwinsu"] = "kuc", + ["Kwinti"] = "kww", + ["Kwoma"] = "kmo", + ["Kwomtari"] = "kwo", + ["Kyak"] = "bka", + ["Kyaka"] = "kyc", + ["Kyan-Karyaw Naga"] = "nqq", + ["Kyenele"] = "kql", + ["Kyenga"] = "tye", + ["Kyerung"] = "kgy", + ["Kyrgyz"] = "ky", + ["Kâte"] = "kmg", + ["Kélé"] = "keb", + ["Kómnzo"] = "paa-kom", + ["La'bi"] = "lbi", + ["Laal"] = "gdm", + ["Laalaa"] = "cae", + ["Laba"] = "lau", + ["Label"] = "lbb", + ["Labir"] = "jku", + ["Labo"] = "mwi", + ["Labo Phowa"] = "ypb", + ["Laboya"] = "lmy", + ["Labu"] = "lbu", + ["Labuk-Kinabatangan Kadazan"] = "dtb", + ["Lacandon"] = "lac", + ["Lachi"] = "lbt", + ["Lachiguiri Zapotec"] = "zpa", + ["Lachixío Zapotec"] = "zpl", + ["Ladakhi"] = "lbj", + ["Ladin"] = "lld", + ["Ladino"] = "lad", + ["Ladji-Ladji"] = "llj", + ["Laeko-Libuat"] = "lkl", + ["Lafofa"] = "laf", + ["Laghu"] = "lgb", + ["Laghuu"] = "lgh", + ["Lagwan"] = "kot", + ["Laha (Indonesia)"] = "lhh", + ["Laha (Vietnam)"] = "lha", + ["Lahanan"] = "lhn", + ["Lahnda"] = "lah", + ["Lahta Karen"] = "kvt", + ["Lahu"] = 
"lhu", + ["Lahu Shi"] = "lhi", + ["Lahul Lohar"] = "lhl", + ["Lai"] = "cnh", + ["Laimbue"] = "lmx", + ["Laitu Chin"] = "clj", + ["Laiyolo"] = "lji", + ["Lak"] = "lbe", + ["Laka"] = "lak", + ["Lakalei"] = "lka", + ["Lake Miwok"] = "lmw", + ["Lakha"] = "lkh", + ["Laki"] = "lki", + ["Lakkia"] = "lbc", + ["Lakon"] = "lkn", + ["Lakondê"] = "lkd", + ["Lakota"] = "lkt", + ["Lakota Dida"] = "dic", + ["Lala (New Guinea)"] = "nrz", + ["Lala (South Africa)"] = "bnt-lal", + ["Lala-Bisa"] = "leb", + ["Lala-Roba"] = "lla", + ["Lalana Chinantec"] = "cnl", + ["Lama Bai"] = "lay", + ["Lamaholot"] = "slp", + ["Lamalera"] = "lmr", + ["Lamang"] = "hia", + ["Lamatuka"] = "lmq", + ["Lamba"] = "lam", + ["Lambadi"] = "lmn", + ["Lambichhong"] = "lmh", + ["Lambya"] = "lai", + ["Lame"] = "bma", + ["Lamenu"] = "lmu", + ["Lamet"] = "lbn", + ["Lamja-Dengsa-Tola"] = "ldh", + ["Lamkang"] = "lmk", + ["Lamma"] = "lev", + ["Lamnso'"] = "lns", + ["Lamogai"] = "lmg", + ["Lampung Api"] = "ljp", + ["Lamu"] = "llh", + ["Lamu-Lamu"] = "lby", + ["Lanas Lobu"] = "ruu", + ["Landoma"] = "ldm", + ["Lang'e"] = "yne", + ["Langam"] = "lnm", + ["Langbashe"] = "lna", + ["Langi"] = "lag", + ["Langnian Buyang"] = "yln", + ["Lango (Sudan)"] = "lno", + ["Lango (Uganda)"] = "laj", + ["Lanima"] = "lnw", + ["Lanoh"] = "lnh", + ["Lao"] = "lo", + ["Lao Naga"] = "nlq", + ["Laomian"] = "lwm", + ["Laopang"] = "lbg", + ["Laos Sign Language"] = "lso", + ["Lapaguía-Guivini Zapotec"] = "ztl", + ["Lapine"] = "art-lap", + ["Lapuyan Subanun"] = "laa", + ["Laragia"] = "lrg", + ["Larantuka Malay"] = "lrt", + ["Lardil"] = "lbz", + ["Larevat"] = "lrv", + ["Lari"] = "lrl", + ["Larike-Wakasihu"] = "alo", + ["Laro"] = "lro", + ["Larteh"] = "lar", + ["Laru"] = "lan", + ["Lasalimu"] = "llm", + ["Lasgerdi"] = "lsa", + ["Lashi"] = "lsi", + ["Lasi"] = "lss", + ["Latgalian"] = "ltg", + ["Latin"] = "la", + ["Latu"] = "ltu", + ["Latundê"] = "ltn", + ["Latvian"] = "lv", + ["Latvian Sign Language"] = "lsl", + ["Lau"] = "llu", + ["Laua"] = "luf", + 
["Lauan"] = "llx", + ["Lauje"] = "law", + ["Laura"] = "lur", + ["Laurentian"] = "lre", + ["Lautu Chin"] = "clt", + ["Lavatbura-Lamusong"] = "lbv", + ["Lave"] = "brb", + ["Laven"] = "lbo", + ["Lavukaleve"] = "lvk", + ["Lawangan"] = "lbx", + ["Lawi"] = "lvi", + ["Lawu"] = "lwu", + ["Lawunuia"] = "tgi", + ["Layakha"] = "lya", + ["Laz"] = "lzz", + ["Laze"] = "tbq-laz", + ["Lealao Chinantec"] = "cle", + ["Leco"] = "lec", + ["Ledo Kaili"] = "lew", + ["Leelau"] = "ldk", + ["Lefa"] = "lfa", + ["Lega-Mwenga"] = "lgm", + ["Lega-Shabunda"] = "lea", + ["Legbo"] = "agb", + ["Legenyem"] = "lcc", + ["Lehali"] = "tql", + ["Lehalurup"] = "urr", + ["Leinong Naga"] = "lzn", + ["Leipon"] = "lek", + ["Lela"] = "dri", + ["Lelak"] = "llk", + ["Lele (Chad)"] = "lln", + ["Lele (Congo)"] = "lel", + ["Lele (Guinea)"] = "llc", + ["Lele (New Guinea)"] = "lle", + ["Lelemi"] = "lef", + ["Lelepa"] = "lpa", + ["Lembena"] = "leq", + ["Lemerig"] = "lrz", + ["Lemio"] = "lei", + ["Lemnian"] = "xle", + ["Lemolang"] = "ley", + ["Lemoro"] = "ldj", + ["Lenakel"] = "tnl", + ["Lendu"] = "led", + ["Lengilu"] = "lgi", + ["Lengo"] = "lgr", + ["Lengola"] = "lej", + ["Lenje"] = "leh", + ["Lenkau"] = "ler", + ["Lenyima"] = "ldg", + ["Leonese"] = "roa-leo", + ["Lepcha"] = "lep", + ["Lepki"] = "lpe", + ["Lepontic"] = "xlp", + ["Lere"] = "gnh", + ["Lese"] = "les", + ["Lesing-Gelimi"] = "let", + ["Letemboi"] = "nms", + ["Leti (Cameroon)"] = "leo", + ["Leti (Indonesia)"] = "lti", + ["Levuka"] = "lvu", + ["Lewo"] = "lww", + ["Lewo Eleng"] = "lwe", + ["Lewotobi"] = "lwt", + ["Leyigha"] = "ayi", + ["Lezgi"] = "lez", + ["Lhao Vo"] = "mhx", + ["Lhokpu"] = "lhp", + ["Li'o"] = "ljl", + ["Liabuku"] = "lix", + ["Liana-Seti"] = "ste", + ["Liangmai Naga"] = "njn", + ["Liberia Kpelle"] = "xpe", + ["Liberian English"] = "lir", + ["Libido"] = "liq", + ["Libinza"] = "liz", + ["Libon Bikol"] = "lbl", + ["Liburnian"] = "xli", + ["Libyan Arabic"] = "ayl", + ["Libyan Sign Language"] = "lbs", + ["Ligbi"] = "lig", + ["Ligenza"] = "lgz", + 
["Ligurian"] = "lij", + ["Lihir"] = "lih", + ["Lika"] = "lik", + ["Liki"] = "lio", + ["Likila"] = "lie", + ["Likuba"] = "kxx", + ["Likum"] = "lib", + ["Likwala"] = "kwc", + ["Lilau"] = "lll", + ["Lillooet"] = "lil", + ["Limassa"] = "bme", + ["Limbu"] = "lif", + ["Limbum"] = "lmp", + ["Limburgish"] = "li", + ["Limi"] = "ylm", + ["Limilngan"] = "lmc", + ["Limos Kalinga"] = "kmk", + ["Lindu"] = "klw", + ["Linear A"] = "lab", + ["Lingala"] = "ln", + ["Lingao"] = "onb", + ["Lingkhim"] = "lii", + ["Lingua Franca Nova"] = "lfn", + ["Linngithigh"] = "lnj", + ["Lipan"] = "apl", + ["Lipo"] = "lpo", + ["Lisabata-Nuniali"] = "lcs", + ["Lisela"] = "lcl", + ["Lish"] = "lsh", + ["Lishana Deni"] = "lsd", + ["Lishanid Noshan"] = "aij", + ["Lishán Didán"] = "trg", + ["Lisu"] = "lis", + ["Literary Chinese"] = "lzh", + ["Lithuanian"] = "lt", + ["Lithuanian Sign Language"] = "lls", + ["Little Swanport"] = "aus-lsw", + ["Litzlitz"] = "lzl", + ["Livonian"] = "liv", + ["Livvi"] = "olo", + ["Lizu"] = "sit-liz", + ["Lo-Toga"] = "lht", + ["Loarki"] = "lrk", + ["Lobala"] = "loq", + ["Lobi"] = "lob", + ["Lodhi"] = "lbm", + ["Logba"] = "lgq", + ["Logo"] = "log", + ["Logol"] = "lof", + ["Logooli"] = "rag", + ["Logorik"] = "liu", + ["Lojban"] = "jbo", + ["Lokaa"] = "yaz", + ["Loko"] = "lok", + ["Lokoya"] = "lky", + ["Lola"] = "lcd", + ["Lolak"] = "llq", + ["Lole"] = "llg", + ["Lolo"] = "llb", + ["Loloda"] = "loa", + ["Lolopo"] = "ycl", + ["Lomaiviti"] = "lmv", + ["Lomakka"] = "loi", + ["Lomavren"] = "rmi", + ["Lombard"] = "lmo", + ["Lombi"] = "lmi", + ["Lombo"] = "loo", + ["Lomwe"] = "ngl", + ["Loncong"] = "lce", + ["Long Phuri Naga"] = "lpn", + ["Long Wat"] = "ttw", + ["Longgu"] = "lgu", + ["Longto"] = "wok", + ["Longuda"] = "lnu", + ["Loniu"] = "los", + ["Lonwolwol"] = "crc", + ["Loo"] = "ldo", + ["Looma"] = "lom", + ["Lopa"] = "lop", + ["Lopi"] = "lov", + ["Lopit"] = "lpx", + ["Lorang"] = "lrn", + ["Lorediakarkar"] = "lnn", + ["Lorrain"] = "roa-lor", + ["Lote"] = "uvl", + ["Lotha Naga"] = 
"njh", + ["Lotud"] = "dtr", + ["Lotuko"] = "lot", + ["Lou"] = "loj", + ["Louisiana Creole French"] = "lou", + ["Loun"] = "lox", + ["Loup A"] = "xlo", + ["Loup B"] = "xlb", + ["Lovono"] = "vnk", + ["Low German"] = "nds", + ["Lower Burdekin"] = "xbb", + ["Lower Chehalis"] = "cea", + ["Lower Grand Valley Dani"] = "dni", + ["Lower Nossob"] = "nsb", + ["Lower Sorbian"] = "dsb", + ["Lower Southern Aranda"] = "axl", + ["Lower Ta'oih"] = "tto", + ["Lower Tanana"] = "taa", + ["Lowland Oaxaca Chontal"] = "clo", + ["Lowland Tarahumara"] = "tac", + ["Loxicha Zapotec"] = "ztp", + ["Lozi"] = "loz", + ["Luang"] = "lex", + ["Luba-Kasai"] = "lua", + ["Luba-Katanga"] = "lu", + ["Lubila"] = "kcc", + ["Lubu"] = "lcf", + ["Lubuagan Kalinga"] = "knb", + ["Luchazi"] = "lch", + ["Lucumi"] = "luq", + ["Ludian"] = "lud", + ["Lufu"] = "ldq", + ["Luganda"] = "lg", + ["Lugbara"] = "lgg", + ["Luguru"] = "ruf", + ["Luhu"] = "lcq", + ["Luhya"] = "luy", + ["Luimbi"] = "lum", + ["Luiseño"] = "lui", + ["Lukpa"] = "dop", + ["Lule"] = "ule", + ["Lule Sami"] = "smj", + ["Lumba-Yakkha"] = "luu", + ["Lumbee"] = "lmz", + ["Lumbu"] = "lup", + ["Lumun"] = "lmd", + ["Lun Bawang"] = "lnd", + ["Luna"] = "luj", + ["Lunanakha"] = "luk", + ["Lunda"] = "lun", + ["Lungga"] = "lga", + ["Luo"] = "luo", + ["Luopohe Hmong"] = "hml", + ["Luri (Nigeria)"] = "ldd", + ["Lusengo"] = "lse", + ["Lushootseed"] = "lut", + ["Lusi"] = "khl", + ["Lusitanian"] = "xls", + ["Lutachoni"] = "lts", + ["Lutos"] = "ndy", + ["Luvale"] = "lue", + ["Luwati"] = "luv", + ["Luwian"] = "xlu", + ["Luwo"] = "lwo", + ["Luxembourgish"] = "lb", + ["Luyana"] = "lyn", + ["Lwalu"] = "lwa", + ["Lwel"] = "bnt-lwl", + ["Lycian"] = "xlc", + ["Lydian"] = "xld", + ["Lyngngam"] = "lyg", + ["Lyélé"] = "lee", + ["Láadan"] = "ldn", + ["Láá Láá Bwamu"] = "bwj", + ["Lü"] = "khb", + ["Ma"] = "msj", + ["Ma Manda"] = "skc", + ["Ma'anyan"] = "mhy", + ["Ma'di"] = "mhi", + ["Ma'ya"] = "slz", + ["Maa"] = "cma", + ["Maaka"] = "mew", + ["Maale"] = "mdy", + ["Maasai"] = 
"mas", + ["Maay"] = "ymm", + ["Maba"] = "mqa", + ["Mabaale"] = "mmz", + ["Mabaan"] = "mfz", + ["Mabaka Valley Kalinga"] = "kkg", + ["Mabire"] = "muj", + ["Maca"] = "mca", + ["Macaguaje"] = "mcl", + ["Macaguán"] = "mbn", + ["Macanese"] = "mzs", + ["Macau Pidgin Portuguese"] = "crp-mpp", + ["Macedonian"] = "mk", + ["Machame"] = "jmc", + ["Machiguenga"] = "mcb", + ["Machinere"] = "mpd", + ["Machinga"] = "mvw", + ["Macoris"] = "nai-mac", + ["Macuna"] = "myy", + ["Macushi"] = "mbc", + ["Mada (Cameroon)"] = "mxu", + ["Mada (Nigeria)"] = "mda", + ["Madagascar Sign Language"] = "mzc", + ["Madak"] = "mmx", + ["Maden"] = "xmx", + ["Madhi Madhi"] = "dmd", + ["Madi"] = "grg", + ["Madngele"] = "zml", + ["Madukayang Kalinga"] = "kmd", + ["Madurese"] = "mad", + ["Mae"] = "mme", + ["Maek"] = "hmk", + ["Maeng Itneg"] = "itt", + ["Mafa"] = "maf", + ["Mafea"] = "mkv", + ["Mag-Anchi Ayta"] = "sgb", + ["Mag-Indi Ayta"] = "blx", + ["Magadhi Prakrit"] = "inc-mgd", + ["Magahat"] = "mtw", + ["Magahi"] = "mag", + ["Magdalena Peñasco Mixtec"] = "xtm", + ["Magiyi"] = "gmg", + ["Magoma"] = "gmx", + ["Magori"] = "zgr", + ["Maguindanao"] = "mdh", + ["Magɨ"] = "gkd", + ["Mahali"] = "mjx", + ["Maharastri Prakrit"] = "pmh", + ["Mahasu Pahari"] = "bfz", + ["Mahican"] = "mjy", + ["Mahongwe"] = "mhb", + ["Mahou"] = "mxx", + ["Maia"] = "sks", + ["Maiadomu"] = "mzz", + ["Maiani"] = "tnh", + ["Maii"] = "mmm", + ["Mailu"] = "mgu", + ["Maindo"] = "cwb", + ["Mairasi"] = "zrs", + ["Maisin"] = "mbq", + ["Maithili"] = "mai", + ["Maiwa (Indonesia)"] = "wmm", + ["Maiwa (New Guinea)"] = "mti", + ["Maiwala"] = "mum", + ["Majang"] = "mpe", + ["Majera"] = "xmj", + ["Majhi"] = "mjz", + ["Majhwar"] = "mmj", + ["Mak (China)"] = "mkg", + ["Mak (Nigeria)"] = "pbl", + ["Makaa"] = "mcp", + ["Makah"] = "myh", + ["Makalero"] = "mjb", + ["Makasae"] = "mkz", + ["Makasar"] = "mak", + ["Makassar Malay"] = "mfp", + ["Makayam"] = "aup", + ["Makhuwa"] = "vmw", + ["Makhuwa-Marrevone"] = "xmc", + ["Makhuwa-Meetto"] = "mgh", + 
["Makhuwa-Moniga"] = "mhm", + ["Makhuwa-Saka"] = "xsq", + ["Makhuwa-Shirima"] = "vmk", + ["Maklew"] = "mgf", + ["Makolkol"] = "zmh", + ["Makonde"] = "kde", + ["Maku"] = "xak", + ["Maku'a"] = "lva", + ["Makuri Naga"] = "jmn", + ["Makuráp"] = "mpu", + ["Makwe"] = "ymk", + ["Makyan Naga"] = "umn", + ["Mal"] = "mlf", + ["Mal Paharia"] = "mkb", + ["Mala (New Guinea)"] = "ped", + ["Mala (Nigeria)"] = "ruy", + ["Mala Malasar"] = "ima", + ["Malaccan Creole Malay"] = "ccm", + ["Malagasy"] = "mg", + ["Malalamai"] = "mmt", + ["Malalí"] = "sai-mal", + ["Malango"] = "mln", + ["Malankuravan"] = "mjo", + ["Malapandaram"] = "mjp", + ["Malaryan"] = "mjq", + ["Malas"] = "mkr", + ["Malasanga"] = "mqz", + ["Malasar"] = "ymr", + ["Malavedan"] = "mjr", + ["Malawi Lomwe"] = "lon", + ["Malawian Sign Language"] = "lws", + ["Malay"] = "ms", + ["Malayalam"] = "ml", + ["Malayic Dayak"] = "xdy", + ["Malaynon"] = "mlz", + ["Malaysian Sign Language"] = "xml", + ["Malba Birifor"] = "bfo", + ["Male"] = "mdc", + ["Malecite-Passamaquoddy"] = "pqm", + ["Maleng"] = "pkt", + ["Maleu-Kilenge"] = "mgl", + ["Malfaxal"] = "mlx", + ["Malgana"] = "vml", + ["Malgbe"] = "mxf", + ["Mali"] = "gcc", + ["Malibu"] = "sai-mlb", + ["Malila"] = "mgq", + ["Malimba"] = "mzd", + ["Malimpung"] = "mli", + ["Malinaltepec Tlapanec"] = "tcf", + ["Malol"] = "mbk", + ["Maltese"] = "mt", + ["Maltese Sign Language"] = "mdl", + ["Malua Bay"] = "mll", + ["Malvi"] = "mup", + ["Maléku Jaíka"] = "gut", + ["Mam"] = "mam", + ["Mama"] = "mma", + ["Mamaa"] = "mhf", + ["Mamaindé"] = "wmd", + ["Mamanwa"] = "mmn", + ["Mamara Senoufo"] = "myk", + ["Mamasa"] = "mqj", + ["Mambae"] = "mgm", + ["Mambai"] = "mcs", + ["Mamboru"] = "mvd", + ["Mambwe-Lungu"] = "mgr", + ["Mampruli"] = "maw", + ["Mamuju"] = "mqx", + ["Mamulique"] = "emm", + ["Mamusi"] = "kdf", + ["Mamvu"] = "mdi", + ["Man Met"] = "mml", + ["Manado Malay"] = "xmm", + ["Manam"] = "mva", + ["Manambu"] = "mle", + ["Manangba"] = "nmm", + ["Manangkari"] = "znk", + ["Manao"] = "awd-man", + 
["Manchu"] = "mnc", + ["Manda (Australia)"] = "zma", + ["Manda (India)"] = "mha", + ["Manda (Tanzania)"] = "mgs", + ["Mandahuaca"] = "mht", + ["Mandaic"] = "mid", + ["Mandailing Batak"] = "btm", + ["Mandalorian"] = "art-man", + ["Mandan"] = "mhq", + ["Mandandanyi"] = "zmk", + ["Mandar"] = "mdr", + ["Mandara"] = "tbf", + ["Mandari"] = "mqu", + ["Mandarin"] = "cmn", + ["Mandeali"] = "mjl", + ["Mander"] = "mqr", + ["Mandingo"] = "man", + ["Mandinka"] = "mnk", + ["Mandjak"] = "mfv", + ["Mandobo Atas"] = "aax", + ["Mandobo Bawah"] = "bwp", + ["Manem"] = "jet", + ["Mang"] = "zng", + ["Mangala"] = "mem", + ["Mangarayi"] = "mpc", + ["Mangarevan"] = "mrv", + ["Mangas"] = "zns", + ["Mangayat"] = "myj", + ["Mangbetu"] = "mdj", + ["Mangbutu"] = "mdk", + ["Mangerr"] = "zme", + ["Mangga Buang"] = "mmo", + ["Manggarai"] = "mqy", + ["Mangghuer"] = "xgn-mgr", + ["Mango"] = "mge", + ["Mangole"] = "mqc", + ["Mangseng"] = "mbh", + ["Manigri-Kambolé Ede Nago"] = "xkb", + ["Manikion"] = "mnx", + ["Manipa"] = "mqp", + ["Manipuri"] = "mni", + ["Mankanya"] = "knf", + ["Mankiyali"] = "nlm", + ["Manna-Dora"] = "mju", + ["Mannan"] = "mjv", + ["Mano"] = "mev", + ["Manombai"] = "woo", + ["Mansaka"] = "msk", + ["Mansi"] = "mns", + ["Mansoanka"] = "msw", + ["Manta"] = "myg", + ["Mantsi"] = "nty", + ["Manumanaw Karen"] = "kxf", + ["Manusela"] = "wha", + ["Manx"] = "gv", + ["Manya"] = "mzj", + ["Manyawa"] = "mny", + ["Manza"] = "mzv", + ["Mao Naga"] = "nbi", + ["Maonan"] = "mmd", + ["Maore Comorian"] = "swb", + ["Maori"] = "mi", + ["Mape"] = "mlh", + ["Mapena"] = "mnm", + ["Mapia"] = "mpy", + ["Mapidian"] = "mpw", + ["Mapos Buang"] = "bzh", + ["Mapoyo"] = "mcg", + ["Mapudungun"] = "arn", + ["Mapun"] = "sjm", + ["Maquiritari"] = "mch", + ["Mara"] = "mec", + ["Mara Chin"] = "mrh", + ["Marachi"] = "lri", + ["Maraghei"] = "vmh", + ["Maragus"] = "mrs", + ["Maram Naga"] = "nma", + ["Marama"] = "lrm", + ["Maranao"] = "mrw", + ["Maranungku"] = "zmr", + ["Mararit"] = "mgb", + ["Marathi"] = "mr", + 
["Maratino"] = "sai-mar", + ["Marau"] = "mvr", + ["Marawan"] = "awd-mar", + ["Marba"] = "mpg", + ["Marenje"] = "vmr", + ["Marfa"] = "mvu", + ["Margany"] = "zmc", + ["Marghi South"] = "mfm", + ["Margi"] = "mrt", + ["Margu"] = "mhg", + ["Maria"] = "mds", + ["Mariaté"] = "awd-mrt", + ["Maricopa"] = "mrc", + ["Maridan"] = "zmd", + ["Maridjabin"] = "zmj", + ["Marik"] = "dad", + ["Marimanindji"] = "zmm", + ["Marind"] = "mrz", + ["Maring"] = "mbw", + ["Maring Naga"] = "nng", + ["Maringarr"] = "zmt", + ["Marino"] = "mrb", + ["Mariri"] = "mqi", + ["Maritime Sign Language"] = "nsr", + ["Maritsauá"] = "msp", + ["Mariupol Greek"] = "grk-mar", + ["Mariyedi"] = "zmy", + ["Marka"] = "rkm", + ["Markweeta"] = "enb", + ["Marma"] = "rmz", + ["Maroon Spirit Language"] = "cpe-mar", + ["Marovo"] = "mvo", + ["Marriammu"] = "xru", + ["Marrithiyel"] = "mfr", + ["Marrucinian"] = "umc", + ["Marshallese"] = "mh", + ["Marsian"] = "ims", + ["Martha's Vineyard Sign Language"] = "mre", + ["Marti Ke"] = "zmg", + ["Martu Wangka"] = "mpj", + ["Martuthunira"] = "vma", + ["Marwari"] = "mwr", + ["Marúbo"] = "mzr", + ["Masaba"] = "myx", + ["Masadiit Itneg"] = "tis", + ["Masakará"] = "sai-msk", + ["Masalit"] = "mls", + ["Masana"] = "mcn", + ["Masbate Sorsogon"] = "bks", + ["Masbatenyo"] = "msb", + ["Mashco Piro"] = "cuj", + ["Mashi"] = "mho", + ["Masimasi"] = "ism", + ["Masiwang"] = "bnf", + ["Maskelynes"] = "klv", + ["Maslam"] = "msv", + ["Masmaje"] = "mes", + ["Massachusett"] = "wam", + ["Massalat"] = "mdg", + ["Massep"] = "mvs", + ["Matagalpa"] = "mtn", + ["Matal"] = "mfh", + ["Matanawi"] = "sai-mat", + ["Matbat"] = "xmt", + ["Matengo"] = "mgv", + ["Matepi"] = "mqe", + ["Matigsalug Manobo"] = "mbt", + ["Matipuhy"] = "mzo", + ["Matlatzinca"] = "mat", + ["Mato"] = "met", + ["Mato Grosso Arára"] = "axg", + ["Mator"] = "mtm", + ["Matsés"] = "mcf", + ["Mattole"] = "mvb", + ["Matukar"] = "mjk", + ["Matumbi"] = "mgw", + ["Matya Samo"] = "stj", + ["Matís"] = "mpq", + ["Maung"] = "mph", + ["Mauritian Creole"] 
= "mfe", + ["Mauritian Sign Language"] = "lsy", + ["Mauwake"] = "mhl", + ["Mawa"] = "mcw", + ["Mawak"] = "mjj", + ["Mawan"] = "mcz", + ["Mawayana"] = "mzx", + ["Mawchi"] = "mke", + ["Mawes"] = "mgk", + ["Maxakalí"] = "mbl", + ["Maxi Gbe"] = "mxl", + ["Maya Samo"] = "sym", + ["Mayaguduna"] = "xmy", + ["Mayangna"] = "yan", + ["Mayawali"] = "yxa", + ["Maybrat"] = "ayz", + ["Mayeka"] = "myc", + ["Mayi-Thakurti"] = "xyt", + ["Maykulan"] = "mnt", + ["Maynas"] = "sai-mys", + ["Mayo"] = "mfy", + ["Mayogo"] = "mdm", + ["Mayoyao Ifugao"] = "ifu", + ["Maypure"] = "awd-mpr", + ["Mazagway"] = "dkx", + ["Mazaltepec Zapotec"] = "zpy", + ["Mazanderani"] = "mzn", + ["Mazatlán Mazatec"] = "vmz", + ["Mazatlán Mixe"] = "mzl", + ["Mba"] = "mfc", + ["Mbabaram"] = "vmb", + ["Mbala"] = "mdp", + ["Mbalanhu"] = "lnb", + ["Mbandja"] = "zmz", + ["Mbangala"] = "mxg", + ["Mbangi"] = "mgn", + ["Mbangwe"] = "zmn", + ["Mbara (Australia)"] = "mvl", + ["Mbara (Chad)"] = "mpk", + ["Mbariman-Gudhinma"] = "zmv", + ["Mbati"] = "mdn", + ["Mbato"] = "gwa", + ["Mbay"] = "myb", + ["Mbe"] = "mfo", + ["Mbe'"] = "mtk", + ["Mbelime"] = "mql", + ["Mbere"] = "mdt", + ["Mbesa"] = "zms", + ["Mbiywom"] = "aus-mbi", + ["Mbo (Cameroon)"] = "mbo", + ["Mbo (Congo)"] = "zmw", + ["Mboi"] = "moi", + ["Mboko"] = "mdu", + ["Mbole"] = "mdq", + ["Mbonga"] = "xmb", + ["Mbongno"] = "bgu", + ["Mbosi"] = "mdw", + ["Mbowe"] = "mxo", + ["Mbre"] = "mka", + ["Mbu'"] = "muc", + ["Mbudum"] = "xmd", + ["Mbugu"] = "mhd", + ["Mbugwe"] = "mgz", + ["Mbuko"] = "mqb", + ["Mbukushu"] = "mhw", + ["Mbula"] = "mna", + ["Mbula-Bwazza"] = "mbu", + ["Mbule"] = "mlb", + ["Mbulungish"] = "mbv", + ["Mbum"] = "mdd", + ["Mbunda"] = "mck", + ["Mbunga"] = "mgy", + ["Mburku"] = "bbt", + ["Mbuun"] = "zmp", + ["Mbwela"] = "mfu", + ["Mbyá Guaraní"] = "gun", + ["Me'en"] = "mym", + ["Mea"] = "meg", + ["Mebu"] = "mjn", + ["Mecayapan Nahuatl"] = "nhx", + ["Medebur"] = "mjm", + ["Medefaidrin"] = "dmf", + ["Media Lengua"] = "mue", + ["Mednyj Aleut"] = "mud", + 
["Medumba"] = "byv", + ["Mefele"] = "mfj", + ["Megam"] = "mef", + ["Megleno-Romanian"] = "ruq", + ["Mehek"] = "nux", + ["Mehináku"] = "mmh", + ["Mehri"] = "gdq", + ["Mekeo"] = "mek", + ["Mekmek"] = "mvk", + ["Mekwei"] = "msf", + ["Mel-Khaonh"] = "hkn", + ["Mele-Fila"] = "mxe", + ["Melo"] = "mfx", + ["Melpa"] = "med", + ["Memoni"] = "mby", + ["Mendalam Kayan"] = "xkd", + ["Mendankwe-Nkwen"] = "mfd", + ["Mende"] = "men", + ["Mengaka"] = "xmg", + ["Mengen"] = "mee", + ["Menien"] = "sai-men", + ["Menka"] = "mea", + ["Menominee"] = "mez", + ["Mentawai"] = "mwv", + ["Menya"] = "mcr", + ["Meoswar"] = "mvx", + ["Mer"] = "mnu", + ["Meramera"] = "mxm", + ["Merei"] = "lmb", + ["Merey"] = "meq", + ["Meriam"] = "ulk", + ["Merlav"] = "mrm", + ["Meroitic"] = "xmr", + ["Meru"] = "mer", + ["Mesaka"] = "iyo", + ["Mese"] = "mci", + ["Mesme"] = "zim", + ["Mesmes"] = "mys", + ["Mesqan"] = "mvz", + ["Messapic"] = "cms", + ["Meta'"] = "mgo", + ["Metlatónoc Mixtec"] = "mxv", + ["Mewari"] = "mtr", + ["Mewati"] = "wtm", + ["Mexican Sign Language"] = "mfs", + ["Meyah"] = "mej", + ["Mezontla Popoloca"] = "pbe", + ["Mezquital Otomi"] = "ote", + ["Meänkieli"] = "fit", + ["Mfinu"] = "zmf", + ["Mfumte"] = "nfu", + ["Mgbo"] = "gmz", + ["Mi'kmaq"] = "mic", + ["Miami"] = "mia", + ["Mian"] = "mpt", + ["Miani"] = "pla", + ["Michif"] = "crg", + ["Michigamea"] = "cmm", + ["Michoacán Mazahua"] = "mmc", + ["Michoacán Nahuatl"] = "ncl", + ["Mid Grand Valley Dani"] = "dnt", + ["Mid-Southern Banda"] = "bjo", + ["Middle Armenian"] = "axm", + ["Middle Assamese"] = "inc-mas", + ["Middle Bengali"] = "inc-mbn", + ["Middle Breton"] = "xbm", + ["Middle Chinese"] = "ltc", + ["Middle Cornish"] = "cnx", + ["Middle Dutch"] = "dum", + ["Middle English"] = "enm", + ["Middle French"] = "frm", + ["Middle Gujarati"] = "inc-mgu", + ["Middle High German"] = "gmh", + ["Middle Irish"] = "mga", + ["Middle Kannada"] = "dra-mkn", + ["Middle Khmer"] = "mkh-mkm", + ["Middle Korean"] = "okm", + ["Middle Low German"] = "gml", + 
["Middle Median"] = "xme-mid", + ["Middle Mon"] = "mkh-mmn", + ["Middle Mongolian"] = "xng", + ["Middle Newar"] = "nwx", + ["Middle Norwegian"] = "gmq-mno", + ["Middle Oriya"] = "inc-mor", + ["Middle Persian"] = "pal", + ["Middle Vietnamese"] = "mkh-mvi", + ["Middle Watut"] = "mpl", + ["Middle Welsh"] = "wlm", + ["Midob"] = "mei", + ["Migaama"] = "mmy", + ["Migabac"] = "mpp", + ["Miji"] = "sjl", + ["Miju"] = "mxj", + ["Mikasuki"] = "mik", + ["Milang"] = "und-mil", + ["Mili"] = "ymh", + ["Millcayac"] = "sai-mil", + ["Miltu"] = "mlj", + ["Miluk"] = "iml", + ["Milyan"] = "imy", + ["Mimi of Decorse"] = "und-mmd", + ["Mimi of Nachtigal"] = "und-mmn", + ["Min Bei"] = "mnp", + ["Min Dong"] = "cdo", + ["Min Nan"] = "nan", + ["Min Zhong"] = "czo", + ["Mina"] = "hna", + ["Minaean"] = "inm", + ["Minang"] = "xrg", + ["Minangkabau"] = "min", + ["Minanibai"] = "mcv", + ["Minaveha"] = "mvn", + ["Minderico"] = "drc", + ["Mindiri"] = "mpn", + ["Mingang Doso"] = "mko", + ["Mingo"] = "iro-min", + ["Mingrelian"] = "xmf", + ["Minica Huitoto"] = "hto", + ["Minidien"] = "wii", + ["Minigir"] = "vmg", + ["Minjungbal"] = "xjb", + ["Minkin"] = "xxm", + ["Minoan"] = "omn", + ["Minokok"] = "mqq", + ["Minriq"] = "mnq", + ["Mintil"] = "mzt", + ["Miqie"] = "yiq", + ["Mirandese"] = "mwl", + ["Miraya Bikol"] = "rbl", + ["Mire"] = "mvh", + ["Mirgan"] = "zrg", + ["Miriti"] = "mmv", + ["Miriwoong Sign Language"] = "rsm", + ["Miriwung"] = "mep", + ["Mirpur Panjabi"] = "pmu", + ["Misantla Totonac"] = "tlc", + ["Miship"] = "mjs", + ["Misima-Paneati"] = "mpx", + ["Mising"] = "mrg", + ["Miskito"] = "miq", + ["Mitla Zapotec"] = "zaw", + ["Mitlatongo Mixtec"] = "vmm", + ["Mittu"] = "mwu", + ["Mituku"] = "zmq", + ["Miu"] = "mpo", + ["Miwa"] = "vmi", + ["Mixed Great Andamanese"] = "gac", + ["Mixifore"] = "mfg", + ["Mixtepec Mixtec"] = "mix", + ["Mixtepec Zapotec"] = "zpm", + ["Miya"] = "mkf", + ["Miyako"] = "mvi", + ["Miyobe"] = "soy", + ["Mizo"] = "lus", + ["Mlabri"] = "mra", + ["Mlahsö"] = "lhs", + ["Mlap"] 
= "kja", + ["Mlomp"] = "mlo", + ["Mmaala"] = "mmu", + ["Mmani"] = "buy", + ["Mmen"] = "bfm", + ["Mo"] = "wkd", + ["Mo'da"] = "gbn", + ["Moabite"] = "obm", + ["Moba"] = "mfq", + ["Mobilian"] = "mod", + ["Mobumrin Aizi"] = "ahm", + ["Mocana"] = "sai-mcn", + ["Mochi"] = "old", + ["Mochica"] = "omc", + ["Mocho"] = "mhc", + ["Mocoví"] = "moc", + ["Modang"] = "mxd", + ["Modole"] = "mqo", + ["Moere"] = "mvq", + ["Mofu-Gudur"] = "mif", + ["Mogholi"] = "mhj", + ["Mogum"] = "mou", + ["Mohawk"] = "moh", + ["Mohegan-Pequot"] = "xpq", + ["Moi (Congo)"] = "mow", + ["Moi (Indonesia)"] = "mxn", + ["Moikodi"] = "mkp", + ["Moingi"] = "mwz", + ["Mojave"] = "mov", + ["Moji"] = "ymi", + ["Mok"] = "mqt", + ["Moken"] = "mwt", + ["Mokerang"] = "mft", + ["Mokilese"] = "mkj", + ["Moklen"] = "mkm", + ["Mokole"] = "mkl", + ["Mokpwe"] = "bri", + ["Moksha"] = "mdf", + ["Molale"] = "mbe", + ["Molbog"] = "pwm", + ["Moldova Sign Language"] = "vsi", + ["Molengue"] = "bxc", + ["Molima"] = "mox", + ["Molmo One"] = "aun", + ["Molo"] = "zmo", + ["Molof"] = "msl", + ["Moloko"] = "mlw", + ["Mom Jango"] = "ver", + ["Moma"] = "myl", + ["Momare"] = "msz", + ["Mombo Dogon"] = "dmb", + ["Mombum"] = "mso", + ["Momina"] = "mmb", + ["Momuna"] = "mqf", + ["Mon"] = "mnw", + ["Monastic Sign Language"] = "mzg", + ["Mondropolon"] = "npn", + ["Mondé"] = "mnd", + ["Mongghul"] = "xgn-mgl", + ["Mongo"] = "lol", + ["Mongol"] = "mgt", + ["Mongolian"] = "mn", + ["Mongolian Sign Language"] = "msr", + ["Mongondow"] = "mog", + ["Monguor"] = "mjg", + ["Moni"] = "mnz", + ["Monimbo"] = "mom", + ["Mono (California)"] = "mnr", + ["Mono (Cameroon)"] = "mru", + ["Mono (Congo)"] = "mnh", + ["Monom"] = "moo", + ["Monsang Naga"] = "nmh", + ["Montagnais"] = "moe", + ["Montana Salish"] = "fla", + ["Montol"] = "mtl", + ["Monumbo"] = "mxk", + ["Monzombo"] = "moj", + ["Moo"] = "gwg", + ["Moore"] = "mos", + ["Moose Cree"] = "crm", + ["Mopan Maya"] = "mop", + ["Mor (Austronesian)"] = "mhz", + ["Mor (Papuan)"] = "moq", + ["Moraid"] = "msg", + 
["Moran"] = "sit-mor", + ["Morawa"] = "mze", + ["Morelos Nahuatl"] = "nhm", + ["Morerebi"] = "xmo", + ["Moresada"] = "msx", + ["Mori Atas"] = "mzq", + ["Mori Bawah"] = "xmz", + ["Morigi"] = "mdb", + ["Moro"] = "mor", + ["Moroccan Amazigh"] = "zgh", + ["Moroccan Arabic"] = "ary", + ["Moroccan Sign Language"] = "xms", + ["Morokodo"] = "mgc", + ["Morom"] = "bdo", + ["Moronene"] = "mqn", + ["Morori"] = "mok", + ["Morouas"] = "mrp", + ["Mortlockese"] = "mrl", + ["Moru"] = "mgd", + ["Mosimo"] = "mqv", + ["Moskona"] = "mtj", + ["Mota"] = "mtt", + ["Motembo"] = "tmv", + ["Motu"] = "meu", + ["Mouk-Aria"] = "mwh", + ["Mount Iraya Agta"] = "atl", + ["Mount Iriga Agta"] = "agz", + ["Mountain Koiari"] = "kpx", + ["Mouwase"] = "jmw", + ["Movima"] = "mzp", + ["Moyadan Itneg"] = "ity", + ["Moyon Naga"] = "nmo", + ["Mozambican Sign Language"] = "mzy", + ["Mozarabic"] = "mxi", + ["Mpade"] = "mpi", + ["Mpalitjanh"] = "xpj", + ["Mpi"] = "mpz", + ["Mpiemo"] = "mcx", + ["Mpiin"] = "bnt-mpi", + ["Mpinda"] = "pnd", + ["Mpongmpong"] = "mgg", + ["Mpoto"] = "mpa", + ["Mpotovoro"] = "mvt", + ["Mpuono"] = "bnt-mpu", + ["Mpur"] = "akc", + ["Mro Chin"] = "cmr", + ["Mru"] = "mro", + ["Mser"] = "kqx", + ["Muak Sa-aak"] = "ukk", + ["Mualang"] = "mtd", + ["Mubami"] = "tsx", + ["Mubi"] = "mub", + ["Mucuchí"] = "sai-muc", + ["Muda"] = "ymd", + ["Mudburra"] = "dmw", + ["Mudu Koraga"] = "vmd", + ["Muduapa"] = "wiv", + ["Muduga"] = "udg", + ["Muellama"] = "sai-mue", + ["Mufian"] = "aoj", + ["Muher"] = "sem-mhr", + ["Muinane"] = "bmr", + ["Mukha-Dora"] = "mmk", + ["Mukulu"] = "moz", + ["Mulaha"] = "mfw", + ["Mulam"] = "mlm", + ["Mulao"] = "giu", + ["Mullu Kurumba"] = "kpb", + ["Mullukmulluk"] = "mpb", + ["Muluridyi"] = "vmu", + ["Mum"] = "kqa", + ["Mumuye"] = "mzm", + ["Muna"] = "mnb", + ["Munda"] = "unx", + ["Mundabli"] = "boe", + ["Mundang"] = "mua", + ["Mundani"] = "mnf", + ["Mundari"] = "unr", + ["Mundat"] = "mmf", + ["Mundolinco"] = "art-mun", + ["Mundurukú"] = "myu", + ["Mungaka"] = "mhk", + 
["Mungbam"] = "mij", + ["Munggui"] = "mth", + ["Mungkip"] = "mpv", + ["Muniche"] = "myr", + ["Munit"] = "mtc", + ["Munji"] = "mnj", + ["Munsee"] = "umu", + ["Muong"] = "mtq", + ["Mur Pano"] = "tkv", + ["Muratayak"] = "asx", + ["Murik (Malaysia)"] = "mxr", + ["Murik (New Guinea)"] = "mtf", + ["Murkim"] = "rmh", + ["Murle"] = "mur", + ["Murrinh-Patha"] = "mwf", + ["Mursi"] = "muz", + ["Murui Huitoto"] = "huu", + ["Murupi"] = "mqw", + ["Muruwari"] = "zmu", + ["Musan"] = "mmp", + ["Musar"] = "mmi", + ["Musasa"] = "smm", + ["Musey"] = "mse", + ["Musgu"] = "mug", + ["Musi"] = "mui", + ["Muskum"] = "mje", + ["Musom"] = "msu", + ["Mussau-Emira"] = "emi", + ["Muthuvan"] = "muv", + ["Mutu"] = "tuc", + ["Muya"] = "mvm", + ["Muyang"] = "muy", + ["Muyuw"] = "myw", + ["Muzi"] = "ymz", + ["Muzo"] = "sai-muz", + ["Mvanip"] = "mcj", + ["Mvuba"] = "mxh", + ["Mwaghavul"] = "sur", + ["Mwali Comorian"] = "wlc", + ["Mwan"] = "moa", + ["Mwani"] = "wmw", + ["Mwatebu"] = "mwa", + ["Mwera"] = "mwe", + ["Mwimbi-Muthambi"] = "mws", + ["Mwotlap"] = "mlv", + ["Mycenaean Greek"] = "gmy", + ["Myene"] = "mye", + ["Mysian"] = "yms", + ["Mzieme Naga"] = "nme", + ["Mághdì"] = "gmd", + ["Mòcheno"] = "mhn", + ["Mün Chin"] = "mwq", + ["Mündü"] = "muh", + ["N'Ko"] = "nqo", + ["Na"] = "nbt", + ["Na'vi"] = "art-nav", + ["Naaba"] = "nao", + ["Naba"] = "mne", + ["Nabak"] = "naf", + ["Nabi"] = "mty", + ["Nachering"] = "ncd", + ["Nadruvian"] = "ndf", + ["Nadëb"] = "mbj", + ["Nafaanra"] = "nfr", + ["Nafi"] = "srf", + ["Nafri"] = "nxx", + ["Naga Pidgin"] = "nag", + ["Nagarchal"] = "nbg", + ["Nage"] = "nxe", + ["Nagtipunan Agta"] = "phi-nag", + ["Nagu"] = "ngr", + ["Nagumi"] = "ngv", + ["Nahali"] = "nlx", + ["Nahari"] = "nhh", + ["Nahavaq"] = "sns", + ["Nahuatl"] = "nah", + ["Nai"] = "bio", + ["Najdi Arabic"] = "ars", + ["Naka'ela"] = "nae", + ["Nakai"] = "nkj", + ["Nakame"] = "nib", + ["Nakanai"] = "nak", + ["Nakara"] = "nck", + ["Nake"] = "nbk", + ["Naki"] = "mff", + ["Nakwi"] = "nax", + ["Nalca"] = "nlc", + 
["Nali"] = "nss", + ["Nalik"] = "nal", + ["Nalu"] = "naj", + ["Naluo Yi"] = "ylo", + ["Nalögo"] = "nlz", + ["Namakura"] = "nmk", + ["Namat"] = "nkm", + ["Nambikwara"] = "nab", + ["Nambo"] = "ncm", + ["Nambya"] = "nmq", + ["Namia"] = "nnm", + ["Namiae"] = "nvm", + ["Namibian Sign Language"] = "nbs", + ["Namla"] = "naa", + ["Namo"] = "mxw", + ["Namonuito"] = "nmt", + ["Namosi-Naitasiri-Serua"] = "bwb", + ["Namuyi"] = "nmy", + ["Nanai"] = "gld", + ["Nancere"] = "nnc", + ["Nande"] = "nnb", + ["Nandi"] = "niq", + ["Nanerigé Sénoufo"] = "sen", + ["Nanga Dama Dogon"] = "nzz", + ["Nankina"] = "nnk", + ["Nanti"] = "cox", + ["Nanticoke"] = "nnt", + ["Nanubae"] = "afk", + ["Naolan"] = "nai-nao", + ["Napu"] = "npy", + ["Nar Phu"] = "npa", + ["Nara"] = "nrb", + ["Narak"] = "nac", + ["Narango"] = "nrg", + ["Narau"] = "nxu", + ["Narim"] = "loh", + ["Naro"] = "nhr", + ["Narom"] = "nrm", + ["Narragansett"] = "xnt", + ["Narua"] = "nru", + ["Narungga"] = "nnr", + ["Nasal"] = "nsy", + ["Nasarian"] = "nvh", + ["Nasioi"] = "nas", + ["Naskapi"] = "nsk", + ["Nasu"] = "ywq", + ["Natagaimas"] = "nts", + ["Natchez"] = "ncz", + ["Nateni"] = "ntm", + ["Nathembo"] = "nte", + ["Natioro"] = "nti", + ["Natú"] = "sai-nat", + ["Natügu"] = "ntu", + ["Nauete"] = "nxa", + ["Naukanski"] = "ynk", + ["Nauna"] = "ncn", + ["Nauo"] = "nwo", + ["Nauruan"] = "na", + ["Navajo"] = "nv", + ["Navarro-Aragonese"] = "roa-oan", + ["Navut"] = "nsw", + ["Nawaru"] = "nwr", + ["Nawathinehena"] = "nwa", + ["Nawdm"] = "nmz", + ["Nawuri"] = "naw", + ["Naxi"] = "nxq", + ["Nayi"] = "noz", + ["Ncane"] = "ncr", + ["Nchumbulu"] = "nlu", + ["Nda'nda'"] = "nnz", + ["Ndai"] = "gke", + ["Ndaka"] = "ndk", + ["Ndali"] = "ndh", + ["Ndam"] = "ndm", + ["Ndamba"] = "ndj", + ["Ndambomo"] = "nxo", + ["Ndasa"] = "nda", + ["Ndau"] = "ndc", + ["Nde-Gbite"] = "ned", + ["Nde-Nsele-Nta"] = "ndd", + ["Ndemli"] = "nml", + ["Ndendeule"] = "dne", + ["Ndengereko"] = "ndg", + ["Nding"] = "eli", + ["Ndjébbana"] = "djj", + ["Ndo"] = "ndp", + ["Ndobo"] = 
"ndw", + ["Ndoe"] = "nbb", + ["Ndogo"] = "ndz", + ["Ndolo"] = "ndl", + ["Ndom"] = "nqm", + ["Ndombe"] = "ndq", + ["Ndonga"] = "ng", + ["Ndoola"] = "ndr", + ["Ndrulo"] = "dno", + ["Nduga"] = "ndx", + ["Ndumu"] = "nmd", + ["Ndunda"] = "nuh", + ["Ndunga"] = "ndt", + ["Ndut"] = "ndv", + ["Ndyuka-Trio Pidgin"] = "njt", + ["Ndzwani Comorian"] = "wni", + ["Neapolitan"] = "nap", + ["Nedebang"] = "nec", + ["Nefamese"] = "nef", + ["Nefusa"] = "jbn", + ["Negerhollands"] = "dcr", + ["Negeri Sembilan Malay"] = "zmi", + ["Negidal"] = "neg", + ["Nehan"] = "nsn", + ["Nek"] = "nif", + ["Nekgini"] = "nkg", + ["Neko"] = "nej", + ["Neku"] = "nek", + ["Neme"] = "nex", + ["Nemi"] = "nem", + ["Nen"] = "nqn", + ["Nend"] = "anh", + ["Nengone"] = "nen", + ["Neo"] = "neu", + ["Nepalese Sign Language"] = "nsp", + ["Nepali"] = "ne", + ["Nepali Kurux"] = "kxl", + ["Nete"] = "net", + ["Neve'ei"] = "vnm", + ["Neverver"] = "lgk", + ["New Caledonian Javanese"] = "jas", + ["New River Shasta"] = "nai-nrs", + ["New Zealand Sign Language"] = "nzs", + ["Newar"] = "new", + ["Neyo"] = "ney", + ["Nez Perce"] = "nez", + ["Nga La"] = "hlt", + ["Ngaanyatjarra"] = "ntj", + ["Ngadha"] = "nxg", + ["Ngadjunmaya"] = "nju", + ["Ngadjuri"] = "jui", + ["Ngaing"] = "nnf", + ["Ngaju"] = "nij", + ["Ngala"] = "nud", + ["Ngalakan"] = "nig", + ["Ngalkbun"] = "ngk", + ["Ngalum"] = "szb", + ["Ngam"] = "nmc", + ["Ngamambo"] = "nbv", + ["Ngambay"] = "sba", + ["Ngamini"] = "nmv", + ["Ngamo"] = "nbh", + ["Ngan'gityemerri"] = "nam", + ["Nganakarti"] = "xnk", + ["Nganasan"] = "nio", + ["Ngandi"] = "nid", + ["Ngando (Central African Republic)"] = "ngd", + ["Ngando (Congo)"] = "nxd", + ["Ngandyera"] = "nne", + ["Ngangam"] = "gng", + ["Ngantangarra"] = "ntg", + ["Nganyaywana"] = "nyx", + ["Ngardi"] = "rxd", + ["Ngarigu"] = "xni", + ["Ngarinman"] = "nbj", + ["Ngarinyin"] = "ung", + ["Ngarla"] = "nrk", + ["Ngarluma"] = "nrl", + ["Ngarrindjeri"] = "nay", + ["Ngas"] = "anc", + ["Ngasa"] = "nsg", + ["Ngatik Men's Creole"] = "ngm", + 
["Ngawn Chin"] = "cnw", + ["Ngawun"] = "nxn", + ["Ngazidja Comorian"] = "zdj", + ["Ngbaka"] = "nga", + ["Ngbaka Ma'bo"] = "nbm", + ["Ngbaka Manza"] = "ngg", + ["Ngbee"] = "jgb", + ["Ngbinda"] = "nbd", + ["Ngbundu"] = "nuu", + ["Ngelima"] = "agh", + ["Ngemba"] = "nge", + ["Ngen"] = "gnj", + ["Ngendelengo"] = "nql", + ["Ngeq"] = "ngt", + ["Ngete"] = "nnn", + ["Nggem"] = "nbq", + ["Nggwahyi"] = "ngx", + ["Ngie"] = "ngj", + ["Ngiemboon"] = "nnh", + ["Ngile"] = "jle", + ["Ngindo"] = "nnq", + ["Ngiti"] = "niy", + ["Ngiyambaa"] = "wyb", + ["Ngizim"] = "ngi", + ["Ngkoth"] = "aus-ngk", + ["Ngkâlmpw Kanum"] = "kcd", + ["Ngom"] = "nra", + ["Ngomba"] = "jgo", + ["Ngombale"] = "nla", + ["Ngombe (Central African Republic)"] = "nmj", + ["Ngombe (Congo)"] = "ngc", + ["Ngong"] = "nnx", + ["Ngongo"] = "noq", + ["Ngoni"] = "ngo", + ["Ngoreme"] = "ngq", + ["Ngoshie"] = "nsh", + ["Ngul"] = "nlo", + ["Ngulu"] = "ngp", + ["Nguluwan"] = "nuw", + ["Ngumbi"] = "nui", + ["Ngunawal"] = "xul", + ["Ngundi"] = "ndn", + ["Ngundu"] = "nue", + ["Ngungwel"] = "ngz", + ["Ngurmbur"] = "nrx", + ["Nguôn"] = "nuo", + ["Ngwaba"] = "ngw", + ["Ngwe"] = "nwe", + ["Ngwo"] = "ngn", + ["Ngäbere"] = "gym", + ["Nhanda"] = "nha", + ["Nheengatu"] = "yrl", + ["Nhirrpi"] = "hrp", + ["Nhuwala"] = "nhf", + ["Nias"] = "nia", + ["Nicaraguan Creole"] = "bzk", + ["Nicaraguan Sign Language"] = "ncs", + ["Nicola"] = "ath-nic", + ["Niellim"] = "nie", + ["Nigeria Mambila"] = "mzk", + ["Nigerian Pidgin"] = "pcm", + ["Nigerian Sign Language"] = "nsi", + ["Nihali"] = "nll", + ["Nii"] = "nii", + ["Niksek"] = "gbe", + ["Nila"] = "nil", + ["Nilamba"] = "nim", + ["Nimadi"] = "noe", + ["Nimanbur"] = "nmp", + ["Nimbari"] = "nmr", + ["Nimboran"] = "nir", + ["Nimi"] = "nis", + ["Nimo"] = "niw", + ["Nimoa"] = "nmw", + ["Ninam"] = "shb", + ["Nindi"] = "nxi", + ["Ningera"] = "nby", + ["Ninggerum"] = "nxr", + ["Ningil"] = "niz", + ["Ninia Yali"] = "nlk", + ["Ninzo"] = "nin", + ["Nipsan"] = "nps", + ["Nisa"] = "njs", + ["Nisenan"] = "nsz", + 
["Nisga'a"] = "ncg", + ["Nisi"] = "yso", + ["Niuafo'ou"] = "num", + ["Niuatoputapu"] = "nkp", + ["Niuean"] = "niu", + ["Nivaclé"] = "cag", + ["Nivkh"] = "niv", + ["Niwer Mil"] = "hrc", + ["Niya Prakrit"] = "pra-niy", + ["Njalgulgule"] = "njl", + ["Njebi"] = "nzb", + ["Njen"] = "njj", + ["Njerep"] = "njr", + ["Njyem"] = "njy", + ["Nkami"] = "nkq", + ["Nkangala"] = "nkn", + ["Nkari"] = "nkz", + ["Nkem-Nkum"] = "isi", + ["Nkhumbi"] = "khu", + ["Nkongho"] = "nkc", + ["Nkonya"] = "nko", + ["Nkoroo"] = "nkx", + ["Nkoya"] = "nka", + ["Nkukoli"] = "nbo", + ["Nkutu"] = "nkw", + ["Nnam"] = "nbp", + ["Nobiin"] = "fia", + ["Nobonob"] = "gaw", + ["Nocamán"] = "nom", + ["Nocte Naga"] = "njb", + ["Nogai"] = "nog", + ["Noiri"] = "noi", + ["Nokuku"] = "nkk", + ["Nomaande"] = "lem", + ["Nomane"] = "nof", + ["Nomatsiguenga"] = "not", + ["Nomlaki"] = "nol", + ["Nomu"] = "noh", + ["Nong Zhuang"] = "zhn", + ["Nonuya"] = "noj", + ["Nooksack"] = "nok", + ["Noon"] = "snf", + ["Noone"] = "nhu", + ["Nootka"] = "nuk", + ["Nopala Chatino"] = "cya", + ["Noric"] = "nrc", + ["Norman"] = "nrf", + ["Norn"] = "nrn", + ["Norra"] = "nrr", + ["North Alaskan Inupiatun"] = "esi", + ["North Ambrym"] = "mmg", + ["North Asmat"] = "nks", + ["North Awyu"] = "yir", + ["North Babar"] = "bcd", + ["North Boma"] = "boh", + ["North Central Mixe"] = "neq", + ["North Efate"] = "llp", + ["North Fali"] = "fll", + ["North Frisian"] = "frr", + ["North Giziga"] = "gis", + ["North Levantine Arabic"] = "apc", + ["North Marquesan"] = "mrq", + ["North Mesopotamian Arabic"] = "ayp", + ["North Mofu"] = "mfk", + ["North Moluccan Malay"] = "max", + ["North Muyu"] = "kti", + ["North Nuaulu"] = "nni", + ["North Picene"] = "nrp", + ["North Slavey"] = "scs", + ["North Tairora"] = "tbg", + ["North Tanna"] = "tnn", + ["North Wahgi"] = "whg", + ["North Watut"] = "una", + ["Northeast Kiwai"] = "kiw", + ["Northeast Maidu"] = "nmu", + ["Northeast Pashayi"] = "aee", + ["Northeastern Dinka"] = "dip", + ["Northeastern Pomo"] = "pef", + 
["Northern Alta"] = "aqn", + ["Northern Altai"] = "atv", + ["Northern Amami-Oshima"] = "ryn", + ["Northern Bai"] = "bfc", + ["Northern Bontoc"] = "rbk", + ["Northern Catanduanes Bicolano"] = "cts", + ["Northern Dagara"] = "dgi", + ["Northern East Cree"] = "crl", + ["Northern Emberá"] = "emp", + ["Northern Ghale"] = "ghh", + ["Northern Grebo"] = "gbo", + ["Northern Guiyang Hmong"] = "huj", + ["Northern Haida"] = "hdn", + ["Northern Hindko"] = "hno", + ["Northern Huishui Hmong"] = "hmi", + ["Northern Kalapuya"] = "nrt", + ["Northern Kam"] = "doc", + ["Northern Kankanay"] = "xnn", + ["Northern Khmer"] = "kxm", + ["Northern Kissi"] = "kqs", + ["Northern Kurdish"] = "kmr", + ["Northern Lorung"] = "lbr", + ["Northern Luri"] = "lrc", + ["Northern Mashan Hmong"] = "hmp", + ["Northern Muji"] = "ymx", + ["Northern Ndebele"] = "nd", + ["Northern Ngbandi"] = "ngb", + ["Northern Nisu"] = "yiv", + ["Northern Nuni"] = "nuv", + ["Northern Oaxaca Nahuatl"] = "nhy", + ["Northern Ohlone"] = "cst", + ["Northern One"] = "onr", + ["Northern Paiute"] = "pao", + ["Northern Pame"] = "pmq", + ["Northern Pomo"] = "pej", + ["Northern Puebla Nahuatl"] = "ncj", + ["Northern Pumi"] = "pmi", + ["Northern Pwo"] = "pww", + ["Northern Qiandong Miao"] = "hea", + ["Northern Qiang"] = "cng", + ["Northern Rengma Naga"] = "nnl", + ["Northern Roglai"] = "rog", + ["Northern Sami"] = "se", + ["Northern Sierra Miwok"] = "nsq", + ["Northern Sotho"] = "nso", + ["Northern Subanen"] = "stb", + ["Northern Tarahumara"] = "thh", + ["Northern Tepehuan"] = "ntp", + ["Northern Thai"] = "nod", + ["Northern Tidong"] = "ntd", + ["Northern Tlaxiaco Mixtec"] = "xtn", + ["Northern Toussian"] = "tsp", + ["Northern Tujia"] = "tji", + ["Northern Tutchone"] = "ttm", + ["Northern Valley Yokuts"] = "nai-nvy", + ["Northern Yukaghir"] = "ykg", + ["Northwest Alaska Inupiatun"] = "esk", + ["Northwest Gbaya"] = "gya", + ["Northwest Maidu"] = "mjd", + ["Northwest Oaxaca Mixtec"] = "mxa", + ["Northwest Pashayi"] = "glh", + 
["Northwestern Dinka"] = "diw", + ["Northwestern Fars"] = "faz", + ["Northwestern Ojibwa"] = "ojb", + ["Northwestern Tamang"] = "tmk", + ["Norwegian"] = "no", + ["Norwegian Bokmål"] = "nb", + ["Norwegian Nynorsk"] = "nn", + ["Norwegian Sign Language"] = "nsl", + ["Notre"] = "bly", + ["Notsi"] = "ncf", + ["Nottoway"] = "ntw", + ["Nottoway-Meherrin"] = "nwy", + ["Novial"] = "nov", + ["Noxilo"] = "art-nox", + ["Noy"] = "noy", + ["Nsari"] = "asj", + ["Nsenga"] = "nse", + ["Nshi"] = "nsc", + ["Nsong"] = "soo", + ["Nsongo"] = "nsx", + ["Ntcham"] = "bud", + ["Ntomba"] = "nto", + ["Ntra'ngith"] = "dgt", + ["Nubaca"] = "baf", + ["Nubi"] = "kcn", + ["Nuer"] = "nus", + ["Nuguria"] = "nur", + ["Nuk"] = "noc", + ["Nukak Makú"] = "mbr", + ["Nukna"] = "klt", + ["Nukuini"] = "nuc", + ["Nukumanu"] = "nuq", + ["Nukunu"] = "nnv", + ["Nukunul"] = "xnu", + ["Nukuoro"] = "nkr", + ["Numana"] = "nbr", + ["Numanggang"] = "nop", + ["Numbami"] = "sij", + ["Nume"] = "tgs", + ["Numee"] = "kdk", + ["Numidian"] = "nxm", + ["Nung"] = "nut", + ["Nungali"] = "nug", + ["Nunggubuyu"] = "nuy", + ["Nungon"] = "paa-nun", + ["Nungu"] = "rin", + ["Nupbikha"] = "npb", + ["Nupe"] = "nup", + ["Nusa Laut"] = "nul", + ["Nusu"] = "nuf", + ["Nutabe"] = "cba-nut", + ["Nyabwa"] = "nwb", + ["Nyah Kur"] = "cbn", + ["Nyaheun"] = "nev", + ["Nyakyusa"] = "nyy", + ["Nyali"] = "nlj", + ["Nyam"] = "nmi", + ["Nyamal"] = "nly", + ["Nyambo"] = "now", + ["Nyamusa-Molo"] = "nwm", + ["Nyamwanga"] = "mwn", + ["Nyamwezi"] = "nym", + ["Nyaneka"] = "nyk", + ["Nyang'i"] = "nyp", + ["Nyanga (Congo)"] = "nyj", + ["Nyanga (Togo)"] = "ayg", + ["Nyanga-li"] = "nyc", + ["Nyangatom"] = "nnj", + ["Nyangbo"] = "nyb", + ["Nyangga"] = "nny", + ["Nyangumarta"] = "nna", + ["Nyankole"] = "nyn", + ["Nyarafolo Senoufo"] = "sev", + ["Nyaturu"] = "rim", + ["Nyaw"] = "nyw", + ["Nyawaygi"] = "nyt", + ["Nyemba"] = "nba", + ["Nyengo"] = "nye", + ["Nyenkha"] = "neh", + ["Nyeu"] = "nyl", + ["Nyigina"] = "nyh", + ["Nyiha"] = "nih", + ["Nyika"] = "nkt", + 
["Nyimang"] = "nyi", + ["Nyindrou"] = "lid", + ["Nyindu"] = "nyg", + ["Nyishi"] = "njz", + ["Nyiyaparli"] = "xny", + ["Nyokon"] = "nvo", + ["Nyole (Kenya)"] = "nyd", + ["Nyole (Uganda)"] = "nuj", + ["Nyong"] = "muo", + ["Nyoro"] = "nyo", + ["Nyulnyul"] = "nyv", + ["Nyunga"] = "nys", + ["Nyungwe"] = "nyu", + ["Nyâlayu"] = "yly", + ["Nzadi"] = "nzd", + ["Nzakambay"] = "nzy", + ["Nzakara"] = "nzk", + ["Nzanyi"] = "nja", + ["Nzima"] = "nzi", + ["Ná-Meo"] = "neo", + ["Nüpode Huitoto"] = "hux", + ["Nǀuu"] = "ngh", + ["O'chi'chi'"] = "xoc", + ["O'du"] = "tyh", + ["O'odham"] = "ood", + ["Obanliku"] = "bzy", + ["Obispeño"] = "obi", + ["Oblo"] = "obl", + ["Obo Manobo"] = "obo", + ["Obokuitai"] = "afz", + ["Obolo"] = "ann", + ["Obulom"] = "obu", + ["Ocaina"] = "oca", + ["Occitan"] = "oc", + ["Ocotepec Mixtec"] = "mie", + ["Ocotlán Zapotec"] = "zac", + ["Od"] = "odk", + ["Odiai"] = "bhf", + ["Odoodee"] = "kkc", + ["Odual"] = "odu", + ["Odut"] = "oda", + ["Ofayé"] = "opy", + ["Ofo"] = "ofo", + ["Ogbah"] = "ogc", + ["Ogbia"] = "ogb", + ["Ogbogolo"] = "ogg", + ["Ogbronuagum"] = "ogu", + ["Ogea"] = "eri", + ["Oirata"] = "oia", + ["Ojibwe"] = "oj", + ["Ojitlán Chinantec"] = "chj", + ["Okanagan"] = "oka", + ["Oki-No-Erabu"] = "okn", + ["Okiek"] = "oki", + ["Okinawan"] = "ryu", + ["Oko-Eni-Osayen"] = "oks", + ["Oko-Juwoi"] = "okj", + ["Okobo"] = "okb", + ["Okodia"] = "okd", + ["Okolod"] = "kqv", + ["Okpamheri"] = "opa", + ["Okpe (Northwestern Edo)"] = "okx", + ["Okpe (Southwestern Edo)"] = "oke", + ["Okpela"] = "atg", + ["Oksapmin"] = "opm", + ["Oku"] = "oku", + ["Okwanuchu"] = "nai-okw", + ["Old Anatolian Turkish"] = "trk-oat", + ["Old Armenian"] = "xcl", + ["Old Avar"] = "oav", + ["Old Bengali"] = "inc-obn", + ["Old Breton"] = "obt", + ["Old Burmese"] = "obr", + ["Old Catalan"] = "roa-oca", + ["Old Chinese"] = "och", + ["Old Church Slavonic"] = "cu", + ["Old Cornish"] = "oco", + ["Old Czech"] = "zlw-ocs", + ["Old Danish"] = "gmq-oda", + ["Old Dutch"] = "odt", + ["Old East Slavic"] 
= "orv", + ["Old English"] = "ang", + ["Old French"] = "fro", + ["Old Frisian"] = "ofs", + ["Old Georgian"] = "oge", + ["Old Gujarati"] = "inc-ogu", + ["Old High German"] = "goh", + ["Old Hindi"] = "inc-ohi", + ["Old Hungarian"] = "ohu", + ["Old Irish"] = "sga", + ["Old Japanese"] = "ojp", + ["Old Javanese"] = "kaw", + ["Old Kamta"] = "inc-ork", + ["Old Kannada"] = "dra-okn", + ["Old Kentish Sign Language"] = "okl", + ["Old Khmer"] = "mkh-okm", + ["Old Korean"] = "oko", + ["Old Latin"] = "itc-ola", + ["Old Leonese"] = "roa-ole", + ["Old Lithuanian"] = "olt", + ["Old Manipuri"] = "omp", + ["Old Marathi"] = "omr", + ["Old Median"] = "xme-old", + ["Old Mon"] = "omx", + ["Old Norse"] = "non", + ["Old Novgorodian"] = "zle-ono", + ["Old Nubian"] = "onw", + ["Old Occitan"] = "pro", + ["Old Oriya"] = "inc-oor", + ["Old Ossetic"] = "oos", + ["Old Persian"] = "peo", + ["Old Polish"] = "zlw-opl", + ["Old Portuguese"] = "roa-opt", + ["Old Prussian"] = "prg", + ["Old Punjabi"] = "inc-opa", + ["Old Saxon"] = "osx", + ["Old South Arabian"] = "sem-srb", + ["Old Spanish"] = "osp", + ["Old Swedish"] = "gmq-osw", + ["Old Tamil"] = "oty", + ["Old Tati"] = "xme-ott", + ["Old Tupi"] = "tpw", + ["Old Turkic"] = "otk", + ["Old Uyghur"] = "oui", + ["Old Welsh"] = "owl", + ["Olekha"] = "ole", + ["Ollari"] = "gdb", + ["Olo"] = "ong", + ["Oloma"] = "olm", + ["Olrat"] = "olr", + ["Olu'bo"] = "lul", + ["Olulumo-Ikom"] = "iko", + ["Oluta Popoluca"] = "plo", + ["Olutsotso"] = "lto", + ["Omagua"] = "omg", + ["Omaha-Ponca"] = "oma", + ["Omani Arabic"] = "acx", + ["Omba"] = "omb", + ["Ombamba"] = "mbm", + ["Ombo"] = "oml", + ["Ometepec Nahuatl"] = "nht", + ["Omi"] = "omi", + ["Omok"] = "omk", + ["Omotik"] = "omt", + ["Omurano"] = "omu", + ["Oneida"] = "one", + ["Ong"] = "oog", + ["Ongota"] = "bxe", + ["Onin"] = "oni", + ["Onjob"] = "onj", + ["Ono"] = "ons", + ["Onobasulu"] = "onn", + ["Onondaga"] = "ono", + ["Ontenu"] = "ont", + ["Ontong Java"] = "ojv", + ["Oorlams"] = "oor", + ["Opao"] = "opo", + 
["Opata"] = "opt", + ["Opuuo"] = "lgn", + ["Opón"] = "sai-opo", + ["Oraon Sadri"] = "sdr", + ["Orejón"] = "ore", + ["Oring"] = "org", + ["Oriya"] = "or", + ["Orizaba Nahuatl"] = "nlv", + ["Orléanais"] = "roa-orl", + ["Ormu"] = "orz", + ["Ormuri"] = "oru", + ["Oro"] = "orx", + ["Oro Win"] = "orw", + ["Oroch"] = "oac", + ["Oroha"] = "ora", + ["Orok"] = "oaa", + ["Orokaiva"] = "okv", + ["Oroko"] = "bdu", + ["Orokolo"] = "oro", + ["Oromo"] = "om", + ["Oroqen"] = "orh", + ["Orowe"] = "bpk", + ["Oruma"] = "orr", + ["Orya"] = "ury", + ["Osage"] = "osa", + ["Osamayi"] = "syx", + ["Osatu"] = "ost", + ["Oscan"] = "osc", + ["Osing"] = "osi", + ["Ososo"] = "oso", + ["Ossetian"] = "os", + ["Ot Danum"] = "otd", + ["Otank"] = "uta", + ["Oti"] = "oti", + ["Otomaco"] = "sai-oto", + ["Otoro"] = "otr", + ["Ottawa"] = "otw", + ["Ottoman Turkish"] = "ota", + ["Otuke"] = "otu", + ["Ouma"] = "oum", + ["Oune"] = "oue", + ["Owa"] = "stn", + ["Owenia"] = "wsr", + ["Owiniga"] = "owi", + ["Oy"] = "oyb", + ["Oya'oya"] = "oyy", + ["Oyda"] = "oyd", + ["Ozolotepec Zapotec"] = "zao", + ["Ozumacín Chinantec"] = "chz", + ["Pa"] = "ppt", + ["Pa Di"] = "pdi", + ["Pa'a"] = "pqa", + ["Pa'o Karen"] = "blk", + ["Pa-Hng"] = "pha", + ["Paama"] = "pma", + ["Paasaal"] = "sig", + ["Pacahuara"] = "pcp", + ["Pacoh"] = "pac", + ["Padoe"] = "pdo", + ["Paelignian"] = "pgn", + ["Paeonian"] = "ine-pae", + ["Pagi"] = "pgi", + ["Pagibete"] = "pae", + ["Pagu"] = "pgu", + ["Pahanan Agta"] = "apf", + ["Pahari-Potwari"] = "phr", + ["Pahi"] = "lgt", + ["Pahlavani"] = "phv", + ["Pai Tavytera"] = "pta", + ["Pai-lang"] = "tbq-plg", + ["Paicî"] = "pri", + ["Paikoneka"] = "awd-pai", + ["Paipai"] = "ppi", + ["Paisaci Prakrit"] = "inc-psc", + ["Paite"] = "pck", + ["Paiwan"] = "pwn", + ["Pajapan Nahuatl"] = "nhp", + ["Pak-Tong"] = "pkg", + ["Pakanha"] = "pkn", + ["Pakistan Sign Language"] = "pks", + ["Paku"] = "pku", + ["Paku Karen"] = "kpp", + ["Pal"] = "abw", + ["Palaic"] = "plq", + ["Palaka Senoufo"] = "plr", + ["Palantla 
Chinantec"] = "cpa", + ["Palauan"] = "pau", + ["Palawan Batak"] = "bya", + ["Paleni"] = "pnl", + ["Palenquero"] = "pln", + ["Palewyami"] = "nai-ply", + ["Pali"] = "pi", + ["Palikur"] = "plu", + ["Paliyan"] = "pcf", + ["Pallanganmiddang"] = "pmd", + ["Palor"] = "fap", + ["Palta"] = "sai-pal", + ["Palu'e"] = "ple", + ["Paluan"] = "plz", + ["Palya Bareli"] = "bpx", + ["Pam"] = "pmn", + ["Pambia"] = "pmb", + ["Pamigua"] = "sai-pam", + ["Pamlico"] = "pmk", + ["Pamona"] = "pmf", + ["Pamosu"] = "hih", + ["Pamplona Atta"] = "att", + ["Pana (Central Africa)"] = "pnz", + ["Pana (West Africa)"] = "pnq", + ["Panamanian Sign Language"] = "lsp", + ["Panamint"] = "par", + ["Panare"] = "pbh", + ["Panará"] = "kre", + ["Panasuan"] = "psn", + ["Panawa"] = "pwb", + ["Pancana"] = "pnp", + ["Panchpargania"] = "tdb", + ["Pande"] = "bkj", + ["Pangasinan"] = "pag", + ["Pangseng"] = "pgs", + ["Pangutaran Sama"] = "slm", + ["Pangwa"] = "pbr", + ["Pangwali"] = "pgg", + ["Panim"] = "pnr", + ["Paniya"] = "pcg", + ["Pankararé"] = "pax", + ["Pankararú"] = "paz", + ["Pankhu"] = "pkh", + ["Pannei"] = "pnc", + ["Panobo"] = "pno", + ["Panyjima"] = "pnw", + ["Panzaleo"] = "sai-pnz", + ["Pao"] = "ppa", + ["Papantla Totonac"] = "top", + ["Papapana"] = "ppn", + ["Papar"] = "dpp", + ["Papasena"] = "pas", + ["Papel"] = "pbo", + ["Papi"] = "ppe", + ["Papiamentu"] = "pap", + ["Papitalai"] = "pat", + ["Papora"] = "ppu", + ["Papua New Guinean Sign Language"] = "pgz", + ["Papuan Malay"] = "pmy", + ["Papuma"] = "ppm", + ["Para Naga"] = "pzn", + ["Parachi"] = "prc", + ["Paraguayan Guaraní"] = "gug", + ["Paraguayan Sign Language"] = "pys", + ["Parakanã"] = "pak", + ["Paranan"] = "prf", + ["Paranawát"] = "paf", + ["Paratió"] = "sai-par", + ["Paraujano"] = "pbg", + ["Parauk"] = "prk", + ["Parawen"] = "prw", + ["Pardhan"] = "pch", + ["Pardhi"] = "pcl", + ["Pare"] = "asa", + ["Pareci"] = "pab", + ["Paredarerme"] = "xpd", + ["Parenga"] = "pcj", + ["Parkari Koli"] = "kvx", + ["Parthian"] = "xpr", + ["Parya"] = "paq", + 
["Pará Arára"] = "aap", + ["Pará Gavião"] = "gvp", + ["Pashto"] = "ps", + ["Pasi"] = "psq", + ["Pass Valley Yali"] = "yac", + ["Passé"] = "awd-pas", + ["Patagón"] = "sai-ptg", + ["Patamona"] = "pbc", + ["Patani"] = "ptn", + ["Pataxó Hã-Ha-Hãe"] = "pth", + ["Patep"] = "ptp", + ["Pathiya"] = "pty", + ["Patpatar"] = "gfk", + ["Pattani"] = "lae", + ["Pattani Malay"] = "mfa", + ["Pattapu"] = "ptq", + ["Patwin"] = "pwi", + ["Paulohi"] = "plh", + ["Paumarí"] = "pad", + ["Paunaca"] = "pnk", + ["Pauri Bareli"] = "bfb", + ["Pauserna"] = "psm", + ["Pawaia"] = "pwa", + ["Pawnee"] = "paw", + ["Payaguá"] = "sai-pyg", + ["Paynamar"] = "pmr", + ["Pe"] = "pai", + ["Pear"] = "pcb", + ["Pech"] = "pay", + ["Pecheneg"] = "xpc", + ["Peerapper"] = "xpw", + ["Peere"] = "pfe", + ["Pei"] = "ppq", + ["Pekal"] = "pel", + ["Pela"] = "bxd", + ["Pele-Ata"] = "ata", + ["Pemon"] = "aoc", + ["Penang Sign Language"] = "psg", + ["Penchal"] = "pek", + ["Pendau"] = "ums", + ["Pengo"] = "peg", + ["Pennsylvania German"] = "pdc", + ["Penobscot"] = "aaq", + ["Penrhyn"] = "pnh", + ["Pentlatch"] = "ptw", + ["Perai"] = "wet", + ["Peranakan Indonesian"] = "pea", + ["Perema"] = "wom", + ["Pericú"] = "nai-per", + ["Pero"] = "pip", + ["Persian"] = "fa", + ["Persian Sign Language"] = "psc", + ["Peruvian Sign Language"] = "prl", + ["Petapa Zapotec"] = "zpe", + ["Petats"] = "pex", + ["Petjo"] = "pey", + ["Peñoles Mixtec"] = "mil", + ["Phai"] = "prt", + ["Phake"] = "phk", + ["Phala"] = "ypa", + ["Phalura"] = "phl", + ["Phana'"] = "phq", + ["Phangduwali"] = "phw", + ["Phende"] = "pem", + ["Philippine Sign Language"] = "psp", + ["Philistine"] = "und-phi", + ["Phimbi"] = "phm", + ["Phoenician"] = "phn", + ["Phola"] = "ypg", + ["Pholo"] = "yip", + ["Phom"] = "nph", + ["Phong-Kniang"] = "pnx", + ["Phrae Pwo"] = "kjt", + ["Phrygian"] = "xpg", + ["Phu Thai"] = "pht", + ["Phuan"] = "phu", + ["Phudagi"] = "phd", + ["Phuie"] = "pug", + ["Phukha"] = "phh", + ["Phuma"] = "ypm", + ["Phunoi"] = "pho", + ["Phuong"] = "phg", + 
["Phupa"] = "ypp", + ["Phupha"] = "yph", + ["Phuthi"] = "bnt-phu", + ["Phuza"] = "ypz", + ["Piamatsina"] = "ptr", + ["Piame"] = "pin", + ["Piapoco"] = "pio", + ["Piaroa"] = "pid", + ["Picard"] = "pcd", + ["Pichinglis"] = "fpe", + ["Pichis Ashéninka"] = "cpu", + ["Pictish"] = "xpi", + ["Picuris"] = "nai-pic", + ["Pidgin Delaware"] = "dep", + ["Pidgin Iha"] = "ihb", + ["Pidgin Onin"] = "onx", + ["Piedmontese"] = "pms", + ["Pijao"] = "pij", + ["Pije"] = "piz", + ["Pijin"] = "pis", + ["Pilagá"] = "plg", + ["Pileni"] = "piv", + ["Pima Bajo"] = "pia", + ["Pimbwe"] = "piw", + ["Pinai-Hagahai"] = "pnn", + ["Pingelapese"] = "pif", + ["Pini"] = "pii", + ["Pinigura"] = "pnv", + ["Pinjarup"] = "pnj", + ["Pinji"] = "pic", + ["Pinotepa Nacional Mixtec"] = "mio", + ["Pintiini"] = "pti", + ["Pintupi-Luritja"] = "piu", + ["Pinyin"] = "pny", + ["Pipil"] = "ppl", + ["Pirahã"] = "myp", + ["Piratapuyo"] = "pir", + ["Pirlatapa"] = "bxi", + ["Piro"] = "pie", + ["Pirriya"] = "xpa", + ["Pisabo"] = "pig", + ["Pisaflores Tepehua"] = "tpp", + ["Piscataway"] = "psy", + ["Pisidian"] = "xps", + ["Pitcairn-Norfolk"] = "pih", + ["Pite Sami"] = "sje", + ["Piti"] = "pcn", + ["Pitjantjatjara"] = "pjt", + ["Pitta-Pitta"] = "pit", + ["Piu"] = "pix", + ["Piya-Kwonci"] = "piy", + ["Plains Apache"] = "apk", + ["Plains Cree"] = "crk", + ["Plains Indian Sign Language"] = "psd", + ["Plains Miwok"] = "pmw", + ["Plapo Krumen"] = "ktj", + ["Plautdietsch"] = "pdt", + ["Playero"] = "gob", + ["Pnar"] = "pbv", + ["Pochuri Naga"] = "npo", + ["Pochutec"] = "xpo", + ["Podoko"] = "pbi", + ["Pogolo"] = "poy", + ["Pohnpeian"] = "pon", + ["Poitevin-Saintongeais"] = "roa-poi", + ["Pokangá"] = "pok", + ["Poke"] = "pof", + ["Pol"] = "pmm", + ["Polabian"] = "pox", + ["Polci"] = "plj", + ["Polish"] = "pl", + ["Polish Sign Language"] = "pso", + ["Polonombauk"] = "plb", + ["Pom"] = "pmo", + ["Pomeranian"] = "zlw-pom", + ["Ponam"] = "ncc", + ["Pongu"] = "png", + ["Ponosakan"] = "pns", + ["Pontic Greek"] = "pnt", + ["Ponyo"] = 
"npg", + ["Poqomam"] = "poc", + ["Poqomchi'"] = "poh", + ["Porohanon"] = "prh", + ["Port Sandwich"] = "psw", + ["Port Sorell"] = "xpl", + ["Port Vato"] = "ptv", + ["Portuguese"] = "pt", + ["Portuguese Sign Language"] = "psr", + ["Potawatomi"] = "pot", + ["Potiguára"] = "pog", + ["Poumei Naga"] = "pmx", + ["Pouye"] = "bye", + ["Powari"] = "pwr", + ["Powhatan"] = "pim", + ["Poyanáwa"] = "pyn", + ["Prakrit"] = "inc-pra", + ["Prasuni"] = "prn", + ["Primitive Irish"] = "pgl", + ["Principense"] = "pre", + ["Proto-Abkhaz-Abaza"] = "cau-abz-pro", + ["Proto-Afroasiatic"] = "afa-pro", + ["Proto-Albanian"] = "sqj-pro", + ["Proto-Algic"] = "aql-pro", + ["Proto-Algonquian"] = "alg-pro", + ["Proto-Amuesha-Chamicuro"] = "awd-amc-pro", + ["Proto-Anatolian"] = "ine-ana-pro", + ["Proto-Apachean"] = "apa-pro", + ["Proto-Arawa"] = "auf-pro", + ["Proto-Arawak"] = "awd-pro", + ["Proto-Armenian"] = "hyx-pro", + ["Proto-Arnhem"] = "aus-arn-pro", + ["Proto-Aroid"] = "omv-aro-pro", + ["Proto-Aslian"] = "mkh-asl-pro", + ["Proto-Atayalic"] = "map-ata-pro", + ["Proto-Athabaskan"] = "ath-pro", + ["Proto-Atlantic-Congo"] = "alv-pro", + ["Proto-Austroasiatic"] = "aav-pro", + ["Proto-Austronesian"] = "map-pro", + ["Proto-Avaro-Andian"] = "cau-ava-pro", + ["Proto-Bahnaric"] = "mkh-ban-pro", + ["Proto-Balto-Slavic"] = "ine-bsl-pro", + ["Proto-Bantoid"] = "nic-bod-pro", + ["Proto-Bantu"] = "bnt-pro", + ["Proto-Basque"] = "euq-pro", + ["Proto-Batak"] = "btk-pro", + ["Proto-Be"] = "qfa-onb-pro", + ["Proto-Be-Tai"] = "qfa-bet-pro", + ["Proto-Benue-Congo"] = "nic-bco-pro", + ["Proto-Berber"] = "ber-pro", + ["Proto-Bodo-Garo"] = "tbq-bdg-pro", + ["Proto-Bongo-Bagirmi"] = "csu-bba-pro", + ["Proto-Boran"] = "sai-bor-pro", + ["Proto-Brythonic"] = "cel-bry-pro", + ["Proto-Bua"] = "alv-bua-pro", + ["Proto-Bungku-Tolaki"] = "poz-btk-pro", + ["Proto-Caddoan"] = "cdd-pro", + ["Proto-Cangin"] = "alv-cng-pro", + ["Proto-Celtic"] = "cel-pro", + ["Proto-Central Chadic"] = "cdc-cbm-pro", + ["Proto-Central Indo-Aryan"] 
= "inc-cen-pro", + ["Proto-Central Jê"] = "sai-cje-pro", + ["Proto-Central New South Wales"] = "aus-cww-pro", + ["Proto-Central Sudanic"] = "csu-pro", + ["Proto-Central Togo"] = "alv-gtm-pro", + ["Proto-Central-Eastern Malayo-Polynesian"] = "poz-cet-pro", + ["Proto-Cerrado"] = "sai-cer-pro", + ["Proto-Chadic"] = "cdc-pro", + ["Proto-Chamic"] = "cmc-pro", + ["Proto-Chatino"] = "omq-cha-pro", + ["Proto-Chibchan"] = "cba-pro", + ["Proto-Chimakuan"] = "chi-pro", + ["Proto-Chinookan"] = "nai-ckn-pro", + ["Proto-Chukotko-Kamchatkan"] = "qfa-cka-pro", + ["Proto-Chumash"] = "nai-chu-pro", + ["Proto-Circassian"] = "cau-cir-pro", + ["Proto-Cupan"] = "azc-cup-pro", + ["Proto-Cushitic"] = "cus-pro", + ["Proto-Daju"] = "sdv-daj-pro", + ["Proto-Daly"] = "aus-dal-pro", + ["Proto-Dardic"] = "inc-dar-pro", + ["Proto-Dargwa"] = "cau-drg-pro", + ["Proto-Dizoid"] = "omv-diz-pro", + ["Proto-Dravidian"] = "dra-pro", + ["Proto-Eastern Jebel"] = "sdv-eje-pro", + ["Proto-Eastern Malayo-Polynesian"] = "pqe-pro", + ["Proto-Eastern Oti-Volta"] = "nic-eov-pro", + ["Proto-Eastern Polynesian"] = "poz-pep-pro", + ["Proto-Edoid"] = "alv-edo-pro", + ["Proto-Eskimo"] = "esx-esk-pro", + ["Proto-Eskimo-Aleut"] = "esx-pro", + ["Proto-Fali"] = "alv-fli-pro", + ["Proto-Finnic"] = "fiu-fin-pro", + ["Proto-Gbe"] = "alv-gbe-pro", + ["Proto-Georgian-Zan"] = "ccs-gzn-pro", + ["Proto-Germanic"] = "gem-pro", + ["Proto-Grassfields"] = "nic-grf-pro", + ["Proto-Great Andamanese"] = "qfa-adm-pro", + ["Proto-Guang"] = "alv-gng-pro", + ["Proto-Gur"] = "nic-gur-pro", + ["Proto-Gurunsi"] = "nic-gns-pro", + ["Proto-Halmahera-Cenderawasih"] = "poz-hce-pro", + ["Proto-Heiban"] = "alv-hei-pro", + ["Proto-Hellenic"] = "grk-pro", + ["Proto-Hlai"] = "qfa-lic-pro", + ["Proto-Hmong"] = "hmn-pro", + ["Proto-Hmong-Mien"] = "hmx-pro", + ["Proto-Hrusish"] = "sit-hrs-pro", + ["Proto-Huitoto-Ocaina"] = "sai-hoc-pro", + ["Proto-Hurro-Urartian"] = "qfa-hur-pro", + ["Proto-Igboid"] = "alv-igb-pro", + ["Proto-Ijoid"] = "ijo-pro", + 
["Proto-Indo-Aryan"] = "inc-pro", + ["Proto-Indo-European"] = "ine-pro", + ["Proto-Indo-Iranian"] = "iir-pro", + ["Proto-Inuit"] = "esx-inu-pro", + ["Proto-Iranian"] = "ira-pro", + ["Proto-Iroquoian"] = "iro-pro", + ["Proto-Italic"] = "itc-pro", + ["Proto-Iwaidjan"] = "aus-wdj-pro", + ["Proto-Japonic"] = "jpx-pro", + ["Proto-Jukunoid"] = "nic-jkn-pro", + ["Proto-Jê"] = "sai-jee-pro", + ["Proto-Kadu"] = "qfa-kad-pro", + ["Proto-Kalamian"] = "phi-kal-pro", + ["Proto-Kalapuyan"] = "nai-klp-pro", + ["Proto-Kam-Sui"] = "qfa-kms-pro", + ["Proto-Kampa"] = "awd-kmp-pro", + ["Proto-Karen"] = "kar-pro", + ["Proto-Kartvelian"] = "ccs-pro", + ["Proto-Katuic"] = "mkh-kat-pro", + ["Proto-Kham"] = "sit-kha-pro", + ["Proto-Khasian"] = "aav-khs-pro", + ["Proto-Khmeric"] = "mkh-kmr-pro", + ["Proto-Khmuic"] = "mkh-khm-pro", + ["Proto-Khoe"] = "khi-kho-pro", + ["Proto-Koman"] = "ssa-kom-pro", + ["Proto-Komisenian"] = "ira-kms-pro", + ["Proto-Korean"] = "qfa-kor-pro", + ["Proto-Kra"] = "qfa-kra-pro", + ["Proto-Kra-Dai"] = "qfa-tak-pro", + ["Proto-Kru"] = "kro-pro", + ["Proto-Kuki-Chin"] = "tbq-kuk-pro", + ["Proto-Kuliak"] = "ssa-klk-pro", + ["Proto-Kurdish"] = "ku-pro", + ["Proto-Kwa"] = "alv-kwa-pro", + ["Proto-Lalo"] = "tbq-lal-pro", + ["Proto-Lampungic"] = "poz-lgx-pro", + ["Proto-Lezghian"] = "cau-lzg-pro", + ["Proto-Lolo-Burmese"] = "tbq-lob-pro", + ["Proto-Loloish"] = "tbq-lol-pro", + ["Proto-Lower Cross River"] = "nic-lcr-pro", + ["Proto-Luish"] = "sit-luu-pro", + ["Proto-Maidun"] = "nai-mdu-pro", + ["Proto-Malayic"] = "poz-mly-pro", + ["Proto-Malayo-Chamic"] = "poz-mcm-pro", + ["Proto-Malayo-Polynesian"] = "poz-pro", + ["Proto-Malayo-Sumbawan"] = "poz-msa-pro", + ["Proto-Mande"] = "dmn-pro", + ["Proto-Mangbetu"] = "csu-maa-pro", + ["Proto-Masa"] = "cdc-mas-pro", + ["Proto-Mayan"] = "myn-pro", + ["Proto-Mazatec"] = "omq-maz-pro", + ["Proto-Medo-Parthian"] = "ira-mpr-pro", + ["Proto-Mien"] = "hmx-mie-pro", + ["Proto-Min"] = "zhx-min-pro", + ["Proto-Mixe-Zoque"] = "nai-miz-pro", + 
["Proto-Mixtec"] = "omq-mxt-pro", + ["Proto-Mixtecan"] = "omq-mix-pro", + ["Proto-Mon-Khmer"] = "mkh-pro", + ["Proto-Mongolic"] = "xgn-pro", + ["Proto-Monic"] = "mkh-mnc-pro", + ["Proto-Mordvinic"] = "urj-mdv-pro", + ["Proto-Mumuye"] = "alv-mum-pro", + ["Proto-Munda"] = "mun-pro", + ["Proto-Munji-Yidgha"] = "ira-mny-pro", + ["Proto-Na-Dene"] = "xnd-pro", + ["Proto-Nahuan"] = "azc-nah-pro", + ["Proto-Nakh"] = "cau-nkh-pro", + ["Proto-Nawiki"] = "awd-nwk-pro", + ["Proto-Nguni"] = "bnt-ngu-pro", + ["Proto-Nicobarese"] = "aav-nic-pro", + ["Proto-Niger-Congo"] = "nic-pro", + ["Proto-Nilo-Saharan"] = "ssa-pro", + ["Proto-Nilotic"] = "sdv-nil-pro", + ["Proto-Norse"] = "gmq-pro", + ["Proto-North Caucasian"] = "ccn-pro", + ["Proto-North Sarawak"] = "poz-swa-pro", + ["Proto-Northeast Caucasian"] = "cau-nec-pro", + ["Proto-Northern Jê"] = "sai-nje-pro", + ["Proto-Northwest Caucasian"] = "cau-nwc-pro", + ["Proto-Nubian"] = "nub-pro", + ["Proto-Nuclear Polynesian"] = "poz-pnp-pro", + ["Proto-Numic"] = "azc-num-pro", + ["Proto-Nupoid"] = "alv-nup-pro", + ["Proto-Nuristani"] = "iir-nur-pro", + ["Proto-Nyima"] = "sdv-nyi-pro", + ["Proto-Nyulnyulan"] = "aus-nyu-pro", + ["Proto-Oceanic"] = "poz-oce-pro", + ["Proto-Ogoni"] = "nic-ogo-pro", + ["Proto-Omotic"] = "omv-pro", + ["Proto-Ongan"] = "qfa-ong-pro", + ["Proto-Ossetic"] = "os-pro", + ["Proto-Oti-Volta"] = "nic-ovo-pro", + ["Proto-Oto-Manguean"] = "omq-pro", + ["Proto-Oto-Pamean"] = "omq-otp-pro", + ["Proto-Otomi"] = "oto-otm-pro", + ["Proto-Otomian"] = "oto-pro", + ["Proto-Pakanic"] = "mkh-pkn-pro", + ["Proto-Palaungic"] = "mkh-pal-pro", + ["Proto-Pama-Nyungan"] = "aus-pam-pro", + ["Proto-Paresi-Waura"] = "awd-prw-pro", + ["Proto-Pathan"] = "ira-pat-pro", + ["Proto-Pearic"] = "mkh-pea-pro", + ["Proto-Permic"] = "urj-prm-pro", + ["Proto-Philippine"] = "phi-pro", + ["Proto-Plateau"] = "nic-plt-pro", + ["Proto-Plateau Penutian"] = "nai-plp-pro", + ["Proto-Pnar-Khasi-Lyngngam"] = "aav-pkl-pro", + ["Proto-Polynesian"] = 
"poz-pol-pro", + ["Proto-Pomo"] = "nai-pom-pro", + ["Proto-Rukai"] = "dru-pro", + ["Proto-Ryukyuan"] = "jpx-ryu-pro", + ["Proto-Saka"] = "xsc-sak-pro", + ["Proto-Saka-Wakhi"] = "xsc-skw-pro", + ["Proto-Salish"] = "sal-pro", + ["Proto-Samic"] = "smi-pro", + ["Proto-Samoyedic"] = "syd-pro", + ["Proto-Sanglechi-Ishkashimi"] = "ira-sgi-pro", + ["Proto-Sara"] = "csu-sar-pro", + ["Proto-Scythian"] = "xsc-pro", + ["Proto-Semitic"] = "sem-pro", + ["Proto-Shughni-Roshani"] = "ira-shr-pro", + ["Proto-Shughni-Yazghulami"] = "ira-shy-pro", + ["Proto-Shughni-Yazghulami-Munji"] = "ira-sym-pro", + ["Proto-Sino-Tibetan"] = "sit-pro", + ["Proto-Siouan"] = "sio-pro", + ["Proto-Siouan-Catawban"] = "nai-sca-pro", + ["Proto-Slavic"] = "sla-pro", + ["Proto-Sogdic"] = "ira-sgc-pro", + ["Proto-Songhay"] = "son-pro", + ["Proto-Sotho-Tswana"] = "bnt-sts-pro", + ["Proto-South Sulawesi"] = "poz-ssw-pro", + ["Proto-Southern Jê"] = "sai-sje-pro", + ["Proto-Southwestern Tai"] = "tai-swe-pro", + ["Proto-Sunda-Sulawesi"] = "poz-sus-pro", + ["Proto-Ta-Arawak"] = "awd-taa-pro", + ["Proto-Tai"] = "tai-pro", + ["Proto-Takic"] = "azc-tak-pro", + ["Proto-Taman"] = "sdv-tmn-pro", + ["Proto-Tani"] = "sit-tan-pro", + ["Proto-Tatic"] = "xme-ttc-pro", + ["Proto-Tocharian"] = "ine-toc-pro", + ["Proto-Totozoquean"] = "nai-tot-pro", + ["Proto-Trans-New Guinea"] = "ngf-pro", + ["Proto-Trique"] = "omq-tri-pro", + ["Proto-Tsezian"] = "cau-tsz-pro", + ["Proto-Tsimshianic"] = "nai-tsi-pro", + ["Proto-Tungusic"] = "tuw-pro", + ["Proto-Tupi-Guarani"] = "tup-gua-pro", + ["Proto-Tupian"] = "tup-pro", + ["Proto-Turkic"] = "trk-pro", + ["Proto-Ubangian"] = "nic-ubg-pro", + ["Proto-Ugric"] = "urj-ugr-pro", + ["Proto-Upper Cross River"] = "nic-ucr-pro", + ["Proto-Uralic"] = "urj-pro", + ["Proto-Utian"] = "nai-utn-pro", + ["Proto-Uto-Aztecan"] = "azc-pro", + ["Proto-Vietic"] = "mkh-vie-pro", + ["Proto-Volta-Congo"] = "nic-vco-pro", + ["Proto-West Germanic"] = "gmw-pro", + ["Proto-West Semitic"] = "sem-wes-pro", + 
["Proto-Western Mande"] = "dmn-mdw-pro", + ["Proto-Witotoan"] = "sai-wit-pro", + ["Proto-Yeniseian"] = "qfa-yen-pro", + ["Proto-Yoruboid"] = "alv-yor-pro", + ["Proto-Yukaghir"] = "qfa-yuk-pro", + ["Proto-Yupik"] = "ypk-pro", + ["Proto-Zapotec"] = "omq-zpc-pro", + ["Proto-Zapotecan"] = "omq-zap-pro", + ["Proto-Zaza-Gorani"] = "ira-zgr-pro", + ["Providencia Sign Language"] = "prz", + ["Psikye"] = "kvj", + ["Puare"] = "pux", + ["Pudtol Atta"] = "atp", + ["Puebla Mazatec"] = "pbm", + ["Puelche"] = "pue", + ["Puerto Rican Sign Language"] = "psl", + ["Puimei Naga"] = "npu", + ["Puinave"] = "pui", + ["Puiron"] = "sit-prn", + ["Pukapukan"] = "pkp", + ["Pulabu"] = "pup", + ["Puluwat"] = "puw", + ["Puma"] = "pum", + ["Pumpokol"] = "xpm", + ["Pumé"] = "yae", + ["Punan Aput"] = "pud", + ["Punan Bah-Biau"] = "pna", + ["Punan Batu"] = "pnm", + ["Punan Merah"] = "puf", + ["Punan Merap"] = "puc", + ["Punan Tubu"] = "puj", + ["Punic"] = "xpu", + ["Punjabi"] = "pa", + ["Punu"] = "puu", + ["Puoc"] = "puo", + ["Puquina"] = "puq", + ["Puragi"] = "pru", + ["Purari"] = "iar", + ["Purepecha"] = "pua", + ["Puri"] = "prr", + ["Purik"] = "prx", + ["Purisimeño"] = "puy", + ["Puruborá"] = "pur", + ["Puruhá"] = "sai-prh", + ["Purukotó"] = "sai-pur", + ["Purum"] = "pub", + ["Putai"] = "mfl", + ["Putoh"] = "put", + ["Putukwam"] = "afe", + ["Puxian"] = "cpx", + ["Puyo-Paekche"] = "xpp", + ["Puyuma"] = "pyu", + ["Pwaamei"] = "pme", + ["Pwapwa"] = "pop", + ["Pyapun"] = "pcw", + ["Pye Krumen"] = "pye", + ["Pyemmairre"] = "xpb", + ["Pyen"] = "pyy", + ["Pykobjê"] = "sai-pyk", + ["Pyu"] = "pby", + ["Páez"] = "pbb", + ["Pááfang"] = "pfa", + ["Päri"] = "lkr", + ["Pémono"] = "pev", + ["Pévé"] = "lme", + ["Pökoot"] = "pko", + ["Q'anjob'al"] = "kjb", + ["Q'eqchi"] = "kek", + ["Qabiao"] = "laq", + ["Qaqet"] = "byx", + ["Qatabanian"] = "xqt", + ["Qau"] = "gqu", + ["Qila Muji"] = "ymq", + ["Qimant"] = "ahg", + ["Quapaw"] = "qua", + ["Quebec Sign Language"] = "fcs", + ["Quechua"] = "qu", + ["Quenya"] = "qya", + 
["Querétaro Otomi"] = "otq", + ["Quetzaltepec Mixe"] = "pxm", + ["Queyu"] = "qvy", + ["Quiavicuzas Zapotec"] = "zpj", + ["Quileute"] = "qui", + ["Quimbaya"] = "sai-qmb", + ["Quinault"] = "qun", + ["Quinigua"] = "nai-qng", + ["Quinqui"] = "quq", + ["Quioquitani-Quierí Zapotec"] = "ztq", + ["Quiotepec Chinantec"] = "chq", + ["Quiripi"] = "qyp", + ["Quitemo"] = "sai-qtm", + ["Rabha"] = "rah", + ["Rabona"] = "sai-rab", + ["Rade"] = "rad", + ["Raetic"] = "xrr", + ["Raga"] = "lml", + ["Rahambuu"] = "raz", + ["Rajah Kabunsuwan Manobo"] = "mqk", + ["Rajasthani"] = "raj", + ["Rajbanshi"] = "rjs", + ["Raji"] = "rji", + ["Rajong"] = "rjg", + ["Rajput Garasia"] = "gra", + ["Rakahanga-Manihiki"] = "rkh", + ["Rakhine"] = "rki", + ["Ralte"] = "ral", + ["Rama"] = "rma", + ["Ramandi"] = "tks", + ["Ramanos"] = "sai-ram", + ["Ramoaaina"] = "rai", + ["Ramopa"] = "kjx", + ["Rampi"] = "lje", + ["Rana Tharu"] = "thr", + ["Rang"] = "rax", + ["Rangkas"] = "rgk", + ["Ranglong"] = "rnl", + ["Rao"] = "rao", + ["Rapa"] = "ray", + ["Rapa Nui"] = "rap", + ["Rapoisi"] = "kyx", + ["Rapting"] = "rpt", + ["Rara Bakati'"] = "lra", + ["Rarotongan"] = "rar", + ["Rasawa"] = "rac", + ["Ratagnon"] = "btn", + ["Ratahan"] = "rth", + ["Rathawi"] = "rtw", + ["Rathwi Bareli"] = "bgd", + ["Raute"] = "rau", + ["Ravula"] = "yea", + ["Rawa"] = "rwo", + ["Rawang"] = "raw", + ["Rawat"] = "jnl", + ["Rawo"] = "rwa", + ["Rayón Zoque"] = "zor", + ["Razajerdi"] = "rat", + ["Razihi"] = "rzh", + ["Reang"] = "ria", + ["Red Gelao"] = "gir", + ["Reel"] = "atu", + ["Rejang"] = "rej", + ["Rejang Kayan"] = "ree", + ["Reli"] = "rei", + ["Rema"] = "bow", + ["Rembarunga"] = "rmb", + ["Rembong"] = "reb", + ["Remo"] = "rem", + ["Remontado Agta"] = "agv", + ["Rempi"] = "rmp", + ["Remun"] = "lkj", + ["Rendille"] = "rel", + ["Rengao"] = "ren", + ["Rennellese"] = "mnv", + ["Repanbitip"] = "rpn", + ["Rer Bare"] = "rer", + ["Rerau"] = "rea", + ["Rerep"] = "pgk", + ["Reshe"] = "res", + ["Resígaro"] = "rgr", + ["Retta"] = "ret", + 
["Reyesano"] = "rey", + ["Rhine Franconian"] = "gmw-rfr", + ["Riang"] = "ril", + ["Riantana"] = "ran", + ["Ribun"] = "rir", + ["Rigwe"] = "iri", + ["Rikbaktsa"] = "rkb", + ["Rincón Zapotec"] = "zar", + ["Ringgou"] = "rgu", + ["Ririo"] = "rri", + ["Ritarungo"] = "rit", + ["Riung"] = "riu", + ["Riverain Sango"] = "snj", + ["Rogo"] = "rod", + ["Rohingya"] = "rhg", + ["Roma"] = "rmm", + ["Romagnol"] = "rgn", + ["Romam"] = "rmx", + ["Romani"] = "rom", + ["Romani Greek"] = "rge", + ["Romanian"] = "ro", + ["Romanian Sign Language"] = "rms", + ["Romano-Serbian"] = "rsb", + ["Romanova"] = "rmv", + ["Romansch"] = "rm", + ["Romblomanon"] = "rol", + ["Rombo"] = "rof", + ["Romkun"] = "rmk", + ["Ron"] = "cla", + ["Ronga"] = "rng", + ["Rongga"] = "ror", + ["Rongmei Naga"] = "nbu", + ["Rongpo"] = "rnp", + ["Ronji"] = "roe", + ["Roon"] = "rnn", + ["Roria"] = "rga", + ["Roro"] = "rro", + ["Rotokas"] = "roo", + ["Rotuman"] = "rtm", + ["Roviana"] = "rug", + ["Ruching Palaung"] = "pce", + ["Rudbari"] = "rdb", + ["Rufiji"] = "rui", + ["Ruga"] = "ruh", + ["Rukai"] = "dru", + ["Rukiga"] = "cgg", + ["Ruma"] = "ruz", + ["Rumai Palaung"] = "rbb", + ["Rumu"] = "klq", + ["Runga"] = "rou", + ["Rungtu"] = "rtc", + ["Rungus"] = "drg", + ["Rungwa"] = "rnw", + ["Russenorsk"] = "crp-rsn", + ["Russian"] = "ru", + ["Russian Sign Language"] = "rsl", + ["Rusyn"] = "rue", + ["Rutul"] = "rut", + ["Ruuli"] = "ruc", + ["Ruwund"] = "rnd", + ["Rwa"] = "rwk", + ["Rwanda-Rundi"] = "rw", + ["Réunion Creole French"] = "rcf", + ["S'gaw Karen"] = "ksw", + ["Sa"] = "sax", + ["Sa'a"] = "apb", + ["Sa'ban"] = "snv", + ["Sa'och"] = "scq", + ["Saafi-Saafi"] = "sav", + ["Saam"] = "raq", + ["Saamia"] = "lsm", + ["Saanich"] = "str", + ["Saare"] = "uss", + ["Saaroa"] = "sxr", + ["Saba"] = "saa", + ["Sabaean"] = "xsa", + ["Sabah Bisaya"] = "bsy", + ["Sabah Malay"] = "msi", + ["Sabanê"] = "sae", + ["Sabaot"] = "spy", + ["Sabine"] = "sbv", + ["Sabir"] = "pml", + ["Sabu"] = "hvn", + ["Sabüm"] = "sbo", + ["Sacapulteco"] = "quv", 
+ ["Sadri"] = "sck", + ["Saek"] = "skb", + ["Saep"] = "spd", + ["Safaitic"] = "sem-saf", + ["Safaliba"] = "saf", + ["Safeyoka"] = "apz", + ["Safwa"] = "sbk", + ["Sagala"] = "sbm", + ["Sagalla"] = "tga", + ["Sahaptin"] = "nai-spt", + ["Saho"] = "ssy", + ["Sahu"] = "saj", + ["Saisiyat"] = "xsy", + ["Sajau Basap"] = "sjb", + ["Sakachep"] = "sch", + ["Sakam"] = "skm", + ["Sakao"] = "sku", + ["Sakata"] = "skt", + ["Sake"] = "sak", + ["Sakirabiá"] = "skf", + ["Sakizaya"] = "szy", + ["Sala"] = "shq", + ["Salampasu"] = "slx", + ["Salar"] = "slr", + ["Salas"] = "sgu", + ["Salchuq"] = "slq", + ["Saleman"] = "sau", + ["Saliba (Colombia)"] = "slc", + ["Saliba (New Guinea)"] = "sbe", + ["Salinan"] = "sln", + ["Salt-Yui"] = "sll", + ["Saluan"] = "loe", + ["Salumá"] = "slj", + ["Salvadoran Lenca"] = "nai-sln", + ["Salvadoran Sign Language"] = "esn", + ["Sam"] = "snx", + ["Sama"] = "smd", + ["Samaritan Aramaic"] = "sam", + ["Samaritan Hebrew"] = "smp", + ["Samarokena"] = "tmj", + ["Samatao"] = "ysd", + ["Samba"] = "smx", + ["Sambali"] = "xsb", + ["Sambalpuri"] = "spv", + ["Sambe"] = "xab", + ["Samberigi"] = "ssx", + ["Samburu"] = "saq", + ["Samei"] = "smh", + ["Samo"] = "smq", + ["Samoan"] = "sm", + ["Samoan Plantation Pidgin"] = "cpe-spp", + ["Samogitian"] = "sgs", + ["Samosa"] = "swm", + ["Sampang"] = "rav", + ["Samre"] = "sxm", + ["Samtao"] = "stu", + ["Samvedi"] = "smv", + ["San Agustín Mixtepec Zapotec"] = "ztm", + ["San Baltazar Loxicha Zapotec"] = "zpx", + ["San Felipe Otlaltepec Popoloca"] = "pow", + ["San Jerónimo Tecóatl Mazatec"] = "maa", + ["San Juan Atzingo Popoloca"] = "poe", + ["San Juan Colorado Mixtec"] = "mjc", + ["San Juan Guelavía Zapotec"] = "zab", + ["San Juan Quiahije Chatino"] = "ctp-san", + ["San Juan Teita Mixtec"] = "xtj", + ["San Luís Temalacayuca Popoloca"] = "pps", + ["San Marcos Tlalcoyalco Popoloca"] = "pls", + ["San Martín Itunyoso Triqui"] = "trq", + ["San Miguel Creole French"] = "scf", + ["San Miguel Piedras Mixtec"] = "xtp", + ["San Miguel el 
Grande Mixtec"] = "mig", + ["San Pablo Güilá Zapotec"] = "ztu", + ["San Pedro Amuzgos Amuzgo"] = "azg", + ["San Pedro Quiatoni Zapotec"] = "zpf", + ["San Vicente Coatlán Zapotec"] = "zpt", + ["Sanapaná"] = "spn", + ["Sanaviron"] = "sai-san", + ["Sandawe"] = "sad", + ["Sanga (Congo)"] = "sng", + ["Sanga (Nigeria)"] = "xsn", + ["Sanggau"] = "scg", + ["Sangil"] = "snl", + ["Sangir"] = "sxn", + ["Sangisari"] = "sgr", + ["Sangkong"] = "sgk", + ["Sanglechi"] = "sgy", + ["Sango"] = "sg", + ["Sangtam Naga"] = "nsa", + ["Sangu (Gabon)"] = "snq", + ["Sangu (Tanzania)"] = "sbp", + ["Sani"] = "ysn", + ["Sanie"] = "ysy", + ["Saniyo-Hiyewe"] = "sny", + ["Sankaran Maninka"] = "msc", + ["Sansi"] = "ssi", + ["Sanskrit"] = "sa", + ["Santa Catarina Albarradas Zapotec"] = "ztn", + ["Santa Inés Ahuatempan Popoloca"] = "pca", + ["Santa Inés Yatzechi Zapotec"] = "zpn", + ["Santa Lucía Monteverde Mixtec"] = "mdv", + ["Santa María La Alta Nahuatl"] = "nhz", + ["Santa María Quiegolani Zapotec"] = "zpi", + ["Santa María Zacatepec Mixtec"] = "mza", + ["Santa Teresa Cora"] = "cok", + ["Santali"] = "sat", + ["Santiago Xanica Zapotec"] = "zpr", + ["Santo Domingo Albarradas Zapotec"] = "zas", + ["Sanumá"] = "xsu", + ["Sapa"] = "tys", + ["Saparua"] = "spr", + ["Sapará"] = "sai-sap", + ["Sapo"] = "krn", + ["Saponi"] = "spi", + ["Saposa"] = "sps", + ["Sapuan"] = "spu", + ["Sapé"] = "spc", + ["Sar"] = "mwm", + ["Sara"] = "sre", + ["Sara Kaba"] = "sbz", + ["Sara Kaba Deme"] = "kwg", + ["Sara Kaba Náà"] = "kwv", + ["Saraiki"] = "skr", + ["Saramaccan"] = "srm", + ["Sarangani Blaan"] = "bps", + ["Sarangani Manobo"] = "mbs", + ["Sarasira"] = "zsa", + ["Saraveca"] = "sar", + ["Sarcee"] = "srs", + ["Sardinian"] = "sc", + ["Sarikoli"] = "srh", + ["Sarli"] = "sdf", + ["Sartang"] = "onp", + ["Sarua"] = "swy", + ["Sarudu"] = "sdu", + ["Saruga"] = "sra", + ["Sasak"] = "sas", + ["Sasaru"] = "sxs", + ["Sassarese"] = "sdc", + ["Satawalese"] = "stw", + ["Saterland Frisian"] = "stq", + ["Sateré-Mawé"] = "mav", + 
["Sathmar Swabian"] = "gmw-stm", + ["Saudi Arabian Sign Language"] = "sdl", + ["Sauraseni Apabhramsa"] = "inc-sap", + ["Sauraseni Prakrit"] = "psu", + ["Saurashtra"] = "saz", + ["Sauri"] = "srt", + ["Sause"] = "sao", + ["Sausi"] = "ssj", + ["Savi"] = "sdg", + ["Savosavo"] = "svs", + ["Sawai"] = "szw", + ["Saweru"] = "swr", + ["Sawi"] = "saw", + ["Sawila"] = "swt", + ["Sawriya Paharia"] = "mjt", + ["Saxwe Gbe"] = "sxw", + ["Saya"] = "say", + ["Sayula Popoluca"] = "pos", + ["Scanian"] = "gmq-scy", + ["Scots"] = "sco", + ["Scottish Gaelic"] = "gd", + ["Seba"] = "kdg", + ["Sebat Bet Gurage"] = "sgw", + ["Seberuang"] = "sbx", + ["Sebop"] = "sib", + ["Sebuyau"] = "snb", + ["Sechelt"] = "sec", + ["Sechura"] = "sai-sec", + ["Secoya"] = "sey", + ["Sedang"] = "sed", + ["Sedoa"] = "tvw", + ["Seenku"] = "sos", + ["Segai"] = "sge", + ["Segeju"] = "seg", + ["Seget"] = "sbg", + ["Sehwi"] = "sfw", + ["Seim"] = "sim", + ["Seimat"] = "ssg", + ["Seit-Kaitetu"] = "hik", + ["Sekani"] = "sek", + ["Sekapan"] = "skp", + ["Sekar"] = "skz", + ["Seke"] = "skj", + ["Sekele"] = "vaj", + ["Seki"] = "syi", + ["Seko Padang"] = "skx", + ["Seko Tengah"] = "sko", + ["Sekpele"] = "lip", + ["Selangor Sign Language"] = "kgi", + ["Selaru"] = "slu", + ["Selayar"] = "sly", + ["Selee"] = "snw", + ["Selepet"] = "spl", + ["Selk'nam"] = "ona", + ["Selkup"] = "sel", + ["Selonian"] = "sxl", + ["Selungai Murut"] = "slg", + ["Seluwasan"] = "sws", + ["Sema"] = "nsm", + ["Semai"] = "sea", + ["Semandang"] = "sdm", + ["Semaq Beri"] = "szc", + ["Sembakung Murut"] = "sbr", + ["Semelai"] = "sza", + ["Semimi"] = "etz", + ["Semnam"] = "ssm", + ["Semnani"] = "smy", + ["Sempan"] = "xse", + ["Sena"] = "seh", + ["Senara Sénoufo"] = "seq", + ["Senaya"] = "syn", + ["Sene"] = "sej", + ["Seneca"] = "see", + ["Sened"] = "sds", + ["Sengele"] = "szg", + ["Senggi"] = "snu", + ["Sengo"] = "spk", + ["Sengseng"] = "ssz", + ["Senhaja De Srair"] = "sjs", + ["Sensi"] = "sni", + ["Sentani"] = "set", + ["Senthang Chin"] = "sez", + 
["Sentinelese"] = "std", + ["Sepa (Indonesia)"] = "spb", + ["Sepa (New Guinea)"] = "spe", + ["Sepen"] = "spm", + ["Sepik Iwam"] = "iws", + ["Sepik Mari"] = "mbx", + ["Sera"] = "sry", + ["Serbo-Croatian"] = "sh", + ["Sere"] = "swf", + ["Serer"] = "srr", + ["Seri"] = "sei", + ["Serili"] = "sve", + ["Seroa"] = "kqu", + ["Serrano"] = "ser", + ["Seru"] = "szd", + ["Serua"] = "srw", + ["Serudung Murut"] = "srk", + ["Serui-Laut"] = "seu", + ["Seta"] = "stf", + ["Setaman"] = "stm", + ["Seti"] = "sbi", + ["Severn Ojibwa"] = "ojs", + ["Sewa Bay"] = "sew", + ["Seychellois Creole"] = "crs", + ["Seze"] = "sze", + ["Sha"] = "scw", + ["Shabak"] = "sdb", + ["Shabo"] = "sbf", + ["Shahmirzadi"] = "srz", + ["Shahrudi"] = "shm", + ["Shall-Zwall"] = "sha", + ["Shama-Sambuga"] = "sqa", + ["Shamang"] = "xsh", + ["Shambala"] = "ksb", + ["Shan"] = "shn", + ["Shanenawa"] = "swo", + ["Shanga"] = "sho", + ["Shangzhai"] = "jih", + ["Shaozhou Tuhua"] = "zhx-sht", + ["Sharanahua"] = "mcd", + ["Shark Bay"] = "ssv", + ["Sharwa"] = "swq", + ["Shasta"] = "sht", + ["Shatt"] = "shj", + ["Shau"] = "sqh", + ["Shawnee"] = "sjw", + ["She"] = "shx", + ["Shebayo"] = "awd-she", + ["Shehri"] = "shv", + ["Shekkacho"] = "moy", + ["Sheko"] = "she", + ["Shelta"] = "sth", + ["Shendu"] = "shl", + ["Sheni"] = "scv", + ["Sherbro"] = "bun", + ["Sherdukpen"] = "sdp", + ["Sherpa"] = "xsr", + ["Sheshi Kham"] = "kip", + ["Shi"] = "shr", + ["Shihhi Arabic"] = "ssh", + ["Shiki"] = "gua", + ["Shilluk"] = "shk", + ["Shina"] = "scl", + ["Shinasha"] = "bwo", + ["Shipibo-Conibo"] = "shp", + ["Shixing"] = "sxg", + ["Sholaga"] = "sle", + ["Shom Peng"] = "sii", + ["Shona"] = "sn", + ["Shoo-Minda-Nye"] = "bcv", + ["Shor"] = "cjs", + ["Shoshone"] = "shh", + ["Shua"] = "shg", + ["Shuar"] = "jiv", + ["Shuba"] = "cbq", + ["Shughni"] = "sgh", + ["Shumashti"] = "sts", + ["Shumcho"] = "scu", + ["Shuswap"] = "shs", + ["Shuwa-Zamani"] = "ksa", + ["Shwai"] = "shw", + ["Shwe Palaung"] = "pll", + ["Sialum"] = "slw", + ["Siamou"] = "sif", + 
["Sian"] = "spg", + ["Siane"] = "snp", + ["Siang"] = "sya", + ["Siar-Lak"] = "sjr", + ["Sibe"] = "nco", + ["Siberian Tatar"] = "sty", + ["Sibu Melanau"] = "sdx", + ["Sicanian"] = "sxc", + ["Sicel"] = "scx", + ["Sichuan Yi"] = "ii", + ["Sicilian"] = "scn", + ["Siculo-Arabic"] = "sqr", + ["Sidamo"] = "sid", + ["Sidetic"] = "xsd", + ["Sie"] = "erg", + ["Sierra Leone Sign Language"] = "sgx", + ["Sierra Negra Nahuatl"] = "nsu", + ["Sierra de Juárez Zapotec"] = "zaa", + ["Sighu"] = "sxe", + ["Sihan"] = "snr", + ["Sika"] = "ski", + ["Sikaiana"] = "sky", + ["Sikaritai"] = "tty", + ["Sikiana"] = "sik", + ["Sikkimese"] = "sip", + ["Sikule"] = "skh", + ["Sila"] = "slt", + ["Silacayoapan Mixtec"] = "mks", + ["Sileibi"] = "sbq", + ["Silesian"] = "szl", + ["Silimo"] = "wul", + ["Siliput"] = "mkc", + ["Silopi"] = "xsp", + ["Silt'e"] = "stv", + ["Simaa"] = "sie", + ["Simalungun Batak"] = "bts", + ["Simba"] = "sbw", + ["Simbali"] = "smg", + ["Simbari"] = "smb", + ["Simbo"] = "sbb", + ["Simeku"] = "smz", + ["Simeulue"] = "smr", + ["Simte"] = "smt", + ["Sinacantán"] = "nai-sin", + ["Sinagen"] = "siu", + ["Sinasina"] = "sst", + ["Sinaugoro"] = "snc", + ["Sindarin"] = "sjn", + ["Sindhi"] = "sd", + ["Sindhi Bhil"] = "sbn", + ["Sindihui Mixtec"] = "xts", + ["Singa"] = "sgm", + ["Singapore Sign Language"] = "sls", + ["Singpho"] = "sgp", + ["Sinhalese"] = "si", + ["Sinicahua Mixtec"] = "xti", + ["Sininkere"] = "skq", + ["Sinte Romani"] = "rmo", + ["Sinyar"] = "sys", + ["Sinúfana"] = "sai-sin", + ["Sio"] = "xsi", + ["Siona"] = "snn", + ["Sipakapense"] = "qum", + ["Sira"] = "swj", + ["Siraya"] = "fos", + ["Sirenik"] = "ysr", + ["Siri"] = "sir", + ["Siriano"] = "sri", + ["Sirionó"] = "srq", + ["Sirmauri"] = "srx", + ["Siroi"] = "ssd", + ["Sissala"] = "sld", + ["Sissano"] = "sso", + ["Situ"] = "sit-sit", + ["Siuslaw"] = "sis", + ["Sivandi"] = "siy", + ["Siwai"] = "siw", + ["Siwi"] = "siz", + ["Siwu"] = "akp", + ["Siyin Chin"] = "csy", + ["Skagit"] = "ska", + ["Skalvian"] = "svx", + ["Ske"] = 
"ske", + ["Skepi Creole Dutch"] = "skw", + ["Skolt Sami"] = "sms", + ["Skou"] = "skv", + ["Slavey"] = "den", + ["Slavomolisano"] = "svm", + ["Slovak"] = "sk", + ["Slovakian Sign Language"] = "svk", + ["Slovene"] = "sl", + ["Slovincian"] = "zlw-slv", + ["Small Flowery Miao"] = "sfm", + ["Smärky Kanum"] = "kxq", + ["Snohomish"] = "sno", + ["So'a"] = "ssq", + ["Sobei"] = "sob", + ["Sochiapam Chinantec"] = "cso", + ["Soga"] = "xog", + ["Sogdian"] = "sog", + ["Sok"] = "skk", + ["Sokna"] = "swn", + ["Soko"] = "soc", + ["Sokoro"] = "sok", + ["Solano"] = "xso", + ["Soli"] = "sby", + ["Solon"] = "tuw-sol", + ["Solong"] = "aaw", + ["Solos"] = "sol", + ["Som"] = "smc", + ["Somali"] = "so", + ["Somba-Siawari"] = "bmu", + ["Somra"] = "ntx", + ["Somrai"] = "sor", + ["Somray"] = "smu", + ["Somyev"] = "kgt", + ["Sonaga"] = "ysg", + ["Sonde"] = "shc", + ["Songe"] = "sop", + ["Songlai Chin"] = "csj", + ["Songomeno"] = "soe", + ["Songoora"] = "sod", + ["Sonha"] = "soi", + ["Sonia"] = "siq", + ["Soninke"] = "snk", + ["Sonsorolese"] = "sov", + ["Soo"] = "teu", + ["Sop"] = "urw", + ["Soqotri"] = "sqt", + ["Sora"] = "srb", + ["Sori-Harengan"] = "sbh", + ["Sorkhei"] = "sqo", + ["Sorothaptic"] = "sxo", + ["Sorsogon Ayta"] = "ays", + ["Sos Kundi"] = "sdk", + ["Sota Kanum"] = "krz", + ["Sotho"] = "st", + ["Sou"] = "sqq", + ["South African Sign Language"] = "sfs", + ["South Awyu"] = "aws", + ["South Boma"] = "bnt-sbo", + ["South Central Banda"] = "lnl", + ["South Central Dinka"] = "dib", + ["South Efate"] = "erk", + ["South Fali"] = "fal", + ["South Giziga"] = "giz", + ["South Lembata"] = "lmf", + ["South Levantine Arabic"] = "ajp", + ["South Marquesan"] = "mqm", + ["South Muyu"] = "kts", + ["South Nuaulu"] = "nxl", + ["South Picene"] = "spx", + ["South Slavey"] = "xsl", + ["South Tairora"] = "omw", + ["South Ucayali Ashéninka"] = "cpy", + ["South Watut"] = "mcy", + ["Southeast Ambrym"] = "tvk", + ["Southeast Babar"] = "vbb", + ["Southeast Ijo"] = "ijs", + ["Southeast Pashayi"] = "psi", + 
["Southeast Tasmanian"] = "xpf", + ["Southeastern Dinka"] = "dks", + ["Southeastern Ixtlán Zapotec"] = "zpd", + ["Southeastern Kolami"] = "nit", + ["Southeastern Nochixtlán Mixtec"] = "mxy", + ["Southeastern Pomo"] = "pom", + ["Southeastern Puebla Nahuatl"] = "npl", + ["Southeastern Tarahumara"] = "tcu", + ["Southeastern Tepehuan"] = "stp", + ["Southern Alta"] = "agy", + ["Southern Altai"] = "alt", + ["Southern Amami-Oshima"] = "ams", + ["Southern Bai"] = "bfs", + ["Southern Birifor"] = "biv", + ["Southern Bobo"] = "bwq", + ["Southern Bontoc"] = "obk", + ["Southern Carrier"] = "caf", + ["Southern Catanduanes Bicolano"] = "bln", + ["Southern Dagaare"] = "dga", + ["Southern East Cree"] = "crj", + ["Southern Ghale"] = "ghe", + ["Southern Grebo"] = "grj", + ["Southern Guiyang Hmong"] = "hmy", + ["Southern Haida"] = "hax", + ["Southern Hindko"] = "hnd", + ["Southern Kalapuya"] = "sxk", + ["Southern Kalinga"] = "ksc", + ["Southern Kam"] = "kmc", + ["Southern Kissi"] = "kss", + ["Southern Kiwai"] = "kjd", + ["Southern Kurdish"] = "sdh", + ["Southern Lolopo"] = "ysp", + ["Southern Lorung"] = "lrr", + ["Southern Luri"] = "luz", + ["Southern Ma'di"] = "snm", + ["Southern Mashan Hmong"] = "hma", + ["Southern Mnong"] = "mnn", + ["Southern Muji"] = "ymc", + ["Southern Ndebele"] = "nr", + ["Southern Ngbandi"] = "nbw", + ["Southern Nicobarese"] = "nik", + ["Southern Nisu"] = "nsd", + ["Southern Nuni"] = "nnw", + ["Southern Ohlone"] = "css", + ["Southern One"] = "osu", + ["Southern Pame"] = "pmz", + ["Southern Pomo"] = "peq", + ["Southern Puebla Mixtec"] = "mit", + ["Southern Puget Sound Salish"] = "slh", + ["Southern Pumi"] = "pmj", + ["Southern Qiandong Miao"] = "hms", + ["Southern Qiang"] = "qxs", + ["Southern Rengma Naga"] = "nre", + ["Southern Rincon Zapotec"] = "zsr", + ["Southern Roglai"] = "rgs", + ["Southern Sama"] = "ssb", + ["Southern Sami"] = "sma", + ["Southern Samo"] = "sbd", + ["Southern Sierra Miwok"] = "skd", + ["Southern Thai"] = "sou", + ["Southern Tidong"] = 
"itd", + ["Southern Tiwa"] = "tix", + ["Southern Toussian"] = "wib", + ["Southern Tujia"] = "tjs", + ["Southern Tutchone"] = "tce", + ["Southern Valley Yokuts"] = "nai-svy", + ["Southern Yukaghir"] = "yux", + ["Southwest Gbaya"] = "gso", + ["Southwest Palawano"] = "plv", + ["Southwest Pashayi"] = "psh", + ["Southwest Tanna"] = "nwi", + ["Southwestern Bontoc"] = "vbk", + ["Southwestern Dinka"] = "dik", + ["Southwestern Fars"] = "fay", + ["Southwestern Guiyang Hmong"] = "hmg", + ["Southwestern Huishui Hmong"] = "hmh", + ["Southwestern Nisu"] = "nsv", + ["Southwestern Tamang"] = "tsf", + ["Southwestern Tarahumara"] = "twr", + ["Southwestern Tepehuan"] = "tla", + ["Southwestern Tlaxiaco Mixtec"] = "meh", + ["Sowa"] = "sww", + ["Sowanda"] = "sow", + ["Soyaltepec Mazatec"] = "vmp", + ["Soyaltepec Mixtec"] = "vmq", + ["Spanish"] = "es", + ["Spanish Sign Language"] = "ssp", + ["Spiti Bhoti"] = "spt", + ["Spokane"] = "spo", + ["Squamish"] = "squ", + ["Sranan Tongo"] = "srn", + ["Sri Lankan Creole Malay"] = "sci", + ["Sri Lankan Sign Language"] = "sqs", + ["Stod Bhoti"] = "sbu", + ["Stoney"] = "sto", + ["Suabo"] = "szp", + ["Suarmin"] = "seo", + ["Suau"] = "swp", + ["Suba"] = "sxb", + ["Suba-Simbiti"] = "ssc", + ["Subi"] = "xsj", + ["Subiya"] = "sbs", + ["Subtiaba"] = "sut", + ["Sudanese Arabic"] = "apd", + ["Sudest"] = "tgo", + ["Sudovian"] = "xsv", + ["Suena"] = "sue", + ["Suga"] = "sgi", + ["Suganga"] = "sug", + ["Sugut Dusun"] = "kzs", + ["Sui"] = "swi", + ["Suki"] = "sui", + ["Suku"] = "sub", + ["Sukuma"] = "suk", + ["Sukur"] = "syk", + ["Sukurum"] = "zsu", + ["Sula"] = "szn", + ["Sulka"] = "sua", + ["Sulod"] = "srg", + ["Sulung"] = "suv", + ["Suma"] = "sqm", + ["Sumariup"] = "siv", + ["Sumau"] = "six", + ["Sumbawa"] = "smw", + ["Sumbwa"] = "suw", + ["Sumerian"] = "sux", + ["Sumtu Chin"] = "csv", + ["Sunam"] = "ssk", + ["Sundanese"] = "su", + ["Sunum"] = "ymn", + ["Sunwar"] = "suz", + ["Suoy"] = "syo", + ["Supyire"] = "spp", + ["Sur"] = "tdl", + ["Surbakhal"] = "sbj", + 
["Suri"] = "suq", + ["Surigaonon"] = "sgd", + ["Surjapuri"] = "sjp", + ["Sursurunga"] = "sgz", + ["Suruahá"] = "swx", + ["Surubu"] = "sde", + ["Suruí"] = "sru", + ["Suruí Do Pará"] = "mdz", + ["Susquehannock"] = "sqn", + ["Susu"] = "sus", + ["Susuami"] = "ssu", + ["Suundi"] = "sdj", + ["Suwawa"] = "swu", + ["Suyá"] = "suy", + ["Svan"] = "sva", + ["Swabian"] = "swg", + ["Swahili"] = "sw", + ["Swampy Cree"] = "csw", + ["Swazi"] = "ss", + ["Swedish"] = "sv", + ["Swedish Sign Language"] = "swl", + ["Swiss-French Sign Language"] = "ssr", + ["Swiss-German Sign Language"] = "sgg", + ["Swiss-Italian Sign Language"] = "slf", + ["Swo"] = "sox", + ["Syenara Senoufo"] = "shz", + ["Sylheti"] = "syl", + ["Sácata"] = "sai-sac", + ["São Paulo Kaingáng"] = "zkp", + ["Sãotomense"] = "cri", + ["Sìcìté Sénoufo"] = "sep", + ["Sô"] = "sss", + ["T'en"] = "tct", + ["Taabwa"] = "tap", + ["Tabaa Zapotec"] = "zat", + ["Tabancale"] = "sai-tab", + ["Tabaru"] = "tby", + ["Tabasaran"] = "tab", + ["Tabasco Chontal"] = "chf", + ["Tabasco Nahuatl"] = "nhc", + ["Tabasco Zoque"] = "zoq", + ["Tabla"] = "tnm", + ["Tabo"] = "knv", + ["Tabriak"] = "tzx", + ["Tacahua Mixtec"] = "xtt", + ["Tacana"] = "tna", + ["Tachawit"] = "shy", + ["Tadaksahak"] = "dsq", + ["Tadyawan"] = "tdy", + ["Tae'"] = "rob", + ["Tafi"] = "tcd", + ["Tafreshi"] = "xme-taf", + ["Tagabawa"] = "bgs", + ["Tagakaulu Kalagan"] = "klg", + ["Tagal Murut"] = "mvv", + ["Tagalog"] = "tl", + ["Tagargrent"] = "oua", + ["Tagbanwa"] = "tbw", + ["Tagbu"] = "tbm", + ["Tagdal"] = "tda", + ["Tagish"] = "tgx", + ["Tagoi"] = "tag", + ["Tagwana Senoufo"] = "tgw", + ["Tahaggart Tamahaq"] = "thv", + ["Tahitian"] = "ty", + ["Tahltan"] = "tht", + ["Tai"] = "taw", + ["Tai Daeng"] = "tyr", + ["Tai Dam"] = "blt", + ["Tai Do"] = "tyj", + ["Tai Dón"] = "twh", + ["Tai Hang Tong"] = "thc", + ["Tai Hongjin"] = "tiz", + ["Tai Loi"] = "tlq", + ["Tai Long"] = "thi", + ["Tai Mène"] = "tmp", + ["Tai Nüa"] = "tdd", + ["Tai Pao"] = "tpo", + ["Tai Thanh"] = "tmm", + ["Tai 
Ya"] = "cuu", + ["Taiap"] = "gpn", + ["Taikat"] = "aos", + ["Taimyr Pidgin Russian"] = "crp-tpr", + ["Tainae"] = "ago", + ["Tairuma"] = "uar", + ["Taishanese"] = "zhx-tai", + ["Taita"] = "dav", + ["Taivoan"] = "tvx", + ["Taiwan Sign Language"] = "tss", + ["Taje"] = "pee", + ["Tajik"] = "tg", + ["Tajiki Arabic"] = "abh", + ["Tajio"] = "tdj", + ["Tajuasohn"] = "tja", + ["Takelma"] = "tkm", + ["Takia"] = "tbc", + ["Takka Apabhramsa"] = "inc-tak", + ["Takua"] = "tkz", + ["Takuu"] = "nho", + ["Takwane"] = "tke", + ["Tal"] = "tal", + ["Tala"] = "tak", + ["Talaud"] = "tld", + ["Taliabu"] = "tlv", + ["Talieng"] = "tdf", + ["Talinga-Bwisi"] = "tlj", + ["Talise"] = "tlr", + ["Tallán"] = "sai-tal", + ["Talodi"] = "tlo", + ["Taloki"] = "tlk", + ["Talondo'"] = "tln", + ["Talossan"] = "tzl", + ["Talu"] = "yta", + ["Talysh"] = "tly", + ["Tama (Chad)"] = "tma", + ["Tama (Colombia)"] = "ten", + ["Tamagario"] = "tcg", + ["Tamambo"] = "mla", + ["Taman (Burma)"] = "tcl", + ["Taman (Indonesia)"] = "tmn", + ["Tamanaku"] = "tmz", + ["Tamasheq"] = "taq", + ["Tamazola Mixtec"] = "vmx", + ["Tambas"] = "tdk", + ["Tambora"] = "xxt", + ["Tambotalo"] = "tls", + ["Tambunan Dusun"] = "kzt", + ["Tami"] = "tmy", + ["Tamil"] = "ta", + ["Tamki"] = "tax", + ["Tamnim Citak"] = "tml", + ["Tampias Lobu"] = "low", + ["Tampuan"] = "tpu", + ["Tampulma"] = "tpm", + ["Tanacross"] = "tcb", + ["Tanahmerah"] = "tcm", + ["Tanapag"] = "tpv", + ["Tandaganon"] = "tgn", + ["Tandia"] = "tni", + ["Tanema"] = "tnx", + ["Tangale"] = "tan", + ["Tangam"] = "sit-tgm", + ["Tangchangya"] = "tnv", + ["Tanggu"] = "tgu", + ["Tangkhul Naga"] = "nmf", + ["Tangko"] = "tkx", + ["Tanglang"] = "ytl", + ["Tangoa"] = "tgp", + ["Tangsa"] = "nst", + ["Tanguat"] = "tbs", + ["Tangut"] = "txg", + ["Tanimbili"] = "tbe", + ["Tanimuca-Retuarã"] = "tnc", + ["Tanjijili"] = "uji", + ["Tanudan Kalinga"] = "kml", + ["Tanzanian Sign Language"] = "tza", + ["Taos"] = "twf", + ["Tapachultec"] = "nai-tap", + ["Taparita"] = "sai-tpr", + ["Tapayuna"] = 
"sai-tap", + ["Tapeba"] = "tbb", + ["Tapei"] = "afp", + ["Tapieté"] = "tpj", + ["Tapirapé"] = "taf", + ["Tar Gula"] = "kcm", + ["Tara Baka"] = "bdh", + ["Tarairiú"] = "sai-trr", + ["Tarantino"] = "roa-tar", + ["Tarao"] = "tro", + ["Taraon"] = "mhu", + ["Tareng"] = "tgr", + ["Tariana"] = "tae", + ["Tarifit"] = "rif", + ["Tarjumo"] = "txj", + ["Tarok"] = "yer", + ["Taroko"] = "trv", + ["Tarpia"] = "tpf", + ["Tartessian"] = "txr", + ["Taruma"] = "tdm", + ["Tasawaq"] = "twq", + ["Tashelhit"] = "shi", + ["Tasmanian"] = "xtz", + ["Tasmate"] = "tmt", + ["Tat"] = "ttt", + ["Tataltepec Chatino"] = "cta", + ["Tatana"] = "txx", + ["Tatar"] = "tt", + ["Tataviam"] = "azc-tat", + ["Tatuyo"] = "tav", + ["Tauade"] = "ttd", + ["Taulil"] = "tuh", + ["Taungyo"] = "tco", + ["Taupota"] = "tpa", + ["Tause"] = "tad", + ["Taushiro"] = "trr", + ["Tausug"] = "tsg", + ["Tauya"] = "tya", + ["Taveta"] = "tvs", + ["Tavoyan"] = "tvn", + ["Tavringer Romani"] = "rmu", + ["Tawala"] = "tbo", + ["Tawallammat Tamajaq"] = "ttq", + ["Tawandê"] = "xtw", + ["Tawang Monpa"] = "twm", + ["Tawasa"] = "nai-taw", + ["Taworta"] = "tbp", + ["Tawoyan"] = "twy", + ["Tawr Chin"] = "tcp", + ["Tay Boi"] = "tas", + ["Tay Khang"] = "tnu", + ["Tayabas Ayta"] = "ayy", + ["Tayart Tamajeq"] = "thz", + ["Taymanitic"] = "sem-tay", + ["Tayo"] = "cks", + ["Taznatit"] = "grr", + ["Taíno"] = "tnq", + ["Tboli"] = "tbl", + ["Tchitchege"] = "tck", + ["Tchumbuli"] = "bqa", + ["Te'un"] = "tve", + ["Teanu"] = "tkw", + ["Tebul Sign Language"] = "tsy", + ["Tebul Ure Dogon"] = "dtu", + ["Tecpatlán Totonac"] = "tcw", + ["Tedaga"] = "tuq", + ["Tedim Chin"] = "ctd", + ["Tee"] = "tkq", + ["Tefaro"] = "tfo", + ["Tegali"] = "ras", + ["Tehit"] = "kps", + ["Tehuelche"] = "teh", + ["Teiwa"] = "twe", + ["Tejalapan Zapotec"] = "ztt", + ["Teke-Fuumu"] = "ifm", + ["Teke-Kukuya"] = "kkw", + ["Teke-Laali"] = "lli", + ["Teke-Tege"] = "teg", + ["Teke-Tsaayi"] = "tyi", + ["Teke-Tyee"] = "tyx", + ["Tektiteko"] = "ttc", + ["Tela-Masbuar"] = "tvm", + 
["Telefol"] = "tlf", + ["Telugu"] = "te", + ["Teluti"] = "tlt", + ["Tem"] = "kdh", + ["Temacine Tamazight"] = "tjo", + ["Temascaltepec Nahuatl"] = "nhv", + ["Tembé"] = "tqb", + ["Teme"] = "tdo", + ["Temein"] = "teq", + ["Temi"] = "soz", + ["Temiar"] = "tea", + ["Temne"] = "tem", + ["Temoaya Otomi"] = "ott", + ["Temoq"] = "tmo", + ["Tempasuk Dusun"] = "tdu", + ["Ten'edn"] = "tnz", + ["Tenango Otomi"] = "otn", + ["Tene Kan Dogon"] = "dtk", + ["Tenggarong Kutai Malay"] = "vkt", + ["Tengger"] = "tes", + ["Tenharim"] = "pah", + ["Tenino"] = "tqn", + ["Tenis"] = "tns", + ["Tennet"] = "tex", + ["Teochew"] = "zhx-teo", + ["Teojomulco Chatino"] = "omq-teo", + ["Teop"] = "tio", + ["Teor"] = "tev", + ["Tepecano"] = "tep", + ["Tepetotutla Chinantec"] = "cnt", + ["Tepeuxila Cuicatec"] = "cux", + ["Tepinapa Chinantec"] = "cte", + ["Tepo Krumen"] = "ted", + ["Teposcolula Mixtec"] = "omq-tel", + ["Tequistlatec"] = "nai-teq", + ["Ter Sami"] = "sjt", + ["Tera"] = "ttr", + ["Terebu"] = "trb", + ["Terei"] = "buo", + ["Tereno"] = "ter", + ["Teressa"] = "tef", + ["Tereweng"] = "twg", + ["Teribe"] = "tfr", + ["Terik"] = "tec", + ["Termanu"] = "twu", + ["Ternate"] = "tft", + ["Ternateño"] = "tmg", + ["Tese"] = "keg", + ["Teshenawa"] = "twc", + ["Tetela"] = "tll", + ["Tetelcingo Nahuatl"] = "nhg", + ["Tetete"] = "teb", + ["Tetserret"] = "tez", + ["Tetum"] = "tet", + ["Tetun Dili"] = "tdt", + ["Teushen"] = "sai-teu", + ["Teutila Cuicatec"] = "cut", + ["Tewa"] = "tew", + ["Texcatepec Otomi"] = "otx", + ["Texistepec Popoluca"] = "poq", + ["Texmelucan Zapotec"] = "zpz", + ["Tezoatlán Mixtec"] = "mxb", + ["Tha"] = "thy", + ["Thachanadan"] = "thn", + ["Thado Chin"] = "tcz", + ["Thai"] = "th", + ["Thai Sign Language"] = "tsq", + ["Thai Song"] = "soa", + ["Thaiphum Chin"] = "cth", + ["Thakali"] = "ths", + ["Thamudic"] = "sem-tha", + ["Thangal Naga"] = "nki", + ["Thangmi"] = "thf", + ["Thao"] = "ssf", + ["Tharaka"] = "thk", + ["Tharrgari"] = "dhr", + ["Thavung"] = "thm", + ["Thawa"] = "xtv", + 
["Tho"] = "tou", + ["Thompson"] = "thp", + ["Thopho"] = "ytp", + ["Thracian"] = "txh", + ["Thu Lao"] = "tyl", + ["Thulung"] = "tdh", + ["Thurawal"] = "tbh", + ["Thuri"] = "thu", + ["Tiagbamrin Aizi"] = "ahi", + ["Tiale"] = "mnl", + ["Tiang"] = "tbj", + ["Tibea"] = "ngy", + ["Tibetan"] = "bo", + ["Ticuna"] = "tca", + ["Tidaá Mixtec"] = "mtx", + ["Tidikelt Tamazight"] = "tia", + ["Tidore"] = "tvo", + ["Tiemacèwè Bozo"] = "boo", + ["Tiene"] = "tii", + ["Tifal"] = "tif", + ["Tigak"] = "tgc", + ["Tigon Mbembe"] = "nza", + ["Tigre"] = "tig", + ["Tigrinya"] = "ti", + ["Tii"] = "txq", + ["Tijaltepec Mixtec"] = "xtl", + ["Tikar"] = "tik", + ["Tikopia"] = "tkp", + ["Tilapa Otomi"] = "otl", + ["Tillamook"] = "til", + ["Tilquiapan Zapotec"] = "zts", + ["Tilung"] = "tij", + ["Tima"] = "tms", + ["Timbe"] = "tim", + ["Timor Pidgin"] = "tvy", + ["Timote"] = "sai-tim", + ["Timucua"] = "tjm", + ["Timugon Murut"] = "tih", + ["Tinani"] = "lbf", + ["Tindi"] = "tin", + ["Tingui-Boto"] = "tgv", + ["Tinigua"] = "tit", + ["Tinoc Kallahan"] = "tne", + ["Tinputz"] = "tpz", + ["Tipai"] = "nai-tip", + ["Tippera"] = "tpe", + ["Tira"] = "tic", + ["Tirahi"] = "tra", + ["Tiranige Diga Dogon"] = "tde", + ["Tircul"] = "pyx", + ["Tiri"] = "cir", + ["Tiruray"] = "tiy", + ["Tita"] = "tdq", + ["Titan"] = "ttv", + ["Tiv"] = "tiv", + ["Tiwa"] = "lax", + ["Tiwi"] = "tiw", + ["Tiéfo"] = "tiq", + ["Tiéyaxo Bozo"] = "boz", + ["Tjurruru"] = "tju", + ["Tlachichilco Tepehua"] = "tpt", + ["Tlacoapa Me'phaa"] = "tpl", + ["Tlacoatzintepec Chinantec"] = "ctl", + ["Tlacolulita Zapotec"] = "zpk", + ["Tlahuica"] = "ocu", + ["Tlahuitoltepec Mixe"] = "mxp", + ["Tlamacazapa Nahuatl"] = "nuz", + ["Tlazoyaltepec Mixtec"] = "mqh", + ["Tlingit"] = "tli", + ["To"] = "toz", + ["To'abaita"] = "mlu", + ["Toaripi"] = "tqo", + ["Toba"] = "tob", + ["Toba Batak"] = "bbc", + ["Toba-Maskoy"] = "tmf", + ["Tobagonian Creole English"] = "tgh", + ["Tobanga"] = "tng", + ["Tobati"] = "tti", + ["Tobelo"] = "tlb", + ["Tobian"] = "tox", + 
["Tobilung"] = "tgb", + ["Tobo"] = "tbv", + ["Tocantins Asurini"] = "asu", + ["Tocharian A"] = "xto", + ["Tocharian B"] = "txb", + ["Tocho"] = "taz", + ["Toda"] = "tcx", + ["Todrah"] = "tdr", + ["Tofa"] = "kim", + ["Tofanma"] = "tlg", + ["Tofin Gbe"] = "tfi", + ["Togbo-Vara Banda"] = "tor", + ["Togoyo"] = "tgy", + ["Tojolabal"] = "toj", + ["Tok Pisin"] = "tpi", + ["Toka-Leya"] = "dov", + ["Tokano"] = "zuh", + ["Tokelauan"] = "tkl", + ["Toki Pona"] = "art-top", + ["Toku-No-Shima"] = "tkn", + ["Tol"] = "jic", + ["Tolai"] = "ksd", + ["Tolaki"] = "lbw", + ["Tolomako"] = "tlm", + ["Tolowa"] = "tol", + ["Toma"] = "tod", + ["Tomadino"] = "tdi", + ["Tombelala"] = "ttp", + ["Tombonuo"] = "txa", + ["Tombulu"] = "tom", + ["Tomini"] = "txm", + ["Tommeginne"] = "xpv", + ["Tommo So"] = "dto", + ["Tomo Kan Dogon"] = "dtm", + ["Tomoip"] = "tqp", + ["Tondano"] = "tdn", + ["Tonga (Malawi)"] = "tog", + ["Tonga (Mozambique)"] = "toh", + ["Tonga (Zambia)"] = "toi", + ["Tongan"] = "to", + ["Tongwe"] = "tny", + ["Tonjon"] = "tjn", + ["Tonkawa"] = "tqw", + ["Tonsawang"] = "tnw", + ["Tonsea"] = "txs", + ["Tontemboan"] = "tnt", + ["Toogee"] = "xpx", + ["Tooro"] = "ttj", + ["Topoiyo"] = "toy", + ["Toposa"] = "toq", + ["Toraja-Sa'dan"] = "sda", + ["Toram"] = "trj", + ["Torau"] = "ttu", + ["Toro"] = "tdv", + ["Toro So Dogon"] = "dts", + ["Toro Tegu Dogon"] = "dtt", + ["Toromono"] = "tno", + ["Torona"] = "tqr", + ["Torres Strait Creole"] = "tcs", + ["Torricelli"] = "tei", + ["Torricelli Yau"] = "yyu", + ["Torwali"] = "trw", + ["Torá"] = "trz", + ["Tosu"] = "sit-tos", + ["Totela"] = "ttl", + ["Toto"] = "txo", + ["Totoli"] = "txe", + ["Totomachapan Zapotec"] = "zph", + ["Totontepec Mixe"] = "mto", + ["Totoro"] = "ttk", + ["Touo"] = "tqu", + ["Toura"] = "neb", + ["Tourangeau"] = "roa-tou", + ["Towei"] = "ttn", + ["Translingual"] = "mul", + ["Transylvanian Saxon"] = "gmw-tsx", + ["Traveller Danish"] = "rmd", + ["Traveller Norwegian"] = "rmg", + ["Traveller Scottish"] = "trl", + ["Tregami"] = "trm", 
+ ["Tremembé"] = "tme", + ["Trieng"] = "stg", + ["Trimuris"] = "tip", + ["Tring"] = "tgq", + ["Tringgus"] = "trx", + ["Trinidad and Tobago Sign Language"] = "lst", + ["Trinidadian Creole English"] = "trf", + ["Trinitario"] = "trn", + ["Trió"] = "tri", + ["Truká"] = "tka", + ["Trumai"] = "tpy", + ["Ts'ün-Lao"] = "tsl", + ["Tsaangi"] = "tsa", + ["Tsafiki"] = "cof", + ["Tsakhur"] = "tkr", + ["Tsakonian"] = "tsd", + ["Tsakwambo"] = "kvz", + ["Tsamai"] = "tsb", + ["Tsat"] = "huq", + ["Tsetsaut"] = "txc", + ["Tsez"] = "ddo", + ["Tshangla"] = "tsj", + ["Tshobdun"] = "sit-tsh", + ["Tshwa"] = "hio", + ["Tsikimba"] = "kdl", + ["Tsimané"] = "cas", + ["Tsimshian"] = "tsi", + ["Tsishingini"] = "tsw", + ["Tso"] = "ldp", + ["Tsogo"] = "tsv", + ["Tsonga"] = "ts", + ["Tsotsitaal"] = "fly", + ["Tsou"] = "tsu", + ["Tsum"] = "ttz", + ["Tsuvadi"] = "tvd", + ["Tsuvan"] = "tsh", + ["Tswa"] = "tsc", + ["Tswana"] = "tn", + ["Tswapong"] = "two", + ["Tuamotuan"] = "pmt", + ["Tuareg"] = "tmh", + ["Tubar"] = "tbu", + ["Tucano"] = "tuo", + ["Tugen"] = "tuy", + ["Tugun"] = "tzn", + ["Tugutil"] = "tuj", + ["Tukang Besi North"] = "khc", + ["Tukang Besi South"] = "bhq", + ["Tuki"] = "bag", + ["Tukpa"] = "tpq", + ["Tukudede"] = "tkd", + ["Tukumanféd"] = "tkf", + ["Tula"] = "tul", + ["Tule-Kaweah Yokuts"] = "nai-tky", + ["Tulehu"] = "tlu", + ["Tulishi"] = "tey", + ["Tulu"] = "tcy", + ["Tulu-Bohuai"] = "rak", + ["Tulua"] = "aus-tul", + ["Tuma-Irumu"] = "iou", + ["Tumak"] = "tmc", + ["Tumbuka"] = "tum", + ["Tumi"] = "kku", + ["Tumleo"] = "tmq", + ["Tumshuqese"] = "xtq", + ["Tumtum"] = "tbr", + ["Tumulung Sisaala"] = "sil", + ["Tumzabt"] = "mzb", + ["Tundra Enets"] = "enh", + ["Tundra Nenets"] = "yrk", + ["Tunen"] = "tvu", + ["Tungag"] = "lcm", + ["Tunggare"] = "trt", + ["Tunia"] = "tug", + ["Tunica"] = "tun", + ["Tunisian Arabic"] = "aeb", + ["Tunisian Sign Language"] = "tse", + ["Tunjung"] = "tjg", + ["Tunni"] = "tqq", + ["Tunumiisut"] = "esx-tut", + ["Tunzu"] = "dza", + ["Tuotomb"] = "ttf", + 
["Tuparí"] = "tpr", + ["Tupinambá"] = "tpn", + ["Tupinikin"] = "tpk", + ["Tupuri"] = "tui", + ["Turaka"] = "trh", + ["Turi"] = "trd", + ["Turiwára"] = "twt", + ["Turka"] = "tuz", + ["Turkana"] = "tuv", + ["Turkish"] = "tr", + ["Turkish Sign Language"] = "tsm", + ["Turkmen"] = "tk", + ["Turks and Caicos Creole English"] = "tch", + ["Turoyo"] = "tru", + ["Turumsa"] = "tqm", + ["Turung"] = "try", + ["Tuscarora"] = "tus", + ["Tutelo"] = "tta", + ["Tutong"] = "ttg", + ["Tutsa Naga"] = "tvt", + ["Tutuba"] = "tmi", + ["Tututepec Mixtec"] = "mtu", + ["Tututni"] = "tuu", + ["Tuvaluan"] = "tvl", + ["Tuvan"] = "tyv", + ["Tuwali Ifugao"] = "ifk", + ["Tuwari"] = "tww", + ["Tuwuli"] = "bov", + ["Tuxináwa"] = "tux", + ["Tuxá"] = "tud", + ["Tuyuca"] = "tue", + ["Twana"] = "twa", + ["Twendi"] = "twn", + ["Tyap"] = "kcg", + ["Tyaraity"] = "woa", + ["Tyerrernotepanner"] = "xph", + ["Tz'utujil"] = "tzj", + ["Tzeltal"] = "tzh", + ["Tzotzil"] = "tzo", + ["Tày"] = "tyz", + ["Tày Tac"] = "tyt", + ["Téén"] = "lor", + ["Tübatulabal"] = "tub", + ["U"] = "uuu", + ["Uab Meto"] = "aoz", + ["Uamué"] = "uam", + ["Uare"] = "ksj", + ["Ubaghara"] = "byc", + ["Ubang"] = "uba", + ["Ubi"] = "ubi", + ["Ubir"] = "ubr", + ["Ubykh"] = "uby", + ["Ucayali-Yurúa Ashéninka"] = "cpb", + ["Uda"] = "uda", + ["Udi"] = "udi", + ["Udihe"] = "ude", + ["Udmurt"] = "udm", + ["Uduk"] = "udu", + ["Ufim"] = "ufi", + ["Ugandan Sign Language"] = "ugn", + ["Ugaritic"] = "uga", + ["Ughele"] = "uge", + ["Uhami"] = "uha", + ["Uisai"] = "uis", + ["Ujir"] = "udj", + ["Ukaan"] = "kcf", + ["Ukhwejo"] = "ukh", + ["Ukit"] = "umi", + ["Ukpe-Bayobiri"] = "ukp", + ["Ukpet-Ehom"] = "akd", + ["Ukrainian"] = "uk", + ["Ukrainian Sign Language"] = "ukl", + ["Ukue"] = "uku", + ["Ukuriguma"] = "ukg", + ["Ukwa"] = "ukq", + ["Ukwuani-Aboh-Ndoni"] = "ukw", + ["Ulau-Suain"] = "svb", + ["Ulch"] = "ulc", + ["Uldeme"] = "udl", + ["Ulithian"] = "uli", + ["Ullatan"] = "ull", + ["Ulukwumi"] = "ulb", + ["Ulumanda'"] = "ulm", + ["Ulwa"] = "ulw", + ["Uma"] 
= "ppk", + ["Uma' Lasan"] = "xky", + ["Uma' Lung"] = "ulu", + ["Umanakaina"] = "gdn", + ["Umatilla"] = "uma", + ["Umbindhamu"] = "umd", + ["Umbrian"] = "xum", + ["Umbu-Ungu"] = "ubu", + ["Umbugarla"] = "umr", + ["Umbundu"] = "umb", + ["Umbuygamu"] = "umg", + ["Ume Sami"] = "sju", + ["Umeda"] = "upi", + ["Umiida"] = "xud", + ["Umiray Dumaget Agta"] = "due", + ["Umon"] = "umm", + ["Umotína"] = "umo", + ["Umpila"] = "ump", + ["Una"] = "mtg", + ["Unami"] = "unm", + ["Unas"] = "art-una", + ["Unde Kaili"] = "unz", + ["Undetermined"] = "und", + ["Uneapa"] = "bbn", + ["Uneme"] = "une", + ["Unggaranggu"] = "xun", + ["Unggumi"] = "xgu", + ["Unserdeutsch"] = "uln", + ["Unua"] = "onu", + ["Unubahe"] = "unu", + ["Uokha"] = "uok", + ["Upper Chehalis"] = "cjh", + ["Upper Grand Valley Dani"] = "dna", + ["Upper Kinabatangan"] = "dmg", + ["Upper Kuskokwim"] = "kuu", + ["Upper Necaxa Totonac"] = "tku", + ["Upper Sorbian"] = "hsb", + ["Upper Ta'oih"] = "tth", + ["Upper Tanana"] = "tau", + ["Upper Taromi"] = "tov", + ["Upper Umpqua"] = "xup", + ["Ura (New Guinea)"] = "uro", + ["Ura (Vanuatu)"] = "uur", + ["Uradhi"] = "urf", + ["Urak Lawoi'"] = "urk", + ["Urali"] = "url", + ["Urapmin"] = "urm", + ["Urarina"] = "ura", + ["Urartian"] = "xur", + ["Urat"] = "urt", + ["Urdu"] = "ur", + ["Urhobo"] = "urh", + ["Uri"] = "uvh", + ["Urigina"] = "urg", + ["Urim"] = "uri", + ["Urimo"] = "urx", + ["Uripiv-Wala-Rano-Atchin"] = "upv", + ["Urningangg"] = "urc", + ["Uru"] = "ure", + ["Uru-Eu-Wau-Wau"] = "urz", + ["Uru-Pa-In"] = "urp", + ["Uruangnirin"] = "urn", + ["Uruava"] = "urv", + ["Urubú-Kaapor"] = "urb", + ["Uruguayan Sign Language"] = "ugy", + ["Urum"] = "uum", + ["Urumi"] = "uru", + ["Usaghade"] = "usk", + ["Usan"] = "wnu", + ["Usarufa"] = "usa", + ["Ushojo"] = "ush", + ["Usila Chinantec"] = "cuc", + ["Uspanteco"] = "usp", + ["Usui"] = "usi", + ["Utarmbung"] = "omo", + ["Ute"] = "ute", + ["Utu"] = "utu", + ["Uvbie"] = "evh", + ["Uwinymil"] = "aus-uwi", + ["Uya"] = "usu", + ["Uyajitaya"] = "duk", 
+ ["Uyghur"] = "ug", + ["Uzbek"] = "uz", + ["Uzbeki Arabic"] = "auz", + ["Uzekwe"] = "eze", + ["Vaagri Booli"] = "vaa", + ["Vaghri"] = "vgr", + ["Vaghua"] = "tva", + ["Vagla"] = "vag", + ["Vai"] = "vai", + ["Vaiphei"] = "vap", + ["Vale"] = "vae", + ["Valencian Sign Language"] = "vsv", + ["Valle Nacional Chinantec"] = "cvn", + ["Valley Maidu"] = "vmv", + ["Valman"] = "van", + ["Valpei"] = "vlp", + ["Vamale"] = "mkt", + ["Vame"] = "mlr", + ["Vandalic"] = "xvn", + ["Vangunu"] = "mpr", + ["Vanimo"] = "vam", + ["Vanji"] = "ira-wnj", + ["Vanuma"] = "vau", + ["Vao"] = "vao", + ["Varhadi-Nagpuri"] = "vah", + ["Varisi"] = "vrs", + ["Varli"] = "vav", + ["Vasavi"] = "vas", + ["Vayu"] = "vay", + ["Veddah"] = "ved", + ["Vehes"] = "val", + ["Vemgo-Mabas"] = "vem", + ["Venda"] = "ve", + ["Venetian"] = "vec", + ["Venetic"] = "xve", + ["Venezuelan Sign Language"] = "vsl", + ["Ventureño"] = "veo", + ["Veps"] = "vep", + ["Vera'a"] = "vra", + ["Vestinian"] = "xvs", + ["Vidunda"] = "vid", + ["Viemo"] = "vig", + ["Vietnamese"] = "vi", + ["Vilamovian"] = "wym", + ["Vilela"] = "vil", + ["Vili"] = "vif", + ["Villa Viciosa Agta"] = "dyg", + ["Vincentian Creole English"] = "svc", + ["Virgin Islands Creole"] = "vic", + ["Vishavan"] = "vis", + ["Viti"] = "vit", + ["Vitou"] = "vto", + ["Viya"] = "gev", + ["Vlax Romani"] = "rmy", + ["Volapük"] = "vo", + ["Volga German"] = "gmw-vog", + ["Volscian"] = "xvo", + ["Vono"] = "kch", + ["Voro"] = "vor", + ["Votic"] = "vot", + ["Vracada Apabhramsa"] = "inc-vra", + ["Vumbu"] = "vum", + ["Vunapu"] = "vnp", + ["Vunjo"] = "vun", + ["Vurës"] = "msn", + ["Vute"] = "vut", + ["Võro"] = "vro", + ["Wa"] = "wbm", + ["Wa'ema"] = "wag", + ["Waama"] = "wwa", + ["Waamwang"] = "wmn", + ["Waata"] = "ssn", + ["Wab"] = "wab", + ["Wabo"] = "wbb", + ["Waboda"] = "kmx", + ["Waci Gbe"] = "wci", + ["Wadaginam"] = "wdg", + ["Waddar"] = "wbq", + ["Wadi Wadi"] = "xwd", + ["Wadiyara Koli"] = "kxp", + ["Wadjabangayi"] = "wdy", + ["Wadjiginy"] = "wdj", + ["Wadjigu"] = "wdu", + ["Wae 
Rana"] = "wrx", + ["Waffa"] = "waj", + ["Wagawaga"] = "wgb", + ["Wagaya"] = "wga", + ["Wagdi"] = "wbr", + ["Wageman"] = "waq", + ["Wagi"] = "fad", + ["Wahau Kayan"] = "whu", + ["Wahau Kenyah"] = "whk", + ["Wahgi"] = "wgi", + ["Waigali"] = "wbk", + ["Waigeo"] = "wgo", + ["Waikuri"] = "nai-wai", + ["Wailaki"] = "wlk", + ["Wailapa"] = "wlr", + ["Waima'a"] = "wmh", + ["Waimaha"] = "bao", + ["Waimiri-Atroari"] = "atr", + ["Wainumá"] = "awd-wai", + ["Waioli"] = "wli", + ["Waitaká"] = "sai-wai", + ["Waiwai"] = "waw", + ["Waja"] = "wja", + ["Wajarri"] = "wbv", + ["Wajuk"] = "xwj", + ["Waka"] = "wav", + ["Wakawaka"] = "wkw", + ["Wakhi"] = "wbl", + ["Wakoná"] = "waf", + ["Wala"] = "lgl", + ["Walak"] = "wlw", + ["Walangama"] = "nlw", + ["Wali (Ghana)"] = "wlx", + ["Wali (Sudan)"] = "wll", + ["Waling"] = "wly", + ["Walio"] = "wla", + ["Walla Walla"] = "waa", + ["Wallisian"] = "wls", + ["Walloon"] = "wa", + ["Walmajarri"] = "wmt", + ["Wam"] = "wmo", + ["Wamas"] = "wmc", + ["Wambaya"] = "wmb", + ["Wambon"] = "wms", + ["Wambule"] = "wme", + ["Wamey"] = "cou", + ["Wamin"] = "wmi", + ["Wampar"] = "lbq", + ["Wampur"] = "waz", + ["Wan"] = "wan", + ["Wanambre"] = "wnb", + ["Wanap"] = "wnp", + ["Wancho"] = "nnp", + ["Wanda"] = "wbh", + ["Wandala"] = "mfi", + ["Wandamen"] = "wad", + ["Wandarang"] = "wnd", + ["Wandji"] = "wdd", + ["Waneci"] = "wne", + ["Wanga"] = "lwg", + ["Wanggamala"] = "wnm", + ["Wangganguru"] = "wgg", + ["Wanggom"] = "wng", + ["Wangkayutyuru"] = "wky", + ["Wangkumara"] = "xwk", + ["Wanham"] = "sai-wnm", + ["Wanji"] = "wbi", + ["Wanman"] = "wbt", + ["Wannu"] = "jub", + ["Wano"] = "wno", + ["Wantoat"] = "wnc", + ["Wanukaka"] = "wnk", + ["Wanyi"] = "wny", + ["Wané"] = "hwa", + ["Wapan"] = "juk", + ["Wapishana"] = "wap", + ["Wappo"] = "wao", + ["War-Jaintia"] = "aml", + ["Wara"] = "wbf", + ["Warao"] = "wba", + ["Warapu"] = "wra", + ["Waray Sorsogon"] = "srv", + ["Waray-Waray"] = "war", + ["Wardaman"] = "wrr", + ["Wardandi"] = "wxw", + ["Warekena"] = "gae", + 
["Warembori"] = "wsa", + ["Wari'"] = "pav", + ["Waris"] = "wrs", + ["Waritai"] = "wbe", + ["Wariyangga"] = "wri", + ["Warji"] = "wji", + ["Warkay-Bipim"] = "bgv", + ["Warlmanpa"] = "wrl", + ["Warlpiri"] = "wbp", + ["Warluwara"] = "wrb", + ["Warnang"] = "wrn", + ["Waropen"] = "wrp", + ["Warray"] = "wrz", + ["Warrgamay"] = "wgy", + ["Warrwa"] = "wwr", + ["Waru"] = "wru", + ["Warumungu"] = "wrm", + ["Waruna"] = "wrv", + ["Warungu"] = "wrg", + ["Warwar Feni"] = "hrw", + ["Wasa"] = "wss", + ["Wasco-Wishram"] = "wac", + ["Wasembo"] = "gsp", + ["Washo"] = "was", + ["Waskia"] = "wsk", + ["Wastek"] = "hus", + ["Wasu"] = "wsu", + ["Watakataui"] = "wtk", + ["Watam"] = "wax", + ["Wathaurong"] = "wth", + ["Watiwa"] = "wtf", + ["Watubela"] = "wah", + ["Waube"] = "kop", + ["Wauja"] = "wau", + ["Wauyai"] = "wuy", + ["Wawa"] = "www", + ["Wawonii"] = "wow", + ["Waxianghua"] = "wxa", + ["Wayampi"] = "oym", + ["Wayana"] = "way", + ["Wayanad Chetti"] = "ctt", + ["Wayoró"] = "wyr", + ["Wayumará"] = "sai-way", + ["Wayuu"] = "guc", + ["Wedau"] = "wed", + ["Weh"] = "weh", + ["Welaung"] = "weu", + ["Weliki"] = "klh", + ["Welsh"] = "cy", + ["Welsh Romani"] = "rmw", + ["Wemale"] = "weo", + ["Wemba-Wemba"] = "xww", + ["Weme Gbe"] = "wem", + ["Weri"] = "wer", + ["Wersing"] = "kvw", + ["West Albay Bikol"] = "fbl", + ["West Ambae"] = "nnd", + ["West Central Banda"] = "bbp", + ["West Coast Bajau"] = "bdr", + ["West Damar"] = "drn", + ["West Flemish"] = "vls", + ["West Frisian"] = "fy", + ["West Greenlandic Pidgin"] = "crp-gep", + ["West Lembata"] = "lmj", + ["West Makian"] = "mqs", + ["West Masela"] = "mss", + ["West Tarangan"] = "txn", + ["West Uvean"] = "uve", + ["West-Central Limba"] = "lia", + ["Western Apache"] = "apw", + ["Western Arrernte"] = "are", + ["Western Bolivian Guaraní"] = "gnw", + ["Western Bru"] = "brv", + ["Western Bukidnon Manobo"] = "mbb", + ["Western Cham"] = "cja", + ["Western Dani"] = "dnw", + ["Western Durango Nahuatl"] = "azn", + ["Western Fijian"] = "wyy", + ["Western 
Gurung"] = "gvr", + ["Western Highland Chatino"] = "ctp", + ["Western Huasteca Nahuatl"] = "nhw", + ["Western Jicaque"] = "und-wji", + ["Western Juxtlahuaca Mixtec"] = "jmx", + ["Western Karaboro"] = "kza", + ["Western Katu"] = "kuf", + ["Western Kayah"] = "kyu", + ["Western Keres"] = "kjq", + ["Western Krahn"] = "krw", + ["Western Lalu"] = "ywl", + ["Western Lawa"] = "lcp", + ["Western Magar"] = "mrd", + ["Western Maninkakan"] = "mlq", + ["Western Mari"] = "mrj", + ["Western Mashan Hmong"] = "hmw", + ["Western Meohang"] = "raf", + ["Western Muria"] = "mut", + ["Western Neo-Aramaic"] = "amw", + ["Western Ojibwa"] = "ojw", + ["Western Panjabi"] = "pnb", + ["Western Penan"] = "pne", + ["Western Pwo"] = "pwo", + ["Western Sisaala"] = "ssl", + ["Western Subanon"] = "suc", + ["Western Tamang"] = "tdg", + ["Western Tawbuid"] = "twb", + ["Western Totonac"] = "tqt", + ["Western Tunebo"] = "tnb", + ["Western Xiangxi Miao"] = "mmr", + ["Western Xwla Gbe"] = "xwl", + ["Western Yugur"] = "ybe", + ["Westrobothnian"] = "gmq-bot", + ["Wewaw"] = "wea", + ["Weyewa"] = "wew", + ["White Gelao"] = "giw", + ["White Hmong"] = "mww", + ["White Lachi"] = "lwh", + ["Whitesands"] = "tnp", + ["Wiarumus"] = "tua", + ["Wichita"] = "wic", + ["Wichí Lhamtés Güisnay"] = "mzh", + ["Wichí Lhamtés Nocten"] = "mtp", + ["Wichí Lhamtés Vejoz"] = "wlv", + ["Wik-Epa"] = "wie", + ["Wik-Iiyanh"] = "wij", + ["Wik-Keyangan"] = "wif", + ["Wik-Me'anha"] = "wih", + ["Wik-Mungkan"] = "wim", + ["Wik-Ngathana"] = "wig", + ["Wikalkan"] = "wik", + ["Wikngenchera"] = "wua", + ["Wilawila"] = "wil", + ["Winnebago"] = "win", + ["Wintu"] = "wnw", + ["Winyé"] = "kst", + ["Wipi"] = "gdr", + ["Wiradhuri"] = "wrh", + ["Wiraféd"] = "wir", + ["Wirangu"] = "wgu", + ["Wiru"] = "wiu", + ["Wirö"] = "wpc", + ["Wiwa"] = "mbp", + ["Wiyot"] = "wiy", + ["Woccon"] = "xwc", + ["Wogamusin"] = "wog", + ["Wogeo"] = "woc", + ["Woi"] = "wbw", + ["Woiwurrung"] = "wyi", + ["Wojenaka"] = "jod", + ["Wolane"] = "wle", + ["Wolani"] = "wod", + 
["Wolaytta"] = "wal", + ["Woleaian"] = "woe", + ["Wolio"] = "wlo", + ["Wolof"] = "wo", + ["Womo"] = "wmx", + ["Wong-gie"] = "aus-won", + ["Wongo"] = "won", + ["Woods Cree"] = "cwd", + ["Woria"] = "wor", + ["Worimi"] = "kda", + ["Worodougou"] = "jud", + ["Worora"] = "wro", + ["Wotapuri-Katarqalai"] = "wsv", + ["Wotu"] = "wtw", + ["Woun Meu"] = "noa", + ["Written Oirat"] = "xwo", + ["Wu"] = "wuu", + ["Wudu"] = "wud", + ["Wulguru"] = "aus-wul", + ["Wuliwuli"] = "wlu", + ["Wulna"] = "wux", + ["Wumboko"] = "bqm", + ["Wumbvu"] = "wum", + ["Wumeng Nasu"] = "ywu", + ["Wunai Bunu"] = "bwn", + ["Wunambal"] = "wub", + ["Wurrugu"] = "wur", + ["Wusa Nasu"] = "yig", + ["Wushi"] = "bse", + ["Wusi"] = "wsi", + ["Wutung"] = "wut", + ["Wutunhua"] = "wuh", + ["Wuvulu-Aua"] = "wuv", + ["Wyandot"] = "wya", + ["Wára"] = "tci", + ["Wãpha"] = "juw", + ["Wè Northern"] = "wob", + ["Wè Southern"] = "gxx", + ["Wè Western"] = "wec", + ["Xadani Zapotec"] = "zax", + ["Xakriabá"] = "xkr", + ["Xamtanga"] = "xan", + ["Xanaguía Zapotec"] = "ztg", + ["Xaragure"] = "axx", + ["Xavante"] = "xav", + ["Xerénte"] = "xer", + ["Xetá"] = "xet", + ["Xhosa"] = "xh", + ["Xiang"] = "hsn", + ["Xibe"] = "sjo", + ["Xicotepec de Juárez Totonac"] = "too", + ["Xinca"] = "xin", + ["Xingú Asuriní"] = "asn", + ["Xipaya"] = "xiy", + ["Xiri"] = "xii", + ["Xiriâna"] = "xir", + ["Xishanba Lalo"] = "ywt", + ["Xocó"] = "sai-xoc", + ["Xokleng"] = "xok", + ["Xukurú"] = "xoo", + ["Xwela Gbe"] = "xwe", + ["Xârâcùù"] = "ane", + ["Yaa"] = "iyx", + ["Yaaku"] = "muu", + ["Yabarana"] = "yar", + ["Yabaâna"] = "ybn", + ["Yaben"] = "ybm", + ["Yabong"] = "ybo", + ["Yabula Yabula"] = "yxy", + ["Yace"] = "ekr", + ["Yaeyama"] = "rys", + ["Yafi"] = "wfg", + ["Yagara"] = "yxg", + ["Yagaria"] = "ygr", + ["Yagnobi"] = "yai", + ["Yagomi"] = "ygm", + ["Yagua"] = "yad", + ["Yagwoia"] = "ygw", + ["Yahadian"] = "ner", + ["Yahang"] = "rhp", + ["Yahuna"] = "ynu", + ["Yaka"] = "yaf", + ["Yakaikeke"] = "ykk", + ["Yakan"] = "yka", + ["Yakima"] = "yak", + 
["Yakkha"] = "ybh", + ["Yakoma"] = "yky", + ["Yakut"] = "sah", + ["Yala"] = "yba", + ["Yalahatan"] = "jal", + ["Yalakalore"] = "xyl", + ["Yalarnnga"] = "ylr", + ["Yale"] = "nce", + ["Yaleba"] = "ylb", + ["Yalunka"] = "yal", + ["Yalálag Zapotec"] = "zpu", + ["Yamap"] = "ymp", + ["Yamba"] = "yam", + ["Yambes"] = "ymb", + ["Yambeta"] = "yat", + ["Yamdena"] = "jmd", + ["Yameo"] = "yme", + ["Yami"] = "tao", + ["Yaminahua"] = "yaa", + ["Yamongeri"] = "ymg", + ["Yamphu"] = "ybi", + ["Yan-nhangu"] = "jay", + ["Yana"] = "ynn", + ["Yanda"] = "yda", + ["Yanda Dogon"] = "dym", + ["Yandjibara"] = "xyb", + ["Yandruwandha"] = "ynd", + ["Yanesha'"] = "ame", + ["Yangben"] = "yav", + ["Yangkaal"] = "aus-ynk", + ["Yangkam"] = "bsx", + ["Yangman"] = "jng", + ["Yango"] = "yng", + ["Yangulam"] = "ynl", + ["Yangum Dey"] = "yde", + ["Yangum Gel"] = "ygl", + ["Yangum Mon"] = "ymo", + ["Yankunytjatjara"] = "kdd", + ["Yanomamö"] = "guu", + ["Yanomámi"] = "wca", + ["Yansi"] = "yns", + ["Yanyuwa"] = "jao", + ["Yao"] = "yao", + ["Yao (South America)"] = "sai-yao", + ["Yaosakor Asmat"] = "asy", + ["Yaouré"] = "yre", + ["Yapese"] = "yap", + ["Yapunda"] = "yev", + ["Yaqay"] = "jaq", + ["Yaqui"] = "yaq", + ["Yarawata"] = "yrw", + ["Yareba"] = "yrb", + ["Yareni Zapotec"] = "zae", + ["Yarli"] = "yxl", + ["Yarluyandi"] = "yry", + ["Yaroamë"] = "yro", + ["Yarumá"] = "sai-yar", + ["Yarí"] = "yri", + ["Yasa"] = "yko", + ["Yatay"] = "yty", + ["Yatee Zapotec"] = "zty", + ["Yatzachi Zapotec"] = "zav", + ["Yaul"] = "yla", + ["Yaur"] = "jau", + ["Yautepec Zapotec"] = "zpb", + ["Yavapai"] = "nai-yav", + ["Yavitero"] = "yvt", + ["Yawa"] = "yva", + ["Yawalapití"] = "yaw", + ["Yawanawa"] = "ywn", + ["Yawarawarga"] = "yww", + ["Yaweyuha"] = "yby", + ["Yawijibaya"] = "jbw", + ["Yawiyo"] = "ybx", + ["Yawuru"] = "ywr", + ["Yaygir"] = "xya", + ["Yazghulami"] = "yah", + ["Yei"] = "jei", + ["Yekhee"] = "ets", + ["Yekora"] = "ykr", + ["Yele"] = "yle", + ["Yelmek"] = "jel", + ["Yelogu"] = "ylg", + ["Yemba"] = "ybb", + 
["Yemeni Arabic"] = "ayn", + ["Yemsa"] = "jnj", + ["Yendang"] = "yen", + ["Yeni"] = "yei", + ["Yeniche"] = "yec", + ["Yerakai"] = "yra", + ["Yeretuar"] = "gop", + ["Yerong"] = "yrn", + ["Yerukula"] = "yeu", + ["Yeskwa"] = "yes", + ["Yessan-Mayo"] = "yss", + ["Yetfa"] = "yet", + ["Yevanic"] = "yej", + ["Yeyi"] = "yey", + ["Yiddish"] = "yi", + ["Yidgha"] = "ydg", + ["Yidiny"] = "yii", + ["Yil"] = "yll", + ["Yimas"] = "yee", + ["Yimchungru Naga"] = "yim", + ["Yinbaw Karen"] = "kvu", + ["Yinchia"] = "yin", + ["Yindjibarndi"] = "yij", + ["Yindjilandji"] = "yil", + ["Yine"] = "pib", + ["Yinggarda"] = "yia", + ["Yinhawangka"] = "ywg", + ["Yiningayi"] = "ygi", + ["Yintale Karen"] = "kvy", + ["Yinwum"] = "yxm", + ["Yir-Yoront"] = "yiy", + ["Yirandali"] = "ljw", + ["Yis"] = "yis", + ["Yitha Yitha"] = "xth", + ["Yoba"] = "yob", + ["Yocoboué Dida"] = "gud", + ["Yogad"] = "yog", + ["Yoidik"] = "ydk", + ["Yoke"] = "yki", + ["Yola"] = "yol", + ["Yolmo"] = "scp", + ["Yolngu Sign Language"] = "ygs", + ["Yoloxochitl Mixtec"] = "xty", + ["Yom"] = "pil", + ["Yombe"] = "yom", + ["Yonaguni"] = "yoi", + ["Yong"] = "yno", + ["Yongkom"] = "yon", + ["Yopno"] = "yut", + ["Yora"] = "mts", + ["Yoron"] = "yox", + ["Yorta Yorta"] = "xyy", + ["Yoruba"] = "yo", + ["Yosondúa Mixtec"] = "mpm", + ["Youle Jinuo"] = "jiu", + ["Younuo Bunu"] = "buh", + ["Yout Wam"] = "ytw", + ["Yoy"] = "yoy", + ["Yuaga"] = "nua", + ["Yucatec Maya"] = "yua", + ["Yucatec Maya Sign Language"] = "msd", + ["Yuchi"] = "yuc", + ["Yucuañe Mixtec"] = "mvg", + ["Yucuna"] = "ycn", + ["Yug"] = "yug", + ["Yugambal"] = "yub", + ["Yugoslavian Sign Language"] = "ysl", + ["Yugul"] = "ygu", + ["Yuhup"] = "yab", + ["Yuki"] = "yuk", + ["Yukpa"] = "yup", + ["Yukuben"] = "ybl", + ["Yulu"] = "yul", + ["Yuma"] = "yum", + ["Yumana"] = "awd-yum", + ["Yup'ik"] = "esu", + ["Yupiltepeque"] = "nai-yup", + ["Yupua"] = "sai-yup", + ["Yuqui"] = "yuq", + ["Yuracare"] = "yuz", + ["Yuri"] = "sai-yri", + ["Yurok"] = "yur", + ["Yuru"] = "ljx", + 
["Yurumanguí"] = "sai-yur", + ["Yurutí"] = "yui", + ["Yutanduchi Mixtec"] = "mab", + ["Yuwana"] = "yau", + ["Yuyu"] = "yxu", + ["Yámana"] = "yag", + ["Zaachila Zapotec"] = "ztx", + ["Zabana"] = "kji", + ["Zacatepec Chatino"] = "ctz", + ["Zacatlán-Ahuacatlán-Tepetzintla Nahuatl"] = "nhi", + ["Zaghawa"] = "zag", + ["Zaiwa"] = "atb", + ["Zakhring"] = "zkr", + ["Zambian Sign Language"] = "zsl", + ["Zan Gula"] = "zna", + ["Zanaki"] = "zak", + ["Zande"] = "zne", + ["Zangskari"] = "zau", + ["Zangwal"] = "zah", + ["Zaniza Zapotec"] = "zpw", + ["Zapotec"] = "zap", + ["Zaramo"] = "zaj", + ["Zari"] = "zaz", + ["Zarma"] = "dje", + ["Zauzou"] = "zal", + ["Zay"] = "zwa", + ["Zayein Karen"] = "kxk", + ["Zayse-Zergulla"] = "zay", + ["Zazaki"] = "zza", + ["Zazao"] = "jaj", + ["Zbu"] = "sit-zbu", + ["Zealandic"] = "zea", + ["Zeem"] = "zua", + ["Zemba"] = "dhm", + ["Zeme Naga"] = "nzm", + ["Zemgalian"] = "xzm", + ["Zenag"] = "zeg", + ["Zenaga"] = "zen", + ["Zenzontepec Chatino"] = "czn", + ["Zhaba"] = "zhb", + ["Zhang-Zhung"] = "xzh", + ["Zhire"] = "zhi", + ["Zhoa"] = "zhw", + ["Zhuang"] = "za", + ["Zhár"] = "jjr", + ["Zia"] = "zia", + ["Zialo"] = "zil", + ["Zigula"] = "ziw", + ["Zimakani"] = "zik", + ["Zimba"] = "zmb", + ["Zimbabwe Sign Language"] = "zib", + ["Zinza"] = "zin", + ["Zipser German"] = "gmw-zps", + ["Zire"] = "sih", + ["Zirenkel"] = "zrn", + ["Ziriya"] = "zir", + ["Zizilivakan"] = "ziz", + ["Zo'é"] = "pto", + ["Zokhuo"] = "yzk", + ["Zoogocho Zapotec"] = "zpq", + ["Zotung Chin"] = "czt", + ["Zou"] = "zom", + ["Zulgo-Gemzek"] = "gnd", + ["Zulu"] = "zu", + ["Zumaya"] = "zuy", + ["Zumbun"] = "jmb", + ["Zuni"] = "zun", + ["Zuojiang Zhuang"] = "zzj", + ["Zyphe"] = "zyp", + ["Záparo"] = "zro", + ["Àhàn"] = "ahn", + ["Áncá"] = "acb", + ["Ömie"] = "aom", + ["Önge"] = "oon", + ["ǀXam"] = "xam", + ["ǁAni"] = "hnh", + ["ǁGana"] = "gnk", + ["ǁXegwi"] = "xeg", + ["ǂHoan"] = "huc", + ["ǃKung"] = "khi-kun", + ["ǃXóõ"] = "nmn" +} diff --git a/wiktra/wikt/translit/languages/data2.lua 
b/wiktra/wikt/translit/languages/data2.lua new file mode 100644 index 0000000..2cfe773 --- /dev/null +++ b/wiktra/wikt/translit/languages/data2.lua @@ -0,0 +1,542 @@ +local u = mw.ustring.char + +-- UTF-8 encoded strings for some commonly-used diacritics +local GRAVE = u(0x0300) +local ACUTE = u(0x0301) +local CIRC = u(0x0302) +local TILDE = u(0x0303) +local MACRON = u(0x0304) +local BREVE = u(0x0306) +local DOTABOVE = u(0x0307) +local DIAER = u(0x0308) +local CARON = u(0x030C) +local DGRAVE = u(0x030F) +local INVBREVE = u(0x0311) +local DOTBELOW = u(0x0323) +local RINGBELOW = u(0x0325) +local CEDILLA = u(0x0327) +local OGONEK = u(0x0328) +local DOUBLEINVBREVE = u(0x0361) + +-- Punctuation to be used for standardChars field +local PUNCTUATION = " !#%&*+,-./:;<=>?@^_`|~'()" + +local Cyrl = {"Cyrl"} +local Latn = {"Latn"} +local LatnArab = {"Latn", "Arab"} + +local m = {} + +m["aa"] = {"Afar", 27811, "cus", Latn, entry_name = {remove_diacritics = ACUTE}} + +m["ab"] = {"Abkhaz", 5111, "cau-abz", {"Cyrl", "Geor", "Latn"}, translit_module = "ab-translit", override_translit = true, entry_name = {from = {GRAVE, ACUTE}, to = {}}} + +m["ae"] = {"Avestan", 29572, "ira-cen", {"Avst", "Gujr"}, translit_module = "Avst-translit", wikipedia_article = "Avestan"} + +m["af"] = {"Afrikaans", 14196, "gmw", LatnArab, ancestors = {"nl"}, sort_key = {from = {"[äáâà]", "[ëéêè]", "[ïíîì]", "[öóôò]", "[üúûù]", "[ÿýŷỳ]", "^-", "'"}, to = {"a", "e", "i", "o", "u", "y"}}} + +m["ak"] = {"Akan", 28026, "alv-ctn", Latn} + +m["am"] = {"Amharic", 28244, "sem-eth", {"Ethi"}, translit_module = "Ethi-translit"} + +m["an"] = {"Aragonese", 8765, "roa-ibe", Latn, ancestors = {"roa-oan"}} + +m["ar"] = { + "Arabic", + 13955, + "sem-arb", + {"Arab", "Hebr", "Brai"}, + -- replace alif waṣl with alif + -- remove tatweel and diacritics: fathatan, dammatan, kasratan, fatha, + -- damma, kasra, shadda, sukun, superscript (dagger) alef + entry_name = {from = {u(0x0671), u(0x0640), "[" .. u(0x064B) .. "-" .. 
u(0x0652) .. "]", u(0x0670)}, to = {u(0x0627)}}, + translit_module = "ar-translit", + standardChars = "ء-غف-ْٰٱ" .. PUNCTUATION .. "٠-٩،؛؟٫٬ـ" +} + +m["as"] = {"Assamese", 29401, "inc-eas", {"as-Beng"}, ancestors = {"inc-mas"}, translit_module = "as-translit"} + +m["av"] = {"Avar", 29561, "cau-nec", Cyrl, ancestors = {"oav"}, translit_module = "av-translit", override_translit = true, entry_name = {from = {GRAVE, ACUTE}, to = {}}} + +m["ay"] = {"Aymara", 4627, "sai-aym", Latn} + +m["az"] = {"Azerbaijani", 9292, "trk-ogz", {"Latn", "Cyrl", "fa-Arab"}, ancestors = {"trk-oat"}} + +m["ba"] = {"Bashkir", 13389, "trk-kbu", Cyrl, translit_module = "ba-translit", override_translit = true} + +m["be"] = {"Belarusian", 9091, "zle", Cyrl, ancestors = {"orv"}, translit_module = "be-translit", sort_key = {from = {"Ё", "ё"}, to = {"Е", "е"}}, entry_name = {from = {"Ѐ", "ѐ", GRAVE, ACUTE}, to = {"Е", "е"}}} + +m["bg"] = {"Bulgarian", 7918, "zls", {"Cyrl"}, ancestors = {"cu"}, translit_module = "bg-translit", entry_name = {from = {"Ѐ", "ѐ", "Ѝ", "ѝ", GRAVE, ACUTE}, to = {"Е", "е", "И", "и"}}} + +m["bh"] = {"Bihari", 135305, "inc-eas", {"Deva"}, ancestors = {"inc-mgd"}} + +m["bi"] = {"Bislama", 35452, "crp", Latn, ancestors = {"en"}} + +m["bm"] = {"Bambara", 33243, "dmn-emn", Latn} + +m["bn"] = {"Bengali", 9610, "inc-eas", {"Beng", "Newa"}, ancestors = {"inc-mbn"}, translit_module = "bn-translit"} + +m["bo"] = { + "Tibetan", + 34271, + "sit-tib", + {"Tibt"}, -- sometimes Deva? 
+ ancestors = {"xct"}, + translit_module = "bo-translit", + override_translit = true +} + +m["br"] = {"Breton", 12107, "cel-bry", Latn, ancestors = {"xbm"}} + +m["ca"] = {"Catalan", 7026, "roa", Latn, ancestors = {"roa-oca"}, sort_key = {from = {"à", "[èé]", "[íï]", "[òó]", "[úü]", "ç", "l·l"}, to = {"a", "e", "i", "o", "u", "c", "ll"}}} + +m["ce"] = {"Chechen", 33350, "cau-vay", Cyrl, translit_module = "ce-translit", override_translit = true, entry_name = {from = {MACRON}, to = {}}} + +m["ch"] = {"Chamorro", 33262, "poz-sus", Latn} + +m["co"] = {"Corsican", 33111, "roa-itd", Latn} + +m["cr"] = {"Cree", 33390, "alg", {"Cans", "Latn"}, translit_module = "translit-redirect"} + +m["cs"] = {"Czech", 9056, "zlw", Latn, ancestors = {"zlw-ocs"}, sort_key = {from = {"á", "é", "í", "ó", "[úů]", "ý"}, to = {"a", "e", "i", "o", "u", "y"}}} + +m["cu"] = { + "Old Church Slavonic", + 35499, + "zls", + {"Cyrs", "Glag"}, + translit_module = "Cyrs-Glag-translit", + entry_name = { + from = {u(0x0484)}, -- kamora + to = {} + }, + sort_key = {from = {"оу", "є"}, to = {"у", "е"}} +} + +m["cv"] = {"Chuvash", 33348, "trk-ogr", Cyrl, ancestors = {"xbo"}, translit_module = "cv-translit", override_translit = true} + +m["cy"] = {"Welsh", 9309, "cel-bry", Latn, ancestors = {"wlm"}, sort_key = {from = {"[âáàä]", "ch", "dd", "[êéèë]", "ff", "ngh", "[îíìï]", "ll", "[ôóòö]", "ph", "rh", "th", "[ûúùü]", "[ŵẃẁẅ]", "[ŷýỳÿ]", "'"}, to = {"a", "c~", "d~", "e", "f~", "g~h", "i", "l~", "o", "p~", "r~", "t~", "u", "w", "y"}}, standardChars = "A-IL-PR-UWYa-il-pr-uwy0-9ÂâÊêÎîÔôÛûŴŵŶŷ" .. PUNCTUATION} + +m["da"] = {"Danish", 9035, "gmq", Latn, ancestors = {"gmq-oda"}} + +m["de"] = {"German", 188, "gmw", {"Latn", "Latf"}, ancestors = {"gmh"}, sort_key = {from = {"[äàáâå]", "[ëèéê]", "[ïìíî]", "[öòóô]", "[üùúû]", "ß"}, to = {"a", "e", "i", "o", "u", "ss"}}, standardChars = "A-Za-z0-9ÄäÖöÜüß" .. 
PUNCTUATION} + +m["dv"] = {"Dhivehi", 32656, "inc-ins", {"Thaa"}, ancestors = {"elu-prk"}, translit_module = "dv-translit", override_translit = true} + +m["dz"] = {"Dzongkha", 33081, "sit-tib", {"Tibt"}, ancestors = {"xct"}, translit_module = "bo-translit", override_translit = true} + +m["ee"] = {"Ewe", 30005, "alv-gbe", Latn} + +m["el"] = { + "Greek", + 9129, + "grk", + {"Grek", "Brai"}, + ancestors = {"grc"}, + translit_module = "el-translit", + override_translit = true, + sort_key = { -- Keep this synchronized with grc, cpg, pnt, tsd + from = {"[ᾳάᾴὰᾲᾶᾷἀᾀἄᾄἂᾂἆᾆἁᾁἅᾅἃᾃἇᾇ]", "[έὲἐἔἒἑἕἓ]", "[ῃήῄὴῂῆῇἠᾐἤᾔἢᾒἦᾖἡᾑἥᾕἣᾓἧᾗ]", "[ίὶῖἰἴἲἶἱἵἳἷϊΐῒῗ]", "[όὸὀὄὂὁὅὃ]", "[ύὺῦὐὔὒὖὑὕὓὗϋΰῢῧ]", "[ῳώῴὼῲῶῷὠᾠὤᾤὢᾢὦᾦὡᾡὥᾥὣᾣὧᾧ]", "ῥ", "ς"}, + to = {"α", "ε", "η", "ι", "ο", "υ", "ω", "ρ", "σ"} + }, + standardChars = "ͺ;΄-ώϜϝ" .. PUNCTUATION +} + +m["en"] = { + "English", + 1860, + "gmw", + {"Latn", "Brai", "Shaw", "Dsrt"}, -- entries in Shaw or Dsrt might require prior discussion + ancestors = {"enm"}, + sort_key = {from = {"[äàáâåā]", "[ëèéêē]", "[ïìíîī]", "[öòóôō]", "[üùúûū]", "æ", "œ", "[çč]", "ñ", "'"}, to = {"a", "e", "i", "o", "u", "ae", "oe", "c", "n"}}, + wikimedia_codes = {"en", "simple"}, + standardChars = "A-Za-z0-9" .. PUNCTUATION .. u(0x2800) .. "-" .. u(0x28FF) +} + +m["eo"] = {"Esperanto", 143, "art", Latn, sort_key = {from = {"[áà]", "[éè]", "[íì]", "[óò]", "[úù]", "[ĉ]", "[ĝ]", "[ĥ]", "[ĵ]", "[ŝ]", "[ŭ]"}, to = {"a", "e", "i", "o", "u", "cĉ", "gĉ", "hĉ", "jĉ", "sĉ", "uĉ"}}, standardChars = "A-PRSTUVZa-prstuvzĉĈĝĜĵĴŝŜŭŬ0-9" .. PUNCTUATION} + +m["es"] = {"Spanish", 1321, "roa-ibe", {"Latn", "Brai"}, ancestors = {"osp"}, sort_key = {from = {"á", "é", "í", "ó", "[úü]", "ç", "ñ"}, to = {"a", "e", "i", "o", "u", "c", "n"}}, standardChars = "A-VXYZa-vxyz0-9ÁáÉéÍíÓóÚúÑñ¿¡" .. 
PUNCTUATION} + +m["et"] = {"Estonian", 9072, "fiu-fin", Latn} + +m["eu"] = {"Basque", 8752, "euq", Latn} + +m["fa"] = { + "Persian", + 9168, + "ira-swi", + {"fa-Arab"}, + ancestors = {"pal"}, -- "ira-mid" + entry_name = {from = {u(0x064E), u(0x0640), u(0x064F), u(0x0650), u(0x0651), u(0x0652)}, to = {}} +} + +m["ff"] = {"Fula", 33454, "alv-fwo", {"Latn", "Adlm"}} + +m["fi"] = { + "Finnish", + 1412, + "fiu-fin", + Latn, + entry_name = { + from = {"ˣ"}, -- Used to indicate gemination of the next consonant + to = {} + }, + sort_key = {from = {"[áàâã]", "[éèêẽ]", "[íìîĩ]", "[óòôõ]", "[úùûũ]", "[ýỳŷüű]", "[øõő]", "æ", "œ", "[čç]", "š", "ž", "ß", "[':]"}, to = {"a", "e", "i", "o", "u", "y", "ö", "ae", "oe", "c", "s", "z", "ss"}} +} + +m["fj"] = {"Fijian", 33295, "poz-occ", Latn} + +m["fo"] = {"Faroese", 25258, "gmq", Latn, ancestors = {"non"}} + +m["fr"] = {"French", 150, "roa-oil", {"Latn", "Brai"}, ancestors = {"frm"}, sort_key = {from = {"[áàâä]", "[éèêë]", "[íìîï]", "[óòôö]", "[úùûü]", "[ýỳŷÿ]", "ç", "æ", "œ", "'"}, to = {"a", "e", "i", "o", "u", "y", "c", "ae", "oe"}}, standardChars = "A-Za-z0-9ÀÂÇÉÈÊËÎÏÔŒÛÙÜàâçéèêëîïôœûùü«»" .. PUNCTUATION} + +m["fy"] = {"West Frisian", 27175, "gmw-fri", Latn, ancestors = {"ofs"}, sort_key = {from = {"[àáâä]", "[èéêë]", "[ìíîïyỳýŷÿ]", "[òóôö]", "[ùúûü]", "æ", "[ /.-]"}, to = {"a", "e", "i", "o", "u", "ae"}}, standardChars = "A-PR-WYZa-pr-wyz0-9Ææâäàéêëèïìôöòúûüùỳ" .. PUNCTUATION} + +m["ga"] = {"Irish", 9142, "cel-gae", Latn, ancestors = {"mga"}, sort_key = {from = {"á", "é", "í", "ó", "ú", "ý", "ḃ", "ċ", "ḋ", "ḟ", "ġ", "ṁ", "ṗ", "ṡ", "ṫ"}, to = {"a", "e", "i", "o", "u", "y", "bh", "ch", "dh", "fh", "gh", "mh", "ph", "sh", "th"}}, standardChars = "A-IL-PR-Ua-il-pr-u0-9ÁáÉéÍíÓóÚú" .. PUNCTUATION} + +m["gd"] = {"Scottish Gaelic", 9314, "cel-gae", Latn, ancestors = {"mga"}, sort_key = {from = {"[áà]", "[éè]", "[íì]", "[óò]", "[úù]", "[ýỳ]"}, to = {"a", "e", "i", "o", "u", "y"}}, standardChars = "A-IL-PR-Ua-il-pr-u0-9ÀàÈèÌìÒòÙù" .. 
PUNCTUATION} + +m["gl"] = {"Galician", 9307, "roa-ibe", Latn, ancestors = {"roa-opt"}, sort_key = {from = {"á", "é", "í", "ó", "ú"}, to = {"a", "e", "i", "o", "u"}}} + +m["gn"] = {"Guaraní", 35876, "tup-gua", Latn} + +m["gu"] = {"Gujarati", 5137, "inc-wes", {"Gujr"}, ancestors = {"inc-mgu"}, translit_module = "gu-translit"} + +m["gv"] = {"Manx", 12175, "cel-gae", Latn, ancestors = {"mga"}, sort_key = {from = {"ç", "-"}, to = {"c"}}, standardChars = "A-WYÇa-wyç0-9" .. PUNCTUATION} + +m["ha"] = {"Hausa", 56475, "cdc-wst", LatnArab, sort_key = {from = {"ɓ", "ɗ", "ƙ", "'y", "ƴ", "'"}, to = {"b~", "d~", "k~", "y~", "y~", ""}}, entry_name = {from = {"R̃", "r̃", "À", "à", "È", "è", "Ì", "ì", "Ò", "ò", "Ù", "ù", "Â", "â", "Ê", "ê", "Î", "î", "Ô", "ô", "Û", "û", "Ā", "ā", "Ē", "ē", "Ī", "ī", "Ō", "ō", "Ū", "ū", "Á", "á", "É", "é", "Í", "í", "Ó", "ó", "Ú", "ú", "Ā̀", "ā̀", "Ḕ", "ḕ", "Ī̀", "ī̀", "Ṑ", "ṑ", "Ū̀", "ū̀", GRAVE, ACUTE}, to = {"R", "r", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u"}}} + +m["he"] = {"Hebrew", 9288, "sem-can", {"Hebr", "Phnx", "Brai"}, entry_name = {from = {"[" .. u(0x0591) .. "-" .. u(0x05BD) .. u(0x05BF) .. "-" .. u(0x05C5) .. u(0x05C7) .. "]"}, to = {}}} + +m["hi"] = {"Hindi", 1568, "inc-hnd", {"Deva", "Kthi", "Newa"}, ancestors = {"inc-ohi"}, translit_module = "hi-translit", standardChars = "ँंअ-ऊएऐओ-घच-झट-नप-रलवशसहा-ूेैो-◌्।-॰ड़ढ़" .. 
PUNCTUATION} + +m["ho"] = {"Hiri Motu", 33617, "crp", Latn, ancestors = {"meu"}} + +m["ht"] = {"Haitian Creole", 33491, "crp", Latn, ancestors = {"fr"}} + +m["hu"] = {"Hungarian", 9067, "urj-ugr", {"Latn", "Hung"}, ancestors = {"ohu"}, sort_key = {from = {"á", "é", "í", "ó", "ú", "[öő]", "[üű]", "cs", "dzs", "gy", "ly", "ny", "zs"}, to = {"a", "e", "i", "o", "u", "o~", "u~", "c~", "dz~", "g~", "l~", "n~", "z~"}}} + +m["hy"] = {"Armenian", 8785, "hyx", {"Armn", "Brai"}, ancestors = {"axm"}, translit_module = "Armn-translit", override_translit = true, sort_key = {from = {"ու", "և", "եւ"}, to = {"ւ", "եվ", "եվ"}}, entry_name = {from = {"՞", "՜", "՛", "՟", "և", "յ", "ի", "է"}, to = {"", "", "", "", "եւ", "յ", "ի", "է"}}} + +m["hz"] = {"Herero", 33315, "bnt-swb", Latn} + +m["ia"] = {"Interlingua", 35934, "art", Latn} + +m["id"] = {"Indonesian", 9240, "poz-mly", Latn, ancestors = {"ms"}} + +m["ie"] = {"Interlingue", 35850, "art", Latn, type = "appendix-constructed"} + +m["ig"] = {"Igbo", 33578, "alv-igb", Latn, sort_key = {from = {"ụ", "ị", "ọ", "gb", "gh", "gw", "kp", "kw", "ṅ", "nw", "ny", "sh"}, to = {"u~", "i~", "o~", "gy", "gz", "g~", "kz", "k~", "ny", "nz", "n~", "s~"}}, entry_name = {remove_diacritics = ACUTE .. GRAVE .. MACRON}} + +m["ii"] = {"Sichuan Yi", 34235, "tbq-lol", {"Yiii"}, translit_module = "ii-translit"} + +m["ik"] = {"Inupiaq", 27183, "esx-inu", Latn} + +m["io"] = {"Ido", 35224, "art", Latn} + +m["is"] = {"Icelandic", 294, "gmq", Latn, ancestors = {"non"}} + +m["it"] = {"Italian", 652, "roa-itd", Latn, sort_key = {from = {"[àáâäå]", "[èéêë]", "[ìíîï]", "[òóôö]", "[ùúûü]"}, to = {"a", "e", "i", "o", "u"}}, standardChars = "A-IL-VZa-il-vz0-9" .. PUNCTUATION} + +m["iu"] = {"Inuktitut", 29921, "esx-inu", {"Cans", "Latn"}, translit_module = "translit-redirect", override_translit = true} + +m["ja"] = { + "Japanese", + 5287, + "jpx", + {"Jpan", "Brai"}, + ancestors = {"ojp"} + --[=[ + -- Handled by jsort function in [[Module:ja]]. 
+ sort_key = { + from = {"[ぁァア]", "[ぃィイ]", "[ぅゔゥウヴ]", "[ぇェエ]", "[ぉォオ]", "[がゕカガヵ]", "[ぎキギ]", "[ぐクグㇰ]", "[げゖケゲヶ]", "[ごコゴ]", "[ざサザ]", "[じシジㇱ]", "[ずスズㇲ]", "[ぜセゼ]", "[ぞソゾ]", "[だタダ]", "[ぢチヂ]", "[っづッツヅ]", "[でテデ]", "[どトドㇳ]", "ナ", "ニ", "[ヌㇴ]", "ネ", "ノ", "[ばぱハバパㇵ]", "[びぴヒビピㇶ]", "[ぶぷフブプㇷ]", "[べぺヘベペㇸ]", "[ぼぽホボポㇹ]", "マ", "ミ", "[ムㇺ]", "メ", "モ", "[ゃャヤ]", "[ゅュユ]", "[ょョヨ]", "[ラㇻ]", "[リㇼ]", "[ルㇽ]", "[レㇾ]", "[ロㇿ]", "[ゎヮワヷ]", "[ヰヸ]", "[ヱヹ]", "[ヲヺ]", "ン", "[゙゚゛゜ゝゞ・ヽヾ]", "𛀀"}, + to = {"あ", "い", "う", "え", "お", "か", "き", "く", "け", "こ", "さ", "し", "す", "せ", "そ", "た", "ち", "つ", "て", "と", "な", "に", "ぬ", "ね", "の", "は", "ひ", "ふ", "へ", "ほ", "ま", "み", "む", "め", "も", "や", "ゆ", "よ", "ら", "り", "る", "れ", "ろ", "わ", "ゐ", "ゑ", "を", "ん", "", "え"}}, + --]=] +} + +m["jv"] = {"Javanese", 33549, "poz-sus", {"Latn", "Java"}, translit_module = "jv-translit", ancestors = {"kaw"}, link_tr = true} + +m["ka"] = { + "Georgian", + 8108, + "ccs-gzn", + {"Geor", "Geok", "Hebr"}, -- Hebr is used to write Judeo-Georgian + ancestors = {"oge"}, + translit_module = "Geor-translit", + override_translit = true, + entry_name = {from = {"̂"}, to = {""}} +} + +m["kg"] = {"Kongo", 33702, "bnt-kng", Latn} + +m["ki"] = {"Kikuyu", 33587, "bnt-kka", Latn} + +m["kj"] = {"Kwanyama", 1405077, "bnt-ova", Latn} + +m["kk"] = {"Kazakh", 9252, "trk-kno", {"Cyrl", "Latn", "kk-Arab"}, translit_module = "kk-translit", override_translit = true} + +m["kl"] = {"Greenlandic", 25355, "esx-inu", Latn} + +m["km"] = {"Khmer", 9205, "mkh-kmr", {"Khmr"}, ancestors = {"mkh-mkm"}, translit_module = "km-translit"} + +m["kn"] = {"Kannada", 33673, "dra", {"Knda"}, ancestors = {"dra-mkn"}, translit_module = "kn-translit"} + +m["ko"] = { + "Korean", + 9176, + "qfa-kor", + {"Kore", "Brai"}, + ancestors = {"okm"}, + -- 20210122 idea: strip parenthesized hanja from entry link + -- Hani regex is a reasonable subset of Hani from [[Module:scripts/data]], + -- last updated on 20210214. 
+ entry_name = {from = {" *%([一-鿿㐀-䶿𠀀-𮯯𰀀-𱍏﨎﨏﨑﨓﨔﨟﨡﨣﨤﨧﨨﨩]+%)"}, to = {""}}, + display = {from = {"%-"}, to = {}}, + translit_module = "ko-translit" +} + +m["kr"] = { + "Kanuri", + 36094, + "ssa-sah", + LatnArab, + sort_key = {from = {"ny", "ǝ", "sh"}, to = {"n~", "e~", "s~"}}, -- the sortkey and entry_name are only for standard Kanuri; when dialectal entries get added, someone will have to work out how the dialects should be represented orthographically + entry_name = {from = {"À", "à", "È", "è", "Ǝ̀", "ǝ̀", "Ì", "ì", "Ò", "ò", "Ù", "ù", "Â", "â", "Ê", "ê", "Ǝ̂", "ǝ̂", "Î", "î", "Ô", "ô", "Û", "û", "Ă", "ă", "Ĕ", "ĕ", "Ǝ̆", "ǝ̆", "Ĭ", "ĭ", "Ŏ", "ŏ", "Ŭ", "ŭ", "Á", "á", "É", "é", "Ǝ́", "ǝ́", "Í", "í", "Ó", "ó", "Ú", "ú", GRAVE, ACUTE}, to = {"A", "a", "E", "e", "Ǝ", "ǝ", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "Ǝ", "ǝ", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "Ǝ", "ǝ", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "Ǝ", "ǝ", "I", "i", "O", "o", "U", "u"}} +} + +m["ks"] = {"Kashmiri", 33552, "inc-dar", {"ks-Arab", "Deva", "Shrd", "Latn"}, translit_module = "translit-redirect", ancestors = {"inc-dar-pro"}} + +-- "kv" IS TREATED AS "koi", "kpv", SEE WT:LT + +m["kw"] = {"Cornish", 25289, "cel-bry", Latn, ancestors = {"cnx"}} + +m["ky"] = {"Kyrgyz", 9255, "trk-kip", {"Cyrl", "Latn", "Arab"}, translit_module = "ky-translit", override_translit = true} + +m["la"] = {"Latin", 397, "itc", Latn, ancestors = {"itc-ola"}, entry_name = {remove_diacritics = MACRON .. BREVE .. DIAER .. DOUBLEINVBREVE}, standardChars = "A-Za-z0-9Æ挜Ā-ăĒ-ĕĪ-ĭŌ-ŏŪ-ŭȲȳ" .. MACRON .. BREVE .. 
PUNCTUATION} + +m["lb"] = {"Luxembourgish", 9051, "gmw", Latn, ancestors = {"gmh"}} + +m["lg"] = {"Luganda", 33368, "bnt-nyg", Latn, entry_name = {from = {"á", "Á", "é", "É", "í", "Í", "ó", "Ó", "ú", "Ú", "ń", "Ń", "ḿ", "Ḿ", "â", "Â", "ê", "Ê", "î", "Î", "ô", "Ô", "û", "Û"}, to = {"a", "A", "e", "E", "i", "I", "o", "O", "u", "U", "n", "N", "m", "M", "a", "A", "e", "E", "i", "I", "o", "O", "u", "U"}}, sort_key = {from = {"ŋ"}, to = {"n"}}} + +m["li"] = {"Limburgish", 102172, "gmw", Latn, ancestors = {"dum"}} + +m["ln"] = {"Lingala", 36217, "bnt-bmo", Latn} + +m["lo"] = {"Lao", 9211, "tai-swe", {"Laoo"}, translit_module = "lo-translit", sort_key = {from = {"[%pໆ]", "[່-ໍ]", "ຼ", "ຽ", "ໜ", "ໝ", "([ເແໂໃໄ])([ກ-ຮ])"}, to = {"", "", "ລ", "ຍ", "ຫນ", "ຫມ", "%2%1"}}, standardChars = "0-9ກຂຄງຈຊຍດຕຖທນບປຜຝພຟມຢຣລວສຫອຮຯ-ໝ" .. PUNCTUATION} + +m["lt"] = {"Lithuanian", 9083, "bat", Latn, ancestors = {"olt"}, entry_name = {from = {"[áãà]", "[ÁÃÀ]", "[éẽè]", "[ÉẼÈ]", "[íĩì]", "[ÍĨÌ]", "[ýỹ]", "[ÝỸ]", "ñ", "[óõò]", "[ÓÕÒ]", "[úũù]", "[ÚŨÙ]", ACUTE, GRAVE, TILDE}, to = {"a", "A", "e", "E", "i", "I", "y", "Y", "n", "o", "O", "u", "U"}}} + +m["lu"] = {"Luba-Katanga", 36157, "bnt-lub", Latn} + +m["lv"] = { + "Latvian", + 9078, + "bat", + Latn, + entry_name = { + -- This attempts to convert vowels with tone marks to vowels either with + -- or without macrons. Specifically, there should be no macrons if the + -- vowel is part of a diphthong (including resonant diphthongs such + -- pìrksts -> pirksts not #pīrksts). What we do is first convert the + -- vowel + tone mark to a vowel + tilde in a decomposed fashion, + -- then remove the tilde in diphthongs, then convert the remaining + -- vowel + tilde sequences to macroned vowels, then delete any other + -- tilde. We leave already-macroned vowels alone: Both e.g. ar and ār + -- occur before consonants. FIXME: This still might not be sufficient. 
+ from = {"Ȩ", "ȩ", "[ÂÃÀ]", "[âãà]", "[ÊẼÈ]", "[êẽè]", "[ÎĨÌ]", "[îĩì]", "[ÔÕÒ]", "[ôõò]", "[ÛŨÙ]", "[ûũù]", "[ÑǸ]", "[ñǹ]", "[" .. CIRC .. TILDE .. GRAVE .. "]", "([aAeEiIoOuU])" .. TILDE .. "?([lrnmuiLRNMUI])" .. TILDE .. "?([^aAeEiIoOuUāĀēĒīĪūŪ])", "([aAeEiIoOuU])" .. TILDE .. "?([lrnmuiLRNMUI])" .. TILDE .. "?$", "([iI])" .. TILDE .. "?([eE])" .. TILDE .. "?", "A" .. TILDE, "a" .. TILDE, "E" .. TILDE, "e" .. TILDE, "I" .. TILDE, "i" .. TILDE, "U" .. TILDE, "u" .. TILDE, TILDE}, + to = {"E", "e", "A" .. TILDE, "a" .. TILDE, "E" .. TILDE, "e" .. TILDE, "I" .. TILDE, "i" .. TILDE, "O", "o", "U" .. TILDE, "u" .. TILDE, "N", "n", TILDE, "%1%2%3", "%1%2", "%1%2", "Ā", "ā", "Ē", "ē", "Ī", "ī", "Ū", "ū", ""} + } +} + +m["mg"] = {"Malagasy", 7930, "poz-bre", Latn} + +m["mh"] = {"Marshallese", 36280, "poz-mic", Latn, sort_key = {from = {"ā", "ļ", "m̧", "ņ", "n̄", "o̧", "ō", "ū"}, to = {"a~", "l~", "m~", "n~", "n~~", "o~", "o~~", "u~"}}} + +m["mi"] = {"Maori", 36451, "poz-pep", Latn} + +m["mk"] = {"Macedonian", 9296, "zls", Cyrl, translit_module = "mk-translit", entry_name = {from = {ACUTE}, to = {}}} + +m["ml"] = {"Malayalam", 36236, "dra", {"Mlym"}, translit_module = "ml-translit", override_translit = true} + +m["mn"] = { + "Mongolian", + 9246, + "xgn", + {"Cyrl", "Mong", "Soyo", "Zanb"}, -- entries in Soyo or Zanb might require prior discussion + ancestors = {"cmg"}, + translit_module = "mn-translit", + override_translit = true +} + +-- "mo" IS TREATED AS "ro", SEE WT:LT + +m["mr"] = {"Marathi", 1571, "inc-sou", {"Deva", "Modi"}, ancestors = {"omr"}, translit_module = "mr-translit"} + +m["ms"] = {"Malay", 9237, "poz-mly", {"Latn", "ms-Arab"}} + +m["mt"] = {"Maltese", 9166, "sem-arb", Latn, ancestors = {"sqr"}, sort_key = {from = {"ċ", "ġ", "ħ"}, to = {"c", "g", "h"}}} + +m["my"] = {"Burmese", 9228, "tbq-brm", {"Mymr"}, ancestors = {"obr"}, translit_module = "my-translit", override_translit = true, sort_key = {from = {"ျ", "ြ", "ွ", "ှ", "ဿ"}, to = {"္ယ", "္ရ", "္ဝ", 
"္ဟ", "သ္သ"}}} + +m["na"] = {"Nauruan", 13307, "poz-mic", Latn} + +m["nb"] = {"Norwegian Bokmål", 25167, "gmq", Latn, ancestors = {"gmq-mno"}, wikimedia_codes = {"no"}} + +m["nd"] = {"Northern Ndebele", 35613, "bnt-ngu", Latn, entry_name = {from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON}, to = {"a", "e", "i", "o", "u", "m", "n"}}} + +m["ne"] = {"Nepali", 33823, "inc-pah", {"Deva", "Newa"}, translit_module = "ne-translit"} + +m["ng"] = {"Ndonga", 33900, "bnt-ova", Latn} + +m["nl"] = {"Dutch", 7411, "gmw", Latn, ancestors = {"dum"}, sort_key = {from = {"[äáâå]", "[ëéê]", "[ïíî]", "[öóô]", "[üúû]", "ç", "ñ", "^-"}, to = {"a", "e", "i", "o", "u", "c", "n"}}, standardChars = "A-Za-z0-9" .. PUNCTUATION .. u(0x2800) .. "-" .. u(0x28FF)} + +m["nn"] = {"Norwegian Nynorsk", 25164, "gmq", Latn, ancestors = {"gmq-mno"}} + +m["no"] = {"Norwegian", 9043, "gmq", Latn, ancestors = {"gmq-mno"}} + +m["nr"] = {"Southern Ndebele", 36785, "bnt-ngu", Latn, entry_name = {from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON}, to = {"a", "e", "i", "o", "u", "m", "n"}}} + +m["nv"] = { + "Navajo", + 13310, + "apa", + Latn, + sort_key = {from = {"[áą]", "[éę]", "[íį]", "[óǫ]", "ń", "^n([djlt])", "ł", "[ʼ’']", ACUTE}, to = {"a", "e", "i", "o", "n", "ni%1", "l~"}} -- the tilde is used to guarantee that ł will always be sorted after all other words with l +} + +m["ny"] = {"Chichewa", 33273, "bnt-nys", Latn, entry_name = {from = {"ŵ", "Ŵ", "á", "Á", "é", "É", "í", "Í", "ó", "Ó", "ú", "Ú", "ń", "Ń", "ḿ", "Ḿ"}, to = {"w", "W", "a", "A", "e", "E", "i", "I", "o", "O", "u", "U", "n", "N", "m", "M"}}, sort_key = {from = {"ng'"}, to = {"ng"}}} + +m["oc"] = {"Occitan", 14185, "roa", {"Latn", "Hebr"}, ancestors = {"pro"}, sort_key = {from = {"[àá]", "[èé]", "[íï]", "[òó]", "[úü]", "ç", "([lns])·h"}, to = {"a", "e", "i", "o", "u", "c", "%1h"}}} + +m["oj"] = {"Ojibwe", 33875, "alg", 
{"Cans", "Latn"}, sort_key = {from = {"aa", "ʼ", "ii", "oo", "sh", "zh"}, to = {"a~", "h~", "i~", "o~", "s~", "z~"}}} + +m["om"] = {"Oromo", 33864, "cus", {"Latn", "Ethi"}} + +m["or"] = {"Oriya", 33810, "inc-eas", {"Orya"}, ancestors = {"inc-mor"}, translit_module = "or-translit"} + +m["os"] = {"Ossetian", 33968, "xsc", {"Cyrl", "Geor", "Latn"}, ancestors = {"oos"}, translit_module = "os-translit", override_translit = true, entry_name = {from = {GRAVE, ACUTE}, to = {}}} + +m["pa"] = {"Punjabi", 58635, "inc-pan", {"Guru", "pa-Arab"}, ancestors = {"inc-opa"}, translit_module = "translit-redirect", entry_name = {from = {u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652)}, to = {}}} + +m["pi"] = {"Pali", 36727, "inc-mid", {"Latn", "Brah", "Deva", "Beng", "Sinh", "Mymr", "Thai", "Lana", "Laoo", "Khmr"}, ancestors = {"sa"}, translit_module = "translit-redirect", sort_key = {from = {"ā", "ī", "ū", "ḍ", "ḷ", "[ṁṃ]", "ṅ", "ñ", "ṇ", "ṭ", "([เโ])([ก-ฮ])", "([ເໂ])([ກ-ຮ])", "ᩔ", "ᩕ", "ᩖ", "ᩘ", "([ᨭ-ᨱ])ᩛ", "([ᨷ-ᨾ])ᩛ", "ᩤ", u(0xFE00), u(0x200D)}, to = {"a~", "i~", "u~", "d~", "l~", "m~", "n~", "n~~", "n~~~", "t~", "%2%1", "%2%1", "ᩈ᩠ᩈ", "᩠ᩁ", "᩠ᩃ", "ᨦ᩠", "%1᩠ᨮ", "%1᩠ᨻ", "ᩣ"}}, entry_name = {from = {u(0xFE00)}, to = {}}} + +m["pl"] = {"Polish", 809, "zlw-lch", Latn, ancestors = {"zlw-opl"}, sort_key = {from = {"[Ąą]", "[Ćć]", "[Ęę]", "[Łł]", "[Ńń]", "[Óó]", "[Śś]", "[Żż]", "[Źź]"}, to = {"a" .. u(0x10FFFF), "c" .. u(0x10FFFF), "e" .. u(0x10FFFF), "l" .. u(0x10FFFF), "n" .. u(0x10FFFF), "o" .. u(0x10FFFF), "s" .. u(0x10FFFF), "z" .. u(0x10FFFF), "z" .. 
u(0x10FFFE)}}} + +m["ps"] = {"Pashto", 58680, "ira-pat", {"ps-Arab"}, ancestors = {"ira-pat-pro"}} + +m["pt"] = {"Portuguese", 5146, "roa-ibe", {"Latn", "Brai"}, ancestors = {"roa-opt"}, sort_key = {from = {"[àãáâä]", "[èẽéêë]", "[ìĩíï]", "[òóôõö]", "[üúùũ]", "ç", "ñ"}, to = {"a", "e", "i", "o", "u", "c", "n"}}} + +m["qu"] = {"Quechua", 5218, "qwe", Latn} + +m["rm"] = {"Romansch", 13199, "roa-rhe", Latn} + +m["ro"] = {"Romanian", 7913, "roa-eas", {"Latn", "Cyrl"}, sort_key = {from = {"ă", "â", "î", "ș", "ț"}, to = {"a~", "a~~", "i~", "s~", "t~"}}} + +m["ru"] = {"Russian", 7737, "zle", {"Cyrl", "Brai"}, translit_module = "ru-translit", sort_key = {from = {"ё"}, to = {"е" .. mw.ustring.char(0x10FFFF)}}, entry_name = {from = {"Ѐ", "ѐ", "Ѝ", "ѝ", GRAVE, ACUTE, DIAER}, to = {"Е", "е", "И", "и"}}, standardChars = "ЁА-яё0-9—" .. PUNCTUATION} + +m["rw"] = {"Rwanda-Rundi", 3217514, "bnt-glb", Latn, entry_name = {from = {"[áāâǎā́]", "[éēêěḗ]", "[íīîǐī́]", "[óōôǒṓ]", "[úūûǔū́]"}, to = {"a", "e", "i", "o", "u"}}} + +m["sa"] = {"Sanskrit", 11059, "inc-old", {"Deva", "Bali", "as-Beng", "Beng", "Bhks", "Brah", "Gran", "Gujr", "Guru", "Java", "Khar", "Khmr", "Knda", "Lana", "Laoo", "Mlym", "Modi", "Mymr", "Newa", "Orya", "Saur", "Shrd", "Sidd", "Sinh", "Taml", "Telu", "Thai", "Tibt", "Tirh"}, sort_key = {from = {"ā", "ī", "ū", "ḍ", "ḷ", "ḹ", "[ṁṃ]", "ṅ", "ñ", "ṇ", "ṛ", "ṝ", "ś", "ṣ", "ṭ", "([เโไ])([ก-ฮ])", "([ເໂໄ])([ກ-ຮ])", "ᩔ", "ᩕ", "ᩖ", "ᩘ", "([ᨭ-ᨱ])ᩛ", "([ᨷ-ᨾ])ᩛ", "ᩤ", u(0xFE00), u(0x200D)}, to = {"a~", "i~", "u~", "d~", "l~", "l~~", "m~", "n~", "n~~", "n~~~", "r~", "r~~", "s~", "s~~", "t~", "%2%1", "%2%1", "ᩈ᩠ᩈ", "᩠ᩁ", "᩠ᩃ", "ᨦ᩠", "%1᩠ᨮ", "%1᩠ᨻ", "ᩣ"}}, entry_name = {from = {u(0xFE00)}, to = {}}, translit_module = "translit-redirect"} + +m["sc"] = {"Sardinian", 33976, "roa", Latn} + +m["sd"] = {"Sindhi", 33997, "inc-snd", {"sd-Arab", "Deva", "Sind", "Khoj"}, entry_name = {from = {u(0x0671), u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652), 
u(0x0670), u(0x0640)}, to = {u(0x0627)}}, ancestors = {"inc-vra"}} + +m["se"] = {"Northern Sami", 33947, "smi", Latn, entry_name = {from = {"ạ", "[ēẹ]", "ī", "[ōọ]", "ū", "ˈ"}, to = {"a", "e", "i", "o", "u"}}, sort_key = {from = {"á", "č", "đ", "ŋ", "š", "ŧ", "ž"}, to = {"a²", "c²", "d²", "n²", "s²", "t²", "z²"}}, standardChars = "A-PR-VZa-pr-vz0-9ÁáČčĐđŊŋŠšŦŧŽž" .. PUNCTUATION} + +m["sg"] = {"Sango", 33954, "crp", Latn, ancestors = {"ngb"}} + +m["sh"] = {"Serbo-Croatian", 9301, "zls", {"Latn", "Cyrl", "Glag"}, entry_name = {from = {"[ȀÀȂÁĀÃ]", "[ȁàȃáāã]", "[ȄÈȆÉĒẼ]", "[ȅèȇéēẽ]", "[ȈÌȊÍĪĨ]", "[ȉìȋíīĩ]", "[ȌÒȎÓŌÕ]", "[ȍòȏóōõ]", "[ȐȒŔ]", "[ȑȓŕ]", "[ȔÙȖÚŪŨ]", "[ȕùȗúūũ]", "Ѐ", "ѐ", "[ӢЍ]", "[ӣѝ]", "[Ӯ]", "[ӯ]", GRAVE, ACUTE, DGRAVE, INVBREVE, MACRON, TILDE}, to = {"A", "a", "E", "e", "I", "i", "O", "o", "R", "r", "U", "u", "Е", "е", "И", "и", "У", "у"}}, wikimedia_codes = {"sh", "bs", "hr", "sr"}} + +m["si"] = {"Sinhalese", 13267, "inc-ins", {"Sinh"}, ancestors = {"elu-prk"}, translit_module = "si-translit", override_translit = true} + +m["sk"] = {"Slovak", 9058, "zlw", Latn, sort_key = {from = {"[áä]", "é", "í", "[óô]", "ú", "ý", "ŕ", "ĺ", "[" .. DIAER .. ACUTE .. CIRC .. 
"]"}, to = {"a", "e", "i", "o", "u", "y", "r", "l", ""}}} + +m["sl"] = {"Slovene", 9063, "zls", Latn, entry_name = {from = {"[ÁÀÂĀȂȀ]", "[áàâāȃȁ]", "[ÉÈÊĒȆȄỆẸ]", "[éèêēȇȅệẹə]", "[ÍÌÎĪȊȈ]", "[íìîīȋȉ]", "[ÓÒÔŌȎȌỘỌ]", "[óòôōȏȍộọ]", "[ŔȒȐ]", "[ŕȓȑ]", "[ÚÙÛŪȖȔ]", "[úùûūȗȕ]", "ł", GRAVE, ACUTE, CIRC, MACRON, DGRAVE, INVBREVE, DOTBELOW}, to = {"A", "a", "E", "e", "I", "i", "O", "o", "R", "r", "U", "u", "l"}}, sort_key = {from = {"č", "š", "ž"}, to = {"c²", "s²", "z²"}}} + +m["sm"] = {"Samoan", 34011, "poz-pnp", Latn} + +m["sn"] = {"Shona", 34004, "bnt-sho", Latn, entry_name = {remove_diacritics = ACUTE}} + +m["so"] = {"Somali", 13275, "cus", {"Latn", "Arab", "Osma"}, entry_name = {from = {"[ÁÀÂ]", "[áàâ]", "[ÉÈÊ]", "[éèê]", "[ÍÌÎ]", "[íìî]", "[ÓÒÔ]", "[óòô]", "[ÚÙÛ]", "[úùû]", "[ÝỲ]", "[ýỳ]"}, to = {"A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "Y", "y"}}} + +m["sq"] = {"Albanian", 8748, "sqj", {"Latn", "Grek", "Elba"}, entry_name = {remove_diacritics = ACUTE}, sort_key = {from = {"[âãä]", "[ÂÃÄ]", "[êẽë]", "[ÊẼË]", "ĩ", "Ĩ", "õ", "Õ", "ũ", "Ũ", "ỹ", "Ỹ", "ç", "Ç"}, to = {"a", "A", "e", "E", "i", "I", "o", "O", "u", "U", "y", "Y", "c", "C"}}} + +m["ss"] = {"Swazi", 34014, "bnt-ngu", Latn, entry_name = {from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON}, to = {"a", "e", "i", "o", "u", "m", "n"}}} + +m["st"] = {"Sotho", 34340, "bnt-sts", Latn, entry_name = {from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON}, to = {"a", "e", "i", "o", "u", "m", "n"}}} + +m["su"] = {"Sundanese", 34002, "poz-msa", {"Latn", "Sund"}, translit_module = "su-translit"} + +m["sv"] = {"Swedish", 9027, "gmq", Latn, ancestors = {"gmq-osw"}} + +m["sw"] = {"Swahili", 7838, "bnt-swh", LatnArab, sort_key = {from = {"ng'", "^-"}, to = {"ngz"}}} + +m["ta"] = {"Tamil", 5885, "dra", {"Taml"}, ancestors = {"oty"}, translit_module = "ta-translit", override_translit = true} + +m["te"] = 
{"Telugu", 8097, "dra", {"Telu"}, translit_module = "te-translit", override_translit = true} + +m["tg"] = { + "Tajik", + 9260, + "ira-swi", + {"Cyrl", "fa-Arab", "Latn"}, + ancestors = {"pal"}, -- same as "fa", see WT:T:AFA + translit_module = "tg-translit", + override_translit = true, + sort_key = {from = {"Ё", "ё"}, to = {"Е", "е"}}, + entry_name = {from = {ACUTE}, to = {}} +} + +m["th"] = {"Thai", 9217, "tai-swe", {"Thai", "Brai"}, translit_module = "th-translit", sort_key = {from = {"[%pๆ]", "[็-๎]", "([เแโใไ])([ก-ฮ])"}, to = {"", "", "%2%1"}}} + +m["ti"] = {"Tigrinya", 34124, "sem-eth", {"Ethi"}, translit_module = "Ethi-translit"} + +m["tk"] = {"Turkmen", 9267, "trk-ogz", {"Latn", "Cyrl", "Arab"}, entry_name = {from = {"ā", "ē", "ī", "ō", "ū", "ȳ", "ȫ", "ǖ", MACRON}, to = {"a", "e", "i", "o", "u", "y", "ö", "ü", ""}}, ancestors = {"trk-ogz-pro"}} + +m["tl"] = {"Tagalog", 34057, "phi", {"Latn", "Tglg"}, entry_name = {from = {"[áàâ]", "[éèê]", "[íìî]", "[óòô]", "[úùû]", ACUTE, GRAVE, CIRC}, to = {"a", "e", "i", "o", "u"}}} + +m["tn"] = {"Tswana", 34137, "bnt-sts", Latn} + +m["to"] = {"Tongan", 34094, "poz-pol", Latn, sort_key = {from = {"ā", "ē", "ī", "ō", "ū", MACRON}, to = {"a", "e", "i", "o", "u", ""}}, entry_name = {from = {"á", "é", "í", "ó", "ú", ACUTE}, to = {"a", "e", "i", "o", "u", ""}}} + +m["tr"] = {"Turkish", 256, "trk-ogz", Latn, ancestors = {"ota"}} + +m["ts"] = {"Tsonga", 34327, "bnt-tsr", Latn} + +m["tt"] = {"Tatar", 25285, "trk-kbu", {"Cyrl", "Latn", "tt-Arab"}, translit_module = "tt-translit", override_translit = true} + +-- "tw" IS TREATED AS "ak", SEE WT:LT + +m["ty"] = {"Tahitian", 34128, "poz-pep", Latn} + +m["ug"] = {"Uyghur", 13263, "trk-kar", {"ug-Arab", "Latn", "Cyrl"}, ancestors = {"chg"}, translit_module = "ug-translit", override_translit = true} + +m["uk"] = {"Ukrainian", 8798, "zle", Cyrl, ancestors = {"orv"}, translit_module = "uk-translit", entry_name = {from = {"Ѐ", "ѐ", "Ѝ", "ѝ", GRAVE, ACUTE}, to = {"Е", "е", "И", "и"}}, 
standardChars = "ЄІЇА-ЩЬЮ-щьюяєії" .. PUNCTUATION} +m["ur"] = {"Urdu", 1617, "inc-hnd", {"ur-Arab"}, ancestors = {"inc-ohi"}, entry_name = {from = {u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652), u(0x0658)}, to = {}}} + +m["uz"] = {"Uzbek", 9264, "trk-kar", {"Latn", "Cyrl", "fa-Arab"}, ancestors = {"chg"}} + +m["ve"] = {"Venda", 32704, "bnt-bso", Latn} + +m["vi"] = {"Vietnamese", 9199, "mkh-vie", {"Latn", "Hani"}, ancestors = {"mkh-mvi"}, sort_key = "vi-sortkey"} + +m["vo"] = {"Volapük", 36986, "art", Latn} + +m["wa"] = {"Walloon", 34219, "roa-oil", Latn, ancestors = {"fro"}, sort_key = {from = {"[áàâäå]", "[éèêë]", "[íìîï]", "[óòôö]", "[úùûü]", "[ýỳŷÿ]", "ç", "'"}, to = {"a", "e", "i", "o", "u", "y", "c"}}} + +m["wo"] = {"Wolof", 34257, "alv-fwo", LatnArab} + +m["xh"] = {"Xhosa", 13218, "bnt-ngu", Latn, entry_name = {from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON}, to = {"a", "e", "i", "o", "u", "m", "n"}}} + +m["yi"] = {"Yiddish", 8641, "gmw", {"Hebr"}, ancestors = {"gmh"}, sort_key = {from = {"[אַאָ]", "בּ", "[וֹוּ]", "יִ", "ײַ", "פֿ"}, to = {"א", "ב", "ו", "י", "יי", "פ"}}, translit_module = "yi-translit"} + +m["yo"] = {"Yoruba", 34311, "alv-yor", Latn, sort_key = {from = {"ẹ", "ọ", "gb", "ṣ"}, to = {"e~", "o~", "g~", "s~"}}, entry_name = {remove_diacritics = ACUTE .. GRAVE .. 
MACRON}} + +m["za"] = {"Zhuang", 13216, "tai", {"Latn", "Hani"}, sort_key = {from = {"%p"}, to = {""}}} + +m["zh"] = {"Chinese", 7850, "zhx", {"Hani", "Brai", "Nshu"}, ancestors = {"ltc"}, sort_key = "zh-sortkey"} + +m["zu"] = {"Zulu", 10179, "bnt-ngu", Latn, entry_name = {from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON}, to = {"a", "e", "i", "o", "u", "m", "n"}}} + +return m diff --git a/wiktra/wikt/translit/languages/data3/a.lua b/wiktra/wikt/translit/languages/data3/a.lua new file mode 100644 index 0000000..1de61ed --- /dev/null +++ b/wiktra/wikt/translit/languages/data3/a.lua @@ -0,0 +1,1073 @@ +local u = mw.ustring.char + +-- UTF-8 encoded strings for some commonly-used diacritics +local GRAVE = u(0x0300) +local ACUTE = u(0x0301) +local CIRC = u(0x0302) +local TILDE = u(0x0303) +local MACRON = u(0x0304) +local BREVE = u(0x0306) +local DOTABOVE = u(0x0307) +local DIAER = u(0x0308) +local CARON = u(0x030C) +local DGRAVE = u(0x030F) +local INVBREVE = u(0x0311) +local DOTBELOW = u(0x0323) +local RINGBELOW = u(0x0325) +local CEDILLA = u(0x0327) + +-- Punctuation to be used for standardChars field +local PUNCTUATION = " !#%&*+,-./:;<=>?@^_`|~'()" + +-- Use these in "scripts" to save a little memory. 
+local Arab = {"Arab"} +local Cyrl = {"Cyrl"} +local Deva = {"Deva"} +local Latn = {"Latn"} + +local m = {} + +m["aaa"] = {"Ghotuo", 35463, "alv-yek", Latn} + +m["aab"] = {"Alumu-Tesu", 35034, "nic-alu", Latn} + +m["aac"] = {"Ari", 1811224, "paa-pag", Latn} + +m["aad"] = {"Amal", 56708, "paa-iwm", Latn} + +-- "aae" IS TREATED AS "sq", SEE WT:LT + +m["aaf"] = {"Aranadan", 3507928, "dra", {"Mlym"}} + +m["aag"] = {"Ambrak", 4741706, "qfa-tor", Latn} + +m["aah"] = {"Abu' Arapesh", 4670715, "qfa-tor", Latn} + +m["aai"] = {"Arifama-Miniafia", 4790560, "poz-ocw", Latn} + +m["aak"] = {"Ankave", 3446690, "ngf", Latn} + +m["aal"] = {"Afade", 56434, "cdc-cbm", Latn} + +m["aan"] = {"Anambé", 3507873, "tup-gua", Latn} + +m["aap"] = {"Pará Arára", 56807, "sai-car", Latn} + +m["aaq"] = {"Penobscot", 3515185, "alg-abp", Latn} + +m["aas"] = {"Aasax", 56620, "cus", Latn} + +-- "aat" IS TREATED AS "sq", SEE WT:LT + +m["aau"] = {"Abau", 3073568, "paa-spk", Latn} + +m["aaw"] = {"Solong", 7558834, "poz-ocw", Latn} + +m["aax"] = {"Mandobo Atas", 12636156, "ngf", Latn} + +m["aaz"] = {"Amarasi", 4740192, "poz-tim", Latn} + +m["aba"] = {"Abé", 34833, "alv-lag", Latn} + +m["abb"] = {"Bankon", 34860, "bnt-bsa", Latn} + +m["abc"] = {"Ambala Ayta", 3448896, "phi", Latn} + +m["abd"] = {"Camarines Norte Agta", 3399682, "phi", Latn} + +m["abe"] = {"Abenaki", 17502788, "alg-abp", Latn} + +m["abf"] = {"Abai Sungai", 4663287, "poz-san", Latn} + +m["abg"] = {"Abaga", 3507954, "paa-kag", Latn} + +m["abh"] = {"Tajiki Arabic", 56833, "sem-arb", Arab} + +m["abi"] = {"Abidji", 34781, "alv-lag", Latn} + +m["abj"] = {"Aka-Bea", 2356391, "qfa-ads", Latn} + +m["abl"] = {"Abung", 49215, "poz-lgx", Latn} + +m["abm"] = {"Abanyom", 7502, "nic-eko", Latn} + +m["abn"] = {"Abua", 34835, "nic-cde", Latn} + +m["abo"] = {"Abon", 35121, "nic-tvn", Latn} + +m["abp"] = {"Abenlen Ayta", 3436621, "phi", Latn} + +m["abq"] = {"Abaza", 27567, "cau-abz", Cyrl, translit_module = "abq-translit", override_translit = true} + 
+m["abr"] = {"Abron", 34831, "alv-ctn", Latn, ancestors = {"ak"}} + +m["abs"] = {"Ambonese Malay", 3124354, "crp", Latn, ancestors = {"ms"}} + +m["abt"] = {"Ambulas", 3508015, "paa-spk", Latn} + +m["abu"] = {"Abure", 34767, "alv-ptn", Latn} + +m["abv"] = {"Baharna Arabic", 56576, "sem-arb", Arab, entry_name = {from = {u(0x0671), u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652), u(0x0670), u(0x0640)}, to = {u(0x0627)}}} + +m["abw"] = {"Pal", 7126121, "ngf-mad", Latn} + +m["abx"] = {"Inabaknon", 2820163, "poz-sbj", Latn} + +m["aby"] = {"Aneme Wake", 3508107, "ngf", Latn} + +m["abz"] = {"Abui", 2822110, "qfa-tap", Latn} + +m["aca"] = {"Achagua", 2822982, "awd", Latn} + +m["acb"] = {"Áncá", 11130787, "nic-mom", Latn} + +m["acd"] = {"Gikyode", 35256, "alv-gng", Latn} + +m["ace"] = { + "Acehnese", + 27683, + "cmc", + {"Latn", "ms-Arab"}, + standardChars = "A-Za-z0-9ÈÉËÔÖèéëôö" .. PUNCTUATION -- current orthography (not yet add Arab) +} + +m["ach"] = {"Acholi", 34926, "sdv-los", Latn} + +m["aci"] = {"Aka-Cari", 2670418, "qfa-adn", Latn} + +m["ack"] = {"Aka-Kora", 3433680, "qfa-adn", Latn} + +m["acl"] = {"Akar-Bale", 3436825, "qfa-ads", Latn} + +m["acm"] = {"Iraqi Arabic", 56232, "sem-arb", Arab, entry_name = {from = {u(0x0671), u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652), u(0x0670), u(0x0640)}, to = {u(0x0627)}}} + +m["acn"] = {"Achang", 56582, "tbq-brm", Latn} + +m["acp"] = {"Eastern Acipa", 5329945, "nic-kmk", Latn} + +m["acr"] = {"Achi", 34774, "myn", Latn} + +m["acs"] = {"Acroá", 2829146, "sai-cje", Latn} + +m["acu"] = {"Achuar", 2823170, "sai-jiv", Latn} + +m["acv"] = {"Achumawi", 56661, "nai-pal", Latn} + +m["acw"] = {"Hijazi Arabic", 56608, "sem-arb", Arab, entry_name = {from = {u(0x0671), u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652), u(0x0670), u(0x0640)}, to = {u(0x0627)}}} + +m["acx"] = {"Omani Arabic", 56630, "sem-arb", Arab, entry_name = 
{from = {u(0x0671), u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652), u(0x0670), u(0x0640)}, to = {u(0x0627)}}} + +m["acy"] = {"Cypriot Arabic", 56416, "sem-arb", Arab, ancestors = {"acm"}} + +m["acz"] = {"Acheron", 34769, "alv-tal", Latn} + +m["ada"] = {"Adangme", 35141, "alv-gda", Latn} + +m["adb"] = { -- rename or remove, see RFM + "Adabe", 36872, nil, Latn +} + +m["add"] = {"Dzodinka", 35266, "nic-nka", Latn} + +m["ade"] = {"Adele", 27740, "alv-ntg", Latn} + +m["adf"] = {"Dhofari Arabic", 56565, "sem-arb", Arab, entry_name = {from = {u(0x0671), u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652), u(0x0670), u(0x0640)}, to = {u(0x0627)}}} + +m["adg"] = {"Andegerebinha", 3508123, "aus-pam", Latn} + +m["adh"] = {"Adhola", 1971400, "sdv-los", Latn} + +m["adi"] = {"Adi", 56440, "sit-tan", Latn} + +m["adj"] = {"Adioukrou", 34738, "alv-lag", Latn} + +m["adl"] = {"Galo", 2857892, "sit-tan", Latn} + +m["adn"] = {"Adang", 3398276, "qfa-tap", Latn} + +m["ado"] = {"Abu", 56659, "paa-ram", Latn} + +m["adp"] = { + "Adap", + 3512402, + "sit-tib", + {"Tibt"}, + ancestors = {"dz"}, + wikipedia_article = "Dzongkha" -- Considered a dialect of Dzongkha +} + +m["adq"] = {"Adangbe", 34730, "alv-gda", Latn, ancestors = {"ada"}} + +m["adr"] = {"Adonara", 4684505, "poz-cet", Latn} + +m["ads"] = { + "Adamorobe Sign Language", 27709, "sgn", Latn -- when documented +} + +m["adt"] = {"Adnyamathanha", 2225391, "aus-psw", Latn} + +m["adu"] = {"Aduge", 34734, "alv-nwd", Latn, ancestors = {"opa"}, wikipedia_article = "Okpamheri language"} + +m["adw"] = {"Amondawa", 12626847, "tup-gua", Latn} + +m["ady"] = {"Adyghe", 27776, "cau-cir", Cyrl, translit_module = "ady-translit", override_translit = true} + +m["adz"] = {"Adzera", nil, "poz-ocw", Latn} + +m["aea"] = {"Areba", 3509129, "aus-pam", Latn} + +m["aeb"] = {"Tunisian Arabic", 56240, "sem-arb", Arab, entry_name = {from = {u(0x0671), u(0x064B), u(0x064C), u(0x064D), 
u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652), u(0x0670), u(0x0640)}, to = {u(0x0627)}}} + +m["aed"] = { + "Argentine Sign Language", 3322073, "sgn", Latn -- when documented +} + +m["aee"] = {"Northeast Pashayi", 12642198, "inc-dar", Latn} + +m["aek"] = {"Haeke", 5638166, "poz-cln", Latn} + +m["ael"] = {"Ambele", 34818, "nic-grf", Latn} + +m["aem"] = {"Arem", 3507920, "mkh-vie", Latn} + +m["aen"] = {"Armenian Sign Language", 3446604, "sgn"} + +m["aeq"] = {"Aer", 3246741, "inc-wes", Arab, ancestors = {"inc-gup"}} + +m["aer"] = {"Eastern Arrernte", 10728232, "aus-pam", Latn} + +m["aes"] = {"Alsea", 2395641, nil, Latn} + +m["aeu"] = {"Akeu", 4700657, "tbq-lol", Latn} + +m["aew"] = {"Ambakich", 56642, "paa-ram", Latn} + +m["aey"] = {"Amele", 3508025, "ngf-mad", Latn} + +m["aez"] = { + "Aeka", + 16110528, + "ngf", + Latn, + wikipedia_article = "Orokaiva language" -- subvariety? +} + +m["afb"] = {"Gulf Arabic", 56385, "sem-arb", Arab, entry_name = {from = {u(0x0671), u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652), u(0x0670), u(0x0640)}, to = {u(0x0627)}}} + +m["afd"] = {"Andai", 4753480, "paa-arf", Latn} + +m["afe"] = {"Putukwam", 3914930, "nic-ben", Latn} + +m["afg"] = {"Afghan Sign Language", 4689093, "sgn"} + +m["afh"] = {"Afrihili", 384707, "art", Latn, type = "appendix-constructed"} + +m["afi"] = {"Akrukay", 57003, "paa-ram", Latn} + +m["afk"] = {"Nanubae", 6964416, "paa-arf", Latn} + +m["afn"] = {"Defaka", 35174, "nic", Latn} + +m["afo"] = {"Eloyi", 3914066, "nic-plt", Latn} + +m["afp"] = {"Tapei", 16887371, "paa-arf", Latn} + +m["afs"] = {"Afro-Seminole Creole", 27867, "crp", Latn, ancestors = {"en"}} + +m["aft"] = {"Afitti", 3400829, "sdv-nyi", Latn} + +m["afu"] = {"Awutu", 34847, "alv-gng", Latn} + +m["afz"] = {"Obokuitai", 7075258, "paa-lkp", Latn} + +m["aga"] = {"Aguano", 3331203, nil, Latn} + +m["agb"] = {"Legbo", 35584, "nic-uce", Latn} + +m["agc"] = {"Agatu", 34732, "alv-ido", Latn} + +m["agd"] = {"Agarabi", 
3399642, "paa-kag", Latn} + +m["age"] = {"Angal", 10951553, "paa-eng", Latn} + +m["agf"] = {"Arguni", 12473346, "poz-cet", Latn} + +m["agg"] = {"Angor", 3508100, "paa", Latn} + +m["agh"] = {"Ngelima", 7022266, "bnt-bta", Latn} + +m["agi"] = {"Agariya", 663586, "mun", Deva} + +m["agj"] = {"Argobba", 29292, "sem-eth", {"Ethi"}} + +m["agk"] = {"Isarog Agta", 6078982, "phi", Latn} + +m["agl"] = {"Fembe", 372927, "ngf", Latn} + +m["agm"] = {"Angaataha", 3508001, "ngf", Latn} + +m["agn"] = {"Agutaynen", 3399717, "phi-kal", Latn} + +m["ago"] = {"Tainae", 7676186, "ngf", Latn} + +m["agq"] = {"Aghem", 34737, "nic-rnw", Latn} + +m["agr"] = {"Aguaruna", 1526530, "sai-jiv", Latn} + +m["ags"] = {"Esimbi", 35260, "nic-bds", Latn} + +m["agt"] = {"Central Cagayan Agta", 5017296, "phi", Latn} + +m["agu"] = {"Aguacateca", 35091, "myn", Latn} + +m["agv"] = {"Remontado Agta", 3508085, "phi", Latn} + +m["agw"] = {"Kahua", 3191906, "poz-sls", Latn} + +m["agx"] = {"Aghul", 36498, "cau-lzg", Cyrl} + +m["agy"] = {"Southern Alta", 7569611, "phi", Latn} + +m["agz"] = {"Mount Iriga Agta", 6921432, "phi", Latn} + +m["aha"] = {"Ahanta", 34729, "alv-ctn", Latn} + +m["ahb"] = {"Axamb", 2874710, "poz-vnc", Latn} + +m["ahg"] = {"Qimant", 35663, "cus", Latn} + +m["ahh"] = {"Aghu", 3436645, "ngf", Latn} + +m["ahi"] = {"Tiagbamrin Aizi", 3400073, "kro-aiz", Latn} + +m["ahk"] = {"Akha", 56643, "tbq-lol", {"Latn", "Mymr", "Thai"}, sort_key = {from = {"[%pๆ]", "[็-๎]", "([เแโใไ])([ก-ฮ])"}, to = {"", "", "%2%1"}}} + +m["ahl"] = {"Igo", 35412, "alv-ktg", Latn} + +m["ahm"] = {"Mobumrin Aizi", 35967, "kro-aiz", Latn} + +m["ahn"] = {"Àhàn", 34723, "alv-aah", Latn} + +m["aho"] = {"Ahom", 34778, "tai-swe", {"Ahom"}, translit_module = "Ahom-translit"} + +m["ahp"] = {"Aproumu Aizi", 34810, "alv-kwa", Latn} + +m["ahr"] = {"Ahirani", 15549890, "inc-wes", Deva, ancestors = {"psu"}} + +m["ahs"] = {"Ashe", 34823, "nic-plc", Latn} + +m["aht"] = {"Ahtna", 21058, "ath-nor", Latn} + +m["aia"] = {"Arosi", 2863483, 
"poz-sls", Latn} + +m["aib"] = {"Aynu", 27927, "trk-kar", {"Arab", "Latn"}} + +m["aic"] = {"Ainbai", 3332149, "paa-brd", Latn} + +m["aid"] = {"Alngith", 3279409, "aus-pmn", Latn} + +m["aie"] = {"Amara", 2841180, "poz-ocw", Latn} + +m["aif"] = {"Agi", 3331491, "qfa-tor", Latn} + +m["aig"] = {"Antigua and Barbuda Creole English", 3244184, "crp", Latn, ancestors = {"en"}} + +m["aih"] = {"Ai-Cham", 2827749, "qfa-kms", {"Latn", "Hani"}} + +m["aii"] = {"Assyrian Neo-Aramaic", 29440, "sem-nna", {"Syrc"}, entry_name = {from = {"[" .. u(0x0304) .. u(0x0308) .. u(0x0331) .. u(0x0730) .. "-" .. u(0x0748) .. "]"}, to = {}}} + +m["aij"] = {"Lishanid Noshan", 3436467, "sem-nna", {"Hebr"}} + +m["aik"] = {"Ake", 34808, "nic-pls", Latn} + +m["ail"] = {"Aimele", 3327418, "ngf", Latn} + +m["aim"] = {"Aimol", 4697175, "tbq-kuk", {"Latn", "Beng"}} + +m["ain"] = {"Ainu", 27969, "qfa-iso", {"Kana", "Latn", "Cyrl"}} + +m["aio"] = { + "Aiton", + 3399725, + "tai-swe", + {"Mymr"}, + entry_name = { + from = {u(0xFE00)}, -- VS01 + to = {""} + } +} + +m["aip"] = {"Burumakok", 5000984, "ngf-okk", Latn} + +m["air"] = {"Airoran", 3321131, "paa-tkw", Latn} + +m["ait"] = {"Arikem", 3446679, "tup", Latn} + +m["aiw"] = {"Aari", 7495, "omv-aro", Latn} + +m["aix"] = {"Aighon", 3504287, "poz-ocw", Latn} + +m["aiy"] = {"Ali", 34814, "alv-gbf", Latn} + +m["aja"] = {"Aja", 3237491, "csu-bkr", Latn} + +m["ajg"] = {"Adja", 35035, "alv-gbe", Latn} + +m["aji"] = {"Ajië", 2828867, "poz-cln", Latn} + +m["ajn"] = {"Andajin", 16111302, "aus-wor", Latn} + +m["ajp"] = {"South Levantine Arabic", nil, "sem-arb", Arab, entry_name = {from = {u(0x0671), u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652), u(0x0670), u(0x0640)}, to = {u(0x0627)}}} + +m["ajt"] = {"Judeo-Tunisian Arabic", 56597, "sem-arb", {"Hebr"}, ancestors = {"jrb"}} + +m["aju"] = {"Judeo-Moroccan Arabic", 56595, "sem-arb", {"Hebr"}, ancestors = {"jrb"}} + +m["ajw"] = {"Ajawa", 56645, "cdc-wst", Latn} + +m["ajz"] = 
{"Amri Karbi", 3508092, "tbq-kuk", Latn, ancestors = {"mjw"}} + +m["akb"] = {"Angkola Batak", 2640686, "btk", {"Latn", "Batk"}} + +m["akc"] = {"Mpur", 3327139, "paa-wpa", Latn} + +m["akd"] = {"Ukpet-Ehom", 36618, "nic-ucr", Latn} + +m["ake"] = {"Akawaio", 28059, "sai-car", Latn} + +m["akf"] = {"Akpa", 34801, "alv-ido", Latn} + +m["akg"] = {"Anakalangu", 4750964, "poz-cet", Latn} + +m["akh"] = {"Angal Heneng", 10950354, "paa-eng", Latn} + +m["aki"] = {"Aiome", 56735, "paa", Latn} + +m["akj"] = {"Jeru", 2919121, "qfa-adn", {"Latn", "Deva"}} + +m["akk"] = {"Akkadian", 35518, "sem-eas", {"Xsux", "Latn"}} + +m["akl"] = {"Aklanon", 8773, "phi", Latn} + +m["akm"] = {"Aka-Bo", 35361, "qfa-adn", Latn} + +m["ako"] = {"Akurio", 56650, "sai-car", Latn} + +m["akp"] = {"Siwu", 36470, "alv-ntg", Latn} + +m["akq"] = {"Ak", 56654, "paa", Latn} + +m["akr"] = {"Araki", 2699882, "poz-vnc", Latn} + +m["aks"] = {"Akaselem", 34817, "nic-grm", Latn} + +m["akt"] = {"Akolet", 3330162, "poz-ocw", Latn} + +m["aku"] = {"Akum", 34799, "nic-ykb", Latn} + +m["akv"] = {"Akhvakh", 56423, "cau-ava", Cyrl} + +m["akw"] = {"Akwa", 34802, "bnt-mbo", Latn} + +m["akx"] = {"Aka-Kede", 3436816, "qfa-adc", Latn} + +m["aky"] = {"Aka-Kol", 3436784, "qfa-adc", Latn} + +m["akz"] = {"Alabama", 1815020, "nai-mus", Latn} + +m["ala"] = {"Alago", 34813, "alv-ido", Latn} + +m["alc"] = {"Kawésqar", 56544, "aqa", Latn} + +m["ald"] = {"Alladian", 34837, "alv-lag", Latn} + +m["ale"] = {"Aleut", 27210, "esx", Latn} + +m["alf"] = {"Alege", 34815, "nic-ben", Latn} + +m["alh"] = {"Alawa", 2147917, "aus-gun", Latn} + +m["ali"] = {"Amaimon", 3327427, "ngf-mad", Latn} + +m["alj"] = {"Alangan", 3327423, "phi", Latn} + +m["alk"] = {"Alak", 2714690, "mkh", Latn} + +m["all"] = {"Allar", 3393634, "dra", {"Mlym"}} + +-- "aln" IS TREATED AS "sq", SEE WT:LT + +m["alm"] = {"Amblong", 11022615, "poz-vnc", Latn} + +m["alo"] = {"Larike-Wakasihu", 3217929, "poz-cma", Latn} + +m["alp"] = {"Alune", 3327367, "poz-cet", Latn} + +m["alq"] = 
{"Algonquin", 28092, "alg", Latn, ancestors = {"oj"}} + +m["alr"] = {"Alutor", 28213, "qfa-cka", Cyrl} + +m["alt"] = {"Southern Altai", 1991779, "trk-sib", Cyrl, translit_module = "Altai-translit"} + +m["alu"] = {"'Are'are", 5160, "poz-sls", Latn} + +m["alw"] = {"Alaba", 56652, "cus", Latn} + +m["alx"] = {"Amol", 3504260, "qfa-tor", Latn} + +m["aly"] = {"Alyawarr", 3327389, "aus-pam", Latn} + +m["alz"] = {"Alur", 56507, "sdv-los", Latn} + +m["ama"] = {"Amanayé", 3508053, "tup-gua", Latn} + +m["amb"] = {"Ambo", 3450142, "nic-tvn", Latn} + +m["amc"] = {"Amahuaca", 2669150, "sai-pan", Latn} + +m["ame"] = {"Yanesha'", 3088540, "awd", Latn} + +m["amf"] = {"Hamer-Banna", 35764, "omv-aro", Latn} + +m["amg"] = {"Amurdag", 3360016, "aus-wdj", Latn} + +m["ami"] = {"Amis", 35132, "map", Latn} + +m["amj"] = {"Amdang", 28335, "ssa-fur", Latn} + +m["amk"] = {"Ambai", 1875885, "poz-hce", Latn} + +m["aml"] = {"War-Jaintia", 56321, "aav-khs", Latn} + +m["amm"] = {"Ama", 3446626, "qfa-mal", Latn} + +m["amn"] = {"Amanab", 3327399, "paa-brd", Latn} + +m["amo"] = {"Amo", 34826, "nic-kne", Latn} + +m["amp"] = {"Alamblak", 56688, "paa", Latn} + +m["amq"] = {"Amahai", 3327384, "poz-cma", Latn} + +m["amr"] = {"Amarakaeri", 35128, "sai-har", Latn} + +m["ams"] = {"Southern Amami-Oshima", 2840986, "jpx-ryu", {"Jpan"}} + +m["amt"] = {"Amto", 56517, "paa-asa", Latn} + +m["amu"] = {"Guerrero Amuzgo", 3501942, "omq", Latn} + +m["amv"] = {"Ambelau", 2669214, "poz-cma", Latn} + +m["amw"] = {"Western Neo-Aramaic", 34226, "sem-arw", {"Armi", "Syrc", "Latn"}} + +m["amx"] = {"Anmatyerre", 10412317, "aus-pam", Latn} + +m["amy"] = {"Ami", 12626835, "aus-dal", Latn} + +m["amz"] = {"Atampaya", 3446651, "aus-pam", Latn} + +m["ana"] = {"Andaqui", 2846078, nil, Latn} + +m["anb"] = {"Andoa", 2846171, "sai-zap", Latn} + +m["anc"] = {"Ngas", 35999, "cdc-wst", Latn} + +m["and"] = {"Ansus", 3513300, "poz-hce", Latn} + +m["ane"] = {"Xârâcùù", 3571097, "poz-cln", Latn} + +m["anf"] = {"Animere", 34783, "alv-ktg", 
Latn} + +m["ang"] = { + "Old English", + 42365, + "gmw", + {"Latinx", "Runr"}, + translit_module = "translit-redirect", + entry_name = {from = {"[ĀÁ]", "[āá]", "[ǢǼ]", "[ǣǽ]", "Ċ", "ċ", "[ĒÉ]", "[ēé]", "Ġ", "ġ", "[ĪÍ]", "[īí]", "[ŌÓ]", "[ōó]", "[ŪÚ]", "[ūú]", "[ȲÝ]", "[ȳý]", "Ƿ", "ƿ", MACRON, ACUTE, DOTABOVE}, to = {"A", "a", "Æ", "æ", "C", "c", "E", "e", "G", "g", "I", "i", "O", "o", "U", "u", "Y", "y", "W", "w"}}, + sort_key = { + -- most dictionaries sort æ as if written ae, and þ/ð after t + -- most dictionaries don't have ƿ at all (normalized to w); for now, + -- put after w to keep them from cluttering up the w lists + from = {"[æǣǽ]", "[þð]", "ƿ"}, + to = {"ae", "t~", "w~"} + } +} + +m["anh"] = {"Nend", 6991554, "ngf-mad", Latn} + +m["ani"] = {"Andi", 34849, "cau-ava", Cyrl} + +m["anj"] = {"Anor", 56458, "paa", Latn} + +m["ank"] = {"Goemai", 35272, "cdc-wst", Latn} + +m["anl"] = {"Anu", 4777679, "sit-mru", Latn} + +m["anm"] = {"Anal", 56235, "tbq-kuk", Latn} + +m["ann"] = {"Obolo", 36614, "nic-lcr", Latn} + +m["ano"] = {"Andoque", 2669225, "qfa-iso", Latn} + +m["anp"] = {"Angika", 28378, "inc-eas", Deva, ancestors = {"bh"}} + +m["anq"] = {"Jarawa", 2475526, "qfa-ong", Latn} + +m["anr"] = {"Andh", 4754314, "inc-sou", Deva, ancestors = {"pmh"}} + +m["ans"] = {"Anserma", 3446613, "sai-chc", Latn} + +m["ant"] = {"Antakarinya", 921304, "aus-psw", Latn} + +m["anu"] = {"Anuak", 56677, "sdv-lon", Latn} + +m["anv"] = {"Denya", 35187, "nic-mam", Latn} + +m["anw"] = {"Anaang", 2845320, "nic-ief", Latn} + +m["anx"] = {"Andra-Hus", 2846195, "poz-aay", Latn} + +m["any"] = {"Anyi", 28395, "alv-ctn", Latn} + +m["anz"] = {"Anem", 56512, "paa", Latn} + +m["aoa"] = {"Angolar", 34994, "crp", Latn, ancestors = {"pt"}} + +m["aob"] = {"Abom", 3446647, "ngf", Latn} + +m["aoc"] = {"Pemon", 10729616, "sai-car", Latn} + +m["aod"] = {"Andarum", 3507888, "paa", Latn} + +m["aoe"] = {"Angal Enen", 10951638, "paa-eng", Latn} + +m["aof"] = {"Bragat", 3507977, "qfa-tor", Latn} + +m["aog"] = 
{ + "Angoram", 56366, -- cf 6754745 for merged dialect + "paa-lsp", Latn +} + +m["aoi"] = {"Anindilyakwa", 2714654, "aus-arn", Latn} + +m["aoj"] = {"Mufian", 3507881, "qfa-tor", Latn} + +m["aok"] = {"Arhö", 4790086, "poz-cln", Latn} + +m["aol"] = {"Alor", 3332062, "poz", Latn} + +m["aom"] = {"Ömie", 8078975, "ngf", Latn} + +m["aon"] = {"Bumbita Arapesh", 3508044, "qfa-tor", Latn} + +m["aor"] = {"Aore", 12627129, "poz-vnc", Latn} + +m["aos"] = {"Taikat", 7676018, "paa-brd", Latn} + +m["aot"] = {"Atong (India)", 5646, "tbq-bdg", {"Latn", "Beng"}} + +m["aou"] = { + "A'ou", + 16109994, + "qfa-gel", + Latn, -- also Hani? + wikipedia_article = "Gelao language" -- might change, as A'ou is a dialect of Gelao +} + +m["aox"] = {"Atorada", 3507932, "awd", Latn} + +m["aoz"] = {"Uab Meto", 3441962, "poz-tim", Latn} + +m["apb"] = {"Sa'a", 36294, "poz-sls", Latn} + +m["apc"] = {"North Levantine Arabic", 22809485, "sem-arb", Arab, entry_name = {from = {u(0x0671), u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652), u(0x0670), u(0x0640)}, to = {u(0x0627)}}} + +m["apd"] = {"Sudanese Arabic", 56573, "sem-arb", Arab, entry_name = {from = {u(0x0671), u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652), u(0x0670), u(0x0640)}, to = {u(0x0627)}}} + +m["ape"] = {"Bukiyip", 3507895, "qfa-tor", Latn} + +m["apf"] = {"Pahanan Agta", 7135432, "phi", Latn} + +m["apg"] = {"Ampanang", 4748035, "poz", Latn} + +m["aph"] = {"Athpare", 3449126, "sit-kie", {"Deva", "Latn"}} + +m["api"] = {"Apiaká", 3507941, "tup-gua", Latn} + +m["apj"] = {"Jicarilla", 28277, "apa", Latn} + +m["apk"] = {"Plains Apache", 27861, "apa", Latn} + +m["apl"] = {"Lipan", 28269, "apa", Latn} + +m["apm"] = {"Chiricahua", 13368, "apa", Latn} + +m["apn"] = {"Apinayé", 2858311, "sai-nje", Latn} + +m["apo"] = {"Ambul", 12627135, "poz-ocw", Latn} + +m["app"] = {"Apma", 2669188, "poz-vnc", Latn} + +m["apq"] = {"A-Pucikwar", 28466, "qfa-adc", Latn} + +m["apr"] = 
{"Arop-Lokep", 2863482, "poz-ocw", Latn} + +m["aps"] = {"Arop-Sissano", 12627242, "poz-ocw", Latn} + +m["apt"] = {"Apatani", 56306, "sit-tan", Latn} + +m["apu"] = {"Apurinã", 2859081, "awd", Latn} + +m["apv"] = {"Alapmunte", 16110782, "sai-nmk", Latn} + +m["apw"] = {"Western Apache", 28060, "apa", Latn} + +m["apx"] = {"Aputai", 12473343, "poz-tim", Latn} + +m["apy"] = {"Apalaí", 2736980, "sai-car", Latn} + +m["apz"] = {"Safeyoka", 7398693, "ngf", Latn} + +m["aqc"] = {"Archi", 34915, "cau-lzg", Cyrl} + +m["aqd"] = {"Ampari Dogon", 4748057, "nic-dgw", Latn} + +m["aqg"] = {"Arigidi", 34829, "alv-von", Latn} + +m["aqm"] = {"Atohwaim", 11732297, "ngf", Latn} + +m["aqn"] = {"Northern Alta", 7058116, "phi", Latn} + +m["aqp"] = {"Atakapa", 10975683, "qfa-iso", Latn} + +m["aqr"] = {"Arhâ", 4790085, "poz-cln", Latn} + +m["aqt"] = {"Angaité", 15736037, "sai-mas", Latn} + +m["aqz"] = {"Akuntsu", 4701960, "tup", Latn} + +m["arc"] = { + "Aramaic", + 28602, + "sem-ara", + {"Hebr", "Armi", "Syrc", "Palm", "Nbat", "Phnx", "Mand", "Samr", "Hatr"}, + -- varieties are in [[Module:etymology language/data]] + translit_module = "translit-redirect", + entry_name = {from = {"[" .. u(0x0591) .. "-" .. u(0x05BD) .. u(0x05BF) .. "-" .. u(0x05C5) .. u(0x05C7) .. "]", "[" .. u(0x0304) .. u(0x0308) .. u(0x0331) .. u(0x0730) .. "-" .. u(0x0748) .. 
"]"}, to = {}} +} + +m["ard"] = {"Arabana", 3507959, "aus-kar", Latn} + +m["are"] = {"Western Arrernte", 12645549, "aus-pam", Latn} + +m["arh"] = {"Arhuaco", 2640621, "cba", Latn} + +m["ari"] = {"Arikara", 56539, "cdd", Latn} + +m["arj"] = {"Arapaso", 12627166, "sai-tuc", Latn} + +m["ark"] = {"Arikapú", 3446640, "sai-mje", Latn} + +m["arl"] = {"Arabela", 2591221, "sai-zap", Latn} + +m["arn"] = {"Mapudungun", 33730, "sai-ara", Latn} + +m["aro"] = {"Araona", 958414, "sai-tac", Latn} + +m["arp"] = {"Arapaho", 56417, "alg-ara", Latn} + +m["arq"] = {"Algerian Arabic", 56499, "sem-arb", Arab, entry_name = {from = {u(0x0671), u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652), u(0x0670), u(0x0640)}, to = {u(0x0627)}}} + +m["arr"] = {"Arara-Karo", 35539, "tup", Latn} + +m["ars"] = {"Najdi Arabic", 56574, "sem-arb", Arab, entry_name = {from = {u(0x0671), u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652), u(0x0670), u(0x0640)}, to = {u(0x0627)}}} + +m["aru"] = {"Arua", 2746221, "auf", Latn} + +m["arv"] = {"Arbore", 56883, "cus", Latn} + +m["arw"] = {"Arawak", 2655664, "awd-taa", Latn, ancestors = {"awd-taa-pro"}} + +m["arx"] = {"Aruá", 3507907, "tup", Latn} + +m["ary"] = {"Moroccan Arabic", 56426, "sem-arb", {"Arab"}, entry_name = {from = {u(0x0671), u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652), u(0x0670), u(0x0640)}, to = {u(0x0627)}}} + +m["arz"] = {"Egyptian Arabic", 29919, "sem-arb", Arab, entry_name = {from = {u(0x0671), u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652), u(0x0670), u(0x0640)}, to = {u(0x0627)}}} + +m["asa"] = {"Pare", 36403, "bnt-par", Latn} + +m["asb"] = {"Assiniboine", 2591288, "sio-dkt", Latn} + +m["asc"] = {"Casuarina Coast Asmat", 11732046, "ngf", Latn} + +m["ase"] = {"American Sign Language", 14759, "sgn", {"Sgnw"}} + +m["asf"] = { + "Auslan", 29525, "sgn", Latn -- when documented +} + +m["asg"] = 
{"Cishingini", 35199, "nic-kam", Latn} + +m["ash"] = {"Abishira", 2871740, "qfa-iso", Latn} + +m["asi"] = {"Buruwai", 5001031, "ngf", Latn} + +m["asj"] = {"Nsari", 36418, "nic-bbe", Latn} + +m["ask"] = {"Ashkun", 29379, "nur-sou", {"Arab", "Latn"}} + +m["asl"] = {"Asilulu", 12473347, "poz-cma", Latn} + +m["asn"] = {"Xingú Asuriní", 8044571, "tup-gua", Latn} + +m["aso"] = {"Dano", 5220979, "paa-kag", {"Latn"}} + +m["asp"] = {"Algerian Sign Language", 3135421, "sgn"} + +m["asq"] = { + "Austrian Sign Language", 36668, "sgn", Latn -- when documented +} + +m["asr"] = { + "Asuri", 3504321, "mun", Latn -- when documented +} + +m["ass"] = {"Ipulo", 35408, "nic-tvc", Latn} + +m["ast"] = {"Asturian", 29507, "roa-ibe", Latn, ancestors = {"roa-ole"}} + +m["asu"] = {"Tocantins Asurini", 32041490, "tup-gua", Latn} + +m["asv"] = {"Asoa", 56296, "csu-maa", Latn} + +m["asw"] = { + "Australian Aboriginal Sign Language", 955216, "sgn", Latn -- when documented +} + +m["asx"] = {"Muratayak", 11732766, "ngf-fin", Latn} + +m["asy"] = {"Yaosakor Asmat", 16113158, "ngf", Latn} + +m["asz"] = {"As", 2866218, "poz-hce", Latn} + +m["ata"] = {"Pele-Ata", 56511, "paa", Latn} + +m["atb"] = { + "Zaiwa", 56594, "tbq-brm", Latn -- also Hani? 
+} + +m["atc"] = {"Atsahuaca", 4817730, "sai-pan", Latn} + +m["atd"] = {"Ata Manobo", 12627315, "mno", Latn} + +m["ate"] = {"Atemble", 4813055, "ngf-mad", Latn} + +m["atg"] = {"Okpela", 7082551, "alv-yek", Latn} + +m["ati"] = {"Attié", 34844, "alv-lag", Latn} + +m["atj"] = {"Atikamekw", 56590, "alg", Latn, ancestors = {"cr"}} + +m["atk"] = {"Ati", 4815751, "phi", Latn} + +m["atl"] = {"Mount Iraya Agta", 6921430, "phi", Latn} + +m["atm"] = {"Ata", 4812603, "phi", Latn} + +m["ato"] = {"Atong (Cameroon)", 34824, "nic-grs", Latn} + +m["atp"] = {"Pudtol Atta", 12640726, "phi", Latn} + +m["atq"] = {"Aralle-Tabulahan", 4783889, "poz-ssw", Latn} + +m["atr"] = {"Waimiri-Atroari", 56865, "sai-car", Latn} + +m["ats"] = {"Gros Ventre", 56628, "alg-ara", Latn} + +m["att"] = {"Pamplona Atta", 12639245, "phi", Latn} + +m["atu"] = {"Reel", 7306882, "sdv-dnu", Latn} + +m["atv"] = {"Northern Altai", 2640863, "trk-sib", Cyrl, translit_module = "Altai-translit"} + +m["atw"] = {"Atsugewi", 56718, "nai-pal", Latn} + +m["atx"] = {"Arutani", 56609, nil, Latn} + +m["aty"] = {"Aneityum", 2379113, "poz-oce", Latn} + +m["atz"] = {"Arta", 3508067, "phi", Latn} + +m["aua"] = {"Asumboa", 4811870, "poz-oce", Latn} + +m["aub"] = { + "Alugu", 12626798, "tbq-lol", Latn -- also Hani? 
+} + +m["auc"] = {"Huaorani", 758570, "qfa-iso", Latn} + +m["aud"] = {"Anuta", 35326, "poz-pnp", Latn} + +m["aug"] = {"Aguna", 34733, "alv-gbe", Latn} + +m["auh"] = {"Aushi", 2872082, "bnt-sbi", Latn} + +m["aui"] = {"Anuki", 3508132, "poz-ocw", Latn} + +m["auj"] = {"Awjila", 56398, "ber", {"Latn", "Arab", "Tfng"}} + +m["auk"] = {"Heyo", 3504295, "qfa-tor", Latn} + +m["aul"] = {"Aulua", 427300, "poz-vnc", Latn} + +m["aum"] = {"Asu", 34798, "alv-ngb", Latn} + +m["aun"] = {"Molmo One", 12637224, "qfa-tor", Latn} + +m["auo"] = {"Auyokawa", 56247, "cdc-wst", Latn} + +m["aup"] = {"Makayam", 6738863, "ngf", Latn} + +m["auq"] = {"Anus", 23855, "poz-ocw", Latn} + +m["aur"] = {"Aruek", 3504279, "qfa-tor", Latn} + +m["aut"] = {"Austral", 2669261, "poz-pep", Latn} + +m["auu"] = {"Auye", 4827334, "ngf", Latn} + +m["auw"] = {"Awyi", 3513326, "paa-brd", Latn} + +m["aux"] = {"Aurá", 3507995, "tup-gua", Latn} + +m["auy"] = {"Auyana", 2873211, "paa-kag", Latn} + +m["auz"] = {"Uzbeki Arabic", 3399507, "sem-arb", Arab} + +m["avb"] = {"Avau", 12627412, "poz-ocw", Latn} + +m["avd"] = {"Alviri-Vidari", 3327357, "xme", {"fa-Arab"}, ancestors = {"xme-mid"}} + +m["avi"] = {"Avikam", 34840, "alv-lag", Latn} + +m["avk"] = {"Kotava", 1377116, "art", Latn, type = "appendix-constructed"} + +m["avm"] = {"Angkamuthi", nil, "aus-pmn", Latn} + +m["avn"] = {"Avatime", 34796, "alv-ktg", Latn} + +m["avo"] = {"Agavotaguerra", 3508007, "awd", Latn} + +m["avs"] = {"Aushiri", 3409318, "sai-zap", Latn} + +m["avt"] = {"Au", 3446608, "qfa-tor", Latn} + +m["avu"] = {"Avokaya", 56685, "csu-mma", Latn} + +m["avv"] = {"Avá-Canoeiro", 4829584, "tup-gua", Latn} + +m["awa"] = {"Awadhi", 29579, "inc-hie", {"Deva", "Kthi", "fa-Arab"}, ancestors = {"inc-pra"}, translit_module = "hi-translit"} + +m["awb"] = {"Awa (New Guinea)", 2874650, "paa-kag", Latn} + +m["awc"] = {"Cicipu", 35193, "nic-kam", Latn} + +m["awe"] = {"Awetí", 4830038, "tup", Latn} + +m["awg"] = {"Anguthimri", 4764288, "aus-pam", Latn} + +m["awh"] = 
{"Awbono", 3446684, "ngf", Latn} + +m["awi"] = {"Aekyom", 3399691, "ngf", Latn} + +m["awk"] = {"Awabakal", 3449138, "aus-pam", Latn} + +m["awm"] = {"Arawum", 4784537, "ngf-mad", Latn} + +m["awn"] = {"Awngi", 34934, "cus", {"Ethi"}} + +m["awo"] = {"Awak", 3446643, "alv-wjk", Latn} + +m["awr"] = {"Awera", 56379, "paa-lkp", Latn} + +m["aws"] = {"South Awyu", 12633986, "ngf", Latn} + +m["awt"] = {"Araweté", 4784535, "tup-gua", Latn} + +m["awu"] = {"Central Awyu", 12628801, "ngf", Latn} + +m["awv"] = {"Jair Awyu", 16110177, "ngf", Latn} + +m["aww"] = {"Awun", 56369, "paa-spk", Latn} + +m["awx"] = {"Awara", 2874670, "ngf-fin", Latn} + +m["awy"] = {"Edera Awyu", 12630425, "ngf", Latn} + +m["axb"] = {"Abipon", 11252539, "sai-guc", Latn} + +m["axe"] = {"Ayerrerenge", 16112737, "aus-pam", Latn} + +m["axg"] = {"Mato Grosso Arára", 3446660, nil, Latn} + +m["axk"] = {"Aka (Central Africa)", 11010149, "bnt-ngn", Latn} + +m["axl"] = {"Lower Southern Aranda", 6693295, "aus-pam", Latn} + +m["axm"] = {"Middle Armenian", 4438498, "hyx", {"Armn"}, ancestors = {"xcl"}, translit_module = "Armn-translit", override_translit = true, entry_name = {from = {"և", "՞", "՜", "՛", "՟"}, to = {"եւ"}}} + +m["axx"] = {"Xaragure", 8045635, "poz-cln", Latn} + +m["aya"] = {"Awar", 56876, "paa", Latn} + +m["ayb"] = {"Ayizo", 34841, "alv-pph", Latn} + +m["ayd"] = {"Ayabadhu", 3509164, "aus-pmn", Latn} + +m["aye"] = {"Ayere", 34788, "alv-aah", Latn} + +m["ayg"] = {"Nyanga (Togo)", 35446, "alv-gng", Latn} + +m["ayi"] = {"Leyigha", 3914492, "nic-uce", Latn} + +m["ayk"] = {"Akuku", 3450179, "alv-nwd", Latn} + +m["ayl"] = {"Libyan Arabic", 56503, "sem-arb", Arab, entry_name = {from = {u(0x0671), u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652), u(0x0670), u(0x0640)}, to = {u(0x0627)}}} + +m["ayn"] = {"Yemeni Arabic", 1686766, "sem-arb", Arab, entry_name = {from = {u(0x0671), u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652), u(0x0670), 
u(0x0640)}, to = {u(0x0627)}}} + +m["ayo"] = {"Ayoreo", 56634, "sai-zam", Latn} + +m["ayp"] = {"North Mesopotamian Arabic", 56577, "sem-arb", Arab, ancestors = {"acm"}, entry_name = {from = {u(0x0671), u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652), u(0x0670), u(0x0640)}, to = {u(0x0627)}}} + +m["ayq"] = {"Ayi", 56449, "paa-spk", Latn} + +m["ays"] = {"Sorsogon Ayta", 7563752, "phi", Latn} + +m["ayt"] = {"Bataan Ayta", 4921648, "phi", Latn} + +m["ayu"] = {"Ayu", 34786, "alv", Latn} + +m["ayy"] = {"Tayabas Ayta", 7689745, "phi", Latn} + +m["ayz"] = {"Maybrat", 4830892, "paa-wpa", Latn} + +m["aza"] = {"Azha", 4832486, "tbq-lol", Latn} + +m["azd"] = {"Eastern Durango Nahuatl", 16115449, "azc-nah", Latn} + +m["azg"] = {"San Pedro Amuzgos Amuzgo", 35092, "omq", Latn} + +m["azm"] = {"Ipalapa Amuzgo", 12633013, "omq", Latn} + +m["azn"] = {"Western Durango Nahuatl", 12645553, "azc-nah", Latn} + +m["azo"] = {"Awing", 34856, "nic-nge", Latn} + +m["azt"] = {"Faire Atta", 12630884, "phi", Latn} + +m["azz"] = {"Highland Puebla Nahuatl", 12953754, "azc-nah", Latn} + +return m diff --git a/wiktra/wikt/translit/languages/data3/b.lua b/wiktra/wikt/translit/languages/data3/b.lua new file mode 100644 index 0000000..7fcfcc8 --- /dev/null +++ b/wiktra/wikt/translit/languages/data3/b.lua @@ -0,0 +1,1280 @@ +local u = mw.ustring.char + +-- UTF-8 encoded strings for some commonly-used diacritics +local GRAVE = u(0x0300) +local ACUTE = u(0x0301) +local CIRC = u(0x0302) +local TILDE = u(0x0303) +local MACRON = u(0x0304) +local BREVE = u(0x0306) +local DOTABOVE = u(0x0307) +local DIAER = u(0x0308) +local CARON = u(0x030C) +local DGRAVE = u(0x030F) +local INVBREVE = u(0x0311) +local DOTBELOW = u(0x0323) +local RINGBELOW = u(0x0325) +local CEDILLA = u(0x0327) + +-- Use these in "scripts" to save a little memory. 
+local Arab = {"Arab"} +local Cyrl = {"Cyrl"} +local Deva = {"Deva"} +local Latn = {"Latn"} + +local m = {} + +m["baa"] = {"Babatana", 2877785, "poz-ocw", Latn} + +m["bab"] = {"Bainouk-Gunyuño", 35508, "alv-bny", Latn} + +m["bac"] = {"Badui", 3449885, "poz-msa", Latn} + +m["bae"] = {"Baré", 3504087, "awd", Latn} + +m["baf"] = {"Nubaca", 36270, "nic-ymb", Latn} + +m["bag"] = {"Tuki", 36621, "nic-mba", Latn} + +m["bah"] = {"Bahamian Creole", 2669229, "crp", Latn, ancestors = {"en"}} + +m["baj"] = {"Barakai", 3502030, "poz-cet", Latn} + +m["bal"] = {"Baluchi", 33049, "ira-nwi", {"fa-Arab"}} + +m["ban"] = {"Balinese", 33070, "poz-mcm", {"Latn", "Bali"}} + +m["bao"] = {"Waimaha", 2883738, "sai-tuc", Latn} + +m["bap"] = {"Bantawa", 56500, "sit-kic", Deva} + +m["bar"] = {"Bavarian", 29540, "gmw", Latn, ancestors = {"gmh"}} + +m["bas"] = {"Basaa", 33093, "bnt-bsa", Latn} + +m["bau"] = {"Badanchi", 11001650, "nic-jrw", Latn} + +m["bav"] = {"Babungo", 34885, "nic-rnn", Latn} + +m["baw"] = {"Bambili-Bambui", 34880, "nic-nge", Latn} + +m["bax"] = {"Bamum", 35280, "nic-nun", {"Latn", "Bamu"}} + +m["bay"] = {"Batuley", 8828787, "poz", Latn} + +m["bba"] = {"Baatonum", 34889, "alv-sav", Latn} + +m["bbb"] = {"Barai", 4858206, "ngf", Latn} + +m["bbc"] = {"Toba Batak", 33017, "btk", {"Latn", "Batk"}} + +m["bbd"] = {"Bau", 4873415, "ngf-mad", Latn} + +m["bbe"] = {"Bangba", 34895, "nic-nke", Latn} + +m["bbf"] = {"Baibai", 56902, "paa", Latn} + +m["bbg"] = {"Barama", 34884, "bnt-sir", Latn} + +m["bbh"] = {"Bugan", 3033554, "mkh-pkn", Latn} + +m["bbi"] = {"Barombi", 34985, "bnt-bsa", Latn} + +m["bbj"] = {"Ghomala'", 35271, "bai", Latn} + +m["bbk"] = {"Babanki", 34790, "nic-rnc", Latn} + +m["bbl"] = {"Bats", 33259, "cau-nkh", {"Geor"}, translit_module = "Geor-translit", entry_name = {from = {"̃", "", MACRON, BREVE}, to = {"", "ნ"}}} + +m["bbm"] = { -- name includes prefix + "Babango", 34819, "bnt-bta", Latn +} + +m["bbn"] = {"Uneapa", 7884126, "poz-ocw", Latn} + +m["bbo"] = {"Konabéré", 
35371, "dmn-snb", Latn} + +m["bbp"] = {"West Central Banda", 7984377, "bad", Latn} + +m["bbq"] = {"Bamali", 34901, "nic-nun", Latn} + +m["bbr"] = {"Girawa", 5564185, "ngf-mad", Latn} + +m["bbs"] = {"Bakpinka", 3515061, "nic-ucr", Latn} + +m["bbt"] = {"Mburku", 3441324, "cdc-wst", Latn} + +m["bbu"] = {"Bakulung", 35580, "nic-jrn", Latn} + +m["bbv"] = {"Karnai", 6372803, "poz-ocw", Latn} + +m["bbw"] = {"Baba", 34822, "nic-nun", Latn} + +m["bbx"] = { -- cf bvb + "Bubia", + 34953, + "nic-bds", + Latn, + ancestors = {"bvb"} +} + +m["bby"] = {"Befang", 34960, "nic-bds", Latn} + +m["bca"] = {"Central Bai", 12628803, "zhx-gba", {"Hani", "Latn"}} + +m["bcb"] = {"Bainouk-Samik", 36390, "alv-bny", Latn} + +m["bcd"] = {"North Babar", 7054041, "poz-tim", Latn} + +m["bce"] = {"Bamenyam", 34968, "nic-nun", Latn} + +m["bcf"] = {"Bamu", 3503788, "paa-kiw", Latn} + +m["bcg"] = {"Baga Pokur", 31172660, "alv-nal", Latn} + +m["bch"] = {"Bariai", 2884502, "poz-ocw", Latn} + +m["bci"] = {"Baoule", 35107, "alv-ctn", Latn} + +m["bcj"] = {"Bardi", 3913852, "aus-nyu", Latn} + +m["bck"] = {"Bunaba", 580923, "aus-bub", Latn} + +m["bcl"] = {"Bikol Central", 33284, "phi", Latn} + +m["bcm"] = {"Banoni", 2882857, "poz-ocw", Latn} + +m["bcn"] = {"Bibaali", 34892, "alv-mye", Latn} + +m["bco"] = {"Kaluli", 6354586, "ngf", Latn} + +m["bcp"] = {"Bali", 3515074, "bnt-kbi", Latn} + +m["bcq"] = {"Bench", 35108, "omv", Latn} + +m["bcr"] = {"Babine-Witsuwit'en", 27864, "ath-nor", Latn} + +m["bcs"] = {"Kohumono", 35590, "nic-ucn", Latn} + +m["bct"] = {"Bendi", 8836662, "csu-mle", Latn} + +m["bcu"] = {"Biliau", 2874658, "poz-ocw", Latn} + +m["bcv"] = {"Shoo-Minda-Nye", 36548, "nic-jkn", Latn} + +m["bcw"] = {"Bana", 56272, "cdc-cbm", Latn} + +m["bcy"] = {"Bacama", 56274, "cdc-cbm", Latn} + +m["bcz"] = {"Bainouk-Gunyaamolo", 35506, "alv-bny", Latn} + +m["bda"] = {"Bayot", 35019, "alv-jol", Latn} + +m["bdb"] = {"Basap", 3504208, "poz-bnn", Latn} + +m["bdc"] = {"Emberá-Baudó", 11173166, "sai-chc", Latn} + 
+m["bdd"] = {"Bunama", 4997416, "poz-ocw", Latn} + +m["bde"] = {"Bade", 56239, "cdc-wst", Latn} + +m["bdf"] = {"Biage", 48037487, "ngf", Latn} + +m["bdg"] = {"Bonggi", 2910053, "poz-bnn", Latn} + +m["bdh"] = {"Tara Baka", 2880165, "csu-bbk", Latn} + +m["bdi"] = {"Burun", 35040, "sdv-niw", Latn} + +m["bdj"] = {"Bai", 34894, "nic-ser", Latn} + +m["bdk"] = {"Budukh", 35397, "cau-lzg", Cyrl, translit_module = "bdk-translit", override_translit = true, entry_name = {from = {GRAVE, ACUTE}, to = {}}} + +m["bdl"] = {"Indonesian Bajau", 2880038, "poz", Latn} + +m["bdm"] = {"Buduma", 56287, "cdc-cbm", Latn} + +m["bdn"] = {"Baldemu", 56280, "cdc-cbm", Latn} + +m["bdo"] = {"Morom", 759770, "csu-bgr", Latn} + +m["bdp"] = {"Bende", 8836490, "bnt", Latn} + +m["bdq"] = {"Bahnar", 32924, "mkh-ban", Latn, ancestors = {"mkh-ban-pro"}} + +m["bdr"] = {"West Coast Bajau", 2880037, "poz-sbj", Latn} + +m["bds"] = {"Burunge", 56617, "cus", Latn} + +m["bdt"] = {"Bokoto", 4938812, "alv-gbw", Latn} + +m["bdu"] = {"Oroko", 36278, "bnt-saw", Latn} + +m["bdv"] = {"Bodo Parja", 8845881, "inc-eas", {"Orya"}, ancestors = {"inc-mgd"}} + +m["bdw"] = {"Baham", 3513309, "paa", Latn} + +m["bdx"] = {"Budong-Budong", 4985158, "poz-ssw", Latn} + +m["bdy"] = {"Bandjalang", 2980386, "aus-pam", Latn} + +m["bdz"] = {"Badeshi", 33028, "inc-wes", ancestors = {"noe"}} + +m["bea"] = {"Beaver", 20826, "ath-nor", Latn} + +m["beb"] = {"Bebele", 34976, "bnt-btb", Latn} + +m["bec"] = {"Iceve-Maci", 35449, "nic-tvc", Latn} + +m["bed"] = {"Bedoanas", 4879330, "poz-hce", Latn} + +m["bee"] = {"Byangsi", 56904, "sit-alm", Deva} + +m["bef"] = {"Benabena", 2895638, "paa-kag", Latn} + +m["beg"] = {"Belait", 2894198, "poz-swa", Latn} + +m["beh"] = {"Biali", 34961, "nic-eov", Latn} + +m["bei"] = {"Bekati'", 3441683, "day", Latn} + +m["bej"] = {"Beja", 33025, "cus", {"Arab", "Latn"}} + +m["bek"] = {"Bebeli", 4878430, "poz-ocw", Latn} + +m["bem"] = {"Bemba", 33052, "bnt-sbi", Latn} + +m["beo"] = {"Beami", 3504079, "paa", Latn} + 
+m["bep"] = {"Besoa", 8840465, "poz-kal", Latn} + +m["beq"] = {"Beembe", 3196320, "bnt-kng", Latn} + +m["bes"] = {"Besme", 289832, "alv-kim", Latn} + +m["bet"] = {"Guiberoua Bété", 11019185, "kro-bet", Latn} + +m["beu"] = {"Blagar", 4923846, "ngf", Latn} + +m["bev"] = {"Daloa Bété", 11155819, "kro-bet", Latn} + +m["bew"] = {"Betawi", 33014, "crp", Latn, ancestors = {"ms"}} + +m["bex"] = {"Jur Modo", 56682, "csu-bbk", Latn} + +m["bey"] = {"Akuwagel", 3504170, "qfa-tor", Latn} + +m["bez"] = {"Kibena", 2502949, "bnt-bki", Latn} + +m["bfa"] = {"Bari", 35042, "sdv-bri", Latn} + +m["bfb"] = {"Pauri Bareli", 7155462, "inc-bhi", Deva} + +m["bfc"] = {"Northern Bai", 12642165, "zhx-gba", {"Hani", "Latn"}} + +m["bfd"] = {"Bafut", 34888, "nic-nge", Latn} + +m["bfe"] = {"Betaf", 4897329, "paa-tkw", Latn} + +m["bff"] = {"Bofi", 34914, "alv-gbf", Latn} + +m["bfg"] = {"Busang Kayan", 9231909, "poz", Latn} + +m["bfh"] = {"Blafe", 12628007, "paa", Latn} + +m["bfi"] = { + "British Sign Language", 33000, "sgn", Latn -- when documented +} + +m["bfj"] = {"Bafanji", 34890, "nic-nun", Latn} + +m["bfk"] = {"Ban Khor Sign Language", 3441103, "sgn"} + +m["bfl"] = {"Banda-Ndélé", 34850, "bad-cnt", Latn} + +m["bfm"] = {"Mmen", 36132, "nic-rnc", Latn} + +m["bfn"] = {"Bunak", 35101, "ngf", Latn} + +m["bfo"] = {"Malba Birifor", 11150710, "nic-mre", Latn} + +m["bfp"] = {"Beba", 35050, "nic-nge", Latn} + +m["bfq"] = {"Badaga", 33205, "dra", {"Knda"}} + +m["bfr"] = {"Bazigar", 8829558, "dra"} + +m["bfs"] = {"Southern Bai", 12952250, "zhx-gba", {"Hani", "Latn"}} + +m["bft"] = {"Balti", 33086, "sit-lab", {"Arab", "Tibt"}} + +m["bfu"] = {"Gahri", 5516952, "sit-whm", {"Takr", "Tibt"}} + +m["bfw"] = {"Bondo", 2567942, "mun", {"Orya"}} + +m["bfx"] = {"Bantayanon", 16837866, "phi", Latn} + +m["bfy"] = {"Bagheli", 2356364, "inc-hie", Deva, ancestors = {"inc-pra"}, translit_module = "hi-translit"} + +m["bfz"] = {"Mahasu Pahari", 6733460, "him", Deva, translit_module = "hi-translit"} + +m["bga"] = 
{"Gwamhi-Wuri", 6707102, "nic-knn", Latn} + +m["bgb"] = {"Bobongko", 4935896, "poz-slb", Latn} + +m["bgc"] = {"Haryanvi", 33410, "inc-cen", Deva, ancestors = {"inc-ohi"}, translit_module = "hi-translit"} + +m["bgd"] = {"Rathwi Bareli", 7295692, "inc-bhi", Deva} + +m["bge"] = {"Bauria", 4873579, "inc-bhi", Deva} + +m["bgf"] = {"Bangandu", 34938, "alv-gbs", Latn} + +m["bgg"] = {"Bugun", 3514220, "sit-khb", Latn} + +m["bgi"] = {"Giangan", 4842057, "phi", Latn} + +m["bgj"] = {"Bangolan", 34862, "nic-nun", Latn} + +m["bgk"] = { + "Bit", 2904868, "mkh-pal", Latn -- also Hani? +} + +m["bgl"] = {"Bo", 8845514, "mkh-vie"} + +m["bgo"] = {"Baga Koga", 35695, "alv-bag", Latn} + +m["bgq"] = {"Bagri", 2426319, "inc-wes", Deva, ancestors = {"raj"}} + +m["bgr"] = {"Bawm Chin", 56765, "tbq-kuk", Latn} + +m["bgs"] = {"Tagabawa", 7675121, "mno", Latn} + +m["bgt"] = {"Bughotu", 2927723, "poz-sls", Latn} + +m["bgu"] = {"Mbongno", 36141, "nic-mmb", Latn} + +m["bgv"] = {"Warkay-Bipim", 4915439, "ngf", Latn} + +m["bgw"] = {"Bhatri", 8841054, "inc-eas", Deva, ancestors = {"inc-mgd"}} + +m["bgx"] = {"Balkan Gagauz Turkish", 2360396, "trk-ogz", Latn, ancestors = {"trk-oat"}} + +m["bgy"] = {"Benggoi", 4887742, "poz-cma", Latn} + +m["bgz"] = {"Banggai", 3441692, "poz-slb", Latn} + +m["bha"] = {"Bharia", 4901287, "dra", Deva} + +m["bhb"] = {"Bhili", 33229, "inc-bhi", Deva} + +m["bhc"] = {"Biga", 2902375, "poz-hce", Latn} + +m["bhd"] = {"Bhadrawahi", 4900565, "him", {"Arab", "Deva"}, translit_module = "hi-translit"} + +m["bhe"] = {"Bhaya", 8841168, "inc-wes", ancestors = {"raj"}} + +m["bhf"] = {"Odiai", 56690, "paa-kwm", Latn} + +m["bhg"] = {"Binandere", 3503802, "ngf", Latn} + +m["bhh"] = {"Bukhari", 56469, "ira-swi", {"Cyrl", "Hebr", "Latn", "fa-Arab"}, ancestors = {"tg"}} + +m["bhi"] = {"Bhilali", 4901729, "inc-bhi", Deva} + +m["bhj"] = {"Bahing", 56442, "sit-kiw", {"Deva", "Latn"}} + +m["bhl"] = {"Bimin", 4913743, "ngf-okk", Latn} + +m["bhm"] = {"Bathari", 2586893, "sem-sar", {"Arab", 
"Latn"}} + +m["bhn"] = {"Bohtan Neo-Aramaic", 33230, "sem-nna"} + +m["bho"] = {"Bhojpuri", 33268, "inc-hie", {"Deva", "Kthi"}, ancestors = {"bh"}, translit_module = "translit-redirect"} + +m["bhp"] = {"Bima", 2796873, "poz-cet", Latn} + +m["bhq"] = {"Tukang Besi South", 12643975, "poz-mun", Latn} + +m["bhs"] = {"Buwal", 3515065, "cdc-cbm", Latn} + +m["bht"] = {"Bhattiyali", 4901452, "him", Deva} + +m["bhu"] = {"Bhunjia", 8841766, "inc-eas", {"Deva", "Orya"}, ancestors = {"inc-mgd"}} + +m["bhv"] = {"Bahau", 3502039, "poz", Latn} + +m["bhw"] = {"Biak", 1961488, "poz-hce", Latn} + +m["bhx"] = { -- spurious? + "Bhalay", 8840773, "inc" +} + +m["bhy"] = {"Bhele", 4901671, "bnt-kbi", Latn} + +m["bhz"] = {"Bada", 4840520, "poz-kal", Latn} + +m["bia"] = {"Badimaya", 3442745, "aus-psw", Latn} + +m["bib"] = {"Bissa", 32934, "dmn-bbu", Latn} + +m["bic"] = {"Bikaru", 56342, "paa-eng", Latn} + +m["bid"] = {"Bidiyo", 56258, "cdc-est", Latn} + +m["bie"] = {"Bepour", 4890914, "ngf-mad", Latn} + +m["bif"] = {"Biafada", 35099, "alv-ten", Latn} + +m["big"] = {"Biangai", 8842027, "paa", Latn} + +m["bij"] = {"Kwanka", 35598, "nic-tar", Latn} + +m["bil"] = {"Bile", 34987, "nic-jrn", Latn} + +m["bim"] = {"Bimoba", 34971, "nic-grm", Latn} + +m["bin"] = {"Edo", 35375, "alv-eeo", Latn} + +m["bio"] = {"Nai", 3508074, "paa-kwm", Latn} + +m["bip"] = {"Bila", 2902626, "bnt-kbi", Latn} + +m["biq"] = {"Bipi", 2904312, "poz-aay", Latn} + +m["bir"] = {"Bisorio", 8844749, "paa-eng", Latn} + +m["bit"] = {"Berinomo", 56447, "paa-spk", Latn} + +m["biu"] = {"Biete", 4904687, "tbq-kuk", Latn} + +m["biv"] = {"Southern Birifor", 32859745, "nic-mre", Latn} + +m["biw"] = {"Kol (Cameroon)", 35582, "bnt-mka", Latn} + +m["bix"] = {"Bijori", 3450686, "mun", Deva} + +m["biy"] = {"Birhor", 3450469, "mun", Deva} + +m["biz"] = {"Baloi", 3450590, "bnt-ngn", Latn} + +m["bja"] = {"Budza", 3046889, "bnt-bun", Latn} + +m["bjb"] = {"Barngarla", 3439071, "aus-pam", Latn} + +m["bjc"] = {"Bariji", 4690919, "ngf", Latn} + 
+m["bje"] = {"Biao-Jiao Mien", 3503800, "hmx-mie", {"Hani", "Latn"}} + +m["bjf"] = { + "Barzani Jewish Neo-Aramaic", 33234, "sem-nna", {"Hebr"} -- maybe others +} + +m["bjg"] = {"Bidyogo", 35365, "alv-bak", Latn} + +m["bjh"] = {"Bahinemo", 56361, "paa-spk", Latn} + +m["bji"] = {"Burji", 34999, "cus", {"Latn", "Ethi"}} + +m["bjj"] = {"Kannauji", 2726867, "inc-cen", Deva, ancestors = {"inc-ohi"}} + +m["bjk"] = {"Barok", 2884743, "poz-ocw", Latn} + +m["bjl"] = {"Bulu (New Guinea)", 4997162, "poz-ocw", Latn} + +m["bjm"] = {"Bajelani", 4848866, "ira-zgr", {"Latn", "Arab"}, ancestors = {"hac"}} + +m["bjn"] = {"Banjarese", 33151, "poz-mly", {"Latn", "Arab"}} + +m["bjo"] = {"Mid-Southern Banda", 42303990, "bad-cnt", Latn} + +m["bjp"] = {"Fanamaket", nil, "poz-oce", Latn} + +m["bjr"] = {"Binumarien", 538364, "paa-kag", Latn} + +m["bjs"] = {"Bajan", 2524014, "crp", Latn, ancestors = {"en"}} + +m["bjt"] = {"Balanta-Ganja", 19359034, "alv-bak", {"Arab", "Latn"}} + +m["bju"] = {"Busuu", 35046, "nic-fru", Latn} + +m["bjv"] = {"Bedjond", 8829831, "csu-sar", Latn} + +m["bjw"] = {"Bakwé", 34899, "kro-ekr", Latn} + +m["bjx"] = {"Banao Itneg", 12627559, "phi", Latn} + +m["bjy"] = {"Bayali", 4874263, "aus-pam", Latn} + +m["bjz"] = {"Baruga", 2886189, "ngf", Latn} + +m["bka"] = {"Kyak", 35653, "alv-bwj", Latn} + +m["bkc"] = {"Baka", 34905, "nic-nkb", Latn} + +m["bkd"] = {"Binukid", 4914553, "mno", Latn} + +m["bkf"] = {"Beeke", 3441375, "bnt-kbi", Latn} + +m["bkg"] = {"Buraka", 35066, "nic-nkg", Latn} + +m["bkh"] = {"Bakoko", 34866, "bnt-bsa", Latn} + +m["bki"] = {"Baki", 11024697, "poz-vnc", Latn} + +m["bkj"] = {"Pande", 36263, "bnt-ngn", Latn} + +m["bkk"] = { -- written in Balti script + "Brokskat", 2925988, "inc-dar" +} + +m["bkl"] = {"Berik", 378743, "paa-tkw", Latn} + +m["bkm"] = {"Kom (Cameroon)", 1656595, "nic-rnc", Latn} + +m["bkn"] = {"Bukitan", 3446774, "poz-bnn", Latn} + +m["bko"] = {"Kwa'", 35567, "bai", Latn} + +m["bkp"] = {"Iboko", 35089, "bnt-ngn", Latn} + +m["bkq"] = 
{"Bakairí", 56846, "sai-car", Latn} + +m["bkr"] = {"Bakumpai", 3436626, "poz-brw", Latn} + +m["bks"] = {"Masbate Sorsogon", 16113356, "phi", Latn} + +m["bkt"] = {"Boloki", 4144560, "bnt-zbi", Latn, ancestors = {"lse"}} + +m["bku"] = {"Buhid", 1002956, "phi", {"Buhd"}} + +m["bkv"] = {"Bekwarra", 34954, "nic-ben", Latn} + +m["bkw"] = {"Bekwel", 34950, "bnt-bek", Latn} + +m["bkx"] = {"Baikeno", 11200640, "poz-tim", Latn} + +m["bky"] = {"Bokyi", 35087, "nic-ben", Latn} + +m["bkz"] = {"Bungku", 2928207, "poz-btk", Latn} + +m["bla"] = {"Blackfoot", 33060, "alg", {"Latn", "Cans"}} + +m["blb"] = {"Bilua", 35003, "ngf", Latn} + +m["blc"] = {"Bella Coola", 977808, "sal", Latn} + +m["bld"] = {"Bolango", 3450578, "phi", Latn} + +m["ble"] = {"Balanta-Kentohe", 56789, "alv-bak", Latn} + +m["blf"] = {"Buol", 2928278, "phi", Latn} + +m["blg"] = {"Balau", 4850134, "poz-mly", Latn} + +m["blh"] = {"Kuwaa", 35579, "kro", Latn} + +m["bli"] = {"Bolia", 34910, "bnt-mon", Latn} + +m["blj"] = {"Bolongan", 9229310, "poz", Latn} + +m["blk"] = {"Pa'o Karen", 7121294, "kar", {"Mymr"}} + +m["bll"] = {"Biloxi", 2903780, "sio-ohv", Latn} + +m["blm"] = {"Beli", 56821, "csu-bbk", Latn} + +m["bln"] = {"Southern Catanduanes Bicolano", 7569754, "phi", Latn} + +m["blo"] = {"Anii", 34838, "alv-ntg", Latn} + +m["blp"] = {"Blablanga", 2905245, "poz-ocw", Latn} + +m["blq"] = {"Baluan-Pam", 2881675, "poz-aay", Latn} + +m["blr"] = {"Blang", 4925096, "mkh-pal", {"Latn", "Tale", "Lana", "Thai"}, sort_key = {from = {"[%pᪧๆ]", "[᩠ᩳ-᩿]", "ᩔ", "ᩕ", "ᩖ", "ᩘ", "([ᨭ-ᨱ])ᩛ", "([ᨷ-ᨾ])ᩛ", "ᩤ", "[็-๎]", "([เแโใไ])([ก-ฮ])"}, to = {"", "", "ᩈᩈ", "ᩁ", "ᩃ", "ᨦ", "%1ᨮ", "%1ᨻ", "ᩣ", "", "%2%1"}}} + +m["bls"] = {"Balaesang", 4849796, "poz", Latn} + +m["blt"] = { + "Tai Dam", + 56407, + "tai-swe", + {"Tavt", "Latn"}, + -- translit_module = "Tavt-translit", + sort_key = {from = {"[꪿ꫀ꫁ꫂ]", "([ꪵꪶꪹꪻꪼ])([ꪀ-ꪯ])"}, to = {"", "%2%1"}} +} + +m["blv"] = {"Kibala", 4939959, "bnt-kmb", Latn} + +m["blw"] = {"Balangao", 4850033, "phi", Latn} + 
+m["blx"] = {"Mag-Indi Ayta", 1931221, "phi", Latn} + +m["bly"] = {"Notre", 11009194, "nic-wov", Latn} + +m["blz"] = {"Balantak", 4850053, "poz-slb", Latn} + +m["bma"] = {"Lame", 3913997, "nic-jrn", Latn} + +m["bmb"] = {"Bembe", 4885023, "bnt-lgb", Latn} + +m["bmc"] = {"Biem", 4904523, "poz-ocw", Latn} + +m["bmd"] = {"Baga Manduri", 35815, "alv-bag", Latn} + +m["bme"] = {"Limassa", 11004666, "nic-nkb", Latn} + +m["bmf"] = {"Bom", 35088, "alv-mel", Latn} + +m["bmg"] = {"Bamwe", 34867, "bnt-bun", Latn} + +m["bmh"] = {"Kein", 6383764, "ngf-mad", Latn} + +m["bmi"] = {"Bagirmi", 34903, "csu-bgr", Latn} + +m["bmj"] = {"Bote-Majhi", 9229570, "inc-eas", Deva, ancestors = {"bh"}} + +m["bmk"] = {"Ghayavi", 5555976, "poz-ocw", Latn} + +m["bml"] = {"Bomboli", 35055, "bnt-ngn", Latn} + +m["bmn"] = {"Bina", 8843664, "poz-ocw", Latn} + +m["bmo"] = {"Bambalang", 34868, "nic-nun", Latn} + +m["bmp"] = {"Bulgebi", 4996380, "ngf-fin", Latn} + +m["bmq"] = {"Bomu", 35065, "nic-bwa", Latn} + +m["bmr"] = {"Muinane", 3027894, "sai-bor", Latn} + +m["bmt"] = {"Biao Mon", 8842159, "hmx-mie"} + +m["bmu"] = {"Somba-Siawari", 5000983, "ngf", Latn} + +m["bmv"] = {"Bum", 35058, "nic-rnc", Latn} + +m["bmw"] = {"Bomwali", 34984, "bnt-ndb", Latn} + +m["bmx"] = {"Baimak", 3450546, "ngf-mad", Latn} + +m["bmz"] = {"Baramu", 4858315, "ngf", Latn} + +m["bna"] = {"Bonerate", 4941729, "poz-mun", Latn} + +m["bnb"] = {"Bookan", 4943150, "poz-san", Latn} + +m["bnd"] = {"Banda", 3504147, "poz-cma", Latn} + +m["bne"] = {"Bintauna", 4914533, "phi", Latn} + +m["bnf"] = {"Masiwang", 6783305, "poz-cma", Latn} + +m["bng"] = {"Benga", 34952, "bnt-saw", Latn} + +m["bni"] = {"Bangi", 34936, "bnt-bmo", Latn} + +m["bnj"] = {"Eastern Tawbuid", 18757427, "phi", Latn} + +m["bnk"] = {"Bierebo", 2902029, "poz-vnc", Latn} + +m["bnl"] = {"Boon", 56616, "cus", Latn} + +m["bnm"] = {"Batanga", 34979, "bnt-saw", Latn} + +m["bnn"] = {"Bunun", 56505, "map", Latn} + +m["bno"] = {"Asi", 29490, "phi", Latn} + +m["bnp"] = {"Bola", 
4938876, "poz-ocw", Latn} + +m["bnq"] = {"Bantik", 2883521, "poz", Latn} + +m["bnr"] = {"Butmas-Tur", 2928942, "poz-vnc", Latn} + +m["bns"] = {"Bundeli", 56399, "inc-cen", Deva, ancestors = {"inc-ohi"}, translit_module = "hi-translit"} + +m["bnu"] = {"Bentong", 4890644, "poz-ssw", Latn} + +m["bnv"] = {"Beneraf", 4941733, "paa-tkw", Latn} + +m["bnw"] = {"Bisis", 56356, "paa-spk", Latn} + +m["bnx"] = {"Bangubangu", 3438330, "bnt-lbn", Latn} + +m["bny"] = {"Bintulu", 3450775, "poz-swa", Latn} + +m["bnz"] = {"Beezen", 35083, "nic-ykb", Latn} + +m["boa"] = {"Bora", 2375468, "sai-bor", Latn} + +m["bob"] = {"Aweer", 56526, "cus", Latn} + +m["boe"] = {"Mundabli", 36127, "nic-beb", Latn} + +m["bof"] = {"Bolon", 3913301, "dmn-emn", Latn} + +m["bog"] = {"Bamako Sign Language", 4853284, "sgn"} + +m["boh"] = {"North Boma", 35080, "bnt-bdz", Latn} + +m["boi"] = {"Barbareño", 56391, "nai-chu", Latn} + +m["boj"] = {"Anjam", 3504136, "ngf-mad", Latn} + +m["bok"] = {"Bonjo", 34942, "alv", Latn} + +m["bol"] = {"Bole", 3436680, "cdc-wst", Latn} + +m["bom"] = {"Berom", 35013, "nic-beo", Latn} + +m["bon"] = {"Bine", 4914077, "paa", Latn} + +m["boo"] = { + "Tiemacèwè Bozo", 12643582, "dmn-snb", Latn -- and others? 
+} + +m["bop"] = {"Bonkiman", 4942134, "ngf-fin", Latn} + +m["boq"] = {"Bogaya", 7207578, "ngf", Latn} + +m["bor"] = {"Borôro", 32986, "sai-mje", Latn} + +m["bot"] = {"Bongo", 2910067, "csu-bbk", Latn} + +m["bou"] = {"Bondei", 4941378, "bnt-seu", Latn} + +m["bov"] = {"Tuwuli", 36974, "alv-ktg", Latn} + +m["bow"] = {"Rema", 7311502, "paa", Latn} + +m["box"] = {"Buamu", 35157, "nic-bwa", Latn} + +m["boy"] = {"Bodo (Central Africa)", 4936715, "bnt-leb", Latn} + +m["boz"] = {"Tiéyaxo Bozo", 32860401, "dmn-snb", Latn} + +m["bpa"] = {"Dakaka", 1157729, "poz-vnc", Latn} + +m["bpd"] = {"Banda-Banda", 3450674, "bad-cnt", Latn} + +m["bpg"] = {"Bonggo", 4941860, "poz-ocw", Latn} + +m["bph"] = {"Botlikh", 56560, "cau-ava", Cyrl} + +m["bpi"] = {"Bagupi", 3450697, "ngf-mad", Latn} + +m["bpj"] = {"Binji", 4914403, "bnt-lbn", Latn} + +m["bpk"] = {"Orowe", 7103905, "poz-cln", Latn} + +m["bpl"] = {"Broome Pearling Lugger Pidgin", 4975277, "crp", Latn, ancestors = {"ms"}} + +m["bpm"] = {"Biyom", 4919327, "ngf-mad", Latn} + +m["bpn"] = {"Dzao Min", 3042189, "hmx-mie"} + +m["bpo"] = {"Anasi", 11207813, "paa-egb", Latn} + +m["bpp"] = {"Kaure", 20526532, "paa", Latn} + +m["bpq"] = {"Banda Malay", 12473442, "crp", Latn, ancestors = {"ms"}} + +m["bpr"] = {"Koronadal Blaan", 16115430, "phi", Latn} + +m["bps"] = {"Sarangani Blaan", 16117272, "phi", Latn} + +m["bpt"] = {"Barrow Point", 2567916, "aus-pmn", Latn} + +m["bpu"] = {"Bongu", 4941930, "ngf-mad", Latn} + +m["bpv"] = {"Bian Marind", 8841889, "ngf", Latn} + +m["bpx"] = {"Palya Bareli", 7128872, "inc-bhi", {"Deva"}, ancestors = {"psu"}, translit_module = "hi-translit"} + +m["bpy"] = {"Bishnupriya Manipuri", 37059, "inc-eas", {"Beng"}, ancestors = {"inc-obn"}} + +m["bpz"] = {"Bilba", 8843362, "poz-tim", Latn} + +m["bqa"] = {"Tchumbuli", 11008162, "alv-ctn", Latn, ancestors = {"ak"}} + +m["bqb"] = {"Bagusa", 4842178, "paa-tkw", Latn} + +m["bqc"] = {"Boko", 34983, "dmn-bbu", Latn} + +m["bqd"] = {"Bung", 3436612, "nic-bdn", Latn} + +m["bqf"] 
= {"Baga Kaloum", 3502293, "alv-bag", Latn} + +m["bqg"] = {"Bago-Kusuntu", 34878, "nic-gne"} + +m["bqh"] = {"Baima", 674990, "sit-qia"} + +m["bqi"] = {"Bakhtiari", 257829, "ira-swi", {"fa-Arab"}, ancestors = {"pal"}} + +m["bqj"] = {"Bandial", 34872, "alv-jol", Latn} + +m["bqk"] = {"Banda-Mbrès", 3450724, "bad-cnt", Latn} + +m["bql"] = {"Bilakura", 4907504, "ngf-mad", Latn} + +m["bqm"] = {"Wumboko", 37051, "bnt-kpw", Latn} + +m["bqn"] = {"Bulgarian Sign Language", 3438325, "sgn"} + +m["bqo"] = {"Balo", 34865, "nic-grs", Latn} + +m["bqp"] = {"Busa", 35185, "dmn-bbu", Latn} + +m["bqq"] = {"Biritai", 56382, "paa-lkp", Latn} + +m["bqr"] = {"Burusu", 5001028, "poz-san", Latn} + +m["bqs"] = {"Bosngun", 56838, "paa", Latn} + +m["bqt"] = {"Bamukumbit", 35078, "nic-nge", Latn} + +m["bqu"] = {"Boguru", 3438444, "bnt-boa", Latn} + +m["bqv"] = {"Begbere-Ejar", 7194098, "nic-plc", Latn} + +m["bqw"] = {"Buru (Nigeria)", 1017152, "nic-bds", Latn} + +m["bqx"] = {"Baangi", 3450648, "nic-kam", Latn} + +m["bqy"] = {"Bengkala Sign Language", 3322119, "sgn"} + +m["bqz"] = {"Bakaka", 34855, "bnt-mne", Latn} + +m["bra"] = {"Braj", 35243, "inc-cen", Deva, ancestors = {"inc-ohi"}, translit_module = "hi-translit"} + +m["brb"] = {"Lave", 4957737, "mkh-ban"} + +m["brc"] = {"Berbice Creole Dutch", 35215, "crp", Latn, ancestors = {"nl"}} + +m["brd"] = {"Baraamu", 56804, "sit-new", Deva} + +m["brf"] = {"Bera", 2896850, "bnt-kbi", Latn} + +m["brg"] = {"Baure", 2839722, "awd", Latn} + +m["brh"] = {"Brahui", 33202, "dra", {"Arab", "Latn"}} + +m["bri"] = {"Mokpwe", 36428, "bnt-kpw", Latn} + +m["brj"] = {"Bieria", 4904607, "poz-vnc", Latn} + +m["brk"] = {"Birgid", 56823, "nub", Latn} + +m["brl"] = {"Birwa", 3501019, "bnt-sts", Latn} + +m["brm"] = {"Barambu", 34893, "znd", Latn} + +m["brn"] = {"Boruca", 4946773, "cba", Latn} + +m["bro"] = {"Brokkat", 56605, "sit-tib", {"Tibt", "Latn"}} + +m["brp"] = {"Barapasi", 56995, "paa-egb", Latn} + +m["brq"] = {"Breri", 4961835, "paa", Latn} + +m["brr"] = 
{"Birao", 2904383, "poz-sls", Latn} + +m["brs"] = {"Baras", 8827053, "poz", Latn} + +m["brt"] = {"Bitare", 34946, "nic-tvn", Latn} + +m["bru"] = {"Eastern Bru", 16115463, "mkh-kat", {"Latn", "Laoo", "Thai"}} + +m["brv"] = {"Western Bru", 16113806, "mkh-kat", {"Latn", "Laoo"}} + +m["brw"] = {"Bellari", 4883496, "dra"} + +m["brx"] = {"Bodo (India)", 33223, "tbq-bdg", {"Deva", "Latn"}, translit_module = "brx-translit"} + +m["bry"] = {"Burui", 5000976, "paa-spk", Latn} + +m["brz"] = {"Bilbil", 4907473, "poz-ocw", Latn} + +m["bsa"] = {"Abinomn", 56648, "qfa-iso", Latn} + +m["bsb"] = {"Brunei Bisaya", 3450611, "poz-san", Latn} + +m["bsc"] = {"Bassari", 35098, "alv-ten", Latn} + +m["bse"] = {"Wushi", 36973, "nic-rnn", Latn} + +m["bsf"] = {"Bauchi", 34974, "nic-shi", Latn} + +m["bsg"] = {"Bashkardi", 33030, "ira-swi", {"fa-Arab", "Latn"}} + +m["bsh"] = {"Kamkata-viri", 2605045, "nur-nor", {"Latn", "Arab"}, ancestors = {"iir-nur-pro"}} + +m["bsi"] = {"Bassossi", 34940, "bnt-mne", Latn} + +m["bsj"] = {"Bangwinji", 3446631, "alv-wjk", Latn} + +m["bsk"] = {"Burushaski", 216286, "qfa-iso", Latn} + +m["bsl"] = {"Basa-Gumna", 4866150, "nic-bas", Latn} + +m["bsm"] = {"Busami", 5001255, "poz-hce", Latn} + +m["bsn"] = {"Barasana", 2883843, "sai-tuc", Latn} + +m["bso"] = {"Buso", 3441370, "cdc-est", Latn} + +m["bsp"] = {"Baga Sitemu", 36466, "alv-bag", Latn} + +m["bsq"] = {"Bassa", 34949, "kro-wkr", {"Latn", "Bass"}} + +m["bsr"] = {"Bassa-Kontagora", 4866152, "nic-bas", Latn} + +m["bss"] = {"Akoose", 34806, "bnt-mne", Latn} + +m["bst"] = {"Basketo", 56531, "omv-ome", {"Ethi"}} + +m["bsu"] = {"Bahonsuai", 2879298, "poz-btk", Latn} + +m["bsv"] = {"Baga Sobané", 3450433, "alv-bag", Latn} + +m["bsw"] = {"Baiso", 56615, "cus", Latn} + +m["bsx"] = {"Yangkam", 36922, "nic-tar", Latn} + +m["bsy"] = {"Sabah Bisaya", 12641557, "poz-san", Latn} + +m["bta"] = {"Bata", 56254, "cdc-cbm", Latn} + +m["btc"] = {"Bati (Cameroon)", 34944, "nic-mbw", Latn} + +m["btd"] = {"Dairi Batak", 2891045, "btk", 
{"Latn", "Batk"}} + +m["bte"] = {"Gamo-Ningi", 5520366, "nic-jer", Latn} + +m["btf"] = {"Birgit", 56302, "cdc-est", Latn} + +m["btg"] = {"Gagnoa Bété", 11005602, "kro-bet", Latn} + +m["bth"] = {"Biatah Bidayuh", 2900881, "day", Latn} + +m["bti"] = {"Burate", 56900, "paa-egb", Latn} + +m["btj"] = {"Bacanese Malay", 8828608, "poz-mly", Latn} + +m["btm"] = {"Mandailing Batak", 2891049, "btk", {"Latn", "Batk"}} + +m["btn"] = {"Ratagnon", 13197, "phi", Latn} + +m["bto"] = {"Iriga Bicolano", 12633026, "phi", Latn} + +m["btp"] = {"Budibud", 4985086, "poz-ocw", Latn} + +m["btq"] = {"Batek", 860315, "mkh-asl", Latn} + +m["btr"] = {"Baetora", 2878874, "poz-vnc", Latn} + +m["bts"] = {"Simalungun Batak", 2891054, "btk", {"Latn", "Batk"}} + +m["btt"] = {"Bete-Bendi", 4887064, "nic-ben", Latn} + +m["btu"] = {"Batu", 34964, "nic-tvn", Latn} + +m["btv"] = {"Bateri", 3812564, "inc-dar", Deva} + +m["btw"] = {"Butuanon", 5003156, "phi", Latn} + +m["btx"] = {"Karo Batak", 33012, "btk", {"Latn", "Batk"}} + +m["bty"] = {"Bobot", 3446788, "poz-cma", Latn} + +m["btz"] = {"Alas-Kluet Batak", 2891042, "btk", {"Latn", "Batk"}} + +m["bua"] = {"Buryat", 33120, "xgn", {"Cyrl", "Mong"}, translit_module = "bua-translit", override_translit = true, wikimedia_codes = {"bxr"}} + +m["bub"] = {"Bua", 32928, "alv-bua", Latn} + +m["bud"] = {"Ntcham", 36266, "nic-grm", Latn} + +m["bue"] = {"Beothuk", 56234, nil, Latn} + +m["buf"] = {"Bushoong", 3449964, "bnt-bsh", Latn} + +m["bug"] = {"Buginese", 33190, "poz-ssw", {"Bugi", "Latn"}} + +m["buh"] = {"Younuo Bunu", 56299, "hmn", Latn} + +m["bui"] = {"Bongili", 35084, "bnt-ngn", Latn} + +m["buj"] = {"Basa-Gurmana", 6432515, "nic-bas", Latn} + +m["buk"] = {"Bukawa", 35043, "poz-ocw", Latn} + +m["bum"] = {"Bulu (Cameroon)", 35028, "bnt-btb", Latn} + +m["bun"] = {"Sherbro", 36339, "alv-mel", Latn} + +m["buo"] = {"Terei", 56831, "paa-sbo", Latn} + +m["bup"] = {"Busoa", 5002001, "poz", Latn} + +m["buq"] = {"Brem", 4960502, "ngf", Latn} + +m["bus"] = {"Bokobaru", 
9228931, "dmn-bbu", Latn} + +m["but"] = {"Bungain", 3450623, "qfa-tor", Latn} + +m["buu"] = {"Budu", 3450207, "bnt-nya", Latn} + +m["buv"] = {"Bun", 56351, "paa-yua", Latn} + +m["buw"] = {"Bubi", 35017, "bnt-tso", Latn} + +m["bux"] = {"Boghom", 3440412, "cdc-wst", Latn} + +m["buy"] = {"Mmani", 35061, "alv-mel", Latn} + +m["bva"] = {"Barein", 56285, "cdc-est", Latn} + +m["bvb"] = {"Bube", 35110, "nic-bds", Latn} + +m["bvc"] = {"Baelelea", 2878833, "poz-sls", Latn} + +m["bvd"] = {"Baeggu", 2878850, "poz-sls", Latn} + +m["bve"] = {"Berau Malay", 3915770, "poz-mly", Latn} + +m["bvf"] = {"Boor", 56250, "cdc-est", Latn} + +m["bvg"] = {"Bonkeng", 34958, "bnt-bbo", Latn} + +m["bvh"] = {"Bure", 56294, "cdc-wst", Latn} + +m["bvi"] = {"Belanda Viri", 35247, "nic-ser", Latn} + +m["bvj"] = {"Baan", 3515067, "nic-ogo", Latn} + +m["bvk"] = {"Bukat", 4986814, "poz-bnn", Latn} + +m["bvl"] = { + "Bolivian Sign Language", 1783590, "sgn", Latn -- when documented +} + +m["bvm"] = {"Bamunka", 34882, "nic-rnn", Latn} + +m["bvn"] = {"Buna", 3450516, "qfa-tor", Latn} + +m["bvo"] = {"Bolgo", 35038, "alv-bua", Latn} + +m["bvp"] = {"Bumang", 4997235, "mkh-pal"} + +m["bvq"] = {"Birri", 56514, "csu-bkr", Latn} + +m["bvr"] = {"Burarra", 4998124, "aus-arn", Latn} + +m["bvt"] = {"Bati (Indonesia)", 4869253, "poz-cma", Latn} + +m["bvu"] = {"Bukit Malay", 9230148, "poz-mly", Latn} + +m["bvv"] = {"Baniva", 3515198, "awd", Latn} + +m["bvw"] = {"Boga", 56262, "cdc-cbm", Latn} + +m["bvx"] = {"Babole", 35180, "bnt-ngn", Latn} + +m["bvy"] = {"Baybayanon", 16839275, "phi", Latn} + +m["bvz"] = {"Bauzi", 56360, "paa-egb", Latn} + +m["bwa"] = {"Bwatoo", 9232446, "poz-cln", Latn} + +m["bwb"] = {"Namosi-Naitasiri-Serua", 3130290, "poz-occ", Latn} + +m["bwc"] = {"Bwile", 3447440, "bnt-sbi", Latn} + +m["bwd"] = {"Bwaidoka", 2929111, "poz-ocw", Latn} + +m["bwe"] = {"Bwe Karen", 56994, "kar"} + +m["bwf"] = {"Boselewa", 4947229, "poz-ocw", Latn} + +m["bwg"] = {"Barwe", 8826802, "bnt-sna", Latn} + +m["bwh"] = 
{"Bishuo", 34973, "nic-fru", Latn} + +m["bwi"] = {"Baniwa", 3501735, "awd-nwk", Latn} + +m["bwj"] = {"Láá Láá Bwamu", 11017275, "nic-bwa", Latn} + +m["bwk"] = {"Bauwaki", 4873607, "ngf", Latn} + +m["bwl"] = {"Bwela", 5003678, "bnt-bun", Latn} + +m["bwm"] = {"Biwat", 56352, "paa-yua", Latn} + +m["bwn"] = {"Wunai Bunu", 56452, "hmn"} + +m["bwo"] = {"Shinasha", 56260, "omv-gon", Latn} + +m["bwp"] = {"Mandobo Bawah", 12636155, "ngf", Latn} + +m["bwq"] = {"Southern Bobo", 11001714, "dmn-snb", Latn} + +m["bwr"] = {"Bura", 56552, "cdc-cbm", Latn} + +m["bws"] = {"Bomboma", 9229429, "bnt-bun", Latn} + +m["bwt"] = {"Bafaw", 34853, "bnt-bbo", Latn} + +m["bwu"] = {"Buli (Ghana)", 35085, "nic-buk", Latn} + +m["bww"] = {"Bwa", 3515058, "bnt-bta", Latn} + +m["bwx"] = {"Bu-Nao Bunu", 56411, "hmn", Latn} + +m["bwy"] = {"Cwi Bwamu", 11150714, "nic-bwa", Latn} + +m["bwz"] = {"Bwisi", 35067, "bnt-sir", Latn} + +m["bxa"] = {"Bauro", 2892068, "poz-sls", Latn} + +m["bxb"] = {"Belanda Bor", 56678, "sdv-lon", Latn} + +m["bxc"] = {"Molengue", 13345, "bnt-kel", Latn} + +m["bxd"] = {"Pela", 57000, "tbq-brm"} + +m["bxe"] = {"Ongota", 36344, "cus", Latn} + +m["bxf"] = {"Bilur", 2903788, "poz-ocw", Latn} + +m["bxg"] = {"Bangala", 34989, "bnt-bmo", Latn} + +m["bxh"] = {"Buhutu", 4986329, "poz-ocw", Latn} + +m["bxi"] = {"Pirlatapa", 10632195, "aus-kar", Latn} + +m["bxj"] = {"Bayungu", 10427485, "aus-psw", Latn} + +m["bxk"] = {"Bukusu", 32930, "bnt-msl", Latn} + +m["bxl"] = {"Jalkunan", 11009787, "dmn-jje", Latn} + +m["bxn"] = {"Burduna", 4998313, "aus-psw", Latn} + +m["bxo"] = {"Barikanchi", 3450802, "crp", Latn, ancestors = {"ha"}} + +m["bxp"] = {"Bebil", 34941, "bnt-btb", Latn} + +m["bxq"] = {"Beele", 56238, "cdc-wst", Latn} + +m["bxs"] = {"Busam", 35189, "nic-grs", Latn} + +m["bxv"] = {"Berakou", 56796, "csu-bgr", Latn} + +m["bxw"] = {"Banka", 3438402, "dmn-smg", Latn} + +m["bxz"] = {"Binahari", 4913840, "ngf", Latn} + +m["bya"] = {"Palawan Batak", 3450443, "phi", {"Tagb"}} + +m["byb"] = 
{"Bikya", 33257, "nic-fru", Latn} + +m["byc"] = {"Ubaghara", 36625, "nic-ucn", Latn} + +m["byd"] = {"Benyadu'", 11173588, "day", Latn} + +m["bye"] = {"Pouye", 7235814, "paa-spk", Latn} + +m["byf"] = {"Bete", 32932, "nic-ykb", Latn} + +m["byg"] = {"Baygo", 56836, "sdv-daj", Latn} + +m["byh"] = {"Bujhyal", 56317, "sit-gma", Deva} + +m["byi"] = {"Buyu", 5003401, "bnt-nyb", Latn} + +m["byj"] = {"Binawa", 4913807, "nic-kau", Latn} + +m["byk"] = { + "Biao", 4902547, "qfa-tak", Latn -- also Hani? +} + +m["byl"] = {"Bayono", 3503856, "ngf", Latn} + +m["bym"] = {"Bidyara", 8842355, "aus-pam", Latn} + +m["byn"] = {"Blin", 56491, "cus", {"Ethi", "Latn"}, translit_module = "translit-redirect"} + +m["byo"] = {"Biyo", 56848, "tbq-lol", {"Latn", "Hani"}} + +m["byp"] = {"Bumaji", 4997234, "nic-ben", Latn} + +m["byq"] = {"Basay", 716647, "map", Latn} + +m["byr"] = {"Baruya", 3450812, "ngf", Latn} + +m["bys"] = {"Burak", 4998097, "alv-bwj", Latn} + +m["byt"] = {"Berti", 35008, "ssa-sah", Latn} + +m["byv"] = {"Medumba", 36019, "bai", Latn} + +m["byw"] = {"Belhariya", 32961, "sit-kie", Deva} + +m["byx"] = {"Qaqet", 3503009, "paa-bng", Latn} + +m["byz"] = {"Banaro", 56858, "paa", Latn} + +m["bza"] = {"Bandi", 34912, "dmn-msw", Latn} + +m["bzb"] = {"Andio", 4754487, "poz-slb", Latn} + +m["bzd"] = {"Bribri", 28400, "cba", Latn} + +m["bze"] = {"Jenaama Bozo", 10950633, "dmn-snb", Latn} + +m["bzf"] = {"Boikin", 56829, "paa-spk", Latn} + +m["bzg"] = {"Babuza", 716615, "map"} + +m["bzh"] = {"Mapos Buang", 2927370, "poz-ocw", Latn} + +m["bzi"] = {"Bisu", 56852, "tbq-lol", {"Latn", "Thai"}} + +m["bzj"] = {"Belizean Creole", 1363055, "crp", Latn, ancestors = {"en"}} + +m["bzk"] = {"Nicaraguan Creole", 3504097, "crp", Latn, ancestors = {"en"}} + +m["bzl"] = { -- supposedly also called "Bolano", but I can find no evidence of that + "Boano (Sulawesi)", 4931258, "poz", Latn +} + +m["bzm"] = {"Bolondo", 35071, "bnt-bun", Latn} + +m["bzn"] = {"Boano (Maluku)", 4931255, "poz-cma", Latn} + +m["bzo"] = 
{"Bozaba", 4952785, "bnt-ngn", Latn} + +m["bzp"] = {"Kemberano", 12634399, "ngf-sbh", Latn} + +m["bzq"] = {"Buli (Indonesia)", 2927952, "poz-hce", Latn} + +m["bzr"] = {"Biri", 4087011, "aus-pam", Latn} + +m["bzs"] = {"Brazilian Sign Language", 3436689, "sgn", Latn} + +m["bzu"] = {"Burmeso", 56746, "paa-wpa", Latn} + +m["bzv"] = {"Bebe", 34977, "nic-bbe", Latn} + +m["bzw"] = {"Basa", 34898, "nic-bas", Latn} + +m["bzx"] = {"Hainyaxo Bozo", 11159536, "dmn-snb", Latn} + +m["bzy"] = {"Obanliku", 36276, "nic-ben", Latn} + +m["bzz"] = {"Evant", 35259, "nic-tvc", Latn} + +return m diff --git a/wiktra/wikt/translit/languages/data3/c.lua b/wiktra/wikt/translit/languages/data3/c.lua new file mode 100644 index 0000000..25bb651 --- /dev/null +++ b/wiktra/wikt/translit/languages/data3/c.lua @@ -0,0 +1,697 @@ +local u = mw.ustring.char + +-- UTF-8 encoded strings for some commonly-used diacritics +local GRAVE = u(0x0300) +local ACUTE = u(0x0301) +local CIRC = u(0x0302) +local TILDE = u(0x0303) +local MACRON = u(0x0304) +local OVERLINE = u(0x0305) +local BREVE = u(0x0306) +local DOTABOVE = u(0x0307) +local DIAER = u(0x0308) +local CARON = u(0x030C) +local DGRAVE = u(0x030F) +local INVBREVE = u(0x0311) +local DOTBELOW = u(0x0323) +local RINGBELOW = u(0x0325) +local CEDILLA = u(0x0327) + +-- Puncuation to be used for standardChars field +local PUNCTUATION = " !#$%&*+,-./:;<=>?@^_`|~'()" + +local Cyrl = {"Cyrl"} +local Deva = {"Deva"} +local Latn = {"Latn"} + +local m = {} + +m["caa"] = {"Ch'orti'", 35177, "myn", Latn} + +m["cab"] = {"Garifuna", 35490, "awd-taa", Latn, ancestors = {"awd-taa-pro"}} + +m["cac"] = {"Chuj", 35233, "myn", Latn} + +m["cad"] = {"Caddo", 56756, "cdd", Latn} + +m["cae"] = {"Laalaa", 35564, "alv-cng", Latn} + +m["caf"] = {"Southern Carrier", 12953426, "ath-nor", Latn} + +m["cag"] = {"Nivaclé", 3182557, "sai-mtc", Latn} + +m["cah"] = {"Cahuarano", 2933175, "sai-zap", Latn} + +m["caj"] = {"Chané", 56721, "awd", Latn} + +m["cak"] = {"Kaqchikel", 35115, "myn", 
Latn} + +m["cal"] = {"Carolinian", 28427, "poz-mic", Latn} + +m["cam"] = {"Cemuhî", 3009690, "poz-cln", Latn} + +m["can"] = {"Chambri", 5069707, "paa-lsp", Latn} + +m["cao"] = {"Chácobo", 2591202, "sai-pan", Latn} + +m["cap"] = {"Chipaya", 35235, "sai-ucp", Latn} + +m["caq"] = {"Car Nicobarese", 35156, "aav-nic", Latn} + +m["car"] = {"Galibi Carib", 56611, "sai-car", Latn} + +m["cas"] = {"Tsimané", 35950, "qfa-iso", Latn} + +m["cav"] = {"Cavineña", 524102, "sai-tac", Latn} + +m["caw"] = {"Callawalla", 266417, "qfa-mix", Latn} + +m["cax"] = {"Chiquitano", 1844993, "qfa-iso", Latn} + +m["cay"] = {"Cayuga", 32967, "iro", Latn} + +m["caz"] = {"Canichana", 2936374, "qfa-iso", Latn} + +m["cbb"] = {"Cabiyarí", 3450660, "awd-nwk", Latn} + +m["cbc"] = {"Carapana", 924405, "sai-tuc", Latn} + +m["cbd"] = {"Carijona", 3446655, "sai-car", Latn} + +m["cbg"] = {"Chimila", 2963680, "cba", Latn} + +m["cbi"] = {"Chachi", 2591329, "sai-bar", Latn} + +m["cbj"] = {"Ede Cabe", 33112829, "alv-ede", Latn} + +m["cbk"] = {"Chavacano", 33281, "crp", Latn, ancestors = {"es"}} + +m["cbl"] = {"Bualkhaw Chin", 9229830, "tbq-kuk", Latn} + +m["cbn"] = {"Nyah Kur", 116849, "mkh-mnc", {"Thai"}, ancestors = {"omx"}, sort_key = {from = {"[%pๆ]", "[็-๎]", "([เแโใไ])([ก-ฮ])"}, to = {"", "", "%2%1"}}} + +m["cbo"] = {"Izora", 3915454, "nic-jer", Latn} + +m["cbq"] = {"Shuba", nil, "nic-knj", Latn, ancestors = {"lan"}} + +m["cbr"] = {"Cashibo-Cacataibo", 5359560, "sai-pan", Latn} + +m["cbs"] = {"Cashinahua", 2591230, "sai-pan", Latn} + +m["cbt"] = {"Chayahuita", 1526525, "sai-cah", Latn} + +m["cbu"] = {"Candoshi-Shapra", 642843, "qfa-iso", Latn} + +m["cbv"] = {"Cacua", 3192052, "sai-nad", Latn, ancestors = {"mbr"}} + +m["cbw"] = {"Kinabalian", 6410324, "phi", Latn} + +m["cby"] = {"Carabayo", 3441762, "sai-tyu", Latn} + +m["cca"] = {"Cauca", 5054242, "sai-chc", Latn} + +m["ccc"] = {"Chamicuro", 2155119, "awd", Latn} + +m["ccd"] = {"Cafundó", 3331506, "roa-ibe", Latn, ancestors = {"pt"}} + +m["cce"] = 
{"Chopi", 3437616, "bnt-bso", Latn} + +m["ccg"] = {"Chamba Daka", 33120805, "nic-dak", Latn} + +m["cch"] = {"Atsam", 34794, "nic-kne", Latn} + +m["ccj"] = {"Kasanga", 35542, "alv-nyn", Latn} + +m["ccl"] = {"Cutchi-Swahili", 5196729, "crp", Latn, ancestors = {"sw"}} + +m["ccm"] = {"Malaccan Creole Malay", 12636092, "crp", Latn, ancestors = {"ms"}} + +m["cco"] = {"Comaltepec Chinantec", 2963735, "omq-chi", Latn} + +m["ccp"] = {"Chakma", 32952, "inc-eas", {"Cakm"}, ancestors = {"inc-obn"}} + +m["ccr"] = {"Cacaopera", 3438338, "nai-min", Latn} + +m["cda"] = {"Choni", 2964447, "sit-tib"} + +m["cde"] = {"Chenchu", 32981, "dra", {"Telu"}, ancestors = {"te"}} + +m["cdf"] = {"Chiru", 5102016, "tbq-kuk", {"Latn", "Beng"}} + +m["cdh"] = {"Chambeali", 12953424, "him", {"Deva", "Takr"}, translit_module = "hi-translit"} + +m["cdi"] = {"Chodri", 5103788, "inc-bhi", {"Gujr"}} + +m["cdj"] = {"Churahi", 12629039, "him", translit_module = "hi-translit"} + +m["cdm"] = {"Chepang", 5091700, "sit-gma", Deva} + +m["cdn"] = {"Chaudangsi", 5088056, "sit-alm"} + +m["cdo"] = {"Min Dong", 36455, "zhx-min-hai", {"Hani"}} + +m["cdr"] = {"Cinda-Regi-Tiyal", 35596, "nic-kmk", Latn} + +m["cds"] = { + "Chadian Sign Language", 10322099, "sgn", Latn -- when documented +} + +m["cdy"] = {"Chadong", 926742, "qfa-kms"} + +m["cdz"] = {"Koda", 6425038, "mun", {"Beng"}} + +m["cea"] = {"Lower Chehalis", 6693377, "sal", Latn} + +m["ceb"] = {"Cebuano", 33239, "phi", Latn} + +m["ceg"] = {"Chamacoco", 3436637, "sai-zam", Latn} + +m["cen"] = {"Cen", 12628777, "nic-plc", Latn, ancestors = {"izr"}} + +m["cet"] = {"Centúúm", 33608, "qfa-iso", Latn} + +m["cfa"] = {"Dijim-Bwilim", 3438350, "alv-wjk", Latn} + +m["cfd"] = {"Cara", 35048, "nic-beo", Latn} + +m["cfg"] = {"Como Karim", 35304, "nic-jkn", Latn} + +m["cfm"] = {"Falam Chin", 56815, "tbq-kuk", {"Beng", "Latn"}} + +m["cga"] = {"Changriwa", 5072105, "paa-yua", Latn} + +m["cgc"] = {"Kagayanen", 6346422, "mno", Latn} + +m["cgg"] = {"Rukiga", 3270727, "bnt-nyg", 
Latn} + +m["cgk"] = {"Chocangaca", 56604, "sit-tib", {"Tibt"}, ancestors = {"xct"}} + +m["chb"] = {"Chibcha", 2356431, "cba"} + +m["chc"] = {"Catawba", 5051602, "nai-cat", Latn} + +m["chd"] = {"Highland Oaxaca Chontal", 2964457, "nai-tqn", Latn} + +m["chf"] = {"Tabasco Chontal", 35175, "myn", Latn} + +m["chg"] = {"Chagatai", 36831, "trk-kar", {"Arab"}, translit_module = "ar-translit", entry_name = {from = {u(0x0671), u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652), u(0x0670), u(0x0640)}, to = {u(0x0627)}}} + +m["chh"] = {"Chinook", 6693380, "nai-ckn", Latn} + +m["chj"] = {"Ojitlán Chinantec", 5100110, "omq-chi", Latn} + +m["chk"] = {"Chuukese", 33161, "poz-mic", Latn} + +m["chl"] = {"Cahuilla", 56438, "azc-cup", Latn, entry_name = {from = {"Á", "á", "É", "é", "Í", "í", "Ó", "ó", "Ú", "ú", MACRON}, to = {"A", "a", "E", "e", "I", "i", "O", "o", "U", "u"}}} + +m["chm"] = {"Eastern Mari", 973685, "urj", Cyrl, translit_module = "chm-translit", override_translit = true, wikimedia_codes = {"mhr"}, entry_name = {from = {GRAVE, ACUTE}, to = {}}} + +m["chn"] = {"Chinook Jargon", 35173, "crp", {"Latn", "Dupl"}, ancestors = {"chh", "nuk"}} + +m["cho"] = {"Choctaw", 32979, "nai-mus", Latn} + +m["chp"] = {"Chipewyan", 27692, "ath-nor", {"Latn", "Cans"}} + +m["chq"] = {"Quiotepec Chinantec", 5758709, "omq-chi", Latn} + +m["chr"] = {"Cherokee", 33388, "iro", {"Cher"}, translit_module = "Cher-translit"} + +m["cht"] = {"Cholón", 2591243, nil, Latn} + +m["chw"] = {"Chuabo", 5118412, "bnt-mak", Latn} + +m["chx"] = {"Chantyal", 4926344, "sit-tam", Deva} + +m["chy"] = { + "Cheyenne", + 33265, + "alg", + Latn, + sort_key = {from = {"[àáāȧ]", "[èéēė]", "[òóōȯ]", GRAVE, ACUTE, MACRON, DOTABOVE, "-"}, to = {"a", "e", "o"}}, + standardChars = "0-9'AEHKMNOPSŠTVXaehkmnopsštvxÀÁĀȦÈÉĒĖÒÓŌȮàáāȧèéēėòóōȯ" .. 
PUNCTUATION -- umlaut and circumflex not allowed +} + +m["chz"] = {"Ozumacín Chinantec", 5100111, "omq-chi", Latn} + +m["cia"] = {"Cia-Cia", 35284, "poz-mun", {"Hang", "Latn", "Arab"}} + +m["cib"] = {"Ci Gbe", 12952445, "alv-gbe", Latn} + +m["cic"] = {"Chickasaw", 33192, "nai-mus", Latn} + +m["cid"] = {"Chimariko", 1294251, "qfa-iso", Latn} + +m["cie"] = {"Cineni", 56243, "cdc-cbm", Latn} + +m["cih"] = {"Chinali", 11855245, "inc", Deva, ancestors = {"sa"}} + +m["cik"] = {"Chitkuli Kinnauri", 15615982, "sit-kin"} + +m["cim"] = {"Cimbrian", 37053, "gmw", Latn, varieties = {"Luserna Cimbrian", "Sette Comuni Cimbrian", "Thirteen Communities Cimbrian"}, ancestors = {"bar"}, entry_name = {from = {"à", "ì", "ù"}, to = {"a", "i", "u"}}, sort_key = {from = {"å", "č", "[èé]", "[òóôö]", "ü"}, to = {"a", "c", "e", "o", "u"}}} + +m["cin"] = {"Cinta Larga", 5121095, "tup", Latn} + +m["cip"] = {"Chiapanec", 3364475, "omq", Latn} + +m["cir"] = {"Tiri", 7862281, "poz-cln", Latn} + +m["ciy"] = {"Chaima", 12628867, "sai-car", Latn} + +m["cja"] = { + "Western Cham", 12645578, "cmc", {"Arab"} -- Western Cham script is not yet available. 
+} + +m["cje"] = {"Chru", 2967321, "cmc", Latn} + +m["cjh"] = {"Upper Chehalis", 2962074, "sal", Latn} + +m["cji"] = {"Chamalal", 56567, "cau-ava", Cyrl} + +m["cjk"] = {"Chokwe", 2422065, "bnt-clu", Latn} + +m["cjm"] = {"Eastern Cham", 2948019, "cmc", {"Cham", "Latn"}} + +m["cjn"] = {"Chenapian", 5091044, "paa-spk", Latn} + +m["cjo"] = {"Ashéninka Pajonal", 3450481, "awd", Latn} + +m["cjp"] = {"Cabécar", 27878, "cba", Latn} + +m["cjs"] = {"Shor", 34139, "trk-sib", Cyrl} + +m["cjv"] = {"Chuave", 5115226, "ngf", Latn} + +m["cjy"] = {"Jin", 56479, "zhx", {"Hani"}, ancestors = {"ltc"}} + +m["ckb"] = {"Central Kurdish", 36811, "ku", {"ku-Arab"}, translit_module = "ckb-translit", entry_name = {from = {u(0x0650), u(0x0652)}, to = {}}, ancestors = {"ku-pro"}} + +m["ckh"] = {"Chak", 12628870, "sit-luu", Latn, ancestors = {"kdv"}} + +m["ckl"] = {"Cibak", 56279, "cdc-cbm", Latn} + +m["ckn"] = {"Kaang Chin", 6343432, "tbq-kuk", Latn} + +m["cko"] = {"Anufo", 34845, "alv-ctn", Latn} + +m["ckq"] = {"Kajakse", 3440422, "cdc-est", Latn} + +m["ckr"] = {"Kairak", 3503002, "paa-bng", Latn} + +m["cks"] = {"Tayo", 1133089, "crp", Latn, ancestors = {"fr"}} + +m["ckt"] = {"Chukchi", 33170, "qfa-cka", Cyrl} + +m["cku"] = {"Koasati", 35162, "nai-mus", Latn} + +m["ckv"] = {"Kavalan", 716627, "map", Latn} + +m["ckx"] = {"Caka", 5018037, "nic-tvc", Latn} + +m["cky"] = {"Cakfem-Mushere", 3441199, "cdc-wst", Latn} + +m["ckz"] = {"Cakchiquel-Quiché Mixed Language", 5054550, "myn", Latn} + +m["cla"] = {"Ron", 3440432, "cdc-wst", Latn} + +m["clc"] = {"Chilcotin", 28535, "ath-nor", Latn} + +m["cld"] = {"Chaldean Neo-Aramaic", 33236, "sem-are", {"Syrc"}} + +m["cle"] = {"Lealao Chinantec", 6509365, "omq-chi", Latn} + +m["clh"] = {"Chilisso", 3250629, "inc-dar"} + +m["cli"] = {"Chakali", 35206, "nic-gnw", Latn} + +m["clj"] = {"Laitu Chin", 6474196, "tbq-kuk"} + +m["clk"] = {"Idu", 56412, "sit-gsi", {"Tibt"}} + +m["cll"] = {"Chala", 35190, "nic-gne", Latn} + +m["clm"] = {"Klallam", 33404, "sal", Latn} + 
+m["clo"] = {"Lowland Oaxaca Chontal", 2964450, "nai-tqn", Latn} + +m["clt"] = {"Lautu Chin", 6502107, "tbq-kuk"} + +m["clu"] = {"Caluyanun", 32964, "phi", Latn} + +m["clw"] = {"Chulym", 33125, "trk-sib", {"Latn", "Cyrl"}} + +m["cly"] = {"Eastern Highland Chatino", 12642078, "omq-cha", Latn} + +m["cma"] = {"Maa", 12953680, "mkh-ban", Latn} + +m["cme"] = {"Cerma", 35074, "nic-gur", Latn} + +m["cmg"] = {"Classical Mongolian", 5128303, "xgn", {"Mong"}, ancestors = {"xng"}, translit_module = "mn-translit"} + +m["cmi"] = {"Emberá-Chamí", 3052042, "sai-chc", Latn} + +m["cml"] = {"Campalagian", 5027893, "poz-ssw", Latn} + +m["cmm"] = {"Michigamea", 12636809, "sio-msv", Latn} + +m["cmn"] = {"Mandarin", 9192, "zhx", {"Hani", "Latn"}, ancestors = {"ltc"}, wikimedia_codes = {"zh"}} + +m["cmo"] = {"Central Mnong", 33369881, "mkh-ban"} + +m["cmr"] = {"Mro Chin", 16889978, "tbq-kuk"} + +m["cms"] = { + "Messapic", 36383, "ine", Latn -- when documented here; otherwise written in a native script +} + +m["cmt"] = {"Camtho", 10441336, "crp", Latn, ancestors = {"fly", "zu"}} + +m["cna"] = {"Changthang", 12952322, "sit-lab", {"Tibt"}} + +m["cnb"] = {"Chinbon Chin", 12952327, "tbq-kuk"} + +m["cnc"] = {"Côông", 5202780, "tbq-lol"} + +m["cng"] = {"Northern Qiang", 56559, "sit-qia"} + +m["cnh"] = {"Lai", 3250286, "tbq-kuk"} + +m["cni"] = {"Asháninka", 3437230, "awd", Latn} + +m["cnk"] = {"Khumi Chin", 56308, "tbq-kuk"} + +m["cnl"] = {"Lalana Chinantec", 12953437, "omq-chi", Latn} + +m["cno"] = {"Con", 3440883, "mkh-pal"} + +m["cns"] = {"Central Asmat", 11732048, "ngf", Latn} + +m["cnt"] = {"Tepetotutla Chinantec", 5100113, "omq-chi", Latn} + +m["cnu"] = {"Chenoua", 33276, "ber"} + +m["cnw"] = {"Ngawn Chin", 6583675, "tbq-kuk"} + +m["cnx"] = {"Middle Cornish", 12642603, "cel-bry", Latn, ancestors = {"oco"}} + +m["coa"] = {"Cocos Islands Malay", 3441699, "crp", Latn, ancestors = {"ms"}} + +m["cob"] = {"Chicomuceltec", 3307204, "myn", Latn} + +m["coc"] = {"Cocopa", 33044, "nai-yuc", Latn} + 
+m["cod"] = {"Cocama", 33317, "tup", Latn} + +m["coe"] = {"Koreguaje", 3198924, "sai-tuc", Latn} + +m["cof"] = {"Tsafiki", 2567055, "sai-bar", Latn} + +m["cog"] = {"Chong", 3914630, "mkh-pea", {"Thai", "Khmr"}, sort_key = {from = {"[%pๆ]", "[็-๎]", "([เแโใไ])([ก-ฮ])"}, to = {"", "", "%2%1"}}} + +m["coh"] = {"Chichonyi-Chidzihana-Chikauma", 12629011, "bnt-mij", Latn} + +m["coj"] = {"Cochimi", 3915551, "nai-yuc", Latn} + +m["cok"] = {"Santa Teresa Cora", 12641754, "azc", Latn} + +m["col"] = {"Columbia-Wenatchi", 3324744, "sal", Latn} + +m["com"] = {"Comanche", 32972, "azc-num", Latn} + +m["con"] = {"Cofán", 2669254, "qfa-iso", Latn} + +m["coo"] = {"Comox", 13583746, "sal", Latn} + +m["cop"] = { + "Coptic", + 36155, + "egx", + -- Don't include varieties in [[Module:etymology languages/data]] + {"Copt"}, + translit_module = "Copt-translit", + ancestors = {"egx-dem"}, + entry_name = {from = {"ˋ", GRAVE, MACRON, DIAER, OVERLINE}, to = {}}, + sort_key = "cop-sortkey" +} + +m["coq"] = {"Coquille", 12953452, "ath-pco", Latn} + +m["cot"] = {"Caquinte", 3915557, "awd", Latn} + +m["cou"] = {"Wamey", 36935, "alv-ten", Latn} + +m["cov"] = {"Cao Miao", 2936935, "qfa-tak"} + +m["cow"] = {"Cowlitz", 3001877, "sal", Latn} + +m["cox"] = {"Nanti", 15342275, "awd", Latn} + +m["coy"] = {"Coyaima", 56450, "sai-car", Latn} + +m["coz"] = {"Chochotec", 2964262, "omq-pop", Latn} + +m["cpa"] = {"Palantla Chinantec", 5100112, "omq-chi", Latn} + +m["cpb"] = {"Ucayali-Yurúa Ashéninka", 3501858, "awd", Latn} + +m["cpc"] = {"Ajyíninka Apurucayali", 3327405, "awd", Latn} + +m["cpg"] = { + "Cappadocian Greek", + 853414, + "grk", + {"polytonic", "fa-Arab"}, + ancestors = {"grc"}, + translit_module = "grc-translit", + sort_key = { -- Keep this synchronized with el, grc, pnt + from = {"[ᾳάᾴὰᾲᾶᾷἀᾀἄᾄἂᾂἆᾆἁᾁἅᾅἃᾃἇᾇ]", "[έὲἐἔἒἑἕἓ]", "[ῃήῄὴῂῆῇἠᾐἤᾔἢᾒἦᾖἡᾑἥᾕἣᾓἧᾗ]", "[ίὶῖἰἴἲἶἱἵἳἷϊΐῒῗ]", "[όὸὀὄὂὁὅὃ]", "[ύὺῦὐὔὒὖὑὕὓὗϋΰῢῧ]", "[ῳώῴὼῲῶῷὠᾠὤᾤὢᾢὦᾦὡᾡὥᾥὣᾣὧᾧ]", "ῥ", "ς"}, + to = {"α", "ε", "η", "ι", "ο", "υ", "ω", "ρ", "σ"} 
+ } +} + +m["cpi"] = {"Chinese Pidgin English", 3435078, "crp", Latn, ancestors = {"en"}} + +m["cpn"] = {"Cherepon", 35181, "alv-gng", Latn} + +m["cpo"] = {"Kpee", 6435722, "dmn-jje"} + +m["cps"] = {"Capiznon", 2937525, "phi", Latn} + +m["cpu"] = {"Pichis Ashéninka", 7190661, "awd", Latn} + +m["cpx"] = {"Puxian", 56583, "zhx-min-hai", {"Hani"}} + +m["cpy"] = {"South Ucayali Ashéninka", 3501868, "awd", Latn} + +m["cqd"] = {"Chuanqiandian Cluster Miao", 3307894, "hmn", {"Latn", "Plrd"}} + +m["cra"] = {"Chara", 5073694, "omv", Latn} + +m["crb"] = {"Island Carib", 3450735, "awd-taa", Latn, ancestors = {"awd-taa-pro"}} + +m["crc"] = {"Lonwolwol", 3259216, "poz-oce", Latn} + +m["crd"] = {"Coeur d'Alene", 32915, "sal", Latn} + +m["crf"] = {"Caramanta", 3504195, "sai-chc", Latn} + +m["crg"] = {"Michif", 13315, "qfa-mix", Latn, ancestors = {"cr", "fr"}} + +m["crh"] = {"Crimean Tatar", 33357, "trk-kcu", {"Latn", "Cyrl"}} + +m["cri"] = {"Sãotomense", 36536, "crp", Latn, ancestors = {"pt"}} + +m["crj"] = {"Southern East Cree", 12953464, "alg", {"Cans"}, ancestors = {"cr"}, translit_module = "cr-translit"} + +m["crk"] = {"Plains Cree", 56699, "alg", {"Cans", "Latn"}, ancestors = {"cr"}} + +m["crl"] = {"Northern East Cree", 12642195, "alg", {"Cans"}, ancestors = {"cr"}, translit_module = "cr-translit"} + +m["crm"] = {"Moose Cree", 3446671, "alg", {"Cans"}, ancestors = {"cr"}} + +m["crn"] = {"Cora", 12953454, "azc", Latn} + +m["cro"] = {"Crow", 1207611, "sio-mor", Latn} + +m["crq"] = {"Iyo'wujwa Chorote", 3540927, "sai-mtc", Latn} + +m["crr"] = {"Carolina Algonquian", 16113723, "alg-eas", Latn} + +m["crs"] = {"Seychellois Creole", 34015, "crp", Latn, ancestors = {"fr"}} + +m["crt"] = {"Iyojwa'ja Chorote", 3504118, "sai-mtc", Latn} + +m["crv"] = {"Chaura", 2605680, "aav-nic"} + +m["crw"] = {"Chrau", 5105629, "mkh-ban", Latn} + +m["crx"] = {"Carrier", 12953431, "ath-nor", {"Latn", "Cans"}} + +m["cry"] = {"Cori", 35204, "nic-plc", Latn} + +m["crz"] = {"Cruzeño", 2967636, "nai-chu", 
Latn} + +m["csa"] = {"Chiltepec Chinantec", 12953435, "omq-chi", Latn} + +m["csb"] = {"Kashubian", 33690, "zlw-lch", Latn, ancestors = {"zlw-pom"}} + +m["csc"] = { + "Catalan Sign Language", 35768, "sgn", Latn -- when documented +} + +m["csd"] = {"Chiangmai Sign Language", 5095211, "sgn"} + +m["cse"] = { + "Czech Sign Language", 5201809, "sgn", Latn -- when documented +} + +m["csf"] = { + "Cuban Sign Language", 5192046, "sgn", Latn -- when documented +} + +m["csg"] = { + "Chilean Sign Language", 3322112, "sgn", Latn -- when documented +} + +m["csh"] = {"Asho Chin", 12627282, "tbq-kuk"} + +m["csi"] = {"Coast Miwok", 2981109, "nai-you", Latn} + +m["csj"] = {"Songlai Chin", 7561280, "tbq-kuk"} + +m["csk"] = {"Jola-Kasa", 3446622, "alv-jol", Latn} + +m["csl"] = {"Chinese Sign Language", 1094190, "sgn"} + +m["csm"] = {"Central Sierra Miwok", 2944443, "nai-you", Latn} + +m["csn"] = { + "Colombian Sign Language", 2748229, "sgn", Latn -- when documented +} + +m["cso"] = {"Sochiapam Chinantec", 7550388, "omq-chi", Latn} + +m["csq"] = {"Croatian Sign Language", 3507506, "sgn"} + +m["csr"] = { + "Costa Rican Sign Language", 5174901, "sgn", Latn -- when documented +} + +m["css"] = {"Southern Ohlone", 25559664, "nai-you", Latn} + +m["cst"] = {"Northern Ohlone", 25559666, "nai-you", Latn} + +m["csv"] = {"Sumtu Chin", 7638087, "tbq-kuk"} + +m["csw"] = {"Swampy Cree", 56696, "alg", {"Latn", "Cans"}, ancestors = {"cr"}} + +m["csy"] = {"Siyin Chin", 7533375, "tbq-kuk"} + +m["csz"] = {"Coos", 3126783, "nai-coo", Latn} + +m["cta"] = {"Tataltepec Chatino", 7687853, "omq-cha", Latn} + +m["ctc"] = {"Chetco-Tolowa", 12628946, "ath-pco", Latn} + +m["ctd"] = {"Tedim Chin", 56357, "tbq-kuk", {"Latn", "Pauc"}} + +m["cte"] = {"Tepinapa Chinantec", 12953443, "omq-chi", Latn} + +m["ctg"] = {"Chittagonian", 33173, "inc-eas", {"Beng"}, ancestors = {"inc-obn"}} + +m["cth"] = {"Thaiphum Chin", 16912048, "tbq-kuk"} + +m["ctl"] = {"Tlacoatzintepec Chinantec", 12643657, "omq-chi", Latn} + +m["ctm"] = 
{"Chitimacha", 1294227, "qfa-iso", Latn} + +m["ctn"] = {"Chhintange", 32994, "sit-kie", Deva} + +m["cto"] = {"Emberá-Catío", 3052039, "sai-chc", Latn} + +m["ctp"] = {"Western Highland Chatino", 32861734, "omq-cha", Latn, sort_key = {from = {"á", "é", "í", "ó", "ú"}, to = {"a", "e", "i", "o", "u"}}, entry_name = {from = {"[¹²³⁴⁵]"}, to = {}}} + +m["cts"] = {"Northern Catanduanes Bicolano", 7130477, "phi", Latn} + +m["ctt"] = {"Wayanad Chetti", 7975850, "dra", {"Taml"}} + +m["ctu"] = {"Chol", 35179, "myn", Latn} + +m["ctz"] = {"Zacatepec Chatino", 8063754, "omq-cha", Latn} + +m["cua"] = {"Cua", 3441115, "mkh-ban", Latn} + +m["cub"] = {"Cubeo", 3006705, "sai-tuc", Latn} + +m["cuc"] = {"Usila Chinantec", 7901979, "omq-chi", Latn} + +m["cug"] = {"Cung", 35194, "nic-bbe", Latn} + +m["cuh"] = {"Chuka", 12952344, "bnt-kka", Latn} + +m["cui"] = {"Cuiba", 2980421, "sai-guh", Latn} + +m["cuj"] = {"Mashco Piro", 3446596, "awd", Latn} + +m["cuk"] = {"Kuna", 12953659, "cba", Latn} + +m["cul"] = {"Culina", 2475442, "auf", Latn} + +m["cuo"] = {"Cumanagoto", 5193784, "sai-cpc", Latn} + +m["cup"] = {"Cupeño", 143130, "azc-cup", Latn} + +m["cuq"] = {"Cun", 2475478, "qfa-lic", Latn} + +m["cur"] = {"Chhulung", 5116126, "sit-kie", Deva} + +m["cut"] = {"Teutila Cuicatec", 12953453, "omq-cui", Latn} + +m["cuu"] = {"Tai Ya", 3441122, "qfa-tak", Latn} + +m["cuv"] = {"Cuvok", 3515056, "cdc-cbm", Latn} + +m["cuw"] = {"Chukwa", 12629033, "sit-kic"} + +m["cux"] = {"Tepeuxila Cuicatec", 20527242, "omq-cui", Latn} + +m["cuy"] = {"Cuitlatec", 2030998, "qfa-iso", Latn} + +m["cvg"] = {"Chug", 47683644, "sit-khb"} + +m["cvn"] = {"Valle Nacional Chinantec", 12953442, "omq-chi", Latn} + +m["cwa"] = {"Kabwa", 6344537, "bnt-lok", Latn} + +m["cwb"] = {"Maindo", 11002891, "bnt-mak", Latn, ancestors = {"chw"}} + +m["cwd"] = {"Woods Cree", 56305, "alg", {"Latn", "Cans"}, ancestors = {"cr"}} + +m["cwe"] = {"Kwere", 779632, "bnt-ruv", Latn} + +m["cwg"] = {"Chewong", 646718, "mkh-asl", Latn} + +m["cwt"] = 
{"Kuwaataay", 35699, "alv-jol", Latn} + +m["cya"] = {"Nopala Chatino", 15616302, "omq-cha", Latn} + +m["cyb"] = {"Cayubaba", 3183382, "qfa-iso", Latn} + +m["cyo"] = {"Cuyunon", 33153, "phi", Latn} + +m["czh"] = { + "Huizhou", + 56546, + "zhx", + {"Hani"}, -- ? + ancestors = {"ltc"} +} + +m["czk"] = {"Knaanic", 56384, "zlw", {"Hebr"}, ancestors = {"zlw-ocs"}} + +m["czn"] = {"Zenzontepec Chatino", 603106, "omq-cha", Latn} + +m["czo"] = {"Min Zhong", 56435, "zhx-min-shn", {"Hani"}} + +m["czt"] = {"Zotung Chin", 8074599, "tbq-kuk", Latn} + +return m diff --git a/wiktra/wikt/translit/languages/data3/d.lua b/wiktra/wikt/translit/languages/data3/d.lua new file mode 100644 index 0000000..5403450 --- /dev/null +++ b/wiktra/wikt/translit/languages/data3/d.lua @@ -0,0 +1,608 @@ +local u = mw.ustring.char + +-- UTF-8 encoded strings for some commonly used diacritics +local GRAVE = u(0x0300) +local ACUTE = u(0x0301) +local CIRC = u(0x0302) +local TILDE = u(0x0303) +local MACRON = u(0x0304) +local BREVE = u(0x0306) +local DOTABOVE = u(0x0307) +local DIAER = u(0x0308) +local CARON = u(0x030C) +local DGRAVE = u(0x030F) +local INVBREVE = u(0x0311) +local DOTBELOW = u(0x0323) +local RINGBELOW = u(0x0325) +local CEDILLA = u(0x0327) + +-- Punctuation to be used for standardChars field +local PUNCTUATION = " !#$%&*+,-./:;<=>?@^_`|~'()" + +local Deva = {"Deva"} +local Latn = {"Latn"} + +local m = {} + +m["daa"] = {"Dangaléat", 942591, "cdc-est", Latn} + +m["dac"] = {"Dambi", 12629491, "poz-ocw", Latn} + +m["dad"] = {"Marik", 6763404, "poz-ocw", Latn} + +m["dae"] = {"Duupa", 35263, "alv-dur", Latn} + +m["dag"] = {"Dagbani", 32238, "nic-dag", Latn} + +m["dah"] = {"Gwahatike", 5623246, "ngf-fin", Latn} + +m["dai"] = {"Day", 35163, "alv-mbd", Latn} + +m["daj"] = {"Dar Fur Daju", 56370, "sdv-daj", Latn} + +m["dak"] = {"Dakota", 530384, "sio-dkt", Latn} + +m["dal"] = {"Dahalo", 35143, "cus", Latn} + +m["dam"] = {"Damakawa", 1158134, "nic-knn", Latn} + +m["dao"] = {"Daai Chin", 860029, 
"tbq-kuk", Latn} + +m["daq"] = {"Dandami Maria", 12952805, "dra", ancestors = {"gon"}} + +m["dar"] = {"Dargwa", 32332, "cau-drg", {"Cyrl"}, translit_module = "dar-translit", override_translit = true} + +m["das"] = {"Daho-Doo", 3915369, "kro-wee", Latn} + +m["dau"] = {"Dar Sila Daju", 7514020, "sdv-daj", Latn} + +m["dav"] = {"Taita", 2387274, "bnt-cht", Latn} + +m["daw"] = {"Davawenyo", 5228174, "phi", Latn} + +m["dax"] = {"Dayi", 10467281, "aus-yol", Latn} + +m["daz"] = {"Dao", 5221513, "ngf", Latn} + +m["dba"] = {"Bangi Me", 1982696, "qfa-iso", Latn} + +m["dbb"] = {"Deno", 56275, "cdc-wst", Latn} + +m["dbd"] = {"Dadiya", 3914436, "alv-wjk", Latn} + +m["dbe"] = {"Dabe", 5207451, "paa-tkw", Latn} + +m["dbf"] = {"Edopi", 12953516, "paa-lkp", Latn} + +m["dbg"] = {"Dogul Dom", 3912880, "nic-npd", Latn} + +m["dbi"] = {"Doka", 3913293, "nic-plc", Latn} + +m["dbj"] = {"Ida'an", 3041552, "poz-san", Latn} + +m["dbl"] = {"Dyirbal", 35465, "aus-dyb", Latn} + +m["dbm"] = {"Duguri", 7194057, "nic-jrw", Latn} + +m["dbn"] = {"Duriankere", 5316627, "ngf-sbh", Latn} + +m["dbo"] = {"Dulbu", 5313310, "nic-jrn", Latn} + +m["dbp"] = {"Duwai", 56301, "cdc-wst", Latn} + +m["dbq"] = {"Daba", 3913342, "cdc-cbm", Latn} + +m["dbr"] = {"Dabarre", 3447286, "cus"} + +m["dbt"] = {"Ben Tey", 4886561, "nic-nwa", Latn} + +m["dbu"] = {"Bondum Dom Dogon", 3912758, "nic-npd", Latn} + +m["dbv"] = {"Dungu", 5315230, "nic-kau", Latn} + +m["dbw"] = {"Bankan Tey Dogon", 4856243, "nic-nwa", Latn} + +m["dby"] = {"Dibiyaso", 5272268, "ngf", Latn} + +m["dcc"] = {"Deccani", 669431, "inc-hnd", {"ur-Arab"}, ancestors = {"ur"}} + +m["dcr"] = {"Negerhollands", 1815830, "crp", Latn, ancestors = {"nl"}} + +m["dda"] = {"Dadi Dadi", nil, "aus-pam", Latn} + +m["ddd"] = {"Dongotono", 56676, "sdv-lma"} + +m["dde"] = {"Doondo", 11003401, "bnt-kng", Latn} + +m["ddg"] = {"Fataluku", 35353, "qfa-tap", Latn} + +m["ddi"] = {"Diodio", 3028668, "poz-ocw", Latn} + +m["ddj"] = {"Jaru", 3162806, "aus-pam", Latn} + +m["ddn"] = 
{"Dendi", 35164, "son", Latn} + +m["ddo"] = {"Tsez", 34033, "cau-tsz", {"Cyrl"}, translit_module = "ddo-translit"} + +m["ddr"] = {"Dhudhuroa", 5269842, "aus-pam", Latn} + +m["dds"] = {"Donno So Dogon", 1234776, "nic-dge", Latn} + +m["ddw"] = {"Dawera-Daweloor", 5242304, "poz-tim", Latn} + +m["dec"] = {"Dagik", 35125, "alv-tal", Latn} + +m["ded"] = {"Dedua", 5249850, "ngf", Latn} + +m["dee"] = {"Dewoin", 3914892, "kro-wkr", Latn} + +m["def"] = {"Dezfuli", 4115412, "ira-swi"} + +m["deg"] = {"Degema", 35182, "alv-dlt", Latn} + +m["deh"] = {"Dehwari", 5704314, "ira-swi", ancestors = {"fa"}} + +m["dei"] = {"Demisa", 56380, "paa-egb", Latn} + +m["dek"] = { -- called "unattested alleged language" by Wikipedia + "Dek", 5252754, nil, Latn +} + +m["dem"] = {"Dem", 5254989, "paa", Latn} + +m["den"] = {"Slavey", 13272, "ath-nor", Latn} + +m["dep"] = {"Pidgin Delaware", nil, "crp", Latn, ancestors = {"unm"}} + +-- deq is not included, see [[WT:LT]] + +m["der"] = {"Deori", 56478, "tbq-bdg", {"Beng", "Latn"}} + +m["des"] = {"Desano", 962392, "sai-tuc", Latn} + +m["dev"] = {"Domung", 5291378, "ngf-fin", Latn} + +m["dez"] = {"Dengese", 2909984, "bnt-tet", Latn} + +m["dga"] = {"Southern Dagaare", 35159, "nic-mre", Latn} + +m["dgb"] = {"Bunoge", 4985178, "nic-dgw", Latn} + +m["dgc"] = {"Casiguran Dumagat Agta", 5313599, "phi", Latn} + +m["dgd"] = {"Dagaari Dioula", 11153465, "nic-mre", Latn} + +m["dge"] = {"Degenan", 5251770, "ngf-fin", Latn} + +m["dgg"] = {"Doga", 3033726, "poz-ocw", Latn} + +m["dgh"] = {"Dghwede", 56293, "cdc-cbm", Latn} + +m["dgi"] = {"Northern Dagara", 11004218, "nic-mre", Latn} + +m["dgk"] = {"Dagba", 12952357, "csu-sar", Latn} + +m["dgn"] = {"Dagoman", 10465931, "aus-yng", Latn} + +m["dgo"] = {"Hindi Dogri", nil, "him", {"Deva", "Arab", "Takr"}, ancestors = {"doi"}} + +m["dgr"] = {"Dogrib", 20979, "ath-nor", Latn} + +m["dgs"] = {"Dogoso", 35343, "nic-gur"} + +m["dgt"] = {"Ntra'ngith", 6983809, "aus-pam", Latn} + +-- dgu is not a language; see [[w:Dhekaru]] + 
+m["dgw"] = {"Daungwurrung", 5228050, "aus-pam", Latn} + +m["dgx"] = {"Doghoro", 12952392, "ngf", Latn} + +m["dgz"] = {"Daga", 5208442, "ngf", Latn} + +m["dhg"] = {"Dhangu", 5268960, "aus-yol", Latn} + +m["dhi"] = {"Dhimal", 35229, "sit-dhi", Deva} + +m["dhl"] = {"Dhalandji", 5268787, "aus-psw", Latn} + +m["dhm"] = {"Zemba", 3502283, "bnt-swb", Latn, ancestors = {"hz"}} + +m["dhn"] = {"Dhanki", 5268992, "inc-bhi"} + +m["dho"] = {"Dhodia", 5269658, "inc-bhi", Deva} + +m["dhr"] = {"Tharrgari", 10470289, "aus-psw", Latn} + +m["dhs"] = {"Dhaiso", 11001788, "bnt-kka", Latn} + +m["dhu"] = {"Dhurga", 1285318, "aus-yuk", Latn} + +m["dhv"] = {"Drehu", 3039319, "poz-occ", Latn} + +m["dhw"] = {"Danuwar", 3522797, "inc-bhi", Deva} + +m["dhx"] = {"Dhungaloo", 16960599, "aus-pam", Latn} + +m["dia"] = {"Dia", 3446591, "qfa-tor", Latn} + +m["dib"] = {"South Central Dinka", 35154, "sdv-dnu", Latn, ancestors = {"din"}} + +m["dic"] = {"Lakota Dida", 11001730, "kro-did", Latn} + +m["did"] = {"Didinga", 56365, "sdv", Latn} + +m["dif"] = {"Dieri", 25559563, "aus-kar", Latn} + +m["dig"] = {"Digo", 3362072, "bnt-mij", Latn} + +-- "dih" IS SPLIT INTO nai-ipa, nai-kum, nai-tip, SEE WT:LT + +m["dii"] = {"Dimbong", 35196, "bnt-baf", Latn} + +m["dij"] = {"Dai", 5209056, "poz-tim"} + +m["dik"] = {"Southwestern Dinka", 36540, "sdv-dnu", Latn, ancestors = {"din"}} + +m["dil"] = {"Dilling", 35152, "nub-hil", Latn} + +m["dim"] = {"Dime", 35311, "omv-aro"} + +m["din"] = {"Dinka", 56466, "sdv-dnu", Latn} + +m["dio"] = {"Dibo", 3914891, "alv-ngb", Latn} + +m["dip"] = {"Northeastern Dinka", 36246, "sdv-dnu", Latn, ancestors = {"din"}} + +m["dir"] = {"Dirim", 11130804, "nic-dak", Latn} + +m["dis"] = {"Dimasa", 56664, "tbq-bdg", {"Latn", "Beng"}} + +m["diu"] = {"Gciriku", 3780954, "bnt-kav", Latn} + +m["diw"] = {"Northwestern Dinka", 36249, "sdv-dnu", Latn, ancestors = {"din"}} + +m["dix"] = {"Dixon Reef", 5284967, "poz-vnc", Latn} + +m["diy"] = {"Diuwe", 5283765, "ngf"} + +m["diz"] = {"Ding", 35202, 
"bnt-bdz", Latn} + +m["dja"] = {"Djadjawurrung", 5285190, "aus-pam", Latn} + +m["djb"] = {"Djinba", 5285351, "aus-yol", Latn} + +m["djc"] = {"Dar Daju Daju", 5209890, "sdv-daj", Latn} + +m["djd"] = {"Jaminjung", 6147825, "aus-mir", Latn} + +m["dje"] = {"Zarma", 36990, "son", {"Latn", "Arab", "Brai"}} + +m["djf"] = {"Djangun", 10474818, "aus-pmn", Latn} + +m["dji"] = {"Djinang", 5285350, "aus-yol", Latn} + +m["djj"] = {"Ndjébbana", 5285274, "aus-arn", Latn} + +m["djk"] = {"Aukan", 2659044, "crp", {"Latn", "Afak"}, ancestors = {"en"}} + +m["djl"] = {"Djiwarli", nil, "aus-psw", Latn} + +m["djm"] = {"Jamsay", 3913290, "nic-pld", Latn} + +m["djn"] = {"Djauan", 13553748, "aus-gun", Latn} + +m["djo"] = {"Jangkang", 12952388, "day"} + +m["djr"] = {"Djambarrpuyngu", 3915679, "aus-yol", Latn} + +m["dju"] = {"Kapriman", 6367199, "paa-spk", Latn} + +m["djw"] = {"Djawi", 3913844, "aus-nyu", Latn, ancestors = {"bcj"}} + +m["dka"] = {"Dakpa", 3695189, "sit-ebo", {"Tibt"}} + +m["dkk"] = {"Dakka", 5209962, "poz-ssw"} + +m["dkr"] = {"Kuijau", 13580777, "poz-bnn"} + +m["dks"] = {"Southeastern Dinka", 36538, "sdv-dnu", Latn, ancestors = {"din"}} + +m["dkx"] = {"Mazagway", 6798209, "cdc-cbm", Latn} + +m["dlg"] = {"Dolgan", 32878, "trk-sib", {"Cyrl"}} + +m["dlk"] = {"Dahalik", 32260, "sem-eth", {"Ethi"}, translit_module = "Ethi-translit"} + +m["dlm"] = {"Dalmatian", 35527, "roa-itd", Latn} + +m["dln"] = {"Darlong", 5224029, "tbq-kuk", Latn} + +m["dma"] = {"Duma", 35319, "bnt-nze", Latn} + +m["dmb"] = {"Mombo Dogon", 6897074, "nic-dgw", Latn} + +m["dmc"] = {"Gavak", 5277406, "ngf-mad", Latn} + +m["dmd"] = {"Madhi Madhi", 6727353, "aus-pam", Latn} + +m["dme"] = {"Dugwor", 56313, "cdc-cbm", Latn} + +m["dmf"] = {"Medefaidrin", 1519764, "art", {"Medf"}, type = "appendix-constructed"} + +m["dmg"] = {"Upper Kinabatangan", 16109975, "poz-san", Latn} + +m["dmk"] = {"Domaaki", 32900, "inc-dar"} + +m["dml"] = {"Dameli", 32288, "inc-dar"} + +m["dmm"] = {"Dama (Nigeria)", 5211865, "alv-mbm", Latn} + 
+m["dmo"] = {"Kemezung", 35562, "nic-bbe", Latn} + +m["dmr"] = {"East Damar", 5328200, "poz-cet", Latn} + +m["dms"] = {"Dampelas", 5212928, "poz-tot", Latn} + +m["dmu"] = {"Dubu", 7692059, "paa-pau", Latn} + +m["dmv"] = {"Dumpas", 12953512, "poz-san", Latn} + +m["dmw"] = {"Mudburra", 6931573, "aus-pam", Latn} + +m["dmx"] = {"Dema", 3553423, "bnt-sho", Latn} + +m["dmy"] = {"Demta", 14466283, "paa-sen", Latn} + +m["dna"] = {"Upper Grand Valley Dani", 12952361, "ngf", Latn} + +m["dnd"] = {"Daonda", 5221528, "paa-brd", Latn} + +m["dne"] = {"Ndendeule", 6983725, "bnt-mbi", Latn} + +m["dng"] = {"Dungan", 33050, "zhx", {"Cyrl", "Hani", "Arab"}, ancestors = {"cmn"}, translit_module = "dng-translit"} + +m["dni"] = {"Lower Grand Valley Dani", 12635807, "ngf", Latn} + +m["dnj"] = {"Dan", 1158971, "dmn-mda", Latn} + +m["dnk"] = {"Dengka", 5256954, "poz-tim", Latn} + +m["dnn"] = {"Dzuun", 10973260, "dmn-smg"} + +m["dno"] = {"Ndrulo", 60785094, "csu-lnd"} + +m["dnr"] = {"Danaru", 5214932, "ngf-mad", Latn} + +m["dnt"] = {"Mid Grand Valley Dani", 12952359, "ngf", Latn} + +m["dnu"] = {"Danau", 5013745, "mkh-pal"} + +m["dnv"] = {"Danu", 5221251, "tbq-brm", ancestors = {"obr"}} + +m["dnw"] = {"Western Dani", 7987774, "ngf", Latn} + +m["dny"] = {"Dení", 56562, "auf", Latn} + +m["doa"] = {"Dom", 5289770, "ngf", Latn} + +m["dob"] = {"Dobu", 952133, "poz-ocw", Latn} + +m["doc"] = {"Northern Kam", 17195499, "qfa-tak", Latn} + +m["doe"] = {"Doe", 5288055, "bnt-ruv", Latn} + +m["dof"] = {"Domu", 5291375, "ngf", Latn} + +m["doh"] = {"Dong", 3438405, "nic-dak", Latn} + +m["doi"] = { + "Dogri", + 32730, + "him", + {"Deva", "Takr", "fa-Arab", "Dogr"}, + translit_module = "hi-translit" -- for now +} + +m["dok"] = {"Dondo", 5295571, "poz-tot", Latn} + +m["dol"] = {"Doso", 4167202, "paa", Latn} + +m["don"] = {"Doura", 7829037, "poz-ocw", Latn} + +m["doo"] = {"Dongo", 35303, "nic-mbc", Latn} + +m["dop"] = {"Lukpa", 3258739, "nic-gne", Latn} + +m["doq"] = { + "Dominican Sign Language", 5290820, 
"sgn", Latn -- when documented +} + +m["dor"] = {"Dori'o", 3037084, "poz-sls", Latn} + +m["dos"] = {"Dogosé", 3913314, "nic-gur", Latn} + +m["dot"] = {"Dass", 3441293, "cdc-wst", Latn} + +m["dov"] = {"Toka-Leya", 11001779, "bnt-bot", Latn, ancestors = {"toi"}} + +m["dow"] = {"Doyayo", 35299, "alv-dur", Latn} + +m["dox"] = {"Bussa", 35123, "cus", Latn} + +m["doy"] = {"Dompo", 35270, "alv-gng", Latn} + +m["doz"] = {"Dorze", 56336, "omv-nom", Latn} + +m["dpp"] = {"Papar", 7132487, "poz-san", Latn} + +m["drb"] = {"Dair", 12952360, "nub-hil", Latn} + +m["drc"] = {"Minderico", 6863806, "roa-ibe", Latn, ancestors = {"pt"}} + +m["drd"] = {"Darmiya", 5224058, "sit-alm"} + +m["drg"] = {"Rungus", 6897407, "poz-san", Latn} + +m["dri"] = {"Lela", 3914004, "nic-knn", Latn} + +m["drl"] = {"Baagandji", 5223941, "aus-pam", Latn} + +m["drn"] = {"West Damar", 3450459, "poz-tim", Latn} + +m["dro"] = {"Daro-Matu Melanau", 5224156, "poz-bnn", Latn} + +m["drq"] = {"Dura", 3449842, "sit-gma"} + +m["drs"] = {"Gedeo", 56622, "cus", {"Ethi"}} + +m["dru"] = {"Rukai", 49232, "map", Latn, ancestors = {"dru-pro"}} + +m["dry"] = {"Darai", 46995026, "inc-bhi", Deva} + +m["dsb"] = { + "Lower Sorbian", + 13286, + "wen", + Latn, + sort_key = {from = {"b́", "č", "ć", "ě", "ł", "ḿ", "ń", "ó", "ṕ", "ŕ", "š", "ś", "ẃ", "[žż]", "ź"}, to = {"bj", "c~", "c~~", "e~", "l*", "mj", "n~", "o", "pj", "r~", "s~", "s~~", "wj", "z~", "z~~"}}, -- ł comes before l in alphabetic order + standardChars = "A-PR-UWYZa-pr-uwyz0-9ÓóĆćČčĚ죳ŃńŔশŠšŹźŽž" .. 
PUNCTUATION +} + +m["dse"] = { + "Dutch Sign Language", 2201099, "sgn", Latn -- when documented +} + +m["dsh"] = {"Daasanach", 56637, "cus", Latn} + +m["dsi"] = {"Disa", 3914455, "csu-bgr", Latn} + +m["dsl"] = { + "Danish Sign Language", 2605298, "sgn", Latn -- when documented +} + +m["dsn"] = {"Dusner", 5316948, "poz-hce", Latn} + +m["dso"] = {"Desiya", 12629755, "inc-eas", {"Orya"}, ancestors = {"or"}} + +m["dsq"] = {"Tadaksahak", 36568, "son", {"Arab", "Latn"}} + +m["dta"] = {"Daur", 32430, "xgn", {"Latn", "Hani", "Cyrl", "Mong"}} + +m["dtb"] = {"Labuk-Kinabatangan Kadazan", 5330240, "poz-san", Latn} + +m["dtd"] = {"Ditidaht", 13728042, "wak", Latn} + +m["dth"] = { -- contrast 'rrt' + "Adithinngithigh", 4683034, "aus-pmn", Latn +} + +m["dti"] = {"Ana Tinga Dogon", 4750346, "qfa-dgn", Latn} + +m["dtk"] = {"Tene Kan Dogon", 11018863, "nic-pld", Latn} + +m["dtm"] = {"Tomo Kan Dogon", 11137719, "nic-pld", Latn} + +m["dto"] = {"Tommo So", 47012992, "nic-dge", Latn} + +m["dtp"] = {"Central Dusun", 5317225, "poz-san", Latn} + +m["dtr"] = {"Lotud", 6685078, "poz-san", Latn} + +m["dts"] = {"Toro So Dogon", 11003311, "nic-dge", Latn} + +m["dtt"] = {"Toro Tegu Dogon", 3913924, "nic-pld", Latn} + +m["dtu"] = {"Tebul Ure Dogon", 7692089, "qfa-dgn", Latn} + +m["dty"] = {"Doteli", 18415595, "inc-pah", Deva, translit_module = "ne-translit", ancestors = {"ne"}} + +m["dua"] = {"Duala", 33013, "bnt-saw", Latn} + +m["dub"] = {"Dubli", 5310792, "inc-bhi"} + +m["duc"] = {"Duna", 5314039, "paa", Latn} + +m["due"] = {"Umiray Dumaget Agta", 7881585, "phi", Latn} + +m["duf"] = {"Dumbea", 6983819, "poz-cln", Latn} + +m["dug"] = {"Chiduruma", 35614, "bnt-mij", Latn} + +m["duh"] = {"Dungra Bhil", 12953513, "inc-bhi", {"Deva", "Gujr"}} + +m["dui"] = {"Dumun", 5314004, "ngf-mad", Latn} + +m["duk"] = {"Uyajitaya", 7904085, "ngf-mad", Latn} + +m["dul"] = {"Alabat Island Agta", 3399709, "phi", Latn} + +m["dum"] = {"Middle Dutch", 178806, "gmw", Latn, ancestors = {"odt"}, entry_name = {from = 
{"[ĀÂ]", "[āâ]", "[ĒÊË]", "[ēêë]", "[ĪÎ]", "[īî]", "[ŌÔ]", "[ōô]", "[ŪÛ]", "[ūû]"}, to = {"A", "a", "E", "e", "I", "i", "O", "o", "U", "u"}}} + +m["dun"] = {"Dusun Deyah", 2784033, "poz-bre", Latn} + +m["duo"] = {"Dupaningan Agta", 5315912, "phi", Latn} + +m["dup"] = {"Duano", 3040468, "poz-mly", Latn} + +m["duq"] = {"Dusun Malang", 3041711, "poz-bre", Latn} + +m["dur"] = {"Dii", nil, "alv-dur", Latn} + +m["dus"] = {"Dumi", 56315, "sit-kiw", Deva} + +m["duu"] = {"Drung", 56406, "sit-nng"} + +m["duv"] = {"Duvle", 56364, "paa-lkp", Latn} + +m["duw"] = {"Dusun Witu", 2381310, "poz-bre", Latn} + +m["dux"] = {"Duun", 3914880, "dmn-smg", Latn} + +m["duy"] = {"Dicamay Agta", 5272321, "phi", Latn} + +m["duz"] = {"Duli", 5313405, "alv-ada", Latn} + +m["dva"] = {"Duau", 5310448, "poz-ocw", Latn} + +m["dwa"] = {"Diri", 56286, "cdc-wst", Latn} + +m["dwr"] = {"Dawro", 12629647, "omv-nom", {"Ethi", "Latn"}} + +m["dwu"] = {"Dhuwal", nil, "aus-yol", Latn} + +m["dww"] = {"Dawawa", 5242286, "poz-ocw", Latn} + +m["dwy"] = {"Dhuwaya", nil, "aus-yol", Latn} + +m["dwz"] = {"Dewas Rai", 62663667, "inc-bhi"} + +m["dya"] = {"Dyan", 35340, "nic-gur", Latn} + +m["dyb"] = {"Dyaberdyaber", 5285185, "aus-nyu", Latn} + +m["dyd"] = {"Dyugun", 3913785, "aus-nyu", Latn} + +m["dyg"] = {"Villa Viciosa Agta", 12626611, "phi", Latn} + +m["dyi"] = {"Djimini", 35336, "alv-tdj", Latn} + +m["dym"] = {"Yanda Dogon", 8048316, "qfa-dgn", Latn} + +m["dyn"] = {"Dyangadi", 3913820, "aus-cww", Latn} + +m["dyo"] = {"Jola-Fonyi", 3507832, "alv-jol", Latn} + +m["dyu"] = {"Dyula", 32706, "dmn-man", Latn} + +m["dyy"] = {"Dyaabugay", 2591320, "aus-pmn", Latn} + +m["dza"] = {"Tunzu", 3915845, "nic-jer", Latn} + +m["dzg"] = {"Dazaga", 35244, "ssa-sah", Latn} + +m["dzl"] = {"Dzala", 56607, "sit-ebo", {"Tibt"}} + +m["dzn"] = {"Dzando", 5319622, "bnt-bun", Latn} + +return m diff --git a/wiktra/wikt/translit/languages/data3/e.lua b/wiktra/wikt/translit/languages/data3/e.lua new file mode 100644 index 0000000..a58dd77 --- 
/dev/null +++ b/wiktra/wikt/translit/languages/data3/e.lua @@ -0,0 +1,257 @@ +local u = mw.ustring.char + +-- UTF-8 encoded strings for some commonly-used diacritics +local GRAVE = u(0x0300) +local ACUTE = u(0x0301) +local CIRC = u(0x0302) +local TILDE = u(0x0303) +local MACRON = u(0x0304) +local BREVE = u(0x0306) +local DOTABOVE = u(0x0307) +local DIAER = u(0x0308) +local CARON = u(0x030C) +local DGRAVE = u(0x030F) +local INVBREVE = u(0x0311) +local DOTBELOW = u(0x0323) +local RINGBELOW = u(0x0325) +local CEDILLA = u(0x0327) + +local Latn = {"Latn"} + +local m = {} + +m["ebg"] = {"Ebughu", 35294, "nic-lcr", Latn} + +m["ebk"] = {"Eastern Bontoc", nil, "phi", Latn} + +m["ebr"] = {"Ebrié", 36644, "alv-ptn", Latn} + +m["ebu"] = {"Embu", 35318, "bnt-kka", Latn} + +m["ecr"] = {"Eteocretan", 35461, nil, {"Grek"}} + +m["ecs"] = { + "Ecuadorian Sign Language", 3436769, "sgn", Latn -- when documented +} + +m["ecy"] = {"Eteocypriot", 35309, nil, {"Cprt"}} + +m["eee"] = {"E", 35386, "qfa-mix", {"Hani", "Latn"}} + +m["efa"] = {"Efai", 3813297, "nic-ief", Latn} + +m["efe"] = {"Efe", 56354, "csu-mle", Latn} + +m["efi"] = {"Efik", 35377, "nic-ief", Latn} + +m["ega"] = {"Ega", 3914927, "alv", Latn} + +m["egl"] = {"Emilian", 1057898, "roa-git", Latn, wikimedia_codes = {"eml"}} + +m["ego"] = {"Eggon", 35300, "nic-pls", Latn} + +m["egy"] = {"Egyptian", 50868, "egx", {"Latinx", "Egyp", "Egyd"}} + +m["ehu"] = {"Ehueun", 3441392, "alv-nwd", Latn} + +m["eip"] = {"Eipomek", 5349839, "ngf", Latn} + +m["eit"] = {"Eitiep", 5350030, "qfa-tor", Latn} + +m["eiv"] = {"Askopan", 56324, "paa-nbo", Latn} + +m["eja"] = {"Ejamat", 6269820, "alv-jfe", Latn} + +m["eka"] = {"Ekajuk", 35250, "nic-eko", Latn} + +m["eke"] = {"Ekit", 3509628, "nic-ief", Latn} + +m["ekg"] = {"Ekari", 5350305, "ngf", Latn} + +m["eki"] = {"Eki", 5350418, "nic-ief", Latn} + +m["ekl"] = {"Kolhe", 6426945, "mun", Latn} + +m["ekm"] = {"Elip", 12952414, "nic-ymb", Latn} + +m["eko"] = {"Koti", 29930, "bnt-mak", Latn} + +m["ekp"] = 
{"Ekpeye", 35254, "alv-igb", Latn} + +m["ekr"] = {"Yace", 36901, "alv-ido", Latn} + +m["eky"] = {"Eastern Kayah", 25559417, "kar", {"Kali"}} + +m["ele"] = {"Elepi", 5359444, "qfa-tor", Latn} + +m["elh"] = {"El Hugeirat", 5351410, "nub-hil", Latn} + +m["eli"] = {"Nding", 36176, "alv-tal", Latn} + +m["elk"] = {"Elkei", 5364210, "qfa-tor", Latn} + +m["elm"] = {"Eleme", 3914427, "nic-ogo", Latn} + +m["elo"] = {"El Molo", 56719, "cus", Latn} + +m["elu"] = {"Elu", 3364594, "poz-aay", Latn} + +m["elx"] = {"Elamite", 35470, "qfa-iso", {"Xsux"}} + +m["ema"] = {"Emai", 35428, "alv-eeo", Latn} + +m["emb"] = {"Embaloh", 5369424, "poz", Latn} + +m["eme"] = {"Emerillon", 3588942, "tup-gua", Latn} + +m["emg"] = {"Eastern Meohang", 12952840, "sit-kie", {"Deva"}} + +m["emi"] = {"Mussau-Emira", 6943093, "poz-ocw", Latn} + +m["emk"] = {"Eastern Maninkakan", 11002130, "dmn-mnk", {"Latn", "Arab", "Nkoo"}} + +m["emm"] = {"Mamulique", 3285082, "nai-pak", Latn} + +m["emn"] = {"Eman", 5368975, "nic-tvc", Latn} + +m["emp"] = {"Northern Emberá", 2391297, "sai-chc", Latn} + +m["ems"] = {"Alutiiq", 27992, "ypk", Latn} + +m["emu"] = {"Eastern Muria", 12952883, "dra", ancestors = {"gon"}} + +m["emw"] = {"Emplawas", 5374265, "poz-tim", Latn} + +m["emx"] = {"Erromintxela", 1122188, "qfa-mix", Latn, ancestors = {"eu", "rom"}} + +m["emy"] = {"Epigraphic Mayan", 301355, "myn", {"Maya"}} + +m["ena"] = {"Apali", 3504201, "ngf-mad", Latn} + +m["enb"] = {"Markweeta", 56874, "sdv-nma", Latn} + +m["enc"] = {"En", 3504110, "qfa-buy", Latn} + +m["end"] = {"Ende", 2067656, "poz-cet", Latn} + +m["enf"] = {"Forest Enets", 30249597, "syd", Latn} + +m["enh"] = {"Tundra Enets", 25559411, "syd", Latn} + +m["enl"] = {"Enlhet", 15462671, "sai-mas", Latn} + +m["enm"] = {"Middle English", 36395, "gmw", Latn, ancestors = {"ang"}, entry_name = {from = {"[ĀÁ]", "[āá]", "[ǢǼ]", "[ǣǽ]", "Ċ", "ċ", "[ĒÉĖ]", "[ēéė]", "Ġ", "ġ", "[ĪÍ]", "[īí]", "[ŌÓ]", "[ōó]", "[ŪÚ]", "[ūú]", "[ȲÝ]", "[ȳý]", MACRON, ACUTE, DOTABOVE}, to = {"A", 
"a", "Æ", "æ", "C", "c", "E", "e", "G", "g", "I", "i", "O", "o", "U", "u", "Y", "y"}}} + +m["enn"] = {"Engenni", 3915365, "alv-dlt", Latn} + +m["eno"] = {"Enggano", 2669164, "poz", Latn} + +m["enq"] = {"Enga", 1143040, "paa-eng", Latn} + +m["enr"] = {"Emem", 5370369, "paa-pau"} + +m["enu"] = {"Enu", 5380858, "tbq-lol"} + +m["env"] = {"Enwan", 3438334, "alv-yek", Latn} + +m["enw"] = {"Enwang", 11134434, "nic-lcr", Latn} + +m["enx"] = {"Enxet", 15462609, "sai-mas", Latn} + +m["eot"] = {"Eotile", 3915347, "alv-ptn", Latn} + +m["epi"] = {"Epie", 35291, "alv-dlt", Latn} + +m["era"] = {"Eravallan", 5385061, "dra"} + +m["erg"] = {"Sie", 426254, "poz-occ", Latn} + +m["erh"] = {"Eruwa", 3441244, "alv-swd", Latn} + +m["eri"] = {"Ogea", 7079984, "ngf-mad", Latn} + +m["erk"] = {"South Efate", 3449070, "poz-vnc", Latn} + +m["ero"] = {"Horpa", 56854, "sit-rgy"} + +m["err"] = {"Erre", 10488401, "qfa-iso", Latn} + +m["ers"] = { + "Ersu", 12952417, "sit-qia", Latn -- also Ersu Shaba +} + +m["ert"] = {"Eritai", 56376, "paa-lkp", Latn} + +m["erw"] = {"Erokwanas", 5395296, "poz-hce", Latn} + +m["ese"] = {"Ese Ejja", 2980381, "sai-tac", Latn} + +m["esh"] = {"Eshtehardi", 12952418, "xme-ttc", {"fa-Arab", "Latn"}, ancestors = {"xme-ttc-sou"}} + +m["esi"] = {"North Alaskan Inupiatun", nil, "esx-inu", Latn} + +m["esk"] = {"Northwest Alaska Inupiatun", 25559714, "esx-inu", Latn} + +m["esl"] = {"Egyptian Sign Language", 5348443, "sgn"} + +m["esm"] = {"Esuma", 16927555, "alv-kwa", Latn} + +m["esn"] = { + "Salvadoran Sign Language", 7406492, "sgn", Latn -- when documented +} + +m["eso"] = { + "Estonian Sign Language", 3196221, "sgn", Latn -- when documented +} + +m["esq"] = {"Esselen", 1294243, "qfa-iso", Latn} + +m["ess"] = {"Central Siberian Yupik", 27993, "ypk", {"Cyrl"}} + +m["esu"] = {"Yup'ik", 21117, "ypk", Latn} + +m["esy"] = { + "Eskayan", 867086, "art", Latn -- also its own native script +} + +m["etb"] = {"Etebi", 11002851, "nic-ief", Latn} + +m["etc"] = {"Etchemin", 5402493, 
"alg-eas", Latn} + +m["eth"] = {"Ethiopian Sign Language", 3501903, "sgn"} + +m["etn"] = {"Eton (Vanuatu)", 3059362, "poz-oce", Latn} + +m["eto"] = {"Eton (Cameroon)", 35317, "bnt-btb", Latn} + +m["etr"] = {"Edolo", 5340184, "ngf", Latn} + +m["ets"] = {"Yekhee", 3915848, "alv-yek", Latn} + +m["ett"] = {"Etruscan", 35726, "qfa-tyn", {"Ital"}, translit_module = "Ital-translit"} + +m["etu"] = {"Ejagham", 35296, "nic-eko", Latn} + +m["etx"] = {"Eten", 3915392, "nic-beo", Latn} + +m["etz"] = {"Semimi", 10950308, "paa-mai", Latn} + +m["eve"] = {"Even", 29960, "tuw", {"Cyrl", "Latn"}, entry_name = {from = {"[Ӣ]", "[ӣ]", "[Ӯ]", "[ӯ]", MACRON, DOTBELOW, DOTABOVE}, to = {"И", "и", "У", "у"}}, translit_module = "eve-translit"} + +m["evh"] = {"Uvbie", 3441344, "alv-swd", Latn} + +m["evn"] = {"Evenki", 30004, "tuw", {"Cyrl"}, entry_name = {from = {"[Ӣ]", "[ӣ]", "[Ӯ]", "[ӯ]", MACRON, DOTBELOW, DOTABOVE}, to = {"И", "и", "У", "у"}}, translit_module = "evn-translit"} + +m["ewo"] = {"Ewondo", 35459, "bnt-btb", Latn} + +m["ext"] = {"Extremaduran", 30007, "roa-ibe", Latn} + +m["eya"] = {"Eyak", 27480, "xnd", Latn} + +m["eyo"] = {"Keiyo", 56856, "sdv-nma", Latn} + +m["eza"] = {"Ezaa", 11921436, "alv-igb", Latn, ancestors = {"izi"}} + +m["eze"] = {"Uzekwe", 3502244, "nic-ucn", Latn} + +return m diff --git a/wiktra/wikt/translit/languages/data3/f.lua b/wiktra/wikt/translit/languages/data3/f.lua new file mode 100644 index 0000000..1d44e4c --- /dev/null +++ b/wiktra/wikt/translit/languages/data3/f.lua @@ -0,0 +1,199 @@ +local u = mw.ustring.char + +-- UTF-8 encoded strings for some commonly-used diacritics +local GRAVE = u(0x0300) +local ACUTE = u(0x0301) +local CIRC = u(0x0302) +local TILDE = u(0x0303) +local MACRON = u(0x0304) +local BREVE = u(0x0306) +local DOTABOVE = u(0x0307) +local DIAER = u(0x0308) +local CARON = u(0x030C) +local DGRAVE = u(0x030F) +local INVBREVE = u(0x0311) +local DOTBELOW = u(0x0323) +local RINGBELOW = u(0x0325) +local CEDILLA = u(0x0327) + +local Latn = 
{"Latn"} + +local m = {} + +m["faa"] = {"Fasu", 3446687, "paa-kut", Latn} + +m["fab"] = {"Annobonese", 34992, "crp", Latn, ancestors = {"pt"}} + +m["fad"] = {"Wagi", 7959569, "ngf-mad", Latn} + +m["faf"] = {"Fagani", 3063759, "poz-sls", Latn} + +m["fag"] = {"Finongan", 3450761, "ngf-fin", Latn} + +m["fah"] = {"Baissa Fali", 3446632, "nic-bco", Latn} + +m["fai"] = {"Faiwol", 3501773, "ngf-okk", Latn} + +m["faj"] = {"Faita", 976953, "ngf-mad", Latn} + +m["fak"] = {"Fang (Beboid)", 5433811, "nic-beb", Latn} + +m["fal"] = {"South Fali", 15637351, "alv-fli", Latn} + +m["fam"] = {"Fam", 35290, "nic-mmb", Latn} + +m["fan"] = {"Fang (Bantu)", 33484, "bnt-btb", Latn} + +m["fap"] = {"Palor", 36318, "alv-cng", Latn} + +m["far"] = {"Fataleka", 3067168, "poz-sls", Latn} + +-- "fat" IS TREATED AS "ak", SEE WT:LT + +m["fau"] = {"Fayu", 5439113, "paa-lkp", Latn} + +m["fax"] = {"Fala", 300402, "roa-ibe", Latn, ancestors = {"roa-opt"}} + +m["fay"] = {"Southwestern Fars", 5228140, "ira-swi"} + +m["faz"] = {"Northwestern Fars", 7060307, "ira-swi"} + +m["fbl"] = {"West Albay Bikol", 18603801, "phi", Latn} + +m["fcs"] = { + "Quebec Sign Language", 13193, "sgn", Latn -- when documented +} + +m["fer"] = {"Feroge", 35287, "nic-ser", Latn} + +m["ffi"] = {"Foia Foia", 8564176, "ngf", Latn} + +-- "ffm" IS TREATED AS "ff", SEE WT:LT + +m["fgr"] = {"Fongoro", 3437645, "csu", Latn} + +m["fia"] = {"Nobiin", 36503, "nub", Latn, ancestors = {"onw"}} + +m["fie"] = {"Fyer", 56273, "cdc-wst", Latn} + +-- "fil" IS TREATED AS "tl", SEE WT:LT + +m["fip"] = {"Fipa", 667747, "bnt-mwi", Latn} + +m["fir"] = {"Firan", 3915847, "nic-plc", Latn} + +m["fit"] = {"Meänkieli", 13357, "fiu-fin", Latn, ancestors = {"fi"}} + +m["fiw"] = {"Fiwaga", 5456292, "paa-kut", Latn} + +m["fkk"] = {"Kirya-Konzel", 6416310, "cdc-cbm", Latn} + +m["fkv"] = {"Kven", 165795, "fiu-fin", Latn, ancestors = {"fi"}} + +m["fla"] = {"Montana Salish", 3111983, "sal", Latn} + +m["flh"] = {"Foau", 5463819, "paa-lkp", Latn} + +m["fli"] = 
{"Fali", 56244, "cdc-cbm", Latn} + +m["fll"] = {"North Fali", 12952419, "alv-fli", Latn} + +m["fln"] = {"Flinders Island", 3915702, "aus-pmn", Latn} + +m["flr"] = {"Fuliiru", 7166821, "bnt-shh", Latn} + +m["fly"] = {"Tsotsitaal", 12643960, "crp", Latn, ancestors = {"af"}} + +m["fmp"] = {"Fe'fe'", 35276, "bai", Latn} + +m["fmu"] = {"Far Western Muria", 42589412, "dra", ancestors = {"gon"}} + +m["fng"] = {"Fanagalo", 35727, "crp", Latn, ancestors = {"zu"}} + +m["fni"] = {"Fania", 317642, "alv-bua", Latn} + +m["fod"] = {"Foodo", 5465566, "alv-gng", Latn} + +m["foi"] = {"Foi", 5464146, "paa-kut", Latn} + +m["fom"] = {"Foma", 5464911, "bnt-ske", Latn, ancestors = {"khy"}} + +m["fon"] = {"Fon", 33291, "alv-gbe", Latn} + +m["for"] = {"Fore", 3077126, "paa-kag", Latn} + +m["fos"] = {"Siraya", 716604, "map", Latn} + +m["fpe"] = {"Pichinglis", 35288, "crp", Latn, ancestors = {"en"}} + +m["fqs"] = {"Fas", 56320, "paa", Latn} + +-- "frc" IS TREATED AS "fr" (or as etymology-only), SEE WT:LT + +m["frd"] = {"Fordata", 5468035, "poz", Latn} + +m["frm"] = {"Middle French", 1473289, "roa-oil", Latn, ancestors = {"fro"}, sort_key = {from = {"[áàâä]", "[éèêë]", "[íìîï]", "[óòôö]", "[úùûü]", "[ýỳŷÿ]", "ç", "'"}, to = {"a", "e", "i", "o", "u", "y", "c"}}} + +m["fro"] = {"Old French", 35222, "roa-oil", {"Latn", "Hebr"}, sort_key = {from = {"[áàâä]", "[éèêë]", "[íìîï]", "[óòôö]", "[úùûü]", "[ýỳŷÿ]", "ç", "'"}, to = {"a", "e", "i", "o", "u", "y", "c"}}} + +m["frp"] = {"Franco-Provençal", 15087, "roa", Latn, sort_key = {from = {"[áàâä]", "[éèêë]", "[íìîï]", "[óòôö]", "[úùûü]", "[ýỳŷÿ]", "ç", "'"}, to = {"a", "e", "i", "o", "u", "y", "c"}}} + +m["frq"] = {"Forak", 5467173, "ngf-fin", Latn} + +m["frr"] = {"North Frisian", 28224, "gmw-fri", Latn} + +-- "frs" IS NOT USED, SEE WT:LT + +m["frt"] = {"Fortsenal", 2666835, "poz-vnc", Latn} + +m["fse"] = { + "Finnish Sign Language", 33225, "sgn", Latn -- when documented +} + +m["fsl"] = { + "French Sign Language", 33302, "sgn-fsl", Latn -- when 
documented +} + +m["fss"] = { + "Finnish-Swedish Sign Language", 5450448, "sgn", Latn -- when documented +} + +-- "fub" IS TREATED AS "ff", SEE WT:LT + +-- "fuc" IS TREATED AS "ff", SEE WT:LT + +m["fud"] = {"East Futuna", 35334, "poz-pnp", Latn} + +-- "fue" IS TREATED AS "ff", SEE WT:LT + +-- "fuf" IS TREATED AS "ff", SEE WT:LT + +-- "fuh" IS TREATED AS "ff", SEE WT:LT + +-- "fui" IS TREATED AS "ff", SEE WT:LT + +m["fuj"] = {"Ko", 35693, "alv-hei", Latn} + +m["fum"] = {"Fum", 11011870, "nic-nka", Latn} + +m["fun"] = {"Fulniô", 774441, "qfa-iso", Latn} + +-- "fuq" IS TREATED AS "ff", SEE WT:LT + +m["fur"] = {"Friulian", 33441, "roa-rhe", Latn} + +m["fut"] = {"Futuna-Aniwa", 3064409, "poz-pnp", Latn} + +m["fuu"] = {"Furu", 3441160, "csu-bkr", Latn} + +-- "fuv" IS TREATED AS "ff", SEE WT:LT + +m["fuy"] = {"Fuyug", 3073472, "ngf", Latn} + +m["fvr"] = {"Fur", 33364, "ssa-fur", Latn} + +m["fwa"] = {"Fwâi", 3091331, "poz-cln", Latn} + +m["fwe"] = {"Fwe", 5511159, "bnt-bot", Latn} + +return m diff --git a/wiktra/wikt/translit/languages/data3/g.lua b/wiktra/wikt/translit/languages/data3/g.lua new file mode 100644 index 0000000..e7e301d --- /dev/null +++ b/wiktra/wikt/translit/languages/data3/g.lua @@ -0,0 +1,696 @@ +local u = mw.ustring.char + +-- UTF-8 encoded strings for some commonly-used diacritics +local GRAVE = u(0x0300) +local ACUTE = u(0x0301) +local CIRC = u(0x0302) +local TILDE = u(0x0303) +local MACRON = u(0x0304) +local BREVE = u(0x0306) +local DOTABOVE = u(0x0307) +local DIAER = u(0x0308) +local CARON = u(0x030C) +local DGRAVE = u(0x030F) +local INVBREVE = u(0x0311) +local COMMA = u(0x0313) -- combining smooth breathing +local REV_COMMA = u(0x0314) -- combining rough breathing +local DOTBELOW = u(0x0323) +local RINGBELOW = u(0x0325) +local CEDILLA = u(0x0327) +local GRC_CIRC = u(0x0342) +local UNDERTIE = u(0x035C) -- actually "combining double breve below" +local RSQUO = u(0x2019) +local PSILI = u(0x1FBD) +local CORONIS = u(0x1FBF) + +-- Puncuation to be used 
for standardChars field +local PUNCTUATION = " !#$%&*+,-./:;<=>?@^_`|~'()" + +local Latn = {"Latn"} + +local m = {} + +m["gaa"] = {"Ga", 33287, "alv-gda", Latn} + +m["gab"] = {"Gabri", 3441237, "cdc-est", Latn} + +m["gac"] = {"Mixed Great Andamanese", nil, "qfa-adn", Latn} + +m["gad"] = { -- not to be confused with gdk, gdg + "Gaddang", 3438830, "phi", Latn +} + +m["gae"] = {"Warekena", 1091095, "awd-nwk", Latn} + +m["gaf"] = {"Gende", 3100425, "paa-kag", Latn} + +m["gag"] = {"Gagauz", 33457, "trk-ogz", {"Latn", "Cyrl"}, ancestors = {"trk-oat"}} + +m["gah"] = {"Alekano", 3441595, "paa-kag", Latn} + +m["gai"] = {"Borei", 6799756, "paa", Latn} + +m["gaj"] = {"Gadsup", 5516467, "paa-kag", Latn} + +m["gak"] = {"Gamkonora", 5520226, "paa-wpa", Latn} + +m["gal"] = {"Galoli", 35322, "poz-tim", Latn} + +m["gam"] = {"Kandawo", 6361369, "ngf", Latn} + +m["gan"] = {"Gan", 33475, "zhx", {"Hani"}, ancestors = {"ltc"}} + +m["gao"] = {"Gants", 5521529, "ngf-mad", Latn} + +m["gap"] = {"Gal", 5517742, "ngf-mad", Latn} + +m["gaq"] = {"Gata'", 3501920, "mun"} + +m["gar"] = {"Galeya", 5518509, "poz-ocw", Latn} + +m["gas"] = {"Adiwasi Garasia", 12953522, "inc-bhi", ancestors = {"bhb"}} + +m["gat"] = {"Kenati", 4219330, "paa-kag", Latn} + +m["gau"] = {"Kondekor", 12952433, "dra"} + +m["gaw"] = {"Nobonob", 11732205, "paa", Latn} + +m["gay"] = {"Gayo", 33286, "poz-nws", Latn} + +m["gba"] = {"Gbaya", 3099986, "alv-gba", Latn} + +m["gbb"] = {"Kaytetye", 6380709, "aus-rnd", Latn} + +m["gbd"] = {"Karadjeri", 3913837, "aus-pam", Latn} + +m["gbe"] = {"Niksek", 56375, "paa", Latn} + +m["gbf"] = {"Gaikundi", 5517032, "paa-spk", Latn} + +m["gbg"] = {"Gbanziri", 35306, "nic-nkg", Latn} + +m["gbh"] = {"Defi Gbe", 12952446, "alv-gbe", Latn} + +m["gbi"] = {"Galela", 3094570, "paa-wpa", Latn} + +m["gbj"] = {"Bodo Gadaba", 3347070, "mun", {"Orya"}} + +m["gbk"] = {"Gaddi", 17455500, "him", {"Deva", "Takr"}, translit_module = "hi-translit"} + +m["gbl"] = {"Gamit", 2731717, "inc-bhi", {"Deva", "Gujr"}} + 
+m["gbm"] = {"Garhwali", 33459, "inc-pah", {"Deva"}, translit_module = "hi-translit"} + +m["gbn"] = {"Mo'da", 12755683, "csu-bbk", Latn} + +m["gbo"] = {"Northern Grebo", 11157042, "kro-grb", Latn, ancestors = {"grb"}} + +m["gbp"] = {"Gbaya-Bossangoa", 11011295, "alv-gbw", Latn} + +m["gbq"] = {"Gbaya-Bozoum", 4952879, "alv-gbw", Latn} + +m["gbr"] = {"Gbagyi", 11015105, "alv-ngb", Latn} + +m["gbs"] = {"Gbesi Gbe", 12952448, "alv-pph", Latn} + +m["gbu"] = {"Gagadu", 35677, "aus-arn", Latn} + +m["gbv"] = {"Gbanu", 3914945, "alv-gbf", Latn} + +m["gbw"] = {"Gabi", 5515391, "aus-pam", Latn} + +m["gbx"] = {"Eastern Xwla Gbe", 18379975, "alv-pph", Latn} + +m["gby"] = {"Gbari", 3915451, "alv-ngb", Latn} + +m["gcc"] = {"Mali", 6743338, "paa-bng", Latn} + +m["gcd"] = {"Ganggalida", 3913765, "aus-tnk", Latn} + +m["gce"] = {"Galice", 20711, "ath-pco", Latn} + +m["gcf"] = {"Antillean Creole", 3006280, "crp", Latn, ancestors = {"fr"}} + +m["gcl"] = {"Grenadian Creole English", 4252500, "crp", Latn, ancestors = {"en"}} + +m["gcn"] = {"Gaina", 11732195, "ngf", Latn} + +m["gcr"] = {"Guianese Creole", 1363072, "crp", Latn, ancestors = {"fr"}} + +m["gct"] = {"Colonia Tovar German", 1138351, "gmw", Latn, ancestors = {"gsw"}} + +m["gdb"] = {"Ollari", 33906, "dra"} + +m["gdc"] = {"Gugu Badhun", 10510360, "aus-pam", Latn} + +m["gdd"] = {"Gedaged", 35292, "poz-ocw", Latn} + +m["gde"] = {"Gude", 3441230, "cdc-cbm", Latn} + +m["gdf"] = {"Guduf-Gava", 3441350, "cdc-cbm", Latn} + +m["gdg"] = { -- not to be confused with gad, gdk + "Ga'dang", 5515189, "phi" +} + +m["gdh"] = {"Gadjerawang", 3913817, "aus-jar", Latn} + +m["gdi"] = {"Gundi", 11137851, "nic-nkb", Latn} + +m["gdj"] = {"Kurtjar", 5619931, "aus-pmn", Latn} + +m["gdk"] = { -- not to be confused with gad, gdg + "Gadang", 56256, "cdc-est", Latn +} + +m["gdl"] = {"Dirasha", 56809, "cus", {"Ethi"}} + +m["gdm"] = {"Laal", 33436, nil, Latn} + +m["gdn"] = {"Umanakaina", 7881084, "ngf", Latn} + +m["gdo"] = {"Godoberi", 56515, "cau-ava", 
{"Cyrl"}} + +m["gdq"] = {"Mehri", 13361, "sem-sar", {"Arab", "Latn"}} + +m["gdr"] = {"Wipi", 8026711, "paa", Latn} + +m["gds"] = {"Ghandruk Sign Language", 15971577, "sgn"} + +m["gdt"] = {"Kungardutyi", 6444517, "aus-kar", Latn} + +m["gdu"] = {"Gudu", 3441172, "cdc-cbm", Latn} + +m["gdx"] = {"Godwari", 3540922, "inc-wes", ancestors = {"mwr"}} + +m["gea"] = {"Geruma", 3438789, "cdc-wst", Latn} + +m["geb"] = {"Kire", 11129733, "paa", Latn} + +m["gec"] = {"Gboloo Grebo", 11019342, "kro-grb", Latn, ancestors = {"grb"}} + +m["ged"] = {"Gade", 3914459, "alv-nup", Latn} + +m["geg"] = {"Gengle", 3438345, "alv-mye", Latn, ancestors = {"kow"}} + +m["geh"] = {"Hutterisch", 33385, "gmw", Latn, ancestors = {"bar"}} + +m["gei"] = {"Gebe", 3100032, "poz-hce", Latn} + +m["gej"] = {"Gen", 33450, "alv-gbe", Latn} + +m["gek"] = {"Gerka", 3441277, "cdc-wst", Latn} + +m["gel"] = {"Fakkanci", 36627, "nic-knn", Latn} + +m["geq"] = {"Geme", 3915851, "znd", Latn} + +m["ges"] = {"Geser-Gorom", 5553579, "poz-cma", Latn} + +m["gev"] = {"Viya", 7937974, "bnt-tso", Latn} + +m["gew"] = {"Gera", 3438725, "cdc-wst", Latn} + +m["gex"] = {"Garre", 56618, "cus", Latn} + +m["gey"] = {"Enya", 5381452, "bnt-mbe", Latn} + +m["gez"] = {"Ge'ez", 35667, "sem-eth", {"Ethi"}, translit_module = "Ethi-translit"} + +m["gfk"] = {"Patpatar", 3368846, "poz-ocw", Latn} + +m["gft"] = {"Gafat", 56910, "sem-eth", {"Ethi", "Latn"}} + +m["gga"] = {"Gao", 3095228, "poz-ocw", Latn} + +m["ggb"] = {"Gbii", 3914390, "kro-wkr", Latn} + +m["ggd"] = {"Gugadj", 5615186, "aus-pmn", Latn} + +m["gge"] = {"Guragone", 5619801, "aus-arn", Latn} + +m["ggg"] = {"Gurgula", 5620032, "inc-wes", {"Arab"}, ancestors = {"mwr"}} + +m["ggk"] = {"Kungarakany", 6444516, "aus-arn", Latn} + +m["ggl"] = {"Ganglau", 5521140, "ngf-mad", Latn} + +m["ggn"] = {"Eastern Gurung", 12952472, "sit-tam", {"Deva", "Latn"}} + +m["ggt"] = {"Gitua", 3107865, "poz-ocw", Latn} + +m["ggu"] = {"Gban", 3913317, "dmn-nbe", Latn} + +m["ggw"] = {"Gogodala", 3512161, 
"paa-pag", Latn} + +m["gha"] = { + "Ghadames", 56747, "ber", Latn -- and other scripts? +} + +m["ghe"] = {"Southern Ghale", 12952453, "sit-tam", {"Deva"}} + +m["ghh"] = {"Northern Ghale", 22662104, "sit-tam", {"Deva"}} + +m["ghk"] = {"Geko Karen", 5530317, "kar"} + +m["ghl"] = { + "Ghulfan", 16885737, "nub-hil", Latn -- and others? +} + +m["ghn"] = {"Ghanongga", 3104772, "poz-ocw", Latn} + +m["gho"] = {"Ghomara", 35315, "ber"} + +m["ghr"] = {"Ghera", 22808992, "inc-hiw"} + +m["ghs"] = {"Guhu-Samane", 11732219, "ngf", Latn} + +m["ght"] = {"Kutang Ghale", 6448337, "sit-tam", {"Tibt"}} + +m["gia"] = {"Kitja", 1284877, "aus-jar", Latn} + +m["gib"] = {"Gibanawa", 12953530, "crp", Latn, ancestors = {"ha"}} + +m["gid"] = {"Gidar", 35265, "cdc-cbm", Latn} + +m["gie"] = {"Guébie", nil, "kro-did", Latn} + +m["gig"] = {"Goaria", 33269, "inc-wes", {"Arab"}, ancestors = {"mwr"}} + +m["gih"] = {"Githabul", nil, "aus-pam", Latn} + +m["gil"] = {"Gilbertese", 30898, "poz-mic", Latn} + +m["gim"] = {"Gimi (Goroka)", 11732209, "paa", Latn} + +m["gin"] = {"Hinukh", 33283, "cau-tsz", {"Cyrl"}, translit_module = "gin-translit"} + +m["gio"] = {"Gelao", 56401, "qfa-tak"} + +m["gip"] = {"Gimi (Austronesian)", 12952457, "poz-ocw"} + +m["giq"] = {"Green Gelao", 12953525, "qfa-gel"} + +m["gir"] = {"Red Gelao", 3100264, "qfa-gel"} + +m["gis"] = {"North Giziga", 3515084, "cdc-cbm"} + +m["git"] = {"Gitxsan", 3107862, "nai-tsi"} + +m["giu"] = {"Mulao", 11092831, "qfa-gel"} + +m["giw"] = {"White Gelao", 8843040, "qfa-gel"} + +m["gix"] = {"Gilima", 10977716, "nic-nkm", Latn} + +m["giy"] = {"Giyug", 5565906} + +m["giz"] = {"South Giziga", 3502232, "cdc-cbm"} + +m["gji"] = {"Geji", 3914890, "cdc-wst", Latn} + +m["gjk"] = {"Kachi Koli", 12953646, "inc-wes"} + +m["gjm"] = {"Gunditjmara", 6448731, "aus-pam"} + +m["gjn"] = {"Gonja", 35267, "alv-gng", Latn} + +m["gju"] = {"Gojri", 3241731, "inc-wes", ancestors = {"raj"}} + +m["gka"] = {"Guya", 11732221, "ngf-fin", Latn} + +m["gkd"] = {"Magɨ", nil, 
"ngf-mad", Latn} + +m["gke"] = {"Ndai", 6983667, "alv-mbm"} + +m["gkn"] = {"Gokana", 3075137, "nic-ogo", Latn} + +m["gko"] = {"Kok-Nar", 6426526, "aus-pmn", Latn} + +m["gkp"] = {"Guinea Kpelle", 11052867, "dmn-msw", ancestors = {"kpe"}} + +m["glc"] = {"Bon Gula", 289816, "alv-bua"} + +m["gld"] = {"Nanai", 13303, "tuw", {"Cyrl"}, entry_name = {from = {"[Ӣ]", "[ӣ]", "[Ӯ]", "[ӯ]", MACRON}, to = {"И", "и", "У", "у"}}} + +m["glh"] = {"Northwest Pashayi", 23713532, "inc-dar"} + +m["glj"] = {"Kulaal", 33360, "alv-bua"} + +m["glk"] = {"Gilaki", 33657, "ira-csp", {"fa-Arab"}} + +m["glo"] = {"Galambu", 2598797, "cdc-wst", Latn} + +m["glr"] = {"Glaro-Twabo", 3915313, "kro-wee"} + +m["glu"] = {"Gula", 5617176, "csu-bgr"} + +m["glw"] = {"Glavda", 3441285, "cdc-cbm", Latn} + +m["gly"] = {"Gule", 3120736, "ssa-kom"} + +m["gma"] = {"Gambera", 10502327, "aus-wor"} + +m["gmb"] = {"Gula'alaa", 3120733, "poz-sls", Latn} + +m["gmd"] = {"Mághdì", 3914475, "alv-bwj"} + +m["gmg"] = {"Magiyi", 16926155, "ngf-mad", Latn} + +m["gmh"] = {"Middle High German", 837985, "gmw", Latn, ancestors = {"goh"}, entry_name = {from = {"[ĀÂ]", "[āâ]", "[ĒÊË]", "[ēêë]", "[ĪÎ]", "[īî]", "[ŌÔ]", "[ōô]", "[ŪÛ]", "[ūû]", "[ǣæ̂]", "[ǢÆ̂]"}, to = {"A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "æ", "Æ"}}} + +m["gml"] = {"Middle Low German", 505674, "gmw", Latn, ancestors = {"osx"}, entry_name = {from = {"[ĀÂÄǞ]", "[āâäǟ]", "[ĒÊ]", "[ēê]", "[ĪÎ]", "[īî]", "[ŌÔÖȪ]", "[ōôöȫ]", "[ŪÛÜǕ]", "[ūûüǖ]", "[̂]"}, to = {"A", "a", "E", "e", "I", "i", "O", "o", "U", "u", ""}}} + +m["gmm"] = {"Gbaya-Mbodomo", 6799713, "alv-gbf", Latn} + +m["gmn"] = {"Gimnime", 11016905, "alv-dur"} + +m["gmu"] = {"Gumalu", 5618027, "ngf-mad"} + +m["gmv"] = {"Gamo", 16116386, "omv-nom", {"Latn", "Ethi"}} + +m["gmx"] = {"Magoma", 16939552, "bnt-bki"} + +m["gmy"] = {"Mycenaean Greek", 668366, "grk", {"Linb"}, translit_module = "Linb-translit", override_translit = true} + +m["gmz"] = {"Mgbo", 6826835, "alv-igb", ancestors = {"izi"}} + +m["gna"] = 
{"Kaansa", 56802, "nic-gur"} + +m["gnb"] = {"Gangte", 12952442, "tbq-kuk"} + +m["gnc"] = {"Guanche", 35762, "ber"} + +m["gnd"] = {"Zulgo-Gemzek", 56800, "cdc-cbm", Latn} + +m["gne"] = {"Ganang", nil, "nic-plc", ancestors = {"izr"}} + +m["gng"] = {"Ngangam", 35888, "nic-grm"} + +m["gnh"] = {"Lere", 3915319, "nic-jer"} + +m["gni"] = {"Gooniyandi", 2669219, "aus-bub", Latn} + +m["gnj"] = {"Ngen", nil, "dmn-nbe", Latn} + +m["gnk"] = {"ǁGana", 1975199, "khi-kal"} + +m["gnl"] = {"Gangulu", 4916329, "aus-pam"} + +m["gnm"] = {"Ginuman", 11732210, "ngf"} + +m["gnn"] = {"Gumatj", 10510745, "aus-yol", Latn} + +m["gnq"] = {"Gana", 5520523, "poz-san"} + +m["gnr"] = {"Gureng Gureng", 5619998, "aus-pam"} + +m["gnt"] = {"Guntai", 12952475, "paa"} + +m["gnu"] = {"Gnau", 3915810, "qfa-tor"} + +m["gnw"] = {"Western Bolivian Guaraní", 3775037, "tup-gua", Latn, ancestors = {"gn"}} + +m["gnz"] = {"Ganzi", 11137942, "nic-nkb", Latn} + +m["goa"] = {"Guro", 35251, "dmn-mda", Latn} + +m["gob"] = {"Playero", 3027923, "sai-guh"} + +m["goc"] = {"Gorakor", 12952463, "poz-ocw", Latn} + +m["god"] = {"Godié", 3914412, "kro-bet"} + +m["goe"] = {"Gongduk", 2669221, "sit"} + +m["gof"] = {"Gofa", 12631584, "omv-nom", {"Latn", "Ethi"}} + +m["gog"] = {"Gogo", 3272630, "bnt-ruv", Latn} + +m["goh"] = {"Old High German", 35218, "gmw", Latn, entry_name = {from = {"[ĀÂ]", "[āâ]", "[ĒÊË]", "[ēêë]", "[ĪÎ]", "[īî]", "[ŌÔ]", "[ōô]", "[ŪÛ]", "[ūû]", "ʒ"}, to = {"A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "z"}}} + +m["goi"] = {"Gobasi", 5575414, "ngf", Latn} + +m["goj"] = {"Gowlan", 12953532, "inc-sou"} + +-- gok is a spurious language, see [[w:Spurious languages]] + +m["gol"] = {"Gola", 35482, "alv"} + +m["gon"] = {"Gondi", 1775361, "dra", {"Telu", "Gonm", "Gong"}} + +m["goo"] = {"Gone Dau", 3110470, "poz-occ", Latn} + +m["gop"] = {"Yeretuar", 8052565, "poz-hce", Latn} + +m["goq"] = {"Gorap", 3110816, "crp", ancestors = {"ms"}} + +m["gor"] = {"Gorontalo", 2501174, "phi", Latn} + +m["got"] = {"Gothic", 
35722, "gme", {"Goth", "Runr"}, translit_module = "Goth-translit", link_tr = true, entry_name = {from = {"Ā", "ā", "Ē", "ē", "Ī", "ī", "Ō", "ō", "Ū", "ū"}, to = {"A", "a", "E", "e", "I", "i", "O", "o", "U", "u"}}} + +m["gou"] = {"Gavar", 3441180, "cdc-cbm"} + +m["gow"] = {"Gorowa", 3437626, "cus"} + +m["gox"] = {"Gobu", 7194986, "bad-cnt"} + +m["goy"] = {"Goundo", 317636, "alv-kim"} + +m["goz"] = {"Gozarkhani", 5590235, "xme-ttc", ancestors = {"xme-ttc-eas"}} + +m["gpa"] = {"Gupa-Abawa", 3915352, "alv-ngb"} + +m["gpn"] = {"Taiap", 56237, "paa"} + +m["gqa"] = {"Ga'anda", 56245, "cdc-cbm", Latn} + +m["gqi"] = {"Guiqiong", 3120647, "sit-qia"} + +m["gqn"] = { -- a variety of 'ter' + "Kinikinao", nil, "awd" +} + +m["gqr"] = {"Gor", 759992, "csu-sar"} + +m["gqu"] = {"Qau", 17284874, "qfa-gel"} + +m["gra"] = {"Rajput Garasia", 21041529, "inc-bhi", {"Deva", "Gujr"}, ancestors = {"bhb"}} + +m["grb"] = {"Grebo", 35257, "kro-grb", Latn} + +m["grc"] = { + "Ancient Greek", + 35497, + "grk", + {"polytonic", "Cprt"}, + translit_module = "translit-redirect", + override_translit = true, + -- Keep this synchronized with el, cpg, pnt; compare ine-pae + sort_key = {remove_diacritics = ACUTE .. GRAVE .. GRC_CIRC .. DIAER .. COMMA .. REV_COMMA}, + entry_name = {remove_diacritics = MACRON .. BREVE .. UNDERTIE, from = {"[" .. RSQUO .. PSILI .. CORONIS .. "]"}, to = {"'"}}, + standardChars = "ͺ;΄-ώϜϝ" .. PUNCTUATION .. 
"ἀ-῾𐠀-𐠿" +} + +m["grd"] = {"Guruntum", 3441272, "cdc-wst", Latn} + +m["grg"] = {"Madi", 6727664, "ngf-fin", Latn} + +m["grh"] = {"Gbiri-Niragu", 3913936, "nic-kau", Latn} + +m["gri"] = {"Ghari", 3104782, "poz-sls", Latn} + +m["grj"] = {"Southern Grebo", 3914444, "kro-grb", Latn, ancestors = {"grb"}} + +m["grm"] = {"Kota Marudu Talantang", 6433808, "poz-san", Latn} + +m["gro"] = {"Groma", 56551, "sit-tib"} + +m["grq"] = {"Gorovu", 56355, "paa", Latn} + +m["grr"] = {"Taznatit", 3063974, "ber"} + +m["grs"] = {"Gresi", 5607612, "paa-nim", Latn} + +m["grt"] = {"Garo", 36137, "tbq-bdg", {"Latn", "Beng", "Brai"}} + +m["gru"] = {"Kistane", 13273, "sem-eth", Latn} + +m["grv"] = {"Central Grebo", 18385114, "kro-grb", Latn, ancestors = {"grb"}} + +m["grw"] = {"Gweda", 5623387, "poz-ocw", Latn} + +m["grx"] = {"Guriaso", 12631954, "paa-kwm", Latn} + +m["gry"] = {"Barclayville Grebo", 11157342, "kro-grb", Latn, ancestors = {"grb"}} + +m["grz"] = {"Guramalum", 3120935, "poz-ocw", Latn} + +m["gse"] = { + "Ghanaian Sign Language", 35289, "sgn", Latn -- when documented +} + +m["gsg"] = {"German Sign Language", 33282, "sgn-gsl", {"Sgnw"}} + +m["gsl"] = {"Gusilay", 35439, "alv-jol", Latn} + +m["gsm"] = { + "Guatemalan Sign Language", 2886781, "sgn", Latn -- when documented +} + +m["gsn"] = {"Gusan", 11732224, "ngf-fin", Latn} + +m["gso"] = {"Southwest Gbaya", 4919322, "alv-gbs", Latn} + +m["gsp"] = {"Wasembo", 7971402, "ngf-mad", Latn} + +m["gss"] = {"Greek Sign Language", 3565084, "sgn"} + +m["gsw"] = {"Alemannic German", 131339, "gmw", Latn, ancestors = {"gmh"}, wikimedia_codes = {"als"}} + +m["gta"] = {"Guató", 3027940, "qfa-iso", Latn} + +m["gtu"] = {"Aghu Tharrnggala", 16825981, "aus-pmn", Latn} + +m["gua"] = {"Shiki", 3913946, "nic-jrn", Latn} + +m["gub"] = {"Guajajára", 7699720, "tup-gua", Latn} + +m["guc"] = {"Wayuu", 891085, "awd-taa", Latn, ancestors = {"awd-taa-pro"}} + +m["gud"] = {"Yocoboué Dida", 21074781, "kro-did", Latn} + +m["gue"] = {"Gurindji", 10511016, "aus-pam", 
Latn} + +m["guf"] = {"Gupapuyngu", 10511004, "aus-yol", Latn} + +m["gug"] = {"Paraguayan Guaraní", 17478066, "tup-gua", Latn, ancestors = {"gn"}} + +m["guh"] = {"Guahibo", 2669193, "sai-guh", Latn} + +m["gui"] = {"Eastern Bolivian Guaraní", 2963912, "tup-gua", Latn, ancestors = {"gn"}} + +m["guk"] = {"Gumuz", 2396970, "ssa", {"Latn", "Ethi"}} + +m["gul"] = {"Gullah", 33395, "crp", Latn, ancestors = {"en"}} + +m["gum"] = {"Guambiano", 2744745, "sai-bar", Latn} + +m["gun"] = {"Mbyá Guaraní", 3915584, "tup-gua", Latn, ancestors = {"gn"}} + +m["guo"] = {"Guayabero", 2980375, "sai-guh", Latn} + +m["gup"] = {"Gunwinggu", 1406574, "aus-gun", Latn} + +m["guq"] = {"Aché", 383701, "tup", Latn} + +m["gur"] = {"Farefare", 35331, "nic-mre", Latn} + +m["gus"] = { + "Guinean Sign Language", 15983937, "sgn", Latn -- when documented +} + +m["gut"] = {"Maléku Jaíka", 3915782, "cba", Latn} + +m["guu"] = {"Yanomamö", 8048928, "sai-ynm", Latn} + +m["guv"] = {"Gey", nil, "alv-sav", Latn} + +m["guw"] = {"Gun", 3111668, "alv-gbe", Latn} + +m["gux"] = {"Gourmanchéma", 35474, "nic-grm", Latn} + +m["guz"] = {"Gusii", 33603, "bnt-lok", Latn} + +m["gva"] = {"Kaskihá", 3033534, "sai-mas", Latn} + +m["gvc"] = {"Guanano", 3566001, "sai-tuc", Latn} + +m["gve"] = {"Duwet", 5317647, "poz-ocw", Latn} + +m["gvf"] = {"Golin", 3110291, "ngf", Latn} + +m["gvj"] = {"Guajá", 3915506, "tup", Latn} + +m["gvl"] = {"Gulay", 641737, "csu-sar", Latn} + +m["gvm"] = {"Gurmana", 3913363, "nic-shi", Latn} + +m["gvn"] = {"Kuku-Yalanji", 5621973, "aus-pam", Latn} + +m["gvo"] = {"Gavião do Jiparaná", 5528335, "tup", Latn} + +m["gvp"] = {"Pará Gavião", 3365443, "sai-nje", Latn} + +m["gvr"] = {"Western Gurung", 2392342, "sit-tam", {"Deva"}} + +m["gvs"] = {"Gumawana", 5618041, "poz-ocw", Latn} + +m["gvy"] = {"Guyani", 10511230, "aus-pam", Latn} + +m["gwa"] = {"Mbato", 3914941, "alv-ptn", Latn} + +m["gwb"] = {"Gwa", 5623219, "nic-jrn", Latn} + +m["gwc"] = {"Kalami", 1675961, "inc-dar", {"Arab"}} + +m["gwd"] = {"Gawwada", 
3032135, "cus"} + +m["gwe"] = {"Gweno", 3358211, "bnt-chg", Latn} + +m["gwf"] = {"Gowro", 3812403, "inc-dar"} + +m["gwg"] = {"Moo", 6907057, "alv-bwj", Latn} + +m["gwi"] = {"Gwich'in", 21057, "ath-nor", Latn} + +m["gwj"] = {"Gcwi", 12631978, "khi-kal", {"Latinx"}} + +m["gwm"] = {"Awngthim", 4830109, "aus-pmn", Latn} + +m["gwn"] = {"Gwandara", 56521, "cdc-wst", Latn} + +m["gwr"] = {"Gwere", 5623559, "bnt-nyg", Latn} + +m["gwt"] = {"Gawar-Bati", 33894, "inc-dar"} + +m["gwu"] = {"Guwamu", 10511225, "aus-pam", Latn} + +m["gww"] = {"Kwini", 10551249, "aus-wor", Latn} + +m["gwx"] = {"Gua", 35422, "alv-gng", Latn} + +m["gxx"] = {"Wè Southern", 19921582, "kro-wee", Latn} + +m["gya"] = {"Northwest Gbaya", 36594, "alv-gbw", Latn} + +m["gyb"] = {"Garus", 5524492, "ngf-mad", Latn} + +m["gyd"] = {"Kayardild", 3913770, "aus-tnk", Latn} + +m["gye"] = {"Gyem", 5624046, "nic-jer", Latn} + +m["gyf"] = {"Gungabula", 10510783, "aus-pam", Latn} + +m["gyg"] = {"Gbayi", 11137618, "nic-ngd", Latn} + +m["gyi"] = {"Gyele", 35434, "bnt-mnj", Latn} + +m["gyl"] = {"Gayil", 5528771, "omv-aro", Latn} + +m["gym"] = {"Ngäbere", 3915581, "cba", Latn} + +m["gyn"] = {"Guyanese Creole English", 3305477, "crp", Latn, ancestors = {"en"}} + +m["gyo"] = {"Gyalsumdo", 53575940, "sit-kyk"} + +m["gyr"] = {"Guarayu", 3118779, "tup", Latn} + +m["gyy"] = {"Gunya", 10511001, "aus-pam", Latn} + +m["gza"] = {"Ganza", 5521556, "omv-mao", Latn} + +m["gzn"] = {"Gane", 3095108, "poz-hce", Latn} + +return m diff --git a/wiktra/wikt/translit/languages/data3/h.lua b/wiktra/wikt/translit/languages/data3/h.lua new file mode 100644 index 0000000..258123d --- /dev/null +++ b/wiktra/wikt/translit/languages/data3/h.lua @@ -0,0 +1,383 @@ +local u = mw.ustring.char + +-- UTF-8 encoded strings for some commonly used diacritics +local GRAVE = u(0x0300) +local ACUTE = u(0x0301) +local CIRC = u(0x0302) +local TILDE = u(0x0303) +local MACRON = u(0x0304) +local BREVE = u(0x0306) +local DOTABOVE = u(0x0307) +local DIAER = u(0x0308) 
+local CARON = u(0x030C) +local DGRAVE = u(0x030F) +local INVBREVE = u(0x0311) +local DOTBELOW = u(0x0323) +local RINGBELOW = u(0x0325) +local CEDILLA = u(0x0327) + +local Latn = {"Latn"} + +local m = {} + +m["haa"] = {"Hän", 28272, "ath-nor", Latn} + +m["hab"] = { + "Hanoi Sign Language", 12632107, "sgn", Latn -- when documented +} + +m["hac"] = {"Gurani", 33733, "ira-zgr", {"ku-Arab"}, translit_module = "ckb-translit"} + +m["had"] = {"Hatam", 56825, "paa-wpa"} + +m["haf"] = {"Haiphong Sign Language", 39868240, "sgn"} + +m["hag"] = {"Hanga", 35426, "nic-dag", Latn} + +m["hah"] = {"Hahon", 3125730, "poz-ocw", Latn} + +m["hai"] = {"Haida", 33303, "qfa-iso", Latn} + +m["haj"] = {"Hajong", 3350576, "qfa-mix", {"as-Beng", "Latn"}, ancestors = {"inc-oas", "tbq-pro"}} + +m["hak"] = {"Hakka", 33375, "zhx", {"Hani"}, ancestors = {"ltc"}} + +m["hal"] = {"Halang", 56307, "mkh"} + +m["ham"] = {"Hewa", 5748345, "paa-spk"} + +m["hao"] = {"Hakö", 3125871, "poz-ocw", Latn} + +m["hap"] = {"Hupla", 5946223, "ngf"} + +m["har"] = {"Harari", 33626, "sem-eth", {"Ethi"}, translit_module = "Ethi-translit"} + +m["has"] = {"Haisla", 3107399, "wak"} + +m["hav"] = {"Havu", 5684097, "bnt-shh", Latn} + +m["haw"] = {"Hawaiian", 33569, "poz-pep", Latn, sort_key = {from = {"ā", "ē", "ī", "ō", "ū"}, to = {"a", "e", "i", "o", "u"}}} + +m["hax"] = {"Southern Haida", 12953543, "qfa-iso", ancestors = {"hai"}} + +m["hay"] = {"Haya", 35756, "bnt-haj"} + +m["haz"] = {"Hazaragi", 33398, "ira-swi"} + +m["hba"] = {"Hamba", 11028905, "bnt-tet"} + +m["hbb"] = {"Huba", 56290, "cdc-cbm"} + +m["hbn"] = {"Heiban", 35523, "alv-hei"} + +m["hbu"] = {"Habu", 1567033, "poz-cet", Latn} + +m["hca"] = {"Andaman Creole Hindi", 7599417, "crp", ancestors = {"hi", "bn", "ta"}} + +m["hch"] = {"Huichol", 35575, "azc", Latn} + +m["hdn"] = {"Northern Haida", 20054484, "qfa-iso", ancestors = {"hai"}} + +m["hds"] = { + "Honduras Sign Language", 3915496, "sgn", Latn -- when documented +} + +m["hdy"] = {"Hadiyya", 56613, "cus"} + 
+m["hea"] = {"Northern Qiandong Miao", 3138832, "hmn"} + +m["hed"] = {"Herdé", 56253, "cdc-mas"} + +m["heg"] = {"Helong", 35432, "poz-tim", Latn} + +m["heh"] = {"Hehe", 3129390, "bnt-bki", Latn} + +m["hei"] = {"Heiltsuk", 5699507, "wak"} + +m["hem"] = {"Hemba", 5711209, "bnt-lbn"} + +m["hgm"] = {"Haiǁom", 4494781, "khi-khk", Latn} + +m["hgw"] = {"Haigwai", 5639108, "poz-ocw", Latn} + +m["hhi"] = {"Hoia Hoia", 5877767, "ngf"} + +m["hhr"] = {"Kerak", 11010783, "alv-jfe"} + +m["hhy"] = {"Hoyahoya", nil, "ngf"} + +m["hia"] = {"Lamang", 35700, "cdc-cbm", Latn} + +m["hib"] = {"Hibito", 3135164} + +m["hid"] = {"Hidatsa", 3135234, "sio-mor", Latn} + +m["hif"] = {"Fiji Hindi", 46728, "inc-hie", Latn, ancestors = {"awa"}} + +m["hig"] = {"Kamwe", 56271, "cdc-cbm"} + +m["hih"] = {"Pamosu", 12953011, "ngf-mad"} + +m["hii"] = {"Hinduri", 5766763, "him"} + +m["hij"] = {"Hijuk", 35274, "bnt-bsa"} + +m["hik"] = {"Seit-Kaitetu", 7446989, "poz-cma"} + +m["hil"] = {"Hiligaynon", 35978, "phi", Latn, entry_name = {from = {"Á", "á", "É", "é", "Í", "í", "Ó", "ó", "Ú", "ú", MACRON}, to = {"A", "a", "E", "e", "I", "i", "O", "o", "U", "u"}}} + +m["hio"] = {"Tshwa", 963636, "khi-kal"} + +m["hir"] = {"Himarimã", 5765127} + +m["hit"] = {"Hittite", 35668, "ine-ana", {"Xsux"}} + +m["hiw"] = {"Hiw", 3138713, "poz-vnc", Latn} + +m["hix"] = {"Hixkaryana", 56522, "sai-car", Latn} + +m["hji"] = {"Haji", 5639933, "poz-mly"} + +m["hka"] = {"Kahe", 3892562, "bnt-chg", Latn} + +m["hke"] = {"Hunde", 3065432, "bnt-shh", Latn} + +m["hkk"] = {"Hunjara-Kaina Ke", nil, "ngf"} + +m["hkn"] = {"Mel-Khaonh", nil, "mkh-ban"} + +m["hks"] = {"Hong Kong Sign Language", 17038844, "sgn"} + +m["hla"] = {"Halia", 3125959, "poz-ocw", Latn} + +m["hlb"] = {"Halbi", 3695692, "inc-eas", {"Deva", "Orya"}, ancestors = {"inc-mgd"}} + +m["hld"] = {"Halang Doan", 3914632, "mkh-ban"} + +m["hle"] = {"Hlersu", 5873537, "tbq-lol"} + +m["hlt"] = {"Nga La", 12952942, "tbq-kuk"} + +m["hma"] = {"Southern Mashan Hmong", 12953560, "hmn"} + 
+m["hmb"] = {"Humburi Senni", 35486, "son"} + +m["hmc"] = {"Central Huishui Hmong", 12953558, "hmn"} + +m["hmd"] = {"A-Hmao", 1108934, "hmn", {"Latn", "Plrd"}} + +m["hme"] = {"Eastern Huishui Hmong", 12953559, "hmn"} + +m["hmf"] = {"Hmong Don", 22911602, "hmn"} + +m["hmg"] = {"Southwestern Guiyang Hmong", 27478542, "hmn"} + +m["hmh"] = {"Southwestern Huishui Hmong", 12953565, "hmn"} + +m["hmi"] = {"Northern Huishui Hmong", 27434946, "hmn"} + +m["hmj"] = {"Ge", 11251864, "hmn"} + +m["hmk"] = {"Maek", 8050724, "qfa-kor"} + +m["hml"] = {"Luopohe Hmong", 14468943, "hmn"} + +m["hmm"] = {"Central Mashan Hmong", 12953561, "hmn"} + +m["hmp"] = {"Northern Mashan Hmong", 12953564, "hmn"} + +m["hmq"] = {"Eastern Qiandong Miao", 27431369, "hmn"} + +m["hmr"] = {"Hmar", 2992841, "tbq-kuk", ancestors = {"lus"}} + +m["hms"] = {"Southern Qiandong Miao", 12953562, "hmn"} + +m["hmt"] = {"Hamtai", 5646436, "ngf"} + +m["hmu"] = {"Hamap", 12952484, "qfa-tap"} + +m["hmv"] = {"Hmong Dô", 22911598, "hmn"} + +m["hmw"] = {"Western Mashan Hmong", 12953563, "hmn"} + +m["hmy"] = {"Southern Guiyang Hmong", 12953553, "hmn"} + +m["hmz"] = {"Hmong Shua", 25559603, "hmn"} + +m["hna"] = {"Mina", 56532, "cdc-cbm"} + +m["hnd"] = {"Southern Hindko", 382273, "inc-pan", ancestors = {"lah"}} + +m["hne"] = {"Chhattisgarhi", 33158, "inc-hie", {"Deva"}, ancestors = {"inc-pra"}} + +m["hnh"] = {"ǁAni", 3832982, "khi-kal", {"Latinx"}} + +m["hni"] = {"Hani", 56516, "tbq-lol"} + +m["hnj"] = {"Green Hmong", 3138831, "hmn", {"Latn", "Hmng"}} + +m["hnn"] = {"Hanunoo", 35435, "phi", {"Hano", "Latn"}} + +m["hno"] = {"Northern Hindko", 6346358, "inc-pan", {"Arab"}, ancestors = {"lah"}} + +m["hns"] = {"Caribbean Hindustani", 1843468, "inc-hie", ancestors = {"bho"}} + +m["hnu"] = {"Hung", 12632753, "mkh-vie"} + +m["hoa"] = {"Hoava", 3138887, "poz-ocw", Latn} + +m["hob"] = {"Austronesian Mari", 6760941, "poz-ocw", Latn} + +m["hoc"] = {"Ho", 33270, "mun", {"Wara", "Orya", "Deva", "Latn"}} + +m["hod"] = {"Holma", 56331, 
"cdc-cbm", Latn} + +m["hoe"] = {"Horom", 3914008, "nic-ple", Latn} + +m["hoh"] = {"Hobyót", 33299, "sem-sar", {"Arab", "Latn"}} + +m["hoi"] = {"Holikachuk", 28508, "ath-nor", Latn} + +m["hoj"] = {"Hadothi", 33227, "inc-wes", ancestors = {"mwr"}} + +m["hol"] = {"Holu", 4121133, "bnt-pen", Latn} + +m["hom"] = {"Homa", 3449953, "bnt-boa", Latn} + +m["hoo"] = {"Holoholo", 3139484, "bnt-tkm", Latn} + +m["hop"] = {"Hopi", 56421, "azc", Latn} + +m["hor"] = {"Horo", 641748, "csu-sar"} + +m["hos"] = { + "Ho Chi Minh City Sign Language", 16111971, "sgn", Latn -- when documented +} + +m["hot"] = {"Hote", 12632404, "poz-ocw", Latn} + +m["hov"] = {"Hovongan", 5917269, "poz"} + +m["how"] = {"Honi", 56842, "tbq-lol"} + +m["hoy"] = {"Holiya", 5880707, "dra"} + +m["hoz"] = {"Hozo", 5923010, "omv-mao"} + +m["hpo"] = {"Hpon", 5923277, "tbq-brm"} + +m["hps"] = { + "Hawai'i Pidgin Sign Language", 33358, "sgn", Latn -- when documented +} + +m["hra"] = {"Hrangkhol", 5923435, "tbq-kuk"} + +m["hrc"] = {"Niwer Mil", nil, "poz-oce", Latn} + +m["hre"] = {"Hre", 3915794, "mkh-nbn"} + +m["hrk"] = {"Haruku", 5675762, "poz-cma"} + +m["hrm"] = {"Horned Miao", nil, "hmn"} + +m["hro"] = {"Haroi", 3127568, "cmc", Latn} + +m["hrp"] = {"Nhirrpi", 32571318, "aus-kar"} + +m["hrt"] = {"Hértevin", 33290, "sem-nna", Latn} + +m["hru"] = {"Hruso", 5923933, "sit-hrs"} + +m["hrw"] = {"Warwar Feni", nil, "poz-oce", Latn} + +m["hrx"] = {"Hunsrik", 304049, "gmw", Latn, ancestors = {"gmw-cfr"}} + +m["hrz"] = {"Harzani", 56464, "xme-ttc", ancestors = {"xme-ttc-nor"}} + +m["hsb"] = { + "Upper Sorbian", + 13248, + "wen", + Latn, + sort_key = {from = {"č", "ć", "ě", "ch", "ł", "ń", "ó", "ř", "š", "ž", "ź"}, to = {"c~", "c~~", "e~", "h~", "l`", "n~", "o", "r~", "s~", "z~", "z~~"}} -- the digraph ch comes after h; ł comes before l +} + +m["hsh"] = { + "Hungarian Sign Language", 13636869, "sgn", Latn -- when documented +} + +m["hsl"] = { + "Hausa Sign Language", 3915462, "sgn", Latn -- when documented +} + +m["hsn"] = 
{"Xiang", 13220, "zhx", {"Hani"}, ancestors = {"ltc"}} + +m["hss"] = {"Harsusi", 33423, "sem-sar", {"Arab", "Latn"}} + +m["hti"] = {"Hoti", 5912372, "poz-cma", Latn} + +m["hto"] = {"Minica Huitoto", 948514, "sai-wit", Latn} + +m["hts"] = {"Hadza", 33411, "qfa-iso", Latn} + +m["htu"] = {"Hitu", 5872700, "poz-cma", Latn} + +m["hub"] = {"Huambisa", 1526037, "sai-jiv", Latn} + +m["huc"] = {"ǂHoan", 2053913, "khi-kxa", {"Latinx"}} + +m["hud"] = {"Huaulu", 12952504, "poz-cma", Latn} + +m["huf"] = {"Humene", 11732231, "ngf", Latn} + +m["hug"] = {"Huachipaeri", 3446617, "sai-har", Latn} + +m["huh"] = {"Huilliche", 35531, "sai-ara", Latn} + +m["hui"] = {"Huli", 3125121, "paa-eng", Latn} + +m["huj"] = {"Northern Guiyang Hmong", 12953554, "hmn"} + +m["huk"] = {"Hulung", 12952505, "poz-cet"} + +m["hul"] = {"Hula", 6382179, "poz-ocw", Latn} + +m["hum"] = {"Hungana", 10975396, "bnt-yak"} + +m["huo"] = {"Hu", 3141783, "mkh-pal"} + +m["hup"] = {"Hupa", 28058, "ath-pco", Latn} + +m["huq"] = {"Tsat", 34133, "cmc"} + +m["hur"] = {"Halkomelem", 35388, "sal", Latn} + +m["hus"] = {"Wastek", 35573, "myn", Latn} + +m["huu"] = {"Murui Huitoto", 2640935, "sai-wit", Latn} + +m["huv"] = {"Huave", 12954031, "qfa-iso", Latn} + +m["huw"] = {"Hukumina", 3142988, "poz-cma", Latn} + +m["hux"] = {"Nüpode Huitoto", 56333, "sai-wit", Latn} + +m["huy"] = {"Hulaulá", 33426, "sem-nna"} + +m["huz"] = {"Hunzib", 56564, "cau-tsz", {"Cyrl"}, translit_module = "huz-translit", entry_name = {from = {ACUTE}, to = {}}} + +m["hvc"] = {"Haitian Vodoun Culture Language", 3504239, "crp", Latn} + +m["hvk"] = {"Haveke", 5683513, "poz-cln", Latn} + +m["hvn"] = {"Sabu", 3128792, "poz-cet", Latn} + +m["hwa"] = {"Wané", 3914887, "kro-ekr", Latn} + +m["hwc"] = {"Hawaiian Creole", 35602, "crp", Latn} + +m["hwo"] = {"Hwana", 56498, "cdc-cbm", Latn} + +m["hya"] = {"Hya", 56798, "cdc-cbm", Latn} + +return m diff --git a/wiktra/wikt/translit/languages/data3/i.lua b/wiktra/wikt/translit/languages/data3/i.lua new file mode 100644 
index 0000000..4ec8269 --- /dev/null +++ b/wiktra/wikt/translit/languages/data3/i.lua @@ -0,0 +1,351 @@ +local u = mw.ustring.char + +-- UTF-8 encoded strings for some commonly-used diacritics +local GRAVE = u(0x0300) +local ACUTE = u(0x0301) +local CIRC = u(0x0302) +local TILDE = u(0x0303) +local MACRON = u(0x0304) +local BREVE = u(0x0306) +local DOTABOVE = u(0x0307) +local DIAER = u(0x0308) +local CARON = u(0x030C) +local DGRAVE = u(0x030F) +local INVBREVE = u(0x0311) +local DOTBELOW = u(0x0323) +local RINGBELOW = u(0x0325) +local CEDILLA = u(0x0327) + +local Latn = {"Latn"} + +local m = {} + +m["iai"] = {"Iaai", 282888, "poz-occ", Latn} + +m["ian"] = {"Iatmul", 5983460, "paa-spk"} + +m["iar"] = {"Purari", 3499934, "paa"} + +m["iba"] = {"Iban", 33424, "poz-mly", Latn} + +m["ibb"] = {"Ibibio", 33792, "nic-ief", Latn} + +m["ibd"] = {"Iwaidja", 1977429, "aus-wdj", Latn} + +m["ibe"] = {"Akpes", 35457, "alv-von", Latn} + +m["ibg"] = {"Ibanag", 1775596, "phi"} + +m["ibh"] = {"Bih", nil, "cmc", Latn} + +m["ibl"] = {"Ibaloi", 3147383, "phi"} + +m["ibm"] = {"Agoi", 34727, "nic-ucr", Latn} + +m["ibn"] = {"Ibino", 3813281, "nic-lcr", Latn} + +m["ibr"] = {"Ibuoro", 3813306, "nic-ief"} + +m["ibu"] = {"Ibu", 11732235, "paa-wpa"} + +m["iby"] = {"Ibani", 11280479, "ijo"} + +m["ica"] = {"Ede Ica", 12952405, "alv-ede", Latn} + +m["ich"] = {"Etkywan", 3914462, "nic-jkn", Latn} + +m["icl"] = { + "Icelandic Sign Language", 3436654, "sgn", Latn -- when documented +} + +m["icr"] = {"Islander Creole English", 2044587, "crp", Latn, ancestors = {"en"}} + +m["ida"] = {"Idakho-Isukha-Tiriki", 12952512, "bnt-lok"} + +m["idb"] = {"Indo-Portuguese", 6025550, "crp", Latn, ancestors = {"pt"}} + +m["idc"] = {"Idon", 3913366, "nic-plc"} + +m["idd"] = {"Ede Idaca", 13123376, "alv-ede", Latn} + +m["ide"] = {"Idere", 3813288, "nic-ief"} + +m["idi"] = {"Idi", 5988630, "paa"} + +m["idr"] = {"Indri", 35662, "nic-ser"} + +m["ids"] = {"Idesa", 3913979, "alv-swd", Latn, ancestors = {"oke"}} + +m["idt"] = 
{"Idaté", 12952511, "poz-tim", Latn} + +m["idu"] = {"Idoma", 35478, "alv-ido", Latn} + +m["ifa"] = {"Amganad Ifugao", 18748222, "phi"} + +m["ifb"] = {"Batad Ifugao", 12953578, "phi"} + +m["ife"] = {"Ifè", 33606, "alv-ede", Latn} + +m["iff"] = {"Ifo", 7902545, "poz-oce", Latn} + +m["ifk"] = {"Tuwali Ifugao", 7857158, "phi"} + +m["ifm"] = {"Teke-Fuumu", 36603, "bnt-tek"} + +m["ifu"] = {"Mayoyao Ifugao", 12953579, "phi", Latn} + +m["ify"] = {"Keley-I Kallahan", 3192221, "phi"} + +m["igb"] = {"Ebira", 35363, "alv-nup", Latn} + +m["ige"] = {"Igede", 35420, "alv-ido", Latn} + +m["igg"] = {"Igana", 5991454, "paa", Latn} + +m["igl"] = {"Igala", 35513, "alv-yor"} + +m["igm"] = {"Kanggape", 6362743, "paa", Latn} + +m["ign"] = {"Ignaciano", 3148190, "awd"} + +m["igo"] = {"Isebe", 11732248, "ngf-mad"} + +m["igs"] = {"Glosa", 2314240, "art", type = "appendix-constructed"} + +m["igw"] = {"Igwe", 3913985, "alv-yek", Latn} + +m["ihb"] = {"Pidgin Iha", 12639686, "crp", ancestors = {"ihp"}} + +m["ihi"] = {"Ihievbe", 3441193, "alv-eeo", Latn, ancestors = {"ema"}} + +m["ihp"] = {"Iha", 5994495, "ngf"} + +m["ijc"] = {"Izon", 35483, "ijo", Latn} + +m["ije"] = {"Biseni", 35010, "ijo"} + +m["ijj"] = {"Ede Ije", 12952406, "alv-ede", Latn} + +m["ijn"] = {"Kalabari", 35697, "ijo"} + +m["ijs"] = {"Southeast Ijo", 3915854, "ijo", Latn} + +m["ike"] = {"Eastern Canadian Inuktitut", 4126517, "esx-inu", {"Cans"}} + +m["iki"] = {"Iko", 3813290, "nic-lcr", Latn} + +m["ikk"] = {"Ika", 35406, "alv-igb"} + +m["ikl"] = {"Ikulu", 425973, "nic-plc", Latn} + +m["iko"] = {"Olulumo-Ikom", 3914402, "nic-uce", Latn} + +m["ikp"] = {"Ikpeshi", 3912777, "alv-yek", Latn} + +m["ikr"] = {"Ikaranggal", 5995402, "aus-pam"} + +m["iks"] = { + "Inuit Sign Language", 13360244, "sgn", Latn -- when documented +} + +m["ikt"] = {"Inuvialuktun", 27990, "esx-inu", {"Cans", "Latn"}} + +m["ikv"] = {"Iku-Gora-Ankwa", 3913940, "nic-plc"} + +m["ikw"] = {"Ikwere", 35399, "alv-igb"} + +m["ikx"] = {"Ik", 35472, "ssa-klk", Latn} + 
+m["ikz"] = {"Ikizu", 10977626, "bnt-lok", Latn} + +m["ila"] = {"Ile Ape", 12473380, "poz-cet"} + +m["ilb"] = {"Ila", 10962725, "bnt-bot", Latn} + +m["ilg"] = {"Ilgar", 5997810, "aus-wdj", Latn} + +m["ili"] = {"Ili Turki", 33627, "trk-kar"} + +m["ilk"] = {"Ilongot", 3148787, "phi", Latn} + +m["ill"] = {"Iranun", 12953581, "phi", {"Latn", "Arab"}} + +m["ilo"] = {"Ilocano", 35936, "phi", {"Latn", "Tglg"}, entry_name = {from = {"[áàâ]", "[éèê]", "[íìî]", "[óòô]", "[úùû]", ACUTE, GRAVE, CIRC}, to = {"a", "e", "i", "o", "u"}}} + +m["ils"] = {"International Sign", 35754, "sgn"} + +m["ilu"] = {"Ili'uun", 12632888, "poz-tim"} + +m["ilv"] = {"Ilue", 3813301, "nic-lcr", Latn} + +m["ima"] = {"Mala Malasar", 6740693, "dra"} + +m["imi"] = {"Anamgura", 3501881, "ngf-mad"} + +m["iml"] = {"Miluk", 3314550, "nai-coo", Latn} + +m["imn"] = {"Imonda", 6005721, "paa-brd"} + +m["imo"] = {"Imbongu", 12632895, "ngf-mad"} + +m["imr"] = {"Imroing", 6008394, "poz-tim"} + +m["ims"] = {"Marsian", 1265446, "itc", Latn} + +m["imy"] = {"Milyan", 3832946, "ine-ana", {"Lyci"}} + +m["inb"] = {"Inga", 35491, "qwe", ancestors = {"qwe-kch"}} + +m["ing"] = {"Deg Xinag", 27782, "ath-nor"} + +m["inh"] = {"Ingush", 33509, "cau-vay", {"Cyrl"}, translit_module = "inh-translit", override_translit = true, entry_name = {from = {MACRON}, to = {}}} + +m["inj"] = {"Jungle Inga", 16115012, "qwe", ancestors = {"qwe-kch"}} + +m["inl"] = { + "Indonesian Sign Language", 3915477, "sgn", Latn -- when documented +} + +m["inm"] = {"Minaean", 737784, "sem-osa", {"Sarb"}, translit_module = "Sarb-translit"} + +m["inn"] = {"Isinai", 6081098, "phi"} + +m["ino"] = {"Inoke-Yate", 6036531, "paa-kag"} + +m["inp"] = {"Iñapari", 15338035, "awd", Latn} + +m["ins"] = {"Indian Sign Language", 12953486, "sgn"} + +m["int"] = {"Intha", 6057507, "tbq-brm", ancestors = {"obr"}} + +m["inz"] = {"Ineseño", 35443, "nai-chu", Latn} + +m["ior"] = {"Inor", 35763, "sem-eth", {"Ethi"}} + +m["iou"] = {"Tuma-Irumu", 7852460, "ngf-fin", Latn} + 
+m["iow"] = {"Chiwere", 56737, "sio-msv", Latn} + +m["ipi"] = {"Ipili", 6065141, "paa-eng"} + +m["ipo"] = {"Ipiko", 10566515, "ngf"} + +m["iqu"] = {"Iquito", 2669184, "sai-zap", Latn} + +m["iqw"] = {"Ikwo", 11926474, "alv-igb", Latn, ancestors = {"izi"}} + +m["ire"] = {"Iresim", 6069398, "poz-hce", Latn} + +m["irh"] = {"Irarutu", 3027928, "poz", Latn} + +m["iri"] = {"Rigwe", 3912756, "nic-plc", Latn} + +m["irk"] = {"Iraqw", 33595, "cus", Latn} + +m["irn"] = {"Irantxe", 3409301, nil, Latn} + +m["irr"] = {"Ir", 3071880, "mkh-kat"} + +m["iru"] = {"Irula", 33363, "dra", {"Taml"}} + +m["irx"] = {"Kamberau", 6356317, "ngf"} + +m["iry"] = {"Iraya", 6068356, "phi"} + +m["isa"] = {"Isabi", 11732247, "paa-kag"} + +m["isc"] = {"Isconahua", 3052971, "sai-pan", Latn} + +m["isd"] = {"Isnag", 6085162, "phi", Latn} + +m["ise"] = { + "Italian Sign Language", 375619, "sgn", Latn -- when documented +} + +m["isg"] = { + "Irish Sign Language", 14183, "sgn", Latn -- when documented +} + +m["ish"] = {"Esan", 35268, "alv-eeo", Latn} + +m["isi"] = {"Nkem-Nkum", 36261, "nic-eko", Latn} + +m["isk"] = {"Ishkashimi", 33419, "ira-sgi", ancestors = {"ira-sgi-pro"}} + +m["ism"] = {"Masimasi", 6783273, "poz-ocw", Latn} + +m["isn"] = {"Isanzu", 6078891, "bnt-tkm", Latn} + +m["iso"] = {"Isoko", 35414, "alv-swd", Latn} + +m["isr"] = {"Israeli Sign Language", 2911863, "sgn", {"Sgnw"}} + +m["ist"] = {"Istriot", 35845, "roa-itd", Latn} + +m["isu"] = {"Isu", 6089423, "nic-rnw", Latn} + +m["itb"] = {"Binongan Itneg", 12953584, "phi"} + +m["itd"] = {"Southern Tidong", 7049643, "poz-san"} + +m["ite"] = {"Itene", 3038640, "sai-cpc", Latn} + +m["iti"] = {"Inlaod Itneg", 12953585, "phi"} + +m["itk"] = {"Judeo-Italian", 1145414, "roa-itd", {"Hebr"}} + +m["itl"] = {"Itelmen", 33624, "qfa-cka", {"Cyrl", "Latn"}} + +m["itm"] = {"Itu Mbon Uzo", 10977737, "nic-ief", Latn, ancestors = {"ibr"}} + +m["ito"] = {"Itonama", 950585, "qfa-iso"} + +m["itr"] = {"Iteri", 2083185, "paa-asa"} + +m["its"] = {"Isekiri", 36045, 
"alv-yor", Latn} + +m["itt"] = {"Maeng Itneg", 18748761, "phi"} + +m["itv"] = {"Itawit", 3915527, "phi", Latn} + +m["itw"] = {"Ito", 11128810, "nic-ief", ancestors = {"ibr"}} + +m["itx"] = {"Itik", 6094713, "paa-tkw"} + +m["ity"] = {"Moyadan Itneg", 12953583, "phi"} + +m["itz"] = {"Itzá", 35537, "myn"} + +m["ium"] = {"Iu Mien", 2498808, "hmx-mie"} + +m["ivb"] = {"Ibatan", 18748212, "phi", Latn} + +m["ivv"] = {"Ivatan", 3547080, "phi", Latn} + +m["iwk"] = {"I-Wak", 12632789, "phi"} + +m["iwm"] = {"Iwam", 3915215, "paa-spk"} + +m["iwo"] = {"Iwur", 6101006, "ngf-okk"} + +m["iws"] = {"Sepik Iwam", 16893603, "paa-spk"} + +m["ixc"] = {"Ixcatec", 56706, "omq"} + +m["ixl"] = {"Ixil", 35528, "myn", Latn} + +m["iya"] = {"Iyayu", 3913390, "alv-nwd", Latn} + +m["iyo"] = {"Mesaka", 36080, "nic-tiv", Latn} + +m["iyx"] = {"Yaa", 36909, "bnt-nze", Latn} + +m["izh"] = {"Ingrian", 33559, "fiu-fin", Latn} + +m["izi"] = {"Izi-Ezaa-Ikwo-Mgbo", nil, "alv-igb"} + +m["izr"] = {"Izere", 6101921, "nic-plc", Latn} + +m["izz"] = {"Izi", 3914387, "alv-igb", Latn, ancestors = {"izi"}} + +return m diff --git a/wiktra/wikt/translit/languages/data3/j.lua b/wiktra/wikt/translit/languages/data3/j.lua new file mode 100644 index 0000000..27f16d0 --- /dev/null +++ b/wiktra/wikt/translit/languages/data3/j.lua @@ -0,0 +1,290 @@ +local u = mw.ustring.char + +-- UTF-8 encoded strings for some commonly-used diacritics +local GRAVE = u(0x0300) +local ACUTE = u(0x0301) +local CIRC = u(0x0302) +local TILDE = u(0x0303) +local MACRON = u(0x0304) +local BREVE = u(0x0306) +local DOTABOVE = u(0x0307) +local DIAER = u(0x0308) +local CARON = u(0x030C) +local DGRAVE = u(0x030F) +local INVBREVE = u(0x0311) +local DOTBELOW = u(0x0323) +local RINGBELOW = u(0x0325) +local CEDILLA = u(0x0327) + +local Latn = {"Latn"} + +local m = {} + +m["jaa"] = {"Jamamadí", 3053275, "auf", Latn} + +m["jab"] = {"Hyam", 35403, "nic-plc", Latn} + +m["jac"] = {"Jakaltek", 33393, "myn", Latn} + +m["jad"] = {"Jahanka", 3913992, "dmn-wmn", 
Latn} + +m["jae"] = {"Jabem", 3571232, "poz-ocw", Latn} + +m["jaf"] = {"Jara", 56289, "cdc", Latn} + +m["jah"] = {"Jah Hut", 2742661, "mkh-asl"} + +m["jaj"] = {"Zazao", 3574969, "poz-ocw", Latn} + +-- "jak" IS TREATED AS "ms", SEE WT:LT + +m["jal"] = {"Yalahatan", 8047298, "poz-cma", Latn} + +m["jam"] = {"Jamaican Creole", 35939, "crp", Latn, ancestors = {"en"}} + +m["jan"] = {"Janday", 6150919, "aus-pam", Latn} + +m["jao"] = {"Yanyuwa", 34241, "aus-pam", Latn} + +m["jaq"] = {"Yaqay", 8049134, "ngf", Latn} + +m["jas"] = {"New Caledonian Javanese", 12953527, "poz-sus", Latn, ancestors = {"jv"}} + +m["jat"] = {"Jakati", 4159744, "inc-pan", ancestors = {"lah"}} + +m["jau"] = {"Yaur", 8050346, "poz-hce", Latn} + +m["jax"] = {"Jambi Malay", 3915769, "poz-mly", Latn} + +m["jay"] = {"Yan-nhangu", 10723405, "aus-yol", Latn} + +m["jaz"] = {"Jawe", 3163200, "poz-cln", Latn} + +m["jbe"] = {"Judeo-Berber", 35854, "ber", {"Hebr"}} + +m["jbj"] = {"Arandai", 4784070, "ngf", Latn} + +m["jbk"] = {"Barikewa", nil, "ngf", Latn} + +m["jbn"] = {"Nefusa", 36151, "ber"} + +m["jbo"] = {"Lojban", 36350, "art", Latn, type = "appendix-constructed"} + +m["jbr"] = {"Jofotek-Bromnya", 16886849, "paa-tkw"} + +m["jbt"] = {"Jabutí", 2060023, "sai-mje", Latn} + +m["jbu"] = {"Jukun Takum", 35447, "nic-jkn", Latn} + +m["jbw"] = {"Yawijibaya", 31722921, "aus-wor", Latn} + +m["jcs"] = { + "Jamaican Country Sign Language", 6127418, "sgn", Latn -- when documented +} + +m["jct"] = {"Krymchak", 33723, "trk-kcu", {"Latn", "Cyrl"}} + +m["jda"] = {"Jad", 12633440, "sit-las"} + +m["jdg"] = {"Jadgali", 13560607, "inc-snd", ancestors = {"inc-vra"}} + +m["jdt"] = {"Judeo-Tat", 56495, "ira-swi", {"Latn", "Cyrl", "Hebr"}, ancestors = {"fa"}, translit_module = "jdt-translit"} + +m["jeb"] = {"Jebero", 967031, "sai-cah"} + +m["jee"] = {"Jerung", 56372, "sit-kiw"} + +m["jeg"] = {"Jeng", 5091274, "mkh-ban", Latn} + +m["jeh"] = {"Jeh", 3914636, "mkh-ban", Latn} + +m["jei"] = {"Yei", 8051326} + +m["jek"] = {"Jeri Kuo", 
11031936, "dmn-jje", Latn} + +m["jel"] = {"Yelmek", 8052020} + +m["jen"] = {"Dza", 35558, "alv-bwj"} + +m["jer"] = {"Jere", 3915449, "nic-jer"} + +m["jet"] = {"Manem", 6748412, "paa-brd"} + +m["jeu"] = {"Jonkor Bourmataguil", 56269} + +m["jgb"] = {"Ngbee", 7022243} + +-- "jge" IS TREATED AS "ka", SEE WT:LT + +m["jgk"] = {"Gwak", 17523694, "nic-jrw"} + +m["jgo"] = {"Ngomba", 36287, "bai", Latn} + +m["jhi"] = {"Jehai", 3176748, "mkh-asl"} + +m["jhs"] = {"Jhankot Sign Language", 6190889, "sgn"} + +m["jia"] = {"Jina", 56297} + +m["jib"] = {"Jibu", 3914448, "nic-jkn", Latn} + +m["jic"] = {"Tol", 3178609, "hok", Latn} + +m["jid"] = {"Bu", 3913321, "nic-nin", Latn} + +m["jie"] = {"Jilbe", 56281} + +m["jig"] = {"Jingulu", 6202435, "aus-mir"} + +m["jih"] = {"Shangzhai", 25559440, "sit-rgy"} + +m["jii"] = {"Jiiddu", 56769, "cus"} + +m["jil"] = {"Jilim", 6192674, "ngf-mad"} + +m["jim"] = {"Jimjimen", 56288, "cdc-cbm", Latn} + +m["jio"] = {"Jiamao", 3178570, nil, Latn} + +m["jiq"] = {"Guanyinqiao", 3118757, "sit-rgy"} + +m["jit"] = {"Jita", 6203228, "bnt-haj", Latn} + +m["jiu"] = {"Youle Jinuo", 12952530, "tbq-lol"} + +m["jiv"] = {"Shuar", 617291, "sai-jiv", Latn} + +m["jiy"] = {"Buyuan Jinuo", 12952528, "tbq-lol"} + +m["jje"] = { + "Jeju", 129648, "qfa-kor", {"Kore"} + -- translit_module = "ko-translit", +} + +m["jjr"] = {"Zhár", 17523697, "nic-jrw"} + +m["jka"] = {"Kaera", 16910923, "ngf", Latn} + +m["jko"] = {"Kubo", 12952670, "ngf"} + +m["jkr"] = {"Koro (India)", 36162, "sit-gsi"} + +m["jku"] = {"Labir", 1990210, "nic-jrn"} + +m["jle"] = {"Ngile", 36329, "alv-tal"} + +m["jls"] = { + "Jamaican Sign Language", 6127433, "sgn", Latn -- when documented +} + +m["jma"] = {"Dima", 5277140} + +m["jmb"] = {"Zumbun", 56252, "cdc-wst"} + +m["jmc"] = {"Machame", 12952751, "bnt-chg", Latn} + +m["jmd"] = {"Yamdena", 8048030, "poz-cet", Latn} + +m["jmi"] = {"Jimi", 3502308, "cdc-wst", Latn} + +m["jml"] = {"Jumli", 6310993, "inc-pah"} + +m["jmn"] = {"Makuri Naga", 6740482, "sit-aao"} + 
+m["jmr"] = {"Kamara", 35561, "nic-dag"} + +-- "jms" IS TREATED AS "mff", SEE WT:LT + +m["jmw"] = {"Mouwase", nil, "ngf", Latn} + +m["jmx"] = {"Western Juxtlahuaca Mixtec", 12953731, "omq-mxt", Latn} + +m["jna"] = {"Jangshung", 12633505, "sit-kin"} + +m["jnd"] = {"Jandavra", 6150941} + +m["jng"] = {"Yangman", 10723416, "aus-yng"} + +m["jni"] = {"Janji", 3915330, "nic-jer"} + +m["jnj"] = {"Yemsa", 36873, "omv"} + +m["jnl"] = {"Rawat", 7296948, "sit-gma"} + +m["jns"] = {"Jaunsari", 6164857, "him"} + +m["job"] = {"Joba", 13123409, "bnt-shh"} + +m["jod"] = {"Wojenaka", 11029540, "dmn-mnk"} + +m["jor"] = {"Jorá", 5393974, "tup-gua", Latn} + +m["jos"] = {"Jordanian Sign Language", 6534917, "sgn", {"Sgnw"}} + +m["jow"] = {"Jowulu", 3914487, "dmn-mnw", Latn} + +-- "jpa" IS NOT USED, SEE WT:LT + +m["jpr"] = {"Judeo-Persian", 33367, "ira-swi", {"Hebr"}, ancestors = {"fa"}} + +m["jqr"] = {"Jaqaru", 33443, "sai-aym", Latn} + +m["jra"] = {"Jarai", 33370, "cmc", Latn} + +m["jrb"] = {"Judeo-Arabic", 37733, "sem-arb", {"Hebr"}} + +m["jrr"] = {"Jiru", 6203123, "nic-jkn"} + +m["jru"] = {"Japrería", 3441409, "sai-car", Latn} + +m["jsl"] = {"Japanese Sign Language", 35601, "sgn-jsl"} + +m["jua"] = {"Júma", 12953587, "tup-gua", Latn} + +m["jub"] = {"Wannu", 3914905, "nic-jkn"} + +m["juc"] = {"Jurchen", 56731, "tuw", {"Latn"}} + +m["jud"] = {"Worodougou", 11155821, "dmn-mnk"} + +m["juh"] = {"Hone", 5964576, "nic-jkn", Latn} + +m["jui"] = {"Ngadjuri", 16897028, "aus-pam", Latn} + +m["juk"] = {"Wapan", 3914914, "nic-jkn"} + +m["jul"] = {"Jirel", 56863, "sit-tib"} + +m["jum"] = {"Jumjum", 11283696, "sdv"} + +m["jun"] = {"Juang", 33362, "mun", {"Orya"}} + +m["juo"] = {"Jiba", 6191995, "nic-jkn"} + +m["jup"] = {"Hupdë", 3143384, "sai-nad", Latn} + +m["jur"] = {"Jurúna", 4023175, "tup", Latn} + +m["jus"] = {"Jumla Sign Language", 6310991, "sgn"} + +m["jut"] = {"Jutish", 1340322, "gmq", Latn, ancestors = {"da"}} + +m["juu"] = {"Ju", 3914897} + +m["juw"] = {"Wãpha", 3914934, "nic-jkn", Latn} + 
+m["juy"] = {"Juray", 6314963, "mun"} + +m["jvd"] = {"Javindo", 2719893} + +m["jvn"] = {"Caribbean Javanese", 11732256, "poz-sus", Latn, ancestors = {"jv"}} + +m["jwi"] = {"Jwira-Pepesa", 35467, "alv-ctn", Latn} + +-- "jya" IS TREATED AS "sit-sit", "sit-jap", "sit-tsh", "sit-zbu", SEE WT:LT + +m["jye"] = {"Judeo-Yemeni Arabic", 56596, "sem-arb", {"Hebr"}, ancestors = {"jrb"}} + +m["jyy"] = {"Jaya", 641720, "csu-bgr", Latn} + +return m diff --git a/wiktra/wikt/translit/languages/data3/k.lua b/wiktra/wikt/translit/languages/data3/k.lua new file mode 100644 index 0000000..eeb3911 --- /dev/null +++ b/wiktra/wikt/translit/languages/data3/k.lua @@ -0,0 +1,1299 @@ +local u = mw.ustring.char + +-- UTF-8 encoded strings for some commonly used diacritics +local GRAVE = u(0x0300) +local ACUTE = u(0x0301) +local CIRC = u(0x0302) +local TILDE = u(0x0303) +local MACRON = u(0x0304) +local BREVE = u(0x0306) +local DOTABOVE = u(0x0307) +local DIAER = u(0x0308) +local CARON = u(0x030C) +local DGRAVE = u(0x030F) +local INVBREVE = u(0x0311) +local DOTBELOW = u(0x0323) +local RINGBELOW = u(0x0325) +local CEDILLA = u(0x0327) + +local Cyrl = {"Cyrl"} +local Deva = {"Deva"} +local Latn = {"Latn"} +local Latinx = {"Latinx"} + +local m = {} + +m["kaa"] = {"Karakalpak", 33541, "trk-kno", {"Latn", "Cyrl"}} + +m["kab"] = {"Kabyle", 35853, "ber", Latn} + +m["kac"] = {"Jingpho", 33332, "sit-jnp", {"Latn", "Mymr"}} + +m["kad"] = {"Kadara", 3914011, "nic-plc", Latn} + +m["kae"] = {"Ketangalan", 2779411, "map"} + +m["kaf"] = {"Katso", 246122, "tbq-lol"} + +m["kag"] = {"Kajaman", 6348863, "poz", Latn} + +m["kah"] = {"Fer", 5443742, "csu-bgr", Latn} + +m["kai"] = {"Karekare", 3438770, "cdc-wst", Latn} + +m["kaj"] = {"Jju", 35401, "nic-plc", Latn} + +m["kak"] = {"Kayapa Kallahan", 3192220, "phi", Latn} + +m["kam"] = {"Kamba", 2574767, "bnt-kka", Latn} + +m["kao"] = {"Kassonke", 36905, "dmn-wmn", Latn} + +m["kap"] = {"Bezhta", 33054, "cau-tsz", Cyrl, translit_module = "kap-translit"} + +m["kaq"] = 
{"Capanahua", 2937196, "sai-pan", Latn} + +m["kaw"] = { + "Old Javanese", + 49341, + "poz-sus", + {"Latn", "Java"}, + translit_module = "jv-translit" -- same as jv +} + +m["kax"] = {"Kao", 3192799} + +m["kay"] = {"Kamayurá", 3192336, "tup-gua", Latn} + +m["kba"] = {"Kalarko", 5517764, "aus-pam", Latn} + +m["kbb"] = {"Kaxuyana", 12953626, "sai-car", Latn} + +m["kbc"] = {"Kadiwéu", 18168288, "sai-guc", Latn} + +m["kbd"] = {"Kabardian", 33522, "cau-cir", Cyrl, translit_module = "kbd-translit", override_translit = true} + +m["kbe"] = {"Kanju", 10543322, "aus-pam", Latn} + +m["kbh"] = {"Camsá", 2842667, "qfa-iso", Latn} + +m["kbi"] = {"Kaptiau", 6367294, "poz-oce", Latn} + +m["kbj"] = {"Kari", 6370438, "bnt-boa", Latn} + +m["kbk"] = {"Grass Koiari", 12952642, "ngf", Latn} + +m["kbm"] = {"Iwal", 3156391, "poz-ocw", Latn} + +m["kbn"] = {"Kare (Africa)", 35554, "alv-mbm", Latn} + +m["kbo"] = {"Keliko", 11275553, "csu-mma"} + +m["kbp"] = {"Kabiyé", 35475, "nic-gne", Latn} + +m["kbq"] = {"Kamano", 11732272, "paa-kag", Latn} + +m["kbr"] = {"Kafa", 35481, "omv-gon", {"Ethi", "Latn"}} + +m["kbs"] = {"Kande", 35556, "bnt-tso", Latn} + +m["kbt"] = {"Gabadi", 3291159, "poz-ocw", Latn} + +m["kbu"] = {"Kabutra", 10966761, "inc-wes", ancestors = {"raj"}} + +m["kbv"] = {"Kamberataro", 5261289, "paa", Latn} + +m["kbw"] = {"Kaiep", 6347632, "poz-ocw", Latn} + +m["kbx"] = {"Ap Ma", 56298, "paa-ram"} + +m["kbz"] = {"Duhwa", 56295, "cdc-wst", Latn} + +m["kca"] = {"Khanty", 33563, "urj-ugr", Cyrl, translit_module = "kca-translit", override_translit = true} + +m["kcb"] = {"Kawacha", 11732302, "ngf"} + +m["kcc"] = {"Lubila", 3914381, "nic-uce", Latn} + +m["kcd"] = {"Ngkâlmpw Kanum", 12952566, "paa-yam"} + +m["kce"] = {"Kaivi", 6348685, "nic-kau"} + +m["kcf"] = {"Ukaan", 36651, "nic-bco"} + +m["kcg"] = {"Tyap", 3912765, "nic-plc"} + +m["kch"] = {"Vono", 3913920, "nic-kau"} + +m["kci"] = {"Kamantan", 3914019, "nic-plc"} + +m["kcj"] = {"Kobiana", 35609, "alv-nyn"} + +m["kck"] = {"Kalanga", 
33672, "bnt-sho", Latn} + +m["kcl"] = {"Kala", 6349982, "poz-ocw", Latn} + +m["kcm"] = {"Tar Gula", 277963, "csu-bba"} + +m["kcn"] = {"Nubi", 36388, "crp", {"Latn", "Arab"}, ancestors = {"apd"}, entry_name = {from = {"Á", "á", "É", "é", "Í", "í", "Ó", "ó", "Ú", "ú"}, to = {"A", "a", "E", "e", "I", "i", "O", "o", "U", "u"}}} + +m["kco"] = {"Kinalakna", 11732320, "ngf"} + +m["kcp"] = {"Kanga", 6362384, "qfa-kad", Latn} + +m["kcq"] = {"Kamo", 3914879, "alv-wjk"} + +m["kcr"] = {"Katla", 35688, "nic-ktl"} + +m["kcs"] = {"Koenoem", 3438755, "cdc-wst"} + +m["kct"] = {"Kaian", 6347538, "paa-ram"} + +m["kcu"] = {"Kikami", 3915212, "bnt-ruv", Latn} + +m["kcv"] = {"Kete", 3195598, "bnt-lub"} + +m["kcw"] = {"Kabwari", 6344539, "bnt-glb"} + +m["kcx"] = {"Kachama-Ganjule", 12634070, "omv-eom"} + +m["kcy"] = {"Korandje", 33427, "son"} + +m["kcz"] = {"Konongo", 11732345, "bnt-tkm", Latn} + +m["kda"] = {"Worimi", 3914062, "aus-pam", Latn} + +m["kdc"] = {"Kutu", 6448634, "bnt-ruv"} + +m["kdd"] = {"Yankunytjatjara", 34207, "aus-pam", Latn} + +m["kde"] = {"Makonde", 35172, "bnt-rvm", Latn} + +m["kdf"] = {"Mamusi", 6746036, "poz-ocw", Latn} + +m["kdg"] = {"Seba", 7442316, "bnt-sbi", Latn} + +m["kdh"] = {"Tem", 36531, "nic-gne"} + +m["kdi"] = {"Kumam", 6443410, "sdv-los"} + +m["kdj"] = {"Karamojong", 56326, "sdv-ttu", Latn} + +m["kdk"] = {"Numee", 3346774, "poz-cln"} + +m["kdl"] = {"Tsikimba", 3914404, "nic-kam"} + +m["kdm"] = {"Kagoma", 3914420, "nic-plc"} + +m["kdn"] = {"Kunda", 4121130, "bnt-sna"} + +m["kdp"] = {"Kaningdon-Nindem", 3914956, "nic-nin"} + +m["kdq"] = {"Koch", 56431, "tbq-bdg"} + +m["kdr"] = {"Karaim", 33725, "trk-kcu", {"Cyrl", "Latn", "Hebr"}} + +m["kdt"] = {"Kuy", 56310, "mkh-kat"} + +m["kdu"] = {"Kadaru", 35441, "nub-hil", Latn} + +m["kdv"] = {"Kado", 7402721, "sit-luu"} + +m["kdw"] = {"Koneraw", 11732341, "ngf"} + +m["kdx"] = {"Kam", 36753, "alv-wjk"} + +m["kdy"] = {"Keder", 6383641, "paa-tkw"} + +m["kdz"] = {"Kwaja", 11128866, "nic-nka", Latn} + +m["kea"] = 
{"Kabuverdianu", 35963, "crp", Latn, ancestors = {"pt"}} + +m["keb"] = {"Kélé", 35559, "bnt-kel"} + +m["kec"] = {"Keiga", 3409311, "qfa-kad", Latn} + +m["ked"] = {"Kerewe", 6393846, "bnt-haj"} + +m["kee"] = {"Eastern Keres", 15649021, "nai-ker", Latn} + +m["kef"] = {"Kpessi", 35748, "alv-gbe"} + +m["keg"] = {"Tese", 16887296, "sdv"} + +m["keh"] = {"Keak", 6382110, "paa-spk"} + +m["kei"] = {"Kei", 2410352} + +m["kej"] = {"Kadar", 6345179, "dra"} + +m["kek"] = {"Q'eqchi", 35536, "myn", Latn} + +m["kel"] = {"Kela-Yela", 6385426, "bnt-mon", Latn} + +m["kem"] = {"Kemak", 35549, "poz-tim"} + +m["ken"] = {"Kenyang", 35650, "nic-mam", Latn} + +m["keo"] = {"Kakwa", 3033547, "sdv-bri"} + +m["kep"] = {"Kaikadi", 6347757, "dra"} + +m["keq"] = {"Kamar", 14916877, "inc-eas", ancestors = {"inc-mgd"}} + +m["ker"] = {"Kera", 56251, "cdc-est", Latn} + +m["kes"] = {"Kugbo", 3813394, "nic-cde", Latn} + +m["ket"] = {"Ket", 33485, "qfa-yen", Cyrl} + +m["keu"] = {"Akebu", 35026, "alv-ktg"} + +m["kev"] = {"Kanikkaran", 6363201, "dra"} + +m["kew"] = {"Kewa", 12952619, "paa-eng", Latn} + +m["kex"] = {"Kukna", 5031131, "inc-eas", ancestors = {"bh"}} + +m["key"] = {"Kupia", 6445354, "inc-eas"} + +m["kez"] = {"Kukele", 3915391, "nic-ucn", Latn} + +m["kfa"] = {"Kodava", 33531, "dra", {"Knda"}} + +m["kfb"] = {"Kolami", 33479, "dra", Deva} + +m["kfc"] = {"Konda-Dora", 35679, "dra", {"Telu"}} + +m["kfd"] = {"Korra Koraga", 12952655, "dra", {"Knda"}} + +m["kfe"] = {"Kota (India)", 33483, "dra", {"Taml"}} + +m["kff"] = {"Koya", 33471, "dra"} + +m["kfg"] = {"Kudiya", 12952667, "dra"} + +m["kfh"] = {"Kurichiya", 12952676, "dra"} + +m["kfi"] = {"Kannada Kurumba", 56589, "dra"} + +m["kfj"] = {"Kemiehua", 27144776, "mkh-pal"} + +m["kfk"] = {"Kinnauri", 2383208, "sit-kin"} + +m["kfl"] = {"Kung", 6444510, "nic-rnc", Latn} + +m["kfn"] = {"Kuk", 6442398, "nic-rnc", Latn} + +m["kfo"] = {"Koro (West Africa)", 11160588, "dmn-mnk", {"Latn", "Nkoo"}} + +m["kfp"] = {"Korwa", 6432786, "mun"} + +m["kfq"] = {"Korku", 
33715, "mun"} + +m["kfr"] = {"Kachchi", 56487, "inc-snd", {"Gujr", "sd-Arab"}, translit_module = "gu-translit", ancestors = {"inc-vra"}} + +m["kfs"] = {"Bilaspuri", 12953397, "him", {"Deva", "Takr"}, translit_module = "hi-translit"} + +m["kft"] = {"Kanjari", 12953610, "inc-pan", ancestors = {"pa"}} + +m["kfu"] = {"Katkari", 6377671, "inc-sou", ancestors = {"pmh"}} + +m["kfv"] = {"Kurmukar", 6446193, "inc-eas", ancestors = {"inc-mgd"}} + +m["kfw"] = {"Kharam Naga", 12952906, "tbq-kuk"} + +m["kfx"] = {"Kullu Pahari", 6443148, "him", Deva, translit_module = "hi-translit"} + +m["kfy"] = {"Kumaoni", 33529, "inc-pah", {"Deva", "Shrd", "Takr"}} + +m["kfz"] = {"Koromfé", 35701, "nic-gur", Latn} + +m["kga"] = {"Koyaga", 11155632, "dmn-mnk"} + +m["kgb"] = {"Kawe", 12952750, "poz-hce"} + +m["kgd"] = {"Kataang", 12953622, "mkh"} + +m["kge"] = {"Komering", 49224, "poz-lgx"} + +m["kgf"] = {"Kube", 11732359, "ngf"} + +m["kgg"] = {"Kusunda", 33630, "qfa-iso", Latn} + +m["kgi"] = {"Selangor Sign Language", 33731, "sgn"} + +m["kgj"] = {"Gamale Kham", 22236996, "sit-kha"} + +m["kgk"] = {"Kaiwá", 3111883, "tup-gua", Latn} + +m["kgl"] = {"Kunggari", 10550184, "aus-pam"} + +m["kgm"] = {"Karipúna", 6371069} + +m["kgn"] = {"Karingani", 6371041, "xme-ttc", ancestors = {"xme-ttc-nor"}} + +m["kgo"] = {"Krongo", 6438927, "qfa-kad", Latn} + +m["kgp"] = {"Kaingang", 2665734, "sai-sje", Latn} + +m["kgq"] = {"Kamoro", 6359001, "ngf"} + +m["kgr"] = {"Abun", 56657, "paa"} + +m["kgs"] = {"Kumbainggar", 3915412, "aus-pam"} + +m["kgt"] = {"Somyev", 3913354, "nic-mmb", Latn} + +m["kgu"] = {"Kobol", 11732325, "ngf-mad"} + +m["kgv"] = {"Karas", 6368621, "ngf"} + +m["kgw"] = {"Karon Dori", 56817} + +m["kgx"] = {"Kamaru", 12953604, "poz"} + +m["kgy"] = {"Kyerung", 12952691, "sit-kyk"} + +m["kha"] = {"Khasi", 33584, "aav-pkl", {"Latn", "as-Beng"}} + +m["khb"] = {"Lü", 36948, "tai-swe", {"Talu", "Lana"}, translit_module = "translit-redirect", sort_key = {from = {"[%pᪧ]", "᧞", "᧟", "([ᦵᦶᦷᦺ])([ᦀ-ᦫ])", 
"[᩠ᩳ-᩿]", "ᩔ", "ᩕ", "ᩖ", "ᩘ", "([ᨭ-ᨱ])ᩛ", "([ᨷ-ᨾ])ᩛ", "ᩤ", u(0x200C)}, to = {"", "ᦶᦜ", "ᦶᦜᧁ", "%2%1", "", "ᩈᩈ", "ᩁ", "ᩃ", "ᨦ", "%1ᨮ", "%1ᨻ", "ᩣ"}}, entry_name = {from = {" ", u(0x200C)}, to = {}}} + +m["khc"] = {"Tukang Besi North", 18611555, "poz"} + +m["khd"] = {"Bädi Kanum", 20888004, "paa-yam"} + +m["khe"] = {"Korowai", 6432598, "ngf"} + +m["khf"] = {"Khuen", 27144893, "mkh"} + +m["khh"] = {"Kehu", 10994953} + +m["khj"] = {"Kuturmi", 3914490, "nic-plc", Latn} + +m["khl"] = {"Lusi", 3267788, "poz-ocw", Latn} + +m["khn"] = {"Khandeshi", 33726, "inc-sou", ancestors = {"pmh"}} + +m["kho"] = {"Khotanese", 6583551, "xsc-sak", {"Brah", "Khar"}, translit_module = "Brah-translit"} + +m["khp"] = {"Kapauri", 3502575, "paa-tkw"} + +m["khq"] = {"Koyra Chiini", 33600, "son"} + +m["khr"] = {"Kharia", 3915562, "mun"} + +m["khs"] = {"Kasua", 6374863, "ngf"} + +m["kht"] = { + "Khamti", + 3915502, + "tai-swe", + {"Mymr"}, + entry_name = { + from = {u(0xFE00)}, -- VS01 + to = {""} + } +} + +m["khu"] = {"Nkhumbi", 11019169, "bnt-swb"} + +m["khv"] = {"Khvarshi", 56425, "cau-tsz", Cyrl, translit_module = "khv-translit"} + +m["khw"] = {"Khowar", 938216, "inc-dar", {"Arab"}} + +m["khx"] = {"Kanu", 12952571, "bnt-lgb"} + +m["khy"] = {"Ekele", 6385549, "bnt-ske", Latn} + +m["khz"] = {"Keapara", 12952603, "poz-ocw", Latn} + +m["kia"] = {"Kim", 35685, "alv-kim"} + +m["kib"] = {"Koalib", 35859, "alv-hei"} + +m["kic"] = {"Kickapoo", 20162127, "alg-sfk", Latn} + +m["kid"] = {"Koshin", 35632, "nic-beb", Latn} + +m["kie"] = {"Kibet", 56893} + +m["kif"] = {"Eastern Parbate Kham", 12953022, "sit-kha"} + +m["kig"] = {"Kimaama", 11732321, "ngf"} + +m["kih"] = {"Kilmeri", 6408020, "paa-brd"} + +m["kii"] = {"Kitsai", 56627, "cdd", Latn} + +m["kij"] = {"Kilivila", 3196601, "poz-ocw", Latn} + +m["kil"] = {"Kariya", 3438708, "cdc-wst"} + +m["kim"] = {"Tofa", 36848, "trk-sib", Cyrl} + +m["kio"] = {"Kiowa", 56631, "nai-kta", Latn} + +m["kip"] = {"Sheshi Kham", 12952622, "sit-kha"} + +m["kiq"] = 
{"Kosadle", 6432994} + +m["kis"] = {"Kis", 6416362, "poz-ocw", Latn} + +m["kit"] = {"Agob", 3332143} + +m["kiv"] = {"Kimbu", 10997740, "bnt-tkm"} + +m["kiw"] = {"Northeast Kiwai", 11732324, "paa-kiw"} + +m["kix"] = {"Khiamniungan Naga", 6401546, "sit-kch"} + +m["kiy"] = {"Kirikiri", 6415159, "paa-lkp"} + +m["kiz"] = {"Kisi", 3912772, "bnt-bki"} + +m["kja"] = {"Mlap", 6885683, "paa-nim"} + +m["kjb"] = {"Q'anjob'al", 35551, "myn", Latn} + +m["kjc"] = {"Coastal Konjo", 3198689, "poz"} + +m["kjd"] = {"Southern Kiwai", 11732322, "paa-kiw"} + +m["kje"] = {"Kisar", 3197441, "poz"} + +m["kjg"] = {"Khmu", 33335, "mkh", {"Laoo"}, sort_key = {from = {"[%pໆ]", "[່-ໍ]", "ຼ", "ຽ", "ໜ", "ໝ", "([ເແໂໃໄ])([ກ-ຮໞໟ])"}, to = {"", "", "ລ", "ຍ", "ຫນ", "ຫມ", "%2%1"}}} + +m["kjh"] = {"Khakas", 33575, "trk-sib", Cyrl, translit_module = "kjh-translit", override_translit = true} + +m["kji"] = {"Zabana", 379130, "poz-ocw", Latn} + +m["kjj"] = {"Khinalug", 35278, "cau-nec", Cyrl, translit_module = "kjj-translit", override_translit = true, entry_name = {from = {ACUTE}, to = {}}} + +m["kjk"] = {"Highland Konjo", 3198688, "poz"} + +m["kjl"] = {"Kham", 22237017, "sit-kha"} + +m["kjm"] = {"Kháng", 6403501, "mkh-pal"} + +m["kjn"] = {"Kunjen", 3200468, "aus-pmn", Latn} + +m["kjo"] = {"Harijan Kinnauri", 5657463, "him"} + +m["kjp"] = {"Eastern Pwo", 5330390, "kar", {"Mymr", "Leke", "Thai"}} + +m["kjq"] = {"Western Keres", 12645568, "nai-ker", Latn} + +m["kjr"] = {"Kurudu", 12952678, "poz-hce", Latn} + +m["kjs"] = {"East Kewa", 20050949, "paa-eng"} + +m["kjt"] = {"Phrae Pwo", 7187991, "kar", {"Thai"}} + +m["kju"] = {"Kashaya", 3193689, "nai-pom", Latn} + +m["kjx"] = {"Ramopa", 56830, "paa-nbo"} + +m["kjy"] = {"Erave", 12952416, "paa-eng"} + +m["kjz"] = {"Bumthangkha", 2786408, "sit-ebo", {"Tibt"}} + +m["kka"] = {"Kakanda", 3915342, "alv-ngb"} + +m["kkb"] = {"Kwerisa", 56881, "paa-lkp"} + +m["kkc"] = {"Odoodee", 12952987} + +m["kkd"] = {"Kinuku", 6414422, "nic-kau"} + +m["kke"] = {"Kakabe", 3913966, 
"dmn-mok", Latn} + +m["kkf"] = {"Kalaktang Monpa", nil, "sit-tsk"} + +m["kkg"] = {"Mabaka Valley Kalinga", 18753304, "phi"} + +m["kkh"] = {"Khün", 3545044, "tai-swe", {"Lana", "Thai"}, sort_key = {from = {"[%pᪧๆ]", "[᩠ᩳ-᩿]", "ᩔ", "ᩕ", "ᩖ", "ᩘ", "([ᨭ-ᨱ])ᩛ", "([ᨷ-ᨾ])ᩛ", "ᩤ", "[็-๎]", "([เแโใไ])([ก-ฮ])"}, to = {"", "", "ᩈᩈ", "ᩁ", "ᩃ", "ᨦ", "%1ᨮ", "%1ᨻ", "ᩣ", "", "%2%1"}}} + +m["kki"] = {"Kagulu", 12952537, "bnt-ruv", Latn} + +m["kkj"] = {"Kako", 35755, "bnt-kak"} + +m["kkk"] = {"Kokota", 3198399, "poz-ocw", Latn} + +m["kkl"] = {"Kosarek Yale", 6432995, "ngf"} + +m["kkm"] = {"Kiong", 6414512, "nic-ucr", Latn} + +m["kkn"] = {"Kon Keu", 6428686, "mkh-pal"} + +m["kko"] = {"Karko", 35529, "nub-hil"} + +m["kkp"] = {"Koko-Bera", 6426699, "aus-pmn", Latn} + +m["kkq"] = {"Kaiku", 6347840, "bnt-kbi", Latn} + +m["kkr"] = {"Kir-Balar", 3440527, "cdc-wst", Latn} + +m["kks"] = {"Kirfi", 56242, "cdc-wst", Latn} + +m["kkt"] = {"Koi", 6426194, "sit-kiw"} + +m["kku"] = {"Tumi", 3913934, "nic-kau"} + +m["kkv"] = {"Kangean", 2071325} + +m["kkw"] = {"Teke-Kukuya", 36560, "bnt-tek"} + +m["kkx"] = {"Kohin", 6425997, "poz-brw"} + +m["kky"] = {"Guugu Yimidhirr", 56543, "aus-pam", Latn} + +m["kkz"] = {"Kaska", 20823, "ath-nor", Latn} + +m["kla"] = {"Klamath-Modoc", 2669248, "nai-plp", Latn} + +m["klb"] = {"Kiliwa", 3182593, "nai-yuc", Latn} + +m["klc"] = {"Kolbila", 6427122, "alv-lek"} + +m["kld"] = {"Gamilaraay", 3111818, "aus-cww", Latn} + +m["kle"] = {"Kulung", 6443304, "sit-kic"} + +m["klf"] = {"Kendeje", 56895} + +m["klg"] = {"Tagakaulu Kalagan", 18756514, "phi"} + +m["klh"] = {"Weliki", 7981017, "ngf-fin", Latn} + +m["kli"] = {"Kalumpang", 13561407, "poz"} + +m["klj"] = {"Khalaj", 33455, "trk"} + +m["klk"] = {"Kono (Nigeria)", 6429589, "nic-kau", Latn} + +m["kll"] = {"Kagan Kalagan", 18748913, "phi"} + +m["klm"] = {"Kolom", 6844970, "ngf-mad", Latn} + +m["kln"] = {"Kalenjin", 637228, "sdv-nma", Latn} + +m["klo"] = {"Kapya", 6367410, "nic-ykb"} + +m["klp"] = {"Kamasa", 6356107, "ngf"} + 
+m["klq"] = {"Rumu", 7379420, "ngf"} + +m["klr"] = {"Khaling", 56381, "sit-kiw"} + +m["kls"] = {"Kalasha", 33416, "inc-dar", {"Latn", "ks-Arab"}} + +m["klt"] = {"Nukna", 7068874, "ngf-fin", Latn} + +m["klu"] = {"Klao", 3914866, "kro-wkr"} + +m["klv"] = {"Maskelynes", 3297282, "poz-vnc", Latn} + +m["klw"] = {"Lindu", 18390055, "poz-kal"} + +m["klx"] = {"Koluwawa", 6427954, "poz-ocw", Latn} + +m["kly"] = {"Kalao", 6350643, "poz"} + +m["klz"] = {"Kabola", 11732258, "qfa-tap"} + +m["kma"] = {"Konni", 35680, "nic-buk"} + +m["kmb"] = {"Kimbundu", 35891, "bnt-kmb", Latn} + +m["kmc"] = {"Southern Kam", 35379, "qfa-kms", Latn} + +m["kmd"] = {"Madukayang Kalinga", 18753305, "phi"} + +m["kme"] = {"Bakole", 35068, "bnt-kpw"} + +m["kmf"] = {"Kare (New Guinea)", 11732286, "ngf-mad", Latn} + +m["kmg"] = {"Kâte", 3201059, "ngf"} + +m["kmh"] = {"Kalam", 12952550, "ngf-mad"} + +m["kmi"] = {"Kami", 3915372, "alv-ngb", Latn} + +m["kmj"] = {"Kumarbhag Paharia", 3130374, "dra", {"Beng", "Deva"}} + +m["kmk"] = {"Limos Kalinga", 18753303, "phi"} + +m["kml"] = {"Tanudan Kalinga", 18753307, "phi", Latn} + +m["kmm"] = {"Kom (India)", 12952647, "tbq-kuk"} + +m["kmn"] = {"Awtuw", 3504217, "paa-spk"} + +m["kmo"] = {"Kwoma", 11732376, "paa-spk"} + +m["kmp"] = {"Gimme", 11152236, "alv-dur"} + +m["kmq"] = {"Kwama", 2591184, "ssa-kom"} + +m["kmr"] = {"Northern Kurdish", 36163, "ku", {"Latn", "Cyrl", "Armn", "ku-Arab"}, translit_module = "translit-redirect", entry_name = {from = {"'"}, to = {"’"}}, wikimedia_codes = {"ku"}, ancestors = {"ku-pro"}} + +m["kms"] = {"Kamasau", 6356117, "qfa-tor", Latn} + +m["kmt"] = {"Kemtuik", 6387179, "paa-nim"} + +m["kmu"] = {"Kanite", 12952567, "paa-kag"} + +m["kmv"] = {"Karipúna Creole French", 2523999, "crp", ancestors = {"fr"}} + +m["kmw"] = {"Kumu", 6428450, "bnt-kbi", Latn} + +m["kmx"] = {"Waboda", 7958705, "paa-kiw"} + +m["kmy"] = {"Koma", 35634, "alv-dur"} + +m["kmz"] = {"Khorasani Turkish", 35373, "trk-ogz", ancestors = {"trk-oat"}} + +m["kna"] = 
{"Kanakuru", 56811, "cdc-wst", Latn} + +m["knb"] = {"Lubuagan Kalinga", 12953602, "phi"} + +m["knd"] = {"Konda", 11732340, "ngf-sbh", Latn} + +m["kne"] = {"Kankanaey", 18753329, "phi", Latn} + +m["knf"] = {"Mankanya", 35789, "alv-pap"} + +m["kni"] = {"Kanufi", 3913297, "nic-nin", Latn} + +m["knj"] = {"Akatek", 34923, "myn", Latn} + +m["knk"] = {"Kuranko", 3198896, "dmn-mok", Latn} + +m["knl"] = {"Keninjal", 6389309, "poz-mly"} + +m["knm"] = { -- two unrelated lects have this name; this is the Katukinian one + "Kanamari", 3438373, "sai-ktk", Latn +} + +m["kno"] = {"Kono (Sierra Leone)", 35675, "dmn-vak"} + +m["knp"] = {"Kwanja", 35641, "nic-mmb", Latn} + +m["knq"] = {"Kintaq", 6414335, "mkh-asl"} + +m["knr"] = {"Kaningra", 6363253, "paa-spk"} + +m["kns"] = {"Kensiu", 6391529, "mkh-asl"} + +m["knt"] = {"Katukina", 3194265, "sai-pan", Latn} + +m["knu"] = { -- a dialect of 'kpe' + "Kono (Guinea)", + 3198703, + "dmn-msw", + Latn, + ancestors = {"kpe"} +} + +m["knv"] = {"Tabo", 7959888, "aav"} + +m["knx"] = {"Kendayan", 6388963, "poz-mly", Latn} + +m["kny"] = {"Kanyok", 11110766, "bnt-lub"} + +m["knz"] = {"Kalamsé", 3914000, "nic-gnn"} + +m["koa"] = {"Konomala", 3198732, "poz-ocw", Latn} + +m["koc"] = {"Kpati", 3913279, "nic-nge", Latn} + +m["kod"] = {"Kodi", 4577633} + +m["koe"] = {"Kacipo-Balesi", 5364424, "sdv"} + +m["kof"] = {"Kubi", 3438718, "cdc-wst", Latn} + +m["kog"] = {"Cogui", 3198286, "cba"} + +m["koh"] = {"Koyo", 35649, "bnt-mbo", Latn} + +m["koi"] = {"Komi-Permyak", 56318, "urj-prm", {"Cyrl", "Perm"}, translit_module = "kv-translit", override_translit = true} + +m["kok"] = {"Konkani", 34239, "inc-sou", {"Deva", "Knda", "Mlym", "fa-Arab", "Latn"}, ancestors = {"pmh"}, translit_module = "mr-translit"} + +m["kol"] = {"Kol (New Guinea)", 4227542} + +m["koo"] = {"Konzo", 2361829, "bnt-glb"} + +m["kop"] = {"Waube", 11732373, "ngf-mad"} + +m["koq"] = {"Kota (Gabon)", 35607, "bnt-kel", Latn} + +m["kos"] = {"Kosraean", 33464, "poz-mic", Latn} + +m["kot"] = {"Lagwan", 
3502264, "cdc-cbm", Latn} + +m["kou"] = {"Koke", 797249, "alv-bua"} + +m["kov"] = {"Kudu-Camo", 3915850, "nic-jer"} + +m["kow"] = {"Kugama", 3913307, "alv-mye"} + +m["koy"] = {"Koyukon", 28304, "ath-nor", Latn} + +m["koz"] = {"Korak", 6431365, "ngf-mad"} + +m["kpa"] = {"Kutto", 3437656, "cdc-wst"} + +m["kpb"] = {"Mullu Kurumba", 19573111, "dra"} + +m["kpc"] = {"Curripaco", 2882543, "awd-nwk", Latn} + +m["kpd"] = {"Koba", 6424249, "poz"} + +m["kpe"] = {"Kpelle", 35673, "dmn-msw", Latn} + +m["kpf"] = {"Komba", 6428239, "ngf"} + +m["kpg"] = {"Kapingamarangi", 35771, "poz-pnp", Latn} + +m["kph"] = {"Kplang", 35628, "alv-gng"} + +m["kpi"] = {"Kofei", 6425665, "paa-egb"} + +m["kpj"] = {"Karajá", 10322066, "sai-mje", Latn} + +m["kpk"] = {"Kpan", 3915380, "nic-jkn", Latn} + +m["kpl"] = {"Kpala", 11154769, "nic-nkk", Latn} + +m["kpm"] = {"Koho", 3511919, "mkh-ban", Latn} + +m["kpn"] = {"Kepkiriwát", 3195366, "tup", Latn} + +m["kpo"] = {"Ikposo", 35029, "alv-ktg", Latn} + +m["kpp"] = {"Paku Karen", nil} + +m["kpq"] = {"Korupun-Sela", 6432769, "ngf"} + +m["kpr"] = {"Korafe-Yegha", 11732347, "ngf"} + +m["kps"] = {"Tehit", 7694851} + +m["kpt"] = {"Karata", 56636, "cau-ava", Cyrl} + +m["kpu"] = {"Kafoa", 6346151, "qfa-tap"} + +m["kpv"] = {"Komi-Zyrian", 34114, "urj-prm", Cyrl, translit_module = "kv-translit", override_translit = true, wikimedia_codes = {"kv"}} + +m["kpw"] = {"Kobon", 11732326, "ngf-mad"} + +m["kpx"] = {"Mountain Koiari", 6925030, "ngf"} + +m["kpy"] = {"Koryak", 36199, "qfa-cka", Cyrl} + +m["kpz"] = {"Kupsabiny", 56445, "sdv-kln"} + +m["kqa"] = {"Mum", 6935252, "ngf-mad"} + +m["kqb"] = {"Kovai", 6434822, "ngf"} + +m["kqc"] = {"Doromu-Koki", 5298175, "ngf"} + +m["kqd"] = {"Koy Sanjaq Surat", 33463, "sem-nna"} + +m["kqe"] = {"Kalagan", 18748906, "phi"} + +m["kqf"] = {"Kakabai", 6349119, "poz-ocw", Latn} + +m["kqg"] = {"Khe", 3914015, "nic-gur"} + +m["kqh"] = {"Kisankasa", 6416409, "sdv"} + +m["kqi"] = {"Koitabu", 6426363, "ngf"} + +m["kqj"] = {"Koromira", 6432520, 
"paa-sbo"} + +m["kqk"] = {"Kotafon Gbe", 12952447, "alv-pph"} + +m["kql"] = {"Kyenele", 11732453, "paa-yua"} + +m["kqm"] = {"Khisa", 3913955, "nic-gur"} + +m["kqn"] = {"Kaonde", 33601, "bnt-lub", Latn} + +m["kqo"] = {"Eastern Krahn", 3915374, "kro-wee"} + +m["kqp"] = {"Kimré", 3441210, "cdc-est"} + +m["kqq"] = {"Krenak", 6436747, "sai-cer"} + +m["kqr"] = {"Kimaragang", 3196845, "poz-san", Latn} + +m["kqs"] = {"Northern Kissi", 19921576, "alv-kis"} + +m["kqt"] = {"Klias River Kadazan", 12953594, "poz-san"} + +m["kqu"] = {"Seroa", 33127766, "khi-tuu"} + +m["kqv"] = {"Okolod", 7082487, "poz-san"} + +m["kqw"] = {"Kandas", 3192590, "poz-ocw", Latn} + +m["kqx"] = {"Mser", 3502347, "cdc-cbm"} + +m["kqy"] = {"Koorete", 6430753, "omv-eom"} + +m["kqz"] = {"Korana", 2756709, "khi-khk", Latinx} + +m["kra"] = {"Kumhali", 13580783, "inc-eas", ancestors = {"bh"}} + +m["krb"] = {"Karkin", 3193345, "nai-you", Latn} + +m["krc"] = {"Karachay-Balkar", 33714, "trk-kcu", Cyrl, translit_module = "krc-translit"} + +m["krd"] = {"Kairui-Midiki", 12953277, "poz-tim"} + +m["kre"] = {"Panará", 3361895, "sai-cer"} + +m["krf"] = {"Koro (Vanuatu)", 3198995, "poz-oce", Latn} + +m["krh"] = {"Kurama", 35593, "nic-kau"} + +m["kri"] = {"Krio", 35744, "crp", Latn, ancestors = {"en"}} + +m["krj"] = {"Kinaray-a", 33720, "phi", Latn} + +m["krk"] = {"Kerek", 332792, "qfa-cka", Cyrl} + +m["krl"] = {"Karelian", 33557, "fiu-fin", Latn} + +m["krm"] = {"Krim", 35713, "alv"} + +m["krn"] = {"Sapo", 3915386, "kro-wee"} + +m["krp"] = {"Korop", 35626, "nic-ucr", Latn} + +m["krr"] = {"Kru'ng", 12953650, "mkh-ban"} + +m["krs"] = {"Kresh", 56674, "csu-bkr"} + +m["kru"] = {"Kurukh", 33492, "dra", Deva} + +m["krv"] = {"Kavet", 12953649, "sai-ktk", Latn} + +m["krw"] = {"Western Krahn", 10975611, "kro-wee"} + +m["krx"] = {"Karon", 35704, "alv-jol"} + +m["kry"] = {"Kryts", 35861, "cau-lzg"} + +m["krz"] = {"Sota Kanum", 12952568, "paa-yam"} + +m["ksa"] = {"Shuwa-Zamani", 3913929, "nic-kau"} + +m["ksb"] = {"Shambala", 
3788739, "bnt-seu", Latn} + +m["ksc"] = {"Southern Kalinga", 18753301, "phi"} + +m["ksd"] = {"Tolai", 35870, "poz-ocw", Latn} + +m["kse"] = {"Kuni", 6444619, "poz-ocw", Latn} + +m["ksf"] = {"Bafia", 34930, "bnt-baf"} + +m["ksg"] = {"Kusaghe", 3200638, "poz-ocw", Latn} + +m["ksi"] = {"Krisa", 841704, "paa-msk", Latn} + +m["ksj"] = {"Uare", 6450052, "ngf"} + +m["ksk"] = {"Kansa", 3192772, "sio-dhe"} + +m["ksl"] = {"Kumalu", 17584381, "poz-ocw", Latn} + +m["ksm"] = {"Kumba", 3913972, "alv-mye"} + +m["ksn"] = {"Kasiguranin", 6374525, "phi"} + +m["kso"] = {"Kofa", 56278, "cdc-cbm"} + +m["ksp"] = {"Kaba", 3915316, "csu-sar"} + +m["ksq"] = {"Kwaami", 3440525, "cdc-wst"} + +m["ksr"] = {"Borong", 4946263, "ngf"} + +m["kss"] = {"Southern Kissi", 11028974, "alv-kis"} + +m["kst"] = {"Winyé", 3913360, "nic-gnw"} + +m["ksu"] = {"Khamyang", 6583541, "tai-swe"} + +m["ksv"] = {"Kusu", 6448199, "bnt-tet"} + +m["ksw"] = {"S'gaw Karen", 56410, "kar", {"Mymr"}, translit_module = "ksw-translit"} + +m["ksx"] = {"Kedang", 6382520, "poz", Latn} + +m["ksy"] = {"Kharia Thar", 6400661, "inc-eas", ancestors = {"inc-mgd"}} + +m["ksz"] = {"Kodaku", 21179986, "mun"} + +m["kta"] = {"Katua", 6378404, "mkh-ban"} + +m["ktb"] = {"Kambaata", 35664, "cus"} + +m["ktc"] = {"Kholok", 3440464, "cdc-wst"} + +m["ktd"] = {"Kokata", 10547021, "aus-pam"} + +m["ktf"] = {"Kwami", 12952687, "bnt-lgb"} + +m["ktg"] = {"Kalkatungu", 3914057, "aus-pam", Latn} + +m["kth"] = {"Karanga", 713643} + +m["kti"] = {"North Muyu", 20857698, "ngf", Latn} + +m["ktj"] = {"Plapo Krumen", 10975356, "kro-grb"} + +m["ktk"] = {"Kaniet", 3399050, "poz-aay", Latn} + +m["ktl"] = {"Koroshi", 3775265, "ira-nwi", ancestors = {"bal"}} + +m["ktm"] = {"Kurti", 3200615, "poz-aay", Latn} + +m["ktn"] = {"Karitiâna", 3112184, "tup", Latn} + +m["kto"] = {"Kuot", 56537} + +m["ktp"] = {"Kaduo", 769809, "tbq-lol"} + +m["ktq"] = {"Katabaga", 3193895} + +m["ktr"] = {"Kota Marudu Tinagas", 18642280} + +m["kts"] = {"South Muyu", 42308820, "ngf", Latn} + 
+m["ktt"] = {"Ketum", 12952616, "ngf"} + +m["ktu"] = {"Kituba", 35746, "crp", Latn, ancestors = {"kg"}} + +m["ktv"] = {"Eastern Katu", 22808951, "mkh-kat"} + +m["ktw"] = {"Kato", 20831, "ath-pco", Latn} + +m["ktx"] = {"Kaxararí", 6380124, "sai-pan", Latn} + +m["kty"] = {"Kango", 6362818, "bnt-bta", Latn} + +m["ktz"] = {"Juǀ'hoan", 1192295, "khi-kxa", Latn} + +m["kub"] = {"Kutep", 35645, "nic-jkn"} + +m["kuc"] = {"Kwinsu", 6450460, "paa-tkw"} + +m["kud"] = {"Auhelawa", 5166, "poz-ocw", Latn} + +m["kue"] = {"Kuman", 137525, "ngf", Latn} + +m["kuf"] = {"Western Katu", 6378400, "mkh-kat", {"Laoo", "Tale"}} + +m["kug"] = {"Kupa", 3915336, "alv-ngb"} + +m["kuh"] = {"Kushi", 3438747, "cdc-wst"} + +m["kui"] = {"Kuikúro", 3915522, "sai-car", Latn} + +m["kuj"] = {"Kuria", 6445968, "bnt-lok", Latn} + +m["kuk"] = {"Kepo'", 6393217, "poz"} + +m["kul"] = {"Kulere", 3440506, "cdc-wst"} + +m["kum"] = {"Kumyk", 36209, "trk-kcu", Cyrl, translit_module = "kum-translit"} + +m["kun"] = {"Kunama", 36041} + +m["kuo"] = {"Kumukio", 11732362, "ngf"} + +m["kup"] = {"Kunimaipa", 6444696} + +m["kuq"] = {"Karipuna", 6371071, "tup-gua", Latn} + +m["kus"] = {"Kusaal", 35708, "nic-dag", Latn} + +m["kut"] = {"Kutenai", 33434, "qfa-iso"} + +m["kuu"] = {"Upper Kuskokwim", 28062, "ath-nor"} + +m["kuv"] = {"Kur", 12635082, "poz-cma", Latn} + +m["kuw"] = {"Kpagua", 11137573, "bad-cnt"} + +m["kux"] = {"Kukatja", 10549839, "aus-pam"} + +m["kuy"] = {"Kuuku-Ya'u", 10550697, "aus-pmn"} + +m["kuz"] = {"Kunza", 2669181, "qfa-iso"} + +m["kva"] = {"Bagvalal", 56638, "cau-ava"} + +m["kvb"] = {"Kubu", 6441341, "poz-mly"} + +m["kvc"] = {"Kove", 3199402, "poz-ocw", Latn} + +m["kvd"] = {"Kui (Indonesia)", 6442230, "ngf"} + +m["kve"] = {"Kalabakan", 6350003, "poz-san"} + +m["kvf"] = {"Kabalai", 3440427, "cdc-est"} + +m["kvg"] = {"Kuni-Boazi", 2907551, "ngf"} + +m["kvh"] = {"Komodo", 3198565, "poz-cet"} + +m["kvi"] = {"Kwang", 3440398, "cdc-est", Latn} + +m["kvj"] = {"Psikye", 56304, "cdc-cbm"} + +m["kvk"] = {"Korean 
Sign Language", 3073428, "sgn-jsl"} + +m["kvl"] = {"Brek Karen", 12952577, "kar"} + +m["kvm"] = {"Kendem", 35751, "nic-mam", Latn} + +m["kvn"] = {"Border Kuna", 31777873, "cba"} + +m["kvo"] = {"Dobel", 5286559, "poz"} + +m["kvp"] = {"Kompane", 18343041, "poz"} + +m["kvq"] = {"Geba Karen", 12952581, "kar"} + +m["kvr"] = {"Kerinci", 3195442, "poz-mly"} + +m["kvt"] = {"Lahta Karen", 12952582, "kar"} + +m["kvu"] = {"Yinbaw Karen", 14426328, "kar"} + +m["kvv"] = {"Kola", 6426967, "poz"} + +m["kvw"] = {"Wersing", 7983599, "qfa-tap"} + +m["kvx"] = {"Parkari Koli", 3244176, "inc-wes"} + +m["kvy"] = {"Yintale Karen", 14426329, "kar"} + +m["kvz"] = {"Tsakwambo", 7849438, "ngf"} + +m["kwa"] = {"Dâw", 3042278, "sai-nad"} + +m["kwb"] = {"Baa", 34842, "alv-ada"} + +m["kwc"] = {"Likwala", 35597, "bnt-mbo"} + +m["kwd"] = {"Kwaio", 3200796, "poz-sls", Latn} + +m["kwe"] = {"Kwerba", 6450328, "paa-tkw"} + +m["kwf"] = {"Kwara'ae", 3200829, "poz-oce"} + +m["kwg"] = {"Sara Kaba Deme", 3915384, "csu-kab"} + +m["kwh"] = {"Kowiai", 6435028, "poz"} + +m["kwi"] = {"Awa-Cuaiquer", 2603103, "sai-bar", Latn} + +m["kwj"] = {"Kwanga", 3438383, "paa-spk"} + +m["kwk"] = {"Kwak'wala", 2640628, "wak", Latn} + +m["kwl"] = {"Kofyar", 3441382, "cdc-wst", Latn} + +m["kwm"] = {"Kwambi", 3487165, "bnt-ova"} + +m["kwn"] = {"Kwangali", 36334, "bnt-kav", Latn} + +m["kwo"] = {"Kwomtari", 3508116} + +m["kwp"] = {"Kodia", 3914867, "kro-ekr"} + +m["kwq"] = {"Kwak", 11014183, "nic-nka", ancestors = {"yam"}} + +m["kwr"] = {"Kwer", 12635137, "ngf-okk"} + +m["kws"] = {"Kwese", 3200846, "bnt-pen"} + +m["kwt"] = {"Kwesten", 6450354, "paa-tkw"} + +m["kwu"] = {"Kwakum", 35624, "bnt-kak"} + +m["kwv"] = {"Sara Kaba Náà", 3915361, "csu-kab"} + +m["kww"] = {"Kwinti", 721182} + +m["kwx"] = {"Khirwar", 12976968, "dra"} + +m["kwz"] = {"Kwadi", 2364661, "khi-kkw", Latn} + +m["kxa"] = {"Kairiru", 3398785, "poz-ocw", Latn} + +m["kxb"] = {"Krobu", 35586, "alv-ptn", Latn} + +m["kxc"] = {"Khonso", 56624, "cus"} + +m["kxd"] = {"Brunei 
Malay", 3182878, "poz-mly", Latn} + +m["kxe"] = {"Kakihum", 3914433, "nic-kam", ancestors = {"tvd"}} + +m["kxf"] = {"Manumanaw Karen", 12952592, "kar", {"Mymr", "Latn"}} + +m["kxh"] = {"Karo", 3447116, "omv-aro"} + +m["kxi"] = {"Keningau Murut", 6389308, "poz-san", Latn} + +m["kxj"] = {"Kulfa", 713654, "csu-kab"} + +m["kxk"] = {"Zayein Karen", 14352960, "kar"} + +m["kxl"] = {"Nepali Kurux", 3200624, "dra", Deva} + +m["kxm"] = {"Northern Khmer", 3502234, "mkh-kmr", {"Thai", "Khmr"}, ancestors = {"mkh-mkm"}, sort_key = {from = {"[%pๆ]", "[็-๎]", "([เแโใไ])([ก-ฮ])"}, to = {"", "", "%2%1"}}} + +m["kxn"] = {"Kanowit", 6364300, "poz-bnn", Latn} + +m["kxo"] = {"Kanoé", 4356223, "qfa-iso"} + +m["kxp"] = {"Wadiyara Koli", 12953645, "inc-wes"} + +m["kxq"] = {"Smärky Kanum", 12952569, "paa-yam"} + +m["kxr"] = {"Koro (New Guinea)", 3198994, "poz-oce", Latn} + +m["kxs"] = {"Kangjia", 3182570, "xgn"} + +m["kxt"] = {"Koiwat", 6426388, "paa-spk"} + +m["kxu"] = {"Kui (India)", 33919, "dra", {"Orya"}} + +m["kxv"] = {"Kuvi", 3200721, "dra", {"Orya"}} + +m["kxw"] = {"Konai", 11732339} + +m["kxx"] = {"Likuba", 35646, "bnt-bmo"} + +m["kxy"] = {"Kayong", 6380673, "mkh"} + +m["kxz"] = {"Kerewo", 6393847, "paa-kiw"} + +m["kya"] = {"Kwaya", 6450276, "bnt-haj", Latn} + +m["kyb"] = {"Butbut Kalinga", 18753300, "phi"} + +m["kyc"] = {"Kyaka", 12952690, "paa-eng"} + +m["kyd"] = {"Karey", 6370196, "poz"} + +m["kye"] = {"Krache", 35658, "alv-gng"} + +m["kyf"] = {"Kouya", 35595, "kro-bet"} + +m["kyg"] = {"Keyagana", 6398208, "paa-kag"} + +m["kyh"] = {"Karok", 1288440, "qfa-iso", Latn} + +m["kyi"] = {"Kiput", 3038653, "poz-swa", Latn} + +m["kyj"] = {"Karao", 3192950, "phi"} + +m["kyk"] = {"Kamayo", 3192339, "phi"} + +m["kyl"] = {"Kalapuya", 3192120, "nai-klp"} + +m["kym"] = {"Kpatili", 3913982, "znd"} + +m["kyn"] = {"Karolanos", 6373093, "phi"} + +m["kyo"] = {"Kelon", 6386414, "ngf"} + +m["kyp"] = {"Kang", 25559558, "tai"} + +m["kyq"] = {"Kenga", 35707, "csu-bgr"} + +m["kyr"] = {"Kuruáya", 3200633, 
"tup", Latn} + +m["kys"] = {"Baram Kayan", 2883794, "poz"} + +m["kyt"] = {"Kayagar", 6380394, "ngf"} + +m["kyu"] = {"Western Kayah", 12952596, "kar", {"Kali", "Mymr", "Latn"}, translit_module = "translit-redirect"} + +m["kyv"] = {"Kayort", 6380675, "inc-eas", {"as-Beng"}, ancestors = {"inc-mgd"}} + +m["kyw"] = {"Kudmali", 6446173, "inc-eas", ancestors = {"bh"}} + +m["kyx"] = {"Rapoisi", 7294279, "paa-nbo"} + +m["kyy"] = {"Kambaira", 6356254, "paa-kag"} + +m["kyz"] = {"Kayabí", 6380372, "tup-gua", Latn} + +m["kza"] = {"Western Karaboro", 36601, "alv-krb"} + +m["kzb"] = {"Kaibobo", 6347565, "poz-cma"} + +m["kzc"] = {"Bondoukou Kulango", 11031321, "alv-kul"} + +m["kzd"] = {"Kadai", 7679471, "poz-cma", Latn} + +m["kze"] = {"Kosena", 12952663, "ngf", Latn} + +m["kzf"] = {"Da'a Kaili", 33103997, "poz-kal", Latn} + +m["kzg"] = {"Kikai", 3196527, "jpx-ryu", {"Jpan"}} + +m["kzh"] = {"Dongolawi", 5295991, "nub", Latn} + +m["kzi"] = {"Kelabit", 6385445, "poz-swa", Latn} + +m["kzj"] = {"Coastal Kadazan", 3307195, "poz-san", Latn} + +m["kzk"] = {"Kazukuru", 1089069, "poz-ocw"} + +m["kzl"] = {"Kayeli", 4207444, "poz-cma", Latn} + +m["kzm"] = {"Kais", 6348319, "paa", Latn} + +m["kzn"] = {"Kokola", 11128329, "bnt-mak", Latn, ancestors = {"vmw"}} + +m["kzo"] = {"Kaningi", 35683, "bnt-mbt"} + +m["kzp"] = {"Kaidipang", 6347611, "phi"} + +m["kzq"] = {"Kaike", 10951226, "sit-tam"} + +m["kzr"] = {"Karang", 35681, "alv-mbm", Latn} + +m["kzs"] = {"Sugut Dusun", 12953510, "poz-san", Latn} + +m["kzt"] = {"Tambunan Dusun", 12953514, "poz-san", Latn} + +m["kzu"] = {"Kayupulau", 6380723, "poz-ocw"} + +m["kzv"] = {"Komyandaret", 6428671, "ngf-okk", Latn} + +m["kzw"] = { -- contrast xoo, sai-kat, sai-xoc, the last of which the ISO conflated into this code + "Kariri", 12953620, "sai-mje", Latn +} + +m["kzx"] = {"Kamarian", 6356040, "poz-cma", Latn} + +m["kzy"] = {"Kango-Sua", 11008360, "bnt-kbi", Latn, ancestors = {"bip"}} + +m["kzz"] = {"Kalabra", 6350038, "paa", Latn} + +return m diff --git 
a/wiktra/wikt/translit/languages/data3/l.lua b/wiktra/wikt/translit/languages/data3/l.lua new file mode 100644 index 0000000..61c4ae4 --- /dev/null +++ b/wiktra/wikt/translit/languages/data3/l.lua @@ -0,0 +1,682 @@ +local u = mw.ustring.char + +-- UTF-8 encoded strings for some commonly used diacritics +local GRAVE = u(0x0300) +local ACUTE = u(0x0301) +local CIRC = u(0x0302) +local TILDE = u(0x0303) +local MACRON = u(0x0304) +local BREVE = u(0x0306) +local DOTABOVE = u(0x0307) +local DIAER = u(0x0308) +local CARON = u(0x030C) +local DGRAVE = u(0x030F) +local INVBREVE = u(0x0311) +local DOTBELOW = u(0x0323) +local RINGBELOW = u(0x0325) +local CEDILLA = u(0x0327) + +local Latn = {"Latn"} + +local m = {} + +m["laa"] = {"Lapuyan Subanun", 12635302, "phi"} + +m["lab"] = {"Linear A", nil} + +m["lac"] = {"Lacandon", 35766, "myn"} + +m["lad"] = {"Ladino", 36196, "roa-ibe", {"Hebr", "Latn", "Cyrl"}, ancestors = {"osp"}} + +m["lae"] = {"Pattani", 7148323, "sit-whm"} + +m["laf"] = {"Lafofa", 35711, "alv"} + +m["lag"] = {"Langi", 584983, "bnt-mra"} + +m["lah"] = {"Lahnda", 1334774, "inc-pan", {"pa-Arab"}, ancestors = {"inc-tak"}} + +m["lai"] = {"Lambya", 6481626, "bnt-mby"} + +m["laj"] = {"Lango (Uganda)", 35670, "sdv-los", Latn} + +m["lak"] = { + "Laka", 6474529, -- also Q55616620 + "csu-sar" -- formerly classified as "alv-mbm"; see [[w:Lau Laka language]] +} + +m["lam"] = {"Lamba", 36098, "bnt-sbi", Latn} + +m["lan"] = {"Laru", 3913987, "nic-knj", Latn} + +m["lap"] = {"Kabba-Laka", 6474528, "csu-sar"} + +m["laq"] = {"Qabiao", 3436700, "qfa-kra"} + +m["lar"] = {"Larteh", 35639, "alv-gng", Latn} + +m["las"] = {"Gur Lama", 35652, "nic-gne", Latn} + +m["lau"] = {"Laba", 12952694} + +m["law"] = {"Lauje", 6498258, "poz", Latn} + +m["lax"] = {"Tiwa", 7810466, "tbq-bdg", Latn} + +m["lay"] = {"Lama Bai", 6480756, "zhx-gba"} + +m["laz"] = {"Aribwatsa", 3502104, "poz-ocw", Latn} + +m["lbb"] = {"Label", 3214296, "poz-ocw", Latn} + +m["lbc"] = {"Lakkia", 3027879, "qfa-tak"} + +m["lbe"] = 
{"Lak", 36206, "cau-nec", {"Cyrl"}, translit_module = "lbe-translit", override_translit = true} + +m["lbf"] = {"Tinani", 784502, "sit-whm"} + +m["lbg"] = {"Laopang", 12952711, "tbq-lol"} + +m["lbi"] = {"La'bi", 6460637, "alv-mbm"} + +m["lbj"] = {"Ladakhi", 35833, "sit-lab", translit_module = "bo-translit"} + +m["lbk"] = {"Central Bontoc", nil, "phi", Latn} + +m["lbl"] = {"Libon Bikol", 18664462, "phi"} + +m["lbm"] = {"Lodhi", 6666374, "mun"} + +m["lbn"] = {"Lamet", 3216723, "mkh-pal"} + +m["lbo"] = {"Laven", 6298648, "mkh-ban", Latn} + +m["lbq"] = {"Wampar", 7966946, "poz-ocw", Latn} + +m["lbr"] = {"Northern Lorung", 6668040, "sit-kie"} + +m["lbs"] = {"Libyan Sign Language", 11775688, "sgn"} + +m["lbt"] = {"Lachi", 6583606, "qfa-kra"} + +m["lbu"] = {"Labu", 6467660, "poz-ocw", Latn} + +m["lbv"] = {"Lavatbura-Lamusong", 2405981, "poz-ocw", Latn} + +m["lbw"] = {"Tolaki", 3033597, "poz-btk", Latn} + +m["lbx"] = {"Lawangan", 3120345, "poz-bre", Latn} + +m["lby"] = {"Lamu-Lamu", 6482727, nil, Latn} + +m["lbz"] = {"Lardil", 3915688, "aus-tnk", Latn} + +m["lcc"] = {"Legenyem", 12952713, "poz-hce", Latn} + +m["lcd"] = {"Lola", 6668867, "poz-cet", Latn} + +m["lce"] = {"Loncong", 3058192} + +m["lcf"] = {"Lubu", 3264685} + +m["lch"] = {"Luchazi", 3265143, "bnt-clu"} + +m["lcl"] = {"Lisela", 6558753, "poz-cma", Latn} + +m["lcm"] = {"Tungag", 3542085, "poz-ocw", Latn} + +m["lcp"] = {"Western Lawa", 18644465, "mkh-pal", {"Thai"}, sort_key = {from = {"[%p]", "[็-๎]", "([เแโใไ])(ʼ?[ก-ฮ])"}, to = {"", "", "%2%1"}}} + +m["lcq"] = {"Luhu", 6699890, "poz-cma", Latn} + +m["lcs"] = {"Lisabata-Nuniali", 6558534} + +m["lda"] = {"Kla", nil, "dmn-mda", Latn} + +m["ldb"] = {"Idun", 3914441, "nic-plc", Latn} + +m["ldd"] = {"Luri (Nigeria)", 4701277, "cdc-wst"} + +m["ldg"] = {"Lenyima", 3914423, "nic-uce", Latn} + +m["ldh"] = {"Lamja-Dengsa-Tola", 11001739, "nic-dak"} + +m["ldj"] = {"Lemoro", 3912761, "nic-jer"} + +m["ldk"] = {"Leelau", 3914465, "alv-bwj"} + +m["ldl"] = {"Kaan", 3914501, 
"alv-yun"} + +m["ldm"] = {"Landoma", 35568, "alv-mel"} + +m["ldn"] = {"Láadan", 35757, "art", Latn, type = "appendix-constructed"} + +m["ldo"] = {"Loo", 3915378, "alv-bwj"} + +m["ldp"] = {"Tso", 3913953, "alv-wjk"} + +m["ldq"] = {"Lufu", 35796, "nic-ykb", Latn} + +m["lea"] = {"Lega-Shabunda", 12952719, "bnt-lgb"} + +m["leb"] = {"Lala-Bisa", 6480112, "bnt-sbi"} + +m["lec"] = {"Leco", 2625398, "qfa-iso"} + +m["led"] = {"Lendu", 523823, "csu-lnd", Latn} + +m["lee"] = {"Lyélé", 3089032, "nic-gnn"} + +m["lef"] = {"Lelemi", 35585, "alv-ntg"} + +m["leh"] = {"Lenje", 6522666, "bnt-bot"} + +m["lei"] = {"Lemio", 6521165, "ngf-mad"} + +m["lej"] = {"Lengola", 6522474, "bnt-leb"} + +m["lek"] = {"Leipon", 3229216, "poz-aay", Latn} + +m["lel"] = {"Lele (Congo)", 56733, "bnt-bsh"} + +m["lem"] = {"Nomaande", 13479983, "nic-mbw", Latn} + +m["len"] = {"Honduran Lenca", 36189, "nai-len", Latn} + +m["leo"] = {"Leti (Cameroon)", 1345684, "nic-mba", ancestors = {"bag"}} + +m["lep"] = {"Lepcha", 35990, "sit", {"Lepc"}, translit_module = "lep-translit"} + +m["leq"] = {"Lembena", 6521067, "paa-eng"} + +m["ler"] = {"Lenkau", 3229472, "poz-aay", Latn} + +m["les"] = {"Lese", 11033939, "csu-mle"} + +m["let"] = {"Lesing-Gelimi", 12635445, "poz-ocw", Latn} + +m["leu"] = {"Kara (New Guinea)", 3192889, "poz-ocw", Latn} + +m["lev"] = {"Lamma", 6583582, "ngf"} + +m["lew"] = { -- this code was basically assigned as a catch-all for things that aren't brs, kzf or unz + "Ledo Kaili", 35877, "poz-kal", Latn +} + +m["lex"] = {"Luang", 6695015, "poz-tim"} + +m["ley"] = {"Lemolang", 3033560} + +m["lez"] = {"Lezgi", 31746, "cau-lzg", {"Cyrl"}, translit_module = "lez-translit", override_translit = true, entry_name = {from = {GRAVE, ACUTE}, to = {}}} + +m["lfa"] = {"Lefa", 35643, "bnt-baf"} + +m["lfn"] = {"Lingua Franca Nova", 146803, "art", {"Latn", "Cyrl"}, type = "appendix-constructed"} + +m["lga"] = {"Lungga", 3267590, "poz-ocw", Latn} + +m["lgb"] = {"Laghu", 3216169, "poz-ocw", Latn} + +m["lgg"] = 
{"Lugbara", 3272737, "csu-mma"} + +m["lgh"] = {"Laghuu", 6472114, "tbq-lol"} + +m["lgi"] = {"Lengilu", 6522465, "poz-swa", Latn} + +m["lgk"] = {"Neverver", 3241515, "poz-vnc", Latn} + +m["lgl"] = {"Wala", 3565284, "poz-sls"} + +m["lgm"] = {"Lega-Mwenga", 14916883, "bnt-lgb"} + +m["lgn"] = {"Opuuo", 3354339, "ssa-kom"} + +m["lgq"] = {"Logba", 35813, "alv-ntg", Latn} + +m["lgr"] = {"Lengo", 3229454, "poz-sls", Latn} + +m["lgt"] = {"Pahi", 7124545, "paa-spk", Latn} + +m["lgu"] = {"Longgu", 3259105, "poz-sls"} + +m["lgz"] = {"Ligenza", 5531038, "bnt-bun"} + +m["lha"] = {"Laha (Vietnam)", 3112363, "qfa-kra"} + +m["lhh"] = {"Laha (Indonesia)", 6473107, "poz-cma"} + +m["lhi"] = {"Lahu Shi", 25559457, "tbq-lol"} + +m["lhl"] = {"Lahul Lohar", 12953672} + +m["lhn"] = {"Lahanan", 12953660} + +m["lhp"] = {"Lhokpu", 3436603, "sit-dhi"} + +m["lhs"] = {"Mlahsö", 3393063, "sem-cna"} + +m["lht"] = {"Lo-Toga", 3257566, "poz-vnc", Latn} + +m["lhu"] = {"Lahu", 35780, "tbq-lol"} + +m["lia"] = {"West-Central Limba", 32867815, "alv-lim"} + +m["lib"] = {"Likum", 3240737, "poz-aay", Latn} + +m["lic"] = {"Hlai", 934738, "qfa-lic", Latn} + +m["lid"] = {"Nyindrou", 3346666, "poz-aay", Latn} + +m["lie"] = {"Likila", 11011614, "bnt-ngn"} + +m["lif"] = {"Limbu", 56477, "sit-kir", {"Limb"}, translit_module = "lif-translit"} + +m["lig"] = {"Ligbi", 33594, "dmn-jje"} + +m["lih"] = {"Lihir", 6546938, "poz-ocw", Latn} + +m["lii"] = {"Lingkhim", 12635536} + +m["lij"] = {"Ligurian", 36106, "roa-git", Latn} + +m["lik"] = {"Lika", 1530394, "bnt-boa"} + +m["lil"] = {"Lillooet", 34154, "sal"} + +m["lio"] = {"Liki", 4261493, "poz-ocw", Latn} + +m["lip"] = {"Sekpele", 36257, "alv-ntg"} + +m["liq"] = {"Libido", 35691, "cus"} + +m["lir"] = {"Liberian English", 6541128} + +m["lis"] = {"Lisu", 56480, "tbq-lol", {"Lisu"}} + +m["liu"] = {"Logorik", 6667811, "sdv-daj"} + +m["liv"] = {"Livonian", 33698, "fiu-fin", Latn, entry_name = {from = {u(0x01ed), u(0x0027), u(0x2019)}, to = {u(0x014d)}}} + +m["liw"] = {"Col", 
2981948} + +m["lix"] = {"Liabuku", 13580912} + +m["liy"] = {"Banda-Bambari", 11051591, "bad-cnt"} + +m["liz"] = {"Libinza", 4914576, "bnt-zbi"} + +m["lja"] = {"Golpa", nil, "aus-yol", Latn} + +m["lje"] = {"Rampi", 7290041, "poz"} + +m["lji"] = {"Laiyolo", 6474218} + +m["ljl"] = {"Li'o", 2697010, "poz"} + +m["ljp"] = {"Lampung Api", 49215, "poz-lgx", Latn} + +m["ljw"] = {"Yirandali", 17059380} + +m["ljx"] = {"Yuru", nil} + +m["lka"] = {"Lakalei", 12952700, "poz-tim", Latn} + +m["lkb"] = {"Kabras", nil, "bnt-msl", ancestors = {"luy"}} + +m["lkc"] = {"Kucong", 6441572, "tbq-lol"} + +m["lkd"] = {"Lakondê", 20527166, "sai-nmk", Latn} + +m["lke"] = {"Kenyi", 12952628, "bnt-nyg"} + +m["lkh"] = {"Lakha", 56606, "sit-tib"} + +m["lki"] = {"Laki", 56483, "ku", {"fa-Arab"}, translit_module = "lki-translit", entry_name = {from = {u(0x0650), u(0x0652)}, to = {}}, ancestors = {"ku-pro"}} + +m["lkj"] = {"Remun", 7312239} + +m["lkl"] = {"Laeko-Libuat", 3504331, "qfa-tor"} + +m["lkm"] = {"Kalaamaya", 6349988} + +m["lkn"] = {"Lakon", 3216494, "poz-vnc", Latn} + +m["lko"] = {"Khayo", 6401095, "bnt-msl"} + +m["lkr"] = {"Päri", 36487, "sdv-lon"} + +m["lks"] = {"Kisa", nil, "bnt-msl", ancestors = {"luy"}} + +m["lkt"] = {"Lakota", 33537, "sio-dkt", Latn} + +m["lku"] = {"Kungkari", 6444526} + +m["lky"] = {"Lokoya", 56687, "sdv-lma"} + +m["lla"] = {"Lala-Roba", 3914878, "alv-yun"} + +m["llb"] = {"Lolo", 11006056, "bnt-mak", ancestors = {"vmw"}} + +m["llc"] = {"Lele (Guinea)", 6520837, "dmn-mok"} + +m["lld"] = {"Ladin", 36202, "roa-rhe", Latn} + +m["lle"] = {"Lele (New Guinea)", 3229269, "poz-oce", Latn} + +m["llf"] = {"Hermit", 3134240, "poz-aay", Latn} + +m["llg"] = {"Lole", 6668883, "poz-tim"} + +m["llh"] = {"Lamu", 6482736, "tbq-lol"} + +m["lli"] = {"Teke-Laali", 36543, "bnt-nze"} + +m["llj"] = {"Ladji-Ladji", 6512694, "aus-pam"} + +m["llk"] = {"Lelak", 3229263, "poz-swa", Latn} + +m["lll"] = {"Lilau", 6547570, "qfa-tor"} + +m["llm"] = {"Lasalimu", 6492774} + +m["lln"] = {"Lele (Chad)", 
1641493, "cdc-est"} + +m["llo"] = {"Khlor", 27921409, "mkh-kat"} + +m["llp"] = {"North Efate", 3580152, "poz-vnc", Latn} + +m["llq"] = {"Lolak", 12953679, "phi"} + +m["lls"] = {"Lithuanian Sign Language", 3915480, "sgn"} + +m["llu"] = {"Lau", 3218574, "poz-sls", Latn} + +m["llx"] = {"Lauan", 35682, "poz-occ", Latn} + +m["lma"] = {"East Limba", 11034212, "alv-lim"} + +m["lmb"] = {"Merei", 12952843, "poz-vnc", Latn} + +m["lmc"] = {"Limilngan", 6549414} + +m["lmd"] = {"Lumun", 35777, "alv-tal"} + +m["lme"] = {"Pévé", 56249, "cdc-mas"} + +m["lmf"] = {"South Lembata", 7567815} + +m["lmg"] = {"Lamogai", 278365, "poz-ocw", Latn} + +m["lmh"] = {"Lambichhong", 6481472, "sit-kie", ancestors = {"ybh"}} + +m["lmi"] = {"Lombi", 11259563, "csu-maa"} + +m["lmj"] = {"West Lembata", 6864697} + +m["lmk"] = {"Lamkang", 12952703, "tbq-kuk"} + +m["lml"] = {"Raga", 3063193, "poz-vnc", Latn} + +m["lmn"] = {"Lambadi", 33474, "inc-wes", ancestors = {"raj"}} + +m["lmo"] = {"Lombard", 33754, "roa-git", Latn} + +m["lmp"] = {"Limbum", 35801, "nic-nka", Latn} + +m["lmq"] = {"Lamatuka", 6480982} + +m["lmr"] = {"Lamalera", 6480787} + +m["lmu"] = {"Lamenu", 740604, "poz-vnc", Latn} + +m["lmv"] = {"Lomaiviti", 3130221, "poz-occ", Latn} + +m["lmw"] = {"Lake Miwok", 3216471, "nai-you", Latn} + +m["lmx"] = {"Laimbue", 6473933, "nic-rnw"} + +m["lmy"] = {"Laboya", 6481538, "poz-cet", {"Latn"}, sort_key = {from = {"ɓ", "ɗ", "ĵ"}, to = {"b~", "d~", "j~"}}} + +m["lmz"] = {"Lumbee", 12635887, nil, Latn} + +m["lna"] = {"Langbashe", 11137550, "bad"} + +m["lnb"] = {"Mbalanhu", 12952830, "bnt-ova"} + +m["lnd"] = {"Lun Bawang", 13479839, "poz-swa", Latn} + +m["lnh"] = {"Lanoh", 6487291, "mkh-asl"} + +m["lni"] = {"Daantanai'", 5207384, "paa-sbo"} + +m["lnj"] = {"Linngithigh", 3915694, "aus-pmn", Latn} + +m["lnl"] = {"South Central Banda", 41354532, "bad"} + +m["lnm"] = {"Langam", 6485678, "paa", Latn} + +m["lnn"] = {"Lorediakarkar", 6680287, "poz-vnc", Latn} + +m["lno"] = {"Lango (Sudan)", 223306, "sdv-lma"} + 
+m["lns"] = {"Lamnso'", 35788, "nic-rng"} + +m["lnu"] = {"Longuda", 35797, "alv-bam", Latn} + +m["lnw"] = {"Lanima", nil, "aus-pam", Latn} + +m["loa"] = {"Loloda", 6669025, "paa", Latn} + +m["lob"] = {"Lobi", 35807} + +m["loc"] = {"Inonhan", 2400870, "phi"} + +m["lod"] = {"Berawan", 4891018, "poz-swa", Latn} + +m["loe"] = {"Saluan", 12953867, "poz"} + +m["lof"] = {"Logol", 35779, "alv-hei"} + +m["log"] = {"Logo", 2613477, "csu-mma"} + +m["loh"] = {"Narim", 56353, "sdv"} + +m["loi"] = {"Lomakka", 3913961, "alv-kul"} + +m["loj"] = {"Lou", 3260104, "poz-aay", Latn} + +m["lok"] = {"Loko", 3914912, "dmn-msw", Latn} + +m["lol"] = {"Mongo", 112893, "bnt-mon", Latn} + +m["lom"] = {"Looma", 35885, "dmn-msw"} + +m["lon"] = {"Malawi Lomwe", 10975286} + +m["loo"] = {"Lombo", 11167192, "bnt-ske"} + +m["lop"] = {"Lopa", 3914875} + +m["loq"] = {"Lobala", 4849710, "bnt-ngn"} + +m["lor"] = {"Téén", 36467, "alv-kul"} + +m["los"] = {"Loniu", 3259202, "poz-aay", Latn} + +m["lot"] = {"Lotuko", 56672, "sdv-lma"} + +m["lou"] = {"Louisiana Creole French", 1185127, "crp", Latn, ancestors = {"fr"}} + +m["lov"] = {"Lopi", 12952740, "tbq-lol"} + +m["low"] = {"Tampias Lobu", 12953674} + +m["lox"] = {"Loun", 6689636, "poz-cet", Latn} + +m["loz"] = {"Lozi", 33628, "bnt-sts", Latn} + +m["lpa"] = {"Lelepa", 3229273, "poz-vnc"} + +m["lpe"] = {"Lepki", 4259152, "paa-pau"} + +m["lpn"] = {"Long Phuri Naga", 6673049, "sit-aao"} + +m["lpo"] = {"Lipo", 56921, "tbq-lol", {"Plrd"}} + +m["lpx"] = {"Lopit", 56684, "sdv-lma"} + +m["lra"] = {"Rara Bakati'", 3419746, "day"} + +m["lrc"] = {"Northern Luri", 19933293, "ira-swi", {"fa-Arab"}, ancestors = {"pal"}} + +m["lre"] = {"Laurentian", 1790301, "iro", Latn} + +m["lrg"] = {"Laragia", 2591193} + +m["lri"] = {"Marachi", 6754565, "bnt-msl"} + +m["lrk"] = {"Loarki", 6663513} + +m["lrl"] = {"Lari", 33468, "ira-swi", {"fa-Arab"}} + +m["lrm"] = {"Marama", nil, "bnt-msl", ancestors = {"luy"}} + +m["lrn"] = {"Lorang", 6678781} + +m["lro"] = {"Laro", 35687, "alv-hei"} + 
+m["lrr"] = {"Southern Lorung", 12952742, "sit-kie"} + +m["lrt"] = {"Larantuka Malay", 6488691, "poz-mly", Latn} + +m["lrv"] = {"Larevat", 3217892, "poz-vnc", Latn} + +m["lrz"] = {"Lemerig", 2028448, "poz-vnc"} + +m["lsa"] = {"Lasgerdi", 3218296, "ira-kms"} + +m["lsd"] = {"Lishana Deni", 3436461, "sem-nna", {"Hebr"}} + +m["lse"] = {"Lusengo", 6683546, "bnt-zbi"} + +m["lsh"] = {"Lish", 6558822, "sit-khb"} + +m["lsi"] = {"Lashi", 6493203, "tbq-brm"} + +m["lsl"] = {"Latvian Sign Language", 6497414, "sgn"} + +m["lsm"] = {"Saamia", 3739441, "bnt-msl"} + +m["lso"] = {"Laos Sign Language", 6488022, "sgn"} + +m["lsp"] = {"Panamanian Sign Language", 7129968, "sgn"} + +m["lsr"] = {"Aruop", 3450566, "qfa-tor"} + +m["lss"] = {"Lasi", 12953669, "inc-snd", {"Arab"}, ancestors = {"sd"}} + +m["lst"] = {"Trinidad and Tobago Sign Language", 7842495, "sgn"} + +m["lsy"] = {"Mauritian Sign Language", 6793754, "sgn"} + +m["ltc"] = {"Middle Chinese", 2016252, "zhx", {"Hani"}, ancestors = {"och"}} + +m["ltg"] = {"Latgalian", 36212, "bat", Latn} + +m["lti"] = {"Leti (Indonesia)", 3236912, "poz-tim", Latn} + +m["ltn"] = {"Latundê", nil, "sai-nmk", Latn} + +m["lto"] = {"Olutsotso", nil, "bnt-msl", ancestors = {"luy"}} + +m["lts"] = {"Lutachoni", nil, "bnt-msl"} + +m["ltu"] = {"Latu", 6497181, "poz-cma"} + +m["lua"] = {"Luba-Kasai", 34173, "bnt-lub", Latn} + +m["luc"] = {"Aringa", 56556, "csu-mma"} + +m["lud"] = {"Ludian", 33918, "fiu-fin", Latn} + +m["lue"] = {"Luvale", 33597, "bnt-clu", Latn} + +m["luf"] = {"Laua", 6497673} + +m["lui"] = {"Luiseño", 56236, "azc-cup", Latn, entry_name = {from = {"̂", ACUTE}, to = {}}} + +m["luj"] = {"Luna", 11003832, "bnt-lbn"} + +m["luk"] = {"Lunanakha", 56446, "sit-tib", ancestors = {"dz"}} + +m["lul"] = {"Olu'bo", 6589401, "csu-mma"} + +m["lum"] = {"Luimbi", 10963134, "bnt-clu"} + +m["lun"] = {"Lunda", 33607, "bnt-lun", Latn} + +m["luo"] = {"Luo", 5414796, "sdv-los", Latn} + +m["lup"] = {"Lumbu", 35793, "bnt-sir"} + +m["luq"] = {"Lucumi", 1768321} + 
+m["lur"] = {"Laura", 2984540} + +m["lus"] = {"Mizo", 36147, "tbq-kuk", Latn} + +m["lut"] = {"Lushootseed", 33658, "sal", Latn} + +m["luu"] = {"Lumba-Yakkha", 6703050, "sit-kie", ancestors = {"ybh"}} + +m["luv"] = {"Luwati", 33402, "inc-snd", {"Khoj"}, ancestors = {"inc-vra"}} + +m["luy"] = {"Luhya", 35893, "bnt-msl", Latn} + +m["luz"] = {"Southern Luri", 12952748, "ira-swi", {"fa-Arab"}, ancestors = {"pal"}} + +m["lva"] = {"Maku'a", 35790, "poz-tim"} + +m["lvi"] = {"Lawi", 6502657, "mkh-ban", Latn} + +m["lvk"] = {"Lavukaleve", 770547, "paa", Latn} + +m["lvu"] = {"Levuka", 6535860} + +m["lwa"] = {"Lwalu", 6706953, "bnt-lbn"} + +m["lwe"] = {"Lewo Eleng", 6537465} + +m["lwg"] = {"Wanga", nil, "bnt-msl", ancestors = {"luy"}} + +m["lwh"] = {"White Lachi", 8842956, "qfa-kra"} + +m["lwl"] = {"Eastern Lawa", 18644464, "mkh-pal", {"Thai"}, sort_key = {from = {"[%p]", "[็-๎]", "([เแโใไ])(ʼ?[ก-ฮ])"}, to = {"", "", "%2%1"}}} + +m["lwm"] = {"Laomian", 19597674, "tbq-lol"} + +m["lwo"] = {"Luwo", 56362, "sdv-lon", Latn} + +m["lws"] = {"Malawian Sign Language", 47522462, "sgn"} + +m["lwt"] = {"Lewotobi", 14916885} + +m["lwu"] = {"Lawu", 6505073} + +m["lww"] = {"Lewo", 3237321, "poz-vnc", Latn} + +m["lya"] = {"Layakha", 56602, "sit-tib", ancestors = {"dz"}} + +m["lyg"] = {"Lyngngam", 12635902, "aav-pkl"} + +m["lyn"] = {"Luyana", 3268098} + +m["lzh"] = {"Literary Chinese", 37041, "zhx", {"Hani"}, wikimedia_codes = {"zh-classical"}} + +m["lzl"] = {"Litzlitz", 6653424, "poz-vnc", Latn} + +m["lzn"] = {"Leinong Naga", 5924455, "sit-kch"} + +m["lzz"] = {"Laz", 1160372, "ccs-zan", {"Geor", "Latn"}, translit_module = "translit-redirect", override_translit = true, entry_name = {from = {"̂", GRAVE, ACUTE}, to = {}}} + +return m diff --git a/wiktra/wikt/translit/languages/data3/m.lua b/wiktra/wikt/translit/languages/data3/m.lua new file mode 100644 index 0000000..9eecbef --- /dev/null +++ b/wiktra/wikt/translit/languages/data3/m.lua @@ -0,0 +1,1278 @@ +local u = mw.ustring.char + +-- UTF-8 
encoded strings for some commonly used diacritics +local GRAVE = u(0x0300) +local ACUTE = u(0x0301) +local CIRC = u(0x0302) +local TILDE = u(0x0303) +local MACRON = u(0x0304) +local BREVE = u(0x0306) +local DOTABOVE = u(0x0307) +local DIAER = u(0x0308) +local CARON = u(0x030C) +local DGRAVE = u(0x030F) +local INVBREVE = u(0x0311) +local DOTBELOW = u(0x0323) +local RINGBELOW = u(0x0325) +local CEDILLA = u(0x0327) + +local Cyrl = {"Cyrl"} +local Latn = {"Latn"} + +local m = {} + +m["maa"] = {"San Jerónimo Tecóatl Mazatec", 7692927, "omq-maz", Latn} + +m["mab"] = {"Yutanduchi Mixtec", 12645448, "omq-mxt", Latn} + +m["mad"] = {"Madurese", 36213, "poz-msa", Latn} + +m["mae"] = {"Bo-Rukul", 34967, "nic-ple", Latn} + +m["maf"] = {"Mafa", 35819, "cdc-cbm", Latn} + +m["mag"] = {"Magahi", 33728, "inc-eas", {"Deva", "Kthi"}, ancestors = {"bh"}} + +m["mai"] = {"Maithili", 36109, "inc-eas", {"Tirh", "Kthi", "Newa", "Deva"}, ancestors = {"bh"}, translit_module = "translit-redirect"} + +m["maj"] = {"Jalapa de Díaz Mazatec", 3915999, "omq-maz", Latn} + +m["mak"] = {"Makasar", 33643, "poz-ssw", {"Latn", "Maka"}} + +m["mam"] = {"Mam", 33467, "myn", Latn} + +m["man"] = {"Mandingo", 35772, "dmn-man", Latn} + +m["maq"] = {"Chiquihuitlán Mazatec", 5101757, "omq-maz", Latn} + +m["mas"] = {"Maasai", 35787, "sdv-lma", Latn} + +m["mat"] = {"Matlatzinca", 12953704, "omq", Latn} + +m["mau"] = {"Huautla Mazatec", 36230, "omq-maz", Latn} + +m["mav"] = {"Sateré-Mawé", 6794475, "tup", Latn} + +m["maw"] = {"Mampruli", 35804, "nic-wov", Latn} + +m["max"] = {"North Moluccan Malay", 7056136, "crp", Latn} + +m["maz"] = {"Central Mazahua", 36228, "oto", Latn} + +m["mba"] = {"Higaonon", 5753411, "mno", Latn} + +m["mbb"] = {"Western Bukidnon Manobo", 7987643, "mno", Latn} + +m["mbc"] = {"Macushi", 56633, "sai-car", Latn} + +m["mbd"] = {"Dibabawon Manobo", 18755523, "mno", Latn} + +m["mbe"] = {"Molale", 3319444, "nai-plp", Latn} + +m["mbf"] = {"Baba Malay", 18642798, "crp", Latn, ancestors = {"ms"}} + 
+m["mbh"] = {"Mangseng", 6749147, "poz-ocw", Latn} + +m["mbi"] = {"Ilianen Manobo", 14916911, "mno", Latn} + +m["mbj"] = {"Nadëb", 3335011, "sai-nad", Latn} + +m["mbk"] = {"Malol", 6744477, "poz-ocw", Latn} + +m["mbl"] = {"Maxakalí", 3029682, "sai-mje", Latn} + +m["mbm"] = {"Ombamba", 36407, "bnt-mbt", Latn} + +m["mbn"] = {"Macaguán", 3273980, "sai-guh", Latn} + +m["mbo"] = { -- is, like 'bqz', 'bsi' and 'bss', a dialect of Manenguba + "Mbo (Cameroon)", nil, "bnt-mne", Latn +} + +m["mbp"] = {"Wiwa", 3012604, "cba", Latn} + +m["mbq"] = {"Maisin", 3448149, nil, Latn} + +m["mbr"] = {"Nukak Makú", 3346228, "sai-nad", Latn} + +m["mbs"] = {"Sarangani Manobo", 7423093, "mno", Latn} + +m["mbt"] = {"Matigsalug Manobo", 6787447, "mno", Latn} + +m["mbu"] = {"Mbula-Bwazza", 3913324, "nic-jrn", Latn} + +m["mbv"] = {"Mbulungish", 36003, "alv-nal", Latn} + +m["mbw"] = {"Maring", 3293280, nil, Latn} + +m["mbx"] = {"Sepik Mari", 6760942, "paa-spk", Latn} + +m["mby"] = {"Memoni", 4180871, "inc-snd", {"Gujr", "ur-Arab"}, ancestors = {"inc-vra"}} + +m["mbz"] = {"Amoltepec Mixtec", 13583504, "omq-mxt", Latn} + +m["mca"] = {"Maca", 3281043, "sai-mtc", Latn} + +m["mcb"] = {"Machiguenga", 3915441, "awd", Latn} + +m["mcc"] = {"Bitur", 4919173} + +m["mcd"] = {"Sharanahua", 12953881, "sai-pan", Latn} + +m["mce"] = {"Itundujia Mixtec", 12953727, "omq-mxt", Latn} + +m["mcf"] = {"Matsés", 2981620, "sai-pan", Latn} + +m["mcg"] = {"Mapoyo", 56946, "sai-car", Latn} + +m["mch"] = {"Maquiritari", 3082027, "sai-car", Latn} + +m["mci"] = {"Mese", 6821190} + +m["mcj"] = {"Mvanip", 3913281, "nic-mmb", Latn} + +m["mck"] = {"Mbunda", 34170, "bnt-clu", Latn} + +m["mcl"] = {"Macaguaje", 6722435, "sai-tuc", Latn} + +m["mcm"] = {"Kristang", 2669169, "crp", Latn, ancestors = {"pt"}} + +m["mcn"] = {"Masana", 56668, "cdc-mas"} + +m["mco"] = {"Coatlán Mixe", 25559716, "nai-miz", Latn} + +m["mcp"] = {"Makaa", 35803, "bnt-mka"} + +m["mcq"] = {"Ese", 5397551, "ngf", Latn} + +m["mcr"] = {"Menya", 11732444, "ngf"} + 
+m["mcs"] = {"Mambai", 6748872, "alv-mbm"} + +m["mcu"] = {"Cameroon Mambila", 19359039, "nic-mmb", Latn} + +m["mcv"] = {"Minanibai", 6863167, "ngf"} + +m["mcw"] = {"Mawa", 3441333, "cdc-est", Latn} + +m["mcx"] = {"Mpiemo", 35908, "bnt-bek"} + +m["mcy"] = {"South Watut", 12953293, "poz-ocw"} + +m["mcz"] = {"Mawan", 11732429, "ngf-mad"} + +m["mda"] = {"Mada (Nigeria)", 3915843, "nic-nin", Latn} + +m["mdb"] = {"Morigi", 6912195, "paa-kiw"} + +m["mdc"] = {"Male", 6742927, "ngf-mad"} + +m["mdd"] = {"Mbum", 36170, "alv-mbm"} + +m["mde"] = {"Bura Mabang", 35860, "ssa"} + +m["mdf"] = {"Moksha", 13343, "urj-mdv", Cyrl, translit_module = "mdf-translit", override_translit = true} + +m["mdg"] = {"Massalat", 759984} + +m["mdh"] = {"Maguindanao", 33717, "phi", {"Latn", "Arab"}} + +m["mdi"] = {"Mamvu", 3033594, "csu-mle"} + +m["mdj"] = {"Mangbetu", 56327, "csu-maa"} + +m["mdk"] = {"Mangbutu", 6748877, "csu-mle"} + +m["mdl"] = {"Maltese Sign Language", 6744816, "sgn"} + +m["mdm"] = {"Mayogo", 6797580, "nic-nke", Latn} + +m["mdn"] = {"Mbati", 36165, "bnt-ngn"} + +m["mdp"] = {"Mbala", 6799583, "bnt-pen"} + +m["mdq"] = {"Mbole", 6799727, "bnt-mbe"} + +m["mdr"] = {"Mandar", 35995, "poz-ssw", {"Bugi", "Latn"}} + +m["mds"] = {"Maria", 3448673} + +m["mdt"] = {"Mbere", 36062, "bnt-mbt"} + +m["mdu"] = {"Mboko", 36058, "bnt-mbo"} + +m["mdv"] = {"Santa Lucía Monteverde Mixtec", 12953722, "omq-mxt", Latn} + +m["mdw"] = {"Mbosi", 36035, "bnt-mbo"} + +m["mdx"] = {"Dizin", 35313, "omv-diz"} + +m["mdy"] = {"Maale", 795327, "omv-ome"} + +m["mdz"] = {"Suruí Do Pará", 10322149, "tup-gua", Latn} + +m["mea"] = {"Menka", 36078, "nic-grs", Latn} + +m["meb"] = {"Ikobi-Mena", 11732241, "ngf", Latn} + +m["mec"] = {"Mara", 6772774} + +m["med"] = {"Melpa", 36166} + +m["mee"] = {"Mengen", 3305831, "poz-ocw"} + +m["mef"] = {"Megam", 6808589} + +m["meg"] = {"Mea", 12952836, "poz-cln"} + +m["meh"] = {"Southwestern Tlaxiaco Mixtec", 7070686, "omq-mxt", Latn} + +m["mei"] = {"Midob", 36007, "nub", Latn} + +m["mej"] 
= {"Meyah", 11732436, "paa-wpa"} + +m["mek"] = {"Mekeo", 3304803, "poz-ocw"} + +m["mel"] = {"Central Melanau", 18638319, "poz-swa", Latn} + +m["mem"] = {"Mangala", 6748664} + +m["men"] = {"Mende", 1478672, "dmn-msw", {"Latn", "Mend"}} + +m["meo"] = {"Kedah Malay", 4925684, "poz-mly", Latn} + +m["mep"] = {"Miriwung", 3111847} + +m["meq"] = {"Merey", 3502314, "cdc-cbm", Latn} + +m["mer"] = {"Meru", 13313, "bnt-kka", Latn} + +m["mes"] = {"Masmaje", 3440448} + +m["met"] = {"Mato", 3299190, "poz-ocw", Latn} + +m["meu"] = {"Motu", 33516, "poz-ocw", Latn} + +m["mev"] = {"Mano", 3913286, "dmn-mda", Latn} + +m["mew"] = {"Maaka", 3438764, "cdc-wst", Latn} + +m["mey"] = {"Hassaniya", 56231, "sem-arb", {"Arab"}} + +m["mez"] = {"Menominee", 13363, "alg", Latn, sort_key = {from = {"·"}, to = {""}}} + +m["mfa"] = {"Pattani Malay", 1199751, "poz-mly", {"Latn", "Thai", "Arab"}, sort_key = {from = {"[%pๆ]", "[็-๎]", "([เแโใไ])([ก-ฮ])"}, to = {"", "", "%2%1"}}} + +m["mfb"] = {"Bangka", 3258818} + +m["mfc"] = {"Mba", 4286464, "nic-mbc", Latn} + +m["mfd"] = {"Mendankwe-Nkwen", 11129537, "nic-nge", Latn} + +m["mfe"] = {"Mauritian Creole", 33661, "crp", Latn, ancestors = {"fr"}} + +m["mff"] = {"Naki", 36083, "nic-bbe", Latn} + +m["mfg"] = {"Mixifore", 3914478, "dmn-mok"} + +m["mfh"] = {"Matal", 3501751, "cdc-cbm", Latn} + +m["mfi"] = {"Wandala", 3441249, "cdc-cbm", Latn} + +m["mfj"] = {"Mefele", 3501871, "cdc-cbm"} + +m["mfk"] = {"North Mofu", 56303, "cdc-cbm"} + +m["mfl"] = {"Putai", 56291} + +m["mfm"] = {"Marghi South", 56248} + +m["mfn"] = {"Cross River Mbembe", 3915395, "nic-uce", Latn} + +m["mfo"] = {"Mbe", 36075, "nic-eko", Latn} + +m["mfp"] = {"Makassar Malay", 12952776, "qfa-mix", Latn} + +m["mfq"] = {"Moba", 19921578, "nic-grm", Latn} + +m["mfr"] = {"Marrithiyel", 6773014, "aus-dal", Latn} + +m["mfs"] = { + "Mexican Sign Language", 3915511, "sgn", Latn -- when documented +} + +m["mft"] = {"Mokerang", 3319387, "poz-aay", Latn} + +m["mfu"] = {"Mbwela", 11004988, "bnt-clu", 
ancestors = {"lch"}} + +m["mfv"] = {"Mandjak", 35822, "alv-pap"} + +m["mfw"] = {"Mulaha", 6933720} + +m["mfx"] = {"Melo", 6813268, "omv-nom"} + +m["mfy"] = {"Mayo", 56729, "azc-trc", Latn, sort_key = {from = {"á", "é", "í", "ó", "ú"}, to = {"a", "e", "i", "o", "u"}}} + +m["mfz"] = {"Mabaan", 20526385, "sdv"} + +m["mga"] = {"Middle Irish", 36116, "cel-gae", Latn, ancestors = {"sga"}, entry_name = {from = {"ḟ", "ṁ", "ṅ", "ṡ", "Ḟ", "Ṁ", "Ṅ", "Ṡ", "ä", "ë", "ï", "ö", "ü", "·"}, to = {"f", "m", "n", "s", "F", "M", "N", "S", "a", "e", "i", "o", "u"}}, sort_key = {from = {"á", "é", "æ", "ǽ", "í", "ó", "ú", "^h", "-"}, to = {"a", "e", "e", "e", "i", "o", "u"}}} + +m["mgb"] = {"Mararit", 56359, "sdv-tmn"} + +m["mgc"] = {"Morokodo", 6913216, "csu-bbk", Latn} + +m["mgd"] = {"Moru", 6915014, "csu-mma"} + +m["mge"] = {"Mango", 713659, "csu-sar"} + +m["mgf"] = {"Maklew", 6739816} + +m["mgg"] = {"Mpongmpong", 35924, "bnt-bek"} + +m["mgh"] = {"Makhuwa-Meetto", 33604, "bnt-mak", Latn, ancestors = {"vmw"}} + +m["mgi"] = {"Jili", 3914497, "nic-pls"} + +m["mgj"] = {"Abureni", 3441256, "nic-cde", Latn} + +m["mgk"] = {"Mawes", 6794395, "paa"} + +m["mgl"] = {"Maleu-Kilenge", 3281884} + +m["mgm"] = {"Mambae", 35774, "poz-tim", Latn} + +m["mgn"] = {"Mbangi", 11017443, "nic-ngd", Latn} + +m["mgo"] = {"Meta'", 36054, "nic-mom", Latn} + +m["mgp"] = {"Eastern Magar", 12952758, "sit-gma"} + +m["mgq"] = {"Malila", 6743679, "bnt-mby", Latn} + +m["mgr"] = {"Mambwe-Lungu", 626210, "bnt-mwi", Latn} + +m["mgs"] = {"Manda (Tanzania)", 16939267, "bnt-bki"} + +m["mgt"] = {"Mongol", 11260674, "paa", Latn} + +m["mgu"] = {"Mailu", 3278246, "ngf"} + +m["mgv"] = {"Matengo", 6786446, "bnt-mbi", Latn} + +m["mgw"] = {"Matumbi", 6791974, "bnt-mbi", Latn} + +m["mgy"] = {"Mbunga", 6799817, "bnt-kil"} + +m["mgz"] = {"Mbugwe", 3426367, "bnt-mra"} + +m["mha"] = {"Manda (India)", 56760, "dra"} + +m["mhb"] = {"Mahongwe", 35816, "bnt-kel"} + +m["mhc"] = {"Mocho", 1941682, "myn"} + +m["mhd"] = {"Mbugu", 36152, "qfa-mix", 
Latn, ancestors = {"asa"}} + +m["mhe"] = {"Besisi", 2742262, "mkh-asl"} + +m["mhf"] = {"Mamaa", 6745346, "ngf-fin", Latn} + +m["mhg"] = {"Margu", 6772812} + +m["mhi"] = {"Ma'di", 56670, "csu-mma"} + +m["mhj"] = {"Mogholi", 13336, "xgn"} + +m["mhk"] = {"Mungaka", 36068, "nic-nun", Latn} + +m["mhl"] = {"Mauwake", 6794095, "ngf-mad"} + +m["mhm"] = {"Makhuwa-Moniga", 6900145, "bnt-mak"} + +m["mhn"] = {"Mòcheno", 268130, "gmw", Latn, ancestors = {"bar"}, sort_key = {from = {"è", "ò"}, to = {"e", "o"}}} + +m["mho"] = {"Mashi", 10962737, "bnt-kav", Latn} + +m["mhp"] = {"Balinese Malay", 12473441, "crp"} + +m["mhq"] = {"Mandan", 1957120, "sio"} + +m["mhs"] = {"Buru (Indonesia)", 2928650, "poz-cma"} + +m["mht"] = {"Mandahuaca", 6747924, "awd-nwk"} + +m["mhu"] = {"Taraon", 56400, "sit-gsi", Latn} + +m["mhw"] = {"Mbukushu", 2691548} + +m["mhx"] = {"Lhao Vo", 11149315, "tbq-brm"} + +m["mhy"] = {"Ma'anyan", 2328761, "poz-bre"} + +m["mhz"] = {"Mor (Austronesian)", 2122792, "poz-hce", Latn} + +m["mia"] = {"Miami", 56523, "alg", Latn} + +m["mib"] = {"Atatláhuca Mixtec", 32093046, "omq-mxt", Latn} + +m["mic"] = {"Mi'kmaq", 13321, "alg-eas", Latn} + +m["mid"] = {"Mandaic", 6991742, "sem-ase", {"Mand"}, ancestors = {"myz"}} + +m["mie"] = {"Ocotepec Mixtec", 25559575, "omq-mxt", Latn} + +m["mif"] = {"Mofu-Gudur", 1365132, "cdc-cbm", Latn} + +m["mig"] = {"San Miguel el Grande Mixtec", 12953719, "omq-mxt", Latn} + +m["mih"] = {"Chayuco Mixtec", 13583510, "omq-mxt", Latn} + +m["mii"] = {"Chigmecatitlán Mixtec", 12953724, "omq-mxt", Latn} + +m["mij"] = {"Mungbam", 34725, "nic-beb", Latn} + +m["mik"] = {"Mikasuki", 13316, "nai-mus", Latn} + +m["mil"] = {"Peñoles Mixtec", 42411307, "omq-mxt", Latn} + +m["mim"] = {"Alacatlatzala Mixtec", 14697894, "omq-mxt", Latn} + +m["min"] = {"Minangkabau", 13324, "poz-mly", {"Latn", "Arab"}} + +m["mio"] = {"Pinotepa Nacional Mixtec", 7196415, "omq-mxt", Latn} + +m["mip"] = {"Apasco-Apoala Mixtec", 13583505, "omq-mxt", Latn} + +m["miq"] = {"Miskito", 
1516803, "nai-min", Latn, entry_name = {from = {"Â", "â", "Î", "î", "Û", "û"}, to = {"A", "a", "I", "i", "U", "u"}}} + +m["mir"] = {"Isthmus Mixe", 6088873, "nai-miz"} + +m["mit"] = {"Southern Puebla Mixtec", 7570345, "omq-mxt", Latn} + +m["miu"] = {"Cacaloxtepec Mixtec", 12953723, "omq-mxt", Latn} + +m["miw"] = {"Akoye", 3327462, "ngf", Latn} + +m["mix"] = {"Mixtepec Mixtec", 6884125, "omq-mxt", Latn} + +m["miy"] = {"Ayutla Mixtec", 13583508, "omq-mxt", Latn} + +m["miz"] = {"Coatzospan Mixtec", 3317290, "omq-mxt", Latn} + +m["mjb"] = {"Makalero", 35729, "ngf", Latn} + +m["mjc"] = {"San Juan Colorado Mixtec", 12953718, "omq-mxt", Latn} + +m["mjd"] = {"Northwest Maidu", 3198700, "nai-mdu", Latn} + +m["mje"] = {"Muskum", 3913334} + +m["mjg"] = {"Monguor", 34214, "xgn", Latn} + +m["mji"] = {"Kim Mun", 1115317, "hmx-mie"} + +m["mjj"] = {"Mawak", 11732427, "ngf-mad"} + +m["mjk"] = {"Matukar", 6791963, "poz-ocw", Latn} + +m["mjl"] = {"Mandeali", 6747931, "him", {"Deva", "Takr"}, translit_module = "hi-translit"} + +m["mjm"] = {"Medebur", 6805227, "poz-ocw", Latn} + +m["mjn"] = {"Mebu", 6804364, "ngf-fin"} + +m["mjo"] = {"Malankuravan", 14916887, "dra"} + +m["mjp"] = {"Malapandaram", 10575729, "dra"} + +m["mjq"] = {"Malaryan", 12952773, "dra"} + +m["mjr"] = {"Malavedan", 12952775, "dra"} + +m["mjs"] = {"Miship", 3441264, "cdc-wst", Latn} + +m["mjt"] = {"Sawriya Paharia", 33907, "dra", {"Beng", "Deva"}} + +m["mju"] = {"Manna-Dora", 10576453, "dra", ancestors = {"te"}} + +m["mjv"] = {"Mannan", 3286037, "dra"} + +m["mjw"] = {"Karbi", 56591, "tbq-kuk", Latn} + +m["mjx"] = {"Mahali", 12953686, "mun"} + +m["mjy"] = {"Mahican", 3182562, "alg-eas", Latn} + +m["mjz"] = {"Majhi", 6737786, "inc-eas", ancestors = {"bh"}} + +m["mka"] = { + "Mbre", 3450154, "nic" -- unclassified within niger-congo tho +} + +m["mkb"] = {"Mal Paharia", 6583595} + +m["mkc"] = {"Siliput", 7515090, "qfa-tor", Latn} + +m["mke"] = {"Mawchi", 21403317} + +m["mkf"] = {"Miya", 43328, "cdc-wst", Latn} + +m["mkg"] 
= {"Mak (China)", 3280623, "qfa-kms"} + +m["mki"] = {"Dhatki", 32480, "inc-wes", ancestors = {"mwr"}} + +m["mkj"] = {"Mokilese", 2335528, "poz-mic", Latn} + +m["mkk"] = {"Byep", 35052, "bnt-mka"} + +m["mkl"] = {"Mokole", 36047} + +m["mkm"] = {"Moklen", 3319380} + +m["mkn"] = {"Kupang Malay", 18458203, "crp"} + +m["mko"] = {"Mingang Doso", 3915382, "alv-bwj"} + +m["mkp"] = {"Moikodi", 6894594, "ngf"} + +m["mkq"] = {"Bay Miwok", 3460957, "nai-you", Latn} + +m["mkr"] = {"Malas", 11732402, "ngf-mad"} + +m["mks"] = {"Silacayoapan Mixtec", 7514027, "omq-mxt", Latn} + +m["mkt"] = {"Vamale", 14916907, "poz-cln", Latn} + +m["mku"] = {"Konyanka Maninka", 11163298, "dmn-mnk"} + +m["mkv"] = {"Mafea", 3073532, "poz-vnc", Latn} + +m["mkx"] = {"Cinamiguin Manobo", 12953697, "mno"} + +m["mky"] = {"East Makian", 3512690, "poz-hce", Latn} + +m["mkz"] = {"Makasae", 35782, "ngf"} + +m["mla"] = {"Tamambo", 1153276, "poz-vnc", Latn} + +m["mlb"] = {"Mbule", 35843, "nic-ymb", Latn} + +m["mlc"] = {"Caolan", 3446682, "tai-cho", {"Latn", "Hani"}} + +m["mle"] = {"Manambu", 11732406, "paa-spk", Latn} + +m["mlf"] = {"Mal", 3281057, "mkh-khm"} + +m["mlh"] = {"Mape", 6753787} + +m["mli"] = {"Malimpung", 12473435} + +m["mlj"] = {"Miltu", 3441310} + +m["mlk"] = {"Ilwana", 6001357, "bnt-sab"} + +m["mll"] = {"Malua Bay", 6744946, "poz-vnc", Latn} + +m["mlm"] = {"Mulam", 3092284, "qfa-kms", Latn} + +m["mln"] = {"Malango", 3281522, "poz-sls", Latn} + +m["mlo"] = {"Mlomp", 36009, "alv-bak"} + +m["mlp"] = {"Bargam", 4860543, "ngf-mad", Latn} + +m["mlq"] = {"Western Maninkakan", 11028033, "dmn-wmn"} + +m["mlr"] = {"Vame", 3515088} + +m["mls"] = {"Masalit", 56557, "ssa"} + +m["mlu"] = {"To'abaita", 36645, "poz-sls", Latn} + +m["mlv"] = {"Mwotlap", 2475538, "poz-vnc", Latn} + +m["mlw"] = {"Moloko", 1965222, "cdc-cbm", Latn} + +m["mlx"] = {"Malfaxal", 2157421, "poz-vnc", Latn} + +m["mlz"] = {"Malaynon", 18755512, "phi"} + +m["mma"] = {"Mama", 3913963, "nic-jrn"} + +m["mmb"] = {"Momina", 6897297} + +m["mmc"] 
= {"Michoacán Mazahua", 12953705, "oto", Latn} + +m["mmd"] = {"Maonan", 3092293, "qfa-kms"} + +m["mme"] = {"Mae", 3276286, "poz-vnc", Latn} + +m["mmf"] = {"Mundat", 56263, "cdc-wst", Latn} + +m["mmg"] = {"North Ambrym", 2842468, "poz-vnc", Latn} + +m["mmh"] = {"Mehináku", 3501838, "awd", Latn} + +m["mmi"] = {"Musar", 6940113, "ngf-mad"} + +m["mmj"] = {"Majhwar", 6737795} + +m["mmk"] = {"Mukha-Dora", 6933447} + +m["mml"] = {"Man Met", 3194984, "mkh-pal"} + +m["mmm"] = {"Maii", 6735599, "poz-vnc", Latn} + +m["mmn"] = {"Mamanwa", 3206623, "phi", Latn} + +m["mmo"] = {"Mangga Buang", 12952294, "poz-ocw", Latn} + +m["mmp"] = {"Musan", 2605703, "paa-asa"} + +m["mmq"] = {"Aisi", 6940074, "ngf-mad", Latn} + +m["mmr"] = {"Western Xiangxi Miao", 3307901, "hmn"} + +m["mmt"] = {"Malalamai", 3281496, "poz-ocw", Latn} + +m["mmu"] = {"Mmaala", 13123461, "nic-ymb", Latn} + +m["mmv"] = {"Miriti", 6873567, "sai-tuc", Latn} + +m["mmw"] = {"Emae", 3051961, "poz-pnp", Latn} + +m["mmx"] = {"Madak", 3275205, "poz-ocw", Latn} + +m["mmy"] = {"Migaama", 56259, "cdc-est", Latn} + +m["mmz"] = {"Mabaale", 11003249, "bnt-ngn"} + +m["mna"] = {"Mbula", 3303572, "poz-ocw", Latn} + +m["mnb"] = {"Muna", 6935584, "poz-mun", Latn} + +m["mnc"] = {"Manchu", 33638, "tuw", {"Mong"}, ancestors = {"juc"}, translit_module = "mnc-translit"} + +m["mnd"] = {"Mondé", 6898840, "tup", Latn} + +m["mne"] = {"Naba", 760732, "csu-bgr"} + +m["mnf"] = {"Mundani", 35839, "nic-mom", Latn} + +m["mng"] = {"Eastern Mnong", 12953747, "mkh-ban"} + +m["mnh"] = {"Mono (Congo)", 33501, "bad-cnt", Latn} + +m["mni"] = {"Manipuri", 33868, "sit", {"Mtei", "Beng"}, ancestors = {"omp"}} + +m["mnj"] = {"Munji", 33639, "ira-mny", ancestors = {"ira-mny-pro"}} + +m["mnk"] = {"Mandinka", 33678, "dmn-wmn", Latn} + +m["mnl"] = {"Tiale", 6744350, "poz-vnc", Latn} + +m["mnm"] = {"Mapena", 11732415} + +m["mnn"] = {"Southern Mnong", 23857582} + +m["mnp"] = {"Min Bei", 36457, "zhx-min-shn", {"Hani"}} + +m["mnq"] = {"Minriq", 2742268, "mkh-asl"} + 
+m["mnr"] = {"Mono (California)", 33591, "azc-num", Latn} + +m["mns"] = {"Mansi", 33759, "urj-ugr", Cyrl, translit_module = "mns-translit", override_translit = true} + +m["mnt"] = {"Maykulan", 3915696, "aus-pam", Latn} + +m["mnu"] = {"Mer", 6817854} + +m["mnv"] = {"Rennellese", 3397346, "poz-pnp", Latn} + +m["mnw"] = {"Mon", 13349, "mkh-mnc", {"Mymr"}, ancestors = {"mkh-mmn"}, sort_key = {from = {"ျ", "ြ", "ွ", "ှ", "ၞ", "ၟ", "ၠ", "ၚ", "ဿ"}, to = {"္ယ", "္ရ", "္ဝ", "္ဟ", "္န", "္မ", "္လ", "င", "သ္သ"}}} + +m["mnx"] = {"Manikion", 3507964, "paa-wpa"} + +m["mny"] = {"Manyawa", 11002622, "bnt-mak", ancestors = {"vmw"}} + +m["mnz"] = {"Moni", 6899857} + +m["moa"] = {"Mwan", 3320111, "dmn-nbe", Latn} + +m["moc"] = {"Mocoví", 3027906, "sai-guc", Latn} + +m["mod"] = {"Mobilian", 13333, "crp", Latn, ancestors = {"cho", "cic"}} + +m["moe"] = {"Montagnais", 13351, "alg", Latn, ancestors = {"cr"}, entry_name = {from = {"Ā", "ā", "Ē", "ē", "Ī", "ī", "Ō", "ō", "Ū", "ū"}, to = {"A", "a", "E", "e", "I", "i", "O", "o", "U", "u"}}} + +m["mog"] = {"Mongondow", 3058458, "phi", Latn} + +m["moh"] = {"Mohawk", 13339, "iro", Latn} + +m["moi"] = {"Mboi", 3914417, "alv-yun"} + +m["moj"] = {"Monzombo", 11154772, "nic-nkk", Latn} + +m["mok"] = {"Morori", 6913275} + +m["mom"] = {"Monimbo", 56542} + +m["moo"] = {"Monom", 6901726, "mkh-ban"} + +m["mop"] = {"Mopan Maya", 36183, "myn", Latn} + +m["moq"] = {"Mor (Papuan)", 11732468, "paa"} + +m["mor"] = {"Moro", 36172, "alv-hei"} + +m["mos"] = {"Moore", 36096, "nic-mre", Latn} + +m["mot"] = {"Barí", 2886281, "cba", Latn} + +m["mou"] = {"Mogum", 3440473, "cdc-est", Latn} + +m["mov"] = {"Mojave", 56510, "nai-yuc", Latn} + +m["mow"] = {"Moi (Congo)", 11124792, "bnt-bmo", Latn} + +m["mox"] = {"Molima", 3319495, "poz-ocw", Latn} + +m["moy"] = {"Shekkacho", 56827, "omv-gon"} + +m["moz"] = {"Mukulu", 3440403, "cdc-est"} + +m["mpa"] = {"Mpoto", 6928303, "bnt-mbi"} + +m["mpb"] = {"Mullukmulluk", 6741120} + +m["mpc"] = {"Mangarayi", 6748829} + +m["mpd"] = 
{"Machinere", 12953681, "awd", Latn} + +m["mpe"] = {"Majang", 56724, "sdv"} + +m["mpg"] = {"Marba", 56614, "cdc-mas"} + +m["mph"] = {"Maung", 6792550, "aus-wdj", Latn} + +m["mpi"] = {"Mpade", 3280670, "cdc-cbm", Latn} + +m["mpj"] = {"Martu Wangka", 3295916, "aus-pam", Latn} + +m["mpk"] = {"Mbara (Chad)", 3912770, "cdc-cbm"} + +m["mpl"] = {"Middle Watut", 15887910, "poz-ocw", Latn} + +m["mpm"] = {"Yosondúa Mixtec", 12953741, "omq-mxt", Latn} + +m["mpn"] = {"Mindiri", 6863842, "poz-ocw", Latn} + +m["mpo"] = {"Miu", 6883668, "poz-ocw", Latn} + +m["mpp"] = {"Migabac", 11732448} + +m["mpq"] = {"Matís", 3299145, "sai-pan", Latn} + +m["mpr"] = {"Vangunu", 3554582, "poz-ocw", Latn} + +m["mps"] = {"Dadibi", 5208077, "ngf", Latn} + +m["mpt"] = {"Mian", 12952846, "ngf-okk", Latn} + +m["mpu"] = {"Makuráp", 3281037, "tup", Latn} + +m["mpv"] = {"Mungkip", 11732485, "ngf-fin", Latn} + +m["mpw"] = {"Mapidian", 6753812, "awd", Latn} + +m["mpx"] = {"Misima-Paneati", 6875666, "poz-ocw", Latn} + +m["mpy"] = {"Mapia", 3287224, "poz-mic", Latn} + +m["mpz"] = {"Mpi", 6928276, "tbq-lol"} + +m["mqa"] = {"Maba", 3273750} + +m["mqb"] = {"Mbuko", 3502213, "cdc-cbm", Latn} + +m["mqc"] = {"Mangole", 6749097, "poz-cma", Latn} + +m["mqe"] = {"Matepi", 11732426, "ngf-mad"} + +m["mqf"] = {"Momuna", 6897518} + +m["mqg"] = {"Kota Bangun Kutai Malay", 12952778} + +m["mqh"] = {"Tlazoyaltepec Mixtec", 12953740, "omq-mxt", Latn} + +m["mqi"] = {"Mariri", 6765544} + +m["mqj"] = {"Mamasa", 6745452, "poz-ssw", Latn} + +m["mqk"] = {"Rajah Kabunsuwan Manobo", 12953700, "mno"} + +m["mql"] = {"Mbelime", 4286473, "nic-eov"} + +m["mqm"] = {"South Marquesan", 19694214, "poz-pep", Latn} + +m["mqn"] = {"Moronene", 642581, "poz-btk", Latn} + +m["mqo"] = {"Modole", 11732457, "paa-wpa"} + +m["mqp"] = {"Manipa", 6749799, "poz-cma", Latn} + +m["mqq"] = {"Minokok", 18642293} + +m["mqr"] = {"Mander", 6747979, "paa-tkw"} + +m["mqs"] = {"West Makian", 3033575, "paa-wpa"} + +m["mqt"] = {"Mok", 13018559, "mkh-pal"} + +m["mqu"] 
= {"Mandari", 3285426, "sdv-bri"} + +m["mqv"] = {"Mosimo", 11732478, "ngf-mad", Latn} + +m["mqw"] = {"Murupi", 11732486, "ngf-mad", Latn} + +m["mqx"] = {"Mamuju", 6746004, "poz-ssw", Latn} + +m["mqy"] = {"Manggarai", 3285748, "poz-cet", Latn} + +m["mqz"] = {"Malasanga", 14916889, "poz-ocw", Latn} + +m["mra"] = {"Mlabri", 3073465, "mkh"} + +m["mrb"] = {"Marino", 3293299} + +m["mrc"] = {"Maricopa", 56386, "nai-yuc", Latn} + +m["mrd"] = {"Western Magar", 22303263, "sit-gma"} + +m["mre"] = {"Martha's Vineyard Sign Language", 33494, "sgn", {"Latn", "Sgnw"}} + +m["mrf"] = {"Elseng", 3915667, "paa-brd"} + +m["mrg"] = {"Mising", 3316328, "sit-tan", {"Latn", "Beng", "Deva"}, ancestors = {"adi"}} + +m["mrh"] = {"Mara Chin", 4175893, "tbq-kuk", Latn} + +m["mrj"] = {"Western Mari", 1776032, "urj", Cyrl, ancestors = {"chm"}, translit_module = "chm-translit"} + +m["mrk"] = {"Hmwaveke", 5873712, "poz-cln", Latn} + +m["mrl"] = {"Mortlockese", 3324598, "poz-mic", Latn} + +m["mrm"] = {"Merlav", 3331115, "poz-vnc", Latn} + +m["mrn"] = {"Cheke Holo", 2962165, "poz-ocw", Latn} + +m["mro"] = {"Mru", 1951521, "sit-mru", {"Latn", "Mroo"}} + +m["mrp"] = {"Morouas", 6913299, "poz-vnc", Latn} + +m["mrq"] = {"North Marquesan", 2603808, "poz-pep", Latn} + +m["mrr"] = {"Hill Maria", 27602, "dra", ancestors = {"gon"}} + +m["mrs"] = {"Maragus", 6754640, "poz-vnc", Latn} + +m["mrt"] = {"Margi", 56241, "cdc-cbm", Latn} + +m["mru"] = {"Mono (Cameroon)", 11031964, "alv-mbm", Latn} + +m["mrv"] = {"Mangarevan", 36237, "poz-pep", Latn} + +m["mrw"] = {"Maranao", 33800, "phi", {"Latn", "Arab"}} + +m["mrx"] = {"Dineor", 5278044, "paa-tkw"} + +m["mry"] = {"Karaga Mandaya", 6747925, "phi"} + +m["mrz"] = {"Marind", 6763970} + +m["msb"] = {"Masbatenyo", 33948, "phi", Latn} + +m["msc"] = {"Sankaran Maninka", 11155812, "dmn-mnk"} + +m["msd"] = { + "Yucatec Maya Sign Language", 34281, "sgn", Latn -- when documented +} + +m["mse"] = {"Musey", 56328, "cdc-mas"} + +m["msf"] = {"Mekwei", 4544752, "paa-nim", Latn} + 
+m["msg"] = {"Moraid", 6909020} + +m["msi"] = {"Sabah Malay", 10867404, "crp"} + +m["msj"] = {"Ma", 6720909, "nic-mbc", Latn} + +m["msk"] = {"Mansaka", 12952800, "phi", Latn} + +m["msl"] = {"Molof", 4300950} + +m["msm"] = {"Agusan Manobo", 12953696, "mno"} + +m["msn"] = {"Vurës", 3563857, "poz-vnc", Latn} + +m["mso"] = {"Mombum", 6897079} + +m["msp"] = {"Maritsauá", 6765915, "tup", Latn} + +m["msq"] = {"Caac", 2932212, "poz-cln", Latn} + +m["msr"] = {"Mongolian Sign Language", 3915499, "sgn"} + +m["mss"] = {"West Masela", 12952816, "poz-tim"} + +m["msu"] = {"Musom", 6943041, "poz-ocw", Latn} + +m["msv"] = {"Maslam", 3502273} + +m["msw"] = {"Mansoanka", 35814} + +m["msx"] = {"Moresada", 11732475} + +m["msy"] = {"Aruamu", 3501809, "paa", Latn} + +m["msz"] = {"Momare", 6897030} + +m["mta"] = {"Cotabato Manobo", 12953698, "mno"} + +m["mtb"] = {"Anyin Morofo", 3502338, "alv-ctn", Latn, ancestors = {"any"}} + +m["mtc"] = {"Munit", 11732482, "ngf-mad"} + +m["mtd"] = {"Mualang", 3073458, "poz-mly"} + +m["mte"] = {"Alu", 33503, "poz-ocw", Latn} + +m["mtf"] = {"Murik (New Guinea)", 7050035, "paa-lsp", Latn} + +m["mtg"] = {"Una", 5580728} + +m["mth"] = {"Munggui", 6936018, "poz-hce", Latn} + +m["mti"] = {"Maiwa (New Guinea)", 6737223, "ngf", Latn} + +m["mtj"] = {"Moskona", 11288953} + +m["mtk"] = {"Mbe'", 10964025, "nic-nka", Latn} + +m["mtl"] = {"Montol", 3440457, "cdc-wst", Latn} + +m["mtm"] = {"Mator", 20669419, "syd", Cyrl} + +m["mtn"] = {"Matagalpa", 3490756, "nai-min"} + +m["mto"] = {"Totontepec Mixe", 7828400, "nai-miz"} + +m["mtp"] = {"Wichí Lhamtés Nocten", 5908756, "sai-wic", Latn} + +m["mtq"] = {"Muong", 3236789, "mkh-vie", Latn, sort_key = "vi-sortkey"} + +m["mtr"] = { + "Mewari", + nil, + "inc-wes", + {"Deva"}, + ancestors = {"raj"}, + translit_module = "hi-translit" -- for now +} + +m["mts"] = {"Yora", 3572572, "sai-pan", Latn} + +m["mtt"] = {"Mota", 3325052, "poz-vnc", Latn} + +m["mtu"] = {"Tututepec Mixtec", 7857069, "omq-mxt", Latn} + +m["mtv"] = {"Asaro'o", 
3503684, "ngf-fin", Latn} + +m["mtw"] = {"Magahat", 6729600} + +m["mtx"] = {"Tidaá Mixtec", 7800805, "omq-mxt", Latn} + +m["mty"] = {"Nabi", 6956858, "qfa-tor", Latn} + +m["mua"] = {"Mundang", 36032, "alv-mbm"} + +m["mub"] = {"Mubi", 3440518, "cdc-est", Latn} + +m["muc"] = {"Mbu'", 35868, "nic-beb", Latn} + +m["mud"] = {"Mednyj Aleut", 1977419} + +m["mue"] = {"Media Lengua", 36066, "crp", Latn, ancestors = {"es", "qu"}} + +m["mug"] = {"Musgu", 3123545, "cdc-cbm", Latn} + +m["muh"] = {"Mündü", 35981, "nic-nke", Latn} + +m["mui"] = {"Musi", 615660, "poz-mly", Latn} + +m["muj"] = {"Mabire", 3440437} + +m["mul"] = {"Translingual", 20923490, "qfa-not", {"None", "Latn", "Bopo", "Brai", "Cyrl", "Hani", "Hans", "Hant", "IPAchar", "musical", "Ruminumerals", "Zsym", "Zmth"}, wikipedia_article = "Translingualism"} + +m["mum"] = {"Maiwala", 12952764, "poz-ocw", Latn} + +m["muo"] = {"Nyong", 36373, "alv-lek"} + +m["mup"] = {"Malvi", 33413, "inc-wes", ancestors = {"raj"}} + +m["muq"] = {"Eastern Xiangxi Miao", 27431376, "hmn"} + +m["mur"] = {"Murle", 56727, "sdv"} + +m["mus"] = {"Creek", 523014, "nai-mus", Latn} + +m["mut"] = {"Western Muria", 12952886, "dra", ancestors = {"gon"}} + +m["muu"] = {"Yaaku", 34222, "cus"} + +m["muv"] = {"Muthuvan", 3327420, "dra"} + +m["mux"] = {"Bo-Ung", 15831607} + +m["muy"] = {"Muyang", 3502301, "cdc-cbm", Latn} + +m["muz"] = {"Mursi", 36013, "sdv"} + +m["mva"] = {"Manam", 6746851, "poz-ocw", Latn} + +m["mvb"] = {"Mattole", 20824, "ath-pco", Latn} + +m["mvd"] = {"Mamboru", 578815, "poz"} + +m["mvg"] = {"Yucuañe Mixtec", 25562736, "omq-mxt", Latn} + +m["mvh"] = {"Mire", 3441359} + +m["mvi"] = {"Miyako", 36218, "jpx-ryu", {"Jpan"}} + +m["mvk"] = {"Mekmek", 6810592, "paa-yua"} + +m["mvl"] = {"Mbara (Australia)", 6799620, "aus-pam"} + +m["mvm"] = {"Muya", 2422759, "sit-qia"} + +m["mvn"] = {"Minaveha", 6863278, "poz-ocw", Latn} + +m["mvo"] = {"Marovo", 3294683, "poz-ocw", Latn} + +m["mvp"] = {"Duri", 3915414, "poz-ssw"} + +m["mvq"] = {"Moere", 
11732458, "ngf-mad"} + +m["mvr"] = {"Marau", 6755069, "poz-hce", Latn} + +m["mvs"] = {"Massep", 3502895, "paa-tkw"} + +m["mvt"] = {"Mpotovoro", 6928305, "poz-vnc", Latn} + +m["mvu"] = {"Marfa", 713633} + +m["mvv"] = {"Tagal Murut", 7675300, "poz-san", Latn} + +m["mvw"] = {"Machinga", 12952754, "bnt-rvm"} + +m["mvx"] = {"Meoswar", 6817777, "poz-hce", Latn} + +m["mvy"] = {"Indus Kohistani", 33399, "inc-dar"} + +m["mvz"] = {"Mesqan", 6821677, "sem-eth"} + +m["mwa"] = {"Mwatebu", 14916896, "poz-ocw", Latn} + +m["mwb"] = {"Juwal", 6319103, "qfa-tor", Latn} + +m["mwc"] = {"Are", 29277, "poz-ocw", Latn} + +m["mwe"] = {"Mwera", 6944725, "bnt-rvm", Latn} + +m["mwf"] = {"Murrinh-Patha", 2980398, "aus-dal", Latn} + +m["mwg"] = {"Aiklep", 3399652, "poz-ocw", Latn} + +m["mwh"] = {"Mouk-Aria", 3325498, "poz-ocw", Latn} + +m["mwi"] = {"Labo", 2157452, "poz-vnc", Latn} + +m["mwk"] = {"Kita Maninkakan", 3015523, "dmn-wmn"} + +m["mwl"] = {"Mirandese", 13330, "roa-ibe", Latn, ancestors = {"roa-ole"}} + +m["mwm"] = {"Sar", 56850, "csu-sar"} + +m["mwn"] = {"Nyamwanga", 6944666, "bnt-mwi"} + +m["mwo"] = {"Central Maewo", 3276435} + +m["mwp"] = {"Kala Lagaw Ya", 2591262, "aus-pam", Latn} + +m["mwq"] = {"Mün Chin", 331340, "tbq-kuk"} + +m["mwr"] = { + "Marwari", + 56312, + "inc-wes", + {"Deva", "Mahj"}, + ancestors = {"raj"}, + translit_module = "hi-translit" -- for now +} + +m["mws"] = {"Mwimbi-Muthambi", 15632357, "bnt-kka", Latn} + +m["mwt"] = {"Moken", 18648701, "poz"} + +m["mwu"] = {"Mittu", 6883573, "csu-bbk", Latn} + +m["mwv"] = {"Mentawai", 13365, "poz-nws", Latn} + +m["mww"] = {"White Hmong", 3138829, "hmn", {"Latn", "Hmng"}} + +m["mwz"] = {"Moingi", 11011905} + +m["mxa"] = {"Northwest Oaxaca Mixtec", 12953739, "omq-mxt", Latn} + +m["mxb"] = {"Tezoatlán Mixtec", 3317286, "omq-mxt", Latn} + +m["mxd"] = {"Modang", 6888037, "poz"} + +m["mxe"] = {"Mele-Fila", 3305008, "poz-pnp", Latn} + +m["mxf"] = {"Malgbe", 3502224} + +m["mxg"] = {"Mbangala", 6799612, "bnt-yak"} + +m["mxh"] = 
{"Mvuba", 6944591, "csu-mle", Latn} + +m["mxi"] = {"Mozarabic", 317044, "roa-ibe", {"Arab", "Hebr", "Latn"}} + +m["mxj"] = {"Miju", 56332, "sit-mdz", {"Latn", "Deva"}} + +m["mxk"] = {"Monumbo", 6906792, "qfa-tor"} + +m["mxl"] = {"Maxi Gbe", 35770, "alv-gbe"} + +m["mxm"] = {"Meramera", 6817936, "poz-ocw", Latn} + +m["mxn"] = {"Moi (Indonesia)", 11732459, "paa"} + +m["mxo"] = {"Mbowe", 10962309, "bnt-kav"} + +m["mxp"] = {"Tlahuitoltepec Mixe", 7810697} + +m["mxq"] = {"Juquila Mixe", 25559721} + +m["mxr"] = {"Murik (Malaysia)", 3328150, nil, Latn} + +m["mxs"] = {"Huitepec Mixtec", 12953729, "omq-mxt", Latn} + +m["mxt"] = {"Jamiltepec Mixtec", 12953730, "omq-mxt", Latn} + +m["mxu"] = {"Mada (Cameroon)", 3441206, "cdc-cbm", Latn} + +m["mxv"] = {"Metlatónoc Mixtec", 36363, "omq-mxt", Latn} + +m["mxw"] = {"Namo", 12952923} + +m["mxx"] = {"Mahou", 11004334, "dmn-mnk"} + +m["mxy"] = {"Southeastern Nochixtlán Mixtec", 7070684, "omq-mxt", Latn} + +m["mxz"] = {"Central Masela", 42575433, "poz-tim"} + +m["myb"] = {"Mbay", 3033565, "csu-sar"} + +m["myc"] = {"Mayeka", 11129517, "bnt-boa"} + +m["mye"] = {"Myene", 35832, "bnt-tso"} + +m["myf"] = {"Bambassi", 56540, "omv-mao"} + +m["myg"] = {"Manta", 35799, "nic-mom", Latn} + +m["myh"] = {"Makah", 3280640, "wak", Latn} + +m["myj"] = {"Mangayat", 35988, "nic-ser"} + +m["myk"] = {"Mamara Senoufo", 36187, "alv-sma"} + +m["myl"] = {"Moma", 6897018, "poz"} + +m["mym"] = {"Me'en", 3408516, "sdv"} + +m["myo"] = {"Anfillo", 34928, "omv-gon"} + +m["myp"] = {"Pirahã", 33825, "sai-mur", Latn} + +m["myr"] = {"Muniche", 3915654} + +m["mys"] = {"Mesmes", 3508617, "sem-eth"} + +m["myu"] = {"Mundurukú", 746723, "tup", Latn} + +m["myv"] = {"Erzya", 29952, "urj-mdv", Cyrl, translit_module = "myv-translit", override_translit = true} + +m["myw"] = {"Muyuw", 3502878, "poz-ocw", Latn} + +m["myx"] = {"Masaba", 12952814, "bnt-msl", Latn} + +m["myy"] = {"Macuna", 3275059, "sai-tuc", Latn} + +m["myz"] = {"Classical Mandaic", 25559314, "sem-ase", {"Mand"}} + 
+m["mza"] = {"Santa María Zacatepec Mixtec", 8063756, "omq-mxt", Latn} + +m["mzb"] = {"Tumzabt", 36149, "ber"} + +m["mzc"] = {"Madagascar Sign Language", 12715020, "sgn"} + +m["mzd"] = {"Malimba", 35806, "bnt-saw"} + +m["mze"] = {"Morawa", 6909384} + +m["mzg"] = {"Monastic Sign Language", 3217333, "sgn"} + +m["mzh"] = {"Wichí Lhamtés Güisnay", 7998197, "sai-wic", Latn} + +m["mzi"] = {"Ixcatlán Mazatec", 6101049, "omq-maz", Latn} + +m["mzj"] = {"Manya", 11006832, "dmn-mnk"} + +m["mzk"] = {"Nigeria Mambila", 11004163, "nic-mmb", Latn} + +m["mzl"] = {"Mazatlán Mixe", 25559728} + +m["mzm"] = {"Mumuye", 36021, "alv-mum", Latn} + +m["mzn"] = {"Mazanderani", 13356, "ira-msh", {"mzn-Arab"}} + +m["mzo"] = {"Matipuhy", 6787588, "sai-car", Latn} + +m["mzp"] = {"Movima", 1659701, "qfa-iso"} + +m["mzq"] = {"Mori Atas", 3324070, "poz-btk"} + +m["mzr"] = {"Marúbo", 3296011, "sai-pan", Latn} + +m["mzs"] = {"Macanese", 35785, "crp", Latn, ancestors = {"pt"}} + +m["mzt"] = {"Mintil", 6869641, "mkh-asl"} + +m["mzu"] = {"Inapang", 6013569, "paa", Latn} + +m["mzv"] = {"Manza", 36038, "alv-gbf"} + +m["mzw"] = {"Deg", 35183, "nic-gnw", Latn} + +m["mzx"] = {"Mawayana", 6794377, "awd"} + +m["mzy"] = {"Mozambican Sign Language", 6927809, "sgn"} + +m["mzz"] = {"Maiadomu", 6735234, "poz-ocw", Latn} + +return m diff --git a/wiktra/wikt/translit/languages/data3/n.lua b/wiktra/wikt/translit/languages/data3/n.lua new file mode 100644 index 0000000..4ce6bac --- /dev/null +++ b/wiktra/wikt/translit/languages/data3/n.lua @@ -0,0 +1,985 @@ +local u = mw.ustring.char + +-- UTF-8 encoded strings for some commonly-used diacritics +local GRAVE = u(0x0300) +local ACUTE = u(0x0301) +local CIRC = u(0x0302) +local TILDE = u(0x0303) +local MACRON = u(0x0304) +local BREVE = u(0x0306) +local DOTABOVE = u(0x0307) +local DIAER = u(0x0308) +local CARON = u(0x030C) +local DGRAVE = u(0x030F) +local INVBREVE = u(0x0311) +local DOTBELOW = u(0x0323) +local RINGBELOW = u(0x0325) +local CEDILLA = u(0x0327) + +local Deva 
= {"Deva"} +local Latn = {"Latn"} + +local m = {} + +m["naa"] = {"Namla", 3508760, "paa-pau", Latn} + +m["nab"] = {"Nambikwara", 2068190, "sai-nmk", Latn} + +m["nac"] = {"Narak", 6965295, "ngf", Latn} + +m["nae"] = {"Naka'ela", 6960073, "poz", Latn} + +m["naf"] = {"Nabak", 11732491, "ngf", Latn} + +m["nag"] = {"Naga Pidgin", 3503454, "crp", Latn, ancestors = {"as"}} + +m["nah"] = {"Nahuatl", 13300, "azc-nah", Latn} + +m["naj"] = {"Nalu", 36026, "alv-nal", Latn} + +m["nak"] = {"Nakanai", 6528669, "poz-ocw", Latn} + +m["nal"] = {"Nalik", 3335387, "poz-ocw", Latn} + +m["nam"] = {"Ngan'gityemerri", 3298041, "aus-dal", Latn} + +m["nan"] = {"Min Nan", 36495, "zhx-min-hai", {"Hani", "Latn"}, wikimedia_codes = {"zh-min-nan"}} + +m["nao"] = {"Naaba", 11883865, "sit", ancestors = {"xct"}} + +m["nap"] = {"Neapolitan", 33845, "roa-itd", Latn} + +m["naq"] = {"Khoekhoe", 13301, "khi-khk", Latn} + +m["nar"] = {"Iguta", 5621686, "nic-jer", Latn} + +m["nas"] = {"Nasioi", 56772, "paa-sbo", Latn} + +m["nat"] = {"Hungworo", 3914395, "nic-kmk", Latn} + +m["naw"] = {"Nawuri", 35906, "alv-gng", Latn} + +m["nax"] = {"Nakwi", 3504178, "qfa-mal", Latn} + +m["nay"] = {"Ngarrindjeri", 7022091, "aus-pam", Latn} + +m["naz"] = {"Coatepec Nahuatl", 5138605, "azc-nah", Latn} + +m["nba"] = {"Nyemba", 3346655, "bnt-clu", Latn, ancestors = {"lch"}} + +m["nbb"] = {"Ndoe", 36134, "nic-eko", Latn} + +m["nbc"] = {"Chang", 5071694, "sit-kch", Latn} + +m["nbd"] = {"Ngbinda", 11132859, "bnt-boa", Latn} + +m["nbe"] = {"Konyak Naga", 6430448, "sit-kch", Latn} + +m["nbg"] = {"Nagarchal", 13299, "dra", ancestors = {"gon"}} + +m["nbh"] = {"Ngamo", 3438705, "cdc-wst", Latn} + +m["nbi"] = {"Mao Naga", 12952905, "tbq-anp", Latn} + +m["nbj"] = {"Ngarinman", 10600380, nil, Latn} + +m["nbk"] = {"Nake", 11732496, "ngf-mad", Latn} + +m["nbm"] = {"Ngbaka Ma'bo", 3915331, "nic-nkm", Latn} + +m["nbn"] = {"Kuri", 3200540, "poz", Latn} + +m["nbo"] = {"Nkukoli", 3914482, "nic-uce", Latn} + +m["nbp"] = {"Nnam", 36138, 
"nic-eko", Latn} + +m["nbq"] = {"Nggem", 12952956, "ngf", Latn} + +m["nbr"] = {"Numana", 5529310, "nic-nin", Latn} + +m["nbs"] = { + "Namibian Sign Language", 6961792, "sgn", Latn -- when documented +} + +m["nbt"] = {"Na", 12952895, "sit-tan", {"Deva", "Latn"}} + +m["nbu"] = {"Rongmei Naga", 12952912, "sit-zem", Latn} + +m["nbv"] = {"Ngamambo", 11129694, "nic-mom", Latn} + +m["nbw"] = {"Southern Ngbandi", 17522635, "nic-ngd", Latn} + +m["nby"] = {"Ningera", 11732524, "paa-brd", Latn} + +m["nca"] = {"Iyo", 6101336, "ngf-fin", Latn} + +m["ncb"] = {"Central Nicobarese", 3335553, "aav-nic", {"Deva", "Latn"}} + +m["ncc"] = {"Ponam", 3396122, "poz-aay", Latn} + +m["ncd"] = {"Nachering", 6957144, "sit-kic", Deva} + +m["nce"] = { + "Yale", 2992915, "paa", -- kwomtari or isolate + Latn +} + +m["ncf"] = {"Notsi", 3344784, "poz-ocw", Latn} + +m["ncg"] = {"Nisga'a", 3342138, "nai-tsi", Latn} + +m["nch"] = {"Central Huasteca Nahuatl", 2194290, "azc-nah", Latn} + +m["nci"] = {"Classical Nahuatl", 559242, "azc-nah", Latn, entry_name = {from = {"Ā", "ā", "Ē", "ē", "Ī", "ī", "Ō", "ō", "Ū", "ū", "Ȳ", "ȳ"}, to = {"A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "Y", "y"}}} + +m["ncj"] = {"Northern Puebla Nahuatl", 15705671, "azc-nah", Latn} + +m["nck"] = {"Nakara", 6960662, "aus-arn", Latn} + +m["ncl"] = {"Michoacán Nahuatl", 2896217, "azc-nah", Latn} + +m["ncm"] = {"Nambo", 42173731, nil, Latn} + +m["ncn"] = {"Nauna", 3337158, "poz-aay", Latn} + +m["nco"] = {"Sibe", 56806, "paa-sbo", Latn} + +m["ncr"] = {"Ncane", 11297920, "nic-bbe", Latn, ancestors = {"nhu"}} + +m["ncs"] = {"Nicaraguan Sign Language", 33765, "sgn", {"Sgnw"}} + +m["nct"] = {"Chothe Naga", 5105385, "tbq-kuk", {"Beng", "Latn"}} + +m["ncu"] = {"Chumburung", 35198, "alv-gng", Latn} + +m["ncx"] = {"Central Puebla Nahuatl", 5061727, "azc-nah", Latn} + +m["ncz"] = {"Natchez", 3111838, nil, Latn} + +m["nda"] = {"Ndasa", 35904, "bnt-kel", Latn} + +m["ndb"] = {"Kenswei Nsei", 7067553, "nic-rnn", Latn} + +m["ndc"] = {"Ndau", 
13311, "bnt-sho", Latn} + +m["ndd"] = {"Nde-Nsele-Nta", 36131, "nic-eko", Latn} + +m["ndf"] = {"Nadruvian", 6957967, nil, Latn} + +m["ndg"] = {"Ndengereko", 6983726, "bnt-mbi", Latn} + +m["ndh"] = {"Ndali", 6983678, "bnt-run", Latn} + +m["ndi"] = {"Chamba Leko", 36381, "alv-lek", Latn} + +m["ndj"] = {"Ndamba", 6983684, "bnt-kil", Latn} + +m["ndk"] = {"Ndaka", 11164947, "bnt-nya", Latn} + +m["ndl"] = {"Ndolo", 6983788, "bnt-zbi", Latn, ancestors = {"lse"}} + +m["ndm"] = {"Ndam", 56283, "cdc-est", Latn} + +m["ndn"] = {"Ngundi", 35916, "bnt-ngn", Latn} + +m["ndp"] = {"Ndo", 6983774, "csu-mle", Latn} + +m["ndq"] = {"Ndombe", 6983792, "bnt-swb", Latn} + +m["ndr"] = {"Ndoola", 35837, "nic-mmb", Latn} + +m["nds"] = {"Low German", 25433, "gmw", Latn, ancestors = {"gml"}} + +m["ndt"] = {"Ndunga", 6983857, "nic-mbc", Latn} + +m["ndu"] = {"Dugun", 11015189, "alv-dur", Latn} + +m["ndv"] = {"Ndut", 36028, "alv-cng", Latn} + +m["ndw"] = {"Ndobo", 11008568, "bnt-ngn", Latn} + +m["ndx"] = {"Nduga", 6983833, nil, Latn} + +m["ndy"] = {"Lutos", 6705910, "csu-val", Latn} + +m["ndz"] = {"Ndogo", 35983, "nic-ser", Latn} + +m["nea"] = {"Eastern Ngad'a", 12473454, "poz-cet"} + +m["neb"] = {"Toura", 7853636, "dmn-mda", Latn} + +m["nec"] = {"Nedebang", 4925378, "ngf"} + +m["ned"] = {"Nde-Gbite", 11010279, "nic-grf"} + +m["nee"] = {"Kumak", 3347266, "poz-cln", Latn} + +m["nef"] = {"Nefamese", 6987002, "crp"} + +m["neg"] = {"Negidal", 33676, "tuw", {"Cyrl"}} + +m["neh"] = {"Nyenkha", 3695185, "sit-ebo", {"Tibt", "Latn"}} + +m["nej"] = {"Neko", 6989840, "ngf-fin", Latn} + +m["nek"] = {"Neku", 14916900, "poz-cln"} + +m["nem"] = {"Nemi", 3338008, "poz-cln", Latn} + +m["nen"] = {"Nengone", 3338052, "poz-occ", Latn} + +m["neo"] = {"Ná-Meo", 15977293, "hmn"} + +m["neq"] = {"North Central Mixe", 25559729, nil, Latn} + +m["ner"] = {"Yahadian", 8046778, nil, Latn} + +m["nes"] = {"Bhoti Kinnauri", 21179921, "sit-las"} + +m["net"] = {"Nete", 6998869, "paa-eng"} + +m["neu"] = {"Neo", 606917, "art", Latn, 
type = "appendix-constructed"} + +m["nev"] = {"Nyaheun", 7070801, "mkh-ban"} + +m["new"] = {"Newar", 33979, "sit-new", {"Deva", "Newa"}, ancestors = {"nwx"}, translit_module = "translit-redirect"} + +m["nex"] = {"Neme", 12952941} + +m["ney"] = {"Neyo", 36410, "kro"} + +m["nez"] = {"Nez Perce", 3339226, "nai-shp", Latn} + +m["nfa"] = {"Dhao", 2053828, "poz"} + +m["nfd"] = {"Ahwai", 3913957, "nic-plt", Latn} + +m["nfl"] = {"Aiwoo", 56742, "poz-oce", Latn} + +m["nfr"] = {"Nafaanra", 13297, "alv-snf", Latn} + +m["nfu"] = {"Mfumte", 6826794, "nic-nka", Latn} + +m["nga"] = {"Ngbaka", 36022, "alv-gbf", Latn} + +m["ngb"] = {"Northern Ngbandi", 17522631, "nic-ngd", Latn} + +m["ngc"] = {"Ngombe (Congo)", 3123524, "bnt-bun"} + +m["ngd"] = {"Ngando (Central African Republic)", 35910, "bnt-ngn"} + +m["nge"] = {"Ngemba", 6750551, "nic-nge", Latn} + +m["ngg"] = { -- compare 'aiy' + "Ngbaka Manza", 11033316, "alv-gbf", Latn +} + +m["ngh"] = {"Nǀuu", 2618974, "khi-tuu", Latn} + +m["ngi"] = {"Ngizim", 3914924, "cdc-wst", Latn} + +m["ngj"] = {"Ngie", 36361, "nic-mom", Latn} + +m["ngk"] = {"Ngalkbun", 3913790, "aus-gun", Latn} + +m["ngl"] = {"Lomwe", 35824, "bnt-mak", Latn} + +m["ngm"] = {"Ngatik Men's Creole", 36400, "crp", ancestors = {"en", "pon"}} + +m["ngn"] = {"Ngwo", 36051, "nic-mom", Latn} + +m["ngo"] = {"Ngoni", 7022547, "bnt-ngu", Latn} + +m["ngp"] = {"Ngulu", 7193332, "bnt-seu", Latn} + +m["ngq"] = {"Ngoreme", 7022573, "bnt-lok", Latn} + +m["ngr"] = {"Nagu", 3063524, "poz-oce", Latn} + +m["ngs"] = {"Gvoko", 3441188, "cdc-cbm", Latn} + +m["ngt"] = {"Ngeq", 25559548, "mkh-kat"} + +m["ngu"] = {"Guerrero Nahuatl", 5614980, "azc-nah", Latn} + +m["ngv"] = {"Nagumi", 35842, "nic-jrn"} + +m["ngw"] = {"Ngwaba", 3440480, "cdc-cbm", Latn} + +m["ngx"] = {"Nggwahyi", 56265, "cdc-cbm", Latn} + +m["ngy"] = {"Tibea", 36598, "bnt-baf", Latn} + +m["ngz"] = {"Ngungwel", 35920, "bnt-tkc", Latn} + +m["nha"] = {"Nhanda", 3339380, "aus-psw", Latn} + +m["nhb"] = {"Beng", 3913311, "dmn-nbe", Latn} 
+ +m["nhc"] = {"Tabasco Nahuatl", 6047326, "azc-nah", Latn} + +m["nhd"] = {"Chiripá", 2873230, "tup-gua", ancestors = {"gn"}} + +m["nhe"] = {"Eastern Huasteca Nahuatl", 4358289, "azc-nah", Latn} + +m["nhf"] = {"Nhuwala", 10600396, "aus-nga", Latn} + +m["nhg"] = {"Tetelcingo Nahuatl", 3450252, "azc-nah", Latn} + +m["nhh"] = {"Nahari", 6583560, "inc-eas", ancestors = {"inc-mgd"}} + +m["nhi"] = {"Zacatlán-Ahuacatlán-Tepetzintla Nahuatl", 12953764, "azc-nah", Latn} + +m["nhk"] = {"Cosoleacaque Nahuatl", 12953757, "azc-nah", Latn} + +m["nhm"] = {"Morelos Nahuatl", 4800819, "azc-nah", Latn} + +m["nhn"] = {"Central Nahuatl", 6047309, "azc-nah", Latn} + +m["nho"] = {"Takuu", 3409818, "poz-pnp", Latn} + +m["nhp"] = {"Pajapan Nahuatl", 12953760, "azc-nah", Latn} + +m["nhq"] = {"Huaxcaleca Nahuatl", 12953758, "azc-nah", Latn} + +m["nhr"] = {"Naro", 2164778, "khi-kal", Latn} + +m["nht"] = {"Ometepec Nahuatl", 7090132, "azc-nah", Latn} + +m["nhu"] = {"Noone", 36072, "nic-bbe", Latn} + +m["nhv"] = {"Temascaltepec Nahuatl", 2379405, "azc-nah", Latn} + +m["nhw"] = {"Western Huasteca Nahuatl", 2678840, "azc-nah", Latn} + +m["nhx"] = {"Mecayapan Nahuatl", 12953756, "azc-nah", Latn} + +m["nhy"] = {"Northern Oaxaca Nahuatl", 12953763, "azc-nah", Latn} + +m["nhz"] = {"Santa María La Alta Nahuatl", 15705753, "azc-nah", Latn} + +m["nia"] = {"Nias", 2407831, "poz-nws", Latn} + +m["nib"] = {"Nakame", 11732495, "ngf-fin", Latn} + +m["nid"] = {"Ngandi", 7021977, "aus-arn", Latn} + +m["nie"] = {"Niellim", 33662, "alv-bua"} + +m["nif"] = {"Nek", 6989781, "ngf-fin", Latn} + +m["nig"] = {"Ngalakan", 3913796, "aus-gun", Latn} + +m["nih"] = {"Nyiha", 11128374, "bnt-mby", Latn} + +m["nii"] = {"Nii", 35237, "ngf", Latn} + +m["nij"] = {"Ngaju", 2992872, "poz-brw", Latn} + +m["nik"] = {"Southern Nicobarese", 7570194, "aav-nic"} + +m["nil"] = {"Nila", 7036821} + +m["nim"] = {"Nilamba", 4121200, "bnt-tkm", Latn} + +m["nin"] = {"Ninzo", 3914021, "nic-nin"} + +m["nio"] = {"Nganasan", 36743, "syd", 
{"Cyrl"}} + +m["niq"] = {"Nandi", 6956591, "sdv-nma"} + +m["nir"] = {"Nimboran", 301116, "paa-nim"} + +m["nis"] = {"Nimi", 11732523, "ngf-fin", Latn} + +m["nit"] = {"Southeastern Kolami", 56767, "dra"} + +m["niu"] = {"Niuean", 33790, "poz-pol", Latn} + +m["niv"] = {"Nivkh", 36464, "qfa-iso", {"Cyrl"}, translit_module = "niv-translit"} + +m["niw"] = {"Nimo", 3504126, "paa-asa"} + +m["nix"] = {"Hema", 5710904, "bnt-nyg", Latn} + +m["niy"] = {"Ngiti", 7022396, "csu-lnd"} + +m["niz"] = {"Ningil", 11732527, "qfa-tor"} + +m["nja"] = {"Nzanyi", 3441299, "cdc-cbm", Latn} + +m["njb"] = {"Nocte Naga", 7046410, "sit-tno"} + +m["njh"] = {"Lotha Naga", 33590, "sit-aao"} + +m["nji"] = {"Gudanji", 3915692, "aus-mir"} + +m["njj"] = {"Njen", 36112, "nic-mom", Latn} + +m["njl"] = {"Njalgulgule", 7071229, "sdv-daj"} + +m["njm"] = {"Angami", 56761, "tbq-anp"} + +m["njn"] = {"Liangmai Naga", 14194500, "sit-zem"} + +m["njo"] = {"Ao", 28433, "sit-aao", Latn} + +m["njr"] = {"Njerep", 35844, "nic-mmb", Latn} + +m["njs"] = {"Nisa", 13593518, "paa-egb"} + +m["njt"] = {"Ndyuka-Trio Pidgin", 13591205, "crp", ancestors = {"djk", "tri"}} + +m["nju"] = {"Ngadjunmaya", 7021846} + +m["njx"] = {"Kunyi", 3196559, "bnt-kng", Latn} + +m["njy"] = {"Njyem", 35898, "bnt-ndb", Latn} + +m["njz"] = {"Nyishi", 56870, "sit-tan", Latn} + +m["nka"] = {"Nkoya", 7042633, "bnt-lbn", Latn} + +m["nkb"] = {"Khoibu Naga", 21481876, "sit-mar"} + +m["nkc"] = {"Nkongho", 35863, "bnt-saw", Latn} + +m["nkd"] = {"Koireng", 6426342, "sit-zem"} + +m["nke"] = {"Duke", 3041075, "poz-ocw"} + +m["nkf"] = {"Inpui Naga", 21481817, "sit-zem"} + +m["nkg"] = {"Nekgini", 11732509, "ngf-fin", Latn} + +m["nkh"] = {"Khezha Naga", 6401519, "tbq-anp"} + +m["nki"] = {"Thangal Naga", 56374, "sit-zem"} + +m["nkj"] = {"Nakai", 14916897, "ngf-okk", Latn} + +m["nkk"] = {"Nokuku", 7048122, "poz-vnc"} + +m["nkm"] = {"Namat", 15634505} + +m["nkn"] = {"Nkangala", 10962292, "bnt-clu", Latn, ancestors = {"mck"}} + +m["nko"] = {"Nkonya", 35867, 
"alv-gng", Latn} + +m["nkp"] = {"Niuatoputapu", 3399095, "poz-pnp"} + +m["nkq"] = {"Nkami", 7042522, "alv-gng", Latn} + +m["nkr"] = {"Nukuoro", 2635961, "poz-pnp", Latn} + +m["nks"] = {"North Asmat", 11732049} + +m["nkt"] = {"Nyika", 16917497, "bnt-mwi", Latn} + +m["nku"] = {"Bouna Kulango", 20668241, "alv-kul"} + +-- nkv is treated as nkt, see WT:LT + +m["nkw"] = {"Nkutu", 7193313, "bnt-tet", Latn} + +m["nkx"] = {"Nkoroo", 36000, "ijo"} + +m["nkz"] = {"Nkari", 11130307, "nic-ief", ancestors = {"ibr"}} + +m["nla"] = {"Ngombale", 36292, "bai", Latn} + +m["nlc"] = {"Nalca", 6960839, "ngf", Latn} + +m["nle"] = {"East Nyala", 25559347, "bnt-msl", Latn, ancestors = {"luy"}} + +m["nlg"] = {"Gela", 3063531, "poz-sls", Latn} + +m["nli"] = {"Grangali", 3444203, "inc-dar"} + +m["nlj"] = {"Nyali", 7070830, "bnt-nya", Latn} + +m["nlk"] = {"Ninia Yali", 12953310} + +m["nll"] = {"Nihali", 33904, "qfa-iso", {"Deva", "Latn"}} + +m["nlm"] = {"Mankiyali", 47522426, "inc-dar"} + +m["nlo"] = {"Ngul", 35894, "bnt-bdz", Latn} + +m["nlq"] = {"Lao Naga", 63283609, "sit-tno"} + +m["nlu"] = {"Nchumbulu", 36143, "alv-gng", Latn} + +m["nlv"] = {"Orizaba Nahuatl", 3086050, "azc-nah", Latn} + +m["nlw"] = {"Walangama", 7961277} + +m["nlx"] = {"Nahali", 33361, "inc-bhi"} + +m["nly"] = {"Nyamal", 7070837, "aus-nga", Latn} + +m["nlz"] = {"Nalögo", 20527138, "poz-oce"} + +m["nma"] = {"Maram Naga", 56378, "sit-zem"} + +m["nmb"] = {"Big Nambas", 2902304, "poz-vnc"} + +m["nmc"] = {"Ngam", 3915446, "csu-sar", Latn} + +m["nmd"] = {"Ndumu", 35901, "bnt-mbt", Latn} + +m["nme"] = {"Mzieme Naga", 6949473, "sit-zem"} + +m["nmf"] = {"Tangkhul Naga", 7682992, "sit-tng"} + +m["nmg"] = {"Kwasio", 34098, "bnt-mnj", Latn} + +m["nmh"] = {"Monsang Naga", 6902496} + +m["nmi"] = {"Nyam", 3438738, "cdc-wst", Latn} + +m["nmj"] = {"Ngombe (Central African Republic)", 3913949, "alv-gbs"} + +m["nmk"] = {"Namakura", 3335410, "poz-vnc"} + +m["nml"] = {"Ndemli", 36089, "nic-grf", Latn} + +m["nmm"] = {"Manangba", 6746900, 
"sit-tam", {"Tibt", "Deva"}} + +m["nmn"] = {"ǃXóõ", 13229, "khi-tuu", Latn} + +m["nmo"] = {"Moyon Naga", 6927748, "tbq-kuk"} + +m["nmp"] = {"Nimanbur", nil} + +m["nmq"] = {"Nambya", 11008869, "bnt-sho", Latn} + +m["nmr"] = {"Nimbari", 36069, "alv-lni"} + +m["nms"] = {"Letemboi", 3236886, "poz-vnc"} + +m["nmt"] = {"Namonuito", 12908815, "poz-mic"} + +m["nmu"] = {"Northeast Maidu", 3278074, "nai-mdu", Latn} + +m["nmv"] = {"Ngamini", 7021944, "aus-kar", Latn} + +m["nmw"] = {"Nimoa", 7037729, "poz-ocw"} + +m["nmy"] = {"Namuyi", 56844, "sit-nax"} + +m["nmz"] = {"Nawdm", 36085, "nic-yon", Latn} + +m["nna"] = {"Nyangumarta", 33653} + +m["nnb"] = {"Nande", 3196953, "bnt-glb", Latn} + +m["nnc"] = {"Nancere", 3140491, "cdc-est", Latn} + +m["nnd"] = {"West Ambae", 2841479, "poz-vnc", Latn} + +m["nne"] = {"Ngandyera", 10961003, "bnt-ova", Latn} + +m["nnf"] = {"Ngaing", 11732510, "ngf-fin", Latn} + +m["nng"] = {"Maring Naga", 12952908, "sit-mar"} + +m["nnh"] = {"Ngiemboon", 36286, "bai", Latn} + +m["nni"] = {"North Nuaulu", 12952968, "poz-cma"} + +m["nnj"] = {"Nyangatom", 4662604, "sdv-ttu"} + +m["nnk"] = {"Nankina", 11732502, "ngf-fin", Latn} + +m["nnl"] = {"Northern Rengma Naga", 7067615, "tbq-anp"} + +m["nnm"] = {"Namia", 56363, "paa-spk", Latn} + +m["nnn"] = {"Ngete", 56625, "cdc-mas", Latn} + +m["nnp"] = {"Wancho", 7967085, "sit-kch"} + +m["nnq"] = {"Ngindo", 7022366, "bnt-mbi", Latn} + +m["nnr"] = {"Narungga", 13591127, "aus-pam"} + +m["nnt"] = {"Nanticoke", 3915517, "alg-eas", Latn} + +m["nnu"] = {"Dwang", 35258, "alv-gng", Latn} + +m["nnv"] = {"Nukunu", 10604066} + +m["nnw"] = {"Southern Nuni", 11152248, "nic-gnn", Latn} + +m["nnx"] = {"Ngong", 12952915} + +m["nny"] = { -- contrast aus-ynk + "Nyangga", 10604331, "aus-tnk", Latn +} + +m["nnz"] = {"Nda'nda'", 36016, "bai", Latn} + +m["noa"] = {"Woun Meu", 3111873, "sai-chc", Latn} + +m["noc"] = {"Nuk", 11732534, "ngf-fin", Latn} + +m["nod"] = {"Northern Thai", 565110, "tai-swe", {"Lana", "Thai"}, sort_key = {from = 
{"[%pᪧๆ]", "[᩠ᩳ-᩿]", "ᩔ", "ᩕ", "ᩖ", "ᩘ", "([ᨭ-ᨱ])ᩛ", "([ᨷ-ᨾ])ᩛ", "ᩤ", "[็-๎]", "([เแโใไ])([ก-ฮ])", u(0x200C)}, to = {"", "", "ᩈᩈ", "ᩁ", "ᩃ", "ᨦ", "%1ᨮ", "%1ᨻ", "ᩣ", "", "%2%1"}}, entry_name = {from = {u(0x200C)}, to = {}}} + +m["noe"] = {"Nimadi", 3502294, "inc-wes", Deva, ancestors = {"raj"}} + +m["nof"] = {"Nomane", 11732531} + +m["nog"] = {"Nogai", 33871, "trk-kno", {"Cyrl", "Arab", "Latn"}, translit_module = "nog-translit", override_translit = true} + +m["noh"] = {"Nomu", 11732532} + +m["noi"] = {"Noiri", 12953774, "inc-bhi"} + +m["noj"] = {"Nonuya", 5372139, "sai-wit", Latn} + +m["nok"] = {"Nooksack", 3343396} + +m["nol"] = {"Nomlaki", 3343229, "nai-wtq", Latn} + +m["nom"] = {"Nocamán", 7046289, "sai-pan", Latn} + +m["non"] = {"Old Norse", 35505, "gmq", {"Latn", "Runr"}, translit_module = "translit-redirect"} + +m["nop"] = {"Numanggang", 7069052, "ngf-fin", Latn} + +m["noq"] = {"Ngongo", 11057478, "bnt-yak", Latn} + +m["nos"] = {"Eastern Nisu", 25559419, "tbq-lol"} + +m["not"] = {"Nomatsiguenga", 3342992, "awd", Latn} + +m["nou"] = {"Ewage-Notu", 5418860} + +m["nov"] = {"Novial", 36738, "art", Latn, type = "appendix-constructed"} + +m["now"] = {"Nyambo", 4967930, "bnt-haj", Latn} + +m["noy"] = {"Noy", 36321, "alv-bua"} + +m["noz"] = {"Nayi", 3183349, "omv-diz"} + +m["npa"] = {"Nar Phu", 4926353, "sit-tam"} + +m["npb"] = {"Nupbikha", 3695201, "sit-ebo"} + +m["npg"] = {"Ponyo", 7228475, "sit-kch"} + +m["nph"] = {"Phom", 7187109, "sit-kch"} + +m["npl"] = {"Southeastern Puebla Nahuatl", 4632950, "azc-nah", Latn} + +m["npn"] = {"Mondropolon", 3320594, "poz-aay"} + +m["npo"] = {"Pochuri Naga", 7206342, "tbq-anp"} + +m["nps"] = {"Nipsan", 11732528} + +m["npu"] = {"Puimei Naga", 7259044, "sit-zem"} + +m["npy"] = {"Napu", 12953768} + +m["nqg"] = {"Ede Nago", 12952408, "alv-ede"} + +m["nqk"] = {"Kura Ede Nago", 12952409, "alv-ede"} + +m["nql"] = {"Ngendelengo", 63283693, "bnt-swb", Latn} + +m["nqm"] = {"Ndom", 6983791, "ngf", Latn} + +m["nqn"] = {"Nen", 20816352, 
"paa-yam"} + +m["nqo"] = {"N'Ko", 18546266, "dmn-man", {"Nkoo"}} + +m["nqq"] = {"Kyan-Karyaw Naga", 63283784, "sit-tno"} + +m["nqy"] = {"Akyaung Ari", 4702035, "sit-tng"} + +m["nra"] = {"Ngom", 36087, "bnt-kel", Latn} + +m["nrb"] = {"Nara", 36179, "sdv-nes"} + +m["nrc"] = {"Noric", 37023, "cel", {"Ital"}} + +m["nre"] = {"Southern Rengma Naga", 7313205, "tbq-anp"} + +m["nrf"] = {"Norman", 33850, "roa-oil", Latn, ancestors = {"frm"}, sort_key = {from = {"[áàâä]", "[éèêë]", "[íìîï]", "[óòôö]", "[úùûü]", "[ýỳŷÿ]", "ç", "æ", "œ", "'"}, to = {"a", "e", "i", "o", "u", "y", "c", "ae", "oe"}}, wikimedia_codes = {"nrm"}} + +m["nrg"] = {"Narango", 12952929, "poz-vnc"} + +m["nri"] = {"Chokri Naga", 5104247, "tbq-anp"} + +m["nrk"] = {"Ngarla", 3915860, "aus-nga", Latn} + +m["nrl"] = {"Ngarluma", 7022078, "aus-nga", Latn} + +m["nrm"] = {"Narom", 3336135, "poz-swa", Latn} + +m["nrn"] = {"Norn", 36708, "gmq", Latn, ancestors = {"non"}} + +m["nrp"] = {"North Picene", 430138, nil, {"Ital"}, translit_module = "Ital-translit"} + +m["nrr"] = {"Norra", 12952967, "tai"} + +m["nrt"] = {"Northern Kalapuya", 3192121, "nai-klp"} + +m["nru"] = {"Narua", 21658869, "sit-nax"} + +m["nrx"] = {"Ngurmbur", 2591251} + +m["nrz"] = {"Lala (New Guinea)", 6480151, "poz-ocw"} + +m["nsa"] = {"Sangtam Naga", 7418144, "sit-aao"} + +m["nsb"] = {"Lower Nossob", 6693681, "khi-tuu", Latn} + +m["nsc"] = {"Nshi", 11129508, "nic-rnn", Latn} + +m["nsd"] = {"Southern Nisu", nil, "tbq-lol"} + +m["nse"] = {"Nsenga", 3081996, "bnt-sna", Latn} + +m["nsg"] = {"Ngasa", 56345, "sdv-lma"} + +m["nsh"] = {"Ngoshie", 7022582, "nic-mom", Latn} + +m["nsi"] = {"Nigerian Sign Language", 7033021, "sgn"} + +m["nsk"] = {"Naskapi", 1704302, "alg", {"Cans"}, ancestors = {"cr"}, translit_module = "nsk-translit"} + +m["nsl"] = {"Norwegian Sign Language", 1781613, "sgn"} + +m["nsm"] = {"Sema", 3478238, "tbq-anp"} + +m["nsn"] = {"Nehan", 3337774, "poz-ocw"} + +m["nso"] = {"Northern Sotho", 33890, "bnt-sts", Latn} + +m["nsp"] = {"Nepalese 
Sign Language", 3915492, "sgn"} + +m["nsq"] = {"Northern Sierra Miwok", 3344226, "nai-utn", Latn} + +m["nsr"] = {"Maritime Sign Language", 3915483, "sgn"} + +m["nss"] = {"Nali", 3335385, "poz-aay"} + +m["nst"] = {"Tangsa", 56350, "sit-tno"} + +m["nsu"] = {"Sierra Negra Nahuatl", nil, "azc-nah", Latn} + +m["nsv"] = {"Southwestern Nisu", nil, "tbq-lol"} + +m["nsw"] = {"Navut", 3337327, "poz-vnc"} + +m["nsx"] = {"Nsongo", 7067577, "bnt-tmb", Latn} + +m["nsy"] = {"Nasal", 6966574} + +m["nsz"] = {"Nisenan", 33665, "nai-mdu", Latn} + +m["ntd"] = {"Northern Tidong", nil, "poz-san"} + +m["nte"] = {"Nathembo", 11030947, "bnt-mak"} + +m["ntg"] = {"Ngantangarra", 33060509} + +m["nti"] = {"Natioro", 36140, "alv-wan"} + +m["ntj"] = {"Ngaanyatjarra", 3915409, "aus-pam", Latn} + +m["ntk"] = {"Ikoma", 5996114, "bnt-lok", Latn} + +m["ntm"] = {"Nateni", 3070731, "nic-grm", Latn} + +m["nto"] = {"Ntomba", 11130292, "bnt-mon", Latn} + +m["ntp"] = {"Northern Tepehuan", 15615651, "azc", Latn, sort_key = {from = {"á", "é", "í", "ó", "ú"}, to = {"a", "e", "i", "o", "u"}}} + +m["ntr"] = {"Delo", 35195, "nic-gne", Latn} + +m["nts"] = {"Natagaimas", 6967931} + +m["ntu"] = {"Natügu", nil, "poz-oce"} + +m["ntw"] = {"Nottoway", 3344791, "iro"} + +m["ntx"] = {"Somra", 7560536, "sit-tng"} + +m["nty"] = {"Mantsi", 56878, "sit-mnz"} + +m["nua"] = {"Yuaga", 3573088, "poz-cln", Latn} + +m["nuc"] = {"Nukuini", 3346231} + +m["nud"] = {"Ngala", 7021893, "paa-spk", Latn} + +m["nue"] = {"Ngundu", 12952953, "bad-cnt", Latn} + +m["nuf"] = {"Nusu", 56413, "tbq-lol"} + +m["nug"] = {"Nungali", 7069826, "aus-mir"} + +m["nuh"] = {"Ndunda", 3913968, "nic-mmb", Latn} + +m["nui"] = {"Ngumbi", 36459, "bnt-yko"} + +m["nuj"] = {"Nyole (Uganda)", 3739448, "bnt-msl", Latn} + +m["nuk"] = {"Nootka", 2992876, "wak", Latn} + +m["nul"] = {"Nusa Laut", 7070332, "poz-cma"} + +m["num"] = {"Niuafo'ou", 36173, "poz-pol", Latn} + +m["nun"] = {"Anong", 2748232, "sit-nng"} + +m["nuo"] = {"Nguôn", 3915785, "mkh-vie"} + +m["nup"] = 
{"Nupe", 36720, "alv-ngb", Latn} + +m["nuq"] = {"Nukumanu", 12909019, "poz-pnp"} + +m["nur"] = {"Nuguria", 7068910, "poz-pnp"} + +m["nus"] = {"Nuer", 33675, "sdv-dnu", Latn} + +m["nut"] = {"Nung", 72695, "tai"} + +m["nuu"] = {"Ngbundu", 11126081, "bad", Latn} + +m["nuv"] = {"Northern Nuni", 11016572, "nic-gnn", Latn} + +m["nuw"] = {"Nguluwan", 6528643} + +m["nux"] = {"Mehek", 6809452, "paa-spk", Latn} + +m["nuy"] = {"Nunggubuyu", 1747811, "aus-arn"} + +m["nuz"] = {"Tlamacazapa Nahuatl", 2073277, "azc-nah", Latn} + +m["nvh"] = {"Nasarian", 6966614, "poz-vnc"} + +m["nvm"] = {"Namiae", 12952922} + +m["nvo"] = {"Nyokon", nil, "nic-mbw", Latn} + +m["nwa"] = {"Nawathinehena", 6982892, "alg-ara", Latn} + +m["nwb"] = {"Nyabwa", 33664, "kro-wee"} + +m["nwc"] = {"Classical Newar", 5128301, "sit-new"} + +m["nwe"] = {"Ngwe", 36181, "bai", Latn} + +m["nwi"] = {"Southwest Tanna", 3504488} + +m["nwm"] = {"Nyamusa-Molo", 12747951, "csu-bbk"} + +m["nwo"] = {"Nauo", 6981305, "aus-pam", Latn} + +m["nwr"] = {"Nawaru", 12638166, "ngf"} + +m["nwx"] = {"Middle Newar", nil, "sit-new", ancestors = {"nwc"}} + +m["nwy"] = {"Nottoway-Meherrin", nil, "iro"} + +m["nxa"] = {"Nauete", 6981095, "poz-tim"} + +m["nxd"] = {"Ngando (Congo)", 3913277, "bnt-ske"} + +m["nxe"] = {"Nage", 2295569, "poz-cet"} + +m["nxg"] = {"Ngadha", 1516651, "poz-cet", Latn} + +m["nxi"] = {"Nindi", 7038230, "bnt-mbi", Latn} + +m["nxl"] = {"South Nuaulu", 18544857, "poz-cma"} + +m["nxm"] = { + "Numidian", 35761, "afa", {"Tfng", "Latn"} -- Tfng may not support all the needed characters +} + +m["nxn"] = {"Ngawun", 3915711, "aus-pam", Latn} + +m["nxo"] = {"Ndambomo", 6983681, "bnt-kel", Latn} + +m["nxq"] = {"Naxi", 2478711, "sit-nax"} + +m["nxr"] = {"Ninggerum", 11732526, "ngf-okk", Latn} + +m["nxu"] = {"Narau", 6965452, "ngf", Latn} + +m["nxx"] = {"Nafri", 6958211, "paa-sen", Latn} + +m["nyb"] = {"Nyangbo", 36256, "alv-ktg", Latn} + +m["nyc"] = {"Nyanga-li", 7070876, "bnt-boa", Latn} + +m["nyd"] = {"Nyole (Kenya)", 7071227, 
"bnt-msl", Latn} + +m["nye"] = {"Nyengo", 7071068, "bnt-clu", Latn} + +m["nyf"] = {"Giryama", 3107606, "bnt-mij", Latn} + +m["nyg"] = {"Nyindu", 11030685, "bnt-shh", Latn} + +m["nyh"] = {"Nyigina", 3913780, "aus-nyu", Latn} + +m["nyi"] = {"Nyimang", 34846, "sdv-nyi", Latn} + +m["nyj"] = {"Nyanga (Congo)", 7070879, "bnt-nyb", Latn} + +m["nyk"] = {"Nyaneka", 10962298, "bnt-swb", Latn} + +m["nyl"] = {"Nyeu", 3033578, "mkh-kat"} + +m["nym"] = {"Nyamwezi", 4121131, "bnt-tkm", Latn} + +m["nyn"] = {"Nyankole", 13207, "bnt-nyg", Latn} + +m["nyo"] = {"Nyoro", 33794, "bnt-nyg", Latn} + +m["nyp"] = {"Nyang'i", 7070894, "ssa-klk"} + +m["nys"] = {"Nyunga", 7049771, "aus-pam", Latn} + +m["nyt"] = {"Nyawaygi", 3915783, "aus-dyb"} + +m["nyu"] = {"Nyungwe", 7071318, "bnt-sna", Latn} + +m["nyv"] = {"Nyulnyul", 3442732, "aus-nyu", Latn} + +m["nyw"] = { + "Nyaw", 26425602, "tai", {"Thai", "Latn"} -- Vietnamese alphabet +} + +m["nyx"] = {"Nganyaywana", 3913800, "aus-cww", Latn} + +m["nyy"] = {"Nyakyusa", 3272620, "bnt-run", Latn} + +m["nza"] = {"Tigon Mbembe", 36518, "nic-jkn", Latn} + +m["nzb"] = {"Njebi", 35923, "bnt-nze", Latn} + +m["nzd"] = {"Nzadi", 17152586, "bnt-bdz", Latn, entry_name = {from = {"[ÀÂǍÁ]", "[àâǎá]", "[ÈÊĚÉ]", "[èêěé]", "[ÌÎǏÍ]", "[ìîǐí]", "[ÒÔǑÓ]", "[òôǒó]", "[ÙÛǓÚ]", "[ùûǔú]", "[ǹń]", "ḿ", "[`ˋ]", GRAVE, CIRC, CARON, ACUTE}, to = {"A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "n", "m"}}} + +m["nzi"] = {"Nzima", 36337, "alv-ctn"} + +m["nzk"] = {"Nzakara", 3913339, "znd", Latn} + +m["nzm"] = {"Zeme Naga", 21491053, "sit-zem"} + +m["nzs"] = {"New Zealand Sign Language", 36239, "sgn"} + +m["nzu"] = {"Central Teke", 36473, "bnt-tkc"} + +m["nzy"] = {"Nzakambay", 36374, "alv-mbm", Latn} + +m["nzz"] = {"Nanga Dama Dogon", 6963443, "nic-nwa"} + +return m diff --git a/wiktra/wikt/translit/languages/data3/o.lua b/wiktra/wikt/translit/languages/data3/o.lua new file mode 100644 index 0000000..23d60cc --- /dev/null +++ b/wiktra/wikt/translit/languages/data3/o.lua @@ -0,0 
+1,348 @@ +local u = mw.ustring.char + +-- UTF-8 encoded strings for some commonly used diacritics +local GRAVE = u(0x0300) +local ACUTE = u(0x0301) +local CIRC = u(0x0302) +local TILDE = u(0x0303) +local MACRON = u(0x0304) +local BREVE = u(0x0306) +local DOTABOVE = u(0x0307) +local DIAER = u(0x0308) +local CARON = u(0x030C) +local DGRAVE = u(0x030F) +local INVBREVE = u(0x0311) +local DOTBELOW = u(0x0323) +local RINGBELOW = u(0x0325) +local CEDILLA = u(0x0327) + +local Latn = {"Latn"} + +local m = {} + +m["oaa"] = {"Orok", 33928, "tuw", Latn} + +m["oac"] = {"Oroch", 33650, "tuw", {"Latn", "Cyrl"}} + +m["oav"] = {"Old Avar", nil, "cau-ava", {"Geor"}} + +m["obi"] = {"Obispeño", 1288385, "nai-chu", Latn} + +m["obk"] = {"Southern Bontoc", nil, "phi", Latn} + +m["obl"] = {"Oblo", 36309} + +m["obm"] = {"Moabite", 36385, "sem-can", {"Phnx"}, translit_module = "Phnx-translit"} + +m["obo"] = {"Obo Manobo", 12953699, "mno", Latn} + +m["obr"] = { + "Old Burmese", 17006600, "tbq-brm", {"Mymr", "Latn"} -- and also Pallava +} + +m["obt"] = {"Old Breton", 3558112, "cel-bry", Latn} + +m["obu"] = {"Obulom", 3813403, "nic-cde", Latn} + +m["oca"] = {"Ocaina", 3182577, "sai-wit", Latn} + +m["och"] = {"Old Chinese", 35137, "zhx", {"Hani"}} + +m["oco"] = {"Old Cornish", 48304520, "cel-bry", Latn} + +m["ocu"] = {"Tlahuica", 10751739, "omq", Latn} + +m["oda"] = {"Odut", 3915388, "nic-uce", Latn, ancestors = {"mfn"}} + +m["odk"] = {"Od", 7077191, "inc-wes", {"Arab"}} + +m["odt"] = {"Old Dutch", 443089, "gmw", {"Latn", "Runr"}, ancestors = {"gmw-pro"}, entry_name = {from = {"[ĀÂ]", "[āâ]", "[ĒÊ]", "[ēê]", "[ĪÎ]", "[īî]", "[ŌÔ]", "[ōô]", "[ŪÛ]", "[ūû]"}, to = {"A", "a", "E", "e", "I", "i", "O", "o", "U", "u"}}} + +m["odu"] = {"Odual", 3813392, "nic-cde", Latn} + +m["ofo"] = {"Ofo", 3349758, "sio-ohv"} + +m["ofs"] = {"Old Frisian", 35133, "gmw-fri", Latn, entry_name = {from = {"[ĀÂ]", "[āâ]", "[ĒÊ]", "[ēê]", "[ĪÎ]", "[īî]", "[ŌÔ]", "[ōô]", "[ŪÛ]", "[ūû]"}, to = {"A", "a", "E", "e", "I", "i", 
"O", "o", "U", "u"}}} + +m["ofu"] = {"Efutop", 35297, "nic-eko", Latn} + +m["ogb"] = {"Ogbia", 3813400, "nic-cde", Latn} + +m["ogc"] = {"Ogbah", 36291, "alv-igb", Latn} + +m["oge"] = {"Old Georgian", 34834, "ccs-gzn", {"Geor"}, translit_module = "Geor-translit", override_translit = true, entry_name = {from = {"̂"}, to = {""}}} + +m["ogg"] = {"Ogbogolo", 3813405, "nic-cde", Latn} + +m["ogo"] = {"Khana", 3914409, "nic-ogo", Latn} + +m["ogu"] = {"Ogbronuagum", 3914485, "nic-cde", Latn} + +m["ohu"] = {"Old Hungarian", nil, "urj-ugr", Latn} + +m["oia"] = {"Oirata", 56738, "ngf", Latn} + +m["oin"] = {"Inebu One", 12953782, "qfa-tor"} + +m["ojb"] = {"Northwestern Ojibwa", 7060356, "alg", Latn, ancestors = {"oj"}} + +m["ojc"] = {"Central Ojibwa", 5061548, "alg", Latn, ancestors = {"oj"}} + +m["ojg"] = {"Eastern Ojibwa", 5330342, "alg", Latn, ancestors = {"oj"}} + +m["ojp"] = {"Old Japanese", 5736700, "jpx", {"Jpan"}} + +m["ojs"] = {"Severn Ojibwa", 56494, "alg", Latn, ancestors = {"oj"}} + +m["ojv"] = {"Ontong Java", 7095071, "poz-pnp", Latn} + +m["ojw"] = {"Western Ojibwa", 3474222, "alg", Latn, ancestors = {"oj"}} + +m["oka"] = {"Okanagan", 2984602, "sal", Latn} + +m["okb"] = {"Okobo", 3813398, "nic-lcr", Latn} + +m["okd"] = {"Okodia", 36300, "ijo"} + +m["oke"] = {"Okpe (Southwestern Edo)", 268924, "alv-swd", Latn} + +m["okg"] = {"Kok-Paponk", nil, "aus-pmn", Latn} + +m["okh"] = {"Koresh-e Rostam", 6432160, "xme-ttc", ancestors = {"xme-ttc-cen"}} + +m["oki"] = {"Okiek", 56367, "sdv-kln", Latn} + +m["okj"] = {"Oko-Juwoi", 3436832, "qfa-adc"} + +m["okk"] = {"Kwamtim One", 19830649, "qfa-tor", Latn} + +m["okl"] = {"Old Kentish Sign Language", 7084319, "sgn"} + +m["okm"] = {"Middle Korean", 715339, "qfa-kor", {"Kore"}, ancestors = {"oko"}, entry_name = {from = {"[〮〯]"}, to = {""}}} + +m["okn"] = {"Oki-No-Erabu", 3350036, "jpx-ryu", {"Jpan"}} + +m["oko"] = {"Old Korean", 715364, "qfa-kor", {"Kore"}} + +m["okr"] = {"Kirike", 11006763, "ijo"} + +m["oks"] = {"Oko-Eni-Osayen", 
36302, "alv-von", Latn} + +m["oku"] = {"Oku", 36289, "nic-rnc", Latn} + +m["okv"] = {"Orokaiva", 7103752, "ngf", Latn} + +m["okx"] = {"Okpe (Northwestern Edo)", 7082547, "alv-nwd", Latn} + +m["old"] = {"Mochi", 12952852, "bnt-chg", Latn} + +m["ole"] = {"Olekha", 3695204, "sit-bdi"} + +m["olm"] = {"Oloma", 3441166, "alv-nwd", Latn} + +m["olo"] = {"Livvi", 36584, "fiu-fin", Latn} + +m["olr"] = {"Olrat", 3351562, "poz-vnc"} + +m["olt"] = {"Old Lithuanian", 17417801, "bat", Latn, entry_name = {from = {"[áãà]", "[éẽè]", "[íĩì]", "[ýỹ]", "ñ", "[óõò]", "[úù]", ACUTE, GRAVE, TILDE}, to = {"a", "e", "i", "y", "n", "o", "u"}}} + +m["olu"] = {"Kuvale", 6448765, "bnt-swb", Latn} + +m["oma"] = {"Omaha-Ponca", 2917968, "sio-dhe", Latn} + +m["omb"] = {"Omba", 2841471, "poz-vnc", Latn} + +m["omc"] = {"Mochica", 1951641} + +m["omg"] = {"Omagua", 33663, "tup-gua", Latn} + +m["omi"] = {"Omi", 56795, "csu-mma"} + +m["omk"] = {"Omok", 4334657, "qfa-yuk", {"Cyrl"}, translit_module = "omk-translit"} + +m["oml"] = {"Ombo", 7089928, "bnt-tet", Latn} + +m["omn"] = {"Minoan", 1669994, nil, {"Lina"}} + +m["omo"] = {"Utarmbung", 7902577, "ngf", Latn} + +m["omp"] = {"Old Manipuri", nil, "sit"} + +m["omr"] = {"Old Marathi", nil, "inc-sou", {"Deva", "Modi"}, ancestors = {"pmh"}, translit_module = "translit-redirect"} + +m["omt"] = {"Omotik", 36313, "sdv-nis"} + +m["omu"] = {"Omurano", 1957612} + +m["omw"] = {"South Tairora", 20210553, "paa-kag", Latn} + +m["omx"] = { + "Old Mon", nil, "mkh-mnc", {"Mymr", "Latn"} -- and also Pallava +} + +m["ona"] = {"Selk'nam", 2721227, "sai-cho", Latn} + +m["onb"] = {"Lingao", 7093790, "qfa-onb"} + +m["one"] = {"Oneida", 857858, "iro", Latn} + +m["ong"] = {"Olo", 592162, "qfa-tor", Latn} + +m["oni"] = {"Onin", 7093910, "poz-cet", Latn} + +m["onj"] = {"Onjob", 7093968, "ngf", Latn} + +m["onk"] = {"Kabore One", 12953783, "qfa-tor", Latn} + +m["onn"] = {"Onobasulu", 7094437, "ngf", Latn} + +m["ono"] = {"Onondaga", 1077450, "iro", Latn} + +m["onp"] = {"Sartang", 
7424639, "sit-khb"} + +m["onr"] = {"Northern One", 19830648, "qfa-tor", Latn} + +m["ons"] = {"Ono", 11732548, "ngf", Latn} + +m["ont"] = {"Ontenu", 3352827} + +m["onu"] = {"Unua", 3552042, "poz-vnc", Latn} + +m["onw"] = {"Old Nubian", 2268, "nub", {"Copt"}} + +m["onx"] = {"Pidgin Onin", 12953788, "crp", Latn, ancestors = {"oni"}} + +m["ood"] = {"O'odham", 2393095, "azc", Latn} + +m["oog"] = {"Ong", 12953787, "mkh-kat"} + +m["oon"] = {"Önge", 2475551, "qfa-ong"} + +m["oor"] = {"Oorlams", 2484337} + +m["oos"] = {"Old Ossetic", nil, "xsc", {"Grek", "Latn"}, translit_module = "grc-translit", ancestors = {"os-pro"}} + +m["opa"] = {"Okpamheri", 3913331, "alv-nwd", Latn} + +m["opk"] = {"Kopkaka", 6431129, "ngf-okk", Latn} + +m["opm"] = {"Oksapmin", 1068097, "ngf", Latn} + +m["opo"] = {"Opao", 7095585, "ngf", Latn} + +m["opt"] = {"Opata", 2304583, "azc-trc", Latn} + +m["opy"] = {"Ofayé", 3446691, "sai-mje", Latn} + +m["ora"] = {"Oroha", 36298, "poz-sls"} + +m["ore"] = {"Orejón", 3355834, "sai-tuc", Latn} + +m["org"] = {"Oring", 3915308, "nic-ucn", Latn} + +m["orh"] = {"Oroqen", 1367309, "tuw", Latn} + +m["oro"] = {"Orokolo", 7103758, "ngf", Latn} + +m["orr"] = {"Oruma", 36299, "ijo", Latn} + +m["ort"] = {"Adivasi Oriya", 12953791, "inc-eas", {"Orya"}, ancestors = {"or"}} + +m["oru"] = {"Ormuri", 33740, "ira-orp", {"fa-Arab"}} + +m["orv"] = { + "Old East Slavic", + 35228, + "zle", + {"Cyrs"}, + translit_module = "Cyrs-Glag-translit", + entry_name = { + from = {u(0x0484)}, -- kamora + to = {} + }, + sort_key = {from = {"оу", "є"}, to = {"у", "е"}} +} + +m["orw"] = {"Oro Win", 3450423, "sai-cpc", Latn} + +m["orx"] = {"Oro", 3813396, "nic-lcr", Latn} + +m["orz"] = {"Ormu", 7103494, "poz-ocw", Latn} + +m["osa"] = {"Osage", 2600085, "sio-dhe", {"Latn", "Osge"}} + +m["osc"] = {"Oscan", 36653, "itc", {"Ital", "Latn"}, translit_module = "Ital-translit"} + +m["osi"] = {"Osing", 2701322, "poz-sus", Latn} + +m["oso"] = {"Ososo", 3913398, "alv-yek", Latn} + +m["osp"] = {"Old Spanish", 
1088025, "roa-ibe", Latn} + +m["ost"] = {"Osatu", 36243, "nic-grs", Latn} + +m["osu"] = {"Southern One", 12953785, "qfa-tor", Latn} + +m["osx"] = {"Old Saxon", 35219, "gmw", Latn, entry_name = {from = {"[ĀÂ]", "[āâ]", "[ĒÊ]", "[ēê]", "[ĪÎ]", "[īî]", "[ŌÔ]", "[ōô]", "[ŪÛ]", "[ūû]"}, to = {"A", "a", "E", "e", "I", "i", "O", "o", "U", "u"}}} + +m["ota"] = {"Ottoman Turkish", 36730, "trk-ogz", {"ota-Arab", "Armn"}, ancestors = {"trk-oat"}} + +m["otd"] = {"Ot Danum", 3033781, "poz-brw", Latn} + +m["ote"] = {"Mezquital Otomi", 23755711, "oto-otm", Latn} + +m["oti"] = {"Oti", 3357881} + +m["otk"] = {"Old Turkic", 34988, "trk", {"Orkh"}, translit_module = "Orkh-translit"} + +m["otl"] = {"Tilapa Otomi", 7802050, "oto-otm", Latn} + +m["otm"] = {"Eastern Highland Otomi", 13581718, "oto-otm", Latn} + +m["otn"] = {"Tenango Otomi", 25559589, "oto-otm", Latn} + +m["otq"] = {"Querétaro Otomi", 23755688, "oto-otm", Latn} + +m["otr"] = {"Otoro", 36328, "alv-hei"} + +m["ots"] = {"Estado de México Otomi", 7413841, "oto-otm", Latn} + +m["ott"] = {"Temoaya Otomi", 7698191, "oto-otm", Latn} + +m["otu"] = {"Otuke", 7110049, "sai-mje", Latn} + +m["otw"] = {"Ottawa", 133678, "alg", Latn, ancestors = {"oj"}} + +m["otx"] = {"Texcatepec Otomi", 25559590, "oto-otm", Latn} + +m["oty"] = {"Old Tamil", 20987452, "dra", {"Brah"}, translit_module = "Brah-translit"} + +m["otz"] = {"Ixtenco Otomi", 6101171, "oto-otm", Latn} + +m["oua"] = {"Tagargrent", 56586, "ber"} + +m["oub"] = {"Glio-Oubi", 3914977, "kro-grb"} + +m["oue"] = {"Oune", 7110521, "paa-sbo"} + +m["oui"] = {"Old Uyghur", nil, "trk-sib", {"Latn", "Brah", "Mani", "Syrc"}} + +m["oum"] = {"Ouma", 7110494, "poz-ocw", Latn} + +m["ovd"] = {"Elfdalian", 254950, "gmq", Latn, ancestors = {"non"}} + +m["owi"] = {"Owiniga", 56454, "qfa-mal", Latn} + +m["owl"] = {"Old Welsh", 2266723, "cel-bry", Latn} + +m["oyb"] = {"Oy", 13593748, "mkh-ban"} + +m["oyd"] = {"Oyda", 7116251, "omv-nom"} + +m["oym"] = {"Wayampi", 7975842, "tup-gua", Latn} + +m["oyy"] = 
{"Oya'oya", 7116243, "poz-ocw", Latn} + +m["ozm"] = {"Koonzime", 35566, "bnt-ndb", Latn} + +return m diff --git a/wiktra/wikt/translit/languages/data3/p.lua b/wiktra/wikt/translit/languages/data3/p.lua new file mode 100644 index 0000000..bff39f4 --- /dev/null +++ b/wiktra/wikt/translit/languages/data3/p.lua @@ -0,0 +1,718 @@ +local u = mw.ustring.char + +-- UTF-8 encoded strings for some commonly-used diacritics +local GRAVE = u(0x0300) +local ACUTE = u(0x0301) +local CIRC = u(0x0302) +local TILDE = u(0x0303) +local MACRON = u(0x0304) +local BREVE = u(0x0306) +local DOTABOVE = u(0x0307) +local DIAER = u(0x0308) +local CARON = u(0x030C) +local DGRAVE = u(0x030F) +local INVBREVE = u(0x0311) +local DOTBELOW = u(0x0323) +local RINGBELOW = u(0x0325) +local CEDILLA = u(0x0327) + +local Latn = {"Latn"} + +local m = {} + +m["pab"] = {"Pareci", 3504312, "awd", Latn} + +m["pac"] = {"Pacoh", 3441136, "mkh-kat", Latn} + +m["pad"] = {"Paumarí", 389827, "auf", Latn} + +m["pae"] = {"Pagibete", 7124357, "bnt-bta", Latn} + +m["paf"] = {"Paranawát", 12953806, "tup-gua", Latn} + +m["pag"] = {"Pangasinan", 33879, "phi", Latn} + +m["pah"] = {"Tenharim", 10266010, "tup-gua", Latn} + +m["pai"] = {"Pe", 3914871, "nic-tar", Latn} + +m["pak"] = {"Parakanã", 12953804, "tup-gua", Latn} + +m["pal"] = { + "Middle Persian", + 32063, + "ira-swi", + {"Latn", "Phli", "pal-Avst", "Mani", "Phlp", "Phlv"}, -- Latn for translit; Phlv not in Unicode + translit_module = "translit-redirect", + ancestors = {"peo"} +} + +m["pam"] = { + "Kapampangan", 36121, "phi", Latn -- also Kulitan, which lacks a code +} + +m["pao"] = {"Northern Paiute", 3360656, "azc-num", Latn} + +m["pap"] = {"Papiamentu", 33856, "crp", Latn, ancestors = {"pt"}} + +m["paq"] = {"Parya", 1135134, "inc-cen", ancestors = {"psu"}} + +m["par"] = {"Panamint", 33926, "azc-num", Latn} + +m["pas"] = {"Papasena", 7132508, "paa-lkp", Latn} + +m["pat"] = {"Papitalai", 6528659, "poz-aay", Latn} + +m["pau"] = {"Palauan", 33776, "poz-sus", Latn} + 
+m["pav"] = {"Wari'", 3027909, "sai-cpc", Latn} + +m["paw"] = {"Pawnee", 56751, "cdd", Latn} + +m["pax"] = {"Pankararé", 25559779, nil, Latn} + +m["pay"] = {"Pech", 4898889, "cba", Latn} + +m["paz"] = {"Pankararú", 7131310, nil, Latn} + +m["pbb"] = {"Páez", 33677, nil, Latn} + +m["pbc"] = {"Patamona", 3915921, "sai-car", Latn} + +m["pbe"] = {"Mezontla Popoloca", 42365630, "omq-pop", Latn} + +m["pbf"] = {"Coyotepec Popoloca", 5180100, "omq-pop", Latn} + +m["pbg"] = {"Paraujano", 3501747, "awd-taa", Latn, ancestors = {"awd-taa-pro"}} + +m["pbh"] = {"Panare", 56610, "sai-car", Latn} + +m["pbi"] = {"Podoko", 3515096, "cdc-cbm", Latn} + +m["pbl"] = {"Mak (Nigeria)", 3915349, "alv-bwj", Latn} + +m["pbm"] = {"Puebla Mazatec", nil, "omq-maz", Latn} + +m["pbn"] = {"Kpasam", 3914902, "alv-mye", Latn} + +m["pbo"] = {"Papel", 36314, "alv-pap", Latn} + +m["pbp"] = {"Badyara", 35095, "alv-ten", Latn} + +m["pbr"] = {"Pangwa", 3847550, "bnt-bki", Latn} + +m["pbs"] = {"Central Pame", 3361763, "omq", Latn} + +m["pbv"] = {"Pnar", 3501850, "aav-pkl", Latn} + +m["pby"] = {"Pyu", 2567925, "paa-asa", Latn} + +m["pca"] = {"Santa Inés Ahuatempan Popoloca", 42365276, "omq-pop", Latn} + +m["pcb"] = {"Pear", 6583669, "mkh-pea", {"Khmr"}} + +m["pcc"] = {"Bouyei", 35100, "tai-nor", {"Latn", "Hani"}, sort_key = {from = {"%p"}, to = {""}}} + +m["pcd"] = {"Picard", 34024, "roa-oil", Latn, ancestors = {"fro"}, sort_key = {from = {"[áàâä]", "[éèêë]", "[íìîï]", "[óòôö]", "[úùûü]", "[ýỳŷÿ]", "ç", "'"}, to = {"a", "e", "i", "o", "u", "y", "c"}}} + +m["pce"] = {"Ruching Palaung", 12953798, "mkh-pal"} + +m["pcf"] = {"Paliyan", 7127643, "dra"} + +m["pcg"] = {"Paniya", 7131211, "dra"} + +m["pch"] = {"Pardhan", 7133207, "dra", ancestors = {"gon"}} + +m["pci"] = {"Duruwa", 56753, "dra", {"Deva", "Orya"}} + +m["pcj"] = {"Parenga", 3111396, "mun"} + +m["pck"] = {"Paite", 12952337, "tbq-kuk"} + +m["pcl"] = {"Pardhi", 7136554, "inc-bhi"} + +m["pcm"] = {"Nigerian Pidgin", 33655, "crp", Latn, ancestors = {"en"}} + 
+m["pcn"] = {"Piti", 3913375, "nic-kne", Latn} + +m["pcp"] = {"Pacahuara", 2591165, "sai-pan", Latn} + +m["pcw"] = {"Pyapun", 3438807, nil, Latn} + +m["pda"] = {"Anam", 3501930, "ngf-mad", Latn} + +m["pdc"] = {"Pennsylvania German", 22711, "gmw", Latn, ancestors = {"gmw-rfr"}} + +m["pdi"] = {"Pa Di", 3359940, nil, Latn} + +m["pdn"] = {"Fedan", 7206699, "poz-ocw", Latn} + +m["pdo"] = {"Padoe", 3360370, "poz-btk", Latn} + +m["pdt"] = {"Plautdietsch", 1751432, "gmw", Latn, ancestors = {"nds-de"}} + +m["pdu"] = {"Kayan", 7123283} + +m["pea"] = {"Peranakan Indonesian", 653415, nil, Latn} + +m["peb"] = {"Eastern Pomo", 3396032, "nai-pom", Latn} + +m["ped"] = {"Mala (New Guinea)", 11732569, "ngf-mad", Latn} + +m["pee"] = {"Taje", 12953902, nil, Latn} + +m["pef"] = {"Northeastern Pomo", 3396018, "nai-pom", Latn} + +m["peg"] = {"Pengo", 56758, "dra", {"Orya"}} + +m["peh"] = {"Bonan", 32983, "xgn"} + +m["pei"] = {"Chichimeca-Jonaz", 3915427, "omq-otp", Latn} + +m["pej"] = {"Northern Pomo", 3396021, "nai-pom", Latn} + +m["pek"] = {"Penchal", 3374631, "poz-aay", Latn} + +m["pel"] = {"Pekal", 3241781, nil, Latn} + +m["pem"] = {"Phende", 7162372, "bnt-pen", Latn} + +m["peo"] = {"Old Persian", 35225, "ira-swi", {"Xpeo", "Latinx"}, translit_module = "peo-translit"} + +m["pep"] = {"Kunja", 6444807, nil, Latn} + +m["peq"] = {"Southern Pomo", 3396023, "nai-pom", Latn} + +-- "pes" IS TREATED AS "fa" (or as etymology-only), SEE WT:LT + +m["pev"] = {"Pémono", 3439012, "sai-car", Latn} + +m["pex"] = {"Petats", 3376353, "poz-ocw", Latn} + +m["pey"] = {"Petjo", 940486, nil, Latn} + +m["pez"] = {"Eastern Penan", 18638342, "poz-swa", Latn} + +m["pfa"] = {"Pááfang", 3063517, "poz-mic", Latn} + +m["pfe"] = {"Peere", 36377, "alv-dur", Latn} + +m["pga"] = {"Juba Arabic", 1262143, "crp", Latn, ancestors = {"apd"}} + +m["pgd"] = {"Gandhari", nil, "inc-mid", {"Deva", "Khar"}, ancestors = {"inc-ash"}, translit_module = "Khar-translit"} + +m["pgg"] = {"Pangwali", 13600429, "him", {"Deva", "Takr"}, 
translit_module = "hi-translit"} + +m["pgi"] = {"Pagi", 7124354, "paa-brd", Latn} + +m["pgk"] = {"Rerep", 586907, "poz-vnc", Latn} + +m["pgl"] = {"Primitive Irish", 3320030, "cel-gae", {"Ogam"}, translit_module = "pgl-translit"} + +m["pgn"] = {"Paelignian", nil, "itc", Latn} + +m["pgs"] = {"Pangseng", 3914027, "alv-mum", Latn} + +m["pgu"] = {"Pagu", 7124462, "paa-wpa", Latn} + +m["pgz"] = {"Papua New Guinean Sign Language", 25044405, "sgn"} + +m["pha"] = {"Pa-Hng", 2625410, "hmn"} + +m["phd"] = {"Phudagi", 7188289} + +m["phg"] = {"Phuong", 7188376, "mkh-kat"} + +m["phh"] = {"Phukha", 7188298, "tbq-lol"} + +m["phk"] = { + "Phake", + 7675798, + "tai-swe", + {"Mymr"}, + entry_name = { + from = {u(0xFE00)}, -- VS01 + to = {""} + } +} + +m["phl"] = {"Phalura", 2449549, "inc-dar"} + +m["phm"] = {"Phimbi", 11007144, "bnt-sna", Latn} + +m["phn"] = {"Phoenician", 36734, "sem-can", {"Phnx"}, translit_module = "Phnx-translit"} + +m["pho"] = {"Phunoi", 7188361, "tbq-lol"} + +m["phq"] = {"Phana'", 7180427, "tbq-lol"} + +m["phr"] = {"Pahari-Potwari", 33739, "inc-pan", {"fa-Arab", "Latn"}, ancestors = {"lah"}} + +m["pht"] = {"Phu Thai", 3626597, "tai-swe"} + +m["phu"] = {"Phuan", 3915665} + +m["phv"] = {"Pahlavani", 7124567} + +m["phw"] = {"Phangduwali", 12953036, "sit-kie", ancestors = {"ybh"}} + +m["pia"] = {"Pima Bajo", 3388544, "azc", Latn} + +m["pib"] = {"Yine", 3135432, "awd", Latn} + +m["pic"] = {"Pinji", 36296, "bnt-tso", Latn} + +m["pid"] = {"Piaroa", 3382207, nil, Latn} + +m["pie"] = {"Piro", 7198055, "nai-kta", Latn} + +m["pif"] = {"Pingelapese", 36421, "poz-mic", Latn} + +m["pig"] = {"Pisabo", 966883, "sai-pan", Latn} + +m["pih"] = {"Pitcairn-Norfolk", 36554, "crp", Latn, ancestors = {"en"}} + +m["pii"] = {"Pini", 10631925} + +m["pij"] = {"Pijao", 7193519} + +m["pil"] = {"Yom", 36893, "nic-yon"} + +m["pim"] = {"Powhatan", 2270532, "alg-eas", Latn} + +m["pin"] = {"Piame", 7190042} + +m["pio"] = {"Piapoco", 3382208, "awd-nwk", Latn} + +m["pip"] = {"Pero", 2411063, 
"cdc-wst"} + +m["pir"] = {"Piratapuyo", 3389119, "sai-tuc", Latn} + +m["pis"] = {"Pijin", 36699, "crp", Latn, ancestors = {"en"}} + +m["pit"] = {"Pitta-Pitta", 6433116, "aus-kar", Latn} + +m["piu"] = {"Pintupi-Luritja", 2591175, "aus-pam"} + +m["piv"] = {"Pileni", 2976736, "poz-pnp", Latn} + +m["piw"] = {"Pimbwe", 3894132, "bnt-mwi"} + +m["pix"] = {"Piu", 7199578} + +m["piy"] = {"Piya-Kwonci", 3440492} + +m["piz"] = {"Pije", 3388339, "poz-cln", Latn} + +m["pjt"] = {"Pitjantjatjara", 2982063, "aus-pam", {"pjt-Latn"}} + +m["pka"] = {"Ardhamagadhi Prakrit", 2652214, "inc-mid", {"Brah", "Deva"}, translit_module = "Brah-translit", ancestors = {"inc-pra"}} + +m["pkb"] = {"Kipfokomo", 7208693, "bnt-sab", Latn} + +m["pkc"] = {"Baekje", 4841264, "qfa-kor", {"Hani"}} + +m["pkg"] = {"Pak-Tong", 3360711} + +m["pkh"] = {"Pankhu", 7130962, "tbq-kuk"} + +m["pkn"] = {"Pakanha", 954916, "aus-pmn"} + +m["pko"] = {"Pökoot", 36323, "sdv-kln"} + +m["pkp"] = {"Pukapukan", 36447, "poz-pnp", Latn} + +m["pkr"] = {"Attapady Kurumba", 16835180, "dra"} + +m["pks"] = {"Pakistan Sign Language", 22964057, "sgn"} + +m["pkt"] = {"Maleng", 6583562, "mkh-vie"} + +m["pku"] = {"Paku", 2932604} + +m["pla"] = {"Miani", 12952844, nil, Latn} + +m["plb"] = {"Polonombauk", 7225957, "poz-vnc", Latn} + +m["plc"] = {"Central Palawano", 12953795, "phi", Latn} + +m["ple"] = {"Palu'e", 2196866, "poz-cet", Latn} + +m["plg"] = {"Pilagá", 2748259, "sai-guc", Latn} + +m["plh"] = {"Paulohi", 7155331, "poz-cma"} + +m["plj"] = {"Polci", 3914383} + +m["plk"] = {"Kohistani Shina", 12953882, "inc-dar"} + +m["pll"] = {"Shwe Palaung", 27941664, "mkh-pal"} + +m["pln"] = {"Palenquero", 36665, "crp", Latn, ancestors = {"es"}} + +m["plo"] = {"Oluta Popoluca", 5908687, "nai-miz", Latn} + +m["plq"] = {"Palaic", 36582, "ine-ana", {"Xsux"}} + +m["plr"] = {"Palaka Senoufo", 36346, "alv-snf", Latn} + +m["pls"] = {"San Marcos Tlalcoyalco Popoloca", 12641692, "omq-pop", Latn} + +m["plu"] = {"Palikur", 3073448, "awd", Latn} + +m["plv"] = 
{"Southwest Palawano", 15614922, "phi", Latn} + +m["plw"] = {"Brooke's Point Palawano", 12953796, "phi", Latn} + +m["ply"] = {"Bolyu", 3361723, "mkh-pkn", Latn} + +m["plz"] = {"Paluan", 7128795, nil, Latn} + +m["pma"] = {"Paama", 3130286, "poz-vnc", Latn} + +m["pmb"] = {"Pambia", 36267, "znd", Latn} + +m["pmd"] = {"Pallanganmiddang", 7127734, "aus-pam", Latn} + +m["pme"] = {"Pwaamei", 3411152, "poz-cln", Latn} + +m["pmf"] = {"Pamona", 3513320, "poz-kal", Latn} + +m["pmh"] = {"Maharastri Prakrit", 2586773, "inc-mid", {"Brah", "Deva"}, ancestors = {"inc-pra"}, translit_module = "Brah-translit"} + +m["pmi"] = {"Northern Pumi", 3403245, "sit-qia"} + +m["pmj"] = {"Southern Pumi", 3403246, "sit-qia"} + +m["pmk"] = {"Pamlico", nil, "alg-eas", Latn} + +m["pml"] = {"Sabir", 636479, "crp", Latn, ancestors = {"lij", "pro", "vec"}} + +m["pmm"] = {"Pol", 36408, "bnt-kak", Latn} + +m["pmn"] = {"Pam", 7129017, "alv-mbm"} + +m["pmo"] = {"Pom", 7227178, "poz-hce", Latn} + +m["pmq"] = {"Northern Pame", 3361762, "omq", Latn} + +m["pmr"] = {"Paynamar", 3450824} + +m["pms"] = {"Piedmontese", 15085, "roa-git", Latn} + +m["pmt"] = {"Tuamotuan", 36763, "poz-pep", Latn} + +m["pmu"] = {"Mirpur Panjabi", 6874480} + +m["pmw"] = {"Plains Miwok", 3391031, "nai-you", Latn} + +m["pmx"] = {"Poumei Naga", 12952910, "tbq-anp"} + +m["pmy"] = {"Papuan Malay", 12473446, nil, Latn} + +m["pmz"] = {"Southern Pame", 3361765, "omq", Latn} + +m["pna"] = {"Punan Bah-Biau", 4842201} + +m["pnb"] = {"Western Panjabi", 58635, "inc-pan", {"pa-Arab"}, ancestors = {"pa"}} + +m["pnc"] = {"Pannei", 7131391} + +m["pnd"] = {"Mpinda", 63308194, "bnt-kmb"} + +m["pne"] = {"Western Penan", 12953808, "poz-swa", Latn} + +m["png"] = {"Pongu", 36282, "nic-shi"} + +m["pnh"] = {"Penrhyn", 3130301, "poz-pep", Latn} + +m["pni"] = {"Aoheng", 4778608, "poz"} + +m["pnj"] = {"Pinjarup", 33103591} + +m["pnk"] = {"Paunaca", 2064378, "awd", Latn} + +m["pnl"] = {"Paleni", 7127118, "alv-wan", Latn} + +m["pnm"] = {"Punan Batu", 7259892} + 
+m["pnn"] = {"Pinai-Hagahai", 5638511} + +m["pno"] = {"Panobo", 3141869, "sai-pan", Latn} + +m["pnp"] = {"Pancana", 7130204} + +m["pnq"] = {"Pana (West Africa)", 7129739, "nic-gnn", Latn} + +m["pnr"] = {"Panim", 11732562, "ngf-mad"} + +m["pns"] = {"Ponosakan", 7227956, "phi"} + +m["pnt"] = { + "Pontic Greek", + 36748, + "grk", + {"Grek", "Latn", "Cyrl"}, + ancestors = {"grc"}, + translit_module = "grc-translit", + sort_key = { -- Keep this synchronized with el, grc, cpg + from = {"[ᾳάᾴὰᾲᾶᾷἀᾀἄᾄἂᾂἆᾆἁᾁἅᾅἃᾃἇᾇ]", "[έὲἐἔἒἑἕἓ]", "[ῃήῄὴῂῆῇἠᾐἤᾔἢᾒἦᾖἡᾑἥᾕἣᾓἧᾗ]", "[ίὶῖἰἴἲἶἱἵἳἷϊΐῒῗ]", "[όὸὀὄὂὁὅὃ]", "[ύὺῦὐὔὒὖὑὕὓὗϋΰῢῧ]", "[ῳώῴὼῲῶῷὠᾠὤᾤὢᾢὦᾦὡᾡὥᾥὣᾣὧᾧ]", "ῥ", "ς"}, + to = {"α", "ε", "η", "ι", "ο", "υ", "ω", "ρ", "σ"} + } +} + +m["pnu"] = {"Jiongnai Bunu", 56325, "hmn"} + +m["pnv"] = {"Pinigura", 10631927, "aus-psw", Latn} + +m["pnw"] = {"Panyjima", 3913830, "aus-nga", Latn} + +m["pnx"] = {"Phong-Kniang", 3914627, "mkh"} + +m["pny"] = {"Pinyin", 36250, "nic-nge", Latn} + +m["pnz"] = {"Pana (Central Africa)", 36241, "alv-mbm", Latn} + +m["poc"] = {"Poqomam", 36416, nil, Latn} + +m["poe"] = {"San Juan Atzingo Popoloca", 12953819, "omq-pop", Latn} + +m["pof"] = {"Poke", 7208577, "bnt-ske"} + +m["pog"] = {"Potiguára", 56722, "tup-gua", Latn} + +m["poh"] = {"Poqomchi'", 36414, nil, Latn} + +m["poi"] = {"Highland Popoluca", 7511556, "nai-miz", Latn} + +m["pok"] = {"Pokangá", 25559704, "sai-tuc", Latn} + +m["pom"] = {"Southeastern Pomo", 3396025, "nai-pom", Latn} + +m["pon"] = {"Pohnpeian", 28422, "poz-mic", Latn} + +m["poo"] = {"Central Pomo", 3396020, "nai-pom", Latn} + +m["pop"] = {"Pwapwa", 3411153, "poz-cln", Latn} + +m["poq"] = {"Texistepec Popoluca", 5908707, "nai-miz", Latn} + +m["pos"] = {"Sayula Popoluca", 5908722, "nai-miz", Latn} + +m["pot"] = {"Potawatomi", 56749, "alg", Latn} + +m["pov"] = {"Guinea-Bissau Creole", 33339, "crp", Latn, ancestors = {"pt"}} + +m["pow"] = {"San Felipe Otlaltepec Popoloca", 25559598, "omq-pop", Latn} + +m["pox"] = {"Polabian", 36741, "zlw-lch", Latn} + 
+m["poy"] = {"Pogolo", 2429648, "bnt-kil"} + +m["ppa"] = {"Pao", 7132069} + +m["ppe"] = {"Papi", 7132809} + +m["ppi"] = {"Paipai", 56726, "nai-yuc", Latn} + +m["ppk"] = {"Uma", 7881036, "poz-kal", Latn} + +m["ppl"] = {"Pipil", 1186896, "azc-nah", Latn, entry_name = {from = {"Ā", "ā", "Ē", "ē", "Ī", "ī", "Ō", "ō", "Ū", "ū", "Ȳ", "ȳ", "Á", "á", "É", "é", "Í", "í", "Ó", "ó", "Ú", "ú"}, to = {"A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "Y", "y", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u"}}} + +m["ppm"] = {"Papuma", 7133239, "poz-hce", Latn} + +m["ppn"] = {"Papapana", 3362757, "poz-ocw", Latn} + +m["ppo"] = {"Folopa", 5464843, "paa", Latn} + +m["ppq"] = {"Pei", 7160903} + +m["pps"] = {"San Luís Temalacayuca Popoloca", 25559602, "omq-pop", Latn} + +m["ppt"] = {"Pa", 3504757, "ngf", Latn} + +m["ppu"] = {"Papora", 2094884, "map", Latn} + +m["pqa"] = {"Pa'a", 3441315, "cdc-wst"} + +m["pqm"] = {"Malecite-Passamaquoddy", 3183144, "alg-eas", Latn} + +m["prc"] = {"Parachi", 2640637, "ira-orp"} + +-- "prd" IS NOT INCLUDED, SEE WT:LT + +m["pre"] = {"Principense", 36520, "crp", Latn, ancestors = {"pt"}} + +m["prf"] = {"Paranan", 7135433, "phi"} + +m["prg"] = {"Old Prussian", 35501, "bat", Latn, entry_name = {remove_diacritics = MACRON}} + +m["prh"] = {"Porohanon", 6583710, "phi"} + +m["pri"] = {"Paicî", 732131, "poz-cln", Latn} + +m["prk"] = {"Parauk", 3363719, "mkh-pal"} + +m["prl"] = {"Peruvian Sign Language", 3915508, "sgn"} + +m["prm"] = {"Kibiri", 56745, "paa"} + +m["prn"] = {"Prasuni", 32689, "nur-nor"} + +m["pro"] = {"Old Occitan", 2779185, "roa", Latn, sort_key = {from = {"ç"}, to = {"c"}}} + +-- "prp" IS NOT INCLUDED, SEE WT:LT + +m["prq"] = {"Ashéninka Perené", 3450601, "awd", Latn} + +m["prr"] = {"Puri", 7261687} + +-- "prs" IS TREATED AS "fa" (or as etymology-only), SEE WT:LT + +m["prt"] = {"Phai", 7180184, "mkh"} + +m["pru"] = {"Puragi", 7260800, "ngf-sbh"} + +m["prw"] = {"Parawen", 7136291, "ngf-mad"} + +m["prx"] = {"Purik", 567905, "sit-lab"} + +m["prz"] 
= {"Providencia Sign Language", 3322084, "sgn"} + +m["psa"] = {"Asue Awyu", 11266334} + +m["psc"] = {"Persian Sign Language", 7170221, "sgn"} + +m["psd"] = {"Plains Indian Sign Language", 2380124, "sgn"} + +m["pse"] = {"Central Malay", 3367751, "poz-mly"} + +m["psg"] = {"Penang Sign Language", 4924925, "sgn"} + +m["psh"] = {"Southwest Pashayi", 16112270, "inc-dar"} + +m["psi"] = {"Southeast Pashayi", 23713536, "inc-dar", {"Arab"}} + +m["psl"] = {"Puerto Rican Sign Language", 7258608, "sgn-fsl"} + +m["psm"] = {"Pauserna", 2912846, "tup-gua", Latn} + +m["psn"] = {"Panasuan", 7130113, "poz"} + +m["pso"] = {"Polish Sign Language", 3915194, "sgn-gsl"} + +m["psp"] = {"Philippine Sign Language", 3551357, "sgn-fsl"} + +m["psq"] = {"Pasi", 7142091} + +m["psr"] = {"Portuguese Sign Language", 3915472, "sgn"} + +m["pss"] = {"Kaulong", 3194294, "poz-ocw"} + +m["psu"] = {"Sauraseni Prakrit", 2452885, "inc-psu", {"Deva", "Brah"}, translit_module = "Brah-translit", ancestors = {"inc-pra"}} + +m["psw"] = {"Port Sandwich", 3398324, "poz-vnc", Latn} + +m["psy"] = {"Piscataway", 3504233, "alg-eas"} + +m["pta"] = {"Pai Tavytera", 7124619, "tup-gua", Latn} + +m["pth"] = {"Pataxó Hã-Ha-Hãe", 7144304} + +m["pti"] = {"Pintiini", 10632026, "aus-pam"} + +m["ptn"] = {"Patani", 7144242, "poz-hce", Latn} + +m["pto"] = {"Zo'é", 8073148, "tup-gua", Latn} + +m["ptp"] = {"Patep", 3368679, "poz-ocw", Latn} + +m["ptq"] = {"Pattapu", nil, "dra"} + +m["ptr"] = {"Piamatsina", 7190040, "poz-vnc", Latn} + +m["ptt"] = {"Enrekang", 12953520} + +m["ptu"] = {"Bambam", 4853321, "poz-ssw"} + +m["ptv"] = {"Port Vato", 3398323, nil, Latn} + +m["ptw"] = {"Pentlatch", 2069475} + +m["pty"] = {"Pathiya", 7144790, "dra"} + +m["pua"] = {"Purepecha", 16114351, "qfa-iso", Latn, sort_key = {from = {"á", "é", "í", "ó", "ú"}, to = {"a", "e", "i", "o", "u"}}} + +m["pub"] = {"Purum", 6400562, "tbq-kuk", Latn} + +m["puc"] = {"Punan Merap", 7259895} + +m["pud"] = {"Punan Aput", 4782333} + +m["pue"] = {"Puelche", 33660} + 
+m["puf"] = {"Punan Merah", 7259894} + +m["pug"] = {"Phuie", 36375, "nic-gnw"} + +m["pui"] = {"Puinave", 3027918} + +m["puj"] = {"Punan Tubu", 7259896, "poz-swa"} + +m["pum"] = {"Puma", 33736, "sit-kic"} + +m["puo"] = {"Puoc", 6440803, "mkh"} + +m["pup"] = {"Pulabu", 7259163, "ngf-mad"} + +m["puq"] = {"Puquina", 1207739} + +m["pur"] = {"Puruborá", 7261619, "tup"} + +m["put"] = {"Putoh", 12953832, "poz-swa", Latn} + +m["puu"] = {"Punu", 36401, "bnt-sir", Latn} + +m["puw"] = {"Puluwat", 36397, "poz-mic", Latn} + +m["pux"] = {"Puare", 3507983} + +m["puy"] = {"Purisimeño", 2967638, "nai-chu", Latn} + +m["pwa"] = {"Pawaia", 7156099, "paa", Latn} + +m["pwb"] = {"Panawa", 47385077, "nic-jer", Latn, ancestors = {"jer"}} + +m["pwg"] = {"Gapapaiwa", 3095245, "poz-ocw", Latn} + +m["pwi"] = {"Patwin", 3370188, "nai-wtq", Latn} + +m["pwm"] = {"Molbog", 6895718, "poz-san", Latn} + +m["pwn"] = {"Paiwan", 715755, "map", Latn} + +m["pwo"] = {"Western Pwo", 7988202, "kar", {"Mymr"}} + +m["pwr"] = {"Powari", 12640277, "inc-hie", {"Deva"}} + +m["pww"] = {"Northern Pwo", 7058885, "kar", {"Thai"}} + +m["pxm"] = {"Quetzaltepec Mixe", 6842374, "nai-miz", Latn} + +m["pye"] = {"Pye Krumen", 11157382, "kro-grb"} + +m["pym"] = {"Fyam", 3914025, "nic-ple", Latn} + +m["pyn"] = {"Poyanáwa", 3401023, "sai-pan"} + +m["pys"] = {"Paraguayan Sign Language", 7134698, "sgn"} + +m["pyu"] = {"Puyuma", 716690, "map", Latn} + +m["pyx"] = {"Tircul", 36259, "sit"} + +m["pyy"] = {"Pyen", 7262966, "tbq-lol"} + +m["pzn"] = {"Para Naga", 7133667, "sit-aao"} + +return m diff --git a/wiktra/wikt/translit/languages/data3/q.lua b/wiktra/wikt/translit/languages/data3/q.lua new file mode 100644 index 0000000..6a55f25 --- /dev/null +++ b/wiktra/wikt/translit/languages/data3/q.lua @@ -0,0 +1,53 @@ +local u = mw.ustring.char + +-- UTF-8 encoded strings for some commonly-used diacritics +local GRAVE = u(0x0300) +local ACUTE = u(0x0301) +local CIRC = u(0x0302) +local TILDE = u(0x0303) +local MACRON = u(0x0304) +local BREVE 
= u(0x0306) +local DOTABOVE = u(0x0307) +local DIAER = u(0x0308) +local CARON = u(0x030C) +local DGRAVE = u(0x030F) +local INVBREVE = u(0x0311) +local DOTBELOW = u(0x0323) +local RINGBELOW = u(0x0325) +local CEDILLA = u(0x0327) + +local Latn = {"Latn"} + +local m = {} + +m["qua"] = {"Quapaw", 3412974, "sio-dhe", Latn} + +m["quc"] = {"K'iche'", 36494, "myn", Latn} + +m["qui"] = {"Quileute", 3414490, "chi", Latn} + +m["qum"] = {"Sipakapense", 36589, "myn", Latn} + +m["qun"] = {"Quinault", 3414522, "sal", Latn} + +m["quq"] = { -- should this continue to be considered separate language? + "Quinqui", 5908714, nil, Latn +} + +m["quv"] = {"Sacapulteco", 36412, "myn", Latn} + +m["qvy"] = {"Queyu", 3414352, "sit-qia"} + +m["qwc"] = {"Classical Quechua", 35882, "qwe", Latn} + +m["qwm"] = {"Kipchak", 1199226, "trk-kip", {"Latn", "Armn", "Arab"}, translit_module = "translit-redirect"} + +m["qwt"] = {"Kwalhioqua-Tlatskanai", 20641, "ath-nor", Latn} + +m["qxs"] = {"Southern Qiang", 56563, "sit-qia", Latn} + +m["qya"] = {"Quenya", 56383, "art", {"Latn", "Teng"}, type = "appendix-constructed"} + +m["qyp"] = {"Quiripi", 3414714, "alg-eas", Latn} + +return m diff --git a/wiktra/wikt/translit/languages/data3/r.lua b/wiktra/wikt/translit/languages/data3/r.lua new file mode 100644 index 0000000..461ddff --- /dev/null +++ b/wiktra/wikt/translit/languages/data3/r.lua @@ -0,0 +1,325 @@ +local u = mw.ustring.char + +-- UTF-8 encoded strings for some commonly used diacritics +local GRAVE = u(0x0300) +local ACUTE = u(0x0301) +local CIRC = u(0x0302) +local TILDE = u(0x0303) +local MACRON = u(0x0304) +local BREVE = u(0x0306) +local DOTABOVE = u(0x0307) +local DIAER = u(0x0308) +local CARON = u(0x030C) +local DGRAVE = u(0x030F) +local INVBREVE = u(0x0311) +local DOTBELOW = u(0x0323) +local RINGBELOW = u(0x0325) +local CEDILLA = u(0x0327) + +local Latn = {"Latn"} + +local m = {} + +m["raa"] = {"Dungmali", 56871, "sit-kic"} + +m["rab"] = {"Chamling", 3436664, "sit-kic", {"Deva"}} + +m["rac"] = 
{"Rasawa", 56443, "paa-lkp", Latn} + +m["rad"] = {"Rade", 3429088, "cmc", Latn} + +m["raf"] = {"Western Meohang", 17442461, "sit-kie"} + +m["rag"] = {"Logooli", 6667767, "bnt-lok", Latn} + +m["rah"] = {"Rabha", 7278686, "tbq-bdg", {"Beng", "Latn"}} + +m["rai"] = {"Ramoaaina", 3418509, "poz-ocw", Latn} + +m["raj"] = {"Rajasthani", 13196, "inc-wes", {"Deva"}, ancestors = {"inc-ogu"}} + +m["rak"] = {"Tulu-Bohuai", 2908807, "poz-aay", Latn} + +m["ral"] = {"Ralte", 7288392, "tbq-kuk", Latn} + +m["ram"] = {"Canela", 2936334, "sai-nje", Latn} + +m["ran"] = {"Riantana", 7322169, "ngf", Latn} + +m["rao"] = {"Rao", 11732596, "paa", Latn} + +m["rap"] = {"Rapa Nui", 36746, "poz-pep", Latn} + +m["raq"] = {"Saam", 7395644, "sit-kic"} + +m["rar"] = {"Rarotongan", 36745, "poz-pep", Latn} + +m["ras"] = {"Tegali", 36522, "nic-ras", Latn} + +m["rat"] = {"Razajerdi", 7299461, "xme-ttc", ancestors = {"xme-ttc-eas"}} + +m["rau"] = {"Raute", 7296262, "sit-gma", {"Deva", "Latn"}} + +m["rav"] = {"Sampang", 3449115, "sit-kic"} + +m["raw"] = {"Rawang", 542564, "sit-nng", Latn, sort_key = {from = {"[áāà]", "[éēè]", "[íīì]", "[óōò]", "[úūù]", "ǿ", GRAVE, MACRON, ACUTE}, to = {"a", "e", "i", "o", "u", "ø"}}} + +m["rax"] = {"Rang", 3913345, "alv-mum"} + +m["ray"] = {"Rapa", 36417, "poz-pep"} + +m["raz"] = {"Rahambuu", 3417555, "poz-btk"} + +m["rbb"] = {"Rumai Palaung", 12953797, "mkh-pal"} + +m["rbk"] = {"Northern Bontoc", nil, "phi"} + +m["rbl"] = {"Miraya Bikol", 18664557, "phi"} + +m["rcf"] = {"Réunion Creole French", 13198, "crp", Latn, ancestors = {"fr"}} + +m["rdb"] = {"Rudbari", 12953072, "xme", ancestors = {"xme-mid"}} + +m["rea"] = {"Rerau", 7314883, "ngf-mad"} + +m["reb"] = {"Rembong", 7311570, "poz-cet"} + +m["ree"] = {"Rejang Kayan", 3423957, "poz"} + +m["reg"] = {"Kara (Tanzania)", 6367567, "bnt-haj"} + +m["rei"] = {"Reli", 7310982} + +m["rej"] = {"Rejang", 3056339, "poz-sus", {"Rjng"}} + +m["rel"] = {"Rendille", 3447297, "cus"} + +m["rem"] = {"Remo", 3501825, "sai-pan", Latn} + 
+m["ren"] = {"Rengao", 6583692, "mkh"} + +m["rer"] = {"Rer Bare", 12953857} + +m["res"] = {"Reshe", 36258, "nic-knj"} + +m["ret"] = {"Retta", 7317113, "ngf"} + +m["rey"] = {"Reyesano", 3111857, "sai-tac", Latn} + +m["rga"] = {"Roria", 7366825, "poz-vnc"} + +m["rge"] = {"Romani Greek", 3915435} + +m["rgk"] = {"Rangkas", 7292645, "sit-alm"} + +m["rgn"] = {"Romagnol", 1641543, "roa-git", Latn, wikimedia_codes = {"eml"}} + +m["rgr"] = {"Resígaro", 3450504, "awd", Latn} + +m["rgs"] = {"Southern Roglai", 12953069} + +m["rgu"] = {"Ringgou", 7334886, "poz-tim"} + +m["rhg"] = {"Rohingya", 3241177, "inc-eas", {"Arab", "Rohg", "Mymr", "Latn", "Beng"}, ancestors = {"inc-obn"}, translit_module = "translit-redirect"} + +m["rhp"] = {"Yahang", 8046792, "qfa-tor"} + +m["ria"] = {"Reang", 12953063, "tbq-bdg"} + +m["rif"] = {"Tarifit", 34174, "ber", {"Tfng", "Arab", "Latn"}, translit_module = "Tfng-translit"} + +m["ril"] = {"Riang", 2741615, "mkh-pal"} + +m["rim"] = {"Nyaturu", 7193418, "bnt-tkm", Latn} + +m["rin"] = {"Nungu", 3913350, "nic-nin", Latn} + +m["rir"] = {"Ribun", 7322443, "day", Latn} + +m["rit"] = {"Ritarungo", 7336730, "aus-yol", Latn} + +m["riu"] = {"Riung", 7336938, "poz-cet", Latn} + +m["rjg"] = {"Rajong", 7286370, "poz-cet", Latn} + +m["rji"] = {"Raji", 7286138, "sit-gma"} + +m["rjs"] = {"Rajbanshi", 12640969, "inc-eas", {"Deva", "as-Beng"}, ancestors = {"inc-mgd"}} + +m["rka"] = { + "Kraol", 3199593, "mkh-ban", {"Khmr"} -- also Latn? 
+} + +m["rkb"] = {"Rikbaktsa", 2585357, "sai-mje", Latn} + +m["rkh"] = {"Rakahanga-Manihiki", 3119695, "poz-pep", Latn} + +m["rki"] = {"Rakhine", 3450749, "tbq-brm", {"Mymr"}, ancestors = {"obr"}} + +m["rkm"] = {"Marka", 36030, "dmn-wmn", Latn} + +m["rkt"] = {"Kamta", 3241618, "inc-eas", {"as-Beng"}, ancestors = {"inc-ork"}, translit_module = "as-translit"} + +m["rkw"] = {"Arakwal", 34295800, "aus-pam", Latn} + +m["rma"] = {"Rama", 3444486, "cba"} + +m["rmb"] = {"Rembarunga", 7311553, "aus-gun", Latn} + +m["rmc"] = {"Carpathian Romani", 5045611, "inc-rom"} + +m["rmd"] = {"Traveller Danish", 12640779, "inc-rom"} + +m["rme"] = {"Angloromani", 541279, "crp", Latn, ancestors = {"en", "rom"}} + +m["rmf"] = {"Kalo Finnish Romani", 2093214, "inc-rom"} + +m["rmg"] = {"Traveller Norwegian", 3177352, "inc-rom"} + +m["rmh"] = {"Murkim", 4308074, "paa-pau"} + +m["rmi"] = {"Lomavren", 2495696, "qfa-mix", {"Latn", "Armn"}, ancestors = {"psu", "hy"}, translit_module = "translit-redirect", override_translit = true} + +m["rmk"] = {"Romkun", 7363236, "paa", Latn} + +m["rml"] = {"Baltic Romani", 513736, "inc-rom"} + +m["rmm"] = {"Roma", 4414831} + +m["rmn"] = {"Balkan Romani", 1256701, "inc-rom"} + +m["rmo"] = {"Sinte Romani", 1793299, "qfa-mix", Latn, ancestors = {"rom"}} + +m["rmp"] = {"Rempi", 7312214, "ngf-mad"} + +m["rmq"] = {"Caló", 35466, "qfa-mix", Latn, ancestors = {"rom", "la"}} + +m["rms"] = {"Romanian Sign Language", 7362575, "sgn"} + +m["rmt"] = {"Domari", 35394, "inc-cen", ancestors = {"psu"}} + +m["rmu"] = {"Tavringer Romani", 27808413, "inc-rom"} + +m["rmv"] = {"Romanova", 1298715, "art", type = "appendix-constructed"} + +m["rmw"] = {"Welsh Romani", 2097387, "inc-rom"} + +m["rmx"] = {"Romam", 22694600, "mkh"} + +m["rmy"] = {"Vlax Romani", 2669199, "inc-rom"} + +m["rmz"] = {"Marma", 21403256, "tbq-brm", {"Mymr"}, ancestors = {"obr"}} + +m["rnd"] = {"Ruwund", 7383564, "bnt-lun"} + +m["rng"] = {"Ronga", 2520717, "bnt-tsr", Latn} + +m["rnl"] = {"Ranglong", 7292878} + 
+m["rnn"] = {"Roon", 7366335, "poz-hce"} + +m["rnp"] = {"Rongpo", 7365672, "sit-whm"} + +m["rnw"] = {"Rungwa", 7379873, "bnt-mwi", Latn} + +m["rob"] = {"Tae'", 12473476, "poz-ssw", Latn} + +m["roc"] = {"Cacgia Roglai", 2932485} + +m["rod"] = {"Rogo", 3914894, "nic-kmk"} + +m["roe"] = {"Ronji", 3441763, "poz-ocw"} + +m["rof"] = {"Rombo", 33330, "bnt-chg", Latn} + +m["rog"] = {"Northern Roglai", 3439680, "cmc", Latn} + +m["rol"] = {"Romblomanon", 13202, "phi"} + +m["rom"] = {"Romani", 13201, "inc-rom", {"Latn", "Cyrl"}, ancestors = {"psu"}} + +m["roo"] = {"Rotokas", 13203, "paa-nbo", Latn} + +m["rop"] = {"Kriol", 35671, "crp", Latn, ancestors = {"en"}} + +m["ror"] = {"Rongga", 12473464} + +m["rou"] = {"Runga", 56793} + +m["row"] = {"Dela-Oenale", 5253046, "poz-tim"} + +m["rpn"] = {"Repanbitip", 7313900, "poz-vnc"} + +m["rpt"] = {"Rapting", 7294362, "ngf-mad"} + +m["rri"] = {"Ririo", 2404190, "poz-ocw"} + +m["rro"] = {"Roro", 34197, "poz-ocw", Latn} + +m["rrt"] = {"Arritinngithigh", 4796002, nil, Latn} + +m["rsb"] = {"Romano-Serbian", 1268244} + +m["rsl"] = {"Russian Sign Language", 13210, "sgn"} + +m["rsm"] = {"Miriwoong Sign Language", 24090240, "sgn"} + +m["rtc"] = {"Rungtu", 7379867, "tbq-kuk"} + +m["rth"] = {"Ratahan", 3420026, "phi", Latn} + +m["rtm"] = {"Rotuman", 36754, "poz-occ", Latn} + +m["rtw"] = {"Rathawi", 12953854, "inc-bhi"} + +m["rub"] = {"Gungu", 11165235, "bnt-glb"} + +m["ruc"] = {"Ruuli", 7383562, "bnt-nyg"} + +m["rue"] = {"Rusyn", 26245, "zle", {"Cyrl"}, ancestors = {"orv"}, translit_module = "rue-translit", entry_name = {from = {GRAVE, ACUTE}, to = {}}} + +m["ruf"] = {"Luguru", 3437661, "bnt-ruv", Latn} + +m["rug"] = {"Roviana", 3445546, "poz-ocw", Latn} + +m["ruh"] = {"Ruga", 7378127} + +m["rui"] = {"Rufiji", 7377946, "bnt-mbi"} + +m["ruk"] = {"Che", 3915445, "nic-nin"} + +m["ruo"] = {"Istro-Romanian", 33622, "roa-eas", Latn} + +m["rup"] = {"Aromanian", 29316, "roa-eas", Latn, wikimedia_codes = {"roa-rup"}} + +m["ruq"] = {"Megleno-Romanian", 
13358, "roa-eas", Latn} + +m["rut"] = {"Rutul", 36757, "cau-lzg", {"Cyrl"}} + +m["ruu"] = {"Lanas Lobu", 12953676} + +m["ruy"] = {"Mala (Nigeria)", 3913381, "nic-kau"} + +m["ruz"] = {"Ruma", 3913326, "nic-kau"} + +m["rwa"] = {"Rawo", 3504269} + +m["rwk"] = {"Rwa", 7985624, "bnt-chg"} + +m["rwm"] = {"Amba", 788423, "bnt-kbi", Latn} + +m["rwo"] = {"Rawa", 11732598, "ngf-fin", Latn} + +m["rxd"] = {"Ngardi", 7022063} + +m["rxw"] = {"Karuwali", 6881575} + +m["ryn"] = {"Northern Amami-Oshima", 2840988, "jpx-ryu", {"Jpan"}} + +m["rys"] = {"Yaeyama", 34203, "jpx-ryu", {"Jpan"}} + +m["ryu"] = {"Okinawan", 34233, "jpx-ryu", {"Jpan"}} + +m["rzh"] = {"Razihi", 16911222, "sem-osa", {"Arab"}, ancestors = {"sem-srb"}} + +return m diff --git a/wiktra/wikt/translit/languages/data3/s.lua b/wiktra/wikt/translit/languages/data3/s.lua new file mode 100644 index 0000000..86cdfa6 --- /dev/null +++ b/wiktra/wikt/translit/languages/data3/s.lua @@ -0,0 +1,1017 @@ +local u = mw.ustring.char + +-- UTF-8 encoded strings for some commonly used diacritics +local GRAVE = u(0x0300) +local ACUTE = u(0x0301) +local CIRC = u(0x0302) +local TILDE = u(0x0303) +local MACRON = u(0x0304) +local BREVE = u(0x0306) +local DOTABOVE = u(0x0307) +local DIAER = u(0x0308) +local CARON = u(0x030C) +local DGRAVE = u(0x030F) +local INVBREVE = u(0x0311) +local DOTBELOW = u(0x0323) +local RINGBELOW = u(0x0325) +local CEDILLA = u(0x0327) + +-- Punctuation to be used for standardChars field +local PUNCTUATION = " !#$%&*+,-./:;<=>?@^_`|~'()" + +local Latn = {"Latn"} + +local m = {} + +m["saa"] = {"Saba", 3914885, "cdc-est", Latn} + +m["sab"] = {"Buglere", 3368506, "cba", Latn} + +m["sac"] = {"Fox", 12714767, "alg-sfk", Latn} + +m["sad"] = {"Sandawe", 34016, "qfa-iso", Latn} + +m["sae"] = {"Sabanê", 3460478, "sai-nmk", Latn} + +m["saf"] = {"Safaliba", 36432, "nic-mre", Latn} + +m["sah"] = {"Yakut", 34299, "trk-sib", {"Cyrl"}, translit_module = "sah-translit", override_translit = true} + +m["saj"] = {"Sahu", 7399757, 
"paa", Latn} + +m["sak"] = {"Sake", 36425, "bnt-kel", Latn} + +m["sam"] = {"Samaritan Aramaic", 56612, "sem-arw", {"Samr"}, entry_name = {from = {"[" .. u(0x0816) .. "-" .. u(0x082D) .. "]"}, to = {}}, translit_module = "Samr-translit"} + +m["sao"] = {"Sause", 4409155, "paa-tkw", Latn} + +m["saq"] = {"Samburu", 56536, "sdv-lma"} + +m["sar"] = {"Saraveca", 3450556, "awd"} + +m["sas"] = {"Sasak", 1294047, "poz-mcm", Latn} + +m["sat"] = {"Santali", 33965, "mun", {"Olck"}, translit_module = "Olck-translit", override_translit = true} + +m["sau"] = {"Saleman", 7404262, "poz-cet"} + +m["sav"] = {"Saafi-Saafi", 36308, "alv-cng", {"Arab", "Latn"}} + +m["saw"] = {"Sawi", 677064, "ngf", Latn} + +m["sax"] = {"Sa", 3460352, "poz-vnc", Latn} + +m["say"] = {"Saya", 3914431, "cdc-wst", Latn} + +m["saz"] = {"Saurashtra", 13292, "inc-wes", {"Saur", "Latn", "Taml", "Deva"}, translit_module = "saz-translit", ancestors = {"inc-ogu"}} + +m["sba"] = {"Ngambay", 2372207, "csu-sar"} + +m["sbb"] = {"Simbo", 3484101, "poz-ocw"} + +m["sbc"] = {"Gele'", 3194847, "poz-oce"} + +m["sbd"] = {"Southern Samo", 33122730, "dmn-sam", Latn} + +m["sbe"] = {"Saliba (New Guinea)", 3469737, "poz-ocw"} + +m["sbf"] = {"Shabo", 36342, "ssa", Latn} + +m["sbg"] = {"Seget", 7446237} + +m["sbh"] = {"Sori-Harengan", 36515, "poz-aay", Latn} + +m["sbi"] = {"Seti", 7456682, "qfa-tor", Latn} + +m["sbj"] = {"Surbakhal", 759995} + +m["sbk"] = {"Safwa", 4121160, "bnt-mby", Latn} + +m["sbl"] = {"Botolan Sambal", 4095195, "phi"} + +m["sbm"] = {"Sagala", 11732610, "bnt-ruv", Latn} + +m["sbn"] = {"Sindhi Bhil", 25559289, "inc-nwe", {"Arab", "Deva", "Sind", "Guru"}, ancestors = {"sd"}} + +m["sbo"] = {"Sabüm", 7396535, "mkh-asl"} + +m["sbp"] = {"Sangu (Tanzania)", 7418149, "bnt-bki", Latn} + +m["sbq"] = {"Sileibi", 7514337, "ngf-mad"} + +m["sbr"] = {"Sembakung Murut", 7449148, "poz-san"} + +m["sbs"] = {"Subiya", 6442073, "bnt-bot", Latn} + +m["sbt"] = {"Kimki", 6410160, "paa-pau"} + +m["sbu"] = {"Stod Bhoti", 15622700, 
"sit-las"} + +m["sbv"] = {"Sabine", nil, "itc", Latn} + +m["sbw"] = {"Simba", 36430, "bnt-tso", Latn} + +m["sbx"] = {"Seberuang", 12473470, "poz-mly"} + +m["sby"] = {"Soli", 7557754, "bnt-bot", Latn} + +m["sbz"] = {"Sara Kaba", 25559318, "csu-kab", Latn} + +m["scb"] = {"Chut", 2967709, "mkh-vie"} + +m["sce"] = {"Dongxiang", 32947, "xgn", {"Arab", "Latn"}} + +m["scf"] = {"San Miguel Creole French", 12953094, "crp", ancestors = {"gcf"}} + +m["scg"] = {"Sanggau", 12473466, "day"} + +m["sch"] = {"Sakachep", 37054, "tbq-kuk"} + +m["sci"] = {"Sri Lankan Creole Malay", 1089151, "crp", Latn, ancestors = {"ms"}} + +m["sck"] = {"Sadri", 765922, "inc-eas", ancestors = {"bh"}} + +m["scl"] = {"Shina", 1353320, "inc-dar", {"ur-Arab", "Deva"}, ancestors = {"inc-dar-pro"}} + +m["scn"] = {"Sicilian", 33973, "roa-itd", Latn} + +m["sco"] = {"Scots", 14549, "gmw", Latn, ancestors = {"enm"}} + +m["scp"] = {"Yolmo", 22662107, "sit-kyk", {"Deva"}} + +m["scq"] = {"Sa'och", 6583617, "mkh-pea"} + +m["scs"] = {"North Slavey", 20628, "ath-nor", Latn} + +m["scu"] = {"Shumcho", 22077739, "sit-kin"} + +m["scv"] = {"Sheni", 11015820, "nic-jer", Latn, ancestors = {"zir"}} + +m["scw"] = {"Sha", 3438816, "cdc-wst", Latn} + +m["scx"] = {"Sicel", 36667, "ine"} + +m["sda"] = {"Toraja-Sa'dan", 36673, "poz-ssw"} + +m["sdb"] = {"Shabak", 3289596, "ira-zgr", ancestors = {"hac"}} + +m["sdc"] = {"Sassarese", 845441, "roa-itd", Latn} + +m["sde"] = {"Surubu", 3913336, "nic-kau", Latn} + +m["sdf"] = {"Sarli", 7424256, "ira-zgr", ancestors = {"hac"}} + +m["sdg"] = {"Savi", 3474654, "inc-dar", ancestors = {"inc-dar-pro"}} + +m["sdh"] = {"Southern Kurdish", 1496597, "ku", {"ku-Arab", "Latn"}, translit_module = "sdh-translit", link_tr = true, entry_name = {from = {u(0x0650), u(0x0652)}, to = {}}, ancestors = {"ku-pro"}} + +m["sdj"] = {"Suundi", 7650407, "bnt-kng", Latn} + +m["sdk"] = {"Sos Kundi", 7563811, "paa-spk", Latn} + +m["sdl"] = {"Saudi Arabian Sign Language", 3504160, "sgn"} + +m["sdm"] = {"Semandang", 
7449012, "day"} + +m["sdn"] = {"Gallurese", 612220, "roa-itd", Latn, ancestors = {"co"}} + +m["sdo"] = {"Bukar-Sadung Bidayuh", 2927799, "day"} + +m["sdp"] = {"Sherdukpen", 7494785, "sit-khb"} + +m["sdr"] = {"Oraon Sadri", 12953860, "inc-eas", ancestors = {"bh"}} + +m["sds"] = {"Sened", 56570, "ber"} + +m["sdu"] = {"Sarudu", 7424700, "poz-cet"} + +m["sdx"] = {"Sibu Melanau", 18642842, "poz-bnn"} + +m["sea"] = {"Semai", 3135426, "mkh-asl", Latn} + +-- seb is a duplicate code of spp + +m["sec"] = {"Sechelt", 7442898, "sal", Latn} + +m["sed"] = {"Sedang", 56448, "mkh-nbn"} + +m["see"] = {"Seneca", 1185133, "iro", Latn} + +m["sef"] = {"Cebaara Senoufo", 10975121, "alv-snr"} + +m["seg"] = {"Segeju", 17584599, "bnt-mij", Latn} + +m["seh"] = {"Sena", 2964008, "bnt-sna", Latn} + +m["sei"] = {"Seri", 36583, "qfa-iso", Latn} + +m["sej"] = {"Sene", 7450252} + +m["sek"] = {"Sekani", 28562, "ath-nor", Latn} + +m["sel"] = {"Selkup", 34008, "syd", {"Cyrl"}} + +m["sen"] = {"Nanerigé Sénoufo", 36002, "alv-sma"} + +m["seo"] = {"Suarmin", 7630513, "paa"} + +m["sep"] = {"Sìcìté Sénoufo", 56787, "alv-sma"} + +m["seq"] = {"Senara Sénoufo", 35210, "alv-snr"} + +m["ser"] = {"Serrano", 3479942, "azc-tak", Latn} + +m["ses"] = {"Koyraboro Senni", 35655, "son", Latn} + +m["set"] = {"Sentani", 3441672, "paa-sen", Latn} + +m["seu"] = {"Serui-Laut", 7455503, "poz-hce", Latn} + +m["sev"] = {"Nyarafolo Senoufo", 36306, "alv-snr"} + +m["sew"] = {"Sewa Bay", 7458126, "poz-ocw"} + +m["sey"] = {"Secoya", 3477218, "sai-tuc", Latn} + +m["sez"] = {"Senthang Chin", 7451223, "tbq-kuk"} + +m["sfb"] = {"French Belgian Sign Language", 3217332, "sgn"} + +m["sfm"] = {"Small Flowery Miao", 7542773, "hmn"} + +m["sfs"] = {"South African Sign Language", 3322093, "sgn"} + +m["sfw"] = {"Sehwi", 36593, "alv-ctn", Latn} + +m["sga"] = {"Old Irish", 35308, "cel-gae", Latn, ancestors = {"pgl"}, entry_name = {from = {"ḟ", "ṁ", "ṅ", "ṡ", "Ḟ", "Ṁ", "Ṅ", "Ṡ", "ä", "ë", "ï", "ö", "ü", "·"}, to = {"f", "m", "n", "s", "F", "M", 
"N", "S", "a", "e", "i", "o", "u"}}, sort_key = {from = {"á", "é", "æ", "ǽ", "í", "ó", "ú", "^h", "-"}, to = {"a", "e", "e", "e", "i", "o", "u"}}, standardChars = "A-IL-PR-Ua-il-pr-u0-9ÁáÉéÍíÓóÚú" .. PUNCTUATION} + +m["sgb"] = {"Mag-Anchi Ayta", 4356243, "phi"} + +m["sgc"] = {"Kipsigis", 56339, "sdv-nma"} + +m["sgd"] = {"Surigaonon", 34140, "phi", Latn} + +m["sge"] = {"Segai", 7446180} + +m["sgg"] = {"Swiss-German Sign Language", 35150, "sgn"} + +m["sgh"] = {"Shughni", 34053, "ira-shy", {"Latn", "Cyrl"}, translit_module = "sgh-translit", override_translit = true, ancestors = {"ira-shr-pro"}} + +m["sgi"] = {"Suga", 36475, "nic-mmb", Latn} + +m["sgk"] = {"Sangkong", 2945610, "tbq-lol"} + +m["sgm"] = {"Singa", 7522797, "bnt-lok", Latn} + +m["sgp"] = {"Singpho", 7524158, "sit-jnp"} + +m["sgr"] = {"Sangisari", 3394363, "ira-kms"} + +m["sgs"] = {"Samogitian", 213434, "bat", Latn, ancestors = {"olt"}, wikimedia_codes = {"bat-smg"}} + +m["sgt"] = {"Brokpake", 56603, "sit-tib"} + +m["sgu"] = {"Salas", 7403694, "poz-cma"} + +m["sgw"] = {"Sebat Bet Gurage", 2707343, "sem-eth", {"Ethi"}} + +m["sgx"] = {"Sierra Leone Sign Language", 7511448, "sgn"} + +m["sgy"] = {"Sanglechi", 3472220, "ira-sgi", ancestors = {"ira-sgi-pro"}} + +m["sgz"] = {"Sursurunga", 36511, "poz-ocw", Latn} + +m["sha"] = {"Shall-Zwall", 3915355, "nic-beo"} + +m["shb"] = {"Ninam", 3436586} + +m["shc"] = {"Sonde", 7560881, "bnt-pen", Latn} + +m["shd"] = {"Kundal Shahi", 6444265, "inc-dar"} + +m["she"] = {"Sheko", 3183355, "omv-diz"} + +m["shg"] = {"Shua", 3501092, "khi-kal", Latn} + +m["shh"] = {"Shoshone", 33811, "azc-num", Latn} + +m["shi"] = {"Tashelhit", 34152, "ber", {"Tfng", "Arab", "Latn"}, translit_module = "Tfng-translit"} + +m["shj"] = {"Shatt", 56344, "sdv-daj"} + +m["shk"] = {"Shilluk", 36486, "sdv-lon", Latn} + +m["shl"] = {"Shendu", 22074616, "tbq-kuk"} + +m["shm"] = {"Shahrudi", 7462280, "xme-ttc", ancestors = {"xme-ttc-cen"}} + +m["shn"] = {"Shan", 56482, "tai-swe", {"Mymr"}, translit_module = 
"shn-translit", sort_key = {from = {"[ၢႃ]", "ဵ", "ႅ", "ႇ", "ႈ", "း", "ႉ", "ႊ"}, to = {"ာ", "ေ", "ႄ", "႒", "႓", "႔", "႕", "႖"}}} + +m["sho"] = {"Shanga", 3913931, "dmn-bbu", Latn} + +m["shp"] = {"Shipibo-Conibo", 2671988, "sai-pan", Latn} + +m["shq"] = {"Sala", 10961665, "bnt-bot", Latn} + +m["shr"] = {"Shi", 3481999, "bnt-shh", Latn} + +m["shs"] = {"Shuswap", 3482685, "sal", Latn} + +m["sht"] = {"Shasta", 56396, "nai-shs", Latn} + +m["shu"] = {"Chadian Arabic", 56497, "sem-arb", {"Arab"}, entry_name = {from = {u(0x0671), u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652), u(0x0670), u(0x0640)}, to = {u(0x0627)}}} + +m["shv"] = {"Shehri", 33445, "sem-sar", {"Arab", "Latn"}, ancestors = {"sem-pro"}} + +m["shw"] = {"Shwai", 36527, "alv-hei"} + +m["shx"] = {"She", 2605689, "hmn"} + +m["shy"] = {"Tachawit", 33274, "ber"} + +m["shz"] = {"Syenara Senoufo", 36316, "alv-snr"} + +m["sia"] = {"Akkala Sami", 35241, "smi", Latn} + +m["sib"] = {"Sebop", 7442799, "poz-swa", Latn} + +m["sid"] = {"Sidamo", 33786, "cus", {"Latn", "Ethi"}} + +m["sie"] = {"Simaa", 7517329, "bnt-kav", Latn} + +m["sif"] = {"Siamou", 36252} + +m["sig"] = {"Paasaal", 36426, "nic-sis", Latn} + +m["sih"] = {"Zire", 8072753, "poz-cln"} + +m["sii"] = {"Shom Peng", 1039346, "aav"} + +m["sij"] = {"Numbami", 3346277, "poz-ocw"} + +m["sik"] = {"Sikiana", 3443734, "sai-car", Latn} + +m["sil"] = {"Tumulung Sisaala", 25383006, "nic-sis", Latn} + +m["sim"] = {"Seim", 7446815, "paa-spk"} + +m["sip"] = {"Sikkimese", 35285, "sit-tib", {"Tibt"}, ancestors = {"xct"}, translit_module = "bo-translit"} + +m["siq"] = {"Sonia", 7561770} + +m["sir"] = {"Siri", 3438729, "cdc-wst", Latn} + +m["sis"] = {"Siuslaw", 2315424} + +m["siu"] = {"Sinagen", 7521655, "qfa-tor", Latn} + +m["siv"] = {"Sumariup", 7636966, "paa-spk", Latn} + +m["siw"] = {"Siwai", 7532519, "paa-sbo"} + +m["six"] = {"Sumau", 7637021, "ngf-mad", Latn} + +m["siy"] = {"Sivandi", 13269, "xme", ancestors = {"xme-mid"}} + +m["siz"] = 
{"Siwi", 36814, "ber", {"Tfng", "Arab", "Latn"}} + +m["sja"] = {"Epena", 3055682, "sai-chc"} + +m["sjb"] = {"Sajau Basap", 4684353, "poz-bnn"} + +m["sjd"] = {"Kildin Sami", 33656, "smi", {"Cyrl"}, translit_module = "sjd-translit", sort_key = {from = {"ӣ", "ӯ", MACRON}, to = {"и", "у"}}} + +m["sje"] = {"Pite Sami", 56314, "smi", Latn, entry_name = {from = {"ū", "ˈ", MACRON}, to = {"u"}}, sort_key = {from = {"á", "đ", "ŋ", "ŧ"}, to = {"a²", "d²", "n²", "t²"}}} + +m["sjg"] = {"Assangori", 3502255, "sdv-tmn"} + +m["sjk"] = {"Kemi Sami", 35871, "smi", Latn} + +m["sjl"] = {"Miji", 6845470, "sit-hrs"} + +m["sjm"] = {"Mapun", 3287253, "poz-sbj"} + +m["sjn"] = {"Sindarin", 56437, "art", {"Latn", "Teng"}, type = "appendix-constructed"} + +m["sjo"] = {"Xibe", 13223, "tuw", {"Mong"}} + +m["sjp"] = {"Surjapuri", 7645351, "inc-eas", ancestors = {"inc-mgd"}} + +m["sjr"] = {"Siar-Lak", 3482907, "poz-ocw"} + +m["sjs"] = {"Senhaja De Srair", 56744, "ber"} + +m["sjt"] = {"Ter Sami", 36656, "smi", Latn} + +m["sju"] = {"Ume Sami", 56415, "smi", Latn, entry_name = {from = {"ē", "ū", "ǖ", MACRON, "ˈ"}, to = {"e", "u", "ü"}}, sort_key = {from = {"á", "đ", "ï", "ŋ", "ŧ", "ü"}, to = {"a²", "d²", "i²", "n²", "t²", "u²"}}} + +m["sjw"] = {"Shawnee", 2669206, "alg", Latn} + +m["ska"] = {"Skagit", 25559652, "sal", Latn} + +m["skb"] = {"Saek", 36437, "tai-nor", {"Thai"}, sort_key = {from = {"[%pๆ]", "[็-๎̱̄̂]", "([เแโใไ])([ก-ฮ])"}, to = {"", "", "%2%1"}}} + +m["skc"] = {"Ma Manda", 6720783, "ngf-fin"} + +m["skd"] = {"Southern Sierra Miwok", 3492334, "nai-utn", Latn} + +m["ske"] = {"Ske", 7534244, "poz-vnc", Latn} + +m["skf"] = {"Sakirabiá", 3304806, "tup", Latn} + +m["skh"] = {"Sikule", 3121081, "poz-nws"} + +m["ski"] = {"Sika", 33960, "poz-cet", Latn} + +m["skj"] = { -- compare 'ths' + "Seke", 30226846, "sit-tam" +} + +m["skk"] = {"Sok", 12953887, "mkh-ban"} + +m["skm"] = {"Sakam", 6448517, "ngf-fin"} + +m["skn"] = {"Kolibugan Subanon", 18755617, "phi"} + +m["sko"] = {"Seko Tengah", 15613270, 
"poz"} + +m["skp"] = {"Sekapan", 7447132, "poz-bnn"} + +m["skq"] = {"Sininkere", 3914896, "dmn-man", Latn} + +m["skr"] = {"Saraiki", 33902, "inc-pan", {"ur-Arab", "Mult", "Deva"}, ancestors = {"lah"}, entry_name = {from = {u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652)}, to = {}}} + +m["sks"] = {"Maia", 12952760, "ngf-mad", Latn} + +m["skt"] = {"Sakata", 36691, "bnt-bnm", Latn} + +m["sku"] = {"Sakao", 3298421, "poz-vnc", Latn} + +m["skv"] = {"Skou", 3915200, "paa-msk", Latn} + +m["skw"] = {"Skepi Creole Dutch", 2522153, "crp", ancestors = {"nl"}} + +m["skx"] = {"Seko Padang", 15613282, "poz-ssw"} + +m["sky"] = {"Sikaiana", 7439242, "poz-pnp", Latn} + +m["skz"] = {"Sekar", 7447136, "poz-cet"} + +m["slc"] = {"Saliba (Colombia)", 3441097} + +m["sld"] = {"Sissala", 11020264, "nic-sis", Latn} + +m["sle"] = {"Sholaga", 7500203, "dra", {"Knda"}} + +m["slf"] = {"Swiss-Italian Sign Language", 12953479, "sgn"} + +m["slg"] = {"Selungai Murut", 7448844, "poz-san"} + +m["slh"] = {"Southern Puget Sound Salish", 12642471, "sal", Latn} + +-- "sli" "Silesian German" IS SUBSUMED INTO "gmw-ecg" "East Central German" + +m["slj"] = {"Salumá", 7406296, "sai-car", Latn} + +m["sll"] = {"Salt-Yui", 7405785} + +m["slm"] = {"Pangutaran Sama", 3362086, "poz-sbj"} + +m["sln"] = {"Salinan", 1568938} + +m["slp"] = {"Lamaholot", 6480777, "poz-cet", Latn} + +m["slq"] = {"Salchuq", 56752, "trk"} + +m["slr"] = {"Salar", 33963, "trk", {"Arab", "Latn"}} + +m["sls"] = {"Singapore Sign Language", 7512563, "sgn"} + +m["slt"] = {"Sila", 7514021, "tbq-lol"} + +m["slu"] = {"Selaru", 7447500, "poz-cet"} + +m["slw"] = {"Sialum", 7506694, "ngf"} + +m["slx"] = {"Salampasu", 7403607, "bnt-lun", Latn} + +m["sly"] = {"Selayar", 7447520, "poz-ssw"} + +m["slz"] = {"Ma'ya", 2291492, "poz-hce"} + +m["sma"] = { + "Southern Sami", + 13293, + "smi", + Latn, + sort_key = { -- Standardise on Norwegian orthography for sorting purposes + from = {"ï", "ä", "ö"}, + to = {"i²", "æ", "ø"} 
+ } +} + +m["smb"] = {"Simbari", 7517427, "ngf"} + +m["smc"] = {"Som", 7559081, "ngf-fin", Latn} + +m["smd"] = {"Sama", 6407456, "bnt-kmb", Latn} + +m["smf"] = {"Auwe", 3502072, "paa-brd", ancestors = {"dnd"}} + +m["smg"] = {"Simbali", 56692, "paa-bng"} + +m["smh"] = {"Samei", 7409269, "tbq-lol"} + +m["smj"] = { + "Lule Sami", + 56322, + "smi", + Latn, + entry_name = {from = {"ˈ", MACRON}, to = {}}, + sort_key = { -- Standardise on Swedish orthography for sorting purposes + from = {"á", "ŋ", "æ"}, + to = {"a²", "n²", "ä"} + } +} + +m["smk"] = {"Bolinao", 2669235, "phi"} + +m["sml"] = {"Central Sama", 3470593, "poz-sbj"} + +m["smm"] = {"Musasa", 6940122, "inc-eas", ancestors = {"bh"}} + +m["smn"] = {"Inari Sami", 33462, "smi", Latn, entry_name = {from = {"ḥ", "ḷ", "ṃ", "ṇ", "ṛ", "ṿ", DOTBELOW, "ˈ"}, to = {"h", "l", "m", "n", "r", "v"}}, sort_key = {from = {"â", "č", "đ", "ŋ", "š", "ž", "á"}, to = {"a˿", "c˿", "d˿", "n˿", "s˿", "z˿", "ä˿"}}} + +m["smp"] = {"Samaritan Hebrew", 56502, "sem-can", {"Samr"}, entry_name = {from = {"[" .. u(0x0816) .. "-" .. u(0x082D) .. 
"]"}, to = {}}} + +m["smq"] = {"Samo", 7409884, "ngf"} + +m["smr"] = {"Simeulue", 2992833, "poz-nws", Latn} + +m["sms"] = {"Skolt Sami", 13271, "smi", Latn, entry_name = {from = {"ẹ", "ˈ"}, to = {"e"}}, sort_key = {from = {"â", "č", "đ", "ǧ", "ǥ", "ǩ", "ŋ", "õ", "š", "ž"}, to = {"a²", "c²", "d²", "g²", "g³", "k˿", "n˿", "o˿", "s˿", "z˿"}}} + +m["smt"] = {"Simte", 7521268, "tbq-kuk"} + +m["smu"] = {"Somray", 6583612, "mkh-pea"} + +m["smv"] = {"Samvedi", 6345632, "inc-sou", ancestors = {"pmh"}} + +m["smw"] = {"Sumbawa", 3182585, "poz-mcm", Latn} + +m["smx"] = {"Samba", 11120157, "bnt-pen", Latn} + +m["smy"] = {"Semnani", 14531212, "xme", ancestors = {"xme-old"}} + +m["smz"] = {"Simeku", 7517534, "paa-sbo"} + +m["snb"] = {"Sebuyau", 7442836, "poz-mly"} + +m["snc"] = {"Sinaugoro", 4170719, "poz-ocw"} + +m["sne"] = {"Bau Bidayuh", 2891938, "day", Latn} + +m["snf"] = {"Noon", 36304, "alv-cng", Latn} + +m["sng"] = {"Sanga (Congo)", 3438316, "bnt-lub", Latn} + +m["sni"] = {"Sensi", 7451029, "sai-pan", Latn} + +m["snj"] = {"Riverain Sango", 25559751, "crp", Latn, ancestors = {"ngb"}} + +m["snk"] = {"Soninke", 36660, "dmn-snb", Latn} + +m["snl"] = {"Sangil", 3472206, "phi"} + +m["snm"] = {"Southern Ma'di", 15637273, "csu-mma"} + +m["snn"] = {"Siona", 3485116, "sai-tuc", Latn} + +m["sno"] = {"Snohomish", 25559662, "sal", Latn} + +m["snp"] = {"Siane", 7506812, "paa-kag", Latn} + +m["snq"] = {"Sangu (Gabon)", 36609, "bnt-sir", Latn} + +m["snr"] = {"Sihan", 7513400, "ngf-mad"} + +m["sns"] = {"Nahavaq", 2160435, "poz-vnc"} + +m["snu"] = {"Senggi", 7929052, "paa-brd"} + +m["snv"] = {"Sa'ban", 3474891, "poz-swa", Latn} + +m["snw"] = {"Selee", 36272, "alv-ntg", Latn} + +m["snx"] = {"Sam", 7408387} + +m["sny"] = {"Saniyo-Hiyewe", 7418302, "paa-spk", Latn} + +m["snz"] = { + "Kou", 7525035, -- also 4803639 + "ngf-mad", Latn +} + +m["soa"] = { + "Thai Song", + 7709159, + "tai-swe", + {"Tavt", "Thai"}, + -- translit_module = "Tavt-translit", + sort_key = {from = {"([ꪵꪶꪹꪻꪼ])([ꪀ-ꪯ])", 
"([เแโใไ])([ก-ฮ])"}, to = {"%2%1", "%2%1"}} +} + +m["sob"] = {"Sobei", 3121035, "poz-ocw"} + +m["soc"] = {"Soko", 7555138, "bnt-ske", Latn} + +m["sod"] = {"Songoora", 7561296, "bnt-lgb", Latn} + +m["soe"] = {"Songomeno", 5713543, "bnt-bsh", Latn} + +m["sog"] = {"Sogdian", 205979, "ira-sgc", {"Sogd", "Mani", "Syrc", "Sogo"}, translit_module = "translit-redirect"} + +m["soh"] = {"Aka (Sudan)", 3450949, "sdv-eje", Latn} + +m["soi"] = {"Sonha", 12953890, "inc-eas", ancestors = {"inc-mgd"}} + +m["sok"] = {"Sokoro", 3441303, "cdc-est", Latn} + +m["sol"] = {"Solos", 3489591, "poz-ocw"} + +m["soo"] = {"Nsong", 12953148, "bnt-bdz", Latn} + +m["sop"] = {"Songe", 3130911, "bnt-lbn", Latn} + +m["soq"] = {"Kanasi", 11732656} + +m["sor"] = {"Somrai", 3123566, "cdc-est", Latn} + +m["sos"] = {"Seenku", 36274, "dmn-smg"} + +m["sou"] = {"Southern Thai", 56508, "tai-swe", {"Thai"}, sort_key = {from = {"[%pๆ]", "[็-๎]", "([เแโใไ])([ก-ฮ])"}, to = {"", "", "%2%1"}}} + +m["sov"] = {"Sonsorolese", 13281, "poz-mic", Latn} + +m["sow"] = {"Sowanda", 7571845, "paa-brd"} + +m["sox"] = {"Swo", 36604, "bnt-mka", Latn} + +m["soy"] = {"Miyobe", 35913, "alv-sav", Latn} + +m["soz"] = {"Temi", 13278, "bnt-kka", Latn} + +m["spb"] = {"Sepa (Indonesia)", 18603687, "poz-cma", Latn} + +m["spc"] = {"Sapé", 2888158, nil, Latn} + +m["spd"] = {"Saep", 7398312, "ngf-mad"} + +m["spe"] = {"Sepa (New Guinea)", 7451725, "poz-ocw", Latn} + +m["spg"] = {"Sian", 7506806, "poz-bnn"} + +m["spi"] = {"Saponi", 3915418, "paa"} + +m["spk"] = {"Sengo", 7450584, "paa-spk", Latn} + +m["spl"] = {"Selepet", 7447917, "ngf"} + +m["spm"] = {"Sepen", 4701931, "paa", Latn} + +m["spn"] = {"Sanapaná", 3033556, "sai-mas", Latn} + +m["spo"] = {"Spokane", 3493704, "sal"} + +m["spp"] = {"Supyire", 56284, "alv-sma", Latn} + +m["spr"] = {"Saparua", 7420921, "poz-cma"} + +m["sps"] = {"Saposa", 3473187, "poz-ocw"} + +m["spt"] = {"Spiti Bhoti", 22080879, "sit-las"} + +m["spu"] = {"Sapuan", 7421168, "mkh-ban"} + +m["spv"] = {"Sambalpuri", 
6433240, "inc-eas", {"Orya"}, translit_module = "or-translit", ancestors = {"or"}} + +m["spx"] = {"South Picene", 36688, "itc", {"Ital", "Latn"}, translit_module = "Ital-translit"} + +m["spy"] = {"Sabaot", 7395896, "sdv-kln"} + +m["sqa"] = {"Shama-Sambuga", 3914392, "nic-kmk", Latn} + +m["sqh"] = {"Shau", 3913925, "nic-jer", Latn} + +m["sqk"] = {"Albanian Sign Language", 4709168, "sgn"} + +m["sqm"] = {"Suma", 11008431, "alv-gbw"} + +m["sqn"] = {"Susquehannock", 3505736, "iro"} + +m["sqo"] = {"Sorkhei", 3491964, "ira-kms"} + +m["sqq"] = {"Sou", 16979751, "mkh-ban"} + +m["sqr"] = {"Siculo-Arabic", 1069489, "sem-arb"} + +m["sqs"] = {"Sri Lankan Sign Language", 3915466, "sgn"} + +m["sqt"] = {"Soqotri", 13283, "sem-sar", {"Arab", "Latn"}} + +m["squ"] = {"Squamish", 2484579, "sal", Latn} + +m["sra"] = {"Saruga", 7424699, "ngf-mad", Latn} + +m["srb"] = {"Sora", 13284, "mun", {"Sora"}} + +m["sre"] = {"Sara", 33957, "day"} + +m["srf"] = {"Nafi", 6958174, "poz-ocw"} + +m["srg"] = {"Sulod", 7636489, "phi"} + +m["srh"] = {"Sarikoli", 33873, "ira-shy", {"Latn", "ug-Arab", "Cyrl"}, ancestors = {"ira-shr-pro"}} + +m["sri"] = {"Siriano", 3485264, "sai-tuc", Latn} + +m["srk"] = {"Serudung Murut", 7455497, "poz-san"} + +m["srl"] = {"Isirawa", 4203802, "paa-tkw"} + +m["srm"] = {"Saramaccan", 33779, "crp", Latn, ancestors = {"en", "pt"}} + +m["srn"] = {"Sranan Tongo", 33989, "crp", Latn} + +m["srq"] = {"Sirionó", 3027953, "tup-gua", Latn} + +m["srr"] = {"Serer", 36284, "alv-fwo", Latn} + +m["srs"] = {"Sarcee", 20825, "ath-nor", Latn} + +m["srt"] = {"Sauri", 7427547, "paa-egb"} + +m["sru"] = {"Suruí", 7646993, "tup", Latn} + +m["srv"] = {"Waray Sorsogon", 18755610, "phi"} + +m["srw"] = {"Serua", 14916905, "poz-cet"} + +m["srx"] = {"Sirmauri", 7530505, "him"} + +m["sry"] = {"Sera", 7452602, "poz-ocw", Latn} + +m["srz"] = {"Shahmirzadi", 12953126, "ira-msh", {"fa-Arab"}} + +m["ssb"] = {"Southern Sama", 3470594, "poz-sbj", Latn} + +m["ssc"] = {"Suba-Simbiti", 7630687, "bnt-lok", Latn} + 
+m["ssd"] = {"Siroi", 10771067, "ngf-mad", Latn} + +m["sse"] = {"Balangingi", 2880535, "poz-sbj", Latn} + +m["ssf"] = {"Thao", 676492, "map", Latn} + +m["ssg"] = {"Seimat", 3182581, "poz-aay"} + +m["ssh"] = {"Shihhi Arabic", 56571, "sem-arb", {"Arab"}, entry_name = {from = {u(0x0671), u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652), u(0x0670), u(0x0640)}, to = {u(0x0627)}}} + +m["ssi"] = {"Sansi", 3309366, "inc-nwe", ancestors = {"psu"}} + +m["ssj"] = {"Sausi", 7427605, "ngf-mad", Latn} + +m["ssk"] = {"Sunam", 11002210, "sit-kin"} + +m["ssl"] = {"Western Sisaala", 11154776, "nic-sis", Latn} + +m["ssm"] = {"Semnam", 7449713, "mkh-asl"} + +m["ssn"] = {"Waata", 3501553, "cus"} + +m["sso"] = {"Sissano", 7530937, "poz-ocw", Latn} + +m["ssp"] = {"Spanish Sign Language", 3100814, "sgn"} + +m["ssq"] = {"So'a", 7572120, "poz-cet", Latn} + +m["ssr"] = {"Swiss-French Sign Language", 12953483, "sgn"} + +m["sss"] = {"Sô", 3082037, "mkh-kat"} + +m["sst"] = {"Sinasina", 7521813, "ngf", Latn} + +m["ssu"] = {"Susuami", 7649752, "ngf", Latn} + +m["ssv"] = {"Shark Bay", 7489783, "poz-vnc"} + +m["ssx"] = {"Samberigi", 7409020, "paa-eng", Latn} + +m["ssy"] = {"Saho", 36353, "cus", {"Latn", "Ethi", "Arab"}, entry_name = {remove_diacritics = ACUTE}} + +m["ssz"] = {"Sengseng", 7450601, "poz-ocw", Latn} + +m["stb"] = {"Northern Subanen", 12953892, "phi"} + +m["std"] = {"Sentinelese", 568377} + +m["ste"] = {"Liana-Seti", 6539924, "poz-cma"} + +m["stf"] = {"Seta", 7456326, "qfa-tor", Latn} + +m["stg"] = {"Trieng", 22694648, "mkh-ban"} + +m["sth"] = {"Shelta", 36705, "crp", Latn, ancestors = {"en"}} + +m["sti"] = {"Bulo Stieng", 15771431, "mkh-ban"} + +m["stj"] = {"Matya Samo", 10974879, "dmn-sam", Latn} + +m["stk"] = {"Arammba", 3502094, "ngf"} + +m["stm"] = {"Setaman", 7456333, "ngf-okk", Latn} + +m["stn"] = {"Owa", 1324132, "poz-sls", Latn} + +m["sto"] = {"Stoney", 3033570, "sio-dkt"} + +m["stp"] = {"Southeastern Tepehuan", 12953917, "azc", Latn} + 
+m["stq"] = {"Saterland Frisian", 27154, "gmw-fri", Latn} + +m["str"] = {"Saanich", 36444, "sal", Latn} + +m["sts"] = {"Shumashti", 33777, "inc-dar"} + +m["stt"] = {"Budeh Stieng", 12953891, "mkh-ban"} + +m["stu"] = {"Samtao", 25559550, "mkh-pal"} + +m["stv"] = {"Silt'e", nil, "sem-eth", {"Ethi"}} + +m["stw"] = {"Satawalese", 28477, "poz-mic"} + +m["sty"] = {"Siberian Tatar", 4418344, "trk-kno", {"Cyrl"}} + +m["sua"] = {"Sulka", 7636341, "qfa-iso"} + +m["sub"] = {"Suku", 12953160, "bnt-yak", Latn} + +m["suc"] = {"Western Subanon", 16113894, "phi"} + +m["sue"] = {"Suena", 7634386, "ngf", Latn} + +m["sug"] = {"Suganga", 7634706, "ngf-okk", Latn} + +m["sui"] = {"Suki", 2089984, "ngf", Latn} + +m["suk"] = {"Sukuma", 2638144, "bnt-tkm", Latn} + +m["suq"] = {"Suri", 5364172, "sdv"} + +m["sur"] = {"Mwaghavul", 3440486, "cdc-wst", Latn} + +m["sus"] = {"Susu", 33990, "dmn-sya", Latn} + +m["sut"] = {"Subtiaba", 3915405, "omq", Latn} + +m["suv"] = {"Sulung", 56408, "sit-khb"} + +m["suw"] = {"Sumbwa", 7637055, "bnt-glb", Latn} + +m["sux"] = {"Sumerian", 36790, "qfa-iso", {"Xsux"}} + +m["suy"] = {"Suyá", 3505859, "sai-nje", Latn} + +m["suz"] = {"Sunwar", 56549, "sit-kiw"} + +m["sva"] = {"Svan", 34067, "ccs", {"Geor", "Cyrl"}, translit_module = "sva-translit", override_translit = true, entry_name = {from = {DIAER, MACRON}, to = {}}} + +m["svb"] = {"Ulau-Suain", 7878769, "poz-ocw", Latn} + +m["svc"] = {"Vincentian Creole English", 3501785, "crp", Latn, ancestors = {"en"}} + +m["sve"] = {"Serili", 7454834, "poz-tim"} + +m["svk"] = {"Slovakian Sign Language", 7541557, "sgn"} + +m["svm"] = {"Slavomolisano", 36254, "zls", Latn, ancestors = {"sh"}} + +m["svs"] = {"Savosavo", 3130296, "paa", Latn} + +m["svx"] = {"Skalvian", 3486125, "bat", Latn} + +m["swb"] = {"Maore Comorian", 34075, "bnt-com", Latn, sort_key = {from = {"ɓ", "ɗ"}, to = {"bz", "dz"}}} + +m["swf"] = {"Sere", 7453056, "nic-ser", Latn} + +m["swg"] = {"Swabian", 327274, "gmw", Latn, ancestors = {"gsw"}} + +m["swi"] = { + 
"Sui", 3112388, "qfa-kms", {"Latn", "Hani"} -- Shui is not available yet. +} + +m["swj"] = {"Sira", 36599, "bnt-sir", Latn} + +m["swl"] = {"Swedish Sign Language", 36558, "sgn"} + +m["swm"] = {"Samosa", 7410037, "ngf-mad", Latn} + +m["swn"] = {"Sokna", 2988323, "ber"} + +m["swo"] = {"Shanenawa", nil, "sai-pan", Latn} + +m["swp"] = {"Suau", 3502368, "poz-ocw"} + +m["swq"] = {"Sharwa", 56791, "cdc-cbm", Latn} + +m["swr"] = {"Saweru", 3474649} + +m["sws"] = {"Seluwasan", 7448845, "poz-cet"} + +m["swt"] = {"Sawila", 7428639} + +m["swu"] = {"Suwawa", 7650588, "phi"} + +m["sww"] = {"Sowa", 7571843, "poz-vnc"} + +m["swx"] = {"Suruahá", 3114402, "auf"} + +m["swy"] = {"Sarua", 56261, "cdc-est", Latn} + +m["sxb"] = {"Suba", 33916, "bnt-lok", Latn} + +m["sxc"] = {"Sicanian", 36335} + +m["sxe"] = {"Sighu", 36431, "bnt-kel", Latn} + +m["sxg"] = {"Shixing", 56337, "sit-nax"} + +m["sxk"] = {"Southern Kalapuya", 3192122, "nai-klp"} + +m["sxl"] = {"Selonian", 36491, "bat", Latn} + +m["sxm"] = {"Samre", 6583615, "mkh-pea"} + +m["sxn"] = {"Sangir", 25714758, "phi", Latn} + +m["sxo"] = {"Sorothaptic", 2762254} + +m["sxr"] = {"Saaroa", 716599, "map", Latn} + +m["sxs"] = {"Sasaru", 3913384, "alv-yek", Latn} + +-- "sxu" "Upper Saxon" IS SUBSUMED INTO "gmw-ecg" "East Central German" + +m["sxw"] = {"Saxwe Gbe", 7428892, "alv-pph"} + +m["sya"] = {"Siang", 3482903} + +m["syb"] = {"Central Subanen", 12953893, "phi", Latn} + +m["syc"] = {"Classical Syriac", 33538, "sem-are", {"Syrc"}, entry_name = {from = {"[" .. u(0x0304) .. u(0x0308) .. u(0x0331) .. u(0x0730) .. "-" .. u(0x0748) .. 
"]"}, to = {}}} + +m["syi"] = {"Seki", 36547, "bnt-kel", Latn} + +m["syk"] = {"Sukur", 56292, "cdc-cbm", Latn} + +m["syl"] = {"Sylheti", 2044560, "inc-eas", {"Sylo", "Beng"}, ancestors = {"inc-obn"}, translit_module = "syl-translit"} + +m["sym"] = {"Maya Samo", 10950421, "dmn-sam", Latn} + +m["syn"] = {"Senaya", 33914, "sem-nna"} + +m["syo"] = {"Suoy", 7641864, "mkh-pea"} + +m["sys"] = {"Sinyar", 56840, "csu", Latn} + +m["syw"] = {"Kagate", 12952538, "sit-kyk", {"Deva"}} + +m["syx"] = {"Osamayi", 7408415, "bnt-kel", {"Latn"}} + +m["syy"] = {"Al-Sayyid Bedouin Sign Language", 2915457, "sgn"} + +m["sza"] = {"Semelai", 3111827, "mkh-asl"} + +m["szb"] = {"Ngalum", 11732516, "ngf-okk", Latn} + +m["szc"] = {"Semaq Beri", 7449119, "mkh-asl"} + +m["szd"] = {"Seru", 7455488, "poz-bnn", Latn} + +m["sze"] = {"Seze", 373683, "omv-mao", Latn} + +m["szg"] = {"Sengele", 7450555, "bnt-mon", Latn} + +m["szl"] = {"Silesian", 30319, "zlw-lch", Latn} + +m["szn"] = {"Sula", 3503403, "poz-cma", Latn} + +m["szp"] = {"Suabo", 7630429, "ngf-sbh", Latn} + +m["szv"] = {"Isubu", 35431, "bnt-saw", Latn} + +m["szw"] = {"Sawai", 3447258, "poz-hce", Latn} + +m["szy"] = {"Sakizaya", 718269, "map", Latn} + +return m diff --git a/wiktra/wikt/translit/languages/data3/t.lua b/wiktra/wikt/translit/languages/data3/t.lua new file mode 100644 index 0000000..5586358 --- /dev/null +++ b/wiktra/wikt/translit/languages/data3/t.lua @@ -0,0 +1,1056 @@ +local u = mw.ustring.char + +-- UTF-8 encoded strings for some commonly used diacritics +local GRAVE = u(0x0300) +local ACUTE = u(0x0301) +local CIRC = u(0x0302) +local TILDE = u(0x0303) +local MACRON = u(0x0304) +local BREVE = u(0x0306) +local DOTABOVE = u(0x0307) +local DIAER = u(0x0308) +local CARON = u(0x030C) +local DGRAVE = u(0x030F) +local INVBREVE = u(0x0311) +local DOTBELOW = u(0x0323) +local RINGBELOW = u(0x0325) +local CEDILLA = u(0x0327) + +-- Punctuation to be used for standardChars field +local PUNCTUATION = " !#$%&*+,-./:;<=>?@^_`|~'()" + +local 
Latn = {"Latn"} + +local m = {} + +m["taa"] = {"Lower Tanana", 28565, "ath-nor", Latn} + +m["tab"] = {"Tabasaran", 34079, "cau-lzg", {"Cyrl"}, translit_module = "tab-translit", override_translit = true, entry_name = {from = {ACUTE}, to = {}}} + +m["tac"] = {"Lowland Tarahumara", 15616384, "azc-trc", Latn} + +m["tad"] = {"Tause", 2356440, "paa-lkp", Latn} + +m["tae"] = {"Tariana", 732726, "awd-nwk", Latn} + +m["taf"] = {"Tapirapé", 7684673, "tup-gua", Latn} + +m["tag"] = {"Tagoi", 36537, "nic-ras", Latn} + +m["taj"] = {"Eastern Tamang", 12953177, "sit-tam", {"Deva"}} + +m["tak"] = {"Tala", 3914494, "cdc-wst", Latn} + +m["tal"] = {"Tal", 3440387, "cdc-wst", Latn} + +m["tan"] = {"Tangale", 529921, "cdc-wst", Latn} + +m["tao"] = {"Yami", 715760, "phi", Latn} + +m["tap"] = {"Taabwa", 7673650, "bnt-sbi", Latn} + +m["taq"] = {"Tamasheq", 4670066, "ber", {"Tfng", "Arab", "Latn"}, ancestors = {"tmh"}} + +m["tar"] = {"Central Tarahumara", 20090009, "azc-trc", Latn, sort_key = {from = {"á", "é", "í", "ó", "ú", "ꞌ"}, to = {"a", "e", "i", "o", "u"}}} + +m["tas"] = {"Tay Boi", 2233794, "crp", Latn, ancestors = {"fr"}} + +m["tau"] = {"Upper Tanana", 28281, "ath-nor", Latn} + +m["tav"] = {"Tatuyo", 2524007, "sai-tuc", Latn} + +m["taw"] = {"Tai", 7675861, "ngf-mad", Latn} + +m["tax"] = {"Tamki", 3449082, "cdc-est", Latn} + +m["tay"] = {"Atayal", 715766, "map-ata", Latn} + +m["taz"] = {"Tocho", 36680, "alv-tal", Latn} + +m["tba"] = {"Aikanã", 3409307, "qfa-iso"} + +m["tbb"] = {"Tapeba", 12953908} + +m["tbc"] = {"Takia", 3514336, "poz-oce"} + +m["tbd"] = {"Kaki Ae", 6349417, "poz-ocw", Latn} + +m["tbe"] = {"Tanimbili", 3515188, "poz-oce", Latn} + +m["tbf"] = {"Mandara", 3285424, "poz-ocw", Latn} + +m["tbg"] = {"North Tairora", 20210398, "paa-kag"} + +m["tbh"] = {"Thurawal", 3537135, "aus-yuk"} + +m["tbi"] = {"Gaam", 35338, "sdv-eje", Latn} + +m["tbj"] = {"Tiang", 3528020, "poz-ocw", Latn} + +m["tbk"] = {"Calamian Tagbanwa", 3915487, "phi-kal"} + +m["tbl"] = {"Tboli", 7690594, "phi"} 
+ +m["tbm"] = {"Tagbu", 7675188, "nic-ser"} + +m["tbn"] = {"Barro Negro Tunebo", 12953943, "cba"} + +m["tbo"] = {"Tawala", 7689206, "poz-ocw", Latn} + +m["tbp"] = {"Taworta", 7689337, "paa-lkp", Latn} + +m["tbr"] = {"Tumtum", 3407029, "qfa-kad"} + +m["tbs"] = {"Tanguat", 7683166, "paa", Latn} + +m["tbt"] = {"Kitembo", 13123561, "bnt-shh", Latn} + +m["tbu"] = {"Tubar", 56730, "azc-trc", Latn} + +m["tbv"] = {"Tobo", 7811712, "ngf"} + +m["tbw"] = {"Tagbanwa", 3915475, "phi", Latn} + +m["tbx"] = {"Kapin", 6366665, "poz-ocw", Latn} + +m["tby"] = {"Tabaru", 11732670, "paa-wpa"} + +m["tbz"] = {"Ditammari", 35186, "nic-eov"} + +m["tca"] = {"Ticuna", 1815205, "sai-tyu", Latn} + +m["tcb"] = {"Tanacross", 28268, "ath-nor", Latn} + +m["tcc"] = {"Datooga", 35327, "sdv-nis", Latn} + +m["tcd"] = {"Tafi", 36545, "alv-ktg"} + +m["tce"] = {"Southern Tutchone", 31091048, "ath-nor", Latn} + +m["tcf"] = {"Malinaltepec Tlapanec", 25559732, "omq", Latn} + +m["tcg"] = {"Tamagario", 7680531, "ngf"} + +m["tch"] = {"Turks and Caicos Creole English", 7855478, "crp", Latn, ancestors = {"en"}} + +m["tci"] = {"Wára", 20825638, "paa-yam"} + +m["tck"] = {"Tchitchege", 36595, "bnt-tek"} + +m["tcl"] = {"Taman (Burma)", 15616518, "sit-jnp", Latn} + +m["tcm"] = {"Tanahmerah", 3514927, "ngf"} + +m["tco"] = {"Taungyo", 12953186, "tbq-brm", ancestors = {"obr"}} + +m["tcp"] = {"Tawr Chin", 7689338, "tbq-kuk"} + +m["tcq"] = {"Kaiy", 6348709, "paa-lkp"} + +m["tcs"] = {"Torres Strait Creole", 36648, "crp", Latn, ancestors = {"en"}} + +m["tct"] = {"T'en", 3442330, "qfa-kms"} + +m["tcu"] = {"Southeastern Tarahumara", 36807, "azc-trc", Latn} + +m["tcw"] = {"Tecpatlán Totonac", 7692795, "nai-ttn", Latn} + +m["tcx"] = {"Toda", 34042, "dra"} + +m["tcy"] = {"Tulu", 34251, "dra", {"Knda"}, translit_module = "kn-translit"} + +m["tcz"] = {"Thado Chin", 6583558, "tbq-kuk"} + +m["tda"] = {"Tagdal", 36570, "son"} + +m["tdb"] = {"Panchpargania", 21946879, "inc-eas", ancestors = {"bh"}} + +m["tdc"] = {"Emberá-Tadó", 
3052041, "sai-chc"} + +m["tdd"] = {"Tai Nüa", 36556, "tai-swe", {"Tale"}, translit_module = "Tale-translit"} + +m["tde"] = {"Tiranige Diga Dogon", 5313387, "nic-dgw"} + +m["tdf"] = {"Talieng", 37525108, "mkh-ban"} + +m["tdg"] = {"Western Tamang", 12953178, "sit-tam"} + +m["tdh"] = {"Thulung", 56553, "sit-kiw"} + +m["tdi"] = {"Tomadino", 7818197, "poz-btk", Latn} + +m["tdj"] = {"Tajio", 7676870, "poz"} + +m["tdk"] = {"Tambas", 3440392, "cdc-wst"} + +m["tdl"] = {"Sur", 3914453, "nic-tar"} + +m["tdm"] = {"Taruma", nil} + +m["tdn"] = {"Tondano", 3531514, "phi"} + +m["tdo"] = {"Teme", 3913994, "alv-mye"} + +m["tdq"] = {"Tita", 3914899, "nic-bco"} + +m["tdr"] = {"Todrah", 7812881, "mkh"} + +m["tds"] = {"Doutai", 5302331, "paa-lkp"} + +m["tdt"] = {"Tetun Dili", 12643484, "crp", Latn, ancestors = {"tet"}} + +m["tdu"] = {"Tempasuk Dusun", 3529155, "poz-san"} + +m["tdv"] = {"Toro", 3438367, "nic-alu"} + +m["tdy"] = {"Tadyawan", 7674700, "phi"} + +m["tea"] = {"Temiar", 3914693, "mkh-asl"} + +m["teb"] = {"Tetete", 7706087, "sai-tuc", Latn} + +m["tec"] = {"Terik", 3518379, "sdv-nma"} + +m["ted"] = {"Tepo Krumen", 11152243, "kro-grb"} + +m["tee"] = {"Huehuetla Tepehua", 56455, "nai-ttn"} + +m["tef"] = {"Teressa", 3518362, "aav-nic"} + +m["teg"] = {"Teke-Tege", 36478, "bnt-tek"} + +m["teh"] = {"Tehuelche", 33930, "sai-cho"} + +m["tei"] = {"Torricelli", 3450788, "qfa-tor"} + +m["tek"] = {"Ibali Teke", 2802914, "bnt-tek"} + +m["tem"] = {"Temne", 36613, "alv-mel"} + +m["ten"] = {"Tama (Colombia)", 3832969, "sai-tuc"} + +m["teo"] = {"Ateso", 29474, "sdv-ttu", Latn} + +m["tep"] = {"Tepecano", 3915525, "azc", Latn} + +m["teq"] = {"Temein", 7698064, "sdv"} + +m["ter"] = {"Tereno", 3314742, "awd", Latn} + +m["tes"] = {"Tengger", 12473479, "poz"} + +m["tet"] = {"Tetum", 34125, "poz-tim", Latn} + +m["teu"] = {"Soo", 3437607, "ssa-klk"} + +m["tev"] = {"Teor", 12953198, "poz-cma"} + +m["tew"] = {"Tewa", 56492, "nai-kta", Latn} + +m["tex"] = {"Tennet", 56346, "sdv"} + +m["tey"] = {"Tulishi", 
12911106, "qfa-kad", Latn} + +m["tez"] = {"Tetserret", 7706841, "ber", Latn} + +m["tfi"] = {"Tofin Gbe", 3530330, "alv-pph"} + +m["tfn"] = {"Dena'ina", 27785, "ath-nor", Latn} + +m["tfo"] = {"Tefaro", 7694618, "paa-egb"} + +m["tfr"] = {"Teribe", 36533, "cba"} + +m["tft"] = {"Ternate", 3518492, "paa-wpa", {"Latn", "Arab"}} + +m["tga"] = {"Sagalla", 12953082, "bnt-cht"} + +m["tgb"] = {"Tobilung", 12953913, "poz-san"} + +m["tgc"] = {"Tigak", 3528276, "poz-ocw"} + +m["tgd"] = {"Ciwogai", 3438799, "cdc-wst"} + +m["tge"] = {"Eastern Gorkha Tamang", 12953175, "sit-tam"} + +m["tgf"] = {"Chali", 3695197, "sit-ebo", {"Tibt"}} + +m["tgh"] = {"Tobagonian Creole English", 7811541, "crp", ancestors = {"en"}} + +m["tgi"] = {"Lawunuia", 3219937, "poz-ocw"} + +m["tgn"] = {"Tandaganon", nil, "phi"} + +m["tgo"] = {"Sudest", 7675351, "poz-ocw"} + +m["tgp"] = {"Tangoa", 2410276, "poz-vnc", Latn} + +m["tgq"] = {"Tring", 7842360, "poz-swa"} + +m["tgr"] = {"Tareng", 25559541, "mkh"} + +m["tgs"] = {"Nume", 3346290, "poz-vnc"} + +m["tgt"] = {"Central Tagbanwa", 3915515, "phi", {"Tagb"}} + +m["tgu"] = {"Tanggu", 7682930, "paa", Latn} + +m["tgv"] = {"Tingui-Boto", 7808195, "sai-mje"} + +m["tgw"] = {"Tagwana Senoufo", 36514, "alv-tdj"} + +m["tgx"] = {"Tagish", 28064, "ath-nor", Latn} + +m["tgy"] = {"Togoyo", 36825, "nic-ser"} + +m["thc"] = {"Tai Hang Tong", 7675753, "tai-nor"} + +m["thd"] = {"Kuuk Thaayorre", 6448718, "aus-pmn", Latn} + +m["the"] = {"Chitwania Tharu", 22083804, "inc-eas", ancestors = {"inc-mgd"}} + +m["thf"] = {"Thangmi", 7710314, "sit-new"} + +m["thh"] = {"Northern Tarahumara", 15616395, "azc-trc", Latn} + +m["thi"] = {"Tai Long", 25559562, "tai-swe"} + +m["thk"] = {"Tharaka", 15407179, "bnt-kka"} + +m["thl"] = {"Dangaura Tharu", 22083815, "inc-eas", ancestors = {"inc-mgd"}} + +m["thm"] = { + "Thavung", + 34780, + "mkh-vie", + {"Thai"}, -- Laoo is feasible but no evidence yet. 
+ sort_key = {from = {"[%pๆ]", "[็-๎]", "([เแโใไ])([ก-ฮ])"}, to = {"", "", "%2%1"}} +} + +m["thn"] = {"Thachanadan", 7708880, "dra"} + +m["thp"] = {"Thompson", 1755054, "sal"} + +m["thq"] = {"Kochila Tharu", 22083826, "inc-eas", ancestors = {"inc-mgd"}} + +m["thr"] = {"Rana Tharu", 12953920, "inc-eas", ancestors = {"inc-mgd"}} + +m["ths"] = {"Thakali", 7709348, "sit-tam"} + +m["tht"] = {"Tahltan", 30125, "ath-nor", Latn} + +m["thu"] = {"Thuri", 7799291, "sdv-lon"} + +m["thv"] = {"Tahaggart Tamahaq", 56703, "ber", {"Tfng", "Arab", "Latn"}, ancestors = {"tmh"}} + +m["thy"] = {"Tha", 3915849, "alv-bwj"} + +m["thz"] = {"Tayart Tamajeq", 56388, "ber", {"Tfng", "Arab", "Latn"}} + +m["tia"] = {"Tidikelt Tamazight", 56392, "ber"} + +m["tic"] = {"Tira", 36677, "alv-hei"} + +m["tif"] = {"Tifal", 11732691, "ngf-okk"} + +m["tig"] = {"Tigre", 34129, "sem-eth", {"Ethi"}, translit_module = "Ethi-translit"} + +m["tih"] = {"Timugon Murut", 7807680, "poz-san"} + +m["tii"] = {"Tiene", 36469, "bnt-tek"} + +m["tij"] = {"Tilung", 7803037, "sit-kiw"} + +m["tik"] = {"Tikar", 36483, "nic-bdn", Latn} + +m["til"] = {"Tillamook", 2109432, "sal"} + +m["tim"] = {"Timbe", 7804599, "ngf"} + +m["tin"] = {"Tindi", 36860, "cau-ava"} + +m["tio"] = {"Teop", 3518239, "poz-ocw"} + +m["tip"] = {"Trimuris", 7842270, "paa-tkw"} + +m["tiq"] = {"Tiéfo", 3914874, "alv-sav"} + +m["tis"] = {"Masadiit Itneg", 18748769, "phi"} + +m["tit"] = {"Tinigua", 3029805} + +m["tiu"] = {"Adasen", 11214797, "phi"} + +m["tiv"] = {"Tiv", 34131, "nic-tvc", Latn} + +m["tiw"] = {"Tiwi", 1656014, "qfa-iso", Latn} + +m["tix"] = {"Southern Tiwa", 7570552, "nai-kta", Latn} + +m["tiy"] = {"Tiruray", 7809425, "phi", Latn} + +m["tiz"] = {"Tai Hongjin", 3915716, "tai-swe"} + +m["tja"] = {"Tajuasohn", 3915326, "kro-wkr"} + +m["tjg"] = {"Tunjung", 3542117, "poz"} + +m["tji"] = {"Northern Tujia", 12953229, "sit-tja"} + +m["tjm"] = {"Timucua", 638300, "qfa-iso"} + +m["tjn"] = {"Tonjon", 3913372, "dmn-jje"} + +m["tjo"] = {"Temacine 
Tamazight", 3507211, "ber"} + +m["tjs"] = {"Southern Tujia", 12633994, "sit-tja", Latn} + +m["tju"] = {"Tjurruru", 3913834, "aus-nga", Latn} + +m["tjw"] = {"Chaap Wuurong", 5285187, "aus-pam", Latn} + +m["tka"] = {"Truká", 7847648} + +m["tkb"] = {"Buksa", 20983638, "inc-eas", ancestors = {"inc-mgd"}} + +m["tkd"] = {"Tukudede", 36863, "poz-tim", Latn} + +m["tke"] = {"Takwane", 11030092, "bnt-mak", ancestors = {"vmw"}} + +m["tkf"] = {"Tukumanféd", 42330115, "tup-gua", Latn} + +m["tkl"] = {"Tokelauan", 34097, "poz-pnp", Latn} + +m["tkm"] = {"Takelma", 56710} + +m["tkn"] = {"Toku-No-Shima", 3530484, "jpx-ryu", {"Jpan"}} + +m["tkp"] = {"Tikopia", 36682, "poz-pnp", Latn} + +m["tkq"] = {"Tee", 3075144, "nic-ogo", Latn} + +m["tkr"] = {"Tsakhur", 36853, "cau-lzg", {"Cyrl"}} + +m["tks"] = {"Ramandi", 25261947, "xme-ttc", ancestors = {"xme-ttc-sou"}} + +m["tkt"] = {"Kathoriya Tharu", 22083822, "inc-eas", ancestors = {"inc-mgd"}} + +m["tku"] = {"Upper Necaxa Totonac", 56343, "nai-ttn", Latn} + +m["tkv"] = {"Mur Pano", nil, "poz-ocw", Latn} + +m["tkw"] = {"Teanu", 3516731, "poz-oce", Latn} + +m["tkx"] = {"Tangko", 7682993, "ngf-okk"} + +m["tkz"] = {"Takua", 7678544, "mkh"} + +m["tla"] = {"Southwestern Tepehuan", 3518245, "azc", Latn} + +m["tlb"] = {"Tobelo", 1142333, "paa-wpa"} + +m["tlc"] = {"Misantla Totonac", 56460, "nai-ttn", Latn} + +m["tld"] = {"Talaud", 7678964, "phi"} + +m["tlf"] = {"Telefol", 7696150, "ngf-okk"} + +m["tlg"] = {"Tofanma", 4461493, "paa-pau"} + +m["tlh"] = {"Klingon", 10134, "art", Latn, type = "appendix-constructed"} + +m["tli"] = {"Tlingit", 27792, "xnd", {"Latn", "Cyrl"}} + +m["tlj"] = {"Talinga-Bwisi", 7679530, "bnt-haj"} + +m["tlk"] = {"Taloki", 3514563, "poz-btk"} + +m["tll"] = {"Tetela", 2613465, "bnt-tet"} + +m["tlm"] = {"Tolomako", 3130514, "poz-vnc"} + +m["tln"] = {"Talondo'", 7680293, "poz-ssw"} + +m["tlo"] = {"Talodi", 36525, "alv-tal"} + +m["tlp"] = {"Filomena Mata-Coahuitlán Totonac", 5449202, "nai-ttn", Latn} + +m["tlq"] = {"Tai Loi", 
7675784, "mkh-pal"} + +m["tlr"] = {"Talise", 3514510, "poz-sls"} + +m["tls"] = {"Tambotalo", 7681065, "poz-vnc"} + +m["tlt"] = {"Teluti", 12953194, "poz-cma"} + +m["tlu"] = {"Tulehu", 7852006, "poz-cma"} + +m["tlv"] = {"Taliabu", 3514498, "poz-cma", Latn} + +m["tlx"] = {"Khehek", 3196124, "poz-aay"} + +m["tly"] = {"Talysh", 34318, "xme-ttc", {"Latn", "Cyrl", "fa-Arab"}, ancestors = {"xme-ttc-pro"}} + +m["tma"] = {"Tama (Chad)", 57001, "sdv-tmn"} + +m["tmb"] = {"Avava", 2157461, "poz-vnc"} + +m["tmc"] = {"Tumak", 3121045, "cdc-est"} + +m["tmd"] = {"Haruai", 12632146, "ngf-mad"} + +m["tme"] = {"Tremembé", 5246937} + +m["tmf"] = {"Toba-Maskoy", 3033544, "sai-mas"} + +m["tmg"] = {"Ternateño", 7232597} + +m["tmh"] = {"Tuareg", 34065, "ber", {"Tfng", "Arab", "Latn"}} + +m["tmi"] = {"Tutuba", 7857052, "poz-vnc"} + +m["tmj"] = {"Samarokena", 7408865, "paa-tkw"} + +m["tmk"] = {"Northwestern Tamang", 15616509, "sit-tam"} + +m["tml"] = {"Tamnim Citak", 12643315, "ngf"} + +m["tmm"] = {"Tai Thanh", 7675842, "tai-swe"} + +m["tmn"] = {"Taman (Indonesia)", 7680671, "poz", Latn} + +m["tmo"] = {"Temoq", 7698205, "mkh-asl"} + +m["tmp"] = {"Tai Mène", 7675790, "tai-nor"} + +m["tmq"] = {"Tumleo", 7852641, "poz-ocw"} + +m["tms"] = {"Tima", 36684, "nic-ktl"} + +m["tmt"] = {"Tasmate", 7687571, "poz-vnc"} + +m["tmu"] = {"Iau", 56867, "paa-lkp"} + +m["tmv"] = {"Motembo", 11013108, "bnt-bun"} + +m["tmy"] = {"Tami", 3514812, "poz-oce"} + +m["tmz"] = {"Tamanaku", 3441435, "sai-car", Latn} + +m["tna"] = {"Tacana", 3182551, "sai-tac", Latn} + +m["tnb"] = {"Western Tunebo", 3181238, "cba"} + +m["tnc"] = {"Tanimuca-Retuarã", 36535, "sai-tuc", Latn} + +m["tnd"] = {"Angosturas Tunebo", 25559604, "cba"} + +m["tne"] = {"Tinoc Kallahan", 3192219} + +m["tng"] = {"Tobanga", 3440501, "cdc-est"} + +m["tnh"] = {"Maiani", 6735243, "ngf-mad", Latn} + +m["tni"] = {"Tandia", 7682454, "poz-hce", Latn} + +m["tnk"] = {"Kwamera", 3200806, "poz-oce"} + +m["tnl"] = {"Lenakel", 3229429, "poz-oce"} + +m["tnm"] = 
{"Tabla", 7673105, "paa-sen"} + +m["tnn"] = {"North Tanna", 957945, "poz-oce"} + +m["tno"] = {"Toromono", 510544, "sai-tac", Latn} + +m["tnp"] = {"Whitesands", 3063761, "poz-oce"} + +m["tnq"] = {"Taíno", 5232952, "awd-taa", Latn, ancestors = {"awd-taa-pro"}} + +m["tnr"] = {"Bedik", 35096, "alv-ten"} + +m["tns"] = {"Tenis", 7699870, "poz-ocw"} + +m["tnt"] = {"Tontemboan", 3531666, "phi", Latn} + +m["tnu"] = {"Tay Khang", 6362363, "tai"} + +m["tnv"] = {"Tangchangya", 7682361, "inc-eas", {"Cakm"}, ancestors = {"inc-obn"}} + +m["tnw"] = {"Tonsawang", 3531660, "phi"} + +m["tnx"] = {"Tanema", 2106984, "poz-oce"} + +m["tny"] = {"Tongwe", 7821200, "bnt"} + +m["tnz"] = {"Ten'edn", 3073453, "mkh-asl", Latn} + +m["tob"] = {"Toba", 3113756, "sai-guc", Latn} + +m["toc"] = {"Coyutla Totonac", 15615591, "nai-ttn", Latn} + +m["tod"] = {"Toma", 11055484, "dmn-msw"} + +m["tof"] = {"Gizrra", 5565941} + +m["tog"] = {"Tonga (Malawi)", 3847648, "bnt-nys", Latn} + +m["toh"] = {"Tonga (Mozambique)", 7820988, "bnt-bso"} + +m["toi"] = {"Tonga (Zambia)", 34101, "bnt-bot"} + +m["toj"] = {"Tojolabal", 36762, "myn"} + +m["tol"] = {"Tolowa", 20827, "ath-pco", Latn} + +m["tom"] = {"Tombulu", 3531199, "phi"} + +m["too"] = {"Xicotepec de Juárez Totonac", 8044353, "nai-ttn", Latn} + +m["top"] = {"Papantla Totonac", 56329, "nai-ttn", Latn} + +m["toq"] = {"Toposa", 3033588, "sdv-ttu"} + +m["tor"] = {"Togbo-Vara Banda", 11002922, "bad-cnt"} + +m["tos"] = {"Highland Totonac", 13154149, "nai-ttn", Latn} + +m["tou"] = {"Tho", 22694631, "mkh-vie"} + +m["tov"] = {"Upper Taromi", 12953183, "xme-ttc", ancestors = {"xme-ttc-cen"}} + +m["tow"] = {"Jemez", 3912876, "nai-kta", Latn} + +m["tox"] = {"Tobian", 34022, "poz-mic"} + +m["toy"] = {"Topoiyo", 7824977, "poz-kal"} + +m["toz"] = {"To", 7811216, "alv-mbm"} + +m["tpa"] = {"Taupota", 7688832, "poz-ocw"} + +m["tpc"] = {"Azoyú Me'phaa", 25559730, "omq"} + +m["tpe"] = {"Tippera", 16115423, "tbq-bdg"} + +m["tpf"] = {"Tarpia", 12953185, "poz-ocw"} + +m["tpg"] = 
{"Kula", 6442714, "qfa-tap"} + +m["tpi"] = {"Tok Pisin", 34159, "crp", Latn, ancestors = {"en"}} + +m["tpj"] = {"Tapieté", 3121063} + +m["tpk"] = {"Tupinikin", 33924, "tup-gua"} + +m["tpl"] = {"Tlacoapa Me'phaa", 16115511, "omq"} + +m["tpm"] = {"Tampulma", 36590, "nic-gnw"} + +m["tpn"] = {"Tupinambá", 31528147, "tup-gua", Latn} + +m["tpo"] = {"Tai Pao", 7675795, "tai-nor"} + +m["tpp"] = {"Pisaflores Tepehua", 56349, "nai-ttn"} + +m["tpq"] = {"Tukpa", 12953230, "sit-las"} + +m["tpr"] = {"Tuparí", 3542217, "tup", Latn} + +m["tpt"] = {"Tlachichilco Tepehua", 56330, "nai-ttn"} + +m["tpu"] = {"Tampuan", 3514882, "mkh-ban"} + +m["tpv"] = {"Tanapag", 3397371, "poz-mic"} + +m["tpw"] = {"Old Tupi", 56944, "tup-gua", Latn} + +m["tpx"] = {"Acatepec Me'phaa", 31157882, "omq"} + +m["tpy"] = {"Trumai", 12294279, "qfa-iso"} + +m["tpz"] = {"Tinputz", 3529205, "poz-ocw"} + +m["tqb"] = {"Tembé", 10322157, "tup-gua", Latn} + +m["tql"] = {"Lehali", 3229119, "poz-oce"} + +m["tqm"] = {"Turumsa", 7856508, "paa"} + +m["tqn"] = {"Tenino", 15699255, "nai-shp", Latn, ancestors = {"nai-spt"}} + +m["tqo"] = {"Toaripi", 7811403, "ngf"} + +m["tqp"] = {"Tomoip", 3531388, "poz-ocw"} + +m["tqq"] = {"Tunni", 3514343, "cus"} + +m["tqr"] = {"Torona", 36679, "alv-tal"} + +m["tqt"] = {"Western Totonac", 7116691, "nai-ttn", Latn} + +m["tqu"] = {"Touo", 56750} + +m["tqw"] = {"Tonkawa", 2454881, "qfa-iso"} + +m["tra"] = {"Tirahi", 3812406, "inc-dar"} + +m["trb"] = {"Terebu", 7701797, "poz-ocw"} + +m["trc"] = {"Copala Triqui", 12953935, "omq-tri", Latn} + +m["trd"] = {"Turi", 7854914, "mun"} + +m["tre"] = {"East Tarangan", 18609750, "poz"} + +m["trf"] = {"Trinidadian Creole English", 7842493, "crp", ancestors = {"en"}} + +m["trg"] = {"Lishán Didán", 56473, "sem-nna"} + +m["trh"] = {"Turaka", 12953237, "ngf"} + +m["tri"] = {"Trió", 56885, "sai-car", Latn} + +m["trj"] = {"Toram", 3441225, "cdc-est"} + +m["trl"] = {"Traveller Scottish", 3915671} + +m["trm"] = {"Tregami", 34081, "nur-sou"} + +m["trn"] = 
{"Trinitario", 3539279, "awd"} + +m["tro"] = {"Tarao", 3515603, "tbq-kuk", Latn} + +m["trp"] = {"Kokborok", 35947, "tbq-bdg"} + +m["trq"] = {"San Martín Itunyoso Triqui", 12953934, "omq-tri", Latn} + +m["trr"] = {"Taushiro", 1957508} + +m["trs"] = {"Chicahuaxtla Triqui", 3539587, "omq-tri", Latn} + +m["trt"] = {"Tunggare", 615071, "paa-egb"} + +m["tru"] = {"Turoyo", 34040, "sem-cna", {"Syrc", "Latn"}, entry_name = {from = {"[" .. u(0x0711, 0x0730, ("-"):byte(), 0x074A) .. "]"}, to = {}}} + +m["trv"] = {"Taroko", 716686, "map-ata", Latn} + +m["trw"] = {"Torwali", 2665246, "inc-dar", {"ur-Arab"}} + +m["trx"] = {"Tringgus", 7842365, "day"} + +m["try"] = {"Turung", 7856514, "tai-swe", {"as-Beng"}} + +m["trz"] = {"Torá", 7827518, "sai-cpc"} + +m["tsa"] = {"Tsaangi", 36675, "bnt-nze"} + +m["tsb"] = {"Tsamai", 2371358, "cus"} + +m["tsc"] = {"Tswa", 2085051, "bnt-tsr"} + +m["tsd"] = { + "Tsakonian", + 220607, + "grk", + {"Grek"}, + sort_key = { -- Keep this synchronized with el, grc, cpg, pnt + from = {"[ᾳάᾴὰᾲᾶᾷἀᾀἄᾄἂᾂἆᾆἁᾁἅᾅἃᾃἇᾇ]", "[έὲἐἔἒἑἕἓ]", "[ῃήῄὴῂῆῇἠᾐἤᾔἢᾒἦᾖἡᾑἥᾕἣᾓἧᾗ]", "[ίὶῖἰἴἲἶἱἵἳἷϊΐῒῗ]", "[όὸὀὄὂὁὅὃ]", "[ύὺῦὐὔὒὖὑὕὓὗϋΰῢῧ]", "[ῳώῴὼῲῶῷὠᾠὤᾤὢᾢὦᾦὡᾡὥᾥὣᾣὧᾧ]", "ῥ", "ς"}, + to = {"α", "ε", "η", "ι", "ο", "υ", "ω", "ρ", "σ"} + }, + ancestors = {"grc"} +} + +m["tse"] = {"Tunisian Sign Language", 7853191, "sgn"} + +m["tsf"] = {"Southwestern Tamang", 12953176, "sit-tam"} + +m["tsg"] = {"Tausug", 34142, "phi", {"Latn", "Arab"}} + +m["tsh"] = {"Tsuvan", 3502326, "cdc-cbm"} + +m["tsi"] = {"Tsimshian", 20085721, "nai-tsi"} + +m["tsj"] = {"Tshangla", 36840, "sit-tsk", {"Latn", "Deva", "Tibt"}} + +m["tsl"] = {"Ts'ün-Lao", 3446675, "tai"} + +m["tsm"] = {"Turkish Sign Language", 36885, "sgn"} + +m["tsp"] = {"Northern Toussian", 11155635, "alv-sav"} + +m["tsq"] = {"Thai Sign Language", 7709156, "sgn"} + +m["tsr"] = {"Akei", 2828964, "poz-vnc"} + +m["tss"] = {"Taiwan Sign Language", 34019, "sgn-jsl"} + +m["tsu"] = {"Tsou", 716681, "map", Latn} + +m["tsv"] = {"Tsogo", 36674, "bnt-tso"} + 
+m["tsw"] = {"Tsishingini", 13123571, "nic-kam"} + +m["tsx"] = {"Mubami", 6930815, "ngf"} + +m["tsy"] = {"Tebul Sign Language", 7692090, "sgn"} + +m["tta"] = {"Tutelo", 2311602, "sio-ohv"} + +m["ttb"] = {"Gaa", 3438361, "nic-dak"} + +m["ttc"] = {"Tektiteko", 36686, "myn"} + +m["ttd"] = {"Tauade", 7688634} + +m["tte"] = {"Bwanabwana", 5003667, "poz-ocw", Latn} + +m["ttf"] = {"Tuotomb", 7853459, "nic-mbw", Latn} + +m["ttg"] = {"Tutong", 3507990, "poz-swa"} + +m["tth"] = {"Upper Ta'oih", 3512660, "mkh-kat"} + +m["tti"] = {"Tobati", 7811556, "poz-ocw"} + +m["ttj"] = {"Tooro", 7824218, "bnt-nyg"} + +m["ttk"] = {"Totoro", 3532756, "sai-bar"} + +m["ttl"] = {"Totela", 10962316, "bnt-bot"} + +m["ttm"] = {"Northern Tutchone", 20822, "ath-nor", Latn} + +m["ttn"] = {"Towei", 7829606, "paa-pau"} + +m["tto"] = {"Lower Ta'oih", 25559539, "mkh-kat"} + +m["ttp"] = {"Tombelala", 6799663, "poz-kal"} + +m["ttq"] = {"Tawallammat Tamajaq", 56390, "ber", {"Tfng", "Arab", "Latn"}, ancestors = {"tmh"}} + +m["ttr"] = {"Tera", 56267, "cdc-cbm"} + +m["tts"] = {"Isan", 33417, "tai-swe", {"Thai"}, sort_key = {from = {"[%pๆ]", "[็-๎]", "([เแโใไ])([ก-ฮ])"}, to = {"", "", "%2%1"}}} + +m["ttt"] = {"Tat", 56489, "ira-swi", {"Cyrl", "Latn", "Armn", "fa-Arab"}, ancestors = {"fa"}} + +m["ttu"] = {"Torau", 3532208, "poz-ocw"} + +m["ttv"] = {"Titan", 3445811, "poz-aay"} + +m["ttw"] = {"Long Wat", 7856961, "poz-swa"} + +m["tty"] = {"Sikaritai", 7513600, "paa-lkp"} + +m["ttz"] = {"Tsum", 12953223, "sit-kyk"} + +m["tua"] = {"Wiarumus", 7998045, "qfa-tor", Latn} + +m["tub"] = {"Tübatulabal", 56704, "azc", Latn} + +m["tuc"] = {"Mutu", 3331003, "poz-ocw", Latn} + +m["tud"] = {"Tuxá", 7857217} + +m["tue"] = {"Tuyuca", 2520538, "sai-tuc", Latn} + +m["tuf"] = {"Central Tunebo", 12953942, "cba"} + +m["tug"] = {"Tunia", 863721, "alv-bua"} + +m["tuh"] = {"Taulil", 3516141, "paa-bng"} + +m["tui"] = {"Tupuri", 36646, "alv-mbm", Latn} + +m["tuj"] = {"Tugutil", 12953228} + +m["tul"] = {"Tula", 3914907, "alv-wjk"} + 
+m["tum"] = {"Tumbuka", 34138, "bnt-nys", Latn} + +m["tun"] = {"Tunica", 56619, "qfa-iso", Latn} + +m["tuo"] = {"Tucano", 3541834, "sai-tuc", Latn} + +m["tuq"] = {"Tedaga", 36639, "ssa-sah"} + +m["tus"] = {"Tuscarora", 36944, "iro", {"Latinx"}} + +m["tuu"] = {"Tututni", 20627, "ath-pco", Latn} + +m["tuv"] = {"Turkana", 36958, "sdv-ttu", Latn} + +m["tux"] = {"Tuxináwa", 7857204, "sai-pan", Latn} + +m["tuy"] = {"Tugen", 3541935, "sdv-nma"} + +m["tuz"] = {"Turka", 36643, "nic-gur", Latn} + +m["tva"] = {"Vaghua", 3553248, "poz-ocw", Latn} + +m["tvd"] = {"Tsuvadi", 3914936, "nic-kam"} + +m["tve"] = {"Te'un", 7690709, "poz-cet", Latn} + +m["tvk"] = {"Southeast Ambrym", 252411, "poz-vnc", Latn} + +m["tvl"] = {"Tuvaluan", 34055, "poz-pnp", Latn} + +m["tvm"] = {"Tela-Masbuar", 7695666, "poz-tim"} + +m["tvn"] = {"Tavoyan", 7689158, "tbq-brm", {"Mymr"}, ancestors = {"obr"}} + +m["tvo"] = {"Tidore", 3528199, "paa-wpa"} + +m["tvs"] = {"Taveta", 15632387, "bnt-par"} + +m["tvt"] = {"Tutsa Naga", 7856987, "sit-tno"} + +m["tvu"] = {"Tunen", 36632, "nic-mbw"} + +m["tvw"] = {"Sedoa", 7445362, "poz-kal"} + +m["tvx"] = {"Taivoan", 1975271, "map", Latn} + +m["tvy"] = {"Timor Pidgin", 4904029, "crp", ancestors = {"pt"}} + +m["twa"] = {"Twana", 7857412, "sal"} + +m["twb"] = {"Western Tawbuid", 12953912, "phi"} + +m["twc"] = {"Teshenawa", 3436597, "phi"} + +m["twe"] = {"Teiwa", 3519302, "ngf", Latn} + +m["twf"] = {"Taos", 7684320, "nai-kta", Latn} + +m["twg"] = {"Tereweng", 12953200, "qfa-tap"} + +m["twh"] = { + "Tai Dón", + 7675751, + "tai-swe", + {"Tavt"}, + -- translit_module = "Tavt-translit", + sort_key = {from = {"[꪿ꫀ꫁ꫂ]", "([ꪵꪶꪹꪻꪼ])([ꪀ-ꪯ])"}, to = {"", "%2%1"}} +} + +m["twm"] = {"Tawang Monpa", 36586, "sit-ebo"} + +m["twn"] = {"Twendi", 7857682, "nic-mmb"} + +m["two"] = {"Tswapong", 3446241, "bnt-sts"} + +m["twp"] = {"Ere", 3056045, "poz-aay", Latn} + +m["twq"] = {"Tasawaq", 36564, "son"} + +m["twr"] = {"Southwestern Tarahumara", 12953909, "azc-trc", Latn} + +m["twt"] = {"Turiwára", 
3542307, "tup-gua", Latn} + +m["twu"] = {"Termanu", 7702572, "poz-tim"} + +m["tww"] = {"Tuwari", 7857159, "paa-spk"} + +m["twy"] = {"Tawoyan", 3513542, "poz-bre"} + +m["txa"] = {"Tombonuo", 7818692, "poz-san"} + +m["txb"] = { + "Tocharian B", + 3199353, + "ine-toc", + Latn, + wikipedia_article = "Tocharian languages", -- wikidata id has no associated article + standardChars = "ACEIK-PR-UWYaceik-pr-uwyÄäĀāṂṃṄṅÑñŚśṢṣ" .. PUNCTUATION +} + +m["txc"] = {"Tsetsaut", 20829, "ath-nor", Latn} + +m["txe"] = {"Totoli", 7828387, "poz-tot", Latn} + +m["txg"] = {"Tangut", 2727930, "sit-qia", {"Tang"}} + +m["txj"] = {"Tarjumo", nil, "ssa-sah", {"Latn", "Arab"}} + +m["txh"] = {"Thracian", 36793, "ine", {"Grek"}, translit_module = "el-translit"} + +m["txi"] = {"Ikpeng", 9344891, "sai-car", Latn} + +m["txm"] = {"Tomini", 7818911, "poz"} + +m["txn"] = {"West Tarangan", 3515594, "poz"} + +m["txo"] = {"Toto", 36709, "sit-dhi"} + +m["txq"] = {"Tii", 7801784, "poz-tim"} + +m["txr"] = {"Tartessian", 36795} + +m["txs"] = {"Tonsea", 3531659, "phi"} + +m["txt"] = {"Citak", 3447279, "ngf"} + +m["txu"] = {"Kayapó", 3101212, "sai-nje", Latn} + +m["txx"] = {"Tatana", 18643518, "poz-san"} + +m["tya"] = {"Tauya", 7688978, "ngf-mad"} + +m["tye"] = {"Kyenga", 3913304, "dmn-bbu", Latn} + +m["tyh"] = {"O'du", 3347428, "mkh"} + +m["tyi"] = {"Teke-Tsaayi", 33123613, "bnt-nze"} + +m["tyj"] = {"Tai Do", 7675746, "tai-nor"} + +m["tyl"] = {"Thu Lao", 12953921, "tai-cen"} + +m["tyn"] = {"Kombai", 6428241, "ngf"} + +m["typ"] = {"Kuku-Thaypan", 3915693, "aus-pmn", Latn} + +m["tyr"] = {"Tai Daeng", 3915207, "tai-swe"} + +m["tys"] = {"Sapa", 3446668, "tai-sap", Latn} + +m["tyt"] = {"Tày Tac", 7862029, "tai-swe"} + +m["tyu"] = {"Kua", 3832933, "khi-kal"} + +m["tyv"] = {"Tuvan", 34119, "trk-sib", {"Cyrl"}, translit_module = "tyv-translit", override_translit = true} + +m["tyx"] = {"Teke-Tyee", 36634, "bnt-nze"} + +m["tyz"] = { + "Tày", -- This does not mean its umbrella "Tai" languages. 
+ 2511476, "tai-tay", Latn +} + +m["tza"] = {"Tanzanian Sign Language", 7684177, "sgn"} + +m["tzh"] = {"Tzeltal", 36808, "myn", Latn} + +m["tzj"] = {"Tz'utujil", 36941, "myn", Latn} + +m["tzl"] = { + "Talossan", + 1063911, + "art", + Latn, + type = "appendix-constructed", + sort_key = {from = {"[àáâäå]", "ç", "ð", "[ëèéê]", "[ìíîï]", "ñ", "[öòóô]", "ß", "[üùúû]", "þ"}, to = {"a", "c", "d∙", "e", "i", "n", "o", "s", "u", "z∙"}} -- bullet so ð and þ are sorted after all other words with d and z, respectively +} + +m["tzm"] = {"Central Atlas Tamazight", 49741, "ber", {"Tfng", "Arab", "Latn"}, translit_module = "Tfng-translit"} + +m["tzn"] = {"Tugun", 12953225, "poz-tim"} + +m["tzo"] = {"Tzotzil", 36809, "myn", Latn} + +m["tzx"] = {"Tabriak", 56872, "paa-lsp", Latn} + +return m diff --git a/wiktra/wikt/translit/languages/data3/u.lua b/wiktra/wikt/translit/languages/data3/u.lua new file mode 100644 index 0000000..5229c84 --- /dev/null +++ b/wiktra/wikt/translit/languages/data3/u.lua @@ -0,0 +1,283 @@ +local u = mw.ustring.char + +-- UTF-8 encoded strings for some commonly used diacritics +local GRAVE = u(0x0300) +local ACUTE = u(0x0301) +local CIRC = u(0x0302) +local TILDE = u(0x0303) +local MACRON = u(0x0304) +local BREVE = u(0x0306) +local DOTABOVE = u(0x0307) +local DIAER = u(0x0308) +local CARON = u(0x030C) +local DGRAVE = u(0x030F) +local INVBREVE = u(0x0311) +local DOTBELOW = u(0x0323) +local RINGBELOW = u(0x0325) +local CEDILLA = u(0x0327) + +local Latn = {"Latn"} + +local m = {} + +m["uam"] = {"Uamué", 3441418} + +m["uan"] = {"Kuan", 6441085} + +m["uar"] = {"Tairuma", 7676386, "ngf"} + +m["uba"] = {"Ubang", 3914467, "nic-ben", Latn} + +m["ubi"] = {"Ubi", 56264} + +m["ubl"] = {"Buhi'non Bikol", 18664494, "phi"} + +m["ubr"] = {"Ubir", 3547642, "poz-ocw", Latn} + +m["ubu"] = {"Umbu-Ungu", 12953245, "ngf"} + +m["uby"] = {"Ubykh", 36931, "cau-nwc", {"Latn", "Cyrl"}} + +m["uda"] = {"Uda", 11011951, "nic-lcr"} + +m["ude"] = {"Udihe", 13235, "tuw", {"Cyrl"}} + +m["udg"] 
= {"Muduga", 16886762, "dra"} + +m["udi"] = {"Udi", 36867, "cau-nec", {"Cyrl", "Latn", "Armn", "Geor"}, ancestors = {"xag"}, translit_module = "translit-redirect", override_translit = true} + +m["udj"] = {"Ujir", 14916906, "poz-cet"} + +m["udl"] = {"Uldeme", 3515078, "cdc-cbm"} + +m["udm"] = {"Udmurt", 13238, "urj-prm", {"Cyrl"}, translit_module = "udm-translit", override_translit = true} + +m["udu"] = {"Uduk", 3182573, "ssa-kom"} + +m["ues"] = {"Kioko", 18343036} + +m["ufi"] = {"Ufim", 7877531, "ngf-fin", Latn} + +m["uga"] = {"Ugaritic", 36928, "sem-nwe", {"Ugar"}, translit_module = "uga-translit"} + +m["ugb"] = {"Kuku-Ugbanh", 10549854} + +m["uge"] = {"Ughele", 966303, "poz-ocw"} + +m["ugn"] = {"Ugandan Sign Language", 7877677, "sgn"} + +m["ugo"] = {"Gong", 3448919, "tbq-lob", {"Thai"}, sort_key = {from = {"[%pๆ]", "[็-๎]", "([เแโใไ])([ก-ฮ])"}, to = {"", "", "%2%1"}}} + +m["ugy"] = {"Uruguayan Sign Language", 7901470, "sgn"} + +m["uha"] = {"Uhami", 3913328, "alv-nwd", Latn} + +m["uhn"] = {"Damal", 4748974} + +m["uis"] = {"Uisai", 7878123, "paa-sbo"} + +m["uiv"] = {"Iyive", 11128658, "nic-tvc", Latn} + +m["uji"] = {"Tanjijili", 3914939, "nic-pls"} + +m["uka"] = {"Kaburi", 6344482} + +m["ukg"] = {"Ukuriguma", 7878623, "ngf-mad"} + +m["ukh"] = {"Ukhwejo", 36623, "bnt-bek"} + +m["ukk"] = {"Muak Sa-aak", nil, "mkh-pal"} + +m["ukl"] = {"Ukrainian Sign Language", 10322106, "sgn"} + +m["ukp"] = {"Ukpe-Bayobiri", 3914470, "nic-ben", Latn} + +m["ukq"] = {"Ukwa", 7878635, "nic-ief"} + +m["uks"] = {"Kaapor Sign Language", 3322101, "sgn"} + +m["uku"] = {"Ukue", 3913387, "alv-nwd", Latn} + +m["ukw"] = {"Ukwuani-Aboh-Ndoni", 36636, "alv", Latn} + +m["uky"] = {"Kuuk Yak", 6448719, "aus-psw", Latn} + +m["ula"] = {"Fungwa", 5509187, "nic-shi"} + +m["ulb"] = {"Ulukwumi", 36722, "alv-yor"} + +m["ulc"] = {"Ulch", 13239, "tuw", {"Cyrl", "Latn"}} + +m["ule"] = {"Lule", 12635889, nil, Latn} + +m["ulf"] = {"Afra", 4477735, "paa-pau"} + +m["uli"] = {"Ulithian", 36842, "poz-mic"} + 
+m["ulk"] = {"Meriam", 788174, "ngf", Latn} + +m["ull"] = {"Ullatan", 8761579, "dra"} + +m["ulm"] = {"Ulumanda'", 3501892} + +m["uln"] = {"Unserdeutsch", 13244, "crp", Latn, ancestors = {"de"}} + +m["ulu"] = {"Uma' Lung", 3548186, "poz-swa"} + +m["ulw"] = {"Ulwa", 2405552} + +m["uma"] = {"Umatilla", 12953952, "nai-shp", Latn, ancestors = {"nai-spt"}} + +m["umb"] = {"Umbundu", 36983, "bnt", Latn} + +m["umc"] = {"Marrucinian", 36110, "itc", {"Latn", "Ital"}} + +m["umd"] = {"Umbindhamu", 7881346, "aus-pmn"} + +m["umg"] = {"Umbuygamu", 3915677, "aus-pmn"} + +m["umi"] = {"Ukit", 7878321} + +m["umm"] = {"Umon", 3915448, "nic-ucn", Latn} + +m["umn"] = {"Makyan Naga", 6740516, "sit-kch"} + +m["umo"] = {"Umotína", 7881740, "sai-mje"} + +m["ump"] = {"Umpila", 12953954, "aus-pmn", Latn} + +m["umr"] = {"Umbugarla", 2980392} + +m["ums"] = {"Pendau", 7162371, "poz-tot"} + +m["umu"] = {"Munsee", 56547, "del", Latn, entry_name = {from = {"Á", "Ă", "É", "Í", "Ó", "Ú", "Ŭ", "á", "á", "ă", "é", "í", "ó", "ú", "ŭ"}, to = {"A", "A", "E", "I", "O", "U", "U", "a", "á", "a", "e", "i", "o", "u", "u"}}} + +m["una"] = {"North Watut", 15887898, "poz-ocw", Latn} + +m["und"] = {"Undetermined", nil, "qfa-not", {"Zyyy"}} + +m["une"] = {"Uneme", 3913357, "alv-yek", Latn} + +m["ung"] = {"Ngarinyin", 1284885, "aus-wor", Latn} + +m["unk"] = {"Enawené-Nawé", 3307184, "awd", Latn} + +m["unm"] = { + "Unami", 3549180, "del", Latn + -- Don't strip diacritics from entry names, per [[WT:Grease pit/2020/May]]. 
+ -- entry_name = { + -- from = {"À", "Ä", "È", "Ë", "Ì", "Ò", "Ù", "à", "ä", "è", "ë", "ì", "ò", "ù"}, + -- to = {"A", "A", "E", "E", "I", "O", "U", "a", "a", "e", "e", "i", "o", "u"}} , +} + +m["unn"] = {"Kurnai", nil, "aus-pam", Latn} + +m["unr"] = { + "Mundari", + 3327828, + "mun", + {"Deva"}, + translit_module = "hi-translit" -- for now +} + +m["unu"] = {"Unubahe", 7897776} + +m["unx"] = {"Munda", 36264959, "mun", Latn} + +m["unz"] = {"Unde Kaili", 12953596, "poz-kal", Latn} + +m["uok"] = {"Uokha", 3441216, "alv-edo", Latn} + +m["upi"] = {"Umeda", 7881465, "paa-brd"} + +m["upv"] = {"Uripiv-Wala-Rano-Atchin", 13249, "poz-vnc", Latn} + +m["ura"] = {"Urarina", 1579560} + +m["urb"] = {"Urubú-Kaapor", 13893353, "tup-gua", Latn} + +m["urc"] = {"Urningangg", 10710522} + +m["ure"] = {"Uru", 2992892} + +m["urf"] = {"Uradhi", 3915680, "aus-pam", Latn} + +m["urg"] = {"Urigina", 7900603, "ngf", Latn} + +m["urh"] = {"Urhobo", 36663, "alv-swd", Latn} + +m["uri"] = {"Urim", 7900609, "qfa-tor", Latn} + +m["urk"] = {"Urak Lawoi'", 7899573, "poz-mly", {"Thai"}, sort_key = {from = {"[%pๆ]", "[็-๎]", "([เแโใไ])([ก-ฮ])"}, to = {"", "", "%2%1"}}} + +m["url"] = {"Urali", 7899602, "dra", {"Knda"}} + +m["urm"] = {"Urapmin", 7899769, "ngf-okk"} + +m["urn"] = {"Uruangnirin", 7901389, "poz-cet", Latn} + +m["uro"] = {"Ura (New Guinea)", 3121049, "paa-bng", Latn} + +m["urp"] = {"Uru-Pa-In", 7901376, "tup-gua", Latn} + +m["urr"] = {"Lehalurup", 3272124} + +m["urt"] = {"Urat", 3502084, "qfa-tor", Latn} + +m["uru"] = {"Urumi", 7901530, "tup", Latn} + +m["urv"] = {"Uruava", 36875, "poz-ocw", Latn} + +m["urw"] = {"Sop", 7562808, "ngf-mad", Latn} + +m["urx"] = {"Urimo", 7900611, "qfa-tor", Latn} + +m["ury"] = {"Orya", 7105295, "paa-tkw", Latn} + +m["urz"] = {"Uru-Eu-Wau-Wau", 10266012, "tup-gua", Latn} + +m["usa"] = {"Usarufa", 7901714, "paa-kag", Latn} + +m["ush"] = {"Ushojo", 3540446} + +m["usi"] = {"Usui", 12644231} + +m["usk"] = {"Usaghade", 3914048, "nic-lcr", Latn} + +m["usp"] = 
{"Uspanteco", 36728, "myn", Latn} + +m["uss"] = {"Saare", nil, "nic-knn", Latn} + +m["usu"] = {"Uya", 7904082} + +m["uta"] = {"Otank", 3913990, "nic-tvc", Latn} + +m["ute"] = {"Ute", 13260, "azc-num", Latn} + +m["uth"] = {"Hun", nil, "nic-knn", Latn} + +m["utp"] = {"Aba", 2841465, "poz-oce", Latn} + +m["utr"] = {"Etulo", 35262, "alv-ido", Latn} + +m["utu"] = {"Utu", 7903469, "ngf-mad"} + +m["uum"] = {"Urum", 13257, "trk-kcu", {"Cyrl"}} + +m["uun"] = {"Kulon-Pazeh", 36435, "map", Latn} + +m["uur"] = {"Ura (Vanuatu)", 7899531, "poz-oce", Latn} + +m["uuu"] = {"U", 953082, "mkh-pal"} + +m["uve"] = {"West Uvean", 36837, "poz-pnp", Latn} + +m["uvh"] = {"Uri", 7900540, "ngf-fin", Latn} + +m["uvl"] = {"Lote", 3259972, "poz-ocw", Latn} + +m["uwa"] = {"Kuku-Uwanh", 3915687, "aus-pmn"} + +m["uya"] = {"Doko-Uyanga", 7904095, "nic-ucr", Latn} + +return m diff --git a/wiktra/wikt/translit/languages/data3/v.lua b/wiktra/wikt/translit/languages/data3/v.lua new file mode 100644 index 0000000..2340851 --- /dev/null +++ b/wiktra/wikt/translit/languages/data3/v.lua @@ -0,0 +1,195 @@ +local u = mw.ustring.char + +-- UTF-8 encoded strings for some commonly-used diacritics +local GRAVE = u(0x0300) +local ACUTE = u(0x0301) +local CIRC = u(0x0302) +local TILDE = u(0x0303) +local MACRON = u(0x0304) +local BREVE = u(0x0306) +local DOTABOVE = u(0x0307) +local DIAER = u(0x0308) +local CARON = u(0x030C) +local DGRAVE = u(0x030F) +local INVBREVE = u(0x0311) +local DOTBELOW = u(0x0323) +local RINGBELOW = u(0x0325) +local CEDILLA = u(0x0327) + +local Latn = {"Latn"} + +local m = {} + +m["vaa"] = {"Vaagri Booli", 7907798} + +m["vae"] = {"Vale", 3450194, "csu-val"} + +m["vag"] = {"Vagla", 36637, "nic-gnw"} + +m["vah"] = {"Varhadi-Nagpuri", 155645, "inc-sou", ancestors = {"mr"}} + +m["vai"] = {"Vai", 36939, "dmn-vak", {"Vaii"}, translit_module = "Vaii-translit"} + +m["vaj"] = {"Sekele", 56528} + +m["val"] = {"Vehes", 7918407} + +m["vam"] = {"Vanimo", 3327415, "paa-msk"} + +m["van"] = {"Valman", 
7912479, "qfa-tor"} + +m["vao"] = {"Vao", 2160405, "poz-vnc"} + +m["vap"] = {"Vaiphei", 56368, "tbq-kuk"} + +m["var"] = {"Huarijio", 10974017, "azc-trc", Latn} + +m["vas"] = {"Vasavi", 765418} + +m["vau"] = {"Vanuma", 7915259, "bnt-nya"} + +m["vav"] = {"Varli", 7915983, "inc-sou", {"Deva", "Gujr"}} + +m["vay"] = {"Vayu", 7917585, "sit-kiw"} + +m["vbb"] = {"Southeast Babar", 12952247, "poz-tim"} + +m["vbk"] = {"Southwestern Bontoc", nil, "phi", Latn} + +m["vec"] = {"Venetian", 32724, "roa", Latn} + +m["ved"] = {"Veddah", 2567934} + +m["vem"] = {"Vemgo-Mabas", 56268} + +m["veo"] = {"Ventureño", 56712, "nai-chu", Latn} + +m["vep"] = {"Veps", 32747, "fiu-fin", Latn} + +m["ver"] = {"Mom Jango", 35862, "alv-dur"} + +m["vgr"] = {"Vaghri", 7908480, "inc-bhi", {"Gujr"}, translit_module = "gu-translit"} + +m["vgt"] = {"Flemish Sign Language", 2107617, "sgn"} + +m["vic"] = {"Virgin Islands Creole", 7933935, "crp", Latn, ancestors = {"en"}} + +m["vid"] = {"Vidunda", 7928151, "bnt-ruv"} + +m["vif"] = {"Vili", 3558409, "bnt-kng"} + +m["vig"] = {"Viemo", 36912, "alv-sav"} + +m["vil"] = {"Vilela", 3409297} + +m["vis"] = {"Vishavan", 14916908, "dra"} + +m["vit"] = {"Viti", 11011055, "nic-grf"} + +m["viv"] = {"Iduna", 5989839, "poz-ocw"} + +m["vka"] = {"Kariyarra", 13586632, "aus-nga", Latn} + +m["vki"] = {"Ija-Zuba", 11011389, "nic-pls", ancestors = {"uji"}} + +m["vkj"] = {"Kujarge", 33448} + +m["vkk"] = {"Kaur", 6378867} + +m["vkl"] = {"Kulisusu", 3200326, "poz-btk"} + +m["vkm"] = {"Kamakan", 3192316, "sai-mje", Latn} + +m["vko"] = {"Kodeoha", 3198209} + +m["vkp"] = {"Korlai Creole Portuguese", 3915520, "crp", Latn, ancestors = {"idb"}} + +m["vkt"] = {"Tenggarong Kutai Malay", 12683226} + +m["vku"] = {"Kurrama", 3915684, "aus-nga", Latn} + +m["vlp"] = {"Valpei", 7912582, "poz-vnc"} + +m["vls"] = {"West Flemish", 100103, "gmw", Latn, ancestors = {"dum"}} + +m["vma"] = {"Martuthunira", 975399, "aus-nga", Latn} + +m["vmb"] = {"Mbabaram", 3303475, "aus-pam", Latn} + +m["vmc"] = 
{"Juxtlahuaca Mixtec", 25559582, "omq-mxt", Latn} + +m["vmd"] = {"Mudu Koraga", 12952656, "dra", {"Knda"}} + +m["vme"] = {"East Masela", 18487451, "poz-tim"} + +m["vmf"] = {"East Franconian", 497345, "gmw", Latn, ancestors = {"gmh"}, sort_key = {from = {"[äàáâå]", "[ëèéê]", "[ïìíî]", "[öòóô]", "[üùúû]", "ß"}, to = {"a", "e", "i", "o", "u", "ss"}}} + +m["vmg"] = {"Minigir", 17053237, "poz-ocw", Latn} + +m["vmh"] = {"Maraghei", 36220, "xme-ttc", ancestors = {"xme-ttc-eas"}} + +m["vmi"] = {"Miwa", 10586712, "aus-wor"} + +m["vmj"] = {"Ixtayutla Mixtec", 6101163, "omq-mxt", Latn} + +m["vmk"] = {"Makhuwa-Shirima", 2963909, "bnt-mak", Latn, ancestors = {"vmw"}} + +m["vml"] = {"Malgana", 6743201, "aus-psw", Latn} + +m["vmm"] = {"Mitlatongo Mixtec", 6881813, "omq-mxt", Latn} + +m["vmp"] = {"Soyaltepec Mazatec", 7572000, nil, Latn} + +m["vmq"] = {"Soyaltepec Mixtec", 7572001, "omq-mxt", Latn} + +m["vmr"] = {"Marenje", 11128833, ancestors = {"vmw"}, "bnt-mak"} + +-- vms "Moskela" is extinct and unattested; see Wikipedia + +m["vmu"] = {"Muluridyi", 10590149} + +m["vmv"] = {"Valley Maidu", 5096458, "nai-mdu", Latn} + +m["vmw"] = {"Makhuwa", 33882, "bnt-mak", Latn} + +m["vmx"] = {"Tamazola Mixtec", 12953734, "omq-mxt", Latn} + +m["vmy"] = {"Ayautla Mazatec", 14916912, nil, Latn} + +m["vmz"] = {"Mazatlán Mazatec", 12953706, "omq-maz", Latn} + +m["vnk"] = {"Lovono", 3211090, "poz-oce"} + +m["vnm"] = {"Neve'ei", 2157431, "poz-vnc"} + +m["vnp"] = {"Vunapu", 7943647, "poz-vnc"} + +m["vor"] = {"Voro", 3914407, "alv-yun"} + +m["vot"] = {"Votic", 32858, "fiu-fin", Latn} + +m["vra"] = {"Vera'a", 3555689, nil, Latn} + +m["vro"] = {"Võro", 32762, "fiu-fin", Latn, wikimedia_codes = {"fiu-vro"}} + +m["vrs"] = {"Varisi", 3554807, "poz-ocw"} + +m["vrt"] = {"Banam Bay", 2928522, "poz-vnc"} + +m["vsi"] = {"Moldova Sign Language", 12953478, "sgn"} + +m["vsl"] = {"Venezuelan Sign Language", 3322064, "sgn"} + +m["vsv"] = {"Valencian Sign Language", 32663, "sgn"} + +m["vto"] = {"Vitou", 7937210, 
"paa-tkw"} + +m["vum"] = {"Vumbu", 36629, "bnt-sir"} + +m["vun"] = {"Vunjo", 12953261, "bnt-chg", Latn} + +m["vut"] = {"Vute", 36897, "nic-mmb", Latn} + +m["vwa"] = {"Awa (China)", 2874642, "mkh-pal"} + +return m diff --git a/wiktra/wikt/translit/languages/data3/w.lua b/wiktra/wikt/translit/languages/data3/w.lua new file mode 100644 index 0000000..ee5e95e --- /dev/null +++ b/wiktra/wikt/translit/languages/data3/w.lua @@ -0,0 +1,439 @@ +local u = mw.ustring.char + +-- UTF-8 encoded strings for some commonly used diacritics +local GRAVE = u(0x0300) +local ACUTE = u(0x0301) +local CIRC = u(0x0302) +local TILDE = u(0x0303) +local MACRON = u(0x0304) +local BREVE = u(0x0306) +local DOTABOVE = u(0x0307) +local DIAER = u(0x0308) +local CARON = u(0x030C) +local DGRAVE = u(0x030F) +local INVBREVE = u(0x0311) +local DOTBELOW = u(0x0323) +local RINGBELOW = u(0x0325) +local CEDILLA = u(0x0327) + +local Latn = {"Latn"} + +local m = {} + +m["waa"] = {"Walla Walla", 12953960, "nai-shp", Latn, ancestors = {"nai-spt"}} + +m["wab"] = {"Wab", 11222271, "poz-ocw", Latn} + +m["wac"] = {"Wasco-Wishram", 12645081, "nai-ckn", Latn} + +m["wad"] = {"Wandamen", 2806128, "poz-hce", Latn} + +m["waf"] = {"Wakoná", 7961205} + +m["wag"] = {"Wa'ema", 12953264, "poz-ocw", Latn} + +m["wah"] = {"Watubela", 7975070, "poz-cma", Latn} + +m["waj"] = {"Waffa", 3565058, "paa-kag", Latn} + +m["wal"] = {"Wolaytta", 36943, "omv-nom"} + +m["wam"] = {"Massachusett", 56519, "alg-eas", Latn} + +m["wan"] = {"Wan", 3913272, "dmn-nbe"} + +m["wao"] = {"Wappo", 56530} + +m["wap"] = {"Wapishana", 3450493, "awd", Latn} + +m["waq"] = {"Wageman", 3436843, "aus-gun", Latn} + +m["war"] = {"Waray-Waray", 34279, "phi", Latn} + +m["was"] = {"Washo", 34198} + +m["wat"] = {"Kaninuwa", 12952565, "poz-ocw", Latn} + +m["wau"] = {"Wauja", 3450522, "awd", Latn} + +m["wav"] = {"Waka", 3913394, "alv-mye"} + +m["waw"] = {"Waiwai", 56632, "sai-car", Latn} + +m["wax"] = {"Watam", 3566597, "paa", Latn} + +m["way"] = {"Wayana", 5908753, 
"sai-car", Latn} + +m["waz"] = {"Wampur", 7966957, "poz-ocw", Latn} + +m["wba"] = {"Warao", 36946, "qfa-iso", Latn} + +m["wbb"] = {"Wabo", 7958701, "poz-hce", Latn} + +m["wbe"] = {"Waritai", 7969453, "paa-lkp", Latn} + +m["wbf"] = {"Wara", 3914052, "alv-wan"} + +m["wbh"] = {"Wanda", 7967153, "bnt-mwi"} + +m["wbi"] = {"Wanji", 3376818, "bnt-bki", Latn} + +m["wbj"] = {"Alagwa", 56621, "cus"} + +m["wbk"] = {"Waigali", 34196, "nur-sou"} + +m["wbl"] = {"Wakhi", 34208, "xsc-skw", {"Cyrl", "Latn", "Arab"}, translit_module = "translit-redirect"} + +m["wbm"] = {"Wa", 12644869, "mkh-pal"} + +m["wbp"] = {"Warlpiri", 1639998, "aus-pam", Latn} + +m["wbq"] = {"Waddar", 6708569, "dra", ancestors = {"te"}} + +m["wbr"] = {"Wagdi", 7959490, "inc-bhi"} + +m["wbt"] = {"Wanman", 7967989} + +m["wbv"] = {"Wajarri", 3913856, "aus-psw", Latn} + +m["wbw"] = {"Woi", 8029092, "poz-hce", Latn} + +m["wca"] = {"Yanomámi", 7960056} + +m["wci"] = {"Waci Gbe", 36987, "alv-gbe"} + +m["wdd"] = {"Wandji", 36976, "bnt-nze"} + +m["wdg"] = {"Wadaginam", 7958930} + +m["wdj"] = {"Wadjiginy", 7959489} + +m["wdu"] = {"Wadjigu", 10719025} + +m["wdy"] = {"Wadjabangayi", nil} + +m["wea"] = {"Wewaw", 15895870} + +m["wec"] = {"Wè Western", 11159067, "kro-wee"} + +m["wed"] = {"Wedau", 12953294, "poz-ocw", Latn} + +m["weh"] = {"Weh", 7979690, "nic-rnw"} + +m["wei"] = {"Kiunum", 7983230} + +m["wem"] = {"Weme Gbe", 18379970, "alv-gbe"} + +m["weo"] = {"Wemale", 7982165, "poz-cma"} + +m["wer"] = {"Weri", 11732752, "paa"} + +m["wes"] = {"Cameroon Pidgin", 35541, "crp", Latn, ancestors = {"en"}} + +m["wet"] = {"Perai", 12953035, "poz-tim"} + +m["weu"] = {"Welaung", 7980503, "tbq-kuk"} + +m["wew"] = {"Weyewa", 4314526, "poz-cet", Latn} + +m["wfg"] = {"Yafi", 8074520, "paa-pau"} + +m["wga"] = {"Wagaya", 7959487, "aus-pam"} + +m["wgb"] = {"Wagawaga", 7959485} + +m["wgg"] = {"Wangganguru", 7967859, "aus-kar", Latn} + +m["wgi"] = {"Wahgi", 3565122} + +m["wgo"] = {"Waigeo", 7959937, "poz-hce"} + +m["wgu"] = {"Wirangu", 
2092286, "aus-pam", Latn} + +m["wgy"] = {"Warrgamay", 3915942, "aus-pam", Latn} + +m["wha"] = {"Manusela", 3287127, "poz-cma"} + +m["whg"] = {"North Wahgi", 12953273, "ngf"} + +m["whk"] = {"Wahau Kenyah", 7959737, "poz-swa"} + +m["whu"] = {"Wahau Kayan", 12473397} + +m["wib"] = {"Southern Toussian", 11158982, "alv-sav"} + +m["wic"] = {"Wichita", 56513, "cdd", Latn} + +m["wie"] = {"Wik-Epa", 10720035, "aus-pmn"} + +m["wif"] = {"Wik-Keyangan", 10720037, "aus-pmn"} + +m["wig"] = {"Wik-Ngathana", 3915695, "aus-pmn"} + +m["wih"] = {"Wik-Me'anha", 10720039, "aus-pmn"} + +m["wii"] = {"Minidien", 6865237, "qfa-tor", Latn} + +m["wij"] = {"Wik-Iiyanh", 10720036, "aus-pmn"} + +m["wik"] = {"Wikalkan", 7999800, "aus-pmn"} + +m["wil"] = {"Wilawila", 10720050, "aus-wor"} + +m["wim"] = {"Wik-Mungkan", 2092246, "aus-pmn", Latn} + +m["win"] = {"Winnebago", 1957108, "sio-msv", Latn} + +m["wir"] = {"Wiraféd", 12953970, "tup-gua", Latn} + +m["wiu"] = {"Wiru", 8027044, "paa"} + +m["wiv"] = {"Muduapa", 3121040, "poz-ocw", Latn} + +m["wiy"] = {"Wiyot", 36937, "aql", Latn} + +m["wja"] = {"Waja", 3914415, "alv-wjk"} + +m["wji"] = {"Warji", 3440381} + +m["wka"] = {"Kw'adza", 3807652, "cus"} + +m["wkb"] = {"Kumbaran", 16878146, "dra"} + +m["wkd"] = {"Mo", 7960881, "poz-ocw", Latn} + +m["wkl"] = {"Kalanadi", 6350515, "dra"} + +m["wku"] = {"Kunduvadi", 6444383, "dra"} + +m["wkw"] = {"Wakawaka", 10719110, "aus-pam"} + +m["wky"] = {"Wangkayutyuru", 33060533, "aus-kar"} + +m["wla"] = {"Walio", 7961958} + +m["wlc"] = {"Mwali Comorian", 3319155, "bnt-com", Latn, sort_key = {from = {"ɓ", "ɗ"}, to = {"bz", "dz"}}} + +m["wle"] = {"Wolane", 12645275, "sem-eth"} + +m["wlg"] = {"Kunbarlang", 5618523, "aus-gun", Latn} + +m["wli"] = {"Waioli", 7960241} + +m["wlk"] = {"Wailaki", 20832, "ath-pco", Latn} + +m["wll"] = {"Wali (Sudan)", 30597440, "nub-hil"} + +m["wlm"] = {"Middle Welsh", 2487263, "cel-bry", Latn, entry_name = {from = {"Ð", "ð"}, to = {"D", "d"}}, sort_key = {from = {"K", "k"}, to = {"C", "c"}}, 
ancestors = {"owl"}} + +m["wlo"] = {"Wolio", 1185114, "poz-wot", {"Latn", "Arab"}} + +m["wlr"] = {"Wailapa", 7960062, "poz-vnc", Latn} + +m["wls"] = {"Wallisian", 36979, "poz-pnp", Latn} + +m["wlu"] = {"Wuliwuli", 8039208} + +m["wlv"] = {"Wichí Lhamtés Vejoz", 13526867, "sai-wic", Latn} + +m["wlw"] = {"Walak", 7961258} + +m["wlx"] = {"Wali (Ghana)", 36895, "nic-mre", Latn} + +m["wly"] = {"Waling", 7961957, "sit-kic", ancestors = {"bap"}} + +m["wmb"] = {"Wambaya", 2083197, "aus-mir"} + +m["wmc"] = {"Wamas", 7966909, "ngf-mad"} + +m["wmd"] = {"Mamaindé", 3284890, "sai-nmk", Latn} + +m["wme"] = {"Wambule", 56785, "sit-kiw", Latn} + +m["wmh"] = {"Waima'a", 7960132, "poz-tim", Latn} + +m["wmi"] = {"Wamin", 7966934} + +m["wmm"] = {"Maiwa (Indonesia)", 6737226, "poz", Latn} + +m["wmn"] = {"Waamwang", 7958575, "poz-cln", Latn} + +m["wmo"] = {"Wam", 8030620, "qfa-tor", Latn} + +m["wms"] = {"Wambon", 7966922, "ngf", Latn} + +m["wmt"] = {"Walmajarri", 2232696, "aus-pam", Latn} + +m["wmw"] = {"Mwani", 3042206, "bnt-swh", Latn} + +m["wmx"] = {"Womo", 8031646, "paa-msk", Latn} + +m["wnb"] = {"Wanambre", 7967057, "ngf", Latn} + +m["wnc"] = {"Wantoat", 7968184, "ngf-fin", Latn} + +m["wnd"] = {"Wandarang", 3913767, "aus-arn", Latn} + +m["wne"] = {"Waneci", 7967334, "ira-pat", {"ps-Arab"}, ancestors = {"ira-pat-pro"}} + +m["wng"] = {"Wanggom", 11732736, "ngf", Latn} + +m["wni"] = {"Ndzwani Comorian", 2850262, "bnt-com", Latn, sort_key = {from = {"ɓ", "ɗ"}, to = {"bz", "dz"}}} + +m["wnk"] = {"Wanukaka", 2370136, "poz", Latn} + +m["wnm"] = {"Wanggamala", 7967860, "aus-kar", Latn} + +m["wno"] = {"Wano", 3566166, "ngf", Latn} + +m["wnp"] = {"Wanap", 7967060, "qfa-tor", Latn} + +m["wnu"] = {"Usan", 7901709, "ngf", Latn} + +m["wnw"] = {"Wintu", 56754, "nai-wtq", Latn} + +m["wny"] = {"Wanyi", 7968201, "aus-gar", Latn} + +m["woa"] = {"Tyaraity", 10706951} + +m["wob"] = {"Wè Northern", 3915363, "kro-wee"} + +m["woc"] = {"Wogeo", 8029061, "poz-ocw", Latn} + +m["wod"] = {"Wolani", 8029704, 
"ngf", Latn} + +m["woe"] = {"Woleaian", 34037, "poz-mic", Latn} + +m["wog"] = {"Wogamusin", 56991, "paa-spk", Latn} + +m["woi"] = {"Kamang", 8029096, "ngf", Latn} + +m["wok"] = {"Longto", 35795, "alv-dur", Latn} + +m["wom"] = {"Perema", 3913378, "alv-lek", Latn} + +m["won"] = {"Wongo", 8032058, "bnt-bsh", Latn} + +m["woo"] = {"Manombai", 6751253, "poz", Latn} + +m["wor"] = {"Woria", 8034514, "paa-egb", Latn} + +m["wos"] = {"Hanga Hundi", 6450232, "paa-spk", Latn} + +m["wow"] = {"Wawonii", 3566780, "poz-btk", Latn} + +m["wpc"] = {"Wirö", 12953684, nil, Latn} + +m["wra"] = {"Warapu", 56739, "paa-msk", Latn} + +m["wrb"] = {"Warluwara", 3913761, "aus-pam", Latn} + +m["wrg"] = {"Warungu", 7970854, "aus-pam", Latn} + +m["wrh"] = {"Wiradhuri", 3913840, "aus-cww", Latn} + +m["wri"] = {"Wariyangga", 10719289, "aus-psw", Latn} + +m["wrk"] = {"Garawa", 2524022, "aus-gar", Latn} + +m["wrl"] = {"Warlmanpa", 3913823, "aus-pam"} + +m["wrm"] = {"Warumungu", 1764544} + +m["wrn"] = {"Warnang", 36971, "alv-hei"} + +m["wro"] = {"Worora", 3504106, "aus-wor"} + +m["wrp"] = {"Waropen", 7969851, "poz-hce", Latn} + +m["wrr"] = {"Wardaman", 3913842, "aus-yng"} + +m["wrs"] = {"Waris", 3502610, "paa-brd"} + +m["wru"] = {"Waru", 3566463} + +m["wrv"] = {"Waruna", 7971078} + +m["wrw"] = {"Gugu Warra", 5615286} + +m["wrx"] = {"Wae Rana", 7959375} + +m["wrz"] = {"Warray", 7969971, "aus-gun"} + +m["wsa"] = {"Warembori", 56459} + +m["wsi"] = {"Wusi", 8039349, "poz-vnc", Latn} + +m["wsk"] = {"Waskia", 7972683, "ngf-mad", Latn} + +m["wsr"] = {"Owenia", 7114727} + +m["wss"] = {"Wasa", 36914, "alv-ctn", ancestors = {"ak"}} + +m["wsu"] = {"Wasu", 7972892} + +m["wsv"] = {"Wotapuri-Katarqalai", 3877569, "inc-dar"} + +m["wtf"] = {"Watiwa", 35316, "ngf-mad", Latn} + +m["wth"] = {"Wathaurong", 7974656, "aus-pam", Latn} + +m["wti"] = {"Berta", 33178} + +m["wtk"] = {"Watakataui", 7972975, "paa-spk"} + +m["wtm"] = {"Mewati", 2605943, "inc-wes"} + +m["wtw"] = {"Wotu", 12473488} + +m["wua"] = {"Wikngenchera", 
10720045, "aus-pmn"} + +m["wub"] = {"Wunambal", 3913805, "aus-wor"} + +m["wud"] = {"Wudu", 36972, "alv-gbe", Latn} + +m["wuh"] = {"Wutunhua", 1012917} + +m["wul"] = {"Silimo", 11732514, "ngf"} + +m["wum"] = {"Wumbvu", 36891, "bnt-kel", Latn} + +m["wun"] = {"Bungu", 4997686, "bnt-mby", Latn} + +m["wur"] = {"Wurrugu", 8039305, "aus-wdj"} + +m["wut"] = {"Wutung", 56743, "paa-msk", Latn} + +m["wuu"] = {"Wu", 34290, "zhx", {"Hani"}, ancestors = {"ltc"}} + +m["wuv"] = {"Wuvulu-Aua", 3062746, "poz-aay"} + +m["wux"] = {"Wulna", 13591670} + +m["wuy"] = {"Wauyai", 12953295, "poz-hce"} + +m["wwa"] = {"Waama", 7958576, "nic-eov", Latn} + +m["wwo"] = {"Dorig", 3037047, "poz-vnc"} + +m["wwr"] = {"Warrwa", 7970852} + +m["www"] = {"Wawa", 36889, "nic-mmb", Latn} + +m["wxa"] = {"Waxianghua", 2252191} + +m["wxw"] = {"Wardandi", nil} + +m["wya"] = {"Wyandot", 1185119, "iro", Latn} + +m["wyb"] = {"Ngiyambaa", 3913825, "aus-cww", Latn} + +m["wyi"] = {"Woiwurrung", 8029099, "aus-pam", Latn} + +m["wym"] = {"Vilamovian", 56485, "gmw", Latn, ancestors = {"gmh"}, entry_name = {from = {"ȧ", "ḱ"}, to = {"a", "k"}}} + +m["wyr"] = {"Wayoró", 2875044, "tup"} + +m["wyy"] = {"Western Fijian", 3062751, "poz-occ"} + +return m diff --git a/wiktra/wikt/translit/languages/data3/x.lua b/wiktra/wikt/translit/languages/data3/x.lua new file mode 100644 index 0000000..0f3e150 --- /dev/null +++ b/wiktra/wikt/translit/languages/data3/x.lua @@ -0,0 +1,642 @@ +local u = mw.ustring.char + +-- UTF-8 encoded strings for some commonly used diacritics +local GRAVE = u(0x0300) +local ACUTE = u(0x0301) +local CIRC = u(0x0302) +local TILDE = u(0x0303) +local MACRON = u(0x0304) +local BREVE = u(0x0306) +local DOTABOVE = u(0x0307) +local DIAER = u(0x0308) +local CARON = u(0x030C) +local DGRAVE = u(0x030F) +local INVBREVE = u(0x0311) +local DOTBELOW = u(0x0323) +local RINGBELOW = u(0x0325) +local CEDILLA = u(0x0327) + +local Latn = {"Latn"} + +local m = {} + +m["xaa"] = {"Andalusian Arabic", 1137945, "sem-arb", {"Arab", 
"Latn"}, entry_name = {from = {u(0x0671), u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652), u(0x0670), u(0x0640)}, to = {u(0x0627)}}} + +m["xab"] = {"Sambe", 36265, "nic-alu", Latn} + +m["xac"] = {"Kachari", 3442442, "tbq-bdg"} + +m["xad"] = {"Adai", 346744} + +m["xae"] = {"Aequian", 930579, "itc"} + +m["xag"] = {"Aghwan", 34931, "cau-lzg", {"Aghb"}, translit_module = "Aghb-translit", override_translit = true} + +m["xai"] = {"Kaimbé", 6348017} + +m["xaj"] = {"Ararandewára", nil, "tup-gua", Latn} + +m["xak"] = {"Maku", 2032882, nil, Latn} + +m["xal"] = {"Kalmyk", 33634, "xgn", {"Cyrl"}, ancestors = {"xwo"}, translit_module = "xal-translit", override_translit = true} + +m["xam"] = {"ǀXam", 2086145, "khi-tuu", Latn} + +m["xan"] = {"Xamtanga", 56527, "cus"} + +m["xao"] = {"Khao", 3196077, "mkh-pal"} + +m["xap"] = {"Apalachee", 686501, "nai-mus", Latn} + +m["xaq"] = {"Aquitanian", 500522, "euq", Latn} + +m["xar"] = {"Karami", 11732281} + +m["xas"] = {"Kamassian", 35991, "syd", {"Cyrl"}} + +m["xat"] = {"Katawixi", 3440512, "sai-ktk"} + +m["xau"] = {"Kauwera", 6378983, "paa-tkw"} + +m["xav"] = {"Xavante", 36962, "sai-cje", Latn} + +m["xaw"] = {"Kawaiisu", 56338, "azc-num", Latn} + +m["xay"] = {"Kayan Mahakam", 25337171} + +m["xbb"] = {"Lower Burdekin", 6693353} + +m["xbc"] = {"Bactrian", 756651, "ira-sbc", {"Grek", "Mani"}, entry_name = {from = {"Þ", "þ"}, to = {"Ϸ", "ϸ"}}, translit_module = "xbc-translit"} + +m["xbd"] = {"Bindal", 4913975} + +m["xbe"] = { + "Bigambal", 16841801, "aus-pam" -- unclassified within +} + +m["xbg"] = {"Bunganditj", 4997615} + +m["xbi"] = {"Kombio", 6428259, "qfa-tor", Latn} + +m["xbj"] = {"Birrpayi", nil} + +m["xbm"] = {"Middle Breton", 787610, "cel-bry", Latn, ancestors = {"obt"}} + +m["xbn"] = {"Kenaboi", 6388752} + +m["xbo"] = {"Bulgar", 36880, "trk-ogr", {"Grek"}} + +m["xbp"] = {"Bibbulman", 22918391} + +m["xbr"] = {"Kambera", 3053279, "poz-cet", Latn} + +m["xbw"] = {"Kambiwá", 9006744} + +m["xby"] = 
{"Butchulla", 31752631} + +m["xcb"] = {"Cumbric", 35965, "cel-bry"} + +m["xcc"] = {"Camunic", 489011, nil, {"Ital"}, translit_module = "Ital-translit"} + +m["xce"] = {"Celtiberian", 37012, "cel", Latn} + +m["xch"] = {"Chemakum", 56397, "chi", Latn} + +m["xcl"] = {"Old Armenian", 181074, "hyx", {"Armn"}, translit_module = "Armn-translit", override_translit = true, entry_name = {from = {"՞", "՜", "՛", "՟", "և"}, to = {"", "", "", "", "եւ"}}} + +m["xcm"] = {"Comecrudo", 609808, "nai-pak"} + +m["xcn"] = {"Cotoname", 56889, "nai-pak"} + +m["xco"] = {"Khwarezmian", 33138, "ira-sbc", {"Arab", "Armi", "Chrs", "Phlv", "Sogd"}, translit_module = "Chrs-translit"} + +m["xcr"] = {"Carian", 35929, "ine-ana", {"Cari"}} + +m["xct"] = {"Classical Tibetan", 5128314, "sit-tib"} + +m["xcu"] = {"Curonian", 35857, "bat", Latn} + +m["xcv"] = {"Chuvan", 3516641, "qfa-yuk", Cyrl} + +m["xcw"] = {"Coahuilteco", 2008062, "nai-pak"} + +m["xcy"] = {"Cayuse", 2472016} + +m["xda"] = {"Darkinjung", 5223660, "aus-yuk", Latn} + +m["xdc"] = {"Dacian", 682547, "ine", Latn} + +m["xdk"] = {"Dharug", 1166814, "aus-yuk", Latn} + +m["xdm"] = {"Edomite", 2363529, "sem-can", {"Phnx"}, translit_module = "Phnx-translit"} + +m["xdy"] = {"Malayic Dayak", 3514892} + +m["xeb"] = {"Eblaite", 35345, "sem-eas", {"Xsux"}} + +m["xed"] = {"Hdi", 56246, "cdc-cbm", Latn} + +m["xeg"] = {"ǁXegwi", 3509732, "khi-tuu", Latn} + +m["xel"] = {"Kelo", 6386412, "sdv-eje"} + +m["xem"] = {"Kembayan", 6386874} + +m["xep"] = {"Epi-Olmec", nil} + +m["xer"] = {"Xerénte", 3073436, "sai-cje", Latn} + +m["xes"] = {"Kesawai", 6394907, "ngf-mad", Latn} + +m["xet"] = {"Xetá", 2980404, "tup-gua", Latn} + +m["xeu"] = {"Keoru-Ahia", 11732313, "ngf"} + +m["xfa"] = {"Faliscan", 35669, "itc", {"Ital", "Latn"}, translit_module = "Ital-translit", entry_name = {from = {"[ĀĂ]", "[āă]", "[ĒĔ]", "[ēĕë]", "[ĪĬÏ]", "[īĭï]", "[ŌŎ]", "[ōŏ]", "[ŪŬÜ]", "[ūŭü]", "Ȳ", "ȳ", MACRON, BREVE, DIAER}, to = {"A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "Y", "y"}}} 
+ +m["xga"] = {"Galatian", 27403, "cel", {"Latn", "Grek"}, ancestors = {"cel-gau"}} + +m["xgb"] = {"Gbin", 16934745, "dmn-mse", Latn} + +m["xgd"] = {"Gudang", 5614528} + +m["xgf"] = {"Gabrielino-Fernandeño", 56387, "azc-tak", Latn} + +m["xgg"] = {"Goreng", nil} + +m["xgi"] = {"Garingbal", nil} + +m["xgl"] = {"Galindan", 1190494, "bat", Latn} + +m["xgm"] = {"Darumbal", 16954400} + +m["xgr"] = {"Garza", 3098656, "nai-pak"} + +m["xgu"] = {"Unggumi", 62000004, "aus-wor", Latn} + +m["xgw"] = {"Guwa", 5621992} + +m["xha"] = {"Harami", 41506724, nil, {"Sarb"}, translit_module = "Sarb-translit"} + +m["xhc"] = {"Hunnic", 35959} + +m["xhd"] = {"Hadrami", 1032453, nil, {"Sarb"}, translit_module = "Sarb-translit"} + +m["xhe"] = {"Khetrani", 2614111, "inc-pan", ancestors = {"lah"}} + +m["xhr"] = {"Hernican", 5908773, "itc", {"Ital"}} + +m["xht"] = {"Hattic", 31107, "qfa-iso", {"Xsux"}} + +m["xhu"] = {"Hurrian", 35740, "qfa-hur", {"Xsux", "Ugar"}} + +m["xhv"] = {"Khua", 22970290, "mkh-kat"} + +m["xib"] = {"Iberian", 855215, "qfa-iso", {"Latn", "Ibrn"}} + +m["xii"] = {"Xiri", 36876} + +m["xin"] = {"Xinca", 1546494, "nai-xin", Latn} + +m["xil"] = {"Illyrian", 35976, "ine", type = "reconstructed"} + +m["xir"] = {"Xiriâna", 2028772, "awd", Latn} + +m["xis"] = {"Kisan", nil} + +m["xiv"] = {"Indus Valley Language", 3428279, nil, {"Inds"}} + +m["xiy"] = {"Xipaya", 13226, "tup"} + +m["xjb"] = {"Minjungbal", nil, "aus-pam", Latn} + +m["xka"] = {"Kalkoti", 3877551} + +m["xkb"] = {"Manigri-Kambolé Ede Nago", 36042, "alv-ede"} + +m["xkc"] = {"Khoini", 6401919, "xme-ttc", ancestors = {"xme-ttc-wes"}} + +m["xkd"] = {"Mendalam Kayan", 12952597} + +m["xke"] = {"Kereho", 6437086, "poz", Latn} + +m["xkf"] = {"Khengkha", 3695207, "sit-ebo"} + +m["xkg"] = {"Kagoro", 11159524, "dmn-wmn"} + +m["xki"] = {"Kenyan Sign Language", 6392859, "sgn"} + +m["xkj"] = {"Kajali", 14916876, "xme-ttc", ancestors = {"xme-ttc-cen"}} + +m["xkk"] = {"Kaco'", 6344767, "mkh"} + +m["xkl"] = {"Bakung", 6736761, "poz-swa", 
Latn} + +m["xkn"] = {"Kayan River Kayan", 12473395, "poz"} + +m["xko"] = {"Kiorr", 6414519, "mkh-pal"} + +m["xkp"] = {"Kabatei", 34165, "xme-ttc", ancestors = {"xme-ttc-cen"}} + +m["xkq"] = {"Koroni", 3199000, "poz-btk"} + +m["xkr"] = {"Xakriabá", 3073441, "sai-cje", Latn} + +m["xks"] = {"Kumbewaha", 6443722} + +m["xkt"] = {"Kantosi", 35651, "nic-dag"} + +m["xku"] = {"Kaamba", 11042324, "bnt-kng"} + +m["xkv"] = {"Kgalagadi", 2088743, "bnt-sts", Latn} + +m["xkw"] = {"Kembra", 12953627, "paa-pau"} + +m["xkx"] = {"Karore", 6373260, "poz-ocw"} + +m["xky"] = {"Uma' Lasan", nil, "poz-swa"} + +m["xkz"] = {"Kurtop", 3695193, "sit-ebo", {"Tibt", "Latn"}} + +m["xla"] = {"Kamula", 10957277, "ngf"} + +m["xlb"] = {"Loup B", 13108281, "alg-eas", Latn} + +m["xlc"] = {"Lycian", 35969, "ine-ana", {"Lyci"}, translit_module = "Lyci-translit"} + +m["xld"] = {"Lydian", 36095, "ine-ana", {"Lydi"}, translit_module = "Lydi-translit"} + +m["xle"] = {"Lemnian", 36203, "qfa-tyn", {"Ital"}, translit_module = "Ital-translit"} + +m["xlg"] = {"Ancient Ligurian", 36104, "ine"} + +m["xli"] = {"Liburnian", 35835, "ine"} + +-- xln is etymology-only + +m["xlo"] = {"Loup A", 27921265, "alg-eas", Latn} + +m["xlp"] = {"Lepontic", 35993, "cel", {"Ital"}, translit_module = "Ital-translit"} + +m["xls"] = {"Lusitanian", 35960, "ine", Latn} + +m["xlu"] = {"Luwian", 12634577, "ine-ana", {"Xsux", "Hluw"}} + +m["xly"] = {"Elymian", 35329, nil, {"Grek"}} + +m["xmb"] = {"Mbonga", 36064, "nic-jrn", Latn} + +m["xmc"] = {"Makhuwa-Marrevone", 11127231, "bnt-mak", ancestors = {"vmw"}} + +m["xmd"] = {"Mbudum", 6799790, "cdc-cbm", Latn} + +m["xmf"] = {"Mingrelian", 13359, "ccs-zan", {"Geor"}, translit_module = "Geor-translit", override_translit = true} + +m["xmg"] = {"Mengaka", 36017, "bai", Latn} + +m["xmh"] = {"Kugu-Muminh", 10549849, "aus-pmn", Latn} + +m["xmj"] = {"Majera", 6737666, "cdc-cbm", Latn} + +m["xmk"] = { + "Ancient Macedonian", + 35974, + "grk", + {"polytonic"}, + translit_module = "grc-translit", + 
sort_key = { -- Keep this synchronized with el, cpg, pnt + from = {"[ᾳάᾴὰᾲᾶᾷἀᾀἄᾄἂᾂἆᾆἁᾁἅᾅἃᾃἇᾇᾱ]", "[έὲἐἔἒἑἕἓ]", "[ῃήῄὴῂῆῇἠᾐἤᾔἢᾒἦᾖἡᾑἥᾕἣᾓἧᾗ]", "[ίὶῖἰἴἲἶἱἵἳἷϊΐῒῗῑ]", "[όὸὀὄὂὁὅὃ]", "[ύὺῦὐὔὒὖὑὕὓὗϋΰῢῧῡ]", "[ῳώῴὼῲῶῷὠᾠὤᾤὢᾢὦᾦὡᾡὥᾥὣᾣὧᾧ]", "ῥ", "ς"}, + to = {"α", "ε", "η", "ι", "ο", "υ", "ω", "ρ", "σ"} + }, + entry_name = {from = {"[ᾸᾹ]", "[ᾰᾱ]", "[ῘῙ]", "[ῐῑ]", "[ῨῩ]", "[ῠῡ]"}, to = {"Α", "α", "Ι", "ι", "Υ", "υ"}} +} + +m["xml"] = {"Malaysian Sign Language", 33420, "sgn"} + +m["xmm"] = {"Manado Malay", 1068112} + +m["xmo"] = {"Morerebi", 12953749, "tup", Latn} + +m["xmp"] = {"Kuku-Mu'inh", 10549852, nil, Latn} + +m["xmq"] = {"Kuku-Mangk", 10549851, "aus-pam", Latn} + +m["xmr"] = { + "Meroitic", + 13366, + "afa", + {"Mero", "Merc", "Latn"}, -- we have entries in Latn + translit_module = "xmr-translit" +} + +m["xms"] = {"Moroccan Sign Language", 6913107, "sgn"} + +m["xmt"] = {"Matbat", 6786187, "poz-hce"} + +m["xmu"] = {"Kamu", 6359779} + +m["xmx"] = {"Maden", 12952756, "poz-hce"} + +m["xmy"] = {"Mayaguduna", 3436736} + +m["xmz"] = {"Mori Bawah", 3324069, "poz-btk", Latn} + +m["xna"] = {"Ancient North Arabian", 1472213, "sem", {"Narb"}} + +m["xnb"] = {"Kanakanabu", 172244, "map", Latn} + +m["xng"] = {"Middle Mongolian", 2582455, "xgn", {"Mong", "Arab", "Phag"}, translit_module = "mn-translit"} + +m["xnh"] = {"Kuanhua", 6441084, "mkh-pal"} + +m["xni"] = {"Ngarigu", 7022072} + +m["xnk"] = {"Nganakarti", 33087049} + +m["xnn"] = {"Northern Kankanay", 12953609, "phi"} + +m["xnr"] = {"Kangri", 2331560, "him", {"Deva", "Takr", "fa-Arab"}, ancestors = {"doi"}, translit_module = "hi-translit"} + +m["xns"] = {"Kanashi", 6360672, "sit-whm"} + +m["xnt"] = {"Narragansett", 3336118, "alg-eas", Latn} + +m["xnu"] = {"Nukunul", 7068904} + +m["xny"] = {"Nyiyaparli", 16919427, "aus-nga", Latn} + +m["xoc"] = {"O'chi'chi'", 3813833, "nic-cde", Latn} + +m["xod"] = {"Kokoda", 6426734, "ngf-sbh"} + +m["xog"] = {"Soga", 33784, "bnt-nyg", Latn} + +m["xoi"] = {"Kominimung", 6428352, "paa", Latn} + 
+m["xok"] = {"Xokleng", 3027930, "sai-sje"} + +m["xom"] = {"Komo", 56681, "ssa-kom"} + +m["xon"] = {"Konkomba", 35674, "nic-grm", Latn} + +m["xoo"] = { -- contrast kzw, sai-kat, sai-xoc + "Xukurú", 9096758 +} + +m["xop"] = {"Kopar", 11732346} + +m["xor"] = {"Korubo", 3199022} + +m["xow"] = {"Kowaki", 6434920, "ngf-mad"} + +m["xpa"] = {"Pirriya", 16978087} + +m["xpb"] = {"Pyemmairre", 7262964, nil, Latn} + +m["xpc"] = {"Pecheneg", 877881, "trk"} + +m["xpd"] = {"Paredarerme", 7136678, nil, Latn} + +m["xpe"] = {"Liberia Kpelle", 20527226, "dmn-msw", ancestors = {"kpe"}} + +m["xpf"] = {"Southeast Tasmanian", 7068421, nil, Latn} + +m["xpg"] = {"Phrygian", 36751, "ine", {"Grek"}, translit_module = "grc-translit"} + +m["xph"] = {"Tyerrernotepanner", 7859815, nil, Latn} + +m["xpi"] = {"Pictish", 856383, "cel", {"Ogam", "Latn"}} + +m["xpj"] = {"Mpalitjanh", 6928192, "aus-pam"} + +m["xpk"] = {"Kulina", 6443027, "sai-pan"} + +m["xpl"] = {"Port Sorell", 7230944, nil, Latn} + +m["xpm"] = {"Pumpokol", 2991985, "qfa-yen", Latn} + +m["xpn"] = {"Kapinawá", 6366667} + +m["xpo"] = {"Pochutec", 2427341, "azc-nah", Latn} + +m["xpp"] = {"Puyo-Paekche", nil} + +m["xpq"] = {"Mohegan-Pequot", 3319130, "alg-eas", Latn} + +m["xpr"] = {"Parthian", 25953, "ira-mpr", {"Prti", "Mani", "Phlv"}, translit_module = "translit-redirect"} + +m["xps"] = {"Pisidian", 36580, "ine-ana"} + +m["xpu"] = {"Punic", 535958, "sem-can", {"Phnx", "Latn", "Grek"}, ancestors = {"phn"}, translit_module = "translit-redirect"} + +m["xpv"] = {"Tommeginne", 7819095, nil, Latn} + +m["xpw"] = {"Peerapper", 7160431, nil, Latn} + +m["xpx"] = {"Toogee", 7824008, nil, Latn} + +m["xpy"] = {"Buyeo", 5003359, "qfa-kor", {"Hani"}} + +m["xpz"] = {"Bruny Island", 4979601, nil, Latn} + +m["xqa"] = {"Karakhanid", nil, "trk-kar", {"Arab"}} + +m["xqt"] = {"Qatabanian", 384101, "sem-osa", {"Sarb"}, translit_module = "Sarb-translit"} + +m["xra"] = {"Krahô", 3199549, "sai-nje"} + +m["xrb"] = {"Eastern Karaboro", 35716, "alv-krb"} + 
+m["xrd"] = {"Gundungurra", nil} + +m["xre"] = {"Kreye", 3199686, "sai-nje"} + +m["xrg"] = {"Minang", 22893424} + +m["xri"] = {"Krikati-Timbira", 3199710} + +m["xrm"] = {"Armazic", 7599646} + +m["xrn"] = {"Arin", 34088, "qfa-yen", Latn} + +m["xrq"] = {"Karranga", 6373349, nil, Latn} + +m["xrr"] = {"Raetic", 36689, nil, {"Ital"}, translit_module = "Ital-translit"} + +m["xrt"] = {"Aranama-Tamique", 2859505} + +m["xru"] = {"Marriammu", 10577724, "aus-dal"} + +m["xrw"] = {"Karawa", 6368857, "paa-spk"} + +m["xsa"] = {"Sabaean", 1070391, "sem-osa", {"Sarb"}, translit_module = "Sarb-translit"} + +m["xsb"] = {"Sambali", 2592378, "phi", Latn} + +m["xsd"] = {"Sidetic", 36659, "ine-ana"} + +m["xse"] = {"Sempan", 3504358} + +m["xsh"] = {"Shamang", 3914876, "nic-plc"} + +m["xsi"] = {"Sio", 3485100, "poz-ocw"} + +m["xsj"] = {"Subi", 7631298, "bnt-haj"} + +m["xsl"] = {"South Slavey", 28552, "ath-nor", Latn} + +m["xsm"] = {"Kasem", 35552, "nic-gnn"} + +m["xsn"] = {"Sanga (Nigeria)", 3915334, "nic-jer", Latn} + +m["xso"] = {"Solano", 2474492, nil, Latn} + +m["xsp"] = {"Silopi", 7515533, "ngf-mad"} + +m["xsq"] = {"Makhuwa-Saka", 11008159, "bnt-mak", ancestors = {"vmw"}} + +m["xsr"] = {"Sherpa", 36612, "sit-tib", {"Tibt"}, ancestors = {"xct"}} + +m["xss"] = {"Assan", 34089, "qfa-yen", Latn} + +m["xsu"] = {"Sanumá", 251728, "sai-ynm"} + +m["xsv"] = {"Sudovian", 35603, "bat", Latn} + +m["xsy"] = {"Saisiyat", 716695, "map", Latn} + +m["xta"] = {"Alcozauca Mixtec", 25559587, "omq-mxt", Latn} + +m["xtb"] = {"Chazumba Mixtec", 12182838, "omq-mxt", Latn} + +m["xtc"] = {"Kadugli", 3407136, "qfa-kad", Latn} + +m["xtd"] = {"Diuxi-Tilantongo Mixtec", 7802048, "omq-mxt", Latn} + +m["xte"] = {"Ketengban", 10990152} + +m["xth"] = {"Yitha Yitha", nil} + +m["xti"] = {"Sinicahua Mixtec", 12953733, "omq-mxt", Latn} + +m["xtj"] = {"San Juan Teita Mixtec", 32093049, "omq-mxt", Latn} + +m["xtl"] = {"Tijaltepec Mixtec", 12953738, "omq-mxt", Latn} + +m["xtm"] = {"Magdalena Peñasco Mixtec", 7179700, 
"omq-mxt", Latn} + +m["xtn"] = {"Northern Tlaxiaco Mixtec", 25559585, "omq-mxt", Latn} + +m["xto"] = { + "Tocharian A", + 2827041, + "ine-toc", + Latn, + wikipedia_article = "Tocharian languages" -- wikidata id has no associated article +} + +m["xtp"] = {"San Miguel Piedras Mixtec", 7414970, "omq-mxt", Latn} + +m["xtq"] = {"Tumshuqese", nil, "xsc-sak", {"Brah", "Khar"}, translit_module = "Brah-translit"} + +m["xtr"] = {"Early Tripuri", nil} + +m["xts"] = {"Sindihui Mixtec", 13583581, "omq-mxt", Latn} + +m["xtt"] = {"Tacahua Mixtec", 7673668, "omq-mxt", Latn} + +m["xtu"] = {"Cuyamecalco Mixtec", 12953726, "omq-mxt", Latn} + +m["xtv"] = {"Thawa", 7711494} + +m["xtw"] = {"Tawandê", nil, "sai-nmk", Latn} + +m["xty"] = {"Yoloxochitl Mixtec", 8054817, "omq-mxt", Latn} + +m["xtz"] = {"Tasmanian", 530739, nil, Latn} + +m["xua"] = {"Alu Kurumba", 12952679, "dra"} + +m["xub"] = {"Betta Kurumba", 16841033, "dra", {"Knda", "Mlym", "Taml"}} + +m["xud"] = {"Umiida", 61999874, "aus-wor", Latn} + +m["xug"] = {"Kunigami", 56558, "jpx-ryu", {"Jpan"}} + +m["xuj"] = {"Jennu Kurumba", 21282543, "dra"} + +m["xul"] = {"Ngunawal", 7022712, "aus-yuk", Latn} + +m["xum"] = {"Umbrian", 36957, "itc", {"Ital", "Latn"}, translit_module = "Ital-translit"} + +m["xun"] = {"Unggaranggu", 61999823, "aus-wor", Latn} + +m["xuo"] = {"Kuo", 6445233, "alv-mbm"} + +m["xup"] = {"Upper Umpqua", 20607, "ath-pco", Latn} + +m["xur"] = {"Urartian", 36934, "qfa-hur", {"Xsux"}} + +m["xut"] = {"Kuthant", 6448417} + +m["xuu"] = {"Khwe", 28305, "khi-kal", Latn} + +m["xve"] = {"Venetic", 36871, "ine", {"Ital"}, translit_module = "Ital-translit"} + +-- m["xvi"] = { "Kamviri", 1193495, "nur-nor", scripts = {"Arab"} } moved to etym-only code + +m["xvn"] = {"Vandalic", 36835, "gme", Latn} + +m["xvo"] = {"Volscian", 622110, "itc", Latn} + +m["xvs"] = {"Vestinian", 2576407, "itc", Latn} + +m["xwa"] = {"Kwaza", 3200839} + +m["xwc"] = {"Woccon", 3569569, "nai-cat", Latn} + +m["xwd"] = {"Wadi Wadi", 7959249} + +m["xwe"] = 
{"Xwela Gbe", 36887, "alv-pph"} + +m["xwg"] = {"Kwegu", 56723, "sdv"} + +m["xwj"] = {"Wajuk", 33110188} + +m["xwk"] = {"Wangkumara", 7967891, "aus-pam", Latn} + +m["xwl"] = {"Western Xwla Gbe", 36924, "alv-pph", Latn} + +m["xwo"] = {"Written Oirat", 56959, "xgn", {"Mong"}, translit_module = "xwo-translit"} + +m["xwr"] = {"Kwerba Mamberamo", 6450325, "paa-tkw"} + +m["xww"] = {"Wemba-Wemba", 18472819, "aus-pam", Latn} + +m["xxb"] = {"Boro", 16844787, nil, Latn} + +m["xxk"] = {"Ke'o", 3195346} + +m["xxm"] = {"Minkin", 6867836} + +m["xxr"] = {"Koropó", 6432560} + +m["xxt"] = {"Tambora", 36711, "paa", Latn} + +m["xya"] = {"Yaygir", 8050525, "aus-pam"} + +m["xyb"] = {"Yandjibara", nil, nil, Latn} + +m["xyl"] = {"Yalakalore", 12645352, "sai-nmk", Latn} + +m["xyt"] = {"Mayi-Thakurti", 47004719, "aus-pam", Latn} + +m["xyy"] = {"Yorta Yorta", 8055849, "aus-pam", Latn} + +m["xzh"] = {"Zhang-Zhung", 3437292, "sit-alm", {"xzh-Tibt", "Marc"}} + +m["xzm"] = {"Zemgalian", 47631, "bat"} + +m["xzp"] = {"Ancient Zapotec", nil} + +return m diff --git a/wiktra/wikt/translit/languages/data3/y.lua b/wiktra/wikt/translit/languages/data3/y.lua new file mode 100644 index 0000000..09543da --- /dev/null +++ b/wiktra/wikt/translit/languages/data3/y.lua @@ -0,0 +1,471 @@ +local u = mw.ustring.char + +-- UTF-8 encoded strings for some commonly-used diacritics +local GRAVE = u(0x0300) +local ACUTE = u(0x0301) +local CIRC = u(0x0302) +local TILDE = u(0x0303) +local MACRON = u(0x0304) +local BREVE = u(0x0306) +local DOTABOVE = u(0x0307) +local DIAER = u(0x0308) +local CARON = u(0x030C) +local DGRAVE = u(0x030F) +local INVBREVE = u(0x0311) +local DOTBELOW = u(0x0323) +local RINGBELOW = u(0x0325) +local CEDILLA = u(0x0327) + +local Latn = {"Latn"} + +local m = {} + +m["yaa"] = {"Yaminahua", 3026110, "sai-pan", Latn} + +m["yab"] = {"Yuhup", 3573115, "sai-nad", Latn} + +m["yac"] = {"Pass Valley Yali", 12953309, "ngf", Latn} + +m["yad"] = {"Yagua", 3182567, nil, Latn} + +m["yae"] = {"Pumé", 3121835, 
nil, Latn} + +m["yaf"] = {"Yaka", 35588, "bnt-yak", Latn} + +m["yag"] = {"Yámana", 531826, "qfa-iso", Latn} + +m["yah"] = {"Yazghulami", 34186, "ira-shy", {"Latn", "Cyrl"}, ancestors = {"ira-shy-pro"}} + +m["yai"] = {"Yagnobi", 34247, "ira-sgc", {"Latn", "Cyrl"}, translit_module = "tg-translit"} + +m["yaj"] = {"Banda-Yangere", 8048561, "bad"} + +m["yak"] = {"Yakima", 35976053, "nai-shp", Latn, ancestors = {"nai-spt"}} + +m["yal"] = {"Yalunka", 35524, "dmn-sya"} + +m["yam"] = {"Yamba", 36904, "nic-nka", Latn} + +m["yan"] = {"Mayangna", 3302929, "nai-min", Latn} + +m["yao"] = {"Yao", 36902, "bnt-rvm", Latn} + +m["yap"] = {"Yapese", 34029, "poz-oce", Latn} + +m["yaq"] = {"Yaqui", 34191, "azc-trc", Latn} + +m["yar"] = {"Yabarana", 3571238, "sai-car", Latn} + +m["yas"] = {"Gunu", 36358, "nic-ymb", Latn} + +m["yat"] = {"Yambeta", 8048020, "nic-mbw", Latn} + +m["yau"] = {"Yuwana", 5876347} + +m["yav"] = {"Yangben", 12953315, "nic-ymb", Latn} + +m["yaw"] = {"Yawalapití", 3450709, "awd", Latn} + +m["yay"] = {"Agwagwune", 34736, "nic-ucn", Latn} + +m["yaz"] = {"Lokaa", 3914439, "nic-uce", Latn} + +m["yba"] = {"Yala", 3914920, "alv-ido", Latn} + +m["ybb"] = {"Yemba", 36917, "bai", Latn} + +m["ybe"] = {"Western Yugur", 34224, "trk-sib", ancestors = {"oui"}} + +m["ybh"] = {"Yakkha", 56666, "sit-kie", {"Deva"}} + +m["ybi"] = {"Yamphu", 56316, "sit-kie", {"Deva"}, translit_module = "ybi-translit"} + +m["ybj"] = {"Hasha", 3915338, "nic-alu"} + +m["ybk"] = {"Bokha", nil, "tbq-lol"} + +m["ybl"] = {"Yukuben", 3915846, "nic-ykb"} + +m["ybm"] = {"Yaben", 8046372, "ngf-mad"} + +m["ybn"] = {"Yabaâna", 3450534, "awd", Latn} + +m["ybo"] = {"Yabong", 8046383, "ngf-mad"} + +m["ybx"] = {"Yawiyo", 8050463, "paa-spk"} + +m["yby"] = {"Yaweyuha", 3571231, "paa-kag"} + +m["ych"] = {"Chesu", nil, "tbq-lol"} + +m["ycl"] = {"Lolopo", 56441, "tbq-lol"} + +m["ycn"] = {"Yucuna", 3438356, "awd-nwk", Latn} + +m["ycp"] = {"Chepya", 46603077, "tbq-lol"} + +m["yda"] = {"Yanda", 8048318, "aus-pam"} + 
+m["yde"] = {"Yangum Dey", nil, "qfa-tor", Latn} + +m["ydg"] = {"Yidgha", 34179, "ira-mny", {"Arab"}, ancestors = {"ira-mny-pro"}} + +m["ydk"] = {"Yoidik", 8054512, "ngf-mad"} + +m["yea"] = {"Ravula", 7296830, "dra"} + +m["yec"] = {"Yeniche", 1365342, "gmw", Latn, ancestors = {"gmh"}} + +m["yee"] = {"Yimas", 36954, "paa-lsp"} + +m["yei"] = {"Yeni", 34213, "nic-mmb"} + +m["yej"] = {"Yevanic", 34200, "grk", {"Hebr"}, ancestors = {"grc"}, entry_name = {from = {"[" .. u(0x0591) .. "-" .. u(0x05BD) .. u(0x05BF) .. "-" .. u(0x05C5) .. u(0x05C7) .. "]"}, to = {}}} + +m["yen"] = {"Yendang", nil, "alv-mye", Latn} + +m["yer"] = {"Tarok", 3914953, "nic-tar", Latn} + +m["yes"] = {"Yeskwa", 3914962, "nic-plc", Latn} + +m["yet"] = {"Yetfa", 8053020, "paa-pau"} + +m["yeu"] = {"Yerukula", 3535117, "dra"} + +m["yev"] = {"Yapunda", 11044384, "qfa-tor", Latn} + +m["yey"] = {"Yeyi", 8053347, "bnt"} + +m["ygi"] = {"Yiningayi", nil, "aus-pam"} + +m["ygl"] = {"Yangum Gel", nil, "qfa-tor", Latn} + +m["ygm"] = {"Yagomi", 20771657, "ngf-fin", Latn} + +m["ygp"] = {"Gepo", 5548692, "tbq-lol"} + +m["ygr"] = {"Yagaria", 8046690, "paa-kag", Latn} + +m["ygs"] = {"Yolngu Sign Language", 16211970, "sgn"} + +m["ygu"] = {"Yugul", 8060437, "aus-arn"} + +m["ygw"] = {"Yagwoia", 8046761, "ngf", Latn} + +m["yha"] = {"Baha", 2879238, "qfa-kra"} + +m["yhd"] = {"Judeo-Iraqi Arabic", 56599, "sem-arb", {"Hebr"}, ancestors = {"jrb"}} + +m["yhl"] = {"Hlepho Phowa", nil, "tbq-lol"} + +m["yia"] = {"Yinggarda", 3913777, "aus-psw", Latn} + +m["yif"] = {"Ache", 10949828, "tbq-lol"} + +m["yig"] = {"Wusa Nasu", 12953334, "tbq-lol"} + +m["yii"] = {"Yidiny", 3053283, "aus-yid", Latn} + +m["yij"] = {"Yindjibarndi", 3121073, "aus-nga", Latn} + +m["yik"] = {"Dongshanba Lalo", 12953333, "tbq-lal"} + +m["yil"] = {"Yindjilandji", 10723541, "aus-pam", Latn} + +m["yim"] = {"Yimchungru Naga", 56348, "sit-aao", Latn} + +m["yin"] = {"Yinchia", 12953981, "mkh-pal"} + +m["yip"] = {"Pholo", 7187103, "tbq-lol"} + +m["yiq"] = {"Miqie", 
6827993, "tbq-lol"} + +m["yir"] = {"North Awyu", 12642164, "ngf"} + +m["yis"] = {"Yis", 8053831, "qfa-tor", Latn} + +m["yit"] = {"Eastern Lalu", 12953328, "tbq-lal"} + +m["yiu"] = {"Awu", 11163308, "tbq-lol"} + +m["yiv"] = {"Northern Nisu", 25559454, "tbq-lol"} + +m["yix"] = {"Axi Yi", 4830439, "tbq-lol"} + +m["yiy"] = {"Yir-Yoront", 8053819, "aus-pmn", Latn} + +m["yiz"] = {"Azhe", 4832535, "tbq-lol"} + +m["yka"] = {"Yakan", 3571351, "poz-sbj"} + +m["ykg"] = {"Northern Yukaghir", 56319, "qfa-yuk", {"Cyrl"}, translit_module = "ykg-translit"} + +m["yki"] = {"Yoke", 3832977} + +m["ykk"] = {"Yakaikeke", 8047041, "poz-oce"} + +m["ykl"] = {"Khlula", 6401849, "tbq-lol"} + +m["ykm"] = {"Kap", 8047048, "poz-ocw", Latn} + +m["ykn"] = {"Kua-nsi", 6440952, "tbq-lol"} + +m["yko"] = {"Yasa", 36899, "bnt-yko", Latn} + +m["ykr"] = {"Yekora", 11732781, "ngf"} + +m["ykt"] = {"Kathu", 6377155, "sit-mnz"} + +m["yku"] = {"Kuamasi", 6441074, "tbq-lol"} + +m["yky"] = {"Yakoma", 3571364, "nic-ngd"} + +m["yla"] = {"Yaul", 8050336, "paa", Latn} + +m["ylb"] = {"Yaleba", 37710600, "poz-oce"} + +m["yle"] = {"Yele", 36942} + +m["ylg"] = {"Yelogu", 8052024, "paa-spk"} + +m["yli"] = {"Angguruk Yali", 3514481, "ngf"} + +m["yll"] = {"Yil", 3501797, "qfa-tor", Latn} + +m["ylm"] = {"Limi", 12953327, "tbq-lol"} + +m["yln"] = {"Langnian Buyang", 2929025, "qfa-buy"} + +m["ylo"] = {"Naluo Yi", 6961032, "tbq-lol"} + +m["ylr"] = {"Yalarnnga", 3915686, "aus-pam", Latn} + +m["ylu"] = {"Aribwaung", 11044246, "poz-ocw", Latn} + +m["yly"] = {"Nyâlayu", 303154, "poz-cln", Latn} + +m["ymb"] = {"Yambes", 8048022, "qfa-tor", Latn} + +m["ymc"] = {"Southern Muji", nil, "tbq-lol"} + +m["ymd"] = {"Muda", 6931494, "tbq-lol"} + +m["yme"] = {"Yameo", 3121032} + +m["ymg"] = {"Yamongeri", 11008893, "bnt-mon", Latn, ancestors = {"lol"}} + +m["ymh"] = {"Mili", 12953329, "tbq-lol"} + +m["ymi"] = {"Moji", 6895060, "tbq-lol"} + +m["ymk"] = {"Makwe", 6740513, "bnt-swh"} + +m["yml"] = {"Iamalele", 8047849, "poz-ocw", Latn} + 
+m["ymm"] = {"Maay", 36221, "cus", Latn} + +m["ymn"] = {"Sunum", 8048083, "poz-ocw", Latn} + +m["ymo"] = {"Yangum Mon", nil, "qfa-tor", Latn} + +m["ymp"] = {"Yamap", 8047913, "poz-ocw", Latn} + +m["ymq"] = {"Qila Muji", nil, "tbq-lol"} + +m["ymr"] = {"Malasar", 16889525, "dra"} + +m["yms"] = {"Mysian", 2577228, "ine"} + +m["ymx"] = {"Northern Muji", nil, "tbq-lol"} + +m["ymz"] = {"Muzi", 6944445, "tbq-lol"} + +m["yna"] = {"Aluo", 4737539, "tbq-lol"} + +m["ynd"] = {"Yandruwandha", 8048335, "aus-kar", Latn} + +m["yne"] = {"Lang'e", 6485577, "tbq-lol"} + +m["yng"] = {"Yango", 13123599, "nic-nkk", Latn} + +m["ynk"] = {"Naukanski", 27963, "ypk", {"Cyrl", "Latn"}} + +m["ynl"] = {"Yangulam", 8048663, "ngf-mad"} + +m["ynn"] = {"Yana", 56419, "qfa-iso"} + +m["yno"] = {"Yong", 8054978, "tai-swe"} + +m["yns"] = {"Yansi", 36953, "bnt-yak"} + +m["ynu"] = {"Yahuna", 8050347, "sai-tuc"} + +m["yob"] = {"Yoba", 8054220, "poz-ocw", Latn} + +m["yog"] = {"Yogad", 8054343, "phi"} + +m["yoi"] = {"Yonaguni", 34243, "jpx-ryu", {"Jpan"}} + +m["yol"] = {"Yola", 56395, "gmw", Latn, ancestors = {"enm"}} + +m["yom"] = {"Yombe", 10961975, "bnt-kng", Latn} + +m["yon"] = {"Yongkom", 8055002, "ngf-okk"} + +m["yox"] = {"Yoron", 2424943, "jpx-ryu", {"Jpan"}} + +m["yoy"] = {"Yoy", 3503717, "tai"} + +m["ypa"] = {"Phala", 36211907, "tbq-lol"} + +m["ypb"] = {"Labo Phowa", nil, "tbq-lol"} + +m["ypg"] = {"Phola", nil, "tbq-lol"} + +m["yph"] = {"Phupha", 7188378, "tbq-lol"} + +m["ypm"] = {"Phuma", nil, "tbq-lol"} + +m["ypn"] = {"Ani Phowa", 33590104, "tbq-lol"} + +m["ypo"] = {"Alo Phola", 33522157, "tbq-lol"} + +m["ypp"] = {"Phupa", 48565467, "tbq-lol"} + +m["ypz"] = {"Phuza", 48565339, "tbq-lol"} + +m["yra"] = {"Yerakai", 8052531} + +m["yrb"] = {"Yareba", 3571876, "ngf"} + +m["yre"] = {"Yaouré", 3913951, "dmn-mda"} + +m["yri"] = {"Yarí", 8049616} + +m["yrk"] = {"Tundra Nenets", 36452, "syd", {"Cyrl"}, entry_name = {from = {"Ӑ", "ӑ", "[ӖЀ]", "[ӗѐ]", "[ӢЍ]", "[ӣѝ]", "Ӯ", "ӯ", BREVE, MACRON, GRAVE, ACUTE, 
DOTABOVE}, to = {"А", "а", "Е", "е", "И", "и", "У", "у"}}} + +m["yrl"] = {"Nheengatu", 34333, "tup-gua"} + +m["yrn"] = {"Yerong", 3572191, "qfa-buy"} + +m["yro"] = {"Yaroamë", 24190396, "sai-ynm", Latn} + +m["yrw"] = {"Yarawata", 8049237, "ngf-mad"} + +m["yry"] = {"Yarluyandi", 33061540, "aus-kar"} + +m["ysc"] = {"Jassic", 2479368, "xsc", Latn, ancestors = {"oos"}} + +m["ysd"] = {"Samatao", 7408902, "tbq-lol"} + +m["ysg"] = {"Sonaga", 7560736, "tbq-lol"} + +m["ysl"] = {"Yugoslavian Sign Language", 8060373, "sgn"} + +m["ysn"] = {"Sani", 1055287, "tbq-lol", {"Yiii"}} + +m["yso"] = {"Nisi", 12953326, "sit-mnz"} + +m["ysp"] = {"Southern Lolopo", 12633989, "tbq-lol"} + +m["ysr"] = {"Sirenik", 28156, "ypk", {"Cyrl"}} + +m["yss"] = {"Yessan-Mayo", 8052927, "paa-spk", Latn} + +m["ysy"] = {"Sanie", 7418287, "tbq-lol"} + +m["yta"] = {"Talu", 16999095, "tbq-lol"} + +m["ytl"] = {"Tanglang", 7786695, "tbq-lol"} + +m["ytp"] = {"Thopho", 7796015, "tbq-lol"} + +m["ytw"] = {"Yout Wam", nil, "ngf-fin", Latn} + +m["yty"] = {"Yatay", nil, "aus-pmn"} + +m["yua"] = {"Yucatec Maya", 13354, "myn", Latn} + +m["yub"] = {"Yugambal", 3446663} + +m["yuc"] = {"Yuchi", 34204, "qfa-iso"} + +m["yud"] = {"Judeo-Tripolitanian Arabic", 56598, "sem-arb", {"Hebr"}, ancestors = {"jrb"}} + +m["yue"] = {"Cantonese", 9186, "zhx", {"Hani", "Latn"}, wikimedia_codes = {"yue", "zh-yue"}, ancestors = {"ltc"}} + +m["yuf"] = {"Havasupai-Walapai-Yavapai", 3565286, "nai-yuc", Latn} + +m["yug"] = {"Yug", 56311, "qfa-yen"} + +m["yui"] = {"Yurutí", 3573266, "sai-tuc", Latn} + +m["yuj"] = {"Karkar-Yuri", 2992906, "paa-pau", Latn} + +m["yuk"] = {"Yuki", 36993, nil, Latn} + +m["yul"] = {"Yulu", 3915595, "csu-bba"} + +m["yum"] = {"Yuma", 3573199, "nai-yuc", Latn} + +m["yun"] = {"Bena", 3913283, "alv-yun"} + +m["yup"] = {"Yukpa", 3441447, "sai-car", Latn} + +m["yuq"] = {"Yuqui", 8061440, "tup-gua", Latn} + +m["yur"] = {"Yurok", 34685, "aql", Latn} + +m["yut"] = {"Yopno", 12953338, "ngf-fin", Latn} + +m["yuw"] = 
{"Finisterre Yau", 12953319, "ngf-fin", Latn} + +m["yux"] = {"Southern Yukaghir", 56545, "qfa-yuk", {"Cyrl"}, translit_module = "yux-translit"} + +m["yuy"] = {"East Yugur", 29902, "xgn"} + +m["yuz"] = {"Yuracare", 2640646, "qfa-iso", Latn} + +m["yva"] = {"Yawa", 3572020, "paa", Latn} + +m["yvt"] = {"Yavitero", 3441427, "awd", Latn} + +m["ywa"] = {"Kalou", 6354305, "paa-spk", Latn} + +m["ywg"] = {"Yinhawangka", 8053734, "aus-nga", Latn} + +m["ywl"] = {"Western Lalu", 12953325, "tbq-lal"} + +m["ywn"] = {"Yawanawa", 10322118, "sai-pan", Latn} + +m["ywq"] = {"Nasu", 25559456, "tbq-lol", {"Plrd", "Yiii"}} + +m["ywr"] = {"Yawuru", 8050479, "aus-nyu", Latn} + +m["ywt"] = {"Xishanba Lalo", 12953336, "tbq-lal"} + +m["ywu"] = {"Wumeng Nasu", 25559442, "tbq-lol"} + +m["yww"] = {"Yawarawarga", 10723454, "aus-kar", Latn} + +m["yxa"] = {"Mayawali", 33060513, "aus-pam", Latn} + +m["yxg"] = {"Yagara", nil, "aus-pam", Latn} + +m["yxl"] = {"Yarli", 46264708, "aus-pam", Latn} + +m["yxm"] = {"Yinwum", 8053763, "aus-pam", Latn} + +m["yxu"] = {"Yuyu", 8062232, "aus-pam", Latn} + +m["yxy"] = {"Yabula Yabula", 8046394, "aus-pam", Latn} + +m["yyu"] = {"Torricelli Yau", 8050328, "qfa-tor", Latn} + +m["yyz"] = {"Ayizi", 20527363, "tbq-lol"} + +m["yzg"] = {"E'ma Buyang", 16115619, "qfa-buy"} + +m["yzk"] = {"Zokhuo", 8073523, "tbq-lol"} + +return m diff --git a/wiktra/wikt/translit/languages/data3/z.lua b/wiktra/wikt/translit/languages/data3/z.lua new file mode 100644 index 0000000..fed52ad --- /dev/null +++ b/wiktra/wikt/translit/languages/data3/z.lua @@ -0,0 +1,335 @@ +local u = mw.ustring.char + +-- UTF-8 encoded strings for some commonly-used diacritics +local GRAVE = u(0x0300) +local ACUTE = u(0x0301) +local CIRC = u(0x0302) +local TILDE = u(0x0303) +local MACRON = u(0x0304) +local BREVE = u(0x0306) +local DOTABOVE = u(0x0307) +local DIAER = u(0x0308) +local CARON = u(0x030C) +local DGRAVE = u(0x030F) +local INVBREVE = u(0x0311) +local DOTBELOW = u(0x0323) +local RINGBELOW = u(0x0325) 
+local CEDILLA = u(0x0327) + +local Latn = {"Latn"} + +local m = {} + +m["zaa"] = {"Sierra de Juárez Zapotec", 12953989, "omq-zap", Latn} + +m["zab"] = {"San Juan Guelavía Zapotec", 5614751, "omq-zpc", Latn} + +m["zac"] = {"Ocotlán Zapotec", 7076643, "omq-zap", Latn} + +m["zad"] = {"Cajonos Zapotec", 5017997, "omq-zpc", Latn} + +m["zae"] = {"Yareni Zapotec", 12645368, "omq-zap", Latn} + +m["zaf"] = {"Ayoquesco Zapotec", 4831570, "omq-zpc", Latn} + +m["zag"] = { + "Zaghawa", 37007, "ssa-sah", Latn -- also Beria +} + +m["zah"] = {"Zangwal", 3441387, "cdc-wst", Latn} + +m["zai"] = {"Isthmus Zapotec", 56728, "omq-zpc", Latn} + +m["zaj"] = {"Zaramo", 8066599, "bnt-ruv", Latn} + +m["zak"] = {"Zanaki", 8066018, "bnt-lok", Latn} + +m["zal"] = {"Zauzou", 3616358, "tbq-lol", {"Latn", "Hani"}} + +m["zam"] = {"Central Mahuatlán Zapoteco", 13541830, "omq-zap", Latn} + +m["zao"] = {"Ozolotepec Zapotec", 7116610, "omq-zap", Latn} + +m["zap"] = {"Zapotec", 13214, "omq-zap", Latn} + +m["zaq"] = {"Aloápam Zapotec", 4734726, "omq-zap", Latn} + +m["zar"] = {"Rincón Zapotec", 7334628, "omq-zap", Latn} + +m["zas"] = {"Santo Domingo Albarradas Zapotec", 4709425, "omq-zap", Latn} + +m["zat"] = {"Tabaa Zapotec", 7672849, "omq-zap", Latn} + +m["zau"] = {"Zangskari", 771203, "sit-lab", {"Tibt"}, ancestors = {"lbj"}} + +m["zav"] = {"Yatzachi Zapotec", 8050301, "omq-zpc", Latn} + +m["zaw"] = {"Mitla Zapotec", 3053288, "omq-zpc", Latn} + +m["zax"] = {"Xadani Zapotec", 8042823, "omq-zap", Latn} + +m["zay"] = {"Zayse-Zergulla", 673895, "omv-eom", Latn} + +m["zaz"] = {"Zari", 3914398, "cdc-wst", Latn} + +m["zbt"] = {"Batui", 16839143, "poz-slb", Latn} + +m["zca"] = {"Coatecas Altas Zapotec", 5138603, "omq-zap", Latn} + +m["zdj"] = {"Ngazidja Comorian", 3114653, "bnt-com", Latn, sort_key = {from = {"ɓ", "ɗ"}, to = {"bz", "dz"}}} + +m["zea"] = {"Zealandic", 237409, "gmw", Latn, ancestors = {"dum"}, sort_key = {from = {"^'([aeiouy].*)$"}, to = {"%1'"}}} + +m["zeg"] = {"Zenag", 12953345, "poz-ocw", 
Latn} + +m["zen"] = {"Zenaga", 37005, "ber", Latn} + +m["zga"] = {"Kinga", 11005332, "bnt-bki", Latn} + +m["zgh"] = {"Moroccan Amazigh", 7598268, "ber", {"Tfng"}, translit_module = "Tfng-translit"} + +m["zgr"] = {"Magori", 3277370, "poz-ocw", Latn} + +m["zhb"] = {"Zhaba", 56334, "sit-qia"} + +m["zhi"] = {"Zhire", 3914910, "nic-plc", Latn} + +m["zhn"] = {"Nong Zhuang", 7049385, "tai-cen", {"Latn"}, sort_key = {from = {"%p"}, to = {""}}} + +m["zhw"] = {"Zhoa", 8070885, "nic-rnw", Latn} + +m["zia"] = {"Zia", 3038636, "ngf", Latn} + +m["zib"] = {"Zimbabwe Sign Language", 8072097, "sgn"} + +m["zik"] = {"Zimakani", 56740, "ngf", Latn} + +m["zil"] = {"Zialo", 36991, "dmn-msw", Latn} + +m["zim"] = {"Mesme", 56282, "cdc-mas", Latn} + +m["zin"] = {"Zinza", 8072460, "bnt-haj", Latn} + +m["zir"] = {"Ziriya", 3913943, "nic-jer", Latn} + +m["ziw"] = {"Zigula", 37010, "bnt-seu", Latn} + +m["ziz"] = {"Zizilivakan", 56300, "cdc-cbm", Latn} + +m["zka"] = {"Kaimbulawa", 6348011, "poz-mun", Latn} + +m["zkb"] = {"Koibal", 949259, "syd", {"Latn", "Cyrl"}} + +m["zkg"] = {"Goguryeo", 706327, "qfa-kor", {"Hani"}} + +m["zkh"] = {"Khorezmian Turkic", 25502, "trk", {"Arab"}} + +m["zkk"] = {"Karankawa", 3192947, nil, Latn} + +m["zko"] = {"Kott", 34163, "qfa-yen", Latn} + +m["zkp"] = {"São Paulo Kaingáng", 7665661, "sai-jee", Latn} + +m["zkr"] = {"Zakhring", 56996, "sit-mdz", {"Latn", "Hani"}} + +m["zkt"] = {"Khitan", 1064482, "xgn", {"Kitl", "Kits"}} + +m["zku"] = {"Kaurna", 6378899, "aus-psw", Latn} + +m["zkv"] = {"Krevinian", 6436902, "fiu-fin", Latn, ancestors = {"vot"}} + +m["zkz"] = {"Khazar", 1067986, "trk", {"Orkh"}} + +m["zma"] = {"Manda (Australia)", 18650060, "aus-dal", Latn} + +m["zmb"] = {"Zimba", 8071960, "bnt-lgb", Latn} + +m["zmc"] = {"Margany", 10577017, "aus-pam", Latn} + +m["zmd"] = {"Maridan", 10577273, "aus-dal", Latn} + +m["zme"] = {"Mangerr", 10576387, nil, Latn} + +m["zmf"] = {"Mfinu", 35915, "bnt-tek", Latn} + +m["zmg"] = {"Marti Ke", 10577823, "aus-dal", Latn} + 
+m["zmh"] = {"Makolkol", 12636052, "paa-bng", Latn} + +m["zmi"] = {"Negeri Sembilan Malay", 3915909, "poz-mly", Latn} + +m["zmj"] = {"Maridjabin", 10577274, "aus-dal", Latn} + +m["zmk"] = {"Mandandanyi", 10576338, "aus-pam", Latn} + +m["zml"] = {"Madngele", 10575155, "aus-dal", Latn} + +m["zmm"] = {"Marimanindji", 10577424, "aus-dal", Latn} + +m["zmn"] = {"Mbangwe", 35928, "bnt-kel", Latn} + +m["zmo"] = {"Molo", 15974357, "sdv-eje", Latn} + +m["zmp"] = {"Mbuun", 106249400, "bnt", Latn} + +m["zmq"] = {"Mituku", 6883590, "bnt-mbe", Latn} + +m["zmr"] = {"Maranungku", 6772792, "aus-dal", Latn} + +m["zms"] = {"Mbesa", 6799676, "bnt-ske", Latn} + +m["zmt"] = {"Maringarr", 10577443, "aus-dal", Latn} + +m["zmu"] = {"Muruwari", 3915442, "aus-pam", Latn} + +m["zmv"] = {"Mbariman-Gudhinma", 3915672, "aus-pmn", Latn} + +m["zmw"] = {"Mbo (Congo)", 6799710, "bnt-nya", Latn} + +m["zmx"] = {"Bomitaba", 35063, "bnt-ngn", Latn} + +m["zmy"] = {"Mariyedi", 10577501, "aus-dal", Latn} + +m["zmz"] = {"Mbandja", 3915310, "bad", Latn} + +m["zna"] = {"Zan Gula", 863726, "alv-bua", Latn} + +m["zne"] = {"Zande", 35015, "znd", Latn} + +m["zng"] = {"Mang", 720192, "mkh-mng"} + +m["znk"] = {"Manangkari", 6746906, "aus-wdj", Latn} + +m["zns"] = {"Mangas", 3438780, "cdc-wst", Latn} + +m["zoc"] = {"Copainalá Zoque", 12954017, "nai-miz", Latn} + +m["zoh"] = {"Chimalapa Zoque", 5099289, "nai-miz", Latn} + +m["zom"] = {"Zou", 37011, "tbq-kuk"} + +m["zoo"] = {"Asunción Mixtepec Zapotec", 4811888, "omq-zap", Latn} + +m["zoq"] = {"Tabasco Zoque", 323325, "nai-miz", Latn} + +m["zor"] = {"Rayón Zoque", 12954015, "nai-miz", Latn} + +m["zos"] = {"Francisco León Zoque", 12954011, "nai-miz", Latn} + +m["zpa"] = {"Lachiguiri Zapotec", 6468403, "omq-zap", Latn} + +m["zpb"] = {"Yautepec Zapotec", 7413392, "omq-zap", Latn} + +m["zpc"] = {"Choapan Zapotec", 5103425, "omq-zap", Latn} + +m["zpd"] = {"Southeastern Ixtlán Zapotec", 8050392, "omq-zap", Latn} + +m["zpe"] = {"Petapa Zapotec", 7171675, "omq-zap", Latn} + 
+m["zpf"] = {"San Pedro Quiatoni Zapotec", 7271640, "omq-zpc", Latn} + +m["zpg"] = {"Guevea de Humboldt Zapotec", 13459953, "omq-zap", Latn} + +m["zph"] = {"Totomachapan Zapotec", 7828390, "omq-zap", Latn} + +m["zpi"] = {"Santa María Quiegolani Zapotec", 7271823, "omq-zpc", Latn} + +m["zpj"] = {"Quiavicuzas Zapotec", 7271642, "omq-zap", Latn} + +m["zpk"] = {"Tlacolulita Zapotec", 7810685, "omq-zpc", Latn} + +m["zpl"] = {"Lachixío Zapotec", 6468420, "omq-zap", Latn} + +m["zpm"] = {"Mixtepec Zapotec", 7414598, "omq-zpc", Latn} + +m["zpn"] = {"Santa Inés Yatzechi Zapotec", 8050300, "omq-zap", Latn} + +m["zpo"] = {"Amatlán Zapotec", 4740613, "omq-zpc", Latn} + +m["zpp"] = {"El Alto Zapotec", 5350733, "omq-zap", Latn} + +m["zpq"] = {"Zoogocho Zapotec", 8074100, "omq-zpc", Latn} + +m["zpr"] = {"Santiago Xanica Zapotec", 8042924, "omq-zap", Latn} + +m["zps"] = {"Coatlán Zapotec", 7420514, "omq-zap", Latn} + +m["zpt"] = {"San Vicente Coatlán Zapotec", 13541831, "omq-zap", Latn} + +m["zpu"] = {"Yalálag Zapotec", 8047534, "omq-zpc", Latn} + +m["zpv"] = {"Chichicapan Zapotec", 5096050, "omq-zap", Latn} + +m["zpw"] = {"Zaniza Zapotec", 8066220, "omq-zpc", Latn} + +m["zpx"] = {"San Baltazar Loxicha Zapotec", 7413390, "omq-zap", Latn} + +m["zpy"] = {"Mazaltepec Zapotec", 6798223, "omq-zap", Latn} + +m["zpz"] = {"Texmelucan Zapotec", 7708357, "omq-zpc", Latn} + +m["zra"] = {"Kaya", 5528695, "qfa-kor"} + +m["zrg"] = {"Mirgan", 6873206, "inc-eas", ancestors = {"inc-mgd"}} + +m["zrn"] = {"Zirenkel", 3441365, "cdc-est", Latn} + +m["zro"] = {"Záparo", 10206, "sai-zap", Latn} + +m["zrs"] = {"Mairasi", 3038645, "paa-mai", Latn} + +m["zsa"] = {"Sarasira", nil, "poz-ocw"} + +m["zsk"] = { -- attested? 
+ "Kaskean", 6374586 +} + +m["zsl"] = {"Zambian Sign Language", 8065713, "sgn"} + +m["zsr"] = {"Southern Rincon Zapotec", 12954000, "omq-zpc", Latn} + +m["zsu"] = {"Sukurum", nil, "poz-ocw"} + +m["zte"] = {"Elotepec Zapotec", 5367223, "omq-zap", Latn} + +m["ztg"] = {"Xanaguía Zapotec", 8042887, "omq-zpc", Latn} + +m["ztl"] = {"Lapaguía-Guivini Zapotec", 6488084, "omq-zap", Latn} + +m["ztm"] = {"San Agustín Mixtepec Zapotec", 7413220, "omq-zap", Latn} + +m["ztn"] = {"Santa Catarina Albarradas Zapotec", 7419277, "omq-zap", Latn} + +m["ztp"] = {"Loxicha Zapotec", 6694268, "omq-zap", Latn} + +m["ztq"] = {"Quioquitani-Quierí Zapotec", 3574818, "omq-zpc", Latn} + +m["zts"] = {"Tilquiapan Zapotec", 7802959, "omq-zpc", Latn} + +m["ztt"] = {"Tejalapan Zapotec", 13510225, "omq-zap", Latn} + +m["ztu"] = {"San Pablo Güilá Zapotec", 5626813, "omq-zap", Latn} + +m["ztx"] = {"Zaachila Zapotec", 8063390, "omq-zap", Latn} + +m["zty"] = {"Yatee Zapotec", 3574815, "omq-zpc", Latn} + +m["zua"] = {"Zeem", 3450131, "cdc-wst", Latn} + +m["zuh"] = {"Tokano", 7813481, "ngf", Latn} + +m["zum"] = {"Kumzari", 36158, "ira-swi"} + +m["zun"] = {"Zuni", 10188, "qfa-iso", Latn} + +m["zuy"] = {"Zumaya", 56626, "cdc-mas", Latn} + +m["zwa"] = {"Zay", 10195, "sem-eth"} + +m["zyp"] = {"Zyphe", 57004, "tbq-kuk", Latn} + +m["zza"] = {"Zazaki", 10199, "ira-zgr", Latn, wikimedia_codes = {"diq"}} + +m["zzj"] = {"Zuojiang Zhuang", 13848149, "tai-cen", {"Latn"}, sort_key = {from = {"%p"}, to = {""}}} + +return m diff --git a/wiktra/wikt/translit/languages/datax.lua b/wiktra/wikt/translit/languages/datax.lua new file mode 100644 index 0000000..46bfe89 --- /dev/null +++ b/wiktra/wikt/translit/languages/datax.lua @@ -0,0 +1,1369 @@ +local u = mw.ustring.char + +-- UTF-8 encoded strings for some commonly-used diacritics +local GRAVE = u(0x0300) +local ACUTE = u(0x0301) +local CIRC = u(0x0302) +local TILDE = u(0x0303) +local MACRON = u(0x0304) +local BREVE = u(0x0306) +local DOTABOVE = u(0x0307) +local DIAER = 
u(0x0308) +local CARON = u(0x030C) +local DGRAVE = u(0x030F) +local INVBREVE = u(0x0311) +local DOTBELOW = u(0x0323) +local RINGBELOW = u(0x0325) +local CEDILLA = u(0x0327) + +local Latn = {"Latn"} + +local m = {} + +m["aav-khs-pro"] = {"Proto-Khasian", nil, "aav-khs", {"Latinx"}, type = "reconstructed"} + +m["aav-nic-pro"] = {"Proto-Nicobarese", nil, "aav-nic", {"Latinx"}, type = "reconstructed"} + +m["aav-pkl-pro"] = {"Proto-Pnar-Khasi-Lyngngam", nil, "aav-pkl", {"Latinx"}, type = "reconstructed"} + +m["aav-pro"] = { -- The mkh-pro will merge into this. + "Proto-Austroasiatic", + nil, + "aav", + {"Latinx"}, + type = "reconstructed" +} + +m["afa-pro"] = {"Proto-Afroasiatic", 269125, "afa", {"Latinx"}, type = "reconstructed"} + +m["alg-aga"] = {"Agawam", nil, "alg-eas", Latn} + +m["alg-pro"] = {"Proto-Algonquian", 7251834, "alg", {"Latinx"}, type = "reconstructed", sort_key = {from = {"·"}, to = {""}}} + +m["alv-ama"] = {"Amasi", 4740400, "nic-grs", Latn, entry_name = {from = {"[àáâãā]", "[èéêē]", "[ìíîī]", "[òóôõō]", "[ùúûũū]", GRAVE, ACUTE, CIRC, TILDE, MACRON}, to = {"a", "e", "i", "o", "u"}}} + +m["alv-bgu"] = {"Baïnounk Gubëeher", 17002646, "alv-bny", Latn} + +m["alv-bua-pro"] = {"Proto-Bua", nil, "alv-bua", Latn, type = "reconstructed"} + +m["alv-cng-pro"] = {"Proto-Cangin", nil, "alv-cng", Latn, type = "reconstructed"} + +m["alv-edo-pro"] = {"Proto-Edoid", nil, "alv-edo", Latn, type = "reconstructed"} + +m["alv-fli-pro"] = {"Proto-Fali", nil, "alv-fli", Latn, type = "reconstructed"} + +m["alv-gbe-pro"] = {"Proto-Gbe", nil, "alv-gbe", Latn, type = "reconstructed"} + +m["alv-gng-pro"] = {"Proto-Guang", nil, "alv-gng", Latn, type = "reconstructed"} + +m["alv-gtm-pro"] = {"Proto-Central Togo", nil, "alv-gtm", Latn, type = "reconstructed"} + +m["alv-gwa"] = {"Gwara", 16945580, "nic-pla", Latn} + +m["alv-hei-pro"] = {"Proto-Heiban", nil, "alv-hei", Latn, type = "reconstructed"} + +m["alv-igb-pro"] = {"Proto-Igboid", nil, "alv-igb", Latn, type = "reconstructed"} + 
+m["alv-kwa-pro"] = {"Proto-Kwa", nil, "alv-kwa", Latn, type = "reconstructed"} + +m["alv-mum-pro"] = {"Proto-Mumuye", nil, "alv-mum", Latn, type = "reconstructed"} + +m["alv-nup-pro"] = {"Proto-Nupoid", nil, "alv-nup", Latn, type = "reconstructed"} + +m["alv-pro"] = {"Proto-Atlantic-Congo", nil, "alv", Latn, type = "reconstructed"} + +m["alv-yor-pro"] = {"Proto-Yoruboid", nil, "alv-yor", Latn, type = "reconstructed"} + +m["apa-pro"] = {"Proto-Apachean", nil, "apa", {"Latinx"}, type = "reconstructed"} + +m["aql-pro"] = {"Proto-Algic", 18389588, "aql", {"Latinx"}, type = "reconstructed", sort_key = {from = {"·"}, to = {""}}} + +m["art-blk"] = {"Bolak", 2909283, "art", Latn, type = "appendix-constructed"} + +m["art-bsp"] = {"Black Speech", 686210, "art", {"Latn", "Teng"}, type = "appendix-constructed"} + +m["art-com"] = {"Communicationssprache", 35227, "art", Latn, type = "appendix-constructed"} + +m["art-dtk"] = {"Dothraki", 2914733, "art", Latn, type = "appendix-constructed"} + +m["art-elo"] = {"Eloi", nil, "art", Latn, type = "appendix-constructed"} + +m["art-gld"] = {"Goa'uld", 19823, "art", {"Latn", "Egyp", "Mero"}, type = "appendix-constructed"} + +m["art-lap"] = {"Lapine", 6488195, "art", Latn, type = "appendix-constructed"} + +m["art-man"] = {"Mandalorian", 54289, "art", Latn, type = "appendix-constructed"} + +m["art-mun"] = {"Mundolinco", 851355, "art", Latn, type = "appendix-constructed"} + +m["art-nav"] = {"Na'vi", 316939, "art", Latn, type = "appendix-constructed"} + +m["art-nox"] = {"Noxilo", nil, "art", Latn, type = "appendix-constructed"} + +m["art-top"] = {"Toki Pona", 36846, "art", Latn, type = "appendix-constructed"} + +m["art-una"] = {"Unas", nil, "art", Latn, type = "appendix-constructed"} + +m["ath-nic"] = {"Nicola", 20609, "ath-nor", Latn} + +m["ath-pro"] = {"Proto-Athabaskan", nil, "ath", {"Latinx"}, type = "reconstructed"} + +m["auf-pro"] = {"Proto-Arawa", nil, "auf", {"Latinx"}, type = "reconstructed"} + +m["aus-alu"] = {"Alungul", 16827670, 
"aus-pmn", Latn} + +m["aus-and"] = {"Andjingith", 4754509, "aus-pmn", Latn} + +m["aus-ang"] = {"Angkula", 16828520, "aus-pmn", Latn} + +m["aus-arn-pro"] = {"Proto-Arnhem", nil, "aus-arn", {"Latinx"}, type = "reconstructed"} + +m["aus-bra"] = {"Barranbinya", 4863220, "aus-pmn", Latn} + +m["aus-brm"] = {"Barunggam", 4865914, "aus-pmn", Latn} + +m["aus-cww-pro"] = {"Proto-Central New South Wales", nil, "aus-cww", {"Latinx"}, type = "reconstructed"} + +m["aus-dal-pro"] = {"Proto-Daly", nil, "aus-dal", {"Latinx"}, type = "reconstructed"} + +m["aus-guw"] = {"Guwar", 6652138, "aus-pam", Latn} + +m["aus-lsw"] = {"Little Swanport", 6652138, nil, Latn} + +m["aus-mbi"] = {"Mbiywom", 6799701, "aus-pmn", Latn} + +m["aus-ngk"] = {"Ngkoth", 7022405, "aus-pmn", Latn} + +m["aus-nyu-pro"] = {"Proto-Nyulnyulan", nil, "aus-nyu", {"Latinx"}, type = "reconstructed"} + +m["aus-pam-pro"] = {"Proto-Pama-Nyungan", 33942, "aus-pam", {"Latinx"}, type = "reconstructed"} + +m["aus-tul"] = {"Tulua", 16938541, "aus-pam", Latn} + +m["aus-uwi"] = {"Uwinymil", 7903995, "aus-arn", Latn} + +m["aus-wdj-pro"] = {"Proto-Iwaidjan", nil, "aus-wdj", {"Latinx"}, type = "reconstructed"} + +m["aus-won"] = {"Wong-gie", nil, "aus-pam", Latn} + +m["aus-wul"] = {"Wulguru", 8039196, "aus-dyb", Latn} + +m["aus-ynk"] = { -- contrast nny + "Yangkaal", 3913770, "aus-tnk", Latn +} + +m["awd-amc-pro"] = {"Proto-Amuesha-Chamicuro", nil, "awd", Latn, type = "reconstructed", ancestors = {"awd-pro"}} + +m["awd-kmp-pro"] = {"Proto-Kampa", nil, "awd", Latn, type = "reconstructed", ancestors = {"awd-pro"}} + +m["awd-prw-pro"] = {"Proto-Paresi-Waura", nil, "awd", Latn, type = "reconstructed", ancestors = {"awd-pro"}} + +m["awd-ama"] = {"Amarizana", 16827787, "awd", Latn} + +m["awd-ana"] = {"Anauyá", 16828252, "awd", Latn} + +m["awd-apo"] = {"Apolista", 16916645, "awd", Latn} + +m["awd-cav"] = {"Cavere", nil, "awd", Latn} + +m["awd-gnu"] = {"Guinau", 3504087, "awd", Latn} + +m["awd-kar"] = {"Cariay", 16920253, "awd", Latn} + 
+m["awd-kaw"] = {"Kawishana", 6379993, "awd-nwk", Latn} + +m["awd-kus"] = {"Kustenau", 5196293, "awd", Latn} + +m["awd-man"] = {"Manao", 6746920, "awd", Latn} + +m["awd-mar"] = {"Marawan", 6755108, "awd", Latn} + +m["awd-mpr"] = {"Maypure", nil, "awd", Latn} + +m["awd-mrt"] = {"Mariaté", 16910017, "awd-nwk", Latn} + +m["awd-nwk-pro"] = {"Proto-Nawiki", nil, "awd-nwk", Latn, type = "reconstructed"} + +m["awd-pai"] = {"Paikoneka", nil, "awd", Latn} + +m["awd-pas"] = {"Passé", nil, "awd-nwk", Latn} + +m["awd-pro"] = {"Proto-Arawak", nil, "awd", Latn, type = "reconstructed"} + +m["awd-she"] = {"Shebayo", 7492248, "awd", Latn} + +m["awd-taa-pro"] = {"Proto-Ta-Arawak", nil, "awd-taa", Latn, type = "reconstructed"} + +m["awd-wai"] = {"Wainumá", 16910017, "awd-nwk", Latn} + +m["awd-yum"] = {"Yumana", 8061062, "awd-nwk", Latn} + +m["azc-caz"] = {"Cazcan", 5055514, "azc", Latn} + +m["azc-cup-pro"] = {"Proto-Cupan", nil, "azc-cup", {"Latinx"}, type = "reconstructed"} + +m["azc-ktn"] = {"Kitanemuk", 3197558, "azc-tak", Latn} + +m["azc-nah-pro"] = {"Proto-Nahuan", 7251860, "azc-nah", {"Latinx"}, type = "reconstructed"} + +m["azc-num-pro"] = {"Proto-Numic", nil, "azc-num", {"Latinx"}, type = "reconstructed"} + +m["azc-pro"] = {"Proto-Uto-Aztecan", 96400333, "azc", {"Latinx"}, type = "reconstructed"} + +m["azc-tak-pro"] = {"Proto-Takic", nil, "azc-tak", {"Latinx"}, type = "reconstructed"} + +m["azc-tat"] = {"Tataviam", 743736, "azc", Latn} + +m["ber-pro"] = {"Proto-Berber", 2855698, "ber", {"Latinx"}, type = "reconstructed"} + +m["ber-fog"] = {"Fogaha", 107610173, "ber", Latn} + +m["bnt-bal"] = {"Balong", 93935237, "bnt-bbo", Latn} + +m["bnt-bon"] = {"Boma Nkuu", nil, "bnt", Latn} + +m["bnt-boy"] = {"Boma Yumu", nil, "bnt", Latn} + +m["bnt-cmw"] = {"Chimwiini", 4958328, "bnt-swh", Latn} + +m["bnt-ind"] = {"Indanga", 51412803, "bnt", Latn} + +m["bnt-lal"] = {"Lala (South Africa)", 6480154, "bnt-ngu", Latn} + +m["bnt-lwl"] = {"Lwel", 93936908, "bnt-bdz", Latn} + +m["bnt-mpi"] = 
{"Mpiin", 93937013, "bnt-bdz", Latn} + +m["bnt-mpu"] = { + "Mpuono", -- not to be confused with Mbuun zmp + 36056, "bnt", Latn +} + +m["bnt-ngu-pro"] = {"Proto-Nguni", 961559, "bnt-ngu", {"Latinx"}, type = "reconstructed", sort_key = {from = {"[àáâǎ]", "[èéêě]", "[ìíîǐ]", "[òóôǒ]", "[ùúûǔ]", "ḿ", "[ǹńň]", ACUTE, GRAVE, CIRC, CARON}, to = {"a", "e", "i", "o", "u", "m", "n"}}} + +m["bnt-phu"] = {"Phuthi", 33796, "bnt-ngu", Latn, entry_name = {from = {"[àá]", "[èé]", "[ìí]", "[òó]", "[ùú]", "ḿ", "[ǹń]", ACUTE, GRAVE}, to = {"a", "e", "i", "o", "u", "m", "n"}}} + +m["bnt-pro"] = {"Proto-Bantu", 3408025, "bnt", {"Latinx"}, type = "reconstructed", sort_key = {from = {"[àá]", "[èé]", "[ìí]", "[òó]", "[ùú]", "[ǹń]", "ɪ", "ʊ", ACUTE, GRAVE}, to = {"a", "e", "i2", "o", "u2", "n", "i1", "u1"}}} + +m["bnt-sbo"] = {"South Boma", nil, "bnt", Latn} + +m["bnt-sts-pro"] = {"Proto-Sotho-Tswana", nil, "bnt-sts", Latn, type = "reconstructed"} + +m["btk-pro"] = {"Proto-Batak", nil, "btk", {"Latinx"}, type = "reconstructed"} + +m["cau-abz-pro"] = {"Proto-Abkhaz-Abaza", 7251831, "cau-abz", {"Latinx"}, type = "reconstructed"} + +m["cau-ava-pro"] = {"Proto-Avaro-Andian", nil, "cau-ava", {"Latinx"}, type = "reconstructed"} + +m["cau-cir-pro"] = {"Proto-Circassian", 7251838, "cau-cir", {"Latinx"}, type = "reconstructed"} + +m["cau-drg-pro"] = {"Proto-Dargwa", nil, "cau-drg", {"Latinx"}, type = "reconstructed"} + +m["cau-lzg-pro"] = {"Proto-Lezghian", nil, "cau-lzg", {"Latinx"}, type = "reconstructed"} + +m["cau-nec-pro"] = {"Proto-Northeast Caucasian", nil, "cau-nec", {"Latinx"}, type = "reconstructed"} + +m["cau-nkh-pro"] = {"Proto-Nakh", nil, "cau-nkh", {"Latinx"}, type = "reconstructed"} + +m["cau-nwc-pro"] = {"Proto-Northwest Caucasian", 7251861, "cau-nwc", {"Latinx"}, type = "reconstructed"} + +m["cau-tsz-pro"] = {"Proto-Tsezian", nil, "cau-tsz", {"Latinx"}, type = "reconstructed"} + +m["cba-ata"] = {"Atanques", 4812783, "cba", Latn} + +m["cba-cat"] = {"Catío Chibcha", 7083619, "cba", 
Latn} + +m["cba-dor"] = {"Dorasque", 5297532, "cba", Latn} + +m["cba-dui"] = {"Duit", 3041061, "cba", Latn} + +m["cba-hue"] = {"Huetar", 35514, "cba", Latn} + +m["cba-nut"] = {"Nutabe", 7070405, "cba", Latn} + +m["cba-pro"] = {"Proto-Chibchan", nil, "cba", {"Latinx"}, type = "reconstructed"} + +m["ccn-pro"] = {"Proto-North Caucasian", nil, "ccn", {"Latinx"}, type = "reconstructed"} + +m["ccs-pro"] = {"Proto-Kartvelian", 2608203, "ccs", {"Latinx"}, type = "reconstructed", entry_name = {from = {"q̣", "p̣", "ʓ", "ċ"}, to = {"q̇", "ṗ", "ʒ", "c̣"}}} + +m["ccs-gzn-pro"] = {"Proto-Georgian-Zan", 23808119, "ccs-gzn", {"Latinx"}, type = "reconstructed", entry_name = {from = {"q̣", "p̣", "ʓ", "ċ"}, to = {"q̇", "ṗ", "ʒ", "c̣"}}} + +m["cdc-cbm-pro"] = {"Proto-Central Chadic", nil, "cdc-cbm", Latn, type = "reconstructed"} + +m["cdc-mas-pro"] = {"Proto-Masa", nil, "cdc-mas", Latn, type = "reconstructed"} + +m["cdc-pro"] = {"Proto-Chadic", nil, "cdc", Latn, type = "reconstructed"} + +m["cdd-pro"] = {"Proto-Caddoan", nil, "cdd", {"Latinx"}, type = "reconstructed"} + +m["cel-bry-pro"] = {"Proto-Brythonic", 156877, "cel-bry", {"Latinx", "Grek"}, sort_key = {from = {"[ββ̃]", "ð", "ė", "ɣ", "ɨ", "[ọö]", "[ʉü]", "θ"}, to = {"b¯", "d¯", "e", "g¯", "i", "o", "u", "t¯"}}} + +m["cel-gal"] = {"Gallaecian", 3094789, "cel"} + +m["cel-gau"] = {"Gaulish", 29977, "cel", {"Latn", "Grek", "Ital"}, entry_name = {remove_diacritics = MACRON .. BREVE .. 
DIAER}} + +m["cel-pro"] = {"Proto-Celtic", 653649, "cel", {"Latinx"}, type = "reconstructed", sort_key = {from = {"ā", "ē", "[ɸφ]", "ī", "ū", "ʷ"}, to = {"a", "e", "f", "i", "u", "¯w"}}} + +m["chi-pro"] = {"Proto-Chimakuan", nil, "chi", {"Latinx"}, type = "reconstructed"} + +m["cmc-pro"] = {"Proto-Chamic", nil, "cmc", {"Latinx"}, type = "reconstructed"} + +m["cpe-mar"] = {"Maroon Spirit Language", 1093206, "crp", Latn, ancestors = {"en"}} + +m["cpe-spp"] = {"Samoan Plantation Pidgin", 7409948, "crp", Latn, ancestors = {"en"}} + +m["crp-gep"] = {"West Greenlandic Pidgin", 17036301, "crp", Latn, ancestors = {"kl"}} + +m["crp-mpp"] = {"Macau Pidgin Portuguese", nil, "crp", {"Hani", "Latn"}, ancestors = {"pt"}} + +m["crp-rsn"] = {"Russenorsk", 505125, "crp", {"Cyrl", "Latn"}, ancestors = {"no", "ru"}} + +m["crp-tpr"] = {"Taimyr Pidgin Russian", 16930506, "crp", {"Cyrl"}, ancestors = {"ru"}} + +m["csu-bba-pro"] = {"Proto-Bongo-Bagirmi", nil, "csu-bba", {"Latinx"}, type = "reconstructed"} + +m["csu-maa-pro"] = {"Proto-Mangbetu", nil, "csu-maa", {"Latinx"}, type = "reconstructed"} + +m["csu-pro"] = {"Proto-Central Sudanic", nil, "csu", {"Latinx"}, type = "reconstructed"} + +m["csu-sar-pro"] = {"Proto-Sara", nil, "csu-sar", {"Latinx"}, type = "reconstructed"} + +m["ctp-san"] = {"San Juan Quiahije Chatino", nil, "omq-cha", {"Latinx"}} + +m["cus-ash"] = {"Ashraaf", 4805855, "cus", {"Latn"}} + +m["cus-pro"] = {"Proto-Cushitic", nil, "cus", {"Latinx"}, type = "reconstructed"} + +m["dmn-dam"] = {"Dama (Sierra Leone)", 19601574, "dmn", {"Latn"}} + +m["dra-mkn"] = {"Middle Kannada", nil, "dra", {"Knda"}, ancestors = {"dra-okn"}, translit_module = "kn-translit"} + +m["dra-okn"] = {"Old Kannada", 15723156, "dra", {"Knda"}, ancestors = {"dra-pro"}, translit_module = "kn-translit"} + +m["dra-pro"] = {"Proto-Dravidian", 1702853, "dra", {"Latinx"}, type = "reconstructed"} + +m["egx-dem"] = {"Demotic", 36765, "egx", {"Latinx", "Egyd"}, ancestors = {"egy"}, sort_key = {from = {"ṱ", "t"}, 
to = {"h̭", "ḫ"}}} + +m["elu-prk"] = {"Helu", 15080869, "inc-mid", {"Brah"}, ancestors = {"inc-pra"}} + +m["dmn-pro"] = {"Proto-Mande", nil, "dmn", {"Latinx"}, type = "reconstructed"} + +m["dmn-mdw-pro"] = {"Proto-Western Mande", nil, "dmn-mdw", {"Latinx"}, type = "reconstructed"} + +m["dru-pro"] = {"Proto-Rukai", nil, "map", {"Latinx"}, type = "reconstructed"} + +m["esx-esk-pro"] = {"Proto-Eskimo", 7251842, "esx-esk", {"Latinx"}, type = "reconstructed"} + +m["esx-ink"] = {"Inuktun", 1671647, "esx-inu", Latn} + +m["esx-inq"] = {"Inuinnaqtun", 28070, "esx-inu", Latn} + +m["esx-inu-pro"] = {"Proto-Inuit", nil, "esx-inu", {"Latinx"}, type = "reconstructed"} + +m["esx-pro"] = {"Proto-Eskimo-Aleut", 7251843, "esx", {"Latinx"}, type = "reconstructed"} + +m["esx-tut"] = {"Tunumiisut", 15665389, "esx-inu", Latn} + +m["euq-pro"] = {"Proto-Basque", 938011, "euq", {"Latinx"}, type = "reconstructed"} + +m["fiu-fin-pro"] = {"Proto-Finnic", 11883720, "fiu-fin", Latn, type = "reconstructed"} + +m["gem-bur"] = {"Burgundian", nil, "gme", Latn} + +m["gem-pro"] = {"Proto-Germanic", 669623, "gem", {"Latinx"}, type = "reconstructed", sort_key = {from = {"ā", "[ēê]", "ī", "[ōô]", "ū", "ą", "į", "ǫ", "ų", CIRC, MACRON}, to = {"a", "e", "i", "o", "u", "an", "in", "on", "un"}}} + +m["gme-cgo"] = {"Crimean Gothic", 36211, "gme", Latn} + +m["gmq-bot"] = {"Westrobothnian", 7989641, "gmq", Latn, ancestors = {"non"}} + +m["gmq-gut"] = {"Gutnish", 1256646, "gmq", Latn, ancestors = {"non"}} + +m["gmq-jmk"] = {"Jamtish", nil, "gmq", Latn, ancestors = {"non"}} + +m["gmq-mno"] = {"Middle Norwegian", 3417070, "gmq", Latn, ancestors = {"non"}} + +-- Used for both gmq-oda (Old Danish) and gmq-osw (Old Swedish). +-- Ensure any changes are appropriate for both languages, or copy to each +-- language's table before making any changes. 
+local gmq_oda_entry_name = {from = {"Ā", "ā", "Ē", "ē", "Ī", "ī", "Ō", "ō", "Ū", "ū", "Ȳ", "ȳ", "Ǣ", "ǣ", MACRON}, to = {"A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "Y", "y", "Æ", "æ"}} + +m["gmq-oda"] = {"Old Danish", nil, "gmq", Latn, ancestors = {"non"}, entry_name = gmq_oda_entry_name} + +m["gmq-osw"] = {"Old Swedish", 2417210, "gmq", Latn, ancestors = {"non"}, entry_name = gmq_oda_entry_name} + +m["gmq-pro"] = {"Proto-Norse", 1671294, "gmq", {"Runr"}, translit_module = "Runr-translit"} + +m["gmq-scy"] = {"Scanian", 768017, "gmq", Latn, ancestors = {"non"}} + +m["gmw-cfr"] = {"Central Franconian", nil, "gmw", Latn, ancestors = {"gmh"}, wikimedia_codes = {"ksh"}} + +m["gmw-ecg"] = { + "East Central German", + 499344, -- subsumes Q699284, Q152965 + "gmw", + Latn, + ancestors = {"gmh"} +} + +m["gmw-gts"] = {"Gottscheerish", 533109, "gmw", Latn, ancestors = {"bar"}} + +m["gmw-jdt"] = {"Jersey Dutch", 1687911, "gmw", Latn, ancestors = {"nl"}} + +m["gmw-pro"] = {"Proto-West Germanic", 78079021, "gmw", {"Latinx"}, type = "reconstructed", sort_key = {from = {"[āą]", "ē", "[īį]", "ō", "[ūų]", "ʀ", MACRON}, to = {"a", "e", "i", "o", "u", "r"}}} + +m["gmw-rfr"] = {"Rhine Franconian", 707007, "gmw", Latn, ancestors = {"gmh"}} + +m["gmw-stm"] = {"Sathmar Swabian", 2223059, "gmw", Latn, ancestors = {"swg"}} + +m["gmw-tsx"] = {"Transylvanian Saxon", 260942, "gmw", Latn, ancestors = {"gmw-cfr"}} + +m["gmw-vog"] = {"Volga German", 312574, "gmw", Latn, ancestors = {"gmw-rfr"}} + +m["gmw-zps"] = {"Zipser German", 205548, "gmw", Latn, ancestors = {"gmh"}} + +m["grk-cal"] = {"Calabrian Greek", 1146398, "grk", Latn, ancestors = {"grc"}} + +m["grk-ita"] = {"Italiot Greek", nil, "grk", {"Latn", "Grek"}, ancestors = {"grc"}} + +m["grk-mar"] = {"Mariupol Greek", 4400023, "grk", {"Cyrl", "Latn", "Grek"}, ancestors = {"grc"}, entry_name = {from = {ACUTE}, to = {}}} + +m["grk-pro"] = {"Proto-Hellenic", 1231805, "grk", {"Latinx"}, type = "reconstructed", sort_key = {from = {"[áā]", 
"[éēḗ]", "[íī]", "[óōṓ]", "[úū]", "ď", "ľ", "ň", "ř", "ʰ", "ʷ", ACUTE, MACRON}, to = {"a", "e", "i", "o", "u", "d", "l", "n", "r", "¯h", "¯w"}}} + +m["hmn-pro"] = {"Proto-Hmong", nil, "hmn", {"Latinx"}, type = "reconstructed"} + +m["hmx-mie-pro"] = {"Proto-Mien", nil, "hmx-mie", {"Latinx"}, type = "reconstructed"} + +m["hmx-pro"] = {"Proto-Hmong-Mien", 7251846, "hmx", {"Latinx"}, type = "reconstructed"} + +m["hyx-pro"] = {"Proto-Armenian", 3848498, "hyx", Latn, type = "reconstructed"} + +m["iir-nur-pro"] = {"Proto-Nuristani", nil, "iir-nur", {"Latinx"}, type = "reconstructed"} + +m["iir-pro"] = {"Proto-Indo-Iranian", 966439, "iir", {"Latinx"}, type = "reconstructed"} + +m["ijo-pro"] = {"Proto-Ijoid", nil, "ijo", {"Latinx"}, type = "reconstructed"} + +m["inc-ash"] = {"Ashokan Prakrit", nil, "inc-mid", {"Brah", "Khar"}, ancestors = {"sa"}, translit_module = "translit-redirect"} + +m["inc-gup"] = {"Gurjar Apabhramsa", nil, "inc-wes", {"Deva"}, ancestors = {"psu"}} + +m["inc-kam"] = {"Kamarupi Prakrit", 6356097, "inc-mid", {"Brah", "Sidd"}, ancestors = {"inc-mgd"}} + +m["inc-kha"] = {"Khasa Prakrit", nil, "inc-nor", {"Latn"}, ancestors = {"inc-pra"}} + +m["inc-kho"] = {"Kholosi", 24952008, "inc-snd", {"Latn"}, ancestors = {"inc-vra"}} + +m["inc-mas"] = {"Middle Assamese", nil, "inc-eas", {"as-Beng"}, ancestors = {"inc-oas"}, translit_module = "inc-mas-translit"} + +m["inc-mbn"] = {"Middle Bengali", nil, "inc-eas", {"Beng"}, ancestors = {"inc-obn"}, translit_module = "inc-mbn-translit"} + +m["inc-mgd"] = {"Magadhi Prakrit", 2652214, "inc-mid", {"Brah"}, ancestors = {"inc-pra"}, translit_module = "Brah-translit"} + +m["inc-mgu"] = {"Middle Gujarati", 24907429, "inc-wes", {"Deva"}, ancestors = {"inc-ogu"}} + +m["inc-mor"] = {"Middle Oriya", nil, "inc-eas", {"Orya"}, ancestors = {"inc-oor"}} + +m["inc-oas"] = {"Early Assamese", nil, "inc-eas", {"as-Beng"}, ancestors = {"inc-kam"}, translit_module = "inc-oas-translit"} + +m["inc-obn"] = {"Old Bengali", nil, "inc-eas", 
{"Beng"}, ancestors = {"inc-mgd"}} + +m["inc-ogu"] = {"Old Gujarati", 24907427, "inc-wes", {"Deva"}, ancestors = {"inc-gup"}, translit_module = "sa-translit"} + +m["inc-ohi"] = {"Old Hindi", 48767781, "inc-hiw", {"Deva"}, ancestors = {"inc-sap"}, translit_module = "sa-translit"} + +m["inc-oor"] = {"Old Oriya", nil, "inc-eas", {"Orya"}, ancestors = {"inc-mgd"}} + +m["inc-opa"] = {"Old Punjabi", nil, "inc-pan", {"Guru", "pa-Arab"}, ancestors = {"inc-tak"}, translit_module = "translit-redirect", entry_name = {from = {u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652)}, to = {}}} + +m["inc-ork"] = {"Old Kamta", nil, "inc-eas", {"as-Beng"}, ancestors = {"inc-kam"}, translit_module = "as-translit"} + +m["inc-pra"] = {"Prakrit", 192170, "inc-mid", {"Brah", "Deva", "Knda"}, ancestors = {"inc-ash"}, translit_module = "translit-redirect"} + +m["inc-pro"] = {"Proto-Indo-Aryan", 23808344, "inc", {"Latinx"}, type = "reconstructed"} + +m["inc-psc"] = {"Paisaci Prakrit", 2995607, "inc-mid", {"Brah"}, ancestors = {"inc-ash"}, translit_module = "Brah-translit"} + +m["inc-sap"] = {"Sauraseni Apabhramsa", nil, "inc-cen", {"Deva"}, ancestors = {"psu"}} + +m["inc-tak"] = {"Takka Apabhramsa", nil, "inc-pan", {"Deva"}, ancestors = {"inc-pra"}, translit_module = "sa-translit"} + +m["inc-vra"] = {"Vracada Apabhramsa", nil, "inc-snd", {"Deva"}, ancestors = {"inc-pra"}, translit_module = "sa-translit"} + +m["inc-dar-pro"] = { + "Proto-Dardic", + nil, + "inc-dar", + {"Latinx"}, + type = "reconstructed", + ancestors = {"sa"} -- to be specific "an unattested Old Indo-Aryan dialect" +} + +m["inc-cen-pro"] = {"Proto-Central Indo-Aryan", nil, "inc-cen", {"Latinx"}, type = "reconstructed", ancestors = {"psu"}} + +m["inc-dar-pro"] = {"Proto-Dardic", nil, "inc-dar", {"Latinx"}, type = "reconstructed", ancestors = {"sa"}} + +m["ine-ana-pro"] = {"Proto-Anatolian", 7251833, "ine-ana", {"Latinx"}, type = "reconstructed"} + +m["ine-bsl-pro"] = {"Proto-Balto-Slavic", 
1703347, "ine-bsl", {"Latinx"}, type = "reconstructed", sort_key = {from = {"[áā]", "[éēḗ]", "[íī]", "[óōṓ]", "[úū]", ACUTE, MACRON, "ˀ"}, to = {"a", "e", "i", "o", "u"}}} + +m["ine-pae"] = { + "Paeonian", + 2705672, + "ine", + {"polytonic"}, + translit_module = "grc-translit", + sort_key = { -- Keep this synchronized with grc and others synced to it + from = {"[ᾳάᾴὰᾲᾶᾷἀᾀἄᾄἂᾂἆᾆἁᾁἅᾅἃᾃἇᾇᾱᾍ]", "[έὲἐἔἒἑἕἓ]", "[ῃήῄὴῂῆῇἠᾐἤᾔἢᾒἦᾖἡᾑἥᾕἣᾓἧᾗ]", "[ίὶῖἰἴἲἶἱἵἳἷϊΐῒῗῑ]", "[όὸὀὄὂὁὅὃ]", "[ύὺῦὐὔὒὖὑὕὓὗϋΰῢῧῡ]", "[ῳώῴὼῲῶῷὠᾠὤᾤὢᾢὦᾦὡᾡὥᾥὣᾣὧᾧᾨ]", "ῥ", "ς"}, + to = {"α", "ε", "η", "ι", "ο", "υ", "ω", "ρ", "σ"} + }, + entry_name = {from = {"[ᾸᾹ]", "[ᾰᾱ]", "[ῘῙ]", "[ῐῑ]", "[ῨῩ]", "[ῠῡ]"}, to = {"Α", "α", "Ι", "ι", "Υ", "υ"}} +} + +m["ine-pro"] = {"Proto-Indo-European", 37178, "ine", {"Latinx"}, type = "reconstructed", sort_key = {from = {"[áā]", "[éēḗ]", "[íī]", "[óōṓ]", "[úū]", "ĺ", "ḿ", "ń", "ŕ", "ǵ", "ḱ", "ʰ", "ʷ", "₁", "₂", "₃", RINGBELOW, ACUTE, MACRON}, to = {"a", "e", "i", "o", "u", "l", "m", "n", "r", "g'", "k'", "¯h", "¯w", "1", "2", "3"}}} + +m["ine-toc-pro"] = {"Proto-Tocharian", 37029, "ine-toc", {"Latinx"}, type = "reconstructed"} + +m["xme-old"] = {"Old Median", 36461, "xme", {"Grek", "Latn"}} + +m["xme-mid"] = {"Middle Median", nil, "xme", {"Latn"}, ancestors = {"xme-old"}} + +m["xme-ker"] = {"Kermanic", 129850, "xme", {"fa-Arab", "Latn"}, ancestors = {"xme-mid"}} + +m["xme-taf"] = {"Tafreshi", nil, "xme", {"fa-Arab", "Latn"}, ancestors = {"xme-mid"}} + +m["xme-ttc-pro"] = {"Proto-Tatic", nil, "xme-ttc", {"Latn"}, ancestors = {"xme-mid"}} + +m["xme-kls"] = {"Kalasuri", nil, "xme-ttc", ancestors = {"xme-ttc-nor"}} + +m["xme-klt"] = { + "Kilit", + 3612452, + "xme-ttc", + {"Cyrl"}, -- and fa-Arab? 
+ ancestors = {"xme-ttc-pro"} +} + +m["xme-ott"] = {"Old Tati", 434697, "xme-ttc", {"fa-Arab", "Latinx"}, ancestors = {"xme-ttc-pro"}} + +m["ira-pro"] = {"Proto-Iranian", 4167865, "ira", {"Latinx"}, type = "reconstructed"} + +m["ira-mpr-pro"] = {"Proto-Medo-Parthian", nil, "ira-mpr", {"Latinx"}, type = "reconstructed"} + +m["ira-kms-pro"] = {"Proto-Komisenian", nil, "ira-kms", {"Latinx"}, type = "reconstructed"} + +m["ira-zgr-pro"] = {"Proto-Zaza-Gorani", nil, "ira-zgr", {"Latinx"}, type = "reconstructed"} + +m["ira-pat-pro"] = {"Proto-Pathan", nil, "ira-pat", {"Latinx"}, type = "reconstructed"} + +m["os-pro"] = {"Proto-Ossetic", nil, "xsc", {"Latinx"}, type = "reconstructed"} + +m["xsc-pro"] = {"Proto-Scythian", nil, "xsc", {"Latinx"}, type = "reconstructed"} + +m["xsc-skw-pro"] = {"Proto-Saka-Wakhi", nil, "xsc-skw", {"Latinx"}, type = "reconstructed", ancestors = {"xsc-pro"}} + +m["xsc-sak-pro"] = {"Proto-Saka", nil, "xsc-sak", {"Latinx"}, type = "reconstructed", ancestors = {"xsc-skw-pro"}} + +m["ira-sym-pro"] = {"Proto-Shughni-Yazghulami-Munji", nil, "ira-sym", {"Latinx"}, type = "reconstructed"} + +m["ira-sgi-pro"] = {"Proto-Sanglechi-Ishkashimi", nil, "ira-sgi", {"Latinx"}, type = "reconstructed"} + +m["ira-mny-pro"] = {"Proto-Munji-Yidgha", nil, "ira-mny", {"Latinx"}, type = "reconstructed", ancestors = {"ira-sym-pro"}} + +m["ira-shy-pro"] = {"Proto-Shughni-Yazghulami", nil, "ira-shy", {"Latinx"}, type = "reconstructed", ancestors = {"ira-sym-pro"}} + +m["ira-shr-pro"] = {"Proto-Shughni-Roshani", nil, "ira-shy", {"Latinx"}, type = "reconstructed", ancestors = {"ira-shy-pro"}} + +m["ira-sgc-pro"] = {"Proto-Sogdic", nil, "ira-sgc", {"Latinx"}, type = "reconstructed"} + +m["ira-wnj"] = {"Vanji", nil, "ira-shy", {"Latinx"}, ancestors = {"ira-shy-pro"}} + +m["iro-ere"] = {"Erie", 5388365, "iro", Latn} + +m["iro-min"] = {"Mingo", 128531, "iro", Latn} + +m["iro-pro"] = {"Proto-Iroquoian", 7251852, "iro", {"Latinx"}, type = "reconstructed"} + +m["itc-ola"] = {"Old 
Latin", 12289, "itc", {"Latn", "Ital"}, entry_name = {from = {"Ā", "ā", "Ē", "ē", "Ī", "ī", "Ō", "ō", "Ū", "ū", "Ȳ", "ȳ"}, to = {"A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "Y", "y"}}, translit_module = "Ital-translit"} + +m["itc-pro"] = {"Proto-Italic", 17102720, "itc", {"Latinx"}, type = "reconstructed"} + +m["jpx-pro"] = {"Proto-Japonic", nil, "jpx", {"Latinx"}, type = "reconstructed"} + +m["jpx-ryu-pro"] = {"Proto-Ryukyuan", nil, "jpx-ryu", {"Latinx"}, type = "reconstructed"} + +m["kar-pro"] = {"Proto-Karen", nil, "kar", {"Latinx"}, type = "reconstructed"} + +m["khi-kho-pro"] = {"Proto-Khoe", nil, "khi-kho", Latn, type = "reconstructed"} + +m["khi-kun"] = {"ǃKung", 32904, "khi-kxa", Latn} + +m["kro-pro"] = {"Proto-Kru", nil, "kro", Latn, type = "reconstructed"} + +m["ku-pro"] = {"Proto-Kurdish", nil, "ku", {"Latinx"}, type = "reconstructed"} + +m["map-ata-pro"] = {"Proto-Atayalic", nil, "map-ata", {"Latinx"}, type = "reconstructed"} + +m["map-bms"] = {"Banyumasan", 33219, "map", Latn} + +m["map-pro"] = {"Proto-Austronesian", 49230, "map", {"Latinx"}, type = "reconstructed"} + +m["mkh-asl-pro"] = {"Proto-Aslian", 55630680, "mkh-asl", Latn, type = "reconstructed"} + +m["mkh-ban-pro"] = {"Proto-Bahnaric", nil, "mkh-ban", Latn, type = "reconstructed"} + +m["mkh-kat-pro"] = {"Proto-Katuic", nil, "mkh-kat", {"Latinx"}, type = "reconstructed"} + +m["mkh-khm-pro"] = {"Proto-Khmuic", nil, "mkh-khm", Latn, type = "reconstructed"} + +m["mkh-kmr-pro"] = {"Proto-Khmeric", 55630684, "mkh-kmr", Latn, type = "reconstructed"} + +m["mkh-mkm"] = { + "Middle Khmer", + 25226861, + "mkh-kmr", + {"Latinx", "Khmr"}, -- and also Pallava + ancestors = {"mkh-okm"} +} + +m["mkh-mmn"] = { + "Middle Mon", + nil, + "mkh-mnc", + {"Latinx", "Mymr"}, -- and also Pallava + ancestors = {"omx"} +} + +m["mkh-mnc-pro"] = {"Proto-Monic", nil, "mkh-mnc", Latn, type = "reconstructed"} + +m["mkh-mvi"] = {"Middle Vietnamese", 9199, "mkh-vie", {"Latinx", "Hani"}} + +m["mkh-okm"] = { + "Old Khmer", 
9205, "mkh-kmr", {"Latinx", "Khmr"} -- and also Pallava +} + +m["mkh-pal-pro"] = {"Proto-Palaungic", nil, "mkh-pal", {"Latinx"}, type = "reconstructed"} + +m["mkh-pea-pro"] = {"Proto-Pearic", nil, "mkh-pea", {"Latinx"}, type = "reconstructed"} + +m["mkh-pkn-pro"] = {"Proto-Pakanic", nil, "mkh-pkn", Latn, type = "reconstructed"} + +m["mkh-pro"] = { -- This will be merged into 2015 aav-pro. + "Proto-Mon-Khmer", + 7251859, + "mkh", + {"Latinx"}, + type = "reconstructed" +} + +m["mkh-vie-pro"] = {"Proto-Vietic", nil, "mkh-vie", {"Latinx"}, type = "reconstructed"} + +m["mun-pro"] = {"Proto-Munda", nil, "mun", {"Latinx"}, type = "reconstructed"} + +m["myn-chl"] = { -- the stage after ''emy'' + "Ch'olti'", 873995, "myn", {"Latinx"} +} + +m["myn-pro"] = {"Proto-Mayan", 3321532, "myn", {"Latinx"}, type = "reconstructed"} + +m["nai-ala"] = {"Alazapa", nil, nil, Latn} + +m["nai-bay"] = {"Bayogoula", 1563704, nil, Latn} + +m["nai-bvy"] = {"Buena Vista Yokuts", 4985474, "nai-yok", Latn} + +m["nai-cal"] = {"Calusa", 51782, nil, Latn} + +m["nai-chi"] = {"Chiquimulilla", 25339627, "nai-xin", Latn} + +m["nai-chu-pro"] = {"Proto-Chumash", nil, "nai-chu", {"Latinx"}, type = "reconstructed"} + +m["nai-cig"] = {"Ciguayo", 20741700, nil, Latn} + +m["nai-ckn-pro"] = {"Proto-Chinookan", nil, "nai-ckn", {"Latinx"}, type = "reconstructed"} + +m["nai-dly"] = {"Delta Yokuts", nil, "nai-yok", Latn} + +m["nai-gsy"] = {"Gashowu", nil, "nai-yok", Latn} + +m["nai-guz"] = {"Guazacapán", 19572028, "nai-xin", Latn} + +m["nai-hit"] = {"Hitchiti", 1542882, "nai-mus", Latn} + +m["nai-ipa"] = {"Ipai", 3027474, "nai-yuc", Latn} + +m["nai-jtp"] = {"Jutiapa", nil, "nai-xin", Latn} + +m["nai-jum"] = {"Jumaytepeque", 25339626, "nai-xin", Latn} + +m["nai-kat"] = {"Kathlamet", 6376639, "nai-ckn", Latn} + +m["nai-klp-pro"] = {"Proto-Kalapuyan", nil, "nai-klp", type = "reconstructed"} + +m["nai-knm"] = {"Konomihu", 3198734, "nai-shs", Latn} + +m["nai-kry"] = {"Kings River Yokuts", 6413014, "nai-yok", Latn} + 
+m["nai-kum"] = {"Kumeyaay", 4910139, "nai-yuc", Latn} + +m["nai-mac"] = {"Macoris", 21070851, nil, Latn} + +m["nai-mdu-pro"] = {"Proto-Maidun", nil, "nai-mdu", {"Latinx"}, type = "reconstructed"} + +m["nai-miz-pro"] = {"Proto-Mixe-Zoque", nil, "nai-miz", {"Latinx"}, type = "reconstructed"} + +m["nai-nao"] = {"Naolan", 6964594, nil, Latn} + +m["nai-nrs"] = {"New River Shasta", 7011254, "nai-shs", Latn} + +m["nai-nvy"] = {"Northern Valley Yokuts", nil, "nai-yok", Latn} + +m["nai-okw"] = {"Okwanuchu", 3350126, "nai-shs", Latn} + +m["nai-per"] = {"Pericú", 3375369, nil, Latn} + +m["nai-pic"] = {"Picuris", 7191257, "nai-kta", Latn} + +m["nai-plp-pro"] = {"Proto-Plateau Penutian", nil, "nai-plp", {"Latinx"}, type = "reconstructed"} + +m["nai-ply"] = {"Palewyami", 2387391, "nai-yok", Latn} + +m["nai-pom-pro"] = {"Proto-Pomo", nil, "nai-pom", Latn, type = "reconstructed"} + +m["nai-qng"] = {"Quinigua", 36360, nil, Latn} + +m["nai-sca-pro"] = { -- NB 'sio-pro' "Proto-Siouan" which is Proto-Western Siouan + "Proto-Siouan-Catawban", + nil, + "nai-sca", + {"Latinx"}, + type = "reconstructed" +} + +m["nai-sin"] = {"Sinacantán", 24190249, "nai-xin", Latn} + +m["nai-sln"] = {"Salvadoran Lenca", 3229434, "nai-len", Latn} + +m["nai-spt"] = {"Sahaptin", 3833015, "nai-shp", Latn} + +m["nai-svy"] = {"Southern Valley Yokuts", nil, "nai-yok", Latn} + +m["nai-tap"] = {"Tapachultec", 7684401, "nai-miz", Latn} + +m["nai-taw"] = {"Tawasa", 7689233, nil, Latn} + +m["nai-teq"] = {"Tequistlatec", 2964454, "nai-tqn", Latn} + +m["nai-tip"] = {"Tipai", 3027471, "nai-yuc", Latn} + +m["nai-tky"] = {"Tule-Kaweah Yokuts", 7851988, "nai-yok", Latn} + +m["nai-tot-pro"] = {"Proto-Totozoquean", nil, "nai-tot", {"Latinx"}, type = "reconstructed"} + +m["nai-tsi-pro"] = {"Proto-Tsimshianic", nil, "nai-tsi", {"Latinx"}, type = "reconstructed"} + +m["nai-utn-pro"] = {"Proto-Utian", nil, "nai-utn", {"Latinx"}, type = "reconstructed"} + +m["nai-wai"] = {"Waikuri", 3118702, nil, Latn} + +m["nai-yav"] = 
{"Yavapai", 34202, "nai-yuc", Latn} + +m["nai-yup"] = {"Yupiltepeque", 25339628, "nai-xin", Latn} + +m["nds-de"] = {"German Low German", 25433, "gmw", Latn, ancestors = {"nds"}, wikimedia_codes = {"nds"}} + +m["nds-nl"] = {"Dutch Low Saxon", 516137, "gmw", Latn, ancestors = {"nds"}} + +m["ngf-pro"] = {"Proto-Trans-New Guinea", nil, "ngf", Latn, type = "reconstructed"} + +m["nic-bco-pro"] = {"Proto-Benue-Congo", nil, "nic-bco", Latn, type = "reconstructed"} + +m["nic-bod-pro"] = {"Proto-Bantoid", nil, "nic-bod", Latn, type = "reconstructed"} + +m["nic-eov-pro"] = {"Proto-Eastern Oti-Volta", nil, "nic-eov", Latn, type = "reconstructed"} + +m["nic-gns-pro"] = {"Proto-Gurunsi", nil, "nic-gns", Latn, type = "reconstructed"} + +m["nic-grf-pro"] = {"Proto-Grassfields", nil, "nic-grf", Latn, type = "reconstructed"} + +m["nic-gur-pro"] = {"Proto-Gur", nil, "nic-gur", Latn, type = "reconstructed"} + +m["nic-jkn-pro"] = {"Proto-Jukunoid", nil, "nic-jkn", Latn, type = "reconstructed"} + +m["nic-lcr-pro"] = {"Proto-Lower Cross River", nil, "nic-lcr", Latn, type = "reconstructed"} + +m["nic-ogo-pro"] = {"Proto-Ogoni", nil, "nic-ogo", Latn, type = "reconstructed"} + +m["nic-ovo-pro"] = {"Proto-Oti-Volta", nil, "nic-ovo", Latn, type = "reconstructed"} + +m["nic-plt-pro"] = {"Proto-Plateau", nil, "nic-plt", Latn, type = "reconstructed"} + +m["nic-pro"] = {"Proto-Niger-Congo", nil, "nic", Latn, type = "reconstructed"} + +m["nic-ubg-pro"] = {"Proto-Ubangian", nil, "nic-ubg", Latn, type = "reconstructed"} + +m["nic-ucr-pro"] = {"Proto-Upper Cross River", nil, "nic-ucr", Latn, type = "reconstructed"} + +m["nic-vco-pro"] = {"Proto-Volta-Congo", nil, "nic-vco", Latn, type = "reconstructed"} + +m["nub-har"] = {"Haraza", 19572059, "nub", {"Arab", "Latn"}} + +m["nub-pro"] = {"Proto-Nubian", nil, "nub", Latn, type = "reconstructed"} + +m["omq-cha-pro"] = {"Proto-Chatino", nil, "omq-cha", Latn, type = "reconstructed"} + +m["omq-maz-pro"] = {"Proto-Mazatec", nil, "omq-maz", Latn, type = 
"reconstructed"} + +m["omq-mix-pro"] = {"Proto-Mixtecan", nil, "omq-mix", Latn, type = "reconstructed"} + +m["omq-mxt-pro"] = {"Proto-Mixtec", nil, "omq-mxt", Latn, type = "reconstructed"} + +m["omq-otp-pro"] = {"Proto-Oto-Pamean", nil, "omq-otp", Latn, type = "reconstructed", ancestors = {"omq-pro"}} + +m["omq-pro"] = {"Proto-Oto-Manguean", 33669, "omq", Latn, type = "reconstructed"} + +m["omq-tel"] = {"Teposcolula Mixtec", nil, "omq-mxt", Latn} + +m["omq-teo"] = {"Teojomulco Chatino", 25340451, "omq-cha", Latn} + +m["omq-tri-pro"] = {"Proto-Trique", nil, "omq-tri", Latn, type = "reconstructed"} + +m["omq-zap-pro"] = {"Proto-Zapotecan", nil, "omq-zap", Latn, type = "reconstructed"} + +m["omq-zpc-pro"] = {"Proto-Zapotec", nil, "omq-zpc", Latn, type = "reconstructed"} + +m["omv-aro-pro"] = {"Proto-Aroid", nil, "omv-aro", Latn, type = "reconstructed"} + +m["omv-diz-pro"] = {"Proto-Dizoid", nil, "omv-diz", Latn, type = "reconstructed"} + +m["omv-pro"] = {"Proto-Omotic", nil, "omv", Latn, type = "reconstructed"} + +m["oto-otm-pro"] = {"Proto-Otomi", nil, "oto-otm", Latn, type = "reconstructed", ancestors = {"oto-pro"}} + +m["oto-pro"] = {"Proto-Otomian", nil, "oto", Latn, type = "reconstructed", ancestors = {"omq-otp-pro"}} + +m["paa-kom"] = {"Kómnzo", 18344310, "paa-yam", Latn} + +m["paa-kwn"] = {"Kuwani", 6449056, "paa", Latn} + +m["paa-nun"] = {"Nungon", nil, "paa", Latn} + +m["phi-din"] = {"Dinapigue Agta", 16945774, "phi", Latn} + +m["phi-kal-pro"] = {"Proto-Kalamian", nil, "phi-kal", Latn, type = "reconstructed"} + +m["phi-nag"] = {"Nagtipunan Agta", 16966111, "phi", Latn} + +m["phi-pro"] = {"Proto-Philippine", 18204898, "phi", {"Latinx"}, type = "reconstructed"} + +m["poz-abi"] = {"Abai", 19570729, "poz-san", Latn} + +m["poz-bal"] = {"Baliledo", 4850912, "poz", Latn} + +m["poz-btk-pro"] = {"Proto-Bungku-Tolaki", nil, "poz-btk", {"Latinx"}, type = "reconstructed"} + +m["poz-cet-pro"] = {"Proto-Central-Eastern Malayo-Polynesian", 2269883, "poz-cet", {"Latinx"}, 
type = "reconstructed"} + +m["poz-hce-pro"] = {"Proto-Halmahera-Cenderawasih", nil, "poz-hce", {"Latinx"}, type = "reconstructed"} + +m["poz-lgx-pro"] = {"Proto-Lampungic", nil, "poz-lgx", {"Latinx"}, type = "reconstructed"} + +m["poz-mcm-pro"] = {"Proto-Malayo-Chamic", nil, "poz-mcm", {"Latinx"}, type = "reconstructed"} + +m["poz-mly-pro"] = {"Proto-Malayic", nil, "poz-mly", {"Latinx"}, type = "reconstructed"} + +m["poz-msa-pro"] = {"Proto-Malayo-Sumbawan", nil, "poz-msa", {"Latinx"}, type = "reconstructed"} + +m["poz-oce-pro"] = {"Proto-Oceanic", 141741, "poz-oce", {"Latinx"}, type = "reconstructed"} + +m["poz-pep-pro"] = {"Proto-Eastern Polynesian", nil, "poz-pep", {"Latinx"}, type = "reconstructed"} + +m["poz-pnp-pro"] = {"Proto-Nuclear Polynesian", nil, "poz-pnp", {"Latinx"}, type = "reconstructed"} + +m["poz-pol-pro"] = {"Proto-Polynesian", 1658709, "poz-pol", {"Latinx"}, type = "reconstructed"} + +m["poz-pro"] = {"Proto-Malayo-Polynesian", 3832960, "poz", {"Latinx"}, type = "reconstructed"} + +m["poz-ssw-pro"] = {"Proto-South Sulawesi", nil, "poz-ssw", {"Latinx"}, type = "reconstructed"} + +m["poz-sus-pro"] = {"Proto-Sunda-Sulawesi", nil, "poz-sus", {"Latinx"}, type = "reconstructed"} + +m["poz-swa-pro"] = {"Proto-North Sarawak", nil, "poz-swa", {"Latinx"}, type = "reconstructed"} + +m["pqe-pro"] = {"Proto-Eastern Malayo-Polynesian", 2269883, "pqe", {"Latinx"}, type = "reconstructed"} + +m["pra-niy"] = {"Niya Prakrit", nil, "inc-mid", {"Khar"}, ancestors = {"inc-ash"}, translit_module = "Khar-translit"} + +m["qfa-adm-pro"] = {"Proto-Great Andamanese", nil, "qfa-adm", {"Latinx"}, type = "reconstructed"} + +m["qfa-bet-pro"] = {"Proto-Be-Tai", nil, "qfa-bet", {"Latinx"}, type = "reconstructed"} + +m["qfa-cka-pro"] = {"Proto-Chukotko-Kamchatkan", 7251837, "qfa-cka", {"Latinx"}, type = "reconstructed"} + +m["qfa-hur-pro"] = {"Proto-Hurro-Urartian", nil, "qfa-hur", {"Latinx"}, type = "reconstructed"} + +m["qfa-kad-pro"] = {"Proto-Kadu", nil, "qfa-kad", Latn, type 
= "reconstructed"} + +m["qfa-kms-pro"] = {"Proto-Kam-Sui", nil, "qfa-kms", Latn, type = "reconstructed"} + +m["qfa-kor-pro"] = {"Proto-Korean", 467883, "qfa-kor", {"Latinx"}, type = "reconstructed"} + +m["qfa-kra-pro"] = {"Proto-Kra", 7251854, "qfa-kra", {"Latinx"}, type = "reconstructed"} + +m["qfa-lic-pro"] = {"Proto-Hlai", 7251845, "qfa-lic", {"Latinx"}, type = "reconstructed"} + +m["qfa-onb-pro"] = {"Proto-Be", nil, "qfa-onb", {"Latinx"}, type = "reconstructed"} + +m["qfa-ong-pro"] = {"Proto-Ongan", nil, "qfa-ong", {"Latinx"}, type = "reconstructed"} + +m["qfa-tak-pro"] = {"Proto-Kra-Dai", nil, "qfa-tak", Latn, type = "reconstructed"} + +m["qfa-yen-pro"] = {"Proto-Yeniseian", 27639, "qfa-yen", {"Latinx"}, type = "reconstructed"} + +m["qfa-yuk-pro"] = {"Proto-Yukaghir", nil, "qfa-yuk", {"Latinx"}, type = "reconstructed"} + +m["qwe-kch"] = {"Kichwa", 1740805, "qwe", Latn, ancestors = {"qu"}} + +m["roa-ang"] = {"Angevin", 56782, "roa-oil", Latn, sort_key = {from = {"[áàâä]", "[éèêë]", "[íìîï]", "[óòôö]", "[úùûü]", "[ýỳŷÿ]", "ç", "æ", "œ", "'"}, to = {"a", "e", "i", "o", "u", "y", "c", "ae", "oe"}}} + +m["roa-bbn"] = {"Bourbonnais-Berrichon", nil, "roa-oil", Latn, sort_key = {from = {"[áàâä]", "[éèêë]", "[íìîï]", "[óòôö]", "[úùûü]", "[ýỳŷÿ]", "ç", "æ", "œ", "'"}, to = {"a", "e", "i", "o", "u", "y", "c", "ae", "oe"}}} + +m["roa-brg"] = {"Bourguignon", 508332, "roa-oil", Latn, sort_key = {from = {"[áàâä]", "[éèêë]", "[íìîï]", "[óòôö]", "[úùûü]", "[ýỳŷÿ]", "ç", "'"}, to = {"a", "e", "i", "o", "u", "y", "c"}}} + +m["roa-cha"] = {"Champenois", 430018, "roa-oil", Latn, sort_key = {from = {"[áàâä]", "[éèêë]", "[íìîï]", "[óòôö]", "[úùûü]", "[ýỳŷÿ]", "ç", "æ", "œ", "'"}, to = {"a", "e", "i", "o", "u", "y", "c", "ae", "oe"}}} + +m["roa-fcm"] = {"Franc-Comtois", 510561, "roa-oil", Latn, sort_key = {from = {"[áàâä]", "[éèêë]", "[íìîï]", "[óòôö]", "[úùûü]", "[ýỳŷÿ]", "ç", "æ", "œ", "'"}, to = {"a", "e", "i", "o", "u", "y", "c", "ae", "oe"}}} + +m["roa-gal"] = {"Gallo", 37300, 
"roa-oil", Latn, sort_key = {from = {"[áàâä]", "[éèêë]", "[íìîï]", "[óòôö]", "[úùûü]", "[ýỳŷÿ]", "ç", "'"}, to = {"a", "e", "i", "o", "u", "y", "c"}}} + +m["roa-leo"] = {"Leonese", 34108, "roa-ibe", Latn, ancestors = {"roa-ole"}} + +m["roa-lor"] = {"Lorrain", 671198, "roa-oil", Latn, sort_key = {from = {"[áàâä]", "[éèêë]", "[íìîï]", "[óòôö]", "[úùûü]", "[ýỳŷÿ]", "ç", "æ", "œ", "'"}, to = {"a", "e", "i", "o", "u", "y", "c", "ae", "oe"}}} + +m["roa-oan"] = {"Navarro-Aragonese", 2736184, "roa-ibe", Latn} + +m["roa-oca"] = {"Old Catalan", 15478520, "roa", Latn, ancestors = {"pro"}, sort_key = {from = {"à", "[èé]", "[íï]", "[òó]", "[úü]", "ç", "·"}, to = {"a", "e", "i", "o", "u", "c"}}} + +m["roa-ole"] = {"Old Leonese", nil, "roa-ibe", Latn} + +m["roa-opt"] = {"Old Portuguese", 1072111, "roa-ibe", Latn} + +m["roa-orl"] = {"Orléanais", nil, "roa-oil", Latn, sort_key = {from = {"[áàâä]", "[éèêë]", "[íìîï]", "[óòôö]", "[úùûü]", "[ýỳŷÿ]", "ç", "æ", "œ", "'"}, to = {"a", "e", "i", "o", "u", "y", "c", "ae", "oe"}}} + +m["roa-poi"] = {"Poitevin-Saintongeais", 514123, "roa-oil", Latn, sort_key = {from = {"[áàâä]", "[éèêë]", "[íìîï]", "[óòôö]", "[úùûü]", "[ýỳŷÿ]", "ç", "æ", "œ", "'"}, to = {"a", "e", "i", "o", "u", "y", "c", "ae", "oe"}}} + +m["roa-tar"] = {"Tarantino", 695526, "roa-itd", Latn, ancestors = {"nap"}, wikimedia_codes = {"roa-tara"}} + +m["roa-tou"] = {"Tourangeau", nil, "roa-oil", Latn, sort_key = {from = {"[áàâä]", "[éèêë]", "[íìîï]", "[óòôö]", "[úùûü]", "[ýỳŷÿ]", "ç", "æ", "œ", "'"}, to = {"a", "e", "i", "o", "u", "y", "c", "ae", "oe"}}} + +m["sai-ajg"] = {"Ajagua", nil, nil, Latn} + +m["sai-all"] = {"Allentiac", 19570789, "sai-hrp", Latn} + +m["sai-and"] = { -- not to be confused with 'cbc' or 'ano' + "Andoquero", 16828359, "sai-wit", Latn +} + +m["sai-ayo"] = {"Ayomán", 16937754, "sai-jir", Latn} + +m["sai-bae"] = {"Baenan", 3401998, nil, Latn} + +m["sai-bag"] = {"Bagua", 5390321, nil, Latn} + +m["sai-bet"] = {"Betoi", 926551, "qfa-iso", Latn} + 
+m["sai-bor-pro"] = {"Proto-Boran", nil, "sai-bor", Latn} + +m["sai-cac"] = {"Cacán", 945482, nil, Latn} + +m["sai-caq"] = {"Caranqui", 2937753, "sai-bar", Latn} + +m["sai-cat"] = {"Catacao", 5051136, "sai-ctc", {"Latinx"}} + +m["sai-cer-pro"] = {"Proto-Cerrado", nil, "sai-cer", Latn, type = "reconstructed"} + +m["sai-chi"] = {"Chirino", 5390321, nil, Latn} + +m["sai-chn"] = {"Chaná", 5072718, "sai-crn", Latn} + +m["sai-chp"] = {"Chapacura", 5072884, "sai-cpc", Latn} + +m["sai-chr"] = {"Charrua", 5086680, "sai-crn", Latn} + +m["sai-chu"] = {"Churuya", 5118339, "sai-guh", Latn} + +m["sai-cje-pro"] = {"Proto-Central Jê", nil, "sai-cje", Latn, type = "reconstructed"} + +m["sai-cmg"] = {"Comechingon", 6644203, nil, Latn} + +m["sai-cno"] = {"Chono", 5104704, nil, Latn} + +m["sai-cnr"] = {"Cañari", 5055572, nil, Latn} + +m["sai-coe"] = {"Coeruna", 6425639, "sai-wit", Latn} + +m["sai-col"] = {"Colán", 5141893, "sai-ctc", {"Latinx"}} + +m["sai-cop"] = {"Copallén", 5390321, nil, Latn} + +m["sai-crd"] = {"Coroado Puri", 24191321, "sai-mje", Latn} + +m["sai-ctq"] = {"Catuquinaru", 16858455, nil, Latn} + +m["sai-cul"] = {"Culli", 2879660, nil, Latn} + +m["sai-cva"] = {"Cueva", nil, nil, Latn} + +m["sai-esm"] = {"Esmeralda", 3058083, nil, Latn} + +m["sai-ewa"] = {"Ewarhuyana", 16898104, nil, Latn} + +m["sai-gam"] = {"Gamela", 5403661, nil, Latn} + +m["sai-gay"] = {"Gayón", 5528902, "sai-jir", Latn} + +m["sai-gmo"] = {"Guamo", 5613495, nil, Latn} + +m["sai-gue"] = {"Güenoa", 5626799, "sai-crn", Latn} + +m["sai-hau"] = {"Haush", 3128376, "sai-cho", Latn} + +m["sai-hoc-pro"] = {"Proto-Huitoto-Ocaina", nil, "sai-hoc", Latn, type = "reconstructed"} + +m["sai-jee-pro"] = {"Proto-Jê", nil, "sai-jee", Latn, type = "reconstructed"} + +m["sai-jko"] = {"Jeikó", 6176527, "sai-mje", Latn} + +m["sai-jrj"] = {"Jirajara", 6202966, "sai-jir", Latn} + +m["sai-kat"] = { -- contrast xoo, kzw, sai-xoc + "Katembri", 6375925, nil, Latn +} + +m["sai-mal"] = {"Malalí", 6741212, nil, Latn} + 
+m["sai-mar"] = {"Maratino", 6755055, nil, Latn} + +m["sai-mat"] = {"Matanawi", 6786047, nil, Latn} + +m["sai-mcn"] = {"Mocana", 3402048, nil, Latn} + +m["sai-men"] = {"Menien", 16890110, "sai-mje", Latn} + +m["sai-mil"] = {"Millcayac", 19573012, "sai-hrp", Latn} + +m["sai-mlb"] = {"Malibu", 3402048, nil, Latn} + +m["sai-msk"] = {"Masakará", 6782426, "sai-mje", Latn} + +m["sai-muc"] = {"Mucuchí", nil, nil, Latn} + +m["sai-mue"] = {"Muellama", 16886936, "sai-bar", Latn} + +m["sai-muz"] = {"Muzo", 6644203, nil, Latn} + +m["sai-mys"] = {"Maynas", 16919393, nil, Latn} + +m["sai-nat"] = {"Natú", 9006749, nil, Latn} + +m["sai-nje-pro"] = {"Proto-Northern Jê", nil, "sai-nje", Latn, type = "reconstructed"} + +m["sai-opo"] = {"Opón", 7099152, "sai-car", Latn} + +m["sai-oto"] = {"Otomaco", 16879234, "sai-otm", Latn} + +m["sai-pal"] = {"Palta", 3042978, nil, Latn} + +m["sai-pam"] = {"Pamigua", 5908689, "sai-otm", Latn} + +m["sai-par"] = {"Paratió", 16890038, nil, Latn} + +m["sai-pnz"] = {"Panzaleo", 3123275, nil, Latn} + +m["sai-prh"] = {"Puruhá", 3410994, nil, Latn} + +m["sai-ptg"] = {"Patagón", nil, nil, Latn} + +m["sai-pur"] = {"Purukotó", 7261622, "sai-car", Latn} + +m["sai-pyg"] = {"Payaguá", 7156643, "sai-guc", Latn} + +m["sai-pyk"] = {"Pykobjê", 98113977, "sai-nje", Latn} + +m["sai-qmb"] = {"Quimbaya", 7272043, nil, Latn} + +m["sai-qtm"] = {"Quitemo", 7272651, "sai-cpc", Latn} + +m["sai-rab"] = {"Rabona", 6644203, nil, Latn} + +m["sai-ram"] = {"Ramanos", 16902824, nil, Latn} + +m["sai-sac"] = {"Sácata", 5390321, nil, Latn} + +m["sai-san"] = {"Sanaviron", 16895999, nil, Latn} + +m["sai-sap"] = {"Sapará", 7420922, "sai-car", Latn} + +m["sai-sec"] = {"Sechura", 7442912, nil, Latn} + +m["sai-sin"] = {"Sinúfana", 7525275, nil, Latn} + +m["sai-sje-pro"] = {"Proto-Southern Jê", nil, "sai-sje", Latn, type = "reconstructed"} + +m["sai-tab"] = {"Tabancale", 5390321, nil, Latn} + +m["sai-tal"] = {"Tallán", 16910468, nil, Latn} + +m["sai-tap"] = {"Tapayuna", nil, "sai-nje", Latn} 
+ +m["sai-teu"] = {"Teushen", 3519243, nil, Latn} + +m["sai-tim"] = {"Timote", nil, nil, Latn} + +m["sai-tpr"] = {"Taparita", 7684460, "sai-otm", Latn} + +m["sai-trr"] = {"Tarairiú", 7685313, nil, Latn} + +m["sai-wai"] = {"Waitaká", 16918610, nil, Latn} + +m["sai-way"] = {"Wayumará", nil, "sai-car", Latn} + +m["sai-wit-pro"] = {"Proto-Witotoan", nil, "sai-wit", Latn, type = "reconstructed"} + +m["sai-wnm"] = {"Wanham", 16879440, "sai-cpc", Latn} + +m["sai-xoc"] = { -- contrast xoo, kzw, sai-kat + "Xocó", 12953620, nil, Latn +} + +m["sai-yao"] = {"Yao (South America)", nil, "sai-car", Latn} + +m["sai-yar"] = { -- not the same family as 'suy' + "Yarumá", 3505859, "sai-car", Latn +} + +m["sai-yri"] = {"Yuri", nil, "sai-tyu", Latn} + +m["sai-yup"] = {"Yupua", 8061430, "sai-tuc", Latn} + +m["sai-yur"] = {"Yurumanguí", 1281291, nil, Latn} + +m["sal-pro"] = {"Proto-Salish", nil, "sal", Latn, type = "reconstructed"} + +m["sdv-daj-pro"] = {"Proto-Daju", nil, "sdv-daj", Latn, type = "reconstructed"} + +m["sdv-eje-pro"] = {"Proto-Eastern Jebel", nil, "sdv-eje", Latn, type = "reconstructed"} + +m["sdv-nil-pro"] = {"Proto-Nilotic", nil, "sdv-nil", Latn, type = "reconstructed"} + +m["sdv-nyi-pro"] = {"Proto-Nyima", nil, "sdv-nyi", Latn, type = "reconstructed"} + +m["sdv-tmn-pro"] = {"Proto-Taman", nil, "sdv-tmn", Latn, type = "reconstructed"} + +m["sem-amm"] = {"Ammonite", 279181, "sem-can", {"Phnx"}, translit_module = "Phnx-translit"} + +m["sem-amo"] = {"Amorite", 35941, "sem-nwe", {"Xsux", "Latn"}} + +m["sem-cha"] = {"Chaha", nil, "sem-eth", {"Ethi"}, translit_module = "Ethi-translit", ancestors = {"sem-pro"}} + +m["sem-dad"] = {"Dadanitic", 21838040, "sem-cen", {"Narb"}, translit_module = "Narb-translit"} + +m["sem-dum"] = {"Dumaitic", nil, "sem-cen", {"Narb"}, translit_module = "Narb-translit"} + +m["sem-has"] = {"Hasaitic", 3541433, "sem-cen", {"Narb"}, translit_module = "Narb-translit"} + +m["sem-him"] = {"Himyaritic", 35604, "sem", {"Arab", "Sarb"}} + +m["sem-his"] = 
{"Hismaic", 22948260, "sem-cen", {"Narb"}, translit_module = "Narb-translit"} + +m["sem-mhr"] = {"Muher", 33743, "sem-eth", Latn} + +m["sem-pro"] = {"Proto-Semitic", 1658554, "sem", {"Latinx"}, type = "reconstructed"} + +m["sem-saf"] = {"Safaitic", 472586, "sem-cen", {"Narb"}, translit_module = "Narb-translit"} + +m["sem-srb"] = {"Old South Arabian", 35025, "sem-osa", {"Sarb"}, translit_module = "Sarb-translit"} + +m["sem-tay"] = {"Taymanitic", 24912301, "sem-cen", {"Narb"}, translit_module = "Narb-translit"} + +m["sem-tha"] = {"Thamudic", 843030, "sem-cen", {"Narb"}, translit_module = "Narb-translit"} + +m["sem-wes-pro"] = {"Proto-West Semitic", 98021726, "sem-wes", {"Latinx"}, type = "reconstructed"} + +m["sio-pro"] = { -- NB this is not Proto-Siouan-Catawban 'nai-sca-pro' + "Proto-Siouan", + 34181, + "sio", + {"Latinx"}, + type = "reconstructed" +} + +m["sit-bok"] = {"Bokar", 4938727, "sit-tan", Latn} + +m["sit-cha"] = {"Chairel", 5068066, "sit-luu", Latn} + +m["sit-gkh"] = {"Gokhy", 5578069, "tbq-lol", Latn} + +m["sit-hrs-pro"] = {"Proto-Hrusish", nil, "sit-hrs", type = "reconstructed"} + +m["sit-jap"] = {"Japhug", 3162245, "sit-rgy", Latn} + +m["sit-kha-pro"] = {"Proto-Kham", nil, "sit-kha", type = "reconstructed"} + +m["sit-liz"] = { + "Lizu", 6660653, "sit-qia", Latn -- and Ersu Shaba +} + +m["sit-luu-pro"] = {"Proto-Luish", nil, "sit-luu", type = "reconstructed"} + +m["sit-mor"] = {"Moran", 6909216, "tbq-bdg", Latn} + +m["sit-prn"] = {"Puiron", 7259048, "sit-zem"} + +m["sit-pro"] = {"Proto-Sino-Tibetan", 45961, "sit", Latn, type = "reconstructed"} + +m["sit-sit"] = {"Situ", 19840830, "sit-rgy", Latn} + +m["sit-tan-pro"] = { + "Proto-Tani", + nil, + "sit-tan", + Latn, -- needs verification + type = "reconstructed" +} + +m["sit-tgm"] = {"Tangam", 17041370, "sit-tan", Latn} + +m["sit-tos"] = { + "Tosu", 7827899, "sit-qia", Latn -- also Ersu Shaba +} + +m["sit-tsh"] = {"Tshobdun", 19840950, "sit-rgy", Latn} + +m["sit-zbu"] = {"Zbu", 19841106, "sit-rgy", Latn} + 
+m["sla-pro"] = {"Proto-Slavic", 747537, "sla", {"Latinx"}, type = "reconstructed", entry_name = {from = {"[ÀÁÃĀȀȂ]", "[àáãāȁȃ]", "[ÈÉẼĒȄȆ]", "[èéẽēȅȇ]", "[ÌÍĨĪȈȊ]", "[ìíĩīȉȋ]", "[ÒÓÕŌȌȎ]", "[òóõōȍȏ]", "[ÙÚŨŪȔȖ]", "[ùúũūȕȗ]", "[ỲÝỸȲ]", "[ỳýỹȳ]", "[Ǭ]", "[ǭ]", GRAVE, ACUTE, TILDE, MACRON, DGRAVE, INVBREVE}, to = {"A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "Y", "y", "Ǫ", "ǫ"}}, sort_key = {from = {"č", "ď", "ě", "ę", "ь", "ľ", "ň", "ǫ", "ř", "š", "ś", "ť", "ъ", "ž"}, to = {"c²", "d²", "e²", "e³", "i²", "l²", "nj", "o²", "r²", "s²", "s³", "t²", "u²", "z²"}}} + +m["smi-pro"] = {"Proto-Samic", 7251862, "smi", Latn, type = "reconstructed", sort_key = {from = {"ā", "č", "δ", "[ëē]", "ŋ", "ń", "ō", "š", "θ", "%([^()]+%)"}, to = {"a", "c²", "d", "e", "n²", "n³", "o", "s²", "t²"}}} + +m["son-pro"] = {"Proto-Songhay", nil, "son", {"Latinx"}, type = "reconstructed"} + +m["sqj-pro"] = {"Proto-Albanian", 18210846, "sqj", {"Latinx"}, type = "reconstructed"} + +m["ssa-klk-pro"] = {"Proto-Kuliak", nil, "ssa-klk", {"Latinx"}, type = "reconstructed"} + +m["ssa-kom-pro"] = {"Proto-Koman", nil, "ssa-kom", {"Latinx"}, type = "reconstructed"} + +m["ssa-pro"] = {"Proto-Nilo-Saharan", nil, "ssa", {"Latinx"}, type = "reconstructed"} + +m["syd-fne"] = {"Forest Nenets", 1295107, "syd", {"Cyrl"}, entry_name = {from = {"Ӑ", "ӑ", "[ӖЀ]", "[ӗѐ]", "[ӢЍ]", "[ӣѝ]", "Ӯ", "ӯ", BREVE, MACRON, GRAVE, ACUTE, DOTABOVE}, to = {"А", "а", "Е", "е", "И", "и", "У", "у"}}} + +m["syd-pro"] = {"Proto-Samoyedic", 7251863, "syd", Latn, type = "reconstructed"} + +m["tai-pro"] = {"Proto-Tai", 6583709, "tai", {"Latinx"}, type = "reconstructed"} + +m["tai-swe-pro"] = {"Proto-Southwestern Tai", nil, "tai-swe", {"Latinx"}, type = "reconstructed"} + +m["tbq-bdg-pro"] = {"Proto-Bodo-Garo", nil, "tbq-bdg", {"Latinx"}, type = "reconstructed"} + +m["tbq-kuk-pro"] = {"Proto-Kuki-Chin", nil, "tbq-kuk", {"Latinx"}, type = "reconstructed"} + +m["tbq-lal-pro"] = {"Proto-Lalo", nil, "tbq-lol", {"Latinx"}, type = 
"reconstructed"} + +m["tbq-laz"] = {"Laze", 17007626, "sit-nax"} + +m["tbq-lob-pro"] = {"Proto-Lolo-Burmese", nil, "tbq-lob", {"Latinx"}, type = "reconstructed"} + +m["tbq-lol-pro"] = {"Proto-Loloish", 7251855, "tbq-lol", {"Latinx"}, type = "reconstructed"} + +m["tbq-plg"] = {"Pai-lang", 2879843, "tbq-lob", {"Hani", "Latinx"}} + +-- tbq-pro is now etymology-only + +m["trk-dkh"] = {"Dukhan", nil, "trk-sib", {"Latn", "Cyrl", "Mong"}} + +m["trk-oat"] = {"Old Anatolian Turkish", 7083390, "trk-ogz", {"ota-Arab"}, ancestors = {"trk-ogz-pro"}} + +m["trk-pro"] = {"Proto-Turkic", 3657773, "trk", {"Latinx"}, type = "reconstructed"} + +m["tup-gua-pro"] = {"Proto-Tupi-Guarani", nil, "tup-gua", {"Latinx"}, type = "reconstructed"} + +m["tup-kab"] = {"Kabishiana", 15302988, "tup", Latn} + +m["tup-pro"] = {"Proto-Tupian", 10354700, "tup", {"Latinx"}, type = "reconstructed"} + +m["tuw-pro"] = {"Proto-Tungusic", nil, "tuw", {"Latinx"}, type = "reconstructed"} + +m["tuw-sol"] = {"Solon", 30004, "tuw"} + +m["und-isa"] = { + "Isaurian", 16956868, nil + -- {"Xsux", "Hluw", "Latn"}, +} + +m["und-kas"] = {"Kassite", 35612, nil, {"Xsux"}} + +m["und-mil"] = {"Milang", 6850761, nil, {"Deva", "Latn"}} + +m["und-mmd"] = {"Mimi of Decorse", 6862206, nil, Latn} + +m["und-mmn"] = {"Mimi of Nachtigal", 6862207, nil, Latn} + +m["und-phi"] = {"Philistine", 2230924} + +m["und-wji"] = {"Western Jicaque", 3178610, "hok", Latn} + +m["urj-mdv-pro"] = {"Proto-Mordvinic", nil, "urj-mdv", Latn, type = "reconstructed"} + +m["urj-prm-pro"] = {"Proto-Permic", nil, "urj-prm", Latn, type = "reconstructed"} + +m["urj-pro"] = {"Proto-Uralic", 288765, "urj", {"Latinx"}, type = "reconstructed"} + +m["urj-ugr-pro"] = {"Proto-Ugric", 156631, "urj-ugr", {"Latinx"}, type = "reconstructed"} + +m["xnd-pro"] = {"Proto-Na-Dene", nil, "xnd", {"Latinx"}, type = "reconstructed"} + +m["xgn-kha"] = {"Khamnigan Mongol", 3196052, "xgn", {"Mong", "Latn", "Cyrl"}} + +m["xgn-mgr"] = { + "Mangghuer", 34214, "xgn", Latn -- also "Mong", 
"Cyrl" ? +} + +m["xgn-mgl"] = { + "Mongghul", 34214, "xgn", Latn -- also "Mong", "Cyrl" ? +} + +m["xgn-pro"] = {"Proto-Mongolic", 2493677, "xgn", {"Latinx"}, type = "reconstructed"} + +m["ypk-pro"] = {"Proto-Yupik", nil, "ypk", {"Latinx"}, type = "reconstructed"} + +m["zhx-min-pro"] = {"Proto-Min", 19646347, "zhx-min", {"Latinx"}, type = "reconstructed"} + +m["zhx-sht"] = {"Shaozhou Tuhua", 1920769, "zhx", {"Nshu"}} + +m["zhx-tai"] = {"Taishanese", 2208940, "zhx", {"Hani"}, ancestors = {"yue"}} + +m["zhx-teo"] = {"Teochew", 36759, "zhx-min-hai", {"Hani"}, ancestors = {"nan"}} + +m["zle-ono"] = { + "Old Novgorodian", + 162013, + "zle", + {"Cyrs", "Glag"}, + translit_module = "Cyrs-Glag-translit", + entry_name = { + from = {u(0x0484)}, -- kamora + to = {} + }, + sort_key = {from = {"оу"}, to = {"у"}} +} + +m["zlw-ocs"] = {"Old Czech", 593096, "zlw", Latn} + +m["zlw-opl"] = {"Old Polish", 149838, "zlw-lch", Latn} + +m["zlw-pom"] = {"Pomeranian", 149588, "zlw-lch", Latn} + +m["zlw-slv"] = {"Slovincian", 36822, "zlw-lch", Latn, ancestors = {"zlw-pom"}} + +return m diff --git a/wiktra/wikt/translit/lbe-translit.lua b/wiktra/wikt/translit/lbe-translit.lua new file mode 100644 index 0000000..84b48a2 --- /dev/null +++ b/wiktra/wikt/translit/lbe-translit.lua @@ -0,0 +1,26 @@ +local export = {} + +local tt = {["б"] = "b", ["п"] = "p", ["ф"] = "f", ["в"] = "w", ["м"] = "m", ["д"] = "d", ["т"] = "t", ["й"] = "y", ["н"] = "n", ["з"] = "z", ["ц"] = "c", ["с"] = "s", ["ж"] = "ž", ["ш"] = "š", ["щ"] = "š:", ["л"] = "l", ["ч"] = "č", ["р"] = "r", ["г"] = "g", ["к"] = "k", ["х"] = "x", ["ъ"] = "ʼ", ["а"] = "a", ["е"] = "e", ["и"] = "i", ["о"] = "o", ["у"] = "u", ["э"] = "e", ["ю"] = "uˤ", ["я"] = "aˤ", ["Б"] = "B", ["П"] = "P", ["Ф"] = "F", ["В"] = "W", ["М"] = "M", ["Д"] = "D", ["Т"] = "T", ["Й"] = "Y", ["Н"] = "N", ["З"] = "Z", ["Ц"] = "C", ["С"] = "S", ["Ж"] = "Ž", ["Ш"] = "Š", ["Щ"] = "Š:", ["Л"] = "L", ["Ч"] = "Č", ["Р"] = "R", ["Г"] = "G", ["К"] = "K", ["Х"] = "X", ["Ъ"] = "ʼ", 
["А"] = "A", ["Е"] = "E", ["И"] = "I", ["О"] = "O", ["У"] = "U", ["Э"] = "E", ["Ю"] = "Uˤ", ["Я"] = "Aˤ"}; + +local tetragraphs = {["хьхь"] = "x̂:", ["Хьхь"] = "X̂:"} + +local digraphs = {["пп"] = "p:", ["пӏ"] = "pʼ", ["тт"] = "t:", ["аь"] = "ä", ["оь"] = "oˤ", ["Пп"] = "P:", ["Пӏ"] = "Pʼ", ["Тт"] = "T:", ["Аь"] = "Ä", ["Оь"] = "Oˤ", ["цӏ"] = "cʼ", ["цц"] = "c:", ["тӏ"] = "tʼ", ["сс"] = "s:", ["чч"] = "č:", ["чӏ"] = "čʼ", ["кь"] = "qʼ", ["кк"] = "k:", ["кӏ"] = "kʼ", ["хь"] = "x̂", ["хъ"] = "q", ["къ"] = "q:", ["гъ"] = "ğ", ["хх"] = "x:", ["хӏ"] = "ħ", ["гь"] = "h", ["Цӏ"] = "Cʼ", ["Цц"] = "C:", ["Тӏ"] = "Tʼ", ["Сс"] = "S:", ["Чч"] = "Č:", ["Чӏ"] = "Čʼ", ["Кь"] = "Qʼ", ["Кк"] = "K:", ["Кӏ"] = "Kʼ", ["Хь"] = "X̂", ["Хъ"] = "Q", ["Къ"] = "Q:", ["Гъ"] = "Ğ", ["Хх"] = "X:", ["Хӏ"] = "Ħ", ["Гь"] = "H"} + +function export.tr(text, lang, sc) + local str_gsub = string.gsub + local UTF8_char = "[%z\1-\127\194-\244][\128-\191]*" + + -- Convert capital to lowercase palochka. Lowercase is found in tables + -- above. 
+ text = str_gsub(text, mw.ustring.char(0x4C0), mw.ustring.char(0x4CF)) + + for tetragraph, replacement in pairs(tetragraphs) do text = str_gsub(text, tetragraph, replacement) end + + for digraph, replacement in pairs(digraphs) do text = str_gsub(text, digraph, replacement) end + + text = str_gsub(text, UTF8_char, tt) + + return text +end + +return export diff --git a/wiktra/wikt/translit/lep-translit.lua b/wiktra/wikt/translit/lep-translit.lua new file mode 100644 index 0000000..24ad705 --- /dev/null +++ b/wiktra/wikt/translit/lep-translit.lua @@ -0,0 +1,113 @@ +local export = {} + +local conv = { + [""] = "", + ["\225\176\128"] = "k", -- U+1C00 + ["\225\176\129"] = "kl", -- U+1C01 + ["\225\176\130"] = "kh", -- U+1C02 + ["\225\176\131"] = "g", -- U+1C03 + ["\225\176\132"] = "gl", -- U+1C04 + ["\225\176\133"] = "ng", -- U+1C05 + ["\225\176\134"] = "c", -- U+1C06 + ["\225\176\135"] = "ch", -- U+1C07 + ["\225\176\136"] = "j", -- U+1C08 + ["\225\176\137"] = "ny", -- U+1C09 + ["\225\176\138"] = "t", -- U+1C0A + ["\225\176\139"] = "th", -- U+1C0B + ["\225\176\140"] = "d", -- U+1C0C + ["\225\176\141"] = "n", -- U+1C0D + ["\225\176\142"] = "p", -- U+1C0E + ["\225\176\143"] = "pl", -- U+1C0F + ["\225\176\144"] = "ph", -- U+1C10 + ["\225\176\145"] = "f", -- U+1C11 + ["\225\176\146"] = "fl", -- U+1C12 + ["\225\176\147"] = "b", -- U+1C13 + ["\225\176\148"] = "bl", -- U+1C14 + ["\225\176\149"] = "m", -- U+1C15 + ["\225\176\150"] = "ml", -- U+1C16 + ["\225\176\151"] = "ts", -- U+1C17 + ["\225\176\152"] = "tsh", -- U+1C18 + ["\225\176\153"] = "dz", -- U+1C19 + ["\225\176\154"] = "y", -- U+1C1A + ["\225\176\155"] = "r", -- U+1C1B + ["\225\176\156"] = "l", -- U+1C1C + ["\225\176\157"] = "h", -- U+1C1D + ["\225\176\158"] = "hl", -- U+1C1E + ["\225\176\159"] = "v", -- U+1C1F + ["\225\176\160"] = "s", -- U+1C20 + ["\225\176\161"] = "sh", -- U+1C21 + ["\225\176\162"] = "w", -- U+1C22 + ["\225\176\163"] = "", -- U+1C23 + ["\225\176\164"] = "y", -- U+1C24 + ["\225\176\165"] = "r", -- 
U+1C25 + ["\225\176\166"] = "á", -- U+1C26 + ["\225\176\167"] = "i", -- U+1C27 + ["\225\176\168"] = "o", -- U+1C28 + ["\225\176\169"] = "ó", -- U+1C29 + ["\225\176\170"] = "u", -- U+1C2A + ["\225\176\171"] = "ú", -- U+1C2B + ["\225\176\172"] = "e", -- U+1C2C + ["\225\176\173"] = "k", -- U+1C2D + ["\225\176\174"] = "m", -- U+1C2E + ["\225\176\175"] = "l", -- U+1C2F + ["\225\176\176"] = "n", -- U+1C30 + ["\225\176\177"] = "p", -- U+1C31 + ["\225\176\178"] = "r", -- U+1C32 + ["\225\176\179"] = "t", -- U+1C33 + ["\225\176\180"] = "ng", -- U+1C34 + ["\225\176\181"] = "ng", -- U+1C35 + ["\225\176\182"] = "^", -- U+1C36 + ["\225\176\183"] = "", -- U+1C37 + ["\225\176\184"] = "[ta-rol]", -- U+1C38 + ["\225\176\185"] = "[nyet thyoom ta-rol]", -- U+1C39 + ["\225\176\186"] = "[cer-wa]", -- U+1C3A + ["\225\176\187"] = "[tshook cer-wa]", -- U+1C3B + ["\225\176\188"] = "[tshook]", -- U+1C3C + ["\225\176\189"] = "0", -- U+1C3D + ["\225\176\190"] = "1", -- U+1C3E + ["\225\176\191"] = "2", -- U+1C3F + ["\225\177\128"] = "3", -- U+1C40 + ["\225\177\129"] = "4", -- U+1C41 + ["\225\177\130"] = "5", -- U+1C42 + ["\225\177\131"] = "6", -- U+1C43 + ["\225\177\132"] = "7", -- U+1C44 + ["\225\177\133"] = "8", -- U+1C45 + ["\225\177\134"] = "9", -- U+1C46 + ["\225\177\141"] = "tt", -- U+1C4D + ["\225\177\142"] = "tth", -- U+1C4E + ["\225\177\143"] = "dd" -- U+1C4F +} + +function export.tr(text, lang, sc) + local nukta = "\225\176\183" + + local initials = "([\225\176\128-\225\176\163\225\177\141-\225\177\143])" + local medials = "([\225\176\164-\225\176\165]?)" + local vowels_after = "([\225\176\166\225\176\170-\225\176\172\225\176\167-\225\176\169])" + local finals_after = "([\225\176\173-\225\176\179\225\176\182]?)" + local finals_before = "([\225\176\180\225\176\181]?)" + local others = "[\225\176\184-\225\177\134]" + + local im = initials .. medials + local imf = initials .. medials .. finals_after + local fim = finals_before .. initials .. medials + + local imvf = initials .. 
medials .. vowels_after .. finals_after + local fimv = finals_before .. initials .. medials .. vowels_after + + text = mw.ustring.gsub(text, nukta, "") + + text = mw.ustring.gsub(text, imvf, function(i, m, v, f) return conv[i] .. conv[m] .. conv[v] .. conv[f] end) + text = mw.ustring.gsub(text, fimv, function(f, i, m, v) return conv[i] .. conv[m] .. conv[v] .. conv[f] end) + + text = mw.ustring.gsub(text, imf, function(i, m, f) return conv[i] .. conv[m] .. "a" .. conv[f] end) + text = mw.ustring.gsub(text, fim, function(f, i, m) return conv[i] .. conv[m] .. "a" .. conv[f] end) + + text = mw.ustring.gsub(text, im, function(i, m) return conv[i] .. conv[m] .. "a" end) + + text = mw.ustring.gsub(text, others, conv) + + return text +end + +return export diff --git a/wiktra/wikt/translit/lez-translit.lua b/wiktra/wikt/translit/lez-translit.lua new file mode 100644 index 0000000..3074f40 --- /dev/null +++ b/wiktra/wikt/translit/lez-translit.lua @@ -0,0 +1,26 @@ +local export = {} + +local tt = {["б"] = "b", ["п"] = "p", ["ф"] = "f", ["в"] = "v", ["м"] = "m", ["д"] = "d", ["т"] = "t", ["й"] = "j", ["н"] = "n", ["з"] = "z", ["ц"] = "c", ["с"] = "s", ["ж"] = "ž", ["ш"] = "š", ["щ"] = "šč", ["л"] = "l", ["ч"] = "č", ["р"] = "r", ["г"] = "g", ["к"] = "k", ["х"] = "χ", ["ъ"] = "ʾ", ["а"] = "a", ["е"] = "e", ["ы"] = "y", ["и"] = "i", ["о"] = "o", ["у"] = "u", ["ё"] = "ë", ["ь"] = "’", ["э"] = "è", ["ю"] = "ju", ["я"] = "ä", ["Б"] = "B", ["П"] = "P", ["Ф"] = "F", ["В"] = "V", ["М"] = "M", ["Д"] = "D", ["Т"] = "T", ["Й"] = "J", ["Н"] = "N", ["З"] = "Z", ["Ц"] = "C", ["С"] = "S", ["Ж"] = "Ž", ["Ш"] = "Š", ["Щ"] = "Šč", ["Л"] = "L", ["Ч"] = "Č", ["Р"] = "R", ["Г"] = "G", ["К"] = "K", ["Х"] = "Χ", ["Ъ"] = "ʾ", ["А"] = "A", ["Е"] = "E", ["Ы"] = "Y", ["И"] = "I", ["О"] = "O", ["У"] = "U", ["Ё"] = "Ë", ["Ь"] = "’", ["Э"] = "È", ["Ю"] = "Ju", ["Я"] = "Ä"}; + +local trigraphs = {["хъв"] = "q°", ["Хъв"] = "Q°", ["къв"] = "q̄°", ["Къв"] = "Q̄°", ["кьв"] = "q̇°", ["Кьв"] = "Q̇°", ["гъв"] = 
"ġ°", ["Гъв"] = "Ġ°", ["ттв"] = "t̄°", ["Ттв"] = "T̄°", ["ццв"] = "c̄°", ["Ццв"] = "C̄°", ["ккв"] = "k̄°", ["Ккв"] = "K̄°", ["тӏв"] = "ṭ°", ["Тӏв"] = "Ṭ°", ["цӏв"] = "c̣°", ["Цӏв"] = "C̣°", ["кӏв"] = "ḳ°", ["Кӏв"] = "Ḳ°"} + +local digraphs = {["тв"] = "t°", ["Тв"] = "T°", ["зв"] = "z°", ["Зв"] = "Z°", ["хв"] = "χ°", ["Хв"] = "Χ°", ["цв"] = "c°", ["Цв"] = "C°", ["св"] = "s°", ["Св"] = "S°", ["пп"] = "p̄", ["пӏ"] = "ṗ", ["тт"] = "t̄", ["Пп"] = "P̄", ["Пӏ"] = "Ṗ", ["Тт"] = "T̄", ["цӏ"] = "c̣", ["цц"] = "c̄", ["тӏ"] = "ṭ", ["чч"] = "č̄", ["чӏ"] = "č̣", ["кь"] = "q̇", ["кк"] = "k̄", ["кӏ"] = "ḳ", ["хъ"] = "q", ["къ"] = "q̄", ["гъ"] = "ġ", ["гь"] = "h", ["Цӏ"] = "C̣", ["Цц"] = "C̄", ["Тӏ"] = "Ṭ", ["Чч"] = "Č̄", ["Чӏ"] = "Č̣", ["Кь"] = "Q̇", ["Кк"] = "K̄", ["Кӏ"] = "Ḳ", ["Хъ"] = "Q", ["Къ"] = "Q̄", ["Гъ"] = "Ġ", ["Гь"] = "H", ["уь"] = "ü", ["Уь"] = "Ü", ["хь"] = "x", ["Хь"] = "X", ["гв"] = "g°", ["Гв"] = "G°", ["кв"] = "k°", ["Кв"] = "K°"} +-- Transliterate Cyrillic text (lez) to Latin: trigraphs, then digraphs, then single letters. lang and sc are accepted but unused. +function export.tr(text, lang, sc) + local str_gsub = string.gsub + local UTF8_char = "[%z\1-\127\194-\244][\128-\191]*" + + -- Convert capital to lowercase palochka. Lowercase is found in tables + -- above.
+ text = str_gsub(text, mw.ustring.char(0x4C0), mw.ustring.char(0x4CF)) + -- Longest sequences first: every trigraph key begins with a digraph key, so running digraphs first would make the trigraph pass unreachable. + for trigraph, replacement in pairs(trigraphs) do text = str_gsub(text, trigraph, replacement) end + + for digraph, replacement in pairs(digraphs) do text = str_gsub(text, digraph, replacement) end + + text = str_gsub(text, UTF8_char, tt) + + return text +end + +return export diff --git a/wiktra/wikt/translit/lif-translit.lua b/wiktra/wikt/translit/lif-translit.lua new file mode 100644 index 0000000..53ea5a0 --- /dev/null +++ b/wiktra/wikt/translit/lif-translit.lua @@ -0,0 +1,63 @@ +local export = {} +local gsub = mw.ustring.gsub +local consonants = {["ᤁ"] = "k", ["ᤂ"] = "kh", ["ᤃ"] = "g", ["ᤄ"] = "gh", ["ᤅ"] = "ṅ", ["ᤆ"] = "c", ["ᤇ"] = "ch", ["ᤈ"] = "j", ["ᤉ"] = "jh", ["ᤊ"] = "ñ", ["ᤋ"] = "t", ["ᤌ"] = "th", ["ᤍ"] = "d", ["ᤎ"] = "dh", ["ᤏ"] = "n", ["ᤐ"] = "p", ["ᤑ"] = "ph", ["ᤒ"] = "b", ["ᤓ"] = "bh", ["ᤔ"] = "m", ["ᤕ"] = "y", ["ᤖ"] = "r", ["ᤗ"] = "l", ["ᤘ"] = "w", ["ᤙ"] = "ś", ["ᤚ"] = "ṣ", ["ᤛ"] = "s", ["ᤜ"] = "h", ["ᤝ"] = "gy", ["ᤞ"] = "tr", ["ᤀ"] = ""} +local diacritics = {["ᤠ"] = "a", ["ᤡ"] = "i", ["ᤢ"] = "u", ["ᤣ"] = "e", ["ᤤ"] = "ai", ["ᤥ"] = "o", ["ᤦ"] = "au", ["ᤧ"] = "ê", ["ᤨ"] = "ô"} + +local special = { + -- idk what to call these + ["᤹"] = "’", -- mukphreng (glottalizer) + ["ᤲ"] = "̃" -- anusvara (now obsolete) +} + +local subjoined = {["ᤪ"] = "r", ["ᤫ"] = "w", ["ᤩ"] = "y"} + +local finals = {["ᤰ"] = "k", ["ᤱ"] = "ṅ", ["ᤳ"] = "t", ["ᤴ"] = "n", ["ᤵ"] = "p", ["ᤶ"] = "m", ["ᤷ"] = "r", ["ᤸ"] = "l"} + +local nonconsonants = { + + -- digits + ["᥆"] = "0", + ["᥇"] = "1", + ["᥈"] = "2", + ["᥉"] = "3", + ["᥊"] = "4", + ["᥋"] = "5", + ["᥌"] = "6", + ["᥍"] = "7", + ["᥎"] = "8", + ["᥏"] = "9", + ["॥"] = ".", + ["᥄"] = "!", + ["᥅"] = "?", + ["᥀"] = "lo" +} + +-- translit any words or phrases +function export.tr(text, lang, sc) + text = mw.ustring.gsub(text, "([ᤁᤂᤃᤄᤅᤆᤇᤈᤉᤊᤋᤌᤍᤎᤏᤐᤑᤒᤓᤔᤕᤖᤗᤘᤙᤚᤛᤜᤝᤞᤀ])᤻", "᤺%1⌫") -- treat underscore as kemphreng + text = mw.ustring.gsub(text,
"([ᤁᤂᤃᤄᤅᤆᤇᤈᤉᤊᤋᤌᤍᤎᤏᤐᤑᤒᤓᤔᤕᤖᤗᤘᤙᤚᤛᤜᤝᤞᤀ])" .. "([ᤪᤫᤩ]?)" .. "([ᤠᤡᤢᤣᤤᤥᤦᤧ ᤨ]?)" .. "([᤹᤺ᤲ]?)" .. "([ᤰᤱᤳᤴᤵᤶᤷᤸ]?)", function(c, d, e, f, g) + -- mw.log('match', c, d) + return (consonants[c] or c) .. (subjoined[d] or d) .. (diacritics[e] or (e ~= "") and e or "ô") .. (special[f] or f) .. (finals[g] or g) + end) + + text = gsub(text, "[<>]", "") + text = mw.ustring.gsub(text, ".", nonconsonants) + text = mw.ustring.gsub(text, "(.)⌫", "") + text = gsub(text, "aᤣ", "o") + text = gsub(text, "ᤣᤣ", "ai") + text = gsub(text, "aᤣᤣ", "au") + text = mw.ustring.gsub(text, "᤺", "̄") + text = mw.ustring.gsub(text, "ᤰ", "k") + text = mw.ustring.gsub(text, "ᤱ", "ṅ") + text = mw.ustring.gsub(text, "ᤳ", "t") + text = mw.ustring.gsub(text, "ᤴ", "n") + text = mw.ustring.gsub(text, "ᤵ", "p") + text = mw.ustring.gsub(text, "ᤶ", "m") + text = mw.ustring.gsub(text, "ᤷ", "r") + text = mw.ustring.gsub(text, "ᤸ", "l") + text = gsub(text, "̄ᤣ", "ō") + text = mw.ustring.gsub(text, "aō", "ō") + return mw.ustring.toNFC(text) +end + +return export diff --git a/wiktra/wikt/translit/linb-translit.lua b/wiktra/wikt/translit/linb-translit.lua new file mode 100644 index 0000000..e25df90 --- /dev/null +++ b/wiktra/wikt/translit/linb-translit.lua @@ -0,0 +1,131 @@ +local export = {} + +local chars = { + ["𐀀"] = "a", + ["𐀁"] = "e", + ["𐀂"] = "i", + ["𐀃"] = "o", + ["𐀄"] = "u", + + ["𐀅"] = "da", + ["𐀆"] = "de", + ["𐀇"] = "di", + ["𐀈"] = "do", + ["𐀉"] = "du", + + ["𐀊"] = "ja", + ["𐀋"] = "je", + -- ji not in Unicode + ["𐀍"] = "jo", + ["𐀎"] = "ju", + + ["𐀏"] = "ka", + ["𐀐"] = "ke", + ["𐀑"] = "ki", + ["𐀒"] = "ko", + ["𐀓"] = "ku", + + ["𐀔"] = "ma", + ["𐀕"] = "me", + ["𐀖"] = "mi", + ["𐀗"] = "mo", + ["𐀘"] = "mu", + + ["𐀙"] = "na", + ["𐀚"] = "ne", + ["𐀛"] = "ni", + ["𐀜"] = "no", + ["𐀝"] = "nu", + + ["𐀞"] = "pa", + ["𐀟"] = "pe", + ["𐀠"] = "pi", + ["𐀡"] = "po", + ["𐀢"] = "pu", + + ["𐀣"] = "qa", + ["𐀤"] = "qe", + ["𐀥"] = "qi", + ["𐀦"] = "qo", + -- qu not in Unicode + + ["𐀨"] = "ra", + ["𐀩"] = "re", + ["𐀪"] = "ri", + ["𐀫"] = 
"ro", + ["𐀬"] = "ru", + + ["𐀭"] = "sa", + ["𐀮"] = "se", + ["𐀯"] = "si", + ["𐀰"] = "so", + ["𐀱"] = "su", + + ["𐀲"] = "ta", + ["𐀳"] = "te", + ["𐀴"] = "ti", + ["𐀵"] = "to", + ["𐀶"] = "tu", + + ["𐀷"] = "wa", + ["𐀸"] = "we", + ["𐀹"] = "wi", + ["𐀺"] = "wo", + -- wu not in Unicode + + ["𐀼"] = "za", + ["𐀽"] = "ze", + -- zi not in Unicode + ["𐀿"] = "zo", + -- zu not in Unicode + + ["𐁀"] = "ha", + ["𐁁"] = "ai", + ["𐁂"] = "au", + ["𐁃"] = "dwe", + ["𐁄"] = "dwo", + ["𐁅"] = "nwo", + ["𐁆"] = "phu", + ["𐁇"] = "pte", + ["𐁈"] = "rya", + ["𐁉"] = "rai", + ["𐁊"] = "ryo", + ["𐁋"] = "tya", + ["𐁌"] = "twe", + ["𐁍"] = "two", + + ["𐁐"] = "*18", + ["𐁑"] = "*19", + ["𐁒"] = "*22", + ["𐁓"] = "*34", + ["𐁔"] = "*47", + ["𐁕"] = "*49", + ["𐁖"] = "*56", + ["𐁗"] = "*63", + ["𐁘"] = "*64", + ["𐁙"] = "*79", + ["𐁚"] = "*82", + ["𐁛"] = "*83", + ["𐁜"] = "*86", + ["𐁝"] = "*89", + + -- explicit morpheme boundary + ["-"] = "`" +} + +function export.tr(text, lang, sc) + local ret = {} + local i = 1 + + for c in string.gmatch(text, "[%z\1-\127\194-\244][\128-\191]*") do -- UTF-8 character pattern + ret[i] = chars[c] or c + i = i + 1 + end + + text = string.gsub(table.concat(ret, "-"), "%- %-", " ") + text = string.gsub(text, "%-?`%-?", "-") + + return text +end + +return export diff --git a/wiktra/wikt/translit/links.lua b/wiktra/wikt/translit/links.lua new file mode 100644 index 0000000..dc29310 --- /dev/null +++ b/wiktra/wikt/translit/links.lua @@ -0,0 +1,497 @@ +local export = {} + +--[=[ + [[Unsupported titles]] and pages with high + memory usage are listed at [[Module:links/data]]. + + Other modules used: + [[Module:script utilities]] + [[Module:scripts]] + [[Module:languages]] and its submodules + [[Module:gender and number]] + [[Module:utilities]] + [[Module:string]] + [[Module:debug]] +]=] + +-- These are prefixed with u to avoid confusion with the default string methods +-- of the same name. 
+local usub = mw.ustring.sub + +local table_insert = table.insert +local table_concat = table.concat + +local ignore_cap = {["ko"] = true} + +local phonetic_extraction = {["th"] = "Module:th", ["km"] = "Module:km"} + +local pos_tags = {["a"] = "adjective", ["adv"] = "adverb", ["int"] = "interjection", ["n"] = "noun", ["pron"] = "pronoun", ["v"] = "verb", ["vi"] = "intransitive verb", ["vt"] = "transitive verb", ["vti"] = "transitive and intransitive verb"} + +local unsupported_titles +function export.getLinkPage(target, lang) + unsupported_titles = unsupported_titles or mw.loadData("links/data").unsupported_titles + if unsupported_titles[target] then return "Unsupported titles/" .. unsupported_titles[target] end + + -- If the link contains unexpanded template parameters, then don't create a link. + if target:find("{{{") then return nil end + + if target:sub(1, 1) == ":" or target:sub(1, 2) == "w:" or target:sub(1, 10) == "wikipedia:" then return target end + + -- Remove diacritics from the page name + target = lang:makeEntryName(target) + + if target:sub(1, 1) == "/" then + return ":" .. target + + -- Link to appendix for reconstructed terms and terms in appendix-only languages + elseif target:sub(1, 1) == "*" and #target > 1 then + if lang:getCode() == "und" then return nil end + + target = "Reconstruction:" .. lang:getCanonicalName() .. "/" .. usub(target, 2) + elseif lang:getType() == "reconstructed" then + error("The specified language " .. lang:getCanonicalName() .. " is unattested, while the given word is not marked with '*' to indicate that it is reconstructed") + elseif lang:getType() == "appendix-constructed" then + target = "Appendix:" .. lang:getCanonicalName() .. "/" .. 
target + end + + return target +end + +-- Make a language-specific link from given link's parts +local function makeLangLink(link, lang, id, allow_self_link) + -- Temporary tracking code + local langCode = lang:getCode() + if langCode == "se" or langCode == "sia" or langCode:find("^sm[ajns]$") or langCode:find("^sj[dektu]$") then + if link.display and link.display:find("'") then + require("debug").track("links/Sami apostrophe display") + elseif link.target and link.target:find("'") then + require("debug").track("links/Sami apostrophe target") + end + end + + -- Find fragments (when link didn't come from parseLink). + -- Prevents {{l|en|word#Etymology 2|word}} from linking to [[word#Etymology 2#English]]. + if link.fragment == nil then + -- Replace numeric character references with the corresponding character ( → '), + -- as they contain #, which causes the numeric character reference to be + -- misparsed (wa'a → waa → pagename wa&, fragment 29;a). + link.target = link.target:gsub("&#(%d+);", function(number) return mw.ustring.char(tonumber(number)) end) + local first, second = link.target:match("^([^#]+)#(.+)$") + if first then link.target, link.fragment = first, second end + end + + -- If there is no display form, then create a default one + if not link.display then + link.display = link.target + + -- Strip the prefix from the displayed form + -- TODO: other interwiki links? 
+ if link.display:sub(1, 1) == ":" and not mw.loadData("links/data").unsupported_titles[link.display] then + link.display = link.display:sub(2) -- remove colon from beginning + else + local prefix = link.display:match("^([^:]+):") + local prefixes = {w = true, wikipedia = true} + + if prefixes[prefix] then + link.display = link.display:sub(#prefix + 2) -- remove prefix plus colon + end + end + end + + -- Process the target + link.target = export.getLinkPage(link.target, lang) + + if not link.target then return link.display end + + -- If the target is the same as the current page and there is no sense id + -- and linking to the same page hasn't been turned on, then return a "self-link" + -- like the software does. + if not (allow_self_link or id) and link.target:gsub("^:", "") == mw.title.getCurrentTitle().prefixedText then return "" .. link.display .. "" end + + --[[ + Add fragment + Do not add a section link to "Undetermined", as such sections do not exist and are invalid. + TabbedLanguages handles links without a section by linking to the "last visited" section, + but adding "Undetermined" would break that feature. + For localized prefixes that make syntax error, please use the format: ["xyz"] = true, + ]] + local prefix = link.target:match("^:?([^:]+):") + local prefixes = {w = true, wikipedia = true, Category = true} + + if not prefixes[prefix] then + if link.fragment or link.target:find("#$") then require("debug").track {"links/fragment", "links/fragment/" .. lang:getCode()} end + + if not link.fragment and lang:getCode() ~= "und" then + if id then + link.fragment = require("senseid").anchor(lang, id) + elseif not mw.ustring.find(link.target, "^Appendix:") and not mw.ustring.find(link.target, "^Reconstruction:") then + link.fragment = lang:getCanonicalName() + end + end + + -- This allows linking to pages like [[sms:a]] without it being treated weirdly. + link.target = link.target:gsub(":", ":") + end + + return "[[" .. link.target .. (link.fragment and "#" .. 
link.fragment or "") .. "|" .. link.display .. "]]" +end + +-- Split a link into its parts +local function parseLink(linktext) + local link = {target = linktext} + local first, second = link.target:match("^([^|]+)|(.+)$") + + if first then + link.target = first + link.display = second + else + link.display = link.target + end + + first, second = link.target:match("^(.+)#(.+)$") + + if first then + link.target = first + link.fragment = second + else + -- So that makeLangLink does not look for a fragment again + link.fragment = false + end + + return link +end + +-- Creates a basic wikilink to the given term. If the text already contains +-- links, these are replaced with links to the correct section. +function export.language_link(data, allow_self_link) + if type(data) ~= "table" then error("The first argument to the function language_link must be a table. See Module:links/documentation for more information.") end + + local text = data.term + + if ignore_cap[data.lang:getCode()] and text then text = text:gsub("%^", "") end + + -- If the text begins with * and another character, + -- then act as if each link begins with * + local allReconstructed = false + + if text:find("^*.") then allReconstructed = true end + + -- Do we have embedded wikilinks? + if text:find("[[", nil, true) then + --[=[ + [[Special:WhatLinksHere/Template:tracking/links/alt-ignored]] + [[Special:WhatLinksHere/Template:tracking/links/id-ignored]] + ]=] + + if data.alt then + require("debug").track("links/alt-ignored") + mw.log("(from Module:links)", "text with embedded wikilinks:", text, "ignored alt:", data.alt, "lang:", data.lang:getCode()) + end + + if data.id then + require("debug").track("links/id-ignored") + mw.log("(from Module:links)", "text with embedded wikilinks:", text, "ignored id:", data.id, "lang:", data.lang:getCode()) + end + + -- Begins and ends with a wikilink tag + if text:find("^%[%[(.+)%]%]$") then + -- There are no [ ] in between. + -- This makes the wikilink tag redundant. 
+ if text:find("^%[%[[^%[%]]+%]%]$") then + require("debug").track("links/redundant wikilink") + else + local temp = text:gsub("^%[%[(.+)%]%]$", "%1") + temp = temp:gsub("%]%], %[%[", "|") + + if not temp:find("[%[%]]") then require("debug").track("links/list") end + end + end + + text = text:gsub("%[%[([^%]]+)%]%]", function(linktext) + local link = parseLink(linktext) + + if allReconstructed then link.target = "*" .. link.target end + + return makeLangLink(link, data.lang, data.id, allow_self_link) + end) + + -- Remove the extra * at the beginning if it's immediately followed + -- by a link whose display begins with * too + if allReconstructed then text = text:gsub("^%*%[%[([^|%]]+)|%*", "[[%1|*") end + else + -- There is no embedded wikilink, make a link using the parameters. + text = makeLangLink({target = text, display = data.alt}, data.lang, data.id, allow_self_link) + end + + return text +end + +function export.mark(text, itemType, face, lang) + local tag = {"", ""} + + if itemType == "gloss" then + tag = {"", ""} + elseif itemType == "tr" then + if face == "term" then + tag = {"", ""} + else + tag = {"", ""} + end + elseif itemType == "ts" then + tag = {"/", "/"} + elseif itemType == "pos" then + tag = {"", ""} + elseif itemType == "annotations" then + tag = {"(", ")"} + end + + if type(text) == "string" then + return tag[1] .. text .. tag[2] + else + return "" + end +end + +-- Format the annotations (things following the linked term) +function export.format_link_annotations(data, face) + local output = {} + + -- Interwiki link + if data.interwiki then table_insert(output, data.interwiki) end + + -- Genders + if type(data.genders) ~= "table" then data.genders = {data.genders} end + + if data.genders and #data.genders > 0 then + local m_gen = require("gender and number") + table_insert(output, " " .. 
m_gen.format_list(data.genders, data.lang)) + end + + local annotations = {} + + -- Transliteration and transcription + if data.tr or data.ts then + local kind + if face == "term" then + kind = face + else + kind = "default" + end + + if data.tr and data.ts then + table_insert(annotations, require("script utilities").tag_translit(data.tr, data.lang, kind) .. " " .. export.mark(data.ts, "ts")) + elseif data.ts then + table_insert(annotations, export.mark(data.ts, "ts")) + else + table_insert(annotations, require("script utilities").tag_translit(data.tr, data.lang, kind)) + end + end + + -- Gloss/translation + if data.gloss then table_insert(annotations, export.mark(data.gloss, "gloss")) end + + -- Part of speech + if data.pos then + -- debug category for pos= containing transcriptions + if data.pos:find("/[^><]*/") then data.pos = data.pos .. "[[Category:links likely containing transcriptions in pos]]" end + + table_insert(annotations, export.mark(pos_tags[data.pos] or data.pos, "pos")) + end + + -- Literal/sum-of-parts meaning + if data.lit then table_insert(annotations, "literally " .. export.mark(data.lit, "gloss")) end + + if #annotations > 0 then table_insert(output, " " .. export.mark(table_concat(annotations, ", "), "annotations")) end + + return table_concat(output) +end + +-- A version of {{l}} or {{m}} that can be called from other modules too +function export.full_link(data, face, allow_self_link, no_check_redundant_translit) + if type(data) ~= "table" then error("The first argument to the function full_link must be a table. " .. "See Module:links/documentation for more information.") end + + -- Create the link + local output = {} + local categories = {} + local link = "" + local annotations + + -- local m_utilities = require("utilities") + + -- Is there any text to show? 
+ if (data.term or data.alt) then + -- Try to detect the script if it was not provided + if not data.sc then + data.sc = require("scripts").findBestScript(data.alt or data.term, data.lang) + else + -- Track uses of sc parameter + local best = require("scripts").findBestScript(data.alt or data.term, data.lang) + require("debug").track("links/sc") + + if data.sc:getCode() == best:getCode() then + require("debug").track("links/sc/redundant") + require("debug").track("links/sc/redundant/" .. data.sc:getCode()) + else + require("debug").track("links/sc/needed") + require("debug").track("links/sc/needed/" .. data.sc:getCode()) + end + end + + local class = "" + + local function encode_accel_param(prefix, param) + -- This is decoded again by [[WT:ACCEL]]. + return param and prefix .. param:gsub("%%", "."):gsub(" ", "_") or "" + end + + if data.accel then + local form = data.accel.form and data.accel.form .. "-form-of" or "" + local gender = encode_accel_param("gender-", data.accel.gender) + local pos = encode_accel_param("pos-", data.accel.pos) + local translit = encode_accel_param("transliteration-", data.accel.translit) + local lemma = encode_accel_param("origin-", data.accel.lemma) + local lemma_translit = encode_accel_param("origin_transliteration-", data.accel.lemma_translit) + local no_store = data.accel.no_store and "form-of-nostore" or "" + + local accel = form .. " " .. gender .. " " .. pos .. " " .. translit .. " " .. lemma .. " " .. lemma_translit .. " " .. no_store .. " " + + class = "form-of lang-" .. data.lang:getCode() .. " " .. accel + end + + -- Only make a link if the term has been given, otherwise just show the alt text without a link + link = require("script utilities").tag_text(data.term and export.language_link(data, allow_self_link) or data.alt, data.lang, data.sc, face, class) + else + --[[ No term to show. + Is there at least a transliteration we can work from? 
]] + link = require("script utilities").request_script(data.lang, data.sc) + + if link == "" or not data.tr or data.tr == "-" then + -- No link to show, and no transliteration either. Show a term request. + local category = "" + + if mw.title.getCurrentTitle().nsText ~= "Template" then table_insert(categories, "[[Category:" .. data.lang:getCanonicalName() .. " term requests]]") end + + link = "[Term?]" + end + end + + table_insert(output, link) + + if data.tr == "" or data.tr == "-" then + data.tr = nil + + elseif phonetic_extraction[data.lang:getCode()] then + local m_phonetic = require(phonetic_extraction[data.lang:getCode()]) + data.tr = data.tr or m_phonetic.getTranslit(export.remove_links(data.term)) + + elseif (data.term or data.alt) and not data.sc:getCode():find("Lati?n") then + + -- Try to generate a transliteration, unless transliteration has been supplied and either + -- no_check_redundant_translit is given or we are in a high-memory entry. (Checking for redundant + -- transliteration can use up significant amounts of memory so we don't want to do it if memory + -- is tight. `no_check_redundant_translit` is currently set when called ultimately from + -- {{multitrans|...|no-check-redundant-translit=1}}.) + if not (data.tr and (no_check_redundant_translit or mw.loadData("links/data").high_memory_entries[mw.title.getCurrentTitle().text])) then + local automated_tr = data.lang:transliterate(export.remove_links(data.alt or data.term), data.sc) + + if automated_tr then + local manual_tr = data.tr + + if manual_tr then + if manual_tr == automated_tr then + table_insert(categories, "[[Category:Terms with redundant transliterations]]" .. "[[Category:Terms with redundant transliterations/" .. data.lang:getCode() .. "]]") + else + -- Prevents Arabic root categories from flooding the tracking categories. 
+ if mw.title.getCurrentTitle().nsText ~= "Category" then table_insert(categories, "[[Category:Terms with manual transliterations different from the automated ones]]" .. "[[Category:Terms with manual transliterations different from the automated ones/" .. data.lang:getCode() .. "]]") end + end + end + + if (not manual_tr) or data.lang:overrideManualTranslit() then data.tr = automated_tr end + end + end + end + + -- Link to the transliteration entry for languages that require this + if data.tr and data.lang:link_tr() then data.tr = export.language_link {lang = data.lang, term = data.tr} end + + table_insert(output, export.format_link_annotations(data, face)) + + return table_concat(output) .. table_concat(categories) +end + +--[[ Strips links: deletes category links, + the targets of piped links, + and all double square brackets. ]] +function export.remove_links(text) + if type(text) == "table" then text = text.args[1] end + + if not text or text == "" then return "" end + + text = mw.ustring.gsub(text, "%[%[Category:[^|%]]-|?[^|%]]-%]%]", "") + text = text:gsub("%[%[[^|%]]-|", "") + text = text:gsub("%[%[", "") + text = text:gsub("%]%]", "") + + return text +end + +function export.english_links(text) + local lang = require("languages").getByCode("en") + + -- Parentheses around function call to remove second return value, the + -- number of replacements. + return (text:gsub("%[%[([^%]]+)%]%]", function(linktext) + local link = parseLink(linktext) + return makeLangLink(link, lang, nil, true, false) + end)) +end + +--[=[ + For example, Norwegian_Bokm.C3.A5l → Norwegian_Bokmål. 0xC3 and 0xA5 are the + hexadecimal-base representation of the two bytes used to encode the character + å in the UTF-8 encoding: + 11000011 10100101 + + Note that the bytes used to represent a character are actually different from + the Unicode codepoint. For å, the codepoint is 0xE5. The bits (digits) that + actually spell the codepoint are found in the brackets: 110[00011] 10[100101]. 
+ For further explanation, see [[w:UTF-8#Description]]. +]=] + +-- The character class %x should not be used, as it includes the characters a-f, +-- which do not occur in these anchor encodings. +local capitalHex = "[0-9A-F]" + +local function decodeAnchor(anchor) return (anchor:gsub("%.(" .. capitalHex .. capitalHex .. ")", function(hexByte) return string.char(tonumber(hexByte, 16)) end)) end + +function export.section_link(link) + if type(link) ~= "string" then error("The first argument to section_link was a " .. type(link) .. ", but it should be a string.") end + + link = link:gsub("_", " ") + + local numberSigns = require("string").count(link, "#") + + if numberSigns > 1 then error("The section link should only contain one number sign (#).") end + + link = mw.uri.decode(link, "WIKI") + local page, section = link:match("^([^#]*)#(.+)$") + if page == "" then page = nil end + + if section then + section = decodeAnchor(section) + + -- URI-encode (percent-encode) section to allow square brackets and + -- other dodgy characters in section name. + -- If not percent-encoded, they prevent the parser from creating a link. + -- Decode percent-encoding in the displayed text + if page then + return "[[" .. page .. "#" .. mw.uri.encode(section, "WIKI") .. "|" .. page .. " § " .. section .. "]]" + else + return "[[#" .. mw.uri.encode(section, "WIKI") .. "|§ " .. section .. 
"]]" + end + else + error("The function “section_link” could not find a number sign marking a section name.") + end +end + +return export diff --git a/wiktra/wikt/translit/links/data.lua b/wiktra/wikt/translit/links/data.lua new file mode 100644 index 0000000..bab04bc --- /dev/null +++ b/wiktra/wikt/translit/links/data.lua @@ -0,0 +1,86 @@ +local data = {} + +data.high_memory_entries = {"a", "animal", "book", "coffee", "do", "e", "language", "night", "smoke", "son", "sun", "water", "wind"} + +local U = mw.ustring.char +local soft_hyphen = U(0xAD) + +--[[ The "actual title" is the page name with the prefix "Unsupported titles/" removed. + ["displayed_title"] = "actual title" ]] +data.unsupported_titles = { + [" "] = "Space", + ["{"] = "Left curly bracket", + ["}"] = "Right curly bracket", + ["["] = "Left square bracket", + ["]"] = "Right square bracket", + ["<"] = "Less than", + [">"] = "Greater than", + ["=<"] = "Equal less than", + ["=>"] = "Equal greater than", + [">="] = "Greater than equal", + ["<="] = "Less than equal", + ["->"] = "Hyphen greater than", + ["<-"] = "Less than hyphen", + [">_<"] = "Greater than low line less than", + ["::"] = "Double colon", + [": :"] = "Enclosing colons", + [":/"] = "Colon slash", + [":="] = "Colon equals", + [":Þ"] = "Colon capital thorn", + [":þ"] = "Colon lowercase thorn", + [":("] = "Colon left paren", + [":)"] = "Colon right paren", + [":3"] = "Colon three", + ["<>"] = "Less than greater than", + ["<3"] = "Less than three", + [""] = "Enclosing less than greater than", + ["< />"] = "Less than trailing slash greater than", + ["< > "] = "HTML start tag end tag", + [""] = "HTML comment", + [""] = "g tag", + [":-("] = "Colon hyphen left paren", + [":-)"] = "Colon hyphen right paren", + ["|"] = "Vertical line", + ["||"] = "Vertical line vertical line", + ["| |"] = "Enclosing vertical lines", + ["C#"] = "C sharp", + ["#"] = "Number sign", + ["# #"] = "Enclosing number signs", + ["&"] = "Amp", + [":"] = "Colon", + [".."] = "Double 
period", + ["."] = "Full stop", + ["_"] = "Low line", + ["-_-"] = "Low line interfix", + [U(0xFFFD)] = "Replacement character", + [U(0x1680)] = "Ogham space", + ["[ ]"] = "Square brackets", + ["{ }"] = "Curly brackets", + ["[…]"] = "Square bracketed ellipsis", + ["_ _"] = "Enclosing low lines", + ["C|N>K"] = "C through N to K", + ["#MeToo"] = "MeToo", + ["о/."] = "о slash dot", + ["กรุงเทพมหานคร อมรรัตนโกสินทร์ มหินทรายุธยา มหาดิลกภพ นพรัตนราชธานีบูรีรมย์ อุดมราชนิเวศน์มหาสถาน อมรพิมานอวตารสถิต สักกะทัตติยวิษณุกรรมประสิทธิ์"] = "Thai name of Bangkok", + ["λοπαδοτεμαχοσελαχογαλεοκρανιολειψανοδριμυποτριμματοσιλφιοκαραβομελιτοκατακεχυμενοκιχλ" .. soft_hyphen .. "επικοσσυφοφαττοπεριστεραλεκτρυονοπτοκεφαλλιοκιγκλοπελειολαγῳοσιραιοβαφητραγανοπτερύγων"] = "Ancient Greek dish", + [":≠"] = ":≠", + ["S:t"] = "S:t", + ["S:ta"] = "S:ta", + ["c:a"] = "c:a", + ["eq #"] = "eq number sign", + ["hr #"] = "hr number sign", + ["n:a"] = "n:a", + ["n:o"] = "n:o", + ["n:r"] = "n:r", + ["s:a"] = "s:a", + ["st:a"] = "st:a", + ["v:a"] = "v:a" +} + +for i, item in ipairs(data.high_memory_entries) do + data.high_memory_entries[i] = nil + data.high_memory_entries[item] = true +end + +return data diff --git a/wiktra/wikt/translit/lki-translit.lua b/wiktra/wikt/translit/lki-translit.lua new file mode 100644 index 0000000..82a589b --- /dev/null +++ b/wiktra/wikt/translit/lki-translit.lua @@ -0,0 +1,176 @@ +-- Authors: JavaScript ئاسۆ; Lua Ghybu, Calak +local export = {} + +local gsub = mw.ustring.gsub +local U = mw.ustring.char + +local mapping = { + ["ا"] = "a", + ["ب"] = "b", + ["چ"] = "ç", + ["ج"] = "c", + ["د"] = "d", + ["ە"] = "e", + ["ێ"] = "ê", + ["ف"] = "f", + ["گ"] = "g", + ["ھ"] = "h", + ["ه"] = "h", + ["ح"] = "ḧ", + ["ژ"] = "j", + ["ک"] = "k", + ["ڵ"] = "ll", + ["ل"] = "l", + ["م"] = "m", + ["ن"] = "n", + ["ۆ"] = "o", + ["پ"] = "p", + ["ق"] = "q", + ["ر"] = "r", + ["ڕ"] = "r", + ["س"] = "s", + ["ش"] = "ş", + ["ت"] = "t", + ["ۊ"] = "ü", + ["ڤ"] = "v", + ["خ"] = "x", + ["غ"] = "ẍ", + 
["ز"] = "z", + ["ئ"] = "", + ["ع"] = "'", + + [U(0x200C)] = "", -- ZWNJ (zero-width non-joiner) + ["ـ"] = "", -- kashida, no sound + + -- numerals + ["١"] = "1", + ["٢"] = "2", + ["٣"] = "3", + ["٤"] = "4", + ["٥"] = "5", + ["٦"] = "6", + ["٧"] = "7", + ["٨"] = "8", + ["٩"] = "9", + ["٠"] = "0", + -- persian variants to numerals + ["۱"] = "1", + ["۲"] = "2", + ["۳"] = "3", + ["۴"] = "4", + ["۵"] = "5", + ["۶"] = "6", + ["۷"] = "7", + ["۸"] = "8", + ["۹"] = "9", + ["۰"] = "0" +} + +-- punctuation (leave on separate lines) +local punctuation = { + ["؟"] = "?", -- question mark + ["،"] = ",", -- comma + ["؛"] = ";", -- semicolon + ["«"] = "“", -- quotation mark + ["»"] = "”", -- quotation mark + ["٪"] = "%", -- percent + ["؉"] = "‰", -- per mille + ["٫"] = ".", -- decimals + ["٬"] = "," -- thousand +} + +-- translit +local function tr_word(word) + + word = gsub(word, ".", punctuation) + + -- Remove punctuation at the end of the word. + if mw.ustring.find(word, "[%.%!،؛»«٪؉٫٬%p]$") then + ponct = mw.ustring.sub(word, -1) + word = gsub(word, "[%.%!،؛»«٪؉٫٬%p]$", "") + else + word = word + ponct = "" + end + + word = gsub(word, "ه‌", "ە") -- correct unicode for letter ە + -- U+0647 (Arabic letter heh) + U+200C (zero-width non-joiner) → U+06D5 (Arabic letter ae) + + -- diacritics + word = gsub(word, "ْ", "i") -- U+0652, Arabic sukun + word = gsub(word, "ِ", "i") -- U+0650, Arabic kasra + + -- managing 'و' and 'ی' + word = gsub(word, "و([iاێۆۊە])", "w%1") -- و + vowel => w (e.g. wan) + word = gsub(word, "ی([iاێۆۊە])", "y%1") -- ی + vowel => y (e.g. yas) + word = gsub(word, "([iاێۆۊە])و", "%1w") -- vowel + و => w (e.g. kew) + word = gsub(word, "([iاێۆۊە])ی", "%1y") -- vowel + ی => y (e.g. bey) + word = gsub(word, "([iاێۆە])ۊ", "%1ẅ") -- vowel + و => ẅ (e.g. taẅ) + word = gsub(word, "([iاۆۊە])ێ", "%1ÿ") -- vowel + ێ => ÿ (e.g. 
şeÿtan) + word = gsub(word, "^و$", "û") -- non-letter + 'و' + non-letter => û (=and) + + word = gsub(word, "([^ء-يٱ-ەiwẅyÿ])و", "%1w") -- non-letter + 'و' => w (e.g. wetar) + word = gsub(word, "^و", "w") -- first 'و' => w (e.g. wetar) + word = gsub(word, "یو", "îw") -- 'ی' + 'و' => îw (e.g. mîwe) + word = gsub(word, "([^و])یی", "%1îy") -- 'ی' + 'ی' => îy (e.g. kanîy) + word = gsub(word, "وی", "uy") -- 'و' + 'ی' => uy (e.g. buyn) + word = gsub(word, "وو", "û") -- 'و' + 'و' => û (e.g. nû) + word = gsub(word, "ی", "î") + word = gsub(word, "و", "u") + word = gsub(word, "uu", "û") -- 'و' + 'و' => û (e.g. nû) + word = gsub(word, "([ء-يٱ-ەiîuûwẅyÿ])ڕ", "%1rr") -- when 'ڕ' not at the beginning of a word => rr + word = gsub(word, "([ء-يٱ-ەiîuûwẅyÿ])ئ", "%1'") -- when 'ئ' not at the beginning of a word => ' + + word = gsub(word, ".", mapping) + + -- insert i where applicable + word = gsub(word, "ll", "Ľ") -- temporary conversion to avoid seeing ll as 2 letters + word = gsub(word, "rr", "Ŕ") -- temporary conversion to avoid seeing rr as 2 letters + + word = gsub(word, "([bcçdfghḧjklĽmnpqrŔsştvwẅxẍz])([fjlĽmnrŔsşvwẅxẍyÿz])([fjlĽmnrŔsşvwẅxẍyÿz])([^aeêiîouûüy])", "%1%2i%3%4") -- e.g. grft -> grift + word = gsub(word, "([aeêiîouûü])([bcçdfghḧjklĽmnpqrŔsştvwẅxẍz])([bcçdfghḧjklĽmnpqrŔsştvwẅxẍz])([bcçdfghḧjklĽmnpqrŔsştvwẅxẍz])$", "%1%2%3i%4") -- e.g. cejnt -> cejnit + word = gsub(word, "([fjlĽrŔsşwyz])([fjlĽmnrŔsşvwẅxẍyÿz])([bcçdfghḧjklĽmnpqrŔsştvwẅxẍz])", "%1i%2%3") -- e.g. wrd -> wird + + word = gsub(word, "([bcçdghḧkmnpqtvxẍ])([fjlĽmnrŔsşvwẅxẍyÿz])([^aeêiîouûü])", "%1i%2%3") -- e.g. 
prd -> pird + word = gsub(word, "([bcçdghḧkmnpqtvxẍ])([fjlĽmnrŔsşvwẅxẍyÿz])$", "%1i%2") -- like above + + word = gsub(word, "([^aeêiîouûü])([bcçdghḧkmnpqtvxẍ])([fjlĽmnrŔsşvwẅxẍyÿz])([^aeêiîouûü])", "%1%2i%3%4") -- repeat the latter expression, in case skipped + word = gsub(word, "([^aeêiîouûü])([bcçdghḧkmnpqtvxẍ])([fjlĽmnrŔsşvwẅxẍyÿz])$", "%1%2i%3") -- repeat the latter expression, in case skipped + + word = gsub(word, "^([bcçdfghḧjklĽmnpqrŔsştvwẅxẍz])([bcçdfghḧjklĽmnpqrŔsştvwẅxẍz])([^aeêiîouûü])", "%1i%2%3") -- e.g. ktk -> kitk + word = gsub(word, "^([bcçdfghḧjklĽmnpqrŔsştvwẅxẍz])([bcçdfghḧjklĽmnpqrŔsştvwẅxẍz])$", "%1i%2") -- e.g. ktk -> kitk + word = gsub(word, "([^aeêiîouüy])([bcçdfghḧjklĽmnpqrŔsştvwẅxẍz])([bcçdfghḧjklĽmnpqrŔsştvwẅxẍz])([^aeêiîouûü])", "%1%2i%3%4") -- e.g. ktk -> kitk + word = gsub(word, "([^aeêiîouüy])([bcçdfghḧjklĽmnpqrŔsştvwẅxẍz])([bcçdfghḧjklĽmnpqrŔsştvwẅxẍz])$", "%1%2i%3") -- e.g. ktk -> kitk + + word = gsub(word, "([^a-zçşêîûüĽŔ])([bcçdfghḧjklĽmnpqrŔsştvwẅxẍz])$", "%1%2i") -- e.g. j -> ji + word = gsub(word, "^([bcçdfghḧjklĽmnpqrŔsştvwẅxẍz])$", "%1i") -- e.g. j -> ji + + -- word = gsub(word, '([^a-zêîûçş0-9\'’])([bcçdfghḧjklĽmnpqrŔsştvwẅxẍz])([bcçdfghḧjklĽmnpqrŔsştvxẍz])', "%1%2i%3") --e.g. bra -> bira + -- word = gsub(word, '^([bcçdfghḧjklĽmnpqrŔsştvwẅxẍz])([bcçdfghḧjklĽmnpqrŔsştvxẍz])', "%1i%2") --e.g. bra -> bira + + -- word = gsub(word, '([bcçdfghḧjklmnpqrsştvwẅxẍz][bcçdfghḧjklĽmnpqrŔsştvwẅxẍz])([bcçdfghḧjklĽmnpqrŔsştvwẅxẍz])', "%1i%2") --e.g. aşkra -> aşkira + + -- word = gsub(word, 'si([tp][aeêiîouû])', "s%1") -- sp, st cluster + + word = gsub(word, "Ľ", "ll") -- revert the temporary conversion + word = gsub(word, "Ŕ", "rr") -- revert the temporary conversion + + -- Add the punctuation who had previously deleted. + word = word .. ponct + + return word +end + +function export.tr(text, lang, sc) + local textTab = {} + + -- Create a word table separated by a space (%s). 
+ for _, word in ipairs(mw.text.split(text, "%s+")) do table.insert(textTab, word) end + + -- Tablo of translit. + for key, word in ipairs(textTab) do textTab[key] = tr_word(word) end + + return table.concat(textTab, " ") +end + +return export diff --git a/wiktra/wikt/translit/lo-translit.lua b/wiktra/wikt/translit/lo-translit.lua new file mode 100644 index 0000000..412b134 --- /dev/null +++ b/wiktra/wikt/translit/lo-translit.lua @@ -0,0 +1,250 @@ +local export = {} +local gsub = mw.ustring.gsub +local len = mw.ustring.len +local match = mw.ustring.match +local sub = mw.ustring.sub + +local initial_conv = {["ກ"] = "k", ["ຂ"] = "kh", ["ຄ"] = "kh", ["ງ"] = "ng", ["ຈ"] = "ch", ["ສ"] = "s", ["ຊ"] = "s", ["ຍ"] = "ny", ["ດ"] = "d", ["ຕ"] = "t", ["ຖ"] = "th", ["ທ"] = "th", ["ນ"] = "n", ["ບ"] = "b", ["ປ"] = "p", ["ຜ"] = "ph", ["ຝ"] = "f", ["ພ"] = "ph", ["ຟ"] = "f", ["ມ"] = "m", ["ຢ"] = "y", ["ຣ"] = "r", ["ລ"] = "l", ["ວ"] = "w", ["ຫ"] = "h", ["ອ"] = "ʼ", ["ຮ"] = "h", ["ຫງ"] = "ng", ["ຫຍ"] = "ny", ["ໜ"] = "n", ["ຫນ"] = "n", ["ໝ"] = "m", ["ຫມ"] = "m", ["ຫຼ"] = "r", ["ຫຣ"] = "r", ["ຫຼ"] = "l", ["ຫລ"] = "l", ["ຫວ"] = "w"} + +local vowel_conv = { + ["ະ"] = "a", + ["ັ"] = "a", + ["ິ"] = "i", + ["ຶ"] = "ư", + ["ຸ"] = "u", + ["ເະ"] = "e", + ["ເັ"] = "e", + ["ແະ"] = "æ", + ["ແັ"] = "æ", + ["ໂະ"] = "o", + ["ົ"] = "o", + ["ເາະ"] = "ǫ", + ["ັອ"] = "ǫ", + ["ເິ"] = "œ", + ["ເັຍ"] = "ia", + ["ັຽ"] = "ia", + ["ເຶອ"] = "ưa", + ["ົວະ"] = "ua", + ["ັວ"] = "ua", + ["ວັ"] = "ua", + ["ໄ"] = "ai", + ["ໃ"] = "ai", + ["ັຍ"] = "ai", + ["ເົາ"] = "ao", + ["ົາວ"] = "uau", + ["ຳ"] = "am", + ["ໍາ"] = "am", + ["ວຳ"] = "uam", + + ["າ"] = "ā", + ["າວ"] = "āo", + ["ີ"] = "ī", + ["ື"] = "ư̄", + ["ູ"] = "ū", + ["ເ"] = "ē", + ["ແ"] = "ǣ", + ["ໂ"] = "ō", + ["ໂຍ"] = "ōi", + ["ໂຽ"] = "ōi", + ["ໍ"] = "ǭ", + ["ອ"] = "ǭ", + ["ອຍ"] = "ǭi", + ["ອຽ"] = "ǭi", + ["ເີ"] = "œ̄", + ["ເີຽ"] = "œ̄i", + ["ເີຍ"] = "œ̄i", + ["ເຍ"] = "īa", + ["ເັຽ"] = "īa", + ["ຽ"] = "īa", + ["ເືອ"] = "ư̄a", + ["ົວ"] = "ūa", + ["ວ"] = "ūa", + 
["ວຍ"] = "uāi", + ["ວຽ"] = "uāi", + ["ວຽນ"] = "uīan", + ["າຍ"] = "āi", + ["າຽ"] = "āi", + ["ວາ"] = "uā", + ["ວາຍ"] = "uāi", + ["ວາຽ"] = "uāi", + ["ແວ"] = "ǣu", -- ແ_ວ can bo both ǣu and uǣ. The first is more common. + ["ີວ"] = "īu", + ["ິວ"] = "iu", + ["ຽວ"] = "iāu", + ["ວີວ"] = "uīu" +} + +local coda_conv = {["ກ"] = "k", ["ຂ"] = "k", ["ຄ"] = "k", ["ງ"] = "ng", ["ຈ"] = "t", ["ສ"] = "t", ["ຊ"] = "t", ["ຍ"] = "ny", ["ດ"] = "t", ["ຕ"] = "t", ["ຖ"] = "t", ["ທ"] = "t", ["ນ"] = "n", ["ບ"] = "p", ["ປ"] = "p", ["ຜ"] = "p", ["ຝ"] = "p", ["ພ"] = "p", ["ຟ"] = "p", ["ມ"] = "m", ["ຢ"] = "y", ["ຣ"] = "n", ["ລ"] = "n", ["ວ"] = "w", [""] = ""} + +local sp_symbols = {["ຯ"] = "〃", ["ໆ"] = "〃", ["໌"] = "", ["໐"] = "0", ["໑"] = "1", ["໒"] = "2", ["໓"] = "3", ["໔"] = "4", ["໕"] = "5", ["໖"] = "6", ["໗"] = "7", ["໘"] = "8", ["໙"] = "9"} + +local char_type = { + ["ກ"] = "coda", + ["ຂ"] = "coda", + ["ຄ"] = "coda", + ["ງ"] = "coda", + ["ຈ"] = "coda", + ["ຊ"] = "coda", + ["ຍ"] = "ambig", + ["ດ"] = "coda", + ["ຕ"] = "coda", + ["ຖ"] = "coda", + ["ທ"] = "coda", + ["ນ"] = "coda", + ["ບ"] = "coda", + ["ປ"] = "coda", + ["ຜ"] = "coda", + ["ຝ"] = "coda", + ["ພ"] = "coda", + ["ຟ"] = "coda", + ["ມ"] = "coda", + ["ຢ"] = "coda", + ["ຣ"] = "coda", + ["ລ"] = "coda", + ["ວ"] = "ambig", + ["ສ"] = "coda", + ["ຫ"] = "cons", + ["ອ"] = "ambig", + ["ຮ"] = "cons", + ["ຯ"] = "iter_symbol", + ["ະ"] = "vowel_let", + ["ັ"] = "suf_vowel", + ["າ"] = "vowel_let", + ["ຳ"] = "suf_vowel", + ["ິ"] = "suf_vowel", + ["ີ"] = "suf_vowel", + ["ຶ"] = "suf_vowel", + ["ື"] = "suf_vowel", + ["ຸ"] = "suf_vowel", + ["ູ"] = "suf_vowel", + ["ົ"] = "suf_vowel", + ["ຼ"] = "cons", + ["ຽ"] = "vowel_let", + ["ເ"] = "pref_vowel", + ["ແ"] = "pref_vowel", + ["ໂ"] = "pref_vowel", + ["ໃ"] = "pref_vowel", + ["ໄ"] = "pref_vowel", + ["ໆ"] = "iter_symbol", + ["່"] = "tone", + ["້"] = "tone", + ["໊"] = "tone", + ["໋"] = "tone", + ["໌"] = "canc_symbol", + ["ໍ"] = "suf_vowel", + ["໐"] = "number", + ["໑"] = "number", + ["໒"] = "number", + ["໓"] = 
--- Transliterate Lao script into Latin.
-- Each run of Lao characters is split into syllables with the help of
-- `char_type`, and every syllable is romanised through `initial_conv`,
-- `vowel_conv` and `coda_conv`.
-- @param text        string to transliterate
-- @param lang        unused
-- @param sc          unused
-- @param debug_mode  when truthy, return the result even if Lao characters remain
-- @return the transliterated text, or nil when Lao characters are left over
--         and debug_mode is not set
function export.tr(text, lang, sc, debug_mode)
    -- Drop the characters in this class (the marks typed "tone" in char_type)
    -- before any segmentation happens.
    text = gsub(text, "[່້໊໋​]", "")

    for lao_text in mw.ustring.gmatch(text, "[ກ-ໝ]+") do
        local word, c, chartype, output = {}, {}, {}, {}
        local curr_word, curr_initial, curr_vowel, curr_coda = {}, {}, {}, {}
        local original_text = lao_text

        -- Split the run into characters and classify each one.  A character
        -- with no char_type entry is pushed straight onto `word`;
        -- table.insert returns nothing, so chartype[i] stays nil for it.
        for i = 1, len(lao_text) do
            c[i] = sub(lao_text, i, i)
            chartype[i] = char_type[c[i]] or table.insert(word, c[i])
        end

        -- Syllable segmentation.  The loop runs one step past the last
        -- character so that the syllable still being built gets flushed.
        for i = 1, #c + 1 do
            if chartype[i] == "pref_vowel" or i == #c + 1 then
                if #curr_word ~= 0 then
                    table.insert(word, table.concat(curr_word))
                    curr_word, curr_initial, curr_vowel, curr_coda = {c[i]}, {}, {c[i]}, {}
                else
                    table.insert(curr_vowel, c[i])
                    table.insert(curr_word, c[i])
                end

            elseif chartype[i] == "suf_vowel" then
                table.insert(curr_vowel, c[i])
                table.insert(curr_word, c[i])

            elseif chartype[i] == "ambig" then
                -- An ambiguous letter joins the vowel if that yields a known
                -- vowel, joins the initial if that yields a known initial,
                -- and otherwise starts a new syllable.
                if #curr_initial ~= 0 and vowel_conv[table.concat(curr_vowel) .. c[i]] and (chartype[i + 1] ~= "suf_vowel" or match(c[i + 1], "[ໍຳີັ]")) and #curr_coda == 0 then
                    table.insert(curr_vowel, c[i])
                    table.insert(curr_word, c[i])
                elseif (#curr_initial == 0 and char_type[table.concat(curr_vowel)] == "pref_vowel") or (#curr_initial ~= 0 and initial_conv[table.concat(curr_initial) .. c[i]]) then
                    table.insert(curr_initial, c[i])
                    table.insert(curr_word, c[i])
                else
                    if #curr_word ~= 0 then table.insert(word, table.concat(curr_word)) end
                    curr_word, curr_initial, curr_vowel, curr_coda = {c[i]}, {c[i]}, {}, {}
                end

            elseif chartype[i] == "vowel_let" then
                table.insert(curr_vowel, c[i])
                table.insert(curr_word, c[i])

            elseif chartype[i] == "coda" and #curr_coda == 0 and #curr_initial ~= 0 and chartype[i + 1] ~= "suf_vowel" and chartype[i + 1] ~= "vowel_let" and not (chartype[i + 1] == "ambig" and match(chartype[i + 2] or "", "co")) and table.concat(curr_vowel) ~= "ວີວ" then
                table.insert(curr_coda, c[i])
                table.insert(curr_word, c[i])

            elseif chartype[i] == "cons" or chartype[i] == "coda" then
                if #curr_coda == 0 and initial_conv[table.concat(curr_initial) .. c[i]] and (#curr_vowel == 0 or char_type[table.concat(curr_vowel)] == "pref_vowel") then
                    table.insert(curr_initial, c[i])
                    table.insert(curr_word, c[i])
                else
                    table.insert(word, table.concat(curr_word))
                    curr_word, curr_initial, curr_vowel, curr_coda = {c[i]}, {c[i]}, {}, {}
                end

            elseif chartype[i] == "iter_symbol" then
                if #curr_word ~= 0 then table.insert(word, table.concat(curr_word)) end
                curr_word, curr_initial, curr_vowel, curr_coda = {c[i]}, {}, {}, {}

            elseif chartype[i] == "canc_symbol" then
                table.insert(curr_word, c[i])

            elseif chartype[i] == "number" then
                table.insert(curr_word, sp_symbols[c[i]])
            end
        end

        -- Romanise every syllable.
        for i = 1, #word do
            word[i] = gsub(word[i], "^([ເແໂໄໃຽ]?)(ຫ?[ກຂຄງຈສຊຍດຕຖທນບປຜຝພຟມຢຣລວຫອຮໜໝ]ຼ?)([^໌]*)(໌?)$", function(prefix, initial, rest, cancel)
                -- `coda` and `vowel` are locals here; they used to be
                -- assigned as globals, leaking out of the callback.
                local coda
                if match(sub(rest, -1, -1), "[ກຂຄງຈສຊຍດຕຖທນບປຜຝພຟມຢຣລວ]") then
                    coda = sub(rest, -1, -1)
                    rest = sub(rest, 1, -2)
                else
                    coda = ""
                end
                -- No written vowel at all: fall back to the entry for "ະ".
                if prefix .. rest == "" then rest = "ະ" end
                local vowel = vowel_conv[prefix .. rest .. coda] or (vowel_conv[prefix .. rest] or prefix .. rest) .. (coda_conv[coda] or coda)
                -- Lao letters still present: split around ຍ/ອ/ວ and convert the pieces.
                if match(vowel, "[ກ-ໝ]") then vowel = gsub(vowel, "^(.*)([ຍອວ])(.*)$", function(x, y, z) return (vowel_conv[x] or x) .. " " .. (initial_conv[y] or y) .. (vowel_conv[z] or z) end) end
                return (initial_conv[initial] or initial) .. vowel .. cancel
            end)

            -- An iteration symbol repeats the previous syllable.
            if char_type[word[i]] == "iter_symbol" and i >= 2 then word[i] = "" .. word[i - 1] .. "" end

            if match(word[i], "໌") and len(word[i]) > 1 then word[i] = gsub(word[i], "(.)໌", "%1") end

            table.insert(output, word[i])
        end
        lao_text = table.concat(output, " ")
        lao_text = gsub(lao_text, ".", sp_symbols)

        text = gsub(text, original_text, lao_text)
    end

    if match(text, "[ກ-ໝ]") and not debug_mode then
        return nil
    else
        return text
    end
end
-- Transliteration for Maithili in Tirhuta script
local export = {}
local gsub = mw.ustring.gsub
local match = mw.ustring.match

-- Character (optionally followed by 𑓃) -> Latin.  Applied as the last
-- substitution step of export.tr.
local conv = {
    -- consonants
    ["𑒏"] = "k",
    ["𑒐"] = "kh",
    ["𑒑"] = "g",
    ["𑒒"] = "gh",
    ["𑒓"] = "ṅ",
    ["𑒔"] = "c",
    ["𑒕"] = "ch",
    ["𑒖"] = "j",
    ["𑒗"] = "jh",
    ["𑒘"] = "ñ",
    ["𑒙"] = "ṭ",
    ["𑒚"] = "ṭh", -- aspirated counterpart of 𑒙 (was "ṭ", identical to the previous entry)
    ["𑒛"] = "ḍ",
    ["𑒜"] = "ḍh",
    ["𑒝"] = "ṇ",
    ["𑒞"] = "t",
    ["𑒟"] = "th",
    ["𑒠"] = "d",
    ["𑒡"] = "dh",
    ["𑒢"] = "n",
    ["𑒣"] = "p",
    ["𑒤"] = "ph",
    ["𑒥"] = "b",
    ["𑒦"] = "bh",
    ["𑒧"] = "m",
    ["𑒨"] = "y",
    ["𑒩"] = "r",
    ["𑒪"] = "l",
    ["𑒫"] = "v",
    ["𑒪𑓃"] = "ḷ",
    ["𑒬"] = "ś",
    ["𑒭"] = "ṣ",
    ["𑒮"] = "s",
    ["𑒯"] = "h",
    ["𑒛𑓃"] = "ṛ",
    ["𑒜𑓃"] = "ṛh",

    -- vowel diacritics
    ["𑒱"] = "i",
    ["𑒳"] = "u",
    ["𑒹"] = "ē",
    ["𑒺"] = "e",
    ["𑒼"] = "ō",
    ["𑒽"] = "o",
    ["𑒰"] = "ā",
    ["𑒲"] = "ī",
    ["𑒴"] = "ū",
    ["𑒵"] = "r̥",
    ["𑒶"] = "r̥̄",
    ["𑒻"] = "ai",
    ["𑒾"] = "au",
    ["𑒷"] = "l̥",
    ["𑒸"] = "l̥̄",
    ["ॏ"] = "ŏ",
    ["ऺ"] = "â",
    ["ॅ"] = "ĕ",

    -- vowels
    ["𑒁"] = "a",
    ["𑒂"] = "ā",
    ["𑒃"] = "i",
    ["𑒄"] = "ī",
    ["𑒅"] = "u",
    ["𑒆"] = "ū",
    ["𑒇"] = "r̥",
    ["𑒈"] = "r̥̄",
    ["𑒉"] = "l̥",
    ["𑒊"] = "l̥̄",
    ["𑒋"] = "ē",
    ["𑒌"] = "ai",
    ["𑒍"] = "ō",
    ["𑒎"] = "au",
    ["ऎ"] = "e",
    ["ऒ"] = "o",
    ["व़"] = "u",
    ["य़"] = "i",
    ["ॵ"] = "ŏ",
    ["ॴ"] = "â",
    ["ऍ"] = "ĕ",
    ["इऺ"] = "ï",
    ["उऺ"] = "ü",
    ["२"] = "’",
    ["ऽ"] = "ô",

    ["𑒿"] = "̃", -- chandrabindu
    ["𑓀"] = "̃", -- anusvara
    ["𑓅"] = "̃", -- gvang
    ["𑓁"] = "ḥ", -- visarga
    ["𑓂"] = "", -- virama
    ["𑓇"] = "om̐", -- om

    -- numerals
    ["𑓐"] = "0",
    ["𑓑"] = "1",
    ["𑓒"] = "2",
    ["𑓓"] = "3",
    ["𑓔"] = "4",
    ["𑓕"] = "5",
    ["𑓖"] = "6",
    ["𑓗"] = "7",
    ["𑓘"] = "8",
    ["𑓙"] = "9",

    -- punctuation
    ["।"] = ".", -- danda
    ["॥"] = ".", -- double danda
    ["+"] = "", -- compound separator
    -- abbreviation sign
    ["॰"] = "."
}

-- Consonant -> nasal letter that an anusvara placed before it is rewritten to.
local nasal_assim = {["𑒏"] = "𑒓", ["𑒐"] = "𑒓", ["𑒑"] = "𑒓", ["𑒒"] = "𑒓", ["𑒔"] = "𑒘", ["𑒕"] = "𑒘", ["𑒖"] = "𑒘", ["𑒗"] = "𑒘", ["𑒘"] = "𑒘", ["𑒙"] = "𑒝", ["𑒚"] = "𑒝", ["𑒛"] = "𑒝", ["𑒜"] = "𑒝", ["𑒞"] = "𑒢", ["𑒟"] = "𑒢", ["𑒠"] = "𑒢", ["𑒡"] = "𑒢", ["𑒢"] = "𑒢", ["𑒣"] = "𑒧", ["𑒤"] = "𑒧", ["𑒥"] = "𑒧", ["𑒦"] = "𑒧", ["𑒧"] = "𑒧"}
-- Reversed-order sequences that are exempt from the final-"a" retention rule in export.tr.
local perm_cl = {["𑒧𑓂𑒪"] = true}
local all_cons, special_cons = "𑒏𑒐𑒑𑒒𑒓𑒔𑒕𑒖𑒗𑒘𑒙𑒚𑒛𑒜𑒝𑒞𑒟𑒠𑒡𑒢𑒣𑒤𑒥𑒦𑒧𑒨𑒩𑒪𑒫𑒮𑒬𑒭𑒯", "𑒨𑒩𑒪𑒥𑒫𑒯𑒧𑒢"
local vowel, vowel_sign = "a𑒰𑒱𑒱𑒳𑒴𑒹𑒻𑒼𑒾ॉॅॆॊॏॊऺऻॅॉ𑒵𑒶𑒵𑒶𑒸𑒷𑒽𑒺", "𑒁𑒂𑒃𑒄𑒅𑒆𑒋𑒌𑒍𑒎ऍऑऎऒॵॳॴॲ𑒈𑒈𑒉𑒊"
-- vowel, consonant, "a", consonant (any but 𑒨), vowel: the "a" between the
-- two consonants is deleted wherever this matches.
local syncope_pattern = "([" .. vowel .. vowel_sign .. "])(𑓃?[" .. all_cons .. "])a(𑓃?[" .. gsub(all_cons, "𑒨", "") .. "])([𑓀𑒿]?[" .. vowel .. vowel_sign .. "])"

--- Reverse a string code point by code point.
local function rev_string(text)
    local result, length = {}, mw.ustring.len(text)
    for i = 1, length do table.insert(result, mw.ustring.sub(text, length - i + 1, length - i + 1)) end
    return table.concat(result)
end

--- Transliterate Tirhuta-script text into Latin.
-- @param text  string to transliterate
-- @param lang  unused
-- @param sc    unused
-- @return the NFC-normalised transliteration
function export.tr(text, lang, sc)
    -- Give every consonant an explicit "a" unless a vowel sign or 𑓂 follows it.
    text = gsub(text, "([" .. all_cons .. "]𑓃?)([" .. vowel .. "𑓂]?)", function(c, d) return c .. (d == "" and "a" or d) end)

    -- NOTE(review): this class only matches runs made of 𑒿 and "a"; it looks
    -- like a whole-script range may have been intended — confirm before changing.
    for word in mw.ustring.gmatch(text, "[𑒿a]+") do
        local orig_word = word

        -- The rules below work on the reversed word, so "^" anchors at the word's end.
        word = rev_string(word)

        -- The word-final "a" is dropped unless one of these conditions holds.
        word = gsub(word, "^a(𑓃?)([" .. all_cons .. "])(.)(.?)", function(opt, first, second, third)
            local a = ""
            if match(first, "[" .. special_cons .. "]") and match(second, "𑓂") and not perm_cl[first .. second .. third] or match(first .. second, "𑒨[𑒹𑒻𑒲]") then a = "a" end

            return a .. opt .. first .. second .. third
        end)

        while match(word, syncope_pattern) do word = gsub(word, syncope_pattern, "%1%2%3%4") end

        -- Anusvara: in reversed order `succ` is the character that follows it
        -- in the original text and `prev` the one that precedes it.
        word = gsub(word, "(.?)𑓀(.)", function(succ, prev)
            local mid = nasal_assim[succ] or "n"
            if succ .. prev == "a" then
                mid = "𑓃𑓃𑒧"
            elseif succ == "" and match(prev, "[" .. vowel .. "]") then
                mid = "̃"
            end
            return succ .. mid .. prev
        end)

        -- (An unused `escaped_orig_word` local was removed here; orig_word is
        -- still used verbatim as the search pattern, as before.)
        text = gsub(text, orig_word, rev_string(word))
        text = gsub(text, "𑒖𑓂𑒘", "gy")
    end
    text = gsub(text, "ā([iu])̃", "ā͠%1")
    text = gsub(text, "uu", "u")
    text = gsub(text, "aâ", "â")
    text = gsub(text, "ii", "i")
    text = gsub(text, "([iīaāuūeoâ])a", "%1")
    text = gsub(text, "[<>]", "")
    text = gsub(text, ".𑓃?", conv)
    return mw.ustring.toNFC(text)
end

return export
--- Transliterate Maithili written in Devanagari into Latin.
-- @param text  string to transliterate
-- @param lang  unused
-- @param sc    unused
-- @return the NFC-normalised transliteration
function export.tr(text, lang, sc)
    -- Give every consonant (optionally followed by nukta) an explicit "a"
    -- unless a vowel sign or virama already follows it.
    text = gsub(text, "([" .. all_cons .. "]़?)([" .. vowel .. "्]?)", function(c, d) return c .. (d == "" and "a" or d) end)

    for word in mw.ustring.gmatch(text, "[ऀ-ॿa]+") do
        local orig_word = word
        -- The rules below work on the reversed word, so "^" anchors at the word's end.
        word = rev_string(word)

        -- The word-final "a" is dropped unless one of the conditions below holds.
        word = gsub(word, "^a(़?)([" .. all_cons .. "])(.)(.?)", function(opt, first, second, third)
            local special = match(first, "[" .. special_cons .. "]")
            local keep_a = (special and match(second, "ं"))
                or (special and match(second, "्") and not perm_cl[first .. second .. third])
                or match(first .. second, "य[aिुृेोाीूैौॉॅॆॊआईउऊइएऐओऔ]")
                or match(first .. second, "ह[अaिुृेोाीूैौॉॅॆॊआईउऊइएऐओऔ]")
            return (keep_a and "a" or "") .. opt .. first .. second .. third
        end)

        -- Anusvara: in reversed order `succ` is the character that follows it
        -- in the original text and `prev` the one that precedes it.
        word = gsub(word, "(.?)ं(.)", function(succ, prev)
            local mid
            if succ .. prev == "a" then
                mid = "्म"
            elseif succ == "" and match(prev, "[" .. vowel .. "]") then
                mid = "̃"
            else
                mid = nasal_assim[succ] or "̃"
            end
            return succ .. mid .. prev
        end)

        -- (An unused `escaped_orig_word` local was removed here; orig_word is
        -- still used verbatim as the search pattern, as before.)
        text = gsub(text, orig_word, rev_string(word))
        text = gsub(text, "ज्ञ", "gy")
    end
    text = gsub(text, ".़?", conv)
    text = gsub(text, "ā([iu])̃", "ā͠%1")
    text = gsub(text, "uu", "u")
    text = gsub(text, "aâ", "â")
    text = gsub(text, "ii", "i")
    text = gsub(text, "([iīaāuūeoâ])a", "%1")
    text = gsub(text, "[<>]", "")
    return mw.ustring.toNFC(text)
end
--- Convert Latin-script text to the syllable characters listed in tt["Latn"].
-- Any other script code leaves the text untouched apart from NFC normalisation.
-- @param text  string to convert
-- @param lang  unused
-- @param sc    script code; only "Latn" triggers the conversion
-- @return the NFC-normalised result
function export.tr(text, lang, sc)
    if sc ~= "Latn" then return mw.ustring.toNFC(text) end

    local ugsub = mw.ustring.gsub
    local syllables = tt[sc]

    -- Lowercase, then remove the glottal stop
    text = mw.ustring.lower(text)
    text = ugsub(text, "\'", "")

    -- Longest onsets first (ng/ny), then consonant + vowel, then bare vowels.
    local passes = {"ng[aeiou]", "ny[aeiou]", "[kgnpbmtdnrcjyclwsh][aeiou]", "[aeiou]"}
    for _, pattern in ipairs(passes) do
        text = ugsub(text, pattern, syllables)
    end

    -- Whatever consonants and hyphens are still there are left-overs
    text = ugsub(text, "[kgnpbmtdnrcjyclwsh-]", "")

    return mw.ustring.toNFC(text)
end
--- Transliterate text through the `tt` table, one code point at a time.
-- @param text  string to transliterate
-- @param lang  unused
-- @param sc    script code; anything other than "Mani" returns nothing
-- @return the transliterated text (only when sc is "Mani")
-- NOTE(review): the two abbreviation-mark keys in `tt` appear to start with a
-- dotted-circle placeholder, which a single-code-point "." lookup would never
-- produce — confirm whether those entries are reachable.
function export.tr(text, lang, sc)
    if sc == "Mani" then
        local converted = mw.ustring.gsub(text, ".", tt)
        return converted
    end
end
--- Transliterate Cyrillic text through the `tab` table, writing "е" as
-- "je" after a vowel, a sign, a non-letter, or at the start.
-- @param text  string to transliterate
-- @param lang  unused
-- @param sc    unused
-- @return the transliterated text
function export.tr(text, lang, sc)
    local ugsub = mw.ustring.gsub

    -- Ё needs converting if it is decomposed: fold "е"/"Е" + U+0308 into the
    -- precomposed letter so the single-character lookup in `tab` finds it.
    -- The sequences are spelled as byte escapes because the decomposed and
    -- precomposed forms look identical in an editor, and normalising the
    -- source file would otherwise silently turn this rule into a no-op.
    text = text:gsub("\208\181\204\136", "\209\145"):gsub("\208\149\204\136", "\208\129")

    -- е after a vowel or at the beginning of a word becomes je
    -- (\204\129 = combining acute U+0301, \204\128 = combining grave U+0300)
    text = ugsub(text, "([АОУЫЕЯЁЮИЕЪЬаоуыэяёюиеъь%A][\204\129\204\128]?)е", "%1je")
    text = ugsub(text, "^Е", "Je")
    text = ugsub(text, "^е", "je")
    text = ugsub(text, "([^Ѐ-ӿ])Е", "%1Je")
    text = ugsub(text, "([^Ѐ-ӿ])е", "%1je")

    return (ugsub(text, ".", tab))
end
--- Transliterate Malayalam words or phrases into Latin.
-- @param text  string to transliterate
-- @param lang  unused
-- @param sc    unused
-- @return the transliterated text
function export.tr(text, lang, sc)
    local ugsub = mw.ustring.gsub
    local VIRAMA = "്"
    local PUNCT = "([%,%.%!%?%:%;]?)"

    -- A virama at the very end, or right before a space (optionally with one
    -- punctuation mark in between), gets "ŭ" appended.
    text = ugsub(text, VIRAMA .. PUNCT .. "$", VIRAMA .. "ŭ%1")
    text = ugsub(text, VIRAMA .. PUNCT .. " ", VIRAMA .. "ŭ%1 ")

    -- consonant + optional diacritic: look each part up in its table; a
    -- diacritic sequence without an entry is passed through unchanged
    local function romanise(consonant, sign)
        return consonants[consonant] .. (diacritics[sign] or sign)
    end
    text = ugsub(text, "([കഖഗഘങചഛജഝഞടഠഡഢണതഥദധനപഫബഭമയരലവശഷസഹളഴറഩഺ])" .. "(\224\181\129?[\224\180\190\224\180\191\224\181\128\224\181\129\224\181\130\224\181\131\224\181\132\224\181\162\224\181\163\224\181\134\224\181\135\224\181\138\224\181\139\224\181\136\224\181\151\224\181\141]?)", romanise)

    -- everything that is not a consonant
    text = ugsub(text, ".", nonconsonants)

    -- anusvara takes the shape of the nasal matching the following consonant
    local assimilation = {
        {"ṃ([kgṅ])", "ṅ%1"},
        {"ṃ([cjñ])", "ñ%1"},
        {"ṃ([ṭḍṇ])", "ṇ%1"},
        {"ṃ([tdn])", "n%1"},
        {"ṃ([pbm])", "m%1"},
    }
    for _, rule in ipairs(assimilation) do
        text = ugsub(text, rule[1], rule[2])
    end

    -- ŭ is elided before vowels
    text = ugsub(text, "ŭ " .. PUNCT .. "([aāiīuūeo])", " %1%2")

    return text
end
--- Transliterate Cyrillic-script text through `tt_Cyrl`.
-- "е"/"Е" is written "ye"/"Ye" when it follows one of the listed letters or
-- a non-letter (optionally with a combining accent in between) and at the
-- very start of the text.
-- @param text  string to transliterate
-- @return the transliterated text
function trfunc.Cyrl(text)
    local ugsub = mw.ustring.gsub

    local function iotate(before, letter)
        if letter == "е" then return before .. "ye" end
        return before .. "Ye"
    end

    text = ugsub(text, "([АОУЫЕЯЁЮИЕЪЬаоуыэяёюиеъь%A][\204\129\204\128]?)([Ее])", iotate)
    -- plain byte-wise gsub is enough for these two anchored literals
    text = text:gsub("^Е", "Ye")
    text = text:gsub("^е", "ye")

    return (ugsub(text, ".", tt_Cyrl))
end
--- Transliterate text with the handler registered in `trfunc` for its script.
-- @param text  string to transliterate
-- @param lang  unused; the language looked up for detection is always "mn"
-- @param sc    script code; detected from the text when not given
-- @return the handler's result; nil when no script can be detected; nothing
--         when `trfunc` has no handler for the script
function export.tr(text, lang, sc)
    if not sc then
        local find_best = require("scripts").findBestScript
        local detected = find_best(text, require("languages").getByCode("mn"))
        if not detected then return nil end
        sc = detected:getCode()
    end

    local handler = trfunc[sc]
    if handler then return handler(text) end
end
--- Transliterate text through `conv`.
-- A listed letter is looked up together with a following ᡟ/ᡳ first, so that
-- two-character entries in `conv` take precedence over the single letters.
-- @param text  string to transliterate
-- @param lang  unused
-- @param sc    script code; anything other than "Mong" yields nil
-- @return the transliterated text, or nil
function export.tr(text, lang, sc)
    if sc ~= "Mong" then return nil end

    local function romanise(letter, suffix)
        local joined = conv[letter .. suffix]
        if joined then return joined end
        return conv[letter] .. conv[suffix]
    end

    local converted = mw.ustring.gsub(text, "([ᠠᡝᡳᡟᠣᡠᡡᠨᠩᡴᡤᡥᠪᡦᠰᡧᡨᡩᠯᠮᠴᠵᠶᡵᡶᠸᠺᡬᡭᡮᡯᡰᡱᡷ᠈᠉])([ᡟᡳ]?)", romanise)
    return converted
end
--- Transliterate text by looking every code point up in `tt`; code points
-- without an entry are left as they are.
-- @param text  string to transliterate
-- @return the transliterated text
-- NOTE(review): some `tt` keys appear to be a letter plus a combining macron
-- (two code points); a single-code-point "." lookup would never produce
-- those keys — confirm whether they are meant to be reachable.
function export.tr(text)
    local converted = mw.ustring.gsub(text, ".", tt)
    return converted
end
["४"] = "4", + ["५"] = "5", + ["६"] = "6", + ["७"] = "7", + ["८"] = "8", + ["९"] = "9", + + -- punctuation + ["।"] = ".", -- danda + ["॥"] = ".", -- double danda + ["+"] = "", -- compound separator + + -- abbreviation sign + ["॰"] = "." +} + +local nasal_assim = {["क"] = "ङ", ["ख"] = "ङ", ["ग"] = "ङ", ["घ"] = "ङ", ["च"] = "ञ", ["छ"] = "ञ", ["ज"] = "ञ", ["झ"] = "ञ", ["ट"] = "ण", ["ठ"] = "ण", ["ड"] = "ण", ["ढ"] = "ण", ["प"] = "म", ["फ"] = "म", ["ब"] = "म", ["भ"] = "म", ["म"] = "म", ["य"] = "इ", ["र"] = "उ", ["ल"] = "ल", ["व"] = "उ", ["श"] = "उ", ["ष"] = "उ", ["स"] = "उ", ["ह"] = "उ"} + +local perm_cl = {["म्ल"] = true, ["व्ल"] = true, ["न्ल"] = true} + +local all_cons, special_cons = "कखगघङचछजझञटठडढतथदधपफबभशषसयरलवहणनमळ", "दतयरलवहनम" +local vowel, vowel_sign = "%*aिुृेोाीूैौॉॅ", "अइउएओआईऊऋऐऔऑऍ" +local syncope_pattern = "([" .. vowel .. vowel_sign .. "])(़?[" .. all_cons .. "])a(़?[" .. all_cons .. "])([ंँ]?[" .. vowel .. vowel_sign .. "])" + +local function rev_string(text) + local char_array, i = {}, 1 + for char in string.gmatch(text, "[%z\1-\127\194-\244][\128-\191]*") do -- UTF-8 character pattern + char_array[i] = char + i = i + 1 + end + return table.concat(require("table").reverse(char_array)) +end + +function export.tr(text, lang, sc) + text = gsub(text, "ाँ", "ॉ" .. "ं") + text = gsub(text, "ँ", "ॅ" .. "ं") + text = gsub(text, "([^" .. vowel .. vowel_sign .. "])ं ", "%1अ ") + text = gsub(text, "([^" .. vowel .. vowel_sign .. "])ं$", "%1अ") + text = gsub(text, "([" .. all_cons .. "]़?)([" .. vowel .. "्]?)", function(c, d) return c .. (d == "" and "a" or d) end) + for word in mw.ustring.gmatch(text, "[ऀ-ॿa]+") do + local orig_word = word + word = rev_string(word) + word = gsub(word, "^a(़?[" .. all_cons .. "][" .. vowel .. vowel_sign .. "])", "%1") + while find(word, syncope_pattern) do word = gsub(word, syncope_pattern, "%1%2%3%4") end + word = gsub(word, "(.?)ं(.)", function(succ, prev) return succ .. (succ .. 
prev == "a" and "्म" or (succ == "" and find(prev, "[" .. vowel .. "]") and "̃" or nasal_assim[succ] or "n")) .. prev end) + text = gsub(text, orig_word, rev_string(word)) + end + text = gsub(text, ".़?", conv) + text = gsub(text, "a([iu])̃", "a͠%1") + text = gsub(text, "aa", "a") + text = gsub(text, "ñjñ", "ndny") + text = gsub(text, "jñ", "dny") + return mw.ustring.toNFC(text) +end + +return export diff --git a/wiktra/wikt/translit/my-pron.lua b/wiktra/wikt/translit/my-pron.lua new file mode 100644 index 0000000..2cf6ee0 --- /dev/null +++ b/wiktra/wikt/translit/my-pron.lua @@ -0,0 +1,502 @@ +local export = {} +local gsub = mw.ustring.gsub +local sub = mw.ustring.sub +local match = mw.ustring.match + +local system_list = {{1, ["type"] = "phonetic", ["name"] = "IPA"}, {2, ["type"] = "orthographic", ["name"] = "MLCTS"}, {3, ["type"] = "orthographic", ["name"] = "ALA-LC"}, {4, ["type"] = "phonetic", ["name"] = "BGN/PCGN"}, {5, ["type"] = "phonetic", ["name"] = "Okell"}} + +local initial_table = { + ["က"] = {"k", "k", "k", "k", "k"}, + ["ကျ"] = {"t͡ɕ", "ky", "ky", "ky", "c"}, + ["ကြ"] = {"t͡ɕ", "kr", "kr", "ky", "c"}, + ["ကျွ"] = {"t͡ɕw", "kyw", "kyv", "kyw", "cw"}, + ["ကြွ"] = {"t͡ɕw", "krw", "krv", "kyw", "cw"}, + ["ကွ"] = {"kw", "kw", "kv", "kw", "kw"}, + ["ခ"] = {"kʰ", "hk", "kh", "hk", "hk"}, + ["ချ"] = {"t͡ɕʰ", "hky", "khy", "ch", "hc"}, + ["ခြ"] = {"t͡ɕʰ", "hkr", "khr", "ch", "hc"}, + ["ချွ"] = {"t͡ɕʰw", "hkyw", "khyv", "chw", "hcw"}, + ["ခြွ"] = {"t͡ɕʰw", "hkrw", "khrv", "chw", "hcw"}, + ["ခွ"] = {"kʰw", "hkw", "khv", "hkw", "hkw"}, + ["ဂ"] = {"ɡ", "g", "g", "g", "g"}, + ["ဂျ"] = {"d͡ʑ", "gy", "gy", "gy", "j"}, + ["ဂြ"] = {"d͡ʑ", "gr", "gr", "gy", "j"}, + ["ဂျွ"] = {"d͡ʑw", "gyw", "gyv", "gyw", "jw"}, + ["ဂွ"] = {"ɡw", "gw", "gv", "gw", "gw"}, + ["ဃ"] = {"ɡ", "gh", "gh", "g", "g"}, + ["င"] = {"ŋ", "ng", "ṅ", "ng", "ng"}, + ["ငှ"] = {"ŋ̊", "hng", "ṅh", "hng", "hng"}, + ["ငြ"] = {"ɲ", "ngr", "ṅr", "ny", "ny"}, + ["ငြှ"] = {"ɲ̊", "hngr", "ṅrh", "hny", "hny"}, + 
["ငွ"] = {"ŋw", "ngw", "ṅv", "ngw", "ngw"}, + ["ငွှ"] = {"ŋ̊w", "hngw", "ṅvh", "hngw", "hngw"}, + ["စ"] = {"s", "c", "c", "s", "s"}, + ["စွ"] = {"sw", "cw", "cv", "sw", "sw"}, + ["ဆ"] = {"sʰ", "hc", "ch", "hs", "hs"}, + ["ဆွ"] = {"sʰw", "hcw", "chv", "hsw", "hsw"}, + ["ဇ"] = {"z", "j", "j", "z", "z"}, + ["ဇွ"] = {"zw", "jw", "jv", "zw", "zw"}, + ["ဈ"] = {"z", "jh", "jh", "z", "z"}, + ["ဉ"] = {"ɲ", "ny", "ñ", "ny", "ny"}, + ["ည"] = {"ɲ", "ny", "ññ", "ny", "ny"}, + ["ဉှ"] = {"ɲ̊", "hny", "ñh", "hny", "hny"}, + ["ညှ"] = {"ɲ̊", "hny", "ññh", "hny", "hny"}, + ["ညွ"] = {"ɲw", "nyw", "ñv", "nyw", "nyw"}, + ["ညွှ"] = {"ɲ̊w", "hnyw", "ñvh", "hnyw", "hnyw"}, + ["ဋ"] = {"t", "t", "ṭ", "t", "t"}, + ["ဌ"] = {"tʰ", "ht", "ṭh", "ht", "ht"}, + ["ဍ"] = {"d", "d", "ḍ", "d", "d"}, + ["ဎ"] = {"d", "dh", "ḍh", "d", "d"}, + ["ဏ"] = {"n", "n", "ṇ", "n", "n"}, + ["ဏှ"] = {"n̥", "hn", "ṇh", "hn", "hn"}, + ["တ"] = {"t", "t", "t", "t", "t"}, + ["တျ"] = {"tj", "ty", "ty", "ty", "ty"}, + ["တြ"] = {"tɹ", "tr", "tr", "tr", "tr"}, + ["တွ"] = {"tw", "tw", "tv", "tw", "tw"}, + ["ထ"] = {"tʰ", "ht", "th", "ht", "ht"}, + ["ထွ"] = {"tʰw", "htw", "thv", "htw", "htw"}, + ["ဒ"] = {"d", "d", "d", "d", "d"}, + ["ဒျ"] = {"dj", "dy", "dy", "dy", "dy"}, + ["ဒြ"] = {"dɹ", "dr", "dr", "dr", "dr"}, + ["ဒွ"] = {"dw", "dw", "dv", "dw", "dw"}, + ["ဓ"] = {"d", "dh", "dh", "d", "d"}, + ["န"] = {"n", "n", "n", "n", "n"}, + ["နှ"] = {"n̥", "hn", "nh", "hn", "hn"}, + ["နျ"] = {"nj", "ny", "ny", "ny", "ny"}, + ["နွ"] = {"nw", "nw", "nv", "nw", "nw"}, + ["နွှ"] = {"n̥w", "hnw", "nvh", "hnw", "hnw"}, + ["ပ"] = {"p", "p", "p", "p", "p"}, + ["ပျ"] = {"pj", "py", "py", "py", "py"}, + ["ပြ"] = {"pj", "pr", "pr", "py", "py"}, + ["ပြွ"] = {"pw", "prw", "prv", "pw", "pw"}, + ["ပွ"] = {"pw", "pw", "pv", "pw", "pw"}, + ["ဖ"] = {"pʰ", "hp", "ph", "hp", "hp"}, + ["ဖျ"] = {"pʰj", "hpy", "phy", "hpy", "hpy"}, + ["ဖြ"] = {"pʰj", "hpr", "phr", "hpy", "hpy"}, + ["ဖွ"] = {"pʰw", "hpw", "phv", "hpw", "hpw"}, + ["ဗ"] = {"b", "b", "b", "b", 
"b"}, + ["ဗျ"] = {"bj", "by", "by", "by", "by"}, + ["ဗြ"] = {"bj", "br", "br", "by", "by"}, + ["ဗွ"] = {"bw", "bw", "bv", "bw", "bw"}, + ["ဘ"] = {"b", "bh", "bh", "b", "b"}, + ["-ဘ"] = {"pʰ", "bh", "bh", "hp", "hp"}, + ["ဘွ"] = {"bw", "bhw", "bhv", "bw", "bw"}, + ["-ဘွ"] = {"pʰw", "bhw", "bhw", "hpw", "hpw"}, + ["မ"] = {"m", "m", "m", "m", "m"}, + ["မှ"] = {"m̥", "hm", "mh", "hm", "hm"}, + ["မျ"] = {"mj", "my", "my", "my", "my"}, + ["မျှ"] = {"m̥j", "hmy", "myh", "hmy", "hmy"}, + ["မြ"] = {"mj", "mr", "mr", "my", "my"}, + ["မြှ"] = {"m̥j", "hmr", "mrh", "hmy", "hmy"}, + ["မြွ"] = {"mjw", "mrw", "mrv", "myw", "myw"}, + ["မြွှ"] = {"m̥w", "hmrw", "mrvh", "hmw", "hmw"}, + ["မွ"] = {"mw", "mw", "mv", "mw", "mw"}, + ["မွှ"] = {"m̥w", "hmw", "mvh", "hmw", "hmw"}, + ["ယ"] = {"j", "y", "y", "y", "y"}, + ["ယှ"] = {"ʃ", "hy", "yh", "sh", "hy"}, + ["သျှ"] = {"ʃ", "hsy", "syh", "sh", "hy"}, + ["ယွ"] = {"jw", "yw", "yv", "yw", "yw"}, + ["ရ"] = {"j", "r", "r", "y", "y"}, + ["*ရ"] = {"ɹ", "r", "r", "r", "r"}, + ["ရှ"] = {"ʃ", "hr", "rh", "sh", "hy"}, + ["ရွ"] = {"jw", "rw", "rv", "yw", "yw"}, + ["ရွှ"] = {"ʃw", "hrw", "rvh", "shw", "hyw"}, + ["လ"] = {"l", "l", "l", "l", "l"}, + ["လှ"] = {"l̥", "hl", "lh", "hl", "hl"}, + ["လျ"] = {"j", "ly", "ly", "y", "y"}, + ["+သျှ"] = {"j", "hsy", "syh", "y", "y"}, + ["*လျ"] = {"lj", "ly", "ly", "ly", "ly"}, + ["လျှ"] = {"ʃ", "hly", "lyh", "sh", "hy"}, + ["*လျှ"] = {"l̥j", "hly", "lyh", "hly", "hly"}, + ["လွ"] = {"lw", "lw", "lv", "lw", "lw"}, + ["လွှ"] = {"l̥w", "hlw", "lvh", "hlw", "hlw"}, + ["ဝ"] = {"w", "w", "v", "w", "w"}, + ["ဝှ"] = {"ʍ", "hw", "vh", "hw", "hw"}, + ["သ"] = {"θ", "s", "s", "th", "th"}, + ["+သ"] = {"ð", "s", "s", "dh", "th"}, + ["သွ"] = {"θw", "sw", "sv", "thw", "thw"}, + ["+သွ"] = {"ðw", "sw", "sw", "dhw", "thw"}, + ["ဟ"] = {"h", "h", "h", "h", "h"}, + ["ဟွ"] = {"hw", "hw", "hv", "hw", "hw"}, + ["ဠ"] = {"l", "l", "ḷ", "l", "l"}, + ["အ"] = {"ʔ", "", "ʼ", "", ""}, + -- only appears after a vowel in the same word + ["ဿ"] = 
{"ʔθ", "ss", "ss", "tth", "ʔth"}, + [""] = {"ʔ", "", "", "", ""}, + ["-"] = {"", "", "", "", ""}, + + ["ျ"] = {nil, "y", "y", nil, nil}, + ["ြ"] = {nil, "r", "r", nil, nil}, + ["ွ"] = {nil, "w", "w", nil, nil} +} + +local initial_voicing = {["+က"] = "ဂ", ["+ခ"] = "ဂ", ["+စ"] = "ဇ", ["+ဆ"] = "ဇ", ["+ဋ"] = "ဍ", ["+ဌ"] = "ဍ", ["+တ"] = "ဒ", ["+ထ"] = "ဒ", ["+ပ"] = "ဗ", ["+ဖ"] = "ဗ", ["-ဘ"] = "ဖ"} + +local final_table = { + [""] = {"a̰", "a.", "a", "a.", "á"}, + ["က်"] = {"ɛʔ", "ak", "akʻ", "et", "eʔ"}, + ["င်"] = {"ɪ̀ɴ", "ang", "aṅʻ", "in", "iñ"}, + ["စ်"] = {"ɪʔ", "ac", "acʻ", "it", "iʔ"}, + ["ည်"] = {"ì", "any", "aññʻ", "i", "i"}, + ["ည်2"] = {"è", "any", "aññʻ", "e", "ei"}, + ["ည်3"] = {"ɛ̀", "any", "aññʻ", "è", "e"}, + ["ဉ်"] = {"ɪ̀ɴ", "any", "añʻ", "in", "iñ"}, + ["တ်"] = {"aʔ", "at", "atʻ", "at", "aʔ"}, + ["န်"] = {"àɴ", "an", "anʻ", "an", "añ"}, + ["ပ်"] = {"aʔ", "ap", "apʻ", "at", "aʔ"}, + ["မ်"] = {"àɴ", "am", "amʻ", "an", "añ"}, + ["ယ်"] = {"ɛ̀", "ai", "ayʻ", "è", "e"}, + ["ံ"] = {"àɴ", "am", "aṃ", "an", "añ"}, + ["ာ"] = {"à", "a", "ā", "a", "a"}, + ["ါ"] = {"à", "a", "ā", "a", "a"}, + ["ိ"] = {"ḭ", "i.", "i", "i.", "í"}, + ["ိတ်"] = {"eɪʔ", "it", "itʻ", "eik", "eiʔ"}, + ["ိန်"] = {"èɪɴ", "in", "inʻ", "ein", "eiñ"}, + ["ိပ်"] = {"eɪʔ", "ip", "ipʻ", "eik", "eiʔ"}, + ["ိမ်"] = {"èɪɴ", "im", "imʻ", "ein", "eiñ"}, + ["ိံ"] = {"èɪɴ", "im", "iṃ", "ein", "eiñ"}, + ["ီ"] = {"ì", "i", "ī", "i", "i"}, + ["ု"] = {"ṵ", "u.", "u", "u.", "ú"}, + ["ုတ်"] = {"oʊʔ", "ut", "utʻ", "ok", "ouʔ"}, + ["ုန်"] = {"òʊɴ", "un", "unʻ", "on", "ouñ"}, + ["ုပ်"] = {"oʊʔ", "up", "upʻ", "ok", "ouʔ"}, + ["ုမ်"] = {"òʊɴ", "um", "umʻ", "on", "ouñ"}, + ["ုံ"] = {"òʊɴ", "um", "uṃ", "on", "ouñ"}, + ["ူ"] = {"ù", "u", "ū", "u", "u"}, + ["ေ"] = {"è", "e", "e", "e", "ei"}, + ["ဲ"] = {"ɛ́", "ai:", "ai", "è:", "è"}, + ["ော"] = {"ɔ́", "au:", "o", "aw:", "ò"}, + ["ောက်"] = {"aʊʔ", "auk", "okʻ", "auk", "auʔ"}, + ["ောင်"] = {"àʊɴ", "aung", "oṅʻ", "aung", "auñ"}, + ["ော်"] = {"ɔ̀", "au", "oʻ", "aw", "o"}, + 
["ို"] = {"ò", "ui", "ui", "o", "ou"}, + ["ိုက်"] = {"aɪʔ", "uik", "uikʻ", "aik", "aiʔ"}, + ["ိုင်"] = {"àɪɴ", "uing", "uiṅʻ", "aing", "aiñ"}, + ["ွတ်"] = {"ʊʔ", "wat", "vatʻ", "ut", "uʔ"}, + ["ွန်"] = {"ʊ̀ɴ", "wan", "vanʻ", "un", "uñ"}, + ["ွပ်"] = {"ʊʔ", "wap", "vapʻ", "ut", "uʔ"}, + ["ွမ်"] = {"ʊ̀ɴ", "wam", "vamʻ", "un", "uñ"}, + ["ွံ"] = {"ʊ̀ɴ", "wam", "vaṃ", "un", "uñ"}, + ["'"] = {"ə", "a", "a", "ă", "ă"}, + ["်"] = {"", "", "ʻ", "", ""} +} + +local nucleus_table = {[""] = {"à", "a", "a", "a", "a"}, ["ိ"] = {"ì", "i", "i", "i", "i"}, ["ု"] = {"ù", "u", "u", "u", "u"}, ["ော"] = {"ɔ̀", "au", "o", "aw", "o"}, ["ေါ"] = {"ɔ̀", "au", "o", "aw", "o"}, ["ွ"] = {"ʊ̀", "wa", "va", "u", "u"}} + +local indep_letter_table = {["ဣ"] = {"ḭ", "i.", "i", "i.", "í"}, ["ဤ"] = {"ì", "i", "ī", "i", "i"}, ["ဥ"] = {"ṵ", "u.", "u", "u.", "ú"}, ["ဦ"] = {"ù", "u", "ū", "u", "u"}, ["ဧ"] = {"è", "e", "e", "e", "ei"}, ["၏"] = {"ɛ̰", "e", "e*", "è.", "é"}, ["ဩ"] = {"ɔ́", "au:", "o", "aw:", "ò"}, ["ဪ"] = {"ɔ̀", "au", "oʻ", "aw", "o"}, ["၌"] = {"n̥aɪʔ", "hnai.", "n*", "hnaik", "hnaiʔ"}, ["၍"] = {"jwḛ", "rwe", "r*", "ywe.", "yweí"}} + +local tone_table = {["း"] = {"́", ":", "ʺ", ":", "̀"}, ["့"] = {"̰", ".", "ʹ", ".", "́"}} + +local ambig_intersyl = {[1] = {}, [2] = {["ky"] = 1, ["kr"] = 1, ["kw"] = 1, ["gy"] = 1, ["gr"] = 1, ["gw"] = 1, ["ng"] = 1, ["ny"] = 1, ["cw"] = 1, ["tw"] = 1, ["nw"] = 1, ["py"] = 1, ["pr"] = 1, ["pw"] = 1, ["my"] = 1, ["mr"] = 1, ["mw"] = 1}, [3] = {}, [4] = {["ky"] = 1, ["kr"] = 1, ["kw"] = 1, ["gy"] = 1, ["gr"] = 1, ["gw"] = 1, ["ng"] = 1, ["ny"] = 1, ["cw"] = 1, ["tw"] = 1, ["nw"] = 1, ["tr"] = 1, ["tw"] = 1, ["py"] = 1, ["pr"] = 1, ["pw"] = 1, ["my"] = 1, ["mr"] = 1, ["mw"] = 1}, [5] = {["ou"] = 1}} + +local reverse_table = { + ["hm"] = "မှ", + ["m"] = "မ", + ["hn"] = "နှ", + ["n"] = "န", + ["hny"] = "ညှ", + ["ny"] = "ည", + ["hng"] = "ငှ", + ["ng"] = "င", + ["p"] = "ပ", + ["hp"] = "ဖ", + ["b"] = "ဗ", + ["t"] = "တ", + ["ht"] = "ထ", + ["d"] = "ဒ", + ["c"] = "ကျ", + 
["hc"] = "ချ", + ["j"] = "ဂျ", + ["k"] = "က", + ["hk"] = "ခ", + ["g"] = "ဂ", + [""] = "အ", + ["th"] = "သ", + ["+th"] = "+သ", + ["s"] = "စ", + ["hs"] = "ဆ", + ["z"] = "ဇ", + ["hy"] = "ရှ", + ["h"] = "ဟ", + ["r"] = "*ရ", + ["y"] = "ယ", + ["hw"] = "ဝှ", + ["w"] = "ဝ", + ["hl"] = "လှ", + ["l"] = "လ", + ["hmw"] = "မွှ", + ["mw"] = "မွ", + ["hmy"] = "မျှ", + ["my"] = "မျ", + ["hnw"] = "နွှ", + ["nw"] = "နွ", + ["hnyw"] = "ညွှ", + ["nyw"] = "ညွ", + ["hngw"] = "ငွှ", + ["ngw"] = "ငွ", + ["pw"] = "ပွ", + ["hpw"] = "ဖွ", + ["bw"] = "ဗွ", + ["py"] = "ပျ", + ["hpy"] = "ဖျ", + ["by"] = "ဗျ", + ["tw"] = "တွ", + ["htw"] = "ထွ", + ["dw"] = "ဒွ", + ["cw"] = "ကျွ", + ["hcw"] = "ချွ", + ["jw"] = "ဂျွ", + ["kw"] = "ကွ", + ["hkw"] = "ခွ", + ["gw"] = "ဂွ", + ["thw"] = "သွ", + ["sw"] = "စွ", + ["hsw"] = "ဆွ", + ["zw"] = "ဇွ", + ["hyw"] = "ရွှ", + ["hw"] = "ဟွ", + ["yw"] = "ယွ", + ["hlw"] = "လွှ", + ["lw"] = "လွ", + ["hly"] = "*လျှ", + ["ly"] = "*လျ", + + ["i"] = "ီ", + ["i\\"] = "ီး", + ["i/"] = "ိ", + ["i?"] = "စ်", + ["i~"] = "င်", + ["i\\~"] = "င်း", + ["i/~"] = "င့်", + ["ei"] = "ေ", + ["ei\\"] = "ေး", + ["ei/"] = "ေ့", + ["ei?"] = "ိတ်", + ["ei~"] = "ိန်", + ["ei\\~"] = "ိန်း", + ["ei/~"] = "ိန့်", + ["e"] = "ယ်", + ["e\\"] = "ဲ", + ["e/"] = "ယ့်", + ["e?"] = "က်", + ["ai~"] = "ိုင်", + ["ai\\~"] = "ိုင်း", + ["ai/~"] = "ိုင့်", + ["ai?"] = "ိုက်", + ["a"] = "ာ", + ["a\\"] = "ား", + ["a/"] = "", + ["a?"] = "တ်", + ["a~"] = "န်", + ["a\\~"] = "န်း", + ["a/~"] = "န့်", + ["o"] = "ော်", + ["o\\"] = "ော", + ["o/"] = "ော့", + ["au?"] = "ောက်", + ["au~"] = "ောင်", + ["au\\~"] = "ောင်း", + ["au/~"] = "ောင့်", + ["ou"] = "ို", + ["ou\\"] = "ိုး", + ["ou/"] = "ို့", + ["ou?"] = "ုပ်", + ["ou~"] = "ုန်", + ["ou\\~"] = "ုန်း", + ["ou/~"] = "ုန့်", + ["u"] = "ူ", + ["u\\"] = "ူး", + ["u/"] = "ု", + ["u?"] = "ွတ်", + ["u~"] = "ွန်", + ["u\\~"] = "ွန်း", + ["u/~"] = "ွန့်", + ["a'"] = "'" +} + +local repl_string = 
"([ကခဂဃငစဆဇဈဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠအဿ][ျြွှ]*[ံ့းွာါါိီုူေဲ]*)([ကခဂဃငစဆဇဈဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠအဿ][့]?[^့်္])" + +function syllabify(text) + text = gsub(text, "('?)([%+%-%*]*)", function(a, b) if a .. b ~= "" then return a .. " " .. b end end) + + text = gsub(text, "([ဣဤဥဦဧဩဪ၏၌၍][့း်]?)(.?)(.?)", function(a, b, c) return (c == "္" and " " .. a .. b .. " " .. c or (c == "်" and " " .. a .. b .. c or " " .. a .. " " .. b .. c)) end) .. " " + + text = gsub(text, "(်း?'?)", "%1 ") + text = gsub(text, "([း့])([ကခဂဃငစဆဇဈဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠအ]်)", "%2%1") + + while match(text, repl_string) do text = gsub(text, repl_string, "%1 %2") end + + text = gsub(text, "္", " , ") + text = gsub(text, " +", " ") + text = gsub(text, "^ ?(.*[^ ]) ?$", "%1") + text = gsub(text, " , ", " ") + text = gsub(text, " ([23])", "%1") + return text +end + +function initial_by_char(initial_string, system_index, ref_table) + local initial_set = {} + for character in mw.text.gsplit(initial_string, "") do + local temp_initial = ref_table[character] or error("Initial data not found.") + table.insert(initial_set, temp_initial[system_index] or temp_initial) + end + return table.concat(initial_set) +end + +function generate_respelling(text) + text = gsub(text, " ", "   ") + text = gsub(text, "ါ", "ာ") + if match(text, "[က-႟ꩠ-ꩻ]") then return text end + text = gsub(text, "(%+?)([^%?%+'/\\~aeiou ]*)(/?)([%?'/\\~aeiou]+)", function(voicing_mark, latin_initial, opt_sep, latin_final) return voicing_mark .. (reverse_table[latin_initial] or initial_by_char(latin_initial, nil, reverse_table)) .. opt_sep .. reverse_table[latin_final] end) + return text +end + +function process(initial, final, tone, schwa, system, system_index) + if match(initial .. final, "ွှ?[တနပမံ]") and system["type"] == "phonetic" then + initial = gsub(initial, "[ွ/]", "") + final = "ွ" .. 
final + else + initial = gsub(initial, "/", "") + end + + initial_new = system["type"] == "phonetic" and gsub(initial, "%+.", initial_voicing) or initial + + if indep_letter_table[initial_new] then + initial_new = match(initial_new, "[၌၍]") and "-" or "" + final = initial .. final + end + if initial_new == "မြွ" then require("debug").track("my-pron/mrw") end + + initial_data = initial_table[initial_new] or initial_table[gsub(initial_new, "[%+%-%*]", "")] or (system["type"] == "orthographic" and initial_by_char(initial_new, system_index, initial_table) or error("Initial data not found.")) + + initial_value = initial_data[system_index] or initial_data + + if match(initial, "^%+") and system_index == 5 then + initial_value = initial_table[gsub(initial, "%+", "")][system_index] + initial_value = gsub(initial_value, "^([^rwy]+)", "%1") + end + + final_data = final_table[system["type"] .. schwa == "phonetic'" and schwa or final] or (system["type"] == "phonetic" and (final_table[final .. "်"] or indep_letter_table[final]) or indep_letter_table[final]) or gsub(final, "^([^်]*)([^်])(်?)$", function(first, second, third) + first_data = nucleus_table[first] or final_table[first] or indep_letter_table[first] or first + second_data = initial_table[second] or second + first = first_data ~= first and first_data[system_index] or first + second = second_data ~= second and second_data[system_index] .. ((system_index == 3 and third ~= "") and "ʻ" or "") or second + return (gsub(first .. second, "([%.:])(.*)", "%2")) + end) + + final_value = type(final_data) == "table" and final_data[system_index] or final_data + final_value = mw.ustring.toNFD(final_value) + if tone == "" then + tone_value = "" + else + if system_index ~= 4 then final_value = gsub(final_value, "̀", "") end + final_value = gsub(final_value, "[́:%.]", "") + if system["type"] .. 
schwa == "phonetic'" then + tone_value = "" + else + tone_data = tone_table[tone] or error("Tone data not found.") + tone_value = tone_data[system_index] + end + end + + if system_index == 1 then + final_value = gsub(final_value, "^([aeəɛiɪoɔuʊ])", "%1" .. tone_value) + elseif system_index == 5 then + final_value = gsub(final_value, "([aeiou])([^aeiou]*)$", "%1" .. tone_value .. "%2") + else + final_value = final_value .. tone_value + end + + return mw.ustring.toNFC(initial_value .. final_value) +end + +function remove_wide_space(text) return (gsub(text, " ", "")) end + +function concatenate(set, system_index) + if system_index == 1 then return remove_wide_space(table.concat(set)) end + result_text = remove_wide_space(table.concat(set, " ")) + + for count = 1, 3 do + result_text = gsub(result_text, "(.) (.)([^ ]?)", function(previous, next, after_next) + if ambig_intersyl[system_index][previous .. next] or ((system_index == 2 or system_index == 4) and (match(previous .. " " .. next, "[ptkgmngy] [aeiou]") or (match(previous .. next .. after_next, "[aeiou][ptkmn][rwyg]") and not match(after_next, "[aeiou]")))) then + return previous .. "-" .. next .. after_next + else + return previous .. next .. 
after_next + end + end) + end + + return result_text +end + +function export.get_romanisation(word, pronunciations, system, system_index, mode) + local sentences = {} + word = gsub(word, " ", "|") + if system["type"] == "phonetic" then word = gsub(word, "ဿ", "တ်သ") end + word = syllabify(word) + word = gsub(word, "ါ", "ာ") + if system["type"] == "phonetic" then word = gsub(word, "ဝ([တနပမံ])", "ဝွ%1") end + for phrase in mw.text.gsplit(word, "|", true) do + local temp = {} + local syllable = mw.text.split(phrase, " ", true) + for syllable_index = 1, #syllable do + syllable[syllable_index] = gsub(syllable[syllable_index], "([း့])(်)", "%2%1") + temp[syllable_index] = gsub(syllable[syllable_index], "^([%+%-%*]*[ကခဂဃငစဆဇဈဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠအဣဤဥဦဧဩဪ၏၌၍ဿ][ျြ]?ွ?ှ?/?)([^း့']*)([း့]?)('?)$", function(initial, final, tone, schwa) return process(initial, final, tone, schwa, system, system_index) end) + end + table.insert(sentences, concatenate(temp, system_index)) + end + if mode == "translit_module" then return table.concat(sentences, " ") end + table.insert(pronunciations[system_index], table.concat(sentences, " ")) + return pronunciations[system_index] +end + +function respelling_format(phonetic, page_title) + local page_title_set = mw.text.split(syllabify(page_title), " ") + local new_respellings = {} + for _, respelling in ipairs(phonetic) do + local respelling_set = mw.text.split(syllabify(respelling), " ") + if gsub(table.concat(respelling_set), "[%+%-%*']", "") == (gsub(table.concat(page_title_set), "ါ", "ာ")) then for index, element in ipairs(respelling_set) do if element ~= page_title_set[index] then respelling_set[index] = "" .. element .. 
"" end end end + table.insert(new_respellings, table.concat(respelling_set)) + end + text = table.concat(new_respellings, ", ") + text = remove_wide_space(text) + text = gsub(text, "[%+%-].", initial_voicing) + text = gsub(text, "([ခဂငဒပဝ]ေ?)ာ", "%1ါ") + return text +end + +function export.generate_tests(word, respelling) + respelling, word = generate_respelling(respelling), generate_respelling(word) + local pronunciations = {[1] = {}, [2] = {}, [3] = {}, [4] = {}, [5] = {}} + local p, result = {["orthographic"] = word, ["phonetic"] = respelling or word}, {} + + table.sort(system_list, function(first, second) return first[1] < second[1] end) + for system_index, system in ipairs(system_list) do pronunciations[system_index] = export.get_romanisation(p[system["type"]], pronunciations, system, system_index) end + for system_index = 1, 5 do table.insert(result, table.concat(pronunciations[system_index])) end + return (gsub(gsub(table.concat(result, " | "), "", "("), "", ")")) +end + +function export.make(frame) + local args = frame:getParent().args + local page_title = mw.title.getCurrentTitle().text + local title = generate_respelling(args["word"] or page_title) + + local p, result = {["orthographic"] = {title}, ["phonetic"] = {}}, {} + local pronunciations = {[1] = {}, [2] = {}, [3] = {}, [4] = {}, [5] = {}} + + if not args[1] then args = {title} end + for index, item in ipairs(args) do table.insert(p["phonetic"], (item ~= "") and generate_respelling(item) or nil) end + + table.sort(system_list, function(first, second) return first[1] < second[1] end) + for system_index, system in ipairs(system_list) do for _, word in ipairs(p[system["type"]]) do pronunciations[system_index] = export.get_romanisation(word, pronunciations, system, system_index) end end + + if title ~= table.concat(args) then table.insert(result, "* Phonetic respelling" .. (#p["phonetic"] > 1 and "s" or "") .. ": " .. 
tostring(mw.html.create("span"):attr("lang", "my"):attr("class", "Mymr"):wikitext(respelling_format(p["phonetic"], page_title))) .. "\n") end + + table.insert(result, "* [[Wiktionary:International Phonetic Alphabet|IPA]]" .. "([[Appendix:Burmese pronunciation|key]]): " .. (tostring(mw.html.create("span"):attr("class", "IPA"):wikitext("/" .. gsub(table.concat(pronunciations[1], "/, /"), "ʔʔ", "ʔ.ʔ") .. "/"))) .. "\n* [[Wiktionary:Burmese transliteration|Romanization:]] ") + + for system_index = 2, 5 do table.insert(result, (system_index ~= 2 and " • " or "") .. "''" .. system_list[system_index]["name"] .. ":'' " .. table.concat(pronunciations[system_index], "/")) end + + return table.concat(result) +end + +return export diff --git a/wiktra/wikt/translit/my-translit.lua b/wiktra/wikt/translit/my-translit.lua new file mode 100644 index 0000000..d1b8a8b --- /dev/null +++ b/wiktra/wikt/translit/my-translit.lua @@ -0,0 +1,21 @@ +local export = {} +local gsub = mw.ustring.gsub + +local symbols = {["၀"] = "0", ["၁"] = "1", ["၂"] = "2", ["၃"] = "3", ["၄"] = "4", ["၅"] = "5", ["၆"] = "6", ["၇"] = "7", ["၈"] = "8", ["၉"] = "9", ["၊"] = "|", ["။"] = "||"} + +function export.tr(text, lang, sc, debug_mode) + local m_pron = require("my-pron").get_romanisation + text = gsub(text, ".", symbols) + for word in mw.ustring.gmatch(text, "[က-႟ꩠ-ꩻ]+") do + success, translit = pcall(m_pron, word, nil, {2, ["type"] = "orthographic", ["name"] = "MLCTS"}, 2, "translit_module") + if success then + text = gsub(text, word, translit, 1) + else + return nil + end + end + if mw.ustring.match(text, "[က-႟ꩠ-ꩻ]") and not debug_mode then return nil end + return text +end + +return export diff --git a/wiktra/wikt/translit/myv-translit.lua b/wiktra/wikt/translit/myv-translit.lua new file mode 100644 index 0000000..16f8af3 --- /dev/null +++ b/wiktra/wikt/translit/myv-translit.lua @@ -0,0 +1,19 @@ +local export = {} + +local tab = {["А"] = "A", ["Б"] = "B", ["В"] = "V", ["Г"] = "G", ["Д"] = "D", ["Е"] = 
"E", ["Ё"] = "Jo", ["Ж"] = "Ž", ["З"] = "Z", ["И"] = "I", ["Й"] = "J", ["К"] = "K", ["Л"] = "L", ["М"] = "M", ["Н"] = "N", ["О"] = "O", ["П"] = "P", ["Р"] = "R", ["С"] = "S", ["Т"] = "T", ["У"] = "U", ["Ф"] = "F", ["Х"] = "X", ["Ц"] = "C", ["Ч"] = "Č", ["Ш"] = "Š", ["Щ"] = "Šč", ["Ъ"] = "ʺ", ["Ы"] = "Y", ["Ь"] = "ʹ", ["Э"] = "E", ["Ю"] = "Ju", ["Я"] = "Ja", ["а"] = "a", ["б"] = "b", ["в"] = "v", ["г"] = "g", ["д"] = "d", ["е"] = "e", ["ё"] = "jo", ["ж"] = "ž", ["з"] = "z", ["и"] = "i", ["й"] = "j", ["к"] = "k", ["л"] = "l", ["м"] = "m", ["н"] = "n", ["о"] = "o", ["п"] = "p", ["р"] = "r", ["с"] = "s", ["т"] = "t", ["у"] = "u", ["ф"] = "f", ["х"] = "x", ["ц"] = "c", ["ч"] = "č", ["ш"] = "š", ["щ"] = "šč", ["ъ"] = "ʺ", ["ы"] = "y", ["ь"] = "ʹ", ["э"] = "e", ["ю"] = "ju", ["я"] = "ja"} + +function export.tr(text, lang, sc) + -- Ё needs converting if is decomposed + text = text:gsub("ё", "ё"):gsub("Ё", "Ё") + + -- е after a vowel or at the beginning of a word becomes je + text = mw.ustring.gsub(text, "([АОУЫЕЯЁЮИЕЪЬаоуыэяёюиеъь%A][́̀]?)е", "%1je") + text = mw.ustring.gsub(text, "^Е", "Je") + text = mw.ustring.gsub(text, "^е", "je") + text = mw.ustring.gsub(text, "([^Ѐ-ӿ])Е", "%1Je") + text = mw.ustring.gsub(text, "([^Ѐ-ӿ])е", "%1je") + + return (mw.ustring.gsub(text, ".", tab)) +end + +return export diff --git a/wiktra/wikt/translit/ne-translit.lua b/wiktra/wikt/translit/ne-translit.lua new file mode 100644 index 0000000..b8055ff --- /dev/null +++ b/wiktra/wikt/translit/ne-translit.lua @@ -0,0 +1,231 @@ +-- Transliteration for Nepali +local export = {} +local gsub = mw.ustring.gsub +local match = mw.ustring.match + +local conv = { + -- consonants + ["क"] = "k", + ["ख"] = "kh", + ["ग"] = "g", + ["घ"] = "gh", + ["ङ"] = "ṅ", + ["च"] = "c", + ["छ"] = "ch", + ["ज"] = "j", + ["झ"] = "jh", + ["ञ"] = "ñ", + ["ट"] = "ṭ", + ["ठ"] = "ṭh", + ["ड"] = "ḍ", + ["ढ"] = "ḍh", + ["ण"] = "ṇ", + ["त"] = "t", + ["थ"] = "th", + ["द"] = "d", + ["ध"] = "dh", + ["न"] = "n", + ["प"] = "p", + 
["फ"] = "ph", + ["ब"] = "b", + ["भ"] = "bh", + ["म"] = "m", + ["य"] = "y", + ["र"] = "r", + ["ल"] = "l", + ["व"] = "w", + ["श"] = "ś", + ["ष"] = "ṣ", + ["स"] = "s", + ["ह"] = "h", + + ["क़"] = "q", + ["ख़"] = "x", + ["ग़"] = "ġ", + ["ऴ"] = "ḻ", + ["ळ"] = "ḷ", + ["ज़"] = "z", + ["श़"] = "ž", + ["झ़"] = "ž", + ["ड़"] = "ṛ", + ["ढ़"] = "ṛh", + ["फ़"] = "f", + ["थ़"] = "θ", + ["द़"] = "ð", + ["ऩ"] = "ṉ", + ["ऱ"] = "ṟ", + ["ॽ"] = "'", + ["व़"] = "V", + ["ॹ"] = "ž", + -- vowel diacritics + ["ि"] = "i", + ["ु"] = "u", + ["े"] = "e", + ["ो"] = "o", + ["ा"] = "ā", + ["ी"] = "ī", + ["ू"] = "ū", + ["ृ"] = "r̥", + ["ॄ"] = "r̥̄", + ["ॢ"] = "l̥", + ["ॣ"] = "l̥̄", + ["ै"] = "ai", + ["ौ"] = "au", + ["ॉ"] = "ŏ", + ["ॅ"] = "ĕ", + -- vowel signs + ["अ"] = "a", + ["इ"] = "i", + ["उ"] = "u", + ["ए"] = "e", + ["ओ"] = "o", + ["आ"] = "ā", + ["ई"] = "ī", + ["ऊ"] = "ū", + ["ऋ"] = "r̥", + ["ॠ"] = "r̥̄", + ["ऌ"] = "l̥", + ["ॡ"] = "l̥̄", + ["ऐ"] = "ai", + ["औ"] = "au", + ["ऑ"] = "ŏ", + ["ॲ"] = "ĕ", + ["ऍ"] = "ĕ", + -- chandrabindu + ["ँ"] = "̃", + -- anusvara + ["ं"] = "ṃ", + -- visarga + ["ः"] = "ḥ", + -- virama + ["्"] = "", + -- om + ["ॐ"] = "om̐", + -- zero width joiner + ["‍"] = "a", + -- numerals + ["०"] = "0", + ["१"] = "1", + ["२"] = "2", + ["३"] = "3", + ["४"] = "4", + ["५"] = "5", + ["६"] = "6", + ["७"] = "7", + ["८"] = "8", + ["९"] = "9", + -- punctuation + ["।"] = ".", -- danda + ["॥"] = ".", -- double danda + ["+"] = "", -- compound separator + + -- abbreviation sign + ["॰"] = "." 
+} + +local nasal_assim = {["क"] = "ङ", ["ख"] = "ङ", ["ग"] = "ङ", ["घ"] = "ङ", ["च"] = "ञ", ["छ"] = "ञ", ["ज"] = "ञ", ["झ"] = "ञ", ["ट"] = "ण", ["ठ"] = "ण", ["ड"] = "ण", ["ढ"] = "ण", ["प"] = "म", ["फ"] = "म", ["ब"] = "म", ["भ"] = "म", ["म"] = "म", ["त"] = "न", ["थ"] = "न", ["द"] = "न", ["ध"] = "न", ["न"] = "न", ["ष"] = "न", ["श"] = "ङ", ["स"] = "न", ["य"] = "म", ["र"] = "म", ["ल"] = "ँ", ["व"] = "म", ["ह"] = "ङ"} +local perm_cl = {["ज्न"] = true, ["ज्ञ"] = true, ["ट्र"] = true, ["ड्र"] = true, ["ट्स"] = true, ["ड्स"] = true, ["स्ड"] = true} + +local all_cons, special_cons = "कखगघङचछजझञटठडढणतथदधनपफबभमयरलवसशषह", "कखगघचछजझटठडढणतथदधनपफबभमयरलवशषसह" +local vowel, vowel_sign = "aिुृेोाीूैौॉॅॆॊॄॢॣ", "अइउएओआईऊऋॠॡऌऐऔऑऍ" +local syncope_pattern = "(़?[" .. all_cons .. "])([ंँ]?[" .. vowel .. vowel_sign .. "])(़?[" .. all_cons .. "])([ंँ]?[" .. vowel .. vowel_sign .. "])(़?[" .. all_cons .. "])a(़?[" .. all_cons .. "])([ंँ]?[" .. vowel .. vowel_sign .. "])(़?[" .. all_cons .. "])([ंँ]?[" .. vowel .. vowel_sign .. "])" + +local nor_cons, sp_cons = "कखगघङचछजझञटठडढतथदधपफबभशषसयरलवणनमयरलवनम", "कलम" +local vowel, vowel_sign = "aिुृेोाीूैौॉॅॆॊॄॢॣ", "अइउएओआईऊऋॠॡऌऐऔऑऍ" +local koka_sign = "ोीाैे" +local koka_pattern = "([" .. koka_sign .. "])(़?[" .. sp_cons .. "])a(़?[" .. gsub(nor_cons, "य", "") .. "])([ंँ]?[" .. vowel .. vowel_sign .. "])" + +local function rev_string(text) + local result, length = {}, mw.ustring.len(text) + for i = length, 1, -1 do table.insert(result, mw.ustring.sub(text, i, i)) end + return table.concat(result) +end +function export.tr(text, lang, sc) + text = gsub(text, "([" .. all_cons .. "]़?)([" .. vowel .. "्]?)", function(c, d) return c .. (d == "" and "a" or d) end) + for word in mw.ustring.gmatch(text, "[ऀ-ॿa]+") do + local orig_word = word + word = rev_string(word) + word = gsub(word, "^a(़?)([" .. all_cons .. "])(.)(.?)", function(opt, first, second, third) return (((match(first, "[" .. special_cons .. "]") and match(second, "ं") or match(first, "[" .. 
special_cons .. "]") and match(second, "्") and not perm_cl[first .. second .. third]) or match(first .. second, "य[aिुृेोाीूैौॉॅॆॊआईउऊइएऐओऔ]") or match(first .. second, "ह[अaिुृेोाीूैौॉॅॆॊआईउऊइएऐओऔ]")) and "a" or "") .. opt .. first .. second .. third end) + + while match(word, syncope_pattern) do word = gsub(word, syncope_pattern, "%1%2%3%4%5%6%7%8%9") end + while match(word, koka_pattern) do word = gsub(word, koka_pattern, "%1%2%3%4") end + word = gsub(word, "(.?)ं(.)", function(succ, prev) return succ .. (succ .. prev == "a" and "्म" or (succ == "" and match(prev, "[" .. vowel .. "]") and "̃" or nasal_assim[succ] or "ṃ")) .. prev end) + + local escaped_orig_word = gsub(orig_word, "%+", "") + text = gsub(text, orig_word, rev_string(word)) + text = gsub(text, "ईन$", "īna") + text = gsub(text, "(...)ईन ", "%1īna ") + text = gsub(text, "इन$", "ina") + text = gsub(text, "(...)इन ", "%1ina ") + text = gsub(text, "ैन$", "aina") + text = gsub(text, "(...)ैैैैन ", "%1aina ") + text = gsub(text, "उन$", "una") + text = gsub(text, "(...)उन ", "%1una ") + text = gsub(text, "िन$", "ina") + text = gsub(text, "(...)िन ", "%1ina ") + text = gsub(text, "िछ$", "icha") + text = gsub(text, "(...)िछ ", "%1icha ") + text = gsub(text, "उछ$", "ucha") + text = gsub(text, "(...)उछ ", "%1ucha ") + text = gsub(text, "इछ$", "icha") + text = gsub(text, "(...)इछ ", "%1icha ") + text = gsub(text, "एछ$", "echa") + text = gsub(text, "ेछ$", "echa") + text = gsub(text, "(...)ेछ ", "%1echa ") + text = gsub(text, "(...)ेन ", "%1ena ") + text = gsub(text, "ेन$", "ena") + text = gsub(text, "(...)एन ", "%1ena ") + text = gsub(text, "एर$", "era") + text = gsub(text, "(...)एर ", "%1era ") + text = gsub(text, "ेर$", "era") + text = gsub(text, "(...)ेर ", "%1era ") + text = gsub(text, "एन$", "ena") + text = gsub(text, "उँछ$", "ũcha") + text = gsub(text, "(...)उँछ ", "%1ucha ") + text = gsub(text, "बाट$", "bāṭa") + text = gsub(text, "(...)बाट ", "%1bāṭa ") + text = gsub(text, "ज्ञ", "gy") + end + text = 
gsub(text, ".़?", conv) + text = gsub(text, "a([iu])̃", "a͠%1") + text = gsub(text, "[<>]", "") + text = gsub(text, "ॱ", "") + + text = gsub(text, "dach$", "dacha") + text = gsub(text, "(...)dach ", "%1dacha ") + text = gsub(text, "ain$", "aina") + text = gsub(text, "(...)ain ", "%1aina ") + text = gsub(text, "nach$", "nacha") + text = gsub(text, "(...)nach ", "%1nacha ") + text = gsub(text, "wai", "vai") + text = gsub(text, "w$", "v") + text = gsub(text, "(...)w ", "%1v ") + text = gsub(text, "([raäāiīuūeo])([r])w", "%1rv") + text = gsub(text, "w([iīewoyr̥l̥l̥̄r̥̄])", "v%1") + text = gsub(text, "([w])a([krjtcṅñysśdphṇn])([tnrṇṣcśkghjsueoayd])", "va%2%3") + text = gsub(text, "([w])ā([cgjṇtdmyshśṣn])", "vā%2") + text = gsub(text, "([w])ā([r])([tdābuṇṣh])", "vā%2%3") + text = gsub(text, "([w])ā([l])([m])", "vā%2%3") + text = gsub(text, "([w])a([sśṣṅñṃypdtnc])", "va%2") + text = gsub(text, "rh̥", "hr̥") + ---text = gsub(text, 'kṣ', 'ch̥') + text = gsub(text, "ahar([uū])", "har%1") + text = gsub(text, "abāṭa", "bāṭa") + + text = gsub(text, "kan$", "kana") + text = gsub(text, "(...)kan ", "%1kana ") + text = gsub(text, "([ptkbdgṭṇñḍmṅnlrwyhṣśs])([ptkbdgṭḍmṇñnlrwyṣśs])har([uū])", "%1%2ahar%3") + text = gsub(text, "([ptkbdgṭṇñḍmṅnlrwyhṣśs])([ptkbdgṭḍmṇñnlrwyṣśs])bāṭa", "%1%2abāṭa") + text = gsub(text, "([ptkbdgṭḍmṅṇñnlrwyhṣśs])([ptkbdgṭḍmnlrṇñwyś])([mlk])", "%1%2a%3") + text = gsub(text, "ñz", "nz") + text = gsub(text, "ñgy", "ṅgy") + text = gsub(text, "([a])(̃)l([a])", "%1ml%3") -- assim case l + text = gsub(text, "([eāuūiīo])([īuiū])m([m])", "%1%2ṃ%3") -- assim case m + text = gsub(text, "([a])([īuiū])m([m])", "%1͠%2%3") -- assim case m2 + text = gsub(text, "([eāuūiīo])([īuiū])n([st])", "%1%2ṃ%3") -- assim case s + text = gsub(text, "([a])([īuiū])n([st])", "%1͠%2%3") -- assim case s2 + text = gsub(text, "([a])([uū])ṅ([hk])", "%1͠%2%3") -- assim case kh + text = gsub(text, "([eāuūiīo])([īuiū])ṅ([kh])", "%1%2ṃ%3") -- assim case kh2 + return mw.ustring.toNFC(text) +end 
-- Transliteration for Newar in Newa script
local export = {}
local gsub = mw.ustring.gsub
local match = mw.ustring.match
local uchar = mw.ustring.char

-- Single-character lookup used by the final pass of export.tr.
local conv = {
	-- consonants
	["𑐎"] = "k", ["𑐏"] = "kh", ["𑐐"] = "g", ["𑐑"] = "gh", ["𑐒"] = "ṅ", ["𑐓"] = "ṅh",
	["𑐔"] = "c", ["𑐕"] = "ch", ["𑐖"] = "j", ["𑐗"] = "jh", ["𑐘"] = "ñ", ["𑐙"] = "ñh",
	["𑐚"] = "ṭ", ["𑐛"] = "ṭh", ["𑐜"] = "ḍ", ["𑐝"] = "ḍh", ["𑐞"] = "ṇ",
	["𑐟"] = "t", ["𑐠"] = "th", ["𑐡"] = "d", ["𑐢"] = "dh", ["𑐣"] = "n", ["𑐤"] = "nh",
	["𑐥"] = "p", ["𑐦"] = "ph", ["𑐧"] = "b", ["𑐨"] = "bh", ["𑐩"] = "m", ["𑐪"] = "mh",
	["𑐫"] = "y", ["𑐬"] = "r", ["𑐭"] = "rh", ["𑐮"] = "l", ["𑐯"] = "lh", ["𑐰"] = "w",
	["𑐱"] = "ś", ["𑐲"] = "ṣ", ["𑐳"] = "s", ["𑐴"] = "h",
	-- vowel diacritics
	["𑐶"] = "i", ["𑐸"] = "u", ["𑐾"] = "e", ["𑑀"] = "o", ["𑐵"] = "ā", ["𑐷"] = "ī", ["𑐹"] = "ū",
	["𑐺"] = "r̥", ["𑐻"] = "r̥̄", ["𑐼"] = "l̥", ["𑐽"] = "l̥̄", ["𑐿"] = "ai", ["𑑁"] = "au",
	-- independent vowels
	["𑐀"] = "a", ["𑐂"] = "i", ["𑐄"] = "u", ["𑐊"] = "e", ["𑐌"] = "o", ["𑐁"] = "ā", ["𑐃"] = "ī", ["𑐅"] = "ū",
	["𑐆"] = "r̥", ["𑐇"] = "r̥̄", ["𑐈"] = "l̥", ["𑐉"] = "l̥̄", ["𑐋"] = "ai", ["𑐍"] = "au",
	-- chandrabindu
	["𑑃"] = "̃",
	-- anusvara
	["𑑄"] = "ṃ", ["𑑈"] = "ṃ",
	-- visarga
	["𑑅"] = "ḥ",
	-- nuqta
	["𑑆"] = "",
	-- virama
	["𑑂"] = "",
	-- om
	["𑑉"] = "om",
	-- avagraha
	["𑑇"] = "’",
	-- punctuation
	["𑑋"] = ".", -- danda
	["𑑌"] = ".", -- double danda
	["𑑍"] = "", -- comma; NOTE(review): maps to the empty string, confirm this is intended
	-- abbreviation sign
	["𑑏"] = "."
}

-- Numerals. The original literal table listed the key for "3" twice (once
-- with value "2"), so digit two was never transliterated. Building the ten
-- digits from their code points (U+11450 .. U+11459) rules that out.
for digit = 0, 9 do
	conv[uchar(0x11450 + digit)] = tostring(digit)
end

-- Homorganic nasal to use for an anusvara before a given consonant. It is left
-- empty here, so export.tr falls back to "ṃ".
-- NOTE(review): the Devanagari Newar module fills the equivalent table; decide
-- whether the Newa-script module should mirror it.
local nasal_assim = {}

-- Consonant + virama + consonant clusters after which a final "a" is dropped.
-- NOTE(review): empty until someone supplies the Newa-script clusters.
local perm_cl = {}

local all_cons, special_cons = "𑐎𑐏𑐐𑐑𑐒𑐓𑐔𑐕𑐖𑐗𑐘𑐙𑐚𑐛𑐜𑐝𑐞𑐟𑐠𑐡𑐢𑐣𑐤𑐥𑐦𑐧𑐨𑐩𑐪𑐫𑐬𑐭𑐮𑐯𑐰𑐱𑐲𑐳𑐴", "𑐎𑐏𑐐𑐑𑐒𑐓𑐔𑐕𑐖𑐗𑐘𑐙𑐚𑐛𑐜𑐝𑐞𑐞𑐟𑐠𑐡𑐢𑐣𑐤𑐥𑐦𑐧𑐨𑐩𑐪𑐫𑐬𑐭𑐮𑐯𑐰𑐱𑐲𑐳𑐴"
local vowel, vowel_sign = "a𑐼𑐽𑐾𑐿𑑀𑑁𑐻𑐺𑐹𑐸𑐶𑐵𑐷", "𑐄𑐃𑐂𑐁𑐀𑐉𑐈𑐇𑐆𑐅𑐍𑐌𑐋𑐊"

-- Any independent vowel, vowel sign or the inserted inherent "a".
local any_vowel = "[" .. vowel_sign .. vowel .. "]"

-- A run of Newa-block characters (U+11400 .. U+1147F) and inserted "a"s.
-- The original range was written high-to-low and therefore matched nothing.
local newa_word = "[" .. uchar(0x11400) .. "-" .. uchar(0x1147F) .. "a]+"

-- Reverse a string code point by code point.
local function rev_string(text)
	local result, length = {}, mw.ustring.len(text)
	for i = length, 1, -1 do table.insert(result, mw.ustring.sub(text, i, i)) end
	return table.concat(result)
end

-- Decide whether the word-final inherent "a" survives. The arguments are the
-- three characters that precede it in the word, nearest first (the word is
-- processed reversed).
local function keeps_final_a(first, second, third)
	if match(first, "[" .. special_cons .. "]") then
		if match(second, "𑑄") then return true end
		if match(second, "𑑂") and not perm_cl[first .. second .. third] then return true end
	end
	if match(first .. second, "𑐫" .. any_vowel) then return true end
	if match(first .. second, "𑐴" .. any_vowel) then return true end
	return false
end

--- Transliterate Newa-script text into Latin.
-- @param text string to transliterate
-- @param lang language code (unused)
-- @param sc script code (unused)
-- @return the transliteration, NFC-normalised
function export.tr(text, lang, sc)
	-- Give every consonant (optionally followed by a nuqta) that carries no
	-- vowel sign or virama an explicit inherent "a".
	text = gsub(text, "([" .. all_cons .. "]𑑆?)([" .. vowel .. "𑑂]?)", function(c, d) return c .. (d == "" and "a" or d) end)

	for word in mw.ustring.gmatch(text, newa_word) do
		local orig_word = word
		-- Work on the reversed word so that "^" anchors at the word end.
		word = rev_string(word)
		word = gsub(word, "^a(𑑆?)([" .. all_cons .. "])(.)(.?)", function(opt, first, second, third)
			return (keeps_final_a(first, second, third) and "a" or "") .. opt .. first .. second .. third
		end)
		-- Anusvara: `succ` follows it and `prev` precedes it in reading order.
		word = gsub(word, "(.?)𑑄(.)", function(succ, prev)
			return succ .. (succ .. prev == "" and "𑑂𑐩" or (succ == "" and match(prev, "[" .. vowel .. "]") and "̃" or nasal_assim[succ] or "ṃ")) .. prev
		end)

		text = gsub(text, orig_word, rev_string(word))
		text = gsub(text, "𑐫𑑂", "y")
	end
	text = gsub(text, ".𑑆?", conv)
	text = gsub(text, "a([iu])̃", "a͠%1")
	text = gsub(text, "[<>]", "")
	return mw.ustring.toNFC(text)
end

return export
-- Transliteration for Newar
local export = {}
local gsub = mw.ustring.gsub
local match = mw.ustring.match

-- Lookup applied by the final character-by-character pass of export.tr.
local conv = {
	-- consonants
	["क"] = "k", ["ख"] = "kh", ["ग"] = "g", ["घ"] = "gh", ["ङ"] = "ṅ",
	["च"] = "c", ["छ"] = "ch", ["ज"] = "j", ["झ"] = "jh", ["ञ"] = "ñ",
	["ट"] = "ṭ", ["ठ"] = "ṭh", ["ड"] = "ḍ", ["ढ"] = "ḍh", ["ण"] = "ṇ",
	["त"] = "t", ["थ"] = "th", ["द"] = "d", ["ध"] = "dh", ["न"] = "n",
	["प"] = "p", ["फ"] = "ph", ["ब"] = "b", ["भ"] = "bh", ["म"] = "m",
	["य"] = "y", ["र"] = "r", ["ल"] = "l", ["व"] = "w", ["ळ"] = "ḷ",
	["श"] = "ś", ["ष"] = "ṣ", ["स"] = "s", ["ह"] = "h", ["ऴ"] = "ḻ",
	["ॽ"] = "ॽ",
	-- vowel diacritics
	["ि"] = "i", ["ु"] = "u", ["े"] = "e", ["ो"] = "o", ["ा"] = "ā", ["ी"] = "ī", ["ू"] = "ū",
	["ृ"] = "r̥", ["ॄ"] = "r̥̄", ["ॢ"] = "l̥", ["ॣ"] = "l̥̄", ["ै"] = "ai", ["ौ"] = "au", ["ॉ"] = "ŏ", ["ॅ"] = "ĕ",
	-- independent vowels
	["अ"] = "a", ["इ"] = "i", ["उ"] = "u", ["ए"] = "e", ["ओ"] = "o", ["आ"] = "ā", ["ई"] = "ī", ["ऊ"] = "ū",
	["ऋ"] = "r̥", ["ॠ"] = "r̥̄", ["ऌ"] = "l̥", ["ॡ"] = "l̥̄", ["ऐ"] = "ai", ["औ"] = "au", ["ऑ"] = "ŏ", ["ॲ"] = "ĕ", ["ऍ"] = "ĕ",
	-- chandrabindu
	["ँ"] = "̃",
	-- anusvara
	["ं"] = "ṃ",
	-- visarga
	["ः"] = "ḥ",
	-- virama
	["्"] = "",
	-- om
	-- NOTE(review): "oḥm" contains a visarga; confirm it is not a typo for "om".
	["ॐ"] = "oḥm",
	-- avagraha
	["ऽ"] = "’",
	-- numerals
	["०"] = "0", ["१"] = "1", ["२"] = "2", ["३"] = "3", ["४"] = "4",
	["५"] = "5", ["६"] = "6", ["७"] = "7", ["८"] = "8", ["९"] = "9",
	-- punctuation
	["।"] = ".", -- danda
	["॥"] = ".", -- double danda
	["+"] = "", -- compound separator

	-- abbreviation sign
	["॰"] = "."
}

-- Replacement for an anusvara, keyed by the consonant that follows it in
-- reading order: the homorganic nasal letter, or a chandrabindu.
local nasal_assim = {["क"] = "ङ", ["ख"] = "ङ", ["ग"] = "ङ", ["घ"] = "ङ", ["च"] = "ञ", ["छ"] = "ञ", ["ज"] = "ञ", ["झ"] = "ञ", ["ट"] = "ण", ["ठ"] = "ण", ["ड"] = "ण", ["ढ"] = "ण", ["प"] = "म", ["फ"] = "म", ["ब"] = "म", ["भ"] = "म", ["म"] = "म", ["त"] = "न", ["थ"] = "न", ["द"] = "न", ["ध"] = "न", ["न"] = "ँ", ["ष"] = "ँ", ["श"] = "ँ", ["स"] = "ँ", ["य"] = "ँ", ["र"] = "ँ", ["ल"] = "ँ", ["व"] = "ँ", ["ह"] = "ँ"}
-- Looked up in export.tr with three concatenated characters of the reversed
-- word; a hit removes one reason for keeping the word-final "a".
local perm_cl = {["ज्ञ"] = true}

-- Character sets spliced into patterns below. `vowel_sign` is not referenced
-- anywhere in this module.
local all_cons, special_cons = "कखगघङचछजझञटठडढणतथदधनपफबभमयरलवसशषह", "कखगघचछजझटठडढणतथदधनपफबभमयरलवशषसह"
local vowel, vowel_sign = "aिुृेोाीूैौॉॅॆॊॄॢॣ", "अइउएओआईऊऋॠॡऌऐऔऑऍ"

-- Reverse a string code point by code point.
local function rev_string(text)
	local result, length = {}, mw.ustring.len(text)
	for i = length, 1, -1 do table.insert(result, mw.ustring.sub(text, i, i)) end
	return table.concat(result)
end
-- Transliterate Devanagari Newar text; `lang` and `sc` are accepted but unused.
-- Returns the NFC-normalised transliteration.
function export.tr(text, lang, sc)
	-- Insert an explicit "a" after every consonant (plus optional nukta) that
	-- has no vowel sign or virama.
	text = gsub(text, "([" .. all_cons .. "]़?)([" .. vowel .. "्]?)", function(c, d) return c .. (d == "" and "a" or d) end)
	for word in mw.ustring.gmatch(text, "[ऀ-ॿa]+") do
		local orig_word = word
		-- The word is reversed so that "^a" anchors on a word-final "a".
		word = rev_string(word)
		-- Keeps or drops the final "a" depending on the three preceding characters.
		word = gsub(word, "^a(़?)([" .. all_cons .. "])(.)(.?)", function(opt, first, second, third) return (((match(first, "[" .. special_cons .. "]") and match(second, "ं") or match(first, "[" .. special_cons .. "]") and match(second, "्") and not perm_cl[first .. second .. third]) or match(first .. second, "य[aिुृेोाीूैौॉॅॆॊआईउऊइएऐओऔ]") or match(first .. second, "ह[अaिुृेोाीूैौॉॅॆॊआईउऊइएऐओऔ]")) and "a" or "") .. opt .. first .. second .. third end)

		-- Anusvara handling. NOTE(review): `prev` is captured by "(.)" and is never
		-- empty, so the `succ .. prev == ""` branch cannot be taken — confirm intent.
		word = gsub(word, "(.?)ं(.)", function(succ, prev) return succ .. (succ .. prev == "" and "्म" or (succ == "" and match(prev, "[" .. vowel .. "]") and "̃" or nasal_assim[succ] or "ṃ")) .. prev end)

		-- NOTE(review): this value is computed but never used below.
		local escaped_orig_word = gsub(orig_word, "%+", "")
		text = gsub(text, orig_word, rev_string(word))
		text = gsub(text, "य्", "y")
	end
	-- Map every remaining character (with an optional nukta) through `conv`.
	text = gsub(text, ".़?", conv)
	text = gsub(text, "a([iu])̃", "a͠%1")
	text = gsub(text, "[<>]", "")
	return mw.ustring.toNFC(text)
end
return export
local u = mw.ustring.char

-- Byte pattern matching one UTF-8 encoded character.
local UTF8_char = "[\1-\127\194-\244][\128-\191]*"

local export = {}

-- Single-character lookup applied as the last step of export.tr.
local tab = {
	["А"] = "A", ["а"] = "a",
	["В"] = "V", ["в"] = "v",
	["Е"] = "E", ["е"] = "e",
	["Ё"] = "Jo", ["ё"] = "jo",
	["Г"] = "G", ["г"] = "g",
	-- NOTE(review): both map to the empty string, which deletes the letter;
	-- presumably a diacritic letter was lost here — verify.
	["Ӷ"] = "", ["ӷ"] = "",
	["Ғ"] = "Ꜧ", ["ғ"] = "ꜧ",
	["Ӻ"] = "Ꜧ̇", ["ӻ"] = "ꜧ̇",
	["Д"] = "D", ["д"] = "d",
	["И"] = "I", ["и"] = "i",
	["Й"] = "J", ["й"] = "j",
	["К"] = "K", ["к"] = "k",
	["Ӄ"] = "Q", ["ӄ"] = "q",
	["Л"] = "L", ["л"] = "l",
	["М"] = "M", ["м"] = "m",
	["Н"] = "N", ["н"] = "n",
	["Ӈ"] = "Ŋ", ["ӈ"] = "ŋ",
	["О"] = "O", ["о"] = "o",
	["П"] = "P", ["п"] = "p",
	["Р"] = "R", ["р"] = "r",
	["Р̌"] = "Ř", ["р̌"] = "ř",
	["С"] = "S", ["с"] = "s",
	["Т"] = "T", ["т"] = "t",
	["У"] = "U", ["у"] = "u",
	["Ф"] = "F", ["ф"] = "f",
	["Х"] = "X", ["х"] = "x",
	["Ӽ"] = "Ẋ", ["ӽ"] = "ẋ",
	["Ӿ"] = "H", ["ӿ"] = "h",
	["Ч"] = "Ț’", ["ч"] = "ț’",
	["Ы"] = "Ə", ["ы"] = "ə",
	["Э"] = "E", ["э"] = "e",
	["Ю"] = "Ju", ["ю"] = "ju",
	["Я"] = "Ja", ["я"] = "ja",
	["’"] = "ʼ", ["ʼ"] = "ʼ",
	-- non-native letters
	["Б"] = "B", ["б"] = "b",
	["Ж"] = "Ž", ["ж"] = "ž", -- the capital used to map to lowercase "ž"
	["З"] = "Z", ["з"] = "z",
	["Ц"] = "C", ["ц"] = "c",
	["Ш"] = "Š", ["ш"] = "š",
	["Щ"] = "Šč", ["щ"] = "šč",
	["Ъ"] = "ʺ", ["ъ"] = "ʺ",
	["Ь"] = "’", ["ь"] = "’"
}

-- Consonant + "j" sequences that collapse into one palatal consonant.
local palatal = {{"Дj", "D̦"}, {"дj", "d̦"}, {"Нj", "Ņ"}, {"нj", "ņ"}, {"Тj", "Ț"}, {"тj", "ț"}}

--- Transliterate Nivkh Cyrillic text into Latin.
-- @param text string to transliterate
-- @param lang language code (unused)
-- @param sc script code (unused)
-- @return the transliterated string (exactly one value)
function export.tr(text, lang, sc)
	--[=[
		Unfortunately the Cyrillic alphabet doesn't distinguish between ţi and ti
		or ţe and te and so on.

		Represent iotation with j to allow the palatal consonant replacements.
	]=]
	text = string.gsub(text, UTF8_char, {["Я"] = "Ja", ["я"] = "ja", ["Ё"] = "Jo", ["ё"] = "jo", ["Ю"] = "Ju", ["ю"] = "ju", ["ь"] = "j"})

	for _, item in ipairs(palatal) do text = string.gsub(text, unpack(item)) end

	local vowels = {}
	for char in string.gmatch("АОУЫЕИЪЬаӣиоуыэеъьaeiou", UTF8_char) do vowels[char] = true end
	text = mw.ustring.gsub(text, "(.?)([Ее])", function(preceding, e)
		local capital = e == "Е"
		-- modifier letter apostrophe or right single quotation mark
		if preceding == "ʼ" or preceding == "’" then
			e = capital and "E" or "e"
		elseif preceding == "" or vowels[preceding] or mw.ustring.find(preceding, "[^Ѐ-ӿ]") then
			e = capital and "Je" or "je"
		else
			mw.log("Module:niv-translit could not decide how to transliterate " .. e .. " after " .. preceding .. ".")
		end
		return preceding .. e
	end)

	-- Parentheses drop gsub's second result (the match count) so callers that
	-- forward all return values only ever see the string.
	return (string.gsub(text, UTF8_char, tab))
end

return export
local export = {}

-- One-to-one letter mapping, applied last.
local tt = {
	["а"] = "a", ["б"] = "b", ["в"] = "v", ["г"] = "g", ["д"] = "d", ["е"] = "e", ["ё"] = "yo", ["ж"] = "j", ["з"] = "z", ["и"] = "i", ["й"] = "y",
	["к"] = "k", ["л"] = "l", ["м"] = "m", ["н"] = "n", ["о"] = "o", ["п"] = "p", ["р"] = "r", ["с"] = "s", ["т"] = "t", ["у"] = "u", ["ф"] = "f",
	["х"] = "x", ["ц"] = "c", ["ч"] = "ç", ["ш"] = "ş", ["щ"] = "şç", ["ъ"] = "”", ["ы"] = "ı", ["ь"] = "’", ["э"] = "é", ["ю"] = "yu", ["я"] = "ya",
	["А"] = "A", ["Б"] = "B", ["В"] = "V", ["Г"] = "G", ["Д"] = "D", ["Е"] = "E", ["Ё"] = "Yo", ["Ж"] = "J", ["З"] = "Z", ["И"] = "I", ["Й"] = "Y",
	["К"] = "K", ["Л"] = "L", ["М"] = "M", ["Н"] = "N", ["О"] = "O", ["П"] = "P", ["Р"] = "R", ["С"] = "S", ["Т"] = "T", ["У"] = "U", ["Ф"] = "F",
	["Х"] = "X", ["Ц"] = "C", ["Ч"] = "Ç", ["Ш"] = "Ş", ["Щ"] = "Şç", ["Ъ"] = "”", ["Ы"] = "I", ["Ь"] = "’", ["Э"] = "É", ["Ю"] = "Yu", ["Я"] = "Ya"
};

-- Two-letter sequences that become a single Latin letter.
local digraphs = {["аь"] = "ä", ["Аь"] = "Ä", ["нъ"] = "ñ", ["Нъ"] = "Ñ", ["оь"] = "ö", ["Оь"] = "Ö", ["уь"] = "ü", ["Уь"] = "Ü"}

--- Transliterate Nogai Cyrillic text into Latin.
-- @param text string to transliterate
-- @return the transliterated string
function export.tr(text)
	local ugsub = mw.ustring.gsub

	-- "е" is iotated after the letters listed in the set or a non-letter,
	-- each optionally followed by an accent mark ...
	text = ugsub(text, "([АОУЫЕЯЁЮИЕаоуыэяёюиеь%A][́̀]?)([Ее])", function(before, letter)
		if letter == "е" then return before .. "ye" end
		return before .. "Ye"
	end)
	-- ... and at the very start of the text.
	text = text:gsub("^Е", "Ye")
	text = text:gsub("^е", "ye")

	for source, target in pairs(digraphs) do
		text = ugsub(text, source, target)
	end

	text = ugsub(text, ".", tt)
	return text
end

return export
local export = {}

-- Ordered {pattern, replacement} rules applied to the output of Cans-translit.
-- This is a sequence walked with ipairs, not a hash walked with pairs, because:
--   * the rules overlap ("ō" -> "oo" and then "o" -> "u", so a long ō ends up
--     as "uu"; "c" -> "ch" must run before "schwaa" is produced), and the
--     traversal order of pairs is unspecified;
--   * the syllabics rule was previously written as two positional values, so
--     pairs yielded the keys 1 and 2 as "patterns" and the rule never ran.
local replacements = {
	{"c", "ch"},
	{"ī", "ii"},
	{"ō", "oo"},
	{"ā", "aa"},
	{"o", "u"},
	{"š", "sh"},
	{"ð", "th"},
	{"(.)ː", "%1%1"}, -- length mark doubles the preceding character
	{"[ᔌᔍᔎᔏ]", {["ᔌ"] = "spwaa", ["ᔍ"] = "stwaa", ["ᔎ"] = "skwaa", ["ᔏ"] = "schwaa"}},
}

--- Transliterate Naskapi text.
-- Delegates to Cans-translit and then applies the rules above in order.
-- @param text string to transliterate
-- @param lang language code, forwarded to Cans-translit
-- @param sc script code, forwarded to Cans-translit
-- @return the transliterated string
function export.tr(text, lang, sc)
	text = require("Cans-translit").tr(text, lang, sc)

	for _, rule in ipairs(replacements) do
		text = mw.ustring.gsub(text, rule[1], rule[2])
	end

	return text
end

return export
local u = mw.ustring.char

local ACUTE = u(0x0301)
local MACRON = u(0x0304)
local DOTABOVE = u(0x0307)
local DIAER = u(0x0308)
local CARON = u(0x030C)

local str_gsub, ugsub = string.gsub, mw.ustring.gsub
-- Byte pattern matching one UTF-8 encoded character.
local UTF8char = "[\1-\127\194-\244][\128-\191]*"

local export = {}

-- Single-character lookup applied as the last step of export.tr.
local tab = {
	["А"] = "A", ["а"] = "a",
	["Б"] = "B", ["б"] = "b",
	["В"] = "W", ["в"] = "w",
	["Г"] = "G", ["г"] = "g",
	["Д"] = "D", ["д"] = "d",
	["Е"] = "Je", ["е"] = "je",
	["Ё"] = "Jo", ["ё"] = "jo",
	["Ж"] = "Ž", ["ж"] = "ž",
	["З"] = "Z", ["з"] = "z",
	["І"] = "Ï", ["і"] = "ï",
	["И"] = "I", ["и"] = "i",
	["Й"] = "J", ["й"] = "j",
	["К"] = "K", ["к"] = "k",
	["Л"] = "L", ["л"] = "l",
	["М"] = "M", ["м"] = "m",
	["Н"] = "N", ["н"] = "n",
	["О"] = "O", ["о"] = "o",
	["П"] = "P", ["п"] = "p",
	["Р"] = "R", ["р"] = "r",
	["С"] = "S", ["с"] = "s",
	["Т"] = "T", ["т"] = "t",
	["У"] = "U", ["у"] = "u",
	["Ф"] = "F", ["ф"] = "f",
	["Х"] = "Q", ["х"] = "q",
	["Ц"] = "C", ["ц"] = "c",
	["Ч"] = "Č", ["ч"] = "č",
	["Ш"] = "Š", ["ш"] = "š",
	["Щ"] = "Ś", ["щ"] = "ś",
	["Ъ"] = "ʺ", ["ъ"] = "ʺ",
	["Ы"] = "Y", ["ы"] = "y",
	["Ь"] = "ʹ", ["ь"] = "ʹ",
	["Э"] = "E", ["э"] = "e",
	["Ю"] = "Ju", ["ю"] = "ju",
	["Я"] = "Ja", ["я"] = "ja", -- the capital used to map to lowercase "ja"
	["Ѣ"] = "Ě", ["ѣ"] = "ě"
}

local CONSONANT = "([БВГДЖЗЙКЛМНПРСТФХЦЧШЩбвгджзйклмнпрстфхцчшщ])"

-- Multi-character rules, applied in order before the single-character table.
-- The post-consonantal "е" rules come before the generic long "е" rule;
-- otherwise "ее" has already become "jē" and they can never match.
local other = {
	{"Аа", "Ā"}, {"аа", "ā"}, -- long "а"
	{"Ии", "Ī"}, {"ии", "ī"}, -- long "и"
	{"Оо", "Ō"}, {"оо", "ō"}, -- long "о"
	{"Уу", "Ū"}, {"уу", "ū"}, -- long "у"
	{"Ээ", "Ē"}, {"ээ", "ē"}, -- long "э"
	{"Ъ%-", "%-"}, {"ъ%-", "%-"}, -- final "ъ"
	{"Ъ ", " "}, {"ъ ", " "}, -- final "ъ"
	{"Ъ$", ""}, {"ъ$", ""}, -- final "ъ"
	{CONSONANT .. "Ее", "%1Ē"}, -- post-consonantal long E
	{CONSONANT .. "ее", "%1ē"}, -- post-consonantal long e
	{CONSONANT .. "Е", "%1E"}, -- post-consonantal E
	{CONSONANT .. "е", "%1e"}, -- post-consonantal e
	{"Ее", "Jē"}, {"ее", "jē"} -- long "е" elsewhere
}

--- Transliterate Cyrillic text into Latin using the tables above.
-- @param text string to transliterate
-- @param lang language code (unused)
-- @param sc script code (unused)
-- @return the transliterated string (exactly one value)
function export.tr(text, lang, sc)
	-- These patterns contain character classes of multi-byte letters, so they
	-- need the code-point-aware gsub; the byte-based string.gsub would treat
	-- the class as a set of individual bytes.
	for _, replacement in ipairs(other) do text = ugsub(text, unpack(replacement)) end

	return (str_gsub(text, UTF8char, tab))
end

return export
local export = {}

-- Consonant letters, keyed with and without a following nukta.
local consonants = {
	-- common
	["କ"] = "k", ["ଖ"] = "kh", ["ଗ"] = "g", ["ଘ"] = "gh", ["ଙ"] = "ṅ",
	["ଚ"] = "c", ["ଛ"] = "ch", ["ଜ"] = "j", ["ଝ"] = "jh", ["ଞ"] = "ñ",
	["ଟ"] = "ṭ", ["ଠ"] = "ṭh", ["ଡ"] = "ḍ", ["ଢ"] = "ḍh", ["ଣ"] = "ṇ",
	["ତ"] = "t", ["ଥ"] = "th", ["ଦ"] = "d", ["ଧ"] = "dh", ["ନ"] = "n",
	["ପ"] = "p", ["ଫ"] = "ph", ["ବ"] = "b", ["ଭ"] = "bh", ["ମ"] = "m",
	["ଯ"] = "j", ["ୟ"] = "y", ["ର"] = "r", ["ଲ"] = "l", ["ଳ"] = "ḷ",
	["ଵ"] = "v", ["ୱ"] = "w", ["ଶ"] = "ś", ["ଷ"] = "ṣ", ["ସ"] = "s", ["ହ"] = "h",
	-- nuktas
	["କ଼"] = "q", ["ଖ଼"] = "ḵẖ", ["ଗ଼"] = "ġ", ["ଜ଼"] = "z",
	["ଝ଼"] = "zh", ["ଡ଼"] = "ṛ", ["ଢ଼"] = "ṛh", ["ଫ଼"] = "f"
}

-- Dependent vowel signs and the virama (which suppresses the inherent vowel).
local diacritics = {["ା"] = "a", ["ି"] = "i", ["ୀ"] = "i", ["ୁ"] = "u", ["ୂ"] = "u", ["ୃ"] = "ru", ["ୄ"] = "ru", ["ୢ"] = "lu", ["ୣ"] = "lu", ["େ"] = "e", ["ୈ"] = "ôi", ["ୖ"] = "ôi", ["ୋ"] = "o", ["ୌ"] = "ôu", ["ୗ"] = "ôu", ["୍"] = ""}

-- Everything else, looked up one character at a time.
local tt = {
	-- vowels
	["ଅ"] = "ô", ["ଆ"] = "a", ["ଇ"] = "i", ["ଈ"] = "i", ["ଉ"] = "u", ["ଊ"] = "u",
	["ଋ"] = "ru", ["ୠ"] = "ru", ["ଌ"] = "lu", ["ୡ"] = "lu",
	["ଏ"] = "e", ["ଐ"] = "ôi", ["ଓ"] = "o", ["ଔ"] = "ôu",
	-- chandrabindu
	["ଁ"] = "m̐", -- until a better method is found
	-- anusvara
	["ଂ"] = "ṃ", -- until a better method is found
	-- visarga (the key used to carry a trailing space, so the one-character
	-- lookup in export.tr could never find it)
	["ଃ"] = "ḥ",
	-- avagraha
	["ଽ"] = "’",
	-- numerals
	["୦"] = "0", ["୧"] = "1", ["୨"] = "2", ["୩"] = "3", ["୪"] = "4",
	["୫"] = "5", ["୬"] = "6", ["୭"] = "7", ["୮"] = "8", ["୯"] = "9",
	["୲"] = "¼", ["୳"] = "½", ["୴"] = "¾", ["୵"] = "¹⁄₁₆", ["୶"] = "⅛", ["୷"] = "³⁄₁₆",
	-- punctuation
	["।"] = "." -- danda
}

--- Transliterate Odia text into Latin.
-- @param text string to transliterate
-- @param lang language code (unused)
-- @param sc script code (unused)
-- @return the transliterated string
function export.tr(text, lang, sc)
	-- Consonant (with optional nukta) plus optional vowel sign or virama.
	-- A bare consonant gets the inherent vowel "ô".
	text = mw.ustring.gsub(text, "([କଖଗଘଙଚଛଜଝଞଟଠଡଢଣତଥଦଧନପଫବଵଭମଯୟରଲଳୱଶଷସହ]଼?)" .. "([ାିୀୁୂୃୄେୈୖୋୌୗ୍ୢୣ]?)", function(c, d)
		if not consonants[c] then return c end
		if d == "" then
			return consonants[c] .. "ô"
		else
			return consonants[c] .. diacritics[d]
		end
	end)

	text = mw.ustring.gsub(text, ".", tt)

	return text
end

return export
local export = {}

-- Single letters, applied last.
local mapping1 = {
	["ӕ"] = "æ", ["Ӕ"] = "Æ",
	["т"] = "t", ["Т"] = "T",
	["р"] = "r", ["Р"] = "R",
	["ф"] = "f", ["Ф"] = "F",
	["э"] = "è", ["Э"] = "È",
	["ю"] = "ju", ["Ю"] = "Ju",
	["ш"] = "š", ["Ш"] = "Š",
	["ь"] = "ʹ", ["Ь"] = "ʹ",
	["ъ"] = "ʺ", ["Ъ"] = "ʺ",
	["н"] = "n", ["Н"] = "N",
	["п"] = "p", ["П"] = "P",
	["й"] = "j", ["Й"] = "J",
	["л"] = "l", ["Л"] = "L",
	["з"] = "z", ["З"] = "Z",
	["е"] = "e", ["Е"] = "E",
	["г"] = "g", ["Г"] = "G",
	["б"] = "b", ["Б"] = "B",
	["у"] = "u", ["У"] = "U",
	["с"] = "s", ["С"] = "S",
	["х"] = "x", ["Х"] = "X",
	["ч"] = "ḱ", ["Ч"] = "Ḱ",
	["щ"] = "šč", ["Щ"] = "ŠČ",
	["я"] = "ja", ["Я"] = "Ja",
	["ы"] = "y", ["Ы"] = "Y",
	["м"] = "m", ["М"] = "M",
	["о"] = "o", ["О"] = "O",
	["и"] = "i", ["И"] = "I",
	["ё"] = "ë", ["Ё"] = "Ë",
	["ж"] = "ž", ["Ж"] = "Ž",
	["к"] = "k", ["К"] = "K",
	["д"] = "d", ["Д"] = "D",
	["в"] = "v", ["В"] = "V",
	["ц"] = "c", ["Ц"] = "C",
	["а"] = "a", ["А"] = "A"
}

-- Two-letter sequences: ejectives, digraph consonants and glides.
local mapping2 = {["къ"] = "k’", ["Къ"] = "K’", ["пъ"] = "p’", ["Пъ"] = "P’", ["тъ"] = "t’", ["Tъ"] = "T’", ["цъ"] = "c’", ["Цъ"] = "C’", ["чъ"] = "ḱ’", ["Чъ"] = "Ḱ’", ["хъ"] = "q", ["Хъ"] = "Q", ["гъ"] = "ǧ", ["Гъ"] = "Ǧ", ["дж"] = "ǵ", ["Дж"] = "Ǵ", ["дз"] = "ʒ", ["Дз"] = "Ʒ", ["ау"] = "aw", ["Ау"] = "Aw", ["ӕу"] = "æw", ["Ӕу"] = "Æw", ["иу"] = "iw", ["Иу"] = "Iw", ["ыу"] = "yw", ["Ыу"] = "Yw", ["еу"] = "ew", ["Еу"] = "Ew", ["уа"] = "wa", ["Уа"] = "Wa", ["уӕ"] = "wæ", ["Уӕ"] = "Wæ", ["уи"] = "wi", ["Уи"] = "Wi", ["уы"] = "wy", ["Уы"] = "Wy", ["уе"] = "we", ["Уе"] = "We"}

-- Three-letter labialised sequences, applied before mapping2.
local mapping3 = {["гуы"] = "g°y", ["Гуы"] = "G°y", ["куы"] = "k°y", ["Kуы"] = "K°y", ["хуы"] = "x°y", ["Хуы"] = "X°y"}

--- Transliterate Ossetian text.
-- @param text string to transliterate
-- @param lang language code, forwarded when delegating
-- @param sc script code; "Geor" hands the text to Geor-translit
-- @return the transliterated string
function export.tr(text, lang, sc)
	-- If the script is given as Geor, then forward the transliteration to that module
	if sc == "Geor" then
		return require("Geor-translit").tr(text, lang, sc)
	end

	local ugsub = mw.ustring.gsub

	-- Longest sequences first: the four-letter ejective + labialisation,
	-- then the three-letter table, then the two-letter table.
	text = ugsub(text, "къуы", "k’°y")
	text = ugsub(text, "Kъуы", "K’°y")
	for _, mapping in ipairs({mapping3, mapping2}) do
		for source, target in pairs(mapping) do
			text = ugsub(text, source, target)
		end
	end

	text = ugsub(text, ".", mapping1)
	return text
end

return export
-- To whoever decides to work on this, sorry for this code being such a mess ~~ User:Taimoorahmed11
local u = mw.ustring.char
local gsub = mw.ustring.gsub

local export = {}

-- Carrier letters, built from code points.
local vav = u(0x0648)
local ye = u(0x06CC)
local alif = u(0x0627)
local he = "ہ"

-- Short-vowel marks.
local vw_s_cfu = u(0x0650) -- zer (i)
local vw_s_ccu = u(0x0655) -- hamza below
local vw_s_cbr = u(0x064F) -- pesh (u)
local vw_s_mcu = u(0x0654) -- hamza above
local vw_s_ocu = u(0x064E) -- zabar (a)

local vw_l_cbr = u(0x0657) -- inverted zer
local vw_l_cfu = u(0x0656) -- subscript alif

local hat = u(0x065A)
local inverted_hat = u(0x065B)
local hats = hat .. inverted_hat

-- All short-vowel marks concatenated, for use inside a character class.
local short_vowels_list = vw_s_cfu .. vw_s_ccu .. vw_s_cbr .. vw_s_mcu .. vw_s_ocu

-- carrier + diacritic combos
local long_u = vav .. vw_l_cbr
local short_o = vav .. inverted_hat
local long_i = ye .. vw_l_cfu
local short_e = ye .. inverted_hat

-- Concatenated for use inside a character class in export.tr.
local vocalised_carrier = long_u .. short_o .. long_i .. short_e .. "و" .. "و" .. "ی" .. "ے"

local consonants_1 = "ببپتثہجچحخدذرزژسشصضطظعغفقکگلࣇمنݨوہىھٹڈڑ"
local consonants_2 = "ببھتھڈھجھدھٹھکھڑھ"
local vowels = "اِیاُوآے"
-- NOTE(review): this uses consonants_2 as a pattern (a literal letter sequence)
-- and deletes it from consonants_1; it looks like a set difference was
-- intended — confirm. Only gsub's first result is kept.
local consonants = gsub(consonants_1, consonants_2, "")

local conv = {
	-- consonants
	["ب"] = "b", ["پ"] = "p", ["ت"] = "t", ["ث"] = "s", ["ج"] = "j", ["چ"] = "c",
	["ح"] = "h", ["خ"] = "x", ["د"] = "d", ["ذ"] = "z", ["ر"] = "r", ["ز"] = "z",
	["ژ"] = "ž", ["س"] = "s", ["ش"] = "ś", ["ص"] = "s", ["ض"] = "z", ["ط"] = "t",
	["ظ"] = "z", ["ع"] = "", ["غ"] = "ġ", ["ف"] = "f", ["ق"] = "q", ["ک"] = "k",
	["گ"] = "g", ["ل"] = "l", ["م"] = "m", ["ن"] = "n", ["ݨ"] = "ṇ", ["وَ"] = "v",
	["ہ"] = "h", ["ى"] = "y", ["ھ"] = "h", ["ٹ"] = "ṭ", ["ڈ"] = "ḍ", ["ڑ"] = "ṛ",
	["ࣇ"] = "ḷ",

	-- aspirated consonants
	["بھ"] = "bh", ["تھ"] = "th", ["ڈھ"] = "ḍh", ["جھ"] = "jh", ["دھ"] = "dh",
	["ٹھ"] = "ṭh", ["کھ"] = "kh", ["ڑھ"] = "ṛh", ["گھ"] = "gh", ["لھ"] = "lh",

	-- digits
	["۰"] = "0", ["۱"] = "1", ["۲"] = "2", ["۳"] = "3", ["۴"] = "4",
	["۵"] = "5", ["۶"] = "6", ["۷"] = "7", ["۸"] = "8", ["۹"] = "9"
}

-- Keys are Latin patterns matched after the placeholder "N"; values are the
-- nasal that replaces that "N".
local nasal_assim = {["[kg]h?"] = "ṅ", ["[cj]h?"] = "ñ", ["[ṭḍ]h?"] = "ṇ", ["[td]h?"] = "n", ["[pb]h?"] = "m", ["n"] = "n", ["m"] = "m", ["s"] = "n"}

-- Looked up one character at a time by export.tr.
local short_vowels = {
	-- independent vowels
	[u(0x00627)] = "a", -- alif
	[u(0x00623)] = "ā", -- alif with hamza above
	[u(0x00622)] = "ā", -- alif with madda
	[u(0x06D2)] = "ē", -- ye (bari ye)
	-- NOTE(review): this key is several characters long (with a trailing
	-- space), so the one-character lookup cannot find it.
	["اُ "] = "u", -- alif with damma
	[u(0x0624)] = "au", -- vav with hamza

	-- vowels
	[u(0x064E)] = "a", -- zabar
	[u(0x0670)] = "ā", -- khari zabar
	[u(0x0650)] = "i", -- zer
	[u(0x064F)] = "u", -- pesh

	-- other diacritics
	[u(0x06BA)] = "N", -- noon ghunna (placeholder resolved later in export.tr)
	[u(0x0621)] = "i", -- hamza
	[u(0x0651)] = "ː", -- shad (placeholder resolved later in export.tr)
	[u(0x064B)] = "n", -- do zabar
	[u(0x064D)] = "ni" -- do zer
}

-- These re-declare alif, vav and ye from the top of the file as literals.
local alif = "ا"
local alif2 = "أ"
local vav = "و"
local ye = "ی"
local noon = u(0x06BA)
local shadda = u(0x0651)
local ghunna = u(0x06BA)
local aspirate = u(0x06BE)
local sukoon = u(0x0652)

-- Transliterate Punjabi written in Arabic script. `lang` and `sc` are accepted
-- but unused. The substitutions below are order-dependent: each works on the
-- output of the ones before it.
function export.tr(text, lang, sc)

	-- special cases (added 'mai' since in Punjabi 'میں' is only used for 'mai')
	text = gsub(text, "([" .. consonants .. "])" .. alif .. "([" .. consonants .. "])", "%1ā%2")
	text = gsub(text, "اللہ", "Allāh")
	text = gsub(text, "مَیں", "ma͠i")
	text = gsub(text, alif .. ye .. "([" .. consonants .. "])", "ē%1")
	text = gsub(text, "([" .. consonants .. "])" .. "ائی", "%1āī")
	-- Can't remember why the line below was added but it's messing up some of the translations | N2: It was added for 'Waheguru'
	-- text = gsub(text, '([' .. consonants .. '])' .. alif .. '([' .. consonants .. '])' .. vw_s_cfu .. '([' .. consonants .. '])', "%1ā%2e%3")
	text = gsub(text, "ن٘", "ṅ")
	text = gsub(text, alif .. vav .. "([" .. consonants .. "])", "ō%1")
	text = gsub(text, "([" .. consonants .. "])" .. vav, "%1ō")
	text = gsub(text, "([" .. consonants .. "])" .. vav .. "([" .. consonants .. "])", "%1ō%2")

	text = gsub(text, "وا" .. "([" .. consonants .. "])", "vā%1")

	-- full stop
	text = gsub(text, "۔", ".")
	text = gsub(text, "؟", "?")

	-- word-final alif
	text = gsub(text, alif .. ghunna, "ā̃")
	text = gsub(text, "([" .. consonants .. "])" .. alif, "%1ā")
	text = gsub(text, "([" .. consonants .. "])" .. shadda .. alif, "%1%1ā")
	-- if 'ا' is medial
	text = gsub(text, "([" .. consonants .. "])" .. "ا" .. "([" .. consonants .. "])", "%1ā%2")
	text = gsub(text, "([" .. consonants .. "])" .. ye .. alif, "%1iā")

	-- if choti ye at end + case if not at end
	text = gsub(text, "([" .. consonants .. "])" .. ye .. "([" .. consonants .. "])", "%1ē%2")
	text = gsub(text, "([" .. consonants .. "])" .. ye, "%1i")

	-- intervocalic alif is a long a sound
	text = gsub(text, "([" .. consonants .. "و " .. "])" .. alif2 .. "([" .. consonants .. "و" .. "])", "%1ā%2")

	-- schwa rules
	--- after do-chashme-he
	text = gsub(text, "([" .. consonants .. "])" .. aspirate .. "([" .. consonants .. "])", "%1ha%2")
	--- after zer | removed this, as it was realistically unnecessary
	-- text = gsub(text, '([' .. consonants .. '])' .. vw_s_cfu .. '([' .. consonants .. '])' .. "ْ", "%1i%2")
	-- text = gsub(text, '([' .. consonants .. '])' .. vw_s_cfu .. '([' .. consonants .. '])', "%1i%2a")
	--- initial letter and second letter except with diacritics

	-- initial consonant + he sound = e/ê
	text = gsub(text, "([" .. consonants .. vowels .. "])" .. he .. "([" .. consonants .. "])", "%1eh%2")
	text = gsub(text, "([" .. consonants .. "])" .. "ح" .. "([" .. consonants .. "])", "%1êhê%2")

	-- word-initial alif + vowelled carrier drops the alif
	text = gsub(text, "^" .. alif .. "([" .. vocalised_carrier .. "])", "%1")

	-- word-initial alif + short vowel diacritic drops the alif
	text = gsub(text, "^" .. alif .. "([" .. short_vowels_list .. "])", "%1")

	-- do-chashme-he zabar, zer, pesh
	text = gsub(text, "([" .. consonants .. "])" .. vw_s_ocu .. aspirate, "%1ha")
	text = gsub(text, "([" .. consonants .. "])" .. vw_s_cfu .. aspirate, "%1hi")
	text = gsub(text, "([" .. consonants .. "])" .. vw_s_cbr .. aspirate, "%1hu")

	-- shadda
	text = gsub(text, "([" .. consonants .. "])" .. shadda, "%1%1")

	-- long /u:/ and /i:/ and /e:/ and /o:/
	text = gsub(text, "([" .. consonants .. "])" .. vw_s_cbr .. vav, "%1ū")
	text = gsub(text, "([" .. consonants .. "])" .. vw_s_cfu .. ye, "%1ī")
	-- NOTE(review): the next two replacements each omit one of the two captured
	-- consonants ("%1ē" has no %2, "ō%2" has no %1) — confirm this is intended.
	text = gsub(text, "([" .. consonants .. "])" .. ye .. "([" .. consonants .. "])", "%1ē")
	text = gsub(text, "([" .. consonants .. "])" .. vav .. "([" .. consonants .. "])", "ō%2")

	--- nasalised
	text = gsub(text, "([" .. consonants .. "])" .. vw_s_cbr .. vav .. ghunna, "%1ū̃")
	text = gsub(text, "([" .. consonants .. "])" .. vw_s_cfu .. ye .. ghunna, "%1ī̃")
	text = gsub(text, "([" .. consonants .. "])" .. vav .. ghunna, "%1ō̃")
	text = gsub(text, "([" .. consonants .. "])" .. ye .. ghunna, "%1ē̃")

	-- final he + short vowel disregards the he and transliterates the vowel
	text = gsub(text, "([" .. vowels .. "])" .. he, "%1h")
	text = gsub(text, "([" .. consonants .. "])" .. he, "%1a")
	text = gsub(text, "([" .. consonants .. "])" .. he .. "([" .. consonants .. vowels .. "])", "%1h")

	-- sukoon diacritic
	-- NOTE(review): the replacement string appears to hold an invisible
	-- character (presumably a zero-width space) — verify.
	text = gsub(text, "ْ", "​")
	-- vav with hat = short o
	text = gsub(text, vav .. "[" .. hats .. "]", "o")

	-- vav with short vowel
	text = gsub(text, vav .. "([" .. short_vowels_list .. "])", function(c) return "v" .. short_vowels[c] end)

	-- nun or re with hat
	-- TODO: add support for re
	text = gsub(text, "ن" .. "[" .. hats .. "]", "n")

	-- ye with hat = short e
	text = gsub(text, ye .. "[" .. hats .. "]", "e")

	-- long i
	text = gsub(text, ye .. vw_l_cfu, "ī")

	-- ye between two consonants becomes long e
	text = gsub(text, "([" .. consonants .. "])" .. ye .. "([" .. consonants .. "])", "%1ē%2")

	-- regard the consonant + short vowel combinations throughout
	text = gsub(text, ".", short_vowels)

	-- aspirated pairs first, then single consonants
	text = gsub(text, "[بتجدکگٹڈڑ]ھ", conv)
	text = gsub(text, "[ببپتثجچحخدذرزژسشصضطظعغفقکگلࣇمنݨوہىھٹڈڑ]", conv)

	-- normal consonants left over
	text = gsub(text, vav, "v")
	text = gsub(text, "ہ", "h")
	text = gsub(text, "ی", "y")

	-- Resolve the "N" placeholder: homorganic nasal before a listed consonant,
	-- otherwise a combining tilde. pairs order is unspecified here.
	for key, val in pairs(nasal_assim) do text = mw.ustring.gsub(text, "N(" .. key .. ")", val .. "%1") end
	text = gsub(text, "([aiueēoāīū])N ", "%1̃ ")
	text = gsub(text, "(.?)N", "%1̃")

	-- Resolve the "ː" placeholder by doubling the character that follows it.
	text = gsub(text, "ː(.)", "%1%1")

	text = gsub(text, " ?।", ".")

	text = gsub(text, " $", "")

	-- Latin-level clean-up of sequences produced by the rules above.
	text = gsub(text, "aā", "ā")
	text = gsub(text, "aê", "ê")
	text = gsub(text, "ah", "êh")
	text = gsub(text, "uō", "ū")
	text = gsub(text, "iō", "ī")
	text = gsub(text, "ĩ", "ē̃")
	text = gsub(text, "nj", "ñj")
	text = gsub(text, "nk", "ṅk")
	text = gsub(text, "ng", "ṅg")
	text = gsub(text, "ṅj", "ñj")

	return text
end

return export
This is for use with lists + -- where the first item is a numbered parameter and the + -- subsequent ones are named, such as 1, pl2, pl3. + if string.find(param.list, "=") then + patterns["^" .. string.gsub(plain(param.list), "=", "(%%d+)") .. "$"] = name + else + patterns["^" .. plain(param.list) .. "(%d+)$"] = name + end + elseif type(name) == "number" then + -- If the name is a number, then all indexed parameters from + -- this number onwards go in the list. + list_from_index = name + else + if string.find(name, "=") then + patterns["^" .. string.gsub(plain(name), "=", "(%%d+)") .. "$"] = string.gsub(name, "=", "") + else + patterns["^" .. plain(name) .. "(%d+)$"] = name + end + end + + if string.find(name, "=") then + -- DO NOT SIDE-EFFECT A TABLE WHILE ITERATING OVER IT. + -- Some elements may be skipped or processed twice if you do. + -- Instead, track the changes we want to make to `params`, and + -- do them after the iteration over `params` is done. + table.insert(names_with_equal_sign, name) + end + elseif param.default ~= nil then + args_new[name] = param.default + end + end + + -- Process required changes to `params` + for _, name in ipairs(names_with_equal_sign) do + require("debug").track("parameters/name with equals") + params[string.gsub(name, "=", "")] = params[name] + params[name] = nil + end + + -- Process the arguments + local args_unknown = {} + + for name, val in pairs(args) do + local index = nil + + if type(name) == "number" then + if list_from_index ~= nil and name >= list_from_index then + index = name - list_from_index + 1 + name = list_from_index + end + else + -- Does this argument name match a pattern? + for pattern, pname in pairs(patterns) do + index = mw.ustring.match(name, pattern) + + -- It matches, so store the parameter name and the + -- numeric index extracted from the argument name. 
+ if index then + index = tonumber(index) + name = pname + break + end + end + end + + local param = params[name] + + -- If a parameter without the trailing index was found, and + -- require_index is set on the param, set the param to nil to treat it + -- as if it isn't recognized. + if not index and param and param.require_index then param = nil end + + -- If no index was found, use 1 as the default index. + -- This makes list parameters like g, g2, g3 put g at index 1. + index = index or 1 + + -- If the argument is not in the list of parameters, trigger an error. + -- return_unknown suppresses the error, and stores it in a separate list instead. + if not param then + if return_unknown then + args_unknown[name] = val + else + error("The parameter \"" .. name .. "\" is not used by this template.", 2) + end + else + -- Remove leading and trailing whitespace unless allow_whitespace is true. + if not param.allow_whitespace then val = mw.text.trim(val) end + + -- Empty string is equivalent to nil unless allow_empty is true. + if val == "" and not param.allow_empty then val = nil end + + -- Convert to proper type if necessary. + if param.type == "boolean" then + val = not (not val or val == "" or val == "0" or val == "no" or val == "n" or val == "false") + elseif param.type == "number" then + val = tonumber(val) + elseif param.type then + require("debug").track {"parameters/unrecognized type", "parameters/unrecognized type/" .. tostring(param.type)} + end + + -- Can't use "if val" alone, because val may be a boolean false. + if val ~= nil then + -- Mark it as no longer required, as it is present. + required[param.alias_of or name] = nil + + -- Store the argument value. + if param.list then + -- If the parameter is an alias of another, store it as the original, + -- but avoid overwriting it; the original takes precedence. + if not param.alias_of then + args_new[name][index] = val + + -- Store the highest index we find. 
+ args_new[name].maxindex = math.max(index, args_new[name].maxindex) + elseif args[param.alias_of] == nil then + if params[param.alias_of] and params[param.alias_of].list then + args_new[param.alias_of][index] = val + + -- Store the highest index we find. + args_new[param.alias_of].maxindex = math.max(index, args_new[param.alias_of].maxindex) + else + args_new[param.alias_of] = val + end + end + else + -- If the parameter is an alias of another, store it as the original, + -- but avoid overwriting it; the original takes precedence. + if not param.alias_of then + args_new[name] = val + elseif args[param.alias_of] == nil then + if params[param.alias_of] and params[param.alias_of].list then + args_new[param.alias_of][1] = val + + -- Store the highest index we find. + args_new[param.alias_of].maxindex = math.max(1, args_new[param.alias_of].maxindex) + else + args_new[param.alias_of] = val + end + end + end + end + end + end + + -- The required table should now be empty. + -- If any entry remains, trigger an error, unless we're in the template namespace. + if mw.title.getCurrentTitle().nsText ~= "Template" then + local list = {} + for name, param in pairs(required) do table.insert(list, name) end + + local count = #list + if count == 1 then + error("The parameter \"" .. list[1] .. "\" is required.", 2) + elseif count == 2 then + error("The parameters \"" .. table.concat(list, "\" and \"") .. "\" are required.", 2) + elseif count > 2 then + error("The parameters \"" .. mw.text.listToText(list, "\", \"", "\", and \"") .. "\" are required.", 2) + end + end + + -- Remove holes in any list parameters if needed. 
+ for name, val in pairs(args_new) do if type(val) == "table" and not params[name].allow_holes then args_new[name] = remove_holes(val) end end + + if return_unknown then + return args_new, args_unknown + else + return args_new + end +end + +return export diff --git a/wiktra/wikt/translit/peo-translit.lua b/wiktra/wikt/translit/peo-translit.lua new file mode 100644 index 0000000..685afa9 --- /dev/null +++ b/wiktra/wikt/translit/peo-translit.lua @@ -0,0 +1,95 @@ +local export = {} + +local alphabetic_tt = { + -- this should only contain alphabetic characters + ["𐎠"] = "a", + ["𐎡"] = "i", + ["𐎢"] = "u", + ["𐎣"] = "k", + ["𐎤"] = "ku", + ["𐎥"] = "g", + ["𐎦"] = "gu", + ["𐎧"] = "x", + ["𐎨"] = "c", + ["𐎩"] = "j", + ["𐎪"] = "ji", + ["𐎫"] = "t", + ["𐎬"] = "tu", + ["𐎭"] = "d", + ["𐎮"] = "di", + ["𐎯"] = "du", + ["𐎰"] = "θ", + ["𐎱"] = "p", + ["𐎲"] = "b", + ["𐎳"] = "f", + ["𐎴"] = "n", + ["𐎵"] = "nu", + ["𐎶"] = "m", + ["𐎷"] = "mi", + ["𐎸"] = "mu", + ["𐎹"] = "y", + ["𐎺"] = "v", + ["𐎻"] = "vi", + ["𐎼"] = "r", + ["𐎽"] = "ru", + ["𐎾"] = "l", + ["𐎿"] = "s", + ["𐏀"] = "z", + ["𐏁"] = "š", + ["𐏂"] = "ç", + ["𐏃"] = "h", + ["𐏈"] = "AM", -- Auramazdā + ["𐏉"] = "AM", -- Auramazdā + ["𐏊"] = "AMha", -- Auramazdāha + ["𐏋"] = "XŠ", -- xšāyathiya + ["𐏌"] = "DH", -- dahyāuš + ["𐏍"] = "DH", -- dahyāuš + ["𐏎"] = "BG", -- baga + ["𐏏"] = "BU" -- būmiš +} + +local nonalphabetic_tt = { + ["𐏐"] = " : " -- word divider +} + +local numbers = {["𐏑"] = 1, ["𐏒"] = 2, ["𐏓"] = 10, ["𐏔"] = 20, ["𐏕"] = 100} + +function export.convert_numbers(numeric_str) + local total = 0 + for c in mw.ustring.gmatch(numeric_str, ".") do total = total + numbers[c] end + return total +end + +function export.tr(text, lang, sc) + -- If the script is not Xpeo, do not transliterate + if sc ~= "Xpeo" then return end + + local t = {} + local preceding_num = false + local need_hyphen = false + -- Transliterate characters + text = mw.ustring.gsub(text, ".", function(c) + if alphabetic_tt[c] then + if need_hyphen then t[#t + 1] = "-" end + 
t[#t + 1] = alphabetic_tt[c] + need_hyphen = true + else + need_hyphen = false + if numbers[c] then + if preceding_num then + t[#t] = t[#t] + numbers[c] + else + t[#t + 1] = numbers[c] + end + preceding_num = true + else + preceding_num = false + t[#t + 1] = nonalphabetic_tt[c] or c + end + end + end) + + return table.concat(t) +end + +return export diff --git a/wiktra/wikt/translit/pgl-translit.lua b/wiktra/wikt/translit/pgl-translit.lua new file mode 100644 index 0000000..6efce3a --- /dev/null +++ b/wiktra/wikt/translit/pgl-translit.lua @@ -0,0 +1,44 @@ +local export = {} + +local chars = { + [" "] = " ", + ["·"] = ".", + + ["ᚁ"] = "b", -- beith + ["ᚂ"] = "l", -- luis + ["ᚃ"] = "v", -- fearn + ["ᚄ"] = "s", -- saille + ["ᚅ"] = "n", -- nuin + + ["ᚆ"] = "h", -- uath + ["ᚇ"] = "d", -- dair + ["ᚈ"] = "t", -- tinne + ["ᚉ"] = "c", -- coll + ["ᚊ"] = "q", -- ceirt + + ["ᚋ"] = "m", -- muin + ["ᚌ"] = "g", -- gort + ["ᚍ"] = "ŋ", -- ngétal + ["ᚎ"] = "z", -- straif + ["ᚏ"] = "r", -- ruis + + ["ᚐ"] = "a", -- ailm + ["ᚑ"] = "o", -- onn + ["ᚒ"] = "u", -- úr + ["ᚓ"] = "e", -- edad + ["ᚔ"] = "i", -- idad + + ["ᚕ"] = "k", -- Or "ea" + ["ᚖ"] = "oi", + ["ᚗ"] = "ui", + ["ᚘ"] = "p", -- Or "ia" + ["ᚙ"] = "x", -- Or "ae" + ["ᚚ"] = "p", -- peithboc "soft B" + + ["᚛"] = "", + ["᚜"] = "" +} + +function export.tr(text, lang, sc) return (mw.ustring.gsub(text, ".", chars)) end + +return export diff --git a/wiktra/wikt/translit/phli-translit.lua b/wiktra/wikt/translit/phli-translit.lua new file mode 100644 index 0000000..79e3aaf --- /dev/null +++ b/wiktra/wikt/translit/phli-translit.lua @@ -0,0 +1,50 @@ +local export = {} + +local tt = { + ["𐭠"] = "ʾ", -- aleph + ["𐭡"] = "b", -- beth + ["𐭢"] = "g", -- gimil + ["𐭣"] = "d", -- daleth + ["𐭤"] = "h", -- he + ["𐭥"] = "ʿ", -- waw-ayin-resh + ["𐭦"] = "z", -- zayin + ["𐭧"] = "ḥ", -- heth + ["𐭨"] = "ṭ", -- teth + ["𐭩"] = "y", -- yodh + ["𐭪"] = "k", -- kaph + ["𐭫"] = "l", -- lamedh + ["𐭬"] = "m", -- mem-qoph + ["𐭭"] = "n", -- nun + ["𐭮"] = "s", -- samekh 
+ ["𐭯"] = "p", -- pe + ["𐭰"] = "c", -- sadhe + ["𐭱"] = "š", -- shin + ["𐭲"] = "t" -- taw +} + +local numbers = {["𐭸"] = 1, ["𐭹"] = 2, ["𐭺"] = 3, ["𐭻"] = 4, ["𐭼"] = 10, ["𐭽"] = 20, ["𐭾"] = 100, ["𐭿"] = 1000} + +function export.convert_numbers(numeric_str) + local total = 0 + for c in mw.ustring.gmatch(numeric_str, ".") do total = total + numbers[c] end + return total +end + +function export.tr(text, lang, sc) + -- If the script is not Phli, do not transliterate + if sc ~= "Phli" then return end + + -- Category to catch automatic transcriptions of waw-ayin-resh and mem-qoph + -- [[Category:Automatic Inscriptional Pahlavi transliterations containing ambiguous characters]] + local ambig_note = "" + if mw.ustring.match(text, "[𐭬𐭥]") then ambig_note = "[[Category:Automatic Inscriptional Pahlavi transliterations containing ambiguous characters]]" end + + if mw.ustring.match(text, "[𐭠-𐭿]") then text = mw.ustring.gsub(text, "[𐭸-𐭿]+", export.convert_numbers) end + + -- Transliterate characters + text = mw.ustring.gsub(text, ".", tt) + + return text .. 
ambig_note +end + +return export diff --git a/wiktra/wikt/translit/pi-Latn-translit.lua b/wiktra/wikt/translit/pi-Latn-translit.lua new file mode 100644 index 0000000..b97a274 --- /dev/null +++ b/wiktra/wikt/translit/pi-Latn-translit.lua @@ -0,0 +1,349 @@ +-- For Lao script, see https://www.unicode.org/charts/PDF/U0E80.pdf (Unicode 12.0+) +local export = {} +local gsub = mw.ustring.gsub +local match = mw.ustring.match +local sub = mw.ustring.sub +local u = mw.ustring.char + +local c = { + ["Deva"] = {["k"] = "क", ["kh"] = "ख", ["g"] = "ग", ["gh"] = "घ", ["ṅ"] = "ङ", ["c"] = "च", ["ch"] = "छ", ["j"] = "ज", ["jh"] = "झ", ["ñ"] = "ञ", ["ṭ"] = "ट", ["ṭh"] = "ठ", ["ḍ"] = "ड", ["ḍh"] = "ढ", ["ṇ"] = "ण", ["t"] = "त", ["th"] = "थ", ["d"] = "द", ["dh"] = "ध", ["n"] = "न", ["p"] = "प", ["ph"] = "फ", ["b"] = "ब", ["bh"] = "भ", ["m"] = "म", ["y"] = "य", ["r"] = "र", ["l"] = "ल", ["v"] = "व", ["s"] = "स", ["h"] = "ह", ["ḷ"] = "ळ", ["a"] = "अ", ["ā"] = "आ", ["i"] = "इ", ["ī"] = "ई", ["u"] = "उ", ["ū"] = "ऊ", ["e"] = "ए", ["o"] = "ओ", ["ṃ"] = "ं", [""] = ""}, + + ["Beng"] = {["k"] = "ক", ["kh"] = "খ", ["g"] = "গ", ["gh"] = "ঘ", ["ṅ"] = "ঙ", ["c"] = "চ", ["ch"] = "ছ", ["j"] = "জ", ["jh"] = "ঝ", ["ñ"] = "ঞ", ["ṭ"] = "ট", ["ṭh"] = "ঠ", ["ḍ"] = "ড", ["ḍh"] = "ঢ", ["ṇ"] = "ণ", ["t"] = "ত", ["th"] = "থ", ["d"] = "দ", ["dh"] = "ধ", ["n"] = "ন", ["p"] = "প", ["ph"] = "ফ", ["b"] = "ব", ["bh"] = "ভ", ["m"] = "ম", ["y"] = "য", ["r"] = "র", ["l"] = "ল", ["v"] = "ৰ", ["s"] = "স", ["h"] = "হ", ["ḷ"] = "ল়", ["a"] = "অ", ["ā"] = "আ", ["i"] = "ই", ["ī"] = "ঈ", ["u"] = "উ", ["ū"] = "ঊ", ["e"] = "এ", ["o"] = "ও", ["ṃ"] = "ং", [""] = ""}, + + ["Brah"] = {["k"] = "𑀓", ["kh"] = "𑀔", ["g"] = "𑀕", ["gh"] = "𑀖", ["ṅ"] = "𑀗", ["c"] = "𑀘", ["ch"] = "𑀙", ["j"] = "𑀚", ["jh"] = "𑀛", ["ñ"] = "𑀜", ["ṭ"] = "𑀝", ["ṭh"] = "𑀞", ["ḍ"] = "𑀟", ["ḍh"] = "𑀠", ["ṇ"] = "𑀡", ["t"] = "𑀢", ["th"] = "𑀣", ["d"] = "𑀤", ["dh"] = "𑀥", ["n"] = "𑀦", ["p"] = "𑀧", ["ph"] = "𑀨", ["b"] = "𑀩", ["bh"] = "𑀪", ["m"] = "𑀫", ["y"] = "𑀬", 
["r"] = "𑀭", ["l"] = "𑀮", ["v"] = "𑀯", ["s"] = "𑀲", ["h"] = "𑀳", ["ḷ"] = "𑀴", ["a"] = "𑀅", ["ā"] = "𑀆", ["i"] = "𑀇", ["ī"] = "𑀈", ["u"] = "𑀉", ["ū"] = "𑀊", ["e"] = "𑀏", ["o"] = "𑀑", ["ṃ"] = "𑀁", [""] = ""}, + + ["Khmr"] = {["k"] = "ក", ["kh"] = "ខ", ["g"] = "គ", ["gh"] = "ឃ", ["ṅ"] = "ង", ["c"] = "ច", ["ch"] = "ឆ", ["j"] = "ជ", ["jh"] = "ឈ", ["ñ"] = "ញ", ["ṭ"] = "ដ", ["ṭh"] = "ឋ", ["ḍ"] = "ឌ", ["ḍh"] = "ឍ", ["ṇ"] = "ណ", ["t"] = "ត", ["th"] = "ថ", ["d"] = "ទ", ["dh"] = "ធ", ["n"] = "ន", ["p"] = "ប", ["ph"] = "ផ", ["b"] = "ព", ["bh"] = "ភ", ["m"] = "ម", ["y"] = "យ", ["r"] = "រ", ["l"] = "ល", ["v"] = "វ", ["s"] = "ស", ["h"] = "ហ", ["ḷ"] = "ឡ", ["a"] = "អ", ["ā"] = "អា", ["i"] = "ឥ", ["ī"] = "ឦ", ["u"] = "ឧ", ["ū"] = "ឨ", ["e"] = "ឯ", ["o"] = "ឲ", ["ṃ"] = "ំ", [""] = ""}, + + ["Mymr"] = {["k"] = "က", ["kh"] = "ခ", ["g"] = "ဂ", ["gh"] = "ဃ", ["ṅ"] = "င", ["c"] = "စ", ["ch"] = "ဆ", ["j"] = "ဇ", ["jh"] = "ဈ", ["ñ"] = "ဉ", ["ṭ"] = "ဋ", ["ṭh"] = "ဌ", ["ḍ"] = "ဍ", ["ḍh"] = "ဎ", ["ṇ"] = "ဏ", ["t"] = "တ", ["th"] = "ထ", ["d"] = "ဒ", ["dh"] = "ဓ", ["n"] = "န", ["p"] = "ပ", ["ph"] = "ဖ", ["b"] = "ဗ", ["bh"] = "ဘ", ["m"] = "မ", ["y"] = "ယ", ["r"] = "ရ", ["l"] = "လ", ["v"] = "ဝ", ["s"] = "သ", ["h"] = "ဟ", ["ḷ"] = "ဠ", ["a"] = "အ", ["ā"] = "အာ", ["i"] = "ဣ", ["ī"] = "ဤ", ["u"] = "ဥ", ["ū"] = "ဦ", ["e"] = "ဧ", ["o"] = "ဩ", ["ṃ"] = "ံ", [""] = ""}, + + ["Sinh"] = {["k"] = "ක", ["kh"] = "ඛ", ["g"] = "ග", ["gh"] = "ඝ", ["ṅ"] = "ඞ", ["c"] = "ච", ["ch"] = "ඡ", ["j"] = "ජ", ["jh"] = "ඣ", ["ñ"] = "ඤ", ["ṭ"] = "ට", ["ṭh"] = "ඨ", ["ḍ"] = "ඩ", ["ḍh"] = "ඪ", ["ṇ"] = "ණ", ["t"] = "ත", ["th"] = "ථ", ["d"] = "ද", ["dh"] = "ධ", ["n"] = "න", ["p"] = "ප", ["ph"] = "ඵ", ["b"] = "බ", ["bh"] = "භ", ["m"] = "ම", ["y"] = "ය", ["r"] = "ර", ["l"] = "ල", ["v"] = "ව", ["s"] = "ස", ["h"] = "හ", ["ḷ"] = "ළ", ["a"] = "අ", ["ā"] = "ආ", ["i"] = "ඉ", ["ī"] = "ඊ", ["u"] = "උ", ["ū"] = "ඌ", ["e"] = "එ", ["o"] = "ඔ", ["ṃ"] = "ං", [""] = ""}, + + ["Thai"] = { + ["k"] = "ก", + ["kh"] = "ข", + ["g"] = "ค", + ["gh"] = 
"ฆ", + ["ṅ"] = "ง", + ["c"] = "จ", + ["ch"] = "ฉ", + ["j"] = "ช", + ["jh"] = "ฌ", + ["ñ"] = "ญ", + ["ṭ"] = "ฏ", + ["ṭh"] = "ฐ", + ["ḍ"] = "ฑ", + ["ḍh"] = "ฒ", + ["ṇ"] = "ณ", + ["t"] = "ต", + ["th"] = "ถ", + ["d"] = "ท", + ["dh"] = "ธ", + ["n"] = "น", + ["p"] = "ป", + ["ph"] = "ผ", + ["b"] = "พ", + ["bh"] = "ภ", + ["m"] = "ม", + ["y"] = "ย", + ["r"] = "ร", + ["l"] = "ล", + ["v"] = "ว", + ["s"] = "ส", + ["h"] = "ห", + ["ḷ"] = "ฬ", + + ["a"] = "อ", + ["ā"] = "อา", + ["i"] = "อิ", + ["ī"] = "อี", + ["u"] = "อุ", + ["ū"] = "อู", + ["e"] = "อเ", -- to be swapped later + ["o"] = "อโ", -- to be swapped later + + ["ṃ"] = "ํ", + [""] = "" + }, + + ["Lana"] = {["k"] = "ᨠ", ["kh"] = "ᨡ", ["g"] = "ᨣ", ["gh"] = "ᨥ", ["ṅ"] = "ᨦ", ["c"] = "ᨧ", ["ch"] = "ᨨ", ["j"] = "ᨩ", ["jh"] = "ᨫ", ["ñ"] = "ᨬ", ["ṭ"] = "ᨭ", ["ṭh"] = "ᨮ", ["ḍ"] = "ᨯ", ["ḍh"] = "ᨰ", ["ṇ"] = "ᨱ", ["t"] = "ᨲ", ["th"] = "ᨳ", ["d"] = "ᨴ", ["dh"] = "ᨵ", ["n"] = "ᨶ", ["p"] = "ᨷ", ["ph"] = "ᨹ", ["b"] = "ᨻ", ["bh"] = "ᨽ", ["m"] = "ᨾ", ["y"] = "ᨿ", ["r"] = "ᩁ", ["l"] = "ᩃ", ["v"] = "ᩅ", ["s"] = "ᩈ", ["h"] = "ᩉ", ["ḷ"] = "ᩊ", ["a"] = "ᩋ", ["ā"] = "ᩋᩣ", ["i"] = "ᩍ", ["ī"] = "ᩎ", ["u"] = "ᩏ", ["ū"] = "ᩐ", ["e"] = "ᩑ", ["o"] = "ᩋᩰ", ["ṃ"] = "ᩴ", [""] = ""}, + + ["Laoo"] = { + ["k"] = "ກ", + ["kh"] = "ຂ", + ["g"] = "ຄ", + ["gh"] = "ຆ", + ["ṅ"] = "ງ", + ["c"] = "ຈ", + ["ch"] = "ຉ", + ["j"] = "ຊ", + ["jh"] = "ຌ", + ["ñ"] = "ຎ", + ["ṭ"] = "ຏ", + ["ṭh"] = "ຐ", + ["ḍ"] = "ຑ", + ["ḍh"] = "ຒ", + ["ṇ"] = "ຓ", + ["t"] = "ຕ", + ["th"] = "ຖ", + ["d"] = "ທ", + ["dh"] = "ຘ", + ["n"] = "ນ", + ["p"] = "ປ", + ["ph"] = "ຜ", + ["b"] = "ພ", + ["bh"] = "ຠ", + ["m"] = "ມ", + ["y"] = "ຍ", + ["r"] = "ຣ", + ["l"] = "ລ", + ["v"] = "ວ", + ["s"] = "ສ", + ["h"] = "ຫ", + ["ḷ"] = "ຬ", + + ["a"] = "ອ", + ["ā"] = "ອາ", + ["i"] = "ອິ", + ["ī"] = "ອີ", + ["u"] = "ອຸ", + ["ū"] = "ອູ", + ["e"] = "ອເ", -- to be swapped later + ["o"] = "ອໂ", -- to be swapped later + + ["ṃ"] = "ໍ", + [""] = "" + } + +} + +local v = { + ["Deva"] = {["a"] = "", ["ā"] = "ा", ["i"] = 
"ि", ["ī"] = "ी", ["u"] = "ु", ["ū"] = "ू", ["e"] = "े", ["o"] = "ो", [""] = ""}, + + ["Beng"] = {["a"] = "", ["ā"] = "া", ["i"] = "ি", ["ī"] = "ী", ["u"] = "ু", ["ū"] = "ূ", ["e"] = "ে", ["o"] = "ো", [""] = ""}, + + ["Brah"] = {["a"] = "", ["ā"] = "𑀸", ["i"] = "𑀺", ["ī"] = "𑀻", ["u"] = "𑀼", ["ū"] = "𑀽", ["e"] = "𑁂", ["o"] = "𑁄", [""] = ""}, + + ["Khmr"] = {["a"] = "", ["ā"] = "ា", ["i"] = "ិ", ["ī"] = "ី", ["u"] = "ុ", ["ū"] = "ូ", ["e"] = "េ", ["o"] = "ោ", [""] = ""}, + + ["Mymr"] = {["a"] = "", ["ā"] = "ာ", ["i"] = "ိ", ["ī"] = "ီ", ["u"] = "ု", ["ū"] = "ူ", ["e"] = "ေ", ["o"] = "ော", [""] = ""}, + + ["Sinh"] = {["a"] = "", ["ā"] = "ා", ["i"] = "ි", ["ī"] = "ී", ["u"] = "ු", ["ū"] = "ූ", ["e"] = "ෙ", ["o"] = "ො", [""] = ""}, + + ["Thai"] = {["a"] = "", ["ā"] = "า", ["i"] = "ิ", ["ī"] = "ี", ["u"] = "ุ", ["ū"] = "ู", ["e"] = "เ", ["o"] = "โ", [""] = ""}, + + ["Lana"] = {["a"] = "", ["ā"] = "ᩣ", ["i"] = "ᩥ", ["ī"] = "ᩦ", ["u"] = "ᩩ", ["ū"] = "ᩪ", ["e"] = "ᩮ", ["o"] = "ᩮᩣ", [""] = ""}, + + ["Laoo"] = {["a"] = "", ["ā"] = "າ", ["i"] = "ິ", ["ī"] = "ີ", ["u"] = "ຸ", ["ū"] = "ູ", ["e"] = "ເ", ["o"] = "ໂ", [""] = ""} +} + +local s = { + ["Deva"] = {["0"] = "०", ["1"] = "१", ["2"] = "२", ["3"] = "३", ["4"] = "४", ["5"] = "५", ["6"] = "६", ["7"] = "७", ["8"] = "८", ["9"] = "९", ["."] = "॥", [","] = "।", ["-"] = "-"}, + + ["Beng"] = {["0"] = "০", ["1"] = "১", ["2"] = "২", ["3"] = "৩", ["4"] = "৪", ["5"] = "৫", ["6"] = "৬", ["7"] = "৭", ["8"] = "৮", ["9"] = "৯", ["."] = ".", [","] = ",", ["-"] = "-"}, + + ["Brah"] = {["0"] = "𑁦", ["1"] = "𑁧", ["2"] = "𑁨", ["3"] = "𑁩", ["4"] = "𑁪", ["5"] = "𑁫", ["6"] = "𑁬", ["7"] = "𑁭", ["8"] = "𑁮", ["9"] = "𑁯", ["."] = "𑁈", [","] = "𑁇", ["-"] = "-"}, + + ["Khmr"] = {["0"] = "០", ["1"] = "១", ["2"] = "២", ["3"] = "៣", ["4"] = "៤", ["5"] = "៥", ["6"] = "៦", ["7"] = "៧", ["8"] = "៨", ["9"] = "៩", ["."] = "៕", [","] = "។", ["-"] = "-"}, + + ["Mymr"] = {["0"] = "၀", ["1"] = "၁", ["2"] = "၂", ["3"] = "၃", ["4"] = "၄", ["5"] = "၅", ["6"] = "၆", 
["7"] = "၇", ["8"] = "၈", ["9"] = "၉", ["."] = "။", [","] = "၊", ["-"] = "-"}, + + ["Sinh"] = {["0"] = "0", ["1"] = "1", ["2"] = "2", ["3"] = "3", ["4"] = "4", ["5"] = "5", ["6"] = "6", ["7"] = "7", ["8"] = "8", ["9"] = "9", ["."] = ".", [","] = ",", ["-"] = "-"}, + + ["Thai"] = {["0"] = "๐", ["1"] = "๑", ["2"] = "๒", ["3"] = "๓", ["4"] = "๔", ["5"] = "๕", ["6"] = "๖", ["7"] = "๗", ["8"] = "๘", ["9"] = "๙", ["."] = "๚", [","] = "ฯ", ["-"] = "-"}, + + ["Lana"] = {["0"] = "᪐", ["1"] = "᪑", ["2"] = "᪒", ["3"] = "᪓", ["4"] = "᪔", ["5"] = "᪕", ["6"] = "᪖", ["7"] = "᪗", ["8"] = "᪘", ["9"] = "᪙", ["."] = "᪩", [","] = "᪨", ["-"] = "-"}, + + ["Laoo"] = {["0"] = "໐", ["1"] = "໑", ["2"] = "໒", ["3"] = "໓", ["4"] = "໔", ["5"] = "໕", ["6"] = "໖", ["7"] = "໗", ["8"] = "໘", ["9"] = "໙", ["."] = "ຯຯ", [","] = "ຯ", ["-"] = "-"} +} + +local join = {["Deva"] = "्", ["Beng"] = "্", ["Brah"] = "𑁆", ["Khmr"] = "្", ["Mymr"] = "္", ["Sinh"] = u(0x200d, 0x0dca), ["Thai"] = "ฺ", ["Lana"] = "᩠", ["Laoo"] = "຺"} + +local kill = {["Deva"] = "्", ["Beng"] = "্", ["Brah"] = "𑁆", ["Khmr"] = "៑", ["Mymr"] = "်", ["Sinh"] = "්", ["Thai"] = "ฺ", ["Lana"] = "᩺", ["Laoo"] = "຺"} + +local sinh_cjct = {["ක‍්ව"] = "ක්‍ව", ["ත‍්ථ"] = "ත්‍ථ", ["ත‍්ව"] = "ත්‍ව", ["න‍්ථ"] = "න්‍ථ", ["න‍්ද"] = "න්‍ද", ["න‍්ධ"] = "න්‍ධ", ["න‍්ව"] = "න්‍ව"} + +local nukta = u(0x09bc) -- Just list all those used here. + +local variations = { + ["Mon"] = { + ["ဈ"] = "ၛ", + ["ဤ"] = "ဣဳ", + ["ဦ"] = "ဥု", + ["ဧ"] = "ဨ", + ["ီ"] = "ဳ" -- for IM fix below + -- Unicode doesn't have "great nya" so just leave ည္ည as is. (It looks like ည with one extra curve.) 
+ }, + ["OldShan"] = { + ["က"] = "ၵ", + ["ခ"] = "ၶ", + ["ဂ"] = "ၷ", + ["ဃ"] = "ꧠ", + ["စ"] = "ၸ", + ["ဆ"] = "ꧡ", + ["ဇ"] = "ၹ", + ["ဈ"] = "ꧢ", + ["ဉ"] = "ၺ", + ["ည"] = "ၺ္ၺ", + ["ဋ"] = "ꩦ", + ["ဌ"] = "ꩧ", + ["ဍ"] = "ꩨ", + ["ဎ"] = "ꩩ", + ["ဏ"] = "ꧣ", + ["ဒ"] = "ၻ", + ["ဓ"] = "ꩪ", + ["န"] = "ၼ", + ["ဖ"] = "ၽ", + ["ဗ"] = "ၿ", + ["ဘ"] = "ꧤ", + ["ဟ"] = "ႁ", + ["ဠ"] = "ꩮ", + ["အ"] = "ဢ", + ["ဣ"] = "ဢိ", + ["ဤ"] = "ဢီ", + ["ဥ"] = "ဢု", + ["ဦ"] = "ဢူ", + ["ဧ"] = "ဢေ", + ["ဩ"] = "ဢေႃ", + ["ါ"] = "ႃ", + ["ာ"] = "ႃ" + -- Unicode doesn't have "Shan great sa" so just leave ဿ as is. + }, + ["NewShan"] = { + -- includes all Old Shan and the followings + ["ဿ"] = "သ်သ", + ["္"] = "်" + } +} + +-- Unnatural sequences of combining marks are frequently unreadable. Therefore, they are displayed on +-- bearers so that the code can be understood and, if necessary, corrected. +local dc = function(text) return gsub(text, "[อອ]", "") end -- Discard bearer + +local transform +function export.mono_form(text, script) + + local result = text + + if script == "Thai" then + result = gsub(result, "ํ", "งฺ") + result = gsub(result, "([ก-ฮ])ฺ", "ั%1ฺ") + result = gsub(result, "([ก-ฮ])([^ะัาิีฺุู])", "%1ะ%2") + result = gsub(result, "([ก-ฮ])([^ะัาิีฺุู])", "%1ะ%2") -- twice + result = gsub(result, "([ก-ฮ])$", "%1ะ") + -- result = gsub(result, "([ก-ฮ])([าิีุู])ั", "%1%2") + result = gsub(result, dc("([ก-ฮ])([าอิอีอฺอุอู])อั"), "%1%2") + result = gsub(result, "([เโ])([ก-ฮ])([ะั])", "%1%2") + result = gsub(result, "^ั", "") + result = gsub(result, "([%s%p])ั", "%1") + result = gsub(result, "ฺ", "") + elseif script == "Laoo" then + -- Calculating transform in argument list of gsub() fails! + if not transform then + transform = { + {dc("ອໍ"), dc("ງອ຺")}, {dc("([ກ-ຮ])ອ຺"), dc("ອັ%1ອ຺")}, {dc("([ກ-ຮ])([^ະອັາອິອີອ຺ອຸອູ])"), "%1ະ%2"}, {dc("([ກ-ຮ])([^ະອັາອິອີອ຺ອຸອູ])"), "%1ະ%2"}, -- twice! 
+ {"([ກ-ຮ])$", "%1ະ"}, {dc("([ກ-ຮ])([າອິອີອ຺ອຸອູ])ອັ"), "%1%2"}, {dc("([ເໂ])([ກ-ຮ])([ະອັ])"), "%1%2"}, {dc("^ອັ"), ""}, {dc("([%s%p])ອັ"), "%1"}, {dc("ອ຺"), ""} + } + end + for _, v in ipairs(transform) do result = gsub(result, v[1], v[2]) end + end + + return result +end + +local function return_error(text) return error(("Unrecognised part: \"%s\""):format(text)) end + +function export.tr(text, script, options) + if type(text) == "table" then + options = {} + options.impl = text.args["impl"] + options.variation = text.args["variation"] -- ID of variation: [Mymr: 1=Mon, 2=Old Shan, 3=New Shan] + text, script = text.args[1], text.args[2] + end + if script == "Latn" then return text end + if not s[script] then return nil end + + text = mw.ustring.lower(text) + text = gsub(text, "[0-9%.,%-]", s[script]) + -- Compose patterns for processing onsets. + local letter = "[^" .. join[script] .. nukta .. "][" .. nukta .. "]?" + local letter_pair = "(" .. letter .. ")(" .. letter .. ")" + + for word in mw.ustring.gmatch(text, "[aāiīuūeoṃkhgṅcjñṭḍṇtdnpbmyrlḷvs]+") do + local word_conv, orig_word = {}, word + word = gsub(word, "([aāiīuūeo]ṃ?)", "%1 ") + word = gsub(word, " $", "") + + for syllable in mw.text.gsplit(word, " ") do + if not match(syllable, "[aāiīuūeoṃ]$") then syllable = syllable .. "a" .. kill[script] end + syllable = gsub(syllable, "^([khgṅcjñṭḍṇtdnpbmyrlḷvs]*)([aāiīuūeo])(ṃ?)([्্៑်්ฺ𑁆຺᩺]?)$", function(onset, vowel, coda, optJoin) + if onset == "" then + onset = vowel + vowel = "" + end + if not c[script][onset] then + onset = gsub(onset, ".h", c[script]) + onset = gsub(onset, ".", c[script]) + -- Join pairs of consonants + onset = gsub(onset, letter_pair, "%1" .. join[script] .. "%2") + -- Join adjacent consonants that were in different pairs. + onset = gsub(onset, letter_pair, "%1" .. join[script] .. "%2") + else + onset = c[script][onset] + end + + return onset .. (v[script][vowel] or return_error(vowel)) .. c[script][coda] .. 
optJoin + end) + + table.insert(word_conv, syllable) + end + word = table.concat(word_conv, "") + if script == "Thai" then + word = gsub(word, "(.)([เโ])", "%2%1") + elseif script == "Mymr" then + word = gsub(word, "င္", "င်္") + word = gsub(word, "(င်္)([ခဂငဒပဝ])(ေ?)ာ", "%1%2%3ါ") + word = gsub(word, "္[ယရ]", {["္ယ"] = "ျ", ["္ရ"] = "ြ"}) -- these not need tall aa + word = gsub(word, "^([ခဂငဒပဝ])(ေ?)ာ", "%1%2ါ") + word = gsub(word, "([^္])([ခဂငဒပဝ])(ေ?)ာ", "%1%2%3ါ") + word = gsub(word, "([ခဂငဒပဝ])(္[က-အဿ])(ေ?)ာ", "%1%2%3ါ") + word = gsub(word, "္[ဝဟ]", {["္ဝ"] = "ွ", ["္ဟ"] = "ှ"}) + word = gsub(word, "ဉ္ဉ", "ည") + word = gsub(word, "သ္သ", "ဿ") + if not (options and options.variation) then + -- Arg options should be optional, so nothing to do. + elseif options.variation == "1" then + word = gsub(word, ".", variations.Mon) + word = gsub(word, "ိံ", "ီ") -- fix IM + elseif options.variation == "2" then + word = gsub(word, ".", variations.OldShan) + elseif options.variation == "3" then + word = gsub(word, ".", variations.OldShan) + word = gsub(word, ".", variations.NewShan) + word = gsub(word, "်" .. "်", "်") -- fix nga + end + elseif script == "Lana" then + word = gsub(word, "ᨦ᩠", "ᩘ") + word = gsub(word, "^([ᨣᨴᨵᨷᩅ])(ᩮ?)ᩣ", "%1%2ᩤ") + word = gsub(word, "([^᩠])([ᨣᨴᨵᨷᩅ])(ᩮ?)ᩣ", "%1%2%3ᩤ") + word = gsub(word, "([ᨣᨴᨵᨷᩅ])(᩠[ᨠ-ᩌᩔ])(ᩮ?)ᩣ", "%1%2%3ᩤ") + word = gsub(word, "᩠[ᩁᩃ]", {["᩠ᩁ"] = "ᩕ", ["᩠ᩃ"] = "ᩖ"}) + word = gsub(word, "([ᨭ-ᨱ])᩠ᨮ", "%1ᩛ") + word = gsub(word, "([ᨷ-ᨾ])᩠ᨻ", "%1ᩛ") + word = gsub(word, "ᩈ᩠ᩈ", "ᩔ") + elseif script == "Beng" then + word = gsub(word, "ৰ্", "ৰ" .. u(0x200d) .. "্") -- ৰ্(v-) needs ZWJ to display correctly + elseif script == "Sinh" then + local js = join["Sinh"] + word = gsub(word, "(" .. js .. ")([යර])", u(0xdca, 0x200d) .. "%2") + word = gsub(word, "[කතන]" .. js .. 
"[ථදධව]", sinh_cjct) + elseif script == "Laoo" then + word = gsub(word, "(.)([ເໂ])", "%2%1") + end + text = gsub(text, orig_word, word, 1) + end + local impl = options and options.impl or "yes" + if impl == "no" then text = export.mono_form(text, script) end + return text +end + +return export diff --git a/wiktra/wikt/translit/pi-translit.lua b/wiktra/wikt/translit/pi-translit.lua new file mode 100644 index 0000000..b73f5a5 --- /dev/null +++ b/wiktra/wikt/translit/pi-translit.lua @@ -0,0 +1,850 @@ +local export = {} +local gsub = mw.ustring.gsub + +function export.trwo(text, lang, sc, options) + if (sc == "Brah") then + text = require("Brah-translit").tr(text, lang, sc) + elseif (sc == "Deva") then + text = require("sa-translit").tr(text, lang, sc) + elseif (sc == "Sinh") then + text = require("si-translit").tr(text, lang, sc) + elseif sc == "Beng" or sc == "Mymr" or sc == "Lana" or sc == "Khmr" or sc == "Thai" or sc == "Laoo" then + local u = mw.ustring.char + local function dc(x) -- Use this to make marks legible. The name 'dc' means 'drop carrier'. + return gsub(x, "[𑀓कকකကกᨠកກ]", "") -- These are the letter ka in the 9 supported Indic scripts. + end + local consonants = { -- And parts 1 of NFC-multipart independent vowels! + -- Bengali + ["ক"] = "k", + ["খ"] = "kh", + ["গ"] = "g", + ["ঘ"] = "gh", + ["ঙ"] = "ṅ", + ["চ"] = "c", + ["ছ"] = "ch", + ["জ"] = "j", + ["ঝ"] = "jh", + ["ঞ"] = "ñ", + ["ট"] = "ṭ", + ["ঠ"] = "ṭh", + ["ড"] = "ḍ", + ["ঢ"] = "ḍh", + ["ণ"] = "ṇ", + ["ত"] = "t", + ["থ"] = "th", + ["দ"] = "d", + ["ধ"] = "dh", + ["ন"] = "n", + ["প"] = "p", + ["ফ"] = "ph", + ["ব"] = "b", + ["ভ"] = "bh", + ["ম"] = "m", + ["য"] = "y", + ["র"] = "r", + ["ল"] = "l", + ["ল়"] = "ḷ", -- xx['व']='v', + ["শ"] = "ś", + ["ষ"] = "ṣ", + ["স"] = "s", + ["হ"] = "h", + ["ৰ"] = "v", + ["ৰ" .. 
u(0x200d)] = "v", + ["ৱ"] = "v", + -- Myanmar + ["က"] = "k", + ["ခ"] = "kh", + ["ဂ"] = "g", + ["ဃ"] = "gh", + ["င"] = "ṅ", + ["စ"] = "c", + ["ဆ"] = "ch", + ["ဇ"] = "j", + ["ဈ"] = "jh", + ["ဉ"] = "ñ", + ["ဋ"] = "ṭ", + ["ဌ"] = "ṭh", + ["ဍ"] = "ḍ", + ["ဎ"] = "ḍh", + ["ဏ"] = "ṇ", + ["တ"] = "t", + ["ထ"] = "th", + ["ဒ"] = "d", + ["ဓ"] = "dh", + ["န"] = "n", + ["ပ"] = "p", + ["ဖ"] = "ph", + ["ဗ"] = "b", + ["ဘ"] = "bh", + ["မ"] = "m", + ["ယ"] = "y", + ["ရ"] = "r", + ["လ"] = "l", + ["ဝ"] = "v", + ["ဠ"] = "ḷ", + ["ၐ"] = "ś", + ["ၑ"] = "ṣ", + ["သ"] = "s", + ["ဟ"] = "h", + ["ည"] = "ññ", + ["ဿ"] = "ss", + ["အ"] = "ʼ", + -- Subscript consonants: 103B..103E, 105E..1060 + [dc("ကျ")] = "y", + [dc("ကြ")] = "r", + [dc("ကွ")] = "v", + [dc("ကှ")] = "h", + [dc("ကၞ")] = "n", + [dc("ကၟ")] = "m", + [dc("ကၠ")] = "l", + -- Mon Pali consonants + ["ၚ"] = "ṅ", + ["ၛ"] = "jh", + -- Shan (Pali) consonants - Excludes SHAN THA, MEDIAL WA, SIGN SHAW + ["ၵ"] = "k", + ["ၶ"] = "kh", + ["ၷ"] = "g", + ["ꧠ"] = "gh", -- ['']='ṅ', + ["ၸ"] = "c", + ["ꧡ"] = "ch", + ["ၹ"] = "j", + ["ꧢ"] = "jh", + ["ၺ"] = "ñ", + ["ꩦ"] = "ṭ", + ["ꩧ"] = "ṭh", + ["ꩨ"] = "ḍ", + ["ꩩ"] = "ḍh", + ["ꧣ"] = "ṇ", + -- ['']='t', ['']='th', + ["ၻ"] = "d", + ["ꩪ"] = "dh", + ["ၼ"] = "n", + -- ['']='p', + ["ၽ"] = "ph", + ["ၿ"] = "b", + ["ꧤ"] = "bh", -- ['']='m', + -- ['']='y', ['']='r', ['']='l', ['']='v', + ["ꩮ"] = "ḷ", + -- ['']='ś', ['']='ṣ', ['']='s', + ["ႁ"] = "h", + -- ['']='ññ', ['']='ss', + ["ဢ"] = "ʼ", + -- Other first parts of independent vowels. 
+ ["ဣ"] = "i", + ["ဥ"] = "u", + -- Lanna + ["ᨠ"] = "k", + ["ᨡ"] = "kh", + ["ᨣ"] = "g", + ["ᨥ"] = "gh", + ["ᨦ"] = "ṅ", + ["ᨧ"] = "c", + ["ᨨ"] = "ch", + ["ᨩ"] = "j", + ["ᨫ"] = "jh", + ["ᨬ"] = "ñ", + ["ᨭ"] = "ṭ", + ["ᨮ"] = "ṭh", + ["ᨯ"] = "ḍ", + ["ᨰ"] = "ḍh", + ["ᨱ"] = "ṇ", + ["ᨲ"] = "t", + ["ᨳ"] = "th", + ["ᨴ"] = "d", + ["ᨵ"] = "dh", + ["ᨶ"] = "n", + ["ᨷ"] = "p", + ["ᨸ"] = "p", + ["ᨹ"] = "ph", + ["ᨻ"] = "b", + ["ᨽ"] = "bh", + ["ᨾ"] = "m", + ["ᨿ"] = "y", + ["ᩁ"] = "r", + ["ᩃ"] = "l", + ["ᩅ"] = "v", + ["ᩊ"] = "ḷ", + ["ᩆ"] = "ś", + ["ᩇ"] = "ṣ", + ["ᩈ"] = "s", + ["ᩉ"] = "h", + ["ᩔ"] = "ss", + ["ᩋ"] = "ʼ", + ["ᩐ"] = "ū", + -- Subscript consonants: 1A55, 1A56, 1A5B to 1A5E + [dc("ᨠᩕ")] = "r", + [dc("ᨠᩖ")] = "l", + [dc("ᨠᩛ")] = "ṭh", + [dc("ᨠᩜ")] = "m", + [dc("ᨠᩝ")] = "p", + [dc("ᨠᩞ")] = "s", + -- Khmer + ["ក"] = "k", + ["ខ"] = "kh", + ["គ"] = "g", + ["ឃ"] = "gh", + ["ង"] = "ṅ", + ["ច"] = "c", + ["ឆ"] = "ch", + ["ជ"] = "j", + ["ឈ"] = "jh", + ["ញ"] = "ñ", + ["ដ"] = "ṭ", + ["ឋ"] = "ṭh", + ["ឌ"] = "ḍ", + ["ឍ"] = "ḍh", + ["ណ"] = "ṇ", + ["ត"] = "t", + ["ថ"] = "th", + ["ទ"] = "d", + ["ធ"] = "dh", + ["ន"] = "n", + ["ប"] = "p", + ["ផ"] = "ph", + ["ព"] = "b", + ["ភ"] = "bh", + ["ម"] = "m", + ["យ"] = "y", + ["រ"] = "r", + ["ល"] = "l", + ["វ"] = "v", + ["ឡ"] = "ḷ", + ["ឝ"] = "ś", + ["ឞ"] = "ṣ", + ["ស"] = "s", + ["ហ"] = "h", + ["អ"] = "ʼ", + -- Thai + ["ก"] = "k", + ["ข"] = "kh", + ["ค"] = "g", + ["ฆ"] = "gh", + ["ง"] = "ṅ", + ["จ"] = "c", + ["ฉ"] = "ch", + ["ช"] = "j", + ["ฌ"] = "jh", + ["ญ"] = "ñ", + ["ฏ"] = "ṭ", + ["ฐ"] = "ṭh", + ["ฑ"] = "ḍ", + ["ฒ"] = "ḍh", + ["ณ"] = "ṇ", + ["ต"] = "t", + ["ถ"] = "th", + ["ท"] = "d", + ["ธ"] = "dh", + ["น"] = "n", + ["ป"] = "p", + ["ผ"] = "ph", + ["พ"] = "b", + ["ภ"] = "bh", + ["ม"] = "m", + ["ย"] = "y", + ["ร"] = "r", + ["ล"] = "l", + ["ว"] = "v", + ["ฬ"] = "ḷ", + ["ศ"] = "ś", + ["ษ"] = "ṣ", + ["ส"] = "s", + ["ห"] = "h", + ["อ"] = "", -- ['อ'] = 'ʼ', + + -- Lao + ["ກ"] = "k", + ["ຂ"] = "kh", + ["ຄ"] = "g", + ["ຆ"] = "gh", + ["ງ"] = "ṅ", + ["ຈ"] = 
"c", + ["ຉ"] = "ch", + ["ຊ"] = "j", + ["ຌ"] = "jh", + ["ຎ"] = "ñ", + ["ຏ"] = "ṭ", + ["ຐ"] = "ṭh", + ["ຑ"] = "ḍ", + ["ຒ"] = "ḍh", + ["ຓ"] = "ṇ", + ["ຕ"] = "t", + ["ຖ"] = "th", + ["ທ"] = "d", + ["ຘ"] = "dh", + ["ນ"] = "n", + ["ປ"] = "p", + ["ຜ"] = "ph", + ["ພ"] = "b", + ["ຠ"] = "bh", + ["ມ"] = "m", + ["ຍ"] = "y", + ["ຢ"] = "y", + ["ຣ"] = "r", + ["ລ"] = "l", + ["ວ"] = "v", + ["ຬ"] = "ḷ", + ["ຨ"] = "ś", + ["ຩ"] = "ṣ", + ["ສ"] = "s", + ["ຫ"] = "h", + ["ອ"] = "", -- ['ອ'] = 'ʼ', + ["ດ"] = "d" + } + + local diacritics = { + -- Bengali - only NFC needed + [dc("কা")] = "ā", + [dc("কি")] = "i", + [dc("কী")] = "ī", + [dc("কু")] = "u", + [dc("কূ")] = "ū", + [dc("কৃ")] = "ṛ", + [dc("কৄ")] = "ṝ", + [dc("কৢ")] = "ḷ", + [dc("কৣ")] = "ḹ", + [dc("কে")] = "e", + [dc("কৈ")] = "ai", + [dc("কো")] = "o", + [dc("কৌ")] = "au", + [dc("ক্")] = "", + -- Myanmar + [dc("ကာ")] = "ā", + [dc("ကါ")] = "ā", + [dc("ကိ")] = "i", + [dc("ကီ")] = "ī", + [dc("ကု")] = "u", + [dc("ကူ")] = "ū", + [dc("ကၖ")] = "ṛ", + [dc("ကၗ")] = "ṝ", + [dc("ကၘ")] = "ḷ", + [dc("ကၙ")] = "ḹ", + [dc("ကေ")] = "e", + [dc("ကဲ")] = "ai", + -- The following are multicharacter! + [dc("ကော")] = "o", + [dc("ကော်")] = "au", + [dc("က်က္")] = "", + [dc("ကေါ")] = "o", + [dc("ကေါ်")] = "au", + [dc("က္")] = "", + [dc("က်")] = "", + -- Mon - treatment of Sanskrit au is to be determined! + [dc("ကဳ")] = "ī", + -- Shan + [dc("ကႃ")] = "ā", + [dc("ကေႃ")] = "o", + -- Lanna + [dc("ᨠᩣ")] = "ā", + [dc("ᨠᩤ")] = "ā", + [dc("ᨠᩥ")] = "i", + [dc("ᨠᩦ")] = "ī", + [dc("ᨠᩩ")] = "u", + [dc("ᨠᩪ")] = "ū", + ["ᩂ"] = "ṛ", + ["ᩄ"] = "ḷ", -- Syllabic consonants may be very wrong! + [dc("ᨠᩮ")] = "e", + [dc("ᨠᩱ")] = "ai", + [dc("ᨠᩰ")] = "o", + -- The next two rows are are multicharacter! 
+ [dc("ᨠᩮᩣ")] = "o", + [dc("ᨠᩮᩢᩣ")] = "au", + [dc("ᨠᩮᩫᩢᩣ")] = "au", + [dc("ᨠᩮᩫᩣ")] = "au", + [dc("ᨠᩮᩤ")] = "o", + [dc("ᨠᩮᩢᩤ")] = "au", + [dc("ᨠᩮᩫᩢᩤ")] = "au", + [dc("ᨠᩮᩫᩤ")] = "au", + [dc("ᨠ᩠")] = "", + [dc("ᨠ᩺")] = "", + [dc("ᨠ᩼")] = "", + -- Khmer + [dc("កា")] = "ā", + [dc("កិ")] = "i", + [dc("កី")] = "ī", + [dc("កុ")] = "u", + [dc("កូ")] = "ū", + [dc("ក្ឫ")] = "ṛ", + [dc("ក្ឬ")] = "ṝ", + [dc("ក្ឭ")] = "ḷ", + [dc("ក្ឮ")] = "ḹ", -- Multipart + [dc("កេ")] = "e", + [dc("កៃ")] = "ai", + [dc("កោ")] = "o", + [dc("កៅ")] = "au", + [dc("ក្")] = "", + [dc("ក៑")] = "", + -- Thai + [dc("กั")] = "a", + ["า"] = "ā", + [dc("กิ")] = "i", + [dc("กี")] = "ī", + [dc("กุ")] = "u", + [dc("กู")] = "ū", + ["ฤ"] = "ṛ", + ["ฤๅ"] = "ṝ", + ["ฦ"] = "ḷ", + ["ฦๅ"] = "ḹ", -- Multipart + ["เ"] = "e", + ["ไ"] = "ai", + ["โ"] = "o", + ["เา"] = "au", + [dc("กฺ")] = "", + [dc("ก์")] = "", + ["ะ"] = "a", + [dc("กึ")] = "iṃ", -- Induced by jackbooted I/O + -- Lao + [dc("ກັ")] = "a", + ["າ"] = "ā", + [dc("ກິ")] = "i", + [dc("ກີ")] = "ī", + [dc("ກຸ")] = "u", + [dc("ກູ")] = "ū", + -- ['ฤ']='ṛ', ['ฤๅ')]='ṝ', ['ฦ']='ḷ', ['ฦๅ']='ḹ', -- Multipart + ["ເ"] = "e", + ["ໄ"] = "ai", + ["ໂ"] = "o", + [dc("ເກົາ")] = "au", + [dc("ກ຺")] = "", + [dc("ກ໌")] = "", + ["ະ"] = "a", + [dc("ກຶ")] = "iṃ", -- Induced by jackbooted I/O + -- Results of subscripts - for 2nd level special subscripts. 
+ ["ṭ"] = "ṭ", + ["n"] = "n", + ["p"] = "p", + ["m"] = "m", + ["y"] = "y", + ["r"] = "r", + ["l"] = "l", + ["w"] = "w", + ["s"] = "s", + ["h"] = "h" + } + + local tt = { + -- Bengali independent vowels + ["অ"] = "a", + ["আ"] = "ā", + ["ই"] = "i", + ["ঈ"] = "ī", + ["উ"] = "u", + ["ঊ"] = "ū", + ["ঋ"] = "ṛ", + ["ৠ"] = "ṝ", + ["ঌ"] = "ḷ", + ["ৡ"] = "ḹ", + ["এ"] = "e", + ["ঐ"] = "ai", + ["ও"] = "o", + ["ঔ"] = "au", + -- chandrabindu, anusvara, visarga & avagraha + [dc("কঁ")] = "m̐", + [dc("কং")] = "ṃ", + ["ঃ"] = "ḥ", + ["ঽ"] = "’", + -- numerals + ["০"] = "0", + ["১"] = "1", + ["২"] = "2", + ["৩"] = "3", + ["৪"] = "4", + ["৫"] = "5", + ["৬"] = "6", + ["৭"] = "7", + ["৮"] = "8", + ["৯"] = "9", + -- Myanmar independent vowels + ["အ"] = "a", + ["အာ"] = "ā", + ["ဣ"] = "i", + ["ဤ"] = "ī", + ["ဥ"] = "u", + ["ဦ"] = "ū", + ["ၒ"] = "ṛ", + ["ၓ"] = "ṝ", + ["ၔ"] = "ḷ", + ["ၕ"] = "ḹ", + ["ဧ"] = "e", + ["အဲ"] = "ai", + ["ဩ"] = "o", + ["ဪ"] = "au", -- 2 of these are multi-character keys! + -- Mon + ["ဣဳ"] = "ī", + ["ဥု"] = "ū", + ["ဨ"] = "e", + -- Shan + ["ဢ"] = "a", + ["ဢႃ"] = "ā", + ["ဢိ"] = "i", + ["ဢီ"] = "ī", + ["ဢု"] = "u", + ["ဢူ"] = "ū", + ["ဢေ"] = "e", + ["ဢေႃ"] = "o", + ["ဢဲ"] = "ai", + ["ဢော်"] = "au", + -- chandrabindu, anusvara, visarga & avagraha + -- [dc('က')]='m̐', + [dc("ကံ")] = "ṃ", + ["း"] = "ḥ", + -- ['']='’', + -- numerals + ["၀"] = "0", + ["၁"] = "1", + ["၂"] = "2", + ["၃"] = "3", + ["၄"] = "4", + ["၅"] = "5", + ["၆"] = "6", + ["၇"] = "7", + ["၈"] = "8", + ["၉"] = "9", + -- Lanna independent vowels + ["ᩋ"] = "a", + ["ᩋᩣ"] = "ā", + ["ᩍ"] = "i", + ["ᩎ"] = "ī", + ["ᩏ"] = "u", + ["ᩐ"] = "ū", + ["ᩂ"] = "ṛ", -- ['']='ṝ', + ["ᩄ"] = "ḷ", + -- ['']='ḹ', + ["ᩑ"] = "e", + ["ᩒ"] = "o", + ["ᩋᩰ"] = "o", + ["ᩋᩮᩣ"] = "o", + ["ᩐᩣ"] = "au", + ["ᩋᩱ"] = "ai", + -- ['']='ai', ['']='au', + -- chandrabindu, anusvara, visarga & avagraha + -- [dc('')]='m̐', + [dc("ᨠᩴ")] = "ṃ", + ["ᩡ"] = "ḥ", + [dc("ᨠᩘ")] = "ṅ", + -- ['']='’', + -- numerals + ["᪀"] = "0", + ["᪁"] = "1", + ["᪂"] = "2", + 
["᪃"] = "3", + ["᪄"] = "4", + ["᪅"] = "5", + ["᪆"] = "6", + ["᪇"] = "7", + ["᪈"] = "8", + ["᪉"] = "9", + ["᪐"] = "0", + ["᪑"] = "1", + ["᪒"] = "2", + ["᪓"] = "3", + ["᪔"] = "4", + ["᪕"] = "5", + ["᪖"] = "6", + ["᪗"] = "7", + ["᪘"] = "8", + ["᪙"] = "9", + -- Khmer independent vowels + ["អ"] = "a", + ["អា"] = "ā", + ["ឥ"] = "i", + ["ឦ"] = "ī", + ["ឧ"] = "u", + ["ឨ"] = "ū", + ["ឫ"] = "ṛ", + ["ឬ"] = "ṝ", + ["ឭ"] = "ḷ", + ["ឮ"] = "ḹ", + ["ឯ"] = "e", + ["ឰ"] = "ai", + ["ឱ"] = "o", + ["ឲ"] = "o", + ["ឳ"] = "au", + -- chandrabindu, anusvara, visarga & avagraha + -- [dc('ក')]='m̐', + [dc("កំ")] = "ṃ", + ["ះ"] = "ḥ", + -- ['']='’', + -- numerals + ["០"] = "0", + ["១"] = "1", + ["២"] = "2", + ["៣"] = "3", + ["៤"] = "4", + ["៥"] = "5", + ["៦"] = "6", + ["៧"] = "7", + ["៨"] = "8", + ["៩"] = "9", + ["៰"] = "0", + ["៱"] = "1", + ["៲"] = "2", + ["៳"] = "3", + ["៴"] = "4", + ["៵"] = "5", + ["៶"] = "6", + ["៷"] = "7", + ["៸"] = "8", + ["៹"] = "9", + -- Thai miscellanea + -- independent vowels + ["ฤ"] = "ṛ", + ["ฤๅ"] = "ṝ", + ["ฦ"] = "ḷ", + ["ฦๅ"] = "ḹ", + -- chandrabindu, anusvara, visarga & avagraha + -- [dc('ក')]='m̐', + [dc("กํ")] = "ṃ", + ["ะ"] = "ḥ", + -- ['']='’', + -- numerals + ["๐"] = "0", + ["๑"] = "1", + ["๒"] = "2", + ["๓"] = "3", + ["๔"] = "4", + ["๕"] = "5", + ["๖"] = "6", + ["๗"] = "7", + ["๘"] = "8", + ["๙"] = "9", + -- Lao miscellanea + -- chandrabindu, anusvara, visarga & avagraha + -- [dc('ក')]='m̐', + [dc("ກໍ")] = "ṃ", + ["ະ"] = "ḥ", + -- ['']='’', + -- numerals + ["໐"] = "0", + ["໑"] = "1", + ["໒"] = "2", + ["໓"] = "3", + ["໔"] = "4", + ["໕"] = "5", + ["໖"] = "6", + ["໗"] = "7", + ["໘"] = "8", + ["໙"] = "9", + -- All scripts + -- punctuation + ["॥"] = ".", + ["။"] = ".", + ["᪩"] = ".", + ["᪫"] = ".", + ["៕"] = ".", + ["๚"] = ".", -- double danda + ["।"] = ".", + ["၊"] = ".", + ["‌᪨"] = ".", + ["᪪"] = ".", + ["។"] = ".", + ["ฯ"] = ".", + ["ຯ"] = ".", -- danda + -- Vedic extensions + ["ᳵ"] = "x", + ["ᳶ"] = "f", + -- Om + -- ['ॐ']='oṃ', + -- reconstructed + ["*"] = 
"" + } + -- Also handle subscript consonants encoded as marks. + local S = dc("ကျကြကွကှကၞကၟကၠ") .. -- Myanmar subscripts + dc("ᨠᩕᨠᩖᨠᩛᨠᩜᨠᩝᨠᩞ") -- Lanna subscripts + -- consonants and part 1 of NFC-multi-part independent vowels. + local C = "[কখগঘঙচছজঝঞটঠডঢণতথদধনপফবভমযরলवळশষসহৰৱ" .. -- Bengali + "ကခဂဃငစဆဇဈဉဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝဠၐၑသဟညဿအ" .. -- Myanmar Part 1 + "ၚၛၵၶၷꧠၸꧡၹꧢၺꩦꩧꩨꩩꧣၻꩪၼၽၿꧤꩮႁဢဣဥ" .. -- Myanmar Part 2 (Mon and Shan) + "ᨠᨡᨣᨥᨦᨧᨨᨩᨫᨬᨭᨮᨯᨰᨱᨲᨳᨴᨵᨶᨷᨸᨹᨻᨽᨾᨿᩁᩃᩅᩊᩆᩇᩈᩉᩔᩋᩐ" .. -- Lanna + "កខគឃងចឆជឈញដឋឌឍណតថទធនបផពភមយរលវឡឝសឞហអ" .. -- Khmer + "กขคฆงจฉชฌญฏฐฑฒณตถทธนปผพภมยรลวศษสหฬอฤฦ" .. -- Thai. + "ກຂຄຆງຈຉຊຌຎຏຐຑຒຓຕຖທຘນປຜພຠມຍຢຣລວຨຩສຫຬອ" .. -- Lao + S .. "][" .. u(0x09bc, 0x200d) .. "]?" -- And allow Bengali nukta or necessary ZWJ. + + -- One character diacritics + local dia = dc("[কাকিকীকুকূকৃকৄকৢকৣকেকৈকোকৌক্" .. -- Bengali + "ကာကါကိကီကုကူကၖကၗကၘကၙကေကဲက္က်ကဳကႃ" .. -- Myanmar + "ᨠᩣᨠᩤᨠᩥᨠᩦᨠᩩᨠᩪᩂᩄᨠᩮᨠᩱᨠᩰᨠ᩠ᨠ᩺ᨠ᩼" .. -- Lanna + "ᨠᩫᩢ" .. -- Lanna diacritics in second or third place. + "កាកិកីកុកូកេកៃកោកៅក្ក៑" .. -- Khmer + "กักุิกูีเโไาๅฤฦกฺกึก์" .. -- Thai + "ກັກຸິກູີເກົາໂໄກ຺ກຶກ໌" .. -- Lao + "]") + local diax = {} + local ti = table.insert; + ti(diax, "(") + ti(diax, dia) + ti(diax, "?") + ti(diax, dia) + ti(diax, "?") + ti(diax, dia) + ti(diax, "?") + ti(diax, dia) + ti(diax, "?)") + diax = table.concat(diax) + local explicit = nil + if options and options.impl then + if options.impl == "yes" then + explicit = false + elseif options.impl == "no" then + explicit = true + end + end + if sc == "Khmr" then + dep_liquid = "(" .. C .. dc(")(ក្)") .. "([ឫឬឭឮ])"; -- Avoid gsub in gsub bug. + text = gsub(text, dep_liquid, function(c, j, d) return consonants[c] .. d end) + -- Regularise robat + robat_fix3 = "(" .. C .. dc("ក្") .. C .. dc("ក្") .. C .. ")(" .. dc("ក៌)") + robat_fix2 = "(" .. C .. dc("ក្") .. C .. ")(" .. dc("ក៌)") + robat_fix1 = "(" .. C .. ")(" .. dc("ក៌)") + derobatted = "រ្" .. 
"%1" + text = gsub(text, robat_fix3, derobatted) + text = gsub(text, robat_fix2, derobatted) + text = gsub(text, robat_fix1, derobatted) + elseif sc == "Thai" or sc == "Laoo" then + local match = mw.ustring.match + local v1 = dc("[กัກັ]") + local v2 = dc("[กัะກັະ]") + local va = dc("[กฺ]") + if lang == "sa" then + if match(text, v1) then explicit = true end -- SARA A is visarga! + else + if match(text, v2) then explicit = true end + end + local yLao, nuktaed + if sc == "Laoo" then + if options and options.y then + if options.y == "yaa" or options.y == "ຢ" then + yLao = "yaa" + elseif options.y == "yung" or options.y == "ຍ" then + yLao = "yung" + end + end + if not yLao then + if match(text, "ຢ") then + yLao = "yaa" + else + yLao = "yung" + end + end + if explicit then + nuktaed = match(text, u(0xeba)) + else + local str + if lang == "sa" then + str = dc("ກ຺[ກັາກຸິກູີ]") + else + str = dc("ກ຺[ກັາກຸິກູີະ]") + end + nuktaed = match(text, str) + end + if nuktaed then -- Convert to extended alphabet + rs = {["ຄ຺"] = "ຆ", ["ສ຺"] = "ຉ", ["ຊ຺"] = "ຌ", ["ຍ຺"] = "ຎ", ["ຕ຺"] = "ຏ", ["ຖ຺"] = "ຐ", ["ດ"] = "ຑ", ["ນ຺"] = "ຓ", ["ທ຺"] = "ຘ", ["ພ຺"] = "ຠ", ["ລ຺"] = "ຬ", ["ງ຺"] = dc("ໍກ")} + text = gsub(text, "[ຄສຊຍຕຖດນທພລງ]" .. u(0xeba) .. "?", rs) + explicit = true + end + if yLao == "yaa" then text = gsub(text, "ຍ", "ຎ") end + end + if match(text, va) then explicit = false end + if explicit == nil then + -- It looks as though gsub (from dc()) and match interfere, so need local variables. + local ngf1 = dc("[กักิกุກັກິກຸ][งງ]$") + local ngf2 = dc("[กักิกุກັກິກຸ][งງ] ") + if (match(text, ngf1) or match(text, ngf2)) then explicit = true end + end + if not explicit then + diax = gsub(diax, "[ฤฦ]", "า") -- Treat as ฤ and ฦ consonants. + local search = "([เโไເໂໄ])(" .. C .. ")([" .. u(0x0e3a, 0xeba) .. "])" + local longswap = function(p, c, v) return c .. v .. 
p end + text = gsub(text, search, longswap) + text = gsub(text, search, longswap) + end + if false and sc == "Laoo" then -- Keep around for future debugging + local nr = "F" + if nuktaed then nr = "T" end + if yLao then + nr = nr .. yLao + else + nr = nr .. "y?" + end + nr = nr .. "-" + if not options or not options.impl then + if explicit == nil then + text = "GN-" .. nr .. text + elseif explicit == false then + text = "GI-" .. nr .. text + else + text = "GE-" .. nr .. text + end + elseif options.impl == "both" then + if explicit == nil then + text = "N-" .. nr .. text + elseif explicit == false then + text = "I-" .. nr .. text + else + text = "E-" .. nr .. text + end + elseif options.impl == "yes" then + text = "Y-" .. nr .. text + elseif options.impl == "no" then + text = "N-" .. nr .. text + elseif options.impl then + text = options.impl .. nr .. text + end + end + local pair = "([เโไເໂໄ])(" .. C .. ")" + text = gsub(text, pair, "%2%1") + if explicit and lang ~= "sa" then -- SARA A is a vowel. + diax = "([ะະ" .. string.sub(diax, 3) + end + if explicit and sc == "Laoo" then -- Clean up clusters + ass = {["ກຄ"] = "ຄຄ", ["ດຈ"] = "ຈຈ", ["ດຊ"] = "ຊຊ", ["ດຕ"] = "ຕຕ", ["ດຖ"] = "ຕຖ", ["ດທ"] = "ທທ", ["ດສ"] = "ສສ", ["ຍຈ"] = "ຎຈ", ["ຍສ"] = "ຎຉ", ["ຍຊ"] = "ຎຊ", ["ນຈ"] = "ນຈ", ["ນສ"] = "ຎຉ", ["ນຊ"] = "ຎຊ", ["ນຍ"] = "ຎຎ", ["ບປ"] = "ປປ", ["ບຜ"] = "ປຜ", ["ບພ"] = "ພພ", ["ບ"] = "ປ"} + text = gsub(text, "[ກດບຍນ][ຄຕຖທປຜພສລຈສຊຍ]?", ass) + end + end + if sc == "Lana" then + -- Disambiguate lanna combining loop below. + cl_search = dc("[ᨲᨻᨾ]ᨠᩛ") + text = gsub(text, cl_search, {["ᨲᩛ"] = "ᨲ᩠ᨳ", ["ᨻᩛ"] = "ᨻ᩠ᨻ", ["ᨾᩛ"] = "ᨾ᩠ᨻ"}) + end + if sc == "Mymr" or sc == "Lana" then + local fn = function(c, d) return consonants[c] .. d end + local search = "(" .. C .. ")([" .. S .. 
"])" + text = gsub(text, search, fn); + text = gsub(text, search, fn); -- and again + end + if sc == "Beng" then + -- Aberrant conversion: + text = gsub(text, u(0x09b2, 0x9cd, 0x9bc, 0x9cd, 0x9b9), "ḷহ") -- raw + text = gsub(text, u(0x09b2, 0x9bc, 0x9cd, 0x9cd, 0x9b9), "ḷহ") -- NFC + -- Proper conversion: + text = gsub(text, u(0x09b2, 0x9cd, 0x9bc, 0x9b9), "ḷহ") -- not NFC + -- text = gsub(text, u(0x09b2, 0x9bc, 0x9cd, 0x9b9), 'ḷহ') -- NFC + end + text = gsub(text, "(" .. C .. ")" .. diax, function(c, d) + local val = tt[c .. d] + if val then return val end + local cn = consonants[c] + if not cn then return "X(" .. c .. ")" end + if d ~= "" then + return cn .. (diacritics[d] or "NIL(" .. d .. ")") + elseif explicit then + return cn + else + return cn .. "a" + end + end) + text = gsub(text, ".", tt) + -- Bodge alphabetic Thai and Lao anusvara + if explicit and text then + text = gsub(text, "ṅ$", "ṃ") + text = gsub(text, "ṅ([^kg])", "ṃ%1") + end + else + text = nil -- Not ready for use yet! 
+ end + return text +end + +function export.tr(text, lang, sc) return export.trwo(text, lang, sc, {}) end + +return export diff --git a/wiktra/wikt/translit/pnb-translit.lua b/wiktra/wikt/translit/pnb-translit.lua new file mode 100644 index 0000000..c1dc961 --- /dev/null +++ b/wiktra/wikt/translit/pnb-translit.lua @@ -0,0 +1,22 @@ +local export = {} + +-- Transliterate Punjabi text by delegating to pa-Arab-translit when sc is +-- "Shah" or "pa-Arab". For any other script code the result is nil on pages +-- in the Category namespace and an error everywhere else. +-- The script test compares sc against each accepted code explicitly: the +-- earlier form `sc == "Shah" or "pa-Arab"` was always truthy (a non-empty +-- string is truthy in Lua), which made the else branch unreachable. +function export.tr(text, lang, sc, debug_mode) + + local out_text + if sc == "Shah" or sc == "pa-Arab" then + out_text = require("pa-Arab-translit").tr(text, lang, sc, debug_mode) + -- out_text = nil + else + local namespace = mw.title.getCurrentTitle().nsText + if namespace == "Category" then + out_text = nil + else + error("Invalid script for Punjabi language.") + end + end + + return out_text + +end + +return export diff --git a/wiktra/wikt/translit/prti-translit.lua b/wiktra/wikt/translit/prti-translit.lua new file mode 100644 index 0000000..47426f0 --- /dev/null +++ b/wiktra/wikt/translit/prti-translit.lua @@ -0,0 +1,25 @@ +local export = {} + +local tt = {["𐭀"] = "ʾ", ["𐭁"] = "b", ["𐭂"] = "g", ["𐭃"] = "d", ["𐭄"] = "h", ["𐭅"] = "w", ["𐭆"] = "z", ["𐭇"] = "ḥ", ["𐭈"] = "ṭ", ["𐭉"] = "y", ["𐭊"] = "k", ["𐭋"] = "l", ["𐭌"] = "m", ["𐭍"] = "n", ["𐭎"] = "s", ["𐭏"] = "ʿ", ["𐭐"] = "p", ["𐭑"] = "c", ["𐭒"] = "q", ["𐭓"] = "r", ["𐭔"] = "š", ["𐭕"] = "t"} + +local numbers = {["𐭘"] = 1, ["𐭙"] = 2, ["𐭚"] = 3, ["𐭛"] = 4, ["𐭜"] = 10, ["𐭝"] = 20, ["𐭞"] = 100, ["𐭟"] = 1000} + +-- Sum the values that the numbers table assigns to each character of numeric_str. +function export.convert_numbers(numeric_str) + local total = 0 + for c in mw.ustring.gmatch(numeric_str, ".") do total = total + numbers[c] end + return total +end + +function export.tr(text, lang, sc) + -- If the script is not Prti, do not transliterate + if sc ~= "Prti" then return end + + if mw.ustring.match(text, "[𐭘-𐭟]") then text = mw.ustring.gsub(text, "[𐭘-𐭟]+", export.convert_numbers) end + + -- Transliterate characters + text = mw.ustring.gsub(text, ".", tt) + + return text +end + +return export diff --git a/wiktra/wikt/translit/qwm-translit.lua 
b/wiktra/wikt/translit/qwm-translit.lua new file mode 100644 index 0000000..19ab8f2 --- /dev/null +++ b/wiktra/wikt/translit/qwm-translit.lua @@ -0,0 +1,13 @@ +local export = {} + +-- Transliteration entry point. Returns nil for the script codes "Latn" and +-- "Arab", delegates to Armn-translit's tr() for "Armn", and raises an error +-- for any other script code. +-- NOTE(review): the error message does not describe the failure; confirm it is intended. +function export.tr(text, lang, sc) + if sc == "Latn" or sc == "Arab" then + return nil + elseif sc == "Armn" then + return require("Armn-translit").tr(text, lang, sc) + else + error("Huệ nương!") + end +end + +return export diff --git a/wiktra/wikt/translit/ru-translit.lua b/wiktra/wikt/translit/ru-translit.lua new file mode 100644 index 0000000..73959fa --- /dev/null +++ b/wiktra/wikt/translit/ru-translit.lua @@ -0,0 +1,415 @@ +local export = {} + +--[=[ + +FIXME: + +1. (DONE) If you write '''Б'''ез, it transliterates to '''B'''jez instead of + '''B'''ez, as it should. +2. (DONE) Convert ъ to nothing before comma or other non-letter particle, e.g. + in Однимъ словомъ, идешь на чтеніе. +3. (DONE) Make special-casing for adjectives in -го and for что (and friends) + be the default, and implement transformations in Cyrillic rather than after + translit so that we can display the transformed Cyrillic in the + "phonetic respelling" notation of {{ru-IPA}}. +4. (DONE) Convert apostrophe to ъ before transliteration when after a consonant + and before a vowel (requested by Atitarev). +]=] + +local u = mw.ustring.char +local rfind = mw.ustring.find +local rsub = mw.ustring.gsub -- WARNING: Don't return this directly in a function, or surround in parens +local rmatch = mw.ustring.match +local rsplit = mw.text.split +local ulower = mw.ustring.lower +local usub = mw.ustring.sub + +local GR = u(0x0300) -- grave = ̀ +local TEMP_G = u(0xFFF1) -- substitute to preserve g from changing to v + +-- Normalise an empty string to nil; any other value is returned unchanged. +local function ine(x) -- if not empty + if x == "" then + return nil + else + return x + end +end + +-- In this table, we now map Cyrillic е and э to je and e, and handle the +-- post-consonant version (plain e and ɛ) specially. 
+local tab = { + ["А"] = "A", + ["Б"] = "B", + ["В"] = "V", + ["Г"] = "G", + ["Д"] = "D", + ["Е"] = "Je", + ["Ё"] = "Jó", + ["Ж"] = "Ž", + ["З"] = "Z", + ["И"] = "I", + ["Й"] = "J", + ["К"] = "K", + ["Л"] = "L", + ["М"] = "M", + ["Н"] = "N", + ["О"] = "O", + ["П"] = "P", + ["Р"] = "R", + ["С"] = "S", + ["Т"] = "T", + ["У"] = "U", + ["Ф"] = "F", + ["Х"] = "X", + ["Ц"] = "C", + ["Ч"] = "Č", + ["Ш"] = "Š", + ["Щ"] = "Šč", + ["Ъ"] = "ʺ", + ["Ы"] = "Y", + ["Ь"] = "ʹ", + ["Э"] = "E", + ["Ю"] = "Ju", + ["Я"] = "Ja", + ["а"] = "a", + ["б"] = "b", + ["в"] = "v", + ["г"] = "g", + ["д"] = "d", + ["е"] = "je", + ["ё"] = "jó", + ["ж"] = "ž", + ["з"] = "z", + ["и"] = "i", + ["й"] = "j", + ["к"] = "k", + ["л"] = "l", + ["м"] = "m", + ["н"] = "n", + ["о"] = "o", + ["п"] = "p", + ["р"] = "r", + ["с"] = "s", + ["т"] = "t", + ["у"] = "u", + ["ф"] = "f", + ["х"] = "x", + ["ц"] = "c", + ["ч"] = "č", + ["ш"] = "š", + ["щ"] = "šč", + ["ъ"] = "ʺ", + ["ы"] = "y", + ["ь"] = "ʹ", + ["э"] = "e", + ["ю"] = "ju", + ["я"] = "ja", + -- Russian style quotes + ["«"] = "“", + ["»"] = "”", + -- archaic, pre-1918 letters + ["І"] = "I", + ["і"] = "i", + ["Ѳ"] = "F", + ["ѳ"] = "f", + ["Ѣ"] = "Jě", + ["ѣ"] = "jě", + ["Ѵ"] = "I", + ["ѵ"] = "i" +} + +-- following based on ru-common for use with is_monosyllabic() +-- any Cyrillic or Latin vowel, including ёЁ and composed Cyrillic vowels with grave accent; +-- not including accented Latin vowels except ě (FIXME, might want to change this) +local lowercase_vowels = "аеиоуяэыюіѣѵүѐѝёaeiouyěɛ" +local uppercase_vowels = "АЕИОУЯЭЫЮІѢѴҮЀЍЁAEIOUYĚƐ" +local vowels = lowercase_vowels .. uppercase_vowels + +-- FIXME! Doesn't work with ɣ, which gets included in this character set +local non_consonants = "[" .. vowels .. "ЪЬъьʹʺ%A]" +local consonants = "[^" .. vowels .. "ЪЬъьʹʺ%A]" + +local map_to_plain_e_map = {["Е"] = "E", ["е"] = "e", ["Ѣ"] = "Ě", ["ѣ"] = "ě", ["Э"] = "Ɛ", ["э"] = "ɛ"} +local function map_to_plain_e(pre, e) return pre .. 
map_to_plain_e_map[e] end + +local map_to_je_map = {["Е"] = "Je", ["е"] = "je", ["Ѣ"] = "Jě", ["ѣ"] = "jě", ["Э"] = "E", ["э"] = "e"} +local function map_to_je(pre, e) + if e == nil then + e = pre + pre = "" + end + return pre .. map_to_je_map[e] +end + +-- decompose composed grave chars; they will map to uncomposed Latin letters for +-- consistency with other char+grave combinations, and we do this early to +-- avoid problems converting to e or je +local decompose_grave_map = {["ѐ"] = "е" .. GR, ["Ѐ"] = "Е" .. GR, ["ѝ"] = "и" .. GR, ["Ѝ"] = "И" .. GR} + +-- True if Cyrillic or decomposed Latin word has no more than one vowel; +-- includes non-syllabic stems such as льд-; copied from ru-common and modified +-- to avoid having to import that module (which would slow things down +-- significantly) +local function is_monosyllabic(word) return not rfind(word, "[" .. vowels .. "].*[" .. vowels .. "]") end + +-- Apply transformations to the Cyrillic to more closely match pronunciation. +-- Return two arguments: the "original" text (after decomposing composed +-- grave characters), and the transformed text. If the two are different, +-- {{ru-IPA}} should display a "phonetic respelling" notation. +-- NOADJ disables special-casing for adjectives in -го, while FORCEADJ forces +-- special-casing for adjectives, including those in -аго (pre-reform spelling) +-- and disables checking for exceptions (e.g. много, ого). NOSHTO disables +-- special-casing for что and related words. +function export.apply_tr_fixes(text, noadj, noshto, forceadj) + -- decompose composed grave characters before we convert Cyrillic е to + -- Latin e or je + text = rsub(text, "[ѐЀѝЍ]", decompose_grave_map) + + local origtext = text + -- the second half of the if-statement below is an optimization; see above. + if not noadj and text:find("го") then + local v = {["г"] = "в", ["Г"] = "В"} + local repl = function(e, g, o, sja) return e .. v[g] .. o .. 
(sja or "") end + -- Handle какого-нибудь/-либо/-то; must be done first because of an exception + -- made for бого-, снего-, etc. + text = rsub(text, "([кКтТ][аА][кК][оеОЕ" .. (forceadj and "аА" or "") .. "][\204\129\204\128]?)([гГ])([оО]%-)", repl) + if not forceadj then + -- handle много + text = rsub(text, "%f[%a\204\129\204\128]([Мм]но[\204\129\204\128]?)го%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. "о") + -- handle немного, намного + text = rsub(text, "%f[%a\204\129\204\128]([Нн][еа]мно[\204\129\204\128]?)го%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. "о") + -- handle до́рого [short form of дорогой, adverb] + text = rsub(text, "%f[%a\204\129\204\128]([Дд]о[\204\129\204\128]?ро)го%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. "о") + -- handle недо́рого [short form of недорогой, adverb] + text = rsub(text, "%f[%a\204\129\204\128]([Нн]едо[\204\129\204\128]?ро)го%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. "о") + -- handle стро́го + text = rsub(text, "%f[%a\204\129\204\128]([Сс]тро[\204\129\204\128]?)го%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. "о") + -- handle нестро́го + text = rsub(text, "%f[%a\204\129\204\128]([Нн]естро[\204\129\204\128]?)го%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. "о") + -- handle на́строго + text = rsub(text, "%f[%a\204\129\204\128]([Нн]а[\204\129\204\128]?стро)го%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. "о") + -- handle убо́го + text = rsub(text, "%f[%a\204\129\204\128]([Уу]бо[\204\129\204\128]?)го%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. "о") + -- handle поло́го + text = rsub(text, "%f[%a\204\129\204\128]([Пп]оло[\204\129\204\128]?)го%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. "о") + -- check for neuter short forms of compound adjectives in -но́гий + if rfind(text, "оно[\204\129\204\128]?го%f[^%a\204\129\204\128]") then + -- handle безно́го + text = rsub(text, "%f[%a\204\129\204\128]([Бб]езно[\204\129\204\128]?)го%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. 
"о") + -- handle босоно́го + text = rsub(text, "%f[%a\204\129\204\128]([Бб]осоно[\204\129\204\128]?)го%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. "о") + -- handle веслоно́го + text = rsub(text, "%f[%a\204\129\204\128]([Вв]еслоно[\204\129\204\128]?)го%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. "о") + -- handle длинноно́го + text = rsub(text, "%f[%a\204\129\204\128]([Дд]линноно[\204\129\204\128]?)го%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. "о") + -- handle двуно́го + text = rsub(text, "%f[%a\204\129\204\128]([Дд]вуно[\204\129\204\128]?)го%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. "о") + -- handle коротконо́го + text = rsub(text, "%f[%a\204\129\204\128]([Кк]оротконо[\204\129\204\128]?)го%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. "о") + -- handle кривоно́го + text = rsub(text, "%f[%a\204\129\204\128]([Кк]ривоно[\204\129\204\128]?)го%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. "о") + -- handle одноно́го + text = rsub(text, "%f[%a\204\129\204\128]([Оо]дноно[\204\129\204\128]?)го%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. "о") + -- handle пятино́го + text = rsub(text, "%f[%a\204\129\204\128]([Пп]ятино[\204\129\204\128]?)го%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. "о") + -- handle трёхно́го + text = rsub(text, "%f[%a\204\129\204\128]([Тт]р[ёе][\204\129\204\128]?хно[\204\129\204\128]?)го%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. "о") + -- handle хромоно́го + text = rsub(text, "%f[%a\204\129\204\128]([Хх]ромоно[\204\129\204\128]?)го%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. "о") + -- handle четвероно́го + text = rsub(text, "%f[%a\204\129\204\128]([Чч]етвероно[\204\129\204\128]?)го%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. "о") + -- handle шестино́го + text = rsub(text, "%f[%a\204\129\204\128]([Шш]естино[\204\129\204\128]?)го%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. "о") + end + -- handle пе́го [short form of пе́гий "piebald"] + text = rsub(text, "%f[%a\204\129\204\128]([Пп]е[\204\129\204\128]?)го%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. 
"о") + -- handle лого, сого, ого + text = rsub(text, "%f[%a\204\129\204\128]([лсЛС]?[Оо][\204\129\204\128]?)г(о[\204\129\204\128]?)%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. "%2") + -- handle Того, То́го (but not того or Того́, which have /v/) + text = rsub(text, "%f[%a\204\129\204\128](То́?)го%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. "о") + -- handle лего + text = rsub(text, "%f[%a\204\129\204\128]([Лл]е[\204\129\204\128]?)го%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. "о") + -- handle игого, огого; note, we substitute TEMP_G for both г's + -- because otherwise the ого- at the beginning gets converted to ово + text = rsub(text, "%f[%a\204\129\204\128]([ИиОо])гог(о[\204\129\204\128]?)%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. "о" .. TEMP_G .. "%2") + -- handle Диего + text = rsub(text, "%f[%a\204\129\204\128](Дие́?)го%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. "о") + -- handle бо́лого + text = rsub(text, "%f[%a\204\129\204\128]([Бб]о[\204\129\204\128]?ло)го%f[^%a\204\129\204\128]", "%1" .. TEMP_G .. "о") + -- handle *ого-, *его- (e.g. бого-, снего-) + text = rsub(text, "([ео][\204\129\204\128]?)го%-", "%1" .. TEMP_G .. "о-") + end + -- handle genitive/accusative endings, which are spelled -ого/-его/-аго + -- (-ogo/-ego/-ago) but transliterated -ovo/-evo/-avo; only for adjectives + -- and pronouns, excluding words like много, ого (-аго occurs in + -- pre-reform spelling); \204\129 is an acute accent, \204\128 is a grave accent + local pattern = "([оеОЕ" .. (forceadj and "аА" or "") .. "][\204\129\204\128]?)([гГ])([оО][\204\129\204\128]?)" + local reflexive = "([сС][яЯ][\204\129\204\128]?)" + text = rsub(text, pattern .. "%f[^%a\204\129\204\128]", repl) + text = rsub(text, pattern .. reflexive .. 
"%f[^%a\204\129\204\128]", repl) + -- handle сегодня + text = rsub(text, "%f[%a\204\129\204\128]([Сс]е)г(о[\204\129\204\128]?дня)%f[^%a\204\129\204\128]", "%1в%2") + -- handle сегодняшн- + text = rsub(text, "%f[%a\204\129\204\128]([Сс]е)г(о[\204\129\204\128]?дняшн)", "%1в%2") + -- replace TEMP_G with g; must be done after the -go -> -vo changes + text = rsub(text, TEMP_G, "г") + end + + -- the second half of the if-statement below is an optimization; see above. + if not noshto and text:find("то") then + local ch2sh = {["ч"] = "ш", ["Ч"] = "Ш"} + -- Handle что + text = rsub(text, "%f[%a\204\129\204\128]([Чч])(то[\204\129\204\128]?)%f[^%a\204\129\204\128]", function(ch, to) return ch2sh[ch] .. to end) + -- Handle чтобы, чтоб + text = rsub(text, "%f[%a\204\129\204\128]([Чч])(то[\204\129\204\128]?бы?)%f[^%a\204\129\204\128]", function(ch, to) return ch2sh[ch] .. to end) + -- Handle ничто + text = rsub(text, "%f[%a\204\129\204\128]([Нн]и)ч(то[\204\129\204\128]?)%f[^%a\204\129\204\128]", "%1ш%2") + end + + -- Handle мягкий, лёгкий, легчать, etc. + text = rsub(text, "([МмЛл][яеё][\204\129\204\128]?)г([кч])", "%1х%2") + + return origtext, text +end + +-- Transliterate after the pronunciation-related transformations of +-- export.apply_tr_fixes() have been applied. Called from {{ru-IPA}}. +-- INCLUDE_MONOSYLLABIC_JO_ACCENT is as in export.tr(). +function export.tr_after_fixes(text, include_monosyllabic_jo_accent) + -- Remove word-final hard sign, either utterance-finally or followed by + -- a non-letter character such as space, comma, period, hyphen, etc. + text = rsub(text, "[Ъъ]$", "") + text = rsub(text, "[Ъъ]([%A])", "%1") + + -- Convert apostrophe the hard sign between consonant and vowel (i.e. + -- in the places where the hard sign normally occurs in modern text). 
+ -- Apostrophe is sometimes used to indicate the hard sign; this may have + -- originated from the forcible removal of the hard sign from printing + -- offices in the 1920's, after the implementation of the Russian + -- orthography reform. The if-statement is an optimization; see below. + if rfind(text, "'") then + text = rsub(text, "(" .. consonants .. ")'([" .. lowercase_vowels .. "])", "%1ъ%2") + text = rsub(text, "(" .. consonants .. ")'([" .. uppercase_vowels .. "])", "%1Ъ%2") + end + + -- the if-statement below isn't necessary but may speed things up, + -- particularly when include_monosyllabic_jo_accent isn't set, in that + -- in the majority of cases where ё doesn't occur, we avoid a pattern find + -- (in is_monosyllabic()) and three pattern subs. The translit module needs + -- to be as fast as possible since it may be called hundreds or + -- thousands of times on some pages. + if rfind(text, "[Ёё]") then + -- We need to special-case ё after a "hushing" consonant, which becomes + -- ó (or o), without j. We also need special cases for monosyllabic ё + -- when INCLUDE_MONOSYLLABIC_JO_ACCENT isn't set, so we don't add the + -- accent mark that we would otherwise include. + if not include_monosyllabic_jo_accent and is_monosyllabic(text) and not rfind(text, "^%-") then + text = rsub(text, "([жшчщЖШЧЩ])ё", "%1o") + text = text:gsub("ё", "jo") + text = text:gsub("Ё", "Jo") + else + -- in a nonmonosyllabic word where there is a later stressed vowel + -- in the same word (e.g. трёхэта́жный), don't put an accent mark on ё. + text = rsub(text, "ё([^ %-]-[" .. vowels .. "]́)", "jo%1") + -- same goes if there are two ё's in a word (трёхколёсный, четырёхзвёздный) + text = rsub(text, "ё([^ %-]-ё)", "jo%1") + -- same goes if this is a prefix (четырёх-) + text = rsub(text, "ё([^ %-]-%-)$", "jo%1") + -- same goes if there is a stressed vowel *followed* by ё (this is + -- quite rare but occurs e.g. in А́ндзё "Anjō (city in Japan)" + text = rsub(text, "([" .. vowels .. 
"]́[^ %-]-)ё", "%1jo") + -- handle hushing consonant + ё + another accented vowel (e.g. шёлкопряди́льня) + -- (already partly converted by previous regexes) + text = rsub(text, "([жшчщЖШЧЩ])j", "%1") + -- handle remaining cases of hushing consonant + ё + text = rsub(text, "([жшчщЖШЧЩ])ё", "%1ó") + -- conversion of remaining ё will occur as a result of 'tab'. + end + end + + -- ю after ж and ш becomes u (e.g. брошюра, жюри) + text = rsub(text, "([жшЖШ])ю", "%1u") + + -- the if-statement below isn't necessary but may speed things up in that + -- in the majority of cases where the letters below don't occur, we avoid + -- six pattern subs. + if rfind(text, "[ЕеѢѣЭэ]") then + -- е after a dash at the beginning of a word becomes e, and э becomes ɛ + -- (like after a consonant) + text = rsub(text, "^(%-)([ЕеѢѣЭэ])", map_to_plain_e) + text = rsub(text, "(%s%-)([ЕеѢѣЭэ])", map_to_plain_e) + -- don't get confused by single quote or parens between consonant and е; + -- e.g. Б'''ез''', американ(ец) + text = rsub(text, "(" .. consonants .. "['%(%)]*)([ЕеѢѣЭэ])", map_to_plain_e) + + -- This is now the default + -- е after a vowel or at the beginning of a word becomes je, and э becomes e + -- text = rsub(text, "^([ЕеѢѣЭэ])", map_to_je) + -- text = rsub(text, "(" .. non_consonants .. ")([ЕеѢѣЭэ])", map_to_je) + -- -- need to do it twice in case of sequences of such vowels + -- text = rsub(text, "^([ЕеѢѣЭэ])", map_to_je) + -- text = rsub(text, "(" .. non_consonants .. ")([ЕеѢѣЭэ])", map_to_je) + end + + text = (rsub(text, ".", tab)) + return text +end + +-- Transliterates text, which should be a single word or phrase. It should +-- include stress marks, which are then preserved in the transliteration. +-- ё is a special case: it is rendered (j)ó in multisyllabic words and +-- monosyllabic words in multi-word phrases, but rendered (j)o without an +-- accent in isolated monosyllabic words, unless INCLUDE_MONOSYLLABIC_JO_ACCENT +-- is specified. 
(This is used in conjugation and declension tables.) +-- NOADJ disables special-casing for adjectives in -го, while FORCEADJ forces +-- special-casing for adjectives and disables checking for exceptions +-- (e.g. много). NOSHTO disables special-casing for что and related words. +-- LANG and SC are accepted but not used by this function. +function export.tr(text, lang, sc, include_monosyllabic_jo_accent, noadj, noshto, forceadj) + local origtext, subbed_text = export.apply_tr_fixes(text, noadj, noshto, forceadj) + return export.tr_after_fixes(subbed_text, include_monosyllabic_jo_accent) +end + +-- translit with various special-case substitutions; NOADJ disables +-- special-casing for adjectives in -го, while FORCEADJ forces special-casing +-- for adjectives and disables checking for exceptions (e.g. много). +-- NOSHTO disables special-casing for что and related words. SUB is used +-- to implement arbitrary substitutions in the Cyrillic text before other +-- transformations are applied and before translit. It is of the form +-- FROM/TO,FROM/TO,... 
+function export.tr_sub(text, include_monosyllabic_jo_accent, noadj, noshto, sub, forceadj) + if type(text) == "table" then -- called directly from a template + include_monosyllabic_jo_accent = ine(text.args.include_monosyllabic_jo_accent) + noadj = ine(text.args.noadj) + noshto = ine(text.args.noshto) + sub = ine(text.args.sub) + -- forceadj is read from the template arguments like the other options; + -- previously it could only be supplied by a direct Lua caller. + forceadj = ine(text.args.forceadj) + text = text.args[1] + end + + if sub then + -- sub is split on "," into pairs and each pair on "/" into FROM and TO; + -- the pairs are applied to the text in order. + local subs = rsplit(sub, ",") + for _, subpair in ipairs(subs) do + local subsplit = rsplit(subpair, "/") + text = rsub(text, subsplit[1], subsplit[2]) + end + end + + return export.tr(text, nil, nil, include_monosyllabic_jo_accent, noadj, noshto, forceadj) +end + +-- for adjectives, pronouns +function export.tr_adj(text, include_monosyllabic_jo_accent) + if type(text) == "table" then -- called directly from a template + include_monosyllabic_jo_accent = ine(text.args.include_monosyllabic_jo_accent) + text = text.args[1] + end + + -- we have to include "forceadj" because typically when tr_adj() is called + -- from the noun or adjective modules, it's called with suffix ого, which + -- would otherwise trigger the exceptional case and be transliterated as ogo + return export.tr(text, nil, nil, include_monosyllabic_jo_accent, false, "noshto", "forceadj") +end + +return export + +-- For Vim, so we get 4-space tabs +-- vim: set ts=4 sw=4 noet: diff --git a/wiktra/wikt/translit/rue-translit.lua b/wiktra/wikt/translit/rue-translit.lua new file mode 100644 index 0000000..e9549c3 --- /dev/null +++ b/wiktra/wikt/translit/rue-translit.lua @@ -0,0 +1,95 @@ +local export = {} + +local tt = { + ["А"] = "A", + ["а"] = "a", + ["Б"] = "B", + ["б"] = "b", + ["В"] = "V", + ["в"] = "v", + ["Г"] = "H", + ["г"] = "h", + ["Ґ"] = "G", + ["ґ"] = "g", + ["Д"] = "D", + ["д"] = "d", + ["Е"] = "E", + ["е"] = "e", + ["Є"] = "Je", + ["є"] = "je", + ["Ё"] = "Jo", + ["ё"] = "jo", + ["Ж"] = "Ž", + ["ж"] = "ž", + ["З"] = "Z", + ["з"] = "z", + ["І"] = "I", + ["і"] = "i", + ["Ї"] = "Ji", + ["ї"] = "ji", + ["И"] = "Y", + ["и"] = "y", 
+ ["Ы"] = "Ŷ", + ["ы"] = "ŷ", + ["Й"] = "J", + ["й"] = "j", + ["К"] = "K", + ["к"] = "k", + ["Л"] = "L", + ["л"] = "l", + ["М"] = "M", + ["м"] = "m", + ["Н"] = "N", + ["н"] = "n", + ["О"] = "O", + ["о"] = "o", + ["П"] = "P", + ["п"] = "p", + ["Р"] = "R", + ["р"] = "r", + ["С"] = "S", + ["с"] = "s", + ["Т"] = "T", + ["т"] = "t", + ["У"] = "U", + ["у"] = "u", + ["Ӱ"] = "Ü", + ["ӱ"] = "ü", + ["Ф"] = "F", + ["ф"] = "f", + ["Х"] = "X", + ["х"] = "x", + ["Ц"] = "C", + ["ц"] = "c", + ["Ч"] = "Č", + ["ч"] = "č", + ["Ш"] = "Š", + ["ш"] = "š", + ["Щ"] = "Šč", + ["щ"] = "šč", + ["Ь"] = "ʹ", + ["ь"] = "ʹ", + ["Ю"] = "Ju", + ["ю"] = "ju", + ["Я"] = "Ja", + ["я"] = "ja", + ["Ъ"] = "ʺ", + ["ъ"] = "ʺ", + -- neutral apostrophe, right single quotation mark, modifier letter apostrophe → modifier letter double prime + ["’"] = "ʺ", + ["ʼ"] = "ʺ", + -- obsolete letters + ["О̂"] = "Ô", + ["о̂"] = "ô", + ["Э"] = "È", + ["э"] = "è", + ["Ѣ"] = "Î", + ["ѣ"] = "î" +} + +function export.tr(text, lang, sc) + text = mw.ustring.gsub(text, "'+", {["'"] = "ʺ"}) + return (mw.ustring.gsub(text, ".", tt)) +end + +return export diff --git a/wiktra/wikt/translit/sa-Beng-translit.lua b/wiktra/wikt/translit/sa-Beng-translit.lua new file mode 100644 index 0000000..496f2b9 --- /dev/null +++ b/wiktra/wikt/translit/sa-Beng-translit.lua @@ -0,0 +1,70 @@ +local export = {} + +local consonants = {["ক"] = "k", ["খ"] = "kh", ["গ"] = "g", ["ঘ"] = "gh", ["ঙ"] = "ṅ", ["চ"] = "c", ["ছ"] = "ch", ["জ"] = "j", ["ঝ"] = "jh", ["ঞ"] = "ñ", ["ট"] = "ṭ", ["ঠ"] = "ṭh", ["ড"] = "ḍ", ["ঢ"] = "ḍh", ["ণ"] = "ṇ", ["ত"] = "t", ["থ"] = "th", ["দ"] = "d", ["ধ"] = "dh", ["ন"] = "n", ["প"] = "p", ["ফ"] = "ph", ["ব"] = "b", ["ভ"] = "bh", ["ম"] = "m", ["য"] = "y", ["য়"] = "y", ["ৰ"] = "r", ["র"] = "r", ["ল"] = "l", ["ল়"] = "ḷ", ["ৱ"] = "v", ["শ"] = "ś", ["ষ"] = "ṣ", ["স"] = "s", ["হ"] = "h"} + +local diacritics = {["া"] = "ā", ["ি"] = "i", ["ী"] = "ī", ["ু"] = "u", ["ূ"] = "ū", ["ৃ"] = "ṛ", ["ৄ"] = "ṝ", ["ৢ"] = "ḷ", ["ৣ"] = "ḹ", ["ে"] =
"e", ["ৈ"] = "ai", ["ো"] = "o", ["ৌ"] = "au", ["্"] = ""} + +local tt = { + -- vowels + ["অ"] = "a", + ["আ"] = "ā", + ["ই"] = "i", + ["ঈ"] = "ī", + ["উ"] = "u", + ["ঊ"] = "ū", + ["ঋ"] = "ṛ", + ["ৠ"] = "ṝ", + ["ঌ"] = "ḷ", + ["ৡ"] = "ḹ", + ["এ"] = "e", + ["ঐ"] = "ai", + ["ও"] = "o", + ["ঔ"] = "au", + -- chandrabindu + ["ঁ"] = "m̐", -- until a better method is found + -- anusvara + ["ং"] = "ṃ", -- until a better method is found + -- visarga + ["ঃ"] = "ḥ", + -- khanda ta + ["ৎ"] = "t", + -- avagraha + ["ঽ"] = "’", + -- numerals (Bengali digits → ASCII digits) + ["০"] = "0", + ["১"] = "1", + ["২"] = "2", + ["৩"] = "3", + ["৪"] = "4", + ["৫"] = "5", + ["৬"] = "6", + ["৭"] = "7", + ["৮"] = "8", + ["৯"] = "9", + -- punctuation + ["॥"] = ".", -- double danda + ["।"] = ".", -- danda + -- Vedic extensions + -- ['ᳵ']='x', ['ᳶ']='f', + -- Om + ["ওঁ"] = "oṃ", + -- reconstructed + ["*"] = "" +} + +function export.tr(text, lang, sc) + text = mw.ustring.gsub(text, "([কখগঘঙচছজঝঞটঠডঢণতথদধনপফবভমযৰরলৱশষসহ]়?)" .. -- য় and ল় are composition exclusions! + "([ািীুূৃৄৢৣেৈোৌ্]?)", function(c, d) + if d == "" then + return consonants[c] .. "a" + else + return consonants[c] ..
diacritics[d] + end + end) + + text = mw.ustring.gsub(text, ".", tt) + + return text +end + +return export diff --git a/wiktra/wikt/translit/sa-Brah-translit.lua b/wiktra/wikt/translit/sa-Brah-translit.lua new file mode 100644 index 0000000..e28d487 --- /dev/null +++ b/wiktra/wikt/translit/sa-Brah-translit.lua @@ -0,0 +1,68 @@ +local export = {} + +local consonants = {["𑀓"] = "k", ["𑀔"] = "kh", ["𑀕"] = "g", ["𑀖"] = "gh", ["𑀗"] = "ṅ", ["𑀘"] = "c", ["𑀙"] = "ch", ["𑀚"] = "j", ["𑀛"] = "jh", ["𑀜"] = "ñ", ["𑀝"] = "ṭ", ["𑀞"] = "ṭh", ["𑀟"] = "ḍ", ["𑀠"] = "ḍh", ["𑀡"] = "ṇ", ["𑀢"] = "t", ["𑀣"] = "th", ["𑀤"] = "d", ["𑀥"] = "dh", ["𑀦"] = "n", ["𑀧"] = "p", ["𑀨"] = "ph", ["𑀩"] = "b", ["𑀪"] = "bh", ["𑀫"] = "m", ["𑀬"] = "y", ["𑀭"] = "r", ["𑀮"] = "l", ["𑀯"] = "v", ["𑀴"] = "ḷ", ["𑀰"] = "ś", ["𑀱"] = "ṣ", ["𑀲"] = "s", ["𑀳"] = "h"} + +local diacritics = {["𑀸"] = "ā", ["𑀺"] = "i", ["𑀻"] = "ī", ["𑀼"] = "u", ["𑀽"] = "ū", ["𑀾"] = "ṛ", ["𑀿"] = "ṝ", ["𑁀"] = "ḷ", ["𑁁"] = "ḹ", ["𑁂"] = "e", ["𑁃"] = "ai", ["𑁄"] = "o", ["𑁅"] = "au", ["𑁆"] = ""} + +local tt = { + -- vowels + ["𑀅"] = "a", + ["𑀆"] = "ā", + ["𑀇"] = "i", + ["𑀈"] = "ī", + ["𑀉"] = "u", + ["𑀊"] = "ū", + ["𑀋"] = "ṛ", + ["𑀌"] = "ṝ", + ["𑀍"] = "ḷ", + ["𑀎"] = "ḹ", + ["𑀏"] = "e", + ["𑀐"] = "ai", + ["𑀑"] = "o", + ["𑀒"] = "au", + -- chandrabindu + ["𑀀"] = "m̐", -- until a better method is found + -- anusvara + ["𑀁"] = "ṃ", -- until a better method is found + -- visarga + ["𑀂"] = "ḥ", + -- avagraha + ["ऽ"] = "’", + -- numerals + ["𑁦"] = "0", + ["𑁧"] = "1", + ["𑁨"] = "2", + ["𑁩"] = "3", + ["𑁪"] = "4", + ["𑁫"] = "5", + ["𑁬"] = "6", + ["𑁭"] = "7", + ["𑁮"] = "8", + ["𑁯"] = "9", + -- punctuation + ["𑁈"] = ".", -- double danda + ["𑁇"] = ".", -- danda + -- Vedic extensions + ["𑀃"] = "x", + ["𑀄"] = "f", + -- Om + ["𑀑𑀁"] = "oṃ", + -- reconstructed + ["*"] = "" +} + +function export.tr(text, lang, sc) + text = mw.ustring.gsub(text, "([𑀓𑀔𑀕𑀖𑀗𑀘𑀙𑀚𑀛𑀜𑀝𑀞𑀟𑀠𑀡𑀢𑀣𑀤𑀥𑀦𑀧𑀨𑀩𑀪𑀫𑀬𑀭𑀮𑀯𑀰𑀱𑀲𑀳])" .. "([𑀸𑀺𑀻𑀼𑀽𑀾𑀿𑁀𑁁𑁂𑁃𑁄𑁅𑁆]?)", function(c, d) + if d == "" then + return consonants[c] .. 
"a" + else + return consonants[c] .. diacritics[d] + end + end) + + text = mw.ustring.gsub(text, ".", tt) + + return text +end + +return export diff --git a/wiktra/wikt/translit/sa-Gujr-translit.lua b/wiktra/wikt/translit/sa-Gujr-translit.lua new file mode 100644 index 0000000..5042f92 --- /dev/null +++ b/wiktra/wikt/translit/sa-Gujr-translit.lua @@ -0,0 +1,68 @@ +local export = {} + +local consonants = {["ક"] = "k", ["ખ"] = "kh", ["ગ"] = "g", ["ઘ"] = "gh", ["ઙ"] = "ṅ", ["ચ"] = "c", ["છ"] = "ch", ["જ"] = "j", ["ઝ"] = "jh", ["ઞ"] = "ñ", ["ટ"] = "ṭ", ["ઠ"] = "ṭh", ["ડ"] = "ḍ", ["ઢ"] = "ḍh", ["ણ"] = "ṇ", ["ત"] = "t", ["થ"] = "th", ["દ"] = "d", ["ધ"] = "dh", ["ન"] = "n", ["પ"] = "p", ["ફ"] = "ph", ["બ"] = "b", ["ભ"] = "bh", ["મ"] = "m", ["ય"] = "y", ["ર"] = "r", ["લ"] = "l", ["વ"] = "v", ["ળ"] = "ḷ", ["શ"] = "ś", ["ષ"] = "ṣ", ["સ"] = "s", ["હ"] = "h"} + +local diacritics = {["ા"] = "ā", ["િ"] = "i", ["ી"] = "ī", ["ુ"] = "u", ["ૂ"] = "ū", ["ૃ"] = "ṛ", ["ૄ"] = "ṝ", ["ૢ"] = "ḷ", ["ૣ"] = "ḹ", ["ે"] = "e", ["ૈ"] = "ai", ["ો"] = "o", ["ૌ"] = "au", ["્"] = ""} + +local tt = { + -- vowels + ["અ"] = "a", + ["આ"] = "ā", + ["ઇ"] = "i", + ["ઈ"] = "ī", + ["ઉ"] = "u", + ["ઊ"] = "ū", + ["ઋ"] = "ṛ", + ["ૠ"] = "ṝ", + ["ઌ"] = "ḷ", + ["ૡ"] = "ḹ", + ["એ"] = "e", + ["ઐ"] = "ai", + ["ઓ"] = "o", + ["ઔ"] = "au", + -- chandrabindu + ["ઁ"] = "m̐", -- until a better method is found + -- anusvara + ["ં"] = "ṃ", -- until a better method is found + -- visarga + ["ઃ"] = "ḥ", + -- avagraha + ["ઽ"] = "’", + -- numerals + ["૦"] = "0", + ["૧"] = "1", + ["૨"] = "2", + ["૩"] = "3", + ["૪"] = "4", + ["૫"] = "5", + ["૬"] = "6", + ["૭"] = "7", + ["૮"] = "8", + ["૯"] = "9", + -- punctuation + ["॥"] = ".", -- double danda + ["।"] = ".", -- danda + -- Vedic extensions + ["ᳵ"] = "x", + ["ᳶ"] = "f", + -- Om + ["ૐ"] = "oṃ", + -- reconstructed + ["*"] = "" +} + +function export.tr(text, lang, sc) + text = mw.ustring.gsub(text, "([કખગઘઙચછજઝઞટઠડઢણતથદધનપફબભમયરલવળશષસહ])" .. 
"([ાિીુૂૃૄૢૣેૈોૌ્]?)", function(c, d) + if d == "" then + return consonants[c] .. "a" + else + return consonants[c] .. diacritics[d] + end + end) + + text = mw.ustring.gsub(text, ".", tt) + + return text +end + +return export diff --git a/wiktra/wikt/translit/sa-Java-translit.lua b/wiktra/wikt/translit/sa-Java-translit.lua new file mode 100644 index 0000000..e29bd94 --- /dev/null +++ b/wiktra/wikt/translit/sa-Java-translit.lua @@ -0,0 +1,135 @@ +local export = {} +local gsub = mw.ustring.gsub + +local consonants = { + ["ꦏ"] = "k", + ["ꦑ"] = "kh", + ["ꦒ"] = "g", + ["ꦓ"] = "gh", + ["ꦔ"] = "ṅ", + ["ꦕ"] = "c", + ["ꦖ"] = "ch", + ["ꦗ"] = "j", + ["ꦙ"] = "jh", + ["ꦚ"] = "ñ", + ["ꦛ"] = "ṭ", + ["ꦜ"] = "ṭh", + ["ꦝ"] = "ḍ", + ["ꦞ"] = "ḍh", + ["ꦟ"] = "ṇ", + ["ꦠ"] = "t", + ["ꦡ"] = "th", + ["ꦢ"] = "d", + ["ꦣ"] = "dh", + ["ꦤ"] = "n", + ["ꦥ"] = "p", + ["ꦦ"] = "ph", + ["ꦧ"] = "b", + ["ꦨ"] = "bh", + ["ꦩ"] = "m", + ["ꦪ"] = "y", + ["ꦫ"] = "r", + ["ꦭ"] = "l", + ["ꦮ"] = "v", -- ['ળ']='ḷ', + ["ꦯ"] = "ś", + ["ꦰ"] = "ṣ", + ["ꦱ"] = "s", + ["ꦲ"] = "h", + -- Include subscript ('medial') consonants for translation only. + ["ꦿ"] = "r", + ["ꦾ"] = "y" +} + +local diacritics = { + ["ꦴ"] = "ā", + ["ꦶ"] = "i", + ["ꦷ"] = "ī", + ["ꦸ"] = "u", + ["ꦹ"] = "ū", + ["ꦽ"] = "ṛ", + ["ꦽꦴ"] = "ṝ", + ["꧀ꦊ"] = "ḷ", + ["꧀ꦋ"] = "ḹ", + ["ꦺ"] = "e", + ["ꦻ"] = "ai", + ["ꦺꦴ"] = "o", + ["ꦵ"] = "o", + ["ꦻꦴ"] = "au", + ["꧀"] = "" + -- In general, include results of second level diacritics. I think not needed for Javanese. 
+ -- ['y']='y', ['r']='r', +} + +local tt = { + -- vowels + ["ꦄ"] = "a", + ["ꦄꦴ"] = "ā", + ["ꦆ"] = "i", + ["ꦇ"] = "ī", + ["ꦈ"] = "u", + ["ꦈꦴ"] = "ū", + ["ꦉ"] = "ṛ", + ["ꦉꦴ"] = "ṝ", + ["ꦊ"] = "ḷ", + ["ꦋ"] = "ḹ", + ["ꦌ"] = "e", + ["ꦍ"] = "ai", + ["ꦎ"] = "o", + ["ꦎꦴ"] = "au", + -- chandrabindu + ["ꦀ"] = "m̐", -- until a better method is found + -- anusvara + ["ꦁ"] = "ṃ", -- until a better method is found + -- visarga + ["ꦃ"] = "ḥ", + -- avagraha + -- ['ઽ']='’', + -- others + ["ꦂ"] = "r", + -- numerals + ["꧐"] = "0", + ["꧑"] = "1", + ["꧒"] = "2", + ["꧓"] = "3", + ["꧔"] = "4", + ["꧕"] = "5", + ["꧖"] = "6", + ["꧗"] = "7", + ["꧘"] = "8", + ["꧙"] = "9", + ["꧇"] = "", + -- punctuation + ["꧉"] = ".", -- double danda + ["꧈"] = ".", -- danda + -- Vedic extensions + -- ['ᳵ']='x', ['ᳶ']='f', + -- Om + ["ꦎꦴꦀ"] = "oṃ", + -- reconstructed + ["*"] = "" +} +-- List the consonants +local S = "ꦾꦿ" -- Subscript y and r. +local C = "ꦏꦑꦒꦓꦔꦕꦖꦗꦙꦚꦛꦜꦝꦞꦟꦠꦡꦢꦣꦤꦥꦦꦧꦨꦩꦪꦫꦭꦮꦯꦰꦱꦲ" .. S + +function export.tr(text, lang, sc) + -- Handle subscript consonants + local fn = function(c, d) return consonants[c] .. d end + local search = "([" .. C .. "])([" .. S .. "])" + text = gsub(text, search, fn); + text = gsub(text, search, fn); -- and again + text = gsub(text, "([" .. C .. S .. "])" .. "(꧀?[ꦴꦶꦷꦸꦹꦽꦊꦋꦺꦻꦵ꧀]?ꦴ?)", function(c, d) + if d == "" then + return consonants[c] .. "a" + else + return consonants[c] .. diacritics[d] + end + end) + + text = mw.ustring.gsub(text, ".ꦴ", tt) -- Two part independent vowels. 
+ text = mw.ustring.gsub(text, ".", tt) + + return text +end + +return export diff --git a/wiktra/wikt/translit/sa-Knda-translit.lua b/wiktra/wikt/translit/sa-Knda-translit.lua new file mode 100644 index 0000000..c9eda22 --- /dev/null +++ b/wiktra/wikt/translit/sa-Knda-translit.lua @@ -0,0 +1,68 @@ +local export = {} + +local consonants = {["ಕ"] = "k", ["ಖ"] = "kh", ["ಗ"] = "g", ["ಘ"] = "gh", ["ಙ"] = "ṅ", ["ಚ"] = "c", ["ಛ"] = "ch", ["ಜ"] = "j", ["ಝ"] = "jh", ["ಞ"] = "ñ", ["ಟ"] = "ṭ", ["ಠ"] = "ṭh", ["ಡ"] = "ḍ", ["ಢ"] = "ḍh", ["ಣ"] = "ṇ", ["ತ"] = "t", ["ಥ"] = "th", ["ದ"] = "d", ["ಧ"] = "dh", ["ನ"] = "n", ["ಪ"] = "p", ["ಫ"] = "ph", ["ಬ"] = "b", ["ಭ"] = "bh", ["ಮ"] = "m", ["ಯ"] = "y", ["ರ"] = "r", ["ಲ"] = "l", ["ವ"] = "v", ["ಳ"] = "ḷ", ["ಶ"] = "ś", ["ಷ"] = "ṣ", ["ಸ"] = "s", ["ಹ"] = "h"} + +local diacritics = {["ಾ"] = "ā", ["ಿ"] = "i", ["ೀ"] = "ī", ["ು"] = "u", ["ೂ"] = "ū", ["ೃ"] = "ṛ", ["ೄ"] = "ṝ", ["ೢ"] = "ḷ", ["ೣ"] = "ḹ", ["ೇ"] = "e", ["ೈ"] = "ai", ["ೋ"] = "o", ["ೌ"] = "au", ["್"] = ""} + +local tt = { + -- vowels + ["ಅ"] = "a", + ["ಆ"] = "ā", + ["ಇ"] = "i", + ["ಈ"] = "ī", + ["ಉ"] = "u", + ["ಊ"] = "ū", + ["ಋ"] = "ṛ", + ["ೠ"] = "ṝ", + ["ಌ"] = "ḷ", + ["ೡ"] = "ḹ", + ["ಏ"] = "e", + ["ಐ"] = "ai", + ["ಓ"] = "o", + ["ಔ"] = "au", + -- chandrabindu + ["ಁ"] = "m̐", -- until a better method is found + -- anusvara + ["ಂ"] = "ṃ", -- until a better method is found + -- visarga + ["ಃ"] = "ḥ", + -- avagraha + ["ಽ"] = "’", + -- numerals + ["೦"] = "0", + ["೧"] = "1", + ["೨"] = "2", + ["೩"] = "3", + ["೪"] = "4", + ["೫"] = "5", + ["೬"] = "6", + ["೭"] = "7", + ["೮"] = "8", + ["೯"] = "9", + -- punctuation + ["॥"] = ".", -- double danda + ["।"] = ".", -- danda + -- Vedic extensions + ["ೱ"] = "x", + ["ೲ"] = "f", + -- Om + ["ಓಂ"] = "oṃ", + -- reconstructed + ["*"] = "" +} + +function export.tr(text, lang, sc) + text = mw.ustring.gsub(text, "([ಕಖಗಘಙಚಛಜಝಞಟಠಡಢಣತಥದಧನಪಫಬಭಮಯರಲವಳಶಷಸಹ])" .. "([ಾಿೀುೂೃೄೢೣೇೈೋೌ್]?)", function(c, d) + if d == "" then + return consonants[c] ..
"a" + else + return consonants[c] .. diacritics[d] + end + end) + + text = mw.ustring.gsub(text, ".", tt) + + return text +end + +return export diff --git a/wiktra/wikt/translit/sa-Modi-translit.lua b/wiktra/wikt/translit/sa-Modi-translit.lua new file mode 100644 index 0000000..739ecf7 --- /dev/null +++ b/wiktra/wikt/translit/sa-Modi-translit.lua @@ -0,0 +1,68 @@ +local export = {} + +local consonants = {["𑘎"] = "k", ["𑘏"] = "kh", ["𑘐"] = "g", ["𑘑"] = "gh", ["𑘒"] = "ṅ", ["𑘓"] = "c", ["𑘔"] = "ch", ["𑘕"] = "j", ["𑘖"] = "jh", ["𑘗"] = "ñ", ["𑘘"] = "ṭ", ["𑘙"] = "ṭh", ["𑘚"] = "ḍ", ["𑘛"] = "ḍh", ["𑘜"] = "ṇ", ["𑘝"] = "t", ["𑘞"] = "th", ["𑘟"] = "d", ["𑘠"] = "dh", ["𑘡"] = "n", ["𑘢"] = "p", ["𑘣"] = "ph", ["𑘤"] = "b", ["𑘥"] = "bh", ["𑘦"] = "m", ["𑘧"] = "y", ["𑘨"] = "r", ["𑘩"] = "l", ["𑘪"] = "v", ["𑘯"] = "ḷ", ["𑘫"] = "ś", ["𑘬"] = "ṣ", ["𑘭"] = "s", ["𑘮"] = "h"} + +local diacritics = {["𑘰"] = "ā", ["𑘱"] = "i", ["𑘲"] = "ī", ["𑘳"] = "u", ["𑘴"] = "ū", ["𑘵"] = "ṛ", ["𑘶"] = "ṝ", ["𑘷"] = "ḷ", ["𑘸"] = "ḹ", ["𑘹"] = "e", ["𑘺"] = "ai", ["𑘻"] = "o", ["𑘼"] = "au", ["𑘿"] = ""} + +local tt = { + -- vowels + ["𑘀"] = "a", + ["𑘁"] = "ā", + ["𑘂"] = "i", + ["𑘃"] = "ī", + ["𑘄"] = "u", + ["𑘅"] = "ū", + ["𑘆"] = "ṛ", + ["𑘇"] = "ṝ", + ["𑘈"] = "ḷ", + ["𑘉"] = "ḹ", + ["𑘊"] = "e", + ["𑘋"] = "ai", + ["𑘌"] = "o", + ["𑘍"] = "au", + -- chandrabindu + ["𑙀"] = "m̐", -- until a better method is found + -- anusvara + ["𑘽"] = "ṃ", -- until a better method is found + -- visarga + ["𑘾"] = "ḥ", + -- avagraha + ["ऽ"] = "’", + -- numerals + ["𑙐"] = "0", + ["𑙑"] = "1", + ["𑙒"] = "2", + ["𑙓"] = "3", + ["𑙔"] = "4", + ["𑙕"] = "5", + ["𑙖"] = "6", + ["𑙗"] = "7", + ["𑙘"] = "8", + ["𑙙"] = "9", + -- punctuation + ["𑙂"] = ".", -- double danda + ["𑙁"] = ".", -- danda + -- Vedic extensions + ["ᳵ"] = "x", + ["ᳶ"] = "f", + -- Om + ["𑘌𑘦𑘿"] = "oṃ", + -- reconstructed + ["*"] = "" +} + +function export.tr(text, lang, sc) + text = mw.ustring.gsub(text, "([𑘎𑘏𑘐𑘑𑘒𑘓𑘔𑘕𑘖𑘗𑘘𑘙𑘚𑘛𑘜𑘝𑘞𑘟𑘠𑘡𑘢𑘣𑘤𑘥𑘦𑘧𑘨𑘩𑘪𑘫𑘬𑘭𑘮𑘯])" ..
"([𑘰𑘱𑘲𑘳𑘴𑘵𑘶𑘷𑘸𑘹𑘺𑘻𑘼𑘿]?)", function(c, d) + if d == "" then + return consonants[c] .. "a" + else + return consonants[c] .. diacritics[d] + end + end) + + text = mw.ustring.gsub(text, ".", tt) + + return text +end + +return export diff --git a/wiktra/wikt/translit/sa-Orya-translit.lua b/wiktra/wikt/translit/sa-Orya-translit.lua new file mode 100644 index 0000000..aae1491 --- /dev/null +++ b/wiktra/wikt/translit/sa-Orya-translit.lua @@ -0,0 +1,67 @@ +local export = {} + +local consonants = {["କ"] = "k", ["ଖ"] = "kh", ["ଗ"] = "g", ["ଘ"] = "gh", ["ଙ"] = "ṅ", ["ଚ"] = "c", ["ଛ"] = "ch", ["ଜ"] = "j", ["ଝ"] = "jh", ["ଞ"] = "ñ", ["ଟ"] = "ṭ", ["ଠ"] = "ṭh", ["ଡ"] = "ḍ", ["ଢ"] = "ḍh", ["ଣ"] = "ṇ", ["ତ"] = "t", ["ଥ"] = "th", ["ଦ"] = "d", ["ଧ"] = "dh", ["ନ"] = "n", ["ପ"] = "p", ["ଫ"] = "ph", ["ବ"] = "b", ["ଭ"] = "bh", ["ମ"] = "m", ["ଯ"] = "y", ["ୟ"] = "y", ["ର"] = "r", ["ଲ"] = "l", ["ଳ"] = "ḷ", ["ଵ"] = "v", ["ଶ"] = "ś", ["ଷ"] = "ṣ", ["ସ"] = "s", ["ହ"] = "h"} + +local diacritics = {["ା"] = "ā", ["ି"] = "i", ["ୀ"] = "ī", ["ୁ"] = "u", ["ୂ"] = "ū", ["ୃ"] = "ṛ", ["ୄ"] = "ṝ", ["ୢ"] = "ḷ", ["ୣ"] = "ḹ", ["େ"] = "e", ["ୈ"] = "ai", ["ୋ"] = "o", ["ୌ"] = "au", ["୍"] = ""} + +local tt = { + -- vowels + ["ଅ"] = "a", + ["ଆ"] = "ā", + ["ଇ"] = "i", + ["ଈ"] = "ī", + ["ଉ"] = "u", + ["ଊ"] = "ū", + ["ଋ"] = "ṛ", + ["ୠ"] = "ṝ", + ["ଌ"] = "ḷ", + ["ୡ"] = "ḹ", + ["ଏ"] = "e", + ["ଐ"] = "ai", + ["ଓ"] = "o", + ["ଔ"] = "au", + -- chandrabindu + ["ଁ"] = "m̐", -- until a better method is found + -- anusvara + ["ଂ"] = "ṃ", -- until a better method is found + -- visarga + ["ଃ"] = "ḥ", + -- avagraha + ["ଽ"] = "’", + -- numerals (Oriya digits → ASCII digits) + ["୦"] = "0", + ["୧"] = "1", + ["୨"] = "2", + ["୩"] = "3", + ["୪"] = "4", + ["୫"] = "5", + ["୬"] = "6", + ["୭"] = "7", + ["୮"] = "8", + ["୯"] = "9", + -- punctuation + ["॥"] = ".", -- double danda + ["।"] = ".", -- danda + -- Vedic extensions + -- ['ᳵ']='x', ['ᳶ']='f', + -- Om + ["ଓମ୍"] = "oṃ", + -- reconstructed + ["*"] = "" +} + +function export.tr(text, lang, sc) + text =
mw.ustring.gsub(text, "([କଖଗଘଙଚଛଜଝଞଟଠଡଢଣତଥଦଧନପଫବଭମଯୟରଲଳଵଶଷସହ])" .. "([ାିୀୁୂୃୄୢୣେୈୋୌ୍]?)", function(c, d) + if d == "" then + return consonants[c] .. "a" + else + return consonants[c] .. diacritics[d] + end + end) + + text = mw.ustring.gsub(text, ".", tt) + + return text +end + +return export diff --git a/wiktra/wikt/translit/sa-translit.lua b/wiktra/wikt/translit/sa-translit.lua new file mode 100644 index 0000000..6b4a030 --- /dev/null +++ b/wiktra/wikt/translit/sa-translit.lua @@ -0,0 +1,71 @@ +local export = {} + +local consonants = {["क"] = "k", ["ख"] = "kh", ["ग"] = "g", ["घ"] = "gh", ["ङ"] = "ṅ", ["च"] = "c", ["छ"] = "ch", ["ज"] = "j", ["झ"] = "jh", ["ञ"] = "ñ", ["ट"] = "ṭ", ["ठ"] = "ṭh", ["ड"] = "ḍ", ["ढ"] = "ḍh", ["ण"] = "ṇ", ["त"] = "t", ["थ"] = "th", ["द"] = "d", ["ध"] = "dh", ["न"] = "n", ["प"] = "p", ["फ"] = "ph", ["ब"] = "b", ["भ"] = "bh", ["म"] = "m", ["य"] = "y", ["र"] = "r", ["ल"] = "l", ["व"] = "v", ["ळ"] = "ḷ", ["श"] = "ś", ["ष"] = "ṣ", ["स"] = "s", ["ह"] = "h"} + +local diacritics = {["ा"] = "ā", ["ि"] = "i", ["ी"] = "ī", ["ु"] = "u", ["ू"] = "ū", ["ृ"] = "ṛ", ["ॄ"] = "ṝ", ["ॢ"] = "ḷ", ["ॣ"] = "ḹ", ["े"] = "e", ["ै"] = "ai", ["ो"] = "o", ["ौ"] = "au", ["्"] = ""} + +local tt = { + -- vowels + ["अ"] = "a", + ["आ"] = "ā", + ["इ"] = "i", + ["ई"] = "ī", + ["उ"] = "u", + ["ऊ"] = "ū", + ["ऋ"] = "ṛ", + ["ॠ"] = "ṝ", + ["ऌ"] = "ḷ", + ["ॡ"] = "ḹ", + ["ए"] = "e", + ["ऐ"] = "ai", + ["ओ"] = "o", + ["औ"] = "au", + -- chandrabindu + ["ँ"] = "m̐", -- until a better method is found + -- anusvara + ["ं"] = "ṃ", -- until a better method is found + -- visarga + ["ः"] = "ḥ", + -- avagraha + ["ऽ"] = "’", + -- numerals + ["०"] = "0", + ["१"] = "1", + ["२"] = "2", + ["३"] = "3", + ["४"] = "4", + ["५"] = "5", + ["६"] = "6", + ["७"] = "7", + ["८"] = "8", + ["९"] = "9", + -- punctuation + ["॥"] = ".", -- double danda + ["।"] = ".", -- danda + -- Vedic extensions + ["ᳵ"] = "x", + ["ᳶ"] = "f", + -- Om + ["ॐ"] = "oṃ", + -- reconstructed + ["*"] = "", + -- accentuation (needs 
to be handled) + ["॑"] = "", + ["॒"] = "" +} + +function export.tr(text, lang, sc) + text = mw.ustring.gsub(text, "([कखगघङचछजझञटठडढणतथदधनपफबभमयरलळवशषसह])" .. "([ािीुूृॄॢॣेैोौ्]?)", function(c, d) + if d == "" then + return consonants[c] .. "a" + else + return consonants[c] .. diacritics[d] + end + end) + + text = mw.ustring.gsub(text, ".", tt) + + return text +end + +return export diff --git a/wiktra/wikt/translit/sa-utilities/translit.lua b/wiktra/wikt/translit/sa-utilities/translit.lua new file mode 100644 index 0000000..b4f7029 --- /dev/null +++ b/wiktra/wikt/translit/sa-utilities/translit.lua @@ -0,0 +1,36 @@ +local export = {} + +local lang = require("languages").getByCode("sa") + +function export.Deva_to_SLP_template(frame) + local params = {[1] = {required = true}} + local args = require("parameters").process(frame:getParent().args, params) + return require("sa-utilities/translit/Deva-to-SLP1").tr(args[1]) +end + +--[=[ +Converts Devanagari or IAST to SLP1. +]=] +function export.detect_to_SLP(text) + local sc = require("scripts").findBestScript(text, lang):getCode() + if sc == "None" then sc = "IAST" end + return require("sa-utilities/translit/" .. sc .. "-to-SLP1").tr(text) +end + +function export.retrieve_tr_modules(sc) + local tr = require("sa-utilities/translit/" .. sc .. "-to-SLP1").tr + local reverse_tr = require("sa-utilities/translit/SLP1-to-" .. 
sc).tr + local IAST_tr = require("sa-utilities/translit/IAST-to-SLP1").tr + + local safe_tr = function(text) + local sc = require("scripts").findBestScript(text, lang):getCode() + if sc == "None" then + return IAST_tr(text) + else + return tr(text) + end + end + return safe_tr, reverse_tr +end + +return export diff --git a/wiktra/wikt/translit/sa-utilities/translit/Deva-to-IAST.lua b/wiktra/wikt/translit/sa-utilities/translit/Deva-to-IAST.lua new file mode 100644 index 0000000..d7013e3 --- /dev/null +++ b/wiktra/wikt/translit/sa-utilities/translit/Deva-to-IAST.lua @@ -0,0 +1,66 @@ +local export = {} + +local consonants = {["क"] = "k", ["ख"] = "kh", ["ग"] = "g", ["घ"] = "gh", ["ङ"] = "ṅ", ["च"] = "c", ["छ"] = "ch", ["ज"] = "j", ["झ"] = "jh", ["ञ"] = "ñ", ["ट"] = "ṭ", ["ठ"] = "ṭh", ["ड"] = "ḍ", ["ढ"] = "ḍh", ["ण"] = "ṇ", ["त"] = "t", ["थ"] = "th", ["द"] = "d", ["ध"] = "dh", ["न"] = "n", ["प"] = "p", ["फ"] = "ph", ["ब"] = "b", ["भ"] = "bh", ["म"] = "m", ["य"] = "y", ["र"] = "r", ["ल"] = "l", ["व"] = "v", ["ळ"] = "ḷ", ["श"] = "ś", ["ष"] = "ṣ", ["स"] = "s", ["ह"] = "h"} + +local diacritics = {["्"] = "", ["ा"] = "ā", ["ि"] = "i", ["ी"] = "ī", ["ु"] = "u", ["ू"] = "ū", ["ृ"] = "ṛ", ["ॄ"] = "ṝ", ["ॢ"] = "ḷ", ["ॣ"] = "ḹ", ["े"] = "e", ["ै"] = "ai", ["ो"] = "o", ["ौ"] = "au"} + +local tt = { + -- vowels + ["अ"] = "a", + ["आ"] = "ā", + ["इ"] = "i", + ["ई"] = "ī", + ["उ"] = "u", + ["ऊ"] = "ū", + ["ऋ"] = "ṛ", + ["ॠ"] = "ṝ", + ["ऌ"] = "ḷ", + ["ॡ"] = "ḹ", + ["ए"] = "e", + ["ऐ"] = "ai", + ["ओ"] = "o", + ["औ"] = "au", + -- chandrabindu + ["ँ"] = "m̐", -- until a better method is found + -- anusvara + ["ं"] = "ṃ", -- until a better method is found + -- visarga + ["ः"] = "ḥ", + -- avagraha + ["ऽ"] = "’", + -- numerals + ["०"] = "0", + ["१"] = "1", + ["२"] = "2", + ["३"] = "3", + ["४"] = "4", + ["५"] = "5", + ["६"] = "6", + ["७"] = "7", + ["८"] = "8", + ["९"] = "9", + -- punctuation + ["॥"] = ".", -- double danda + ["।"] = ".", -- danda + -- Vedic extensions + ["ᳵ"] = "x", + ["ᳶ"] = 
"f", + -- Om + ["ॐ"] = "oṃ" +} + +function export.tr(text, lang, sc) + text = mw.ustring.gsub(text, "([कखगघङचछजझञटठडढणतथदधनपफबभमयरलळवशषसह])" .. "([ािीुूृॄॢॣेैोौ्]?)", function(c, d) + if d == "" then + return consonants[c] .. "a" + else + return consonants[c] .. diacritics[d] + end + end) + + text = mw.ustring.gsub(text, ".", tt) + + return text +end + +return export diff --git a/wiktra/wikt/translit/sa-utilities/translit/Deva-to-SLP1.lua b/wiktra/wikt/translit/sa-utilities/translit/Deva-to-SLP1.lua new file mode 100644 index 0000000..de669cb --- /dev/null +++ b/wiktra/wikt/translit/sa-utilities/translit/Deva-to-SLP1.lua @@ -0,0 +1,67 @@ +local export = {} + +local consonants = {["क"] = "k", ["ख"] = "K", ["ग"] = "g", ["घ"] = "G", ["ङ"] = "N", ["च"] = "c", ["छ"] = "C", ["ज"] = "j", ["झ"] = "J", ["ञ"] = "Y", ["ट"] = "w", ["ठ"] = "W", ["ड"] = "q", ["ढ"] = "Q", ["ण"] = "R", ["त"] = "t", ["थ"] = "T", ["द"] = "d", ["ध"] = "D", ["न"] = "n", ["प"] = "p", ["फ"] = "P", ["ब"] = "b", ["भ"] = "B", ["म"] = "m", ["य"] = "y", ["र"] = "r", ["ल"] = "l", ["व"] = "v", ["ळ"] = "L", ["श"] = "S", ["ष"] = "z", ["स"] = "s", ["ह"] = "h"} + +local diacritics = {["्"] = "", ["ा"] = "A", ["ि"] = "i", ["ी"] = "I", ["ु"] = "u", ["ू"] = "U", ["ृ"] = "f", ["ॄ"] = "F", ["ॢ"] = "x", ["ॣ"] = "X", ["े"] = "e", ["ै"] = "E", ["ो"] = "o", ["ौ"] = "O"} + +local tt = { + -- vowels + ["अ"] = "a", + ["आ"] = "A", + ["इ"] = "i", + ["ई"] = "I", + ["उ"] = "u", + ["ऊ"] = "U", + ["ऋ"] = "f", + ["ॠ"] = "F", + ["ऌ"] = "x", + ["ॡ"] = "X", + ["ए"] = "e", + ["ऐ"] = "E", + ["ओ"] = "o", + ["औ"] = "O", + -- chandrabindu + ["ँ"] = "~", + -- anusvara + ["ं"] = "M", + -- visarga + ["ः"] = "H", + -- avagraha + ["ऽ"] = "", + -- numerals + ["०"] = "0", + ["१"] = "1", + ["२"] = "2", + ["३"] = "3", + ["४"] = "4", + ["५"] = "5", + ["६"] = "6", + ["७"] = "7", + ["८"] = "8", + ["९"] = "9", + -- punctuation + ["॥"] = ".", -- double danda + ["।"] = ".", -- danda + -- Vedic extensions + ["ᳵ"] = "Z", + ["ᳶ"] = "V", + -- Om + ["ॐ"] = 
"oM", + ["◌॑"] = "/" +} + +function export.tr(text, lang, sc) + text = mw.ustring.gsub(text, "([कखगघङचछजझञटठडढणतथदधनपफबभमयरलळवशषसह])" .. "([ािीुूृॄॢॣेैोौ्]?)", function(c, d) + if d == "" then + return consonants[c] .. "a" + else + return consonants[c] .. diacritics[d] + end + end) + + text = mw.ustring.gsub(text, ".", tt) + + return text +end + +return export diff --git a/wiktra/wikt/translit/sa-utilities/translit/IAST-to-SLP1.lua b/wiktra/wikt/translit/sa-utilities/translit/IAST-to-SLP1.lua new file mode 100644 index 0000000..5a3d78e --- /dev/null +++ b/wiktra/wikt/translit/sa-utilities/translit/IAST-to-SLP1.lua @@ -0,0 +1,92 @@ +local export = {} + +local U = mw.ustring.char + +local acute = U(0x301) -- combining acute +local grave = U(0x300) -- combining grave + +local composed_accents = {["á"] = "a/", ["à"] = "a\\", ["í"] = "i/", ["ì"] = "i\\", ["ú"] = "u/", ["ù"] = "u\\", ["é"] = "e/", ["è"] = "e\\", ["ó"] = "o/", ["ò"] = "o\\", [acute] = "/", [grave] = "\\"} + +local normalize_accents = {["á"] = "/", ["à"] = "\\"} + +local function change_accent(text) + -- normalize accent placement + text = mw.ustring.gsub(text, "([áà])([iu])", function(a, iu) return "a" .. iu .. 
normalize_accents[a] end) + text = mw.ustring.gsub(text, ".", composed_accents) + return text +end + +local function detect_lateral_flap(text) + -- an "ḷ" at the start before a non-vowel, or between two non-vowels, is assumed to be the vowel "x" + text = mw.ustring.gsub(text, "^ḷ([^aāiīuūeoṛṝḷḹ])", "x%1") + text = mw.ustring.gsub(text, "([^aāiīuūeoṛṝḷḹ])ḷ([^aāiīuūeoṛṝḷḹ])", "%1x%2") + -- every remaining "ḷ" is assumed to be the consonant "L" + text = mw.ustring.gsub(text, "ḷ", "L") + return text +end + +local tt = { + [1] = { + -- consonants + ["kh"] = "K", + ["gh"] = "G", + ["ch"] = "C", + ["jh"] = "J", + ["ṭh"] = "W", + ["ḍh"] = "Q", + ["th"] = "T", + ["dh"] = "D", + ["ph"] = "P", + ["bh"] = "B" + }, + [2] = { + -- vowels + ["ai"] = "E", + ["au"] = "O" + }, + [3] = { + -- chandrabindu + ["m̐"] = "~" + }, + [4] = { + + -- consonants + ["ṅ"] = "N", + ["ñ"] = "Y", + ["ṭ"] = "w", + ["ḍ"] = "q", + ["ṇ"] = "R", + ["ś"] = "S", + ["ṣ"] = "z", -- ["ḷ"] = "L" + + -- vowels + ["ā"] = "A", + ["ī"] = "I", + ["ū"] = "U", + ["ṛ"] = "f", + ["ṝ"] = "F", + ["ḹ"] = "X", -- ["ḷ"] = "x", + + -- avagraha + ["’"] = "", + + -- other + ["ṃ"] = "M", + ["ḥ"] = "H", + ["x"] = "Z", + ["f"] = "V" + } +} + +function export.tr(text, lang, sc) + text = mw.ustring.toNFC(text) + text = change_accent(text) + text = detect_lateral_flap(text) + text = mw.ustring.gsub(text, "[kgcjṭḍtdpb]h", tt[1]) + text = mw.ustring.gsub(text, "a[iu]", tt[2]) + text = mw.ustring.gsub(text, "m̐", tt[3]) + text = mw.ustring.gsub(text, ".", tt[4]) + return text +end + +return export diff --git a/wiktra/wikt/translit/sa-utilities/translit/SLP1-to-Deva.lua b/wiktra/wikt/translit/sa-utilities/translit/SLP1-to-Deva.lua new file mode 100644 index 0000000..3537bcf --- /dev/null +++ b/wiktra/wikt/translit/sa-utilities/translit/SLP1-to-Deva.lua @@ -0,0 +1,104 @@ +local export = {} + +local consonant_list = "kKgGNcCjJYwWqQRtTdDnpPbBmyrlLvSzsh" +local consonant = "[" .. consonant_list .. "]" +local vowel_list = "aAiIuUfFxXeEoO" +local vowel = "[" ..
vowel_list .. "]" + +local U = mw.ustring.char + +local virAma = U(0x94D) + +local diacritics = {["a"] = "", ["A"] = "ा", ["i"] = "ि", ["I"] = "ी", ["u"] = "ु", ["U"] = "ू", ["f"] = "ृ", ["F"] = "ॄ", ["x"] = "ॢ", ["X"] = "ॣ", ["e"] = "े", ["E"] = "ै", ["o"] = "ो", ["O"] = "ौ"} + +local tt = { + -- consonants + ["k"] = "क", + ["K"] = "ख", + ["g"] = "ग", + ["G"] = "घ", + ["N"] = "ङ", + ["c"] = "च", + ["C"] = "छ", + ["j"] = "ज", + ["J"] = "झ", + ["Y"] = "ञ", + ["w"] = "ट", + ["W"] = "ठ", + ["q"] = "ड", + ["Q"] = "ढ", + ["R"] = "ण", + ["t"] = "त", + ["T"] = "थ", + ["d"] = "द", + ["D"] = "ध", + ["n"] = "न", + ["p"] = "प", + ["P"] = "फ", + ["b"] = "ब", + ["B"] = "भ", + ["m"] = "म", + ["y"] = "य", + ["r"] = "र", + ["l"] = "ल", + ["v"] = "व", + ["L"] = "ळ", + ["S"] = "श", + ["z"] = "ष", + ["s"] = "स", + ["h"] = "ह", + -- vowels + ["a"] = "अ", + ["A"] = "आ", + ["i"] = "इ", + ["I"] = "ई", + ["u"] = "उ", + ["U"] = "ऊ", + ["f"] = "ऋ", + ["F"] = "ॠ", + ["x"] = "ऌ", + ["X"] = "ॡ", + ["e"] = "ए", + ["E"] = "ऐ", + ["o"] = "ओ", + ["O"] = "औ", + -- chandrabindu + ["~"] = "ँ", + -- anusvara + ["M"] = "ं", + -- visarga + ["H"] = "ः", + -- avagraha + ["'"] = "ऽ", + -- numerals + ["0"] = "०", + ["1"] = "१", + ["2"] = "२", + ["3"] = "३", + ["4"] = "४", + ["5"] = "५", + ["6"] = "६", + ["7"] = "७", + ["8"] = "८", + ["9"] = "९", + -- Vedic extensions + ["Z"] = "ᳵ", + ["V"] = "ᳶ", + ["/"] = "", + ["\\"] = "" +} + +function export.tr(text, lang, sc) + -- capture twice so that all adjacent pairs are covered + text = mw.ustring.gsub(text, "(" .. consonant .. ")" .. "(" .. consonant .. ")", "%1" .. virAma .. "%2") + text = mw.ustring.gsub(text, "(" .. consonant .. ")" .. "(" .. consonant .. ")", "%1" .. virAma .. "%2") + -- whitespace and end of string + text = mw.ustring.gsub(text, "(" .. consonant .. ")%f[%s%z]", "%1" .. virAma) + -- all vowel diacritics + text = mw.ustring.gsub(text, "(" .. consonant .. ")(" .. vowel .. ")", function(c, v) return c .. 
diacritics[v] end) + -- everything else + text = mw.ustring.gsub(text, ".", tt) + return mw.ustring.toNFC(text) +end + +return export diff --git a/wiktra/wikt/translit/sa-utilities/translit/SLP1-to-IAST.lua b/wiktra/wikt/translit/sa-utilities/translit/SLP1-to-IAST.lua new file mode 100644 index 0000000..a78fd9f --- /dev/null +++ b/wiktra/wikt/translit/sa-utilities/translit/SLP1-to-IAST.lua @@ -0,0 +1,72 @@ +local export = {} + +local U = mw.ustring.char + +local acute = U(0x301) -- combining acute +local grave = U(0x300) -- combining grave + +local tt = { + [1] = { + -- consonants + ["K"] = "kh", + ["G"] = "gh", + ["N"] = "ṅ", + ["C"] = "ch", + ["J"] = "jh", + ["Y"] = "ñ", + ["w"] = "ṭ", + ["W"] = "ṭh", + ["q"] = "ḍ", + ["Q"] = "ḍh", + ["R"] = "ṇ", + ["T"] = "th", + ["D"] = "dh", + ["P"] = "ph", + ["B"] = "bh", + ["L"] = "ḷ", + ["S"] = "ś", + ["z"] = "ṣ", + + -- vowels + ["A"] = "ā", + ["I"] = "ī", + ["U"] = "ū", + ["f"] = "ṛ", + ["F"] = "ṝ", + ["x"] = "ḷ", + ["X"] = "ḹ", + ["E"] = "ai", + ["O"] = "au", + + -- chandrabindu + ["~"] = "m̐", -- until a better method is found + + -- anusvara + ["M"] = "ṃ", -- until a better method is found + + -- visarga + ["H"] = "ḥ", + + -- avagraha + ["'"] = "’", + + -- Vedic extensions + ["/"] = acute, + ["\\"] = grave + }, + [2] = { + -- Vedic extensions + ["Z"] = "x", + ["V"] = "f" + } +} + +function export.tr(text, lang, sc) + + text = mw.ustring.gsub(text, ".", tt[1]) + text = mw.ustring.gsub(text, ".", tt[2]) + + return mw.ustring.toNFC(text) +end + +return export diff --git a/wiktra/wikt/translit/sah-translit.lua b/wiktra/wikt/translit/sah-translit.lua new file mode 100644 index 0000000..56a2ebe --- /dev/null +++ b/wiktra/wikt/translit/sah-translit.lua @@ -0,0 +1,99 @@ +local export = {} + +local tab = { + ["А"] = "A", + ["а"] = "a", + ["Б"] = "B", + ["б"] = "b", + ["Г"] = "G", + ["г"] = "g", + ["Ҕ"] = "Ğ", + ["ҕ"] = "ğ", + ["Д"] = "D", + ["д"] = "d", + ["И"] = "İ", + ["и"] = "i", + ["Й"] = "Y", + ["й"] = "y", + ["К"] = "K", 
+ ["к"] = "k", + ["Л"] = "L", + ["л"] = "l", + ["М"] = "M", + ["м"] = "m", + ["Н"] = "N", + ["н"] = "n", + ["Ҥ"] = "Ŋ", + ["ҥ"] = "ŋ", + ["О"] = "O", + ["о"] = "o", + ["Ө"] = "Ö", + ["ө"] = "ö", + ["П"] = "P", + ["п"] = "p", + ["Р"] = "R", + ["р"] = "r", + ["С"] = "S", + ["с"] = "s", + ["Һ"] = "H", + ["һ"] = "h", + ["Т"] = "T", + ["т"] = "t", + ["У"] = "U", + ["у"] = "u", + ["Ү"] = "Ü", + ["ү"] = "ü", + ["Х"] = "X", + ["х"] = "x", + ["Ч"] = "Ç", + ["ч"] = "ç", + ["Ш"] = "Ş", + ["ш"] = "ş", + ["Ы"] = "I", + ["ы"] = "ı", + ["Э"] = "E", + ["э"] = "e", + -- non-native letters + ["В"] = "V", + ["в"] = "v", + ["Е"] = "E", + ["е"] = "e", + ["Ё"] = "Yo", + ["ё"] = "yo", + ["Ж"] = "J", + ["ж"] = "j", + ["З"] = "Z", + ["з"] = "z", + ["Ф"] = "F", + ["ф"] = "f", + ["Ц"] = "Ts", + ["ц"] = "ts", + ["Щ"] = "Şç", + ["щ"] = "şç", + ["Ъ"] = "ʺ", + ["ъ"] = "ʺ", + ["Ь"] = "’", + ["ь"] = "’", + ["Ю"] = "Yu", + ["ю"] = "yu", + ["Я"] = "Ya", + ["я"] = "ya" +} + +function export.tr(text, lang, sc) + text = mw.ustring.gsub(text, "Дь", "C") + text = mw.ustring.gsub(text, "дь", "c") + text = mw.ustring.gsub(text, "Нь", "Ń") + text = mw.ustring.gsub(text, "нь", "ń") + + -- е after a vowel or at the beginning of a word becomes ye + text = mw.ustring.gsub(text, "([АОӨУҮЫЕЯЁЮИЕЪЬаоөуүыэяёюиеъь%A][́̀]?)е", "%1ye") + text = mw.ustring.gsub(text, "^Е", "Ye") + text = mw.ustring.gsub(text, "^е", "ye") + text = mw.ustring.gsub(text, "([^Ѐ-ӿ])Е", "%1Ye") + text = mw.ustring.gsub(text, "([^Ѐ-ӿ])е", "%1ye") + + return (mw.ustring.gsub(text, ".", tab)) +end + +return export diff --git a/wiktra/wikt/translit/sarb-translit.lua b/wiktra/wikt/translit/sarb-translit.lua new file mode 100644 index 0000000..3e7aca5 --- /dev/null +++ b/wiktra/wikt/translit/sarb-translit.lua @@ -0,0 +1,21 @@ +local export = {} + +local correspondences = {["𐩠"] = "h", ["𐩡"] = "l", ["𐩢"] = "ḥ", ["𐩣"] = "m", ["𐩤"] = "q", ["𐩥"] = "w", ["𐩦"] = "s²", ["𐩧"] = "r", ["𐩨"] = "b", ["𐩩"] = "t", ["𐩪"] = "s¹", ["𐩫"] = "k", ["𐩬"] = "n", ["𐩭"] =
"ḫ", ["𐩮"] = "ṣ", ["𐩯"] = "s³", ["𐩰"] = "f", ["𐩱"] = "ʾ", ["𐩲"] = "ʿ", ["𐩳"] = "ḍ", ["𐩴"] = "g", ["𐩵"] = "d", ["𐩶"] = "ġ", ["𐩷"] = "ṭ", ["𐩸"] = "z", ["𐩹"] = "ḏ", ["𐩺"] = "y", ["𐩻"] = "ṯ", ["𐩼"] = "ẓ", ["𐩽"] = " "} + +local numbers = {["𐩽"] = "1", ["𐩭"] = "5", ["𐩲"] = "10", ["𐩾"] = "50", ["𐩣"] = "100", ["𐩱"] = "1000"} + +function export.tr(text, lang, sc) + -- Interpret numbers: sum the value of every digit found between two number brackets. + -- Will not work for thousands! + text = text:gsub("𐩿(..-)𐩿", function(number) + local value = 0 + for digit in mw.ustring.gmatch(number, ".") do value = value + (numbers[digit] or error("The character " .. digit .. " in " .. number .. " does not have a numeric value.")) end + return value + end) + + text = mw.ustring.gsub(text, ".", correspondences) + + return text +end + +return export diff --git a/wiktra/wikt/translit/sat-translit.lua b/wiktra/wikt/translit/sat-translit.lua new file mode 100644 index 0000000..ac26217 --- /dev/null +++ b/wiktra/wikt/translit/sat-translit.lua @@ -0,0 +1,70 @@ +local export = {} + +local tt = { + ["ᱛ"] = "t", + ["ᱜ"] = "g", + ["ᱝ"] = "ṅ", + ["ᱞ"] = "l", + ["ᱠ"] = "k", + ["ᱡ"] = "j", + ["ᱢ"] = "m", + ["ᱣ"] = "w", + ["ᱥ"] = "s", + ["ᱦ"] = "h", + ["ᱧ"] = "ñ", + ["ᱨ"] = "r", + ["ᱪ"] = "c", + ["ᱫ"] = "d", + ["ᱬ"] = "ṇ", + ["ᱭ"] = "y", + ["ᱯ"] = "p", + ["ᱰ"] = "ḍ", + ["ᱱ"] = "n", + ["ᱲ"] = "ṛ", + ["ᱴ"] = "ṭ", + ["ᱵ"] = "b", + ["ᱶ"] = "v", + ["ᱷ"] = "ʰ", + + -- vowels + ["ᱚ"] = "ô", + ["ᱟ"] = "a", + ["ᱤ"] = "i", + ["ᱩ"] = "u", + ["ᱮ"] = "e", + ["ᱳ"] = "o", + ["ᱚᱹ"] = "ô", + ["ᱟᱹ"] = "ə", + ["ᱮᱹ"] = "ɛ", + ["ᱚᱺ"] = "ỗ", + ["ᱟᱺ"] = "ə̃", + ["ᱮᱺ"] = "ɛ̃", + + -- special stuff + ["ᱸ"] = "̃", + + -- numerals + ["᱐"] = "0", + ["᱑"] = "1", + ["᱒"] = "2", + ["᱓"] = "3", + ["᱔"] = "4", + ["᱕"] = "5", + ["᱖"] = "6", + ["᱗"] = "7", + ["᱘"] = "8", + ["᱙"] = "9", + + -- punctuation + ["᱾"] = ".", + ["᱿"] = "."
+}
+
+--- Transliterate text using the `tt` table.
+-- @param text string to transliterate
+-- @param lang unused here
+-- @param sc unused here
+-- @return the transliterated string
+function export.tr(text, lang, sc)
+    -- First pass: a base character plus optional "ᱹ" and/or "ᱺ" is looked up as a
+    -- unit, so the multi-character keys of `tt` can match; sequences without an
+    -- entry are left untouched for the second pass.
+    text = mw.ustring.gsub(text, "([᱐᱑᱒᱓᱔᱕᱖᱗᱘᱙ᱚᱛᱜᱝᱞᱟᱠᱡᱢᱣᱤᱥᱦᱧᱨᱩᱪᱫᱬᱭᱮᱯᱰᱱᱲᱳᱴᱵᱶᱷᱸᱻᱼᱽ᱾᱿]ᱹ?ᱺ?)", tt)
+    -- Second pass: everything that remains, one character at a time.
+    text = mw.ustring.gsub(text, ".", tt)
+
+    return text
+end
+
+return export
diff --git a/wiktra/wikt/translit/saz-translit.lua b/wiktra/wikt/translit/saz-translit.lua
new file mode 100644
index 0000000..f9ebe5c
--- /dev/null
+++ b/wiktra/wikt/translit/saz-translit.lua
@@ -0,0 +1,60 @@
+local export = {}
+
+local consonants = {["ꢒ"] = "k", ["ꢓ"] = "kh", ["ꢔ"] = "g", ["ꢕ"] = "gh", ["ꢖ"] = "ṅ", ["ꢗ"] = "c", ["ꢘ"] = "ch", ["ꢙ"] = "j", ["ꢚ"] = "jh", ["ꢛ"] = "ñ", ["ꢜ"] = "ṭ", ["ꢝ"] = "ṭh", ["ꢞ"] = "ḍ", ["ꢟ"] = "ḍh", ["ꢠ"] = "ṇ", ["ꢡ"] = "t", ["ꢢ"] = "th", ["ꢣ"] = "d", ["ꢤ"] = "dh", ["ꢥ"] = "n", ["ꢦ"] = "p", ["ꢧ"] = "ph", ["ꢨ"] = "b", ["ꢩ"] = "bh", ["ꢪ"] = "m", ["ꢫ"] = "y", ["ꢬ"] = "r", ["ꢭ"] = "l", ["ꢮ"] = "v", ["ꢯ"] = "ś", ["ꢰ"] = "ṣ", ["ꢱ"] = "s", ["ꢲ"] = "h", ["ꢳ"] = "ḷ"}
+
+local diacritics = {["ꢵ"] = "ā", ["ꢶ"] = "i", ["ꢷ"] = "ī", ["ꢸ"] = "u", ["ꢹ"] = "ū", ["ꢺ"] = "ṛ", ["ꢻ"] = "ṝ", ["ꢼ"] = "ḷ", ["ꢽ"] = "ḹ", ["ꢾ"] = "e", ["ꢿ"] = "ē", ["ꣀ"] = "ai", ["ꣁ"] = "o", ["ꣂ"] = "ō", ["ꣃ"] = "au", ["꣄"] = "", ["ꢴ"] = "h"}
+
+local nonconsonants = {
+    -- vowels
+    ["ꢂ"] = "a",
+    ["ꢃ"] = "ā",
+    ["ꢄ"] = "i",
+    ["ꢅ"] = "ī",
+    ["ꢆ"] = "u",
+    ["ꢇ"] = "ū",
+    ["ꢈ"] = "ṛ",
+    ["ꢉ"] = "ṝ",
+    ["ꢊ"] = "ḷ",
+    ["ꢋ"] = "ḹ",
+    ["ꢌ"] = "e",
+    ["ꢍ"] = "ē",
+    ["ꢎ"] = "ai",
+    ["ꢏ"] = "o",
+    ["ꢐ"] = "ō",
+    ["ꢑ"] = "au",
+    -- other symbols
+    ["ꢀ"] = "ṃ", -- anusvara
+    ["ꢁ"] = "ḥ", -- visarga
+    ["ꣅ"] = "◌̃",
+    ["꣎"] = ".",
+    -- digits
+    ["꣐"] = "0",
+    ["꣑"] = "1",
+    ["꣒"] = "2",
+    ["꣓"] = "3",
+    ["꣔"] = "4",
+    ["꣕"] = "5",
+    ["꣖"] = "6",
+    ["꣗"] = "7",
+    ["꣘"] = "8",
+    ["꣙"] = "9"
+}
+
+-- translit any words or phrases
+-- Each consonant is paired with an optional following sign from `diacritics`;
+-- a consonant with no sign gets the inherent vowel "a". Whatever is left is
+-- mapped through `nonconsonants`.
+function export.tr(text, lang, sc)
+    text = mw.ustring.gsub(text, "([ꢒꢓꢔꢕꢖꢗꢘꢙꢚꢛꢜꢝꢞꢟꢠꢡꢢꢣꢤꢥꢦꢧꢨꢩꢪꢫꢬꢭꢮꢯꢰꢱꢲꢳ])" .. "([ꢵꢶꢷꢸꢹꢺꢻꢼꢽꢾꢿꣀꣁꣂꣃ꣄ꢴ]?)", function(c, d)
+        -- mw.log('match', c, d)
+        c = consonants[c] or c
+        if d == "" then
+            return c .. "a"
+        else
+            -- "" (the value stored for "꣄") is truthy in Lua, so it suppresses the vowel
+            return c .. (diacritics[d] or d)
+        end
+    end)
+
+    text = mw.ustring.gsub(text, ".", nonconsonants)
+
+    return text
+end
+
+return export
diff --git a/wiktra/wikt/translit/script utilities.lua b/wiktra/wikt/translit/script utilities.lua
new file mode 100644
index 0000000..a794f7d
--- /dev/null
+++ b/wiktra/wikt/translit/script utilities.lua
@@ -0,0 +1,359 @@
+local export = {}
+
+--[=[
+    Modules used:
+    [[Module:script utilities/data]]
+    [[Module:scripts]]
+    [[Module:senseid]] (only when id's present)
+    [[Module:string utilities]] (only when hyphens in Korean text or spaces in vertical text)
+    [[Module:languages]]
+    [[Module:parameters]]
+    [[Module:utilities]]
+    [[Module:debug]]
+]=]
+
+--- Return true when the script object's code contains "Lat".
+function export.is_Latin_script(sc)
+    -- Latn, Latf, Latinx, pjt-Latn
+    return sc:getCode():find("Lat") and true or false
+end
+
+-- Used by [[Template:lang]]
+-- Reads the template arguments from the parent frame and returns the text
+-- wrapped by export.tag_text.
+function export.lang_t(frame)
+    -- `params` used to be assigned without `local`, leaking a global.
+    local params = {[1] = {}, [2] = {allow_empty = true, default = ""}, ["sc"] = {}, ["face"] = {}, ["class"] = {}}
+
+    local args = require("parameters").process(frame:getParent().args, params)
+    local NAMESPACE = mw.title.getCurrentTitle().nsText
+
+    local lang = args[1] or (NAMESPACE == "Template" and "und") or error("Language code has not been specified. Please pass parameter 1 to the template.")
+    lang = require("languages").getByCode(lang) or require("languages").err(lang, 1)
+
+    local text = args[2]
+
+    local sc = args["sc"]
+    sc = (sc and (require("scripts").getByCode(sc) or error("The script code \"" .. sc .. "\" is not valid.")) or nil)
+
+    local face = args["face"]
+    -- The declared "class" parameter was never read: an undefined global `class`
+    -- (always nil) was passed on instead.
+    local class = args["class"]
+
+    return export.tag_text(text, lang, sc, face, class)
+end
+
+-- Ustring turns on the codepoint-aware string matching. The basic string function
+-- should be used for simple sequences of characters, Ustring function for
+-- sets – [].
+-- Record the tracking key "script/<tracking>" when `pattern` occurs in `text`.
+-- `ustring` selects mw.ustring.find instead of string.find.
+local function trackPattern(text, pattern, tracking, ustring)
+    local find = ustring and mw.ustring.find or string.find
+    if pattern and find(text, pattern) then require("debug").track("script/" .. tracking) end
+end
+
+-- Run the language-specific tracking checks on `text`. `sc` is accepted but unused.
+local function track(text, lang, sc)
+    local U = mw.ustring.char
+
+    if lang and text then
+        local langCode = lang:getCode()
+
+        -- [[Special:WhatLinksHere/Template:tracking/script/ang/acute]]
+        if langCode == "ang" then
+            local decomposed = mw.ustring.toNFD(text)
+            local acute = U(0x301)
+
+            trackPattern(decomposed, acute, "ang/acute")
+
+            --[=[
+            [[Special:WhatLinksHere/Template:tracking/script/Greek/wrong-phi]]
+            [[Special:WhatLinksHere/Template:tracking/script/Greek/wrong-theta]]
+            [[Special:WhatLinksHere/Template:tracking/script/Greek/wrong-kappa]]
+            [[Special:WhatLinksHere/Template:tracking/script/Greek/wrong-rho]]
+            ϑ, ϰ, ϱ, ϕ should generally be replaced with θ, κ, ρ, φ.
+            ]=]
+        elseif langCode == "el" or langCode == "grc" then
+            trackPattern(text, "ϑ", "Greek/wrong-theta")
+            trackPattern(text, "ϰ", "Greek/wrong-kappa")
+            trackPattern(text, "ϱ", "Greek/wrong-rho")
+            trackPattern(text, "ϕ", "Greek/wrong-phi")
+
+            --[=[
+            [[Special:WhatLinksHere/Template:tracking/script/Ancient Greek/spacing-coronis]]
+            [[Special:WhatLinksHere/Template:tracking/script/Ancient Greek/spacing-smooth-breathing]]
+            [[Special:WhatLinksHere/Template:tracking/script/Ancient Greek/wrong-apostrophe]]
+            When spacing coronis and spacing smooth breathing are used as apostrophes,
+            they should be replaced with right single quotation marks (’).
+            ]=]
+            if langCode == "grc" then
+                trackPattern(text, U(0x1FBD), "Ancient Greek/spacing-coronis")
+                trackPattern(text, U(0x1FBF), "Ancient Greek/spacing-smooth-breathing")
+                trackPattern(text, "[" .. U(0x1FBD) .. U(0x1FBF) .. "]", "Ancient Greek/wrong-apostrophe", true)
+            end
+
+            -- [[Special:WhatLinksHere/Template:tracking/script/Russian/grave-accent]]
+        elseif langCode == "ru" then
+            local decomposed = mw.ustring.toNFD(text)
+
+            trackPattern(decomposed, U(0x300), "Russian/grave-accent")
+
+            -- [[Special:WhatLinksHere/Template:tracking/script/Tibetan/trailing-punctuation]]
+        elseif langCode == "bo" then
+            trackPattern(text, "[་།]$", "Tibetan/trailing-punctuation", true)
+            trackPattern(text, "[་།]%]%]$", "Tibetan/trailing-punctuation", true)
+
+            --[=[
+            [[Special:WhatLinksHere/Template:tracking/script/Thai/broken-ae]]
+            [[Special:WhatLinksHere/Template:tracking/script/Thai/broken-am]]
+            [[Special:WhatLinksHere/Template:tracking/script/Thai/wrong-rue-lue]]
+            ]=]
+        elseif langCode == "th" then
+            trackPattern(text, "เ" .. "เ", "Thai/broken-ae")
+            trackPattern(text, "ํ[่้๊๋]?า", "Thai/broken-am", true)
+            trackPattern(text, "[ฤฦ]า", "Thai/wrong-rue-lue", true)
+
+            --[=[
+            [[Special:WhatLinksHere/Template:tracking/script/Lao/broken-ae]]
+            [[Special:WhatLinksHere/Template:tracking/script/Lao/broken-am]]
+            [[Special:WhatLinksHere/Template:tracking/script/Lao/possible-broken-ho-no]]
+            [[Special:WhatLinksHere/Template:tracking/script/Lao/possible-broken-ho-mo]]
+            [[Special:WhatLinksHere/Template:tracking/script/Lao/possible-broken-ho-lo]]
+            ]=]
+        elseif langCode == "lo" then
+            trackPattern(text, "ເ" .. "ເ", "Lao/broken-ae")
+            trackPattern(text, "ໍ[່້໊໋]?າ", "Lao/broken-am", true)
+            trackPattern(text, "ຫນ", "Lao/possible-broken-ho-no")
+            trackPattern(text, "ຫມ", "Lao/possible-broken-ho-mo")
+            trackPattern(text, "ຫລ", "Lao/possible-broken-ho-lo")
+
+            --[=[
+            [[Special:WhatLinksHere/Template:tracking/script/Lü/broken-ae]]
+            [[Special:WhatLinksHere/Template:tracking/script/Lü/possible-wrong-sequence]]
+            ]=]
+        elseif langCode == "khb" then
+            trackPattern(text, "ᦵ" .. "ᦵ", "Lü/broken-ae")
+            trackPattern(text, "[ᦀ-ᦫ][ᦵᦶᦷᦺ]", "Lü/possible-wrong-sequence", true)
+        end
+    end
+end
+
+-- Apply a function to `text`, but not to the target of wikilinks or to HTML tags.
+local function munge_text(text, fn)
+    local has_html = text:find("<")
+    local has_two_part_link = text:find("%[%[.*|")
+    if not has_html and not has_two_part_link then return fn(text) end
+
+    local strutils = require("string utilities")
+
+    -- capturing_split keeps the delimiters, so odd indices are the text between tags
+    local function munge_text_with_html(txt)
+        local parts = strutils.capturing_split(txt, "(<[^>]->)")
+        for i = 1, #parts, 2 do parts[i] = fn(parts[i]) end
+        return table.concat(parts)
+    end
+
+    if has_two_part_link then
+        -- The hard case is when both two-part links and HTML tags occur, because crippled Lua patterns
+        -- don't support alternation. We need to first split on two-part links (which seem more likely
+        -- to occur), then split odd-numbered fragments on HTML tags, then apply the function to
+        -- odd-numbered subfragments. This is unlikely to be very efficient, but should occur rarely.
+        local parts = strutils.capturing_split(text, "(%[%[[^%[%]|]-|)")
+        for i = 1, #parts, 2 do
+            if has_html then
+                parts[i] = munge_text_with_html(parts[i])
+            else
+                parts[i] = fn(parts[i])
+            end
+        end
+        return table.concat(parts)
+    else -- HTML tags only
+        return munge_text_with_html(text)
+    end
+end
+
+-- Wrap text in the appropriate HTML tags with language and script class.
+-- `sc` defaults to the best script detected for `text`; `face` selects an entry
+-- of the `faces` table in the data module; `class` adds an extra CSS class;
+-- `id` adds an anchor id. Raises an error for an unknown face.
+function export.tag_text(text, lang, sc, face, class, id)
+    if not sc then sc = require("scripts").findBestScript(text, lang) end
+
+    track(text, lang, sc)
+
+    -- Replace space characters with newlines in Mongolian-script text, which is written top-to-bottom.
+    if sc and sc:getDirection() == "down" and text:find(" ") then
+        text = munge_text(text, function(txt)
+            -- having extra parentheses makes sure only the first return value gets through
+            -- NOTE(review): the replacement literal was split by a raw line break (not
+            -- valid Lua); restored as an HTML line break, which is presumably what the
+            -- markup was before it was stripped -- confirm against upstream.
+            return (txt:gsub(" +", "<br>"))
+        end)
+    end
+
+    -- Hack Korean text to remove hyphens. This should be handled in a more general fashion, but needs to
+    -- be efficient by not doing anything if no hyphens are present, and currently this is the only
+    -- language needing such processing.
+    -- `lang` is nil-checked here because track() and tag_attr() below already tolerate a missing language.
+    if lang and lang:getCode() == "ko" and text:find("%-") then
+        text = munge_text(text, function(txt)
+            -- having extra parentheses makes sure only the first return value gets through
+            return (txt:gsub("%-", ""))
+        end)
+    end
+
+    if sc:getCode() == "Imag" then face = nil end
+
+    -- class="<script code> <face classes...> <extra class>"
+    local function class_attr(classes)
+        table.insert(classes, 1, sc:getCode())
+        if class and class ~= "" then table.insert(classes, class) end
+        return "class=\"" .. table.concat(classes, " ") .. "\""
+    end
+
+    local function tag_attr(...)
+        local output = {}
+        if id then table.insert(output, "id=\"" .. require("senseid").anchor(lang, id) .. "\"") end
+
+        table.insert(output, class_attr({...}))
+
+        if lang then table.insert(output, "lang=\"" .. lang:getCode() .. "\"") end
+
+        return table.concat(output, " ")
+    end
+
+    if face == "hypothetical" then
+        -- [[Special:WhatLinksHere/Template:tracking/script-utilities/face/hypothetical]]
+        require("debug").track("script-utilities/face/hypothetical")
+    end
+
+    local data = mw.loadData("script utilities/data").faces[face or "nil"]
+
+    local post = ""
+    -- "\226\128\142" is U+200E LEFT-TO-RIGHT MARK, written as an escape so the invisible character is visible in source
+    if sc:getDirection() == "rtl" and (face == "translation" or mw.ustring.find(text, "%p$")) then post = "\226\128\142" end
+
+    -- Add a script wrapper
+    if data then
+        -- the element was opened but never closed; emit the matching end tag
+        return (data.prefix or "") .. "<" .. data.tag .. " " .. tag_attr(data.class) .. ">" .. text .. "</" .. data.tag .. ">" .. post
+    else
+        error("Invalid script face \"" .. face .. "\".")
+    end
+end
+
+--- Wrap a transliteration in its HTML element.
+-- @param translit the transliterated text
+-- @param lang language code or language object
+-- @param kind key into the data module's `translit` table ("default" when nil)
+-- @param attributes optional extra attribute string
+-- @param is_manual adds the "manual-tr" class when truthy
+function export.tag_translit(translit, lang, kind, attributes, is_manual)
+    if type(lang) == "table" then lang = lang.getCode and lang:getCode() or error("Second argument to tag_translit should be a language code or language object.") end
+
+    local data = mw.loadData("script utilities/data").translit[kind or "default"]
+    -- fall back to "span" so the opening and closing tags always name an element
+    local tag = data.tag or "span"
+
+    local opening_tag = {}
+
+    table.insert(opening_tag, tag)
+    if lang == "ja" then
+        table.insert(opening_tag, "class=\"" .. (data.classes and data.classes .. " " or "") .. (is_manual and "manual-tr " or "") .. "tr\"")
+    else
+        table.insert(opening_tag, "lang=\"" .. lang .. "-Latn\"")
+        table.insert(opening_tag, "class=\"" .. (data.classes and data.classes .. " " or "") .. (is_manual and "manual-tr " or "") .. "tr Latn\"")
+    end
+
+    if data.dir then table.insert(opening_tag, "dir=\"" .. data.dir .. "\"") end
+
+    table.insert(opening_tag, attributes)
+
+    -- the element was opened but never closed; emit the matching end tag
+    return "<" .. table.concat(opening_tag, " ") .. ">" .. translit .. "</" .. tag .. ">"
+end
+
+--- Wrap a transcription in its HTML element.
+-- @param transcription the transcribed text
+-- @param lang language code or language object
+-- @param kind key into the data module's `transcription` table ("default" when nil)
+-- @param attributes optional extra attribute string
+function export.tag_transcription(transcription, lang, kind, attributes)
+    -- message previously named the wrong function and argument position (copy-paste from tag_translit)
+    if type(lang) == "table" then lang = lang.getCode and lang:getCode() or error("Second argument to tag_transcription should be a language code or language object.") end
+
+    local data = mw.loadData("script utilities/data").transcription[kind or "default"]
+    -- The "default" transcription entry carries no tag; without a fallback the
+    -- output started with "<lang=..." instead of an element name.
+    local tag = data.tag or "span"
+
+    local opening_tag = {}
+
+    table.insert(opening_tag, tag)
+    if lang == "ja" then
+        table.insert(opening_tag, "class=\"" .. (data.classes and data.classes .. " " or "") .. "ts\"")
+    else
+        table.insert(opening_tag, "lang=\"" .. lang .. "-Latn\"")
+        table.insert(opening_tag, "class=\"" .. (data.classes and data.classes .. " " or "") .. "ts Latn\"")
+    end
+
+    if data.dir then table.insert(opening_tag, "dir=\"" .. data.dir .. "\"") end
+
+    table.insert(opening_tag, attributes)
+
+    -- the element was opened but never closed; emit the matching end tag
+    return "<" .. table.concat(opening_tag, " ") .. ">" .. transcription .. "</" .. tag .. ">"
+end
+
+-- Add a notice to request the native script of a word
+-- Returns "" when no non-Latin script applies; otherwise the notice text,
+-- followed by the request category unless `nocat` is set.
+function export.request_script(lang, sc, usex, nocat, sort_key)
+    local scripts = lang.getScripts and lang:getScripts() or error("The language \"" .. lang:getCode() .. "\" does not have the method getScripts. It may be unwritten.")
+
+    -- By default, request for "native" script
+    local cat_script = "native"
+    local disp_script = "script"
+
+    -- If the script was not specified, and the language has only one script, use that.
+    if not sc and #scripts == 1 then sc = scripts[1] end
+
+    -- Is the script known?
+    if sc then
+        -- If the script is Latin, return nothing.
+        if export.is_Latin_script(sc) then return "" end
+
+        if sc:getCode() ~= scripts[1]:getCode() then disp_script = sc:getCanonicalName() end
+
+        -- The category needs to be specific to script only if there is chance
+        -- of ambiguity. This occurs when lang=und, or when the language has
+        -- multiple scripts.
+        if lang:getCode() == "und" or scripts[2] then cat_script = sc:getCanonicalName() end
+    else
+        -- The script is not known.
+        -- Does the language have at least one non-Latin script in its list?
+        local has_nonlatin = false
+
+        for i, val in ipairs(scripts) do
+            if not export.is_Latin_script(val) then
+                has_nonlatin = true
+                break
+            end
+        end
+
+        -- If there are no non-Latin scripts, return nothing.
+        if not has_nonlatin then return "" end
+    end
+
+    local category
+
+    if usex then
+        category = "Requests for " .. cat_script .. " script in " .. lang:getCanonicalName() .. " usage examples"
+    else
+        category = "Requests for " .. cat_script .. " script for " .. lang:getCanonicalName() .. " terms"
+    end
+
+    return "[" .. disp_script .. " needed]" ..
(nocat and "" or require("utilities").format_categories({category}, lang, sort_key))
+end
+
+--- Template entry point around export.request_script.
+-- Raises an error when request_script returns "" (no non-Latin script to request).
+function export.template_rfscript(frame)
+    -- `params` used to be assigned without `local`, leaking a global.
+    local params = {[1] = {required = true, default = "und"}, ["sc"] = {}, ["usex"] = {type = "boolean"}, ["nocat"] = {type = "boolean"}, ["sort"] = {}}
+
+    local args = require("parameters").process(frame:getParent().args, params)
+
+    local lang = require("languages").getByCode(args[1], 1)
+    local sc = args.sc and require("scripts").getByCode(args.sc, true)
+
+    local ret = export.request_script(lang, sc, args.usex, args.nocat, args.sort)
+
+    if ret == "" then
+        error("This language is written in the Latin alphabet. It does not need a native script.")
+    else
+        return ret
+    end
+end
+
+--- Raise an error if `text` contains letters outside the given script.
+-- @param text string to check
+-- @param scriptCode code looked up with scripts.getByCode
+-- @param result optional custom error message (used when it is a string)
+function export.checkScript(text, scriptCode, result)
+    local scriptObject = require("scripts").getByCode(scriptCode)
+
+    if not scriptObject then error("The script code \"" .. scriptCode .. "\" is not recognized.") end
+
+    local originalText = text
+
+    -- Remove non-letter characters.
+    text = mw.ustring.gsub(text, "[%A]", "")
+
+    -- Remove all characters of the script in question.
+    text = mw.ustring.gsub(text, "[" .. scriptObject:getCharacters() .. "]", "")
+
+    -- anything left over is a letter from another script
+    if text ~= "" then
+        if type(result) == "string" then
+            error(result)
+        else
+            error("The text \"" .. originalText .. "\" contains the letters \"" .. text .. "\" that do not belong to the " .. scriptObject:getDisplayForm() .. ".", 2)
+        end
+    end
+end
+
+return export
diff --git a/wiktra/wikt/translit/script utilities/data.lua b/wiktra/wikt/translit/script utilities/data.lua
new file mode 100644
index 0000000..656947a
--- /dev/null
+++ b/wiktra/wikt/translit/script utilities/data.lua
@@ -0,0 +1,21 @@
+local data = {}
+
+-- Per-kind settings for transliteration wrappers: element tag, CSS classes, text direction.
+data.translit = {
+    ["term"] = {
+        --[=[ can't be done until Kana transliterations are correctly parsed by [[Module:links]]
+        ["tag"] = "i",
+        ]=]
+        ["classes"] = "mention-tr"
+    },
+    ["usex"] = {["tag"] = "i", ["classes"] = "e-transliteration"},
+    ["head"] = {["classes"] = "headword-tr", ["dir"] = "ltr"},
+    ["default"] = {}
+}
+
+-- Per-kind settings for transcription wrappers.
+data.transcription = {["head"] = {["tag"] = "span", ["classes"] = "headword-ts", ["dir"] = "ltr"}, ["usex"] = {tag = "span", ["classes"] = "e-transcription"}, ["default"] = {}}
+
+-- Every entry without an explicit tag is rendered as a <span>.
+for key, value in pairs(data.translit) do if not value.tag then value.tag = "span" end end
+-- The same default must cover data.transcription: its "default" entry has no
+-- tag, so transcriptions of the default kind were emitted without an element name.
+for key, value in pairs(data.transcription) do if not value.tag then value.tag = "span" end end
+
+-- Wrapper element (and optional prefix / class) for each text face; the key "nil" is used when no face is given.
+data.faces = {["term"] = {tag = "i", class = "mention"}, ["head"] = {tag = "strong", class = "headword"}, ["hypothetical"] = {prefix = "*", tag = "i", class = "hypothetical"}, ["bold"] = {tag = "b"}, ["translation"] = {tag = "span"}, ["nil"] = {tag = "span"}}
+
+return data
diff --git a/wiktra/wikt/translit/scripts.lua b/wiktra/wikt/translit/scripts.lua
new file mode 100644
index 0000000..8457c9d
--- /dev/null
+++ b/wiktra/wikt/translit/scripts.lua
@@ -0,0 +1,254 @@
+local export = {}
+-- Method table for script objects; instances carry `_code` and `_rawData`.
+local Script = {}
+
+function Script:getCode() return self._code end
+
+function Script:getCanonicalName() return self._rawData.canonicalName end
+
+-- The category name without initial capitalisation.
+function Script:getDisplayForm() return self:getCategoryName("nocap") end
+
+function Script:getOtherNames(onlyOtherNames) return require("language-like").getOtherNames(self, onlyOtherNames) end
+
+function Script:getAliases() return self._rawData.aliases or {} end
+
+function Script:getVarieties(flatten) return require("language-like").getVarieties(self, flatten) end
+
+function Script:getParent() return self._rawData.parent end
+
+function
Script:getSystems()
+    -- Resolve the raw system codes into writing-system objects on first use
+    -- and cache the resulting list on the instance.
+    if self._systemObjects then return self._systemObjects end
+
+    local m_systems = require("writing systems")
+    self._systemObjects = {}
+    for _, system_code in ipairs(self._rawData.systems or {}) do
+        table.insert(self._systemObjects, m_systems.getByCode(system_code))
+    end
+
+    return self._systemObjects
+end
+
+-- function Script:getAllNames()
+-- return self._rawData.names
+-- end
+
+function Script:getType()
+    return "script"
+end
+
+-- Category name for the script: the canonical name, plus " script" unless it
+-- already ends in "code"/"semaphore"; capitalised unless `nocap` is truthy.
+function Script:getCategoryName(nocap)
+    local name = self._rawData.canonicalName
+
+    -- No names contain "script", so only these two endings need checking.
+    local has_own_suffix = name:find("[Cc]ode$") or name:find("[Ss]emaphore$")
+    if not has_own_suffix then name = name .. " script" end
+
+    if nocap then return name end
+    -- parentheses keep only the first return value, as the assignment did before
+    return (mw.getContentLanguage():ucfirst(name))
+end
+
+-- Wikilink to the script's category, displayed with the uncapitalised name.
+function Script:makeCategoryLink()
+    local category = self:getCategoryName()
+    local display = self:getDisplayForm()
+    return "[[:Category:" .. category .. "|" .. display .. "]]"
+end
+
+-- Explicit article title from the data, else the category name.
+function Script:getWikipediaArticle()
+    local article = self._rawData.wikipedia_article
+    if article then return article end
+    return (self:getCategoryName())
+end
+
+-- Character-class contents for this script, or nil when none are recorded.
+function Script:getCharacters()
+    return self._rawData.characters or nil
+end
+
+-- Number of characters of `text` that belong to this script (0 when the
+-- script has no character data).
+function Script:countCharacters(text)
+    local characters = self._rawData.characters
+    if not characters then return 0 end
+
+    -- gsub's second result is the number of matches
+    local _, num = mw.ustring.gsub(text, "[" .. characters .. "]", "")
+    return num
+end
+
+-- Writing direction from the data, or nil when unspecified.
+function Script:getDirection()
+    return self._rawData.direction or nil
+end
+
+function Script:getRawData()
+    return self._rawData
+end
+
+-- Serialise the script's public properties through the JSON module.
+function Script:toJSON()
+    local ret = {}
+    ret.canonicalName = self:getCanonicalName()
+    ret.categoryName = self:getCategoryName("nocap")
+    ret.code = self._code
+    ret.otherNames = self:getOtherNames(true)
+    ret.aliases = self:getAliases()
+    ret.varieties = self:getVarieties()
+    ret.type = self:getType()
+    ret.direction = self:getDirection()
+    ret.characters = self:getCharacters()
+    ret.parent = self:getParent()
+    ret.systems = self._rawData.systems or {}
+    ret.wikipediaArticle = self._rawData.wikipedia_article
+
+    return require("JSON").toJSON(ret)
+end
+
+Script.__index = Script
+
+-- Build a script object around `data`; nil when there is no data.
+function export.makeObject(code, data)
+    if not data then return nil end
+    return setmetatable({_rawData = data, _code = code}, Script)
+end
+
+-- Look a script up by code. A nil code returns nil unless `disallowNil` is
+-- set; an unknown code is reported through languages.err when
+-- `paramForError` is given.
+function export.getByCode(code, paramForError, disallowNil)
+    if code == nil and not disallowNil then return nil end
+    if code == "IPAchar" then require("debug").track("IPAchar") end
+
+    local script = export.makeObject(code, mw.loadData("scripts/data")[code])
+    if script then return script end
+
+    if paramForError then require("languages").err(code, paramForError, "script code", nil, "not real lang") end
+    return nil
+end
+
+-- Look a script up by canonical name; nil when the name is unknown.
+function export.getByCanonicalName(name)
+    local code = mw.loadData("scripts/by name")[name]
+    return code and export.makeObject(code, mw.loadData("scripts/data")[code]) or nil
+end
+
+-- Find the best script to use, based on the characters of a string.
+-- If forceDetect is set, run the detection algorithm even if there's only one
+-- possible script; in that case, if the text isn't in the script, the return
+-- value will be None.
+function export.findBestScript(text, lang, forceDetect)
+    -- Nothing to detect against without text or a language exposing getScripts.
+    if not (text and lang and lang.getScripts) then return export.getByCode("None") end
+
+    local candidates = lang:getScripts()
+
+    -- A language with a single listed script skips detection unless forced.
+    if not (candidates[2] or forceDetect) then return candidates[1] end
+
+    --[=[
+    Remove any HTML entities; catfix function in [[Module:utilities]]
+    adds tagging to a no-break space (&nbsp;), which contains Latin characters;
+    hence Latin was returned as the script if "Latn" is one of the language's scripts.
+    ]=]
+    text = string.gsub(text, "&[a-zA-Z0-9]+;", "")
+
+    -- Get length of text minus any spacing or punctuation characters.
+    -- Counting instances of UTF-8 character pattern is faster than mw.ustring.len.
+    local stripped = mw.ustring.gsub(text, "[%s%p]+", "")
+    local _, length = string.gsub(stripped, "[\1-\127\194-\244][\128-\191]*", "")
+
+    if length == 0 then return export.getByCode("None") end
+
+    -- Score every candidate; a script covering the whole text wins immediately,
+    -- otherwise the one with the most matching characters is kept.
+    local best_script, best_count = nil, 0
+    for _, candidate in ipairs(candidates) do
+        local count = candidate:countCharacters(text)
+
+        if count >= length then return candidate end
+
+        if count > best_count then best_script, best_count = candidate, count end
+    end
+
+    -- No matching script was found: fall back to "None".
+    return best_script or export.getByCode("None")
+end
+
+-- Copied from [[Module:Unicode data]].
+-- Binary search over `ranges` (entries {first, last, ...} sorted by first).
+-- Returns the matching range and its index, or nil and the last index probed.
+-- NOTE(review): "Module:table" keeps the wiki-style module name while other
+-- requires in this file use bare names -- confirm it resolves in this port.
+local floor = math.floor
+local function binaryRangeSearch(codepoint, ranges)
+    local lo = 1
+    local hi = ranges.length or require"Module:table".length(ranges)
+    local mid
+
+    while lo <= hi do
+        mid = floor((lo + hi) / 2)
+        local candidate = ranges[mid]
+        if codepoint < candidate[1] then
+            hi = mid - 1
+        elseif codepoint <= candidate[2] then
+            return candidate, mid
+        else
+            lo = mid + 1
+        end
+    end
+
+    return nil, mid
+end
+
+-- Copied from [[Module:Unicode data]].
+-- Linear scan over `ranges` ({first, last, ...} entries in ascending order);
+-- stops at the first range starting beyond `codepoint`. Returns the matching
+-- range, or nothing.
+local function linearRangeSearch(codepoint, ranges)
+    for i, range in ipairs(ranges) do
+        if codepoint < range[1] then
+            break
+        elseif codepoint <= range[2] then
+            return range
+        end
+    end
+end
+
+-- Ordering for table.sort: by first codepoint of the range.
+local function compareRanges(range1, range2) return range1[1] < range2[1] end
+
+-- Save previously used codepoint ranges in case another character is in the
+-- same range.
+local rangesCache = {}
+
+--[=[
+    Takes a codepoint or a character and finds the script code (if any) that is
+    appropriate for it based on the codepoint, using the data module
+    [[Module:scripts/recognition data]]. The data module was generated from the
+    patterns in [[Module:scripts/data]] using [[Module:User:Erutuon/script recognition]].
+
+    Converts the character to a codepoint. Returns a script code if the codepoint
+    is in the list of individual characters, or if it is in one of the defined
+    ranges in the 4096-character block that it belongs to, else returns "None".
+]=]
+local charToScriptData
+function export.charToScript(char)
+    -- data module is loaded on first call only
+    charToScriptData = charToScriptData or mw.loadData("scripts/recognition data")
+    local t = type(char)
+    local codepoint
+    if t == "string" then
+        local etc
+        -- ask for two codepoints so that a second character can be detected
+        codepoint, etc = mw.ustring.codepoint(char, 1, 2)
+        if etc then error("bad argument #1 to 'charToScript' (expected a single character)") end
+    elseif t == "number" then
+        codepoint = char
+    else
+        error(("bad argument #1 to 'charToScript' (expected string or a number, got %s)"):format(t))
+    end
+
+    local individualMatch = charToScriptData.individual[codepoint]
+    if individualMatch then
+        return individualMatch
+    else
+        local range
+        -- try the ranges remembered from earlier calls first
+        if rangesCache[1] then
+            range = linearRangeSearch(codepoint, rangesCache)
+            if range then return range[3] end
+        end
+
+        -- index of the 0x1000-codepoint block containing the character
+        local index = floor(codepoint / 0x1000)
+
+        -- `blocks` is searched by block index; the per-block list is searched
+        -- by codepoint only when no entry of `blocks` matched
+        range = linearRangeSearch(index, charToScriptData.blocks)
+        if not range and charToScriptData[index] then
+            range = binaryRangeSearch(codepoint, charToScriptData[index])
+            if range then
+                -- keep the cache sorted: linearRangeSearch relies on ascending order
+                table.insert(rangesCache, range)
+                table.sort(rangesCache, compareRanges)
+            end
+        end
+
+        return range and range[3] or "None"
+    end
+end
+
+-- Return the script code that charToScript assigns to the most characters of
+-- `text` (nil for an empty string). Ties are resolved by pairs() order, which
+-- is unspecified.
+function export.findBestScriptWithoutLang(text)
+    local scripts = {}
+    -- the pattern matches one UTF-8 encoded character at a time
+    for character in text:gmatch("[%z\1-\127\194-\244][\128-\191]*") do
+        local script = export.charToScript(character)
+        scripts[script] = (scripts[script] or 0) + 1
+    end
+
+    local bestScript
+    local greatestCount = 0
+    for script, count in pairs(scripts) do
+        if count > greatestCount then
+            bestScript = script
+            greatestCount = count
+        end
+    end
+
+    return bestScript
+end
+
+return export
diff --git a/wiktra/wikt/translit/scripts/by name.lua b/wiktra/wikt/translit/scripts/by name.lua
new file mode 100644
index 0000000..f5b7c66
--- /dev/null
+++ b/wiktra/wikt/translit/scripts/by name.lua
@@ -0,0 +1,185 @@
+-- Maps a script's canonical name to its script code.
+return {
+    ["Adlam"] = "Adlm",
+    ["Afaka"] = "Afak",
+    ["Ahom"] = "Ahom",
+    ["Anatolian Hieroglyphs"] = "Hluw",
+    ["Arabic"] = "Arab",
+    ["Armenian"] = "Armn",
+    ["Assamese"] = "as-Beng",
+    ["Avestan"] = "Avst",
+    ["Balinese"] = "Bali",
+    ["Bamum"] = "Bamu",
+    ["Bassa"] = "Bass",
+    ["Batak"] = "Batk",
+    ["Baybayin"] = "Tglg",
+    ["Bengali"] = "Beng",
+    ["Bhaiksuki"] = "Bhks",
+    ["Book Pahlavi"] = "Phlv",
+    ["Brahmi"] = "Brah",
+    ["Braille"] = "Brai",
+    ["Buginese"] = "Bugi",
+    ["Buhid"] = "Buhd",
+    ["Burmese"] = "Mymr",
+    ["Canadian syllabics"] = "Cans",
+    ["Carian"] = "Cari",
+    ["Caucasian Albanian"] = "Aghb",
+    ["Chakma"] = "Cakm",
+    ["Cham"] = "Cham",
+    ["Cherokee"] = "Cher",
+    ["Chorasmian"] = "Chrs",
+    ["Coptic"] = "Copt",
+    ["Cuneiform"] = "Xsux",
+    ["Cypriot"] = "Cprt",
+    ["Cyrillic"] = "Cyrl",
+    ["Demotic"] = "Egyd",
+    ["Deseret"] = "Dsrt",
+    ["Devanagari"] = "Deva",
+    ["Dogra"] = "Dogr",
+    ["Duployan"] = "Dupl",
+    ["Egyptian hieroglyphic"] = "Egyp",
+    ["Elbasan"] = "Elba",
+    ["Ethiopic"] = "Ethi",
+    ["Fraktur"] = "Latf",
+    ["Georgian"] = "Geor",
+    ["Glagolitic"] = "Glag",
+    ["Gothic"] = "Goth",
+    ["Grantha"] = "Gran",
+    ["Greek"] = "Grek",
+    ["Gujarati"] = "Gujr",
+    ["Gunjala
Gondi"] = "Gong", + ["Gurmukhi"] = "Guru", + ["Han"] = "Hani", + ["Hangul"] = "Hang", + ["Hanifi Rohingya"] = "Rohg", + ["Hanunoo"] = "Hano", + ["Hatran"] = "Hatr", + ["Hebrew"] = "Hebr", + ["Hieratic"] = "Egyh", + ["Hiragana"] = "Hira", + ["Hmong"] = "Hmng", + ["Iberian"] = "Ibrn", + ["Image-rendered"] = "Imag", + ["Imperial Aramaic"] = "Armi", + ["Indus"] = "Inds", + ["Inscriptional Pahlavi"] = "Phli", + ["Inscriptional Parthian"] = "Prti", + ["International Phonetic Alphabet"] = "IPAchar", + ["Japanese"] = "Jpan", + ["Javanese"] = "Java", + ["Jurchen"] = "Jurc", + ["Kaithi"] = "Kthi", + ["Kannada"] = "Knda", + ["Katakana"] = "Kana", + ["Kayah Li"] = "Kali", + ["Kharoshthi"] = "Khar", + ["Khitan Large"] = "Kitl", + ["Khitan Small"] = "Kits", + ["Khmer"] = "Khmr", + ["Khojki"] = "Khoj", + ["Khudawadi"] = "Sind", + ["Khutsuri"] = "Geok", + ["Korean"] = "Kore", + ["Lao"] = "Laoo", + ["Latin"] = "Latn", + ["Leke"] = "Leke", + ["Lepcha"] = "Lepc", + ["Limbu"] = "Limb", + ["Linear A"] = "Lina", + ["Linear B"] = "Linb", + ["Lisu"] = "Lisu", + ["Lycian"] = "Lyci", + ["Lydian"] = "Lydi", + ["Mahajani"] = "Mahj", + ["Lontara"] = "Maka", + ["Malayalam"] = "Mlym", + ["Mandaic"] = "Mand", + ["Manichaean"] = "Mani", + ["Marchen"] = "Marc", + ["Masaram Gondi"] = "Gonm", + ["Maya"] = "Maya", + ["Medefaidrin"] = "Medf", + ["Meitei Mayek"] = "Mtei", + ["Mende"] = "Mend", + ["Meroitic cursive"] = "Merc", + ["Meroitic hieroglyphic"] = "Mero", + ["Modi"] = "Modi", + ["Morse code"] = "Morse", + ["Mro"] = "Mroo", + ["Multani"] = "Mult", + ["Musical notation"] = "musical", + ["N'Ko"] = "Nkoo", + ["Nabataean"] = "Nbat", + ["New Tai Lue"] = "Talu", + ["Newa"] = "Newa", + ["Nushu"] = "Nshu", + ["Ogham"] = "Ogam", + ["Ol Chiki"] = "Olck", + ["Old Cyrillic"] = "Cyrs", + ["Old Hungarian"] = "Hung", + ["Old Italic"] = "Ital", + ["Old North Arabian"] = "Narb", + ["Old Permic"] = "Perm", + ["Old Persian"] = "Xpeo", + ["Old Sogdian"] = "Sogo", + ["Old South Arabian"] = "Sarb", + ["Oriya"] = 
"Orya", + ["Orkhon runes"] = "Orkh", + ["Osage"] = "Osge", + ["Osmanya"] = "Osma", + ["Palmyrene"] = "Palm", + ["Pau Cin Hau"] = "Pauc", + ["Pazend"] = "pal-Avst", + ["Phags-pa"] = "Phag", + ["Phoenician"] = "Phnx", + ["Pollard"] = "Plrd", + ["Psalter Pahlavi"] = "Phlp", + ["Rejang"] = "Rjng", + ["Rumi numerals"] = "Ruminumerals", + ["Runic"] = "Runr", + ["Samaritan"] = "Samr", + ["Saurashtra"] = "Saur", + ["Shahmukhi"] = "pa-Arab", + ["Sharada"] = "Shrd", + ["Shavian"] = "Shaw", + ["Siddham"] = "Sidd", + ["SignWriting"] = "Sgnw", + ["Simplified Han"] = "Hans", + ["Sinhalese"] = "Sinh", + ["Sogdian"] = "Sogd", + ["Sorang Sompeng"] = "Sora", + ["Soyombo"] = "Soyo", + ["Sundanese"] = "Sund", + ["Syloti Nagri"] = "Sylo", + ["Syriac"] = "Syrc", + ["Tagbanwa"] = "Tagb", + ["Tai Nüa"] = "Tale", + ["Tai Tham"] = "Lana", + ["Tai Viet"] = "Tavt", + ["Takri"] = "Takr", + ["Tamil"] = "Taml", + ["Tangut"] = "Tang", + ["Telugu"] = "Telu", + ["Tengwar"] = "Teng", + ["Thaana"] = "Thaa", + ["Thai"] = "Thai", + ["Tibetan"] = "Tibt", + ["Tifinagh"] = "Tfng", + ["Tirhuta"] = "Tirh", + ["Traditional Han"] = "Hant", + ["Ugaritic"] = "Ugar", + ["Unspecified"] = "None", + ["Mongolian"] = "Mong", + ["Vai"] = "Vaii", + ["Varang Kshiti"] = "Wara", + ["Yi"] = "Yiii", + ["Zanabazar Square"] = "Zanb", + ["Zhang-Zhung"] = "xzh-Tibt", + ["Zhuyin"] = "Bopo", + ["flag semaphore"] = "Semap", + ["mathematical notation"] = "Zmth", + ["symbol"] = "Zsym", + ["uncoded"] = "Zzzz", + ["undetermined"] = "Zyyy" +} + +-- To update, go to [[Module:scripts/print]]. 
diff --git a/wiktra/wikt/translit/scripts/code_to_canonical_name.lua b/wiktra/wikt/translit/scripts/code_to_canonical_name.lua new file mode 100644 index 0000000..6ebca98 --- /dev/null +++ b/wiktra/wikt/translit/scripts/code_to_canonical_name.lua @@ -0,0 +1,176 @@ +return { + ["Adlm"] = "Adlam", + ["Afak"] = "Afaka", + ["Aghb"] = "Caucasian Albanian", + ["Ahom"] = "Ahom", + ["Arab"] = "Arabic", + ["Armi"] = "Imperial Aramaic", + ["Armn"] = "Armenian", + ["Avst"] = "Avestan", + ["Bali"] = "Balinese", + ["Bamu"] = "Bamum", + ["Bass"] = "Bassa", + ["Batk"] = "Batak", + ["Beng"] = "Bengali", + ["Bhks"] = "Bhaiksuki", + ["Bopo"] = "Zhuyin", + ["Brah"] = "Brahmi", + ["Brai"] = "Braille", + ["Bugi"] = "Buginese", + ["Buhd"] = "Buhid", + ["Cakm"] = "Chakma", + ["Cans"] = "Canadian syllabics", + ["Cari"] = "Carian", + ["Cham"] = "Cham", + ["Cher"] = "Cherokee", + ["Chrs"] = "Chorasmian", + ["Copt"] = "Coptic", + ["Cprt"] = "Cypriot", + ["Cyrl"] = "Cyrillic", + ["Cyrs"] = "Old Cyrillic", + ["Deva"] = "Devanagari", + ["Dogr"] = "Dogra", + ["Dsrt"] = "Deseret", + ["Dupl"] = "Duployan", + ["Egyd"] = "Demotic", + ["Egyh"] = "Hieratic", + ["Egyp"] = "Egyptian hieroglyphic", + ["Elba"] = "Elbasan", + ["Ethi"] = "Ethiopic", + ["Geok"] = "Khutsuri", + ["Geor"] = "Georgian", + ["Glag"] = "Glagolitic", + ["Gong"] = "Gunjala Gondi", + ["Gonm"] = "Masaram Gondi", + ["Goth"] = "Gothic", + ["Gran"] = "Grantha", + ["Grek"] = "Greek", + ["Gujr"] = "Gujarati", + ["Guru"] = "Gurmukhi", + ["Hang"] = "Hangul", + ["Hani"] = "Han", + ["Hano"] = "Hanunoo", + ["Hans"] = "Simplified Han", + ["Hant"] = "Traditional Han", + ["Hatr"] = "Hatran", + ["Hebr"] = "Hebrew", + ["Hira"] = "Hiragana", + ["Hluw"] = "Anatolian Hieroglyphs", + ["Hmng"] = "Hmong", + ["Hung"] = "Old Hungarian", + ["IPAchar"] = "International Phonetic Alphabet", + ["Ibrn"] = "Iberian", + ["Imag"] = "Image-rendered", + ["Inds"] = "Indus", + ["Ital"] = "Old Italic", + ["Java"] = "Javanese", + ["Jpan"] = "Japanese", + ["Jurc"] = 
"Jurchen", + ["Kali"] = "Kayah Li", + ["Kana"] = "Katakana", + ["Khar"] = "Kharoshthi", + ["Khmr"] = "Khmer", + ["Khoj"] = "Khojki", + ["Kitl"] = "Khitan Large", + ["Kits"] = "Khitan Small", + ["Knda"] = "Kannada", + ["Kore"] = "Korean", + ["Kthi"] = "Kaithi", + ["Lana"] = "Tai Tham", + ["Laoo"] = "Lao", + ["Latf"] = "Fraktur", + ["Latn"] = "Latin", + ["Leke"] = "Leke", + ["Lepc"] = "Lepcha", + ["Limb"] = "Limbu", + ["Lina"] = "Linear A", + ["Linb"] = "Linear B", + ["Lisu"] = "Lisu", + ["Lyci"] = "Lycian", + ["Lydi"] = "Lydian", + ["Mahj"] = "Mahajani", + ["Maka"] = "Makasar", + ["Mand"] = "Mandaic", + ["Mani"] = "Manichaean", + ["Marc"] = "Marchen", + ["Maya"] = "Maya", + ["Medf"] = "Medefaidrin", + ["Mend"] = "Mende", + ["Merc"] = "Meroitic cursive", + ["Mero"] = "Meroitic hieroglyphic", + ["Mlym"] = "Malayalam", + ["Modi"] = "Modi", + ["Mong"] = "Mongolian", + ["Mroo"] = "Mro", + ["Mtei"] = "Meitei Mayek", + ["Mult"] = "Multani", + ["Mymr"] = "Burmese", + ["Narb"] = "Old North Arabian", + ["Nbat"] = "Nabataean", + ["Newa"] = "Newa", + ["Nkoo"] = "N'Ko", + ["None"] = "Unspecified", + ["Nshu"] = "Nushu", + ["Ogam"] = "Ogham", + ["Olck"] = "Ol Chiki", + ["Orkh"] = "Orkhon runes", + ["Orya"] = "Oriya", + ["Osge"] = "Osage", + ["Osma"] = "Osmanya", + ["Palm"] = "Palmyrene", + ["Pauc"] = "Pau Cin Hau", + ["Perm"] = "Old Permic", + ["Phag"] = "Phags-pa", + ["Phli"] = "Inscriptional Pahlavi", + ["Phlp"] = "Psalter Pahlavi", + ["Phlv"] = "Book Pahlavi", + ["Phnx"] = "Phoenician", + ["Plrd"] = "Pollard", + ["Prti"] = "Inscriptional Parthian", + ["Rjng"] = "Rejang", + ["Rohg"] = "Hanifi Rohingya", + ["Runr"] = "Runic", + ["Samr"] = "Samaritan", + ["Sarb"] = "Old South Arabian", + ["Saur"] = "Saurashtra", + ["Sgnw"] = "SignWriting", + ["Shaw"] = "Shavian", + ["Shrd"] = "Sharada", + ["Sidd"] = "Siddham", + ["Sind"] = "Khudawadi", + ["Sinh"] = "Sinhalese", + ["Sogd"] = "Sogdian", + ["Sogo"] = "Old Sogdian", + ["Sora"] = "Sorang Sompeng", + ["Soyo"] = "Soyombo", + ["Sund"] = 
"Sundanese", + ["Sylo"] = "Syloti Nagri", + ["Syrc"] = "Syriac", + ["Tagb"] = "Tagbanwa", + ["Takr"] = "Takri", + ["Tale"] = "Tai Nüa", + ["Talu"] = "New Tai Lue", + ["Taml"] = "Tamil", + ["Tang"] = "Tangut", + ["Tavt"] = "Tai Viet", + ["Telu"] = "Telugu", + ["Teng"] = "Tengwar", + ["Tfng"] = "Tifinagh", + ["Tglg"] = "Baybayin", + ["Thaa"] = "Thaana", + ["Thai"] = "Thai", + ["Tibt"] = "Tibetan", + ["Tirh"] = "Tirhuta", + ["Ugar"] = "Ugaritic", + ["Vaii"] = "Vai", + ["Wara"] = "Varang Kshiti", + ["Xpeo"] = "Old Persian", + ["Xsux"] = "Cuneiform", + ["Yiii"] = "Yi", + ["Zanb"] = "Zanabazar Square", + ["Zmth"] = "mathematical notation", + ["Zsym"] = "symbol", + ["Zyyy"] = "undetermined", + ["Zzzz"] = "uncoded" +} +-- To update, go to [[Module:scripts/print]]. diff --git a/wiktra/wikt/translit/scripts/data.lua b/wiktra/wikt/translit/scripts/data.lua new file mode 100644 index 0000000..5bab878 --- /dev/null +++ b/wiktra/wikt/translit/scripts/data.lua @@ -0,0 +1,505 @@ +--[=[ + When adding new scripts to this file, please don't forget to add + style definitons for the script in [[MediaWiki:Common.css]]. 
+]=] local u = mw.ustring.char +local m = {} + +m["Adlm"] = {canonicalName = "Adlam", characters = "𞤀-𞥟", direction = "rtl"} + +m["Afak"] = {canonicalName = "Afaka"} + +m["Aghb"] = {canonicalName = "Caucasian Albanian", characters = "𐔰-𐕣𐕯"} + +m["Ahom"] = {canonicalName = "Ahom", characters = "𑜀-𑜿", systems = {"abugida"}} + +m["Arab"] = { + canonicalName = "Arabic", + varieties = {"Jawi", {"Nastaliq", "Nastaleeq"}}, + characters = "؀-ۿݐ-ݿࢠ-ࣿﭐ-﷽ﹰ-ﻼ", + direction = "rtl", + systems = {"abjad"} -- more precisely, impure abjad +} + +m["fa-Arab"] = {canonicalName = "Arabic", otherNames = {"Perso-Arabic"}, characters = m["Arab"].characters, direction = "rtl", parent = "Arab"} + +m["kk-Arab"] = {canonicalName = "Arabic", characters = m["Arab"].characters, direction = "rtl", parent = "Arab"} + +m["ks-Arab"] = {canonicalName = "Arabic", characters = m["Arab"].characters, direction = "rtl", parent = "Arab"} + +m["ku-Arab"] = {canonicalName = "Arabic", characters = m["Arab"].characters, direction = "rtl", parent = "Arab"} + +m["ms-Arab"] = {canonicalName = "Arabic", characters = m["Arab"].characters, direction = "rtl", parent = "Arab"} + +m["mzn-Arab"] = {canonicalName = "Arabic", characters = m["Arab"].characters, direction = "rtl", parent = "Arab"} + +m["ota-Arab"] = {canonicalName = "Arabic", characters = m["Arab"].characters, direction = "rtl", parent = "Arab"} + +m["pa-Arab"] = {canonicalName = "Shahmukhi", otherNames = {"Arabic"}, characters = m["Arab"].characters, direction = "rtl", parent = "Arab"} + +m["ps-Arab"] = {canonicalName = "Arabic", characters = m["Arab"].characters, direction = "rtl", parent = "Arab"} + +m["sd-Arab"] = {canonicalName = "Arabic", characters = m["Arab"].characters, direction = "rtl", parent = "Arab"} + +m["tt-Arab"] = {canonicalName = "Arabic", characters = m["Arab"].characters, direction = "rtl", parent = "Arab"} + +m["ug-Arab"] = {canonicalName = "Arabic", characters = m["Arab"].characters, direction = "rtl", parent = "Arab"} + 
+m["ur-Arab"] = {canonicalName = "Arabic", characters = m["Arab"].characters, direction = "rtl", parent = "Arab"} + +-- Aran (Nastaliq) is subsumed into Arab + +m["Armi"] = {canonicalName = "Imperial Aramaic", characters = "𐡀-𐡟", direction = "rtl", systems = {"abjad"}} + +m["Armn"] = {canonicalName = "Armenian", characters = "Ա-֏ﬓ-ﬗ"} + +m["Avst"] = {canonicalName = "Avestan", characters = "𐬀-𐬿", direction = "rtl"} + +m["pal-Avst"] = {canonicalName = "Pazend", characters = m["Avst"].characters, direction = "rtl", parent = "Avst"} + +m["Bali"] = {canonicalName = "Balinese", characters = "ᬀ-᭼", systems = {"abugida"}} + +m["Bamu"] = {canonicalName = "Bamum", characters = "ꚠ-꛷𖠀-𖨸"} + +m["Bass"] = {canonicalName = "Bassa", aliases = {"Bassa Vah", "Vah"}, characters = "𖫐-𖫵"} + +m["Batk"] = {canonicalName = "Batak", characters = "ᯀ-᯿", systems = {"abugida"}} + +m["Beng"] = {canonicalName = "Bengali", characters = "ঀ-ঃঅ-ঌএঐও-নপ-রললশ-হ়-ৄেৈো-ৎৗড়ঢ়য়়ৠ-ৣ০-৯", systems = {"abugida"}} + +m["as-Beng"] = {canonicalName = "Assamese", otherNames = {"Bengali-Assamese", "Eastern Nagari"}, characters = "ঁ-ঃঅ-ঌএঐও-নপ-যশ-হ়-ৄেৈো-ৎৗড়ঢ়য়়ৠ-ৣ০-ৱ", systems = {"abugida"}} + +m["Bhks"] = {canonicalName = "Bhaiksuki", characters = "𑰀-𑱬", systems = {"abugida"}} + +m["Bopo"] = {canonicalName = "Zhuyin", aliases = {"Zhuyin Fuhao", "Bopomofo"}, characters = "ㄅ-ㄯㆠ-ㆿ"} + +m["Brah"] = {canonicalName = "Brahmi", characters = "𑀀-𑁿", systems = {"abugida"}} + +m["Brai"] = {canonicalName = "Braille", characters = "⠀-⣿"} + +m["Bugi"] = {canonicalName = "Buginese", aliases = {"Lontara"}, characters = "ᨀ-᨟", systems = {"abugida"}} + +m["Buhd"] = {canonicalName = "Buhid", characters = "ᝀ-ᝓ", systems = {"abugida"}} + +m["Cakm"] = {canonicalName = "Chakma", characters = "𑄀-𑅇", systems = {"abugida"}} + +m["Cans"] = {canonicalName = "Canadian syllabics", characters = "᐀-ᙿᢰ-ᣵ", systems = {"abugida"}} + +m["Cari"] = {canonicalName = "Carian", characters = "𐊠-𐋐", systems = {"alphabet"}} + +m["Cham"] = 
{canonicalName = "Cham", characters = "ꨀ-꩟", systems = {"abugida"}} + +m["Chrs"] = {canonicalName = "Chorasmian", characters = "𐾰-𐿋", direction = "rtl", systems = {"abjad"}} + +m["Cher"] = {canonicalName = "Cherokee", characters = "Ꭰ-ᏽꭰ-ꮿ", systems = {"syllabary"}} + +m["Copt"] = { + canonicalName = "Coptic", + characters = "Ϣ-ϯⲀ-⳿𐋠-𐋻", -- this is mostly "Coptic", not unified "Greek and Coptic" + systems = {"alphabet"} +} + +m["Cprt"] = {canonicalName = "Cypriot", characters = "𐠀-𐠿", direction = "rtl", systems = {"syllabary"}} + +m["Cyrl"] = {canonicalName = "Cyrillic", characters = "Ѐ-џѢѣѪѫѬѭѲѳѴѵҊ-ԧꚀ-ꚗ", systems = {"alphabet"}} + +m["Cyrs"] = {canonicalName = "Old Cyrillic", aliases = {"Early Cyrillic"}, characters = "Ѐ-ԧꙀ-ꚗ", wikipedia_article = "Early Cyrillic alphabet", systems = {"alphabet"}} + +m["Deva"] = {canonicalName = "Devanagari", characters = "ऀ-ॿ꣠-ꣿ", systems = {"abugida"}} + +m["Dogr"] = {canonicalName = "Dogra", characters = "𑠀-𑠻", systems = {"abugida"}} + +m["Dsrt"] = {canonicalName = "Deseret", characters = "𐐀-𐑏", systems = {"alphabet"}} + +m["Dupl"] = {canonicalName = "Duployan", characters = "𛰀-𛲟"} + +m["Egyd"] = {canonicalName = "Demotic", systems = {"abjad", "logography"}} + +m["Egyh"] = {canonicalName = "Hieratic", systems = {"abjad", "logography"}} + +m["Egyp"] = {canonicalName = "Egyptian hieroglyphic", varieties = {"Hieratic"}, characters = "𓀀-𓐮" .. u(0x13430) .. "-" .. 
u(0x13438), wikipedia_article = "Egyptian hieroglyphs", systems = {"abjad", "logography"}} + +m["Elba"] = {canonicalName = "Elbasan", characters = "𐔀-𐔧", systems = {"alphabet"}} + +m["Ethi"] = {canonicalName = "Ethiopic", aliases = {"Ge'ez"}, characters = "ሀ-᎙ⶀ-ⷞꬁ-ꬮ", systems = {"abugida"}} + +m["Geok"] = { + canonicalName = "Khutsuri", + varieties = {"Nuskhuri", "Asomtavruli"}, + characters = "Ⴀ-Ⴭⴀ-ⴭ", -- Ⴀ-Ⴭ is Asomtavruli, ⴀ-ⴭ is Nuskhuri + systems = {"alphabet"} +} + +m["Geor"] = { + canonicalName = "Georgian", + varieties = {"Mkhedruli", "Mtavruli"}, + characters = "ა-ჿᲐ-Ჿ", -- ა-ჿ is lowercase Mkhedruli; Ა-Ჿ is uppercase Mkhedruli (Mtavruli) + systems = {"alphabet"} +} + +m["Glag"] = {canonicalName = "Glagolitic", characters = "Ⰰ-ⱞ𞀀-𞀪", systems = {"alphabet"}} + +m["Gong"] = {canonicalName = "Gunjala Gondi", characters = "𑵠-𑶩", systems = {"abugida"}} + +m["Gonm"] = {canonicalName = "Masaram Gondi", characters = "𑴀-𑵙", systems = {"abugida"}} + +m["Goth"] = {canonicalName = "Gothic", characters = "𐌰-𐍊", systems = {"alphabet"}} + +m["Gran"] = {canonicalName = "Grantha", characters = "𑌀-𑍴", systems = {"abugida"}} + +m["Grek"] = {canonicalName = "Greek", characters = "Ͱ-ϡϰ-Ͽ", systems = {"alphabet"}} + +m["polytonic"] = {canonicalName = "Greek", characters = "ἀ-῾" .. m["Grek"].characters, parent = "Grek", systems = {"alphabet"}} + +m["Gujr"] = {canonicalName = "Gujarati", characters = "ઁ-૿", systems = {"abugida"}} + +m["Guru"] = {canonicalName = "Gurmukhi", characters = "ਁ-੶", systems = {"abugida"}} + +m["Hang"] = { + canonicalName = "Hangul", + aliases = {"Hangeul"}, + characters = ("가-힣" .. -- Syllables + "ᄀ-ᇿ" .. -- Jamo + "ꥠ-ꥼ" .. -- Jamo Ext-A + "ힰ-ퟻ" .. -- Jamo Ext-B + "ㄱ-ㆎ" .. -- Compat Jamo + "ᅠ-ᅵ" -- Halfwidth + ), + systems = {"syllabary"} +} + +m["Hani"] = { + canonicalName = "Han", + varieties = {"Hanzi", "Kanji", "Hanja", "Chu Nom"}, + characters = ("一-鿿" .. "㐀-䶿" .. -- ExtA + "𠀀-𮯯" .. -- SIP + "𰀀-𱍏" .. -- ExtG + "﨎﨏﨑﨓﨔﨟﨡﨣﨤﨧﨨﨩" .. "⺀-⻿" .. 
-- Radicals Supplement + " -〿" .. -- CJK Symbols and Punctuation + "𖿢𖿣𖿰𖿱" .. -- Ideographic Symbols and Punctuation + "㇀-㇯" .. -- Strokes + "㍻-㍿㋿" -- 組文字 + ), + systems = {"logography"} +} + +m["Hans"] = {canonicalName = "Simplified Han", characters = m["Hani"].characters, systems = {"logography"}} + +m["Hant"] = {canonicalName = "Traditional Han", characters = m["Hani"].characters, systems = {"logography"}} + +m["Hatr"] = {canonicalName = "Hatran", characters = "𐣠-𐣿", direction = "rtl", systems = {"abjad"}} + +m["Hira"] = {canonicalName = "Hiragana", varieties = {"Hentaigana"}, characters = "ぁ-ゟ𛀁-𛄞𛅐𛅑𛅒🈀", systems = {"syllabary"}} + +m["Hluw"] = {canonicalName = "Anatolian Hieroglyphs", characters = "𔐀-𔙆", wikipedia_article = "Anatolian hieroglyphs"} + +m["Hung"] = {canonicalName = "Old Hungarian", aliases = {"Hungarian runic"}, characters = "𐲀-𐲲", direction = "rtl"} + +m["Kana"] = {canonicalName = "Katakana", characters = "゠-ヿㇰ-ㇿ㌀-㍗ヲ-゚𛀀𛅤𛅥𛅦𛅧", systems = {"syllabary"}} + +-- These should be defined after the scripts they are composed of + +m["Kore"] = {canonicalName = "Korean", characters = m["Hang"].characters .. m["Hani"].characters, systems = {"syllabary", "logography"}} + +m["Hano"] = {canonicalName = "Hanunoo", characters = "ᜠ-᜴", systems = {"abugida"}} + +m["Hebr"] = { + canonicalName = "Hebrew", + characters = u(0x0590) .. "-" .. u(0x05FF) .. u(0xFB1D) .. "-" .. 
u(0xFB4F), + direction = "rtl", + systems = {"abjad"} -- more precisely, impure abjad +} + +m["Hmng"] = {canonicalName = "Hmong", aliases = {"Pahawh Hmong"}, characters = "𖬀-𖮏"} + +m["Ibrn"] = {canonicalName = "Iberian"} + +m["Imag"] = { + -- To be used to avoid any formatting or link processing + canonicalName = "Image-rendered", + -- This should not have any characters listed + character_category = false +} + +m["Inds"] = {canonicalName = "Indus", aliases = {"Harappan", "Indus Valley"}} + +m["IPAchar"] = {canonicalName = "International Phonetic Alphabet", aliases = {"IPA"}} + +m["Ital"] = {canonicalName = "Old Italic", characters = "𐌀-𐌯", systems = {"alphabet"}} + +m["Java"] = {canonicalName = "Javanese", characters = "ꦀ-꧟", systems = {"abugida"}} + +m["Jurc"] = {canonicalName = "Jurchen"} + +m["Kali"] = {canonicalName = "Kayah Li", characters = "꤀-꤯", systems = {"abugida"}} + +m["Khar"] = {canonicalName = "Kharoshthi", characters = "𐨀-𐩘", systems = {"abugida"}, direction = "rtl"} + +m["Khmr"] = {canonicalName = "Khmer", characters = "ក-៹᧠-᧿", systems = {"abugida"}} + +m["Khoj"] = {canonicalName = "Khojki", characters = "𑈀-𑈾", systems = {"abugida"}} + +m["Kitl"] = {canonicalName = "Khitan Large", systems = {"logography", "syllabary"}} + +m["Kits"] = {canonicalName = "Khitan Small", characters = "𘬀-𘳕" .. 
u(0x16FE4), systems = {"logography", "syllabary"}} + +m["Knda"] = {canonicalName = "Kannada", characters = "ಀ-ೲ", systems = {"abugida"}} + +m["Kthi"] = {canonicalName = "Kaithi", characters = "𑂀-𑃍", systems = {"abugida"}} + +m["Lana"] = {canonicalName = "Tai Tham", aliases = {"Tham", "Tua Mueang", "Lanna"}, characters = "ᨠ-᪭", systems = {"abugida"}} + +m["Laoo"] = {canonicalName = "Lao", characters = "ກ-ໟ", systems = {"abugida"}} + +m["Latn"] = {canonicalName = "Latin", aliases = {"Roman"}, varieties = {"Rumi", "Romaji", "Rōmaji", "Romaja"}, characters = "A-Za-zÀ-ÖØ-öø-ɏḀ-ỿ", systems = {"alphabet"}} + +m["Latf"] = { + canonicalName = "Fraktur", + otherNames = {"Blackletter"}, -- Blackletter is actually the parent "script" + characters = m["Latn"].characters +} + +m["Latinx"] = {canonicalName = "Latin", characters = m["Latn"].characters .. "Ⱡ-Ɀ꜠-ꟿꬰ-ꭥ", parent = "Latn"} + +m["pjt-Latn"] = {canonicalName = "Latin", characters = m["Latn"].characters, parent = "Latn"} + +m["Jpan"] = {canonicalName = "Japanese", characters = m["Hira"].characters .. m["Kana"].characters .. m["Hani"].characters .. 
m["Latn"].characters, systems = {"syllabary", "logography"}} + +m["Leke"] = {canonicalName = "Leke", systems = {"abugida"}} + +m["Lepc"] = {canonicalName = "Lepcha", characters = "ᰀ-ᱏ", systems = {"abugida"}} + +m["Limb"] = {canonicalName = "Limbu", characters = "ᤀ-᥏", systems = {"abugida"}} + +m["Lina"] = {canonicalName = "Linear A", characters = "𐘀-𐝧"} + +m["Linb"] = {canonicalName = "Linear B", characters = "𐀀-𐃺"} + +m["Lisu"] = {canonicalName = "Lisu", aliases = {"Fraser"}, characters = "ꓐ-꓿𑾰", systems = {"alphabet"}} + +m["Lyci"] = {canonicalName = "Lycian", characters = "𐊀-𐊜", systems = {"alphabet"}} + +m["Lydi"] = {canonicalName = "Lydian", characters = "𐤠-𐤿", direction = "rtl", systems = {"alphabet"}} + +m["Mahj"] = {canonicalName = "Mahajani", characters = "𑅐-𑅶", systems = {"abugida"}} + +m["Maka"] = {canonicalName = "Lontara", characters = "𑻠-𑻸", systems = {"abugida"}} + +m["Mand"] = {canonicalName = "Mandaic", aliases = {"Mandaean"}, characters = "ࡀ-࡞", direction = "rtl"} + +m["Mani"] = {canonicalName = "Manichaean", characters = "𐫀-𐫶", direction = "rtl", systems = {"abjad"}} + +m["Maya"] = {canonicalName = "Maya", aliases = {"Maya hieroglyphic", "Mayan", "Mayan hieroglyphic"}, characters = "𝋠-𝋳"} + +m["Medf"] = {canonicalName = "Medefaidrin", aliases = {"Oberi Okaime", "Oberi Ɔkaimɛ"}, characters = "𖹀-𖺚"} + +m["Mend"] = {canonicalName = "Mende", aliases = {"Mende Kikakui"}, characters = "𞠀-𞣖", direction = "rtl"} + +m["Merc"] = {canonicalName = "Meroitic cursive", characters = "𐦠-𐦿", direction = "rtl", systems = {"abugida"}} + +m["Mero"] = {canonicalName = "Meroitic hieroglyphic", characters = "𐦀-𐦟", direction = "rtl", systems = {"abugida"}} + +m["Mlym"] = {canonicalName = "Malayalam", characters = "ഀ-ൿ", systems = {"abugida"}} + +m["Modi"] = {canonicalName = "Modi", characters = "𑘀-𑙙", systems = {"abugida"}} + +m["Mong"] = {canonicalName = "Mongolian", aliases = {"Uyghurjin"}, characters = "᠀-ᢪ𑙠-𑙬", direction = "down"} + +m["Morse"] = {canonicalName = 
"Morse code"} + +m["Mroo"] = {canonicalName = "Mro", characters = "𖩀-𖩯"} + +m["Mtei"] = {canonicalName = "Meitei Mayek", characters = "ꯀ-꯹ꫠ-꫶", systems = {"abugida"}} + +m["Mult"] = {canonicalName = "Multani", characters = "𑊀-𑊩", systems = {"abugida"}} + +m["musical"] = {canonicalName = "Musical notation", characters = "𝄀-𝇨", systems = {"pictography"}} + +m["Mymr"] = {canonicalName = "Burmese", aliases = {"Myanmar"}, characters = "က-႟ꩠ-ꩿꧠ-ꧾ", systems = {"abugida"}} + +m["Narb"] = {canonicalName = "Old North Arabian", characters = "𐪀-𐪟", direction = "rtl", systems = {"abjad"}} + +m["Nbat"] = {canonicalName = "Nabataean", aliases = {"Nabatean"}, characters = "𐢀-𐢯", direction = "rtl", systems = {"abjad"}} + +m["Newa"] = { + canonicalName = "Newa", + aliases = {"Newar", "Newari", "Prachalit Nepal"}, -- and Ranjana? + characters = "𑐀-𑑡", + systems = {"abugida"} +} + +m["Nkoo"] = {canonicalName = "N'Ko", characters = "߀-߿", direction = "rtl", systems = {"alphabet"}} + +m["None"] = { + canonicalName = "Unspecified", -- renders as 'unspecified script' + -- This should not have any characters listed + character_category = false -- none +} + +m["Nshu"] = {canonicalName = "Nushu", aliases = {"Nüshu"}, characters = "𖿡𛅰-𛋻", systems = {"syllabary"}} + +m["Ogam"] = {canonicalName = "Ogham", characters = " -᚜"} + +m["Olck"] = {canonicalName = "Ol Chiki", characters = "᱐-᱿"} + +m["Orkh"] = {canonicalName = "Orkhon runes", characters = "𐰀-𐱈", direction = "rtl"} + +m["Orya"] = {canonicalName = "Oriya", aliases = {"Odia"}, characters = "ଁ-୷"} + +m["Osge"] = {canonicalName = "Osage", characters = "𐒰-𐓻"} + +m["Osma"] = {canonicalName = "Osmanya", characters = "𐒀-𐒩"} + +m["Palm"] = {canonicalName = "Palmyrene", characters = "𐡠-𐡿", direction = "rtl"} + +m["Pauc"] = {canonicalName = "Pau Cin Hau", characters = "𑫀-𑫸"} + +m["Perm"] = {canonicalName = "Old Permic", characters = "𐍐-𐍺"} + +m["Phag"] = {canonicalName = "Phags-pa", characters = "ꡀ-꡷", direction = "down", systems = {"abugida"}} + 
+m["Marc"] = {canonicalName = "Marchen", characters = "𑱰-𑲶", systems = {"abugida"}} + +m["Phli"] = {canonicalName = "Inscriptional Pahlavi", characters = "𐭠-𐭿", direction = "rtl", systems = {"abjad"}} + +m["Phlp"] = {canonicalName = "Psalter Pahlavi", characters = "𐮀-𐮯", direction = "rtl", systems = {"abjad"}} + +m["Phlv"] = { + canonicalName = "Book Pahlavi", + direction = "rtl", + systems = {"abjad"} + -- Not in Unicode +} + +m["Phnx"] = {canonicalName = "Phoenician", characters = "𐤀-𐤟", direction = "rtl", systems = {"abjad"}} + +m["Plrd"] = {canonicalName = "Pollard", characters = "𖼀-𖾟", systems = {"abugida"}} + +m["Prti"] = {canonicalName = "Inscriptional Parthian", characters = "𐭀-𐭟", direction = "rtl"} + +m["Rjng"] = {canonicalName = "Rejang", characters = "ꤰ-꥟", systems = {"abugida"}} + +m["Rohg"] = {canonicalName = "Hanifi Rohingya", characters = "𐴀-𐴹", direction = "rtl", systems = {"alphabet"}} + +m["Ruminumerals"] = {canonicalName = "Rumi numerals", characters = "𐹠-𐹾", character_category = "Rumi numerals"} + +m["Runr"] = {canonicalName = "Runic", characters = "ᚠ-ᛰ", systems = {"alphabet"}} + +m["Samr"] = {canonicalName = "Samaritan", characters = "ࠀ-࠾", direction = "rtl", systems = {"abjad"}} + +m["Sarb"] = {canonicalName = "Old South Arabian", characters = "𐩠-𐩿", direction = "rtl", systems = {"abjad"}} + +m["Saur"] = {canonicalName = "Saurashtra", characters = "ꢀ-꣙", systems = {"abugida"}} + +m["Semap"] = {canonicalName = "flag semaphore", systems = {"pictography"}} + +m["Sgnw"] = {canonicalName = "SignWriting", characters = "𝠀-𝪯", systems = {"pictography"}} + +m["Shaw"] = {canonicalName = "Shavian", characters = "𐑐-𐑿"} + +m["Shrd"] = {canonicalName = "Sharada", characters = "𑆀-𑇙", systems = {"abugida"}} + +m["Sidd"] = {canonicalName = "Siddham", characters = "𑖀-𑗝", systems = {"abugida"}} + +m["Sind"] = {canonicalName = "Khudawadi", characters = "𑊰-𑋹", systems = {"abugida"}} + +m["Sinh"] = {canonicalName = "Sinhalese", characters = "ං-෴", systems = 
{"abugida"}} + +m["Sogd"] = {canonicalName = "Sogdian", characters = "𐼰-𐽙", direction = "rtl", systems = {"abjad"}} + +m["Sogo"] = {canonicalName = "Old Sogdian", characters = "𐼀-𐼧", direction = "rtl", systems = {"abjad"}} + +m["Sora"] = {canonicalName = "Sorang Sompeng", aliases = {"Sora Sompeng"}, characters = "𑃐-𑃹"} + +m["Soyo"] = {canonicalName = "Soyombo", characters = "𑩐-𑪢", systems = {"abugida"}} + +m["Sund"] = {canonicalName = "Sundanese", characters = "ᮀ-ᮿ", systems = {"abugida"}} + +m["Sylo"] = {canonicalName = "Syloti Nagri", aliases = {"Sylheti Nagari"}, characters = "ꠀ-꠫", systems = {"abugida"}} + +m["Syrc"] = { + canonicalName = "Syriac", + characters = "܀-ݏ" .. u(0x0860) .. "-" .. u(0x086A), + direction = "rtl", + systems = {"abjad"} -- more precisely, impure abjad +} + +-- Syre, Syrj, Syrn are apparently subsumed into Syrc; discuss if this causes issues + +m["Tagb"] = {canonicalName = "Tagbanwa", characters = "ᝠ-ᝳ", systems = {"abugida"}} + +m["Takr"] = {canonicalName = "Takri", characters = "𑚀-𑛉", systems = {"abugida"}} + +m["Tale"] = {canonicalName = "Tai Nüa", aliases = {"Tai Nuea", "New Tai Nüa", "New Tai Nuea", "Dehong Dai", "Tai Dehong", "Tai Le"}, characters = "ᥐ-ᥴ", systems = {"abugida"}} + +m["Talu"] = {canonicalName = "New Tai Lue", characters = "ᦀ-᧟", systems = {"abugida"}} + +m["Taml"] = {canonicalName = "Tamil", characters = "ஂ-௺𑿀-𑿿", systems = {"abugida"}} + +m["Tang"] = {canonicalName = "Tangut", characters = "𖿠𗀀-𘫿𘴀-𘴈", systems = {"logography", "syllabary"}} + +m["Tavt"] = {canonicalName = "Tai Viet", characters = "ꪀ-꫟", systems = {"abugida"}} + +m["Telu"] = {canonicalName = "Telugu", characters = "ఀ-౿", systems = {"abugida"}} + +m["Teng"] = {canonicalName = "Tengwar"} + +m["Tfng"] = { + canonicalName = "Tifinagh", + otherNames = {"Libyco-Berber", "Berber"}, -- per Wikipedia, Libyco-Berber is the parent + characters = "ⴰ-⵿", + systems = {"abjad", "alphabet"} +} + +m["Tglg"] = {canonicalName = "Baybayin", aliases = {"Tagalog"}, 
characters = "ᜀ-᜔", systems = {"abugida"}} + +m["Thaa"] = {canonicalName = "Thaana", characters = "ހ-ޱ", systems = {"abugida"}, direction = "rtl"} + +m["Thai"] = {canonicalName = "Thai", characters = "ก-๛", systems = {"abugida"}} + +m["Tibt"] = {canonicalName = "Tibetan", characters = "ༀ-࿚", systems = {"abugida"}} + +m["Tirh"] = {canonicalName = "Tirhuta", characters = "𑒀-𑓙", systems = {"abugida"}} + +m["xzh-Tibt"] = {canonicalName = "Zhang-Zhung", systems = {"abugida"}} + +m["Ugar"] = {canonicalName = "Ugaritic", characters = "𐎀-𐎟", systems = {"abjad"}} + +m["Vaii"] = {canonicalName = "Vai", characters = "ꔀ-ꘫ", systems = {"syllabary"}} + +m["Wara"] = {canonicalName = "Varang Kshiti", characters = "𑢠-𑣿"} + +m["Xpeo"] = {canonicalName = "Old Persian", characters = "𐎠-𐏕"} + +m["Xsux"] = {canonicalName = "Cuneiform", aliases = {"Sumero-Akkadian Cuneiform"}, characters = "𒀀-𒎙𒐀-𒑳"} + +m["Yiii"] = {canonicalName = "Yi", characters = "ꀀ-꓆", systems = {"syllabary"}} + +m["Zanb"] = {canonicalName = "Zanabazar Square", characters = u(0x11A00) .. "-" .. u(0x11A47)} + +m["Zmth"] = { + canonicalName = "mathematical notation", + characters = "ℵ∀-⋿⟀-⟯⦀-⫿𝐀-𝟿", + character_category = "Mathematical notation symbols" -- ? 
+} + +m["Zsym"] = { + canonicalName = "symbol", + characters = "─-➿←-⇿⌀-⏿⬀-⯾🀀-🃵🌀-🩭", + character_category = false, -- none + systems = {"pictography"} +} + +m["Zyyy"] = { + canonicalName = "undetermined", + -- This should not have any characters listed, probably + character_category = false, -- none + characters = m["Latn"].characters +} + +m["Zzzz"] = { + canonicalName = "uncoded", + -- This should not have any characters listed + character_category = false -- none +} + +return m diff --git a/wiktra/wikt/translit/scripts/recognition data.lua b/wiktra/wikt/translit/scripts/recognition data.lua new file mode 100644 index 0000000..486829b --- /dev/null +++ b/wiktra/wikt/translit/scripts/recognition data.lua @@ -0,0 +1,139 @@ +return { + [0x00] = { + {0x00041, 0x0005A, "Latn"}, + {0x00061, 0x0007A, "Latn"}, + {0x000C0, 0x000D6, "Latn"}, + {0x000D8, 0x000F6, "Latn"}, + {0x000F8, 0x0024F, "Latn"}, + {0x00370, 0x003E1, "Grek"}, + {0x003E2, 0x003EF, "Copt"}, + {0x003F0, 0x003FF, "Grek"}, + {0x00400, 0x0045F, "Cyrl"}, + {0x00460, 0x00469, "Cyrs"}, + {0x0046A, 0x0046D, "Cyrl"}, + {0x0046E, 0x00471, "Cyrs"}, + {0x00472, 0x00475, "Cyrl"}, + {0x00476, 0x00489, "Cyrs"}, + {0x0048A, 0x00527, "Cyrl"}, + {0x00531, 0x0058F, "Armn"}, + {0x00590, 0x005FF, "Hebr"}, + {0x00600, 0x006FF, "Arab"}, + {0x00700, 0x0074F, "Syrc"}, + {0x00750, 0x0077F, "Arab"}, + {0x00780, 0x007B1, "Thaa"}, + {0x007C0, 0x007FF, "Nkoo"}, + {0x00800, 0x0083E, "Samr"}, + {0x00840, 0x0085E, "Mand"}, + {0x00860, 0x0086A, "Syrc"}, + {0x008A0, 0x008FF, "Arab"}, + {0x00900, 0x0097F, "Deva"}, + {0x00980, 0x00983, "Beng"}, + {0x00985, 0x0098C, "Beng"}, + {0x00993, 0x009A8, "Beng"}, + {0x009AA, 0x009B0, "Beng"}, + {0x009B6, 0x009B9, "Beng"}, + {0x009BC, 0x009C4, "Beng"}, + {0x009CB, 0x009CE, "Beng"}, + {0x009E0, 0x009E3, "Beng"}, + {0x009E6, 0x009EF, "Beng"}, + {0x009F0, 0x009F1, "as-Beng"}, + {0x00A01, 0x00A76, "Guru"}, + {0x00A81, 0x00AF1, "Gujr"}, + {0x00B01, 0x00B77, "Orya"}, + {0x00B82, 0x00BFA, "Taml"}, + 
{0x00C00, 0x00C7F, "Telu"}, + {0x00C80, 0x00CF2, "Knda"}, + {0x00D02, 0x00D7F, "Mlym"}, + {0x00D82, 0x00DF4, "Sinh"}, + {0x00E01, 0x00E5B, "Thai"}, + {0x00E81, 0x00EDF, "Laoo"}, + {0x00F00, 0x00FDA, "Tibt"}, + length = 48 + }, + [0x01] = {{0x01000, 0x0109F, "Mymr"}, {0x010A0, 0x010CD, "Geok"}, {0x010D0, 0x010FF, "Geor"}, {0x01100, 0x011FF, "Hang"}, {0x01200, 0x01399, "Ethi"}, {0x013A0, 0x013F4, "Cher"}, {0x01400, 0x0167F, "Cans"}, {0x01680, 0x0169C, "Ogam"}, {0x016A0, 0x016F0, "Runr"}, {0x01700, 0x01714, "Tglg"}, {0x01720, 0x01734, "Hano"}, {0x01740, 0x01753, "Buhd"}, {0x01760, 0x01773, "Tagb"}, {0x01780, 0x017F9, "Khmr"}, {0x01800, 0x018AA, "Mong"}, {0x01900, 0x0194F, "Limb"}, {0x01950, 0x01974, "Tale"}, {0x01980, 0x019DF, "Talu"}, {0x019E0, 0x019FF, "Khmr"}, {0x01A00, 0x01A1F, "Bugi"}, {0x01A20, 0x01AAD, "Lana"}, {0x01B00, 0x01B7C, "Bali"}, {0x01B80, 0x01BBF, "Sund"}, {0x01BC0, 0x01BFF, "Batk"}, {0x01C00, 0x01C4F, "Lepc"}, {0x01C50, 0x01C7F, "Olck"}, {0x01C90, 0x01CBF, "Geor"}, {0x01E00, 0x01EFF, "Latn"}, {0x01F00, 0x01FFE, "polytonic"}, length = 29}, + [0x02] = {{0x02190, 0x021FF, "Zsym"}, {0x02200, 0x022FF, "Zmth"}, {0x02300, 0x023FF, "Zsym"}, {0x02500, 0x027BF, "Zsym"}, {0x027C0, 0x027EF, "Zmth"}, {0x02800, 0x028FF, "Brai"}, {0x02980, 0x02AFF, "Zmth"}, {0x02B00, 0x02BFE, "Zsym"}, {0x02C00, 0x02C5E, "Glag"}, {0x02C60, 0x02C7F, "Latinx"}, {0x02C80, 0x02CFF, "Copt"}, {0x02D00, 0x02D2D, "Geok"}, {0x02D30, 0x02D7F, "Tfng"}, {0x02D80, 0x02DDE, "Ethi"}, {0x02E80, 0x02FDF, "Hani"}, length = 15}, + [0x03] = {{0x03000, 0x0303F, "Hani"}, {0x03041, 0x0309F, "Hira"}, {0x030A0, 0x030FF, "Kana"}, {0x03105, 0x0312F, "Bopo"}, {0x03131, 0x0318E, "Hang"}, {0x031A0, 0x031BA, "Bopo"}, {0x031C0, 0x031E3, "Hani"}, {0x031F0, 0x031FF, "Kana"}, {0x03300, 0x03357, "Kana"}, {0x0337B, 0x0337F, "Hani"}, {0x03400, 0x03FFF, "Hani"}, length = 11}, + [0x04] = {{0x04000, 0x04DB5, "Hani"}, {0x04E00, 0x04FFF, "Hani"}, length = 2}, + [0x05] = {{0x05000, 0x05FFF, "Hani"}, length = 1}, + [0x06] = 
{{0x06000, 0x06FFF, "Hani"}, length = 1}, + [0x07] = {{0x07000, 0x07FFF, "Hani"}, length = 1}, + [0x08] = {{0x08000, 0x08FFF, "Hani"}, length = 1}, + [0x09] = {{0x09000, 0x09FFF, "Hani"}, length = 1}, + [0x0A] = {{0x0A000, 0x0A4C6, "Yiii"}, {0x0A4D0, 0x0A4FF, "Lisu"}, {0x0A500, 0x0A62B, "Vaii"}, {0x0A640, 0x0A67F, "Cyrs"}, {0x0A680, 0x0A697, "Cyrl"}, {0x0A6A0, 0x0A6F7, "Bamu"}, {0x0A720, 0x0A7FF, "Latinx"}, {0x0A800, 0x0A82B, "Sylo"}, {0x0A840, 0x0A877, "Phag"}, {0x0A880, 0x0A8D9, "Saur"}, {0x0A8E0, 0x0A8FF, "Deva"}, {0x0A900, 0x0A92F, "Kali"}, {0x0A930, 0x0A95F, "Rjng"}, {0x0A980, 0x0A9DF, "Java"}, {0x0A9E0, 0x0A9FE, "Mymr"}, {0x0AA00, 0x0AA5F, "Cham"}, {0x0AA60, 0x0AA7F, "Mymr"}, {0x0AA80, 0x0AADF, "Tavt"}, {0x0AAE0, 0x0AAFF, "Mtei"}, {0x0AB01, 0x0AB2E, "Ethi"}, {0x0AB30, 0x0AB65, "Latinx"}, {0x0AB70, 0x0ABBF, "Cher"}, {0x0ABC0, 0x0ABFF, "Mtei"}, {0x0AC00, 0x0AFFF, "Hang"}, length = 24}, + [0x0B] = {{0x0B000, 0x0BFFF, "Hang"}, length = 1}, + [0x0C] = {{0x0C000, 0x0CFFF, "Hang"}, length = 1}, + [0x0D] = {{0x0D000, 0x0D7A3, "Hang"}, length = 1}, + [0x0F] = {{0x0FA27, 0x0FA29, "Hani"}, {0x0FB13, 0x0FB17, "Armn"}, {0x0FB1D, 0x0FB4F, "Hebr"}, {0x0FB50, 0x0FDFD, "Arab"}, {0x0FE70, 0x0FEFC, "Arab"}, length = 5}, + [0x10] = { + {0x10000, 0x100FA, "Linb"}, + {0x10280, 0x1029C, "Lyci"}, + {0x102A0, 0x102D0, "Cari"}, + {0x102E1, 0x102FB, "Copt"}, + {0x10300, 0x10323, "Ital"}, + {0x10330, 0x1034A, "Goth"}, + {0x10350, 0x1037A, "Perm"}, + {0x10380, 0x1039F, "Ugar"}, + {0x103A0, 0x103D5, "Xpeo"}, + {0x10400, 0x1044F, "Dsrt"}, + {0x10450, 0x1047F, "Shaw"}, + {0x10480, 0x104A9, "Osma"}, + {0x104B0, 0x104FB, "Osge"}, + {0x10500, 0x10527, "Elba"}, + {0x10530, 0x10563, "Aghb"}, + {0x10600, 0x10767, "Lina"}, + {0x10800, 0x1083F, "Cprt"}, + {0x10840, 0x1085F, "Armi"}, + {0x10860, 0x1087F, "Palm"}, + {0x10880, 0x108AF, "Nbat"}, + {0x108E0, 0x108FF, "Hatr"}, + {0x10900, 0x1091F, "Phnx"}, + {0x10920, 0x1093F, "Lydi"}, + {0x10980, 0x1099F, "Mero"}, + {0x109A0, 0x109BF, "Merc"}, + 
{0x10A00, 0x10A58, "Khar"}, + {0x10A60, 0x10A7F, "Sarb"}, + {0x10A80, 0x10A9F, "Narb"}, + {0x10AC0, 0x10AF6, "Mani"}, + {0x10B00, 0x10B3F, "Avst"}, + {0x10B40, 0x10B5F, "Prti"}, + {0x10B60, 0x10B7F, "Phli"}, + {0x10B80, 0x10BAF, "Phlp"}, + {0x10C00, 0x10C48, "Orkh"}, + {0x10C80, 0x10CB2, "Hung"}, + {0x10D00, 0x10D39, "Rohg"}, + {0x10E60, 0x10E7E, "Ruminumerals"}, + {0x10F00, 0x10F27, "Sogo"}, + {0x10F30, 0x10F59, "Sogd"}, + length = 39 + }, + [0x11] = {{0x11000, 0x1107F, "Brah"}, {0x11080, 0x110CD, "Kthi"}, {0x110D0, 0x110F9, "Sora"}, {0x11100, 0x11146, "Cakm"}, {0x11150, 0x11176, "Mahj"}, {0x11180, 0x111D9, "Shrd"}, {0x11200, 0x1123D, "Khoj"}, {0x11280, 0x112A9, "Mult"}, {0x112B0, 0x112F9, "Sind"}, {0x11301, 0x11374, "Gran"}, {0x11400, 0x1145E, "Newa"}, {0x11480, 0x114D9, "Tirh"}, {0x11580, 0x115DD, "Sidd"}, {0x11600, 0x11659, "Modi"}, {0x11680, 0x116C9, "Takr"}, {0x11700, 0x1173F, "Ahom"}, {0x11800, 0x1183B, "Dogr"}, {0x118A0, 0x118FF, "Wara"}, {0x11A00, 0x11A47, "Zanb"}, {0x11A50, 0x11AA2, "Soyo"}, {0x11AC0, 0x11AF8, "Pauc"}, {0x11C00, 0x11C6C, "Bhks"}, {0x11C70, 0x11CB6, "Marc"}, {0x11D00, 0x11D59, "Gonm"}, {0x11D60, 0x11DA9, "Gong"}, {0x11EE0, 0x11EF8, "Maka"}, length = 26}, + [0x12] = {{0x12000, 0x1236E, "Xsux"}, {0x12400, 0x12473, "Xsux"}, length = 2}, + [0x13] = {{0x13000, 0x1342E, "Egyp"}, length = 1}, + [0x14] = {{0x14400, 0x14646, "Hluw"}, length = 1}, + [0x16] = {{0x16800, 0x16A38, "Bamu"}, {0x16A40, 0x16A6F, "Mroo"}, {0x16AD0, 0x16AF5, "Bass"}, {0x16B00, 0x16B8F, "Hmng"}, {0x16E40, 0x16E9A, "Medf"}, {0x16F00, 0x16F9F, "Plrd"}, length = 6}, + [0x17] = {{0x17000, 0x17FFF, "Tang"}, length = 1}, + [0x18] = {{0x18000, 0x18AF2, "Tang"}, length = 1}, + [0x1B] = {{0x1B001, 0x1B11E, "Hira"}, {0x1B170, 0x1B2FB, "Nshu"}, {0x1BC00, 0x1BC9F, "Dupl"}, length = 3}, + [0x1D] = {{0x1D100, 0x1D1DD, "musical"}, {0x1D2E0, 0x1D2F3, "Maya"}, {0x1D400, 0x1D7FF, "Zmth"}, {0x1D800, 0x1DAAF, "Sgnw"}, length = 4}, + [0x1E] = {{0x1E000, 0x1E02A, "Glag"}, {0x1E800, 0x1E8D6, 
"Mend"}, {0x1E900, 0x1E95F, "Adlm"}, length = 3}, + [0x1F] = {{0x1F000, 0x1F0F5, "Zsym"}, {0x1F300, 0x1FA6D, "Zsym"}, length = 2}, + [0x20] = {{0x20000, 0x20FFF, "Hani"}, length = 1}, + [0x21] = {{0x21000, 0x21FFF, "Hani"}, length = 1}, + [0x22] = {{0x22000, 0x22FFF, "Hani"}, length = 1}, + [0x23] = {{0x23000, 0x23FFF, "Hani"}, length = 1}, + [0x24] = {{0x24000, 0x24FFF, "Hani"}, length = 1}, + [0x25] = {{0x25000, 0x25FFF, "Hani"}, length = 1}, + [0x26] = {{0x26000, 0x26FFF, "Hani"}, length = 1}, + [0x27] = {{0x27000, 0x27FFF, "Hani"}, length = 1}, + [0x28] = {{0x28000, 0x28FFF, "Hani"}, length = 1}, + [0x29] = {{0x29000, 0x29FFF, "Hani"}, length = 1}, + [0x2A] = {{0x2A000, 0x2AFFF, "Hani"}, length = 1}, + [0x2B] = {{0x2B000, 0x2BFFF, "Hani"}, length = 1}, + [0x2C] = {{0x2C000, 0x2CFFF, "Hani"}, length = 1}, + [0x2D] = {{0x2D000, 0x2DFFF, "Hani"}, length = 1}, + [0x2E] = {{0x2E000, 0x2EBE0, "Hani"}, length = 1}, + + individual = {[0x00462] = "Cyrl", [0x00463] = "Cyrl", [0x0098F] = "Beng", [0x00990] = "Beng", [0x009A1] = "Beng", [0x009A2] = "Beng", [0x009AF] = "Beng", [0x009B2] = "Beng", [0x009BC] = "Beng", [0x009C7] = "Beng", [0x009C8] = "Beng", [0x009D7] = "Beng", [0x02135] = "Zmth", [0x0FA0E] = "Hani", [0x0FA0F] = "Hani", [0x0FA11] = "Hani", [0x0FA13] = "Hani", [0x0FA14] = "Hani", [0x0FA1F] = "Hani", [0x0FA21] = "Hani", [0x0FA23] = "Hani", [0x0FA24] = "Hani", [0x1056F] = "Aghb", [0x16FE0] = "Tang", [0x16FE1] = "Nshu", [0x1B000] = "Kana"}, + + blocks = {{0x04, 0x09, "Hani"}, {0x0B, 0x0D, "Hang"}, {0x17, 0x18, "Tang"}, {0x20, 0x2E, "Hani"}} +} diff --git a/wiktra/wikt/translit/sdh-translit.lua b/wiktra/wikt/translit/sdh-translit.lua new file mode 100644 index 0000000..82a589b --- /dev/null +++ b/wiktra/wikt/translit/sdh-translit.lua @@ -0,0 +1,176 @@ +-- Authors: JavaScript ئاسۆ; Lua Ghybu, Calak +local export = {} + +local gsub = mw.ustring.gsub +local U = mw.ustring.char + +local mapping = { + ["ا"] = "a", + ["ب"] = "b", + ["چ"] = "ç", + ["ج"] = "c", + ["د"] = 
"d", + ["ە"] = "e", + ["ێ"] = "ê", + ["ف"] = "f", + ["گ"] = "g", + ["ھ"] = "h", + ["ه"] = "h", + ["ح"] = "ḧ", + ["ژ"] = "j", + ["ک"] = "k", + ["ڵ"] = "ll", + ["ل"] = "l", + ["م"] = "m", + ["ن"] = "n", + ["ۆ"] = "o", + ["پ"] = "p", + ["ق"] = "q", + ["ر"] = "r", + ["ڕ"] = "r", + ["س"] = "s", + ["ش"] = "ş", + ["ت"] = "t", + ["ۊ"] = "ü", + ["ڤ"] = "v", + ["خ"] = "x", + ["غ"] = "ẍ", + ["ز"] = "z", + ["ئ"] = "", + ["ع"] = "'", + + [U(0x200C)] = "", -- ZWNJ (zero-width non-joiner) + ["ـ"] = "", -- kashida, no sound + + -- numerals + ["١"] = "1", + ["٢"] = "2", + ["٣"] = "3", + ["٤"] = "4", + ["٥"] = "5", + ["٦"] = "6", + ["٧"] = "7", + ["٨"] = "8", + ["٩"] = "9", + ["٠"] = "0", + -- persian variants to numerals + ["۱"] = "1", + ["۲"] = "2", + ["۳"] = "3", + ["۴"] = "4", + ["۵"] = "5", + ["۶"] = "6", + ["۷"] = "7", + ["۸"] = "8", + ["۹"] = "9", + ["۰"] = "0" +} + +-- punctuation (leave on separate lines) +local punctuation = { + ["؟"] = "?", -- question mark + ["،"] = ",", -- comma + ["؛"] = ";", -- semicolon + ["«"] = "“", -- quotation mark + ["»"] = "”", -- quotation mark + ["٪"] = "%", -- percent + ["؉"] = "‰", -- per mille + ["٫"] = ".", -- decimals + ["٬"] = "," -- thousand +} + +-- translit +local function tr_word(word) + + word = gsub(word, ".", punctuation) + + -- Remove punctuation at the end of the word. + if mw.ustring.find(word, "[%.%!،؛»«٪؉٫٬%p]$") then + ponct = mw.ustring.sub(word, -1) + word = gsub(word, "[%.%!،؛»«٪؉٫٬%p]$", "") + else + word = word + ponct = "" + end + + word = gsub(word, "ه‌", "ە") -- correct unicode for letter ە + -- U+0647 (Arabic letter heh) + U+200C (zero-width non-joiner) → U+06D5 (Arabic letter ae) + + -- diacritics + word = gsub(word, "ْ", "i") -- U+0652, Arabic sukun + word = gsub(word, "ِ", "i") -- U+0650, Arabic kasra + + -- managing 'و' and 'ی' + word = gsub(word, "و([iاێۆۊە])", "w%1") -- و + vowel => w (e.g. wan) + word = gsub(word, "ی([iاێۆۊە])", "y%1") -- ی + vowel => y (e.g. 
yas) + word = gsub(word, "([iاێۆۊە])و", "%1w") -- vowel + و => w (e.g. kew) + word = gsub(word, "([iاێۆۊە])ی", "%1y") -- vowel + ی => y (e.g. bey) + word = gsub(word, "([iاێۆە])ۊ", "%1ẅ") -- vowel + و => ẅ (e.g. taẅ) + word = gsub(word, "([iاۆۊە])ێ", "%1ÿ") -- vowel + ێ => ÿ (e.g. şeÿtan) + word = gsub(word, "^و$", "û") -- non-letter + 'و' + non-letter => û (=and) + + word = gsub(word, "([^ء-يٱ-ەiwẅyÿ])و", "%1w") -- non-letter + 'و' => w (e.g. wetar) + word = gsub(word, "^و", "w") -- first 'و' => w (e.g. wetar) + word = gsub(word, "یو", "îw") -- 'ی' + 'و' => îw (e.g. mîwe) + word = gsub(word, "([^و])یی", "%1îy") -- 'ی' + 'ی' => îy (e.g. kanîy) + word = gsub(word, "وی", "uy") -- 'و' + 'ی' => uy (e.g. buyn) + word = gsub(word, "وو", "û") -- 'و' + 'و' => û (e.g. nû) + word = gsub(word, "ی", "î") + word = gsub(word, "و", "u") + word = gsub(word, "uu", "û") -- 'و' + 'و' => û (e.g. nû) + word = gsub(word, "([ء-يٱ-ەiîuûwẅyÿ])ڕ", "%1rr") -- when 'ڕ' not at the beginning of a word => rr + word = gsub(word, "([ء-يٱ-ەiîuûwẅyÿ])ئ", "%1'") -- when 'ئ' not at the beginning of a word => ' + + word = gsub(word, ".", mapping) + + -- insert i where applicable + word = gsub(word, "ll", "Ľ") -- temporary conversion to avoid seeing ll as 2 letters + word = gsub(word, "rr", "Ŕ") -- temporary conversion to avoid seeing rr as 2 letters + + word = gsub(word, "([bcçdfghḧjklĽmnpqrŔsştvwẅxẍz])([fjlĽmnrŔsşvwẅxẍyÿz])([fjlĽmnrŔsşvwẅxẍyÿz])([^aeêiîouûüy])", "%1%2i%3%4") -- e.g. grft -> grift + word = gsub(word, "([aeêiîouûü])([bcçdfghḧjklĽmnpqrŔsştvwẅxẍz])([bcçdfghḧjklĽmnpqrŔsştvwẅxẍz])([bcçdfghḧjklĽmnpqrŔsştvwẅxẍz])$", "%1%2%3i%4") -- e.g. cejnt -> cejnit + word = gsub(word, "([fjlĽrŔsşwyz])([fjlĽmnrŔsşvwẅxẍyÿz])([bcçdfghḧjklĽmnpqrŔsştvwẅxẍz])", "%1i%2%3") -- e.g. wrd -> wird + + word = gsub(word, "([bcçdghḧkmnpqtvxẍ])([fjlĽmnrŔsşvwẅxẍyÿz])([^aeêiîouûü])", "%1i%2%3") -- e.g. 
prd -> pird + word = gsub(word, "([bcçdghḧkmnpqtvxẍ])([fjlĽmnrŔsşvwẅxẍyÿz])$", "%1i%2") -- like above + + word = gsub(word, "([^aeêiîouûü])([bcçdghḧkmnpqtvxẍ])([fjlĽmnrŔsşvwẅxẍyÿz])([^aeêiîouûü])", "%1%2i%3%4") -- repeat the latter expression, in case skipped + word = gsub(word, "([^aeêiîouûü])([bcçdghḧkmnpqtvxẍ])([fjlĽmnrŔsşvwẅxẍyÿz])$", "%1%2i%3") -- repeat the latter expression, in case skipped + + word = gsub(word, "^([bcçdfghḧjklĽmnpqrŔsştvwẅxẍz])([bcçdfghḧjklĽmnpqrŔsştvwẅxẍz])([^aeêiîouûü])", "%1i%2%3") -- e.g. ktk -> kitk + word = gsub(word, "^([bcçdfghḧjklĽmnpqrŔsştvwẅxẍz])([bcçdfghḧjklĽmnpqrŔsştvwẅxẍz])$", "%1i%2") -- e.g. ktk -> kitk + word = gsub(word, "([^aeêiîouüy])([bcçdfghḧjklĽmnpqrŔsştvwẅxẍz])([bcçdfghḧjklĽmnpqrŔsştvwẅxẍz])([^aeêiîouûü])", "%1%2i%3%4") -- e.g. ktk -> kitk + word = gsub(word, "([^aeêiîouüy])([bcçdfghḧjklĽmnpqrŔsştvwẅxẍz])([bcçdfghḧjklĽmnpqrŔsştvwẅxẍz])$", "%1%2i%3") -- e.g. ktk -> kitk + + word = gsub(word, "([^a-zçşêîûüĽŔ])([bcçdfghḧjklĽmnpqrŔsştvwẅxẍz])$", "%1%2i") -- e.g. j -> ji + word = gsub(word, "^([bcçdfghḧjklĽmnpqrŔsştvwẅxẍz])$", "%1i") -- e.g. j -> ji + + -- word = gsub(word, '([^a-zêîûçş0-9\'’])([bcçdfghḧjklĽmnpqrŔsştvwẅxẍz])([bcçdfghḧjklĽmnpqrŔsştvxẍz])', "%1%2i%3") --e.g. bra -> bira + -- word = gsub(word, '^([bcçdfghḧjklĽmnpqrŔsştvwẅxẍz])([bcçdfghḧjklĽmnpqrŔsştvxẍz])', "%1i%2") --e.g. bra -> bira + + -- word = gsub(word, '([bcçdfghḧjklmnpqrsştvwẅxẍz][bcçdfghḧjklĽmnpqrŔsştvwẅxẍz])([bcçdfghḧjklĽmnpqrŔsştvwẅxẍz])', "%1i%2") --e.g. aşkra -> aşkira + + -- word = gsub(word, 'si([tp][aeêiîouû])', "s%1") -- sp, st cluster + + word = gsub(word, "Ľ", "ll") -- revert the temporary conversion + word = gsub(word, "Ŕ", "rr") -- revert the temporary conversion + + -- Add the punctuation who had previously deleted. + word = word .. ponct + + return word +end + +function export.tr(text, lang, sc) + local textTab = {} + + -- Create a word table separated by a space (%s). 
+ for _, word in ipairs(mw.text.split(text, "%s+")) do table.insert(textTab, word) end + + -- Tablo of translit. + for key, word in ipairs(textTab) do textTab[key] = tr_word(word) end + + return table.concat(textTab, " ") +end + +return export diff --git a/wiktra/wikt/translit/senseid.lua b/wiktra/wikt/translit/senseid.lua new file mode 100644 index 0000000..ede3ca7 --- /dev/null +++ b/wiktra/wikt/translit/senseid.lua @@ -0,0 +1,123 @@ +local export = {} + +-- Extract a Wikidata id from a sense id +local function Wikidata_id(id) return mw.ustring.match(id, "Q%d+") end + +local function Wikidata_entity_is_any_of(id, ids) + if not mw.wikibase then return false end + -- P31: instance of + for _, val in ipairs(mw.wikibase.getBestStatements(id, "P31")) do for _, sid in ipairs(ids) do if val["mainsnak"]["datavalue"] and val["mainsnak"]["datavalue"]["type"] == "wikibase-entityid" and val["mainsnak"]["datavalue"]["value"]["id"] == sid then return true end end end + + return false +end + +local function Wikidata_entity_continents(id) + if not mw.wikibase then return nil end + local ret = {} + + local continent_names = {["Q15"] = "Africa", ["Q18"] = "South America", ["Q46"] = "Europe", ["Q48"] = "Asia", ["Q49"] = "North America", ["Q538"] = "Oceania", ["Q27611"] = "North America"} + + -- P30: continent + for _, val in ipairs(mw.wikibase.getBestStatements(id, "P30")) do if val["mainsnak"]["datavalue"]["type"] == "wikibase-entityid" then for cid, name in pairs(continent_names) do if val["mainsnak"]["datavalue"]["value"]["id"] == cid then table.insert(ret, name) end end end end + + if #ret > 0 then + return ret + else + return nil + end +end + +function export.anchor(lang, id) + if not (type(id) == "string" or type(id) == "number") then error("The second argument to senseid should be a string or a number.") end + + return lang:getCanonicalName() .. ":_" .. 
mw.uri.encode(id, "WIKI") +end + +function senseid_wikidata_track(lang, id) + if Wikidata_id(id) then + --[=[ + [[Special:WhatLinksHere/Template:tracking/senseid/Wikidata]] + ]=] + require("debug").track("senseid/Wikidata") + + local tracking = {} + + -- Q30014: outer planet of the Solar System + -- Q3504248: inner planet of the Solar System + -- Q17362350: planet of the Solar System + if Wikidata_entity_is_any_of(id, {"Q30014", "Q3504248", "Q17362350"}) then table.insert(tracking, "senseid/Wikidata/planet") end + + -- Q2199: dwarf planet + if Wikidata_entity_is_any_of(id, {"Q2199"}) then table.insert(tracking, "senseid/Wikidata/dwarf planet") end + + -- Q5107: continent + -- Q55833: supercontinent + if Wikidata_entity_is_any_of(id, {"Q5107", "Q55833"}) then table.insert(tracking, "senseid/Wikidata/continent") end + + -- Q6256: country + -- Q3024240: former country + if Wikidata_entity_is_any_of(id, {"Q6256", "Q3024240"}) then + table.insert(tracking, "senseid/Wikidata/country") + + local continents = Wikidata_entity_continents(id) + + if continents then for _, name in ipairs(continents) do table.insert(tracking, "senseid/Wikidata/country/" .. 
name) end end + end + + -- Q82794: geographic region + if Wikidata_entity_is_any_of(id, {"Q82794"}) then table.insert(tracking, "senseid/Wikidata/region") end + + -- Q23442: island + if Wikidata_entity_is_any_of(id, {"Q23442"}) then table.insert(tracking, "senseid/Wikidata/island") end + + -- Q4022: river + if Wikidata_entity_is_any_of(id, {"Q4022"}) then table.insert(tracking, "senseid/Wikidata/river") end + + -- Q515: city + if Wikidata_entity_is_any_of(id, {"Q515"}) then table.insert(tracking, "senseid/Wikidata/city") end + + -- Q34770: language + -- Q33742: natural language + -- Q33215: constructed language + if Wikidata_entity_is_any_of(id, {"Q34770", "Q33742", "Q33215"}) then table.insert(tracking, "senseid/Wikidata/language") end + + -- Q9779: alphabet + -- Q335806: abugida + if Wikidata_entity_is_any_of(id, {"Q9779", "Q335806"}) then table.insert(tracking, "senseid/Wikidata/writing system") end + + -- Q11344: chemical element + if Wikidata_entity_is_any_of(id, {"Q11344"}) then table.insert(tracking, "senseid/Wikidata/chemical element") end + + -- Q16521: taxon + if Wikidata_entity_is_any_of(id, {"Q16521"}) then table.insert(tracking, "senseid/Wikidata/taxon") end + + -- Q9415: emotion + if Wikidata_entity_is_any_of(id, {"Q9415"}) then table.insert(tracking, "senseid/Wikidata/emotion") end + + -- Q11688446: Roman deity + if Wikidata_entity_is_any_of(id, {"Q11688446"}) then table.insert(tracking, "senseid/Wikidata/Roman deity") end + + if #tracking > 0 then + require("debug").track(tracking) + else + require("debug").track("senseid/Wikidata/nothing") + end + end +end + +function export.senseid(lang, id, tag_name) + -- Track which entries use Wikidata ids + senseid_wikidata_track(lang, id) + -- The following tag is opened but never closed, where is it supposed to be closed? + -- with
<li> it doesn't matter, as it is closed automatically. + -- with <p>
    it is a problem + return "<" .. tag_name .. " class=\"senseid\" id=\"" .. export.anchor(lang, id) .. "\">" +end + +function export.etymid(lang, id) + -- Track which entries use Wikidata ids + senseid_wikidata_track(lang, id) + return "" +end + +return export diff --git a/wiktra/wikt/translit/sgh-translit.lua b/wiktra/wikt/translit/sgh-translit.lua new file mode 100644 index 0000000..d6eab1c --- /dev/null +++ b/wiktra/wikt/translit/sgh-translit.lua @@ -0,0 +1,17 @@ +local export = {} + +local tt = {["а"] = "a", ["б"] = "b", ["в"] = "v", ["г"] = "g", ["д"] = "d", ["е"] = "ē", ["ж"] = "ž", ["з"] = "z", ["и"] = "i", ["ӣ"] = "ī", ["й"] = "y", ["к"] = "k", ["л"] = "l", ["м"] = "m", ["н"] = "n", ["о"] = "ō", ["п"] = "p", ["р"] = "r", ["с"] = "s", ["т"] = "t", ["у"] = "u", ["ӯ"] = "ū", ["ф"] = "f", ["х"] = "x", ["ц"] = "c", ["ӡ"] = "ʒ", ["ч"] = "č", ["ш"] = "š", ["ғ"] = "ɣ", ["қ"] = "q", ["ҳ"] = "h", ["ҷ"] = "ǰ", ["А"] = "A", ["Б"] = "B", ["В"] = "V", ["Г"] = "G", ["Д"] = "D", ["Е"] = "Ē", ["Ж"] = "Ž", ["З"] = "Z", ["И"] = "I", ["Ӣ"] = "Ī", ["Й"] = "Y", ["К"] = "K", ["Л"] = "L", ["М"] = "M", ["Н"] = "N", ["О"] = "Ō", ["П"] = "P", ["Р"] = "R", ["С"] = "S", ["Т"] = "T", ["У"] = "U", ["Ӯ"] = "Ū", ["Ф"] = "F", ["Х"] = "X", ["Ц"] = "C", ["Ӡ"] = "Ʒ", ["Ч"] = "Č", ["Ш"] = "Š", ["Ғ"] = "Ɣ", ["Қ"] = "Q", ["Ҳ"] = "H", ["Ҷ"] = "J̌"}; + +local mapping = {["в̌"] = "w", ["д̌"] = "δ", ["т̌"] = "θ", ["е̂"] = "ê", ["е̣"] = "ẹ", ["о̣"] = "ọ", ["у̊"] = "ū̊", ["г̌"] = "ɣ̌", ["В̌"] = "W", ["Д̌"] = "Δ", ["Т̌"] = "Θ", ["Е̂"] = "Ê", ["Е̣"] = "Ẹ", ["О̣"] = "Ọ", ["У̊"] = "Ū̊", ["Г̌"] = "Ɣ̌"} + +function export.tr(text, lang, sc) + if sc == "Latn" then return nil end + + for char, translit in pairs(mapping) do text = mw.ustring.gsub(text, char, translit) end + + text = mw.ustring.gsub(text, ".", tt) + + return text +end + +return export diff --git a/wiktra/wikt/translit/sh-translit.lua b/wiktra/wikt/translit/sh-translit.lua new file mode 100644 index 0000000..bf9b368 --- /dev/null +++ 
b/wiktra/wikt/translit/sh-translit.lua @@ -0,0 +1,224 @@ +local export = {} + +local tt = {} +tt["Cyrl"] = { + ["А"] = "A", + ["а"] = "a", + ["Б"] = "B", + ["б"] = "b", + ["В"] = "V", + ["в"] = "v", + ["Г"] = "G", + ["г"] = "g", + ["Д"] = "D", + ["д"] = "d", + ["Ђ"] = "Đ", + ["ђ"] = "đ", + ["Е"] = "E", + ["е"] = "e", + ["Ж"] = "Ž", + ["ж"] = "ž", + ["З"] = "Z", + ["з"] = "z", + ["И"] = "I", + ["и"] = "i", + ["Ј"] = "J", + ["ј"] = "j", + ["К"] = "K", + ["к"] = "k", + ["Л"] = "L", + ["л"] = "l", + ["Љ"] = "Lj", + ["љ"] = "lj", + ["М"] = "M", + ["м"] = "m", + ["Н"] = "N", + ["н"] = "n", + ["Њ"] = "Nj", + ["њ"] = "nj", + ["О"] = "O", + ["о"] = "o", + ["П"] = "P", + ["п"] = "p", + ["Р"] = "R", + ["р"] = "r", + ["С"] = "S", + ["с"] = "s", + ["Т"] = "T", + ["т"] = "t", + ["Ћ"] = "Ć", + ["ћ"] = "ć", + ["У"] = "U", + ["у"] = "u", + ["Ф"] = "F", + ["ф"] = "f", + ["Х"] = "H", + ["х"] = "h", + ["Ц"] = "C", + ["ц"] = "c", + ["Ч"] = "Č", + ["ч"] = "č", + ["Џ"] = "Dž", + ["џ"] = "dž", + ["Ш"] = "Š", + ["ш"] = "š", + + -- letters with diacritics + ["Ѐ"] = "È", + ["ѐ"] = "è", + ["Ѝ"] = "Ì", + ["ѝ"] = "ì", + ["Ӣ"] = "Ī", + ["ӣ"] = "ī", + ["Ӯ"] = "Ū", + ["ӯ"] = "ū", + + -- proposed Montenegrin letters + ["Ć"] = "Ś", + ["ć"] = "ś" +}; + +tt["Latn"] = { + -- Digraphs + ["Lj"] = "Љ", + ["lj"] = "љ", + ["Nj"] = "Њ", + ["nj"] = "њ", + ["Dž"] = "Џ", + ["dž"] = "џ", + + ["A"] = "А", + ["a"] = "а", + ["B"] = "Б", + ["b"] = "б", + ["V"] = "В", + ["v"] = "в", + ["G"] = "Г", + ["g"] = "г", + ["D"] = "Д", + ["d"] = "д", + ["Đ"] = "Ђ", + ["đ"] = "ђ", + ["E"] = "Е", + ["e"] = "е", + ["Ž"] = "Ж", + ["ž"] = "ж", + ["Z"] = "З", + ["z"] = "з", + ["I"] = "И", + ["i"] = "и", + ["J"] = "Ј", + ["j"] = "ј", + ["K"] = "К", + ["k"] = "к", + ["L"] = "Л", + ["l"] = "л", + ["M"] = "М", + ["m"] = "м", + ["N"] = "Н", + ["n"] = "н", + ["O"] = "О", + ["o"] = "о", + ["P"] = "П", + ["p"] = "п", + ["R"] = "Р", + ["r"] = "р", + ["S"] = "С", + ["s"] = "с", + ["T"] = "Т", + ["t"] = "т", + ["Ć"] = "Ћ", + ["ć"] = "ћ", + 
["U"] = "У", + ["u"] = "у", + ["F"] = "Ф", + ["f"] = "ф", + ["H"] = "Х", + ["h"] = "х", + ["C"] = "Ц", + ["c"] = "ц", + ["Č"] = "Ч", + ["č"] = "ч", + ["Š"] = "Ш", + ["š"] = "ш", + + -- letters with diacritics + ["È"] = "Ѐ", + ["è"] = "ѐ", + ["Ì"] = "Ѝ", + ["ì"] = "ѝ", + ["Ī"] = "Ӣ", + ["ī"] = "ӣ", + ["Ū"] = "Ӯ", + ["ū"] = "ӯ", + + ["Á"] = "А́", + ["á"] = "а́", + ["À"] = "А̀", + ["à"] = "а̀", + ["Ā"] = "А̄", + ["ā"] = "а̄", + ["Ȁ"] = "А̏", + ["ȁ"] = "а̏", + ["Ȃ"] = "А̑", + ["ȃ"] = "а̑", + + ["É"] = "Е́", + ["é"] = "е́", + ["Ē"] = "Е̄", + ["ē"] = "е̄", + ["Ȅ"] = "Е̏", + ["ȅ"] = "е̏", + ["Ȇ"] = "Е̑", + ["ȇ"] = "е̑", + + ["Í"] = "И́", + ["í"] = "и́", + ["Ȉ"] = "И̏", + ["ȉ"] = "и̏", + ["Ȋ"] = "И̑", + ["ȋ"] = "и̑", + + ["Ó"] = "О́", + ["ó"] = "о́", + ["Ò"] = "О̀", + ["ò"] = "о̀", + ["Ō"] = "О̄", + ["ō"] = "о̄", + ["Ȍ"] = "О̏", + ["ȍ"] = "о̏", + ["Ȏ"] = "О̑", + ["ȏ"] = "о̑", + + ["Ŕ"] = "Р́", + ["ŕ"] = "р́", + ["Ȑ"] = "Р̏", + ["ȑ"] = "р̏", + ["Ȓ"] = "Р̑", + ["ȓ"] = "р̑", + + ["Ú"] = "У́", + ["ú"] = "у́", + ["Ù"] = "У̀", + ["ù"] = "у̀", + ["Ȕ"] = "У̏", + ["ȕ"] = "у̏", + ["Ȗ"] = "У̑", + ["ȗ"] = "у̑", + + -- proposed Montenegrin letters + ["Ź"] = "З́", + ["ź"] = "з́", + ["Ś"] = "Ć", + ["ś"] = "ć", + + -- backtick needs to be removed so that "nad`živeti" returns "надживети" + ["`"] = "" +}; + +function export.tr(text, lang, sc) + if (sc == "Latn") then text = mw.ustring.gsub(text, "[dDnNlL][jž]", tt[sc]) end + return mw.ustring.toNFC(mw.ustring.gsub(text, ".", tt[sc])) +end + +return export diff --git a/wiktra/wikt/translit/shn-translit.lua b/wiktra/wikt/translit/shn-translit.lua new file mode 100644 index 0000000..99b95ca --- /dev/null +++ b/wiktra/wikt/translit/shn-translit.lua @@ -0,0 +1,39 @@ +local export = {} +local u = mw.ustring.char +local gsub = mw.ustring.gsub +local match = mw.ustring.match + +local initial_table = {["ၵ"] = "k", ["ၶ"] = "kh", ["ၷ"] = "g", ["ꧠ"] = "gh", ["င"] = "ng", ["ၸ"] = "ts", ["ꧡ"] = "tsh", ["ၹ"] = "z", ["ꧢ"] = "zh", ["ၺ"] = "ny", ["ꩦ"] = 
"tt", ["ꩧ"] = "tth", ["ꩨ"] = "dd", ["ꩩ"] = "ddh", ["ꧣ"] = "nn", ["တ"] = "t", ["ထ"] = "th", ["ၻ"] = "d", ["ꩪ"] = "dh", ["ၼ"] = "n", ["ပ"] = "p", ["ၽ"] = "ph", ["ၾ"] = "f", ["ၿ"] = "b", ["ꧤ"] = "bh", ["မ"] = "m", ["ယ"] = "y", ["ရ"] = "r", ["လ"] = "l", ["ဝ"] = "w", ["ႀ"] = "x", ["သ"] = "s", ["ႁ"] = "h", ["ꩮ"] = "ll", ["ဢ"] = "ʼ"} + +local glide_table = {["ျ"] = "j", ["ြ"] = "r", ["ႂ"] = "w", [""] = ""} + +local vowel_table = {[""] = "a", ["ၢ"] = "aa", ["ိ"] = "i", ["ဵ"] = "e", ["ႅ"] = "ae", ["ု"] = "u", ["ူ"] = "uu", ["ွ"] = "oa", ["ို"] = "ue", ["ိူ"] = "oe", ["ႃ"] = "aa", ["ီ"] = "ii", ["ေ"] = "ee", ["ႄ"] = "aae", ["ူဝ်"] = "o", ["ေႃ"] = "oa", ["ိုဝ်"] = "ue", ["ိူဝ်"] = "oe", ["ႆ"] = "ay", ["ႆၢ"] = "aay", ["ၢႆ"] = "aay", ["ုၺ်"] = "uy", ["ူၺ်"] = "oy", ["ွႆ"] = "oay", ["ိုၺ်"] = "uey", ["ိူၺ်"] = "oey", ["ဝ်"] = "aw", ["ၢဝ်"] = "aaw", ["ိဝ်"] = "iw", ["ဵဝ်"] = "ew", ["ႅဝ်"] = "aew", ["ႂ်"] = "aue"} + +local coda_table = {["မ်"] = "m", ["ၼ်"] = "n", ["င်"] = "ng", ["ပ်"] = "p", ["တ်"] = "t", ["ၵ်"] = "k", [""] = ""} + +local tone_table = {[""] = u(0x030C), ["ႇ"] = u(0x0300), ["ႈ"] = u(0x0304), ["း"] = u(0x0301), ["ႉ"] = u(0x0302) .. u(0x0330), ["ႊ"] = u(0x1DC8)} + +local digits = {["႐"] = "0", ["႑"] = "1", ["႒"] = "2", ["႓"] = "3", ["႔"] = "4", ["႕"] = "5", ["႖"] = "6", ["႗"] = "7", ["႘"] = "8", ["႙"] = "9", ["၀"] = "0", ["၁"] = "1", ["၂"] = "2", ["၃"] = "3", ["၄"] = "4", ["၅"] = "5", ["၆"] = "6", ["၇"] = "7", ["၈"] = "8", ["၉"] = "9"} + +local syllable_pattern = "^([ၵၶၷꧠငၸꧡၹꧢၺꩦꩧꩨꩩꧣတထၻꩪၼပၽၾၿꧤမယရလဝႀသႁꩮဢ])" .. "([ျြႂ]?)" .. "([ဝွႂႃိီုူေႄဵႅၢႆ်]*)" .. "([မၼငၺပတၵ]?်?)" .. 
"([ႇႈးႉႊ]?)$" + +local repeat_syllabify = "([^ ])([ၵၶၷꧠငၸꧡၹꧢၺꩦꩧꩨꩩꧣတထၻꩪၼပၽၾၿꧤမယရလဝႀသႁꩮဢ][^်])" + +function export.tr(text, lang, sc) + text = gsub(text, ".", digits) + while match(text, repeat_syllabify) do text = gsub(text, repeat_syllabify, "%1 %2") end + for old in mw.text.gsplit(text, " ") do + new = gsub(old, syllable_pattern, function(initial, glide, vowel, coda, tone) + local untoned = initial_table[initial] .. (vowel_table[glide .. vowel .. coda] or glide_table[glide] .. (vowel_table[vowel .. coda] or (vowel_table[vowel] or vowel) .. (coda_table[coda] or coda))) + return gsub(untoned, "([aeiou])", "%1" .. tone_table[tone], 1) + end) + text = gsub(text, old, new, 1) + end + if not match(text, "[က-႟ꩠ-ꩿꧠ-ꧾ]") then + return text + else + return nil + end +end + +return export diff --git a/wiktra/wikt/translit/si-translit.lua b/wiktra/wikt/translit/si-translit.lua new file mode 100644 index 0000000..af804bd --- /dev/null +++ b/wiktra/wikt/translit/si-translit.lua @@ -0,0 +1,60 @@ +local gsub = mw.ustring.gsub +local export = {} + +local consonants = {["ක"] = "k", ["ඛ"] = "kh", ["ග"] = "g", ["ඝ"] = "gh", ["ඞ"] = "ṅ", ["ඟ"] = "ⁿg", ["ච"] = "c", ["ඡ"] = "ch", ["ජ"] = "j", ["ඣ"] = "jh", ["ඤ"] = "ñ", ["ඥ"] = "gn", ["ඦ"] = "ⁿj", ["ට"] = "ṭ", ["ඨ"] = "ṭh", ["ඩ"] = "ḍ", ["ඪ"] = "ḍh", ["ණ"] = "ṇ", ["ඬ"] = "ⁿḍ", ["ත"] = "t", ["ථ"] = "th", ["ද"] = "d", ["ධ"] = "dh", ["න"] = "n", ["ඳ"] = "ⁿd", ["ප"] = "p", ["ෆ"] = "f", ["ඵ"] = "ph", ["බ"] = "b", ["භ"] = "bh", ["ම"] = "m", ["ඹ"] = "ᵐb", ["ය"] = "y", ["ර"] = "r", ["ල"] = "l", ["ව"] = "w", ["ශ"] = "ś", ["ෂ"] = "ṣ", ["ස"] = "s", ["හ"] = "h", ["ළ"] = "ḷ", ["ෆ"] = "f"} + +local diacritics = {["ා"] = "ā", ["ැ"] = "æ", ["ෑ"] = "ǣ", ["ි"] = "i", ["ී"] = "ī", ["ු"] = "u", ["ූ"] = "ū", ["ෙ"] = "e", ["ේ"] = "ē", ["ෛ"] = "ai", ["ො"] = "o", ["ෝ"] = "ō", ["ෞ"] = "au", ["ෘ"] = "r̥", ["ෟ"] = "l̥", ["ෲ"] = "r̥̄", ["ෳ"] = "l̥̄", ["්"] = ""} +local tt = { + -- vowels + ["අ"] = "a", + ["ආ"] = "ā", + ["ඇ"] = "æ", + ["ඈ"] = "ǣ", + ["ඉ"] = "i", + 
["ඊ"] = "ī", + ["උ"] = "u", + ["ඌ"] = "ū", + ["එ"] = "e", + ["ඒ"] = "ē", + ["ඓ"] = "ai", + ["ඔ"] = "o", + ["ඕ"] = "ō", + ["ඖ"] = "au", + ["ඍ"] = "r̥", + ["ඎ"] = "r̥̄", + ["ඏ"] = "l̥", + ["ඐ"] = "l̥̄", + -- other symbols + ["ං"] = "ṁ", -- anusvara + ["ඃ"] = "ḥ", -- visarga + ["්"] = "", -- hal kirīma, suppresses the inherent vowel "a" + -- punctuation + ["෴"] = "." -- kunddaliya (obsolete) +} + +-- translit any words or phrases +function export.tr(text, lang, sc) + local u = mw.ustring.char + if type(text) == "table" then + text = text.args[1] + lang = text.args[2] + sc = text.args[3] + end + text = mw.ustring.gsub(text, -- Handle conjunct and touching clusters. + "[" .. u(0x200d, 0x0dca) .. "][" .. u(0x200d, 0x0dca) .. "]", {[u(0x200d, 0x0dca)] = u(0x0dca), [u(0x0dca, 0x200d)] = u(0x0dca)}) + text = mw.ustring.gsub(text, "([කඛගඝඞඟචඡජඣඤඥඦටඨඩඪණඬතථදධනඳපපඵබභමඹයරලවශෂසහළෆ])" .. "([\224\183\153\224\183\146\224\183\156\224\183\148\224\183\144\224\183\146\224\183\143\224\183\154\224\183\157\224\183\150\224\183\145\224\183\147\224\183\152\224\183\159\224\183\178\224\183\179\224\183\155\224\183\158\224\183\138]?)", function(c, d) + if d == "" then + return consonants[c] .. "a" + else + return consonants[c] .. diacritics[d] + end + end) + + text = gsub(text, ".", tt) + if (lang == "pi" or lang == "sa") then -- Convert to IAST. + text = gsub(text, "." .. mw.ustring.char(0x325) .. "?" .. mw.ustring.char(0x304) .. 
"?", {["ṁ"] = "ṃ", ["w"] = "v", ["r̥"] = "ṛ", ["r̥̄"] = "ṝ", ["l̥"] = "ḷ", ["l̥̄"] = "ḹ"}) + end + return text +end + +return export diff --git a/wiktra/wikt/translit/sjd-translit.lua b/wiktra/wikt/translit/sjd-translit.lua new file mode 100644 index 0000000..7bfc769 --- /dev/null +++ b/wiktra/wikt/translit/sjd-translit.lua @@ -0,0 +1,118 @@ +local export = {} + +local tt = { + ["А"] = "A", + ["а"] = "a", + ["Ӓ"] = "ʹa", + ["ӓ"] = "ʹa", + ["Б"] = "B", + ["б"] = "b", + ["В"] = "V", + ["в"] = "v", + ["Г"] = "G", + ["г"] = "g", + ["Д"] = "D", + ["д"] = "d", + ["Е"] = "Je", + ["е"] = "je", + ["Ё"] = "Jo", + ["ё"] = "jo", + ["Ж"] = "Ž", + ["ж"] = "ž", + ["З"] = "Z", + ["з"] = "z", + ["Һ"] = "H", + ["һ"] = "h", + ["ʼ"] = "h", + ["И"] = "I", + ["и"] = "i", + ["Ӣ"] = "Ī", + ["ӣ"] = "ī", + ["Й"] = "J", + ["й"] = "j", + ["Ј"] = "J̥", + ["ј"] = "j̥", + ["Ҋ"] = "J̥", + ["ҋ"] = "j̥", + ["К"] = "K", + ["к"] = "k", + ["Л"] = "L", + ["л"] = "l", + ["Ӆ"] = "L̥", + ["ӆ"] = "l̥", + ["М"] = "M", + ["м"] = "m", + ["Ӎ"] = "M̥", + ["ӎ"] = "m̥", + ["Н"] = "N", + ["н"] = "n", + ["Ӊ"] = "N̥", + ["ӊ"] = "n̥", + ["Ӈ"] = "Ŋ", + ["ӈ"] = "ŋ", + ["О"] = "O", + ["о"] = "o", + ["П"] = "P", + ["п"] = "p", + ["Р"] = "R", + ["р"] = "r", + ["Ҏ"] = "R̥", + ["ҏ"] = "r̥", + ["С"] = "S", + ["с"] = "s", + ["Т"] = "T", + ["т"] = "t", + ["У"] = "U", + ["у"] = "u", + ["Ӯ"] = "Ū", + ["ӯ"] = "ū", + ["Ф"] = "F", + ["ф"] = "f", + ["Х"] = "X", + ["х"] = "x", + ["Ц"] = "C", + ["ц"] = "c", + ["Ч"] = "Č", + ["ч"] = "č", + ["Ш"] = "Š", + ["ш"] = "š", + ["Щ"] = "Šč", + ["щ"] = "šč", + ["Ы"] = "Ɨ", + ["ы"] = "ɨ", + ["Ъ"] = "", + ["ъ"] = "", + ["Ь"] = "ʹ", + ["ь"] = "ʹ", + ["Ҍ"] = "ʹ", + ["ҍ"] = "ʹ", + ["Э"] = "E", + ["э"] = "e", + ["Ӭ"] = "ʹE", + ["ӭ"] = "ʹe", + ["Ю"] = "Ju", + ["ю"] = "ju", + ["Я"] = "Ja", + ["я"] = "ja" +} + +local vowel = "аӓеёиӣоуӯыэӭюяАӒЕЁИӢОУӮЫЭӬЮЯ" + +function export.tr(text, lang, sc) + text = mw.ustring.gsub(text, "([^" .. vowel .. "НнЪъЬьҌҍ])е", "%1ьэ") + text = mw.ustring.gsub(text, "([^" .. 
vowel .. "НнЪъЬьҌҍ])ё", "%1ьо") + text = mw.ustring.gsub(text, "([^" .. vowel .. "НнЪъЬьҌҍ])ю", "%1ьу") + text = mw.ustring.gsub(text, "([^" .. vowel .. "НнЪъЬьҌҍ])я", "%1ьа") + + text = mw.ustring.gsub(text, "([^" .. vowel .. "НнЪъЬьҌҍ])Е", "%1ЬЭ") + text = mw.ustring.gsub(text, "([^" .. vowel .. "НнЪъЬьҌҍ])Ё", "%1ЬО") + text = mw.ustring.gsub(text, "([^" .. vowel .. "НнЪъЬьҌҍ])Ю", "%1ЬУ") + text = mw.ustring.gsub(text, "([^" .. vowel .. "НнЪъЬьҌҍ])Я", "%1ЬА") + + text = mw.ustring.gsub(text, "([нН])ь", "%1й") + text = mw.ustring.gsub(text, "НЬ", "НЙ") + + return (mw.ustring.gsub(text, ".", tt)) +end + +return export diff --git a/wiktra/wikt/translit/slr-translit.lua b/wiktra/wikt/translit/slr-translit.lua new file mode 100644 index 0000000..7857423 --- /dev/null +++ b/wiktra/wikt/translit/slr-translit.lua @@ -0,0 +1,61 @@ +local export = {} + +local data = {} + +data["slr-Arab"] = { + -- consonants + ["م"] = "m", + ["ن"] = "n", + ["د"] = "d", + ["ت"] = "t", + ["ب"] = "b", + ["پ"] = "p", + ["ف"] = "f", + ["ق"] = "q", + ["ك"] = "k", + ["ڭ"] = "ñ", + ["گ"] = "g", + ["غ"] = "ġ", + ["ع"] = "ğ", + ["ھ"] = "h", + ["خ"] = "x", + ["چ"] = "č", + ["ج"] = "ǧ", + ["ژ"] = "ž", + ["ز"] = "z", + ["س"] = "s", + ["ش"] = "š", + ["ر"] = "r", + ["ل"] = "l", + ["وْ"] = "u", + ["ي"] = "y", + ["ۋ"] = "w", + -- vowels + ["ا"] = "a", + ["ە"] = "e", + ["ې"] = "ë", + ["ى"] = "i", + ["و"] = "o", + ["ۆ"] = "ö", + ["ۇ"] = "u", + ["ۈ"] = "ü", + -- punctuation + ["؟"] = "?", + ["،"] = ",", + ["؛"] = ";" +} + +function export.tr(text, lang, sc) + if not data[sc] then return nil end + + -- remove initial hamza + text = mw.ustring.gsub(text, "^\216\166(.)", "%1") + text = mw.ustring.gsub(text, "%s\216\166(.)", " %1") + + -- transliterate letters one to one + text = mw.ustring.gsub(text, ".", data[sc]) + + return text +end + +return export diff --git a/wiktra/wikt/translit/sog-translit.lua b/wiktra/wikt/translit/sog-translit.lua new file mode 100644 index 0000000..8fdf614 --- /dev/null +++ 
b/wiktra/wikt/translit/sog-translit.lua @@ -0,0 +1,14 @@ +local export = {} + +function export.tr(text, lang, sc) + if not sc then sc = require("scripts").findBestScript(text, require("languages").getByCode(lang)):getCode() end + if sc == "Sogd" then -- transliterate Sogdian + return require("Sogd-translit").tr(text, lang, sc) + elseif sc == "Mani" then -- transliterate Manichaean + return require("Mani-translit").tr(text, lang, sc) + else + return nil + end +end + +return export diff --git a/wiktra/wikt/translit/string utilities.lua b/wiktra/wikt/translit/string utilities.lua new file mode 100644 index 0000000..ecb9a27 --- /dev/null +++ b/wiktra/wikt/translit/string utilities.lua @@ -0,0 +1,200 @@ +local module_name = "string_utilities" +local export = {} + +local rfind = mw.ustring.find + +local format_escapes = {["op"] = "{", ["cl"] = "}"} + +function export.format(str, tbl) + return (string.gsub(str, "{(\\?)((\\?)[^{}]*)}", function(p1, name, p2) + if #p1 + #p2 == 1 then + return format_escapes[name] or error(module_name .. ".format: unrecognized escape sequence '{\\" .. name .. "}'") + else + if tbl[name] and type(tbl[name]) ~= "string" then error(module_name .. ".format: '" .. name .. "' is a " .. type(tbl[name]) .. ", not a string") end + return tbl[name] or error(module_name .. ".format: '" .. name .. "' not found in table") + end + end)) +end + +-- Reimplementation of mw.ustring.split() that includes any capturing +-- groups in the splitting pattern. This works like Python's re.split() +-- function, except that it has Lua's behavior when the split pattern +-- is empty (i.e. advancing by one character at a time; Python returns the +-- whole remainder of the string). +function export.capturing_split(str, pattern) + local ret = {} + -- (.-) corresponds to (.*?) in Python or Perl; () captures the + -- current position after matching. + pattern = "(.-)" .. pattern .. "()" + local start = 1 + while true do + -- Did we reach the end of the string? 
+        if start > #str then
+            table.insert(ret, "")
+            return ret
+        end
+        -- match() returns all captures as multiple return values;
+        -- we need to insert into a table to get them all.
+        local captures = {mw.ustring.match(str, pattern, start)}
+        -- If no match, add the remainder of the string.
+        if #captures == 0 then
+            table.insert(ret, mw.ustring.sub(str, start))
+            return ret
+        end
+        local newstart = table.remove(captures)
+        -- Special case: If we don't advance by any characters, then advance
+        -- by one character; this avoids an infinite loop, and makes splitting
+        -- by an empty string work the way mw.ustring.split() does. If we
+        -- reach the end of the string this way, return immediately, so we
+        -- don't get a final empty string.
+        if newstart == start then
+            table.insert(ret, mw.ustring.sub(str, start, start))
+            table.remove(captures, 1)
+            start = start + 1
+            if start > #str then return ret end
+        else
+            table.insert(ret, table.remove(captures, 1))
+            start = newstart
+        end
+        -- Insert any captures from the splitting pattern.
+        for _, x in ipairs(captures) do table.insert(ret, x) end
+    end
+end
+
+-- Re-case the first character of `text`: lower-case it when `dolower` is
+-- truthy, upper-case it otherwise. If `text` starts with a wikilink, the
+-- result is a piped link whose display text has its first character re-cased.
+local function uclcfirst(text, dolower)
+    -- Choose the case-conversion routine once.
+    local recase = dolower and mw.ustring.lower or mw.ustring.upper
+    local function recase_first(s)
+        return recase(mw.ustring.sub(s, 1, 1)) .. mw.ustring.sub(s, 2)
+    end
+    -- The pattern matches both piped and unpiped links at the start of the
+    -- text; for an unpiped link the second capture (the label) is empty.
+    local target, label, rest = mw.ustring.match(text, "^%[%[([^|%]]+)%|?(.-)%]%](.*)$")
+    if not target then return recase_first(text) end
+    if label == "" then label = target end
+    return "[[" .. target .. "|" .. recase_first(label) .. "]]" .. rest
+end
+
+-- Upper-case the first character of `text` (or of a leading link's label).
+function export.ucfirst(text)
+    return uclcfirst(text, false)
+end
+
+-- Lower-case the first character of `text` (or of a leading link's label).
+function export.lcfirst(text)
+    return uclcfirst(text, true)
+end
+
+-- Pluralize a word in a smart fashion, according to normal English rules.
+--   1. If the word ends in consonant + -y, replace the -y with -ies.
+--   2. If the word ends in -s, -x, -z, -sh, -ch, add -es.
+--   3. Otherwise, add -s.
+-- A trailing wikilink is handled as follows:
+--   1. Piped link: the display part is pluralized.
+--   2. Unpiped link where rule #1 applies: converted to a piped link whose
+--      display part is the plural.
+--   3. Unpiped link where rule #2 or #3 applies: the ending is added outside
+--      the link.
+-- `text` may also be a table with an `args` field, in which case `args[1]`
+-- is used (allows calling from a template).
+function export.pluralize(text)
+    if type(text) == "table" then text = text.args[1] end
+
+    -- FIXME, a subrule of rule #1 above says the -ies ending doesn't
+    -- apply to proper nouns, hence "the Gettys", "the public Ivys".
+    -- We should maybe consider applying this rule here; but it may not
+    -- be important as this function is almost always called on common nouns
+    -- (e.g. parts of speech, place types).
+    local function ends_in_consonant_y(word)
+        return word:find("[^aeiouAEIOU ]y$")
+    end
+
+    local function needs_es(word)
+        return word:find("[sxz]$") or word:find("[cs]h$")
+    end
+
+    -- The parentheses drop gsub's second return value (the replacement count).
+    local function y_to_ies(word)
+        return (word:gsub("y$", "ies"))
+    end
+
+    local function plural_of(word)
+        if ends_in_consonant_y(word) then return y_to_ies(word) end
+        if needs_es(word) then return word .. "es" end
+        return word .. "s"
+    end
+
+    -- Matches a piped or unpiped link at the end of the text; for an unpiped
+    -- link the label capture is empty.
+    local prefix, target, label = mw.ustring.match(text, "^(.*)%[%[([^|%]]+)%|?(.-)%]%]$")
+    if not target then return plural_of(text) end
+    if label ~= "" then
+        return prefix .. "[[" .. target .. "|" .. plural_of(label) .. "]]"
+    end
+    if ends_in_consonant_y(target) then
+        return prefix .. "[[" .. target .. "|" .. y_to_ies(target) .. "]]"
+    end
+    local ending = needs_es(target) and "es" or "s"
+    return prefix .. "[[" .. target .. "]]" .. ending
+end
+
+-- Singularize a word in a smart fashion, according to normal English rules.
+-- Works analogously to pluralize().
+-- NOTE: This doesn't always work as well as pluralize(). Beware. It will
+-- mishandle cases like "passes" -> "passe", "eyries" -> "eyry".
+--   1. If the word ends in -ies, replace -ies with -y.
+--   2. If the word ends in -xes, -shes, -ches, remove -es. [Does not affect
+--      -ses, cf. "houses", "impasses".]
+--   3. Otherwise, remove -s.
+-- A trailing wikilink is handled as follows:
+--   1. Piped link: the display part is singularized, and the link collapses
+--      to a simple link if both parts end up the same.
+--   2. Unpiped link: the link itself is singularized.
+--   3. Text such as "[[parish]]es" works because the -shes/-ches/-xes
+--      patterns allow ] characters before the final -es.
+-- `text` may also be a table with an `args` field, in which case `args[1]`
+-- is used (allows calling from a template).
+function export.singularize(text)
+    if type(text) == "table" then text = text.args[1] end
+
+    local function singular_of(word)
+        local stem = word:match("^(.-)ies$")
+        if stem then return stem .. "y" end
+        -- Tried in order: "[[parish]]es"-style endings, "[[box]]es"-style
+        -- endings, then a plain trailing -s.
+        stem = word:match("^(.-[sc]h%]*)es$") or word:match("^(.-x%]*)es$") or word:match("^(.-)s$")
+        return stem or word
+    end
+
+    -- Matches a piped or unpiped link at the end of the text; for an unpiped
+    -- link the label capture is empty.
+    local prefix, target, label = mw.ustring.match(text, "^(.*)%[%[([^|%]]+)%|?(.-)%]%]$")
+    if not target then return singular_of(text) end
+    if label == "" then return prefix .. "[[" .. singular_of(target) .. "]]" end
+
+    local new_label = singular_of(label)
+    if new_label == target then return prefix .. "[[" .. target .. "]]" end
+    return prefix .. "[[" .. target .. "|" .. new_label .. "]]"
+end
+
+-- Prefix `text` with "a"/"an" (capitalized when `uppercase` is truthy).
+-- The choice depends on whether the first letter is one of AEIOUaeiou; if
+-- the text starts with a wikilink, the first letter of its display text
+-- (or of the target, for an unpiped link) is examined instead.
+function export.add_indefinite_article(text, uppercase)
+    local target, label = mw.ustring.match(text, "^%[%[([^|%]]+)%|?(.-)%]%](.*)$")
+    local probe = text
+    if target then probe = label ~= "" and label or target end
+
+    local article
+    if rfind(probe, "^[AEIOUaeiou]") then
+        article = uppercase and "An " or "an "
+    else
+        article = uppercase and "A " or "a "
+    end
+    return article .. text
+end
+
+return export
diff --git a/wiktra/wikt/translit/string.lua b/wiktra/wikt/translit/string.lua
new file mode 100644
index 0000000..cdd0e5f
--- /dev/null
+++ b/wiktra/wikt/translit/string.lua
@@ -0,0 +1,595 @@
+local str = {}
+
+-- Cannot include null byte.
+local UTF8_char = "[\1-\127\194-\244][\128-\191]*"
+
+--[[
+ulen
+
+Counts UTF-8 characters. Faster than mw.ustring.len.
+
+Assumes that the encoding is correct. Unlike mw.ustring.len, does not return nil
+if encoding is invalid.
+
+Does not count the bytes 192, 193, and 245-255. They are not used in UTF-8 and
+will not occur if the string is valid. They are replaced with the replacement
+character (U+FFFD) on MediaWiki pages.
+--]]
+function str.ulen(text)
+    -- gsub's second return value is the number of matches, i.e. characters.
+    local _, char_count = string.gsub(text, UTF8_char, "")
+    return char_count
+end
+
+--[[
+len
+
+This function returns the length of the target string.
+
+Usage:
+{{#invoke:string|len|target_string|}}
+OR
+{{#invoke:string|len|s=target_string}}
+
+Parameters
+    s: The string whose length to report
+
+If invoked using named parameters, Mediawiki will automatically remove any leading or
+trailing whitespace from the target string.
+]]
+function str.len(frame)
+    local params = str._getParameters(frame.args, {"s"})
+    -- A missing "s" is treated as the empty string.
+    return mw.ustring.len(params.s or "")
+end
+
+--[[
+sub
+
+This function returns a substring of the target string at specified indices.
+
+Usage:
+{{#invoke:string|sub|target_string|start_index|end_index}}
+OR
+{{#invoke:string|sub|s=target_string|i=start_index|j=end_index}}
+
+Parameters
+    s: The string to return a subset of
+    i: The first index of the substring to return, defaults to 1.
+    j: The last index of the string to return, defaults to the last character.
+
+The first character of the string is assigned an index of 1. If either i or j
+is a negative value, it is interpreted the same as selecting a character by
+counting from the end of the string. Hence, a value of -1 is the same as
+selecting the last character of the string.
+
+If the requested indices are out of range for the given string, an error is
+reported.
+]]
+function str.sub(frame)
+    local params = str._getParameters(frame.args, {"s", "i", "j"})
+    local text = params.s or ""
+    local first = tonumber(params.i) or 1
+    local last = tonumber(params.j) or -1
+    local length = mw.ustring.len(text)
+
+    -- Translate from-the-end indices into positive ones so that the range
+    -- checks below only have to deal with one form.
+    if first < 0 then first = length + first + 1 end
+    if last < 0 then last = length + last + 1 end
+
+    local out_of_range = first > length or last > length or first < 1 or last < 1
+    if out_of_range then return str._error("String subset index out of range") end
+    if last < first then return str._error("String subset indices out of order") end
+
+    return mw.ustring.sub(text, first, last)
+end
+
+--[[
+This function implements the features of {{str sub old}} and is kept in order
+to maintain these older templates.
+]]
+function str.sublength(frame)
+    local args = frame.args
+    -- "i" is a zero-based offset here (defaults to 0); "len" is optional.
+    local offset = tonumber(args.i) or 0
+    local count = tonumber(args.len)
+    -- Without "len" the end index stays nil and is passed through as such.
+    local last
+    if count then last = offset + count end
+    return mw.ustring.sub(args.s, offset + 1, last)
+end
+
+--[[
+match
+
+This function returns a substring from the source string that matches a
+specified pattern.
+
+Usage:
+{{#invoke:string|match|source_string|pattern_string|start_index|match_number|plain_flag|nomatch_output}}
+OR
+{{#invoke:string|match|s=source_string|pattern=pattern_string|start=start_index
+    |match=match_number|plain=plain_flag|nomatch=nomatch_output}}
+
+Parameters
+    s: The string to search
+    pattern: The pattern or string to find within the string
+    start: The index within the source string to start the search. The first
+        character of the string has index 1. Defaults to 1.
+    match: In some cases it may be possible to make multiple matches on a single
+        string. This specifies which match to return, where the first match is
+        match= 1. If a negative number is specified then a match is returned
+        counting from the last match. Hence match = -1 is the same as requesting
+        the last match. Defaults to 1.
+    plain: A flag indicating that the pattern should be understood as plain
+        text. Defaults to false.
+    nomatch: If no match is found, output the "nomatch" value rather than an error.
+
+If invoked using named parameters, Mediawiki will automatically remove any leading or
+trailing whitespace from each string. In some circumstances this is desirable, in
+other cases one may want to preserve the whitespace.
+
+If the match_number or start_index are out of range for the string being queried, then
+this function generates an error. An error is also generated if no match is found.
+If one adds the parameter ignore_errors=true, then the error will be suppressed and
+an empty string will be returned on any failure.
+ +For information on constructing Lua patterns, a form of [regular expression], see: + +* http://www.lua.org/manual/5.1/manual.html#5.4.1 +* http://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Patterns +* http://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Ustring_patterns + +]] +function str.match(frame) + local new_args = str._getParameters(frame.args, {"s", "pattern", "start", "match", "plain", "nomatch"}); + local s = new_args["s"] or ""; + local start = tonumber(new_args["start"]) or 1; + local plain_flag = str._getBoolean(new_args["plain"] or false); + local pattern = new_args["pattern"] or ""; + local match_index = math.floor(tonumber(new_args["match"]) or 1); + local nomatch = new_args["nomatch"]; + + if s == "" then return str._error("Target string is empty"); end + if pattern == "" then return str._error("Pattern string is empty"); end + if math.abs(start) < 1 or math.abs(start) > mw.ustring.len(s) then return str._error("Requested start is out of range"); end + if match_index == 0 then return str._error("Match index is out of range"); end + if plain_flag then pattern = str.pattern_escape(pattern); end + + local result + if match_index == 1 then + -- Find first match is simple case + result = mw.ustring.match(s, pattern, start) + else + if start > 1 then s = mw.ustring.sub(s, start); end + + local iterator = mw.ustring.gmatch(s, pattern); + if match_index > 0 then + -- Forward search + for w in iterator do + match_index = match_index - 1; + if match_index == 0 then + result = w; + break + end + end + else + -- Reverse search + local result_table = {}; + local count = 1; + for w in iterator do + result_table[count] = w; + count = count + 1; + end + + result = result_table[count + match_index]; + end + end + + if result == nil then + if nomatch == nil then + return str._error("Match not found"); + else + return nomatch; + end + else + return result; + end +end + +--[[ +pos + +This function returns a single character 
--[[
pos

Returns the single character found at position "pos" of the target string.

Usage:
{{#invoke:string|pos|target_string|index_value}}
OR
{{#invoke:string|pos|target=target_string|pos=index_value}}

Parameters
    target: The string to read from.
    pos: Index of the character to return. The first character has index 1;
        a negative index counts backwards from the end of the string.

An index of zero (also used when "pos" is missing or not numeric), or one
whose magnitude exceeds the string length, yields the result of str._error.
]]
function str.pos(frame)
    local params = str._getParameters(frame.args, {"target", "pos"})
    local text = params.target or ""
    local index = tonumber(params.pos) or 0

    if index == 0 then
        return str._error("String index out of range")
    end
    if math.abs(index) > mw.ustring.len(text) then
        return str._error("String index out of range")
    end

    return mw.ustring.sub(text, index, index)
end
--[[
str_find

Legacy plain-text search kept for existing templates; new code should use
"find" instead.

Parameters
    source: The string to search.
    target: The plain text to look for.

Returns the 1-based index of the first occurrence of "target" in "source",
or -1 when it does not occur. An empty or missing "target" returns 1.
]]
function str.str_find(frame)
    local params = str._getParameters(frame.args, {"source", "target"})
    local haystack = params.source or ""
    local needle = params.target or ""

    -- Quirk preserved on purpose: an empty target reports position 1.
    if needle == "" then
        return 1
    end

    -- Fourth argument true: plain search, no pattern matching.
    local first = mw.ustring.find(haystack, needle, 1, true)
    if first == nil then
        return -1
    end
    return first
end
--[[
replace

Replaces occurrences of a string or pattern within another string.

Usage:
{{#invoke:string|replace|source_str|pattern_string|replace_string|replacement_count|plain_flag}}
OR
{{#invoke:string|replace|source=source_string|pattern=pattern_string|replace=replace_string|
    count=replacement_count|plain=plain_flag}}

Parameters
    source: The string to search.
    pattern: The string or pattern to find within source.
    replace: The replacement text.
    count: The number of occurrences to replace; defaults to all.
    plain: When true (the default), pattern and replacement are treated as
        plain text rather than as a Lua pattern and replacement template.

Returns the source unchanged when the source or the pattern is empty.
]]
function str.replace(frame)
    local params = str._getParameters(frame.args, {"source", "pattern", "replace", "count", "plain"})
    local text = params.source or ""
    local find_what = params.pattern or ""

    if text == "" or find_what == "" then
        return text
    end

    local replacement = params.replace or ""
    local limit = tonumber(params.count)

    if str._getBoolean(params.plain or true) then
        find_what = str.pattern_escape(find_what)
        -- In the replacement only "%" is special, so only it needs escaping.
        replacement = mw.ustring.gsub(replacement, "%%", "%%%%")
    end

    -- Parentheses drop gsub's second result (the substitution count).
    return (mw.ustring.gsub(text, find_what, replacement, limit))
end
--[[
Helper that resolves a list of parameter names against the frame arguments
when callers may mix named and positional parameters.

For each name in arg_list, in order, the named argument is used when present;
otherwise the next unused positional argument is taken. Named parameters are
not identical to positional ones because of whitespace trimming, which is why
both forms are supported.

Returns a new table keyed by the names in arg_list.
]]
function str._getParameters(frame_args, arg_list)
    local resolved = {}
    local next_positional = 1

    for _, name in ipairs(arg_list) do
        local value = frame_args[name]
        if value == nil then
            -- No named value: consume the next positional slot.
            value = frame_args[next_positional]
            next_positional = next_positional + 1
        end
        resolved[name] = value
    end

    return resolved
end
--[[
Counts how many times "pattern" occurs in "text".

Parameters
    text: String or number to search in.
    pattern: String or number to search for; a Lua (ustring) pattern unless
        "plain" is truthy.
    plain: When truthy, the pattern is escaped with str.pattern_escape and
        matched as plain text.

Returns the number of matches, taken from gsub's substitution count.
Raises an error when text or pattern is neither a string nor a number.
]]
function str.count(text, pattern, plain)
    if not (type(text) == "string" or type(text) == "number") then
        error("The first argument to the function \"count\" must be a string or a number, not a " .. type(text) .. ".")
    end

    -- This check concerns the second argument: name it and report its type
    -- (the message previously said "first argument" and used type(text)).
    if not (type(pattern) == "string" or type(pattern) == "number") then
        error("The second argument to the function \"count\" must be a string or a number, not a " .. type(pattern) .. ".")
    end

    if plain then pattern = str.pattern_escape(pattern) end

    -- Replace every match with nothing and keep only the count.
    local _, count = mw.ustring.gsub(text, pattern, "")

    return count
end
--[=[
    Iterator similar to gmatch that also yields a running match number,
    in the manner of ipairs().

    If the pattern contains no parenthesised group, it is wrapped in one so
    that the whole match is returned as the single capture.

    Parameters
        text: string to search.
        pattern: pattern to search for.
        pos: index at which the first search starts (defaults to 0).
        plain: passed through to find() as its fourth argument.
        use_basic_Lua_function: when truthy use string.find, otherwise
            mw.ustring.find.

    Each call of the returned function yields: match number, capture1[, ...].
    Iteration ends when find() yields no capture.

    NOTE(review): with plain = true, find() returns no captures (per the Lua
    manual), so the iterator appears to stop before the first match in that
    mode — confirm whether plain mode is expected to work.
]=]
function str.imatch(text, pattern, pos, plain, use_basic_Lua_function)
    if string.find(pattern, "%b()") == nil then
        pattern = "(" .. pattern .. ")"
    end

    local find
    if use_basic_Lua_function then
        find = string.find
    else
        find = mw.ustring.find
    end

    local search_from = pos or 0
    local match_number = 0

    return function()
        match_number = match_number + 1
        local found = {find(text, pattern, search_from, plain)}

        -- found[1] and found[2] are the match indices; captures start at 3.
        if not found[3] then
            return
        end

        search_from = found[2] + 1
        return match_number, unpack(found, 3)
    end
end
mode 100644 index 0000000..ba84310 --- /dev/null +++ b/wiktra/wikt/translit/su-translit.lua @@ -0,0 +1,63 @@ +local export = {} + +local consonants = {["ᮊ"] = "k", ["ᮌ"] = "g", ["ᮍ"] = "ng", ["ᮎ"] = "c", ["ᮏ"] = "j", ["ᮑ"] = "ny", ["ᮒ"] = "t", ["ᮓ"] = "d", ["ᮔ"] = "n", ["ᮕ"] = "p", ["ᮘ"] = "b", ["ᮙ"] = "m", ["ᮚ"] = "y", ["ᮛ"] = "r", ["ᮜ"] = "l", ["ᮝ"] = "w", ["ᮞ"] = "s", ["ᮠ"] = "h", ["ᮖ"] = "f", ["ᮋ"] = "q", ["ᮗ"] = "v", ["ᮟ"] = "x", ["ᮐ"] = "z", ["ᮮ"] = "kh", ["ᮯ"] = "sy"} + +local diacritics = {["ᮤ"] = "i", ["ᮥ"] = "u", ["ᮦ"] = "é", ["ᮧ"] = "o", ["ᮨ"] = "e", ["ᮩ"] = "eu", ["ᮺ"] = "-a", ["᮫"] = "", ["᮪"] = ""} + +local special = {["ᮬ"] = "m", ["ᮭ"] = "w", ["ᮡ"] = "y", ["ᮢ"] = "r", ["ᮣ"] = "l"} + +local nonconsonants = { + -- vowels + ["ᮃ"] = "a", + ["ᮆ"] = "é", + ["ᮄ"] = "i", + ["ᮇ"] = "o", + ["ᮅ"] = "u", + ["ᮈ"] = "e", + ["ᮉ"] = "eu", + ["ᮻ"] = "reu", + ["ᮼ"] = "leu", + -- aditional characters + ["|"] = "", -- digit pipe bar + ["ᮀ"] = "ng", + ["ᮁ"] = "r", + ["ᮂ"] = "h", + ["ᮾ"] = "k", + ["ᮿ"] = "m", + -- digits + ["᮰"] = "0", + ["᮱"] = "1", + ["᮲"] = "2", + ["᮳"] = "3", + ["᮴"] = "4", + ["᮵"] = "5", + ["᮶"] = "6", + ["᮷"] = "7", + ["᮸"] = "8", + ["᮹"] = "9" +} + +-- translit any words or phrases +function export.tr(text, lang, sc) + text = mw.ustring.gsub(text, "([ᮊᮌᮍᮎᮏᮑᮒᮓᮔᮕᮘᮙᮚᮛᮜᮝᮞᮠᮖᮋᮗᮟᮐᮮᮯ])" .. "([ᮬᮭᮡᮢᮣ]?)" .. "([ᮤᮥᮦᮧᮨᮩᮺ ᮫᮪]?)", function(c, s, d) + if s == "" then + if d == "" then + return consonants[c] .. "a" + else + return consonants[c] .. (diacritics[d] or d) + end + else + if d == "" then + return consonants[c] .. (special[s] or s) .. "a" + else + return consonants[c] .. (special[s] or s) .. 
(diacritics[d] or d) + end + end + end) + + text = mw.ustring.gsub(text, ".", nonconsonants) + + return text +end + +return export diff --git a/wiktra/wikt/translit/sva-translit.lua b/wiktra/wikt/translit/sva-translit.lua new file mode 100644 index 0000000..a9a1b42 --- /dev/null +++ b/wiktra/wikt/translit/sva-translit.lua @@ -0,0 +1,16 @@ +local export = {} +-- Keep synchronized with [[Module:Geor-translit]] +local tt = {["ა"] = "a", ["а"] = "a", ["ბ"] = "b", ["б"] = "b", ["გ"] = "g", ["г"] = "g", ["დ"] = "d", ["д"] = "d", ["ე"] = "e", ["ვ"] = "v", ["в"] = "v", ["ზ"] = "z", ["ჱ"] = "ē", ["თ"] = "t", ["ꚋ"] = "t", ["ი"] = "i", ["і"] = "i", ["კ"] = "ḳ", ["к"] = "ḳ", ["ლ"] = "l", ["л"] = "l", ["მ"] = "m", ["ნ"] = "n", ["н"] = "n", ["ჲ"] = "y", ["ო"] = "o", ["პ"] = "ṗ", ["п"] = "ṗ", ["ჟ"] = "ž", ["ж"] = "ž", ["რ"] = "r", ["р"] = "r", ["ს"] = "s", ["ტ"] = "ṭ", ["ჳ"] = "w", ["უ"] = "u", ["у"] = "u", ["ფ"] = "p", ["ҧ"] = "p", ["ქ"] = "k", ["ӄ"] = "k", ["ღ"] = "ɣ", ["ҕ"] = "ɣ", ["ყ"] = "q̇", ["შ"] = "š", ["ш"] = "š", ["ჩ"] = "č", ["ч"] = "č", ["ც"] = "c", ["ძ"] = "ʒ", ["წ"] = "c̣", ["ჭ"] = "č̣", ["ꚓ"] = "č̣", ["ხ"] = "x", ["х"] = "x", ["ჴ"] = "q", ["ჯ"] = "ǯ", ["ჰ"] = "h", ["ჵ"] = "ō", ["ჶ"] = "f", ["ჷ"] = "ə", ["ѵ"] = "ə", ["ჸ"] = "ʾ"}; + +function export.tr(text) + text = mw.ustring.gsub(text, "ჳი", "ü") + text = mw.ustring.gsub(text, "ჳე", "ö") + text = mw.ustring.gsub(text, "а̇", "ä") + -- Transliterating vowel nasalization in some dialects + text = mw.ustring.gsub(text, "ჼ", "̃") + text = mw.ustring.gsub(text, ".", tt) + + return text +end + +return export diff --git a/wiktra/wikt/translit/syl-translit.lua b/wiktra/wikt/translit/syl-translit.lua new file mode 100644 index 0000000..7d8abba --- /dev/null +++ b/wiktra/wikt/translit/syl-translit.lua @@ -0,0 +1,132 @@ +-- Transliteration for Sylheti in Sylheti Nagri script +local export = {} +local gsub = mw.ustring.gsub +local match = mw.ustring.match + +local conv = { + -- consonants + ["ꠇ"] = "x", + ["ꠈ"] = "x", + ["ꠉ"] 
= "g", + ["ꠊ"] = "g", + ["ꠌ"] = "s", + ["ꠍ"] = "s", + ["ꠎ"] = "z", + ["ꠏ"] = "z", + ["ꠐ"] = "ṭ", + ["ꠑ"] = "ṭ", + ["ꠒ"] = "ḍ", + ["ꠓ"] = "ḍ", + ["ꠔ"] = "t", + ["ꠕ"] = "t", + ["ꠖ"] = "d", + ["ꠗ"] = "d", + ["ꠘ"] = "n", + ["ꠙ"] = "f", + ["ꠚ"] = "f", + ["ꠛ"] = "b", + ["ꠜ"] = "b", + ["ꠝ"] = "m", + ["ꠞ"] = "r", + ["ꠟ"] = "l", + ["ꠠ"] = "ṛ", + ["ꠡ"] = "ś", + ["ꠢ"] = "h", + ["ꠋ"] = "ṅ", + + -- vowel diacritics + ["ꠣ"] = "a", + ["ꠤ"] = "i", + ["ꠥ"] = "u", + ["ꠦ"] = "e", + ["ꠧ"] = "o", + + -- vowel signs + ["ꠀ"] = "a", + ["ꠁ"] = "i", + ["ꠃ"] = "u", + ["ꠄ"] = "e", + ["ꠅ"] = "o", + + -- virama + ["꠆"] = "", + + -- anusvar + [" ꠋ"] = "ṅ", + + -- dvisvara + ["ꠂ"] = "i", + + -- numerals + ["০"] = "0", + ["১"] = "1", + ["২"] = "2", + ["৩"] = "3", + ["৪"] = "4", + ["৫"] = "5", + ["৬"] = "6", + ["৭"] = "7", + ["৮"] = "8", + ["৯"] = "9", + + -- punctuation + ["꠪"] = ".", -- dari/purn virama + ["꠫"] = "." +} + +local aspirates = "ꠈꠊꠍꠏꠑꠓꠕꠗꠚꠜ" +local consonant, vowel, vowel_sign = "ꠇ-ꠊꠌ-ꠢ", "ꠣ-ꠧ", "ꠀꠁꠃ-ꠅ" +local c = "[" .. consonant .. "]" +local v = "[" .. vowel .. vowel_sign .. "]" +local syncope_pattern = "(" .. v .. c .. v .. c .. ")ô(" .. c .. "ঁ?" .. v .. ")" + +local function rev_string(text) + local result, length = "", mw.ustring.len(text) + for i = 1, length do result = result .. mw.ustring.sub(text, -i, -i) end + return result +end + +function export.tr(text, lang, sc) + -- from [[MOD:as-translit]] + text = gsub(text, "(" .. c .. ")([" .. vowel .. "’?꠆]?)", function(a, b) + local res = a .. (b == "" and "ô" or b) + if match(a, "[" .. aspirates .. "]") then res = res .. "’" end + return res + end) + + for word in mw.ustring.gmatch(text, "[ꠀ-ꠧô’]+") do + local orig_word = word + word = rev_string(word) + word = gsub(word, "^ô(" .. c .. ")(" .. v .. 
")", "%1%2") + while match(word, syncope_pattern) do word = gsub(word, syncope_pattern, "%1%2") end + text = gsub(text, orig_word, rev_string(word)) + end + + text = gsub(text, ".", conv) + + -- ô is really just o + text = mw.ustring.gsub(text, "ô", "o") + + -- velars + text = mw.ustring.gsub(text, "x([iu])", "k%1") + text = mw.ustring.gsub(text, "([iu])x", "%1k") + text = mw.ustring.gsub(text, "xx", "kk") + + -- palatals (I think?) + text = mw.ustring.gsub(text, "ss", "cc") + + -- affricates (I'm just guessing now) + text = mw.ustring.gsub(text, "jj", "zz") + text = mw.ustring.gsub(text, "zs", "jc") + + -- final r/l + text = mw.ustring.gsub(text, "([xrl])o$", "%1") + text = mw.ustring.gsub(text, "([xrl])o ", "%1 ") + + -- tone + text = gsub(text, "’", "́") + + return text +end + +return export diff --git a/wiktra/wikt/translit/ta-translit.lua b/wiktra/wikt/translit/ta-translit.lua new file mode 100644 index 0000000..135ab19 --- /dev/null +++ b/wiktra/wikt/translit/ta-translit.lua @@ -0,0 +1,52 @@ +local export = {} + +local consonants = {["க"] = "k", ["ங"] = "ṅ", ["ச"] = "c", ["ஞ"] = "ñ", ["ட"] = "ṭ", ["ண"] = "ṇ", ["த"] = "t", ["ந"] = "n", ["ப"] = "p", ["ம"] = "m", ["ய"] = "y", ["ர"] = "r", ["ல"] = "l", ["வ"] = "v", ["ழ"] = "ḻ", ["ள"] = "ḷ", ["ற"] = "ṟ", ["ன"] = "ṉ", ["ஶ"] = "ś", ["ஜ"] = "j", ["ஷ"] = "ṣ", ["ஸ"] = "s", ["ஹ"] = "h", ["ஃப"] = "f", ["ஃஜ"] = "z", ["ஃஸ"] = "x", ["ஃ"] = "ḥ"} + +local diacritics = { + ["ா"] = "ā", + ["ி"] = "i", + ["ீ"] = "ī", + ["ு"] = "u", + ["ூ"] = "ū", + ["ெ"] = "e", + ["ே"] = "ē", + ["ை"] = "ai", + ["ொ"] = "o", + ["ோ"] = "ō", + ["ௌ"] = "au", + ["்"] = "", -- halant, supresses the inherent vowel "a" + -- no diacritic + [""] = "a" +} + +local nonconsonants = { + -- vowels + ["அ"] = "’a", + ["ஆ"] = "’ā", + ["இ"] = "’i", + ["ஈ"] = "’ī", + ["உ"] = "’u", + ["ஊ"] = "’ū", + ["எ"] = "’e", + ["ஏ"] = "’ē", + ["ஐ"] = "’ai", + ["ஒ"] = "’o", + ["ஓ"] = "’ō", + ["ஔ"] = "’au" + -- other symbols + -- ['ஃ']='' , +} + +-- translit any words or phrases 
--[[
Transliterates Tamil text (ta-translit).

Each consonant, optionally preceded by aytham (ஃ) and optionally followed by
one vowel sign or the virama, is converted through the "consonants" and
"diacritics" tables; a consonant with no sign receives the inherent vowel
stored under diacritics[""]. Independent vowels are then converted through
"nonconsonants", and the ’ that those entries start with is removed at the
start of the text and after whitespace or punctuation.

Parameters
    text: the text to transliterate.
    lang, sc: accepted but not used by this function.
]]
function export.tr(text, lang, sc)
    text = mw.ustring.gsub(text, "(ஃ?)([க-ஹ])([ா-்]?)", function(h, c, d)
        -- Prefer a combined aytham+consonant entry when one exists.
        local base = consonants[h .. c]
        if base == nil then
            -- Fall back to the raw character when a code point inside the
            -- matched range has no table entry, instead of raising a
            -- nil-concatenation error.
            base = (consonants[h] or h) .. (consonants[c] or c)
        end
        return base .. (diacritics[d] or d)
    end)

    text = mw.ustring.gsub(text, "[அ-ஔ]", nonconsonants)

    text = mw.ustring.gsub(text, "^’", "")
    text = mw.ustring.gsub(text, "([%s%p])’", "%1")

    return text
end
= "T̄", ["цӏ"] = "c̣", ["цц"] = "c̄", ["тӏ"] = "ṭ", ["чч"] = "č̄", ["чӏ"] = "č̣", ["кь"] = "q̇", ["кк"] = "k̄", ["кӏ"] = "ḳ", ["хъ"] = "q", ["къ"] = "q̄", ["гъ"] = "ġ", ["гь"] = "h", ["Цӏ"] = "C̣", ["Цц"] = "C̄", ["Тӏ"] = "Ṭ", ["Чч"] = "Č̄", ["Чӏ"] = "Č̣", ["Кь"] = "Q̇", ["Кк"] = "K̄", ["Кӏ"] = "Ḳ", ["Хъ"] = "Q", ["Къ"] = "Q̄", ["Гъ"] = "Ġ", ["Гь"] = "H", ["уь"] = "u̱", ["Уь"] = "U̱", ["хь"] = "x", ["Хь"] = "X", ["гв"] = "g°", ["Гв"] = "G°", ["кв"] = "k°", ["Кв"] = "K°", ["аь"] = "a̱", ["Аь"] = "A̱"} + +function export.tr(text, lang, sc) + local str_gsub = string.gsub + local UTF8char = "[\1-\127\194-\244][\128-\191]*" + + -- Convert uppercase palochka to lowercase. Lowercase is found in tables + -- above. + text = str_gsub(text, mw.ustring.char(0x4C0), mw.ustring.char(0x4CF)) + + for trigraph, translit in pairs(trigraphs) do text = str_gsub(text, trigraph, translit) end + + for digraph, translit in pairs(digraphs) do text = str_gsub(text, digraph, translit) end + + text = str_gsub(text, UTF8char, tt) + + return text +end + +return export diff --git a/wiktra/wikt/translit/table.lua b/wiktra/wikt/translit/table.lua new file mode 100644 index 0000000..9daa6ab --- /dev/null +++ b/wiktra/wikt/translit/table.lua @@ -0,0 +1,686 @@ +--[[ +------------------------------------------------------------------------------------ +-- table (formerly TableTools) -- +-- -- +-- This module includes a number of functions for dealing with Lua tables. -- +-- It is a meta-module, meant to be called from other Lua modules, and should -- +-- not be called directly from #invoke. -- +------------------------------------------------------------------------------------ +--]] --[[ + Inserting new values into a table using a local "index" variable, which is + incremented each time, is faster than using "table.insert(t, x)" or + "t[#t + 1] = x". See the talk page. +]] local libraryUtil = require("libraryUtil") + +local export = {} + +-- Define often-used variables and functions. 
--[[
------------------------------------------------------------------------------------
-- isNan
--
-- Returns true if the given value is a NaN number, and false otherwise
-- (including for every non-number). Useful for deciding whether a value can
-- be used as a table key, since Lua raises an error when a NaN is used as one.
--
-- NaN is the only value that is not equal to itself, so "v ~= v" detects it
-- on every platform. The textual form of NaN produced by tostring() varies
-- by platform ("nan", "-nan", ...), so it must not be compared against a
-- fixed string.
------------------------------------------------------------------------------------
--]]
function export.isNan(v)
    return type(v) == "number" and v ~= v
end
--[[
    Recursive deep copy.

    orig: the value to copy. Tables are copied recursively (keys as well as
        values); any other value is returned as is.
    includeMetatable: when truthy, the metatable of each copied table is
        deep-copied too and attached to the copy.
    already_seen: optional table mapping originals to their copies, shared by
        the whole recursion.

    Returns the copy.
]]
local function deepcopy(orig, includeMetatable, already_seen)
    -- Stores copies of tables indexed by the original table.
    already_seen = already_seen or {}

    local copy = already_seen[orig]
    if copy ~= nil then return copy end

    if type(orig) == "table" then
        copy = {}
        -- Register the copy before descending: a table that (directly or
        -- indirectly) contains itself is then resolved to this copy instead
        -- of recursing without end.
        already_seen[orig] = copy

        for orig_key, orig_value in pairs(orig) do
            copy[deepcopy(orig_key, includeMetatable, already_seen)] = deepcopy(orig_value, includeMetatable, already_seen)
        end

        if includeMetatable then
            local mt = getmetatable(orig)
            if mt ~= nil then
                local mt_copy = deepcopy(mt, includeMetatable, already_seen)
                setmetatable(copy, mt_copy)
            end
        end
    else -- number, string, boolean, etc
        copy = orig
    end
    return copy
end
--[[
------------------------------------------------------------------------------------
-- append
--
-- Returns a new array holding the array elements of t1 followed by those of
-- t2 (compare the Lisp expression (append list1 list2)). Only the parts
-- reachable with ipairs are copied; neither input is modified.
------------------------------------------------------------------------------------
--]]
function export.append(t1, t2)
    checkType("append", 1, t1, "table")
    checkType("append", 2, t2, "table")

    local joined = {}
    local n = 0
    for _, source in ipairs({t1, t2}) do
        for _, item in ipairs(source) do
            n = n + 1
            joined[n] = item
        end
    end
    return joined
end
--[[
------------------------------------------------------------------------------------
-- maxIndex
--
-- Returns the largest positive-integer key of t, or 0 when t has none.
--
-- numKeys returns the keys already sorted in ascending order, so the last
-- element is the maximum. Reading it directly avoids math.max(unpack(keys)),
-- which raises an error once the key list is larger than unpack can expand.
------------------------------------------------------------------------------------
--]]
function export.maxIndex(t)
    checkType("maxIndex", 1, t, "table")
    -- The argument was validated above, so ask numKeys to skip its own check.
    local positiveIntegerKeys = export.numKeys(t, true)
    return positiveIntegerKeys[#positiveIntegerKeys] or 0
end
"$" + + local nums = {} + local index = 1 + for k, _ in pairs(t) do + if type(k) == "string" then + local num = mw.ustring.match(k, pattern) + if num then + nums[index] = tonumber(num) + index = index + 1 + end + end + end + table.sort(nums) + return nums +end + +--[[ +------------------------------------------------------------------------------------ +-- numData +-- +-- Given a table with keys like ("foo1", "bar1", "foo2", "baz2"), returns a table +-- of subtables in the format +-- { [1] = {foo = 'text', bar = 'text'}, [2] = {foo = 'text', baz = 'text'} } +-- Keys that don't end with an integer are stored in a subtable named "other". +-- The compress option compresses the table so that it can be iterated over with +-- ipairs. +------------------------------------------------------------------------------------ +--]] +function export.numData(t, compress) + local check = _check("numData") + check(1, t, "table") + check(2, compress, "boolean", true) + + local ret = {} + for k, v in pairs(t) do + local prefix, num = tostring(k):match("^([^0-9]*)([1-9][0-9]*)$") + if num then + num = tonumber(num) + local subtable = ret[num] or {} + if prefix == "" then + -- Positional parameters match the blank string; put them at the start of the subtable instead. + prefix = 1 + end + subtable[prefix] = v + ret[num] = subtable + else + local subtable = ret.other or {} + subtable[k] = v + ret.other = subtable + end + end + if compress then + local other = ret.other + ret = export.compressSparseArray(ret) + ret.other = other + end + return ret +end + +--[[ +------------------------------------------------------------------------------------ +-- compressSparseArray +-- +-- This takes an array with one or more nil values, and removes the nil values +-- while preserving the order, so that the array can be safely traversed with +-- ipairs. 
------------------------------------------------------------------------------------
--]]
function export.compressSparseArray(t)
    checkType("compressSparseArray", 1, t, "table")
    -- numKeys yields the positive-integer keys in ascending order; copying the
    -- values in that order packs them into consecutive positions.
    local dense = {}
    for position, key in ipairs(export.numKeys(t)) do
        dense[position] = t[key]
    end
    return dense
end

--[[
------------------------------------------------------------------------------------
-- sparseIpairs
--
-- Iterator for sparse arrays. Used like ipairs, but it does not stop at the
-- first nil: it visits every positive-integer key in ascending order and
-- yields (key, value) pairs.
------------------------------------------------------------------------------------
--]]
function export.sparseIpairs(t)
    checkType("sparseIpairs", 1, t, "table")
    -- The key list is computed once, when the iterator is created.
    local keys = export.numKeys(t)
    local cursor = 0
    return function()
        cursor = cursor + 1
        local key = keys[cursor]
        if not key then
            return nil, nil
        end
        return key, t[key]
    end
end

--[[
------------------------------------------------------------------------------------
-- size
--
-- Counts the key/value pairs of a table by walking it with pairs. This works on
-- arrays as well, but for arrays the # operator is more efficient.
------------------------------------------------------------------------------------
--]]
function export.size(t)
    checkType("size", 1, t, "table")
    local count = 0
    for _ in pairs(t) do
        count = count + 1
    end
    return count
end

--[[
-- Returns the length of a table: the number n such that t[1] .. t[n] are all
-- non-nil and t[n + 1] is nil. Similar to the # operator, but it may return a
-- different value when there are gaps in the array portion of the table.
-- Intended to be used on data loaded with mw.loadData. For other tables, use #.
--]]
function export.length(t)
    local n = 0
    while t[n + 1] ~= nil do
        n = n + 1
    end
    return n
end

--[[
Recursively compare two values that may be tables, including tables with
nested tables as values. Return true if both values are structurally equal.
Note that this handles arbitrary levels of nesting. If all tables are known
to be lists (with only integral keys), use export.deepEqualsList, which will
be more efficient.

NOTE: This is *NOT* smart enough to properly handle cycles; in such a case, it
will get into an infinite loop.
]]
function export.deepEquals(x, y)
    -- Anything that is not a pair of tables is compared with plain ==.
    if type(x) ~= "table" or type(y) ~= "table" then
        return x == y
    end
    -- Every entry of x must have a structurally equal counterpart in y ...
    local keysInX = 0
    for key, value in pairs(x) do
        if not export.deepEquals(value, y[key]) then
            return false
        end
        keysInX = keysInX + 1
    end
    -- ... and y must not hold any additional entries.
    return keysInX == export.size(y)
end

--[[
Recursively compare two values that may be lists (i.e. tables with integral
keys), including lists with nested lists as values. Return true if both values
are structurally equal. Any depth of nesting is handled.
Results are undefined if tables with non-integral keys are present anywhere in
either structure; if that may be the case, use export.deepEquals, which will
handle such tables correctly but be less efficient on lists than
export.deepEqualsList.

NOTE: This is *NOT* smart enough to properly handle cycles; in such a case, it
will get into an infinite loop.
]]
function export.deepEqualsList(x, y)
    if type(x) ~= "table" or type(y) ~= "table" then
        return x == y
    end
    -- Lists of different length can never be equal.
    if #x ~= #y then
        return false
    end
    for index, item in ipairs(x) do
        if not export.deepEqualsList(item, y[index]) then
            return false
        end
    end
    return true
end

--[[
Given a list and a value to be found, return true if the value is in the array
portion of the list. Shallow comparison is used unless `deepCompare` is given
(in which case comparison is done using `deepEqualsList`).
]]
function export.contains(list, x, deepCompare)
    checkType("contains", 1, list, "table")
    for _, candidate in ipairs(list) do
        local found
        if deepCompare then
            found = export.deepEqualsList(candidate, x)
        else
            found = candidate == x
        end
        if found then
            return true
        end
    end
    return false
end

--[[
Given a general table and a value to be found, return true if the value occurs
as a value under any key of the table (array or hashmap portion). Shallow
comparison is used unless `deepCompare` is given (in which case comparison is
done using `deepEquals`).
]]
function export.tableContains(tbl, x, deepCompare)
    checkType("tableContains", 1, tbl, "table")
    for _, candidate in pairs(tbl) do
        local found
        if deepCompare then
            found = export.deepEquals(candidate, x)
        else
            found = candidate == x
        end
        if found then
            return true
        end
    end
    return false
end

--[[
Given a list and a value to be inserted, insert the value only if it is not
already present in the list; otherwise leave the list untouched. Presence is
decided by export.contains, so shallow comparison is used unless `deepCompare`
is given (in which case comparison is done using `deepEqualsList`). The value
is appended to the end, like the default behavior of table.insert(), unless
`pos` is given, in which case it is inserted at position `pos` (i.e. before
the existing item at position `pos`).

NOTE: The order of `item` and `pos` is reversed in comparison to table.insert(),
which uses `table.insert(list, item)` to insert at the end but
`table.insert(list, pos, item)` to insert at position POS.
]]
function export.insertIfNot(list, item, pos, deepCompare)
    if export.contains(list, item, deepCompare) then
        return
    end
    if pos then
        table.insert(list, pos, item)
    else
        table.insert(list, item)
    end
end

--[[
    Finds key for specified value in a given table.
    Roughly equivalent to reversing the key-value pairs in the table –
        reversed_table = { [value1] = key1, [value2] = key2, ... }
    – and then returning reversed_table[valueToFind].
+ + The value can only be a string or a number + (not nil, a boolean, a table, or a function). + + Only reliable if there is just one key with the specified value. + Otherwise, the function returns the first key found, + and the output is unpredictable. +]] +function export.keyFor(t, valueToFind) + local check = _check("keyFor") + check(1, t, "table") + check(2, valueToFind, {"string", "number"}) + + for key, value in pairs(t) do if value == valueToFind then return key end end + + return nil +end + +--[[ + The default sorting function used in export.keysToList if no keySort + is defined. +]] +local function defaultKeySort(key1, key2) + -- "number" < "string", so numbers will be sorted before strings. + local type1, type2 = type(key1), type(key2) + if type1 ~= type2 then + return type1 < type2 + else + return key1 < key2 + end +end + +--[[ + Returns a list of the keys in a table, sorted using either the default + table.sort function or a custom keySort function. + If there are only numerical keys, numKeys is probably more efficient. +]] +function export.keysToList(t, keySort, checked) + if not checked then + local check = _check("keysToList") + check(1, t, "table") + check(2, keySort, "function", true) + end + + local list = {} + local index = 1 + for key, _ in pairs(t) do + list[index] = key + index = index + 1 + end + + -- Place numbers before strings, otherwise sort using <. + if not keySort then keySort = defaultKeySort end + + table.sort(list, keySort) + + return list +end + +--[[ + Iterates through a table, with the keys sorted using the keysToList function. + If there are only numerical keys, sparseIpairs is probably more efficient. 
]]
function export.sortedPairs(t, keySort)
    -- Validate under this function's own name so that argument errors point at
    -- sortedPairs rather than at keysToList.
    local check = _check("sortedPairs")
    check(1, t, "table")
    check(2, keySort, "function", true)

    -- The arguments are already validated, so keysToList is told to skip its checks.
    local list = export.keysToList(t, keySort, true)

    local i = 0
    return function()
        i = i + 1
        local key = list[i]
        if key ~= nil then
            return key, t[key]
        else
            return nil, nil
        end
    end
end

--[[
    Iterates over the array part of a list from the last element (index #list)
    down towards index 1, yielding (index, value) pairs. Iteration stops at the
    first nil value encountered.
]]
function export.reverseIpairs(list)
    checkType("reverseIpairs", 1, list, "table")

    local i = #list + 1
    return function()
        i = i - 1
        if list[i] ~= nil then
            return i, list[i]
        else
            return nil, nil
        end
    end
end

--[=[
    Joins an array with serial comma and serial conjunction, normally "and".
    An improvement on mw.text.listToText, which doesn't properly handle serial
    commas.

    Results by length of `seq`:
        0 items: ""
        1 item:  the item itself
        2 items: "a and b"
        3+ items: "a, b, and c"

    Options:
        - conj
            Conjunction to use; defaults to "and".
        - italicizeConj
            Italicize conjunction: for [[Module:Template:also]]
        - dontTag
            Don't tag the serial comma and serial "and". For error messages, in
            which HTML cannot be used. In this version the comma and the
            conjunction are always emitted as plain text, so the option is
            accepted but does not change the output.
]=]
function export.serialCommaJoin(seq, options)
    local check = _check("serialCommaJoin", "table")
    check(1, seq)
    check(2, options, true)

    local length = #seq
    if length == 0 then
        return ""
    elseif length == 1 then
        return seq[1] -- nothing to join
    end

    options = options or {}
    local conj = options.conj or "and"
    if options.italicizeConj then
        conj = "''" .. conj .. "''"
    end

    if length == 2 then
        -- Two items take the conjunction only, without a comma.
        return seq[1] .. " " .. conj .. " " .. seq[2]
    end

    -- Three or more items: comma-separate all but the last, then add the serial
    -- comma and the conjunction before the final item.
    return table.concat(seq, ", ", 1, length - 1) .. "," .. " " .. conj .. " " .. seq[length]
end

--[[
    Concatenates all values in the table that are indexed by a number, in order.
+ sparseConcat{ a, nil, c, d } => "acd" + sparseConcat{ nil, b, c, d } => "bcd" +]] +function export.sparseConcat(t, sep, i, j) + local list = {} + + local list_i = 0 + for _, v in export.sparseIpairs(t) do + list_i = list_i + 1 + list[list_i] = v + end + + return table.concat(list, sep, i, j) +end + +--[[ + Values of numberic keys in array portion of table are reversed: + { "a", "b", "c" } -> { "c", "b", "a" } +--]] +function export.reverse(t) + checkType("reverse", 1, t, "table") + + local new_t = {} + local new_t_i = 1 + for i = #t, 1, -1 do + new_t[new_t_i] = t[i] + new_t_i = new_t_i + 1 + end + return new_t +end + +function export.reverseConcat(t, sep, i, j) return table.concat(export.reverse(t), sep, i, j) end + +-- { "a", "b", "c" } -> { a = 1, b = 2, c = 3 } +function export.invert(array) + checkType("invert", 1, array, "table") + + local map = {} + for i, v in ipairs(array) do map[v] = i end + + return map +end + +--[[ + { "a", "b", "c" } -> { ["a"] = true, ["b"] = true, ["c"] = true } +--]] +function export.listToSet(t) + checkType("listToSet", 1, t, "table") + + local set = {} + for _, item in ipairs(t) do set[item] = true end + return set +end + +--[[ + Returns true if all keys in the table are consecutive integers starting at 1. 
+--]] +function export.isArray(t) + checkType("isArray", 1, t, "table") + + local i = 0 + for _ in pairs(t) do + i = i + 1 + if t[i] == nil then return false end + end + return true +end + +return export diff --git a/wiktra/wikt/translit/talu-translit.lua b/wiktra/wikt/translit/talu-translit.lua new file mode 100644 index 0000000..63d571a --- /dev/null +++ b/wiktra/wikt/translit/talu-translit.lua @@ -0,0 +1,115 @@ +local export = {} +local gsub = mw.ustring.gsub + +local tt = { + -- consonants + ["ᦀ"] = "˙ʼ", + ["ᦁ"] = "ʼ", + ["ᦂ"] = "k\204\135", + ["ᦃ"] = "x\204\135", + ["ᦄ"] = "n\204\135g", + ["ᦅ"] = "k", + ["ᦆ"] = "x", + ["ᦇ"] = "ng", + ["ᦈ"] = "t\204\135s", + ["ᦉ"] = "s\204\135", + ["ᦊ"] = "y\204\135", + ["ᦋ"] = "ts", + ["ᦌ"] = "s", + ["ᦍ"] = "y", + ["ᦎ"] = "t\204\135", + ["ᦏ"] = "t\204\135h", + ["ᦐ"] = "n\204\135", + ["ᦑ"] = "t", + ["ᦒ"] = "th", + ["ᦓ"] = "n", + ["ᦔ"] = "p\204\135", + ["ᦕ"] = "p\204\135h", + ["ᦖ"] = "m\204\135", + ["ᦗ"] = "p", + ["ᦘ"] = "ph", + ["ᦙ"] = "m", + ["ᦚ"] = "f\204\135", + ["ᦛ"] = "v\204\135", + ["ᦜ"] = "l\204\135", + ["ᦝ"] = "f", + ["ᦞ"] = "v", + ["ᦟ"] = "l", + ["ᦠ"] = "h\204\135", + ["ᦡ"] = "d\204\135", + ["ᦢ"] = "b\204\135", + ["ᦣ"] = "h", + ["ᦤ"] = "d", + ["ᦥ"] = "b", + ["ᦦ"] = "k\204\135w", + ["ᦧ"] = "x\204\135w", + ["ᦨ"] = "kw", + ["ᦩ"] = "xw", + ["ᦪ"] = "s\204\135w", + ["ᦫ"] = "sw", + -- vowels and finals (visual ordering by Unicode 8) + ["ᦰ"] = "!", + ["ᦱ"] = "aa", + ["ᦲ"] = "ii", + ["ᦳ"] = "u", + ["ᦴ"] = "uu", + ["ᦸ"] = "oa", + ["ᦹ"] = "ue", + ["ᦵ"] = "e", + ["ᦶ"] = "ae", + ["ᦷ"] = "o", + ["ᦺ"] = "ay", -- this line to be swapped + ["ᦻ"] = "aay", + ["ᦼ"] = "uy", + ["ᦽ"] = "oy", + ["ᦾ"] = "oay", + ["ᦿ"] = "uey", + ["ᧀ"] = "iiy", + ["ᧁ"] = "w", + ["ᧂ"] = "ng", + ["ᧃ"] = "n", + ["ᧄ"] = "m", + ["ᧅ"] = "k", + ["ᧆ"] = "d", + ["ᧇ"] = "b", + -- tones + ["ᧈ"] = "1", + ["ᧉ"] = "2", + -- numerals + ["᧐"] = "0", + ["᧑"] = "1", + ["᧒"] = "2", + ["᧓"] = "3", + ["᧔"] = "4", + ["᧕"] = "5", + ["᧖"] = "6", + ["᧗"] = "7", + ["᧘"] = "8", + ["᧙"] 
= "9", + ["᧚"] = "1", + -- ligatures ᧞ ᧟ sorted after ᦶᦜ + ["᧞"] = "l\204\135ae", + ["᧟"] = "l\204\135aew" +} + +function export.tr(text, lang, sc, debug_mode) + + if type(text) == "table" then -- called directly from a template + text = text.args[1] + end + + text = gsub(text, "([ᦵᦶᦷᦺ])([ᦀ-ᦫ])", "%2%1") -- swapped + text = gsub(text, "([ᦀ-ᦫ])([ᧁ-ᧇ])", "%1a%2") + + text = gsub(text, ".", tt) + + text = gsub(text, "aa!", "a") + text = gsub(text, "ii!", "i") + text = gsub(text, "uu!", "u") + text = gsub(text, "eii", "oe") + + return text + +end + +return export diff --git a/wiktra/wikt/translit/te-translit.lua b/wiktra/wikt/translit/te-translit.lua new file mode 100644 index 0000000..dbecf02 --- /dev/null +++ b/wiktra/wikt/translit/te-translit.lua @@ -0,0 +1,75 @@ +local export = {} + +local consonants = {["క"] = "k", ["ఖ"] = "kh", ["గ"] = "g", ["ఘ"] = "gh", ["ఙ"] = "ṅ", ["చ"] = "c", ["ఛ"] = "ch", ["జ"] = "j", ["ఝ"] = "jh", ["ఞ"] = "ñ", ["ట"] = "ṭ", ["ఠ"] = "ṭh", ["డ"] = "ḍ", ["ఢ"] = "ḍh", ["ణ"] = "ṇ", ["త"] = "t", ["థ"] = "th", ["ద"] = "d", ["ధ"] = "dh", ["న"] = "n", ["ప"] = "p", ["ఫ"] = "ph", ["బ"] = "b", ["భ"] = "bh", ["మ"] = "m", ["య"] = "y", ["ర"] = "r", ["ల"] = "l", ["వ"] = "v", ["ళ"] = "ḷ", ["శ"] = "ś", ["ష"] = "ṣ", ["స"] = "s", ["హ"] = "h", ["ఱ"] = "ṛ", ["ౘ"] = "ts", ["ౙ"] = "dz", ["ౚ"] = "ṟ"} + +local diacritics = {["ా"] = "ā", ["ి"] = "i", ["ీ"] = "ī", ["ు"] = "u", ["ూ"] = "ū", ["ృ"] = "r̥", ["ౄ"] = "r̥̄", ["ె"] = "e", ["ే"] = "ē", ["ై"] = "ai", ["ొ"] = "o", ["ో"] = "ō", ["ౌ"] = "au", ["్"] = ""} +local tt = { + -- vowels + ["అ"] = "a", + ["ఆ"] = "ā", + ["ఇ"] = "i", + ["ఈ"] = "ī", + ["ఉ"] = "u", + ["ఊ"] = "ū", + ["ఋ"] = "r̥", + ["ౠ"] = "r̥̄", + ["ఌ"] = "l̥", + ["ౡ"] = "l̥̄", + ["ఎ"] = "e", + ["ఏ"] = "ē", + ["ఐ"] = "ai", + ["ఒ"] = "o", + ["ఓ"] = "ō", + ["ఔ"] = "au", + ["అం"] = "aṅ", + ["అఁ"] = "aṃ", + ["అః"] = "ah", + -- other symbols + ["ం"] = "ṃ", -- anusvara + ["ః"] = "ḥ", -- visarga + ["ఁ"] = "ṅ", -- candrabindu/arthanusvāra/aranusa + ["ఽ"] = "’", -- 
avagraha + -- digits + ["౦"] = "0", + ["౧"] = "1", + ["౨"] = "2", + ["౩"] = "3", + ["౪"] = "4", + ["౫"] = "5", + ["౬"] = "6", + ["౭"] = "7", + ["౮"] = "8", + ["౯"] = "9", + ["౸"] = "0⁄4", + ["౹"] = "¼", + ["౺"] = "2⁄4", + ["౻"] = "¾", + ["౦"] = "0⁄16", + ["౼"] = "1⁄16", + ["౽"] = "2⁄16", + ["౾"] = "3⁄16" +} + +-- translit any words or phrases +function export.tr(text, lang, sc) + text = mw.ustring.gsub(text, "([కఖగఘఙచఛజఝఞటఠడఢణతథదధనపఫబభమయరలవళశషసహఱౘౙౚ])" .. "([ాిీుూృ̥ౄ̥̄ెేైొోౌ్]?)", function(c, d) + if d == "" then + return consonants[c] .. "a" + else + return consonants[c] .. diacritics[d] + end + end) + + text = mw.ustring.gsub(text, ".", tt) + + -- anusvara + text = mw.ustring.gsub(text, "ṃ([kgṅ])", "ṅ%1") + text = mw.ustring.gsub(text, "ṃ([cjñ])", "ñ%1") + text = mw.ustring.gsub(text, "ṃ([ṭḍṇ])", "ṇ%1") + text = mw.ustring.gsub(text, "ṃ([tdn])", "n%1") + text = mw.ustring.gsub(text, "ṃ([pbm])", "m%1") + + return text +end + +return export diff --git a/wiktra/wikt/translit/tg-translit.lua b/wiktra/wikt/translit/tg-translit.lua new file mode 100644 index 0000000..fe838ae --- /dev/null +++ b/wiktra/wikt/translit/tg-translit.lua @@ -0,0 +1,96 @@ +local export = {} + +local tt = { + ["т"] = "t", + ["Т"] = "T", + ["р"] = "r", + ["Р"] = "R", + ["ф"] = "f", + ["Ф"] = "F", + ["ю"] = "yu", + ["Ю"] = "Yu", + ["ш"] = "š", + ["Ш"] = "Š", + ["ҳ"] = "h", + ["Ҳ"] = "H", + ["ъ"] = "ʾ", + ["Ъ"] = "ʾ", + ["н"] = "n", + ["Н"] = "N", + ["п"] = "p", + ["П"] = "P", + ["й"] = "y", + ["Й"] = "Y", + ["л"] = "l", + ["Л"] = "L", + ["з"] = "z", + ["З"] = "Z", + ["е"] = "e", + ["Е"] = "E", + ["г"] = "g", + ["Г"] = "G", + ["б"] = "b", + ["Б"] = "B", + ["у"] = "u", + ["У"] = "U", + ["с"] = "s", + ["С"] = "S", + ["х"] = "x", + ["Х"] = "X", + ["ч"] = "č", + ["Ч"] = "Č", + ["я"] = "ya", + ["Я"] = "Ya", + ["м"] = "m", + ["М"] = "M", + ["о"] = "o", + ["О"] = "O", + ["и"] = "i", + ["И"] = "I", + ["ё"] = "yo", + ["Ё"] = "Yo", + ["ж"] = "ž", + ["Ж"] = "Ž", + ["к"] = "k", + ["К"] = "K", + ["д"] = "d", + 
["Д"] = "D", + ["в"] = "v", + ["В"] = "V", + ["а"] = "a", + ["А"] = "A", + ["ҷ"] = "j", + ["Ҷ"] = "J", + ["ӯ"] = "ü", + ["Ӯ"] = "Ü", + ["э"] = "e", + ["Э"] = "E", + ["ӣ"] = "ī", + ["Ӣ"] = "Ī", + ["қ"] = "q", + ["Қ"] = "Q", + ["ғ"] = "ġ", + ["Ғ"] = "Ġ", + -- dated, removed in the 1998 reform + ["Ц"] = "Ts", + ["ц"] = "ts", -- replaced with "тс", sometimes "с" + ["Щ"] = "Šč", + ["щ"] = "šč", -- replaced with "шч" + ["Ы"] = "Y", + ["ы"] = "y", -- replaced with "и" + ["Ь"] = "'", + ["ь"] = "'" -- removed entirely +}; + +function export.tr(text, lang, sc) + text = mw.ustring.gsub(text, "([АОУЕЯЁЮИӢЕЪаоуэяёюиӣе][́̀]?)([ЕеИиӢӣ])", function(a, e) + local iotated = {["е"] = "ye", ["Е"] = "Ye", ["и"] = "yi", ["И"] = "Yi", ["ӣ"] = "yī", ["Ӣ"] = "Yī"} + return a .. iotated[e] + end) + + text:gsub("^Е", "Ye"):gsub("^е", "ye") + + return (mw.ustring.gsub(text, ".", tt)) +end + +return export diff --git a/wiktra/wikt/translit/translit-redirect.lua b/wiktra/wikt/translit/translit-redirect.lua new file mode 100644 index 0000000..d6cf1d7 --- /dev/null +++ b/wiktra/wikt/translit/translit-redirect.lua @@ -0,0 +1,30 @@ +local export = {} + +function export.tr(text, lang, sc, debug_mode) + if not sc then sc = require("scripts").findBestScript(text, require("languages").getByCode(lang)):getCode() end + + local language_data = mw.loadData("translit-redirect/data")[lang] + + if language_data then + local script_data = language_data[sc] + + if script_data then + if script_data.module then + local success, translit_module = pcall(require, "" .. script_data.module) + + if success then + return translit_module.tr(text, lang, sc, debug_mode) + else + error(translit_module) + end + else + return nil + end + else + require("debug").track {"translit-redirect/incorrect-script/" .. lang, "translit-redirect/incorrect-script/" .. lang .. "/" .. sc} + mw.log("script code (" .. sc .. ") for language code " .. lang .. " not found in Module:translit-redirect/data; text: " .. 
text) + end + end +end + +return export diff --git a/wiktra/wikt/translit/translit-redirect/data.lua b/wiktra/wikt/translit/translit-redirect/data.lua new file mode 100644 index 0000000..381543a --- /dev/null +++ b/wiktra/wikt/translit/translit-redirect/data.lua @@ -0,0 +1,70 @@ +result = { + ["ang"] = {["Latn"] = {}, ["Runr"] = {["module"] = "Runr-translit"}}, + ["arc"] = {["Armi"] = {["module"] = "Armi-translit"}, ["Hebr"] = {}, ["Palm"] = {["module"] = "Palm-translit"}, ["Syrc"] = {}}, + ["byn"] = {["Latn"] = {}, ["Ethi"] = {["module"] = "Ethi-translit"}}, + ["cr"] = {["Latn"] = {}, ["Cans"] = {["module"] = "cr-translit"}}, + ["grc"] = {["polytonic"] = {["module"] = "grc-translit"}, ["Cprt"] = {["module"] = "Cprt-translit"}, ["noError"] = true}, + ["khb"] = { + ["Talu"] = {["module"] = "Talu-translit"}, + ["Lana"] = { + -- ["module"] = "Lana-translit", + } + }, + ["iu"] = {["Cans"] = {["module"] = "iu-translit"}, ["Latn"] = {}}, + ["kyu"] = {["Kali"] = {["module"] = "Kali-translit"}, ["Mymr"] = {}, ["Latn"] = {}}, + ["lzz"] = {["Geor"] = {["module"] = "Geor-translit"}, ["Latn"] = {}}, + ["new"] = {["Deva"] = {["module"] = "new-translit"}, ["Newa"] = {["module"] = "new-Newa-translit"}}, + ["mai"] = {["Deva"] = {["module"] = "mai-translit"}, ["Tirh"] = {["module"] = "mai-Tirh-translit"}, ["Kthi"] = {["module"] = "bho-Kthi-translit"}}, + ["non"] = {["Latn"] = {}, ["Runr"] = {["module"] = "Runr-translit"}}, + ["pa"] = {["debug_mode"] = true, ["Guru"] = {["module"] = "Guru-translit"}, ["pa-Arab"] = {["module"] = "pa-Arab-translit"}}, + ["pal"] = { + ["Latn"] = {}, + ["Phli"] = {["module"] = "Phli-translit"}, + ["Avst"] = {["module"] = "Avst-translit"}, + ["pal-Avst"] = {["module"] = "Avst-translit"}, + ["Mani"] = {["module"] = "Mani-translit"}, + ["Phlv"] = {["module"] = "Phlv-translit"}, + ["Phlp"] = { + -- ["module"] = "Phlp-translit", + } + }, + ["pi"] = {["Brah"] = {["module"] = "Brah-translit"}, ["Deva"] = {["module"] = "sa-translit"}, ["Sinh"] = {["module"] = 
"si-translit"}, ["Beng"] = {["module"] = "pi-translit"}, ["Mymr"] = {["module"] = "pi-translit"}, ["Thai"] = {["module"] = "pi-translit"}, ["Lana"] = {["module"] = "pi-translit"}, ["Laoo"] = {["module"] = "pi-translit"}, ["Khmr"] = {["module"] = "pi-translit"}}, + ["rhg"] = {["Rohg"] = {["module"] = "Rohg-translit"}, ["Latn"] = {}, ["Arab"] = {}}, + ["qwm"] = {["Latn"] = {}, ["Arab"] = {}, ["Armn"] = {["module"] = "Armn-translit"}}, + ["rmi"] = {["Latn"] = {}, ["Armn"] = {["module"] = "Armn-translit"}}, + ["sa"] = {["Deva"] = {["module"] = "sa-translit"}, ["Brah"] = {["module"] = "Brah-translit"}, ["Gujr"] = {["module"] = "sa-Gujr-translit"}, ["as-Beng"] = {["module"] = "sa-Beng-translit"}, ["Beng"] = {["module"] = "sa-Beng-translit"}, ["Knda"] = {["module"] = "sa-Knda-translit"}, ["Modi"] = {["module"] = "sa-Modi-translit"}, ["Orya"] = {["module"] = "sa-Orya-translit"}, ["Java"] = {["module"] = "sa-Java-translit"}, ["Khmr"] = {["module"] = "pi-translit"}, ["Sinh"] = {["module"] = "si-translit"}, ["Mymr"] = {["module"] = "pi-translit"}, ["Thai"] = {["module"] = "pi-translit"}, ["Lana"] = {["module"] = "pi-translit"}, ["Laoo"] = {["module"] = "pi-translit"}, ["noError"] = true}, + ["udi"] = {["Latn"] = {}, ["Armn"] = {["module"] = "Armn-translit"}, ["Geor"] = {["module"] = "Geor-translit"}, ["Cyrl"] = {["module"] = "udi-translit"}}, + ["xpr"] = {["Mani"] = {["module"] = "Mani-translit"}, ["Latn"] = {}, ["Prti"] = {["module"] = "Prti-translit"}, ["Phlv"] = {}, ["None"] = {}}, + ["sog"] = {["Sogd"] = {["module"] = "Sogd-translit"}, ["Mani"] = {["module"] = "Mani-translit"}, ["Sogo"] = {["module"] = "Sogo-translit"}, ["Syrc"] = {}}, + ["inc-ash"] = {["Brah"] = {["module"] = "Brah-translit"}, ["Khar"] = {["module"] = "Khar-translit"}}, + ["inc-opa"] = {["Guru"] = {["module"] = "Guru-translit"}, ["pa-Arab"] = {["module"] = "pa-Arab-translit"}}, + ["inc-pra"] = {["Brah"] = {["module"] = "Brah-translit"}, ["Deva"] = {["module"] = "inc-pra-Deva-translit"}, ["Knda"] = 
{["module"] = "inc-pra-Knda-translit"}}, + ["kok"] = { + ["Deva"] = { + -- ["module"] = "kok-translit", + }, + ["Knda"] = {["module"] = "kn-translit"}, + ["Mlym"] = {["module"] = "ml-translit"}, + ["Arab"] = { + -- ["module"] = "kok-Arab-translit", + } + }, + ["omr"] = {["Deva"] = {["module"] = "sa-translit"}, ["Modi"] = {["module"] = "Modi-translit"}}, + ["bho"] = {["Deva"] = {["module"] = "bho-translit"}, ["Kthi"] = {["module"] = "bho-Kthi-translit"}}, + ["wbl"] = { + ["Arab"] = { + -- ["module"] = "Arab-translit", + }, + ["Cyrl"] = {["module"] = "tg-translit"}, + ["Latn"] = {} + }, + ["ks"] = {["ks-Arab"] = {["module"] = "ks-Arab-translit"}, ["Deva"] = {["module"] = "ks-Deva-translit"}, ["Shrd"] = {["module"] = "Shrd-translit"}, ["Latn"] = {}}, + ["xco"] = {["Chrs"] = {["module"] = "Chrs-translit"}}, + ["xpu"] = {["Phnx"] = {["module"] = "Phnx-translit"}, ["Grek"] = {}, ["Latn"] = {}}, + ["phn"] = {["Phnx"] = {["module"] = "Phnx-translit"}}, + ["xsr"] = {["Deva"] = {["module"] = "xsr-deva-translit"}, ["Tibt"] = {["module"] = "xsr-translit"}} +} + +result["kmr"] = result["ku"] + +return result diff --git a/wiktra/wikt/translit/tt-translit.lua b/wiktra/wikt/translit/tt-translit.lua new file mode 100644 index 0000000..b34505f --- /dev/null +++ b/wiktra/wikt/translit/tt-translit.lua @@ -0,0 +1,105 @@ +local export = {} + +local tt = { + ["ү"] = "ü", + ["Ү"] = "Ü", + ["т"] = "t", + ["Т"] = "T", + ["р"] = "r", + ["Р"] = "R", + ["ф"] = "f", + ["Ф"] = "F", + ["ю"] = "yu", + ["Ю"] = "Yu", + ["ш"] = "ş", + ["Ш"] = "Ş", + ["ь"] = "’", + ["Ь"] = "’", + ["ъ"] = "ʺ", + ["Ъ"] = "ʺ", + ["н"] = "n", + ["Н"] = "N", + ["п"] = "p", + ["П"] = "P", + ["й"] = "y", + ["Й"] = "Y", + ["л"] = "l", + ["Л"] = "L", + ["з"] = "z", + ["З"] = "Z", + ["е"] = "e", + ["Е"] = "E", + ["г"] = "g", + ["Г"] = "G", + ["б"] = "b", + ["Б"] = "B", + ["у"] = "u", + ["У"] = "U", + ["с"] = "s", + ["С"] = "S", + ["х"] = "x", + ["Х"] = "X", + ["ч"] = "ç", + ["Ч"] = "Ç", + ["щ"] = "şç", + ["Щ"] = "Şç", + ["я"] = 
"ya", + ["Я"] = "Ya", + ["ы"] = "ı", + ["Ы"] = "I", + ["э"] = "e", + ["Э"] = "E", + ["м"] = "m", + ["М"] = "M", + ["о"] = "o", + ["О"] = "O", + ["ө"] = "ö", + ["Ө"] = "Ö", + ["и"] = "i", + ["И"] = "İ", + ["ё"] = "yo", + ["Ё"] = "Yo", + ["ж"] = "j", + ["Ж"] = "J", + ["к"] = "k", + ["К"] = "K", + ["д"] = "d", + ["Д"] = "D", + ["в"] = "w", + ["В"] = "W", + ["ц"] = "ts", + ["Ц"] = "Ts", + ["а"] = "a", + ["А"] = "A", + ["ң"] = "ñ", + ["Ң"] = "Ñ", + ["җ"] = "c", + ["Җ"] = "C", + ["һ"] = "h", + ["Һ"] = "H", + ["ә"] = "ä", + ["Ә"] = "Ä" +}; + +function export.tr(text, lang, sc) + text = mw.ustring.gsub(text, "([АОӘУЫЕЯЁЮИЕаоәуыэяёюиеъь%A][́̀]?)([Ее])", function(a, e) return a .. (e == "е" and "ye" or "Ye") end) + + text:gsub("^Е", "Ye"):gsub("^е", "ye"):gsub("ия$", "iyä") -- not last word end handled in code end + + -- Deal with dual nature of к, г, transliterated either to "front" variants + -- k/g or "back" variants q/ğ. The back variants occur before hard signs + -- (Ъ/ъ), which then disappear, and also in the vicinity of the back vowels + -- а/о/у/ы (and their capital equivalents А/О/У/Ы). The code below that + -- handles this appears to say that the sound of word-initial к/г is + -- determined by the following vowel, and the sound of non-word-initial + -- к/г is determined by the preceding vowel. FIXME: Not sure if this is + -- correct. + + local t = {["К"] = "Q", ["к"] = "q", ["Г"] = "Ğ", ["г"] = "ğ"} + text = mw.ustring.gsub(text, "([КкГг])([Ъъ])", function(a, b) return t[a] end) + text = mw.ustring.gsub(text, "(%a?)([КкГг])(.?)", function(b, c, a) return b .. (mw.ustring.match(b > "" and b or a, "[АОУЫаоуы]") and t[c] or tt[c]) .. 
a end) + + return (mw.ustring.gsub(mw.ustring.gsub(text, "ия%A", "iyä"), ".", tt)) +end + +return export diff --git a/wiktra/wikt/translit/tyv-translit.lua b/wiktra/wikt/translit/tyv-translit.lua new file mode 100644 index 0000000..2b4beff --- /dev/null +++ b/wiktra/wikt/translit/tyv-translit.lua @@ -0,0 +1,101 @@ +local export = {} + +local tab = { + ["А"] = "A", + ["Б"] = "B", + ["В"] = "V", + ["Г"] = "G", + ["Д"] = "D", + ["Е"] = "E", + ["Ё"] = "Yo", + ["Ж"] = "J", + ["З"] = "Z", + ["И"] = "İ", + ["Й"] = "Y", + ["К"] = "K", + ["Л"] = "L", + ["М"] = "M", + ["Н"] = "N", + ["Ң"] = "Ñ", + ["О"] = "O", + ["Ө"] = "Ö", + ["П"] = "P", + ["Р"] = "R", + ["С"] = "S", + ["Т"] = "T", + ["У"] = "U", + ["Ү"] = "Ü", + ["Ф"] = "F", + ["Х"] = "X", + ["Ц"] = "Ts", + ["Ч"] = "Ç", + ["Ш"] = "Ş", + ["Щ"] = "Şş", + ["Ъ"] = "ʺ", + ["Ы"] = "I", + ["Ь"] = "ʹ", + ["Э"] = "E", + ["Ю"] = "Yu", + ["Я"] = "Ya", + ["а"] = "a", + ["б"] = "b", + ["в"] = "v", + ["г"] = "g", + ["д"] = "d", + ["е"] = "e", + ["ё"] = "yo", + ["ж"] = "j", + ["з"] = "z", + ["и"] = "i", + ["й"] = "y", + ["к"] = "k", + ["л"] = "l", + ["м"] = "m", + ["н"] = "n", + ["ң"] = "ñ", + ["о"] = "o", + ["ө"] = "ö", + ["п"] = "p", + ["р"] = "r", + ["с"] = "s", + ["т"] = "t", + ["у"] = "u", + ["ү"] = "ü", + ["ф"] = "f", + ["х"] = "x", + ["ц"] = "ts", + ["ч"] = "ç", + ["ш"] = "ş", + ["щ"] = "şş", + ["ъ"] = "ʺ", + ["ы"] = "ı", + ["ь"] = "ʹ", + ["э"] = "e", + ["ю"] = "yu", + ["я"] = "ya" +} + +function export.tr(text, lang, sc) + text = mw.ustring.gsub(text, "Аъ", "À") + text = mw.ustring.gsub(text, "аъ", "à") + text = mw.ustring.gsub(text, "Эъ", "È") + text = mw.ustring.gsub(text, "эъ", "è") + text = mw.ustring.gsub(text, "Оъ", "Ò") + text = mw.ustring.gsub(text, "оъ", "ò") + text = mw.ustring.gsub(text, "Үъ", "Ü'") + text = mw.ustring.gsub(text, "үъ", "ü'") + text = mw.ustring.gsub(text, "Уъ", "Ỳ") + text = mw.ustring.gsub(text, "уъ", "ỳ") + text = mw.ustring.gsub(text, "Иъ", "Ì") + text = mw.ustring.gsub(text, "иъ", "ì") + text = 
mw.ustring.gsub(text, "Ыъ", "I'") + text = mw.ustring.gsub(text, "ыъ", "ı'") + text = mw.ustring.gsub(text, "Өъ", "Ö'") + text = mw.ustring.gsub(text, "өъ", "ö'") + -- Ё needs converting if is decomposed + text = text:gsub("ё", "ё"):gsub("Ё", "Ё") + + return (mw.ustring.gsub(text, ".", tab)) +end + +return export diff --git a/wiktra/wikt/translit/udi-translit.lua b/wiktra/wikt/translit/udi-translit.lua new file mode 100644 index 0000000..86d5daa --- /dev/null +++ b/wiktra/wikt/translit/udi-translit.lua @@ -0,0 +1,27 @@ +local export = {} + +local tt = {["б"] = "b", ["п"] = "p", ["в"] = "v", ["ф"] = "f", ["м"] = "m", ["б"] = "b", ["д"] = "d", ["т"] = "t", ["ц"] = "c", ["з"] = "z", ["с"] = "s", ["н"] = "n", ["л"] = "l", ["ч"] = "č", ["ж"] = "ž", ["ш"] = "š", ["р"] = "r", ["г"] = "g", ["к"] = "k", ["х"] = "χ", ["й"] = "j", ["и"] = "i", ["у"] = "u", ["е"] = "e", ["о"] = "o", ["а"] = "a", ["ы"] = "ə", ["ҝ"] = "gʲ"}; + +local trigraphs = {["джъ"] = "ǯ:", ["джӏ"] = "ǯ:", ["чӏъ"] = "č̣:"} +local digraphs = {["пӏ"] = "ṗ", ["тӏ"] = "ṭ", ["дз"] = "ʒ", ["цӏ"] = "c̣", ["дж"] = "ǯ", ["чӏ"] = "č̣", ["чъ"] = "č:", ["жъ"] = "ž:", ["жӏ"] = "ž:", ["шъ"] = "š:", ["шӏ"] = "š:", ["кӏ"] = "ḳ", ["гъ"] = "ɣ", ["къ"] = "q̇", ["хъ"] = "q", ["гь"] = "h", ["уь"] = "ü", ["оь"] = "ö", ["аь"] = "ä", ["иӏ"] = "i̱", ["иъ"] = "i̱", ["уӏ"] = "u̱", ["уъ"] = "u̱", ["еӏ"] = "e̱", ["еъ"] = "e̱", ["оӏ"] = "o̱", ["оъ"] = "o̱", ["аӏ"] = "a̱", ["аъ"] = "a̱", ["ыъ"] = "ə̱"} + +function export.tr(text, lang, sc) + if sc ~= "Cyrl" then return nil end + + local str_gsub = string.gsub + local UTF8_char = "[%z\1-\127\194-\244][\128-\191]*" + + -- Convert capital to lowercase palochka. Lowercase is found in tables + -- above. 
+ text = str_gsub(text, mw.ustring.char(0x4C0), mw.ustring.char(0x4CF)) + + for trigraph, translit in pairs(trigraphs) do text = str_gsub(text, trigraph, translit) end + + for digraph, translit in pairs(digraphs) do text = str_gsub(text, digraph, translit) end + + text = str_gsub(text, UTF8_char, tt) + + return text +end + +return export diff --git a/wiktra/wikt/translit/udm-translit.lua b/wiktra/wikt/translit/udm-translit.lua new file mode 100644 index 0000000..40562c6 --- /dev/null +++ b/wiktra/wikt/translit/udm-translit.lua @@ -0,0 +1,97 @@ +local export = {} + +local mapping = { + ["А"] = "A", + ["Б"] = "B", + ["В"] = "V", + ["Г"] = "G", + ["Д"] = "D", + ["Е"] = "E", + ["Ё"] = "Jo", + ["Ж"] = "Ž", + ["Ӝ"] = "Dž", + ["З"] = "Z", + ["Ӟ"] = "Dź", + ["И"] = "I", + ["Ӥ"] = "Ï", + ["Й"] = "J", + ["К"] = "K", + ["Л"] = "L", + ["М"] = "M", + ["Н"] = "N", + ["О"] = "O", + ["Ӧ"] = "Ö", + ["П"] = "P", + ["Р"] = "R", + ["С"] = "S", + ["Т"] = "T", + ["У"] = "U", + ["Ф"] = "F", + ["Х"] = "X", + ["Ц"] = "C", + ["Ч"] = "Ć", + ["Ӵ"] = "Č", + ["Ш"] = "Š", + ["Щ"] = "Šč", + ["Ъ"] = "ʺ", + ["Ы"] = "Y", + ["Ь"] = "ʹ", + ["Э"] = "E", + ["Ю"] = "Ju", + ["Я"] = "Ja", + ["а"] = "a", + ["б"] = "b", + ["в"] = "v", + ["г"] = "g", + ["д"] = "d", + ["е"] = "e", + ["ё"] = "jo", + ["ж"] = "ž", + ["ӝ"] = "dž", + ["з"] = "z", + ["ӟ"] = "dź", + ["и"] = "i", + ["ӥ"] = "ï", + ["й"] = "j", + ["к"] = "k", + ["л"] = "l", + ["м"] = "m", + ["н"] = "n", + ["о"] = "o", + ["ӧ"] = "ö", + ["п"] = "p", + ["р"] = "r", + ["с"] = "s", + ["т"] = "t", + ["у"] = "u", + ["ў"] = "w", + ["ф"] = "f", + ["х"] = "x", + ["ц"] = "c", + ["ч"] = "ć", + ["ӵ"] = "č", + ["ш"] = "š", + ["щ"] = "šč", + ["ъ"] = "ʺ", + ["ы"] = "y", + ["ь"] = "ʹ", + ["э"] = "e", + ["ю"] = "ju", + ["я"] = "ja" +} + +function export.tr(text, lang, sc) -- romanize Udmurt Cyrillic text; lang and sc are accepted but not used + -- Compose decomposed Ё/ё (Е/е followed by U+0308) into the single letters the tables are keyed on; byte escapes keep the two forms distinguishable in source + text = text:gsub("\208\181\204\136", "\209\145"):gsub("\208\149\204\136", "\208\129") + + -- е after a vowel, a hard/soft sign or any non-letter (optionally followed by combining acute U+0301 or grave U+0300) becomes je + text = mw.ustring.gsub(text, 
"([АОӦУЫЭЯЁЮИӤЕЪЬаоӧуыэяёюиӥеъь%A][\204\129\204\128]?)е", "%1je") + text = mw.ustring.gsub(text, "^Е", "Je") + text = mw.ustring.gsub(text, "^е", "je") + text = mw.ustring.gsub(text, "([^Ѐ-ӿ])Е", "%1Je") + text = mw.ustring.gsub(text, "([^Ѐ-ӿ])е", "%1je") + -- every remaining character is looked up in mapping; the outer parentheses drop gsub's match count + return (mw.ustring.gsub(text, ".", mapping)) +end + +return export diff --git a/wiktra/wikt/translit/ug-translit.lua b/wiktra/wikt/translit/ug-translit.lua new file mode 100644 index 0000000..1dd57a3 --- /dev/null +++ b/wiktra/wikt/translit/ug-translit.lua @@ -0,0 +1,135 @@ +local export = {} + +local data = {} + +data["ug-Arab"] = { + -- consonants + ["م"] = "m", + ["ن"] = "n", + ["د"] = "d", + ["ت"] = "t", + ["ب"] = "b", + ["پ"] = "p", + ["ف"] = "f", + ["ق"] = "q", + ["ك"] = "k", + ["ڭ"] = "ng", + ["گ"] = "g", + ["غ"] = "gh", + ["ع"] = "ğ", + ["ھ"] = "h", + ["خ"] = "x", + ["چ"] = "ch", + ["ج"] = "j", + ["ژ"] = "zh", + ["ز"] = "z", + ["س"] = "s", + ["ش"] = "sh", + ["ر"] = "r", + ["ل"] = "l", + ["ئ"] = "'", + ["ي"] = "y", + ["ۋ"] = "w", + -- vowels + ["ا"] = "a", + ["ە"] = "e", + ["ې"] = "ë", + ["ى"] = "i", + ["و"] = "o", + ["ۆ"] = "ö", + ["ۇ"] = "u", + ["ۈ"] = "ü", + -- punctuation + ["؟"] = "?", + ["،"] = ",", + ["؛"] = ";", + ["ـ"] = "-" +} + +data["Cyrl"] = { + ["А"] = "A", + ["Б"] = "B", + ["В"] = "W", + ["Г"] = "G", + ["Ғ"] = "Gh", + ["Д"] = "D", + ["Е"] = "Ë", + ["Ә"] = "E", + ["Ж"] = "Zh", + ["Җ"] = "J", + ["З"] = "Z", + ["И"] = "I", + ["Й"] = "Y", + ["К"] = "K", + ["Қ"] = "Q", + ["Л"] = "L", + ["М"] = "M", + ["Н"] = "N", + ["Ң"] = "Ng", + ["О"] = "O", + ["Ө"] = "Ö", + ["П"] = "P", + ["Р"] = "R", + ["С"] = "S", + ["Т"] = "T", + ["У"] = "U", + ["Ү"] = "Ü", + ["Ф"] = "F", + ["Х"] = "X", + ["Һ"] = "H", + ["Ч"] = "Ch", + ["Ш"] = "Sh", + ["Ю"] = "Yu", + ["Я"] = "Ya", + ["Э"] = "É", + ["а"] = "a", + ["б"] = "b", + ["в"] = "w", + ["г"] = "g", + ["ғ"] = "gh", + ["д"] = "d", + ["е"] = "ë", + ["ә"] = "e", + ["ж"] = "zh", + ["җ"] = "j", + ["з"] = "z", + ["и"] = "i", + ["й"] = "y", + ["к"] = "k", + ["қ"] = "q", + ["л"] = "l", + 
["м"] = "m", + ["н"] = "n", + ["ң"] = "ng", + ["о"] = "o", + ["ө"] = "ö", + ["п"] = "p", + ["р"] = "r", + ["с"] = "s", + ["т"] = "t", + ["у"] = "u", + ["ү"] = "ü", + ["ф"] = "f", + ["х"] = "x", + ["һ"] = "h", + ["ч"] = "ch", + ["ш"] = "sh", + ["ю"] = "yu", + ["я"] = "ya", + ["э"] = "é" +} + +function export.tr(text, lang, sc) + if not data[sc] then return nil end + + -- remove initial hamza + text = mw.ustring.gsub(text, "^\216\166(.)", "%1") + text = mw.ustring.gsub(text, "%s\216\166(.)", " %1") + + -- transliterate letters one to one + text = mw.ustring.gsub(text, ".", data[sc]) + + return text +end + +return export diff --git a/wiktra/wikt/translit/uga-translit.lua b/wiktra/wikt/translit/uga-translit.lua new file mode 100644 index 0000000..85ada0b --- /dev/null +++ b/wiktra/wikt/translit/uga-translit.lua @@ -0,0 +1,92 @@ +local export = {} + +local tt = { + ["𐎀"] = "ả", + ["𐎁"] = "b", + ["𐎂"] = "g", + ["𐎃"] = "ḫ", + ["𐎄"] = "d", + ["𐎅"] = "h", + ["𐎆"] = "w", + ["𐎇"] = "z", + ["𐎈"] = "ḥ", + ["𐎉"] = "ṭ", + ["𐎊"] = "y", + ["𐎋"] = "k", + ["𐎌"] = "š", + ["𐎍"] = "l", + ["𐎎"] = "m", + ["𐎏"] = "ḏ", + ["𐎐"] = "n", + ["𐎑"] = "ẓ", + ["𐎒"] = "s", + ["𐎓"] = "ʿ", + ["𐎔"] = "p", + ["𐎕"] = "ṣ", + ["𐎖"] = "q", + ["𐎗"] = "r", + ["𐎘"] = "ṯ", + ["𐎙"] = "ġ", + ["𐎚"] = "t", + ["𐎛"] = "ỉ", + ["𐎜"] = "ủ", + ["𐎝"] = "s̀", + ["𐎟"] = " · " -- word divider +} + +local reverse = { + ["ả"] = "𐎀", + ["b"] = "𐎁", + ["g"] = "𐎂", + ["ḫ"] = "𐎃", + ["d"] = "𐎄", + ["h"] = "𐎅", + ["w"] = "𐎆", + ["z"] = "𐎇", + ["ḥ"] = "𐎈", + ["ṭ"] = "𐎉", + ["y"] = "𐎊", + ["k"] = "𐎋", + ["š"] = "𐎌", + ["l"] = "𐎍", + ["m"] = "𐎎", + ["ḏ"] = "𐎏", + ["n"] = "𐎐", + ["ẓ"] = "𐎑", + ["s"] = "𐎒", + ["ʿ"] = "𐎓", + ["p"] = "𐎔", + ["ṣ"] = "𐎕", + ["q"] = "𐎖", + ["r"] = "𐎗", + ["ṯ"] = "𐎘", + ["ġ"] = "𐎙", + ["t"] = "𐎚", + ["ỉ"] = "𐎛", + ["ủ"] = "𐎜", + ["ś"] = "𐎝", + ["·"] = "𐎟" -- word divider +} + +local get_glottal = {["a"] = "ả", ["i"] = "ỉ", ["u"] = "ủ"} + +local aliases = {["a"] = "ả", ["i"] = "ỉ", ["u"] = "ủ", ["θ"] = "ṯ", 
["ð"] = "ḏ", ["x"] = "ḫ", ["ẖ"] = "ḫ", ["ɣ"] = "ġ", ["ḡ"] = "ġ", ["ħ"] = "ḥ", ["ḳ"] = "q", ["ḓ"] = "ẓ"} + +function export.tr(text, lang, sc) -- Ugaritic -> Latin, one character at a time through tt; lang and sc are not used + text = mw.ustring.gsub(text, ".", tt) + return text +end + +function export.reverse(text) -- Latin transliteration -> Ugaritic; a table argument is treated as frame-like and its args[1] is used + if type(text) == "table" then text = text.args[1] end + text = mw.ustring.gsub(text, " *[.·] *", "·") -- "." or "·" plus any surrounding spaces becomes a bare word divider + text = mw.ustring.gsub(text, "[ʿˁʕ‘]", "ʿ") -- unify the ayin look-alikes + text = mw.ustring.gsub(text, "[ʾˀʔ’]", "ʾ") -- unify the aleph/glottal-stop look-alikes + text = mw.ustring.gsub(text, "ʾ([aiu])", get_glottal) -- glottal stop + vowel becomes the single hooked-vowel letter + text = mw.ustring.gsub(text, "s[̀2₂]", "ś") -- s followed by a combining grave, 2 or subscript 2 becomes the "ś" key of reverse + text = mw.ustring.gsub(text, "s2", "ś") -- NOTE(review): the previous pattern already matches "s2", so this looks redundant - confirm before removing + text = mw.ustring.gsub(text, ".", reverse) -- NOTE(review): the aliases table is never applied in this function - confirm whether it should run before this lookup + return text +end + +return export diff --git a/wiktra/wikt/translit/uk-translit.lua b/wiktra/wikt/translit/uk-translit.lua new file mode 100644 index 0000000..2964d18 --- /dev/null +++ b/wiktra/wikt/translit/uk-translit.lua @@ -0,0 +1,153 @@ +local export = {} + +local rsubn = mw.ustring.gsub + +-- version of rsubn() that discards all but the first return value +local function rsub(term, foo, bar) + local retval = rsubn(term, foo, bar) + return retval +end + +local tt = { -- Cyrillic character -> Latin transliteration; export.tr applies it one character at a time + ["А"] = "A", + ["а"] = "a", + ["Б"] = "B", + ["б"] = "b", + ["В"] = "V", + ["в"] = "v", + ["Г"] = "H", + ["г"] = "h", + ["Ґ"] = "G", + ["ґ"] = "g", + ["Д"] = "D", + ["д"] = "d", + ["Е"] = "E", + ["е"] = "e", + ["Є"] = "Je", + ["є"] = "je", + ["Ж"] = "Ž", + ["ж"] = "ž", + ["З"] = "Z", + ["з"] = "z", + ["И"] = "Y", + ["и"] = "y", + ["І"] = "I", + ["і"] = "i", + ["Ї"] = "Ji", + ["ї"] = "ji", + ["Й"] = "J", + ["й"] = "j", + ["К"] = "K", + ["к"] = "k", + ["Л"] = "L", + ["л"] = "l", + ["М"] = "M", + ["м"] = "m", + ["Н"] = "N", + ["н"] = "n", + ["О"] = "O", + ["о"] = "o", + ["П"] = "P", + ["п"] = "p", + ["Р"] = "R", + ["р"] = "r", + ["С"] = "S", + ["с"] = "s", + ["Т"] = "T", + ["т"] = "t", + ["У"] = "U", + ["у"] = "u", + ["Ф"] = "F", + ["ф"] = "f", + ["Х"] = "X", + ["х"] = "x", + ["Ц"] = "C", + ["ц"] = "c", + ["Ч"] = "Č", + ["ч"] = "č", + ["Ш"] = "Š", + ["ш"] = "š", + ["Щ"] = "Šč", + ["щ"] = 
"šč", + ["Ь"] = "ʹ", + ["ь"] = "ʹ", + ["Ю"] = "Ju", + ["ю"] = "ju", + ["Я"] = "Ja", + ["я"] = "ja", + -- right single quotation mark, modifier letter apostrophe → modifier letter double prime + ["’"] = "ʺ", + ["ʼ"] = "ʺ", + -- obsolete letters, pre-reform + ["Ё"] = "Ë", + ["ё"] = "ë", + ["Ъ"] = "ʺ", + ["ъ"] = "ʺ", + ["Ы"] = "Y", + ["ы"] = "y", + ["Ѣ"] = "Ě", + ["ѣ"] = "ě", + ["Э"] = "È", + ["э"] = "è", + -- obsolete letters, Middle Ukrainian (the Ъ, Ы and Ѣ entries repeat the pre-reform ones with the same values) + ["Ѥ"] = "Je", + ["ѥ"] = "je", + ["Ъ"] = "ʺ", + ["ъ"] = "ʺ", + ["Ы"] = "Y", + ["ы"] = "y", + ["Ѣ"] = "Ě", + ["ѣ"] = "ě", + ["Ѧ"] = "Ę", + ["ѧ"] = "ę", + ["Ѩ"] = "Ję", + ["ѩ"] = "ję", + ["Ѫ"] = "Ǫ", + ["ѫ"] = "ǫ", + ["Ѭ"] = "Jǫ", + ["ѭ"] = "jǫ", + ["Ѯ"] = "Ks", + ["ѯ"] = "ks", + ["Ѱ"] = "Ps", + ["ѱ"] = "ps", -- lowercase psi (U+0471); a second capital key here would overwrite "Ps" + ["Ѳ"] = "F", + ["ѳ"] = "f", + ["Ѵ"] = "I", + ["ѵ"] = "i", + ["Ѡ"] = "O", + ["ѡ"] = "o", + -- Ukrainian style quotes + ["«"] = "“", + ["»"] = "”" +} + +local AC = mw.ustring.char(0x0301) -- acute = ́ +local acute_decomposer = {["á"] = "a" .. AC, ["é"] = "e" .. AC, ["í"] = "i" .. AC, ["ó"] = "o" .. AC, ["ú"] = "u" .. AC, ["ý"] = "y" .. AC, ["Á"] = "A" .. AC, ["É"] = "E" .. AC, ["Í"] = "I" .. AC, ["Ó"] = "O" .. AC, ["Ú"] = "U" .. AC, ["Ý"] = "Y" .. AC} + +function export.tr(text) -- translit any words or phrases + -- Remove word-final hard sign, either utterance-finally or followed by + -- a non-letter character such as space, comma, period, hyphen, etc. 
+ text = rsub(text, "[Ъъ]$", "") + text = rsub(text, "[Ъъ]([%A])", "%1") + + text = rsub(text, "'+", {["'"] = "ʺ"}) -- neutral apostrophe; only a lone ' has a table entry, so runs of two or more are left as they are + text = rsub(text, ".", tt) + + return text +end + +function export.reverse_tr(text) -- reverse-translit any words or phrases + -- Invert tt. Several Cyrillic letters share one Latin value (e.g. both U+041E and U+0460 give "O") and pairs() order is + -- unspecified, so every shared value is pinned afterwards; code points are used because the letters have Latin look-alikes. + local reverse_tt = {} + for k, v in pairs(tt) do reverse_tt[v] = k end + reverse_tt["ʺ"] = "'" + for _, cp in ipairs({0x44C, 0x438, 0x418, 0x404, 0x454, 0x424, 0x444, 0x406, 0x456, 0x41E, 0x43E}) do local c = mw.ustring.char(cp); reverse_tt[tt[c]] = c end -- ь и И Є є Ф ф І і О о + text = rsub(text, ".", acute_decomposer) + text = rsub(text, "[Jj][aeiu]", reverse_tt) + text = rsub(text, "[Šš]č", reverse_tt) + text = rsub(text, ".", reverse_tt) + return text +end + +return export diff --git a/wiktra/wikt/translit/ur-translit.lua b/wiktra/wikt/translit/ur-translit.lua new file mode 100644 index 0000000..3f155b5 --- /dev/null +++ b/wiktra/wikt/translit/ur-translit.lua @@ -0,0 +1,263 @@ +local U = mw.ustring.char +local gsub = mw.ustring.gsub +local export = {} + +local zabar = U(0x64E) +local zer = U(0x650) +local pesh = U(0x64F) +local tashdid = U(0x651) -- also called shadda +local jazm = "ْ" +local he = "ہ" + +local consonants = "ببپتثجچحخدذرزژسشصضطظعغفقکگلࣇمنݨوہھٹڈڑ" +local consonantS = "ببپتثجچحخدذرزژسشصضطظعغفقکگلࣇمنݨہھٹڈڑ" +local consonantS2 = "یببپتثجچحخدذرزژسشصضطظعغفقکگلࣇمنݨوہھٹڈڑ" +local vowels = "ایئےۓوؤ" +local hes = "ہح" +local diacritics = "َُِّْٰ" +local ZZP = "َُِ" + +local mapping = { + ["آ"] = "ā", + ["ب"] = "b", + ["پ"] = "p", + ["ت"] = "t", + ["ٹ"] = "ṭ", + ["ث"] = "s", + ["ج"] = "j", + ["چ"] = "c", + ["ح"] = "h", + ["خ"] = "x", + ["د"] = "d", + ["ڈ"] = "ḍ", + ["ذ"] = "z", + ["ر"] = "r", + ["ڑ"] = "ṛ", + ["ز"] = "z", + ["ژ"] = "ž", + ["س"] = "s", + ["ش"] = "ś", + ["ص"] = "s", + ["ض"] = "z", + ["ط"] = "t", + ["ظ"] = "z", + ["غ"] = "ġ", + ["ف"] = "f", + ["ق"] = "q", + ["ک"] = "k", + ["گ"] = "g", + ["ݨ"] = "ṇ", + ["ࣇ"] = "ḷ", + ["ل"] = "l", + ["م"] = "m", + ["ن"] = "n", + ["و"] = "ō", + ["ہ"] = "h", + ["ی"] = "y", + ["ے"] = "ē", + ["۔"] = ".", + ["ں"] = "̃", + + ["ھ"] = "h", + + ["ع"] = "‘", + ["ء"] = 
"’", + ["ئ"] = "", + ["ؤ"] = "ō", + ["أ"] = "", + + -- diacritics + [zabar] = "a", + [zer] = "i", + [pesh] = "u", + [jazm] = "", -- also sukun - no vowel + [U(0x200C)] = "-", -- ZWNJ (zero-width non-joiner) + + -- ligatures + ["ﻻ"] = "lā", + ["ﷲ"] = "allāh", + + -- kashida + ["ـ"] = "-", -- kashida, no sound + + -- numerals + ["۱"] = "1", + ["۲"] = "2", + ["۳"] = "3", + ["۴"] = "4", + ["۵"] = "5", + ["۶"] = "6", + ["۷"] = "7", + ["۸"] = "8", + ["۹"] = "9", + ["۰"] = "0", + + -- punctuation (leave on separate lines) + ["؟"] = "?", -- question mark + ["،"] = ",", -- comma + ["؛"] = ";", -- semicolon + ["«"] = "“", -- quotation mark + ["»"] = "”", -- quotation mark + ["٪"] = "%", -- percent + ["؉"] = "‰", -- per mille + ["٫"] = ".", -- decimals + ["٬"] = ",", -- thousand + ["ۓ"] = "-ye", + ["ۀ"] = "-e" -- he ye (in ezâfe) +} + +local ain = "ع" +local alif = "ا" +local ye = "ی" +local ye2 = "ئ" +local ye3 = "ے" +local vao = "و" +local aspirate = "ھ" + +function export.tr(text, lang, sc) + + -- EXCEPTIONS - leave as they are, unless they have been sorted out elsewhere + text = gsub(text, "([" .. consonants .. "])" .. ye .. "ں", "%1ē̃") + text = gsub(text, zabar .. aspirate .. "(ی)", "hɛ̄") + text = gsub(text, zabar .. aspirate .. "(و)", "hɔ̄") + text = gsub(text, "ؤ" .. pesh, "ū") + text = gsub(text, "وہ", "vo") + text = gsub(text, alif .. ye2 .. "([" .. consonants .. "])", "ā'ya%1") + + -- Tashdeed + text = gsub(text, "([" .. consonantS2 .. "])" .. tashdid, "%1%1") + text = gsub(text, "([" .. consonantS2 .. "])" .. tashdid .. "([" .. ZZP .. "])", "%1%1%2") + text = gsub(text, "([" .. ZZP .. "])" .. ye .. "([" .. ZZP .. "])" .. tashdid, "%1yy%2") + text = gsub(text, "([" .. ZZP .. "])" .. vao .. "([" .. ZZP .. "])" .. tashdid, "%1vv%2") + -- For some reason the tashdeed gets pushed after the other diacritics, so this line is necessary for tashdeed to work with other diacritics + text = gsub(text, "([" .. consonants .. "])" .. "([" .. ZZP .. "])" .. 
tashdid, "%1%1%2") + + -- e, instead of i + text = gsub(text, alif .. zer .. ain .. jazm .. "([" .. consonants .. "])" .. zer, "ē‘%1e") + text = gsub(text, jazm .. "([" .. consonants .. "])" .. zer .. "([" .. consonants .. "])" .. alif, "%1e%2ā") + text = gsub(text, alif .. zer .. ain .. jazm, "ē‘") + + -- tanween diacritic + text = gsub(text, "([" .. consonants .. "])" .. "ً" .. alif, "%1an") + text = gsub(text, alif .. "ً", "an") + text = gsub(text, "([" .. consonants .. "])" .. "ً", "%1an") + + -- khari zabar -- + text = gsub(text, "([" .. vowels .. "])" .. "ٰ", "ā") + text = gsub(text, "([" .. consonants .. "])" .. "ٰ" .. "([" .. vowels .. "])", "%1ā") + + -- ‘ain + text = gsub(text, alif .. ain, "ā‘") + text = gsub(text, ain .. alif .. "([" .. consonants .. "])", "‘ā%1") + text = gsub(text, "([" .. consonants .. "])" .. ain .. he, "%1‘a") + text = gsub(text, "([" .. consonants .. "])" .. "([" .. zer .. pesh .. "]?)" .. ain, "%1%2‘") + text = gsub(text, ain .. zabar .. vao .. "([" .. consonants .. "])", "‘ɔ̄%1") + text = gsub(text, ain .. zabar .. ye .. "([" .. consonants .. "])", "‘ɛ̄%1") + text = gsub(text, ain .. zer .. "([" .. consonants .. "])", "‘i%1") + text = gsub(text, ain .. pesh .. "([" .. consonants .. "])", "‘u%1") + text = gsub(text, ain .. zer .. ye .. "([" .. consonants .. "])", "‘ī%1") + text = gsub(text, ain .. pesh .. vao .. "([" .. consonantS .. "])", "‘ū%1") + + -- Vao + text = gsub(text, vao .. "([" .. ZZP .. "])", "v%1") + text = gsub(text, "([" .. consonants .. "])" .. zabar .. vao .. alif, "%1avā") + + -- Fatha Majhool -- + text = gsub(text, "([" .. consonants .. "])" .. zabar .. aspirate .. "([" .. hes .. "])" .. "([" .. ZZP .. "])" .. jazm, "%1hêh%3") + text = gsub(text, "([" .. consonants .. "])" .. zabar .. "([" .. hes .. "])" .. jazm .. "([" .. ZZP .. "])", "%1êh%3") + text = gsub(text, "([" .. consonants .. "])" .. zabar .. "([" .. hes .. "])" .. jazm, "%1êh") + + -- medial/final consonants. + text = gsub(text, zabar .. he .. 
zer .. ye, "ahī") + text = gsub(text, zabar .. he .. alif, "ahā") + text = gsub(text, zabar .. he .. "([" .. consonants .. vowels .. "])", "ah%1") + text = gsub(text, "([" .. consonants .. "])" .. alif, "%1ā") + text = gsub(text, "([" .. consonants .. "])" .. tashdid .. alif, "%1%1ā") + + text = gsub(text, "([" .. consonants .. "])" .. vao, "%1ō") + text = gsub(text, "([" .. consonants .. "])" .. tashdid .. vao, "%1%1ō") + + text = gsub(text, zer .. ye .. alif, "iyā") + text = gsub(text, "([" .. consonants .. "])" .. ye .. "([" .. consonants .. "])", "%1ē%2") + text = gsub(text, ye2 .. ye, "ï") + text = gsub(text, ye2 .. "ے", "ë") + text = gsub(text, "([" .. consonants .. "])" .. ye .. ye3, "%1ië") + text = gsub(text, alif .. zabar .. ye3, "ɛ̄") + text = gsub(text, "([" .. consonants .. alif .. "])" .. ye2 .. ye, "%1aï") + text = gsub(text, "([" .. consonants .. "])" .. ye2 .. ye3, "%1aë") + text = gsub(text, zabar .. ye3, "ɛ̄") + text = gsub(text, "([" .. consonants .. "])" .. zer .. " ", "%1-e ") + + -- Initial alif + text = gsub(text, "" .. alif .. "([" .. consonantS .. "])", "ā%1") + text = gsub(text, alif .. "([" .. consonantS .. "])", "a%1") + text = gsub(text, alif .. zabar .. "([" .. consonantS .. "])", "a%1") + text = gsub(text, alif .. zabar .. vao .. "([" .. consonants .. "])", "ɔ̄%1") + text = gsub(text, alif .. vao .. "([" .. consonants .. "])", "ō%1") + text = gsub(text, alif .. ye .. "([" .. consonants .. "])", "ai%1") + text = gsub(text, alif .. zabar .. ye .. "([" .. consonants .. "])", "ɛ̄%1") + text = gsub(text, alif .. pesh .. "([" .. consonantS .. "])", "u%1") + text = gsub(text, alif .. zer .. "([" .. consonants .. "])", "i%1") + text = gsub(text, pesh .. vao, "ū") + text = gsub(text, alif .. zer .. ye .. "([" .. consonants .. "])", "ī%1") + + -- do-chashme-he zabar, zer, pesh + text = gsub(text, "([" .. consonants .. "])" .. "([" .. ZZP .. "])" .. aspirate, "%1h%2") + + -- diacritics + text = gsub(text, "([" .. consonants .. "])" .. zabar .. 
vao, "%1ɔ̄") + text = gsub(text, "([" .. consonants .. "])" .. zabar .. ye, "%1ɛ̄") + text = gsub(text, "([" .. consonants .. "])" .. zabar .. ye3, "%1ɛ̄") + text = gsub(text, "([" .. consonants .. "])" .. ye, "%1ī") + text = gsub(text, "([" .. consonants .. "])" .. zer .. ye, "%1ī") + + -- final he + short vowel disregards the he and transliterates the vowel + text = gsub(text, ye .. he, "ye") + text = gsub(text, "([" .. consonants .. "])" .. he, "%1e") + + -- + text = gsub(text, zabar .. he .. "([" .. ZZP .. "])", "ah%1") + text = gsub(text, zabar .. he, "a") + -- + + text = gsub(text, "ۂ", "a-e") + text = mw.ustring.gsub(text, "ahē", "hɛ̄") + text = mw.ustring.gsub(text, "ahō", "hɔ̄") + text = mw.ustring.gsub(text, ".", mapping) + + text = mw.ustring.gsub(text, "ōā", "vā") + text = mw.ustring.gsub(text, "ɔ̄ā", "vā") + text = mw.ustring.gsub(text, "ōا", "vā") + text = mw.ustring.gsub(text, "ɔ̄aاa", "avā") + text = mw.ustring.gsub(text, "ɔ̄ا", "vā") + text = mw.ustring.gsub(text, "ɔ̄ا", "vā") + + -- Changed these to 'iy(*)', because they will be used for with ی, which are normally written as 'iy' + text = mw.ustring.gsub(text, "īā", "iyā") + text = mw.ustring.gsub(text, "īa", "iya") + -- + + text = mw.ustring.gsub(text, "aا", "ā") + text = mw.ustring.gsub(text, "اē", "ē") + text = mw.ustring.gsub(text, "īا", "yā") + text = mw.ustring.gsub(text, "yا", "yā") + text = mw.ustring.gsub(text, "huō", "hū") + text = mw.ustring.gsub(text, "hiē", "hī") + text = mw.ustring.gsub(text, "êha", "êhê") + + -- vao as a medial consonant + text = mw.ustring.gsub(text, "ūa", "uva") + text = mw.ustring.gsub(text, "ɔ̄([aiu])", "av%1") + -- + + -- Final corrections + text = mw.ustring.gsub(text, "āa", "ā") + text = mw.ustring.gsub(text, "aaa", "ā") + text = mw.ustring.gsub(text, "āā", "ā") + text = mw.ustring.gsub(text, "aa", "ā") + text = mw.ustring.gsub(text, "ë", "ē") + text = mw.ustring.gsub(text, "ï", "ī") + text = mw.ustring.gsub(text, "’alle", "’allāh") + return text +end +return 
export diff --git a/wiktra/wikt/translit/utilities.lua b/wiktra/wikt/translit/utilities.lua new file mode 100644 index 0000000..6d0bbb9 --- /dev/null +++ b/wiktra/wikt/translit/utilities.lua @@ -0,0 +1,242 @@ +local export = {} + +local data = mw.loadData("utilities/data") +local notneeded = data.notneeded +local neededhassubpage = data.neededhassubpage + +-- A helper function to escape magic characters in a string +-- Magic characters: ^$()%.[]*+-? TEXT may also be a frame-like table, in which case text.args[1] is escaped. +function export.pattern_escape(text) + if type(text) == "table" then text = text.args[1] end + text = mw.ustring.gsub(text, "([%^$()%%.%[%]*+%-?])", "%%%1") + return text +end +-- Replace PATTERN in TEXT, treating PATTERN as literal text (it is run through pattern_escape); REPLACEMENT is handed to mw.ustring.gsub as-is. Called with a frame, parameters 1-3 come from frame.args and only the resulting string is returned; called from Lua, every result of mw.ustring.gsub is returned. +function export.plain_gsub(text, pattern, replacement) + local invoked = false + + if type(text) == "table" then + invoked = true + + if text.args then + local frame = text + + local params = {[1] = {}, [2] = {}, [3] = {allow_empty = true}} + + local args = require("parameters").process(frame.args, params) + + text = args[1] + pattern = args[2] + replacement = args[3] + else + error("If the first argument to plain_gsub is a table, it should be a frame object.") + end + else + if not (type(pattern) == "string" or type(pattern) == "number") then error("The second argument to plain_gsub should be a string or a number.") end + + if not (type(replacement) == "string" or type(replacement) == "number") then error("The third argument to plain_gsub should be a string or a number.") end + end + + pattern = export.pattern_escape(pattern) + + if invoked then + text = mw.ustring.gsub(text, pattern, replacement) + return text + else + return mw.ustring.gsub(text, pattern, replacement) + end +end + +--[[ +Format the categories with the appropriate sort key. CATEGORIES is a list of +categories. + -- LANG is an object encapsulating a language; if nil, the object for + language code 'und' (undetermined) will be used. + -- SORT_KEY is placed in the category invocation, and indicates how the + page will sort in the respective category. 
Normally this should be nil, + and a default sort key based on the subpage name (the part after the + colon) will be used. + -- SORT_BASE lets you override the default sort key used when SORT_KEY is + nil. Normally, this should be nil, and a language-specific default sort + key is computed from the subpage name (e.g. for Russian this converts + Cyrillic ё to a string consisting of Cyrillic е followed by U+10FFFF, + so that effectively ё sorts after е instead of the default Wikimedia + sort, which (I think) is based on Unicode sort order and puts ё after я, + the last letter of the Cyrillic alphabet). + -- FORCE_OUTPUT forces normal output in all namespaces. Normally, nothing + is output if the page isn't in the main, Appendix:, Reconstruction: or + Citations: namespaces. SC is passed through to lang:makeSortKey unchanged. +]] +function export.format_categories(categories, lang, sort_key, sort_base, force_output, sc) + if type(lang) == "table" and not lang.getCode then error("The second argument to format_categories should be a language object.") end + + local title_obj = mw.title.getCurrentTitle() + + if force_output or data.allowedNamespaces[title_obj.nsText] or data.allowedPrefixedPages[title_obj.prefixedText] then + local PAGENAME = title_obj.text + local SUBPAGENAME = title_obj.subpageText + + if not lang then lang = require("languages").getByCode("und") end + + -- Generate a default sort key + sort_base = lang:makeSortKey(sort_base or SUBPAGENAME, sc) + + if sort_key and sort_key ~= "" then + -- Gather some statistics regarding sort keys + if mw.ustring.upper(sort_key) == sort_base then table.insert(categories, "Sort key tracking/redundant") end + else + sort_key = sort_base + end + + -- If the sortkey is empty, remove it. + -- Leave the sortkey if it is equal to PAGENAME, because it still + -- might be different from DEFAULTSORT and therefore have an effect; see + -- [[Wiktionary:Grease pit/2020/April#Module:utilities#format categories]]. 
+ if sort_key == "" then sort_key = nil end + + local out_categories = {} + for key, cat in ipairs(categories) do out_categories[key] = "[[Category:" .. cat .. (sort_key and "|" .. sort_key or "") .. "]]" end + + return table.concat(out_categories, "") + else + return "" + end +end + +-- Used by {{categorize}}: parent-frame parameter 1 is the language code, |sort= the sort key, parameters 2 onward the category names; frame.args.format selects the prefix ("pos" = canonical language name, "topic" = language code). +function export.template_categorize(frame) + local NAMESPACE = mw.title.getCurrentTitle().nsText + local format = frame.args["format"] + local args = frame:getParent().args + + local langcode = args[1]; + if langcode == "" then langcode = nil end + local sort_key = args["sort"]; + if sort_key == "" then sort_key = nil end + local categories = {} + + if not langcode then + if NAMESPACE == "Template" then return "" end + error("Language code has not been specified. Please pass parameter 1 to the template.") + end + + local lang = require("languages").getByCode(langcode) + + if not lang then + if NAMESPACE == "Template" then return "" end + error("The language code \"" .. langcode .. "\" is not valid.") + end + + local prefix = "" + + if format == "pos" then + prefix = lang:getCanonicalName() .. " " + elseif format == "topic" then + prefix = lang:getCode() .. ":" + end + + local i = 2 + local cat = args[i] + + while cat do + if cat ~= "" then table.insert(categories, prefix .. 
cat) end + + i = i + 1 + cat = args[i] + end + + return export.format_categories(categories, lang, sort_key) +end +-- Builds the hidden "catfix" marker for LANG: a span whose class carries the language's canonical name and whose content is tagged with script SC (or the default from data.catfix_scripts). Returns nil, after tracking, when LANG is missing or not a table. +function export.catfix(lang, sc) + if not lang then + require("debug").track("catfix/no lang") + return nil + elseif type(lang) ~= "table" then + require("debug").track("catfix/lang not table") + return nil + end + local canonicalName = lang:getCanonicalName() or error("The first argument to the function \"catfix\" should be a language object from Module:languages.") + + if sc and not sc.getCode then error("The second argument to the function \"catfix\" should be a script object from Module:scripts.") end + + -- To add script classes to links on pages created by category boilerplate templates. + if not sc then + sc = data.catfix_scripts[lang:getCode()] + if sc then sc = require("scripts").getByCode(sc) end + end + + return "<span id=\"catfix\" style=\"display:none;\" class=\"CATFIX-" .. mw.uri.anchorEncode(canonicalName) .. "\">" .. require("script utilities").tag_text("&nbsp;", lang, sc, nil) .. "</span>" -- NOTE(review): wrapper markup restored after it was stripped from this copy; confirm against upstream Module:utilities +end +-- Template entry point for catfix: parameter 1 is the language code, parameter 2 (alias of |sc=) the script code. +function export.catfix_template(frame) + local params = {[1] = {}, [2] = {alias_of = "sc"}, ["sc"] = {}} + + local args = require("parameters").process(frame:getParent().args, params) + + local lang = require("languages").getByCode(args[1]) or require("languages").err(args[1], 1) + + local sc = args.sc + if sc then sc = require("scripts").getByCode(sc) or error("The script code \"" .. sc .. "\", provided in the second parameter, is not valid.") end + + return export.catfix(lang, sc) +end + +-- Not exporting because it is not used yet. 
+local function getDateTense(frame) -- returns "past", "present" or "future" for the date given as year (arg 1), month name or number (arg 2) and day (arg 3) + local name_num_mapping = {["January"] = 1, ["February"] = 2, ["March"] = 3, ["April"] = 4, ["May"] = 5, ["June"] = 6, ["July"] = 7, ["August"] = 8, ["September"] = 9, ["October"] = 10, ["November"] = 11, ["December"] = 12, [1] = 1, [2] = 2, [3] = 3, [4] = 4, [5] = 5, [6] = 6, [7] = 7, [8] = 8, [9] = 9, [10] = 10, [11] = 11, [12] = 12} + local month = name_num_mapping[frame.args[2]] or name_num_mapping[tonumber(frame.args[2])] -- frame arguments arrive as strings, so "3" must be converted before it can hit the numeric keys + local date = os.time({year = frame.args[1], day = frame.args[3], month = month}) + local today = os.time() -- current time; the table form above defaults to 12:00 noon of the given day + local diff = os.difftime(date, today) + local daylength = 24 * 3600 + + if diff < -daylength / 2 then + return "past" + else + if diff > daylength / 2 then + return "future" + else + return "present" + end + end +end + +function export.make_id(lang, str) + --[[ If called with invoke, first argument is a frame object. + If called by a module, first argument is a language object. ]] + local invoked = false + + if type(lang) == "table" then + if lang.args then + invoked = true + + local frame = lang + + local params = {[1] = {}, [2] = {}} + + local args = require("parameters").process(frame:getParent().args, params) + + local langCode = args[1] + str = args[2] + + local m_languages = require("languages") + + lang = m_languages.getByCode(langCode) or m_languages.err(langCode, 1) + elseif not lang.getCanonicalName then + error("The first argument to make_id should be a language object.") + end + end + + if not (type(str) == "string" or type(str) == "number") then error("The second argument to make_id should be a string or a number.") end + + local id = require("senseid").anchor(lang, str) + + if invoked then + return "<li class=\"senseid\" id=\"" .. id .. "\">" -- NOTE(review): list-item markup restored after it was stripped from this copy (the literal was split across lines); confirm against upstream Module:utilities
 + else + return id + end +end + +return export diff --git a/wiktra/wikt/translit/utilities/data.lua b/wiktra/wikt/translit/utilities/data.lua new file mode 100644 index 0000000..f86b455 --- /dev/null +++ b/wiktra/wikt/translit/utilities/data.lua @@ -0,0 +1,41 @@ +local data = {} + +data.notneeded = {["und"] = true, ["cmn"] = true, ["ja"] = true, ["zu"] = true, ["nan"] = true, ["yue"] = true, ["ko"] = true} + +data.neededhassubpage = {["ga"] = true, ["gv"] = true, ["nv"] = true, ["roa-jer"] = true, ["fr"] = true, ["rm"] = true, ["prg"] = true, ["gd"] = true, ["twf"] = true, ["en"] = true, ["ro"] = true, ["egl"] = true, ["roa-tar"] = true, ["gl"] = true, ["ast"] = true, ["br"] = true} + +--[[ + Script that should be applied to links in categories. +]] +data.catfix_scripts = { + ["ab"] = "Cyrl", + ["ae"] = "Avst", + ["ar"] = "Arab", + ["ary"] = "Arab", + ["be"] = "Cyrl", + ["cu"] = "Cyrs", + ["el"] = "Grek", + ["grc"] = "polytonic", + ["he"] = "Hebr", + ["ka"] = "Geor", + ["orv"] = "Cyrs", + ["ru"] = "Cyrl", + ["sa"] = "Deva", + ["syl"] = "Sylo", + ["xfa"] = "Ital", + ["zle-ono"] = "Cyrs", + ["hi"] = "Deva", + ["mr"] = "Deva" + --[[ + [""] = "", +]] +} + +-- Namespaces in which format_categories will add categories. +data.allowedNamespaces = {[""] = true, ["Appendix"] = true, ["Reconstruction"] = true, ["Citations"] = true} + +-- Pages not in namespaces in which format_categories normally adds categories, +-- but where we make an exception. 
+data.allowedPrefixedPages = {["Wiktionary:Sandbox"] = true} + +return data diff --git a/wiktra/wikt/translit/uum-translit.lua b/wiktra/wikt/translit/uum-translit.lua new file mode 100644 index 0000000..6fb8c05 --- /dev/null +++ b/wiktra/wikt/translit/uum-translit.lua @@ -0,0 +1,18 @@ +local export = {} +local tab = {["А"] = "A", ["Б"] = "B", ["В"] = "V", ["Г"] = "G", ["Д"] = "D", ["Е"] = "E", ["З"] = "Z", ["И"] = "I", ["Й"] = "J", ["К"] = "K", ["Л"] = "L", ["М"] = "M", ["Н"] = "N", ["О"] = "O", ["Ӧ"] = "Ö", ["П"] = "P", ["Р"] = "R", ["С"] = "S", ["Т"] = "T", ["У"] = "U", ["Ӱ"] = "Ü", ["Ф"] = "F", ["Х"] = "H", ["Ч"] = "Č", ["Ш"] = "Š", ["Ґ"] = "Ğʺ", ["Ы"] = "Ï", ["Э"] = "E", ["а"] = "a", ["б"] = "b", ["в"] = "v", ["г"] = "g", ["д"] = "d", ["е"] = "e", ["з"] = "z", ["и"] = "i", ["й"] = "j", ["к"] = "k", ["л"] = "l", ["м"] = "m", ["н"] = "n", ["о"] = "o", ["ӧ"] = "ö", ["п"] = "p", ["р"] = "r", ["с"] = "s", ["т"] = "t", ["у"] = "u", ["ӱ"] = "ü", ["ф"] = "f", ["х"] = "h", ["ч"] = "č", ["ш"] = "š", ["ґ"] = "ğ", ["ы"] = "ï", ["э"] = "e"} +local mapping = {["дж"] = "dž", ["Дж"] = "Dž"} -- multi-character sequences, replaced before the per-character table is applied +function export.tr(text, lang, sc) + -- Ё needs converting if it is decomposed: Е/е + U+0308 becomes the single code point Ё/ё (byte escapes keep the two forms distinct in source) + text = text:gsub("\208\181\204\136", "\209\145"):gsub("\208\149\204\136", "\208\129") + + -- е after a vowel, after a non-letter or at the beginning of the text becomes je + text = mw.ustring.gsub(text, "([АОӦУӰЫЕИЕаоӧуӱыэёие%A][\204\129\204\128]?)е", "%1je") + text = mw.ustring.gsub(text, "^Е", "Je") + text = mw.ustring.gsub(text, "^е", "je") + text = mw.ustring.gsub(text, "([^Ѐ-ӿ])Е", "%1Je") + text = mw.ustring.gsub(text, "([^Ѐ-ӿ])е", "%1je") + for digraph, latin in pairs(mapping) do text = mw.ustring.gsub(text, digraph, latin) end -- runs after the е rules so the Latin output is not re-matched by [^Ѐ-ӿ] + return (mw.ustring.gsub(text, ".", tab)) +end + +return export diff --git a/wiktra/wikt/translit/wikimedia languages.lua b/wiktra/wikt/translit/wikimedia languages.lua new file mode 100644 index 0000000..c0744f1 --- /dev/null +++ b/wiktra/wikt/translit/wikimedia languages.lua @@ -0,0 +1,60 @@ +local export = {} + +local WikimediaLanguage = {} + +function WikimediaLanguage:getCode() return self._code end + +function 
WikimediaLanguage:getCanonicalName() return self._rawData.canonicalName end + +-- function WikimediaLanguage:getAllNames() +-- return self._rawData.names +-- end + +function WikimediaLanguage:getType() return "Wikimedia" end + +function WikimediaLanguage:getWiktionaryLanguage() + if not self._wiktionaryLanguageObject then self._wiktionaryLanguageObject = require("languages").getByCode(self._rawData.wiktionary_code) end + + return self._wiktionaryLanguageObject +end + +-- Do NOT use this method! +-- All uses should be pre-approved on the talk page! +function WikimediaLanguage:getRawData() return self._rawData end + +WikimediaLanguage.__index = WikimediaLanguage +-- Returns a WikimediaLanguage object for CODE, or nil when mw.language does not know the tag or no data can be found for it. +function export.getByCode(code) + -- Only accept codes the software recognises + if not mw.language.isKnownLanguageTag(code) then return nil end + + local rawData = mw.loadData("wikimedia languages/data")[code] + + -- If there is no specific Wikimedia code, then "borrow" the information + -- from the general Wiktionary language code + if not rawData then + local lang = require("languages").getByCode(code) + + if not lang then return nil end + + rawData = {canonicalName = lang:getCanonicalName(), wiktionary_code = code} + elseif not rawData.canonicalName then + rawData = {canonicalName = require("languages").getByCode(rawData.wiktionary_code):getCanonicalName(), wiktionary_code = rawData.wiktionary_code} + end + + return setmetatable({_rawData = rawData, _code = code}, WikimediaLanguage) +end +-- Like getByCode, but when that yields nothing, falls back to the first entry of getWikimediaLanguages() of the Wiktionary language with this code (nil if there is no such language). +function export.getByCodeWithFallback(code) + local object = export.getByCode(code) + + if object then return object end + + local lang = require("languages").getByCode(code) + + if not lang then return nil end + + return lang:getWikimediaLanguages()[1] +end + +return export diff --git a/wiktra/wikt/translit/wikimedia languages/data.lua b/wiktra/wikt/translit/wikimedia languages/data.lua new file mode 100644 index 0000000..f39b119 --- /dev/null +++ b/wiktra/wikt/translit/wikimedia languages/data.lua @@ -0,0 +1,43 @@ 
+local m = {} + +m["als"] = {wiktionary_code = "gsw"} + +m["bat-smg"] = {wiktionary_code = "sgs"} + +m["bs"] = {canonicalName = "Bosnian", wiktionary_code = "sh"} + +m["bxr"] = {wiktionary_code = "bua"} + +m["diq"] = {wiktionary_code = "zza"} + +m["eml"] = {canonicalName = "Emiliano-Romagnolo", wiktionary_code = "egl"} + +m["fiu-vro"] = {wiktionary_code = "vro"} + +m["hr"] = {canonicalName = "Croatian", wiktionary_code = "sh"} + +m["ksh"] = {wiktionary_code = "gmw-cfr"} + +m["ku"] = {canonicalName = "Kurdish", wiktionary_code = "kmr"} + +m["kv"] = {canonicalName = "Komi", wiktionary_code = "kpv"} + +m["mhr"] = {wiktionary_code = "chm"} + +m["nrm"] = {wiktionary_code = "nrf"} + +m["roa-rup"] = {wiktionary_code = "rup"} + +m["roa-tara"] = {wiktionary_code = "roa-tar"} + +m["simple"] = {canonicalName = "Simple English", wiktionary_code = "en"} + +m["sr"] = {canonicalName = "Serbian", wiktionary_code = "sh"} + +m["zh-classical"] = {wiktionary_code = "ltc"} + +m["zh-min-nan"] = {wiktionary_code = "nan"} + +m["zh-yue"] = {wiktionary_code = "yue"} + +return m diff --git a/wiktra/wikt/translit/writing systems.lua b/wiktra/wikt/translit/writing systems.lua new file mode 100644 index 0000000..1cd89ee --- /dev/null +++ b/wiktra/wikt/translit/writing systems.lua @@ -0,0 +1,33 @@ +local export = {} + +local System = {} + +function System:getCode() return self._code end + +function System:getCanonicalName() return self._rawData.canonicalName end + +function System:getOtherNames() return self._rawData.otherNames or {} end + +-- function System:getAllNames() +-- return self._rawData.names +-- end + +function System:getType() return "Writing system" end + +function System:getCategoryName() return self._rawData.category or mw.getContentLanguage():ucfirst(self:getCanonicalName() .. 
"s") end + +function System:getRawData() return self._rawData end + +function System:toJSON() + local ret = {canonicalName = self:getCanonicalName(), categoryName = self:getCategoryName(), code = self._code, otherNames = self:getOtherNames(), type = self:getType()} + + return require("JSON").toJSON(ret) +end + +System.__index = System + +function export.makeObject(code, data) return data and setmetatable({_rawData = data, _code = code}, System) or nil end + +function export.getByCode(code) return export.makeObject(code, mw.loadData("writing systems/data")[code]) end + +return export diff --git a/wiktra/wikt/translit/writing systems/data.lua b/wiktra/wikt/translit/writing systems/data.lua new file mode 100644 index 0000000..7ab5915 --- /dev/null +++ b/wiktra/wikt/translit/writing systems/data.lua @@ -0,0 +1,15 @@ +local m = {} + +m["abjad"] = {canonicalName = "abjad", otherNames = {"consonantary", "consonantal alphabet"}} + +m["abugida"] = {canonicalName = "abugida", otherNames = {"alphasyllabary"}} + +m["alphabet"] = {canonicalName = "alphabet", category = "Alphabetic writing systems"} + +m["logography"] = {canonicalName = "logography", otherNames = {"ideography"}, category = "Logographic writing systems"} + +m["pictography"] = {canonicalName = "pictography", category = "Pictographic writing systems"} + +m["syllabary"] = {canonicalName = "syllabary", category = "Syllabaries"} + +return m diff --git a/wikt/translit/xal-translit.lua b/wiktra/wikt/translit/xal-translit.lua similarity index 77% rename from wikt/translit/xal-translit.lua rename to wiktra/wikt/translit/xal-translit.lua index e7086a8..bc610d8 100644 --- a/wikt/translit/xal-translit.lua +++ b/wiktra/wikt/translit/xal-translit.lua @@ -1,5 +1,3 @@ --- This module will transliterate Kalmyk language text per WT:XAL TR. 
- local export = {} local trtab = { @@ -11,7 +9,7 @@ local trtab = { [0x04BA] = "H" , [0x04BB] = "h" , -- SHHA [0x0414] = "D" , [0x0434] = "d" , -- DE -- IE is handled specially - [0x0401] = "Jo", [0x0451] = "jo", -- IO + [0x0401] = "Yo", [0x0451] = "yo", -- IO [0x0416] = "Ž" , [0x0436] = "ž" , -- ZHE [0x0496] = "J" , [0x0497] = "j" , -- ZHE WITH DESCENDER [0x0417] = "Z" , [0x0437] = "z" , -- ZE @@ -36,7 +34,7 @@ local trtab = { [0x0427] = "Ç" , [0x0447] = "ç" , -- CHE [0x0428] = "Ş" , [0x0448] = "ş" , -- SHA [0x0429] = "Şç", [0x0449] = "şç", -- SHCHA - [0x042A] = "\ʺ", [0x044A] = "\"", -- HARD SIGN + [0x042A] = "ʺ", [0x044A] = "\"", -- HARD SIGN [0x042B] = "Y" , [0x044B] = "y" , -- YERU [0x042C] = "ʹ" , [0x044C] = "'" , -- SOFT SIGN [0x042D] = "E" , [0x044D] = "e" , -- E @@ -72,6 +70,16 @@ local gives_e = { [0x0429] = true, [0x0449] = true -- SHCHA } +local tt_Mong = { + ["ᡃ"] = "ː", ["ᠠ"] = "a", ["ᡄ"] = "e", ["ᡅ"] = "i", ["ᡆ"] = "o", + ["ᡈ"] = "ö", ["ᡇ"] = "u", ["ᡉ"] = "ü", ["ᠨ"] = "n", ["ᡊ"] = "ng", + ["ᡋ"] = "b", ["ᡌ"] = "p", ["ᡍ"] = "x", ["ᡎ"] = "g", ["ᡏ"] = "m", ["ᠯ"] = "l", ["ᠰ"] = "s", ["ᠱ"] = "š", + ["ᡐ"] = "t", ["ᡑ"] = "d", ["ᡒ"] = "ċ", ["ᡓ"] = "j", ["ᡔ"] = "ć", + ["ᡕ"] = "y", ["ᠷ"] = "r", ["ᡖ"] = "w", ["ᠺ"] = "k", ["ᡗ"] = "ģ", ["ᡙ"] = "h", + ["ᡚ"] = "ĵ", ["ᡛ"] = "ń", ["ᡜ"] = "ź", ["ᢘ"] = "t", ["ᢙ"] = "ž", + ["᠎"] = "-", ["︖"] = "?", ["︕"] = "!", ["᠂"] = ",", ["᠃"] = "." 
+} + function export.tr(text, lang, sc) local result = {} local last = false @@ -94,35 +102,4 @@ function export.tr(text, lang, sc) return table.concat(result) end -return export - - --- Text Expected Actual --- Passed Хальмг Таңһч Xal'mg Tañhç Xal'mg Tañhç --- Passed мөрн mörn mörn --- Passed Әрәсә Äräsä Äräsä --- local tests = require('Module:UnitTests') --- local tr = require('Module:xal-translit').tr - --- local tag = mw.text.tag --- local function tag_Kalmyk(text) --- return tag("span", { class = "Cyrl", lang = "xal" }, text) --- end - --- function tests:check_translit(example, expected) --- self:equals( --- tag_Kalmyk(example), --- tr(example), --- expected) --- end - --- function tests:test_translit() --- local examples = { --- { "Хальмг Таңһч", "Xal'mg Tañhç" }, --- { "мөрн", "mörn" }, --- { "Әрәсә", "Äräsä" }, --- } --- self:iterate(examples, "check_translit") --- end - --- return tests +return export \ No newline at end of file diff --git a/wiktra/wikt/translit/xbc-translit.lua b/wiktra/wikt/translit/xbc-translit.lua new file mode 100644 index 0000000..898dfa2 --- /dev/null +++ b/wiktra/wikt/translit/xbc-translit.lua @@ -0,0 +1,12 @@ +local export = {} + +local mapping = {["α"] = "a", ["β"] = "b", ["γ"] = "g", ["δ"] = "d", ["ε"] = "e", ["ζ"] = "z", ["η"] = "ē", ["θ"] = "θ", ["ι"] = "i", ["κ"] = "k", ["λ"] = "l", ["μ"] = "m", ["ν"] = "n", ["ο"] = "o", ["π"] = "p", ["ρ"] = "r", ["ϸ"] = "š", ["σ"] = "s", ["τ"] = "t", ["υ"] = "u", ["φ"] = "f", ["χ"] = "x", ["ω"] = "ō", ["ς"] = "s", ["Α"] = "A", ["Β"] = "B", ["Γ"] = "G", ["Δ"] = "D", ["Ε"] = "E", ["Ζ"] = "Z", ["Η"] = "Ē", ["Θ"] = "Θ", ["Ι"] = "I", ["Κ"] = "K", ["Λ"] = "L", ["Μ"] = "M", ["Ν"] = "N", ["Ο"] = "O", ["Π"] = "P", ["Ρ"] = "R", ["Ϸ"] = "Š", ["Σ"] = "S", ["Τ"] = "T", ["Υ"] = "U", ["Φ"] = "F", ["Χ"] = "X", ["Ω"] = "Ō"} + +function export.tr(text, lang, sc) + if sc == "Grek" then + text = string.gsub(text, "[\1-\127\194-\244][\128-\191]*", mapping) + return text + end +end + +return export diff --git 
a/wiktra/wikt/translit/xmr-translit.lua b/wiktra/wikt/translit/xmr-translit.lua new file mode 100644 index 0000000..ab7fef1 --- /dev/null +++ b/wiktra/wikt/translit/xmr-translit.lua @@ -0,0 +1,7 @@ +local export = {} + +local chars = {["𐦠"] = "a", ["𐦀"] = "a", ["𐦡"] = "e", ["𐦁"] = "e", ["𐦣"] = "o", ["𐦃"] = "o", ["𐦢"] = "i", ["𐦂"] = "i", ["𐦤"] = "y", ["𐦄"] = "y", ["𐦥"] = "w", ["𐦅"] = "w", ["𐦦"] = "b", ["𐦆"] = "b", ["𐦧"] = "p", ["𐦈"] = "p", ["𐦨"] = "m", ["𐦉"] = "m", ["𐦩"] = "n", ["𐦊"] = "n", ["𐦪"] = "ne", ["𐦌"] = "ne", ["𐦫"] = "r", ["𐦎"] = "r", ["𐦏"] = "r", ["𐦬"] = "l", ["𐦐"] = "l", ["𐦭"] = "ḫ", ["𐦑"] = "ḫ", ["𐦮"] = "ẖ", ["𐦒"] = "ẖ", ["𐦱"] = "se", ["𐦕"] = "se", ["𐦯"] = "s", ["𐦓"] = "s", ["𐦲"] = "k", ["𐦖"] = "k", ["𐦳"] = "q", ["𐦗"] = "q", ["𐦴"] = "t", ["𐦘"] = "t", ["𐦵"] = "te", ["𐦚"] = "te", ["𐦶"] = "to", ["𐦜"] = "to", ["𐦷"] = "d", ["𐦝"] = "d"} + +function export.tr(text, lang, sc) return (mw.ustring.gsub(text, ".", chars)) end + +return export diff --git a/wiktra/wikt/translit/xsr-deva-translit.lua b/wiktra/wikt/translit/xsr-deva-translit.lua new file mode 100644 index 0000000..e9b83e4 --- /dev/null +++ b/wiktra/wikt/translit/xsr-deva-translit.lua @@ -0,0 +1,153 @@ +local export = {} +local gsub = mw.ustring.gsub +local match = mw.ustring.match + +local conv = { + -- consonants + ["क"] = "k", + ["ख"] = "kh", + ["ग"] = "g", + ["घ"] = "gh", + ["ङ"] = "ng", + ["च"] = "ts", + ["छ"] = "tsh", + ["ज"] = "dz", + ["झ"] = "dzh", + ["ञ"] = "ny", + ["ट"] = "ṭ", + ["ठ"] = "ṭh", + ["ड"] = "ḍ", + ["ढ"] = "ḍh", + ["ण"] = "ṇ", + ["त"] = "t", + ["थ"] = "th", + ["द"] = "d", + ["ध"] = "dh", + ["न"] = "n", + ["प"] = "p", + ["फ"] = "ph", + ["ब"] = "b", + ["भ"] = "bh", + ["म"] = "m", + ["य"] = "y", + ["र"] = "r", + ["ल"] = "l", + ["व"] = "w", + ["ळ"] = "ḷ", + ["श"] = "sh", + ["ष"] = "ṣ", + ["स"] = "s", + ["ह"] = "h", + ["श़"] = "zh", + ["स़"] = "z", + ["ॽ"] = "'", + ["च़"] = "c", + ["छ़"] = "ch", + + -- + ["क़"] = "q", + ["ख़"] = "x", + ["ग़"] = "ġ", + ["ऴ"] = "ḻ", + ["ज़"] = "z", + ["झ़"] 
= "ž", + ["ड़"] = "ṛ", + ["ढ़"] = "ṛh", + ["फ़"] = "f", + ["थ़"] = "θ", + ["द़"] = "ð", + ["ऩ"] = "ṉ", + ["ऱ"] = "ṟ", + ["व़"] = "V", + ["ॹ"] = "ž", + -- vowel diacritics + ["ि"] = "i", + ["ु"] = "u", + ["े"] = "e", + ["ो"] = "o", + ["ा"] = "ā", + ["ी"] = "ī", + ["ू"] = "ū", + ["ृ"] = "r̥", + ["ॄ"] = "r̥̄", + ["ॢ"] = "l̥", + ["ॣ"] = "l̥̄", + ["ै"] = "ai", + ["ौ"] = "au", + ["ॉ"] = "ŏ", + ["ॅ"] = "ĕ", + -- vowel signs + ["अ"] = "a", + ["इ"] = "i", + ["उ"] = "u", + ["ए"] = "e", + ["ओ"] = "o", + ["आ"] = "ā", + ["ई"] = "ī", + ["ऊ"] = "ū", + ["ऋ"] = "r̥", + ["ॠ"] = "r̥̄", + ["ऌ"] = "l̥", + ["ॡ"] = "l̥̄", + ["ऐ"] = "ai", + ["औ"] = "au", + ["ऑ"] = "ŏ", + ["ॲ"] = "ĕ", + ["ऍ"] = "ĕ", + -- chandrabindu + ["ँ"] = "̃", + -- anusvara + ["ं"] = "̃", + -- visarga + ["ः"] = "ḥ", + -- virama + ["्"] = "", + + -- numerals + ["०"] = "0", + ["१"] = "1", + ["२"] = "2", + ["३"] = "3", + ["४"] = "4", + ["५"] = "5", + ["६"] = "6", + ["७"] = "7", + ["८"] = "8", + ["९"] = "9", + -- punctuation + ["।"] = ".", -- danda + ["॥"] = ".", -- double danda + ["+"] = "", -- compound separator + + -- abbreviation sign + ["॰"] = "." +} + +local all_cons, special_cons = "कखगघङचछजझञटठडढणतथदधनपफबभमयरलवसशषह", "कखगघचछजझटठडढणतथदधनपफबभमयरलवशषसह" +local vowel, vowel_sign = "aिुृेोाीूैौॉॅॆॊॄॢॣ", "अइउएओआईऊऋॠॡऌऐऔऑऍ" + +local function rev_string(text) + local result, length = {}, mw.ustring.len(text) + for i = length, 1, -1 do table.insert(result, mw.ustring.sub(text, i, i)) end + return table.concat(result) +end +function export.tr(text, lang, sc) + text = gsub(text, "([" .. all_cons .. "]़?)([" .. vowel .. "्]?)", function(c, d) return c .. (d == "" and "a" or d) end) + for word in mw.ustring.gmatch(text, "[ऀ-ॿa]+") do + local orig_word = word + word = rev_string(word) + word = gsub(word, "^a(़?)([" .. all_cons .. "])(.)(.?)", function(opt, first, second, third) return (((match(first, "[" .. special_cons .. "]") and match(second, "ं") or match(first, "[" .. special_cons .. 
"]") and match(second, "्") and not perm_cl[first .. second .. third]) or match(first .. second, "य[aिुृेोाीूैौॉॅॆॊआईउऊइएऐओऔ]") or match(first .. second, "ह[अaिुृेोाीूैौॉॅॆॊआईउऊइएऐओऔ]")) and "a" or "") .. opt .. first .. second .. third end) + + local escaped_orig_word = gsub(orig_word, "%+", "") + text = gsub(text, orig_word, rev_string(word)) + text = gsub(text, "च्य", "c") + text = gsub(text, "छ्य", "ch") + text = gsub(text, "ज्य", "j") + text = gsub(text, "झ्य", "jh") + end + text = gsub(text, ".़?", conv) + text = gsub(text, "a([iu])̃", "a͠%1") + text = gsub(text, "[<>]", "") + return mw.ustring.toNFC(text) +end +return export diff --git a/wiktra/wikt/translit/xsr-translit.lua b/wiktra/wikt/translit/xsr-translit.lua new file mode 100644 index 0000000..80d4751 --- /dev/null +++ b/wiktra/wikt/translit/xsr-translit.lua @@ -0,0 +1,169 @@ +--- from Module:bo-translit +local export = {} +local gsub = mw.ustring.gsub +local match = mw.ustring.match + +local conv = { + + ["ཀ"] = "k", + ["ཁ"] = "kh", + ["ག"] = "g", + ["ང"] = "ng", + ["ཅ"] = "c", + ["ཆ"] = "ch", + ["ཇ"] = "j", + ["ཉ"] = "ny", + ["ཏ"] = "t", + ["ཐ"] = "th", + ["ད"] = "d", + ["ན"] = "n", + ["པ"] = "p", + ["ཕ"] = "ph", + ["བ"] = "b", + ["མ"] = "m", + ["ཙ"] = "ts", + ["ཚ"] = "tsh", + ["ཛ"] = "dz", + ["ཝ"] = "w", + ["ཞ"] = "zh", + ["ཟ"] = "z", + ["འ"] = "‘", + ["ཡ"] = "y", + ["ར"] = "r", + ["ལ"] = "l", + ["ཤ"] = "sh", + ["ས"] = "s", + ["ཧ"] = "h", + ["ཨ"] = "a", + ["ཊ"] = "ṭ", + ["ཋ"] = "ṭh", + ["ཌ"] = "ḍ", + ["ཎ"] = "ṇ", + ["ཥ"] = "ṣ", + + ["ི"] = "i", + ["ུ"] = "u", + ["ེ"] = "e", + ["ོ"] = "o", + ["ཱ"] = "ā", + ["ཱི"] = "ī", + ["ཱུ"] = "ū", + ["ཻ"] = "ai", + ["ཽ"] = "au", + ["ྲྀ"] = "ṛ", + ["ཷ"] = "ṝ", + ["ླྀ"] = "ḷ", + ["ཹ"] = "ḹ", + ["ཾ"] = "ṃ", + ["ྃ"] = "ṃ", + ["ཿ"] = "aḥ", + ["ༀ"] = "oṃ", + ["༎"] = ".", + ["།"] = ".", + ["ྈ"] = "x", + ["ྉ"] = "f", + + ["ྐ"] = "k", + ["ྑ"] = "kh", + ["ྒ"] = "g", + ["ྔ"] = "ng", + ["ྕ"] = "c", + ["ྖ"] = "ch", + ["ྗ"] = "j", + ["ྙ"] = "ny", + ["ྟ"] = "t", + ["ྠ"] = 
"th", + ["ྡ"] = "d", + ["ྣ"] = "n", + ["ྤ"] = "p", + ["ྥ"] = "ph", + ["ྦ"] = "b", + ["ྨ"] = "m", + ["ྩ"] = "ts", + ["ྪ"] = "tsh", + ["ྫ"] = "dz", + ["ྭ"] = "w", + ["ྮ"] = "zh", + ["ྯ"] = "z", + ["ྰ"] = "‘", + ["ྱ"] = "y", + ["ྲ"] = "r", + ["ླ"] = "l", + ["ྴ"] = "sh", + ["ྶ"] = "s", + ["ྷ"] = "h", + ["ྸ"] = "a", + ["ྚ"] = "ṭ", + ["ྛ"] = "ṭh", + ["ྜ"] = "ḍ", + ["ྞ"] = "ṇ", + ["ྵ"] = "ṣ" +} + +local symbol = {["༠"] = "0", ["༡"] = "1", ["༢"] = "2", ["༣"] = "3", ["༤"] = "4", ["༥"] = "5", ["༦"] = "6", ["༧"] = "7", ["༨"] = "8", ["༩"] = "9", ["༪"] = "0.5", ["༫"] = "1.5", ["༬"] = "2.5", ["༭"] = "3.5", ["༮"] = "4.5", ["༯"] = "5.5", ["༰"] = "6.5", ["༱"] = "7.5", ["༲"] = "8.5", ["༳"] = "9.5", ["་"] = " ", ["།"] = "·", ["‘"] = "'"} + +function export.tr(text, lang, sc, debug_mode) + text = gsub(text, "[༌་]+ ?", "་") + text = gsub(text, "[་།]+$", "") + text = gsub(text, "([^་])(འ[ཱིེོིུྲཷླཹཾཿ])", "%1་‘་%2") + + for Tibetan_word in mw.ustring.gmatch(text, "[་-༑ཀ-ྼ]+") do + Tibetan_word = gsub(Tibetan_word, "་$", "") + + for syllable in mw.ustring.gmatch(Tibetan_word, "[ཀ-ྼ]+") do + local tr = {} + + local syl = gsub(syllable, "ཨ([ཱིེོིུྲཷླཹཾཿ])", "%1") + syl = gsub(syl, "([ྐྑྒྔྕྖྗྙྟྠྡྣྤྥྦྨྩྪྫྮྯྭྰྱྲླྴྶྷྸ]+)([^ཱིེོིུྲཷླཹཾཿྐྑྒྔྕྖྗྙྟྠྡྣྤྥྦྨྩྪྫྮྯྭྰྱྲླྴྶྷྸ]*)$", "%1a%2") + letter = mw.text.split(syl, "", true) + + for i = 1, #letter do tr[i] = conv[letter[i]] or letter[i] end + + if not match(syllable, "[ྐྑྒྔྕྖྗྙྟྠྡྣྤྥྦྨྩྪྫྮྯྭྰྱྲླྴྶྷྸིེོུཨཱཱཱིཻཽུྲྀཷླྀཹཾྃཿ]") then + if #letter < 5 then + if #letter > 3 then + if letter[2] ~= "འ" and letter[3] ~= "འ" then table.insert(tr, 3, "a") end + + elseif #letter > 2 then + if letter[3] == "འ" then + if letter[1] == "འ" then + tr = {"‘", tr[2], "a", "‘"} + else + tr = {tr[1], tr[2], "a", "‘"} + end + + elseif not match(letter[1], "[གདབའམ]") or (match(letter[1] .. letter[2] .. 
letter[3], "[གདབའམ][གངཐབམའརལ]ས")) then + table.insert(tr, 2, "a") + else + table.insert(tr, 3, "a") + end + + elseif match(letter[1], "[གདབའམ]") and not match(letter[2] or "", "[གངདཐབམའརལསན]") then + table.insert(tr, "a") + else + table.insert(tr, 2, "a") + end + end + end + + tr = table.concat(tr) + + if match(syllable, "གཡ") then tr = gsub(tr, "gy", "g.y") end + + text = gsub(text, syllable, tr, 1) + end + end + + text = gsub(text, ".", symbol) + text = gsub(text, " ' ", "") + text = gsub(text, " *· *·? *", " · ") + text = gsub(text, " *%.", ".") + + if match(text, "[ཀ-ྼ]") and not debug_mode then + return nil + else + return text + end +end + +return export diff --git a/wiktra/wikt/translit/xwo-translit.lua b/wiktra/wikt/translit/xwo-translit.lua new file mode 100644 index 0000000..b876283 --- /dev/null +++ b/wiktra/wikt/translit/xwo-translit.lua @@ -0,0 +1,14 @@ +local export = {} +local MACRON = mw.ustring.char(0x0304) + +local tt = {["ᠠ"] = "a", ["ᡄ"] = "e", ["ᡅ"] = "i", ["ᡆ"] = "o", ["ᡇ"] = "u", ["ᡈ"] = "ö", ["ᡉ"] = "ü", ["ᡋ"] = "b", ["ᡌ"] = "p", ["ᡏ"] = "m", ["ᠯ"] = "l", ["ᠰ"] = "s", ["ᠱ"] = "š", ["ᠨ"] = "n", ["ᡍ"] = "x", ["ᡎ"] = "ɣ", ["ᡐ"] = "t", ["ᡑ"] = "d", ["ᡔ"] = "c", ["ᡒ"] = "č", ["ᡓ"] = "ǰ", ["ᡕ"] = "y", ["ᠷ"] = "r", ["ᡖ"] = "w", ["ᡙ"] = "h", ["ᡘ"] = "gh", ["ᡗ"] = "q", ["ᡚ"] = "ž", ["ᡛ"] = "ń", ["ᡜ"] = "dz", ["ᡊ"] = "ng", ["ᠴ"] = "z", ["ᡃ"] = MACRON, ["᠎"] = "-", ["︖"] = "?", ["︕"] = "!", ["᠂"] = ",", ["᠃"] = ".", [" "] = "-", ["᠊"] = "-"} + +function export.tr(text) + local velar_conv = {["x"] = "k", ["ɣ"] = "g"} + text = mw.ustring.gsub(text, ".", tt) + text = mw.ustring.gsub(text, "([xɣ])(.?)", function(velar, vowel) return ((mw.ustring.match(vowel, "[eiöü ]") or vowel == "") and mw.ustring.gsub(velar, "[xɣ]", velar_conv) or velar) .. 
vowel end) + text = mw.ustring.gsub(text, "zi", "ji") + return text +end + +return export diff --git a/wiktra/wikt/translit/ybh-translit.lua b/wiktra/wikt/translit/ybh-translit.lua new file mode 100644 index 0000000..d9a272f --- /dev/null +++ b/wiktra/wikt/translit/ybh-translit.lua @@ -0,0 +1,160 @@ +local export = {} +local gsub = mw.ustring.gsub +local match = mw.ustring.match + +local conv = { + -- consonants + ["क"] = "k", + ["ख"] = "kh", + ["ग"] = "g", + ["घ"] = "gh", + ["ङ"] = "ṅ", + ["च"] = "c", + ["छ"] = "ch", + ["ज"] = "j", + ["झ"] = "jh", + ["ञ"] = "ñ", + ["ट"] = "ṭ", + ["ठ"] = "ṭh", + ["ड"] = "ḍ", + ["ढ"] = "ḍh", + ["ण"] = "ṇ", + ["त"] = "t", + ["थ"] = "th", + ["द"] = "d", + ["ध"] = "dh", + ["न"] = "n", + ["प"] = "p", + ["फ"] = "ph", + ["ब"] = "b", + ["भ"] = "bh", + ["म"] = "m", + ["य"] = "y", + ["र"] = "r", + ["ल"] = "l", + ["व"] = "w", + ["श"] = "ś", + ["ष"] = "ṣ", + ["स"] = "s", + ["ह"] = "h", + + ["क़"] = "q", + ["ख़"] = "x", + ["ग़"] = "ġ", + ["ऴ"] = "ḻ", + ["ळ"] = "ḷ", + ["ज़"] = "z", + ["श़"] = "ž", + ["झ़"] = "ž", + ["ड़"] = "ṛ", + ["ढ़"] = "ṛh", + ["फ़"] = "f", + ["थ़"] = "θ", + ["द़"] = "ð", + ["ऩ"] = "ṉ", + ["ऱ"] = "ṟ", + ["ॽ"] = "'", + ["व़"] = "V", + ["ॹ"] = "ž", + -- vowel diacritics + ["ि"] = "i", + ["ु"] = "u", + ["े"] = "e", + ["ो"] = "o", + ["ा"] = "ā", + ["ी"] = "ī", + ["ू"] = "ū", + ["ृ"] = "r̥", + ["ॄ"] = "r̥̄", + ["ॢ"] = "l̥", + ["ॣ"] = "l̥̄", + ["ै"] = "ai", + ["ौ"] = "au", + ["ॉ"] = "ŏ", + ["ॅ"] = "ĕ", + -- vowel signs + ["अ"] = "a", + ["इ"] = "i", + ["उ"] = "u", + ["ए"] = "e", + ["ओ"] = "o", + ["आ"] = "ā", + ["ई"] = "ī", + ["ऊ"] = "ū", + ["ऋ"] = "r̥", + ["ॠ"] = "r̥̄", + ["ऌ"] = "l̥", + ["ॡ"] = "l̥̄", + ["ऐ"] = "ai", + ["औ"] = "au", + ["ऑ"] = "ŏ", + ["ॲ"] = "ĕ", + ["ऍ"] = "ĕ", + -- chandrabindu + ["ँ"] = "̃", + -- anusvara + ["ं"] = "ṃ", + -- visarga (length) + ["ः"] = ":", + -- virama + ["्"] = "", + -- om + ["ॐ"] = "om̐", + -- zero width joiner + ["‍"] = "", + -- numerals + ["०"] = "0", + ["१"] = "1", + ["२"] = "2", + ["३"] = 
"3", + ["४"] = "4", + ["५"] = "5", + ["६"] = "6", + ["७"] = "7", + ["८"] = "8", + ["९"] = "9", + -- punctuation + ["।"] = ".", -- danda + ["॥"] = ".", -- double danda + ["+"] = "", -- compound separator + + -- abbreviation sign + ["॰"] = "." +} + +local nasal_assim = {["क"] = "ङ", ["ख"] = "ङ", ["ग"] = "ङ", ["घ"] = "ङ", ["च"] = "ञ", ["छ"] = "ञ", ["ज"] = "ञ", ["झ"] = "ञ", ["ट"] = "ण", ["ठ"] = "ण", ["ड"] = "ण", ["ढ"] = "ण", ["प"] = "म", ["फ"] = "म", ["ब"] = "म", ["भ"] = "म", ["म"] = "म", ["त"] = "न", ["थ"] = "न", ["द"] = "न", ["ध"] = "न", ["न"] = "न", ["ष"] = "न", ["श"] = "ङ", ["स"] = "न", ["य"] = "म", ["र"] = "म", ["ल"] = "ँ", ["व"] = "म", ["ह"] = "ङ"} +local perm_cl = {["ज्न"] = true, ["ज्ञ"] = true, ["ट्र"] = true, ["ड्र"] = true, ["ट्स"] = true, ["ड्स"] = true, ["स्ड"] = true} + +local all_cons, special_cons = "कखगघङचछजझञटठडढणतथदधनपफबभमयरलवसशषह", "कखगघचछजझटठडढणतथदधनपफबभमयरलवशषसह" +local vowel, vowel_sign = "aिुृेोाीूैौॉॅॆॊॄॢॣ", "अइउएओआईऊऋॠॡऌऐऔऑऍ" + +local function rev_string(text) + local result, length = {}, mw.ustring.len(text) + for i = length, 1, -1 do table.insert(result, mw.ustring.sub(text, i, i)) end + return table.concat(result) +end +function export.tr(text, lang, sc) + text = gsub(text, "([" .. all_cons .. "]़?)([" .. vowel .. "्]?)", function(c, d) return c .. (d == "" and "a" or d) end) + for word in mw.ustring.gmatch(text, "[ऀ-ॿa]+") do + local orig_word = word + word = rev_string(word) + word = gsub(word, "^a(़?)([" .. all_cons .. "])(.)(.?)", function(opt, first, second, third) return (((match(first, "[" .. special_cons .. "]") and match(second, "ं") or match(first, "[" .. special_cons .. "]") and match(second, "्") and not perm_cl[first .. second .. third]) or match(first .. second, "य[aिुृेोाीूैौॉॅॆॊआईउऊइएऐओऔ]") or match(first .. second, "ह[अaिुृेोाीूैौॉॅॆॊआईउऊइएऐओऔ]")) and "a" or "a") .. opt .. first .. second .. third end) + + word = gsub(word, "(.?)ं(.)", function(succ, prev) return succ .. (succ .. 
prev == "a" and "्म" or (succ == "" and match(prev, "[" .. vowel .. "]") and "̃" or nasal_assim[succ] or "ṃ")) .. prev end) + + local escaped_orig_word = gsub(orig_word, "%+", "") + text = gsub(text, orig_word, rev_string(word)) + text = gsub(text, "ज्ञ", "gy") + text = gsub(text, "े़", "ê") + text = gsub(text, "ए़", "ê") + end + text = gsub(text, ".़?", conv) + text = gsub(text, "a([iu])̃", "a͠%1") + text = gsub(text, "[<>]", "") + text = gsub(text, "ॱ", "") + text = gsub(text, "e़", "ê") + + text = gsub(text, "rh̥", "hr̥") + return mw.ustring.toNFC(text) +end +return export diff --git a/wiktra/wikt/translit/ybi-translit.lua b/wiktra/wikt/translit/ybi-translit.lua new file mode 100644 index 0000000..07d4a97 --- /dev/null +++ b/wiktra/wikt/translit/ybi-translit.lua @@ -0,0 +1,162 @@ +local export = {} +local gsub = mw.ustring.gsub +local match = mw.ustring.match + +local conv = { + -- consonants + ["क"] = "k", + ["ख"] = "kh", + ["ग"] = "g", + ["घ"] = "gh", + ["ङ"] = "ṅ", + ["च"] = "c", + ["छ"] = "ch", + ["ज"] = "j", + ["झ"] = "jh", + ["ञ"] = "ñ", + ["ट"] = "ṭ", + ["ठ"] = "ṭh", + ["ड"] = "ḍ", + ["ढ"] = "ḍh", + ["ण"] = "ṇ", + ["त"] = "t", + ["थ"] = "th", + ["द"] = "d", + ["ध"] = "dh", + ["न"] = "n", + ["प"] = "p", + ["फ"] = "ph", + ["ब"] = "b", + ["भ"] = "bh", + ["म"] = "m", + ["य"] = "y", + ["र"] = "r", + ["ल"] = "l", + ["व"] = "w", + ["श"] = "ś", + ["ष"] = "ṣ", + ["स"] = "s", + ["ह"] = "h", + + ["क़"] = "q", + ["ख़"] = "x", + ["ग़"] = "ġ", + ["ऴ"] = "ḻ", + ["ळ"] = "ḷ", + ["ज़"] = "z", + ["श़"] = "ž", + ["झ़"] = "ž", + ["ड़"] = "ṛ", + ["ढ़"] = "ṛh", + ["फ़"] = "f", + ["थ़"] = "θ", + ["द़"] = "ð", + ["ऩ"] = "ṉ", + ["ऱ"] = "ṟ", + ["ॽ"] = "'", + ["व़"] = "V", + ["ॹ"] = "ž", + -- vowel diacritics + ["ि"] = "i", + ["ु"] = "u", + ["े"] = "e", + ["ो"] = "o", + ["ा"] = "ā", + ["ी"] = "ī", + ["ू"] = "ū", + ["ृ"] = "r̥", + ["ॄ"] = "r̥̄", + ["ॢ"] = "l̥", + ["ॣ"] = "l̥̄", + ["ै"] = "ai", + ["ौ"] = "au", + ["ॉ"] = "ŏ", + ["ॅ"] = "ĕ", + -- vowel signs + ["अ"] = "a", + ["इ"] = 
"i", + ["उ"] = "u", + ["ए"] = "e", + ["ओ"] = "o", + ["आ"] = "ā", + ["ई"] = "ī", + ["ऊ"] = "ū", + ["ऋ"] = "r̥", + ["ॠ"] = "r̥̄", + ["ऌ"] = "l̥", + ["ॡ"] = "l̥̄", + ["ऐ"] = "ai", + ["औ"] = "au", + ["ऑ"] = "ŏ", + ["ॲ"] = "ĕ", + ["ऍ"] = "ĕ", + -- chandrabindu + ["ँ"] = "̃", + -- anusvara + ["ं"] = "ṃ", + -- visarga (length) + ["ः"] = ":", + -- virama + ["्"] = "", + -- om + ["ॐ"] = "om̐", + -- zero width joiner + ["‍"] = "", + -- zero width non joiner + ["‌"] = "", + -- numerals + ["०"] = "0", + ["१"] = "1", + ["२"] = "2", + ["३"] = "3", + ["४"] = "4", + ["५"] = "5", + ["६"] = "6", + ["७"] = "7", + ["८"] = "8", + ["९"] = "9", + -- punctuation + ["।"] = ".", -- danda + ["॥"] = ".", -- double danda + ["+"] = "", -- compound separator + + -- abbreviation sign + ["॰"] = "." +} + +local nasal_assim = {["क"] = "ङ", ["ख"] = "ङ", ["ग"] = "ङ", ["घ"] = "ङ", ["च"] = "ञ", ["छ"] = "ञ", ["ज"] = "ञ", ["झ"] = "ञ", ["ट"] = "ण", ["ठ"] = "ण", ["ड"] = "ण", ["ढ"] = "ण", ["प"] = "म", ["फ"] = "म", ["ब"] = "म", ["भ"] = "म", ["म"] = "म", ["त"] = "न", ["थ"] = "न", ["द"] = "न", ["ध"] = "न", ["न"] = "न", ["ष"] = "न", ["श"] = "ङ", ["स"] = "न", ["य"] = "म", ["र"] = "म", ["ल"] = "ँ", ["व"] = "म", ["ह"] = "ङ"} +local perm_cl = {["ज्न"] = true, ["ज्ञ"] = true, ["ट्र"] = true, ["ड्र"] = true, ["ट्स"] = true, ["ड्स"] = true, ["स्ड"] = true} + +local all_cons, special_cons = "कखगघङचछजझञटठडढणतथदधनपफबभमयरलवसशषह", "कखगघचछजझटठडढणतथदधनपफबभमयरलवशषसह" +local vowel, vowel_sign = "aिुृेोाीूैौॉॅॆॊॄॢॣ", "अइउएओआईऊऋॠॡऌऐऔऑऍ" + +local function rev_string(text) + local result, length = {}, mw.ustring.len(text) + for i = length, 1, -1 do table.insert(result, mw.ustring.sub(text, i, i)) end + return table.concat(result) +end +function export.tr(text, lang, sc) + text = gsub(text, "([" .. all_cons .. "]़?)([" .. vowel .. "्]?)", function(c, d) return c .. (d == "" and "a" or d) end) + for word in mw.ustring.gmatch(text, "[ऀ-ॿa]+") do + local orig_word = word + word = rev_string(word) + word = gsub(word, "^a(़?)([" .. 
all_cons .. "])(.)(.?)", function(opt, first, second, third) return (((match(first, "[" .. special_cons .. "]") and match(second, "ं") or match(first, "[" .. special_cons .. "]") and match(second, "्") and not perm_cl[first .. second .. third]) or match(first .. second, "य[aिुृेोाीूैौॉॅॆॊआईउऊइएऐओऔ]") or match(first .. second, "ह[अaिुृेोाीूैौॉॅॆॊआईउऊइएऐओऔ]")) and "a" or "") .. opt .. first .. second .. third end) + + word = gsub(word, "(.?)ं(.)", function(succ, prev) return succ .. (succ .. prev == "a" and "्म" or (succ == "" and match(prev, "[" .. vowel .. "]") and "̃" or nasal_assim[succ] or "ṃ")) .. prev end) + + local escaped_orig_word = gsub(orig_word, "%+", "") + text = gsub(text, orig_word, rev_string(word)) + text = gsub(text, "ज्ञ", "gy") + text = gsub(text, "े़", "ê") + text = gsub(text, "ए़", "ê") + end + text = gsub(text, ".़?", conv) + text = gsub(text, "a([iu])̃", "a͠%1") + text = gsub(text, "[<>]", "") + text = gsub(text, "ॱ", "") + text = gsub(text, "e़", "ê") + + text = gsub(text, "rh̥", "hr̥") + return mw.ustring.toNFC(text) +end +return export diff --git a/wiktra/wikt/translit/yi-translit.lua b/wiktra/wikt/translit/yi-translit.lua new file mode 100644 index 0000000..60e2b3d --- /dev/null +++ b/wiktra/wikt/translit/yi-translit.lua @@ -0,0 +1,55 @@ +local export = {} + +local tt = {["א"] = "q", ["אָ"] = "o", ["אַ"] = "a", ["בּ"] = "b", ["ב"] = "b", ["בֿ"] = "v", ["גּ"] = "g", ["ג"] = "g", ["גֿ"] = "g", ["דּ"] = "d", ["ד"] = "d", ["דֿ"] = "d", ["ה"] = "H", ["ו"] = "w", ["וּ"] = "u", ["וו"] = "v", ["װ"] = "v", ["וי"] = "oy", ["ױ"] = "oy", ["ז"] = "z", ["ח"] = "kh", ["ט"] = "t", ["י"] = "y", ["יִ"] = "i", ["יִ"] = "i", ["יי"] = "ey", ["ײ"] = "ey", ["ייַ"] = "ay", ["ײַ"] = "ay", ["ײַ"] = "ay", ["כּ"] = "k", ["כ"] = "kh", ["כֿ"] = "kh", ["ךּ"] = "k", ["ך"] = "kh", ["ךֿ"] = "kh", ["ל"] = "l", ["מ"] = "m", ["ם"] = "m", ["נ"] = "n", ["ן"] = "n", ["ס"] = "s", ["ע"] = "e", ["פּ"] = "p", ["פ"] = "F", ["פֿ"] = "f", ["ףּ"] = "p", ["ף"] = "f", ["ףֿ"] = "f", ["צ"] 
= "ts", ["ץ"] = "ts", ["ק"] = "k", ["ר"] = "r", ["שׁ"] = "sh", ["ש"] = "sh", ["שׂ"] = "s", ["תּ"] = "t", ["ת"] = "s", ["תֿ"] = "s", ["־"] = "-", ["׳"] = "'", ["״"] = "\""} + +-- in precedence order +local tokens = {"ייַ", "אָ", "אַ", "בּ", "בֿ", "גּ", "גֿ", "דּ", "דֿ", "וּ", "וו", "יִ", "יִ", "יי", "ײַ", "וי", "כּ", "כֿ", "ךּ", "ךֿ", "פּ", "פֿ", "ףּ", "ףֿ", "שׁ", "שׂ", "תּ", "תֿ", "א", "ב", "ג", "ד", "ה", "ו", "ױ", "װ", "ז", "ח", "ט", "י", "ײ", "ײַ", "כ", "ך", "ל", "מ", "ם", "נ", "ן", "ס", "ע", "פ", "ף", "צ", "ץ", "ק", "ר", "ש", "ת", "־", "׳", "״"} + +hebrew_only_tokens = {"בֿ", "ח", "כּ", "שׂ", "ת"} + +function export.tr(text, lang, sc) + local hebrew_only = false + for _, token in ipairs(hebrew_only_tokens) do + if string.find(text, token) ~= nil then + hebrew_only = true + break + end + end + + for _, token in ipairs(tokens) do text = string.gsub(text, token, tt[token]) end + + local suffix = text ~= "-" and string.sub(text, 1, 1) == "-" + local prefix = text ~= "-" and string.sub(text, -1, -1) == "-" + + if suffix then text = string.gsub(text, "^-", "-q") end + if prefix then text = string.gsub(text, "-$", "q-") end + text = string.gsub(text, "([bcdfFghHjklmnpqrstvwxz])y$", "%1i") + text = string.gsub(text, "([bcdfFghHjklmnpqrstvwxz])y([^aeiouwy])", "%1i%2") + text = string.gsub(text, "([bcdfFghHjklmnpqrstvwxz])y([^aeiouwy])", "%1i%2") -- repeated to handle overlapping cases + text = string.gsub(text, "([abcdefFghHijklmnopqrstuvxyz])w", "%1u") + hebrew_only = hebrew_only or (string.find(text, "w") ~= nil) + text = string.gsub(text, "w", "v") + hebrew_only = hebrew_only or (string.find(text, "F") ~= nil) + text = string.gsub(text, "F$", "p") + text = string.gsub(text, "F([^a-zFH])", "p%1") + text = string.gsub(text, "F", "f") + text = string.gsub(text, "zsh", "zh") + if suffix then text = string.gsub(text, "^%-q", "-") end + if prefix then text = string.gsub(text, "q%-$", "-") end + text = string.gsub(text, "q([aeo]y)", "%1") + text = string.gsub(text, 
"q([iu])", "%1") + hebrew_only = hebrew_only or (string.find(text, "q") ~= nil) + text = string.gsub(text, "q", "a") + hebrew_only = hebrew_only or (string.find(text, "H[^aeiou]") ~= nil) or (string.find(text, "H$") ~= nil) + text = string.gsub(text, "H", "h") + + local categories = "" + if hebrew_only then + local namespace = mw.title.getCurrentTitle().nsText + if namespace == "" or namespace == "Appendix" then categories = "[[Category:Requests for transliteration of Yiddish terms with Hebrew-only letters]]" end + end + + return text .. categories +end + +return export diff --git a/wiktra/wikt/translit/ykg-translit.lua b/wiktra/wikt/translit/ykg-translit.lua new file mode 100644 index 0000000..0e7054c --- /dev/null +++ b/wiktra/wikt/translit/ykg-translit.lua @@ -0,0 +1,109 @@ +local u = mw.ustring.char + +local MACRON = u(0x0304) +local DOTABOVE = u(0x0307) +local DOTBELOW = u(0x0323) + +local str_gsub, ugsub = string.gsub, mw.ustring.gsub +local UTF8char = "[\1-\127\194-\244][\128-\191]*" + +local export = {} + +local tab = { + ["А"] = "A", + ["а"] = "a", + ["Б"] = "B", + ["б"] = "b", + ["В"] = "W", + ["в"] = "w", + ["Г"] = "G", + ["г"] = "g", + ["Г̧̧"] = "H", + ["г̧"] = "h", + ["Ғ"] = "H", + ["ғ"] = "h", + ["Ҕ"] = "H", + ["ҕ"] = "h", -- Atlasova, Wikipedia and Kurilov all differ in this + ["Д"] = "D", + ["д"] = "d", + ["И"] = "I", + ["и"] = "i", + ["Й"] = "J", + ["й"] = "j", + ["К"] = "K", + ["к"] = "k", + ["Л"] = "L", + ["л"] = "l", + ["М"] = "M", + ["м"] = "m", + ["Н"] = "N", + ["н"] = "n", + ["Ӈ"] = "Ŋ", + ["ӈ"] = "ŋ", + ["Ң"] = "Ŋ", + ["ң"] = "ŋ", + ["Ҥ"] = "Ŋ", + ["ҥ"] = "ŋ", -- Same as h above + ["О"] = "O", + ["о"] = "o", + ["Ө"] = "Ö", + ["ө"] = "ö", + ["Ӧ"] = "Ö", + ["ӧ"] = "ö", + ["П"] = "P", + ["п"] = "p", + ["Р"] = "R", + ["р"] = "r", + ["С"] = "S", + ["с"] = "s", + ["Т"] = "T", + ["т"] = "t", + ["У"] = "U", + ["у"] = "u", + ["Ф"] = "F", + ["ф"] = "f", + ["Х"] = "Q", + ["х"] = "q", + ["Ч"] = "Ț", + ["ч"] = "ț", + ["Э"] = "E", + ["э"] = "e", + + -- 
non-native letters + ["Е"] = "E", + ["е"] = "e", + ["Ё"] = "Jo", + ["ё"] = "jo", + ["Ж"] = "Z", + ["ж"] = "z", + ["З"] = "Z", + ["з"] = "z", + ["Ц"] = "C", + ["ц"] = "c", + ["Ш"] = "Ş", + ["ш"] = "ş", + ["Щ"] = "Ş", + ["щ"] = "ş", + ["Ы"] = "Y", + ["ы"] = "y", + ["Ю"] = "Ju", + ["ю"] = "ju", + ["Я"] = "Ja", + ["я"] = "ja", + ["Ъ"] = "", + ["ъ"] = "", + ["Ь"] = "", + ["ь"] = "" +} + +local other = {{"Дь", "D̦"}, {"дь", "d̦"}, {"Нь", "Ņ"}, {"нь", "ņ"}, {"Ль", "Ļ"}, {"ль", "ļ"}, {"Г̧", "H"}, {"г̧", "h"}} + +function export.tr(text, lang, sc) + for i, replacement in ipairs(other) do text = str_gsub(text, unpack(replacement)) end + + -- Intrestingly the orthography(ies) don't use Cyrillic palatal vowels + + return (str_gsub(text, UTF8char, tab)) +end + +return export diff --git a/wiktra/wikt/translit/yux-translit.lua b/wiktra/wikt/translit/yux-translit.lua new file mode 100644 index 0000000..80f6531 --- /dev/null +++ b/wiktra/wikt/translit/yux-translit.lua @@ -0,0 +1,97 @@ +local u = mw.ustring.char + +local MACRON = u(0x0304) +local DOTABOVE = u(0x0307) +local DOTBELOW = u(0x0323) + +local str_gsub, ugsub = string.gsub, mw.ustring.gsub +local UTF8char = "[\1-\127\194-\244][\128-\191]*" + +local export = {} + +local tab = { + ["А"] = "A", + ["а"] = "a", + ["Б"] = "B", + ["б"] = "b", + ["В"] = "W", + ["в"] = "w", + ["Г"] = "G", + ["г"] = "g", + ["Ҕ"] = "H", + ["ҕ"] = "h", + ["Д"] = "D", + ["д"] = "d", + ["Ж"] = "Ž", + ["ж"] = "ž", + ["И"] = "I", + ["и"] = "i", + ["Й"] = "J", + ["й"] = "j", + ["К"] = "K", + ["к"] = "k", + ["Л"] = "L", + ["л"] = "l", + ["М"] = "M", + ["м"] = "m", + ["Н"] = "N", + ["н"] = "n", + ["Ҥ"] = "Ŋ", + ["ҥ"] = "ŋ", + ["О"] = "O", + ["о"] = "o", + ["Ө"] = "Ø", + ["ө"] = "ø", + ["П"] = "P", + ["п"] = "p", + ["Р"] = "R", + ["р"] = "r", + ["Т"] = "T", + ["т"] = "t", + ["У"] = "U", + ["у"] = "u", + ["Ф"] = "F", + ["ф"] = "f", + ["Х"] = "Q", + ["х"] = "q", + ["Ч"] = "Č", + ["ч"] = "č", + ["Ш"] = "Š", + ["ш"] = "š", + ["Э"] = "E", + ["э"] = "e", + + -- 
non-native letters + ["Е"] = "Je", + ["е"] = "je", + ["Ё"] = "Jo", + ["ё"] = "jo", + ["З"] = "Z", + ["з"] = "z", + ["Ц"] = "C", + ["ц"] = "c", + ["Щ"] = "Ś", + ["щ"] = "ś", + ["Ы"] = "Y", + ["ы"] = "y", + ["Ю"] = "Ju", + ["ю"] = "ju", + ["Я"] = "Ja", + ["я"] = "ja", + ["Ъ"] = "ʺ", + ["ъ"] = "ʺ", + ["Ь"] = "ʹ", + ["ь"] = "ʹ", + ["С"] = "S", + ["с"] = "s" +} + +local other = {{"Аа", "Ā"}, {"аа", "ā"}, {"Ээ", "Ē"}, {"ээ", "ē"}, {"Ии", "Ī"}, {"ии", "ī"}, {"Оо", "Ō"}, {"оо", "ō"}, {"Уу", "Ū"}, {"уу", "ū"}, {"Өө", "Ø̄"}, {"өө", "ø̄"}, {"Оу", "Ow"}, {"оу", "ow"}} + +function export.tr(text, lang, sc) + for i, replacement in ipairs(other) do text = str_gsub(text, unpack(replacement)) end + + return (str_gsub(text, UTF8char, tab)) +end + +return export diff --git a/wikt/ustring/README.txt b/wiktra/wikt/ustring/README.txt similarity index 100% rename from wikt/ustring/README.txt rename to wiktra/wikt/ustring/README.txt diff --git a/wikt/ustring/charsets.lua b/wiktra/wikt/ustring/charsets.lua old mode 100755 new mode 100644 similarity index 99% rename from wikt/ustring/charsets.lua rename to wiktra/wikt/ustring/charsets.lua index 5f0f48c..7c0d98d --- a/wikt/ustring/charsets.lua +++ b/wiktra/wikt/ustring/charsets.lua @@ -1800,6 +1800,10 @@ local pats = { [0x00207e] = 1, [0x00208d] = 1, [0x00208e] = 1, + [0x002308] = 1, + [0x002309] = 1, + [0x00230a] = 1, + [0x00230b] = 1, [0x002329] = 1, [0x00232a] = 1, [0x0027c5] = 1, @@ -1954,6 +1958,7 @@ local pats = { [0x00000c] = 1, [0x00000d] = 1, [0x000020] = 1, + [0x000085] = 1, [0x0000a0] = 1, [0x001680] = 1, [0x00180e] = 1, diff --git a/wikt/ustring/lower.lua b/wiktra/wikt/ustring/lower.lua old mode 100755 new mode 100644 similarity index 100% rename from wikt/ustring/lower.lua rename to wiktra/wikt/ustring/lower.lua diff --git a/wikt/ustring/make-normalization-table.php b/wiktra/wikt/ustring/make-normalization-table.php similarity index 100% rename from wikt/ustring/make-normalization-table.php rename to 
wiktra/wikt/ustring/make-normalization-table.php diff --git a/wikt/ustring/make-tables.php b/wiktra/wikt/ustring/make-tables.php similarity index 100% rename from wikt/ustring/make-tables.php rename to wiktra/wikt/ustring/make-tables.php diff --git a/wikt/ustring/normalization-data.lua b/wiktra/wikt/ustring/normalization-data.lua old mode 100755 new mode 100644 similarity index 98% rename from wikt/ustring/normalization-data.lua rename to wiktra/wikt/ustring/normalization-data.lua index e89a1db..5d110c4 --- a/wikt/ustring/normalization-data.lua +++ b/wiktra/wikt/ustring/normalization-data.lua @@ -1184,6 +1184,12 @@ local normal = { [0x0011c2] = 1, [0x001b35] = 1, [0x011127] = 1, + [0x01133e] = 1, + [0x011357] = 1, + [0x0114b0] = 1, + [0x0114ba] = 1, + [0x0114bd] = 1, + [0x0115af] = 1, }, -- Combining characters, mapped to combining class @@ -1468,6 +1474,7 @@ local normal = { [0x000859] = 220, [0x00085a] = 220, [0x00085b] = 220, + [0x0008e3] = 220, [0x0008e4] = 230, [0x0008e5] = 230, [0x0008e6] = 220, @@ -1495,6 +1502,7 @@ local normal = { [0x0008fc] = 230, [0x0008fd] = 230, [0x0008fe] = 230, + [0x0008ff] = 230, [0x00093c] = 7, [0x00094d] = 9, [0x000951] = 230, @@ -1576,6 +1584,20 @@ local normal = { [0x001a7b] = 230, [0x001a7c] = 230, [0x001a7f] = 220, + [0x001ab0] = 230, + [0x001ab1] = 230, + [0x001ab2] = 230, + [0x001ab3] = 230, + [0x001ab4] = 230, + [0x001ab5] = 220, + [0x001ab6] = 220, + [0x001ab7] = 220, + [0x001ab8] = 220, + [0x001ab9] = 220, + [0x001aba] = 220, + [0x001abb] = 230, + [0x001abc] = 230, + [0x001abd] = 220, [0x001b34] = 7, [0x001b44] = 9, [0x001b6b] = 230, @@ -1618,6 +1640,8 @@ local normal = { [0x001ce8] = 1, [0x001ced] = 220, [0x001cf4] = 230, + [0x001cf8] = 230, + [0x001cf9] = 230, [0x001dc0] = 230, [0x001dc1] = 230, [0x001dc2] = 220, @@ -1657,6 +1681,21 @@ local normal = { [0x001de4] = 230, [0x001de5] = 230, [0x001de6] = 230, + [0x001de7] = 230, + [0x001de8] = 230, + [0x001de9] = 230, + [0x001dea] = 230, + [0x001deb] = 230, + [0x001dec] = 
230, + [0x001ded] = 230, + [0x001dee] = 230, + [0x001def] = 230, + [0x001df0] = 230, + [0x001df1] = 230, + [0x001df2] = 230, + [0x001df3] = 230, + [0x001df4] = 230, + [0x001df5] = 230, [0x001dfc] = 233, [0x001dfd] = 220, [0x001dfe] = 230, @@ -1742,6 +1781,7 @@ local normal = { [0x00a67b] = 230, [0x00a67c] = 230, [0x00a67d] = 230, + [0x00a69e] = 230, [0x00a69f] = 230, [0x00a6f0] = 230, [0x00a6f1] = 230, @@ -1790,14 +1830,32 @@ local normal = { [0x00fe24] = 230, [0x00fe25] = 230, [0x00fe26] = 230, + [0x00fe27] = 220, + [0x00fe28] = 220, + [0x00fe29] = 220, + [0x00fe2a] = 220, + [0x00fe2b] = 220, + [0x00fe2c] = 220, + [0x00fe2d] = 220, + [0x00fe2e] = 230, + [0x00fe2f] = 230, [0x0101fd] = 220, + [0x0102e0] = 220, + [0x010376] = 230, + [0x010377] = 230, + [0x010378] = 230, + [0x010379] = 230, + [0x01037a] = 230, [0x010a0d] = 220, [0x010a0f] = 230, [0x010a38] = 230, [0x010a39] = 1, [0x010a3a] = 220, [0x010a3f] = 9, + [0x010ae5] = 230, + [0x010ae6] = 220, [0x011046] = 9, + [0x01107f] = 9, [0x0110b9] = 9, [0x0110ba] = 7, [0x011100] = 230, @@ -1805,9 +1863,48 @@ local normal = { [0x011102] = 230, [0x011133] = 9, [0x011134] = 9, + [0x011173] = 7, [0x0111c0] = 9, + [0x0111ca] = 7, + [0x011235] = 9, + [0x011236] = 7, + [0x0112e9] = 7, + [0x0112ea] = 9, + [0x01133c] = 7, + [0x01134d] = 9, + [0x011366] = 230, + [0x011367] = 230, + [0x011368] = 230, + [0x011369] = 230, + [0x01136a] = 230, + [0x01136b] = 230, + [0x01136c] = 230, + [0x011370] = 230, + [0x011371] = 230, + [0x011372] = 230, + [0x011373] = 230, + [0x011374] = 230, + [0x0114c2] = 9, + [0x0114c3] = 7, + [0x0115bf] = 9, + [0x0115c0] = 7, + [0x01163f] = 9, [0x0116b6] = 9, [0x0116b7] = 7, + [0x01172b] = 9, + [0x016af0] = 1, + [0x016af1] = 1, + [0x016af2] = 1, + [0x016af3] = 1, + [0x016af4] = 1, + [0x016b30] = 230, + [0x016b31] = 230, + [0x016b32] = 230, + [0x016b33] = 230, + [0x016b34] = 230, + [0x016b35] = 230, + [0x016b36] = 230, + [0x01bc9e] = 1, [0x01d165] = 216, [0x01d166] = 216, [0x01d167] = 1, @@ -1841,6 +1938,13 @@ 
local normal = { [0x01d242] = 230, [0x01d243] = 230, [0x01d244] = 230, + [0x01e8d0] = 220, + [0x01e8d1] = 220, + [0x01e8d2] = 220, + [0x01e8d3] = 220, + [0x01e8d4] = 220, + [0x01e8d5] = 220, + [0x01e8d6] = 220, }, -- Characters mapped to what they decompose to @@ -3344,6 +3448,13 @@ local normal = { [0x0110ab] = { 0x0110a5, 0x0110ba }, [0x01112e] = { 0x011131, 0x011127 }, [0x01112f] = { 0x011132, 0x011127 }, + [0x01134b] = { 0x011347, 0x01133e }, + [0x01134c] = { 0x011347, 0x011357 }, + [0x0114bb] = { 0x0114b9, 0x0114ba }, + [0x0114bc] = { 0x0114b9, 0x0114b0 }, + [0x0114be] = { 0x0114b9, 0x0114bd }, + [0x0115ba] = { 0x0115b8, 0x0115af }, + [0x0115bb] = { 0x0115b9, 0x0115af }, [0x01d15e] = { 0x01d157, 0x01d165 }, [0x01d15f] = { 0x01d158, 0x01d165 }, [0x01d160] = { 0x01d158, 0x01d165, 0x01d16e }, @@ -5254,9 +5365,15 @@ local normal = { [0x0033fd] = { 0x000033, 0x000030, 0x0065e5 }, [0x0033fe] = { 0x000033, 0x000031, 0x0065e5 }, [0x0033ff] = { 0x000067, 0x000061, 0x00006c }, + [0x00a69c] = { 0x00044a }, + [0x00a69d] = { 0x00044c }, [0x00a770] = { 0x00a76f }, [0x00a7f8] = { 0x000126 }, [0x00a7f9] = { 0x000153 }, + [0x00ab5c] = { 0x00a727 }, + [0x00ab5d] = { 0x00ab37 }, + [0x00ab5e] = { 0x00026b }, + [0x00ab5f] = { 0x00ab52 }, [0x00fb00] = { 0x000066, 0x000066 }, [0x00fb01] = { 0x000066, 0x000069 }, [0x00fb02] = { 0x000066, 0x00006c }, @@ -9255,6 +9372,21 @@ local normal = { [0x011132] = { [0x011127] = 0x01112f, }, + [0x011347] = { + [0x01133e] = 0x01134b, + [0x011357] = 0x01134c, + }, + [0x0114b9] = { + [0x0114b0] = 0x0114bc, + [0x0114ba] = 0x0114bb, + [0x0114bd] = 0x0114be, + }, + [0x0115b8] = { + [0x0115af] = 0x0115ba, + }, + [0x0115b9] = { + [0x0115af] = 0x0115bb, + }, }, } diff --git a/wikt/ustring/string.lua b/wiktra/wikt/ustring/string.lua old mode 100755 new mode 100644 similarity index 100% rename from wikt/ustring/string.lua rename to wiktra/wikt/ustring/string.lua diff --git a/wikt/ustring/upper.lua b/wiktra/wikt/ustring/upper.lua old mode 100755 new mode 
100644 similarity index 100% rename from wikt/ustring/upper.lua rename to wiktra/wikt/ustring/upper.lua diff --git a/wikt/ustring/ustring.lua b/wiktra/wikt/ustring/ustring.lua old mode 100755 new mode 100644 similarity index 96% rename from wikt/ustring/ustring.lua rename to wiktra/wikt/ustring/ustring.lua index d27cd92..994ae81 --- a/wikt/ustring/ustring.lua +++ b/wiktra/wikt/ustring/ustring.lua @@ -1,6 +1,6 @@ local ustring = {} -unpack = table.unpack +local unpack = table.unpack or unpack -- Copy these, just in case local S = { @@ -404,7 +404,7 @@ end -- @return string function ustring.upper( s ) checkString( 'upper', s ) - local map = require 'wikt.ustring.upper'; + local map = require 'ustring/upper'; local ret = S.gsub( s, '([^\128-\191][\128-\191]*)', map ) return ret end @@ -416,7 +416,7 @@ end -- @return string function ustring.lower( s ) checkString( 'lower', s ) - local map = require 'wikt.ustring.lower'; + local map = require 'ustring/lower'; local ret = S.gsub( s, '([^\128-\191][\128-\191]*)', map ) return ret end @@ -443,7 +443,7 @@ setmetatable( charset_cache, { __weak = 'kv' } ) -- @return int ending index of the match -- @return string|int* captures local function find( s, cps, rawpat, pattern, init, noAnchor ) - local charsets = require 'wikt.ustring.charsets' + local charsets = require 'ustring/charsets' local anchor = false local ncapt, captures local captparen = {} @@ -1004,15 +1004,20 @@ function ustring.gsub( s, pattern, repl, n ) ret[#ret + 1] = sub( s, cps, init, m[1] - 1 ) end local mm = sub( s, cps, m[1], m[2] ) - local val + + -- This simplifies the code for the function and table cases (tp == 1 and tp == 2) when there are + -- no captures in the pattern. As documented it would be incorrect for the string case by making + -- %1 act like %0 instead of raising an "invalid capture index" error, but Lua in fact does + -- exactly that for string.gsub. 
+ if #m < 3 then + m[3] = mm + end + + local val, valType if tp == 1 then - if m[3] then - val = repl( unpack( m, 3 ) ) - else - val = repl( mm ) - end + val = repl( unpack( m, 3 ) ) elseif tp == 2 then - val = repl[m[3] or mm] + val = repl[m[3]] elseif tp == 3 then if ct == 0 and #m < 11 then local ss = S.gsub( repl, '%%[%%0-' .. ( #m - 2 ) .. ']', 'x' ) @@ -1036,6 +1041,10 @@ function ustring.gsub( s, pattern, repl, n ) } val = S.gsub( repl, '%%[%%0-9]', t ) end + valType = type( val ) + if valType ~= 'nil' and valType ~= 'string' and valType ~= 'number' then + error( 'invalid replacement value (a ' .. valType .. ')', 2 ) + end ret[#ret + 1] = val or mm init = m[2] + 1 ct = ct + 1 @@ -1052,7 +1061,7 @@ end local function internalDecompose( cps, decomp ) local cp = {} - local normal = require 'wikt.ustring.normalization-data' + local normal = require 'ustring/normalization-data' -- Decompose into cp, using the lookup table and logic for hangul for i = 1, cps.len do @@ -1088,7 +1097,7 @@ local function internalDecompose( cps, decomp ) end local function internalCompose( cp, _, l ) - local normal = require 'wikt.ustring.normalization-data' + local normal = require 'ustring/normalization-data' -- Since NFD->NFC can never expand a character sequence, we can do this -- in-place. 
@@ -1149,7 +1158,7 @@ function ustring.toNFC( s ) if cps == nil then return nil end - local normal = require 'wikt.ustring.normalization-data' + local normal = require 'ustring/normalization-data' -- First, scan through to see if the string is definitely already NFC local ok = true @@ -1188,7 +1197,7 @@ function ustring.toNFD( s ) return nil end - local normal = require 'wikt.ustring.normalization-data' + local normal = require 'ustring/normalization-data' return internalChar( internalDecompose( cps, normal.decomp ) ) end @@ -1211,7 +1220,7 @@ function ustring.toNFKC( s ) if cps == nil then return nil end - local normal = require 'wikt.ustring.normalization-data' + local normal = require 'ustring/normalization-data' -- Next, expand to NFKD then recompose return internalChar( internalCompose( internalDecompose( cps, normal.decompK ) ) ) @@ -1237,7 +1246,7 @@ function ustring.toNFKD( s ) return nil end - local normal = require 'wikt.ustring.normalization-data' + local normal = require 'ustring/normalization-data' return internalChar( internalDecompose( cps, normal.decompK ) ) end