Skip to content

Commit

Permalink
Adds partial language understanding (#630)
Browse files Browse the repository at this point in the history
  • Loading branch information
MrMelbert authored Dec 22, 2024
1 parent 06ad29c commit 43fe884
Show file tree
Hide file tree
Showing 14 changed files with 207 additions and 69 deletions.
9 changes: 9 additions & 0 deletions code/_globalvars/lists/flavor_misc.dm
Original file line number Diff line number Diff line change
Expand Up @@ -307,3 +307,12 @@ GLOBAL_LIST_INIT(status_display_state_pictures, list(
"blank",
"shuttle",
))

/// Assoc list of the 1000 most common words to their frequency ranking (1 = most common).
GLOBAL_LIST_INIT(most_common_words, init_common_words())

/// Builds the word -> frequency-rank lookup from the common words string file.
/proc/init_common_words()
	var/list/ranked_words = list()
	var/rank = 0
	for(var/word in world.file2list("strings/1000_most_common.txt"))
		ranked_words[word] = ++rank
	return ranked_words
7 changes: 1 addition & 6 deletions code/controllers/subsystem/discord.dm
Original file line number Diff line number Diff line change
Expand Up @@ -43,17 +43,13 @@ SUBSYSTEM_DEF(discord)
/// People who have tried to verify this round already
var/list/reverify_cache

/// Common words list, used to generate one time tokens
var/list/common_words

/// The file where notification status is saved
var/notify_file = file("data/notify.json")

/// Is TGS enabled (If not we won't fire because otherwise this is useless)
var/enabled = FALSE

/datum/controller/subsystem/discord/Initialize()
common_words = world.file2list("strings/1000_most_common.txt")
reverify_cache = list()
// Check for if we are using TGS, otherwise return and disables firing
if(world.TgsAvailable())
Expand Down Expand Up @@ -156,7 +152,7 @@ SUBSYSTEM_DEF(discord)
// While there's a collision in the token, generate a new one (should rarely happen)
while(not_unique)
//Column is varchar 100, so we trim just in case someone does us the dirty later
one_time_token = trim("[pick(common_words)]-[pick(common_words)]-[pick(common_words)]-[pick(common_words)]-[pick(common_words)]-[pick(common_words)]", 100)
one_time_token = trim("[pick(GLOB.most_common_words)]-[pick(GLOB.most_common_words)]-[pick(GLOB.most_common_words)]-[pick(GLOB.most_common_words)]-[pick(GLOB.most_common_words)]-[pick(GLOB.most_common_words)]", 100)

not_unique = find_discord_link_by_token(one_time_token, timebound = TRUE)

Expand Down Expand Up @@ -298,4 +294,3 @@ SUBSYSTEM_DEF(discord)
if (length(discord_mention_extraction_regex.group) == 1)
return discord_mention_extraction_regex.group[1]
return null

4 changes: 1 addition & 3 deletions code/datums/brain_damage/mild.dm
Original file line number Diff line number Diff line change
Expand Up @@ -191,8 +191,6 @@
gain_text = span_warning("You lose your grasp on complex words.")
lose_text = span_notice("You feel your vocabulary returning to normal again.")

var/static/list/common_words = world.file2list("strings/1000_most_common.txt")

/datum/brain_trauma/mild/expressive_aphasia/handle_speech(datum/source, list/speech_args)
var/message = speech_args[SPEECH_MESSAGE]
if(message)
Expand All @@ -212,7 +210,7 @@
word = copytext(word, 1, suffix_foundon)
word = html_decode(word)

if(lowertext(word) in common_words)
if(GLOB.most_common_words[lowertext(word)])
new_message += word + suffix
else
if(prob(30) && message_split.len > 2)
Expand Down
4 changes: 4 additions & 0 deletions code/game/atoms_movable.dm
Original file line number Diff line number Diff line change
Expand Up @@ -1521,6 +1521,10 @@
/atom/movable/proc/get_random_understood_language()
	var/datum/language_holder/our_holder = get_language_holder()
	return our_holder.get_random_understood_language()

/// Gets a list of all mutually understood languages.
/atom/movable/proc/get_mutually_understood_languages()
	var/datum/language_holder/our_holder = get_language_holder()
	return our_holder.get_mutually_understood_languages()

/// Gets a random spoken language, useful for forced speech and such.
/atom/movable/proc/get_random_spoken_language()
	var/datum/language_holder/our_holder = get_language_holder()
	return our_holder.get_random_spoken_language()
Expand Down
2 changes: 1 addition & 1 deletion code/game/machinery/telecomms/computers/logbrowser.dm
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
message_out = "\"[message_in]\""
else if(!user.has_language(language))
// Language unknown: scramble
message_out = "\"[language_instance.scramble(message_in)]\""
message_out = "\"[language_instance.scramble_sentence(message_in, user.get_mutually_understood_languages())]\""
else
message_out = "(Unintelligible)"
packet_out["message"] = message_out
Expand Down
2 changes: 1 addition & 1 deletion code/game/say.dm
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ GLOBAL_LIST_INIT(freqtospan, list(

if(!has_language(language))
var/datum/language/dialect = GLOB.language_datum_instances[language]
raw_message = dialect.scramble(raw_message)
raw_message = dialect.scramble_sentence(raw_message, get_mutually_understood_languages())

return raw_message

Expand Down
191 changes: 148 additions & 43 deletions code/modules/language/_language.dm
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
/// maximum of 50 specific scrambled lines per language
/// Last 50 spoken (uncommon) words will be cached before we start cycling them out (re-randomizing them)
#define SCRAMBLE_CACHE_LEN 50
/// Last 20 spoken sentences will be cached before we start cycling them out (re-randomizing them)
#define SENTENCE_CACHE_LEN 20

/// Datum based languages. Easily editable and modular.
/datum/language
Expand All @@ -18,13 +20,23 @@
var/list/special_characters
/// Likelihood of making a new sentence after each syllable.
var/sentence_chance = 5
/// Likelihood of making a new sentence after each word.
var/between_word_sentence_chance = 0
/// Likelihood of getting a space in the random scramble string
var/space_chance = 55
/// Likelyhood of getting a space between words
var/between_word_space_chance = 100
/// Spans to apply from this language
var/list/spans
/// Cache of recently scrambled text
/// This allows commonly reused words to not require a full re-scramble every time.
var/list/scramble_cache = list()
/// Cache of recently spoken sentences
/// So if one person speaks over the radio, everyone hears the same thing.
var/list/last_sentence_cache = list()
/// The 1000 most common words get permanently cached
var/list/most_common_cache = list()

/// The language that an atom knows with the highest "default_priority" is selected by default.
var/default_priority = 0
/// If TRUE, when generating names, we will always use the default human namelist, even if we have syllables set.
Expand All @@ -45,6 +57,11 @@
/// What char to place in between randomly generated names
var/random_name_spacer = " "

/// Assoc Lazylist of other language types that would have a degree of mutual understanding with this language.
/// For example, you could do `list(/datum/language/common = 50)` to say that this language has a 50% chance to understand common words
/// And yeah if you give a 100% chance, they can basically just understand the language
var/list/mutual_understanding

/// Checks whether we should display the language icon to the passed hearer.
/datum/language/proc/display_icon(atom/movable/hearer)
var/understands = hearer.has_language(src.type)
Expand Down Expand Up @@ -109,56 +126,144 @@

return result

/// Checks the word cache for a word.
/// Returns the cached scramble for the input, or null on a cache miss.
/datum/language/proc/read_word_cache(input)
	SHOULD_NOT_OVERRIDE(TRUE)
	// words in the common-words cache are permanent and never cycled out
	if(most_common_cache[input])
		return most_common_cache[input]

	. = scramble_cache[input]
	if(. && scramble_cache[1] != input)
		// bumps it to the top of the cache so it survives trimming longer
		scramble_cache -= input
		scramble_cache[input] = .
	return .

/// Adds a word to the cache.
/// Common words go to a permanent cache; everything else goes to the rolling scramble cache.
/datum/language/proc/write_word_cache(input, scrambled_text)
	SHOULD_NOT_OVERRIDE(TRUE)
	// the most common words are cached forever and never trimmed
	if(GLOB.most_common_words[lowertext(input)])
		most_common_cache[input] = scrambled_text
		return
	// Add it to cache, cutting old entries if the list is too long
	scramble_cache[input] = scrambled_text
	if(scramble_cache.len > SCRAMBLE_CACHE_LEN)
		scramble_cache.Cut(1, scramble_cache.len - SCRAMBLE_CACHE_LEN + 1)

/// Checks the sentence cache for a sentence.
/// Returns the assoc list (cache key -> scrambled result) stored for this sentence, or null on a miss.
/datum/language/proc/read_sentence_cache(input)
	SHOULD_NOT_OVERRIDE(TRUE)
	. = last_sentence_cache[input]
	if(. && last_sentence_cache[1] != input)
		// bumps it to the top of the cache (don't anticipate this happening often)
		last_sentence_cache -= input
		last_sentence_cache[input] = .
	return .
/// Adds a sentence to the cache, though the sentence should be modified with a key.
/// The cache is an assoc list of assoc lists: input sentence -> (key -> scrambled result).
/datum/language/proc/write_sentence_cache(input, key, result_scramble)
	SHOULD_NOT_OVERRIDE(TRUE)
	// Add to the cache (the cache being an assoc list of assoc lists), cutting old entries if the list is too long
	LAZYSET(last_sentence_cache[input], key, result_scramble)
	if(last_sentence_cache.len > SENTENCE_CACHE_LEN)
		last_sentence_cache.Cut(1, last_sentence_cache.len - SENTENCE_CACHE_LEN + 1)

/// Goes through the input and removes any punctuation from the end of the string.
/proc/strip_punctuation(input)
	var/static/list/bad_punctuation = list("!", "?", ".", "~", ";", ":", "-")
	// keep shaving off the final character while it is trailing punctuation
	while(copytext_char(input, -1) in bad_punctuation)
		input = copytext(input, 1, -1)

	return trim_right(input)

/// Find what punctuation is at the end of the input, returns it.
/// Checks three-, two-, then one-character endings in that order; returns "" if none match.
/proc/find_last_punctuation(input)
	// static so we don't rebuild these list literals on every single call
	var/static/list/two_char_endings = list("!!", "??", "..", "?!", "!?")
	var/static/list/one_char_endings = list("!", "?", ".", "~", ";", ":", "-")
	. = copytext_char(input, -3)
	if(. == "...")
		return .
	. = copytext_char(input, -2)
	if(. in two_char_endings)
		return .
	. = copytext_char(input, -1)
	if(. in one_char_endings)
		return .
	return ""

/// Scrambles a sentence in this language.
/// Takes into account any languages the hearer knows that has mutual understanding with this language.
/datum/language/proc/scramble_sentence(input, list/mutual_languages)
	// the cache key encodes how much of this language the hearer partially understands,
	// so hearers with equal understanding hear the same scramble
	var/cache_key = "[mutual_languages?[type] || 0]-understanding"
	// BUGFIX: the sentence cache is keyed by the input sentence (see write_sentence_cache),
	// so we must look up `input` here - looking up `cache_key` meant the cache never hit
	var/list/cache = read_sentence_cache(input)
	if(cache?[cache_key])
		return cache[cache_key]

	var/list/real_words = splittext(input, " ")
	var/list/scrambled_words = list()
	for(var/word in real_words)
		var/translate_prob = mutual_languages?[type] || 0
		if(translate_prob > 0)
			var/base_word = lowertext(strip_punctuation(word))
			// the probability of managing to understand a word is based on how common it is
			// 1000 words in the list, so words outside the list are just treated as "the 1500th most common word"
			var/commonness = GLOB.most_common_words[base_word] || 1500
			translate_prob += (translate_prob * 0.2 * (1 - (min(commonness, 1500) / 500)))
			if(prob(translate_prob))
				scrambled_words += base_word
				continue

		scrambled_words += scramble_word(word)

	// start building the sentence. first word is capitalized and otherwise untouched
	. = capitalize(popleft(scrambled_words))
	for(var/word in scrambled_words)
		if(prob(between_word_sentence_chance))
			. += ". "
		else if(prob(between_word_space_chance))
			. += " "

		. += word

	// scrambling the words will drop punctuation, so re-add it at the end
	. += find_last_punctuation(trim_right(input))

	write_sentence_cache(input, cache_key, .)

	return .

/// Scrambles a single word in this language.
/// Results are cached: common words permanently, others in a rolling cache.
/datum/language/proc/scramble_word(input)
	// If the input is cached already, return the cached scramble
	. = read_word_cache(input)
	if(.)
		return .

	if(!length(syllables))
		// no syllables to build from - just star the word out
		. = stars(input)

	else
		var/input_size = length_char(input)
		var/add_space = FALSE
		var/add_period = FALSE
		. = ""
		while(length_char(.) < input_size)
			// add in the last syllable's period or space first
			if(add_period)
				. += ". "
			else if(add_space)
				. += " "
			// generate the next syllable (capitalize if we just added a period)
			var/next = (. && length(special_characters) && prob(1)) ? pick(special_characters) : pick_weight_recursive(syllables)
			if(add_period)
				next = capitalize(next)
			. += next
			// determine if the next syllable gets a period or space
			add_period = prob(sentence_chance)
			add_space = prob(space_chance)

	write_word_cache(input, .)

	return .

#undef SCRAMBLE_CACHE_LEN
12 changes: 12 additions & 0 deletions code/modules/language/_language_holder.dm
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,18 @@ Key procs
/datum/language_holder/proc/get_random_understood_language()
	var/picked_language = pick(understood_languages)
	return picked_language

/// Gets a list of all mutually understood languages.
/// Returns an assoc list of language type -> strongest understanding percentage across all understood languages.
/datum/language_holder/proc/get_mutually_understood_languages()
	var/list/mutual_languages = list()
	for(var/known_type in understood_languages)
		var/datum/language/known_instance = GLOB.language_datum_instances[known_type]
		for(var/partial_type in known_instance.mutual_understanding)
			var/new_strength = known_instance.mutual_understanding[partial_type]
			// add it to the list OR override it if it's a stronger mutual understanding
			if(!mutual_languages[partial_type] || mutual_languages[partial_type] < new_strength)
				mutual_languages[partial_type] = new_strength

	return mutual_languages

/// Gets a random spoken language, useful for forced speech and such.
/datum/language_holder/proc/get_random_spoken_language()
	var/picked_language = pick(spoken_languages)
	return picked_language
Expand Down
5 changes: 5 additions & 0 deletions code/modules/language/beachbum.dm
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,8 @@
)
icon_state = "beach"
always_use_default_namelist = TRUE

	// Percent chance per word to partially understand these languages without knowing them.
	mutual_understanding = list(
		/datum/language/common = 50,
		/datum/language/uncommon = 30,
	)
10 changes: 5 additions & 5 deletions code/modules/language/codespeak.dm
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@
icon_state = "codespeak"
always_use_default_namelist = TRUE // No syllables anyways

/datum/language/codespeak/scramble(input)
var/lookup = check_cache(input)
if(lookup)
return lookup
/datum/language/codespeak/scramble_sentence(input, list/mutual_languages)
. = read_word_cache(input)
if(.)
return .

. = ""
var/list/words = list()
Expand All @@ -29,4 +29,4 @@
if(input_ending in endings)
. += input_ending

add_to_cache(input, .)
write_word_cache(input, .)
5 changes: 5 additions & 0 deletions code/modules/language/common.dm
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,8 @@
"his", "ing", "ion", "ith", "not", "ome", "oul", "our", "sho", "ted", "ter", "tha", "the", "thi",
),
)

	// Percent chance per word to partially understand these languages without knowing them.
	mutual_understanding = list(
		/datum/language/beachbum = 33,
		/datum/language/uncommon = 20,
	)
5 changes: 5 additions & 0 deletions code/modules/language/uncommon.dm
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,8 @@
)
icon_state = "galuncom"
default_priority = 90

	// Percent chance per word to partially understand these languages without knowing them.
	mutual_understanding = list(
		/datum/language/common = 33,
		/datum/language/beachbum = 20,
	)
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
spans = list(SPAN_ROBOT)
icon_state = "ratvar"

/// Ratvarian ignores partial understanding entirely - the text is always fully converted via text2ratvar.
/datum/language/ratvarian/scramble_sentence(input, list/mutual_languages)
	return text2ratvar(input)

/// Regexes used to add ratvarian styling to rot13 english
Expand Down
Loading

0 comments on commit 43fe884

Please sign in to comment.