Adds partial language understanding #630

Merged
merged 6 commits on Dec 22, 2024
9 changes: 9 additions & 0 deletions code/_globalvars/lists/flavor_misc.dm
@@ -307,3 +307,12 @@ GLOBAL_LIST_INIT(status_display_state_pictures, list(
"blank",
"shuttle",
))

GLOBAL_LIST_INIT(most_common_words, init_common_words())

/proc/init_common_words()
. = list()
var/i = 1
for(var/word in world.file2list("strings/1000_most_common.txt"))
.[word] = i
i += 1
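
For illustration: the proc above builds an assoc list mapping each word to its 1-based rank in strings/1000_most_common.txt, so a lookup doubles as a commonness score. A minimal usage sketch follows (the demo proc is hypothetical and not part of this PR, and "the" being the first word in the file is an assumption):

/proc/demo_common_word_rank()
    // hypothetical example, not part of this PR
    var/rank = GLOB.most_common_words["the"] // 1, assuming "the" is the first line of the file
    var/missing = GLOB.most_common_words["xyzzy"] // null for any word outside the list
    return list(rank, missing)
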
7 changes: 1 addition & 6 deletions code/controllers/subsystem/discord.dm
@@ -43,17 +43,13 @@ SUBSYSTEM_DEF(discord)
/// People who have tried to verify this round already
var/list/reverify_cache

/// Common words list, used to generate one time tokens
var/list/common_words

/// The file where notification status is saved
var/notify_file = file("data/notify.json")

/// Is TGS enabled (If not we won't fire because otherwise this is useless)
var/enabled = FALSE

/datum/controller/subsystem/discord/Initialize()
common_words = world.file2list("strings/1000_most_common.txt")
reverify_cache = list()
// Check if we are using TGS, otherwise return and disable firing
if(world.TgsAvailable())
@@ -156,7 +152,7 @@ SUBSYSTEM_DEF(discord)
// While there's a collision in the token, generate a new one (should rarely happen)
while(not_unique)
//Column is varchar 100, so we trim just in case someone does us the dirty later
one_time_token = trim("[pick(common_words)]-[pick(common_words)]-[pick(common_words)]-[pick(common_words)]-[pick(common_words)]-[pick(common_words)]", 100)
one_time_token = trim("[pick(GLOB.most_common_words)]-[pick(GLOB.most_common_words)]-[pick(GLOB.most_common_words)]-[pick(GLOB.most_common_words)]-[pick(GLOB.most_common_words)]-[pick(GLOB.most_common_words)]", 100)

not_unique = find_discord_link_by_token(one_time_token, timebound = TRUE)

@@ -298,4 +294,3 @@ SUBSYSTEM_DEF(discord)
if (length(discord_mention_extraction_regex.group) == 1)
return discord_mention_extraction_regex.group[1]
return null

4 changes: 1 addition & 3 deletions code/datums/brain_damage/mild.dm
@@ -191,8 +191,6 @@
gain_text = span_warning("You lose your grasp on complex words.")
lose_text = span_notice("You feel your vocabulary returning to normal again.")

var/static/list/common_words = world.file2list("strings/1000_most_common.txt")

/datum/brain_trauma/mild/expressive_aphasia/handle_speech(datum/source, list/speech_args)
var/message = speech_args[SPEECH_MESSAGE]
if(message)
@@ -212,7 +210,7 @@
word = copytext(word, 1, suffix_foundon)
word = html_decode(word)

if(lowertext(word) in common_words)
if(GLOB.most_common_words[lowertext(word)])
new_message += word + suffix
else
if(prob(30) && message_split.len > 2)
4 changes: 4 additions & 0 deletions code/game/atoms_movable.dm
@@ -1521,6 +1521,10 @@
/atom/movable/proc/get_random_understood_language()
return get_language_holder().get_random_understood_language()

/// Gets a list of all mutually understood languages.
/atom/movable/proc/get_mutually_understood_languages()
return get_language_holder().get_mutually_understood_languages()

/// Gets a random spoken language, useful for forced speech and such.
/atom/movable/proc/get_random_spoken_language()
return get_language_holder().get_random_spoken_language()
2 changes: 1 addition & 1 deletion code/game/machinery/telecomms/computers/logbrowser.dm
@@ -59,7 +59,7 @@
message_out = "\"[message_in]\""
else if(!user.has_language(language))
// Language unknown: scramble
message_out = "\"[language_instance.scramble(message_in)]\""
message_out = "\"[language_instance.scramble_sentence(message_in, user.get_mutually_understood_languages())]\""
else
message_out = "(Unintelligible)"
packet_out["message"] = message_out
2 changes: 1 addition & 1 deletion code/game/say.dm
@@ -213,7 +213,7 @@ GLOBAL_LIST_INIT(freqtospan, list(

if(!has_language(language))
var/datum/language/dialect = GLOB.language_datum_instances[language]
raw_message = dialect.scramble(raw_message)
raw_message = dialect.scramble_sentence(raw_message, get_mutually_understood_languages())

return raw_message

191 changes: 148 additions & 43 deletions code/modules/language/_language.dm
@@ -1,5 +1,7 @@
/// maximum of 50 specific scrambled lines per language
/// Last 50 spoken (uncommon) words will be cached before we start cycling them out (re-randomizing them)
#define SCRAMBLE_CACHE_LEN 50
/// Last 20 spoken sentences will be cached before we start cycling them out (re-randomizing them)
#define SENTENCE_CACHE_LEN 20

/// Datum based languages. Easily editable and modular.
/datum/language
@@ -18,13 +20,23 @@
var/list/special_characters
/// Likelihood of making a new sentence after each syllable.
var/sentence_chance = 5
/// Likelihood of making a new sentence after each word.
var/between_word_sentence_chance = 0
/// Likelihood of getting a space in the random scramble string
var/space_chance = 55
/// Likelihood of getting a space between words
var/between_word_space_chance = 100
/// Spans to apply from this language
var/list/spans
/// Cache of recently scrambled text
/// This allows commonly reused words to not require a full re-scramble every time.
var/list/scramble_cache = list()
/// Cache of recently spoken sentences
/// So if one person speaks over the radio, everyone hears the same thing.
var/list/last_sentence_cache = list()
/// The 1000 most common words get permanently cached
var/list/most_common_cache = list()

/// The language that an atom knows with the highest "default_priority" is selected by default.
var/default_priority = 0
/// If TRUE, when generating names, we will always use the default human namelist, even if we have syllables set.
@@ -45,6 +57,11 @@
/// What char to place in between randomly generated names
var/random_name_spacer = " "

/// Assoc Lazylist of other language types that would have a degree of mutual understanding with this language.
/// For example, `list(/datum/language/common = 50)` means speakers of this language have a 50% chance to understand each word of Common.
/// At 100%, they can effectively understand that language outright.
var/list/mutual_understanding

/// Checks whether we should display the language icon to the passed hearer.
/datum/language/proc/display_icon(atom/movable/hearer)
var/understands = hearer.has_language(src.type)
@@ -109,56 +126,144 @@

return result

/datum/language/proc/check_cache(input)
var/lookup = scramble_cache[input]
if(lookup)
/// Checks the word cache for a word
/datum/language/proc/read_word_cache(input)
SHOULD_NOT_OVERRIDE(TRUE)
if(most_common_cache[input])
return most_common_cache[input]

. = scramble_cache[input]
if(. && scramble_cache[1] != input)
// bumps it to the top of the cache
scramble_cache -= input
scramble_cache[input] = lookup
. = lookup
scramble_cache[input] = .
return .

/datum/language/proc/add_to_cache(input, scrambled_text)
/// Adds a word to the cache
/datum/language/proc/write_word_cache(input, scrambled_text)
SHOULD_NOT_OVERRIDE(TRUE)
if(GLOB.most_common_words[lowertext(input)])
most_common_cache[input] = scrambled_text
return
// Add it to cache, cutting old entries if the list is too long
scramble_cache[input] = scrambled_text
if(scramble_cache.len > SCRAMBLE_CACHE_LEN)
scramble_cache.Cut(1, scramble_cache.len-SCRAMBLE_CACHE_LEN-1)
scramble_cache.Cut(1, scramble_cache.len - SCRAMBLE_CACHE_LEN + 1)

/datum/language/proc/scramble(input)
/// Checks the sentence cache for a sentence
/datum/language/proc/read_sentence_cache(input)
SHOULD_NOT_OVERRIDE(TRUE)
. = last_sentence_cache[input]
if(. && last_sentence_cache[1] != input)
// bumps it to the top of the cache (don't anticipate this happening often)
last_sentence_cache -= input
last_sentence_cache[input] = .
return .

if(!length(syllables))
return stars(input)
/// Adds a sentence to the cache, though the sentence should be modified with a key
/datum/language/proc/write_sentence_cache(input, key, result_scramble)
SHOULD_NOT_OVERRIDE(TRUE)
// Add to the cache (the cache being an assoc list of assoc lists), cutting old entries if the list is too long
LAZYSET(last_sentence_cache[input], key, result_scramble)
if(last_sentence_cache.len > SENTENCE_CACHE_LEN)
last_sentence_cache.Cut(1, last_sentence_cache.len - SENTENCE_CACHE_LEN + 1)

/// Goes through the input and removes any punctuation from the end of the string.
/proc/strip_punctuation(input)
var/static/list/bad_punctuation = list("!", "?", ".", "~", ";", ":", "-")
var/last_char = copytext_char(input, -1)
while(last_char in bad_punctuation)
input = copytext(input, 1, -1)
last_char = copytext_char(input, -1)

return trim_right(input)

/// Finds what punctuation is at the end of the input and returns it.
/proc/find_last_punctuation(input)
. = copytext_char(input, -3)
if(. == "...")
return .
. = copytext_char(input, -2)
if(. in list("!!", "??", "..", "?!", "!?"))
return .
. = copytext_char(input, -1)
if(. in list("!", "?" ,".", "~", ";", ":", "-"))
return .
return ""

/// Scrambles a sentence in this language.
/// Takes into account any languages the hearer knows that has mutual understanding with this language.
/datum/language/proc/scramble_sentence(input, list/mutual_languages)
var/cache_key = "[mutual_languages?[type] || 0]-understanding"
var/list/cache = read_sentence_cache(input)
if(cache?[cache_key])
return cache[cache_key]

var/list/real_words = splittext(input, " ")
var/list/scrambled_words = list()
for(var/word in real_words)
var/translate_prob = mutual_languages?[type] || 0
if(translate_prob > 0)
var/base_word = lowertext(strip_punctuation(word))
// the probability of managing to understand a word is based on how common it is
// 1000 words in the list, so words outside the list are just treated as "the 1500th most common word"
var/commonness = GLOB.most_common_words[base_word] || 1500
translate_prob += (translate_prob * 0.2 * (1 - (min(commonness, 1500) / 500)))
if(prob(translate_prob))
scrambled_words += base_word
continue

scrambled_words += scramble_word(word)

// start building the new sentence. first word is capitalized and otherwise untouched
. = capitalize(popleft(scrambled_words))
for(var/word in scrambled_words)
if(prob(between_word_sentence_chance))
. += ". "
else if(prob(between_word_space_chance))
. += " "

. += word

// scrambling the words will drop punctuation, so re-add it at the end
. += find_last_punctuation(trim_right(input))

write_sentence_cache(input, cache_key, .)

return .

/// Scrambles a single word in this language.
/datum/language/proc/scramble_word(input)
// If the input is cached already, move it to the end of the cache and return it
var/lookup = check_cache(input)
if(lookup)
return lookup

var/input_size = length_char(input)
var/scrambled_text = ""
var/capitalize = TRUE

while(length_char(scrambled_text) < input_size)
var/next = (length(scrambled_text) && length(special_characters) && prob(1)) ? pick(special_characters) : pick_weight_recursive(syllables)
if(capitalize)
next = capitalize(next)
capitalize = FALSE
scrambled_text += next
var/chance = rand(100)
if(chance <= sentence_chance)
scrambled_text += ". "
capitalize = TRUE
else if(chance > sentence_chance && chance <= space_chance)
scrambled_text += " "

scrambled_text = trim(scrambled_text)
var/ending = copytext_char(scrambled_text, -1)
if(ending == ".")
scrambled_text = copytext_char(scrambled_text, 1, -2)
var/input_ending = copytext_char(input, -1)
if(input_ending in list("!","?","."))
scrambled_text += input_ending

add_to_cache(input, scrambled_text)

return scrambled_text
. = read_word_cache(input)
if(.)
return .

if(!length(syllables))
. = stars(input)

else
var/input_size = length_char(input)
var/add_space = FALSE
var/add_period = FALSE
. = ""
while(length_char(.) < input_size)
// add in the last syllable's period or space first
if(add_period)
. += ". "
else if(add_space)
. += " "
// generate the next syllable (capitalize if we just added a period)
var/next = (. && length(special_characters) && prob(1)) ? pick(special_characters) : pick_weight_recursive(syllables)
if(add_period)
next = capitalize(next)
. += next
// determine if the next syllable gets a period or space
add_period = prob(sentence_chance)
add_space = prob(space_chance)

write_word_cache(input, .)

return .

#undef SCRAMBLE_CACHE_LEN
#undef SENTENCE_CACHE_LEN
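
To make the per-word translation math in scramble_sentence() concrete, here is a small worked sketch of the commonness adjustment (the helper proc is hypothetical and only mirrors the formula; the 50% base chance is an arbitrary example, not something set by this PR):

// Hypothetical helper mirroring the adjustment in scramble_sentence(), for illustration only
/proc/demo_translate_prob(base_prob, commonness)
    return base_prob + (base_prob * 0.2 * (1 - (min(commonness, 1500) / 500)))

// With a 50% base mutual understanding:
//   demo_translate_prob(50, 1)    ~= 59.98  (very common word, boosted)
//   demo_translate_prob(50, 500)  == 50     (no adjustment)
//   demo_translate_prob(50, 1500) == 30     (word outside the list, penalized)
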
12 changes: 12 additions & 0 deletions code/modules/language/_language_holder.dm
@@ -176,6 +176,18 @@ Key procs
/datum/language_holder/proc/get_random_understood_language()
return pick(understood_languages)

/// Gets a list of all mutually understood languages.
/datum/language_holder/proc/get_mutually_understood_languages()
var/list/mutual_languages = list()
for(var/language_type in understood_languages)
var/datum/language/language_instance = GLOB.language_datum_instances[language_type]
for(var/mutual_language_type in language_instance.mutual_understanding)
// add it to the list OR override it if it's a stronger mutual understanding
if(!mutual_languages[mutual_language_type] || mutual_languages[mutual_language_type] < language_instance.mutual_understanding[mutual_language_type])
mutual_languages[mutual_language_type] = language_instance.mutual_understanding[mutual_language_type]

return mutual_languages

/// Gets a random spoken language, useful for forced speech and such.
/datum/language_holder/proc/get_random_spoken_language()
return pick(spoken_languages)
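
For illustration: the new holder proc returns an assoc list of language typepaths to the strongest understanding percentage found across everything the holder understands. A sketch using the values added later in this PR (the demo proc itself is hypothetical and not part of the change):

/// Hypothetical example, not part of this PR
/proc/demo_partial_understanding(mob/living/listener)
    // for a listener who only understands Beach Bum, this returns
    // list(/datum/language/common = 50, /datum/language/uncommon = 30)
    var/list/mutual = listener.get_mutually_understood_languages()
    return mutual[/datum/language/common] || 0 // 50 in that case, 0 if there is no overlap
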
5 changes: 5 additions & 0 deletions code/modules/language/beachbum.dm
@@ -19,3 +19,8 @@
)
icon_state = "beach"
always_use_default_namelist = TRUE

mutual_understanding = list(
/datum/language/common = 50,
/datum/language/uncommon = 30,
)
10 changes: 5 additions & 5 deletions code/modules/language/codespeak.dm
@@ -7,10 +7,10 @@
icon_state = "codespeak"
always_use_default_namelist = TRUE // No syllables anyways

/datum/language/codespeak/scramble(input)
var/lookup = check_cache(input)
if(lookup)
return lookup
/datum/language/codespeak/scramble_sentence(input, list/mutual_languages)
. = read_word_cache(input)
if(.)
return .

. = ""
var/list/words = list()
@@ -29,4 +29,4 @@
if(input_ending in endings)
. += input_ending

add_to_cache(input, .)
write_word_cache(input, .)
5 changes: 5 additions & 0 deletions code/modules/language/common.dm
@@ -55,3 +55,8 @@
"his", "ing", "ion", "ith", "not", "ome", "oul", "our", "sho", "ted", "ter", "tha", "the", "thi",
),
)

mutual_understanding = list(
/datum/language/beachbum = 33,
/datum/language/uncommon = 20,
)
5 changes: 5 additions & 0 deletions code/modules/language/uncommon.dm
@@ -14,3 +14,8 @@
)
icon_state = "galuncom"
default_priority = 90

mutual_understanding = list(
/datum/language/common = 33,
/datum/language/beachbum = 20,
)
code/modules/language/ratvarian.dm
@@ -11,7 +11,7 @@
spans = list(SPAN_ROBOT)
icon_state = "ratvar"

/datum/language/ratvarian/scramble(input)
/datum/language/ratvarian/scramble_sentence(input, list/mutual_languages)
return text2ratvar(input)

/// Regexes used to add ratvarian styling to rot13 english