Adds partial language understanding #630

Merged
merged 6 commits on Dec 22, 2024
9 changes: 9 additions & 0 deletions code/_globalvars/lists/flavor_misc.dm
@@ -307,3 +307,12 @@ GLOBAL_LIST_INIT(status_display_state_pictures, list(
"blank",
"shuttle",
))

GLOBAL_LIST_INIT(most_common_words, init_common_words())

/proc/init_common_words()
. = list()
var/i = 1
for(var/word in world.file2list("strings/1000_most_common.txt"))
.[word] = i
i += 1
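
For illustration: the proc above builds an assoc list mapping each word to its 1-based rank in strings/1000_most_common.txt, so a lookup doubles as a commonness score. A minimal usage sketch follows (the demo proc is hypothetical and not part of this PR, and "the" being the first word in the file is an assumption):

/proc/demo_common_word_rank()
    // hypothetical example, not part of this PR
    var/rank = GLOB.most_common_words["the"] // 1, assuming "the" is the first line of the file
    var/missing = GLOB.most_common_words["xyzzy"] // null for any word outside the list
    return list(rank, missing)
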
7 changes: 1 addition & 6 deletions code/controllers/subsystem/discord.dm
@@ -43,17 +43,13 @@ SUBSYSTEM_DEF(discord)
/// People who have tried to verify this round already
var/list/reverify_cache

/// Common words list, used to generate one time tokens
var/list/common_words

/// The file where notification status is saved
var/notify_file = file("data/notify.json")

/// Is TGS enabled (If not we won't fire because otherwise this is useless)
var/enabled = FALSE

/datum/controller/subsystem/discord/Initialize()
common_words = world.file2list("strings/1000_most_common.txt")
reverify_cache = list()
// Check if we are using TGS, otherwise return and disable firing
if(world.TgsAvailable())
@@ -156,7 +152,7 @@ SUBSYSTEM_DEF(discord)
// While there's a collision in the token, generate a new one (should rarely happen)
while(not_unique)
//Column is varchar 100, so we trim just in case someone does us the dirty later
one_time_token = trim("[pick(common_words)]-[pick(common_words)]-[pick(common_words)]-[pick(common_words)]-[pick(common_words)]-[pick(common_words)]", 100)
one_time_token = trim("[pick(GLOB.most_common_words)]-[pick(GLOB.most_common_words)]-[pick(GLOB.most_common_words)]-[pick(GLOB.most_common_words)]-[pick(GLOB.most_common_words)]-[pick(GLOB.most_common_words)]", 100)

not_unique = find_discord_link_by_token(one_time_token, timebound = TRUE)

@@ -298,4 +294,3 @@ SUBSYSTEM_DEF(discord)
if (length(discord_mention_extraction_regex.group) == 1)
return discord_mention_extraction_regex.group[1]
return null

4 changes: 1 addition & 3 deletions code/datums/brain_damage/mild.dm
@@ -191,8 +191,6 @@
gain_text = span_warning("You lose your grasp on complex words.")
lose_text = span_notice("You feel your vocabulary returning to normal again.")

var/static/list/common_words = world.file2list("strings/1000_most_common.txt")

/datum/brain_trauma/mild/expressive_aphasia/handle_speech(datum/source, list/speech_args)
var/message = speech_args[SPEECH_MESSAGE]
if(message)
@@ -212,7 +210,7 @@
word = copytext(word, 1, suffix_foundon)
word = html_decode(word)

if(lowertext(word) in common_words)
if(GLOB.most_common_words[lowertext(word)])
new_message += word + suffix
else
if(prob(30) && message_split.len > 2)
4 changes: 4 additions & 0 deletions code/game/atoms_movable.dm
@@ -1521,6 +1521,10 @@
/atom/movable/proc/get_random_understood_language()
return get_language_holder().get_random_understood_language()

/// Gets a list of all mutually understood languages.
/atom/movable/proc/get_mutually_understood_languages()
return get_language_holder().get_mutually_understood_languages()

/// Gets a random spoken language, useful for forced speech and such.
/atom/movable/proc/get_random_spoken_language()
return get_language_holder().get_random_spoken_language()
2 changes: 1 addition & 1 deletion code/game/machinery/telecomms/computers/logbrowser.dm
@@ -59,7 +59,7 @@
message_out = "\"[message_in]\""
else if(!user.has_language(language))
// Language unknown: scramble
message_out = "\"[language_instance.scramble(message_in)]\""
message_out = "\"[language_instance.scramble_sentence(message_in, user.get_mutually_understood_languages())]\""
else
message_out = "(Unintelligible)"
packet_out["message"] = message_out
2 changes: 1 addition & 1 deletion code/game/say.dm
@@ -213,7 +213,7 @@ GLOBAL_LIST_INIT(freqtospan, list(

if(!has_language(language))
var/datum/language/dialect = GLOB.language_datum_instances[language]
raw_message = dialect.scramble(raw_message)
raw_message = dialect.scramble_sentence(raw_message, get_mutually_understood_languages())

return raw_message

191 changes: 148 additions & 43 deletions code/modules/language/_language.dm
@@ -1,5 +1,7 @@
/// maximum of 50 specific scrambled lines per language
/// Last 50 spoken (uncommon) words will be cached before we start cycling them out (re-randomizing them)
#define SCRAMBLE_CACHE_LEN 50
/// Last 20 spoken sentences will be cached before we start cycling them out (re-randomizing them)
#define SENTENCE_CACHE_LEN 20

/// Datum based languages. Easily editable and modular.
/datum/language
@@ -18,13 +20,23 @@
var/list/special_characters
/// Likelihood of making a new sentence after each syllable.
var/sentence_chance = 5
/// Likelihood of making a new sentence after each word.
var/between_word_sentence_chance = 0
/// Likelihood of getting a space in the random scramble string
var/space_chance = 55
/// Likelihood of getting a space between words
var/between_word_space_chance = 100
/// Spans to apply from this language
var/list/spans
/// Cache of recently scrambled text
/// This allows commonly reused words to not require a full re-scramble every time.
var/list/scramble_cache = list()
/// Cache of recently spoken sentences
/// So if one person speaks over the radio, everyone hears the same thing.
var/list/last_sentence_cache = list()
/// The 1000 most common words get permanently cached
var/list/most_common_cache = list()

/// The language that an atom knows with the highest "default_priority" is selected by default.
var/default_priority = 0
/// If TRUE, when generating names, we will always use the default human namelist, even if we have syllables set.
@@ -45,6 +57,11 @@
/// What char to place in between randomly generated names
var/random_name_spacer = " "

/// Assoc Lazylist of other language types that would have a degree of mutual understanding with this language.
/// For example, `list(/datum/language/common = 50)` means speakers of this language have a 50% chance to understand each word of Common.
/// At 100%, they can effectively understand that language outright.
var/list/mutual_understanding

/// Checks whether we should display the language icon to the passed hearer.
/datum/language/proc/display_icon(atom/movable/hearer)
var/understands = hearer.has_language(src.type)
@@ -109,56 +126,144 @@

return result

/datum/language/proc/check_cache(input)
var/lookup = scramble_cache[input]
if(lookup)
/// Checks the word cache for a word
/datum/language/proc/read_word_cache(input)
SHOULD_NOT_OVERRIDE(TRUE)
if(most_common_cache[input])
return most_common_cache[input]

. = scramble_cache[input]
if(. && scramble_cache[1] != input)
// bumps it to the top of the cache
scramble_cache -= input
scramble_cache[input] = lookup
. = lookup
scramble_cache[input] = .
return .

/datum/language/proc/add_to_cache(input, scrambled_text)
/// Adds a word to the cache
/datum/language/proc/write_word_cache(input, scrambled_text)
SHOULD_NOT_OVERRIDE(TRUE)
if(GLOB.most_common_words[lowertext(input)])
most_common_cache[input] = scrambled_text
return
// Add it to cache, cutting old entries if the list is too long
scramble_cache[input] = scrambled_text
if(scramble_cache.len > SCRAMBLE_CACHE_LEN)
scramble_cache.Cut(1, scramble_cache.len-SCRAMBLE_CACHE_LEN-1)
scramble_cache.Cut(1, scramble_cache.len - SCRAMBLE_CACHE_LEN + 1)

/datum/language/proc/scramble(input)
/// Checks the sentence cache for a sentence
/datum/language/proc/read_sentence_cache(input)
SHOULD_NOT_OVERRIDE(TRUE)
. = last_sentence_cache[input]
if(. && last_sentence_cache[1] != input)
// bumps it to the top of the cache (don't anticipate this happening often)
last_sentence_cache -= input
last_sentence_cache[input] = .
return .

if(!length(syllables))
return stars(input)
/// Adds a sentence to the cache, though the sentence should be modified with a key
/datum/language/proc/write_sentence_cache(input, key, result_scramble)
SHOULD_NOT_OVERRIDE(TRUE)
// Add to the cache (the cache being an assoc list of assoc lists), cutting old entries if the list is too long
LAZYSET(last_sentence_cache[input], key, result_scramble)
if(last_sentence_cache.len > SENTENCE_CACHE_LEN)
last_sentence_cache.Cut(1, last_sentence_cache.len - SENTENCE_CACHE_LEN + 1)

/// Goes through the input and removes any punctuation from the end of the string.
/proc/strip_punctuation(input)
var/static/list/bad_punctuation = list("!", "?", ".", "~", ";", ":", "-")
var/last_char = copytext_char(input, -1)
while(last_char in bad_punctuation)
input = copytext(input, 1, -1)
last_char = copytext_char(input, -1)

return trim_right(input)

/// Finds what punctuation is at the end of the input and returns it.
/proc/find_last_punctuation(input)
. = copytext_char(input, -3)
if(. == "...")
return .
. = copytext_char(input, -2)
if(. in list("!!", "??", "..", "?!", "!?"))
return .
. = copytext_char(input, -1)
if(. in list("!", "?" ,".", "~", ";", ":", "-"))
return .
return ""

/// Scrambles a sentence in this language.
/// Takes into account any languages the hearer knows that has mutual understanding with this language.
/datum/language/proc/scramble_sentence(input, list/mutual_languages)
var/cache_key = "[mutual_languages?[type] || 0]-understanding"
var/list/cache = read_sentence_cache(input)
if(cache?[cache_key])
return cache[cache_key]

var/list/real_words = splittext(input, " ")
var/list/scrambled_words = list()
for(var/word in real_words)
var/translate_prob = mutual_languages?[type] || 0
if(translate_prob > 0)
var/base_word = lowertext(strip_punctuation(word))
// the probability of managing to understand a word is based on how common it is
// 1000 words in the list, so words outside the list are just treated as "the 1500th most common word"
var/commonness = GLOB.most_common_words[base_word] || 1500
translate_prob += (translate_prob * 0.2 * (1 - (min(commonness, 1500) / 500)))
if(prob(translate_prob))
scrambled_words += base_word
continue

scrambled_words += scramble_word(word)

// start building the new sentence. first word is capitalized and otherwise untouched
. = capitalize(popleft(scrambled_words))
for(var/word in scrambled_words)
if(prob(between_word_sentence_chance))
. += ". "
else if(prob(between_word_space_chance))
. += " "

. += word

// scrambling the words will drop punctuation, so re-add it at the end
. += find_last_punctuation(trim_right(input))

write_sentence_cache(input, cache_key, .)

return .

/// Scrambles a single word in this language.
/datum/language/proc/scramble_word(input)
// If the input is cached already, move it to the end of the cache and return it
var/lookup = check_cache(input)
if(lookup)
return lookup

var/input_size = length_char(input)
var/scrambled_text = ""
var/capitalize = TRUE

while(length_char(scrambled_text) < input_size)
var/next = (length(scrambled_text) && length(special_characters) && prob(1)) ? pick(special_characters) : pick_weight_recursive(syllables)
if(capitalize)
next = capitalize(next)
capitalize = FALSE
scrambled_text += next
var/chance = rand(100)
if(chance <= sentence_chance)
scrambled_text += ". "
capitalize = TRUE
else if(chance > sentence_chance && chance <= space_chance)
scrambled_text += " "

scrambled_text = trim(scrambled_text)
var/ending = copytext_char(scrambled_text, -1)
if(ending == ".")
scrambled_text = copytext_char(scrambled_text, 1, -2)
var/input_ending = copytext_char(input, -1)
if(input_ending in list("!","?","."))
scrambled_text += input_ending

add_to_cache(input, scrambled_text)

return scrambled_text
. = read_word_cache(input)
if(.)
return .

if(!length(syllables))
. = stars(input)

else
var/input_size = length_char(input)
var/add_space = FALSE
var/add_period = FALSE
. = ""
while(length_char(.) < input_size)
// add in the last syllable's period or space first
if(add_period)
. += ". "
else if(add_space)
. += " "
// generate the next syllable (capitalize if we just added a period)
var/next = (. && length(special_characters) && prob(1)) ? pick(special_characters) : pick_weight_recursive(syllables)
if(add_period)
next = capitalize(next)
. += next
// determine if the next syllable gets a period or space
add_period = prob(sentence_chance)
add_space = prob(space_chance)

write_word_cache(input, .)

return .

#undef SCRAMBLE_CACHE_LEN
#undef SENTENCE_CACHE_LEN
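
To make the per-word translation math in scramble_sentence() concrete, here is a small worked sketch of the commonness adjustment (the helper proc is hypothetical and only mirrors the formula; the 50% base chance is an arbitrary example, not something set by this PR):

// Hypothetical helper mirroring the adjustment in scramble_sentence(), for illustration only
/proc/demo_translate_prob(base_prob, commonness)
    return base_prob + (base_prob * 0.2 * (1 - (min(commonness, 1500) / 500)))

// With a 50% base mutual understanding:
//   demo_translate_prob(50, 1)    ~= 59.98  (very common word, boosted)
//   demo_translate_prob(50, 500)  == 50     (no adjustment)
//   demo_translate_prob(50, 1500) == 30     (word outside the list, penalized)
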
12 changes: 12 additions & 0 deletions code/modules/language/_language_holder.dm
@@ -176,6 +176,18 @@ Key procs
/datum/language_holder/proc/get_random_understood_language()
return pick(understood_languages)

/// Gets a list of all mutually understood languages.
/datum/language_holder/proc/get_mutually_understood_languages()
var/list/mutual_languages = list()
for(var/language_type in understood_languages)
var/datum/language/language_instance = GLOB.language_datum_instances[language_type]
for(var/mutual_language_type in language_instance.mutual_understanding)
// add it to the list OR override it if it's a stronger mutual understanding
if(!mutual_languages[mutual_language_type] || mutual_languages[mutual_language_type] < language_instance.mutual_understanding[mutual_language_type])
mutual_languages[mutual_language_type] = language_instance.mutual_understanding[mutual_language_type]

return mutual_languages

/// Gets a random spoken language, useful for forced speech and such.
/datum/language_holder/proc/get_random_spoken_language()
return pick(spoken_languages)
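
For illustration: the new holder proc returns an assoc list of language typepaths to the strongest understanding percentage found across everything the holder understands. A sketch using the values added later in this PR (the demo proc itself is hypothetical and not part of the change):

/// Hypothetical example, not part of this PR
/proc/demo_partial_understanding(mob/living/listener)
    // for a listener who only understands Beach Bum, this returns
    // list(/datum/language/common = 50, /datum/language/uncommon = 30)
    var/list/mutual = listener.get_mutually_understood_languages()
    return mutual[/datum/language/common] || 0 // 50 in that case, 0 if there is no overlap
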
5 changes: 5 additions & 0 deletions code/modules/language/beachbum.dm
@@ -19,3 +19,8 @@
)
icon_state = "beach"
always_use_default_namelist = TRUE

mutual_understanding = list(
/datum/language/common = 50,
/datum/language/uncommon = 30,
)
10 changes: 5 additions & 5 deletions code/modules/language/codespeak.dm
@@ -7,10 +7,10 @@
icon_state = "codespeak"
always_use_default_namelist = TRUE // No syllables anyways

/datum/language/codespeak/scramble(input)
var/lookup = check_cache(input)
if(lookup)
return lookup
/datum/language/codespeak/scramble_sentence(input, list/mutual_languages)
. = read_word_cache(input)
if(.)
return .

. = ""
var/list/words = list()
@@ -29,4 +29,4 @@
if(input_ending in endings)
. += input_ending

add_to_cache(input, .)
write_word_cache(input, .)
5 changes: 5 additions & 0 deletions code/modules/language/common.dm
@@ -55,3 +55,8 @@
"his", "ing", "ion", "ith", "not", "ome", "oul", "our", "sho", "ted", "ter", "tha", "the", "thi",
),
)

mutual_understanding = list(
/datum/language/beachbum = 33,
/datum/language/uncommon = 20,
)
5 changes: 5 additions & 0 deletions code/modules/language/uncommon.dm
@@ -14,3 +14,8 @@
)
icon_state = "galuncom"
default_priority = 90

mutual_understanding = list(
/datum/language/common = 33,
/datum/language/beachbum = 20,
)
code/modules/language/ratvarian.dm
@@ -11,7 +11,7 @@
spans = list(SPAN_ROBOT)
icon_state = "ratvar"

/datum/language/ratvarian/scramble(input)
/datum/language/ratvarian/scramble_sentence(input, list/mutual_languages)
return text2ratvar(input)

/// Regexes used to add ratvarian styling to rot13 english