commit 844094aca0e0bea786df6a0e198e48f9924abe9f
parent d09d4ee96eb567107bb577019c8b50116f4a41f3
Author: rhunk <101876869+rhunk@users.noreply.github.com>
Date:   Wed, 12 Jun 2024 01:20:39 +0200

feat(core): better transcript

Signed-off-by: rhunk <101876869+rhunk@users.noreply.github.com>

Diffstat:
Mcommon/src/main/assets/lang/en_US.json | 18++++++++++++++++++
Mcommon/src/main/kotlin/me/rhunk/snapenhance/common/config/impl/Experimental.kt | 7+++++++
Acommon/src/main/kotlin/me/rhunk/snapenhance/common/util/TranscriptApi.kt | 46++++++++++++++++++++++++++++++++++++++++++++++
Mcore/src/main/kotlin/me/rhunk/snapenhance/core/features/FeatureManager.kt | 1+
Acore/src/main/kotlin/me/rhunk/snapenhance/core/features/impl/experiments/BetterTranscript.kt | 78++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 150 insertions(+), 0 deletions(-)

diff --git a/common/src/main/assets/lang/en_US.json b/common/src/main/assets/lang/en_US.json
@@ -1043,6 +1043,24 @@
                             }
                         }
                     },
+                    "better_transcript": {
+                        "name": "Better Transcript",
+                        "description": "Improves the voice note transcript",
+                        "properties": {
+                            "force_transcription": {
+                                "name": "Force Voice Note Transcription",
+                                "description": "Allows all voice notes to be transcribed"
+                            },
+                            "preferred_transcription_lang": {
+                                "name": "Preferred Transcription Language",
+                                "description": "The preferred language for the voice note transcript (e.g. EN, ES, FR)"
+                            },
+                            "enhanced_transcript": {
+                                "name": "Enhanced Transcript",
+                                "description": "Improves the voice note transcript using DeepL.\nBefore using this feature, please ensure that you have read their privacy policy."
+                            }
+                        }
+                    },
                     "cof_experiments": {
                         "name": "COF Experiments",
                         "description": "Enables unreleased/beta Snapchat features"
diff --git a/common/src/main/kotlin/me/rhunk/snapenhance/common/config/impl/Experimental.kt b/common/src/main/kotlin/me/rhunk/snapenhance/common/config/impl/Experimental.kt
@@ -17,6 +17,12 @@ class Experimental : ConfigContainer() {
         )
     }
 
+    class BetterTranscriptConfig: ConfigContainer(hasGlobalState = true) {
+        val forceTranscription = boolean("force_transcription") { requireRestart() }
+        val preferredTranscriptionLang = string("preferred_transcription_lang") { requireRestart() }
+        val enhancedTranscript = boolean("enhanced_transcript") { requireRestart(); addNotices(FeatureNotice.UNSTABLE) }
+    }
+
     class ComposerHooksConfig: ConfigContainer(hasGlobalState = true) {
         val showFirstCreatedUsername = boolean("show_first_created_username")
         val bypassCameraRollLimit = boolean("bypass_camera_roll_limit")
@@ -55,6 +61,7 @@ class Experimental : ConfigContainer() {
     val storyLogger = boolean("story_logger") { requireRestart(); addNotices(FeatureNotice.UNSTABLE); }
     val callRecorder = boolean("call_recorder") { requireRestart(); addNotices(FeatureNotice.UNSTABLE); }
     val accountSwitcher = container("account_switcher", AccountSwitcherConfig()) { requireRestart(); addNotices(FeatureNotice.UNSTABLE) }
+    val betterTranscript = container("better_transcript", BetterTranscriptConfig()) { requireRestart() }
     val editMessage = boolean("edit_message") { requireRestart() }
     val contextMenuFix = boolean("context_menu_fix") { requireRestart() }
     val cofExperiments = multiple("cof_experiments", *cofExperimentList.toTypedArray()) { requireRestart(); addFlags(ConfigFlag.NO_TRANSLATE); addNotices(FeatureNotice.UNSTABLE) }
diff --git a/common/src/main/kotlin/me/rhunk/snapenhance/common/util/TranscriptApi.kt b/common/src/main/kotlin/me/rhunk/snapenhance/common/util/TranscriptApi.kt
@@ -0,0 +1,46 @@
+package me.rhunk.snapenhance.common.util
+
+import com.google.gson.JsonParser
+import me.rhunk.snapenhance.common.Constants
+import okhttp3.Headers
+import okhttp3.HttpUrl.Companion.toHttpUrl
+import okhttp3.OkHttpClient
+import okhttp3.Request
+import okhttp3.RequestBody
+
+
+class TranscriptApi(
+    private val okHttpClient: OkHttpClient = OkHttpClient.Builder().addInterceptor {
+        it.proceed(it.request().newBuilder().header("User-Agent", Constants.USER_AGENT).build())
+    }.build()
+) {
+    private fun genDlClearance() = okHttpClient.newCall(
+        Request("https://clearance.deepl.com/token".toHttpUrl())
+    ).execute().use { response ->
+        val cookie = response.headers.firstOrNull { it.first.lowercase() == "set-cookie" && it.second.contains("dl_clearance", ignoreCase = true) }
+        cookie?.second?.substringBefore(";")?.substringAfter("dl_clearance=")
+    }
+
+    fun transcribe(
+        body: RequestBody,
+        lang: String? = null,
+    ): String? {
+        val clearance = genDlClearance() ?: return null
+        val url = "https://voice-pro.www.deepl.com/sync/transcribe".toHttpUrl().newBuilder()
+            .apply {
+                lang?.let { addQueryParameter("lang", it) }
+            }
+            .build()
+        val request = Request(url, headers = Headers.headersOf(
+            "Cookie", "dl_clearance=$clearance",
+            "Content-Type", "audio/webm"
+        ), method = "POST", body = body)
+        return okHttpClient.newCall(request).execute().use { response ->
+            if (!response.isSuccessful) return@use null
+            val jsonObject = JsonParser.parseString(response.body.string()).asJsonObject
+            jsonObject.getAsJsonArray("segments").fold("") { text, segment ->
+                text + segment.asJsonObject.getAsJsonPrimitive("text").asString
+            }.trim()
+        }
+    }
+}
diff --git a/core/src/main/kotlin/me/rhunk/snapenhance/core/features/FeatureManager.kt b/core/src/main/kotlin/me/rhunk/snapenhance/core/features/FeatureManager.kt
@@ -130,6 +130,7 @@ class FeatureManager(
             BestFriendPinning(),
             ContextMenuFix(),
             DisableTelecomFramework(),
+            BetterTranscript(),
         )
         initializeFeatures()
     }
diff --git a/core/src/main/kotlin/me/rhunk/snapenhance/core/features/impl/experiments/BetterTranscript.kt b/core/src/main/kotlin/me/rhunk/snapenhance/core/features/impl/experiments/BetterTranscript.kt
@@ -0,0 +1,77 @@
+package me.rhunk.snapenhance.core.features.impl.experiments
+
+import me.rhunk.snapenhance.common.data.ContentType
+import me.rhunk.snapenhance.common.util.TranscriptApi
+import me.rhunk.snapenhance.common.util.protobuf.ProtoEditor
+import me.rhunk.snapenhance.core.event.events.impl.BuildMessageEvent
+import me.rhunk.snapenhance.core.features.Feature
+import me.rhunk.snapenhance.core.features.FeatureLoadParams
+import me.rhunk.snapenhance.core.util.dataBuilder
+import me.rhunk.snapenhance.core.util.hook.HookStage
+import me.rhunk.snapenhance.core.util.hook.hook
+import me.rhunk.snapenhance.core.util.ktx.getObjectFieldOrNull
+import me.rhunk.snapenhance.core.util.ktx.setObjectField
+import okhttp3.RequestBody.Companion.toRequestBody
+import java.lang.reflect.Method
+import java.nio.ByteBuffer
+
+class BetterTranscript: Feature("Better Transcript", loadParams = FeatureLoadParams.ACTIVITY_CREATE_SYNC) {
+    override fun onActivityCreate() {
+        if (context.config.experimental.betterTranscript.globalState != true) return
+        val config = context.config.experimental.betterTranscript
+        val preferredTranscriptionLang = config.preferredTranscriptionLang.getNullable()?.takeIf {
+            it.isNotBlank()
+        }
+        val transcriptApi by lazy { TranscriptApi() }
+
+        if (config.forceTranscription.get()) {
+            context.event.subscribe(BuildMessageEvent::class, priority = 104) { event ->
+                if (event.message.messageContent?.contentType != ContentType.NOTE) return@subscribe
+                event.message.messageContent!!.content = ProtoEditor(event.message.messageContent!!.content!!).apply {
+                    edit(6, 1) {
+                        if (firstOrNull(3) == null) {
+                            addString(3, context.getConfigLocale())
+                        }
+                    }
+                }.toByteArray()
+            }
+        }
+
+        findClass("com.snapchat.client.voiceml.IVoiceMLSDK\$CppProxy").hook("asrTranscribe", HookStage.BEFORE) { param ->
+            if (config.enhancedTranscript.get()) {
+                val buffer = param.arg<ByteBuffer>(2).let {
+                    it.rewind()
+                    ByteArray(it.remaining()).also { it1 -> it.get(it1); it.rewind() }
+                }
+                val result = runCatching {
+                    transcriptApi.transcribe(
+                        buffer.toRequestBody(),
+                        lang = config.preferredTranscriptionLang.getNullable()?.takeIf {
+                            it.isNotBlank()
+                        }?.uppercase()
+                    )
+                }.onFailure {
+                    context.log.error("Failed to transcribe audio", it)
+                    context.shortToast("Failed to transcribe audio! Check logcat for more details.")
+                }.getOrNull()
+
+                param.setResult(
+                    (param.method() as Method).returnType.dataBuilder {
+                        set("mError", result == null)
+                        set("mNlpResponses", ArrayList<Any>())
+                        set("mWordInfo", ArrayList<Any>())
+                        set("mTranscription", result)
+                    }
+                )
+                return@hook
+            }
+            preferredTranscriptionLang?.lowercase()?.let {
+                val asrConfig = param.arg<Any>(1)
+                asrConfig.getObjectFieldOrNull("mBaseConfig")?.apply {
+                    setObjectField("mLanguageModel", it)
+                    setObjectField("mUiLanguage", it)
+                }
+            }
+        }
+    }
+}
\ No newline at end of file