commit 844094aca0e0bea786df6a0e198e48f9924abe9f
parent d09d4ee96eb567107bb577019c8b50116f4a41f3
Author: rhunk <101876869+rhunk@users.noreply.github.com>
Date: Wed, 12 Jun 2024 01:20:39 +0200
feat(core): better transcript
Signed-off-by: rhunk <101876869+rhunk@users.noreply.github.com>
Diffstat:
5 files changed, 150 insertions(+), 0 deletions(-)
diff --git a/common/src/main/assets/lang/en_US.json b/common/src/main/assets/lang/en_US.json
@@ -1043,6 +1043,24 @@
}
}
},
+ "better_transcript": {
+ "name": "Better Transcript",
+ "description": "Improves the voice note transcript",
+ "properties": {
+ "force_transcription": {
+ "name": "Force Voice Note Transcription",
+ "description": "Allows all voice notes to be transcribed"
+ },
+ "preferred_transcription_lang": {
+ "name": "Preferred Transcription Language",
+ "description": "The preferred language for the voice note transcript (e.g. EN, ES, FR)"
+ },
+ "enhanced_transcript": {
+ "name": "Enhanced Transcript",
+ "description": "Improves the voice note transcript using DeepL.\nBefore using this feature, please ensure that you have read their privacy policy."
+ }
+ }
+ },
"cof_experiments": {
"name": "COF Experiments",
"description": "Enables unreleased/beta Snapchat features"
diff --git a/common/src/main/kotlin/me/rhunk/snapenhance/common/config/impl/Experimental.kt b/common/src/main/kotlin/me/rhunk/snapenhance/common/config/impl/Experimental.kt
@@ -17,6 +17,12 @@ class Experimental : ConfigContainer() {
)
}
+    /**
+     * Options of the "Better Transcript" experiment, which alters voice note transcription.
+     * The container carries its own global on/off state and every option requires a restart.
+     */
+    class BetterTranscriptConfig : ConfigContainer(hasGlobalState = true) {
+        /** Allows all voice notes to be transcribed. */
+        val forceTranscription = boolean("force_transcription") {
+            requireRestart()
+        }
+
+        /** Preferred language for the voice note transcript (e.g. EN, ES, FR). */
+        val preferredTranscriptionLang = string("preferred_transcription_lang") {
+            requireRestart()
+        }
+
+        /** Produces the transcript through DeepL instead; flagged as unstable. */
+        val enhancedTranscript = boolean("enhanced_transcript") {
+            requireRestart()
+            addNotices(FeatureNotice.UNSTABLE)
+        }
+    }
+
class ComposerHooksConfig: ConfigContainer(hasGlobalState = true) {
val showFirstCreatedUsername = boolean("show_first_created_username")
val bypassCameraRollLimit = boolean("bypass_camera_roll_limit")
@@ -55,6 +61,7 @@ class Experimental : ConfigContainer() {
val storyLogger = boolean("story_logger") { requireRestart(); addNotices(FeatureNotice.UNSTABLE); }
val callRecorder = boolean("call_recorder") { requireRestart(); addNotices(FeatureNotice.UNSTABLE); }
val accountSwitcher = container("account_switcher", AccountSwitcherConfig()) { requireRestart(); addNotices(FeatureNotice.UNSTABLE) }
+ val betterTranscript = container("better_transcript", BetterTranscriptConfig()) { requireRestart() }
val editMessage = boolean("edit_message") { requireRestart() }
val contextMenuFix = boolean("context_menu_fix") { requireRestart() }
val cofExperiments = multiple("cof_experiments", *cofExperimentList.toTypedArray()) { requireRestart(); addFlags(ConfigFlag.NO_TRANSLATE); addNotices(FeatureNotice.UNSTABLE) }
diff --git a/common/src/main/kotlin/me/rhunk/snapenhance/common/util/TranscriptApi.kt b/common/src/main/kotlin/me/rhunk/snapenhance/common/util/TranscriptApi.kt
@@ -0,0 +1,46 @@
+package me.rhunk.snapenhance.common.util
+
+import com.google.gson.JsonParser
+import me.rhunk.snapenhance.common.Constants
+import okhttp3.Headers
+import okhttp3.HttpUrl.Companion.toHttpUrl
+import okhttp3.OkHttpClient
+import okhttp3.Request
+import okhttp3.RequestBody
+
+
+/**
+ * Small client for DeepL's voice transcription web endpoint.
+ *
+ * Each [transcribe] call performs two blocking HTTP requests: one to obtain a
+ * `dl_clearance` cookie and one to upload the audio.
+ *
+ * @param okHttpClient client used for both requests; the default instance adds
+ * [Constants.USER_AGENT] as the `User-Agent` header of every request.
+ */
+class TranscriptApi(
+    private val okHttpClient: OkHttpClient = OkHttpClient.Builder().addInterceptor {
+        it.proceed(it.request().newBuilder().header("User-Agent", Constants.USER_AGENT).build())
+    }.build()
+) {
+    /**
+     * Requests the clearance endpoint and extracts the value of the `dl_clearance` cookie
+     * from its `Set-Cookie` response headers.
+     *
+     * @return the cookie value, or `null` when no such cookie was returned.
+     */
+    private fun genDlClearance(): String? = okHttpClient.newCall(
+        Request("https://clearance.deepl.com/token".toHttpUrl())
+    ).execute().use { response ->
+        // Headers.values() matches the header name case-insensitively.
+        response.headers.values("Set-Cookie").firstNotNullOfOrNull { setCookie ->
+            // Only the leading "name=value" pair matters; attributes (Path, Expires, ...) follow the first ';'.
+            val pair = setCookie.substringBefore(';')
+            val name = pair.substringBefore('=', missingDelimiterValue = "").trim()
+            // Compare the cookie *name* so that a cookie merely containing the text
+            // "dl_clearance", or using another letter case, cannot yield a bogus value.
+            pair.substringAfter('=', missingDelimiterValue = "")
+                .takeIf { name.equals(CLEARANCE_COOKIE, ignoreCase = true) }
+        }
+    }
+
+    /**
+     * Uploads [body] to the transcription endpoint and returns the recognised text.
+     *
+     * @param body audio payload; it is sent with an `audio/webm` content type header.
+     * @param lang optional language code forwarded as the `lang` query parameter;
+     * `null` or blank values are omitted.
+     * @return the concatenated, trimmed text of all returned segments, or `null` when no
+     * clearance cookie could be obtained or the server answered with a non-2xx status.
+     * @throws IllegalStateException when the response JSON has no `segments` array.
+     * @throws java.io.IOException when one of the HTTP calls fails.
+     */
+    fun transcribe(
+        body: RequestBody,
+        lang: String? = null,
+    ): String? {
+        val clearance = genDlClearance() ?: return null
+        val url = "https://voice-pro.www.deepl.com/sync/transcribe".toHttpUrl().newBuilder()
+            .apply {
+                lang?.takeIf { it.isNotBlank() }?.let { addQueryParameter("lang", it) }
+            }
+            .build()
+        val request = Request(url, headers = Headers.headersOf(
+            "Cookie", "$CLEARANCE_COOKIE=$clearance",
+            "Content-Type", "audio/webm"
+        ), method = "POST", body = body)
+        return okHttpClient.newCall(request).execute().use { response ->
+            if (!response.isSuccessful) return@use null
+            val root = JsonParser.parseString(response.body.string()).asJsonObject
+            // getAsJsonArray() yields null for an absent member; fail with a readable message instead of a bare NPE.
+            val segments = root.getAsJsonArray("segments")
+                ?: error("Transcription response does not contain a 'segments' array")
+            segments.joinToString(separator = "") { segment ->
+                segment.asJsonObject.getAsJsonPrimitive("text").asString
+            }.trim()
+        }
+    }
+
+    private companion object {
+        /** Name of the cookie issued by the clearance endpoint and expected by the transcription endpoint. */
+        const val CLEARANCE_COOKIE = "dl_clearance"
+    }
+}
diff --git a/core/src/main/kotlin/me/rhunk/snapenhance/core/features/FeatureManager.kt b/core/src/main/kotlin/me/rhunk/snapenhance/core/features/FeatureManager.kt
@@ -130,6 +130,7 @@ class FeatureManager(
BestFriendPinning(),
ContextMenuFix(),
DisableTelecomFramework(),
+ BetterTranscript(),
)
initializeFeatures()
}
diff --git a/core/src/main/kotlin/me/rhunk/snapenhance/core/features/impl/experiments/BetterTranscript.kt b/core/src/main/kotlin/me/rhunk/snapenhance/core/features/impl/experiments/BetterTranscript.kt
@@ -0,0 +1,77 @@
+package me.rhunk.snapenhance.core.features.impl.experiments
+
+import me.rhunk.snapenhance.common.data.ContentType
+import me.rhunk.snapenhance.common.util.TranscriptApi
+import me.rhunk.snapenhance.common.util.protobuf.ProtoEditor
+import me.rhunk.snapenhance.core.event.events.impl.BuildMessageEvent
+import me.rhunk.snapenhance.core.features.Feature
+import me.rhunk.snapenhance.core.features.FeatureLoadParams
+import me.rhunk.snapenhance.core.util.dataBuilder
+import me.rhunk.snapenhance.core.util.hook.HookStage
+import me.rhunk.snapenhance.core.util.hook.hook
+import me.rhunk.snapenhance.core.util.ktx.getObjectFieldOrNull
+import me.rhunk.snapenhance.core.util.ktx.setObjectField
+import okhttp3.RequestBody.Companion.toRequestBody
+import java.lang.reflect.Method
+import java.nio.ByteBuffer
+
+/**
+ * Experimental feature that alters voice note transcription.
+ *
+ * When the `better_transcript` container is enabled it can, depending on its options:
+ * - "force transcription": add a string field 3 (the configured locale) to the nested proto
+ *   message at path 6 -> 1 of every built NOTE message that lacks one;
+ * - "enhanced transcript": answer `IVoiceMLSDK$CppProxy.asrTranscribe` with a transcript
+ *   obtained through [TranscriptApi] instead of letting the hooked method run;
+ * - otherwise: overwrite the language fields of the ASR base config with the preferred
+ *   transcription language before the hooked method runs.
+ */
+class BetterTranscript: Feature("Better Transcript", loadParams = FeatureLoadParams.ACTIVITY_CREATE_SYNC) {
+    override fun onActivityCreate() {
+        val config = context.config.experimental.betterTranscript
+        if (config.globalState != true) return
+
+        // Resolved once and shared by both transcription paths; blank values count as "not set".
+        val preferredTranscriptionLang = config.preferredTranscriptionLang.getNullable()?.takeIf {
+            it.isNotBlank()
+        }
+        // Created on first use so no HTTP client is built unless the enhanced path is actually hit.
+        val transcriptApi by lazy { TranscriptApi() }
+
+        if (config.forceTranscription.get()) {
+            context.event.subscribe(BuildMessageEvent::class, priority = 104) { event ->
+                val messageContent = event.message.messageContent ?: return@subscribe
+                if (messageContent.contentType != ContentType.NOTE) return@subscribe
+                // Nothing to patch when the message carries no payload.
+                val rawContent = messageContent.content ?: return@subscribe
+                messageContent.content = ProtoEditor(rawContent).apply {
+                    edit(6, 1) {
+                        // Only add the field when absent so an existing value is never overridden.
+                        if (firstOrNull(3) == null) {
+                            addString(3, context.getConfigLocale())
+                        }
+                    }
+                }.toByteArray()
+            }
+        }
+
+        findClass("com.snapchat.client.voiceml.IVoiceMLSDK\$CppProxy").hook("asrTranscribe", HookStage.BEFORE) { param ->
+            if (!config.enhancedTranscript.get()) {
+                // Built-in transcription: only steer it towards the preferred language, if any.
+                preferredTranscriptionLang?.lowercase()?.let { lang ->
+                    val asrConfig = param.arg<Any>(1)
+                    asrConfig.getObjectFieldOrNull("mBaseConfig")?.apply {
+                        setObjectField("mLanguageModel", lang)
+                        setObjectField("mUiLanguage", lang)
+                    }
+                }
+                return@hook
+            }
+
+            // Copy the whole audio buffer, then rewind it so its position is left at the start.
+            val audio = param.arg<ByteBuffer>(2).let { source ->
+                source.rewind()
+                ByteArray(source.remaining()).also { bytes ->
+                    source.get(bytes)
+                    source.rewind()
+                }
+            }
+
+            val transcript = runCatching {
+                transcriptApi.transcribe(
+                    audio.toRequestBody(),
+                    lang = preferredTranscriptionLang?.uppercase()
+                )
+            }.onFailure {
+                context.log.error("Failed to transcribe audio", it)
+                context.shortToast("Failed to transcribe audio! Check logcat for more details.")
+            }.getOrNull()
+
+            // Setting the result in the BEFORE stage replaces the hooked call's return value;
+            // a missing transcript is reported through the error flag.
+            param.setResult(
+                (param.method() as Method).returnType.dataBuilder {
+                    set("mError", transcript == null)
+                    set("mNlpResponses", ArrayList<Any>())
+                    set("mWordInfo", ArrayList<Any>())
+                    set("mTranscription", transcript)
+                }
+            )
+        }
+    }
+}
\ No newline at end of file