-
Notifications
You must be signed in to change notification settings - Fork 760
Fix subtitle selection #2449
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Fix subtitle selection #2449
Conversation
|
|
||
| /** Tries hard to figure out a valid IETF tag based on language code and name. Will return null if not found. */ | ||
| fun getIETF_tag(): String? { | ||
| val tag = fromCodeToLangTagIETF(this.languageCode) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Did you check the data class SubtitleFile? I feel like the same thing is being done twice: in SubtitleFile we already try to convert lang -> IETF tag.
cloudstream/library/src/commonMain/kotlin/com/lagradost/cloudstream3/MainAPI.kt
Lines 1113 to 1129 in c1a2ae8
| data class SubtitleFile private constructor( | |
| var lang: String, | |
| var url: String, | |
| var headers: Map<String, String>? | |
| ) { | |
| @Deprecated("Use newSubtitleFile method", level = DeprecationLevel.WARNING) | |
| constructor(lang: String, url: String) : this(lang = lang, url = url, headers = null) | |
| /** Language code to properly filter auto select / download subtitles */ | |
| val langTag: String? | |
| get() = fromCodeToLangTagIETF(lang) ?: fromLanguageToTagIETF(lang, true) | |
| /** Backwards compatible copy */ | |
| fun copy( | |
| lang: String = this.lang, url: String = this.url | |
| ): SubtitleFile = SubtitleFile(lang = lang, url = url, headers = this.headers) | |
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If I remember correctly, SubtitleFile is always turned into SubtitleData
SubtitleFile: what we get from plugins and providers
SubtitleData: the app's internal representation (that has more properties)
| SubtitleHelper.languages.forEach { language -> | ||
| if (language.languageName.equals(cleanedLanguage, ignoreCase = true) || | ||
| language.nativeName.equals(cleanedLanguage, ignoreCase = true) || | ||
| // Also match exact IETF tags | ||
| language.IETF_tag.equals(cleanedLanguage, ignoreCase = true) | ||
| ) { | ||
| return language.IETF_tag | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is basically what is already implemented in the library. The key difference is that we index the languages into indexMapLanguageName and indexMapNativeName and then look the name up in the map, instead of comparing each entry with .equals(cleanedLanguage). Doing it this way improves performance.
cloudstream/library/src/commonMain/kotlin/com/lagradost/cloudstream3/utils/SubtitleHelper.kt
Lines 81 to 104 in c1a2ae8
| private fun getLanguageDataFromName(languageName: String?, halfMatch: Boolean? = false): LanguageMetadata? { | |
| if (languageName.isNullOrBlank() || languageName.length < 2) return null | |
| // Workaround to avoid junk like "English (original audio)" or "Spanish 123" | |
| // or "اَلْعَرَبِيَّةُ (Original Audio) 1" or "English (hindi sub)"… | |
| val garbage = Regex( | |
| "\\([^)]*(?:dub|sub|original|audio|code)[^)]*\\)|" + // junk words in parenthesis | |
| "[\\u064B-\\u065B]|" + // arabic diacritics | |
| "\\d|" + // numbers | |
| "[^\\p{L}\\p{Mn}\\p{Mc}\\p{Me} ()]" // non-letter (from any language) | |
| ) | |
| val lowLangName = languageName.lowercase().replace(garbage, "").trim() | |
| val index = | |
| indexMapLanguageName[lowLangName] ?: | |
| indexMapNativeName[lowLangName] ?: -1 | |
| val langMetadata = languages.getOrNull(index) | |
| if (halfMatch == true && langMetadata == null) { | |
| for (lang in languages) | |
| if (lang.languageName.contains(lowLangName, ignoreCase = true) || | |
| lang.nativeName.contains(lowLangName, ignoreCase = true)) | |
| return lang | |
| } | |
| return langMetadata | |
| } |
| var closestMatch: Pair<String?, Int> = null to 0 | ||
| // Then go for partial matches, however only use the best match | ||
| SubtitleHelper.languages.forEach { language -> | ||
| val lowerCleaned = cleanedLanguage.lowercase() | ||
| val score = maxOf( | ||
| FuzzySearch.ratio(lowerCleaned, language.languageName.lowercase()), | ||
| FuzzySearch.ratio( | ||
| lowerCleaned, language.nativeName.lowercase() | ||
| ) | ||
| ) | ||
|
|
||
| // Arbitrary cutoff at 80. | ||
| if (cleanedLanguage.contains(language.languageName, ignoreCase = true) || | ||
| cleanedLanguage.contains(language.nativeName, ignoreCase = true) || score > 80 | ||
| ) { | ||
| if (score > closestMatch.second) { | ||
| closestMatch = language.IETF_tag to score | ||
| } | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I find it more useful to implement the FuzzySearch within the library, so it can be used by extensions as well, and not just by the app itself.
My suggestion is to replace the halfMatch code with the FuzzySearch code. That's what I started doing, but I never finished it or submitted a pull request 😬.
cloudstream/library/src/commonMain/kotlin/com/lagradost/cloudstream3/utils/SubtitleHelper.kt
Lines 81 to 103 in c1a2ae8
| private fun getLanguageDataFromName(languageName: String?, halfMatch: Boolean? = false): LanguageMetadata? { | |
| if (languageName.isNullOrBlank() || languageName.length < 2) return null | |
| // Workaround to avoid junk like "English (original audio)" or "Spanish 123" | |
| // or "اَلْعَرَبِيَّةُ (Original Audio) 1" or "English (hindi sub)"… | |
| val garbage = Regex( | |
| "\\([^)]*(?:dub|sub|original|audio|code)[^)]*\\)|" + // junk words in parenthesis | |
| "[\\u064B-\\u065B]|" + // arabic diacritics | |
| "\\d|" + // numbers | |
| "[^\\p{L}\\p{Mn}\\p{Mc}\\p{Me} ()]" // non-letter (from any language) | |
| ) | |
| val lowLangName = languageName.lowercase().replace(garbage, "").trim() | |
| val index = | |
| indexMapLanguageName[lowLangName] ?: | |
| indexMapNativeName[lowLangName] ?: -1 | |
| val langMetadata = languages.getOrNull(index) | |
| if (halfMatch == true && langMetadata == null) { | |
| for (lang in languages) | |
| if (lang.languageName.contains(lowLangName, ignoreCase = true) || | |
| lang.nativeName.contains(lowLangName, ignoreCase = true)) | |
| return lang | |
| } | |
| return langMetadata |
Prevents user options from getting erased, and properly saves and selects the chosen language.