-
-
Notifications
You must be signed in to change notification settings - Fork 800
Expand file tree
/
Copy pathWikiSite.kt
More file actions
192 lines (167 loc) · 7.45 KB
/
WikiSite.kt
File metadata and controls
192 lines (167 loc) · 7.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
package org.wikipedia.dataclient
import android.net.Uri
import android.os.Parcelable
import androidx.core.net.toUri
import kotlinx.parcelize.Parcelize
import kotlinx.serialization.SerialName
import kotlinx.serialization.Serializable
import org.wikipedia.Constants
import org.wikipedia.WikipediaApp
import org.wikipedia.json.UriSerializer
import org.wikipedia.language.AppLanguageLookUpTable
import org.wikipedia.util.UriUtil
/**
* The base URL and Wikipedia language code for a MediaWiki site. Examples:
*
*
* <lh>Name: scheme / authority / language code</lh>
* * English Wikipedia: HTTPS / en.wikipedia.org / en
* * Chinese Wikipedia: HTTPS / zh.wikipedia.org / zh-hans or zh-hant
* * Meta-Wiki: HTTPS / meta.wikimedia.org / (none)
* * Test Wikipedia: HTTPS / test.wikipedia.org / test
* * Võro Wikipedia: HTTPS / fiu-vro.wikipedia.org / fiu-vro
* * Simple English Wikipedia: HTTPS / simple.wikipedia.org / simple
* * Simple English Wikipedia (beta cluster mirror): HTTP / simple.wikipedia.beta.wmflabs.org / simple
* * Development: HTTP / 192.168.1.11:8080 / (none)
*
*
* **As shown above, the language code or mapping is part of the authority:**
*
* <lh>Validity: authority / language code</lh>
* * Correct: "test.wikipedia.org" / "test"
* Correct: "wikipedia.org" / ""
* Correct: "no.wikipedia.org" / "nb"
* Incorrect: "wikipedia.org" / "test"
*
*/
@Serializable
@Parcelize
data class WikiSite(
@SerialName("domain") @Serializable(with = UriSerializer::class) var uri: Uri,
var languageCode: String = ""
) : Parcelable {
/**
 * Creates a site from [uri], canonicalizing its authority and deriving [languageCode] from it.
 *
 * In order: a missing scheme is filled in; a bare Wikipedia authority on a "/wiki" path is
 * treated as the English subdomain; any ".m." segment is removed from the authority; the
 * language code is taken from the URI's language variant or, failing that, from the authority;
 * a selected app language that is a variant of that code is preferred; the Commons code is
 * replaced by the app or system language; and finally the stored URI is rebuilt from the
 * scheme and the resulting authority only.
 */
constructor(uri: Uri) : this(uri, "") {
    val schemedUri = ensureScheme(uri)
    val originalAuthority = schemedUri.authority.orEmpty()
    val isBareWikipedia = originalAuthority == BASE_DOMAIN || originalAuthority == "www.$BASE_DOMAIN"
    // Special case for Wikipedia only: assume English subdomain when none given.
    val assumedAuthority = if (isBareWikipedia && schemedUri.path?.startsWith("/wiki") == true) {
        "en.$BASE_DOMAIN"
    } else {
        originalAuthority
    }
    // Unconditionally transform any mobile authority to canonical.
    val canonicalAuthority = assumedAuthority.replace(".m.", ".")

    languageCode = UriUtil.getLanguageVariantFromUri(schemedUri)
        .ifEmpty { authorityToLanguageCode(canonicalAuthority) }

    // For language variant wikis, automatically switch to the preferred variant if possible.
    val languageState = WikipediaApp.instance.languageState
    val parentCode = languageState.getDefaultLanguageCode(languageCode)
    // For the default language code like "zh", we need to check if it has variants.
    val directVariants = languageState.getLanguageVariants(languageCode)
    if (parentCode != null || directVariants != null) {
        // Fall back to the variants of the parent language code when the code itself has none.
        val candidateVariants = directVariants ?: languageState.getLanguageVariants(parentCode)
        // Pick the first selected app language that is one of those variants. This prevents showing
        // mixed language variants when the URL contains parent language codes such as "zh" or "wiki".
        val preferredVariant = languageState.appLanguageCodes.firstOrNull {
            candidateVariants?.contains(it) == true
        }
        if (preferredVariant != null) {
            languageCode = preferredVariant
        }
    }

    if (languageCode == Constants.WIKI_CODE_COMMONS) {
        // Special case for Commons: if the WikiSite was constructed from "commons.wikimedia.org",
        // then the languageCode will be "commons" which is incorrect, so set it to the default language.
        languageCode = WikipediaApp.instance.appOrSystemLanguageCode
    }

    // Use default subdomain in authority to prevent error when requesting endpoints. e.g. zh-tw.wikipedia.org
    // subdomain() reads the languageCode assigned above, so it must be evaluated after that step.
    val defaultSubdomain = if (BASE_DOMAIN in canonicalAuthority) subdomain() else ""
    val finalAuthority = if (defaultSubdomain.isNotEmpty()) {
        "$defaultSubdomain.$BASE_DOMAIN"
    } else {
        canonicalAuthority
    }
    this.uri = Uri.Builder().scheme(schemedUri.scheme).encodedAuthority(finalAuthority).build()
}
/**
 * Creates a site from a URL string by parsing it and delegating to the [Uri] constructor.
 *
 * - Strings starting with "http" are parsed unchanged.
 * - Strings starting with "//" are prefixed with [DEFAULT_SCHEME] and ":".
 * - Anything else is treated as a bare authority and prefixed with [DEFAULT_SCHEME] and "://".
 *
 * NOTE(review): the first check is a plain "http" prefix match, so a bare host whose name
 * begins with "http" would be parsed without a scheme separator being added.
 */
constructor(url: String) : this(
    Uri.parse(
        when {
            url.startsWith("http") -> url
            url.startsWith("//") -> "$DEFAULT_SCHEME:$url"
            else -> "$DEFAULT_SCHEME://$url"
        }
    )
)
/**
 * Creates a site from [authority] via the URL-string constructor, then overwrites the language
 * code derived there with the explicit [languageCode]. The stored URI is not recomputed after
 * the overwrite.
 */
constructor(authority: String, languageCode: String) : this(url = authority) {
    this.languageCode = languageCode
}
/** Returns the scheme of [uri], or [DEFAULT_SCHEME] when the URI has no (or an empty) scheme. */
fun scheme(): String {
    val declaredScheme = uri.scheme
    return if (declaredScheme.isNullOrEmpty()) DEFAULT_SCHEME else declaredScheme
}
/** Returns the authority of [uri], or an empty string when the URI has none. */
fun authority(): String = uri.authority ?: ""
/** Returns the subdomain that corresponds to the current [languageCode]. */
fun subdomain(): String = languageCodeToSubdomain(languageCode)
/** Returns [segment] prefixed with "/w/". The segment is not encoded or validated. */
fun path(segment: String): String = "/w/$segment"
/** Returns the string form of [uri]. */
fun url(): String = uri.toString()
/** Returns the site URL followed by the "/w/"-prefixed path for [segment]. */
fun url(segment: String): String = "${url()}${path(segment)}"
/**
 * Returns the database name for this site: a project prefix followed by "wiki".
 *
 * The prefix is "wikidata" when the authority contains "wikidata", "commons" when it contains
 * "commons", and otherwise the language subdomain with every hyphen replaced by an underscore.
 */
fun dbName(): String {
    val host = uri.authority.orEmpty()
    val prefix = when {
        host.contains("wikidata") -> "wikidata"
        host.contains("commons") -> "commons"
        // A literal character replace is enough here; no regex needs to be compiled per call.
        else -> subdomain().replace('-', '_')
    }
    return "${prefix}wiki"
}
companion object {
// Scheme applied when a URI or URL string does not carry one.
const val DEFAULT_SCHEME = "https"
// Wikipedia's base domain; language subdomains are prepended to it when the authority is rebuilt.
const val BASE_DOMAIN = "wikipedia.org"
// Base URL assigned by setDefaultBaseUrl(). It is null until then, and both supportedAuthority()
// and forLanguageCode() require it to be non-null.
private var DEFAULT_BASE_URL: String? = null
/**
 * Reports whether [authority] ends with the authority of the configured default base URL.
 *
 * @throws IllegalStateException if no default base URL has been set through
 * [setDefaultBaseUrl], or if the configured URL has no authority component.
 */
fun supportedAuthority(authority: String): Boolean {
    val baseUrl = checkNotNull(DEFAULT_BASE_URL) {
        "Default base URL is not set; call setDefaultBaseUrl() first."
    }
    val baseAuthority = checkNotNull(baseUrl.toUri().authority) {
        "Default base URL has no authority: $baseUrl"
    }
    return authority.endsWith(baseAuthority)
}
/** Stores [url] as the default base URL; an empty [url] selects [Service.WIKIPEDIA_URL] instead. */
fun setDefaultBaseUrl(url: String) {
    DEFAULT_BASE_URL = if (url.isEmpty()) Service.WIKIPEDIA_URL else url
}
/**
 * Creates the [WikiSite] for [languageCode] on top of the configured default base URL.
 *
 * The authority passed on is the base URL's authority, prefixed with the subdomain for
 * [languageCode] and a dot unless [languageCode] is empty.
 *
 * @throws IllegalStateException if no default base URL has been set through [setDefaultBaseUrl].
 */
fun forLanguageCode(languageCode: String): WikiSite {
    val baseUrl = checkNotNull(DEFAULT_BASE_URL) {
        "Default base URL is not set; call setDefaultBaseUrl() first."
    }
    val baseAuthority = ensureScheme(baseUrl.toUri()).authority
    val subdomainPrefix = if (languageCode.isEmpty()) "" else languageCodeToSubdomain(languageCode) + "."
    return WikiSite(subdomainPrefix + baseAuthority, languageCode)
}
/**
 * Maps a handful of language codes onto the code used in their place; every other code is
 * returned unchanged.
 *
 * - Norwegian Bokmål -> legacy Norwegian code (T114042)
 * - legacy Belarusian -> Belarusian Taraškievica code (T111853)
 * - legacy Cantonese (yue) -> current Cantonese code
 */
fun normalizeLanguageCode(languageCode: String): String = when (languageCode) {
    AppLanguageLookUpTable.NORWEGIAN_BOKMAL_LANGUAGE_CODE ->
        AppLanguageLookUpTable.NORWEGIAN_LEGACY_LANGUAGE_CODE
    AppLanguageLookUpTable.BELARUSIAN_LEGACY_LANGUAGE_CODE ->
        AppLanguageLookUpTable.BELARUSIAN_TARASK_LANGUAGE_CODE
    AppLanguageLookUpTable.CHINESE_LEGACY_YUE_LANGUAGE_CODE ->
        AppLanguageLookUpTable.CHINESE_YUE_LANGUAGE_CODE
    else -> languageCode
}
/**
 * Builds a [WikiSite] for "https://<languageCode>.wikipedia.org/" through the primary
 * constructor, so none of the canonicalization done by the secondary constructors runs.
 * Presumably meant for previews and similar contexts; confirm against callers.
 */
fun preview(languageCode: String = "en"): WikiSite {
    val previewUri = "https://$languageCode.wikipedia.org/".toUri()
    return WikiSite(uri = previewUri, languageCode = languageCode)
}
/**
 * Resolves the subdomain for [languageCode]: the default language code reported by the app's
 * language state when there is one, otherwise the normalized form of [languageCode].
 */
private fun languageCodeToSubdomain(languageCode: String): String {
    val defaultCode = WikipediaApp.instance.languageState.getDefaultLanguageCode(languageCode)
    return defaultCode ?: normalizeLanguageCode(languageCode)
}
/**
 * Extracts the leading label of a dot-separated [authority] as its language code.
 *
 * Returns an empty string when the authority has fewer than three labels, or exactly three
 * with "m" as the first one (for example "", "wikipedia.org" and "m.wikipedia.org").
 * Otherwise the first label is returned, e.g. "en" for "en.wikipedia.org".
 */
fun authorityToLanguageCode(authority: String): String {
    // Splitting on a literal character avoids compiling a regex on every call.
    val labels = authority.split('.')
    val minLengthForSubdomain = 3
    val hasNoSubdomain = labels.size < minLengthForSubdomain ||
        (labels.size == minLengthForSubdomain && labels[0] == "m")
    return if (hasNoSubdomain) "" else labels[0]
}
/** Returns [uri] unchanged when it has a scheme, otherwise a copy that uses [DEFAULT_SCHEME]. */
private fun ensureScheme(uri: Uri): Uri = when {
    uri.scheme.isNullOrEmpty() -> uri.buildUpon().scheme(DEFAULT_SCHEME).build()
    else -> uri
}
}
}