Merge pull request #3206 from element-hq/feature/bma/updateGrammar

Update grammar on Matrix Ids to be more spec compliant and render error instead of infinite loading in room member list screen
This commit is contained in:
Benoit Marty 2024-07-17 18:20:46 +02:00 committed by GitHub
commit 0ddc306f01
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 280 additions and 137 deletions

View file

@ -27,32 +27,40 @@ object MatrixPatterns {
// Note: TLD is not mandatory (localhost, IP address...)
private const val DOMAIN_REGEX = ":[A-Za-z0-9.-]+(:[0-9]{2,5})?"

// Character classes used by the v3 / v4 event id patterns below.
private const val BASE_64_ALPHABET = "[0-9A-Za-z/\\+=]+"
// NOTE(review): '/' is accepted here in addition to '-' and '_' - presumably deliberate leniency, confirm.
private const val BASE_64_URL_SAFE_ALPHABET = "[0-9A-Za-z/\\-_]+"

// regex pattern to match matrix user ids (anchored with ^ and $).
// See https://matrix.org/docs/spec/appendices#historical-user-ids
// Sadly, we need to relax the regex pattern a bit as there already exist some ids that don't match the spec.
// Note: local part can be empty
private const val MATRIX_USER_IDENTIFIER_REGEX = "^@\\S*?$DOMAIN_REGEX$"
private val PATTERN_CONTAIN_MATRIX_USER_IDENTIFIER = MATRIX_USER_IDENTIFIER_REGEX.toRegex()

// regex pattern to match room ids.
// Note: roomId can be arbitrary strings, including space and new line char
// (hence DOT_MATCHES_ALL, so that '.' also matches line terminators).
private const val MATRIX_ROOM_IDENTIFIER_REGEX = "^!.+$DOMAIN_REGEX$"
private val PATTERN_CONTAIN_MATRIX_ROOM_IDENTIFIER = MATRIX_ROOM_IDENTIFIER_REGEX.toRegex(RegexOption.DOT_MATCHES_ALL)

// regex pattern to match room aliases.
private const val MATRIX_ROOM_ALIAS_REGEX = "^#\\S+$DOMAIN_REGEX$"
private val PATTERN_CONTAIN_MATRIX_ALIAS = MATRIX_ROOM_ALIAS_REGEX.toRegex(RegexOption.IGNORE_CASE)

// regex pattern to match event ids.
// Sadly, we need to relax the regex pattern a bit as there already exist some ids that don't match the spec.
// v1 and v2: arbitrary string + domain
private const val MATRIX_EVENT_IDENTIFIER_REGEX = "^\\$.+$DOMAIN_REGEX$"
private val PATTERN_CONTAIN_MATRIX_EVENT_IDENTIFIER = MATRIX_EVENT_IDENTIFIER_REGEX.toRegex()

// v3: base64
private const val MATRIX_EVENT_IDENTIFIER_V3_REGEX = "\\$$BASE_64_ALPHABET"
private val PATTERN_CONTAIN_MATRIX_EVENT_IDENTIFIER_V3 = MATRIX_EVENT_IDENTIFIER_V3_REGEX.toRegex()

// Ref: https://matrix.org/docs/spec/rooms/v4#event-ids
// v4: url-safe base64
private const val MATRIX_EVENT_IDENTIFIER_V4_REGEX = "\\$$BASE_64_URL_SAFE_ALPHABET"
private val PATTERN_CONTAIN_MATRIX_EVENT_IDENTIFIER_V4 = MATRIX_EVENT_IDENTIFIER_V4_REGEX.toRegex()

// Upper bound on the total length of an identifier, enforced by the is*Id checks.
private const val MAX_IDENTIFIER_LENGTH = 255
/**
* Tells if a string is a valid user Id.
@ -61,7 +69,9 @@ object MatrixPatterns {
* @return true if the string is a valid user id
*/
fun isUserId(str: String?): Boolean {
    // Null and over-long input is rejected before the regex is consulted;
    // the pattern alone puts no bound on the length of the id.
    return str != null &&
        str.length <= MAX_IDENTIFIER_LENGTH &&
        str matches PATTERN_CONTAIN_MATRIX_USER_IDENTIFIER
}
/**
@ -79,7 +89,9 @@ object MatrixPatterns {
* @return true if the string is a valid room Id
*/
fun isRoomId(str: String?): Boolean {
    // Null and over-long input is rejected before the regex is consulted;
    // the pattern alone puts no bound on the length of the id.
    return str != null &&
        str.length <= MAX_IDENTIFIER_LENGTH &&
        str matches PATTERN_CONTAIN_MATRIX_ROOM_IDENTIFIER
}
/**
@ -89,7 +101,9 @@ object MatrixPatterns {
* @return true if the string is a valid room alias.
*/
fun isRoomAlias(str: String?): Boolean {
    // Null and over-long input is rejected before the regex is consulted;
    // the pattern alone puts no bound on the length of the alias.
    return str != null &&
        str.length <= MAX_IDENTIFIER_LENGTH &&
        str matches PATTERN_CONTAIN_MATRIX_ALIAS
}
/**
@ -100,9 +114,10 @@ object MatrixPatterns {
*/
fun isEventId(str: String?): Boolean {
    // The length bound applies to every event id format, so it is checked once up front.
    // The string is then accepted if it matches any of the known formats:
    // v4 (url-safe base64), v3 (base64) or v1/v2 (arbitrary string + domain).
    return str != null &&
        str.length <= MAX_IDENTIFIER_LENGTH &&
        (
            str matches PATTERN_CONTAIN_MATRIX_EVENT_IDENTIFIER_V4 ||
                str matches PATTERN_CONTAIN_MATRIX_EVENT_IDENTIFIER_V3 ||
                str matches PATTERN_CONTAIN_MATRIX_EVENT_IDENTIFIER
            )
}
/**
@ -118,8 +133,8 @@ object MatrixPatterns {
* Note not all cases are implemented.
*/
fun findPatterns(text: CharSequence, permalinkParser: PermalinkParser): List<MatrixPatternResult> {
val rawTextMatches = "\\S+?$DOMAIN_REGEX".toRegex(RegexOption.IGNORE_CASE).findAll(text)
val urlMatches = "\\[\\S+?\\]\\((\\S+?)\\)".toRegex(RegexOption.IGNORE_CASE).findAll(text)
val rawTextMatches = "\\S+$DOMAIN_REGEX".toRegex(RegexOption.IGNORE_CASE).findAll(text)
val urlMatches = "\\[\\S+\\]\\((\\S+)\\)".toRegex(RegexOption.IGNORE_CASE).findAll(text)
val atRoomMatches = Regex("@room").findAll(text)
return buildList {
for (match in rawTextMatches) {

View file

@ -23,6 +23,8 @@ import io.element.android.libraries.matrix.api.permalink.PermalinkParser
import org.junit.Test
class MatrixPatternsTest {
private val longLocalPart = "a".repeat(255 - ":server.com".length - 1)
@Test
fun `findPatterns - returns raw user ids`() {
val text = "A @user:server.com and @user2:server.com"
@ -54,7 +56,7 @@ class MatrixPatternsTest {
}
@Test
fun `findPatterns - returns raw room event ids`() {
fun `findPatterns - returns raw event ids`() {
val text = "A \$event:server.com and \$event2:server.com"
val patterns = MatrixPatterns.findPatterns(text, aPermalinkParser())
assertThat(patterns).containsExactly(
@ -90,6 +92,76 @@ class MatrixPatternsTest {
assertThat(patterns).containsExactly(MatrixPatternResult(MatrixPatternType.ROOM_ALIAS, "#room:server.com", 2, 46))
}
@Test
fun `test isRoomId`() {
    // Inputs that must be rejected: null/empty, free text, surrounding whitespace,
    // other sigils, and an id one character over the length limit.
    val rejected = listOf(
        null,
        "",
        "not a room id",
        " !room:server.com",
        "!room:server.com ",
        "@room:server.com",
        "#room:server.com",
        "\$room:server.com",
        "!${longLocalPart}a:server.com",
    )
    // Inputs that must be accepted: a plain id, an id exactly at the length limit,
    // and an id whose local part contains spaces, a new line and non-ASCII characters.
    val accepted = listOf(
        "!room:server.com",
        "!$longLocalPart:server.com",
        "!#test/room\nversion <u>11</u>, with @🐈️:maunium.net",
    )
    rejected.forEach { candidate ->
        assertThat(MatrixPatterns.isRoomId(candidate)).isFalse()
    }
    accepted.forEach { candidate ->
        assertThat(MatrixPatterns.isRoomId(candidate)).isTrue()
    }
}
@Test
fun `test isRoomAlias`() {
    // Inputs that must be rejected: null/empty, free text, surrounding whitespace,
    // other sigils, and an alias one character over the length limit.
    val rejected = listOf(
        null,
        "",
        "not a room alias",
        " #room:server.com",
        "#room:server.com ",
        "@room:server.com",
        "!room:server.com",
        "\$room:server.com",
        "#${longLocalPart}a:server.com",
    )
    // Inputs that must be accepted: a plain alias, an alias with punctuation in the
    // local part, and an alias exactly at the length limit.
    val accepted = listOf(
        "#room:server.com",
        "#nico's-stickers:neko.dev",
        "#$longLocalPart:server.com",
    )
    rejected.forEach { candidate ->
        assertThat(MatrixPatterns.isRoomAlias(candidate)).isFalse()
    }
    accepted.forEach { candidate ->
        assertThat(MatrixPatterns.isRoomAlias(candidate)).isTrue()
    }
}
@Test
fun `test isEventId`() {
    // Inputs that must be rejected: null/empty, free text, surrounding whitespace,
    // other sigils, and ids (with or without domain) one character over the length limit.
    val rejected = listOf(
        null,
        "",
        "not an event id",
        " \$event:server.com",
        "\$event:server.com ",
        "@event:server.com",
        "!event:server.com",
        "#event:server.com",
        "$${longLocalPart}a:server.com",
        "\$" + "a".repeat(255),
    )
    // Inputs that must be accepted: ids with a domain, a domain-less id,
    // and ids exactly at the length limit.
    val accepted = listOf(
        "\$event:server.com",
        "$$longLocalPart:server.com",
        "\$9BozuV4TBw6rfRW3rMEgZ5v-jNk1D6FA8Hd1OsWqT9k",
        "\$" + "a".repeat(254),
    )
    rejected.forEach { candidate ->
        assertThat(MatrixPatterns.isEventId(candidate)).isFalse()
    }
    accepted.forEach { candidate ->
        assertThat(MatrixPatterns.isEventId(candidate)).isTrue()
    }
}
@Test
fun `test isUserId`() {
    // Inputs that must be rejected: null/empty, free text, surrounding whitespace,
    // other sigils, and an id one character over the length limit.
    val rejected = listOf(
        null,
        "",
        "not a user id",
        " @user:server.com",
        "@user:server.com ",
        "!user:server.com",
        "#user:server.com",
        "\$user:server.com",
        "@${longLocalPart}a:server.com",
    )
    // Inputs that must be accepted: a plain id, an id with an empty local part,
    // and an id exactly at the length limit.
    val accepted = listOf(
        "@user:server.com",
        "@:server.com",
        "@$longLocalPart:server.com",
    )
    rejected.forEach { candidate ->
        assertThat(MatrixPatterns.isUserId(candidate)).isFalse()
    }
    accepted.forEach { candidate ->
        assertThat(MatrixPatterns.isUserId(candidate)).isTrue()
    }
}
private fun aPermalinkParser(block: (String) -> PermalinkData = { PermalinkData.FallbackLink(Uri.EMPTY) }) = object : PermalinkParser {
override fun parse(uriString: String): PermalinkData {
return block(uriString)