fractal/utils/string/mod.rs
1//! Helper traits and methods for strings.
2
3use std::fmt::Write;
4
5use gtk::glib::markup_escape_text;
6use linkify::{LinkFinder, LinkKind};
7use ruma::{MatrixUri, RoomAliasId, RoomId, UserId};
8use url::Url;
9
10#[cfg(test)]
11mod tests;
12
13use super::matrix::{AT_ROOM, MatrixIdUri, find_at_room};
14use crate::{
15 components::{AvatarImageSafetySetting, LabelWithWidgets, Pill},
16 prelude::*,
17 session::Room,
18};
19
/// The prefix added before a detected email address to build a `mailto:` URI.
const EMAIL_URI_PREFIX: &str = "mailto:";
/// The prefix added before a detected URL without a scheme to build an HTTPS
/// URL.
const HTTPS_URI_PREFIX: &str = "https://";
/// The scheme of a `matrix:` URI, without the `:` separator.
const MATRIX_URI_SCHEME: &str = "matrix";
26
27/// Common extensions to strings.
28pub(crate) trait StrExt {
29 /// Escape markup for compatibility with Pango.
30 fn escape_markup(&self) -> String;
31
32 /// Collapse contiguous whitespaces in this string into a single space.
33 fn collapse_whitespaces(&self, trim_start: bool, trim_end: bool) -> String;
34}
35
36impl<T> StrExt for T
37where
38 T: AsRef<str>,
39{
40 fn escape_markup(&self) -> String {
41 markup_escape_text(self.as_ref()).into()
42 }
43
44 fn collapse_whitespaces(&self, trim_start: bool, trim_end: bool) -> String {
45 let mut str = self.as_ref();
46
47 if trim_start {
48 str = str.trim_start();
49 }
50 if trim_end {
51 str = str.trim_end();
52 }
53
54 let mut new_string = String::with_capacity(str.len());
55 let mut prev_is_space = false;
56
57 for char in str.chars() {
58 if char.is_whitespace() {
59 if prev_is_space {
60 // We have already added a space as the last character, ignore this whitespace.
61 continue;
62 }
63
64 prev_is_space = true;
65 new_string.push(' ');
66 } else {
67 prev_is_space = false;
68 new_string.push(char);
69 }
70 }
71
72 new_string
73 }
74}
75
/// Common extensions to mutable strings.
pub(crate) trait StrMutExt {
    /// Truncate this string at the first newline.
    ///
    /// The string is cut at the first `\n`. If that `\n` is part of a CRLF
    /// line ending, the `\r` is removed too.
    ///
    /// Appends an ellipsis if the string was truncated.
    ///
    /// Returns `true` if the string was truncated.
    fn truncate_newline(&mut self) -> bool;

    /// Truncate whitespaces at the end of the string.
    fn truncate_end_whitespaces(&mut self);

    /// Append an ellipsis, except if this string already ends with an
    /// ellipsis character or with at least two dots.
    fn append_ellipsis(&mut self);

    /// Strip the NUL byte.
    ///
    /// Since they are used by GTK as the end of a string, strings in properties
    /// will be truncated at the first NUL byte.
    fn strip_nul(&mut self);

    /// Remove unnecessary or problematic characters from the string.
    ///
    /// This strips the NUL bytes and then truncates the whitespaces at the end
    /// of the string.
    fn clean_string(&mut self) {
        self.strip_nul();
        self.truncate_end_whitespaces();
    }
}

impl StrMutExt for String {
    fn truncate_newline(&mut self) -> bool {
        let Some(newline) = self.find('\n') else {
            return false;
        };

        self.truncate(newline);

        // With a CRLF line ending, the `\r` is part of the line break and must
        // not be left dangling before the ellipsis.
        if self.ends_with('\r') {
            self.pop();
        }

        self.append_ellipsis();

        true
    }

    fn truncate_end_whitespaces(&mut self) {
        // `trim_end()` returns a prefix of the string, so its length is always a
        // valid position to truncate at. It is 0 if there are only whitespaces.
        let new_len = self.trim_end().len();
        self.truncate(new_len);
    }

    fn append_ellipsis(&mut self) {
        if !self.ends_with('…') && !self.ends_with("..") {
            self.push('…');
        }
    }

    fn strip_nul(&mut self) {
        self.retain(|c| c != '\0');
    }
}
145
146/// Extensions to `Option<String>`.
147pub(crate) trait OptionStringExt: Sized {
148 /// Remove unnecessary or problematic characters from the string.
149 ///
150 /// If the final string is empty, replaces it with `None`.
151 fn clean_string(&mut self);
152
153 /// Remove unnecessary or problematic characters from the string.
154 ///
155 /// If the final string is empty, replaces it with `None`.
156 fn into_clean_string(mut self) -> Self {
157 self.clean_string();
158 self
159 }
160}
161
162impl OptionStringExt for Option<String> {
163 fn clean_string(&mut self) {
164 self.take_if(|s| {
165 s.clean_string();
166 s.is_empty()
167 });
168 }
169}
170
171/// Common extensions for adding Pango markup to mutable strings.
172pub(crate) trait PangoStrMutExt {
173 /// Append the opening Pango markup link tag of the given URI parts.
174 ///
175 /// The URI is also used as a title, so users can preview the link on hover.
176 fn append_link_opening_tag(&mut self, uri: impl AsRef<str>);
177
178 /// Append the given emote's sender name and consumes it, if it is set.
179 fn maybe_append_emote_name(&mut self, name: &mut Option<&str>);
180
181 /// Append the given URI as a mention, if it is one.
182 ///
183 /// Returns the created [`Pill`], it the URI was added as a mention.
184 fn maybe_append_mention(&mut self, uri: impl TryInto<MatrixIdUri>, room: &Room)
185 -> Option<Pill>;
186
187 /// Append the given string and replace `@room` with a mention.
188 ///
189 /// Returns the created [`Pill`], it `@room` was found.
190 fn append_and_replace_at_room(&mut self, s: &str, room: &Room) -> Option<Pill>;
191}
192
193impl PangoStrMutExt for String {
194 fn append_link_opening_tag(&mut self, uri: impl AsRef<str>) {
195 let uri = uri.escape_markup();
196 // We need to escape the title twice because GTK doesn't take care of it.
197 let title = uri.escape_markup();
198
199 let _ = write!(self, r#"<a href="{uri}" title="{title}">"#);
200 }
201
202 fn maybe_append_emote_name(&mut self, name: &mut Option<&str>) {
203 if let Some(name) = name.take() {
204 let _ = write!(self, "<b>{}</b> ", name.escape_markup());
205 }
206 }
207
208 fn maybe_append_mention(
209 &mut self,
210 uri: impl TryInto<MatrixIdUri>,
211 room: &Room,
212 ) -> Option<Pill> {
213 let pill = uri.try_into().ok().and_then(|uri| uri.into_pill(room))?;
214
215 self.push_str(LabelWithWidgets::PLACEHOLDER);
216
217 Some(pill)
218 }
219
220 fn append_and_replace_at_room(&mut self, s: &str, room: &Room) -> Option<Pill> {
221 if let Some(pos) = find_at_room(s) {
222 self.push_str(&(&s[..pos]).escape_markup());
223 self.push_str(LabelWithWidgets::PLACEHOLDER);
224 self.push_str(&(&s[pos + AT_ROOM.len()..]).escape_markup());
225
226 // We do not need to watch safety settings for mentions, rooms will be watched
227 // automatically.
228 Some(room.at_room().to_pill(AvatarImageSafetySetting::None, None))
229 } else {
230 self.push_str(&s.escape_markup());
231 None
232 }
233 }
234}
235
236/// Linkify the given text.
237///
238/// The text will also be escaped with [`StrExt::escape_markup()`].
239pub(crate) fn linkify(text: &str) -> String {
240 let mut linkified = String::with_capacity(text.len());
241 Linkifier::new(&mut linkified).linkify(text);
242 linkified
243}
244
/// A helper type to linkify text.
///
/// The linkified text is appended to the string given to
/// [`Linkifier::new()`]. Mentions are only detected after a call to
/// [`Linkifier::detect_mentions()`].
pub(crate) struct Linkifier<'a> {
    /// The string where the result is appended.
    inner: &'a mut String,
    /// The mentions detection setting and results.
    mentions: MentionsMode<'a>,
}
252
253impl<'a> Linkifier<'a> {
254 /// Construct a new linkifier that will add text in the given string.
255 pub(crate) fn new(inner: &'a mut String) -> Self {
256 Self {
257 inner,
258 mentions: MentionsMode::NoMentions,
259 }
260 }
261
262 /// Enable mentions detection in the given room and add pills to the given
263 /// list.
264 ///
265 /// If `detect_at_room` is `true`, it will also try to detect `@room`
266 /// mentions.
267 pub(crate) fn detect_mentions(
268 mut self,
269 room: &'a Room,
270 pills: &'a mut Vec<Pill>,
271 detect_at_room: bool,
272 ) -> Self {
273 self.mentions = MentionsMode::WithMentions {
274 pills,
275 room,
276 detect_at_room,
277 };
278 self
279 }
280
281 /// Search and replace links in the given text.
282 ///
283 /// Returns the list of mentions, if any where found.
284 pub(crate) fn linkify(mut self, text: &str) {
285 let mut finder = LinkFinder::new();
286 // Allow URLS without a scheme.
287 finder.url_must_have_scheme(false);
288
289 let mut prev_span = None;
290
291 for span in finder.spans(text) {
292 let span_text = span.as_str();
293
294 match span.kind() {
295 Some(LinkKind::Url) => {
296 let is_valid_url = self.append_detected_url(span_text, prev_span);
297
298 if is_valid_url {
299 prev_span = None;
300 } else {
301 prev_span = Some(span_text);
302 }
303 }
304 Some(LinkKind::Email) => {
305 self.inner
306 .append_link_opening_tag(format!("{EMAIL_URI_PREFIX}{span_text}"));
307 self.inner.push_str(&span_text.escape_markup());
308 self.inner.push_str("</a>");
309
310 // The span was a valid email so we will not need to check it for the next span.
311 prev_span = None;
312 }
313 _ => {
314 if let MentionsMode::WithMentions {
315 pills,
316 room,
317 detect_at_room: true,
318 } = &mut self.mentions
319 {
320 if let Some(pill) = self.inner.append_and_replace_at_room(span_text, room) {
321 pills.push(pill);
322 }
323
324 prev_span = Some(span_text);
325 continue;
326 }
327
328 self.append_string(span_text);
329 prev_span = Some(span_text);
330 }
331 }
332 }
333 }
334
335 /// Append the given string.
336 ///
337 /// Escapes the markup of the string.
338 fn append_string(&mut self, s: &str) {
339 self.inner.push_str(&s.escape_markup());
340 }
341
342 /// Append the given URI with the given link content.
343 fn append_uri(&mut self, uri: &str, content: &str) {
344 if let MentionsMode::WithMentions { pills, room, .. } = &mut self.mentions
345 && let Some(pill) = self.inner.maybe_append_mention(uri, room)
346 {
347 pills.push(pill);
348
349 return;
350 }
351
352 self.inner.append_link_opening_tag(uri);
353 self.append_string(content);
354 self.inner.push_str("</a>");
355 }
356
357 /// Append the given string detected as a URL.
358 ///
359 /// Appends false positives as normal strings, otherwise appends it as a
360 /// URI.
361 ///
362 /// Returns `true` if it was detected as a valid URL.
363 fn append_detected_url(&mut self, detected_url: &str, prev_span: Option<&str>) -> bool {
364 if Url::parse(detected_url).is_ok() {
365 // This is a full URL with a scheme, we can trust that it is valid.
366 self.append_uri(detected_url, detected_url);
367 return true;
368 }
369
370 // It does not have a scheme, try to split it to get only the domain.
371 let domain = if let Some((domain, _)) = detected_url.split_once('/') {
372 // This is a URL with a path component.
373 domain
374 } else if let Some((domain, _)) = detected_url.split_once('?') {
375 // This is a URL with a query component.
376 domain
377 } else if let Some((domain, _)) = detected_url.split_once('#') {
378 // This is a URL with a fragment.
379 domain
380 } else {
381 // It should only contain the full domain.
382 detected_url
383 };
384
385 // Check that the top-level domain is known.
386 if !domain.rsplit_once('.').is_some_and(|(_, d)| tld::exist(d)) {
387 // This is a false positive, treat it like a regular string.
388 self.append_string(detected_url);
389 return false;
390 }
391
392 // The LinkFinder detects the homeserver part of `matrix:` URIs and Matrix
393 // identifiers, e.g. it detects `example.org` in `matrix:r/somewhere:
394 // example.org` or in `#somewhere:matrix.org`. We can use that to detect the
395 // full URI or identifier with the previous span.
396
397 // First, detect if the previous character is `:`, this is common to URIs and
398 // identifiers.
399 if let Some(prev_span) = prev_span.filter(|s| s.ends_with(':')) {
400 // Most identifiers in Matrix do not have a list of allowed characters, so all
401 // characters are allowed… which makes it difficult to find where they start.
402 // We have to set arbitrary rules for the localpart to match most cases:
403 // - No whitespaces
404 // - No `:`, as it is the separator between localpart and server name, and after
405 // the scheme in URIs
406 // - As soon as we encounter a known sigil, we assume we have the full ID. We
407 // ignore event IDs because we need a room to be able to generate a link.
408 if let Some((pos, c)) = prev_span[..]
409 .char_indices()
410 .rev()
411 // Skip the `:` we detected earlier.
412 .skip(1)
413 .find(|(_, c)| c.is_whitespace() || matches!(c, ':' | '!' | '#' | '@'))
414 {
415 let maybe_id_start = &prev_span[pos..];
416
417 match c {
418 ':' if prev_span[..pos].ends_with(MATRIX_URI_SCHEME) => {
419 // This should be a matrix URI.
420 let maybe_full_uri =
421 format!("{MATRIX_URI_SCHEME}{maybe_id_start}{detected_url}");
422 if MatrixUri::parse(&maybe_full_uri).is_ok() {
423 // Remove the start of the URI from the string.
424 self.inner.truncate(
425 self.inner.len() - maybe_id_start.len() - MATRIX_URI_SCHEME.len(),
426 );
427 self.append_uri(&maybe_full_uri, &maybe_full_uri);
428
429 return true;
430 }
431 }
432 '!' => {
433 // This should be a room ID.
434 if let Ok(room_id) =
435 RoomId::parse(format!("{maybe_id_start}{detected_url}"))
436 {
437 // Remove the start of the ID from the string.
438 self.inner.truncate(self.inner.len() - maybe_id_start.len());
439 // Transform it into a link.
440 self.append_uri(&room_id.matrix_to_uri().to_string(), room_id.as_str());
441 return true;
442 }
443 }
444 '#' => {
445 // This should be a room alias.
446 if let Ok(room_alias) =
447 RoomAliasId::parse(format!("{maybe_id_start}{detected_url}"))
448 {
449 // Remove the start of the ID from the string.
450 self.inner.truncate(self.inner.len() - maybe_id_start.len());
451 // Transform it into a link.
452 self.append_uri(
453 &room_alias.matrix_to_uri().to_string(),
454 room_alias.as_str(),
455 );
456 return true;
457 }
458 }
459 '@' => {
460 // This should be a user ID.
461 if let Ok(user_id) =
462 UserId::parse(format!("{maybe_id_start}{detected_url}"))
463 {
464 // Remove the start of the ID from the string.
465 self.inner.truncate(self.inner.len() - maybe_id_start.len());
466 // Transform it into a link.
467 self.append_uri(&user_id.matrix_to_uri().to_string(), user_id.as_str());
468 return true;
469 }
470 }
471 _ => {
472 // We reached a whitespace without a sigil or URI
473 // scheme, this must be a regular URL.
474 }
475 }
476 }
477 }
478
479 self.append_uri(&format!("{HTTPS_URI_PREFIX}{detected_url}"), detected_url);
480 true
481 }
482}
483
/// The mentions mode of the [`Linkifier`].
#[derive(Debug, Default)]
enum MentionsMode<'a> {
    /// The linkifier will not detect mentions.
    #[default]
    NoMentions,
    /// The linkifier will detect mentions.
    WithMentions {
        /// The list where the pills for the detected mentions are added.
        pills: &'a mut Vec<Pill>,
        /// The room containing the mentions.
        room: &'a Room,
        /// Whether to also detect `@room` mentions.
        detect_at_room: bool,
    },
}
499}