Skip to main content

fractal/utils/string/
mod.rs

1//! Helper traits and methods for strings.
2
3use std::fmt::Write;
4
5use gtk::glib::markup_escape_text;
6use linkify::{LinkFinder, LinkKind};
7use ruma::{MatrixUri, RoomAliasId, RoomId, UserId};
8use url::Url;
9
10#[cfg(test)]
11mod tests;
12
13use super::matrix::{AT_ROOM, MatrixIdUri, find_at_room};
14use crate::{
15    components::{AvatarImageSafetySetting, LabelWithWidgets, Pill},
16    prelude::*,
17    session::Room,
18};
19
/// The prefix for an email URI.
///
/// It is prepended to detected email addresses to build the target of the
/// link.
const EMAIL_URI_PREFIX: &str = "mailto:";
/// The prefix for an HTTPS URL.
///
/// It is prepended to detected URLs that do not have a scheme to build the
/// target of the link.
const HTTPS_URI_PREFIX: &str = "https://";
/// The scheme for a `matrix:` URI, without the `:` separator.
const MATRIX_URI_SCHEME: &str = "matrix";
26
27/// Common extensions to strings.
28pub(crate) trait StrExt {
29    /// Escape markup for compatibility with Pango.
30    fn escape_markup(&self) -> String;
31
32    /// Collapse contiguous whitespaces in this string into a single space.
33    fn collapse_whitespaces(&self, trim_start: bool, trim_end: bool) -> String;
34}
35
36impl<T> StrExt for T
37where
38    T: AsRef<str>,
39{
40    fn escape_markup(&self) -> String {
41        markup_escape_text(self.as_ref()).into()
42    }
43
44    fn collapse_whitespaces(&self, trim_start: bool, trim_end: bool) -> String {
45        let mut str = self.as_ref();
46
47        if trim_start {
48            str = str.trim_start();
49        }
50        if trim_end {
51            str = str.trim_end();
52        }
53
54        let mut new_string = String::with_capacity(str.len());
55        let mut prev_is_space = false;
56
57        for char in str.chars() {
58            if char.is_whitespace() {
59                if prev_is_space {
60                    // We have already added a space as the last character, ignore this whitespace.
61                    continue;
62                }
63
64                prev_is_space = true;
65                new_string.push(' ');
66            } else {
67                prev_is_space = false;
68                new_string.push(char);
69            }
70        }
71
72        new_string
73    }
74}
75
/// In-place helpers for mutable strings.
pub(crate) trait StrMutExt {
    /// Cut this string at the first line feed (`\n`).
    ///
    /// If the string was cut, an ellipsis is appended with
    /// [`StrMutExt::append_ellipsis()`].
    ///
    /// Returns `true` if the string was cut.
    fn truncate_newline(&mut self) -> bool;

    /// Remove the whitespaces at the end of the string.
    fn truncate_end_whitespaces(&mut self);

    /// Append an ellipsis, unless this string already ends with an ellipsis
    /// or with two dots.
    fn append_ellipsis(&mut self);

    /// Remove all the NUL bytes from the string.
    ///
    /// Since they are used by GTK as the end of a string, strings in properties
    /// will be truncated at the first NUL byte.
    fn strip_nul(&mut self);

    /// Remove unnecessary or problematic characters from the string.
    ///
    /// This strips the NUL bytes first and then the whitespaces at the end.
    fn clean_string(&mut self) {
        self.strip_nul();
        self.truncate_end_whitespaces();
    }
}

impl StrMutExt for String {
    fn truncate_newline(&mut self) -> bool {
        let Some(newline_pos) = self.find('\n') else {
            // Nothing to cut.
            return false;
        };

        self.truncate(newline_pos);
        self.append_ellipsis();

        true
    }

    fn truncate_end_whitespaces(&mut self) {
        // `trim_end()` returns a prefix of the string, so its length is the
        // position right after the last non-whitespace character, or 0 if the
        // string is empty or only contains whitespaces.
        let kept_len = self.trim_end().len();
        self.truncate(kept_len);
    }

    fn append_ellipsis(&mut self) {
        let has_ellipsis = self.ends_with('…') || self.ends_with("..");

        if !has_ellipsis {
            self.push('…');
        }
    }

    fn strip_nul(&mut self) {
        self.retain(|c| !matches!(c, '\0'));
    }
}
145
146/// Extensions to `Option<String>`.
147pub(crate) trait OptionStringExt: Sized {
148    /// Remove unnecessary or problematic characters from the string.
149    ///
150    /// If the final string is empty, replaces it with `None`.
151    fn clean_string(&mut self);
152
153    /// Remove unnecessary or problematic characters from the string.
154    ///
155    /// If the final string is empty, replaces it with `None`.
156    fn into_clean_string(mut self) -> Self {
157        self.clean_string();
158        self
159    }
160}
161
162impl OptionStringExt for Option<String> {
163    fn clean_string(&mut self) {
164        self.take_if(|s| {
165            s.clean_string();
166            s.is_empty()
167        });
168    }
169}
170
171/// Common extensions for adding Pango markup to mutable strings.
172pub(crate) trait PangoStrMutExt {
173    /// Append the opening Pango markup link tag of the given URI parts.
174    ///
175    /// The URI is also used as a title, so users can preview the link on hover.
176    fn append_link_opening_tag(&mut self, uri: impl AsRef<str>);
177
178    /// Append the given emote's sender name and consumes it, if it is set.
179    fn maybe_append_emote_name(&mut self, name: &mut Option<&str>);
180
181    /// Append the given URI as a mention, if it is one.
182    ///
183    /// Returns the created [`Pill`], it the URI was added as a mention.
184    fn maybe_append_mention(&mut self, uri: impl TryInto<MatrixIdUri>, room: &Room)
185    -> Option<Pill>;
186
187    /// Append the given string and replace `@room` with a mention.
188    ///
189    /// Returns the created [`Pill`], it `@room` was found.
190    fn append_and_replace_at_room(&mut self, s: &str, room: &Room) -> Option<Pill>;
191}
192
193impl PangoStrMutExt for String {
194    fn append_link_opening_tag(&mut self, uri: impl AsRef<str>) {
195        let uri = uri.escape_markup();
196        // We need to escape the title twice because GTK doesn't take care of it.
197        let title = uri.escape_markup();
198
199        let _ = write!(self, r#"<a href="{uri}" title="{title}">"#);
200    }
201
202    fn maybe_append_emote_name(&mut self, name: &mut Option<&str>) {
203        if let Some(name) = name.take() {
204            let _ = write!(self, "<b>{}</b> ", name.escape_markup());
205        }
206    }
207
208    fn maybe_append_mention(
209        &mut self,
210        uri: impl TryInto<MatrixIdUri>,
211        room: &Room,
212    ) -> Option<Pill> {
213        let pill = uri.try_into().ok().and_then(|uri| uri.into_pill(room))?;
214
215        self.push_str(LabelWithWidgets::PLACEHOLDER);
216
217        Some(pill)
218    }
219
220    fn append_and_replace_at_room(&mut self, s: &str, room: &Room) -> Option<Pill> {
221        if let Some(pos) = find_at_room(s) {
222            self.push_str(&(&s[..pos]).escape_markup());
223            self.push_str(LabelWithWidgets::PLACEHOLDER);
224            self.push_str(&(&s[pos + AT_ROOM.len()..]).escape_markup());
225
226            // We do not need to watch safety settings for mentions, rooms will be watched
227            // automatically.
228            Some(room.at_room().to_pill(AvatarImageSafetySetting::None, None))
229        } else {
230            self.push_str(&s.escape_markup());
231            None
232        }
233    }
234}
235
236/// Linkify the given text.
237///
238/// The text will also be escaped with [`StrExt::escape_markup()`].
239pub(crate) fn linkify(text: &str) -> String {
240    let mut linkified = String::with_capacity(text.len());
241    Linkifier::new(&mut linkified).linkify(text);
242    linkified
243}
244
245/// A helper type to linkify text.
246pub(crate) struct Linkifier<'a> {
247    /// The string containing the result.
248    inner: &'a mut String,
249    /// The mentions detection setting and results.
250    mentions: MentionsMode<'a>,
251}
252
253impl<'a> Linkifier<'a> {
254    /// Construct a new linkifier that will add text in the given string.
255    pub(crate) fn new(inner: &'a mut String) -> Self {
256        Self {
257            inner,
258            mentions: MentionsMode::NoMentions,
259        }
260    }
261
262    /// Enable mentions detection in the given room and add pills to the given
263    /// list.
264    ///
265    /// If `detect_at_room` is `true`, it will also try to detect `@room`
266    /// mentions.
267    pub(crate) fn detect_mentions(
268        mut self,
269        room: &'a Room,
270        pills: &'a mut Vec<Pill>,
271        detect_at_room: bool,
272    ) -> Self {
273        self.mentions = MentionsMode::WithMentions {
274            pills,
275            room,
276            detect_at_room,
277        };
278        self
279    }
280
281    /// Search and replace links in the given text.
282    ///
283    /// Returns the list of mentions, if any where found.
284    pub(crate) fn linkify(mut self, text: &str) {
285        let mut finder = LinkFinder::new();
286        // Allow URLS without a scheme.
287        finder.url_must_have_scheme(false);
288
289        let mut prev_span = None;
290
291        for span in finder.spans(text) {
292            let span_text = span.as_str();
293
294            match span.kind() {
295                Some(LinkKind::Url) => {
296                    let is_valid_url = self.append_detected_url(span_text, prev_span);
297
298                    if is_valid_url {
299                        prev_span = None;
300                    } else {
301                        prev_span = Some(span_text);
302                    }
303                }
304                Some(LinkKind::Email) => {
305                    self.inner
306                        .append_link_opening_tag(format!("{EMAIL_URI_PREFIX}{span_text}"));
307                    self.inner.push_str(&span_text.escape_markup());
308                    self.inner.push_str("</a>");
309
310                    // The span was a valid email so we will not need to check it for the next span.
311                    prev_span = None;
312                }
313                _ => {
314                    if let MentionsMode::WithMentions {
315                        pills,
316                        room,
317                        detect_at_room: true,
318                    } = &mut self.mentions
319                    {
320                        if let Some(pill) = self.inner.append_and_replace_at_room(span_text, room) {
321                            pills.push(pill);
322                        }
323
324                        prev_span = Some(span_text);
325                        continue;
326                    }
327
328                    self.append_string(span_text);
329                    prev_span = Some(span_text);
330                }
331            }
332        }
333    }
334
335    /// Append the given string.
336    ///
337    /// Escapes the markup of the string.
338    fn append_string(&mut self, s: &str) {
339        self.inner.push_str(&s.escape_markup());
340    }
341
342    /// Append the given URI with the given link content.
343    fn append_uri(&mut self, uri: &str, content: &str) {
344        if let MentionsMode::WithMentions { pills, room, .. } = &mut self.mentions
345            && let Some(pill) = self.inner.maybe_append_mention(uri, room)
346        {
347            pills.push(pill);
348
349            return;
350        }
351
352        self.inner.append_link_opening_tag(uri);
353        self.append_string(content);
354        self.inner.push_str("</a>");
355    }
356
357    /// Append the given string detected as a URL.
358    ///
359    /// Appends false positives as normal strings, otherwise appends it as a
360    /// URI.
361    ///
362    /// Returns `true` if it was detected as a valid URL.
363    fn append_detected_url(&mut self, detected_url: &str, prev_span: Option<&str>) -> bool {
364        if Url::parse(detected_url).is_ok() {
365            // This is a full URL with a scheme, we can trust that it is valid.
366            self.append_uri(detected_url, detected_url);
367            return true;
368        }
369
370        // It does not have a scheme, try to split it to get only the domain.
371        let domain = if let Some((domain, _)) = detected_url.split_once('/') {
372            // This is a URL with a path component.
373            domain
374        } else if let Some((domain, _)) = detected_url.split_once('?') {
375            // This is a URL with a query component.
376            domain
377        } else if let Some((domain, _)) = detected_url.split_once('#') {
378            // This is a URL with a fragment.
379            domain
380        } else {
381            // It should only contain the full domain.
382            detected_url
383        };
384
385        // Check that the top-level domain is known.
386        if !domain.rsplit_once('.').is_some_and(|(_, d)| tld::exist(d)) {
387            // This is a false positive, treat it like a regular string.
388            self.append_string(detected_url);
389            return false;
390        }
391
392        // The LinkFinder detects the homeserver part of `matrix:` URIs and Matrix
393        // identifiers, e.g. it detects `example.org` in `matrix:r/somewhere:
394        // example.org` or in `#somewhere:matrix.org`. We can use that to detect the
395        // full URI or identifier with the previous span.
396
397        // First, detect if the previous character is `:`, this is common to URIs and
398        // identifiers.
399        if let Some(prev_span) = prev_span.filter(|s| s.ends_with(':')) {
400            // Most identifiers in Matrix do not have a list of allowed characters, so all
401            // characters are allowed… which makes it difficult to find where they start.
402            // We have to set arbitrary rules for the localpart to match most cases:
403            // - No whitespaces
404            // - No `:`, as it is the separator between localpart and server name, and after
405            //   the scheme in URIs
406            // - As soon as we encounter a known sigil, we assume we have the full ID. We
407            //   ignore event IDs because we need a room to be able to generate a link.
408            if let Some((pos, c)) = prev_span[..]
409                .char_indices()
410                .rev()
411                // Skip the `:` we detected earlier.
412                .skip(1)
413                .find(|(_, c)| c.is_whitespace() || matches!(c, ':' | '!' | '#' | '@'))
414            {
415                let maybe_id_start = &prev_span[pos..];
416
417                match c {
418                    ':' if prev_span[..pos].ends_with(MATRIX_URI_SCHEME) => {
419                        // This should be a matrix URI.
420                        let maybe_full_uri =
421                            format!("{MATRIX_URI_SCHEME}{maybe_id_start}{detected_url}");
422                        if MatrixUri::parse(&maybe_full_uri).is_ok() {
423                            // Remove the start of the URI from the string.
424                            self.inner.truncate(
425                                self.inner.len() - maybe_id_start.len() - MATRIX_URI_SCHEME.len(),
426                            );
427                            self.append_uri(&maybe_full_uri, &maybe_full_uri);
428
429                            return true;
430                        }
431                    }
432                    '!' => {
433                        // This should be a room ID.
434                        if let Ok(room_id) =
435                            RoomId::parse(format!("{maybe_id_start}{detected_url}"))
436                        {
437                            // Remove the start of the ID from the string.
438                            self.inner.truncate(self.inner.len() - maybe_id_start.len());
439                            // Transform it into a link.
440                            self.append_uri(&room_id.matrix_to_uri().to_string(), room_id.as_str());
441                            return true;
442                        }
443                    }
444                    '#' => {
445                        // This should be a room alias.
446                        if let Ok(room_alias) =
447                            RoomAliasId::parse(format!("{maybe_id_start}{detected_url}"))
448                        {
449                            // Remove the start of the ID from the string.
450                            self.inner.truncate(self.inner.len() - maybe_id_start.len());
451                            // Transform it into a link.
452                            self.append_uri(
453                                &room_alias.matrix_to_uri().to_string(),
454                                room_alias.as_str(),
455                            );
456                            return true;
457                        }
458                    }
459                    '@' => {
460                        // This should be a user ID.
461                        if let Ok(user_id) =
462                            UserId::parse(format!("{maybe_id_start}{detected_url}"))
463                        {
464                            // Remove the start of the ID from the string.
465                            self.inner.truncate(self.inner.len() - maybe_id_start.len());
466                            // Transform it into a link.
467                            self.append_uri(&user_id.matrix_to_uri().to_string(), user_id.as_str());
468                            return true;
469                        }
470                    }
471                    _ => {
472                        // We reached a whitespace without a sigil or URI
473                        // scheme, this must be a regular URL.
474                    }
475                }
476            }
477        }
478
479        self.append_uri(&format!("{HTTPS_URI_PREFIX}{detected_url}"), detected_url);
480        true
481    }
482}
483
/// The mentions mode of the [`Linkifier`].
///
/// The default is [`MentionsMode::NoMentions`].
#[derive(Debug, Default)]
enum MentionsMode<'a> {
    /// The linkifier will not detect mentions.
    #[default]
    NoMentions,
    /// The linkifier will detect mentions.
    WithMentions {
        /// The list where the pills for the detected mentions are added.
        pills: &'a mut Vec<Pill>,
        /// The room containing the mentions.
        room: &'a Room,
        /// Whether to detect `@room` mentions.
        detect_at_room: bool,
    },
}