aws_sigv4/http_request/
uri_path_normalization.rs

1/*
2 * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3 * SPDX-License-Identifier: Apache-2.0
4 */
5
6use std::borrow::Cow;
7
8// Normalize `uri_path` according to
9// https://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
10pub(super) fn normalize_uri_path(uri_path: &str) -> Cow<'_, str> {
11    // If the absolute path is empty, use a forward slash (/).
12    if uri_path.is_empty() {
13        return Cow::Borrowed("/");
14    }
15
16    // The canonical URI is the URI-encoded version of the _absolute_ path component of the URI.
17    let result = if uri_path.starts_with('/') {
18        Cow::Borrowed(uri_path)
19    } else {
20        Cow::Owned(format!("/{uri_path}"))
21    };
22
23    if !(result.contains('.') || result.contains("//")) {
24        return result;
25    }
26
27    Cow::Owned(normalize_path_segment(&result))
28}
29
30// Implement 5.2.4. Remove Dot Segments in https://www.rfc-editor.org/rfc/rfc3986
31//
32// The function assumes that `uri_path` is an absolute path,
33// starting with a forward slash.
34fn normalize_path_segment(uri_path: &str) -> String {
35    let number_of_slashes = uri_path.matches('/').count();
36    let mut normalized: Vec<&str> = Vec::with_capacity(number_of_slashes + 1);
37
38    for segment in uri_path.split('/') {
39        match segment {
40            // Segments that are empty or contain only a single period should not be preserved
41            "" | "." => {}
42            ".." => {
43                normalized.pop();
44            }
45            otherwise => normalized.push(otherwise),
46        }
47    }
48
49    let mut result = normalized.join("/");
50
51    // Even though `uri_path` starts with a `/`, that may not be the case for `result`.
52    // An example of this is `uri_path` being "/../foo" where the corresponding `result`
53    // will be "foo".
54    if !result.starts_with('/') {
55        result.insert(0, '/');
56    }
57
58    // If `uri_path` is "/foo/bar/.", normalizing it should be "/foo/bar/". However,
59    // the logic so far only makes `result` "/foo/bar", without the trailing slash.
60    // The condition below ensures that the trailing slash is appended to `result`
61    // if `uri_path` ends with a slash (per the RFC) but `result` does not.
62    if ends_with_slash(uri_path) && !result.ends_with('/') {
63        result.push('/');
64    }
65
66    result
67}
68
// Report whether `uri_path` ends with a suffix that becomes a trailing "/" after
// dot-segment removal (rules 2.B and 2.C of section 5.2.4 in RFC 3986).
fn ends_with_slash(uri_path: &str) -> bool {
    const SLASH_EQUIVALENT_SUFFIXES: [&str; 5] = ["/", "/.", "/./", "/..", "/../"];

    for suffix in SLASH_EQUIVALENT_SUFFIXES.iter() {
        if uri_path.ends_with(*suffix) {
            return true;
        }
    }
    false
}
75
#[cfg(test)]
mod tests {
    use super::*;

    // Assert that normalizing `input` yields the text `expected`. Only the textual
    // content is compared; whether the result is borrowed or owned is not checked.
    #[track_caller]
    fn assert_normalized(input: &str, expected: &str) {
        let normalized = normalize_uri_path(input);
        assert_eq!(
            &*normalized, expected,
            "unexpected normalization of {:?}",
            input
        );
    }

    // Assert that `input` is already canonical: it must come back unchanged AND
    // borrowed. `Cow` equality ignores the variant, so the variant is checked
    // explicitly here.
    #[track_caller]
    fn assert_borrowed_unchanged(input: &str) {
        let normalized = normalize_uri_path(input);
        assert!(
            matches!(normalized, Cow::Borrowed(_)),
            "expected {:?} to be returned without allocating",
            input
        );
        assert_eq!(&*normalized, input);
    }

    #[test]
    fn normalize_uri_path_should_not_modify_input_containing_just_a_forward_slash() {
        assert_borrowed_unchanged("/");
    }

    #[test]
    fn normalize_uri_path_should_add_a_forward_slash_when_input_is_empty() {
        assert_normalized("", "/");
    }

    #[test]
    fn normalize_uri_path_should_not_modify_single_non_dot_segment_starting_with_a_single_forward_slash(
    ) {
        assert_borrowed_unchanged("/foo");
    }

    #[test]
    fn normalize_uri_path_should_prepend_forward_slash_when_input_is_relative() {
        assert_normalized("foo", "/foo");
    }

    #[test]
    fn normalize_uri_path_should_not_modify_multiple_non_dot_segments_starting_with_a_single_forward_slash(
    ) {
        assert_borrowed_unchanged("/foo/bar");
    }

    #[test]
    fn normalize_uri_path_should_not_modify_multiple_non_dot_segments_with_a_trailing_forward_slash(
    ) {
        assert_borrowed_unchanged("/foo/bar/");
    }

    // 2.A in https://www.rfc-editor.org/rfc/rfc3986#section-5.2.4
    #[test]
    fn normalize_uri_path_should_remove_a_leading_dot_from_input() {
        // The expected value is "/" rather than "" because if the absolute path is empty,
        // we use a forward slash.
        assert_normalized("./", "/");

        assert_normalized("./foo", "/foo");
    }

    // 2.A in https://www.rfc-editor.org/rfc/rfc3986#section-5.2.4
    #[test]
    fn normalize_uri_path_should_remove_leading_double_dots_from_input() {
        // The expected value is "/" rather than "" because if the absolute path is empty,
        // we use a forward slash.
        assert_normalized("../", "/");

        assert_normalized("../foo", "/foo");
    }

    // 2.B in https://www.rfc-editor.org/rfc/rfc3986#section-5.2.4
    #[test]
    fn normalize_uri_path_should_remove_a_single_dot_from_input() {
        assert_normalized("/.", "/");
        assert_normalized("/./", "/");
        assert_normalized("/./foo", "/foo");
        assert_normalized("/foo/bar/.", "/foo/bar/");
        assert_normalized("/foo/bar/./", "/foo/bar/");
        assert_normalized("/foo/./bar/./", "/foo/bar/");
    }

    // 2.C in https://www.rfc-editor.org/rfc/rfc3986#section-5.2.4
    #[test]
    fn normalize_uri_path_should_remove_double_dots_from_input() {
        assert_normalized("/..", "/");
        assert_normalized("/../", "/");
        assert_normalized("/../foo", "/foo");
        assert_normalized("/foo/bar/..", "/foo/");
        assert_normalized("/foo/bar/../", "/foo/");
        assert_normalized("/foo/../bar/../", "/");
    }

    // 2.D in https://www.rfc-editor.org/rfc/rfc3986#section-5.2.4
    #[test]
    fn normalize_uri_path_should_replace_a_dot_segment_with_a_forward_slash() {
        assert_normalized(".", "/");
        assert_normalized("..", "/");
    }

    // Page 34 in https://www.rfc-editor.org/rfc/rfc3986
    #[test]
    fn normalize_uri_path_should_behave_as_expected_against_examples_in_rfc() {
        assert_normalized("/a/b/c/./../../g", "/a/g");
        // The expected value will be absolutized.
        assert_normalized("mid/content=5/../6", "/mid/6");
    }

    // The CRT does this so I figured we should too. - Zelda
    #[test]
    fn normalize_uri_path_should_merge_multiple_subsequent_slashes_into_one() {
        assert_normalized("//foo//", "/foo/");
    }

    #[test]
    fn normalize_uri_path_should_not_remove_dot_when_surrounded_by_percent_encoded_forward_slashes()
    {
        // Only the text is checked here: the input contains a '.', so it is routed
        // through the segment-by-segment pass and comes back as an owned copy.
        assert_normalized("/foo%2F.%2Fbar", "/foo%2F.%2Fbar");
    }
}